SproutCMS

This is the code documentation for the SproutCMS project

source of /sprout/Helpers/AdminSeo.php

  1. <?php
  2. /*
  3.  * Copyright (C) 2018 Karmabunny Pty Ltd.
  4.  *
  5.  * This file is a part of SproutCMS.
  6.  *
  7.  * SproutCMS is free software: you can redistribute it and/or modify it under the terms
  8.  * of the GNU General Public License as published by the Free Software Foundation, either
  9.  * version 2 of the License, or (at your option) any later version.
  10.  *
  11.  * For more information, visit <http://getsproutcms.com>.
  12.  */
  13.  
  14. namespace Sprout\Helpers;
  15.  
  16. use DOMDocument;
  17. use DOMXPath;
  18. use Kohana;
  19.  
  20. use DaveChild\TextStatistics\Maths;
  21. use DaveChild\TextStatistics\Syllables as Syllables;
  22. use DaveChild\TextStatistics\Text as TextDC;
  23. use Exception;
  24. use Sprout\Helpers\Inflector;
  25. use Sprout\Helpers\Sprout;
  26. use Sprout\Helpers\View;
  27.  
  28.  
  29. /**
  30.  * Provide Search Engine Optimisation functionality
  31.  */
  32. class AdminSeo
  33. {
  34. public static $content = '';
  35. public static $extra_links = [];
  36. public static $dom = '';
  37. public static $topic = '';
  38. public static $slug = '';
  39. public static $seo_problems = [];
  40. public static $seo_improvements = [];
  41. public static $seo_considerations = [];
  42. public static $seo_goodresults = [];
  43.  
  44.  
  45. /**
  46.   * Add main content for later processing
  47.   *
  48.   * @param string $str HTML
  49.   * @return void
  50.   */
  51. public static function addContent($str)
  52. {
  53. self::$content .= ' ' . $str;
  54. }
  55.  
  56.  
  57. /**
  58.   * Add external links to inject into content analysis
  59.   *
  60.   * @param array $links [href, text] pairs
  61.   * @return void
  62.   */
  63. public static function addLinks($links)
  64. {
  65. self::$extra_links = $links;
  66. }
  67.  
  68.  
  69. /**
  70.   * Set topic (focus word) for analysis
  71.   *
  72.   * @param string $str Word or words as the main topic
  73.   * @return void
  74.   */
  75. public static function setTopic($str)
  76. {
  77. $str = TextDC::cleanText($str);
  78. $str = trim(strtolower($str));
  79. self::$topic = str_replace('.','',$str);
  80. }
  81.  
  82.  
  83. /**
  84.   * Set page slug for analysis
  85.   *
  86.   * @param string $str The front-end URL for current edited page
  87.   * @return void
  88.   */
  89. public static function setSlug($str)
  90. {
  91. $str = urldecode(trim(strtolower($str)));
  92. self::$slug = $str;
  93. }
  94.  
  95.  
  96. /**
  97.   * Return list of useful keywords from given string
  98.   *
  99.   * @param bool $all True to include stop-words. Default of false (remove stop words)
  100.   * @return array List of words
  101.   */
  102. public static function processString($all = false)
  103. {
  104. $all = (bool) $all;
  105.  
  106. $str = self::$content;
  107. $str = str_replace('&nbsp;', ' ', trim(strtolower($str)));
  108. $str = TextDC::cleanText($str, 0);
  109.  
  110. if (!$all) {
  111. $expr = '/\b(' . implode('|', Kohana::config('admin_seo.stop_words')) . ')\b/i';
  112. $str = preg_replace($expr, '', $str);
  113. $str = preg_replace('/[0-9]/', '', $str);
  114. $str = trim($str);
  115. }
  116.  
  117. $words = preg_split("/[^\w]/", $str);
  118. $words = array_filter($words);
  119. sort($words);
  120.  
  121. return $words;
  122. }
  123.  
  124.  
  125. /**
  126.   * Setup content as DOM object
  127.   *
  128.   * @return void Sets class var directly
  129.   */
  130. public static function processDOM()
  131. {
  132. if (!empty(self::$dom)) return;
  133. self::$dom = new DOMDocument();
  134. self::$dom->loadHTML(self::$content, LIBXML_NOWARNING | LIBXML_NOERROR);
  135. }
  136.  
  137.  
  138. /**
  139.   * Return list of keyword density
  140.   *
  141.   * @param string $str HTML to be processed
  142.   * @param int $limit Number of results. Default of top five words
  143.   * @return array [word => count] pairs
  144.   */
  145. public static function getKeywordDensity($limit = 5)
  146. {
  147. $limit = (int) $limit;
  148. if ($limit <= 0 || $limit > 999) $limit = 5;
  149.  
  150. $words = self::processString();
  151. $list = [];
  152.  
  153. foreach ($words as $word) {
  154. if (empty($list[$word])) $list[$word] = 0;
  155. $list[$word] ++;
  156. }
  157.  
  158. // Order largest to smallest
  159. arsort($list);
  160.  
  161. // Cap at given limit
  162. while (count($list) > $limit) {
  163. array_pop($list);
  164. }
  165.  
  166. return $list;
  167. }
  168.  
  169.  
  170. /**
  171.   * Returns the average word count per section
  172.   *
  173.   * @return int Average words
  174.   */
  175. public static function getWordCountPerSection()
  176. {
  177. self::processDOM();
  178. }
  179.  
  180.  
  181. /**
  182.   * Return list of all links
  183.   *
  184.   * @return array List of URLs
  185.   */
  186. public static function getListOfLinks()
  187. {
  188. self::processDOM();
  189.  
  190. $list = [];
  191. $links = self::$dom->getElementsByTagName("a");
  192. foreach($links as $link) {
  193. $href = $link->getAttribute("href");
  194. $text = trim(preg_replace("/[\r\n]/", " ", $link->nodeValue));
  195. $list[] = [
  196. 'href' => $href,
  197. 'text' => $text
  198. ];
  199. }
  200.  
  201. if (!empty(self::$extra_links)) $list = array_merge($list, self::$extra_links);
  202.  
  203. return $list;
  204. }
  205.  
  206.  
  207. /**
  208.   * Determine if given word is a stop-word
  209.   *
  210.   * @param string $word Word to check
  211.   * @return bool True when is stop-word
  212.   * @return bool False when not stop-word
  213.   */
  214. public static function isStopWord($word)
  215. {
  216. $word = trim(strtolower($word));
  217. return in_array($word, Kohana::config('admin_seo.stop_words'));
  218. }
  219.  
  220.  
  221. /**
  222.   * Determine Flesch reading score
  223.   * 0 = hard, 100 = easy
  224.   * Thanks to github.com/DaveChild
  225.   *
  226.   * @param string $str Text to score
  227.   * @param string $encoding Encoding of text
  228.   * @return int
  229.   */
  230. public static function getFleschReadingScore($str, $encoding = '')
  231. {
  232. $str = TextDC::cleanText($str);
  233.  
  234. try {
  235. $score = Maths::bcCalc(
  236. Maths::bcCalc(
  237. 206.835,
  238. '-',
  239. Maths::bcCalc(
  240. 1.015,
  241. '*',
  242. TextDC::averageWordsPerSentence($str, $encoding)
  243. )
  244. ),
  245. '-',
  246. Maths::bcCalc(
  247. 84.6,
  248. '*',
  249. Syllables::averageSyllablesPerWord($str, $encoding)
  250. )
  251. );
  252. } catch (Exception $ex) {
  253. $score = 0;
  254. }
  255.  
  256.  
  257. return Maths::normaliseScore($score, 0, 100, 1);
  258. }
  259.  
  260.  
  261. /**
  262.   * Populate SEO view with analysis
  263.   *
  264.   * @return string HTML view
  265.   */
  266. public static function getAnalysis()
  267. {
  268. if (empty(self::$content) or TextDC::wordCount(self::$content) < 25) {
  269. $view = new View('sprout/admin/main_seo');
  270. $view->disabled = true;
  271. return $view->render();
  272. }
  273.  
  274. self::determineReadabilityScore();
  275. self::determineWordCountScore();
  276. self::determineAverageWordScore();
  277. self::determineTopicWordsScore();
  278. self::determineSlugWordsScore();
  279. self::determineLinksScore();
  280. self::determineSectionWordScore();
  281.  
  282. $view = new View('sprout/admin/main_seo');
  283.  
  284. $view->keywords = self::getKeywordDensity(6);
  285. $view->seo_problems = self::$seo_problems;
  286. $view->seo_improvements = self::$seo_improvements;
  287. $view->seo_considerations = self::$seo_considerations;
  288. $view->seo_goodresults = self::$seo_goodresults;
  289.  
  290. return $view->render();
  291. }
  292.  
  293.  
  294. /**
  295.   * Determine SEO readability score
  296.   *
  297.   * @return void Updates result arrays directly
  298.   */
  299. public static function determineReadabilityScore()
  300. {
  301. $score = self::getFleschReadingScore(self::$content);
  302. $ratings = Kohana::config('admin_seo.readability_scores');
  303.  
  304. foreach ($ratings as $rating) {
  305. if (floor($score) > $rating['range'][0] and floor($score) <= $rating['range'][1]) {
  306. switch ($rating['type']) {
  307. case 'good':
  308. self::$seo_goodresults[] = sprintf('Readability score: %u%%. %s %s', $score, $rating['desc'], $rating['fix']);
  309. break;
  310.  
  311. case 'problem':
  312. self::$seo_problems[] = sprintf('Readability score: %u%%. %s %s', $score, $rating['desc'], $rating['fix']);
  313. break;
  314. }
  315. break;
  316. }
  317. }
  318. }
  319.  
  320.  
  321. /**
  322.   * Determine SEO word count score
  323.   *
  324.   * @return void Updates result arrays directly
  325.   */
  326. public static function determineWordCountScore()
  327. {
  328. $count = TextDC::wordCount(self::$content);
  329. $score = Kohana::config('admin_seo.word_count');
  330.  
  331. if ($count < $score) {
  332. self::$seo_improvements[] = sprintf('Content contains %u %s. This is below the recommended minimum of %u words.', $count, Inflector::plural('word', $count), $score);
  333. } else if ($count >= $score) {
  334. self::$seo_goodresults[] = sprintf('Content contains the recommended minimum of %u words', $score);
  335. }
  336. }
  337.  
  338.  
  339. /**
  340.   * Determine SEO average word score
  341.   *
  342.   * @return void Updates result arrays directly
  343.   */
  344. public static function determineAverageWordScore()
  345. {
  346. $avg = ceil(TextDC::averageWordsPerSentence(self::$content));
  347. $words = Kohana::config('admin_seo.average_words_sentence');
  348.  
  349. if ($avg < $words) {
  350. self::$seo_goodresults[] = sprintf('Your sentences contain an average of %u words. Aiming for average maximum of %u.', $avg, $words);
  351. } else {
  352. self::$seo_considerations[] = sprintf('Your sentences contain an average of %u words. Aim for an average maximum of %u words.', $avg, $words);
  353. }
  354. }
  355.  
  356.  
  357. /**
  358.   * Determine SEO topic keywords score
  359.   *
  360.   * @return void Updates result arrays directly
  361.   */
  362. public static function determineTopicWordsScore()
  363. {
  364. if (empty(self::$topic)) return;
  365.  
  366. $keywords = self::getKeywordDensity(6);
  367. $words = explode(' ', self::$topic);
  368. $topic = false;
  369. $stopwords = false;
  370. $count = 0;
  371.  
  372. foreach ($words as $word) {
  373. if (self::isStopWord($word)) $stopwords = true;
  374. if (isset($keywords[$word])) {
  375. $topic = true;
  376. $count ++;
  377. }
  378. }
  379.  
  380. if ($topic) {
  381. self::$seo_goodresults[] = sprintf('Keywords appear in topic "%s" %u %s.', self::$topic, $count, Inflector::plural('time', $count));;
  382. } else {
  383. self::$seo_improvements[] = sprintf('Keywords do not appear in your topic "%s".', self::$topic);
  384. }
  385.  
  386. if ($stopwords) {
  387. self::$seo_considerations[] = sprintf('Your topic "%s" contains <a href="https://en.wikipedia.org/wiki/Stop_words" target="_blank">stop words</a>. This may or may not be wise depending on the circumstances.', self::$topic);
  388. }
  389. }
  390.  
  391.  
  392. /**
  393.   * Determine SEO slug stopwords score
  394.   *
  395.   * @return void Updates result arrays directly
  396.   */
  397. public static function determineSlugWordsScore()
  398. {
  399. if (empty(self::$slug)) return;
  400.  
  401. $stopword = false;
  402. $keyword = false;
  403. $kwords = self::getKeywordDensity(6);
  404. $slug_words = preg_split('~[\W_]+~', self::$slug);
  405.  
  406. foreach ($slug_words as $slug_word) {
  407. if (self::isStopWord($slug_word)) $stopword = true;
  408. if (in_array($slug_word, array_keys($kwords))) $keyword = true;
  409. }
  410.  
  411. if (!$keyword) {
  412. self::$seo_improvements[] = 'Keywords do not appear in your URL slug.';
  413. }
  414.  
  415. if ($stopword) {
  416. self::$seo_considerations[] = 'The URL slug contains <a href="https://en.wikipedia.org/wiki/Stop_words" target="_blank">stop words</a>. This may or may not be wise depending on the circumstances.';
  417. }
  418.  
  419. // Topic in slug
  420. if (empty(self::$topic) or empty($slug_words)) return;
  421.  
  422. $topic_words = explode(' ', self::$topic);
  423. $topic = false;
  424.  
  425. foreach ($topic_words as $topic_word) {
  426. if (in_array($topic_word, $slug_words)) $topic = true;
  427. }
  428.  
  429. if ($topic) {
  430. self::$seo_goodresults[] = sprintf('Topic "%s" appears in the URL slug.', self::$topic);
  431. } else {
  432. self::$seo_improvements[] = sprintf('Topic "%s" doesn\'t appear in the URL slug.', self::$topic);
  433. }
  434. }
  435.  
  436.  
  437. /**
  438.   * Determines SEO links score
  439.   *
  440.   * @return void Updates result arrays directly
  441.   */
  442. public static function determineLinksScore()
  443. {
  444. $links = self::getListOfLinks();
  445.  
  446. if (count($links) == 0) {
  447. self::$seo_considerations[] = 'Content contains no links. Try linking to other pages within your site of related content.';
  448. return;
  449. }
  450.  
  451. // Determine internal links
  452. $internal = false;
  453. $read_more = false;
  454. foreach ($links as $link) {
  455. // Determine if "read more" link label
  456. if (in_array(
  457. TextDC::cleanText(TextDC::lowerCase(str_replace(['.', '-'], '', $link['text']))),
  458. ['more', 'read more', 'view more'])
  459. ) $read_more = true;
  460.  
  461. if (strpos($link['href'], Sprout::absRoot()) !== false) {
  462. $internal = true;
  463. } else if (strpos($link['href'], 'http') === false) {
  464. $internal = true;
  465. }
  466. }
  467.  
  468. // No internal links
  469. if (!$internal) {
  470. self::$seo_considerations[] = 'Try linking to pages of related topics within your site.';
  471. }
  472.  
  473. // Generic link labels
  474. if ($read_more) {
  475. self::$seo_problems[] = 'Avoid generic "read more" link labels. Give labels that help users confidently predict what the next page will be.';
  476. }
  477. }
  478.  
  479.  
  480. /**
  481.   * Determine SEO word count per section score
  482.   *
  483.   * @return void Updates result arrays directly
  484.   */
  485. public static function determineSectionWordScore()
  486. {
  487. self::processDOM();
  488. $xpath = new DOMXPath(self::$dom);
  489.  
  490. // Bold as headings
  491. $false_headings = $xpath->query("//p/strong");
  492. foreach ($false_headings as $heading) {
  493. if ($heading->previousSibling === null and $heading->nextSibling === null) {
  494. self::$seo_problems[] = 'Avoid using Bold styling as headings. Use heading styles: <strong>H</strong> buttons in the tool-bar.';
  495. break;
  496. }
  497. }
  498.  
  499. // No headings
  500. $headings = $xpath->query("//h1|//h2|//h3|//h4");
  501. if ($headings->length == 0) {
  502. self::$seo_considerations[] = 'Use headings to break your content into sections for easier reading.';
  503. return;
  504. }
  505.  
  506. // Content between headings (sections)
  507. $contents = [];
  508. $sections = 0;
  509.  
  510. $elems = $xpath->query('//body/*');
  511. foreach ($elems as $elem) {
  512. if (empty($contents[$sections])) $contents[$sections] = '';
  513.  
  514. // Not a heading, concat text to make up a "section"
  515. if (!in_array($elem->tagName, ['h1','h2','h3','h4'])) {
  516. $contents[$sections] .= $elem->nodeValue;
  517. } else {
  518. $sections ++;
  519. }
  520. }
  521.  
  522. // Count words per section
  523. $count = 0;
  524. foreach ($contents as &$content) {
  525. $content = TextDC::cleanText($content);
  526. $words = TextDC::wordCount($content);
  527. if ($words > $count) $count = $words;
  528. }
  529.  
  530. // Check if above recommended maximum
  531. $score = Kohana::config('admin_seo.word_count');
  532. if ($count >= $score) {
  533. self::$seo_improvements[] = sprintf('Content between headings contains %u %s. This is above the recommended maximum of %u words per section.', $count, Inflector::plural('word', $count), $score);
  534. }
  535. }
  536. }
  537.