<?php
/*
 * Copyright (C) 2018 Karmabunny Pty Ltd.
 *
 * This file is a part of SproutCMS.
 *
 * SproutCMS is free software: you can redistribute it and/or modify it under the terms
 * of the GNU General Public License as published by the Free Software Foundation, either
 * version 2 of the License, or (at your option) any later version.
 *
 * For more information, visit <http://getsproutcms.com>.
 */
 
namespace Sprout\Helpers;
 
use DOMDocument;
use DOMXPath;
use Kohana;
 
use DaveChild\TextStatistics\Maths;
use DaveChild\TextStatistics\Syllables as Syllables;
use DaveChild\TextStatistics\Text as TextDC;
use Exception;
use Sprout\Helpers\Inflector;
use Sprout\Helpers\Sprout;
use Sprout\Helpers\View;
 
 
/**
 * Provide Search Engine Optimisation functionality
 */
class AdminSeo
{
    public static $content = '';
    public static $extra_links = [];
    public static $dom = '';
    public static $topic = '';
    public static $slug = '';
    public static $seo_problems = [];
    public static $seo_improvements = [];
    public static $seo_considerations = [];
    public static $seo_goodresults = [];
 
 
    /**
     * Add main content for later processing
     *
     * @param string $str HTML
     * @return void
     */
    public static function addContent($str)
    {
        self::$content .= ' ' . $str;
    }
 
 
    /**
     * Add external links to inject into content analysis
     *
     * @param array $links [href, text] pairs
     * @return void
     */
    public static function addLinks($links)
    {
        self::$extra_links = $links;
    }
 
 
    /**
     * Set topic (focus word) for analysis
     *
     * @param string $str Word or words as the main topic
     * @return void
     */
    public static function setTopic($str)
    {
        $str = TextDC::cleanText($str);
    }
 
 
    /**
     * Set page slug for analysis
     *
     * @param string $str The front-end URL for current edited page
     * @return void
     */
    public static function setSlug($str)
    {
        self::$slug = $str;
    }
 
 
    /**
     * Return list of useful keywords from given string
     *
     * @param bool $all True to include stop-words. Default of false (remove stop words)
     * @return array List of words
     */
    public static function processString($all = false)
    {
        $all = (bool) $all;
 
        $str = self::$content;
        $str = TextDC::cleanText($str, 0);
 
        if (!$all) {
            $expr = '/\b(' . implode('|', Kohana
::config('admin_seo.stop_words')) . ')\b/i';         }
 
 
        return $words;
    }
 
 
    /**
     * Setup content as DOM object
     *
     * @return void Sets class var directly
     */
    public static function processDOM()
    {
        if (!empty(self::$dom)) return;         self::$dom = new DOMDocument();
        self::$dom->loadHTML(self::$content, LIBXML_NOWARNING | LIBXML_NOERROR);
    }
 
 
    /**
     * Return list of keyword density
     *
     * @param string $str HTML to be processed
     * @param int $limit Number of results. Default of top five words
     * @return array [word => count] pairs
     */
    public static function getKeywordDensity($limit = 5)
    {
        $limit = (int) $limit;
        if ($limit <= 0 || $limit > 999) $limit = 5;
 
        $words = self::processString();
        $list = [];
 
        foreach ($words as $word) {
            if (empty($list[$word])) $list[$word] = 0;             $list[$word] ++;
        }
 
        // Order largest to smallest
 
        // Cap at given limit
        while (count($list) > $limit) {         }
 
        return $list;
    }
 
 
    /**
     * Returns the average word count per section
     *
     * @return int Average words
     */
    public static function getWordCountPerSection()
    {
        self::processDOM();
    }
 
 
    /**
     * Return list of all links
     *
     * @return array List of URLs
     */
    public static function getListOfLinks()
    {
        self::processDOM();
 
        $list = [];
        $links = self::$dom->getElementsByTagName("a");
        foreach($links as $link) {
            $href =  $link->getAttribute("href");
            $list[] = [
                'href' => $href,
                'text' => $text
            ];
        }
 
 
        return $list;
    }
 
 
    /**
     * Determine if given word is a stop-word
     *
     * @param string $word Word to check
     * @return bool True when is stop-word
     * @return bool False when not stop-word
     */
    public static function isStopWord($word)
    {
        return in_array($word, Kohana
::config('admin_seo.stop_words'));     }
 
 
    /**
     * Determine Flesch reading score
     * 0 = hard, 100 = easy
     * Thanks to github.com/DaveChild
     *
     * @param string $str Text to score
     * @param string $encoding Encoding of text
     * @return int
     */
    public static function getFleschReadingScore($str, $encoding = '')
    {
        $str = TextDC::cleanText($str);
 
        try {
            $score = Maths::bcCalc(
                Maths::bcCalc(
                    206.835,
                    '-',
                    Maths::bcCalc(
                        1.015,
                        '*',
                        TextDC::averageWordsPerSentence($str, $encoding)
                    )
                ),
                '-',
                Maths::bcCalc(
                    84.6,
                    '*',
                    Syllables::averageSyllablesPerWord($str, $encoding)
                )
            );
        } catch (Exception $ex) {
            $score = 0;
        }
 
 
        return Maths::normaliseScore($score, 0, 100, 1);
    }
 
 
    /**
     * Populate SEO view with analysis
     *
     * @return string HTML view
     */
    public static function getAnalysis()
    {
        if (empty(self::$content) or TextDC
::wordCount(self::$content) < 25) {             $view = new View('sprout/admin/main_seo');
            $view->disabled = true;
            return $view->render();
        }
 
        self::determineReadabilityScore();
        self::determineWordCountScore();
        self::determineAverageWordScore();
        self::determineTopicWordsScore();
        self::determineSlugWordsScore();
        self::determineLinksScore();
        self::determineSectionWordScore();
 
        $view = new View('sprout/admin/main_seo');
 
        $view->keywords = self::getKeywordDensity(6);
        $view->seo_problems = self::$seo_problems;
        $view->seo_improvements = self::$seo_improvements;
        $view->seo_considerations = self::$seo_considerations;
        $view->seo_goodresults = self::$seo_goodresults;
 
        return $view->render();
    }
 
 
    /**
     * Determine SEO readability score
     *
     * @return void Updates result arrays directly
     */
    public static function determineReadabilityScore()
    {
        $score = self::getFleschReadingScore(self::$content);
        $ratings = Kohana::config('admin_seo.readability_scores');
 
        foreach ($ratings as $rating) {
            if (floor($score) > $rating['range'][0] and 
floor($score) <= $rating['range'][1]) {                 switch ($rating['type']) {
                    case 'good':
                        self::$seo_goodresults[] = sprintf('Readability score: %u%%. %s %s', $score, $rating['desc'], $rating['fix']);                         break;
 
                    case 'problem':
                        self::$seo_problems[] = sprintf('Readability score: %u%%. %s %s', $score, $rating['desc'], $rating['fix']);                         break;
                }
                break;
            }
        }
    }
 
 
    /**
     * Determine SEO word count score
     *
     * @return void Updates result arrays directly
     */
    public static function determineWordCountScore()
    {
        $count = TextDC::wordCount(self::$content);
        $score = Kohana::config('admin_seo.word_count');
 
        if ($count < $score) {
            self::$seo_improvements[] = sprintf('Content contains %u %s. This is below the recommended minimum of %u words.', $count, Inflector
::plural('word', $count), $score);         } else if ($count >= $score) {
            self::$seo_goodresults[] = sprintf('Content contains the recommended minimum of %u words', $score);         }
    }
 
 
    /**
     * Determine SEO average word score
     *
     * @return void Updates result arrays directly
     */
    public static function determineAverageWordScore()
    {
        $avg = ceil(TextDC
::averageWordsPerSentence(self::$content));         $words = Kohana::config('admin_seo.average_words_sentence');
 
        if ($avg < $words) {
            self::$seo_goodresults[] = sprintf('Your sentences contain an average of %u words. Aiming for average maximum of %u.', $avg, $words);         } else {
            self::$seo_considerations[] = sprintf('Your sentences contain an average of %u words. Aim for an average maximum of %u words.', $avg, $words);         }
    }
 
 
    /**
     * Determine SEO topic keywords score
     *
     * @return void Updates result arrays directly
     */
    public static function determineTopicWordsScore()
    {
        if (empty(self::$topic)) return;  
        $keywords = self::getKeywordDensity(6);
        $words = explode(' ', self::$topic);         $topic = false;
        $stopwords = false;
        $count = 0;
 
        foreach ($words as $word) {
            if (self::isStopWord($word)) $stopwords = true;
            if (isset($keywords[$word])) {                 $topic = true;
                $count ++;
            }
        }
 
        if ($topic) {
            self::$seo_goodresults[] = sprintf('Keywords appear in topic "%s" %u %s.', self::$topic, $count, Inflector
::plural('time', $count));;         } else {
            self::$seo_improvements[] = sprintf('Keywords do not appear in your topic "%s".', self::$topic);         }
 
        if ($stopwords) {
            self::$seo_considerations[] = sprintf('Your topic "%s" contains <a href="https://en.wikipedia.org/wiki/Stop_words" target="_blank">stop words</a>. This may or may not be wise depending on the circumstances.', self::$topic);         }
    }
 
 
    /**
     * Determine SEO slug stopwords score
     *
     * @return void Updates result arrays directly
     */
    public static function determineSlugWordsScore()
    {
        if (empty(self::$slug)) return;  
        $stopword = false;
        $keyword = false;
        $kwords = self::getKeywordDensity(6);
        $slug_words = preg_split('~[\W_]+~', self::$slug);  
        foreach ($slug_words as $slug_word) {
            if (self::isStopWord($slug_word)) $stopword = true;
        }
 
        if (!$keyword) {
            self::$seo_improvements[] = 'Keywords do not appear in your URL slug.';
        }
 
        if ($stopword) {
            self::$seo_considerations[] = 'The URL slug contains <a href="https://en.wikipedia.org/wiki/Stop_words" target="_blank">stop words</a>. This may or may not be wise depending on the circumstances.';
        }
 
        // Topic in slug
        if (empty(self::$topic) or 
empty($slug_words)) return;  
        $topic_words = explode(' ', self::$topic);         $topic = false;
 
        foreach ($topic_words as $topic_word) {
            if (in_array($topic_word, $slug_words)) $topic = true;         }
 
        if ($topic) {
            self::$seo_goodresults[] = sprintf('Topic "%s" appears in the URL slug.', self::$topic);         } else {
            self::$seo_improvements[] = sprintf('Topic "%s" doesn\'t appear in the URL slug.', self::$topic);         }
    }
 
 
    /**
     * Determines SEO links score
     *
     * @return void Updates result arrays directly
     */
    public static function determineLinksScore()
    {
        $links = self::getListOfLinks();
 
        if (count($links) == 0) {             self::$seo_considerations[] = 'Content contains no links. Try linking to other pages within your site of related content.';
            return;
        }
 
        // Determine internal links
        $internal = false;
        $read_more = false;
        foreach ($links as $link) {
            // Determine if "read more" link label
                TextDC
::cleanText(TextDC
::lowerCase(str_replace(['.', '-'], '', $link['text']))),                ['more', 'read more', 'view more'])
            ) $read_more = true;
 
            if (strpos($link['href'], Sprout
::absRoot()) !== false) {                 $internal = true;
            } else if (strpos($link['href'], 'http') === false)  {                 $internal = true;
            }
        }
 
        // No internal links
        if (!$internal) {
            self::$seo_considerations[] = 'Try linking to pages of related topics within your site.';
        }
 
        // Generic link labels
        if ($read_more) {
            self::$seo_problems[] = 'Avoid generic "read more" link labels. Give labels that help users confidently predict what the next page will be.';
        }
    }
 
 
    /**
     * Determine SEO word count per section score
     *
     * @return void Updates result arrays directly
     */
    public static function determineSectionWordScore()
    {
        self::processDOM();
        $xpath = new DOMXPath(self::$dom);
 
        // Bold as headings
        $false_headings = $xpath->query("//p/strong");
        foreach ($false_headings as $heading) {
            if ($heading->previousSibling === null and $heading->nextSibling === null) {
                self::$seo_problems[] = 'Avoid using Bold styling as headings. Use heading styles: <strong>H</strong> buttons in the tool-bar.';
                break;
            }
        }
 
        // No headings
        $headings = $xpath->query("//h1|//h2|//h3|//h4");
        if ($headings->length == 0) {
            self::$seo_considerations[] = 'Use headings to break your content into sections for easier reading.';
            return;
        }
 
        // Content between headings (sections)
        $contents = [];
        $sections = 0;
 
        $elems = $xpath->query('//body/*');
        foreach ($elems as $elem) {
            if (empty($contents[$sections])) $contents[$sections] = '';  
            // Not a heading, concat text to make up a "section"
            if (!in_array($elem->tagName, ['h1','h2','h3','h4'])) {                 $contents[$sections] .= $elem->nodeValue;
            } else {
                $sections ++;
            }
        }
 
        // Count words per section
        $count = 0;
        foreach ($contents as &$content) {
            $content = TextDC::cleanText($content);
            $words = TextDC::wordCount($content);
            if ($words > $count) $count = $words;
        }
 
        // Check if above recommended maximum
        $score = Kohana::config('admin_seo.word_count');
        if ($count >= $score) {
            self::$seo_improvements[] = sprintf('Content between headings contains %u %s. This is above the recommended maximum of %u words per section.', $count, Inflector
::plural('word', $count), $score);         }
    }
}