| source of /sprout/Helpers/Text.php
Copyright (C) 2017 Karmabunny Pty Ltd.
This file is a part of SproutCMS.
SproutCMS is free software: you can redistribute it and/or modify it under the terms
of the GNU General Public License as published by the Free Software Foundation, either
version 2 of the License, or (at your option) any later version.
For more information, visit <http://getsproutcms.com>.
This class was originally from Kohana 2.3.4
<?php
/**
 * Copyright (C) 2017 Karmabunny Pty Ltd.
 *
 * This file is a part of SproutCMS.
 *
 * SproutCMS is free software: you can redistribute it and/or modify it under the terms
 * of the GNU General Public License as published by the Free Software Foundation, either
 * version 2 of the License, or (at your option) any later version.
 *
 * For more information, visit <http://getsproutcms.com>.
 *
 * This class was originally from Kohana 2.3.4
 * Copyright 2007-2008 Kohana Team
 */
namespace Sprout\Helpers;


/**
 * Various text helpers such as limiting.
 */
class Text
{

    /**
     * Limits a plain-text phrase to a given number of words.
     *
     * @param string $str Phrase to limit words of, in plain text
     * @param int $limit Number of words to limit to
     * @param string $end_char Characters to append if text is limited, e.g. '...'
     * @return string Plain text
     */
    public static function limitWords($str, $limit = 100, $end_char = NULL)
    {
        $limit = (int) $limit;
        $end_char = ($end_char === NULL) ? '…' : $end_char;

        // An empty or whitespace-only phrase has no words to limit
        if (trim($str) === '') {
            return $str;
        }

        if ($limit <= 0) {
            return $end_char;
        }

        // The /u pattern fails outright on invalid UTF-8; hand the input back
        // untouched rather than reading an undefined match.
        if (!preg_match('/^\s*+(?:\S++\s*+){1,'.$limit.'}/u', $str, $matches)) {
            return $str;
        }

        // Only attach the end character if the matched string is shorter
        // than the starting string.
        return rtrim($matches[0]).(strlen($matches[0]) === strlen($str) ? '' : $end_char);
    }


    /**
     * Limits a plain-text phrase to a given number of characters.
     *
     * @param string $str Phrase to limit characters of, in plain text
     * @param int $limit Number of characters to limit to
     * @param string $end_char Characters to append if text is limited, e.g. '...'
     * @param boolean $preserve_words True if whole words should be preserved; false to allow ending on a partial word
     * @return string Plain text
     */
    public static function limitChars($str, $limit = 100, $end_char = NULL, $preserve_words = FALSE)
    {
        $end_char = ($end_char === NULL) ? '…' : $end_char;

        $limit = (int) $limit;

        // Nothing to do for blank text or text which already fits
        if (trim($str) === '' || mb_strlen($str, 'UTF-8') <= $limit) {
            return $str;
        }

        if ($limit <= 0) {
            return $end_char;
        }

        if ($preserve_words == FALSE) {
            return rtrim(mb_substr($str, 0, $limit, 'UTF-8')).$end_char;
        }

        // Take ($limit - 1) characters, then run on to the end of the word in progress.
        // As with limitWords(), a failed match (invalid UTF-8) returns the input as-is.
        if (!preg_match('/^.{'.($limit - 1).'}\S*/us', $str, $matches)) {
            return $str;
        }

        return rtrim($matches[0]).(strlen($matches[0]) === strlen($str) ? '' : $end_char);
    }


    /**
    * Limits HTML to a certain number of words.
    * Is aware of tags etc and will not count them in the word-count, as well as closing them properly.
    *
    * This doesn't actually pass all unit tests at the moment - an exact match in num words will still put in ... part.
    *
    * @param string $text HTML to limit
    * @param int $limit Number of words to limit to
    * @return string HTML, with any tags still open at the cut-off point closed
    **/
    public static function limitWordsHtml($text, $limit = 50)
    {
        $count = 0;
        $offset = 0;
        $over = 0;
        $out = '';
        $stack = [];

        // These shouldn't have an end tag
        $single_tags = '/^(?:br|wbr|area|hr|img|input)$/i';

        // Nuke HTML comments and duplicate space
        $text = preg_replace('/<!--.*?-->/s', '', $text);
        $text = preg_replace('/\s\s+/', ' ', $text);

        // The alternation is grouped so that \G anchors every alternative, not just the
        // first one; otherwise a token could match beyond $offset and the offset would drift.
        // Stray angle brackets are consumed as non-words so that scanning never stalls.
        //              opening tag       closing tag    words            non-words
        $pattern = '!\G(?:(<[a-z0-9]+[^>]*>)|(</[a-z0-9]+>)|([-_a-zA-Z0-9]+)|([^-_a-zA-Z0-9<>]+|[<>]))!si';

        while (preg_match($pattern, $text, $m, 0, $offset)) {
            $offset += strlen($m[0]);

            if ($m[1] !== '') {
                if ($over) { $out .= '...'; break; }
                preg_match('!^<([a-z0-9]+)[^>]*>$!i', $m[0], $matches);
                if (!preg_match($single_tags, $matches[1])) {
                    // Remember the closing tag which will be needed later
                    $stack[] = '</' . $matches[1] . '>';
                }
                $out .= $m[0];

            } elseif ($m[2] !== '') {
                // Only unwind the stack if this closing tag was actually opened;
                // a stray closing tag is dropped instead of looping on an empty stack.
                $open = array_map('strtolower', $stack);
                if (in_array(strtolower($m[0]), $open, true)) {
                    do {
                        $pop = array_pop($stack);
                        $out .= $pop;
                    } while (strcasecmp($pop, $m[0]) !== 0);
                }

            } elseif ($m[3] !== '') {
                // Compared against '' rather than truthiness, so the word "0" is counted
                if ($over) { $out .= '...'; break; }
                $out .= $m[0];
                $count++;
                if ($count == $limit) {
                    $over++;
                }

            } else {
                if ($over) { $out .= '...'; break; }
                $out .= $m[0];
            }
        }

        // Close anything which is still open
        while ($pop = array_pop($stack)) { $out .= $pop; }

        return $out;
    }


    /**
     * Determines whether given HTML contains a FORM tag, which can cause nested-forms issues
     *
     * Not tested with malformed input - should not be used as an XSS filter
     *
     * @param string $html HTML to check
     * @return bool True if the string contains a FORM tag, false if it doesn't
     */
    public static function containsFormTag($html)
    {
        // Quick test before even doing string manipulation
        if (stripos($html, '<form') === false) {
            return false;
        }

        // These tags always contain CDATA so nuke them entirely
        $html = preg_replace('!<script[^>]*>.+?</script>!is', '', $html);
        $html = preg_replace('!<style[^>]*>.+?</style>!is', '', $html);

        return (stripos($html, '<form') !== false);
    }


    /**
     * Alternates between two or more strings.
     *
     * Each call returns the next argument in sequence, wrapping around at the end.
     * Calling with no arguments resets the sequence and returns an empty string.
     *
     * @param   string  strings to alternate between
     * @return  string
     */
    public static function alternate()
    {
        static $i = 0;

        if (func_num_args() === 0) {
            $i = 0;
            return '';
        }

        $args = func_get_args();
        return $args[($i++ % count($args))];
    }


    /**
     * Reduces multiple slashes in a string to single slashes.
     *
     * Slashes directly following a colon (e.g. "http://") are left alone.
     *
     * @param   string  string to reduce slashes of
     * @return  string
     */
    public static function reduceSlashes($str)
    {
        return preg_replace('#(?<!:)//+#', '/', $str);
    }


    /**
     * Replaces the given words with a string.
     *
     * A single-character replacement is repeated to the length of the word being replaced;
     * a longer replacement is substituted as-is. A "*" in a bad word acts as a wildcard
     * for any run of non-whitespace characters.
     *
     * @param string $str Phrase to replace words in
     * @param array $badwords Words to replace
     * @param string $replacement Replacement string
     * @param boolean $replace_partial_words Replace words across word
     *        boundaries (space, period, etc). This probably doesn't do what
     *        you think it does; check the test suite.
     * @return string
     */
    public static function censor($str, array $badwords, $replacement = '#', $replace_partial_words = FALSE)
    {
        $patterns = [];
        foreach ($badwords as $badword) {
            $badword = (string) $badword;

            // An empty alternative would match at every position in the string
            if ($badword === '') continue;

            $patterns[] = str_replace('\*', '\S*?', preg_quote($badword, '!'));
        }

        if (count($patterns) === 0) {
            return $str;
        }

        $regex = '('.implode('|', $patterns).')';

        if ($replace_partial_words == TRUE) {
            // Just using \b isn't sufficient when we need to replace a badword that already contains word boundaries itself
            $regex = '(?<=\b|\s|^)'.$regex.'(?=\b|\s|$)';
        }

        $regex = '!'.$regex.'!ui';

        if (mb_strlen($replacement, 'UTF-8') === 1) {
            $replace = function($matches) use ($replacement) {
                return str_repeat($replacement, mb_strlen($matches[1], 'UTF-8'));
            };
            $result = preg_replace_callback($regex, $replace, $str);
        } else {
            $result = preg_replace($regex, $replacement, $str);
        }

        // NULL means the regex engine failed (e.g. invalid UTF-8); leave the text alone
        return ($result === NULL) ? $str : $result;
    }


    /**
     * Finds the text that is similar between a set of words.
     *
     * The comparison is byte-wise from the start of each word, i.e. it finds the common prefix.
     *
     * @param   array   words to find similar text of
     * @return  string
     */
    public static function similar(array $words)
    {
        if (count($words) === 0) {
            return '';
        }

        // First word is the word to match against
        $word = (string) reset($words);

        for ($i = 0, $max = strlen($word); $i < $max; ++$i) {
            foreach ($words as $w) {
                // Once a difference is found, break out of the loops
                if (!isset($w[$i]) || $w[$i] !== $word[$i]) {
                    break 2;
                }
            }
        }

        // Return the similar text
        return substr($word, 0, $i);
    }


    /**
     * Converts text email addresses and anchors into links.
     *
     * @param   string   text to auto link
     * @return  string
     */
    public static function autoLink($text)
    {
        // Auto link emails first to prevent problems with "www.domain.com@example.com"
        return self::autoLinkUrls(self::autoLinkEmails($text));
    }


    /**
     * Converts text anchors into links.
     *
     * Matches are replaced in place, so a URL which appears more than once
     * (or which is a prefix of another URL) is only ever wrapped once.
     *
     * @param   string   text to auto link
     * @return  string
     */
    public static function autoLinkUrls($text)
    {
        // Finds all http/https/ftp/ftps links that are not part of an existing html anchor
        $linked = preg_replace_callback(
            '~\b(?<!href="|">)(?:ht|f)tps?://\S+(?:/|\b)~i',
            function($matches) {
                // Replace each link with an anchor
                return Html::anchor($matches[0]);
            },
            $text
        );
        if ($linked !== NULL) {
            $text = $linked;
        }

        // Find all naked www.links.com (without http://)
        $linked = preg_replace_callback(
            '~\b(?<!://)www(?:\.[a-z0-9][-a-z0-9]*+)+\.[a-z]{2,6}\b~i',
            function($matches) {
                // Replace each link with an anchor
                return Html::anchor('http://'.$matches[0], $matches[0]);
            },
            $text
        );
        if ($linked !== NULL) {
            $text = $linked;
        }

        return $text;
    }


    /**
     * Converts text email addresses into links.
     *
     * @param   string   text to auto link
     * @return  string
     */
    public static function autoLinkEmails($text)
    {
        // Finds all email addresses that are not part of an existing html mailto anchor
        // Note: The "58;" negative lookbehind prevents matching of existing encoded html mailto anchors
        //       The html entity for a colon (:) is &#58; or &#058; or &#0058; etc.
        $linked = preg_replace_callback(
            '~\b(?<!href="mailto:|">|58;)(?!\.)[-+_a-z0-9.]++(?<!\.)@(?![-.])[-a-z0-9.]+(?<!\.)\.[a-z]{2,6}\b~i',
            function($matches) {
                // Replace each email with an encoded mailto
                return Html::mailto($matches[0]);
            },
            $text
        );

        return ($linked === NULL) ? $text : $linked;
    }


    /**
     * Automatically applies <p> and <br /> markup to text. Basically nl2br() on steroids.
     *
     * @param   string   subject
     * @return  string
     */
    public static function autoP($str)
    {
        // Trim whitespace
        if (($str = trim($str)) === '') {
            return '';
        }

        // Standardize newlines
        $str = str_replace(["\r\n", "\r"], "\n", $str);

        // Trim whitespace on each line
        $str = preg_replace('~^[ \t]+~m', '', $str);
        $str = preg_replace('~[ \t]+$~m', '', $str);

        // The following regexes only need to be executed if the string contains html
        if ($html_found = (strpos($str, '<') !== FALSE)) {
            // Elements that should not be surrounded by p tags
            $no_p = '(?:p|div|h[1-6r]|ul|ol|li|blockquote|d[dlt]|pre|t[dhr]|t(?:able|body|foot|head)|c(?:aption|colgroup)|form|s(?:elect|tyle)|a(?:ddress|rea)|ma(?:p|th))';
            $no_p = str_replace('c(?:aption|colgroup)', 'c(?:aption|olgroup)', $no_p);

            // Put at least two linebreaks before and after $no_p elements
            $str = preg_replace('~^<'.$no_p.'[^>]*+>~im', "\n$0", $str);
            $str = preg_replace('~</'.$no_p.'\s*+>$~im', "$0\n", $str);
        }

        // Do the <p> magic!
        $str = '<p>'.trim($str).'</p>';
        $str = preg_replace('~\n{2,}~', "</p>\n\n<p>", $str);

        // The following regexes only need to be executed if the string contains html
        if ($html_found !== FALSE) {
            // Remove p tags around $no_p elements
            $str = preg_replace('~<p>(?=</?'.$no_p.'[^>]*+>)~i', '', $str);
            $str = preg_replace('~(</?'.$no_p.'[^>]*+>)</p>~i', '$1', $str);
        }

        // Convert single linebreaks to <br />
        $str = preg_replace('~(?<!\n)\n(?!\n)~', "<br />\n", $str);

        return $str;
    }


    /**
     * Returns human readable sizes.
     * @see  Based on original functions written by:
     * @see  Aidan Lister: http://aidanlister.com/repos/v/function.size_readable.php
     * @see  Quentin Zervaas: http://www.phpriot.com/d/code/strings/filesize-format/
     *
     * @param   integer  size in bytes
     * @param   string   a definitive unit
     * @param   string   the return string format
     * @param   boolean  whether to use SI prefixes or IEC
     * @return  string
     */
    public static function bytes($bytes, $force_unit = NULL, $format = NULL, $si = TRUE)
    {
        // Format string
        $format = ($format === NULL) ? '%01.2f %s' : (string) $format;

        // Cast once so that a NULL unit isn't handed to the string functions
        $force_unit = (string) $force_unit;

        // IEC prefixes (binary)
        if ($si == FALSE || strpos($force_unit, 'i') !== FALSE) {
            $units = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB'];
            $mod   = 1024;
        }
        // SI prefixes (decimal)
        else {
            $units = ['B', 'kB', 'MB', 'GB', 'TB', 'PB'];
            $mod   = 1000;
        }

        // Determine unit to use
        if (($power = array_search($force_unit, $units, true)) === FALSE) {
            // Repeated division avoids the floating point error of floor(log()), which
            // can land one unit too low on exact powers, and the cap keeps the index
            // inside the unit table for very large (or fractional) sizes.
            $power = 0;
            $max_power = count($units) - 1;
            $scaled = $bytes;
            while ($scaled >= $mod && $power < $max_power) {
                $scaled /= $mod;
                $power++;
            }
        }

        return sprintf($format, $bytes / pow($mod, $power), $units[$power]);
    }


    /**
     * Prevents widow words by inserting a non-breaking space between the last two words.
     * @see  http://www.shauninman.com/archive/2006/08/22/widont_wordpress_plugin
     *
     * @param   string  string to remove widows from
     * @return  string
     */
    public static function widont($str)
    {
        $str = rtrim($str);
        $space = strrpos($str, ' ');

        if ($space !== FALSE) {
            $str = substr($str, 0, $space).'&nbsp;'.substr($str, $space + 1);
        }

        return $str;
    }


    /**
    * Returns a number with an english suffix appended (e.g. 1st, 5th, 12th, 123rd)
    *
    * @param int $number
    * @return string
    **/
    public static function ordinalize($number)
    {
        // The suffix depends on magnitude only, so negative numbers use their absolute value
        $abs = abs((int) $number);

        $teens = $abs % 100;
        if ($teens >= 11 && $teens <= 13) {
            return $number . 'th';
        }

        switch ($abs % 10) {
            case 1:
                return $number . 'st';
            case 2:
                return $number . 'nd';
            case 3:
                return $number . 'rd';
            default:
                return $number . 'th';
        }
    }


    /**
    * Make a chunk of valid HTML into plain text, and (optionally) limit the number of words.
    *
    * @param string $html The original HTML
    * @param int $max_words The maximum number of words. Use 0 for no limit.
    * @return string Plain text
    **/
    public static function plain($html, $max_words = 50)
    {
        $html = Enc::cleanfunky($html);

        // Normalise newlines into spaces
        $html = preg_replace('/\r\n|\r|\n/', ' ', $html);

        // Replace some HTML tags with newlines
        $html = preg_replace('!<(p|div|h[1-6]|pre|ol|ul)[^>]*?>!i', "\n\n", $html);
        $html = preg_replace('!<(?:br|li)\b[^>]*>!i', "\n", $html);

        // Remove inline style and script tags
        // The "s" flag is needed because the step above may have put newlines inside them
        $html = preg_replace('!<style[^>]*>.*?<\/style>!is', '', $html);
        $html = preg_replace('!<script[^>]*>.*?<\/script>!is', '', $html);

        // Remove all other tags, and decode entities
        $html = strip_tags($html);
        $html = html_entity_decode($html, ENT_QUOTES, 'UTF-8');

        // Combine runs of multiple whitespace
        $html = preg_replace('/[ \t]+/', ' ', $html);

        // Trim whitespace on each line
        $lines = explode("\n", $html);
        foreach ($lines as $idx => $l) {
            $lines[$idx] = trim($l);
        }
        $html = implode("\n", $lines);

        // Never leave more than one blank line between blocks
        $html = trim(preg_replace('/\n{3,}/', "\n\n", $html));

        if ($max_words) {
            $html = self::limitWords($html, $max_words, '...');
        }

        // Tidy up nbsp characters that break iconv.
        $html = str_replace("\xC2\xA0", ' ', $html);

        return $html;
    }


    /**
    * Make a chunk of plain text into HTML rich text
    * The text will be wrapped within a block element (default is a P tag)
    *
    * @param string $text The original plain text
    * @param string $block_elem The block element to use. Default is a P tag (i.e. 'p').
    *        Use null or empty string to get the result without it being wrapped in a tag.
    * @return string A HTML representation of the plain text
    **/
    public static function richtext($text, $block_elem = 'p')
    {
        $text = Enc::cleanfunky($text);
        $text = Enc::html($text);

        if (!$block_elem) return $text;

        return "<{$block_elem}>{$text}</{$block_elem}>";
    }


    /**
    * Convert a lower_case names into CamelCaps names
    *
    * @param string $name
    * @return string
    **/
    public static function lc2camelcaps($name)
    {
        // A lookbehind is used for the preceding character so that adjacent
        // single-character segments (e.g. "a_b_c") are all converted.
        $name = preg_replace_callback(
            '/(?<=[a-z0-9])_([a-z0-9])/i',
            function($matches) {
                return strtoupper($matches[1]);
            },
            $name
        );
        $name = ucfirst($name);
        return $name;
    }


    /**
    * Convert a lower_case names into camelCase names
    *
    * @param string $name
    * @return string
    **/
    public static function lc2camelcase($name)
    {
        // See lc2camelcaps() for why a lookbehind is used here
        $name = preg_replace_callback(
            '/(?<=[a-z0-9])_([a-z0-9])/i',
            function($matches) {
                return strtoupper($matches[1]);
            },
            $name
        );
        $name = lcfirst($name);
        return $name;
    }


    /**
    * Convert a CamelCaps or camelCase name into a lower_case names
    *
    * Every uppercase letter and every digit starts a new underscore-separated segment.
    *
    * @param string $name
    * @return string
    **/
    public static function camel2lc($name)
    {
        $name = preg_replace_callback(
            '/[A-Z0-9]/',
            function($matches) {
                return '_' . strtolower($matches[0]);
            },
            $name
        );
        $name = ltrim($name, '_');
        return $name;
    }


    /**
     * Encode HTML so it's suitable for direct output, but allow some HTML tags to be left as-is
     *
     * Only a limited subset of tags are left alone, all other tags are stripped.
     * Allowed tags: A, B, I, STRONG, EM, BR, IMG, SPAN, ABBR, SUP, SUB
     *
     * The algorithm used in this method is quite simple, so this method should not be used
     * as a defence against XSS attacks; it should only be used on trusted input such as Form helptext.
     *
     * @param string $html Plain text or HTML which may contain various tags
     * @return string HTML which only contains safe tags
     */
    public static function limitedSubsetHtml($html)
    {
        static $allowed = ['a', 'b', 'i', 'strong', 'em', 'br', 'img', 'span', 'abbr', 'sup', 'sub'];

        $offset = 0;
        $out = '';

        // Grouped so that \G anchors every alternative to the current offset
        //              opening tag       closing tag    content
        $pattern = '!\G(?:(<[a-z0-9]+[^>]*>)|(</[a-z0-9]+>)|([^<>]+|<|>))!si';

        while (preg_match($pattern, $html, $m, 0, $offset)) {
            $offset += strlen($m[0]);

            if ($m[1] !== '') {
                preg_match('!^<([a-z0-9]+)[^>]*>$!i', $m[0], $matches);
                if (in_array(strtolower($matches[1]), $allowed, true)) {
                    $out .= $m[0];
                }

            } else if ($m[2] !== '') {
                preg_match('!^</([a-z0-9]+)>$!i', $m[0], $matches);
                if (in_array(strtolower($matches[1]), $allowed, true)) {
                    $out .= $m[0];
                }

            } else {
                // Text content, including any stray angle brackets, gets encoded
                $out .= Enc::html($m[0]);
            }
        }

        return $out;
    }


    /**
     * Returns current year or original year and current year of copyright
     * @param string $year The original year of copyright
     * @return string Current year, or Original year - Current year
     */
    public static function copyright($year)
    {
        $current = date('Y');

        // No original year given; fall back to the current year
        if (empty($year)) {
            return $current;
        }

        if ((int) $year === (int) $current) {
            return (string) $year;
        }

        return $year . ' - ' . $current;
    }
}
 |