<?php
/**
 * Copyright (C) 2017 Karmabunny Pty Ltd.
 *
 * This file is a part of SproutCMS.
 *
 * SproutCMS is free software: you can redistribute it and/or modify it under the terms
 * of the GNU General Public License as published by the Free Software Foundation, either
 * version 2 of the License, or (at your option) any later version.
 *
 * For more information, visit <http://getsproutcms.com>.
 *
 * This class was originally from Kohana 2.3.4
 * Copyright 2007-2008 Kohana Team
 */

namespace Sprout\Helpers;


/**
 * Various text helpers such as limiting.
 *
 * All methods are static and stateless, except alternate() which keeps a call counter.
 */
class Text
{

    /**
     * Count the characters (not bytes) in a UTF-8 string.
     *
     * Uses mbstring when it is available and falls back to PCRE otherwise.
     *
     * @param string $str
     * @return int
     */
    private static function charLength($str)
    {
        if (function_exists('mb_strlen')) {
            return mb_strlen($str, 'UTF-8');
        }

        $count = preg_match_all('/./us', $str, $unused);

        // PCRE returns false for invalid UTF-8; a byte count is the best remaining answer
        return ($count === false) ? strlen($str) : $count;
    }


    /**
     * Return the first $length characters (not bytes) of a UTF-8 string.
     *
     * @param string $str
     * @param int $length Number of characters to keep; must not be negative
     * @return string
     */
    private static function charPrefix($str, $length)
    {
        if (function_exists('mb_substr')) {
            return mb_substr($str, 0, $length, 'UTF-8');
        }

        if (preg_match('/^.{0,' . (int) $length . '}/us', $str, $head)) {
            return $head[0];
        }

        return substr($str, 0, $length);
    }


    /**
     * Limits a plain-text phrase to a given number of words.
     *
     * @param string $str Phrase to limit words of, in plain text
     * @param int $limit Number of words to limit to
     * @param string $end_char Characters to append if text is limited, e.g. '...'
     * @return string Plain text
     */
    public static function limitWords($str, $limit = 100, $end_char = null)
    {
        $limit = (int) $limit;
        $end_char = ($end_char === null) ? '…' : $end_char;

        if (trim($str) === '') {
            return $str;
        }

        if ($limit <= 0) {
            return $end_char;
        }

        // A failed match (invalid UTF-8, or a limit beyond PCRE's quantifier range) leaves the text as-is
        if (!preg_match('/^\s*+(?:\S++\s*+){1,' . $limit . '}/u', $str, $matches)) {
            return $str;
        }

        // Only attach the end character if the matched string is shorter
        // than the starting string.
        return rtrim($matches[0]) . (strlen($matches[0]) === strlen($str) ? '' : $end_char);
    }


    /**
     * Limits a plain-text phrase to a given number of characters.
     *
     * @param string $str Phrase to limit characters of, in plain text
     * @param int $limit Number of characters to limit to
     * @param string $end_char Characters to append if text is limited, e.g. '...'
     * @param boolean $preserve_words True if whole words should be preserved; false to allow ending on a partial word
     * @return string Plain text
     */
    public static function limitChars($str, $limit = 100, $end_char = null, $preserve_words = false)
    {
        $end_char = ($end_char === null) ? '…' : $end_char;

        $limit = (int) $limit;

        if (trim($str) === '' || self::charLength($str) <= $limit) {
            return $str;
        }

        if ($limit <= 0) {
            return $end_char;
        }

        if ($preserve_words == false) {
            return rtrim(self::charPrefix($str, $limit)) . $end_char;
        }

        // Take one character less than the limit, then run on to the end of the current word
        if (!preg_match('/^.{' . ($limit - 1) . '}\S*/us', $str, $matches)) {
            return $str;
        }

        return rtrim($matches[0]) . (strlen($matches[0]) === strlen($str) ? '' : $end_char);
    }


    /**
     * Limits HTML to a certain number of words.
     * Is aware of tags etc and will not count them in the word-count, as well as closing them properly.
     *
     * Known limitation: when the text holds exactly $limit words but is followed by
     * non-word content (e.g. a full stop), the '...' marker is still appended.
     *
     * @param string $text HTML to limit
     * @param int $limit Maximum number of words to keep
     * @return string HTML with any still-open tags closed
     */
    public static function limitWordsHtml($text, $limit = 50)
    {
        $count = 0;
        $offset = 0;
        $over = false;
        $out = '';
        $stack = [];

        // These shouldn't have an end tag
        $single_tags = '/^(?:br|wbr|area|hr|img|input)$/i';

        // Nuke HTML comments and duplicate space
        $text = preg_replace('/<!--.*?-->/s', '', $text);
        $text = preg_replace('/\s\s+/', ' ', $text);

        // The alternatives are grouped so that \G anchors every one of them, which keeps
        // $offset in step with what was consumed. Stray '<' and '>' count as non-words.
        //               opening tag        closing tag     words             non-words
        $pattern = '!\G(?:(<[a-z0-9]+[^>]*>)|(</[a-z0-9]+>)|([-_a-zA-Z0-9]+)|([^-_a-zA-Z0-9<>]+|[<>]))!si';

        while (preg_match($pattern, $text, $m, 0, $offset)) {
            $offset += strlen($m[0]);

            if ($m[1] !== '') {
                if ($over) { $out .= '...'; break; }

                // Remember the matching end tag so it can be emitted later
                preg_match('!^<([a-z0-9]+)[^>]*>$!i', $m[0], $matches);
                if (!preg_match($single_tags, $matches[1])) {
                    $stack[] = '</' . $matches[1] . '>';
                }
                $out .= $m[0];

            } elseif (isset($m[2]) && $m[2] !== '') {
                // Close everything that was opened after the tag being closed.
                // An unmatched closing tag empties the stack and is then dropped.
                $pop = array_pop($stack);
                while ($pop !== null && strcasecmp($pop, $m[0]) !== 0) {
                    $out .= $pop;
                    $pop = array_pop($stack);
                }
                if ($pop !== null) {
                    $out .= $pop;
                }

            } elseif (isset($m[3]) && $m[3] !== '') {
                if ($over) { $out .= '...'; break; }
                $out .= $m[0];
                $count++;
                if ($count == $limit) {
                    $over = true;
                }

            } else {
                if ($over) { $out .= '...'; break; }
                $out .= $m[0];
            }
        }

        // Close whatever is still open
        while (($pop = array_pop($stack)) !== null) {
            $out .= $pop;
        }

        return $out;
    }


    /**
     * Determines whether given HTML contains a FORM tag, which can cause nested-forms issues
     *
     * Not tested with malformed input - should not be used as an XSS filter
     *
     * @param string $html HTML to check
     * @return bool True if the string contains a FORM tag, false if it doesn't
     */
    public static function containsFormTag($html)
    {
        // Quick test before even doing string manipulation
        if (stripos($html, '<form') === false) {
            return false;
        }

        // These tags always contain CDATA so nuke them entirely
        $html = preg_replace('!<script[^>]*>.+?</script>!is', '', $html);
        $html = preg_replace('!<style[^>]*>.+?</style>!is', '', $html);

        return (stripos($html, '<form') !== false);
    }


    /**
     * Alternates between two or more strings.
     *
     * Each call returns the next argument in turn; the position is shared by all callers.
     * Calling with no arguments resets the position and returns an empty string.
     *
     * @param   string  strings to alternate between
     * @return  string
     */
    public static function alternate()
    {
        static $i = 0;

        if (func_num_args() === 0) {
            $i = 0;
            return '';
        }

        $args = func_get_args();
        return $args[($i++ % count($args))];
    }


    /**
     * Reduces multiple slashes in a string to single slashes.
     *
     * Slashes directly after a colon (e.g. the '//' in 'http://') are left alone.
     *
     * @param   string  string to reduce slashes of
     * @return  string
     */
    public static function reduceSlashes($str)
    {
        return preg_replace('#(?<!:)//+#', '/', $str);
    }


    /**
     * Replaces the given words with a string.
     *
     * A '*' within a bad word acts as a wildcard for any run of non-space characters.
     * A single-character replacement is repeated to the length of the matched word;
     * a longer replacement is substituted once per match.
     *
     * @param string $str Phrase to replace words in
     * @param array $badwords Words to replace
     * @param string $replacement Replacement string
     * @param boolean $replace_partial_words Replace words across word
     *        boundaries (space, period, etc). This probably doesn't do what
     *        you think it does; check the test suite.
     * @return string
     */
    public static function censor($str, array $badwords, $replacement = '#', $replace_partial_words = false)
    {
        // Without this guard the regex would be '()', which matches at every position
        if (empty($badwords)) {
            return $str;
        }

        foreach ($badwords as $key => $badword) {
            $badwords[$key] = str_replace('\*', '\S*?', preg_quote((string) $badword));
        }

        $regex = '(' . implode('|', $badwords) . ')';

        if ($replace_partial_words == true) {
            // Just using \b isn't sufficient when we need to replace a badword that already contains word boundaries itself
            $regex = '(?<=\b|\s|^)' . $regex . '(?=\b|\s|$)';
        }

        $regex = '!' . $regex . '!ui';

        if (self::charLength($replacement) == 1) {
            $replace = function ($matches) use ($replacement) {
                return str_repeat($replacement, Text::maskLength($matches[1]));
            };
            return preg_replace_callback($regex, $replace, $str);
        }

        return preg_replace($regex, $replacement, $str);
    }


    /**
     * Number of mask characters needed to cover a censored word.
     *
     * Public only so the closure in censor() can call it on PHP versions where closures
     * declared in static methods cannot reach private members.
     *
     * @internal
     * @param string $word The matched word
     * @return int Character count of the word
     */
    public static function maskLength($word)
    {
        return self::charLength($word);
    }


    /**
     * Finds the text that is similar between a set of words.
     *
     * The result is the longest prefix shared by every word; comparison is byte-wise.
     *
     * @param   array   words to find similar text of
     * @return  string
     */
    public static function similar(array $words)
    {
        if (empty($words)) {
            return '';
        }

        // First word is the word to match against
        $word = (string) reset($words);

        for ($i = 0, $max = strlen($word); $i < $max; ++$i) {
            foreach ($words as $w) {
                // Once a difference is found, break out of the loops
                if (!isset($w[$i]) || $w[$i] !== $word[$i]) {
                    break 2;
                }
            }
        }

        // Return the similar text
        return substr($word, 0, $i);
    }


    /**
     * Converts text email addresses and anchors into links.
     *
     * @param   string   text to auto link
     * @return  string
     */
    public static function autoLink($text)
    {
        // Auto link emails first to prevent problems with "www.domain.com@example.com"
        return self::autoLinkUrls(self::autoLinkEmails($text));
    }


    /**
     * Converts text anchors into links.
     *
     * Each match is replaced where it was found, in a single pass, so a URL that
     * appears more than once is wrapped once per occurrence and existing anchors
     * are never rewritten.
     *
     * @param   string   text to auto link
     * @return  string
     */
    public static function autoLinkUrls($text)
    {
        // Finds all http/https/ftp/ftps links that are not part of an existing html anchor
        $linked = preg_replace_callback(
            '~\b(?<!href="|">)(?:ht|f)tps?://\S+(?:/|\b)~i',
            function ($matches) {
                return Html::anchor($matches[0]);
            },
            $text
        );
        if ($linked !== null) {
            $text = $linked;
        }

        // Find all naked www.links.com (without http://)
        $linked = preg_replace_callback(
            '~\b(?<!://)www(?:\.[a-z0-9][-a-z0-9]*+)+\.[a-z]{2,6}\b~i',
            function ($matches) {
                return Html::anchor('http://' . $matches[0], $matches[0]);
            },
            $text
        );
        if ($linked !== null) {
            $text = $linked;
        }

        return $text;
    }


    /**
     * Converts text email addresses into links.
     *
     * @param   string   text to auto link
     * @return  string
     */
    public static function autoLinkEmails($text)
    {
        // Finds all email addresses that are not part of an existing html mailto anchor
        // Note: The "58;" negative lookbehind prevents matching of existing encoded html mailto anchors
        //       The html entity for a colon (:) is &#58; or &#058; or &#0058; etc.
        $linked = preg_replace_callback(
            '~\b(?<!href="mailto:|">|58;)(?!\.)[-+_a-z0-9.]++(?<!\.)@(?![-.])[-a-z0-9.]+(?<!\.)\.[a-z]{2,6}\b~i',
            function ($matches) {
                // Replace each email with an encoded mailto
                return Html::mailto($matches[0]);
            },
            $text
        );

        // A regex failure yields null; keep the original text in that case
        return ($linked === null) ? $text : $linked;
    }


    /**
     * Automatically applies <p> and <br /> markup to text. Basically nl2br() on steroids.
     *
     * @param   string   subject
     * @return  string
     */
    public static function autoP($str)
    {
        // Trim whitespace
        if (($str = trim($str)) === '') {
            return '';
        }

        // Standardize newlines
        $str = str_replace(["\r\n", "\r"], "\n", $str);

        // Trim whitespace on each line
        $str = preg_replace('~^[ \t]+~m', '', $str);
        $str = preg_replace('~[ \t]+$~m', '', $str);

        $no_p = '';

        // The following regexes only need to be executed if the string contains html
        $html_found = (strpos($str, '<') !== false);
        if ($html_found) {
            // Elements that should not be surrounded by p tags
            $no_p = '(?:p|div|h[1-6r]|ul|ol|li|blockquote|d[dlt]|pre|t[dhr]|t(?:able|body|foot|head)|c(?:aption|olgroup)|form|s(?:elect|tyle)|a(?:ddress|rea)|ma(?:p|th))';

            // Put at least two linebreaks before and after $no_p elements
            $str = preg_replace('~^<' . $no_p . '[^>]*+>~im', "\n$0", $str);
            $str = preg_replace('~</' . $no_p . '\s*+>$~im', "$0\n", $str);
        }

        // Do the <p> magic!
        $str = '<p>' . trim($str) . '</p>';
        $str = preg_replace('~\n{2,}~', "</p>\n\n<p>", $str);

        // The following regexes only need to be executed if the string contains html
        if ($html_found) {
            // Remove p tags around $no_p elements
            $str = preg_replace('~<p>(?=</?' . $no_p . '[^>]*+>)~i', '', $str);
            $str = preg_replace('~(</?' . $no_p . '[^>]*+>)</p>~i', '$1', $str);
        }

        // Convert single linebreaks to <br />
        $str = preg_replace('~(?<!\n)\n(?!\n)~', "<br />\n", $str);

        return $str;
    }


    /**
     * Returns human readable sizes.
     * @see  Based on original functions written by:
     * @see  Aidan Lister: http://aidanlister.com/repos/v/function.size_readable.php
     * @see  Quentin Zervaas: http://www.phpriot.com/d/code/strings/filesize-format/
     *
     * @param   integer  size in bytes
     * @param   string   a definitive unit
     * @param   string   the return string format
     * @param   boolean  whether to use SI prefixes or IEC
     * @return  string
     */
    public static function bytes($bytes, $force_unit = null, $format = null, $si = true)
    {
        // Format string
        $format = ($format === null) ? '%01.2f %s' : (string) $format;
        $force_unit = (string) $force_unit;

        if ($si == false || strpos($force_unit, 'i') !== false) {
            // IEC prefixes (binary)
            $units = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB'];
            $mod   = 1024;
        } else {
            // SI prefixes (decimal)
            $units = ['B', 'kB', 'MB', 'GB', 'TB', 'PB'];
            $mod   = 1000;
        }

        // Determine unit to use
        $power = array_search($force_unit, $units, true);
        if ($power === false) {
            // Repeated division avoids the rounding errors of floor(log()) at exact
            // powers of the base, and cannot run off either end of the unit table.
            $power = 0;
            $largest = count($units) - 1;
            $remaining = $bytes;
            while ($remaining >= $mod && $power < $largest) {
                $remaining /= $mod;
                $power++;
            }
        }

        return sprintf($format, $bytes / pow($mod, $power), $units[$power]);
    }


    /**
     * Prevents widow words by inserting a non-breaking space between the last two words.
     * @see  http://www.shauninman.com/archive/2006/08/22/widont_wordpress_plugin
     *
     * @param   string  string to remove widows from
     * @return  string  the string with its last space replaced by an &nbsp; entity
     */
    public static function widont($str)
    {
        $str = rtrim($str);
        $space = strrpos($str, ' ');

        if ($space !== false) {
            $str = substr($str, 0, $space) . '&nbsp;' . substr($str, $space + 1);
        }

        return $str;
    }


    /**
     * Returns a number with an english suffix appended (e.g. 1st, 5th, 12th, 123rd)
     *
     * @param int $number
     * @return string
     */
    public static function ordinalize($number)
    {
        // The suffix depends on magnitude only, so -1 becomes '-1st' rather than '-1th'
        $magnitude = abs((int) $number);
        $last_two = $magnitude % 100;

        if ($last_two == 11 || $last_two == 12 || $last_two == 13) {
            return $number . 'th';
        }

        switch ($magnitude % 10) {
            case 1:
                return $number . 'st';
            case 2:
                return $number . 'nd';
            case 3:
                return $number . 'rd';
            default:
                return $number . 'th';
        }
    }


    /**
     * Make a chunk of valid HTML into plain text, and (optionally) limit the number of words.
     *
     * NOTE(review): the newline, tag-stripping, whitespace and nbsp steps were rebuilt
     * from the step comments of an incomplete source - confirm against upstream.
     *
     * @param string $html The original HTML
     * @param int $max_words The maximum number of words. Use 0 for no limit.
     * @return string Plain text
     */
    public static function plain($html, $max_words = 50)
    {
        $html = Enc::cleanfunky($html);

        // Normalise newlines into spaces
        $html = preg_replace('/(?:\r\n|\r|\n)/', ' ', $html);

        // Replace some HTML tags with newlines
        $html = preg_replace('!<(p|div|h[1-6]|pre|ol|ul)[^>]*?>!i', "\n\n", $html);

        // Remove inline style and script tags
        $html = preg_replace('!<style[^>]*>.+?<\/style>!i', '', $html);
        $html = preg_replace('!<script[^>]*>.+?<\/script>!i', '', $html);

        // Remove all other tags, and decode entities
        $html = strip_tags($html);
        $html = html_entity_decode($html, ENT_QUOTES, 'UTF-8');

        // Combine runs of multiple whitespace
        $html = preg_replace('/[ \t]{2,}/', ' ', $html);

        // Trim whitespace on each line
        $lines = array_map('trim', explode("\n", $html));
        $html = trim(implode("\n", $lines));

        // No more than one blank line between paragraphs
        $html = preg_replace('/\n{3,}/', "\n\n", $html);

        if ($max_words) {
            $html = self::limitWords($html, $max_words, '...');
        }

        // Tidy up nbsp characters that break iconv.
        $html = str_replace("\xC2\xA0", ' ', $html);

        return $html;
    }


    /**
     * Make a chunk of plain text into HTML rich text
     * The text will be wrapped within a block element (default is a P tag)
     *
     * NOTE(review): only the cleaning, encoding and wrapping steps were visible in the
     * source this was restored from - confirm that no newline handling is missing.
     *
     * @param string $text The original plain text
     * @param string $block_elem The block element to use. Default is a P tag (i.e. 'p').
     *        Use null or empty string to get the result without it being wrapped in a tag.
     * @return string A HTML representation of the plain text
     */
    public static function richtext($text, $block_elem = 'p')
    {
        $text = Enc::cleanfunky($text);
        $text = Enc::html($text);

        if (!$block_elem) {
            return $text;
        }

        return "<{$block_elem}>{$text}</{$block_elem}>";
    }


    /**
     * Convert a lower_case names into CamelCaps names
     *
     * @param string $name
     * @return string
     */
    public static function lc2camelcaps($name)
    {
        // The lookbehind leaves the preceding character unconsumed, so back-to-back
        // single-character segments such as 'a_b_c' are all converted.
        $name = preg_replace_callback(
            '/(?<=[a-z0-9])_([a-z0-9])/i',
            function ($matches) {
                return strtoupper($matches[1]);
            },
            $name
        );
        $name = ucfirst($name);
        return $name;
    }


    /**
     * Convert a lower_case names into camelCase names
     *
     * @param string $name
     * @return string
     */
    public static function lc2camelcase($name)
    {
        // See lc2camelcaps() for why a lookbehind is used
        $name = preg_replace_callback(
            '/(?<=[a-z0-9])_([a-z0-9])/i',
            function ($matches) {
                return strtoupper($matches[1]);
            },
            $name
        );
        $name = lcfirst($name);
        return $name;
    }


    /**
     * Convert a CamelCaps or camelCase name into a lower_case names
     *
     * Every capital letter and every digit starts a new underscore-separated segment.
     *
     * @param string $name
     * @return string
     */
    public static function camel2lc($name)
    {
        $name = preg_replace_callback(
            '/[A-Z0-9]/',
            function ($matches) {
                return '_' . strtolower($matches[0]);
            },
            $name
        );
        $name = ltrim($name, '_');
        return $name;
    }


    /**
     * Encode HTML so it's suitable for direct output, but allow some HTML tags to be left as-is
     *
     * Only a limited subset of tags are left alone, all other tags are stripped.
     * Allowed tags: A, B, I, STRONG, EM, BR, IMG, SPAN, ABBR, SUP, SUB
     *
     * The algorithm used in this method is quite simple, so this method should not be used
     * as a defence against XSS attacks; it should only be used on trusted input such as Form helptext.
     *
     * @param string $html Plain text or HTML which may contain various tags
     * @return string HTML which only contains safe tags
     */
    public static function limitedSubsetHtml($html)
    {
        static $allowed = ['a', 'b', 'i', 'strong', 'em', 'br', 'img', 'span', 'abbr', 'sup', 'sub'];

        $offset = 0;
        $out = '';

        //                     opening tag       closing tag    content
        while (preg_match('!\G(<[a-z0-9]+[^>]*>)|(</[a-z0-9]+>)|([^<>]+|<|>)!si', $html, $m, 0, $offset)) {
            $offset += strlen($m[0]);

            if ($m[1] !== '') {
                // Tag names are matched case-insensitively, so compare them in lower case
                preg_match('!^<([a-z0-9]+)[^>]*>$!i', $m[0], $matches);
                if (in_array(strtolower($matches[1]), $allowed, true)) {
                    $out .= $m[0];
                }

            } elseif (isset($m[2]) && $m[2] !== '') {
                preg_match('!^</([a-z0-9]+)>$!i', $m[0], $matches);
                if (in_array(strtolower($matches[1]), $allowed, true)) {
                    $out .= $m[0];
                }

            } else {
                $out .= Enc::html($m[0]);
            }
        }

        return $out;
    }


    /**
     * Returns current year or original year and current year of copyright
     *
     * NOTE(review): the empty-year fallback was rebuilt from an incomplete source -
     * confirm against upstream.
     *
     * @param string $year The original year of copyright
     * @return string Current year, or Original year - Current year
     */
    public static function copyright($year)
    {
        $current = date('Y');

        if (empty($year)) {
            return $current;
        }

        if ($year == $current) {
            return $year;
        }

        return $year . ' - ' . $current;
    }
}