SproutCMS

This is the code documentation for the SproutCMS project

source of /sprout/core/utf8.php

  1. <?php
  2. /*
  3.  * Copyright (C) 2017 Karmabunny Pty Ltd.
  4.  *
  5.  * This file is a part of SproutCMS.
  6.  *
  7.  * SproutCMS is free software: you can redistribute it and/or modify it under the terms
  8.  * of the GNU General Public License as published by the Free Software Foundation, either
  9.  * version 2 of the License, or (at your option) any later version.
  10.  *
  11.  * For more information, visit <http://getsproutcms.com>.
  12.  */
  13.  
  14. /**
  15.  * A port of phputf8 to a unified file/class. Checks PHP status to ensure that
  16.  * UTF-8 support is available and normalize global variables to UTF-8. It also
  17.  * provides multi-byte aware replacement string functions.
  18.  *
  19.  * This file is licensed differently from the rest of Kohana. As a port of
  20.  * phputf8, which is LGPL software, this file is released under the LGPL.
  21.  *
  22.  * PCRE needs to be compiled with UTF-8 support (--enable-utf8).
  23.  * Support for Unicode properties is highly recommended (--enable-unicode-properties).
  24.  * @see http://php.net/manual/reference.pcre.pattern.modifiers.php
  25.  *
  26.  * UTF-8 conversion will be much more reliable if the iconv extension is loaded.
  27.  * @see http://php.net/iconv
  28.  *
  29.  * The mbstring extension is highly recommended, but must not be overloading
  30.  * string functions.
  31.  * @see http://php.net/mbstring
  32.  *
  33.  * $Id: utf8.php 3769 2008-12-15 00:48:56Z zombor $
  34.  *
  35.  * @package Core
  36.  * @author Kohana Team
  37.  * @copyright (c) 2007 Kohana Team
  38.  * @copyright (c) 2005 Harry Fuecks
  39.  * @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
  40.  */
  41.  
  // Environment checks: refuse to continue when the PHP build cannot handle
  // UTF-8 safely. The /u pattern only matches the single non-ASCII character
  // when PCRE has UTF-8 support.
  if (preg_match('/^.$/u', 'ñ') !== 1) {
      throw new Exception('PCRE is missing UTF-8 support');
  }

  // utf8::clean() relies on iconv() for the actual conversion.
  if (extension_loaded('iconv') === false) {
      throw new Exception('PHP iconv extension not loaded');
  }

  // mbstring may be loaded, but it must not replace the native string
  // functions. The constant is only referenced after defined() confirms it.
  if (
      defined('MB_OVERLOAD_STRING')
      && extension_loaded('mbstring')
      && (ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING)
  ) {
      throw new Exception('String functions overloaded by mbstring');
  }

  // Normalise every request superglobal to clean UTF-8.
  $_GET = utf8::clean($_GET);
  $_POST = utf8::clean($_POST);
  $_COOKIE = utf8::clean($_COOKIE);
  $_SERVER = utf8::clean($_SERVER);

  // When running from the command line, clean the arguments as well.
  if (PHP_SAPI === 'cli') {
      $_SERVER['argv'] = utf8::clean($_SERVER['argv']);
  }
  66.  
  /**
   * Static helpers for sanitising input to well-formed UTF-8 and for testing
   * or stripping ASCII / non-ASCII bytes.
   */
  final class utf8
  {
      /**
       * Recursively cleans arrays, objects, and strings. Removes ASCII control
       * codes and converts to UTF-8 while silently discarding incompatible
       * UTF-8 characters.
       *
       * Arrays and ArrayAccess objects are written to by offset; any other
       * object has its iterable (public) properties cleaned in place. A key
       * that changes when cleaned is added alongside the original key, which
       * is left untouched. Values that are neither array, object, nor
       * non-empty string are returned unchanged.
       *
       * @param string|array|object $str Thing to clean
       * @return string|array|object The cleaned value
       */
      public static function clean($str)
      {
          if (is_array($str) || is_object($str)) {
              // Plain objects cannot be written with array syntax (that throws
              // an Error), so offsets are only used where they are supported.
              $use_offsets = is_array($str) || $str instanceof ArrayAccess;

              foreach ($str as $key => $val) {
                  // Recursion!
                  $clean_key = self::clean($key);
                  $clean_val = self::clean($val);

                  if ($use_offsets) {
                      $str[$clean_key] = $clean_val;
                  } else {
                      $str->{$clean_key} = $clean_val;
                  }
              }

              return $str;
          }

          if (!is_string($str) || $str === '') {
              return $str;
          }

          // Remove control characters
          $str = self::stripAsciiCtrl($str);

          // iconv is expensive, so it is only used when needed
          if (self::isAscii($str)) {
              return $str;
          }

          $converted = false;

          if (function_exists('iconv')) {
              // iconv raises a notice for every illegal sequence it drops.
              // Silence notices only for this call and always restore the
              // caller's level, even if iconv throws.
              $previous = error_reporting();
              error_reporting($previous & ~E_NOTICE);

              try {
                  $converted = iconv('UTF-8', 'UTF-8//IGNORE', $str);
              } finally {
                  error_reporting($previous);
              }
          }

          // Some iconv implementations return false instead of honouring
          // //IGNORE; never hand false back in place of the caller's string.
          if ($converted === false) {
              return self::stripInvalidUtf8($str);
          }

          return $converted;
      }

      /**
       * Tests whether a string contains only 7bit ASCII bytes. This is used to
       * determine when to use native functions or UTF-8 functions.
       *
       * @param string $str string to check
       * @return bool True when no byte above 0x7F is present
       */
      public static function isAscii($str)
      {
          return ! preg_match('/[^\x00-\x7F]/S', $str);
      }

      /**
       * Strips out device control codes in the ASCII range. Tab (0x09),
       * line feed (0x0A) and carriage return (0x0D) are kept.
       *
       * @param string $str string to clean
       * @return string
       */
      public static function stripAsciiCtrl($str)
      {
          return preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S', '', $str);
      }

      /**
       * Strips out all non-7bit ASCII bytes.
       *
       * @param string $str string to clean
       * @return string
       */
      public static function stripNonAscii($str)
      {
          return preg_replace('/[^\x00-\x7F]+/S', '', $str);
      }

      /**
       * Keeps only well-formed UTF-8 byte sequences, dropping every other byte.
       * Used as a pure-PCRE fallback when iconv cannot perform the conversion.
       *
       * @param string $str string to clean
       * @return string
       */
      private static function stripInvalidUtf8($str)
      {
          // Byte ranges of well-formed UTF-8: ASCII, then two-, three- and
          // four-byte sequences, excluding overlong forms, surrogates and
          // code points above U+10FFFF.
          $well_formed = '/[\x00-\x7F]'
              . '|[\xC2-\xDF][\x80-\xBF]'
              . '|\xE0[\xA0-\xBF][\x80-\xBF]'
              . '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'
              . '|\xED[\x80-\x9F][\x80-\xBF]'
              . '|\xF0[\x90-\xBF][\x80-\xBF]{2}'
              . '|[\xF1-\xF3][\x80-\xBF]{3}'
              . '|\xF4[\x80-\x8F][\x80-\xBF]{2}/S';

          if (preg_match_all($well_formed, $str, $matches) === false) {
              // PCRE itself failed; fall back to the lossy but safe option.
              return self::stripNonAscii($str);
          }

          return implode('', $matches[0]);
      }
  } // End utf8
  137.