/*
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace Symfony\Polyfill\Iconv;

/**
 * iconv implementation in pure PHP, UTF-8 centric.
 *
 * Implemented:
 * - iconv                     - Convert string to requested character encoding
 * - iconv_mime_decode         - Decodes a MIME header field
 * - iconv_mime_decode_headers - Decodes multiple MIME header fields at once
 * - iconv_get_encoding        - Retrieve internal configuration variables of iconv extension
 * - iconv_set_encoding        - Set current setting for character encoding conversion
 * - iconv_mime_encode         - Composes a MIME header field
 * - iconv_strlen              - Returns the character count of string
 * - iconv_strpos              - Finds position of first occurrence of a needle within a haystack
 * - iconv_strrpos             - Finds the last occurrence of a needle within a haystack
 * - iconv_substr              - Cut out part of a string
 *
 * Charsets available for conversion are defined by files
 * in the charset/ directory and by Iconv::$alias below.
 * You're welcome to send back any addition you make.
 *
 * All conversions pivot through UTF-8: the input is first mapped to UTF-8,
 * then UTF-8 is mapped to the requested output charset.
 *
 * @author Nicolas Grekas
 *
 * @internal
 */
final class Iconv
{
    const ERROR_ILLEGAL_CHARACTER = 'iconv(): Detected an illegal character in input string';
    const ERROR_WRONG_CHARSET = 'iconv(): Wrong charset, conversion from `%s\' to `%s\' is not allowed';

    public static $inputEncoding = 'utf-8';
    public static $outputEncoding = 'utf-8';
    public static $internalEncoding = 'utf-8';

    /**
     * Lower-cased charset aliases mapped to the name used for the charset map files.
     */
    private static $alias = array(
        'utf8' => 'utf-8',
        'ascii' => 'us-ascii',
        'tis-620' => 'iso-8859-11',
        'cp1250' => 'windows-1250',
        'cp1251' => 'windows-1251',
        'cp1252' => 'windows-1252',
        'cp1253' => 'windows-1253',
        'cp1254' => 'windows-1254',
        'cp1255' => 'windows-1255',
        'cp1256' => 'windows-1256',
        'cp1257' => 'windows-1257',
        'cp1258' => 'windows-1258',
        'shift-jis' => 'cp932',
        'shift_jis' => 'cp932',
        'latin1' => 'iso-8859-1',
        'latin2' => 'iso-8859-2',
        'latin3' => 'iso-8859-3',
        'latin4' => 'iso-8859-4',
        'latin5' => 'iso-8859-9',
        'latin6' => 'iso-8859-10',
        'latin7' => 'iso-8859-13',
        'latin8' => 'iso-8859-14',
        'latin9' => 'iso-8859-15',
        'latin10' => 'iso-8859-16',
        'iso8859-1' => 'iso-8859-1',
        'iso8859-2' => 'iso-8859-2',
        'iso8859-3' => 'iso-8859-3',
        'iso8859-4' => 'iso-8859-4',
        'iso8859-5' => 'iso-8859-5',
        'iso8859-6' => 'iso-8859-6',
        'iso8859-7' => 'iso-8859-7',
        'iso8859-8' => 'iso-8859-8',
        'iso8859-9' => 'iso-8859-9',
        'iso8859-10' => 'iso-8859-10',
        'iso8859-11' => 'iso-8859-11',
        'iso8859-12' => 'iso-8859-12',
        'iso8859-13' => 'iso-8859-13',
        'iso8859-14' => 'iso-8859-14',
        'iso8859-15' => 'iso-8859-15',
        'iso8859-16' => 'iso-8859-16',
        'iso_8859-1' => 'iso-8859-1',
        'iso_8859-2' => 'iso-8859-2',
        'iso_8859-3' => 'iso-8859-3',
        'iso_8859-4' => 'iso-8859-4',
        'iso_8859-5' => 'iso-8859-5',
        'iso_8859-6' => 'iso-8859-6',
        'iso_8859-7' => 'iso-8859-7',
        'iso_8859-8' => 'iso-8859-8',
        'iso_8859-9' => 'iso-8859-9',
        'iso_8859-10' => 'iso-8859-10',
        'iso_8859-11' => 'iso-8859-11',
        'iso_8859-12' => 'iso-8859-12',
        'iso_8859-13' => 'iso-8859-13',
        'iso_8859-14' => 'iso-8859-14',
        'iso_8859-15' => 'iso-8859-15',
        'iso_8859-16' => 'iso-8859-16',
        'iso88591' => 'iso-8859-1',
        'iso88592' => 'iso-8859-2',
        'iso88593' => 'iso-8859-3',
        'iso88594' => 'iso-8859-4',
        'iso88595' => 'iso-8859-5',
        'iso88596' => 'iso-8859-6',
        'iso88597' => 'iso-8859-7',
        'iso88598' => 'iso-8859-8',
        'iso88599' => 'iso-8859-9',
        'iso885910' => 'iso-8859-10',
        'iso885911' => 'iso-8859-11',
        'iso885912' => 'iso-8859-12',
        'iso885913' => 'iso-8859-13',
        'iso885914' => 'iso-8859-14',
        'iso885915' => 'iso-8859-15',
        'iso885916' => 'iso-8859-16',
    );
    // Lazily loaded transliteration table (see mapFromUtf8()).
    private static $translitMap = array();
    // Cache of charset maps already loaded, keyed by "<type><charset>" (see loadMap()).
    private static $convertMap = array();
    private static $errorHandler;
    private static $lastError;

    // Byte length of a UTF-8 sequence, keyed by the high nibble of its lead byte.
    private static $ulenMask = array("\xC0" => 2, "\xD0" => 2, "\xE0" => 3, "\xF0" => 4);
    // Result of the last whole-string UTF-8 validation done by iconv(); read by the map helpers.
    private static $isValidUtf8;

    /**
     * Converts a string from one charset to another, pivoting through UTF-8.
     *
     * Charset names are case-insensitive, may be aliases listed in self::$alias
     * and may carry "//TRANSLIT" and/or "//IGNORE" suffixes. The suffixes only
     * take effect on the output charset; they are stripped from the input one.
     * An empty charset name is treated as "iso-8859-1".
     *
     * @param string $inCharset  Charset of $str
     * @param string $outCharset Requested charset, optionally suffixed
     * @param string $str        The string to convert (cast to string)
     *
     * @return string|false The converted string, or false on failure; an
     *                      unknown charset or an illegal character triggers an error
     */
    public static function iconv($inCharset, $outCharset, $str)
    {
        $str = (string) $str;
        if ('' === $str) {
            return '';
        }

        // Prepare for //IGNORE and //TRANSLIT

        $translit = $ignore = '';

        $outCharset = strtolower($outCharset);
        $inCharset = strtolower($inCharset);

        if ('' === $outCharset) {
            $outCharset = 'iso-8859-1';
        }
        if ('' === $inCharset) {
            $inCharset = 'iso-8859-1';
        }

        // Suffixes can be stacked in any order, hence the loop.
        do {
            $loop = false;

            if ('//translit' === substr($outCharset, -10)) {
                $loop = $translit = true;
                $outCharset = substr($outCharset, 0, -10);
            }

            if ('//ignore' === substr($outCharset, -8)) {
                $loop = $ignore = true;
                $outCharset = substr($outCharset, 0, -8);
            }
        } while ($loop);

        // On the input side the suffixes are only removed, they enable nothing.
        do {
            $loop = false;

            if ('//translit' === substr($inCharset, -10)) {
                $loop = true;
                $inCharset = substr($inCharset, 0, -10);
            }

            if ('//ignore' === substr($inCharset, -8)) {
                $loop = true;
                $inCharset = substr($inCharset, 0, -8);
            }
        } while ($loop);

        if (isset(self::$alias[$inCharset])) {
            $inCharset = self::$alias[$inCharset];
        }
        if (isset(self::$alias[$outCharset])) {
            $outCharset = self::$alias[$outCharset];
        }

        // Load charset maps

        if (('utf-8' !== $inCharset && !self::loadMap('from.', $inCharset, $inMap))
            || ('utf-8' !== $outCharset && !self::loadMap('to.', $outCharset, $outMap))) {
            trigger_error(sprintf(self::ERROR_WRONG_CHARSET, $inCharset, $outCharset));

            return false;
        }

        if ('utf-8' !== $inCharset) {
            // Convert input to UTF-8
            $result = '';
            if (self::mapToUtf8($result, $inMap, $str, $ignore)) {
                $str = $result;
            } else {
                $str = false;
            }
            // Whatever came out of the map is assumed to be well-formed UTF-8.
            self::$isValidUtf8 = true;
        } else {
            self::$isValidUtf8 = preg_match('//u', $str);

            if (!self::$isValidUtf8 && !$ignore) {
                trigger_error(self::ERROR_ILLEGAL_CHARACTER);

                return false;
            }

            if ('utf-8' === $outCharset) {
                // UTF-8 validation
                $str = self::utf8ToUtf8($str, $ignore);
            }
        }

        if ('utf-8' !== $outCharset && false !== $str) {
            // Convert the UTF-8 pivot string to the output charset
            $result = '';
            if (self::mapFromUtf8($result, $outMap, $str, $ignore, $translit)) {
                return $result;
            }

            return false;
        }

        return $str;
    }

    /**
     * Decodes several MIME header fields at once.
     *
     * Only the part of $str before the first empty line is considered. Lines
     * starting with a space or a tab are treated as continuations of the
     * previous header.
     *
     * @param string      $str     Raw headers
     * @param int         $mode    Bitmask forwarded to iconv_mime_decode()
     * @param string|null $charset Target charset, defaults to the internal encoding
     *
     * @return array|false Header values keyed by header name; a name seen more
     *                     than once maps to a list of values. False when a field fails to decode
     */
    public static function iconv_mime_decode_headers($str, $mode = 0, $charset = null)
    {
        if (null === $charset) {
            $charset = self::$internalEncoding;
        }

        // Normalize every line ending to "\n".
        if (false !== strpos($str, "\r")) {
            $str = strtr(str_replace("\r\n", "\n", $str), "\r", "\n");
        }
        $str = explode("\n\n", $str, 2);

        $headers = array();

        $lines = preg_split('/\n(?![ \t])/', $str[0]);
        foreach ($lines as $line) {
            $line = self::iconv_mime_decode($line, $mode, $charset);
            if (false === $line) {
                return false;
            }
            $header = explode(':', $line, 2);

            // Lines without a colon are silently skipped.
            if (2 === \count($header)) {
                if (isset($headers[$header[0]])) {
                    if (!\is_array($headers[$header[0]])) {
                        $headers[$header[0]] = array($headers[$header[0]]);
                    }
                    $headers[$header[0]][] = ltrim($header[1]);
                } else {
                    $headers[$header[0]] = ltrim($header[1]);
                }
            }
        }

        return $headers;
    }

    /**
     * Decodes a single MIME header field.
     *
     * Only the first header of $str is decoded (folded continuation lines are
     * unfolded first). Encoded words of the form "=?charset?B|Q?data?=" are
     * decoded and converted to $charset; the text around them is converted
     * from UTF-8 to $charset.
     *
     * @param string      $str     The encoded header
     * @param int         $mode    Bitmask; with ICONV_MIME_DECODE_CONTINUE_ON_ERROR set,
     *                             "//IGNORE" is appended to $charset and encoded words
     *                             using an unknown charset are kept verbatim
     * @param string|null $charset Target charset, defaults to the internal encoding
     *
     * @return string|false The decoded header, or false on failure
     */
    public static function iconv_mime_decode($str, $mode = 0, $charset = null)
    {
        if (null === $charset) {
            $charset = self::$internalEncoding;
        }
        if (ICONV_MIME_DECODE_CONTINUE_ON_ERROR & $mode) {
            $charset .= '//IGNORE';
        }

        if (false !== strpos($str, "\r")) {
            $str = strtr(str_replace("\r\n", "\n", $str), "\r", "\n");
        }
        $str = preg_split('/\n(?![ \t])/', rtrim($str), 2);
        $str = preg_replace('/[ \t]*\n[ \t]+/', ' ', rtrim($str[0]));
        // Yields: text, charset, scheme, data, text, charset, scheme, data, text, ...
        $str = preg_split('/=\?([^?]+)\?([bqBQ])\?(.*?)\?=/', $str, -1, PREG_SPLIT_DELIM_CAPTURE);

        $result = self::iconv('utf-8', $charset, $str[0]);
        if (false === $result) {
            return false;
        }

        $i = 1;
        $len = \count($str);

        while ($i < $len) {
            $c = strtolower($str[$i]);
            if ((ICONV_MIME_DECODE_CONTINUE_ON_ERROR & $mode)
                && 'utf-8' !== $c
                && !isset(self::$alias[$c])
                && !self::loadMap('from.', $c, $d)) {
                // Unknown charset: flag the encoded word as undecodable.
                $d = false;
            } elseif ('B' === strtoupper($str[$i + 1])) {
                $d = base64_decode($str[$i + 2]);
            } else {
                // Q scheme: turn "=XX" into "%XX" and "_" into a space, then URL-decode.
                $d = rawurldecode(strtr(str_replace('%', '%25', $str[$i + 2]), '=_', '% '));
            }

            if (false !== $d) {
                if ('' !== $d) {
                    if ('' === $d = self::iconv($c, $charset, $d)) {
                        $str[$i + 3] = substr($str[$i + 3], 1);
                    } else {
                        $result .= $d;
                    }
                }
                $d = self::iconv('utf-8', $charset, $str[$i + 3]);
                // Whitespace-only text between two encoded words is dropped.
                if ('' !== trim($d)) {
                    $result .= $d;
                }
            } elseif (ICONV_MIME_DECODE_CONTINUE_ON_ERROR & $mode) {
                $result .= "=?{$str[$i]}?{$str[$i + 1]}?{$str[$i + 2]}?={$str[$i + 3]}";
            } else {
                $result = false;
                break;
            }

            $i += 4;
        }

        return $result;
    }

    /**
     * Returns the configured encodings.
     *
     * @param string $type "input_encoding", "output_encoding" or "internal_encoding";
     *                     any other value returns all three
     *
     * @return string|array The requested encoding, or an array of all three keyed by type
     */
    public static function iconv_get_encoding($type = 'all')
    {
        switch ($type) {
            case 'input_encoding': return self::$inputEncoding;
            case 'output_encoding': return self::$outputEncoding;
            case 'internal_encoding': return self::$internalEncoding;
        }

        return array(
            'input_encoding' => self::$inputEncoding,
            'output_encoding' => self::$outputEncoding,
            'internal_encoding' => self::$internalEncoding,
        );
    }

    /**
     * Sets one of the configured encodings.
     *
     * @param string $type    "input_encoding", "output_encoding" or "internal_encoding"
     * @param string $charset The charset to store (not validated here)
     *
     * @return bool False when $type is not one of the three known settings
     */
    public static function iconv_set_encoding($type, $charset)
    {
        switch ($type) {
            case 'input_encoding': self::$inputEncoding = $charset; break;
            case 'output_encoding': self::$outputEncoding = $charset; break;
            case 'internal_encoding': self::$internalEncoding = $charset; break;

            default: return false;
        }

        return true;
    }

    /**
     * Composes a MIME header field.
     *
     * @param string     $fieldName  Header name; replaced by '' when it contains bytes >= 0x80
     * @param string     $fieldValue Header value, expressed in the "input-charset" preference
     * @param array|null $pref       Preferences: "scheme" ("B" or "Q"), "input-charset",
     *                               "output-charset", "line-length" and "line-break-chars".
     *                               Missing keys default to "B", the internal encoding (twice),
     *                               76 and "\r\n"
     *
     * @return string|false The encoded header, or false when a charset conversion fails
     */
    public static function iconv_mime_encode($fieldName, $fieldValue, $pref = null)
    {
        if (!\is_array($pref)) {
            $pref = array();
        }

        $pref += array(
            'scheme' => 'B',
            'input-charset' => self::$internalEncoding,
            'output-charset' => self::$internalEncoding,
            'line-length' => 76,
            'line-break-chars' => "\r\n",
        );

        if (preg_match('/[\x80-\xFF]/', $fieldName)) {
            $fieldName = '';
        }

        $scheme = strtoupper(substr($pref['scheme'], 0, 1));
        $in = strtolower($pref['input-charset']);
        $out = strtolower($pref['output-charset']);

        if ('utf-8' !== $in && false === $fieldValue = self::iconv($in, 'utf-8', $fieldValue)) {
            return false;
        }

        // Split into characters so that a multi-byte character is never cut across two lines.
        preg_match_all('/./us', $fieldValue, $chars);

        $chars = isset($chars[0]) ? $chars[0] : array();

        $lineBreak = (int) $pref['line-length'];
        $lineStart = "=?{$pref['output-charset']}?{$scheme}?";
        $lineLength = \strlen($fieldName) + 2 + \strlen($lineStart) + 2;
        $lineOffset = \strlen($lineStart) + 3;
        $lineData = '';

        $fieldValue = array();

        $Q = 'Q' === $scheme;

        foreach ($chars as $c) {
            if ('utf-8' !== $out && false === $c = self::iconv('utf-8', $out, $c)) {
                return false;
            }

            // $o is the candidate payload used to decide whether the current line is full.
            // With the Q scheme, $c itself is replaced by its quoted-printable form.
            $o = $Q
                ? $c = preg_replace_callback(
                    '/[=_\?\x00-\x1F\x80-\xFF]/',
                    array(__CLASS__, 'qpByteCallback'),
                    $c
                )
                : base64_encode($lineData.$c);

            if (isset($o[$lineBreak - $lineLength])) {
                if (!$Q) {
                    $lineData = base64_encode($lineData);
                }
                $fieldValue[] = $lineStart.$lineData.'?=';
                $lineLength = $lineOffset;
                $lineData = '';
            }

            $lineData .= $c;
            $Q && $lineLength += \strlen($c);
        }

        if ('' !== $lineData) {
            if (!$Q) {
                $lineData = base64_encode($lineData);
            }
            $fieldValue[] = $lineStart.$lineData.'?=';
        }

        return $fieldName.': '.implode($pref['line-break-chars'].' ', $fieldValue);
    }

    /**
     * Returns the number of characters in a string.
     *
     * Delegates to strlen1() when the xml extension is loaded on a runtime where
     * utf8_decode() is not deprecated, and to the pure PHP strlen2() otherwise.
     *
     * @param string      $s        The string to measure
     * @param string|null $encoding Charset of $s, defaults to the internal encoding
     *
     * @return int|false The character count, or false when $s cannot be converted to UTF-8
     */
    public static function iconv_strlen($s, $encoding = null)
    {
        static $hasXml = null;
        if (null === $hasXml) {
            // utf8_decode() is deprecated as of PHP 8.2: avoid it on those runtimes.
            $hasXml = \PHP_VERSION_ID < 80200 && \extension_loaded('xml');
        }

        if ($hasXml) {
            return self::strlen1($s, $encoding);
        }

        return self::strlen2($s, $encoding);
    }

    /**
     * Counts characters by measuring the byte length of utf8_decode()'s output.
     *
     * @param string      $s        The string to measure
     * @param string|null $encoding Charset of $s, defaults to the internal encoding
     *
     * @return int|false The character count, or false when $s cannot be converted to UTF-8
     */
    public static function strlen1($s, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }
        if (0 !== stripos($encoding, 'utf-8') && false === $s = self::iconv($encoding, 'utf-8', $s)) {
            return false;
        }

        return \strlen(utf8_decode($s));
    }

    /**
     * Counts characters by walking the string and skipping over each UTF-8 sequence.
     *
     * @param string      $s        The string to measure
     * @param string|null $encoding Charset of $s, defaults to the internal encoding
     *
     * @return int|false The character count, or false when $s cannot be converted to UTF-8
     */
    public static function strlen2($s, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }
        if (0 !== stripos($encoding, 'utf-8') && false === $s = self::iconv($encoding, 'utf-8', $s)) {
            return false;
        }

        $ulenMask = self::$ulenMask;

        $i = 0;
        $j = 0;
        $len = \strlen($s);

        while ($i < $len) {
            // The high nibble of the lead byte gives the length of the sequence.
            $u = $s[$i] & "\xF0";
            $i += isset($ulenMask[$u]) ? $ulenMask[$u] : 1;
            ++$j;
        }

        return $j;
    }

    /**
     * Finds the character position of the first occurrence of a needle.
     *
     * @param string      $haystack The string to search in
     * @param string      $needle   The string to search for
     * @param int         $offset   Character offset at which the search starts
     * @param string|null $encoding Charset of both strings, defaults to the internal encoding
     *
     * @return int|false The position counted in characters from the start of
     *                   $haystack, or false when not found or on conversion failure
     */
    public static function iconv_strpos($haystack, $needle, $offset = 0, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }

        if (0 !== stripos($encoding, 'utf-8')) {
            if (false === $haystack = self::iconv($encoding, 'utf-8', $haystack)) {
                return false;
            }
            if (false === $needle = self::iconv($encoding, 'utf-8', $needle)) {
                return false;
            }
        }

        if ($offset = (int) $offset) {
            $haystack = self::iconv_substr($haystack, $offset, 2147483647, 'utf-8');
        }
        // Byte position within the (possibly shortened) haystack.
        $pos = strpos($haystack, $needle);

        return false === $pos ? false : ($offset + ($pos ? self::iconv_strlen(substr($haystack, 0, $pos), 'utf-8') : 0));
    }

    /**
     * Finds the character position of the last occurrence of a needle.
     *
     * @param string      $haystack The string to search in
     * @param string      $needle   The string to search for; an empty needle is never found
     * @param string|null $encoding Charset of both strings, defaults to the internal encoding
     *
     * @return int|false The position counted in characters, or false when not
     *                   found or on conversion failure
     */
    public static function iconv_strrpos($haystack, $needle, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }

        if (0 !== stripos($encoding, 'utf-8')) {
            if (false === $haystack = self::iconv($encoding, 'utf-8', $haystack)) {
                return false;
            }
            if (false === $needle = self::iconv($encoding, 'utf-8', $needle)) {
                return false;
            }
        }

        $pos = isset($needle[0]) ? strrpos($haystack, $needle) : false;

        if (false === $pos) {
            return false;
        }

        // A match at byte 0 is at character 0; otherwise count the characters that precede it.
        return $pos ? self::iconv_strlen(substr($haystack, 0, $pos), 'utf-8') : 0;
    }

    /**
     * Cuts out part of a string, with offsets counted in characters.
     *
     * @param string      $s        The source string
     * @param int         $start    First character to keep; negative values count from the end
     * @param int         $length   Number of characters to keep; a negative value
     *                              leaves that many characters off the end
     * @param string|null $encoding Charset of $s, defaults to the internal encoding
     *
     * @return string|false The extracted part (in the charset of $s), or false when
     *                      the range falls outside the string or a conversion fails
     */
    public static function iconv_substr($s, $start, $length = 2147483647, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }
        if (0 === stripos($encoding, 'utf-8')) {
            // Already UTF-8: null means "no conversion back" at the end.
            $encoding = null;
        } elseif (false === $s = self::iconv($encoding, 'utf-8', $s)) {
            return false;
        }

        $s = (string) $s;
        $slen = self::iconv_strlen($s, 'utf-8');
        $start = (int) $start;

        if (0 > $start) {
            $start += $slen;
        }
        if (0 > $start) {
            return false;
        }
        if ($start >= $slen) {
            return false;
        }

        // Number of characters available from $start to the end of the string.
        $rx = $slen - $start;

        if (0 > $length) {
            $length += $rx;
        }
        if (0 === $length) {
            return '';
        }
        if (0 > $length) {
            return false;
        }

        if ($length > $rx) {
            $length = $rx;
        }

        // The "s" modifier lets "." match line breaks, which count as characters too.
        $rx = '/^'.($start ? self::pregOffset($start) : '').'('.self::pregOffset($length).')/us';

        $s = preg_match($rx, $s, $s) ? $s[1] : '';

        if (null === $encoding) {
            return $s;
        }

        return self::iconv('utf-8', $encoding, $s);
    }

    /**
     * Loads the conversion map of a charset, caching it in self::$convertMap.
     *
     * When no "to." map file exists for a charset, the map is built by flipping
     * the matching "from." map.
     *
     * @param string $type    "from." (charset to UTF-8) or "to." (UTF-8 to charset)
     * @param string $charset Charset name, as used in the map file names
     * @param array  $map     Receives the loaded map
     *
     * @return bool False when no map is available for the charset
     */
    private static function loadMap($type, $charset, &$map)
    {
        if (!isset(self::$convertMap[$type.$charset])) {
            if (false === $map = self::getData($type.$charset)) {
                if ('to.' === $type && self::loadMap('from.', $charset, $map)) {
                    $map = array_flip($map);
                } else {
                    return false;
                }
            }

            self::$convertMap[$type.$charset] = $map;
        } else {
            $map = self::$convertMap[$type.$charset];
        }

        return true;
    }

    /**
     * Copies a UTF-8 string sequence by sequence, rejecting or skipping invalid bytes.
     *
     * Relies on self::$isValidUtf8 as set by iconv(): when the whole string is
     * known to be valid, the per-sequence regex check is skipped.
     *
     * @param string $str    The string to check
     * @param mixed  $ignore Truthy to drop invalid bytes instead of failing
     *
     * @return string|false The resulting string, or false (after triggering an
     *                      error) when an invalid byte is met and $ignore is falsy
     */
    private static function utf8ToUtf8($str, $ignore)
    {
        $ulenMask = self::$ulenMask;
        $valid = self::$isValidUtf8;

        // $u is an output buffer of the same size, overwritten in place and truncated at the end.
        $u = $str;
        $i = $j = 0;
        $len = \strlen($str);

        while ($i < $len) {
            if ($str[$i] < "\x80") {
                $u[$j++] = $str[$i++];
            } else {
                $ulen = $str[$i] & "\xF0";
                $ulen = isset($ulenMask[$ulen]) ? $ulenMask[$ulen] : 1;
                $uchr = substr($str, $i, $ulen);

                if (1 === $ulen || !($valid || preg_match('/^.$/us', $uchr))) {
                    if ($ignore) {
                        ++$i;
                        continue;
                    }

                    trigger_error(self::ERROR_ILLEGAL_CHARACTER);

                    return false;
                } else {
                    $i += $ulen;
                }

                $u[$j++] = $uchr[0];

                // Copy the remaining bytes of the sequence, stopping at the first missing one.
                isset($uchr[1]) && 0 !== ($u[$j++] = $uchr[1])
                    && isset($uchr[2]) && 0 !== ($u[$j++] = $uchr[2])
                    && isset($uchr[3]) && 0 !== ($u[$j++] = $uchr[3]);
            }
        }

        return substr($u, 0, $j);
    }

    /**
     * Converts a string to UTF-8 using a "from." charset map.
     *
     * Two-byte keys of the map are tried before single-byte ones.
     *
     * @param string $result Receives the converted string (appended to)
     * @param array  $map    Source byte sequences mapped to UTF-8 characters
     * @param string $str    The string to convert
     * @param mixed  $ignore Truthy to skip unmapped bytes instead of failing
     *
     * @return bool False (after triggering an error) on an unmapped byte when $ignore is falsy
     */
    private static function mapToUtf8(&$result, array $map, $str, $ignore)
    {
        $len = \strlen($str);
        for ($i = 0; $i < $len; ++$i) {
            if (isset($str[$i + 1], $map[$str[$i].$str[$i + 1]])) {
                $result .= $map[$str[$i].$str[++$i]];
            } elseif (isset($map[$str[$i]])) {
                $result .= $map[$str[$i]];
            } elseif (!$ignore) {
                trigger_error(self::ERROR_ILLEGAL_CHARACTER);

                return false;
            }
        }

        return true;
    }

    /**
     * Converts a UTF-8 string to another charset using a "to." charset map.
     *
     * @param string $result   Receives the converted string (appended to)
     * @param array  $map      UTF-8 characters mapped to target byte sequences
     * @param string $str      The UTF-8 string to convert
     * @param mixed  $ignore   Truthy to skip characters that cannot be converted
     * @param mixed  $translit Truthy to try a transliteration for characters missing from $map
     *
     * @return bool False when a character cannot be converted and $ignore is falsy
     */
    private static function mapFromUtf8(&$result, array $map, $str, $ignore, $translit)
    {
        $ulenMask = self::$ulenMask;
        $valid = self::$isValidUtf8;

        if ($translit && !self::$translitMap) {
            self::$translitMap = self::getData('translit');
        }

        $i = 0;
        $len = \strlen($str);

        while ($i < $len) {
            if ($str[$i] < "\x80") {
                $uchr = $str[$i++];
            } else {
                $ulen = $str[$i] & "\xF0";
                $ulen = isset($ulenMask[$ulen]) ? $ulenMask[$ulen] : 1;
                $uchr = substr($str, $i, $ulen);

                if ($ignore && (1 === $ulen || !($valid || preg_match('/^.$/us', $uchr)))) {
                    ++$i;
                    continue;
                } else {
                    $i += $ulen;
                }
            }

            if (isset($map[$uchr])) {
                $result .= $map[$uchr];
            } elseif ($translit) {
                if (isset(self::$translitMap[$uchr])) {
                    $uchr = self::$translitMap[$uchr];
                } elseif ($uchr >= "\xC3\x80") {
                    // Decompose the character and keep its base letter when that one is ASCII.
                    $uchr = \Normalizer::normalize($uchr, \Normalizer::NFD);

                    if ($uchr[0] < "\x80") {
                        $uchr = $uchr[0];
                    } elseif ($ignore) {
                        continue;
                    } else {
                        return false;
                    }
                } elseif ($ignore) {
                    continue;
                } else {
                    return false;
                }

                // Put the replacement in front of the unread input and process it through the map.
                $str = $uchr.substr($str, $i);
                $len = \strlen($str);
                $i = 0;
            } elseif (!$ignore) {
                return false;
            }
        }

        return true;
    }

    /**
     * Encodes one byte for the "Q" scheme, as "=" followed by two upper-case hex digits.
     *
     * @param array $m Regex match whose first entry is the byte to encode
     *
     * @return string The encoded byte, e.g. "=09" for a tab
     */
    private static function qpByteCallback(array $m)
    {
        // %02X zero-pads: bytes below 0x10 must still yield two hex digits.
        return sprintf('=%02X', \ord($m[0]));
    }

    /**
     * Builds a regex fragment matching exactly $offset characters.
     *
     * The count is split into "{n}" quantifiers of at most 65535 each.
     *
     * @param int $offset Number of characters to match
     *
     * @return string The regex fragment, made of "." atoms only
     */
    private static function pregOffset($offset)
    {
        $rx = array();
        $offset = (int) $offset;

        while ($offset > 65535) {
            $rx[] = '.{65535}';
            $offset -= 65535;
        }

        return implode('', $rx).'.{'.$offset.'}';
    }

    /**
     * Loads a data file from the Resources/charset/ directory next to this file.
     *
     * @param string $file File name without the ".php" extension
     *
     * @return mixed|false What the file returns, or false when it does not exist
     */
    private static function getData($file)
    {
        if (file_exists($file = __DIR__.'/Resources/charset/'.$file.'.php')) {
            return require $file;
        }

        return false;
    }
}