annotate vendor/symfony/polyfill-iconv/Iconv.php @ 0:4c8ae668cc8c

Initial import (non-working)
author Chris Cannam
date Wed, 29 Nov 2017 16:09:58 +0000
parents
children 7a779792577d
rev   line source
Chris@0 1 <?php
Chris@0 2
Chris@0 3 /*
Chris@0 4 * This file is part of the Symfony package.
Chris@0 5 *
Chris@0 6 * (c) Fabien Potencier <fabien@symfony.com>
Chris@0 7 *
Chris@0 8 * For the full copyright and license information, please view the LICENSE
Chris@0 9 * file that was distributed with this source code.
Chris@0 10 */
Chris@0 11
Chris@0 12 namespace Symfony\Polyfill\Iconv;
Chris@0 13
/**
 * iconv implementation in pure PHP, UTF-8 centric.
 *
 * Implemented:
 * - iconv                     - Convert string to requested character encoding
 * - iconv_mime_decode         - Decodes a MIME header field
 * - iconv_mime_decode_headers - Decodes multiple MIME header fields at once
 * - iconv_get_encoding        - Retrieve internal configuration variables of iconv extension
 * - iconv_set_encoding        - Set current setting for character encoding conversion
 * - iconv_mime_encode         - Composes a MIME header field
 * - iconv_strlen              - Returns the character count of string
 * - iconv_strpos              - Finds position of first occurrence of a needle within a haystack
 * - iconv_strrpos             - Finds the last occurrence of a needle within a haystack
 * - iconv_substr              - Cut out part of a string
 *
 * Charsets available for conversion are defined by files
 * in the charset/ directory and by Iconv::$alias below.
 * You're welcome to send back any addition you make.
 *
 * @author Nicolas Grekas <p@tchwork.com>
 *
 * @internal
 */
final class Iconv
{
    const ERROR_ILLEGAL_CHARACTER = 'iconv(): Detected an illegal character in input string';
    const ERROR_WRONG_CHARSET = 'iconv(): Wrong charset, conversion from `%s\' to `%s\' is not allowed';

    public static $inputEncoding = 'utf-8';
    public static $outputEncoding = 'utf-8';
    public static $internalEncoding = 'utf-8';

    /** Lower-cased charset aliases mapped to the canonical name used for map lookups. */
    private static $alias = array(
        'utf8' => 'utf-8',
        'ascii' => 'us-ascii',
        'tis-620' => 'iso-8859-11',
        'cp1250' => 'windows-1250',
        'cp1251' => 'windows-1251',
        'cp1252' => 'windows-1252',
        'cp1253' => 'windows-1253',
        'cp1254' => 'windows-1254',
        'cp1255' => 'windows-1255',
        'cp1256' => 'windows-1256',
        'cp1257' => 'windows-1257',
        'cp1258' => 'windows-1258',
        'shift-jis' => 'cp932',
        'shift_jis' => 'cp932',
        'latin1' => 'iso-8859-1',
        'latin2' => 'iso-8859-2',
        'latin3' => 'iso-8859-3',
        'latin4' => 'iso-8859-4',
        'latin5' => 'iso-8859-9',
        'latin6' => 'iso-8859-10',
        'latin7' => 'iso-8859-13',
        'latin8' => 'iso-8859-14',
        'latin9' => 'iso-8859-15',
        'latin10' => 'iso-8859-16',
        'iso8859-1' => 'iso-8859-1',
        'iso8859-2' => 'iso-8859-2',
        'iso8859-3' => 'iso-8859-3',
        'iso8859-4' => 'iso-8859-4',
        'iso8859-5' => 'iso-8859-5',
        'iso8859-6' => 'iso-8859-6',
        'iso8859-7' => 'iso-8859-7',
        'iso8859-8' => 'iso-8859-8',
        'iso8859-9' => 'iso-8859-9',
        'iso8859-10' => 'iso-8859-10',
        'iso8859-11' => 'iso-8859-11',
        'iso8859-12' => 'iso-8859-12',
        'iso8859-13' => 'iso-8859-13',
        'iso8859-14' => 'iso-8859-14',
        'iso8859-15' => 'iso-8859-15',
        'iso8859-16' => 'iso-8859-16',
        'iso_8859-1' => 'iso-8859-1',
        'iso_8859-2' => 'iso-8859-2',
        'iso_8859-3' => 'iso-8859-3',
        'iso_8859-4' => 'iso-8859-4',
        'iso_8859-5' => 'iso-8859-5',
        'iso_8859-6' => 'iso-8859-6',
        'iso_8859-7' => 'iso-8859-7',
        'iso_8859-8' => 'iso-8859-8',
        'iso_8859-9' => 'iso-8859-9',
        'iso_8859-10' => 'iso-8859-10',
        'iso_8859-11' => 'iso-8859-11',
        'iso_8859-12' => 'iso-8859-12',
        'iso_8859-13' => 'iso-8859-13',
        'iso_8859-14' => 'iso-8859-14',
        'iso_8859-15' => 'iso-8859-15',
        'iso_8859-16' => 'iso-8859-16',
        'iso88591' => 'iso-8859-1',
        'iso88592' => 'iso-8859-2',
        'iso88593' => 'iso-8859-3',
        'iso88594' => 'iso-8859-4',
        'iso88595' => 'iso-8859-5',
        'iso88596' => 'iso-8859-6',
        'iso88597' => 'iso-8859-7',
        'iso88598' => 'iso-8859-8',
        'iso88599' => 'iso-8859-9',
        'iso885910' => 'iso-8859-10',
        'iso885911' => 'iso-8859-11',
        'iso885912' => 'iso-8859-12',
        'iso885913' => 'iso-8859-13',
        'iso885914' => 'iso-8859-14',
        'iso885915' => 'iso-8859-15',
        'iso885916' => 'iso-8859-16',
    );

    /** Transliteration table, lazily loaded by mapFromUtf8() through getData('translit'). */
    private static $translitMap = array();

    /** Cache of loaded conversion maps, keyed by "<type><charset>" (e.g. "from.iso-8859-1"). */
    private static $convertMap = array();
    private static $errorHandler;
    private static $lastError;

    /** Byte length of a UTF-8 sequence, keyed by the lead byte masked with 0xF0. */
    private static $ulenMask = array("\xC0" => 2, "\xD0" => 2, "\xE0" => 3, "\xF0" => 4);

    /** Validity of the string currently being converted, set by iconv() for the private helpers. */
    private static $isValidUtf8;

    /**
     * Converts a string from one charset to another, going through UTF-8.
     *
     * Charset names are case-insensitive and may carry "//TRANSLIT" and/or
     * "//IGNORE" suffixes, in any order. The suffixes are honoured on the
     * output charset and silently dropped from the input charset.
     *
     * @param string $inCharset  Source charset; an empty string means iso-8859-1
     * @param string $outCharset Target charset; an empty string means iso-8859-1
     * @param string $str        The string to convert (cast to string)
     *
     * @return string|false The converted string, or false on an unknown charset
     *                      or an illegal/unmappable character
     */
    public static function iconv($inCharset, $outCharset, $str)
    {
        // The concatenation casts $str to a string before the comparison.
        if ('' === $str .= '') {
            return '';
        }

        // Prepare for //IGNORE and //TRANSLIT

        $translit = $ignore = '';

        $outCharset = strtolower($outCharset);
        $inCharset = strtolower($inCharset);

        if ('' === $outCharset) {
            $outCharset = 'iso-8859-1';
        }
        if ('' === $inCharset) {
            $inCharset = 'iso-8859-1';
        }

        // Strip the suffixes repeatedly so that both "//translit//ignore" and
        // "//ignore//translit" are recognised.
        do {
            $stripped = false;

            if ('//translit' === substr($outCharset, -10)) {
                $translit = '//TRANSLIT';
                $outCharset = substr($outCharset, 0, -10);
                $stripped = true;
            }
            if ('//ignore' === substr($outCharset, -8)) {
                $ignore = '//IGNORE';
                $outCharset = substr($outCharset, 0, -8);
                $stripped = true;
            }
        } while ($stripped);

        do {
            $stripped = false;

            if ('//translit' === substr($inCharset, -10)) {
                $inCharset = substr($inCharset, 0, -10);
                $stripped = true;
            }
            if ('//ignore' === substr($inCharset, -8)) {
                $inCharset = substr($inCharset, 0, -8);
                $stripped = true;
            }
        } while ($stripped);

        if (isset(self::$alias[$inCharset])) {
            $inCharset = self::$alias[$inCharset];
        }
        if (isset(self::$alias[$outCharset])) {
            $outCharset = self::$alias[$outCharset];
        }

        // Load charset maps

        if (('utf-8' !== $inCharset && !self::loadMap('from.', $inCharset, $inMap))
            || ('utf-8' !== $outCharset && !self::loadMap('to.', $outCharset, $outMap))) {
            trigger_error(sprintf(self::ERROR_WRONG_CHARSET, $inCharset, $outCharset));

            return false;
        }

        if ('utf-8' !== $inCharset) {
            // Convert input to UTF-8
            $result = '';
            if (self::mapToUtf8($result, $inMap, $str, $ignore)) {
                $str = $result;
            } else {
                $str = false;
            }
            self::$isValidUtf8 = true;
        } else {
            self::$isValidUtf8 = preg_match('//u', $str);

            if (!self::$isValidUtf8 && !$ignore) {
                trigger_error(self::ERROR_ILLEGAL_CHARACTER);

                return false;
            }

            if ('utf-8' === $outCharset) {
                // UTF-8 validation
                $str = self::utf8ToUtf8($str, $ignore);
            }
        }

        if ('utf-8' !== $outCharset && false !== $str) {
            // Convert the UTF-8 intermediate to the output charset
            $result = '';
            if (self::mapFromUtf8($result, $outMap, $str, $ignore, $translit)) {
                return $result;
            }

            return false;
        }

        return $str;
    }

    /**
     * Decodes every header field of a MIME header block.
     *
     * Only the part before the first blank line is considered. Folded lines
     * (continuations starting with a space or tab) stay attached to their field.
     *
     * @param string      $str     Raw header block
     * @param int         $mode    Bitmask forwarded to iconv_mime_decode()
     * @param string|null $charset Target charset; defaults to the internal encoding
     *
     * @return array|false Field name => value, or a list of values when a field
     *                     is repeated; false when a field cannot be decoded
     */
    public static function iconv_mime_decode_headers($str, $mode = 0, $charset = null)
    {
        if (null === $charset) {
            $charset = self::$internalEncoding;
        }

        // Normalise CRLF and lone CR line endings to LF.
        if (false !== strpos($str, "\r")) {
            $str = strtr(str_replace("\r\n", "\n", $str), "\r", "\n");
        }
        $sections = explode("\n\n", $str, 2);

        $headers = array();

        foreach (preg_split('/\n(?![ \t])/', $sections[0]) as $line) {
            $line = self::iconv_mime_decode($line, $mode, $charset);
            if (false === $line) {
                return false;
            }

            $field = explode(':', $line, 2);
            if (2 !== count($field)) {
                // Not a "name: value" line, skip it.
                continue;
            }

            $name = $field[0];
            $value = ltrim($field[1]);

            if (!isset($headers[$name])) {
                $headers[$name] = $value;
                continue;
            }

            // Repeated field: promote the stored scalar to a list, then append.
            if (!is_array($headers[$name])) {
                $headers[$name] = array($headers[$name]);
            }
            $headers[$name][] = $value;
        }

        return $headers;
    }

    /**
     * Decodes a single MIME header field containing RFC 2047 encoded-words.
     *
     * Only the first (possibly folded) header line of $str is decoded.
     *
     * @param string      $str     Encoded header field
     * @param int         $mode    Bitmask; ICONV_MIME_DECODE_CONTINUE_ON_ERROR keeps
     *                             undecodable encoded-words verbatim and ignores
     *                             illegal characters
     * @param string|null $charset Target charset; defaults to the internal encoding
     *
     * @return string|false The decoded field, or false on failure
     */
    public static function iconv_mime_decode($str, $mode = 0, $charset = null)
    {
        if (null === $charset) {
            $charset = self::$internalEncoding;
        }

        $continueOnError = (bool) (ICONV_MIME_DECODE_CONTINUE_ON_ERROR & $mode);
        if ($continueOnError) {
            $charset .= '//IGNORE';
        }

        if (false !== strpos($str, "\r")) {
            $str = strtr(str_replace("\r\n", "\n", $str), "\r", "\n");
        }

        // Keep the first logical line only, then unfold it.
        $lines = preg_split('/\n(?![ \t])/', rtrim($str), 2);
        $line = preg_replace('/[ \t]*\n[ \t]+/', ' ', rtrim($lines[0]));

        // Layout: [plain, charset, scheme, payload, plain, charset, scheme, payload, plain, ...]
        $tokens = preg_split('/=\?([^?]+)\?([bqBQ])\?(.*?)\?=/', $line, -1, PREG_SPLIT_DELIM_CAPTURE);

        $result = self::iconv('utf-8', $charset, $tokens[0]);
        if (false === $result) {
            return false;
        }

        $i = 1;
        $len = count($tokens);

        while ($i < $len) {
            $c = strtolower($tokens[$i]);

            if ($continueOnError
                && 'utf-8' !== $c
                && !isset(self::$alias[$c])
                && !self::loadMap('from.', $c, $d)) {
                // Unknown charset: mark the word as undecodable.
                $d = false;
            } elseif ('B' === strtoupper($tokens[$i + 1])) {
                $d = base64_decode($tokens[$i + 2]);
            } else {
                // Q-encoding: protect literal "%", then turn "=XX" into "%XX" and "_" into a space.
                $d = rawurldecode(strtr(str_replace('%', '%25', $tokens[$i + 2]), '=_', '% '));
            }

            if (false !== $d) {
                if ('' !== $d) {
                    if ('' === $d = self::iconv($c, $charset, $d)) {
                        // The word decoded to nothing: also drop the first character of the following plain text.
                        $tokens[$i + 3] = substr($tokens[$i + 3], 1);
                    } else {
                        $result .= $d;
                    }
                }

                // Plain text after the word is kept unless it is whitespace only.
                $d = self::iconv('utf-8', $charset, $tokens[$i + 3]);
                if ('' !== trim($d)) {
                    $result .= $d;
                }
            } elseif ($continueOnError) {
                $result .= "=?{$tokens[$i]}?{$tokens[$i + 1]}?{$tokens[$i + 2]}?={$tokens[$i + 3]}";
            } else {
                $result = false;
                break;
            }

            $i += 4;
        }

        return $result;
    }

    /**
     * Returns the configured encodings.
     *
     * @param string $type 'input_encoding', 'output_encoding' or 'internal_encoding';
     *                     any other value returns all three
     *
     * @return string|array One encoding, or an array of all three keyed by type
     */
    public static function iconv_get_encoding($type = 'all')
    {
        switch ($type) {
            case 'input_encoding': return self::$inputEncoding;
            case 'output_encoding': return self::$outputEncoding;
            case 'internal_encoding': return self::$internalEncoding;
        }

        return array(
            'input_encoding' => self::$inputEncoding,
            'output_encoding' => self::$outputEncoding,
            'internal_encoding' => self::$internalEncoding,
        );
    }

    /**
     * Sets one of the configured encodings.
     *
     * @param string $type    'input_encoding', 'output_encoding' or 'internal_encoding'
     * @param string $charset The charset to store (not validated here)
     *
     * @return bool False when $type is not one of the three known settings
     */
    public static function iconv_set_encoding($type, $charset)
    {
        switch ($type) {
            case 'input_encoding': self::$inputEncoding = $charset; break;
            case 'output_encoding': self::$outputEncoding = $charset; break;
            case 'internal_encoding': self::$internalEncoding = $charset; break;

            default: return false;
        }

        return true;
    }

    /**
     * Composes a MIME header field made of RFC 2047 encoded-words.
     *
     * @param string     $fieldName  Field name; replaced by '' when it contains non-ASCII bytes
     * @param string     $fieldValue Field value, in the 'input-charset' preference
     * @param array|null $pref       Optional overrides for 'scheme' ('B' or 'Q'),
     *                               'input-charset', 'output-charset', 'line-length'
     *                               and 'line-break-chars'
     *
     * @return string|false The encoded header, or false when a charset conversion fails
     */
    public static function iconv_mime_encode($fieldName, $fieldValue, $pref = null)
    {
        if (!is_array($pref)) {
            $pref = array();
        }

        // Array union: caller-supplied keys win, defaults fill the gaps.
        $pref += array(
            'scheme' => 'B',
            'input-charset' => self::$internalEncoding,
            'output-charset' => self::$internalEncoding,
            'line-length' => 76,
            'line-break-chars' => "\r\n",
        );

        if (preg_match('/[\x80-\xFF]/', $fieldName)) {
            $fieldName = '';
        }

        $scheme = strtoupper(substr($pref['scheme'], 0, 1));
        $in = strtolower($pref['input-charset']);
        $out = strtolower($pref['output-charset']);

        if ('utf-8' !== $in && false === $fieldValue = self::iconv($in, 'utf-8', $fieldValue)) {
            return false;
        }

        // Split into individual UTF-8 characters so that a line break never cuts one in half.
        preg_match_all('/./us', $fieldValue, $chars);

        $chars = isset($chars[0]) ? $chars[0] : array();

        $lineBreak = (int) $pref['line-length'];
        $lineStart = "=?{$pref['output-charset']}?{$scheme}?";
        $lineLength = strlen($fieldName) + 2 + strlen($lineStart) + 2;
        $lineOffset = strlen($lineStart) + 3;
        $lineData = '';

        $fieldValue = array();

        $Q = 'Q' === $scheme;

        foreach ($chars as $c) {
            if ('utf-8' !== $out && false === $c = self::iconv('utf-8', $out, $c)) {
                return false;
            }

            // $o is the candidate that decides whether the current line overflows:
            // the escaped character alone for Q, the whole re-encoded line for B.
            $o = $Q
                ? $c = preg_replace_callback(
                    '/[=_\?\x00-\x1F\x80-\xFF]/',
                    array(__CLASS__, 'qpByteCallback'),
                    $c
                )
                : base64_encode($lineData.$c);

            if (isset($o[$lineBreak - $lineLength])) {
                if (!$Q) {
                    $lineData = base64_encode($lineData);
                }
                $fieldValue[] = $lineStart.$lineData.'?=';
                $lineLength = $lineOffset;
                $lineData = '';
            }

            $lineData .= $c;
            $Q && $lineLength += strlen($c);
        }

        if ('' !== $lineData) {
            if (!$Q) {
                $lineData = base64_encode($lineData);
            }
            $fieldValue[] = $lineStart.$lineData.'?=';
        }

        return $fieldName.': '.implode($pref['line-break-chars'].' ', $fieldValue);
    }

    /**
     * Returns the number of characters in a string.
     *
     * Delegates to strlen1() (utf8_decode based) when the xml extension is
     * loaded on PHP < 8.2, and to strlen2() otherwise, because utf8_decode()
     * is deprecated as of PHP 8.2.
     *
     * @param string      $s        The string to measure
     * @param string|null $encoding Charset of $s; defaults to the internal encoding
     *
     * @return int|false The character count, or false when conversion to UTF-8 fails
     */
    public static function iconv_strlen($s, $encoding = null)
    {
        static $useUtf8Decode = null;
        if (null === $useUtf8Decode) {
            $useUtf8Decode = PHP_VERSION_ID < 80200 && extension_loaded('xml');
        }

        if ($useUtf8Decode) {
            return self::strlen1($s, $encoding);
        }

        return self::strlen2($s, $encoding);
    }

    /**
     * Character count computed as the byte length of utf8_decode()'s output.
     *
     * @param string      $s        The string to measure
     * @param string|null $encoding Charset of $s; defaults to the internal encoding
     *
     * @return int|false The character count, or false when conversion to UTF-8 fails
     */
    public static function strlen1($s, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }
        if (0 !== stripos($encoding, 'utf-8') && false === $s = self::iconv($encoding, 'utf-8', $s)) {
            return false;
        }

        return strlen(utf8_decode($s));
    }

    /**
     * Character count computed by walking the lead bytes of each UTF-8 sequence.
     *
     * @param string      $s        The string to measure
     * @param string|null $encoding Charset of $s; defaults to the internal encoding
     *
     * @return int|false The character count, or false when conversion to UTF-8 fails
     */
    public static function strlen2($s, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }
        if (0 !== stripos($encoding, 'utf-8') && false === $s = self::iconv($encoding, 'utf-8', $s)) {
            return false;
        }

        $ulenMask = self::$ulenMask;

        $i = 0;
        $j = 0;
        $len = strlen($s);

        while ($i < $len) {
            // The high nibble of the lead byte gives the sequence length; anything else counts as one byte.
            $u = $s[$i] & "\xF0";
            $i += isset($ulenMask[$u]) ? $ulenMask[$u] : 1;
            ++$j;
        }

        return $j;
    }

    /**
     * Finds the character position of the first occurrence of a needle.
     *
     * @param string      $haystack The string to search in
     * @param string      $needle   The string to search for
     * @param int         $offset   Character offset to start from; a negative
     *                              value counts from the end of the haystack
     * @param string|null $encoding Charset of both strings; defaults to the internal encoding
     *
     * @return int|false The position counted in characters from the start of
     *                   $haystack, or false when not found, when the offset
     *                   lies outside the haystack or when conversion fails
     */
    public static function iconv_strpos($haystack, $needle, $offset = 0, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }

        if (0 !== stripos($encoding, 'utf-8')) {
            if (false === $haystack = self::iconv($encoding, 'utf-8', $haystack)) {
                return false;
            }
            if (false === $needle = self::iconv($encoding, 'utf-8', $needle)) {
                return false;
            }
        }

        $offset = (int) $offset;

        // Turn a negative offset into an absolute one so that it can be added
        // back to the position found in the truncated haystack.
        if (0 > $offset) {
            $offset += self::iconv_strlen((string) $haystack, 'utf-8');
            if (0 > $offset) {
                return false;
            }
        }

        if (0 < $offset) {
            $haystack = self::iconv_substr($haystack, $offset, 2147483647, 'utf-8');
            if (false === $haystack) {
                // The offset lies at or past the end of the haystack.
                return false;
            }
        }

        $pos = strpos($haystack, $needle);
        if (false === $pos) {
            return false;
        }

        // strpos() works on bytes: convert the byte position to a character count.
        return $offset + ($pos ? self::iconv_strlen(substr($haystack, 0, $pos), 'utf-8') : 0);
    }

    /**
     * Finds the character position of the last occurrence of a needle.
     *
     * @param string      $haystack The string to search in
     * @param string      $needle   The string to search for; an empty needle is never found
     * @param string|null $encoding Charset of both strings; defaults to the internal encoding
     *
     * @return int|false The position counted in characters, or false when not
     *                   found or when conversion fails
     */
    public static function iconv_strrpos($haystack, $needle, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }

        if (0 !== stripos($encoding, 'utf-8')) {
            if (false === $haystack = self::iconv($encoding, 'utf-8', $haystack)) {
                return false;
            }
            if (false === $needle = self::iconv($encoding, 'utf-8', $needle)) {
                return false;
            }
        }

        $pos = isset($needle[0]) ? strrpos($haystack, $needle) : false;
        if (false === $pos) {
            return false;
        }

        // A match at byte 0 is at character 0; otherwise count the characters that precede it.
        return 0 === $pos ? 0 : self::iconv_strlen(substr($haystack, 0, $pos), 'utf-8');
    }

    /**
     * Cuts out part of a string, counting in characters.
     *
     * @param string      $s        The input string
     * @param int         $start    Start position; negative counts from the end
     * @param int         $length   Maximum length; negative leaves that many
     *                              characters off the end
     * @param string|null $encoding Charset of $s; defaults to the internal encoding
     *
     * @return string|false The extracted part in the original charset, or false
     *                      when the range lies outside the string or when
     *                      conversion fails
     */
    public static function iconv_substr($s, $start, $length = 2147483647, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }
        if (0 === stripos($encoding, 'utf-8')) {
            // Already UTF-8: no conversion on the way in, none on the way out.
            $encoding = null;
        } elseif (false === $s = self::iconv($encoding, 'utf-8', $s)) {
            return false;
        }

        $s .= '';
        $slen = self::iconv_strlen($s, 'utf-8');
        $start = (int) $start;
        $length = (int) $length;

        if (0 > $start) {
            $start += $slen;
        }
        if (0 > $start) {
            return false;
        }
        if ($start >= $slen) {
            return false;
        }

        // Number of characters available from $start to the end of the string.
        $rx = $slen - $start;

        if (0 > $length) {
            $length += $rx;
        }
        if (0 === $length) {
            return '';
        }
        if (0 > $length) {
            return false;
        }

        if ($length > $rx) {
            $length = $rx;
        }

        // The "s" modifier lets "." match newlines, so multi-line strings are cut correctly.
        $rx = '/^'.($start ? self::pregOffset($start) : '').'('.self::pregOffset($length).')/us';

        $s = preg_match($rx, $s, $match) ? $match[1] : '';

        if (null === $encoding) {
            return $s;
        }

        return self::iconv('utf-8', $encoding, $s);
    }

    /**
     * Loads a conversion map into $map, caching it in self::$convertMap.
     *
     * When no "to." map file exists for a charset, the flipped "from." map is used.
     *
     * @param string $type    'from.' or 'to.'
     * @param string $charset Canonical charset name
     * @param array  $map     Receives the map
     *
     * @return bool False when no map is available for the charset
     */
    private static function loadMap($type, $charset, &$map)
    {
        $key = $type.$charset;

        if (isset(self::$convertMap[$key])) {
            $map = self::$convertMap[$key];

            return true;
        }

        if (false === $map = self::getData($key)) {
            if ('to.' === $type && self::loadMap('from.', $charset, $map)) {
                $map = array_flip($map);
            } else {
                return false;
            }
        }

        self::$convertMap[$key] = $map;

        return true;
    }

    /**
     * Validates a UTF-8 string, dropping or rejecting illegal sequences.
     *
     * Relies on self::$isValidUtf8 as set by iconv() to skip per-character checks.
     *
     * @param string $str    The string to validate
     * @param string $ignore Truthy to drop illegal bytes instead of failing
     *
     * @return string|false The validated string, or false (after raising a
     *                      notice) when an illegal sequence is found and
     *                      $ignore is falsy
     */
    private static function utf8ToUtf8($str, $ignore)
    {
        $ulenMask = self::$ulenMask;
        $valid = self::$isValidUtf8;

        // $u is an output buffer of the same size, overwritten in place and truncated at the end.
        $u = $str;
        $i = $j = 0;
        $len = strlen($str);

        while ($i < $len) {
            if ($str[$i] < "\x80") {
                $u[$j++] = $str[$i++];
                continue;
            }

            $ulen = $str[$i] & "\xF0";
            $ulen = isset($ulenMask[$ulen]) ? $ulenMask[$ulen] : 1;
            $uchr = substr($str, $i, $ulen);

            // A non-ASCII byte that is not a known lead byte, or a sequence PCRE rejects, is illegal.
            if (1 === $ulen || !($valid || preg_match('/^.$/us', $uchr))) {
                if ($ignore) {
                    ++$i;
                    continue;
                }

                trigger_error(self::ERROR_ILLEGAL_CHARACTER);

                return false;
            }

            $i += $ulen;

            // Copy every byte of the accepted sequence into the buffer.
            for ($k = 0, $n = strlen($uchr); $k < $n; ++$k) {
                $u[$j++] = $uchr[$k];
            }
        }

        return substr($u, 0, $j);
    }

    /**
     * Converts a string to UTF-8 using a "from." map, appending to $result.
     *
     * Two-byte keys are tried before single-byte keys at each position.
     *
     * @param string $result Receives the converted text (appended)
     * @param array  $map    Source byte sequence => UTF-8 string
     * @param string $str    The string to convert
     * @param string $ignore Truthy to skip unmappable bytes instead of failing
     *
     * @return bool False (after raising a notice) on an unmappable byte when $ignore is falsy
     */
    private static function mapToUtf8(&$result, $map, $str, $ignore)
    {
        $len = strlen($str);
        for ($i = 0; $i < $len; ++$i) {
            if (isset($str[$i + 1], $map[$str[$i].$str[$i + 1]])) {
                $result .= $map[$str[$i].$str[++$i]];
            } elseif (isset($map[$str[$i]])) {
                $result .= $map[$str[$i]];
            } elseif (!$ignore) {
                trigger_error(self::ERROR_ILLEGAL_CHARACTER);

                return false;
            }
        }

        return true;
    }

    /**
     * Converts a UTF-8 string to another charset using a "to." map, appending to $result.
     *
     * With $translit, an unmappable character is replaced by its entry in the
     * transliteration table or, failing that, by the leading ASCII character
     * of its NFD decomposition, and the replacement is converted in turn.
     *
     * @param string $result   Receives the converted text (appended)
     * @param array  $map      UTF-8 character => target byte sequence
     * @param string $str      The UTF-8 string to convert
     * @param string $ignore   Truthy to skip characters that cannot be converted
     * @param string $translit Truthy to attempt transliteration first
     *
     * @return bool False when a character cannot be converted and $ignore is falsy
     */
    private static function mapFromUtf8(&$result, $map, $str, $ignore, $translit)
    {
        $ulenMask = self::$ulenMask;
        $valid = self::$isValidUtf8;

        if ($translit && !self::$translitMap) {
            self::$translitMap = self::getData('translit');
        }

        $i = 0;
        $len = strlen($str);

        while ($i < $len) {
            if ($str[$i] < "\x80") {
                $uchr = $str[$i++];
            } else {
                $ulen = $str[$i] & "\xF0";
                $ulen = isset($ulenMask[$ulen]) ? $ulenMask[$ulen] : 1;
                $uchr = substr($str, $i, $ulen);

                if ($ignore && (1 === $ulen || !($valid || preg_match('/^.$/us', $uchr)))) {
                    ++$i;
                    continue;
                }

                $i += $ulen;
            }

            if (isset($map[$uchr])) {
                $result .= $map[$uchr];
                continue;
            }

            if (!$translit) {
                if (!$ignore) {
                    return false;
                }
                continue;
            }

            if (isset(self::$translitMap[$uchr])) {
                $uchr = self::$translitMap[$uchr];
            } else {
                // Characters below U+00C0 are not decomposed; the cast turns a failed normalization into ''.
                $decomposed = $uchr >= "\xC3\x80"
                    ? (string) \Normalizer::normalize($uchr, \Normalizer::NFD)
                    : '';

                if ('' !== $decomposed && $decomposed[0] < "\x80") {
                    $uchr = $decomposed[0];
                } elseif ($ignore) {
                    continue;
                } else {
                    // No replacement exists: fail here rather than re-queue the same character forever.
                    return false;
                }
            }

            // Push the replacement back in front of the remaining input and restart from it.
            $str = $uchr.substr($str, $i);
            $len = strlen($str);
            $i = 0;
        }

        return true;
    }

    /**
     * preg_replace_callback() handler that Q-encodes one byte as "=XX".
     *
     * @param array $m Match array; $m[0] is the single byte to encode
     *
     * @return string "=" followed by exactly two upper-case hex digits
     */
    private static function qpByteCallback($m)
    {
        // Zero-padded so that bytes below 0x10 still yield two hex digits.
        return sprintf('=%02X', ord($m[0]));
    }

    /**
     * Builds a regex fragment matching exactly $offset characters.
     *
     * The count is split into chunks of at most 65535 per quantifier.
     *
     * @param int $offset Number of characters to match
     *
     * @return string The regex fragment
     */
    private static function pregOffset($offset)
    {
        $rx = array();
        $offset = (int) $offset;

        while ($offset > 65535) {
            $rx[] = '.{65535}';
            $offset -= 65535;
        }

        return implode('', $rx).'.{'.$offset.'}';
    }

    /**
     * Loads a data file from the Resources/charset/ directory.
     *
     * @param string $file Base name of the file, without the ".php" extension
     *
     * @return mixed|false Whatever the file returns, or false when the name is
     *                     rejected or the file does not exist
     */
    private static function getData($file)
    {
        // Charset names can come from untrusted input (e.g. the charset token of a
        // MIME encoded-word), so refuse anything that could leave the charset directory.
        if (false !== strpos($file, '/') || false !== strpos($file, '\\') || false !== strpos($file, "\0")) {
            return false;
        }

        if (file_exists($file = __DIR__.'/Resources/charset/'.$file.'.php')) {
            return require $file;
        }

        return false;
    }
}