annotate vendor/symfony/polyfill-iconv/Iconv.php @ 19:fa3358dc1485 tip

Add ndrum files
author Chris Cannam
date Wed, 28 Aug 2019 13:14:47 +0100
parents 129ea1e6d783
children
rev   line source
Chris@0 1 <?php
Chris@0 2
Chris@0 3 /*
Chris@0 4 * This file is part of the Symfony package.
Chris@0 5 *
Chris@0 6 * (c) Fabien Potencier <fabien@symfony.com>
Chris@0 7 *
Chris@0 8 * For the full copyright and license information, please view the LICENSE
Chris@0 9 * file that was distributed with this source code.
Chris@0 10 */
Chris@0 11
Chris@0 12 namespace Symfony\Polyfill\Iconv;
Chris@0 13
/**
 * iconv implementation in pure PHP, UTF-8 centric.
 *
 * Implemented:
 * - iconv                     - Convert string to requested character encoding
 * - iconv_mime_decode         - Decodes a MIME header field
 * - iconv_mime_decode_headers - Decodes multiple MIME header fields at once
 * - iconv_get_encoding        - Retrieve internal configuration variables of iconv extension
 * - iconv_set_encoding        - Set current setting for character encoding conversion
 * - iconv_mime_encode         - Composes a MIME header field
 * - iconv_strlen              - Returns the character count of string
 * - iconv_strpos              - Finds position of first occurrence of a needle within a haystack
 * - iconv_strrpos             - Finds the last occurrence of a needle within a haystack
 * - iconv_substr              - Cut out part of a string
 *
 * Charsets available for conversion are defined by files
 * in the charset/ directory and by Iconv::$alias below.
 * You're welcome to send back any addition you make.
 *
 * @author Nicolas Grekas <p@tchwork.com>
 *
 * @internal
 */
final class Iconv
{
    const ERROR_ILLEGAL_CHARACTER = 'iconv(): Detected an illegal character in input string';
    const ERROR_WRONG_CHARSET = 'iconv(): Wrong charset, conversion from `%s\' to `%s\' is not allowed';

    // Values reported by iconv_get_encoding() and changed by iconv_set_encoding().
    public static $inputEncoding = 'utf-8';
    public static $outputEncoding = 'utf-8';
    public static $internalEncoding = 'utf-8';

    // Lower-cased charset aliases mapped to the canonical name used for map lookups.
    private static $alias = array(
        'utf8' => 'utf-8',
        'ascii' => 'us-ascii',
        'tis-620' => 'iso-8859-11',
        'cp1250' => 'windows-1250',
        'cp1251' => 'windows-1251',
        'cp1252' => 'windows-1252',
        'cp1253' => 'windows-1253',
        'cp1254' => 'windows-1254',
        'cp1255' => 'windows-1255',
        'cp1256' => 'windows-1256',
        'cp1257' => 'windows-1257',
        'cp1258' => 'windows-1258',
        'shift-jis' => 'cp932',
        'shift_jis' => 'cp932',
        'latin1' => 'iso-8859-1',
        'latin2' => 'iso-8859-2',
        'latin3' => 'iso-8859-3',
        'latin4' => 'iso-8859-4',
        'latin5' => 'iso-8859-9',
        'latin6' => 'iso-8859-10',
        'latin7' => 'iso-8859-13',
        'latin8' => 'iso-8859-14',
        'latin9' => 'iso-8859-15',
        'latin10' => 'iso-8859-16',
        'iso8859-1' => 'iso-8859-1',
        'iso8859-2' => 'iso-8859-2',
        'iso8859-3' => 'iso-8859-3',
        'iso8859-4' => 'iso-8859-4',
        'iso8859-5' => 'iso-8859-5',
        'iso8859-6' => 'iso-8859-6',
        'iso8859-7' => 'iso-8859-7',
        'iso8859-8' => 'iso-8859-8',
        'iso8859-9' => 'iso-8859-9',
        'iso8859-10' => 'iso-8859-10',
        'iso8859-11' => 'iso-8859-11',
        'iso8859-12' => 'iso-8859-12',
        'iso8859-13' => 'iso-8859-13',
        'iso8859-14' => 'iso-8859-14',
        'iso8859-15' => 'iso-8859-15',
        'iso8859-16' => 'iso-8859-16',
        'iso_8859-1' => 'iso-8859-1',
        'iso_8859-2' => 'iso-8859-2',
        'iso_8859-3' => 'iso-8859-3',
        'iso_8859-4' => 'iso-8859-4',
        'iso_8859-5' => 'iso-8859-5',
        'iso_8859-6' => 'iso-8859-6',
        'iso_8859-7' => 'iso-8859-7',
        'iso_8859-8' => 'iso-8859-8',
        'iso_8859-9' => 'iso-8859-9',
        'iso_8859-10' => 'iso-8859-10',
        'iso_8859-11' => 'iso-8859-11',
        'iso_8859-12' => 'iso-8859-12',
        'iso_8859-13' => 'iso-8859-13',
        'iso_8859-14' => 'iso-8859-14',
        'iso_8859-15' => 'iso-8859-15',
        'iso_8859-16' => 'iso-8859-16',
        'iso88591' => 'iso-8859-1',
        'iso88592' => 'iso-8859-2',
        'iso88593' => 'iso-8859-3',
        'iso88594' => 'iso-8859-4',
        'iso88595' => 'iso-8859-5',
        'iso88596' => 'iso-8859-6',
        'iso88597' => 'iso-8859-7',
        'iso88598' => 'iso-8859-8',
        'iso88599' => 'iso-8859-9',
        'iso885910' => 'iso-8859-10',
        'iso885911' => 'iso-8859-11',
        'iso885912' => 'iso-8859-12',
        'iso885913' => 'iso-8859-13',
        'iso885914' => 'iso-8859-14',
        'iso885915' => 'iso-8859-15',
        'iso885916' => 'iso-8859-16',
    );

    // Transliteration table, loaded on first use by mapFromUtf8().
    private static $translitMap = array();
    // Cache of loaded conversion maps, keyed by "<type><charset>" (see loadMap()).
    private static $convertMap = array();
    private static $errorHandler;
    private static $lastError;

    // Maps (lead byte & 0xF0) to the byte length of the UTF-8 sequence it starts.
    private static $ulenMask = array("\xC0" => 2, "\xD0" => 2, "\xE0" => 3, "\xF0" => 4);
    // Result of the last whole-string UTF-8 check done by iconv(); read by the UTF-8 walkers.
    private static $isValidUtf8;

    /**
     * Converts a string from one charset to another, going through UTF-8.
     *
     * Charset names are case-insensitive; an empty name means "iso-8859-1".
     * "//TRANSLIT" and "//IGNORE" suffixes on $outCharset enable the matching
     * mode; the same suffixes on $inCharset are stripped and have no effect.
     *
     * @param string $inCharset  Source charset
     * @param string $outCharset Target charset, optionally suffixed
     * @param string $str        Input, cast to string
     *
     * @return string|false The converted string, '' for empty input, or false
     *                      (after trigger_error() for an unknown charset or
     *                      illegal input) when the conversion fails
     */
    public static function iconv($inCharset, $outCharset, $str)
    {
        $str = (string) $str;
        if ('' === $str) {
            return '';
        }

        // Prepare for //IGNORE and //TRANSLIT

        $translit = $ignore = '';

        $outCharset = strtolower($outCharset);
        $inCharset = strtolower($inCharset);

        if ('' === $outCharset) {
            $outCharset = 'iso-8859-1';
        }
        if ('' === $inCharset) {
            $inCharset = 'iso-8859-1';
        }

        // Suffixes may be stacked in any order, so keep stripping until none is left.
        do {
            $loop = false;

            if ('//translit' === substr($outCharset, -10)) {
                $loop = $translit = true;
                $outCharset = substr($outCharset, 0, -10);
            }

            if ('//ignore' === substr($outCharset, -8)) {
                $loop = $ignore = true;
                $outCharset = substr($outCharset, 0, -8);
            }
        } while ($loop);

        do {
            $loop = false;

            if ('//translit' === substr($inCharset, -10)) {
                $loop = true;
                $inCharset = substr($inCharset, 0, -10);
            }

            if ('//ignore' === substr($inCharset, -8)) {
                $loop = true;
                $inCharset = substr($inCharset, 0, -8);
            }
        } while ($loop);

        if (isset(self::$alias[$inCharset])) {
            $inCharset = self::$alias[$inCharset];
        }
        if (isset(self::$alias[$outCharset])) {
            $outCharset = self::$alias[$outCharset];
        }

        // Load charset maps

        if (('utf-8' !== $inCharset && !self::loadMap('from.', $inCharset, $inMap))
            || ('utf-8' !== $outCharset && !self::loadMap('to.', $outCharset, $outMap))) {
            trigger_error(sprintf(self::ERROR_WRONG_CHARSET, $inCharset, $outCharset));

            return false;
        }

        if ('utf-8' !== $inCharset) {
            // Convert input to UTF-8
            $result = '';
            if (self::mapToUtf8($result, $inMap, $str, $ignore)) {
                $str = $result;
            } else {
                $str = false;
            }
            // Output of the map is treated as valid, so later steps skip per-character checks.
            self::$isValidUtf8 = true;
        } else {
            self::$isValidUtf8 = preg_match('//u', $str);

            if (!self::$isValidUtf8 && !$ignore) {
                trigger_error(self::ERROR_ILLEGAL_CHARACTER);

                return false;
            }

            if ('utf-8' === $outCharset) {
                // UTF-8 validation
                $str = self::utf8ToUtf8($str, $ignore);
            }
        }

        if ('utf-8' !== $outCharset && false !== $str) {
            // Convert output from UTF-8 to the target charset
            $result = '';
            if (self::mapFromUtf8($result, $outMap, $str, $ignore, $translit)) {
                return $result;
            }

            return false;
        }

        return $str;
    }

    /**
     * Decodes every header field of a MIME header block.
     *
     * Only the part before the first blank line is considered. Lines without
     * a colon are skipped. A header name seen more than once yields an array
     * of its values, in order of appearance.
     *
     * @param string      $str     Raw header block
     * @param int         $mode    Bitmask passed on to iconv_mime_decode()
     * @param string|null $charset Target charset; defaults to the internal encoding
     *
     * @return array|false Header values keyed by name, or false when a field cannot be decoded
     */
    public static function iconv_mime_decode_headers($str, $mode = 0, $charset = null)
    {
        if (null === $charset) {
            $charset = self::$internalEncoding;
        }

        // Normalize CRLF and lone CR to LF.
        if (false !== strpos($str, "\r")) {
            $str = strtr(str_replace("\r\n", "\n", $str), "\r", "\n");
        }
        $blocks = explode("\n\n", $str, 2);

        $headers = array();

        // A newline followed by space/tab is a folded continuation, not a new field.
        foreach (preg_split('/\n(?![ \t])/', $blocks[0]) as $line) {
            $line = self::iconv_mime_decode($line, $mode, $charset);
            if (false === $line) {
                return false;
            }
            $parts = explode(':', $line, 2);

            if (2 === \count($parts)) {
                $name = $parts[0];
                $value = ltrim($parts[1]);

                if (isset($headers[$name])) {
                    if (!\is_array($headers[$name])) {
                        $headers[$name] = array($headers[$name]);
                    }
                    $headers[$name][] = $value;
                } else {
                    $headers[$name] = $value;
                }
            }
        }

        return $headers;
    }

    /**
     * Decodes a single MIME header field containing "=?charset?B|Q?payload?=" words.
     *
     * Only the first field of $str is decoded; folded continuation lines are
     * joined with a single space first.
     *
     * @param string      $str     Raw header field
     * @param int         $mode    Bitmask; ICONV_MIME_DECODE_CONTINUE_ON_ERROR makes
     *                             the conversion use //IGNORE and keeps encoded words
     *                             with an unknown charset verbatim
     * @param string|null $charset Target charset; defaults to the internal encoding
     *
     * @return string|false The decoded field, or false on failure
     */
    public static function iconv_mime_decode($str, $mode = 0, $charset = null)
    {
        if (null === $charset) {
            $charset = self::$internalEncoding;
        }
        if (ICONV_MIME_DECODE_CONTINUE_ON_ERROR & $mode) {
            $charset .= '//IGNORE';
        }

        if (false !== strpos($str, "\r")) {
            $str = strtr(str_replace("\r\n", "\n", $str), "\r", "\n");
        }
        // Keep the first field only, then unfold it.
        $str = preg_split('/\n(?![ \t])/', rtrim($str), 2);
        $str = preg_replace('/[ \t]*\n[ \t]+/', ' ', rtrim($str[0]));
        // Result layout: [text, charset, scheme, payload, text, charset, scheme, payload, text, ...]
        $str = preg_split('/=\?([^?]+)\?([bqBQ])\?(.*?)\?=/', $str, -1, PREG_SPLIT_DELIM_CAPTURE);

        $result = self::iconv('utf-8', $charset, $str[0]);
        if (false === $result) {
            return false;
        }

        $i = 1;
        $len = \count($str);

        while ($i < $len) {
            $c = strtolower($str[$i]);
            if ((ICONV_MIME_DECODE_CONTINUE_ON_ERROR & $mode)
                && 'utf-8' !== $c
                && !isset(self::$alias[$c])
                && !self::loadMap('from.', $c, $d)) {
                // Unknown charset in lenient mode: flag the word so it is copied through as-is.
                $d = false;
            } elseif ('B' === strtoupper($str[$i + 1])) {
                $d = base64_decode($str[$i + 2]);
            } else {
                // Q scheme: protect literal '%', then map "=XX" to "%XX" and "_" to space.
                $d = rawurldecode(strtr(str_replace('%', '%25', $str[$i + 2]), '=_', '% '));
            }

            if (false !== $d) {
                if ('' !== $d) {
                    if ('' === $d = self::iconv($c, $charset, $d)) {
                        // Nothing survived conversion: drop the first character of the text that follows.
                        $str[$i + 3] = substr($str[$i + 3], 1);
                    } else {
                        $result .= $d;
                    }
                }
                $d = self::iconv('utf-8', $charset, $str[$i + 3]);
                // Whitespace-only text between two encoded words is dropped.
                if ('' !== trim($d)) {
                    $result .= $d;
                }
            } elseif (ICONV_MIME_DECODE_CONTINUE_ON_ERROR & $mode) {
                $result .= "=?{$str[$i]}?{$str[$i + 1]}?{$str[$i + 2]}?={$str[$i + 3]}";
            } else {
                $result = false;
                break;
            }

            $i += 4;
        }

        return $result;
    }

    /**
     * Returns one of the configured encodings, or all three.
     *
     * @param string $type 'input_encoding', 'output_encoding' or 'internal_encoding';
     *                     any other value returns the full set
     *
     * @return string|array
     */
    public static function iconv_get_encoding($type = 'all')
    {
        switch ($type) {
            case 'input_encoding': return self::$inputEncoding;
            case 'output_encoding': return self::$outputEncoding;
            case 'internal_encoding': return self::$internalEncoding;
        }

        return array(
            'input_encoding' => self::$inputEncoding,
            'output_encoding' => self::$outputEncoding,
            'internal_encoding' => self::$internalEncoding,
        );
    }

    /**
     * Sets one of the configured encodings. The charset name is stored as given.
     *
     * @param string $type    'input_encoding', 'output_encoding' or 'internal_encoding'
     * @param string $charset
     *
     * @return bool False when $type is not one of the three known settings
     */
    public static function iconv_set_encoding($type, $charset)
    {
        switch ($type) {
            case 'input_encoding': self::$inputEncoding = $charset; break;
            case 'output_encoding': self::$outputEncoding = $charset; break;
            case 'internal_encoding': self::$internalEncoding = $charset; break;

            default: return false;
        }

        return true;
    }

    /**
     * Composes a MIME header field made of "=?charset?scheme?data?=" encoded words.
     *
     * Recognized $pref keys and their defaults: 'scheme' ('B'), 'input-charset'
     * and 'output-charset' (internal encoding), 'line-length' (76) and
     * 'line-break-chars' ("\r\n"). A non-array $pref is treated as empty.
     * A field name containing bytes >= 0x80 is replaced by an empty name.
     *
     * @param string     $fieldName
     * @param string     $fieldValue
     * @param array|null $pref
     *
     * @return string|false The encoded field, or false when a charset conversion fails
     */
    public static function iconv_mime_encode($fieldName, $fieldValue, $pref = null)
    {
        if (!\is_array($pref)) {
            $pref = array();
        }

        $pref += array(
            'scheme' => 'B',
            'input-charset' => self::$internalEncoding,
            'output-charset' => self::$internalEncoding,
            'line-length' => 76,
            'line-break-chars' => "\r\n",
        );

        if (preg_match('/[\x80-\xFF]/', $fieldName)) {
            $fieldName = '';
        }

        $scheme = strtoupper(substr($pref['scheme'], 0, 1));
        $in = strtolower($pref['input-charset']);
        $out = strtolower($pref['output-charset']);

        if ('utf-8' !== $in && false === $fieldValue = self::iconv($in, 'utf-8', $fieldValue)) {
            return false;
        }

        // Split into UTF-8 characters so that a character is never cut across two encoded words.
        preg_match_all('/./us', $fieldValue, $chars);

        $chars = isset($chars[0]) ? $chars[0] : array();

        $lineBreak = (int) $pref['line-length'];
        $lineStart = "=?{$pref['output-charset']}?{$scheme}?";
        // First line also carries "<name>: "; continuation lines carry a leading space.
        $lineLength = \strlen($fieldName) + 2 + \strlen($lineStart) + 2;
        $lineOffset = \strlen($lineStart) + 3;
        $lineData = '';

        $fieldValue = array();

        $Q = 'Q' === $scheme;

        foreach ($chars as $c) {
            if ('utf-8' !== $out && false === $c = self::iconv('utf-8', $out, $c)) {
                return false;
            }

            // Q: $c becomes its escaped form and $o is that form alone.
            // B: $o is the base64 of the pending line data plus this character.
            $o = $Q
                ? $c = preg_replace_callback(
                    '/[=_\?\x00-\x1F\x80-\xFF]/',
                    array(__CLASS__, 'qpByteCallback'),
                    $c
                )
                : base64_encode($lineData.$c);

            // True when $o is longer than the room left on the line: flush what was collected.
            if (isset($o[$lineBreak - $lineLength])) {
                if (!$Q) {
                    $lineData = base64_encode($lineData);
                }
                $fieldValue[] = $lineStart.$lineData.'?=';
                $lineLength = $lineOffset;
                $lineData = '';
            }

            $lineData .= $c;
            // Only Q tracks the running length; B re-encodes the whole pending line each time.
            if ($Q) {
                $lineLength += \strlen($c);
            }
        }

        if ('' !== $lineData) {
            if (!$Q) {
                $lineData = base64_encode($lineData);
            }
            $fieldValue[] = $lineStart.$lineData.'?=';
        }

        return $fieldName.': '.implode($pref['line-break-chars'].' ', $fieldValue);
    }

    /**
     * Counts the characters of a string.
     *
     * Dispatches to strlen1() when the xml extension is loaded, strlen2() otherwise.
     *
     * @param string      $s
     * @param string|null $encoding Defaults to the internal encoding
     *
     * @return int|false
     */
    public static function iconv_strlen($s, $encoding = null)
    {
        static $hasXml = null;
        if (null === $hasXml) {
            $hasXml = \extension_loaded('xml');
        }

        if ($hasXml) {
            return self::strlen1($s, $encoding);
        }

        return self::strlen2($s, $encoding);
    }

    /**
     * Character count based on utf8_decode(), which yields one byte per character.
     *
     * @param string      $s
     * @param string|null $encoding Defaults to the internal encoding
     *
     * @return int|false False when converting $s to UTF-8 fails
     */
    public static function strlen1($s, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }
        if (0 !== stripos($encoding, 'utf-8') && false === $s = self::iconv($encoding, 'utf-8', $s)) {
            return false;
        }

        return \strlen(utf8_decode($s));
    }

    /**
     * Character count obtained by hopping from lead byte to lead byte.
     *
     * @param string      $s
     * @param string|null $encoding Defaults to the internal encoding
     *
     * @return int|false False when converting $s to UTF-8 fails
     */
    public static function strlen2($s, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }
        if (0 !== stripos($encoding, 'utf-8') && false === $s = self::iconv($encoding, 'utf-8', $s)) {
            return false;
        }

        $ulenMask = self::$ulenMask;

        $i = 0;
        $j = 0;
        $len = \strlen($s);

        while ($i < $len) {
            // Bytes that are not a known multi-byte lead count as one character each.
            $u = $s[$i] & "\xF0";
            $i += isset($ulenMask[$u]) ? $ulenMask[$u] : 1;
            ++$j;
        }

        return $j;
    }

    /**
     * Finds the character position of the first occurrence of $needle.
     *
     * @param string      $haystack
     * @param string      $needle
     * @param int         $offset   Character offset to start searching from
     * @param string|null $encoding Defaults to the internal encoding
     *
     * @return int|false
     */
    public static function iconv_strpos($haystack, $needle, $offset = 0, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }

        if (0 !== stripos($encoding, 'utf-8')) {
            if (false === $haystack = self::iconv($encoding, 'utf-8', $haystack)) {
                return false;
            }
            if (false === $needle = self::iconv($encoding, 'utf-8', $needle)) {
                return false;
            }
        }

        if ($offset = (int) $offset) {
            $haystack = self::iconv_substr($haystack, $offset, 2147483647, 'utf-8');
        }
        $pos = strpos($haystack, $needle);

        // Turn the byte offset into a character offset by counting the characters before it.
        return false === $pos ? false : ($offset + ($pos ? self::iconv_strlen(substr($haystack, 0, $pos), 'utf-8') : 0));
    }

    /**
     * Finds the character position of the last occurrence of $needle.
     *
     * @param string      $haystack
     * @param string      $needle   An empty needle is never found
     * @param string|null $encoding Defaults to the internal encoding
     *
     * @return int|false
     */
    public static function iconv_strrpos($haystack, $needle, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }

        if (0 !== stripos($encoding, 'utf-8')) {
            if (false === $haystack = self::iconv($encoding, 'utf-8', $haystack)) {
                return false;
            }
            if (false === $needle = self::iconv($encoding, 'utf-8', $needle)) {
                return false;
            }
        }

        $pos = isset($needle[0]) ? strrpos($haystack, $needle) : false;

        if (false === $pos) {
            return false;
        }

        // A match at byte 0 is at character 0; it must not be reported as the haystack length.
        return $pos ? self::iconv_strlen(substr($haystack, 0, $pos), 'utf-8') : 0;
    }

    /**
     * Extracts part of a string, counting in characters.
     *
     * @param string      $s
     * @param int         $start    Negative values count from the end
     * @param int         $length   Negative values stop that many characters before the end
     * @param string|null $encoding Defaults to the internal encoding
     *
     * @return string|false False when $start is out of range, when the resulting
     *                      length is negative, or when a charset conversion fails
     */
    public static function iconv_substr($s, $start, $length = 2147483647, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }
        if (0 === stripos($encoding, 'utf-8')) {
            // Already UTF-8: null marks that no conversion back is needed at the end.
            $encoding = null;
        } elseif (false === $s = self::iconv($encoding, 'utf-8', $s)) {
            return false;
        }

        $s = (string) $s;
        $slen = self::iconv_strlen($s, 'utf-8');
        $start = (int) $start;

        if (0 > $start) {
            $start += $slen;
        }
        if (0 > $start) {
            return false;
        }
        if ($start >= $slen) {
            return false;
        }

        // Number of characters available from $start to the end.
        $rx = $slen - $start;

        if (0 > $length) {
            $length += $rx;
        }
        if (0 === $length) {
            return '';
        }
        if (0 > $length) {
            return false;
        }

        if ($length > $rx) {
            $length = $rx;
        }

        $rx = '/^'.($start ? self::pregOffset($start) : '').'('.self::pregOffset($length).')/u';

        $s = preg_match($rx, $s, $s) ? $s[1] : '';

        if (null === $encoding) {
            return $s;
        }

        return self::iconv('utf-8', $encoding, $s);
    }

    /**
     * Loads a conversion map into $map, using self::$convertMap as a cache.
     *
     * When no 'to.' data file exists for a charset, the 'from.' map is loaded
     * and flipped instead.
     *
     * @param string $type    'from.' or 'to.'
     * @param string $charset Canonical charset name
     * @param array  $map     Receives the map
     *
     * @return bool False when no map could be found
     */
    private static function loadMap($type, $charset, &$map)
    {
        if (!isset(self::$convertMap[$type.$charset])) {
            if (false === $map = self::getData($type.$charset)) {
                if ('to.' === $type && self::loadMap('from.', $charset, $map)) {
                    $map = array_flip($map);
                } else {
                    return false;
                }
            }

            self::$convertMap[$type.$charset] = $map;
        } else {
            $map = self::$convertMap[$type.$charset];
        }

        return true;
    }

    /**
     * Copies a UTF-8 string, rejecting or skipping invalid sequences.
     *
     * Per-character validation only runs when self::$isValidUtf8 is falsy.
     *
     * @param string $str
     * @param bool   $ignore When truthy, invalid bytes are skipped instead of failing
     *
     * @return string|false False (after trigger_error()) on an invalid sequence when not ignoring
     */
    private static function utf8ToUtf8($str, $ignore)
    {
        $ulenMask = self::$ulenMask;
        $valid = self::$isValidUtf8;

        // $u is overwritten in place; $j never gets ahead of $i, so writes stay in bounds.
        $u = $str;
        $i = $j = 0;
        $len = \strlen($str);

        while ($i < $len) {
            if ($str[$i] < "\x80") {
                $u[$j++] = $str[$i++];
            } else {
                $ulen = $str[$i] & "\xF0";
                $ulen = isset($ulenMask[$ulen]) ? $ulenMask[$ulen] : 1;
                $uchr = substr($str, $i, $ulen);

                // A length of 1 here means a byte >= 0x80 that is not a recognized lead byte.
                if (1 === $ulen || !($valid || preg_match('/^.$/us', $uchr))) {
                    if ($ignore) {
                        ++$i;
                        continue;
                    }

                    trigger_error(self::ERROR_ILLEGAL_CHARACTER);

                    return false;
                } else {
                    $i += $ulen;
                }

                $u[$j++] = $uchr[0];

                // Copy the continuation bytes actually present (at most three).
                for ($k = 1; isset($uchr[$k]); ++$k) {
                    $u[$j++] = $uchr[$k];
                }
            }
        }

        return substr($u, 0, $j);
    }

    /**
     * Converts a string to UTF-8 with a 'from.' map, appending to $result.
     *
     * Two-byte keys are tried before single-byte ones.
     *
     * @param string $result Receives the converted text
     * @param array  $map    Source bytes => UTF-8 sequence
     * @param string $str
     * @param bool   $ignore When truthy, unmapped bytes are skipped instead of failing
     *
     * @return bool False (after trigger_error()) on an unmapped byte when not ignoring
     */
    private static function mapToUtf8(&$result, array $map, $str, $ignore)
    {
        $len = \strlen($str);
        for ($i = 0; $i < $len; ++$i) {
            if (isset($str[$i + 1], $map[$str[$i].$str[$i + 1]])) {
                $result .= $map[$str[$i].$str[++$i]];
            } elseif (isset($map[$str[$i]])) {
                $result .= $map[$str[$i]];
            } elseif (!$ignore) {
                trigger_error(self::ERROR_ILLEGAL_CHARACTER);

                return false;
            }
        }

        return true;
    }

    /**
     * Converts a UTF-8 string with a 'to.' map, appending to $result.
     *
     * @param string $result   Receives the converted text
     * @param array  $map      UTF-8 sequence => target bytes
     * @param string $str
     * @param bool   $ignore   When truthy, characters that cannot be converted are skipped
     * @param bool   $translit When truthy, unmapped characters are replaced using the
     *                         'translit' data file or the first byte of their NFD form
     *
     * @return bool False when a character cannot be converted and $ignore is falsy
     */
    private static function mapFromUtf8(&$result, array $map, $str, $ignore, $translit)
    {
        $ulenMask = self::$ulenMask;
        $valid = self::$isValidUtf8;

        if ($translit && !self::$translitMap) {
            self::$translitMap = self::getData('translit');
        }

        $i = 0;
        $len = \strlen($str);

        while ($i < $len) {
            if ($str[$i] < "\x80") {
                $uchr = $str[$i++];
            } else {
                $ulen = $str[$i] & "\xF0";
                $ulen = isset($ulenMask[$ulen]) ? $ulenMask[$ulen] : 1;
                $uchr = substr($str, $i, $ulen);

                if ($ignore && (1 === $ulen || !($valid || preg_match('/^.$/us', $uchr)))) {
                    ++$i;
                    continue;
                } else {
                    $i += $ulen;
                }
            }

            if (isset($map[$uchr])) {
                $result .= $map[$uchr];
            } elseif ($translit) {
                if (isset(self::$translitMap[$uchr])) {
                    $uchr = self::$translitMap[$uchr];
                } elseif ($uchr >= "\xC3\x80") {
                    $uchr = \Normalizer::normalize($uchr, \Normalizer::NFD);

                    // Keep the decomposition only when it starts with an ASCII byte.
                    if ($uchr[0] < "\x80") {
                        $uchr = $uchr[0];
                    } elseif ($ignore) {
                        continue;
                    } else {
                        return false;
                    }
                } elseif ($ignore) {
                    continue;
                } else {
                    return false;
                }

                // Put the replacement in front of the unread input and restart from it,
                // so the replacement itself goes through the map.
                $str = $uchr.substr($str, $i);
                $len = \strlen($str);
                $i = 0;
            } elseif (!$ignore) {
                return false;
            }
        }

        return true;
    }

    /**
     * preg_replace_callback() handler: encodes one byte as "=XX" for the Q scheme.
     *
     * The hex value is zero-padded to two digits, so 0x09 yields "=09" rather than "=9".
     *
     * @param array $m Match array; $m[0] is the byte to encode
     *
     * @return string
     */
    private static function qpByteCallback(array $m)
    {
        return sprintf('=%02X', \ord($m[0]));
    }

    /**
     * Builds a regex fragment matching exactly $offset characters.
     *
     * Counts above 65535 are emitted as several ".{65535}" chunks
     * (presumably to stay within PCRE's repeat-count limit).
     *
     * @param int $offset
     *
     * @return string
     */
    private static function pregOffset($offset)
    {
        $rx = array();
        $offset = (int) $offset;

        while ($offset > 65535) {
            $rx[] = '.{65535}';
            $offset -= 65535;
        }

        return implode('', $rx).'.{'.$offset.'}';
    }

    /**
     * Loads a data file from the Resources/charset/ directory next to this file.
     *
     * @param string $file File name without the ".php" extension
     *
     * @return mixed|false Whatever the file returns, or false when it does not exist
     */
    private static function getData($file)
    {
        if (file_exists($file = __DIR__.'/Resources/charset/'.$file.'.php')) {
            return require $file;
        }

        return false;
    }
}