comparison vendor/symfony/polyfill-iconv/Iconv.php @ 0:4c8ae668cc8c

Initial import (non-working)
author Chris Cannam
date Wed, 29 Nov 2017 16:09:58 +0000
parents
children 7a779792577d
comparison
equal deleted inserted replaced
-1:000000000000 0:4c8ae668cc8c
1 <?php
2
3 /*
4 * This file is part of the Symfony package.
5 *
6 * (c) Fabien Potencier <fabien@symfony.com>
7 *
8 * For the full copyright and license information, please view the LICENSE
9 * file that was distributed with this source code.
10 */
11
12 namespace Symfony\Polyfill\Iconv;
13
14 /**
15 * iconv implementation in pure PHP, UTF-8 centric.
16 *
17 * Implemented:
18 * - iconv - Convert string to requested character encoding
19 * - iconv_mime_decode - Decodes a MIME header field
20 * - iconv_mime_decode_headers - Decodes multiple MIME header fields at once
21 * - iconv_get_encoding - Retrieve internal configuration variables of iconv extension
22 * - iconv_set_encoding - Set current setting for character encoding conversion
23 * - iconv_mime_encode - Composes a MIME header field
24 * - iconv_strlen - Returns the character count of string
25 * - iconv_strpos - Finds position of first occurrence of a needle within a haystack
26 * - iconv_strrpos - Finds the last occurrence of a needle within a haystack
27 * - iconv_substr - Cut out part of a string
28 *
29 * Charsets available for conversion are defined by files
30 * in the charset/ directory and by Iconv::$alias below.
31 * You're welcome to send back any addition you make.
32 *
33 * @author Nicolas Grekas <p@tchwork.com>
34 *
35 * @internal
36 */
37 final class Iconv
38 {
39 const ERROR_ILLEGAL_CHARACTER = 'iconv(): Detected an illegal character in input string';
40 const ERROR_WRONG_CHARSET = 'iconv(): Wrong charset, conversion from `%s\' to `%s\' is not allowed';
41
42 public static $inputEncoding = 'utf-8';
43 public static $outputEncoding = 'utf-8';
44 public static $internalEncoding = 'utf-8';
45
46 private static $alias = array(
47 'utf8' => 'utf-8',
48 'ascii' => 'us-ascii',
49 'tis-620' => 'iso-8859-11',
50 'cp1250' => 'windows-1250',
51 'cp1251' => 'windows-1251',
52 'cp1252' => 'windows-1252',
53 'cp1253' => 'windows-1253',
54 'cp1254' => 'windows-1254',
55 'cp1255' => 'windows-1255',
56 'cp1256' => 'windows-1256',
57 'cp1257' => 'windows-1257',
58 'cp1258' => 'windows-1258',
59 'shift-jis' => 'cp932',
60 'shift_jis' => 'cp932',
61 'latin1' => 'iso-8859-1',
62 'latin2' => 'iso-8859-2',
63 'latin3' => 'iso-8859-3',
64 'latin4' => 'iso-8859-4',
65 'latin5' => 'iso-8859-9',
66 'latin6' => 'iso-8859-10',
67 'latin7' => 'iso-8859-13',
68 'latin8' => 'iso-8859-14',
69 'latin9' => 'iso-8859-15',
70 'latin10' => 'iso-8859-16',
71 'iso8859-1' => 'iso-8859-1',
72 'iso8859-2' => 'iso-8859-2',
73 'iso8859-3' => 'iso-8859-3',
74 'iso8859-4' => 'iso-8859-4',
75 'iso8859-5' => 'iso-8859-5',
76 'iso8859-6' => 'iso-8859-6',
77 'iso8859-7' => 'iso-8859-7',
78 'iso8859-8' => 'iso-8859-8',
79 'iso8859-9' => 'iso-8859-9',
80 'iso8859-10' => 'iso-8859-10',
81 'iso8859-11' => 'iso-8859-11',
82 'iso8859-12' => 'iso-8859-12',
83 'iso8859-13' => 'iso-8859-13',
84 'iso8859-14' => 'iso-8859-14',
85 'iso8859-15' => 'iso-8859-15',
86 'iso8859-16' => 'iso-8859-16',
87 'iso_8859-1' => 'iso-8859-1',
88 'iso_8859-2' => 'iso-8859-2',
89 'iso_8859-3' => 'iso-8859-3',
90 'iso_8859-4' => 'iso-8859-4',
91 'iso_8859-5' => 'iso-8859-5',
92 'iso_8859-6' => 'iso-8859-6',
93 'iso_8859-7' => 'iso-8859-7',
94 'iso_8859-8' => 'iso-8859-8',
95 'iso_8859-9' => 'iso-8859-9',
96 'iso_8859-10' => 'iso-8859-10',
97 'iso_8859-11' => 'iso-8859-11',
98 'iso_8859-12' => 'iso-8859-12',
99 'iso_8859-13' => 'iso-8859-13',
100 'iso_8859-14' => 'iso-8859-14',
101 'iso_8859-15' => 'iso-8859-15',
102 'iso_8859-16' => 'iso-8859-16',
103 'iso88591' => 'iso-8859-1',
104 'iso88592' => 'iso-8859-2',
105 'iso88593' => 'iso-8859-3',
106 'iso88594' => 'iso-8859-4',
107 'iso88595' => 'iso-8859-5',
108 'iso88596' => 'iso-8859-6',
109 'iso88597' => 'iso-8859-7',
110 'iso88598' => 'iso-8859-8',
111 'iso88599' => 'iso-8859-9',
112 'iso885910' => 'iso-8859-10',
113 'iso885911' => 'iso-8859-11',
114 'iso885912' => 'iso-8859-12',
115 'iso885913' => 'iso-8859-13',
116 'iso885914' => 'iso-8859-14',
117 'iso885915' => 'iso-8859-15',
118 'iso885916' => 'iso-8859-16',
119 );
120 private static $translitMap = array();
121 private static $convertMap = array();
122 private static $errorHandler;
123 private static $lastError;
124
125 private static $ulenMask = array("\xC0" => 2, "\xD0" => 2, "\xE0" => 3, "\xF0" => 4);
126 private static $isValidUtf8;
127
/**
 * Converts a string from one character encoding to another, pivoting through UTF-8.
 *
 * Charset names are lower-cased and resolved through self::$alias; an empty
 * charset name falls back to iso-8859-1. A trailing "//TRANSLIT" and/or
 * "//IGNORE" on $outCharset enables the matching mode; the same suffixes on
 * $inCharset are stripped and otherwise ignored.
 *
 * @param string $inCharset  Encoding of $str
 * @param string $outCharset Wanted encoding, optionally suffixed with //TRANSLIT or //IGNORE
 * @param string $str        The string to convert
 *
 * @return string|false The converted string, or false when a charset map
 *                      cannot be loaded or the conversion fails
 */
public static function iconv($inCharset, $outCharset, $str)
{
    $str .= '';
    if ('' === $str) {
        return '';
    }

    $target = strtolower($outCharset);
    $source = strtolower($inCharset);

    if ('' === $target) {
        $target = 'iso-8859-1';
    }
    if ('' === $source) {
        $source = 'iso-8859-1';
    }

    // Flags requested on the output charset drive the conversion mode.
    $translit = '';
    $ignore = '';

    if ('//translit' === substr($target, -10)) {
        $translit = '//TRANSLIT';
        $target = substr($target, 0, -10);
    }
    if ('//ignore' === substr($target, -8)) {
        $ignore = '//IGNORE';
        $target = substr($target, 0, -8);
    }

    // The same suffixes on the input charset are only stripped.
    foreach (array('//translit', '//ignore') as $suffix) {
        $suffixLength = strlen($suffix);
        if ($suffix === substr($source, -$suffixLength)) {
            $source = substr($source, 0, -$suffixLength);
        }
    }

    if (isset(self::$alias[$source])) {
        $source = self::$alias[$source];
    }
    if (isset(self::$alias[$target])) {
        $target = self::$alias[$target];
    }

    $sourceIsUtf8 = 'utf-8' === $source;
    $targetIsUtf8 = 'utf-8' === $target;

    // Load the charset maps; UTF-8 itself needs none.
    if ((!$sourceIsUtf8 && !self::loadMap('from.', $source, $inMap))
        || (!$targetIsUtf8 && !self::loadMap('to.', $target, $outMap))
    ) {
        trigger_error(sprintf(self::ERROR_WRONG_CHARSET, $source, $target));

        return false;
    }

    if ($sourceIsUtf8) {
        self::$isValidUtf8 = preg_match('//u', $str);

        if (!self::$isValidUtf8 && !$ignore) {
            trigger_error(self::ERROR_ILLEGAL_CHARACTER);

            return false;
        }

        if ($targetIsUtf8) {
            // UTF-8 to UTF-8: validate, dropping illegal bytes when //IGNORE is set
            $str = self::utf8ToUtf8($str, $ignore);
        }
    } else {
        // Convert the input to UTF-8
        $utf8 = '';
        $str = self::mapToUtf8($utf8, $inMap, $str, $ignore) ? $utf8 : false;
        self::$isValidUtf8 = true;
    }

    if ($targetIsUtf8 || false === $str) {
        return $str;
    }

    // Convert the UTF-8 pivot to the output charset
    $converted = '';
    if (self::mapFromUtf8($converted, $outMap, $str, $ignore, $translit)) {
        return $converted;
    }

    return false;
}
217
/**
 * Decodes several MIME header fields at once.
 *
 * Only the part of $str before the first blank line is read. Every logical
 * header line (folded continuation lines included) is decoded with
 * iconv_mime_decode() and split at its first colon; lines without a colon
 * are skipped. A header name seen more than once yields a list of values.
 *
 * @param string      $str     Raw header block
 * @param int         $mode    Bitmask forwarded to iconv_mime_decode()
 * @param string|null $charset Target charset, defaults to the internal encoding
 *
 * @return array|false Map of header name to value (or list of values), false if a line fails to decode
 */
public static function iconv_mime_decode_headers($str, $mode = 0, $charset = null)
{
    if (null === $charset) {
        $charset = self::$internalEncoding;
    }

    // Normalize CRLF and lone CR to LF
    if (false !== strpos($str, "\r")) {
        $str = strtr(str_replace("\r\n", "\n", $str), "\r", "\n");
    }

    $sections = explode("\n\n", $str, 2);
    $headers = array();

    foreach (preg_split('/\n(?![ \t])/', $sections[0]) as $line) {
        $decoded = self::iconv_mime_decode($line, $mode, $charset);
        if (false === $decoded) {
            return false;
        }

        $parts = explode(':', $decoded, 2);
        if (2 !== count($parts)) {
            continue;
        }

        $name = $parts[0];
        $value = ltrim($parts[1]);

        if (!isset($headers[$name])) {
            $headers[$name] = $value;
            continue;
        }

        // Repeated header: promote the stored scalar to a list, then append
        if (!is_array($headers[$name])) {
            $headers[$name] = array($headers[$name]);
        }
        $headers[$name][] = $value;
    }

    return $headers;
}
253
/**
 * Decodes a single MIME header field.
 *
 * Only the first logical line of $str is used; folded continuation lines
 * are unfolded into single spaces. Each "=?charset?B|Q?data?=" encoded word
 * is decoded and converted to $charset; the plain text around encoded
 * words is converted from UTF-8.
 *
 * @param string      $str     The encoded header
 * @param int         $mode    Bitmask; ICONV_MIME_DECODE_CONTINUE_ON_ERROR keeps undecodable words verbatim
 * @param string|null $charset Target charset, defaults to the internal encoding
 *
 * @return string|false The decoded header, or false on failure
 */
public static function iconv_mime_decode($str, $mode = 0, $charset = null)
{
    if (null === $charset) {
        $charset = self::$internalEncoding;
    }

    $continueOnError = ICONV_MIME_DECODE_CONTINUE_ON_ERROR & $mode;
    if ($continueOnError) {
        $charset .= '//IGNORE';
    }

    // Normalize CRLF and lone CR to LF
    if (false !== strpos($str, "\r")) {
        $str = strtr(str_replace("\r\n", "\n", $str), "\r", "\n");
    }

    $lines = preg_split('/\n(?![ \t])/', rtrim($str), 2);
    $header = preg_replace('/[ \t]*\n[ \t]+/', ' ', rtrim($lines[0]));

    // Layout: [text, charset, scheme, payload, text, charset, scheme, payload, text, ...]
    $tokens = preg_split('/=\?([^?]+)\?([bqBQ])\?(.*?)\?=/', $header, -1, PREG_SPLIT_DELIM_CAPTURE);

    $result = self::iconv('utf-8', $charset, $tokens[0]);
    if (false === $result) {
        return false;
    }

    for ($i = 1, $count = count($tokens); $i < $count; $i += 4) {
        $wordCharset = strtolower($tokens[$i]);
        $scheme = $tokens[$i + 1];
        $payload = $tokens[$i + 2];
        $tail = $tokens[$i + 3];

        if ($continueOnError
            && 'utf-8' !== $wordCharset
            && !isset(self::$alias[$wordCharset])
            && !self::loadMap('from.', $wordCharset, $decoded)
        ) {
            // Unknown charset: the word is emitted untouched below
            $decoded = false;
        } elseif ('B' === strtoupper($scheme)) {
            $decoded = base64_decode($payload);
        } else {
            // Q scheme: "=XX" becomes "%XX" and "_" becomes a space, literal "%" is protected first
            $decoded = rawurldecode(strtr(str_replace('%', '%25', $payload), '=_', '% '));
        }

        if (false === $decoded) {
            if ($continueOnError) {
                $result .= '=?'.$tokens[$i].'?'.$scheme.'?'.$payload.'?='.$tail;
                continue;
            }

            return false;
        }

        if ('' !== $decoded) {
            $decoded = self::iconv($wordCharset, $charset, $decoded);
            if ('' === $decoded) {
                $tail = substr($tail, 1);
            } else {
                $result .= $decoded;
            }
        }

        // Text following the encoded word is appended unless it is only whitespace
        $decoded = self::iconv('utf-8', $charset, $tail);
        if ('' !== trim($decoded)) {
            $result .= $decoded;
        }
    }

    return $result;
}
315
/**
 * Retrieves the configured encodings.
 *
 * @param string $type 'input_encoding', 'output_encoding' or 'internal_encoding';
 *                     any other value returns all three settings
 *
 * @return string|array The requested setting, or an array of all settings keyed by name
 */
public static function iconv_get_encoding($type = 'all')
{
    // Loose comparisons on purpose: they mirror switch/case matching.
    if ('input_encoding' == $type) {
        return self::$inputEncoding;
    }
    if ('output_encoding' == $type) {
        return self::$outputEncoding;
    }
    if ('internal_encoding' == $type) {
        return self::$internalEncoding;
    }

    return array(
        'input_encoding' => self::$inputEncoding,
        'output_encoding' => self::$outputEncoding,
        'internal_encoding' => self::$internalEncoding,
    );
}
330
/**
 * Changes one of the configured encodings.
 *
 * @param string $type    'input_encoding', 'output_encoding' or 'internal_encoding'
 * @param string $charset The new value, stored as given
 *
 * @return bool True when $type names a known setting, false otherwise
 */
public static function iconv_set_encoding($type, $charset)
{
    $properties = array(
        'input_encoding' => 'inputEncoding',
        'output_encoding' => 'outputEncoding',
        'internal_encoding' => 'internalEncoding',
    );

    foreach ($properties as $name => $property) {
        // Loose comparison on purpose: it mirrors switch/case matching.
        if ($name == $type) {
            self::$$property = $charset;

            return true;
        }
    }

    return false;
}
343
/**
 * Composes a MIME header field of the form "Name: =?charset?scheme?data?=".
 *
 * The value is converted to UTF-8, split into characters, converted to the
 * output charset and packed into encoded words; a new encoded word is
 * started when the current one would overflow the configured line length.
 *
 * @param string     $fieldName  Header name; replaced by '' when it contains a byte in \x80-\xFF
 * @param string     $fieldValue Header value, expressed in the input charset
 * @param array|null $pref       Optional overrides for 'scheme', 'input-charset',
 *                               'output-charset', 'line-length' and 'line-break-chars'
 *
 * @return string|false The encoded header, or false when a charset conversion fails
 */
public static function iconv_mime_encode($fieldName, $fieldValue, $pref = null)
{
    if (!is_array($pref)) {
        $pref = array();
    }

    // Caller-supplied keys win over these defaults
    $pref += array(
        'scheme' => 'B',
        'input-charset' => self::$internalEncoding,
        'output-charset' => self::$internalEncoding,
        'line-length' => 76,
        'line-break-chars' => "\r\n",
    );

    if (preg_match('/[\x80-\xFF]/', $fieldName)) {
        $fieldName = '';
    }

    $scheme = strtoupper(substr($pref['scheme'], 0, 1));
    $in = strtolower($pref['input-charset']);
    $out = strtolower($pref['output-charset']);

    if ('utf-8' !== $in) {
        $fieldValue = self::iconv($in, 'utf-8', $fieldValue);
        if (false === $fieldValue) {
            return false;
        }
    }

    preg_match_all('/./us', $fieldValue, $matches);
    $chars = isset($matches[0]) ? $matches[0] : array();

    $lineBreak = (int) $pref['line-length'];
    $lineStart = "=?{$pref['output-charset']}?{$scheme}?";
    // The first line also carries "Name: "; later lines only the encoded-word wrapper
    $lineLength = strlen($fieldName) + 2 + strlen($lineStart) + 2;
    $lineOffset = strlen($lineStart) + 3;
    $lineData = '';

    $words = array();
    $useQ = 'Q' === $scheme;

    foreach ($chars as $char) {
        if ('utf-8' !== $out) {
            $char = self::iconv('utf-8', $out, $char);
            if (false === $char) {
                return false;
            }
        }

        if ($useQ) {
            $char = preg_replace_callback(
                '/[=_\?\x00-\x1F\x80-\xFF]/',
                array(__CLASS__, 'qpByteCallback'),
                $char
            );
            $probe = $char;
        } else {
            // Base64 grows with the whole line, so measure the candidate line
            $probe = base64_encode($lineData.$char);
        }

        if (isset($probe[$lineBreak - $lineLength])) {
            // Out of room: flush what was collected so far as one encoded word
            if (!$useQ) {
                $lineData = base64_encode($lineData);
            }
            $words[] = $lineStart.$lineData.'?=';
            $lineLength = $lineOffset;
            $lineData = '';
        }

        $lineData .= $char;
        if ($useQ) {
            $lineLength += strlen($char);
        }
    }

    if ('' !== $lineData) {
        if (!$useQ) {
            $lineData = base64_encode($lineData);
        }
        $words[] = $lineStart.$lineData.'?=';
    }

    return $fieldName.': '.implode($pref['line-break-chars'].' ', $words);
}
419
/**
 * Returns the character count of a string.
 *
 * Delegates to strlen1() when the xml extension is loaded and to strlen2()
 * otherwise; the extension check is done once and cached.
 *
 * @param string      $s        The string to measure
 * @param string|null $encoding Charset of $s, defaults to the internal encoding
 *
 * @return int|false The number of characters, or false when $s cannot be converted
 */
public static function iconv_strlen($s, $encoding = null)
{
    static $hasXml = null;
    if (null === $hasXml) {
        $hasXml = extension_loaded('xml');
    }

    return $hasXml ? self::strlen1($s, $encoding) : self::strlen2($s, $encoding);
}
433
/**
 * Counts characters by decoding UTF-8 with utf8_decode() and measuring the result.
 *
 * @param string      $s        The string to measure
 * @param string|null $encoding Charset of $s, defaults to the internal encoding
 *
 * @return int|false The number of characters, or false when $s cannot be converted to UTF-8
 */
public static function strlen1($s, $encoding = null)
{
    if (null === $encoding) {
        $encoding = self::$internalEncoding;
    }

    // Anything not starting with "utf-8" is converted first
    if (0 !== stripos($encoding, 'utf-8')) {
        $s = self::iconv($encoding, 'utf-8', $s);
        if (false === $s) {
            return false;
        }
    }

    return strlen(utf8_decode($s));
}
445
/**
 * Counts characters by walking the UTF-8 lead bytes, without any extension.
 *
 * The high nibble of each lead byte is looked up in self::$ulenMask to know
 * how many bytes to skip; bytes without an entry advance by one.
 *
 * @param string      $s        The string to measure
 * @param string|null $encoding Charset of $s, defaults to the internal encoding
 *
 * @return int|false The number of characters, or false when $s cannot be converted to UTF-8
 */
public static function strlen2($s, $encoding = null)
{
    if (null === $encoding) {
        $encoding = self::$internalEncoding;
    }

    if (0 !== stripos($encoding, 'utf-8')) {
        $s = self::iconv($encoding, 'utf-8', $s);
        if (false === $s) {
            return false;
        }
    }

    $widths = self::$ulenMask;
    $count = 0;

    for ($offset = 0, $bytes = strlen($s); $offset < $bytes; ++$count) {
        $lead = $s[$offset] & "\xF0";
        $offset += isset($widths[$lead]) ? $widths[$lead] : 1;
    }

    return $count;
}
469
/**
 * Finds the character position of the first occurrence of a needle in a haystack.
 *
 * @param string      $haystack The string to search in
 * @param string      $needle   The string to search for
 * @param int         $offset   Character offset at which the search starts
 * @param string|null $encoding Charset of both strings, defaults to the internal encoding
 *
 * @return int|false The character position counted from the start of $haystack,
 *                   or false when not found or when a conversion fails
 */
public static function iconv_strpos($haystack, $needle, $offset = 0, $encoding = null)
{
    if (null === $encoding) {
        $encoding = self::$internalEncoding;
    }

    if (0 !== stripos($encoding, 'utf-8')) {
        $haystack = self::iconv($encoding, 'utf-8', $haystack);
        if (false === $haystack) {
            return false;
        }

        $needle = self::iconv($encoding, 'utf-8', $needle);
        if (false === $needle) {
            return false;
        }
    }

    $offset = (int) $offset;
    if (0 !== $offset) {
        // Drop the skipped characters; $offset is added back to the result
        $haystack = self::iconv_substr($haystack, $offset, 2147483647, 'utf-8');
    }

    $bytePos = strpos($haystack, $needle);
    if (false === $bytePos) {
        return false;
    }
    if (0 === $bytePos) {
        return $offset;
    }

    // Translate the byte offset into a character count
    return $offset + self::iconv_strlen(substr($haystack, 0, $bytePos), 'utf-8');
}
492
/**
 * Finds the character position of the last occurrence of a needle in a haystack.
 *
 * @param string      $haystack The string to search in
 * @param string      $needle   The string to search for; an empty needle is never found
 * @param string|null $encoding Charset of both strings, defaults to the internal encoding
 *
 * @return int|false The character position, or false when not found or when a conversion fails
 */
public static function iconv_strrpos($haystack, $needle, $encoding = null)
{
    if (null === $encoding) {
        $encoding = self::$internalEncoding;
    }

    if (0 !== stripos($encoding, 'utf-8')) {
        if (false === $haystack = self::iconv($encoding, 'utf-8', $haystack)) {
            return false;
        }
        if (false === $needle = self::iconv($encoding, 'utf-8', $needle)) {
            return false;
        }
    }

    $pos = isset($needle[0]) ? strrpos($haystack, $needle) : false;

    if (false === $pos) {
        return false;
    }

    // A match at byte 0 is character 0; measuring the whole haystack here
    // would report the haystack length instead.
    if (0 === $pos) {
        return 0;
    }

    // Translate the byte offset into a character count
    return self::iconv_strlen(substr($haystack, 0, $pos), 'utf-8');
}
512
/**
 * Cuts out part of a string, counting in characters.
 *
 * @param string      $s        The input string
 * @param int         $start    Character offset; negative values count from the end
 * @param int         $length   Maximum number of characters; negative values stop that many characters before the end
 * @param string|null $encoding Charset of $s, defaults to the internal encoding
 *
 * @return string|false The extracted part in the original charset, '' for a zero length,
 *                      or false when $start/$length fall outside the string or a conversion fails
 */
public static function iconv_substr($s, $start, $length = 2147483647, $encoding = null)
{
    if (null === $encoding) {
        $encoding = self::$internalEncoding;
    }

    // UTF-8 input is processed as-is ($encoding = null means "no conversion
    // back"); any other charset goes through UTF-8 and is converted back at
    // the end.
    if (0 === stripos($encoding, 'utf-8')) {
        $encoding = null;
    } elseif (false === $s = self::iconv($encoding, 'utf-8', $s)) {
        return false;
    }

    $s .= '';
    $slen = self::iconv_strlen($s, 'utf-8');
    $start = (int) $start;

    if (0 > $start) {
        $start += $slen;
    }
    if (0 > $start) {
        return false;
    }
    if ($start >= $slen) {
        return false;
    }

    // Number of characters available from $start to the end of the string
    $rx = $slen - $start;

    if (0 > $length) {
        $length += $rx;
    }
    if (0 === $length) {
        return '';
    }
    if (0 > $length) {
        return false;
    }

    if ($length > $rx) {
        $length = $rx;
    }

    // The "s" modifier lets "." match line breaks, so substrings spanning
    // several lines are extracted instead of coming back empty.
    $rx = '/^'.($start ? self::pregOffset($start) : '').'('.self::pregOffset($length).')/us';

    $s = preg_match($rx, $s, $s) ? $s[1] : '';

    if (null === $encoding) {
        return $s;
    }

    return self::iconv('utf-8', $encoding, $s);
}
564
/**
 * Loads a charset conversion map, caching it in self::$convertMap.
 *
 * When no "to." map is available for a charset, the matching "from." map
 * is loaded and flipped instead.
 *
 * @param string $type    'from.' or 'to.'
 * @param string $charset Resolved charset name
 * @param array  $map     Receives the map on success
 *
 * @return bool True when a map was found, false otherwise
 */
private static function loadMap($type, $charset, &$map)
{
    $key = $type.$charset;

    if (isset(self::$convertMap[$key])) {
        $map = self::$convertMap[$key];

        return true;
    }

    $map = self::getData($key);

    if (false === $map) {
        if ('to.' !== $type || !self::loadMap('from.', $charset, $map)) {
            return false;
        }

        $map = array_flip($map);
    }

    self::$convertMap[$key] = $map;

    return true;
}
583
/**
 * Copies a UTF-8 string while checking it sequence by sequence.
 *
 * Relies on self::$isValidUtf8 to skip the per-sequence regex check when
 * the whole string is already known to be valid.
 *
 * @param string $str    The string to check
 * @param string $ignore Non-empty to drop illegal bytes instead of failing
 *
 * @return string|false The checked string, or false (after raising an error)
 *                      on an illegal sequence when $ignore is empty
 */
private static function utf8ToUtf8($str, $ignore)
{
    $widths = self::$ulenMask;
    $valid = self::$isValidUtf8;

    // The output is written in place over a copy and truncated at the end
    $out = $str;
    $read = 0;
    $write = 0;
    $size = strlen($str);

    while ($read < $size) {
        $byte = $str[$read];

        if ($byte < "\x80") {
            // ASCII bytes are copied verbatim
            $out[$write++] = $byte;
            ++$read;
            continue;
        }

        $lead = $byte & "\xF0";
        $width = isset($widths[$lead]) ? $widths[$lead] : 1;
        $sequence = substr($str, $read, $width);

        $wellFormed = 1 !== $width && ($valid || preg_match('/^.$/us', $sequence));

        if (!$wellFormed) {
            if ($ignore) {
                ++$read;
                continue;
            }

            trigger_error(self::ERROR_ILLEGAL_CHARACTER);

            return false;
        }

        $read += $width;

        for ($k = 0, $n = strlen($sequence); $k < $n; ++$k) {
            $out[$write++] = $sequence[$k];
        }
    }

    return substr($out, 0, $write);
}
624
/**
 * Converts a string to UTF-8 with a "from." charset map.
 *
 * At each position a two-byte key is tried before the single-byte key.
 *
 * @param string $result Receives the converted text (appended)
 * @param array  $map    Map of source byte sequence to UTF-8 character
 * @param string $str    The string to convert
 * @param string $ignore Non-empty to skip unmapped bytes instead of failing
 *
 * @return bool False (after raising an error) on an unmapped byte when $ignore is empty, true otherwise
 */
private static function mapToUtf8(&$result, $map, $str, $ignore)
{
    for ($i = 0, $len = strlen($str); $i < $len; ++$i) {
        $single = $str[$i];

        if (isset($str[$i + 1])) {
            $pair = $single.$str[$i + 1];
            if (isset($map[$pair])) {
                $result .= $map[$pair];
                ++$i; // the second byte is consumed too
                continue;
            }
        }

        if (isset($map[$single])) {
            $result .= $map[$single];
            continue;
        }

        if (!$ignore) {
            trigger_error(self::ERROR_ILLEGAL_CHARACTER);

            return false;
        }
    }

    return true;
}
642
/**
 * Converts a UTF-8 string to another charset with a "to." charset map.
 *
 * With transliteration enabled, a character missing from $map is replaced
 * by its entry in the translit table or, for characters sorting at or after
 * "\xC3\x80", by the first byte of its NFD decomposition when that byte is
 * ASCII; the replacement is then pushed back in front of the remaining
 * input and mapped again.
 *
 * @param string $result   Receives the converted text (appended)
 * @param array  $map      Map of UTF-8 character to target byte sequence
 * @param string $str      The UTF-8 string to convert
 * @param string $ignore   Non-empty to skip unconvertible characters instead of failing
 * @param string $translit Non-empty to enable transliteration
 *
 * @return bool False when a character cannot be converted and $ignore is empty, true otherwise
 */
private static function mapFromUtf8(&$result, $map, $str, $ignore, $translit)
{
    $ulenMask = self::$ulenMask;
    $valid = self::$isValidUtf8;

    if ($translit && !self::$translitMap) {
        self::$translitMap = self::getData('translit');
    }

    $i = 0;
    $len = strlen($str);

    while ($i < $len) {
        if ($str[$i] < "\x80") {
            $uchr = $str[$i++];
        } else {
            $ulen = $str[$i] & "\xF0";
            $ulen = isset($ulenMask[$ulen]) ? $ulenMask[$ulen] : 1;
            $uchr = substr($str, $i, $ulen);

            // Malformed sequences are only skipped in //IGNORE mode
            if ($ignore && (1 === $ulen || !($valid || preg_match('/^.$/us', $uchr)))) {
                ++$i;
                continue;
            }

            $i += $ulen;
        }

        if (isset($map[$uchr])) {
            $result .= $map[$uchr];
            continue;
        }

        if (!$translit) {
            if (!$ignore) {
                return false;
            }
            continue;
        }

        if (isset(self::$translitMap[$uchr])) {
            $uchr = self::$translitMap[$uchr];
        } elseif ($uchr >= "\xC3\x80") {
            $uchr = \Normalizer::normalize($uchr, \Normalizer::NFD);

            if ($uchr[0] < "\x80") {
                $uchr = $uchr[0];
            } elseif ($ignore) {
                continue;
            } else {
                return false;
            }
        } elseif ($ignore) {
            // No transliteration exists: re-queuing the same character
            // would never terminate, so drop it.
            continue;
        } else {
            return false;
        }

        // Restart on the transliterated text followed by the unread input
        $str = $uchr.substr($str, $i);
        $len = strlen($str);
        $i = 0;
    }

    return true;
}
698
/**
 * Encodes one byte for the Q scheme of iconv_mime_encode().
 *
 * Always emits two uppercase hexadecimal digits, so bytes below 0x10 are
 * zero-padded (a tab becomes "=09", not "=9").
 *
 * @param array $m Regex match whose first entry holds the byte to encode
 *
 * @return string "=" followed by the byte value as two hexadecimal digits
 */
private static function qpByteCallback($m)
{
    return sprintf('=%02X', ord($m[0]));
}
703
/**
 * Builds a regex fragment matching exactly $offset characters.
 *
 * Counts above 65535 are split into several ".{65535}" quantifiers
 * followed by one quantifier for the remainder.
 *
 * @param int $offset Number of characters to match
 *
 * @return string The regex fragment
 */
private static function pregOffset($offset)
{
    $remaining = (int) $offset;
    $pattern = '';

    for (; $remaining > 65535; $remaining -= 65535) {
        $pattern .= '.{65535}';
    }

    return $pattern.'.{'.$remaining.'}';
}
716
/**
 * Loads a data file from the Resources/charset/ directory.
 *
 * The name is built from charset names that callers may take from
 * untrusted input (for example the charset of a MIME encoded word), so
 * names containing a path separator or a NUL byte are rejected before the
 * filesystem is touched.
 *
 * @param string $file File name without the ".php" extension
 *
 * @return mixed The value returned by the data file, or false when the name
 *               is rejected or the file does not exist
 */
private static function getData($file)
{
    if (false !== strpbrk($file, "/\\\0")) {
        return false;
    }

    $path = __DIR__.'/Resources/charset/'.$file.'.php';

    if (file_exists($path)) {
        return require $path;
    }

    return false;
}
725 }