Mercurial > hg > isophonics-drupal-site
comparison vendor/symfony/polyfill-iconv/Iconv.php @ 0:4c8ae668cc8c
Initial import (non-working)
author | Chris Cannam |
---|---|
date | Wed, 29 Nov 2017 16:09:58 +0000 |
parents | |
children | 7a779792577d |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4c8ae668cc8c |
---|---|
1 <?php | |
2 | |
3 /* | |
4 * This file is part of the Symfony package. | |
5 * | |
6 * (c) Fabien Potencier <fabien@symfony.com> | |
7 * | |
8 * For the full copyright and license information, please view the LICENSE | |
9 * file that was distributed with this source code. | |
10 */ | |
11 | |
12 namespace Symfony\Polyfill\Iconv; | |
13 | |
14 /** | |
15 * iconv implementation in pure PHP, UTF-8 centric. | |
16 * | |
17 * Implemented: | |
18 * - iconv - Convert string to requested character encoding | |
19 * - iconv_mime_decode - Decodes a MIME header field | |
20 * - iconv_mime_decode_headers - Decodes multiple MIME header fields at once | |
21 * - iconv_get_encoding - Retrieve internal configuration variables of iconv extension | |
22 * - iconv_set_encoding - Set current setting for character encoding conversion | |
23 * - iconv_mime_encode - Composes a MIME header field | |
24 * - iconv_strlen - Returns the character count of string | |
25 * - iconv_strpos - Finds position of first occurrence of a needle within a haystack | |
26 * - iconv_strrpos - Finds the last occurrence of a needle within a haystack | |
27 * - iconv_substr - Cut out part of a string | |
28 * | |
29 * Charsets available for conversion are defined by files | |
30 * in the charset/ directory and by Iconv::$alias below. | |
31 * You're welcome to send back any addition you make. | |
32 * | |
33 * @author Nicolas Grekas <p@tchwork.com> | |
34 * | |
35 * @internal | |
36 */ | |
final class Iconv
{
    const ERROR_ILLEGAL_CHARACTER = 'iconv(): Detected an illegal character in input string';
    const ERROR_WRONG_CHARSET = 'iconv(): Wrong charset, conversion from `%s\' to `%s\' is not allowed';

    public static $inputEncoding = 'utf-8';
    public static $outputEncoding = 'utf-8';
    public static $internalEncoding = 'utf-8';

    // Lower-cased charset aliases mapped to the canonical name used for map lookup.
    private static $alias = array(
        'utf8' => 'utf-8',
        'ascii' => 'us-ascii',
        'tis-620' => 'iso-8859-11',
        'cp1250' => 'windows-1250',
        'cp1251' => 'windows-1251',
        'cp1252' => 'windows-1252',
        'cp1253' => 'windows-1253',
        'cp1254' => 'windows-1254',
        'cp1255' => 'windows-1255',
        'cp1256' => 'windows-1256',
        'cp1257' => 'windows-1257',
        'cp1258' => 'windows-1258',
        'shift-jis' => 'cp932',
        'shift_jis' => 'cp932',
        'latin1' => 'iso-8859-1',
        'latin2' => 'iso-8859-2',
        'latin3' => 'iso-8859-3',
        'latin4' => 'iso-8859-4',
        'latin5' => 'iso-8859-9',
        'latin6' => 'iso-8859-10',
        'latin7' => 'iso-8859-13',
        'latin8' => 'iso-8859-14',
        'latin9' => 'iso-8859-15',
        'latin10' => 'iso-8859-16',
        'iso8859-1' => 'iso-8859-1',
        'iso8859-2' => 'iso-8859-2',
        'iso8859-3' => 'iso-8859-3',
        'iso8859-4' => 'iso-8859-4',
        'iso8859-5' => 'iso-8859-5',
        'iso8859-6' => 'iso-8859-6',
        'iso8859-7' => 'iso-8859-7',
        'iso8859-8' => 'iso-8859-8',
        'iso8859-9' => 'iso-8859-9',
        'iso8859-10' => 'iso-8859-10',
        'iso8859-11' => 'iso-8859-11',
        'iso8859-12' => 'iso-8859-12',
        'iso8859-13' => 'iso-8859-13',
        'iso8859-14' => 'iso-8859-14',
        'iso8859-15' => 'iso-8859-15',
        'iso8859-16' => 'iso-8859-16',
        'iso_8859-1' => 'iso-8859-1',
        'iso_8859-2' => 'iso-8859-2',
        'iso_8859-3' => 'iso-8859-3',
        'iso_8859-4' => 'iso-8859-4',
        'iso_8859-5' => 'iso-8859-5',
        'iso_8859-6' => 'iso-8859-6',
        'iso_8859-7' => 'iso-8859-7',
        'iso_8859-8' => 'iso-8859-8',
        'iso_8859-9' => 'iso-8859-9',
        'iso_8859-10' => 'iso-8859-10',
        'iso_8859-11' => 'iso-8859-11',
        'iso_8859-12' => 'iso-8859-12',
        'iso_8859-13' => 'iso-8859-13',
        'iso_8859-14' => 'iso-8859-14',
        'iso_8859-15' => 'iso-8859-15',
        'iso_8859-16' => 'iso-8859-16',
        'iso88591' => 'iso-8859-1',
        'iso88592' => 'iso-8859-2',
        'iso88593' => 'iso-8859-3',
        'iso88594' => 'iso-8859-4',
        'iso88595' => 'iso-8859-5',
        'iso88596' => 'iso-8859-6',
        'iso88597' => 'iso-8859-7',
        'iso88598' => 'iso-8859-8',
        'iso88599' => 'iso-8859-9',
        'iso885910' => 'iso-8859-10',
        'iso885911' => 'iso-8859-11',
        'iso885912' => 'iso-8859-12',
        'iso885913' => 'iso-8859-13',
        'iso885914' => 'iso-8859-14',
        'iso885915' => 'iso-8859-15',
        'iso885916' => 'iso-8859-16',
    );
    private static $translitMap = array();
    // Cache of loaded conversion maps, keyed by "<type><charset>" (e.g. "from.iso-8859-1").
    private static $convertMap = array();
    private static $errorHandler;
    private static $lastError;

    // High nibble of a UTF-8 lead byte => byte length of the sequence it starts.
    private static $ulenMask = array("\xC0" => 2, "\xD0" => 2, "\xE0" => 3, "\xF0" => 4);
    // Result of the last whole-string UTF-8 validation done by iconv().
    private static $isValidUtf8;

    /**
     * Converts $str from $inCharset to $outCharset, using UTF-8 as the pivot.
     *
     * The charset names are case-insensitive and may carry "//TRANSLIT" and/or
     * "//IGNORE" suffixes, in any order. An empty charset name falls back to
     * "iso-8859-1".
     *
     * @param string $inCharset
     * @param string $outCharset
     * @param string $str
     *
     * @return string|false The converted string, or false on failure
     */
    public static function iconv($inCharset, $outCharset, $str)
    {
        if ('' === $str .= '') {
            return '';
        }

        // Prepare for //IGNORE and //TRANSLIT

        $translit = $ignore = '';

        $outCharset = strtolower($outCharset);
        $inCharset = strtolower($inCharset);

        if ('' === $outCharset) {
            $outCharset = 'iso-8859-1';
        }
        if ('' === $inCharset) {
            $inCharset = 'iso-8859-1';
        }

        // Strip the option suffixes in a loop so that both orders
        // ("//TRANSLIT//IGNORE" and "//IGNORE//TRANSLIT") are recognised.
        do {
            $stripped = false;

            if ('//translit' === substr($outCharset, -10)) {
                $translit = '//TRANSLIT';
                $outCharset = substr($outCharset, 0, -10);
                $stripped = true;
            }

            if ('//ignore' === substr($outCharset, -8)) {
                $ignore = '//IGNORE';
                $outCharset = substr($outCharset, 0, -8);
                $stripped = true;
            }
        } while ($stripped);

        // Options on the input charset are dropped; they do not set any flag.
        do {
            $stripped = false;

            if ('//translit' === substr($inCharset, -10)) {
                $inCharset = substr($inCharset, 0, -10);
                $stripped = true;
            }

            if ('//ignore' === substr($inCharset, -8)) {
                $inCharset = substr($inCharset, 0, -8);
                $stripped = true;
            }
        } while ($stripped);

        if (isset(self::$alias[$inCharset])) {
            $inCharset = self::$alias[$inCharset];
        }
        if (isset(self::$alias[$outCharset])) {
            $outCharset = self::$alias[$outCharset];
        }

        // Load charset maps

        if (('utf-8' !== $inCharset && !self::loadMap('from.', $inCharset, $inMap))
            || ('utf-8' !== $outCharset && !self::loadMap('to.', $outCharset, $outMap))) {
            trigger_error(sprintf(self::ERROR_WRONG_CHARSET, $inCharset, $outCharset));

            return false;
        }

        if ('utf-8' !== $inCharset) {
            // Convert input to UTF-8
            $result = '';
            if (self::mapToUtf8($result, $inMap, $str, $ignore)) {
                $str = $result;
            } else {
                $str = false;
            }
            self::$isValidUtf8 = true;
        } else {
            self::$isValidUtf8 = preg_match('//u', $str);

            if (!self::$isValidUtf8 && !$ignore) {
                trigger_error(self::ERROR_ILLEGAL_CHARACTER);

                return false;
            }

            if ('utf-8' === $outCharset) {
                // UTF-8 validation
                $str = self::utf8ToUtf8($str, $ignore);
            }
        }

        if ('utf-8' !== $outCharset && false !== $str) {
            // Convert the UTF-8 pivot string to the output charset
            $result = '';
            if (self::mapFromUtf8($result, $outMap, $str, $ignore, $translit)) {
                return $result;
            }

            return false;
        }

        return $str;
    }

    /**
     * Decodes every header field found in the header section of $str.
     *
     * Only the part before the first blank line is parsed. A field name that
     * appears more than once yields an array of values.
     *
     * @param string      $str
     * @param int         $mode    Bitmask forwarded to iconv_mime_decode()
     * @param string|null $charset Target charset, defaults to the internal encoding
     *
     * @return array|false Field name => value(s), or false when a field cannot be decoded
     */
    public static function iconv_mime_decode_headers($str, $mode = 0, $charset = null)
    {
        if (null === $charset) {
            $charset = self::$internalEncoding;
        }

        // Normalise CRLF and lone CR line endings to LF.
        if (false !== strpos($str, "\r")) {
            $str = strtr(str_replace("\r\n", "\n", $str), "\r", "\n");
        }
        $sections = explode("\n\n", $str, 2);

        $headers = array();

        // Split on newlines that are not followed by folding whitespace.
        $lines = preg_split('/\n(?![ \t])/', $sections[0]);
        foreach ($lines as $line) {
            $line = self::iconv_mime_decode($line, $mode, $charset);
            if (false === $line) {
                return false;
            }
            $parts = explode(':', $line, 2);

            if (2 === count($parts)) {
                $name = $parts[0];
                $value = ltrim($parts[1]);

                if (isset($headers[$name])) {
                    if (!is_array($headers[$name])) {
                        $headers[$name] = array($headers[$name]);
                    }
                    $headers[$name][] = $value;
                } else {
                    $headers[$name] = $value;
                }
            }
        }

        return $headers;
    }

    /**
     * Decodes the first (possibly folded) MIME header field of $str.
     *
     * @param string      $str
     * @param int         $mode    Bitmask; ICONV_MIME_DECODE_CONTINUE_ON_ERROR is honoured
     * @param string|null $charset Target charset, defaults to the internal encoding
     *
     * @return string|false The decoded field, or false on failure
     */
    public static function iconv_mime_decode($str, $mode = 0, $charset = null)
    {
        if (null === $charset) {
            $charset = self::$internalEncoding;
        }
        if (ICONV_MIME_DECODE_CONTINUE_ON_ERROR & $mode) {
            $charset .= '//IGNORE';
        }

        if (false !== strpos($str, "\r")) {
            $str = strtr(str_replace("\r\n", "\n", $str), "\r", "\n");
        }
        // Keep the first field only, then unfold its continuation lines.
        $str = preg_split('/\n(?![ \t])/', rtrim($str), 2);
        $str = preg_replace('/[ \t]*\n[ \t]+/', ' ', rtrim($str[0]));
        // Yields: plain text, then groups of (charset, scheme, payload, following plain text).
        $str = preg_split('/=\?([^?]+)\?([bqBQ])\?(.*?)\?=/', $str, -1, PREG_SPLIT_DELIM_CAPTURE);

        $result = self::iconv('utf-8', $charset, $str[0]);
        if (false === $result) {
            return false;
        }

        $i = 1;
        $len = count($str);

        while ($i < $len) {
            $c = strtolower($str[$i]);
            if ((ICONV_MIME_DECODE_CONTINUE_ON_ERROR & $mode)
                && 'utf-8' !== $c
                && !isset(self::$alias[$c])
                && !self::loadMap('from.', $c, $d)) {
                // Unknown charset: the encoded word is kept verbatim below.
                $d = false;
            } elseif ('B' === strtoupper($str[$i + 1])) {
                $d = base64_decode($str[$i + 2]);
            } else {
                // Q scheme: "_" stands for a space and "=XX" for a byte; literal "%" is protected first.
                $d = rawurldecode(strtr(str_replace('%', '%25', $str[$i + 2]), '=_', '% '));
            }

            if (false !== $d) {
                if ('' !== $d) {
                    if ('' === $d = self::iconv($c, $charset, $d)) {
                        $str[$i + 3] = substr($str[$i + 3], 1);
                    } else {
                        $result .= $d;
                    }
                }
                $d = self::iconv('utf-8', $charset, $str[$i + 3]);
                // Whitespace-only text between encoded words is dropped.
                if ('' !== trim($d)) {
                    $result .= $d;
                }
            } elseif (ICONV_MIME_DECODE_CONTINUE_ON_ERROR & $mode) {
                $result .= "=?{$str[$i]}?{$str[$i + 1]}?{$str[$i + 2]}?={$str[$i + 3]}";
            } else {
                $result = false;
                break;
            }

            $i += 4;
        }

        return $result;
    }

    /**
     * Returns one of the configured encodings, or all three for any other $type.
     *
     * @param string $type "input_encoding", "output_encoding" or "internal_encoding"
     *
     * @return string|array
     */
    public static function iconv_get_encoding($type = 'all')
    {
        switch ($type) {
            case 'input_encoding': return self::$inputEncoding;
            case 'output_encoding': return self::$outputEncoding;
            case 'internal_encoding': return self::$internalEncoding;
        }

        return array(
            'input_encoding' => self::$inputEncoding,
            'output_encoding' => self::$outputEncoding,
            'internal_encoding' => self::$internalEncoding,
        );
    }

    /**
     * Sets one of the configured encodings.
     *
     * @param string $type    "input_encoding", "output_encoding" or "internal_encoding"
     * @param string $charset
     *
     * @return bool False when $type is not one of the three known settings
     */
    public static function iconv_set_encoding($type, $charset)
    {
        switch ($type) {
            case 'input_encoding': self::$inputEncoding = $charset; break;
            case 'output_encoding': self::$outputEncoding = $charset; break;
            case 'internal_encoding': self::$internalEncoding = $charset; break;

            default: return false;
        }

        return true;
    }

    /**
     * Composes a MIME header field, splitting the value into encoded words.
     *
     * Recognised $pref keys: "scheme" ("B" or "Q"), "input-charset",
     * "output-charset", "line-length" and "line-break-chars".
     *
     * @param string     $fieldName
     * @param string     $fieldValue
     * @param array|null $pref
     *
     * @return string|false The encoded field, or false when a conversion fails
     */
    public static function iconv_mime_encode($fieldName, $fieldValue, $pref = null)
    {
        if (!is_array($pref)) {
            $pref = array();
        }

        $pref += array(
            'scheme' => 'B',
            'input-charset' => self::$internalEncoding,
            'output-charset' => self::$internalEncoding,
            'line-length' => 76,
            'line-break-chars' => "\r\n",
        );

        // A field name containing non-ASCII bytes is discarded.
        if (preg_match('/[\x80-\xFF]/', $fieldName)) {
            $fieldName = '';
        }

        $scheme = strtoupper(substr($pref['scheme'], 0, 1));
        $in = strtolower($pref['input-charset']);
        $out = strtolower($pref['output-charset']);

        if ('utf-8' !== $in && false === $fieldValue = self::iconv($in, 'utf-8', $fieldValue)) {
            return false;
        }

        // Work character by character so an encoded word never splits a character.
        preg_match_all('/./us', $fieldValue, $chars);

        $chars = isset($chars[0]) ? $chars[0] : array();

        $lineBreak = (int) $pref['line-length'];
        $lineStart = "=?{$pref['output-charset']}?{$scheme}?";
        $lineLength = strlen($fieldName) + 2 + strlen($lineStart) + 2;
        $lineOffset = strlen($lineStart) + 3;
        $lineData = '';

        $fieldValue = array();

        $Q = 'Q' === $scheme;

        foreach ($chars as $c) {
            if ('utf-8' !== $out && false === $c = self::iconv('utf-8', $out, $c)) {
                return false;
            }

            // Q: $c becomes its quoted-printable form. B: $o is the would-be line payload.
            $o = $Q
                ? $c = preg_replace_callback(
                    '/[=_\?\x00-\x1F\x80-\xFF]/',
                    array(__CLASS__, 'qpByteCallback'),
                    $c
                )
                : base64_encode($lineData.$c);

            // Flush the current line when adding this character would overflow it.
            if (isset($o[$lineBreak - $lineLength])) {
                if (!$Q) {
                    $lineData = base64_encode($lineData);
                }
                $fieldValue[] = $lineStart.$lineData.'?=';
                $lineLength = $lineOffset;
                $lineData = '';
            }

            $lineData .= $c;
            $Q && $lineLength += strlen($c);
        }

        if ('' !== $lineData) {
            if (!$Q) {
                $lineData = base64_encode($lineData);
            }
            $fieldValue[] = $lineStart.$lineData.'?=';
        }

        return $fieldName.': '.implode($pref['line-break-chars'].' ', $fieldValue);
    }

    /**
     * Returns the number of characters in $s.
     *
     * Delegates to strlen1() when the "xml" extension is loaded, to strlen2() otherwise.
     *
     * @param string      $s
     * @param string|null $encoding Defaults to the internal encoding
     *
     * @return int|false
     */
    public static function iconv_strlen($s, $encoding = null)
    {
        static $hasXml = null;
        if (null === $hasXml) {
            $hasXml = extension_loaded('xml');
        }

        if ($hasXml) {
            return self::strlen1($s, $encoding);
        }

        return self::strlen2($s, $encoding);
    }

    /**
     * Character count based on utf8_decode().
     *
     * @param string      $s
     * @param string|null $encoding Defaults to the internal encoding
     *
     * @return int|false False when $s cannot be converted to UTF-8
     */
    public static function strlen1($s, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }
        if (0 !== stripos($encoding, 'utf-8') && false === $s = self::iconv($encoding, 'utf-8', $s)) {
            return false;
        }

        return strlen(utf8_decode($s));
    }

    /**
     * Character count that walks the string using the lead byte of each sequence.
     *
     * @param string      $s
     * @param string|null $encoding Defaults to the internal encoding
     *
     * @return int|false False when $s cannot be converted to UTF-8
     */
    public static function strlen2($s, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }
        if (0 !== stripos($encoding, 'utf-8') && false === $s = self::iconv($encoding, 'utf-8', $s)) {
            return false;
        }

        $ulenMask = self::$ulenMask;

        $i = 0;
        $j = 0;
        $len = strlen($s);

        while ($i < $len) {
            // The high nibble of the lead byte selects the sequence length; anything else counts as 1 byte.
            $u = $s[$i] & "\xF0";
            $i += isset($ulenMask[$u]) ? $ulenMask[$u] : 1;
            ++$j;
        }

        return $j;
    }

    /**
     * Finds the character position of the first occurrence of $needle in $haystack.
     *
     * @param string      $haystack
     * @param string      $needle
     * @param int         $offset   Character offset at which the search starts
     * @param string|null $encoding Defaults to the internal encoding
     *
     * @return int|false
     */
    public static function iconv_strpos($haystack, $needle, $offset = 0, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }

        if (0 !== stripos($encoding, 'utf-8')) {
            if (false === $haystack = self::iconv($encoding, 'utf-8', $haystack)) {
                return false;
            }
            if (false === $needle = self::iconv($encoding, 'utf-8', $needle)) {
                return false;
            }
        }

        if ($offset = (int) $offset) {
            $haystack = self::iconv_substr($haystack, $offset, 2147483647, 'utf-8');

            // The offset lies outside the haystack: nothing can be found.
            if (false === $haystack) {
                return false;
            }
        }
        $pos = strpos($haystack, $needle);

        // $pos is a byte offset; convert it to a character count.
        return false === $pos ? false : ($offset + ($pos ? self::iconv_strlen(substr($haystack, 0, $pos), 'utf-8') : 0));
    }

    /**
     * Finds the character position of the last occurrence of $needle in $haystack.
     *
     * @param string      $haystack
     * @param string      $needle
     * @param string|null $encoding Defaults to the internal encoding
     *
     * @return int|false
     */
    public static function iconv_strrpos($haystack, $needle, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }

        if (0 !== stripos($encoding, 'utf-8')) {
            if (false === $haystack = self::iconv($encoding, 'utf-8', $haystack)) {
                return false;
            }
            if (false === $needle = self::iconv($encoding, 'utf-8', $needle)) {
                return false;
            }
        }

        // An empty needle is never searched for.
        $pos = isset($needle[0]) ? strrpos($haystack, $needle) : false;

        return false === $pos ? false : self::iconv_strlen($pos ? substr($haystack, 0, $pos) : $haystack, 'utf-8');
    }

    /**
     * Returns the part of $s selected by the character-based $start and $length.
     *
     * @param string      $s
     * @param int         $start    Negative values count from the end of the string
     * @param int         $length   Negative values leave that many characters off the end
     * @param string|null $encoding Defaults to the internal encoding
     *
     * @return string|false
     */
    public static function iconv_substr($s, $start, $length = 2147483647, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }

        // Always go through iconv(): it converts non-UTF-8 input to the UTF-8
        // pivot and validates input that is already UTF-8.
        if (false === $s = self::iconv($encoding, 'utf-8', $s)) {
            return false;
        }

        $s .= '';
        $slen = self::iconv_strlen($s, 'utf-8');
        $start = (int) $start;

        if (0 > $start) {
            $start += $slen;
        }
        if (0 > $start) {
            return false;
        }
        if ($start >= $slen) {
            return false;
        }

        // Number of characters available from $start to the end.
        $rx = $slen - $start;

        if (0 > $length) {
            $length += $rx;
        }
        if (0 === $length) {
            return '';
        }
        if (0 > $length) {
            return false;
        }

        if ($length > $rx) {
            $length = $rx;
        }

        // The "s" modifier lets "." match newlines, so slices may span several lines.
        $rx = '/^'.($start ? self::pregOffset($start) : '').'('.self::pregOffset($length).')/us';

        $s = preg_match($rx, $s, $match) ? $match[1] : '';

        // Convert the slice back from the UTF-8 pivot to the caller's encoding.
        return self::iconv('utf-8', $encoding, $s);
    }

    /**
     * Loads the conversion map "<type><charset>" into $map, using a static cache.
     *
     * When no "to." map exists, the flipped "from." map is used instead.
     *
     * @return bool False when no map is available for the charset
     */
    private static function loadMap($type, $charset, &$map)
    {
        if (!isset(self::$convertMap[$type.$charset])) {
            if (false === $map = self::getData($type.$charset)) {
                if ('to.' === $type && self::loadMap('from.', $charset, $map)) {
                    $map = array_flip($map);
                } else {
                    return false;
                }
            }

            self::$convertMap[$type.$charset] = $map;
        } else {
            $map = self::$convertMap[$type.$charset];
        }

        return true;
    }

    /**
     * Copies $str sequence by sequence, rejecting or (with $ignore) skipping invalid bytes.
     *
     * @return string|false False, after triggering an error, on an invalid byte without $ignore
     */
    private static function utf8ToUtf8($str, $ignore)
    {
        $ulenMask = self::$ulenMask;
        $valid = self::$isValidUtf8;

        // $u is overwritten in place and truncated to $j bytes at the end.
        $u = $str;
        $i = $j = 0;
        $len = strlen($str);

        while ($i < $len) {
            if ($str[$i] < "\x80") {
                $u[$j++] = $str[$i++];
            } else {
                $ulen = $str[$i] & "\xF0";
                $ulen = isset($ulenMask[$ulen]) ? $ulenMask[$ulen] : 1;
                $uchr = substr($str, $i, $ulen);

                // Per-sequence validation is only needed when the whole string was not valid.
                if (1 === $ulen || !($valid || preg_match('/^.$/us', $uchr))) {
                    if ($ignore) {
                        ++$i;
                        continue;
                    }

                    trigger_error(self::ERROR_ILLEGAL_CHARACTER);

                    return false;
                } else {
                    $i += $ulen;
                }

                $u[$j++] = $uchr[0];

                // Copy the remaining 1 to 3 bytes of the sequence.
                isset($uchr[1]) && 0 !== ($u[$j++] = $uchr[1])
                && isset($uchr[2]) && 0 !== ($u[$j++] = $uchr[2])
                && isset($uchr[3]) && 0 !== ($u[$j++] = $uchr[3]);
            }
        }

        return substr($u, 0, $j);
    }

    /**
     * Appends to $result the UTF-8 form of $str, looked up in $map.
     *
     * Two-byte keys are tried before single-byte ones.
     *
     * @return bool False, after triggering an error, on an unmapped byte without $ignore
     */
    private static function mapToUtf8(&$result, $map, $str, $ignore)
    {
        $len = strlen($str);
        for ($i = 0; $i < $len; ++$i) {
            if (isset($str[$i + 1], $map[$str[$i].$str[$i + 1]])) {
                $result .= $map[$str[$i].$str[++$i]];
            } elseif (isset($map[$str[$i]])) {
                $result .= $map[$str[$i]];
            } elseif (!$ignore) {
                trigger_error(self::ERROR_ILLEGAL_CHARACTER);

                return false;
            }
        }

        return true;
    }

    /**
     * Appends to $result the target-charset form of the UTF-8 string $str.
     *
     * With $translit, unmapped characters are replaced through the "translit"
     * data file or their NFD decomposition, and the replacement is re-processed.
     *
     * @return bool False when a character cannot be mapped and $ignore is not set
     */
    private static function mapFromUtf8(&$result, $map, $str, $ignore, $translit)
    {
        $ulenMask = self::$ulenMask;
        $valid = self::$isValidUtf8;

        if ($translit && !self::$translitMap) {
            self::$translitMap = self::getData('translit');
        }

        $i = 0;
        $len = strlen($str);

        while ($i < $len) {
            if ($str[$i] < "\x80") {
                $uchr = $str[$i++];
            } else {
                $ulen = $str[$i] & "\xF0";
                $ulen = isset($ulenMask[$ulen]) ? $ulenMask[$ulen] : 1;
                $uchr = substr($str, $i, $ulen);

                if ($ignore && (1 === $ulen || !($valid || preg_match('/^.$/us', $uchr)))) {
                    ++$i;
                    continue;
                } else {
                    $i += $ulen;
                }
            }

            if (isset($map[$uchr])) {
                $result .= $map[$uchr];
            } elseif ($translit) {
                if (isset(self::$translitMap[$uchr])) {
                    $uchr = self::$translitMap[$uchr];
                } elseif ($uchr >= "\xC3\x80") {
                    $uchr = \Normalizer::normalize($uchr, \Normalizer::NFD);

                    // Keep the decomposition only when it starts with an ASCII character.
                    if ($uchr[0] < "\x80") {
                        $uchr = $uchr[0];
                    } elseif ($ignore) {
                        continue;
                    } else {
                        return false;
                    }
                }

                // Restart the scan with the replacement put in front of the unread remainder.
                $str = $uchr.substr($str, $i);
                $len = strlen($str);
                $i = 0;
            } elseif (!$ignore) {
                return false;
            }
        }

        return true;
    }

    /**
     * preg_replace_callback() handler: encodes the matched byte as "=XX".
     *
     * Always emits two upper-case hex digits, as quoted-printable requires.
     */
    private static function qpByteCallback($m)
    {
        return sprintf('=%02X', ord($m[0]));
    }

    /**
     * Builds a regex fragment matching exactly $offset characters.
     *
     * The count is split into quantifiers of at most 65535 repetitions each.
     */
    private static function pregOffset($offset)
    {
        $rx = array();
        $offset = (int) $offset;

        while ($offset > 65535) {
            $rx[] = '.{65535}';
            $offset -= 65535;
        }

        return implode('', $rx).'.{'.$offset.'}';
    }

    /**
     * Returns the value of Resources/charset/<file>.php, or false when that file does not exist.
     */
    private static function getData($file)
    {
        if (file_exists($file = __DIR__.'/Resources/charset/'.$file.'.php')) {
            return require $file;
        }

        return false;
    }
}