<?php

/*
 * This file is part of the Symfony package.
 *
 * (c) Fabien Potencier <fabien@symfony.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace Symfony\Polyfill\Iconv;

/**
 * iconv implementation in pure PHP, UTF-8 centric.
 *
 * Implemented:
 * - iconv                     - Convert string to requested character encoding
 * - iconv_mime_decode         - Decodes a MIME header field
 * - iconv_mime_decode_headers - Decodes multiple MIME header fields at once
 * - iconv_get_encoding        - Retrieve internal configuration variables of iconv extension
 * - iconv_set_encoding        - Set current setting for character encoding conversion
 * - iconv_mime_encode         - Composes a MIME header field
 * - iconv_strlen              - Returns the character count of string
 * - iconv_strpos              - Finds position of first occurrence of a needle within a haystack
 * - iconv_strrpos             - Finds the last occurrence of a needle within a haystack
 * - iconv_substr              - Cut out part of a string
 *
 * Charsets available for conversion are defined by files
 * in the charset/ directory and by Iconv::$alias below.
 * You're welcome to send back any addition you make.
 *
 * @author Nicolas Grekas <p@tchwork.com>
 *
 * @internal
 */
final class Iconv
{
    const ERROR_ILLEGAL_CHARACTER = 'iconv(): Detected an illegal character in input string';
    const ERROR_WRONG_CHARSET = 'iconv(): Wrong charset, conversion from `%s\' to `%s\' is not allowed';

    public static $inputEncoding = 'utf-8';
    public static $outputEncoding = 'utf-8';
    public static $internalEncoding = 'utf-8';

    // Lower-case alternative spellings mapped to the canonical charset name
    // that is used to locate the conversion map.
    private static $alias = array(
        'utf8' => 'utf-8',
        'ascii' => 'us-ascii',
        'tis-620' => 'iso-8859-11',
        'cp1250' => 'windows-1250',
        'cp1251' => 'windows-1251',
        'cp1252' => 'windows-1252',
        'cp1253' => 'windows-1253',
        'cp1254' => 'windows-1254',
        'cp1255' => 'windows-1255',
        'cp1256' => 'windows-1256',
        'cp1257' => 'windows-1257',
        'cp1258' => 'windows-1258',
        'shift-jis' => 'cp932',
        'shift_jis' => 'cp932',
        'latin1' => 'iso-8859-1',
        'latin2' => 'iso-8859-2',
        'latin3' => 'iso-8859-3',
        'latin4' => 'iso-8859-4',
        'latin5' => 'iso-8859-9',
        'latin6' => 'iso-8859-10',
        'latin7' => 'iso-8859-13',
        'latin8' => 'iso-8859-14',
        'latin9' => 'iso-8859-15',
        'latin10' => 'iso-8859-16',
        'iso8859-1' => 'iso-8859-1',
        'iso8859-2' => 'iso-8859-2',
        'iso8859-3' => 'iso-8859-3',
        'iso8859-4' => 'iso-8859-4',
        'iso8859-5' => 'iso-8859-5',
        'iso8859-6' => 'iso-8859-6',
        'iso8859-7' => 'iso-8859-7',
        'iso8859-8' => 'iso-8859-8',
        'iso8859-9' => 'iso-8859-9',
        'iso8859-10' => 'iso-8859-10',
        'iso8859-11' => 'iso-8859-11',
        'iso8859-12' => 'iso-8859-12',
        'iso8859-13' => 'iso-8859-13',
        'iso8859-14' => 'iso-8859-14',
        'iso8859-15' => 'iso-8859-15',
        'iso8859-16' => 'iso-8859-16',
        'iso_8859-1' => 'iso-8859-1',
        'iso_8859-2' => 'iso-8859-2',
        'iso_8859-3' => 'iso-8859-3',
        'iso_8859-4' => 'iso-8859-4',
        'iso_8859-5' => 'iso-8859-5',
        'iso_8859-6' => 'iso-8859-6',
        'iso_8859-7' => 'iso-8859-7',
        'iso_8859-8' => 'iso-8859-8',
        'iso_8859-9' => 'iso-8859-9',
        'iso_8859-10' => 'iso-8859-10',
        'iso_8859-11' => 'iso-8859-11',
        'iso_8859-12' => 'iso-8859-12',
        'iso_8859-13' => 'iso-8859-13',
        'iso_8859-14' => 'iso-8859-14',
        'iso_8859-15' => 'iso-8859-15',
        'iso_8859-16' => 'iso-8859-16',
        'iso88591' => 'iso-8859-1',
        'iso88592' => 'iso-8859-2',
        'iso88593' => 'iso-8859-3',
        'iso88594' => 'iso-8859-4',
        'iso88595' => 'iso-8859-5',
        'iso88596' => 'iso-8859-6',
        'iso88597' => 'iso-8859-7',
        'iso88598' => 'iso-8859-8',
        'iso88599' => 'iso-8859-9',
        'iso885910' => 'iso-8859-10',
        'iso885911' => 'iso-8859-11',
        'iso885912' => 'iso-8859-12',
        'iso885913' => 'iso-8859-13',
        'iso885914' => 'iso-8859-14',
        'iso885915' => 'iso-8859-15',
        'iso885916' => 'iso-8859-16',
    );
    // Lazily loaded transliteration table (see mapFromUtf8()).
    private static $translitMap = array();
    // Cache of loaded conversion maps, keyed by "<type><charset>" (e.g. "from.iso-8859-1").
    private static $convertMap = array();
    private static $errorHandler;
    private static $lastError;

    // Byte length of a UTF-8 sequence, keyed by its lead byte masked with 0xF0.
    private static $ulenMask = array("\xC0" => 2, "\xD0" => 2, "\xE0" => 3, "\xF0" => 4);
    // Set by iconv() before delegating to utf8ToUtf8()/mapFromUtf8(): truthy when
    // the string being processed is already known to be valid UTF-8.
    private static $isValidUtf8;

    /**
     * Converts a string from one character encoding to another, pivoting through UTF-8.
     *
     * Charset names are matched case-insensitively; an empty name is treated as
     * "iso-8859-1". "//TRANSLIT" and "//IGNORE" suffixes are honoured on the output
     * charset and silently stripped from the input charset.
     *
     * @param string $inCharset  Charset of $str
     * @param string $outCharset Target charset, optionally followed by //TRANSLIT and/or //IGNORE
     * @param string $str        String to convert
     *
     * @return string|false The converted string, or false when a charset is unknown
     *                      or the conversion fails
     */
    public static function iconv($inCharset, $outCharset, $str)
    {
        $str = (string) $str;
        if ('' === $str) {
            return '';
        }

        // Prepare for //IGNORE and //TRANSLIT

        $translit = $ignore = '';

        $outCharset = strtolower($outCharset);
        $inCharset = strtolower($inCharset);

        if ('' === $outCharset) {
            $outCharset = 'iso-8859-1';
        }
        if ('' === $inCharset) {
            $inCharset = 'iso-8859-1';
        }

        // Suffixes may appear in any order and be repeated, hence the loop.
        do {
            $loop = false;

            if ('//translit' === substr($outCharset, -10)) {
                $loop = $translit = true;
                $outCharset = substr($outCharset, 0, -10);
            }

            if ('//ignore' === substr($outCharset, -8)) {
                $loop = $ignore = true;
                $outCharset = substr($outCharset, 0, -8);
            }
        } while ($loop);

        // Same stripping for the input charset, where the flags have no effect.
        do {
            $loop = false;

            if ('//translit' === substr($inCharset, -10)) {
                $loop = true;
                $inCharset = substr($inCharset, 0, -10);
            }

            if ('//ignore' === substr($inCharset, -8)) {
                $loop = true;
                $inCharset = substr($inCharset, 0, -8);
            }
        } while ($loop);

        if (isset(self::$alias[$inCharset])) {
            $inCharset = self::$alias[$inCharset];
        }
        if (isset(self::$alias[$outCharset])) {
            $outCharset = self::$alias[$outCharset];
        }

        // Load charset maps

        if (('utf-8' !== $inCharset && !self::loadMap('from.', $inCharset, $inMap))
            || ('utf-8' !== $outCharset && !self::loadMap('to.', $outCharset, $outMap))) {
            trigger_error(sprintf(self::ERROR_WRONG_CHARSET, $inCharset, $outCharset));

            return false;
        }

        if ('utf-8' !== $inCharset) {
            // Convert input to UTF-8
            $result = '';
            if (self::mapToUtf8($result, $inMap, $str, $ignore)) {
                $str = $result;
            } else {
                $str = false;
            }
            self::$isValidUtf8 = true;
        } else {
            self::$isValidUtf8 = preg_match('//u', $str);

            if (!self::$isValidUtf8 && !$ignore) {
                trigger_error(self::ERROR_ILLEGAL_CHARACTER);

                return false;
            }

            if ('utf-8' === $outCharset) {
                // UTF-8 validation
                $str = self::utf8ToUtf8($str, $ignore);
            }
        }

        if ('utf-8' !== $outCharset && false !== $str) {
            // Convert the UTF-8 pivot string to the output charset
            $result = '';
            if (self::mapFromUtf8($result, $outMap, $str, $ignore, $translit)) {
                return $result;
            }

            return false;
        }

        return $str;
    }

    /**
     * Decodes every field of a MIME header block.
     *
     * Only the part of $str before the first blank line is considered. Lines
     * starting with a space or tab are treated as continuations of the previous field.
     *
     * @param string      $str     Raw header block
     * @param int         $mode    Bitmask forwarded to iconv_mime_decode()
     * @param string|null $charset Target charset, defaults to the internal encoding
     *
     * @return array|false Map of field name => value (or list of values when the
     *                     field is repeated), false when a field cannot be decoded
     */
    public static function iconv_mime_decode_headers($str, $mode = 0, $charset = null)
    {
        if (null === $charset) {
            $charset = self::$internalEncoding;
        }

        // Normalise CRLF and lone CR line endings to LF.
        if (false !== strpos($str, "\r")) {
            $str = strtr(str_replace("\r\n", "\n", $str), "\r", "\n");
        }
        $sections = explode("\n\n", $str, 2);

        $headers = array();

        foreach (preg_split('/\n(?![ \t])/', $sections[0]) as $line) {
            $line = self::iconv_mime_decode($line, $mode, $charset);
            if (false === $line) {
                return false;
            }
            $field = explode(':', $line, 2);

            // Lines without a colon are not header fields and are skipped.
            if (2 === \count($field)) {
                if (isset($headers[$field[0]])) {
                    if (!\is_array($headers[$field[0]])) {
                        $headers[$field[0]] = array($headers[$field[0]]);
                    }
                    $headers[$field[0]][] = ltrim($field[1]);
                } else {
                    $headers[$field[0]] = ltrim($field[1]);
                }
            }
        }

        return $headers;
    }

    /**
     * Decodes a single MIME header field containing "=?charset?B|Q?data?=" encoded-words.
     *
     * Only the first (possibly folded) field found in $str is decoded.
     *
     * @param string      $str     Raw header field
     * @param int         $mode    Bitmask; ICONV_MIME_DECODE_CONTINUE_ON_ERROR is the only flag read here
     * @param string|null $charset Target charset, defaults to the internal encoding
     *
     * @return string|false The decoded field, or false on failure
     */
    public static function iconv_mime_decode($str, $mode = 0, $charset = null)
    {
        if (null === $charset) {
            $charset = self::$internalEncoding;
        }
        if (ICONV_MIME_DECODE_CONTINUE_ON_ERROR & $mode) {
            $charset .= '//IGNORE';
        }

        if (false !== strpos($str, "\r")) {
            $str = strtr(str_replace("\r\n", "\n", $str), "\r", "\n");
        }
        // Keep the first field only, then unfold its continuation lines.
        $str = preg_split('/\n(?![ \t])/', rtrim($str), 2);
        $str = preg_replace('/[ \t]*\n[ \t]+/', ' ', rtrim($str[0]));
        // Resulting layout: [plain, charset, scheme, data, plain, charset, scheme, data, plain, ...]
        $str = preg_split('/=\?([^?]+)\?([bqBQ])\?(.*?)\?=/', $str, -1, PREG_SPLIT_DELIM_CAPTURE);

        $result = self::iconv('utf-8', $charset, $str[0]);
        if (false === $result) {
            return false;
        }

        $i = 1;
        $len = \count($str);

        while ($i < $len) {
            $c = strtolower($str[$i]);
            if ((ICONV_MIME_DECODE_CONTINUE_ON_ERROR & $mode)
                && 'utf-8' !== $c
                && !isset(self::$alias[$c])
                && !self::loadMap('from.', $c, $d)) {
                // Unknown charset: the encoded-word is emitted verbatim below.
                $d = false;
            } elseif ('B' === strtoupper($str[$i + 1])) {
                $d = base64_decode($str[$i + 2]);
            } else {
                // Q scheme: reuse rawurldecode() by turning "=XX" into "%XX" and "_" into a space.
                $d = rawurldecode(strtr(str_replace('%', '%25', $str[$i + 2]), '=_', '% '));
            }

            if (false !== $d) {
                if ('' !== $d) {
                    if ('' === $d = self::iconv($c, $charset, $d)) {
                        $str[$i + 3] = substr($str[$i + 3], 1);
                    } else {
                        $result .= $d;
                    }
                }
                // Whitespace-only text between two encoded-words is dropped.
                $d = self::iconv('utf-8', $charset, $str[$i + 3]);
                if ('' !== trim($d)) {
                    $result .= $d;
                }
            } elseif (ICONV_MIME_DECODE_CONTINUE_ON_ERROR & $mode) {
                $result .= "=?{$str[$i]}?{$str[$i + 1]}?{$str[$i + 2]}?={$str[$i + 3]}";
            } else {
                $result = false;
                break;
            }

            $i += 4;
        }

        return $result;
    }

    /**
     * Returns one of the configured encodings, or all of them.
     *
     * @param string $type "input_encoding", "output_encoding" or "internal_encoding";
     *                     any other value returns the three settings as an array
     *
     * @return string|array
     */
    public static function iconv_get_encoding($type = 'all')
    {
        switch ($type) {
            case 'input_encoding': return self::$inputEncoding;
            case 'output_encoding': return self::$outputEncoding;
            case 'internal_encoding': return self::$internalEncoding;
        }

        return array(
            'input_encoding' => self::$inputEncoding,
            'output_encoding' => self::$outputEncoding,
            'internal_encoding' => self::$internalEncoding,
        );
    }

    /**
     * Sets one of the configured encodings.
     *
     * @param string $type    "input_encoding", "output_encoding" or "internal_encoding"
     * @param string $charset New value, stored as given
     *
     * @return bool False when $type is not one of the three known settings
     */
    public static function iconv_set_encoding($type, $charset)
    {
        switch ($type) {
            case 'input_encoding': self::$inputEncoding = $charset; break;
            case 'output_encoding': self::$outputEncoding = $charset; break;
            case 'internal_encoding': self::$internalEncoding = $charset; break;

            default: return false;
        }

        return true;
    }

    /**
     * Composes a MIME header field made of "=?charset?scheme?data?=" encoded-words.
     *
     * Recognised $pref keys and their defaults: "scheme" ("B"), "input-charset" and
     * "output-charset" (internal encoding), "line-length" (76), "line-break-chars" ("\r\n").
     *
     * @param string     $fieldName  Header name; replaced by '' when it contains non-ASCII bytes
     * @param string     $fieldValue Header value, encoded in the input charset
     * @param array|null $pref       Encoding preferences
     *
     * @return string|false The encoded header, or false when a charset conversion fails
     */
    public static function iconv_mime_encode($fieldName, $fieldValue, $pref = null)
    {
        if (!\is_array($pref)) {
            $pref = array();
        }

        $pref += array(
            'scheme' => 'B',
            'input-charset' => self::$internalEncoding,
            'output-charset' => self::$internalEncoding,
            'line-length' => 76,
            'line-break-chars' => "\r\n",
        );

        if (preg_match('/[\x80-\xFF]/', $fieldName)) {
            $fieldName = '';
        }

        $scheme = strtoupper(substr($pref['scheme'], 0, 1));
        $in = strtolower($pref['input-charset']);
        $out = strtolower($pref['output-charset']);

        if ('utf-8' !== $in && false === $fieldValue = self::iconv($in, 'utf-8', $fieldValue)) {
            return false;
        }

        // Split into code points so that a line break never lands inside a character.
        preg_match_all('/./us', $fieldValue, $chars);

        $chars = isset($chars[0]) ? $chars[0] : array();

        $lineBreak = (int) $pref['line-length'];
        $lineStart = "=?{$pref['output-charset']}?{$scheme}?";
        $lineLength = \strlen($fieldName) + 2 + \strlen($lineStart) + 2;
        $lineOffset = \strlen($lineStart) + 3;
        $lineData = '';

        $fieldValue = array();

        $Q = 'Q' === $scheme;

        foreach ($chars as $c) {
            if ('utf-8' !== $out && false === $c = self::iconv('utf-8', $out, $c)) {
                return false;
            }

            // $o is what the current line would grow by (Q) or become (B) with $c added.
            $o = $Q
                ? $c = preg_replace_callback(
                    '/[=_\?\x00-\x1F\x80-\xFF]/',
                    array(__CLASS__, 'qpByteCallback'),
                    $c
                )
                : base64_encode($lineData.$c);

            // Adding $c would overflow the line: flush what has been collected so far.
            if (isset($o[$lineBreak - $lineLength])) {
                if (!$Q) {
                    $lineData = base64_encode($lineData);
                }
                $fieldValue[] = $lineStart.$lineData.'?=';
                $lineLength = $lineOffset;
                $lineData = '';
            }

            $lineData .= $c;
            $Q && $lineLength += \strlen($c);
        }

        if ('' !== $lineData) {
            if (!$Q) {
                $lineData = base64_encode($lineData);
            }
            $fieldValue[] = $lineStart.$lineData.'?=';
        }

        return $fieldName.': '.implode($pref['line-break-chars'].' ', $fieldValue);
    }

    /**
     * Returns the number of characters in a string.
     *
     * @param string      $s
     * @param string|null $encoding Charset of $s, defaults to the internal encoding
     *
     * @return int|false False when $s cannot be converted to UTF-8
     */
    public static function iconv_strlen($s, $encoding = null)
    {
        static $useUtf8Decode = null;
        if (null === $useUtf8Decode) {
            // strlen1() relies on utf8_decode(), which is deprecated as of PHP 8.2:
            // use the pure PHP counter there instead of raising deprecation notices.
            $useUtf8Decode = \PHP_VERSION_ID < 80200 && \extension_loaded('xml');
        }

        if ($useUtf8Decode) {
            return self::strlen1($s, $encoding);
        }

        return self::strlen2($s, $encoding);
    }

    /**
     * Character count based on utf8_decode(), which yields one byte per character.
     *
     * @param string      $s
     * @param string|null $encoding Charset of $s, defaults to the internal encoding
     *
     * @return int|false False when $s cannot be converted to UTF-8
     */
    public static function strlen1($s, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }
        if (0 !== stripos($encoding, 'utf-8') && false === $s = self::iconv($encoding, 'utf-8', $s)) {
            return false;
        }

        return \strlen(utf8_decode($s));
    }

    /**
     * Character count in pure PHP: walks the string one lead byte at a time.
     *
     * @param string      $s
     * @param string|null $encoding Charset of $s, defaults to the internal encoding
     *
     * @return int|false False when $s cannot be converted to UTF-8
     */
    public static function strlen2($s, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }
        if (0 !== stripos($encoding, 'utf-8') && false === $s = self::iconv($encoding, 'utf-8', $s)) {
            return false;
        }

        $ulenMask = self::$ulenMask;

        $i = 0;
        $j = 0;
        $len = \strlen($s);

        while ($i < $len) {
            // Bytes that are not a known multi-byte lead count as one character each.
            $u = $s[$i] & "\xF0";
            $i += isset($ulenMask[$u]) ? $ulenMask[$u] : 1;
            ++$j;
        }

        return $j;
    }

    /**
     * Finds the character position of the first occurrence of a needle in a haystack.
     *
     * @param string      $haystack
     * @param string      $needle
     * @param int         $offset   Character offset to start searching from; a negative
     *                              value is counted from the end of the haystack
     * @param string|null $encoding Charset of both strings, defaults to the internal encoding
     *
     * @return int|false Position counted from the start of the haystack, false when
     *                   the needle is empty or not found, the offset is out of range,
     *                   or a conversion fails
     */
    public static function iconv_strpos($haystack, $needle, $offset = 0, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }

        if (0 !== stripos($encoding, 'utf-8')) {
            if (false === $haystack = self::iconv($encoding, 'utf-8', $haystack)) {
                return false;
            }
            if (false === $needle = self::iconv($encoding, 'utf-8', $needle)) {
                return false;
            }
        }

        // Same guard as iconv_strrpos(): an empty needle is never found.
        if ('' === (string) $needle) {
            return false;
        }

        $offset = (int) $offset;
        if (0 > $offset) {
            // Turn an offset counted from the end into an absolute one, so that the
            // returned position stays relative to the start of the haystack.
            $offset += self::iconv_strlen($haystack, 'utf-8');
            if (0 > $offset) {
                return false;
            }
        }
        if (0 < $offset) {
            $haystack = self::iconv_substr($haystack, $offset, 2147483647, 'utf-8');
            if (false === $haystack) {
                return false;
            }
        }
        $pos = strpos($haystack, $needle);

        // $pos is a byte offset: convert it to a character count.
        return false === $pos ? false : ($offset + ($pos ? self::iconv_strlen(substr($haystack, 0, $pos), 'utf-8') : 0));
    }

    /**
     * Finds the character position of the last occurrence of a needle in a haystack.
     *
     * @param string      $haystack
     * @param string      $needle
     * @param string|null $encoding Charset of both strings, defaults to the internal encoding
     *
     * @return int|false False when the needle is empty or not found, or a conversion fails
     */
    public static function iconv_strrpos($haystack, $needle, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }

        if (0 !== stripos($encoding, 'utf-8')) {
            if (false === $haystack = self::iconv($encoding, 'utf-8', $haystack)) {
                return false;
            }
            if (false === $needle = self::iconv($encoding, 'utf-8', $needle)) {
                return false;
            }
        }

        $pos = isset($needle[0]) ? strrpos($haystack, $needle) : false;

        if (false === $pos) {
            return false;
        }

        // A match at byte 0 is at character 0; otherwise count the characters
        // that precede the byte offset.
        return 0 === $pos ? 0 : self::iconv_strlen(substr($haystack, 0, $pos), 'utf-8');
    }

    /**
     * Cuts out part of a string, counting in characters.
     *
     * @param string      $s
     * @param int         $start    Character offset; negative values count from the end
     * @param int         $length   Maximum number of characters; negative values stop
     *                              that many characters before the end
     * @param string|null $encoding Charset of $s, defaults to the internal encoding
     *
     * @return string|false False when $start or $length fall outside of the string
     *                      or a conversion fails
     */
    public static function iconv_substr($s, $start, $length = 2147483647, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }
        if (0 === stripos($encoding, 'utf-8')) {
            // Already UTF-8: no conversion needed on the way in nor on the way out.
            $encoding = null;
        } elseif (false === $s = self::iconv($encoding, 'utf-8', $s)) {
            return false;
        }

        $s = (string) $s;
        $slen = self::iconv_strlen($s, 'utf-8');
        $start = (int) $start;

        if (0 > $start) {
            $start += $slen;
        }
        if (0 > $start) {
            return false;
        }
        if ($start >= $slen) {
            return false;
        }

        // Number of characters available from $start to the end of the string.
        $rx = $slen - $start;

        if (0 > $length) {
            $length += $rx;
        }
        if (0 === $length) {
            return '';
        }
        if (0 > $length) {
            return false;
        }

        if ($length > $rx) {
            $length = $rx;
        }

        // The "s" modifier is required so that "." also matches line feeds.
        $rx = '/^'.($start ? self::pregOffset($start) : '').'('.self::pregOffset($length).')/us';

        $s = preg_match($rx, $s, $match) ? $match[1] : '';

        if (null === $encoding) {
            return $s;
        }

        return self::iconv('utf-8', $encoding, $s);
    }

    /**
     * Loads a conversion map, caching it in self::$convertMap.
     *
     * When no "to." map exists for a charset, the corresponding "from." map is flipped.
     *
     * @param string $type    "from." or "to."
     * @param string $charset Canonical lower-case charset name
     * @param array  &$map    Receives the map on success
     *
     * @return bool False when no map is available for the charset
     */
    private static function loadMap($type, $charset, &$map)
    {
        if (!isset(self::$convertMap[$type.$charset])) {
            if (false === $map = self::getData($type.$charset)) {
                if ('to.' === $type && self::loadMap('from.', $charset, $map)) {
                    $map = array_flip($map);
                } else {
                    return false;
                }
            }

            self::$convertMap[$type.$charset] = $map;
        } else {
            $map = self::$convertMap[$type.$charset];
        }

        return true;
    }

    /**
     * Validates a UTF-8 string, optionally dropping the bytes that are invalid.
     *
     * Reads self::$isValidUtf8 to skip per-character validation.
     *
     * @param string $str
     * @param mixed  $ignore Truthy to skip invalid bytes instead of failing
     *
     * @return string|false False (after triggering ERROR_ILLEGAL_CHARACTER) on the
     *                      first invalid byte when $ignore is falsy
     */
    private static function utf8ToUtf8($str, $ignore)
    {
        $ulenMask = self::$ulenMask;
        $valid = self::$isValidUtf8;

        // $u starts as a copy of the input and is overwritten in place; $j is the write cursor.
        $u = $str;
        $i = $j = 0;
        $len = \strlen($str);

        while ($i < $len) {
            if ($str[$i] < "\x80") {
                $u[$j++] = $str[$i++];
            } else {
                $ulen = $str[$i] & "\xF0";
                $ulen = isset($ulenMask[$ulen]) ? $ulenMask[$ulen] : 1;
                $uchr = substr($str, $i, $ulen);

                // A non-ASCII byte that is not a known lead byte, or a sequence
                // that fails validation, is illegal.
                if (1 === $ulen || !($valid || preg_match('/^.$/us', $uchr))) {
                    if ($ignore) {
                        ++$i;
                        continue;
                    }

                    trigger_error(self::ERROR_ILLEGAL_CHARACTER);

                    return false;
                }

                $i += $ulen;

                for ($k = 0, $n = \strlen($uchr); $k < $n; ++$k) {
                    $u[$j++] = $uchr[$k];
                }
            }
        }

        return substr($u, 0, $j);
    }

    /**
     * Converts a string to UTF-8 using a "from." map, appending to $result.
     *
     * Two-byte sequences are looked up before single bytes.
     *
     * @param string &$result Receives the converted bytes
     * @param array  $map     Map of source byte sequence => UTF-8 sequence
     * @param string $str     String to convert
     * @param mixed  $ignore  Truthy to skip unmapped bytes instead of failing
     *
     * @return bool False (after triggering ERROR_ILLEGAL_CHARACTER) on the first
     *              unmapped byte when $ignore is falsy
     */
    private static function mapToUtf8(&$result, array $map, $str, $ignore)
    {
        $len = \strlen($str);
        for ($i = 0; $i < $len; ++$i) {
            if (isset($str[$i + 1], $map[$str[$i].$str[$i + 1]])) {
                $result .= $map[$str[$i].$str[++$i]];
            } elseif (isset($map[$str[$i]])) {
                $result .= $map[$str[$i]];
            } elseif (!$ignore) {
                trigger_error(self::ERROR_ILLEGAL_CHARACTER);

                return false;
            }
        }

        return true;
    }

    /**
     * Converts a UTF-8 string to another charset using a "to." map, appending to $result.
     *
     * Reads self::$isValidUtf8 to skip per-character validation.
     *
     * @param string &$result  Receives the converted bytes
     * @param array  $map      Map of UTF-8 sequence => target byte sequence
     * @param string $str      UTF-8 string to convert
     * @param mixed  $ignore   Truthy to skip characters that cannot be converted
     * @param mixed  $translit Truthy to try a transliteration for unmapped characters
     *
     * @return bool False on the first character that cannot be converted when $ignore is falsy
     */
    private static function mapFromUtf8(&$result, array $map, $str, $ignore, $translit)
    {
        $ulenMask = self::$ulenMask;
        $valid = self::$isValidUtf8;

        if ($translit && !self::$translitMap) {
            self::$translitMap = self::getData('translit');
        }

        $i = 0;
        $len = \strlen($str);

        while ($i < $len) {
            if ($str[$i] < "\x80") {
                $uchr = $str[$i++];
            } else {
                $ulen = $str[$i] & "\xF0";
                $ulen = isset($ulenMask[$ulen]) ? $ulenMask[$ulen] : 1;
                $uchr = substr($str, $i, $ulen);

                if ($ignore && (1 === $ulen || !($valid || preg_match('/^.$/us', $uchr)))) {
                    ++$i;
                    continue;
                } else {
                    $i += $ulen;
                }
            }

            if (isset($map[$uchr])) {
                $result .= $map[$uchr];
            } elseif ($translit) {
                if (isset(self::$translitMap[$uchr])) {
                    $uchr = self::$translitMap[$uchr];
                } elseif ($uchr >= "\xC3\x80") {
                    // Decompose the character and keep its base letter when it is ASCII.
                    $uchr = \Normalizer::normalize($uchr, \Normalizer::NFD);

                    if ($uchr[0] < "\x80") {
                        $uchr = $uchr[0];
                    } elseif ($ignore) {
                        continue;
                    } else {
                        return false;
                    }
                } elseif ($ignore) {
                    continue;
                } else {
                    return false;
                }

                // Re-queue the replacement in front of the remaining input so that
                // it goes through the map lookup itself.
                $str = $uchr.substr($str, $i);
                $len = \strlen($str);
                $i = 0;
            } elseif (!$ignore) {
                return false;
            }
        }

        return true;
    }

    /**
     * preg_replace_callback() handler: encodes one byte as "=XX" for the Q scheme.
     *
     * @param array $m Match array; $m[0] is the byte to encode
     *
     * @return string
     */
    private static function qpByteCallback(array $m)
    {
        // Always two upper-case hex digits, so that bytes below 0x10 stay decodable.
        return sprintf('=%02X', \ord($m[0]));
    }

    /**
     * Builds a regex fragment matching exactly $offset characters.
     *
     * The count is split into chunks of at most 65535 repetitions.
     *
     * @param int $offset
     *
     * @return string
     */
    private static function pregOffset($offset)
    {
        $rx = array();
        $offset = (int) $offset;

        while ($offset > 65535) {
            $rx[] = '.{65535}';
            $offset -= 65535;
        }

        return implode('', $rx).'.{'.$offset.'}';
    }

    /**
     * Loads a data file from the Resources/charset directory.
     *
     * @param string $file File name, without directory nor ".php" extension
     *
     * @return mixed|false Whatever the file returns, false when it does not exist
     *                     or when the name is not a plain file name
     */
    private static function getData($file)
    {
        // Charset names can come from untrusted input (iconv_mime_decode() takes them
        // from the header being decoded): never let one point outside of the
        // charset directory.
        if (\strlen($file) !== strcspn($file, "/\\\0")) {
            return false;
        }

        if (file_exists($file = __DIR__.'/Resources/charset/'.$file.'.php')) {
            return require $file;
        }

        return false;
    }
}