Mercurial > hg > isophonics-drupal-site
comparison vendor/zendframework/zend-stdlib/src/StringWrapper/Iconv.php @ 0:4c8ae668cc8c
Initial import (non-working)
author | Chris Cannam |
---|---|
date | Wed, 29 Nov 2017 16:09:58 +0000 |
parents | |
children | 7a779792577d |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4c8ae668cc8c |
---|---|
1 <?php | |
2 /** | |
3 * Zend Framework (http://framework.zend.com/) | |
4 * | |
5 * @link http://github.com/zendframework/zf2 for the canonical source repository | |
6 * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com) | |
7 * @license http://framework.zend.com/license/new-bsd New BSD License | |
8 */ | |
9 | |
10 namespace Zend\Stdlib\StringWrapper; | |
11 | |
12 use Zend\Stdlib\Exception; | |
13 | |
/**
 * String wrapper that delegates multi-byte aware string operations to the
 * PHP "iconv" extension.
 */
class Iconv extends AbstractStringWrapper
{
    /**
     * List of supported character sets.
     *
     * Every entry MUST be upper case: the list is documented as upper case,
     * so a mixed-case entry cannot be matched by an upper-cased lookup
     * (presumably what the parent class performs; confirm in
     * AbstractStringWrapper). iconv itself treats names case-insensitively.
     *
     * @var string[]
     * @link http://www.gnu.org/software/libiconv/
     */
    protected static $encodings = [
        // European languages
        'ASCII',
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-7',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'KOI8-R',
        'KOI8-U',
        'KOI8-RU',
        'CP1250',
        'CP1251',
        'CP1252',
        'CP1253',
        'CP1254',
        'CP1257',
        'CP850',
        'CP866',
        'CP1131',
        'MACROMAN',
        'MACCENTRALEUROPE',
        'MACICELAND',
        'MACCROATIAN',
        'MACROMANIA',
        'MACCYRILLIC',
        'MACUKRAINE',
        'MACGREEK',
        'MACTURKISH',
        'MACINTOSH',

        // Semitic languages
        'ISO-8859-6',
        'ISO-8859-8',
        'CP1255',
        'CP1256',
        'CP862',
        'MACHEBREW',
        'MACARABIC',

        // Japanese
        'EUC-JP',
        'SHIFT_JIS',
        'CP932',
        'ISO-2022-JP',
        'ISO-2022-JP-2',
        'ISO-2022-JP-1',

        // Chinese
        'EUC-CN',
        'HZ',
        'GBK',
        'CP936',
        'GB18030',
        'EUC-TW',
        'BIG5',
        'CP950',
        'BIG5-HKSCS',
        'BIG5-HKSCS:2004',
        'BIG5-HKSCS:2001',
        'BIG5-HKSCS:1999',
        'ISO-2022-CN',
        'ISO-2022-CN-EXT',

        // Korean
        'EUC-KR',
        'CP949',
        'ISO-2022-KR',
        'JOHAB',

        // Armenian
        'ARMSCII-8',

        // Georgian
        'GEORGIAN-ACADEMY',
        'GEORGIAN-PS',

        // Tajik
        'KOI8-T',

        // Kazakh
        'PT154',
        'RK1048',

        // Thai
        'ISO-8859-11',
        'TIS-620',
        'CP874',
        'MACTHAI',

        // Laotian
        'MULELAO-1',
        'CP1133',

        // Vietnamese
        'VISCII',
        'TCVN',
        'CP1258',

        // Platform specifics
        'HP-ROMAN8',
        'NEXTSTEP',

        // Full Unicode
        'UTF-8',
        'UCS-2',
        'UCS-2BE',
        'UCS-2LE',
        'UCS-4',
        'UCS-4BE',
        'UCS-4LE',
        'UTF-16',
        'UTF-16BE',
        'UTF-16LE',
        'UTF-32',
        'UTF-32BE',
        'UTF-32LE',
        'UTF-7',
        'C99',
        'JAVA',

        /* Intentionally left out: these are internal encodings that do not exist in the real world
        // Full Unicode, in terms of uint16_t or uint32_t (with machine dependent endianness and alignment)
        'UCS-2-INTERNAL',
        'UCS-4-INTERNAL',

        // Locale dependent, in terms of `char' or `wchar_t' (with machine dependent endianness and alignment,
        // and with OS and locale dependent semantics)
        'char',
        'wchar_t',
        '', // The empty encoding name is equivalent to "char": it denotes the locale dependent character encoding.
        */

        // When configured with the option --enable-extra-encodings,
        // it also provides support for a few extra encodings:

        // European languages
        'CP437',
        'CP737',
        'CP775',
        'CP852',
        'CP853',
        'CP855',
        'CP857',
        'CP858',
        'CP860',
        'CP861',
        'CP863',
        'CP865',
        'CP869',
        'CP1125',

        // Semitic languages
        'CP864',

        // Japanese
        'EUC-JISX0213',
        'SHIFT_JISX0213', // upper-cased to honour the "upper case" contract of this list
        'ISO-2022-JP-3',

        // Chinese
        'BIG5-2003', // (experimental)

        // Turkmen
        'TDS565',

        // Platform specifics
        'ATARIST',
        'RISCOS-LATIN1',
    ];

    /**
     * Get a list of supported character encodings
     *
     * Resolved through late static binding, so a subclass that redeclares
     * $encodings exposes its own list.
     *
     * @return string[] Upper-case encoding names
     */
    public static function getSupportedEncodings()
    {
        return static::$encodings;
    }

    /**
     * Constructor
     *
     * Fails fast when the "iconv" extension is unavailable, because every
     * operation of this wrapper depends on it.
     *
     * @throws Exception\ExtensionNotLoadedException If ext/iconv is not loaded
     */
    public function __construct()
    {
        if (!extension_loaded('iconv')) {
            throw new Exception\ExtensionNotLoadedException(
                'PHP extension "iconv" is required for this wrapper'
            );
        }
    }

    /**
     * Returns the length of the given string
     *
     * The length is counted in characters of the wrapper's current encoding,
     * not in bytes.
     *
     * @param string $str
     * @return int|false The character count, or false if iconv_strlen() fails
     */
    public function strlen($str)
    {
        return iconv_strlen($str, $this->getEncoding());
    }

    /**
     * Returns the portion of string specified by the start and length parameters
     *
     * @param string   $str
     * @param int      $offset Character offset to start at
     * @param int|null $length Number of characters to return; null means
     *                         "up to the end of the string"
     * @return string|false The extracted part, or false on failure
     */
    public function substr($str, $offset = 0, $length = null)
    {
        $encoding = $this->getEncoding();

        // Before PHP 8.0 iconv_substr() coerces an explicit null length to 0
        // and yields an empty result instead of the rest of the string, so
        // resolve "no length" to the full character length ourselves.
        // iconv_substr() clamps an over-long length to what is available.
        if ($length === null) {
            $length = iconv_strlen($str, $encoding);
            if ($length === false) {
                // $str is not valid in $encoding; iconv_substr() would fail too
                return false;
            }
        }

        return iconv_substr($str, $offset, $length, $encoding);
    }

    /**
     * Find the position of the first occurrence of a substring in a string
     *
     * @param string $haystack
     * @param string $needle
     * @param int    $offset Character offset at which the search starts
     * @return int|false The character position of the match, or false if
     *                   the needle was not found
     */
    public function strpos($haystack, $needle, $offset = 0)
    {
        return iconv_strpos($haystack, $needle, $offset, $this->getEncoding());
    }

    /**
     * Convert a string from defined encoding to the defined convert encoding
     *
     * @param string $str
     * @param bool   $reverse When true, convert from the convert encoding
     *                        back to the wrapper's encoding instead
     * @return string|false The converted string, or false if iconv() fails
     * @throws Exception\LogicException If no convert encoding has been defined
     */
    public function convert($str, $reverse = false)
    {
        $encoding        = $this->getEncoding();
        $convertEncoding = $this->getConvertEncoding();
        if ($convertEncoding === null) {
            throw new Exception\LogicException(
                'No convert encoding defined'
            );
        }

        // Nothing to do when both sides use the same encoding
        if ($encoding === $convertEncoding) {
            return $str;
        }

        $fromEncoding = $reverse ? $convertEncoding : $encoding;
        $toEncoding   = $reverse ? $encoding : $convertEncoding;

        // automatically add "//IGNORE" to not stop converting on invalid characters
        // invalid characters trigger a notice anyway
        return iconv($fromEncoding, $toEncoding . '//IGNORE', $str);
    }
}