Chris@0
|
1 <?php
|
Chris@0
|
2 /**
|
Chris@0
|
3 * Zend Framework (http://framework.zend.com/)
|
Chris@0
|
4 *
|
Chris@0
|
5 * @link http://github.com/zendframework/zf2 for the canonical source repository
|
Chris@0
|
6 * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
|
Chris@0
|
7 * @license http://framework.zend.com/license/new-bsd New BSD License
|
Chris@0
|
8 */
|
Chris@0
|
9
|
Chris@0
|
10 namespace Zend\Stdlib\StringWrapper;
|
Chris@0
|
11
|
Chris@0
|
12 use Zend\Stdlib\Exception;
|
Chris@0
|
13
|
Chris@0
|
/**
 * String wrapper that delegates multibyte-aware string operations to the
 * PHP "iconv" extension.
 *
 * The working encoding and the optional convert encoding are read through
 * getEncoding() / getConvertEncoding(), which are inherited from the parent
 * wrapper.
 */
class Iconv extends AbstractStringWrapper
{
    /**
     * List of supported character sets (upper case)
     *
     * Every entry is kept in upper case, as documented, so that a lookup
     * using an upper-cased encoding name can match it.
     *
     * @var string[]
     * @link http://www.gnu.org/software/libiconv/
     */
    protected static $encodings = [
        // European languages
        'ASCII',
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-7',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'KOI8-R',
        'KOI8-U',
        'KOI8-RU',
        'CP1250',
        'CP1251',
        'CP1252',
        'CP1253',
        'CP1254',
        'CP1257',
        'CP850',
        'CP866',
        'CP1131',
        'MACROMAN',
        'MACCENTRALEUROPE',
        'MACICELAND',
        'MACCROATIAN',
        'MACROMANIA',
        'MACCYRILLIC',
        'MACUKRAINE',
        'MACGREEK',
        'MACTURKISH',
        'MACINTOSH',

        // Semitic languages
        'ISO-8859-6',
        'ISO-8859-8',
        'CP1255',
        'CP1256',
        'CP862',
        'MACHEBREW',
        'MACARABIC',

        // Japanese
        'EUC-JP',
        'SHIFT_JIS',
        'CP932',
        'ISO-2022-JP',
        'ISO-2022-JP-2',
        'ISO-2022-JP-1',

        // Chinese
        'EUC-CN',
        'HZ',
        'GBK',
        'CP936',
        'GB18030',
        'EUC-TW',
        'BIG5',
        'CP950',
        'BIG5-HKSCS',
        'BIG5-HKSCS:2004',
        'BIG5-HKSCS:2001',
        'BIG5-HKSCS:1999',
        'ISO-2022-CN',
        'ISO-2022-CN-EXT',

        // Korean
        'EUC-KR',
        'CP949',
        'ISO-2022-KR',
        'JOHAB',

        // Armenian
        'ARMSCII-8',

        // Georgian
        'GEORGIAN-ACADEMY',
        'GEORGIAN-PS',

        // Tajik
        'KOI8-T',

        // Kazakh
        'PT154',
        'RK1048',

        // Thai
        'ISO-8859-11',
        'TIS-620',
        'CP874',
        'MACTHAI',

        // Laotian
        'MULELAO-1',
        'CP1133',

        // Vietnamese
        'VISCII',
        'TCVN',
        'CP1258',

        // Platform specifics
        'HP-ROMAN8',
        'NEXTSTEP',

        // Full Unicode
        'UTF-8',
        'UCS-2',
        'UCS-2BE',
        'UCS-2LE',
        'UCS-4',
        'UCS-4BE',
        'UCS-4LE',
        'UTF-16',
        'UTF-16BE',
        'UTF-16LE',
        'UTF-32',
        'UTF-32BE',
        'UTF-32LE',
        'UTF-7',
        'C99',
        'JAVA',

        /* Commented out because these are internal encodings that do not exist in the real world
        // Full Unicode, in terms of uint16_t or uint32_t (with machine dependent endianness and alignment)
        'UCS-2-INTERNAL',
        'UCS-4-INTERNAL',

        // Locale dependent, in terms of `char' or `wchar_t' (with machine dependent endianness and alignment,
        // and with OS and locale dependent semantics)
        'char',
        'wchar_t',
        '', // The empty encoding name is equivalent to "char": it denotes the locale dependent character encoding.
        */

        // When configured with the option --enable-extra-encodings,
        // it also provides support for a few extra encodings:

        // European languages
        'CP437',
        'CP737',
        'CP775',
        'CP852',
        'CP853',
        'CP855',
        'CP857',
        'CP858',
        'CP860',
        'CP861',
        'CP863',
        'CP865',
        'CP869',
        'CP1125',

        // Semitic languages
        'CP864',

        // Japanese
        'EUC-JISX0213',
        'SHIFT_JISX0213', // upper-cased like every other entry; iconv names are case-insensitive
        'ISO-2022-JP-3',

        // Chinese
        'BIG5-2003', // (experimental)

        // Turkmen
        'TDS565',

        // Platform specifics
        'ATARIST',
        'RISCOS-LATIN1',
    ];

    /**
     * Get a list of supported character encodings
     *
     * Uses late static binding, so a subclass that redeclares $encodings
     * exposes its own list.
     *
     * @return string[]
     */
    public static function getSupportedEncodings()
    {
        return static::$encodings;
    }

    /**
     * Constructor
     *
     * Refuses to build the wrapper when the "iconv" extension is missing,
     * because every operation of this class calls into it.
     *
     * @throws Exception\ExtensionNotLoadedException If ext/iconv is not loaded
     */
    public function __construct()
    {
        if (! extension_loaded('iconv')) {
            throw new Exception\ExtensionNotLoadedException(
                'PHP extension "iconv" is required for this wrapper'
            );
        }
    }

    /**
     * Returns the length of the given string
     *
     * The length is counted in characters of the wrapper's current encoding.
     *
     * @param string $str
     * @return int|false The value returned by iconv_strlen()
     */
    public function strlen($str)
    {
        return iconv_strlen($str, $this->getEncoding());
    }

    /**
     * Returns the portion of string specified by the start and length parameters
     *
     * @param string   $str
     * @param int      $offset
     * @param int|null $length Number of characters to return; null means
     *                         "everything from $offset to the end of the string"
     * @return string|false The value returned by iconv_substr()
     */
    public function substr($str, $offset = 0, $length = null)
    {
        if ($length === null) {
            // Before PHP 8.0 iconv_substr() has no nullable length: an explicit
            // null is coerced to 0, so the call no longer returns the rest of
            // the string. Substitute the byte length of the input, which is the
            // default iconv applies itself when the argument is omitted; it is
            // never smaller than the character count, so nothing is cut off.
            // Note: \strlen() is the global function, not this class' method.
            $length = \strlen($str);
        }

        return iconv_substr($str, $offset, $length, $this->getEncoding());
    }

    /**
     * Find the position of the first occurrence of a substring in a string
     *
     * @param string $haystack
     * @param string $needle
     * @param int    $offset
     * @return int|false The value returned by iconv_strpos()
     */
    public function strpos($haystack, $needle, $offset = 0)
    {
        return iconv_strpos($haystack, $needle, $offset, $this->getEncoding());
    }

    /**
     * Convert a string from defined encoding to the defined convert encoding
     *
     * @param string $str
     * @param bool   $reverse When true, convert from the convert encoding
     *                        back to the wrapper's encoding instead
     * @return string|false The value returned by iconv(), or $str unchanged
     *                      when both encodings are identical
     * @throws Exception\LogicException If no convert encoding has been defined
     */
    public function convert($str, $reverse = false)
    {
        $encoding        = $this->getEncoding();
        $convertEncoding = $this->getConvertEncoding();
        if ($convertEncoding === null) {
            throw new Exception\LogicException(
                'No convert encoding defined'
            );
        }

        // Nothing to do when source and target are the same encoding.
        if ($encoding === $convertEncoding) {
            return $str;
        }

        $fromEncoding = $reverse ? $convertEncoding : $encoding;
        $toEncoding   = $reverse ? $encoding : $convertEncoding;

        // Automatically append "//IGNORE" so the conversion does not stop at
        // invalid characters; an invalid character still triggers a notice.
        return iconv($fromEncoding, $toEncoding . '//IGNORE', $str);
    }
}
|