annotate vendor/zendframework/zend-stdlib/src/StringWrapper/Iconv.php @ 19:fa3358dc1485 tip

Add ndrum files
author Chris Cannam
date Wed, 28 Aug 2019 13:14:47 +0100
parents 7a779792577d
children
rev   line source
Chris@0 1 <?php
Chris@0 2 /**
Chris@0 3 * Zend Framework (http://framework.zend.com/)
Chris@0 4 *
Chris@0 5 * @link http://github.com/zendframework/zf2 for the canonical source repository
Chris@0 6 * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
Chris@0 7 * @license http://framework.zend.com/license/new-bsd New BSD License
Chris@0 8 */
Chris@0 9
Chris@0 10 namespace Zend\Stdlib\StringWrapper;
Chris@0 11
Chris@0 12 use Zend\Stdlib\Exception;
Chris@0 13
/**
 * String wrapper that implements multi-byte aware string operations on top
 * of PHP's "iconv" extension.
 *
 * The working encoding and the optional convert encoding are obtained through
 * getEncoding() / getConvertEncoding(), which are not defined in this class
 * (presumably inherited from AbstractStringWrapper).
 */
class Iconv extends AbstractStringWrapper
{
    /**
     * List of supported character sets (upper case)
     *
     * Every entry must be spelled in upper case: iconv itself treats charset
     * names case-insensitively, but consumers of this list are expected to
     * compare against upper-cased names (assumption based on the documented
     * invariant; the lookup code lives outside this class).
     *
     * @var string[]
     * @link http://www.gnu.org/software/libiconv/
     */
    protected static $encodings = [
        // European languages
        'ASCII',
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-7',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'KOI8-R',
        'KOI8-U',
        'KOI8-RU',
        'CP1250',
        'CP1251',
        'CP1252',
        'CP1253',
        'CP1254',
        'CP1257',
        'CP850',
        'CP866',
        'CP1131',
        'MACROMAN',
        'MACCENTRALEUROPE',
        'MACICELAND',
        'MACCROATIAN',
        'MACROMANIA',
        'MACCYRILLIC',
        'MACUKRAINE',
        'MACGREEK',
        'MACTURKISH',
        'MACINTOSH',

        // Semitic languages
        'ISO-8859-6',
        'ISO-8859-8',
        'CP1255',
        'CP1256',
        'CP862',
        'MACHEBREW',
        'MACARABIC',

        // Japanese
        'EUC-JP',
        'SHIFT_JIS',
        'CP932',
        'ISO-2022-JP',
        'ISO-2022-JP-2',
        'ISO-2022-JP-1',

        // Chinese
        'EUC-CN',
        'HZ',
        'GBK',
        'CP936',
        'GB18030',
        'EUC-TW',
        'BIG5',
        'CP950',
        'BIG5-HKSCS',
        'BIG5-HKSCS:2004',
        'BIG5-HKSCS:2001',
        'BIG5-HKSCS:1999',
        'ISO-2022-CN',
        'ISO-2022-CN-EXT',

        // Korean
        'EUC-KR',
        'CP949',
        'ISO-2022-KR',
        'JOHAB',

        // Armenian
        'ARMSCII-8',

        // Georgian
        'GEORGIAN-ACADEMY',
        'GEORGIAN-PS',

        // Tajik
        'KOI8-T',

        // Kazakh
        'PT154',
        'RK1048',

        // Thai
        'ISO-8859-11',
        'TIS-620',
        'CP874',
        'MACTHAI',

        // Laotian
        'MULELAO-1',
        'CP1133',

        // Vietnamese
        'VISCII',
        'TCVN',
        'CP1258',

        // Platform specifics
        'HP-ROMAN8',
        'NEXTSTEP',

        // Full Unicode
        'UTF-8',
        'UCS-2',
        'UCS-2BE',
        'UCS-2LE',
        'UCS-4',
        'UCS-4BE',
        'UCS-4LE',
        'UTF-16',
        'UTF-16BE',
        'UTF-16LE',
        'UTF-32',
        'UTF-32BE',
        'UTF-32LE',
        'UTF-7',
        'C99',
        'JAVA',

        /* Deliberately excluded: these are internal encodings that do not exist in the real world.
        // Full Unicode, in terms of uint16_t or uint32_t (with machine dependent endianness and alignment)
        'UCS-2-INTERNAL',
        'UCS-4-INTERNAL',

        // Locale dependent, in terms of `char' or `wchar_t' (with machine dependent endianness and alignment,
        // and with OS and locale dependent semantics)
        'char',
        'wchar_t',
        '', // The empty encoding name is equivalent to "char": it denotes the locale dependent character encoding.
        */

        // When libiconv is configured with the option --enable-extra-encodings,
        // it also provides support for a few extra encodings:

        // European languages
        'CP437',
        'CP737',
        'CP775',
        'CP852',
        'CP853',
        'CP855',
        'CP857',
        'CP858',
        'CP860',
        'CP861',
        'CP863',
        'CP865',
        'CP869',
        'CP1125',

        // Semitic languages
        'CP864',

        // Japanese
        'EUC-JISX0213',
        // Spelled in upper case to honour the "upper case" invariant of this list
        // (libiconv documents the name as "Shift_JISX0213").
        'SHIFT_JISX0213',
        'ISO-2022-JP-3',

        // Chinese
        'BIG5-2003', // (experimental)

        // Turkmen
        'TDS565',

        // Platform specifics
        'ATARIST',
        'RISCOS-LATIN1',
    ];

    /**
     * Get a list of supported character encodings
     *
     * Resolved with late static binding, so a subclass that redeclares
     * $encodings gets its own list back.
     *
     * @return string[]
     */
    public static function getSupportedEncodings()
    {
        return static::$encodings;
    }

    /**
     * Constructor
     *
     * @throws Exception\ExtensionNotLoadedException if the "iconv" extension is not loaded
     */
    public function __construct()
    {
        if (! extension_loaded('iconv')) {
            throw new Exception\ExtensionNotLoadedException(
                'PHP extension "iconv" is required for this wrapper'
            );
        }
    }

    /**
     * Returns the length of the given string
     *
     * The length is counted in characters of the current encoding, as
     * reported by iconv_strlen().
     *
     * @param string $str
     * @return int|false False if iconv_strlen() fails
     */
    public function strlen($str)
    {
        return iconv_strlen($str, $this->getEncoding());
    }

    /**
     * Returns the portion of string specified by the start and length parameters
     *
     * @param string   $str
     * @param int      $offset
     * @param int|null $length Number of characters to extract; null means
     *                         "everything from $offset to the end of the string"
     * @return string|false
     */
    public function substr($str, $offset = 0, $length = null)
    {
        if ($length === null) {
            // Before PHP 8.0 iconv_substr() does not accept null for the length:
            // null is coerced to 0 and nothing is extracted. The byte length is
            // an upper bound for the character count and is the same default the
            // engine applies when the argument is omitted; iconv_substr() clamps
            // it to the real remaining length.
            $length = \strlen($str);
        }

        return iconv_substr($str, $offset, $length, $this->getEncoding());
    }

    /**
     * Find the position of the first occurrence of a substring in a string
     *
     * @param string $haystack
     * @param string $needle
     * @param int    $offset
     * @return int|false False if the needle was not found
     */
    public function strpos($haystack, $needle, $offset = 0)
    {
        return iconv_strpos($haystack, $needle, $offset, $this->getEncoding());
    }

    /**
     * Convert a string from defined encoding to the defined convert encoding
     *
     * @param string $str
     * @param bool   $reverse When true, convert from the convert encoding back
     *                        to the defined encoding instead
     * @return string|false False if iconv() fails
     * @throws Exception\LogicException if no convert encoding has been defined
     */
    public function convert($str, $reverse = false)
    {
        $encoding        = $this->getEncoding();
        $convertEncoding = $this->getConvertEncoding();
        if ($convertEncoding === null) {
            throw new Exception\LogicException(
                'No convert encoding defined'
            );
        }

        // Nothing to do when source and target encodings are identical.
        if ($encoding === $convertEncoding) {
            return $str;
        }

        $fromEncoding = $reverse ? $convertEncoding : $encoding;
        $toEncoding   = $reverse ? $encoding : $convertEncoding;

        // Always append "//IGNORE" so that the conversion does not stop at the
        // first invalid character; iconv() still raises a notice for such input.
        return iconv($fromEncoding, $toEncoding . '//IGNORE', $str);
    }
}