Mercurial > hg > isophonics-drupal-site

diff vendor/zendframework/zend-stdlib/src/StringWrapper/Iconv.php @ 0:4c8ae668cc8c
Initial import (non-working)
author: Chris Cannam
date: Wed, 29 Nov 2017 16:09:58 +0000
children: 7a779792577d
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/vendor/zendframework/zend-stdlib/src/StringWrapper/Iconv.php	Wed Nov 29 16:09:58 2017 +0000
@@ -0,0 +1,289 @@
+<?php
+/**
+ * Zend Framework (http://framework.zend.com/)
+ *
+ * @link      http://github.com/zendframework/zf2 for the canonical source repository
+ * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd New BSD License
+ */
+
+namespace Zend\Stdlib\StringWrapper;
+
+use Zend\Stdlib\Exception;
+
+class Iconv extends AbstractStringWrapper
+{
+    /**
+     * List of supported character sets (upper case)
+     *
+     * @var string[]
+     * @link http://www.gnu.org/software/libiconv/
+     */
+    protected static $encodings = [
+        // European languages
+        'ASCII',
+        'ISO-8859-1',
+        'ISO-8859-2',
+        'ISO-8859-3',
+        'ISO-8859-4',
+        'ISO-8859-5',
+        'ISO-8859-7',
+        'ISO-8859-9',
+        'ISO-8859-10',
+        'ISO-8859-13',
+        'ISO-8859-14',
+        'ISO-8859-15',
+        'ISO-8859-16',
+        'KOI8-R',
+        'KOI8-U',
+        'KOI8-RU',
+        'CP1250',
+        'CP1251',
+        'CP1252',
+        'CP1253',
+        'CP1254',
+        'CP1257',
+        'CP850',
+        'CP866',
+        'CP1131',
+        'MACROMAN',
+        'MACCENTRALEUROPE',
+        'MACICELAND',
+        'MACCROATIAN',
+        'MACROMANIA',
+        'MACCYRILLIC',
+        'MACUKRAINE',
+        'MACGREEK',
+        'MACTURKISH',
+        'MACINTOSH',
+
+        // Semitic languages
+        'ISO-8859-6',
+        'ISO-8859-8',
+        'CP1255',
+        'CP1256',
+        'CP862',
+        'MACHEBREW',
+        'MACARABIC',
+
+        // Japanese
+        'EUC-JP',
+        'SHIFT_JIS',
+        'CP932',
+        'ISO-2022-JP',
+        'ISO-2022-JP-2',
+        'ISO-2022-JP-1',
+
+        // Chinese
+        'EUC-CN',
+        'HZ',
+        'GBK',
+        'CP936',
+        'GB18030',
+        'EUC-TW',
+        'BIG5',
+        'CP950',
+        'BIG5-HKSCS',
+        'BIG5-HKSCS:2004',
+        'BIG5-HKSCS:2001',
+        'BIG5-HKSCS:1999',
+        'ISO-2022-CN',
+        'ISO-2022-CN-EXT',
+
+        // Korean
+        'EUC-KR',
+        'CP949',
+        'ISO-2022-KR',
+        'JOHAB',
+
+        // Armenian
+        'ARMSCII-8',
+
+        // Georgian
+        'GEORGIAN-ACADEMY',
+        'GEORGIAN-PS',
+
+        // Tajik
+        'KOI8-T',
+
+        // Kazakh
+        'PT154',
+        'RK1048',
+
+        // Thai
+        'ISO-8859-11',
+        'TIS-620',
+        'CP874',
+        'MACTHAI',
+
+        // Laotian
+        'MULELAO-1',
+        'CP1133',
+
+        // Vietnamese
+        'VISCII',
+        'TCVN',
+        'CP1258',
+
+        // Platform specifics
+        'HP-ROMAN8',
+        'NEXTSTEP',
+
+        // Full Unicode
+        'UTF-8',
+        'UCS-2',
+        'UCS-2BE',
+        'UCS-2LE',
+        'UCS-4',
+        'UCS-4BE',
+        'UCS-4LE',
+        'UTF-16',
+        'UTF-16BE',
+        'UTF-16LE',
+        'UTF-32',
+        'UTF-32BE',
+        'UTF-32LE',
+        'UTF-7',
+        'C99',
+        'JAVA',
+
+        /* Commented out because that's internal encodings not existing in real world
+        // Full Unicode, in terms of uint16_t or uint32_t (with machine dependent endianness and alignment)
+        'UCS-2-INTERNAL',
+        'UCS-4-INTERNAL',
+
+        // Locale dependent, in terms of `char' or `wchar_t' (with machine dependent endianness and alignment,
+        // and with OS and locale dependent semantics)
+        'char',
+        'wchar_t',
+        '', // The empty encoding name is equivalent to "char": it denotes the locale dependent character encoding.
+        */
+
+        // When configured with the option --enable-extra-encodings,
+        // it also provides support for a few extra encodings:
+
+        // European languages
+        'CP437',
+        'CP737',
+        'CP775',
+        'CP852',
+        'CP853',
+        'CP855',
+        'CP857',
+        'CP858',
+        'CP860',
+        'CP861',
+        'CP863',
+        'CP865',
+        'CP869',
+        'CP1125',
+
+        // Semitic languages
+        'CP864',
+
+        // Japanese
+        'EUC-JISX0213',
+        'Shift_JISX0213',
+        'ISO-2022-JP-3',
+
+        // Chinese
+        'BIG5-2003', // (experimental)
+
+        // Turkmen
+        'TDS565',
+
+        // Platform specifics
+        'ATARIST',
+        'RISCOS-LATIN1',
+    ];
+
+    /**
+     * Get a list of supported character encodings
+     *
+     * @return string[]
+     */
+    public static function getSupportedEncodings()
+    {
+        return static::$encodings;
+    }
+
+    /**
+     * Constructor
+     *
+     * @throws Exception\ExtensionNotLoadedException
+     */
+    public function __construct()
+    {
+        if (!extension_loaded('iconv')) {
+            throw new Exception\ExtensionNotLoadedException(
+                'PHP extension "iconv" is required for this wrapper'
+            );
+        }
+    }
+
+    /**
+     * Returns the length of the given string
+     *
+     * @param string $str
+     * @return int|false
+     */
+    public function strlen($str)
+    {
+        return iconv_strlen($str, $this->getEncoding());
+    }
+
+    /**
+     * Returns the portion of string specified by the start and length parameters
+     *
+     * @param string   $str
+     * @param int      $offset
+     * @param int|null $length
+     * @return string|false
+     */
+    public function substr($str, $offset = 0, $length = null)
+    {
+        return iconv_substr($str, $offset, $length, $this->getEncoding());
+    }
+
+    /**
+     * Find the position of the first occurrence of a substring in a string
+     *
+     * @param string $haystack
+     * @param string $needle
+     * @param int    $offset
+     * @return int|false
+     */
+    public function strpos($haystack, $needle, $offset = 0)
+    {
+        return iconv_strpos($haystack, $needle, $offset, $this->getEncoding());
+    }
+
+    /**
+     * Convert a string from defined encoding to the defined convert encoding
+     *
+     * @param string  $str
+     * @param bool $reverse
+     * @return string|false
+     */
+    public function convert($str, $reverse = false)
+    {
+        $encoding        = $this->getEncoding();
+        $convertEncoding = $this->getConvertEncoding();
+        if ($convertEncoding === null) {
+            throw new Exception\LogicException(
+                'No convert encoding defined'
+            );
+        }
+
+        if ($encoding === $convertEncoding) {
+            return $str;
+        }
+
+        $fromEncoding = $reverse ? $convertEncoding : $encoding;
+        $toEncoding   = $reverse ? $encoding : $convertEncoding;
+
+        // automatically add "//IGNORE" to not stop converting on invalid characters
+        // invalid characters triggers a notice anyway
+        return iconv($fromEncoding, $toEncoding . '//IGNORE', $str);
+    }
+}
author	Chris Cannam
date	Wed, 29 Nov 2017 16:09:58 +0000
parents
children	7a779792577d