vamp-build-and-test: DEPENDENCIES/generic/include/boost/locale/util.hpp annotate

annotate DEPENDENCIES/generic/include/boost/locale/util.hpp @ 133:4acb5d8d80b6 tip

Don't fail environmental check if README.md exists (but .txt and no-suffix don't)

author	Chris Cannam
date	Tue, 30 Jul 2019 12:25:44 +0100
parents	2665513ce2d3
children

rev	line source
Chris@16	1 //
Chris@16	2 // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
Chris@16	3 //
Chris@16	4 // Distributed under the Boost Software License, Version 1.0. (See
Chris@16	5 // accompanying file LICENSE_1_0.txt or copy at
Chris@16	6 // http://www.boost.org/LICENSE_1_0.txt)
Chris@16	7 //
Chris@16	8 #ifndef BOOST_LOCALE_UTIL_HPP
Chris@16	9 #define BOOST_LOCALE_UTIL_HPP
Chris@16	10 #include <locale>
Chris@16	11 #include <typeinfo>
Chris@16	12 #include <boost/cstdint.hpp>
Chris@16	13 #include <boost/locale/utf.hpp>
Chris@16	14 #include <boost/locale/generator.hpp>
Chris@16	15 #include <boost/assert.hpp>
Chris@16	16
Chris@16	17 #include <vector>
Chris@16	18 namespace boost {
Chris@16	19 namespace locale {
Chris@16	20 ///
Chris@16	21 /// \brief This namespace provides various utility function useful for Boost.Locale backends
Chris@16	22 /// implementations
Chris@16	23 ///
Chris@16	24 namespace util {
Chris@16	25
Chris@16	26 ///
Chris@16	27 /// \brief Return default system locale name in POSIX format.
Chris@16	28 ///
Chris@16	29 /// This function tries to detect the locale using the LC_CTYPE, LC_ALL and LANG environment
Chris@16	30 /// variables, in this order; if all of them are unset, on POSIX platforms it returns "C".
Chris@16	31 ///
Chris@16	32 /// On Windows, in addition to checking the above environment variables, this function
Chris@16	33 /// tries to create the locale name from the ISO-639 language and ISO-3166 country codes defined
Chris@16	34 /// for the user default locale.
Chris@16	35 /// If \a use_utf8_on_windows is true it sets the encoding to UTF-8; otherwise, if the system
Chris@16	36 /// locale supports an ANSI code-page, it uses that ANSI encoding (like windows-1252), and it falls back
Chris@16	37 /// to UTF-8 encoding if an ANSI code-page is not available.
Chris@16	38 ///
Chris@16	39 BOOST_LOCALE_DECL
Chris@16	40 std::string get_system_locale(bool use_utf8_on_windows = false);
Chris@16	41
Chris@16	42 ///
Chris@16	43 /// \brief Installs information facet to locale \a in based on locale name \a name
Chris@16	44 ///
Chris@16	45 /// This function installs boost::locale::info facet into the locale \a in and returns
Chris@16	46 /// newly created locale.
Chris@16	47 ///
Chris@16	48 /// Note: all information is based only on parsing of string \a name;
Chris@16	49 ///
Chris@16	50 /// The name has the following format: language[_COUNTRY][.encoding][\@variant]
Chris@16	51 /// where language is an ISO-639 language code like "en" or "ru", COUNTRY is an ISO-3166
Chris@16	52 /// country identifier like "US" or "RU", and encoding is a character set name
Chris@16	53 /// like UTF-8 or ISO-8859-1. Variant is a backend-specific variant like \c euro or
Chris@16	54 /// calendar=hebrew.
Chris@16	55 ///
Chris@16	56 /// If some parameters are missing they are specified as blanks, default encoding
Chris@16	57 /// is assumed to be US-ASCII and missing language is assumed to be "C"
Chris@16	58 ///
Chris@16	59 BOOST_LOCALE_DECL
Chris@16	60 std::locale create_info(std::locale const &in,std::string const &name);
Chris@16	61
Chris@16	62
Chris@16	63 ///
Chris@16	64 /// \brief This class represents a simple stateless converter to and from UCS-4 for
Chris@16	65 /// each single code point
Chris@16	66 ///
Chris@16	67 /// This class is used for creation of std::codecvt facet for converting utf-16/utf-32 encoding
Chris@16	68 /// to encoding supported by this converter
Chris@16	69 ///
Chris@16	70 /// Please note, this converter should be fully stateless. Fully stateless means it should
Chris@16	71 /// never assume that it is called in any specific order on the text. Even if the
Chris@16	72 /// encoding itself seems to be stateless like windows-1255 or shift-jis, some
Chris@16	73 /// encoders (most notably iconv) can actually compose several code-points into one or
Chris@16	74 /// decompose them in case composite characters are found. So be very careful when implementing
Chris@16	75 /// these converters for a certain character set.
Chris@16	76 ///
Chris@16	77 class base_converter {
Chris@16	78 public:
Chris@16	79
Chris@16	80 ///
Chris@16	81 /// This value should be returned when an illegal input sequence or code-point is observed:
Chris@16	82 /// For example if a UTF-32 code-point is in the range reserved for UTF-16 surrogates
Chris@16	83 /// or an invalid UTF-8 sequence is found
Chris@16	84 ///
Chris@16	85 static const uint32_t illegal=utf::illegal;
Chris@16	86
Chris@16	87 ///
Chris@16	88 /// This value is returned in the following cases: an incomplete input sequence was found or
Chris@16	89 /// insufficient output buffer was provided so complete output could not be written.
Chris@16	90 ///
Chris@16	91 static const uint32_t incomplete=utf::incomplete;
Chris@16	92
Chris@16	93 virtual ~base_converter()
Chris@16	94 {
Chris@16	95 }
Chris@16	96 ///
Chris@16	97 /// Return the maximal length that one Unicode code-point can be converted to, for example
Chris@16	98 /// for UTF-8 it is 4, for Shift-JIS it is 2 and for ISO-8859-1 it is 1
Chris@16	99 ///
Chris@16	100 virtual int max_len() const
Chris@16	101 {
Chris@16	102 return 1; // this default converter emits exactly one byte per code-point
Chris@16	103 }
Chris@16	104 ///
Chris@16	105 /// Returns true if calling the functions from_unicode, to_unicode, and max_len is thread safe.
Chris@16	106 ///
Chris@16	107 /// Rule of thumb: if this class' implementation uses simple tables that are unchanged
Chris@16	108 /// or is purely algorithmic like UTF-8 - so it does not share any mutable bit for
Chris@16	109 /// independent to_unicode, from_unicode calls, you may set it to true, otherwise,
Chris@16	110 /// for example if you use iconv_t descriptor or UConverter as conversion object return false,
Chris@16	111 /// and this object will be cloned for each use.
Chris@16	112 ///
Chris@16	113 virtual bool is_thread_safe() const
Chris@16	114 {
Chris@16	115 return false;
Chris@16	116 }
Chris@16	117 ///
Chris@16	118 /// Create a polymorphic copy of this object, usually called only if is_thread_safe() returns false
Chris@16	119 ///
Chris@16	120 virtual base_converter *clone() const
Chris@16	121 {
Chris@16	122 BOOST_ASSERT(typeid(*this)==typeid(base_converter)); // a derived class must override clone(), else the copy is a plain base_converter
Chris@16	123 return new base_converter();
Chris@16	124 }
Chris@16	125
Chris@16	126 ///
Chris@16	127 /// Convert a single character starting at begin and ending at most at end to Unicode code-point.
Chris@16	128 ///
Chris@16	129 /// if valid input sequence found in [\a begin,\a code_point_end) such that \a begin < \a code_point_end && \a code_point_end <= \a end
Chris@16	130 /// it is converted to its Unicode code point equivalent, \a begin is set to \a code_point_end
Chris@16	131 ///
Chris@16	132 /// if incomplete input sequence found in [\a begin,\a end), i.e. there may be such \a code_point_end that \a code_point_end > \a end
Chris@16	133 /// and [\a begin, \a code_point_end) would be valid input sequence, then \a incomplete is returned and \a begin stays unchanged, for example
Chris@16	134 /// for UTF-8 conversion, *begin = 0xc2 with \a begin + 1 = \a end is such a situation.
Chris@16	135 ///
Chris@16	136 /// if invalid input sequence found, i.e. there is a sequence [\a begin, \a code_point_end) such that \a code_point_end <= \a end
Chris@16	137 /// that is illegal for this encoding, \a illegal is returned and \a begin stays unchanged. For example if *begin = 0xFF and begin < end
Chris@16	138 /// for UTF-8, then \a illegal is returned.
Chris@16	139 ///
Chris@16	140 ///
Chris@16	141 virtual uint32_t to_unicode(char const *&begin,char const *end)
Chris@16	142 {
Chris@16	143 if(begin == end) // empty range: nothing to decode yet
Chris@16	144 return incomplete;
Chris@16	145 unsigned char cp = *begin; // read as unsigned so bytes >= 0x80 do not compare as negative
Chris@16	146 if(cp <= 0x7F) { // the default implementation accepts 7-bit values only
Chris@16	147 begin++; // consume the byte; begin is advanced only on success
Chris@16	148 return cp;
Chris@16	149 }
Chris@16	150 return illegal;
Chris@16	151 }
Chris@16	152 ///
Chris@16	153 /// Convert a single code-point \a u into encoding and store it in [begin,end) range.
Chris@16	154 ///
Chris@16	155 /// If u is invalid Unicode code-point, or it can not be mapped correctly to represented character set,
Chris@16	156 /// \a illegal should be returned
Chris@16	157 ///
Chris@16	158 /// If u can be converted to a sequence of bytes c1, ... , cN (1<= N <= max_len() ) then
Chris@16	159 ///
Chris@16	160 /// -# If end - begin >= N, c1, ... cN are written starting at begin and N is returned
Chris@16	161 /// -# If end - begin < N, incomplete is returned, it is unspecified what would be
Chris@16	162 /// stored in bytes in range [begin,end)
Chris@16	163
Chris@16	164 virtual uint32_t from_unicode(uint32_t u,char *begin,char const *end)
Chris@16	165 {
Chris@16	166 if(begin==end) // no room for even a single output byte
Chris@16	167 return incomplete;
Chris@16	168 if(u >= 0x80) // outside the 7-bit range handled by the default implementation
Chris@16	169 return illegal;
Chris@16	170 *begin = static_cast<char>(u);
Chris@16	171 return 1; // number of bytes written
Chris@16	172 }
Chris@16	173 };
Chris@16	174
Chris@16	175 ///
Chris@16	176 /// This function creates a \a base_converter that can be used for conversion between UTF-8 and
Chris@16	177 /// unicode code points
Chris@16	178 ///
Chris@16	179 BOOST_LOCALE_DECL std::auto_ptr<base_converter> create_utf8_converter();
Chris@16	180 ///
Chris@16	181 /// This function creates a \a base_converter that can be used for conversion between single byte
Chris@16	182 /// character encodings like ISO-8859-1, koi8-r, windows-1255 and Unicode code points.
Chris@16	183 ///
Chris@16	184 /// If \a encoding is not supported, an empty pointer is returned. You should check that
Chris@16	185 /// std::auto_ptr<base_converter>::get() != 0
Chris@16	186 ///
Chris@16	187 BOOST_LOCALE_DECL std::auto_ptr<base_converter> create_simple_converter(std::string const &encoding);
Chris@16	188
Chris@16	189
Chris@16	190 ///
Chris@16	191 /// Install codecvt facet into locale \a in and return new locale that is based on \a in and uses new
Chris@16	192 /// facet.
Chris@16	193 ///
Chris@16	194 /// The codecvt facet converts between narrow and wide/char16_t/char32_t encodings using the \a cvt converter.
Chris@16	195 /// If \a cvt is a null pointer, an always-failing conversion is used, which fails on the first input or output.
Chris@16	196 ///
Chris@16	197 /// Note: the codecvt facet handles both UTF-16 and UTF-32 wide encodings, it knows to break and join
Chris@16	198 /// Unicode code-points above 0xFFFF to and from surrogate pairs correctly. \a cvt should be unaware
Chris@16	199 /// of wide encoding type
Chris@16	200 ///
Chris@16	201 BOOST_LOCALE_DECL
Chris@16	202 std::locale create_codecvt(std::locale const &in,std::auto_ptr<base_converter> cvt,character_facet_type type);
Chris@16	203
Chris@16	204 } // util
Chris@16	205 } // locale
Chris@16	206 } // boost
Chris@16	207
Chris@16	208 #endif
Chris@16	209 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4

Mercurial > hg > vamp-build-and-test

annotate DEPENDENCIES/generic/include/boost/locale/util.hpp @ 133:4acb5d8d80b6 tip