vamp-build-and-test: DEPENDENCIES/generic/include/boost/locale/util.hpp comparison

comparison DEPENDENCIES/generic/include/boost/locale/util.hpp @ 16:2665513ce2d3

Add boost headers

author	Chris Cannam
date	Tue, 05 Aug 2014 11:11:38 +0100
parents
children

comparison

equal deleted inserted replaced

-:663ca0da4350
+:2665513ce2d3
+//
+//  Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
+//
+//  Distributed under the Boost Software License, Version 1.0. (See
+//  accompanying file LICENSE_1_0.txt or copy at
+//  http://www.boost.org/LICENSE_1_0.txt)
+//
+#ifndef BOOST_LOCALE_UTIL_HPP
+#define BOOST_LOCALE_UTIL_HPP
+#include <locale>
+#include <typeinfo>
+#include <boost/cstdint.hpp>
+#include <boost/locale/utf.hpp>
+#include <boost/locale/generator.hpp>
+#include <boost/assert.hpp>
+#include <vector>
+namespace boost {
+namespace locale {
+///
+/// \brief This namespace provides various utility functions useful for Boost.Locale backend
+/// implementations
+///
+namespace util {
+///
+/// \brief Return default system locale name in POSIX format.
+///
+/// This function tries to detect the locale using the LC_CTYPE, LC_ALL and LANG environment
+/// variables, in this order; if all of them are unset, on POSIX platforms it returns "C".
+///
+/// On Windows, in addition to checking the above environment variables, this function
+/// tries to create the locale name from the ISO-639 language and ISO-3166 country codes defined
+/// for the user default locale.
+/// If \a use_utf8_on_windows is true it sets the encoding to UTF-8; otherwise, if the system
+/// locale supports an ANSI code-page it uses that ANSI encoding, like windows-1252, and it falls back
+/// to UTF-8 encoding if an ANSI code-page is not available.
+///
+BOOST_LOCALE_DECL
+std::string get_system_locale(bool use_utf8_on_windows = false);
+///
+/// \brief Installs an information facet into the locale \a in, based on the locale name \a name
+///
+/// This function installs the boost::locale::info facet into the locale \a in and returns
+/// the newly created locale.
+///
+/// Note: all information is based only on parsing of the string \a name.
+///
+/// The name has the following format: language[_COUNTRY][.encoding][\@variant]
+/// where language is an ISO-639 language code like "en" or "ru", COUNTRY is an ISO-3166
+/// country identifier like "US" or "RU", encoding is a character set name
+/// like UTF-8 or ISO-8859-1, and variant is a backend specific variant like \c euro or
+/// calendar=hebrew.
+///
+/// If some parameters are missing they are specified as blanks; the default encoding
+/// is assumed to be US-ASCII and a missing language is assumed to be "C".
+///
+BOOST_LOCALE_DECL
+std::locale create_info(std::locale const &in,std::string const &name);
+///
+/// \brief This class represents a simple stateless converter to and from UCS-4 for
+///  each single code point
+///
+/// This class is used for creation of std::codecvt facet for converting utf-16/utf-32 encoding
+/// to encoding supported by this converter
+///
+/// Please note, this converter should be fully stateless. Fully stateless means it should
+/// never assume that it is called in any specific order on the text. Even if the
+/// encoding itself seems to be stateless like windows-1255 or shift-jis, some
+/// encoders (most notably iconv) can actually compose several code-points into one or
+/// decompose them in case composite characters are found. So be very careful when implementing
+/// these converters for a certain character set.
+///
+class base_converter {
+public:
+///
+/// This value should be returned when an illegal input sequence or code-point is observed:
+/// for example if a UCS-4 code-point is in the range reserved for UTF-16 surrogates
+/// or an invalid UTF-8 sequence is found.
+///
+static const uint32_t illegal=utf::illegal;
+///
+/// This value is returned in the following cases: an incomplete input sequence was found, or
+/// an insufficient output buffer was provided so the complete output could not be written.
+///
+static const uint32_t incomplete=utf::incomplete;
+virtual ~base_converter()
+{
+}
+///
+/// Return the maximal length that one Unicode code-point can be converted to, for example
+/// for UTF-8 it is 4, for Shift-JIS it is 2 and for ISO-8859-1 it is 1. The default implementation returns 1.
+///
+virtual int max_len() const
+{
+return 1;
+}
+///
+/// Returns true if calling the functions from_unicode, to_unicode, and max_len is thread safe.
+///
+/// Rule of thumb: if this class' implementation uses simple tables that are unchanged
+/// or is purely algorithmic like UTF-8 - so it does not share any mutable bit between
+/// independent to_unicode, from_unicode calls - you may return true; otherwise,
+/// for example if you use an iconv_t descriptor or UConverter as the conversion object, return false,
+/// and this object will be cloned for each use. The default implementation returns false.
+///
+virtual bool is_thread_safe() const
+{
+return false;
+}
+///
+/// Create a polymorphic copy of this object, usually called only if is_thread_safe() returns false. The default implementation asserts that the dynamic type is exactly base_converter.
+///
+virtual base_converter *clone() const
+{
+BOOST_ASSERT(typeid(*this)==typeid(base_converter));
+return new base_converter();
+}
+///
+/// Convert a single character starting at begin and ending at most at end to a Unicode code-point.
+///
+/// If a valid input sequence is found in [\a begin,\a code_point_end) such that \a begin < \a code_point_end && \a code_point_end <= \a end,
+/// it is converted to its Unicode code point equivalent and \a begin is set to \a code_point_end.
+///
+/// If an incomplete input sequence is found in [\a begin,\a end), i.e. there may be such \a code_point_end that \a code_point_end > \a end
+/// and [\a begin, \a code_point_end) would be a valid input sequence, then \a incomplete is returned and \a begin stays unchanged; for example,
+/// for UTF-8 conversion, *begin = 0xc2 with \a begin + 1 = \a end is such a situation.
+///
+/// If an invalid input sequence is found, i.e. there is a sequence [\a begin, \a code_point_end) such that \a code_point_end <= \a end
+/// that is illegal for this encoding, \a illegal is returned and \a begin stays unchanged. For example if *begin = 0xFF and begin < end
+/// for UTF-8, then \a illegal is returned.
+///
+/// The default implementation accepts only bytes <= 0x7F, returns \a illegal for any other byte and \a incomplete for an empty range.
+virtual uint32_t to_unicode(char const *&begin,char const *end)
+{
+if(begin == end)
+return incomplete;
+unsigned char cp = *begin;
+if(cp <= 0x7F) {
+begin++;
+return cp;
+}
+return illegal;
+}
+///
+/// Convert a single code-point \a u into the encoding and store it in the [begin,end) range.
+///
+/// If u is an invalid Unicode code-point, or it can not be mapped correctly to the represented character set,
+/// \a illegal should be returned.
+///
+/// If u can be converted to a sequence of bytes c1, ... , cN (1<= N <= max_len() ) then
+///
+/// -# If end - begin >= N, c1, ... cN are written starting at begin and N is returned
+/// -# If end - begin < N, incomplete is returned, it is unspecified what would be
+///    stored in bytes in range [begin,end)
+virtual uint32_t from_unicode(uint32_t u,char *begin,char const *end)
+{
+if(begin==end)
+return incomplete;
+if(u >= 0x80)
+return illegal;
+*begin = static_cast<char>(u);
+return 1;
+}
+};
+///
+/// This function creates a \a base_converter that can be used for conversion between UTF-8 and
+/// Unicode code points
+///
+BOOST_LOCALE_DECL std::auto_ptr<base_converter> create_utf8_converter();
+///
+/// This function creates a \a base_converter that can be used for conversion between single byte
+/// character encodings like ISO-8859-1, koi8-r, windows-1255 and Unicode code points.
+///
+/// If \a encoding is not supported, an empty pointer is returned. You should check that
+/// std::auto_ptr<base_converter>::get() != 0
+///
+BOOST_LOCALE_DECL std::auto_ptr<base_converter> create_simple_converter(std::string const &encoding);
+///
+/// Install a codecvt facet into the locale \a in and return a new locale that is based on \a in and uses the new
+/// facet.
+///
+/// The codecvt facet converts between narrow and wide/char16_t/char32_t encodings using the \a cvt converter.
+/// If \a cvt is a null pointer, an always-failing conversion is used that fails on the first input or output.
+///
+/// Note: the codecvt facet handles both UTF-16 and UTF-32 wide encodings; it knows how to break and join
+/// Unicode code-points above 0xFFFF to and from surrogate pairs correctly. \a cvt should be unaware
+/// of the wide encoding type.
+///
+BOOST_LOCALE_DECL
+std::locale create_codecvt(std::locale const &in,std::auto_ptr<base_converter> cvt,character_facet_type type);
+} // util
+} // locale
+} // boost
+#endif
+// vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4

Mercurial > hg > vamp-build-and-test

comparison DEPENDENCIES/generic/include/boost/locale/util.hpp @ 16:2665513ce2d3