vamp-build-and-test: DEPENDENCIES/generic/include/boost/detail/utf8_codecvt

annotate DEPENDENCIES/generic/include/boost/detail/utf8_codecvt_facet.hpp @ 125:34e428693f5d vext

Vext -> Repoint

author	Chris Cannam
date	Thu, 14 Jun 2018 11:15:39 +0100
parents	c530137014c0
children

rev	line source
Chris@16	1 // Copyright (c) 2001 Ronald Garcia, Indiana University (garcia@osl.iu.edu)
Chris@16	2 // Andrew Lumsdaine, Indiana University (lums@osl.iu.edu).
Chris@16	3 // Distributed under the Boost Software License, Version 1.0. (See accompany-
Chris@16	4 // ing file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
Chris@16	5
Chris@16	6 #ifndef BOOST_UTF8_CODECVT_FACET_HPP
Chris@16	7 #define BOOST_UTF8_CODECVT_FACET_HPP
Chris@16	8
Chris@16	9 // MS compatible compilers support #pragma once
Chris@16	10 #if defined(_MSC_VER) && (_MSC_VER >= 1020)
Chris@16	11 # pragma once
Chris@16	12 #endif
Chris@16	13
Chris@16	14 /////////1/////////2/////////3/////////4/////////5/////////6/////////7/////////8
Chris@16	15 // utf8_codecvt_facet.hpp
Chris@16	16
Chris@101	17 // This header defines class utf8_codecvt_facet, derived from
Chris@16	18 // std::codecvt<wchar_t, char>, which can be used to convert utf8 data in
Chris@16	19 // files into wchar_t strings in the application.
Chris@16	20 //
Chris@16	21 // The header is NOT STANDALONE, and is not to be included by the USER.
Chris@16	22 // There are at least two libraries which want to use this functionality, and
Chris@16	23 // we want to avoid code duplication. It would be possible to create utf8
Chris@16	24 // library, but:
Chris@16	25 // - this requires review process first
Chris@16	26 // - in that case, when linking to a library which uses utf8
Chris@16	27 // (say 'program_options'), user should also link to the utf8 library.
Chris@16	28 // This seems inconvenient, and asking a user to link to an unreviewed
Chris@16	29 // library is strange.
Chris@16	30 // Until the above points are fixed, a library which wants to use utf8 must:
Chris@101	31 // - include this header in one of its headers or sources
Chris@101	32 // - include the corresponding boost/detail/utf8_codecvt_facet.ipp file in one
Chris@101	33 // of its sources
Chris@16	34 // - before including either file, the library must define
Chris@16	35 // - BOOST_UTF8_BEGIN_NAMESPACE to the namespace declaration that must be used
Chris@16	36 // - BOOST_UTF8_END_NAMESPACE to the code to close the previous namespace
Chris@101	37 // declaration.
Chris@16	38 // - BOOST_UTF8_DECL -- to the code which must be used for all 'exportable'
Chris@16	39 // symbols.
Chris@16	40 //
Chris@16	41 // For example, program_options library might contain:
Chris@16	42 // #define BOOST_UTF8_BEGIN_NAMESPACE <backslash character>
Chris@16	43 // namespace boost { namespace program_options {
Chris@16	44 // #define BOOST_UTF8_END_NAMESPACE }}
Chris@16	45 // #define BOOST_UTF8_DECL BOOST_PROGRAM_OPTIONS_DECL
Chris@101	46 // #include <boost/detail/utf8_codecvt_facet.ipp>
Chris@16	47 //
Chris@16	48 // Essentially, each library will have its own copy of utf8 code, in
Chris@16	49 // different namespaces.
Chris@16	50
Chris@16	51 // Note:(Robert Ramey). I have made the following alterations in the original
Chris@16	52 // code.
Chris@16	53 // a) Rendered utf8_codecvt<wchar_t, char> without using templates
Chris@16	54 // b) Move longer functions outside class definition to prevent inlining
Chris@16	55 // and make code smaller
Chris@16	56 // c) added on a derived class to permit translation to/from current
Chris@16	57 // locale to utf8
Chris@16	58
Chris@16	59 // See http://www.boost.org for updates, documentation, and revision history.
Chris@16	60
Chris@16	61 // archives stored as text - note these are templated on the basic
Chris@16	62 // stream templates to accommodate wide (and other?) kind of characters
Chris@16	63 //
Chris@16	64 // note the fact that on libraries without wide characters, ostream is
Chris@16	65 // not a specialization of basic_ostream which in fact is not defined
Chris@16	66 // in such cases. So we can't use basic_ostream<OStream::char_type> but rather
Chris@16	67 // use two template parameters
Chris@16	68 //
Chris@16	69 // utf8_codecvt_facet
Chris@16	70 // This is an implementation of a std::codecvt facet for translating
Chris@16	71 // from UTF-8 externally to UCS-4. Note that this is not tied to
Chris@16	72 // any specific types in order to allow customization on platforms
Chris@16	73 // where wchar_t is not big enough.
Chris@16	74 //
Chris@16	75 // NOTES: The current implementation jumps through some unpleasant hoops in
Chris@16	76 // order to deal with signed character types. As a result,
Chris@16	77 // it is necessary for the ExternType to be convertible to unsigned char.
Chris@16	78 // I chose not to tie the extern_type explicitly to char. But if any combination
Chris@16	79 // of types other than <wchar_t,char_t> is used, then std::codecvt must be
Chris@16	80 // specialized on those types for this to work.
Chris@16	81
Chris@16	82 #include <locale>
Chris@16	83 #include <cwchar> // for mbstate_t
Chris@16	84 #include <cstddef> // for std::size_t
Chris@16	85
Chris@16	86 #include <boost/config.hpp>
Chris@16	87 #include <boost/detail/workaround.hpp>
Chris@16	88
Chris@16	89 #if defined(BOOST_NO_STDC_NAMESPACE)
Chris@16	90 namespace std { // make the global-namespace C names below reachable as std::
Chris@16	91 using ::mbstate_t;
Chris@16	92 using ::size_t;
Chris@16	93 }
Chris@16	94 #endif // BOOST_NO_STDC_NAMESPACE
Chris@16	95
Chris@16	96 // maximum length of a multibyte string
Chris@16	97 #define MB_LENGTH_MAX 8
Chris@16	98
Chris@16	99 BOOST_UTF8_BEGIN_NAMESPACE // namespace opener supplied by the including library
Chris@16	100
Chris@101	101 //----------------------------------------------------------------------------//
Chris@101	102 // //
Chris@101	103 // utf8_codecvt_facet //
Chris@101	104 // //
Chris@101	105 // See utf8_codecvt_facet.ipp for the implementation. //
Chris@101	106 //----------------------------------------------------------------------------//
Chris@101	107
Chris@101	108
Chris@16	109 struct BOOST_UTF8_DECL utf8_codecvt_facet : // codecvt facet: UTF-8 chars externally, wchar_t internally
Chris@16	110 public std::codecvt<wchar_t, char, std::mbstate_t>
Chris@16	111 {
Chris@16	112 public:
Chris@16	113 explicit utf8_codecvt_facet(std::size_t no_locale_manage=0) // argument is forwarded unchanged to the std::codecvt base
Chris@16	114 : std::codecvt<wchar_t, char, std::mbstate_t>(no_locale_manage)
Chris@16	115 {}
Chris@16	116 protected:
Chris@16	117 virtual std::codecvt_base::result do_in( // external char -> internal wchar_t; only declared here
Chris@16	118 std::mbstate_t& state,
Chris@16	119 const char * from,
Chris@16	120 const char * from_end,
Chris@16	121 const char * & from_next,
Chris@16	122 wchar_t * to,
Chris@16	123 wchar_t * to_end,
Chris@16	124 wchar_t*& to_next
Chris@16	125 ) const;
Chris@16	126
Chris@16	127 virtual std::codecvt_base::result do_out( // internal wchar_t -> external char; only declared here
Chris@101	128 std::mbstate_t & state,
Chris@101	129 const wchar_t * from,
Chris@101	130 const wchar_t * from_end,
Chris@101	131 const wchar_t* & from_next,
Chris@101	132 char * to,
Chris@101	133 char * to_end,
Chris@101	134 char * & to_next
Chris@16	135 ) const;
Chris@16	136
Chris@16	137 bool invalid_continuing_octet(unsigned char octet_1) const { // true when octet_1 lies outside 0x80..0xbf
Chris@16	138 return (octet_1 < 0x80 || 0xbf < octet_1);
Chris@16	139 }
Chris@16	140
Chris@16	141 bool invalid_leading_octet(unsigned char octet_1) const { // true for 0x80..0xbf and for values above 0xfd
Chris@16	142 return (0x7f < octet_1 && octet_1 < 0xc0) ||
Chris@16	143 (octet_1 > 0xfd);
Chris@16	144 }
Chris@16	145
Chris@16	146 // continuing octets = octets except for the leading octet
Chris@101	147 static unsigned int get_cont_octet_count(unsigned char lead_octet) {
Chris@16	148 return get_octet_count(lead_octet) - 1;
Chris@16	149 }
Chris@16	150
Chris@101	151 static unsigned int get_octet_count(unsigned char lead_octet); // only declared here; definition is not in this header
Chris@16	152
Chris@16	153 // How many "continuing octets" will be needed for this word
Chris@16	154 // == total octets - 1.
Chris@16	155 int get_cont_octet_out_count(wchar_t word) const ;
Chris@16	156
Chris@101	157 virtual bool do_always_noconv() const BOOST_NOEXCEPT_OR_NOTHROW { // always reports that conversion is required
Chris@101	158 return false;
Chris@101	159 }
Chris@16	160
Chris@16	161 // UTF-8 isn't really stateful since we rewind on partial conversions
Chris@16	162 virtual std::codecvt_base::result do_unshift(
Chris@16	163 std::mbstate_t&,
Chris@16	164 char * from,
Chris@16	165 char * /*to*/,
Chris@16	166 char * & next
Chris@101	167 ) const {
Chris@16	168 next = from; // nothing is written: the output position stays at the start
Chris@16	169 return ok;
Chris@16	170 }
Chris@16	171
Chris@101	172 virtual int do_encoding() const BOOST_NOEXCEPT_OR_NOTHROW {
Chris@16	173 const int variable_byte_external_encoding=0; // 0 is the value returned for a variable-width external encoding
Chris@16	174 return variable_byte_external_encoding;
Chris@16	175 }
Chris@16	176
Chris@16	177 // How many char objects can I process to get <= max_limit
Chris@16	178 // wchar_t objects?
Chris@16	179 virtual int do_length(
Chris@101	180 const std::mbstate_t &,
Chris@16	181 const char * from,
Chris@16	182 const char * from_end,
Chris@16	183 std::size_t max_limit
Chris@101	184 ) const
Chris@16	185 #if BOOST_WORKAROUND(__IBMCPP__, BOOST_TESTED_AT(600))
Chris@101	186 throw()
Chris@16	187 #endif
Chris@101	188 ;
Chris@101	189 virtual int do_length( // overload taking a non-const state (presumably for std libraries that declare it so -- confirm)
Chris@101	190 std::mbstate_t & s,
Chris@101	191 const char * from,
Chris@101	192 const char * from_end,
Chris@101	193 std::size_t max_limit
Chris@101	194 ) const
Chris@101	195 #if BOOST_WORKAROUND(__IBMCPP__, BOOST_TESTED_AT(600))
Chris@101	196 throw()
Chris@101	197 #endif
Chris@101	198 { // forwards to the const-state overload declared above
Chris@101	199 return do_length(
Chris@101	200 const_cast<const std::mbstate_t &>(s),
Chris@101	201 from,
Chris@101	202 from_end,
Chris@101	203 max_limit
Chris@101	204 );
Chris@101	205 }
Chris@16	206 // Largest possible value do_length(state,from,from_end,1) could return.
Chris@101	207 virtual int do_max_length() const BOOST_NOEXCEPT_OR_NOTHROW {
Chris@16	208 return 6; // largest UTF-8 encoding of a UCS-4 character
Chris@16	209 }
Chris@16	210 };
Chris@16	211
Chris@16	212 BOOST_UTF8_END_NAMESPACE // closes the namespace opened by BOOST_UTF8_BEGIN_NAMESPACE
Chris@16	213
Chris@16	214 #endif // BOOST_UTF8_CODECVT_FACET_HPP

Mercurial > hg > vamp-build-and-test

annotate DEPENDENCIES/generic/include/boost/detail/utf8_codecvt_facet.hpp @ 125:34e428693f5d vext