Chris@16
|
1 //
|
Chris@16
|
2 // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
|
Chris@16
|
3 //
|
Chris@16
|
4 // Distributed under the Boost Software License, Version 1.0. (See
|
Chris@16
|
5 // accompanying file LICENSE_1_0.txt or copy at
|
Chris@16
|
6 // http://www.boost.org/LICENSE_1_0.txt)
|
Chris@16
|
7 //
|
Chris@16
|
8 #ifndef BOOST_LOCALE_CONVERTER_HPP_INCLUDED
|
Chris@16
|
9 #define BOOST_LOCALE_CONVERTER_HPP_INCLUDED
|
Chris@16
|
10
|
Chris@16
|
#include <boost/locale/config.hpp>
#ifdef BOOST_MSVC
#  pragma warning(push)
#  pragma warning(disable : 4275 4251 4231 4660)
#endif
#include <locale>
#include <string>
|
Chris@16
|
17
|
Chris@16
|
18
|
Chris@16
|
19 namespace boost {
|
Chris@16
|
20 namespace locale {
|
Chris@16
|
21
|
Chris@16
|
22 ///
|
Chris@16
|
23 /// \defgroup convert Text Conversions
|
Chris@16
|
24 ///
|
Chris@16
|
/// This module provides various functions for string manipulation, such as Unicode normalization and case conversion.
|
Chris@16
|
26 /// @{
|
Chris@16
|
27 ///
|
Chris@16
|
28
|
Chris@16
|
29
|
Chris@16
|
///
/// \brief This class provides the base flags for text manipulation. It is used as a base class
/// of the converter facet.
///
class converter_base {
public:
    ///
    /// The flag passed to the facet - selects the type of operation to perform
    ///
    enum conversion_type {
        normalization,  ///< Apply Unicode normalization on the text
        upper_case,     ///< Convert text to upper case
        lower_case,     ///< Convert text to lower case
        case_folding,   ///< Fold case in the text
        title_case      ///< Convert text to title case
    };
};
|
Chris@16
|
46
|
Chris@16
|
/// Forward declaration of the text conversion facet template.
template<typename CharType> class converter;
|
Chris@16
|
49
|
Chris@16
|
50 #ifdef BOOST_LOCALE_DOXYGEN
|
Chris@16
|
51 ///
|
Chris@16
|
52 /// \brief The facet that implements text manipulation
|
Chris@16
|
53 ///
|
Chris@16
|
54 /// It is used to performs text conversion operations defined by \ref conversion_type. It is specialized
|
Chris@16
|
55 /// for four types of characters \c char, \c wchar_t, \c char16_t, \c char32_t
|
Chris@16
|
56 ///
|
Chris@16
|
57 template<typename Char>
|
Chris@16
|
58 class BOOST_LOCALE_DECL converter: public converter_base, public std::locale::facet {
|
Chris@16
|
59 public:
|
Chris@16
|
60 /// Locale identification
|
Chris@16
|
61 static std::locale::id id;
|
Chris@16
|
62
|
Chris@16
|
63 /// Standard constructor
|
Chris@16
|
64 converter(size_t refs = 0) : std::locale::facet(refs)
|
Chris@16
|
65 {
|
Chris@16
|
66 }
|
Chris@16
|
67 ///
|
Chris@16
|
68 /// Convert text in range [\a begin, \a end) according to conversion method \a how. Parameter
|
Chris@16
|
69 /// \a flags is used for specification of normalization method like nfd, nfc etc.
|
Chris@16
|
70 ///
|
Chris@16
|
71 virtual std::basic_string<Char> convert(conversion_type how,Char const *begin,Char const *end,int flags = 0) const = 0;
|
Chris@16
|
72 #if defined (__SUNPRO_CC) && defined (_RWSTD_VER)
|
Chris@16
|
73 std::locale::id& __get_id (void) const { return id; }
|
Chris@16
|
74 #endif
|
Chris@16
|
75 };
|
Chris@16
|
76 #else
|
Chris@16
|
77
|
Chris@16
|
78 template<>
|
Chris@16
|
79 class BOOST_LOCALE_DECL converter<char> : public converter_base, public std::locale::facet {
|
Chris@16
|
80 public:
|
Chris@16
|
81 static std::locale::id id;
|
Chris@16
|
82
|
Chris@16
|
83 converter(size_t refs = 0) : std::locale::facet(refs)
|
Chris@16
|
84 {
|
Chris@16
|
85 }
|
Chris@16
|
86 virtual std::string convert(conversion_type how,char const *begin,char const *end,int flags = 0) const = 0;
|
Chris@16
|
87 #if defined (__SUNPRO_CC) && defined (_RWSTD_VER)
|
Chris@16
|
88 std::locale::id& __get_id (void) const { return id; }
|
Chris@16
|
89 #endif
|
Chris@16
|
90 };
|
Chris@16
|
91
|
Chris@16
|
92 template<>
|
Chris@16
|
93 class BOOST_LOCALE_DECL converter<wchar_t> : public converter_base, public std::locale::facet {
|
Chris@16
|
94 public:
|
Chris@16
|
95 static std::locale::id id;
|
Chris@16
|
96 converter(size_t refs = 0) : std::locale::facet(refs)
|
Chris@16
|
97 {
|
Chris@16
|
98 }
|
Chris@16
|
99 virtual std::wstring convert(conversion_type how,wchar_t const *begin,wchar_t const *end,int flags = 0) const = 0;
|
Chris@16
|
100 #if defined (__SUNPRO_CC) && defined (_RWSTD_VER)
|
Chris@16
|
101 std::locale::id& __get_id (void) const { return id; }
|
Chris@16
|
102 #endif
|
Chris@16
|
103 };
|
Chris@16
|
104
|
Chris@16
|
#ifdef BOOST_HAS_CHAR16_T
/// Specialization of the conversion facet for \c char16_t strings;
/// available only when BOOST_HAS_CHAR16_T is defined.
template<>
class BOOST_LOCALE_DECL converter<char16_t> : public converter_base, public std::locale::facet {
public:
    /// Locale identification
    static std::locale::id id;

    /// Standard constructor, \a refs is forwarded to std::locale::facet
    converter(size_t refs = 0) : std::locale::facet(refs) {}

    /// Convert the text in range [\a begin, \a end) using the conversion method \a how
    virtual std::basic_string<char16_t> convert(conversion_type how,char16_t const *begin,char16_t const *end,int flags = 0) const = 0;

#if defined (__SUNPRO_CC) && defined (_RWSTD_VER)
    /// Accessor returning \ref id; compiled only when both __SUNPRO_CC and _RWSTD_VER are defined
    std::locale::id& __get_id (void) const { return id; }
#endif
};
#endif
|
Chris@16
|
119
|
Chris@16
|
#ifdef BOOST_HAS_CHAR32_T
/// Specialization of the conversion facet for \c char32_t strings;
/// available only when BOOST_HAS_CHAR32_T is defined.
template<>
class BOOST_LOCALE_DECL converter<char32_t> : public converter_base, public std::locale::facet {
public:
    /// Locale identification
    static std::locale::id id;

    /// Standard constructor, \a refs is forwarded to std::locale::facet
    converter(size_t refs = 0) : std::locale::facet(refs) {}

    /// Convert the text in range [\a begin, \a end) using the conversion method \a how
    virtual std::basic_string<char32_t> convert(conversion_type how,char32_t const *begin,char32_t const *end,int flags = 0) const = 0;

#if defined (__SUNPRO_CC) && defined (_RWSTD_VER)
    /// Accessor returning \ref id; compiled only when both __SUNPRO_CC and _RWSTD_VER are defined
    std::locale::id& __get_id (void) const { return id; }
#endif
};
#endif
|
Chris@16
|
134
|
Chris@16
|
135 #endif
|
Chris@16
|
136
|
Chris@16
|
///
/// The type that defines the <a href="http://unicode.org/reports/tr15/#Norm_Forms">normalization form</a>
///
enum norm_type {
    norm_nfd,                   ///< Canonical decomposition
    norm_nfc,                   ///< Canonical decomposition followed by canonical composition
    norm_nfkd,                  ///< Compatibility decomposition
    norm_nfkc,                  ///< Compatibility decomposition followed by canonical composition.
    norm_default = norm_nfc     ///< Default normalization - canonical decomposition followed by canonical composition
};
|
Chris@16
|
148
|
Chris@16
|
149 ///
|
Chris@16
|
150 /// Normalize Unicode string \a str according to \ref norm_type "normalization form" \a n
|
Chris@16
|
151 ///
|
Chris@16
|
152 /// Note: This function receives only Unicode strings, i.e.: UTF-8, UTF-16 or UTF-32. It does not take
|
Chris@16
|
153 /// in account the locale encoding, because Unicode decomposition and composition are meaningless outside
|
Chris@16
|
154 /// of a Unicode character set.
|
Chris@16
|
155 ///
|
Chris@16
|
156 /// \note throws std::bad_cast if loc does not have \ref converter facet installed
|
Chris@16
|
157 ///
|
Chris@16
|
158 template<typename CharType>
|
Chris@16
|
159 std::basic_string<CharType> normalize(std::basic_string<CharType> const &str,norm_type n=norm_default,std::locale const &loc=std::locale())
|
Chris@16
|
160 {
|
Chris@16
|
161 return std::use_facet<converter<CharType> >(loc).convert(converter_base::normalization,str.data(),str.data() + str.size(),n);
|
Chris@16
|
162 }
|
Chris@16
|
163
|
Chris@16
|
164 ///
|
Chris@16
|
165 /// Normalize NUL terminated Unicode string \a str according to \ref norm_type "normalization form" \a n
|
Chris@16
|
166 ///
|
Chris@16
|
167 /// Note: This function receives only Unicode strings, i.e.: UTF-8, UTF-16 or UTF-32. It does not take
|
Chris@16
|
168 /// in account the locale encoding, because Unicode decomposition and composition are meaningless outside
|
Chris@16
|
169 /// of a Unicode character set.
|
Chris@16
|
170 ///
|
Chris@16
|
171 /// \note throws std::bad_cast if loc does not have \ref converter facet installed
|
Chris@16
|
172 ///
|
Chris@16
|
173 template<typename CharType>
|
Chris@16
|
174 std::basic_string<CharType> normalize(CharType const *str,norm_type n=norm_default,std::locale const &loc=std::locale())
|
Chris@16
|
175 {
|
Chris@16
|
176 CharType const *end=str;
|
Chris@16
|
177 while(*end)
|
Chris@16
|
178 end++;
|
Chris@16
|
179 return std::use_facet<converter<CharType> >(loc).convert(converter_base::normalization,str,end,n);
|
Chris@16
|
180 }
|
Chris@16
|
181
|
Chris@16
|
182 ///
|
Chris@16
|
183 /// Normalize Unicode string in range [begin,end) according to \ref norm_type "normalization form" \a n
|
Chris@16
|
184 ///
|
Chris@16
|
185 /// Note: This function receives only Unicode strings, i.e.: UTF-8, UTF-16 or UTF-32. It does not take
|
Chris@16
|
186 /// in account the locale encoding, because Unicode decomposition and composition are meaningless outside
|
Chris@16
|
187 /// of a Unicode character set.
|
Chris@16
|
188 ///
|
Chris@16
|
189 /// \note throws std::bad_cast if loc does not have \ref converter facet installed
|
Chris@16
|
190 ///
|
Chris@16
|
191 template<typename CharType>
|
Chris@16
|
192 std::basic_string<CharType> normalize( CharType const *begin,
|
Chris@16
|
193 CharType const *end,
|
Chris@16
|
194 norm_type n=norm_default,
|
Chris@16
|
195 std::locale const &loc=std::locale())
|
Chris@16
|
196 {
|
Chris@16
|
197 return std::use_facet<converter<CharType> >(loc).convert(converter_base::normalization,begin,end,n);
|
Chris@16
|
198 }
|
Chris@16
|
199
|
Chris@16
|
200 ///////////////////////////////////////////////////
|
Chris@16
|
201
|
Chris@16
|
202 ///
|
Chris@16
|
203 /// Convert a string \a str to upper case according to locale \a loc
|
Chris@16
|
204 ///
|
Chris@16
|
205 /// \note throws std::bad_cast if loc does not have \ref converter facet installed
|
Chris@16
|
206 ///
|
Chris@16
|
207
|
Chris@16
|
208 template<typename CharType>
|
Chris@16
|
209 std::basic_string<CharType> to_upper(std::basic_string<CharType> const &str,std::locale const &loc=std::locale())
|
Chris@16
|
210 {
|
Chris@16
|
211 return std::use_facet<converter<CharType> >(loc).convert(converter_base::upper_case,str.data(),str.data()+str.size());
|
Chris@16
|
212 }
|
Chris@16
|
213
|
Chris@16
|
214 ///
|
Chris@16
|
215 /// Convert a NUL terminated string \a str to upper case according to locale \a loc
|
Chris@16
|
216 ///
|
Chris@16
|
217 /// \note throws std::bad_cast if loc does not have \ref converter facet installed
|
Chris@16
|
218 ///
|
Chris@16
|
219 template<typename CharType>
|
Chris@16
|
220 std::basic_string<CharType> to_upper(CharType const *str,std::locale const &loc=std::locale())
|
Chris@16
|
221 {
|
Chris@16
|
222 CharType const *end=str;
|
Chris@16
|
223 while(*end)
|
Chris@16
|
224 end++;
|
Chris@16
|
225 return std::use_facet<converter<CharType> >(loc).convert(converter_base::upper_case,str,end);
|
Chris@16
|
226 }
|
Chris@16
|
227
|
Chris@16
|
228 ///
|
Chris@16
|
229 /// Convert a string in range [begin,end) to upper case according to locale \a loc
|
Chris@16
|
230 ///
|
Chris@16
|
231 /// \note throws std::bad_cast if loc does not have \ref converter facet installed
|
Chris@16
|
232 ///
|
Chris@16
|
233 template<typename CharType>
|
Chris@16
|
234 std::basic_string<CharType> to_upper(CharType const *begin,CharType const *end,std::locale const &loc=std::locale())
|
Chris@16
|
235 {
|
Chris@16
|
236 return std::use_facet<converter<CharType> >(loc).convert(converter_base::upper_case,begin,end);
|
Chris@16
|
237 }
|
Chris@16
|
238
|
Chris@16
|
239 ///////////////////////////////////////////////////
|
Chris@16
|
240
|
Chris@16
|
241 ///
|
Chris@16
|
242 /// Convert a string \a str to lower case according to locale \a loc
|
Chris@16
|
243 ///
|
Chris@16
|
244 /// \note throws std::bad_cast if loc does not have \ref converter facet installed
|
Chris@16
|
245 ///
|
Chris@16
|
246
|
Chris@16
|
247 template<typename CharType>
|
Chris@16
|
248 std::basic_string<CharType> to_lower(std::basic_string<CharType> const &str,std::locale const &loc=std::locale())
|
Chris@16
|
249 {
|
Chris@16
|
250 return std::use_facet<converter<CharType> >(loc).convert(converter_base::lower_case,str.data(),str.data()+str.size());
|
Chris@16
|
251 }
|
Chris@16
|
252
|
Chris@16
|
253 ///
|
Chris@16
|
254 /// Convert a NUL terminated string \a str to lower case according to locale \a loc
|
Chris@16
|
255 ///
|
Chris@16
|
256 /// \note throws std::bad_cast if loc does not have \ref converter facet installed
|
Chris@16
|
257 ///
|
Chris@16
|
258 template<typename CharType>
|
Chris@16
|
259 std::basic_string<CharType> to_lower(CharType const *str,std::locale const &loc=std::locale())
|
Chris@16
|
260 {
|
Chris@16
|
261 CharType const *end=str;
|
Chris@16
|
262 while(*end)
|
Chris@16
|
263 end++;
|
Chris@16
|
264 return std::use_facet<converter<CharType> >(loc).convert(converter_base::lower_case,str,end);
|
Chris@16
|
265 }
|
Chris@16
|
266
|
Chris@16
|
267 ///
|
Chris@16
|
268 /// Convert a string in range [begin,end) to lower case according to locale \a loc
|
Chris@16
|
269 ///
|
Chris@16
|
270 /// \note throws std::bad_cast if loc does not have \ref converter facet installed
|
Chris@16
|
271 ///
|
Chris@16
|
272 template<typename CharType>
|
Chris@16
|
273 std::basic_string<CharType> to_lower(CharType const *begin,CharType const *end,std::locale const &loc=std::locale())
|
Chris@16
|
274 {
|
Chris@16
|
275 return std::use_facet<converter<CharType> >(loc).convert(converter_base::lower_case,begin,end);
|
Chris@16
|
276 }
|
Chris@16
|
277 ///////////////////////////////////////////////////
|
Chris@16
|
278
|
Chris@16
|
279 ///
|
Chris@16
|
280 /// Convert a string \a str to title case according to locale \a loc
|
Chris@16
|
281 ///
|
Chris@16
|
282 /// \note throws std::bad_cast if loc does not have \ref converter facet installed
|
Chris@16
|
283 ///
|
Chris@16
|
284
|
Chris@16
|
285 template<typename CharType>
|
Chris@16
|
286 std::basic_string<CharType> to_title(std::basic_string<CharType> const &str,std::locale const &loc=std::locale())
|
Chris@16
|
287 {
|
Chris@16
|
288 return std::use_facet<converter<CharType> >(loc).convert(converter_base::title_case,str.data(),str.data()+str.size());
|
Chris@16
|
289 }
|
Chris@16
|
290
|
Chris@16
|
291 ///
|
Chris@16
|
292 /// Convert a NUL terminated string \a str to title case according to locale \a loc
|
Chris@16
|
293 ///
|
Chris@16
|
294 /// \note throws std::bad_cast if loc does not have \ref converter facet installed
|
Chris@16
|
295 ///
|
Chris@16
|
296 template<typename CharType>
|
Chris@16
|
297 std::basic_string<CharType> to_title(CharType const *str,std::locale const &loc=std::locale())
|
Chris@16
|
298 {
|
Chris@16
|
299 CharType const *end=str;
|
Chris@16
|
300 while(*end)
|
Chris@16
|
301 end++;
|
Chris@16
|
302 return std::use_facet<converter<CharType> >(loc).convert(converter_base::title_case,str,end);
|
Chris@16
|
303 }
|
Chris@16
|
304
|
Chris@16
|
305 ///
|
Chris@16
|
306 /// Convert a string in range [begin,end) to title case according to locale \a loc
|
Chris@16
|
307 ///
|
Chris@16
|
308 /// \note throws std::bad_cast if loc does not have \ref converter facet installed
|
Chris@16
|
309 ///
|
Chris@16
|
310 template<typename CharType>
|
Chris@16
|
311 std::basic_string<CharType> to_title(CharType const *begin,CharType const *end,std::locale const &loc=std::locale())
|
Chris@16
|
312 {
|
Chris@16
|
313 return std::use_facet<converter<CharType> >(loc).convert(converter_base::title_case,begin,end);
|
Chris@16
|
314 }
|
Chris@16
|
315
|
Chris@16
|
316 ///////////////////////////////////////////////////
|
Chris@16
|
317
|
Chris@16
|
318 ///
|
Chris@16
|
319 /// Fold case of a string \a str according to locale \a loc
|
Chris@16
|
320 ///
|
Chris@16
|
321 /// \note throws std::bad_cast if loc does not have \ref converter facet installed
|
Chris@16
|
322 ///
|
Chris@16
|
323
|
Chris@16
|
324 template<typename CharType>
|
Chris@16
|
325 std::basic_string<CharType> fold_case(std::basic_string<CharType> const &str,std::locale const &loc=std::locale())
|
Chris@16
|
326 {
|
Chris@16
|
327 return std::use_facet<converter<CharType> >(loc).convert(converter_base::case_folding,str.data(),str.data()+str.size());
|
Chris@16
|
328 }
|
Chris@16
|
329
|
Chris@16
|
330 ///
|
Chris@16
|
331 /// Fold case of a NUL terminated string \a str according to locale \a loc
|
Chris@16
|
332 ///
|
Chris@16
|
333 /// \note throws std::bad_cast if loc does not have \ref converter facet installed
|
Chris@16
|
334 ///
|
Chris@16
|
335 template<typename CharType>
|
Chris@16
|
336 std::basic_string<CharType> fold_case(CharType const *str,std::locale const &loc=std::locale())
|
Chris@16
|
337 {
|
Chris@16
|
338 CharType const *end=str;
|
Chris@16
|
339 while(*end)
|
Chris@16
|
340 end++;
|
Chris@16
|
341 return std::use_facet<converter<CharType> >(loc).convert(converter_base::case_folding,str,end);
|
Chris@16
|
342 }
|
Chris@16
|
343
|
Chris@16
|
344 ///
|
Chris@16
|
345 /// Fold case of a string in range [begin,end) according to locale \a loc
|
Chris@16
|
346 ///
|
Chris@16
|
347 /// \note throws std::bad_cast if loc does not have \ref converter facet installed
|
Chris@16
|
348 ///
|
Chris@16
|
349 template<typename CharType>
|
Chris@16
|
350 std::basic_string<CharType> fold_case(CharType const *begin,CharType const *end,std::locale const &loc=std::locale())
|
Chris@16
|
351 {
|
Chris@16
|
352 return std::use_facet<converter<CharType> >(loc).convert(converter_base::case_folding,begin,end);
|
Chris@16
|
353 }
|
Chris@16
|
354
|
Chris@16
|
355 ///
|
Chris@16
|
356 ///@}
|
Chris@16
|
357 ///
|
Chris@16
|
358 } // locale
|
Chris@16
|
359
|
Chris@16
|
360 } // boost
|
Chris@16
|
361
|
Chris@16
|
362 #ifdef BOOST_MSVC
|
Chris@16
|
363 #pragma warning(pop)
|
Chris@16
|
364 #endif
|
Chris@16
|
365
|
Chris@16
|
366
|
Chris@16
|
367 #endif
|
Chris@16
|
368
|
Chris@16
|
369 ///
|
Chris@16
|
370 /// \example conversions.cpp
|
Chris@16
|
371 ///
|
Chris@16
|
372 /// Example of using various text conversion functions.
|
Chris@16
|
373 ///
|
Chris@16
|
374 /// \example wconversions.cpp
|
Chris@16
|
375 ///
|
Chris@16
|
376 /// Example of using various text conversion functions with wide strings.
|
Chris@16
|
377 ///
|
Chris@16
|
378
|
Chris@16
|
379 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
|
Chris@16
|
380
|