Chris@16
|
1 //////////////////////////////////////////////////////////////////////////////
|
Chris@16
|
2 /// \file c_regex_traits.hpp
|
Chris@16
|
3 /// Contains the definition of the c_regex_traits\<\> template, which is a
|
Chris@16
|
4 /// wrapper for the C locale functions that can be used to customize the
|
Chris@16
|
5 /// behavior of static and dynamic regexes.
|
Chris@16
|
6 //
|
Chris@16
|
7 // Copyright 2008 Eric Niebler. Distributed under the Boost
|
Chris@16
|
8 // Software License, Version 1.0. (See accompanying file
|
Chris@16
|
9 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
Chris@16
|
10
|
Chris@16
|
11 #ifndef BOOST_XPRESSIVE_TRAITS_C_REGEX_TRAITS_HPP_EAN_10_04_2005
|
Chris@16
|
12 #define BOOST_XPRESSIVE_TRAITS_C_REGEX_TRAITS_HPP_EAN_10_04_2005
|
Chris@16
|
13
|
Chris@16
|
14 // MS compatible compilers support #pragma once
|
Chris@101
|
15 #if defined(_MSC_VER)
|
Chris@16
|
16 # pragma once
|
Chris@16
|
17 #endif
|
Chris@16
|
18
|
Chris@16
|
19 #include <cstdlib>
|
Chris@16
|
20 #include <boost/config.hpp>
|
Chris@16
|
21 #include <boost/assert.hpp>
|
Chris@16
|
22 #include <boost/xpressive/traits/detail/c_ctype.hpp>
|
Chris@16
|
23
|
Chris@16
|
24 namespace boost { namespace xpressive
|
Chris@16
|
25 {
|
Chris@16
|
26
|
Chris@16
|
27 namespace detail
|
Chris@16
|
28 {
|
Chris@16
|
29 ///////////////////////////////////////////////////////////////////////////////
|
Chris@16
|
30 // empty_locale
|
Chris@16
|
/// Stateless placeholder type with no members.
/// c_regex_traits uses it as its \c locale_type.
struct empty_locale {};
|
Chris@16
|
34
|
Chris@16
|
35 ///////////////////////////////////////////////////////////////////////////////
|
Chris@16
|
36 // c_regex_traits_base
|
Chris@16
|
/// Base class template for c_regex_traits, parameterized on the character
/// type and (by default) on its size in bytes.
///
/// Every variant declared here exposes the same protected hook: a static,
/// empty imbue() member template that ignores its argument. The primary
/// template handles every character type that is neither one byte wide
/// nor \c wchar_t.
template<typename Char, std::size_t SizeOfChar = sizeof(Char)>
struct c_regex_traits_base
{
protected:
    /// Does nothing. Static with an unnamed parameter so it matches the
    /// specializations below and does not trigger unused-parameter warnings.
    template<typename Traits>
    static void imbue(Traits const &)
    {
    }
};

/// Partial specialization chosen when the character type is one byte wide.
template<typename Char>
struct c_regex_traits_base<Char, 1>
{
protected:
    /// Does nothing; the argument is ignored.
    template<typename Traits>
    static void imbue(Traits const &)
    {
    }
};

#ifndef BOOST_XPRESSIVE_NO_WREGEX
/// Partial specialization chosen for \c wchar_t, whatever its size.
/// Compiled out when BOOST_XPRESSIVE_NO_WREGEX is defined.
template<std::size_t SizeOfChar>
struct c_regex_traits_base<wchar_t, SizeOfChar>
{
protected:
    /// Does nothing; the argument is ignored.
    template<typename Traits>
    static void imbue(Traits const &)
    {
    }
};
#endif
|
Chris@16
|
68
|
Chris@16
|
/// Lower-cases a character. Only declared here; the explicit
/// specializations below supply the definitions for \c char and \c wchar_t.
template<typename Char>
Char c_tolower(Char);

/// Upper-cases a character. Only declared here; the explicit
/// specializations below supply the definitions for \c char and \c wchar_t.
template<typename Char>
Char c_toupper(Char);

/// Narrow lower-casing via tolower(). The character is converted to
/// unsigned char first so a negative char is never handed to tolower().
template<>
inline char c_tolower(char ch)
{
    using namespace std;
    unsigned char const as_uchar = static_cast<unsigned char>(ch);
    int const lowered = tolower(as_uchar);
    return static_cast<char>(lowered);
}

/// Narrow upper-casing via toupper(). The character is converted to
/// unsigned char first so a negative char is never handed to toupper().
template<>
inline char c_toupper(char ch)
{
    using namespace std;
    unsigned char const as_uchar = static_cast<unsigned char>(ch);
    int const raised = toupper(as_uchar);
    return static_cast<char>(raised);
}

#ifndef BOOST_XPRESSIVE_NO_WREGEX
/// Wide lower-casing via towlower().
template<>
inline wchar_t c_tolower(wchar_t ch)
{
    using namespace std;
    return static_cast<wchar_t>(towlower(ch));
}

/// Wide upper-casing via towupper().
template<>
inline wchar_t c_toupper(wchar_t ch)
{
    using namespace std;
    return static_cast<wchar_t>(towupper(ch));
}
#endif
|
Chris@16
|
104
|
Chris@16
|
105 } // namespace detail
|
Chris@16
|
106
|
Chris@16
|
107 ///////////////////////////////////////////////////////////////////////////////
|
Chris@16
|
108 // regex_traits_version_1_tag
|
Chris@16
|
109 //
|
Chris@16
|
110 struct regex_traits_version_1_tag;
|
Chris@16
|
111
|
Chris@16
|
112 ///////////////////////////////////////////////////////////////////////////////
|
Chris@16
|
113 // c_regex_traits
|
Chris@16
|
114 //
|
Chris@16
|
115 /// \brief Encapsaulates the standard C locale functions for use by the
|
Chris@16
|
116 /// \c basic_regex\<\> class template.
|
Chris@16
|
117 template<typename Char>
|
Chris@16
|
118 struct c_regex_traits
|
Chris@16
|
119 : detail::c_regex_traits_base<Char>
|
Chris@16
|
120 {
|
Chris@16
|
121 typedef Char char_type;
|
Chris@16
|
122 typedef std::basic_string<char_type> string_type;
|
Chris@16
|
123 typedef detail::empty_locale locale_type;
|
Chris@16
|
124 typedef typename detail::char_class_impl<Char>::char_class_type char_class_type;
|
Chris@16
|
125 typedef regex_traits_version_2_tag version_tag;
|
Chris@16
|
126 typedef detail::c_regex_traits_base<Char> base_type;
|
Chris@16
|
127
|
Chris@16
|
128 /// Initialize a c_regex_traits object to use the global C locale.
|
Chris@16
|
129 ///
|
Chris@16
|
130 c_regex_traits(locale_type const &loc = locale_type())
|
Chris@16
|
131 : base_type()
|
Chris@16
|
132 {
|
Chris@16
|
133 this->imbue(loc);
|
Chris@16
|
134 }
|
Chris@16
|
135
|
Chris@16
|
136 /// Checks two c_regex_traits objects for equality
|
Chris@16
|
137 ///
|
Chris@16
|
138 /// \return true.
|
Chris@16
|
139 bool operator ==(c_regex_traits<char_type> const &) const
|
Chris@16
|
140 {
|
Chris@16
|
141 return true;
|
Chris@16
|
142 }
|
Chris@16
|
143
|
Chris@16
|
144 /// Checks two c_regex_traits objects for inequality
|
Chris@16
|
145 ///
|
Chris@16
|
146 /// \return false.
|
Chris@16
|
147 bool operator !=(c_regex_traits<char_type> const &) const
|
Chris@16
|
148 {
|
Chris@16
|
149 return false;
|
Chris@16
|
150 }
|
Chris@16
|
151
|
Chris@16
|
152 /// Convert a char to a Char
|
Chris@16
|
153 ///
|
Chris@16
|
154 /// \param ch The source character.
|
Chris@16
|
155 /// \return ch if Char is char, std::btowc(ch) if Char is wchar_t.
|
Chris@16
|
156 static char_type widen(char ch);
|
Chris@16
|
157
|
Chris@16
|
158 /// Returns a hash value for a Char in the range [0, UCHAR_MAX]
|
Chris@16
|
159 ///
|
Chris@16
|
160 /// \param ch The source character.
|
Chris@16
|
161 /// \return a value between 0 and UCHAR_MAX, inclusive.
|
Chris@16
|
162 static unsigned char hash(char_type ch)
|
Chris@16
|
163 {
|
Chris@16
|
164 return static_cast<unsigned char>(std::char_traits<Char>::to_int_type(ch));
|
Chris@16
|
165 }
|
Chris@16
|
166
|
Chris@16
|
167 /// No-op
|
Chris@16
|
168 ///
|
Chris@16
|
169 /// \param ch The source character.
|
Chris@16
|
170 /// \return ch
|
Chris@16
|
171 static char_type translate(char_type ch)
|
Chris@16
|
172 {
|
Chris@16
|
173 return ch;
|
Chris@16
|
174 }
|
Chris@16
|
175
|
Chris@16
|
176 /// Converts a character to lower-case using the current global C locale.
|
Chris@16
|
177 ///
|
Chris@16
|
178 /// \param ch The source character.
|
Chris@16
|
179 /// \return std::tolower(ch) if Char is char, std::towlower(ch) if Char is wchar_t.
|
Chris@16
|
180 static char_type translate_nocase(char_type ch)
|
Chris@16
|
181 {
|
Chris@16
|
182 return detail::c_tolower(ch);
|
Chris@16
|
183 }
|
Chris@16
|
184
|
Chris@16
|
185 /// Converts a character to lower-case using the current global C locale.
|
Chris@16
|
186 ///
|
Chris@16
|
187 /// \param ch The source character.
|
Chris@16
|
188 /// \return std::tolower(ch) if Char is char, std::towlower(ch) if Char is wchar_t.
|
Chris@16
|
189 static char_type tolower(char_type ch)
|
Chris@16
|
190 {
|
Chris@16
|
191 return detail::c_tolower(ch);
|
Chris@16
|
192 }
|
Chris@16
|
193
|
Chris@16
|
194 /// Converts a character to upper-case using the current global C locale.
|
Chris@16
|
195 ///
|
Chris@16
|
196 /// \param ch The source character.
|
Chris@16
|
197 /// \return std::toupper(ch) if Char is char, std::towupper(ch) if Char is wchar_t.
|
Chris@16
|
198 static char_type toupper(char_type ch)
|
Chris@16
|
199 {
|
Chris@16
|
200 return detail::c_toupper(ch);
|
Chris@16
|
201 }
|
Chris@16
|
202
|
Chris@16
|
203 /// Returns a \c string_type containing all the characters that compare equal
|
Chris@16
|
204 /// disregrarding case to the one passed in. This function can only be called
|
Chris@16
|
205 /// if <tt>has_fold_case\<c_regex_traits\<Char\> \>::value</tt> is \c true.
|
Chris@16
|
206 ///
|
Chris@16
|
207 /// \param ch The source character.
|
Chris@16
|
208 /// \return \c string_type containing all chars which are equal to \c ch when disregarding
|
Chris@16
|
209 /// case
|
Chris@16
|
210 //typedef array<char_type, 2> fold_case_type;
|
Chris@16
|
211 string_type fold_case(char_type ch) const
|
Chris@16
|
212 {
|
Chris@16
|
213 BOOST_MPL_ASSERT((is_same<char_type, char>));
|
Chris@16
|
214 char_type ntcs[] = {
|
Chris@16
|
215 detail::c_tolower(ch)
|
Chris@16
|
216 , detail::c_toupper(ch)
|
Chris@16
|
217 , 0
|
Chris@16
|
218 };
|
Chris@16
|
219 if(ntcs[1] == ntcs[0])
|
Chris@16
|
220 ntcs[1] = 0;
|
Chris@16
|
221 return string_type(ntcs);
|
Chris@16
|
222 }
|
Chris@16
|
223
|
Chris@16
|
224 /// Checks to see if a character is within a character range.
|
Chris@16
|
225 ///
|
Chris@16
|
226 /// \param first The bottom of the range, inclusive.
|
Chris@16
|
227 /// \param last The top of the range, inclusive.
|
Chris@16
|
228 /// \param ch The source character.
|
Chris@16
|
229 /// \return first <= ch && ch <= last.
|
Chris@16
|
230 static bool in_range(char_type first, char_type last, char_type ch)
|
Chris@16
|
231 {
|
Chris@16
|
232 return first <= ch && ch <= last;
|
Chris@16
|
233 }
|
Chris@16
|
234
|
Chris@16
|
235 /// Checks to see if a character is within a character range, irregardless of case.
|
Chris@16
|
236 ///
|
Chris@16
|
237 /// \param first The bottom of the range, inclusive.
|
Chris@16
|
238 /// \param last The top of the range, inclusive.
|
Chris@16
|
239 /// \param ch The source character.
|
Chris@16
|
240 /// \return in_range(first, last, ch) || in_range(first, last, tolower(ch)) || in_range(first,
|
Chris@16
|
241 /// last, toupper(ch))
|
Chris@16
|
242 /// \attention The default implementation doesn't do proper Unicode
|
Chris@16
|
243 /// case folding, but this is the best we can do with the standard
|
Chris@16
|
244 /// C locale functions.
|
Chris@16
|
245 static bool in_range_nocase(char_type first, char_type last, char_type ch)
|
Chris@16
|
246 {
|
Chris@16
|
247 return c_regex_traits::in_range(first, last, ch)
|
Chris@16
|
248 || c_regex_traits::in_range(first, last, detail::c_tolower(ch))
|
Chris@16
|
249 || c_regex_traits::in_range(first, last, detail::c_toupper(ch));
|
Chris@16
|
250 }
|
Chris@16
|
251
|
Chris@16
|
252 /// Returns a sort key for the character sequence designated by the iterator range [F1, F2)
|
Chris@16
|
253 /// such that if the character sequence [G1, G2) sorts before the character sequence [H1, H2)
|
Chris@16
|
254 /// then v.transform(G1, G2) < v.transform(H1, H2).
|
Chris@16
|
255 ///
|
Chris@16
|
256 /// \attention Not currently used
|
Chris@16
|
257 template<typename FwdIter>
|
Chris@16
|
258 static string_type transform(FwdIter begin, FwdIter end)
|
Chris@16
|
259 {
|
Chris@16
|
260 BOOST_ASSERT(false); // BUGBUG implement me
|
Chris@16
|
261 }
|
Chris@16
|
262
|
Chris@16
|
263 /// Returns a sort key for the character sequence designated by the iterator range [F1, F2)
|
Chris@16
|
264 /// such that if the character sequence [G1, G2) sorts before the character sequence [H1, H2)
|
Chris@16
|
265 /// when character case is not considered then
|
Chris@16
|
266 /// v.transform_primary(G1, G2) < v.transform_primary(H1, H2).
|
Chris@16
|
267 ///
|
Chris@16
|
268 /// \attention Not currently used
|
Chris@16
|
269 template<typename FwdIter>
|
Chris@16
|
270 static string_type transform_primary(FwdIter begin, FwdIter end)
|
Chris@16
|
271 {
|
Chris@16
|
272 BOOST_ASSERT(false); // BUGBUG implement me
|
Chris@16
|
273 }
|
Chris@16
|
274
|
Chris@16
|
275 /// Returns a sequence of characters that represents the collating element
|
Chris@16
|
276 /// consisting of the character sequence designated by the iterator range [F1, F2).
|
Chris@16
|
277 /// Returns an empty string if the character sequence is not a valid collating element.
|
Chris@16
|
278 ///
|
Chris@16
|
279 /// \attention Not currently used
|
Chris@16
|
280 template<typename FwdIter>
|
Chris@16
|
281 static string_type lookup_collatename(FwdIter begin, FwdIter end)
|
Chris@16
|
282 {
|
Chris@16
|
283 BOOST_ASSERT(false); // BUGBUG implement me
|
Chris@16
|
284 }
|
Chris@16
|
285
|
Chris@16
|
286 /// For the character class name represented by the specified character sequence,
|
Chris@16
|
287 /// return the corresponding bitmask representation.
|
Chris@16
|
288 ///
|
Chris@16
|
289 /// \param begin A forward iterator to the start of the character sequence representing
|
Chris@16
|
290 /// the name of the character class.
|
Chris@16
|
291 /// \param end The end of the character sequence.
|
Chris@16
|
292 /// \param icase Specifies whether the returned bitmask should represent the case-insensitive
|
Chris@16
|
293 /// version of the character class.
|
Chris@16
|
294 /// \return A bitmask representing the character class.
|
Chris@16
|
295 template<typename FwdIter>
|
Chris@16
|
296 static char_class_type lookup_classname(FwdIter begin, FwdIter end, bool icase)
|
Chris@16
|
297 {
|
Chris@16
|
298 return detail::char_class_impl<char_type>::lookup_classname(begin, end, icase);
|
Chris@16
|
299 }
|
Chris@16
|
300
|
Chris@16
|
301 /// Tests a character against a character class bitmask.
|
Chris@16
|
302 ///
|
Chris@16
|
303 /// \param ch The character to test.
|
Chris@16
|
304 /// \param mask The character class bitmask against which to test.
|
Chris@16
|
305 /// \pre mask is a bitmask returned by lookup_classname, or is several such masks bit-or'ed
|
Chris@16
|
306 /// together.
|
Chris@16
|
307 /// \return true if the character is a member of any of the specified character classes, false
|
Chris@16
|
308 /// otherwise.
|
Chris@16
|
309 static bool isctype(char_type ch, char_class_type mask)
|
Chris@16
|
310 {
|
Chris@16
|
311 return detail::char_class_impl<char_type>::isctype(ch, mask);
|
Chris@16
|
312 }
|
Chris@16
|
313
|
Chris@16
|
314 /// Convert a digit character into the integer it represents.
|
Chris@16
|
315 ///
|
Chris@16
|
316 /// \param ch The digit character.
|
Chris@16
|
317 /// \param radix The radix to use for the conversion.
|
Chris@16
|
318 /// \pre radix is one of 8, 10, or 16.
|
Chris@16
|
319 /// \return -1 if ch is not a digit character, the integer value of the character otherwise. If
|
Chris@16
|
320 /// char_type is char, std::strtol is used for the conversion. If char_type is wchar_t,
|
Chris@16
|
321 /// std::wcstol is used.
|
Chris@16
|
322 static int value(char_type ch, int radix);
|
Chris@16
|
323
|
Chris@16
|
324 /// No-op
|
Chris@16
|
325 ///
|
Chris@16
|
326 locale_type imbue(locale_type loc)
|
Chris@16
|
327 {
|
Chris@16
|
328 this->base_type::imbue(*this);
|
Chris@16
|
329 return loc;
|
Chris@16
|
330 }
|
Chris@16
|
331
|
Chris@16
|
332 /// No-op
|
Chris@16
|
333 ///
|
Chris@16
|
334 static locale_type getloc()
|
Chris@16
|
335 {
|
Chris@16
|
336 locale_type loc;
|
Chris@16
|
337 return loc;
|
Chris@16
|
338 }
|
Chris@16
|
339 };
|
Chris@16
|
340
|
Chris@16
|
341 ///////////////////////////////////////////////////////////////////////////////
|
Chris@16
|
342 // c_regex_traits<>::widen specializations
|
Chris@16
|
343 /// INTERNAL ONLY
|
Chris@16
|
344 template<>
|
Chris@16
|
345 inline char c_regex_traits<char>::widen(char ch)
|
Chris@16
|
346 {
|
Chris@16
|
347 return ch;
|
Chris@16
|
348 }
|
Chris@16
|
349
|
Chris@16
|
350 #ifndef BOOST_XPRESSIVE_NO_WREGEX
|
Chris@16
|
351 /// INTERNAL ONLY
|
Chris@16
|
352 template<>
|
Chris@16
|
353 inline wchar_t c_regex_traits<wchar_t>::widen(char ch)
|
Chris@16
|
354 {
|
Chris@16
|
355 using namespace std;
|
Chris@16
|
356 return btowc(ch);
|
Chris@16
|
357 }
|
Chris@16
|
358 #endif
|
Chris@16
|
359
|
Chris@16
|
360 ///////////////////////////////////////////////////////////////////////////////
|
Chris@16
|
361 // c_regex_traits<>::hash specializations
|
Chris@16
|
362 /// INTERNAL ONLY
|
Chris@16
|
363 template<>
|
Chris@16
|
364 inline unsigned char c_regex_traits<char>::hash(char ch)
|
Chris@16
|
365 {
|
Chris@16
|
366 return static_cast<unsigned char>(ch);
|
Chris@16
|
367 }
|
Chris@16
|
368
|
Chris@16
|
369 #ifndef BOOST_XPRESSIVE_NO_WREGEX
|
Chris@16
|
370 /// INTERNAL ONLY
|
Chris@16
|
371 template<>
|
Chris@16
|
372 inline unsigned char c_regex_traits<wchar_t>::hash(wchar_t ch)
|
Chris@16
|
373 {
|
Chris@16
|
374 return static_cast<unsigned char>(ch);
|
Chris@16
|
375 }
|
Chris@16
|
376 #endif
|
Chris@16
|
377
|
Chris@16
|
378 ///////////////////////////////////////////////////////////////////////////////
|
Chris@16
|
379 // c_regex_traits<>::value specializations
|
Chris@16
|
380 /// INTERNAL ONLY
|
Chris@16
|
381 template<>
|
Chris@16
|
382 inline int c_regex_traits<char>::value(char ch, int radix)
|
Chris@16
|
383 {
|
Chris@16
|
384 using namespace std;
|
Chris@16
|
385 BOOST_ASSERT(8 == radix || 10 == radix || 16 == radix);
|
Chris@16
|
386 char begin[2] = { ch, '\0' }, *end = 0;
|
Chris@16
|
387 int val = strtol(begin, &end, radix);
|
Chris@16
|
388 return begin == end ? -1 : val;
|
Chris@16
|
389 }
|
Chris@16
|
390
|
Chris@16
|
391 #ifndef BOOST_XPRESSIVE_NO_WREGEX
|
Chris@16
|
392 /// INTERNAL ONLY
|
Chris@16
|
393 template<>
|
Chris@16
|
394 inline int c_regex_traits<wchar_t>::value(wchar_t ch, int radix)
|
Chris@16
|
395 {
|
Chris@16
|
396 using namespace std;
|
Chris@16
|
397 BOOST_ASSERT(8 == radix || 10 == radix || 16 == radix);
|
Chris@16
|
398 wchar_t begin[2] = { ch, L'\0' }, *end = 0;
|
Chris@16
|
399 int val = wcstol(begin, &end, radix);
|
Chris@16
|
400 return begin == end ? -1 : val;
|
Chris@16
|
401 }
|
Chris@16
|
402 #endif
|
Chris@16
|
403
|
Chris@16
|
404 // Narrow C traits has fold_case() member function.
|
Chris@16
|
405 template<>
|
Chris@16
|
406 struct has_fold_case<c_regex_traits<char> >
|
Chris@16
|
407 : mpl::true_
|
Chris@16
|
408 {
|
Chris@16
|
409 };
|
Chris@16
|
410
|
Chris@16
|
411 }}
|
Chris@16
|
412
|
Chris@16
|
413 #endif
|