Chris@16
|
1 ///////////////////////////////////////////////////////////////////////////////
|
Chris@16
|
2 /// \file cpp_regex_traits.hpp
|
Chris@16
|
3 /// Contains the definition of the cpp_regex_traits\<\> template, which is a
|
Chris@16
|
4 /// wrapper for std::locale that can be used to customize the behavior of
|
Chris@16
|
5 /// static and dynamic regexes.
|
Chris@16
|
6 //
|
Chris@16
|
7 // Copyright 2008 Eric Niebler. Distributed under the Boost
|
Chris@16
|
8 // Software License, Version 1.0. (See accompanying file
|
Chris@16
|
9 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
Chris@16
|
10
|
Chris@16
|
11 #ifndef BOOST_XPRESSIVE_TRAITS_CPP_REGEX_TRAITS_HPP_EAN_10_04_2005
|
Chris@16
|
12 #define BOOST_XPRESSIVE_TRAITS_CPP_REGEX_TRAITS_HPP_EAN_10_04_2005
|
Chris@16
|
13
|
Chris@16
|
14 // MS compatible compilers support #pragma once
|
Chris@101
|
15 #if defined(_MSC_VER)
|
Chris@16
|
16 # pragma once
|
Chris@16
|
17 #endif
|
Chris@16
|
18
|
Chris@16
|
19 #include <ios>
|
Chris@16
|
20 #include <string>
|
Chris@16
|
21 #include <locale>
|
Chris@16
|
22 #include <sstream>
|
Chris@16
|
23 #include <climits>
|
Chris@16
|
24 #include <boost/config.hpp>
|
Chris@16
|
25 #include <boost/assert.hpp>
|
Chris@16
|
26 #include <boost/integer.hpp>
|
Chris@16
|
27 #include <boost/mpl/assert.hpp>
|
Chris@16
|
28 #include <boost/static_assert.hpp>
|
Chris@16
|
29 #include <boost/detail/workaround.hpp>
|
Chris@16
|
30 #include <boost/type_traits/is_same.hpp>
|
Chris@16
|
31 #include <boost/xpressive/detail/detail_fwd.hpp>
|
Chris@16
|
32 #include <boost/xpressive/detail/utility/literals.hpp>
|
Chris@16
|
33
|
Chris@16
|
34 // From John Maddock:
|
Chris@16
|
35 // Fix for gcc prior to 3.4: std::ctype<wchar_t> doesn't allow masks to be combined, for example:
|
Chris@16
|
36 // std::use_facet<std::ctype<wchar_t> >(locale()).is(std::ctype_base::lower|std::ctype_base::upper, L'a');
|
Chris@16
|
37 // incorrectly returns false.
|
Chris@16
|
38 // NOTE: later versions of gcc define __GLIBCXX__, not __GLIBCPP__
|
Chris@16
|
39 #if BOOST_WORKAROUND(__GLIBCPP__, != 0)
|
Chris@16
|
40 # define BOOST_XPRESSIVE_BUGGY_CTYPE_FACET
|
Chris@16
|
41 #endif
|
Chris@16
|
42
|
Chris@16
|
43 namespace boost { namespace xpressive
|
Chris@16
|
44 {
|
Chris@16
|
45
|
Chris@16
|
46 namespace detail
|
Chris@16
|
47 {
|
Chris@16
|
48 // define an unsigned integral typedef of the same size as std::ctype_base::mask
|
Chris@16
|
49 typedef boost::uint_t<sizeof(std::ctype_base::mask) * CHAR_BIT>::least umask_t;
|
Chris@16
|
50 BOOST_MPL_ASSERT_RELATION(sizeof(std::ctype_base::mask), ==, sizeof(umask_t));
|
Chris@16
|
51
|
Chris@16
|
52 // Calculate what the size of the umaskex_t type should be to fit the 3 extra bitmasks
|
Chris@16
|
53 // 11 char categories in ctype_base
|
Chris@16
|
54 // + 3 extra categories for xpressive
|
Chris@16
|
55 // = 14 total bits needed
|
Chris@16
|
56 int const umaskex_bits = (14 > (sizeof(umask_t) * CHAR_BIT)) ? 14 : sizeof(umask_t) * CHAR_BIT;
|
Chris@16
|
57
|
Chris@16
|
58 // define an unsigned integral type with at least umaskex_bits
|
Chris@16
|
59 typedef boost::uint_t<umaskex_bits>::fast umaskex_t;
|
Chris@16
|
60 BOOST_MPL_ASSERT_RELATION(sizeof(umask_t), <=, sizeof(umaskex_t));
|
Chris@16
|
61
|
Chris@16
|
62 // cast a ctype mask to a umaskex_t
|
Chris@16
|
63 template<std::ctype_base::mask Mask>
|
Chris@16
|
64 struct mask_cast
|
Chris@16
|
65 {
|
Chris@16
|
66 BOOST_STATIC_CONSTANT(umaskex_t, value = static_cast<umask_t>(Mask));
|
Chris@16
|
67 };
|
Chris@16
|
68
|
Chris@16
|
69 #ifdef __CYGWIN__
|
Chris@16
|
70 // Work around a gcc warning on cygwin
|
Chris@16
|
71 template<>
|
Chris@16
|
72 struct mask_cast<std::ctype_base::print>
|
Chris@16
|
73 {
|
Chris@16
|
74 BOOST_MPL_ASSERT_RELATION('\227', ==, std::ctype_base::print);
|
Chris@16
|
75 BOOST_STATIC_CONSTANT(umaskex_t, value = 0227);
|
Chris@16
|
76 };
|
Chris@16
|
77 #endif
|
Chris@16
|
78
|
Chris@16
|
79 #ifndef BOOST_NO_INCLASS_MEMBER_INITIALIZATION
|
Chris@16
|
80 template<std::ctype_base::mask Mask>
|
Chris@16
|
81 umaskex_t const mask_cast<Mask>::value;
|
Chris@16
|
82 #endif
|
Chris@16
|
83
|
Chris@16
|
84 #ifndef BOOST_XPRESSIVE_BUGGY_CTYPE_FACET
|
Chris@16
|
85 // an unsigned integer with the highest bit set
|
Chris@16
|
86 umaskex_t const highest_bit = static_cast<umaskex_t>(1) << (sizeof(umaskex_t) * CHAR_BIT - 1);
|
Chris@16
|
87
|
Chris@16
|
88 ///////////////////////////////////////////////////////////////////////////////
|
Chris@16
|
89 // unused_mask
|
Chris@16
|
90 // find a bit in an int that isn't set
|
Chris@16
|
91 template<umaskex_t In, umaskex_t Out = highest_bit, bool Done = (0 == (Out & In))>
|
Chris@16
|
92 struct unused_mask
|
Chris@16
|
93 {
|
Chris@16
|
94 BOOST_STATIC_ASSERT(1 != Out);
|
Chris@16
|
95 BOOST_STATIC_CONSTANT(umaskex_t, value = (unused_mask<In, (Out >> 1)>::value));
|
Chris@16
|
96 };
|
Chris@16
|
97
|
Chris@16
|
98 template<umaskex_t In, umaskex_t Out>
|
Chris@16
|
99 struct unused_mask<In, Out, true>
|
Chris@16
|
100 {
|
Chris@16
|
101 BOOST_STATIC_CONSTANT(umaskex_t, value = Out);
|
Chris@16
|
102 };
|
Chris@16
|
103
|
Chris@16
|
104 #ifndef BOOST_NO_INCLASS_MEMBER_INITIALIZATION
|
Chris@16
|
105 template<umaskex_t In, umaskex_t Out, bool Done>
|
Chris@16
|
106 umaskex_t const unused_mask<In, Out, Done>::value;
|
Chris@16
|
107 #endif
|
Chris@16
|
108
|
Chris@16
|
109 umaskex_t const std_ctype_alnum = mask_cast<std::ctype_base::alnum>::value;
|
Chris@16
|
110 umaskex_t const std_ctype_alpha = mask_cast<std::ctype_base::alpha>::value;
|
Chris@16
|
111 umaskex_t const std_ctype_cntrl = mask_cast<std::ctype_base::cntrl>::value;
|
Chris@16
|
112 umaskex_t const std_ctype_digit = mask_cast<std::ctype_base::digit>::value;
|
Chris@16
|
113 umaskex_t const std_ctype_graph = mask_cast<std::ctype_base::graph>::value;
|
Chris@16
|
114 umaskex_t const std_ctype_lower = mask_cast<std::ctype_base::lower>::value;
|
Chris@16
|
115 umaskex_t const std_ctype_print = mask_cast<std::ctype_base::print>::value;
|
Chris@16
|
116 umaskex_t const std_ctype_punct = mask_cast<std::ctype_base::punct>::value;
|
Chris@16
|
117 umaskex_t const std_ctype_space = mask_cast<std::ctype_base::space>::value;
|
Chris@16
|
118 umaskex_t const std_ctype_upper = mask_cast<std::ctype_base::upper>::value;
|
Chris@16
|
119 umaskex_t const std_ctype_xdigit = mask_cast<std::ctype_base::xdigit>::value;
|
Chris@16
|
120
|
Chris@16
|
121 // Reserve some bits for the implementation
|
Chris@16
|
122 #if defined(__GLIBCXX__)
|
Chris@16
|
123 umaskex_t const std_ctype_reserved = 0x8000;
|
Chris@16
|
124 #elif defined(_CPPLIB_VER) && defined(BOOST_WINDOWS)
|
Chris@16
|
125 umaskex_t const std_ctype_reserved = 0x8200;
|
Chris@16
|
126 #elif defined(_LIBCPP_VERSION)
|
Chris@16
|
127 umaskex_t const std_ctype_reserved = 0x8000;
|
Chris@16
|
128 #else
|
Chris@16
|
129 umaskex_t const std_ctype_reserved = 0;
|
Chris@16
|
130 #endif
|
Chris@16
|
131
|
Chris@16
|
132 // Bitwise-or all the ctype masks together
|
Chris@16
|
133 umaskex_t const all_ctype_masks = std_ctype_reserved
|
Chris@16
|
134 | std_ctype_alnum | std_ctype_alpha | std_ctype_cntrl | std_ctype_digit
|
Chris@16
|
135 | std_ctype_graph | std_ctype_lower | std_ctype_print | std_ctype_punct
|
Chris@16
|
136 | std_ctype_space | std_ctype_upper | std_ctype_xdigit;
|
Chris@16
|
137
|
Chris@16
|
138 // define a new mask for "underscore" ("word" == alnum | underscore)
|
Chris@16
|
139 umaskex_t const non_std_ctype_underscore = unused_mask<all_ctype_masks>::value;
|
Chris@16
|
140
|
Chris@16
|
141 // define a new mask for "blank"
|
Chris@16
|
142 umaskex_t const non_std_ctype_blank = unused_mask<all_ctype_masks | non_std_ctype_underscore>::value;
|
Chris@16
|
143
|
Chris@16
|
144 // define a new mask for "newline"
|
Chris@16
|
145 umaskex_t const non_std_ctype_newline = unused_mask<all_ctype_masks | non_std_ctype_underscore | non_std_ctype_blank>::value;
|
Chris@16
|
146
|
Chris@16
|
147 #else
|
Chris@16
|
148 ///////////////////////////////////////////////////////////////////////////////
|
Chris@16
|
149 // Ugly work-around for buggy ctype facets.
|
Chris@16
|
150 umaskex_t const std_ctype_alnum = 1 << 0;
|
Chris@16
|
151 umaskex_t const std_ctype_alpha = 1 << 1;
|
Chris@16
|
152 umaskex_t const std_ctype_cntrl = 1 << 2;
|
Chris@16
|
153 umaskex_t const std_ctype_digit = 1 << 3;
|
Chris@16
|
154 umaskex_t const std_ctype_graph = 1 << 4;
|
Chris@16
|
155 umaskex_t const std_ctype_lower = 1 << 5;
|
Chris@16
|
156 umaskex_t const std_ctype_print = 1 << 6;
|
Chris@16
|
157 umaskex_t const std_ctype_punct = 1 << 7;
|
Chris@16
|
158 umaskex_t const std_ctype_space = 1 << 8;
|
Chris@16
|
159 umaskex_t const std_ctype_upper = 1 << 9;
|
Chris@16
|
160 umaskex_t const std_ctype_xdigit = 1 << 10;
|
Chris@16
|
161 umaskex_t const non_std_ctype_underscore = 1 << 11;
|
Chris@16
|
162 umaskex_t const non_std_ctype_blank = 1 << 12;
|
Chris@16
|
163 umaskex_t const non_std_ctype_newline = 1 << 13;
|
Chris@16
|
164
|
Chris@16
|
165 static umaskex_t const std_masks[] =
|
Chris@16
|
166 {
|
Chris@16
|
167 mask_cast<std::ctype_base::alnum>::value
|
Chris@16
|
168 , mask_cast<std::ctype_base::alpha>::value
|
Chris@16
|
169 , mask_cast<std::ctype_base::cntrl>::value
|
Chris@16
|
170 , mask_cast<std::ctype_base::digit>::value
|
Chris@16
|
171 , mask_cast<std::ctype_base::graph>::value
|
Chris@16
|
172 , mask_cast<std::ctype_base::lower>::value
|
Chris@16
|
173 , mask_cast<std::ctype_base::print>::value
|
Chris@16
|
174 , mask_cast<std::ctype_base::punct>::value
|
Chris@16
|
175 , mask_cast<std::ctype_base::space>::value
|
Chris@16
|
176 , mask_cast<std::ctype_base::upper>::value
|
Chris@16
|
177 , mask_cast<std::ctype_base::xdigit>::value
|
Chris@16
|
178 };
|
Chris@16
|
179
|
Chris@16
|
180 inline int mylog2(umaskex_t i)
|
Chris@16
|
181 {
|
Chris@16
|
182 return "\0\0\1\0\2\0\0\0\3"[i & 0xf]
|
Chris@16
|
183 + "\0\4\5\0\6\0\0\0\7"[(i & 0xf0) >> 04]
|
Chris@16
|
184 + "\0\10\11\0\12\0\0\0\13"[(i & 0xf00) >> 010];
|
Chris@16
|
185 }
|
Chris@16
|
186 #endif
|
Chris@16
|
187
|
Chris@16
|
188 // convenient constant for the extra masks
|
Chris@16
|
189 umaskex_t const non_std_ctype_masks = non_std_ctype_underscore | non_std_ctype_blank | non_std_ctype_newline;
|
Chris@16
|
190
|
Chris@16
|
191 ///////////////////////////////////////////////////////////////////////////////
|
Chris@16
|
192 // cpp_regex_traits_base
|
Chris@16
|
193 // BUGBUG this should be replaced with a regex facet that lets you query for
|
Chris@16
|
194 // an array of underscore characters and an array of line separator characters.
|
Chris@16
|
195 template<typename Char, std::size_t SizeOfChar = sizeof(Char)>
|
Chris@16
|
196 struct cpp_regex_traits_base
|
Chris@16
|
197 {
|
Chris@16
|
198 protected:
|
Chris@16
|
199 void imbue(std::locale const &)
|
Chris@16
|
200 {
|
Chris@16
|
201 }
|
Chris@16
|
202
|
Chris@16
|
203 static bool is(std::ctype<Char> const &ct, Char ch, umaskex_t mask)
|
Chris@16
|
204 {
|
Chris@16
|
205 #ifndef BOOST_XPRESSIVE_BUGGY_CTYPE_FACET
|
Chris@16
|
206
|
Chris@16
|
207 if(ct.is((std::ctype_base::mask)(umask_t)mask, ch))
|
Chris@16
|
208 {
|
Chris@16
|
209 return true;
|
Chris@16
|
210 }
|
Chris@16
|
211
|
Chris@16
|
212 // HACKHACK Cygwin and mingw have buggy ctype facets for wchar_t
|
Chris@16
|
213 #if defined(__CYGWIN__) || defined(__MINGW32_VERSION)
|
Chris@16
|
214 if (std::ctype_base::xdigit == ((std::ctype_base::mask)(umask_t)mask & std::ctype_base::xdigit))
|
Chris@16
|
215 {
|
Chris@16
|
216 typename std::char_traits<Char>::int_type i = std::char_traits<Char>::to_int_type(ch);
|
Chris@16
|
217 if(UCHAR_MAX >= i && std::isxdigit(static_cast<int>(i)))
|
Chris@16
|
218 return true;
|
Chris@16
|
219 }
|
Chris@16
|
220 #endif
|
Chris@16
|
221
|
Chris@16
|
222 #else
|
Chris@16
|
223
|
Chris@16
|
224 umaskex_t tmp = mask & ~non_std_ctype_masks;
|
Chris@16
|
225 for(umaskex_t i; 0 != (i = (tmp & (~tmp+1))); tmp &= ~i)
|
Chris@16
|
226 {
|
Chris@16
|
227 std::ctype_base::mask m = (std::ctype_base::mask)(umask_t)std_masks[mylog2(i)];
|
Chris@16
|
228 if(ct.is(m, ch))
|
Chris@16
|
229 {
|
Chris@16
|
230 return true;
|
Chris@16
|
231 }
|
Chris@16
|
232 }
|
Chris@16
|
233
|
Chris@16
|
234 #endif
|
Chris@16
|
235
|
Chris@16
|
236 return ((mask & non_std_ctype_blank) && cpp_regex_traits_base::is_blank(ch))
|
Chris@16
|
237 || ((mask & non_std_ctype_underscore) && cpp_regex_traits_base::is_underscore(ch))
|
Chris@16
|
238 || ((mask & non_std_ctype_newline) && cpp_regex_traits_base::is_newline(ch));
|
Chris@16
|
239 }
|
Chris@16
|
240
|
Chris@16
|
241 private:
|
Chris@16
|
242 static bool is_blank(Char ch)
|
Chris@16
|
243 {
|
Chris@16
|
244 BOOST_MPL_ASSERT_RELATION('\t', ==, L'\t');
|
Chris@16
|
245 BOOST_MPL_ASSERT_RELATION(' ', ==, L' ');
|
Chris@16
|
246 return L' ' == ch || L'\t' == ch;
|
Chris@16
|
247 }
|
Chris@16
|
248
|
Chris@16
|
249 static bool is_underscore(Char ch)
|
Chris@16
|
250 {
|
Chris@16
|
251 BOOST_MPL_ASSERT_RELATION('_', ==, L'_');
|
Chris@16
|
252 return L'_' == ch;
|
Chris@16
|
253 }
|
Chris@16
|
254
|
Chris@16
|
255 static bool is_newline(Char ch)
|
Chris@16
|
256 {
|
Chris@16
|
257 BOOST_MPL_ASSERT_RELATION('\r', ==, L'\r');
|
Chris@16
|
258 BOOST_MPL_ASSERT_RELATION('\n', ==, L'\n');
|
Chris@16
|
259 BOOST_MPL_ASSERT_RELATION('\f', ==, L'\f');
|
Chris@16
|
260 return L'\r' == ch || L'\n' == ch || L'\f' == ch
|
Chris@16
|
261 || (1 < SizeOfChar && (0x2028u == ch || 0x2029u == ch || 0x85u == ch));
|
Chris@16
|
262 }
|
Chris@16
|
263 };
|
Chris@16
|
264
|
Chris@16
|
265 #ifndef BOOST_XPRESSIVE_BUGGY_CTYPE_FACET
|
Chris@16
|
266
|
Chris@16
|
267 template<typename Char>
|
Chris@16
|
268 struct cpp_regex_traits_base<Char, 1>
|
Chris@16
|
269 {
|
Chris@16
|
270 protected:
|
Chris@16
|
271 void imbue(std::locale const &loc)
|
Chris@16
|
272 {
|
Chris@16
|
273 int i = 0;
|
Chris@16
|
274 Char allchars[UCHAR_MAX + 1];
|
Chris@16
|
275 for(i = 0; i <= static_cast<int>(UCHAR_MAX); ++i)
|
Chris@16
|
276 {
|
Chris@16
|
277 allchars[i] = static_cast<Char>(i);
|
Chris@16
|
278 }
|
Chris@16
|
279
|
Chris@16
|
280 std::ctype<Char> const &ct = BOOST_USE_FACET(std::ctype<Char>, loc);
|
Chris@16
|
281 std::ctype_base::mask tmp[UCHAR_MAX + 1];
|
Chris@16
|
282 ct.is(allchars, allchars + UCHAR_MAX + 1, tmp);
|
Chris@16
|
283 for(i = 0; i <= static_cast<int>(UCHAR_MAX); ++i)
|
Chris@16
|
284 {
|
Chris@16
|
285 this->masks_[i] = static_cast<umask_t>(tmp[i]);
|
Chris@16
|
286 BOOST_ASSERT(0 == (this->masks_[i] & non_std_ctype_masks));
|
Chris@16
|
287 }
|
Chris@16
|
288
|
Chris@16
|
289 this->masks_[static_cast<unsigned char>('_')] |= non_std_ctype_underscore;
|
Chris@16
|
290 this->masks_[static_cast<unsigned char>(' ')] |= non_std_ctype_blank;
|
Chris@16
|
291 this->masks_[static_cast<unsigned char>('\t')] |= non_std_ctype_blank;
|
Chris@16
|
292 this->masks_[static_cast<unsigned char>('\n')] |= non_std_ctype_newline;
|
Chris@16
|
293 this->masks_[static_cast<unsigned char>('\r')] |= non_std_ctype_newline;
|
Chris@16
|
294 this->masks_[static_cast<unsigned char>('\f')] |= non_std_ctype_newline;
|
Chris@16
|
295 }
|
Chris@16
|
296
|
Chris@16
|
297 bool is(std::ctype<Char> const &, Char ch, umaskex_t mask) const
|
Chris@16
|
298 {
|
Chris@16
|
299 return 0 != (this->masks_[static_cast<unsigned char>(ch)] & mask);
|
Chris@16
|
300 }
|
Chris@16
|
301
|
Chris@16
|
302 private:
|
Chris@16
|
303 umaskex_t masks_[UCHAR_MAX + 1];
|
Chris@16
|
304 };
|
Chris@16
|
305
|
Chris@16
|
306 #endif
|
Chris@16
|
307
|
Chris@16
|
308 } // namespace detail
|
Chris@16
|
309
|
Chris@16
|
310
|
Chris@16
|
311 ///////////////////////////////////////////////////////////////////////////////
|
Chris@16
|
312 // cpp_regex_traits
|
Chris@16
|
313 //
|
Chris@16
|
314 /// \brief Encapsaulates a \c std::locale for use by the
|
Chris@16
|
315 /// \c basic_regex\<\> class template.
|
Chris@16
|
316 template<typename Char>
|
Chris@16
|
317 struct cpp_regex_traits
|
Chris@16
|
318 : detail::cpp_regex_traits_base<Char>
|
Chris@16
|
319 {
|
Chris@16
|
320 typedef Char char_type;
|
Chris@16
|
321 typedef std::basic_string<char_type> string_type;
|
Chris@16
|
322 typedef std::locale locale_type;
|
Chris@16
|
323 typedef detail::umaskex_t char_class_type;
|
Chris@16
|
324 typedef regex_traits_version_2_tag version_tag;
|
Chris@16
|
325 typedef detail::cpp_regex_traits_base<Char> base_type;
|
Chris@16
|
326
|
Chris@16
|
327 /// Initialize a cpp_regex_traits object to use the specified std::locale,
|
Chris@16
|
328 /// or the global std::locale if none is specified.
|
Chris@16
|
329 ///
|
Chris@16
|
330 cpp_regex_traits(locale_type const &loc = locale_type())
|
Chris@16
|
331 : base_type()
|
Chris@16
|
332 , loc_()
|
Chris@16
|
333 {
|
Chris@16
|
334 this->imbue(loc);
|
Chris@16
|
335 }
|
Chris@16
|
336
|
Chris@16
|
337 /// Checks two cpp_regex_traits objects for equality
|
Chris@16
|
338 ///
|
Chris@16
|
339 /// \return this->getloc() == that.getloc().
|
Chris@16
|
340 bool operator ==(cpp_regex_traits<char_type> const &that) const
|
Chris@16
|
341 {
|
Chris@16
|
342 return this->loc_ == that.loc_;
|
Chris@16
|
343 }
|
Chris@16
|
344
|
Chris@16
|
345 /// Checks two cpp_regex_traits objects for inequality
|
Chris@16
|
346 ///
|
Chris@16
|
347 /// \return this->getloc() != that.getloc().
|
Chris@16
|
348 bool operator !=(cpp_regex_traits<char_type> const &that) const
|
Chris@16
|
349 {
|
Chris@16
|
350 return this->loc_ != that.loc_;
|
Chris@16
|
351 }
|
Chris@16
|
352
|
Chris@16
|
353 /// Convert a char to a Char
|
Chris@16
|
354 ///
|
Chris@16
|
355 /// \param ch The source character.
|
Chris@16
|
356 /// \return std::use_facet\<std::ctype\<char_type\> \>(this->getloc()).widen(ch).
|
Chris@16
|
357 char_type widen(char ch) const
|
Chris@16
|
358 {
|
Chris@16
|
359 return this->ctype_->widen(ch);
|
Chris@16
|
360 }
|
Chris@16
|
361
|
Chris@16
|
362 /// Returns a hash value for a Char in the range [0, UCHAR_MAX]
|
Chris@16
|
363 ///
|
Chris@16
|
364 /// \param ch The source character.
|
Chris@16
|
365 /// \return a value between 0 and UCHAR_MAX, inclusive.
|
Chris@16
|
366 static unsigned char hash(char_type ch)
|
Chris@16
|
367 {
|
Chris@16
|
368 return static_cast<unsigned char>(std::char_traits<Char>::to_int_type(ch));
|
Chris@16
|
369 }
|
Chris@16
|
370
|
Chris@16
|
371 /// No-op
|
Chris@16
|
372 ///
|
Chris@16
|
373 /// \param ch The source character.
|
Chris@16
|
374 /// \return ch
|
Chris@16
|
375 static char_type translate(char_type ch)
|
Chris@16
|
376 {
|
Chris@16
|
377 return ch;
|
Chris@16
|
378 }
|
Chris@16
|
379
|
Chris@16
|
380 /// Converts a character to lower-case using the internally-stored std::locale.
|
Chris@16
|
381 ///
|
Chris@16
|
382 /// \param ch The source character.
|
Chris@16
|
383 /// \return std::tolower(ch, this->getloc()).
|
Chris@16
|
384 char_type translate_nocase(char_type ch) const
|
Chris@16
|
385 {
|
Chris@16
|
386 return this->ctype_->tolower(ch);
|
Chris@16
|
387 }
|
Chris@16
|
388
|
Chris@16
|
389 /// Converts a character to lower-case using the internally-stored std::locale.
|
Chris@16
|
390 ///
|
Chris@16
|
391 /// \param ch The source character.
|
Chris@16
|
392 /// \return std::tolower(ch, this->getloc()).
|
Chris@16
|
393 char_type tolower(char_type ch) const
|
Chris@16
|
394 {
|
Chris@16
|
395 return this->ctype_->tolower(ch);
|
Chris@16
|
396 }
|
Chris@16
|
397
|
Chris@16
|
398 /// Converts a character to upper-case using the internally-stored std::locale.
|
Chris@16
|
399 ///
|
Chris@16
|
400 /// \param ch The source character.
|
Chris@16
|
401 /// \return std::toupper(ch, this->getloc()).
|
Chris@16
|
402 char_type toupper(char_type ch) const
|
Chris@16
|
403 {
|
Chris@16
|
404 return this->ctype_->toupper(ch);
|
Chris@16
|
405 }
|
Chris@16
|
406
|
Chris@16
|
407 /// Returns a \c string_type containing all the characters that compare equal
|
Chris@16
|
408 /// disregrarding case to the one passed in. This function can only be called
|
Chris@16
|
409 /// if <tt>has_fold_case\<cpp_regex_traits\<Char\> \>::value</tt> is \c true.
|
Chris@16
|
410 ///
|
Chris@16
|
411 /// \param ch The source character.
|
Chris@16
|
412 /// \return \c string_type containing all chars which are equal to \c ch when disregarding
|
Chris@16
|
413 /// case
|
Chris@16
|
414 string_type fold_case(char_type ch) const
|
Chris@16
|
415 {
|
Chris@16
|
416 BOOST_MPL_ASSERT((is_same<char_type, char>));
|
Chris@16
|
417 char_type ntcs[] = {
|
Chris@16
|
418 this->ctype_->tolower(ch)
|
Chris@16
|
419 , this->ctype_->toupper(ch)
|
Chris@16
|
420 , 0
|
Chris@16
|
421 };
|
Chris@16
|
422 if(ntcs[1] == ntcs[0])
|
Chris@16
|
423 ntcs[1] = 0;
|
Chris@16
|
424 return string_type(ntcs);
|
Chris@16
|
425 }
|
Chris@16
|
426
|
Chris@16
|
427 /// Checks to see if a character is within a character range.
|
Chris@16
|
428 ///
|
Chris@16
|
429 /// \param first The bottom of the range, inclusive.
|
Chris@16
|
430 /// \param last The top of the range, inclusive.
|
Chris@16
|
431 /// \param ch The source character.
|
Chris@16
|
432 /// \return first <= ch && ch <= last.
|
Chris@16
|
433 static bool in_range(char_type first, char_type last, char_type ch)
|
Chris@16
|
434 {
|
Chris@16
|
435 return first <= ch && ch <= last;
|
Chris@16
|
436 }
|
Chris@16
|
437
|
Chris@16
|
438 /// Checks to see if a character is within a character range, irregardless of case.
|
Chris@16
|
439 ///
|
Chris@16
|
440 /// \param first The bottom of the range, inclusive.
|
Chris@16
|
441 /// \param last The top of the range, inclusive.
|
Chris@16
|
442 /// \param ch The source character.
|
Chris@16
|
443 /// \return in_range(first, last, ch) || in_range(first, last, tolower(ch, this->getloc())) ||
|
Chris@16
|
444 /// in_range(first, last, toupper(ch, this->getloc()))
|
Chris@16
|
445 /// \attention The default implementation doesn't do proper Unicode
|
Chris@16
|
446 /// case folding, but this is the best we can do with the standard
|
Chris@16
|
447 /// ctype facet.
|
Chris@16
|
448 bool in_range_nocase(char_type first, char_type last, char_type ch) const
|
Chris@16
|
449 {
|
Chris@16
|
450 // NOTE: this default implementation doesn't do proper Unicode
|
Chris@16
|
451 // case folding, but this is the best we can do with the standard
|
Chris@16
|
452 // std::ctype facet.
|
Chris@16
|
453 return this->in_range(first, last, ch)
|
Chris@16
|
454 || this->in_range(first, last, this->ctype_->toupper(ch))
|
Chris@16
|
455 || this->in_range(first, last, this->ctype_->tolower(ch));
|
Chris@16
|
456 }
|
Chris@16
|
457
|
Chris@16
|
458 /// INTERNAL ONLY
|
Chris@16
|
459 //string_type transform(char_type const *begin, char_type const *end) const
|
Chris@16
|
460 //{
|
Chris@16
|
461 // return this->collate_->transform(begin, end);
|
Chris@16
|
462 //}
|
Chris@16
|
463
|
Chris@16
|
464 /// Returns a sort key for the character sequence designated by the iterator range [F1, F2)
|
Chris@16
|
465 /// such that if the character sequence [G1, G2) sorts before the character sequence [H1, H2)
|
Chris@16
|
466 /// then v.transform(G1, G2) \< v.transform(H1, H2).
|
Chris@16
|
467 ///
|
Chris@16
|
468 /// \attention Not currently used
|
Chris@16
|
469 template<typename FwdIter>
|
Chris@16
|
470 string_type transform(FwdIter, FwdIter) const
|
Chris@16
|
471 {
|
Chris@16
|
472 //string_type str(begin, end);
|
Chris@16
|
473 //return this->transform(str.data(), str.data() + str.size());
|
Chris@16
|
474
|
Chris@16
|
475 BOOST_ASSERT(false);
|
Chris@16
|
476 return string_type();
|
Chris@16
|
477 }
|
Chris@16
|
478
|
Chris@16
|
479 /// Returns a sort key for the character sequence designated by the iterator range [F1, F2)
|
Chris@16
|
480 /// such that if the character sequence [G1, G2) sorts before the character sequence [H1, H2)
|
Chris@16
|
481 /// when character case is not considered then
|
Chris@16
|
482 /// v.transform_primary(G1, G2) \< v.transform_primary(H1, H2).
|
Chris@16
|
483 ///
|
Chris@16
|
484 /// \attention Not currently used
|
Chris@16
|
485 template<typename FwdIter>
|
Chris@16
|
486 string_type transform_primary(FwdIter, FwdIter ) const
|
Chris@16
|
487 {
|
Chris@16
|
488 BOOST_ASSERT(false); // TODO implement me
|
Chris@16
|
489 return string_type();
|
Chris@16
|
490 }
|
Chris@16
|
491
|
Chris@16
|
492 /// Returns a sequence of characters that represents the collating element
|
Chris@16
|
493 /// consisting of the character sequence designated by the iterator range [F1, F2).
|
Chris@16
|
494 /// Returns an empty string if the character sequence is not a valid collating element.
|
Chris@16
|
495 ///
|
Chris@16
|
496 /// \attention Not currently used
|
Chris@16
|
497 template<typename FwdIter>
|
Chris@16
|
498 string_type lookup_collatename(FwdIter, FwdIter) const
|
Chris@16
|
499 {
|
Chris@16
|
500 BOOST_ASSERT(false); // TODO implement me
|
Chris@16
|
501 return string_type();
|
Chris@16
|
502 }
|
Chris@16
|
503
|
Chris@16
|
504 /// For the character class name represented by the specified character sequence,
|
Chris@16
|
505 /// return the corresponding bitmask representation.
|
Chris@16
|
506 ///
|
Chris@16
|
507 /// \param begin A forward iterator to the start of the character sequence representing
|
Chris@16
|
508 /// the name of the character class.
|
Chris@16
|
509 /// \param end The end of the character sequence.
|
Chris@16
|
510 /// \param icase Specifies whether the returned bitmask should represent the case-insensitive
|
Chris@16
|
511 /// version of the character class.
|
Chris@16
|
512 /// \return A bitmask representing the character class.
|
Chris@16
|
513 template<typename FwdIter>
|
Chris@16
|
514 char_class_type lookup_classname(FwdIter begin, FwdIter end, bool icase) const
|
Chris@16
|
515 {
|
Chris@16
|
516 static detail::umaskex_t const icase_masks =
|
Chris@16
|
517 detail::std_ctype_lower | detail::std_ctype_upper;
|
Chris@16
|
518
|
Chris@16
|
519 BOOST_ASSERT(begin != end);
|
Chris@16
|
520 char_class_type char_class = this->lookup_classname_impl_(begin, end);
|
Chris@16
|
521 if(0 == char_class)
|
Chris@16
|
522 {
|
Chris@16
|
523 // convert the string to lowercase
|
Chris@16
|
524 string_type classname(begin, end);
|
Chris@16
|
525 for(typename string_type::size_type i = 0, len = classname.size(); i < len; ++i)
|
Chris@16
|
526 {
|
Chris@16
|
527 classname[i] = this->translate_nocase(classname[i]);
|
Chris@16
|
528 }
|
Chris@16
|
529 char_class = this->lookup_classname_impl_(classname.begin(), classname.end());
|
Chris@16
|
530 }
|
Chris@16
|
531 // erase case-sensitivity if icase==true
|
Chris@16
|
532 if(icase && 0 != (char_class & icase_masks))
|
Chris@16
|
533 {
|
Chris@16
|
534 char_class |= icase_masks;
|
Chris@16
|
535 }
|
Chris@16
|
536 return char_class;
|
Chris@16
|
537 }
|
Chris@16
|
538
|
Chris@16
|
539 /// Tests a character against a character class bitmask.
|
Chris@16
|
540 ///
|
Chris@16
|
541 /// \param ch The character to test.
|
Chris@16
|
542 /// \param mask The character class bitmask against which to test.
|
Chris@16
|
543 /// \pre mask is a bitmask returned by lookup_classname, or is several such masks bit-or'ed
|
Chris@16
|
544 /// together.
|
Chris@16
|
545 /// \return true if the character is a member of any of the specified character classes, false
|
Chris@16
|
546 /// otherwise.
|
Chris@16
|
547 bool isctype(char_type ch, char_class_type mask) const
|
Chris@16
|
548 {
|
Chris@16
|
549 return this->base_type::is(*this->ctype_, ch, mask);
|
Chris@16
|
550 }
|
Chris@16
|
551
|
Chris@16
|
552 /// Convert a digit character into the integer it represents.
|
Chris@16
|
553 ///
|
Chris@16
|
554 /// \param ch The digit character.
|
Chris@16
|
555 /// \param radix The radix to use for the conversion.
|
Chris@16
|
556 /// \pre radix is one of 8, 10, or 16.
|
Chris@16
|
557 /// \return -1 if ch is not a digit character, the integer value of the character otherwise.
|
Chris@16
|
558 /// The conversion is performed by imbueing a std::stringstream with this-\>getloc();
|
Chris@16
|
559 /// setting the radix to one of oct, hex or dec; inserting ch into the stream; and
|
Chris@16
|
560 /// extracting an int.
|
Chris@16
|
561 int value(char_type ch, int radix) const
|
Chris@16
|
562 {
|
Chris@16
|
563 BOOST_ASSERT(8 == radix || 10 == radix || 16 == radix);
|
Chris@16
|
564 int val = -1;
|
Chris@16
|
565 std::basic_stringstream<char_type> str;
|
Chris@16
|
566 str.imbue(this->getloc());
|
Chris@16
|
567 str << (8 == radix ? std::oct : (16 == radix ? std::hex : std::dec));
|
Chris@16
|
568 str.put(ch);
|
Chris@16
|
569 str >> val;
|
Chris@16
|
570 return str.fail() ? -1 : val;
|
Chris@16
|
571 }
|
Chris@16
|
572
|
Chris@16
|
573 /// Imbues *this with loc
|
Chris@16
|
574 ///
|
Chris@16
|
575 /// \param loc A std::locale.
|
Chris@16
|
576 /// \return the previous std::locale used by *this.
|
Chris@16
|
577 locale_type imbue(locale_type loc)
|
Chris@16
|
578 {
|
Chris@16
|
579 locale_type old_loc = this->loc_;
|
Chris@16
|
580 this->loc_ = loc;
|
Chris@16
|
581 this->ctype_ = &BOOST_USE_FACET(std::ctype<char_type>, this->loc_);
|
Chris@16
|
582 //this->collate_ = &BOOST_USE_FACET(std::collate<char_type>, this->loc_);
|
Chris@16
|
583 this->base_type::imbue(this->loc_);
|
Chris@16
|
584 return old_loc;
|
Chris@16
|
585 }
|
Chris@16
|
586
|
Chris@16
|
587 /// Returns the current std::locale used by *this.
|
Chris@16
|
588 ///
|
Chris@16
|
589 locale_type getloc() const
|
Chris@16
|
590 {
|
Chris@16
|
591 return this->loc_;
|
Chris@16
|
592 }
|
Chris@16
|
593
|
Chris@16
|
594 private:
|
Chris@16
|
595
|
Chris@16
|
596 ///////////////////////////////////////////////////////////////////////////////
|
Chris@16
|
597 // char_class_pair
|
Chris@16
|
598 /// INTERNAL ONLY
|
Chris@16
|
599 struct char_class_pair
|
Chris@16
|
600 {
|
Chris@16
|
601 char_type const *class_name_;
|
Chris@16
|
602 char_class_type class_type_;
|
Chris@16
|
603 };
|
Chris@16
|
604
|
Chris@16
|
605 ///////////////////////////////////////////////////////////////////////////////
|
Chris@16
|
606 // char_class
|
Chris@16
|
607 /// INTERNAL ONLY
|
Chris@16
|
608 static char_class_pair const &char_class(std::size_t j)
|
Chris@16
|
609 {
|
Chris@16
|
610 static BOOST_CONSTEXPR_OR_CONST char_class_pair s_char_class_map[] =
|
Chris@16
|
611 {
|
Chris@16
|
612 { BOOST_XPR_CSTR_(char_type, "alnum"), detail::std_ctype_alnum }
|
Chris@16
|
613 , { BOOST_XPR_CSTR_(char_type, "alpha"), detail::std_ctype_alpha }
|
Chris@16
|
614 , { BOOST_XPR_CSTR_(char_type, "blank"), detail::non_std_ctype_blank }
|
Chris@16
|
615 , { BOOST_XPR_CSTR_(char_type, "cntrl"), detail::std_ctype_cntrl }
|
Chris@16
|
616 , { BOOST_XPR_CSTR_(char_type, "d"), detail::std_ctype_digit }
|
Chris@16
|
617 , { BOOST_XPR_CSTR_(char_type, "digit"), detail::std_ctype_digit }
|
Chris@16
|
618 , { BOOST_XPR_CSTR_(char_type, "graph"), detail::std_ctype_graph }
|
Chris@16
|
619 , { BOOST_XPR_CSTR_(char_type, "lower"), detail::std_ctype_lower }
|
Chris@16
|
620 , { BOOST_XPR_CSTR_(char_type, "newline"),detail::non_std_ctype_newline }
|
Chris@16
|
621 , { BOOST_XPR_CSTR_(char_type, "print"), detail::std_ctype_print }
|
Chris@16
|
622 , { BOOST_XPR_CSTR_(char_type, "punct"), detail::std_ctype_punct }
|
Chris@16
|
623 , { BOOST_XPR_CSTR_(char_type, "s"), detail::std_ctype_space }
|
Chris@16
|
624 , { BOOST_XPR_CSTR_(char_type, "space"), detail::std_ctype_space }
|
Chris@16
|
625 , { BOOST_XPR_CSTR_(char_type, "upper"), detail::std_ctype_upper }
|
Chris@16
|
626 , { BOOST_XPR_CSTR_(char_type, "w"), detail::std_ctype_alnum | detail::non_std_ctype_underscore }
|
Chris@16
|
627 , { BOOST_XPR_CSTR_(char_type, "xdigit"), detail::std_ctype_xdigit }
|
Chris@16
|
628 , { 0, 0 }
|
Chris@16
|
629 };
|
Chris@16
|
630 return s_char_class_map[j];
|
Chris@16
|
631 }
|
Chris@16
|
632
|
Chris@16
|
633 ///////////////////////////////////////////////////////////////////////////////
|
Chris@16
|
634 // lookup_classname_impl
|
Chris@16
|
635 /// INTERNAL ONLY
|
Chris@16
|
636 template<typename FwdIter>
|
Chris@16
|
637 static char_class_type lookup_classname_impl_(FwdIter begin, FwdIter end)
|
Chris@16
|
638 {
|
Chris@16
|
639 // find the classname
|
Chris@16
|
640 typedef cpp_regex_traits<Char> this_t;
|
Chris@16
|
641 for(std::size_t j = 0; 0 != this_t::char_class(j).class_name_; ++j)
|
Chris@16
|
642 {
|
Chris@16
|
643 if(this_t::compare_(this_t::char_class(j).class_name_, begin, end))
|
Chris@16
|
644 {
|
Chris@16
|
645 return this_t::char_class(j).class_type_;
|
Chris@16
|
646 }
|
Chris@16
|
647 }
|
Chris@16
|
648 return 0;
|
Chris@16
|
649 }
|
Chris@16
|
650
|
Chris@16
|
651 /// INTERNAL ONLY
|
Chris@16
|
652 template<typename FwdIter>
|
Chris@16
|
653 static bool compare_(char_type const *name, FwdIter begin, FwdIter end)
|
Chris@16
|
654 {
|
Chris@16
|
655 for(; *name && begin != end; ++name, ++begin)
|
Chris@16
|
656 {
|
Chris@16
|
657 if(*name != *begin)
|
Chris@16
|
658 {
|
Chris@16
|
659 return false;
|
Chris@16
|
660 }
|
Chris@16
|
661 }
|
Chris@16
|
662 return !*name && begin == end;
|
Chris@16
|
663 }
|
Chris@16
|
664
|
Chris@16
|
665 locale_type loc_;
|
Chris@16
|
666 std::ctype<char_type> const *ctype_;
|
Chris@16
|
667 //std::collate<char_type> const *collate_;
|
Chris@16
|
668 };
|
Chris@16
|
669
|
Chris@16
|
670 ///////////////////////////////////////////////////////////////////////////////
|
Chris@16
|
671 // cpp_regex_traits<>::hash specializations
|
Chris@16
|
672 template<>
|
Chris@16
|
673 inline unsigned char cpp_regex_traits<unsigned char>::hash(unsigned char ch)
|
Chris@16
|
674 {
|
Chris@16
|
675 return ch;
|
Chris@16
|
676 }
|
Chris@16
|
677
|
Chris@16
|
678 template<>
|
Chris@16
|
679 inline unsigned char cpp_regex_traits<char>::hash(char ch)
|
Chris@16
|
680 {
|
Chris@16
|
681 return static_cast<unsigned char>(ch);
|
Chris@16
|
682 }
|
Chris@16
|
683
|
Chris@16
|
684 template<>
|
Chris@16
|
685 inline unsigned char cpp_regex_traits<signed char>::hash(signed char ch)
|
Chris@16
|
686 {
|
Chris@16
|
687 return static_cast<unsigned char>(ch);
|
Chris@16
|
688 }
|
Chris@16
|
689
|
Chris@16
|
690 #ifndef BOOST_XPRESSIVE_NO_WREGEX
|
Chris@16
|
691 template<>
|
Chris@16
|
692 inline unsigned char cpp_regex_traits<wchar_t>::hash(wchar_t ch)
|
Chris@16
|
693 {
|
Chris@16
|
694 return static_cast<unsigned char>(ch);
|
Chris@16
|
695 }
|
Chris@16
|
696 #endif
|
Chris@16
|
697
|
Chris@16
|
698 // Narrow C++ traits has fold_case() member function.
|
Chris@16
|
699 template<>
|
Chris@16
|
700 struct has_fold_case<cpp_regex_traits<char> >
|
Chris@16
|
701 : mpl::true_
|
Chris@16
|
702 {
|
Chris@16
|
703 };
|
Chris@16
|
704
|
Chris@16
|
705
|
Chris@16
|
706 }}
|
Chris@16
|
707
|
Chris@16
|
708 #endif
|