Chris@16
|
1 /*
|
Chris@16
|
2 *
|
Chris@16
|
3 * Copyright (c) 2004
|
Chris@16
|
4 * John Maddock
|
Chris@16
|
5 *
|
Chris@16
|
6 * Use, modification and distribution are subject to the
|
Chris@16
|
7 * Boost Software License, Version 1.0. (See accompanying file
|
Chris@16
|
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
Chris@16
|
9 *
|
Chris@16
|
10 */
|
Chris@16
|
11
|
Chris@16
|
12 /*
|
Chris@16
|
13 * LOCATION: see http://www.boost.org for most recent version.
|
Chris@16
|
14 * FILE regex_traits_defaults.hpp
|
Chris@16
|
15 * VERSION see <boost/version.hpp>
|
Chris@16
|
16 * DESCRIPTION: Declares APIs for access to regex_traits default properties.
|
Chris@16
|
17 */
|
Chris@16
|
18
|
Chris@16
|
19 #ifndef BOOST_REGEX_TRAITS_DEFAULTS_HPP_INCLUDED
|
Chris@16
|
20 #define BOOST_REGEX_TRAITS_DEFAULTS_HPP_INCLUDED
|
Chris@16
|
21
|
Chris@16
|
22 #ifdef BOOST_MSVC
|
Chris@16
|
23 #pragma warning(push)
|
Chris@16
|
24 #pragma warning(disable: 4103)
|
Chris@16
|
25 #endif
|
Chris@16
|
26 #ifdef BOOST_HAS_ABI_HEADERS
|
Chris@16
|
27 # include BOOST_ABI_PREFIX
|
Chris@16
|
28 #endif
|
Chris@16
|
29 #ifdef BOOST_MSVC
|
Chris@16
|
30 #pragma warning(pop)
|
Chris@16
|
31 #endif
|
Chris@16
|
32
|
Chris@16
|
33 #ifndef BOOST_REGEX_SYNTAX_TYPE_HPP
|
Chris@16
|
34 #include <boost/regex/v4/syntax_type.hpp>
|
Chris@16
|
35 #endif
|
Chris@16
|
36 #ifndef BOOST_REGEX_ERROR_TYPE_HPP
|
Chris@16
|
37 #include <boost/regex/v4/error_type.hpp>
|
Chris@16
|
38 #endif
|
Chris@16
|
39
|
Chris@16
|
40 #ifdef BOOST_NO_STDC_NAMESPACE
|
Chris@16
|
41 namespace std{
|
Chris@16
|
42 using ::strlen;
|
Chris@16
|
43 }
|
Chris@16
|
44 #endif
|
Chris@16
|
45
|
Chris@16
|
46 namespace boost{ namespace re_detail{
|
Chris@16
|
47
|
Chris@16
|
48
|
Chris@16
|
49 //
|
Chris@16
|
50 // helpers to suppress warnings:
|
Chris@16
|
51 //
|
Chris@16
|
//
// is_extended: returns true when the character code c does not fit in a
// single 8-bit unit, i.e. when its value is 256 or greater.  256 is the
// first extended value, so the comparison has to include it.
//
// The plain char overload answers false without comparing anything: a char
// can never hold such a value, and skipping the comparison keeps compilers
// from warning about a test that is always false.
//
template <class charT>
inline bool is_extended(charT c)
{ return c >= 256; }
inline bool is_extended(char)
{ return false; }
|
Chris@16
|
57
|
Chris@16
|
58
|
Chris@16
|
59 BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_syntax(regex_constants::syntax_type n);
|
Chris@16
|
60 BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_error_string(regex_constants::error_type n);
|
Chris@16
|
61 BOOST_REGEX_DECL regex_constants::syntax_type BOOST_REGEX_CALL get_default_syntax_type(char c);
|
Chris@16
|
62 BOOST_REGEX_DECL regex_constants::escape_syntax_type BOOST_REGEX_CALL get_default_escape_syntax_type(char c);
|
Chris@16
|
63
|
Chris@16
|
64 // is charT c a combining character?
|
Chris@16
|
65 BOOST_REGEX_DECL bool BOOST_REGEX_CALL is_combining_implementation(uint_least16_t s);
|
Chris@16
|
66
|
Chris@16
|
67 template <class charT>
|
Chris@16
|
68 inline bool is_combining(charT c)
|
Chris@16
|
69 {
|
Chris@16
|
70 return (c <= static_cast<charT>(0)) ? false : ((c >= static_cast<charT>((std::numeric_limits<uint_least16_t>::max)())) ? false : is_combining_implementation(static_cast<unsigned short>(c)));
|
Chris@16
|
71 }
|
Chris@16
|
72 template <>
|
Chris@16
|
73 inline bool is_combining<char>(char)
|
Chris@16
|
74 {
|
Chris@16
|
75 return false;
|
Chris@16
|
76 }
|
Chris@16
|
77 template <>
|
Chris@16
|
78 inline bool is_combining<signed char>(signed char)
|
Chris@16
|
79 {
|
Chris@16
|
80 return false;
|
Chris@16
|
81 }
|
Chris@16
|
82 template <>
|
Chris@16
|
83 inline bool is_combining<unsigned char>(unsigned char)
|
Chris@16
|
84 {
|
Chris@16
|
85 return false;
|
Chris@16
|
86 }
|
Chris@16
|
//
// is_combining for wchar_t.  Which specialisation is compiled depends on
// what the preprocessor can tell us about wchar_t's range:
//  * MSVC, or an unsigned wchar_t no wider than unsigned short: every value
//    is passed straight to is_combining_implementation.
//  * an unsigned wchar_t that is wider: values at or above the largest
//    uint_least16_t are rejected first.
// In every other configuration no specialisation is provided here.
//
#if !defined(__hpux) && !defined(__WINSCW__) // can't use WCHAR_MAX/MIN in pp-directives
#ifdef _MSC_VER
template<>
inline bool is_combining<wchar_t>(wchar_t c)
{
   const unsigned short code = static_cast<unsigned short>(c);
   return is_combining_implementation(code);
}
#elif !defined(__DECCXX) && !defined(__osf__) && !defined(__OSF__) && defined(WCHAR_MIN) && (WCHAR_MIN == 0) && !defined(BOOST_NO_INTRINSIC_WCHAR_T)
#if defined(WCHAR_MAX) && (WCHAR_MAX <= USHRT_MAX)
template<>
inline bool is_combining<wchar_t>(wchar_t c)
{
   const unsigned short code = static_cast<unsigned short>(c);
   return is_combining_implementation(code);
}
#else
template<>
inline bool is_combining<wchar_t>(wchar_t c)
{
   if(c >= (std::numeric_limits<uint_least16_t>::max)())
      return false;
   return is_combining_implementation(static_cast<unsigned short>(c));
}
#endif
#endif
#endif
|
Chris@16
|
110
|
Chris@16
|
111 //
|
Chris@16
|
112 // is a charT c a line separator?
|
Chris@16
|
113 //
|
Chris@16
|
114 template <class charT>
|
Chris@16
|
115 inline bool is_separator(charT c)
|
Chris@16
|
116 {
|
Chris@16
|
117 return BOOST_REGEX_MAKE_BOOL(
|
Chris@16
|
118 (c == static_cast<charT>('\n'))
|
Chris@16
|
119 || (c == static_cast<charT>('\r'))
|
Chris@16
|
120 || (c == static_cast<charT>('\f'))
|
Chris@16
|
121 || (static_cast<boost::uint16_t>(c) == 0x2028u)
|
Chris@16
|
122 || (static_cast<boost::uint16_t>(c) == 0x2029u)
|
Chris@16
|
123 || (static_cast<boost::uint16_t>(c) == 0x85u));
|
Chris@16
|
124 }
|
Chris@16
|
125 template <>
|
Chris@16
|
126 inline bool is_separator<char>(char c)
|
Chris@16
|
127 {
|
Chris@16
|
128 return BOOST_REGEX_MAKE_BOOL((c == '\n') || (c == '\r') || (c == '\f'));
|
Chris@16
|
129 }
|
Chris@16
|
130
|
Chris@16
|
131 //
|
Chris@16
|
132 // get a default collating element:
|
Chris@16
|
133 //
|
Chris@16
|
134 BOOST_REGEX_DECL std::string BOOST_REGEX_CALL lookup_default_collate_name(const std::string& name);
|
Chris@16
|
135
|
Chris@16
|
136 //
|
Chris@16
|
137 // get the state_id of a character classification, the individual
|
Chris@16
|
138 // traits classes then transform that state_id into a bitmask:
|
Chris@16
|
139 //
|
Chris@16
|
140 template <class charT>
|
Chris@16
|
141 struct character_pointer_range
|
Chris@16
|
142 {
|
Chris@16
|
143 const charT* p1;
|
Chris@16
|
144 const charT* p2;
|
Chris@16
|
145
|
Chris@16
|
146 bool operator < (const character_pointer_range& r)const
|
Chris@16
|
147 {
|
Chris@16
|
148 return std::lexicographical_compare(p1, p2, r.p1, r.p2);
|
Chris@16
|
149 }
|
Chris@16
|
150 bool operator == (const character_pointer_range& r)const
|
Chris@16
|
151 {
|
Chris@16
|
152 // Not only do we check that the ranges are of equal size before
|
Chris@16
|
153 // calling std::equal, but there is no other algorithm available:
|
Chris@16
|
154 // not even a non-standard MS one. So forward to unchecked_equal
|
Chris@16
|
155 // in the MS case.
|
Chris@16
|
156 return ((p2 - p1) == (r.p2 - r.p1)) && re_detail::equal(p1, p2, r.p1);
|
Chris@16
|
157 }
|
Chris@16
|
158 };
|
Chris@16
|
159 template <class charT>
|
Chris@16
|
160 int get_default_class_id(const charT* p1, const charT* p2)
|
Chris@16
|
161 {
|
Chris@16
|
162 static const charT data[73] = {
|
Chris@16
|
163 'a', 'l', 'n', 'u', 'm',
|
Chris@16
|
164 'a', 'l', 'p', 'h', 'a',
|
Chris@16
|
165 'b', 'l', 'a', 'n', 'k',
|
Chris@16
|
166 'c', 'n', 't', 'r', 'l',
|
Chris@16
|
167 'd', 'i', 'g', 'i', 't',
|
Chris@16
|
168 'g', 'r', 'a', 'p', 'h',
|
Chris@16
|
169 'l', 'o', 'w', 'e', 'r',
|
Chris@16
|
170 'p', 'r', 'i', 'n', 't',
|
Chris@16
|
171 'p', 'u', 'n', 'c', 't',
|
Chris@16
|
172 's', 'p', 'a', 'c', 'e',
|
Chris@16
|
173 'u', 'n', 'i', 'c', 'o', 'd', 'e',
|
Chris@16
|
174 'u', 'p', 'p', 'e', 'r',
|
Chris@16
|
175 'v',
|
Chris@16
|
176 'w', 'o', 'r', 'd',
|
Chris@16
|
177 'x', 'd', 'i', 'g', 'i', 't',
|
Chris@16
|
178 };
|
Chris@16
|
179
|
Chris@16
|
180 static const character_pointer_range<charT> ranges[21] =
|
Chris@16
|
181 {
|
Chris@16
|
182 {data+0, data+5,}, // alnum
|
Chris@16
|
183 {data+5, data+10,}, // alpha
|
Chris@16
|
184 {data+10, data+15,}, // blank
|
Chris@16
|
185 {data+15, data+20,}, // cntrl
|
Chris@16
|
186 {data+20, data+21,}, // d
|
Chris@16
|
187 {data+20, data+25,}, // digit
|
Chris@16
|
188 {data+25, data+30,}, // graph
|
Chris@16
|
189 {data+29, data+30,}, // h
|
Chris@16
|
190 {data+30, data+31,}, // l
|
Chris@16
|
191 {data+30, data+35,}, // lower
|
Chris@16
|
192 {data+35, data+40,}, // print
|
Chris@16
|
193 {data+40, data+45,}, // punct
|
Chris@16
|
194 {data+45, data+46,}, // s
|
Chris@16
|
195 {data+45, data+50,}, // space
|
Chris@16
|
196 {data+57, data+58,}, // u
|
Chris@16
|
197 {data+50, data+57,}, // unicode
|
Chris@16
|
198 {data+57, data+62,}, // upper
|
Chris@16
|
199 {data+62, data+63,}, // v
|
Chris@16
|
200 {data+63, data+64,}, // w
|
Chris@16
|
201 {data+63, data+67,}, // word
|
Chris@16
|
202 {data+67, data+73,}, // xdigit
|
Chris@16
|
203 };
|
Chris@16
|
204 static const character_pointer_range<charT>* ranges_begin = ranges;
|
Chris@16
|
205 static const character_pointer_range<charT>* ranges_end = ranges + (sizeof(ranges)/sizeof(ranges[0]));
|
Chris@16
|
206
|
Chris@16
|
207 character_pointer_range<charT> t = { p1, p2, };
|
Chris@16
|
208 const character_pointer_range<charT>* p = std::lower_bound(ranges_begin, ranges_end, t);
|
Chris@16
|
209 if((p != ranges_end) && (t == *p))
|
Chris@16
|
210 return static_cast<int>(p - ranges);
|
Chris@16
|
211 return -1;
|
Chris@16
|
212 }
|
Chris@16
|
213
|
Chris@16
|
214 //
|
Chris@16
|
215 // helper functions:
|
Chris@16
|
216 //
|
Chris@16
|
//
// global_length: number of characters in the null-terminated string p,
// not counting the terminator.  The generic version scans for the
// terminator by hand; char and wchar_t use the C library routines.
//
template <class charT>
std::ptrdiff_t global_length(const charT* p)
{
   const charT* last = p;
   while(*last)
      ++last;
   return last - p;
}
template<>
inline std::ptrdiff_t global_length<char>(const char* p)
{
   return static_cast<std::ptrdiff_t>((std::strlen)(p));
}
#ifndef BOOST_NO_WREGEX
template<>
inline std::ptrdiff_t global_length<wchar_t>(const wchar_t* p)
{
   return static_cast<std::ptrdiff_t>((std::wcslen)(p));
}
#endif
|
Chris@16
|
240 template <class charT>
|
Chris@16
|
241 inline charT BOOST_REGEX_CALL global_lower(charT c)
|
Chris@16
|
242 {
|
Chris@16
|
243 return c;
|
Chris@16
|
244 }
|
Chris@16
|
245 template <class charT>
|
Chris@16
|
246 inline charT BOOST_REGEX_CALL global_upper(charT c)
|
Chris@16
|
247 {
|
Chris@16
|
248 return c;
|
Chris@16
|
249 }
|
Chris@16
|
250
|
Chris@16
|
251 BOOST_REGEX_DECL char BOOST_REGEX_CALL do_global_lower(char c);
|
Chris@16
|
252 BOOST_REGEX_DECL char BOOST_REGEX_CALL do_global_upper(char c);
|
Chris@16
|
253 #ifndef BOOST_NO_WREGEX
|
Chris@16
|
254 BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL do_global_lower(wchar_t c);
|
Chris@16
|
255 BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL do_global_upper(wchar_t c);
|
Chris@16
|
256 #endif
|
Chris@16
|
257 #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
|
Chris@16
|
258 BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL do_global_lower(unsigned short c);
|
Chris@16
|
259 BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL do_global_upper(unsigned short c);
|
Chris@16
|
260 #endif
|
Chris@16
|
261 //
|
Chris@16
|
262 // This sucks: declare template specialisations of global_lower/global_upper
|
Chris@16
|
263 // that just forward to the non-template implementation functions. We do
|
Chris@16
|
264 // this because there is one compiler (Compaq Tru64 C++) that doesn't seem
|
Chris@16
|
265 // to differentiate between templates and non-template overloads....
|
Chris@16
|
266 // what's more, the primary template, plus all overloads have to be
|
Chris@16
|
267 // defined in the same translation unit (if one is inline they all must be)
|
Chris@16
|
268 // otherwise the "local template instantiation" compiler option can pick
|
Chris@16
|
269 // the wrong instantiation when linking:
|
Chris@16
|
270 //
|
Chris@16
|
271 template<> inline char BOOST_REGEX_CALL global_lower<char>(char c){ return do_global_lower(c); }
|
Chris@16
|
272 template<> inline char BOOST_REGEX_CALL global_upper<char>(char c){ return do_global_upper(c); }
|
Chris@16
|
273 #ifndef BOOST_NO_WREGEX
|
Chris@16
|
274 template<> inline wchar_t BOOST_REGEX_CALL global_lower<wchar_t>(wchar_t c){ return do_global_lower(c); }
|
Chris@16
|
275 template<> inline wchar_t BOOST_REGEX_CALL global_upper<wchar_t>(wchar_t c){ return do_global_upper(c); }
|
Chris@16
|
276 #endif
|
Chris@16
|
277 #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
|
Chris@16
|
278 template<> inline unsigned short BOOST_REGEX_CALL global_lower<unsigned short>(unsigned short c){ return do_global_lower(c); }
|
Chris@16
|
279 template<> inline unsigned short BOOST_REGEX_CALL global_upper<unsigned short>(unsigned short c){ return do_global_upper(c); }
|
Chris@16
|
280 #endif
|
Chris@16
|
281
|
Chris@16
|
//
// global_value: numeric value of the digit c.  '0'-'9' give 0-9, while
// 'a'-'f' and 'A'-'F' both give 10-15; any other character gives -1.
//
template <class charT>
int global_value(charT c)
{
   const charT digit_first = '0';
   const charT digit_last = '9';
   const charT upper_first = 'A';
   const charT upper_last = 'F';
   const charT lower_first = 'a';
   const charT lower_last = 'f';

   if((c >= digit_first) && (c <= digit_last))
      return c - digit_first;
   if((c >= upper_first) && (c <= upper_last))
      return 10 + (c - upper_first);
   if((c >= lower_first) && (c <= lower_last))
      return 10 + (c - lower_first);
   return -1;
}
|
Chris@16
|
//
// global_toi: converts the leading digits of [p1, p2) to an int.
//
//   p1    - in/out: start of the text; on success it is left pointing at
//           the first character that was not consumed.
//   p2    - end of the text.
//   radix - number base; a character is a digit when t.value(ch, radix)
//           lies in [0, radix).
//   t     - traits object providing value().
//
// Returns the converted value, or -1 when the range is empty, when it does
// not start with a digit, or when the value does not fit in an int.  In
// the overflow case p1 is left at the digit that would not fit.
//
template <class charT, class traits>
int global_toi(const charT*& p1, const charT* p2, int radix, const traits& t)
{
   (void)t;  // warning suppression
   // An empty range must be rejected before *p1 is looked at:
   if(p1 == p2)
      return -1;
   int next_value = t.value(*p1, radix);
   if((next_value < 0) || (next_value >= radix))
      return -1;
   // From here on 0 <= next_value < radix, so radix is at least 1.
   const int limit = (std::numeric_limits<int>::max)();
   int result = 0;
   do
   {
      // result * radix + next_value must not exceed limit; test it in a
      // form that cannot itself overflow:
      if(result > (limit - next_value) / radix)
         return -1;
      result = result * radix + next_value;
      ++p1;
      if(p1 == p2)
         break;
      next_value = t.value(*p1, radix);
   }
   while((next_value >= 0) && (next_value < radix));
   return result;
}
|
Chris@16
|
319
|
Chris@16
|
//
// get_escape_R_string: returns a null-terminated pattern, as a string of
// charT, of the form
//
//    (?>CR LF?|[LF VT FF 0x85 ...])
//
// where CR, LF, VT, FF and 0x85 are the raw characters 0x0D, 0x0A, 0x0B,
// 0x0C and 0x85.  When charT can represent the value 0x2029 the set
// additionally contains the text \x{2028}\x{2029}.
//
template <class charT>
inline const charT* get_escape_R_string()
{
#ifdef BOOST_MSVC
#  pragma warning(push)
#  pragma warning(disable:4309 4245)
#endif
   // the version whose set includes \x{2028} and \x{2029}:
   static const charT with_wide_separators[] = { '(', '?', '>', '\x0D', '\x0A', '?',
      '|', '[', '\x0A', '\x0B', '\x0C', static_cast<unsigned char>('\x85'), '\\', 'x', '{', '2', '0', '2', '8', '}',
      '\\', 'x', '{', '2', '0', '2', '9', '}', ']', ')', '\0' };
   // the version without them:
   static const charT without_wide_separators[] = { '(', '?', '>', '\x0D', '\x0A', '?',
      '|', '[', '\x0A', '\x0B', '\x0C', static_cast<unsigned char>('\x85'), ']', ')', '\0' };

   // Push 0x2029 through charT and see whether it comes back unchanged:
   const charT probe = static_cast<charT>(0x2029u);
   if(static_cast<unsigned>(probe) == 0x2029u)
      return with_wide_separators;
   return without_wide_separators;
#ifdef BOOST_MSVC
#  pragma warning(pop)
#endif
}

//
// plain char: always the version without \x{2028} and \x{2029}.
//
template <>
inline const char* get_escape_R_string<char>()
{
#ifdef BOOST_MSVC
#  pragma warning(push)
#  pragma warning(disable:4309)
#endif
   static const char narrow_pattern[] = { '(', '?', '>', '\x0D', '\x0A', '?',
      '|', '[', '\x0A', '\x0B', '\x0C', '\x85', ']', ')', '\0' };
   return narrow_pattern;
#ifdef BOOST_MSVC
#  pragma warning(pop)
#endif
}
|
Chris@16
|
356
|
Chris@16
|
357 } // re_detail
|
Chris@16
|
358 } // boost
|
Chris@16
|
359
|
Chris@16
|
360 #ifdef BOOST_MSVC
|
Chris@16
|
361 #pragma warning(push)
|
Chris@16
|
362 #pragma warning(disable: 4103)
|
Chris@16
|
363 #endif
|
Chris@16
|
364 #ifdef BOOST_HAS_ABI_HEADERS
|
Chris@16
|
365 # include BOOST_ABI_SUFFIX
|
Chris@16
|
366 #endif
|
Chris@16
|
367 #ifdef BOOST_MSVC
|
Chris@16
|
368 #pragma warning(pop)
|
Chris@16
|
369 #endif
|
Chris@16
|
370
|
Chris@16
|
371 #endif
|