Chris@16
|
1 /*
|
Chris@16
|
2 *
|
Chris@16
|
3 * Copyright (c) 2004
|
Chris@16
|
4 * John Maddock
|
Chris@16
|
5 *
|
Chris@16
|
6 * Use, modification and distribution are subject to the
|
Chris@16
|
7 * Boost Software License, Version 1.0. (See accompanying file
|
Chris@16
|
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
Chris@16
|
9 *
|
Chris@16
|
10 */
|
Chris@16
|
11
|
Chris@16
|
12 /*
|
Chris@16
|
13 * LOCATION: see http://www.boost.org for most recent version.
|
Chris@16
|
14 * FILE icu.hpp
|
Chris@16
|
15 * VERSION see <boost/version.hpp>
|
Chris@16
|
16 * DESCRIPTION: Unicode regular expressions on top of the ICU Library.
|
Chris@16
|
17 */
|
Chris@16
|
18
|
Chris@16
|
19 #ifndef BOOST_REGEX_ICU_HPP
|
Chris@16
|
20 #define BOOST_REGEX_ICU_HPP
|
Chris@16
|
21
|
Chris@16
|
22 #include <unicode/utypes.h>
|
Chris@16
|
23 #include <unicode/uchar.h>
|
Chris@16
|
24 #include <unicode/coll.h>
|
Chris@16
|
25 #include <boost/regex.hpp>
|
Chris@16
|
26 #include <boost/regex/pending/unicode_iterator.hpp>
|
Chris@16
|
27 #include <boost/mpl/int_fwd.hpp>
|
Chris@16
|
28 #include <bitset>
|
Chris@16
|
29
|
Chris@16
|
30 #ifdef BOOST_MSVC
|
Chris@16
|
31 #pragma warning (push)
|
Chris@16
|
32 #pragma warning (disable: 4251)
|
Chris@16
|
33 #endif
|
Chris@16
|
34
|
Chris@16
|
35 namespace boost{
|
Chris@16
|
36
|
Chris@16
|
37 namespace re_detail{
|
Chris@16
|
38
|
Chris@16
|
39 //
|
Chris@16
|
40 // Implementation details:
|
Chris@16
|
41 //
|
Chris@16
|
42 class BOOST_REGEX_DECL icu_regex_traits_implementation
|
Chris@16
|
43 {
|
Chris@16
|
44 typedef UChar32 char_type;
|
Chris@16
|
45 typedef std::size_t size_type;
|
Chris@16
|
46 typedef std::vector<char_type> string_type;
|
Chris@16
|
47 typedef U_NAMESPACE_QUALIFIER Locale locale_type;
|
Chris@16
|
48 typedef boost::uint_least32_t char_class_type;
|
Chris@16
|
49 public:
|
Chris@16
|
50 icu_regex_traits_implementation(const U_NAMESPACE_QUALIFIER Locale& l)
|
Chris@16
|
51 : m_locale(l)
|
Chris@16
|
52 {
|
Chris@16
|
53 UErrorCode success = U_ZERO_ERROR;
|
Chris@16
|
54 m_collator.reset(U_NAMESPACE_QUALIFIER Collator::createInstance(l, success));
|
Chris@16
|
55 if(U_SUCCESS(success) == 0)
|
Chris@16
|
56 init_error();
|
Chris@16
|
57 m_collator->setStrength(U_NAMESPACE_QUALIFIER Collator::IDENTICAL);
|
Chris@16
|
58 success = U_ZERO_ERROR;
|
Chris@16
|
59 m_primary_collator.reset(U_NAMESPACE_QUALIFIER Collator::createInstance(l, success));
|
Chris@16
|
60 if(U_SUCCESS(success) == 0)
|
Chris@16
|
61 init_error();
|
Chris@16
|
62 m_primary_collator->setStrength(U_NAMESPACE_QUALIFIER Collator::PRIMARY);
|
Chris@16
|
63 }
|
Chris@16
|
64 U_NAMESPACE_QUALIFIER Locale getloc()const
|
Chris@16
|
65 {
|
Chris@16
|
66 return m_locale;
|
Chris@16
|
67 }
|
Chris@16
|
68 string_type do_transform(const char_type* p1, const char_type* p2, const U_NAMESPACE_QUALIFIER Collator* pcoll) const;
|
Chris@16
|
69 string_type transform(const char_type* p1, const char_type* p2) const
|
Chris@16
|
70 {
|
Chris@16
|
71 return do_transform(p1, p2, m_collator.get());
|
Chris@16
|
72 }
|
Chris@16
|
73 string_type transform_primary(const char_type* p1, const char_type* p2) const
|
Chris@16
|
74 {
|
Chris@16
|
75 return do_transform(p1, p2, m_primary_collator.get());
|
Chris@16
|
76 }
|
Chris@16
|
77 private:
|
Chris@16
|
78 void init_error()
|
Chris@16
|
79 {
|
Chris@16
|
80 std::runtime_error e("Could not initialize ICU resources");
|
Chris@16
|
81 boost::throw_exception(e);
|
Chris@16
|
82 }
|
Chris@16
|
83 U_NAMESPACE_QUALIFIER Locale m_locale; // The ICU locale that we're using
|
Chris@16
|
84 boost::scoped_ptr< U_NAMESPACE_QUALIFIER Collator> m_collator; // The full collation object
|
Chris@16
|
85 boost::scoped_ptr< U_NAMESPACE_QUALIFIER Collator> m_primary_collator; // The primary collation object
|
Chris@16
|
86 };
|
Chris@16
|
87
|
Chris@16
|
88 inline boost::shared_ptr<icu_regex_traits_implementation> get_icu_regex_traits_implementation(const U_NAMESPACE_QUALIFIER Locale& loc)
|
Chris@16
|
89 {
|
Chris@16
|
90 return boost::shared_ptr<icu_regex_traits_implementation>(new icu_regex_traits_implementation(loc));
|
Chris@16
|
91 }
|
Chris@16
|
92
|
Chris@16
|
93 }
|
Chris@16
|
94
|
Chris@16
|
95 class BOOST_REGEX_DECL icu_regex_traits
|
Chris@16
|
96 {
|
Chris@16
|
97 public:
|
Chris@16
|
98 typedef UChar32 char_type;
|
Chris@16
|
99 typedef std::size_t size_type;
|
Chris@16
|
100 typedef std::vector<char_type> string_type;
|
Chris@16
|
101 typedef U_NAMESPACE_QUALIFIER Locale locale_type;
|
Chris@16
|
102 #ifdef BOOST_NO_INT64_T
|
Chris@16
|
103 typedef std::bitset<64> char_class_type;
|
Chris@16
|
104 #else
|
Chris@16
|
105 typedef boost::uint64_t char_class_type;
|
Chris@16
|
106 #endif
|
Chris@16
|
107
|
Chris@16
|
108 struct boost_extensions_tag{};
|
Chris@16
|
109
|
Chris@16
|
110 icu_regex_traits()
|
Chris@16
|
111 : m_pimpl(re_detail::get_icu_regex_traits_implementation(U_NAMESPACE_QUALIFIER Locale()))
|
Chris@16
|
112 {
|
Chris@16
|
113 }
|
Chris@16
|
114 static size_type length(const char_type* p);
|
Chris@16
|
115
|
Chris@16
|
116 ::boost::regex_constants::syntax_type syntax_type(char_type c)const
|
Chris@16
|
117 {
|
Chris@16
|
118 return ((c < 0x7f) && (c > 0)) ? re_detail::get_default_syntax_type(static_cast<char>(c)) : regex_constants::syntax_char;
|
Chris@16
|
119 }
|
Chris@16
|
120 ::boost::regex_constants::escape_syntax_type escape_syntax_type(char_type c) const
|
Chris@16
|
121 {
|
Chris@16
|
122 return ((c < 0x7f) && (c > 0)) ? re_detail::get_default_escape_syntax_type(static_cast<char>(c)) : regex_constants::syntax_char;
|
Chris@16
|
123 }
|
Chris@16
|
124 char_type translate(char_type c) const
|
Chris@16
|
125 {
|
Chris@16
|
126 return c;
|
Chris@16
|
127 }
|
Chris@16
|
128 char_type translate_nocase(char_type c) const
|
Chris@16
|
129 {
|
Chris@16
|
130 return ::u_tolower(c);
|
Chris@16
|
131 }
|
Chris@16
|
132 char_type translate(char_type c, bool icase) const
|
Chris@16
|
133 {
|
Chris@16
|
134 return icase ? translate_nocase(c) : translate(c);
|
Chris@16
|
135 }
|
Chris@16
|
136 char_type tolower(char_type c) const
|
Chris@16
|
137 {
|
Chris@16
|
138 return ::u_tolower(c);
|
Chris@16
|
139 }
|
Chris@16
|
140 char_type toupper(char_type c) const
|
Chris@16
|
141 {
|
Chris@16
|
142 return ::u_toupper(c);
|
Chris@16
|
143 }
|
Chris@16
|
144 string_type transform(const char_type* p1, const char_type* p2) const
|
Chris@16
|
145 {
|
Chris@16
|
146 return m_pimpl->transform(p1, p2);
|
Chris@16
|
147 }
|
Chris@16
|
148 string_type transform_primary(const char_type* p1, const char_type* p2) const
|
Chris@16
|
149 {
|
Chris@16
|
150 return m_pimpl->transform_primary(p1, p2);
|
Chris@16
|
151 }
|
Chris@16
|
152 char_class_type lookup_classname(const char_type* p1, const char_type* p2) const;
|
Chris@16
|
153 string_type lookup_collatename(const char_type* p1, const char_type* p2) const;
|
Chris@16
|
154 bool isctype(char_type c, char_class_type f) const;
|
Chris@16
|
155 int toi(const char_type*& p1, const char_type* p2, int radix)const
|
Chris@16
|
156 {
|
Chris@16
|
157 return re_detail::global_toi(p1, p2, radix, *this);
|
Chris@16
|
158 }
|
Chris@16
|
159 int value(char_type c, int radix)const
|
Chris@16
|
160 {
|
Chris@16
|
161 return u_digit(c, static_cast< ::int8_t>(radix));
|
Chris@16
|
162 }
|
Chris@16
|
163 locale_type imbue(locale_type l)
|
Chris@16
|
164 {
|
Chris@16
|
165 locale_type result(m_pimpl->getloc());
|
Chris@16
|
166 m_pimpl = re_detail::get_icu_regex_traits_implementation(l);
|
Chris@16
|
167 return result;
|
Chris@16
|
168 }
|
Chris@16
|
169 locale_type getloc()const
|
Chris@16
|
170 {
|
Chris@16
|
171 return locale_type();
|
Chris@16
|
172 }
|
Chris@16
|
173 std::string error_string(::boost::regex_constants::error_type n) const
|
Chris@16
|
174 {
|
Chris@16
|
175 return re_detail::get_default_error_string(n);
|
Chris@16
|
176 }
|
Chris@16
|
177 private:
|
Chris@16
|
178 icu_regex_traits(const icu_regex_traits&);
|
Chris@16
|
179 icu_regex_traits& operator=(const icu_regex_traits&);
|
Chris@16
|
180
|
Chris@16
|
181 //
|
Chris@16
|
182 // define the bitmasks offsets we need for additional character properties:
|
Chris@16
|
183 //
|
Chris@16
|
184 enum{
|
Chris@16
|
185 offset_blank = U_CHAR_CATEGORY_COUNT,
|
Chris@16
|
186 offset_space = U_CHAR_CATEGORY_COUNT+1,
|
Chris@16
|
187 offset_xdigit = U_CHAR_CATEGORY_COUNT+2,
|
Chris@16
|
188 offset_underscore = U_CHAR_CATEGORY_COUNT+3,
|
Chris@16
|
189 offset_unicode = U_CHAR_CATEGORY_COUNT+4,
|
Chris@16
|
190 offset_any = U_CHAR_CATEGORY_COUNT+5,
|
Chris@16
|
191 offset_ascii = U_CHAR_CATEGORY_COUNT+6,
|
Chris@16
|
192 offset_horizontal = U_CHAR_CATEGORY_COUNT+7,
|
Chris@16
|
193 offset_vertical = U_CHAR_CATEGORY_COUNT+8
|
Chris@16
|
194 };
|
Chris@16
|
195
|
Chris@16
|
196 //
|
Chris@16
|
197 // and now the masks:
|
Chris@16
|
198 //
|
Chris@16
|
199 static const char_class_type mask_blank;
|
Chris@16
|
200 static const char_class_type mask_space;
|
Chris@16
|
201 static const char_class_type mask_xdigit;
|
Chris@16
|
202 static const char_class_type mask_underscore;
|
Chris@16
|
203 static const char_class_type mask_unicode;
|
Chris@16
|
204 static const char_class_type mask_any;
|
Chris@16
|
205 static const char_class_type mask_ascii;
|
Chris@16
|
206 static const char_class_type mask_horizontal;
|
Chris@16
|
207 static const char_class_type mask_vertical;
|
Chris@16
|
208
|
Chris@16
|
209 static char_class_type lookup_icu_mask(const ::UChar32* p1, const ::UChar32* p2);
|
Chris@16
|
210
|
Chris@16
|
211 boost::shared_ptr< ::boost::re_detail::icu_regex_traits_implementation> m_pimpl;
|
Chris@16
|
212 };
|
Chris@16
|
213
|
Chris@16
|
214 } // namespace boost
|
Chris@16
|
215
|
Chris@16
|
216 //
|
Chris@16
|
217 // template instances:
|
Chris@16
|
218 //
|
Chris@16
|
219 #define BOOST_REGEX_CHAR_T UChar32
|
Chris@16
|
220 #undef BOOST_REGEX_TRAITS_T
|
Chris@16
|
221 #define BOOST_REGEX_TRAITS_T , icu_regex_traits
|
Chris@16
|
222 #define BOOST_REGEX_ICU_INSTANCES
|
Chris@16
|
223 #ifdef BOOST_REGEX_ICU_INSTANTIATE
|
Chris@16
|
224 # define BOOST_REGEX_INSTANTIATE
|
Chris@16
|
225 #endif
|
Chris@16
|
226 #include <boost/regex/v4/instances.hpp>
|
Chris@16
|
227 #undef BOOST_REGEX_CHAR_T
|
Chris@16
|
228 #undef BOOST_REGEX_TRAITS_T
|
Chris@16
|
229 #undef BOOST_REGEX_ICU_INSTANCES
|
Chris@16
|
230 #ifdef BOOST_REGEX_INSTANTIATE
|
Chris@16
|
231 # undef BOOST_REGEX_INSTANTIATE
|
Chris@16
|
232 #endif
|
Chris@16
|
233
|
Chris@16
|
234 namespace boost{
|
Chris@16
|
235
|
Chris@16
|
236 // types:
|
Chris@16
|
237 typedef basic_regex< ::UChar32, icu_regex_traits> u32regex;
|
Chris@16
|
238 typedef match_results<const ::UChar32*> u32match;
|
Chris@16
|
239 typedef match_results<const ::UChar*> u16match;
|
Chris@16
|
240
|
Chris@16
|
241 //
|
Chris@16
|
242 // Construction of 32-bit regex types from UTF-8 and UTF-16 primitives:
|
Chris@16
|
243 //
|
Chris@16
|
244 namespace re_detail{
|
Chris@16
|
245
|
Chris@16
|
246 #if !defined(BOOST_NO_MEMBER_TEMPLATES) && !defined(__IBMCPP__)
|
Chris@16
|
247 template <class InputIterator>
|
Chris@16
|
248 inline u32regex do_make_u32regex(InputIterator i,
|
Chris@16
|
249 InputIterator j,
|
Chris@16
|
250 boost::regex_constants::syntax_option_type opt,
|
Chris@16
|
251 const boost::mpl::int_<1>*)
|
Chris@16
|
252 {
|
Chris@16
|
253 typedef boost::u8_to_u32_iterator<InputIterator, UChar32> conv_type;
|
Chris@16
|
254 return u32regex(conv_type(i, i, j), conv_type(j, i, j), opt);
|
Chris@16
|
255 }
|
Chris@16
|
256
|
Chris@16
|
257 template <class InputIterator>
|
Chris@16
|
258 inline u32regex do_make_u32regex(InputIterator i,
|
Chris@16
|
259 InputIterator j,
|
Chris@16
|
260 boost::regex_constants::syntax_option_type opt,
|
Chris@16
|
261 const boost::mpl::int_<2>*)
|
Chris@16
|
262 {
|
Chris@16
|
263 typedef boost::u16_to_u32_iterator<InputIterator, UChar32> conv_type;
|
Chris@16
|
264 return u32regex(conv_type(i, i, j), conv_type(j, i, j), opt);
|
Chris@16
|
265 }
|
Chris@16
|
266
|
Chris@16
|
267 template <class InputIterator>
|
Chris@16
|
268 inline u32regex do_make_u32regex(InputIterator i,
|
Chris@16
|
269 InputIterator j,
|
Chris@16
|
270 boost::regex_constants::syntax_option_type opt,
|
Chris@16
|
271 const boost::mpl::int_<4>*)
|
Chris@16
|
272 {
|
Chris@16
|
273 return u32regex(i, j, opt);
|
Chris@16
|
274 }
|
Chris@16
|
275 #else
|
Chris@16
|
276 template <class InputIterator>
|
Chris@16
|
277 inline u32regex do_make_u32regex(InputIterator i,
|
Chris@16
|
278 InputIterator j,
|
Chris@16
|
279 boost::regex_constants::syntax_option_type opt,
|
Chris@16
|
280 const boost::mpl::int_<1>*)
|
Chris@16
|
281 {
|
Chris@16
|
282 typedef boost::u8_to_u32_iterator<InputIterator, UChar32> conv_type;
|
Chris@16
|
283 typedef std::vector<UChar32> vector_type;
|
Chris@16
|
284 vector_type v;
|
Chris@16
|
285 conv_type a(i, i, j), b(j, i, j);
|
Chris@16
|
286 while(a != b)
|
Chris@16
|
287 {
|
Chris@16
|
288 v.push_back(*a);
|
Chris@16
|
289 ++a;
|
Chris@16
|
290 }
|
Chris@16
|
291 if(v.size())
|
Chris@16
|
292 return u32regex(&*v.begin(), v.size(), opt);
|
Chris@16
|
293 return u32regex(static_cast<UChar32 const*>(0), static_cast<u32regex::size_type>(0), opt);
|
Chris@16
|
294 }
|
Chris@16
|
295
|
Chris@16
|
296 template <class InputIterator>
|
Chris@16
|
297 inline u32regex do_make_u32regex(InputIterator i,
|
Chris@16
|
298 InputIterator j,
|
Chris@16
|
299 boost::regex_constants::syntax_option_type opt,
|
Chris@16
|
300 const boost::mpl::int_<2>*)
|
Chris@16
|
301 {
|
Chris@16
|
302 typedef boost::u16_to_u32_iterator<InputIterator, UChar32> conv_type;
|
Chris@16
|
303 typedef std::vector<UChar32> vector_type;
|
Chris@16
|
304 vector_type v;
|
Chris@16
|
305 conv_type a(i, i, j), b(j, i, j);
|
Chris@16
|
306 while(a != b)
|
Chris@16
|
307 {
|
Chris@16
|
308 v.push_back(*a);
|
Chris@16
|
309 ++a;
|
Chris@16
|
310 }
|
Chris@16
|
311 if(v.size())
|
Chris@16
|
312 return u32regex(&*v.begin(), v.size(), opt);
|
Chris@16
|
313 return u32regex(static_cast<UChar32 const*>(0), static_cast<u32regex::size_type>(0), opt);
|
Chris@16
|
314 }
|
Chris@16
|
315
|
Chris@16
|
316 template <class InputIterator>
|
Chris@16
|
317 inline u32regex do_make_u32regex(InputIterator i,
|
Chris@16
|
318 InputIterator j,
|
Chris@16
|
319 boost::regex_constants::syntax_option_type opt,
|
Chris@16
|
320 const boost::mpl::int_<4>*)
|
Chris@16
|
321 {
|
Chris@16
|
322 typedef std::vector<UChar32> vector_type;
|
Chris@16
|
323 vector_type v;
|
Chris@16
|
324 while(i != j)
|
Chris@16
|
325 {
|
Chris@16
|
326 v.push_back((UChar32)(*i));
|
Chris@16
|
327 ++i;
|
Chris@16
|
328 }
|
Chris@16
|
329 if(v.size())
|
Chris@16
|
330 return u32regex(&*v.begin(), v.size(), opt);
|
Chris@16
|
331 return u32regex(static_cast<UChar32 const*>(0), static_cast<u32regex::size_type>(0), opt);
|
Chris@16
|
332 }
|
Chris@16
|
333 #endif
|
Chris@16
|
334 }
|
Chris@16
|
335
|
Chris@16
|
336 //
|
Chris@16
|
337 // Construction from an iterator pair:
|
Chris@16
|
338 //
|
Chris@16
|
339 template <class InputIterator>
|
Chris@16
|
340 inline u32regex make_u32regex(InputIterator i,
|
Chris@16
|
341 InputIterator j,
|
Chris@16
|
342 boost::regex_constants::syntax_option_type opt)
|
Chris@16
|
343 {
|
Chris@16
|
344 return re_detail::do_make_u32regex(i, j, opt, static_cast<boost::mpl::int_<sizeof(*i)> const*>(0));
|
Chris@16
|
345 }
|
Chris@16
|
346 //
|
Chris@16
|
347 // construction from UTF-8 nul-terminated strings:
|
Chris@16
|
348 //
|
Chris@16
|
349 inline u32regex make_u32regex(const char* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl)
|
Chris@16
|
350 {
|
Chris@16
|
351 return re_detail::do_make_u32regex(p, p + std::strlen(p), opt, static_cast<boost::mpl::int_<1> const*>(0));
|
Chris@16
|
352 }
|
Chris@16
|
353 inline u32regex make_u32regex(const unsigned char* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl)
|
Chris@16
|
354 {
|
Chris@16
|
355 return re_detail::do_make_u32regex(p, p + std::strlen(reinterpret_cast<const char*>(p)), opt, static_cast<boost::mpl::int_<1> const*>(0));
|
Chris@16
|
356 }
|
Chris@16
|
357 //
|
Chris@16
|
358 // construction from UTF-16 nul-terminated strings:
|
Chris@16
|
359 //
|
Chris@16
|
360 #ifndef BOOST_NO_WREGEX
|
Chris@16
|
361 inline u32regex make_u32regex(const wchar_t* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl)
|
Chris@16
|
362 {
|
Chris@16
|
363 return re_detail::do_make_u32regex(p, p + std::wcslen(p), opt, static_cast<boost::mpl::int_<sizeof(wchar_t)> const*>(0));
|
Chris@16
|
364 }
|
Chris@16
|
365 #endif
|
Chris@16
|
366 #if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2)
|
Chris@16
|
367 inline u32regex make_u32regex(const UChar* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl)
|
Chris@16
|
368 {
|
Chris@16
|
369 return re_detail::do_make_u32regex(p, p + u_strlen(p), opt, static_cast<boost::mpl::int_<2> const*>(0));
|
Chris@16
|
370 }
|
Chris@16
|
371 #endif
|
Chris@16
|
372 //
|
Chris@16
|
373 // construction from basic_string class-template:
|
Chris@16
|
374 //
|
Chris@16
|
375 template<class C, class T, class A>
|
Chris@16
|
376 inline u32regex make_u32regex(const std::basic_string<C, T, A>& s, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl)
|
Chris@16
|
377 {
|
Chris@16
|
378 return re_detail::do_make_u32regex(s.begin(), s.end(), opt, static_cast<boost::mpl::int_<sizeof(C)> const*>(0));
|
Chris@16
|
379 }
|
Chris@16
|
380 //
|
Chris@16
|
381 // Construction from ICU string type:
|
Chris@16
|
382 //
|
Chris@16
|
383 inline u32regex make_u32regex(const U_NAMESPACE_QUALIFIER UnicodeString& s, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl)
|
Chris@16
|
384 {
|
Chris@16
|
385 return re_detail::do_make_u32regex(s.getBuffer(), s.getBuffer() + s.length(), opt, static_cast<boost::mpl::int_<2> const*>(0));
|
Chris@16
|
386 }
|
Chris@16
|
387
|
Chris@16
|
388 //
|
Chris@16
|
389 // regex_match overloads that widen the character type as appropriate:
|
Chris@16
|
390 //
|
Chris@16
|
391 namespace re_detail{
|
Chris@16
|
// Transfers the results held in `in` (whose iterators wrap an underlying
// iterator reachable through base()) into `out`, which works on the
// underlying iterators directly.
template<class MR1, class MR2>
void copy_results(MR1& out, MR2 const& in)
{
   // Size the output and record the overall range and search base, all
   // expressed as underlying iterators.
   out.set_size(in.size(), in.prefix().first.base(), in.suffix().second.base());
   out.set_base(in.base().base());

   const int count = (int)in.size();
   for(int idx = 0; idx < count; ++idx)
   {
      // Entry 0 is always copied; any other entry only if it matched.
      if(idx != 0 && !in[idx].matched)
      {
         continue;
      }
      out.set_first(in[idx].first.base(), idx);
      out.set_second(in[idx].second.base(), idx, in[idx].matched);
   }
}
|
Chris@16
|
407
|
Chris@16
|
408 template <class BidiIterator, class Allocator>
|
Chris@16
|
409 inline bool do_regex_match(BidiIterator first, BidiIterator last,
|
Chris@16
|
410 match_results<BidiIterator, Allocator>& m,
|
Chris@16
|
411 const u32regex& e,
|
Chris@16
|
412 match_flag_type flags,
|
Chris@16
|
413 boost::mpl::int_<4> const*)
|
Chris@16
|
414 {
|
Chris@16
|
415 return ::boost::regex_match(first, last, m, e, flags);
|
Chris@16
|
416 }
|
Chris@16
|
417 template <class BidiIterator, class Allocator>
|
Chris@16
|
418 bool do_regex_match(BidiIterator first, BidiIterator last,
|
Chris@16
|
419 match_results<BidiIterator, Allocator>& m,
|
Chris@16
|
420 const u32regex& e,
|
Chris@16
|
421 match_flag_type flags,
|
Chris@16
|
422 boost::mpl::int_<2> const*)
|
Chris@16
|
423 {
|
Chris@16
|
424 typedef u16_to_u32_iterator<BidiIterator, UChar32> conv_type;
|
Chris@16
|
425 typedef match_results<conv_type> match_type;
|
Chris@16
|
426 //typedef typename match_type::allocator_type alloc_type;
|
Chris@16
|
427 match_type what;
|
Chris@16
|
428 bool result = ::boost::regex_match(conv_type(first, first, last), conv_type(last, first, last), what, e, flags);
|
Chris@16
|
429 // copy results across to m:
|
Chris@16
|
430 if(result) copy_results(m, what);
|
Chris@16
|
431 return result;
|
Chris@16
|
432 }
|
Chris@16
|
433 template <class BidiIterator, class Allocator>
|
Chris@16
|
434 bool do_regex_match(BidiIterator first, BidiIterator last,
|
Chris@16
|
435 match_results<BidiIterator, Allocator>& m,
|
Chris@16
|
436 const u32regex& e,
|
Chris@16
|
437 match_flag_type flags,
|
Chris@16
|
438 boost::mpl::int_<1> const*)
|
Chris@16
|
439 {
|
Chris@16
|
440 typedef u8_to_u32_iterator<BidiIterator, UChar32> conv_type;
|
Chris@16
|
441 typedef match_results<conv_type> match_type;
|
Chris@16
|
442 //typedef typename match_type::allocator_type alloc_type;
|
Chris@16
|
443 match_type what;
|
Chris@16
|
444 bool result = ::boost::regex_match(conv_type(first, first, last), conv_type(last, first, last), what, e, flags);
|
Chris@16
|
445 // copy results across to m:
|
Chris@16
|
446 if(result) copy_results(m, what);
|
Chris@16
|
447 return result;
|
Chris@16
|
448 }
|
Chris@16
|
449 } // namespace re_detail
|
Chris@16
|
450
|
Chris@16
|
451 template <class BidiIterator, class Allocator>
|
Chris@16
|
452 inline bool u32regex_match(BidiIterator first, BidiIterator last,
|
Chris@16
|
453 match_results<BidiIterator, Allocator>& m,
|
Chris@16
|
454 const u32regex& e,
|
Chris@16
|
455 match_flag_type flags = match_default)
|
Chris@16
|
456 {
|
Chris@16
|
457 return re_detail::do_regex_match(first, last, m, e, flags, static_cast<mpl::int_<sizeof(*first)> const*>(0));
|
Chris@16
|
458 }
|
Chris@16
|
459 inline bool u32regex_match(const UChar* p,
|
Chris@16
|
460 match_results<const UChar*>& m,
|
Chris@16
|
461 const u32regex& e,
|
Chris@16
|
462 match_flag_type flags = match_default)
|
Chris@16
|
463 {
|
Chris@16
|
464 return re_detail::do_regex_match(p, p+u_strlen(p), m, e, flags, static_cast<mpl::int_<2> const*>(0));
|
Chris@16
|
465 }
|
Chris@16
|
466 #if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2) && !defined(BOOST_NO_WREGEX)
|
Chris@16
|
467 inline bool u32regex_match(const wchar_t* p,
|
Chris@16
|
468 match_results<const wchar_t*>& m,
|
Chris@16
|
469 const u32regex& e,
|
Chris@16
|
470 match_flag_type flags = match_default)
|
Chris@16
|
471 {
|
Chris@16
|
472 return re_detail::do_regex_match(p, p+std::wcslen(p), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
|
Chris@16
|
473 }
|
Chris@16
|
474 #endif
|
Chris@16
|
475 inline bool u32regex_match(const char* p,
|
Chris@16
|
476 match_results<const char*>& m,
|
Chris@16
|
477 const u32regex& e,
|
Chris@16
|
478 match_flag_type flags = match_default)
|
Chris@16
|
479 {
|
Chris@16
|
480 return re_detail::do_regex_match(p, p+std::strlen(p), m, e, flags, static_cast<mpl::int_<1> const*>(0));
|
Chris@16
|
481 }
|
Chris@16
|
482 inline bool u32regex_match(const unsigned char* p,
|
Chris@16
|
483 match_results<const unsigned char*>& m,
|
Chris@16
|
484 const u32regex& e,
|
Chris@16
|
485 match_flag_type flags = match_default)
|
Chris@16
|
486 {
|
Chris@16
|
487 return re_detail::do_regex_match(p, p+std::strlen((const char*)p), m, e, flags, static_cast<mpl::int_<1> const*>(0));
|
Chris@16
|
488 }
|
Chris@16
|
489 inline bool u32regex_match(const std::string& s,
|
Chris@16
|
490 match_results<std::string::const_iterator>& m,
|
Chris@16
|
491 const u32regex& e,
|
Chris@16
|
492 match_flag_type flags = match_default)
|
Chris@16
|
493 {
|
Chris@16
|
494 return re_detail::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast<mpl::int_<1> const*>(0));
|
Chris@16
|
495 }
|
Chris@16
|
496 #ifndef BOOST_NO_STD_WSTRING
|
Chris@16
|
497 inline bool u32regex_match(const std::wstring& s,
|
Chris@16
|
498 match_results<std::wstring::const_iterator>& m,
|
Chris@16
|
499 const u32regex& e,
|
Chris@16
|
500 match_flag_type flags = match_default)
|
Chris@16
|
501 {
|
Chris@16
|
502 return re_detail::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
|
Chris@16
|
503 }
|
Chris@16
|
504 #endif
|
Chris@16
|
505 inline bool u32regex_match(const U_NAMESPACE_QUALIFIER UnicodeString& s,
|
Chris@16
|
506 match_results<const UChar*>& m,
|
Chris@16
|
507 const u32regex& e,
|
Chris@16
|
508 match_flag_type flags = match_default)
|
Chris@16
|
509 {
|
Chris@16
|
510 return re_detail::do_regex_match(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
|
Chris@16
|
511 }
|
Chris@16
|
512 //
|
Chris@16
|
513 // regex_match overloads that do not return what matched:
|
Chris@16
|
514 //
|
Chris@16
|
515 template <class BidiIterator>
|
Chris@16
|
516 inline bool u32regex_match(BidiIterator first, BidiIterator last,
|
Chris@16
|
517 const u32regex& e,
|
Chris@16
|
518 match_flag_type flags = match_default)
|
Chris@16
|
519 {
|
Chris@16
|
520 match_results<BidiIterator> m;
|
Chris@16
|
521 return re_detail::do_regex_match(first, last, m, e, flags, static_cast<mpl::int_<sizeof(*first)> const*>(0));
|
Chris@16
|
522 }
|
Chris@16
|
523 inline bool u32regex_match(const UChar* p,
|
Chris@16
|
524 const u32regex& e,
|
Chris@16
|
525 match_flag_type flags = match_default)
|
Chris@16
|
526 {
|
Chris@16
|
527 match_results<const UChar*> m;
|
Chris@16
|
528 return re_detail::do_regex_match(p, p+u_strlen(p), m, e, flags, static_cast<mpl::int_<2> const*>(0));
|
Chris@16
|
529 }
|
Chris@16
|
530 #if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2) && !defined(BOOST_NO_WREGEX)
|
Chris@16
|
531 inline bool u32regex_match(const wchar_t* p,
|
Chris@16
|
532 const u32regex& e,
|
Chris@16
|
533 match_flag_type flags = match_default)
|
Chris@16
|
534 {
|
Chris@16
|
535 match_results<const wchar_t*> m;
|
Chris@16
|
536 return re_detail::do_regex_match(p, p+std::wcslen(p), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
|
Chris@16
|
537 }
|
Chris@16
|
538 #endif
|
Chris@16
|
539 inline bool u32regex_match(const char* p,
|
Chris@16
|
540 const u32regex& e,
|
Chris@16
|
541 match_flag_type flags = match_default)
|
Chris@16
|
542 {
|
Chris@16
|
543 match_results<const char*> m;
|
Chris@16
|
544 return re_detail::do_regex_match(p, p+std::strlen(p), m, e, flags, static_cast<mpl::int_<1> const*>(0));
|
Chris@16
|
545 }
|
Chris@16
|
546 inline bool u32regex_match(const unsigned char* p,
|
Chris@16
|
547 const u32regex& e,
|
Chris@16
|
548 match_flag_type flags = match_default)
|
Chris@16
|
549 {
|
Chris@16
|
550 match_results<const unsigned char*> m;
|
Chris@16
|
551 return re_detail::do_regex_match(p, p+std::strlen((const char*)p), m, e, flags, static_cast<mpl::int_<1> const*>(0));
|
Chris@16
|
552 }
|
Chris@16
|
553 inline bool u32regex_match(const std::string& s,
|
Chris@16
|
554 const u32regex& e,
|
Chris@16
|
555 match_flag_type flags = match_default)
|
Chris@16
|
556 {
|
Chris@16
|
557 match_results<std::string::const_iterator> m;
|
Chris@16
|
558 return re_detail::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast<mpl::int_<1> const*>(0));
|
Chris@16
|
559 }
|
Chris@16
|
560 #ifndef BOOST_NO_STD_WSTRING
|
Chris@16
|
561 inline bool u32regex_match(const std::wstring& s,
|
Chris@16
|
562 const u32regex& e,
|
Chris@16
|
563 match_flag_type flags = match_default)
|
Chris@16
|
564 {
|
Chris@16
|
565 match_results<std::wstring::const_iterator> m;
|
Chris@16
|
566 return re_detail::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
|
Chris@16
|
567 }
|
Chris@16
|
568 #endif
|
Chris@16
|
569 inline bool u32regex_match(const U_NAMESPACE_QUALIFIER UnicodeString& s,
|
Chris@16
|
570 const u32regex& e,
|
Chris@16
|
571 match_flag_type flags = match_default)
|
Chris@16
|
572 {
|
Chris@16
|
573 match_results<const UChar*> m;
|
Chris@16
|
574 return re_detail::do_regex_match(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
|
Chris@16
|
575 }
|
Chris@16
|
576
|
Chris@16
|
577 //
|
Chris@16
|
578 // regex_search overloads that widen the character type as appropriate:
|
Chris@16
|
579 //
|
Chris@16
|
580 namespace re_detail{
|
Chris@16
|
581 template <class BidiIterator, class Allocator>
|
Chris@16
|
582 inline bool do_regex_search(BidiIterator first, BidiIterator last,
|
Chris@16
|
583 match_results<BidiIterator, Allocator>& m,
|
Chris@16
|
584 const u32regex& e,
|
Chris@16
|
585 match_flag_type flags,
|
Chris@16
|
586 BidiIterator base,
|
Chris@16
|
587 boost::mpl::int_<4> const*)
|
Chris@16
|
588 {
|
Chris@16
|
589 return ::boost::regex_search(first, last, m, e, flags, base);
|
Chris@16
|
590 }
|
Chris@16
|
591 template <class BidiIterator, class Allocator>
|
Chris@16
|
592 bool do_regex_search(BidiIterator first, BidiIterator last,
|
Chris@16
|
593 match_results<BidiIterator, Allocator>& m,
|
Chris@16
|
594 const u32regex& e,
|
Chris@16
|
595 match_flag_type flags,
|
Chris@16
|
596 BidiIterator base,
|
Chris@16
|
597 boost::mpl::int_<2> const*)
|
Chris@16
|
598 {
|
Chris@16
|
599 typedef u16_to_u32_iterator<BidiIterator, UChar32> conv_type;
|
Chris@16
|
600 typedef match_results<conv_type> match_type;
|
Chris@16
|
601 //typedef typename match_type::allocator_type alloc_type;
|
Chris@16
|
602 match_type what;
|
Chris@16
|
603 bool result = ::boost::regex_search(conv_type(first, first, last), conv_type(last, first, last), what, e, flags, conv_type(base));
|
Chris@16
|
604 // copy results across to m:
|
Chris@16
|
605 if(result) copy_results(m, what);
|
Chris@16
|
606 return result;
|
Chris@16
|
607 }
|
Chris@16
|
608 template <class BidiIterator, class Allocator>
|
Chris@16
|
609 bool do_regex_search(BidiIterator first, BidiIterator last,
|
Chris@16
|
610 match_results<BidiIterator, Allocator>& m,
|
Chris@16
|
611 const u32regex& e,
|
Chris@16
|
612 match_flag_type flags,
|
Chris@16
|
613 BidiIterator base,
|
Chris@16
|
614 boost::mpl::int_<1> const*)
|
Chris@16
|
615 {
|
Chris@16
|
616 typedef u8_to_u32_iterator<BidiIterator, UChar32> conv_type;
|
Chris@16
|
617 typedef match_results<conv_type> match_type;
|
Chris@16
|
618 //typedef typename match_type::allocator_type alloc_type;
|
Chris@16
|
619 match_type what;
|
Chris@16
|
620 bool result = ::boost::regex_search(conv_type(first, first, last), conv_type(last, first, last), what, e, flags, conv_type(base));
|
Chris@16
|
621 // copy results across to m:
|
Chris@16
|
622 if(result) copy_results(m, what);
|
Chris@16
|
623 return result;
|
Chris@16
|
624 }
|
Chris@16
|
625 }
|
Chris@16
|
626
|
Chris@16
|
627 template <class BidiIterator, class Allocator>
|
Chris@16
|
628 inline bool u32regex_search(BidiIterator first, BidiIterator last,
|
Chris@16
|
629 match_results<BidiIterator, Allocator>& m,
|
Chris@16
|
630 const u32regex& e,
|
Chris@16
|
631 match_flag_type flags = match_default)
|
Chris@16
|
632 {
|
Chris@16
|
633 return re_detail::do_regex_search(first, last, m, e, flags, first, static_cast<mpl::int_<sizeof(*first)> const*>(0));
|
Chris@16
|
634 }
|
Chris@16
|
635 template <class BidiIterator, class Allocator>
|
Chris@16
|
636 inline bool u32regex_search(BidiIterator first, BidiIterator last,
|
Chris@16
|
637 match_results<BidiIterator, Allocator>& m,
|
Chris@16
|
638 const u32regex& e,
|
Chris@16
|
639 match_flag_type flags,
|
Chris@16
|
640 BidiIterator base)
|
Chris@16
|
641 {
|
Chris@16
|
642 return re_detail::do_regex_search(first, last, m, e, flags, base, static_cast<mpl::int_<sizeof(*first)> const*>(0));
|
Chris@16
|
643 }
|
Chris@16
|
644 inline bool u32regex_search(const UChar* p,
|
Chris@16
|
645 match_results<const UChar*>& m,
|
Chris@16
|
646 const u32regex& e,
|
Chris@16
|
647 match_flag_type flags = match_default)
|
Chris@16
|
648 {
|
Chris@16
|
649 return re_detail::do_regex_search(p, p+u_strlen(p), m, e, flags, p, static_cast<mpl::int_<2> const*>(0));
|
Chris@16
|
650 }
|
Chris@16
|
651 #if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2) && !defined(BOOST_NO_WREGEX)
|
Chris@16
|
652 inline bool u32regex_search(const wchar_t* p,
|
Chris@16
|
653 match_results<const wchar_t*>& m,
|
Chris@16
|
654 const u32regex& e,
|
Chris@16
|
655 match_flag_type flags = match_default)
|
Chris@16
|
656 {
|
Chris@16
|
657 return re_detail::do_regex_search(p, p+std::wcslen(p), m, e, flags, p, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
|
Chris@16
|
658 }
|
Chris@16
|
659 #endif
|
Chris@16
|
660 inline bool u32regex_search(const char* p,
|
Chris@16
|
661 match_results<const char*>& m,
|
Chris@16
|
662 const u32regex& e,
|
Chris@16
|
663 match_flag_type flags = match_default)
|
Chris@16
|
664 {
|
Chris@16
|
665 return re_detail::do_regex_search(p, p+std::strlen(p), m, e, flags, p, static_cast<mpl::int_<1> const*>(0));
|
Chris@16
|
666 }
|
Chris@16
|
667 inline bool u32regex_search(const unsigned char* p,
|
Chris@16
|
668 match_results<const unsigned char*>& m,
|
Chris@16
|
669 const u32regex& e,
|
Chris@16
|
670 match_flag_type flags = match_default)
|
Chris@16
|
671 {
|
Chris@16
|
672 return re_detail::do_regex_search(p, p+std::strlen((const char*)p), m, e, flags, p, static_cast<mpl::int_<1> const*>(0));
|
Chris@16
|
673 }
|
Chris@16
|
674 inline bool u32regex_search(const std::string& s,
|
Chris@16
|
675 match_results<std::string::const_iterator>& m,
|
Chris@16
|
676 const u32regex& e,
|
Chris@16
|
677 match_flag_type flags = match_default)
|
Chris@16
|
678 {
|
Chris@16
|
679 return re_detail::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast<mpl::int_<1> const*>(0));
|
Chris@16
|
680 }
|
Chris@16
|
681 #ifndef BOOST_NO_STD_WSTRING
|
Chris@16
|
682 inline bool u32regex_search(const std::wstring& s,
|
Chris@16
|
683 match_results<std::wstring::const_iterator>& m,
|
Chris@16
|
684 const u32regex& e,
|
Chris@16
|
685 match_flag_type flags = match_default)
|
Chris@16
|
686 {
|
Chris@16
|
687 return re_detail::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
|
Chris@16
|
688 }
|
Chris@16
|
689 #endif
|
Chris@16
|
690 inline bool u32regex_search(const U_NAMESPACE_QUALIFIER UnicodeString& s,
|
Chris@16
|
691 match_results<const UChar*>& m,
|
Chris@16
|
692 const u32regex& e,
|
Chris@16
|
693 match_flag_type flags = match_default)
|
Chris@16
|
694 {
|
Chris@16
|
695 return re_detail::do_regex_search(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, s.getBuffer(), static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
|
Chris@16
|
696 }
|
Chris@16
|
697 template <class BidiIterator>
|
Chris@16
|
698 inline bool u32regex_search(BidiIterator first, BidiIterator last,
|
Chris@16
|
699 const u32regex& e,
|
Chris@16
|
700 match_flag_type flags = match_default)
|
Chris@16
|
701 {
|
Chris@16
|
702 match_results<BidiIterator> m;
|
Chris@16
|
703 return re_detail::do_regex_search(first, last, m, e, flags, first, static_cast<mpl::int_<sizeof(*first)> const*>(0));
|
Chris@16
|
704 }
|
Chris@16
|
705 inline bool u32regex_search(const UChar* p,
|
Chris@16
|
706 const u32regex& e,
|
Chris@16
|
707 match_flag_type flags = match_default)
|
Chris@16
|
708 {
|
Chris@16
|
709 match_results<const UChar*> m;
|
Chris@16
|
710 return re_detail::do_regex_search(p, p+u_strlen(p), m, e, flags, p, static_cast<mpl::int_<2> const*>(0));
|
Chris@16
|
711 }
|
Chris@16
|
712 #if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2) && !defined(BOOST_NO_WREGEX)
|
Chris@16
|
713 inline bool u32regex_search(const wchar_t* p,
|
Chris@16
|
714 const u32regex& e,
|
Chris@16
|
715 match_flag_type flags = match_default)
|
Chris@16
|
716 {
|
Chris@16
|
717 match_results<const wchar_t*> m;
|
Chris@16
|
718 return re_detail::do_regex_search(p, p+std::wcslen(p), m, e, flags, p, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
|
Chris@16
|
719 }
|
Chris@16
|
720 #endif
|
Chris@16
|
721 inline bool u32regex_search(const char* p,
|
Chris@16
|
722 const u32regex& e,
|
Chris@16
|
723 match_flag_type flags = match_default)
|
Chris@16
|
724 {
|
Chris@16
|
725 match_results<const char*> m;
|
Chris@16
|
726 return re_detail::do_regex_search(p, p+std::strlen(p), m, e, flags, p, static_cast<mpl::int_<1> const*>(0));
|
Chris@16
|
727 }
|
Chris@16
|
728 inline bool u32regex_search(const unsigned char* p,
|
Chris@16
|
729 const u32regex& e,
|
Chris@16
|
730 match_flag_type flags = match_default)
|
Chris@16
|
731 {
|
Chris@16
|
732 match_results<const unsigned char*> m;
|
Chris@16
|
733 return re_detail::do_regex_search(p, p+std::strlen((const char*)p), m, e, flags, p, static_cast<mpl::int_<1> const*>(0));
|
Chris@16
|
734 }
|
Chris@16
|
735 inline bool u32regex_search(const std::string& s,
|
Chris@16
|
736 const u32regex& e,
|
Chris@16
|
737 match_flag_type flags = match_default)
|
Chris@16
|
738 {
|
Chris@16
|
739 match_results<std::string::const_iterator> m;
|
Chris@16
|
740 return re_detail::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast<mpl::int_<1> const*>(0));
|
Chris@16
|
741 }
|
Chris@16
|
742 #ifndef BOOST_NO_STD_WSTRING
|
Chris@16
|
743 inline bool u32regex_search(const std::wstring& s,
|
Chris@16
|
744 const u32regex& e,
|
Chris@16
|
745 match_flag_type flags = match_default)
|
Chris@16
|
746 {
|
Chris@16
|
747 match_results<std::wstring::const_iterator> m;
|
Chris@16
|
748 return re_detail::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
|
Chris@16
|
749 }
|
Chris@16
|
750 #endif
|
Chris@16
|
751 inline bool u32regex_search(const U_NAMESPACE_QUALIFIER UnicodeString& s,
|
Chris@16
|
752 const u32regex& e,
|
Chris@16
|
753 match_flag_type flags = match_default)
|
Chris@16
|
754 {
|
Chris@16
|
755 match_results<const UChar*> m;
|
Chris@16
|
756 return re_detail::do_regex_search(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, s.getBuffer(), static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
|
Chris@16
|
757 }
|
Chris@16
|
758
|
Chris@16
|
759 //
|
Chris@16
|
760 // overloads for regex_replace with utf-8 and utf-16 data types:
|
Chris@16
|
761 //
|
Chris@16
|
762 namespace re_detail{
|
Chris@16
|
763 template <class I>
|
Chris@16
|
764 inline std::pair< boost::u8_to_u32_iterator<I>, boost::u8_to_u32_iterator<I> >
|
Chris@16
|
765 make_utf32_seq(I i, I j, mpl::int_<1> const*)
|
Chris@16
|
766 {
|
Chris@16
|
767 return std::pair< boost::u8_to_u32_iterator<I>, boost::u8_to_u32_iterator<I> >(boost::u8_to_u32_iterator<I>(i, i, j), boost::u8_to_u32_iterator<I>(j, i, j));
|
Chris@16
|
768 }
|
Chris@16
|
769 template <class I>
|
Chris@16
|
770 inline std::pair< boost::u16_to_u32_iterator<I>, boost::u16_to_u32_iterator<I> >
|
Chris@16
|
771 make_utf32_seq(I i, I j, mpl::int_<2> const*)
|
Chris@16
|
772 {
|
Chris@16
|
773 return std::pair< boost::u16_to_u32_iterator<I>, boost::u16_to_u32_iterator<I> >(boost::u16_to_u32_iterator<I>(i, i, j), boost::u16_to_u32_iterator<I>(j, i, j));
|
Chris@16
|
774 }
|
Chris@16
|
775 template <class I>
|
Chris@16
|
776 inline std::pair< I, I >
|
Chris@16
|
777 make_utf32_seq(I i, I j, mpl::int_<4> const*)
|
Chris@16
|
778 {
|
Chris@16
|
779 return std::pair< I, I >(i, j);
|
Chris@16
|
780 }
|
Chris@16
|
781 template <class charT>
|
Chris@16
|
782 inline std::pair< boost::u8_to_u32_iterator<const charT*>, boost::u8_to_u32_iterator<const charT*> >
|
Chris@16
|
783 make_utf32_seq(const charT* p, mpl::int_<1> const*)
|
Chris@16
|
784 {
|
Chris@16
|
785 std::size_t len = std::strlen((const char*)p);
|
Chris@16
|
786 return std::pair< boost::u8_to_u32_iterator<const charT*>, boost::u8_to_u32_iterator<const charT*> >(boost::u8_to_u32_iterator<const charT*>(p, p, p+len), boost::u8_to_u32_iterator<const charT*>(p+len, p, p+len));
|
Chris@16
|
787 }
|
Chris@16
|
788 template <class charT>
|
Chris@16
|
789 inline std::pair< boost::u16_to_u32_iterator<const charT*>, boost::u16_to_u32_iterator<const charT*> >
|
Chris@16
|
790 make_utf32_seq(const charT* p, mpl::int_<2> const*)
|
Chris@16
|
791 {
|
Chris@16
|
792 std::size_t len = u_strlen((const UChar*)p);
|
Chris@16
|
793 return std::pair< boost::u16_to_u32_iterator<const charT*>, boost::u16_to_u32_iterator<const charT*> >(boost::u16_to_u32_iterator<const charT*>(p, p, p + len), boost::u16_to_u32_iterator<const charT*>(p+len, p, p + len));
|
Chris@16
|
794 }
|
Chris@16
|
795 template <class charT>
|
Chris@16
|
796 inline std::pair< const charT*, const charT* >
|
Chris@16
|
797 make_utf32_seq(const charT* p, mpl::int_<4> const*)
|
Chris@16
|
798 {
|
Chris@16
|
799 return std::pair< const charT*, const charT* >(p, p+icu_regex_traits::length((UChar32 const*)p));
|
Chris@16
|
800 }
|
Chris@16
|
801 template <class OutputIterator>
|
Chris@16
|
802 inline OutputIterator make_utf32_out(OutputIterator o, mpl::int_<4> const*)
|
Chris@16
|
803 {
|
Chris@16
|
804 return o;
|
Chris@16
|
805 }
|
Chris@16
|
806 template <class OutputIterator>
|
Chris@16
|
807 inline utf16_output_iterator<OutputIterator> make_utf32_out(OutputIterator o, mpl::int_<2> const*)
|
Chris@16
|
808 {
|
Chris@16
|
809 return o;
|
Chris@16
|
810 }
|
Chris@16
|
811 template <class OutputIterator>
|
Chris@16
|
812 inline utf8_output_iterator<OutputIterator> make_utf32_out(OutputIterator o, mpl::int_<1> const*)
|
Chris@16
|
813 {
|
Chris@16
|
814 return o;
|
Chris@16
|
815 }
|
Chris@16
|
816
|
Chris@16
|
817 template <class OutputIterator, class I1, class I2>
|
Chris@16
|
818 OutputIterator do_regex_replace(OutputIterator out,
|
Chris@16
|
819 std::pair<I1, I1> const& in,
|
Chris@16
|
820 const u32regex& e,
|
Chris@16
|
821 const std::pair<I2, I2>& fmt,
|
Chris@16
|
822 match_flag_type flags
|
Chris@16
|
823 )
|
Chris@16
|
824 {
|
Chris@16
|
825 // unfortunately we have to copy the format string in order to pass in onward:
|
Chris@16
|
826 std::vector<UChar32> f;
|
Chris@16
|
827 #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
|
Chris@16
|
828 f.assign(fmt.first, fmt.second);
|
Chris@16
|
829 #else
|
Chris@16
|
830 f.clear();
|
Chris@16
|
831 I2 pos = fmt.first;
|
Chris@16
|
832 while(pos != fmt.second)
|
Chris@16
|
833 f.push_back(*pos++);
|
Chris@16
|
834 #endif
|
Chris@16
|
835
|
Chris@16
|
836 regex_iterator<I1, UChar32, icu_regex_traits> i(in.first, in.second, e, flags);
|
Chris@16
|
837 regex_iterator<I1, UChar32, icu_regex_traits> j;
|
Chris@16
|
838 if(i == j)
|
Chris@16
|
839 {
|
Chris@16
|
840 if(!(flags & regex_constants::format_no_copy))
|
Chris@16
|
841 out = re_detail::copy(in.first, in.second, out);
|
Chris@16
|
842 }
|
Chris@16
|
843 else
|
Chris@16
|
844 {
|
Chris@16
|
845 I1 last_m = in.first;
|
Chris@16
|
846 while(i != j)
|
Chris@16
|
847 {
|
Chris@16
|
848 if(!(flags & regex_constants::format_no_copy))
|
Chris@16
|
849 out = re_detail::copy(i->prefix().first, i->prefix().second, out);
|
Chris@16
|
850 if(f.size())
|
Chris@16
|
851 out = ::boost::re_detail::regex_format_imp(out, *i, &*f.begin(), &*f.begin() + f.size(), flags, e.get_traits());
|
Chris@16
|
852 else
|
Chris@16
|
853 out = ::boost::re_detail::regex_format_imp(out, *i, static_cast<UChar32 const*>(0), static_cast<UChar32 const*>(0), flags, e.get_traits());
|
Chris@16
|
854 last_m = (*i)[0].second;
|
Chris@16
|
855 if(flags & regex_constants::format_first_only)
|
Chris@16
|
856 break;
|
Chris@16
|
857 ++i;
|
Chris@16
|
858 }
|
Chris@16
|
859 if(!(flags & regex_constants::format_no_copy))
|
Chris@16
|
860 out = re_detail::copy(last_m, in.second, out);
|
Chris@16
|
861 }
|
Chris@16
|
862 return out;
|
Chris@16
|
863 }
|
Chris@16
|
// Generic case: the iterator is not one of the UTF adaptors, so it is its own base.
template <class BaseIterator>
inline const BaseIterator& extract_output_base(const BaseIterator& b)
{
   const BaseIterator& self = b;
   return self;
}
|
Chris@16
|
869 template <class BaseIterator>
|
Chris@16
|
870 inline BaseIterator extract_output_base(const utf8_output_iterator<BaseIterator>& b)
|
Chris@16
|
871 {
|
Chris@16
|
872 return b.base();
|
Chris@16
|
873 }
|
Chris@16
|
874 template <class BaseIterator>
|
Chris@16
|
875 inline BaseIterator extract_output_base(const utf16_output_iterator<BaseIterator>& b)
|
Chris@16
|
876 {
|
Chris@16
|
877 return b.base();
|
Chris@16
|
878 }
|
Chris@16
|
879 } // re_detail
|
Chris@16
|
880
|
Chris@16
|
881 template <class OutputIterator, class BidirectionalIterator, class charT>
|
Chris@16
|
882 inline OutputIterator u32regex_replace(OutputIterator out,
|
Chris@16
|
883 BidirectionalIterator first,
|
Chris@16
|
884 BidirectionalIterator last,
|
Chris@16
|
885 const u32regex& e,
|
Chris@16
|
886 const charT* fmt,
|
Chris@16
|
887 match_flag_type flags = match_default)
|
Chris@16
|
888 {
|
Chris@16
|
889 return re_detail::extract_output_base
|
Chris@16
|
890 (
|
Chris@16
|
891 re_detail::do_regex_replace(
|
Chris@16
|
892 re_detail::make_utf32_out(out, static_cast<mpl::int_<sizeof(*first)> const*>(0)),
|
Chris@16
|
893 re_detail::make_utf32_seq(first, last, static_cast<mpl::int_<sizeof(*first)> const*>(0)),
|
Chris@16
|
894 e,
|
Chris@16
|
895 re_detail::make_utf32_seq(fmt, static_cast<mpl::int_<sizeof(*fmt)> const*>(0)),
|
Chris@16
|
896 flags)
|
Chris@16
|
897 );
|
Chris@16
|
898 }
|
Chris@16
|
899
|
Chris@16
|
900 template <class OutputIterator, class Iterator, class charT>
|
Chris@16
|
901 inline OutputIterator u32regex_replace(OutputIterator out,
|
Chris@16
|
902 Iterator first,
|
Chris@16
|
903 Iterator last,
|
Chris@16
|
904 const u32regex& e,
|
Chris@16
|
905 const std::basic_string<charT>& fmt,
|
Chris@16
|
906 match_flag_type flags = match_default)
|
Chris@16
|
907 {
|
Chris@16
|
908 return re_detail::extract_output_base
|
Chris@16
|
909 (
|
Chris@16
|
910 re_detail::do_regex_replace(
|
Chris@16
|
911 re_detail::make_utf32_out(out, static_cast<mpl::int_<sizeof(*first)> const*>(0)),
|
Chris@16
|
912 re_detail::make_utf32_seq(first, last, static_cast<mpl::int_<sizeof(*first)> const*>(0)),
|
Chris@16
|
913 e,
|
Chris@16
|
914 re_detail::make_utf32_seq(fmt.begin(), fmt.end(), static_cast<mpl::int_<sizeof(charT)> const*>(0)),
|
Chris@16
|
915 flags)
|
Chris@16
|
916 );
|
Chris@16
|
917 }
|
Chris@16
|
918
|
Chris@16
|
919 template <class OutputIterator, class Iterator>
|
Chris@16
|
920 inline OutputIterator u32regex_replace(OutputIterator out,
|
Chris@16
|
921 Iterator first,
|
Chris@16
|
922 Iterator last,
|
Chris@16
|
923 const u32regex& e,
|
Chris@16
|
924 const U_NAMESPACE_QUALIFIER UnicodeString& fmt,
|
Chris@16
|
925 match_flag_type flags = match_default)
|
Chris@16
|
926 {
|
Chris@16
|
927 return re_detail::extract_output_base
|
Chris@16
|
928 (
|
Chris@16
|
929 re_detail::do_regex_replace(
|
Chris@16
|
930 re_detail::make_utf32_out(out, static_cast<mpl::int_<sizeof(*first)> const*>(0)),
|
Chris@16
|
931 re_detail::make_utf32_seq(first, last, static_cast<mpl::int_<sizeof(*first)> const*>(0)),
|
Chris@16
|
932 e,
|
Chris@16
|
933 re_detail::make_utf32_seq(fmt.getBuffer(), fmt.getBuffer() + fmt.length(), static_cast<mpl::int_<2> const*>(0)),
|
Chris@16
|
934 flags)
|
Chris@16
|
935 );
|
Chris@16
|
936 }
|
Chris@16
|
937
|
Chris@16
|
938 template <class charT>
|
Chris@16
|
939 std::basic_string<charT> u32regex_replace(const std::basic_string<charT>& s,
|
Chris@16
|
940 const u32regex& e,
|
Chris@16
|
941 const charT* fmt,
|
Chris@16
|
942 match_flag_type flags = match_default)
|
Chris@16
|
943 {
|
Chris@16
|
944 std::basic_string<charT> result;
|
Chris@16
|
945 re_detail::string_out_iterator<std::basic_string<charT> > i(result);
|
Chris@16
|
946 u32regex_replace(i, s.begin(), s.end(), e, fmt, flags);
|
Chris@16
|
947 return result;
|
Chris@16
|
948 }
|
Chris@16
|
949
|
Chris@16
|
950 template <class charT>
|
Chris@16
|
951 std::basic_string<charT> u32regex_replace(const std::basic_string<charT>& s,
|
Chris@16
|
952 const u32regex& e,
|
Chris@16
|
953 const std::basic_string<charT>& fmt,
|
Chris@16
|
954 match_flag_type flags = match_default)
|
Chris@16
|
955 {
|
Chris@16
|
956 std::basic_string<charT> result;
|
Chris@16
|
957 re_detail::string_out_iterator<std::basic_string<charT> > i(result);
|
Chris@16
|
958 u32regex_replace(i, s.begin(), s.end(), e, fmt.c_str(), flags);
|
Chris@16
|
959 return result;
|
Chris@16
|
960 }
|
Chris@16
|
961
|
Chris@16
|
962 namespace re_detail{
|
Chris@16
|
963
|
Chris@16
|
964 class unicode_string_out_iterator
|
Chris@16
|
965 {
|
Chris@16
|
966 U_NAMESPACE_QUALIFIER UnicodeString* out;
|
Chris@16
|
967 public:
|
Chris@16
|
968 unicode_string_out_iterator(U_NAMESPACE_QUALIFIER UnicodeString& s) : out(&s) {}
|
Chris@16
|
969 unicode_string_out_iterator& operator++() { return *this; }
|
Chris@16
|
970 unicode_string_out_iterator& operator++(int) { return *this; }
|
Chris@16
|
971 unicode_string_out_iterator& operator*() { return *this; }
|
Chris@16
|
972 unicode_string_out_iterator& operator=(UChar v)
|
Chris@16
|
973 {
|
Chris@16
|
974 *out += v;
|
Chris@16
|
975 return *this;
|
Chris@16
|
976 }
|
Chris@16
|
977 typedef std::ptrdiff_t difference_type;
|
Chris@16
|
978 typedef UChar value_type;
|
Chris@16
|
979 typedef value_type* pointer;
|
Chris@16
|
980 typedef value_type& reference;
|
Chris@16
|
981 typedef std::output_iterator_tag iterator_category;
|
Chris@16
|
982 };
|
Chris@16
|
983
|
Chris@16
|
984 }
|
Chris@16
|
985
|
Chris@16
|
986 inline U_NAMESPACE_QUALIFIER UnicodeString u32regex_replace(const U_NAMESPACE_QUALIFIER UnicodeString& s,
|
Chris@16
|
987 const u32regex& e,
|
Chris@16
|
988 const UChar* fmt,
|
Chris@16
|
989 match_flag_type flags = match_default)
|
Chris@16
|
990 {
|
Chris@16
|
991 U_NAMESPACE_QUALIFIER UnicodeString result;
|
Chris@16
|
992 re_detail::unicode_string_out_iterator i(result);
|
Chris@16
|
993 u32regex_replace(i, s.getBuffer(), s.getBuffer()+s.length(), e, fmt, flags);
|
Chris@16
|
994 return result;
|
Chris@16
|
995 }
|
Chris@16
|
996
|
Chris@16
|
997 inline U_NAMESPACE_QUALIFIER UnicodeString u32regex_replace(const U_NAMESPACE_QUALIFIER UnicodeString& s,
|
Chris@16
|
998 const u32regex& e,
|
Chris@16
|
999 const U_NAMESPACE_QUALIFIER UnicodeString& fmt,
|
Chris@16
|
1000 match_flag_type flags = match_default)
|
Chris@16
|
1001 {
|
Chris@16
|
1002 U_NAMESPACE_QUALIFIER UnicodeString result;
|
Chris@16
|
1003 re_detail::unicode_string_out_iterator i(result);
|
Chris@16
|
1004 re_detail::do_regex_replace(
|
Chris@16
|
1005 re_detail::make_utf32_out(i, static_cast<mpl::int_<2> const*>(0)),
|
Chris@16
|
1006 re_detail::make_utf32_seq(s.getBuffer(), s.getBuffer()+s.length(), static_cast<mpl::int_<2> const*>(0)),
|
Chris@16
|
1007 e,
|
Chris@16
|
1008 re_detail::make_utf32_seq(fmt.getBuffer(), fmt.getBuffer() + fmt.length(), static_cast<mpl::int_<2> const*>(0)),
|
Chris@16
|
1009 flags);
|
Chris@16
|
1010 return result;
|
Chris@16
|
1011 }
|
Chris@16
|
1012
|
Chris@16
|
1013 } // namespace boost.
|
Chris@16
|
1014
|
Chris@16
|
1015 #ifdef BOOST_MSVC
|
Chris@16
|
1016 #pragma warning (pop)
|
Chris@16
|
1017 #endif
|
Chris@16
|
1018
|
Chris@16
|
1019 #include <boost/regex/v4/u32regex_iterator.hpp>
|
Chris@16
|
1020 #include <boost/regex/v4/u32regex_token_iterator.hpp>
|
Chris@16
|
1021
|
Chris@16
|
1022 #endif
|