Chris@16
|
1 ///////////////////////////////////////////////////////////////////////////////
|
Chris@16
|
2 /// \file regex_compiler.hpp
|
Chris@16
|
3 /// Contains the definition of regex_compiler, a factory for building regex objects
|
Chris@16
|
4 /// from strings.
|
Chris@16
|
5 //
|
Chris@16
|
6 // Copyright 2008 Eric Niebler. Distributed under the Boost
|
Chris@16
|
7 // Software License, Version 1.0. (See accompanying file
|
Chris@16
|
8 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
Chris@16
|
9
|
Chris@16
|
10 #ifndef BOOST_XPRESSIVE_REGEX_COMPILER_HPP_EAN_10_04_2005
|
Chris@16
|
11 #define BOOST_XPRESSIVE_REGEX_COMPILER_HPP_EAN_10_04_2005
|
Chris@16
|
12
|
Chris@16
|
13 // MS compatible compilers support #pragma once
|
Chris@101
|
14 #if defined(_MSC_VER)
|
Chris@16
|
15 # pragma once
|
Chris@16
|
16 #endif
|
Chris@16
|
17
|
Chris@16
|
18 #include <map>
|
Chris@16
|
19 #include <boost/config.hpp>
|
Chris@16
|
20 #include <boost/assert.hpp>
|
Chris@16
|
21 #include <boost/next_prior.hpp>
|
Chris@16
|
22 #include <boost/range/begin.hpp>
|
Chris@16
|
23 #include <boost/range/end.hpp>
|
Chris@16
|
24 #include <boost/mpl/assert.hpp>
|
Chris@16
|
25 #include <boost/throw_exception.hpp>
|
Chris@16
|
26 #include <boost/type_traits/is_same.hpp>
|
Chris@16
|
27 #include <boost/type_traits/is_pointer.hpp>
|
Chris@16
|
28 #include <boost/utility/enable_if.hpp>
|
Chris@16
|
29 #include <boost/iterator/iterator_traits.hpp>
|
Chris@16
|
30 #include <boost/xpressive/basic_regex.hpp>
|
Chris@16
|
31 #include <boost/xpressive/detail/dynamic/parser.hpp>
|
Chris@16
|
32 #include <boost/xpressive/detail/dynamic/parse_charset.hpp>
|
Chris@16
|
33 #include <boost/xpressive/detail/dynamic/parser_enum.hpp>
|
Chris@16
|
34 #include <boost/xpressive/detail/dynamic/parser_traits.hpp>
|
Chris@16
|
35 #include <boost/xpressive/detail/core/linker.hpp>
|
Chris@16
|
36 #include <boost/xpressive/detail/core/optimize.hpp>
|
Chris@16
|
37
|
Chris@16
|
38 namespace boost { namespace xpressive
|
Chris@16
|
39 {
|
Chris@16
|
40
|
Chris@16
|
41 ///////////////////////////////////////////////////////////////////////////////
|
Chris@16
|
42 // regex_compiler
|
Chris@16
|
43 //
|
Chris@16
|
44 /// \brief Class template regex_compiler is a factory for building basic_regex objects from a string.
|
Chris@16
|
45 ///
|
Chris@16
|
46 /// Class template regex_compiler is used to construct a basic_regex object from a string. The string
|
Chris@16
|
47 /// should contain a valid regular expression. You can imbue a regex_compiler object with a locale,
|
Chris@16
|
48 /// after which all basic_regex objects created with that regex_compiler object will use that locale.
|
Chris@16
|
49 /// After creating a regex_compiler object, and optionally imbuing it with a locale, you can call the
|
Chris@16
|
50 /// compile() method to construct a basic_regex object, passing it the string representing the regular
|
Chris@16
|
51 /// expression. You can call compile() multiple times on the same regex_compiler object. Two basic_regex
|
Chris@16
|
52 /// objects compiled from the same string will have different regex_id's.
|
Chris@16
|
53 template<typename BidiIter, typename RegexTraits, typename CompilerTraits>
|
Chris@16
|
54 struct regex_compiler
|
Chris@16
|
55 {
|
Chris@16
|
56 typedef BidiIter iterator_type;
|
Chris@16
|
57 typedef typename iterator_value<BidiIter>::type char_type;
|
Chris@16
|
58 typedef regex_constants::syntax_option_type flag_type;
|
Chris@16
|
59 typedef RegexTraits traits_type;
|
Chris@16
|
60 typedef typename traits_type::string_type string_type;
|
Chris@16
|
61 typedef typename traits_type::locale_type locale_type;
|
Chris@16
|
62 typedef typename traits_type::char_class_type char_class_type;
|
Chris@16
|
63
|
Chris@16
|
64 explicit regex_compiler(RegexTraits const &traits = RegexTraits())
|
Chris@16
|
65 : mark_count_(0)
|
Chris@16
|
66 , hidden_mark_count_(0)
|
Chris@16
|
67 , traits_(traits)
|
Chris@16
|
68 , upper_(0)
|
Chris@16
|
69 , self_()
|
Chris@16
|
70 , rules_()
|
Chris@16
|
71 {
|
Chris@16
|
72 this->upper_ = lookup_classname(this->rxtraits(), "upper");
|
Chris@16
|
73 }
|
Chris@16
|
74
|
Chris@16
|
75 ///////////////////////////////////////////////////////////////////////////
|
Chris@16
|
76 // imbue
|
Chris@16
|
77 /// Specify the locale to be used by a regex_compiler.
|
Chris@16
|
78 ///
|
Chris@16
|
79 /// \param loc The locale that this regex_compiler should use.
|
Chris@16
|
80 /// \return The previous locale.
|
Chris@16
|
81 locale_type imbue(locale_type loc)
|
Chris@16
|
82 {
|
Chris@16
|
83 locale_type oldloc = this->traits_.imbue(loc);
|
Chris@16
|
84 this->upper_ = lookup_classname(this->rxtraits(), "upper");
|
Chris@16
|
85 return oldloc;
|
Chris@16
|
86 }
|
Chris@16
|
87
|
Chris@16
|
88 ///////////////////////////////////////////////////////////////////////////
|
Chris@16
|
89 // getloc
|
Chris@16
|
90 /// Get the locale used by a regex_compiler.
|
Chris@16
|
91 ///
|
Chris@16
|
92 /// \return The locale used by this regex_compiler.
|
Chris@16
|
93 locale_type getloc() const
|
Chris@16
|
94 {
|
Chris@16
|
95 return this->traits_.getloc();
|
Chris@16
|
96 }
|
Chris@16
|
97
|
Chris@16
|
98 ///////////////////////////////////////////////////////////////////////////
|
Chris@16
|
99 // compile
|
Chris@16
|
100 /// Builds a basic_regex object from a range of characters.
|
Chris@16
|
101 ///
|
Chris@16
|
102 /// \param begin The beginning of a range of characters representing the
|
Chris@16
|
103 /// regular expression to compile.
|
Chris@16
|
104 /// \param end The end of a range of characters representing the
|
Chris@16
|
105 /// regular expression to compile.
|
Chris@16
|
106 /// \param flags Optional bitmask that determines how the pat string is
|
Chris@16
|
107 /// interpreted. (See syntax_option_type.)
|
Chris@16
|
108 /// \return A basic_regex object corresponding to the regular expression
|
Chris@16
|
109 /// represented by the character range.
|
Chris@16
|
110 /// \pre InputIter is a model of the InputIterator concept.
|
Chris@16
|
111 /// \pre [begin,end) is a valid range.
|
Chris@16
|
112 /// \pre The range of characters specified by [begin,end) contains a
|
Chris@16
|
113 /// valid string-based representation of a regular expression.
|
Chris@16
|
114 /// \throw regex_error when the range of characters has invalid regular
|
Chris@16
|
115 /// expression syntax.
|
Chris@16
|
116 template<typename InputIter>
|
Chris@16
|
117 basic_regex<BidiIter>
|
Chris@16
|
118 compile(InputIter begin, InputIter end, flag_type flags = regex_constants::ECMAScript)
|
Chris@16
|
119 {
|
Chris@16
|
120 typedef typename iterator_category<InputIter>::type category;
|
Chris@16
|
121 return this->compile_(begin, end, flags, category());
|
Chris@16
|
122 }
|
Chris@16
|
123
|
Chris@16
|
124 /// \overload
|
Chris@16
|
125 ///
|
Chris@16
|
126 template<typename InputRange>
|
Chris@16
|
127 typename disable_if<is_pointer<InputRange>, basic_regex<BidiIter> >::type
|
Chris@16
|
128 compile(InputRange const &pat, flag_type flags = regex_constants::ECMAScript)
|
Chris@16
|
129 {
|
Chris@16
|
130 return this->compile(boost::begin(pat), boost::end(pat), flags);
|
Chris@16
|
131 }
|
Chris@16
|
132
|
Chris@16
|
133 /// \overload
|
Chris@16
|
134 ///
|
Chris@16
|
135 basic_regex<BidiIter>
|
Chris@16
|
136 compile(char_type const *begin, flag_type flags = regex_constants::ECMAScript)
|
Chris@16
|
137 {
|
Chris@16
|
138 BOOST_ASSERT(0 != begin);
|
Chris@16
|
139 char_type const *end = begin + std::char_traits<char_type>::length(begin);
|
Chris@16
|
140 return this->compile(begin, end, flags);
|
Chris@16
|
141 }
|
Chris@16
|
142
|
Chris@16
|
143 /// \overload
|
Chris@16
|
144 ///
|
Chris@16
|
145 basic_regex<BidiIter> compile(char_type const *begin, std::size_t size, flag_type flags)
|
Chris@16
|
146 {
|
Chris@16
|
147 BOOST_ASSERT(0 != begin);
|
Chris@16
|
148 char_type const *end = begin + size;
|
Chris@16
|
149 return this->compile(begin, end, flags);
|
Chris@16
|
150 }
|
Chris@16
|
151
|
Chris@16
|
152 ///////////////////////////////////////////////////////////////////////////
|
Chris@16
|
153 // operator[]
|
Chris@16
|
154 /// Return a reference to the named regular expression. If no such named
|
Chris@16
|
155 /// regular expression exists, create a new regular expression and return
|
Chris@16
|
156 /// a reference to it.
|
Chris@16
|
157 ///
|
Chris@16
|
158 /// \param name A std::string containing the name of the regular expression.
|
Chris@16
|
159 /// \pre The string is not empty.
|
Chris@16
|
160 /// \throw bad_alloc on allocation failure.
|
Chris@16
|
161 basic_regex<BidiIter> &operator [](string_type const &name)
|
Chris@16
|
162 {
|
Chris@16
|
163 BOOST_ASSERT(!name.empty());
|
Chris@16
|
164 return this->rules_[name];
|
Chris@16
|
165 }
|
Chris@16
|
166
|
Chris@16
|
167 /// \overload
|
Chris@16
|
168 ///
|
Chris@16
|
169 basic_regex<BidiIter> const &operator [](string_type const &name) const
|
Chris@16
|
170 {
|
Chris@16
|
171 BOOST_ASSERT(!name.empty());
|
Chris@16
|
172 return this->rules_[name];
|
Chris@16
|
173 }
|
Chris@16
|
174
|
Chris@16
|
175 private:
|
Chris@16
|
176
|
Chris@16
|
177 typedef detail::escape_value<char_type, char_class_type> escape_value;
|
Chris@16
|
178 typedef detail::alternate_matcher<detail::alternates_vector<BidiIter>, RegexTraits> alternate_matcher;
|
Chris@16
|
179
|
Chris@16
|
180 ///////////////////////////////////////////////////////////////////////////
|
Chris@16
|
181 // compile_
|
Chris@16
|
182 /// INTERNAL ONLY
|
Chris@16
|
183 template<typename FwdIter>
|
Chris@16
|
184 basic_regex<BidiIter> compile_(FwdIter begin, FwdIter end, flag_type flags, std::forward_iterator_tag)
|
Chris@16
|
185 {
|
Chris@16
|
186 BOOST_MPL_ASSERT((is_same<char_type, typename iterator_value<FwdIter>::type>));
|
Chris@16
|
187 using namespace regex_constants;
|
Chris@16
|
188 this->reset();
|
Chris@16
|
189 this->traits_.flags(flags);
|
Chris@16
|
190
|
Chris@16
|
191 basic_regex<BidiIter> rextmp, *prex = &rextmp;
|
Chris@16
|
192 FwdIter tmp = begin;
|
Chris@16
|
193
|
Chris@16
|
194 // Check if this regex is a named rule:
|
Chris@16
|
195 string_type name;
|
Chris@16
|
196 if(token_group_begin == this->traits_.get_token(tmp, end) &&
|
Chris@16
|
197 BOOST_XPR_ENSURE_(tmp != end, error_paren, "mismatched parenthesis") &&
|
Chris@16
|
198 token_rule_assign == this->traits_.get_group_type(tmp, end, name))
|
Chris@16
|
199 {
|
Chris@16
|
200 begin = tmp;
|
Chris@16
|
201 BOOST_XPR_ENSURE_
|
Chris@16
|
202 (
|
Chris@16
|
203 begin != end && token_group_end == this->traits_.get_token(begin, end)
|
Chris@16
|
204 , error_paren
|
Chris@16
|
205 , "mismatched parenthesis"
|
Chris@16
|
206 );
|
Chris@16
|
207 prex = &this->rules_[name];
|
Chris@16
|
208 }
|
Chris@16
|
209
|
Chris@16
|
210 this->self_ = detail::core_access<BidiIter>::get_regex_impl(*prex);
|
Chris@16
|
211
|
Chris@16
|
212 // at the top level, a regex is a sequence of alternates
|
Chris@16
|
213 detail::sequence<BidiIter> seq = this->parse_alternates(begin, end);
|
Chris@16
|
214 BOOST_XPR_ENSURE_(begin == end, error_paren, "mismatched parenthesis");
|
Chris@16
|
215
|
Chris@16
|
216 // terminate the sequence
|
Chris@16
|
217 seq += detail::make_dynamic<BidiIter>(detail::end_matcher());
|
Chris@16
|
218
|
Chris@16
|
219 // bundle the regex information into a regex_impl object
|
Chris@16
|
220 detail::common_compile(seq.xpr().matchable(), *this->self_, this->rxtraits());
|
Chris@16
|
221
|
Chris@16
|
222 this->self_->traits_ = new detail::traits_holder<RegexTraits>(this->rxtraits());
|
Chris@16
|
223 this->self_->mark_count_ = this->mark_count_;
|
Chris@16
|
224 this->self_->hidden_mark_count_ = this->hidden_mark_count_;
|
Chris@16
|
225
|
Chris@16
|
226 // References changed, update dependencies.
|
Chris@16
|
227 this->self_->tracking_update();
|
Chris@16
|
228 this->self_.reset();
|
Chris@16
|
229 return *prex;
|
Chris@16
|
230 }
|
Chris@16
|
231
|
Chris@16
|
232 ///////////////////////////////////////////////////////////////////////////
|
Chris@16
|
233 // compile_
|
Chris@16
|
234 /// INTERNAL ONLY
|
Chris@16
|
235 template<typename InputIter>
|
Chris@16
|
236 basic_regex<BidiIter> compile_(InputIter begin, InputIter end, flag_type flags, std::input_iterator_tag)
|
Chris@16
|
237 {
|
Chris@16
|
238 string_type pat(begin, end);
|
Chris@16
|
239 return this->compile_(boost::begin(pat), boost::end(pat), flags, std::forward_iterator_tag());
|
Chris@16
|
240 }
|
Chris@16
|
241
|
Chris@16
|
242 ///////////////////////////////////////////////////////////////////////////
|
Chris@16
|
243 // reset
|
Chris@16
|
244 /// INTERNAL ONLY
|
Chris@16
|
245 void reset()
|
Chris@16
|
246 {
|
Chris@16
|
247 this->mark_count_ = 0;
|
Chris@16
|
248 this->hidden_mark_count_ = 0;
|
Chris@16
|
249 this->traits_.flags(regex_constants::ECMAScript);
|
Chris@16
|
250 }
|
Chris@16
|
251
|
Chris@16
|
252 ///////////////////////////////////////////////////////////////////////////
|
Chris@16
|
253 // regex_traits
|
Chris@16
|
254 /// INTERNAL ONLY
|
Chris@16
|
255 traits_type &rxtraits()
|
Chris@16
|
256 {
|
Chris@16
|
257 return this->traits_.traits();
|
Chris@16
|
258 }
|
Chris@16
|
259
|
Chris@16
|
260 ///////////////////////////////////////////////////////////////////////////
|
Chris@16
|
261 // regex_traits
|
Chris@16
|
262 /// INTERNAL ONLY
|
Chris@16
|
263 traits_type const &rxtraits() const
|
Chris@16
|
264 {
|
Chris@16
|
265 return this->traits_.traits();
|
Chris@16
|
266 }
|
Chris@16
|
267
|
Chris@16
|
268 ///////////////////////////////////////////////////////////////////////////
|
Chris@16
|
269 // parse_alternates
|
Chris@16
|
270 /// INTERNAL ONLY
|
Chris@16
|
271 template<typename FwdIter>
|
Chris@16
|
272 detail::sequence<BidiIter> parse_alternates(FwdIter &begin, FwdIter end)
|
Chris@16
|
273 {
|
Chris@16
|
274 using namespace regex_constants;
|
Chris@16
|
275 int count = 0;
|
Chris@16
|
276 FwdIter tmp = begin;
|
Chris@16
|
277 detail::sequence<BidiIter> seq;
|
Chris@16
|
278
|
Chris@16
|
279 do switch(++count)
|
Chris@16
|
280 {
|
Chris@16
|
281 case 1:
|
Chris@16
|
282 seq = this->parse_sequence(tmp, end);
|
Chris@16
|
283 break;
|
Chris@16
|
284 case 2:
|
Chris@16
|
285 seq = detail::make_dynamic<BidiIter>(alternate_matcher()) | seq;
|
Chris@16
|
286 BOOST_FALLTHROUGH;
|
Chris@16
|
287 default:
|
Chris@16
|
288 seq |= this->parse_sequence(tmp, end);
|
Chris@16
|
289 }
|
Chris@16
|
290 while((begin = tmp) != end && token_alternate == this->traits_.get_token(tmp, end));
|
Chris@16
|
291
|
Chris@16
|
292 return seq;
|
Chris@16
|
293 }
|
Chris@16
|
294
|
Chris@16
|
295 ///////////////////////////////////////////////////////////////////////////
|
Chris@16
|
296 // parse_group
|
Chris@16
|
297 /// INTERNAL ONLY
|
Chris@16
|
298 template<typename FwdIter>
|
Chris@16
|
299 detail::sequence<BidiIter> parse_group(FwdIter &begin, FwdIter end)
|
Chris@16
|
300 {
|
Chris@16
|
301 using namespace regex_constants;
|
Chris@16
|
302 int mark_nbr = 0;
|
Chris@16
|
303 bool keeper = false;
|
Chris@16
|
304 bool lookahead = false;
|
Chris@16
|
305 bool lookbehind = false;
|
Chris@16
|
306 bool negative = false;
|
Chris@16
|
307 string_type name;
|
Chris@16
|
308
|
Chris@16
|
309 detail::sequence<BidiIter> seq, seq_end;
|
Chris@16
|
310 FwdIter tmp = FwdIter();
|
Chris@16
|
311
|
Chris@16
|
312 syntax_option_type old_flags = this->traits_.flags();
|
Chris@16
|
313
|
Chris@16
|
314 switch(this->traits_.get_group_type(begin, end, name))
|
Chris@16
|
315 {
|
Chris@16
|
316 case token_no_mark:
|
Chris@16
|
317 // Don't process empty groups like (?:) or (?i)
|
Chris@16
|
318 // BUGBUG this doesn't handle the degenerate (?:)+ correctly
|
Chris@16
|
319 if(token_group_end == this->traits_.get_token(tmp = begin, end))
|
Chris@16
|
320 {
|
Chris@16
|
321 return this->parse_atom(begin = tmp, end);
|
Chris@16
|
322 }
|
Chris@16
|
323 break;
|
Chris@16
|
324
|
Chris@16
|
325 case token_negative_lookahead:
|
Chris@16
|
326 negative = true;
|
Chris@16
|
327 BOOST_FALLTHROUGH;
|
Chris@16
|
328 case token_positive_lookahead:
|
Chris@16
|
329 lookahead = true;
|
Chris@16
|
330 break;
|
Chris@16
|
331
|
Chris@16
|
332 case token_negative_lookbehind:
|
Chris@16
|
333 negative = true;
|
Chris@16
|
334 BOOST_FALLTHROUGH;
|
Chris@16
|
335 case token_positive_lookbehind:
|
Chris@16
|
336 lookbehind = true;
|
Chris@16
|
337 break;
|
Chris@16
|
338
|
Chris@16
|
339 case token_independent_sub_expression:
|
Chris@16
|
340 keeper = true;
|
Chris@16
|
341 break;
|
Chris@16
|
342
|
Chris@16
|
343 case token_comment:
|
Chris@16
|
344 while(BOOST_XPR_ENSURE_(begin != end, error_paren, "mismatched parenthesis"))
|
Chris@16
|
345 {
|
Chris@16
|
346 switch(this->traits_.get_token(begin, end))
|
Chris@16
|
347 {
|
Chris@16
|
348 case token_group_end:
|
Chris@16
|
349 return this->parse_atom(begin, end);
|
Chris@16
|
350 case token_escape:
|
Chris@16
|
351 BOOST_XPR_ENSURE_(begin != end, error_escape, "incomplete escape sequence");
|
Chris@16
|
352 BOOST_FALLTHROUGH;
|
Chris@16
|
353 case token_literal:
|
Chris@16
|
354 ++begin;
|
Chris@16
|
355 break;
|
Chris@16
|
356 default:
|
Chris@16
|
357 break;
|
Chris@16
|
358 }
|
Chris@16
|
359 }
|
Chris@16
|
360 break;
|
Chris@16
|
361
|
Chris@16
|
362 case token_recurse:
|
Chris@16
|
363 BOOST_XPR_ENSURE_
|
Chris@16
|
364 (
|
Chris@16
|
365 begin != end && token_group_end == this->traits_.get_token(begin, end)
|
Chris@16
|
366 , error_paren
|
Chris@16
|
367 , "mismatched parenthesis"
|
Chris@16
|
368 );
|
Chris@16
|
369 return detail::make_dynamic<BidiIter>(detail::regex_byref_matcher<BidiIter>(this->self_));
|
Chris@16
|
370
|
Chris@16
|
371 case token_rule_assign:
|
Chris@16
|
372 BOOST_THROW_EXCEPTION(
|
Chris@16
|
373 regex_error(error_badrule, "rule assignments must be at the front of the regex")
|
Chris@16
|
374 );
|
Chris@16
|
375 break;
|
Chris@16
|
376
|
Chris@16
|
377 case token_rule_ref:
|
Chris@16
|
378 {
|
Chris@16
|
379 typedef detail::core_access<BidiIter> access;
|
Chris@16
|
380 BOOST_XPR_ENSURE_
|
Chris@16
|
381 (
|
Chris@16
|
382 begin != end && token_group_end == this->traits_.get_token(begin, end)
|
Chris@16
|
383 , error_paren
|
Chris@16
|
384 , "mismatched parenthesis"
|
Chris@16
|
385 );
|
Chris@16
|
386 basic_regex<BidiIter> &rex = this->rules_[name];
|
Chris@16
|
387 shared_ptr<detail::regex_impl<BidiIter> > impl = access::get_regex_impl(rex);
|
Chris@16
|
388 this->self_->track_reference(*impl);
|
Chris@16
|
389 return detail::make_dynamic<BidiIter>(detail::regex_byref_matcher<BidiIter>(impl));
|
Chris@16
|
390 }
|
Chris@16
|
391
|
Chris@16
|
392 case token_named_mark:
|
Chris@16
|
393 mark_nbr = static_cast<int>(++this->mark_count_);
|
Chris@16
|
394 for(std::size_t i = 0; i < this->self_->named_marks_.size(); ++i)
|
Chris@16
|
395 {
|
Chris@16
|
396 BOOST_XPR_ENSURE_(this->self_->named_marks_[i].name_ != name, error_badmark, "named mark already exists");
|
Chris@16
|
397 }
|
Chris@16
|
398 this->self_->named_marks_.push_back(detail::named_mark<char_type>(name, this->mark_count_));
|
Chris@16
|
399 seq = detail::make_dynamic<BidiIter>(detail::mark_begin_matcher(mark_nbr));
|
Chris@16
|
400 seq_end = detail::make_dynamic<BidiIter>(detail::mark_end_matcher(mark_nbr));
|
Chris@16
|
401 break;
|
Chris@16
|
402
|
Chris@16
|
403 case token_named_mark_ref:
|
Chris@16
|
404 BOOST_XPR_ENSURE_
|
Chris@16
|
405 (
|
Chris@16
|
406 begin != end && token_group_end == this->traits_.get_token(begin, end)
|
Chris@16
|
407 , error_paren
|
Chris@16
|
408 , "mismatched parenthesis"
|
Chris@16
|
409 );
|
Chris@16
|
410 for(std::size_t i = 0; i < this->self_->named_marks_.size(); ++i)
|
Chris@16
|
411 {
|
Chris@16
|
412 if(this->self_->named_marks_[i].name_ == name)
|
Chris@16
|
413 {
|
Chris@16
|
414 mark_nbr = static_cast<int>(this->self_->named_marks_[i].mark_nbr_);
|
Chris@16
|
415 return detail::make_backref_xpression<BidiIter>
|
Chris@16
|
416 (
|
Chris@16
|
417 mark_nbr, this->traits_.flags(), this->rxtraits()
|
Chris@16
|
418 );
|
Chris@16
|
419 }
|
Chris@16
|
420 }
|
Chris@16
|
421 BOOST_THROW_EXCEPTION(regex_error(error_badmark, "invalid named back-reference"));
|
Chris@16
|
422 break;
|
Chris@16
|
423
|
Chris@16
|
424 default:
|
Chris@16
|
425 mark_nbr = static_cast<int>(++this->mark_count_);
|
Chris@16
|
426 seq = detail::make_dynamic<BidiIter>(detail::mark_begin_matcher(mark_nbr));
|
Chris@16
|
427 seq_end = detail::make_dynamic<BidiIter>(detail::mark_end_matcher(mark_nbr));
|
Chris@16
|
428 break;
|
Chris@16
|
429 }
|
Chris@16
|
430
|
Chris@16
|
431 // alternates
|
Chris@16
|
432 seq += this->parse_alternates(begin, end);
|
Chris@16
|
433 seq += seq_end;
|
Chris@16
|
434 BOOST_XPR_ENSURE_
|
Chris@16
|
435 (
|
Chris@16
|
436 begin != end && token_group_end == this->traits_.get_token(begin, end)
|
Chris@16
|
437 , error_paren
|
Chris@16
|
438 , "mismatched parenthesis"
|
Chris@16
|
439 );
|
Chris@16
|
440
|
Chris@16
|
441 typedef detail::shared_matchable<BidiIter> xpr_type;
|
Chris@16
|
442 if(lookahead)
|
Chris@16
|
443 {
|
Chris@16
|
444 seq += detail::make_independent_end_xpression<BidiIter>(seq.pure());
|
Chris@101
|
445 detail::lookahead_matcher<xpr_type> lam(seq.xpr(), negative, seq.pure());
|
Chris@101
|
446 seq = detail::make_dynamic<BidiIter>(lam);
|
Chris@16
|
447 }
|
Chris@16
|
448 else if(lookbehind)
|
Chris@16
|
449 {
|
Chris@16
|
450 seq += detail::make_independent_end_xpression<BidiIter>(seq.pure());
|
Chris@101
|
451 detail::lookbehind_matcher<xpr_type> lbm(seq.xpr(), seq.width().value(), negative, seq.pure());
|
Chris@101
|
452 seq = detail::make_dynamic<BidiIter>(lbm);
|
Chris@16
|
453 }
|
Chris@16
|
454 else if(keeper) // independent sub-expression
|
Chris@16
|
455 {
|
Chris@16
|
456 seq += detail::make_independent_end_xpression<BidiIter>(seq.pure());
|
Chris@101
|
457 detail::keeper_matcher<xpr_type> km(seq.xpr(), seq.pure());
|
Chris@101
|
458 seq = detail::make_dynamic<BidiIter>(km);
|
Chris@16
|
459 }
|
Chris@16
|
460
|
Chris@16
|
461 // restore the modifiers
|
Chris@16
|
462 this->traits_.flags(old_flags);
|
Chris@16
|
463 return seq;
|
Chris@16
|
464 }
|
Chris@16
|
465
|
Chris@16
|
466 ///////////////////////////////////////////////////////////////////////////
|
Chris@16
|
467 // parse_charset
|
Chris@16
|
468 /// INTERNAL ONLY
|
Chris@16
|
469 template<typename FwdIter>
|
Chris@16
|
470 detail::sequence<BidiIter> parse_charset(FwdIter &begin, FwdIter end)
|
Chris@16
|
471 {
|
Chris@16
|
472 detail::compound_charset<traits_type> chset;
|
Chris@16
|
473
|
Chris@16
|
474 // call out to a helper to actually parse the character set
|
Chris@16
|
475 detail::parse_charset(begin, end, chset, this->traits_);
|
Chris@16
|
476
|
Chris@16
|
477 return detail::make_charset_xpression<BidiIter>
|
Chris@16
|
478 (
|
Chris@16
|
479 chset
|
Chris@16
|
480 , this->rxtraits()
|
Chris@16
|
481 , this->traits_.flags()
|
Chris@16
|
482 );
|
Chris@16
|
483 }
|
Chris@16
|
484
|
Chris@16
|
485 ///////////////////////////////////////////////////////////////////////////
|
Chris@16
|
486 // parse_atom
|
Chris@16
|
487 /// INTERNAL ONLY
|
Chris@16
|
488 template<typename FwdIter>
|
Chris@16
|
489 detail::sequence<BidiIter> parse_atom(FwdIter &begin, FwdIter end)
|
Chris@16
|
490 {
|
Chris@16
|
491 using namespace regex_constants;
|
Chris@16
|
492 escape_value esc = { 0, 0, 0, detail::escape_char };
|
Chris@16
|
493 FwdIter old_begin = begin;
|
Chris@16
|
494
|
Chris@16
|
495 switch(this->traits_.get_token(begin, end))
|
Chris@16
|
496 {
|
Chris@16
|
497 case token_literal:
|
Chris@16
|
498 return detail::make_literal_xpression<BidiIter>
|
Chris@16
|
499 (
|
Chris@16
|
500 this->parse_literal(begin, end), this->traits_.flags(), this->rxtraits()
|
Chris@16
|
501 );
|
Chris@16
|
502
|
Chris@16
|
503 case token_any:
|
Chris@16
|
504 return detail::make_any_xpression<BidiIter>(this->traits_.flags(), this->rxtraits());
|
Chris@16
|
505
|
Chris@16
|
506 case token_assert_begin_sequence:
|
Chris@16
|
507 return detail::make_dynamic<BidiIter>(detail::assert_bos_matcher());
|
Chris@16
|
508
|
Chris@16
|
509 case token_assert_end_sequence:
|
Chris@16
|
510 return detail::make_dynamic<BidiIter>(detail::assert_eos_matcher());
|
Chris@16
|
511
|
Chris@16
|
512 case token_assert_begin_line:
|
Chris@16
|
513 return detail::make_assert_begin_line<BidiIter>(this->traits_.flags(), this->rxtraits());
|
Chris@16
|
514
|
Chris@16
|
515 case token_assert_end_line:
|
Chris@16
|
516 return detail::make_assert_end_line<BidiIter>(this->traits_.flags(), this->rxtraits());
|
Chris@16
|
517
|
Chris@16
|
518 case token_assert_word_boundary:
|
Chris@16
|
519 return detail::make_assert_word<BidiIter>(detail::word_boundary<mpl::true_>(), this->rxtraits());
|
Chris@16
|
520
|
Chris@16
|
521 case token_assert_not_word_boundary:
|
Chris@16
|
522 return detail::make_assert_word<BidiIter>(detail::word_boundary<mpl::false_>(), this->rxtraits());
|
Chris@16
|
523
|
Chris@16
|
524 case token_assert_word_begin:
|
Chris@16
|
525 return detail::make_assert_word<BidiIter>(detail::word_begin(), this->rxtraits());
|
Chris@16
|
526
|
Chris@16
|
527 case token_assert_word_end:
|
Chris@16
|
528 return detail::make_assert_word<BidiIter>(detail::word_end(), this->rxtraits());
|
Chris@16
|
529
|
Chris@16
|
530 case token_escape:
|
Chris@16
|
531 esc = this->parse_escape(begin, end);
|
Chris@16
|
532 switch(esc.type_)
|
Chris@16
|
533 {
|
Chris@16
|
534 case detail::escape_mark:
|
Chris@16
|
535 return detail::make_backref_xpression<BidiIter>
|
Chris@16
|
536 (
|
Chris@16
|
537 esc.mark_nbr_, this->traits_.flags(), this->rxtraits()
|
Chris@16
|
538 );
|
Chris@16
|
539 case detail::escape_char:
|
Chris@16
|
540 return detail::make_char_xpression<BidiIter>
|
Chris@16
|
541 (
|
Chris@16
|
542 esc.ch_, this->traits_.flags(), this->rxtraits()
|
Chris@16
|
543 );
|
Chris@16
|
544 case detail::escape_class:
|
Chris@16
|
545 return detail::make_posix_charset_xpression<BidiIter>
|
Chris@16
|
546 (
|
Chris@16
|
547 esc.class_
|
Chris@16
|
548 , this->is_upper_(*begin++)
|
Chris@16
|
549 , this->traits_.flags()
|
Chris@16
|
550 , this->rxtraits()
|
Chris@16
|
551 );
|
Chris@16
|
552 }
|
Chris@16
|
553
|
Chris@16
|
554 case token_group_begin:
|
Chris@16
|
555 return this->parse_group(begin, end);
|
Chris@16
|
556
|
Chris@16
|
557 case token_charset_begin:
|
Chris@16
|
558 return this->parse_charset(begin, end);
|
Chris@16
|
559
|
Chris@16
|
560 case token_invalid_quantifier:
|
Chris@16
|
561 BOOST_THROW_EXCEPTION(regex_error(error_badrepeat, "quantifier not expected"));
|
Chris@16
|
562 break;
|
Chris@16
|
563
|
Chris@16
|
564 case token_quote_meta_begin:
|
Chris@16
|
565 return detail::make_literal_xpression<BidiIter>
|
Chris@16
|
566 (
|
Chris@16
|
567 this->parse_quote_meta(begin, end), this->traits_.flags(), this->rxtraits()
|
Chris@16
|
568 );
|
Chris@16
|
569
|
Chris@16
|
570 case token_quote_meta_end:
|
Chris@16
|
571 BOOST_THROW_EXCEPTION(
|
Chris@16
|
572 regex_error(
|
Chris@16
|
573 error_escape
|
Chris@16
|
574 , "found quote-meta end without corresponding quote-meta begin"
|
Chris@16
|
575 )
|
Chris@16
|
576 );
|
Chris@16
|
577 break;
|
Chris@16
|
578
|
Chris@16
|
579 case token_end_of_pattern:
|
Chris@16
|
580 break;
|
Chris@16
|
581
|
Chris@16
|
582 default:
|
Chris@16
|
583 begin = old_begin;
|
Chris@16
|
584 break;
|
Chris@16
|
585 }
|
Chris@16
|
586
|
Chris@16
|
587 return detail::sequence<BidiIter>();
|
Chris@16
|
588 }
|
Chris@16
|
589
|
Chris@16
|
590 ///////////////////////////////////////////////////////////////////////////
|
Chris@16
|
591 // parse_quant
|
Chris@16
|
592 /// INTERNAL ONLY
|
Chris@16
|
593 template<typename FwdIter>
|
Chris@16
|
594 detail::sequence<BidiIter> parse_quant(FwdIter &begin, FwdIter end)
|
Chris@16
|
595 {
|
Chris@16
|
596 BOOST_ASSERT(begin != end);
|
Chris@16
|
597 detail::quant_spec spec = { 0, 0, false, &this->hidden_mark_count_ };
|
Chris@16
|
598 detail::sequence<BidiIter> seq = this->parse_atom(begin, end);
|
Chris@16
|
599
|
Chris@16
|
600 // BUGBUG this doesn't handle the degenerate (?:)+ correctly
|
Chris@16
|
601 if(!seq.empty() && begin != end && detail::quant_none != seq.quant())
|
Chris@16
|
602 {
|
Chris@16
|
603 if(this->traits_.get_quant_spec(begin, end, spec))
|
Chris@16
|
604 {
|
Chris@16
|
605 BOOST_ASSERT(spec.min_ <= spec.max_);
|
Chris@16
|
606
|
Chris@16
|
607 if(0 == spec.max_) // quant {0,0} is degenerate -- matches nothing.
|
Chris@16
|
608 {
|
Chris@16
|
609 seq = this->parse_quant(begin, end);
|
Chris@16
|
610 }
|
Chris@16
|
611 else
|
Chris@16
|
612 {
|
Chris@16
|
613 seq.repeat(spec);
|
Chris@16
|
614 }
|
Chris@16
|
615 }
|
Chris@16
|
616 }
|
Chris@16
|
617
|
Chris@16
|
618 return seq;
|
Chris@16
|
619 }
|
Chris@16
|
620
|
Chris@16
|
621 ///////////////////////////////////////////////////////////////////////////
|
Chris@16
|
622 // parse_sequence
|
Chris@16
|
623 /// INTERNAL ONLY
|
Chris@16
|
624 template<typename FwdIter>
|
Chris@16
|
625 detail::sequence<BidiIter> parse_sequence(FwdIter &begin, FwdIter end)
|
Chris@16
|
626 {
|
Chris@16
|
627 detail::sequence<BidiIter> seq;
|
Chris@16
|
628
|
Chris@16
|
629 while(begin != end)
|
Chris@16
|
630 {
|
Chris@16
|
631 detail::sequence<BidiIter> seq_quant = this->parse_quant(begin, end);
|
Chris@16
|
632
|
Chris@16
|
633 // did we find a quantified atom?
|
Chris@16
|
634 if(seq_quant.empty())
|
Chris@16
|
635 break;
|
Chris@16
|
636
|
Chris@16
|
637 // chain it to the end of the xpression sequence
|
Chris@16
|
638 seq += seq_quant;
|
Chris@16
|
639 }
|
Chris@16
|
640
|
Chris@16
|
641 return seq;
|
Chris@16
|
642 }
|
Chris@16
|
643
|
Chris@16
|
644 ///////////////////////////////////////////////////////////////////////////
|
Chris@16
|
645 // parse_literal
|
Chris@16
|
646 // scan ahead looking for char literals to be globbed together into a string literal
|
Chris@16
|
647 /// INTERNAL ONLY
|
Chris@16
|
648 template<typename FwdIter>
|
Chris@16
|
649 string_type parse_literal(FwdIter &begin, FwdIter end)
|
Chris@16
|
650 {
|
Chris@16
|
651 using namespace regex_constants;
|
Chris@16
|
652 BOOST_ASSERT(begin != end);
|
Chris@16
|
653 BOOST_ASSERT(token_literal == this->traits_.get_token(begin, end));
|
Chris@16
|
654 escape_value esc = { 0, 0, 0, detail::escape_char };
|
Chris@16
|
655 string_type literal(1, *begin);
|
Chris@16
|
656
|
Chris@16
|
657 for(FwdIter prev = begin, tmp = ++begin; begin != end; prev = begin, begin = tmp)
|
Chris@16
|
658 {
|
Chris@16
|
659 detail::quant_spec spec = { 0, 0, false, &this->hidden_mark_count_ };
|
Chris@16
|
660 if(this->traits_.get_quant_spec(tmp, end, spec))
|
Chris@16
|
661 {
|
Chris@16
|
662 if(literal.size() != 1)
|
Chris@16
|
663 {
|
Chris@16
|
664 begin = prev;
|
Chris@16
|
665 literal.erase(boost::prior(literal.end()));
|
Chris@16
|
666 }
|
Chris@16
|
667 return literal;
|
Chris@16
|
668 }
|
Chris@16
|
669 else switch(this->traits_.get_token(tmp, end))
|
Chris@16
|
670 {
|
Chris@16
|
671 case token_escape:
|
Chris@16
|
672 esc = this->parse_escape(tmp, end);
|
Chris@16
|
673 if(detail::escape_char != esc.type_) return literal;
|
Chris@16
|
674 literal.insert(literal.end(), esc.ch_);
|
Chris@16
|
675 break;
|
Chris@16
|
676 case token_literal:
|
Chris@16
|
677 literal.insert(literal.end(), *tmp++);
|
Chris@16
|
678 break;
|
Chris@16
|
679 default:
|
Chris@16
|
680 return literal;
|
Chris@16
|
681 }
|
Chris@16
|
682 }
|
Chris@16
|
683
|
Chris@16
|
684 return literal;
|
Chris@16
|
685 }
|
Chris@16
|
686
|
Chris@16
|
687 ///////////////////////////////////////////////////////////////////////////
|
Chris@16
|
688 // parse_quote_meta
|
Chris@16
|
689 // scan ahead looking for char literals to be globbed together into a string literal
|
Chris@16
|
690 /// INTERNAL ONLY
|
Chris@16
|
691 template<typename FwdIter>
|
Chris@16
|
692 string_type parse_quote_meta(FwdIter &begin, FwdIter end)
|
Chris@16
|
693 {
|
Chris@16
|
694 using namespace regex_constants;
|
Chris@16
|
695 FwdIter old_begin = begin, old_end;
|
Chris@16
|
696 while(end != (old_end = begin))
|
Chris@16
|
697 {
|
Chris@16
|
698 switch(this->traits_.get_token(begin, end))
|
Chris@16
|
699 {
|
Chris@16
|
700 case token_quote_meta_end:
|
Chris@16
|
701 return string_type(old_begin, old_end);
|
Chris@16
|
702 case token_escape:
|
Chris@16
|
703 BOOST_XPR_ENSURE_(begin != end, error_escape, "incomplete escape sequence");
|
Chris@16
|
704 BOOST_FALLTHROUGH;
|
Chris@16
|
705 case token_invalid_quantifier:
|
Chris@16
|
706 case token_literal:
|
Chris@16
|
707 ++begin;
|
Chris@16
|
708 break;
|
Chris@16
|
709 default:
|
Chris@16
|
710 break;
|
Chris@16
|
711 }
|
Chris@16
|
712 }
|
Chris@16
|
713 return string_type(old_begin, begin);
|
Chris@16
|
714 }
|
Chris@16
|
715
|
Chris@16
|
716 ///////////////////////////////////////////////////////////////////////////////
|
Chris@16
|
717 // parse_escape
|
Chris@16
|
718 /// INTERNAL ONLY
|
Chris@16
|
719 template<typename FwdIter>
|
Chris@16
|
720 escape_value parse_escape(FwdIter &begin, FwdIter end)
|
Chris@16
|
721 {
|
Chris@16
|
722 BOOST_XPR_ENSURE_(begin != end, regex_constants::error_escape, "incomplete escape sequence");
|
Chris@16
|
723
|
Chris@16
|
724 // first, check to see if this can be a backreference
|
Chris@16
|
725 if(0 < this->rxtraits().value(*begin, 10))
|
Chris@16
|
726 {
|
Chris@16
|
727 // Parse at most 3 decimal digits.
|
Chris@16
|
728 FwdIter tmp = begin;
|
Chris@16
|
729 int mark_nbr = detail::toi(tmp, end, this->rxtraits(), 10, 999);
|
Chris@16
|
730
|
Chris@16
|
731 // If the resulting number could conceivably be a backref, then it is.
|
Chris@16
|
732 if(10 > mark_nbr || mark_nbr <= static_cast<int>(this->mark_count_))
|
Chris@16
|
733 {
|
Chris@16
|
734 begin = tmp;
|
Chris@16
|
735 escape_value esc = {0, mark_nbr, 0, detail::escape_mark};
|
Chris@16
|
736 return esc;
|
Chris@16
|
737 }
|
Chris@16
|
738 }
|
Chris@16
|
739
|
Chris@16
|
740 // Not a backreference, defer to the parse_escape helper
|
Chris@16
|
741 return detail::parse_escape(begin, end, this->traits_);
|
Chris@16
|
742 }
|
Chris@16
|
743
|
Chris@16
|
744 bool is_upper_(char_type ch) const
|
Chris@16
|
745 {
|
Chris@16
|
746 return 0 != this->upper_ && this->rxtraits().isctype(ch, this->upper_);
|
Chris@16
|
747 }
|
Chris@16
|
748
|
Chris@16
|
749 std::size_t mark_count_;
|
Chris@16
|
750 std::size_t hidden_mark_count_;
|
Chris@16
|
751 CompilerTraits traits_;
|
Chris@16
|
752 typename RegexTraits::char_class_type upper_;
|
Chris@16
|
753 shared_ptr<detail::regex_impl<BidiIter> > self_;
|
Chris@16
|
754 std::map<string_type, basic_regex<BidiIter> > rules_;
|
Chris@16
|
755 };
|
Chris@16
|
756
|
Chris@16
|
757 }} // namespace boost::xpressive
|
Chris@16
|
758
|
Chris@16
|
759 #endif
|