Chris@16
|
1 /*
|
Chris@16
|
2 *
|
Chris@16
|
3 * Copyright (c) 2004
|
Chris@16
|
4 * John Maddock
|
Chris@16
|
5 *
|
Chris@16
|
6 * Use, modification and distribution are subject to the
|
Chris@16
|
7 * Boost Software License, Version 1.0. (See accompanying file
|
Chris@16
|
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
Chris@16
|
9 *
|
Chris@16
|
10 */
|
Chris@16
|
11
|
Chris@16
|
12 /*
|
Chris@16
|
13 * LOCATION: see http://www.boost.org for most recent version.
|
Chris@16
|
14 * FILE basic_regex_parser.hpp
|
Chris@16
|
15 * VERSION see <boost/version.hpp>
|
Chris@16
|
16 * DESCRIPTION: Declares template class basic_regex_parser.
|
Chris@16
|
17 */
|
Chris@16
|
18
|
Chris@16
|
19 #ifndef BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP
|
Chris@16
|
20 #define BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP
|
Chris@16
|
21
|
Chris@16
|
22 #ifdef BOOST_MSVC
|
Chris@16
|
23 #pragma warning(push)
|
Chris@16
|
24 #pragma warning(disable: 4103)
|
Chris@16
|
25 #endif
|
Chris@16
|
26 #ifdef BOOST_HAS_ABI_HEADERS
|
Chris@16
|
27 # include BOOST_ABI_PREFIX
|
Chris@16
|
28 #endif
|
Chris@16
|
29 #ifdef BOOST_MSVC
|
Chris@16
|
30 #pragma warning(pop)
|
Chris@16
|
31 #endif
|
Chris@16
|
32
|
Chris@16
|
33 namespace boost{
|
Chris@16
|
34 namespace re_detail{
|
Chris@16
|
35
|
Chris@16
|
36 #ifdef BOOST_MSVC
|
Chris@16
|
37 #pragma warning(push)
|
Chris@16
|
38 #pragma warning(disable:4244 4800)
|
Chris@16
|
39 #endif
|
Chris@16
|
40
|
Chris@16
|
41 template <class charT, class traits>
|
Chris@16
|
42 class basic_regex_parser : public basic_regex_creator<charT, traits>
|
Chris@16
|
43 {
|
Chris@16
|
44 public:
|
Chris@16
|
45 basic_regex_parser(regex_data<charT, traits>* data);
|
Chris@16
|
46 void parse(const charT* p1, const charT* p2, unsigned flags);
|
Chris@16
|
47 void fail(regex_constants::error_type error_code, std::ptrdiff_t position);
|
Chris@16
|
48 void fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos);
|
Chris@16
|
49 void fail(regex_constants::error_type error_code, std::ptrdiff_t position, const std::string& message)
|
Chris@16
|
50 {
|
Chris@16
|
51 fail(error_code, position, message, position);
|
Chris@16
|
52 }
|
Chris@16
|
53
|
Chris@16
|
54 bool parse_all();
|
Chris@16
|
55 bool parse_basic();
|
Chris@16
|
56 bool parse_extended();
|
Chris@16
|
57 bool parse_literal();
|
Chris@16
|
58 bool parse_open_paren();
|
Chris@16
|
59 bool parse_basic_escape();
|
Chris@16
|
60 bool parse_extended_escape();
|
Chris@16
|
61 bool parse_match_any();
|
Chris@16
|
62 bool parse_repeat(std::size_t low = 0, std::size_t high = (std::numeric_limits<std::size_t>::max)());
|
Chris@16
|
63 bool parse_repeat_range(bool isbasic);
|
Chris@16
|
64 bool parse_alt();
|
Chris@16
|
65 bool parse_set();
|
Chris@16
|
66 bool parse_backref();
|
Chris@16
|
67 void parse_set_literal(basic_char_set<charT, traits>& char_set);
|
Chris@16
|
68 bool parse_inner_set(basic_char_set<charT, traits>& char_set);
|
Chris@16
|
69 bool parse_QE();
|
Chris@16
|
70 bool parse_perl_extension();
|
Chris@16
|
71 bool add_emacs_code(bool negate);
|
Chris@16
|
72 bool unwind_alts(std::ptrdiff_t last_paren_start);
|
Chris@16
|
73 digraph<charT> get_next_set_literal(basic_char_set<charT, traits>& char_set);
|
Chris@16
|
74 charT unescape_character();
|
Chris@16
|
75 regex_constants::syntax_option_type parse_options();
|
Chris@16
|
76
|
Chris@16
|
77 private:
|
Chris@16
|
78 typedef bool (basic_regex_parser::*parser_proc_type)();
|
Chris@16
|
79 typedef typename traits::string_type string_type;
|
Chris@16
|
80 typedef typename traits::char_class_type char_class_type;
|
Chris@16
|
81 parser_proc_type m_parser_proc; // the main parser to use
|
Chris@16
|
82 const charT* m_base; // the start of the string being parsed
|
Chris@16
|
83 const charT* m_end; // the end of the string being parsed
|
Chris@16
|
84 const charT* m_position; // our current parser position
|
Chris@16
|
85 unsigned m_mark_count; // how many sub-expressions we have
|
Chris@16
|
86 int m_mark_reset; // used to indicate that we're inside a (?|...) block.
|
Chris@16
|
87 unsigned m_max_mark; // largest mark count seen inside a (?|...) block.
|
Chris@16
|
88 std::ptrdiff_t m_paren_start; // where the last seen ')' began (where repeats are inserted).
|
Chris@16
|
89 std::ptrdiff_t m_alt_insert_point; // where to insert the next alternative
|
Chris@16
|
90 bool m_has_case_change; // true if somewhere in the current block the case has changed
|
Chris@16
|
91 #if defined(BOOST_MSVC) && defined(_M_IX86)
|
Chris@16
|
92 // This is an ugly warning suppression workaround (for warnings *inside* std::vector
|
Chris@16
|
93 // that can not otherwise be suppressed)...
|
Chris@16
|
94 BOOST_STATIC_ASSERT(sizeof(long) >= sizeof(void*));
|
Chris@16
|
95 std::vector<long> m_alt_jumps; // list of alternative in the current scope.
|
Chris@16
|
96 #else
|
Chris@16
|
97 std::vector<std::ptrdiff_t> m_alt_jumps; // list of alternative in the current scope.
|
Chris@16
|
98 #endif
|
Chris@16
|
99
|
Chris@16
|
100 basic_regex_parser& operator=(const basic_regex_parser&);
|
Chris@16
|
101 basic_regex_parser(const basic_regex_parser&);
|
Chris@16
|
102 };
|
Chris@16
|
103
|
Chris@16
|
104 template <class charT, class traits>
|
Chris@16
|
105 basic_regex_parser<charT, traits>::basic_regex_parser(regex_data<charT, traits>* data)
|
Chris@16
|
106 : basic_regex_creator<charT, traits>(data), m_mark_count(0), m_mark_reset(-1), m_max_mark(0), m_paren_start(0), m_alt_insert_point(0), m_has_case_change(false)
|
Chris@16
|
107 {
|
Chris@16
|
108 }
|
Chris@16
|
109
|
Chris@16
|
110 template <class charT, class traits>
|
Chris@16
|
111 void basic_regex_parser<charT, traits>::parse(const charT* p1, const charT* p2, unsigned l_flags)
|
Chris@16
|
112 {
|
Chris@16
|
113 // pass l_flags on to base class:
|
Chris@16
|
114 this->init(l_flags);
|
Chris@16
|
115 // set up pointers:
|
Chris@16
|
116 m_position = m_base = p1;
|
Chris@16
|
117 m_end = p2;
|
Chris@16
|
118 // empty strings are errors:
|
Chris@16
|
119 if((p1 == p2) &&
|
Chris@16
|
120 (
|
Chris@16
|
121 ((l_flags & regbase::main_option_type) != regbase::perl_syntax_group)
|
Chris@16
|
122 || (l_flags & regbase::no_empty_expressions)
|
Chris@16
|
123 )
|
Chris@16
|
124 )
|
Chris@16
|
125 {
|
Chris@16
|
126 fail(regex_constants::error_empty, 0);
|
Chris@16
|
127 return;
|
Chris@16
|
128 }
|
Chris@16
|
129 // select which parser to use:
|
Chris@16
|
130 switch(l_flags & regbase::main_option_type)
|
Chris@16
|
131 {
|
Chris@16
|
132 case regbase::perl_syntax_group:
|
Chris@16
|
133 {
|
Chris@16
|
134 m_parser_proc = &basic_regex_parser<charT, traits>::parse_extended;
|
Chris@16
|
135 //
|
Chris@16
|
136 // Add a leading paren with index zero to give recursions a target:
|
Chris@16
|
137 //
|
Chris@16
|
138 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
|
Chris@16
|
139 br->index = 0;
|
Chris@16
|
140 br->icase = this->flags() & regbase::icase;
|
Chris@16
|
141 break;
|
Chris@16
|
142 }
|
Chris@16
|
143 case regbase::basic_syntax_group:
|
Chris@16
|
144 m_parser_proc = &basic_regex_parser<charT, traits>::parse_basic;
|
Chris@16
|
145 break;
|
Chris@16
|
146 case regbase::literal:
|
Chris@16
|
147 m_parser_proc = &basic_regex_parser<charT, traits>::parse_literal;
|
Chris@16
|
148 break;
|
Chris@16
|
149 default:
|
Chris@16
|
150 // Ooops, someone has managed to set more than one of the main option flags,
|
Chris@16
|
151 // so this must be an error:
|
Chris@16
|
152 fail(regex_constants::error_unknown, 0, "An invalid combination of regular expression syntax flags was used.");
|
Chris@16
|
153 return;
|
Chris@16
|
154 }
|
Chris@16
|
155
|
Chris@16
|
156 // parse all our characters:
|
Chris@16
|
157 bool result = parse_all();
|
Chris@16
|
158 //
|
Chris@16
|
159 // Unwind our alternatives:
|
Chris@16
|
160 //
|
Chris@16
|
161 unwind_alts(-1);
|
Chris@16
|
162 // reset l_flags as a global scope (?imsx) may have altered them:
|
Chris@16
|
163 this->flags(l_flags);
|
Chris@16
|
164 // if we haven't gobbled up all the characters then we must
|
Chris@16
|
165 // have had an unexpected ')' :
|
Chris@16
|
166 if(!result)
|
Chris@16
|
167 {
|
Chris@16
|
168 fail(regex_constants::error_paren, ::boost::re_detail::distance(m_base, m_position), "Found a closing ) with no corresponding openening parenthesis.");
|
Chris@16
|
169 return;
|
Chris@16
|
170 }
|
Chris@16
|
171 // if an error has been set then give up now:
|
Chris@16
|
172 if(this->m_pdata->m_status)
|
Chris@16
|
173 return;
|
Chris@16
|
174 // fill in our sub-expression count:
|
Chris@16
|
175 this->m_pdata->m_mark_count = 1 + m_mark_count;
|
Chris@16
|
176 this->finalize(p1, p2);
|
Chris@16
|
177 }
|
Chris@16
|
178
|
Chris@16
|
179 template <class charT, class traits>
|
Chris@16
|
180 void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position)
|
Chris@16
|
181 {
|
Chris@16
|
182 // get the error message:
|
Chris@16
|
183 std::string message = this->m_pdata->m_ptraits->error_string(error_code);
|
Chris@16
|
184 fail(error_code, position, message);
|
Chris@16
|
185 }
|
Chris@16
|
186
|
Chris@16
|
187 template <class charT, class traits>
|
Chris@16
|
188 void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos)
|
Chris@16
|
189 {
|
Chris@16
|
190 if(0 == this->m_pdata->m_status) // update the error code if not already set
|
Chris@16
|
191 this->m_pdata->m_status = error_code;
|
Chris@16
|
192 m_position = m_end; // don't bother parsing anything else
|
Chris@16
|
193
|
Chris@16
|
194 #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
|
Chris@16
|
195 //
|
Chris@16
|
196 // Augment error message with the regular expression text:
|
Chris@16
|
197 //
|
Chris@16
|
198 if(start_pos == position)
|
Chris@16
|
199 start_pos = (std::max)(static_cast<std::ptrdiff_t>(0), position - static_cast<std::ptrdiff_t>(10));
|
Chris@16
|
200 std::ptrdiff_t end_pos = (std::min)(position + static_cast<std::ptrdiff_t>(10), static_cast<std::ptrdiff_t>(m_end - m_base));
|
Chris@16
|
201 if(error_code != regex_constants::error_empty)
|
Chris@16
|
202 {
|
Chris@16
|
203 if((start_pos != 0) || (end_pos != (m_end - m_base)))
|
Chris@16
|
204 message += " The error occurred while parsing the regular expression fragment: '";
|
Chris@16
|
205 else
|
Chris@16
|
206 message += " The error occurred while parsing the regular expression: '";
|
Chris@16
|
207 if(start_pos != end_pos)
|
Chris@16
|
208 {
|
Chris@16
|
209 message += std::string(m_base + start_pos, m_base + position);
|
Chris@16
|
210 message += ">>>HERE>>>";
|
Chris@16
|
211 message += std::string(m_base + position, m_base + end_pos);
|
Chris@16
|
212 }
|
Chris@16
|
213 message += "'.";
|
Chris@16
|
214 }
|
Chris@16
|
215 #endif
|
Chris@16
|
216
|
Chris@16
|
217 #ifndef BOOST_NO_EXCEPTIONS
|
Chris@16
|
218 if(0 == (this->flags() & regex_constants::no_except))
|
Chris@16
|
219 {
|
Chris@16
|
220 boost::regex_error e(message, error_code, position);
|
Chris@16
|
221 e.raise();
|
Chris@16
|
222 }
|
Chris@16
|
223 #else
|
Chris@16
|
224 (void)position; // suppress warnings.
|
Chris@16
|
225 #endif
|
Chris@16
|
226 }
|
Chris@16
|
227
|
Chris@16
|
228 template <class charT, class traits>
|
Chris@16
|
229 bool basic_regex_parser<charT, traits>::parse_all()
|
Chris@16
|
230 {
|
Chris@16
|
231 bool result = true;
|
Chris@16
|
232 while(result && (m_position != m_end))
|
Chris@16
|
233 {
|
Chris@16
|
234 result = (this->*m_parser_proc)();
|
Chris@16
|
235 }
|
Chris@16
|
236 return result;
|
Chris@16
|
237 }
|
Chris@16
|
238
|
Chris@16
|
239 #ifdef BOOST_MSVC
|
Chris@16
|
240 #pragma warning(push)
|
Chris@16
|
241 #pragma warning(disable:4702)
|
Chris@16
|
242 #endif
|
Chris@16
|
243 template <class charT, class traits>
|
Chris@16
|
244 bool basic_regex_parser<charT, traits>::parse_basic()
|
Chris@16
|
245 {
|
Chris@16
|
246 switch(this->m_traits.syntax_type(*m_position))
|
Chris@16
|
247 {
|
Chris@16
|
248 case regex_constants::syntax_escape:
|
Chris@16
|
249 return parse_basic_escape();
|
Chris@16
|
250 case regex_constants::syntax_dot:
|
Chris@16
|
251 return parse_match_any();
|
Chris@16
|
252 case regex_constants::syntax_caret:
|
Chris@16
|
253 ++m_position;
|
Chris@16
|
254 this->append_state(syntax_element_start_line);
|
Chris@16
|
255 break;
|
Chris@16
|
256 case regex_constants::syntax_dollar:
|
Chris@16
|
257 ++m_position;
|
Chris@16
|
258 this->append_state(syntax_element_end_line);
|
Chris@16
|
259 break;
|
Chris@16
|
260 case regex_constants::syntax_star:
|
Chris@16
|
261 if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line))
|
Chris@16
|
262 return parse_literal();
|
Chris@16
|
263 else
|
Chris@16
|
264 {
|
Chris@16
|
265 ++m_position;
|
Chris@16
|
266 return parse_repeat();
|
Chris@16
|
267 }
|
Chris@16
|
268 case regex_constants::syntax_plus:
|
Chris@16
|
269 if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))
|
Chris@16
|
270 return parse_literal();
|
Chris@16
|
271 else
|
Chris@16
|
272 {
|
Chris@16
|
273 ++m_position;
|
Chris@16
|
274 return parse_repeat(1);
|
Chris@16
|
275 }
|
Chris@16
|
276 case regex_constants::syntax_question:
|
Chris@16
|
277 if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))
|
Chris@16
|
278 return parse_literal();
|
Chris@16
|
279 else
|
Chris@16
|
280 {
|
Chris@16
|
281 ++m_position;
|
Chris@16
|
282 return parse_repeat(0, 1);
|
Chris@16
|
283 }
|
Chris@16
|
284 case regex_constants::syntax_open_set:
|
Chris@16
|
285 return parse_set();
|
Chris@16
|
286 case regex_constants::syntax_newline:
|
Chris@16
|
287 if(this->flags() & regbase::newline_alt)
|
Chris@16
|
288 return parse_alt();
|
Chris@16
|
289 else
|
Chris@16
|
290 return parse_literal();
|
Chris@16
|
291 default:
|
Chris@16
|
292 return parse_literal();
|
Chris@16
|
293 }
|
Chris@16
|
294 return true;
|
Chris@16
|
295 }
|
Chris@16
|
296
|
Chris@16
|
297 template <class charT, class traits>
|
Chris@16
|
298 bool basic_regex_parser<charT, traits>::parse_extended()
|
Chris@16
|
299 {
|
Chris@16
|
300 bool result = true;
|
Chris@16
|
301 switch(this->m_traits.syntax_type(*m_position))
|
Chris@16
|
302 {
|
Chris@16
|
303 case regex_constants::syntax_open_mark:
|
Chris@16
|
304 return parse_open_paren();
|
Chris@16
|
305 case regex_constants::syntax_close_mark:
|
Chris@16
|
306 return false;
|
Chris@16
|
307 case regex_constants::syntax_escape:
|
Chris@16
|
308 return parse_extended_escape();
|
Chris@16
|
309 case regex_constants::syntax_dot:
|
Chris@16
|
310 return parse_match_any();
|
Chris@16
|
311 case regex_constants::syntax_caret:
|
Chris@16
|
312 ++m_position;
|
Chris@16
|
313 this->append_state(
|
Chris@16
|
314 (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_start : syntax_element_start_line));
|
Chris@16
|
315 break;
|
Chris@16
|
316 case regex_constants::syntax_dollar:
|
Chris@16
|
317 ++m_position;
|
Chris@16
|
318 this->append_state(
|
Chris@16
|
319 (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_end : syntax_element_end_line));
|
Chris@16
|
320 break;
|
Chris@16
|
321 case regex_constants::syntax_star:
|
Chris@16
|
322 if(m_position == this->m_base)
|
Chris@16
|
323 {
|
Chris@16
|
324 fail(regex_constants::error_badrepeat, 0, "The repeat operator \"*\" cannot start a regular expression.");
|
Chris@16
|
325 return false;
|
Chris@16
|
326 }
|
Chris@16
|
327 ++m_position;
|
Chris@16
|
328 return parse_repeat();
|
Chris@16
|
329 case regex_constants::syntax_question:
|
Chris@16
|
330 if(m_position == this->m_base)
|
Chris@16
|
331 {
|
Chris@16
|
332 fail(regex_constants::error_badrepeat, 0, "The repeat operator \"?\" cannot start a regular expression.");
|
Chris@16
|
333 return false;
|
Chris@16
|
334 }
|
Chris@16
|
335 ++m_position;
|
Chris@16
|
336 return parse_repeat(0,1);
|
Chris@16
|
337 case regex_constants::syntax_plus:
|
Chris@16
|
338 if(m_position == this->m_base)
|
Chris@16
|
339 {
|
Chris@16
|
340 fail(regex_constants::error_badrepeat, 0, "The repeat operator \"+\" cannot start a regular expression.");
|
Chris@16
|
341 return false;
|
Chris@16
|
342 }
|
Chris@16
|
343 ++m_position;
|
Chris@16
|
344 return parse_repeat(1);
|
Chris@16
|
345 case regex_constants::syntax_open_brace:
|
Chris@16
|
346 ++m_position;
|
Chris@16
|
347 return parse_repeat_range(false);
|
Chris@16
|
348 case regex_constants::syntax_close_brace:
|
Chris@16
|
349 fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {.");
|
Chris@16
|
350 return false;
|
Chris@16
|
351 case regex_constants::syntax_or:
|
Chris@16
|
352 return parse_alt();
|
Chris@16
|
353 case regex_constants::syntax_open_set:
|
Chris@16
|
354 return parse_set();
|
Chris@16
|
355 case regex_constants::syntax_newline:
|
Chris@16
|
356 if(this->flags() & regbase::newline_alt)
|
Chris@16
|
357 return parse_alt();
|
Chris@16
|
358 else
|
Chris@16
|
359 return parse_literal();
|
Chris@16
|
360 case regex_constants::syntax_hash:
|
Chris@16
|
361 //
|
Chris@16
|
362 // If we have a mod_x flag set, then skip until
|
Chris@16
|
363 // we get to a newline character:
|
Chris@16
|
364 //
|
Chris@16
|
365 if((this->flags()
|
Chris@16
|
366 & (regbase::no_perl_ex|regbase::mod_x))
|
Chris@16
|
367 == regbase::mod_x)
|
Chris@16
|
368 {
|
Chris@16
|
369 while((m_position != m_end) && !is_separator(*m_position++)){}
|
Chris@16
|
370 return true;
|
Chris@16
|
371 }
|
Chris@16
|
372 BOOST_FALLTHROUGH;
|
Chris@16
|
373 default:
|
Chris@16
|
374 result = parse_literal();
|
Chris@16
|
375 break;
|
Chris@16
|
376 }
|
Chris@16
|
377 return result;
|
Chris@16
|
378 }
|
Chris@16
|
379 #ifdef BOOST_MSVC
|
Chris@16
|
380 #pragma warning(pop)
|
Chris@16
|
381 #endif
|
Chris@16
|
382
|
Chris@16
|
383 template <class charT, class traits>
|
Chris@16
|
384 bool basic_regex_parser<charT, traits>::parse_literal()
|
Chris@16
|
385 {
|
Chris@16
|
386 // append this as a literal provided it's not a space character
|
Chris@16
|
387 // or the perl option regbase::mod_x is not set:
|
Chris@16
|
388 if(
|
Chris@16
|
389 ((this->flags()
|
Chris@16
|
390 & (regbase::main_option_type|regbase::mod_x|regbase::no_perl_ex))
|
Chris@16
|
391 != regbase::mod_x)
|
Chris@16
|
392 || !this->m_traits.isctype(*m_position, this->m_mask_space))
|
Chris@16
|
393 this->append_literal(*m_position);
|
Chris@16
|
394 ++m_position;
|
Chris@16
|
395 return true;
|
Chris@16
|
396 }
|
Chris@16
|
397
|
Chris@16
|
398 template <class charT, class traits>
|
Chris@16
|
399 bool basic_regex_parser<charT, traits>::parse_open_paren()
|
Chris@16
|
400 {
|
Chris@16
|
401 //
|
Chris@16
|
402 // skip the '(' and error check:
|
Chris@16
|
403 //
|
Chris@16
|
404 if(++m_position == m_end)
|
Chris@16
|
405 {
|
Chris@16
|
406 fail(regex_constants::error_paren, m_position - m_base);
|
Chris@16
|
407 return false;
|
Chris@16
|
408 }
|
Chris@16
|
409 //
|
Chris@16
|
410 // begin by checking for a perl-style (?...) extension:
|
Chris@16
|
411 //
|
Chris@16
|
412 if(
|
Chris@16
|
413 ((this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) == 0)
|
Chris@16
|
414 || ((this->flags() & (regbase::main_option_type | regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))
|
Chris@16
|
415 )
|
Chris@16
|
416 {
|
Chris@16
|
417 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
|
Chris@16
|
418 return parse_perl_extension();
|
Chris@16
|
419 }
|
Chris@16
|
420 //
|
Chris@16
|
421 // update our mark count, and append the required state:
|
Chris@16
|
422 //
|
Chris@16
|
423 unsigned markid = 0;
|
Chris@16
|
424 if(0 == (this->flags() & regbase::nosubs))
|
Chris@16
|
425 {
|
Chris@16
|
426 markid = ++m_mark_count;
|
Chris@16
|
427 #ifndef BOOST_NO_STD_DISTANCE
|
Chris@16
|
428 if(this->flags() & regbase::save_subexpression_location)
|
Chris@16
|
429 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 1, 0));
|
Chris@16
|
430 #else
|
Chris@16
|
431 if(this->flags() & regbase::save_subexpression_location)
|
Chris@16
|
432 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - 1, 0));
|
Chris@16
|
433 #endif
|
Chris@16
|
434 }
|
Chris@16
|
435 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
|
Chris@16
|
436 pb->index = markid;
|
Chris@16
|
437 pb->icase = this->flags() & regbase::icase;
|
Chris@16
|
438 std::ptrdiff_t last_paren_start = this->getoffset(pb);
|
Chris@16
|
439 // back up insertion point for alternations, and set new point:
|
Chris@16
|
440 std::ptrdiff_t last_alt_point = m_alt_insert_point;
|
Chris@16
|
441 this->m_pdata->m_data.align();
|
Chris@16
|
442 m_alt_insert_point = this->m_pdata->m_data.size();
|
Chris@16
|
443 //
|
Chris@16
|
444 // back up the current flags in case we have a nested (?imsx) group:
|
Chris@16
|
445 //
|
Chris@16
|
446 regex_constants::syntax_option_type opts = this->flags();
|
Chris@16
|
447 bool old_case_change = m_has_case_change;
|
Chris@16
|
448 m_has_case_change = false; // no changes to this scope as yet...
|
Chris@16
|
449 //
|
Chris@16
|
450 // Back up branch reset data in case we have a nested (?|...)
|
Chris@16
|
451 //
|
Chris@16
|
452 int mark_reset = m_mark_reset;
|
Chris@16
|
453 m_mark_reset = -1;
|
Chris@16
|
454 //
|
Chris@16
|
455 // now recursively add more states, this will terminate when we get to a
|
Chris@16
|
456 // matching ')' :
|
Chris@16
|
457 //
|
Chris@16
|
458 parse_all();
|
Chris@16
|
459 //
|
Chris@16
|
460 // Unwind pushed alternatives:
|
Chris@16
|
461 //
|
Chris@16
|
462 if(0 == unwind_alts(last_paren_start))
|
Chris@16
|
463 return false;
|
Chris@16
|
464 //
|
Chris@16
|
465 // restore flags:
|
Chris@16
|
466 //
|
Chris@16
|
467 if(m_has_case_change)
|
Chris@16
|
468 {
|
Chris@16
|
469 // the case has changed in one or more of the alternatives
|
Chris@16
|
470 // within the scoped (...) block: we have to add a state
|
Chris@16
|
471 // to reset the case sensitivity:
|
Chris@16
|
472 static_cast<re_case*>(
|
Chris@16
|
473 this->append_state(syntax_element_toggle_case, sizeof(re_case))
|
Chris@16
|
474 )->icase = opts & regbase::icase;
|
Chris@16
|
475 }
|
Chris@16
|
476 this->flags(opts);
|
Chris@16
|
477 m_has_case_change = old_case_change;
|
Chris@16
|
478 //
|
Chris@16
|
479 // restore branch reset:
|
Chris@16
|
480 //
|
Chris@16
|
481 m_mark_reset = mark_reset;
|
Chris@16
|
482 //
|
Chris@16
|
483 // we either have a ')' or we have run out of characters prematurely:
|
Chris@16
|
484 //
|
Chris@16
|
485 if(m_position == m_end)
|
Chris@16
|
486 {
|
Chris@16
|
487 this->fail(regex_constants::error_paren, ::boost::re_detail::distance(m_base, m_end));
|
Chris@16
|
488 return false;
|
Chris@16
|
489 }
|
Chris@16
|
490 BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
|
Chris@16
|
491 #ifndef BOOST_NO_STD_DISTANCE
|
Chris@16
|
492 if(markid && (this->flags() & regbase::save_subexpression_location))
|
Chris@16
|
493 this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position);
|
Chris@16
|
494 #else
|
Chris@16
|
495 if(markid && (this->flags() & regbase::save_subexpression_location))
|
Chris@16
|
496 this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base);
|
Chris@16
|
497 #endif
|
Chris@16
|
498 ++m_position;
|
Chris@16
|
499 //
|
Chris@16
|
500 // append closing parenthesis state:
|
Chris@16
|
501 //
|
Chris@16
|
502 pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
|
Chris@16
|
503 pb->index = markid;
|
Chris@16
|
504 pb->icase = this->flags() & regbase::icase;
|
Chris@16
|
505 this->m_paren_start = last_paren_start;
|
Chris@16
|
506 //
|
Chris@16
|
507 // restore the alternate insertion point:
|
Chris@16
|
508 //
|
Chris@16
|
509 this->m_alt_insert_point = last_alt_point;
|
Chris@16
|
510 //
|
Chris@16
|
511 // allow backrefs to this mark:
|
Chris@16
|
512 //
|
Chris@16
|
513 if((markid > 0) && (markid < sizeof(unsigned) * CHAR_BIT))
|
Chris@16
|
514 this->m_backrefs |= 1u << (markid - 1);
|
Chris@16
|
515
|
Chris@16
|
516 return true;
|
Chris@16
|
517 }
|
Chris@16
|
518
|
Chris@16
|
519 template <class charT, class traits>
|
Chris@16
|
520 bool basic_regex_parser<charT, traits>::parse_basic_escape()
|
Chris@16
|
521 {
|
Chris@16
|
522 ++m_position;
|
Chris@16
|
523 bool result = true;
|
Chris@16
|
524 switch(this->m_traits.escape_syntax_type(*m_position))
|
Chris@16
|
525 {
|
Chris@16
|
526 case regex_constants::syntax_open_mark:
|
Chris@16
|
527 return parse_open_paren();
|
Chris@16
|
528 case regex_constants::syntax_close_mark:
|
Chris@16
|
529 return false;
|
Chris@16
|
530 case regex_constants::syntax_plus:
|
Chris@16
|
531 if(this->flags() & regex_constants::bk_plus_qm)
|
Chris@16
|
532 {
|
Chris@16
|
533 ++m_position;
|
Chris@16
|
534 return parse_repeat(1);
|
Chris@16
|
535 }
|
Chris@16
|
536 else
|
Chris@16
|
537 return parse_literal();
|
Chris@16
|
538 case regex_constants::syntax_question:
|
Chris@16
|
539 if(this->flags() & regex_constants::bk_plus_qm)
|
Chris@16
|
540 {
|
Chris@16
|
541 ++m_position;
|
Chris@16
|
542 return parse_repeat(0, 1);
|
Chris@16
|
543 }
|
Chris@16
|
544 else
|
Chris@16
|
545 return parse_literal();
|
Chris@16
|
546 case regex_constants::syntax_open_brace:
|
Chris@16
|
547 if(this->flags() & regbase::no_intervals)
|
Chris@16
|
548 return parse_literal();
|
Chris@16
|
549 ++m_position;
|
Chris@16
|
550 return parse_repeat_range(true);
|
Chris@16
|
551 case regex_constants::syntax_close_brace:
|
Chris@16
|
552 if(this->flags() & regbase::no_intervals)
|
Chris@16
|
553 return parse_literal();
|
Chris@16
|
554 fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {.");
|
Chris@16
|
555 return false;
|
Chris@16
|
556 case regex_constants::syntax_or:
|
Chris@16
|
557 if(this->flags() & regbase::bk_vbar)
|
Chris@16
|
558 return parse_alt();
|
Chris@16
|
559 else
|
Chris@16
|
560 result = parse_literal();
|
Chris@16
|
561 break;
|
Chris@16
|
562 case regex_constants::syntax_digit:
|
Chris@16
|
563 return parse_backref();
|
Chris@16
|
564 case regex_constants::escape_type_start_buffer:
|
Chris@16
|
565 if(this->flags() & regbase::emacs_ex)
|
Chris@16
|
566 {
|
Chris@16
|
567 ++m_position;
|
Chris@16
|
568 this->append_state(syntax_element_buffer_start);
|
Chris@16
|
569 }
|
Chris@16
|
570 else
|
Chris@16
|
571 result = parse_literal();
|
Chris@16
|
572 break;
|
Chris@16
|
573 case regex_constants::escape_type_end_buffer:
|
Chris@16
|
574 if(this->flags() & regbase::emacs_ex)
|
Chris@16
|
575 {
|
Chris@16
|
576 ++m_position;
|
Chris@16
|
577 this->append_state(syntax_element_buffer_end);
|
Chris@16
|
578 }
|
Chris@16
|
579 else
|
Chris@16
|
580 result = parse_literal();
|
Chris@16
|
581 break;
|
Chris@16
|
582 case regex_constants::escape_type_word_assert:
|
Chris@16
|
583 if(this->flags() & regbase::emacs_ex)
|
Chris@16
|
584 {
|
Chris@16
|
585 ++m_position;
|
Chris@16
|
586 this->append_state(syntax_element_word_boundary);
|
Chris@16
|
587 }
|
Chris@16
|
588 else
|
Chris@16
|
589 result = parse_literal();
|
Chris@16
|
590 break;
|
Chris@16
|
591 case regex_constants::escape_type_not_word_assert:
|
Chris@16
|
592 if(this->flags() & regbase::emacs_ex)
|
Chris@16
|
593 {
|
Chris@16
|
594 ++m_position;
|
Chris@16
|
595 this->append_state(syntax_element_within_word);
|
Chris@16
|
596 }
|
Chris@16
|
597 else
|
Chris@16
|
598 result = parse_literal();
|
Chris@16
|
599 break;
|
Chris@16
|
600 case regex_constants::escape_type_left_word:
|
Chris@16
|
601 if(this->flags() & regbase::emacs_ex)
|
Chris@16
|
602 {
|
Chris@16
|
603 ++m_position;
|
Chris@16
|
604 this->append_state(syntax_element_word_start);
|
Chris@16
|
605 }
|
Chris@16
|
606 else
|
Chris@16
|
607 result = parse_literal();
|
Chris@16
|
608 break;
|
Chris@16
|
609 case regex_constants::escape_type_right_word:
|
Chris@16
|
610 if(this->flags() & regbase::emacs_ex)
|
Chris@16
|
611 {
|
Chris@16
|
612 ++m_position;
|
Chris@16
|
613 this->append_state(syntax_element_word_end);
|
Chris@16
|
614 }
|
Chris@16
|
615 else
|
Chris@16
|
616 result = parse_literal();
|
Chris@16
|
617 break;
|
Chris@16
|
618 default:
|
Chris@16
|
619 if(this->flags() & regbase::emacs_ex)
|
Chris@16
|
620 {
|
Chris@16
|
621 bool negate = true;
|
Chris@16
|
622 switch(*m_position)
|
Chris@16
|
623 {
|
Chris@16
|
624 case 'w':
|
Chris@16
|
625 negate = false;
|
Chris@16
|
626 BOOST_FALLTHROUGH;
|
Chris@16
|
627 case 'W':
|
Chris@16
|
628 {
|
Chris@16
|
629 basic_char_set<charT, traits> char_set;
|
Chris@16
|
630 if(negate)
|
Chris@16
|
631 char_set.negate();
|
Chris@16
|
632 char_set.add_class(this->m_word_mask);
|
Chris@16
|
633 if(0 == this->append_set(char_set))
|
Chris@16
|
634 {
|
Chris@16
|
635 fail(regex_constants::error_ctype, m_position - m_base);
|
Chris@16
|
636 return false;
|
Chris@16
|
637 }
|
Chris@16
|
638 ++m_position;
|
Chris@16
|
639 return true;
|
Chris@16
|
640 }
|
Chris@16
|
641 case 's':
|
Chris@16
|
642 negate = false;
|
Chris@16
|
643 BOOST_FALLTHROUGH;
|
Chris@16
|
644 case 'S':
|
Chris@16
|
645 return add_emacs_code(negate);
|
Chris@16
|
646 case 'c':
|
Chris@16
|
647 case 'C':
|
Chris@16
|
648 // not supported yet:
|
Chris@16
|
649 fail(regex_constants::error_escape, m_position - m_base, "The \\c and \\C escape sequences are not supported by POSIX basic regular expressions: try the Perl syntax instead.");
|
Chris@16
|
650 return false;
|
Chris@16
|
651 default:
|
Chris@16
|
652 break;
|
Chris@16
|
653 }
|
Chris@16
|
654 }
|
Chris@16
|
655 result = parse_literal();
|
Chris@16
|
656 break;
|
Chris@16
|
657 }
|
Chris@16
|
658 return result;
|
Chris@16
|
659 }
|
Chris@16
|
660
|
Chris@16
|
661 template <class charT, class traits>
|
Chris@16
|
662 bool basic_regex_parser<charT, traits>::parse_extended_escape()
|
Chris@16
|
663 {
|
Chris@16
|
664 ++m_position;
|
Chris@16
|
665 if(m_position == m_end)
|
Chris@16
|
666 {
|
Chris@16
|
667 fail(regex_constants::error_escape, m_position - m_base, "Incomplete escape sequence found.");
|
Chris@16
|
668 return false;
|
Chris@16
|
669 }
|
Chris@16
|
670 bool negate = false; // in case this is a character class escape: \w \d etc
|
Chris@16
|
671 switch(this->m_traits.escape_syntax_type(*m_position))
|
Chris@16
|
672 {
|
Chris@16
|
673 case regex_constants::escape_type_not_class:
|
Chris@16
|
674 negate = true;
|
Chris@16
|
675 BOOST_FALLTHROUGH;
|
Chris@16
|
676 case regex_constants::escape_type_class:
|
Chris@16
|
677 {
|
Chris@16
|
678 escape_type_class_jump:
|
Chris@16
|
679 typedef typename traits::char_class_type m_type;
|
Chris@16
|
680 m_type m = this->m_traits.lookup_classname(m_position, m_position+1);
|
Chris@16
|
681 if(m != 0)
|
Chris@16
|
682 {
|
Chris@16
|
683 basic_char_set<charT, traits> char_set;
|
Chris@16
|
684 if(negate)
|
Chris@16
|
685 char_set.negate();
|
Chris@16
|
686 char_set.add_class(m);
|
Chris@16
|
687 if(0 == this->append_set(char_set))
|
Chris@16
|
688 {
|
Chris@16
|
689 fail(regex_constants::error_ctype, m_position - m_base);
|
Chris@16
|
690 return false;
|
Chris@16
|
691 }
|
Chris@16
|
692 ++m_position;
|
Chris@16
|
693 return true;
|
Chris@16
|
694 }
|
Chris@16
|
695 //
|
Chris@16
|
696 // not a class, just a regular unknown escape:
|
Chris@16
|
697 //
|
Chris@16
|
698 this->append_literal(unescape_character());
|
Chris@16
|
699 break;
|
Chris@16
|
700 }
|
Chris@16
|
701 case regex_constants::syntax_digit:
|
Chris@16
|
702 return parse_backref();
|
Chris@16
|
703 case regex_constants::escape_type_left_word:
|
Chris@16
|
704 ++m_position;
|
Chris@16
|
705 this->append_state(syntax_element_word_start);
|
Chris@16
|
706 break;
|
Chris@16
|
707 case regex_constants::escape_type_right_word:
|
Chris@16
|
708 ++m_position;
|
Chris@16
|
709 this->append_state(syntax_element_word_end);
|
Chris@16
|
710 break;
|
Chris@16
|
711 case regex_constants::escape_type_start_buffer:
|
Chris@16
|
712 ++m_position;
|
Chris@16
|
713 this->append_state(syntax_element_buffer_start);
|
Chris@16
|
714 break;
|
Chris@16
|
715 case regex_constants::escape_type_end_buffer:
|
Chris@16
|
716 ++m_position;
|
Chris@16
|
717 this->append_state(syntax_element_buffer_end);
|
Chris@16
|
718 break;
|
Chris@16
|
719 case regex_constants::escape_type_word_assert:
|
Chris@16
|
720 ++m_position;
|
Chris@16
|
721 this->append_state(syntax_element_word_boundary);
|
Chris@16
|
722 break;
|
Chris@16
|
723 case regex_constants::escape_type_not_word_assert:
|
Chris@16
|
724 ++m_position;
|
Chris@16
|
725 this->append_state(syntax_element_within_word);
|
Chris@16
|
726 break;
|
Chris@16
|
727 case regex_constants::escape_type_Z:
|
Chris@16
|
728 ++m_position;
|
Chris@16
|
729 this->append_state(syntax_element_soft_buffer_end);
|
Chris@16
|
730 break;
|
Chris@16
|
731 case regex_constants::escape_type_Q:
|
Chris@16
|
732 return parse_QE();
|
Chris@16
|
733 case regex_constants::escape_type_C:
|
Chris@16
|
734 return parse_match_any();
|
Chris@16
|
735 case regex_constants::escape_type_X:
|
Chris@16
|
736 ++m_position;
|
Chris@16
|
737 this->append_state(syntax_element_combining);
|
Chris@16
|
738 break;
|
Chris@16
|
739 case regex_constants::escape_type_G:
|
Chris@16
|
740 ++m_position;
|
Chris@16
|
741 this->append_state(syntax_element_restart_continue);
|
Chris@16
|
742 break;
|
Chris@16
|
743 case regex_constants::escape_type_not_property:
|
Chris@16
|
744 negate = true;
|
Chris@16
|
745 BOOST_FALLTHROUGH;
|
Chris@16
|
746 case regex_constants::escape_type_property:
|
Chris@16
|
747 {
|
Chris@16
|
748 ++m_position;
|
Chris@16
|
749 char_class_type m;
|
Chris@16
|
750 if(m_position == m_end)
|
Chris@16
|
751 {
|
Chris@16
|
752 fail(regex_constants::error_escape, m_position - m_base, "Incomplete property escape found.");
|
Chris@16
|
753 return false;
|
Chris@16
|
754 }
|
Chris@16
|
755 // maybe have \p{ddd}
|
Chris@16
|
756 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
|
Chris@16
|
757 {
|
Chris@16
|
758 const charT* base = m_position;
|
Chris@16
|
759 // skip forward until we find enclosing brace:
|
Chris@16
|
760 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
|
Chris@16
|
761 ++m_position;
|
Chris@16
|
762 if(m_position == m_end)
|
Chris@16
|
763 {
|
Chris@16
|
764 fail(regex_constants::error_escape, m_position - m_base, "Closing } missing from property escape sequence.");
|
Chris@16
|
765 return false;
|
Chris@16
|
766 }
|
Chris@16
|
767 m = this->m_traits.lookup_classname(++base, m_position++);
|
Chris@16
|
768 }
|
Chris@16
|
769 else
|
Chris@16
|
770 {
|
Chris@16
|
771 m = this->m_traits.lookup_classname(m_position, m_position+1);
|
Chris@16
|
772 ++m_position;
|
Chris@16
|
773 }
|
Chris@16
|
774 if(m != 0)
|
Chris@16
|
775 {
|
Chris@16
|
776 basic_char_set<charT, traits> char_set;
|
Chris@16
|
777 if(negate)
|
Chris@16
|
778 char_set.negate();
|
Chris@16
|
779 char_set.add_class(m);
|
Chris@16
|
780 if(0 == this->append_set(char_set))
|
Chris@16
|
781 {
|
Chris@16
|
782 fail(regex_constants::error_ctype, m_position - m_base);
|
Chris@16
|
783 return false;
|
Chris@16
|
784 }
|
Chris@16
|
785 return true;
|
Chris@16
|
786 }
|
Chris@16
|
787 fail(regex_constants::error_ctype, m_position - m_base, "Escape sequence was neither a valid property nor a valid character class name.");
|
Chris@16
|
788 return false;
|
Chris@16
|
789 }
|
Chris@16
|
790 case regex_constants::escape_type_reset_start_mark:
|
Chris@16
|
791 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
|
Chris@16
|
792 {
|
Chris@16
|
793 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
|
Chris@16
|
794 pb->index = -5;
|
Chris@16
|
795 pb->icase = this->flags() & regbase::icase;
|
Chris@16
|
796 this->m_pdata->m_data.align();
|
Chris@16
|
797 ++m_position;
|
Chris@16
|
798 return true;
|
Chris@16
|
799 }
|
Chris@16
|
800 goto escape_type_class_jump;
|
Chris@16
|
801 case regex_constants::escape_type_line_ending:
|
Chris@16
|
802 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
|
Chris@16
|
803 {
|
Chris@16
|
804 const charT* e = get_escape_R_string<charT>();
|
Chris@16
|
805 const charT* old_position = m_position;
|
Chris@16
|
806 const charT* old_end = m_end;
|
Chris@16
|
807 const charT* old_base = m_base;
|
Chris@16
|
808 m_position = e;
|
Chris@16
|
809 m_base = e;
|
Chris@16
|
810 m_end = e + traits::length(e);
|
Chris@16
|
811 bool r = parse_all();
|
Chris@16
|
812 m_position = ++old_position;
|
Chris@16
|
813 m_end = old_end;
|
Chris@16
|
814 m_base = old_base;
|
Chris@16
|
815 return r;
|
Chris@16
|
816 }
|
Chris@16
|
817 goto escape_type_class_jump;
|
Chris@16
|
818 case regex_constants::escape_type_extended_backref:
|
Chris@16
|
819 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
|
Chris@16
|
820 {
|
Chris@16
|
821 bool have_brace = false;
|
Chris@16
|
822 bool negative = false;
|
Chris@16
|
823 static const char* incomplete_message = "Incomplete \\g escape found.";
|
Chris@16
|
824 if(++m_position == m_end)
|
Chris@16
|
825 {
|
Chris@16
|
826 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
|
Chris@16
|
827 return false;
|
Chris@16
|
828 }
|
Chris@16
|
829 // maybe have \g{ddd}
|
Chris@16
|
830 regex_constants::syntax_type syn = this->m_traits.syntax_type(*m_position);
|
Chris@16
|
831 regex_constants::syntax_type syn_end = 0;
|
Chris@16
|
832 if((syn == regex_constants::syntax_open_brace)
|
Chris@16
|
833 || (syn == regex_constants::escape_type_left_word)
|
Chris@16
|
834 || (syn == regex_constants::escape_type_end_buffer))
|
Chris@16
|
835 {
|
Chris@16
|
836 if(++m_position == m_end)
|
Chris@16
|
837 {
|
Chris@16
|
838 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
|
Chris@16
|
839 return false;
|
Chris@16
|
840 }
|
Chris@16
|
841 have_brace = true;
|
Chris@16
|
842 switch(syn)
|
Chris@16
|
843 {
|
Chris@16
|
844 case regex_constants::syntax_open_brace:
|
Chris@16
|
845 syn_end = regex_constants::syntax_close_brace;
|
Chris@16
|
846 break;
|
Chris@16
|
847 case regex_constants::escape_type_left_word:
|
Chris@16
|
848 syn_end = regex_constants::escape_type_right_word;
|
Chris@16
|
849 break;
|
Chris@16
|
850 default:
|
Chris@16
|
851 syn_end = regex_constants::escape_type_end_buffer;
|
Chris@16
|
852 break;
|
Chris@16
|
853 }
|
Chris@16
|
854 }
|
Chris@16
|
855 negative = (*m_position == static_cast<charT>('-'));
|
Chris@16
|
856 if((negative) && (++m_position == m_end))
|
Chris@16
|
857 {
|
Chris@16
|
858 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
|
Chris@16
|
859 return false;
|
Chris@16
|
860 }
|
Chris@16
|
861 const charT* pc = m_position;
|
Chris@16
|
862 int i = this->m_traits.toi(pc, m_end, 10);
|
Chris@16
|
863 if((i < 0) && syn_end)
|
Chris@16
|
864 {
|
Chris@16
|
865 // Check for a named capture, get the leftmost one if there is more than one:
|
Chris@16
|
866 const charT* base = m_position;
|
Chris@16
|
867 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != syn_end))
|
Chris@16
|
868 {
|
Chris@16
|
869 ++m_position;
|
Chris@16
|
870 }
|
Chris@16
|
871 i = hash_value_from_capture_name(base, m_position);
|
Chris@16
|
872 pc = m_position;
|
Chris@16
|
873 }
|
Chris@16
|
874 if(negative)
|
Chris@16
|
875 i = 1 + m_mark_count - i;
|
Chris@16
|
876 if(((i > 0) && (this->m_backrefs & (1u << (i-1)))) || ((i > 10000) && (this->m_pdata->get_id(i) > 0) && (this->m_backrefs & (1u << (this->m_pdata->get_id(i)-1)))))
|
Chris@16
|
877 {
|
Chris@16
|
878 m_position = pc;
|
Chris@16
|
879 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
|
Chris@16
|
880 pb->index = i;
|
Chris@16
|
881 pb->icase = this->flags() & regbase::icase;
|
Chris@16
|
882 }
|
Chris@16
|
883 else
|
Chris@16
|
884 {
|
Chris@16
|
885 fail(regex_constants::error_backref, m_position - m_base);
|
Chris@16
|
886 return false;
|
Chris@16
|
887 }
|
Chris@16
|
888 m_position = pc;
|
Chris@16
|
889 if(have_brace)
|
Chris@16
|
890 {
|
Chris@16
|
891 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != syn_end))
|
Chris@16
|
892 {
|
Chris@16
|
893 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
|
Chris@16
|
894 return false;
|
Chris@16
|
895 }
|
Chris@16
|
896 ++m_position;
|
Chris@16
|
897 }
|
Chris@16
|
898 return true;
|
Chris@16
|
899 }
|
Chris@16
|
900 goto escape_type_class_jump;
|
Chris@16
|
901 case regex_constants::escape_type_control_v:
|
Chris@16
|
902 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
|
Chris@16
|
903 goto escape_type_class_jump;
|
Chris@16
|
904 BOOST_FALLTHROUGH;
|
Chris@16
|
905 default:
|
Chris@16
|
906 this->append_literal(unescape_character());
|
Chris@16
|
907 break;
|
Chris@16
|
908 }
|
Chris@16
|
909 return true;
|
Chris@16
|
910 }
|
Chris@16
|
911
|
Chris@16
|
912 template <class charT, class traits>
|
Chris@16
|
913 bool basic_regex_parser<charT, traits>::parse_match_any()
|
Chris@16
|
914 {
|
Chris@16
|
915 //
|
Chris@16
|
916 // we have a '.' that can match any character:
|
Chris@16
|
917 //
|
Chris@16
|
918 ++m_position;
|
Chris@16
|
919 static_cast<re_dot*>(
|
Chris@16
|
920 this->append_state(syntax_element_wild, sizeof(re_dot))
|
Chris@16
|
921 )->mask = static_cast<unsigned char>(this->flags() & regbase::no_mod_s
|
Chris@16
|
922 ? re_detail::force_not_newline
|
Chris@16
|
923 : this->flags() & regbase::mod_s ?
|
Chris@16
|
924 re_detail::force_newline : re_detail::dont_care);
|
Chris@16
|
925 return true;
|
Chris@16
|
926 }
|
Chris@16
|
927
|
Chris@16
|
928 template <class charT, class traits>
|
Chris@16
|
929 bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_t high)
|
Chris@16
|
930 {
|
Chris@16
|
931 bool greedy = true;
|
Chris@16
|
932 bool pocessive = false;
|
Chris@16
|
933 std::size_t insert_point;
|
Chris@16
|
934 //
|
Chris@16
|
935 // when we get to here we may have a non-greedy ? mark still to come:
|
Chris@16
|
936 //
|
Chris@16
|
937 if((m_position != m_end)
|
Chris@16
|
938 && (
|
Chris@16
|
939 (0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
|
Chris@16
|
940 || ((regbase::basic_syntax_group|regbase::emacs_ex) == (this->flags() & (regbase::main_option_type | regbase::emacs_ex)))
|
Chris@16
|
941 )
|
Chris@16
|
942 )
|
Chris@16
|
943 {
|
Chris@16
|
944 // OK we have a perl or emacs regex, check for a '?':
|
Chris@16
|
945 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
|
Chris@16
|
946 {
|
Chris@16
|
947 greedy = false;
|
Chris@16
|
948 ++m_position;
|
Chris@16
|
949 }
|
Chris@16
|
950 // for perl regexes only check for pocessive ++ repeats.
|
Chris@16
|
951 if((m_position != m_end)
|
Chris@16
|
952 && (0 == (this->flags() & regbase::main_option_type))
|
Chris@16
|
953 && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_plus))
|
Chris@16
|
954 {
|
Chris@16
|
955 pocessive = true;
|
Chris@16
|
956 ++m_position;
|
Chris@16
|
957 }
|
Chris@16
|
958 }
|
Chris@16
|
959 if(0 == this->m_last_state)
|
Chris@16
|
960 {
|
Chris@16
|
961 fail(regex_constants::error_badrepeat, ::boost::re_detail::distance(m_base, m_position), "Nothing to repeat.");
|
Chris@16
|
962 return false;
|
Chris@16
|
963 }
|
Chris@16
|
964 if(this->m_last_state->type == syntax_element_endmark)
|
Chris@16
|
965 {
|
Chris@16
|
966 // insert a repeat before the '(' matching the last ')':
|
Chris@16
|
967 insert_point = this->m_paren_start;
|
Chris@16
|
968 }
|
Chris@16
|
969 else if((this->m_last_state->type == syntax_element_literal) && (static_cast<re_literal*>(this->m_last_state)->length > 1))
|
Chris@16
|
970 {
|
Chris@16
|
971 // the last state was a literal with more than one character, split it in two:
|
Chris@16
|
972 re_literal* lit = static_cast<re_literal*>(this->m_last_state);
|
Chris@16
|
973 charT c = (static_cast<charT*>(static_cast<void*>(lit+1)))[lit->length - 1];
|
Chris@16
|
974 --(lit->length);
|
Chris@16
|
975 // now append new state:
|
Chris@16
|
976 lit = static_cast<re_literal*>(this->append_state(syntax_element_literal, sizeof(re_literal) + sizeof(charT)));
|
Chris@16
|
977 lit->length = 1;
|
Chris@16
|
978 (static_cast<charT*>(static_cast<void*>(lit+1)))[0] = c;
|
Chris@16
|
979 insert_point = this->getoffset(this->m_last_state);
|
Chris@16
|
980 }
|
Chris@16
|
981 else
|
Chris@16
|
982 {
|
Chris@16
|
983 // repeat the last state whatever it was, need to add some error checking here:
|
Chris@16
|
984 switch(this->m_last_state->type)
|
Chris@16
|
985 {
|
Chris@16
|
986 case syntax_element_start_line:
|
Chris@16
|
987 case syntax_element_end_line:
|
Chris@16
|
988 case syntax_element_word_boundary:
|
Chris@16
|
989 case syntax_element_within_word:
|
Chris@16
|
990 case syntax_element_word_start:
|
Chris@16
|
991 case syntax_element_word_end:
|
Chris@16
|
992 case syntax_element_buffer_start:
|
Chris@16
|
993 case syntax_element_buffer_end:
|
Chris@16
|
994 case syntax_element_alt:
|
Chris@16
|
995 case syntax_element_soft_buffer_end:
|
Chris@16
|
996 case syntax_element_restart_continue:
|
Chris@16
|
997 case syntax_element_jump:
|
Chris@16
|
998 case syntax_element_startmark:
|
Chris@16
|
999 case syntax_element_backstep:
|
Chris@16
|
1000 // can't legally repeat any of the above:
|
Chris@16
|
1001 fail(regex_constants::error_badrepeat, m_position - m_base);
|
Chris@16
|
1002 return false;
|
Chris@16
|
1003 default:
|
Chris@16
|
1004 // do nothing...
|
Chris@16
|
1005 break;
|
Chris@16
|
1006 }
|
Chris@16
|
1007 insert_point = this->getoffset(this->m_last_state);
|
Chris@16
|
1008 }
|
Chris@16
|
1009 //
|
Chris@16
|
1010 // OK we now know what to repeat, so insert the repeat around it:
|
Chris@16
|
1011 //
|
Chris@16
|
1012 re_repeat* rep = static_cast<re_repeat*>(this->insert_state(insert_point, syntax_element_rep, re_repeater_size));
|
Chris@16
|
1013 rep->min = low;
|
Chris@16
|
1014 rep->max = high;
|
Chris@16
|
1015 rep->greedy = greedy;
|
Chris@16
|
1016 rep->leading = false;
|
Chris@16
|
1017 // store our repeater position for later:
|
Chris@16
|
1018 std::ptrdiff_t rep_off = this->getoffset(rep);
|
Chris@16
|
1019 // and append a back jump to the repeat:
|
Chris@16
|
1020 re_jump* jmp = static_cast<re_jump*>(this->append_state(syntax_element_jump, sizeof(re_jump)));
|
Chris@16
|
1021 jmp->alt.i = rep_off - this->getoffset(jmp);
|
Chris@16
|
1022 this->m_pdata->m_data.align();
|
Chris@16
|
1023 // now fill in the alt jump for the repeat:
|
Chris@16
|
1024 rep = static_cast<re_repeat*>(this->getaddress(rep_off));
|
Chris@16
|
1025 rep->alt.i = this->m_pdata->m_data.size() - rep_off;
|
Chris@16
|
1026 //
|
Chris@16
|
1027 // If the repeat is pocessive then bracket the repeat with a (?>...)
|
Chris@16
|
1028 // independent sub-expression construct:
|
Chris@16
|
1029 //
|
Chris@16
|
1030 if(pocessive)
|
Chris@16
|
1031 {
|
Chris@16
|
1032 if(m_position != m_end)
|
Chris@16
|
1033 {
|
Chris@16
|
1034 //
|
Chris@16
|
1035 // Check for illegal following quantifier, we have to do this here, because
|
Chris@16
|
1036 // the extra states we insert below circumvents our usual error checking :-(
|
Chris@16
|
1037 //
|
Chris@16
|
1038 switch(this->m_traits.syntax_type(*m_position))
|
Chris@16
|
1039 {
|
Chris@16
|
1040 case regex_constants::syntax_star:
|
Chris@16
|
1041 case regex_constants::syntax_plus:
|
Chris@16
|
1042 case regex_constants::syntax_question:
|
Chris@16
|
1043 case regex_constants::syntax_open_brace:
|
Chris@16
|
1044 fail(regex_constants::error_badrepeat, m_position - m_base);
|
Chris@16
|
1045 return false;
|
Chris@16
|
1046 }
|
Chris@16
|
1047 }
|
Chris@16
|
1048 re_brace* pb = static_cast<re_brace*>(this->insert_state(insert_point, syntax_element_startmark, sizeof(re_brace)));
|
Chris@16
|
1049 pb->index = -3;
|
Chris@16
|
1050 pb->icase = this->flags() & regbase::icase;
|
Chris@16
|
1051 jmp = static_cast<re_jump*>(this->insert_state(insert_point + sizeof(re_brace), syntax_element_jump, sizeof(re_jump)));
|
Chris@16
|
1052 this->m_pdata->m_data.align();
|
Chris@16
|
1053 jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
|
Chris@16
|
1054 pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
|
Chris@16
|
1055 pb->index = -3;
|
Chris@16
|
1056 pb->icase = this->flags() & regbase::icase;
|
Chris@16
|
1057 }
|
Chris@16
|
1058 return true;
|
Chris@16
|
1059 }
|
Chris@16
|
1060
|
Chris@16
|
1061 template <class charT, class traits>
|
Chris@16
|
1062 bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic)
|
Chris@16
|
1063 {
|
Chris@16
|
1064 static const char* incomplete_message = "Missing } in quantified repetition.";
|
Chris@16
|
1065 //
|
Chris@16
|
1066 // parse a repeat-range:
|
Chris@16
|
1067 //
|
Chris@16
|
1068 std::size_t min, max;
|
Chris@16
|
1069 int v;
|
Chris@16
|
1070 // skip whitespace:
|
Chris@16
|
1071 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
|
Chris@16
|
1072 ++m_position;
|
Chris@16
|
1073 if(this->m_position == this->m_end)
|
Chris@16
|
1074 {
|
Chris@16
|
1075 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
|
Chris@16
|
1076 {
|
Chris@16
|
1077 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
|
Chris@16
|
1078 return false;
|
Chris@16
|
1079 }
|
Chris@16
|
1080 // Treat the opening '{' as a literal character, rewind to start of error:
|
Chris@16
|
1081 --m_position;
|
Chris@16
|
1082 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
|
Chris@16
|
1083 return parse_literal();
|
Chris@16
|
1084 }
|
Chris@16
|
1085 // get min:
|
Chris@16
|
1086 v = this->m_traits.toi(m_position, m_end, 10);
|
Chris@16
|
1087 // skip whitespace:
|
Chris@16
|
1088 if(v < 0)
|
Chris@16
|
1089 {
|
Chris@16
|
1090 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
|
Chris@16
|
1091 {
|
Chris@16
|
1092 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
|
Chris@16
|
1093 return false;
|
Chris@16
|
1094 }
|
Chris@16
|
1095 // Treat the opening '{' as a literal character, rewind to start of error:
|
Chris@16
|
1096 --m_position;
|
Chris@16
|
1097 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
|
Chris@16
|
1098 return parse_literal();
|
Chris@16
|
1099 }
|
Chris@16
|
1100 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
|
Chris@16
|
1101 ++m_position;
|
Chris@16
|
1102 if(this->m_position == this->m_end)
|
Chris@16
|
1103 {
|
Chris@16
|
1104 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
|
Chris@16
|
1105 {
|
Chris@16
|
1106 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
|
Chris@16
|
1107 return false;
|
Chris@16
|
1108 }
|
Chris@16
|
1109 // Treat the opening '{' as a literal character, rewind to start of error:
|
Chris@16
|
1110 --m_position;
|
Chris@16
|
1111 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
|
Chris@16
|
1112 return parse_literal();
|
Chris@16
|
1113 }
|
Chris@16
|
1114 min = v;
|
Chris@16
|
1115 // see if we have a comma:
|
Chris@16
|
1116 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_comma)
|
Chris@16
|
1117 {
|
Chris@16
|
1118 // move on and error check:
|
Chris@16
|
1119 ++m_position;
|
Chris@16
|
1120 // skip whitespace:
|
Chris@16
|
1121 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
|
Chris@16
|
1122 ++m_position;
|
Chris@16
|
1123 if(this->m_position == this->m_end)
|
Chris@16
|
1124 {
|
Chris@16
|
1125 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
|
Chris@16
|
1126 {
|
Chris@16
|
1127 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
|
Chris@16
|
1128 return false;
|
Chris@16
|
1129 }
|
Chris@16
|
1130 // Treat the opening '{' as a literal character, rewind to start of error:
|
Chris@16
|
1131 --m_position;
|
Chris@16
|
1132 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
|
Chris@16
|
1133 return parse_literal();
|
Chris@16
|
1134 }
|
Chris@16
|
1135 // get the value if any:
|
Chris@16
|
1136 v = this->m_traits.toi(m_position, m_end, 10);
|
Chris@16
|
1137 max = (v >= 0) ? (std::size_t)v : (std::numeric_limits<std::size_t>::max)();
|
Chris@16
|
1138 }
|
Chris@16
|
1139 else
|
Chris@16
|
1140 {
|
Chris@16
|
1141 // no comma, max = min:
|
Chris@16
|
1142 max = min;
|
Chris@16
|
1143 }
|
Chris@16
|
1144 // skip whitespace:
|
Chris@16
|
1145 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
|
Chris@16
|
1146 ++m_position;
|
Chris@16
|
1147 // OK now check trailing }:
|
Chris@16
|
1148 if(this->m_position == this->m_end)
|
Chris@16
|
1149 {
|
Chris@16
|
1150 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
|
Chris@16
|
1151 {
|
Chris@16
|
1152 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
|
Chris@16
|
1153 return false;
|
Chris@16
|
1154 }
|
Chris@16
|
1155 // Treat the opening '{' as a literal character, rewind to start of error:
|
Chris@16
|
1156 --m_position;
|
Chris@16
|
1157 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
|
Chris@16
|
1158 return parse_literal();
|
Chris@16
|
1159 }
|
Chris@16
|
1160 if(isbasic)
|
Chris@16
|
1161 {
|
Chris@16
|
1162 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_escape)
|
Chris@16
|
1163 {
|
Chris@16
|
1164 ++m_position;
|
Chris@16
|
1165 if(this->m_position == this->m_end)
|
Chris@16
|
1166 {
|
Chris@16
|
1167 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
|
Chris@16
|
1168 return false;
|
Chris@16
|
1169 }
|
Chris@16
|
1170 }
|
Chris@16
|
1171 else
|
Chris@16
|
1172 {
|
Chris@16
|
1173 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
|
Chris@16
|
1174 return false;
|
Chris@16
|
1175 }
|
Chris@16
|
1176 }
|
Chris@16
|
1177 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_brace)
|
Chris@16
|
1178 ++m_position;
|
Chris@16
|
1179 else
|
Chris@16
|
1180 {
|
Chris@16
|
1181 // Treat the opening '{' as a literal character, rewind to start of error:
|
Chris@16
|
1182 --m_position;
|
Chris@16
|
1183 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
|
Chris@16
|
1184 return parse_literal();
|
Chris@16
|
1185 }
|
Chris@16
|
1186 //
|
Chris@16
|
1187 // finally go and add the repeat, unless error:
|
Chris@16
|
1188 //
|
Chris@16
|
1189 if(min > max)
|
Chris@16
|
1190 {
|
Chris@16
|
1191 // Backtrack to error location:
|
Chris@16
|
1192 m_position -= 2;
|
Chris@16
|
1193 while(this->m_traits.isctype(*m_position, this->m_word_mask)) --m_position;
|
Chris@16
|
1194 ++m_position;
|
Chris@16
|
1195 fail(regex_constants::error_badbrace, m_position - m_base);
|
Chris@16
|
1196 return false;
|
Chris@16
|
1197 }
|
Chris@16
|
1198 return parse_repeat(min, max);
|
Chris@16
|
1199 }
|
Chris@16
|
1200
|
Chris@16
|
1201 template <class charT, class traits>
|
Chris@16
|
1202 bool basic_regex_parser<charT, traits>::parse_alt()
|
Chris@16
|
1203 {
|
Chris@16
|
1204 //
|
Chris@16
|
1205 // error check: if there have been no previous states,
|
Chris@16
|
1206 // or if the last state was a '(' then error:
|
Chris@16
|
1207 //
|
Chris@16
|
1208 if(
|
Chris@16
|
1209 ((this->m_last_state == 0) || (this->m_last_state->type == syntax_element_startmark))
|
Chris@16
|
1210 &&
|
Chris@16
|
1211 !(
|
Chris@16
|
1212 ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
|
Chris@16
|
1213 &&
|
Chris@16
|
1214 ((this->flags() & regbase::no_empty_expressions) == 0)
|
Chris@16
|
1215 )
|
Chris@16
|
1216 )
|
Chris@16
|
1217 {
|
Chris@16
|
1218 fail(regex_constants::error_empty, this->m_position - this->m_base, "A regular expression can start with the alternation operator |.");
|
Chris@16
|
1219 return false;
|
Chris@16
|
1220 }
|
Chris@16
|
1221 //
|
Chris@16
|
1222 // Reset mark count if required:
|
Chris@16
|
1223 //
|
Chris@16
|
1224 if(m_max_mark < m_mark_count)
|
Chris@16
|
1225 m_max_mark = m_mark_count;
|
Chris@16
|
1226 if(m_mark_reset >= 0)
|
Chris@16
|
1227 m_mark_count = m_mark_reset;
|
Chris@16
|
1228
|
Chris@16
|
1229 ++m_position;
|
Chris@16
|
1230 //
|
Chris@16
|
1231 // we need to append a trailing jump:
|
Chris@16
|
1232 //
|
Chris@16
|
1233 re_syntax_base* pj = this->append_state(re_detail::syntax_element_jump, sizeof(re_jump));
|
Chris@16
|
1234 std::ptrdiff_t jump_offset = this->getoffset(pj);
|
Chris@16
|
1235 //
|
Chris@16
|
1236 // now insert the alternative:
|
Chris@16
|
1237 //
|
Chris@16
|
1238 re_alt* palt = static_cast<re_alt*>(this->insert_state(this->m_alt_insert_point, syntax_element_alt, re_alt_size));
|
Chris@16
|
1239 jump_offset += re_alt_size;
|
Chris@16
|
1240 this->m_pdata->m_data.align();
|
Chris@16
|
1241 palt->alt.i = this->m_pdata->m_data.size() - this->getoffset(palt);
|
Chris@16
|
1242 //
|
Chris@16
|
1243 // update m_alt_insert_point so that the next alternate gets
|
Chris@16
|
1244 // inserted at the start of the second of the two we've just created:
|
Chris@16
|
1245 //
|
Chris@16
|
1246 this->m_alt_insert_point = this->m_pdata->m_data.size();
|
Chris@16
|
1247 //
|
Chris@16
|
1248 // the start of this alternative must have a case changes state
|
Chris@16
|
1249 // if the current block has messed around with case changes:
|
Chris@16
|
1250 //
|
Chris@16
|
1251 if(m_has_case_change)
|
Chris@16
|
1252 {
|
Chris@16
|
1253 static_cast<re_case*>(
|
Chris@16
|
1254 this->append_state(syntax_element_toggle_case, sizeof(re_case))
|
Chris@16
|
1255 )->icase = this->m_icase;
|
Chris@16
|
1256 }
|
Chris@16
|
1257 //
|
Chris@16
|
1258 // push the alternative onto our stack, a recursive
|
Chris@16
|
1259 // implementation here is easier to understand (and faster
|
Chris@16
|
1260 // as it happens), but causes all kinds of stack overflow problems
|
Chris@16
|
1261 // on programs with small stacks (COM+).
|
Chris@16
|
1262 //
|
Chris@16
|
1263 m_alt_jumps.push_back(jump_offset);
|
Chris@16
|
1264 return true;
|
Chris@16
|
1265 }
|
Chris@16
|
1266
|
Chris@16
|
1267 template <class charT, class traits>
|
Chris@16
|
1268 bool basic_regex_parser<charT, traits>::parse_set()
|
Chris@16
|
1269 {
|
Chris@16
|
1270 static const char* incomplete_message = "Character set declaration starting with [ terminated prematurely - either no ] was found or the set had no content.";
|
Chris@16
|
1271 ++m_position;
|
Chris@16
|
1272 if(m_position == m_end)
|
Chris@16
|
1273 {
|
Chris@16
|
1274 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
|
Chris@16
|
1275 return false;
|
Chris@16
|
1276 }
|
Chris@16
|
1277 basic_char_set<charT, traits> char_set;
|
Chris@16
|
1278
|
Chris@16
|
1279 const charT* base = m_position; // where the '[' was
|
Chris@16
|
1280 const charT* item_base = m_position; // where the '[' or '^' was
|
Chris@16
|
1281
|
Chris@16
|
1282 while(m_position != m_end)
|
Chris@16
|
1283 {
|
Chris@16
|
1284 switch(this->m_traits.syntax_type(*m_position))
|
Chris@16
|
1285 {
|
Chris@16
|
1286 case regex_constants::syntax_caret:
|
Chris@16
|
1287 if(m_position == base)
|
Chris@16
|
1288 {
|
Chris@16
|
1289 char_set.negate();
|
Chris@16
|
1290 ++m_position;
|
Chris@16
|
1291 item_base = m_position;
|
Chris@16
|
1292 }
|
Chris@16
|
1293 else
|
Chris@16
|
1294 parse_set_literal(char_set);
|
Chris@16
|
1295 break;
|
Chris@16
|
1296 case regex_constants::syntax_close_set:
|
Chris@16
|
1297 if(m_position == item_base)
|
Chris@16
|
1298 {
|
Chris@16
|
1299 parse_set_literal(char_set);
|
Chris@16
|
1300 break;
|
Chris@16
|
1301 }
|
Chris@16
|
1302 else
|
Chris@16
|
1303 {
|
Chris@16
|
1304 ++m_position;
|
Chris@16
|
1305 if(0 == this->append_set(char_set))
|
Chris@16
|
1306 {
|
Chris@16
|
1307 fail(regex_constants::error_ctype, m_position - m_base);
|
Chris@16
|
1308 return false;
|
Chris@16
|
1309 }
|
Chris@16
|
1310 }
|
Chris@16
|
1311 return true;
|
Chris@16
|
1312 case regex_constants::syntax_open_set:
|
Chris@16
|
1313 if(parse_inner_set(char_set))
|
Chris@16
|
1314 break;
|
Chris@16
|
1315 return true;
|
Chris@16
|
1316 case regex_constants::syntax_escape:
|
Chris@16
|
1317 {
|
Chris@16
|
1318 //
|
Chris@16
|
1319 // look ahead and see if this is a character class shortcut
|
Chris@16
|
1320 // \d \w \s etc...
|
Chris@16
|
1321 //
|
Chris@16
|
1322 ++m_position;
|
Chris@16
|
1323 if(this->m_traits.escape_syntax_type(*m_position)
|
Chris@16
|
1324 == regex_constants::escape_type_class)
|
Chris@16
|
1325 {
|
Chris@16
|
1326 char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
|
Chris@16
|
1327 if(m != 0)
|
Chris@16
|
1328 {
|
Chris@16
|
1329 char_set.add_class(m);
|
Chris@16
|
1330 ++m_position;
|
Chris@16
|
1331 break;
|
Chris@16
|
1332 }
|
Chris@16
|
1333 }
|
Chris@16
|
1334 else if(this->m_traits.escape_syntax_type(*m_position)
|
Chris@16
|
1335 == regex_constants::escape_type_not_class)
|
Chris@16
|
1336 {
|
Chris@16
|
1337 // negated character class:
|
Chris@16
|
1338 char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
|
Chris@16
|
1339 if(m != 0)
|
Chris@16
|
1340 {
|
Chris@16
|
1341 char_set.add_negated_class(m);
|
Chris@16
|
1342 ++m_position;
|
Chris@16
|
1343 break;
|
Chris@16
|
1344 }
|
Chris@16
|
1345 }
|
Chris@16
|
1346 // not a character class, just a regular escape:
|
Chris@16
|
1347 --m_position;
|
Chris@16
|
1348 parse_set_literal(char_set);
|
Chris@16
|
1349 break;
|
Chris@16
|
1350 }
|
Chris@16
|
1351 default:
|
Chris@16
|
1352 parse_set_literal(char_set);
|
Chris@16
|
1353 break;
|
Chris@16
|
1354 }
|
Chris@16
|
1355 }
|
Chris@16
|
1356 return m_position != m_end;
|
Chris@16
|
1357 }
|
Chris@16
|
1358
|
Chris@16
|
1359 template <class charT, class traits>
|
Chris@16
|
1360 bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, traits>& char_set)
|
Chris@16
|
1361 {
|
Chris@16
|
1362 static const char* incomplete_message = "Character class declaration starting with [ terminated prematurely - either no ] was found or the set had no content.";
|
Chris@16
|
1363 //
|
Chris@16
|
1364 // we have either a character class [:name:]
|
Chris@16
|
1365 // a collating element [.name.]
|
Chris@16
|
1366 // or an equivalence class [=name=]
|
Chris@16
|
1367 //
|
Chris@16
|
1368 if(m_end == ++m_position)
|
Chris@16
|
1369 {
|
Chris@16
|
1370 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
|
Chris@16
|
1371 return false;
|
Chris@16
|
1372 }
|
Chris@16
|
1373 switch(this->m_traits.syntax_type(*m_position))
|
Chris@16
|
1374 {
|
Chris@16
|
1375 case regex_constants::syntax_dot:
|
Chris@16
|
1376 //
|
Chris@16
|
1377 // a collating element is treated as a literal:
|
Chris@16
|
1378 //
|
Chris@16
|
1379 --m_position;
|
Chris@16
|
1380 parse_set_literal(char_set);
|
Chris@16
|
1381 return true;
|
Chris@16
|
1382 case regex_constants::syntax_colon:
|
Chris@16
|
1383 {
|
Chris@16
|
1384 // check that character classes are actually enabled:
|
Chris@16
|
1385 if((this->flags() & (regbase::main_option_type | regbase::no_char_classes))
|
Chris@16
|
1386 == (regbase::basic_syntax_group | regbase::no_char_classes))
|
Chris@16
|
1387 {
|
Chris@16
|
1388 --m_position;
|
Chris@16
|
1389 parse_set_literal(char_set);
|
Chris@16
|
1390 return true;
|
Chris@16
|
1391 }
|
Chris@16
|
1392 // skip the ':'
|
Chris@16
|
1393 if(m_end == ++m_position)
|
Chris@16
|
1394 {
|
Chris@16
|
1395 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
|
Chris@16
|
1396 return false;
|
Chris@16
|
1397 }
|
Chris@16
|
1398 const charT* name_first = m_position;
|
Chris@16
|
1399 // skip at least one character, then find the matching ':]'
|
Chris@16
|
1400 if(m_end == ++m_position)
|
Chris@16
|
1401 {
|
Chris@16
|
1402 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
|
Chris@16
|
1403 return false;
|
Chris@16
|
1404 }
|
Chris@16
|
1405 while((m_position != m_end)
|
Chris@16
|
1406 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_colon))
|
Chris@16
|
1407 ++m_position;
|
Chris@16
|
1408 const charT* name_last = m_position;
|
Chris@16
|
1409 if(m_end == m_position)
|
Chris@16
|
1410 {
|
Chris@16
|
1411 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
|
Chris@16
|
1412 return false;
|
Chris@16
|
1413 }
|
Chris@16
|
1414 if((m_end == ++m_position)
|
Chris@16
|
1415 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
|
Chris@16
|
1416 {
|
Chris@16
|
1417 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
|
Chris@16
|
1418 return false;
|
Chris@16
|
1419 }
|
Chris@16
|
1420 //
|
Chris@16
|
1421 // check for negated class:
|
Chris@16
|
1422 //
|
Chris@16
|
1423 bool negated = false;
|
Chris@16
|
1424 if(this->m_traits.syntax_type(*name_first) == regex_constants::syntax_caret)
|
Chris@16
|
1425 {
|
Chris@16
|
1426 ++name_first;
|
Chris@16
|
1427 negated = true;
|
Chris@16
|
1428 }
|
Chris@16
|
1429 typedef typename traits::char_class_type m_type;
|
Chris@16
|
1430 m_type m = this->m_traits.lookup_classname(name_first, name_last);
|
Chris@16
|
1431 if(m == 0)
|
Chris@16
|
1432 {
|
Chris@16
|
1433 if(char_set.empty() && (name_last - name_first == 1))
|
Chris@16
|
1434 {
|
Chris@16
|
1435 // maybe a special case:
|
Chris@16
|
1436 ++m_position;
|
Chris@16
|
1437 if( (m_position != m_end)
|
Chris@16
|
1438 && (this->m_traits.syntax_type(*m_position)
|
Chris@16
|
1439 == regex_constants::syntax_close_set))
|
Chris@16
|
1440 {
|
Chris@16
|
1441 if(this->m_traits.escape_syntax_type(*name_first)
|
Chris@16
|
1442 == regex_constants::escape_type_left_word)
|
Chris@16
|
1443 {
|
Chris@16
|
1444 ++m_position;
|
Chris@16
|
1445 this->append_state(syntax_element_word_start);
|
Chris@16
|
1446 return false;
|
Chris@16
|
1447 }
|
Chris@16
|
1448 if(this->m_traits.escape_syntax_type(*name_first)
|
Chris@16
|
1449 == regex_constants::escape_type_right_word)
|
Chris@16
|
1450 {
|
Chris@16
|
1451 ++m_position;
|
Chris@16
|
1452 this->append_state(syntax_element_word_end);
|
Chris@16
|
1453 return false;
|
Chris@16
|
1454 }
|
Chris@16
|
1455 }
|
Chris@16
|
1456 }
|
Chris@16
|
1457 fail(regex_constants::error_ctype, name_first - m_base);
|
Chris@16
|
1458 return false;
|
Chris@16
|
1459 }
|
Chris@16
|
1460 if(negated == false)
|
Chris@16
|
1461 char_set.add_class(m);
|
Chris@16
|
1462 else
|
Chris@16
|
1463 char_set.add_negated_class(m);
|
Chris@16
|
1464 ++m_position;
|
Chris@16
|
1465 break;
|
Chris@16
|
1466 }
|
Chris@16
|
1467 case regex_constants::syntax_equal:
|
Chris@16
|
1468 {
|
Chris@16
|
1469 // skip the '='
|
Chris@16
|
1470 if(m_end == ++m_position)
|
Chris@16
|
1471 {
|
Chris@16
|
1472 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
|
Chris@16
|
1473 return false;
|
Chris@16
|
1474 }
|
Chris@16
|
1475 const charT* name_first = m_position;
|
Chris@16
|
1476 // skip at least one character, then find the matching '=]'
|
Chris@16
|
1477 if(m_end == ++m_position)
|
Chris@16
|
1478 {
|
Chris@16
|
1479 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
|
Chris@16
|
1480 return false;
|
Chris@16
|
1481 }
|
Chris@16
|
1482 while((m_position != m_end)
|
Chris@16
|
1483 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal))
|
Chris@16
|
1484 ++m_position;
|
Chris@16
|
1485 const charT* name_last = m_position;
|
Chris@16
|
1486 if(m_end == m_position)
|
Chris@16
|
1487 {
|
Chris@16
|
1488 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
|
Chris@16
|
1489 return false;
|
Chris@16
|
1490 }
|
Chris@16
|
1491 if((m_end == ++m_position)
|
Chris@16
|
1492 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
|
Chris@16
|
1493 {
|
Chris@16
|
1494 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
|
Chris@16
|
1495 return false;
|
Chris@16
|
1496 }
|
Chris@16
|
1497 string_type m = this->m_traits.lookup_collatename(name_first, name_last);
|
Chris@16
|
1498 if((0 == m.size()) || (m.size() > 2))
|
Chris@16
|
1499 {
|
Chris@16
|
1500 fail(regex_constants::error_collate, name_first - m_base);
|
Chris@16
|
1501 return false;
|
Chris@16
|
1502 }
|
Chris@16
|
1503 digraph<charT> d;
|
Chris@16
|
1504 d.first = m[0];
|
Chris@16
|
1505 if(m.size() > 1)
|
Chris@16
|
1506 d.second = m[1];
|
Chris@16
|
1507 else
|
Chris@16
|
1508 d.second = 0;
|
Chris@16
|
1509 char_set.add_equivalent(d);
|
Chris@16
|
1510 ++m_position;
|
Chris@16
|
1511 break;
|
Chris@16
|
1512 }
|
Chris@16
|
1513 default:
|
Chris@16
|
1514 --m_position;
|
Chris@16
|
1515 parse_set_literal(char_set);
|
Chris@16
|
1516 break;
|
Chris@16
|
1517 }
|
Chris@16
|
1518 return true;
|
Chris@16
|
1519 }
|
Chris@16
|
1520
|
Chris@16
|
1521 template <class charT, class traits>
|
Chris@16
|
1522 void basic_regex_parser<charT, traits>::parse_set_literal(basic_char_set<charT, traits>& char_set)
|
Chris@16
|
1523 {
|
Chris@16
|
1524 digraph<charT> start_range(get_next_set_literal(char_set));
|
Chris@16
|
1525 if(m_end == m_position)
|
Chris@16
|
1526 {
|
Chris@16
|
1527 fail(regex_constants::error_brack, m_position - m_base);
|
Chris@16
|
1528 return;
|
Chris@16
|
1529 }
|
Chris@16
|
1530 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
|
Chris@16
|
1531 {
|
Chris@16
|
1532 // we have a range:
|
Chris@16
|
1533 if(m_end == ++m_position)
|
Chris@16
|
1534 {
|
Chris@16
|
1535 fail(regex_constants::error_brack, m_position - m_base);
|
Chris@16
|
1536 return;
|
Chris@16
|
1537 }
|
Chris@16
|
1538 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)
|
Chris@16
|
1539 {
|
Chris@16
|
1540 digraph<charT> end_range = get_next_set_literal(char_set);
|
Chris@16
|
1541 char_set.add_range(start_range, end_range);
|
Chris@16
|
1542 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
|
Chris@16
|
1543 {
|
Chris@16
|
1544 if(m_end == ++m_position)
|
Chris@16
|
1545 {
|
Chris@16
|
1546 fail(regex_constants::error_brack, m_position - m_base);
|
Chris@16
|
1547 return;
|
Chris@16
|
1548 }
|
Chris@16
|
1549 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_set)
|
Chris@16
|
1550 {
|
Chris@16
|
1551 // trailing - :
|
Chris@16
|
1552 --m_position;
|
Chris@16
|
1553 return;
|
Chris@16
|
1554 }
|
Chris@16
|
1555 fail(regex_constants::error_range, m_position - m_base);
|
Chris@16
|
1556 return;
|
Chris@16
|
1557 }
|
Chris@16
|
1558 return;
|
Chris@16
|
1559 }
|
Chris@16
|
1560 --m_position;
|
Chris@16
|
1561 }
|
Chris@16
|
1562 char_set.add_single(start_range);
|
Chris@16
|
1563 }
|
Chris@16
|
1564
|
Chris@16
|
1565 template <class charT, class traits>
|
Chris@16
|
1566 digraph<charT> basic_regex_parser<charT, traits>::get_next_set_literal(basic_char_set<charT, traits>& char_set)
|
Chris@16
|
1567 {
|
Chris@16
|
1568 digraph<charT> result;
|
Chris@16
|
1569 switch(this->m_traits.syntax_type(*m_position))
|
Chris@16
|
1570 {
|
Chris@16
|
1571 case regex_constants::syntax_dash:
|
Chris@16
|
1572 if(!char_set.empty())
|
Chris@16
|
1573 {
|
Chris@16
|
1574 // see if we are at the end of the set:
|
Chris@16
|
1575 if((++m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
|
Chris@16
|
1576 {
|
Chris@16
|
1577 fail(regex_constants::error_range, m_position - m_base);
|
Chris@16
|
1578 return result;
|
Chris@16
|
1579 }
|
Chris@16
|
1580 --m_position;
|
Chris@16
|
1581 }
|
Chris@16
|
1582 result.first = *m_position++;
|
Chris@16
|
1583 return result;
|
Chris@16
|
1584 case regex_constants::syntax_escape:
|
Chris@16
|
1585 // check to see if escapes are supported first:
|
Chris@16
|
1586 if(this->flags() & regex_constants::no_escape_in_lists)
|
Chris@16
|
1587 {
|
Chris@16
|
1588 result = *m_position++;
|
Chris@16
|
1589 break;
|
Chris@16
|
1590 }
|
Chris@16
|
1591 ++m_position;
|
Chris@16
|
1592 result = unescape_character();
|
Chris@16
|
1593 break;
|
Chris@16
|
1594 case regex_constants::syntax_open_set:
|
Chris@16
|
1595 {
|
Chris@16
|
1596 if(m_end == ++m_position)
|
Chris@16
|
1597 {
|
Chris@16
|
1598 fail(regex_constants::error_collate, m_position - m_base);
|
Chris@16
|
1599 return result;
|
Chris@16
|
1600 }
|
Chris@16
|
1601 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot)
|
Chris@16
|
1602 {
|
Chris@16
|
1603 --m_position;
|
Chris@16
|
1604 result.first = *m_position;
|
Chris@16
|
1605 ++m_position;
|
Chris@16
|
1606 return result;
|
Chris@16
|
1607 }
|
Chris@16
|
1608 if(m_end == ++m_position)
|
Chris@16
|
1609 {
|
Chris@16
|
1610 fail(regex_constants::error_collate, m_position - m_base);
|
Chris@16
|
1611 return result;
|
Chris@16
|
1612 }
|
Chris@16
|
1613 const charT* name_first = m_position;
|
Chris@16
|
1614 // skip at least one character, then find the matching ':]'
|
Chris@16
|
1615 if(m_end == ++m_position)
|
Chris@16
|
1616 {
|
Chris@16
|
1617 fail(regex_constants::error_collate, name_first - m_base);
|
Chris@16
|
1618 return result;
|
Chris@16
|
1619 }
|
Chris@16
|
1620 while((m_position != m_end)
|
Chris@16
|
1621 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot))
|
Chris@16
|
1622 ++m_position;
|
Chris@16
|
1623 const charT* name_last = m_position;
|
Chris@16
|
1624 if(m_end == m_position)
|
Chris@16
|
1625 {
|
Chris@16
|
1626 fail(regex_constants::error_collate, name_first - m_base);
|
Chris@16
|
1627 return result;
|
Chris@16
|
1628 }
|
Chris@16
|
1629 if((m_end == ++m_position)
|
Chris@16
|
1630 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
|
Chris@16
|
1631 {
|
Chris@16
|
1632 fail(regex_constants::error_collate, name_first - m_base);
|
Chris@16
|
1633 return result;
|
Chris@16
|
1634 }
|
Chris@16
|
1635 ++m_position;
|
Chris@16
|
1636 string_type s = this->m_traits.lookup_collatename(name_first, name_last);
|
Chris@16
|
1637 if(s.empty() || (s.size() > 2))
|
Chris@16
|
1638 {
|
Chris@16
|
1639 fail(regex_constants::error_collate, name_first - m_base);
|
Chris@16
|
1640 return result;
|
Chris@16
|
1641 }
|
Chris@16
|
1642 result.first = s[0];
|
Chris@16
|
1643 if(s.size() > 1)
|
Chris@16
|
1644 result.second = s[1];
|
Chris@16
|
1645 else
|
Chris@16
|
1646 result.second = 0;
|
Chris@16
|
1647 return result;
|
Chris@16
|
1648 }
|
Chris@16
|
1649 default:
|
Chris@16
|
1650 result = *m_position++;
|
Chris@16
|
1651 }
|
Chris@16
|
1652 return result;
|
Chris@16
|
1653 }
|
Chris@16
|
1654
|
Chris@16
|
1655 //
|
Chris@16
|
1656 // does a value fit in the specified charT type?
|
Chris@16
|
1657 //
|
Chris@16
|
1658 template <class charT>
|
Chris@16
|
1659 bool valid_value(charT, int v, const mpl::true_&)
|
Chris@16
|
1660 {
|
Chris@16
|
1661 return (v >> (sizeof(charT) * CHAR_BIT)) == 0;
|
Chris@16
|
1662 }
|
Chris@16
|
1663 template <class charT>
|
Chris@16
|
1664 bool valid_value(charT, int, const mpl::false_&)
|
Chris@16
|
1665 {
|
Chris@16
|
1666 return true; // v will alsways fit in a charT
|
Chris@16
|
1667 }
|
Chris@16
|
1668 template <class charT>
|
Chris@16
|
1669 bool valid_value(charT c, int v)
|
Chris@16
|
1670 {
|
Chris@16
|
1671 return valid_value(c, v, mpl::bool_<(sizeof(charT) < sizeof(int))>());
|
Chris@16
|
1672 }
|
Chris@16
|
1673
|
Chris@16
|
1674 template <class charT, class traits>
|
Chris@16
|
1675 charT basic_regex_parser<charT, traits>::unescape_character()
|
Chris@16
|
1676 {
|
Chris@16
|
1677 #ifdef BOOST_MSVC
|
Chris@16
|
1678 #pragma warning(push)
|
Chris@16
|
1679 #pragma warning(disable:4127)
|
Chris@16
|
1680 #endif
|
Chris@16
|
1681 charT result(0);
|
Chris@16
|
1682 if(m_position == m_end)
|
Chris@16
|
1683 {
|
Chris@16
|
1684 fail(regex_constants::error_escape, m_position - m_base, "Escape sequence terminated prematurely.");
|
Chris@16
|
1685 return false;
|
Chris@16
|
1686 }
|
Chris@16
|
1687 switch(this->m_traits.escape_syntax_type(*m_position))
|
Chris@16
|
1688 {
|
Chris@16
|
1689 case regex_constants::escape_type_control_a:
|
Chris@16
|
1690 result = charT('\a');
|
Chris@16
|
1691 break;
|
Chris@16
|
1692 case regex_constants::escape_type_e:
|
Chris@16
|
1693 result = charT(27);
|
Chris@16
|
1694 break;
|
Chris@16
|
1695 case regex_constants::escape_type_control_f:
|
Chris@16
|
1696 result = charT('\f');
|
Chris@16
|
1697 break;
|
Chris@16
|
1698 case regex_constants::escape_type_control_n:
|
Chris@16
|
1699 result = charT('\n');
|
Chris@16
|
1700 break;
|
Chris@16
|
1701 case regex_constants::escape_type_control_r:
|
Chris@16
|
1702 result = charT('\r');
|
Chris@16
|
1703 break;
|
Chris@16
|
1704 case regex_constants::escape_type_control_t:
|
Chris@16
|
1705 result = charT('\t');
|
Chris@16
|
1706 break;
|
Chris@16
|
1707 case regex_constants::escape_type_control_v:
|
Chris@16
|
1708 result = charT('\v');
|
Chris@16
|
1709 break;
|
Chris@16
|
1710 case regex_constants::escape_type_word_assert:
|
Chris@16
|
1711 result = charT('\b');
|
Chris@16
|
1712 break;
|
Chris@16
|
1713 case regex_constants::escape_type_ascii_control:
|
Chris@16
|
1714 ++m_position;
|
Chris@16
|
1715 if(m_position == m_end)
|
Chris@16
|
1716 {
|
Chris@16
|
1717 // Rewind to start of escape:
|
Chris@16
|
1718 --m_position;
|
Chris@16
|
1719 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
|
Chris@16
|
1720 fail(regex_constants::error_escape, m_position - m_base, "ASCII escape sequence terminated prematurely.");
|
Chris@16
|
1721 return result;
|
Chris@16
|
1722 }
|
Chris@16
|
1723 result = static_cast<charT>(*m_position % 32);
|
Chris@16
|
1724 break;
|
Chris@16
|
1725 case regex_constants::escape_type_hex:
|
Chris@16
|
1726 ++m_position;
|
Chris@16
|
1727 if(m_position == m_end)
|
Chris@16
|
1728 {
|
Chris@16
|
1729 // Rewind to start of escape:
|
Chris@16
|
1730 --m_position;
|
Chris@16
|
1731 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
|
Chris@16
|
1732 fail(regex_constants::error_escape, m_position - m_base, "Hexadecimal escape sequence terminated prematurely.");
|
Chris@16
|
1733 return result;
|
Chris@16
|
1734 }
|
Chris@16
|
1735 // maybe have \x{ddd}
|
Chris@16
|
1736 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
|
Chris@16
|
1737 {
|
Chris@16
|
1738 ++m_position;
|
Chris@16
|
1739 if(m_position == m_end)
|
Chris@16
|
1740 {
|
Chris@16
|
1741 // Rewind to start of escape:
|
Chris@16
|
1742 --m_position;
|
Chris@16
|
1743 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
|
Chris@16
|
1744 fail(regex_constants::error_escape, m_position - m_base, "Missing } in hexadecimal escape sequence.");
|
Chris@16
|
1745 return result;
|
Chris@16
|
1746 }
|
Chris@16
|
1747 int i = this->m_traits.toi(m_position, m_end, 16);
|
Chris@16
|
1748 if((m_position == m_end)
|
Chris@16
|
1749 || (i < 0)
|
Chris@16
|
1750 || ((std::numeric_limits<charT>::is_specialized) && (i > (int)(std::numeric_limits<charT>::max)()))
|
Chris@16
|
1751 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
|
Chris@16
|
1752 {
|
Chris@16
|
1753 // Rewind to start of escape:
|
Chris@16
|
1754 --m_position;
|
Chris@16
|
1755 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
|
Chris@16
|
1756 fail(regex_constants::error_badbrace, m_position - m_base, "Hexadecimal escape sequence was invalid.");
|
Chris@16
|
1757 return result;
|
Chris@16
|
1758 }
|
Chris@16
|
1759 ++m_position;
|
Chris@16
|
1760 result = charT(i);
|
Chris@16
|
1761 }
|
Chris@16
|
1762 else
|
Chris@16
|
1763 {
|
Chris@16
|
1764 std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), static_cast<std::ptrdiff_t>(m_end - m_position));
|
Chris@16
|
1765 int i = this->m_traits.toi(m_position, m_position + len, 16);
|
Chris@16
|
1766 if((i < 0)
|
Chris@16
|
1767 || !valid_value(charT(0), i))
|
Chris@16
|
1768 {
|
Chris@16
|
1769 // Rewind to start of escape:
|
Chris@16
|
1770 --m_position;
|
Chris@16
|
1771 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
|
Chris@16
|
1772 fail(regex_constants::error_escape, m_position - m_base, "Escape sequence did not encode a valid character.");
|
Chris@16
|
1773 return result;
|
Chris@16
|
1774 }
|
Chris@16
|
1775 result = charT(i);
|
Chris@16
|
1776 }
|
Chris@16
|
1777 return result;
|
Chris@16
|
1778 case regex_constants::syntax_digit:
|
Chris@16
|
1779 {
|
Chris@16
|
1780 // an octal escape sequence, the first character must be a zero
|
Chris@16
|
1781 // followed by up to 3 octal digits:
|
Chris@16
|
1782 std::ptrdiff_t len = (std::min)(::boost::re_detail::distance(m_position, m_end), static_cast<std::ptrdiff_t>(4));
|
Chris@16
|
1783 const charT* bp = m_position;
|
Chris@16
|
1784 int val = this->m_traits.toi(bp, bp + 1, 8);
|
Chris@16
|
1785 if(val != 0)
|
Chris@16
|
1786 {
|
Chris@16
|
1787 // Rewind to start of escape:
|
Chris@16
|
1788 --m_position;
|
Chris@16
|
1789 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
|
Chris@16
|
1790 // Oops not an octal escape after all:
|
Chris@16
|
1791 fail(regex_constants::error_escape, m_position - m_base, "Invalid octal escape sequence.");
|
Chris@16
|
1792 return result;
|
Chris@16
|
1793 }
|
Chris@16
|
1794 val = this->m_traits.toi(m_position, m_position + len, 8);
|
Chris@16
|
1795 if(val < 0)
|
Chris@16
|
1796 {
|
Chris@16
|
1797 // Rewind to start of escape:
|
Chris@16
|
1798 --m_position;
|
Chris@16
|
1799 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
|
Chris@16
|
1800 fail(regex_constants::error_escape, m_position - m_base, "Octal escape sequence is invalid.");
|
Chris@16
|
1801 return result;
|
Chris@16
|
1802 }
|
Chris@16
|
1803 return static_cast<charT>(val);
|
Chris@16
|
1804 }
|
Chris@16
|
1805 case regex_constants::escape_type_named_char:
|
Chris@16
|
1806 {
|
Chris@16
|
1807 ++m_position;
|
Chris@16
|
1808 if(m_position == m_end)
|
Chris@16
|
1809 {
|
Chris@16
|
1810 // Rewind to start of escape:
|
Chris@16
|
1811 --m_position;
|
Chris@16
|
1812 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
|
Chris@16
|
1813 fail(regex_constants::error_escape, m_position - m_base);
|
Chris@16
|
1814 return false;
|
Chris@16
|
1815 }
|
Chris@16
|
1816 // maybe have \N{name}
|
Chris@16
|
1817 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
|
Chris@16
|
1818 {
|
Chris@16
|
1819 const charT* base = m_position;
|
Chris@16
|
1820 // skip forward until we find enclosing brace:
|
Chris@16
|
1821 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
|
Chris@16
|
1822 ++m_position;
|
Chris@16
|
1823 if(m_position == m_end)
|
Chris@16
|
1824 {
|
Chris@16
|
1825 // Rewind to start of escape:
|
Chris@16
|
1826 --m_position;
|
Chris@16
|
1827 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
|
Chris@16
|
1828 fail(regex_constants::error_escape, m_position - m_base);
|
Chris@16
|
1829 return false;
|
Chris@16
|
1830 }
|
Chris@16
|
1831 string_type s = this->m_traits.lookup_collatename(++base, m_position++);
|
Chris@16
|
1832 if(s.empty())
|
Chris@16
|
1833 {
|
Chris@16
|
1834 // Rewind to start of escape:
|
Chris@16
|
1835 --m_position;
|
Chris@16
|
1836 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
|
Chris@16
|
1837 fail(regex_constants::error_collate, m_position - m_base);
|
Chris@16
|
1838 return false;
|
Chris@16
|
1839 }
|
Chris@16
|
1840 if(s.size() == 1)
|
Chris@16
|
1841 {
|
Chris@16
|
1842 return s[0];
|
Chris@16
|
1843 }
|
Chris@16
|
1844 }
|
Chris@16
|
1845 // fall through is a failure:
|
Chris@16
|
1846 // Rewind to start of escape:
|
Chris@16
|
1847 --m_position;
|
Chris@16
|
1848 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
|
Chris@16
|
1849 fail(regex_constants::error_escape, m_position - m_base);
|
Chris@16
|
1850 return false;
|
Chris@16
|
1851 }
|
Chris@16
|
1852 default:
|
Chris@16
|
1853 result = *m_position;
|
Chris@16
|
1854 break;
|
Chris@16
|
1855 }
|
Chris@16
|
1856 ++m_position;
|
Chris@16
|
1857 return result;
|
Chris@16
|
1858 #ifdef BOOST_MSVC
|
Chris@16
|
1859 #pragma warning(pop)
|
Chris@16
|
1860 #endif
|
Chris@16
|
1861 }
|
Chris@16
|
1862
|
Chris@16
|
1863 template <class charT, class traits>
|
Chris@16
|
1864 bool basic_regex_parser<charT, traits>::parse_backref()
|
Chris@16
|
1865 {
|
Chris@16
|
1866 BOOST_ASSERT(m_position != m_end);
|
Chris@16
|
1867 const charT* pc = m_position;
|
Chris@16
|
1868 int i = this->m_traits.toi(pc, pc + 1, 10);
|
Chris@16
|
1869 if((i == 0) || (((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) && (this->flags() & regbase::no_bk_refs)))
|
Chris@16
|
1870 {
|
Chris@16
|
1871 // not a backref at all but an octal escape sequence:
|
Chris@16
|
1872 charT c = unescape_character();
|
Chris@16
|
1873 this->append_literal(c);
|
Chris@16
|
1874 }
|
Chris@16
|
1875 else if((i > 0) && (this->m_backrefs & (1u << (i-1))))
|
Chris@16
|
1876 {
|
Chris@16
|
1877 m_position = pc;
|
Chris@16
|
1878 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
|
Chris@16
|
1879 pb->index = i;
|
Chris@16
|
1880 pb->icase = this->flags() & regbase::icase;
|
Chris@16
|
1881 }
|
Chris@16
|
1882 else
|
Chris@16
|
1883 {
|
Chris@16
|
1884 // Rewind to start of escape:
|
Chris@16
|
1885 --m_position;
|
Chris@16
|
1886 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
|
Chris@16
|
1887 fail(regex_constants::error_backref, m_position - m_base);
|
Chris@16
|
1888 return false;
|
Chris@16
|
1889 }
|
Chris@16
|
1890 return true;
|
Chris@16
|
1891 }
|
Chris@16
|
1892
|
Chris@16
|
1893 template <class charT, class traits>
|
Chris@16
|
1894 bool basic_regex_parser<charT, traits>::parse_QE()
|
Chris@16
|
1895 {
|
Chris@16
|
1896 #ifdef BOOST_MSVC
|
Chris@16
|
1897 #pragma warning(push)
|
Chris@16
|
1898 #pragma warning(disable:4127)
|
Chris@16
|
1899 #endif
|
Chris@16
|
1900 //
|
Chris@16
|
1901 // parse a \Q...\E sequence:
|
Chris@16
|
1902 //
|
Chris@16
|
1903 ++m_position; // skip the Q
|
Chris@16
|
1904 const charT* start = m_position;
|
Chris@16
|
1905 const charT* end;
|
Chris@16
|
1906 do
|
Chris@16
|
1907 {
|
Chris@16
|
1908 while((m_position != m_end)
|
Chris@16
|
1909 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape))
|
Chris@16
|
1910 ++m_position;
|
Chris@16
|
1911 if(m_position == m_end)
|
Chris@16
|
1912 {
|
Chris@16
|
1913 // a \Q...\E sequence may terminate with the end of the expression:
|
Chris@16
|
1914 end = m_position;
|
Chris@16
|
1915 break;
|
Chris@16
|
1916 }
|
Chris@16
|
1917 if(++m_position == m_end) // skip the escape
|
Chris@16
|
1918 {
|
Chris@16
|
1919 fail(regex_constants::error_escape, m_position - m_base, "Unterminated \\Q...\\E sequence.");
|
Chris@16
|
1920 return false;
|
Chris@16
|
1921 }
|
Chris@16
|
1922 // check to see if it's a \E:
|
Chris@16
|
1923 if(this->m_traits.escape_syntax_type(*m_position) == regex_constants::escape_type_E)
|
Chris@16
|
1924 {
|
Chris@16
|
1925 ++m_position;
|
Chris@16
|
1926 end = m_position - 2;
|
Chris@16
|
1927 break;
|
Chris@16
|
1928 }
|
Chris@16
|
1929 // otherwise go round again:
|
Chris@16
|
1930 }while(true);
|
Chris@16
|
1931 //
|
Chris@16
|
1932 // now add all the character between the two escapes as literals:
|
Chris@16
|
1933 //
|
Chris@16
|
1934 while(start != end)
|
Chris@16
|
1935 {
|
Chris@16
|
1936 this->append_literal(*start);
|
Chris@16
|
1937 ++start;
|
Chris@16
|
1938 }
|
Chris@16
|
1939 return true;
|
Chris@16
|
1940 #ifdef BOOST_MSVC
|
Chris@16
|
1941 #pragma warning(pop)
|
Chris@16
|
1942 #endif
|
Chris@16
|
1943 }
|
Chris@16
|
1944
|
Chris@16
|
1945 template <class charT, class traits>
|
Chris@16
|
1946 bool basic_regex_parser<charT, traits>::parse_perl_extension()
|
Chris@16
|
1947 {
|
Chris@16
|
1948 if(++m_position == m_end)
|
Chris@16
|
1949 {
|
Chris@16
|
1950 // Rewind to start of (? sequence:
|
Chris@16
|
1951 --m_position;
|
Chris@16
|
1952 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
1953 fail(regex_constants::error_perl_extension, m_position - m_base);
|
Chris@16
|
1954 return false;
|
Chris@16
|
1955 }
|
Chris@16
|
1956 //
|
Chris@16
|
1957 // treat comments as a special case, as these
|
Chris@16
|
1958 // are the only ones that don't start with a leading
|
Chris@16
|
1959 // startmark state:
|
Chris@16
|
1960 //
|
Chris@16
|
1961 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_hash)
|
Chris@16
|
1962 {
|
Chris@16
|
1963 while((m_position != m_end)
|
Chris@16
|
1964 && (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark))
|
Chris@16
|
1965 {}
|
Chris@16
|
1966 return true;
|
Chris@16
|
1967 }
|
Chris@16
|
1968 //
|
Chris@16
|
1969 // backup some state, and prepare the way:
|
Chris@16
|
1970 //
|
Chris@16
|
1971 int markid = 0;
|
Chris@16
|
1972 std::ptrdiff_t jump_offset = 0;
|
Chris@16
|
1973 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
|
Chris@16
|
1974 pb->icase = this->flags() & regbase::icase;
|
Chris@16
|
1975 std::ptrdiff_t last_paren_start = this->getoffset(pb);
|
Chris@16
|
1976 // back up insertion point for alternations, and set new point:
|
Chris@16
|
1977 std::ptrdiff_t last_alt_point = m_alt_insert_point;
|
Chris@16
|
1978 this->m_pdata->m_data.align();
|
Chris@16
|
1979 m_alt_insert_point = this->m_pdata->m_data.size();
|
Chris@16
|
1980 std::ptrdiff_t expected_alt_point = m_alt_insert_point;
|
Chris@16
|
1981 bool restore_flags = true;
|
Chris@16
|
1982 regex_constants::syntax_option_type old_flags = this->flags();
|
Chris@16
|
1983 bool old_case_change = m_has_case_change;
|
Chris@16
|
1984 m_has_case_change = false;
|
Chris@16
|
1985 charT name_delim;
|
Chris@16
|
1986 int mark_reset = m_mark_reset;
|
Chris@16
|
1987 int max_mark = m_max_mark;
|
Chris@16
|
1988 m_mark_reset = -1;
|
Chris@16
|
1989 m_max_mark = m_mark_count;
|
Chris@16
|
1990 int v;
|
Chris@16
|
1991 //
|
Chris@16
|
1992 // select the actual extension used:
|
Chris@16
|
1993 //
|
Chris@16
|
1994 switch(this->m_traits.syntax_type(*m_position))
|
Chris@16
|
1995 {
|
Chris@16
|
1996 case regex_constants::syntax_or:
|
Chris@16
|
1997 m_mark_reset = m_mark_count;
|
Chris@16
|
1998 BOOST_FALLTHROUGH;
|
Chris@16
|
1999 case regex_constants::syntax_colon:
|
Chris@16
|
2000 //
|
Chris@16
|
2001 // a non-capturing mark:
|
Chris@16
|
2002 //
|
Chris@16
|
2003 pb->index = markid = 0;
|
Chris@16
|
2004 ++m_position;
|
Chris@16
|
2005 break;
|
Chris@16
|
2006 case regex_constants::syntax_digit:
|
Chris@16
|
2007 {
|
Chris@16
|
2008 //
|
Chris@16
|
2009 // a recursive subexpression:
|
Chris@16
|
2010 //
|
Chris@16
|
2011 v = this->m_traits.toi(m_position, m_end, 10);
|
Chris@16
|
2012 if((v < 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
|
Chris@16
|
2013 {
|
Chris@16
|
2014 // Rewind to start of (? sequence:
|
Chris@16
|
2015 --m_position;
|
Chris@16
|
2016 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2017 fail(regex_constants::error_perl_extension, m_position - m_base, "The recursive sub-expression refers to an invalid marking group, or is unterminated.");
|
Chris@16
|
2018 return false;
|
Chris@16
|
2019 }
|
Chris@16
|
2020 insert_recursion:
|
Chris@16
|
2021 pb->index = markid = 0;
|
Chris@16
|
2022 re_recurse* pr = static_cast<re_recurse*>(this->append_state(syntax_element_recurse, sizeof(re_recurse)));
|
Chris@16
|
2023 pr->alt.i = v;
|
Chris@16
|
2024 pr->state_id = 0;
|
Chris@16
|
2025 static_cast<re_case*>(
|
Chris@16
|
2026 this->append_state(syntax_element_toggle_case, sizeof(re_case))
|
Chris@16
|
2027 )->icase = this->flags() & regbase::icase;
|
Chris@16
|
2028 break;
|
Chris@16
|
2029 }
|
Chris@16
|
2030 case regex_constants::syntax_plus:
|
Chris@16
|
2031 //
|
Chris@16
|
2032 // A forward-relative recursive subexpression:
|
Chris@16
|
2033 //
|
Chris@16
|
2034 ++m_position;
|
Chris@16
|
2035 v = this->m_traits.toi(m_position, m_end, 10);
|
Chris@16
|
2036 if((v <= 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
|
Chris@16
|
2037 {
|
Chris@16
|
2038 // Rewind to start of (? sequence:
|
Chris@16
|
2039 --m_position;
|
Chris@16
|
2040 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2041 fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
|
Chris@16
|
2042 return false;
|
Chris@16
|
2043 }
|
Chris@16
|
2044 v += m_mark_count;
|
Chris@16
|
2045 goto insert_recursion;
|
Chris@16
|
2046 case regex_constants::syntax_dash:
|
Chris@16
|
2047 //
|
Chris@16
|
2048 // Possibly a backward-relative recursive subexpression:
|
Chris@16
|
2049 //
|
Chris@16
|
2050 ++m_position;
|
Chris@16
|
2051 v = this->m_traits.toi(m_position, m_end, 10);
|
Chris@16
|
2052 if(v <= 0)
|
Chris@16
|
2053 {
|
Chris@16
|
2054 --m_position;
|
Chris@16
|
2055 // Oops not a relative recursion at all, but a (?-imsx) group:
|
Chris@16
|
2056 goto option_group_jump;
|
Chris@16
|
2057 }
|
Chris@16
|
2058 v = m_mark_count + 1 - v;
|
Chris@16
|
2059 if(v <= 0)
|
Chris@16
|
2060 {
|
Chris@16
|
2061 // Rewind to start of (? sequence:
|
Chris@16
|
2062 --m_position;
|
Chris@16
|
2063 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2064 fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
|
Chris@16
|
2065 return false;
|
Chris@16
|
2066 }
|
Chris@16
|
2067 goto insert_recursion;
|
Chris@16
|
2068 case regex_constants::syntax_equal:
|
Chris@16
|
2069 pb->index = markid = -1;
|
Chris@16
|
2070 ++m_position;
|
Chris@16
|
2071 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
|
Chris@16
|
2072 this->m_pdata->m_data.align();
|
Chris@16
|
2073 m_alt_insert_point = this->m_pdata->m_data.size();
|
Chris@16
|
2074 break;
|
Chris@16
|
2075 case regex_constants::syntax_not:
|
Chris@16
|
2076 pb->index = markid = -2;
|
Chris@16
|
2077 ++m_position;
|
Chris@16
|
2078 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
|
Chris@16
|
2079 this->m_pdata->m_data.align();
|
Chris@16
|
2080 m_alt_insert_point = this->m_pdata->m_data.size();
|
Chris@16
|
2081 break;
|
Chris@16
|
2082 case regex_constants::escape_type_left_word:
|
Chris@16
|
2083 {
|
Chris@16
|
2084 // a lookbehind assertion:
|
Chris@16
|
2085 if(++m_position == m_end)
|
Chris@16
|
2086 {
|
Chris@16
|
2087 // Rewind to start of (? sequence:
|
Chris@16
|
2088 --m_position;
|
Chris@16
|
2089 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2090 fail(regex_constants::error_perl_extension, m_position - m_base);
|
Chris@16
|
2091 return false;
|
Chris@16
|
2092 }
|
Chris@16
|
2093 regex_constants::syntax_type t = this->m_traits.syntax_type(*m_position);
|
Chris@16
|
2094 if(t == regex_constants::syntax_not)
|
Chris@16
|
2095 pb->index = markid = -2;
|
Chris@16
|
2096 else if(t == regex_constants::syntax_equal)
|
Chris@16
|
2097 pb->index = markid = -1;
|
Chris@16
|
2098 else
|
Chris@16
|
2099 {
|
Chris@16
|
2100 // Probably a named capture which also starts (?< :
|
Chris@16
|
2101 name_delim = '>';
|
Chris@16
|
2102 --m_position;
|
Chris@16
|
2103 goto named_capture_jump;
|
Chris@16
|
2104 }
|
Chris@16
|
2105 ++m_position;
|
Chris@16
|
2106 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
|
Chris@16
|
2107 this->append_state(syntax_element_backstep, sizeof(re_brace));
|
Chris@16
|
2108 this->m_pdata->m_data.align();
|
Chris@16
|
2109 m_alt_insert_point = this->m_pdata->m_data.size();
|
Chris@16
|
2110 break;
|
Chris@16
|
2111 }
|
Chris@16
|
2112 case regex_constants::escape_type_right_word:
|
Chris@16
|
2113 //
|
Chris@16
|
2114 // an independent sub-expression:
|
Chris@16
|
2115 //
|
Chris@16
|
2116 pb->index = markid = -3;
|
Chris@16
|
2117 ++m_position;
|
Chris@16
|
2118 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
|
Chris@16
|
2119 this->m_pdata->m_data.align();
|
Chris@16
|
2120 m_alt_insert_point = this->m_pdata->m_data.size();
|
Chris@16
|
2121 break;
|
Chris@16
|
2122 case regex_constants::syntax_open_mark:
|
Chris@16
|
2123 {
|
Chris@16
|
2124 // a conditional expression:
|
Chris@16
|
2125 pb->index = markid = -4;
|
Chris@16
|
2126 if(++m_position == m_end)
|
Chris@16
|
2127 {
|
Chris@16
|
2128 // Rewind to start of (? sequence:
|
Chris@16
|
2129 --m_position;
|
Chris@16
|
2130 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2131 fail(regex_constants::error_perl_extension, m_position - m_base);
|
Chris@16
|
2132 return false;
|
Chris@16
|
2133 }
|
Chris@16
|
2134 v = this->m_traits.toi(m_position, m_end, 10);
|
Chris@16
|
2135 if(m_position == m_end)
|
Chris@16
|
2136 {
|
Chris@16
|
2137 // Rewind to start of (? sequence:
|
Chris@16
|
2138 --m_position;
|
Chris@16
|
2139 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2140 fail(regex_constants::error_perl_extension, m_position - m_base);
|
Chris@16
|
2141 return false;
|
Chris@16
|
2142 }
|
Chris@16
|
2143 if(*m_position == charT('R'))
|
Chris@16
|
2144 {
|
Chris@16
|
2145 if(++m_position == m_end)
|
Chris@16
|
2146 {
|
Chris@16
|
2147 // Rewind to start of (? sequence:
|
Chris@16
|
2148 --m_position;
|
Chris@16
|
2149 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2150 fail(regex_constants::error_perl_extension, m_position - m_base);
|
Chris@16
|
2151 return false;
|
Chris@16
|
2152 }
|
Chris@16
|
2153 if(*m_position == charT('&'))
|
Chris@16
|
2154 {
|
Chris@16
|
2155 const charT* base = ++m_position;
|
Chris@16
|
2156 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
|
Chris@16
|
2157 ++m_position;
|
Chris@16
|
2158 if(m_position == m_end)
|
Chris@16
|
2159 {
|
Chris@16
|
2160 // Rewind to start of (? sequence:
|
Chris@16
|
2161 --m_position;
|
Chris@16
|
2162 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2163 fail(regex_constants::error_perl_extension, m_position - m_base);
|
Chris@16
|
2164 return false;
|
Chris@16
|
2165 }
|
Chris@16
|
2166 v = -static_cast<int>(hash_value_from_capture_name(base, m_position));
|
Chris@16
|
2167 }
|
Chris@16
|
2168 else
|
Chris@16
|
2169 {
|
Chris@16
|
2170 v = -this->m_traits.toi(m_position, m_end, 10);
|
Chris@16
|
2171 }
|
Chris@16
|
2172 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
|
Chris@16
|
2173 br->index = v < 0 ? (v - 1) : 0;
|
Chris@16
|
2174 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
|
Chris@16
|
2175 {
|
Chris@16
|
2176 // Rewind to start of (? sequence:
|
Chris@16
|
2177 --m_position;
|
Chris@16
|
2178 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2179 fail(regex_constants::error_perl_extension, m_position - m_base);
|
Chris@16
|
2180 return false;
|
Chris@16
|
2181 }
|
Chris@16
|
2182 if(++m_position == m_end)
|
Chris@16
|
2183 {
|
Chris@16
|
2184 // Rewind to start of (? sequence:
|
Chris@16
|
2185 --m_position;
|
Chris@16
|
2186 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2187 fail(regex_constants::error_perl_extension, m_position - m_base);
|
Chris@16
|
2188 return false;
|
Chris@16
|
2189 }
|
Chris@16
|
2190 }
|
Chris@16
|
2191 else if((*m_position == charT('\'')) || (*m_position == charT('<')))
|
Chris@16
|
2192 {
|
Chris@16
|
2193 const charT* base = ++m_position;
|
Chris@16
|
2194 while((m_position != m_end) && (*m_position != charT('>')) && (*m_position != charT('\'')))
|
Chris@16
|
2195 ++m_position;
|
Chris@16
|
2196 if(m_position == m_end)
|
Chris@16
|
2197 {
|
Chris@16
|
2198 // Rewind to start of (? sequence:
|
Chris@16
|
2199 --m_position;
|
Chris@16
|
2200 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2201 fail(regex_constants::error_perl_extension, m_position - m_base);
|
Chris@16
|
2202 return false;
|
Chris@16
|
2203 }
|
Chris@16
|
2204 v = static_cast<int>(hash_value_from_capture_name(base, m_position));
|
Chris@16
|
2205 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
|
Chris@16
|
2206 br->index = v;
|
Chris@16
|
2207 if(((*m_position != charT('>')) && (*m_position != charT('\''))) || (++m_position == m_end))
|
Chris@16
|
2208 {
|
Chris@16
|
2209 // Rewind to start of (? sequence:
|
Chris@16
|
2210 --m_position;
|
Chris@16
|
2211 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2212 fail(regex_constants::error_perl_extension, m_position - m_base, "Unterminated named capture.");
|
Chris@16
|
2213 return false;
|
Chris@16
|
2214 }
|
Chris@16
|
2215 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
|
Chris@16
|
2216 {
|
Chris@16
|
2217 // Rewind to start of (? sequence:
|
Chris@16
|
2218 --m_position;
|
Chris@16
|
2219 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2220 fail(regex_constants::error_perl_extension, m_position - m_base);
|
Chris@16
|
2221 return false;
|
Chris@16
|
2222 }
|
Chris@16
|
2223 if(++m_position == m_end)
|
Chris@16
|
2224 {
|
Chris@16
|
2225 // Rewind to start of (? sequence:
|
Chris@16
|
2226 --m_position;
|
Chris@16
|
2227 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2228 fail(regex_constants::error_perl_extension, m_position - m_base);
|
Chris@16
|
2229 return false;
|
Chris@16
|
2230 }
|
Chris@16
|
2231 }
|
Chris@16
|
2232 else if(*m_position == charT('D'))
|
Chris@16
|
2233 {
|
Chris@16
|
2234 const char* def = "DEFINE";
|
Chris@16
|
2235 while(*def && (m_position != m_end) && (*m_position == charT(*def)))
|
Chris@16
|
2236 ++m_position, ++def;
|
Chris@16
|
2237 if((m_position == m_end) || *def)
|
Chris@16
|
2238 {
|
Chris@16
|
2239 // Rewind to start of (? sequence:
|
Chris@16
|
2240 --m_position;
|
Chris@16
|
2241 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2242 fail(regex_constants::error_perl_extension, m_position - m_base);
|
Chris@16
|
2243 return false;
|
Chris@16
|
2244 }
|
Chris@16
|
2245 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
|
Chris@16
|
2246 br->index = 9999; // special magic value!
|
Chris@16
|
2247 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
|
Chris@16
|
2248 {
|
Chris@16
|
2249 // Rewind to start of (? sequence:
|
Chris@16
|
2250 --m_position;
|
Chris@16
|
2251 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2252 fail(regex_constants::error_perl_extension, m_position - m_base);
|
Chris@16
|
2253 return false;
|
Chris@16
|
2254 }
|
Chris@16
|
2255 if(++m_position == m_end)
|
Chris@16
|
2256 {
|
Chris@16
|
2257 // Rewind to start of (? sequence:
|
Chris@16
|
2258 --m_position;
|
Chris@16
|
2259 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2260 fail(regex_constants::error_perl_extension, m_position - m_base);
|
Chris@16
|
2261 return false;
|
Chris@16
|
2262 }
|
Chris@16
|
2263 }
|
Chris@16
|
2264 else if(v > 0)
|
Chris@16
|
2265 {
|
Chris@16
|
2266 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
|
Chris@16
|
2267 br->index = v;
|
Chris@16
|
2268 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
|
Chris@16
|
2269 {
|
Chris@16
|
2270 // Rewind to start of (? sequence:
|
Chris@16
|
2271 --m_position;
|
Chris@16
|
2272 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2273 fail(regex_constants::error_perl_extension, m_position - m_base);
|
Chris@16
|
2274 return false;
|
Chris@16
|
2275 }
|
Chris@16
|
2276 if(++m_position == m_end)
|
Chris@16
|
2277 {
|
Chris@16
|
2278 // Rewind to start of (? sequence:
|
Chris@16
|
2279 --m_position;
|
Chris@16
|
2280 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2281 fail(regex_constants::error_perl_extension, m_position - m_base);
|
Chris@16
|
2282 return false;
|
Chris@16
|
2283 }
|
Chris@16
|
2284 }
|
Chris@16
|
2285 else
|
Chris@16
|
2286 {
|
Chris@16
|
2287 // verify that we have a lookahead or lookbehind assert:
|
Chris@16
|
2288 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_question)
|
Chris@16
|
2289 {
|
Chris@16
|
2290 // Rewind to start of (? sequence:
|
Chris@16
|
2291 --m_position;
|
Chris@16
|
2292 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2293 fail(regex_constants::error_perl_extension, m_position - m_base);
|
Chris@16
|
2294 return false;
|
Chris@16
|
2295 }
|
Chris@16
|
2296 if(++m_position == m_end)
|
Chris@16
|
2297 {
|
Chris@16
|
2298 // Rewind to start of (? sequence:
|
Chris@16
|
2299 --m_position;
|
Chris@16
|
2300 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2301 fail(regex_constants::error_perl_extension, m_position - m_base);
|
Chris@16
|
2302 return false;
|
Chris@16
|
2303 }
|
Chris@16
|
2304 if(this->m_traits.syntax_type(*m_position) == regex_constants::escape_type_left_word)
|
Chris@16
|
2305 {
|
Chris@16
|
2306 if(++m_position == m_end)
|
Chris@16
|
2307 {
|
Chris@16
|
2308 // Rewind to start of (? sequence:
|
Chris@16
|
2309 --m_position;
|
Chris@16
|
2310 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2311 fail(regex_constants::error_perl_extension, m_position - m_base);
|
Chris@16
|
2312 return false;
|
Chris@16
|
2313 }
|
Chris@16
|
2314 if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
|
Chris@16
|
2315 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
|
Chris@16
|
2316 {
|
Chris@16
|
2317 // Rewind to start of (? sequence:
|
Chris@16
|
2318 --m_position;
|
Chris@16
|
2319 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2320 fail(regex_constants::error_perl_extension, m_position - m_base);
|
Chris@16
|
2321 return false;
|
Chris@16
|
2322 }
|
Chris@16
|
2323 m_position -= 3;
|
Chris@16
|
2324 }
|
Chris@16
|
2325 else
|
Chris@16
|
2326 {
|
Chris@16
|
2327 if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
|
Chris@16
|
2328 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
|
Chris@16
|
2329 {
|
Chris@16
|
2330 // Rewind to start of (? sequence:
|
Chris@16
|
2331 --m_position;
|
Chris@16
|
2332 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2333 fail(regex_constants::error_perl_extension, m_position - m_base);
|
Chris@16
|
2334 return false;
|
Chris@16
|
2335 }
|
Chris@16
|
2336 m_position -= 2;
|
Chris@16
|
2337 }
|
Chris@16
|
2338 }
|
Chris@16
|
2339 break;
|
Chris@16
|
2340 }
|
Chris@16
|
2341 case regex_constants::syntax_close_mark:
|
Chris@16
|
2342 // Rewind to start of (? sequence:
|
Chris@16
|
2343 --m_position;
|
Chris@16
|
2344 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2345 fail(regex_constants::error_perl_extension, m_position - m_base);
|
Chris@16
|
2346 return false;
|
Chris@16
|
2347 case regex_constants::escape_type_end_buffer:
|
Chris@16
|
2348 {
|
Chris@16
|
2349 name_delim = *m_position;
|
Chris@16
|
2350 named_capture_jump:
|
Chris@16
|
2351 markid = 0;
|
Chris@16
|
2352 if(0 == (this->flags() & regbase::nosubs))
|
Chris@16
|
2353 {
|
Chris@16
|
2354 markid = ++m_mark_count;
|
Chris@16
|
2355 #ifndef BOOST_NO_STD_DISTANCE
|
Chris@16
|
2356 if(this->flags() & regbase::save_subexpression_location)
|
Chris@16
|
2357 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 2, 0));
|
Chris@16
|
2358 #else
|
Chris@16
|
2359 if(this->flags() & regbase::save_subexpression_location)
|
Chris@16
|
2360 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - 2, 0));
|
Chris@16
|
2361 #endif
|
Chris@16
|
2362 }
|
Chris@16
|
2363 pb->index = markid;
|
Chris@16
|
2364 const charT* base = ++m_position;
|
Chris@16
|
2365 if(m_position == m_end)
|
Chris@16
|
2366 {
|
Chris@16
|
2367 // Rewind to start of (? sequence:
|
Chris@16
|
2368 --m_position;
|
Chris@16
|
2369 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2370 fail(regex_constants::error_perl_extension, m_position - m_base);
|
Chris@16
|
2371 return false;
|
Chris@16
|
2372 }
|
Chris@16
|
2373 while((m_position != m_end) && (*m_position != name_delim))
|
Chris@16
|
2374 ++m_position;
|
Chris@16
|
2375 if(m_position == m_end)
|
Chris@16
|
2376 {
|
Chris@16
|
2377 // Rewind to start of (? sequence:
|
Chris@16
|
2378 --m_position;
|
Chris@16
|
2379 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2380 fail(regex_constants::error_perl_extension, m_position - m_base);
|
Chris@16
|
2381 return false;
|
Chris@16
|
2382 }
|
Chris@16
|
2383 this->m_pdata->set_name(base, m_position, markid);
|
Chris@16
|
2384 ++m_position;
|
Chris@16
|
2385 break;
|
Chris@16
|
2386 }
|
Chris@16
|
2387 default:
|
Chris@16
|
2388 if(*m_position == charT('R'))
|
Chris@16
|
2389 {
|
Chris@16
|
2390 ++m_position;
|
Chris@16
|
2391 v = 0;
|
Chris@16
|
2392 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
|
Chris@16
|
2393 {
|
Chris@16
|
2394 // Rewind to start of (? sequence:
|
Chris@16
|
2395 --m_position;
|
Chris@16
|
2396 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2397 fail(regex_constants::error_perl_extension, m_position - m_base);
|
Chris@16
|
2398 return false;
|
Chris@16
|
2399 }
|
Chris@16
|
2400 goto insert_recursion;
|
Chris@16
|
2401 }
|
Chris@16
|
2402 if(*m_position == charT('&'))
|
Chris@16
|
2403 {
|
Chris@16
|
2404 ++m_position;
|
Chris@16
|
2405 const charT* base = m_position;
|
Chris@16
|
2406 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
|
Chris@16
|
2407 ++m_position;
|
Chris@16
|
2408 if(m_position == m_end)
|
Chris@16
|
2409 {
|
Chris@16
|
2410 // Rewind to start of (? sequence:
|
Chris@16
|
2411 --m_position;
|
Chris@16
|
2412 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2413 fail(regex_constants::error_perl_extension, m_position - m_base);
|
Chris@16
|
2414 return false;
|
Chris@16
|
2415 }
|
Chris@16
|
2416 v = static_cast<int>(hash_value_from_capture_name(base, m_position));
|
Chris@16
|
2417 goto insert_recursion;
|
Chris@16
|
2418 }
|
Chris@16
|
2419 if(*m_position == charT('P'))
|
Chris@16
|
2420 {
|
Chris@16
|
2421 ++m_position;
|
Chris@16
|
2422 if(m_position == m_end)
|
Chris@16
|
2423 {
|
Chris@16
|
2424 // Rewind to start of (? sequence:
|
Chris@16
|
2425 --m_position;
|
Chris@16
|
2426 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2427 fail(regex_constants::error_perl_extension, m_position - m_base);
|
Chris@16
|
2428 return false;
|
Chris@16
|
2429 }
|
Chris@16
|
2430 if(*m_position == charT('>'))
|
Chris@16
|
2431 {
|
Chris@16
|
2432 ++m_position;
|
Chris@16
|
2433 const charT* base = m_position;
|
Chris@16
|
2434 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
|
Chris@16
|
2435 ++m_position;
|
Chris@16
|
2436 if(m_position == m_end)
|
Chris@16
|
2437 {
|
Chris@16
|
2438 // Rewind to start of (? sequence:
|
Chris@16
|
2439 --m_position;
|
Chris@16
|
2440 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2441 fail(regex_constants::error_perl_extension, m_position - m_base);
|
Chris@16
|
2442 return false;
|
Chris@16
|
2443 }
|
Chris@16
|
2444 v = static_cast<int>(hash_value_from_capture_name(base, m_position));
|
Chris@16
|
2445 goto insert_recursion;
|
Chris@16
|
2446 }
|
Chris@16
|
2447 }
|
Chris@16
|
2448 //
|
Chris@16
|
2449 // lets assume that we have a (?imsx) group and try and parse it:
|
Chris@16
|
2450 //
|
Chris@16
|
2451 option_group_jump:
|
Chris@16
|
2452 regex_constants::syntax_option_type opts = parse_options();
|
Chris@16
|
2453 if(m_position == m_end)
|
Chris@16
|
2454 {
|
Chris@16
|
2455 // Rewind to start of (? sequence:
|
Chris@16
|
2456 --m_position;
|
Chris@16
|
2457 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2458 fail(regex_constants::error_perl_extension, m_position - m_base);
|
Chris@16
|
2459 return false;
|
Chris@16
|
2460 }
|
Chris@16
|
2461 // make a note of whether we have a case change:
|
Chris@16
|
2462 m_has_case_change = ((opts & regbase::icase) != (this->flags() & regbase::icase));
|
Chris@16
|
2463 pb->index = markid = 0;
|
Chris@16
|
2464 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark)
|
Chris@16
|
2465 {
|
Chris@16
|
2466 // update flags and carry on as normal:
|
Chris@16
|
2467 this->flags(opts);
|
Chris@16
|
2468 restore_flags = false;
|
Chris@16
|
2469 old_case_change |= m_has_case_change; // defer end of scope by one ')'
|
Chris@16
|
2470 }
|
Chris@16
|
2471 else if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_colon)
|
Chris@16
|
2472 {
|
Chris@16
|
2473 // update flags and carry on until the matching ')' is found:
|
Chris@16
|
2474 this->flags(opts);
|
Chris@16
|
2475 ++m_position;
|
Chris@16
|
2476 }
|
Chris@16
|
2477 else
|
Chris@16
|
2478 {
|
Chris@16
|
2479 // Rewind to start of (? sequence:
|
Chris@16
|
2480 --m_position;
|
Chris@16
|
2481 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2482 fail(regex_constants::error_perl_extension, m_position - m_base);
|
Chris@16
|
2483 return false;
|
Chris@16
|
2484 }
|
Chris@16
|
2485
|
Chris@16
|
2486 // finally append a case change state if we need it:
|
Chris@16
|
2487 if(m_has_case_change)
|
Chris@16
|
2488 {
|
Chris@16
|
2489 static_cast<re_case*>(
|
Chris@16
|
2490 this->append_state(syntax_element_toggle_case, sizeof(re_case))
|
Chris@16
|
2491 )->icase = opts & regbase::icase;
|
Chris@16
|
2492 }
|
Chris@16
|
2493
|
Chris@16
|
2494 }
|
Chris@16
|
2495 //
|
Chris@16
|
2496 // now recursively add more states, this will terminate when we get to a
|
Chris@16
|
2497 // matching ')' :
|
Chris@16
|
2498 //
|
Chris@16
|
2499 parse_all();
|
Chris@16
|
2500 //
|
Chris@16
|
2501 // Unwind alternatives:
|
Chris@16
|
2502 //
|
Chris@16
|
2503 if(0 == unwind_alts(last_paren_start))
|
Chris@16
|
2504 {
|
Chris@16
|
2505 // Rewind to start of (? sequence:
|
Chris@16
|
2506 --m_position;
|
Chris@16
|
2507 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2508 fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid alternation operators within (?...) block.");
|
Chris@16
|
2509 return false;
|
Chris@16
|
2510 }
|
Chris@16
|
2511 //
|
Chris@16
|
2512 // we either have a ')' or we have run out of characters prematurely:
|
Chris@16
|
2513 //
|
Chris@16
|
2514 if(m_position == m_end)
|
Chris@16
|
2515 {
|
Chris@16
|
2516 // Rewind to start of (? sequence:
|
Chris@16
|
2517 --m_position;
|
Chris@16
|
2518 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2519 this->fail(regex_constants::error_paren, ::boost::re_detail::distance(m_base, m_end));
|
Chris@16
|
2520 return false;
|
Chris@16
|
2521 }
|
Chris@16
|
2522 BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
|
Chris@16
|
2523 ++m_position;
|
Chris@16
|
2524 //
|
Chris@16
|
2525 // restore the flags:
|
Chris@16
|
2526 //
|
Chris@16
|
2527 if(restore_flags)
|
Chris@16
|
2528 {
|
Chris@16
|
2529 // append a case change state if we need it:
|
Chris@16
|
2530 if(m_has_case_change)
|
Chris@16
|
2531 {
|
Chris@16
|
2532 static_cast<re_case*>(
|
Chris@16
|
2533 this->append_state(syntax_element_toggle_case, sizeof(re_case))
|
Chris@16
|
2534 )->icase = old_flags & regbase::icase;
|
Chris@16
|
2535 }
|
Chris@16
|
2536 this->flags(old_flags);
|
Chris@16
|
2537 }
|
Chris@16
|
2538 //
|
Chris@16
|
2539 // set up the jump pointer if we have one:
|
Chris@16
|
2540 //
|
Chris@16
|
2541 if(jump_offset)
|
Chris@16
|
2542 {
|
Chris@16
|
2543 this->m_pdata->m_data.align();
|
Chris@16
|
2544 re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
|
Chris@16
|
2545 jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
|
Chris@16
|
2546 if((this->m_last_state == jmp) && (markid != -2))
|
Chris@16
|
2547 {
|
Chris@16
|
2548 // Oops... we didn't have anything inside the assertion.
|
Chris@16
|
2549 // Note we don't get here for negated forward lookahead as (?!)
|
Chris@16
|
2550 // does have some uses.
|
Chris@16
|
2551 // Rewind to start of (? sequence:
|
Chris@16
|
2552 --m_position;
|
Chris@16
|
2553 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2554 fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid or empty zero width assertion.");
|
Chris@16
|
2555 return false;
|
Chris@16
|
2556 }
|
Chris@16
|
2557 }
|
Chris@16
|
2558 //
|
Chris@16
|
2559 // verify that if this is conditional expression, that we do have
|
Chris@16
|
2560 // an alternative, if not add one:
|
Chris@16
|
2561 //
|
Chris@16
|
2562 if(markid == -4)
|
Chris@16
|
2563 {
|
Chris@16
|
2564 re_syntax_base* b = this->getaddress(expected_alt_point);
|
Chris@16
|
2565 // Make sure we have exactly one alternative following this state:
|
Chris@16
|
2566 if(b->type != syntax_element_alt)
|
Chris@16
|
2567 {
|
Chris@16
|
2568 re_alt* alt = static_cast<re_alt*>(this->insert_state(expected_alt_point, syntax_element_alt, sizeof(re_alt)));
|
Chris@16
|
2569 alt->alt.i = this->m_pdata->m_data.size() - this->getoffset(alt);
|
Chris@16
|
2570 }
|
Chris@16
|
2571 else if(this->getaddress(static_cast<re_alt*>(b)->alt.i, b)->type == syntax_element_alt)
|
Chris@16
|
2572 {
|
Chris@16
|
2573 // Can't have seen more than one alternative:
|
Chris@16
|
2574 // Rewind to start of (? sequence:
|
Chris@16
|
2575 --m_position;
|
Chris@16
|
2576 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2577 fail(regex_constants::error_bad_pattern, m_position - m_base, "More than one alternation operator | was encountered inside a conditional expression.");
|
Chris@16
|
2578 return false;
|
Chris@16
|
2579 }
|
Chris@16
|
2580 else
|
Chris@16
|
2581 {
|
Chris@16
|
2582 // We must *not* have seen an alternative inside a (DEFINE) block:
|
Chris@16
|
2583 b = this->getaddress(b->next.i, b);
|
Chris@16
|
2584 if((b->type == syntax_element_assert_backref) && (static_cast<re_brace*>(b)->index == 9999))
|
Chris@16
|
2585 {
|
Chris@16
|
2586 // Rewind to start of (? sequence:
|
Chris@16
|
2587 --m_position;
|
Chris@16
|
2588 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2589 fail(regex_constants::error_bad_pattern, m_position - m_base, "Alternation operators are not allowed inside a DEFINE block.");
|
Chris@16
|
2590 return false;
|
Chris@16
|
2591 }
|
Chris@16
|
2592 }
|
Chris@16
|
2593 // check for invalid repetition of next state:
|
Chris@16
|
2594 b = this->getaddress(expected_alt_point);
|
Chris@16
|
2595 b = this->getaddress(static_cast<re_alt*>(b)->next.i, b);
|
Chris@16
|
2596 if((b->type != syntax_element_assert_backref)
|
Chris@16
|
2597 && (b->type != syntax_element_startmark))
|
Chris@16
|
2598 {
|
Chris@16
|
2599 // Rewind to start of (? sequence:
|
Chris@16
|
2600 --m_position;
|
Chris@16
|
2601 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2602 fail(regex_constants::error_badrepeat, m_position - m_base, "A repetition operator cannot be applied to a zero-width assertion.");
|
Chris@16
|
2603 return false;
|
Chris@16
|
2604 }
|
Chris@16
|
2605 }
|
Chris@16
|
2606 //
|
Chris@16
|
2607 // append closing parenthesis state:
|
Chris@16
|
2608 //
|
Chris@16
|
2609 pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
|
Chris@16
|
2610 pb->index = markid;
|
Chris@16
|
2611 pb->icase = this->flags() & regbase::icase;
|
Chris@16
|
2612 this->m_paren_start = last_paren_start;
|
Chris@16
|
2613 //
|
Chris@16
|
2614 // restore the alternate insertion point:
|
Chris@16
|
2615 //
|
Chris@16
|
2616 this->m_alt_insert_point = last_alt_point;
|
Chris@16
|
2617 //
|
Chris@16
|
2618 // and the case change data:
|
Chris@16
|
2619 //
|
Chris@16
|
2620 m_has_case_change = old_case_change;
|
Chris@16
|
2621 //
|
Chris@16
|
2622 // And the mark_reset data:
|
Chris@16
|
2623 //
|
Chris@16
|
2624 if(m_max_mark > m_mark_count)
|
Chris@16
|
2625 {
|
Chris@16
|
2626 m_mark_count = m_max_mark;
|
Chris@16
|
2627 }
|
Chris@16
|
2628 m_mark_reset = mark_reset;
|
Chris@16
|
2629 m_max_mark = max_mark;
|
Chris@16
|
2630
|
Chris@16
|
2631
|
Chris@16
|
2632 if(markid > 0)
|
Chris@16
|
2633 {
|
Chris@16
|
2634 #ifndef BOOST_NO_STD_DISTANCE
|
Chris@16
|
2635 if(this->flags() & regbase::save_subexpression_location)
|
Chris@16
|
2636 this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position) - 1;
|
Chris@16
|
2637 #else
|
Chris@16
|
2638 if(this->flags() & regbase::save_subexpression_location)
|
Chris@16
|
2639 this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base) - 1;
|
Chris@16
|
2640 #endif
|
Chris@16
|
2641 //
|
Chris@16
|
2642 // allow backrefs to this mark:
|
Chris@16
|
2643 //
|
Chris@16
|
2644 if((markid > 0) && (markid < (int)(sizeof(unsigned) * CHAR_BIT)))
|
Chris@16
|
2645 this->m_backrefs |= 1u << (markid - 1);
|
Chris@16
|
2646 }
|
Chris@16
|
2647 return true;
|
Chris@16
|
2648 }
|
Chris@16
|
2649
|
Chris@16
|
2650 template <class charT, class traits>
|
Chris@16
|
2651 bool basic_regex_parser<charT, traits>::add_emacs_code(bool negate)
|
Chris@16
|
2652 {
|
Chris@16
|
2653 //
|
Chris@16
|
2654 // parses an emacs style \sx or \Sx construct.
|
Chris@16
|
2655 //
|
Chris@16
|
2656 if(++m_position == m_end)
|
Chris@16
|
2657 {
|
Chris@16
|
2658 // Rewind to start of sequence:
|
Chris@16
|
2659 --m_position;
|
Chris@16
|
2660 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
|
Chris@16
|
2661 fail(regex_constants::error_escape, m_position - m_base);
|
Chris@16
|
2662 return false;
|
Chris@16
|
2663 }
|
Chris@16
|
2664 basic_char_set<charT, traits> char_set;
|
Chris@16
|
2665 if(negate)
|
Chris@16
|
2666 char_set.negate();
|
Chris@16
|
2667
|
Chris@16
|
2668 static const charT s_punct[5] = { 'p', 'u', 'n', 'c', 't', };
|
Chris@16
|
2669
|
Chris@16
|
2670 switch(*m_position)
|
Chris@16
|
2671 {
|
Chris@16
|
2672 case 's':
|
Chris@16
|
2673 case ' ':
|
Chris@16
|
2674 char_set.add_class(this->m_mask_space);
|
Chris@16
|
2675 break;
|
Chris@16
|
2676 case 'w':
|
Chris@16
|
2677 char_set.add_class(this->m_word_mask);
|
Chris@16
|
2678 break;
|
Chris@16
|
2679 case '_':
|
Chris@16
|
2680 char_set.add_single(digraph<charT>(charT('$')));
|
Chris@16
|
2681 char_set.add_single(digraph<charT>(charT('&')));
|
Chris@16
|
2682 char_set.add_single(digraph<charT>(charT('*')));
|
Chris@16
|
2683 char_set.add_single(digraph<charT>(charT('+')));
|
Chris@16
|
2684 char_set.add_single(digraph<charT>(charT('-')));
|
Chris@16
|
2685 char_set.add_single(digraph<charT>(charT('_')));
|
Chris@16
|
2686 char_set.add_single(digraph<charT>(charT('<')));
|
Chris@16
|
2687 char_set.add_single(digraph<charT>(charT('>')));
|
Chris@16
|
2688 break;
|
Chris@16
|
2689 case '.':
|
Chris@16
|
2690 char_set.add_class(this->m_traits.lookup_classname(s_punct, s_punct+5));
|
Chris@16
|
2691 break;
|
Chris@16
|
2692 case '(':
|
Chris@16
|
2693 char_set.add_single(digraph<charT>(charT('(')));
|
Chris@16
|
2694 char_set.add_single(digraph<charT>(charT('[')));
|
Chris@16
|
2695 char_set.add_single(digraph<charT>(charT('{')));
|
Chris@16
|
2696 break;
|
Chris@16
|
2697 case ')':
|
Chris@16
|
2698 char_set.add_single(digraph<charT>(charT(')')));
|
Chris@16
|
2699 char_set.add_single(digraph<charT>(charT(']')));
|
Chris@16
|
2700 char_set.add_single(digraph<charT>(charT('}')));
|
Chris@16
|
2701 break;
|
Chris@16
|
2702 case '"':
|
Chris@16
|
2703 char_set.add_single(digraph<charT>(charT('"')));
|
Chris@16
|
2704 char_set.add_single(digraph<charT>(charT('\'')));
|
Chris@16
|
2705 char_set.add_single(digraph<charT>(charT('`')));
|
Chris@16
|
2706 break;
|
Chris@16
|
2707 case '\'':
|
Chris@16
|
2708 char_set.add_single(digraph<charT>(charT('\'')));
|
Chris@16
|
2709 char_set.add_single(digraph<charT>(charT(',')));
|
Chris@16
|
2710 char_set.add_single(digraph<charT>(charT('#')));
|
Chris@16
|
2711 break;
|
Chris@16
|
2712 case '<':
|
Chris@16
|
2713 char_set.add_single(digraph<charT>(charT(';')));
|
Chris@16
|
2714 break;
|
Chris@16
|
2715 case '>':
|
Chris@16
|
2716 char_set.add_single(digraph<charT>(charT('\n')));
|
Chris@16
|
2717 char_set.add_single(digraph<charT>(charT('\f')));
|
Chris@16
|
2718 break;
|
Chris@16
|
2719 default:
|
Chris@16
|
2720 fail(regex_constants::error_ctype, m_position - m_base);
|
Chris@16
|
2721 return false;
|
Chris@16
|
2722 }
|
Chris@16
|
2723 if(0 == this->append_set(char_set))
|
Chris@16
|
2724 {
|
Chris@16
|
2725 fail(regex_constants::error_ctype, m_position - m_base);
|
Chris@16
|
2726 return false;
|
Chris@16
|
2727 }
|
Chris@16
|
2728 ++m_position;
|
Chris@16
|
2729 return true;
|
Chris@16
|
2730 }
|
Chris@16
|
2731
|
Chris@16
|
2732 template <class charT, class traits>
|
Chris@16
|
2733 regex_constants::syntax_option_type basic_regex_parser<charT, traits>::parse_options()
|
Chris@16
|
2734 {
|
Chris@16
|
2735 // we have a (?imsx-imsx) group, convert it into a set of flags:
|
Chris@16
|
2736 regex_constants::syntax_option_type f = this->flags();
|
Chris@16
|
2737 bool breakout = false;
|
Chris@16
|
2738 do
|
Chris@16
|
2739 {
|
Chris@16
|
2740 switch(*m_position)
|
Chris@16
|
2741 {
|
Chris@16
|
2742 case 's':
|
Chris@16
|
2743 f |= regex_constants::mod_s;
|
Chris@16
|
2744 f &= ~regex_constants::no_mod_s;
|
Chris@16
|
2745 break;
|
Chris@16
|
2746 case 'm':
|
Chris@16
|
2747 f &= ~regex_constants::no_mod_m;
|
Chris@16
|
2748 break;
|
Chris@16
|
2749 case 'i':
|
Chris@16
|
2750 f |= regex_constants::icase;
|
Chris@16
|
2751 break;
|
Chris@16
|
2752 case 'x':
|
Chris@16
|
2753 f |= regex_constants::mod_x;
|
Chris@16
|
2754 break;
|
Chris@16
|
2755 default:
|
Chris@16
|
2756 breakout = true;
|
Chris@16
|
2757 continue;
|
Chris@16
|
2758 }
|
Chris@16
|
2759 if(++m_position == m_end)
|
Chris@16
|
2760 {
|
Chris@16
|
2761 // Rewind to start of (? sequence:
|
Chris@16
|
2762 --m_position;
|
Chris@16
|
2763 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2764 fail(regex_constants::error_paren, m_position - m_base);
|
Chris@16
|
2765 return false;
|
Chris@16
|
2766 }
|
Chris@16
|
2767 }
|
Chris@16
|
2768 while(!breakout);
|
Chris@16
|
2769
|
Chris@16
|
2770 breakout = false;
|
Chris@16
|
2771
|
Chris@16
|
2772 if(*m_position == static_cast<charT>('-'))
|
Chris@16
|
2773 {
|
Chris@16
|
2774 if(++m_position == m_end)
|
Chris@16
|
2775 {
|
Chris@16
|
2776 // Rewind to start of (? sequence:
|
Chris@16
|
2777 --m_position;
|
Chris@16
|
2778 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2779 fail(regex_constants::error_paren, m_position - m_base);
|
Chris@16
|
2780 return false;
|
Chris@16
|
2781 }
|
Chris@16
|
2782 do
|
Chris@16
|
2783 {
|
Chris@16
|
2784 switch(*m_position)
|
Chris@16
|
2785 {
|
Chris@16
|
2786 case 's':
|
Chris@16
|
2787 f &= ~regex_constants::mod_s;
|
Chris@16
|
2788 f |= regex_constants::no_mod_s;
|
Chris@16
|
2789 break;
|
Chris@16
|
2790 case 'm':
|
Chris@16
|
2791 f |= regex_constants::no_mod_m;
|
Chris@16
|
2792 break;
|
Chris@16
|
2793 case 'i':
|
Chris@16
|
2794 f &= ~regex_constants::icase;
|
Chris@16
|
2795 break;
|
Chris@16
|
2796 case 'x':
|
Chris@16
|
2797 f &= ~regex_constants::mod_x;
|
Chris@16
|
2798 break;
|
Chris@16
|
2799 default:
|
Chris@16
|
2800 breakout = true;
|
Chris@16
|
2801 continue;
|
Chris@16
|
2802 }
|
Chris@16
|
2803 if(++m_position == m_end)
|
Chris@16
|
2804 {
|
Chris@16
|
2805 // Rewind to start of (? sequence:
|
Chris@16
|
2806 --m_position;
|
Chris@16
|
2807 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
|
Chris@16
|
2808 fail(regex_constants::error_paren, m_position - m_base);
|
Chris@16
|
2809 return false;
|
Chris@16
|
2810 }
|
Chris@16
|
2811 }
|
Chris@16
|
2812 while(!breakout);
|
Chris@16
|
2813 }
|
Chris@16
|
2814 return f;
|
Chris@16
|
2815 }
|
Chris@16
|
2816
|
Chris@16
|
2817 template <class charT, class traits>
|
Chris@16
|
2818 bool basic_regex_parser<charT, traits>::unwind_alts(std::ptrdiff_t last_paren_start)
|
Chris@16
|
2819 {
|
Chris@16
|
2820 //
|
Chris@16
|
2821 // If we didn't actually add any states after the last
|
Chris@16
|
2822 // alternative then that's an error:
|
Chris@16
|
2823 //
|
Chris@16
|
2824 if((this->m_alt_insert_point == static_cast<std::ptrdiff_t>(this->m_pdata->m_data.size()))
|
Chris@16
|
2825 && m_alt_jumps.size() && (m_alt_jumps.back() > last_paren_start)
|
Chris@16
|
2826 &&
|
Chris@16
|
2827 !(
|
Chris@16
|
2828 ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
|
Chris@16
|
2829 &&
|
Chris@16
|
2830 ((this->flags() & regbase::no_empty_expressions) == 0)
|
Chris@16
|
2831 )
|
Chris@16
|
2832 )
|
Chris@16
|
2833 {
|
Chris@16
|
2834 fail(regex_constants::error_empty, this->m_position - this->m_base, "Can't terminate a sub-expression with an alternation operator |.");
|
Chris@16
|
2835 return false;
|
Chris@16
|
2836 }
|
Chris@16
|
2837 //
|
Chris@16
|
2838 // Fix up our alternatives:
|
Chris@16
|
2839 //
|
Chris@16
|
2840 while(m_alt_jumps.size() && (m_alt_jumps.back() > last_paren_start))
|
Chris@16
|
2841 {
|
Chris@16
|
2842 //
|
Chris@16
|
2843 // fix up the jump to point to the end of the states
|
Chris@16
|
2844 // that we've just added:
|
Chris@16
|
2845 //
|
Chris@16
|
2846 std::ptrdiff_t jump_offset = m_alt_jumps.back();
|
Chris@16
|
2847 m_alt_jumps.pop_back();
|
Chris@16
|
2848 this->m_pdata->m_data.align();
|
Chris@16
|
2849 re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
|
Chris@16
|
2850 BOOST_ASSERT(jmp->type == syntax_element_jump);
|
Chris@16
|
2851 jmp->alt.i = this->m_pdata->m_data.size() - jump_offset;
|
Chris@16
|
2852 }
|
Chris@16
|
2853 return true;
|
Chris@16
|
2854 }
|
Chris@16
|
2855
|
Chris@16
|
2856 #ifdef BOOST_MSVC
|
Chris@16
|
2857 #pragma warning(pop)
|
Chris@16
|
2858 #endif
|
Chris@16
|
2859
|
Chris@16
|
2860 } // namespace re_detail
|
Chris@16
|
2861 } // namespace boost
|
Chris@16
|
2862
|
Chris@16
|
2863 #ifdef BOOST_MSVC
|
Chris@16
|
2864 #pragma warning(push)
|
Chris@16
|
2865 #pragma warning(disable: 4103)
|
Chris@16
|
2866 #endif
|
Chris@16
|
2867 #ifdef BOOST_HAS_ABI_HEADERS
|
Chris@16
|
2868 # include BOOST_ABI_SUFFIX
|
Chris@16
|
2869 #endif
|
Chris@16
|
2870 #ifdef BOOST_MSVC
|
Chris@16
|
2871 #pragma warning(pop)
|
Chris@16
|
2872 #endif
|
Chris@16
|
2873
|
Chris@16
|
2874 #endif
|