annotate DEPENDENCIES/generic/include/boost/regex/v4/basic_regex_parser.hpp @ 47:fe753ff3d0b5

Simpler solution to .cat test
author Chris Cannam
date Thu, 07 Aug 2014 15:05:37 +0100
parents 2665513ce2d3
children c530137014c0
rev   line source
Chris@16 1 /*
Chris@16 2 *
Chris@16 3 * Copyright (c) 2004
Chris@16 4 * John Maddock
Chris@16 5 *
Chris@16 6 * Use, modification and distribution are subject to the
Chris@16 7 * Boost Software License, Version 1.0. (See accompanying file
Chris@16 8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
Chris@16 9 *
Chris@16 10 */
Chris@16 11
Chris@16 12 /*
Chris@16 13 * LOCATION: see http://www.boost.org for most recent version.
Chris@16 14 * FILE basic_regex_parser.hpp
Chris@16 15 * VERSION see <boost/version.hpp>
Chris@16 16 * DESCRIPTION: Declares template class basic_regex_parser.
Chris@16 17 */
Chris@16 18
Chris@16 19 #ifndef BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP
Chris@16 20 #define BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP
Chris@16 21
Chris@16 22 #ifdef BOOST_MSVC
Chris@16 23 #pragma warning(push)
Chris@16 24 #pragma warning(disable: 4103)
Chris@16 25 #endif
Chris@16 26 #ifdef BOOST_HAS_ABI_HEADERS
Chris@16 27 # include BOOST_ABI_PREFIX
Chris@16 28 #endif
Chris@16 29 #ifdef BOOST_MSVC
Chris@16 30 #pragma warning(pop)
Chris@16 31 #endif
Chris@16 32
Chris@16 33 namespace boost{
Chris@16 34 namespace re_detail{
Chris@16 35
Chris@16 36 #ifdef BOOST_MSVC
Chris@16 37 #pragma warning(push)
Chris@16 38 #pragma warning(disable:4244 4800)
Chris@16 39 #endif
Chris@16 40
Chris@16 41 template <class charT, class traits>
Chris@16 42 class basic_regex_parser : public basic_regex_creator<charT, traits>
Chris@16 43 {
Chris@16 44 public:
Chris@16 45 basic_regex_parser(regex_data<charT, traits>* data);
Chris@16 46 void parse(const charT* p1, const charT* p2, unsigned flags);
Chris@16 47 void fail(regex_constants::error_type error_code, std::ptrdiff_t position);
Chris@16 48 void fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos);
Chris@16 49 void fail(regex_constants::error_type error_code, std::ptrdiff_t position, const std::string& message)
Chris@16 50 {
Chris@16 51 fail(error_code, position, message, position);
Chris@16 52 }
Chris@16 53
Chris@16 54 bool parse_all();
Chris@16 55 bool parse_basic();
Chris@16 56 bool parse_extended();
Chris@16 57 bool parse_literal();
Chris@16 58 bool parse_open_paren();
Chris@16 59 bool parse_basic_escape();
Chris@16 60 bool parse_extended_escape();
Chris@16 61 bool parse_match_any();
Chris@16 62 bool parse_repeat(std::size_t low = 0, std::size_t high = (std::numeric_limits<std::size_t>::max)());
Chris@16 63 bool parse_repeat_range(bool isbasic);
Chris@16 64 bool parse_alt();
Chris@16 65 bool parse_set();
Chris@16 66 bool parse_backref();
Chris@16 67 void parse_set_literal(basic_char_set<charT, traits>& char_set);
Chris@16 68 bool parse_inner_set(basic_char_set<charT, traits>& char_set);
Chris@16 69 bool parse_QE();
Chris@16 70 bool parse_perl_extension();
Chris@16 71 bool add_emacs_code(bool negate);
Chris@16 72 bool unwind_alts(std::ptrdiff_t last_paren_start);
Chris@16 73 digraph<charT> get_next_set_literal(basic_char_set<charT, traits>& char_set);
Chris@16 74 charT unescape_character();
Chris@16 75 regex_constants::syntax_option_type parse_options();
Chris@16 76
Chris@16 77 private:
Chris@16 78 typedef bool (basic_regex_parser::*parser_proc_type)();
Chris@16 79 typedef typename traits::string_type string_type;
Chris@16 80 typedef typename traits::char_class_type char_class_type;
Chris@16 81 parser_proc_type m_parser_proc; // the main parser to use
Chris@16 82 const charT* m_base; // the start of the string being parsed
Chris@16 83 const charT* m_end; // the end of the string being parsed
Chris@16 84 const charT* m_position; // our current parser position
Chris@16 85 unsigned m_mark_count; // how many sub-expressions we have
Chris@16 86 int m_mark_reset; // used to indicate that we're inside a (?|...) block.
Chris@16 87 unsigned m_max_mark; // largest mark count seen inside a (?|...) block.
Chris@16 88 std::ptrdiff_t m_paren_start; // where the last seen ')' began (where repeats are inserted).
Chris@16 89 std::ptrdiff_t m_alt_insert_point; // where to insert the next alternative
Chris@16 90 bool m_has_case_change; // true if somewhere in the current block the case has changed
Chris@16 91 #if defined(BOOST_MSVC) && defined(_M_IX86)
Chris@16 92 // This is an ugly warning suppression workaround (for warnings *inside* std::vector
Chris@16 93 // that can not otherwise be suppressed)...
Chris@16 94 BOOST_STATIC_ASSERT(sizeof(long) >= sizeof(void*));
Chris@16 95 std::vector<long> m_alt_jumps; // list of alternative in the current scope.
Chris@16 96 #else
Chris@16 97 std::vector<std::ptrdiff_t> m_alt_jumps; // list of alternative in the current scope.
Chris@16 98 #endif
Chris@16 99
Chris@16 100 basic_regex_parser& operator=(const basic_regex_parser&);
Chris@16 101 basic_regex_parser(const basic_regex_parser&);
Chris@16 102 };
Chris@16 103
Chris@16 104 template <class charT, class traits>
Chris@16 105 basic_regex_parser<charT, traits>::basic_regex_parser(regex_data<charT, traits>* data)
Chris@16 106 : basic_regex_creator<charT, traits>(data), m_mark_count(0), m_mark_reset(-1), m_max_mark(0), m_paren_start(0), m_alt_insert_point(0), m_has_case_change(false)
Chris@16 107 {
Chris@16 108 }
Chris@16 109
Chris@16 110 template <class charT, class traits>
Chris@16 111 void basic_regex_parser<charT, traits>::parse(const charT* p1, const charT* p2, unsigned l_flags)
Chris@16 112 {
Chris@16 113 // pass l_flags on to base class:
Chris@16 114 this->init(l_flags);
Chris@16 115 // set up pointers:
Chris@16 116 m_position = m_base = p1;
Chris@16 117 m_end = p2;
Chris@16 118 // empty strings are errors:
Chris@16 119 if((p1 == p2) &&
Chris@16 120 (
Chris@16 121 ((l_flags & regbase::main_option_type) != regbase::perl_syntax_group)
Chris@16 122 || (l_flags & regbase::no_empty_expressions)
Chris@16 123 )
Chris@16 124 )
Chris@16 125 {
Chris@16 126 fail(regex_constants::error_empty, 0);
Chris@16 127 return;
Chris@16 128 }
Chris@16 129 // select which parser to use:
Chris@16 130 switch(l_flags & regbase::main_option_type)
Chris@16 131 {
Chris@16 132 case regbase::perl_syntax_group:
Chris@16 133 {
Chris@16 134 m_parser_proc = &basic_regex_parser<charT, traits>::parse_extended;
Chris@16 135 //
Chris@16 136 // Add a leading paren with index zero to give recursions a target:
Chris@16 137 //
Chris@16 138 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
Chris@16 139 br->index = 0;
Chris@16 140 br->icase = this->flags() & regbase::icase;
Chris@16 141 break;
Chris@16 142 }
Chris@16 143 case regbase::basic_syntax_group:
Chris@16 144 m_parser_proc = &basic_regex_parser<charT, traits>::parse_basic;
Chris@16 145 break;
Chris@16 146 case regbase::literal:
Chris@16 147 m_parser_proc = &basic_regex_parser<charT, traits>::parse_literal;
Chris@16 148 break;
Chris@16 149 default:
Chris@16 150 // Ooops, someone has managed to set more than one of the main option flags,
Chris@16 151 // so this must be an error:
Chris@16 152 fail(regex_constants::error_unknown, 0, "An invalid combination of regular expression syntax flags was used.");
Chris@16 153 return;
Chris@16 154 }
Chris@16 155
Chris@16 156 // parse all our characters:
Chris@16 157 bool result = parse_all();
Chris@16 158 //
Chris@16 159 // Unwind our alternatives:
Chris@16 160 //
Chris@16 161 unwind_alts(-1);
Chris@16 162 // reset l_flags as a global scope (?imsx) may have altered them:
Chris@16 163 this->flags(l_flags);
Chris@16 164 // if we haven't gobbled up all the characters then we must
Chris@16 165 // have had an unexpected ')' :
Chris@16 166 if(!result)
Chris@16 167 {
Chris@16 168 fail(regex_constants::error_paren, ::boost::re_detail::distance(m_base, m_position), "Found a closing ) with no corresponding openening parenthesis.");
Chris@16 169 return;
Chris@16 170 }
Chris@16 171 // if an error has been set then give up now:
Chris@16 172 if(this->m_pdata->m_status)
Chris@16 173 return;
Chris@16 174 // fill in our sub-expression count:
Chris@16 175 this->m_pdata->m_mark_count = 1 + m_mark_count;
Chris@16 176 this->finalize(p1, p2);
Chris@16 177 }
Chris@16 178
Chris@16 179 template <class charT, class traits>
Chris@16 180 void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position)
Chris@16 181 {
Chris@16 182 // get the error message:
Chris@16 183 std::string message = this->m_pdata->m_ptraits->error_string(error_code);
Chris@16 184 fail(error_code, position, message);
Chris@16 185 }
Chris@16 186
Chris@16 187 template <class charT, class traits>
Chris@16 188 void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos)
Chris@16 189 {
Chris@16 190 if(0 == this->m_pdata->m_status) // update the error code if not already set
Chris@16 191 this->m_pdata->m_status = error_code;
Chris@16 192 m_position = m_end; // don't bother parsing anything else
Chris@16 193
Chris@16 194 #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
Chris@16 195 //
Chris@16 196 // Augment error message with the regular expression text:
Chris@16 197 //
Chris@16 198 if(start_pos == position)
Chris@16 199 start_pos = (std::max)(static_cast<std::ptrdiff_t>(0), position - static_cast<std::ptrdiff_t>(10));
Chris@16 200 std::ptrdiff_t end_pos = (std::min)(position + static_cast<std::ptrdiff_t>(10), static_cast<std::ptrdiff_t>(m_end - m_base));
Chris@16 201 if(error_code != regex_constants::error_empty)
Chris@16 202 {
Chris@16 203 if((start_pos != 0) || (end_pos != (m_end - m_base)))
Chris@16 204 message += " The error occurred while parsing the regular expression fragment: '";
Chris@16 205 else
Chris@16 206 message += " The error occurred while parsing the regular expression: '";
Chris@16 207 if(start_pos != end_pos)
Chris@16 208 {
Chris@16 209 message += std::string(m_base + start_pos, m_base + position);
Chris@16 210 message += ">>>HERE>>>";
Chris@16 211 message += std::string(m_base + position, m_base + end_pos);
Chris@16 212 }
Chris@16 213 message += "'.";
Chris@16 214 }
Chris@16 215 #endif
Chris@16 216
Chris@16 217 #ifndef BOOST_NO_EXCEPTIONS
Chris@16 218 if(0 == (this->flags() & regex_constants::no_except))
Chris@16 219 {
Chris@16 220 boost::regex_error e(message, error_code, position);
Chris@16 221 e.raise();
Chris@16 222 }
Chris@16 223 #else
Chris@16 224 (void)position; // suppress warnings.
Chris@16 225 #endif
Chris@16 226 }
Chris@16 227
Chris@16 228 template <class charT, class traits>
Chris@16 229 bool basic_regex_parser<charT, traits>::parse_all()
Chris@16 230 {
Chris@16 231 bool result = true;
Chris@16 232 while(result && (m_position != m_end))
Chris@16 233 {
Chris@16 234 result = (this->*m_parser_proc)();
Chris@16 235 }
Chris@16 236 return result;
Chris@16 237 }
Chris@16 238
Chris@16 239 #ifdef BOOST_MSVC
Chris@16 240 #pragma warning(push)
Chris@16 241 #pragma warning(disable:4702)
Chris@16 242 #endif
Chris@16 243 template <class charT, class traits>
Chris@16 244 bool basic_regex_parser<charT, traits>::parse_basic()
Chris@16 245 {
Chris@16 246 switch(this->m_traits.syntax_type(*m_position))
Chris@16 247 {
Chris@16 248 case regex_constants::syntax_escape:
Chris@16 249 return parse_basic_escape();
Chris@16 250 case regex_constants::syntax_dot:
Chris@16 251 return parse_match_any();
Chris@16 252 case regex_constants::syntax_caret:
Chris@16 253 ++m_position;
Chris@16 254 this->append_state(syntax_element_start_line);
Chris@16 255 break;
Chris@16 256 case regex_constants::syntax_dollar:
Chris@16 257 ++m_position;
Chris@16 258 this->append_state(syntax_element_end_line);
Chris@16 259 break;
Chris@16 260 case regex_constants::syntax_star:
Chris@16 261 if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line))
Chris@16 262 return parse_literal();
Chris@16 263 else
Chris@16 264 {
Chris@16 265 ++m_position;
Chris@16 266 return parse_repeat();
Chris@16 267 }
Chris@16 268 case regex_constants::syntax_plus:
Chris@16 269 if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))
Chris@16 270 return parse_literal();
Chris@16 271 else
Chris@16 272 {
Chris@16 273 ++m_position;
Chris@16 274 return parse_repeat(1);
Chris@16 275 }
Chris@16 276 case regex_constants::syntax_question:
Chris@16 277 if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))
Chris@16 278 return parse_literal();
Chris@16 279 else
Chris@16 280 {
Chris@16 281 ++m_position;
Chris@16 282 return parse_repeat(0, 1);
Chris@16 283 }
Chris@16 284 case regex_constants::syntax_open_set:
Chris@16 285 return parse_set();
Chris@16 286 case regex_constants::syntax_newline:
Chris@16 287 if(this->flags() & regbase::newline_alt)
Chris@16 288 return parse_alt();
Chris@16 289 else
Chris@16 290 return parse_literal();
Chris@16 291 default:
Chris@16 292 return parse_literal();
Chris@16 293 }
Chris@16 294 return true;
Chris@16 295 }
Chris@16 296
Chris@16 297 template <class charT, class traits>
Chris@16 298 bool basic_regex_parser<charT, traits>::parse_extended()
Chris@16 299 {
Chris@16 300 bool result = true;
Chris@16 301 switch(this->m_traits.syntax_type(*m_position))
Chris@16 302 {
Chris@16 303 case regex_constants::syntax_open_mark:
Chris@16 304 return parse_open_paren();
Chris@16 305 case regex_constants::syntax_close_mark:
Chris@16 306 return false;
Chris@16 307 case regex_constants::syntax_escape:
Chris@16 308 return parse_extended_escape();
Chris@16 309 case regex_constants::syntax_dot:
Chris@16 310 return parse_match_any();
Chris@16 311 case regex_constants::syntax_caret:
Chris@16 312 ++m_position;
Chris@16 313 this->append_state(
Chris@16 314 (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_start : syntax_element_start_line));
Chris@16 315 break;
Chris@16 316 case regex_constants::syntax_dollar:
Chris@16 317 ++m_position;
Chris@16 318 this->append_state(
Chris@16 319 (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_end : syntax_element_end_line));
Chris@16 320 break;
Chris@16 321 case regex_constants::syntax_star:
Chris@16 322 if(m_position == this->m_base)
Chris@16 323 {
Chris@16 324 fail(regex_constants::error_badrepeat, 0, "The repeat operator \"*\" cannot start a regular expression.");
Chris@16 325 return false;
Chris@16 326 }
Chris@16 327 ++m_position;
Chris@16 328 return parse_repeat();
Chris@16 329 case regex_constants::syntax_question:
Chris@16 330 if(m_position == this->m_base)
Chris@16 331 {
Chris@16 332 fail(regex_constants::error_badrepeat, 0, "The repeat operator \"?\" cannot start a regular expression.");
Chris@16 333 return false;
Chris@16 334 }
Chris@16 335 ++m_position;
Chris@16 336 return parse_repeat(0,1);
Chris@16 337 case regex_constants::syntax_plus:
Chris@16 338 if(m_position == this->m_base)
Chris@16 339 {
Chris@16 340 fail(regex_constants::error_badrepeat, 0, "The repeat operator \"+\" cannot start a regular expression.");
Chris@16 341 return false;
Chris@16 342 }
Chris@16 343 ++m_position;
Chris@16 344 return parse_repeat(1);
Chris@16 345 case regex_constants::syntax_open_brace:
Chris@16 346 ++m_position;
Chris@16 347 return parse_repeat_range(false);
Chris@16 348 case regex_constants::syntax_close_brace:
Chris@16 349 fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {.");
Chris@16 350 return false;
Chris@16 351 case regex_constants::syntax_or:
Chris@16 352 return parse_alt();
Chris@16 353 case regex_constants::syntax_open_set:
Chris@16 354 return parse_set();
Chris@16 355 case regex_constants::syntax_newline:
Chris@16 356 if(this->flags() & regbase::newline_alt)
Chris@16 357 return parse_alt();
Chris@16 358 else
Chris@16 359 return parse_literal();
Chris@16 360 case regex_constants::syntax_hash:
Chris@16 361 //
Chris@16 362 // If we have a mod_x flag set, then skip until
Chris@16 363 // we get to a newline character:
Chris@16 364 //
Chris@16 365 if((this->flags()
Chris@16 366 & (regbase::no_perl_ex|regbase::mod_x))
Chris@16 367 == regbase::mod_x)
Chris@16 368 {
Chris@16 369 while((m_position != m_end) && !is_separator(*m_position++)){}
Chris@16 370 return true;
Chris@16 371 }
Chris@16 372 BOOST_FALLTHROUGH;
Chris@16 373 default:
Chris@16 374 result = parse_literal();
Chris@16 375 break;
Chris@16 376 }
Chris@16 377 return result;
Chris@16 378 }
Chris@16 379 #ifdef BOOST_MSVC
Chris@16 380 #pragma warning(pop)
Chris@16 381 #endif
Chris@16 382
Chris@16 383 template <class charT, class traits>
Chris@16 384 bool basic_regex_parser<charT, traits>::parse_literal()
Chris@16 385 {
Chris@16 386 // append this as a literal provided it's not a space character
Chris@16 387 // or the perl option regbase::mod_x is not set:
Chris@16 388 if(
Chris@16 389 ((this->flags()
Chris@16 390 & (regbase::main_option_type|regbase::mod_x|regbase::no_perl_ex))
Chris@16 391 != regbase::mod_x)
Chris@16 392 || !this->m_traits.isctype(*m_position, this->m_mask_space))
Chris@16 393 this->append_literal(*m_position);
Chris@16 394 ++m_position;
Chris@16 395 return true;
Chris@16 396 }
Chris@16 397
Chris@16 398 template <class charT, class traits>
Chris@16 399 bool basic_regex_parser<charT, traits>::parse_open_paren()
Chris@16 400 {
Chris@16 401 //
Chris@16 402 // skip the '(' and error check:
Chris@16 403 //
Chris@16 404 if(++m_position == m_end)
Chris@16 405 {
Chris@16 406 fail(regex_constants::error_paren, m_position - m_base);
Chris@16 407 return false;
Chris@16 408 }
Chris@16 409 //
Chris@16 410 // begin by checking for a perl-style (?...) extension:
Chris@16 411 //
Chris@16 412 if(
Chris@16 413 ((this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) == 0)
Chris@16 414 || ((this->flags() & (regbase::main_option_type | regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))
Chris@16 415 )
Chris@16 416 {
Chris@16 417 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
Chris@16 418 return parse_perl_extension();
Chris@16 419 }
Chris@16 420 //
Chris@16 421 // update our mark count, and append the required state:
Chris@16 422 //
Chris@16 423 unsigned markid = 0;
Chris@16 424 if(0 == (this->flags() & regbase::nosubs))
Chris@16 425 {
Chris@16 426 markid = ++m_mark_count;
Chris@16 427 #ifndef BOOST_NO_STD_DISTANCE
Chris@16 428 if(this->flags() & regbase::save_subexpression_location)
Chris@16 429 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 1, 0));
Chris@16 430 #else
Chris@16 431 if(this->flags() & regbase::save_subexpression_location)
Chris@16 432 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - 1, 0));
Chris@16 433 #endif
Chris@16 434 }
Chris@16 435 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
Chris@16 436 pb->index = markid;
Chris@16 437 pb->icase = this->flags() & regbase::icase;
Chris@16 438 std::ptrdiff_t last_paren_start = this->getoffset(pb);
Chris@16 439 // back up insertion point for alternations, and set new point:
Chris@16 440 std::ptrdiff_t last_alt_point = m_alt_insert_point;
Chris@16 441 this->m_pdata->m_data.align();
Chris@16 442 m_alt_insert_point = this->m_pdata->m_data.size();
Chris@16 443 //
Chris@16 444 // back up the current flags in case we have a nested (?imsx) group:
Chris@16 445 //
Chris@16 446 regex_constants::syntax_option_type opts = this->flags();
Chris@16 447 bool old_case_change = m_has_case_change;
Chris@16 448 m_has_case_change = false; // no changes to this scope as yet...
Chris@16 449 //
Chris@16 450 // Back up branch reset data in case we have a nested (?|...)
Chris@16 451 //
Chris@16 452 int mark_reset = m_mark_reset;
Chris@16 453 m_mark_reset = -1;
Chris@16 454 //
Chris@16 455 // now recursively add more states, this will terminate when we get to a
Chris@16 456 // matching ')' :
Chris@16 457 //
Chris@16 458 parse_all();
Chris@16 459 //
Chris@16 460 // Unwind pushed alternatives:
Chris@16 461 //
Chris@16 462 if(0 == unwind_alts(last_paren_start))
Chris@16 463 return false;
Chris@16 464 //
Chris@16 465 // restore flags:
Chris@16 466 //
Chris@16 467 if(m_has_case_change)
Chris@16 468 {
Chris@16 469 // the case has changed in one or more of the alternatives
Chris@16 470 // within the scoped (...) block: we have to add a state
Chris@16 471 // to reset the case sensitivity:
Chris@16 472 static_cast<re_case*>(
Chris@16 473 this->append_state(syntax_element_toggle_case, sizeof(re_case))
Chris@16 474 )->icase = opts & regbase::icase;
Chris@16 475 }
Chris@16 476 this->flags(opts);
Chris@16 477 m_has_case_change = old_case_change;
Chris@16 478 //
Chris@16 479 // restore branch reset:
Chris@16 480 //
Chris@16 481 m_mark_reset = mark_reset;
Chris@16 482 //
Chris@16 483 // we either have a ')' or we have run out of characters prematurely:
Chris@16 484 //
Chris@16 485 if(m_position == m_end)
Chris@16 486 {
Chris@16 487 this->fail(regex_constants::error_paren, ::boost::re_detail::distance(m_base, m_end));
Chris@16 488 return false;
Chris@16 489 }
Chris@16 490 BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
Chris@16 491 #ifndef BOOST_NO_STD_DISTANCE
Chris@16 492 if(markid && (this->flags() & regbase::save_subexpression_location))
Chris@16 493 this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position);
Chris@16 494 #else
Chris@16 495 if(markid && (this->flags() & regbase::save_subexpression_location))
Chris@16 496 this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base);
Chris@16 497 #endif
Chris@16 498 ++m_position;
Chris@16 499 //
Chris@16 500 // append closing parenthesis state:
Chris@16 501 //
Chris@16 502 pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
Chris@16 503 pb->index = markid;
Chris@16 504 pb->icase = this->flags() & regbase::icase;
Chris@16 505 this->m_paren_start = last_paren_start;
Chris@16 506 //
Chris@16 507 // restore the alternate insertion point:
Chris@16 508 //
Chris@16 509 this->m_alt_insert_point = last_alt_point;
Chris@16 510 //
Chris@16 511 // allow backrefs to this mark:
Chris@16 512 //
Chris@16 513 if((markid > 0) && (markid < sizeof(unsigned) * CHAR_BIT))
Chris@16 514 this->m_backrefs |= 1u << (markid - 1);
Chris@16 515
Chris@16 516 return true;
Chris@16 517 }
Chris@16 518
Chris@16 519 template <class charT, class traits>
Chris@16 520 bool basic_regex_parser<charT, traits>::parse_basic_escape()
Chris@16 521 {
Chris@16 522 ++m_position;
Chris@16 523 bool result = true;
Chris@16 524 switch(this->m_traits.escape_syntax_type(*m_position))
Chris@16 525 {
Chris@16 526 case regex_constants::syntax_open_mark:
Chris@16 527 return parse_open_paren();
Chris@16 528 case regex_constants::syntax_close_mark:
Chris@16 529 return false;
Chris@16 530 case regex_constants::syntax_plus:
Chris@16 531 if(this->flags() & regex_constants::bk_plus_qm)
Chris@16 532 {
Chris@16 533 ++m_position;
Chris@16 534 return parse_repeat(1);
Chris@16 535 }
Chris@16 536 else
Chris@16 537 return parse_literal();
Chris@16 538 case regex_constants::syntax_question:
Chris@16 539 if(this->flags() & regex_constants::bk_plus_qm)
Chris@16 540 {
Chris@16 541 ++m_position;
Chris@16 542 return parse_repeat(0, 1);
Chris@16 543 }
Chris@16 544 else
Chris@16 545 return parse_literal();
Chris@16 546 case regex_constants::syntax_open_brace:
Chris@16 547 if(this->flags() & regbase::no_intervals)
Chris@16 548 return parse_literal();
Chris@16 549 ++m_position;
Chris@16 550 return parse_repeat_range(true);
Chris@16 551 case regex_constants::syntax_close_brace:
Chris@16 552 if(this->flags() & regbase::no_intervals)
Chris@16 553 return parse_literal();
Chris@16 554 fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {.");
Chris@16 555 return false;
Chris@16 556 case regex_constants::syntax_or:
Chris@16 557 if(this->flags() & regbase::bk_vbar)
Chris@16 558 return parse_alt();
Chris@16 559 else
Chris@16 560 result = parse_literal();
Chris@16 561 break;
Chris@16 562 case regex_constants::syntax_digit:
Chris@16 563 return parse_backref();
Chris@16 564 case regex_constants::escape_type_start_buffer:
Chris@16 565 if(this->flags() & regbase::emacs_ex)
Chris@16 566 {
Chris@16 567 ++m_position;
Chris@16 568 this->append_state(syntax_element_buffer_start);
Chris@16 569 }
Chris@16 570 else
Chris@16 571 result = parse_literal();
Chris@16 572 break;
Chris@16 573 case regex_constants::escape_type_end_buffer:
Chris@16 574 if(this->flags() & regbase::emacs_ex)
Chris@16 575 {
Chris@16 576 ++m_position;
Chris@16 577 this->append_state(syntax_element_buffer_end);
Chris@16 578 }
Chris@16 579 else
Chris@16 580 result = parse_literal();
Chris@16 581 break;
Chris@16 582 case regex_constants::escape_type_word_assert:
Chris@16 583 if(this->flags() & regbase::emacs_ex)
Chris@16 584 {
Chris@16 585 ++m_position;
Chris@16 586 this->append_state(syntax_element_word_boundary);
Chris@16 587 }
Chris@16 588 else
Chris@16 589 result = parse_literal();
Chris@16 590 break;
Chris@16 591 case regex_constants::escape_type_not_word_assert:
Chris@16 592 if(this->flags() & regbase::emacs_ex)
Chris@16 593 {
Chris@16 594 ++m_position;
Chris@16 595 this->append_state(syntax_element_within_word);
Chris@16 596 }
Chris@16 597 else
Chris@16 598 result = parse_literal();
Chris@16 599 break;
Chris@16 600 case regex_constants::escape_type_left_word:
Chris@16 601 if(this->flags() & regbase::emacs_ex)
Chris@16 602 {
Chris@16 603 ++m_position;
Chris@16 604 this->append_state(syntax_element_word_start);
Chris@16 605 }
Chris@16 606 else
Chris@16 607 result = parse_literal();
Chris@16 608 break;
Chris@16 609 case regex_constants::escape_type_right_word:
Chris@16 610 if(this->flags() & regbase::emacs_ex)
Chris@16 611 {
Chris@16 612 ++m_position;
Chris@16 613 this->append_state(syntax_element_word_end);
Chris@16 614 }
Chris@16 615 else
Chris@16 616 result = parse_literal();
Chris@16 617 break;
Chris@16 618 default:
Chris@16 619 if(this->flags() & regbase::emacs_ex)
Chris@16 620 {
Chris@16 621 bool negate = true;
Chris@16 622 switch(*m_position)
Chris@16 623 {
Chris@16 624 case 'w':
Chris@16 625 negate = false;
Chris@16 626 BOOST_FALLTHROUGH;
Chris@16 627 case 'W':
Chris@16 628 {
Chris@16 629 basic_char_set<charT, traits> char_set;
Chris@16 630 if(negate)
Chris@16 631 char_set.negate();
Chris@16 632 char_set.add_class(this->m_word_mask);
Chris@16 633 if(0 == this->append_set(char_set))
Chris@16 634 {
Chris@16 635 fail(regex_constants::error_ctype, m_position - m_base);
Chris@16 636 return false;
Chris@16 637 }
Chris@16 638 ++m_position;
Chris@16 639 return true;
Chris@16 640 }
Chris@16 641 case 's':
Chris@16 642 negate = false;
Chris@16 643 BOOST_FALLTHROUGH;
Chris@16 644 case 'S':
Chris@16 645 return add_emacs_code(negate);
Chris@16 646 case 'c':
Chris@16 647 case 'C':
Chris@16 648 // not supported yet:
Chris@16 649 fail(regex_constants::error_escape, m_position - m_base, "The \\c and \\C escape sequences are not supported by POSIX basic regular expressions: try the Perl syntax instead.");
Chris@16 650 return false;
Chris@16 651 default:
Chris@16 652 break;
Chris@16 653 }
Chris@16 654 }
Chris@16 655 result = parse_literal();
Chris@16 656 break;
Chris@16 657 }
Chris@16 658 return result;
Chris@16 659 }
Chris@16 660
Chris@16 661 template <class charT, class traits>
Chris@16 662 bool basic_regex_parser<charT, traits>::parse_extended_escape()
Chris@16 663 {
Chris@16 664 ++m_position;
Chris@16 665 if(m_position == m_end)
Chris@16 666 {
Chris@16 667 fail(regex_constants::error_escape, m_position - m_base, "Incomplete escape sequence found.");
Chris@16 668 return false;
Chris@16 669 }
Chris@16 670 bool negate = false; // in case this is a character class escape: \w \d etc
Chris@16 671 switch(this->m_traits.escape_syntax_type(*m_position))
Chris@16 672 {
Chris@16 673 case regex_constants::escape_type_not_class:
Chris@16 674 negate = true;
Chris@16 675 BOOST_FALLTHROUGH;
Chris@16 676 case regex_constants::escape_type_class:
Chris@16 677 {
Chris@16 678 escape_type_class_jump:
Chris@16 679 typedef typename traits::char_class_type m_type;
Chris@16 680 m_type m = this->m_traits.lookup_classname(m_position, m_position+1);
Chris@16 681 if(m != 0)
Chris@16 682 {
Chris@16 683 basic_char_set<charT, traits> char_set;
Chris@16 684 if(negate)
Chris@16 685 char_set.negate();
Chris@16 686 char_set.add_class(m);
Chris@16 687 if(0 == this->append_set(char_set))
Chris@16 688 {
Chris@16 689 fail(regex_constants::error_ctype, m_position - m_base);
Chris@16 690 return false;
Chris@16 691 }
Chris@16 692 ++m_position;
Chris@16 693 return true;
Chris@16 694 }
Chris@16 695 //
Chris@16 696 // not a class, just a regular unknown escape:
Chris@16 697 //
Chris@16 698 this->append_literal(unescape_character());
Chris@16 699 break;
Chris@16 700 }
Chris@16 701 case regex_constants::syntax_digit:
Chris@16 702 return parse_backref();
Chris@16 703 case regex_constants::escape_type_left_word:
Chris@16 704 ++m_position;
Chris@16 705 this->append_state(syntax_element_word_start);
Chris@16 706 break;
Chris@16 707 case regex_constants::escape_type_right_word:
Chris@16 708 ++m_position;
Chris@16 709 this->append_state(syntax_element_word_end);
Chris@16 710 break;
Chris@16 711 case regex_constants::escape_type_start_buffer:
Chris@16 712 ++m_position;
Chris@16 713 this->append_state(syntax_element_buffer_start);
Chris@16 714 break;
Chris@16 715 case regex_constants::escape_type_end_buffer:
Chris@16 716 ++m_position;
Chris@16 717 this->append_state(syntax_element_buffer_end);
Chris@16 718 break;
Chris@16 719 case regex_constants::escape_type_word_assert:
Chris@16 720 ++m_position;
Chris@16 721 this->append_state(syntax_element_word_boundary);
Chris@16 722 break;
Chris@16 723 case regex_constants::escape_type_not_word_assert:
Chris@16 724 ++m_position;
Chris@16 725 this->append_state(syntax_element_within_word);
Chris@16 726 break;
Chris@16 727 case regex_constants::escape_type_Z:
Chris@16 728 ++m_position;
Chris@16 729 this->append_state(syntax_element_soft_buffer_end);
Chris@16 730 break;
Chris@16 731 case regex_constants::escape_type_Q:
Chris@16 732 return parse_QE();
Chris@16 733 case regex_constants::escape_type_C:
Chris@16 734 return parse_match_any();
Chris@16 735 case regex_constants::escape_type_X:
Chris@16 736 ++m_position;
Chris@16 737 this->append_state(syntax_element_combining);
Chris@16 738 break;
Chris@16 739 case regex_constants::escape_type_G:
Chris@16 740 ++m_position;
Chris@16 741 this->append_state(syntax_element_restart_continue);
Chris@16 742 break;
Chris@16 743 case regex_constants::escape_type_not_property:
Chris@16 744 negate = true;
Chris@16 745 BOOST_FALLTHROUGH;
Chris@16 746 case regex_constants::escape_type_property:
Chris@16 747 {
Chris@16 748 ++m_position;
Chris@16 749 char_class_type m;
Chris@16 750 if(m_position == m_end)
Chris@16 751 {
Chris@16 752 fail(regex_constants::error_escape, m_position - m_base, "Incomplete property escape found.");
Chris@16 753 return false;
Chris@16 754 }
Chris@16 755 // maybe have \p{ddd}
Chris@16 756 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
Chris@16 757 {
Chris@16 758 const charT* base = m_position;
Chris@16 759 // skip forward until we find enclosing brace:
Chris@16 760 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
Chris@16 761 ++m_position;
Chris@16 762 if(m_position == m_end)
Chris@16 763 {
Chris@16 764 fail(regex_constants::error_escape, m_position - m_base, "Closing } missing from property escape sequence.");
Chris@16 765 return false;
Chris@16 766 }
Chris@16 767 m = this->m_traits.lookup_classname(++base, m_position++);
Chris@16 768 }
Chris@16 769 else
Chris@16 770 {
Chris@16 771 m = this->m_traits.lookup_classname(m_position, m_position+1);
Chris@16 772 ++m_position;
Chris@16 773 }
Chris@16 774 if(m != 0)
Chris@16 775 {
Chris@16 776 basic_char_set<charT, traits> char_set;
Chris@16 777 if(negate)
Chris@16 778 char_set.negate();
Chris@16 779 char_set.add_class(m);
Chris@16 780 if(0 == this->append_set(char_set))
Chris@16 781 {
Chris@16 782 fail(regex_constants::error_ctype, m_position - m_base);
Chris@16 783 return false;
Chris@16 784 }
Chris@16 785 return true;
Chris@16 786 }
Chris@16 787 fail(regex_constants::error_ctype, m_position - m_base, "Escape sequence was neither a valid property nor a valid character class name.");
Chris@16 788 return false;
Chris@16 789 }
Chris@16 790 case regex_constants::escape_type_reset_start_mark:
Chris@16 791 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
Chris@16 792 {
Chris@16 793 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
Chris@16 794 pb->index = -5;
Chris@16 795 pb->icase = this->flags() & regbase::icase;
Chris@16 796 this->m_pdata->m_data.align();
Chris@16 797 ++m_position;
Chris@16 798 return true;
Chris@16 799 }
Chris@16 800 goto escape_type_class_jump;
Chris@16 801 case regex_constants::escape_type_line_ending:
Chris@16 802 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
Chris@16 803 {
Chris@16 804 const charT* e = get_escape_R_string<charT>();
Chris@16 805 const charT* old_position = m_position;
Chris@16 806 const charT* old_end = m_end;
Chris@16 807 const charT* old_base = m_base;
Chris@16 808 m_position = e;
Chris@16 809 m_base = e;
Chris@16 810 m_end = e + traits::length(e);
Chris@16 811 bool r = parse_all();
Chris@16 812 m_position = ++old_position;
Chris@16 813 m_end = old_end;
Chris@16 814 m_base = old_base;
Chris@16 815 return r;
Chris@16 816 }
Chris@16 817 goto escape_type_class_jump;
Chris@16 818 case regex_constants::escape_type_extended_backref:
Chris@16 819 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
Chris@16 820 {
Chris@16 821 bool have_brace = false;
Chris@16 822 bool negative = false;
Chris@16 823 static const char* incomplete_message = "Incomplete \\g escape found.";
Chris@16 824 if(++m_position == m_end)
Chris@16 825 {
Chris@16 826 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
Chris@16 827 return false;
Chris@16 828 }
Chris@16 829 // maybe have \g{ddd}
Chris@16 830 regex_constants::syntax_type syn = this->m_traits.syntax_type(*m_position);
Chris@16 831 regex_constants::syntax_type syn_end = 0;
Chris@16 832 if((syn == regex_constants::syntax_open_brace)
Chris@16 833 || (syn == regex_constants::escape_type_left_word)
Chris@16 834 || (syn == regex_constants::escape_type_end_buffer))
Chris@16 835 {
Chris@16 836 if(++m_position == m_end)
Chris@16 837 {
Chris@16 838 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
Chris@16 839 return false;
Chris@16 840 }
Chris@16 841 have_brace = true;
Chris@16 842 switch(syn)
Chris@16 843 {
Chris@16 844 case regex_constants::syntax_open_brace:
Chris@16 845 syn_end = regex_constants::syntax_close_brace;
Chris@16 846 break;
Chris@16 847 case regex_constants::escape_type_left_word:
Chris@16 848 syn_end = regex_constants::escape_type_right_word;
Chris@16 849 break;
Chris@16 850 default:
Chris@16 851 syn_end = regex_constants::escape_type_end_buffer;
Chris@16 852 break;
Chris@16 853 }
Chris@16 854 }
Chris@16 855 negative = (*m_position == static_cast<charT>('-'));
Chris@16 856 if((negative) && (++m_position == m_end))
Chris@16 857 {
Chris@16 858 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
Chris@16 859 return false;
Chris@16 860 }
Chris@16 861 const charT* pc = m_position;
Chris@16 862 int i = this->m_traits.toi(pc, m_end, 10);
Chris@16 863 if((i < 0) && syn_end)
Chris@16 864 {
Chris@16 865 // Check for a named capture, get the leftmost one if there is more than one:
Chris@16 866 const charT* base = m_position;
Chris@16 867 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != syn_end))
Chris@16 868 {
Chris@16 869 ++m_position;
Chris@16 870 }
Chris@16 871 i = hash_value_from_capture_name(base, m_position);
Chris@16 872 pc = m_position;
Chris@16 873 }
Chris@16 874 if(negative)
Chris@16 875 i = 1 + m_mark_count - i;
Chris@16 876 if(((i > 0) && (this->m_backrefs & (1u << (i-1)))) || ((i > 10000) && (this->m_pdata->get_id(i) > 0) && (this->m_backrefs & (1u << (this->m_pdata->get_id(i)-1)))))
Chris@16 877 {
Chris@16 878 m_position = pc;
Chris@16 879 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
Chris@16 880 pb->index = i;
Chris@16 881 pb->icase = this->flags() & regbase::icase;
Chris@16 882 }
Chris@16 883 else
Chris@16 884 {
Chris@16 885 fail(regex_constants::error_backref, m_position - m_base);
Chris@16 886 return false;
Chris@16 887 }
Chris@16 888 m_position = pc;
Chris@16 889 if(have_brace)
Chris@16 890 {
Chris@16 891 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != syn_end))
Chris@16 892 {
Chris@16 893 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
Chris@16 894 return false;
Chris@16 895 }
Chris@16 896 ++m_position;
Chris@16 897 }
Chris@16 898 return true;
Chris@16 899 }
Chris@16 900 goto escape_type_class_jump;
Chris@16 901 case regex_constants::escape_type_control_v:
Chris@16 902 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
Chris@16 903 goto escape_type_class_jump;
Chris@16 904 BOOST_FALLTHROUGH;
Chris@16 905 default:
Chris@16 906 this->append_literal(unescape_character());
Chris@16 907 break;
Chris@16 908 }
Chris@16 909 return true;
Chris@16 910 }
Chris@16 911
Chris@16 912 template <class charT, class traits>
Chris@16 913 bool basic_regex_parser<charT, traits>::parse_match_any()
Chris@16 914 {
Chris@16 915 //
Chris@16 916 // we have a '.' that can match any character:
Chris@16 917 //
Chris@16 918 ++m_position;
Chris@16 919 static_cast<re_dot*>(
Chris@16 920 this->append_state(syntax_element_wild, sizeof(re_dot))
Chris@16 921 )->mask = static_cast<unsigned char>(this->flags() & regbase::no_mod_s
Chris@16 922 ? re_detail::force_not_newline
Chris@16 923 : this->flags() & regbase::mod_s ?
Chris@16 924 re_detail::force_newline : re_detail::dont_care);
Chris@16 925 return true;
Chris@16 926 }
Chris@16 927
Chris@16 928 template <class charT, class traits>
Chris@16 929 bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_t high)
Chris@16 930 {
Chris@16 931 bool greedy = true;
Chris@16 932 bool pocessive = false;
Chris@16 933 std::size_t insert_point;
Chris@16 934 //
Chris@16 935 // when we get to here we may have a non-greedy ? mark still to come:
Chris@16 936 //
Chris@16 937 if((m_position != m_end)
Chris@16 938 && (
Chris@16 939 (0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
Chris@16 940 || ((regbase::basic_syntax_group|regbase::emacs_ex) == (this->flags() & (regbase::main_option_type | regbase::emacs_ex)))
Chris@16 941 )
Chris@16 942 )
Chris@16 943 {
Chris@16 944 // OK we have a perl or emacs regex, check for a '?':
Chris@16 945 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
Chris@16 946 {
Chris@16 947 greedy = false;
Chris@16 948 ++m_position;
Chris@16 949 }
Chris@16 950 // for perl regexes only check for pocessive ++ repeats.
Chris@16 951 if((m_position != m_end)
Chris@16 952 && (0 == (this->flags() & regbase::main_option_type))
Chris@16 953 && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_plus))
Chris@16 954 {
Chris@16 955 pocessive = true;
Chris@16 956 ++m_position;
Chris@16 957 }
Chris@16 958 }
Chris@16 959 if(0 == this->m_last_state)
Chris@16 960 {
Chris@16 961 fail(regex_constants::error_badrepeat, ::boost::re_detail::distance(m_base, m_position), "Nothing to repeat.");
Chris@16 962 return false;
Chris@16 963 }
Chris@16 964 if(this->m_last_state->type == syntax_element_endmark)
Chris@16 965 {
Chris@16 966 // insert a repeat before the '(' matching the last ')':
Chris@16 967 insert_point = this->m_paren_start;
Chris@16 968 }
Chris@16 969 else if((this->m_last_state->type == syntax_element_literal) && (static_cast<re_literal*>(this->m_last_state)->length > 1))
Chris@16 970 {
Chris@16 971 // the last state was a literal with more than one character, split it in two:
Chris@16 972 re_literal* lit = static_cast<re_literal*>(this->m_last_state);
Chris@16 973 charT c = (static_cast<charT*>(static_cast<void*>(lit+1)))[lit->length - 1];
Chris@16 974 --(lit->length);
Chris@16 975 // now append new state:
Chris@16 976 lit = static_cast<re_literal*>(this->append_state(syntax_element_literal, sizeof(re_literal) + sizeof(charT)));
Chris@16 977 lit->length = 1;
Chris@16 978 (static_cast<charT*>(static_cast<void*>(lit+1)))[0] = c;
Chris@16 979 insert_point = this->getoffset(this->m_last_state);
Chris@16 980 }
Chris@16 981 else
Chris@16 982 {
Chris@16 983 // repeat the last state whatever it was, need to add some error checking here:
Chris@16 984 switch(this->m_last_state->type)
Chris@16 985 {
Chris@16 986 case syntax_element_start_line:
Chris@16 987 case syntax_element_end_line:
Chris@16 988 case syntax_element_word_boundary:
Chris@16 989 case syntax_element_within_word:
Chris@16 990 case syntax_element_word_start:
Chris@16 991 case syntax_element_word_end:
Chris@16 992 case syntax_element_buffer_start:
Chris@16 993 case syntax_element_buffer_end:
Chris@16 994 case syntax_element_alt:
Chris@16 995 case syntax_element_soft_buffer_end:
Chris@16 996 case syntax_element_restart_continue:
Chris@16 997 case syntax_element_jump:
Chris@16 998 case syntax_element_startmark:
Chris@16 999 case syntax_element_backstep:
Chris@16 1000 // can't legally repeat any of the above:
Chris@16 1001 fail(regex_constants::error_badrepeat, m_position - m_base);
Chris@16 1002 return false;
Chris@16 1003 default:
Chris@16 1004 // do nothing...
Chris@16 1005 break;
Chris@16 1006 }
Chris@16 1007 insert_point = this->getoffset(this->m_last_state);
Chris@16 1008 }
Chris@16 1009 //
Chris@16 1010 // OK we now know what to repeat, so insert the repeat around it:
Chris@16 1011 //
Chris@16 1012 re_repeat* rep = static_cast<re_repeat*>(this->insert_state(insert_point, syntax_element_rep, re_repeater_size));
Chris@16 1013 rep->min = low;
Chris@16 1014 rep->max = high;
Chris@16 1015 rep->greedy = greedy;
Chris@16 1016 rep->leading = false;
Chris@16 1017 // store our repeater position for later:
Chris@16 1018 std::ptrdiff_t rep_off = this->getoffset(rep);
Chris@16 1019 // and append a back jump to the repeat:
Chris@16 1020 re_jump* jmp = static_cast<re_jump*>(this->append_state(syntax_element_jump, sizeof(re_jump)));
Chris@16 1021 jmp->alt.i = rep_off - this->getoffset(jmp);
Chris@16 1022 this->m_pdata->m_data.align();
Chris@16 1023 // now fill in the alt jump for the repeat:
Chris@16 1024 rep = static_cast<re_repeat*>(this->getaddress(rep_off));
Chris@16 1025 rep->alt.i = this->m_pdata->m_data.size() - rep_off;
Chris@16 1026 //
Chris@16 1027 // If the repeat is pocessive then bracket the repeat with a (?>...)
Chris@16 1028 // independent sub-expression construct:
Chris@16 1029 //
Chris@16 1030 if(pocessive)
Chris@16 1031 {
Chris@16 1032 if(m_position != m_end)
Chris@16 1033 {
Chris@16 1034 //
Chris@16 1035 // Check for illegal following quantifier, we have to do this here, because
Chris@16 1036 // the extra states we insert below circumvents our usual error checking :-(
Chris@16 1037 //
Chris@16 1038 switch(this->m_traits.syntax_type(*m_position))
Chris@16 1039 {
Chris@16 1040 case regex_constants::syntax_star:
Chris@16 1041 case regex_constants::syntax_plus:
Chris@16 1042 case regex_constants::syntax_question:
Chris@16 1043 case regex_constants::syntax_open_brace:
Chris@16 1044 fail(regex_constants::error_badrepeat, m_position - m_base);
Chris@16 1045 return false;
Chris@16 1046 }
Chris@16 1047 }
Chris@16 1048 re_brace* pb = static_cast<re_brace*>(this->insert_state(insert_point, syntax_element_startmark, sizeof(re_brace)));
Chris@16 1049 pb->index = -3;
Chris@16 1050 pb->icase = this->flags() & regbase::icase;
Chris@16 1051 jmp = static_cast<re_jump*>(this->insert_state(insert_point + sizeof(re_brace), syntax_element_jump, sizeof(re_jump)));
Chris@16 1052 this->m_pdata->m_data.align();
Chris@16 1053 jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
Chris@16 1054 pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
Chris@16 1055 pb->index = -3;
Chris@16 1056 pb->icase = this->flags() & regbase::icase;
Chris@16 1057 }
Chris@16 1058 return true;
Chris@16 1059 }
Chris@16 1060
Chris@16 1061 template <class charT, class traits>
Chris@16 1062 bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic)
Chris@16 1063 {
Chris@16 1064 static const char* incomplete_message = "Missing } in quantified repetition.";
Chris@16 1065 //
Chris@16 1066 // parse a repeat-range:
Chris@16 1067 //
Chris@16 1068 std::size_t min, max;
Chris@16 1069 int v;
Chris@16 1070 // skip whitespace:
Chris@16 1071 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
Chris@16 1072 ++m_position;
Chris@16 1073 if(this->m_position == this->m_end)
Chris@16 1074 {
Chris@16 1075 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
Chris@16 1076 {
Chris@16 1077 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
Chris@16 1078 return false;
Chris@16 1079 }
Chris@16 1080 // Treat the opening '{' as a literal character, rewind to start of error:
Chris@16 1081 --m_position;
Chris@16 1082 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
Chris@16 1083 return parse_literal();
Chris@16 1084 }
Chris@16 1085 // get min:
Chris@16 1086 v = this->m_traits.toi(m_position, m_end, 10);
Chris@16 1087 // skip whitespace:
Chris@16 1088 if(v < 0)
Chris@16 1089 {
Chris@16 1090 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
Chris@16 1091 {
Chris@16 1092 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
Chris@16 1093 return false;
Chris@16 1094 }
Chris@16 1095 // Treat the opening '{' as a literal character, rewind to start of error:
Chris@16 1096 --m_position;
Chris@16 1097 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
Chris@16 1098 return parse_literal();
Chris@16 1099 }
Chris@16 1100 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
Chris@16 1101 ++m_position;
Chris@16 1102 if(this->m_position == this->m_end)
Chris@16 1103 {
Chris@16 1104 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
Chris@16 1105 {
Chris@16 1106 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
Chris@16 1107 return false;
Chris@16 1108 }
Chris@16 1109 // Treat the opening '{' as a literal character, rewind to start of error:
Chris@16 1110 --m_position;
Chris@16 1111 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
Chris@16 1112 return parse_literal();
Chris@16 1113 }
Chris@16 1114 min = v;
Chris@16 1115 // see if we have a comma:
Chris@16 1116 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_comma)
Chris@16 1117 {
Chris@16 1118 // move on and error check:
Chris@16 1119 ++m_position;
Chris@16 1120 // skip whitespace:
Chris@16 1121 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
Chris@16 1122 ++m_position;
Chris@16 1123 if(this->m_position == this->m_end)
Chris@16 1124 {
Chris@16 1125 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
Chris@16 1126 {
Chris@16 1127 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
Chris@16 1128 return false;
Chris@16 1129 }
Chris@16 1130 // Treat the opening '{' as a literal character, rewind to start of error:
Chris@16 1131 --m_position;
Chris@16 1132 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
Chris@16 1133 return parse_literal();
Chris@16 1134 }
Chris@16 1135 // get the value if any:
Chris@16 1136 v = this->m_traits.toi(m_position, m_end, 10);
Chris@16 1137 max = (v >= 0) ? (std::size_t)v : (std::numeric_limits<std::size_t>::max)();
Chris@16 1138 }
Chris@16 1139 else
Chris@16 1140 {
Chris@16 1141 // no comma, max = min:
Chris@16 1142 max = min;
Chris@16 1143 }
Chris@16 1144 // skip whitespace:
Chris@16 1145 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
Chris@16 1146 ++m_position;
Chris@16 1147 // OK now check trailing }:
Chris@16 1148 if(this->m_position == this->m_end)
Chris@16 1149 {
Chris@16 1150 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
Chris@16 1151 {
Chris@16 1152 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
Chris@16 1153 return false;
Chris@16 1154 }
Chris@16 1155 // Treat the opening '{' as a literal character, rewind to start of error:
Chris@16 1156 --m_position;
Chris@16 1157 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
Chris@16 1158 return parse_literal();
Chris@16 1159 }
Chris@16 1160 if(isbasic)
Chris@16 1161 {
Chris@16 1162 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_escape)
Chris@16 1163 {
Chris@16 1164 ++m_position;
Chris@16 1165 if(this->m_position == this->m_end)
Chris@16 1166 {
Chris@16 1167 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
Chris@16 1168 return false;
Chris@16 1169 }
Chris@16 1170 }
Chris@16 1171 else
Chris@16 1172 {
Chris@16 1173 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
Chris@16 1174 return false;
Chris@16 1175 }
Chris@16 1176 }
Chris@16 1177 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_brace)
Chris@16 1178 ++m_position;
Chris@16 1179 else
Chris@16 1180 {
Chris@16 1181 // Treat the opening '{' as a literal character, rewind to start of error:
Chris@16 1182 --m_position;
Chris@16 1183 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
Chris@16 1184 return parse_literal();
Chris@16 1185 }
Chris@16 1186 //
Chris@16 1187 // finally go and add the repeat, unless error:
Chris@16 1188 //
Chris@16 1189 if(min > max)
Chris@16 1190 {
Chris@16 1191 // Backtrack to error location:
Chris@16 1192 m_position -= 2;
Chris@16 1193 while(this->m_traits.isctype(*m_position, this->m_word_mask)) --m_position;
Chris@16 1194 ++m_position;
Chris@16 1195 fail(regex_constants::error_badbrace, m_position - m_base);
Chris@16 1196 return false;
Chris@16 1197 }
Chris@16 1198 return parse_repeat(min, max);
Chris@16 1199 }
Chris@16 1200
Chris@16 1201 template <class charT, class traits>
Chris@16 1202 bool basic_regex_parser<charT, traits>::parse_alt()
Chris@16 1203 {
Chris@16 1204 //
Chris@16 1205 // error check: if there have been no previous states,
Chris@16 1206 // or if the last state was a '(' then error:
Chris@16 1207 //
Chris@16 1208 if(
Chris@16 1209 ((this->m_last_state == 0) || (this->m_last_state->type == syntax_element_startmark))
Chris@16 1210 &&
Chris@16 1211 !(
Chris@16 1212 ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
Chris@16 1213 &&
Chris@16 1214 ((this->flags() & regbase::no_empty_expressions) == 0)
Chris@16 1215 )
Chris@16 1216 )
Chris@16 1217 {
Chris@16 1218 fail(regex_constants::error_empty, this->m_position - this->m_base, "A regular expression can start with the alternation operator |.");
Chris@16 1219 return false;
Chris@16 1220 }
Chris@16 1221 //
Chris@16 1222 // Reset mark count if required:
Chris@16 1223 //
Chris@16 1224 if(m_max_mark < m_mark_count)
Chris@16 1225 m_max_mark = m_mark_count;
Chris@16 1226 if(m_mark_reset >= 0)
Chris@16 1227 m_mark_count = m_mark_reset;
Chris@16 1228
Chris@16 1229 ++m_position;
Chris@16 1230 //
Chris@16 1231 // we need to append a trailing jump:
Chris@16 1232 //
Chris@16 1233 re_syntax_base* pj = this->append_state(re_detail::syntax_element_jump, sizeof(re_jump));
Chris@16 1234 std::ptrdiff_t jump_offset = this->getoffset(pj);
Chris@16 1235 //
Chris@16 1236 // now insert the alternative:
Chris@16 1237 //
Chris@16 1238 re_alt* palt = static_cast<re_alt*>(this->insert_state(this->m_alt_insert_point, syntax_element_alt, re_alt_size));
Chris@16 1239 jump_offset += re_alt_size;
Chris@16 1240 this->m_pdata->m_data.align();
Chris@16 1241 palt->alt.i = this->m_pdata->m_data.size() - this->getoffset(palt);
Chris@16 1242 //
Chris@16 1243 // update m_alt_insert_point so that the next alternate gets
Chris@16 1244 // inserted at the start of the second of the two we've just created:
Chris@16 1245 //
Chris@16 1246 this->m_alt_insert_point = this->m_pdata->m_data.size();
Chris@16 1247 //
Chris@16 1248 // the start of this alternative must have a case changes state
Chris@16 1249 // if the current block has messed around with case changes:
Chris@16 1250 //
Chris@16 1251 if(m_has_case_change)
Chris@16 1252 {
Chris@16 1253 static_cast<re_case*>(
Chris@16 1254 this->append_state(syntax_element_toggle_case, sizeof(re_case))
Chris@16 1255 )->icase = this->m_icase;
Chris@16 1256 }
Chris@16 1257 //
Chris@16 1258 // push the alternative onto our stack, a recursive
Chris@16 1259 // implementation here is easier to understand (and faster
Chris@16 1260 // as it happens), but causes all kinds of stack overflow problems
Chris@16 1261 // on programs with small stacks (COM+).
Chris@16 1262 //
Chris@16 1263 m_alt_jumps.push_back(jump_offset);
Chris@16 1264 return true;
Chris@16 1265 }
Chris@16 1266
Chris@16 1267 template <class charT, class traits>
Chris@16 1268 bool basic_regex_parser<charT, traits>::parse_set()
Chris@16 1269 {
Chris@16 1270 static const char* incomplete_message = "Character set declaration starting with [ terminated prematurely - either no ] was found or the set had no content.";
Chris@16 1271 ++m_position;
Chris@16 1272 if(m_position == m_end)
Chris@16 1273 {
Chris@16 1274 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
Chris@16 1275 return false;
Chris@16 1276 }
Chris@16 1277 basic_char_set<charT, traits> char_set;
Chris@16 1278
Chris@16 1279 const charT* base = m_position; // where the '[' was
Chris@16 1280 const charT* item_base = m_position; // where the '[' or '^' was
Chris@16 1281
Chris@16 1282 while(m_position != m_end)
Chris@16 1283 {
Chris@16 1284 switch(this->m_traits.syntax_type(*m_position))
Chris@16 1285 {
Chris@16 1286 case regex_constants::syntax_caret:
Chris@16 1287 if(m_position == base)
Chris@16 1288 {
Chris@16 1289 char_set.negate();
Chris@16 1290 ++m_position;
Chris@16 1291 item_base = m_position;
Chris@16 1292 }
Chris@16 1293 else
Chris@16 1294 parse_set_literal(char_set);
Chris@16 1295 break;
Chris@16 1296 case regex_constants::syntax_close_set:
Chris@16 1297 if(m_position == item_base)
Chris@16 1298 {
Chris@16 1299 parse_set_literal(char_set);
Chris@16 1300 break;
Chris@16 1301 }
Chris@16 1302 else
Chris@16 1303 {
Chris@16 1304 ++m_position;
Chris@16 1305 if(0 == this->append_set(char_set))
Chris@16 1306 {
Chris@16 1307 fail(regex_constants::error_ctype, m_position - m_base);
Chris@16 1308 return false;
Chris@16 1309 }
Chris@16 1310 }
Chris@16 1311 return true;
Chris@16 1312 case regex_constants::syntax_open_set:
Chris@16 1313 if(parse_inner_set(char_set))
Chris@16 1314 break;
Chris@16 1315 return true;
Chris@16 1316 case regex_constants::syntax_escape:
Chris@16 1317 {
Chris@16 1318 //
Chris@16 1319 // look ahead and see if this is a character class shortcut
Chris@16 1320 // \d \w \s etc...
Chris@16 1321 //
Chris@16 1322 ++m_position;
Chris@16 1323 if(this->m_traits.escape_syntax_type(*m_position)
Chris@16 1324 == regex_constants::escape_type_class)
Chris@16 1325 {
Chris@16 1326 char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
Chris@16 1327 if(m != 0)
Chris@16 1328 {
Chris@16 1329 char_set.add_class(m);
Chris@16 1330 ++m_position;
Chris@16 1331 break;
Chris@16 1332 }
Chris@16 1333 }
Chris@16 1334 else if(this->m_traits.escape_syntax_type(*m_position)
Chris@16 1335 == regex_constants::escape_type_not_class)
Chris@16 1336 {
Chris@16 1337 // negated character class:
Chris@16 1338 char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
Chris@16 1339 if(m != 0)
Chris@16 1340 {
Chris@16 1341 char_set.add_negated_class(m);
Chris@16 1342 ++m_position;
Chris@16 1343 break;
Chris@16 1344 }
Chris@16 1345 }
Chris@16 1346 // not a character class, just a regular escape:
Chris@16 1347 --m_position;
Chris@16 1348 parse_set_literal(char_set);
Chris@16 1349 break;
Chris@16 1350 }
Chris@16 1351 default:
Chris@16 1352 parse_set_literal(char_set);
Chris@16 1353 break;
Chris@16 1354 }
Chris@16 1355 }
Chris@16 1356 return m_position != m_end;
Chris@16 1357 }
Chris@16 1358
Chris@16 1359 template <class charT, class traits>
Chris@16 1360 bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, traits>& char_set)
Chris@16 1361 {
Chris@16 1362 static const char* incomplete_message = "Character class declaration starting with [ terminated prematurely - either no ] was found or the set had no content.";
Chris@16 1363 //
Chris@16 1364 // we have either a character class [:name:]
Chris@16 1365 // a collating element [.name.]
Chris@16 1366 // or an equivalence class [=name=]
Chris@16 1367 //
Chris@16 1368 if(m_end == ++m_position)
Chris@16 1369 {
Chris@16 1370 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
Chris@16 1371 return false;
Chris@16 1372 }
Chris@16 1373 switch(this->m_traits.syntax_type(*m_position))
Chris@16 1374 {
Chris@16 1375 case regex_constants::syntax_dot:
Chris@16 1376 //
Chris@16 1377 // a collating element is treated as a literal:
Chris@16 1378 //
Chris@16 1379 --m_position;
Chris@16 1380 parse_set_literal(char_set);
Chris@16 1381 return true;
Chris@16 1382 case regex_constants::syntax_colon:
Chris@16 1383 {
Chris@16 1384 // check that character classes are actually enabled:
Chris@16 1385 if((this->flags() & (regbase::main_option_type | regbase::no_char_classes))
Chris@16 1386 == (regbase::basic_syntax_group | regbase::no_char_classes))
Chris@16 1387 {
Chris@16 1388 --m_position;
Chris@16 1389 parse_set_literal(char_set);
Chris@16 1390 return true;
Chris@16 1391 }
Chris@16 1392 // skip the ':'
Chris@16 1393 if(m_end == ++m_position)
Chris@16 1394 {
Chris@16 1395 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
Chris@16 1396 return false;
Chris@16 1397 }
Chris@16 1398 const charT* name_first = m_position;
Chris@16 1399 // skip at least one character, then find the matching ':]'
Chris@16 1400 if(m_end == ++m_position)
Chris@16 1401 {
Chris@16 1402 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
Chris@16 1403 return false;
Chris@16 1404 }
Chris@16 1405 while((m_position != m_end)
Chris@16 1406 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_colon))
Chris@16 1407 ++m_position;
Chris@16 1408 const charT* name_last = m_position;
Chris@16 1409 if(m_end == m_position)
Chris@16 1410 {
Chris@16 1411 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
Chris@16 1412 return false;
Chris@16 1413 }
Chris@16 1414 if((m_end == ++m_position)
Chris@16 1415 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
Chris@16 1416 {
Chris@16 1417 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
Chris@16 1418 return false;
Chris@16 1419 }
Chris@16 1420 //
Chris@16 1421 // check for negated class:
Chris@16 1422 //
Chris@16 1423 bool negated = false;
Chris@16 1424 if(this->m_traits.syntax_type(*name_first) == regex_constants::syntax_caret)
Chris@16 1425 {
Chris@16 1426 ++name_first;
Chris@16 1427 negated = true;
Chris@16 1428 }
Chris@16 1429 typedef typename traits::char_class_type m_type;
Chris@16 1430 m_type m = this->m_traits.lookup_classname(name_first, name_last);
Chris@16 1431 if(m == 0)
Chris@16 1432 {
Chris@16 1433 if(char_set.empty() && (name_last - name_first == 1))
Chris@16 1434 {
Chris@16 1435 // maybe a special case:
Chris@16 1436 ++m_position;
Chris@16 1437 if( (m_position != m_end)
Chris@16 1438 && (this->m_traits.syntax_type(*m_position)
Chris@16 1439 == regex_constants::syntax_close_set))
Chris@16 1440 {
Chris@16 1441 if(this->m_traits.escape_syntax_type(*name_first)
Chris@16 1442 == regex_constants::escape_type_left_word)
Chris@16 1443 {
Chris@16 1444 ++m_position;
Chris@16 1445 this->append_state(syntax_element_word_start);
Chris@16 1446 return false;
Chris@16 1447 }
Chris@16 1448 if(this->m_traits.escape_syntax_type(*name_first)
Chris@16 1449 == regex_constants::escape_type_right_word)
Chris@16 1450 {
Chris@16 1451 ++m_position;
Chris@16 1452 this->append_state(syntax_element_word_end);
Chris@16 1453 return false;
Chris@16 1454 }
Chris@16 1455 }
Chris@16 1456 }
Chris@16 1457 fail(regex_constants::error_ctype, name_first - m_base);
Chris@16 1458 return false;
Chris@16 1459 }
Chris@16 1460 if(negated == false)
Chris@16 1461 char_set.add_class(m);
Chris@16 1462 else
Chris@16 1463 char_set.add_negated_class(m);
Chris@16 1464 ++m_position;
Chris@16 1465 break;
Chris@16 1466 }
Chris@16 1467 case regex_constants::syntax_equal:
Chris@16 1468 {
Chris@16 1469 // skip the '='
Chris@16 1470 if(m_end == ++m_position)
Chris@16 1471 {
Chris@16 1472 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
Chris@16 1473 return false;
Chris@16 1474 }
Chris@16 1475 const charT* name_first = m_position;
Chris@16 1476 // skip at least one character, then find the matching '=]'
Chris@16 1477 if(m_end == ++m_position)
Chris@16 1478 {
Chris@16 1479 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
Chris@16 1480 return false;
Chris@16 1481 }
Chris@16 1482 while((m_position != m_end)
Chris@16 1483 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal))
Chris@16 1484 ++m_position;
Chris@16 1485 const charT* name_last = m_position;
Chris@16 1486 if(m_end == m_position)
Chris@16 1487 {
Chris@16 1488 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
Chris@16 1489 return false;
Chris@16 1490 }
Chris@16 1491 if((m_end == ++m_position)
Chris@16 1492 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
Chris@16 1493 {
Chris@16 1494 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
Chris@16 1495 return false;
Chris@16 1496 }
Chris@16 1497 string_type m = this->m_traits.lookup_collatename(name_first, name_last);
Chris@16 1498 if((0 == m.size()) || (m.size() > 2))
Chris@16 1499 {
Chris@16 1500 fail(regex_constants::error_collate, name_first - m_base);
Chris@16 1501 return false;
Chris@16 1502 }
Chris@16 1503 digraph<charT> d;
Chris@16 1504 d.first = m[0];
Chris@16 1505 if(m.size() > 1)
Chris@16 1506 d.second = m[1];
Chris@16 1507 else
Chris@16 1508 d.second = 0;
Chris@16 1509 char_set.add_equivalent(d);
Chris@16 1510 ++m_position;
Chris@16 1511 break;
Chris@16 1512 }
Chris@16 1513 default:
Chris@16 1514 --m_position;
Chris@16 1515 parse_set_literal(char_set);
Chris@16 1516 break;
Chris@16 1517 }
Chris@16 1518 return true;
Chris@16 1519 }
Chris@16 1520
Chris@16 1521 template <class charT, class traits>
Chris@16 1522 void basic_regex_parser<charT, traits>::parse_set_literal(basic_char_set<charT, traits>& char_set)
Chris@16 1523 {
Chris@16 1524 digraph<charT> start_range(get_next_set_literal(char_set));
Chris@16 1525 if(m_end == m_position)
Chris@16 1526 {
Chris@16 1527 fail(regex_constants::error_brack, m_position - m_base);
Chris@16 1528 return;
Chris@16 1529 }
Chris@16 1530 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
Chris@16 1531 {
Chris@16 1532 // we have a range:
Chris@16 1533 if(m_end == ++m_position)
Chris@16 1534 {
Chris@16 1535 fail(regex_constants::error_brack, m_position - m_base);
Chris@16 1536 return;
Chris@16 1537 }
Chris@16 1538 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)
Chris@16 1539 {
Chris@16 1540 digraph<charT> end_range = get_next_set_literal(char_set);
Chris@16 1541 char_set.add_range(start_range, end_range);
Chris@16 1542 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
Chris@16 1543 {
Chris@16 1544 if(m_end == ++m_position)
Chris@16 1545 {
Chris@16 1546 fail(regex_constants::error_brack, m_position - m_base);
Chris@16 1547 return;
Chris@16 1548 }
Chris@16 1549 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_set)
Chris@16 1550 {
Chris@16 1551 // trailing - :
Chris@16 1552 --m_position;
Chris@16 1553 return;
Chris@16 1554 }
Chris@16 1555 fail(regex_constants::error_range, m_position - m_base);
Chris@16 1556 return;
Chris@16 1557 }
Chris@16 1558 return;
Chris@16 1559 }
Chris@16 1560 --m_position;
Chris@16 1561 }
Chris@16 1562 char_set.add_single(start_range);
Chris@16 1563 }
Chris@16 1564
Chris@16 1565 template <class charT, class traits>
Chris@16 1566 digraph<charT> basic_regex_parser<charT, traits>::get_next_set_literal(basic_char_set<charT, traits>& char_set)
Chris@16 1567 {
Chris@16 1568 digraph<charT> result;
Chris@16 1569 switch(this->m_traits.syntax_type(*m_position))
Chris@16 1570 {
Chris@16 1571 case regex_constants::syntax_dash:
Chris@16 1572 if(!char_set.empty())
Chris@16 1573 {
Chris@16 1574 // see if we are at the end of the set:
Chris@16 1575 if((++m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
Chris@16 1576 {
Chris@16 1577 fail(regex_constants::error_range, m_position - m_base);
Chris@16 1578 return result;
Chris@16 1579 }
Chris@16 1580 --m_position;
Chris@16 1581 }
Chris@16 1582 result.first = *m_position++;
Chris@16 1583 return result;
Chris@16 1584 case regex_constants::syntax_escape:
Chris@16 1585 // check to see if escapes are supported first:
Chris@16 1586 if(this->flags() & regex_constants::no_escape_in_lists)
Chris@16 1587 {
Chris@16 1588 result = *m_position++;
Chris@16 1589 break;
Chris@16 1590 }
Chris@16 1591 ++m_position;
Chris@16 1592 result = unescape_character();
Chris@16 1593 break;
Chris@16 1594 case regex_constants::syntax_open_set:
Chris@16 1595 {
Chris@16 1596 if(m_end == ++m_position)
Chris@16 1597 {
Chris@16 1598 fail(regex_constants::error_collate, m_position - m_base);
Chris@16 1599 return result;
Chris@16 1600 }
Chris@16 1601 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot)
Chris@16 1602 {
Chris@16 1603 --m_position;
Chris@16 1604 result.first = *m_position;
Chris@16 1605 ++m_position;
Chris@16 1606 return result;
Chris@16 1607 }
Chris@16 1608 if(m_end == ++m_position)
Chris@16 1609 {
Chris@16 1610 fail(regex_constants::error_collate, m_position - m_base);
Chris@16 1611 return result;
Chris@16 1612 }
Chris@16 1613 const charT* name_first = m_position;
Chris@16 1614 // skip at least one character, then find the matching ':]'
Chris@16 1615 if(m_end == ++m_position)
Chris@16 1616 {
Chris@16 1617 fail(regex_constants::error_collate, name_first - m_base);
Chris@16 1618 return result;
Chris@16 1619 }
Chris@16 1620 while((m_position != m_end)
Chris@16 1621 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot))
Chris@16 1622 ++m_position;
Chris@16 1623 const charT* name_last = m_position;
Chris@16 1624 if(m_end == m_position)
Chris@16 1625 {
Chris@16 1626 fail(regex_constants::error_collate, name_first - m_base);
Chris@16 1627 return result;
Chris@16 1628 }
Chris@16 1629 if((m_end == ++m_position)
Chris@16 1630 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
Chris@16 1631 {
Chris@16 1632 fail(regex_constants::error_collate, name_first - m_base);
Chris@16 1633 return result;
Chris@16 1634 }
Chris@16 1635 ++m_position;
Chris@16 1636 string_type s = this->m_traits.lookup_collatename(name_first, name_last);
Chris@16 1637 if(s.empty() || (s.size() > 2))
Chris@16 1638 {
Chris@16 1639 fail(regex_constants::error_collate, name_first - m_base);
Chris@16 1640 return result;
Chris@16 1641 }
Chris@16 1642 result.first = s[0];
Chris@16 1643 if(s.size() > 1)
Chris@16 1644 result.second = s[1];
Chris@16 1645 else
Chris@16 1646 result.second = 0;
Chris@16 1647 return result;
Chris@16 1648 }
Chris@16 1649 default:
Chris@16 1650 result = *m_position++;
Chris@16 1651 }
Chris@16 1652 return result;
Chris@16 1653 }
Chris@16 1654
Chris@16 1655 //
Chris@16 1656 // does a value fit in the specified charT type?
Chris@16 1657 //
Chris@16 1658 template <class charT>
Chris@16 1659 bool valid_value(charT, int v, const mpl::true_&)
Chris@16 1660 {
Chris@16 1661 return (v >> (sizeof(charT) * CHAR_BIT)) == 0;
Chris@16 1662 }
Chris@16 1663 template <class charT>
Chris@16 1664 bool valid_value(charT, int, const mpl::false_&)
Chris@16 1665 {
Chris@16 1666 return true; // v will alsways fit in a charT
Chris@16 1667 }
Chris@16 1668 template <class charT>
Chris@16 1669 bool valid_value(charT c, int v)
Chris@16 1670 {
Chris@16 1671 return valid_value(c, v, mpl::bool_<(sizeof(charT) < sizeof(int))>());
Chris@16 1672 }
Chris@16 1673
Chris@16 1674 template <class charT, class traits>
Chris@16 1675 charT basic_regex_parser<charT, traits>::unescape_character()
Chris@16 1676 {
Chris@16 1677 #ifdef BOOST_MSVC
Chris@16 1678 #pragma warning(push)
Chris@16 1679 #pragma warning(disable:4127)
Chris@16 1680 #endif
Chris@16 1681 charT result(0);
Chris@16 1682 if(m_position == m_end)
Chris@16 1683 {
Chris@16 1684 fail(regex_constants::error_escape, m_position - m_base, "Escape sequence terminated prematurely.");
Chris@16 1685 return false;
Chris@16 1686 }
Chris@16 1687 switch(this->m_traits.escape_syntax_type(*m_position))
Chris@16 1688 {
Chris@16 1689 case regex_constants::escape_type_control_a:
Chris@16 1690 result = charT('\a');
Chris@16 1691 break;
Chris@16 1692 case regex_constants::escape_type_e:
Chris@16 1693 result = charT(27);
Chris@16 1694 break;
Chris@16 1695 case regex_constants::escape_type_control_f:
Chris@16 1696 result = charT('\f');
Chris@16 1697 break;
Chris@16 1698 case regex_constants::escape_type_control_n:
Chris@16 1699 result = charT('\n');
Chris@16 1700 break;
Chris@16 1701 case regex_constants::escape_type_control_r:
Chris@16 1702 result = charT('\r');
Chris@16 1703 break;
Chris@16 1704 case regex_constants::escape_type_control_t:
Chris@16 1705 result = charT('\t');
Chris@16 1706 break;
Chris@16 1707 case regex_constants::escape_type_control_v:
Chris@16 1708 result = charT('\v');
Chris@16 1709 break;
Chris@16 1710 case regex_constants::escape_type_word_assert:
Chris@16 1711 result = charT('\b');
Chris@16 1712 break;
Chris@16 1713 case regex_constants::escape_type_ascii_control:
Chris@16 1714 ++m_position;
Chris@16 1715 if(m_position == m_end)
Chris@16 1716 {
Chris@16 1717 // Rewind to start of escape:
Chris@16 1718 --m_position;
Chris@16 1719 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
Chris@16 1720 fail(regex_constants::error_escape, m_position - m_base, "ASCII escape sequence terminated prematurely.");
Chris@16 1721 return result;
Chris@16 1722 }
Chris@16 1723 result = static_cast<charT>(*m_position % 32);
Chris@16 1724 break;
Chris@16 1725 case regex_constants::escape_type_hex:
Chris@16 1726 ++m_position;
Chris@16 1727 if(m_position == m_end)
Chris@16 1728 {
Chris@16 1729 // Rewind to start of escape:
Chris@16 1730 --m_position;
Chris@16 1731 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
Chris@16 1732 fail(regex_constants::error_escape, m_position - m_base, "Hexadecimal escape sequence terminated prematurely.");
Chris@16 1733 return result;
Chris@16 1734 }
Chris@16 1735 // maybe have \x{ddd}
Chris@16 1736 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
Chris@16 1737 {
Chris@16 1738 ++m_position;
Chris@16 1739 if(m_position == m_end)
Chris@16 1740 {
Chris@16 1741 // Rewind to start of escape:
Chris@16 1742 --m_position;
Chris@16 1743 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
Chris@16 1744 fail(regex_constants::error_escape, m_position - m_base, "Missing } in hexadecimal escape sequence.");
Chris@16 1745 return result;
Chris@16 1746 }
Chris@16 1747 int i = this->m_traits.toi(m_position, m_end, 16);
Chris@16 1748 if((m_position == m_end)
Chris@16 1749 || (i < 0)
Chris@16 1750 || ((std::numeric_limits<charT>::is_specialized) && (i > (int)(std::numeric_limits<charT>::max)()))
Chris@16 1751 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
Chris@16 1752 {
Chris@16 1753 // Rewind to start of escape:
Chris@16 1754 --m_position;
Chris@16 1755 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
Chris@16 1756 fail(regex_constants::error_badbrace, m_position - m_base, "Hexadecimal escape sequence was invalid.");
Chris@16 1757 return result;
Chris@16 1758 }
Chris@16 1759 ++m_position;
Chris@16 1760 result = charT(i);
Chris@16 1761 }
Chris@16 1762 else
Chris@16 1763 {
Chris@16 1764 std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), static_cast<std::ptrdiff_t>(m_end - m_position));
Chris@16 1765 int i = this->m_traits.toi(m_position, m_position + len, 16);
Chris@16 1766 if((i < 0)
Chris@16 1767 || !valid_value(charT(0), i))
Chris@16 1768 {
Chris@16 1769 // Rewind to start of escape:
Chris@16 1770 --m_position;
Chris@16 1771 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
Chris@16 1772 fail(regex_constants::error_escape, m_position - m_base, "Escape sequence did not encode a valid character.");
Chris@16 1773 return result;
Chris@16 1774 }
Chris@16 1775 result = charT(i);
Chris@16 1776 }
Chris@16 1777 return result;
Chris@16 1778 case regex_constants::syntax_digit:
Chris@16 1779 {
Chris@16 1780 // an octal escape sequence, the first character must be a zero
Chris@16 1781 // followed by up to 3 octal digits:
Chris@16 1782 std::ptrdiff_t len = (std::min)(::boost::re_detail::distance(m_position, m_end), static_cast<std::ptrdiff_t>(4));
Chris@16 1783 const charT* bp = m_position;
Chris@16 1784 int val = this->m_traits.toi(bp, bp + 1, 8);
Chris@16 1785 if(val != 0)
Chris@16 1786 {
Chris@16 1787 // Rewind to start of escape:
Chris@16 1788 --m_position;
Chris@16 1789 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
Chris@16 1790 // Oops not an octal escape after all:
Chris@16 1791 fail(regex_constants::error_escape, m_position - m_base, "Invalid octal escape sequence.");
Chris@16 1792 return result;
Chris@16 1793 }
Chris@16 1794 val = this->m_traits.toi(m_position, m_position + len, 8);
Chris@16 1795 if(val < 0)
Chris@16 1796 {
Chris@16 1797 // Rewind to start of escape:
Chris@16 1798 --m_position;
Chris@16 1799 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
Chris@16 1800 fail(regex_constants::error_escape, m_position - m_base, "Octal escape sequence is invalid.");
Chris@16 1801 return result;
Chris@16 1802 }
Chris@16 1803 return static_cast<charT>(val);
Chris@16 1804 }
Chris@16 1805 case regex_constants::escape_type_named_char:
Chris@16 1806 {
Chris@16 1807 ++m_position;
Chris@16 1808 if(m_position == m_end)
Chris@16 1809 {
Chris@16 1810 // Rewind to start of escape:
Chris@16 1811 --m_position;
Chris@16 1812 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
Chris@16 1813 fail(regex_constants::error_escape, m_position - m_base);
Chris@16 1814 return false;
Chris@16 1815 }
Chris@16 1816 // maybe have \N{name}
Chris@16 1817 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
Chris@16 1818 {
Chris@16 1819 const charT* base = m_position;
Chris@16 1820 // skip forward until we find enclosing brace:
Chris@16 1821 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
Chris@16 1822 ++m_position;
Chris@16 1823 if(m_position == m_end)
Chris@16 1824 {
Chris@16 1825 // Rewind to start of escape:
Chris@16 1826 --m_position;
Chris@16 1827 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
Chris@16 1828 fail(regex_constants::error_escape, m_position - m_base);
Chris@16 1829 return false;
Chris@16 1830 }
Chris@16 1831 string_type s = this->m_traits.lookup_collatename(++base, m_position++);
Chris@16 1832 if(s.empty())
Chris@16 1833 {
Chris@16 1834 // Rewind to start of escape:
Chris@16 1835 --m_position;
Chris@16 1836 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
Chris@16 1837 fail(regex_constants::error_collate, m_position - m_base);
Chris@16 1838 return false;
Chris@16 1839 }
Chris@16 1840 if(s.size() == 1)
Chris@16 1841 {
Chris@16 1842 return s[0];
Chris@16 1843 }
Chris@16 1844 }
Chris@16 1845 // fall through is a failure:
Chris@16 1846 // Rewind to start of escape:
Chris@16 1847 --m_position;
Chris@16 1848 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
Chris@16 1849 fail(regex_constants::error_escape, m_position - m_base);
Chris@16 1850 return false;
Chris@16 1851 }
Chris@16 1852 default:
Chris@16 1853 result = *m_position;
Chris@16 1854 break;
Chris@16 1855 }
Chris@16 1856 ++m_position;
Chris@16 1857 return result;
Chris@16 1858 #ifdef BOOST_MSVC
Chris@16 1859 #pragma warning(pop)
Chris@16 1860 #endif
Chris@16 1861 }
Chris@16 1862
Chris@16 1863 template <class charT, class traits>
Chris@16 1864 bool basic_regex_parser<charT, traits>::parse_backref()
Chris@16 1865 {
Chris@16 1866 BOOST_ASSERT(m_position != m_end);
Chris@16 1867 const charT* pc = m_position;
Chris@16 1868 int i = this->m_traits.toi(pc, pc + 1, 10);
Chris@16 1869 if((i == 0) || (((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) && (this->flags() & regbase::no_bk_refs)))
Chris@16 1870 {
Chris@16 1871 // not a backref at all but an octal escape sequence:
Chris@16 1872 charT c = unescape_character();
Chris@16 1873 this->append_literal(c);
Chris@16 1874 }
Chris@16 1875 else if((i > 0) && (this->m_backrefs & (1u << (i-1))))
Chris@16 1876 {
Chris@16 1877 m_position = pc;
Chris@16 1878 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
Chris@16 1879 pb->index = i;
Chris@16 1880 pb->icase = this->flags() & regbase::icase;
Chris@16 1881 }
Chris@16 1882 else
Chris@16 1883 {
Chris@16 1884 // Rewind to start of escape:
Chris@16 1885 --m_position;
Chris@16 1886 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
Chris@16 1887 fail(regex_constants::error_backref, m_position - m_base);
Chris@16 1888 return false;
Chris@16 1889 }
Chris@16 1890 return true;
Chris@16 1891 }
Chris@16 1892
Chris@16 1893 template <class charT, class traits>
Chris@16 1894 bool basic_regex_parser<charT, traits>::parse_QE()
Chris@16 1895 {
Chris@16 1896 #ifdef BOOST_MSVC
Chris@16 1897 #pragma warning(push)
Chris@16 1898 #pragma warning(disable:4127)
Chris@16 1899 #endif
Chris@16 1900 //
Chris@16 1901 // parse a \Q...\E sequence:
Chris@16 1902 //
Chris@16 1903 ++m_position; // skip the Q
Chris@16 1904 const charT* start = m_position;
Chris@16 1905 const charT* end;
Chris@16 1906 do
Chris@16 1907 {
Chris@16 1908 while((m_position != m_end)
Chris@16 1909 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape))
Chris@16 1910 ++m_position;
Chris@16 1911 if(m_position == m_end)
Chris@16 1912 {
Chris@16 1913 // a \Q...\E sequence may terminate with the end of the expression:
Chris@16 1914 end = m_position;
Chris@16 1915 break;
Chris@16 1916 }
Chris@16 1917 if(++m_position == m_end) // skip the escape
Chris@16 1918 {
Chris@16 1919 fail(regex_constants::error_escape, m_position - m_base, "Unterminated \\Q...\\E sequence.");
Chris@16 1920 return false;
Chris@16 1921 }
Chris@16 1922 // check to see if it's a \E:
Chris@16 1923 if(this->m_traits.escape_syntax_type(*m_position) == regex_constants::escape_type_E)
Chris@16 1924 {
Chris@16 1925 ++m_position;
Chris@16 1926 end = m_position - 2;
Chris@16 1927 break;
Chris@16 1928 }
Chris@16 1929 // otherwise go round again:
Chris@16 1930 }while(true);
Chris@16 1931 //
Chris@16 1932 // now add all the character between the two escapes as literals:
Chris@16 1933 //
Chris@16 1934 while(start != end)
Chris@16 1935 {
Chris@16 1936 this->append_literal(*start);
Chris@16 1937 ++start;
Chris@16 1938 }
Chris@16 1939 return true;
Chris@16 1940 #ifdef BOOST_MSVC
Chris@16 1941 #pragma warning(pop)
Chris@16 1942 #endif
Chris@16 1943 }
Chris@16 1944
Chris@16 1945 template <class charT, class traits>
Chris@16 1946 bool basic_regex_parser<charT, traits>::parse_perl_extension()
Chris@16 1947 {
Chris@16 1948 if(++m_position == m_end)
Chris@16 1949 {
Chris@16 1950 // Rewind to start of (? sequence:
Chris@16 1951 --m_position;
Chris@16 1952 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 1953 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 1954 return false;
Chris@16 1955 }
Chris@16 1956 //
Chris@16 1957 // treat comments as a special case, as these
Chris@16 1958 // are the only ones that don't start with a leading
Chris@16 1959 // startmark state:
Chris@16 1960 //
Chris@16 1961 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_hash)
Chris@16 1962 {
Chris@16 1963 while((m_position != m_end)
Chris@16 1964 && (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark))
Chris@16 1965 {}
Chris@16 1966 return true;
Chris@16 1967 }
Chris@16 1968 //
Chris@16 1969 // backup some state, and prepare the way:
Chris@16 1970 //
Chris@16 1971 int markid = 0;
Chris@16 1972 std::ptrdiff_t jump_offset = 0;
Chris@16 1973 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
Chris@16 1974 pb->icase = this->flags() & regbase::icase;
Chris@16 1975 std::ptrdiff_t last_paren_start = this->getoffset(pb);
Chris@16 1976 // back up insertion point for alternations, and set new point:
Chris@16 1977 std::ptrdiff_t last_alt_point = m_alt_insert_point;
Chris@16 1978 this->m_pdata->m_data.align();
Chris@16 1979 m_alt_insert_point = this->m_pdata->m_data.size();
Chris@16 1980 std::ptrdiff_t expected_alt_point = m_alt_insert_point;
Chris@16 1981 bool restore_flags = true;
Chris@16 1982 regex_constants::syntax_option_type old_flags = this->flags();
Chris@16 1983 bool old_case_change = m_has_case_change;
Chris@16 1984 m_has_case_change = false;
Chris@16 1985 charT name_delim;
Chris@16 1986 int mark_reset = m_mark_reset;
Chris@16 1987 int max_mark = m_max_mark;
Chris@16 1988 m_mark_reset = -1;
Chris@16 1989 m_max_mark = m_mark_count;
Chris@16 1990 int v;
Chris@16 1991 //
Chris@16 1992 // select the actual extension used:
Chris@16 1993 //
Chris@16 1994 switch(this->m_traits.syntax_type(*m_position))
Chris@16 1995 {
Chris@16 1996 case regex_constants::syntax_or:
Chris@16 1997 m_mark_reset = m_mark_count;
Chris@16 1998 BOOST_FALLTHROUGH;
Chris@16 1999 case regex_constants::syntax_colon:
Chris@16 2000 //
Chris@16 2001 // a non-capturing mark:
Chris@16 2002 //
Chris@16 2003 pb->index = markid = 0;
Chris@16 2004 ++m_position;
Chris@16 2005 break;
Chris@16 2006 case regex_constants::syntax_digit:
Chris@16 2007 {
Chris@16 2008 //
Chris@16 2009 // a recursive subexpression:
Chris@16 2010 //
Chris@16 2011 v = this->m_traits.toi(m_position, m_end, 10);
Chris@16 2012 if((v < 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
Chris@16 2013 {
Chris@16 2014 // Rewind to start of (? sequence:
Chris@16 2015 --m_position;
Chris@16 2016 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2017 fail(regex_constants::error_perl_extension, m_position - m_base, "The recursive sub-expression refers to an invalid marking group, or is unterminated.");
Chris@16 2018 return false;
Chris@16 2019 }
Chris@16 2020 insert_recursion:
Chris@16 2021 pb->index = markid = 0;
Chris@16 2022 re_recurse* pr = static_cast<re_recurse*>(this->append_state(syntax_element_recurse, sizeof(re_recurse)));
Chris@16 2023 pr->alt.i = v;
Chris@16 2024 pr->state_id = 0;
Chris@16 2025 static_cast<re_case*>(
Chris@16 2026 this->append_state(syntax_element_toggle_case, sizeof(re_case))
Chris@16 2027 )->icase = this->flags() & regbase::icase;
Chris@16 2028 break;
Chris@16 2029 }
Chris@16 2030 case regex_constants::syntax_plus:
Chris@16 2031 //
Chris@16 2032 // A forward-relative recursive subexpression:
Chris@16 2033 //
Chris@16 2034 ++m_position;
Chris@16 2035 v = this->m_traits.toi(m_position, m_end, 10);
Chris@16 2036 if((v <= 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
Chris@16 2037 {
Chris@16 2038 // Rewind to start of (? sequence:
Chris@16 2039 --m_position;
Chris@16 2040 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2041 fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
Chris@16 2042 return false;
Chris@16 2043 }
Chris@16 2044 v += m_mark_count;
Chris@16 2045 goto insert_recursion;
Chris@16 2046 case regex_constants::syntax_dash:
Chris@16 2047 //
Chris@16 2048 // Possibly a backward-relative recursive subexpression:
Chris@16 2049 //
Chris@16 2050 ++m_position;
Chris@16 2051 v = this->m_traits.toi(m_position, m_end, 10);
Chris@16 2052 if(v <= 0)
Chris@16 2053 {
Chris@16 2054 --m_position;
Chris@16 2055 // Oops not a relative recursion at all, but a (?-imsx) group:
Chris@16 2056 goto option_group_jump;
Chris@16 2057 }
Chris@16 2058 v = m_mark_count + 1 - v;
Chris@16 2059 if(v <= 0)
Chris@16 2060 {
Chris@16 2061 // Rewind to start of (? sequence:
Chris@16 2062 --m_position;
Chris@16 2063 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2064 fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
Chris@16 2065 return false;
Chris@16 2066 }
Chris@16 2067 goto insert_recursion;
Chris@16 2068 case regex_constants::syntax_equal:
Chris@16 2069 pb->index = markid = -1;
Chris@16 2070 ++m_position;
Chris@16 2071 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
Chris@16 2072 this->m_pdata->m_data.align();
Chris@16 2073 m_alt_insert_point = this->m_pdata->m_data.size();
Chris@16 2074 break;
Chris@16 2075 case regex_constants::syntax_not:
Chris@16 2076 pb->index = markid = -2;
Chris@16 2077 ++m_position;
Chris@16 2078 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
Chris@16 2079 this->m_pdata->m_data.align();
Chris@16 2080 m_alt_insert_point = this->m_pdata->m_data.size();
Chris@16 2081 break;
Chris@16 2082 case regex_constants::escape_type_left_word:
Chris@16 2083 {
Chris@16 2084 // a lookbehind assertion:
Chris@16 2085 if(++m_position == m_end)
Chris@16 2086 {
Chris@16 2087 // Rewind to start of (? sequence:
Chris@16 2088 --m_position;
Chris@16 2089 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2090 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2091 return false;
Chris@16 2092 }
Chris@16 2093 regex_constants::syntax_type t = this->m_traits.syntax_type(*m_position);
Chris@16 2094 if(t == regex_constants::syntax_not)
Chris@16 2095 pb->index = markid = -2;
Chris@16 2096 else if(t == regex_constants::syntax_equal)
Chris@16 2097 pb->index = markid = -1;
Chris@16 2098 else
Chris@16 2099 {
Chris@16 2100 // Probably a named capture which also starts (?< :
Chris@16 2101 name_delim = '>';
Chris@16 2102 --m_position;
Chris@16 2103 goto named_capture_jump;
Chris@16 2104 }
Chris@16 2105 ++m_position;
Chris@16 2106 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
Chris@16 2107 this->append_state(syntax_element_backstep, sizeof(re_brace));
Chris@16 2108 this->m_pdata->m_data.align();
Chris@16 2109 m_alt_insert_point = this->m_pdata->m_data.size();
Chris@16 2110 break;
Chris@16 2111 }
Chris@16 2112 case regex_constants::escape_type_right_word:
Chris@16 2113 //
Chris@16 2114 // an independent sub-expression:
Chris@16 2115 //
Chris@16 2116 pb->index = markid = -3;
Chris@16 2117 ++m_position;
Chris@16 2118 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
Chris@16 2119 this->m_pdata->m_data.align();
Chris@16 2120 m_alt_insert_point = this->m_pdata->m_data.size();
Chris@16 2121 break;
Chris@16 2122 case regex_constants::syntax_open_mark:
Chris@16 2123 {
Chris@16 2124 // a conditional expression:
Chris@16 2125 pb->index = markid = -4;
Chris@16 2126 if(++m_position == m_end)
Chris@16 2127 {
Chris@16 2128 // Rewind to start of (? sequence:
Chris@16 2129 --m_position;
Chris@16 2130 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2131 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2132 return false;
Chris@16 2133 }
Chris@16 2134 v = this->m_traits.toi(m_position, m_end, 10);
Chris@16 2135 if(m_position == m_end)
Chris@16 2136 {
Chris@16 2137 // Rewind to start of (? sequence:
Chris@16 2138 --m_position;
Chris@16 2139 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2140 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2141 return false;
Chris@16 2142 }
Chris@16 2143 if(*m_position == charT('R'))
Chris@16 2144 {
Chris@16 2145 if(++m_position == m_end)
Chris@16 2146 {
Chris@16 2147 // Rewind to start of (? sequence:
Chris@16 2148 --m_position;
Chris@16 2149 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2150 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2151 return false;
Chris@16 2152 }
Chris@16 2153 if(*m_position == charT('&'))
Chris@16 2154 {
Chris@16 2155 const charT* base = ++m_position;
Chris@16 2156 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
Chris@16 2157 ++m_position;
Chris@16 2158 if(m_position == m_end)
Chris@16 2159 {
Chris@16 2160 // Rewind to start of (? sequence:
Chris@16 2161 --m_position;
Chris@16 2162 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2163 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2164 return false;
Chris@16 2165 }
Chris@16 2166 v = -static_cast<int>(hash_value_from_capture_name(base, m_position));
Chris@16 2167 }
Chris@16 2168 else
Chris@16 2169 {
Chris@16 2170 v = -this->m_traits.toi(m_position, m_end, 10);
Chris@16 2171 }
Chris@16 2172 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
Chris@16 2173 br->index = v < 0 ? (v - 1) : 0;
Chris@16 2174 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
Chris@16 2175 {
Chris@16 2176 // Rewind to start of (? sequence:
Chris@16 2177 --m_position;
Chris@16 2178 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2179 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2180 return false;
Chris@16 2181 }
Chris@16 2182 if(++m_position == m_end)
Chris@16 2183 {
Chris@16 2184 // Rewind to start of (? sequence:
Chris@16 2185 --m_position;
Chris@16 2186 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2187 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2188 return false;
Chris@16 2189 }
Chris@16 2190 }
Chris@16 2191 else if((*m_position == charT('\'')) || (*m_position == charT('<')))
Chris@16 2192 {
Chris@16 2193 const charT* base = ++m_position;
Chris@16 2194 while((m_position != m_end) && (*m_position != charT('>')) && (*m_position != charT('\'')))
Chris@16 2195 ++m_position;
Chris@16 2196 if(m_position == m_end)
Chris@16 2197 {
Chris@16 2198 // Rewind to start of (? sequence:
Chris@16 2199 --m_position;
Chris@16 2200 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2201 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2202 return false;
Chris@16 2203 }
Chris@16 2204 v = static_cast<int>(hash_value_from_capture_name(base, m_position));
Chris@16 2205 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
Chris@16 2206 br->index = v;
Chris@16 2207 if(((*m_position != charT('>')) && (*m_position != charT('\''))) || (++m_position == m_end))
Chris@16 2208 {
Chris@16 2209 // Rewind to start of (? sequence:
Chris@16 2210 --m_position;
Chris@16 2211 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2212 fail(regex_constants::error_perl_extension, m_position - m_base, "Unterminated named capture.");
Chris@16 2213 return false;
Chris@16 2214 }
Chris@16 2215 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
Chris@16 2216 {
Chris@16 2217 // Rewind to start of (? sequence:
Chris@16 2218 --m_position;
Chris@16 2219 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2220 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2221 return false;
Chris@16 2222 }
Chris@16 2223 if(++m_position == m_end)
Chris@16 2224 {
Chris@16 2225 // Rewind to start of (? sequence:
Chris@16 2226 --m_position;
Chris@16 2227 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2228 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2229 return false;
Chris@16 2230 }
Chris@16 2231 }
Chris@16 2232 else if(*m_position == charT('D'))
Chris@16 2233 {
Chris@16 2234 const char* def = "DEFINE";
Chris@16 2235 while(*def && (m_position != m_end) && (*m_position == charT(*def)))
Chris@16 2236 ++m_position, ++def;
Chris@16 2237 if((m_position == m_end) || *def)
Chris@16 2238 {
Chris@16 2239 // Rewind to start of (? sequence:
Chris@16 2240 --m_position;
Chris@16 2241 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2242 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2243 return false;
Chris@16 2244 }
Chris@16 2245 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
Chris@16 2246 br->index = 9999; // special magic value!
Chris@16 2247 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
Chris@16 2248 {
Chris@16 2249 // Rewind to start of (? sequence:
Chris@16 2250 --m_position;
Chris@16 2251 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2252 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2253 return false;
Chris@16 2254 }
Chris@16 2255 if(++m_position == m_end)
Chris@16 2256 {
Chris@16 2257 // Rewind to start of (? sequence:
Chris@16 2258 --m_position;
Chris@16 2259 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2260 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2261 return false;
Chris@16 2262 }
Chris@16 2263 }
Chris@16 2264 else if(v > 0)
Chris@16 2265 {
Chris@16 2266 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
Chris@16 2267 br->index = v;
Chris@16 2268 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
Chris@16 2269 {
Chris@16 2270 // Rewind to start of (? sequence:
Chris@16 2271 --m_position;
Chris@16 2272 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2273 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2274 return false;
Chris@16 2275 }
Chris@16 2276 if(++m_position == m_end)
Chris@16 2277 {
Chris@16 2278 // Rewind to start of (? sequence:
Chris@16 2279 --m_position;
Chris@16 2280 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2281 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2282 return false;
Chris@16 2283 }
Chris@16 2284 }
Chris@16 2285 else
Chris@16 2286 {
Chris@16 2287 // verify that we have a lookahead or lookbehind assert:
Chris@16 2288 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_question)
Chris@16 2289 {
Chris@16 2290 // Rewind to start of (? sequence:
Chris@16 2291 --m_position;
Chris@16 2292 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2293 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2294 return false;
Chris@16 2295 }
Chris@16 2296 if(++m_position == m_end)
Chris@16 2297 {
Chris@16 2298 // Rewind to start of (? sequence:
Chris@16 2299 --m_position;
Chris@16 2300 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2301 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2302 return false;
Chris@16 2303 }
Chris@16 2304 if(this->m_traits.syntax_type(*m_position) == regex_constants::escape_type_left_word)
Chris@16 2305 {
Chris@16 2306 if(++m_position == m_end)
Chris@16 2307 {
Chris@16 2308 // Rewind to start of (? sequence:
Chris@16 2309 --m_position;
Chris@16 2310 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2311 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2312 return false;
Chris@16 2313 }
Chris@16 2314 if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
Chris@16 2315 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
Chris@16 2316 {
Chris@16 2317 // Rewind to start of (? sequence:
Chris@16 2318 --m_position;
Chris@16 2319 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2320 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2321 return false;
Chris@16 2322 }
Chris@16 2323 m_position -= 3;
Chris@16 2324 }
Chris@16 2325 else
Chris@16 2326 {
Chris@16 2327 if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
Chris@16 2328 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
Chris@16 2329 {
Chris@16 2330 // Rewind to start of (? sequence:
Chris@16 2331 --m_position;
Chris@16 2332 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2333 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2334 return false;
Chris@16 2335 }
Chris@16 2336 m_position -= 2;
Chris@16 2337 }
Chris@16 2338 }
Chris@16 2339 break;
Chris@16 2340 }
Chris@16 2341 case regex_constants::syntax_close_mark:
Chris@16 2342 // Rewind to start of (? sequence:
Chris@16 2343 --m_position;
Chris@16 2344 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2345 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2346 return false;
Chris@16 2347 case regex_constants::escape_type_end_buffer:
Chris@16 2348 {
Chris@16 2349 name_delim = *m_position;
Chris@16 2350 named_capture_jump:
Chris@16 2351 markid = 0;
Chris@16 2352 if(0 == (this->flags() & regbase::nosubs))
Chris@16 2353 {
Chris@16 2354 markid = ++m_mark_count;
Chris@16 2355 #ifndef BOOST_NO_STD_DISTANCE
Chris@16 2356 if(this->flags() & regbase::save_subexpression_location)
Chris@16 2357 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 2, 0));
Chris@16 2358 #else
Chris@16 2359 if(this->flags() & regbase::save_subexpression_location)
Chris@16 2360 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - 2, 0));
Chris@16 2361 #endif
Chris@16 2362 }
Chris@16 2363 pb->index = markid;
Chris@16 2364 const charT* base = ++m_position;
Chris@16 2365 if(m_position == m_end)
Chris@16 2366 {
Chris@16 2367 // Rewind to start of (? sequence:
Chris@16 2368 --m_position;
Chris@16 2369 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2370 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2371 return false;
Chris@16 2372 }
Chris@16 2373 while((m_position != m_end) && (*m_position != name_delim))
Chris@16 2374 ++m_position;
Chris@16 2375 if(m_position == m_end)
Chris@16 2376 {
Chris@16 2377 // Rewind to start of (? sequence:
Chris@16 2378 --m_position;
Chris@16 2379 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2380 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2381 return false;
Chris@16 2382 }
Chris@16 2383 this->m_pdata->set_name(base, m_position, markid);
Chris@16 2384 ++m_position;
Chris@16 2385 break;
Chris@16 2386 }
Chris@16 2387 default:
Chris@16 2388 if(*m_position == charT('R'))
Chris@16 2389 {
Chris@16 2390 ++m_position;
Chris@16 2391 v = 0;
Chris@16 2392 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
Chris@16 2393 {
Chris@16 2394 // Rewind to start of (? sequence:
Chris@16 2395 --m_position;
Chris@16 2396 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2397 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2398 return false;
Chris@16 2399 }
Chris@16 2400 goto insert_recursion;
Chris@16 2401 }
Chris@16 2402 if(*m_position == charT('&'))
Chris@16 2403 {
Chris@16 2404 ++m_position;
Chris@16 2405 const charT* base = m_position;
Chris@16 2406 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
Chris@16 2407 ++m_position;
Chris@16 2408 if(m_position == m_end)
Chris@16 2409 {
Chris@16 2410 // Rewind to start of (? sequence:
Chris@16 2411 --m_position;
Chris@16 2412 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2413 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2414 return false;
Chris@16 2415 }
Chris@16 2416 v = static_cast<int>(hash_value_from_capture_name(base, m_position));
Chris@16 2417 goto insert_recursion;
Chris@16 2418 }
Chris@16 2419 if(*m_position == charT('P'))
Chris@16 2420 {
Chris@16 2421 ++m_position;
Chris@16 2422 if(m_position == m_end)
Chris@16 2423 {
Chris@16 2424 // Rewind to start of (? sequence:
Chris@16 2425 --m_position;
Chris@16 2426 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2427 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2428 return false;
Chris@16 2429 }
Chris@16 2430 if(*m_position == charT('>'))
Chris@16 2431 {
Chris@16 2432 ++m_position;
Chris@16 2433 const charT* base = m_position;
Chris@16 2434 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
Chris@16 2435 ++m_position;
Chris@16 2436 if(m_position == m_end)
Chris@16 2437 {
Chris@16 2438 // Rewind to start of (? sequence:
Chris@16 2439 --m_position;
Chris@16 2440 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2441 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2442 return false;
Chris@16 2443 }
Chris@16 2444 v = static_cast<int>(hash_value_from_capture_name(base, m_position));
Chris@16 2445 goto insert_recursion;
Chris@16 2446 }
Chris@16 2447 }
Chris@16 2448 //
Chris@16 2449 // lets assume that we have a (?imsx) group and try and parse it:
Chris@16 2450 //
Chris@16 2451 option_group_jump:
Chris@16 2452 regex_constants::syntax_option_type opts = parse_options();
Chris@16 2453 if(m_position == m_end)
Chris@16 2454 {
Chris@16 2455 // Rewind to start of (? sequence:
Chris@16 2456 --m_position;
Chris@16 2457 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2458 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2459 return false;
Chris@16 2460 }
Chris@16 2461 // make a note of whether we have a case change:
Chris@16 2462 m_has_case_change = ((opts & regbase::icase) != (this->flags() & regbase::icase));
Chris@16 2463 pb->index = markid = 0;
Chris@16 2464 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark)
Chris@16 2465 {
Chris@16 2466 // update flags and carry on as normal:
Chris@16 2467 this->flags(opts);
Chris@16 2468 restore_flags = false;
Chris@16 2469 old_case_change |= m_has_case_change; // defer end of scope by one ')'
Chris@16 2470 }
Chris@16 2471 else if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_colon)
Chris@16 2472 {
Chris@16 2473 // update flags and carry on until the matching ')' is found:
Chris@16 2474 this->flags(opts);
Chris@16 2475 ++m_position;
Chris@16 2476 }
Chris@16 2477 else
Chris@16 2478 {
Chris@16 2479 // Rewind to start of (? sequence:
Chris@16 2480 --m_position;
Chris@16 2481 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2482 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2483 return false;
Chris@16 2484 }
Chris@16 2485
Chris@16 2486 // finally append a case change state if we need it:
Chris@16 2487 if(m_has_case_change)
Chris@16 2488 {
Chris@16 2489 static_cast<re_case*>(
Chris@16 2490 this->append_state(syntax_element_toggle_case, sizeof(re_case))
Chris@16 2491 )->icase = opts & regbase::icase;
Chris@16 2492 }
Chris@16 2493
Chris@16 2494 }
Chris@16 2495 //
Chris@16 2496 // now recursively add more states, this will terminate when we get to a
Chris@16 2497 // matching ')' :
Chris@16 2498 //
Chris@16 2499 parse_all();
Chris@16 2500 //
Chris@16 2501 // Unwind alternatives:
Chris@16 2502 //
Chris@16 2503 if(0 == unwind_alts(last_paren_start))
Chris@16 2504 {
Chris@16 2505 // Rewind to start of (? sequence:
Chris@16 2506 --m_position;
Chris@16 2507 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2508 fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid alternation operators within (?...) block.");
Chris@16 2509 return false;
Chris@16 2510 }
Chris@16 2511 //
Chris@16 2512 // we either have a ')' or we have run out of characters prematurely:
Chris@16 2513 //
Chris@16 2514 if(m_position == m_end)
Chris@16 2515 {
Chris@16 2516 // Rewind to start of (? sequence:
Chris@16 2517 --m_position;
Chris@16 2518 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2519 this->fail(regex_constants::error_paren, ::boost::re_detail::distance(m_base, m_end));
Chris@16 2520 return false;
Chris@16 2521 }
Chris@16 2522 BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
Chris@16 2523 ++m_position;
Chris@16 2524 //
Chris@16 2525 // restore the flags:
Chris@16 2526 //
Chris@16 2527 if(restore_flags)
Chris@16 2528 {
Chris@16 2529 // append a case change state if we need it:
Chris@16 2530 if(m_has_case_change)
Chris@16 2531 {
Chris@16 2532 static_cast<re_case*>(
Chris@16 2533 this->append_state(syntax_element_toggle_case, sizeof(re_case))
Chris@16 2534 )->icase = old_flags & regbase::icase;
Chris@16 2535 }
Chris@16 2536 this->flags(old_flags);
Chris@16 2537 }
Chris@16 2538 //
Chris@16 2539 // set up the jump pointer if we have one:
Chris@16 2540 //
Chris@16 2541 if(jump_offset)
Chris@16 2542 {
Chris@16 2543 this->m_pdata->m_data.align();
Chris@16 2544 re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
Chris@16 2545 jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
Chris@16 2546 if((this->m_last_state == jmp) && (markid != -2))
Chris@16 2547 {
Chris@16 2548 // Oops... we didn't have anything inside the assertion.
Chris@16 2549 // Note we don't get here for negated forward lookahead as (?!)
Chris@16 2550 // does have some uses.
Chris@16 2551 // Rewind to start of (? sequence:
Chris@16 2552 --m_position;
Chris@16 2553 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2554 fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid or empty zero width assertion.");
Chris@16 2555 return false;
Chris@16 2556 }
Chris@16 2557 }
Chris@16 2558 //
Chris@16 2559 // verify that if this is conditional expression, that we do have
Chris@16 2560 // an alternative, if not add one:
Chris@16 2561 //
Chris@16 2562 if(markid == -4)
Chris@16 2563 {
Chris@16 2564 re_syntax_base* b = this->getaddress(expected_alt_point);
Chris@16 2565 // Make sure we have exactly one alternative following this state:
Chris@16 2566 if(b->type != syntax_element_alt)
Chris@16 2567 {
Chris@16 2568 re_alt* alt = static_cast<re_alt*>(this->insert_state(expected_alt_point, syntax_element_alt, sizeof(re_alt)));
Chris@16 2569 alt->alt.i = this->m_pdata->m_data.size() - this->getoffset(alt);
Chris@16 2570 }
Chris@16 2571 else if(this->getaddress(static_cast<re_alt*>(b)->alt.i, b)->type == syntax_element_alt)
Chris@16 2572 {
Chris@16 2573 // Can't have seen more than one alternative:
Chris@16 2574 // Rewind to start of (? sequence:
Chris@16 2575 --m_position;
Chris@16 2576 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2577 fail(regex_constants::error_bad_pattern, m_position - m_base, "More than one alternation operator | was encountered inside a conditional expression.");
Chris@16 2578 return false;
Chris@16 2579 }
Chris@16 2580 else
Chris@16 2581 {
Chris@16 2582 // We must *not* have seen an alternative inside a (DEFINE) block:
Chris@16 2583 b = this->getaddress(b->next.i, b);
Chris@16 2584 if((b->type == syntax_element_assert_backref) && (static_cast<re_brace*>(b)->index == 9999))
Chris@16 2585 {
Chris@16 2586 // Rewind to start of (? sequence:
Chris@16 2587 --m_position;
Chris@16 2588 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2589 fail(regex_constants::error_bad_pattern, m_position - m_base, "Alternation operators are not allowed inside a DEFINE block.");
Chris@16 2590 return false;
Chris@16 2591 }
Chris@16 2592 }
Chris@16 2593 // check for invalid repetition of next state:
Chris@16 2594 b = this->getaddress(expected_alt_point);
Chris@16 2595 b = this->getaddress(static_cast<re_alt*>(b)->next.i, b);
Chris@16 2596 if((b->type != syntax_element_assert_backref)
Chris@16 2597 && (b->type != syntax_element_startmark))
Chris@16 2598 {
Chris@16 2599 // Rewind to start of (? sequence:
Chris@16 2600 --m_position;
Chris@16 2601 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2602 fail(regex_constants::error_badrepeat, m_position - m_base, "A repetition operator cannot be applied to a zero-width assertion.");
Chris@16 2603 return false;
Chris@16 2604 }
Chris@16 2605 }
Chris@16 2606 //
Chris@16 2607 // append closing parenthesis state:
Chris@16 2608 //
Chris@16 2609 pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
Chris@16 2610 pb->index = markid;
Chris@16 2611 pb->icase = this->flags() & regbase::icase;
Chris@16 2612 this->m_paren_start = last_paren_start;
Chris@16 2613 //
Chris@16 2614 // restore the alternate insertion point:
Chris@16 2615 //
Chris@16 2616 this->m_alt_insert_point = last_alt_point;
Chris@16 2617 //
Chris@16 2618 // and the case change data:
Chris@16 2619 //
Chris@16 2620 m_has_case_change = old_case_change;
Chris@16 2621 //
Chris@16 2622 // And the mark_reset data:
Chris@16 2623 //
Chris@16 2624 if(m_max_mark > m_mark_count)
Chris@16 2625 {
Chris@16 2626 m_mark_count = m_max_mark;
Chris@16 2627 }
Chris@16 2628 m_mark_reset = mark_reset;
Chris@16 2629 m_max_mark = max_mark;
Chris@16 2630
Chris@16 2631
Chris@16 2632 if(markid > 0)
Chris@16 2633 {
Chris@16 2634 #ifndef BOOST_NO_STD_DISTANCE
Chris@16 2635 if(this->flags() & regbase::save_subexpression_location)
Chris@16 2636 this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position) - 1;
Chris@16 2637 #else
Chris@16 2638 if(this->flags() & regbase::save_subexpression_location)
Chris@16 2639 this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base) - 1;
Chris@16 2640 #endif
Chris@16 2641 //
Chris@16 2642 // allow backrefs to this mark:
Chris@16 2643 //
Chris@16 2644 if((markid > 0) && (markid < (int)(sizeof(unsigned) * CHAR_BIT)))
Chris@16 2645 this->m_backrefs |= 1u << (markid - 1);
Chris@16 2646 }
Chris@16 2647 return true;
Chris@16 2648 }
Chris@16 2649
Chris@16 2650 template <class charT, class traits>
Chris@16 2651 bool basic_regex_parser<charT, traits>::add_emacs_code(bool negate)
Chris@16 2652 {
Chris@16 2653 //
Chris@16 2654 // parses an emacs style \sx or \Sx construct.
Chris@16 2655 //
Chris@16 2656 if(++m_position == m_end)
Chris@16 2657 {
Chris@16 2658 // Rewind to start of sequence:
Chris@16 2659 --m_position;
Chris@16 2660 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
Chris@16 2661 fail(regex_constants::error_escape, m_position - m_base);
Chris@16 2662 return false;
Chris@16 2663 }
Chris@16 2664 basic_char_set<charT, traits> char_set;
Chris@16 2665 if(negate)
Chris@16 2666 char_set.negate();
Chris@16 2667
Chris@16 2668 static const charT s_punct[5] = { 'p', 'u', 'n', 'c', 't', };
Chris@16 2669
Chris@16 2670 switch(*m_position)
Chris@16 2671 {
Chris@16 2672 case 's':
Chris@16 2673 case ' ':
Chris@16 2674 char_set.add_class(this->m_mask_space);
Chris@16 2675 break;
Chris@16 2676 case 'w':
Chris@16 2677 char_set.add_class(this->m_word_mask);
Chris@16 2678 break;
Chris@16 2679 case '_':
Chris@16 2680 char_set.add_single(digraph<charT>(charT('$')));
Chris@16 2681 char_set.add_single(digraph<charT>(charT('&')));
Chris@16 2682 char_set.add_single(digraph<charT>(charT('*')));
Chris@16 2683 char_set.add_single(digraph<charT>(charT('+')));
Chris@16 2684 char_set.add_single(digraph<charT>(charT('-')));
Chris@16 2685 char_set.add_single(digraph<charT>(charT('_')));
Chris@16 2686 char_set.add_single(digraph<charT>(charT('<')));
Chris@16 2687 char_set.add_single(digraph<charT>(charT('>')));
Chris@16 2688 break;
Chris@16 2689 case '.':
Chris@16 2690 char_set.add_class(this->m_traits.lookup_classname(s_punct, s_punct+5));
Chris@16 2691 break;
Chris@16 2692 case '(':
Chris@16 2693 char_set.add_single(digraph<charT>(charT('(')));
Chris@16 2694 char_set.add_single(digraph<charT>(charT('[')));
Chris@16 2695 char_set.add_single(digraph<charT>(charT('{')));
Chris@16 2696 break;
Chris@16 2697 case ')':
Chris@16 2698 char_set.add_single(digraph<charT>(charT(')')));
Chris@16 2699 char_set.add_single(digraph<charT>(charT(']')));
Chris@16 2700 char_set.add_single(digraph<charT>(charT('}')));
Chris@16 2701 break;
Chris@16 2702 case '"':
Chris@16 2703 char_set.add_single(digraph<charT>(charT('"')));
Chris@16 2704 char_set.add_single(digraph<charT>(charT('\'')));
Chris@16 2705 char_set.add_single(digraph<charT>(charT('`')));
Chris@16 2706 break;
Chris@16 2707 case '\'':
Chris@16 2708 char_set.add_single(digraph<charT>(charT('\'')));
Chris@16 2709 char_set.add_single(digraph<charT>(charT(',')));
Chris@16 2710 char_set.add_single(digraph<charT>(charT('#')));
Chris@16 2711 break;
Chris@16 2712 case '<':
Chris@16 2713 char_set.add_single(digraph<charT>(charT(';')));
Chris@16 2714 break;
Chris@16 2715 case '>':
Chris@16 2716 char_set.add_single(digraph<charT>(charT('\n')));
Chris@16 2717 char_set.add_single(digraph<charT>(charT('\f')));
Chris@16 2718 break;
Chris@16 2719 default:
Chris@16 2720 fail(regex_constants::error_ctype, m_position - m_base);
Chris@16 2721 return false;
Chris@16 2722 }
Chris@16 2723 if(0 == this->append_set(char_set))
Chris@16 2724 {
Chris@16 2725 fail(regex_constants::error_ctype, m_position - m_base);
Chris@16 2726 return false;
Chris@16 2727 }
Chris@16 2728 ++m_position;
Chris@16 2729 return true;
Chris@16 2730 }
Chris@16 2731
Chris@16 2732 template <class charT, class traits>
Chris@16 2733 regex_constants::syntax_option_type basic_regex_parser<charT, traits>::parse_options()
Chris@16 2734 {
Chris@16 2735 // we have a (?imsx-imsx) group, convert it into a set of flags:
Chris@16 2736 regex_constants::syntax_option_type f = this->flags();
Chris@16 2737 bool breakout = false;
Chris@16 2738 do
Chris@16 2739 {
Chris@16 2740 switch(*m_position)
Chris@16 2741 {
Chris@16 2742 case 's':
Chris@16 2743 f |= regex_constants::mod_s;
Chris@16 2744 f &= ~regex_constants::no_mod_s;
Chris@16 2745 break;
Chris@16 2746 case 'm':
Chris@16 2747 f &= ~regex_constants::no_mod_m;
Chris@16 2748 break;
Chris@16 2749 case 'i':
Chris@16 2750 f |= regex_constants::icase;
Chris@16 2751 break;
Chris@16 2752 case 'x':
Chris@16 2753 f |= regex_constants::mod_x;
Chris@16 2754 break;
Chris@16 2755 default:
Chris@16 2756 breakout = true;
Chris@16 2757 continue;
Chris@16 2758 }
Chris@16 2759 if(++m_position == m_end)
Chris@16 2760 {
Chris@16 2761 // Rewind to start of (? sequence:
Chris@16 2762 --m_position;
Chris@16 2763 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2764 fail(regex_constants::error_paren, m_position - m_base);
Chris@16 2765 return false;
Chris@16 2766 }
Chris@16 2767 }
Chris@16 2768 while(!breakout);
Chris@16 2769
Chris@16 2770 breakout = false;
Chris@16 2771
Chris@16 2772 if(*m_position == static_cast<charT>('-'))
Chris@16 2773 {
Chris@16 2774 if(++m_position == m_end)
Chris@16 2775 {
Chris@16 2776 // Rewind to start of (? sequence:
Chris@16 2777 --m_position;
Chris@16 2778 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2779 fail(regex_constants::error_paren, m_position - m_base);
Chris@16 2780 return false;
Chris@16 2781 }
Chris@16 2782 do
Chris@16 2783 {
Chris@16 2784 switch(*m_position)
Chris@16 2785 {
Chris@16 2786 case 's':
Chris@16 2787 f &= ~regex_constants::mod_s;
Chris@16 2788 f |= regex_constants::no_mod_s;
Chris@16 2789 break;
Chris@16 2790 case 'm':
Chris@16 2791 f |= regex_constants::no_mod_m;
Chris@16 2792 break;
Chris@16 2793 case 'i':
Chris@16 2794 f &= ~regex_constants::icase;
Chris@16 2795 break;
Chris@16 2796 case 'x':
Chris@16 2797 f &= ~regex_constants::mod_x;
Chris@16 2798 break;
Chris@16 2799 default:
Chris@16 2800 breakout = true;
Chris@16 2801 continue;
Chris@16 2802 }
Chris@16 2803 if(++m_position == m_end)
Chris@16 2804 {
Chris@16 2805 // Rewind to start of (? sequence:
Chris@16 2806 --m_position;
Chris@16 2807 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2808 fail(regex_constants::error_paren, m_position - m_base);
Chris@16 2809 return false;
Chris@16 2810 }
Chris@16 2811 }
Chris@16 2812 while(!breakout);
Chris@16 2813 }
Chris@16 2814 return f;
Chris@16 2815 }
Chris@16 2816
Chris@16 2817 template <class charT, class traits>
Chris@16 2818 bool basic_regex_parser<charT, traits>::unwind_alts(std::ptrdiff_t last_paren_start)
Chris@16 2819 {
Chris@16 2820 //
Chris@16 2821 // If we didn't actually add any states after the last
Chris@16 2822 // alternative then that's an error:
Chris@16 2823 //
Chris@16 2824 if((this->m_alt_insert_point == static_cast<std::ptrdiff_t>(this->m_pdata->m_data.size()))
Chris@16 2825 && m_alt_jumps.size() && (m_alt_jumps.back() > last_paren_start)
Chris@16 2826 &&
Chris@16 2827 !(
Chris@16 2828 ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
Chris@16 2829 &&
Chris@16 2830 ((this->flags() & regbase::no_empty_expressions) == 0)
Chris@16 2831 )
Chris@16 2832 )
Chris@16 2833 {
Chris@16 2834 fail(regex_constants::error_empty, this->m_position - this->m_base, "Can't terminate a sub-expression with an alternation operator |.");
Chris@16 2835 return false;
Chris@16 2836 }
Chris@16 2837 //
Chris@16 2838 // Fix up our alternatives:
Chris@16 2839 //
Chris@16 2840 while(m_alt_jumps.size() && (m_alt_jumps.back() > last_paren_start))
Chris@16 2841 {
Chris@16 2842 //
Chris@16 2843 // fix up the jump to point to the end of the states
Chris@16 2844 // that we've just added:
Chris@16 2845 //
Chris@16 2846 std::ptrdiff_t jump_offset = m_alt_jumps.back();
Chris@16 2847 m_alt_jumps.pop_back();
Chris@16 2848 this->m_pdata->m_data.align();
Chris@16 2849 re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
Chris@16 2850 BOOST_ASSERT(jmp->type == syntax_element_jump);
Chris@16 2851 jmp->alt.i = this->m_pdata->m_data.size() - jump_offset;
Chris@16 2852 }
Chris@16 2853 return true;
Chris@16 2854 }
Chris@16 2855
Chris@16 2856 #ifdef BOOST_MSVC
Chris@16 2857 #pragma warning(pop)
Chris@16 2858 #endif
Chris@16 2859
Chris@16 2860 } // namespace re_detail
Chris@16 2861 } // namespace boost
Chris@16 2862
Chris@16 2863 #ifdef BOOST_MSVC
Chris@16 2864 #pragma warning(push)
Chris@16 2865 #pragma warning(disable: 4103)
Chris@16 2866 #endif
Chris@16 2867 #ifdef BOOST_HAS_ABI_HEADERS
Chris@16 2868 # include BOOST_ABI_SUFFIX
Chris@16 2869 #endif
Chris@16 2870 #ifdef BOOST_MSVC
Chris@16 2871 #pragma warning(pop)
Chris@16 2872 #endif
Chris@16 2873
Chris@16 2874 #endif