annotate DEPENDENCIES/generic/include/boost/regex/v4/basic_regex_parser.hpp @ 133:4acb5d8d80b6 tip

Don't fail environmental check if README.md exists (but .txt and no-suffix don't)
author Chris Cannam
date Tue, 30 Jul 2019 12:25:44 +0100
parents c530137014c0
children
rev   line source
Chris@16 1 /*
Chris@16 2 *
Chris@16 3 * Copyright (c) 2004
Chris@16 4 * John Maddock
Chris@16 5 *
Chris@16 6 * Use, modification and distribution are subject to the
Chris@16 7 * Boost Software License, Version 1.0. (See accompanying file
Chris@16 8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
Chris@16 9 *
Chris@16 10 */
Chris@16 11
Chris@16 12 /*
Chris@16 13 * LOCATION: see http://www.boost.org for most recent version.
Chris@16 14 * FILE basic_regex_parser.hpp
Chris@16 15 * VERSION see <boost/version.hpp>
Chris@16 16 * DESCRIPTION: Declares template class basic_regex_parser.
Chris@16 17 */
Chris@16 18
Chris@16 19 #ifndef BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP
Chris@16 20 #define BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP
Chris@16 21
Chris@16 22 #ifdef BOOST_MSVC
Chris@16 23 #pragma warning(push)
Chris@16 24 #pragma warning(disable: 4103)
Chris@16 25 #endif
Chris@16 26 #ifdef BOOST_HAS_ABI_HEADERS
Chris@16 27 # include BOOST_ABI_PREFIX
Chris@16 28 #endif
Chris@16 29 #ifdef BOOST_MSVC
Chris@16 30 #pragma warning(pop)
Chris@16 31 #endif
Chris@16 32
Chris@16 33 namespace boost{
Chris@16 34 namespace re_detail{
Chris@16 35
Chris@16 36 #ifdef BOOST_MSVC
Chris@16 37 #pragma warning(push)
Chris@16 38 #pragma warning(disable:4244 4800)
Chris@16 39 #endif
Chris@16 40
Chris@16 41 template <class charT, class traits>
Chris@16 42 class basic_regex_parser : public basic_regex_creator<charT, traits>
Chris@16 43 {
Chris@16 44 public:
Chris@16 45 basic_regex_parser(regex_data<charT, traits>* data);
Chris@16 46 void parse(const charT* p1, const charT* p2, unsigned flags);
Chris@16 47 void fail(regex_constants::error_type error_code, std::ptrdiff_t position);
Chris@16 48 void fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos);
Chris@16 49 void fail(regex_constants::error_type error_code, std::ptrdiff_t position, const std::string& message)
Chris@16 50 {
Chris@16 51 fail(error_code, position, message, position);
Chris@16 52 }
Chris@16 53
Chris@16 54 bool parse_all();
Chris@16 55 bool parse_basic();
Chris@16 56 bool parse_extended();
Chris@16 57 bool parse_literal();
Chris@16 58 bool parse_open_paren();
Chris@16 59 bool parse_basic_escape();
Chris@16 60 bool parse_extended_escape();
Chris@16 61 bool parse_match_any();
Chris@16 62 bool parse_repeat(std::size_t low = 0, std::size_t high = (std::numeric_limits<std::size_t>::max)());
Chris@16 63 bool parse_repeat_range(bool isbasic);
Chris@16 64 bool parse_alt();
Chris@16 65 bool parse_set();
Chris@16 66 bool parse_backref();
Chris@16 67 void parse_set_literal(basic_char_set<charT, traits>& char_set);
Chris@16 68 bool parse_inner_set(basic_char_set<charT, traits>& char_set);
Chris@16 69 bool parse_QE();
Chris@16 70 bool parse_perl_extension();
Chris@16 71 bool add_emacs_code(bool negate);
Chris@16 72 bool unwind_alts(std::ptrdiff_t last_paren_start);
Chris@16 73 digraph<charT> get_next_set_literal(basic_char_set<charT, traits>& char_set);
Chris@16 74 charT unescape_character();
Chris@16 75 regex_constants::syntax_option_type parse_options();
Chris@16 76
Chris@16 77 private:
Chris@16 78 typedef bool (basic_regex_parser::*parser_proc_type)();
Chris@16 79 typedef typename traits::string_type string_type;
Chris@16 80 typedef typename traits::char_class_type char_class_type;
Chris@16 81 parser_proc_type m_parser_proc; // the main parser to use
Chris@16 82 const charT* m_base; // the start of the string being parsed
Chris@16 83 const charT* m_end; // the end of the string being parsed
Chris@16 84 const charT* m_position; // our current parser position
Chris@16 85 unsigned m_mark_count; // how many sub-expressions we have
Chris@16 86 int m_mark_reset; // used to indicate that we're inside a (?|...) block.
Chris@16 87 unsigned m_max_mark; // largest mark count seen inside a (?|...) block.
Chris@16 88 std::ptrdiff_t m_paren_start; // where the last seen ')' began (where repeats are inserted).
Chris@16 89 std::ptrdiff_t m_alt_insert_point; // where to insert the next alternative
Chris@16 90 bool m_has_case_change; // true if somewhere in the current block the case has changed
Chris@16 91 #if defined(BOOST_MSVC) && defined(_M_IX86)
Chris@16 92 // This is an ugly warning suppression workaround (for warnings *inside* std::vector
Chris@16 93 // that can not otherwise be suppressed)...
Chris@16 94 BOOST_STATIC_ASSERT(sizeof(long) >= sizeof(void*));
Chris@16 95 std::vector<long> m_alt_jumps; // list of alternative in the current scope.
Chris@16 96 #else
Chris@16 97 std::vector<std::ptrdiff_t> m_alt_jumps; // list of alternative in the current scope.
Chris@16 98 #endif
Chris@16 99
Chris@16 100 basic_regex_parser& operator=(const basic_regex_parser&);
Chris@16 101 basic_regex_parser(const basic_regex_parser&);
Chris@16 102 };
Chris@16 103
Chris@16 104 template <class charT, class traits>
Chris@16 105 basic_regex_parser<charT, traits>::basic_regex_parser(regex_data<charT, traits>* data)
Chris@16 106 : basic_regex_creator<charT, traits>(data), m_mark_count(0), m_mark_reset(-1), m_max_mark(0), m_paren_start(0), m_alt_insert_point(0), m_has_case_change(false)
Chris@16 107 {
Chris@16 108 }
Chris@16 109
Chris@16 110 template <class charT, class traits>
Chris@16 111 void basic_regex_parser<charT, traits>::parse(const charT* p1, const charT* p2, unsigned l_flags)
Chris@16 112 {
Chris@16 113 // pass l_flags on to base class:
Chris@16 114 this->init(l_flags);
Chris@16 115 // set up pointers:
Chris@16 116 m_position = m_base = p1;
Chris@16 117 m_end = p2;
Chris@16 118 // empty strings are errors:
Chris@16 119 if((p1 == p2) &&
Chris@16 120 (
Chris@16 121 ((l_flags & regbase::main_option_type) != regbase::perl_syntax_group)
Chris@16 122 || (l_flags & regbase::no_empty_expressions)
Chris@16 123 )
Chris@16 124 )
Chris@16 125 {
Chris@16 126 fail(regex_constants::error_empty, 0);
Chris@16 127 return;
Chris@16 128 }
Chris@16 129 // select which parser to use:
Chris@16 130 switch(l_flags & regbase::main_option_type)
Chris@16 131 {
Chris@16 132 case regbase::perl_syntax_group:
Chris@16 133 {
Chris@16 134 m_parser_proc = &basic_regex_parser<charT, traits>::parse_extended;
Chris@16 135 //
Chris@16 136 // Add a leading paren with index zero to give recursions a target:
Chris@16 137 //
Chris@16 138 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
Chris@16 139 br->index = 0;
Chris@16 140 br->icase = this->flags() & regbase::icase;
Chris@16 141 break;
Chris@16 142 }
Chris@16 143 case regbase::basic_syntax_group:
Chris@16 144 m_parser_proc = &basic_regex_parser<charT, traits>::parse_basic;
Chris@16 145 break;
Chris@16 146 case regbase::literal:
Chris@16 147 m_parser_proc = &basic_regex_parser<charT, traits>::parse_literal;
Chris@16 148 break;
Chris@16 149 default:
Chris@16 150 // Ooops, someone has managed to set more than one of the main option flags,
Chris@16 151 // so this must be an error:
Chris@16 152 fail(regex_constants::error_unknown, 0, "An invalid combination of regular expression syntax flags was used.");
Chris@16 153 return;
Chris@16 154 }
Chris@16 155
Chris@16 156 // parse all our characters:
Chris@16 157 bool result = parse_all();
Chris@16 158 //
Chris@16 159 // Unwind our alternatives:
Chris@16 160 //
Chris@16 161 unwind_alts(-1);
Chris@16 162 // reset l_flags as a global scope (?imsx) may have altered them:
Chris@16 163 this->flags(l_flags);
Chris@16 164 // if we haven't gobbled up all the characters then we must
Chris@16 165 // have had an unexpected ')' :
Chris@16 166 if(!result)
Chris@16 167 {
Chris@16 168 fail(regex_constants::error_paren, ::boost::re_detail::distance(m_base, m_position), "Found a closing ) with no corresponding openening parenthesis.");
Chris@16 169 return;
Chris@16 170 }
Chris@16 171 // if an error has been set then give up now:
Chris@16 172 if(this->m_pdata->m_status)
Chris@16 173 return;
Chris@16 174 // fill in our sub-expression count:
Chris@16 175 this->m_pdata->m_mark_count = 1 + m_mark_count;
Chris@16 176 this->finalize(p1, p2);
Chris@16 177 }
Chris@16 178
Chris@16 179 template <class charT, class traits>
Chris@16 180 void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position)
Chris@16 181 {
Chris@16 182 // get the error message:
Chris@16 183 std::string message = this->m_pdata->m_ptraits->error_string(error_code);
Chris@16 184 fail(error_code, position, message);
Chris@16 185 }
Chris@16 186
Chris@16 187 template <class charT, class traits>
Chris@16 188 void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos)
Chris@16 189 {
Chris@16 190 if(0 == this->m_pdata->m_status) // update the error code if not already set
Chris@16 191 this->m_pdata->m_status = error_code;
Chris@16 192 m_position = m_end; // don't bother parsing anything else
Chris@16 193
Chris@16 194 #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
Chris@16 195 //
Chris@16 196 // Augment error message with the regular expression text:
Chris@16 197 //
Chris@16 198 if(start_pos == position)
Chris@16 199 start_pos = (std::max)(static_cast<std::ptrdiff_t>(0), position - static_cast<std::ptrdiff_t>(10));
Chris@16 200 std::ptrdiff_t end_pos = (std::min)(position + static_cast<std::ptrdiff_t>(10), static_cast<std::ptrdiff_t>(m_end - m_base));
Chris@16 201 if(error_code != regex_constants::error_empty)
Chris@16 202 {
Chris@16 203 if((start_pos != 0) || (end_pos != (m_end - m_base)))
Chris@16 204 message += " The error occurred while parsing the regular expression fragment: '";
Chris@16 205 else
Chris@16 206 message += " The error occurred while parsing the regular expression: '";
Chris@16 207 if(start_pos != end_pos)
Chris@16 208 {
Chris@16 209 message += std::string(m_base + start_pos, m_base + position);
Chris@16 210 message += ">>>HERE>>>";
Chris@16 211 message += std::string(m_base + position, m_base + end_pos);
Chris@16 212 }
Chris@16 213 message += "'.";
Chris@16 214 }
Chris@16 215 #endif
Chris@16 216
Chris@16 217 #ifndef BOOST_NO_EXCEPTIONS
Chris@16 218 if(0 == (this->flags() & regex_constants::no_except))
Chris@16 219 {
Chris@16 220 boost::regex_error e(message, error_code, position);
Chris@16 221 e.raise();
Chris@16 222 }
Chris@16 223 #else
Chris@16 224 (void)position; // suppress warnings.
Chris@16 225 #endif
Chris@16 226 }
Chris@16 227
Chris@16 228 template <class charT, class traits>
Chris@16 229 bool basic_regex_parser<charT, traits>::parse_all()
Chris@16 230 {
Chris@16 231 bool result = true;
Chris@16 232 while(result && (m_position != m_end))
Chris@16 233 {
Chris@16 234 result = (this->*m_parser_proc)();
Chris@16 235 }
Chris@16 236 return result;
Chris@16 237 }
Chris@16 238
Chris@16 239 #ifdef BOOST_MSVC
Chris@16 240 #pragma warning(push)
Chris@16 241 #pragma warning(disable:4702)
Chris@16 242 #endif
Chris@16 243 template <class charT, class traits>
Chris@16 244 bool basic_regex_parser<charT, traits>::parse_basic()
Chris@16 245 {
Chris@16 246 switch(this->m_traits.syntax_type(*m_position))
Chris@16 247 {
Chris@16 248 case regex_constants::syntax_escape:
Chris@16 249 return parse_basic_escape();
Chris@16 250 case regex_constants::syntax_dot:
Chris@16 251 return parse_match_any();
Chris@16 252 case regex_constants::syntax_caret:
Chris@16 253 ++m_position;
Chris@16 254 this->append_state(syntax_element_start_line);
Chris@16 255 break;
Chris@16 256 case regex_constants::syntax_dollar:
Chris@16 257 ++m_position;
Chris@16 258 this->append_state(syntax_element_end_line);
Chris@16 259 break;
Chris@16 260 case regex_constants::syntax_star:
Chris@16 261 if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line))
Chris@16 262 return parse_literal();
Chris@16 263 else
Chris@16 264 {
Chris@16 265 ++m_position;
Chris@16 266 return parse_repeat();
Chris@16 267 }
Chris@16 268 case regex_constants::syntax_plus:
Chris@16 269 if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))
Chris@16 270 return parse_literal();
Chris@16 271 else
Chris@16 272 {
Chris@16 273 ++m_position;
Chris@16 274 return parse_repeat(1);
Chris@16 275 }
Chris@16 276 case regex_constants::syntax_question:
Chris@16 277 if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))
Chris@16 278 return parse_literal();
Chris@16 279 else
Chris@16 280 {
Chris@16 281 ++m_position;
Chris@16 282 return parse_repeat(0, 1);
Chris@16 283 }
Chris@16 284 case regex_constants::syntax_open_set:
Chris@16 285 return parse_set();
Chris@16 286 case regex_constants::syntax_newline:
Chris@16 287 if(this->flags() & regbase::newline_alt)
Chris@16 288 return parse_alt();
Chris@16 289 else
Chris@16 290 return parse_literal();
Chris@16 291 default:
Chris@16 292 return parse_literal();
Chris@16 293 }
Chris@16 294 return true;
Chris@16 295 }
Chris@16 296
Chris@16 297 template <class charT, class traits>
Chris@16 298 bool basic_regex_parser<charT, traits>::parse_extended()
Chris@16 299 {
Chris@16 300 bool result = true;
Chris@16 301 switch(this->m_traits.syntax_type(*m_position))
Chris@16 302 {
Chris@16 303 case regex_constants::syntax_open_mark:
Chris@16 304 return parse_open_paren();
Chris@16 305 case regex_constants::syntax_close_mark:
Chris@16 306 return false;
Chris@16 307 case regex_constants::syntax_escape:
Chris@16 308 return parse_extended_escape();
Chris@16 309 case regex_constants::syntax_dot:
Chris@16 310 return parse_match_any();
Chris@16 311 case regex_constants::syntax_caret:
Chris@16 312 ++m_position;
Chris@16 313 this->append_state(
Chris@16 314 (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_start : syntax_element_start_line));
Chris@16 315 break;
Chris@16 316 case regex_constants::syntax_dollar:
Chris@16 317 ++m_position;
Chris@16 318 this->append_state(
Chris@16 319 (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_end : syntax_element_end_line));
Chris@16 320 break;
Chris@16 321 case regex_constants::syntax_star:
Chris@16 322 if(m_position == this->m_base)
Chris@16 323 {
Chris@16 324 fail(regex_constants::error_badrepeat, 0, "The repeat operator \"*\" cannot start a regular expression.");
Chris@16 325 return false;
Chris@16 326 }
Chris@16 327 ++m_position;
Chris@16 328 return parse_repeat();
Chris@16 329 case regex_constants::syntax_question:
Chris@16 330 if(m_position == this->m_base)
Chris@16 331 {
Chris@16 332 fail(regex_constants::error_badrepeat, 0, "The repeat operator \"?\" cannot start a regular expression.");
Chris@16 333 return false;
Chris@16 334 }
Chris@16 335 ++m_position;
Chris@16 336 return parse_repeat(0,1);
Chris@16 337 case regex_constants::syntax_plus:
Chris@16 338 if(m_position == this->m_base)
Chris@16 339 {
Chris@16 340 fail(regex_constants::error_badrepeat, 0, "The repeat operator \"+\" cannot start a regular expression.");
Chris@16 341 return false;
Chris@16 342 }
Chris@16 343 ++m_position;
Chris@16 344 return parse_repeat(1);
Chris@16 345 case regex_constants::syntax_open_brace:
Chris@16 346 ++m_position;
Chris@16 347 return parse_repeat_range(false);
Chris@16 348 case regex_constants::syntax_close_brace:
Chris@101 349 if((this->flags() & regbase::no_perl_ex) == regbase::no_perl_ex)
Chris@101 350 {
Chris@101 351 fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {.");
Chris@101 352 return false;
Chris@101 353 }
Chris@101 354 result = parse_literal();
Chris@101 355 break;
Chris@16 356 case regex_constants::syntax_or:
Chris@16 357 return parse_alt();
Chris@16 358 case regex_constants::syntax_open_set:
Chris@16 359 return parse_set();
Chris@16 360 case regex_constants::syntax_newline:
Chris@16 361 if(this->flags() & regbase::newline_alt)
Chris@16 362 return parse_alt();
Chris@16 363 else
Chris@16 364 return parse_literal();
Chris@16 365 case regex_constants::syntax_hash:
Chris@16 366 //
Chris@16 367 // If we have a mod_x flag set, then skip until
Chris@16 368 // we get to a newline character:
Chris@16 369 //
Chris@16 370 if((this->flags()
Chris@16 371 & (regbase::no_perl_ex|regbase::mod_x))
Chris@16 372 == regbase::mod_x)
Chris@16 373 {
Chris@16 374 while((m_position != m_end) && !is_separator(*m_position++)){}
Chris@16 375 return true;
Chris@16 376 }
Chris@16 377 BOOST_FALLTHROUGH;
Chris@16 378 default:
Chris@16 379 result = parse_literal();
Chris@16 380 break;
Chris@16 381 }
Chris@16 382 return result;
Chris@16 383 }
Chris@16 384 #ifdef BOOST_MSVC
Chris@16 385 #pragma warning(pop)
Chris@16 386 #endif
Chris@16 387
Chris@16 388 template <class charT, class traits>
Chris@16 389 bool basic_regex_parser<charT, traits>::parse_literal()
Chris@16 390 {
Chris@16 391 // append this as a literal provided it's not a space character
Chris@16 392 // or the perl option regbase::mod_x is not set:
Chris@16 393 if(
Chris@16 394 ((this->flags()
Chris@16 395 & (regbase::main_option_type|regbase::mod_x|regbase::no_perl_ex))
Chris@16 396 != regbase::mod_x)
Chris@16 397 || !this->m_traits.isctype(*m_position, this->m_mask_space))
Chris@16 398 this->append_literal(*m_position);
Chris@16 399 ++m_position;
Chris@16 400 return true;
Chris@16 401 }
Chris@16 402
Chris@16 403 template <class charT, class traits>
Chris@16 404 bool basic_regex_parser<charT, traits>::parse_open_paren()
Chris@16 405 {
Chris@16 406 //
Chris@16 407 // skip the '(' and error check:
Chris@16 408 //
Chris@16 409 if(++m_position == m_end)
Chris@16 410 {
Chris@16 411 fail(regex_constants::error_paren, m_position - m_base);
Chris@16 412 return false;
Chris@16 413 }
Chris@16 414 //
Chris@16 415 // begin by checking for a perl-style (?...) extension:
Chris@16 416 //
Chris@16 417 if(
Chris@16 418 ((this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) == 0)
Chris@16 419 || ((this->flags() & (regbase::main_option_type | regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))
Chris@16 420 )
Chris@16 421 {
Chris@16 422 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
Chris@16 423 return parse_perl_extension();
Chris@16 424 }
Chris@16 425 //
Chris@16 426 // update our mark count, and append the required state:
Chris@16 427 //
Chris@16 428 unsigned markid = 0;
Chris@16 429 if(0 == (this->flags() & regbase::nosubs))
Chris@16 430 {
Chris@16 431 markid = ++m_mark_count;
Chris@16 432 #ifndef BOOST_NO_STD_DISTANCE
Chris@16 433 if(this->flags() & regbase::save_subexpression_location)
Chris@16 434 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 1, 0));
Chris@16 435 #else
Chris@16 436 if(this->flags() & regbase::save_subexpression_location)
Chris@16 437 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - 1, 0));
Chris@16 438 #endif
Chris@16 439 }
Chris@16 440 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
Chris@16 441 pb->index = markid;
Chris@16 442 pb->icase = this->flags() & regbase::icase;
Chris@16 443 std::ptrdiff_t last_paren_start = this->getoffset(pb);
Chris@16 444 // back up insertion point for alternations, and set new point:
Chris@16 445 std::ptrdiff_t last_alt_point = m_alt_insert_point;
Chris@16 446 this->m_pdata->m_data.align();
Chris@16 447 m_alt_insert_point = this->m_pdata->m_data.size();
Chris@16 448 //
Chris@16 449 // back up the current flags in case we have a nested (?imsx) group:
Chris@16 450 //
Chris@16 451 regex_constants::syntax_option_type opts = this->flags();
Chris@16 452 bool old_case_change = m_has_case_change;
Chris@16 453 m_has_case_change = false; // no changes to this scope as yet...
Chris@16 454 //
Chris@16 455 // Back up branch reset data in case we have a nested (?|...)
Chris@16 456 //
Chris@16 457 int mark_reset = m_mark_reset;
Chris@16 458 m_mark_reset = -1;
Chris@16 459 //
Chris@16 460 // now recursively add more states, this will terminate when we get to a
Chris@16 461 // matching ')' :
Chris@16 462 //
Chris@16 463 parse_all();
Chris@16 464 //
Chris@16 465 // Unwind pushed alternatives:
Chris@16 466 //
Chris@16 467 if(0 == unwind_alts(last_paren_start))
Chris@16 468 return false;
Chris@16 469 //
Chris@16 470 // restore flags:
Chris@16 471 //
Chris@16 472 if(m_has_case_change)
Chris@16 473 {
Chris@16 474 // the case has changed in one or more of the alternatives
Chris@16 475 // within the scoped (...) block: we have to add a state
Chris@16 476 // to reset the case sensitivity:
Chris@16 477 static_cast<re_case*>(
Chris@16 478 this->append_state(syntax_element_toggle_case, sizeof(re_case))
Chris@16 479 )->icase = opts & regbase::icase;
Chris@16 480 }
Chris@16 481 this->flags(opts);
Chris@16 482 m_has_case_change = old_case_change;
Chris@16 483 //
Chris@16 484 // restore branch reset:
Chris@16 485 //
Chris@16 486 m_mark_reset = mark_reset;
Chris@16 487 //
Chris@16 488 // we either have a ')' or we have run out of characters prematurely:
Chris@16 489 //
Chris@16 490 if(m_position == m_end)
Chris@16 491 {
Chris@16 492 this->fail(regex_constants::error_paren, ::boost::re_detail::distance(m_base, m_end));
Chris@16 493 return false;
Chris@16 494 }
Chris@16 495 BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
Chris@16 496 #ifndef BOOST_NO_STD_DISTANCE
Chris@16 497 if(markid && (this->flags() & regbase::save_subexpression_location))
Chris@16 498 this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position);
Chris@16 499 #else
Chris@16 500 if(markid && (this->flags() & regbase::save_subexpression_location))
Chris@16 501 this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base);
Chris@16 502 #endif
Chris@16 503 ++m_position;
Chris@16 504 //
Chris@16 505 // append closing parenthesis state:
Chris@16 506 //
Chris@16 507 pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
Chris@16 508 pb->index = markid;
Chris@16 509 pb->icase = this->flags() & regbase::icase;
Chris@16 510 this->m_paren_start = last_paren_start;
Chris@16 511 //
Chris@16 512 // restore the alternate insertion point:
Chris@16 513 //
Chris@16 514 this->m_alt_insert_point = last_alt_point;
Chris@16 515 //
Chris@16 516 // allow backrefs to this mark:
Chris@16 517 //
Chris@16 518 if((markid > 0) && (markid < sizeof(unsigned) * CHAR_BIT))
Chris@16 519 this->m_backrefs |= 1u << (markid - 1);
Chris@16 520
Chris@16 521 return true;
Chris@16 522 }
Chris@16 523
Chris@16 524 template <class charT, class traits>
Chris@16 525 bool basic_regex_parser<charT, traits>::parse_basic_escape()
Chris@16 526 {
Chris@16 527 ++m_position;
Chris@16 528 bool result = true;
Chris@16 529 switch(this->m_traits.escape_syntax_type(*m_position))
Chris@16 530 {
Chris@16 531 case regex_constants::syntax_open_mark:
Chris@16 532 return parse_open_paren();
Chris@16 533 case regex_constants::syntax_close_mark:
Chris@16 534 return false;
Chris@16 535 case regex_constants::syntax_plus:
Chris@16 536 if(this->flags() & regex_constants::bk_plus_qm)
Chris@16 537 {
Chris@16 538 ++m_position;
Chris@16 539 return parse_repeat(1);
Chris@16 540 }
Chris@16 541 else
Chris@16 542 return parse_literal();
Chris@16 543 case regex_constants::syntax_question:
Chris@16 544 if(this->flags() & regex_constants::bk_plus_qm)
Chris@16 545 {
Chris@16 546 ++m_position;
Chris@16 547 return parse_repeat(0, 1);
Chris@16 548 }
Chris@16 549 else
Chris@16 550 return parse_literal();
Chris@16 551 case regex_constants::syntax_open_brace:
Chris@16 552 if(this->flags() & regbase::no_intervals)
Chris@16 553 return parse_literal();
Chris@16 554 ++m_position;
Chris@16 555 return parse_repeat_range(true);
Chris@16 556 case regex_constants::syntax_close_brace:
Chris@16 557 if(this->flags() & regbase::no_intervals)
Chris@16 558 return parse_literal();
Chris@16 559 fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {.");
Chris@16 560 return false;
Chris@16 561 case regex_constants::syntax_or:
Chris@16 562 if(this->flags() & regbase::bk_vbar)
Chris@16 563 return parse_alt();
Chris@16 564 else
Chris@16 565 result = parse_literal();
Chris@16 566 break;
Chris@16 567 case regex_constants::syntax_digit:
Chris@16 568 return parse_backref();
Chris@16 569 case regex_constants::escape_type_start_buffer:
Chris@16 570 if(this->flags() & regbase::emacs_ex)
Chris@16 571 {
Chris@16 572 ++m_position;
Chris@16 573 this->append_state(syntax_element_buffer_start);
Chris@16 574 }
Chris@16 575 else
Chris@16 576 result = parse_literal();
Chris@16 577 break;
Chris@16 578 case regex_constants::escape_type_end_buffer:
Chris@16 579 if(this->flags() & regbase::emacs_ex)
Chris@16 580 {
Chris@16 581 ++m_position;
Chris@16 582 this->append_state(syntax_element_buffer_end);
Chris@16 583 }
Chris@16 584 else
Chris@16 585 result = parse_literal();
Chris@16 586 break;
Chris@16 587 case regex_constants::escape_type_word_assert:
Chris@16 588 if(this->flags() & regbase::emacs_ex)
Chris@16 589 {
Chris@16 590 ++m_position;
Chris@16 591 this->append_state(syntax_element_word_boundary);
Chris@16 592 }
Chris@16 593 else
Chris@16 594 result = parse_literal();
Chris@16 595 break;
Chris@16 596 case regex_constants::escape_type_not_word_assert:
Chris@16 597 if(this->flags() & regbase::emacs_ex)
Chris@16 598 {
Chris@16 599 ++m_position;
Chris@16 600 this->append_state(syntax_element_within_word);
Chris@16 601 }
Chris@16 602 else
Chris@16 603 result = parse_literal();
Chris@16 604 break;
Chris@16 605 case regex_constants::escape_type_left_word:
Chris@16 606 if(this->flags() & regbase::emacs_ex)
Chris@16 607 {
Chris@16 608 ++m_position;
Chris@16 609 this->append_state(syntax_element_word_start);
Chris@16 610 }
Chris@16 611 else
Chris@16 612 result = parse_literal();
Chris@16 613 break;
Chris@16 614 case regex_constants::escape_type_right_word:
Chris@16 615 if(this->flags() & regbase::emacs_ex)
Chris@16 616 {
Chris@16 617 ++m_position;
Chris@16 618 this->append_state(syntax_element_word_end);
Chris@16 619 }
Chris@16 620 else
Chris@16 621 result = parse_literal();
Chris@16 622 break;
Chris@16 623 default:
Chris@16 624 if(this->flags() & regbase::emacs_ex)
Chris@16 625 {
Chris@16 626 bool negate = true;
Chris@16 627 switch(*m_position)
Chris@16 628 {
Chris@16 629 case 'w':
Chris@16 630 negate = false;
Chris@16 631 BOOST_FALLTHROUGH;
Chris@16 632 case 'W':
Chris@16 633 {
Chris@16 634 basic_char_set<charT, traits> char_set;
Chris@16 635 if(negate)
Chris@16 636 char_set.negate();
Chris@16 637 char_set.add_class(this->m_word_mask);
Chris@16 638 if(0 == this->append_set(char_set))
Chris@16 639 {
Chris@16 640 fail(regex_constants::error_ctype, m_position - m_base);
Chris@16 641 return false;
Chris@16 642 }
Chris@16 643 ++m_position;
Chris@16 644 return true;
Chris@16 645 }
Chris@16 646 case 's':
Chris@16 647 negate = false;
Chris@16 648 BOOST_FALLTHROUGH;
Chris@16 649 case 'S':
Chris@16 650 return add_emacs_code(negate);
Chris@16 651 case 'c':
Chris@16 652 case 'C':
Chris@16 653 // not supported yet:
Chris@16 654 fail(regex_constants::error_escape, m_position - m_base, "The \\c and \\C escape sequences are not supported by POSIX basic regular expressions: try the Perl syntax instead.");
Chris@16 655 return false;
Chris@16 656 default:
Chris@16 657 break;
Chris@16 658 }
Chris@16 659 }
Chris@16 660 result = parse_literal();
Chris@16 661 break;
Chris@16 662 }
Chris@16 663 return result;
Chris@16 664 }
Chris@16 665
Chris@16 666 template <class charT, class traits>
Chris@16 667 bool basic_regex_parser<charT, traits>::parse_extended_escape()
Chris@16 668 {
Chris@16 669 ++m_position;
Chris@16 670 if(m_position == m_end)
Chris@16 671 {
Chris@16 672 fail(regex_constants::error_escape, m_position - m_base, "Incomplete escape sequence found.");
Chris@16 673 return false;
Chris@16 674 }
Chris@16 675 bool negate = false; // in case this is a character class escape: \w \d etc
Chris@16 676 switch(this->m_traits.escape_syntax_type(*m_position))
Chris@16 677 {
Chris@16 678 case regex_constants::escape_type_not_class:
Chris@16 679 negate = true;
Chris@16 680 BOOST_FALLTHROUGH;
Chris@16 681 case regex_constants::escape_type_class:
Chris@16 682 {
Chris@16 683 escape_type_class_jump:
Chris@16 684 typedef typename traits::char_class_type m_type;
Chris@16 685 m_type m = this->m_traits.lookup_classname(m_position, m_position+1);
Chris@16 686 if(m != 0)
Chris@16 687 {
Chris@16 688 basic_char_set<charT, traits> char_set;
Chris@16 689 if(negate)
Chris@16 690 char_set.negate();
Chris@16 691 char_set.add_class(m);
Chris@16 692 if(0 == this->append_set(char_set))
Chris@16 693 {
Chris@16 694 fail(regex_constants::error_ctype, m_position - m_base);
Chris@16 695 return false;
Chris@16 696 }
Chris@16 697 ++m_position;
Chris@16 698 return true;
Chris@16 699 }
Chris@16 700 //
Chris@16 701 // not a class, just a regular unknown escape:
Chris@16 702 //
Chris@16 703 this->append_literal(unescape_character());
Chris@16 704 break;
Chris@16 705 }
Chris@16 706 case regex_constants::syntax_digit:
Chris@16 707 return parse_backref();
Chris@16 708 case regex_constants::escape_type_left_word:
Chris@16 709 ++m_position;
Chris@16 710 this->append_state(syntax_element_word_start);
Chris@16 711 break;
Chris@16 712 case regex_constants::escape_type_right_word:
Chris@16 713 ++m_position;
Chris@16 714 this->append_state(syntax_element_word_end);
Chris@16 715 break;
Chris@16 716 case regex_constants::escape_type_start_buffer:
Chris@16 717 ++m_position;
Chris@16 718 this->append_state(syntax_element_buffer_start);
Chris@16 719 break;
Chris@16 720 case regex_constants::escape_type_end_buffer:
Chris@16 721 ++m_position;
Chris@16 722 this->append_state(syntax_element_buffer_end);
Chris@16 723 break;
Chris@16 724 case regex_constants::escape_type_word_assert:
Chris@16 725 ++m_position;
Chris@16 726 this->append_state(syntax_element_word_boundary);
Chris@16 727 break;
Chris@16 728 case regex_constants::escape_type_not_word_assert:
Chris@16 729 ++m_position;
Chris@16 730 this->append_state(syntax_element_within_word);
Chris@16 731 break;
Chris@16 732 case regex_constants::escape_type_Z:
Chris@16 733 ++m_position;
Chris@16 734 this->append_state(syntax_element_soft_buffer_end);
Chris@16 735 break;
Chris@16 736 case regex_constants::escape_type_Q:
Chris@16 737 return parse_QE();
Chris@16 738 case regex_constants::escape_type_C:
Chris@16 739 return parse_match_any();
Chris@16 740 case regex_constants::escape_type_X:
Chris@16 741 ++m_position;
Chris@16 742 this->append_state(syntax_element_combining);
Chris@16 743 break;
Chris@16 744 case regex_constants::escape_type_G:
Chris@16 745 ++m_position;
Chris@16 746 this->append_state(syntax_element_restart_continue);
Chris@16 747 break;
Chris@16 748 case regex_constants::escape_type_not_property:
Chris@16 749 negate = true;
Chris@16 750 BOOST_FALLTHROUGH;
Chris@16 751 case regex_constants::escape_type_property:
Chris@16 752 {
Chris@16 753 ++m_position;
Chris@16 754 char_class_type m;
Chris@16 755 if(m_position == m_end)
Chris@16 756 {
Chris@16 757 fail(regex_constants::error_escape, m_position - m_base, "Incomplete property escape found.");
Chris@16 758 return false;
Chris@16 759 }
Chris@16 760 // maybe have \p{ddd}
Chris@16 761 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
Chris@16 762 {
Chris@16 763 const charT* base = m_position;
Chris@16 764 // skip forward until we find enclosing brace:
Chris@16 765 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
Chris@16 766 ++m_position;
Chris@16 767 if(m_position == m_end)
Chris@16 768 {
Chris@16 769 fail(regex_constants::error_escape, m_position - m_base, "Closing } missing from property escape sequence.");
Chris@16 770 return false;
Chris@16 771 }
Chris@16 772 m = this->m_traits.lookup_classname(++base, m_position++);
Chris@16 773 }
Chris@16 774 else
Chris@16 775 {
Chris@16 776 m = this->m_traits.lookup_classname(m_position, m_position+1);
Chris@16 777 ++m_position;
Chris@16 778 }
Chris@16 779 if(m != 0)
Chris@16 780 {
Chris@16 781 basic_char_set<charT, traits> char_set;
Chris@16 782 if(negate)
Chris@16 783 char_set.negate();
Chris@16 784 char_set.add_class(m);
Chris@16 785 if(0 == this->append_set(char_set))
Chris@16 786 {
Chris@16 787 fail(regex_constants::error_ctype, m_position - m_base);
Chris@16 788 return false;
Chris@16 789 }
Chris@16 790 return true;
Chris@16 791 }
Chris@16 792 fail(regex_constants::error_ctype, m_position - m_base, "Escape sequence was neither a valid property nor a valid character class name.");
Chris@16 793 return false;
Chris@16 794 }
Chris@16 795 case regex_constants::escape_type_reset_start_mark:
Chris@16 796 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
Chris@16 797 {
Chris@16 798 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
Chris@16 799 pb->index = -5;
Chris@16 800 pb->icase = this->flags() & regbase::icase;
Chris@16 801 this->m_pdata->m_data.align();
Chris@16 802 ++m_position;
Chris@16 803 return true;
Chris@16 804 }
Chris@16 805 goto escape_type_class_jump;
Chris@16 806 case regex_constants::escape_type_line_ending:
Chris@16 807 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
Chris@16 808 {
Chris@16 809 const charT* e = get_escape_R_string<charT>();
Chris@16 810 const charT* old_position = m_position;
Chris@16 811 const charT* old_end = m_end;
Chris@16 812 const charT* old_base = m_base;
Chris@16 813 m_position = e;
Chris@16 814 m_base = e;
Chris@16 815 m_end = e + traits::length(e);
Chris@16 816 bool r = parse_all();
Chris@16 817 m_position = ++old_position;
Chris@16 818 m_end = old_end;
Chris@16 819 m_base = old_base;
Chris@16 820 return r;
Chris@16 821 }
Chris@16 822 goto escape_type_class_jump;
Chris@16 823 case regex_constants::escape_type_extended_backref:
Chris@16 824 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
Chris@16 825 {
Chris@16 826 bool have_brace = false;
Chris@16 827 bool negative = false;
Chris@16 828 static const char* incomplete_message = "Incomplete \\g escape found.";
Chris@16 829 if(++m_position == m_end)
Chris@16 830 {
Chris@16 831 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
Chris@16 832 return false;
Chris@16 833 }
Chris@16 834 // maybe have \g{ddd}
Chris@16 835 regex_constants::syntax_type syn = this->m_traits.syntax_type(*m_position);
Chris@16 836 regex_constants::syntax_type syn_end = 0;
Chris@16 837 if((syn == regex_constants::syntax_open_brace)
Chris@16 838 || (syn == regex_constants::escape_type_left_word)
Chris@16 839 || (syn == regex_constants::escape_type_end_buffer))
Chris@16 840 {
Chris@16 841 if(++m_position == m_end)
Chris@16 842 {
Chris@16 843 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
Chris@16 844 return false;
Chris@16 845 }
Chris@16 846 have_brace = true;
Chris@16 847 switch(syn)
Chris@16 848 {
Chris@16 849 case regex_constants::syntax_open_brace:
Chris@16 850 syn_end = regex_constants::syntax_close_brace;
Chris@16 851 break;
Chris@16 852 case regex_constants::escape_type_left_word:
Chris@16 853 syn_end = regex_constants::escape_type_right_word;
Chris@16 854 break;
Chris@16 855 default:
Chris@16 856 syn_end = regex_constants::escape_type_end_buffer;
Chris@16 857 break;
Chris@16 858 }
Chris@16 859 }
Chris@16 860 negative = (*m_position == static_cast<charT>('-'));
Chris@16 861 if((negative) && (++m_position == m_end))
Chris@16 862 {
Chris@16 863 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
Chris@16 864 return false;
Chris@16 865 }
Chris@16 866 const charT* pc = m_position;
Chris@16 867 int i = this->m_traits.toi(pc, m_end, 10);
Chris@16 868 if((i < 0) && syn_end)
Chris@16 869 {
Chris@16 870 // Check for a named capture, get the leftmost one if there is more than one:
Chris@16 871 const charT* base = m_position;
Chris@16 872 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != syn_end))
Chris@16 873 {
Chris@16 874 ++m_position;
Chris@16 875 }
Chris@16 876 i = hash_value_from_capture_name(base, m_position);
Chris@16 877 pc = m_position;
Chris@16 878 }
Chris@16 879 if(negative)
Chris@16 880 i = 1 + m_mark_count - i;
Chris@16 881 if(((i > 0) && (this->m_backrefs & (1u << (i-1)))) || ((i > 10000) && (this->m_pdata->get_id(i) > 0) && (this->m_backrefs & (1u << (this->m_pdata->get_id(i)-1)))))
Chris@16 882 {
Chris@16 883 m_position = pc;
Chris@16 884 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
Chris@16 885 pb->index = i;
Chris@16 886 pb->icase = this->flags() & regbase::icase;
Chris@16 887 }
Chris@16 888 else
Chris@16 889 {
Chris@16 890 fail(regex_constants::error_backref, m_position - m_base);
Chris@16 891 return false;
Chris@16 892 }
Chris@16 893 m_position = pc;
Chris@16 894 if(have_brace)
Chris@16 895 {
Chris@16 896 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != syn_end))
Chris@16 897 {
Chris@16 898 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
Chris@16 899 return false;
Chris@16 900 }
Chris@16 901 ++m_position;
Chris@16 902 }
Chris@16 903 return true;
Chris@16 904 }
Chris@16 905 goto escape_type_class_jump;
Chris@16 906 case regex_constants::escape_type_control_v:
Chris@16 907 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
Chris@16 908 goto escape_type_class_jump;
Chris@16 909 BOOST_FALLTHROUGH;
Chris@16 910 default:
Chris@16 911 this->append_literal(unescape_character());
Chris@16 912 break;
Chris@16 913 }
Chris@16 914 return true;
Chris@16 915 }
Chris@16 916
Chris@16 917 template <class charT, class traits>
Chris@16 918 bool basic_regex_parser<charT, traits>::parse_match_any()
Chris@16 919 {
Chris@16 920 //
Chris@16 921 // we have a '.' that can match any character:
Chris@16 922 //
Chris@16 923 ++m_position;
Chris@16 924 static_cast<re_dot*>(
Chris@16 925 this->append_state(syntax_element_wild, sizeof(re_dot))
Chris@16 926 )->mask = static_cast<unsigned char>(this->flags() & regbase::no_mod_s
Chris@16 927 ? re_detail::force_not_newline
Chris@16 928 : this->flags() & regbase::mod_s ?
Chris@16 929 re_detail::force_newline : re_detail::dont_care);
Chris@16 930 return true;
Chris@16 931 }
Chris@16 932
Chris@16 933 template <class charT, class traits>
Chris@16 934 bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_t high)
Chris@16 935 {
Chris@16 936 bool greedy = true;
Chris@16 937 bool pocessive = false;
Chris@16 938 std::size_t insert_point;
Chris@16 939 //
Chris@16 940 // when we get to here we may have a non-greedy ? mark still to come:
Chris@16 941 //
Chris@16 942 if((m_position != m_end)
Chris@16 943 && (
Chris@16 944 (0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
Chris@16 945 || ((regbase::basic_syntax_group|regbase::emacs_ex) == (this->flags() & (regbase::main_option_type | regbase::emacs_ex)))
Chris@16 946 )
Chris@16 947 )
Chris@16 948 {
Chris@16 949 // OK we have a perl or emacs regex, check for a '?':
Chris@16 950 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
Chris@16 951 {
Chris@16 952 greedy = false;
Chris@16 953 ++m_position;
Chris@16 954 }
Chris@16 955 // for perl regexes only check for pocessive ++ repeats.
Chris@16 956 if((m_position != m_end)
Chris@16 957 && (0 == (this->flags() & regbase::main_option_type))
Chris@16 958 && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_plus))
Chris@16 959 {
Chris@16 960 pocessive = true;
Chris@16 961 ++m_position;
Chris@16 962 }
Chris@16 963 }
Chris@16 964 if(0 == this->m_last_state)
Chris@16 965 {
Chris@16 966 fail(regex_constants::error_badrepeat, ::boost::re_detail::distance(m_base, m_position), "Nothing to repeat.");
Chris@16 967 return false;
Chris@16 968 }
Chris@16 969 if(this->m_last_state->type == syntax_element_endmark)
Chris@16 970 {
Chris@16 971 // insert a repeat before the '(' matching the last ')':
Chris@16 972 insert_point = this->m_paren_start;
Chris@16 973 }
Chris@16 974 else if((this->m_last_state->type == syntax_element_literal) && (static_cast<re_literal*>(this->m_last_state)->length > 1))
Chris@16 975 {
Chris@16 976 // the last state was a literal with more than one character, split it in two:
Chris@16 977 re_literal* lit = static_cast<re_literal*>(this->m_last_state);
Chris@16 978 charT c = (static_cast<charT*>(static_cast<void*>(lit+1)))[lit->length - 1];
Chris@101 979 lit->length -= 1;
Chris@16 980 // now append new state:
Chris@16 981 lit = static_cast<re_literal*>(this->append_state(syntax_element_literal, sizeof(re_literal) + sizeof(charT)));
Chris@16 982 lit->length = 1;
Chris@16 983 (static_cast<charT*>(static_cast<void*>(lit+1)))[0] = c;
Chris@16 984 insert_point = this->getoffset(this->m_last_state);
Chris@16 985 }
Chris@16 986 else
Chris@16 987 {
Chris@16 988 // repeat the last state whatever it was, need to add some error checking here:
Chris@16 989 switch(this->m_last_state->type)
Chris@16 990 {
Chris@16 991 case syntax_element_start_line:
Chris@16 992 case syntax_element_end_line:
Chris@16 993 case syntax_element_word_boundary:
Chris@16 994 case syntax_element_within_word:
Chris@16 995 case syntax_element_word_start:
Chris@16 996 case syntax_element_word_end:
Chris@16 997 case syntax_element_buffer_start:
Chris@16 998 case syntax_element_buffer_end:
Chris@16 999 case syntax_element_alt:
Chris@16 1000 case syntax_element_soft_buffer_end:
Chris@16 1001 case syntax_element_restart_continue:
Chris@16 1002 case syntax_element_jump:
Chris@16 1003 case syntax_element_startmark:
Chris@16 1004 case syntax_element_backstep:
Chris@16 1005 // can't legally repeat any of the above:
Chris@16 1006 fail(regex_constants::error_badrepeat, m_position - m_base);
Chris@16 1007 return false;
Chris@16 1008 default:
Chris@16 1009 // do nothing...
Chris@16 1010 break;
Chris@16 1011 }
Chris@16 1012 insert_point = this->getoffset(this->m_last_state);
Chris@16 1013 }
Chris@16 1014 //
Chris@16 1015 // OK we now know what to repeat, so insert the repeat around it:
Chris@16 1016 //
Chris@16 1017 re_repeat* rep = static_cast<re_repeat*>(this->insert_state(insert_point, syntax_element_rep, re_repeater_size));
Chris@16 1018 rep->min = low;
Chris@16 1019 rep->max = high;
Chris@16 1020 rep->greedy = greedy;
Chris@16 1021 rep->leading = false;
Chris@16 1022 // store our repeater position for later:
Chris@16 1023 std::ptrdiff_t rep_off = this->getoffset(rep);
Chris@16 1024 // and append a back jump to the repeat:
Chris@16 1025 re_jump* jmp = static_cast<re_jump*>(this->append_state(syntax_element_jump, sizeof(re_jump)));
Chris@16 1026 jmp->alt.i = rep_off - this->getoffset(jmp);
Chris@16 1027 this->m_pdata->m_data.align();
Chris@16 1028 // now fill in the alt jump for the repeat:
Chris@16 1029 rep = static_cast<re_repeat*>(this->getaddress(rep_off));
Chris@16 1030 rep->alt.i = this->m_pdata->m_data.size() - rep_off;
Chris@16 1031 //
Chris@16 1032 // If the repeat is pocessive then bracket the repeat with a (?>...)
Chris@16 1033 // independent sub-expression construct:
Chris@16 1034 //
Chris@16 1035 if(pocessive)
Chris@16 1036 {
Chris@16 1037 if(m_position != m_end)
Chris@16 1038 {
Chris@16 1039 //
Chris@16 1040 // Check for illegal following quantifier, we have to do this here, because
Chris@16 1041 // the extra states we insert below circumvents our usual error checking :-(
Chris@16 1042 //
Chris@16 1043 switch(this->m_traits.syntax_type(*m_position))
Chris@16 1044 {
Chris@16 1045 case regex_constants::syntax_star:
Chris@16 1046 case regex_constants::syntax_plus:
Chris@16 1047 case regex_constants::syntax_question:
Chris@16 1048 case regex_constants::syntax_open_brace:
Chris@16 1049 fail(regex_constants::error_badrepeat, m_position - m_base);
Chris@16 1050 return false;
Chris@16 1051 }
Chris@16 1052 }
Chris@16 1053 re_brace* pb = static_cast<re_brace*>(this->insert_state(insert_point, syntax_element_startmark, sizeof(re_brace)));
Chris@16 1054 pb->index = -3;
Chris@16 1055 pb->icase = this->flags() & regbase::icase;
Chris@16 1056 jmp = static_cast<re_jump*>(this->insert_state(insert_point + sizeof(re_brace), syntax_element_jump, sizeof(re_jump)));
Chris@16 1057 this->m_pdata->m_data.align();
Chris@16 1058 jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
Chris@16 1059 pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
Chris@16 1060 pb->index = -3;
Chris@16 1061 pb->icase = this->flags() & regbase::icase;
Chris@16 1062 }
Chris@16 1063 return true;
Chris@16 1064 }
Chris@16 1065
Chris@16 1066 template <class charT, class traits>
Chris@16 1067 bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic)
Chris@16 1068 {
Chris@16 1069 static const char* incomplete_message = "Missing } in quantified repetition.";
Chris@16 1070 //
Chris@16 1071 // parse a repeat-range:
Chris@16 1072 //
Chris@16 1073 std::size_t min, max;
Chris@16 1074 int v;
Chris@16 1075 // skip whitespace:
Chris@16 1076 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
Chris@16 1077 ++m_position;
Chris@16 1078 if(this->m_position == this->m_end)
Chris@16 1079 {
Chris@16 1080 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
Chris@16 1081 {
Chris@16 1082 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
Chris@16 1083 return false;
Chris@16 1084 }
Chris@16 1085 // Treat the opening '{' as a literal character, rewind to start of error:
Chris@16 1086 --m_position;
Chris@16 1087 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
Chris@16 1088 return parse_literal();
Chris@16 1089 }
Chris@16 1090 // get min:
Chris@16 1091 v = this->m_traits.toi(m_position, m_end, 10);
Chris@16 1092 // skip whitespace:
Chris@16 1093 if(v < 0)
Chris@16 1094 {
Chris@16 1095 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
Chris@16 1096 {
Chris@16 1097 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
Chris@16 1098 return false;
Chris@16 1099 }
Chris@16 1100 // Treat the opening '{' as a literal character, rewind to start of error:
Chris@16 1101 --m_position;
Chris@16 1102 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
Chris@16 1103 return parse_literal();
Chris@16 1104 }
Chris@16 1105 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
Chris@16 1106 ++m_position;
Chris@16 1107 if(this->m_position == this->m_end)
Chris@16 1108 {
Chris@16 1109 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
Chris@16 1110 {
Chris@16 1111 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
Chris@16 1112 return false;
Chris@16 1113 }
Chris@16 1114 // Treat the opening '{' as a literal character, rewind to start of error:
Chris@16 1115 --m_position;
Chris@16 1116 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
Chris@16 1117 return parse_literal();
Chris@16 1118 }
Chris@16 1119 min = v;
Chris@16 1120 // see if we have a comma:
Chris@16 1121 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_comma)
Chris@16 1122 {
Chris@16 1123 // move on and error check:
Chris@16 1124 ++m_position;
Chris@16 1125 // skip whitespace:
Chris@16 1126 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
Chris@16 1127 ++m_position;
Chris@16 1128 if(this->m_position == this->m_end)
Chris@16 1129 {
Chris@16 1130 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
Chris@16 1131 {
Chris@16 1132 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
Chris@16 1133 return false;
Chris@16 1134 }
Chris@16 1135 // Treat the opening '{' as a literal character, rewind to start of error:
Chris@16 1136 --m_position;
Chris@16 1137 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
Chris@16 1138 return parse_literal();
Chris@16 1139 }
Chris@16 1140 // get the value if any:
Chris@16 1141 v = this->m_traits.toi(m_position, m_end, 10);
Chris@16 1142 max = (v >= 0) ? (std::size_t)v : (std::numeric_limits<std::size_t>::max)();
Chris@16 1143 }
Chris@16 1144 else
Chris@16 1145 {
Chris@16 1146 // no comma, max = min:
Chris@16 1147 max = min;
Chris@16 1148 }
Chris@16 1149 // skip whitespace:
Chris@16 1150 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
Chris@16 1151 ++m_position;
Chris@16 1152 // OK now check trailing }:
Chris@16 1153 if(this->m_position == this->m_end)
Chris@16 1154 {
Chris@16 1155 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
Chris@16 1156 {
Chris@16 1157 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
Chris@16 1158 return false;
Chris@16 1159 }
Chris@16 1160 // Treat the opening '{' as a literal character, rewind to start of error:
Chris@16 1161 --m_position;
Chris@16 1162 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
Chris@16 1163 return parse_literal();
Chris@16 1164 }
Chris@16 1165 if(isbasic)
Chris@16 1166 {
Chris@16 1167 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_escape)
Chris@16 1168 {
Chris@16 1169 ++m_position;
Chris@16 1170 if(this->m_position == this->m_end)
Chris@16 1171 {
Chris@16 1172 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
Chris@16 1173 return false;
Chris@16 1174 }
Chris@16 1175 }
Chris@16 1176 else
Chris@16 1177 {
Chris@16 1178 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
Chris@16 1179 return false;
Chris@16 1180 }
Chris@16 1181 }
Chris@16 1182 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_brace)
Chris@16 1183 ++m_position;
Chris@16 1184 else
Chris@16 1185 {
Chris@16 1186 // Treat the opening '{' as a literal character, rewind to start of error:
Chris@16 1187 --m_position;
Chris@16 1188 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
Chris@16 1189 return parse_literal();
Chris@16 1190 }
Chris@16 1191 //
Chris@16 1192 // finally go and add the repeat, unless error:
Chris@16 1193 //
Chris@16 1194 if(min > max)
Chris@16 1195 {
Chris@16 1196 // Backtrack to error location:
Chris@16 1197 m_position -= 2;
Chris@16 1198 while(this->m_traits.isctype(*m_position, this->m_word_mask)) --m_position;
Chris@16 1199 ++m_position;
Chris@16 1200 fail(regex_constants::error_badbrace, m_position - m_base);
Chris@16 1201 return false;
Chris@16 1202 }
Chris@16 1203 return parse_repeat(min, max);
Chris@16 1204 }
Chris@16 1205
Chris@16 1206 template <class charT, class traits>
Chris@16 1207 bool basic_regex_parser<charT, traits>::parse_alt()
Chris@16 1208 {
Chris@16 1209 //
Chris@16 1210 // error check: if there have been no previous states,
Chris@16 1211 // or if the last state was a '(' then error:
Chris@16 1212 //
Chris@16 1213 if(
Chris@16 1214 ((this->m_last_state == 0) || (this->m_last_state->type == syntax_element_startmark))
Chris@16 1215 &&
Chris@16 1216 !(
Chris@16 1217 ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
Chris@16 1218 &&
Chris@16 1219 ((this->flags() & regbase::no_empty_expressions) == 0)
Chris@16 1220 )
Chris@16 1221 )
Chris@16 1222 {
Chris@101 1223 fail(regex_constants::error_empty, this->m_position - this->m_base, "A regular expression cannot start with the alternation operator |.");
Chris@16 1224 return false;
Chris@16 1225 }
Chris@16 1226 //
Chris@16 1227 // Reset mark count if required:
Chris@16 1228 //
Chris@16 1229 if(m_max_mark < m_mark_count)
Chris@16 1230 m_max_mark = m_mark_count;
Chris@16 1231 if(m_mark_reset >= 0)
Chris@16 1232 m_mark_count = m_mark_reset;
Chris@16 1233
Chris@16 1234 ++m_position;
Chris@16 1235 //
Chris@16 1236 // we need to append a trailing jump:
Chris@16 1237 //
Chris@16 1238 re_syntax_base* pj = this->append_state(re_detail::syntax_element_jump, sizeof(re_jump));
Chris@16 1239 std::ptrdiff_t jump_offset = this->getoffset(pj);
Chris@16 1240 //
Chris@16 1241 // now insert the alternative:
Chris@16 1242 //
Chris@16 1243 re_alt* palt = static_cast<re_alt*>(this->insert_state(this->m_alt_insert_point, syntax_element_alt, re_alt_size));
Chris@16 1244 jump_offset += re_alt_size;
Chris@16 1245 this->m_pdata->m_data.align();
Chris@16 1246 palt->alt.i = this->m_pdata->m_data.size() - this->getoffset(palt);
Chris@16 1247 //
Chris@16 1248 // update m_alt_insert_point so that the next alternate gets
Chris@16 1249 // inserted at the start of the second of the two we've just created:
Chris@16 1250 //
Chris@16 1251 this->m_alt_insert_point = this->m_pdata->m_data.size();
Chris@16 1252 //
Chris@16 1253 // the start of this alternative must have a case changes state
Chris@16 1254 // if the current block has messed around with case changes:
Chris@16 1255 //
Chris@16 1256 if(m_has_case_change)
Chris@16 1257 {
Chris@16 1258 static_cast<re_case*>(
Chris@16 1259 this->append_state(syntax_element_toggle_case, sizeof(re_case))
Chris@16 1260 )->icase = this->m_icase;
Chris@16 1261 }
Chris@16 1262 //
Chris@16 1263 // push the alternative onto our stack, a recursive
Chris@16 1264 // implementation here is easier to understand (and faster
Chris@16 1265 // as it happens), but causes all kinds of stack overflow problems
Chris@16 1266 // on programs with small stacks (COM+).
Chris@16 1267 //
Chris@16 1268 m_alt_jumps.push_back(jump_offset);
Chris@16 1269 return true;
Chris@16 1270 }
Chris@16 1271
Chris@16 1272 template <class charT, class traits>
Chris@16 1273 bool basic_regex_parser<charT, traits>::parse_set()
Chris@16 1274 {
Chris@16 1275 static const char* incomplete_message = "Character set declaration starting with [ terminated prematurely - either no ] was found or the set had no content.";
Chris@16 1276 ++m_position;
Chris@16 1277 if(m_position == m_end)
Chris@16 1278 {
Chris@16 1279 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
Chris@16 1280 return false;
Chris@16 1281 }
Chris@16 1282 basic_char_set<charT, traits> char_set;
Chris@16 1283
Chris@16 1284 const charT* base = m_position; // where the '[' was
Chris@16 1285 const charT* item_base = m_position; // where the '[' or '^' was
Chris@16 1286
Chris@16 1287 while(m_position != m_end)
Chris@16 1288 {
Chris@16 1289 switch(this->m_traits.syntax_type(*m_position))
Chris@16 1290 {
Chris@16 1291 case regex_constants::syntax_caret:
Chris@16 1292 if(m_position == base)
Chris@16 1293 {
Chris@16 1294 char_set.negate();
Chris@16 1295 ++m_position;
Chris@16 1296 item_base = m_position;
Chris@16 1297 }
Chris@16 1298 else
Chris@16 1299 parse_set_literal(char_set);
Chris@16 1300 break;
Chris@16 1301 case regex_constants::syntax_close_set:
Chris@16 1302 if(m_position == item_base)
Chris@16 1303 {
Chris@16 1304 parse_set_literal(char_set);
Chris@16 1305 break;
Chris@16 1306 }
Chris@16 1307 else
Chris@16 1308 {
Chris@16 1309 ++m_position;
Chris@16 1310 if(0 == this->append_set(char_set))
Chris@16 1311 {
Chris@16 1312 fail(regex_constants::error_ctype, m_position - m_base);
Chris@16 1313 return false;
Chris@16 1314 }
Chris@16 1315 }
Chris@16 1316 return true;
Chris@16 1317 case regex_constants::syntax_open_set:
Chris@16 1318 if(parse_inner_set(char_set))
Chris@16 1319 break;
Chris@16 1320 return true;
Chris@16 1321 case regex_constants::syntax_escape:
Chris@16 1322 {
Chris@16 1323 //
Chris@16 1324 // look ahead and see if this is a character class shortcut
Chris@16 1325 // \d \w \s etc...
Chris@16 1326 //
Chris@16 1327 ++m_position;
Chris@16 1328 if(this->m_traits.escape_syntax_type(*m_position)
Chris@16 1329 == regex_constants::escape_type_class)
Chris@16 1330 {
Chris@16 1331 char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
Chris@16 1332 if(m != 0)
Chris@16 1333 {
Chris@16 1334 char_set.add_class(m);
Chris@16 1335 ++m_position;
Chris@16 1336 break;
Chris@16 1337 }
Chris@16 1338 }
Chris@16 1339 else if(this->m_traits.escape_syntax_type(*m_position)
Chris@16 1340 == regex_constants::escape_type_not_class)
Chris@16 1341 {
Chris@16 1342 // negated character class:
Chris@16 1343 char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
Chris@16 1344 if(m != 0)
Chris@16 1345 {
Chris@16 1346 char_set.add_negated_class(m);
Chris@16 1347 ++m_position;
Chris@16 1348 break;
Chris@16 1349 }
Chris@16 1350 }
Chris@16 1351 // not a character class, just a regular escape:
Chris@16 1352 --m_position;
Chris@16 1353 parse_set_literal(char_set);
Chris@16 1354 break;
Chris@16 1355 }
Chris@16 1356 default:
Chris@16 1357 parse_set_literal(char_set);
Chris@16 1358 break;
Chris@16 1359 }
Chris@16 1360 }
Chris@16 1361 return m_position != m_end;
Chris@16 1362 }
Chris@16 1363
Chris@16 1364 template <class charT, class traits>
Chris@16 1365 bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, traits>& char_set)
Chris@16 1366 {
Chris@16 1367 static const char* incomplete_message = "Character class declaration starting with [ terminated prematurely - either no ] was found or the set had no content.";
Chris@16 1368 //
Chris@16 1369 // we have either a character class [:name:]
Chris@16 1370 // a collating element [.name.]
Chris@16 1371 // or an equivalence class [=name=]
Chris@16 1372 //
Chris@16 1373 if(m_end == ++m_position)
Chris@16 1374 {
Chris@16 1375 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
Chris@16 1376 return false;
Chris@16 1377 }
Chris@16 1378 switch(this->m_traits.syntax_type(*m_position))
Chris@16 1379 {
Chris@16 1380 case regex_constants::syntax_dot:
Chris@16 1381 //
Chris@16 1382 // a collating element is treated as a literal:
Chris@16 1383 //
Chris@16 1384 --m_position;
Chris@16 1385 parse_set_literal(char_set);
Chris@16 1386 return true;
Chris@16 1387 case regex_constants::syntax_colon:
Chris@16 1388 {
Chris@16 1389 // check that character classes are actually enabled:
Chris@16 1390 if((this->flags() & (regbase::main_option_type | regbase::no_char_classes))
Chris@16 1391 == (regbase::basic_syntax_group | regbase::no_char_classes))
Chris@16 1392 {
Chris@16 1393 --m_position;
Chris@16 1394 parse_set_literal(char_set);
Chris@16 1395 return true;
Chris@16 1396 }
Chris@16 1397 // skip the ':'
Chris@16 1398 if(m_end == ++m_position)
Chris@16 1399 {
Chris@16 1400 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
Chris@16 1401 return false;
Chris@16 1402 }
Chris@16 1403 const charT* name_first = m_position;
Chris@16 1404 // skip at least one character, then find the matching ':]'
Chris@16 1405 if(m_end == ++m_position)
Chris@16 1406 {
Chris@16 1407 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
Chris@16 1408 return false;
Chris@16 1409 }
Chris@16 1410 while((m_position != m_end)
Chris@16 1411 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_colon))
Chris@16 1412 ++m_position;
Chris@16 1413 const charT* name_last = m_position;
Chris@16 1414 if(m_end == m_position)
Chris@16 1415 {
Chris@16 1416 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
Chris@16 1417 return false;
Chris@16 1418 }
Chris@16 1419 if((m_end == ++m_position)
Chris@16 1420 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
Chris@16 1421 {
Chris@16 1422 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
Chris@16 1423 return false;
Chris@16 1424 }
Chris@16 1425 //
Chris@16 1426 // check for negated class:
Chris@16 1427 //
Chris@16 1428 bool negated = false;
Chris@16 1429 if(this->m_traits.syntax_type(*name_first) == regex_constants::syntax_caret)
Chris@16 1430 {
Chris@16 1431 ++name_first;
Chris@16 1432 negated = true;
Chris@16 1433 }
Chris@16 1434 typedef typename traits::char_class_type m_type;
Chris@16 1435 m_type m = this->m_traits.lookup_classname(name_first, name_last);
Chris@16 1436 if(m == 0)
Chris@16 1437 {
Chris@16 1438 if(char_set.empty() && (name_last - name_first == 1))
Chris@16 1439 {
Chris@16 1440 // maybe a special case:
Chris@16 1441 ++m_position;
Chris@16 1442 if( (m_position != m_end)
Chris@16 1443 && (this->m_traits.syntax_type(*m_position)
Chris@16 1444 == regex_constants::syntax_close_set))
Chris@16 1445 {
Chris@16 1446 if(this->m_traits.escape_syntax_type(*name_first)
Chris@16 1447 == regex_constants::escape_type_left_word)
Chris@16 1448 {
Chris@16 1449 ++m_position;
Chris@16 1450 this->append_state(syntax_element_word_start);
Chris@16 1451 return false;
Chris@16 1452 }
Chris@16 1453 if(this->m_traits.escape_syntax_type(*name_first)
Chris@16 1454 == regex_constants::escape_type_right_word)
Chris@16 1455 {
Chris@16 1456 ++m_position;
Chris@16 1457 this->append_state(syntax_element_word_end);
Chris@16 1458 return false;
Chris@16 1459 }
Chris@16 1460 }
Chris@16 1461 }
Chris@16 1462 fail(regex_constants::error_ctype, name_first - m_base);
Chris@16 1463 return false;
Chris@16 1464 }
Chris@16 1465 if(negated == false)
Chris@16 1466 char_set.add_class(m);
Chris@16 1467 else
Chris@16 1468 char_set.add_negated_class(m);
Chris@16 1469 ++m_position;
Chris@16 1470 break;
Chris@16 1471 }
Chris@16 1472 case regex_constants::syntax_equal:
Chris@16 1473 {
Chris@16 1474 // skip the '='
Chris@16 1475 if(m_end == ++m_position)
Chris@16 1476 {
Chris@16 1477 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
Chris@16 1478 return false;
Chris@16 1479 }
Chris@16 1480 const charT* name_first = m_position;
Chris@16 1481 // skip at least one character, then find the matching '=]'
Chris@16 1482 if(m_end == ++m_position)
Chris@16 1483 {
Chris@16 1484 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
Chris@16 1485 return false;
Chris@16 1486 }
Chris@16 1487 while((m_position != m_end)
Chris@16 1488 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal))
Chris@16 1489 ++m_position;
Chris@16 1490 const charT* name_last = m_position;
Chris@16 1491 if(m_end == m_position)
Chris@16 1492 {
Chris@16 1493 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
Chris@16 1494 return false;
Chris@16 1495 }
Chris@16 1496 if((m_end == ++m_position)
Chris@16 1497 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
Chris@16 1498 {
Chris@16 1499 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
Chris@16 1500 return false;
Chris@16 1501 }
Chris@16 1502 string_type m = this->m_traits.lookup_collatename(name_first, name_last);
Chris@16 1503 if((0 == m.size()) || (m.size() > 2))
Chris@16 1504 {
Chris@16 1505 fail(regex_constants::error_collate, name_first - m_base);
Chris@16 1506 return false;
Chris@16 1507 }
Chris@16 1508 digraph<charT> d;
Chris@16 1509 d.first = m[0];
Chris@16 1510 if(m.size() > 1)
Chris@16 1511 d.second = m[1];
Chris@16 1512 else
Chris@16 1513 d.second = 0;
Chris@16 1514 char_set.add_equivalent(d);
Chris@16 1515 ++m_position;
Chris@16 1516 break;
Chris@16 1517 }
Chris@16 1518 default:
Chris@16 1519 --m_position;
Chris@16 1520 parse_set_literal(char_set);
Chris@16 1521 break;
Chris@16 1522 }
Chris@16 1523 return true;
Chris@16 1524 }
Chris@16 1525
Chris@16 1526 template <class charT, class traits>
Chris@16 1527 void basic_regex_parser<charT, traits>::parse_set_literal(basic_char_set<charT, traits>& char_set)
Chris@16 1528 {
Chris@16 1529 digraph<charT> start_range(get_next_set_literal(char_set));
Chris@16 1530 if(m_end == m_position)
Chris@16 1531 {
Chris@16 1532 fail(regex_constants::error_brack, m_position - m_base);
Chris@16 1533 return;
Chris@16 1534 }
Chris@16 1535 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
Chris@16 1536 {
Chris@16 1537 // we have a range:
Chris@16 1538 if(m_end == ++m_position)
Chris@16 1539 {
Chris@16 1540 fail(regex_constants::error_brack, m_position - m_base);
Chris@16 1541 return;
Chris@16 1542 }
Chris@16 1543 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)
Chris@16 1544 {
Chris@16 1545 digraph<charT> end_range = get_next_set_literal(char_set);
Chris@16 1546 char_set.add_range(start_range, end_range);
Chris@16 1547 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
Chris@16 1548 {
Chris@16 1549 if(m_end == ++m_position)
Chris@16 1550 {
Chris@16 1551 fail(regex_constants::error_brack, m_position - m_base);
Chris@16 1552 return;
Chris@16 1553 }
Chris@16 1554 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_set)
Chris@16 1555 {
Chris@16 1556 // trailing - :
Chris@16 1557 --m_position;
Chris@16 1558 return;
Chris@16 1559 }
Chris@16 1560 fail(regex_constants::error_range, m_position - m_base);
Chris@16 1561 return;
Chris@16 1562 }
Chris@16 1563 return;
Chris@16 1564 }
Chris@16 1565 --m_position;
Chris@16 1566 }
Chris@16 1567 char_set.add_single(start_range);
Chris@16 1568 }
Chris@16 1569
Chris@16 1570 template <class charT, class traits>
Chris@16 1571 digraph<charT> basic_regex_parser<charT, traits>::get_next_set_literal(basic_char_set<charT, traits>& char_set)
Chris@16 1572 {
Chris@16 1573 digraph<charT> result;
Chris@16 1574 switch(this->m_traits.syntax_type(*m_position))
Chris@16 1575 {
Chris@16 1576 case regex_constants::syntax_dash:
Chris@16 1577 if(!char_set.empty())
Chris@16 1578 {
Chris@16 1579 // see if we are at the end of the set:
Chris@16 1580 if((++m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
Chris@16 1581 {
Chris@16 1582 fail(regex_constants::error_range, m_position - m_base);
Chris@16 1583 return result;
Chris@16 1584 }
Chris@16 1585 --m_position;
Chris@16 1586 }
Chris@16 1587 result.first = *m_position++;
Chris@16 1588 return result;
Chris@16 1589 case regex_constants::syntax_escape:
Chris@16 1590 // check to see if escapes are supported first:
Chris@16 1591 if(this->flags() & regex_constants::no_escape_in_lists)
Chris@16 1592 {
Chris@16 1593 result = *m_position++;
Chris@16 1594 break;
Chris@16 1595 }
Chris@16 1596 ++m_position;
Chris@16 1597 result = unescape_character();
Chris@16 1598 break;
Chris@16 1599 case regex_constants::syntax_open_set:
Chris@16 1600 {
Chris@16 1601 if(m_end == ++m_position)
Chris@16 1602 {
Chris@16 1603 fail(regex_constants::error_collate, m_position - m_base);
Chris@16 1604 return result;
Chris@16 1605 }
Chris@16 1606 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot)
Chris@16 1607 {
Chris@16 1608 --m_position;
Chris@16 1609 result.first = *m_position;
Chris@16 1610 ++m_position;
Chris@16 1611 return result;
Chris@16 1612 }
Chris@16 1613 if(m_end == ++m_position)
Chris@16 1614 {
Chris@16 1615 fail(regex_constants::error_collate, m_position - m_base);
Chris@16 1616 return result;
Chris@16 1617 }
Chris@16 1618 const charT* name_first = m_position;
Chris@16 1619 // skip at least one character, then find the matching ':]'
Chris@16 1620 if(m_end == ++m_position)
Chris@16 1621 {
Chris@16 1622 fail(regex_constants::error_collate, name_first - m_base);
Chris@16 1623 return result;
Chris@16 1624 }
Chris@16 1625 while((m_position != m_end)
Chris@16 1626 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot))
Chris@16 1627 ++m_position;
Chris@16 1628 const charT* name_last = m_position;
Chris@16 1629 if(m_end == m_position)
Chris@16 1630 {
Chris@16 1631 fail(regex_constants::error_collate, name_first - m_base);
Chris@16 1632 return result;
Chris@16 1633 }
Chris@16 1634 if((m_end == ++m_position)
Chris@16 1635 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
Chris@16 1636 {
Chris@16 1637 fail(regex_constants::error_collate, name_first - m_base);
Chris@16 1638 return result;
Chris@16 1639 }
Chris@16 1640 ++m_position;
Chris@16 1641 string_type s = this->m_traits.lookup_collatename(name_first, name_last);
Chris@16 1642 if(s.empty() || (s.size() > 2))
Chris@16 1643 {
Chris@16 1644 fail(regex_constants::error_collate, name_first - m_base);
Chris@16 1645 return result;
Chris@16 1646 }
Chris@16 1647 result.first = s[0];
Chris@16 1648 if(s.size() > 1)
Chris@16 1649 result.second = s[1];
Chris@16 1650 else
Chris@16 1651 result.second = 0;
Chris@16 1652 return result;
Chris@16 1653 }
Chris@16 1654 default:
Chris@16 1655 result = *m_position++;
Chris@16 1656 }
Chris@16 1657 return result;
Chris@16 1658 }
Chris@16 1659
Chris@16 1660 //
Chris@16 1661 // does a value fit in the specified charT type?
Chris@16 1662 //
Chris@16 1663 template <class charT>
Chris@16 1664 bool valid_value(charT, int v, const mpl::true_&)
Chris@16 1665 {
Chris@16 1666 return (v >> (sizeof(charT) * CHAR_BIT)) == 0;
Chris@16 1667 }
Chris@16 1668 template <class charT>
Chris@16 1669 bool valid_value(charT, int, const mpl::false_&)
Chris@16 1670 {
Chris@16 1671 return true; // v will alsways fit in a charT
Chris@16 1672 }
Chris@16 1673 template <class charT>
Chris@16 1674 bool valid_value(charT c, int v)
Chris@16 1675 {
Chris@16 1676 return valid_value(c, v, mpl::bool_<(sizeof(charT) < sizeof(int))>());
Chris@16 1677 }
Chris@16 1678
Chris@16 1679 template <class charT, class traits>
Chris@16 1680 charT basic_regex_parser<charT, traits>::unescape_character()
Chris@16 1681 {
Chris@16 1682 #ifdef BOOST_MSVC
Chris@16 1683 #pragma warning(push)
Chris@16 1684 #pragma warning(disable:4127)
Chris@16 1685 #endif
Chris@16 1686 charT result(0);
Chris@16 1687 if(m_position == m_end)
Chris@16 1688 {
Chris@16 1689 fail(regex_constants::error_escape, m_position - m_base, "Escape sequence terminated prematurely.");
Chris@16 1690 return false;
Chris@16 1691 }
Chris@16 1692 switch(this->m_traits.escape_syntax_type(*m_position))
Chris@16 1693 {
Chris@16 1694 case regex_constants::escape_type_control_a:
Chris@16 1695 result = charT('\a');
Chris@16 1696 break;
Chris@16 1697 case regex_constants::escape_type_e:
Chris@16 1698 result = charT(27);
Chris@16 1699 break;
Chris@16 1700 case regex_constants::escape_type_control_f:
Chris@16 1701 result = charT('\f');
Chris@16 1702 break;
Chris@16 1703 case regex_constants::escape_type_control_n:
Chris@16 1704 result = charT('\n');
Chris@16 1705 break;
Chris@16 1706 case regex_constants::escape_type_control_r:
Chris@16 1707 result = charT('\r');
Chris@16 1708 break;
Chris@16 1709 case regex_constants::escape_type_control_t:
Chris@16 1710 result = charT('\t');
Chris@16 1711 break;
Chris@16 1712 case regex_constants::escape_type_control_v:
Chris@16 1713 result = charT('\v');
Chris@16 1714 break;
Chris@16 1715 case regex_constants::escape_type_word_assert:
Chris@16 1716 result = charT('\b');
Chris@16 1717 break;
Chris@16 1718 case regex_constants::escape_type_ascii_control:
Chris@16 1719 ++m_position;
Chris@16 1720 if(m_position == m_end)
Chris@16 1721 {
Chris@16 1722 // Rewind to start of escape:
Chris@16 1723 --m_position;
Chris@16 1724 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
Chris@16 1725 fail(regex_constants::error_escape, m_position - m_base, "ASCII escape sequence terminated prematurely.");
Chris@16 1726 return result;
Chris@16 1727 }
Chris@16 1728 result = static_cast<charT>(*m_position % 32);
Chris@16 1729 break;
Chris@16 1730 case regex_constants::escape_type_hex:
Chris@16 1731 ++m_position;
Chris@16 1732 if(m_position == m_end)
Chris@16 1733 {
Chris@16 1734 // Rewind to start of escape:
Chris@16 1735 --m_position;
Chris@16 1736 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
Chris@16 1737 fail(regex_constants::error_escape, m_position - m_base, "Hexadecimal escape sequence terminated prematurely.");
Chris@16 1738 return result;
Chris@16 1739 }
Chris@16 1740 // maybe have \x{ddd}
Chris@16 1741 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
Chris@16 1742 {
Chris@16 1743 ++m_position;
Chris@16 1744 if(m_position == m_end)
Chris@16 1745 {
Chris@16 1746 // Rewind to start of escape:
Chris@16 1747 --m_position;
Chris@16 1748 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
Chris@16 1749 fail(regex_constants::error_escape, m_position - m_base, "Missing } in hexadecimal escape sequence.");
Chris@16 1750 return result;
Chris@16 1751 }
Chris@16 1752 int i = this->m_traits.toi(m_position, m_end, 16);
Chris@16 1753 if((m_position == m_end)
Chris@16 1754 || (i < 0)
Chris@16 1755 || ((std::numeric_limits<charT>::is_specialized) && (i > (int)(std::numeric_limits<charT>::max)()))
Chris@16 1756 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
Chris@16 1757 {
Chris@16 1758 // Rewind to start of escape:
Chris@16 1759 --m_position;
Chris@16 1760 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
Chris@16 1761 fail(regex_constants::error_badbrace, m_position - m_base, "Hexadecimal escape sequence was invalid.");
Chris@16 1762 return result;
Chris@16 1763 }
Chris@16 1764 ++m_position;
Chris@16 1765 result = charT(i);
Chris@16 1766 }
Chris@16 1767 else
Chris@16 1768 {
Chris@16 1769 std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), static_cast<std::ptrdiff_t>(m_end - m_position));
Chris@16 1770 int i = this->m_traits.toi(m_position, m_position + len, 16);
Chris@16 1771 if((i < 0)
Chris@16 1772 || !valid_value(charT(0), i))
Chris@16 1773 {
Chris@16 1774 // Rewind to start of escape:
Chris@16 1775 --m_position;
Chris@16 1776 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
Chris@16 1777 fail(regex_constants::error_escape, m_position - m_base, "Escape sequence did not encode a valid character.");
Chris@16 1778 return result;
Chris@16 1779 }
Chris@16 1780 result = charT(i);
Chris@16 1781 }
Chris@16 1782 return result;
Chris@16 1783 case regex_constants::syntax_digit:
Chris@16 1784 {
Chris@16 1785 // an octal escape sequence, the first character must be a zero
Chris@16 1786 // followed by up to 3 octal digits:
Chris@16 1787 std::ptrdiff_t len = (std::min)(::boost::re_detail::distance(m_position, m_end), static_cast<std::ptrdiff_t>(4));
Chris@16 1788 const charT* bp = m_position;
Chris@16 1789 int val = this->m_traits.toi(bp, bp + 1, 8);
Chris@16 1790 if(val != 0)
Chris@16 1791 {
Chris@16 1792 // Rewind to start of escape:
Chris@16 1793 --m_position;
Chris@16 1794 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
Chris@16 1795 // Oops not an octal escape after all:
Chris@16 1796 fail(regex_constants::error_escape, m_position - m_base, "Invalid octal escape sequence.");
Chris@16 1797 return result;
Chris@16 1798 }
Chris@16 1799 val = this->m_traits.toi(m_position, m_position + len, 8);
Chris@16 1800 if(val < 0)
Chris@16 1801 {
Chris@16 1802 // Rewind to start of escape:
Chris@16 1803 --m_position;
Chris@16 1804 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
Chris@16 1805 fail(regex_constants::error_escape, m_position - m_base, "Octal escape sequence is invalid.");
Chris@16 1806 return result;
Chris@16 1807 }
Chris@16 1808 return static_cast<charT>(val);
Chris@16 1809 }
Chris@16 1810 case regex_constants::escape_type_named_char:
Chris@16 1811 {
Chris@16 1812 ++m_position;
Chris@16 1813 if(m_position == m_end)
Chris@16 1814 {
Chris@16 1815 // Rewind to start of escape:
Chris@16 1816 --m_position;
Chris@16 1817 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
Chris@16 1818 fail(regex_constants::error_escape, m_position - m_base);
Chris@16 1819 return false;
Chris@16 1820 }
Chris@16 1821 // maybe have \N{name}
Chris@16 1822 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
Chris@16 1823 {
Chris@16 1824 const charT* base = m_position;
Chris@16 1825 // skip forward until we find enclosing brace:
Chris@16 1826 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
Chris@16 1827 ++m_position;
Chris@16 1828 if(m_position == m_end)
Chris@16 1829 {
Chris@16 1830 // Rewind to start of escape:
Chris@16 1831 --m_position;
Chris@16 1832 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
Chris@16 1833 fail(regex_constants::error_escape, m_position - m_base);
Chris@16 1834 return false;
Chris@16 1835 }
Chris@16 1836 string_type s = this->m_traits.lookup_collatename(++base, m_position++);
Chris@16 1837 if(s.empty())
Chris@16 1838 {
Chris@16 1839 // Rewind to start of escape:
Chris@16 1840 --m_position;
Chris@16 1841 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
Chris@16 1842 fail(regex_constants::error_collate, m_position - m_base);
Chris@16 1843 return false;
Chris@16 1844 }
Chris@16 1845 if(s.size() == 1)
Chris@16 1846 {
Chris@16 1847 return s[0];
Chris@16 1848 }
Chris@16 1849 }
Chris@16 1850 // fall through is a failure:
Chris@16 1851 // Rewind to start of escape:
Chris@16 1852 --m_position;
Chris@16 1853 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
Chris@16 1854 fail(regex_constants::error_escape, m_position - m_base);
Chris@16 1855 return false;
Chris@16 1856 }
Chris@16 1857 default:
Chris@16 1858 result = *m_position;
Chris@16 1859 break;
Chris@16 1860 }
Chris@16 1861 ++m_position;
Chris@16 1862 return result;
Chris@16 1863 #ifdef BOOST_MSVC
Chris@16 1864 #pragma warning(pop)
Chris@16 1865 #endif
Chris@16 1866 }
Chris@16 1867
Chris@16 1868 template <class charT, class traits>
Chris@16 1869 bool basic_regex_parser<charT, traits>::parse_backref()
Chris@16 1870 {
Chris@16 1871 BOOST_ASSERT(m_position != m_end);
Chris@16 1872 const charT* pc = m_position;
Chris@16 1873 int i = this->m_traits.toi(pc, pc + 1, 10);
Chris@16 1874 if((i == 0) || (((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) && (this->flags() & regbase::no_bk_refs)))
Chris@16 1875 {
Chris@16 1876 // not a backref at all but an octal escape sequence:
Chris@16 1877 charT c = unescape_character();
Chris@16 1878 this->append_literal(c);
Chris@16 1879 }
Chris@16 1880 else if((i > 0) && (this->m_backrefs & (1u << (i-1))))
Chris@16 1881 {
Chris@16 1882 m_position = pc;
Chris@16 1883 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
Chris@16 1884 pb->index = i;
Chris@16 1885 pb->icase = this->flags() & regbase::icase;
Chris@16 1886 }
Chris@16 1887 else
Chris@16 1888 {
Chris@16 1889 // Rewind to start of escape:
Chris@16 1890 --m_position;
Chris@16 1891 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
Chris@16 1892 fail(regex_constants::error_backref, m_position - m_base);
Chris@16 1893 return false;
Chris@16 1894 }
Chris@16 1895 return true;
Chris@16 1896 }
Chris@16 1897
Chris@16 1898 template <class charT, class traits>
Chris@16 1899 bool basic_regex_parser<charT, traits>::parse_QE()
Chris@16 1900 {
Chris@16 1901 #ifdef BOOST_MSVC
Chris@16 1902 #pragma warning(push)
Chris@16 1903 #pragma warning(disable:4127)
Chris@16 1904 #endif
Chris@16 1905 //
Chris@16 1906 // parse a \Q...\E sequence:
Chris@16 1907 //
Chris@16 1908 ++m_position; // skip the Q
Chris@16 1909 const charT* start = m_position;
Chris@16 1910 const charT* end;
Chris@16 1911 do
Chris@16 1912 {
Chris@16 1913 while((m_position != m_end)
Chris@16 1914 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape))
Chris@16 1915 ++m_position;
Chris@16 1916 if(m_position == m_end)
Chris@16 1917 {
Chris@16 1918 // a \Q...\E sequence may terminate with the end of the expression:
Chris@16 1919 end = m_position;
Chris@16 1920 break;
Chris@16 1921 }
Chris@16 1922 if(++m_position == m_end) // skip the escape
Chris@16 1923 {
Chris@16 1924 fail(regex_constants::error_escape, m_position - m_base, "Unterminated \\Q...\\E sequence.");
Chris@16 1925 return false;
Chris@16 1926 }
Chris@16 1927 // check to see if it's a \E:
Chris@16 1928 if(this->m_traits.escape_syntax_type(*m_position) == regex_constants::escape_type_E)
Chris@16 1929 {
Chris@16 1930 ++m_position;
Chris@16 1931 end = m_position - 2;
Chris@16 1932 break;
Chris@16 1933 }
Chris@16 1934 // otherwise go round again:
Chris@16 1935 }while(true);
Chris@16 1936 //
Chris@16 1937 // now add all the character between the two escapes as literals:
Chris@16 1938 //
Chris@16 1939 while(start != end)
Chris@16 1940 {
Chris@16 1941 this->append_literal(*start);
Chris@16 1942 ++start;
Chris@16 1943 }
Chris@16 1944 return true;
Chris@16 1945 #ifdef BOOST_MSVC
Chris@16 1946 #pragma warning(pop)
Chris@16 1947 #endif
Chris@16 1948 }
Chris@16 1949
Chris@16 1950 template <class charT, class traits>
Chris@16 1951 bool basic_regex_parser<charT, traits>::parse_perl_extension()
Chris@16 1952 {
Chris@16 1953 if(++m_position == m_end)
Chris@16 1954 {
Chris@16 1955 // Rewind to start of (? sequence:
Chris@16 1956 --m_position;
Chris@16 1957 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 1958 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 1959 return false;
Chris@16 1960 }
Chris@16 1961 //
Chris@16 1962 // treat comments as a special case, as these
Chris@16 1963 // are the only ones that don't start with a leading
Chris@16 1964 // startmark state:
Chris@16 1965 //
Chris@16 1966 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_hash)
Chris@16 1967 {
Chris@16 1968 while((m_position != m_end)
Chris@16 1969 && (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark))
Chris@16 1970 {}
Chris@16 1971 return true;
Chris@16 1972 }
Chris@16 1973 //
Chris@16 1974 // backup some state, and prepare the way:
Chris@16 1975 //
Chris@16 1976 int markid = 0;
Chris@16 1977 std::ptrdiff_t jump_offset = 0;
Chris@16 1978 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
Chris@16 1979 pb->icase = this->flags() & regbase::icase;
Chris@16 1980 std::ptrdiff_t last_paren_start = this->getoffset(pb);
Chris@16 1981 // back up insertion point for alternations, and set new point:
Chris@16 1982 std::ptrdiff_t last_alt_point = m_alt_insert_point;
Chris@16 1983 this->m_pdata->m_data.align();
Chris@16 1984 m_alt_insert_point = this->m_pdata->m_data.size();
Chris@16 1985 std::ptrdiff_t expected_alt_point = m_alt_insert_point;
Chris@16 1986 bool restore_flags = true;
Chris@16 1987 regex_constants::syntax_option_type old_flags = this->flags();
Chris@16 1988 bool old_case_change = m_has_case_change;
Chris@16 1989 m_has_case_change = false;
Chris@16 1990 charT name_delim;
Chris@16 1991 int mark_reset = m_mark_reset;
Chris@16 1992 int max_mark = m_max_mark;
Chris@16 1993 m_mark_reset = -1;
Chris@16 1994 m_max_mark = m_mark_count;
Chris@16 1995 int v;
Chris@16 1996 //
Chris@16 1997 // select the actual extension used:
Chris@16 1998 //
Chris@16 1999 switch(this->m_traits.syntax_type(*m_position))
Chris@16 2000 {
Chris@16 2001 case regex_constants::syntax_or:
Chris@16 2002 m_mark_reset = m_mark_count;
Chris@16 2003 BOOST_FALLTHROUGH;
Chris@16 2004 case regex_constants::syntax_colon:
Chris@16 2005 //
Chris@16 2006 // a non-capturing mark:
Chris@16 2007 //
Chris@16 2008 pb->index = markid = 0;
Chris@16 2009 ++m_position;
Chris@16 2010 break;
Chris@16 2011 case regex_constants::syntax_digit:
Chris@16 2012 {
Chris@16 2013 //
Chris@16 2014 // a recursive subexpression:
Chris@16 2015 //
Chris@16 2016 v = this->m_traits.toi(m_position, m_end, 10);
Chris@16 2017 if((v < 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
Chris@16 2018 {
Chris@16 2019 // Rewind to start of (? sequence:
Chris@16 2020 --m_position;
Chris@16 2021 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2022 fail(regex_constants::error_perl_extension, m_position - m_base, "The recursive sub-expression refers to an invalid marking group, or is unterminated.");
Chris@16 2023 return false;
Chris@16 2024 }
Chris@16 2025 insert_recursion:
Chris@16 2026 pb->index = markid = 0;
Chris@16 2027 re_recurse* pr = static_cast<re_recurse*>(this->append_state(syntax_element_recurse, sizeof(re_recurse)));
Chris@16 2028 pr->alt.i = v;
Chris@16 2029 pr->state_id = 0;
Chris@16 2030 static_cast<re_case*>(
Chris@16 2031 this->append_state(syntax_element_toggle_case, sizeof(re_case))
Chris@16 2032 )->icase = this->flags() & regbase::icase;
Chris@16 2033 break;
Chris@16 2034 }
Chris@16 2035 case regex_constants::syntax_plus:
Chris@16 2036 //
Chris@16 2037 // A forward-relative recursive subexpression:
Chris@16 2038 //
Chris@16 2039 ++m_position;
Chris@16 2040 v = this->m_traits.toi(m_position, m_end, 10);
Chris@16 2041 if((v <= 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
Chris@16 2042 {
Chris@16 2043 // Rewind to start of (? sequence:
Chris@16 2044 --m_position;
Chris@16 2045 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2046 fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
Chris@16 2047 return false;
Chris@16 2048 }
Chris@16 2049 v += m_mark_count;
Chris@16 2050 goto insert_recursion;
Chris@16 2051 case regex_constants::syntax_dash:
Chris@16 2052 //
Chris@16 2053 // Possibly a backward-relative recursive subexpression:
Chris@16 2054 //
Chris@16 2055 ++m_position;
Chris@16 2056 v = this->m_traits.toi(m_position, m_end, 10);
Chris@16 2057 if(v <= 0)
Chris@16 2058 {
Chris@16 2059 --m_position;
Chris@16 2060 // Oops not a relative recursion at all, but a (?-imsx) group:
Chris@16 2061 goto option_group_jump;
Chris@16 2062 }
Chris@16 2063 v = m_mark_count + 1 - v;
Chris@16 2064 if(v <= 0)
Chris@16 2065 {
Chris@16 2066 // Rewind to start of (? sequence:
Chris@16 2067 --m_position;
Chris@16 2068 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2069 fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
Chris@16 2070 return false;
Chris@16 2071 }
Chris@16 2072 goto insert_recursion;
Chris@16 2073 case regex_constants::syntax_equal:
Chris@16 2074 pb->index = markid = -1;
Chris@16 2075 ++m_position;
Chris@16 2076 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
Chris@16 2077 this->m_pdata->m_data.align();
Chris@16 2078 m_alt_insert_point = this->m_pdata->m_data.size();
Chris@16 2079 break;
Chris@16 2080 case regex_constants::syntax_not:
Chris@16 2081 pb->index = markid = -2;
Chris@16 2082 ++m_position;
Chris@16 2083 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
Chris@16 2084 this->m_pdata->m_data.align();
Chris@16 2085 m_alt_insert_point = this->m_pdata->m_data.size();
Chris@16 2086 break;
Chris@16 2087 case regex_constants::escape_type_left_word:
Chris@16 2088 {
Chris@16 2089 // a lookbehind assertion:
Chris@16 2090 if(++m_position == m_end)
Chris@16 2091 {
Chris@16 2092 // Rewind to start of (? sequence:
Chris@16 2093 --m_position;
Chris@16 2094 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2095 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2096 return false;
Chris@16 2097 }
Chris@16 2098 regex_constants::syntax_type t = this->m_traits.syntax_type(*m_position);
Chris@16 2099 if(t == regex_constants::syntax_not)
Chris@16 2100 pb->index = markid = -2;
Chris@16 2101 else if(t == regex_constants::syntax_equal)
Chris@16 2102 pb->index = markid = -1;
Chris@16 2103 else
Chris@16 2104 {
Chris@16 2105 // Probably a named capture which also starts (?< :
Chris@16 2106 name_delim = '>';
Chris@16 2107 --m_position;
Chris@16 2108 goto named_capture_jump;
Chris@16 2109 }
Chris@16 2110 ++m_position;
Chris@16 2111 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
Chris@16 2112 this->append_state(syntax_element_backstep, sizeof(re_brace));
Chris@16 2113 this->m_pdata->m_data.align();
Chris@16 2114 m_alt_insert_point = this->m_pdata->m_data.size();
Chris@16 2115 break;
Chris@16 2116 }
Chris@16 2117 case regex_constants::escape_type_right_word:
Chris@16 2118 //
Chris@16 2119 // an independent sub-expression:
Chris@16 2120 //
Chris@16 2121 pb->index = markid = -3;
Chris@16 2122 ++m_position;
Chris@16 2123 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
Chris@16 2124 this->m_pdata->m_data.align();
Chris@16 2125 m_alt_insert_point = this->m_pdata->m_data.size();
Chris@16 2126 break;
Chris@16 2127 case regex_constants::syntax_open_mark:
Chris@16 2128 {
Chris@16 2129 // a conditional expression:
Chris@16 2130 pb->index = markid = -4;
Chris@16 2131 if(++m_position == m_end)
Chris@16 2132 {
Chris@16 2133 // Rewind to start of (? sequence:
Chris@16 2134 --m_position;
Chris@16 2135 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2136 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2137 return false;
Chris@16 2138 }
Chris@16 2139 v = this->m_traits.toi(m_position, m_end, 10);
Chris@16 2140 if(m_position == m_end)
Chris@16 2141 {
Chris@16 2142 // Rewind to start of (? sequence:
Chris@16 2143 --m_position;
Chris@16 2144 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2145 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2146 return false;
Chris@16 2147 }
Chris@16 2148 if(*m_position == charT('R'))
Chris@16 2149 {
Chris@16 2150 if(++m_position == m_end)
Chris@16 2151 {
Chris@16 2152 // Rewind to start of (? sequence:
Chris@16 2153 --m_position;
Chris@16 2154 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2155 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2156 return false;
Chris@16 2157 }
Chris@16 2158 if(*m_position == charT('&'))
Chris@16 2159 {
Chris@16 2160 const charT* base = ++m_position;
Chris@16 2161 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
Chris@16 2162 ++m_position;
Chris@16 2163 if(m_position == m_end)
Chris@16 2164 {
Chris@16 2165 // Rewind to start of (? sequence:
Chris@16 2166 --m_position;
Chris@16 2167 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2168 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2169 return false;
Chris@16 2170 }
Chris@16 2171 v = -static_cast<int>(hash_value_from_capture_name(base, m_position));
Chris@16 2172 }
Chris@16 2173 else
Chris@16 2174 {
Chris@16 2175 v = -this->m_traits.toi(m_position, m_end, 10);
Chris@16 2176 }
Chris@16 2177 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
Chris@16 2178 br->index = v < 0 ? (v - 1) : 0;
Chris@16 2179 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
Chris@16 2180 {
Chris@16 2181 // Rewind to start of (? sequence:
Chris@16 2182 --m_position;
Chris@16 2183 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2184 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2185 return false;
Chris@16 2186 }
Chris@16 2187 if(++m_position == m_end)
Chris@16 2188 {
Chris@16 2189 // Rewind to start of (? sequence:
Chris@16 2190 --m_position;
Chris@16 2191 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2192 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2193 return false;
Chris@16 2194 }
Chris@16 2195 }
Chris@16 2196 else if((*m_position == charT('\'')) || (*m_position == charT('<')))
Chris@16 2197 {
Chris@16 2198 const charT* base = ++m_position;
Chris@16 2199 while((m_position != m_end) && (*m_position != charT('>')) && (*m_position != charT('\'')))
Chris@16 2200 ++m_position;
Chris@16 2201 if(m_position == m_end)
Chris@16 2202 {
Chris@16 2203 // Rewind to start of (? sequence:
Chris@16 2204 --m_position;
Chris@16 2205 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2206 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2207 return false;
Chris@16 2208 }
Chris@16 2209 v = static_cast<int>(hash_value_from_capture_name(base, m_position));
Chris@16 2210 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
Chris@16 2211 br->index = v;
Chris@16 2212 if(((*m_position != charT('>')) && (*m_position != charT('\''))) || (++m_position == m_end))
Chris@16 2213 {
Chris@16 2214 // Rewind to start of (? sequence:
Chris@16 2215 --m_position;
Chris@16 2216 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2217 fail(regex_constants::error_perl_extension, m_position - m_base, "Unterminated named capture.");
Chris@16 2218 return false;
Chris@16 2219 }
Chris@16 2220 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
Chris@16 2221 {
Chris@16 2222 // Rewind to start of (? sequence:
Chris@16 2223 --m_position;
Chris@16 2224 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2225 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2226 return false;
Chris@16 2227 }
Chris@16 2228 if(++m_position == m_end)
Chris@16 2229 {
Chris@16 2230 // Rewind to start of (? sequence:
Chris@16 2231 --m_position;
Chris@16 2232 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2233 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2234 return false;
Chris@16 2235 }
Chris@16 2236 }
Chris@16 2237 else if(*m_position == charT('D'))
Chris@16 2238 {
Chris@16 2239 const char* def = "DEFINE";
Chris@16 2240 while(*def && (m_position != m_end) && (*m_position == charT(*def)))
Chris@16 2241 ++m_position, ++def;
Chris@16 2242 if((m_position == m_end) || *def)
Chris@16 2243 {
Chris@16 2244 // Rewind to start of (? sequence:
Chris@16 2245 --m_position;
Chris@16 2246 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2247 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2248 return false;
Chris@16 2249 }
Chris@16 2250 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
Chris@16 2251 br->index = 9999; // special magic value!
Chris@16 2252 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
Chris@16 2253 {
Chris@16 2254 // Rewind to start of (? sequence:
Chris@16 2255 --m_position;
Chris@16 2256 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2257 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2258 return false;
Chris@16 2259 }
Chris@16 2260 if(++m_position == m_end)
Chris@16 2261 {
Chris@16 2262 // Rewind to start of (? sequence:
Chris@16 2263 --m_position;
Chris@16 2264 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2265 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2266 return false;
Chris@16 2267 }
Chris@16 2268 }
Chris@16 2269 else if(v > 0)
Chris@16 2270 {
Chris@16 2271 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
Chris@16 2272 br->index = v;
Chris@16 2273 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
Chris@16 2274 {
Chris@16 2275 // Rewind to start of (? sequence:
Chris@16 2276 --m_position;
Chris@16 2277 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2278 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2279 return false;
Chris@16 2280 }
Chris@16 2281 if(++m_position == m_end)
Chris@16 2282 {
Chris@16 2283 // Rewind to start of (? sequence:
Chris@16 2284 --m_position;
Chris@16 2285 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2286 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2287 return false;
Chris@16 2288 }
Chris@16 2289 }
Chris@16 2290 else
Chris@16 2291 {
Chris@16 2292 // verify that we have a lookahead or lookbehind assert:
Chris@16 2293 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_question)
Chris@16 2294 {
Chris@16 2295 // Rewind to start of (? sequence:
Chris@16 2296 --m_position;
Chris@16 2297 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2298 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2299 return false;
Chris@16 2300 }
Chris@16 2301 if(++m_position == m_end)
Chris@16 2302 {
Chris@16 2303 // Rewind to start of (? sequence:
Chris@16 2304 --m_position;
Chris@16 2305 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2306 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2307 return false;
Chris@16 2308 }
Chris@16 2309 if(this->m_traits.syntax_type(*m_position) == regex_constants::escape_type_left_word)
Chris@16 2310 {
Chris@16 2311 if(++m_position == m_end)
Chris@16 2312 {
Chris@16 2313 // Rewind to start of (? sequence:
Chris@16 2314 --m_position;
Chris@16 2315 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2316 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2317 return false;
Chris@16 2318 }
Chris@16 2319 if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
Chris@16 2320 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
Chris@16 2321 {
Chris@16 2322 // Rewind to start of (? sequence:
Chris@16 2323 --m_position;
Chris@16 2324 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2325 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2326 return false;
Chris@16 2327 }
Chris@16 2328 m_position -= 3;
Chris@16 2329 }
Chris@16 2330 else
Chris@16 2331 {
Chris@16 2332 if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
Chris@16 2333 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
Chris@16 2334 {
Chris@16 2335 // Rewind to start of (? sequence:
Chris@16 2336 --m_position;
Chris@16 2337 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2338 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2339 return false;
Chris@16 2340 }
Chris@16 2341 m_position -= 2;
Chris@16 2342 }
Chris@16 2343 }
Chris@16 2344 break;
Chris@16 2345 }
Chris@16 2346 case regex_constants::syntax_close_mark:
Chris@16 2347 // Rewind to start of (? sequence:
Chris@16 2348 --m_position;
Chris@16 2349 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2350 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2351 return false;
Chris@16 2352 case regex_constants::escape_type_end_buffer:
Chris@16 2353 {
Chris@16 2354 name_delim = *m_position;
Chris@16 2355 named_capture_jump:
Chris@16 2356 markid = 0;
Chris@16 2357 if(0 == (this->flags() & regbase::nosubs))
Chris@16 2358 {
Chris@16 2359 markid = ++m_mark_count;
Chris@16 2360 #ifndef BOOST_NO_STD_DISTANCE
Chris@16 2361 if(this->flags() & regbase::save_subexpression_location)
Chris@16 2362 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 2, 0));
Chris@16 2363 #else
Chris@16 2364 if(this->flags() & regbase::save_subexpression_location)
Chris@16 2365 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - 2, 0));
Chris@16 2366 #endif
Chris@16 2367 }
Chris@16 2368 pb->index = markid;
Chris@16 2369 const charT* base = ++m_position;
Chris@16 2370 if(m_position == m_end)
Chris@16 2371 {
Chris@16 2372 // Rewind to start of (? sequence:
Chris@16 2373 --m_position;
Chris@16 2374 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2375 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2376 return false;
Chris@16 2377 }
Chris@16 2378 while((m_position != m_end) && (*m_position != name_delim))
Chris@16 2379 ++m_position;
Chris@16 2380 if(m_position == m_end)
Chris@16 2381 {
Chris@16 2382 // Rewind to start of (? sequence:
Chris@16 2383 --m_position;
Chris@16 2384 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2385 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2386 return false;
Chris@16 2387 }
Chris@16 2388 this->m_pdata->set_name(base, m_position, markid);
Chris@16 2389 ++m_position;
Chris@16 2390 break;
Chris@16 2391 }
Chris@16 2392 default:
Chris@16 2393 if(*m_position == charT('R'))
Chris@16 2394 {
Chris@16 2395 ++m_position;
Chris@16 2396 v = 0;
Chris@16 2397 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
Chris@16 2398 {
Chris@16 2399 // Rewind to start of (? sequence:
Chris@16 2400 --m_position;
Chris@16 2401 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2402 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2403 return false;
Chris@16 2404 }
Chris@16 2405 goto insert_recursion;
Chris@16 2406 }
Chris@16 2407 if(*m_position == charT('&'))
Chris@16 2408 {
Chris@16 2409 ++m_position;
Chris@16 2410 const charT* base = m_position;
Chris@16 2411 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
Chris@16 2412 ++m_position;
Chris@16 2413 if(m_position == m_end)
Chris@16 2414 {
Chris@16 2415 // Rewind to start of (? sequence:
Chris@16 2416 --m_position;
Chris@16 2417 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2418 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2419 return false;
Chris@16 2420 }
Chris@16 2421 v = static_cast<int>(hash_value_from_capture_name(base, m_position));
Chris@16 2422 goto insert_recursion;
Chris@16 2423 }
Chris@16 2424 if(*m_position == charT('P'))
Chris@16 2425 {
Chris@16 2426 ++m_position;
Chris@16 2427 if(m_position == m_end)
Chris@16 2428 {
Chris@16 2429 // Rewind to start of (? sequence:
Chris@16 2430 --m_position;
Chris@16 2431 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2432 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2433 return false;
Chris@16 2434 }
Chris@16 2435 if(*m_position == charT('>'))
Chris@16 2436 {
Chris@16 2437 ++m_position;
Chris@16 2438 const charT* base = m_position;
Chris@16 2439 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
Chris@16 2440 ++m_position;
Chris@16 2441 if(m_position == m_end)
Chris@16 2442 {
Chris@16 2443 // Rewind to start of (? sequence:
Chris@16 2444 --m_position;
Chris@16 2445 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2446 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2447 return false;
Chris@16 2448 }
Chris@16 2449 v = static_cast<int>(hash_value_from_capture_name(base, m_position));
Chris@16 2450 goto insert_recursion;
Chris@16 2451 }
Chris@16 2452 }
Chris@16 2453 //
Chris@16 2454 // lets assume that we have a (?imsx) group and try and parse it:
Chris@16 2455 //
Chris@16 2456 option_group_jump:
Chris@16 2457 regex_constants::syntax_option_type opts = parse_options();
Chris@16 2458 if(m_position == m_end)
Chris@16 2459 {
Chris@16 2460 // Rewind to start of (? sequence:
Chris@16 2461 --m_position;
Chris@16 2462 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2463 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2464 return false;
Chris@16 2465 }
Chris@16 2466 // make a note of whether we have a case change:
Chris@16 2467 m_has_case_change = ((opts & regbase::icase) != (this->flags() & regbase::icase));
Chris@16 2468 pb->index = markid = 0;
Chris@16 2469 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark)
Chris@16 2470 {
Chris@16 2471 // update flags and carry on as normal:
Chris@16 2472 this->flags(opts);
Chris@16 2473 restore_flags = false;
Chris@16 2474 old_case_change |= m_has_case_change; // defer end of scope by one ')'
Chris@16 2475 }
Chris@16 2476 else if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_colon)
Chris@16 2477 {
Chris@16 2478 // update flags and carry on until the matching ')' is found:
Chris@16 2479 this->flags(opts);
Chris@16 2480 ++m_position;
Chris@16 2481 }
Chris@16 2482 else
Chris@16 2483 {
Chris@16 2484 // Rewind to start of (? sequence:
Chris@16 2485 --m_position;
Chris@16 2486 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2487 fail(regex_constants::error_perl_extension, m_position - m_base);
Chris@16 2488 return false;
Chris@16 2489 }
Chris@16 2490
Chris@16 2491 // finally append a case change state if we need it:
Chris@16 2492 if(m_has_case_change)
Chris@16 2493 {
Chris@16 2494 static_cast<re_case*>(
Chris@16 2495 this->append_state(syntax_element_toggle_case, sizeof(re_case))
Chris@16 2496 )->icase = opts & regbase::icase;
Chris@16 2497 }
Chris@16 2498
Chris@16 2499 }
Chris@16 2500 //
Chris@16 2501 // now recursively add more states, this will terminate when we get to a
Chris@16 2502 // matching ')' :
Chris@16 2503 //
Chris@16 2504 parse_all();
Chris@16 2505 //
Chris@16 2506 // Unwind alternatives:
Chris@16 2507 //
Chris@16 2508 if(0 == unwind_alts(last_paren_start))
Chris@16 2509 {
Chris@16 2510 // Rewind to start of (? sequence:
Chris@16 2511 --m_position;
Chris@16 2512 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2513 fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid alternation operators within (?...) block.");
Chris@16 2514 return false;
Chris@16 2515 }
Chris@16 2516 //
Chris@16 2517 // we either have a ')' or we have run out of characters prematurely:
Chris@16 2518 //
Chris@16 2519 if(m_position == m_end)
Chris@16 2520 {
Chris@16 2521 // Rewind to start of (? sequence:
Chris@16 2522 --m_position;
Chris@16 2523 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2524 this->fail(regex_constants::error_paren, ::boost::re_detail::distance(m_base, m_end));
Chris@16 2525 return false;
Chris@16 2526 }
Chris@16 2527 BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
Chris@16 2528 ++m_position;
Chris@16 2529 //
Chris@16 2530 // restore the flags:
Chris@16 2531 //
Chris@16 2532 if(restore_flags)
Chris@16 2533 {
Chris@16 2534 // append a case change state if we need it:
Chris@16 2535 if(m_has_case_change)
Chris@16 2536 {
Chris@16 2537 static_cast<re_case*>(
Chris@16 2538 this->append_state(syntax_element_toggle_case, sizeof(re_case))
Chris@16 2539 )->icase = old_flags & regbase::icase;
Chris@16 2540 }
Chris@16 2541 this->flags(old_flags);
Chris@16 2542 }
Chris@16 2543 //
Chris@16 2544 // set up the jump pointer if we have one:
Chris@16 2545 //
Chris@16 2546 if(jump_offset)
Chris@16 2547 {
Chris@16 2548 this->m_pdata->m_data.align();
Chris@16 2549 re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
Chris@16 2550 jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
Chris@16 2551 if((this->m_last_state == jmp) && (markid != -2))
Chris@16 2552 {
Chris@16 2553 // Oops... we didn't have anything inside the assertion.
Chris@16 2554 // Note we don't get here for negated forward lookahead as (?!)
Chris@16 2555 // does have some uses.
Chris@16 2556 // Rewind to start of (? sequence:
Chris@16 2557 --m_position;
Chris@16 2558 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2559 fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid or empty zero width assertion.");
Chris@16 2560 return false;
Chris@16 2561 }
Chris@16 2562 }
Chris@16 2563 //
Chris@16 2564 // verify that if this is conditional expression, that we do have
Chris@16 2565 // an alternative, if not add one:
Chris@16 2566 //
Chris@16 2567 if(markid == -4)
Chris@16 2568 {
Chris@16 2569 re_syntax_base* b = this->getaddress(expected_alt_point);
Chris@16 2570 // Make sure we have exactly one alternative following this state:
Chris@16 2571 if(b->type != syntax_element_alt)
Chris@16 2572 {
Chris@16 2573 re_alt* alt = static_cast<re_alt*>(this->insert_state(expected_alt_point, syntax_element_alt, sizeof(re_alt)));
Chris@16 2574 alt->alt.i = this->m_pdata->m_data.size() - this->getoffset(alt);
Chris@16 2575 }
Chris@16 2576 else if(this->getaddress(static_cast<re_alt*>(b)->alt.i, b)->type == syntax_element_alt)
Chris@16 2577 {
Chris@16 2578 // Can't have seen more than one alternative:
Chris@16 2579 // Rewind to start of (? sequence:
Chris@16 2580 --m_position;
Chris@16 2581 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2582 fail(regex_constants::error_bad_pattern, m_position - m_base, "More than one alternation operator | was encountered inside a conditional expression.");
Chris@16 2583 return false;
Chris@16 2584 }
Chris@16 2585 else
Chris@16 2586 {
Chris@16 2587 // We must *not* have seen an alternative inside a (DEFINE) block:
Chris@16 2588 b = this->getaddress(b->next.i, b);
Chris@16 2589 if((b->type == syntax_element_assert_backref) && (static_cast<re_brace*>(b)->index == 9999))
Chris@16 2590 {
Chris@16 2591 // Rewind to start of (? sequence:
Chris@16 2592 --m_position;
Chris@16 2593 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2594 fail(regex_constants::error_bad_pattern, m_position - m_base, "Alternation operators are not allowed inside a DEFINE block.");
Chris@16 2595 return false;
Chris@16 2596 }
Chris@16 2597 }
Chris@16 2598 // check for invalid repetition of next state:
Chris@16 2599 b = this->getaddress(expected_alt_point);
Chris@16 2600 b = this->getaddress(static_cast<re_alt*>(b)->next.i, b);
Chris@16 2601 if((b->type != syntax_element_assert_backref)
Chris@16 2602 && (b->type != syntax_element_startmark))
Chris@16 2603 {
Chris@16 2604 // Rewind to start of (? sequence:
Chris@16 2605 --m_position;
Chris@16 2606 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2607 fail(regex_constants::error_badrepeat, m_position - m_base, "A repetition operator cannot be applied to a zero-width assertion.");
Chris@16 2608 return false;
Chris@16 2609 }
Chris@16 2610 }
Chris@16 2611 //
Chris@16 2612 // append closing parenthesis state:
Chris@16 2613 //
Chris@16 2614 pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
Chris@16 2615 pb->index = markid;
Chris@16 2616 pb->icase = this->flags() & regbase::icase;
Chris@16 2617 this->m_paren_start = last_paren_start;
Chris@16 2618 //
Chris@16 2619 // restore the alternate insertion point:
Chris@16 2620 //
Chris@16 2621 this->m_alt_insert_point = last_alt_point;
Chris@16 2622 //
Chris@16 2623 // and the case change data:
Chris@16 2624 //
Chris@16 2625 m_has_case_change = old_case_change;
Chris@16 2626 //
Chris@16 2627 // And the mark_reset data:
Chris@16 2628 //
Chris@16 2629 if(m_max_mark > m_mark_count)
Chris@16 2630 {
Chris@16 2631 m_mark_count = m_max_mark;
Chris@16 2632 }
Chris@16 2633 m_mark_reset = mark_reset;
Chris@16 2634 m_max_mark = max_mark;
Chris@16 2635
Chris@16 2636
Chris@16 2637 if(markid > 0)
Chris@16 2638 {
Chris@16 2639 #ifndef BOOST_NO_STD_DISTANCE
Chris@16 2640 if(this->flags() & regbase::save_subexpression_location)
Chris@16 2641 this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position) - 1;
Chris@16 2642 #else
Chris@16 2643 if(this->flags() & regbase::save_subexpression_location)
Chris@16 2644 this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base) - 1;
Chris@16 2645 #endif
Chris@16 2646 //
Chris@16 2647 // allow backrefs to this mark:
Chris@16 2648 //
Chris@16 2649 if((markid > 0) && (markid < (int)(sizeof(unsigned) * CHAR_BIT)))
Chris@16 2650 this->m_backrefs |= 1u << (markid - 1);
Chris@16 2651 }
Chris@16 2652 return true;
Chris@16 2653 }
Chris@16 2654
Chris@16 2655 template <class charT, class traits>
Chris@16 2656 bool basic_regex_parser<charT, traits>::add_emacs_code(bool negate)
Chris@16 2657 {
Chris@16 2658 //
Chris@16 2659 // parses an emacs style \sx or \Sx construct.
Chris@16 2660 //
Chris@16 2661 if(++m_position == m_end)
Chris@16 2662 {
Chris@16 2663 // Rewind to start of sequence:
Chris@16 2664 --m_position;
Chris@16 2665 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
Chris@16 2666 fail(regex_constants::error_escape, m_position - m_base);
Chris@16 2667 return false;
Chris@16 2668 }
Chris@16 2669 basic_char_set<charT, traits> char_set;
Chris@16 2670 if(negate)
Chris@16 2671 char_set.negate();
Chris@16 2672
Chris@16 2673 static const charT s_punct[5] = { 'p', 'u', 'n', 'c', 't', };
Chris@16 2674
Chris@16 2675 switch(*m_position)
Chris@16 2676 {
Chris@16 2677 case 's':
Chris@16 2678 case ' ':
Chris@16 2679 char_set.add_class(this->m_mask_space);
Chris@16 2680 break;
Chris@16 2681 case 'w':
Chris@16 2682 char_set.add_class(this->m_word_mask);
Chris@16 2683 break;
Chris@16 2684 case '_':
Chris@16 2685 char_set.add_single(digraph<charT>(charT('$')));
Chris@16 2686 char_set.add_single(digraph<charT>(charT('&')));
Chris@16 2687 char_set.add_single(digraph<charT>(charT('*')));
Chris@16 2688 char_set.add_single(digraph<charT>(charT('+')));
Chris@16 2689 char_set.add_single(digraph<charT>(charT('-')));
Chris@16 2690 char_set.add_single(digraph<charT>(charT('_')));
Chris@16 2691 char_set.add_single(digraph<charT>(charT('<')));
Chris@16 2692 char_set.add_single(digraph<charT>(charT('>')));
Chris@16 2693 break;
Chris@16 2694 case '.':
Chris@16 2695 char_set.add_class(this->m_traits.lookup_classname(s_punct, s_punct+5));
Chris@16 2696 break;
Chris@16 2697 case '(':
Chris@16 2698 char_set.add_single(digraph<charT>(charT('(')));
Chris@16 2699 char_set.add_single(digraph<charT>(charT('[')));
Chris@16 2700 char_set.add_single(digraph<charT>(charT('{')));
Chris@16 2701 break;
Chris@16 2702 case ')':
Chris@16 2703 char_set.add_single(digraph<charT>(charT(')')));
Chris@16 2704 char_set.add_single(digraph<charT>(charT(']')));
Chris@16 2705 char_set.add_single(digraph<charT>(charT('}')));
Chris@16 2706 break;
Chris@16 2707 case '"':
Chris@16 2708 char_set.add_single(digraph<charT>(charT('"')));
Chris@16 2709 char_set.add_single(digraph<charT>(charT('\'')));
Chris@16 2710 char_set.add_single(digraph<charT>(charT('`')));
Chris@16 2711 break;
Chris@16 2712 case '\'':
Chris@16 2713 char_set.add_single(digraph<charT>(charT('\'')));
Chris@16 2714 char_set.add_single(digraph<charT>(charT(',')));
Chris@16 2715 char_set.add_single(digraph<charT>(charT('#')));
Chris@16 2716 break;
Chris@16 2717 case '<':
Chris@16 2718 char_set.add_single(digraph<charT>(charT(';')));
Chris@16 2719 break;
Chris@16 2720 case '>':
Chris@16 2721 char_set.add_single(digraph<charT>(charT('\n')));
Chris@16 2722 char_set.add_single(digraph<charT>(charT('\f')));
Chris@16 2723 break;
Chris@16 2724 default:
Chris@16 2725 fail(regex_constants::error_ctype, m_position - m_base);
Chris@16 2726 return false;
Chris@16 2727 }
Chris@16 2728 if(0 == this->append_set(char_set))
Chris@16 2729 {
Chris@16 2730 fail(regex_constants::error_ctype, m_position - m_base);
Chris@16 2731 return false;
Chris@16 2732 }
Chris@16 2733 ++m_position;
Chris@16 2734 return true;
Chris@16 2735 }
Chris@16 2736
Chris@16 2737 template <class charT, class traits>
Chris@16 2738 regex_constants::syntax_option_type basic_regex_parser<charT, traits>::parse_options()
Chris@16 2739 {
Chris@16 2740 // we have a (?imsx-imsx) group, convert it into a set of flags:
Chris@16 2741 regex_constants::syntax_option_type f = this->flags();
Chris@16 2742 bool breakout = false;
Chris@16 2743 do
Chris@16 2744 {
Chris@16 2745 switch(*m_position)
Chris@16 2746 {
Chris@16 2747 case 's':
Chris@16 2748 f |= regex_constants::mod_s;
Chris@16 2749 f &= ~regex_constants::no_mod_s;
Chris@16 2750 break;
Chris@16 2751 case 'm':
Chris@16 2752 f &= ~regex_constants::no_mod_m;
Chris@16 2753 break;
Chris@16 2754 case 'i':
Chris@16 2755 f |= regex_constants::icase;
Chris@16 2756 break;
Chris@16 2757 case 'x':
Chris@16 2758 f |= regex_constants::mod_x;
Chris@16 2759 break;
Chris@16 2760 default:
Chris@16 2761 breakout = true;
Chris@16 2762 continue;
Chris@16 2763 }
Chris@16 2764 if(++m_position == m_end)
Chris@16 2765 {
Chris@16 2766 // Rewind to start of (? sequence:
Chris@16 2767 --m_position;
Chris@16 2768 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2769 fail(regex_constants::error_paren, m_position - m_base);
Chris@16 2770 return false;
Chris@16 2771 }
Chris@16 2772 }
Chris@16 2773 while(!breakout);
Chris@16 2774
Chris@16 2775 breakout = false;
Chris@16 2776
Chris@16 2777 if(*m_position == static_cast<charT>('-'))
Chris@16 2778 {
Chris@16 2779 if(++m_position == m_end)
Chris@16 2780 {
Chris@16 2781 // Rewind to start of (? sequence:
Chris@16 2782 --m_position;
Chris@16 2783 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2784 fail(regex_constants::error_paren, m_position - m_base);
Chris@16 2785 return false;
Chris@16 2786 }
Chris@16 2787 do
Chris@16 2788 {
Chris@16 2789 switch(*m_position)
Chris@16 2790 {
Chris@16 2791 case 's':
Chris@16 2792 f &= ~regex_constants::mod_s;
Chris@16 2793 f |= regex_constants::no_mod_s;
Chris@16 2794 break;
Chris@16 2795 case 'm':
Chris@16 2796 f |= regex_constants::no_mod_m;
Chris@16 2797 break;
Chris@16 2798 case 'i':
Chris@16 2799 f &= ~regex_constants::icase;
Chris@16 2800 break;
Chris@16 2801 case 'x':
Chris@16 2802 f &= ~regex_constants::mod_x;
Chris@16 2803 break;
Chris@16 2804 default:
Chris@16 2805 breakout = true;
Chris@16 2806 continue;
Chris@16 2807 }
Chris@16 2808 if(++m_position == m_end)
Chris@16 2809 {
Chris@16 2810 // Rewind to start of (? sequence:
Chris@16 2811 --m_position;
Chris@16 2812 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
Chris@16 2813 fail(regex_constants::error_paren, m_position - m_base);
Chris@16 2814 return false;
Chris@16 2815 }
Chris@16 2816 }
Chris@16 2817 while(!breakout);
Chris@16 2818 }
Chris@16 2819 return f;
Chris@16 2820 }
Chris@16 2821
Chris@16 2822 template <class charT, class traits>
Chris@16 2823 bool basic_regex_parser<charT, traits>::unwind_alts(std::ptrdiff_t last_paren_start)
Chris@16 2824 {
Chris@16 2825 //
Chris@16 2826 // If we didn't actually add any states after the last
Chris@16 2827 // alternative then that's an error:
Chris@16 2828 //
Chris@16 2829 if((this->m_alt_insert_point == static_cast<std::ptrdiff_t>(this->m_pdata->m_data.size()))
Chris@16 2830 && m_alt_jumps.size() && (m_alt_jumps.back() > last_paren_start)
Chris@16 2831 &&
Chris@16 2832 !(
Chris@16 2833 ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
Chris@16 2834 &&
Chris@16 2835 ((this->flags() & regbase::no_empty_expressions) == 0)
Chris@16 2836 )
Chris@16 2837 )
Chris@16 2838 {
Chris@16 2839 fail(regex_constants::error_empty, this->m_position - this->m_base, "Can't terminate a sub-expression with an alternation operator |.");
Chris@16 2840 return false;
Chris@16 2841 }
Chris@16 2842 //
Chris@16 2843 // Fix up our alternatives:
Chris@16 2844 //
Chris@16 2845 while(m_alt_jumps.size() && (m_alt_jumps.back() > last_paren_start))
Chris@16 2846 {
Chris@16 2847 //
Chris@16 2848 // fix up the jump to point to the end of the states
Chris@16 2849 // that we've just added:
Chris@16 2850 //
Chris@16 2851 std::ptrdiff_t jump_offset = m_alt_jumps.back();
Chris@16 2852 m_alt_jumps.pop_back();
Chris@16 2853 this->m_pdata->m_data.align();
Chris@16 2854 re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
Chris@16 2855 BOOST_ASSERT(jmp->type == syntax_element_jump);
Chris@16 2856 jmp->alt.i = this->m_pdata->m_data.size() - jump_offset;
Chris@16 2857 }
Chris@16 2858 return true;
Chris@16 2859 }
Chris@16 2860
Chris@16 2861 #ifdef BOOST_MSVC
Chris@16 2862 #pragma warning(pop)
Chris@16 2863 #endif
Chris@16 2864
Chris@16 2865 } // namespace re_detail
Chris@16 2866 } // namespace boost
Chris@16 2867
Chris@16 2868 #ifdef BOOST_MSVC
Chris@16 2869 #pragma warning(push)
Chris@16 2870 #pragma warning(disable: 4103)
Chris@16 2871 #endif
Chris@16 2872 #ifdef BOOST_HAS_ABI_HEADERS
Chris@16 2873 # include BOOST_ABI_SUFFIX
Chris@16 2874 #endif
Chris@16 2875 #ifdef BOOST_MSVC
Chris@16 2876 #pragma warning(pop)
Chris@16 2877 #endif
Chris@16 2878
Chris@16 2879 #endif