annotate DEPENDENCIES/generic/include/boost/xpressive/regex_compiler.hpp @ 125:34e428693f5d vext

Vext -> Repoint
author Chris Cannam
date Thu, 14 Jun 2018 11:15:39 +0100
parents c530137014c0
children
rev   line source
Chris@16 1 ///////////////////////////////////////////////////////////////////////////////
Chris@16 2 /// \file regex_compiler.hpp
Chris@16 3 /// Contains the definition of regex_compiler, a factory for building regex objects
Chris@16 4 /// from strings.
Chris@16 5 //
Chris@16 6 // Copyright 2008 Eric Niebler. Distributed under the Boost
Chris@16 7 // Software License, Version 1.0. (See accompanying file
Chris@16 8 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
Chris@16 9
Chris@16 10 #ifndef BOOST_XPRESSIVE_REGEX_COMPILER_HPP_EAN_10_04_2005
Chris@16 11 #define BOOST_XPRESSIVE_REGEX_COMPILER_HPP_EAN_10_04_2005
Chris@16 12
Chris@16 13 // MS compatible compilers support #pragma once
Chris@101 14 #if defined(_MSC_VER)
Chris@16 15 # pragma once
Chris@16 16 #endif
Chris@16 17
Chris@16 18 #include <map>
Chris@16 19 #include <boost/config.hpp>
Chris@16 20 #include <boost/assert.hpp>
Chris@16 21 #include <boost/next_prior.hpp>
Chris@16 22 #include <boost/range/begin.hpp>
Chris@16 23 #include <boost/range/end.hpp>
Chris@16 24 #include <boost/mpl/assert.hpp>
Chris@16 25 #include <boost/throw_exception.hpp>
Chris@16 26 #include <boost/type_traits/is_same.hpp>
Chris@16 27 #include <boost/type_traits/is_pointer.hpp>
Chris@16 28 #include <boost/utility/enable_if.hpp>
Chris@16 29 #include <boost/iterator/iterator_traits.hpp>
Chris@16 30 #include <boost/xpressive/basic_regex.hpp>
Chris@16 31 #include <boost/xpressive/detail/dynamic/parser.hpp>
Chris@16 32 #include <boost/xpressive/detail/dynamic/parse_charset.hpp>
Chris@16 33 #include <boost/xpressive/detail/dynamic/parser_enum.hpp>
Chris@16 34 #include <boost/xpressive/detail/dynamic/parser_traits.hpp>
Chris@16 35 #include <boost/xpressive/detail/core/linker.hpp>
Chris@16 36 #include <boost/xpressive/detail/core/optimize.hpp>
Chris@16 37
Chris@16 38 namespace boost { namespace xpressive
Chris@16 39 {
Chris@16 40
Chris@16 41 ///////////////////////////////////////////////////////////////////////////////
Chris@16 42 // regex_compiler
Chris@16 43 //
Chris@16 44 /// \brief Class template regex_compiler is a factory for building basic_regex objects from a string.
Chris@16 45 ///
Chris@16 46 /// Class template regex_compiler is used to construct a basic_regex object from a string. The string
Chris@16 47 /// should contain a valid regular expression. You can imbue a regex_compiler object with a locale,
Chris@16 48 /// after which all basic_regex objects created with that regex_compiler object will use that locale.
Chris@16 49 /// After creating a regex_compiler object, and optionally imbuing it with a locale, you can call the
Chris@16 50 /// compile() method to construct a basic_regex object, passing it the string representing the regular
Chris@16 51 /// expression. You can call compile() multiple times on the same regex_compiler object. Two basic_regex
Chris@16 52 /// objects compiled from the same string will have different regex_id's.
Chris@16 53 template<typename BidiIter, typename RegexTraits, typename CompilerTraits>
Chris@16 54 struct regex_compiler
Chris@16 55 {
Chris@16 56 typedef BidiIter iterator_type;
Chris@16 57 typedef typename iterator_value<BidiIter>::type char_type;
Chris@16 58 typedef regex_constants::syntax_option_type flag_type;
Chris@16 59 typedef RegexTraits traits_type;
Chris@16 60 typedef typename traits_type::string_type string_type;
Chris@16 61 typedef typename traits_type::locale_type locale_type;
Chris@16 62 typedef typename traits_type::char_class_type char_class_type;
Chris@16 63
Chris@16 64 explicit regex_compiler(RegexTraits const &traits = RegexTraits())
Chris@16 65 : mark_count_(0)
Chris@16 66 , hidden_mark_count_(0)
Chris@16 67 , traits_(traits)
Chris@16 68 , upper_(0)
Chris@16 69 , self_()
Chris@16 70 , rules_()
Chris@16 71 {
Chris@16 72 this->upper_ = lookup_classname(this->rxtraits(), "upper");
Chris@16 73 }
Chris@16 74
Chris@16 75 ///////////////////////////////////////////////////////////////////////////
Chris@16 76 // imbue
Chris@16 77 /// Specify the locale to be used by a regex_compiler.
Chris@16 78 ///
Chris@16 79 /// \param loc The locale that this regex_compiler should use.
Chris@16 80 /// \return The previous locale.
Chris@16 81 locale_type imbue(locale_type loc)
Chris@16 82 {
Chris@16 83 locale_type oldloc = this->traits_.imbue(loc);
Chris@16 84 this->upper_ = lookup_classname(this->rxtraits(), "upper");
Chris@16 85 return oldloc;
Chris@16 86 }
Chris@16 87
Chris@16 88 ///////////////////////////////////////////////////////////////////////////
Chris@16 89 // getloc
Chris@16 90 /// Get the locale used by a regex_compiler.
Chris@16 91 ///
Chris@16 92 /// \return The locale used by this regex_compiler.
Chris@16 93 locale_type getloc() const
Chris@16 94 {
Chris@16 95 return this->traits_.getloc();
Chris@16 96 }
Chris@16 97
Chris@16 98 ///////////////////////////////////////////////////////////////////////////
Chris@16 99 // compile
Chris@16 100 /// Builds a basic_regex object from a range of characters.
Chris@16 101 ///
Chris@16 102 /// \param begin The beginning of a range of characters representing the
Chris@16 103 /// regular expression to compile.
Chris@16 104 /// \param end The end of a range of characters representing the
Chris@16 105 /// regular expression to compile.
Chris@16 106 /// \param flags Optional bitmask that determines how the pat string is
Chris@16 107 /// interpreted. (See syntax_option_type.)
Chris@16 108 /// \return A basic_regex object corresponding to the regular expression
Chris@16 109 /// represented by the character range.
Chris@16 110 /// \pre InputIter is a model of the InputIterator concept.
Chris@16 111 /// \pre [begin,end) is a valid range.
Chris@16 112 /// \pre The range of characters specified by [begin,end) contains a
Chris@16 113 /// valid string-based representation of a regular expression.
Chris@16 114 /// \throw regex_error when the range of characters has invalid regular
Chris@16 115 /// expression syntax.
Chris@16 116 template<typename InputIter>
Chris@16 117 basic_regex<BidiIter>
Chris@16 118 compile(InputIter begin, InputIter end, flag_type flags = regex_constants::ECMAScript)
Chris@16 119 {
Chris@16 120 typedef typename iterator_category<InputIter>::type category;
Chris@16 121 return this->compile_(begin, end, flags, category());
Chris@16 122 }
Chris@16 123
Chris@16 124 /// \overload
Chris@16 125 ///
Chris@16 126 template<typename InputRange>
Chris@16 127 typename disable_if<is_pointer<InputRange>, basic_regex<BidiIter> >::type
Chris@16 128 compile(InputRange const &pat, flag_type flags = regex_constants::ECMAScript)
Chris@16 129 {
Chris@16 130 return this->compile(boost::begin(pat), boost::end(pat), flags);
Chris@16 131 }
Chris@16 132
Chris@16 133 /// \overload
Chris@16 134 ///
Chris@16 135 basic_regex<BidiIter>
Chris@16 136 compile(char_type const *begin, flag_type flags = regex_constants::ECMAScript)
Chris@16 137 {
Chris@16 138 BOOST_ASSERT(0 != begin);
Chris@16 139 char_type const *end = begin + std::char_traits<char_type>::length(begin);
Chris@16 140 return this->compile(begin, end, flags);
Chris@16 141 }
Chris@16 142
Chris@16 143 /// \overload
Chris@16 144 ///
Chris@16 145 basic_regex<BidiIter> compile(char_type const *begin, std::size_t size, flag_type flags)
Chris@16 146 {
Chris@16 147 BOOST_ASSERT(0 != begin);
Chris@16 148 char_type const *end = begin + size;
Chris@16 149 return this->compile(begin, end, flags);
Chris@16 150 }
Chris@16 151
Chris@16 152 ///////////////////////////////////////////////////////////////////////////
Chris@16 153 // operator[]
Chris@16 154 /// Return a reference to the named regular expression. If no such named
Chris@16 155 /// regular expression exists, create a new regular expression and return
Chris@16 156 /// a reference to it.
Chris@16 157 ///
Chris@16 158 /// \param name A std::string containing the name of the regular expression.
Chris@16 159 /// \pre The string is not empty.
Chris@16 160 /// \throw bad_alloc on allocation failure.
Chris@16 161 basic_regex<BidiIter> &operator [](string_type const &name)
Chris@16 162 {
Chris@16 163 BOOST_ASSERT(!name.empty());
Chris@16 164 return this->rules_[name];
Chris@16 165 }
Chris@16 166
Chris@16 167 /// \overload
Chris@16 168 ///
Chris@16 169 basic_regex<BidiIter> const &operator [](string_type const &name) const
Chris@16 170 {
Chris@16 171 BOOST_ASSERT(!name.empty());
Chris@16 172 return this->rules_[name];
Chris@16 173 }
Chris@16 174
Chris@16 175 private:
Chris@16 176
Chris@16 177 typedef detail::escape_value<char_type, char_class_type> escape_value;
Chris@16 178 typedef detail::alternate_matcher<detail::alternates_vector<BidiIter>, RegexTraits> alternate_matcher;
Chris@16 179
Chris@16 180 ///////////////////////////////////////////////////////////////////////////
Chris@16 181 // compile_
Chris@16 182 /// INTERNAL ONLY
Chris@16 183 template<typename FwdIter>
Chris@16 184 basic_regex<BidiIter> compile_(FwdIter begin, FwdIter end, flag_type flags, std::forward_iterator_tag)
Chris@16 185 {
Chris@16 186 BOOST_MPL_ASSERT((is_same<char_type, typename iterator_value<FwdIter>::type>));
Chris@16 187 using namespace regex_constants;
Chris@16 188 this->reset();
Chris@16 189 this->traits_.flags(flags);
Chris@16 190
Chris@16 191 basic_regex<BidiIter> rextmp, *prex = &rextmp;
Chris@16 192 FwdIter tmp = begin;
Chris@16 193
Chris@16 194 // Check if this regex is a named rule:
Chris@16 195 string_type name;
Chris@16 196 if(token_group_begin == this->traits_.get_token(tmp, end) &&
Chris@16 197 BOOST_XPR_ENSURE_(tmp != end, error_paren, "mismatched parenthesis") &&
Chris@16 198 token_rule_assign == this->traits_.get_group_type(tmp, end, name))
Chris@16 199 {
Chris@16 200 begin = tmp;
Chris@16 201 BOOST_XPR_ENSURE_
Chris@16 202 (
Chris@16 203 begin != end && token_group_end == this->traits_.get_token(begin, end)
Chris@16 204 , error_paren
Chris@16 205 , "mismatched parenthesis"
Chris@16 206 );
Chris@16 207 prex = &this->rules_[name];
Chris@16 208 }
Chris@16 209
Chris@16 210 this->self_ = detail::core_access<BidiIter>::get_regex_impl(*prex);
Chris@16 211
Chris@16 212 // at the top level, a regex is a sequence of alternates
Chris@16 213 detail::sequence<BidiIter> seq = this->parse_alternates(begin, end);
Chris@16 214 BOOST_XPR_ENSURE_(begin == end, error_paren, "mismatched parenthesis");
Chris@16 215
Chris@16 216 // terminate the sequence
Chris@16 217 seq += detail::make_dynamic<BidiIter>(detail::end_matcher());
Chris@16 218
Chris@16 219 // bundle the regex information into a regex_impl object
Chris@16 220 detail::common_compile(seq.xpr().matchable(), *this->self_, this->rxtraits());
Chris@16 221
Chris@16 222 this->self_->traits_ = new detail::traits_holder<RegexTraits>(this->rxtraits());
Chris@16 223 this->self_->mark_count_ = this->mark_count_;
Chris@16 224 this->self_->hidden_mark_count_ = this->hidden_mark_count_;
Chris@16 225
Chris@16 226 // References changed, update dependencies.
Chris@16 227 this->self_->tracking_update();
Chris@16 228 this->self_.reset();
Chris@16 229 return *prex;
Chris@16 230 }
Chris@16 231
Chris@16 232 ///////////////////////////////////////////////////////////////////////////
Chris@16 233 // compile_
Chris@16 234 /// INTERNAL ONLY
Chris@16 235 template<typename InputIter>
Chris@16 236 basic_regex<BidiIter> compile_(InputIter begin, InputIter end, flag_type flags, std::input_iterator_tag)
Chris@16 237 {
Chris@16 238 string_type pat(begin, end);
Chris@16 239 return this->compile_(boost::begin(pat), boost::end(pat), flags, std::forward_iterator_tag());
Chris@16 240 }
Chris@16 241
Chris@16 242 ///////////////////////////////////////////////////////////////////////////
Chris@16 243 // reset
Chris@16 244 /// INTERNAL ONLY
Chris@16 245 void reset()
Chris@16 246 {
Chris@16 247 this->mark_count_ = 0;
Chris@16 248 this->hidden_mark_count_ = 0;
Chris@16 249 this->traits_.flags(regex_constants::ECMAScript);
Chris@16 250 }
Chris@16 251
Chris@16 252 ///////////////////////////////////////////////////////////////////////////
Chris@16 253 // regex_traits
Chris@16 254 /// INTERNAL ONLY
Chris@16 255 traits_type &rxtraits()
Chris@16 256 {
Chris@16 257 return this->traits_.traits();
Chris@16 258 }
Chris@16 259
Chris@16 260 ///////////////////////////////////////////////////////////////////////////
Chris@16 261 // regex_traits
Chris@16 262 /// INTERNAL ONLY
Chris@16 263 traits_type const &rxtraits() const
Chris@16 264 {
Chris@16 265 return this->traits_.traits();
Chris@16 266 }
Chris@16 267
Chris@16 268 ///////////////////////////////////////////////////////////////////////////
Chris@16 269 // parse_alternates
Chris@16 270 /// INTERNAL ONLY
Chris@16 271 template<typename FwdIter>
Chris@16 272 detail::sequence<BidiIter> parse_alternates(FwdIter &begin, FwdIter end)
Chris@16 273 {
Chris@16 274 using namespace regex_constants;
Chris@16 275 int count = 0;
Chris@16 276 FwdIter tmp = begin;
Chris@16 277 detail::sequence<BidiIter> seq;
Chris@16 278
Chris@16 279 do switch(++count)
Chris@16 280 {
Chris@16 281 case 1:
Chris@16 282 seq = this->parse_sequence(tmp, end);
Chris@16 283 break;
Chris@16 284 case 2:
Chris@16 285 seq = detail::make_dynamic<BidiIter>(alternate_matcher()) | seq;
Chris@16 286 BOOST_FALLTHROUGH;
Chris@16 287 default:
Chris@16 288 seq |= this->parse_sequence(tmp, end);
Chris@16 289 }
Chris@16 290 while((begin = tmp) != end && token_alternate == this->traits_.get_token(tmp, end));
Chris@16 291
Chris@16 292 return seq;
Chris@16 293 }
Chris@16 294
Chris@16 295 ///////////////////////////////////////////////////////////////////////////
Chris@16 296 // parse_group
Chris@16 297 /// INTERNAL ONLY
Chris@16 298 template<typename FwdIter>
Chris@16 299 detail::sequence<BidiIter> parse_group(FwdIter &begin, FwdIter end)
Chris@16 300 {
Chris@16 301 using namespace regex_constants;
Chris@16 302 int mark_nbr = 0;
Chris@16 303 bool keeper = false;
Chris@16 304 bool lookahead = false;
Chris@16 305 bool lookbehind = false;
Chris@16 306 bool negative = false;
Chris@16 307 string_type name;
Chris@16 308
Chris@16 309 detail::sequence<BidiIter> seq, seq_end;
Chris@16 310 FwdIter tmp = FwdIter();
Chris@16 311
Chris@16 312 syntax_option_type old_flags = this->traits_.flags();
Chris@16 313
Chris@16 314 switch(this->traits_.get_group_type(begin, end, name))
Chris@16 315 {
Chris@16 316 case token_no_mark:
Chris@16 317 // Don't process empty groups like (?:) or (?i)
Chris@16 318 // BUGBUG this doesn't handle the degenerate (?:)+ correctly
Chris@16 319 if(token_group_end == this->traits_.get_token(tmp = begin, end))
Chris@16 320 {
Chris@16 321 return this->parse_atom(begin = tmp, end);
Chris@16 322 }
Chris@16 323 break;
Chris@16 324
Chris@16 325 case token_negative_lookahead:
Chris@16 326 negative = true;
Chris@16 327 BOOST_FALLTHROUGH;
Chris@16 328 case token_positive_lookahead:
Chris@16 329 lookahead = true;
Chris@16 330 break;
Chris@16 331
Chris@16 332 case token_negative_lookbehind:
Chris@16 333 negative = true;
Chris@16 334 BOOST_FALLTHROUGH;
Chris@16 335 case token_positive_lookbehind:
Chris@16 336 lookbehind = true;
Chris@16 337 break;
Chris@16 338
Chris@16 339 case token_independent_sub_expression:
Chris@16 340 keeper = true;
Chris@16 341 break;
Chris@16 342
Chris@16 343 case token_comment:
Chris@16 344 while(BOOST_XPR_ENSURE_(begin != end, error_paren, "mismatched parenthesis"))
Chris@16 345 {
Chris@16 346 switch(this->traits_.get_token(begin, end))
Chris@16 347 {
Chris@16 348 case token_group_end:
Chris@16 349 return this->parse_atom(begin, end);
Chris@16 350 case token_escape:
Chris@16 351 BOOST_XPR_ENSURE_(begin != end, error_escape, "incomplete escape sequence");
Chris@16 352 BOOST_FALLTHROUGH;
Chris@16 353 case token_literal:
Chris@16 354 ++begin;
Chris@16 355 break;
Chris@16 356 default:
Chris@16 357 break;
Chris@16 358 }
Chris@16 359 }
Chris@16 360 break;
Chris@16 361
Chris@16 362 case token_recurse:
Chris@16 363 BOOST_XPR_ENSURE_
Chris@16 364 (
Chris@16 365 begin != end && token_group_end == this->traits_.get_token(begin, end)
Chris@16 366 , error_paren
Chris@16 367 , "mismatched parenthesis"
Chris@16 368 );
Chris@16 369 return detail::make_dynamic<BidiIter>(detail::regex_byref_matcher<BidiIter>(this->self_));
Chris@16 370
Chris@16 371 case token_rule_assign:
Chris@16 372 BOOST_THROW_EXCEPTION(
Chris@16 373 regex_error(error_badrule, "rule assignments must be at the front of the regex")
Chris@16 374 );
Chris@16 375 break;
Chris@16 376
Chris@16 377 case token_rule_ref:
Chris@16 378 {
Chris@16 379 typedef detail::core_access<BidiIter> access;
Chris@16 380 BOOST_XPR_ENSURE_
Chris@16 381 (
Chris@16 382 begin != end && token_group_end == this->traits_.get_token(begin, end)
Chris@16 383 , error_paren
Chris@16 384 , "mismatched parenthesis"
Chris@16 385 );
Chris@16 386 basic_regex<BidiIter> &rex = this->rules_[name];
Chris@16 387 shared_ptr<detail::regex_impl<BidiIter> > impl = access::get_regex_impl(rex);
Chris@16 388 this->self_->track_reference(*impl);
Chris@16 389 return detail::make_dynamic<BidiIter>(detail::regex_byref_matcher<BidiIter>(impl));
Chris@16 390 }
Chris@16 391
Chris@16 392 case token_named_mark:
Chris@16 393 mark_nbr = static_cast<int>(++this->mark_count_);
Chris@16 394 for(std::size_t i = 0; i < this->self_->named_marks_.size(); ++i)
Chris@16 395 {
Chris@16 396 BOOST_XPR_ENSURE_(this->self_->named_marks_[i].name_ != name, error_badmark, "named mark already exists");
Chris@16 397 }
Chris@16 398 this->self_->named_marks_.push_back(detail::named_mark<char_type>(name, this->mark_count_));
Chris@16 399 seq = detail::make_dynamic<BidiIter>(detail::mark_begin_matcher(mark_nbr));
Chris@16 400 seq_end = detail::make_dynamic<BidiIter>(detail::mark_end_matcher(mark_nbr));
Chris@16 401 break;
Chris@16 402
Chris@16 403 case token_named_mark_ref:
Chris@16 404 BOOST_XPR_ENSURE_
Chris@16 405 (
Chris@16 406 begin != end && token_group_end == this->traits_.get_token(begin, end)
Chris@16 407 , error_paren
Chris@16 408 , "mismatched parenthesis"
Chris@16 409 );
Chris@16 410 for(std::size_t i = 0; i < this->self_->named_marks_.size(); ++i)
Chris@16 411 {
Chris@16 412 if(this->self_->named_marks_[i].name_ == name)
Chris@16 413 {
Chris@16 414 mark_nbr = static_cast<int>(this->self_->named_marks_[i].mark_nbr_);
Chris@16 415 return detail::make_backref_xpression<BidiIter>
Chris@16 416 (
Chris@16 417 mark_nbr, this->traits_.flags(), this->rxtraits()
Chris@16 418 );
Chris@16 419 }
Chris@16 420 }
Chris@16 421 BOOST_THROW_EXCEPTION(regex_error(error_badmark, "invalid named back-reference"));
Chris@16 422 break;
Chris@16 423
Chris@16 424 default:
Chris@16 425 mark_nbr = static_cast<int>(++this->mark_count_);
Chris@16 426 seq = detail::make_dynamic<BidiIter>(detail::mark_begin_matcher(mark_nbr));
Chris@16 427 seq_end = detail::make_dynamic<BidiIter>(detail::mark_end_matcher(mark_nbr));
Chris@16 428 break;
Chris@16 429 }
Chris@16 430
Chris@16 431 // alternates
Chris@16 432 seq += this->parse_alternates(begin, end);
Chris@16 433 seq += seq_end;
Chris@16 434 BOOST_XPR_ENSURE_
Chris@16 435 (
Chris@16 436 begin != end && token_group_end == this->traits_.get_token(begin, end)
Chris@16 437 , error_paren
Chris@16 438 , "mismatched parenthesis"
Chris@16 439 );
Chris@16 440
Chris@16 441 typedef detail::shared_matchable<BidiIter> xpr_type;
Chris@16 442 if(lookahead)
Chris@16 443 {
Chris@16 444 seq += detail::make_independent_end_xpression<BidiIter>(seq.pure());
Chris@101 445 detail::lookahead_matcher<xpr_type> lam(seq.xpr(), negative, seq.pure());
Chris@101 446 seq = detail::make_dynamic<BidiIter>(lam);
Chris@16 447 }
Chris@16 448 else if(lookbehind)
Chris@16 449 {
Chris@16 450 seq += detail::make_independent_end_xpression<BidiIter>(seq.pure());
Chris@101 451 detail::lookbehind_matcher<xpr_type> lbm(seq.xpr(), seq.width().value(), negative, seq.pure());
Chris@101 452 seq = detail::make_dynamic<BidiIter>(lbm);
Chris@16 453 }
Chris@16 454 else if(keeper) // independent sub-expression
Chris@16 455 {
Chris@16 456 seq += detail::make_independent_end_xpression<BidiIter>(seq.pure());
Chris@101 457 detail::keeper_matcher<xpr_type> km(seq.xpr(), seq.pure());
Chris@101 458 seq = detail::make_dynamic<BidiIter>(km);
Chris@16 459 }
Chris@16 460
Chris@16 461 // restore the modifiers
Chris@16 462 this->traits_.flags(old_flags);
Chris@16 463 return seq;
Chris@16 464 }
Chris@16 465
Chris@16 466 ///////////////////////////////////////////////////////////////////////////
Chris@16 467 // parse_charset
Chris@16 468 /// INTERNAL ONLY
Chris@16 469 template<typename FwdIter>
Chris@16 470 detail::sequence<BidiIter> parse_charset(FwdIter &begin, FwdIter end)
Chris@16 471 {
Chris@16 472 detail::compound_charset<traits_type> chset;
Chris@16 473
Chris@16 474 // call out to a helper to actually parse the character set
Chris@16 475 detail::parse_charset(begin, end, chset, this->traits_);
Chris@16 476
Chris@16 477 return detail::make_charset_xpression<BidiIter>
Chris@16 478 (
Chris@16 479 chset
Chris@16 480 , this->rxtraits()
Chris@16 481 , this->traits_.flags()
Chris@16 482 );
Chris@16 483 }
Chris@16 484
Chris@16 485 ///////////////////////////////////////////////////////////////////////////
Chris@16 486 // parse_atom
Chris@16 487 /// INTERNAL ONLY
Chris@16 488 template<typename FwdIter>
Chris@16 489 detail::sequence<BidiIter> parse_atom(FwdIter &begin, FwdIter end)
Chris@16 490 {
Chris@16 491 using namespace regex_constants;
Chris@16 492 escape_value esc = { 0, 0, 0, detail::escape_char };
Chris@16 493 FwdIter old_begin = begin;
Chris@16 494
Chris@16 495 switch(this->traits_.get_token(begin, end))
Chris@16 496 {
Chris@16 497 case token_literal:
Chris@16 498 return detail::make_literal_xpression<BidiIter>
Chris@16 499 (
Chris@16 500 this->parse_literal(begin, end), this->traits_.flags(), this->rxtraits()
Chris@16 501 );
Chris@16 502
Chris@16 503 case token_any:
Chris@16 504 return detail::make_any_xpression<BidiIter>(this->traits_.flags(), this->rxtraits());
Chris@16 505
Chris@16 506 case token_assert_begin_sequence:
Chris@16 507 return detail::make_dynamic<BidiIter>(detail::assert_bos_matcher());
Chris@16 508
Chris@16 509 case token_assert_end_sequence:
Chris@16 510 return detail::make_dynamic<BidiIter>(detail::assert_eos_matcher());
Chris@16 511
Chris@16 512 case token_assert_begin_line:
Chris@16 513 return detail::make_assert_begin_line<BidiIter>(this->traits_.flags(), this->rxtraits());
Chris@16 514
Chris@16 515 case token_assert_end_line:
Chris@16 516 return detail::make_assert_end_line<BidiIter>(this->traits_.flags(), this->rxtraits());
Chris@16 517
Chris@16 518 case token_assert_word_boundary:
Chris@16 519 return detail::make_assert_word<BidiIter>(detail::word_boundary<mpl::true_>(), this->rxtraits());
Chris@16 520
Chris@16 521 case token_assert_not_word_boundary:
Chris@16 522 return detail::make_assert_word<BidiIter>(detail::word_boundary<mpl::false_>(), this->rxtraits());
Chris@16 523
Chris@16 524 case token_assert_word_begin:
Chris@16 525 return detail::make_assert_word<BidiIter>(detail::word_begin(), this->rxtraits());
Chris@16 526
Chris@16 527 case token_assert_word_end:
Chris@16 528 return detail::make_assert_word<BidiIter>(detail::word_end(), this->rxtraits());
Chris@16 529
Chris@16 530 case token_escape:
Chris@16 531 esc = this->parse_escape(begin, end);
Chris@16 532 switch(esc.type_)
Chris@16 533 {
Chris@16 534 case detail::escape_mark:
Chris@16 535 return detail::make_backref_xpression<BidiIter>
Chris@16 536 (
Chris@16 537 esc.mark_nbr_, this->traits_.flags(), this->rxtraits()
Chris@16 538 );
Chris@16 539 case detail::escape_char:
Chris@16 540 return detail::make_char_xpression<BidiIter>
Chris@16 541 (
Chris@16 542 esc.ch_, this->traits_.flags(), this->rxtraits()
Chris@16 543 );
Chris@16 544 case detail::escape_class:
Chris@16 545 return detail::make_posix_charset_xpression<BidiIter>
Chris@16 546 (
Chris@16 547 esc.class_
Chris@16 548 , this->is_upper_(*begin++)
Chris@16 549 , this->traits_.flags()
Chris@16 550 , this->rxtraits()
Chris@16 551 );
Chris@16 552 }
Chris@16 553
Chris@16 554 case token_group_begin:
Chris@16 555 return this->parse_group(begin, end);
Chris@16 556
Chris@16 557 case token_charset_begin:
Chris@16 558 return this->parse_charset(begin, end);
Chris@16 559
Chris@16 560 case token_invalid_quantifier:
Chris@16 561 BOOST_THROW_EXCEPTION(regex_error(error_badrepeat, "quantifier not expected"));
Chris@16 562 break;
Chris@16 563
Chris@16 564 case token_quote_meta_begin:
Chris@16 565 return detail::make_literal_xpression<BidiIter>
Chris@16 566 (
Chris@16 567 this->parse_quote_meta(begin, end), this->traits_.flags(), this->rxtraits()
Chris@16 568 );
Chris@16 569
Chris@16 570 case token_quote_meta_end:
Chris@16 571 BOOST_THROW_EXCEPTION(
Chris@16 572 regex_error(
Chris@16 573 error_escape
Chris@16 574 , "found quote-meta end without corresponding quote-meta begin"
Chris@16 575 )
Chris@16 576 );
Chris@16 577 break;
Chris@16 578
Chris@16 579 case token_end_of_pattern:
Chris@16 580 break;
Chris@16 581
Chris@16 582 default:
Chris@16 583 begin = old_begin;
Chris@16 584 break;
Chris@16 585 }
Chris@16 586
Chris@16 587 return detail::sequence<BidiIter>();
Chris@16 588 }
Chris@16 589
Chris@16 590 ///////////////////////////////////////////////////////////////////////////
Chris@16 591 // parse_quant
Chris@16 592 /// INTERNAL ONLY
Chris@16 593 template<typename FwdIter>
Chris@16 594 detail::sequence<BidiIter> parse_quant(FwdIter &begin, FwdIter end)
Chris@16 595 {
Chris@16 596 BOOST_ASSERT(begin != end);
Chris@16 597 detail::quant_spec spec = { 0, 0, false, &this->hidden_mark_count_ };
Chris@16 598 detail::sequence<BidiIter> seq = this->parse_atom(begin, end);
Chris@16 599
Chris@16 600 // BUGBUG this doesn't handle the degenerate (?:)+ correctly
Chris@16 601 if(!seq.empty() && begin != end && detail::quant_none != seq.quant())
Chris@16 602 {
Chris@16 603 if(this->traits_.get_quant_spec(begin, end, spec))
Chris@16 604 {
Chris@16 605 BOOST_ASSERT(spec.min_ <= spec.max_);
Chris@16 606
Chris@16 607 if(0 == spec.max_) // quant {0,0} is degenerate -- matches nothing.
Chris@16 608 {
Chris@16 609 seq = this->parse_quant(begin, end);
Chris@16 610 }
Chris@16 611 else
Chris@16 612 {
Chris@16 613 seq.repeat(spec);
Chris@16 614 }
Chris@16 615 }
Chris@16 616 }
Chris@16 617
Chris@16 618 return seq;
Chris@16 619 }
Chris@16 620
Chris@16 621 ///////////////////////////////////////////////////////////////////////////
Chris@16 622 // parse_sequence
Chris@16 623 /// INTERNAL ONLY
Chris@16 624 template<typename FwdIter>
Chris@16 625 detail::sequence<BidiIter> parse_sequence(FwdIter &begin, FwdIter end)
Chris@16 626 {
Chris@16 627 detail::sequence<BidiIter> seq;
Chris@16 628
Chris@16 629 while(begin != end)
Chris@16 630 {
Chris@16 631 detail::sequence<BidiIter> seq_quant = this->parse_quant(begin, end);
Chris@16 632
Chris@16 633 // did we find a quantified atom?
Chris@16 634 if(seq_quant.empty())
Chris@16 635 break;
Chris@16 636
Chris@16 637 // chain it to the end of the xpression sequence
Chris@16 638 seq += seq_quant;
Chris@16 639 }
Chris@16 640
Chris@16 641 return seq;
Chris@16 642 }
Chris@16 643
Chris@16 644 ///////////////////////////////////////////////////////////////////////////
Chris@16 645 // parse_literal
Chris@16 646 // scan ahead looking for char literals to be globbed together into a string literal
Chris@16 647 /// INTERNAL ONLY
Chris@16 648 template<typename FwdIter>
Chris@16 649 string_type parse_literal(FwdIter &begin, FwdIter end)
Chris@16 650 {
Chris@16 651 using namespace regex_constants;
Chris@16 652 BOOST_ASSERT(begin != end);
Chris@16 653 BOOST_ASSERT(token_literal == this->traits_.get_token(begin, end));
Chris@16 654 escape_value esc = { 0, 0, 0, detail::escape_char };
Chris@16 655 string_type literal(1, *begin);
Chris@16 656
Chris@16 657 for(FwdIter prev = begin, tmp = ++begin; begin != end; prev = begin, begin = tmp)
Chris@16 658 {
Chris@16 659 detail::quant_spec spec = { 0, 0, false, &this->hidden_mark_count_ };
Chris@16 660 if(this->traits_.get_quant_spec(tmp, end, spec))
Chris@16 661 {
Chris@16 662 if(literal.size() != 1)
Chris@16 663 {
Chris@16 664 begin = prev;
Chris@16 665 literal.erase(boost::prior(literal.end()));
Chris@16 666 }
Chris@16 667 return literal;
Chris@16 668 }
Chris@16 669 else switch(this->traits_.get_token(tmp, end))
Chris@16 670 {
Chris@16 671 case token_escape:
Chris@16 672 esc = this->parse_escape(tmp, end);
Chris@16 673 if(detail::escape_char != esc.type_) return literal;
Chris@16 674 literal.insert(literal.end(), esc.ch_);
Chris@16 675 break;
Chris@16 676 case token_literal:
Chris@16 677 literal.insert(literal.end(), *tmp++);
Chris@16 678 break;
Chris@16 679 default:
Chris@16 680 return literal;
Chris@16 681 }
Chris@16 682 }
Chris@16 683
Chris@16 684 return literal;
Chris@16 685 }
Chris@16 686
Chris@16 687 ///////////////////////////////////////////////////////////////////////////
Chris@16 688 // parse_quote_meta
Chris@16 689 // scan ahead looking for char literals to be globbed together into a string literal
Chris@16 690 /// INTERNAL ONLY
Chris@16 691 template<typename FwdIter>
Chris@16 692 string_type parse_quote_meta(FwdIter &begin, FwdIter end)
Chris@16 693 {
Chris@16 694 using namespace regex_constants;
Chris@16 695 FwdIter old_begin = begin, old_end;
Chris@16 696 while(end != (old_end = begin))
Chris@16 697 {
Chris@16 698 switch(this->traits_.get_token(begin, end))
Chris@16 699 {
Chris@16 700 case token_quote_meta_end:
Chris@16 701 return string_type(old_begin, old_end);
Chris@16 702 case token_escape:
Chris@16 703 BOOST_XPR_ENSURE_(begin != end, error_escape, "incomplete escape sequence");
Chris@16 704 BOOST_FALLTHROUGH;
Chris@16 705 case token_invalid_quantifier:
Chris@16 706 case token_literal:
Chris@16 707 ++begin;
Chris@16 708 break;
Chris@16 709 default:
Chris@16 710 break;
Chris@16 711 }
Chris@16 712 }
Chris@16 713 return string_type(old_begin, begin);
Chris@16 714 }
Chris@16 715
Chris@16 716 ///////////////////////////////////////////////////////////////////////////////
Chris@16 717 // parse_escape
Chris@16 718 /// INTERNAL ONLY
Chris@16 719 template<typename FwdIter>
Chris@16 720 escape_value parse_escape(FwdIter &begin, FwdIter end)
Chris@16 721 {
Chris@16 722 BOOST_XPR_ENSURE_(begin != end, regex_constants::error_escape, "incomplete escape sequence");
Chris@16 723
Chris@16 724 // first, check to see if this can be a backreference
Chris@16 725 if(0 < this->rxtraits().value(*begin, 10))
Chris@16 726 {
Chris@16 727 // Parse at most 3 decimal digits.
Chris@16 728 FwdIter tmp = begin;
Chris@16 729 int mark_nbr = detail::toi(tmp, end, this->rxtraits(), 10, 999);
Chris@16 730
Chris@16 731 // If the resulting number could conceivably be a backref, then it is.
Chris@16 732 if(10 > mark_nbr || mark_nbr <= static_cast<int>(this->mark_count_))
Chris@16 733 {
Chris@16 734 begin = tmp;
Chris@16 735 escape_value esc = {0, mark_nbr, 0, detail::escape_mark};
Chris@16 736 return esc;
Chris@16 737 }
Chris@16 738 }
Chris@16 739
Chris@16 740 // Not a backreference, defer to the parse_escape helper
Chris@16 741 return detail::parse_escape(begin, end, this->traits_);
Chris@16 742 }
Chris@16 743
Chris@16 744 bool is_upper_(char_type ch) const
Chris@16 745 {
Chris@16 746 return 0 != this->upper_ && this->rxtraits().isctype(ch, this->upper_);
Chris@16 747 }
Chris@16 748
Chris@16 749 std::size_t mark_count_;
Chris@16 750 std::size_t hidden_mark_count_;
Chris@16 751 CompilerTraits traits_;
Chris@16 752 typename RegexTraits::char_class_type upper_;
Chris@16 753 shared_ptr<detail::regex_impl<BidiIter> > self_;
Chris@16 754 std::map<string_type, basic_regex<BidiIter> > rules_;
Chris@16 755 };
Chris@16 756
Chris@16 757 }} // namespace boost::xpressive
Chris@16 758
Chris@16 759 #endif