Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \file regex_compiler.hpp Chris@16: /// Contains the definition of regex_compiler, a factory for building regex objects Chris@16: /// from strings. Chris@16: // Chris@16: // Copyright 2008 Eric Niebler. Distributed under the Boost Chris@16: // Software License, Version 1.0. (See accompanying file Chris@16: // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) Chris@16: Chris@16: #ifndef BOOST_XPRESSIVE_REGEX_COMPILER_HPP_EAN_10_04_2005 Chris@16: #define BOOST_XPRESSIVE_REGEX_COMPILER_HPP_EAN_10_04_2005 Chris@16: Chris@16: // MS compatible compilers support #pragma once Chris@101: #if defined(_MSC_VER) Chris@16: # pragma once Chris@16: #endif Chris@16: Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: Chris@16: namespace boost { namespace xpressive Chris@16: { Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: // regex_compiler Chris@16: // Chris@16: /// \brief Class template regex_compiler is a factory for building basic_regex objects from a string. Chris@16: /// Chris@16: /// Class template regex_compiler is used to construct a basic_regex object from a string. The string Chris@16: /// should contain a valid regular expression. You can imbue a regex_compiler object with a locale, Chris@16: /// after which all basic_regex objects created with that regex_compiler object will use that locale. Chris@16: /// After creating a regex_compiler object, and optionally imbueing it with a locale, you can call the Chris@16: /// compile() method to construct a basic_regex object, passing it the string representing the regular Chris@16: /// expression. You can call compile() multiple times on the same regex_compiler object. Two basic_regex Chris@16: /// objects compiled from the same string will have different regex_id's. Chris@16: template Chris@16: struct regex_compiler Chris@16: { Chris@16: typedef BidiIter iterator_type; Chris@16: typedef typename iterator_value::type char_type; Chris@16: typedef regex_constants::syntax_option_type flag_type; Chris@16: typedef RegexTraits traits_type; Chris@16: typedef typename traits_type::string_type string_type; Chris@16: typedef typename traits_type::locale_type locale_type; Chris@16: typedef typename traits_type::char_class_type char_class_type; Chris@16: Chris@16: explicit regex_compiler(RegexTraits const &traits = RegexTraits()) Chris@16: : mark_count_(0) Chris@16: , hidden_mark_count_(0) Chris@16: , traits_(traits) Chris@16: , upper_(0) Chris@16: , self_() Chris@16: , rules_() Chris@16: { Chris@16: this->upper_ = lookup_classname(this->rxtraits(), "upper"); Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: // imbue Chris@16: /// Specify the locale to be used by a regex_compiler. Chris@16: /// Chris@16: /// \param loc The locale that this regex_compiler should use. Chris@16: /// \return The previous locale. Chris@16: locale_type imbue(locale_type loc) Chris@16: { Chris@16: locale_type oldloc = this->traits_.imbue(loc); Chris@16: this->upper_ = lookup_classname(this->rxtraits(), "upper"); Chris@16: return oldloc; Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: // getloc Chris@16: /// Get the locale used by a regex_compiler. Chris@16: /// Chris@16: /// \return The locale used by this regex_compiler. Chris@16: locale_type getloc() const Chris@16: { Chris@16: return this->traits_.getloc(); Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: // compile Chris@16: /// Builds a basic_regex object from a range of characters. Chris@16: /// Chris@16: /// \param begin The beginning of a range of characters representing the Chris@16: /// regular expression to compile. Chris@16: /// \param end The end of a range of characters representing the Chris@16: /// regular expression to compile. Chris@16: /// \param flags Optional bitmask that determines how the pat string is Chris@16: /// interpreted. (See syntax_option_type.) Chris@16: /// \return A basic_regex object corresponding to the regular expression Chris@16: /// represented by the character range. Chris@16: /// \pre InputIter is a model of the InputIterator concept. Chris@16: /// \pre [begin,end) is a valid range. Chris@16: /// \pre The range of characters specified by [begin,end) contains a Chris@16: /// valid string-based representation of a regular expression. Chris@16: /// \throw regex_error when the range of characters has invalid regular Chris@16: /// expression syntax. Chris@16: template Chris@16: basic_regex Chris@16: compile(InputIter begin, InputIter end, flag_type flags = regex_constants::ECMAScript) Chris@16: { Chris@16: typedef typename iterator_category::type category; Chris@16: return this->compile_(begin, end, flags, category()); Chris@16: } Chris@16: Chris@16: /// \overload Chris@16: /// Chris@16: template Chris@16: typename disable_if, basic_regex >::type Chris@16: compile(InputRange const &pat, flag_type flags = regex_constants::ECMAScript) Chris@16: { Chris@16: return this->compile(boost::begin(pat), boost::end(pat), flags); Chris@16: } Chris@16: Chris@16: /// \overload Chris@16: /// Chris@16: basic_regex Chris@16: compile(char_type const *begin, flag_type flags = regex_constants::ECMAScript) Chris@16: { Chris@16: BOOST_ASSERT(0 != begin); Chris@16: char_type const *end = begin + std::char_traits::length(begin); Chris@16: return this->compile(begin, end, flags); Chris@16: } Chris@16: Chris@16: /// \overload Chris@16: /// Chris@16: basic_regex compile(char_type const *begin, std::size_t size, flag_type flags) Chris@16: { Chris@16: BOOST_ASSERT(0 != begin); Chris@16: char_type const *end = begin + size; Chris@16: return this->compile(begin, end, flags); Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: // operator[] Chris@16: /// Return a reference to the named regular expression. If no such named Chris@16: /// regular expression exists, create a new regular expression and return Chris@16: /// a reference to it. Chris@16: /// Chris@16: /// \param name A std::string containing the name of the regular expression. Chris@16: /// \pre The string is not empty. Chris@16: /// \throw bad_alloc on allocation failure. Chris@16: basic_regex &operator [](string_type const &name) Chris@16: { Chris@16: BOOST_ASSERT(!name.empty()); Chris@16: return this->rules_[name]; Chris@16: } Chris@16: Chris@16: /// \overload Chris@16: /// Chris@16: basic_regex const &operator [](string_type const &name) const Chris@16: { Chris@16: BOOST_ASSERT(!name.empty()); Chris@16: return this->rules_[name]; Chris@16: } Chris@16: Chris@16: private: Chris@16: Chris@16: typedef detail::escape_value escape_value; Chris@16: typedef detail::alternate_matcher, RegexTraits> alternate_matcher; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: // compile_ Chris@16: /// INTERNAL ONLY Chris@16: template Chris@16: basic_regex compile_(FwdIter begin, FwdIter end, flag_type flags, std::forward_iterator_tag) Chris@16: { Chris@16: BOOST_MPL_ASSERT((is_same::type>)); Chris@16: using namespace regex_constants; Chris@16: this->reset(); Chris@16: this->traits_.flags(flags); Chris@16: Chris@16: basic_regex rextmp, *prex = &rextmp; Chris@16: FwdIter tmp = begin; Chris@16: Chris@16: // Check if this regex is a named rule: Chris@16: string_type name; Chris@16: if(token_group_begin == this->traits_.get_token(tmp, end) && Chris@16: BOOST_XPR_ENSURE_(tmp != end, error_paren, "mismatched parenthesis") && Chris@16: token_rule_assign == this->traits_.get_group_type(tmp, end, name)) Chris@16: { Chris@16: begin = tmp; Chris@16: BOOST_XPR_ENSURE_ Chris@16: ( Chris@16: begin != end && token_group_end == this->traits_.get_token(begin, end) Chris@16: , error_paren Chris@16: , "mismatched parenthesis" Chris@16: ); Chris@16: prex = &this->rules_[name]; Chris@16: } Chris@16: Chris@16: this->self_ = detail::core_access::get_regex_impl(*prex); Chris@16: Chris@16: // at the top level, a regex is a sequence of alternates Chris@16: detail::sequence seq = this->parse_alternates(begin, end); Chris@16: BOOST_XPR_ENSURE_(begin == end, error_paren, "mismatched parenthesis"); Chris@16: Chris@16: // terminate the sequence Chris@16: seq += detail::make_dynamic(detail::end_matcher()); Chris@16: Chris@16: // bundle the regex information into a regex_impl object Chris@16: detail::common_compile(seq.xpr().matchable(), *this->self_, this->rxtraits()); Chris@16: Chris@16: this->self_->traits_ = new detail::traits_holder(this->rxtraits()); Chris@16: this->self_->mark_count_ = this->mark_count_; Chris@16: this->self_->hidden_mark_count_ = this->hidden_mark_count_; Chris@16: Chris@16: // References changed, update dependencies. Chris@16: this->self_->tracking_update(); Chris@16: this->self_.reset(); Chris@16: return *prex; Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: // compile_ Chris@16: /// INTERNAL ONLY Chris@16: template Chris@16: basic_regex compile_(InputIter begin, InputIter end, flag_type flags, std::input_iterator_tag) Chris@16: { Chris@16: string_type pat(begin, end); Chris@16: return this->compile_(boost::begin(pat), boost::end(pat), flags, std::forward_iterator_tag()); Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: // reset Chris@16: /// INTERNAL ONLY Chris@16: void reset() Chris@16: { Chris@16: this->mark_count_ = 0; Chris@16: this->hidden_mark_count_ = 0; Chris@16: this->traits_.flags(regex_constants::ECMAScript); Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: // regex_traits Chris@16: /// INTERNAL ONLY Chris@16: traits_type &rxtraits() Chris@16: { Chris@16: return this->traits_.traits(); Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: // regex_traits Chris@16: /// INTERNAL ONLY Chris@16: traits_type const &rxtraits() const Chris@16: { Chris@16: return this->traits_.traits(); Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: // parse_alternates Chris@16: /// INTERNAL ONLY Chris@16: template Chris@16: detail::sequence parse_alternates(FwdIter &begin, FwdIter end) Chris@16: { Chris@16: using namespace regex_constants; Chris@16: int count = 0; Chris@16: FwdIter tmp = begin; Chris@16: detail::sequence seq; Chris@16: Chris@16: do switch(++count) Chris@16: { Chris@16: case 1: Chris@16: seq = this->parse_sequence(tmp, end); Chris@16: break; Chris@16: case 2: Chris@16: seq = detail::make_dynamic(alternate_matcher()) | seq; Chris@16: BOOST_FALLTHROUGH; Chris@16: default: Chris@16: seq |= this->parse_sequence(tmp, end); Chris@16: } Chris@16: while((begin = tmp) != end && token_alternate == this->traits_.get_token(tmp, end)); Chris@16: Chris@16: return seq; Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: // parse_group Chris@16: /// INTERNAL ONLY Chris@16: template Chris@16: detail::sequence parse_group(FwdIter &begin, FwdIter end) Chris@16: { Chris@16: using namespace regex_constants; Chris@16: int mark_nbr = 0; Chris@16: bool keeper = false; Chris@16: bool lookahead = false; Chris@16: bool lookbehind = false; Chris@16: bool negative = false; Chris@16: string_type name; Chris@16: Chris@16: detail::sequence seq, seq_end; Chris@16: FwdIter tmp = FwdIter(); Chris@16: Chris@16: syntax_option_type old_flags = this->traits_.flags(); Chris@16: Chris@16: switch(this->traits_.get_group_type(begin, end, name)) Chris@16: { Chris@16: case token_no_mark: Chris@16: // Don't process empty groups like (?:) or (?i) Chris@16: // BUGBUG this doesn't handle the degenerate (?:)+ correctly Chris@16: if(token_group_end == this->traits_.get_token(tmp = begin, end)) Chris@16: { Chris@16: return this->parse_atom(begin = tmp, end); Chris@16: } Chris@16: break; Chris@16: Chris@16: case token_negative_lookahead: Chris@16: negative = true; Chris@16: BOOST_FALLTHROUGH; Chris@16: case token_positive_lookahead: Chris@16: lookahead = true; Chris@16: break; Chris@16: Chris@16: case token_negative_lookbehind: Chris@16: negative = true; Chris@16: BOOST_FALLTHROUGH; Chris@16: case token_positive_lookbehind: Chris@16: lookbehind = true; Chris@16: break; Chris@16: Chris@16: case token_independent_sub_expression: Chris@16: keeper = true; Chris@16: break; Chris@16: Chris@16: case token_comment: Chris@16: while(BOOST_XPR_ENSURE_(begin != end, error_paren, "mismatched parenthesis")) Chris@16: { Chris@16: switch(this->traits_.get_token(begin, end)) Chris@16: { Chris@16: case token_group_end: Chris@16: return this->parse_atom(begin, end); Chris@16: case token_escape: Chris@16: BOOST_XPR_ENSURE_(begin != end, error_escape, "incomplete escape sequence"); Chris@16: BOOST_FALLTHROUGH; Chris@16: case token_literal: Chris@16: ++begin; Chris@16: break; Chris@16: default: Chris@16: break; Chris@16: } Chris@16: } Chris@16: break; Chris@16: Chris@16: case token_recurse: Chris@16: BOOST_XPR_ENSURE_ Chris@16: ( Chris@16: begin != end && token_group_end == this->traits_.get_token(begin, end) Chris@16: , error_paren Chris@16: , "mismatched parenthesis" Chris@16: ); Chris@16: return detail::make_dynamic(detail::regex_byref_matcher(this->self_)); Chris@16: Chris@16: case token_rule_assign: Chris@16: BOOST_THROW_EXCEPTION( Chris@16: regex_error(error_badrule, "rule assignments must be at the front of the regex") Chris@16: ); Chris@16: break; Chris@16: Chris@16: case token_rule_ref: Chris@16: { Chris@16: typedef detail::core_access access; Chris@16: BOOST_XPR_ENSURE_ Chris@16: ( Chris@16: begin != end && token_group_end == this->traits_.get_token(begin, end) Chris@16: , error_paren Chris@16: , "mismatched parenthesis" Chris@16: ); Chris@16: basic_regex &rex = this->rules_[name]; Chris@16: shared_ptr > impl = access::get_regex_impl(rex); Chris@16: this->self_->track_reference(*impl); Chris@16: return detail::make_dynamic(detail::regex_byref_matcher(impl)); Chris@16: } Chris@16: Chris@16: case token_named_mark: Chris@16: mark_nbr = static_cast(++this->mark_count_); Chris@16: for(std::size_t i = 0; i < this->self_->named_marks_.size(); ++i) Chris@16: { Chris@16: BOOST_XPR_ENSURE_(this->self_->named_marks_[i].name_ != name, error_badmark, "named mark already exists"); Chris@16: } Chris@16: this->self_->named_marks_.push_back(detail::named_mark(name, this->mark_count_)); Chris@16: seq = detail::make_dynamic(detail::mark_begin_matcher(mark_nbr)); Chris@16: seq_end = detail::make_dynamic(detail::mark_end_matcher(mark_nbr)); Chris@16: break; Chris@16: Chris@16: case token_named_mark_ref: Chris@16: BOOST_XPR_ENSURE_ Chris@16: ( Chris@16: begin != end && token_group_end == this->traits_.get_token(begin, end) Chris@16: , error_paren Chris@16: , "mismatched parenthesis" Chris@16: ); Chris@16: for(std::size_t i = 0; i < this->self_->named_marks_.size(); ++i) Chris@16: { Chris@16: if(this->self_->named_marks_[i].name_ == name) Chris@16: { Chris@16: mark_nbr = static_cast(this->self_->named_marks_[i].mark_nbr_); Chris@16: return detail::make_backref_xpression Chris@16: ( Chris@16: mark_nbr, this->traits_.flags(), this->rxtraits() Chris@16: ); Chris@16: } Chris@16: } Chris@16: BOOST_THROW_EXCEPTION(regex_error(error_badmark, "invalid named back-reference")); Chris@16: break; Chris@16: Chris@16: default: Chris@16: mark_nbr = static_cast(++this->mark_count_); Chris@16: seq = detail::make_dynamic(detail::mark_begin_matcher(mark_nbr)); Chris@16: seq_end = detail::make_dynamic(detail::mark_end_matcher(mark_nbr)); Chris@16: break; Chris@16: } Chris@16: Chris@16: // alternates Chris@16: seq += this->parse_alternates(begin, end); Chris@16: seq += seq_end; Chris@16: BOOST_XPR_ENSURE_ Chris@16: ( Chris@16: begin != end && token_group_end == this->traits_.get_token(begin, end) Chris@16: , error_paren Chris@16: , "mismatched parenthesis" Chris@16: ); Chris@16: Chris@16: typedef detail::shared_matchable xpr_type; Chris@16: if(lookahead) Chris@16: { Chris@16: seq += detail::make_independent_end_xpression(seq.pure()); Chris@101: detail::lookahead_matcher lam(seq.xpr(), negative, seq.pure()); Chris@101: seq = detail::make_dynamic(lam); Chris@16: } Chris@16: else if(lookbehind) Chris@16: { Chris@16: seq += detail::make_independent_end_xpression(seq.pure()); Chris@101: detail::lookbehind_matcher lbm(seq.xpr(), seq.width().value(), negative, seq.pure()); Chris@101: seq = detail::make_dynamic(lbm); Chris@16: } Chris@16: else if(keeper) // independent sub-expression Chris@16: { Chris@16: seq += detail::make_independent_end_xpression(seq.pure()); Chris@101: detail::keeper_matcher km(seq.xpr(), seq.pure()); Chris@101: seq = detail::make_dynamic(km); Chris@16: } Chris@16: Chris@16: // restore the modifiers Chris@16: this->traits_.flags(old_flags); Chris@16: return seq; Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: // parse_charset Chris@16: /// INTERNAL ONLY Chris@16: template Chris@16: detail::sequence parse_charset(FwdIter &begin, FwdIter end) Chris@16: { Chris@16: detail::compound_charset chset; Chris@16: Chris@16: // call out to a helper to actually parse the character set Chris@16: detail::parse_charset(begin, end, chset, this->traits_); Chris@16: Chris@16: return detail::make_charset_xpression Chris@16: ( Chris@16: chset Chris@16: , this->rxtraits() Chris@16: , this->traits_.flags() Chris@16: ); Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: // parse_atom Chris@16: /// INTERNAL ONLY Chris@16: template Chris@16: detail::sequence parse_atom(FwdIter &begin, FwdIter end) Chris@16: { Chris@16: using namespace regex_constants; Chris@16: escape_value esc = { 0, 0, 0, detail::escape_char }; Chris@16: FwdIter old_begin = begin; Chris@16: Chris@16: switch(this->traits_.get_token(begin, end)) Chris@16: { Chris@16: case token_literal: Chris@16: return detail::make_literal_xpression Chris@16: ( Chris@16: this->parse_literal(begin, end), this->traits_.flags(), this->rxtraits() Chris@16: ); Chris@16: Chris@16: case token_any: Chris@16: return detail::make_any_xpression(this->traits_.flags(), this->rxtraits()); Chris@16: Chris@16: case token_assert_begin_sequence: Chris@16: return detail::make_dynamic(detail::assert_bos_matcher()); Chris@16: Chris@16: case token_assert_end_sequence: Chris@16: return detail::make_dynamic(detail::assert_eos_matcher()); Chris@16: Chris@16: case token_assert_begin_line: Chris@16: return detail::make_assert_begin_line(this->traits_.flags(), this->rxtraits()); Chris@16: Chris@16: case token_assert_end_line: Chris@16: return detail::make_assert_end_line(this->traits_.flags(), this->rxtraits()); Chris@16: Chris@16: case token_assert_word_boundary: Chris@16: return detail::make_assert_word(detail::word_boundary(), this->rxtraits()); Chris@16: Chris@16: case token_assert_not_word_boundary: Chris@16: return detail::make_assert_word(detail::word_boundary(), this->rxtraits()); Chris@16: Chris@16: case token_assert_word_begin: Chris@16: return detail::make_assert_word(detail::word_begin(), this->rxtraits()); Chris@16: Chris@16: case token_assert_word_end: Chris@16: return detail::make_assert_word(detail::word_end(), this->rxtraits()); Chris@16: Chris@16: case token_escape: Chris@16: esc = this->parse_escape(begin, end); Chris@16: switch(esc.type_) Chris@16: { Chris@16: case detail::escape_mark: Chris@16: return detail::make_backref_xpression Chris@16: ( Chris@16: esc.mark_nbr_, this->traits_.flags(), this->rxtraits() Chris@16: ); Chris@16: case detail::escape_char: Chris@16: return detail::make_char_xpression Chris@16: ( Chris@16: esc.ch_, this->traits_.flags(), this->rxtraits() Chris@16: ); Chris@16: case detail::escape_class: Chris@16: return detail::make_posix_charset_xpression Chris@16: ( Chris@16: esc.class_ Chris@16: , this->is_upper_(*begin++) Chris@16: , this->traits_.flags() Chris@16: , this->rxtraits() Chris@16: ); Chris@16: } Chris@16: Chris@16: case token_group_begin: Chris@16: return this->parse_group(begin, end); Chris@16: Chris@16: case token_charset_begin: Chris@16: return this->parse_charset(begin, end); Chris@16: Chris@16: case token_invalid_quantifier: Chris@16: BOOST_THROW_EXCEPTION(regex_error(error_badrepeat, "quantifier not expected")); Chris@16: break; Chris@16: Chris@16: case token_quote_meta_begin: Chris@16: return detail::make_literal_xpression Chris@16: ( Chris@16: this->parse_quote_meta(begin, end), this->traits_.flags(), this->rxtraits() Chris@16: ); Chris@16: Chris@16: case token_quote_meta_end: Chris@16: BOOST_THROW_EXCEPTION( Chris@16: regex_error( Chris@16: error_escape Chris@16: , "found quote-meta end without corresponding quote-meta begin" Chris@16: ) Chris@16: ); Chris@16: break; Chris@16: Chris@16: case token_end_of_pattern: Chris@16: break; Chris@16: Chris@16: default: Chris@16: begin = old_begin; Chris@16: break; Chris@16: } Chris@16: Chris@16: return detail::sequence(); Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: // parse_quant Chris@16: /// INTERNAL ONLY Chris@16: template Chris@16: detail::sequence parse_quant(FwdIter &begin, FwdIter end) Chris@16: { Chris@16: BOOST_ASSERT(begin != end); Chris@16: detail::quant_spec spec = { 0, 0, false, &this->hidden_mark_count_ }; Chris@16: detail::sequence seq = this->parse_atom(begin, end); Chris@16: Chris@16: // BUGBUG this doesn't handle the degenerate (?:)+ correctly Chris@16: if(!seq.empty() && begin != end && detail::quant_none != seq.quant()) Chris@16: { Chris@16: if(this->traits_.get_quant_spec(begin, end, spec)) Chris@16: { Chris@16: BOOST_ASSERT(spec.min_ <= spec.max_); Chris@16: Chris@16: if(0 == spec.max_) // quant {0,0} is degenerate -- matches nothing. Chris@16: { Chris@16: seq = this->parse_quant(begin, end); Chris@16: } Chris@16: else Chris@16: { Chris@16: seq.repeat(spec); Chris@16: } Chris@16: } Chris@16: } Chris@16: Chris@16: return seq; Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: // parse_sequence Chris@16: /// INTERNAL ONLY Chris@16: template Chris@16: detail::sequence parse_sequence(FwdIter &begin, FwdIter end) Chris@16: { Chris@16: detail::sequence seq; Chris@16: Chris@16: while(begin != end) Chris@16: { Chris@16: detail::sequence seq_quant = this->parse_quant(begin, end); Chris@16: Chris@16: // did we find a quantified atom? Chris@16: if(seq_quant.empty()) Chris@16: break; Chris@16: Chris@16: // chain it to the end of the xpression sequence Chris@16: seq += seq_quant; Chris@16: } Chris@16: Chris@16: return seq; Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: // parse_literal Chris@16: // scan ahead looking for char literals to be globbed together into a string literal Chris@16: /// INTERNAL ONLY Chris@16: template Chris@16: string_type parse_literal(FwdIter &begin, FwdIter end) Chris@16: { Chris@16: using namespace regex_constants; Chris@16: BOOST_ASSERT(begin != end); Chris@16: BOOST_ASSERT(token_literal == this->traits_.get_token(begin, end)); Chris@16: escape_value esc = { 0, 0, 0, detail::escape_char }; Chris@16: string_type literal(1, *begin); Chris@16: Chris@16: for(FwdIter prev = begin, tmp = ++begin; begin != end; prev = begin, begin = tmp) Chris@16: { Chris@16: detail::quant_spec spec = { 0, 0, false, &this->hidden_mark_count_ }; Chris@16: if(this->traits_.get_quant_spec(tmp, end, spec)) Chris@16: { Chris@16: if(literal.size() != 1) Chris@16: { Chris@16: begin = prev; Chris@16: literal.erase(boost::prior(literal.end())); Chris@16: } Chris@16: return literal; Chris@16: } Chris@16: else switch(this->traits_.get_token(tmp, end)) Chris@16: { Chris@16: case token_escape: Chris@16: esc = this->parse_escape(tmp, end); Chris@16: if(detail::escape_char != esc.type_) return literal; Chris@16: literal.insert(literal.end(), esc.ch_); Chris@16: break; Chris@16: case token_literal: Chris@16: literal.insert(literal.end(), *tmp++); Chris@16: break; Chris@16: default: Chris@16: return literal; Chris@16: } Chris@16: } Chris@16: Chris@16: return literal; Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: // parse_quote_meta Chris@16: // scan ahead looking for char literals to be globbed together into a string literal Chris@16: /// INTERNAL ONLY Chris@16: template Chris@16: string_type parse_quote_meta(FwdIter &begin, FwdIter end) Chris@16: { Chris@16: using namespace regex_constants; Chris@16: FwdIter old_begin = begin, old_end; Chris@16: while(end != (old_end = begin)) Chris@16: { Chris@16: switch(this->traits_.get_token(begin, end)) Chris@16: { Chris@16: case token_quote_meta_end: Chris@16: return string_type(old_begin, old_end); Chris@16: case token_escape: Chris@16: BOOST_XPR_ENSURE_(begin != end, error_escape, "incomplete escape sequence"); Chris@16: BOOST_FALLTHROUGH; Chris@16: case token_invalid_quantifier: Chris@16: case token_literal: Chris@16: ++begin; Chris@16: break; Chris@16: default: Chris@16: break; Chris@16: } Chris@16: } Chris@16: return string_type(old_begin, begin); Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: // parse_escape Chris@16: /// INTERNAL ONLY Chris@16: template Chris@16: escape_value parse_escape(FwdIter &begin, FwdIter end) Chris@16: { Chris@16: BOOST_XPR_ENSURE_(begin != end, regex_constants::error_escape, "incomplete escape sequence"); Chris@16: Chris@16: // first, check to see if this can be a backreference Chris@16: if(0 < this->rxtraits().value(*begin, 10)) Chris@16: { Chris@16: // Parse at most 3 decimal digits. Chris@16: FwdIter tmp = begin; Chris@16: int mark_nbr = detail::toi(tmp, end, this->rxtraits(), 10, 999); Chris@16: Chris@16: // If the resulting number could conceivably be a backref, then it is. Chris@16: if(10 > mark_nbr || mark_nbr <= static_cast(this->mark_count_)) Chris@16: { Chris@16: begin = tmp; Chris@16: escape_value esc = {0, mark_nbr, 0, detail::escape_mark}; Chris@16: return esc; Chris@16: } Chris@16: } Chris@16: Chris@16: // Not a backreference, defer to the parse_escape helper Chris@16: return detail::parse_escape(begin, end, this->traits_); Chris@16: } Chris@16: Chris@16: bool is_upper_(char_type ch) const Chris@16: { Chris@16: return 0 != this->upper_ && this->rxtraits().isctype(ch, this->upper_); Chris@16: } Chris@16: Chris@16: std::size_t mark_count_; Chris@16: std::size_t hidden_mark_count_; Chris@16: CompilerTraits traits_; Chris@16: typename RegexTraits::char_class_type upper_; Chris@16: shared_ptr > self_; Chris@16: std::map > rules_; Chris@16: }; Chris@16: Chris@16: }} // namespace boost::xpressive Chris@16: Chris@16: #endif