Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: // detail/dynamic/parser_traits.hpp Chris@16: // Chris@16: // Copyright 2008 Eric Niebler. Distributed under the Boost Chris@16: // Software License, Version 1.0. (See accompanying file Chris@16: // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) Chris@16: Chris@16: #ifndef BOOST_XPRESSIVE_DETAIL_DYNAMIC_PARSER_TRAITS_HPP_EAN_10_04_2005 Chris@16: #define BOOST_XPRESSIVE_DETAIL_DYNAMIC_PARSER_TRAITS_HPP_EAN_10_04_2005 Chris@16: Chris@16: // MS compatible compilers support #pragma once Chris@101: #if defined(_MSC_VER) Chris@16: # pragma once Chris@16: #endif Chris@16: Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: Chris@16: namespace boost { namespace xpressive Chris@16: { Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: // compiler_traits Chris@16: // this works for char and wchar_t. it must be specialized for anything else. Chris@16: // Chris@16: template Chris@16: struct compiler_traits Chris@16: { Chris@16: typedef RegexTraits regex_traits; Chris@16: typedef typename regex_traits::char_type char_type; Chris@16: typedef typename regex_traits::string_type string_type; Chris@16: typedef typename regex_traits::locale_type locale_type; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: // constructor Chris@16: explicit compiler_traits(RegexTraits const &traits = RegexTraits()) Chris@16: : traits_(traits) Chris@16: , flags_(regex_constants::ECMAScript) Chris@16: , space_(lookup_classname(traits_, "space")) Chris@16: , alnum_(lookup_classname(traits_, "alnum")) Chris@16: { Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: // flags Chris@16: regex_constants::syntax_option_type flags() const Chris@16: { Chris@16: return this->flags_; Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: // flags Chris@16: void flags(regex_constants::syntax_option_type flags) Chris@16: { Chris@16: this->flags_ = flags; Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: // traits Chris@16: regex_traits &traits() Chris@16: { Chris@16: return this->traits_; Chris@16: } Chris@16: Chris@16: regex_traits const &traits() const Chris@16: { Chris@16: return this->traits_; Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: // imbue Chris@16: locale_type imbue(locale_type const &loc) Chris@16: { Chris@16: locale_type oldloc = this->traits().imbue(loc); Chris@16: this->space_ = lookup_classname(this->traits(), "space"); Chris@16: this->alnum_ = lookup_classname(this->traits(), "alnum"); Chris@16: return oldloc; Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: // getloc Chris@16: locale_type getloc() const Chris@16: { Chris@16: return this->traits().getloc(); Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: // get_token Chris@16: // get a token and advance the iterator Chris@16: template Chris@16: regex_constants::compiler_token_type get_token(FwdIter &begin, FwdIter end) Chris@16: { Chris@16: using namespace regex_constants; Chris@16: if(this->eat_ws_(begin, end) == end) Chris@16: { Chris@16: return regex_constants::token_end_of_pattern; Chris@16: } Chris@16: Chris@16: switch(*begin) Chris@16: { Chris@16: case BOOST_XPR_CHAR_(char_type, '\\'): return this->get_escape_token(++begin, end); Chris@16: case BOOST_XPR_CHAR_(char_type, '.'): ++begin; return token_any; Chris@16: case BOOST_XPR_CHAR_(char_type, '^'): ++begin; return token_assert_begin_line; Chris@16: case BOOST_XPR_CHAR_(char_type, '$'): ++begin; return token_assert_end_line; Chris@16: case BOOST_XPR_CHAR_(char_type, '('): ++begin; return token_group_begin; Chris@16: case BOOST_XPR_CHAR_(char_type, ')'): ++begin; return token_group_end; Chris@16: case BOOST_XPR_CHAR_(char_type, '|'): ++begin; return token_alternate; Chris@16: case BOOST_XPR_CHAR_(char_type, '['): ++begin; return token_charset_begin; Chris@16: Chris@16: case BOOST_XPR_CHAR_(char_type, '*'): Chris@16: case BOOST_XPR_CHAR_(char_type, '+'): Chris@16: case BOOST_XPR_CHAR_(char_type, '?'): Chris@16: return token_invalid_quantifier; Chris@16: Chris@16: case BOOST_XPR_CHAR_(char_type, ']'): Chris@16: case BOOST_XPR_CHAR_(char_type, '{'): Chris@16: default: Chris@16: return token_literal; Chris@16: } Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: // get_quant_spec Chris@16: template Chris@16: bool get_quant_spec(FwdIter &begin, FwdIter end, detail::quant_spec &spec) Chris@16: { Chris@16: using namespace regex_constants; Chris@16: FwdIter old_begin; Chris@16: Chris@16: if(this->eat_ws_(begin, end) == end) Chris@16: { Chris@16: return false; Chris@16: } Chris@16: Chris@16: switch(*begin) Chris@16: { Chris@16: case BOOST_XPR_CHAR_(char_type, '*'): Chris@16: spec.min_ = 0; Chris@16: spec.max_ = (std::numeric_limits::max)(); Chris@16: break; Chris@16: Chris@16: case BOOST_XPR_CHAR_(char_type, '+'): Chris@16: spec.min_ = 1; Chris@16: spec.max_ = (std::numeric_limits::max)(); Chris@16: break; Chris@16: Chris@16: case BOOST_XPR_CHAR_(char_type, '?'): Chris@16: spec.min_ = 0; Chris@16: spec.max_ = 1; Chris@16: break; Chris@16: Chris@16: case BOOST_XPR_CHAR_(char_type, '{'): Chris@16: old_begin = this->eat_ws_(++begin, end); Chris@16: spec.min_ = spec.max_ = detail::toi(begin, end, this->traits()); Chris@16: BOOST_XPR_ENSURE_ Chris@16: ( Chris@16: begin != old_begin && begin != end, error_brace, "invalid quantifier" Chris@16: ); Chris@16: Chris@16: if(*begin == BOOST_XPR_CHAR_(char_type, ',')) Chris@16: { Chris@16: old_begin = this->eat_ws_(++begin, end); Chris@16: spec.max_ = detail::toi(begin, end, this->traits()); Chris@16: BOOST_XPR_ENSURE_ Chris@16: ( Chris@16: begin != end && BOOST_XPR_CHAR_(char_type, '}') == *begin Chris@16: , error_brace, "invalid quantifier" Chris@16: ); Chris@16: Chris@16: if(begin == old_begin) Chris@16: { Chris@16: spec.max_ = (std::numeric_limits::max)(); Chris@16: } Chris@16: else Chris@16: { Chris@16: BOOST_XPR_ENSURE_ Chris@16: ( Chris@16: spec.min_ <= spec.max_, error_badbrace, "invalid quantification range" Chris@16: ); Chris@16: } Chris@16: } Chris@16: else Chris@16: { Chris@16: BOOST_XPR_ENSURE_ Chris@16: ( Chris@16: BOOST_XPR_CHAR_(char_type, '}') == *begin, error_brace, "invalid quantifier" Chris@16: ); Chris@16: } Chris@16: break; Chris@16: Chris@16: default: Chris@16: return false; Chris@16: } Chris@16: Chris@16: spec.greedy_ = true; Chris@16: if(this->eat_ws_(++begin, end) != end && BOOST_XPR_CHAR_(char_type, '?') == *begin) Chris@16: { Chris@16: ++begin; Chris@16: spec.greedy_ = false; Chris@16: } Chris@16: Chris@16: return true; Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: // get_group_type Chris@16: template Chris@16: regex_constants::compiler_token_type get_group_type(FwdIter &begin, FwdIter end, string_type &name) Chris@16: { Chris@16: using namespace regex_constants; Chris@16: if(this->eat_ws_(begin, end) != end && BOOST_XPR_CHAR_(char_type, '?') == *begin) Chris@16: { Chris@16: this->eat_ws_(++begin, end); Chris@16: BOOST_XPR_ENSURE_(begin != end, error_paren, "incomplete extension"); Chris@16: Chris@16: switch(*begin) Chris@16: { Chris@16: case BOOST_XPR_CHAR_(char_type, ':'): ++begin; return token_no_mark; Chris@16: case BOOST_XPR_CHAR_(char_type, '>'): ++begin; return token_independent_sub_expression; Chris@16: case BOOST_XPR_CHAR_(char_type, '#'): ++begin; return token_comment; Chris@16: case BOOST_XPR_CHAR_(char_type, '='): ++begin; return token_positive_lookahead; Chris@16: case BOOST_XPR_CHAR_(char_type, '!'): ++begin; return token_negative_lookahead; Chris@16: case BOOST_XPR_CHAR_(char_type, 'R'): ++begin; return token_recurse; Chris@16: case BOOST_XPR_CHAR_(char_type, '$'): Chris@16: this->get_name_(++begin, end, name); Chris@16: BOOST_XPR_ENSURE_(begin != end, error_paren, "incomplete extension"); Chris@16: if(BOOST_XPR_CHAR_(char_type, '=') == *begin) Chris@16: { Chris@16: ++begin; Chris@16: return token_rule_assign; Chris@16: } Chris@16: return token_rule_ref; Chris@16: Chris@16: case BOOST_XPR_CHAR_(char_type, '<'): Chris@16: this->eat_ws_(++begin, end); Chris@16: BOOST_XPR_ENSURE_(begin != end, error_paren, "incomplete extension"); Chris@16: switch(*begin) Chris@16: { Chris@16: case BOOST_XPR_CHAR_(char_type, '='): ++begin; return token_positive_lookbehind; Chris@16: case BOOST_XPR_CHAR_(char_type, '!'): ++begin; return token_negative_lookbehind; Chris@16: default: Chris@16: BOOST_THROW_EXCEPTION(regex_error(error_badbrace, "unrecognized extension")); Chris@16: } Chris@16: Chris@16: case BOOST_XPR_CHAR_(char_type, 'P'): Chris@16: this->eat_ws_(++begin, end); Chris@16: BOOST_XPR_ENSURE_(begin != end, error_paren, "incomplete extension"); Chris@16: switch(*begin) Chris@16: { Chris@16: case BOOST_XPR_CHAR_(char_type, '<'): Chris@16: this->get_name_(++begin, end, name); Chris@16: BOOST_XPR_ENSURE_(begin != end && BOOST_XPR_CHAR_(char_type, '>') == *begin++, error_paren, "incomplete extension"); Chris@16: return token_named_mark; Chris@16: case BOOST_XPR_CHAR_(char_type, '='): Chris@16: this->get_name_(++begin, end, name); Chris@16: BOOST_XPR_ENSURE_(begin != end, error_paren, "incomplete extension"); Chris@16: return token_named_mark_ref; Chris@16: default: Chris@16: BOOST_THROW_EXCEPTION(regex_error(error_badbrace, "unrecognized extension")); Chris@16: } Chris@16: Chris@16: case BOOST_XPR_CHAR_(char_type, 'i'): Chris@16: case BOOST_XPR_CHAR_(char_type, 'm'): Chris@16: case BOOST_XPR_CHAR_(char_type, 's'): Chris@16: case BOOST_XPR_CHAR_(char_type, 'x'): Chris@16: case BOOST_XPR_CHAR_(char_type, '-'): Chris@16: return this->parse_mods_(begin, end); Chris@16: Chris@16: default: Chris@16: BOOST_THROW_EXCEPTION(regex_error(error_badbrace, "unrecognized extension")); Chris@16: } Chris@16: } Chris@16: Chris@16: return token_literal; Chris@16: } Chris@16: Chris@16: ////////////////////////////////////////////////////////////////////////// Chris@16: // get_charset_token Chris@16: // NOTE: white-space is *never* ignored in a charset. Chris@16: template Chris@16: regex_constants::compiler_token_type get_charset_token(FwdIter &begin, FwdIter end) Chris@16: { Chris@16: using namespace regex_constants; Chris@16: BOOST_ASSERT(begin != end); Chris@16: switch(*begin) Chris@16: { Chris@16: case BOOST_XPR_CHAR_(char_type, '^'): ++begin; return token_charset_invert; Chris@16: case BOOST_XPR_CHAR_(char_type, '-'): ++begin; return token_charset_hyphen; Chris@16: case BOOST_XPR_CHAR_(char_type, ']'): ++begin; return token_charset_end; Chris@16: case BOOST_XPR_CHAR_(char_type, '['): Chris@16: { Chris@16: FwdIter next = begin; ++next; Chris@16: if(next != end) Chris@16: { Chris@16: BOOST_XPR_ENSURE_( Chris@16: *next != BOOST_XPR_CHAR_(char_type, '=') Chris@16: , error_collate Chris@16: , "equivalence classes are not yet supported" Chris@16: ); Chris@16: Chris@16: BOOST_XPR_ENSURE_( Chris@16: *next != BOOST_XPR_CHAR_(char_type, '.') Chris@16: , error_collate Chris@16: , "collation sequences are not yet supported" Chris@16: ); Chris@16: Chris@16: if(*next == BOOST_XPR_CHAR_(char_type, ':')) Chris@16: { Chris@16: begin = ++next; Chris@16: return token_posix_charset_begin; Chris@16: } Chris@16: } Chris@16: } Chris@16: break; Chris@16: case BOOST_XPR_CHAR_(char_type, ':'): Chris@16: { Chris@16: FwdIter next = begin; ++next; Chris@16: if(next != end && *next == BOOST_XPR_CHAR_(char_type, ']')) Chris@16: { Chris@16: begin = ++next; Chris@16: return token_posix_charset_end; Chris@16: } Chris@16: } Chris@16: break; Chris@16: case BOOST_XPR_CHAR_(char_type, '\\'): Chris@16: if(++begin != end) Chris@16: { Chris@16: switch(*begin) Chris@16: { Chris@16: case BOOST_XPR_CHAR_(char_type, 'b'): ++begin; return token_charset_backspace; Chris@16: default:; Chris@16: } Chris@16: } Chris@16: return token_escape; Chris@16: default:; Chris@16: } Chris@16: return token_literal; Chris@16: } Chris@16: Chris@16: ////////////////////////////////////////////////////////////////////////// Chris@16: // get_escape_token Chris@16: template Chris@16: regex_constants::compiler_token_type get_escape_token(FwdIter &begin, FwdIter end) Chris@16: { Chris@16: using namespace regex_constants; Chris@16: if(begin != end) Chris@16: { Chris@16: switch(*begin) Chris@16: { Chris@16: //case BOOST_XPR_CHAR_(char_type, 'a'): ++begin; return token_escape_bell; Chris@16: //case BOOST_XPR_CHAR_(char_type, 'c'): ++begin; return token_escape_control; Chris@16: //case BOOST_XPR_CHAR_(char_type, 'e'): ++begin; return token_escape_escape; Chris@16: //case BOOST_XPR_CHAR_(char_type, 'f'): ++begin; return token_escape_formfeed; Chris@16: //case BOOST_XPR_CHAR_(char_type, 'n'): ++begin; return token_escape_newline; Chris@16: //case BOOST_XPR_CHAR_(char_type, 't'): ++begin; return token_escape_horizontal_tab; Chris@16: //case BOOST_XPR_CHAR_(char_type, 'v'): ++begin; return token_escape_vertical_tab; Chris@16: case BOOST_XPR_CHAR_(char_type, 'A'): ++begin; return token_assert_begin_sequence; Chris@16: case BOOST_XPR_CHAR_(char_type, 'b'): ++begin; return token_assert_word_boundary; Chris@16: case BOOST_XPR_CHAR_(char_type, 'B'): ++begin; return token_assert_not_word_boundary; Chris@16: case BOOST_XPR_CHAR_(char_type, 'E'): ++begin; return token_quote_meta_end; Chris@16: case BOOST_XPR_CHAR_(char_type, 'Q'): ++begin; return token_quote_meta_begin; Chris@16: case BOOST_XPR_CHAR_(char_type, 'Z'): ++begin; return token_assert_end_sequence; Chris@16: // Non-standard extension to ECMAScript syntax Chris@16: case BOOST_XPR_CHAR_(char_type, '<'): ++begin; return token_assert_word_begin; Chris@16: case BOOST_XPR_CHAR_(char_type, '>'): ++begin; return token_assert_word_end; Chris@16: default:; // fall-through Chris@16: } Chris@16: } Chris@16: Chris@16: return token_escape; Chris@16: } Chris@16: Chris@16: private: Chris@16: Chris@16: ////////////////////////////////////////////////////////////////////////// Chris@16: // parse_mods_ Chris@16: template Chris@16: regex_constants::compiler_token_type parse_mods_(FwdIter &begin, FwdIter end) Chris@16: { Chris@16: using namespace regex_constants; Chris@16: bool set = true; Chris@16: do switch(*begin) Chris@16: { Chris@16: case BOOST_XPR_CHAR_(char_type, 'i'): this->flag_(set, icase_); break; Chris@16: case BOOST_XPR_CHAR_(char_type, 'm'): this->flag_(!set, single_line); break; Chris@16: case BOOST_XPR_CHAR_(char_type, 's'): this->flag_(!set, not_dot_newline); break; Chris@16: case BOOST_XPR_CHAR_(char_type, 'x'): this->flag_(set, ignore_white_space); break; Chris@16: case BOOST_XPR_CHAR_(char_type, ':'): ++begin; BOOST_FALLTHROUGH; Chris@16: case BOOST_XPR_CHAR_(char_type, ')'): return token_no_mark; Chris@16: case BOOST_XPR_CHAR_(char_type, '-'): if(false == (set = !set)) break; BOOST_FALLTHROUGH; Chris@16: default: BOOST_THROW_EXCEPTION(regex_error(error_paren, "unknown pattern modifier")); Chris@16: } Chris@16: while(BOOST_XPR_ENSURE_(++begin != end, error_paren, "incomplete extension")); Chris@16: // this return is technically unreachable, but this must Chris@16: // be here to work around a bug in gcc 4.0 Chris@16: return token_no_mark; Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: // flag_ Chris@16: void flag_(bool set, regex_constants::syntax_option_type flag) Chris@16: { Chris@16: this->flags_ = set ? (this->flags_ | flag) : (this->flags_ & ~flag); Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: // is_space_ Chris@16: bool is_space_(char_type ch) const Chris@16: { Chris@16: return 0 != this->space_ && this->traits().isctype(ch, this->space_); Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: // is_alnum_ Chris@16: bool is_alnum_(char_type ch) const Chris@16: { Chris@16: return 0 != this->alnum_ && this->traits().isctype(ch, this->alnum_); Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: // get_name_ Chris@16: template Chris@16: void get_name_(FwdIter &begin, FwdIter end, string_type &name) Chris@16: { Chris@16: this->eat_ws_(begin, end); Chris@16: for(name.clear(); begin != end && this->is_alnum_(*begin); ++begin) Chris@16: { Chris@16: name.push_back(*begin); Chris@16: } Chris@16: this->eat_ws_(begin, end); Chris@16: BOOST_XPR_ENSURE_(!name.empty(), regex_constants::error_paren, "incomplete extension"); Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: // eat_ws_ Chris@16: template Chris@16: FwdIter &eat_ws_(FwdIter &begin, FwdIter end) Chris@16: { Chris@16: if(0 != (regex_constants::ignore_white_space & this->flags())) Chris@16: { Chris@16: while(end != begin && (BOOST_XPR_CHAR_(char_type, '#') == *begin || this->is_space_(*begin))) Chris@16: { Chris@16: if(BOOST_XPR_CHAR_(char_type, '#') == *begin++) Chris@16: { Chris@16: while(end != begin && BOOST_XPR_CHAR_(char_type, '\n') != *begin++) {} Chris@16: } Chris@16: else Chris@16: { Chris@16: for(; end != begin && this->is_space_(*begin); ++begin) {} Chris@16: } Chris@16: } Chris@16: } Chris@16: Chris@16: return begin; Chris@16: } Chris@16: Chris@16: regex_traits traits_; Chris@16: regex_constants::syntax_option_type flags_; Chris@16: typename regex_traits::char_class_type space_; Chris@16: typename regex_traits::char_class_type alnum_; Chris@16: }; Chris@16: Chris@16: }} // namespace boost::xpressive Chris@16: Chris@16: #endif