Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \file regex_primitives.hpp Chris@16: /// Contains the syntax elements for writing static regular expressions. Chris@16: // Chris@16: // Copyright 2008 Eric Niebler. Distributed under the Boost Chris@16: // Software License, Version 1.0. (See accompanying file Chris@16: // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) Chris@16: Chris@16: #ifndef BOOST_XPRESSIVE_REGEX_PRIMITIVES_HPP_EAN_10_04_2005 Chris@16: #define BOOST_XPRESSIVE_REGEX_PRIMITIVES_HPP_EAN_10_04_2005 Chris@16: Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: Chris@16: // Doxygen can't handle proto :-( Chris@16: #ifndef BOOST_XPRESSIVE_DOXYGEN_INVOKED Chris@16: # include Chris@16: # include Chris@16: # include Chris@16: # include Chris@16: # include Chris@16: # include Chris@16: #endif Chris@16: Chris@16: namespace boost { namespace xpressive { namespace detail Chris@16: { Chris@16: Chris@16: typedef assert_word_placeholder > assert_word_boundary; Chris@16: typedef assert_word_placeholder assert_word_begin; Chris@16: typedef assert_word_placeholder assert_word_end; Chris@16: Chris@16: // workaround msvc-7.1 bug with function pointer types Chris@16: // within function types: Chris@16: #if BOOST_WORKAROUND(BOOST_MSVC, == 1310) Chris@16: #define mark_number(x) proto::call Chris@16: #define minus_one() proto::make Chris@16: #endif Chris@16: Chris@16: struct push_back : proto::callable Chris@16: { Chris@16: typedef int result_type; Chris@16: Chris@16: template Chris@16: int operator ()(Subs &subs, int i) const Chris@16: { Chris@16: subs.push_back(i); Chris@16: return i; Chris@16: } Chris@16: }; Chris@16: Chris@16: struct mark_number : proto::callable Chris@16: { Chris@16: typedef 
int result_type; Chris@16: Chris@16: template Chris@16: int operator ()(Expr const &expr) const Chris@16: { Chris@16: return expr.mark_number_; Chris@16: } Chris@16: }; Chris@16: Chris@16: typedef mpl::int_<-1> minus_one; Chris@16: Chris@16: // s1 or -s1 Chris@16: struct SubMatch Chris@16: : proto::or_< Chris@16: proto::when Chris@16: , proto::when, push_back(proto::_data, minus_one()) > Chris@16: > Chris@16: {}; Chris@16: Chris@16: struct SubMatchList Chris@16: : proto::or_ > Chris@16: {}; Chris@16: Chris@16: template Chris@16: typename enable_if< Chris@16: mpl::and_, proto::matches > Chris@16: , std::vector Chris@16: >::type Chris@16: to_vector(Subs const &subs) Chris@16: { Chris@16: std::vector subs_; Chris@16: SubMatchList()(subs, 0, subs_); Chris@16: return subs_; Chris@16: } Chris@16: Chris@16: #if BOOST_WORKAROUND(BOOST_MSVC, == 1310) Chris@16: #undef mark_number Chris@16: #undef minus_one Chris@16: #endif Chris@16: Chris@16: // replace "Expr" with "keep(*State) >> Expr" Chris@16: struct skip_primitives : proto::transform Chris@16: { Chris@16: template Chris@16: struct impl : proto::transform_impl Chris@16: { Chris@16: typedef Chris@16: typename proto::shift_right< Chris@16: typename proto::unary_expr< Chris@16: keeper_tag Chris@16: , typename proto::dereference::type Chris@16: >::type Chris@16: , Expr Chris@16: >::type Chris@16: result_type; Chris@16: Chris@16: result_type operator ()( Chris@16: typename impl::expr_param expr Chris@16: , typename impl::state_param state Chris@16: , typename impl::data_param Chris@16: ) const Chris@16: { Chris@16: result_type that = {{{state}}, expr}; Chris@16: return that; Chris@16: } Chris@16: }; Chris@16: }; Chris@16: Chris@16: struct Primitives Chris@16: : proto::or_< Chris@16: proto::terminal Chris@16: , proto::comma Chris@16: , proto::subscript, proto::_> Chris@16: , proto::assign, proto::_> Chris@16: , proto::assign >, proto::_> Chris@16: , proto::complement Chris@16: > Chris@16: {}; Chris@16: Chris@16: struct 
SkipGrammar Chris@16: : proto::or_< Chris@16: proto::when Chris@16: , proto::assign, SkipGrammar> // don't "skip" mark tags Chris@16: , proto::subscript // don't put skips in actions Chris@16: , proto::binary_expr // don't skip modifiers Chris@16: , proto::unary_expr // don't skip lookbehinds Chris@16: , proto::nary_expr > // everything else is fair game! Chris@16: > Chris@16: {}; Chris@16: Chris@16: template Chris@16: struct skip_directive Chris@16: { Chris@16: typedef typename proto::result_of::as_expr::type skip_type; Chris@16: Chris@16: skip_directive(Skip const &skip) Chris@16: : skip_(proto::as_expr(skip)) Chris@16: {} Chris@16: Chris@16: template Chris@16: struct result {}; Chris@16: Chris@16: template Chris@16: struct result Chris@16: { Chris@16: typedef Chris@16: SkipGrammar::impl< Chris@16: typename proto::result_of::as_expr::type Chris@16: , skip_type const & Chris@16: , mpl::void_ & Chris@16: > Chris@16: skip_transform; Chris@16: Chris@16: typedef Chris@16: typename proto::shift_right< Chris@16: typename skip_transform::result_type Chris@16: , typename proto::dereference::type Chris@16: >::type Chris@16: type; Chris@16: }; Chris@16: Chris@16: template Chris@16: typename result::type Chris@16: operator ()(Expr const &expr) const Chris@16: { Chris@16: mpl::void_ ignore; Chris@16: typedef result result_fun; Chris@16: typename result_fun::type that = { Chris@16: typename result_fun::skip_transform()(proto::as_expr(expr), this->skip_, ignore) Chris@16: , {skip_} Chris@16: }; Chris@16: return that; Chris@16: } Chris@16: Chris@16: private: Chris@16: skip_type skip_; Chris@16: }; Chris@16: Chris@16: /* Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// INTERNAL ONLY Chris@16: // BOOST_XPRESSIVE_GLOBAL Chris@16: // for defining globals that neither violate the One Definition Rule nor Chris@16: // lead to undefined behavior due to global object initialization order. 
Chris@16: //#define BOOST_XPRESSIVE_GLOBAL(type, name, init) \ Chris@16: // namespace detail \ Chris@16: // { \ Chris@16: // template \ Chris@16: // struct BOOST_PP_CAT(global_pod_, name) \ Chris@16: // { \ Chris@16: // static type const value; \ Chris@16: // private: \ Chris@16: // union type_must_be_pod \ Chris@16: // { \ Chris@16: // type t; \ Chris@16: // char ch; \ Chris@16: // } u; \ Chris@16: // }; \ Chris@16: // template \ Chris@16: // type const BOOST_PP_CAT(global_pod_, name)::value = init; \ Chris@16: // } \ Chris@16: // type const &name = detail::BOOST_PP_CAT(global_pod_, name)<0>::value Chris@16: */ Chris@16: Chris@16: Chris@16: } // namespace detail Chris@16: Chris@16: /// INTERNAL ONLY (for backwards compatibility) Chris@16: unsigned int const repeat_max = UINT_MAX-1; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief For infinite repetition of a sub-expression. Chris@16: /// Chris@16: /// Magic value used with the repeat\<\>() function template Chris@16: /// to specify an unbounded repeat. Use as: repeat<17, inf>('a'). Chris@16: /// The equivalent in perl is /a{17,}/. Chris@16: unsigned int const inf = UINT_MAX-1; Chris@16: Chris@16: /// INTERNAL ONLY (for backwards compatibility) Chris@16: proto::terminal::type const epsilon = {{}}; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief Successfully matches nothing. Chris@16: /// Chris@16: /// Successfully matches a zero-width sequence. nil always succeeds and Chris@16: /// never consumes any characters. Chris@16: proto::terminal::type const nil = {{}}; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief Matches an alpha-numeric character. Chris@16: /// Chris@16: /// The regex traits are used to determine which characters are alpha-numeric. 
Chris@16: /// To match any character that is not alpha-numeric, use ~alnum. Chris@16: /// Chris@16: /// \attention alnum is equivalent to /[[:alnum:]]/ in perl. ~alnum is equivalent Chris@16: /// to /[[:^alnum:]]/ in perl. Chris@16: proto::terminal::type const alnum = {{"alnum", false}}; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief Matches an alphabetic character. Chris@16: /// Chris@16: /// The regex traits are used to determine which characters are alphabetic. Chris@16: /// To match any character that is not alphabetic, use ~alpha. Chris@16: /// Chris@16: /// \attention alpha is equivalent to /[[:alpha:]]/ in perl. ~alpha is equivalent Chris@16: /// to /[[:^alpha:]]/ in perl. Chris@16: proto::terminal::type const alpha = {{"alpha", false}}; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief Matches a blank (horizontal white-space) character. Chris@16: /// Chris@16: /// The regex traits are used to determine which characters are blank characters. Chris@16: /// To match any character that is not blank, use ~blank. Chris@16: /// Chris@16: /// \attention blank is equivalent to /[[:blank:]]/ in perl. ~blank is equivalent Chris@16: /// to /[[:^blank:]]/ in perl. Chris@16: proto::terminal::type const blank = {{"blank", false}}; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief Matches a control character. Chris@16: /// Chris@16: /// The regex traits are used to determine which characters are control characters. Chris@16: /// To match any character that is not a control character, use ~cntrl. Chris@16: /// Chris@16: /// \attention cntrl is equivalent to /[[:cntrl:]]/ in perl. ~cntrl is equivalent Chris@16: /// to /[[:^cntrl:]]/ in perl.
Chris@16: proto::terminal::type const cntrl = {{"cntrl", false}}; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief Matches a digit character. Chris@16: /// Chris@16: /// The regex traits are used to determine which characters are digits. Chris@16: /// To match any character that is not a digit, use ~digit. Chris@16: /// Chris@16: /// \attention digit is equivalent to /[[:digit:]]/ in perl. ~digit is equivalent Chris@16: /// to /[[:^digit:]]/ in perl. Chris@16: proto::terminal::type const digit = {{"digit", false}}; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief Matches a graph character. Chris@16: /// Chris@16: /// The regex traits are used to determine which characters are graphable. Chris@16: /// To match any character that is not graphable, use ~graph. Chris@16: /// Chris@16: /// \attention graph is equivalent to /[[:graph:]]/ in perl. ~graph is equivalent Chris@16: /// to /[[:^graph:]]/ in perl. Chris@16: proto::terminal::type const graph = {{"graph", false}}; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief Matches a lower-case character. Chris@16: /// Chris@16: /// The regex traits are used to determine which characters are lower-case. Chris@16: /// To match any character that is not a lower-case character, use ~lower. Chris@16: /// Chris@16: /// \attention lower is equivalent to /[[:lower:]]/ in perl. ~lower is equivalent Chris@16: /// to /[[:^lower:]]/ in perl. Chris@16: proto::terminal::type const lower = {{"lower", false}}; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief Matches a printable character. Chris@16: /// Chris@16: /// The regex traits are used to determine which characters are printable. Chris@16: /// To match any character that is not printable, use ~print. 
Chris@16: /// Chris@16: /// \attention print is equivalent to /[[:print:]]/ in perl. ~print is equivalent Chris@16: /// to /[[:^print:]]/ in perl. Chris@16: proto::terminal::type const print = {{"print", false}}; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief Matches a punctuation character. Chris@16: /// Chris@16: /// The regex traits are used to determine which characters are punctuation. Chris@16: /// To match any character that is not punctuation, use ~punct. Chris@16: /// Chris@16: /// \attention punct is equivalent to /[[:punct:]]/ in perl. ~punct is equivalent Chris@16: /// to /[[:^punct:]]/ in perl. Chris@16: proto::terminal::type const punct = {{"punct", false}}; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief Matches a space character. Chris@16: /// Chris@16: /// The regex traits are used to determine which characters are space characters. Chris@16: /// To match any character that is not white-space, use ~space. Chris@16: /// Chris@16: /// \attention space is equivalent to /[[:space:]]/ in perl. ~space is equivalent Chris@16: /// to /[[:^space:]]/ in perl. Chris@16: proto::terminal::type const space = {{"space", false}}; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief Matches an upper-case character. Chris@16: /// Chris@16: /// The regex traits are used to determine which characters are upper-case. Chris@16: /// To match any character that is not upper-case, use ~upper. Chris@16: /// Chris@16: /// \attention upper is equivalent to /[[:upper:]]/ in perl. ~upper is equivalent Chris@16: /// to /[[:^upper:]]/ in perl. Chris@16: proto::terminal::type const upper = {{"upper", false}}; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief Matches a hexadecimal digit character. 
Chris@16: /// Chris@16: /// The regex traits are used to determine which characters are hex digits. Chris@16: /// To match any character that is not a hex digit, use ~xdigit. Chris@16: /// Chris@16: /// \attention xdigit is equivalent to /[[:xdigit:]]/ in perl. ~xdigit is equivalent Chris@16: /// to /[[:^xdigit:]]/ in perl. Chris@16: proto::terminal::type const xdigit = {{"xdigit", false}}; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief Beginning of sequence assertion. Chris@16: /// Chris@16: /// For the character sequence [begin, end), 'bos' matches the Chris@16: /// zero-width sub-sequence [begin, begin). Chris@16: proto::terminal::type const bos = {{}}; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief End of sequence assertion. Chris@16: /// Chris@16: /// For the character sequence [begin, end), Chris@16: /// 'eos' matches the zero-width sub-sequence [end, end). Chris@16: /// Chris@16: /// \attention Unlike the perl end of sequence assertion \$, 'eos' will Chris@16: /// not match at the position [end-1, end-1) if *(end-1) is '\\n'. To Chris@16: /// get that behavior, use (!_n >> eos). Chris@16: proto::terminal::type const eos = {{}}; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief Beginning of line assertion. Chris@16: /// Chris@16: /// 'bol' matches the zero-width sub-sequence Chris@16: /// immediately following a logical newline sequence. The regex traits Chris@16: /// is used to determine what constitutes a logical newline sequence. Chris@16: proto::terminal::type const bol = {{}}; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief End of line assertion. 
Chris@16: /// Chris@16: /// 'eol' matches the zero-width sub-sequence Chris@16: /// immediately preceding a logical newline sequence. The regex traits Chris@16: /// is used to determine what constitutes a logical newline sequence. Chris@16: proto::terminal::type const eol = {{}}; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief Beginning of word assertion. Chris@16: /// Chris@16: /// 'bow' matches the zero-width sub-sequence Chris@16: /// immediately following a non-word character and preceding a word character. Chris@16: /// The regex traits are used to determine what constitutes a word character. Chris@16: proto::terminal::type const bow = {{}}; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief End of word assertion. Chris@16: /// Chris@16: /// 'eow' matches the zero-width sub-sequence Chris@16: /// immediately following a word character and preceding a non-word character. Chris@16: /// The regex traits are used to determine what constitutes a word character. Chris@16: proto::terminal::type const eow = {{}}; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief Word boundary assertion. Chris@16: /// Chris@16: /// '_b' matches the zero-width sub-sequence at the beginning or the end of a word. Chris@16: /// It is equivalent to (bow | eow). The regex traits are used to determine what Chris@16: /// constitutes a word character. To match a non-word boundary, use ~_b. Chris@16: /// Chris@16: /// \attention _b is like \\b in perl. ~_b is like \\B in perl. Chris@16: proto::terminal::type const _b = {{}}; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief Matches a word character. Chris@16: /// Chris@16: /// '_w' matches a single word character.
The regex traits are used to determine which Chris@16: /// characters are word characters. Use ~_w to match a character that is not a word Chris@16: /// character. Chris@16: /// Chris@16: /// \attention _w is like \\w in perl. ~_w is like \\W in perl. Chris@16: proto::terminal::type const _w = {{"w", false}}; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief Matches a digit character. Chris@16: /// Chris@16: /// '_d' matches a single digit character. The regex traits are used to determine which Chris@16: /// characters are digits. Use ~_d to match a character that is not a digit Chris@16: /// character. Chris@16: /// Chris@16: /// \attention _d is like \\d in perl. ~_d is like \\D in perl. Chris@16: proto::terminal::type const _d = {{"d", false}}; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief Matches a space character. Chris@16: /// Chris@16: /// '_s' matches a single space character. The regex traits are used to determine which Chris@16: /// characters are space characters. Use ~_s to match a character that is not a space Chris@16: /// character. Chris@16: /// Chris@16: /// \attention _s is like \\s in perl. ~_s is like \\S in perl. Chris@16: proto::terminal::type const _s = {{"s", false}}; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief Matches a literal newline character, '\\n'. Chris@16: /// Chris@16: /// '_n' matches a single newline character, '\\n'. Use ~_n to match a character Chris@16: /// that is not a newline. Chris@16: /// Chris@16: /// \attention ~_n is like '.' in perl without the /s modifier. Chris@16: proto::terminal::type const _n = {'\n'}; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief Matches a logical newline sequence. 
Chris@16: /// Chris@16: /// '_ln' matches a logical newline sequence. This can be any character in the Chris@16: /// line separator class, as determined by the regex traits, or the '\\r\\n' sequence. Chris@16: /// For the purpose of back-tracking, '\\r\\n' is treated as a unit. Chris@16: /// To match any one character that is not a logical newline, use ~_ln. Chris@16: detail::logical_newline_xpression const _ln = {{}}; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief Matches any one character. Chris@16: /// Chris@16: /// Match any character, similar to '.' in perl syntax with the /s modifier. Chris@16: /// '_' matches any one character, including the newline. Chris@16: /// Chris@16: /// \attention To match any character except the newline, use ~_n Chris@16: proto::terminal::type const _ = {{}}; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief Reference to the current regex object Chris@16: /// Chris@16: /// Useful when constructing recursive regular expression objects. The 'self' Chris@16: /// identifier is a short-hand for the current regex object. For instance, Chris@16: /// sregex rx = '(' >> (self | nil) >> ')'; will create a regex object that Chris@16: /// matches balanced parens such as "((()))". Chris@16: proto::terminal::type const self = {{}}; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief Used to create character sets. Chris@16: /// Chris@16: /// There are two ways to create character sets with the 'set' identifier. The Chris@16: /// easiest is to create a comma-separated list of the characters in the set, Chris@16: /// as in (set= 'a','b','c'). This set will match 'a', 'b', or 'c'. The other Chris@16: /// way is to define the set as an argument to the set subscript operator. 
Chris@16: /// For instance, set[ 'a' | range('b','c') | digit ] will match an 'a', 'b', Chris@16: /// 'c' or a digit character. Chris@16: /// Chris@16: /// To complement a set, apply the '~' operator. For instance, ~(set= 'a','b','c') Chris@16: /// will match any character that is not an 'a', 'b', or 'c'. Chris@16: /// Chris@16: /// Sets can be composed of other, possibly complemented, sets. For instance, Chris@16: /// set[ ~digit | ~(set= 'a','b','c') ]. Chris@16: detail::set_initializer_type const set = {{}}; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief Sub-match placeholder type, used to create named captures in Chris@16: /// static regexes. Chris@16: /// Chris@16: /// \c mark_tag is the type of the global sub-match placeholders \c s0, \c s1, etc. You Chris@16: /// can use the \c mark_tag type to create your own sub-match placeholders with Chris@16: /// more meaningful names. This is roughly equivalent to the "named capture" Chris@16: /// feature of dynamic regular expressions. Chris@16: /// Chris@16: /// To create a named sub-match placeholder, initialize it with a unique integer. Chris@16: /// The integer must only be unique within the regex in which the placeholder Chris@16: /// is used. Then you can use it within static regexes to create sub-matches Chris@16: /// by assigning a sub-expression to it, or to refer back to already created Chris@16: /// sub-matches. Chris@16: /// Chris@16: /// \code Chris@16: /// mark_tag number(1); // "number" is now equivalent to "s1" Chris@16: /// // Match a number, followed by a space and the same number again Chris@16: /// sregex rx = (number = +_d) >> ' ' >> number; Chris@16: /// \endcode Chris@16: /// Chris@16: /// After a successful \c regex_match() or \c regex_search(), the sub-match placeholder Chris@16: /// can be used to index into the match_results\<\> object to retrieve the Chris@16: /// corresponding sub-match.
Chris@16: struct mark_tag Chris@16: : proto::extends Chris@16: { Chris@16: private: Chris@16: typedef proto::extends base_type; Chris@16: Chris@16: static detail::basic_mark_tag make_tag(int mark_nbr) Chris@16: { Chris@16: detail::basic_mark_tag mark = {{mark_nbr}}; Chris@16: return mark; Chris@16: } Chris@16: Chris@16: public: Chris@16: /// \brief Initialize a mark_tag placeholder Chris@16: /// \param mark_nbr An integer that uniquely identifies this \c mark_tag Chris@16: /// within the static regexes in which this \c mark_tag will be used. Chris@16: /// \pre mark_nbr \> 0 Chris@16: mark_tag(int mark_nbr) Chris@16: : base_type(mark_tag::make_tag(mark_nbr)) Chris@16: { Chris@16: // Mark numbers must be integers greater than 0. Chris@16: BOOST_ASSERT(mark_nbr > 0); Chris@16: } Chris@16: Chris@16: /// INTERNAL ONLY Chris@16: operator detail::basic_mark_tag const &() const Chris@16: { Chris@16: return this->proto_base(); Chris@16: } Chris@16: Chris@16: BOOST_PROTO_EXTENDS_USING_ASSIGN_NON_DEPENDENT(mark_tag) Chris@16: }; Chris@16: Chris@16: // This macro is used when declaring mark_tags that are global because Chris@16: // it guarantees that they are statically initialized. That avoids Chris@16: // order-of-initialization bugs. In user code, the simpler: mark_tag s0(0); Chris@16: // would be preferable. Chris@16: /// INTERNAL ONLY Chris@16: #define BOOST_XPRESSIVE_GLOBAL_MARK_TAG(NAME, VALUE) \ Chris@16: boost::xpressive::mark_tag::proto_base_expr const NAME = {{VALUE}} \ Chris@16: /**/ Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief Sub-match placeholder, like $& in Perl Chris@16: BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s0, 0); Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief Sub-match placeholder, like $1 in perl. Chris@16: /// Chris@16: /// To create a sub-match, assign a sub-expression to the sub-match placeholder.
Chris@16: /// For instance, (s1= _) will match any one character and remember which Chris@16: /// character was matched in the 1st sub-match. Later in the pattern, you can Chris@16: /// refer back to the sub-match. For instance, (s1= _) >> s1 will match any Chris@16: /// character, and then match the same character again. Chris@16: /// Chris@16: /// After a successful regex_match() or regex_search(), the sub-match placeholders Chris@16: /// can be used to index into the match_results\<\> object to retrieve the Nth Chris@16: /// sub-match. Chris@16: BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s1, 1); Chris@16: BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s2, 2); Chris@16: BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s3, 3); Chris@16: BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s4, 4); Chris@16: BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s5, 5); Chris@16: BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s6, 6); Chris@16: BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s7, 7); Chris@16: BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s8, 8); Chris@16: BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s9, 9); Chris@16: Chris@16: // NOTE: For the purpose of xpressive's documentation, make icase() look like an Chris@16: // ordinary function. In reality, it is a function object defined in detail/icase.hpp Chris@16: // so that it can serve double-duty as regex_constants::icase, the syntax_option_type. Chris@16: #ifdef BOOST_XPRESSIVE_DOXYGEN_INVOKED Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief Makes a sub-expression case-insensitive. Chris@16: /// Chris@16: /// Use icase() to make a sub-expression case-insensitive. For instance, Chris@16: /// "foo" >> icase(set['b'] >> "ar") will match "foo" exactly followed by Chris@16: /// "bar" irrespective of case. Chris@16: template detail::unspecified icase(Expr const &expr) { return 0; } Chris@16: #endif Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief Makes a literal into a regular expression. 
Chris@16: /// Chris@16: /// Use as_xpr() to turn a literal into a regular expression. For instance, Chris@16: /// "foo" >> "bar" will not compile because both operands to the right-shift Chris@16: /// operator are const char*, and no such operator exists. Use as_xpr("foo") >> "bar" Chris@16: /// instead. Chris@16: /// Chris@16: /// You can use as_xpr() with character literals in addition to string literals. Chris@16: /// For instance, as_xpr('a') will match an 'a'. You can also complement a Chris@16: /// character literal, as with ~as_xpr('a'). This will match any one character Chris@16: /// that is not an 'a'. Chris@16: #ifdef BOOST_XPRESSIVE_DOXYGEN_INVOKED Chris@16: template detail::unspecified as_xpr(Literal const &literal) { return 0; } Chris@16: #else Chris@16: proto::functional::as_expr<> const as_xpr = {}; Chris@16: #endif Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief Embed a regex object by reference. Chris@16: /// Chris@16: /// \param rex The basic_regex object to embed by reference. Chris@16: template Chris@16: inline typename proto::terminal const> >::type const Chris@16: by_ref(basic_regex const &rex) Chris@16: { Chris@16: reference_wrapper const> ref(rex); Chris@16: return proto::terminal const> >::type::make(ref); Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief Match a range of characters. Chris@16: /// Chris@16: /// Match any character in the range [ch_min, ch_max]. Chris@16: /// Chris@16: /// \param ch_min The lower end of the range to match. Chris@16: /// \param ch_max The upper end of the range to match. 
Chris@16: template Chris@16: inline typename proto::terminal >::type const Chris@16: range(Char ch_min, Char ch_max) Chris@16: { Chris@16: detail::range_placeholder that = {ch_min, ch_max, false}; Chris@16: return proto::terminal >::type::make(that); Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief Make a sub-expression optional. Equivalent to !as_xpr(expr). Chris@16: /// Chris@16: /// \param expr The sub-expression to make optional. Chris@16: template Chris@16: typename proto::result_of::make_expr< Chris@16: proto::tag::logical_not Chris@16: , proto::default_domain Chris@16: , Expr const & Chris@16: >::type const Chris@16: optional(Expr const &expr) Chris@16: { Chris@16: return proto::make_expr< Chris@16: proto::tag::logical_not Chris@16: , proto::default_domain Chris@16: >(boost::ref(expr)); Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief Repeat a sub-expression multiple times. Chris@16: /// Chris@16: /// There are two forms of the repeat\<\>() function template. To match a Chris@16: /// sub-expression N times, use repeat\(expr). To match a sub-expression Chris@16: /// from M to N times, use repeat\(expr). Chris@16: /// Chris@16: /// The repeat\<\>() function creates a greedy quantifier. To make the quantifier Chris@16: /// non-greedy, apply the unary minus operator, as in -repeat\(expr). Chris@16: /// Chris@16: /// \param expr The sub-expression to repeat. 
Chris@16: template Chris@16: typename proto::result_of::make_expr< Chris@16: detail::generic_quant_tag Chris@16: , proto::default_domain Chris@16: , Expr const & Chris@16: >::type const Chris@16: repeat(Expr const &expr) Chris@16: { Chris@16: return proto::make_expr< Chris@16: detail::generic_quant_tag Chris@16: , proto::default_domain Chris@16: >(boost::ref(expr)); Chris@16: } Chris@16: Chris@16: /// \overload Chris@16: /// Chris@16: template Chris@16: typename proto::result_of::make_expr< Chris@16: detail::generic_quant_tag Chris@16: , proto::default_domain Chris@16: , Expr2 const & Chris@16: >::type const Chris@16: repeat(Expr2 const &expr2) Chris@16: { Chris@16: return proto::make_expr< Chris@16: detail::generic_quant_tag Chris@16: , proto::default_domain Chris@16: >(boost::ref(expr2)); Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief Create an independent sub-expression. Chris@16: /// Chris@16: /// Turn off back-tracking for a sub-expression. Any branches or repeats within Chris@16: /// the sub-expression will match only one way, and no other alternatives are Chris@16: /// tried. Chris@16: /// Chris@16: /// \attention keep(expr) is equivalent to the perl (?>...) extension. Chris@16: /// Chris@16: /// \param expr The sub-expression to modify. Chris@16: template Chris@16: typename proto::result_of::make_expr< Chris@16: detail::keeper_tag Chris@16: , proto::default_domain Chris@16: , Expr const & Chris@16: >::type const Chris@16: keep(Expr const &expr) Chris@16: { Chris@16: return proto::make_expr< Chris@16: detail::keeper_tag Chris@16: , proto::default_domain Chris@16: >(boost::ref(expr)); Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief Look-ahead assertion. 
Chris@16: /// Chris@16: /// before(expr) succeeds if the expr sub-expression would match at the current Chris@16: /// position in the sequence, but expr is not included in the match. For instance, Chris@16: /// before("foo") succeeds if we are before a "foo". Look-ahead assertions can be Chris@16: /// negated with the bit-compliment operator. Chris@16: /// Chris@16: /// \attention before(expr) is equivalent to the perl (?=...) extension. Chris@16: /// ~before(expr) is a negative look-ahead assertion, equivalent to the Chris@16: /// perl (?!...) extension. Chris@16: /// Chris@16: /// \param expr The sub-expression to put in the look-ahead assertion. Chris@16: template Chris@16: typename proto::result_of::make_expr< Chris@16: detail::lookahead_tag Chris@16: , proto::default_domain Chris@16: , Expr const & Chris@16: >::type const Chris@16: before(Expr const &expr) Chris@16: { Chris@16: return proto::make_expr< Chris@16: detail::lookahead_tag Chris@16: , proto::default_domain Chris@16: >(boost::ref(expr)); Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief Look-behind assertion. Chris@16: /// Chris@16: /// after(expr) succeeds if the expr sub-expression would match at the current Chris@16: /// position minus N in the sequence, where N is the width of expr. expr is not included in Chris@16: /// the match. For instance, after("foo") succeeds if we are after a "foo". Look-behind Chris@16: /// assertions can be negated with the bit-complement operator. Chris@16: /// Chris@16: /// \attention after(expr) is equivalent to the perl (?<=...) extension. Chris@16: /// ~after(expr) is a negative look-behind assertion, equivalent to the Chris@16: /// perl (? 
Chris@16: typename proto::result_of::make_expr< Chris@16: detail::lookbehind_tag Chris@16: , proto::default_domain Chris@16: , Expr const & Chris@16: >::type const Chris@16: after(Expr const &expr) Chris@16: { Chris@16: return proto::make_expr< Chris@16: detail::lookbehind_tag Chris@16: , proto::default_domain Chris@16: >(boost::ref(expr)); Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief Specify a regex traits or a std::locale. Chris@16: /// Chris@16: /// imbue() instructs the regex engine to use the specified traits or locale Chris@16: /// when matching the regex. The entire expression must use the same traits/locale. Chris@16: /// For instance, the following specifies a locale for use with a regex: Chris@16: /// std::locale loc; Chris@16: /// sregex rx = imbue(loc)(+digit); Chris@16: /// Chris@16: /// \param loc The std::locale or regex traits object. Chris@16: template Chris@16: inline detail::modifier_op > const Chris@16: imbue(Locale const &loc) Chris@16: { Chris@16: detail::modifier_op > mod = Chris@16: { Chris@16: detail::locale_modifier(loc) Chris@16: , regex_constants::ECMAScript Chris@16: }; Chris@16: return mod; Chris@16: } Chris@16: Chris@16: proto::terminal > >::type const a1 = {{}}; Chris@16: proto::terminal > >::type const a2 = {{}}; Chris@16: proto::terminal > >::type const a3 = {{}}; Chris@16: proto::terminal > >::type const a4 = {{}}; Chris@16: proto::terminal > >::type const a5 = {{}}; Chris@16: proto::terminal > >::type const a6 = {{}}; Chris@16: proto::terminal > >::type const a7 = {{}}; Chris@16: proto::terminal > >::type const a8 = {{}}; Chris@16: proto::terminal > >::type const a9 = {{}}; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \brief Specify which characters to skip when matching a regex. 
Chris@16: /// Chris@16: /// skip() instructs the regex engine to skip certain characters when matching Chris@16: /// a regex. It is most useful for writing regexes that ignore whitespace. Chris@16: /// For instance, the following specifies a regex that skips whitespace and Chris@16: /// punctuation: Chris@16: /// Chris@16: /// \code Chris@16: /// // A sentence is one or more words separated by whitespace Chris@16: /// // and punctuation. Chris@16: /// sregex word = +alpha; Chris@16: /// sregex sentence = skip(set[_s | punct])( +word ); Chris@16: /// \endcode Chris@16: /// Chris@16: /// The way it works in the above example is to insert Chris@16: /// keep(*set[_s | punct]) before each primitive within the regex. Chris@16: /// A "primitive" includes terminals like strings, character sets and nested Chris@16: /// regexes. A final *set[_s | punct] is added to the end of the Chris@16: /// regex. The regex sentence specified above is equivalent to Chris@16: /// the following: Chris@16: /// Chris@16: /// \code Chris@16: /// sregex sentence = +( keep(*set[_s | punct]) >> word ) Chris@16: /// >> *set[_s | punct]; Chris@16: /// \endcode Chris@16: /// Chris@16: /// \attention Skipping does not affect how nested regexes are handled because Chris@16: /// they are treated atomically. String literals are also treated Chris@16: /// atomically; that is, no skipping is done within a string literal. So Chris@16: /// skip(_s)("this that") is not the same as Chris@16: /// skip(_s)("this" >> as_xpr("that")). The first will only match Chris@16: /// when there is only one space between "this" and "that". The second will Chris@16: /// skip any and all whitespace between "this" and "that". Chris@16: /// Chris@16: /// \param skip A regex that specifies which characters to skip. 
Chris@16: template Chris@16: detail::skip_directive skip(Skip const &skip) Chris@16: { Chris@16: return detail::skip_directive(skip); Chris@16: } Chris@16: Chris@16: namespace detail Chris@16: { Chris@16: inline void ignore_unused_regex_primitives() Chris@16: { Chris@16: detail::ignore_unused(repeat_max); Chris@16: detail::ignore_unused(inf); Chris@16: detail::ignore_unused(epsilon); Chris@16: detail::ignore_unused(nil); Chris@16: detail::ignore_unused(alnum); Chris@16: detail::ignore_unused(bos); Chris@16: detail::ignore_unused(eos); Chris@16: detail::ignore_unused(bol); Chris@16: detail::ignore_unused(eol); Chris@16: detail::ignore_unused(bow); Chris@16: detail::ignore_unused(eow); Chris@16: detail::ignore_unused(_b); Chris@16: detail::ignore_unused(_w); Chris@16: detail::ignore_unused(_d); Chris@16: detail::ignore_unused(_s); Chris@16: detail::ignore_unused(_n); Chris@16: detail::ignore_unused(_ln); Chris@16: detail::ignore_unused(_); Chris@16: detail::ignore_unused(self); Chris@16: detail::ignore_unused(set); Chris@16: detail::ignore_unused(s0); Chris@16: detail::ignore_unused(s1); Chris@16: detail::ignore_unused(s2); Chris@16: detail::ignore_unused(s3); Chris@16: detail::ignore_unused(s4); Chris@16: detail::ignore_unused(s5); Chris@16: detail::ignore_unused(s6); Chris@16: detail::ignore_unused(s7); Chris@16: detail::ignore_unused(s8); Chris@16: detail::ignore_unused(s9); Chris@16: detail::ignore_unused(a1); Chris@16: detail::ignore_unused(a2); Chris@16: detail::ignore_unused(a3); Chris@16: detail::ignore_unused(a4); Chris@16: detail::ignore_unused(a5); Chris@16: detail::ignore_unused(a6); Chris@16: detail::ignore_unused(a7); Chris@16: detail::ignore_unused(a8); Chris@16: detail::ignore_unused(a9); Chris@16: detail::ignore_unused(as_xpr); Chris@16: } Chris@16: } Chris@16: Chris@16: }} // namespace boost::xpressive Chris@16: Chris@16: #endif