Chris@16: // Copyright (c) 2001-2011 Hartmut Kaiser Chris@16: // Chris@16: // Distributed under the Boost Software License, Version 1.0. (See accompanying Chris@16: // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) Chris@16: Chris@16: #if !defined(BOOST_SPIRIT_LEX_LEXER_MAR_13_2007_0145PM) Chris@16: #define BOOST_SPIRIT_LEX_LEXER_MAR_13_2007_0145PM Chris@16: Chris@16: #if defined(_MSC_VER) Chris@16: #pragma once Chris@16: #endif Chris@16: Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: Chris@16: namespace boost { namespace spirit { namespace lex Chris@16: { Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: namespace detail Chris@16: { Chris@16: /////////////////////////////////////////////////////////////////////// Chris@16: template Chris@16: struct lexer_def_ Chris@16: : proto::extends< Chris@16: typename proto::terminal< Chris@16: lex::reference const> Chris@16: >::type Chris@16: , lexer_def_ > Chris@16: , qi::parser > Chris@16: , lex::lexer_type > Chris@16: { Chris@16: private: Chris@16: // avoid warnings about using 'this' in constructor Chris@16: lexer_def_& this_() { return *this; } Chris@16: Chris@16: typedef typename LexerDef::char_type char_type; Chris@16: typedef typename LexerDef::string_type string_type; Chris@16: typedef typename LexerDef::id_type id_type; Chris@16: Chris@16: typedef lex::reference reference_; Chris@16: typedef typename proto::terminal::type terminal_type; Chris@16: typedef proto::extends proto_base_type; Chris@16: Chris@16: reference_ alias() const Chris@16: { Chris@16: return reference_(*this); Chris@16: } Chris@16: Chris@16: public: Chris@16: // Qi interface: metafunction calculating parser attribute type Chris@16: template Chris@16: struct attribute Chris@16: { Chris@16: // the return value of a token set contains the matched token Chris@16: // id, and the corresponding pair of iterators Chris@16: typedef typename Iterator::base_iterator_type iterator_type; Chris@16: typedef Chris@16: fusion::vector2 > Chris@16: type; Chris@16: }; Chris@16: Chris@16: // Qi interface: parse functionality Chris@16: template Chris@16: bool parse(Iterator& first, Iterator const& last Chris@16: , Context& /*context*/, Skipper const& skipper Chris@16: , Attribute& attr) const Chris@16: { Chris@16: qi::skip_over(first, last, skipper); // always do a pre-skip Chris@16: Chris@16: if (first != last) { Chris@16: typedef typename Chris@16: boost::detail::iterator_traits::value_type Chris@16: token_type; Chris@16: Chris@16: token_type const& t = *first; Chris@16: if (token_is_valid(t) && t.state() == first.get_state()) { Chris@16: // any of the token definitions matched Chris@16: spirit::traits::assign_to(t, attr); Chris@16: ++first; Chris@16: return true; Chris@16: } Chris@16: } Chris@16: return false; Chris@16: } Chris@16: Chris@16: // Qi interface: 'what' functionality Chris@16: template Chris@16: info what(Context& /*context*/) const Chris@16: { Chris@16: return info("lexer"); Chris@16: } Chris@16: Chris@16: private: Chris@16: // allow to use the lexer.self.add("regex1", id1)("regex2", id2); Chris@16: // syntax Chris@16: struct adder Chris@16: { Chris@16: adder(lexer_def_& def_) Chris@16: : def(def_) {} Chris@16: Chris@16: // Add a token definition based on a single character as given Chris@16: // by the first parameter, the second parameter allows to Chris@16: // specify the token id to use for the new token. If no token Chris@16: // id is given the character code is used. Chris@16: adder const& operator()(char_type c Chris@16: , id_type token_id = id_type()) const Chris@16: { Chris@16: if (id_type() == token_id) Chris@16: token_id = static_cast(c); Chris@16: def.def.add_token (def.state.c_str(), c, token_id Chris@16: , def.targetstate.empty() ? 0 : def.targetstate.c_str()); Chris@16: return *this; Chris@16: } Chris@16: Chris@16: // Add a token definition based on a character sequence as Chris@16: // given by the first parameter, the second parameter allows to Chris@16: // specify the token id to use for the new token. If no token Chris@16: // id is given this function will generate a unique id to be Chris@16: // used as the token's id. Chris@16: adder const& operator()(string_type const& s Chris@16: , id_type token_id = id_type()) const Chris@16: { Chris@16: if (id_type() == token_id) Chris@16: token_id = def.def.get_next_id(); Chris@16: def.def.add_token (def.state.c_str(), s, token_id Chris@16: , def.targetstate.empty() ? 0 : def.targetstate.c_str()); Chris@16: return *this; Chris@16: } Chris@16: Chris@16: template Chris@16: adder const& operator()( Chris@16: token_def& tokdef Chris@16: , id_type token_id = id_type()) const Chris@16: { Chris@16: // make sure we have a token id Chris@16: if (id_type() == token_id) { Chris@16: if (id_type() == tokdef.id()) { Chris@16: token_id = def.def.get_next_id(); Chris@16: tokdef.id(token_id); Chris@16: } Chris@16: else { Chris@16: token_id = tokdef.id(); Chris@16: } Chris@16: } Chris@16: else { Chris@16: // the following assertion makes sure that the token_def Chris@16: // instance has not been assigned a different id earlier Chris@16: BOOST_ASSERT(id_type() == tokdef.id() Chris@16: || token_id == tokdef.id()); Chris@16: tokdef.id(token_id); Chris@16: } Chris@16: Chris@16: def.define(tokdef); Chris@16: return *this; Chris@16: } Chris@16: Chris@16: // template Chris@16: // adder const& operator()(char_type c, id_type token_id, F act) const Chris@16: // { Chris@16: // if (id_type() == token_id) Chris@16: // token_id = def.def.get_next_id(); Chris@16: // std::size_t unique_id = Chris@16: // def.def.add_token (def.state.c_str(), s, token_id); Chris@16: // def.def.add_action(unique_id, def.state.c_str(), act); Chris@16: // return *this; Chris@16: // } Chris@16: Chris@16: lexer_def_& def; Chris@16: Chris@16: private: Chris@16: // silence MSVC warning C4512: assignment operator could not be generated Chris@16: adder& operator= (adder const&); Chris@16: }; Chris@16: friend struct adder; Chris@16: Chris@16: // allow to use lexer.self.add_pattern("pattern1", "regex1")(...); Chris@16: // syntax Chris@16: struct pattern_adder Chris@16: { Chris@16: pattern_adder(lexer_def_& def_) Chris@16: : def(def_) {} Chris@16: Chris@16: pattern_adder const& operator()(string_type const& p Chris@16: , string_type const& s) const Chris@16: { Chris@16: def.def.add_pattern (def.state.c_str(), p, s); Chris@16: return *this; Chris@16: } Chris@16: Chris@16: lexer_def_& def; Chris@16: Chris@16: private: Chris@16: // silence MSVC warning C4512: assignment operator could not be generated Chris@16: pattern_adder& operator= (pattern_adder const&); Chris@16: }; Chris@16: friend struct pattern_adder; Chris@16: Chris@16: private: Chris@16: // Helper function to invoke the necessary 2 step compilation Chris@16: // process on token definition expressions Chris@16: template Chris@16: void compile2pass(TokenExpr const& expr) Chris@16: { Chris@16: expr.collect(def, state, targetstate); Chris@16: expr.add_actions(def); Chris@16: } Chris@16: Chris@16: public: Chris@16: /////////////////////////////////////////////////////////////////// Chris@16: template Chris@16: void define(Expr const& expr) Chris@16: { Chris@16: compile2pass(compile(expr)); Chris@16: } Chris@16: Chris@16: lexer_def_(LexerDef& def_, string_type const& state_ Chris@16: , string_type const& targetstate_ = string_type()) Chris@16: : proto_base_type(terminal_type::make(alias())) Chris@16: , add(this_()), add_pattern(this_()), def(def_) Chris@16: , state(state_), targetstate(targetstate_) Chris@16: {} Chris@16: Chris@16: // allow to switch states Chris@16: lexer_def_ operator()(char_type const* state) const Chris@16: { Chris@16: return lexer_def_(def, state); Chris@16: } Chris@16: lexer_def_ operator()(char_type const* state Chris@16: , char_type const* targetstate) const Chris@16: { Chris@16: return lexer_def_(def, state, targetstate); Chris@16: } Chris@16: lexer_def_ operator()(string_type const& state Chris@16: , string_type const& targetstate = string_type()) const Chris@16: { Chris@16: return lexer_def_(def, state, targetstate); Chris@16: } Chris@16: Chris@16: // allow to assign a token definition expression Chris@16: template Chris@16: lexer_def_& operator= (Expr const& xpr) Chris@16: { Chris@16: // Report invalid expression error as early as possible. Chris@16: // If you got an error_invalid_expression error message here, Chris@16: // then the expression (expr) is not a valid spirit lex Chris@16: // expression. Chris@16: BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr); Chris@16: Chris@16: def.clear(state.c_str()); Chris@16: define(xpr); Chris@16: return *this; Chris@16: } Chris@16: Chris@16: // explicitly tell the lexer that the given state will be defined Chris@16: // (useful in conjunction with "*") Chris@16: std::size_t add_state(char_type const* state = 0) Chris@16: { Chris@16: return def.add_state(state ? state : def.initial_state().c_str()); Chris@16: } Chris@16: Chris@16: adder add; Chris@16: pattern_adder add_pattern; Chris@16: Chris@16: private: Chris@16: LexerDef& def; Chris@16: string_type state; Chris@16: string_type targetstate; Chris@16: Chris@16: private: Chris@16: // silence MSVC warning C4512: assignment operator could not be generated Chris@16: lexer_def_& operator= (lexer_def_ const&); Chris@16: }; Chris@16: Chris@16: #if defined(BOOST_NO_CXX11_RVALUE_REFERENCES) Chris@16: // allow to assign a token definition expression Chris@16: template Chris@16: inline lexer_def_& Chris@16: operator+= (lexer_def_& lexdef, Expr& xpr) Chris@16: { Chris@16: // Report invalid expression error as early as possible. Chris@16: // If you got an error_invalid_expression error message here, Chris@16: // then the expression (expr) is not a valid spirit lex Chris@16: // expression. Chris@16: BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr); Chris@16: Chris@16: lexdef.define(xpr); Chris@16: return lexdef; Chris@16: } Chris@16: #else Chris@16: // allow to assign a token definition expression Chris@16: template Chris@16: inline lexer_def_& Chris@16: operator+= (lexer_def_& lexdef, Expr&& xpr) Chris@16: { Chris@16: // Report invalid expression error as early as possible. Chris@16: // If you got an error_invalid_expression error message here, Chris@16: // then the expression (expr) is not a valid spirit lex Chris@16: // expression. Chris@16: BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr); Chris@16: Chris@16: lexdef.define(xpr); Chris@16: return lexdef; Chris@16: } Chris@16: #endif Chris@16: Chris@16: template Chris@16: inline lexer_def_& Chris@16: operator+= (lexer_def_& lexdef, Expr const& xpr) Chris@16: { Chris@16: // Report invalid expression error as early as possible. Chris@16: // If you got an error_invalid_expression error message here, Chris@16: // then the expression (expr) is not a valid spirit lex Chris@16: // expression. Chris@16: BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr); Chris@16: Chris@16: lexdef.define(xpr); Chris@16: return lexdef; Chris@16: } Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: // The match_flags flags are used to influence different matching Chris@16: // modes of the lexer Chris@16: struct match_flags Chris@16: { Chris@16: enum enum_type Chris@16: { Chris@16: match_default = 0, // no flags Chris@16: match_not_dot_newline = 1, // the regex '.' doesn't match newlines Chris@16: match_icase = 2 // all matching operations are case insensitive Chris@16: }; Chris@16: }; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: // This represents a lexer object Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: // This is the first token id automatically assigned by the library Chris@16: // if needed Chris@16: enum tokenids Chris@16: { Chris@16: min_token_id = 0x10000 Chris@16: }; Chris@16: Chris@16: template Chris@16: class lexer : public Lexer Chris@16: { Chris@16: private: Chris@16: // avoid warnings about using 'this' in constructor Chris@16: lexer& this_() { return *this; } Chris@16: Chris@16: std::size_t next_token_id; // has to be an integral type Chris@16: Chris@16: public: Chris@16: typedef Lexer lexer_type; Chris@16: typedef typename Lexer::id_type id_type; Chris@16: typedef typename Lexer::char_type char_type; Chris@16: typedef typename Lexer::iterator_type iterator_type; Chris@16: typedef lexer base_type; Chris@16: Chris@16: typedef detail::lexer_def_ lexer_def; Chris@16: typedef std::basic_string string_type; Chris@16: Chris@16: lexer(unsigned int flags = match_flags::match_default Chris@16: , id_type first_id = id_type(min_token_id)) Chris@16: : lexer_type(flags) Chris@16: , next_token_id(first_id) Chris@16: , self(this_(), lexer_type::initial_state()) Chris@16: {} Chris@16: Chris@16: // access iterator interface Chris@16: template Chris@16: iterator_type begin(Iterator& first, Iterator const& last Chris@16: , char_type const* initial_state = 0) const Chris@16: { return this->lexer_type::begin(first, last, initial_state); } Chris@16: iterator_type end() const Chris@16: { return this->lexer_type::end(); } Chris@16: Chris@16: std::size_t map_state(char_type const* state) Chris@16: { return this->lexer_type::add_state(state); } Chris@16: Chris@16: // create a unique token id Chris@16: id_type get_next_id() { return id_type(next_token_id++); } Chris@16: Chris@16: lexer_def self; // allow for easy token definition Chris@16: }; Chris@16: Chris@16: }}} Chris@16: Chris@16: #endif