Chris@16: // Copyright (c) 2001-2011 Hartmut Kaiser Chris@16: // Chris@16: // Distributed under the Boost Software License, Version 1.0. (See accompanying Chris@16: // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) Chris@16: Chris@16: #if !defined(BOOST_SPIRIT_LEX_TOKEN_FEB_10_2008_0751PM) Chris@16: #define BOOST_SPIRIT_LEX_TOKEN_FEB_10_2008_0751PM Chris@16: Chris@16: #if defined(_MSC_VER) Chris@16: #pragma once Chris@16: #endif Chris@16: Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: Chris@16: #if defined(BOOST_SPIRIT_DEBUG) Chris@16: #include Chris@16: #endif Chris@16: Chris@16: namespace boost { namespace spirit { namespace lex { namespace lexertl Chris@16: { Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: // Chris@16: // The token is the type of the objects returned by default by the Chris@16: // iterator. Chris@16: // Chris@16: // template parameters: Chris@16: // Iterator The type of the iterator used to access the Chris@16: // underlying character stream. Chris@16: // AttributeTypes A mpl sequence containing the types of all Chris@16: // required different token values to be supported Chris@16: // by this token type. Chris@16: // HasState A mpl::bool_ indicating, whether this token type Chris@16: // should support lexer states. Chris@16: // Idtype The type to use for the token id (defaults to Chris@16: // std::size_t). Chris@16: // Chris@16: // It is possible to use other token types with the spirit::lex Chris@16: // framework as well. If you plan to use a different type as your token Chris@16: // type, you'll need to expose the following things from your token type Chris@16: // to make it compatible with spirit::lex: Chris@16: // Chris@16: // typedefs Chris@16: // iterator_type The type of the iterator used to access the Chris@16: // underlying character stream. Chris@16: // Chris@16: // id_type The type of the token id used. Chris@16: // Chris@16: // methods Chris@16: // default constructor Chris@16: // This should initialize the token as an end of Chris@16: // input token. Chris@16: // constructors The prototype of the other required Chris@16: // constructors should be: Chris@16: // Chris@16: // token(int) Chris@16: // This constructor should initialize the token as Chris@16: // an invalid token (not carrying any specific Chris@16: // values) Chris@16: // Chris@16: // where: the int is used as a tag only and its value is Chris@16: // ignored Chris@16: // Chris@16: // and: Chris@16: // Chris@16: // token(Idtype id, std::size_t state, Chris@16: // iterator_type first, iterator_type last); Chris@16: // Chris@16: // where: id: token id Chris@16: // state: lexer state this token was matched in Chris@16: // first, last: pair of iterators marking the matched Chris@16: // range in the underlying input stream Chris@16: // Chris@16: // accessors Chris@16: // id() return the token id of the matched input sequence Chris@16: // id(newid) set the token id of the token instance Chris@16: // Chris@16: // state() return the lexer state this token was matched in Chris@16: // Chris@16: // value() return the token value Chris@16: // Chris@16: // Additionally, you will have to implement a couple of helper functions Chris@16: // in the same namespace as the token type: a comparison operator==() to Chris@16: // compare your token instances, a token_is_valid() function and different Chris@16: // specializations of the Spirit customization point Chris@16: // assign_to_attribute_from_value as shown below. Chris@16: // Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: template Chris@16: , typename HasState = mpl::true_ Chris@16: , typename Idtype = std::size_t> Chris@16: struct token; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: // This specialization of the token type doesn't contain any item data and Chris@16: // doesn't support working with lexer states. Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: template Chris@16: struct token Chris@16: { Chris@16: typedef Iterator iterator_type; Chris@16: typedef mpl::false_ has_state; Chris@16: typedef Idtype id_type; Chris@16: typedef unused_type token_value_type; Chris@16: Chris@16: // default constructed tokens correspond to EOI tokens Chris@16: token() : id_(id_type(boost::lexer::npos)) {} Chris@16: Chris@16: // construct an invalid token Chris@16: explicit token(int) : id_(id_type(0)) {} Chris@16: Chris@16: token(id_type id, std::size_t) : id_(id) {} Chris@16: Chris@16: token(id_type id, std::size_t, token_value_type) Chris@16: : id_(id) {} Chris@16: Chris@16: token_value_type& value() { static token_value_type u; return u; } Chris@16: token_value_type const& value() const { return unused; } Chris@16: Chris@16: #if defined(BOOST_SPIRIT_DEBUG) Chris@16: token(id_type id, std::size_t, Iterator const& first Chris@16: , Iterator const& last) Chris@16: : matched_(first, last) Chris@16: , id_(id) {} Chris@16: #else Chris@16: token(id_type id, std::size_t, Iterator const&, Iterator const&) Chris@16: : id_(id) {} Chris@16: #endif Chris@16: Chris@16: // this default conversion operator is needed to allow the direct Chris@16: // usage of tokens in conjunction with the primitive parsers defined Chris@16: // in Qi Chris@16: operator id_type() const { return id_; } Chris@16: Chris@16: // Retrieve or set the token id of this token instance. Chris@16: id_type id() const { return id_; } Chris@16: void id(id_type newid) { id_ = newid; } Chris@16: Chris@16: std::size_t state() const { return 0; } // always '0' (INITIAL state) Chris@16: Chris@16: bool is_valid() const Chris@16: { Chris@16: return 0 != id_ && id_type(boost::lexer::npos) != id_; Chris@16: } Chris@16: Chris@16: #if defined(BOOST_SPIRIT_DEBUG) Chris@16: #if BOOST_WORKAROUND(BOOST_MSVC, == 1600) Chris@16: // workaround for MSVC10 which has problems copying a default Chris@16: // constructed iterator_range Chris@16: token& operator= (token const& rhs) Chris@16: { Chris@16: if (this != &rhs) Chris@16: { Chris@16: id_ = rhs.id_; Chris@16: if (is_valid()) Chris@16: matched_ = rhs.matched_; Chris@16: } Chris@16: return *this; Chris@16: } Chris@16: #endif Chris@16: std::pair matched_; Chris@16: #endif Chris@16: Chris@16: protected: Chris@16: id_type id_; // token id, 0 if nothing has been matched Chris@16: }; Chris@16: Chris@16: #if defined(BOOST_SPIRIT_DEBUG) Chris@16: template Chris@16: inline std::basic_ostream& Chris@16: operator<< (std::basic_ostream& os Chris@16: , token const& t) Chris@16: { Chris@16: if (t.is_valid()) { Chris@16: Iterator end = t.matched_.second; Chris@16: for (Iterator it = t.matched_.first; it != end; ++it) Chris@16: os << *it; Chris@16: } Chris@16: else { Chris@16: os << ""; Chris@16: } Chris@16: return os; Chris@16: } Chris@16: #endif Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: // This specialization of the token type doesn't contain any item data but Chris@16: // supports working with lexer states. Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: template Chris@16: struct token Chris@16: : token Chris@16: { Chris@16: private: Chris@16: typedef token base_type; Chris@16: Chris@16: public: Chris@16: typedef typename base_type::id_type id_type; Chris@16: typedef Iterator iterator_type; Chris@16: typedef mpl::true_ has_state; Chris@16: typedef unused_type token_value_type; Chris@16: Chris@16: // default constructed tokens correspond to EOI tokens Chris@16: token() : state_(boost::lexer::npos) {} Chris@16: Chris@16: // construct an invalid token Chris@16: explicit token(int) : base_type(0), state_(boost::lexer::npos) {} Chris@16: Chris@16: token(id_type id, std::size_t state) Chris@16: : base_type(id, boost::lexer::npos), state_(state) {} Chris@16: Chris@16: token(id_type id, std::size_t state, token_value_type) Chris@16: : base_type(id, boost::lexer::npos, unused) Chris@16: , state_(state) {} Chris@16: Chris@16: token(id_type id, std::size_t state Chris@16: , Iterator const& first, Iterator const& last) Chris@16: : base_type(id, boost::lexer::npos, first, last) Chris@16: , state_(state) {} Chris@16: Chris@16: std::size_t state() const { return state_; } Chris@16: Chris@16: #if defined(BOOST_SPIRIT_DEBUG) && BOOST_WORKAROUND(BOOST_MSVC, == 1600) Chris@16: // workaround for MSVC10 which has problems copying a default Chris@16: // constructed iterator_range Chris@16: token& operator= (token const& rhs) Chris@16: { Chris@16: if (this != &rhs) Chris@16: { Chris@16: this->base_type::operator=(static_cast(rhs)); Chris@16: state_ = rhs.state_; Chris@16: } Chris@16: return *this; Chris@16: } Chris@16: #endif Chris@16: Chris@16: protected: Chris@16: std::size_t state_; // lexer state this token was matched in Chris@16: }; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: // The generic version of the token type derives from the Chris@16: // specialization above and adds a single data member holding the item Chris@16: // data carried by the token instance. Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: namespace detail Chris@16: { Chris@16: /////////////////////////////////////////////////////////////////////// Chris@16: // Meta-function to calculate the type of the variant data item to be Chris@16: // stored with each token instance. Chris@16: // Chris@16: // Note: The iterator pair needs to be the first type in the list of Chris@16: // types supported by the generated variant type (this is being Chris@16: // used to identify whether the stored data item in a particular Chris@16: // token instance needs to be converted from the pair of Chris@16: // iterators (see the first of the assign_to_attribute_from_value Chris@16: // specializations below). Chris@16: /////////////////////////////////////////////////////////////////////// Chris@16: template Chris@16: struct token_value_typesequence Chris@16: { Chris@16: typedef typename mpl::insert< Chris@16: AttributeTypes Chris@16: , typename mpl::begin::type Chris@16: , IteratorPair Chris@16: >::type sequence_type; Chris@16: typedef typename make_variant_over::type type; Chris@16: }; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////// Chris@16: // The type of the data item stored with a token instance is defined Chris@16: // by the template parameter 'AttributeTypes' and may be: Chris@16: // Chris@16: // lex::omit: no data item is stored with the token Chris@16: // instance (this is handled by the Chris@16: // specializations of the token class Chris@16: // below) Chris@16: // mpl::vector0<>: each token instance stores a pair of Chris@16: // iterators pointing to the matched input Chris@16: // sequence Chris@16: // mpl::vector<...>: each token instance stores a variant being Chris@16: // able to store the pair of iterators pointing Chris@16: // to the matched input sequence, or any of the Chris@16: // types a specified in the mpl::vector<> Chris@16: // Chris@16: // All this is done to ensure the token type is as small (in terms Chris@16: // of its byte-size) as possible. Chris@16: /////////////////////////////////////////////////////////////////////// Chris@16: template Chris@16: struct token_value_type Chris@16: : mpl::eval_if< Chris@16: mpl::or_< Chris@16: is_same > Chris@16: , is_same > > Chris@16: , mpl::identity Chris@16: , token_value_typesequence > Chris@16: {}; Chris@16: } Chris@16: Chris@16: template Chris@16: struct token : token Chris@16: { Chris@16: private: // precondition assertions Chris@16: BOOST_STATIC_ASSERT((mpl::is_sequence::value || Chris@16: is_same::value)); Chris@16: typedef token base_type; Chris@16: Chris@16: protected: Chris@101: // If no additional token value types are given, the token will Chris@16: // hold the plain pair of iterators pointing to the matched range Chris@16: // in the underlying input sequence. Otherwise the token value is Chris@16: // stored as a variant and will again hold the pair of iterators but Chris@16: // is able to hold any of the given data types as well. The conversion Chris@16: // from the iterator pair to the required data type is done when it is Chris@16: // accessed for the first time. Chris@16: typedef iterator_range iterpair_type; Chris@16: Chris@16: public: Chris@16: typedef typename base_type::id_type id_type; Chris@16: typedef typename detail::token_value_type< Chris@16: iterpair_type, AttributeTypes Chris@16: >::type token_value_type; Chris@16: Chris@16: typedef Iterator iterator_type; Chris@16: Chris@16: // default constructed tokens correspond to EOI tokens Chris@16: token() : value_(iterpair_type(iterator_type(), iterator_type())) {} Chris@16: Chris@16: // construct an invalid token Chris@16: explicit token(int) Chris@16: : base_type(0) Chris@16: , value_(iterpair_type(iterator_type(), iterator_type())) {} Chris@16: Chris@16: token(id_type id, std::size_t state, token_value_type const& value) Chris@16: : base_type(id, state, value) Chris@16: , value_(value) {} Chris@16: Chris@16: token(id_type id, std::size_t state, Iterator const& first Chris@16: , Iterator const& last) Chris@16: : base_type(id, state, first, last) Chris@16: , value_(iterpair_type(first, last)) {} Chris@16: Chris@16: token_value_type& value() { return value_; } Chris@16: token_value_type const& value() const { return value_; } Chris@16: Chris@16: #if BOOST_WORKAROUND(BOOST_MSVC, == 1600) Chris@16: // workaround for MSVC10 which has problems copying a default Chris@16: // constructed iterator_range Chris@16: token& operator= (token const& rhs) Chris@16: { Chris@16: if (this != &rhs) Chris@16: { Chris@16: this->base_type::operator=(static_cast(rhs)); Chris@16: if (this->is_valid()) Chris@16: value_ = rhs.value_; Chris@16: } Chris@16: return *this; Chris@16: } Chris@16: #endif Chris@16: Chris@16: protected: Chris@16: token_value_type value_; // token value, by default a pair of iterators Chris@16: }; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: // tokens are considered equal, if their id's match (these are unique) Chris@16: template Chris@16: inline bool Chris@16: operator== (token const& lhs, Chris@16: token const& rhs) Chris@16: { Chris@16: return lhs.id() == rhs.id(); Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: // This overload is needed by the multi_pass/functor_input_policy to Chris@16: // validate a token instance. It has to be defined in the same namespace Chris@16: // as the token class itself to allow ADL to find it. Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: template Chris@16: inline bool Chris@16: token_is_valid(token const& t) Chris@16: { Chris@16: return t.is_valid(); Chris@16: } Chris@16: }}}} Chris@16: Chris@16: namespace boost { namespace spirit { namespace traits Chris@16: { Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: // We have to provide specializations for the customization point Chris@16: // assign_to_attribute_from_value allowing to extract the needed value Chris@16: // from the token. Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: Chris@16: // This is called from the parse function of token_def if the token_def Chris@16: // has been defined to carry a special attribute type Chris@16: template Chris@16: struct assign_to_attribute_from_value > Chris@16: { Chris@16: static void Chris@16: call(lex::lexertl::token const& t Chris@16: , Attribute& attr) Chris@16: { Chris@16: // The goal of this function is to avoid the conversion of the pair of Chris@16: // iterators (to the matched character sequence) into the token value Chris@16: // of the required type being done more than once. For this purpose it Chris@16: // checks whether the stored value type is still the default one (pair Chris@16: // of iterators) and if yes, replaces the pair of iterators with the Chris@16: // converted value to be returned from subsequent calls. Chris@16: Chris@16: if (0 == t.value().which()) { Chris@16: // first access to the token value Chris@16: typedef iterator_range iterpair_type; Chris@16: iterpair_type const& ip = boost::get(t.value()); Chris@16: Chris@16: // Interestingly enough we use the assign_to() framework defined in Chris@16: // Spirit.Qi allowing to convert the pair of iterators to almost any Chris@16: // required type (assign_to(), if available, uses the standard Spirit Chris@16: // parsers to do the conversion). Chris@16: spirit::traits::assign_to(ip.begin(), ip.end(), attr); Chris@16: Chris@16: // If you get an error during the compilation of the following Chris@16: // assignment expression, you probably forgot to list one or more Chris@16: // types used as token value types (in your token_def<...> Chris@16: // definitions) in your definition of the token class. I.e. any token Chris@16: // value type used for a token_def<...> definition has to be listed Chris@16: // during the declaration of the token type to use. For instance let's Chris@16: // assume we have two token_def's: Chris@16: // Chris@16: // token_def number; number = "..."; Chris@16: // token_def identifier; identifier = "..."; Chris@16: // Chris@16: // Then you'll have to use the following token type definition Chris@16: // (assuming you are using the token class): Chris@16: // Chris@16: // typedef mpl::vector token_values; Chris@16: // typedef token token_type; Chris@16: // Chris@16: // where: base_iter_type is the iterator type used to expose the Chris@16: // underlying input stream. Chris@16: // Chris@16: // This token_type has to be used as the second template parameter Chris@16: // to the lexer class: Chris@16: // Chris@16: // typedef lexer lexer_type; Chris@16: // Chris@16: // again, assuming you're using the lexer<> template for your Chris@16: // tokenization. Chris@16: Chris@16: typedef lex::lexertl::token< Chris@16: Iterator, AttributeTypes, HasState, Idtype> token_type; Chris@16: spirit::traits::assign_to( Chris@16: attr, const_cast(t).value()); // re-assign value Chris@16: } Chris@16: else { Chris@16: // reuse the already assigned value Chris@16: spirit::traits::assign_to(boost::get(t.value()), attr); Chris@16: } Chris@16: } Chris@16: }; Chris@16: Chris@16: template Chris@16: struct assign_to_container_from_value > Chris@16: : assign_to_attribute_from_value > Chris@16: {}; Chris@16: Chris@16: template Chris@16: struct assign_to_container_from_value > Chris@16: : assign_to_attribute_from_value > Chris@16: {}; Chris@16: Chris@16: template Chris@16: struct assign_to_container_from_value< Chris@16: iterator_range, iterator_range > Chris@16: { Chris@16: static void Chris@16: call(iterator_range const& val, iterator_range& attr) Chris@16: { Chris@16: attr = val; Chris@16: } Chris@16: }; Chris@16: Chris@16: // These are called from the parse function of token_def if the token type Chris@16: // has no special attribute type assigned Chris@16: template Chris@16: struct assign_to_attribute_from_value, HasState, Idtype> > Chris@16: { Chris@16: static void Chris@16: call(lex::lexertl::token, HasState, Idtype> const& t Chris@16: , Attribute& attr) Chris@16: { Chris@16: // The default type returned by the token_def parser component (if Chris@16: // it has no token value type assigned) is the pair of iterators Chris@16: // to the matched character sequence. Chris@16: spirit::traits::assign_to(t.value().begin(), t.value().end(), attr); Chris@16: } Chris@16: }; Chris@16: Chris@16: // template Chris@16: // struct assign_to_container_from_value, HasState, Idtype> > Chris@16: // : assign_to_attribute_from_value, HasState, Idtype> > Chris@16: // {}; Chris@16: Chris@16: // same as above but using mpl::vector<> instead of mpl::vector0<> Chris@16: template Chris@16: struct assign_to_attribute_from_value, HasState, Idtype> > Chris@16: { Chris@16: static void Chris@16: call(lex::lexertl::token, HasState, Idtype> const& t Chris@16: , Attribute& attr) Chris@16: { Chris@16: // The default type returned by the token_def parser component (if Chris@16: // it has no token value type assigned) is the pair of iterators Chris@16: // to the matched character sequence. Chris@16: spirit::traits::assign_to(t.value().begin(), t.value().end(), attr); Chris@16: } Chris@16: }; Chris@16: Chris@16: // template Chris@16: // struct assign_to_container_from_value, HasState, Idtype> > Chris@16: // : assign_to_attribute_from_value, HasState, Idtype> > Chris@16: // {}; Chris@16: Chris@16: // This is called from the parse function of token_def if the token type Chris@16: // has been explicitly omitted (i.e. no attribute value is used), which Chris@16: // essentially means that every attribute gets initialized using default Chris@16: // constructed values. Chris@16: template Chris@16: struct assign_to_attribute_from_value > Chris@16: { Chris@16: static void Chris@16: call(lex::lexertl::token const& t Chris@16: , Attribute& attr) Chris@16: { Chris@16: // do nothing Chris@16: } Chris@16: }; Chris@16: Chris@16: template Chris@16: struct assign_to_container_from_value > Chris@16: : assign_to_attribute_from_value > Chris@16: {}; Chris@16: Chris@16: // This is called from the parse function of lexer_def_ Chris@16: template Chris@16: struct assign_to_attribute_from_value< Chris@16: fusion::vector2 > Chris@16: , lex::lexertl::token > Chris@16: { Chris@16: static void Chris@16: call(lex::lexertl::token const& t Chris@16: , fusion::vector2 >& attr) Chris@16: { Chris@16: // The type returned by the lexer_def_ parser components is a Chris@16: // fusion::vector containing the token id of the matched token Chris@16: // and the pair of iterators to the matched character sequence. Chris@16: typedef iterator_range iterpair_type; Chris@16: typedef fusion::vector2 > Chris@16: attribute_type; Chris@16: Chris@16: iterpair_type const& ip = boost::get(t.value()); Chris@16: attr = attribute_type(t.id(), ip); Chris@16: } Chris@16: }; Chris@16: Chris@16: template Chris@16: struct assign_to_container_from_value< Chris@16: fusion::vector2 > Chris@16: , lex::lexertl::token > Chris@16: : assign_to_attribute_from_value< Chris@16: fusion::vector2 > Chris@16: , lex::lexertl::token > Chris@16: {}; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: // Overload debug output for a single token, this integrates lexer tokens Chris@16: // with Qi's simple_trace debug facilities Chris@16: template Chris@16: struct token_printer_debug< Chris@16: lex::lexertl::token > Chris@16: { Chris@16: typedef lex::lexertl::token token_type; Chris@16: Chris@16: template Chris@16: static void print(Out& out, token_type const& val) Chris@16: { Chris@16: out << '['; Chris@16: spirit::traits::print_token(out, val.value()); Chris@16: out << ']'; Chris@16: } Chris@16: }; Chris@16: }}} Chris@16: Chris@16: #endif