annotate DEPENDENCIES/generic/include/boost/spirit/home/lex/lexer/lexer.hpp @ 133:4acb5d8d80b6 tip

Don't fail environmental check if README.md exists (but .txt and no-suffix don't)
author Chris Cannam
date Tue, 30 Jul 2019 12:25:44 +0100
parents 2665513ce2d3
children
rev   line source
Chris@16 1 // Copyright (c) 2001-2011 Hartmut Kaiser
Chris@16 2 //
Chris@16 3 // Distributed under the Boost Software License, Version 1.0. (See accompanying
Chris@16 4 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
Chris@16 5
Chris@16 6 #if !defined(BOOST_SPIRIT_LEX_LEXER_MAR_13_2007_0145PM)
Chris@16 7 #define BOOST_SPIRIT_LEX_LEXER_MAR_13_2007_0145PM
Chris@16 8
Chris@16 9 #if defined(_MSC_VER)
Chris@16 10 #pragma once
Chris@16 11 #endif
Chris@16 12
Chris@16 13 #include <boost/spirit/home/support/info.hpp>
Chris@16 14 #include <boost/spirit/home/qi/skip_over.hpp>
Chris@16 15 #include <boost/spirit/home/qi/parser.hpp>
Chris@16 16 #include <boost/spirit/home/qi/detail/assign_to.hpp>
Chris@16 17 #include <boost/spirit/home/lex/reference.hpp>
Chris@16 18 #include <boost/spirit/home/lex/meta_compiler.hpp>
Chris@16 19 #include <boost/spirit/home/lex/lexer_type.hpp>
Chris@16 20 #include <boost/spirit/home/lex/lexer/token_def.hpp>
Chris@16 21 #include <boost/assert.hpp>
Chris@16 22 #include <boost/noncopyable.hpp>
Chris@16 23 #include <boost/detail/iterator.hpp>
Chris@16 24 #include <boost/fusion/include/vector.hpp>
Chris@16 25 #include <boost/mpl/assert.hpp>
Chris@16 26 #include <boost/range/iterator_range.hpp>
Chris@16 27 #include <string>
Chris@16 28
Chris@16 29 namespace boost { namespace spirit { namespace lex
Chris@16 30 {
Chris@16 31 ///////////////////////////////////////////////////////////////////////////
Chris@16 32 namespace detail
Chris@16 33 {
Chris@16 34 ///////////////////////////////////////////////////////////////////////
Chris@16 35 template <typename LexerDef> // LexerDef supplies char_type, string_type, id_type and every call made through 'def' below
Chris@16 36 struct lexer_def_ // front end for adding token definitions to one (state, targetstate) pair of a LexerDef
Chris@16 37 : proto::extends<
Chris@16 38 typename proto::terminal<
Chris@16 39 lex::reference<lexer_def_<LexerDef> const>
Chris@16 40 >::type
Chris@16 41 , lexer_def_<LexerDef> >
Chris@16 42 , qi::parser<lexer_def_<LexerDef> >
Chris@16 43 , lex::lexer_type<lexer_def_<LexerDef> >
Chris@16 44 {
Chris@16 45 private:
Chris@16 46 // avoid warnings about using 'this' in constructor
Chris@16 47 lexer_def_& this_() { return *this; }
Chris@16 48
Chris@16 49 typedef typename LexerDef::char_type char_type;
Chris@16 50 typedef typename LexerDef::string_type string_type;
Chris@16 51 typedef typename LexerDef::id_type id_type;
Chris@16 52
Chris@16 53 typedef lex::reference<lexer_def_ const> reference_;
Chris@16 54 typedef typename proto::terminal<reference_>::type terminal_type;
Chris@16 55 typedef proto::extends<terminal_type, lexer_def_> proto_base_type;
Chris@16 56
Chris@16 57 reference_ alias() const // wraps *this in a lex::reference; the constructor builds the Proto terminal from it
Chris@16 58 {
Chris@16 59 return reference_(*this);
Chris@16 60 }
Chris@16 61
Chris@16 62 public:
Chris@16 63 // Qi interface: metafunction calculating parser attribute type
Chris@16 64 template <typename Context, typename Iterator>
Chris@16 65 struct attribute
Chris@16 66 {
Chris@16 67 // the return value of a token set contains the matched token
Chris@16 68 // id, and the corresponding pair of iterators
Chris@16 69 typedef typename Iterator::base_iterator_type iterator_type;
Chris@16 70 typedef
Chris@16 71 fusion::vector2<id_type, iterator_range<iterator_type> >
Chris@16 72 type;
Chris@16 73 };
Chris@16 74
Chris@16 75 // Qi interface: parse functionality. After the pre-skip, consumes exactly one token if it is valid and carries the iterator's current state.
Chris@16 76 template <typename Iterator, typename Context
Chris@16 77 , typename Skipper, typename Attribute>
Chris@16 78 bool parse(Iterator& first, Iterator const& last
Chris@16 79 , Context& /*context*/, Skipper const& skipper
Chris@16 80 , Attribute& attr) const
Chris@16 81 {
Chris@16 82 qi::skip_over(first, last, skipper); // always do a pre-skip
Chris@16 83
Chris@16 84 if (first != last) {
Chris@16 85 typedef typename
Chris@16 86 boost::detail::iterator_traits<Iterator>::value_type
Chris@16 87 token_type;
Chris@16 88
Chris@16 89 token_type const& t = *first;
Chris@16 90 if (token_is_valid(t) && t.state() == first.get_state()) {
Chris@16 91 // any of the token definitions matched: hand the token to attr and consume it
Chris@16 92 spirit::traits::assign_to(t, attr);
Chris@16 93 ++first;
Chris@16 94 return true;
Chris@16 95 }
Chris@16 96 }
Chris@16 97 return false; // input exhausted, token invalid, or token state differs from the iterator state; no token is consumed
Chris@16 98 }
Chris@16 99
Chris@16 100 // Qi interface: 'what' functionality
Chris@16 101 template <typename Context>
Chris@16 102 info what(Context& /*context*/) const
Chris@16 103 {
Chris@16 104 return info("lexer");
Chris@16 105 }
Chris@16 106
Chris@16 107 private:
Chris@16 108 // enables the chained syntax lexer.self.add("regex1", id1)("regex2", id2);
Chris@16 109 // (every overload returns the adder itself)
Chris@16 110 struct adder
Chris@16 111 {
Chris@16 112 adder(lexer_def_& def_)
Chris@16 113 : def(def_) {}
Chris@16 114
Chris@16 115 // Add a token definition based on a single character as given
Chris@16 116 // by the first parameter, the second parameter allows to
Chris@16 117 // specify the token id to use for the new token. If no token
Chris@16 118 // id is given the character code is used.
Chris@16 119 adder const& operator()(char_type c
Chris@16 120 , id_type token_id = id_type()) const
Chris@16 121 {
Chris@16 122 if (id_type() == token_id) // a default-constructed id means "no id given"
Chris@16 123 token_id = static_cast<id_type>(c);
Chris@16 124 def.def.add_token (def.state.c_str(), c, token_id
Chris@16 125 , def.targetstate.empty() ? 0 : def.targetstate.c_str()); // null target state when none was set
Chris@16 126 return *this;
Chris@16 127 }
Chris@16 128
Chris@16 129 // Add a token definition based on a character sequence as
Chris@16 130 // given by the first parameter, the second parameter allows to
Chris@16 131 // specify the token id to use for the new token. If no token
Chris@16 132 // id is given this function will generate a unique id to be
Chris@16 133 // used as the token's id.
Chris@16 134 adder const& operator()(string_type const& s
Chris@16 135 , id_type token_id = id_type()) const
Chris@16 136 {
Chris@16 137 if (id_type() == token_id) // a default-constructed id means "no id given"
Chris@16 138 token_id = def.def.get_next_id();
Chris@16 139 def.def.add_token (def.state.c_str(), s, token_id
Chris@16 140 , def.targetstate.empty() ? 0 : def.targetstate.c_str()); // null target state when none was set
Chris@16 141 return *this;
Chris@16 142 }
Chris@16 143
Chris@16 144 template <typename Attribute> // overload for token_def objects: reconciles tokdef's stored id with token_id, then defines it
Chris@16 145 adder const& operator()(
Chris@16 146 token_def<Attribute, char_type, id_type>& tokdef
Chris@16 147 , id_type token_id = id_type()) const
Chris@16 148 {
Chris@16 149 // make sure we have a token id
Chris@16 150 if (id_type() == token_id) {
Chris@16 151 if (id_type() == tokdef.id()) {
Chris@16 152 token_id = def.def.get_next_id(); // neither side has an id: generate one and store it in tokdef
Chris@16 153 tokdef.id(token_id);
Chris@16 154 }
Chris@16 155 else {
Chris@16 156 token_id = tokdef.id(); // reuse the id tokdef already carries
Chris@16 157 }
Chris@16 158 }
Chris@16 159 else {
Chris@16 160 // the following assertion makes sure that the token_def
Chris@16 161 // instance has not been assigned a different id earlier
Chris@16 162 BOOST_ASSERT(id_type() == tokdef.id()
Chris@16 163 || token_id == tokdef.id());
Chris@16 164 tokdef.id(token_id);
Chris@16 165 }
Chris@16 166
Chris@16 167 def.define(tokdef); // goes through lexer_def_::define, i.e. the two step compilation
Chris@16 168 return *this;
Chris@16 169 }
Chris@16 170
Chris@16 171 // template <typename F>
Chris@16 172 // adder const& operator()(char_type c, id_type token_id, F act) const
Chris@16 173 // {
Chris@16 174 // if (id_type() == token_id)
Chris@16 175 // token_id = def.def.get_next_id();
Chris@16 176 // std::size_t unique_id =
Chris@16 177 // def.def.add_token (def.state.c_str(), s, token_id);
Chris@16 178 // def.def.add_action(unique_id, def.state.c_str(), act);
Chris@16 179 // return *this;
Chris@16 180 // }
Chris@16 181
Chris@16 182 lexer_def_& def; // the lexer_def_ this adder forwards to
Chris@16 183
Chris@16 184 private:
Chris@16 185 // silence MSVC warning C4512: assignment operator could not be generated
Chris@16 186 adder& operator= (adder const&);
Chris@16 187 };
Chris@16 188 friend struct adder;
Chris@16 189
Chris@16 190 // enables the chained syntax lexer.self.add_pattern("pattern1", "regex1")(...);
Chris@16 191 // (the call operator returns the pattern_adder itself)
Chris@16 192 struct pattern_adder
Chris@16 193 {
Chris@16 194 pattern_adder(lexer_def_& def_)
Chris@16 195 : def(def_) {}
Chris@16 196
Chris@16 197 pattern_adder const& operator()(string_type const& p
Chris@16 198 , string_type const& s) const
Chris@16 199 {
Chris@16 200 def.def.add_pattern (def.state.c_str(), p, s); // p and s are forwarded unchanged, together with def's state
Chris@16 201 return *this;
Chris@16 202 }
Chris@16 203
Chris@16 204 lexer_def_& def; // the lexer_def_ this pattern_adder forwards to
Chris@16 205
Chris@16 206 private:
Chris@16 207 // silence MSVC warning C4512: assignment operator could not be generated
Chris@16 208 pattern_adder& operator= (pattern_adder const&);
Chris@16 209 };
Chris@16 210 friend struct pattern_adder;
Chris@16 211
Chris@16 212 private:
Chris@16 213 // Helper function to invoke the necessary 2 step compilation
Chris@16 214 // process on token definition expressions
Chris@16 215 template <typename TokenExpr>
Chris@16 216 void compile2pass(TokenExpr const& expr)
Chris@16 217 {
Chris@16 218 expr.collect(def, state, targetstate); // step 1
Chris@16 219 expr.add_actions(def); // step 2
Chris@16 220 }
Chris@16 221
Chris@16 222 public:
Chris@16 223 ///////////////////////////////////////////////////////////////////
Chris@16 224 template <typename Expr>
Chris@16 225 void define(Expr const& expr) // adds expr's definitions without clearing the state first (operator= clears)
Chris@16 226 {
Chris@16 227 compile2pass(compile<lex::domain>(expr));
Chris@16 228 }
Chris@16 229
Chris@16 230 lexer_def_(LexerDef& def_, string_type const& state_
Chris@16 231 , string_type const& targetstate_ = string_type())
Chris@16 232 : proto_base_type(terminal_type::make(alias())) // the Proto terminal refers back to this object
Chris@16 233 , add(this_()), add_pattern(this_()), def(def_) // add/add_pattern store a reference to *this
Chris@16 234 , state(state_), targetstate(targetstate_)
Chris@16 235 {}
Chris@16 236
Chris@16 237 // allow to switch states: each overload builds a new lexer_def_ on the same LexerDef
Chris@16 238 lexer_def_ operator()(char_type const* state) const // NOTE(review): returned by value while add/add_pattern hold a reference to the object they were built with -- presumably relies on copy elision, confirm
Chris@16 239 {
Chris@16 240 return lexer_def_(def, state);
Chris@16 241 }
Chris@16 242 lexer_def_ operator()(char_type const* state
Chris@16 243 , char_type const* targetstate) const
Chris@16 244 {
Chris@16 245 return lexer_def_(def, state, targetstate);
Chris@16 246 }
Chris@16 247 lexer_def_ operator()(string_type const& state
Chris@16 248 , string_type const& targetstate = string_type()) const
Chris@16 249 {
Chris@16 250 return lexer_def_(def, state, targetstate);
Chris@16 251 }
Chris@16 252
Chris@16 253 // allow to assign a token definition expression
Chris@16 254 template <typename Expr>
Chris@16 255 lexer_def_& operator= (Expr const& xpr)
Chris@16 256 {
Chris@16 257 // Report invalid expression error as early as possible.
Chris@16 258 // If you got an error_invalid_expression error message here,
Chris@16 259 // then the expression (xpr) is not a valid spirit lex
Chris@16 260 // expression.
Chris@16 261 BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr);
Chris@16 262
Chris@16 263 def.clear(state.c_str()); // assignment clears this state in the LexerDef before defining xpr
Chris@16 264 define(xpr);
Chris@16 265 return *this;
Chris@16 266 }
Chris@16 267
Chris@16 268 // explicitly tell the lexer that the given state will be defined
Chris@16 269 // (useful in conjunction with "*")
Chris@16 270 std::size_t add_state(char_type const* state = 0)
Chris@16 271 {
Chris@16 272 return def.add_state(state ? state : def.initial_state().c_str()); // a null state selects the LexerDef's initial state
Chris@16 273 }
Chris@16 274
Chris@16 275 adder add; // entry point for the self.add(...)(...) syntax
Chris@16 276 pattern_adder add_pattern; // entry point for the self.add_pattern(...)(...) syntax
Chris@16 277
Chris@16 278 private:
Chris@16 279 LexerDef& def; // underlying lexer definition, stored by reference
Chris@16 280 string_type state; // state that definitions are added to
Chris@16 281 string_type targetstate; // forwarded to collect()/add_token(); empty means none was given
Chris@16 282
Chris@16 283 private:
Chris@16 284 // silence MSVC warning C4512: assignment operator could not be generated
Chris@16 285 lexer_def_& operator= (lexer_def_ const&);
Chris@16 286 };
Chris@16 287
Chris@16 288 #if defined(BOOST_NO_CXX11_RVALUE_REFERENCES)
Chris@16 289 // allow to add a token definition expression to a lexer_def_ (only define() is called, nothing is cleared)
Chris@16 290 template <typename LexerDef, typename Expr>
Chris@16 291 inline lexer_def_<LexerDef>&
Chris@16 292 operator+= (lexer_def_<LexerDef>& lexdef, Expr& xpr)
Chris@16 293 {
Chris@16 294 // Report invalid expression error as early as possible.
Chris@16 295 // If you got an error_invalid_expression error message here,
Chris@16 296 // then the expression (xpr) is not a valid spirit lex
Chris@16 297 // expression.
Chris@16 298 BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr);
Chris@16 299
Chris@16 300 lexdef.define(xpr);
Chris@16 301 return lexdef; // returns the left operand
Chris@16 302 }
Chris@16 303 #else
Chris@16 304 // allow to add a token definition expression to a lexer_def_ (only define() is called, nothing is cleared)
Chris@16 305 template <typename LexerDef, typename Expr>
Chris@16 306 inline lexer_def_<LexerDef>&
Chris@16 307 operator+= (lexer_def_<LexerDef>& lexdef, Expr&& xpr)
Chris@16 308 {
Chris@16 309 // Report invalid expression error as early as possible.
Chris@16 310 // If you got an error_invalid_expression error message here,
Chris@16 311 // then the expression (xpr) is not a valid spirit lex
Chris@16 312 // expression.
Chris@16 313 BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr);
Chris@16 314
Chris@16 315 lexdef.define(xpr); // xpr is passed on as an lvalue (no std::forward)
Chris@16 316 return lexdef; // returns the left operand
Chris@16 317 }
Chris@16 318 #endif
Chris@16 319
Chris@16 320 template <typename LexerDef, typename Expr> // overload of operator+= for expressions passed by const reference
Chris@16 321 inline lexer_def_<LexerDef>&
Chris@16 322 operator+= (lexer_def_<LexerDef>& lexdef, Expr const& xpr)
Chris@16 323 {
Chris@16 324 // Report invalid expression error as early as possible.
Chris@16 325 // If you got an error_invalid_expression error message here,
Chris@16 326 // then the expression (xpr) is not a valid spirit lex
Chris@16 327 // expression.
Chris@16 328 BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr);
Chris@16 329
Chris@16 330 lexdef.define(xpr); // only define() is called, nothing is cleared
Chris@16 331 return lexdef; // returns the left operand
Chris@16 332 }
Chris@16 333 }
Chris@16 334
Chris@16 335 ///////////////////////////////////////////////////////////////////////////
Chris@16 336 // The match_flags values are used to influence the different matching
Chris@16 337 // modes of the lexer
Chris@16 338 struct match_flags // the struct only scopes the enumerators; it has no data members
Chris@16 339 {
Chris@16 340 enum enum_type
Chris@16 341 {
Chris@16 342 match_default = 0, // no flags
Chris@16 343 match_not_dot_newline = 1, // the regex '.' doesn't match newlines
Chris@16 344 match_icase = 2 // all matching operations are case insensitive
Chris@16 345 };
Chris@16 346 };
Chris@16 347
Chris@16 348 ///////////////////////////////////////////////////////////////////////////
Chris@16 349 // This represents a lexer object (see the lexer class template below)
Chris@16 350 ///////////////////////////////////////////////////////////////////////////
Chris@16 351
Chris@16 352 ///////////////////////////////////////////////////////////////////////////
Chris@16 353 // This is the first token id automatically assigned by the library
Chris@16 354 // if needed
Chris@16 355 enum tokenids
Chris@16 356 {
Chris@16 357 min_token_id = 0x10000 // 65536
Chris@16 358 };
Chris@16 359
Chris@16 360 template <typename Lexer> // Lexer is the base class; it supplies id_type, char_type, iterator_type and the calls forwarded below
Chris@16 361 class lexer : public Lexer
Chris@16 362 {
Chris@16 363 private:
Chris@16 364 // avoid warnings about using 'this' in constructor
Chris@16 365 lexer& this_() { return *this; }
Chris@16 366
Chris@16 367 std::size_t next_token_id; // has to be an integral type; counter behind get_next_id()
Chris@16 368
Chris@16 369 public:
Chris@16 370 typedef Lexer lexer_type;
Chris@16 371 typedef typename Lexer::id_type id_type;
Chris@16 372 typedef typename Lexer::char_type char_type;
Chris@16 373 typedef typename Lexer::iterator_type iterator_type;
Chris@16 374 typedef lexer base_type;
Chris@16 375
Chris@16 376 typedef detail::lexer_def_<lexer> lexer_def;
Chris@16 377 typedef std::basic_string<char_type> string_type;
Chris@16 378
Chris@16 379 lexer(unsigned int flags = match_flags::match_default
Chris@16 380 , id_type first_id = id_type(min_token_id))
Chris@16 381 : lexer_type(flags) // flags are passed straight to the Lexer base class
Chris@16 382 , next_token_id(first_id) // first value get_next_id() will return
Chris@16 383 , self(this_(), lexer_type::initial_state()) // self is bound to the base lexer's initial state
Chris@16 384 {}
Chris@16 385
Chris@16 386 // access iterator interface (both calls are forwarded to the Lexer base class)
Chris@16 387 template <typename Iterator>
Chris@16 388 iterator_type begin(Iterator& first, Iterator const& last
Chris@16 389 , char_type const* initial_state = 0) const
Chris@16 390 { return this->lexer_type::begin(first, last, initial_state); }
Chris@16 391 iterator_type end() const
Chris@16 392 { return this->lexer_type::end(); }
Chris@16 393
Chris@16 394 std::size_t map_state(char_type const* state) // forwards to the base class's add_state and returns its result
Chris@16 395 { return this->lexer_type::add_state(state); }
Chris@16 396
Chris@16 397 // create a unique token id (returns the current counter value, then increments the counter)
Chris@16 398 id_type get_next_id() { return id_type(next_token_id++); }
Chris@16 399
Chris@16 400 lexer_def self; // allow for easy token definition
Chris@16 401 };
Chris@16 402
Chris@16 403 }}}
Chris@16 404
Chris@16 405 #endif