annotate DEPENDENCIES/generic/include/boost/spirit/home/lex/lexer/lexertl/lexer.hpp @ 133:4acb5d8d80b6 tip

Don't fail environmental check if README.md exists (but .txt and no-suffix don't)
author Chris Cannam
date Tue, 30 Jul 2019 12:25:44 +0100
parents 2665513ce2d3
children
rev   line source
Chris@16 1 // Copyright (c) 2001-2011 Hartmut Kaiser
Chris@16 2 //
Chris@16 3 // Distributed under the Boost Software License, Version 1.0. (See accompanying
Chris@16 4 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
Chris@16 5
Chris@16 6 #if !defined(BOOST_SPIRIT_LEX_LEXER_MAR_17_2007_0139PM)
Chris@16 7 #define BOOST_SPIRIT_LEX_LEXER_MAR_17_2007_0139PM
Chris@16 8
Chris@16 9 #if defined(_MSC_VER)
Chris@16 10 #pragma once
Chris@16 11 #endif
Chris@16 12
Chris@16 13 #include <iosfwd>
Chris@16 14
Chris@16 15 #include <boost/spirit/home/support/detail/lexer/generator.hpp>
Chris@16 16 #include <boost/spirit/home/support/detail/lexer/rules.hpp>
Chris@16 17 #include <boost/spirit/home/support/detail/lexer/consts.hpp>
Chris@16 18 #include <boost/spirit/home/support/unused.hpp>
Chris@16 19
Chris@16 20 #include <boost/spirit/home/lex/lexer/lexertl/token.hpp>
Chris@16 21 #include <boost/spirit/home/lex/lexer/lexertl/functor.hpp>
Chris@16 22 #include <boost/spirit/home/lex/lexer/lexertl/functor_data.hpp>
Chris@16 23 #include <boost/spirit/home/lex/lexer/lexertl/iterator.hpp>
Chris@16 24 #if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
Chris@16 25 #include <boost/spirit/home/support/detail/lexer/debug.hpp>
Chris@16 26 #endif
Chris@16 27
Chris@16 28 #include <boost/foreach.hpp>
Chris@16 29
Chris@16 30 namespace boost { namespace spirit { namespace lex { namespace lexertl
Chris@16 31 {
Chris@16 32 ///////////////////////////////////////////////////////////////////////////
Chris@16 33 namespace detail
Chris@16 34 {
Chris@16 35 ///////////////////////////////////////////////////////////////////////
Chris@16 36 // The must_escape function checks if the given character value needs
Chris@16 37 // to be preceded by a backslash character to disable its special
Chris@16 38 // meaning in the context of a regular expression
Chris@16 39 ///////////////////////////////////////////////////////////////////////
// Tells whether the character `c` is one of the characters this lexer
// treats as special inside a regular expression, i.e. whether it has to be
// prefixed with a backslash to be taken literally.
//
// Returns true for:  + / * ? | ( ) [ ] { } . ^ $ \ "
// Returns false for every other character value.
template <typename Char>
inline bool must_escape(Char c)
{
    // FIXME: more needed?
    switch (c) {
    // the escape character itself and the string delimiter
    case '\\': case '"':
    // anchors and the wildcard
    case '^': case '$': case '.':
    // alternation and the (unsupported) lookahead operator character
    case '|': case '/':
    // repetition operators
    case '*': case '+': case '?':
    // grouping, character sets and curly-brace constructs
    case '(': case ')':
    case '[': case ']':
    case '{': case '}':
        return true;

    default:
        return false;
    }
}
Chris@16 61
Chris@16 62 ///////////////////////////////////////////////////////////////////////
Chris@16 63 // The escape function returns the string representation of the given
Chris@16 64 // character value, possibly escaped with a backslash character, to
Chris@16 65 // allow it being safely used in a regular expression definition.
Chris@16 66 ///////////////////////////////////////////////////////////////////////
Chris@16 67 template <typename Char>
Chris@16 68 inline std::basic_string<Char> escape(Char ch)
Chris@16 69 {
Chris@16 70 std::basic_string<Char> result(1, ch);
Chris@16 71 if (detail::must_escape(ch))
Chris@16 72 {
Chris@16 73 typedef typename std::basic_string<Char>::size_type size_type;
Chris@16 74 result.insert((size_type)0, 1, '\\');
Chris@16 75 }
Chris@16 76 return result;
Chris@16 77 }
Chris@16 78
Chris@16 79 ///////////////////////////////////////////////////////////////////////
Chris@16 80 //
Chris@16 81 ///////////////////////////////////////////////////////////////////////
Chris@16 82 inline boost::lexer::regex_flags map_flags(unsigned int flags)
Chris@16 83 {
Chris@16 84 unsigned int retval = boost::lexer::none;
Chris@16 85 if (flags & match_flags::match_not_dot_newline)
Chris@16 86 retval |= boost::lexer::dot_not_newline;
Chris@16 87 if (flags & match_flags::match_icase)
Chris@16 88 retval |= boost::lexer::icase;
Chris@16 89
Chris@16 90 return boost::lexer::regex_flags(retval);
Chris@16 91 }
Chris@16 92 }
Chris@16 93
Chris@16 94 ///////////////////////////////////////////////////////////////////////////
// Forward declaration of the static lexer generator function. It is
// declared ahead of the lexer class template because that class names it
// as a friend; the definition is not part of this header.
template <typename Lexer, typename F>
bool generate_static(
    Lexer const&,
    std::basic_ostream<typename Lexer::char_type>&,
    typename Lexer::char_type const*,
    F);
Chris@16 99
Chris@16 100 ///////////////////////////////////////////////////////////////////////////
Chris@16 101 //
Chris@16 102 // Every lexer type to be used as a lexer for Spirit has to conform to
Chris@16 103 // the following public interface:
Chris@16 104 //
Chris@16 105 // typedefs:
Chris@16 106 // iterator_type The type of the iterator exposed by this lexer.
Chris@16 107 // token_type The type of the tokens returned from the exposed
Chris@16 108 // iterators.
Chris@16 109 //
Chris@16 110 // functions:
Chris@16 111 // default constructor
Chris@16 112 // Since lexers are instantiated as base classes
Chris@16 113 // only it might be a good idea to make this
Chris@16 114 // constructor protected.
Chris@16 115 // begin, end Return a pair of iterators, when dereferenced
Chris@16 116 // returning the sequence of tokens recognized in
Chris@16 117 // the input stream given as the parameters to the
Chris@16 118 // begin() function.
Chris@16 119 // add_token Should add the definition of a token to be
Chris@16 120 // recognized by this lexer.
Chris@16 121 // clear Should delete all current token definitions
Chris@16 122 // associated with the given state of this lexer
Chris@16 123 // object.
Chris@16 124 //
Chris@16 125 // template parameters:
Chris@16 126 // Iterator The type of the iterator used to access the
Chris@16 127 // underlying character stream.
Chris@16 128 // Token The type of the tokens to be returned from the
Chris@16 129 // exposed token iterator.
Chris@16 130 // Functor The type of the InputPolicy to use to instantiate
Chris@16 131 // the multi_pass iterator type to be used as the
Chris@16 132 // token iterator (returned from begin()/end()).
Chris@16 133 //
Chris@16 134 ///////////////////////////////////////////////////////////////////////////
Chris@16 135
Chris@16 136 ///////////////////////////////////////////////////////////////////////////
Chris@16 137 //
Chris@16 138 // The lexer class is an implementation of a Spirit.Lex lexer on
Chris@16 139 // top of Ben Hanson's lexertl library as outlined above (For more
Chris@16 140 // information about lexertl go here: http://www.benhanson.net/lexertl.html).
Chris@16 141 //
Chris@16 142 // This class is supposed to be used as the first and only template
Chris@16 143 // parameter while instantiating instances of a lex::lexer class.
Chris@16 144 //
Chris@16 145 ///////////////////////////////////////////////////////////////////////////
Chris@16 146 template <typename Token = token<>
Chris@16 147 , typename Iterator = typename Token::iterator_type
Chris@16 148 , typename Functor = functor<Token, lexertl::detail::data, Iterator> >
Chris@16 149 class lexer
Chris@16 150 {
Chris@16 151 private:
Chris@16 152 struct dummy { void true_() {} };
Chris@16 153 typedef void (dummy::*safe_bool)();
Chris@16 154
Chris@16 155 static std::size_t const all_states_id = static_cast<std::size_t>(-2);
Chris@16 156
Chris@16 157 public:
Chris@16 158 operator safe_bool() const
Chris@16 159 { return initialized_dfa_ ? &dummy::true_ : 0; }
Chris@16 160
Chris@16 161 typedef typename boost::detail::iterator_traits<Iterator>::value_type
Chris@16 162 char_type;
Chris@16 163 typedef std::basic_string<char_type> string_type;
Chris@16 164
Chris@16 165 typedef boost::lexer::basic_rules<char_type> basic_rules_type;
Chris@16 166
Chris@16 167 // Every lexer type to be used as a lexer for Spirit has to conform to
Chris@16 168 // a public interface .
Chris@16 169 typedef Token token_type;
Chris@16 170 typedef typename Token::id_type id_type;
Chris@16 171 typedef iterator<Functor> iterator_type;
Chris@16 172
Chris@16 173 private:
Chris@16 174 // this type is purely used for the iterator_type construction below
Chris@16 175 struct iterator_data_type
Chris@16 176 {
Chris@16 177 typedef typename Functor::semantic_actions_type semantic_actions_type;
Chris@16 178
Chris@16 179 iterator_data_type(
Chris@16 180 boost::lexer::basic_state_machine<char_type> const& sm
Chris@16 181 , boost::lexer::basic_rules<char_type> const& rules
Chris@16 182 , semantic_actions_type const& actions)
Chris@16 183 : state_machine_(sm), rules_(rules), actions_(actions)
Chris@16 184 {}
Chris@16 185
Chris@16 186 boost::lexer::basic_state_machine<char_type> const& state_machine_;
Chris@16 187 boost::lexer::basic_rules<char_type> const& rules_;
Chris@16 188 semantic_actions_type const& actions_;
Chris@16 189
Chris@16 190 private:
Chris@16 191 // silence MSVC warning C4512: assignment operator could not be generated
Chris@16 192 iterator_data_type& operator= (iterator_data_type const&);
Chris@16 193 };
Chris@16 194
Chris@16 195 public:
Chris@16 196 // Return the start iterator usable for iterating over the generated
Chris@16 197 // tokens.
Chris@16 198 iterator_type begin(Iterator& first, Iterator const& last
Chris@16 199 , char_type const* initial_state = 0) const
Chris@16 200 {
Chris@16 201 if (!init_dfa()) // never minimize DFA for dynamic lexers
Chris@16 202 return iterator_type();
Chris@16 203
Chris@16 204 iterator_data_type iterator_data(state_machine_, rules_, actions_);
Chris@16 205 return iterator_type(iterator_data, first, last, initial_state);
Chris@16 206 }
Chris@16 207
Chris@16 208 // Return the end iterator usable to stop iterating over the generated
Chris@16 209 // tokens.
Chris@16 210 iterator_type end() const
Chris@16 211 {
Chris@16 212 return iterator_type();
Chris@16 213 }
Chris@16 214
Chris@16 215 protected:
Chris@16 216 // Lexer instances can be created by means of a derived class only.
Chris@16 217 lexer(unsigned int flags)
Chris@16 218 : flags_(detail::map_flags(flags))
Chris@16 219 , rules_(flags_)
Chris@16 220 , initialized_dfa_(false)
Chris@16 221 {}
Chris@16 222
Chris@16 223 public:
Chris@16 224 // interface for token definition management
Chris@16 225 std::size_t add_token(char_type const* state, char_type tokendef,
Chris@16 226 std::size_t token_id, char_type const* targetstate)
Chris@16 227 {
Chris@16 228 add_state(state);
Chris@16 229 initialized_dfa_ = false;
Chris@16 230 if (state == all_states())
Chris@16 231 return rules_.add(state, detail::escape(tokendef), token_id, rules_.dot());
Chris@16 232
Chris@16 233 if (0 == targetstate)
Chris@16 234 targetstate = state;
Chris@16 235 else
Chris@16 236 add_state(targetstate);
Chris@16 237 return rules_.add(state, detail::escape(tokendef), token_id, targetstate);
Chris@16 238 }
Chris@16 239 std::size_t add_token(char_type const* state, string_type const& tokendef,
Chris@16 240 std::size_t token_id, char_type const* targetstate)
Chris@16 241 {
Chris@16 242 add_state(state);
Chris@16 243 initialized_dfa_ = false;
Chris@16 244 if (state == all_states())
Chris@16 245 return rules_.add(state, tokendef, token_id, rules_.dot());
Chris@16 246
Chris@16 247 if (0 == targetstate)
Chris@16 248 targetstate = state;
Chris@16 249 else
Chris@16 250 add_state(targetstate);
Chris@16 251 return rules_.add(state, tokendef, token_id, targetstate);
Chris@16 252 }
Chris@16 253
Chris@16 254 // interface for pattern definition management
Chris@16 255 void add_pattern (char_type const* state, string_type const& name,
Chris@16 256 string_type const& patterndef)
Chris@16 257 {
Chris@16 258 add_state(state);
Chris@16 259 rules_.add_macro(name.c_str(), patterndef);
Chris@16 260 initialized_dfa_ = false;
Chris@16 261 }
Chris@16 262
Chris@16 263 boost::lexer::rules const& get_rules() const { return rules_; }
Chris@16 264
Chris@16 265 void clear(char_type const* state)
Chris@16 266 {
Chris@16 267 std::size_t s = rules_.state(state);
Chris@16 268 if (boost::lexer::npos != s)
Chris@16 269 rules_.clear(state);
Chris@16 270 initialized_dfa_ = false;
Chris@16 271 }
Chris@16 272 std::size_t add_state(char_type const* state)
Chris@16 273 {
Chris@16 274 if (state == all_states())
Chris@16 275 return all_states_id;
Chris@16 276
Chris@16 277 std::size_t stateid = rules_.state(state);
Chris@16 278 if (boost::lexer::npos == stateid) {
Chris@16 279 stateid = rules_.add_state(state);
Chris@16 280 initialized_dfa_ = false;
Chris@16 281 }
Chris@16 282 return stateid;
Chris@16 283 }
Chris@16 284 string_type initial_state() const
Chris@16 285 {
Chris@16 286 return string_type(rules_.initial());
Chris@16 287 }
Chris@16 288 string_type all_states() const
Chris@16 289 {
Chris@16 290 return string_type(rules_.all_states());
Chris@16 291 }
Chris@16 292
Chris@16 293 // Register a semantic action with the given id
Chris@16 294 template <typename F>
Chris@16 295 void add_action(std::size_t unique_id, std::size_t state, F act)
Chris@16 296 {
Chris@16 297 // If you see an error here stating add_action is not a member of
Chris@16 298 // fusion::unused_type then you are probably having semantic actions
Chris@16 299 // attached to at least one token in the lexer definition without
Chris@16 300 // using the lex::lexertl::actor_lexer<> as its base class.
Chris@16 301 typedef typename Functor::wrap_action_type wrapper_type;
Chris@16 302 if (state == all_states_id) {
Chris@16 303 // add the action to all known states
Chris@16 304 typedef typename
Chris@16 305 basic_rules_type::string_size_t_map::value_type
Chris@16 306 state_type;
Chris@16 307
Chris@16 308 std::size_t states = rules_.statemap().size();
Chris@16 309 BOOST_FOREACH(state_type const& s, rules_.statemap()) {
Chris@16 310 for (std::size_t j = 0; j < states; ++j)
Chris@16 311 actions_.add_action(unique_id + j, s.second, wrapper_type::call(act));
Chris@16 312 }
Chris@16 313 }
Chris@16 314 else {
Chris@16 315 actions_.add_action(unique_id, state, wrapper_type::call(act));
Chris@16 316 }
Chris@16 317 }
Chris@16 318 // template <typename F>
Chris@16 319 // void add_action(std::size_t unique_id, char_type const* state, F act)
Chris@16 320 // {
Chris@16 321 // typedef typename Functor::wrap_action_type wrapper_type;
Chris@16 322 // actions_.add_action(unique_id, add_state(state), wrapper_type::call(act));
Chris@16 323 // }
Chris@16 324
Chris@16 325 // We do not minimize the state machine by default anymore because
Chris@16 326 // Ben said: "If you can afford to generate a lexer at runtime, there
Chris@16 327 // is little point in calling minimise."
Chris@16 328 // Go figure.
Chris@16 329 bool init_dfa(bool minimize = false) const
Chris@16 330 {
Chris@16 331 if (!initialized_dfa_) {
Chris@16 332 state_machine_.clear();
Chris@16 333 typedef boost::lexer::basic_generator<char_type> generator;
Chris@16 334 generator::build (rules_, state_machine_);
Chris@16 335 if (minimize)
Chris@16 336 generator::minimise (state_machine_);
Chris@16 337
Chris@16 338 #if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
Chris@16 339 boost::lexer::debug::dump(state_machine_, std::cerr);
Chris@16 340 #endif
Chris@16 341 initialized_dfa_ = true;
Chris@16 342
Chris@16 343 // // release memory held by rules description
Chris@16 344 // basic_rules_type rules;
Chris@16 345 // rules.init_state_info(rules_); // preserve states
Chris@16 346 // std::swap(rules, rules_);
Chris@16 347 }
Chris@16 348 return true;
Chris@16 349 }
Chris@16 350
Chris@16 351 private:
Chris@16 352 // lexertl specific data
Chris@16 353 mutable boost::lexer::basic_state_machine<char_type> state_machine_;
Chris@16 354 boost::lexer::regex_flags flags_;
Chris@16 355 /*mutable*/ basic_rules_type rules_;
Chris@16 356
Chris@16 357 typename Functor::semantic_actions_type actions_;
Chris@16 358 mutable bool initialized_dfa_;
Chris@16 359
Chris@16 360 // generator functions must be able to access members directly
Chris@16 361 template <typename Lexer, typename F>
Chris@16 362 friend bool generate_static(Lexer const&
Chris@16 363 , std::basic_ostream<typename Lexer::char_type>&
Chris@16 364 , typename Lexer::char_type const*, F);
Chris@16 365 };
Chris@16 366
Chris@16 367 ///////////////////////////////////////////////////////////////////////////
Chris@16 368 //
Chris@16 369 // The actor_lexer class is another implementation of a Spirit.Lex
Chris@16 370 // lexer on top of Ben Hanson's lexertl library as outlined above (For
Chris@16 371 // more information about lexertl go here:
Chris@16 372 // http://www.benhanson.net/lexertl.html).
Chris@16 373 //
Chris@16 374 // The only difference to the lexer class above is that token_def
Chris@16 375 // definitions may have semantic (lexer) actions attached while being
Chris@16 376 // defined:
Chris@16 377 //
Chris@16 378 // int w;
Chris@16 379 // token_def word = "[^ \t\n]+";
Chris@16 380 // self = word[++ref(w)]; // see example: word_count_lexer
Chris@16 381 //
Chris@16 382 // This class is supposed to be used as the first and only template
Chris@16 383 // parameter while instantiating instances of a lex::lexer class.
Chris@16 384 //
Chris@16 385 ///////////////////////////////////////////////////////////////////////////
Chris@16 386 template <typename Token = token<>
Chris@16 387 , typename Iterator = typename Token::iterator_type
Chris@16 388 , typename Functor = functor<Token, lexertl::detail::data, Iterator, mpl::true_> >
Chris@16 389 class actor_lexer : public lexer<Token, Iterator, Functor>
Chris@16 390 {
Chris@16 391 protected:
Chris@16 392 // Lexer instances can be created by means of a derived class only.
Chris@16 393 actor_lexer(unsigned int flags)
Chris@16 394 : lexer<Token, Iterator, Functor>(flags) {}
Chris@16 395 };
Chris@16 396
Chris@16 397 }}}}
Chris@16 398
Chris@16 399 #endif