//  Copyright (c) 2001-2011 Hartmut Kaiser
//
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
Chris@16
|
5
|
Chris@16
|
6 #if !defined(BOOST_SPIRIT_LEX_LEXER_MAR_13_2007_0145PM)
|
Chris@16
|
7 #define BOOST_SPIRIT_LEX_LEXER_MAR_13_2007_0145PM
|
Chris@16
|
8
|
Chris@16
|
9 #if defined(_MSC_VER)
|
Chris@16
|
10 #pragma once
|
Chris@16
|
11 #endif
|
Chris@16
|
12
|
Chris@16
|
13 #include <boost/spirit/home/support/info.hpp>
|
Chris@16
|
14 #include <boost/spirit/home/qi/skip_over.hpp>
|
Chris@16
|
15 #include <boost/spirit/home/qi/parser.hpp>
|
Chris@16
|
16 #include <boost/spirit/home/qi/detail/assign_to.hpp>
|
Chris@16
|
17 #include <boost/spirit/home/lex/reference.hpp>
|
Chris@16
|
18 #include <boost/spirit/home/lex/meta_compiler.hpp>
|
Chris@16
|
19 #include <boost/spirit/home/lex/lexer_type.hpp>
|
Chris@16
|
20 #include <boost/spirit/home/lex/lexer/token_def.hpp>
|
Chris@16
|
21 #include <boost/assert.hpp>
|
Chris@16
|
22 #include <boost/noncopyable.hpp>
|
Chris@16
|
23 #include <boost/detail/iterator.hpp>
|
Chris@16
|
24 #include <boost/fusion/include/vector.hpp>
|
Chris@16
|
25 #include <boost/mpl/assert.hpp>
|
Chris@16
|
26 #include <boost/range/iterator_range.hpp>
|
Chris@16
|
27 #include <string>
|
Chris@16
|
28
|
Chris@16
|
29 namespace boost { namespace spirit { namespace lex
|
Chris@16
|
30 {
|
Chris@16
|
31 ///////////////////////////////////////////////////////////////////////////
|
Chris@16
|
32 namespace detail
|
Chris@16
|
33 {
|
Chris@16
|
34 ///////////////////////////////////////////////////////////////////////
|
Chris@16
|
35 template <typename LexerDef>
|
Chris@16
|
36 struct lexer_def_
|
Chris@16
|
37 : proto::extends<
|
Chris@16
|
38 typename proto::terminal<
|
Chris@16
|
39 lex::reference<lexer_def_<LexerDef> const>
|
Chris@16
|
40 >::type
|
Chris@16
|
41 , lexer_def_<LexerDef> >
|
Chris@16
|
42 , qi::parser<lexer_def_<LexerDef> >
|
Chris@16
|
43 , lex::lexer_type<lexer_def_<LexerDef> >
|
Chris@16
|
44 {
|
Chris@16
|
45 private:
|
Chris@16
|
46 // avoid warnings about using 'this' in constructor
|
Chris@16
|
47 lexer_def_& this_() { return *this; }
|
Chris@16
|
48
|
Chris@16
|
49 typedef typename LexerDef::char_type char_type;
|
Chris@16
|
50 typedef typename LexerDef::string_type string_type;
|
Chris@16
|
51 typedef typename LexerDef::id_type id_type;
|
Chris@16
|
52
|
Chris@16
|
53 typedef lex::reference<lexer_def_ const> reference_;
|
Chris@16
|
54 typedef typename proto::terminal<reference_>::type terminal_type;
|
Chris@16
|
55 typedef proto::extends<terminal_type, lexer_def_> proto_base_type;
|
Chris@16
|
56
|
Chris@16
|
57 reference_ alias() const
|
Chris@16
|
58 {
|
Chris@16
|
59 return reference_(*this);
|
Chris@16
|
60 }
|
Chris@16
|
61
|
Chris@16
|
62 public:
|
Chris@16
|
63 // Qi interface: metafunction calculating parser attribute type
|
Chris@16
|
64 template <typename Context, typename Iterator>
|
Chris@16
|
65 struct attribute
|
Chris@16
|
66 {
|
Chris@16
|
67 // the return value of a token set contains the matched token
|
Chris@16
|
68 // id, and the corresponding pair of iterators
|
Chris@16
|
69 typedef typename Iterator::base_iterator_type iterator_type;
|
Chris@16
|
70 typedef
|
Chris@16
|
71 fusion::vector2<id_type, iterator_range<iterator_type> >
|
Chris@16
|
72 type;
|
Chris@16
|
73 };
|
Chris@16
|
74
|
Chris@16
|
75 // Qi interface: parse functionality
|
Chris@16
|
76 template <typename Iterator, typename Context
|
Chris@16
|
77 , typename Skipper, typename Attribute>
|
Chris@16
|
78 bool parse(Iterator& first, Iterator const& last
|
Chris@16
|
79 , Context& /*context*/, Skipper const& skipper
|
Chris@16
|
80 , Attribute& attr) const
|
Chris@16
|
81 {
|
Chris@16
|
82 qi::skip_over(first, last, skipper); // always do a pre-skip
|
Chris@16
|
83
|
Chris@16
|
84 if (first != last) {
|
Chris@16
|
85 typedef typename
|
Chris@16
|
86 boost::detail::iterator_traits<Iterator>::value_type
|
Chris@16
|
87 token_type;
|
Chris@16
|
88
|
Chris@16
|
89 token_type const& t = *first;
|
Chris@16
|
90 if (token_is_valid(t) && t.state() == first.get_state()) {
|
Chris@16
|
91 // any of the token definitions matched
|
Chris@16
|
92 spirit::traits::assign_to(t, attr);
|
Chris@16
|
93 ++first;
|
Chris@16
|
94 return true;
|
Chris@16
|
95 }
|
Chris@16
|
96 }
|
Chris@16
|
97 return false;
|
Chris@16
|
98 }
|
Chris@16
|
99
|
Chris@16
|
100 // Qi interface: 'what' functionality
|
Chris@16
|
101 template <typename Context>
|
Chris@16
|
102 info what(Context& /*context*/) const
|
Chris@16
|
103 {
|
Chris@16
|
104 return info("lexer");
|
Chris@16
|
105 }
|
Chris@16
|
106
|
Chris@16
|
107 private:
|
Chris@16
|
108 // allow to use the lexer.self.add("regex1", id1)("regex2", id2);
|
Chris@16
|
109 // syntax
|
Chris@16
|
110 struct adder
|
Chris@16
|
111 {
|
Chris@16
|
112 adder(lexer_def_& def_)
|
Chris@16
|
113 : def(def_) {}
|
Chris@16
|
114
|
Chris@16
|
115 // Add a token definition based on a single character as given
|
Chris@16
|
116 // by the first parameter, the second parameter allows to
|
Chris@16
|
117 // specify the token id to use for the new token. If no token
|
Chris@16
|
118 // id is given the character code is used.
|
Chris@16
|
119 adder const& operator()(char_type c
|
Chris@16
|
120 , id_type token_id = id_type()) const
|
Chris@16
|
121 {
|
Chris@16
|
122 if (id_type() == token_id)
|
Chris@16
|
123 token_id = static_cast<id_type>(c);
|
Chris@16
|
124 def.def.add_token (def.state.c_str(), c, token_id
|
Chris@16
|
125 , def.targetstate.empty() ? 0 : def.targetstate.c_str());
|
Chris@16
|
126 return *this;
|
Chris@16
|
127 }
|
Chris@16
|
128
|
Chris@16
|
129 // Add a token definition based on a character sequence as
|
Chris@16
|
130 // given by the first parameter, the second parameter allows to
|
Chris@16
|
131 // specify the token id to use for the new token. If no token
|
Chris@16
|
132 // id is given this function will generate a unique id to be
|
Chris@16
|
133 // used as the token's id.
|
Chris@16
|
134 adder const& operator()(string_type const& s
|
Chris@16
|
135 , id_type token_id = id_type()) const
|
Chris@16
|
136 {
|
Chris@16
|
137 if (id_type() == token_id)
|
Chris@16
|
138 token_id = def.def.get_next_id();
|
Chris@16
|
139 def.def.add_token (def.state.c_str(), s, token_id
|
Chris@16
|
140 , def.targetstate.empty() ? 0 : def.targetstate.c_str());
|
Chris@16
|
141 return *this;
|
Chris@16
|
142 }
|
Chris@16
|
143
|
Chris@16
|
144 template <typename Attribute>
|
Chris@16
|
145 adder const& operator()(
|
Chris@16
|
146 token_def<Attribute, char_type, id_type>& tokdef
|
Chris@16
|
147 , id_type token_id = id_type()) const
|
Chris@16
|
148 {
|
Chris@16
|
149 // make sure we have a token id
|
Chris@16
|
150 if (id_type() == token_id) {
|
Chris@16
|
151 if (id_type() == tokdef.id()) {
|
Chris@16
|
152 token_id = def.def.get_next_id();
|
Chris@16
|
153 tokdef.id(token_id);
|
Chris@16
|
154 }
|
Chris@16
|
155 else {
|
Chris@16
|
156 token_id = tokdef.id();
|
Chris@16
|
157 }
|
Chris@16
|
158 }
|
Chris@16
|
159 else {
|
Chris@16
|
160 // the following assertion makes sure that the token_def
|
Chris@16
|
161 // instance has not been assigned a different id earlier
|
Chris@16
|
162 BOOST_ASSERT(id_type() == tokdef.id()
|
Chris@16
|
163 || token_id == tokdef.id());
|
Chris@16
|
164 tokdef.id(token_id);
|
Chris@16
|
165 }
|
Chris@16
|
166
|
Chris@16
|
167 def.define(tokdef);
|
Chris@16
|
168 return *this;
|
Chris@16
|
169 }
|
Chris@16
|
170
|
Chris@16
|
171 // template <typename F>
|
Chris@16
|
172 // adder const& operator()(char_type c, id_type token_id, F act) const
|
Chris@16
|
173 // {
|
Chris@16
|
174 // if (id_type() == token_id)
|
Chris@16
|
175 // token_id = def.def.get_next_id();
|
Chris@16
|
176 // std::size_t unique_id =
|
Chris@16
|
177 // def.def.add_token (def.state.c_str(), s, token_id);
|
Chris@16
|
178 // def.def.add_action(unique_id, def.state.c_str(), act);
|
Chris@16
|
179 // return *this;
|
Chris@16
|
180 // }
|
Chris@16
|
181
|
Chris@16
|
182 lexer_def_& def;
|
Chris@16
|
183
|
Chris@16
|
184 private:
|
Chris@16
|
185 // silence MSVC warning C4512: assignment operator could not be generated
|
Chris@16
|
186 adder& operator= (adder const&);
|
Chris@16
|
187 };
|
Chris@16
|
188 friend struct adder;
|
Chris@16
|
189
|
Chris@16
|
190 // allow to use lexer.self.add_pattern("pattern1", "regex1")(...);
|
Chris@16
|
191 // syntax
|
Chris@16
|
192 struct pattern_adder
|
Chris@16
|
193 {
|
Chris@16
|
194 pattern_adder(lexer_def_& def_)
|
Chris@16
|
195 : def(def_) {}
|
Chris@16
|
196
|
Chris@16
|
197 pattern_adder const& operator()(string_type const& p
|
Chris@16
|
198 , string_type const& s) const
|
Chris@16
|
199 {
|
Chris@16
|
200 def.def.add_pattern (def.state.c_str(), p, s);
|
Chris@16
|
201 return *this;
|
Chris@16
|
202 }
|
Chris@16
|
203
|
Chris@16
|
204 lexer_def_& def;
|
Chris@16
|
205
|
Chris@16
|
206 private:
|
Chris@16
|
207 // silence MSVC warning C4512: assignment operator could not be generated
|
Chris@16
|
208 pattern_adder& operator= (pattern_adder const&);
|
Chris@16
|
209 };
|
Chris@16
|
210 friend struct pattern_adder;
|
Chris@16
|
211
|
Chris@16
|
212 private:
|
Chris@16
|
213 // Helper function to invoke the necessary 2 step compilation
|
Chris@16
|
214 // process on token definition expressions
|
Chris@16
|
215 template <typename TokenExpr>
|
Chris@16
|
216 void compile2pass(TokenExpr const& expr)
|
Chris@16
|
217 {
|
Chris@16
|
218 expr.collect(def, state, targetstate);
|
Chris@16
|
219 expr.add_actions(def);
|
Chris@16
|
220 }
|
Chris@16
|
221
|
Chris@16
|
222 public:
|
Chris@16
|
223 ///////////////////////////////////////////////////////////////////
|
Chris@16
|
224 template <typename Expr>
|
Chris@16
|
225 void define(Expr const& expr)
|
Chris@16
|
226 {
|
Chris@16
|
227 compile2pass(compile<lex::domain>(expr));
|
Chris@16
|
228 }
|
Chris@16
|
229
|
Chris@16
|
230 lexer_def_(LexerDef& def_, string_type const& state_
|
Chris@16
|
231 , string_type const& targetstate_ = string_type())
|
Chris@16
|
232 : proto_base_type(terminal_type::make(alias()))
|
Chris@16
|
233 , add(this_()), add_pattern(this_()), def(def_)
|
Chris@16
|
234 , state(state_), targetstate(targetstate_)
|
Chris@16
|
235 {}
|
Chris@16
|
236
|
Chris@16
|
237 // allow to switch states
|
Chris@16
|
238 lexer_def_ operator()(char_type const* state) const
|
Chris@16
|
239 {
|
Chris@16
|
240 return lexer_def_(def, state);
|
Chris@16
|
241 }
|
Chris@16
|
242 lexer_def_ operator()(char_type const* state
|
Chris@16
|
243 , char_type const* targetstate) const
|
Chris@16
|
244 {
|
Chris@16
|
245 return lexer_def_(def, state, targetstate);
|
Chris@16
|
246 }
|
Chris@16
|
247 lexer_def_ operator()(string_type const& state
|
Chris@16
|
248 , string_type const& targetstate = string_type()) const
|
Chris@16
|
249 {
|
Chris@16
|
250 return lexer_def_(def, state, targetstate);
|
Chris@16
|
251 }
|
Chris@16
|
252
|
Chris@16
|
253 // allow to assign a token definition expression
|
Chris@16
|
254 template <typename Expr>
|
Chris@16
|
255 lexer_def_& operator= (Expr const& xpr)
|
Chris@16
|
256 {
|
Chris@16
|
257 // Report invalid expression error as early as possible.
|
Chris@16
|
258 // If you got an error_invalid_expression error message here,
|
Chris@16
|
259 // then the expression (expr) is not a valid spirit lex
|
Chris@16
|
260 // expression.
|
Chris@16
|
261 BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr);
|
Chris@16
|
262
|
Chris@16
|
263 def.clear(state.c_str());
|
Chris@16
|
264 define(xpr);
|
Chris@16
|
265 return *this;
|
Chris@16
|
266 }
|
Chris@16
|
267
|
Chris@16
|
268 // explicitly tell the lexer that the given state will be defined
|
Chris@16
|
269 // (useful in conjunction with "*")
|
Chris@16
|
270 std::size_t add_state(char_type const* state = 0)
|
Chris@16
|
271 {
|
Chris@16
|
272 return def.add_state(state ? state : def.initial_state().c_str());
|
Chris@16
|
273 }
|
Chris@16
|
274
|
Chris@16
|
275 adder add;
|
Chris@16
|
276 pattern_adder add_pattern;
|
Chris@16
|
277
|
Chris@16
|
278 private:
|
Chris@16
|
279 LexerDef& def;
|
Chris@16
|
280 string_type state;
|
Chris@16
|
281 string_type targetstate;
|
Chris@16
|
282
|
Chris@16
|
283 private:
|
Chris@16
|
284 // silence MSVC warning C4512: assignment operator could not be generated
|
Chris@16
|
285 lexer_def_& operator= (lexer_def_ const&);
|
Chris@16
|
286 };
|
Chris@16
|
287
|
Chris@16
|
288 #if defined(BOOST_NO_CXX11_RVALUE_REFERENCES)
|
Chris@16
|
289 // allow to assign a token definition expression
|
Chris@16
|
290 template <typename LexerDef, typename Expr>
|
Chris@16
|
291 inline lexer_def_<LexerDef>&
|
Chris@16
|
292 operator+= (lexer_def_<LexerDef>& lexdef, Expr& xpr)
|
Chris@16
|
293 {
|
Chris@16
|
294 // Report invalid expression error as early as possible.
|
Chris@16
|
295 // If you got an error_invalid_expression error message here,
|
Chris@16
|
296 // then the expression (expr) is not a valid spirit lex
|
Chris@16
|
297 // expression.
|
Chris@16
|
298 BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr);
|
Chris@16
|
299
|
Chris@16
|
300 lexdef.define(xpr);
|
Chris@16
|
301 return lexdef;
|
Chris@16
|
302 }
|
Chris@16
|
303 #else
|
Chris@16
|
304 // allow to assign a token definition expression
|
Chris@16
|
305 template <typename LexerDef, typename Expr>
|
Chris@16
|
306 inline lexer_def_<LexerDef>&
|
Chris@16
|
307 operator+= (lexer_def_<LexerDef>& lexdef, Expr&& xpr)
|
Chris@16
|
308 {
|
Chris@16
|
309 // Report invalid expression error as early as possible.
|
Chris@16
|
310 // If you got an error_invalid_expression error message here,
|
Chris@16
|
311 // then the expression (expr) is not a valid spirit lex
|
Chris@16
|
312 // expression.
|
Chris@16
|
313 BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr);
|
Chris@16
|
314
|
Chris@16
|
315 lexdef.define(xpr);
|
Chris@16
|
316 return lexdef;
|
Chris@16
|
317 }
|
Chris@16
|
318 #endif
|
Chris@16
|
319
|
Chris@16
|
320 template <typename LexerDef, typename Expr>
|
Chris@16
|
321 inline lexer_def_<LexerDef>&
|
Chris@16
|
322 operator+= (lexer_def_<LexerDef>& lexdef, Expr const& xpr)
|
Chris@16
|
323 {
|
Chris@16
|
324 // Report invalid expression error as early as possible.
|
Chris@16
|
325 // If you got an error_invalid_expression error message here,
|
Chris@16
|
326 // then the expression (expr) is not a valid spirit lex
|
Chris@16
|
327 // expression.
|
Chris@16
|
328 BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr);
|
Chris@16
|
329
|
Chris@16
|
330 lexdef.define(xpr);
|
Chris@16
|
331 return lexdef;
|
Chris@16
|
332 }
|
Chris@16
|
333 }
|
Chris@16
|
334
|
Chris@16
|
///////////////////////////////////////////////////////////////////////////
//  The match_flags flags are used to influence different matching
//  modes of the lexer
//
//  The non-zero enumerators are distinct bits, so they can be combined
//  with bitwise or.
struct match_flags
{
    enum enum_type
    {
        match_default = 0,          // no flags
        match_not_dot_newline = 1,  // the regex '.' doesn't match newlines
        match_icase = 2             // all matching operations are case insensitive
    };
};
|
Chris@16
|
347
|
Chris@16
|
348 ///////////////////////////////////////////////////////////////////////////
|
Chris@16
|
349 // This represents a lexer object
|
Chris@16
|
350 ///////////////////////////////////////////////////////////////////////////
|
Chris@16
|
351
|
Chris@16
|
///////////////////////////////////////////////////////////////////////////
//  This is the first token id automatically assigned by the library
//  if needed
enum tokenids
{
    min_token_id = 0x10000
};
|
Chris@16
|
359
|
Chris@16
|
360 template <typename Lexer>
|
Chris@16
|
361 class lexer : public Lexer
|
Chris@16
|
362 {
|
Chris@16
|
363 private:
|
Chris@16
|
364 // avoid warnings about using 'this' in constructor
|
Chris@16
|
365 lexer& this_() { return *this; }
|
Chris@16
|
366
|
Chris@16
|
367 std::size_t next_token_id; // has to be an integral type
|
Chris@16
|
368
|
Chris@16
|
369 public:
|
Chris@16
|
370 typedef Lexer lexer_type;
|
Chris@16
|
371 typedef typename Lexer::id_type id_type;
|
Chris@16
|
372 typedef typename Lexer::char_type char_type;
|
Chris@16
|
373 typedef typename Lexer::iterator_type iterator_type;
|
Chris@16
|
374 typedef lexer base_type;
|
Chris@16
|
375
|
Chris@16
|
376 typedef detail::lexer_def_<lexer> lexer_def;
|
Chris@16
|
377 typedef std::basic_string<char_type> string_type;
|
Chris@16
|
378
|
Chris@16
|
379 lexer(unsigned int flags = match_flags::match_default
|
Chris@16
|
380 , id_type first_id = id_type(min_token_id))
|
Chris@16
|
381 : lexer_type(flags)
|
Chris@16
|
382 , next_token_id(first_id)
|
Chris@16
|
383 , self(this_(), lexer_type::initial_state())
|
Chris@16
|
384 {}
|
Chris@16
|
385
|
Chris@16
|
386 // access iterator interface
|
Chris@16
|
387 template <typename Iterator>
|
Chris@16
|
388 iterator_type begin(Iterator& first, Iterator const& last
|
Chris@16
|
389 , char_type const* initial_state = 0) const
|
Chris@16
|
390 { return this->lexer_type::begin(first, last, initial_state); }
|
Chris@16
|
391 iterator_type end() const
|
Chris@16
|
392 { return this->lexer_type::end(); }
|
Chris@16
|
393
|
Chris@16
|
394 std::size_t map_state(char_type const* state)
|
Chris@16
|
395 { return this->lexer_type::add_state(state); }
|
Chris@16
|
396
|
Chris@16
|
397 // create a unique token id
|
Chris@16
|
398 id_type get_next_id() { return id_type(next_token_id++); }
|
Chris@16
|
399
|
Chris@16
|
400 lexer_def self; // allow for easy token definition
|
Chris@16
|
401 };
|
Chris@16
|
402
|
Chris@16
|
403 }}}
|
Chris@16
|
404
|
Chris@16
|
405 #endif
|