Mercurial > hg > vamp-build-and-test
comparison DEPENDENCIES/generic/include/boost/spirit/home/lex/lexer/lexer.hpp @ 16:2665513ce2d3
Add boost headers
author | Chris Cannam |
---|---|
date | Tue, 05 Aug 2014 11:11:38 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
15:663ca0da4350 | 16:2665513ce2d3 |
---|---|
1 // Copyright (c) 2001-2011 Hartmut Kaiser | |
2 // | |
3 // Distributed under the Boost Software License, Version 1.0. (See accompanying | |
4 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
5 | |
6 #if !defined(BOOST_SPIRIT_LEX_LEXER_MAR_13_2007_0145PM) | |
7 #define BOOST_SPIRIT_LEX_LEXER_MAR_13_2007_0145PM | |
8 | |
9 #if defined(_MSC_VER) | |
10 #pragma once | |
11 #endif | |
12 | |
13 #include <boost/spirit/home/support/info.hpp> | |
14 #include <boost/spirit/home/qi/skip_over.hpp> | |
15 #include <boost/spirit/home/qi/parser.hpp> | |
16 #include <boost/spirit/home/qi/detail/assign_to.hpp> | |
17 #include <boost/spirit/home/lex/reference.hpp> | |
18 #include <boost/spirit/home/lex/meta_compiler.hpp> | |
19 #include <boost/spirit/home/lex/lexer_type.hpp> | |
20 #include <boost/spirit/home/lex/lexer/token_def.hpp> | |
21 #include <boost/assert.hpp> | |
22 #include <boost/noncopyable.hpp> | |
23 #include <boost/detail/iterator.hpp> | |
24 #include <boost/fusion/include/vector.hpp> | |
25 #include <boost/mpl/assert.hpp> | |
26 #include <boost/range/iterator_range.hpp> | |
27 #include <string> | |
28 | |
29 namespace boost { namespace spirit { namespace lex | |
30 { | |
31 /////////////////////////////////////////////////////////////////////////// | |
32 namespace detail | |
33 { | |
34 /////////////////////////////////////////////////////////////////////// | |
// lexer_def_ binds a reference to a LexerDef together with a lexer state
// name and an optional target state name. It is at the same time a proto
// terminal (wrapping lex::reference<lexer_def_ const>), a qi::parser and a
// lex::lexer_type. Tokens and patterns are registered through the public
// 'add' and 'add_pattern' function objects, through define(), or through
// the assignment operator taking a token definition expression.
35 template <typename LexerDef> | |
36 struct lexer_def_ | |
37 : proto::extends< | |
38 typename proto::terminal< | |
39 lex::reference<lexer_def_<LexerDef> const> | |
40 >::type | |
41 , lexer_def_<LexerDef> > | |
42 , qi::parser<lexer_def_<LexerDef> > | |
43 , lex::lexer_type<lexer_def_<LexerDef> > | |
44 { | |
45 private: | |
46 // avoid warnings about using 'this' in constructor | |
47 lexer_def_& this_() { return *this; } | |
48 | |
49 typedef typename LexerDef::char_type char_type; | |
50 typedef typename LexerDef::string_type string_type; | |
51 typedef typename LexerDef::id_type id_type; | |
52 | |
53 typedef lex::reference<lexer_def_ const> reference_; | |
54 typedef typename proto::terminal<reference_>::type terminal_type; | |
55 typedef proto::extends<terminal_type, lexer_def_> proto_base_type; | |
56 | |
// Builds the lex::reference wrapper around *this; the constructor stores
// it in the proto terminal used to initialise the proto base class.
57 reference_ alias() const | |
58 { | |
59 return reference_(*this); | |
60 } | |
61 | |
62 public: | |
63 // Qi interface: metafunction calculating parser attribute type | |
// Iterator must provide a nested base_iterator_type (used by the typedef
// below); Context is not used.
64 template <typename Context, typename Iterator> | |
65 struct attribute | |
66 { | |
67 // the return value of a token set contains the matched token | |
68 // id, and the corresponding pair of iterators | |
69 typedef typename Iterator::base_iterator_type iterator_type; | |
70 typedef | |
71 fusion::vector2<id_type, iterator_range<iterator_type> > | |
72 type; | |
73 }; | |
74 | |
75 // Qi interface: parse functionality | |
// Returns true, assigns the current token to 'attr' and advances 'first'
// by one when the token at 'first' passes token_is_valid() and its state()
// equals first.get_state(). Returns false otherwise, including when
// first == last after skipping. The pre-skip runs before the check and is
// not undone on failure.
76 template <typename Iterator, typename Context | |
77 , typename Skipper, typename Attribute> | |
78 bool parse(Iterator& first, Iterator const& last | |
79 , Context& /*context*/, Skipper const& skipper | |
80 , Attribute& attr) const | |
81 { | |
82 qi::skip_over(first, last, skipper); // always do a pre-skip | |
83 | |
84 if (first != last) { | |
85 typedef typename | |
86 boost::detail::iterator_traits<Iterator>::value_type | |
87 token_type; | |
88 | |
89 token_type const& t = *first; | |
90 if (token_is_valid(t) && t.state() == first.get_state()) { | |
91 // any of the token definitions matched | |
92 spirit::traits::assign_to(t, attr); | |
93 ++first; | |
94 return true; | |
95 } | |
96 } | |
97 return false; | |
98 } | |
99 | |
100 // Qi interface: 'what' functionality | |
// Always reports the fixed description "lexer"; the context is ignored.
101 template <typename Context> | |
102 info what(Context& /*context*/) const | |
103 { | |
104 return info("lexer"); | |
105 } | |
106 | |
107 private: | |
108 // allow to use the lexer.self.add("regex1", id1)("regex2", id2); | |
109 // syntax | |
// Every operator() returns *this so that calls can be chained. In all
// overloads a token id equal to a default-constructed id_type() means
// "no id was given".
110 struct adder | |
111 { | |
// Keeps a reference to the owning lexer_def_; no copy is made.
112 adder(lexer_def_& def_) | |
113 : def(def_) {} | |
114 | |
115 // Add a token definition based on a single character as given | |
116 // by the first parameter, the second parameter allows to | |
117 // specify the token id to use for the new token. If no token | |
118 // id is given the character code is used. | |
// An empty target state is passed to add_token as a null pointer.
119 adder const& operator()(char_type c | |
120 , id_type token_id = id_type()) const | |
121 { | |
122 if (id_type() == token_id) | |
123 token_id = static_cast<id_type>(c); | |
124 def.def.add_token (def.state.c_str(), c, token_id | |
125 , def.targetstate.empty() ? 0 : def.targetstate.c_str()); | |
126 return *this; | |
127 } | |
128 | |
129 // Add a token definition based on a character sequence as | |
130 // given by the first parameter, the second parameter allows to | |
131 // specify the token id to use for the new token. If no token | |
132 // id is given this function will generate a unique id to be | |
133 // used as the token's id. | |
// The generated id comes from def.def.get_next_id(); an empty target
// state is passed to add_token as a null pointer.
134 adder const& operator()(string_type const& s | |
135 , id_type token_id = id_type()) const | |
136 { | |
137 if (id_type() == token_id) | |
138 token_id = def.def.get_next_id(); | |
139 def.def.add_token (def.state.c_str(), s, token_id | |
140 , def.targetstate.empty() ? 0 : def.targetstate.c_str()); | |
141 return *this; | |
142 } | |
143 | |
// Add a token definition given as a token_def object.
//  - No id passed and tokdef has no id yet: a new id is taken from
//    def.def.get_next_id() and stored in tokdef.
//  - No id passed and tokdef already has an id: that id is used.
//  - Id passed: it is stored in tokdef; the assertion checks that tokdef
//    had no id or already had this same id.
// The token_def is then registered through def.define(tokdef).
144 template <typename Attribute> | |
145 adder const& operator()( | |
146 token_def<Attribute, char_type, id_type>& tokdef | |
147 , id_type token_id = id_type()) const | |
148 { | |
149 // make sure we have a token id | |
150 if (id_type() == token_id) { | |
151 if (id_type() == tokdef.id()) { | |
152 token_id = def.def.get_next_id(); | |
153 tokdef.id(token_id); | |
154 } | |
155 else { | |
156 token_id = tokdef.id(); | |
157 } | |
158 } | |
159 else { | |
160 // the following assertion makes sure that the token_def | |
161 // instance has not been assigned a different id earlier | |
162 BOOST_ASSERT(id_type() == tokdef.id() | |
163 || token_id == tokdef.id()); | |
164 tokdef.id(token_id); | |
165 } | |
166 | |
167 def.define(tokdef); | |
168 return *this; | |
169 } | |
170 | |
// NOTE(review): the overload below is disabled. As written it passes 's'
// to add_token, but its parameters are 'c', 'token_id' and 'act'.
171 // template <typename F> | |
172 // adder const& operator()(char_type c, id_type token_id, F act) const | |
173 // { | |
174 // if (id_type() == token_id) | |
175 // token_id = def.def.get_next_id(); | |
176 // std::size_t unique_id = | |
177 // def.def.add_token (def.state.c_str(), s, token_id); | |
178 // def.def.add_action(unique_id, def.state.c_str(), act); | |
179 // return *this; | |
180 // } | |
181 | |
182 lexer_def_& def; | |
183 | |
184 private: | |
185 // silence MSVC warning C4512: assignment operator could not be generated | |
// Declared only; no definition appears in this header.
186 adder& operator= (adder const&); | |
187 }; | |
188 friend struct adder; | |
189 | |
190 // allow to use lexer.self.add_pattern("pattern1", "regex1")(...); | |
191 // syntax | |
192 struct pattern_adder | |
193 { | |
// Keeps a reference to the owning lexer_def_; no copy is made.
194 pattern_adder(lexer_def_& def_) | |
195 : def(def_) {} | |
196 | |
// Forwards (current state, p, s) to def.def.add_pattern; p and s are the
// ("pattern1", "regex1") pair from the usage shown above. Returns *this
// so that calls can be chained.
197 pattern_adder const& operator()(string_type const& p | |
198 , string_type const& s) const | |
199 { | |
200 def.def.add_pattern (def.state.c_str(), p, s); | |
201 return *this; | |
202 } | |
203 | |
204 lexer_def_& def; | |
205 | |
206 private: | |
207 // silence MSVC warning C4512: assignment operator could not be generated | |
// Declared only; no definition appears in this header.
208 pattern_adder& operator= (pattern_adder const&); | |
209 }; | |
210 friend struct pattern_adder; | |
211 | |
212 private: | |
213 // Helper function to invoke the necessary 2 step compilation | |
214 // process on token definition expressions | |
// Step 1: expr.collect() is given the lexer definition, the current state
// and the target state. Step 2: expr.add_actions() is given the lexer
// definition.
215 template <typename TokenExpr> | |
216 void compile2pass(TokenExpr const& expr) | |
217 { | |
218 expr.collect(def, state, targetstate); | |
219 expr.add_actions(def); | |
220 } | |
221 | |
222 public: | |
223 /////////////////////////////////////////////////////////////////// | |
// Compiles 'expr' in the lex domain and runs the result through
// compile2pass(). It does not call def.clear(), unlike the templated
// assignment operator further down.
224 template <typename Expr> | |
225 void define(Expr const& expr) | |
226 { | |
227 compile2pass(compile<lex::domain>(expr)); | |
228 } | |
229 | |
// Stores a reference to def_ and copies of both state names. 'add' and
// 'add_pattern' receive a reference to *this (via this_()), and the proto
// base is initialised with a terminal holding alias().
230 lexer_def_(LexerDef& def_, string_type const& state_ | |
231 , string_type const& targetstate_ = string_type()) | |
232 : proto_base_type(terminal_type::make(alias())) | |
233 , add(this_()), add_pattern(this_()), def(def_) | |
234 , state(state_), targetstate(targetstate_) | |
235 {} | |
236 | |
237 // allow to switch states | |
// Each overload returns a new lexer_def_ for the same LexerDef, bound to
// the given state and, where supplied, target state.
// NOTE(review): the result is returned by value, while 'add',
// 'add_pattern' and the proto terminal hold references to the object they
// were constructed in - presumably this relies on the copy being elided;
// confirm.
238 lexer_def_ operator()(char_type const* state) const | |
239 { | |
240 return lexer_def_(def, state); | |
241 } | |
242 lexer_def_ operator()(char_type const* state | |
243 , char_type const* targetstate) const | |
244 { | |
245 return lexer_def_(def, state, targetstate); | |
246 } | |
247 lexer_def_ operator()(string_type const& state | |
248 , string_type const& targetstate = string_type()) const | |
249 { | |
250 return lexer_def_(def, state, targetstate); | |
251 } | |
252 | |
253 // allow to assign a token definition expression | |
// Calls def.clear() for the current state before defining 'xpr', then
// returns *this.
254 template <typename Expr> | |
255 lexer_def_& operator= (Expr const& xpr) | |
256 { | |
257 // Report invalid expression error as early as possible. | |
258 // If you got an error_invalid_expression error message here, | |
259 // then the expression (expr) is not a valid spirit lex | |
260 // expression. | |
261 BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr); | |
262 | |
263 def.clear(state.c_str()); | |
264 define(xpr); | |
265 return *this; | |
266 } | |
267 | |
268 // explicitly tell the lexer that the given state will be defined | |
269 // (useful in conjunction with "*") | |
// A null 'state' selects def.initial_state(); the value returned by
// def.add_state() is passed back to the caller.
270 std::size_t add_state(char_type const* state = 0) | |
271 { | |
272 return def.add_state(state ? state : def.initial_state().c_str()); | |
273 } | |
274 | |
// Public function objects providing the add(...) and add_pattern(...)
// call syntax. They are declared before 'def', 'state' and 'targetstate',
// matching the order of the constructor's initialiser list.
275 adder add; | |
276 pattern_adder add_pattern; | |
277 | |
278 private: | |
// def: the lexer definition being populated (held by reference).
// state / targetstate: copies of the names given to the constructor.
279 LexerDef& def; | |
280 string_type state; | |
281 string_type targetstate; | |
282 | |
283 private: | |
284 // silence MSVC warning C4512: assignment operator could not be generated | |
// Declared only; no definition appears in this header.
285 lexer_def_& operator= (lexer_def_ const&); | |
286 }; | |
287 | |
288 #if defined(BOOST_NO_CXX11_RVALUE_REFERENCES) | |
289 // allow to assign a token definition expression | |
// This overload is compiled only when BOOST_NO_CXX11_RVALUE_REFERENCES is
// defined (see the enclosing #if). It takes the expression by non-const
// reference, hands it to lexdef.define() and returns lexdef so that the
// operator can be chained. It does not call clear() on the lexer definition.
290 template <typename LexerDef, typename Expr> | |
291 inline lexer_def_<LexerDef>& | |
292 operator+= (lexer_def_<LexerDef>& lexdef, Expr& xpr) | |
293 { | |
294 // Report invalid expression error as early as possible. | |
295 // If you got an error_invalid_expression error message here, | |
296 // then the expression (expr) is not a valid spirit lex | |
297 // expression. | |
298 BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr); | |
299 | |
300 lexdef.define(xpr); | |
301 return lexdef; | |
302 } | |
303 #else | |
304 // allow to assign a token definition expression | |
// This overload is compiled when BOOST_NO_CXX11_RVALUE_REFERENCES is not
// defined (the #else branch). It takes the expression as Expr&&, passes
// 'xpr' on to lexdef.define() as an lvalue (no forwarding call is made) and
// returns lexdef so that the operator can be chained.
305 template <typename LexerDef, typename Expr> | |
306 inline lexer_def_<LexerDef>& | |
307 operator+= (lexer_def_<LexerDef>& lexdef, Expr&& xpr) | |
308 { | |
309 // Report invalid expression error as early as possible. | |
310 // If you got an error_invalid_expression error message here, | |
311 // then the expression (expr) is not a valid spirit lex | |
312 // expression. | |
313 BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr); | |
314 | |
315 lexdef.define(xpr); | |
316 return lexdef; | |
317 } | |
318 #endif | |
319 | |
// Appends a token definition expression taken by const reference. This
// overload sits outside the #if/#else above, so it is available in both
// configurations. It hands 'xpr' to lexdef.define() and returns lexdef so
// that the operator can be chained.
320 template <typename LexerDef, typename Expr> | |
321 inline lexer_def_<LexerDef>& | |
322 operator+= (lexer_def_<LexerDef>& lexdef, Expr const& xpr) | |
323 { | |
324 // Report invalid expression error as early as possible. | |
325 // If you got an error_invalid_expression error message here, | |
326 // then the expression (expr) is not a valid spirit lex | |
327 // expression. | |
328 BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr); | |
329 | |
330 lexdef.define(xpr); | |
331 return lexdef; | |
332 } | |
333 } | |
334 | |
335 /////////////////////////////////////////////////////////////////////////// | |
336 // The match_flags flags are used to influence different matching | |
337 // modes of the lexer | |
// The struct only scopes the enumerators, so they are spelled
// match_flags::match_default and so on. The lexer constructor below takes
// these as an 'unsigned int flags' argument defaulting to match_default.
// NOTE(review): the non-zero values are distinct bits (1 and 2), so they
// are presumably meant to be combined with bitwise OR - confirm.
338 struct match_flags | |
339 { | |
340 enum enum_type | |
341 { | |
342 match_default = 0, // no flags | |
343 match_not_dot_newline = 1, // the regex '.' doesn't match newlines | |
344 match_icase = 2 // all matching operations are case insensitive | |
345 }; | |
346 }; | |
347 | |
348 /////////////////////////////////////////////////////////////////////////// | |
349 // This represents a lexer object (see the lexer class template below) | |
350 /////////////////////////////////////////////////////////////////////////// | |
351 | |
352 /////////////////////////////////////////////////////////////////////////// | |
353 // This is the first token id automatically assigned by the library | |
354 // if needed | |
// min_token_id is the default for the 'first_id' argument of the lexer
// constructor, which seeds the counter behind lexer::get_next_id().
355 enum tokenids | |
356 { | |
357 min_token_id = 0x10000 | |
358 }; | |
359 | |
// lexer<Lexer> derives publicly from the supplied Lexer type and adds two
// things: a counter that hands out token ids, and the public member 'self',
// a lexer_def bound to this object and to the base class' initial state.
360 template <typename Lexer> | |
361 class lexer : public Lexer | |
362 { | |
363 private: | |
364 // avoid warnings about using 'this' in constructor | |
365 lexer& this_() { return *this; } | |
366 | |
// Declared before 'self', so it is initialised before 'self' is built.
367 std::size_t next_token_id; // has to be an integral type | |
368 | |
369 public: | |
370 typedef Lexer lexer_type; | |
371 typedef typename Lexer::id_type id_type; | |
372 typedef typename Lexer::char_type char_type; | |
373 typedef typename Lexer::iterator_type iterator_type; | |
// Note: base_type names this class template's own type (lexer), not Lexer.
374 typedef lexer base_type; | |
375 | |
376 typedef detail::lexer_def_<lexer> lexer_def; | |
377 typedef std::basic_string<char_type> string_type; | |
378 | |
// flags:    passed unchanged to the Lexer base class constructor.
// first_id: first value that get_next_id() will return; defaults to
//           min_token_id.
// 'self' is bound to *this and to lexer_type::initial_state().
379 lexer(unsigned int flags = match_flags::match_default | |
380 , id_type first_id = id_type(min_token_id)) | |
381 : lexer_type(flags) | |
382 , next_token_id(first_id) | |
383 , self(this_(), lexer_type::initial_state()) | |
384 {} | |
385 | |
386 // access iterator interface | |
// begin() and end() forward to the base class; 'initial_state' defaults
// to a null pointer and is passed through unchanged.
387 template <typename Iterator> | |
388 iterator_type begin(Iterator& first, Iterator const& last | |
389 , char_type const* initial_state = 0) const | |
390 { return this->lexer_type::begin(first, last, initial_state); } | |
391 iterator_type end() const | |
392 { return this->lexer_type::end(); } | |
393 | |
// Forwards to the base class' add_state() and returns its result.
394 std::size_t map_state(char_type const* state) | |
395 { return this->lexer_type::add_state(state); } | |
396 | |
397 // create a unique token id | |
// Returns the current counter value converted to id_type, then
// increments the counter.
398 id_type get_next_id() { return id_type(next_token_id++); } | |
399 | |
400 lexer_def self; // allow for easy token definition | |
401 }; | |
402 | |
403 }}} | |
404 | |
405 #endif |