DEPENDENCIES/generic/include/boost/spirit/home/lex/lexer/lexertl/lexer.hpp
Mercurial repository vamp-build-and-test, changeset 16:2665513ce2d3 (parent 15:663ca0da4350)

author:   Chris Cannam
date:     Tue, 05 Aug 2014 11:11:38 +0100
summary:  Add boost headers
// Copyright (c) 2001-2011 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#if !defined(BOOST_SPIRIT_LEX_LEXER_MAR_17_2007_0139PM)
#define BOOST_SPIRIT_LEX_LEXER_MAR_17_2007_0139PM

#if defined(_MSC_VER)
#pragma once
#endif

#include <iosfwd>

#include <boost/spirit/home/support/detail/lexer/generator.hpp>
#include <boost/spirit/home/support/detail/lexer/rules.hpp>
#include <boost/spirit/home/support/detail/lexer/consts.hpp>
#include <boost/spirit/home/support/unused.hpp>

#include <boost/spirit/home/lex/lexer/lexertl/token.hpp>
#include <boost/spirit/home/lex/lexer/lexertl/functor.hpp>
#include <boost/spirit/home/lex/lexer/lexertl/functor_data.hpp>
#include <boost/spirit/home/lex/lexer/lexertl/iterator.hpp>
#if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
#include <boost/spirit/home/support/detail/lexer/debug.hpp>
#endif

#include <boost/foreach.hpp>

namespace boost { namespace spirit { namespace lex { namespace lexertl
{
    ///////////////////////////////////////////////////////////////////////////
    namespace detail
    {
        ///////////////////////////////////////////////////////////////////////
        // The must_escape function checks if the given character value needs
        // to be preceded by a backslash character to disable its special
        // meaning in the context of a regular expression
        ///////////////////////////////////////////////////////////////////////
        template <typename Char>
        inline bool must_escape(Char c)
        {
            // FIXME: more needed?
            switch (c) {
            case '+': case '/': case '*': case '?':
            case '|':
            case '(': case ')':
            case '[': case ']':
            case '{': case '}':
            case '.':
            case '^': case '$':
            case '\\':
            case '"':
                return true;

            default:
                break;
            }
            return false;
        }

        ///////////////////////////////////////////////////////////////////////
        // The escape function returns the string representation of the given
        // character value, possibly escaped with a backslash character, to
        // allow it to be used safely in a regular expression definition.
        ///////////////////////////////////////////////////////////////////////
        template <typename Char>
        inline std::basic_string<Char> escape(Char ch)
        {
            std::basic_string<Char> result(1, ch);
            if (detail::must_escape(ch))
            {
                typedef typename std::basic_string<Char>::size_type size_type;
                result.insert((size_type)0, 1, '\\');
            }
            return result;
        }

        ///////////////////////////////////////////////////////////////////////
        //
        ///////////////////////////////////////////////////////////////////////
        inline boost::lexer::regex_flags map_flags(unsigned int flags)
        {
            unsigned int retval = boost::lexer::none;
            if (flags & match_flags::match_not_dot_newline)
                retval |= boost::lexer::dot_not_newline;
            if (flags & match_flags::match_icase)
                retval |= boost::lexer::icase;

            return boost::lexer::regex_flags(retval);
        }
    }

    ///////////////////////////////////////////////////////////////////////////
    template <typename Lexer, typename F>
    bool generate_static(Lexer const&
      , std::basic_ostream<typename Lexer::char_type>&
      , typename Lexer::char_type const*, F);

    ///////////////////////////////////////////////////////////////////////////
    //
    //  Every lexer type to be used as a lexer for Spirit has to conform to
    //  the following public interface:
    //
    //    typedefs:
    //        iterator_type   The type of the iterator exposed by this lexer.
    //        token_type      The type of the tokens returned from the exposed
    //                        iterators.
    //
    //    functions:
    //        default constructor
    //                        Since lexers are instantiated as base classes
    //                        only it might be a good idea to make this
    //                        constructor protected.
    //        begin, end      Return a pair of iterators, when dereferenced
    //                        returning the sequence of tokens recognized in
    //                        the input stream given as the parameters to the
    //                        begin() function.
    //        add_token       Should add the definition of a token to be
    //                        recognized by this lexer.
    //        clear           Should delete all current token definitions
    //                        associated with the given state of this lexer
    //                        object.
    //
    //    template parameters:
    //        Iterator        The type of the iterator used to access the
    //                        underlying character stream.
    //        Token           The type of the tokens to be returned from the
    //                        exposed token iterator.
    //        Functor         The type of the InputPolicy to use to instantiate
    //                        the multi_pass iterator type to be used as the
    //                        token iterator (returned from begin()/end()).
    //
    ///////////////////////////////////////////////////////////////////////////

    ///////////////////////////////////////////////////////////////////////////
    //
    //  The lexer class is an implementation of a Spirit.Lex lexer on
    //  top of Ben Hanson's lexertl library as outlined above (For more
    //  information about lexertl go here: http://www.benhanson.net/lexertl.html).
    //
    //  This class is supposed to be used as the first and only template
    //  parameter while instantiating instances of a lex::lexer class.
    //
    ///////////////////////////////////////////////////////////////////////////
    template <typename Token = token<>
      , typename Iterator = typename Token::iterator_type
      , typename Functor = functor<Token, lexertl::detail::data, Iterator> >
    class lexer
    {
    private:
        struct dummy { void true_() {} };
        typedef void (dummy::*safe_bool)();

        static std::size_t const all_states_id = static_cast<std::size_t>(-2);

    public:
        operator safe_bool() const
        { return initialized_dfa_ ? &dummy::true_ : 0; }

        typedef typename boost::detail::iterator_traits<Iterator>::value_type
            char_type;
        typedef std::basic_string<char_type> string_type;

        typedef boost::lexer::basic_rules<char_type> basic_rules_type;

        //  Every lexer type to be used as a lexer for Spirit has to conform to
        //  a public interface.
        typedef Token token_type;
        typedef typename Token::id_type id_type;
        typedef iterator<Functor> iterator_type;

    private:
        // this type is purely used for the iterator_type construction below
        struct iterator_data_type
        {
            typedef typename Functor::semantic_actions_type semantic_actions_type;

            iterator_data_type(
                  boost::lexer::basic_state_machine<char_type> const& sm
                , boost::lexer::basic_rules<char_type> const& rules
                , semantic_actions_type const& actions)
              : state_machine_(sm), rules_(rules), actions_(actions)
            {}

            boost::lexer::basic_state_machine<char_type> const& state_machine_;
            boost::lexer::basic_rules<char_type> const& rules_;
            semantic_actions_type const& actions_;

        private:
            // silence MSVC warning C4512: assignment operator could not be generated
            iterator_data_type& operator= (iterator_data_type const&);
        };

    public:
        // Return the start iterator usable for iterating over the generated
        // tokens.
        iterator_type begin(Iterator& first, Iterator const& last
          , char_type const* initial_state = 0) const
        {
            if (!init_dfa())    // never minimize DFA for dynamic lexers
                return iterator_type();

            iterator_data_type iterator_data(state_machine_, rules_, actions_);
            return iterator_type(iterator_data, first, last, initial_state);
        }

        // Return the end iterator usable to stop iterating over the generated
        // tokens.
        iterator_type end() const
        {
            return iterator_type();
        }

    protected:
        // Lexer instances can be created by means of a derived class only.
        lexer(unsigned int flags)
          : flags_(detail::map_flags(flags))
          , rules_(flags_)
          , initialized_dfa_(false)
        {}

    public:
        // interface for token definition management
        std::size_t add_token(char_type const* state, char_type tokendef,
            std::size_t token_id, char_type const* targetstate)
        {
            add_state(state);
            initialized_dfa_ = false;
            if (state == all_states())
                return rules_.add(state, detail::escape(tokendef), token_id, rules_.dot());

            if (0 == targetstate)
                targetstate = state;
            else
                add_state(targetstate);
            return rules_.add(state, detail::escape(tokendef), token_id, targetstate);
        }
        std::size_t add_token(char_type const* state, string_type const& tokendef,
            std::size_t token_id, char_type const* targetstate)
        {
            add_state(state);
            initialized_dfa_ = false;
            if (state == all_states())
                return rules_.add(state, tokendef, token_id, rules_.dot());

            if (0 == targetstate)
                targetstate = state;
            else
                add_state(targetstate);
            return rules_.add(state, tokendef, token_id, targetstate);
        }

        // interface for pattern definition management
        void add_pattern (char_type const* state, string_type const& name,
            string_type const& patterndef)
        {
            add_state(state);
            rules_.add_macro(name.c_str(), patterndef);
            initialized_dfa_ = false;
        }

        boost::lexer::rules const& get_rules() const { return rules_; }

        void clear(char_type const* state)
        {
            std::size_t s = rules_.state(state);
            if (boost::lexer::npos != s)
                rules_.clear(state);
            initialized_dfa_ = false;
        }
        std::size_t add_state(char_type const* state)
        {
            if (state == all_states())
                return all_states_id;

            std::size_t stateid = rules_.state(state);
            if (boost::lexer::npos == stateid) {
                stateid = rules_.add_state(state);
                initialized_dfa_ = false;
            }
            return stateid;
        }
        string_type initial_state() const
        {
            return string_type(rules_.initial());
        }
        string_type all_states() const
        {
            return string_type(rules_.all_states());
        }

        // Register a semantic action with the given id
        template <typename F>
        void add_action(std::size_t unique_id, std::size_t state, F act)
        {
            // If you see an error here stating add_action is not a member of
            // fusion::unused_type then you are probably having semantic actions
            // attached to at least one token in the lexer definition without
            // using the lex::lexertl::actor_lexer<> as its base class.
            typedef typename Functor::wrap_action_type wrapper_type;
            if (state == all_states_id) {
                // add the action to all known states
                typedef typename
                    basic_rules_type::string_size_t_map::value_type
                state_type;

                std::size_t states = rules_.statemap().size();
                BOOST_FOREACH(state_type const& s, rules_.statemap()) {
                    for (std::size_t j = 0; j < states; ++j)
                        actions_.add_action(unique_id + j, s.second, wrapper_type::call(act));
                }
            }
            else {
                actions_.add_action(unique_id, state, wrapper_type::call(act));
            }
        }
        // template <typename F>
        // void add_action(std::size_t unique_id, char_type const* state, F act)
        // {
        //     typedef typename Functor::wrap_action_type wrapper_type;
        //     actions_.add_action(unique_id, add_state(state), wrapper_type::call(act));
        // }

        // We do not minimize the state machine by default anymore because
        // Ben said: "If you can afford to generate a lexer at runtime, there
        // is little point in calling minimise."
        // Go figure.
        bool init_dfa(bool minimize = false) const
        {
            if (!initialized_dfa_) {
                state_machine_.clear();
                typedef boost::lexer::basic_generator<char_type> generator;
                generator::build (rules_, state_machine_);
                if (minimize)
                    generator::minimise (state_machine_);

#if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
                boost::lexer::debug::dump(state_machine_, std::cerr);
#endif
                initialized_dfa_ = true;

                // // release memory held by rules description
                // basic_rules_type rules;
                // rules.init_state_info(rules_);   // preserve states
                // std::swap(rules, rules_);
            }
            return true;
        }

    private:
        // lexertl specific data
        mutable boost::lexer::basic_state_machine<char_type> state_machine_;
        boost::lexer::regex_flags flags_;
        /*mutable*/ basic_rules_type rules_;

        typename Functor::semantic_actions_type actions_;
        mutable bool initialized_dfa_;

        // generator functions must be able to access members directly
        template <typename Lexer, typename F>
        friend bool generate_static(Lexer const&
          , std::basic_ostream<typename Lexer::char_type>&
          , typename Lexer::char_type const*, F);
    };

    ///////////////////////////////////////////////////////////////////////////
    //
    //  The actor_lexer class is another implementation of a Spirit.Lex
    //  lexer on top of Ben Hanson's lexertl library as outlined above (For
    //  more information about lexertl go here:
    //  http://www.benhanson.net/lexertl.html).
    //
    //  The only difference to the lexer class above is that token_def
    //  definitions may have semantic (lexer) actions attached while being
    //  defined:
    //
    //      int w;
    //      token_def word = "[^ \t\n]+";
    //      self = word[++ref(w)];        // see example: word_count_lexer
    //
    //  This class is supposed to be used as the first and only template
    //  parameter while instantiating instances of a lex::lexer class.
    //
    ///////////////////////////////////////////////////////////////////////////
    template <typename Token = token<>
      , typename Iterator = typename Token::iterator_type
      , typename Functor = functor<Token, lexertl::detail::data, Iterator, mpl::true_> >
    class actor_lexer : public lexer<Token, Iterator, Functor>
    {
    protected:
        // Lexer instances can be created by means of a derived class only.
        actor_lexer(unsigned int flags)
          : lexer<Token, Iterator, Functor>(flags) {}
    };

}}}}

#endif
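
For context, a minimal sketch of how a lexer built on this class is typically driven from application code, loosely following the word_count examples shipped with Spirit.Lex. The token ids, regular expressions, and input text below are illustrative assumptions, not anything defined by the header above; the relevant points are that the token set derives from lex::lexer<>, that lexertl::lexer supplies the token and iterator types, and that begin()/end() expose the token iterators described in the interface comment.

// Illustrative sketch only -- not part of lexer.hpp. Token ids and patterns
// are arbitrary choices for this example.
#include <iostream>
#include <string>

#include <boost/spirit/include/lex_lexertl.hpp>

namespace lex = boost::spirit::lex;

// token ids, chosen above the predefined range as in the Spirit.Lex tutorials
enum token_ids
{
    ID_WORD = 1000,
    ID_EOL,
    ID_CHAR
};

template <typename Lexer>
struct word_count_tokens : lex::lexer<Lexer>
{
    word_count_tokens()
    {
        // register one regular expression per token id in the lexer's
        // initial state; internally this is routed to the add_token()
        // interface shown in the header above
        this->self.add
            ("[^ \t\n]+", ID_WORD)   // anything that is not whitespace
            ("\n", ID_EOL)           // newline
            (".", ID_CHAR)           // any other single character
        ;
    }
};

int main()
{
    // plug the lexertl::lexer defined above into lex::lexer<> via the token
    // set; the default token type just carries an iterator pair
    typedef lex::lexertl::token<char const*> token_type;
    typedef lex::lexertl::lexer<token_type> lexer_type;

    word_count_tokens<lexer_type> word_count;

    std::string text("Our hiking boots are ready\n");
    char const* first = text.c_str();
    char const* last = &first[text.size()];

    // begin() builds the DFA on first use (init_dfa) and returns the start
    // token iterator; end() returns the corresponding end iterator
    lexer_type::iterator_type iter = word_count.begin(first, last);
    lexer_type::iterator_type end = word_count.end();

    std::size_t words = 0;
    while (iter != end && token_is_valid(*iter)) {
        if ((*iter).id() == ID_WORD)
            ++words;
        ++iter;
    }
    std::cout << words << " words\n";
    return 0;
}

Swapping lexertl::lexer for lexertl::actor_lexer in the lexer_type typedef is what permits attaching semantic actions to token definitions, as illustrated in the actor_lexer comment block above.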