Chris@16
|
1 // Copyright (c) 2001-2011 Hartmut Kaiser
|
Chris@16
|
2 //
|
Chris@16
|
3 // Distributed under the Boost Software License, Version 1.0. (See accompanying
|
Chris@16
|
4 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
Chris@16
|
5
|
Chris@16
|
6 #if !defined(BOOST_SPIRIT_LEX_LEXER_MAR_17_2007_0139PM)
|
Chris@16
|
7 #define BOOST_SPIRIT_LEX_LEXER_MAR_17_2007_0139PM
|
Chris@16
|
8
|
Chris@16
|
9 #if defined(_MSC_VER)
|
Chris@16
|
10 #pragma once
|
Chris@16
|
11 #endif
|
Chris@16
|
12
|
Chris@16
|
13 #include <iosfwd>
|
Chris@16
|
14
|
Chris@16
|
15 #include <boost/spirit/home/support/detail/lexer/generator.hpp>
|
Chris@16
|
16 #include <boost/spirit/home/support/detail/lexer/rules.hpp>
|
Chris@16
|
17 #include <boost/spirit/home/support/detail/lexer/consts.hpp>
|
Chris@16
|
18 #include <boost/spirit/home/support/unused.hpp>
|
Chris@16
|
19
|
Chris@16
|
20 #include <boost/spirit/home/lex/lexer/lexertl/token.hpp>
|
Chris@16
|
21 #include <boost/spirit/home/lex/lexer/lexertl/functor.hpp>
|
Chris@16
|
22 #include <boost/spirit/home/lex/lexer/lexertl/functor_data.hpp>
|
Chris@16
|
23 #include <boost/spirit/home/lex/lexer/lexertl/iterator.hpp>
|
Chris@16
|
24 #if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
|
Chris@16
|
25 #include <boost/spirit/home/support/detail/lexer/debug.hpp>
|
Chris@16
|
26 #endif
|
Chris@16
|
27
|
Chris@16
|
28 #include <boost/foreach.hpp>
|
Chris@16
|
29
|
Chris@16
|
30 namespace boost { namespace spirit { namespace lex { namespace lexertl
|
Chris@16
|
31 {
|
Chris@16
|
32 ///////////////////////////////////////////////////////////////////////////
|
Chris@16
|
33 namespace detail
|
Chris@16
|
34 {
|
Chris@16
|
35 ///////////////////////////////////////////////////////////////////////
|
Chris@16
|
36 // The must_escape function checks if the given character value needs
|
Chris@16
|
37 // to be preceded by a backslash character to disable its special
|
Chris@16
|
38 // meaning in the context of a regular expression
|
Chris@16
|
39 ///////////////////////////////////////////////////////////////////////
|
Chris@16
|
template <typename Char>
inline bool must_escape(Char c)
{
    // Characters carrying a special meaning inside a regular expression.
    // FIXME: more needed?
    static char const specials[] = {
        '+', '/', '*', '?',
        '|',
        '(', ')',
        '[', ']',
        '{', '}',
        '.',
        '^', '$',
        '\\',
        '"'
    };
    std::size_t const count = sizeof(specials) / sizeof(specials[0]);

    // report a hit as soon as the given character matches a table entry
    for (std::size_t i = 0; i != count; ++i)
    {
        if (c == specials[i])
            return true;
    }
    return false;
}
|
Chris@16
|
61
|
Chris@16
|
62 ///////////////////////////////////////////////////////////////////////
|
Chris@16
|
63 // The escape function returns the string representation of the given
|
Chris@16
|
64 // character value, possibly escaped with a backslash character, to
|
Chris@16
|
65 // allow it being safely used in a regular expression definition.
|
Chris@16
|
66 ///////////////////////////////////////////////////////////////////////
|
Chris@16
|
67 template <typename Char>
|
Chris@16
|
68 inline std::basic_string<Char> escape(Char ch)
|
Chris@16
|
69 {
|
Chris@16
|
70 std::basic_string<Char> result(1, ch);
|
Chris@16
|
71 if (detail::must_escape(ch))
|
Chris@16
|
72 {
|
Chris@16
|
73 typedef typename std::basic_string<Char>::size_type size_type;
|
Chris@16
|
74 result.insert((size_type)0, 1, '\\');
|
Chris@16
|
75 }
|
Chris@16
|
76 return result;
|
Chris@16
|
77 }
|
Chris@16
|
78
|
Chris@16
|
79 ///////////////////////////////////////////////////////////////////////
|
Chris@16
|
80 //
|
Chris@16
|
81 ///////////////////////////////////////////////////////////////////////
|
Chris@16
|
82 inline boost::lexer::regex_flags map_flags(unsigned int flags)
|
Chris@16
|
83 {
|
Chris@16
|
84 unsigned int retval = boost::lexer::none;
|
Chris@16
|
85 if (flags & match_flags::match_not_dot_newline)
|
Chris@16
|
86 retval |= boost::lexer::dot_not_newline;
|
Chris@16
|
87 if (flags & match_flags::match_icase)
|
Chris@16
|
88 retval |= boost::lexer::icase;
|
Chris@16
|
89
|
Chris@16
|
90 return boost::lexer::regex_flags(retval);
|
Chris@16
|
91 }
|
Chris@16
|
92 }
|
Chris@16
|
93
|
Chris@16
|
94 ///////////////////////////////////////////////////////////////////////////
|
Chris@16
|
// Forward declaration only: the lexer class template below names this
// function template as a friend, so it has to be visible beforehand.
template <typename Lexer, typename F>
bool generate_static(
    Lexer const&
  , std::basic_ostream<typename Lexer::char_type>&
  , typename Lexer::char_type const*
  , F);
|
Chris@16
|
99
|
Chris@16
|
100 ///////////////////////////////////////////////////////////////////////////
|
Chris@16
|
101 //
|
Chris@16
|
102 // Every lexer type to be used as a lexer for Spirit has to conform to
|
Chris@16
|
103 // the following public interface:
|
Chris@16
|
104 //
|
Chris@16
|
105 // typedefs:
|
Chris@16
|
106 // iterator_type The type of the iterator exposed by this lexer.
|
Chris@16
|
107 // token_type The type of the tokens returned from the exposed
|
Chris@16
|
108 // iterators.
|
Chris@16
|
109 //
|
Chris@16
|
110 // functions:
|
Chris@16
|
111 // default constructor
|
Chris@16
|
112 // Since lexers are instantiated as base classes
|
Chris@16
|
113 // only it might be a good idea to make this
|
Chris@16
|
114 // constructor protected.
|
Chris@16
|
115 // begin, end Return a pair of iterators, when dereferenced
|
Chris@16
|
116 // returning the sequence of tokens recognized in
|
Chris@16
|
117 // the input stream given as the parameters to the
|
Chris@16
|
118 // begin() function.
|
Chris@16
|
119 // add_token Should add the definition of a token to be
|
Chris@16
|
120 // recognized by this lexer.
|
Chris@16
|
121 // clear Should delete all current token definitions
|
Chris@16
|
122 // associated with the given state of this lexer
|
Chris@16
|
123 // object.
|
Chris@16
|
124 //
|
Chris@16
|
125 // template parameters:
|
Chris@16
|
126 // Iterator The type of the iterator used to access the
|
Chris@16
|
127 // underlying character stream.
|
Chris@16
|
128 // Token The type of the tokens to be returned from the
|
Chris@16
|
129 // exposed token iterator.
|
Chris@16
|
130 // Functor The type of the InputPolicy to use to instantiate
|
Chris@16
|
131 // the multi_pass iterator type to be used as the
|
Chris@16
|
132 // token iterator (returned from begin()/end()).
|
Chris@16
|
133 //
|
Chris@16
|
134 ///////////////////////////////////////////////////////////////////////////
|
Chris@16
|
135
|
Chris@16
|
136 ///////////////////////////////////////////////////////////////////////////
|
Chris@16
|
137 //
|
Chris@16
|
138 // The lexer class is an implementation of a Spirit.Lex lexer on
|
Chris@16
|
139 // top of Ben Hanson's lexertl library as outlined above (For more
|
Chris@16
|
140 // information about lexertl go here: http://www.benhanson.net/lexertl.html).
|
Chris@16
|
141 //
|
Chris@16
|
142 // This class is supposed to be used as the first and only template
|
Chris@16
|
143 // parameter while instantiating instances of a lex::lexer class.
|
Chris@16
|
144 //
|
Chris@16
|
145 ///////////////////////////////////////////////////////////////////////////
|
Chris@16
|
146 template <typename Token = token<>
|
Chris@16
|
147 , typename Iterator = typename Token::iterator_type
|
Chris@16
|
148 , typename Functor = functor<Token, lexertl::detail::data, Iterator> >
|
Chris@16
|
149 class lexer
|
Chris@16
|
150 {
|
Chris@16
|
151 private:
|
Chris@16
|
152 struct dummy { void true_() {} };
|
Chris@16
|
153 typedef void (dummy::*safe_bool)();
|
Chris@16
|
154
|
Chris@16
|
155 static std::size_t const all_states_id = static_cast<std::size_t>(-2);
|
Chris@16
|
156
|
Chris@16
|
157 public:
|
Chris@16
|
158 operator safe_bool() const
|
Chris@16
|
159 { return initialized_dfa_ ? &dummy::true_ : 0; }
|
Chris@16
|
160
|
Chris@16
|
161 typedef typename boost::detail::iterator_traits<Iterator>::value_type
|
Chris@16
|
162 char_type;
|
Chris@16
|
163 typedef std::basic_string<char_type> string_type;
|
Chris@16
|
164
|
Chris@16
|
165 typedef boost::lexer::basic_rules<char_type> basic_rules_type;
|
Chris@16
|
166
|
Chris@16
|
167 // Every lexer type to be used as a lexer for Spirit has to conform to
|
Chris@16
|
168 // a public interface .
|
Chris@16
|
169 typedef Token token_type;
|
Chris@16
|
170 typedef typename Token::id_type id_type;
|
Chris@16
|
171 typedef iterator<Functor> iterator_type;
|
Chris@16
|
172
|
Chris@16
|
173 private:
|
Chris@16
|
174 // this type is purely used for the iterator_type construction below
|
Chris@16
|
175 struct iterator_data_type
|
Chris@16
|
176 {
|
Chris@16
|
177 typedef typename Functor::semantic_actions_type semantic_actions_type;
|
Chris@16
|
178
|
Chris@16
|
179 iterator_data_type(
|
Chris@16
|
180 boost::lexer::basic_state_machine<char_type> const& sm
|
Chris@16
|
181 , boost::lexer::basic_rules<char_type> const& rules
|
Chris@16
|
182 , semantic_actions_type const& actions)
|
Chris@16
|
183 : state_machine_(sm), rules_(rules), actions_(actions)
|
Chris@16
|
184 {}
|
Chris@16
|
185
|
Chris@16
|
186 boost::lexer::basic_state_machine<char_type> const& state_machine_;
|
Chris@16
|
187 boost::lexer::basic_rules<char_type> const& rules_;
|
Chris@16
|
188 semantic_actions_type const& actions_;
|
Chris@16
|
189
|
Chris@16
|
190 private:
|
Chris@16
|
191 // silence MSVC warning C4512: assignment operator could not be generated
|
Chris@16
|
192 iterator_data_type& operator= (iterator_data_type const&);
|
Chris@16
|
193 };
|
Chris@16
|
194
|
Chris@16
|
195 public:
|
Chris@16
|
196 // Return the start iterator usable for iterating over the generated
|
Chris@16
|
197 // tokens.
|
Chris@16
|
198 iterator_type begin(Iterator& first, Iterator const& last
|
Chris@16
|
199 , char_type const* initial_state = 0) const
|
Chris@16
|
200 {
|
Chris@16
|
201 if (!init_dfa()) // never minimize DFA for dynamic lexers
|
Chris@16
|
202 return iterator_type();
|
Chris@16
|
203
|
Chris@16
|
204 iterator_data_type iterator_data(state_machine_, rules_, actions_);
|
Chris@16
|
205 return iterator_type(iterator_data, first, last, initial_state);
|
Chris@16
|
206 }
|
Chris@16
|
207
|
Chris@16
|
208 // Return the end iterator usable to stop iterating over the generated
|
Chris@16
|
209 // tokens.
|
Chris@16
|
210 iterator_type end() const
|
Chris@16
|
211 {
|
Chris@16
|
212 return iterator_type();
|
Chris@16
|
213 }
|
Chris@16
|
214
|
Chris@16
|
215 protected:
|
Chris@16
|
216 // Lexer instances can be created by means of a derived class only.
|
Chris@16
|
217 lexer(unsigned int flags)
|
Chris@16
|
218 : flags_(detail::map_flags(flags))
|
Chris@16
|
219 , rules_(flags_)
|
Chris@16
|
220 , initialized_dfa_(false)
|
Chris@16
|
221 {}
|
Chris@16
|
222
|
Chris@16
|
223 public:
|
Chris@16
|
224 // interface for token definition management
|
Chris@16
|
225 std::size_t add_token(char_type const* state, char_type tokendef,
|
Chris@16
|
226 std::size_t token_id, char_type const* targetstate)
|
Chris@16
|
227 {
|
Chris@16
|
228 add_state(state);
|
Chris@16
|
229 initialized_dfa_ = false;
|
Chris@16
|
230 if (state == all_states())
|
Chris@16
|
231 return rules_.add(state, detail::escape(tokendef), token_id, rules_.dot());
|
Chris@16
|
232
|
Chris@16
|
233 if (0 == targetstate)
|
Chris@16
|
234 targetstate = state;
|
Chris@16
|
235 else
|
Chris@16
|
236 add_state(targetstate);
|
Chris@16
|
237 return rules_.add(state, detail::escape(tokendef), token_id, targetstate);
|
Chris@16
|
238 }
|
Chris@16
|
239 std::size_t add_token(char_type const* state, string_type const& tokendef,
|
Chris@16
|
240 std::size_t token_id, char_type const* targetstate)
|
Chris@16
|
241 {
|
Chris@16
|
242 add_state(state);
|
Chris@16
|
243 initialized_dfa_ = false;
|
Chris@16
|
244 if (state == all_states())
|
Chris@16
|
245 return rules_.add(state, tokendef, token_id, rules_.dot());
|
Chris@16
|
246
|
Chris@16
|
247 if (0 == targetstate)
|
Chris@16
|
248 targetstate = state;
|
Chris@16
|
249 else
|
Chris@16
|
250 add_state(targetstate);
|
Chris@16
|
251 return rules_.add(state, tokendef, token_id, targetstate);
|
Chris@16
|
252 }
|
Chris@16
|
253
|
Chris@16
|
254 // interface for pattern definition management
|
Chris@16
|
255 void add_pattern (char_type const* state, string_type const& name,
|
Chris@16
|
256 string_type const& patterndef)
|
Chris@16
|
257 {
|
Chris@16
|
258 add_state(state);
|
Chris@16
|
259 rules_.add_macro(name.c_str(), patterndef);
|
Chris@16
|
260 initialized_dfa_ = false;
|
Chris@16
|
261 }
|
Chris@16
|
262
|
Chris@16
|
263 boost::lexer::rules const& get_rules() const { return rules_; }
|
Chris@16
|
264
|
Chris@16
|
265 void clear(char_type const* state)
|
Chris@16
|
266 {
|
Chris@16
|
267 std::size_t s = rules_.state(state);
|
Chris@16
|
268 if (boost::lexer::npos != s)
|
Chris@16
|
269 rules_.clear(state);
|
Chris@16
|
270 initialized_dfa_ = false;
|
Chris@16
|
271 }
|
Chris@16
|
272 std::size_t add_state(char_type const* state)
|
Chris@16
|
273 {
|
Chris@16
|
274 if (state == all_states())
|
Chris@16
|
275 return all_states_id;
|
Chris@16
|
276
|
Chris@16
|
277 std::size_t stateid = rules_.state(state);
|
Chris@16
|
278 if (boost::lexer::npos == stateid) {
|
Chris@16
|
279 stateid = rules_.add_state(state);
|
Chris@16
|
280 initialized_dfa_ = false;
|
Chris@16
|
281 }
|
Chris@16
|
282 return stateid;
|
Chris@16
|
283 }
|
Chris@16
|
284 string_type initial_state() const
|
Chris@16
|
285 {
|
Chris@16
|
286 return string_type(rules_.initial());
|
Chris@16
|
287 }
|
Chris@16
|
288 string_type all_states() const
|
Chris@16
|
289 {
|
Chris@16
|
290 return string_type(rules_.all_states());
|
Chris@16
|
291 }
|
Chris@16
|
292
|
Chris@16
|
293 // Register a semantic action with the given id
|
Chris@16
|
294 template <typename F>
|
Chris@16
|
295 void add_action(std::size_t unique_id, std::size_t state, F act)
|
Chris@16
|
296 {
|
Chris@16
|
297 // If you see an error here stating add_action is not a member of
|
Chris@16
|
298 // fusion::unused_type then you are probably having semantic actions
|
Chris@16
|
299 // attached to at least one token in the lexer definition without
|
Chris@16
|
300 // using the lex::lexertl::actor_lexer<> as its base class.
|
Chris@16
|
301 typedef typename Functor::wrap_action_type wrapper_type;
|
Chris@16
|
302 if (state == all_states_id) {
|
Chris@16
|
303 // add the action to all known states
|
Chris@16
|
304 typedef typename
|
Chris@16
|
305 basic_rules_type::string_size_t_map::value_type
|
Chris@16
|
306 state_type;
|
Chris@16
|
307
|
Chris@16
|
308 std::size_t states = rules_.statemap().size();
|
Chris@16
|
309 BOOST_FOREACH(state_type const& s, rules_.statemap()) {
|
Chris@16
|
310 for (std::size_t j = 0; j < states; ++j)
|
Chris@16
|
311 actions_.add_action(unique_id + j, s.second, wrapper_type::call(act));
|
Chris@16
|
312 }
|
Chris@16
|
313 }
|
Chris@16
|
314 else {
|
Chris@16
|
315 actions_.add_action(unique_id, state, wrapper_type::call(act));
|
Chris@16
|
316 }
|
Chris@16
|
317 }
|
Chris@16
|
318 // template <typename F>
|
Chris@16
|
319 // void add_action(std::size_t unique_id, char_type const* state, F act)
|
Chris@16
|
320 // {
|
Chris@16
|
321 // typedef typename Functor::wrap_action_type wrapper_type;
|
Chris@16
|
322 // actions_.add_action(unique_id, add_state(state), wrapper_type::call(act));
|
Chris@16
|
323 // }
|
Chris@16
|
324
|
Chris@16
|
325 // We do not minimize the state machine by default anymore because
|
Chris@16
|
326 // Ben said: "If you can afford to generate a lexer at runtime, there
|
Chris@16
|
327 // is little point in calling minimise."
|
Chris@16
|
328 // Go figure.
|
Chris@16
|
329 bool init_dfa(bool minimize = false) const
|
Chris@16
|
330 {
|
Chris@16
|
331 if (!initialized_dfa_) {
|
Chris@16
|
332 state_machine_.clear();
|
Chris@16
|
333 typedef boost::lexer::basic_generator<char_type> generator;
|
Chris@16
|
334 generator::build (rules_, state_machine_);
|
Chris@16
|
335 if (minimize)
|
Chris@16
|
336 generator::minimise (state_machine_);
|
Chris@16
|
337
|
Chris@16
|
338 #if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
|
Chris@16
|
339 boost::lexer::debug::dump(state_machine_, std::cerr);
|
Chris@16
|
340 #endif
|
Chris@16
|
341 initialized_dfa_ = true;
|
Chris@16
|
342
|
Chris@16
|
343 // // release memory held by rules description
|
Chris@16
|
344 // basic_rules_type rules;
|
Chris@16
|
345 // rules.init_state_info(rules_); // preserve states
|
Chris@16
|
346 // std::swap(rules, rules_);
|
Chris@16
|
347 }
|
Chris@16
|
348 return true;
|
Chris@16
|
349 }
|
Chris@16
|
350
|
Chris@16
|
351 private:
|
Chris@16
|
352 // lexertl specific data
|
Chris@16
|
353 mutable boost::lexer::basic_state_machine<char_type> state_machine_;
|
Chris@16
|
354 boost::lexer::regex_flags flags_;
|
Chris@16
|
355 /*mutable*/ basic_rules_type rules_;
|
Chris@16
|
356
|
Chris@16
|
357 typename Functor::semantic_actions_type actions_;
|
Chris@16
|
358 mutable bool initialized_dfa_;
|
Chris@16
|
359
|
Chris@16
|
360 // generator functions must be able to access members directly
|
Chris@16
|
361 template <typename Lexer, typename F>
|
Chris@16
|
362 friend bool generate_static(Lexer const&
|
Chris@16
|
363 , std::basic_ostream<typename Lexer::char_type>&
|
Chris@16
|
364 , typename Lexer::char_type const*, F);
|
Chris@16
|
365 };
|
Chris@16
|
366
|
Chris@16
|
367 ///////////////////////////////////////////////////////////////////////////
|
Chris@16
|
368 //
|
Chris@16
|
369 // The actor_lexer class is another implementation of a Spirit.Lex
|
Chris@16
|
370 // lexer on top of Ben Hanson's lexertl library as outlined above (For
|
Chris@16
|
371 // more information about lexertl go here:
|
Chris@16
|
372 // http://www.benhanson.net/lexertl.html).
|
Chris@16
|
373 //
|
Chris@16
|
374 // The only difference to the lexer class above is that token_def
|
Chris@16
|
375 // definitions may have semantic (lexer) actions attached while being
|
Chris@16
|
376 // defined:
|
Chris@16
|
377 //
|
Chris@16
|
378 // int w;
|
Chris@16
|
379 // token_def word = "[^ \t\n]+";
|
Chris@16
|
380 // self = word[++ref(w)]; // see example: word_count_lexer
|
Chris@16
|
381 //
|
Chris@16
|
382 // This class is supposed to be used as the first and only template
|
Chris@16
|
383 // parameter while instantiating instances of a lex::lexer class.
|
Chris@16
|
384 //
|
Chris@16
|
385 ///////////////////////////////////////////////////////////////////////////
|
Chris@16
|
386 template <typename Token = token<>
|
Chris@16
|
387 , typename Iterator = typename Token::iterator_type
|
Chris@16
|
388 , typename Functor = functor<Token, lexertl::detail::data, Iterator, mpl::true_> >
|
Chris@16
|
389 class actor_lexer : public lexer<Token, Iterator, Functor>
|
Chris@16
|
390 {
|
Chris@16
|
391 protected:
|
Chris@16
|
392 // Lexer instances can be created by means of a derived class only.
|
Chris@16
|
393 actor_lexer(unsigned int flags)
|
Chris@16
|
394 : lexer<Token, Iterator, Functor>(flags) {}
|
Chris@16
|
395 };
|
Chris@16
|
396
|
Chris@16
|
397 }}}}
|
Chris@16
|
398
|
Chris@16
|
399 #endif
|