annotate DEPENDENCIES/generic/include/boost/wave/cpplexer/re2clex/cpp_re2c_lexer.hpp @ 125:34e428693f5d vext

Vext -> Repoint
author Chris Cannam
date Thu, 14 Jun 2018 11:15:39 +0100
parents 2665513ce2d3
children
rev   line source
Chris@16 1 /*=============================================================================
Chris@16 2 Boost.Wave: A Standard compliant C++ preprocessor library
Chris@16 3
Chris@16 4 Re2C based C++ lexer
Chris@16 5
Chris@16 6 http://www.boost.org/
Chris@16 7
Chris@16 8 Copyright (c) 2001-2012 Hartmut Kaiser. Distributed under the Boost
Chris@16 9 Software License, Version 1.0. (See accompanying file
Chris@16 10 LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
Chris@16 11 =============================================================================*/
Chris@16 12
Chris@16 13 #if !defined(CPP_RE2C_LEXER_HPP_B81A2629_D5B1_4944_A97D_60254182B9A8_INCLUDED)
Chris@16 14 #define CPP_RE2C_LEXER_HPP_B81A2629_D5B1_4944_A97D_60254182B9A8_INCLUDED
Chris@16 15
Chris@16 16 #include <string>
Chris@16 17 #include <cstdio>
Chris@16 18 #include <cstdarg>
Chris@16 19 #if defined(BOOST_SPIRIT_DEBUG)
Chris@16 20 #include <iostream>
Chris@16 21 #endif // defined(BOOST_SPIRIT_DEBUG)
Chris@16 22
Chris@16 23 #include <boost/concept_check.hpp>
Chris@16 24 #include <boost/assert.hpp>
Chris@16 25 #include <boost/spirit/include/classic_core.hpp>
Chris@16 26
Chris@16 27 #include <boost/wave/wave_config.hpp>
Chris@16 28 #include <boost/wave/language_support.hpp>
Chris@16 29 #include <boost/wave/token_ids.hpp>
Chris@16 30 #include <boost/wave/util/file_position.hpp>
Chris@16 31 #include <boost/wave/cpplexer/validate_universal_char.hpp>
Chris@16 32 #include <boost/wave/cpplexer/cpplexer_exceptions.hpp>
Chris@16 33 #include <boost/wave/cpplexer/token_cache.hpp>
Chris@16 34 #include <boost/wave/cpplexer/convert_trigraphs.hpp>
Chris@16 35
Chris@16 36 #include <boost/wave/cpplexer/cpp_lex_interface.hpp>
Chris@16 37 #include <boost/wave/cpplexer/re2clex/scanner.hpp>
Chris@16 38 #include <boost/wave/cpplexer/re2clex/cpp_re.hpp>
Chris@16 39 #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
Chris@16 40 #include <boost/wave/cpplexer/detect_include_guards.hpp>
Chris@16 41 #endif
Chris@16 42
Chris@16 43 #include <boost/wave/cpplexer/cpp_lex_interface_generator.hpp>
Chris@16 44
Chris@16 45 // this must occur after all of the includes and before any code appears
Chris@16 46 #ifdef BOOST_HAS_ABI_HEADERS
Chris@16 47 #include BOOST_ABI_PREFIX
Chris@16 48 #endif
Chris@16 49
Chris@16 50 ///////////////////////////////////////////////////////////////////////////////
Chris@16 51 namespace boost {
Chris@16 52 namespace wave {
Chris@16 53 namespace cpplexer {
Chris@16 54 namespace re2clex {
Chris@16 55
Chris@16 56 ///////////////////////////////////////////////////////////////////////////////
Chris@16 57 //
Chris@16 58 // encapsulation of the re2c based cpp lexer
Chris@16 59 //
Chris@16 60 ///////////////////////////////////////////////////////////////////////////////
Chris@16 61
Chris@16 62 template <typename IteratorT,
Chris@16 63 typename PositionT = boost::wave::util::file_position_type,
Chris@16 64 typename TokenT = lex_token<PositionT> >
Chris@16 65 class lexer
Chris@16 66 {
Chris@16 67 public:
Chris@16 68 typedef TokenT token_type;
Chris@16 69 typedef typename token_type::string_type string_type;
Chris@16 70
Chris@16 71 lexer(IteratorT const &first, IteratorT const &last,
Chris@16 72 PositionT const &pos, boost::wave::language_support language_);
Chris@16 73 ~lexer();
Chris@16 74
Chris@16 75 token_type& get(token_type&);
Chris@16 76 void set_position(PositionT const &pos)
Chris@16 77 {
Chris@16 78 // set position has to change the file name and line number only
Chris@16 79 filename = pos.get_file();
Chris@16 80 scanner.line = pos.get_line();
Chris@16 81 // scanner.column = scanner.curr_column = pos.get_column();
Chris@16 82 scanner.file_name = filename.c_str();
Chris@16 83 }
Chris@16 84 #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
Chris@16 85 bool has_include_guards(std::string& guard_name) const
Chris@16 86 {
Chris@16 87 return guards.detected(guard_name);
Chris@16 88 }
Chris@16 89 #endif
Chris@16 90
Chris@16 91 // error reporting from the re2c generated lexer
Chris@16 92 static int report_error(Scanner const* s, int code, char const *, ...);
Chris@16 93
Chris@16 94 private:
Chris@16 95 static char const *tok_names[];
Chris@16 96
Chris@16 97 Scanner scanner;
Chris@16 98 string_type filename;
Chris@16 99 string_type value;
Chris@16 100 bool at_eof;
Chris@16 101 boost::wave::language_support language;
Chris@16 102 #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
Chris@16 103 include_guards<token_type> guards;
Chris@16 104 #endif
Chris@16 105
Chris@16 106 #if BOOST_WAVE_SUPPORT_THREADING == 0
Chris@16 107 static token_cache<string_type> const cache;
Chris@16 108 #else
Chris@16 109 token_cache<string_type> const cache;
Chris@16 110 #endif
Chris@16 111 };
Chris@16 112
Chris@16 113 ///////////////////////////////////////////////////////////////////////////////
Chris@16 114 // initialize cpp lexer
Chris@16 115 template <typename IteratorT, typename PositionT, typename TokenT>
Chris@16 116 inline
Chris@16 117 lexer<IteratorT, PositionT, TokenT>::lexer(IteratorT const &first,
Chris@16 118 IteratorT const &last, PositionT const &pos,
Chris@16 119 boost::wave::language_support language_)
Chris@16 120 : filename(pos.get_file()), at_eof(false), language(language_)
Chris@16 121 #if BOOST_WAVE_SUPPORT_THREADING != 0
Chris@16 122 , cache()
Chris@16 123 #endif
Chris@16 124 {
Chris@16 125 using namespace std; // some systems have memset in std
Chris@16 126 memset(&scanner, '\0', sizeof(Scanner));
Chris@16 127 scanner.eol_offsets = aq_create();
Chris@16 128 if (first != last) {
Chris@16 129 scanner.first = scanner.act = (uchar *)&(*first);
Chris@16 130 scanner.last = scanner.first + std::distance(first, last);
Chris@16 131 }
Chris@16 132 scanner.line = pos.get_line();
Chris@16 133 scanner.column = scanner.curr_column = pos.get_column();
Chris@16 134 scanner.error_proc = report_error;
Chris@16 135 scanner.file_name = filename.c_str();
Chris@16 136
Chris@16 137 #if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
Chris@16 138 scanner.enable_ms_extensions = true;
Chris@16 139 #else
Chris@16 140 scanner.enable_ms_extensions = false;
Chris@16 141 #endif
Chris@16 142
Chris@16 143 #if BOOST_WAVE_SUPPORT_VARIADICS_PLACEMARKERS != 0
Chris@16 144 scanner.act_in_c99_mode = boost::wave::need_c99(language_);
Chris@16 145 #endif
Chris@16 146
Chris@16 147 #if BOOST_WAVE_SUPPORT_IMPORT_KEYWORD != 0
Chris@16 148 scanner.enable_import_keyword = !boost::wave::need_c99(language_);
Chris@16 149 #else
Chris@16 150 scanner.enable_import_keyword = false;
Chris@16 151 #endif
Chris@16 152
Chris@16 153 scanner.detect_pp_numbers = boost::wave::need_prefer_pp_numbers(language_);
Chris@16 154 scanner.single_line_only = boost::wave::need_single_line(language_);
Chris@16 155
Chris@16 156 #if BOOST_WAVE_SUPPORT_CPP0X != 0
Chris@16 157 scanner.act_in_cpp0x_mode = boost::wave::need_cpp0x(language_);
Chris@16 158 #else
Chris@16 159 scanner.act_in_cpp0x_mode = false;
Chris@16 160 #endif
Chris@16 161 }
Chris@16 162
Chris@16 163 template <typename IteratorT, typename PositionT, typename TokenT>
Chris@16 164 inline
Chris@16 165 lexer<IteratorT, PositionT, TokenT>::~lexer()
Chris@16 166 {
Chris@16 167 using namespace std; // some systems have free in std
Chris@16 168 aq_terminate(scanner.eol_offsets);
Chris@16 169 free(scanner.bot);
Chris@16 170 }
Chris@16 171
Chris@16 172 ///////////////////////////////////////////////////////////////////////////////
Chris@16 173 // get the next token from the input stream
Chris@16 174 template <typename IteratorT, typename PositionT, typename TokenT>
Chris@16 175 inline TokenT&
Chris@16 176 lexer<IteratorT, PositionT, TokenT>::get(TokenT& result)
Chris@16 177 {
Chris@16 178 if (at_eof)
Chris@16 179 return result = token_type(); // return T_EOI
Chris@16 180
Chris@16 181 std::size_t actline = scanner.line;
Chris@16 182 token_id id = token_id(scan(&scanner));
Chris@16 183
Chris@16 184 switch (static_cast<unsigned int>(id)) {
Chris@16 185 case T_IDENTIFIER:
Chris@16 186 // test identifier characters for validity (throws if invalid chars found)
Chris@16 187 value = string_type((char const *)scanner.tok,
Chris@16 188 scanner.cur-scanner.tok);
Chris@16 189 if (!boost::wave::need_no_character_validation(language))
Chris@16 190 impl::validate_identifier_name(value, actline, scanner.column, filename);
Chris@16 191 break;
Chris@16 192
Chris@16 193 case T_STRINGLIT:
Chris@16 194 case T_CHARLIT:
Chris@16 195 case T_RAWSTRINGLIT:
Chris@16 196 // test literal characters for validity (throws if invalid chars found)
Chris@16 197 value = string_type((char const *)scanner.tok,
Chris@16 198 scanner.cur-scanner.tok);
Chris@16 199 if (boost::wave::need_convert_trigraphs(language))
Chris@16 200 value = impl::convert_trigraphs(value);
Chris@16 201 if (!boost::wave::need_no_character_validation(language))
Chris@16 202 impl::validate_literal(value, actline, scanner.column, filename);
Chris@16 203 break;
Chris@16 204
Chris@16 205 #if BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0
Chris@16 206 case T_PP_HHEADER:
Chris@16 207 case T_PP_QHEADER:
Chris@16 208 case T_PP_INCLUDE:
Chris@16 209 // convert to the corresponding ..._next token, if appropriate
Chris@16 210 {
Chris@16 211 value = string_type((char const *)scanner.tok,
Chris@16 212 scanner.cur-scanner.tok);
Chris@16 213
Chris@16 214 // Skip '#' and whitespace and see whether we find an 'include_next' here.
Chris@16 215 typename string_type::size_type start = value.find("include");
Chris@16 216 if (value.compare(start, 12, "include_next", 12) == 0)
Chris@16 217 id = token_id(id | AltTokenType);
Chris@16 218 break;
Chris@16 219 }
Chris@16 220 #endif
Chris@16 221
Chris@16 222 case T_LONGINTLIT: // supported in C++11, C99 and long_long mode
Chris@16 223 value = string_type((char const *)scanner.tok,
Chris@16 224 scanner.cur-scanner.tok);
Chris@16 225 if (!boost::wave::need_long_long(language)) {
Chris@16 226 // syntax error: not allowed in C++ mode
Chris@16 227 BOOST_WAVE_LEXER_THROW(lexing_exception, invalid_long_long_literal,
Chris@16 228 value.c_str(), actline, scanner.column, filename.c_str());
Chris@16 229 }
Chris@16 230 break;
Chris@16 231
Chris@16 232 case T_OCTALINT:
Chris@16 233 case T_DECIMALINT:
Chris@16 234 case T_HEXAINT:
Chris@16 235 case T_INTLIT:
Chris@16 236 case T_FLOATLIT:
Chris@16 237 case T_FIXEDPOINTLIT:
Chris@16 238 case T_CCOMMENT:
Chris@16 239 case T_CPPCOMMENT:
Chris@16 240 case T_SPACE:
Chris@16 241 case T_SPACE2:
Chris@16 242 case T_ANY:
Chris@16 243 case T_PP_NUMBER:
Chris@16 244 value = string_type((char const *)scanner.tok,
Chris@16 245 scanner.cur-scanner.tok);
Chris@16 246 break;
Chris@16 247
Chris@16 248 case T_EOF:
Chris@16 249 // T_EOF is returned as a valid token, the next call will return T_EOI,
Chris@16 250 // i.e. the actual end of input
Chris@16 251 at_eof = true;
Chris@16 252 value.clear();
Chris@16 253 break;
Chris@16 254
Chris@16 255 case T_OR_TRIGRAPH:
Chris@16 256 case T_XOR_TRIGRAPH:
Chris@16 257 case T_LEFTBRACE_TRIGRAPH:
Chris@16 258 case T_RIGHTBRACE_TRIGRAPH:
Chris@16 259 case T_LEFTBRACKET_TRIGRAPH:
Chris@16 260 case T_RIGHTBRACKET_TRIGRAPH:
Chris@16 261 case T_COMPL_TRIGRAPH:
Chris@16 262 case T_POUND_TRIGRAPH:
Chris@16 263 if (boost::wave::need_convert_trigraphs(language)) {
Chris@16 264 value = cache.get_token_value(BASEID_FROM_TOKEN(id));
Chris@16 265 }
Chris@16 266 else {
Chris@16 267 value = string_type((char const *)scanner.tok,
Chris@16 268 scanner.cur-scanner.tok);
Chris@16 269 }
Chris@16 270 break;
Chris@16 271
Chris@16 272 case T_ANY_TRIGRAPH:
Chris@16 273 if (boost::wave::need_convert_trigraphs(language)) {
Chris@16 274 value = impl::convert_trigraph(
Chris@16 275 string_type((char const *)scanner.tok));
Chris@16 276 }
Chris@16 277 else {
Chris@16 278 value = string_type((char const *)scanner.tok,
Chris@16 279 scanner.cur-scanner.tok);
Chris@16 280 }
Chris@16 281 break;
Chris@16 282
Chris@16 283 default:
Chris@16 284 if (CATEGORY_FROM_TOKEN(id) != EXTCATEGORY_FROM_TOKEN(id) ||
Chris@16 285 IS_CATEGORY(id, UnknownTokenType))
Chris@16 286 {
Chris@16 287 value = string_type((char const *)scanner.tok,
Chris@16 288 scanner.cur-scanner.tok);
Chris@16 289 }
Chris@16 290 else {
Chris@16 291 value = cache.get_token_value(id);
Chris@16 292 }
Chris@16 293 break;
Chris@16 294 }
Chris@16 295
Chris@16 296 // std::cerr << boost::wave::get_token_name(id) << ": " << value << std::endl;
Chris@16 297
Chris@16 298 // the re2c lexer reports the new line number for newline tokens
Chris@16 299 result = token_type(id, value, PositionT(filename, actline, scanner.column));
Chris@16 300
Chris@16 301 #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
Chris@16 302 return guards.detect_guard(result);
Chris@16 303 #else
Chris@16 304 return result;
Chris@16 305 #endif
Chris@16 306 }
Chris@16 307
Chris@16 308 template <typename IteratorT, typename PositionT, typename TokenT>
Chris@16 309 inline int
Chris@16 310 lexer<IteratorT, PositionT, TokenT>::report_error(Scanner const *s, int errcode,
Chris@16 311 char const *msg, ...)
Chris@16 312 {
Chris@16 313 BOOST_ASSERT(0 != s);
Chris@16 314 BOOST_ASSERT(0 != msg);
Chris@16 315
Chris@16 316 using namespace std; // some system have vsprintf in namespace std
Chris@16 317
Chris@16 318 char buffer[200]; // should be large enough
Chris@16 319 va_list params;
Chris@16 320 va_start(params, msg);
Chris@16 321 vsprintf(buffer, msg, params);
Chris@16 322 va_end(params);
Chris@16 323
Chris@16 324 BOOST_WAVE_LEXER_THROW_VAR(lexing_exception, errcode, buffer, s->line,
Chris@16 325 s->column, s->file_name);
Chris@16 326 // BOOST_UNREACHABLE_RETURN(0);
Chris@16 327 return 0;
Chris@16 328 }
Chris@16 329
Chris@16 330 ///////////////////////////////////////////////////////////////////////////////
Chris@16 331 //
Chris@16 332 // lex_functor
Chris@16 333 //
Chris@16 334 ///////////////////////////////////////////////////////////////////////////////
Chris@16 335
Chris@16 336 template <typename IteratorT,
Chris@16 337 typename PositionT = boost::wave::util::file_position_type,
Chris@16 338 typename TokenT = typename lexer<IteratorT, PositionT>::token_type>
Chris@16 339 class lex_functor
Chris@16 340 : public lex_input_interface_generator<TokenT>
Chris@16 341 {
Chris@16 342 public:
Chris@16 343 typedef TokenT token_type;
Chris@16 344
Chris@16 345 lex_functor(IteratorT const &first, IteratorT const &last,
Chris@16 346 PositionT const &pos, boost::wave::language_support language)
Chris@16 347 : re2c_lexer(first, last, pos, language)
Chris@16 348 {}
Chris@16 349 virtual ~lex_functor() {}
Chris@16 350
Chris@16 351 // get the next token from the input stream
Chris@16 352 token_type& get(token_type& result) { return re2c_lexer.get(result); }
Chris@16 353 void set_position(PositionT const &pos) { re2c_lexer.set_position(pos); }
Chris@16 354 #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
Chris@16 355 bool has_include_guards(std::string& guard_name) const
Chris@16 356 { return re2c_lexer.has_include_guards(guard_name); }
Chris@16 357 #endif
Chris@16 358
Chris@16 359 private:
Chris@16 360 lexer<IteratorT, PositionT, TokenT> re2c_lexer;
Chris@16 361 };
Chris@16 362
Chris@16 363 #if BOOST_WAVE_SUPPORT_THREADING == 0
Chris@16 364 ///////////////////////////////////////////////////////////////////////////////
Chris@16 365 template <typename IteratorT, typename PositionT, typename TokenT>
Chris@16 366 token_cache<typename lexer<IteratorT, PositionT, TokenT>::string_type> const
Chris@16 367 lexer<IteratorT, PositionT, TokenT>::cache =
Chris@16 368 token_cache<typename lexer<IteratorT, PositionT, TokenT>::string_type>();
Chris@16 369 #endif
Chris@16 370
Chris@16 371 } // namespace re2clex
Chris@16 372
Chris@16 373 ///////////////////////////////////////////////////////////////////////////////
Chris@16 374 //
Chris@16 375 // The new_lexer_gen<>::new_lexer function (declared in cpp_lex_interface.hpp)
Chris@16 376 // should be defined inline, if the lex_functor shouldn't be instantiated
Chris@16 377 // separately from the lex_iterator.
Chris@16 378 //
Chris@16 379 // Separate (explicit) instantiation helps to reduce compilation time.
Chris@16 380 //
Chris@16 381 ///////////////////////////////////////////////////////////////////////////////
Chris@16 382
// new_lexer is defined inline unless the lexer is instantiated separately
#if BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION == 0
#define BOOST_WAVE_RE2C_NEW_LEXER_INLINE inline
#else
#define BOOST_WAVE_RE2C_NEW_LEXER_INLINE
#endif
Chris@16 388
Chris@16 389 ///////////////////////////////////////////////////////////////////////////////
Chris@16 390 //
Chris@16 391 // The 'new_lexer' function allows the opaque generation of a new lexer object.
Chris@16 392 // It is coupled to the iterator type to allow to decouple the lexer/iterator
Chris@16 393 // configurations at compile time.
Chris@16 394 //
Chris@16 395 // This function is declared inside the cpp_lex_token.hpp file, which is
Chris@16 396 // referenced by the source file calling the lexer and the source file, which
Chris@16 397 // instantiates the lex_functor. But it is defined here, so it will be
Chris@16 398 // instantiated only while compiling the source file, which instantiates the
Chris@16 399 // lex_functor. While the cpp_re2c_token.hpp file may be included everywhere,
Chris@16 400 // this file (cpp_re2c_lexer.hpp) should be included only once. This allows
Chris@16 401 // to decouple the lexer interface from the lexer implementation and reduces
Chris@16 402 // compilation time.
Chris@16 403 //
Chris@16 404 ///////////////////////////////////////////////////////////////////////////////
Chris@16 405
Chris@16 406 template <typename IteratorT, typename PositionT, typename TokenT>
Chris@16 407 BOOST_WAVE_RE2C_NEW_LEXER_INLINE
Chris@16 408 lex_input_interface<TokenT> *
Chris@16 409 new_lexer_gen<IteratorT, PositionT, TokenT>::new_lexer(IteratorT const &first,
Chris@16 410 IteratorT const &last, PositionT const &pos,
Chris@16 411 boost::wave::language_support language)
Chris@16 412 {
Chris@16 413 using re2clex::lex_functor;
Chris@16 414 return new lex_functor<IteratorT, PositionT, TokenT>(first, last, pos, language);
Chris@16 415 }
Chris@16 416
Chris@16 417 #undef BOOST_WAVE_RE2C_NEW_LEXER_INLINE
Chris@16 418
Chris@16 419 ///////////////////////////////////////////////////////////////////////////////
Chris@16 420 } // namespace cpplexer
Chris@16 421 } // namespace wave
Chris@16 422 } // namespace boost
Chris@16 423
Chris@16 424 // the suffix header occurs after all of the code
Chris@16 425 #ifdef BOOST_HAS_ABI_HEADERS
Chris@16 426 #include BOOST_ABI_SUFFIX
Chris@16 427 #endif
Chris@16 428
Chris@16 429 #endif // !defined(CPP_RE2C_LEXER_HPP_B81A2629_D5B1_4944_A97D_60254182B9A8_INCLUDED)