Chris@16: /*============================================================================= Chris@16: Boost.Wave: A Standard compliant C++ preprocessor library Chris@16: Chris@16: Re2C based C++ lexer Chris@16: Chris@16: http://www.boost.org/ Chris@16: Chris@16: Copyright (c) 2001-2012 Hartmut Kaiser. Distributed under the Boost Chris@16: Software License, Version 1.0. (See accompanying file Chris@16: LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) Chris@16: =============================================================================*/ Chris@16: Chris@16: #if !defined(CPP_RE2C_LEXER_HPP_B81A2629_D5B1_4944_A97D_60254182B9A8_INCLUDED) Chris@16: #define CPP_RE2C_LEXER_HPP_B81A2629_D5B1_4944_A97D_60254182B9A8_INCLUDED Chris@16: Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #if defined(BOOST_SPIRIT_DEBUG) Chris@16: #include Chris@16: #endif // defined(BOOST_SPIRIT_DEBUG) Chris@16: Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0 Chris@16: #include Chris@16: #endif Chris@16: Chris@16: #include Chris@16: Chris@16: // this must occur after all of the includes and before any code appears Chris@16: #ifdef BOOST_HAS_ABI_HEADERS Chris@16: #include BOOST_ABI_PREFIX Chris@16: #endif Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: namespace boost { Chris@16: namespace wave { Chris@16: namespace cpplexer { Chris@16: namespace re2clex { Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: // Chris@16: // encapsulation of the re2c based cpp lexer Chris@16: // Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: Chris@16: template > Chris@16: class lexer Chris@16: { Chris@16: public: Chris@16: typedef TokenT token_type; Chris@16: typedef typename token_type::string_type string_type; Chris@16: Chris@16: lexer(IteratorT const &first, IteratorT const &last, Chris@16: PositionT const &pos, boost::wave::language_support language_); Chris@16: ~lexer(); Chris@16: Chris@16: token_type& get(token_type&); Chris@16: void set_position(PositionT const &pos) Chris@16: { Chris@16: // set position has to change the file name and line number only Chris@16: filename = pos.get_file(); Chris@16: scanner.line = pos.get_line(); Chris@16: // scanner.column = scanner.curr_column = pos.get_column(); Chris@16: scanner.file_name = filename.c_str(); Chris@16: } Chris@16: #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0 Chris@16: bool has_include_guards(std::string& guard_name) const Chris@16: { Chris@16: return guards.detected(guard_name); Chris@16: } Chris@16: #endif Chris@16: Chris@16: // error reporting from the re2c generated lexer Chris@16: static int report_error(Scanner const* s, int code, char const *, ...); Chris@16: Chris@16: private: Chris@16: static char const *tok_names[]; Chris@16: Chris@16: Scanner scanner; Chris@16: string_type filename; Chris@16: string_type value; Chris@16: bool at_eof; Chris@16: boost::wave::language_support language; Chris@16: #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0 Chris@16: include_guards guards; Chris@16: #endif Chris@16: Chris@16: #if BOOST_WAVE_SUPPORT_THREADING == 0 Chris@16: static token_cache const cache; Chris@16: #else Chris@16: token_cache const cache; Chris@16: #endif Chris@16: }; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: // initialize cpp lexer Chris@16: template Chris@16: inline Chris@16: lexer::lexer(IteratorT const &first, Chris@16: IteratorT const &last, PositionT const &pos, Chris@16: boost::wave::language_support language_) Chris@16: : filename(pos.get_file()), at_eof(false), language(language_) Chris@16: #if BOOST_WAVE_SUPPORT_THREADING != 0 Chris@16: , cache() Chris@16: #endif Chris@16: { Chris@16: using namespace std; // some systems have memset in std Chris@16: memset(&scanner, '\0', sizeof(Scanner)); Chris@16: scanner.eol_offsets = aq_create(); Chris@16: if (first != last) { Chris@16: scanner.first = scanner.act = (uchar *)&(*first); Chris@16: scanner.last = scanner.first + std::distance(first, last); Chris@16: } Chris@16: scanner.line = pos.get_line(); Chris@16: scanner.column = scanner.curr_column = pos.get_column(); Chris@16: scanner.error_proc = report_error; Chris@16: scanner.file_name = filename.c_str(); Chris@16: Chris@16: #if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0 Chris@16: scanner.enable_ms_extensions = true; Chris@16: #else Chris@16: scanner.enable_ms_extensions = false; Chris@16: #endif Chris@16: Chris@16: #if BOOST_WAVE_SUPPORT_VARIADICS_PLACEMARKERS != 0 Chris@16: scanner.act_in_c99_mode = boost::wave::need_c99(language_); Chris@16: #endif Chris@16: Chris@16: #if BOOST_WAVE_SUPPORT_IMPORT_KEYWORD != 0 Chris@16: scanner.enable_import_keyword = !boost::wave::need_c99(language_); Chris@16: #else Chris@16: scanner.enable_import_keyword = false; Chris@16: #endif Chris@16: Chris@16: scanner.detect_pp_numbers = boost::wave::need_prefer_pp_numbers(language_); Chris@16: scanner.single_line_only = boost::wave::need_single_line(language_); Chris@16: Chris@16: #if BOOST_WAVE_SUPPORT_CPP0X != 0 Chris@16: scanner.act_in_cpp0x_mode = boost::wave::need_cpp0x(language_); Chris@16: #else Chris@16: scanner.act_in_cpp0x_mode = false; Chris@16: #endif Chris@16: } Chris@16: Chris@16: template Chris@16: inline Chris@16: lexer::~lexer() Chris@16: { Chris@16: using namespace std; // some systems have free in std Chris@16: aq_terminate(scanner.eol_offsets); Chris@16: free(scanner.bot); Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: // get the next token from the input stream Chris@16: template Chris@16: inline TokenT& Chris@16: lexer::get(TokenT& result) Chris@16: { Chris@16: if (at_eof) Chris@16: return result = token_type(); // return T_EOI Chris@16: Chris@16: std::size_t actline = scanner.line; Chris@16: token_id id = token_id(scan(&scanner)); Chris@16: Chris@16: switch (static_cast(id)) { Chris@16: case T_IDENTIFIER: Chris@16: // test identifier characters for validity (throws if invalid chars found) Chris@16: value = string_type((char const *)scanner.tok, Chris@16: scanner.cur-scanner.tok); Chris@16: if (!boost::wave::need_no_character_validation(language)) Chris@16: impl::validate_identifier_name(value, actline, scanner.column, filename); Chris@16: break; Chris@16: Chris@16: case T_STRINGLIT: Chris@16: case T_CHARLIT: Chris@16: case T_RAWSTRINGLIT: Chris@16: // test literal characters for validity (throws if invalid chars found) Chris@16: value = string_type((char const *)scanner.tok, Chris@16: scanner.cur-scanner.tok); Chris@16: if (boost::wave::need_convert_trigraphs(language)) Chris@16: value = impl::convert_trigraphs(value); Chris@16: if (!boost::wave::need_no_character_validation(language)) Chris@16: impl::validate_literal(value, actline, scanner.column, filename); Chris@16: break; Chris@16: Chris@16: #if BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0 Chris@16: case T_PP_HHEADER: Chris@16: case T_PP_QHEADER: Chris@16: case T_PP_INCLUDE: Chris@16: // convert to the corresponding ..._next token, if appropriate Chris@16: { Chris@16: value = string_type((char const *)scanner.tok, Chris@16: scanner.cur-scanner.tok); Chris@16: Chris@16: // Skip '#' and whitespace and see whether we find an 'include_next' here. Chris@16: typename string_type::size_type start = value.find("include"); Chris@16: if (value.compare(start, 12, "include_next", 12) == 0) Chris@16: id = token_id(id | AltTokenType); Chris@16: break; Chris@16: } Chris@16: #endif Chris@16: Chris@16: case T_LONGINTLIT: // supported in C++11, C99 and long_long mode Chris@16: value = string_type((char const *)scanner.tok, Chris@16: scanner.cur-scanner.tok); Chris@16: if (!boost::wave::need_long_long(language)) { Chris@16: // syntax error: not allowed in C++ mode Chris@16: BOOST_WAVE_LEXER_THROW(lexing_exception, invalid_long_long_literal, Chris@16: value.c_str(), actline, scanner.column, filename.c_str()); Chris@16: } Chris@16: break; Chris@16: Chris@16: case T_OCTALINT: Chris@16: case T_DECIMALINT: Chris@16: case T_HEXAINT: Chris@16: case T_INTLIT: Chris@16: case T_FLOATLIT: Chris@16: case T_FIXEDPOINTLIT: Chris@16: case T_CCOMMENT: Chris@16: case T_CPPCOMMENT: Chris@16: case T_SPACE: Chris@16: case T_SPACE2: Chris@16: case T_ANY: Chris@16: case T_PP_NUMBER: Chris@16: value = string_type((char const *)scanner.tok, Chris@16: scanner.cur-scanner.tok); Chris@16: break; Chris@16: Chris@16: case T_EOF: Chris@16: // T_EOF is returned as a valid token, the next call will return T_EOI, Chris@16: // i.e. the actual end of input Chris@16: at_eof = true; Chris@16: value.clear(); Chris@16: break; Chris@16: Chris@16: case T_OR_TRIGRAPH: Chris@16: case T_XOR_TRIGRAPH: Chris@16: case T_LEFTBRACE_TRIGRAPH: Chris@16: case T_RIGHTBRACE_TRIGRAPH: Chris@16: case T_LEFTBRACKET_TRIGRAPH: Chris@16: case T_RIGHTBRACKET_TRIGRAPH: Chris@16: case T_COMPL_TRIGRAPH: Chris@16: case T_POUND_TRIGRAPH: Chris@16: if (boost::wave::need_convert_trigraphs(language)) { Chris@16: value = cache.get_token_value(BASEID_FROM_TOKEN(id)); Chris@16: } Chris@16: else { Chris@16: value = string_type((char const *)scanner.tok, Chris@16: scanner.cur-scanner.tok); Chris@16: } Chris@16: break; Chris@16: Chris@16: case T_ANY_TRIGRAPH: Chris@16: if (boost::wave::need_convert_trigraphs(language)) { Chris@16: value = impl::convert_trigraph( Chris@16: string_type((char const *)scanner.tok)); Chris@16: } Chris@16: else { Chris@16: value = string_type((char const *)scanner.tok, Chris@16: scanner.cur-scanner.tok); Chris@16: } Chris@16: break; Chris@16: Chris@16: default: Chris@16: if (CATEGORY_FROM_TOKEN(id) != EXTCATEGORY_FROM_TOKEN(id) || Chris@16: IS_CATEGORY(id, UnknownTokenType)) Chris@16: { Chris@16: value = string_type((char const *)scanner.tok, Chris@16: scanner.cur-scanner.tok); Chris@16: } Chris@16: else { Chris@16: value = cache.get_token_value(id); Chris@16: } Chris@16: break; Chris@16: } Chris@16: Chris@16: // std::cerr << boost::wave::get_token_name(id) << ": " << value << std::endl; Chris@16: Chris@16: // the re2c lexer reports the new line number for newline tokens Chris@16: result = token_type(id, value, PositionT(filename, actline, scanner.column)); Chris@16: Chris@16: #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0 Chris@16: return guards.detect_guard(result); Chris@16: #else Chris@16: return result; Chris@16: #endif Chris@16: } Chris@16: Chris@16: template Chris@16: inline int Chris@16: lexer::report_error(Scanner const *s, int errcode, Chris@16: char const *msg, ...) Chris@16: { Chris@16: BOOST_ASSERT(0 != s); Chris@16: BOOST_ASSERT(0 != msg); Chris@16: Chris@16: using namespace std; // some system have vsprintf in namespace std Chris@16: Chris@16: char buffer[200]; // should be large enough Chris@16: va_list params; Chris@16: va_start(params, msg); Chris@16: vsprintf(buffer, msg, params); Chris@16: va_end(params); Chris@16: Chris@16: BOOST_WAVE_LEXER_THROW_VAR(lexing_exception, errcode, buffer, s->line, Chris@16: s->column, s->file_name); Chris@16: // BOOST_UNREACHABLE_RETURN(0); Chris@16: return 0; Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: // Chris@16: // lex_functor Chris@16: // Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: Chris@16: template ::token_type> Chris@16: class lex_functor Chris@16: : public lex_input_interface_generator Chris@16: { Chris@16: public: Chris@16: typedef TokenT token_type; Chris@16: Chris@16: lex_functor(IteratorT const &first, IteratorT const &last, Chris@16: PositionT const &pos, boost::wave::language_support language) Chris@16: : re2c_lexer(first, last, pos, language) Chris@16: {} Chris@16: virtual ~lex_functor() {} Chris@16: Chris@16: // get the next token from the input stream Chris@16: token_type& get(token_type& result) { return re2c_lexer.get(result); } Chris@16: void set_position(PositionT const &pos) { re2c_lexer.set_position(pos); } Chris@16: #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0 Chris@16: bool has_include_guards(std::string& guard_name) const Chris@16: { return re2c_lexer.has_include_guards(guard_name); } Chris@16: #endif Chris@16: Chris@16: private: Chris@16: lexer re2c_lexer; Chris@16: }; Chris@16: Chris@16: #if BOOST_WAVE_SUPPORT_THREADING == 0 Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: template Chris@16: token_cache::string_type> const Chris@16: lexer::cache = Chris@16: token_cache::string_type>(); Chris@16: #endif Chris@16: Chris@16: } // namespace re2clex Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: // Chris@16: // The new_lexer_gen<>::new_lexer function (declared in cpp_lex_interface.hpp) Chris@16: // should be defined inline, if the lex_functor shouldn't be instantiated Chris@16: // separately from the lex_iterator. Chris@16: // Chris@16: // Separate (explicit) instantiation helps to reduce compilation time. Chris@16: // Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: Chris@16: #if BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION != 0 Chris@16: #define BOOST_WAVE_RE2C_NEW_LEXER_INLINE Chris@16: #else Chris@16: #define BOOST_WAVE_RE2C_NEW_LEXER_INLINE inline Chris@16: #endif Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: // Chris@16: // The 'new_lexer' function allows the opaque generation of a new lexer object. Chris@16: // It is coupled to the iterator type to allow to decouple the lexer/iterator Chris@16: // configurations at compile time. Chris@16: // Chris@16: // This function is declared inside the cpp_lex_token.hpp file, which is Chris@16: // referenced by the source file calling the lexer and the source file, which Chris@16: // instantiates the lex_functor. But it is defined here, so it will be Chris@16: // instantiated only while compiling the source file, which instantiates the Chris@16: // lex_functor. While the cpp_re2c_token.hpp file may be included everywhere, Chris@16: // this file (cpp_re2c_lexer.hpp) should be included only once. This allows Chris@16: // to decouple the lexer interface from the lexer implementation and reduces Chris@16: // compilation time. Chris@16: // Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: Chris@16: template Chris@16: BOOST_WAVE_RE2C_NEW_LEXER_INLINE Chris@16: lex_input_interface * Chris@16: new_lexer_gen::new_lexer(IteratorT const &first, Chris@16: IteratorT const &last, PositionT const &pos, Chris@16: boost::wave::language_support language) Chris@16: { Chris@16: using re2clex::lex_functor; Chris@16: return new lex_functor(first, last, pos, language); Chris@16: } Chris@16: Chris@16: #undef BOOST_WAVE_RE2C_NEW_LEXER_INLINE Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: } // namespace cpplexer Chris@16: } // namespace wave Chris@16: } // namespace boost Chris@16: Chris@16: // the suffix header occurs after all of the code Chris@16: #ifdef BOOST_HAS_ABI_HEADERS Chris@16: #include BOOST_ABI_SUFFIX Chris@16: #endif Chris@16: Chris@16: #endif // !defined(CPP_RE2C_LEXER_HPP_B81A2629_D5B1_4944_A97D_60254182B9A8_INCLUDED)