Chris@16: /* Chris@16: * Chris@16: * Copyright (c) 2002 Chris@16: * John Maddock Chris@16: * Chris@16: * Use, modification and distribution are subject to the Chris@16: * Boost Software License, Version 1.0. (See accompanying file Chris@16: * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) Chris@16: * Chris@16: */ Chris@16: Chris@16: #ifndef BOOST_REGEX_MATCHER_HPP Chris@16: #define BOOST_REGEX_MATCHER_HPP Chris@16: Chris@16: #include Chris@16: Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(push) Chris@16: #pragma warning(disable: 4103) Chris@16: #endif Chris@16: #ifdef BOOST_HAS_ABI_HEADERS Chris@16: # include BOOST_ABI_PREFIX Chris@16: #endif Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(pop) Chris@16: #endif Chris@16: Chris@16: #ifdef BOOST_MSVC Chris@16: # pragma warning(push) Chris@16: # pragma warning(disable: 4800) Chris@16: #endif Chris@16: Chris@16: namespace boost{ Chris@16: namespace re_detail{ Chris@16: Chris@16: // Chris@16: // error checking API: Chris@16: // Chris@16: BOOST_REGEX_DECL void BOOST_REGEX_CALL verify_options(boost::regex_constants::syntax_option_type ef, match_flag_type mf); Chris@16: // Chris@16: // function can_start: Chris@16: // Chris@16: template Chris@16: inline bool can_start(charT c, const unsigned char* map, unsigned char mask) Chris@16: { Chris@16: return ((c < static_cast(0)) ? true : ((c >= static_cast(1 << CHAR_BIT)) ? true : map[c] & mask)); Chris@16: } Chris@16: inline bool can_start(char c, const unsigned char* map, unsigned char mask) Chris@16: { Chris@16: return map[(unsigned char)c] & mask; Chris@16: } Chris@16: inline bool can_start(signed char c, const unsigned char* map, unsigned char mask) Chris@16: { Chris@16: return map[(unsigned char)c] & mask; Chris@16: } Chris@16: inline bool can_start(unsigned char c, const unsigned char* map, unsigned char mask) Chris@16: { Chris@16: return map[c] & mask; Chris@16: } Chris@16: inline bool can_start(unsigned short c, const unsigned char* map, unsigned char mask) Chris@16: { Chris@16: return ((c >= (1 << CHAR_BIT)) ? true : map[c] & mask); Chris@16: } Chris@16: #if !defined(__hpux) && !defined(__WINSCW__)// WCHAR_MIN not usable in pp-directives. Chris@16: #if defined(WCHAR_MIN) && (WCHAR_MIN == 0) && !defined(BOOST_NO_INTRINSIC_WCHAR_T) Chris@16: inline bool can_start(wchar_t c, const unsigned char* map, unsigned char mask) Chris@16: { Chris@16: return ((c >= static_cast(1u << CHAR_BIT)) ? true : map[c] & mask); Chris@16: } Chris@16: #endif Chris@16: #endif Chris@16: #if !defined(BOOST_NO_INTRINSIC_WCHAR_T) Chris@16: inline bool can_start(unsigned int c, const unsigned char* map, unsigned char mask) Chris@16: { Chris@16: return (((c >= static_cast(1u << CHAR_BIT)) ? true : map[c] & mask)); Chris@16: } Chris@16: #endif Chris@16: Chris@16: Chris@16: // Chris@16: // Unfortunately Rogue Waves standard library appears to have a bug Chris@16: // in std::basic_string::compare that results in eroneous answers Chris@16: // in some cases (tested with Borland C++ 5.1, Rogue Wave lib version Chris@16: // 0x020101) the test case was: Chris@16: // {39135,0} < {0xff,0} Chris@16: // which succeeds when it should not. Chris@16: // Chris@16: #ifndef _RWSTD_VER Chris@16: template Chris@16: inline int string_compare(const std::basic_string& s, const C* p) Chris@16: { Chris@16: if(0 == *p) Chris@16: { Chris@16: if(s.empty() || ((s.size() == 1) && (s[0] == 0))) Chris@16: return 0; Chris@16: } Chris@16: return s.compare(p); Chris@16: } Chris@16: #else Chris@16: template Chris@16: inline int string_compare(const std::basic_string& s, const C* p) Chris@16: { Chris@16: if(0 == *p) Chris@16: { Chris@16: if(s.empty() || ((s.size() == 1) && (s[0] == 0))) Chris@16: return 0; Chris@16: } Chris@16: return s.compare(p); Chris@16: } Chris@16: inline int string_compare(const std::string& s, const char* p) Chris@16: { return std::strcmp(s.c_str(), p); } Chris@16: # ifndef BOOST_NO_WREGEX Chris@16: inline int string_compare(const std::wstring& s, const wchar_t* p) Chris@16: { return std::wcscmp(s.c_str(), p); } Chris@16: #endif Chris@16: #endif Chris@16: template Chris@16: inline int string_compare(const Seq& s, const C* p) Chris@16: { Chris@16: std::size_t i = 0; Chris@16: while((i < s.size()) && (p[i] == s[i])) Chris@16: { Chris@16: ++i; Chris@16: } Chris@16: return (i == s.size()) ? -p[i] : s[i] - p[i]; Chris@16: } Chris@16: # define STR_COMP(s,p) string_compare(s,p) Chris@16: Chris@16: template Chris@16: inline const charT* re_skip_past_null(const charT* p) Chris@16: { Chris@16: while (*p != static_cast(0)) ++p; Chris@16: return ++p; Chris@16: } Chris@16: Chris@16: template Chris@16: iterator BOOST_REGEX_CALL re_is_set_member(iterator next, Chris@16: iterator last, Chris@16: const re_set_long* set_, Chris@16: const regex_data& e, bool icase) Chris@16: { Chris@16: const charT* p = reinterpret_cast(set_+1); Chris@16: iterator ptr; Chris@16: unsigned int i; Chris@16: //bool icase = e.m_flags & regex_constants::icase; Chris@16: Chris@16: if(next == last) return next; Chris@16: Chris@16: typedef typename traits_type::string_type traits_string_type; Chris@16: const ::boost::regex_traits_wrapper& traits_inst = *(e.m_ptraits); Chris@16: Chris@16: // dwa 9/13/00 suppress incorrect MSVC warning - it claims this is never Chris@16: // referenced Chris@16: (void)traits_inst; Chris@16: Chris@16: // try and match a single character, could be a multi-character Chris@16: // collating element... Chris@16: for(i = 0; i < set_->csingles; ++i) Chris@16: { Chris@16: ptr = next; Chris@16: if(*p == static_cast(0)) Chris@16: { Chris@16: // treat null string as special case: Chris@16: if(traits_inst.translate(*ptr, icase) != *p) Chris@16: { Chris@16: while(*p == static_cast(0))++p; Chris@16: continue; Chris@16: } Chris@16: return set_->isnot ? next : (ptr == next) ? ++next : ptr; Chris@16: } Chris@16: else Chris@16: { Chris@16: while(*p && (ptr != last)) Chris@16: { Chris@16: if(traits_inst.translate(*ptr, icase) != *p) Chris@16: break; Chris@16: ++p; Chris@16: ++ptr; Chris@16: } Chris@16: Chris@16: if(*p == static_cast(0)) // if null we've matched Chris@16: return set_->isnot ? next : (ptr == next) ? ++next : ptr; Chris@16: Chris@16: p = re_skip_past_null(p); // skip null Chris@16: } Chris@16: } Chris@16: Chris@16: charT col = traits_inst.translate(*next, icase); Chris@16: Chris@16: Chris@16: if(set_->cranges || set_->cequivalents) Chris@16: { Chris@16: traits_string_type s1; Chris@16: // Chris@16: // try and match a range, NB only a single character can match Chris@16: if(set_->cranges) Chris@16: { Chris@16: if((e.m_flags & regex_constants::collate) == 0) Chris@16: s1.assign(1, col); Chris@16: else Chris@16: { Chris@16: charT a[2] = { col, charT(0), }; Chris@16: s1 = traits_inst.transform(a, a + 1); Chris@16: } Chris@16: for(i = 0; i < set_->cranges; ++i) Chris@16: { Chris@16: if(STR_COMP(s1, p) >= 0) Chris@16: { Chris@16: do{ ++p; }while(*p); Chris@16: ++p; Chris@16: if(STR_COMP(s1, p) <= 0) Chris@16: return set_->isnot ? next : ++next; Chris@16: } Chris@16: else Chris@16: { Chris@16: // skip first string Chris@16: do{ ++p; }while(*p); Chris@16: ++p; Chris@16: } Chris@16: // skip second string Chris@16: do{ ++p; }while(*p); Chris@16: ++p; Chris@16: } Chris@16: } Chris@16: // Chris@16: // try and match an equivalence class, NB only a single character can match Chris@16: if(set_->cequivalents) Chris@16: { Chris@16: charT a[2] = { col, charT(0), }; Chris@16: s1 = traits_inst.transform_primary(a, a +1); Chris@16: for(i = 0; i < set_->cequivalents; ++i) Chris@16: { Chris@16: if(STR_COMP(s1, p) == 0) Chris@16: return set_->isnot ? next : ++next; Chris@16: // skip string Chris@16: do{ ++p; }while(*p); Chris@16: ++p; Chris@16: } Chris@16: } Chris@16: } Chris@16: if(traits_inst.isctype(col, set_->cclasses) == true) Chris@16: return set_->isnot ? next : ++next; Chris@16: if((set_->cnclasses != 0) && (traits_inst.isctype(col, set_->cnclasses) == false)) Chris@16: return set_->isnot ? next : ++next; Chris@16: return set_->isnot ? ++next : next; Chris@16: } Chris@16: Chris@16: template Chris@16: class repeater_count Chris@16: { Chris@16: repeater_count** stack; Chris@16: repeater_count* next; Chris@16: int state_id; Chris@16: std::size_t count; // the number of iterations so far Chris@16: BidiIterator start_pos; // where the last repeat started Chris@16: public: Chris@101: repeater_count(repeater_count** s) : stack(s), next(0), state_id(-1), count(0), start_pos() {} Chris@101: Chris@16: repeater_count(int i, repeater_count** s, BidiIterator start) Chris@16: : start_pos(start) Chris@16: { Chris@16: state_id = i; Chris@16: stack = s; Chris@16: next = *stack; Chris@16: *stack = this; Chris@16: if(state_id > next->state_id) Chris@16: count = 0; Chris@16: else Chris@16: { Chris@16: repeater_count* p = next; Chris@16: while(p && (p->state_id != state_id)) Chris@16: p = p->next; Chris@16: if(p) Chris@16: { Chris@16: count = p->count; Chris@16: start_pos = p->start_pos; Chris@16: } Chris@16: else Chris@16: count = 0; Chris@16: } Chris@16: } Chris@16: ~repeater_count() Chris@16: { Chris@16: if(next) Chris@16: *stack = next; Chris@16: } Chris@16: std::size_t get_count() { return count; } Chris@16: int get_id() { return state_id; } Chris@16: std::size_t operator++() { return ++count; } Chris@16: bool check_null_repeat(const BidiIterator& pos, std::size_t max) Chris@16: { Chris@16: // this is called when we are about to start a new repeat, Chris@16: // if the last one was NULL move our count to max, Chris@16: // otherwise save the current position. Chris@16: bool result = (count == 0) ? false : (pos == start_pos); Chris@16: if(result) Chris@16: count = max; Chris@16: else Chris@16: start_pos = pos; Chris@16: return result; Chris@16: } Chris@16: }; Chris@16: Chris@16: struct saved_state; Chris@16: Chris@16: enum saved_state_type Chris@16: { Chris@16: saved_type_end = 0, Chris@16: saved_type_paren = 1, Chris@16: saved_type_recurse = 2, Chris@16: saved_type_assertion = 3, Chris@16: saved_state_alt = 4, Chris@16: saved_state_repeater_count = 5, Chris@16: saved_state_extra_block = 6, Chris@16: saved_state_greedy_single_repeat = 7, Chris@16: saved_state_rep_slow_dot = 8, Chris@16: saved_state_rep_fast_dot = 9, Chris@16: saved_state_rep_char = 10, Chris@16: saved_state_rep_short_set = 11, Chris@16: saved_state_rep_long_set = 12, Chris@16: saved_state_non_greedy_long_repeat = 13, Chris@16: saved_state_count = 14 Chris@16: }; Chris@16: Chris@16: template Chris@16: struct recursion_info Chris@16: { Chris@16: typedef typename Results::value_type value_type; Chris@16: typedef typename value_type::iterator iterator; Chris@16: int idx; Chris@16: const re_syntax_base* preturn_address; Chris@16: Results results; Chris@16: repeater_count* repeater_stack; Chris@16: }; Chris@16: Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(push) Chris@16: #pragma warning(disable : 4251 4231) Chris@16: # if BOOST_MSVC < 1600 Chris@16: # pragma warning(disable : 4660) Chris@16: # endif Chris@16: #endif Chris@16: Chris@16: template Chris@16: class perl_matcher Chris@16: { Chris@16: public: Chris@16: typedef typename traits::char_type char_type; Chris@16: typedef perl_matcher self_type; Chris@16: typedef bool (self_type::*matcher_proc_type)(void); Chris@16: typedef std::size_t traits_size_type; Chris@16: typedef typename is_byte::width_type width_type; Chris@16: typedef typename regex_iterator_traits::difference_type difference_type; Chris@16: typedef match_results results_type; Chris@16: Chris@16: perl_matcher(BidiIterator first, BidiIterator end, Chris@16: match_results& what, Chris@16: const basic_regex& e, Chris@16: match_flag_type f, Chris@16: BidiIterator l_base) Chris@16: : m_result(what), base(first), last(end), Chris@16: position(first), backstop(l_base), re(e), traits_inst(e.get_traits()), Chris@16: m_independent(false), next_count(&rep_obj), rep_obj(&next_count) Chris@16: { Chris@16: construct_init(e, f); Chris@16: } Chris@16: Chris@16: bool match(); Chris@16: bool find(); Chris@16: Chris@16: void setf(match_flag_type f) Chris@16: { m_match_flags |= f; } Chris@16: void unsetf(match_flag_type f) Chris@16: { m_match_flags &= ~f; } Chris@16: Chris@16: private: Chris@16: void construct_init(const basic_regex& e, match_flag_type f); Chris@16: Chris@16: bool find_imp(); Chris@16: bool match_imp(); Chris@16: #ifdef BOOST_REGEX_HAS_MS_STACK_GUARD Chris@16: typedef bool (perl_matcher::*protected_proc_type)(); Chris@16: bool protected_call(protected_proc_type); Chris@16: #endif Chris@16: void estimate_max_state_count(std::random_access_iterator_tag*); Chris@16: void estimate_max_state_count(void*); Chris@16: bool match_prefix(); Chris@16: bool match_all_states(); Chris@16: Chris@16: // match procs, stored in s_match_vtable: Chris@16: bool match_startmark(); Chris@16: bool match_endmark(); Chris@16: bool match_literal(); Chris@16: bool match_start_line(); Chris@16: bool match_end_line(); Chris@16: bool match_wild(); Chris@16: bool match_match(); Chris@16: bool match_word_boundary(); Chris@16: bool match_within_word(); Chris@16: bool match_word_start(); Chris@16: bool match_word_end(); Chris@16: bool match_buffer_start(); Chris@16: bool match_buffer_end(); Chris@16: bool match_backref(); Chris@16: bool match_long_set(); Chris@16: bool match_set(); Chris@16: bool match_jump(); Chris@16: bool match_alt(); Chris@16: bool match_rep(); Chris@16: bool match_combining(); Chris@16: bool match_soft_buffer_end(); Chris@16: bool match_restart_continue(); Chris@16: bool match_long_set_repeat(); Chris@16: bool match_set_repeat(); Chris@16: bool match_char_repeat(); Chris@16: bool match_dot_repeat_fast(); Chris@16: bool match_dot_repeat_slow(); Chris@16: bool match_dot_repeat_dispatch() Chris@16: { Chris@16: return ::boost::is_random_access_iterator::value ? match_dot_repeat_fast() : match_dot_repeat_slow(); Chris@16: } Chris@16: bool match_backstep(); Chris@16: bool match_assert_backref(); Chris@16: bool match_toggle_case(); Chris@16: #ifdef BOOST_REGEX_RECURSIVE Chris@16: bool backtrack_till_match(std::size_t count); Chris@16: #endif Chris@16: bool match_recursion(); Chris@16: Chris@16: // find procs stored in s_find_vtable: Chris@16: bool find_restart_any(); Chris@16: bool find_restart_word(); Chris@16: bool find_restart_line(); Chris@16: bool find_restart_buf(); Chris@16: bool find_restart_lit(); Chris@16: Chris@16: private: Chris@16: // final result structure to be filled in: Chris@16: match_results& m_result; Chris@16: // temporary result for POSIX matches: Chris@16: scoped_ptr > m_temp_match; Chris@16: // pointer to actual result structure to fill in: Chris@16: match_results* m_presult; Chris@16: // start of sequence being searched: Chris@16: BidiIterator base; Chris@16: // end of sequence being searched: Chris@16: BidiIterator last; Chris@16: // current character being examined: Chris@16: BidiIterator position; Chris@16: // where to restart next search after failed match attempt: Chris@16: BidiIterator restart; Chris@16: // where the current search started from, acts as base for $` during grep: Chris@16: BidiIterator search_base; Chris@16: // how far we can go back when matching lookbehind: Chris@16: BidiIterator backstop; Chris@16: // the expression being examined: Chris@16: const basic_regex& re; Chris@16: // the expression's traits class: Chris@16: const ::boost::regex_traits_wrapper& traits_inst; Chris@16: // the next state in the machine being matched: Chris@16: const re_syntax_base* pstate; Chris@16: // matching flags in use: Chris@16: match_flag_type m_match_flags; Chris@16: // how many states we have examined so far: Chris@16: std::ptrdiff_t state_count; Chris@16: // max number of states to examine before giving up: Chris@16: std::ptrdiff_t max_state_count; Chris@16: // whether we should ignore case or not: Chris@16: bool icase; Chris@16: // set to true when (position == last), indicates that we may have a partial match: Chris@16: bool m_has_partial_match; Chris@16: // set to true whenever we get a match: Chris@16: bool m_has_found_match; Chris@16: // set to true whenever we're inside an independent sub-expression: Chris@16: bool m_independent; Chris@16: // the current repeat being examined: Chris@16: repeater_count* next_count; Chris@16: // the first repeat being examined (top of linked list): Chris@16: repeater_count rep_obj; Chris@16: // the mask to pass when matching word boundaries: Chris@16: typename traits::char_class_type m_word_mask; Chris@16: // the bitmask to use when determining whether a match_any matches a newline or not: Chris@16: unsigned char match_any_mask; Chris@16: // recursion information: Chris@16: std::vector > recursion_stack; Chris@16: Chris@16: #ifdef BOOST_REGEX_NON_RECURSIVE Chris@16: // Chris@16: // additional members for non-recursive version: Chris@16: // Chris@16: typedef bool (self_type::*unwind_proc_type)(bool); Chris@16: Chris@16: void extend_stack(); Chris@16: bool unwind(bool); Chris@16: bool unwind_end(bool); Chris@16: bool unwind_paren(bool); Chris@16: bool unwind_recursion_stopper(bool); Chris@16: bool unwind_assertion(bool); Chris@16: bool unwind_alt(bool); Chris@16: bool unwind_repeater_counter(bool); Chris@16: bool unwind_extra_block(bool); Chris@16: bool unwind_greedy_single_repeat(bool); Chris@16: bool unwind_slow_dot_repeat(bool); Chris@16: bool unwind_fast_dot_repeat(bool); Chris@16: bool unwind_char_repeat(bool); Chris@16: bool unwind_short_set_repeat(bool); Chris@16: bool unwind_long_set_repeat(bool); Chris@16: bool unwind_non_greedy_repeat(bool); Chris@16: bool unwind_recursion(bool); Chris@16: bool unwind_recursion_pop(bool); Chris@16: void destroy_single_repeat(); Chris@16: void push_matched_paren(int index, const sub_match& sub); Chris@16: void push_recursion_stopper(); Chris@16: void push_assertion(const re_syntax_base* ps, bool positive); Chris@16: void push_alt(const re_syntax_base* ps); Chris@16: void push_repeater_count(int i, repeater_count** s); Chris@16: void push_single_repeat(std::size_t c, const re_repeat* r, BidiIterator last_position, int state_id); Chris@16: void push_non_greedy_repeat(const re_syntax_base* ps); Chris@16: void push_recursion(int idx, const re_syntax_base* p, results_type* presults); Chris@16: void push_recursion_pop(); Chris@16: Chris@16: // pointer to base of stack: Chris@16: saved_state* m_stack_base; Chris@16: // pointer to current stack position: Chris@16: saved_state* m_backup_state; Chris@16: // determines what value to return when unwinding from recursion, Chris@16: // allows for mixed recursive/non-recursive algorithm: Chris@16: bool m_recursive_result; Chris@16: // how many memory blocks have we used up?: Chris@16: unsigned used_block_count; Chris@16: #endif Chris@16: Chris@16: // these operations aren't allowed, so are declared private, Chris@16: // bodies are provided to keep explicit-instantiation requests happy: Chris@16: perl_matcher& operator=(const perl_matcher&) Chris@16: { Chris@16: return *this; Chris@16: } Chris@16: perl_matcher(const perl_matcher& that) Chris@16: : m_result(that.m_result), re(that.re), traits_inst(that.traits_inst), rep_obj(0) {} Chris@16: }; Chris@16: Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(pop) Chris@16: #endif Chris@16: Chris@16: } // namespace re_detail Chris@16: Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(push) Chris@16: #pragma warning(disable: 4103) Chris@16: #endif Chris@16: #ifdef BOOST_HAS_ABI_HEADERS Chris@16: # include BOOST_ABI_SUFFIX Chris@16: #endif Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(pop) Chris@16: #endif Chris@16: Chris@16: } // namespace boost Chris@16: Chris@16: #ifdef BOOST_MSVC Chris@16: # pragma warning(pop) Chris@16: #endif Chris@16: Chris@16: // Chris@16: // include the implementation of perl_matcher: Chris@16: // Chris@16: #ifdef BOOST_REGEX_RECURSIVE Chris@16: #include Chris@16: #else Chris@16: #include Chris@16: #endif Chris@16: // this one has to be last: Chris@16: #include Chris@16: Chris@16: #endif Chris@16: