Chris@16: /* Chris@16: * Chris@16: * Copyright (c) 2002 Chris@16: * John Maddock Chris@16: * Chris@16: * Use, modification and distribution are subject to the Chris@16: * Boost Software License, Version 1.0. (See accompanying file Chris@16: * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) Chris@16: * Chris@16: */ Chris@16: Chris@16: /* Chris@16: * LOCATION: see http://www.boost.org for most recent version. Chris@16: * FILE perl_matcher_common.cpp Chris@16: * VERSION see Chris@16: * DESCRIPTION: Definitions of perl_matcher member functions that are Chris@16: * common to both the recursive and non-recursive versions. Chris@16: */ Chris@16: Chris@16: #ifndef BOOST_REGEX_V4_PERL_MATCHER_COMMON_HPP Chris@16: #define BOOST_REGEX_V4_PERL_MATCHER_COMMON_HPP Chris@16: Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(push) Chris@16: #pragma warning(disable: 4103) Chris@16: #endif Chris@16: #ifdef BOOST_HAS_ABI_HEADERS Chris@16: # include BOOST_ABI_PREFIX Chris@16: #endif Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(pop) Chris@16: #endif Chris@16: Chris@16: #ifdef __BORLANDC__ Chris@16: # pragma option push -w-8008 -w-8066 Chris@16: #endif Chris@16: #ifdef BOOST_MSVC Chris@16: # pragma warning(push) Chris@16: # pragma warning(disable: 4800) Chris@16: #endif Chris@16: Chris@16: namespace boost{ Chris@16: namespace re_detail{ Chris@16: Chris@16: template Chris@16: void perl_matcher::construct_init(const basic_regex& e, match_flag_type f) Chris@16: { Chris@16: typedef typename regex_iterator_traits::iterator_category category; Chris@16: typedef typename basic_regex::flag_type expression_flag_type; Chris@16: Chris@16: if(e.empty()) Chris@16: { Chris@16: // precondition failure: e is not a valid regex. Chris@16: std::invalid_argument ex("Invalid regular expression object"); Chris@16: boost::throw_exception(ex); Chris@16: } Chris@16: pstate = 0; Chris@16: m_match_flags = f; Chris@16: estimate_max_state_count(static_cast(0)); Chris@16: expression_flag_type re_f = re.flags(); Chris@16: icase = re_f & regex_constants::icase; Chris@16: if(!(m_match_flags & (match_perl|match_posix))) Chris@16: { Chris@16: if((re_f & (regbase::main_option_type|regbase::no_perl_ex)) == 0) Chris@16: m_match_flags |= match_perl; Chris@16: else if((re_f & (regbase::main_option_type|regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex)) Chris@16: m_match_flags |= match_perl; Chris@16: else if((re_f & (regbase::main_option_type|regbase::literal)) == (regbase::literal)) Chris@16: m_match_flags |= match_perl; Chris@16: else Chris@16: m_match_flags |= match_posix; Chris@16: } Chris@16: if(m_match_flags & match_posix) Chris@16: { Chris@16: m_temp_match.reset(new match_results()); Chris@16: m_presult = m_temp_match.get(); Chris@16: } Chris@16: else Chris@16: m_presult = &m_result; Chris@16: #ifdef BOOST_REGEX_NON_RECURSIVE Chris@16: m_stack_base = 0; Chris@16: m_backup_state = 0; Chris@16: #endif Chris@16: // find the value to use for matching word boundaries: Chris@16: m_word_mask = re.get_data().m_word_mask; Chris@16: // find bitmask to use for matching '.': Chris@16: match_any_mask = static_cast((f & match_not_dot_newline) ? re_detail::test_not_newline : re_detail::test_newline); Chris@16: } Chris@16: Chris@16: template Chris@16: void perl_matcher::estimate_max_state_count(std::random_access_iterator_tag*) Chris@16: { Chris@16: // Chris@16: // How many states should we allow our machine to visit before giving up? Chris@16: // This is a heuristic: it takes the greater of O(N^2) and O(NS^2) Chris@16: // where N is the length of the string, and S is the number of states Chris@16: // in the machine. It's tempting to up this to O(N^2S) or even O(N^2S^2) Chris@16: // but these take unreasonably amounts of time to bale out in pathological Chris@16: // cases. Chris@16: // Chris@16: // Calculate NS^2 first: Chris@16: // Chris@16: static const std::ptrdiff_t k = 100000; Chris@16: std::ptrdiff_t dist = boost::re_detail::distance(base, last); Chris@16: if(dist == 0) Chris@16: dist = 1; Chris@16: std::ptrdiff_t states = re.size(); Chris@16: if(states == 0) Chris@16: states = 1; Chris@16: states *= states; Chris@16: if((std::numeric_limits::max)() / dist < states) Chris@16: { Chris@16: max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits::max)() - 2); Chris@16: return; Chris@16: } Chris@16: states *= dist; Chris@16: if((std::numeric_limits::max)() - k < states) Chris@16: { Chris@16: max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits::max)() - 2); Chris@16: return; Chris@16: } Chris@16: states += k; Chris@16: Chris@16: max_state_count = states; Chris@16: Chris@16: // Chris@16: // Now calculate N^2: Chris@16: // Chris@16: states = dist; Chris@16: if((std::numeric_limits::max)() / dist < states) Chris@16: { Chris@16: max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits::max)() - 2); Chris@16: return; Chris@16: } Chris@16: states *= dist; Chris@16: if((std::numeric_limits::max)() - k < states) Chris@16: { Chris@16: max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits::max)() - 2); Chris@16: return; Chris@16: } Chris@16: states += k; Chris@16: // Chris@16: // N^2 can be a very large number indeed, to prevent things getting out Chris@16: // of control, cap the max states: Chris@16: // Chris@16: if(states > BOOST_REGEX_MAX_STATE_COUNT) Chris@16: states = BOOST_REGEX_MAX_STATE_COUNT; Chris@16: // Chris@16: // If (the possibly capped) N^2 is larger than our first estimate, Chris@16: // use this instead: Chris@16: // Chris@16: if(states > max_state_count) Chris@16: max_state_count = states; Chris@16: } Chris@16: Chris@16: template Chris@16: inline void perl_matcher::estimate_max_state_count(void*) Chris@16: { Chris@16: // we don't know how long the sequence is: Chris@16: max_state_count = BOOST_REGEX_MAX_STATE_COUNT; Chris@16: } Chris@16: Chris@16: #ifdef BOOST_REGEX_HAS_MS_STACK_GUARD Chris@16: template Chris@16: inline bool perl_matcher::protected_call( Chris@16: protected_proc_type proc) Chris@16: { Chris@16: ::boost::re_detail::concrete_protected_call Chris@16: > Chris@16: obj(this, proc); Chris@16: return obj.execute(); Chris@16: Chris@16: } Chris@16: #endif Chris@16: Chris@16: template Chris@16: inline bool perl_matcher::match() Chris@16: { Chris@16: #ifdef BOOST_REGEX_HAS_MS_STACK_GUARD Chris@16: return protected_call(&perl_matcher::match_imp); Chris@16: #else Chris@16: return match_imp(); Chris@16: #endif Chris@16: } Chris@16: Chris@16: template Chris@16: bool perl_matcher::match_imp() Chris@16: { Chris@16: // initialise our stack if we are non-recursive: Chris@16: #ifdef BOOST_REGEX_NON_RECURSIVE Chris@16: save_state_init init(&m_stack_base, &m_backup_state); Chris@16: used_block_count = BOOST_REGEX_MAX_BLOCKS; Chris@16: #if !defined(BOOST_NO_EXCEPTIONS) Chris@16: try{ Chris@16: #endif Chris@16: #endif Chris@16: Chris@16: // reset our state machine: Chris@16: position = base; Chris@16: search_base = base; Chris@16: state_count = 0; Chris@16: m_match_flags |= regex_constants::match_all; Chris@101: m_presult->set_size((m_match_flags & match_nosubs) ? 1 : 1 + re.mark_count(), search_base, last); Chris@16: m_presult->set_base(base); Chris@16: m_presult->set_named_subs(this->re.get_named_subs()); Chris@16: if(m_match_flags & match_posix) Chris@16: m_result = *m_presult; Chris@16: verify_options(re.flags(), m_match_flags); Chris@16: if(0 == match_prefix()) Chris@16: return false; Chris@16: return (m_result[0].second == last) && (m_result[0].first == base); Chris@16: Chris@16: #if defined(BOOST_REGEX_NON_RECURSIVE) && !defined(BOOST_NO_EXCEPTIONS) Chris@16: } Chris@16: catch(...) Chris@16: { Chris@16: // unwind all pushed states, apart from anything else this Chris@16: // ensures that all the states are correctly destructed Chris@16: // not just the memory freed. Chris@16: while(unwind(true)){} Chris@16: throw; Chris@16: } Chris@16: #endif Chris@16: } Chris@16: Chris@16: template Chris@16: inline bool perl_matcher::find() Chris@16: { Chris@16: #ifdef BOOST_REGEX_HAS_MS_STACK_GUARD Chris@16: return protected_call(&perl_matcher::find_imp); Chris@16: #else Chris@16: return find_imp(); Chris@16: #endif Chris@16: } Chris@16: Chris@16: template Chris@16: bool perl_matcher::find_imp() Chris@16: { Chris@16: static matcher_proc_type const s_find_vtable[7] = Chris@16: { Chris@16: &perl_matcher::find_restart_any, Chris@16: &perl_matcher::find_restart_word, Chris@16: &perl_matcher::find_restart_line, Chris@16: &perl_matcher::find_restart_buf, Chris@16: &perl_matcher::match_prefix, Chris@16: &perl_matcher::find_restart_lit, Chris@16: &perl_matcher::find_restart_lit, Chris@16: }; Chris@16: Chris@16: // initialise our stack if we are non-recursive: Chris@16: #ifdef BOOST_REGEX_NON_RECURSIVE Chris@16: save_state_init init(&m_stack_base, &m_backup_state); Chris@16: used_block_count = BOOST_REGEX_MAX_BLOCKS; Chris@16: #if !defined(BOOST_NO_EXCEPTIONS) Chris@16: try{ Chris@16: #endif Chris@16: #endif Chris@16: Chris@16: state_count = 0; Chris@16: if((m_match_flags & regex_constants::match_init) == 0) Chris@16: { Chris@16: // reset our state machine: Chris@16: search_base = position = base; Chris@16: pstate = re.get_first_state(); Chris@101: m_presult->set_size((m_match_flags & match_nosubs) ? 1 : 1 + re.mark_count(), base, last); Chris@16: m_presult->set_base(base); Chris@16: m_presult->set_named_subs(this->re.get_named_subs()); Chris@16: m_match_flags |= regex_constants::match_init; Chris@16: } Chris@16: else Chris@16: { Chris@16: // start again: Chris@16: search_base = position = m_result[0].second; Chris@16: // If last match was null and match_not_null was not set then increment Chris@16: // our start position, otherwise we go into an infinite loop: Chris@16: if(((m_match_flags & match_not_null) == 0) && (m_result.length() == 0)) Chris@16: { Chris@16: if(position == last) Chris@16: return false; Chris@16: else Chris@16: ++position; Chris@16: } Chris@16: // reset $` start: Chris@101: m_presult->set_size((m_match_flags & match_nosubs) ? 1 : 1 + re.mark_count(), search_base, last); Chris@16: //if((base != search_base) && (base == backstop)) Chris@16: // m_match_flags |= match_prev_avail; Chris@16: } Chris@16: if(m_match_flags & match_posix) Chris@16: { Chris@101: m_result.set_size(1 + re.mark_count(), base, last); Chris@16: m_result.set_base(base); Chris@16: } Chris@16: Chris@16: verify_options(re.flags(), m_match_flags); Chris@16: // find out what kind of expression we have: Chris@16: unsigned type = (m_match_flags & match_continuous) ? Chris@16: static_cast(regbase::restart_continue) Chris@16: : static_cast(re.get_restart_type()); Chris@16: Chris@16: // call the appropriate search routine: Chris@16: matcher_proc_type proc = s_find_vtable[type]; Chris@16: return (this->*proc)(); Chris@16: Chris@16: #if defined(BOOST_REGEX_NON_RECURSIVE) && !defined(BOOST_NO_EXCEPTIONS) Chris@16: } Chris@16: catch(...) Chris@16: { Chris@16: // unwind all pushed states, apart from anything else this Chris@16: // ensures that all the states are correctly destructed Chris@16: // not just the memory freed. Chris@16: while(unwind(true)){} Chris@16: throw; Chris@16: } Chris@16: #endif Chris@16: } Chris@16: Chris@16: template Chris@16: bool perl_matcher::match_prefix() Chris@16: { Chris@16: m_has_partial_match = false; Chris@16: m_has_found_match = false; Chris@16: pstate = re.get_first_state(); Chris@16: m_presult->set_first(position); Chris@16: restart = position; Chris@16: match_all_states(); Chris@16: if(!m_has_found_match && m_has_partial_match && (m_match_flags & match_partial)) Chris@16: { Chris@16: m_has_found_match = true; Chris@16: m_presult->set_second(last, 0, false); Chris@16: position = last; Chris@16: if((m_match_flags & match_posix) == match_posix) Chris@16: { Chris@16: m_result.maybe_assign(*m_presult); Chris@16: } Chris@16: } Chris@16: #ifdef BOOST_REGEX_MATCH_EXTRA Chris@16: if(m_has_found_match && (match_extra & m_match_flags)) Chris@16: { Chris@16: // Chris@16: // we have a match, reverse the capture information: Chris@16: // Chris@16: for(unsigned i = 0; i < m_presult->size(); ++i) Chris@16: { Chris@16: typename sub_match::capture_sequence_type & seq = ((*m_presult)[i]).get_captures(); Chris@16: std::reverse(seq.begin(), seq.end()); Chris@16: } Chris@16: } Chris@16: #endif Chris@16: if(!m_has_found_match) Chris@16: position = restart; // reset search postion Chris@16: return m_has_found_match; Chris@16: } Chris@16: Chris@16: template Chris@16: bool perl_matcher::match_literal() Chris@16: { Chris@16: unsigned int len = static_cast(pstate)->length; Chris@16: const char_type* what = reinterpret_cast(static_cast(pstate) + 1); Chris@16: // Chris@16: // compare string with what we stored in Chris@16: // our records: Chris@16: for(unsigned int i = 0; i < len; ++i, ++position) Chris@16: { Chris@16: if((position == last) || (traits_inst.translate(*position, icase) != what[i])) Chris@16: return false; Chris@16: } Chris@16: pstate = pstate->next.p; Chris@16: return true; Chris@16: } Chris@16: Chris@16: template Chris@16: bool perl_matcher::match_start_line() Chris@16: { Chris@16: if(position == backstop) Chris@16: { Chris@16: if((m_match_flags & match_prev_avail) == 0) Chris@16: { Chris@16: if((m_match_flags & match_not_bol) == 0) Chris@16: { Chris@16: pstate = pstate->next.p; Chris@16: return true; Chris@16: } Chris@16: return false; Chris@16: } Chris@16: } Chris@16: else if(m_match_flags & match_single_line) Chris@16: return false; Chris@16: Chris@16: // check the previous value character: Chris@16: BidiIterator t(position); Chris@16: --t; Chris@16: if(position != last) Chris@16: { Chris@16: if(is_separator(*t) && !((*t == static_cast('\r')) && (*position == static_cast('\n'))) ) Chris@16: { Chris@16: pstate = pstate->next.p; Chris@16: return true; Chris@16: } Chris@16: } Chris@16: else if(is_separator(*t)) Chris@16: { Chris@16: pstate = pstate->next.p; Chris@16: return true; Chris@16: } Chris@16: return false; Chris@16: } Chris@16: Chris@16: template Chris@16: bool perl_matcher::match_end_line() Chris@16: { Chris@16: if(position != last) Chris@16: { Chris@16: if(m_match_flags & match_single_line) Chris@16: return false; Chris@16: // we're not yet at the end so *first is always valid: Chris@16: if(is_separator(*position)) Chris@16: { Chris@16: if((position != backstop) || (m_match_flags & match_prev_avail)) Chris@16: { Chris@16: // check that we're not in the middle of \r\n sequence Chris@16: BidiIterator t(position); Chris@16: --t; Chris@16: if((*t == static_cast('\r')) && (*position == static_cast('\n'))) Chris@16: { Chris@16: return false; Chris@16: } Chris@16: } Chris@16: pstate = pstate->next.p; Chris@16: return true; Chris@16: } Chris@16: } Chris@16: else if((m_match_flags & match_not_eol) == 0) Chris@16: { Chris@16: pstate = pstate->next.p; Chris@16: return true; Chris@16: } Chris@16: return false; Chris@16: } Chris@16: Chris@16: template Chris@16: bool perl_matcher::match_wild() Chris@16: { Chris@16: if(position == last) Chris@16: return false; Chris@16: if(is_separator(*position) && ((match_any_mask & static_cast(pstate)->mask) == 0)) Chris@16: return false; Chris@16: if((*position == char_type(0)) && (m_match_flags & match_not_dot_null)) Chris@16: return false; Chris@16: pstate = pstate->next.p; Chris@16: ++position; Chris@16: return true; Chris@16: } Chris@16: Chris@16: template Chris@16: bool perl_matcher::match_word_boundary() Chris@16: { Chris@16: bool b; // indcates whether next character is a word character Chris@16: if(position != last) Chris@16: { Chris@16: // prev and this character must be opposites: Chris@16: b = traits_inst.isctype(*position, m_word_mask); Chris@16: } Chris@16: else Chris@16: { Chris@16: b = (m_match_flags & match_not_eow) ? true : false; Chris@16: } Chris@16: if((position == backstop) && ((m_match_flags & match_prev_avail) == 0)) Chris@16: { Chris@16: if(m_match_flags & match_not_bow) Chris@16: b ^= true; Chris@16: else Chris@16: b ^= false; Chris@16: } Chris@16: else Chris@16: { Chris@16: --position; Chris@16: b ^= traits_inst.isctype(*position, m_word_mask); Chris@16: ++position; Chris@16: } Chris@16: if(b) Chris@16: { Chris@16: pstate = pstate->next.p; Chris@16: return true; Chris@16: } Chris@16: return false; // no match if we get to here... Chris@16: } Chris@16: Chris@16: template Chris@16: bool perl_matcher::match_within_word() Chris@16: { Chris@16: if(position == last) Chris@16: return false; Chris@16: // both prev and this character must be m_word_mask: Chris@16: bool prev = traits_inst.isctype(*position, m_word_mask); Chris@16: { Chris@16: bool b; Chris@16: if((position == backstop) && ((m_match_flags & match_prev_avail) == 0)) Chris@16: return false; Chris@16: else Chris@16: { Chris@16: --position; Chris@16: b = traits_inst.isctype(*position, m_word_mask); Chris@16: ++position; Chris@16: } Chris@16: if(b == prev) Chris@16: { Chris@16: pstate = pstate->next.p; Chris@16: return true; Chris@16: } Chris@16: } Chris@16: return false; Chris@16: } Chris@16: Chris@16: template Chris@16: bool perl_matcher::match_word_start() Chris@16: { Chris@16: if(position == last) Chris@16: return false; // can't be starting a word if we're already at the end of input Chris@16: if(!traits_inst.isctype(*position, m_word_mask)) Chris@16: return false; // next character isn't a word character Chris@16: if((position == backstop) && ((m_match_flags & match_prev_avail) == 0)) Chris@16: { Chris@16: if(m_match_flags & match_not_bow) Chris@16: return false; // no previous input Chris@16: } Chris@16: else Chris@16: { Chris@16: // otherwise inside buffer: Chris@16: BidiIterator t(position); Chris@16: --t; Chris@16: if(traits_inst.isctype(*t, m_word_mask)) Chris@16: return false; // previous character not non-word Chris@16: } Chris@16: // OK we have a match: Chris@16: pstate = pstate->next.p; Chris@16: return true; Chris@16: } Chris@16: Chris@16: template Chris@16: bool perl_matcher::match_word_end() Chris@16: { Chris@16: if((position == backstop) && ((m_match_flags & match_prev_avail) == 0)) Chris@16: return false; // start of buffer can't be end of word Chris@16: BidiIterator t(position); Chris@16: --t; Chris@16: if(traits_inst.isctype(*t, m_word_mask) == false) Chris@16: return false; // previous character wasn't a word character Chris@16: Chris@16: if(position == last) Chris@16: { Chris@16: if(m_match_flags & match_not_eow) Chris@16: return false; // end of buffer but not end of word Chris@16: } Chris@16: else Chris@16: { Chris@16: // otherwise inside buffer: Chris@16: if(traits_inst.isctype(*position, m_word_mask)) Chris@16: return false; // next character is a word character Chris@16: } Chris@16: pstate = pstate->next.p; Chris@16: return true; // if we fall through to here then we've succeeded Chris@16: } Chris@16: Chris@16: template Chris@16: bool perl_matcher::match_buffer_start() Chris@16: { Chris@16: if((position != backstop) || (m_match_flags & match_not_bob)) Chris@16: return false; Chris@16: // OK match: Chris@16: pstate = pstate->next.p; Chris@16: return true; Chris@16: } Chris@16: Chris@16: template Chris@16: bool perl_matcher::match_buffer_end() Chris@16: { Chris@16: if((position != last) || (m_match_flags & match_not_eob)) Chris@16: return false; Chris@16: // OK match: Chris@16: pstate = pstate->next.p; Chris@16: return true; Chris@16: } Chris@16: Chris@16: template Chris@16: bool perl_matcher::match_backref() Chris@16: { Chris@16: // Chris@16: // Compare with what we previously matched. Chris@16: // Note that this succeeds if the backref did not partisipate Chris@16: // in the match, this is in line with ECMAScript, but not Perl Chris@16: // or PCRE. Chris@16: // Chris@16: int index = static_cast(pstate)->index; Chris@16: if(index >= 10000) Chris@16: { Chris@16: named_subexpressions::range_type r = re.get_data().equal_range(index); Chris@16: BOOST_ASSERT(r.first != r.second); Chris@16: do Chris@16: { Chris@16: index = r.first->index; Chris@16: ++r.first; Chris@16: }while((r.first != r.second) && ((*m_presult)[index].matched != true)); Chris@16: } Chris@16: Chris@16: if((m_match_flags & match_perl) && !(*m_presult)[index].matched) Chris@16: return false; Chris@16: Chris@16: BidiIterator i = (*m_presult)[index].first; Chris@16: BidiIterator j = (*m_presult)[index].second; Chris@16: while(i != j) Chris@16: { Chris@16: if((position == last) || (traits_inst.translate(*position, icase) != traits_inst.translate(*i, icase))) Chris@16: return false; Chris@16: ++i; Chris@16: ++position; Chris@16: } Chris@16: pstate = pstate->next.p; Chris@16: return true; Chris@16: } Chris@16: Chris@16: template Chris@16: bool perl_matcher::match_long_set() Chris@16: { Chris@16: typedef typename traits::char_class_type char_class_type; Chris@16: // let the traits class do the work: Chris@16: if(position == last) Chris@16: return false; Chris@16: BidiIterator t = re_is_set_member(position, last, static_cast*>(pstate), re.get_data(), icase); Chris@16: if(t != position) Chris@16: { Chris@16: pstate = pstate->next.p; Chris@16: position = t; Chris@16: return true; Chris@16: } Chris@16: return false; Chris@16: } Chris@16: Chris@16: template Chris@16: bool perl_matcher::match_set() Chris@16: { Chris@16: if(position == last) Chris@16: return false; Chris@16: if(static_cast(pstate)->_map[static_cast(traits_inst.translate(*position, icase))]) Chris@16: { Chris@16: pstate = pstate->next.p; Chris@16: ++position; Chris@16: return true; Chris@16: } Chris@16: return false; Chris@16: } Chris@16: Chris@16: template Chris@16: bool perl_matcher::match_jump() Chris@16: { Chris@16: pstate = static_cast(pstate)->alt.p; Chris@16: return true; Chris@16: } Chris@16: Chris@16: template Chris@16: bool perl_matcher::match_combining() Chris@16: { Chris@16: if(position == last) Chris@16: return false; Chris@16: if(is_combining(traits_inst.translate(*position, icase))) Chris@16: return false; Chris@16: ++position; Chris@16: while((position != last) && is_combining(traits_inst.translate(*position, icase))) Chris@16: ++position; Chris@16: pstate = pstate->next.p; Chris@16: return true; Chris@16: } Chris@16: Chris@16: template Chris@16: bool perl_matcher::match_soft_buffer_end() Chris@16: { Chris@16: if(m_match_flags & match_not_eob) Chris@16: return false; Chris@16: BidiIterator p(position); Chris@16: while((p != last) && is_separator(traits_inst.translate(*p, icase)))++p; Chris@16: if(p != last) Chris@16: return false; Chris@16: pstate = pstate->next.p; Chris@16: return true; Chris@16: } Chris@16: Chris@16: template Chris@16: bool perl_matcher::match_restart_continue() Chris@16: { Chris@16: if(position == search_base) Chris@16: { Chris@16: pstate = pstate->next.p; Chris@16: return true; Chris@16: } Chris@16: return false; Chris@16: } Chris@16: Chris@16: template Chris@16: bool perl_matcher::match_backstep() Chris@16: { Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(push) Chris@16: #pragma warning(disable:4127) Chris@16: #endif Chris@16: if( ::boost::is_random_access_iterator::value) Chris@16: { Chris@16: std::ptrdiff_t maxlen = ::boost::re_detail::distance(backstop, position); Chris@16: if(maxlen < static_cast(pstate)->index) Chris@16: return false; Chris@16: std::advance(position, -static_cast(pstate)->index); Chris@16: } Chris@16: else Chris@16: { Chris@16: int c = static_cast(pstate)->index; Chris@16: while(c--) Chris@16: { Chris@16: if(position == backstop) Chris@16: return false; Chris@16: --position; Chris@16: } Chris@16: } Chris@16: pstate = pstate->next.p; Chris@16: return true; Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(pop) Chris@16: #endif Chris@16: } Chris@16: Chris@16: template Chris@16: inline bool perl_matcher::match_assert_backref() Chris@16: { Chris@16: // return true if marked sub-expression N has been matched: Chris@16: int index = static_cast(pstate)->index; Chris@16: bool result = false; Chris@16: if(index == 9999) Chris@16: { Chris@16: // Magic value for a (DEFINE) block: Chris@16: return false; Chris@16: } Chris@16: else if(index > 0) Chris@16: { Chris@16: // Have we matched subexpression "index"? Chris@16: // Check if index is a hash value: Chris@16: if(index >= 10000) Chris@16: { Chris@16: named_subexpressions::range_type r = re.get_data().equal_range(index); Chris@16: while(r.first != r.second) Chris@16: { Chris@16: if((*m_presult)[r.first->index].matched) Chris@16: { Chris@16: result = true; Chris@16: break; Chris@16: } Chris@16: ++r.first; Chris@16: } Chris@16: } Chris@16: else Chris@16: { Chris@16: result = (*m_presult)[index].matched; Chris@16: } Chris@16: pstate = pstate->next.p; Chris@16: } Chris@16: else Chris@16: { Chris@16: // Have we recursed into subexpression "index"? Chris@16: // If index == 0 then check for any recursion at all, otherwise for recursion to -index-1. Chris@16: int idx = -index-1; Chris@16: if(idx >= 10000) Chris@16: { Chris@16: named_subexpressions::range_type r = re.get_data().equal_range(idx); Chris@16: int stack_index = recursion_stack.empty() ? -1 : recursion_stack.back().idx; Chris@16: while(r.first != r.second) Chris@16: { Chris@16: result |= (stack_index == r.first->index); Chris@16: if(result)break; Chris@16: ++r.first; Chris@16: } Chris@16: } Chris@16: else Chris@16: { Chris@16: result = !recursion_stack.empty() && ((recursion_stack.back().idx == idx) || (index == 0)); Chris@16: } Chris@16: pstate = pstate->next.p; Chris@16: } Chris@16: return result; Chris@16: } Chris@16: Chris@16: template Chris@16: bool perl_matcher::match_toggle_case() Chris@16: { Chris@16: // change our case sensitivity: Chris@16: this->icase = static_cast(pstate)->icase; Chris@16: pstate = pstate->next.p; Chris@16: return true; Chris@16: } Chris@16: Chris@16: Chris@16: template Chris@16: bool perl_matcher::find_restart_any() Chris@16: { Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(push) Chris@16: #pragma warning(disable:4127) Chris@16: #endif Chris@16: const unsigned char* _map = re.get_map(); Chris@16: while(true) Chris@16: { Chris@16: // skip everything we can't match: Chris@16: while((position != last) && !can_start(*position, _map, (unsigned char)mask_any) ) Chris@16: ++position; Chris@16: if(position == last) Chris@16: { Chris@16: // run out of characters, try a null match if possible: Chris@16: if(re.can_be_null()) Chris@16: return match_prefix(); Chris@16: break; Chris@16: } Chris@16: // now try and obtain a match: Chris@16: if(match_prefix()) Chris@16: return true; Chris@16: if(position == last) Chris@16: return false; Chris@16: ++position; Chris@16: } Chris@16: return false; Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(pop) Chris@16: #endif Chris@16: } Chris@16: Chris@16: template Chris@16: bool perl_matcher::find_restart_word() Chris@16: { Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(push) Chris@16: #pragma warning(disable:4127) Chris@16: #endif Chris@16: // do search optimised for word starts: Chris@16: const unsigned char* _map = re.get_map(); Chris@16: if((m_match_flags & match_prev_avail) || (position != base)) Chris@16: --position; Chris@16: else if(match_prefix()) Chris@16: return true; Chris@16: do Chris@16: { Chris@16: while((position != last) && traits_inst.isctype(*position, m_word_mask)) Chris@16: ++position; Chris@16: while((position != last) && !traits_inst.isctype(*position, m_word_mask)) Chris@16: ++position; Chris@16: if(position == last) Chris@16: break; Chris@16: Chris@16: if(can_start(*position, _map, (unsigned char)mask_any) ) Chris@16: { Chris@16: if(match_prefix()) Chris@16: return true; Chris@16: } Chris@16: if(position == last) Chris@16: break; Chris@16: } while(true); Chris@16: return false; Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(pop) Chris@16: #endif Chris@16: } Chris@16: Chris@16: template Chris@16: bool perl_matcher::find_restart_line() Chris@16: { Chris@16: // do search optimised for line starts: Chris@16: const unsigned char* _map = re.get_map(); Chris@16: if(match_prefix()) Chris@16: return true; Chris@16: while(position != last) Chris@16: { Chris@16: while((position != last) && !is_separator(*position)) Chris@16: ++position; Chris@16: if(position == last) Chris@16: return false; Chris@16: ++position; Chris@16: if(position == last) Chris@16: { Chris@16: if(re.can_be_null() && match_prefix()) Chris@16: return true; Chris@16: return false; Chris@16: } Chris@16: Chris@16: if( can_start(*position, _map, (unsigned char)mask_any) ) Chris@16: { Chris@16: if(match_prefix()) Chris@16: return true; Chris@16: } Chris@16: if(position == last) Chris@16: return false; Chris@16: //++position; Chris@16: } Chris@16: return false; Chris@16: } Chris@16: Chris@16: template Chris@16: bool perl_matcher::find_restart_buf() Chris@16: { Chris@16: if((position == base) && ((m_match_flags & match_not_bob) == 0)) Chris@16: return match_prefix(); Chris@16: return false; Chris@16: } Chris@16: Chris@16: template Chris@16: bool perl_matcher::find_restart_lit() Chris@16: { Chris@16: #if 0 Chris@16: if(position == last) Chris@16: return false; // can't possibly match if we're at the end already Chris@16: Chris@16: unsigned type = (m_match_flags & match_continuous) ? Chris@16: static_cast(regbase::restart_continue) Chris@16: : static_cast(re.get_restart_type()); Chris@16: Chris@16: const kmp_info* info = access::get_kmp(re); Chris@16: int len = info->len; Chris@16: const char_type* x = info->pstr; Chris@16: int j = 0; Chris@16: while (position != last) Chris@16: { Chris@16: while((j > -1) && (x[j] != traits_inst.translate(*position, icase))) Chris@16: j = info->kmp_next[j]; Chris@16: ++position; Chris@16: ++j; Chris@16: if(j >= len) Chris@16: { Chris@16: if(type == regbase::restart_fixed_lit) Chris@16: { Chris@16: std::advance(position, -j); Chris@16: restart = position; Chris@16: std::advance(restart, len); Chris@16: m_result.set_first(position); Chris@16: m_result.set_second(restart); Chris@16: position = restart; Chris@16: return true; Chris@16: } Chris@16: else Chris@16: { Chris@16: restart = position; Chris@16: std::advance(position, -j); Chris@16: if(match_prefix()) Chris@16: return true; Chris@16: else Chris@16: { Chris@16: for(int k = 0; (restart != position) && (k < j); ++k, --restart) Chris@16: {} // dwa 10/20/2000 - warning suppression for MWCW Chris@16: if(restart != last) Chris@16: ++restart; Chris@16: position = restart; Chris@16: j = 0; //we could do better than this... Chris@16: } Chris@16: } Chris@16: } Chris@16: } Chris@16: if((m_match_flags & match_partial) && (position == last) && j) Chris@16: { Chris@16: // we need to check for a partial match: Chris@16: restart = position; Chris@16: std::advance(position, -j); Chris@16: return match_prefix(); Chris@16: } Chris@16: #endif Chris@16: return false; Chris@16: } Chris@16: Chris@16: } // namespace re_detail Chris@16: Chris@16: } // namespace boost Chris@16: Chris@16: #ifdef BOOST_MSVC Chris@16: # pragma warning(pop) Chris@16: #endif Chris@16: Chris@16: #ifdef __BORLANDC__ Chris@16: # pragma option pop Chris@16: #endif Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(push) Chris@16: #pragma warning(disable: 4103) Chris@16: #endif Chris@16: #ifdef BOOST_HAS_ABI_HEADERS Chris@16: # include BOOST_ABI_SUFFIX Chris@16: #endif Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(pop) Chris@16: #endif Chris@16: Chris@16: #endif Chris@16: