Chris@16: /* Chris@16: * Chris@16: * Copyright (c) 2002 Chris@16: * John Maddock Chris@16: * Chris@16: * Use, modification and distribution are subject to the Chris@16: * Boost Software License, Version 1.0. (See accompanying file Chris@16: * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) Chris@16: * Chris@16: */ Chris@16: Chris@16: /* Chris@16: * LOCATION: see http://www.boost.org for most recent version. Chris@16: * FILE perl_matcher_common.cpp Chris@16: * VERSION see Chris@16: * DESCRIPTION: Definitions of perl_matcher member functions that are Chris@16: * specific to the recursive implementation. Chris@16: */ Chris@16: Chris@16: #ifndef BOOST_REGEX_V4_PERL_MATCHER_RECURSIVE_HPP Chris@16: #define BOOST_REGEX_V4_PERL_MATCHER_RECURSIVE_HPP Chris@16: Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(push) Chris@16: #pragma warning(disable: 4103) Chris@16: #endif Chris@16: #ifdef BOOST_HAS_ABI_HEADERS Chris@16: # include BOOST_ABI_PREFIX Chris@16: #endif Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(pop) Chris@16: #endif Chris@16: Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(push) Chris@16: #pragma warning(disable: 4800) Chris@16: #endif Chris@16: Chris@16: namespace boost{ Chris@16: namespace re_detail{ Chris@16: Chris@16: template Chris@16: class backup_subex Chris@16: { Chris@16: int index; Chris@16: sub_match sub; Chris@16: public: Chris@16: template Chris@16: backup_subex(const match_results& w, int i) Chris@16: : index(i), sub(w[i], false) {} Chris@16: template Chris@16: void restore(match_results& w) Chris@16: { Chris@16: w.set_first(sub.first, index, index == 0); Chris@16: w.set_second(sub.second, index, sub.matched, index == 0); Chris@16: } Chris@16: const sub_match& get() { return sub; } Chris@16: }; Chris@16: Chris@16: template Chris@16: bool perl_matcher::match_all_states() Chris@16: { Chris@16: static matcher_proc_type const s_match_vtable[30] = Chris@16: { Chris@16: (&perl_matcher::match_startmark), Chris@16: &perl_matcher::match_endmark, Chris@16: &perl_matcher::match_literal, Chris@16: &perl_matcher::match_start_line, Chris@16: &perl_matcher::match_end_line, Chris@16: &perl_matcher::match_wild, Chris@16: &perl_matcher::match_match, Chris@16: &perl_matcher::match_word_boundary, Chris@16: &perl_matcher::match_within_word, Chris@16: &perl_matcher::match_word_start, Chris@16: &perl_matcher::match_word_end, Chris@16: &perl_matcher::match_buffer_start, Chris@16: &perl_matcher::match_buffer_end, Chris@16: &perl_matcher::match_backref, Chris@16: &perl_matcher::match_long_set, Chris@16: &perl_matcher::match_set, Chris@16: &perl_matcher::match_jump, Chris@16: &perl_matcher::match_alt, Chris@16: &perl_matcher::match_rep, Chris@16: &perl_matcher::match_combining, Chris@16: &perl_matcher::match_soft_buffer_end, Chris@16: &perl_matcher::match_restart_continue, Chris@16: // Although this next line *should* be evaluated at compile time, in practice Chris@16: // some compilers (VC++) emit run-time initialisation which breaks thread Chris@16: // safety, so use a dispatch function instead: Chris@16: //(::boost::is_random_access_iterator::value ? &perl_matcher::match_dot_repeat_fast : &perl_matcher::match_dot_repeat_slow), Chris@16: &perl_matcher::match_dot_repeat_dispatch, Chris@16: &perl_matcher::match_char_repeat, Chris@16: &perl_matcher::match_set_repeat, Chris@16: &perl_matcher::match_long_set_repeat, Chris@16: &perl_matcher::match_backstep, Chris@16: &perl_matcher::match_assert_backref, Chris@16: &perl_matcher::match_toggle_case, Chris@16: &perl_matcher::match_recursion, Chris@16: }; Chris@16: Chris@16: if(state_count > max_state_count) Chris@16: raise_error(traits_inst, regex_constants::error_complexity); Chris@16: while(pstate) Chris@16: { Chris@16: matcher_proc_type proc = s_match_vtable[pstate->type]; Chris@16: ++state_count; Chris@16: if(!(this->*proc)()) Chris@16: { Chris@16: if((m_match_flags & match_partial) && (position == last) && (position != search_base)) Chris@16: m_has_partial_match = true; Chris@16: return 0; Chris@16: } Chris@16: } Chris@16: return true; Chris@16: } Chris@16: Chris@16: template Chris@16: bool perl_matcher::match_startmark() Chris@16: { Chris@16: int index = static_cast(pstate)->index; Chris@16: icase = static_cast(pstate)->icase; Chris@16: bool r = true; Chris@16: switch(index) Chris@16: { Chris@16: case 0: Chris@16: pstate = pstate->next.p; Chris@16: break; Chris@16: case -1: Chris@16: case -2: Chris@16: { Chris@16: // forward lookahead assert: Chris@16: BidiIterator old_position(position); Chris@16: const re_syntax_base* next_pstate = static_cast(pstate->next.p)->alt.p->next.p; Chris@16: pstate = pstate->next.p->next.p; Chris@16: r = match_all_states(); Chris@16: pstate = next_pstate; Chris@16: position = old_position; Chris@16: if((r && (index != -1)) || (!r && (index != -2))) Chris@16: r = false; Chris@16: else Chris@16: r = true; Chris@16: break; Chris@16: } Chris@16: case -3: Chris@16: { Chris@16: // independent sub-expression: Chris@16: bool old_independent = m_independent; Chris@16: m_independent = true; Chris@16: const re_syntax_base* next_pstate = static_cast(pstate->next.p)->alt.p->next.p; Chris@16: pstate = pstate->next.p->next.p; Chris@16: r = match_all_states(); Chris@16: pstate = next_pstate; Chris@16: m_independent = old_independent; Chris@16: #ifdef BOOST_REGEX_MATCH_EXTRA Chris@16: if(r && (m_match_flags & match_extra)) Chris@16: { Chris@16: // Chris@16: // our captures have been stored in *m_presult Chris@16: // we need to unpack them, and insert them Chris@16: // back in the right order when we unwind the stack: Chris@16: // Chris@16: unsigned i; Chris@16: match_results tm(*m_presult); Chris@16: for(i = 0; i < tm.size(); ++i) Chris@16: (*m_presult)[i].get_captures().clear(); Chris@16: // match everything else: Chris@16: r = match_all_states(); Chris@16: // now place the stored captures back: Chris@16: for(i = 0; i < tm.size(); ++i) Chris@16: { Chris@16: typedef typename sub_match::capture_sequence_type seq; Chris@16: seq& s1 = (*m_presult)[i].get_captures(); Chris@16: const seq& s2 = tm[i].captures(); Chris@16: s1.insert( Chris@16: s1.end(), Chris@16: s2.begin(), Chris@16: s2.end()); Chris@16: } Chris@16: } Chris@16: #endif Chris@16: break; Chris@16: } Chris@16: case -4: Chris@16: { Chris@16: // conditional expression: Chris@16: const re_alt* alt = static_cast(pstate->next.p); Chris@16: BOOST_ASSERT(alt->type == syntax_element_alt); Chris@16: pstate = alt->next.p; Chris@16: if(pstate->type == syntax_element_assert_backref) Chris@16: { Chris@16: if(!match_assert_backref()) Chris@16: pstate = alt->alt.p; Chris@16: break; Chris@16: } Chris@16: else Chris@16: { Chris@16: // zero width assertion, have to match this recursively: Chris@16: BOOST_ASSERT(pstate->type == syntax_element_startmark); Chris@16: bool negated = static_cast(pstate)->index == -2; Chris@16: BidiIterator saved_position = position; Chris@16: const re_syntax_base* next_pstate = static_cast(pstate->next.p)->alt.p->next.p; Chris@16: pstate = pstate->next.p->next.p; Chris@16: bool res = match_all_states(); Chris@16: position = saved_position; Chris@16: if(negated) Chris@16: res = !res; Chris@16: if(res) Chris@16: pstate = next_pstate; Chris@16: else Chris@16: pstate = alt->alt.p; Chris@16: break; Chris@16: } Chris@16: } Chris@16: case -5: Chris@16: { Chris@16: // Reset start of $0, since we have a \K escape Chris@16: backup_subex sub(*m_presult, 0); Chris@16: m_presult->set_first(position, 0, true); Chris@16: pstate = pstate->next.p; Chris@16: r = match_all_states(); Chris@16: if(r == false) Chris@16: sub.restore(*m_presult); Chris@16: break; Chris@16: } Chris@16: default: Chris@16: { Chris@16: BOOST_ASSERT(index > 0); Chris@16: if((m_match_flags & match_nosubs) == 0) Chris@16: { Chris@16: backup_subex sub(*m_presult, index); Chris@16: m_presult->set_first(position, index); Chris@16: pstate = pstate->next.p; Chris@16: r = match_all_states(); Chris@16: if(r == false) Chris@16: sub.restore(*m_presult); Chris@16: #ifdef BOOST_REGEX_MATCH_EXTRA Chris@16: // Chris@16: // we have a match, push the capture information onto the stack: Chris@16: // Chris@16: else if(sub.get().matched && (match_extra & m_match_flags)) Chris@16: ((*m_presult)[index]).get_captures().push_back(sub.get()); Chris@16: #endif Chris@16: } Chris@16: else Chris@16: { Chris@16: pstate = pstate->next.p; Chris@16: } Chris@16: break; Chris@16: } Chris@16: } Chris@16: return r; Chris@16: } Chris@16: Chris@16: template Chris@16: bool perl_matcher::match_alt() Chris@16: { Chris@16: bool take_first, take_second; Chris@16: const re_alt* jmp = static_cast(pstate); Chris@16: Chris@16: // find out which of these two alternatives we need to take: Chris@16: if(position == last) Chris@16: { Chris@16: take_first = jmp->can_be_null & mask_take; Chris@16: take_second = jmp->can_be_null & mask_skip; Chris@16: } Chris@16: else Chris@16: { Chris@16: take_first = can_start(*position, jmp->_map, (unsigned char)mask_take); Chris@16: take_second = can_start(*position, jmp->_map, (unsigned char)mask_skip); Chris@16: } Chris@16: Chris@16: if(take_first) Chris@16: { Chris@16: // we can take the first alternative, Chris@16: // see if we need to push next alternative: Chris@16: if(take_second) Chris@16: { Chris@16: BidiIterator oldposition(position); Chris@16: const re_syntax_base* old_pstate = jmp->alt.p; Chris@16: pstate = pstate->next.p; Chris@16: if(!match_all_states()) Chris@16: { Chris@16: pstate = old_pstate; Chris@16: position = oldposition; Chris@16: } Chris@16: return true; Chris@16: } Chris@16: pstate = pstate->next.p; Chris@16: return true; Chris@16: } Chris@16: if(take_second) Chris@16: { Chris@16: pstate = jmp->alt.p; Chris@16: return true; Chris@16: } Chris@16: return false; // neither option is possible Chris@16: } Chris@16: Chris@16: template Chris@16: bool perl_matcher::match_rep() Chris@16: { Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(push) Chris@16: #pragma warning(disable:4127 4244) Chris@16: #endif Chris@16: const re_repeat* rep = static_cast(pstate); Chris@16: // Chris@16: // Always copy the repeat count, so that the state is restored Chris@16: // when we exit this scope: Chris@16: // Chris@16: repeater_count r(rep->state_id, &next_count, position); Chris@16: // Chris@16: // If we've had at least one repeat already, and the last one Chris@16: // matched the NULL string then set the repeat count to Chris@16: // maximum: Chris@16: // Chris@16: next_count->check_null_repeat(position, rep->max); Chris@16: Chris@16: // find out which of these two alternatives we need to take: Chris@16: bool take_first, take_second; Chris@16: if(position == last) Chris@16: { Chris@16: take_first = rep->can_be_null & mask_take; Chris@16: take_second = rep->can_be_null & mask_skip; Chris@16: } Chris@16: else Chris@16: { Chris@16: take_first = can_start(*position, rep->_map, (unsigned char)mask_take); Chris@16: take_second = can_start(*position, rep->_map, (unsigned char)mask_skip); Chris@16: } Chris@16: Chris@16: if(next_count->get_count() < rep->min) Chris@16: { Chris@16: // we must take the repeat: Chris@16: if(take_first) Chris@16: { Chris@16: // increase the counter: Chris@16: ++(*next_count); Chris@16: pstate = rep->next.p; Chris@16: return match_all_states(); Chris@16: } Chris@16: return false; Chris@16: } Chris@16: bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); Chris@16: if(greedy) Chris@16: { Chris@16: // try and take the repeat if we can: Chris@16: if((next_count->get_count() < rep->max) && take_first) Chris@16: { Chris@16: // store position in case we fail: Chris@16: BidiIterator pos = position; Chris@16: // increase the counter: Chris@16: ++(*next_count); Chris@16: pstate = rep->next.p; Chris@16: if(match_all_states()) Chris@16: return true; Chris@16: // failed repeat, reset posistion and fall through for alternative: Chris@16: position = pos; Chris@16: } Chris@16: if(take_second) Chris@16: { Chris@16: pstate = rep->alt.p; Chris@16: return true; Chris@16: } Chris@16: return false; // can't take anything, fail... Chris@16: } Chris@16: else // non-greedy Chris@16: { Chris@16: // try and skip the repeat if we can: Chris@16: if(take_second) Chris@16: { Chris@16: // store position in case we fail: Chris@16: BidiIterator pos = position; Chris@16: pstate = rep->alt.p; Chris@16: if(match_all_states()) Chris@16: return true; Chris@16: // failed alternative, reset posistion and fall through for repeat: Chris@16: position = pos; Chris@16: } Chris@16: if((next_count->get_count() < rep->max) && take_first) Chris@16: { Chris@16: // increase the counter: Chris@16: ++(*next_count); Chris@16: pstate = rep->next.p; Chris@16: return match_all_states(); Chris@16: } Chris@16: } Chris@16: return false; Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(pop) Chris@16: #endif Chris@16: } Chris@16: Chris@16: template Chris@16: bool perl_matcher::match_dot_repeat_slow() Chris@16: { Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(push) Chris@16: #pragma warning(disable:4127) Chris@16: #endif Chris@16: unsigned count = 0; Chris@16: const re_repeat* rep = static_cast(pstate); Chris@16: re_syntax_base* psingle = rep->next.p; Chris@16: // match compulsary repeats first: Chris@16: while(count < rep->min) Chris@16: { Chris@16: pstate = psingle; Chris@16: if(!match_wild()) Chris@16: return false; Chris@16: ++count; Chris@16: } Chris@16: bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); Chris@16: if(greedy) Chris@16: { Chris@16: // normal repeat: Chris@16: while(count < rep->max) Chris@16: { Chris@16: pstate = psingle; Chris@16: if(!match_wild()) Chris@16: break; Chris@16: ++count; Chris@16: } Chris@16: if((rep->leading) && (count < rep->max)) Chris@16: restart = position; Chris@16: pstate = rep; Chris@16: return backtrack_till_match(count - rep->min); Chris@16: } Chris@16: else Chris@16: { Chris@16: // non-greedy, keep trying till we get a match: Chris@16: BidiIterator save_pos; Chris@16: do Chris@16: { Chris@16: if((rep->leading) && (rep->max == UINT_MAX)) Chris@16: restart = position; Chris@16: pstate = rep->alt.p; Chris@16: save_pos = position; Chris@16: ++state_count; Chris@16: if(match_all_states()) Chris@16: return true; Chris@16: if(count >= rep->max) Chris@16: return false; Chris@16: ++count; Chris@16: pstate = psingle; Chris@16: position = save_pos; Chris@16: if(!match_wild()) Chris@16: return false; Chris@16: }while(true); Chris@16: } Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(pop) Chris@16: #endif Chris@16: } Chris@16: Chris@16: template Chris@16: bool perl_matcher::match_dot_repeat_fast() Chris@16: { Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(push) Chris@16: #pragma warning(disable:4127) Chris@16: #endif Chris@16: if(m_match_flags & match_not_dot_null) Chris@16: return match_dot_repeat_slow(); Chris@16: if((static_cast(pstate->next.p)->mask & match_any_mask) == 0) Chris@16: return match_dot_repeat_slow(); Chris@16: // Chris@16: // start by working out how much we can skip: Chris@16: // Chris@16: const re_repeat* rep = static_cast(pstate); Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(push) Chris@16: #pragma warning(disable:4267) Chris@16: #endif Chris@16: bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); Chris@16: std::size_t count = (std::min)(static_cast(::boost::re_detail::distance(position, last)), static_cast(greedy ? rep->max : rep->min)); Chris@16: if(rep->min > count) Chris@16: { Chris@16: position = last; Chris@16: return false; // not enough text left to match Chris@16: } Chris@16: std::advance(position, count); Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(pop) Chris@16: #endif Chris@16: if((rep->leading) && (count < rep->max) && greedy) Chris@16: restart = position; Chris@16: if(greedy) Chris@16: return backtrack_till_match(count - rep->min); Chris@16: Chris@16: // non-greedy, keep trying till we get a match: Chris@16: BidiIterator save_pos; Chris@16: do Chris@16: { Chris@16: while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip)) Chris@16: { Chris@16: ++position; Chris@16: ++count; Chris@16: } Chris@16: if((rep->leading) && (rep->max == UINT_MAX)) Chris@16: restart = position; Chris@16: pstate = rep->alt.p; Chris@16: save_pos = position; Chris@16: ++state_count; Chris@16: if(match_all_states()) Chris@16: return true; Chris@16: if(count >= rep->max) Chris@16: return false; Chris@16: if(save_pos == last) Chris@16: return false; Chris@16: position = ++save_pos; Chris@16: ++count; Chris@16: }while(true); Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(pop) Chris@16: #endif Chris@16: } Chris@16: Chris@16: template Chris@16: bool perl_matcher::match_char_repeat() Chris@16: { Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(push) Chris@16: #pragma warning(disable:4127) Chris@16: #pragma warning(disable:4267) Chris@16: #endif Chris@16: #ifdef __BORLANDC__ Chris@16: #pragma option push -w-8008 -w-8066 -w-8004 Chris@16: #endif Chris@16: const re_repeat* rep = static_cast(pstate); Chris@16: BOOST_ASSERT(1 == static_cast(rep->next.p)->length); Chris@16: const char_type what = *reinterpret_cast(static_cast(rep->next.p) + 1); Chris@16: // Chris@16: // start by working out how much we can skip: Chris@16: // Chris@16: bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); Chris@16: std::size_t count, desired; Chris@16: if(::boost::is_random_access_iterator::value) Chris@16: { Chris@16: desired = Chris@16: (std::min)( Chris@16: (std::size_t)(greedy ? rep->max : rep->min), Chris@16: (std::size_t)::boost::re_detail::distance(position, last)); Chris@16: count = desired; Chris@16: ++desired; Chris@16: if(icase) Chris@16: { Chris@16: while(--desired && (traits_inst.translate_nocase(*position) == what)) Chris@16: { Chris@16: ++position; Chris@16: } Chris@16: } Chris@16: else Chris@16: { Chris@16: while(--desired && (traits_inst.translate(*position) == what)) Chris@16: { Chris@16: ++position; Chris@16: } Chris@16: } Chris@16: count = count - desired; Chris@16: } Chris@16: else Chris@16: { Chris@16: count = 0; Chris@16: desired = greedy ? rep->max : rep->min; Chris@16: while((count < desired) && (position != last) && (traits_inst.translate(*position, icase) == what)) Chris@16: { Chris@16: ++position; Chris@16: ++count; Chris@16: } Chris@16: } Chris@16: if((rep->leading) && (count < rep->max) && greedy) Chris@16: restart = position; Chris@16: if(count < rep->min) Chris@16: return false; Chris@16: Chris@16: if(greedy) Chris@16: return backtrack_till_match(count - rep->min); Chris@16: Chris@16: // non-greedy, keep trying till we get a match: Chris@16: BidiIterator save_pos; Chris@16: do Chris@16: { Chris@16: while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip)) Chris@16: { Chris@16: if((traits_inst.translate(*position, icase) == what)) Chris@16: { Chris@16: ++position; Chris@16: ++count; Chris@16: } Chris@16: else Chris@16: return false; // counldn't repeat even though it was the only option Chris@16: } Chris@16: if((rep->leading) && (rep->max == UINT_MAX)) Chris@16: restart = position; Chris@16: pstate = rep->alt.p; Chris@16: save_pos = position; Chris@16: ++state_count; Chris@16: if(match_all_states()) Chris@16: return true; Chris@16: if(count >= rep->max) Chris@16: return false; Chris@16: position = save_pos; Chris@16: if(position == last) Chris@16: return false; Chris@16: if(traits_inst.translate(*position, icase) == what) Chris@16: { Chris@16: ++position; Chris@16: ++count; Chris@16: } Chris@16: else Chris@16: { Chris@16: return false; Chris@16: } Chris@16: }while(true); Chris@16: #ifdef __BORLANDC__ Chris@16: #pragma option pop Chris@16: #endif Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(pop) Chris@16: #endif Chris@16: } Chris@16: Chris@16: template Chris@16: bool perl_matcher::match_set_repeat() Chris@16: { Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(push) Chris@16: #pragma warning(disable:4127) Chris@16: #endif Chris@16: #ifdef __BORLANDC__ Chris@16: #pragma option push -w-8008 -w-8066 -w-8004 Chris@16: #endif Chris@16: const re_repeat* rep = static_cast(pstate); Chris@16: const unsigned char* map = static_cast(rep->next.p)->_map; Chris@16: unsigned count = 0; Chris@16: // Chris@16: // start by working out how much we can skip: Chris@16: // Chris@16: bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); Chris@16: std::size_t desired = greedy ? rep->max : rep->min; Chris@16: if(::boost::is_random_access_iterator::value) Chris@16: { Chris@16: BidiIterator end = position; Chris@16: // Move end forward by "desired", preferably without using distance or advance if we can Chris@16: // as these can be slow for some iterator types. Chris@16: std::size_t len = (desired == (std::numeric_limits::max)()) ? 0u : ::boost::re_detail::distance(position, last); Chris@16: if(desired >= len) Chris@16: end = last; Chris@16: else Chris@16: std::advance(end, desired); Chris@16: BidiIterator origin(position); Chris@16: while((position != end) && map[static_cast(traits_inst.translate(*position, icase))]) Chris@16: { Chris@16: ++position; Chris@16: } Chris@16: count = (unsigned)::boost::re_detail::distance(origin, position); Chris@16: } Chris@16: else Chris@16: { Chris@16: while((count < desired) && (position != last) && map[static_cast(traits_inst.translate(*position, icase))]) Chris@16: { Chris@16: ++position; Chris@16: ++count; Chris@16: } Chris@16: } Chris@16: if((rep->leading) && (count < rep->max) && greedy) Chris@16: restart = position; Chris@16: if(count < rep->min) Chris@16: return false; Chris@16: Chris@16: if(greedy) Chris@16: return backtrack_till_match(count - rep->min); Chris@16: Chris@16: // non-greedy, keep trying till we get a match: Chris@16: BidiIterator save_pos; Chris@16: do Chris@16: { Chris@16: while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip)) Chris@16: { Chris@16: if(map[static_cast(traits_inst.translate(*position, icase))]) Chris@16: { Chris@16: ++position; Chris@16: ++count; Chris@16: } Chris@16: else Chris@16: return false; // counldn't repeat even though it was the only option Chris@16: } Chris@16: if((rep->leading) && (rep->max == UINT_MAX)) Chris@16: restart = position; Chris@16: pstate = rep->alt.p; Chris@16: save_pos = position; Chris@16: ++state_count; Chris@16: if(match_all_states()) Chris@16: return true; Chris@16: if(count >= rep->max) Chris@16: return false; Chris@16: position = save_pos; Chris@16: if(position == last) Chris@16: return false; Chris@16: if(map[static_cast(traits_inst.translate(*position, icase))]) Chris@16: { Chris@16: ++position; Chris@16: ++count; Chris@16: } Chris@16: else Chris@16: { Chris@16: return false; Chris@16: } Chris@16: }while(true); Chris@16: #ifdef __BORLANDC__ Chris@16: #pragma option pop Chris@16: #endif Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(pop) Chris@16: #endif Chris@16: } Chris@16: Chris@16: template Chris@16: bool perl_matcher::match_long_set_repeat() Chris@16: { Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(push) Chris@16: #pragma warning(disable:4127) Chris@16: #endif Chris@16: #ifdef __BORLANDC__ Chris@16: #pragma option push -w-8008 -w-8066 -w-8004 Chris@16: #endif Chris@16: typedef typename traits::char_class_type char_class_type; Chris@16: const re_repeat* rep = static_cast(pstate); Chris@16: const re_set_long* set = static_cast*>(pstate->next.p); Chris@16: unsigned count = 0; Chris@16: // Chris@16: // start by working out how much we can skip: Chris@16: // Chris@16: bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); Chris@16: std::size_t desired = greedy ? rep->max : rep->min; Chris@16: if(::boost::is_random_access_iterator::value) Chris@16: { Chris@16: BidiIterator end = position; Chris@16: // Move end forward by "desired", preferably without using distance or advance if we can Chris@16: // as these can be slow for some iterator types. Chris@16: std::size_t len = (desired == (std::numeric_limits::max)()) ? 0u : ::boost::re_detail::distance(position, last); Chris@16: if(desired >= len) Chris@16: end = last; Chris@16: else Chris@16: std::advance(end, desired); Chris@16: BidiIterator origin(position); Chris@16: while((position != end) && (position != re_is_set_member(position, last, set, re.get_data(), icase))) Chris@16: { Chris@16: ++position; Chris@16: } Chris@16: count = (unsigned)::boost::re_detail::distance(origin, position); Chris@16: } Chris@16: else Chris@16: { Chris@16: while((count < desired) && (position != last) && (position != re_is_set_member(position, last, set, re.get_data(), icase))) Chris@16: { Chris@16: ++position; Chris@16: ++count; Chris@16: } Chris@16: } Chris@16: if((rep->leading) && (count < rep->max) && greedy) Chris@16: restart = position; Chris@16: if(count < rep->min) Chris@16: return false; Chris@16: Chris@16: if(greedy) Chris@16: return backtrack_till_match(count - rep->min); Chris@16: Chris@16: // non-greedy, keep trying till we get a match: Chris@16: BidiIterator save_pos; Chris@16: do Chris@16: { Chris@16: while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip)) Chris@16: { Chris@16: if(position != re_is_set_member(position, last, set, re.get_data(), icase)) Chris@16: { Chris@16: ++position; Chris@16: ++count; Chris@16: } Chris@16: else Chris@16: return false; // counldn't repeat even though it was the only option Chris@16: } Chris@16: if((rep->leading) && (rep->max == UINT_MAX)) Chris@16: restart = position; Chris@16: pstate = rep->alt.p; Chris@16: save_pos = position; Chris@16: ++state_count; Chris@16: if(match_all_states()) Chris@16: return true; Chris@16: if(count >= rep->max) Chris@16: return false; Chris@16: position = save_pos; Chris@16: if(position == last) Chris@16: return false; Chris@16: if(position != re_is_set_member(position, last, set, re.get_data(), icase)) Chris@16: { Chris@16: ++position; Chris@16: ++count; Chris@16: } Chris@16: else Chris@16: { Chris@16: return false; Chris@16: } Chris@16: }while(true); Chris@16: #ifdef __BORLANDC__ Chris@16: #pragma option pop Chris@16: #endif Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(pop) Chris@16: #endif Chris@16: } Chris@16: Chris@16: template Chris@16: bool perl_matcher::backtrack_till_match(std::size_t count) Chris@16: { Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(push) Chris@16: #pragma warning(disable:4127) Chris@16: #endif Chris@16: if((m_match_flags & match_partial) && (position == last)) Chris@16: m_has_partial_match = true; Chris@16: Chris@16: const re_repeat* rep = static_cast(pstate); Chris@16: BidiIterator backtrack = position; Chris@16: if(position == last) Chris@16: { Chris@16: if(rep->can_be_null & mask_skip) Chris@16: { Chris@16: pstate = rep->alt.p; Chris@16: if(match_all_states()) Chris@16: return true; Chris@16: } Chris@16: if(count) Chris@16: { Chris@16: position = --backtrack; Chris@16: --count; Chris@16: } Chris@16: else Chris@16: return false; Chris@16: } Chris@16: do Chris@16: { Chris@16: while(count && !can_start(*position, rep->_map, mask_skip)) Chris@16: { Chris@16: --position; Chris@16: --count; Chris@16: ++state_count; Chris@16: } Chris@16: pstate = rep->alt.p; Chris@16: backtrack = position; Chris@16: if(match_all_states()) Chris@16: return true; Chris@16: if(count == 0) Chris@16: return false; Chris@16: position = --backtrack; Chris@16: ++state_count; Chris@16: --count; Chris@16: }while(true); Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(pop) Chris@16: #endif Chris@16: } Chris@16: Chris@16: template Chris@16: bool perl_matcher::match_recursion() Chris@16: { Chris@16: BOOST_ASSERT(pstate->type == syntax_element_recurse); Chris@16: // Chris@16: // Set new call stack: Chris@16: // Chris@16: if(recursion_stack.capacity() == 0) Chris@16: { Chris@16: recursion_stack.reserve(50); Chris@16: } Chris@16: recursion_stack.push_back(recursion_info()); Chris@16: recursion_stack.back().preturn_address = pstate->next.p; Chris@16: recursion_stack.back().results = *m_presult; Chris@16: recursion_stack.back().repeater_stack = next_count; Chris@16: pstate = static_cast(pstate)->alt.p; Chris@16: recursion_stack.back().idx = static_cast(pstate)->index; Chris@16: Chris@16: repeater_count* saved = next_count; Chris@16: repeater_count r(&next_count); // resets all repeat counts since we're recursing and starting fresh on those Chris@16: next_count = &r; Chris@16: bool result = match_all_states(); Chris@16: next_count = saved; Chris@16: Chris@16: if(!result) Chris@16: { Chris@16: next_count = recursion_stack.back().repeater_stack; Chris@16: *m_presult = recursion_stack.back().results; Chris@16: recursion_stack.pop_back(); Chris@16: return false; Chris@16: } Chris@16: return true; Chris@16: } Chris@16: Chris@16: template Chris@16: bool perl_matcher::match_endmark() Chris@16: { Chris@16: int index = static_cast(pstate)->index; Chris@16: icase = static_cast(pstate)->icase; Chris@16: if(index > 0) Chris@16: { Chris@16: if((m_match_flags & match_nosubs) == 0) Chris@16: { Chris@16: m_presult->set_second(position, index); Chris@16: } Chris@16: if(!recursion_stack.empty()) Chris@16: { Chris@16: if(index == recursion_stack.back().idx) Chris@16: { Chris@16: recursion_info saved = recursion_stack.back(); Chris@16: recursion_stack.pop_back(); Chris@16: pstate = saved.preturn_address; Chris@16: repeater_count* saved_count = next_count; Chris@16: next_count = saved.repeater_stack; Chris@16: *m_presult = saved.results; Chris@16: if(!match_all_states()) Chris@16: { Chris@16: recursion_stack.push_back(saved); Chris@16: next_count = saved_count; Chris@16: return false; Chris@16: } Chris@16: } Chris@16: } Chris@16: } Chris@16: else if((index < 0) && (index != -4)) Chris@16: { Chris@16: // matched forward lookahead: Chris@16: pstate = 0; Chris@16: return true; Chris@16: } Chris@16: pstate = pstate ? pstate->next.p : 0; Chris@16: return true; Chris@16: } Chris@16: Chris@16: template Chris@16: bool perl_matcher::match_match() Chris@16: { Chris@16: if(!recursion_stack.empty()) Chris@16: { Chris@16: BOOST_ASSERT(0 == recursion_stack.back().idx); Chris@16: const re_syntax_base* saved_state = pstate = recursion_stack.back().preturn_address; Chris@16: *m_presult = recursion_stack.back().results; Chris@16: recursion_stack.pop_back(); Chris@16: if(!match_all_states()) Chris@16: { Chris@16: recursion_stack.push_back(recursion_info()); Chris@16: recursion_stack.back().preturn_address = saved_state; Chris@16: recursion_stack.back().results = *m_presult; Chris@16: return false; Chris@16: } Chris@16: return true; Chris@16: } Chris@16: if((m_match_flags & match_not_null) && (position == (*m_presult)[0].first)) Chris@16: return false; Chris@16: if((m_match_flags & match_all) && (position != last)) Chris@16: return false; Chris@16: if((m_match_flags & regex_constants::match_not_initial_null) && (position == search_base)) Chris@16: return false; Chris@16: m_presult->set_second(position); Chris@16: pstate = 0; Chris@16: m_has_found_match = true; Chris@16: if((m_match_flags & match_posix) == match_posix) Chris@16: { Chris@16: m_result.maybe_assign(*m_presult); Chris@16: if((m_match_flags & match_any) == 0) Chris@16: return false; Chris@16: } Chris@16: #ifdef BOOST_REGEX_MATCH_EXTRA Chris@16: if(match_extra & m_match_flags) Chris@16: { Chris@16: for(unsigned i = 0; i < m_presult->size(); ++i) Chris@16: if((*m_presult)[i].matched) Chris@16: ((*m_presult)[i]).get_captures().push_back((*m_presult)[i]); Chris@16: } Chris@16: #endif Chris@16: return true; Chris@16: } Chris@16: Chris@16: Chris@16: Chris@16: } // namespace re_detail Chris@16: } // namespace boost Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(pop) Chris@16: #endif Chris@16: Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(push) Chris@16: #pragma warning(disable: 4103) Chris@16: #endif Chris@16: #ifdef BOOST_HAS_ABI_HEADERS Chris@16: # include BOOST_ABI_SUFFIX Chris@16: #endif Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(pop) Chris@16: #endif Chris@16: Chris@16: #endif Chris@16: