annotate DEPENDENCIES/generic/include/boost/regex/v4/perl_matcher_common.hpp @ 133:4acb5d8d80b6 tip

Don't fail environmental check if README.md exists (but .txt and no-suffix don't)
author Chris Cannam
date Tue, 30 Jul 2019 12:25:44 +0100
parents c530137014c0
children
rev   line source
Chris@16 1 /*
Chris@16 2 *
Chris@16 3 * Copyright (c) 2002
Chris@16 4 * John Maddock
Chris@16 5 *
Chris@16 6 * Use, modification and distribution are subject to the
Chris@16 7 * Boost Software License, Version 1.0. (See accompanying file
Chris@16 8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
Chris@16 9 *
Chris@16 10 */
Chris@16 11
Chris@16 12 /*
Chris@16 13 * LOCATION: see http://www.boost.org for most recent version.
Chris@16 14 * FILE perl_matcher_common.hpp
Chris@16 15 * VERSION see <boost/version.hpp>
Chris@16 16 * DESCRIPTION: Definitions of perl_matcher member functions that are
Chris@16 17 * common to both the recursive and non-recursive versions.
Chris@16 18 */
Chris@16 19
Chris@16 20 #ifndef BOOST_REGEX_V4_PERL_MATCHER_COMMON_HPP
Chris@16 21 #define BOOST_REGEX_V4_PERL_MATCHER_COMMON_HPP
Chris@16 22
Chris@16 23 #ifdef BOOST_MSVC
Chris@16 24 #pragma warning(push)
Chris@16 25 #pragma warning(disable: 4103)
Chris@16 26 #endif
Chris@16 27 #ifdef BOOST_HAS_ABI_HEADERS
Chris@16 28 # include BOOST_ABI_PREFIX
Chris@16 29 #endif
Chris@16 30 #ifdef BOOST_MSVC
Chris@16 31 #pragma warning(pop)
Chris@16 32 #endif
Chris@16 33
Chris@16 34 #ifdef __BORLANDC__
Chris@16 35 # pragma option push -w-8008 -w-8066
Chris@16 36 #endif
Chris@16 37 #ifdef BOOST_MSVC
Chris@16 38 # pragma warning(push)
Chris@16 39 # pragma warning(disable: 4800)
Chris@16 40 #endif
Chris@16 41
Chris@16 42 namespace boost{
Chris@16 43 namespace re_detail{
Chris@16 44
// Prepares this matcher to run an expression with match flags `f`.
// Throws std::invalid_argument (through boost::throw_exception) when `e` is
// empty. Otherwise it resets pstate, stores the flags, estimates the state
// budget, picks Perl or POSIX matching rules when the caller chose neither,
// selects the results object to write into, and caches the word mask and the
// mask used when matching '.'.
// NOTE(review): everything after the empty() check reads the member `re`
// rather than the parameter `e` - presumably both refer to the same
// expression; confirm against the constructor.
Chris@16 45 template <class BidiIterator, class Allocator, class traits>
Chris@16 46 void perl_matcher<BidiIterator, Allocator, traits>::construct_init(const basic_regex<char_type, traits>& e, match_flag_type f)
Chris@16 47 {
Chris@16 48 typedef typename regex_iterator_traits<BidiIterator>::iterator_category category;
Chris@16 49 typedef typename basic_regex<char_type, traits>::flag_type expression_flag_type;
Chris@16 50
Chris@16 51 if(e.empty())
Chris@16 52 {
Chris@16 53 // precondition failure: e is not a valid regex.
Chris@16 54 std::invalid_argument ex("Invalid regular expression object");
Chris@16 55 boost::throw_exception(ex);
Chris@16 56 }
Chris@16 57 pstate = 0;
Chris@16 58 m_match_flags = f;
// the null pointer is only a tag: its static type selects the
// estimate_max_state_count overload for this iterator category.
Chris@16 59 estimate_max_state_count(static_cast<category*>(0));
Chris@16 60 expression_flag_type re_f = re.flags();
Chris@16 61 icase = re_f & regex_constants::icase;
// neither match_perl nor match_posix was requested: derive the rule set
// from the expression's syntax flags.
Chris@16 62 if(!(m_match_flags & (match_perl|match_posix)))
Chris@16 63 {
Chris@16 64 if((re_f & (regbase::main_option_type|regbase::no_perl_ex)) == 0)
Chris@16 65 m_match_flags |= match_perl;
Chris@16 66 else if((re_f & (regbase::main_option_type|regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))
Chris@16 67 m_match_flags |= match_perl;
Chris@16 68 else if((re_f & (regbase::main_option_type|regbase::literal)) == (regbase::literal))
Chris@16 69 m_match_flags |= match_perl;
Chris@16 70 else
Chris@16 71 m_match_flags |= match_posix;
Chris@16 72 }
// under POSIX rules results are written to a separately allocated
// match_results (m_temp_match); otherwise they go straight into m_result.
Chris@16 73 if(m_match_flags & match_posix)
Chris@16 74 {
Chris@16 75 m_temp_match.reset(new match_results<BidiIterator, Allocator>());
Chris@16 76 m_presult = m_temp_match.get();
Chris@16 77 }
Chris@16 78 else
Chris@16 79 m_presult = &m_result;
Chris@16 80 #ifdef BOOST_REGEX_NON_RECURSIVE
Chris@16 81 m_stack_base = 0;
Chris@16 82 m_backup_state = 0;
Chris@16 83 #endif
Chris@16 84 // find the value to use for matching word boundaries:
Chris@16 85 m_word_mask = re.get_data().m_word_mask;
Chris@16 86 // find bitmask to use for matching '.':
Chris@16 87 match_any_mask = static_cast<unsigned char>((f & match_not_dot_newline) ? re_detail::test_not_newline : re_detail::test_newline);
Chris@16 88 }
Chris@16 89
// Overload chosen for random access iterators, where the input length can be
// measured. Sets max_state_count from the input length (dist) and re.size().
// Every multiplication and addition is guarded against std::ptrdiff_t
// overflow; when a guard trips, max_state_count is set to the smaller of
// BOOST_REGEX_MAX_STATE_COUNT and (ptrdiff_t max - 2) and the function
// returns early.
Chris@16 90 template <class BidiIterator, class Allocator, class traits>
Chris@16 91 void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(std::random_access_iterator_tag*)
Chris@16 92 {
Chris@16 93 //
Chris@16 94 // How many states should we allow our machine to visit before giving up?
Chris@16 95 // This is a heuristic: it takes the greater of O(N^2) and O(NS^2)
Chris@16 96 // where N is the length of the string, and S is the number of states
Chris@16 97 // in the machine. It's tempting to up this to O(N^2S) or even O(N^2S^2)
Chris@16 98 // but these take unreasonable amounts of time to bail out in pathological
Chris@16 99 // cases.
Chris@16 100 //
Chris@16 101 // Calculate NS^2 first:
Chris@16 102 //
// k is a constant added to both estimates.
Chris@16 103 static const std::ptrdiff_t k = 100000;
Chris@16 104 std::ptrdiff_t dist = boost::re_detail::distance(base, last);
// treat empty input as length 1 so the divisions below are well defined:
Chris@16 105 if(dist == 0)
Chris@16 106 dist = 1;
Chris@16 107 std::ptrdiff_t states = re.size();
Chris@16 108 if(states == 0)
Chris@16 109 states = 1;
// NOTE(review): this squaring is the one arithmetic step here with no
// overflow guard.
Chris@16 110 states *= states;
// would states * dist overflow?
Chris@16 111 if((std::numeric_limits<std::ptrdiff_t>::max)() / dist < states)
Chris@16 112 {
Chris@16 113 max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2);
Chris@16 114 return;
Chris@16 115 }
Chris@16 116 states *= dist;
// would states + k overflow?
Chris@16 117 if((std::numeric_limits<std::ptrdiff_t>::max)() - k < states)
Chris@16 118 {
Chris@16 119 max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2);
Chris@16 120 return;
Chris@16 121 }
Chris@16 122 states += k;
Chris@16 123
// first estimate (N*S^2 + k); note it is not capped at
// BOOST_REGEX_MAX_STATE_COUNT on this path.
Chris@16 124 max_state_count = states;
Chris@16 125
Chris@16 126 //
Chris@16 127 // Now calculate N^2:
Chris@16 128 //
Chris@16 129 states = dist;
Chris@16 130 if((std::numeric_limits<std::ptrdiff_t>::max)() / dist < states)
Chris@16 131 {
Chris@16 132 max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2);
Chris@16 133 return;
Chris@16 134 }
Chris@16 135 states *= dist;
Chris@16 136 if((std::numeric_limits<std::ptrdiff_t>::max)() - k < states)
Chris@16 137 {
Chris@16 138 max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2);
Chris@16 139 return;
Chris@16 140 }
Chris@16 141 states += k;
Chris@16 142 //
Chris@16 143 // N^2 can be a very large number indeed, to prevent things getting out
Chris@16 144 // of control, cap the max states:
Chris@16 145 //
Chris@16 146 if(states > BOOST_REGEX_MAX_STATE_COUNT)
Chris@16 147 states = BOOST_REGEX_MAX_STATE_COUNT;
Chris@16 148 //
Chris@16 149 // If (the possibly capped) N^2 is larger than our first estimate,
Chris@16 150 // use this instead:
Chris@16 151 //
Chris@16 152 if(states > max_state_count)
Chris@16 153 max_state_count = states;
Chris@16 154 }
Chris@16 155
// Fallback overload taking void*: used when the iterator category tag does
// not select the random access overload. The input length is not measured,
// so the budget is simply BOOST_REGEX_MAX_STATE_COUNT.
Chris@16 156 template <class BidiIterator, class Allocator, class traits>
Chris@16 157 inline void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(void*)
Chris@16 158 {
Chris@16 159 // we don't know how long the sequence is:
Chris@16 160 max_state_count = BOOST_REGEX_MAX_STATE_COUNT;
Chris@16 161 }
Chris@16 162
// Only compiled when BOOST_REGEX_HAS_MS_STACK_GUARD is defined.
// Wraps the member function `proc` in a concrete_protected_call object bound
// to this matcher and returns whatever its execute() returns.
// NOTE(review): the macro name suggests execute() adds stack-overflow
// protection around the call; that is defined elsewhere - confirm there.
Chris@16 163 #ifdef BOOST_REGEX_HAS_MS_STACK_GUARD
Chris@16 164 template <class BidiIterator, class Allocator, class traits>
Chris@16 165 inline bool perl_matcher<BidiIterator, Allocator, traits>::protected_call(
Chris@16 166 protected_proc_type proc)
Chris@16 167 {
Chris@16 168 ::boost::re_detail::concrete_protected_call
Chris@16 169 <perl_matcher<BidiIterator, Allocator, traits> >
Chris@16 170 obj(this, proc);
Chris@16 171 return obj.execute();
Chris@16 172
Chris@16 173 }
Chris@16 174 #endif
Chris@16 175
// Entry point for a whole-input match. Forwards to match_imp(), routed
// through protected_call() when BOOST_REGEX_HAS_MS_STACK_GUARD is defined,
// and returns its result.
Chris@16 176 template <class BidiIterator, class Allocator, class traits>
Chris@16 177 inline bool perl_matcher<BidiIterator, Allocator, traits>::match()
Chris@16 178 {
Chris@16 179 #ifdef BOOST_REGEX_HAS_MS_STACK_GUARD
Chris@16 180 return protected_call(&perl_matcher<BidiIterator, Allocator, traits>::match_imp);
Chris@16 181 #else
Chris@16 182 return match_imp();
Chris@16 183 #endif
Chris@16 184 }
Chris@16 185
// Implementation behind match(): resets the machine to the start of the
// input, forces match_all, sizes the result set, and runs match_prefix().
// Returns true only when a match was found and sub-match 0 of m_result
// spans exactly [base, last).
// In non-recursive builds with exceptions enabled, any exception causes all
// saved states to be unwound before it is rethrown.
Chris@16 186 template <class BidiIterator, class Allocator, class traits>
Chris@16 187 bool perl_matcher<BidiIterator, Allocator, traits>::match_imp()
Chris@16 188 {
Chris@16 189 // initialise our stack if we are non-recursive:
Chris@16 190 #ifdef BOOST_REGEX_NON_RECURSIVE
Chris@16 191 save_state_init init(&m_stack_base, &m_backup_state);
Chris@16 192 used_block_count = BOOST_REGEX_MAX_BLOCKS;
Chris@16 193 #if !defined(BOOST_NO_EXCEPTIONS)
Chris@16 194 try{
Chris@16 195 #endif
Chris@16 196 #endif
Chris@16 197
Chris@16 198 // reset our state machine:
Chris@16 199 position = base;
Chris@16 200 search_base = base;
Chris@16 201 state_count = 0;
Chris@16 202 m_match_flags |= regex_constants::match_all;
// one slot for the whole match, plus one per marked sub-expression unless
// match_nosubs was requested:
Chris@101 203 m_presult->set_size((m_match_flags & match_nosubs) ? 1 : 1 + re.mark_count(), search_base, last);
Chris@16 204 m_presult->set_base(base);
Chris@16 205 m_presult->set_named_subs(this->re.get_named_subs());
// under POSIX rules m_presult is a separate object, so copy its freshly
// initialised layout into m_result as well:
Chris@16 206 if(m_match_flags & match_posix)
Chris@16 207 m_result = *m_presult;
Chris@16 208 verify_options(re.flags(), m_match_flags);
Chris@16 209 if(0 == match_prefix())
Chris@16 210 return false;
Chris@16 211 return (m_result[0].second == last) && (m_result[0].first == base);
Chris@16 212
Chris@16 213 #if defined(BOOST_REGEX_NON_RECURSIVE) && !defined(BOOST_NO_EXCEPTIONS)
Chris@16 214 }
Chris@16 215 catch(...)
Chris@16 216 {
Chris@16 217 // unwind all pushed states, apart from anything else this
Chris@16 218 // ensures that all the states are correctly destructed
Chris@16 219 // not just the memory freed.
Chris@16 220 while(unwind(true)){}
Chris@16 221 throw;
Chris@16 222 }
Chris@16 223 #endif
Chris@16 224 }
Chris@16 225
// Entry point for a search. Forwards to find_imp(), routed through
// protected_call() when BOOST_REGEX_HAS_MS_STACK_GUARD is defined, and
// returns its result.
Chris@16 226 template <class BidiIterator, class Allocator, class traits>
Chris@16 227 inline bool perl_matcher<BidiIterator, Allocator, traits>::find()
Chris@16 228 {
Chris@16 229 #ifdef BOOST_REGEX_HAS_MS_STACK_GUARD
Chris@16 230 return protected_call(&perl_matcher<BidiIterator, Allocator, traits>::find_imp);
Chris@16 231 #else
Chris@16 232 return find_imp();
Chris@16 233 #endif
Chris@16 234 }
Chris@16 235
// Implementation behind find(). The first call (match_init not yet set)
// starts at base; later calls resume from the end of the previous match,
// stepping one character forward after an empty match unless match_not_null
// is set. It then dispatches to a search routine selected by the
// expression's restart type (or restart_continue when match_continuous is
// set) and returns that routine's result.
// In non-recursive builds with exceptions enabled, any exception causes all
// saved states to be unwound before it is rethrown.
Chris@16 236 template <class BidiIterator, class Allocator, class traits>
Chris@16 237 bool perl_matcher<BidiIterator, Allocator, traits>::find_imp()
Chris@16 238 {
// search routines, indexed by restart type.
// NOTE(review): the order presumably mirrors the regbase restart_*
// enumeration - confirm against its declaration.
Chris@16 239 static matcher_proc_type const s_find_vtable[7] =
Chris@16 240 {
Chris@16 241 &perl_matcher<BidiIterator, Allocator, traits>::find_restart_any,
Chris@16 242 &perl_matcher<BidiIterator, Allocator, traits>::find_restart_word,
Chris@16 243 &perl_matcher<BidiIterator, Allocator, traits>::find_restart_line,
Chris@16 244 &perl_matcher<BidiIterator, Allocator, traits>::find_restart_buf,
Chris@16 245 &perl_matcher<BidiIterator, Allocator, traits>::match_prefix,
Chris@16 246 &perl_matcher<BidiIterator, Allocator, traits>::find_restart_lit,
Chris@16 247 &perl_matcher<BidiIterator, Allocator, traits>::find_restart_lit,
Chris@16 248 };
Chris@16 249
Chris@16 250 // initialise our stack if we are non-recursive:
Chris@16 251 #ifdef BOOST_REGEX_NON_RECURSIVE
Chris@16 252 save_state_init init(&m_stack_base, &m_backup_state);
Chris@16 253 used_block_count = BOOST_REGEX_MAX_BLOCKS;
Chris@16 254 #if !defined(BOOST_NO_EXCEPTIONS)
Chris@16 255 try{
Chris@16 256 #endif
Chris@16 257 #endif
Chris@16 258
Chris@16 259 state_count = 0;
Chris@16 260 if((m_match_flags & regex_constants::match_init) == 0)
Chris@16 261 {
Chris@16 262 // reset our state machine:
Chris@16 263 search_base = position = base;
Chris@16 264 pstate = re.get_first_state();
Chris@101 265 m_presult->set_size((m_match_flags & match_nosubs) ? 1 : 1 + re.mark_count(), base, last);
Chris@16 266 m_presult->set_base(base);
Chris@16 267 m_presult->set_named_subs(this->re.get_named_subs());
// remember that initialisation has been done for subsequent calls:
Chris@16 268 m_match_flags |= regex_constants::match_init;
Chris@16 269 }
Chris@16 270 else
Chris@16 271 {
Chris@16 272 // start again:
Chris@16 273 search_base = position = m_result[0].second;
Chris@16 274 // If last match was null and match_not_null was not set then increment
Chris@16 275 // our start position, otherwise we go into an infinite loop:
Chris@16 276 if(((m_match_flags & match_not_null) == 0) && (m_result.length() == 0))
Chris@16 277 {
Chris@16 278 if(position == last)
Chris@16 279 return false;
Chris@16 280 else
Chris@16 281 ++position;
Chris@16 282 }
Chris@16 283 // reset $` start:
Chris@101 284 m_presult->set_size((m_match_flags & match_nosubs) ? 1 : 1 + re.mark_count(), search_base, last);
Chris@16 285 //if((base != search_base) && (base == backstop))
Chris@16 286 // m_match_flags |= match_prev_avail;
Chris@16 287 }
// under POSIX rules m_result is a separate object from *m_presult and is
// re-initialised here as well:
Chris@16 288 if(m_match_flags & match_posix)
Chris@16 289 {
Chris@101 290 m_result.set_size(1 + re.mark_count(), base, last);
Chris@16 291 m_result.set_base(base);
Chris@16 292 }
Chris@16 293
Chris@16 294 verify_options(re.flags(), m_match_flags);
Chris@16 295 // find out what kind of expression we have:
Chris@16 296 unsigned type = (m_match_flags & match_continuous) ?
Chris@16 297 static_cast<unsigned int>(regbase::restart_continue)
Chris@16 298 : static_cast<unsigned int>(re.get_restart_type());
Chris@16 299
Chris@16 300 // call the appropriate search routine:
// NOTE(review): `type` indexes the 7-entry table without a range check;
// this relies on get_restart_type() never returning a value above 6.
Chris@16 301 matcher_proc_type proc = s_find_vtable[type];
Chris@16 302 return (this->*proc)();
Chris@16 303
Chris@16 304 #if defined(BOOST_REGEX_NON_RECURSIVE) && !defined(BOOST_NO_EXCEPTIONS)
Chris@16 305 }
Chris@16 306 catch(...)
Chris@16 307 {
Chris@16 308 // unwind all pushed states, apart from anything else this
Chris@16 309 // ensures that all the states are correctly destructed
Chris@16 310 // not just the memory freed.
Chris@16 311 while(unwind(true)){}
Chris@16 312 throw;
Chris@16 313 }
Chris@16 314 #endif
Chris@16 315 }
Chris@16 316
// Attempts a match starting at the current position by running
// match_all_states() from the expression's first state.
// If no full match was found but a partial one was, and match_partial is
// set, the partial match (extending to `last`) is reported as a match.
// Returns m_has_found_match; on failure position is restored to where the
// attempt began.
Chris@16 317 template <class BidiIterator, class Allocator, class traits>
Chris@16 318 bool perl_matcher<BidiIterator, Allocator, traits>::match_prefix()
Chris@16 319 {
Chris@16 320 m_has_partial_match = false;
Chris@16 321 m_has_found_match = false;
Chris@16 322 pstate = re.get_first_state();
Chris@16 323 m_presult->set_first(position);
// remember the starting point so it can be restored on failure:
Chris@16 324 restart = position;
Chris@16 325 match_all_states();
Chris@16 326 if(!m_has_found_match && m_has_partial_match && (m_match_flags & match_partial))
Chris@16 327 {
Chris@16 328 m_has_found_match = true;
Chris@16 329 m_presult->set_second(last, 0, false);
Chris@16 330 position = last;
Chris@16 331 if((m_match_flags & match_posix) == match_posix)
Chris@16 332 {
Chris@16 333 m_result.maybe_assign(*m_presult);
Chris@16 334 }
Chris@16 335 }
Chris@16 336 #ifdef BOOST_REGEX_MATCH_EXTRA
Chris@16 337 if(m_has_found_match && (match_extra & m_match_flags))
Chris@16 338 {
Chris@16 339 //
Chris@16 340 // we have a match, reverse the capture information:
Chris@16 341 //
Chris@16 342 for(unsigned i = 0; i < m_presult->size(); ++i)
Chris@16 343 {
Chris@16 344 typename sub_match<BidiIterator>::capture_sequence_type & seq = ((*m_presult)[i]).get_captures();
Chris@16 345 std::reverse(seq.begin(), seq.end());
Chris@16 346 }
Chris@16 347 }
Chris@16 348 #endif
Chris@16 349 if(!m_has_found_match)
Chris@16 350 position = restart; // reset search position
Chris@16 351 return m_has_found_match;
Chris@16 352 }
Chris@16 353
// Matches the literal string held by the current re_literal state against
// the input at position. Each input character is passed through
// traits_inst.translate(..., icase) before comparison.
// On success position has advanced past the literal and pstate moves to the
// next state. On failure returns false with position left at the point of
// mismatch (it is not rewound here).
Chris@16 354 template <class BidiIterator, class Allocator, class traits>
Chris@16 355 bool perl_matcher<BidiIterator, Allocator, traits>::match_literal()
Chris@16 356 {
Chris@16 357 unsigned int len = static_cast<const re_literal*>(pstate)->length;
// the characters are read from the memory immediately following the
// re_literal structure:
Chris@16 358 const char_type* what = reinterpret_cast<const char_type*>(static_cast<const re_literal*>(pstate) + 1);
Chris@16 359 //
Chris@16 360 // compare string with what we stored in
Chris@16 361 // our records:
Chris@16 362 for(unsigned int i = 0; i < len; ++i, ++position)
Chris@16 363 {
Chris@16 364 if((position == last) || (traits_inst.translate(*position, icase) != what[i]))
Chris@16 365 return false;
Chris@16 366 }
Chris@16 367 pstate = pstate->next.p;
Chris@16 368 return true;
Chris@16 369 }
Chris@16 370
// Start-of-line assertion; consumes no input.
// At backstop without match_prev_avail it succeeds unless match_not_bol is
// set. Elsewhere it fails outright under match_single_line; otherwise it
// succeeds when the preceding character is a line separator, except between
// the '\r' and '\n' of a "\r\n" pair.
// On success pstate advances to the next state.
Chris@16 371 template <class BidiIterator, class Allocator, class traits>
Chris@16 372 bool perl_matcher<BidiIterator, Allocator, traits>::match_start_line()
Chris@16 373 {
Chris@16 374 if(position == backstop)
Chris@16 375 {
Chris@16 376 if((m_match_flags & match_prev_avail) == 0)
Chris@16 377 {
Chris@16 378 if((m_match_flags & match_not_bol) == 0)
Chris@16 379 {
Chris@16 380 pstate = pstate->next.p;
Chris@16 381 return true;
Chris@16 382 }
Chris@16 383 return false;
Chris@16 384 }
// match_prev_avail is set: fall through and inspect the character
// before position.
Chris@16 385 }
Chris@16 386 else if(m_match_flags & match_single_line)
Chris@16 387 return false;
Chris@16 388
Chris@16 389 // check the previous character:
Chris@16 390 BidiIterator t(position);
Chris@16 391 --t;
Chris@16 392 if(position != last)
Chris@16 393 {
Chris@16 394 if(is_separator(*t) && !((*t == static_cast<char_type>('\r')) && (*position == static_cast<char_type>('\n'))) )
Chris@16 395 {
Chris@16 396 pstate = pstate->next.p;
Chris@16 397 return true;
Chris@16 398 }
Chris@16 399 }
// at the end of input there is no current character to test for '\n':
Chris@16 400 else if(is_separator(*t))
Chris@16 401 {
Chris@16 402 pstate = pstate->next.p;
Chris@16 403 return true;
Chris@16 404 }
Chris@16 405 return false;
Chris@16 406 }
Chris@16 407
// End-of-line assertion; consumes no input.
// At the end of input it succeeds unless match_not_eol is set. Before the
// end it fails under match_single_line; otherwise it succeeds when the
// current character is a line separator, except between the '\r' and '\n'
// of a "\r\n" pair.
// On success pstate advances to the next state.
Chris@16 408 template <class BidiIterator, class Allocator, class traits>
Chris@16 409 bool perl_matcher<BidiIterator, Allocator, traits>::match_end_line()
Chris@16 410 {
Chris@16 411 if(position != last)
Chris@16 412 {
Chris@16 413 if(m_match_flags & match_single_line)
Chris@16 414 return false;
Chris@16 415 // we're not yet at the end so *position is always valid:
Chris@16 416 if(is_separator(*position))
Chris@16 417 {
// only look at the previous character when one is available:
Chris@16 418 if((position != backstop) || (m_match_flags & match_prev_avail))
Chris@16 419 {
Chris@16 420 // check that we're not in the middle of \r\n sequence
Chris@16 421 BidiIterator t(position);
Chris@16 422 --t;
Chris@16 423 if((*t == static_cast<char_type>('\r')) && (*position == static_cast<char_type>('\n')))
Chris@16 424 {
Chris@16 425 return false;
Chris@16 426 }
Chris@16 427 }
Chris@16 428 pstate = pstate->next.p;
Chris@16 429 return true;
Chris@16 430 }
Chris@16 431 }
Chris@16 432 else if((m_match_flags & match_not_eol) == 0)
Chris@16 433 {
Chris@16 434 pstate = pstate->next.p;
Chris@16 435 return true;
Chris@16 436 }
Chris@16 437 return false;
Chris@16 438 }
Chris@16 439
// Matches a single character for the current re_dot state.
// Fails at the end of input, on a line separator when match_any_mask and the
// state's mask share no bits, and on a null character when
// match_not_dot_null is set. On success consumes one character and advances
// pstate.
Chris@16 440 template <class BidiIterator, class Allocator, class traits>
Chris@16 441 bool perl_matcher<BidiIterator, Allocator, traits>::match_wild()
Chris@16 442 {
Chris@16 443 if(position == last)
Chris@16 444 return false;
Chris@16 445 if(is_separator(*position) && ((match_any_mask & static_cast<const re_dot*>(pstate)->mask) == 0))
Chris@16 446 return false;
Chris@16 447 if((*position == char_type(0)) && (m_match_flags & match_not_dot_null))
Chris@16 448 return false;
Chris@16 449 pstate = pstate->next.p;
Chris@16 450 ++position;
Chris@16 451 return true;
Chris@16 452 }
Chris@16 453
// Word-boundary assertion; consumes no input.
// Succeeds when exactly one of "character at position" and "character before
// position" is a word character according to m_word_mask. At the ends of the
// input the missing neighbour is taken from match_not_eow / match_not_bow.
// On success pstate advances to the next state.
Chris@16 454 template <class BidiIterator, class Allocator, class traits>
Chris@16 455 bool perl_matcher<BidiIterator, Allocator, traits>::match_word_boundary()
Chris@16 456 {
Chris@16 457 bool b; // indicates whether next character is a word character
Chris@16 458 if(position != last)
Chris@16 459 {
Chris@16 460 // prev and this character must be opposites:
Chris@16 461 b = traits_inst.isctype(*position, m_word_mask);
Chris@16 462 }
Chris@16 463 else
Chris@16 464 {
// no next character: treat it as a word character only when
// match_not_eow is set.
Chris@16 465 b = (m_match_flags & match_not_eow) ? true : false;
Chris@16 466 }
Chris@16 467 if((position == backstop) && ((m_match_flags & match_prev_avail) == 0))
Chris@16 468 {
// no previous character: treat it as a word character only when
// match_not_bow is set (the `b ^= false` branch leaves b unchanged).
Chris@16 469 if(m_match_flags & match_not_bow)
Chris@16 470 b ^= true;
Chris@16 471 else
Chris@16 472 b ^= false;
Chris@16 473 }
Chris@16 474 else
Chris@16 475 {
// step back to classify the previous character, then restore position:
Chris@16 476 --position;
Chris@16 477 b ^= traits_inst.isctype(*position, m_word_mask);
Chris@16 478 ++position;
Chris@16 479 }
Chris@16 480 if(b)
Chris@16 481 {
Chris@16 482 pstate = pstate->next.p;
Chris@16 483 return true;
Chris@16 484 }
Chris@16 485 return false; // no match if we get to here...
Chris@16 486 }
Chris@16 487
// Assertion that position is not at a word boundary; consumes no input.
// Fails at the end of input and at backstop when no previous character is
// available. Otherwise succeeds when the character at position and the one
// before it have the same m_word_mask classification.
// On success pstate advances to the next state.
// NOTE(review): despite its name, `prev` holds the classification of the
// CURRENT character and `b` that of the previous one. The test `b == prev`
// also succeeds when both are non-word characters, which is broader than
// the comment below states - confirm which is intended.
Chris@16 488 template <class BidiIterator, class Allocator, class traits>
Chris@16 489 bool perl_matcher<BidiIterator, Allocator, traits>::match_within_word()
Chris@16 490 {
Chris@16 491 if(position == last)
Chris@16 492 return false;
Chris@16 493 // both prev and this character must be m_word_mask:
Chris@16 494 bool prev = traits_inst.isctype(*position, m_word_mask);
Chris@16 495 {
Chris@16 496 bool b;
Chris@16 497 if((position == backstop) && ((m_match_flags & match_prev_avail) == 0))
Chris@16 498 return false;
Chris@16 499 else
Chris@16 500 {
// step back to classify the previous character, then restore position:
Chris@16 501 --position;
Chris@16 502 b = traits_inst.isctype(*position, m_word_mask);
Chris@16 503 ++position;
Chris@16 504 }
Chris@16 505 if(b == prev)
Chris@16 506 {
Chris@16 507 pstate = pstate->next.p;
Chris@16 508 return true;
Chris@16 509 }
Chris@16 510 }
Chris@16 511 return false;
Chris@16 512 }
Chris@16 513
// Start-of-word assertion; consumes no input.
// Requires a word character at position and either no word character before
// it, or - at backstop with no previous character available - that
// match_not_bow is not set.
// On success pstate advances to the next state.
Chris@16 514 template <class BidiIterator, class Allocator, class traits>
Chris@16 515 bool perl_matcher<BidiIterator, Allocator, traits>::match_word_start()
Chris@16 516 {
Chris@16 517 if(position == last)
Chris@16 518 return false; // can't be starting a word if we're already at the end of input
Chris@16 519 if(!traits_inst.isctype(*position, m_word_mask))
Chris@16 520 return false; // next character isn't a word character
Chris@16 521 if((position == backstop) && ((m_match_flags & match_prev_avail) == 0))
Chris@16 522 {
Chris@16 523 if(m_match_flags & match_not_bow)
Chris@16 524 return false; // no previous input
Chris@16 525 }
Chris@16 526 else
Chris@16 527 {
Chris@16 528 // otherwise inside buffer:
Chris@16 529 BidiIterator t(position);
Chris@16 530 --t;
Chris@16 531 if(traits_inst.isctype(*t, m_word_mask))
Chris@16 532 return false; // previous character is a word character
Chris@16 533 }
Chris@16 534 // OK we have a match:
Chris@16 535 pstate = pstate->next.p;
Chris@16 536 return true;
Chris@16 537 }
Chris@16 538
// End-of-word assertion; consumes no input.
// Requires a word character immediately before position and either a
// non-word character at position, or - at the end of input - that
// match_not_eow is not set.
// On success pstate advances to the next state.
Chris@16 539 template <class BidiIterator, class Allocator, class traits>
Chris@16 540 bool perl_matcher<BidiIterator, Allocator, traits>::match_word_end()
Chris@16 541 {
Chris@16 542 if((position == backstop) && ((m_match_flags & match_prev_avail) == 0))
Chris@16 543 return false; // start of buffer can't be end of word
Chris@16 544 BidiIterator t(position);
Chris@16 545 --t;
Chris@16 546 if(traits_inst.isctype(*t, m_word_mask) == false)
Chris@16 547 return false; // previous character wasn't a word character
Chris@16 548
Chris@16 549 if(position == last)
Chris@16 550 {
Chris@16 551 if(m_match_flags & match_not_eow)
Chris@16 552 return false; // end of buffer but not end of word
Chris@16 553 }
Chris@16 554 else
Chris@16 555 {
Chris@16 556 // otherwise inside buffer:
Chris@16 557 if(traits_inst.isctype(*position, m_word_mask))
Chris@16 558 return false; // next character is a word character
Chris@16 559 }
Chris@16 560 pstate = pstate->next.p;
Chris@16 561 return true; // if we fall through to here then we've succeeded
Chris@16 562 }
Chris@16 563
// Start-of-buffer assertion; consumes no input.
// Succeeds only when position equals backstop and match_not_bob is not set;
// on success pstate advances to the next state.
Chris@16 564 template <class BidiIterator, class Allocator, class traits>
Chris@16 565 bool perl_matcher<BidiIterator, Allocator, traits>::match_buffer_start()
Chris@16 566 {
Chris@16 567 if((position != backstop) || (m_match_flags & match_not_bob))
Chris@16 568 return false;
Chris@16 569 // OK match:
Chris@16 570 pstate = pstate->next.p;
Chris@16 571 return true;
Chris@16 572 }
Chris@16 573
// End-of-buffer assertion; consumes no input.
// Succeeds only when position equals last and match_not_eob is not set;
// on success pstate advances to the next state.
Chris@16 574 template <class BidiIterator, class Allocator, class traits>
Chris@16 575 bool perl_matcher<BidiIterator, Allocator, traits>::match_buffer_end()
Chris@16 576 {
Chris@16 577 if((position != last) || (m_match_flags & match_not_eob))
Chris@16 578 return false;
Chris@16 579 // OK match:
Chris@16 580 pstate = pstate->next.p;
Chris@16 581 return true;
Chris@16 582 }
Chris@16 583
// Matches the input at position against the text previously captured by the
// sub-expression named in the current re_brace state. Both sides are passed
// through traits_inst.translate(..., icase) before comparison.
// On success position has advanced past the repeated text and pstate moves
// to the next state. On failure returns false with position left at the
// point of mismatch.
Chris@16 584 template <class BidiIterator, class Allocator, class traits>
Chris@16 585 bool perl_matcher<BidiIterator, Allocator, traits>::match_backref()
Chris@16 586 {
Chris@16 587 //
Chris@16 588 // Compare with what we previously matched.
Chris@16 589 // Note that this succeeds if the backref did not participate
Chris@16 590 // in the match, this is in line with ECMAScript, but not Perl
Chris@16 591 // or PCRE.
Chris@16 592 //
// NOTE(review): the paragraph above only describes the non-Perl path; when
// match_perl is set an unmatched backref fails (see the check below).
Chris@16 593 int index = static_cast<const re_brace*>(pstate)->index;
// values of 10000 and above are not direct indexes: they are resolved
// through the expression's named sub-expression table. Walk the candidates
// and stop at the first one that has matched (or at the last candidate).
Chris@16 594 if(index >= 10000)
Chris@16 595 {
Chris@16 596 named_subexpressions::range_type r = re.get_data().equal_range(index);
Chris@16 597 BOOST_ASSERT(r.first != r.second);
Chris@16 598 do
Chris@16 599 {
Chris@16 600 index = r.first->index;
Chris@16 601 ++r.first;
Chris@16 602 }while((r.first != r.second) && ((*m_presult)[index].matched != true));
Chris@16 603 }
Chris@16 604
Chris@16 605 if((m_match_flags & match_perl) && !(*m_presult)[index].matched)
Chris@16 606 return false;
Chris@16 607
Chris@16 608 BidiIterator i = (*m_presult)[index].first;
Chris@16 609 BidiIterator j = (*m_presult)[index].second;
Chris@16 610 while(i != j)
Chris@16 611 {
Chris@16 612 if((position == last) || (traits_inst.translate(*position, icase) != traits_inst.translate(*i, icase)))
Chris@16 613 return false;
Chris@16 614 ++i;
Chris@16 615 ++position;
Chris@16 616 }
Chris@16 617 pstate = pstate->next.p;
Chris@16 618 return true;
Chris@16 619 }
Chris@16 620
// Matches the input at position against the current re_set_long state by
// delegating to re_is_set_member(). That helper returns an iterator; a
// return value different from position is treated as a match and becomes
// the new position, and pstate advances. Fails at the end of input or when
// the helper returns position unchanged.
Chris@16 621 template <class BidiIterator, class Allocator, class traits>
Chris@16 622 bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set()
Chris@16 623 {
Chris@16 624 typedef typename traits::char_class_type char_class_type;
Chris@16 625 // let the traits class do the work:
Chris@16 626 if(position == last)
Chris@16 627 return false;
Chris@16 628 BidiIterator t = re_is_set_member(position, last, static_cast<const re_set_long<char_class_type>*>(pstate), re.get_data(), icase);
Chris@16 629 if(t != position)
Chris@16 630 {
Chris@16 631 pstate = pstate->next.p;
Chris@16 632 position = t;
Chris@16 633 return true;
Chris@16 634 }
Chris@16 635 return false;
Chris@16 636 }
Chris@16 637
// Matches one character against the lookup table of the current re_set
// state. The character is translated with traits_inst.translate(..., icase),
// cast to unsigned char, and used as an index into _map; a non-zero entry is
// a match. On success consumes one character and advances pstate. Fails at
// the end of input.
Chris@16 638 template <class BidiIterator, class Allocator, class traits>
Chris@16 639 bool perl_matcher<BidiIterator, Allocator, traits>::match_set()
Chris@16 640 {
Chris@16 641 if(position == last)
Chris@16 642 return false;
Chris@16 643 if(static_cast<const re_set*>(pstate)->_map[static_cast<unsigned char>(traits_inst.translate(*position, icase))])
Chris@16 644 {
Chris@16 645 pstate = pstate->next.p;
Chris@16 646 ++position;
Chris@16 647 return true;
Chris@16 648 }
Chris@16 649 return false;
Chris@16 650 }
Chris@16 651
// Unconditional jump: sets pstate to the `alt` target of the current re_jump
// state. Consumes no input and always returns true.
Chris@16 652 template <class BidiIterator, class Allocator, class traits>
Chris@16 653 bool perl_matcher<BidiIterator, Allocator, traits>::match_jump()
Chris@16 654 {
Chris@16 655 pstate = static_cast<const re_jump*>(pstate)->alt.p;
Chris@16 656 return true;
Chris@16 657 }
Chris@16 658
// Matches one non-combining character followed by any number of combining
// characters, as classified by is_combining() on the translated character.
// Fails at the end of input or when the first character is itself a
// combining character. On success position is past the whole sequence and
// pstate advances.
Chris@16 659 template <class BidiIterator, class Allocator, class traits>
Chris@16 660 bool perl_matcher<BidiIterator, Allocator, traits>::match_combining()
Chris@16 661 {
Chris@16 662 if(position == last)
Chris@16 663 return false;
Chris@16 664 if(is_combining(traits_inst.translate(*position, icase)))
Chris@16 665 return false;
Chris@16 666 ++position;
Chris@16 667 while((position != last) && is_combining(traits_inst.translate(*position, icase)))
Chris@16 668 ++position;
Chris@16 669 pstate = pstate->next.p;
Chris@16 670 return true;
Chris@16 671 }
Chris@16 672
// "Soft" end-of-buffer assertion; consumes no input.
// Succeeds when only line separators (possibly none) remain between position
// and last, and match_not_eob is not set. The scan uses a copy of position,
// so position itself is unchanged. On success pstate advances.
Chris@16 673 template <class BidiIterator, class Allocator, class traits>
Chris@16 674 bool perl_matcher<BidiIterator, Allocator, traits>::match_soft_buffer_end()
Chris@16 675 {
Chris@16 676 if(m_match_flags & match_not_eob)
Chris@16 677 return false;
Chris@16 678 BidiIterator p(position);
Chris@16 679 while((p != last) && is_separator(traits_inst.translate(*p, icase)))++p;
Chris@16 680 if(p != last)
Chris@16 681 return false;
Chris@16 682 pstate = pstate->next.p;
Chris@16 683 return true;
Chris@16 684 }
Chris@16 685
// Assertion that position is exactly where the current search began
// (search_base); consumes no input. On success pstate advances.
Chris@16 686 template <class BidiIterator, class Allocator, class traits>
Chris@16 687 bool perl_matcher<BidiIterator, Allocator, traits>::match_restart_continue()
Chris@16 688 {
Chris@16 689 if(position == search_base)
Chris@16 690 {
Chris@16 691 pstate = pstate->next.p;
Chris@16 692 return true;
Chris@16 693 }
Chris@16 694 return false;
Chris@16 695 }
Chris@16 696
// Moves position backwards by the count stored in the current re_brace
// state's `index`, never going before backstop. Returns false when fewer
// than that many characters are available; otherwise advances pstate and
// returns true.
// Random access iterators check the distance once and jump; other iterators
// step back one character at a time. In that branch a failure leaves
// position wherever the stepping stopped (at backstop).
Chris@16 697 template <class BidiIterator, class Allocator, class traits>
Chris@16 698 bool perl_matcher<BidiIterator, Allocator, traits>::match_backstep()
Chris@16 699 {
// the condition below is a compile-time constant for any given
// BidiIterator; the pragma presumably silences MSVC's warning about that.
Chris@16 700 #ifdef BOOST_MSVC
Chris@16 701 #pragma warning(push)
Chris@16 702 #pragma warning(disable:4127)
Chris@16 703 #endif
Chris@16 704 if( ::boost::is_random_access_iterator<BidiIterator>::value)
Chris@16 705 {
Chris@16 706 std::ptrdiff_t maxlen = ::boost::re_detail::distance(backstop, position);
Chris@16 707 if(maxlen < static_cast<const re_brace*>(pstate)->index)
Chris@16 708 return false;
Chris@16 709 std::advance(position, -static_cast<const re_brace*>(pstate)->index);
Chris@16 710 }
Chris@16 711 else
Chris@16 712 {
Chris@16 713 int c = static_cast<const re_brace*>(pstate)->index;
Chris@16 714 while(c--)
Chris@16 715 {
Chris@16 716 if(position == backstop)
Chris@16 717 return false;
Chris@16 718 --position;
Chris@16 719 }
Chris@16 720 }
Chris@16 721 pstate = pstate->next.p;
Chris@16 722 return true;
Chris@16 723 #ifdef BOOST_MSVC
Chris@16 724 #pragma warning(pop)
Chris@16 725 #endif
Chris@16 726 }
Chris@16 727
// Evaluates a condition encoded in the current re_brace state's `index` and
// returns the result; consumes no input.
//   index == 9999 : (DEFINE) block - always false, and pstate is NOT advanced.
//   index  >  0   : true if that sub-expression has matched; values of 10000
//                   and above are resolved through the named sub-expression
//                   table, and any matched candidate counts.
//   index  <= 0   : true if currently inside a recursion - any recursion for
//                   index == 0, otherwise a recursion into sub-expression
//                   -index-1 (again resolved by name when >= 10000).
// Except in the (DEFINE) case, pstate advances whether the result is true
// or false.
Chris@16 728 template <class BidiIterator, class Allocator, class traits>
Chris@16 729 inline bool perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref()
Chris@16 730 {
Chris@16 731 // return true if marked sub-expression N has been matched:
Chris@16 732 int index = static_cast<const re_brace*>(pstate)->index;
Chris@16 733 bool result = false;
Chris@16 734 if(index == 9999)
Chris@16 735 {
Chris@16 736 // Magic value for a (DEFINE) block:
Chris@16 737 return false;
Chris@16 738 }
Chris@16 739 else if(index > 0)
Chris@16 740 {
Chris@16 741 // Have we matched subexpression "index"?
Chris@16 742 // Check if index is a hash value:
Chris@16 743 if(index >= 10000)
Chris@16 744 {
Chris@16 745 named_subexpressions::range_type r = re.get_data().equal_range(index);
Chris@16 746 while(r.first != r.second)
Chris@16 747 {
Chris@16 748 if((*m_presult)[r.first->index].matched)
Chris@16 749 {
Chris@16 750 result = true;
Chris@16 751 break;
Chris@16 752 }
Chris@16 753 ++r.first;
Chris@16 754 }
Chris@16 755 }
Chris@16 756 else
Chris@16 757 {
Chris@16 758 result = (*m_presult)[index].matched;
Chris@16 759 }
Chris@16 760 pstate = pstate->next.p;
Chris@16 761 }
Chris@16 762 else
Chris@16 763 {
Chris@16 764 // Have we recursed into subexpression "index"?
Chris@16 765 // If index == 0 then check for any recursion at all, otherwise for recursion to -index-1.
Chris@16 766 int idx = -index-1;
Chris@16 767 if(idx >= 10000)
Chris@16 768 {
Chris@16 769 named_subexpressions::range_type r = re.get_data().equal_range(idx);
// -1 stands for "not inside any recursion":
Chris@16 770 int stack_index = recursion_stack.empty() ? -1 : recursion_stack.back().idx;
Chris@16 771 while(r.first != r.second)
Chris@16 772 {
Chris@16 773 result |= (stack_index == r.first->index);
Chris@16 774 if(result)break;
Chris@16 775 ++r.first;
Chris@16 776 }
Chris@16 777 }
Chris@16 778 else
Chris@16 779 {
Chris@16 780 result = !recursion_stack.empty() && ((recursion_stack.back().idx == idx) || (index == 0));
Chris@16 781 }
Chris@16 782 pstate = pstate->next.p;
Chris@16 783 }
Chris@16 784 return result;
Chris@16 785 }
Chris@16 786
// Sets the matcher's icase flag from the current re_case state and advances
// pstate. Consumes no input and always returns true.
Chris@16 787 template <class BidiIterator, class Allocator, class traits>
Chris@16 788 bool perl_matcher<BidiIterator, Allocator, traits>::match_toggle_case()
Chris@16 789 {
Chris@16 790 // change our case sensitivity:
Chris@16 791 this->icase = static_cast<const re_case*>(pstate)->icase;
Chris@16 792 pstate = pstate->next.p;
Chris@16 793 return true;
Chris@16 794 }
Chris@16 795
Chris@16 796
// Search routine that may start a match at any position.
// Repeatedly skips characters that can_start() rejects against the
// expression's start map, then tries match_prefix() at the first candidate.
// Returns true on the first successful match. When the input is exhausted
// it tries one final match at `last` if the expression can match the empty
// string, and otherwise returns false.
Chris@16 797 template <class BidiIterator, class Allocator, class traits>
Chris@16 798 bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_any()
Chris@16 799 {
// while(true) below is a constant condition; the pragma presumably silences
// MSVC's warning about that.
Chris@16 800 #ifdef BOOST_MSVC
Chris@16 801 #pragma warning(push)
Chris@16 802 #pragma warning(disable:4127)
Chris@16 803 #endif
Chris@16 804 const unsigned char* _map = re.get_map();
Chris@16 805 while(true)
Chris@16 806 {
Chris@16 807 // skip everything we can't match:
Chris@16 808 while((position != last) && !can_start(*position, _map, (unsigned char)mask_any) )
Chris@16 809 ++position;
Chris@16 810 if(position == last)
Chris@16 811 {
Chris@16 812 // run out of characters, try a null match if possible:
Chris@16 813 if(re.can_be_null())
Chris@16 814 return match_prefix();
Chris@16 815 break;
Chris@16 816 }
Chris@16 817 // now try and obtain a match:
Chris@16 818 if(match_prefix())
Chris@16 819 return true;
// match_prefix() restored position on failure; move on by one character
// unless already at the end:
Chris@16 820 if(position == last)
Chris@16 821 return false;
Chris@16 822 ++position;
Chris@16 823 }
Chris@16 824 return false;
Chris@16 825 #ifdef BOOST_MSVC
Chris@16 826 #pragma warning(pop)
Chris@16 827 #endif
Chris@16 828 }
Chris@16 829
Chris@16 830 template <class BidiIterator, class Allocator, class traits>
Chris@16 831 bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_word()
Chris@16 832 {
Chris@16 833 #ifdef BOOST_MSVC
Chris@16 834 #pragma warning(push)
Chris@16 835 #pragma warning(disable:4127)
Chris@16 836 #endif
Chris@16 837 // do search optimised for word starts:
Chris@16 838 const unsigned char* _map = re.get_map();
Chris@16 839 if((m_match_flags & match_prev_avail) || (position != base))
Chris@16 840 --position;
Chris@16 841 else if(match_prefix())
Chris@16 842 return true;
Chris@16 843 do
Chris@16 844 {
Chris@16 845 while((position != last) && traits_inst.isctype(*position, m_word_mask))
Chris@16 846 ++position;
Chris@16 847 while((position != last) && !traits_inst.isctype(*position, m_word_mask))
Chris@16 848 ++position;
Chris@16 849 if(position == last)
Chris@16 850 break;
Chris@16 851
Chris@16 852 if(can_start(*position, _map, (unsigned char)mask_any) )
Chris@16 853 {
Chris@16 854 if(match_prefix())
Chris@16 855 return true;
Chris@16 856 }
Chris@16 857 if(position == last)
Chris@16 858 break;
Chris@16 859 } while(true);
Chris@16 860 return false;
Chris@16 861 #ifdef BOOST_MSVC
Chris@16 862 #pragma warning(pop)
Chris@16 863 #endif
Chris@16 864 }
Chris@16 865
Chris@16 866 template <class BidiIterator, class Allocator, class traits>
Chris@16 867 bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_line()
Chris@16 868 {
Chris@16 869 // do search optimised for line starts:
Chris@16 870 const unsigned char* _map = re.get_map();
Chris@16 871 if(match_prefix())
Chris@16 872 return true;
Chris@16 873 while(position != last)
Chris@16 874 {
Chris@16 875 while((position != last) && !is_separator(*position))
Chris@16 876 ++position;
Chris@16 877 if(position == last)
Chris@16 878 return false;
Chris@16 879 ++position;
Chris@16 880 if(position == last)
Chris@16 881 {
Chris@16 882 if(re.can_be_null() && match_prefix())
Chris@16 883 return true;
Chris@16 884 return false;
Chris@16 885 }
Chris@16 886
Chris@16 887 if( can_start(*position, _map, (unsigned char)mask_any) )
Chris@16 888 {
Chris@16 889 if(match_prefix())
Chris@16 890 return true;
Chris@16 891 }
Chris@16 892 if(position == last)
Chris@16 893 return false;
Chris@16 894 //++position;
Chris@16 895 }
Chris@16 896 return false;
Chris@16 897 }
Chris@16 898
Chris@16 899 template <class BidiIterator, class Allocator, class traits>
Chris@16 900 bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_buf()
Chris@16 901 {
Chris@16 902 if((position == base) && ((m_match_flags & match_not_bob) == 0))
Chris@16 903 return match_prefix();
Chris@16 904 return false;
Chris@16 905 }
Chris@16 906
// find_restart_lit: restart search intended for expressions that begin with
// a literal string.
//
// NOTE: the entire implementation below is compiled out by `#if 0`; the only
// live statement is the final `return false;`, so this function currently
// never reports a match and does not move `position`.
//
// The disabled code appears to be a KMP-style scan (it follows
// info->kmp_next on a mismatch) with two outcomes once the literal is found:
// for regbase::restart_fixed_lit the literal itself is recorded as the
// result, otherwise match_prefix() is tried from the start of the literal.
// It is kept for reference only - NOTE(review): confirm whether
// access::get_kmp / kmp_info still exist before attempting to re-enable it.
Chris@16 907 template <class BidiIterator, class Allocator, class traits>
Chris@16 908 bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_lit()
Chris@16 909 {
// ---- begin disabled implementation (never compiled) ----
Chris@16 910 #if 0
Chris@16 911 if(position == last)
Chris@16 912 return false; // can't possibly match if we're at the end already
Chris@16 913
Chris@16 914 unsigned type = (m_match_flags & match_continuous) ?
Chris@16 915 static_cast<unsigned int>(regbase::restart_continue)
Chris@16 916 : static_cast<unsigned int>(re.get_restart_type());
Chris@16 917
Chris@16 918 const kmp_info<char_type>* info = access::get_kmp(re);
Chris@16 919 int len = info->len;
Chris@16 920 const char_type* x = info->pstr;
Chris@16 921 int j = 0;
Chris@16 922 while (position != last)
Chris@16 923 {
Chris@16 924 while((j > -1) && (x[j] != traits_inst.translate(*position, icase)))
Chris@16 925 j = info->kmp_next[j];
Chris@16 926 ++position;
Chris@16 927 ++j;
Chris@16 928 if(j >= len)
Chris@16 929 {
Chris@16 930 if(type == regbase::restart_fixed_lit)
Chris@16 931 {
Chris@16 932 std::advance(position, -j);
Chris@16 933 restart = position;
Chris@16 934 std::advance(restart, len);
Chris@16 935 m_result.set_first(position);
Chris@16 936 m_result.set_second(restart);
Chris@16 937 position = restart;
Chris@16 938 return true;
Chris@16 939 }
Chris@16 940 else
Chris@16 941 {
Chris@16 942 restart = position;
Chris@16 943 std::advance(position, -j);
Chris@16 944 if(match_prefix())
Chris@16 945 return true;
Chris@16 946 else
Chris@16 947 {
Chris@16 948 for(int k = 0; (restart != position) && (k < j); ++k, --restart)
Chris@16 949 {} // dwa 10/20/2000 - warning suppression for MWCW
Chris@16 950 if(restart != last)
Chris@16 951 ++restart;
Chris@16 952 position = restart;
Chris@16 953 j = 0; //we could do better than this...
Chris@16 954 }
Chris@16 955 }
Chris@16 956 }
Chris@16 957 }
Chris@16 958 if((m_match_flags & match_partial) && (position == last) && j)
Chris@16 959 {
Chris@16 960 // we need to check for a partial match:
Chris@16 961 restart = position;
Chris@16 962 std::advance(position, -j);
Chris@16 963 return match_prefix();
Chris@16 964 }
Chris@16 965 #endif
// ---- end disabled implementation ----
// Live behaviour: unconditionally report that no match was found.
Chris@16 966 return false;
Chris@16 967 }
Chris@16 968
Chris@16 969 } // namespace re_detail
Chris@16 970
Chris@16 971 } // namespace boost
Chris@16 972
Chris@16 973 #ifdef BOOST_MSVC
Chris@16 974 # pragma warning(pop)
Chris@16 975 #endif
Chris@16 976
Chris@16 977 #ifdef __BORLANDC__
Chris@16 978 # pragma option pop
Chris@16 979 #endif
Chris@16 980 #ifdef BOOST_MSVC
Chris@16 981 #pragma warning(push)
Chris@16 982 #pragma warning(disable: 4103)
Chris@16 983 #endif
Chris@16 984 #ifdef BOOST_HAS_ABI_HEADERS
Chris@16 985 # include BOOST_ABI_SUFFIX
Chris@16 986 #endif
Chris@16 987 #ifdef BOOST_MSVC
Chris@16 988 #pragma warning(pop)
Chris@16 989 #endif
Chris@16 990
Chris@16 991 #endif
Chris@16 992