annotate DEPENDENCIES/generic/include/boost/regex/v4/basic_regex_creator.hpp @ 133:4acb5d8d80b6 tip

Don't fail environmental check if README.md exists (but .txt and no-suffix don't)
author Chris Cannam
date Tue, 30 Jul 2019 12:25:44 +0100
parents c530137014c0
children
rev   line source
Chris@16 1 /*
Chris@16 2 *
Chris@16 3 * Copyright (c) 2004
Chris@16 4 * John Maddock
Chris@16 5 *
Chris@16 6 * Use, modification and distribution are subject to the
Chris@16 7 * Boost Software License, Version 1.0. (See accompanying file
Chris@16 8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
Chris@16 9 *
Chris@16 10 */
Chris@16 11
Chris@16 12 /*
Chris@16 13 * LOCATION: see http://www.boost.org for most recent version.
Chris@16 14 * FILE basic_regex_creator.hpp
Chris@16 15 * VERSION see <boost/version.hpp>
Chris@16 16 * DESCRIPTION: Declares template class basic_regex_creator which fills in
Chris@16 17 * the data members of a regex_data object.
Chris@16 18 */
Chris@16 19
Chris@16 20 #ifndef BOOST_REGEX_V4_BASIC_REGEX_CREATOR_HPP
Chris@16 21 #define BOOST_REGEX_V4_BASIC_REGEX_CREATOR_HPP
Chris@16 22
Chris@16 23 #ifdef BOOST_MSVC
Chris@16 24 #pragma warning(push)
Chris@16 25 #pragma warning(disable: 4103)
Chris@16 26 #endif
Chris@16 27 #ifdef BOOST_HAS_ABI_HEADERS
Chris@16 28 # include BOOST_ABI_PREFIX
Chris@16 29 #endif
Chris@16 30 #ifdef BOOST_MSVC
Chris@16 31 #pragma warning(pop)
Chris@16 32 #endif
Chris@16 33
Chris@16 34 #ifdef BOOST_MSVC
Chris@16 35 # pragma warning(push)
Chris@16 36 # pragma warning(disable: 4800)
Chris@16 37 #endif
Chris@16 38
Chris@16 39 namespace boost{
Chris@16 40
Chris@16 41 namespace re_detail{
Chris@16 42
//
// digraph: a pair of characters used to describe a set member that is either
// a single character (second == 0) or a two character sequence.
// Users of this type test "second" for non-zero to detect a real digraph.
//
template <class charT>
struct digraph : public std::pair<charT, charT>
{
   // empty digraph, both slots are zero:
   digraph() : std::pair<charT, charT>(0, 0){}
   // single character, the second slot is left as zero:
   digraph(charT c1) : std::pair<charT, charT>(c1, 0){}
   // genuine two character sequence:
   digraph(charT c1, charT c2) : std::pair<charT, charT>(c1, c2)
   {}
   digraph(const digraph<charT>& d) : std::pair<charT, charT>(d.first, d.second){}
   // A copy constructor is declared above, so spell out the matching copy
   // assignment as well rather than relying on the implicitly generated one
   // (that implicit generation is deprecated once a copy constructor is
   // user-declared).
   digraph<charT>& operator=(const digraph<charT>& d)
   {
      this->first = d.first;
      this->second = d.second;
      return *this;
   }
   // Construct from any sequence exposing size() and operator[];
   // the sequence must hold either one or two characters.
   template <class Seq>
   digraph(const Seq& s) : std::pair<charT, charT>()
   {
      BOOST_ASSERT(s.size() <= 2);
      BOOST_ASSERT(s.size());
      this->first = s[0];
      this->second = (s.size() > 1) ? s[1] : 0;
   }
};
Chris@16 60
Chris@16 61 template <class charT, class traits>
Chris@16 62 class basic_char_set
Chris@16 63 {
Chris@16 64 public:
Chris@16 65 typedef digraph<charT> digraph_type;
Chris@16 66 typedef typename traits::string_type string_type;
Chris@16 67 typedef typename traits::char_class_type m_type;
Chris@16 68
Chris@16 69 basic_char_set()
Chris@16 70 {
Chris@16 71 m_negate = false;
Chris@16 72 m_has_digraphs = false;
Chris@16 73 m_classes = 0;
Chris@16 74 m_negated_classes = 0;
Chris@16 75 m_empty = true;
Chris@16 76 }
Chris@16 77
Chris@16 78 void add_single(const digraph_type& s)
Chris@16 79 {
Chris@16 80 m_singles.insert(m_singles.end(), s);
Chris@16 81 if(s.second)
Chris@16 82 m_has_digraphs = true;
Chris@16 83 m_empty = false;
Chris@16 84 }
Chris@16 85 void add_range(const digraph_type& first, const digraph_type& end)
Chris@16 86 {
Chris@16 87 m_ranges.insert(m_ranges.end(), first);
Chris@16 88 m_ranges.insert(m_ranges.end(), end);
Chris@16 89 if(first.second)
Chris@16 90 {
Chris@16 91 m_has_digraphs = true;
Chris@16 92 add_single(first);
Chris@16 93 }
Chris@16 94 if(end.second)
Chris@16 95 {
Chris@16 96 m_has_digraphs = true;
Chris@16 97 add_single(end);
Chris@16 98 }
Chris@16 99 m_empty = false;
Chris@16 100 }
Chris@16 101 void add_class(m_type m)
Chris@16 102 {
Chris@16 103 m_classes |= m;
Chris@16 104 m_empty = false;
Chris@16 105 }
Chris@16 106 void add_negated_class(m_type m)
Chris@16 107 {
Chris@16 108 m_negated_classes |= m;
Chris@16 109 m_empty = false;
Chris@16 110 }
Chris@16 111 void add_equivalent(const digraph_type& s)
Chris@16 112 {
Chris@16 113 m_equivalents.insert(m_equivalents.end(), s);
Chris@16 114 if(s.second)
Chris@16 115 {
Chris@16 116 m_has_digraphs = true;
Chris@16 117 add_single(s);
Chris@16 118 }
Chris@16 119 m_empty = false;
Chris@16 120 }
Chris@16 121 void negate()
Chris@16 122 {
Chris@16 123 m_negate = true;
Chris@16 124 //m_empty = false;
Chris@16 125 }
Chris@16 126
Chris@16 127 //
Chris@16 128 // accessor functions:
Chris@16 129 //
Chris@16 130 bool has_digraphs()const
Chris@16 131 {
Chris@16 132 return m_has_digraphs;
Chris@16 133 }
Chris@16 134 bool is_negated()const
Chris@16 135 {
Chris@16 136 return m_negate;
Chris@16 137 }
Chris@16 138 typedef typename std::vector<digraph_type>::const_iterator list_iterator;
Chris@16 139 list_iterator singles_begin()const
Chris@16 140 {
Chris@16 141 return m_singles.begin();
Chris@16 142 }
Chris@16 143 list_iterator singles_end()const
Chris@16 144 {
Chris@16 145 return m_singles.end();
Chris@16 146 }
Chris@16 147 list_iterator ranges_begin()const
Chris@16 148 {
Chris@16 149 return m_ranges.begin();
Chris@16 150 }
Chris@16 151 list_iterator ranges_end()const
Chris@16 152 {
Chris@16 153 return m_ranges.end();
Chris@16 154 }
Chris@16 155 list_iterator equivalents_begin()const
Chris@16 156 {
Chris@16 157 return m_equivalents.begin();
Chris@16 158 }
Chris@16 159 list_iterator equivalents_end()const
Chris@16 160 {
Chris@16 161 return m_equivalents.end();
Chris@16 162 }
Chris@16 163 m_type classes()const
Chris@16 164 {
Chris@16 165 return m_classes;
Chris@16 166 }
Chris@16 167 m_type negated_classes()const
Chris@16 168 {
Chris@16 169 return m_negated_classes;
Chris@16 170 }
Chris@16 171 bool empty()const
Chris@16 172 {
Chris@16 173 return m_empty;
Chris@16 174 }
Chris@16 175 private:
Chris@16 176 std::vector<digraph_type> m_singles; // a list of single characters to match
Chris@16 177 std::vector<digraph_type> m_ranges; // a list of end points of our ranges
Chris@16 178 bool m_negate; // true if the set is to be negated
Chris@16 179 bool m_has_digraphs; // true if we have digraphs present
Chris@16 180 m_type m_classes; // character classes to match
Chris@16 181 m_type m_negated_classes; // negated character classes to match
Chris@16 182 bool m_empty; // whether we've added anything yet
Chris@16 183 std::vector<digraph_type> m_equivalents; // a list of equivalence classes
Chris@16 184 };
Chris@16 185
Chris@16 186 template <class charT, class traits>
Chris@16 187 class basic_regex_creator
Chris@16 188 {
Chris@16 189 public:
Chris@16 190 basic_regex_creator(regex_data<charT, traits>* data);
Chris@16 191 std::ptrdiff_t getoffset(void* addr)
Chris@16 192 {
Chris@16 193 return getoffset(addr, m_pdata->m_data.data());
Chris@16 194 }
Chris@16 195 std::ptrdiff_t getoffset(const void* addr, const void* base)
Chris@16 196 {
Chris@16 197 return static_cast<const char*>(addr) - static_cast<const char*>(base);
Chris@16 198 }
Chris@16 199 re_syntax_base* getaddress(std::ptrdiff_t off)
Chris@16 200 {
Chris@16 201 return getaddress(off, m_pdata->m_data.data());
Chris@16 202 }
Chris@16 203 re_syntax_base* getaddress(std::ptrdiff_t off, void* base)
Chris@16 204 {
Chris@16 205 return static_cast<re_syntax_base*>(static_cast<void*>(static_cast<char*>(base) + off));
Chris@16 206 }
Chris@16 207 void init(unsigned l_flags)
Chris@16 208 {
Chris@16 209 m_pdata->m_flags = l_flags;
Chris@16 210 m_icase = l_flags & regex_constants::icase;
Chris@16 211 }
Chris@16 212 regbase::flag_type flags()
Chris@16 213 {
Chris@16 214 return m_pdata->m_flags;
Chris@16 215 }
Chris@16 216 void flags(regbase::flag_type f)
Chris@16 217 {
Chris@16 218 m_pdata->m_flags = f;
Chris@16 219 if(m_icase != static_cast<bool>(f & regbase::icase))
Chris@16 220 {
Chris@16 221 m_icase = static_cast<bool>(f & regbase::icase);
Chris@16 222 }
Chris@16 223 }
Chris@16 224 re_syntax_base* append_state(syntax_element_type t, std::size_t s = sizeof(re_syntax_base));
Chris@16 225 re_syntax_base* insert_state(std::ptrdiff_t pos, syntax_element_type t, std::size_t s = sizeof(re_syntax_base));
Chris@16 226 re_literal* append_literal(charT c);
Chris@16 227 re_syntax_base* append_set(const basic_char_set<charT, traits>& char_set);
Chris@16 228 re_syntax_base* append_set(const basic_char_set<charT, traits>& char_set, mpl::false_*);
Chris@16 229 re_syntax_base* append_set(const basic_char_set<charT, traits>& char_set, mpl::true_*);
Chris@16 230 void finalize(const charT* p1, const charT* p2);
Chris@16 231 protected:
Chris@16 232 regex_data<charT, traits>* m_pdata; // pointer to the basic_regex_data struct we are filling in
Chris@16 233 const ::boost::regex_traits_wrapper<traits>&
Chris@16 234 m_traits; // convenience reference to traits class
Chris@16 235 re_syntax_base* m_last_state; // the last state we added
Chris@16 236 bool m_icase; // true for case insensitive matches
Chris@16 237 unsigned m_repeater_id; // the state_id of the next repeater
Chris@16 238 bool m_has_backrefs; // true if there are actually any backrefs
Chris@16 239 unsigned m_backrefs; // bitmask of permitted backrefs
Chris@16 240 boost::uintmax_t m_bad_repeats; // bitmask of repeats we can't deduce a startmap for;
Chris@16 241 bool m_has_recursions; // set when we have recursive expresisons to fixup
Chris@16 242 std::vector<bool> m_recursion_checks; // notes which recursions we've followed while analysing this expression
Chris@16 243 typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character
Chris@16 244 typename traits::char_class_type m_mask_space; // mask used to determine if a character is a word character
Chris@16 245 typename traits::char_class_type m_lower_mask; // mask used to determine if a character is a lowercase character
Chris@16 246 typename traits::char_class_type m_upper_mask; // mask used to determine if a character is an uppercase character
Chris@16 247 typename traits::char_class_type m_alpha_mask; // mask used to determine if a character is an alphabetic character
Chris@16 248 private:
Chris@16 249 basic_regex_creator& operator=(const basic_regex_creator&);
Chris@16 250 basic_regex_creator(const basic_regex_creator&);
Chris@16 251
Chris@16 252 void fixup_pointers(re_syntax_base* state);
Chris@16 253 void fixup_recursions(re_syntax_base* state);
Chris@16 254 void create_startmaps(re_syntax_base* state);
Chris@16 255 int calculate_backstep(re_syntax_base* state);
Chris@16 256 void create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask);
Chris@16 257 unsigned get_restart_type(re_syntax_base* state);
Chris@16 258 void set_all_masks(unsigned char* bits, unsigned char);
Chris@16 259 bool is_bad_repeat(re_syntax_base* pt);
Chris@16 260 void set_bad_repeat(re_syntax_base* pt);
Chris@16 261 syntax_element_type get_repeat_type(re_syntax_base* state);
Chris@16 262 void probe_leading_repeat(re_syntax_base* state);
Chris@16 263 };
Chris@16 264
Chris@16 265 template <class charT, class traits>
Chris@16 266 basic_regex_creator<charT, traits>::basic_regex_creator(regex_data<charT, traits>* data)
Chris@16 267 : m_pdata(data), m_traits(*(data->m_ptraits)), m_last_state(0), m_repeater_id(0), m_has_backrefs(false), m_backrefs(0), m_has_recursions(false)
Chris@16 268 {
Chris@16 269 m_pdata->m_data.clear();
Chris@16 270 m_pdata->m_status = ::boost::regex_constants::error_ok;
Chris@16 271 static const charT w = 'w';
Chris@16 272 static const charT s = 's';
Chris@16 273 static const charT l[5] = { 'l', 'o', 'w', 'e', 'r', };
Chris@16 274 static const charT u[5] = { 'u', 'p', 'p', 'e', 'r', };
Chris@16 275 static const charT a[5] = { 'a', 'l', 'p', 'h', 'a', };
Chris@16 276 m_word_mask = m_traits.lookup_classname(&w, &w +1);
Chris@16 277 m_mask_space = m_traits.lookup_classname(&s, &s +1);
Chris@16 278 m_lower_mask = m_traits.lookup_classname(l, l + 5);
Chris@16 279 m_upper_mask = m_traits.lookup_classname(u, u + 5);
Chris@16 280 m_alpha_mask = m_traits.lookup_classname(a, a + 5);
Chris@16 281 m_pdata->m_word_mask = m_word_mask;
Chris@16 282 BOOST_ASSERT(m_word_mask != 0);
Chris@16 283 BOOST_ASSERT(m_mask_space != 0);
Chris@16 284 BOOST_ASSERT(m_lower_mask != 0);
Chris@16 285 BOOST_ASSERT(m_upper_mask != 0);
Chris@16 286 BOOST_ASSERT(m_alpha_mask != 0);
Chris@16 287 }
Chris@16 288
Chris@16 289 template <class charT, class traits>
Chris@16 290 re_syntax_base* basic_regex_creator<charT, traits>::append_state(syntax_element_type t, std::size_t s)
Chris@16 291 {
Chris@16 292 // if the state is a backref then make a note of it:
Chris@16 293 if(t == syntax_element_backref)
Chris@16 294 this->m_has_backrefs = true;
Chris@16 295 // append a new state, start by aligning our last one:
Chris@16 296 m_pdata->m_data.align();
Chris@16 297 // set the offset to the next state in our last one:
Chris@16 298 if(m_last_state)
Chris@16 299 m_last_state->next.i = m_pdata->m_data.size() - getoffset(m_last_state);
Chris@16 300 // now actually extent our data:
Chris@16 301 m_last_state = static_cast<re_syntax_base*>(m_pdata->m_data.extend(s));
Chris@16 302 // fill in boilerplate options in the new state:
Chris@16 303 m_last_state->next.i = 0;
Chris@16 304 m_last_state->type = t;
Chris@16 305 return m_last_state;
Chris@16 306 }
Chris@16 307
Chris@16 308 template <class charT, class traits>
Chris@16 309 re_syntax_base* basic_regex_creator<charT, traits>::insert_state(std::ptrdiff_t pos, syntax_element_type t, std::size_t s)
Chris@16 310 {
Chris@16 311 // append a new state, start by aligning our last one:
Chris@16 312 m_pdata->m_data.align();
Chris@16 313 // set the offset to the next state in our last one:
Chris@16 314 if(m_last_state)
Chris@16 315 m_last_state->next.i = m_pdata->m_data.size() - getoffset(m_last_state);
Chris@16 316 // remember the last state position:
Chris@16 317 std::ptrdiff_t off = getoffset(m_last_state) + s;
Chris@16 318 // now actually insert our data:
Chris@16 319 re_syntax_base* new_state = static_cast<re_syntax_base*>(m_pdata->m_data.insert(pos, s));
Chris@16 320 // fill in boilerplate options in the new state:
Chris@16 321 new_state->next.i = s;
Chris@16 322 new_state->type = t;
Chris@16 323 m_last_state = getaddress(off);
Chris@16 324 return new_state;
Chris@16 325 }
Chris@16 326
Chris@16 327 template <class charT, class traits>
Chris@16 328 re_literal* basic_regex_creator<charT, traits>::append_literal(charT c)
Chris@16 329 {
Chris@16 330 re_literal* result;
Chris@16 331 // start by seeing if we have an existing re_literal we can extend:
Chris@16 332 if((0 == m_last_state) || (m_last_state->type != syntax_element_literal))
Chris@16 333 {
Chris@16 334 // no existing re_literal, create a new one:
Chris@16 335 result = static_cast<re_literal*>(append_state(syntax_element_literal, sizeof(re_literal) + sizeof(charT)));
Chris@16 336 result->length = 1;
Chris@16 337 *static_cast<charT*>(static_cast<void*>(result+1)) = m_traits.translate(c, m_icase);
Chris@16 338 }
Chris@16 339 else
Chris@16 340 {
Chris@16 341 // we have an existing re_literal, extend it:
Chris@16 342 std::ptrdiff_t off = getoffset(m_last_state);
Chris@16 343 m_pdata->m_data.extend(sizeof(charT));
Chris@16 344 m_last_state = result = static_cast<re_literal*>(getaddress(off));
Chris@16 345 charT* characters = static_cast<charT*>(static_cast<void*>(result+1));
Chris@16 346 characters[result->length] = m_traits.translate(c, m_icase);
Chris@101 347 result->length += 1;
Chris@16 348 }
Chris@16 349 return result;
Chris@16 350 }
Chris@16 351
Chris@16 352 template <class charT, class traits>
Chris@16 353 inline re_syntax_base* basic_regex_creator<charT, traits>::append_set(
Chris@16 354 const basic_char_set<charT, traits>& char_set)
Chris@16 355 {
Chris@16 356 typedef mpl::bool_< (sizeof(charT) == 1) > truth_type;
Chris@16 357 return char_set.has_digraphs()
Chris@16 358 ? append_set(char_set, static_cast<mpl::false_*>(0))
Chris@16 359 : append_set(char_set, static_cast<truth_type*>(0));
Chris@16 360 }
Chris@16 361
Chris@16 362 template <class charT, class traits>
Chris@16 363 re_syntax_base* basic_regex_creator<charT, traits>::append_set(
Chris@16 364 const basic_char_set<charT, traits>& char_set, mpl::false_*)
Chris@16 365 {
Chris@16 366 typedef typename traits::string_type string_type;
Chris@16 367 typedef typename basic_char_set<charT, traits>::list_iterator item_iterator;
Chris@16 368 typedef typename traits::char_class_type m_type;
Chris@16 369
Chris@16 370 re_set_long<m_type>* result = static_cast<re_set_long<m_type>*>(append_state(syntax_element_long_set, sizeof(re_set_long<m_type>)));
Chris@16 371 //
Chris@16 372 // fill in the basics:
Chris@16 373 //
Chris@16 374 result->csingles = static_cast<unsigned int>(::boost::re_detail::distance(char_set.singles_begin(), char_set.singles_end()));
Chris@16 375 result->cranges = static_cast<unsigned int>(::boost::re_detail::distance(char_set.ranges_begin(), char_set.ranges_end())) / 2;
Chris@16 376 result->cequivalents = static_cast<unsigned int>(::boost::re_detail::distance(char_set.equivalents_begin(), char_set.equivalents_end()));
Chris@16 377 result->cclasses = char_set.classes();
Chris@16 378 result->cnclasses = char_set.negated_classes();
Chris@16 379 if(flags() & regbase::icase)
Chris@16 380 {
Chris@16 381 // adjust classes as needed:
Chris@16 382 if(((result->cclasses & m_lower_mask) == m_lower_mask) || ((result->cclasses & m_upper_mask) == m_upper_mask))
Chris@16 383 result->cclasses |= m_alpha_mask;
Chris@16 384 if(((result->cnclasses & m_lower_mask) == m_lower_mask) || ((result->cnclasses & m_upper_mask) == m_upper_mask))
Chris@16 385 result->cnclasses |= m_alpha_mask;
Chris@16 386 }
Chris@16 387
Chris@16 388 result->isnot = char_set.is_negated();
Chris@16 389 result->singleton = !char_set.has_digraphs();
Chris@16 390 //
Chris@16 391 // remember where the state is for later:
Chris@16 392 //
Chris@16 393 std::ptrdiff_t offset = getoffset(result);
Chris@16 394 //
Chris@16 395 // now extend with all the singles:
Chris@16 396 //
Chris@16 397 item_iterator first, last;
Chris@16 398 first = char_set.singles_begin();
Chris@16 399 last = char_set.singles_end();
Chris@16 400 while(first != last)
Chris@16 401 {
Chris@16 402 charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (first->second ? 3 : 2)));
Chris@16 403 p[0] = m_traits.translate(first->first, m_icase);
Chris@16 404 if(first->second)
Chris@16 405 {
Chris@16 406 p[1] = m_traits.translate(first->second, m_icase);
Chris@16 407 p[2] = 0;
Chris@16 408 }
Chris@16 409 else
Chris@16 410 p[1] = 0;
Chris@16 411 ++first;
Chris@16 412 }
Chris@16 413 //
Chris@16 414 // now extend with all the ranges:
Chris@16 415 //
Chris@16 416 first = char_set.ranges_begin();
Chris@16 417 last = char_set.ranges_end();
Chris@16 418 while(first != last)
Chris@16 419 {
Chris@16 420 // first grab the endpoints of the range:
Chris@16 421 digraph<charT> c1 = *first;
Chris@16 422 c1.first = this->m_traits.translate(c1.first, this->m_icase);
Chris@16 423 c1.second = this->m_traits.translate(c1.second, this->m_icase);
Chris@16 424 ++first;
Chris@16 425 digraph<charT> c2 = *first;
Chris@16 426 c2.first = this->m_traits.translate(c2.first, this->m_icase);
Chris@16 427 c2.second = this->m_traits.translate(c2.second, this->m_icase);
Chris@16 428 ++first;
Chris@16 429 string_type s1, s2;
Chris@16 430 // different actions now depending upon whether collation is turned on:
Chris@16 431 if(flags() & regex_constants::collate)
Chris@16 432 {
Chris@16 433 // we need to transform our range into sort keys:
Chris@16 434 charT a1[3] = { c1.first, c1.second, charT(0), };
Chris@16 435 charT a2[3] = { c2.first, c2.second, charT(0), };
Chris@16 436 s1 = this->m_traits.transform(a1, (a1[1] ? a1+2 : a1+1));
Chris@16 437 s2 = this->m_traits.transform(a2, (a2[1] ? a2+2 : a2+1));
Chris@16 438 if(s1.size() == 0)
Chris@16 439 s1 = string_type(1, charT(0));
Chris@16 440 if(s2.size() == 0)
Chris@16 441 s2 = string_type(1, charT(0));
Chris@16 442 }
Chris@16 443 else
Chris@16 444 {
Chris@16 445 if(c1.second)
Chris@16 446 {
Chris@16 447 s1.insert(s1.end(), c1.first);
Chris@16 448 s1.insert(s1.end(), c1.second);
Chris@16 449 }
Chris@16 450 else
Chris@16 451 s1 = string_type(1, c1.first);
Chris@16 452 if(c2.second)
Chris@16 453 {
Chris@16 454 s2.insert(s2.end(), c2.first);
Chris@16 455 s2.insert(s2.end(), c2.second);
Chris@16 456 }
Chris@16 457 else
Chris@16 458 s2.insert(s2.end(), c2.first);
Chris@16 459 }
Chris@16 460 if(s1 > s2)
Chris@16 461 {
Chris@16 462 // Oops error:
Chris@16 463 return 0;
Chris@16 464 }
Chris@16 465 charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (s1.size() + s2.size() + 2) ) );
Chris@16 466 re_detail::copy(s1.begin(), s1.end(), p);
Chris@16 467 p[s1.size()] = charT(0);
Chris@16 468 p += s1.size() + 1;
Chris@16 469 re_detail::copy(s2.begin(), s2.end(), p);
Chris@16 470 p[s2.size()] = charT(0);
Chris@16 471 }
Chris@16 472 //
Chris@16 473 // now process the equivalence classes:
Chris@16 474 //
Chris@16 475 first = char_set.equivalents_begin();
Chris@16 476 last = char_set.equivalents_end();
Chris@16 477 while(first != last)
Chris@16 478 {
Chris@16 479 string_type s;
Chris@16 480 if(first->second)
Chris@16 481 {
Chris@16 482 charT cs[3] = { first->first, first->second, charT(0), };
Chris@16 483 s = m_traits.transform_primary(cs, cs+2);
Chris@16 484 }
Chris@16 485 else
Chris@16 486 s = m_traits.transform_primary(&first->first, &first->first+1);
Chris@16 487 if(s.empty())
Chris@16 488 return 0; // invalid or unsupported equivalence class
Chris@16 489 charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (s.size()+1) ) );
Chris@16 490 re_detail::copy(s.begin(), s.end(), p);
Chris@16 491 p[s.size()] = charT(0);
Chris@16 492 ++first;
Chris@16 493 }
Chris@16 494 //
Chris@16 495 // finally reset the address of our last state:
Chris@16 496 //
Chris@16 497 m_last_state = result = static_cast<re_set_long<m_type>*>(getaddress(offset));
Chris@16 498 return result;
Chris@16 499 }
Chris@16 500
//
// char_less: "less than" for characters.  The generic form uses operator<;
// the overloads for char and signed char compare the values as unsigned char.
//
template<class T>
inline bool char_less(T t1, T t2)
{
   return t1 < t2;
}
inline bool char_less(char t1, char t2)
{
   const unsigned char lhs = static_cast<unsigned char>(t1);
   const unsigned char rhs = static_cast<unsigned char>(t2);
   return lhs < rhs;
}
inline bool char_less(signed char t1, signed char t2)
{
   const unsigned char lhs = static_cast<unsigned char>(t1);
   const unsigned char rhs = static_cast<unsigned char>(t2);
   return lhs < rhs;
}
Chris@16 514
Chris@16 515 template <class charT, class traits>
Chris@16 516 re_syntax_base* basic_regex_creator<charT, traits>::append_set(
Chris@16 517 const basic_char_set<charT, traits>& char_set, mpl::true_*)
Chris@16 518 {
Chris@16 519 typedef typename traits::string_type string_type;
Chris@16 520 typedef typename basic_char_set<charT, traits>::list_iterator item_iterator;
Chris@16 521
Chris@16 522 re_set* result = static_cast<re_set*>(append_state(syntax_element_set, sizeof(re_set)));
Chris@16 523 bool negate = char_set.is_negated();
Chris@16 524 std::memset(result->_map, 0, sizeof(result->_map));
Chris@16 525 //
Chris@16 526 // handle singles first:
Chris@16 527 //
Chris@16 528 item_iterator first, last;
Chris@16 529 first = char_set.singles_begin();
Chris@16 530 last = char_set.singles_end();
Chris@16 531 while(first != last)
Chris@16 532 {
Chris@16 533 for(unsigned int i = 0; i < (1 << CHAR_BIT); ++i)
Chris@16 534 {
Chris@16 535 if(this->m_traits.translate(static_cast<charT>(i), this->m_icase)
Chris@16 536 == this->m_traits.translate(first->first, this->m_icase))
Chris@16 537 result->_map[i] = true;
Chris@16 538 }
Chris@16 539 ++first;
Chris@16 540 }
Chris@16 541 //
Chris@16 542 // OK now handle ranges:
Chris@16 543 //
Chris@16 544 first = char_set.ranges_begin();
Chris@16 545 last = char_set.ranges_end();
Chris@16 546 while(first != last)
Chris@16 547 {
Chris@16 548 // first grab the endpoints of the range:
Chris@16 549 charT c1 = this->m_traits.translate(first->first, this->m_icase);
Chris@16 550 ++first;
Chris@16 551 charT c2 = this->m_traits.translate(first->first, this->m_icase);
Chris@16 552 ++first;
Chris@16 553 // different actions now depending upon whether collation is turned on:
Chris@16 554 if(flags() & regex_constants::collate)
Chris@16 555 {
Chris@16 556 // we need to transform our range into sort keys:
Chris@16 557 charT c3[2] = { c1, charT(0), };
Chris@16 558 string_type s1 = this->m_traits.transform(c3, c3+1);
Chris@16 559 c3[0] = c2;
Chris@16 560 string_type s2 = this->m_traits.transform(c3, c3+1);
Chris@16 561 if(s1 > s2)
Chris@16 562 {
Chris@16 563 // Oops error:
Chris@16 564 return 0;
Chris@16 565 }
Chris@16 566 BOOST_ASSERT(c3[1] == charT(0));
Chris@16 567 for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
Chris@16 568 {
Chris@16 569 c3[0] = static_cast<charT>(i);
Chris@16 570 string_type s3 = this->m_traits.transform(c3, c3 +1);
Chris@16 571 if((s1 <= s3) && (s3 <= s2))
Chris@16 572 result->_map[i] = true;
Chris@16 573 }
Chris@16 574 }
Chris@16 575 else
Chris@16 576 {
Chris@16 577 if(char_less(c2, c1))
Chris@16 578 {
Chris@16 579 // Oops error:
Chris@16 580 return 0;
Chris@16 581 }
Chris@16 582 // everything in range matches:
Chris@16 583 std::memset(result->_map + static_cast<unsigned char>(c1), true, 1 + static_cast<unsigned char>(c2) - static_cast<unsigned char>(c1));
Chris@16 584 }
Chris@16 585 }
Chris@16 586 //
Chris@16 587 // and now the classes:
Chris@16 588 //
Chris@16 589 typedef typename traits::char_class_type m_type;
Chris@16 590 m_type m = char_set.classes();
Chris@16 591 if(flags() & regbase::icase)
Chris@16 592 {
Chris@16 593 // adjust m as needed:
Chris@16 594 if(((m & m_lower_mask) == m_lower_mask) || ((m & m_upper_mask) == m_upper_mask))
Chris@16 595 m |= m_alpha_mask;
Chris@16 596 }
Chris@16 597 if(m != 0)
Chris@16 598 {
Chris@16 599 for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
Chris@16 600 {
Chris@16 601 if(this->m_traits.isctype(static_cast<charT>(i), m))
Chris@16 602 result->_map[i] = true;
Chris@16 603 }
Chris@16 604 }
Chris@16 605 //
Chris@16 606 // and now the negated classes:
Chris@16 607 //
Chris@16 608 m = char_set.negated_classes();
Chris@16 609 if(flags() & regbase::icase)
Chris@16 610 {
Chris@16 611 // adjust m as needed:
Chris@16 612 if(((m & m_lower_mask) == m_lower_mask) || ((m & m_upper_mask) == m_upper_mask))
Chris@16 613 m |= m_alpha_mask;
Chris@16 614 }
Chris@16 615 if(m != 0)
Chris@16 616 {
Chris@16 617 for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
Chris@16 618 {
Chris@16 619 if(0 == this->m_traits.isctype(static_cast<charT>(i), m))
Chris@16 620 result->_map[i] = true;
Chris@16 621 }
Chris@16 622 }
Chris@16 623 //
Chris@16 624 // now process the equivalence classes:
Chris@16 625 //
Chris@16 626 first = char_set.equivalents_begin();
Chris@16 627 last = char_set.equivalents_end();
Chris@16 628 while(first != last)
Chris@16 629 {
Chris@16 630 string_type s;
Chris@16 631 BOOST_ASSERT(static_cast<charT>(0) == first->second);
Chris@16 632 s = m_traits.transform_primary(&first->first, &first->first+1);
Chris@16 633 if(s.empty())
Chris@16 634 return 0; // invalid or unsupported equivalence class
Chris@16 635 for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
Chris@16 636 {
Chris@16 637 charT c[2] = { (static_cast<charT>(i)), charT(0), };
Chris@16 638 string_type s2 = this->m_traits.transform_primary(c, c+1);
Chris@16 639 if(s == s2)
Chris@16 640 result->_map[i] = true;
Chris@16 641 }
Chris@16 642 ++first;
Chris@16 643 }
Chris@16 644 if(negate)
Chris@16 645 {
Chris@16 646 for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
Chris@16 647 {
Chris@16 648 result->_map[i] = !(result->_map[i]);
Chris@16 649 }
Chris@16 650 }
Chris@16 651 return result;
Chris@16 652 }
Chris@16 653
Chris@16 654 template <class charT, class traits>
Chris@16 655 void basic_regex_creator<charT, traits>::finalize(const charT* p1, const charT* p2)
Chris@16 656 {
Chris@16 657 if(this->m_pdata->m_status)
Chris@16 658 return;
Chris@16 659 // we've added all the states we need, now finish things off.
Chris@16 660 // start by adding a terminating state:
Chris@16 661 append_state(syntax_element_match);
Chris@16 662 // extend storage to store original expression:
Chris@16 663 std::ptrdiff_t len = p2 - p1;
Chris@16 664 m_pdata->m_expression_len = len;
Chris@16 665 charT* ps = static_cast<charT*>(m_pdata->m_data.extend(sizeof(charT) * (1 + (p2 - p1))));
Chris@16 666 m_pdata->m_expression = ps;
Chris@16 667 re_detail::copy(p1, p2, ps);
Chris@16 668 ps[p2 - p1] = 0;
Chris@16 669 // fill in our other data...
Chris@16 670 // successful parsing implies a zero status:
Chris@16 671 m_pdata->m_status = 0;
Chris@16 672 // get the first state of the machine:
Chris@16 673 m_pdata->m_first_state = static_cast<re_syntax_base*>(m_pdata->m_data.data());
Chris@16 674 // fixup pointers in the machine:
Chris@16 675 fixup_pointers(m_pdata->m_first_state);
Chris@16 676 if(m_has_recursions)
Chris@16 677 {
Chris@16 678 m_pdata->m_has_recursions = true;
Chris@16 679 fixup_recursions(m_pdata->m_first_state);
Chris@16 680 if(this->m_pdata->m_status)
Chris@16 681 return;
Chris@16 682 }
Chris@16 683 else
Chris@16 684 m_pdata->m_has_recursions = false;
Chris@16 685 // create nested startmaps:
Chris@16 686 create_startmaps(m_pdata->m_first_state);
Chris@16 687 // create main startmap:
Chris@16 688 std::memset(m_pdata->m_startmap, 0, sizeof(m_pdata->m_startmap));
Chris@16 689 m_pdata->m_can_be_null = 0;
Chris@16 690
Chris@16 691 m_bad_repeats = 0;
Chris@16 692 if(m_has_recursions)
Chris@16 693 m_recursion_checks.assign(1 + m_pdata->m_mark_count, false);
Chris@16 694 create_startmap(m_pdata->m_first_state, m_pdata->m_startmap, &(m_pdata->m_can_be_null), mask_all);
Chris@16 695 // get the restart type:
Chris@16 696 m_pdata->m_restart_type = get_restart_type(m_pdata->m_first_state);
Chris@16 697 // optimise a leading repeat if there is one:
Chris@16 698 probe_leading_repeat(m_pdata->m_first_state);
Chris@16 699 }
Chris@16 700
Chris@16 701 template <class charT, class traits>
Chris@16 702 void basic_regex_creator<charT, traits>::fixup_pointers(re_syntax_base* state)
Chris@16 703 {
Chris@16 704 while(state)
Chris@16 705 {
Chris@16 706 switch(state->type)
Chris@16 707 {
Chris@16 708 case syntax_element_recurse:
Chris@16 709 m_has_recursions = true;
Chris@16 710 if(state->next.i)
Chris@16 711 state->next.p = getaddress(state->next.i, state);
Chris@16 712 else
Chris@16 713 state->next.p = 0;
Chris@16 714 break;
Chris@16 715 case syntax_element_rep:
Chris@16 716 case syntax_element_dot_rep:
Chris@16 717 case syntax_element_char_rep:
Chris@16 718 case syntax_element_short_set_rep:
Chris@16 719 case syntax_element_long_set_rep:
Chris@16 720 // set the state_id of this repeat:
Chris@16 721 static_cast<re_repeat*>(state)->state_id = m_repeater_id++;
Chris@16 722 BOOST_FALLTHROUGH;
Chris@16 723 case syntax_element_alt:
Chris@16 724 std::memset(static_cast<re_alt*>(state)->_map, 0, sizeof(static_cast<re_alt*>(state)->_map));
Chris@16 725 static_cast<re_alt*>(state)->can_be_null = 0;
Chris@16 726 BOOST_FALLTHROUGH;
Chris@16 727 case syntax_element_jump:
Chris@16 728 static_cast<re_jump*>(state)->alt.p = getaddress(static_cast<re_jump*>(state)->alt.i, state);
Chris@16 729 BOOST_FALLTHROUGH;
Chris@16 730 default:
Chris@16 731 if(state->next.i)
Chris@16 732 state->next.p = getaddress(state->next.i, state);
Chris@16 733 else
Chris@16 734 state->next.p = 0;
Chris@16 735 }
Chris@16 736 state = state->next.p;
Chris@16 737 }
Chris@16 738 }
Chris@16 739
Chris@16 740 template <class charT, class traits>
Chris@16 741 void basic_regex_creator<charT, traits>::fixup_recursions(re_syntax_base* state)
Chris@16 742 {
Chris@16 743 re_syntax_base* base = state;
Chris@16 744 while(state)
Chris@16 745 {
Chris@16 746 switch(state->type)
Chris@16 747 {
Chris@16 748 case syntax_element_assert_backref:
Chris@16 749 {
Chris@16 750 // just check that the index is valid:
Chris@16 751 int idx = static_cast<const re_brace*>(state)->index;
Chris@16 752 if(idx < 0)
Chris@16 753 {
Chris@16 754 idx = -idx-1;
Chris@16 755 if(idx >= 10000)
Chris@16 756 {
Chris@16 757 idx = m_pdata->get_id(idx);
Chris@16 758 if(idx <= 0)
Chris@16 759 {
Chris@16 760 // check of sub-expression that doesn't exist:
Chris@16 761 if(0 == this->m_pdata->m_status) // update the error code if not already set
Chris@16 762 this->m_pdata->m_status = boost::regex_constants::error_bad_pattern;
Chris@16 763 //
Chris@16 764 // clear the expression, we should be empty:
Chris@16 765 //
Chris@16 766 this->m_pdata->m_expression = 0;
Chris@16 767 this->m_pdata->m_expression_len = 0;
Chris@16 768 //
Chris@16 769 // and throw if required:
Chris@16 770 //
Chris@16 771 if(0 == (this->flags() & regex_constants::no_except))
Chris@16 772 {
Chris@16 773 std::string message = "Encountered a forward reference to a marked sub-expression that does not exist.";
Chris@16 774 boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0);
Chris@16 775 e.raise();
Chris@16 776 }
Chris@16 777 }
Chris@16 778 }
Chris@16 779 }
Chris@16 780 }
Chris@16 781 break;
Chris@16 782 case syntax_element_recurse:
Chris@16 783 {
Chris@16 784 bool ok = false;
Chris@16 785 re_syntax_base* p = base;
Chris@16 786 std::ptrdiff_t idx = static_cast<re_jump*>(state)->alt.i;
Chris@16 787 if(idx > 10000)
Chris@16 788 {
Chris@16 789 //
Chris@16 790 // There may be more than one capture group with this hash, just do what Perl
Chris@16 791 // does and recurse to the leftmost:
Chris@16 792 //
Chris@16 793 idx = m_pdata->get_id(static_cast<int>(idx));
Chris@16 794 }
Chris@16 795 while(p)
Chris@16 796 {
Chris@16 797 if((p->type == syntax_element_startmark) && (static_cast<re_brace*>(p)->index == idx))
Chris@16 798 {
Chris@16 799 //
Chris@16 800 // We've found the target of the recursion, set the jump target:
Chris@16 801 //
Chris@16 802 static_cast<re_jump*>(state)->alt.p = p;
Chris@16 803 ok = true;
Chris@16 804 //
Chris@16 805 // Now scan the target for nested repeats:
Chris@16 806 //
Chris@16 807 p = p->next.p;
Chris@16 808 int next_rep_id = 0;
Chris@16 809 while(p)
Chris@16 810 {
Chris@16 811 switch(p->type)
Chris@16 812 {
Chris@16 813 case syntax_element_rep:
Chris@16 814 case syntax_element_dot_rep:
Chris@16 815 case syntax_element_char_rep:
Chris@16 816 case syntax_element_short_set_rep:
Chris@16 817 case syntax_element_long_set_rep:
Chris@16 818 next_rep_id = static_cast<re_repeat*>(p)->state_id;
Chris@16 819 break;
Chris@16 820 case syntax_element_endmark:
Chris@16 821 if(static_cast<const re_brace*>(p)->index == idx)
Chris@16 822 next_rep_id = -1;
Chris@16 823 break;
Chris@16 824 default:
Chris@16 825 break;
Chris@16 826 }
Chris@16 827 if(next_rep_id)
Chris@16 828 break;
Chris@16 829 p = p->next.p;
Chris@16 830 }
Chris@16 831 if(next_rep_id > 0)
Chris@16 832 {
Chris@16 833 static_cast<re_recurse*>(state)->state_id = next_rep_id - 1;
Chris@16 834 }
Chris@16 835
Chris@16 836 break;
Chris@16 837 }
Chris@16 838 p = p->next.p;
Chris@16 839 }
Chris@16 840 if(!ok)
Chris@16 841 {
Chris@16 842 // recursion to sub-expression that doesn't exist:
Chris@16 843 if(0 == this->m_pdata->m_status) // update the error code if not already set
Chris@16 844 this->m_pdata->m_status = boost::regex_constants::error_bad_pattern;
Chris@16 845 //
Chris@16 846 // clear the expression, we should be empty:
Chris@16 847 //
Chris@16 848 this->m_pdata->m_expression = 0;
Chris@16 849 this->m_pdata->m_expression_len = 0;
Chris@16 850 //
Chris@16 851 // and throw if required:
Chris@16 852 //
Chris@16 853 if(0 == (this->flags() & regex_constants::no_except))
Chris@16 854 {
Chris@16 855 std::string message = "Encountered a forward reference to a recursive sub-expression that does not exist.";
Chris@16 856 boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0);
Chris@16 857 e.raise();
Chris@16 858 }
Chris@16 859 }
Chris@16 860 }
Chris@16 861 break;
Chris@16 862 default:
Chris@16 863 break;
Chris@16 864 }
Chris@16 865 state = state->next.p;
Chris@16 866 }
Chris@16 867 }
Chris@16 868
Chris@16 869 template <class charT, class traits>
Chris@16 870 void basic_regex_creator<charT, traits>::create_startmaps(re_syntax_base* state)
Chris@16 871 {
Chris@16 872 // non-recursive implementation:
Chris@16 873 // create the last map in the machine first, so that earlier maps
Chris@16 874 // can make use of the result...
Chris@16 875 //
Chris@16 876 // This was originally a recursive implementation, but that caused stack
Chris@16 877 // overflows with complex expressions on small stacks (think COM+).
Chris@16 878
Chris@16 879 // start by saving the case setting:
Chris@16 880 bool l_icase = m_icase;
Chris@16 881 std::vector<std::pair<bool, re_syntax_base*> > v;
Chris@16 882
Chris@16 883 while(state)
Chris@16 884 {
Chris@16 885 switch(state->type)
Chris@16 886 {
Chris@16 887 case syntax_element_toggle_case:
Chris@16 888 // we need to track case changes here:
Chris@16 889 m_icase = static_cast<re_case*>(state)->icase;
Chris@16 890 state = state->next.p;
Chris@16 891 continue;
Chris@16 892 case syntax_element_alt:
Chris@16 893 case syntax_element_rep:
Chris@16 894 case syntax_element_dot_rep:
Chris@16 895 case syntax_element_char_rep:
Chris@16 896 case syntax_element_short_set_rep:
Chris@16 897 case syntax_element_long_set_rep:
Chris@16 898 // just push the state onto our stack for now:
Chris@16 899 v.push_back(std::pair<bool, re_syntax_base*>(m_icase, state));
Chris@16 900 state = state->next.p;
Chris@16 901 break;
Chris@16 902 case syntax_element_backstep:
Chris@16 903 // we need to calculate how big the backstep is:
Chris@16 904 static_cast<re_brace*>(state)->index
Chris@16 905 = this->calculate_backstep(state->next.p);
Chris@16 906 if(static_cast<re_brace*>(state)->index < 0)
Chris@16 907 {
Chris@16 908 // Oops error:
Chris@16 909 if(0 == this->m_pdata->m_status) // update the error code if not already set
Chris@16 910 this->m_pdata->m_status = boost::regex_constants::error_bad_pattern;
Chris@16 911 //
Chris@16 912 // clear the expression, we should be empty:
Chris@16 913 //
Chris@16 914 this->m_pdata->m_expression = 0;
Chris@16 915 this->m_pdata->m_expression_len = 0;
Chris@16 916 //
Chris@16 917 // and throw if required:
Chris@16 918 //
Chris@16 919 if(0 == (this->flags() & regex_constants::no_except))
Chris@16 920 {
Chris@16 921 std::string message = "Invalid lookbehind assertion encountered in the regular expression.";
Chris@16 922 boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0);
Chris@16 923 e.raise();
Chris@16 924 }
Chris@16 925 }
Chris@16 926 BOOST_FALLTHROUGH;
Chris@16 927 default:
Chris@16 928 state = state->next.p;
Chris@16 929 }
Chris@16 930 }
Chris@16 931
Chris@16 932 // now work through our list, building all the maps as we go:
Chris@16 933 while(v.size())
Chris@16 934 {
Chris@16 935 // Initialize m_recursion_checks if we need it:
Chris@16 936 if(m_has_recursions)
Chris@16 937 m_recursion_checks.assign(1 + m_pdata->m_mark_count, false);
Chris@16 938
Chris@16 939 const std::pair<bool, re_syntax_base*>& p = v.back();
Chris@16 940 m_icase = p.first;
Chris@16 941 state = p.second;
Chris@16 942 v.pop_back();
Chris@16 943
Chris@16 944 // Build maps:
Chris@16 945 m_bad_repeats = 0;
Chris@16 946 create_startmap(state->next.p, static_cast<re_alt*>(state)->_map, &static_cast<re_alt*>(state)->can_be_null, mask_take);
Chris@16 947 m_bad_repeats = 0;
Chris@16 948
Chris@16 949 if(m_has_recursions)
Chris@16 950 m_recursion_checks.assign(1 + m_pdata->m_mark_count, false);
Chris@16 951 create_startmap(static_cast<re_alt*>(state)->alt.p, static_cast<re_alt*>(state)->_map, &static_cast<re_alt*>(state)->can_be_null, mask_skip);
Chris@16 952 // adjust the type of the state to allow for faster matching:
Chris@16 953 state->type = this->get_repeat_type(state);
Chris@16 954 }
Chris@16 955 // restore case sensitivity:
Chris@16 956 m_icase = l_icase;
Chris@16 957 }
Chris@16 958
Chris@16 959 template <class charT, class traits>
Chris@16 960 int basic_regex_creator<charT, traits>::calculate_backstep(re_syntax_base* state)
Chris@16 961 {
Chris@16 962 typedef typename traits::char_class_type m_type;
Chris@16 963 int result = 0;
Chris@16 964 while(state)
Chris@16 965 {
Chris@16 966 switch(state->type)
Chris@16 967 {
Chris@16 968 case syntax_element_startmark:
Chris@16 969 if((static_cast<re_brace*>(state)->index == -1)
Chris@16 970 || (static_cast<re_brace*>(state)->index == -2))
Chris@16 971 {
Chris@16 972 state = static_cast<re_jump*>(state->next.p)->alt.p->next.p;
Chris@16 973 continue;
Chris@16 974 }
Chris@16 975 else if(static_cast<re_brace*>(state)->index == -3)
Chris@16 976 {
Chris@16 977 state = state->next.p->next.p;
Chris@16 978 continue;
Chris@16 979 }
Chris@16 980 break;
Chris@16 981 case syntax_element_endmark:
Chris@16 982 if((static_cast<re_brace*>(state)->index == -1)
Chris@16 983 || (static_cast<re_brace*>(state)->index == -2))
Chris@16 984 return result;
Chris@16 985 break;
Chris@16 986 case syntax_element_literal:
Chris@16 987 result += static_cast<re_literal*>(state)->length;
Chris@16 988 break;
Chris@16 989 case syntax_element_wild:
Chris@16 990 case syntax_element_set:
Chris@16 991 result += 1;
Chris@16 992 break;
Chris@16 993 case syntax_element_dot_rep:
Chris@16 994 case syntax_element_char_rep:
Chris@16 995 case syntax_element_short_set_rep:
Chris@16 996 case syntax_element_backref:
Chris@16 997 case syntax_element_rep:
Chris@16 998 case syntax_element_combining:
Chris@16 999 case syntax_element_long_set_rep:
Chris@16 1000 case syntax_element_backstep:
Chris@16 1001 {
Chris@16 1002 re_repeat* rep = static_cast<re_repeat *>(state);
Chris@16 1003 // adjust the type of the state to allow for faster matching:
Chris@16 1004 state->type = this->get_repeat_type(state);
Chris@16 1005 if((state->type == syntax_element_dot_rep)
Chris@16 1006 || (state->type == syntax_element_char_rep)
Chris@16 1007 || (state->type == syntax_element_short_set_rep))
Chris@16 1008 {
Chris@16 1009 if(rep->max != rep->min)
Chris@16 1010 return -1;
Chris@16 1011 result += static_cast<int>(rep->min);
Chris@16 1012 state = rep->alt.p;
Chris@16 1013 continue;
Chris@16 1014 }
Chris@16 1015 else if(state->type == syntax_element_long_set_rep)
Chris@16 1016 {
Chris@16 1017 BOOST_ASSERT(rep->next.p->type == syntax_element_long_set);
Chris@16 1018 if(static_cast<re_set_long<m_type>*>(rep->next.p)->singleton == 0)
Chris@16 1019 return -1;
Chris@16 1020 if(rep->max != rep->min)
Chris@16 1021 return -1;
Chris@16 1022 result += static_cast<int>(rep->min);
Chris@16 1023 state = rep->alt.p;
Chris@16 1024 continue;
Chris@16 1025 }
Chris@16 1026 }
Chris@16 1027 return -1;
Chris@16 1028 case syntax_element_long_set:
Chris@16 1029 if(static_cast<re_set_long<m_type>*>(state)->singleton == 0)
Chris@16 1030 return -1;
Chris@16 1031 result += 1;
Chris@16 1032 break;
Chris@16 1033 case syntax_element_jump:
Chris@16 1034 state = static_cast<re_jump*>(state)->alt.p;
Chris@16 1035 continue;
Chris@16 1036 case syntax_element_alt:
Chris@16 1037 {
Chris@16 1038 int r1 = calculate_backstep(state->next.p);
Chris@16 1039 int r2 = calculate_backstep(static_cast<re_alt*>(state)->alt.p);
Chris@16 1040 if((r1 < 0) || (r1 != r2))
Chris@16 1041 return -1;
Chris@16 1042 return result + r1;
Chris@16 1043 }
Chris@16 1044 default:
Chris@16 1045 break;
Chris@16 1046 }
Chris@16 1047 state = state->next.p;
Chris@16 1048 }
Chris@16 1049 return -1;
Chris@16 1050 }
Chris@16 1051
Chris@16 1052 template <class charT, class traits>
Chris@16 1053 void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask)
Chris@16 1054 {
Chris@16 1055 int not_last_jump = 1;
Chris@16 1056 re_syntax_base* recursion_start = 0;
Chris@16 1057 int recursion_sub = 0;
Chris@16 1058 re_syntax_base* recursion_restart = 0;
Chris@16 1059
Chris@16 1060 // track case sensitivity:
Chris@16 1061 bool l_icase = m_icase;
Chris@16 1062
Chris@16 1063 while(state)
Chris@16 1064 {
Chris@16 1065 switch(state->type)
Chris@16 1066 {
Chris@16 1067 case syntax_element_toggle_case:
Chris@16 1068 l_icase = static_cast<re_case*>(state)->icase;
Chris@16 1069 state = state->next.p;
Chris@16 1070 break;
Chris@16 1071 case syntax_element_literal:
Chris@16 1072 {
Chris@16 1073 // don't set anything in *pnull, set each element in l_map
Chris@16 1074 // that could match the first character in the literal:
Chris@16 1075 if(l_map)
Chris@16 1076 {
Chris@16 1077 l_map[0] |= mask_init;
Chris@16 1078 charT first_char = *static_cast<charT*>(static_cast<void*>(static_cast<re_literal*>(state) + 1));
Chris@16 1079 for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
Chris@16 1080 {
Chris@16 1081 if(m_traits.translate(static_cast<charT>(i), l_icase) == first_char)
Chris@16 1082 l_map[i] |= mask;
Chris@16 1083 }
Chris@16 1084 }
Chris@16 1085 return;
Chris@16 1086 }
Chris@16 1087 case syntax_element_end_line:
Chris@16 1088 {
Chris@16 1089 // next character must be a line separator (if there is one):
Chris@16 1090 if(l_map)
Chris@16 1091 {
Chris@16 1092 l_map[0] |= mask_init;
Chris@16 1093 l_map[static_cast<unsigned>('\n')] |= mask;
Chris@16 1094 l_map[static_cast<unsigned>('\r')] |= mask;
Chris@16 1095 l_map[static_cast<unsigned>('\f')] |= mask;
Chris@16 1096 l_map[0x85] |= mask;
Chris@16 1097 }
Chris@16 1098 // now figure out if we can match a NULL string at this point:
Chris@16 1099 if(pnull)
Chris@16 1100 create_startmap(state->next.p, 0, pnull, mask);
Chris@16 1101 return;
Chris@16 1102 }
Chris@16 1103 case syntax_element_recurse:
Chris@16 1104 {
Chris@16 1105 if(state->type == syntax_element_startmark)
Chris@16 1106 recursion_sub = static_cast<re_brace*>(state)->index;
Chris@16 1107 else
Chris@16 1108 recursion_sub = 0;
Chris@16 1109 if(m_recursion_checks[recursion_sub])
Chris@16 1110 {
Chris@16 1111 // Infinite recursion!!
Chris@16 1112 if(0 == this->m_pdata->m_status) // update the error code if not already set
Chris@16 1113 this->m_pdata->m_status = boost::regex_constants::error_bad_pattern;
Chris@16 1114 //
Chris@16 1115 // clear the expression, we should be empty:
Chris@16 1116 //
Chris@16 1117 this->m_pdata->m_expression = 0;
Chris@16 1118 this->m_pdata->m_expression_len = 0;
Chris@16 1119 //
Chris@16 1120 // and throw if required:
Chris@16 1121 //
Chris@16 1122 if(0 == (this->flags() & regex_constants::no_except))
Chris@16 1123 {
Chris@16 1124 std::string message = "Encountered an infinite recursion.";
Chris@16 1125 boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0);
Chris@16 1126 e.raise();
Chris@16 1127 }
Chris@16 1128 }
Chris@16 1129 else if(recursion_start == 0)
Chris@16 1130 {
Chris@16 1131 recursion_start = state;
Chris@16 1132 recursion_restart = state->next.p;
Chris@16 1133 state = static_cast<re_jump*>(state)->alt.p;
Chris@16 1134 m_recursion_checks[recursion_sub] = true;
Chris@16 1135 break;
Chris@16 1136 }
Chris@16 1137 m_recursion_checks[recursion_sub] = true;
Chris@16 1138 // can't handle nested recursion here...
Chris@16 1139 BOOST_FALLTHROUGH;
Chris@16 1140 }
Chris@16 1141 case syntax_element_backref:
Chris@16 1142 // can be null, and any character can match:
Chris@16 1143 if(pnull)
Chris@16 1144 *pnull |= mask;
Chris@16 1145 BOOST_FALLTHROUGH;
Chris@16 1146 case syntax_element_wild:
Chris@16 1147 {
Chris@16 1148 // can't be null, any character can match:
Chris@16 1149 set_all_masks(l_map, mask);
Chris@16 1150 return;
Chris@16 1151 }
Chris@16 1152 case syntax_element_match:
Chris@16 1153 {
Chris@16 1154 // must be null, any character can match:
Chris@16 1155 set_all_masks(l_map, mask);
Chris@16 1156 if(pnull)
Chris@16 1157 *pnull |= mask;
Chris@16 1158 return;
Chris@16 1159 }
Chris@16 1160 case syntax_element_word_start:
Chris@16 1161 {
Chris@16 1162 // recurse, then AND with all the word characters:
Chris@16 1163 create_startmap(state->next.p, l_map, pnull, mask);
Chris@16 1164 if(l_map)
Chris@16 1165 {
Chris@16 1166 l_map[0] |= mask_init;
Chris@16 1167 for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
Chris@16 1168 {
Chris@16 1169 if(!m_traits.isctype(static_cast<charT>(i), m_word_mask))
Chris@16 1170 l_map[i] &= static_cast<unsigned char>(~mask);
Chris@16 1171 }
Chris@16 1172 }
Chris@16 1173 return;
Chris@16 1174 }
Chris@16 1175 case syntax_element_word_end:
Chris@16 1176 {
Chris@16 1177 // recurse, then AND with all the word characters:
Chris@16 1178 create_startmap(state->next.p, l_map, pnull, mask);
Chris@16 1179 if(l_map)
Chris@16 1180 {
Chris@16 1181 l_map[0] |= mask_init;
Chris@16 1182 for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
Chris@16 1183 {
Chris@16 1184 if(m_traits.isctype(static_cast<charT>(i), m_word_mask))
Chris@16 1185 l_map[i] &= static_cast<unsigned char>(~mask);
Chris@16 1186 }
Chris@16 1187 }
Chris@16 1188 return;
Chris@16 1189 }
Chris@16 1190 case syntax_element_buffer_end:
Chris@16 1191 {
Chris@16 1192 // we *must be null* :
Chris@16 1193 if(pnull)
Chris@16 1194 *pnull |= mask;
Chris@16 1195 return;
Chris@16 1196 }
Chris@16 1197 case syntax_element_long_set:
Chris@16 1198 if(l_map)
Chris@16 1199 {
Chris@16 1200 typedef typename traits::char_class_type m_type;
Chris@16 1201 if(static_cast<re_set_long<m_type>*>(state)->singleton)
Chris@16 1202 {
Chris@16 1203 l_map[0] |= mask_init;
Chris@16 1204 for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
Chris@16 1205 {
Chris@16 1206 charT c = static_cast<charT>(i);
Chris@16 1207 if(&c != re_is_set_member(&c, &c + 1, static_cast<re_set_long<m_type>*>(state), *m_pdata, l_icase))
Chris@16 1208 l_map[i] |= mask;
Chris@16 1209 }
Chris@16 1210 }
Chris@16 1211 else
Chris@16 1212 set_all_masks(l_map, mask);
Chris@16 1213 }
Chris@16 1214 return;
Chris@16 1215 case syntax_element_set:
Chris@16 1216 if(l_map)
Chris@16 1217 {
Chris@16 1218 l_map[0] |= mask_init;
Chris@16 1219 for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
Chris@16 1220 {
Chris@16 1221 if(static_cast<re_set*>(state)->_map[
Chris@16 1222 static_cast<unsigned char>(m_traits.translate(static_cast<charT>(i), l_icase))])
Chris@16 1223 l_map[i] |= mask;
Chris@16 1224 }
Chris@16 1225 }
Chris@16 1226 return;
Chris@16 1227 case syntax_element_jump:
Chris@16 1228 // take the jump:
Chris@16 1229 state = static_cast<re_alt*>(state)->alt.p;
Chris@16 1230 not_last_jump = -1;
Chris@16 1231 break;
Chris@16 1232 case syntax_element_alt:
Chris@16 1233 case syntax_element_rep:
Chris@16 1234 case syntax_element_dot_rep:
Chris@16 1235 case syntax_element_char_rep:
Chris@16 1236 case syntax_element_short_set_rep:
Chris@16 1237 case syntax_element_long_set_rep:
Chris@16 1238 {
Chris@16 1239 re_alt* rep = static_cast<re_alt*>(state);
Chris@16 1240 if(rep->_map[0] & mask_init)
Chris@16 1241 {
Chris@16 1242 if(l_map)
Chris@16 1243 {
Chris@16 1244 // copy previous results:
Chris@16 1245 l_map[0] |= mask_init;
Chris@16 1246 for(unsigned int i = 0; i <= UCHAR_MAX; ++i)
Chris@16 1247 {
Chris@16 1248 if(rep->_map[i] & mask_any)
Chris@16 1249 l_map[i] |= mask;
Chris@16 1250 }
Chris@16 1251 }
Chris@16 1252 if(pnull)
Chris@16 1253 {
Chris@16 1254 if(rep->can_be_null & mask_any)
Chris@16 1255 *pnull |= mask;
Chris@16 1256 }
Chris@16 1257 }
Chris@16 1258 else
Chris@16 1259 {
Chris@16 1260 // we haven't created a startmap for this alternative yet
Chris@16 1261 // so take the union of the two options:
Chris@16 1262 if(is_bad_repeat(state))
Chris@16 1263 {
Chris@16 1264 set_all_masks(l_map, mask);
Chris@16 1265 if(pnull)
Chris@16 1266 *pnull |= mask;
Chris@16 1267 return;
Chris@16 1268 }
Chris@16 1269 set_bad_repeat(state);
Chris@16 1270 create_startmap(state->next.p, l_map, pnull, mask);
Chris@16 1271 if((state->type == syntax_element_alt)
Chris@16 1272 || (static_cast<re_repeat*>(state)->min == 0)
Chris@16 1273 || (not_last_jump == 0))
Chris@16 1274 create_startmap(rep->alt.p, l_map, pnull, mask);
Chris@16 1275 }
Chris@16 1276 }
Chris@16 1277 return;
Chris@16 1278 case syntax_element_soft_buffer_end:
Chris@16 1279 // match newline or null:
Chris@16 1280 if(l_map)
Chris@16 1281 {
Chris@16 1282 l_map[0] |= mask_init;
Chris@16 1283 l_map[static_cast<unsigned>('\n')] |= mask;
Chris@16 1284 l_map[static_cast<unsigned>('\r')] |= mask;
Chris@16 1285 }
Chris@16 1286 if(pnull)
Chris@16 1287 *pnull |= mask;
Chris@16 1288 return;
Chris@16 1289 case syntax_element_endmark:
Chris@16 1290 // need to handle independent subs as a special case:
Chris@16 1291 if(static_cast<re_brace*>(state)->index < 0)
Chris@16 1292 {
Chris@16 1293 // can be null, any character can match:
Chris@16 1294 set_all_masks(l_map, mask);
Chris@16 1295 if(pnull)
Chris@16 1296 *pnull |= mask;
Chris@16 1297 return;
Chris@16 1298 }
Chris@16 1299 else if(recursion_start && (recursion_sub != 0) && (recursion_sub == static_cast<re_brace*>(state)->index))
Chris@16 1300 {
Chris@16 1301 // recursion termination:
Chris@16 1302 recursion_start = 0;
Chris@16 1303 state = recursion_restart;
Chris@16 1304 break;
Chris@16 1305 }
Chris@16 1306
Chris@16 1307 //
Chris@16 1308 // Normally we just go to the next state... but if this sub-expression is
Chris@16 1309 // the target of a recursion, then we might be ending a recursion, in which
Chris@16 1310 // case we should check whatever follows that recursion, as well as whatever
Chris@16 1311 // follows this state:
Chris@16 1312 //
Chris@16 1313 if(m_pdata->m_has_recursions && static_cast<re_brace*>(state)->index)
Chris@16 1314 {
Chris@16 1315 bool ok = false;
Chris@16 1316 re_syntax_base* p = m_pdata->m_first_state;
Chris@16 1317 while(p)
Chris@16 1318 {
Chris@16 1319 if(p->type == syntax_element_recurse)
Chris@16 1320 {
Chris@16 1321 re_brace* p2 = static_cast<re_brace*>(static_cast<re_jump*>(p)->alt.p);
Chris@16 1322 if((p2->type == syntax_element_startmark) && (p2->index == static_cast<re_brace*>(state)->index))
Chris@16 1323 {
Chris@16 1324 ok = true;
Chris@16 1325 break;
Chris@16 1326 }
Chris@16 1327 }
Chris@16 1328 p = p->next.p;
Chris@16 1329 }
Chris@16 1330 if(ok)
Chris@16 1331 {
Chris@16 1332 create_startmap(p->next.p, l_map, pnull, mask);
Chris@16 1333 }
Chris@16 1334 }
Chris@16 1335 state = state->next.p;
Chris@16 1336 break;
Chris@16 1337
Chris@16 1338 case syntax_element_startmark:
Chris@16 1339 // need to handle independent subs as a special case:
Chris@16 1340 if(static_cast<re_brace*>(state)->index == -3)
Chris@16 1341 {
Chris@16 1342 state = state->next.p->next.p;
Chris@16 1343 break;
Chris@16 1344 }
Chris@16 1345 BOOST_FALLTHROUGH;
Chris@16 1346 default:
Chris@16 1347 state = state->next.p;
Chris@16 1348 }
Chris@16 1349 ++not_last_jump;
Chris@16 1350 }
Chris@16 1351 }
Chris@16 1352
Chris@16 1353 template <class charT, class traits>
Chris@16 1354 unsigned basic_regex_creator<charT, traits>::get_restart_type(re_syntax_base* state)
Chris@16 1355 {
Chris@16 1356 //
Chris@16 1357 // find out how the machine starts, so we can optimise the search:
Chris@16 1358 //
Chris@16 1359 while(state)
Chris@16 1360 {
Chris@16 1361 switch(state->type)
Chris@16 1362 {
Chris@16 1363 case syntax_element_startmark:
Chris@16 1364 case syntax_element_endmark:
Chris@16 1365 state = state->next.p;
Chris@16 1366 continue;
Chris@16 1367 case syntax_element_start_line:
Chris@16 1368 return regbase::restart_line;
Chris@16 1369 case syntax_element_word_start:
Chris@16 1370 return regbase::restart_word;
Chris@16 1371 case syntax_element_buffer_start:
Chris@16 1372 return regbase::restart_buf;
Chris@16 1373 case syntax_element_restart_continue:
Chris@16 1374 return regbase::restart_continue;
Chris@16 1375 default:
Chris@16 1376 state = 0;
Chris@16 1377 continue;
Chris@16 1378 }
Chris@16 1379 }
Chris@16 1380 return regbase::restart_any;
Chris@16 1381 }
Chris@16 1382
Chris@16 1383 template <class charT, class traits>
Chris@16 1384 void basic_regex_creator<charT, traits>::set_all_masks(unsigned char* bits, unsigned char mask)
Chris@16 1385 {
Chris@16 1386 //
Chris@16 1387 // set mask in all of bits elements,
Chris@16 1388 // if bits[0] has mask_init not set then we can
Chris@16 1389 // optimise this to a call to memset:
Chris@16 1390 //
Chris@16 1391 if(bits)
Chris@16 1392 {
Chris@16 1393 if(bits[0] == 0)
Chris@16 1394 (std::memset)(bits, mask, 1u << CHAR_BIT);
Chris@16 1395 else
Chris@16 1396 {
Chris@16 1397 for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
Chris@16 1398 bits[i] |= mask;
Chris@16 1399 }
Chris@16 1400 bits[0] |= mask_init;
Chris@16 1401 }
Chris@16 1402 }
Chris@16 1403
Chris@16 1404 template <class charT, class traits>
Chris@16 1405 bool basic_regex_creator<charT, traits>::is_bad_repeat(re_syntax_base* pt)
Chris@16 1406 {
Chris@16 1407 switch(pt->type)
Chris@16 1408 {
Chris@16 1409 case syntax_element_rep:
Chris@16 1410 case syntax_element_dot_rep:
Chris@16 1411 case syntax_element_char_rep:
Chris@16 1412 case syntax_element_short_set_rep:
Chris@16 1413 case syntax_element_long_set_rep:
Chris@16 1414 {
Chris@16 1415 unsigned state_id = static_cast<re_repeat*>(pt)->state_id;
Chris@16 1416 if(state_id > sizeof(m_bad_repeats) * CHAR_BIT)
Chris@16 1417 return true; // run out of bits, assume we can't traverse this one.
Chris@16 1418 static const boost::uintmax_t one = 1uL;
Chris@16 1419 return m_bad_repeats & (one << state_id);
Chris@16 1420 }
Chris@16 1421 default:
Chris@16 1422 return false;
Chris@16 1423 }
Chris@16 1424 }
Chris@16 1425
Chris@16 1426 template <class charT, class traits>
Chris@16 1427 void basic_regex_creator<charT, traits>::set_bad_repeat(re_syntax_base* pt)
Chris@16 1428 {
Chris@16 1429 switch(pt->type)
Chris@16 1430 {
Chris@16 1431 case syntax_element_rep:
Chris@16 1432 case syntax_element_dot_rep:
Chris@16 1433 case syntax_element_char_rep:
Chris@16 1434 case syntax_element_short_set_rep:
Chris@16 1435 case syntax_element_long_set_rep:
Chris@16 1436 {
Chris@16 1437 unsigned state_id = static_cast<re_repeat*>(pt)->state_id;
Chris@16 1438 static const boost::uintmax_t one = 1uL;
Chris@16 1439 if(state_id <= sizeof(m_bad_repeats) * CHAR_BIT)
Chris@16 1440 m_bad_repeats |= (one << state_id);
Chris@16 1441 }
Chris@16 1442 break;
Chris@16 1443 default:
Chris@16 1444 break;
Chris@16 1445 }
Chris@16 1446 }
Chris@16 1447
Chris@16 1448 template <class charT, class traits>
Chris@16 1449 syntax_element_type basic_regex_creator<charT, traits>::get_repeat_type(re_syntax_base* state)
Chris@16 1450 {
Chris@16 1451 typedef typename traits::char_class_type m_type;
Chris@16 1452 if(state->type == syntax_element_rep)
Chris@16 1453 {
Chris@16 1454 // check to see if we are repeating a single state:
Chris@16 1455 if(state->next.p->next.p->next.p == static_cast<re_alt*>(state)->alt.p)
Chris@16 1456 {
Chris@16 1457 switch(state->next.p->type)
Chris@16 1458 {
Chris@16 1459 case re_detail::syntax_element_wild:
Chris@16 1460 return re_detail::syntax_element_dot_rep;
Chris@16 1461 case re_detail::syntax_element_literal:
Chris@16 1462 return re_detail::syntax_element_char_rep;
Chris@16 1463 case re_detail::syntax_element_set:
Chris@16 1464 return re_detail::syntax_element_short_set_rep;
Chris@16 1465 case re_detail::syntax_element_long_set:
Chris@16 1466 if(static_cast<re_detail::re_set_long<m_type>*>(state->next.p)->singleton)
Chris@16 1467 return re_detail::syntax_element_long_set_rep;
Chris@16 1468 break;
Chris@16 1469 default:
Chris@16 1470 break;
Chris@16 1471 }
Chris@16 1472 }
Chris@16 1473 }
Chris@16 1474 return state->type;
Chris@16 1475 }
Chris@16 1476
Chris@16 1477 template <class charT, class traits>
Chris@16 1478 void basic_regex_creator<charT, traits>::probe_leading_repeat(re_syntax_base* state)
Chris@16 1479 {
Chris@16 1480 // enumerate our states, and see if we have a leading repeat
Chris@16 1481 // for which failed search restarts can be optimised;
Chris@16 1482 do
Chris@16 1483 {
Chris@16 1484 switch(state->type)
Chris@16 1485 {
Chris@16 1486 case syntax_element_startmark:
Chris@16 1487 if(static_cast<re_brace*>(state)->index >= 0)
Chris@16 1488 {
Chris@16 1489 state = state->next.p;
Chris@16 1490 continue;
Chris@16 1491 }
Chris@16 1492 if((static_cast<re_brace*>(state)->index == -1)
Chris@16 1493 || (static_cast<re_brace*>(state)->index == -2))
Chris@16 1494 {
Chris@16 1495 // skip past the zero width assertion:
Chris@16 1496 state = static_cast<const re_jump*>(state->next.p)->alt.p->next.p;
Chris@16 1497 continue;
Chris@16 1498 }
Chris@16 1499 if(static_cast<re_brace*>(state)->index == -3)
Chris@16 1500 {
Chris@16 1501 // Have to skip the leading jump state:
Chris@16 1502 state = state->next.p->next.p;
Chris@16 1503 continue;
Chris@16 1504 }
Chris@16 1505 return;
Chris@16 1506 case syntax_element_endmark:
Chris@16 1507 case syntax_element_start_line:
Chris@16 1508 case syntax_element_end_line:
Chris@16 1509 case syntax_element_word_boundary:
Chris@16 1510 case syntax_element_within_word:
Chris@16 1511 case syntax_element_word_start:
Chris@16 1512 case syntax_element_word_end:
Chris@16 1513 case syntax_element_buffer_start:
Chris@16 1514 case syntax_element_buffer_end:
Chris@16 1515 case syntax_element_restart_continue:
Chris@16 1516 state = state->next.p;
Chris@16 1517 break;
Chris@16 1518 case syntax_element_dot_rep:
Chris@16 1519 case syntax_element_char_rep:
Chris@16 1520 case syntax_element_short_set_rep:
Chris@16 1521 case syntax_element_long_set_rep:
Chris@16 1522 if(this->m_has_backrefs == 0)
Chris@16 1523 static_cast<re_repeat*>(state)->leading = true;
Chris@16 1524 BOOST_FALLTHROUGH;
Chris@16 1525 default:
Chris@16 1526 return;
Chris@16 1527 }
Chris@16 1528 }while(state);
Chris@16 1529 }
Chris@16 1530
Chris@16 1531
Chris@16 1532 } // namespace re_detail
Chris@16 1533
Chris@16 1534 } // namespace boost
Chris@16 1535
Chris@16 1536 #ifdef BOOST_MSVC
Chris@16 1537 # pragma warning(pop)
Chris@16 1538 #endif
Chris@16 1539
Chris@16 1540 #ifdef BOOST_MSVC
Chris@16 1541 #pragma warning(push)
Chris@16 1542 #pragma warning(disable: 4103)
Chris@16 1543 #endif
Chris@16 1544 #ifdef BOOST_HAS_ABI_HEADERS
Chris@16 1545 # include BOOST_ABI_SUFFIX
Chris@16 1546 #endif
Chris@16 1547 #ifdef BOOST_MSVC
Chris@16 1548 #pragma warning(pop)
Chris@16 1549 #endif
Chris@16 1550
Chris@16 1551 #endif
Chris@16 1552