annotate DEPENDENCIES/generic/include/boost/regex/v4/states.hpp @ 125:34e428693f5d vext

Vext -> Repoint
author Chris Cannam
date Thu, 14 Jun 2018 11:15:39 +0100
parents 2665513ce2d3
children
rev   line source
Chris@16 1 /*
Chris@16 2 *
Chris@16 3 * Copyright (c) 1998-2002
Chris@16 4 * John Maddock
Chris@16 5 *
Chris@16 6 * Use, modification and distribution are subject to the
Chris@16 7 * Boost Software License, Version 1.0. (See accompanying file
Chris@16 8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
Chris@16 9 *
Chris@16 10 */
Chris@16 11
Chris@16 12 /*
Chris@16 13 * LOCATION: see http://www.boost.org for most recent version.
Chris@16 14 * FILE states.hpp
Chris@16 15 * VERSION see <boost/version.hpp>
Chris@16 16 * DESCRIPTION: Declares internal state machine structures.
Chris@16 17 */
Chris@16 18
Chris@16 19 #ifndef BOOST_REGEX_V4_STATES_HPP
Chris@16 20 #define BOOST_REGEX_V4_STATES_HPP
Chris@16 21
Chris@16 22 #ifdef BOOST_MSVC
Chris@16 23 #pragma warning(push)
Chris@16 24 #pragma warning(disable: 4103)
Chris@16 25 #endif
Chris@16 26 #ifdef BOOST_HAS_ABI_HEADERS
Chris@16 27 # include BOOST_ABI_PREFIX
Chris@16 28 #endif
Chris@16 29 #ifdef BOOST_MSVC
Chris@16 30 #pragma warning(pop)
Chris@16 31 #endif
Chris@16 32
Chris@16 33 namespace boost{
Chris@16 34 namespace re_detail{
Chris@16 35
/*** mask_type *******************************************************
Whenever we have a choice of two alternatives, we use an array of bytes
to indicate which of the two alternatives it is possible to take for any
given input character.  If mask_take is set, then we can take the next
state, and if mask_skip is set then we can take the alternative.

Note that mask_any and mask_all are both (mask_skip | mask_take), i.e. 3:
neither of them includes the mask_init bit.
***********************************************************************/
enum mask_type
{
   mask_take = 1,                      // the next state can be taken
   mask_skip = 2,                      // the alternative can be taken
   mask_init = 4,                      // independent flag bit (its use is not visible in this header)
   mask_any  = mask_skip | mask_take,  // either of the two branches
   mask_all  = mask_any                // synonym for mask_any
};
Chris@16 50
/*** helpers **********************************************************
These helpers let us use function overload resolution to detect whether
we have narrow or wide character strings: is_byte<charT>::width_type is
_narrow_type for the three plain/signed/unsigned char types and
_wide_type for every other character type.
***********************************************************************/
struct _narrow_type{};
struct _wide_type{};

// Primary template: anything that is not one of the char types below
// is treated as a wide character.
template <class charT>
struct is_byte
{
   typedef _wide_type width_type;
};

// Single-byte character types:
template<>
struct is_byte<char>
{
   typedef _narrow_type width_type;
};

template<>
struct is_byte<unsigned char>
{
   typedef _narrow_type width_type;
};

template<>
struct is_byte<signed char>
{
   typedef _narrow_type width_type;
};
Chris@16 62
/*** enum syntax_element_type ******************************************
Every record in the state machine falls into one of the following types.

The enumerators are numbered consecutively starting at zero
(syntax_element_startmark == 0 ... syntax_element_recurse == 29); each
one implicitly takes the value of its predecessor plus one.
NOTE(review): code outside this header may rely on these exact values,
so add new enumerators at the end rather than reordering -- confirm.
***********************************************************************/
enum syntax_element_type
{
   // start of a marked sub-expression, or perl-style (?...) extension
   syntax_element_startmark = 0,
   // end of a marked sub-expression, or perl-style (?...) extension
   syntax_element_endmark,
   // any sequence of literal characters
   syntax_element_literal,
   // start of line assertion: ^
   syntax_element_start_line,
   // end of line assertion $
   syntax_element_end_line,
   // match any character: .
   syntax_element_wild,
   // end of expression: we have a match when we get here
   syntax_element_match,
   // perl style word boundary: \b
   syntax_element_word_boundary,
   // perl style within word boundary: \B
   syntax_element_within_word,
   // start of word assertion: \<
   syntax_element_word_start,
   // end of word assertion: \>
   syntax_element_word_end,
   // start of buffer assertion: \`
   syntax_element_buffer_start,
   // end of buffer assertion: \'
   syntax_element_buffer_end,
   // backreference to previously matched sub-expression
   syntax_element_backref,
   // either a wide character set [..] or one with multicharacter collating elements:
   syntax_element_long_set,
   // narrow character set: [...]
   syntax_element_set,
   // jump to a new state in the machine:
   syntax_element_jump,
   // choose between two production states:
   syntax_element_alt,
   // a repeat
   syntax_element_rep,
   // match a combining character sequence
   syntax_element_combining,
   // perl style soft buffer end: \z
   syntax_element_soft_buffer_end,
   // perl style continuation: \G
   syntax_element_restart_continue,
   // single character repeats:
   syntax_element_dot_rep,
   syntax_element_char_rep,
   syntax_element_short_set_rep,
   syntax_element_long_set_rep,
   // a backstep for lookbehind repeats:
   syntax_element_backstep,
   // an assertion that a mark was matched:
   syntax_element_assert_backref,
   // a change of case sensitivity (presumably the state described by re_case):
   syntax_element_toggle_case,
   // a recursive expression:
   syntax_element_recurse
};

#ifdef BOOST_REGEX_DEBUG
// dwa 09/26/00 - This is needed to suppress warnings about an ambiguous conversion
std::ostream& operator<<(std::ostream&, syntax_element_type);
#endif
Chris@16 130
// Forward declaration: offset_type only needs a pointer to this type.
struct re_syntax_base;

/*** union offset_type ************************************************
Points to another state in the machine.  During machine construction
we use integral offsets, but these are converted to pointers before
execution of the machine.
***********************************************************************/
union offset_type
{
   re_syntax_base* p;   // pointer form, used once the machine is ready to execute
   std::ptrdiff_t  i;   // integral offset form, used while the machine is being built
};
Chris@16 143
Chris@16 144 /*** struct re_syntax_base ********************************************
Chris@16 145 Base class for all states in the machine.
Chris@16 146 ***********************************************************************/
Chris@16 147 struct re_syntax_base
Chris@16 148 {
Chris@16 149 syntax_element_type type; // what kind of state this is
Chris@16 150 offset_type next; // next state in the machine
Chris@16 151 };
Chris@16 152
Chris@16 153 /*** struct re_brace **************************************************
Chris@16 154 A marked parenthesis.
Chris@16 155 ***********************************************************************/
Chris@16 156 struct re_brace : public re_syntax_base
Chris@16 157 {
Chris@16 158 // The index to match, can be zero (don't mark the sub-expression)
Chris@16 159 // or negative (for perl style (?...) extentions):
Chris@16 160 int index;
Chris@16 161 bool icase;
Chris@16 162 };
Chris@16 163
/*** struct re_dot **************************************************
Match anything.

The unnamed enum below holds the constants that accompany re_dot.
NOTE(review): presumably the first group are the values stored in
re_dot::mask and the second group are the masks they are AND-ed with
when matching -- confirm against the matcher.  Whatever their use, the
values are related bitwise as follows:
   dont_care (1)         & test_not_newline (2) == 0
   dont_care (1)         & test_newline (3)     != 0
   force_not_newline (0) & either test mask     == 0
   force_newline (2)     & either test mask     != 0
force_newline and test_not_newline share the value 2.
***********************************************************************/
enum
{
   dont_care = 1,
   force_not_newline = 0,
   force_newline = 2,

   test_not_newline = 2,
   test_newline = 3
};
Chris@16 176 struct re_dot : public re_syntax_base
Chris@16 177 {
Chris@16 178 unsigned char mask;
Chris@16 179 };
Chris@16 180
Chris@16 181 /*** struct re_literal ************************************************
Chris@16 182 A string of literals, following this structure will be an
Chris@16 183 array of characters: charT[length]
Chris@16 184 ***********************************************************************/
Chris@16 185 struct re_literal : public re_syntax_base
Chris@16 186 {
Chris@16 187 unsigned int length;
Chris@16 188 };
Chris@16 189
Chris@16 190 /*** struct re_case ************************************************
Chris@16 191 Indicates whether we are moving to a case insensive block or not
Chris@16 192 ***********************************************************************/
Chris@16 193 struct re_case : public re_syntax_base
Chris@16 194 {
Chris@16 195 bool icase;
Chris@16 196 };
Chris@16 197
Chris@16 198 /*** struct re_set_long ***********************************************
Chris@16 199 A wide character set of characters, following this structure will be
Chris@16 200 an array of type charT:
Chris@16 201 First csingles null-terminated strings
Chris@16 202 Then 2 * cranges NULL terminated strings
Chris@16 203 Then cequivalents NULL terminated strings
Chris@16 204 ***********************************************************************/
Chris@16 205 template <class mask_type>
Chris@16 206 struct re_set_long : public re_syntax_base
Chris@16 207 {
Chris@16 208 unsigned int csingles, cranges, cequivalents;
Chris@16 209 mask_type cclasses;
Chris@16 210 mask_type cnclasses;
Chris@16 211 bool isnot;
Chris@16 212 bool singleton;
Chris@16 213 };
Chris@16 214
Chris@16 215 /*** struct re_set ****************************************************
Chris@16 216 A set of narrow-characters, matches any of _map which is none-zero
Chris@16 217 ***********************************************************************/
Chris@16 218 struct re_set : public re_syntax_base
Chris@16 219 {
Chris@16 220 unsigned char _map[1 << CHAR_BIT];
Chris@16 221 };
Chris@16 222
Chris@16 223 /*** struct re_jump ***************************************************
Chris@16 224 Jump to a new location in the machine (not next).
Chris@16 225 ***********************************************************************/
Chris@16 226 struct re_jump : public re_syntax_base
Chris@16 227 {
Chris@16 228 offset_type alt; // location to jump to
Chris@16 229 };
Chris@16 230
Chris@16 231 /*** struct re_alt ***************************************************
Chris@16 232 Jump to a new location in the machine (possibly next).
Chris@16 233 ***********************************************************************/
Chris@16 234 struct re_alt : public re_jump
Chris@16 235 {
Chris@16 236 unsigned char _map[1 << CHAR_BIT]; // which characters can take the jump
Chris@16 237 unsigned int can_be_null; // true if we match a NULL string
Chris@16 238 };
Chris@16 239
Chris@16 240 /*** struct re_repeat *************************************************
Chris@16 241 Repeat a section of the machine
Chris@16 242 ***********************************************************************/
Chris@16 243 struct re_repeat : public re_alt
Chris@16 244 {
Chris@16 245 std::size_t min, max; // min and max allowable repeats
Chris@16 246 int state_id; // Unique identifier for this repeat
Chris@16 247 bool leading; // True if this repeat is at the start of the machine (lets us optimize some searches)
Chris@16 248 bool greedy; // True if this is a greedy repeat
Chris@16 249 };
Chris@16 250
Chris@16 251 /*** struct re_recurse ************************************************
Chris@16 252 Recurse to a particular subexpression.
Chris@16 253 **********************************************************************/
Chris@16 254 struct re_recurse : public re_jump
Chris@16 255 {
Chris@16 256 int state_id; // identifier of first nested repeat within the recursion.
Chris@16 257 };
Chris@16 258
Chris@16 259 /*** enum re_jump_size_type *******************************************
Chris@16 260 Provides compiled size of re_jump structure (allowing for trailing alignment).
Chris@16 261 We provide this so we know how manybytes to insert when constructing the machine
Chris@16 262 (The value of padding_mask is defined in regex_raw_buffer.hpp).
Chris@16 263 ***********************************************************************/
Chris@16 264 enum re_jump_size_type
Chris@16 265 {
Chris@16 266 re_jump_size = (sizeof(re_jump) + padding_mask) & ~(padding_mask),
Chris@16 267 re_repeater_size = (sizeof(re_repeat) + padding_mask) & ~(padding_mask),
Chris@16 268 re_alt_size = (sizeof(re_alt) + padding_mask) & ~(padding_mask)
Chris@16 269 };
Chris@16 270
Chris@16 271 /*** proc re_is_set_member *********************************************
Chris@16 272 Forward declaration: we'll need this one later...
Chris@16 273 ***********************************************************************/
Chris@16 274
Chris@16 275 template<class charT, class traits>
Chris@16 276 struct regex_data;
Chris@16 277
Chris@16 278 template <class iterator, class charT, class traits_type, class char_classT>
Chris@16 279 iterator BOOST_REGEX_CALL re_is_set_member(iterator next,
Chris@16 280 iterator last,
Chris@16 281 const re_set_long<char_classT>* set_,
Chris@16 282 const regex_data<charT, traits_type>& e, bool icase);
Chris@16 283
Chris@16 284 } // namespace re_detail
Chris@16 285
Chris@16 286 } // namespace boost
Chris@16 287
Chris@16 288 #ifdef BOOST_MSVC
Chris@16 289 #pragma warning(push)
Chris@16 290 #pragma warning(disable: 4103)
Chris@16 291 #endif
Chris@16 292 #ifdef BOOST_HAS_ABI_HEADERS
Chris@16 293 # include BOOST_ABI_SUFFIX
Chris@16 294 #endif
Chris@16 295 #ifdef BOOST_MSVC
Chris@16 296 #pragma warning(pop)
Chris@16 297 #endif
Chris@16 298
Chris@16 299 #endif
Chris@16 300
Chris@16 301