comparison DEPENDENCIES/generic/include/boost/regex/v4/states.hpp @ 16:2665513ce2d3

Add boost headers
author Chris Cannam
date Tue, 05 Aug 2014 11:11:38 +0100
parents
children
comparison
equal deleted inserted replaced
15:663ca0da4350 16:2665513ce2d3
1 /*
2 *
3 * Copyright (c) 1998-2002
4 * John Maddock
5 *
6 * Use, modification and distribution are subject to the
7 * Boost Software License, Version 1.0. (See accompanying file
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9 *
10 */
11
12 /*
13 * LOCATION: see http://www.boost.org for most recent version.
14 * FILE states.hpp
15 * VERSION see <boost/version.hpp>
16 * DESCRIPTION: Declares internal state machine structures.
17 */
18
19 #ifndef BOOST_REGEX_V4_STATES_HPP
20 #define BOOST_REGEX_V4_STATES_HPP
21
22 #ifdef BOOST_MSVC
23 #pragma warning(push)
24 #pragma warning(disable: 4103)
25 #endif
26 #ifdef BOOST_HAS_ABI_HEADERS
27 # include BOOST_ABI_PREFIX
28 #endif
29 #ifdef BOOST_MSVC
30 #pragma warning(pop)
31 #endif
32
33 namespace boost{
34 namespace re_detail{
35
/*** mask_type *******************************************************
Whenever we have a choice of two alternatives, we use an array of bytes
to indicate which of the two alternatives it is possible to take for any
given input character.  If mask_take is set, then we can take the next
state, and if mask_skip is set then we can take the alternative.
***********************************************************************/
enum mask_type
{
   mask_take = 1,                      // bit 0: the next state may be taken
   mask_skip = 2,                      // bit 1: the alternative may be taken
   mask_init = 4,                      // bit 2: distinct from mask_any; its use is not shown in this header
   mask_any = mask_skip | mask_take,   // both bits set (value 3): either path may be taken
   mask_all = mask_any                 // alias of mask_any (does not include mask_init)
};
50
/*** helpers **********************************************************
These helpers let us use function overload resolution to detect whether
we have narrow or wide character strings:
***********************************************************************/
struct _narrow_type{};   // tag selected for char, unsigned char and signed char
struct _wide_type{};     // tag selected for every other character type

// The primary template is only declared here so that the three narrow
// specializations can be written before its (wide) definition below.
template <class charT> struct is_byte;
template<> struct is_byte<char>         { typedef _narrow_type width_type; };
template<> struct is_byte<unsigned char>{ typedef _narrow_type width_type; };
template<> struct is_byte<signed char>  { typedef _narrow_type width_type; };
// Fallback: any charT without a specialization above is treated as wide.
template <class charT> struct is_byte   { typedef _wide_type width_type; };
62
/*** enum syntax_element_type ******************************************
Every record in the state machine falls into one of the following types.
Each enumerator is defined as its predecessor plus one, so the values run
consecutively from 0 (syntax_element_startmark) upwards.
***********************************************************************/
enum syntax_element_type
{
   // start of a marked sub-expression, or perl-style (?...) extension
   syntax_element_startmark = 0,
   // end of a marked sub-expression, or perl-style (?...) extension
   syntax_element_endmark = syntax_element_startmark + 1,
   // any sequence of literal characters
   syntax_element_literal = syntax_element_endmark + 1,
   // start of line assertion: ^
   syntax_element_start_line = syntax_element_literal + 1,
   // end of line assertion $
   syntax_element_end_line = syntax_element_start_line + 1,
   // match any character: .
   syntax_element_wild = syntax_element_end_line + 1,
   // end of expression: we have a match when we get here
   syntax_element_match = syntax_element_wild + 1,
   // perl style word boundary: \b
   syntax_element_word_boundary = syntax_element_match + 1,
   // perl style within word boundary: \B
   syntax_element_within_word = syntax_element_word_boundary + 1,
   // start of word assertion: \<
   syntax_element_word_start = syntax_element_within_word + 1,
   // end of word assertion: \>
   syntax_element_word_end = syntax_element_word_start + 1,
   // start of buffer assertion: \`
   syntax_element_buffer_start = syntax_element_word_end + 1,
   // end of buffer assertion: \'
   syntax_element_buffer_end = syntax_element_buffer_start + 1,
   // backreference to previously matched sub-expression
   syntax_element_backref = syntax_element_buffer_end + 1,
   // either a wide character set [..] or one with multicharacter collating elements:
   syntax_element_long_set = syntax_element_backref + 1,
   // narrow character set: [...]
   syntax_element_set = syntax_element_long_set + 1,
   // jump to a new state in the machine:
   syntax_element_jump = syntax_element_set + 1,
   // choose between two production states:
   syntax_element_alt = syntax_element_jump + 1,
   // a repeat
   syntax_element_rep = syntax_element_alt + 1,
   // match a combining character sequence
   syntax_element_combining = syntax_element_rep + 1,
   // perl style soft buffer end: \z
   syntax_element_soft_buffer_end = syntax_element_combining + 1,
   // perl style continuation: \G
   syntax_element_restart_continue = syntax_element_soft_buffer_end + 1,
   // single character repeats:
   syntax_element_dot_rep = syntax_element_restart_continue + 1,
   syntax_element_char_rep = syntax_element_dot_rep + 1,
   syntax_element_short_set_rep = syntax_element_char_rep + 1,
   syntax_element_long_set_rep = syntax_element_short_set_rep + 1,
   // a backstep for lookbehind repeats:
   syntax_element_backstep = syntax_element_long_set_rep + 1,
   // an assertion that a mark was matched:
   syntax_element_assert_backref = syntax_element_backstep + 1,
   // NOTE(review): presumably a change of case sensitivity — confirm against the matcher
   syntax_element_toggle_case = syntax_element_assert_backref + 1,
   // a recursive expression:
   syntax_element_recurse = syntax_element_toggle_case + 1
};
125
#ifdef BOOST_REGEX_DEBUG
// dwa 09/26/00 - This is needed to suppress warnings about an ambiguous conversion
// Declaration only, compiled in debug builds; no definition is visible in
// this part of the file.
std::ostream& operator<<(std::ostream&, syntax_element_type);
#endif
130
// Forward declaration: offset_type only needs a pointer to this type.
struct re_syntax_base;

/*** union offset_type ************************************************
Points to another state in the machine.  During machine construction
we use integral offsets, but these are converted to pointers before
execution of the machine.
***********************************************************************/
union offset_type
{
   re_syntax_base*   p;   // pointer form, used once the machine is executed
   std::ptrdiff_t    i;   // integral offset form, used during construction
};
143
144 /*** struct re_syntax_base ********************************************
145 Base class for all states in the machine.
146 ***********************************************************************/
147 struct re_syntax_base
148 {
149 syntax_element_type type; // what kind of state this is
150 offset_type next; // next state in the machine
151 };
152
153 /*** struct re_brace **************************************************
154 A marked parenthesis.
155 ***********************************************************************/
156 struct re_brace : public re_syntax_base
157 {
158 // The index to match, can be zero (don't mark the sub-expression)
159 // or negative (for perl style (?...) extentions):
160 int index;
161 bool icase;
162 };
163
164 /*** struct re_dot **************************************************
165 Match anything.
166 ***********************************************************************/
167 enum
168 {
169 dont_care = 1,
170 force_not_newline = 0,
171 force_newline = 2,
172
173 test_not_newline = 2,
174 test_newline = 3
175 };
176 struct re_dot : public re_syntax_base
177 {
178 unsigned char mask;
179 };
180
181 /*** struct re_literal ************************************************
182 A string of literals, following this structure will be an
183 array of characters: charT[length]
184 ***********************************************************************/
185 struct re_literal : public re_syntax_base
186 {
187 unsigned int length;
188 };
189
190 /*** struct re_case ************************************************
191 Indicates whether we are moving to a case insensive block or not
192 ***********************************************************************/
193 struct re_case : public re_syntax_base
194 {
195 bool icase;
196 };
197
198 /*** struct re_set_long ***********************************************
199 A wide character set of characters, following this structure will be
200 an array of type charT:
201 First csingles null-terminated strings
202 Then 2 * cranges NULL terminated strings
203 Then cequivalents NULL terminated strings
204 ***********************************************************************/
205 template <class mask_type>
206 struct re_set_long : public re_syntax_base
207 {
208 unsigned int csingles, cranges, cequivalents;
209 mask_type cclasses;
210 mask_type cnclasses;
211 bool isnot;
212 bool singleton;
213 };
214
215 /*** struct re_set ****************************************************
216 A set of narrow-characters, matches any of _map which is none-zero
217 ***********************************************************************/
218 struct re_set : public re_syntax_base
219 {
220 unsigned char _map[1 << CHAR_BIT];
221 };
222
223 /*** struct re_jump ***************************************************
224 Jump to a new location in the machine (not next).
225 ***********************************************************************/
226 struct re_jump : public re_syntax_base
227 {
228 offset_type alt; // location to jump to
229 };
230
231 /*** struct re_alt ***************************************************
232 Jump to a new location in the machine (possibly next).
233 ***********************************************************************/
234 struct re_alt : public re_jump
235 {
236 unsigned char _map[1 << CHAR_BIT]; // which characters can take the jump
237 unsigned int can_be_null; // true if we match a NULL string
238 };
239
240 /*** struct re_repeat *************************************************
241 Repeat a section of the machine
242 ***********************************************************************/
243 struct re_repeat : public re_alt
244 {
245 std::size_t min, max; // min and max allowable repeats
246 int state_id; // Unique identifier for this repeat
247 bool leading; // True if this repeat is at the start of the machine (lets us optimize some searches)
248 bool greedy; // True if this is a greedy repeat
249 };
250
251 /*** struct re_recurse ************************************************
252 Recurse to a particular subexpression.
253 **********************************************************************/
254 struct re_recurse : public re_jump
255 {
256 int state_id; // identifier of first nested repeat within the recursion.
257 };
258
259 /*** enum re_jump_size_type *******************************************
260 Provides compiled size of re_jump structure (allowing for trailing alignment).
261 We provide this so we know how manybytes to insert when constructing the machine
262 (The value of padding_mask is defined in regex_raw_buffer.hpp).
263 ***********************************************************************/
264 enum re_jump_size_type
265 {
266 re_jump_size = (sizeof(re_jump) + padding_mask) & ~(padding_mask),
267 re_repeater_size = (sizeof(re_repeat) + padding_mask) & ~(padding_mask),
268 re_alt_size = (sizeof(re_alt) + padding_mask) & ~(padding_mask)
269 };
270
271 /*** proc re_is_set_member *********************************************
272 Forward declaration: we'll need this one later...
273 ***********************************************************************/
274
275 template<class charT, class traits>
276 struct regex_data;
277
278 template <class iterator, class charT, class traits_type, class char_classT>
279 iterator BOOST_REGEX_CALL re_is_set_member(iterator next,
280 iterator last,
281 const re_set_long<char_classT>* set_,
282 const regex_data<charT, traits_type>& e, bool icase);
283
284 } // namespace re_detail
285
286 } // namespace boost
287
288 #ifdef BOOST_MSVC
289 #pragma warning(push)
290 #pragma warning(disable: 4103)
291 #endif
292 #ifdef BOOST_HAS_ABI_HEADERS
293 # include BOOST_ABI_SUFFIX
294 #endif
295 #ifdef BOOST_MSVC
296 #pragma warning(pop)
297 #endif
298
299 #endif
300
301