Mercurial > hg > vamp-build-and-test
comparison DEPENDENCIES/generic/include/boost/regex/v4/states.hpp @ 16:2665513ce2d3
Add boost headers
author | Chris Cannam |
---|---|
date | Tue, 05 Aug 2014 11:11:38 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
15:663ca0da4350 | 16:2665513ce2d3 |
---|---|
1 /* | |
2 * | |
3 * Copyright (c) 1998-2002 | |
4 * John Maddock | |
5 * | |
6 * Use, modification and distribution are subject to the | |
7 * Boost Software License, Version 1.0. (See accompanying file | |
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
9 * | |
10 */ | |
11 | |
12 /* | |
13 * LOCATION: see http://www.boost.org for most recent version. | |
14 * FILE states.cpp | |
15 * VERSION see <boost/version.hpp> | |
16 * DESCRIPTION: Declares internal state machine structures. | |
17 */ | |
18 | |
19 #ifndef BOOST_REGEX_V4_STATES_HPP | |
20 #define BOOST_REGEX_V4_STATES_HPP | |
21 | |
22 #ifdef BOOST_MSVC | |
23 #pragma warning(push) | |
24 #pragma warning(disable: 4103) | |
25 #endif | |
26 #ifdef BOOST_HAS_ABI_HEADERS | |
27 # include BOOST_ABI_PREFIX | |
28 #endif | |
29 #ifdef BOOST_MSVC | |
30 #pragma warning(pop) | |
31 #endif | |
32 | |
33 namespace boost{ | |
34 namespace re_detail{ | |
35 | |
36 /*** mask_type ******************************************************* | |
37 Whenever we have a choice of two alternatives, we use an array of bytes | |
38 to indicate which of the two alternatives it is possible to take for any | |
39 given input character. If mask_take is set, then we can take the next | |
40 state, and if mask_skip is set then we can take the alternative. | |
41 ***********************************************************************/ | |
42 enum mask_type | |
43 { | |
44 mask_take = 1, | |
45 mask_skip = 2, | |
46 mask_init = 4, | |
47 mask_any = mask_skip | mask_take, | |
48 mask_all = mask_any | |
49 }; | |
50 | |
51 /*** helpers ********************************************************** | |
52 These helpers let us use function overload resolution to detect whether | |
53 we have narrow or wide character strings: | |
54 ***********************************************************************/ | |
55 struct _narrow_type{}; | |
56 struct _wide_type{}; | |
57 template <class charT> struct is_byte; | |
58 template<> struct is_byte<char> { typedef _narrow_type width_type; }; | |
59 template<> struct is_byte<unsigned char>{ typedef _narrow_type width_type; }; | |
60 template<> struct is_byte<signed char> { typedef _narrow_type width_type; }; | |
61 template <class charT> struct is_byte { typedef _wide_type width_type; }; | |
62 | |
63 /*** enum syntax_element_type ****************************************** | |
64 Every record in the state machine falls into one of the following types: | |
65 ***********************************************************************/ | |
66 enum syntax_element_type | |
67 { | |
68 // start of a marked sub-expression, or perl-style (?...) extension | |
69 syntax_element_startmark = 0, | |
70 // end of a marked sub-expression, or perl-style (?...) extension | |
71 syntax_element_endmark = syntax_element_startmark + 1, | |
72 // any sequence of literal characters | |
73 syntax_element_literal = syntax_element_endmark + 1, | |
74 // start of line assertion: ^ | |
75 syntax_element_start_line = syntax_element_literal + 1, | |
76 // end of line assertion $ | |
77 syntax_element_end_line = syntax_element_start_line + 1, | |
78 // match any character: . | |
79 syntax_element_wild = syntax_element_end_line + 1, | |
80 // end of expression: we have a match when we get here | |
81 syntax_element_match = syntax_element_wild + 1, | |
82 // perl style word boundary: \b | |
83 syntax_element_word_boundary = syntax_element_match + 1, | |
84 // perl style within word boundary: \B | |
85 syntax_element_within_word = syntax_element_word_boundary + 1, | |
86 // start of word assertion: \< | |
87 syntax_element_word_start = syntax_element_within_word + 1, | |
88 // end of word assertion: \> | |
89 syntax_element_word_end = syntax_element_word_start + 1, | |
90 // start of buffer assertion: \` | |
91 syntax_element_buffer_start = syntax_element_word_end + 1, | |
92 // end of buffer assertion: \' | |
93 syntax_element_buffer_end = syntax_element_buffer_start + 1, | |
94 // backreference to previously matched sub-expression | |
95 syntax_element_backref = syntax_element_buffer_end + 1, | |
96 // either a wide character set [..] or one with multicharacter collating elements: | |
97 syntax_element_long_set = syntax_element_backref + 1, | |
98 // narrow character set: [...] | |
99 syntax_element_set = syntax_element_long_set + 1, | |
100 // jump to a new state in the machine: | |
101 syntax_element_jump = syntax_element_set + 1, | |
102 // choose between two production states: | |
103 syntax_element_alt = syntax_element_jump + 1, | |
104 // a repeat | |
105 syntax_element_rep = syntax_element_alt + 1, | |
106 // match a combining character sequence | |
107 syntax_element_combining = syntax_element_rep + 1, | |
108 // perl style soft buffer end: \z | |
109 syntax_element_soft_buffer_end = syntax_element_combining + 1, | |
110 // perl style continuation: \G | |
111 syntax_element_restart_continue = syntax_element_soft_buffer_end + 1, | |
112 // single character repeats: | |
113 syntax_element_dot_rep = syntax_element_restart_continue + 1, | |
114 syntax_element_char_rep = syntax_element_dot_rep + 1, | |
115 syntax_element_short_set_rep = syntax_element_char_rep + 1, | |
116 syntax_element_long_set_rep = syntax_element_short_set_rep + 1, | |
117 // a backstep for lookbehind repeats: | |
118 syntax_element_backstep = syntax_element_long_set_rep + 1, | |
119 // an assertion that a mark was matched: | |
120 syntax_element_assert_backref = syntax_element_backstep + 1, | |
121 syntax_element_toggle_case = syntax_element_assert_backref + 1, | |
122 // a recursive expression: | |
123 syntax_element_recurse = syntax_element_toggle_case + 1 | |
124 }; | |
125 | |
126 #ifdef BOOST_REGEX_DEBUG | |
127 // dwa 09/26/00 - This is needed to suppress warnings about an ambiguous conversion | |
128 std::ostream& operator<<(std::ostream&, syntax_element_type); | |
129 #endif | |
130 | |
131 struct re_syntax_base; | |
132 | |
133 /*** union offset_type ************************************************ | |
134 Points to another state in the machine. During machine construction | |
135 we use integral offsets, but these are converted to pointers before | |
136 execution of the machine. | |
137 ***********************************************************************/ | |
138 union offset_type | |
139 { | |
140 re_syntax_base* p; | |
141 std::ptrdiff_t i; | |
142 }; | |
143 | |
144 /*** struct re_syntax_base ******************************************** | |
145 Base class for all states in the machine. | |
146 ***********************************************************************/ | |
147 struct re_syntax_base | |
148 { | |
149 syntax_element_type type; // what kind of state this is | |
150 offset_type next; // next state in the machine | |
151 }; | |
152 | |
153 /*** struct re_brace ************************************************** | |
154 A marked parenthesis. | |
155 ***********************************************************************/ | |
156 struct re_brace : public re_syntax_base | |
157 { | |
158 // The index to match, can be zero (don't mark the sub-expression) | |
159 // or negative (for perl style (?...) extentions): | |
160 int index; | |
161 bool icase; | |
162 }; | |
163 | |
164 /*** struct re_dot ************************************************** | |
165 Match anything. | |
166 ***********************************************************************/ | |
167 enum | |
168 { | |
169 dont_care = 1, | |
170 force_not_newline = 0, | |
171 force_newline = 2, | |
172 | |
173 test_not_newline = 2, | |
174 test_newline = 3 | |
175 }; | |
176 struct re_dot : public re_syntax_base | |
177 { | |
178 unsigned char mask; | |
179 }; | |
180 | |
181 /*** struct re_literal ************************************************ | |
182 A string of literals, following this structure will be an | |
183 array of characters: charT[length] | |
184 ***********************************************************************/ | |
185 struct re_literal : public re_syntax_base | |
186 { | |
187 unsigned int length; | |
188 }; | |
189 | |
190 /*** struct re_case ************************************************ | |
191 Indicates whether we are moving to a case insensive block or not | |
192 ***********************************************************************/ | |
193 struct re_case : public re_syntax_base | |
194 { | |
195 bool icase; | |
196 }; | |
197 | |
198 /*** struct re_set_long *********************************************** | |
199 A wide character set of characters, following this structure will be | |
200 an array of type charT: | |
201 First csingles null-terminated strings | |
202 Then 2 * cranges NULL terminated strings | |
203 Then cequivalents NULL terminated strings | |
204 ***********************************************************************/ | |
205 template <class mask_type> | |
206 struct re_set_long : public re_syntax_base | |
207 { | |
208 unsigned int csingles, cranges, cequivalents; | |
209 mask_type cclasses; | |
210 mask_type cnclasses; | |
211 bool isnot; | |
212 bool singleton; | |
213 }; | |
214 | |
215 /*** struct re_set **************************************************** | |
216 A set of narrow-characters, matches any of _map which is none-zero | |
217 ***********************************************************************/ | |
218 struct re_set : public re_syntax_base | |
219 { | |
220 unsigned char _map[1 << CHAR_BIT]; | |
221 }; | |
222 | |
223 /*** struct re_jump *************************************************** | |
224 Jump to a new location in the machine (not next). | |
225 ***********************************************************************/ | |
226 struct re_jump : public re_syntax_base | |
227 { | |
228 offset_type alt; // location to jump to | |
229 }; | |
230 | |
231 /*** struct re_alt *************************************************** | |
232 Jump to a new location in the machine (possibly next). | |
233 ***********************************************************************/ | |
234 struct re_alt : public re_jump | |
235 { | |
236 unsigned char _map[1 << CHAR_BIT]; // which characters can take the jump | |
237 unsigned int can_be_null; // true if we match a NULL string | |
238 }; | |
239 | |
240 /*** struct re_repeat ************************************************* | |
241 Repeat a section of the machine | |
242 ***********************************************************************/ | |
243 struct re_repeat : public re_alt | |
244 { | |
245 std::size_t min, max; // min and max allowable repeats | |
246 int state_id; // Unique identifier for this repeat | |
247 bool leading; // True if this repeat is at the start of the machine (lets us optimize some searches) | |
248 bool greedy; // True if this is a greedy repeat | |
249 }; | |
250 | |
251 /*** struct re_recurse ************************************************ | |
252 Recurse to a particular subexpression. | |
253 **********************************************************************/ | |
254 struct re_recurse : public re_jump | |
255 { | |
256 int state_id; // identifier of first nested repeat within the recursion. | |
257 }; | |
258 | |
259 /*** enum re_jump_size_type ******************************************* | |
260 Provides compiled size of re_jump structure (allowing for trailing alignment). | |
261 We provide this so we know how manybytes to insert when constructing the machine | |
262 (The value of padding_mask is defined in regex_raw_buffer.hpp). | |
263 ***********************************************************************/ | |
264 enum re_jump_size_type | |
265 { | |
266 re_jump_size = (sizeof(re_jump) + padding_mask) & ~(padding_mask), | |
267 re_repeater_size = (sizeof(re_repeat) + padding_mask) & ~(padding_mask), | |
268 re_alt_size = (sizeof(re_alt) + padding_mask) & ~(padding_mask) | |
269 }; | |
270 | |
271 /*** proc re_is_set_member ********************************************* | |
272 Forward declaration: we'll need this one later... | |
273 ***********************************************************************/ | |
274 | |
275 template<class charT, class traits> | |
276 struct regex_data; | |
277 | |
278 template <class iterator, class charT, class traits_type, class char_classT> | |
279 iterator BOOST_REGEX_CALL re_is_set_member(iterator next, | |
280 iterator last, | |
281 const re_set_long<char_classT>* set_, | |
282 const regex_data<charT, traits_type>& e, bool icase); | |
283 | |
284 } // namespace re_detail | |
285 | |
286 } // namespace boost | |
287 | |
288 #ifdef BOOST_MSVC | |
289 #pragma warning(push) | |
290 #pragma warning(disable: 4103) | |
291 #endif | |
292 #ifdef BOOST_HAS_ABI_HEADERS | |
293 # include BOOST_ABI_SUFFIX | |
294 #endif | |
295 #ifdef BOOST_MSVC | |
296 #pragma warning(pop) | |
297 #endif | |
298 | |
299 #endif | |
300 | |
301 |