Chris@16
|
1 ///////////////////////////////////////////////////////////////////////////////
|
Chris@16
|
2 /// \file regex_token_iterator.hpp
|
Chris@16
|
3 /// Contains the definition of regex_token_iterator, an STL-compatible iterator
|
Chris@16
|
4 /// for tokenizing a string using a regular expression.
|
Chris@16
|
5 //
|
Chris@16
|
6 // Copyright 2008 Eric Niebler. Distributed under the Boost
|
Chris@16
|
7 // Software License, Version 1.0. (See accompanying file
|
Chris@16
|
8 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
Chris@16
|
9
|
Chris@16
|
10 #ifndef BOOST_XPRESSIVE_REGEX_TOKEN_ITERATOR_HPP_EAN_10_04_2005
|
Chris@16
|
11 #define BOOST_XPRESSIVE_REGEX_TOKEN_ITERATOR_HPP_EAN_10_04_2005
|
Chris@16
|
12
|
Chris@16
|
13 // MS compatible compilers support #pragma once
|
Chris@101
|
14 #if defined(_MSC_VER)
|
Chris@16
|
15 # pragma once
|
Chris@16
|
16 #endif
|
Chris@16
|
17
|
Chris@16
|
18 #include <vector>
|
Chris@16
|
19 #include <boost/assert.hpp>
|
Chris@16
|
20 #include <boost/mpl/assert.hpp>
|
Chris@16
|
21 #include <boost/type_traits/is_same.hpp>
|
Chris@16
|
22 #include <boost/type_traits/is_convertible.hpp>
|
Chris@16
|
23 #include <boost/xpressive/regex_iterator.hpp>
|
Chris@16
|
24
|
Chris@16
|
25 namespace boost { namespace xpressive { namespace detail
|
Chris@16
|
26 {
|
Chris@16
|
27
|
Chris@16
|
28 //////////////////////////////////////////////////////////////////////////
|
Chris@16
|
29 // regex_token_iterator_impl
|
Chris@16
|
30 //
|
Chris@16
|
31 template<typename BidiIter>
|
Chris@16
|
32 struct regex_token_iterator_impl
|
Chris@16
|
33 : counted_base<regex_token_iterator_impl<BidiIter> >
|
Chris@16
|
34 {
|
Chris@16
|
35 typedef sub_match<BidiIter> value_type;
|
Chris@16
|
36
|
Chris@16
|
37 regex_token_iterator_impl
|
Chris@16
|
38 (
|
Chris@16
|
39 BidiIter begin
|
Chris@16
|
40 , BidiIter cur
|
Chris@16
|
41 , BidiIter end
|
Chris@16
|
42 , BidiIter next_search
|
Chris@16
|
43 , basic_regex<BidiIter> const &rex
|
Chris@16
|
44 , regex_constants::match_flag_type flags = regex_constants::match_default
|
Chris@16
|
45 , std::vector<int> subs = std::vector<int>(1, 0)
|
Chris@16
|
46 , int n = -2
|
Chris@16
|
47 , bool not_null = false
|
Chris@16
|
48 )
|
Chris@16
|
49 : iter_(begin, cur, end, next_search, rex, flags, not_null)
|
Chris@16
|
50 , result_()
|
Chris@16
|
51 , n_((-2 == n) ? (int)subs.size() - 1 : n)
|
Chris@16
|
52 , subs_()
|
Chris@16
|
53 {
|
Chris@16
|
54 BOOST_ASSERT(0 != subs.size());
|
Chris@16
|
55 this->subs_.swap(subs);
|
Chris@16
|
56 }
|
Chris@16
|
57
|
Chris@16
|
58 bool next()
|
Chris@16
|
59 {
|
Chris@16
|
60 if(-1 != this->n_)
|
Chris@16
|
61 {
|
Chris@16
|
62 BidiIter cur = this->iter_.state_.cur_;
|
Chris@16
|
63 if(0 != (++this->n_ %= (int)this->subs_.size()) || this->iter_.next())
|
Chris@16
|
64 {
|
Chris@16
|
65 this->result_ = (-1 == this->subs_[ this->n_ ])
|
Chris@16
|
66 ? this->iter_.what_.prefix()
|
Chris@16
|
67 : this->iter_.what_[ this->subs_[ this->n_ ] ];
|
Chris@16
|
68 return true;
|
Chris@16
|
69 }
|
Chris@16
|
70 else if(-1 == this->subs_[ this->n_-- ] && cur != this->iter_.state_.end_)
|
Chris@16
|
71 {
|
Chris@16
|
72 this->result_ = value_type(cur, this->iter_.state_.end_, true);
|
Chris@16
|
73 return true;
|
Chris@16
|
74 }
|
Chris@16
|
75 }
|
Chris@16
|
76
|
Chris@16
|
77 return false;
|
Chris@16
|
78 }
|
Chris@16
|
79
|
Chris@16
|
80 bool equal_to(regex_token_iterator_impl<BidiIter> const &that) const
|
Chris@16
|
81 {
|
Chris@16
|
82 return this->iter_.equal_to(that.iter_) && this->n_ == that.n_;
|
Chris@16
|
83 }
|
Chris@16
|
84
|
Chris@16
|
85 regex_iterator_impl<BidiIter> iter_;
|
Chris@16
|
86 value_type result_;
|
Chris@16
|
87 int n_;
|
Chris@16
|
88 std::vector<int> subs_;
|
Chris@16
|
89 };
|
Chris@16
|
90
|
Chris@16
|
/// Returns \c i unchanged. to_vector() calls get_mark_number() on every
/// element of an array of sub-match designators to obtain an \c int index;
/// this is the overload selected when the element is already an \c int.
inline int get_mark_number(int i)
{
    return i;
}
|
Chris@16
|
95
|
Chris@16
|
/// Wraps a single sub-match index in a one-element vector.
/// \param subs The sub-match index.
/// \return A vector containing exactly \c subs.
inline std::vector<int> to_vector(int subs)
{
    return std::vector<int>(1, subs);
}
|
Chris@16
|
100
|
Chris@16
|
/// Pass-through overload: a vector of \c int needs no conversion, so the
/// argument itself is returned by reference (no copy is made here).
/// \param subs The sub-match indices.
/// \return A reference to \c subs.
inline std::vector<int> const &to_vector(std::vector<int> const &subs)
{
    return subs;
}
|
Chris@16
|
105
|
Chris@16
|
106 template<typename Int, std::size_t Size>
|
Chris@16
|
107 inline std::vector<int> to_vector(Int const (&sub_matches)[ Size ])
|
Chris@16
|
108 {
|
Chris@16
|
109 // so that people can specify sub-match indices inline with
|
Chris@16
|
110 // string literals, like "\1\2\3", leave off the trailing '\0'
|
Chris@16
|
111 std::size_t const size = Size - is_same<Int, char>::value;
|
Chris@16
|
112 std::vector<int> vect(size);
|
Chris@16
|
113 for(std::size_t i = 0; i < size; ++i)
|
Chris@16
|
114 {
|
Chris@16
|
115 vect[i] = get_mark_number(sub_matches[i]);
|
Chris@16
|
116 }
|
Chris@16
|
117 return vect;
|
Chris@16
|
118 }
|
Chris@16
|
119
|
Chris@16
|
120 template<typename Int>
|
Chris@16
|
121 inline std::vector<int> to_vector(std::vector<Int> const &sub_matches)
|
Chris@16
|
122 {
|
Chris@16
|
123 BOOST_MPL_ASSERT((is_convertible<Int, int>));
|
Chris@16
|
124 return std::vector<int>(sub_matches.begin(), sub_matches.end());
|
Chris@16
|
125 }
|
Chris@16
|
126
|
Chris@16
|
127 } // namespace detail
|
Chris@16
|
128
|
Chris@16
|
129 //////////////////////////////////////////////////////////////////////////
|
Chris@16
|
130 // regex_token_iterator
|
Chris@16
|
131 //
|
Chris@16
|
132 template<typename BidiIter>
|
Chris@16
|
133 struct regex_token_iterator
|
Chris@16
|
134 {
|
Chris@16
|
135 typedef basic_regex<BidiIter> regex_type;
|
Chris@16
|
136 typedef typename iterator_value<BidiIter>::type char_type;
|
Chris@16
|
137 typedef sub_match<BidiIter> value_type;
|
Chris@16
|
138 typedef std::ptrdiff_t difference_type;
|
Chris@16
|
139 typedef value_type const *pointer;
|
Chris@16
|
140 typedef value_type const &reference;
|
Chris@16
|
141 typedef std::forward_iterator_tag iterator_category;
|
Chris@16
|
142
|
Chris@16
|
143 /// INTERNAL ONLY
|
Chris@16
|
144 typedef detail::regex_token_iterator_impl<BidiIter> impl_type_;
|
Chris@16
|
145
|
Chris@16
|
146 /// \post \c *this is the end of sequence iterator.
|
Chris@16
|
147 regex_token_iterator()
|
Chris@16
|
148 : impl_()
|
Chris@16
|
149 {
|
Chris@16
|
150 }
|
Chris@16
|
151
|
Chris@16
|
152 /// \param begin The beginning of the character range to search.
|
Chris@16
|
153 /// \param end The end of the character range to search.
|
Chris@16
|
154 /// \param rex The regex pattern to search for.
|
Chris@16
|
155 /// \pre \c [begin,end) is a valid range.
|
Chris@16
|
156 regex_token_iterator
|
Chris@16
|
157 (
|
Chris@16
|
158 BidiIter begin
|
Chris@16
|
159 , BidiIter end
|
Chris@16
|
160 , basic_regex<BidiIter> const &rex
|
Chris@16
|
161 )
|
Chris@16
|
162 : impl_()
|
Chris@16
|
163 {
|
Chris@16
|
164 if(0 != rex.regex_id())
|
Chris@16
|
165 {
|
Chris@16
|
166 this->impl_ = new impl_type_(begin, begin, end, begin, rex);
|
Chris@16
|
167 this->next_();
|
Chris@16
|
168 }
|
Chris@16
|
169 }
|
Chris@16
|
170
|
Chris@16
|
171 /// \param begin The beginning of the character range to search.
|
Chris@16
|
172 /// \param end The end of the character range to search.
|
Chris@16
|
173 /// \param rex The regex pattern to search for.
|
Chris@16
|
174 /// \param args A let() expression with argument bindings for semantic actions.
|
Chris@16
|
175 /// \pre \c [begin,end) is a valid range.
|
Chris@16
|
176 template<typename LetExpr>
|
Chris@16
|
177 regex_token_iterator
|
Chris@16
|
178 (
|
Chris@16
|
179 BidiIter begin
|
Chris@16
|
180 , BidiIter end
|
Chris@16
|
181 , basic_regex<BidiIter> const &rex
|
Chris@16
|
182 , detail::let_<LetExpr> const &args
|
Chris@16
|
183 )
|
Chris@16
|
184 : impl_()
|
Chris@16
|
185 {
|
Chris@16
|
186 if(0 != rex.regex_id())
|
Chris@16
|
187 {
|
Chris@16
|
188 this->impl_ = new impl_type_(begin, begin, end, begin, rex);
|
Chris@16
|
189 detail::bind_args(args, this->impl_->iter_.what_);
|
Chris@16
|
190 this->next_();
|
Chris@16
|
191 }
|
Chris@16
|
192 }
|
Chris@16
|
193
|
Chris@16
|
194 /// \param begin The beginning of the character range to search.
|
Chris@16
|
195 /// \param end The end of the character range to search.
|
Chris@16
|
196 /// \param rex The regex pattern to search for.
|
Chris@16
|
197 /// \param subs A range of integers designating sub-matches to be treated as tokens.
|
Chris@16
|
198 /// \param flags Optional match flags, used to control how the expression is matched against the sequence. (See match_flag_type.)
|
Chris@16
|
199 /// \pre \c [begin,end) is a valid range.
|
Chris@16
|
200 /// \pre \c subs is either an integer greater or equal to -1,
|
Chris@16
|
201 /// or else an array or non-empty \c std::vector\<\> of such integers.
|
Chris@16
|
202 template<typename Subs>
|
Chris@16
|
203 regex_token_iterator
|
Chris@16
|
204 (
|
Chris@16
|
205 BidiIter begin
|
Chris@16
|
206 , BidiIter end
|
Chris@16
|
207 , basic_regex<BidiIter> const &rex
|
Chris@16
|
208 , Subs const &subs
|
Chris@16
|
209 , regex_constants::match_flag_type flags = regex_constants::match_default
|
Chris@16
|
210 )
|
Chris@16
|
211 : impl_()
|
Chris@16
|
212 {
|
Chris@16
|
213 if(0 != rex.regex_id())
|
Chris@16
|
214 {
|
Chris@16
|
215 this->impl_ = new impl_type_(begin, begin, end, begin, rex, flags, detail::to_vector(subs));
|
Chris@16
|
216 this->next_();
|
Chris@16
|
217 }
|
Chris@16
|
218 }
|
Chris@16
|
219
|
Chris@16
|
220 /// \param begin The beginning of the character range to search.
|
Chris@16
|
221 /// \param end The end of the character range to search.
|
Chris@16
|
222 /// \param rex The regex pattern to search for.
|
Chris@16
|
223 /// \param subs A range of integers designating sub-matches to be treated as tokens.
|
Chris@16
|
224 /// \param args A let() expression with argument bindings for semantic actions.
|
Chris@16
|
225 /// \param flags Optional match flags, used to control how the expression is matched against the sequence. (See match_flag_type.)
|
Chris@16
|
226 /// \pre \c [begin,end) is a valid range.
|
Chris@16
|
227 /// \pre \c subs is either an integer greater or equal to -1,
|
Chris@16
|
228 /// or else an array or non-empty \c std::vector\<\> of such integers.
|
Chris@16
|
229 template<typename Subs, typename LetExpr>
|
Chris@16
|
230 regex_token_iterator
|
Chris@16
|
231 (
|
Chris@16
|
232 BidiIter begin
|
Chris@16
|
233 , BidiIter end
|
Chris@16
|
234 , basic_regex<BidiIter> const &rex
|
Chris@16
|
235 , Subs const &subs
|
Chris@16
|
236 , detail::let_<LetExpr> const &args
|
Chris@16
|
237 , regex_constants::match_flag_type flags = regex_constants::match_default
|
Chris@16
|
238 )
|
Chris@16
|
239 : impl_()
|
Chris@16
|
240 {
|
Chris@16
|
241 if(0 != rex.regex_id())
|
Chris@16
|
242 {
|
Chris@16
|
243 this->impl_ = new impl_type_(begin, begin, end, begin, rex, flags, detail::to_vector(subs));
|
Chris@16
|
244 detail::bind_args(args, this->impl_->iter_.what_);
|
Chris@16
|
245 this->next_();
|
Chris@16
|
246 }
|
Chris@16
|
247 }
|
Chris@16
|
248
|
Chris@16
|
249 /// \post <tt>*this == that</tt>
|
Chris@16
|
250 regex_token_iterator(regex_token_iterator<BidiIter> const &that)
|
Chris@16
|
251 : impl_(that.impl_) // COW
|
Chris@16
|
252 {
|
Chris@16
|
253 }
|
Chris@16
|
254
|
Chris@16
|
255 /// \post <tt>*this == that</tt>
|
Chris@16
|
256 regex_token_iterator<BidiIter> &operator =(regex_token_iterator<BidiIter> const &that)
|
Chris@16
|
257 {
|
Chris@16
|
258 this->impl_ = that.impl_; // COW
|
Chris@16
|
259 return *this;
|
Chris@16
|
260 }
|
Chris@16
|
261
|
Chris@16
|
262 friend bool operator ==(regex_token_iterator<BidiIter> const &left, regex_token_iterator<BidiIter> const &right)
|
Chris@16
|
263 {
|
Chris@16
|
264 if(!left.impl_ || !right.impl_)
|
Chris@16
|
265 {
|
Chris@16
|
266 return !left.impl_ && !right.impl_;
|
Chris@16
|
267 }
|
Chris@16
|
268
|
Chris@16
|
269 return left.impl_->equal_to(*right.impl_);
|
Chris@16
|
270 }
|
Chris@16
|
271
|
Chris@16
|
272 friend bool operator !=(regex_token_iterator<BidiIter> const &left, regex_token_iterator<BidiIter> const &right)
|
Chris@16
|
273 {
|
Chris@16
|
274 return !(left == right);
|
Chris@16
|
275 }
|
Chris@16
|
276
|
Chris@16
|
277 value_type const &operator *() const
|
Chris@16
|
278 {
|
Chris@16
|
279 return this->impl_->result_;
|
Chris@16
|
280 }
|
Chris@16
|
281
|
Chris@16
|
282 value_type const *operator ->() const
|
Chris@16
|
283 {
|
Chris@16
|
284 return &this->impl_->result_;
|
Chris@16
|
285 }
|
Chris@16
|
286
|
Chris@16
|
287 /// If N == -1 then sets *this equal to the end of sequence iterator.
|
Chris@16
|
288 /// Otherwise if N+1 \< subs.size(), then increments N and sets result equal to
|
Chris@16
|
289 /// ((subs[N] == -1) ? value_type(what.prefix().str()) : value_type(what[subs[N]].str())).
|
Chris@16
|
290 /// Otherwise if what.prefix().first != what[0].second and if the element match_prev_avail is
|
Chris@16
|
291 /// not set in flags then sets it. Then locates the next match as if by calling
|
Chris@16
|
292 /// regex_search(what[0].second, end, what, *pre, flags), with the following variation:
|
Chris@16
|
293 /// in the event that the previous match found was of zero length (what[0].length() == 0)
|
Chris@16
|
294 /// then attempts to find a non-zero length match starting at what[0].second, only if that
|
Chris@16
|
295 /// fails and provided what[0].second != suffix().second does it look for a (possibly zero
|
Chris@16
|
296 /// length) match starting from what[0].second + 1. If such a match is found then sets N
|
Chris@16
|
297 /// equal to zero, and sets result equal to
|
Chris@16
|
298 /// ((subs[N] == -1) ? value_type(what.prefix().str()) : value_type(what[subs[N]].str())).
|
Chris@16
|
299 /// Otherwise if no further matches were found, then let last_end be the endpoint of the last
|
Chris@16
|
300 /// match that was found. Then if last_end != end and subs[0] == -1 sets N equal to -1 and
|
Chris@16
|
301 /// sets result equal to value_type(last_end, end). Otherwise sets *this equal to the end
|
Chris@16
|
302 /// of sequence iterator.
|
Chris@16
|
303 regex_token_iterator<BidiIter> &operator ++()
|
Chris@16
|
304 {
|
Chris@16
|
305 this->fork_(); // un-share the implementation
|
Chris@16
|
306 this->next_();
|
Chris@16
|
307 return *this;
|
Chris@16
|
308 }
|
Chris@16
|
309
|
Chris@16
|
310 regex_token_iterator<BidiIter> operator ++(int)
|
Chris@16
|
311 {
|
Chris@16
|
312 regex_token_iterator<BidiIter> tmp(*this);
|
Chris@16
|
313 ++*this;
|
Chris@16
|
314 return tmp;
|
Chris@16
|
315 }
|
Chris@16
|
316
|
Chris@16
|
317 private:
|
Chris@16
|
318
|
Chris@16
|
319 /// INTERNAL ONLY
|
Chris@16
|
320 void fork_()
|
Chris@16
|
321 {
|
Chris@16
|
322 if(1 != this->impl_->use_count())
|
Chris@16
|
323 {
|
Chris@16
|
324 intrusive_ptr<impl_type_> clone = new impl_type_
|
Chris@16
|
325 (
|
Chris@16
|
326 this->impl_->iter_.state_.begin_
|
Chris@16
|
327 , this->impl_->iter_.state_.cur_
|
Chris@16
|
328 , this->impl_->iter_.state_.end_
|
Chris@16
|
329 , this->impl_->iter_.state_.next_search_
|
Chris@16
|
330 , this->impl_->iter_.rex_
|
Chris@16
|
331 , this->impl_->iter_.flags_
|
Chris@16
|
332 , this->impl_->subs_
|
Chris@16
|
333 , this->impl_->n_
|
Chris@16
|
334 , this->impl_->iter_.not_null_
|
Chris@16
|
335 );
|
Chris@16
|
336
|
Chris@16
|
337 // only copy the match_results struct if we have to. Note: if the next call
|
Chris@16
|
338 // to impl_->next() will return false or call regex_search, we don't need to
|
Chris@16
|
339 // copy the match_results struct.
|
Chris@16
|
340 if(-1 != this->impl_->n_ && this->impl_->n_ + 1 != static_cast<int>(this->impl_->subs_.size()))
|
Chris@16
|
341 {
|
Chris@16
|
342 // BUGBUG This is expensive -- it causes the sequence_stack to be cleared.
|
Chris@16
|
343 // Find a better way
|
Chris@16
|
344 clone->iter_.what_ = this->impl_->iter_.what_;
|
Chris@16
|
345 }
|
Chris@16
|
346 else
|
Chris@16
|
347 {
|
Chris@16
|
348 // At the very least, copy the action args
|
Chris@16
|
349 detail::core_access<BidiIter>::get_action_args(clone->iter_.what_)
|
Chris@16
|
350 = detail::core_access<BidiIter>::get_action_args(this->impl_->iter_.what_);
|
Chris@16
|
351 }
|
Chris@16
|
352
|
Chris@16
|
353 this->impl_.swap(clone);
|
Chris@16
|
354 }
|
Chris@16
|
355 }
|
Chris@16
|
356
|
Chris@16
|
357 /// INTERNAL ONLY
|
Chris@16
|
358 void next_()
|
Chris@16
|
359 {
|
Chris@16
|
360 BOOST_ASSERT(this->impl_ && 1 == this->impl_->use_count());
|
Chris@16
|
361 if(!this->impl_->next())
|
Chris@16
|
362 {
|
Chris@16
|
363 this->impl_ = 0;
|
Chris@16
|
364 }
|
Chris@16
|
365 }
|
Chris@16
|
366
|
Chris@16
|
367 intrusive_ptr<impl_type_> impl_;
|
Chris@16
|
368 };
|
Chris@16
|
369
|
Chris@16
|
370 }} // namespace boost::xpressive
|
Chris@16
|
371
|
Chris@16
|
372 #endif
|