annotate DEPENDENCIES/generic/include/boost/xpressive/regex_token_iterator.hpp @ 133:4acb5d8d80b6 tip

Don't fail environmental check if README.md exists (but .txt and no-suffix don't)
author Chris Cannam
date Tue, 30 Jul 2019 12:25:44 +0100
parents c530137014c0
children
rev   line source
Chris@16 1 ///////////////////////////////////////////////////////////////////////////////
Chris@16 2 /// \file regex_token_iterator.hpp
Chris@16 3 /// Contains the definition of regex_token_iterator, an STL-compatible iterator
Chris@16 4 /// for tokenizing a string using a regular expression.
Chris@16 5 //
Chris@16 6 // Copyright 2008 Eric Niebler. Distributed under the Boost
Chris@16 7 // Software License, Version 1.0. (See accompanying file
Chris@16 8 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
Chris@16 9
Chris@16 10 #ifndef BOOST_XPRESSIVE_REGEX_TOKEN_ITERATOR_HPP_EAN_10_04_2005
Chris@16 11 #define BOOST_XPRESSIVE_REGEX_TOKEN_ITERATOR_HPP_EAN_10_04_2005
Chris@16 12
Chris@16 13 // MS compatible compilers support #pragma once
Chris@101 14 #if defined(_MSC_VER)
Chris@16 15 # pragma once
Chris@16 16 #endif
Chris@16 17
Chris@16 18 #include <vector>
Chris@16 19 #include <boost/assert.hpp>
Chris@16 20 #include <boost/mpl/assert.hpp>
Chris@16 21 #include <boost/type_traits/is_same.hpp>
Chris@16 22 #include <boost/type_traits/is_convertible.hpp>
Chris@16 23 #include <boost/xpressive/regex_iterator.hpp>
Chris@16 24
Chris@16 25 namespace boost { namespace xpressive { namespace detail
Chris@16 26 {
Chris@16 27
Chris@16 28 //////////////////////////////////////////////////////////////////////////
Chris@16 29 // regex_token_iterator_impl
Chris@16 30 //
Chris@16 31 template<typename BidiIter>
Chris@16 32 struct regex_token_iterator_impl
Chris@16 33 : counted_base<regex_token_iterator_impl<BidiIter> >
Chris@16 34 {
Chris@16 35 typedef sub_match<BidiIter> value_type;
Chris@16 36
Chris@16 37 regex_token_iterator_impl
Chris@16 38 (
Chris@16 39 BidiIter begin
Chris@16 40 , BidiIter cur
Chris@16 41 , BidiIter end
Chris@16 42 , BidiIter next_search
Chris@16 43 , basic_regex<BidiIter> const &rex
Chris@16 44 , regex_constants::match_flag_type flags = regex_constants::match_default
Chris@16 45 , std::vector<int> subs = std::vector<int>(1, 0)
Chris@16 46 , int n = -2
Chris@16 47 , bool not_null = false
Chris@16 48 )
Chris@16 49 : iter_(begin, cur, end, next_search, rex, flags, not_null)
Chris@16 50 , result_()
Chris@16 51 , n_((-2 == n) ? (int)subs.size() - 1 : n)
Chris@16 52 , subs_()
Chris@16 53 {
Chris@16 54 BOOST_ASSERT(0 != subs.size());
Chris@16 55 this->subs_.swap(subs);
Chris@16 56 }
Chris@16 57
Chris@16 58 bool next()
Chris@16 59 {
Chris@16 60 if(-1 != this->n_)
Chris@16 61 {
Chris@16 62 BidiIter cur = this->iter_.state_.cur_;
Chris@16 63 if(0 != (++this->n_ %= (int)this->subs_.size()) || this->iter_.next())
Chris@16 64 {
Chris@16 65 this->result_ = (-1 == this->subs_[ this->n_ ])
Chris@16 66 ? this->iter_.what_.prefix()
Chris@16 67 : this->iter_.what_[ this->subs_[ this->n_ ] ];
Chris@16 68 return true;
Chris@16 69 }
Chris@16 70 else if(-1 == this->subs_[ this->n_-- ] && cur != this->iter_.state_.end_)
Chris@16 71 {
Chris@16 72 this->result_ = value_type(cur, this->iter_.state_.end_, true);
Chris@16 73 return true;
Chris@16 74 }
Chris@16 75 }
Chris@16 76
Chris@16 77 return false;
Chris@16 78 }
Chris@16 79
Chris@16 80 bool equal_to(regex_token_iterator_impl<BidiIter> const &that) const
Chris@16 81 {
Chris@16 82 return this->iter_.equal_to(that.iter_) && this->n_ == that.n_;
Chris@16 83 }
Chris@16 84
Chris@16 85 regex_iterator_impl<BidiIter> iter_;
Chris@16 86 value_type result_;
Chris@16 87 int n_;
Chris@16 88 std::vector<int> subs_;
Chris@16 89 };
Chris@16 90
/// INTERNAL ONLY
/// Identity mapping for sub-match indices that are already plain integers.
/// Lets to_vector() convert array elements through one uniform call.
/// NOTE(review): presumably overloads for other index types live elsewhere
/// in the library -- confirm before relying on that.
inline int get_mark_number(int i)
{
    return i;
}
Chris@16 95
/// INTERNAL ONLY
/// Wraps a single sub-match index in a one-element vector.
inline std::vector<int> to_vector(int subs)
{
    std::vector<int> single;
    single.push_back(subs);
    return single;
}
Chris@16 100
/// INTERNAL ONLY
/// A vector of int needs no conversion; the argument itself is returned by
/// reference, so no copy is made here.
inline std::vector<int> const &to_vector(std::vector<int> const &subs)
{
    return subs;
}
Chris@16 105
Chris@16 106 template<typename Int, std::size_t Size>
Chris@16 107 inline std::vector<int> to_vector(Int const (&sub_matches)[ Size ])
Chris@16 108 {
Chris@16 109 // so that people can specify sub-match indices inline with
Chris@16 110 // string literals, like "\1\2\3", leave off the trailing '\0'
Chris@16 111 std::size_t const size = Size - is_same<Int, char>::value;
Chris@16 112 std::vector<int> vect(size);
Chris@16 113 for(std::size_t i = 0; i < size; ++i)
Chris@16 114 {
Chris@16 115 vect[i] = get_mark_number(sub_matches[i]);
Chris@16 116 }
Chris@16 117 return vect;
Chris@16 118 }
Chris@16 119
Chris@16 120 template<typename Int>
Chris@16 121 inline std::vector<int> to_vector(std::vector<Int> const &sub_matches)
Chris@16 122 {
Chris@16 123 BOOST_MPL_ASSERT((is_convertible<Int, int>));
Chris@16 124 return std::vector<int>(sub_matches.begin(), sub_matches.end());
Chris@16 125 }
Chris@16 126
Chris@16 127 } // namespace detail
Chris@16 128
Chris@16 129 //////////////////////////////////////////////////////////////////////////
Chris@16 130 // regex_token_iterator
Chris@16 131 //
Chris@16 132 template<typename BidiIter>
Chris@16 133 struct regex_token_iterator
Chris@16 134 {
Chris@16 135 typedef basic_regex<BidiIter> regex_type;
Chris@16 136 typedef typename iterator_value<BidiIter>::type char_type;
Chris@16 137 typedef sub_match<BidiIter> value_type;
Chris@16 138 typedef std::ptrdiff_t difference_type;
Chris@16 139 typedef value_type const *pointer;
Chris@16 140 typedef value_type const &reference;
Chris@16 141 typedef std::forward_iterator_tag iterator_category;
Chris@16 142
Chris@16 143 /// INTERNAL ONLY
Chris@16 144 typedef detail::regex_token_iterator_impl<BidiIter> impl_type_;
Chris@16 145
Chris@16 146 /// \post \c *this is the end of sequence iterator.
Chris@16 147 regex_token_iterator()
Chris@16 148 : impl_()
Chris@16 149 {
Chris@16 150 }
Chris@16 151
Chris@16 152 /// \param begin The beginning of the character range to search.
Chris@16 153 /// \param end The end of the character range to search.
Chris@16 154 /// \param rex The regex pattern to search for.
Chris@16 155 /// \pre \c [begin,end) is a valid range.
Chris@16 156 regex_token_iterator
Chris@16 157 (
Chris@16 158 BidiIter begin
Chris@16 159 , BidiIter end
Chris@16 160 , basic_regex<BidiIter> const &rex
Chris@16 161 )
Chris@16 162 : impl_()
Chris@16 163 {
Chris@16 164 if(0 != rex.regex_id())
Chris@16 165 {
Chris@16 166 this->impl_ = new impl_type_(begin, begin, end, begin, rex);
Chris@16 167 this->next_();
Chris@16 168 }
Chris@16 169 }
Chris@16 170
Chris@16 171 /// \param begin The beginning of the character range to search.
Chris@16 172 /// \param end The end of the character range to search.
Chris@16 173 /// \param rex The regex pattern to search for.
Chris@16 174 /// \param args A let() expression with argument bindings for semantic actions.
Chris@16 175 /// \pre \c [begin,end) is a valid range.
Chris@16 176 template<typename LetExpr>
Chris@16 177 regex_token_iterator
Chris@16 178 (
Chris@16 179 BidiIter begin
Chris@16 180 , BidiIter end
Chris@16 181 , basic_regex<BidiIter> const &rex
Chris@16 182 , detail::let_<LetExpr> const &args
Chris@16 183 )
Chris@16 184 : impl_()
Chris@16 185 {
Chris@16 186 if(0 != rex.regex_id())
Chris@16 187 {
Chris@16 188 this->impl_ = new impl_type_(begin, begin, end, begin, rex);
Chris@16 189 detail::bind_args(args, this->impl_->iter_.what_);
Chris@16 190 this->next_();
Chris@16 191 }
Chris@16 192 }
Chris@16 193
Chris@16 194 /// \param begin The beginning of the character range to search.
Chris@16 195 /// \param end The end of the character range to search.
Chris@16 196 /// \param rex The regex pattern to search for.
Chris@16 197 /// \param subs A range of integers designating sub-matches to be treated as tokens.
Chris@16 198 /// \param flags Optional match flags, used to control how the expression is matched against the sequence. (See match_flag_type.)
Chris@16 199 /// \pre \c [begin,end) is a valid range.
Chris@16 200 /// \pre \c subs is either an integer greater or equal to -1,
Chris@16 201 /// or else an array or non-empty \c std::vector\<\> of such integers.
Chris@16 202 template<typename Subs>
Chris@16 203 regex_token_iterator
Chris@16 204 (
Chris@16 205 BidiIter begin
Chris@16 206 , BidiIter end
Chris@16 207 , basic_regex<BidiIter> const &rex
Chris@16 208 , Subs const &subs
Chris@16 209 , regex_constants::match_flag_type flags = regex_constants::match_default
Chris@16 210 )
Chris@16 211 : impl_()
Chris@16 212 {
Chris@16 213 if(0 != rex.regex_id())
Chris@16 214 {
Chris@16 215 this->impl_ = new impl_type_(begin, begin, end, begin, rex, flags, detail::to_vector(subs));
Chris@16 216 this->next_();
Chris@16 217 }
Chris@16 218 }
Chris@16 219
Chris@16 220 /// \param begin The beginning of the character range to search.
Chris@16 221 /// \param end The end of the character range to search.
Chris@16 222 /// \param rex The regex pattern to search for.
Chris@16 223 /// \param subs A range of integers designating sub-matches to be treated as tokens.
Chris@16 224 /// \param args A let() expression with argument bindings for semantic actions.
Chris@16 225 /// \param flags Optional match flags, used to control how the expression is matched against the sequence. (See match_flag_type.)
Chris@16 226 /// \pre \c [begin,end) is a valid range.
Chris@16 227 /// \pre \c subs is either an integer greater or equal to -1,
Chris@16 228 /// or else an array or non-empty \c std::vector\<\> of such integers.
Chris@16 229 template<typename Subs, typename LetExpr>
Chris@16 230 regex_token_iterator
Chris@16 231 (
Chris@16 232 BidiIter begin
Chris@16 233 , BidiIter end
Chris@16 234 , basic_regex<BidiIter> const &rex
Chris@16 235 , Subs const &subs
Chris@16 236 , detail::let_<LetExpr> const &args
Chris@16 237 , regex_constants::match_flag_type flags = regex_constants::match_default
Chris@16 238 )
Chris@16 239 : impl_()
Chris@16 240 {
Chris@16 241 if(0 != rex.regex_id())
Chris@16 242 {
Chris@16 243 this->impl_ = new impl_type_(begin, begin, end, begin, rex, flags, detail::to_vector(subs));
Chris@16 244 detail::bind_args(args, this->impl_->iter_.what_);
Chris@16 245 this->next_();
Chris@16 246 }
Chris@16 247 }
Chris@16 248
Chris@16 249 /// \post <tt>*this == that</tt>
Chris@16 250 regex_token_iterator(regex_token_iterator<BidiIter> const &that)
Chris@16 251 : impl_(that.impl_) // COW
Chris@16 252 {
Chris@16 253 }
Chris@16 254
Chris@16 255 /// \post <tt>*this == that</tt>
Chris@16 256 regex_token_iterator<BidiIter> &operator =(regex_token_iterator<BidiIter> const &that)
Chris@16 257 {
Chris@16 258 this->impl_ = that.impl_; // COW
Chris@16 259 return *this;
Chris@16 260 }
Chris@16 261
Chris@16 262 friend bool operator ==(regex_token_iterator<BidiIter> const &left, regex_token_iterator<BidiIter> const &right)
Chris@16 263 {
Chris@16 264 if(!left.impl_ || !right.impl_)
Chris@16 265 {
Chris@16 266 return !left.impl_ && !right.impl_;
Chris@16 267 }
Chris@16 268
Chris@16 269 return left.impl_->equal_to(*right.impl_);
Chris@16 270 }
Chris@16 271
Chris@16 272 friend bool operator !=(regex_token_iterator<BidiIter> const &left, regex_token_iterator<BidiIter> const &right)
Chris@16 273 {
Chris@16 274 return !(left == right);
Chris@16 275 }
Chris@16 276
Chris@16 277 value_type const &operator *() const
Chris@16 278 {
Chris@16 279 return this->impl_->result_;
Chris@16 280 }
Chris@16 281
Chris@16 282 value_type const *operator ->() const
Chris@16 283 {
Chris@16 284 return &this->impl_->result_;
Chris@16 285 }
Chris@16 286
Chris@16 287 /// If N == -1 then sets *this equal to the end of sequence iterator.
Chris@16 288 /// Otherwise if N+1 \< subs.size(), then increments N and sets result equal to
Chris@16 289 /// ((subs[N] == -1) ? value_type(what.prefix().str()) : value_type(what[subs[N]].str())).
Chris@16 290 /// Otherwise if what.prefix().first != what[0].second and if the element match_prev_avail is
Chris@16 291 /// not set in flags then sets it. Then locates the next match as if by calling
Chris@16 292 /// regex_search(what[0].second, end, what, *pre, flags), with the following variation:
Chris@16 293 /// in the event that the previous match found was of zero length (what[0].length() == 0)
Chris@16 294 /// then attempts to find a non-zero length match starting at what[0].second, only if that
Chris@16 295 /// fails and provided what[0].second != suffix().second does it look for a (possibly zero
Chris@16 296 /// length) match starting from what[0].second + 1. If such a match is found then sets N
Chris@16 297 /// equal to zero, and sets result equal to
Chris@16 298 /// ((subs[N] == -1) ? value_type(what.prefix().str()) : value_type(what[subs[N]].str())).
Chris@16 299 /// Otherwise if no further matches were found, then let last_end be the endpoint of the last
Chris@16 300 /// match that was found. Then if last_end != end and subs[0] == -1 sets N equal to -1 and
Chris@16 301 /// sets result equal to value_type(last_end, end). Otherwise sets *this equal to the end
Chris@16 302 /// of sequence iterator.
Chris@16 303 regex_token_iterator<BidiIter> &operator ++()
Chris@16 304 {
Chris@16 305 this->fork_(); // un-share the implementation
Chris@16 306 this->next_();
Chris@16 307 return *this;
Chris@16 308 }
Chris@16 309
Chris@16 310 regex_token_iterator<BidiIter> operator ++(int)
Chris@16 311 {
Chris@16 312 regex_token_iterator<BidiIter> tmp(*this);
Chris@16 313 ++*this;
Chris@16 314 return tmp;
Chris@16 315 }
Chris@16 316
Chris@16 317 private:
Chris@16 318
Chris@16 319 /// INTERNAL ONLY
Chris@16 320 void fork_()
Chris@16 321 {
Chris@16 322 if(1 != this->impl_->use_count())
Chris@16 323 {
Chris@16 324 intrusive_ptr<impl_type_> clone = new impl_type_
Chris@16 325 (
Chris@16 326 this->impl_->iter_.state_.begin_
Chris@16 327 , this->impl_->iter_.state_.cur_
Chris@16 328 , this->impl_->iter_.state_.end_
Chris@16 329 , this->impl_->iter_.state_.next_search_
Chris@16 330 , this->impl_->iter_.rex_
Chris@16 331 , this->impl_->iter_.flags_
Chris@16 332 , this->impl_->subs_
Chris@16 333 , this->impl_->n_
Chris@16 334 , this->impl_->iter_.not_null_
Chris@16 335 );
Chris@16 336
Chris@16 337 // only copy the match_results struct if we have to. Note: if the next call
Chris@16 338 // to impl_->next() will return false or call regex_search, we don't need to
Chris@16 339 // copy the match_results struct.
Chris@16 340 if(-1 != this->impl_->n_ && this->impl_->n_ + 1 != static_cast<int>(this->impl_->subs_.size()))
Chris@16 341 {
Chris@16 342 // BUGBUG This is expensive -- it causes the sequence_stack to be cleared.
Chris@16 343 // Find a better way
Chris@16 344 clone->iter_.what_ = this->impl_->iter_.what_;
Chris@16 345 }
Chris@16 346 else
Chris@16 347 {
Chris@16 348 // At the very least, copy the action args
Chris@16 349 detail::core_access<BidiIter>::get_action_args(clone->iter_.what_)
Chris@16 350 = detail::core_access<BidiIter>::get_action_args(this->impl_->iter_.what_);
Chris@16 351 }
Chris@16 352
Chris@16 353 this->impl_.swap(clone);
Chris@16 354 }
Chris@16 355 }
Chris@16 356
Chris@16 357 /// INTERNAL ONLY
Chris@16 358 void next_()
Chris@16 359 {
Chris@16 360 BOOST_ASSERT(this->impl_ && 1 == this->impl_->use_count());
Chris@16 361 if(!this->impl_->next())
Chris@16 362 {
Chris@16 363 this->impl_ = 0;
Chris@16 364 }
Chris@16 365 }
Chris@16 366
Chris@16 367 intrusive_ptr<impl_type_> impl_;
Chris@16 368 };
Chris@16 369
Chris@16 370 }} // namespace boost::xpressive
Chris@16 371
Chris@16 372 #endif