Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \file regex_token_iterator.hpp Chris@16: /// Contains the definition of regex_token_iterator, and STL-compatible iterator Chris@16: /// for tokenizing a string using a regular expression. Chris@16: // Chris@16: // Copyright 2008 Eric Niebler. Distributed under the Boost Chris@16: // Software License, Version 1.0. (See accompanying file Chris@16: // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) Chris@16: Chris@16: #ifndef BOOST_XPRESSIVE_REGEX_TOKEN_ITERATOR_HPP_EAN_10_04_2005 Chris@16: #define BOOST_XPRESSIVE_REGEX_TOKEN_ITERATOR_HPP_EAN_10_04_2005 Chris@16: Chris@16: // MS compatible compilers support #pragma once Chris@101: #if defined(_MSC_VER) Chris@16: # pragma once Chris@16: #endif Chris@16: Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: Chris@16: namespace boost { namespace xpressive { namespace detail Chris@16: { Chris@16: Chris@16: ////////////////////////////////////////////////////////////////////////// Chris@16: // regex_token_iterator_impl Chris@16: // Chris@16: template Chris@16: struct regex_token_iterator_impl Chris@16: : counted_base > Chris@16: { Chris@16: typedef sub_match value_type; Chris@16: Chris@16: regex_token_iterator_impl Chris@16: ( Chris@16: BidiIter begin Chris@16: , BidiIter cur Chris@16: , BidiIter end Chris@16: , BidiIter next_search Chris@16: , basic_regex const &rex Chris@16: , regex_constants::match_flag_type flags = regex_constants::match_default Chris@16: , std::vector subs = std::vector(1, 0) Chris@16: , int n = -2 Chris@16: , bool not_null = false Chris@16: ) Chris@16: : iter_(begin, cur, end, next_search, rex, flags, not_null) Chris@16: , result_() Chris@16: , n_((-2 == n) ? (int)subs.size() - 1 : n) Chris@16: , subs_() Chris@16: { Chris@16: BOOST_ASSERT(0 != subs.size()); Chris@16: this->subs_.swap(subs); Chris@16: } Chris@16: Chris@16: bool next() Chris@16: { Chris@16: if(-1 != this->n_) Chris@16: { Chris@16: BidiIter cur = this->iter_.state_.cur_; Chris@16: if(0 != (++this->n_ %= (int)this->subs_.size()) || this->iter_.next()) Chris@16: { Chris@16: this->result_ = (-1 == this->subs_[ this->n_ ]) Chris@16: ? this->iter_.what_.prefix() Chris@16: : this->iter_.what_[ this->subs_[ this->n_ ] ]; Chris@16: return true; Chris@16: } Chris@16: else if(-1 == this->subs_[ this->n_-- ] && cur != this->iter_.state_.end_) Chris@16: { Chris@16: this->result_ = value_type(cur, this->iter_.state_.end_, true); Chris@16: return true; Chris@16: } Chris@16: } Chris@16: Chris@16: return false; Chris@16: } Chris@16: Chris@16: bool equal_to(regex_token_iterator_impl const &that) const Chris@16: { Chris@16: return this->iter_.equal_to(that.iter_) && this->n_ == that.n_; Chris@16: } Chris@16: Chris@16: regex_iterator_impl iter_; Chris@16: value_type result_; Chris@16: int n_; Chris@16: std::vector subs_; Chris@16: }; Chris@16: Chris@16: inline int get_mark_number(int i) Chris@16: { Chris@16: return i; Chris@16: } Chris@16: Chris@16: inline std::vector to_vector(int subs) Chris@16: { Chris@16: return std::vector(1, subs); Chris@16: } Chris@16: Chris@16: inline std::vector const &to_vector(std::vector const &subs) Chris@16: { Chris@16: return subs; Chris@16: } Chris@16: Chris@16: template Chris@16: inline std::vector to_vector(Int const (&sub_matches)[ Size ]) Chris@16: { Chris@16: // so that people can specify sub-match indices inline with Chris@16: // string literals, like "\1\2\3", leave off the trailing '\0' Chris@16: std::size_t const size = Size - is_same::value; Chris@16: std::vector vect(size); Chris@16: for(std::size_t i = 0; i < size; ++i) Chris@16: { Chris@16: vect[i] = get_mark_number(sub_matches[i]); Chris@16: } Chris@16: return vect; Chris@16: } Chris@16: Chris@16: template Chris@16: inline std::vector to_vector(std::vector const &sub_matches) Chris@16: { Chris@16: BOOST_MPL_ASSERT((is_convertible)); Chris@16: return std::vector(sub_matches.begin(), sub_matches.end()); Chris@16: } Chris@16: Chris@16: } // namespace detail Chris@16: Chris@16: ////////////////////////////////////////////////////////////////////////// Chris@16: // regex_token_iterator Chris@16: // Chris@16: template Chris@16: struct regex_token_iterator Chris@16: { Chris@16: typedef basic_regex regex_type; Chris@16: typedef typename iterator_value::type char_type; Chris@16: typedef sub_match value_type; Chris@16: typedef std::ptrdiff_t difference_type; Chris@16: typedef value_type const *pointer; Chris@16: typedef value_type const &reference; Chris@16: typedef std::forward_iterator_tag iterator_category; Chris@16: Chris@16: /// INTERNAL ONLY Chris@16: typedef detail::regex_token_iterator_impl impl_type_; Chris@16: Chris@16: /// \post \c *this is the end of sequence iterator. Chris@16: regex_token_iterator() Chris@16: : impl_() Chris@16: { Chris@16: } Chris@16: Chris@16: /// \param begin The beginning of the character range to search. Chris@16: /// \param end The end of the character range to search. Chris@16: /// \param rex The regex pattern to search for. Chris@16: /// \pre \c [begin,end) is a valid range. Chris@16: regex_token_iterator Chris@16: ( Chris@16: BidiIter begin Chris@16: , BidiIter end Chris@16: , basic_regex const &rex Chris@16: ) Chris@16: : impl_() Chris@16: { Chris@16: if(0 != rex.regex_id()) Chris@16: { Chris@16: this->impl_ = new impl_type_(begin, begin, end, begin, rex); Chris@16: this->next_(); Chris@16: } Chris@16: } Chris@16: Chris@16: /// \param begin The beginning of the character range to search. Chris@16: /// \param end The end of the character range to search. Chris@16: /// \param rex The regex pattern to search for. Chris@16: /// \param args A let() expression with argument bindings for semantic actions. Chris@16: /// \pre \c [begin,end) is a valid range. Chris@16: template Chris@16: regex_token_iterator Chris@16: ( Chris@16: BidiIter begin Chris@16: , BidiIter end Chris@16: , basic_regex const &rex Chris@16: , detail::let_ const &args Chris@16: ) Chris@16: : impl_() Chris@16: { Chris@16: if(0 != rex.regex_id()) Chris@16: { Chris@16: this->impl_ = new impl_type_(begin, begin, end, begin, rex); Chris@16: detail::bind_args(args, this->impl_->iter_.what_); Chris@16: this->next_(); Chris@16: } Chris@16: } Chris@16: Chris@16: /// \param begin The beginning of the character range to search. Chris@16: /// \param end The end of the character range to search. Chris@16: /// \param rex The regex pattern to search for. Chris@16: /// \param subs A range of integers designating sub-matches to be treated as tokens. Chris@16: /// \param flags Optional match flags, used to control how the expression is matched against the sequence. (See match_flag_type.) Chris@16: /// \pre \c [begin,end) is a valid range. Chris@16: /// \pre \c subs is either an integer greater or equal to -1, Chris@16: /// or else an array or non-empty \c std::vector\<\> of such integers. Chris@16: template Chris@16: regex_token_iterator Chris@16: ( Chris@16: BidiIter begin Chris@16: , BidiIter end Chris@16: , basic_regex const &rex Chris@16: , Subs const &subs Chris@16: , regex_constants::match_flag_type flags = regex_constants::match_default Chris@16: ) Chris@16: : impl_() Chris@16: { Chris@16: if(0 != rex.regex_id()) Chris@16: { Chris@16: this->impl_ = new impl_type_(begin, begin, end, begin, rex, flags, detail::to_vector(subs)); Chris@16: this->next_(); Chris@16: } Chris@16: } Chris@16: Chris@16: /// \param begin The beginning of the character range to search. Chris@16: /// \param end The end of the character range to search. Chris@16: /// \param rex The regex pattern to search for. Chris@16: /// \param subs A range of integers designating sub-matches to be treated as tokens. Chris@16: /// \param args A let() expression with argument bindings for semantic actions. Chris@16: /// \param flags Optional match flags, used to control how the expression is matched against the sequence. (See match_flag_type.) Chris@16: /// \pre \c [begin,end) is a valid range. Chris@16: /// \pre \c subs is either an integer greater or equal to -1, Chris@16: /// or else an array or non-empty \c std::vector\<\> of such integers. Chris@16: template Chris@16: regex_token_iterator Chris@16: ( Chris@16: BidiIter begin Chris@16: , BidiIter end Chris@16: , basic_regex const &rex Chris@16: , Subs const &subs Chris@16: , detail::let_ const &args Chris@16: , regex_constants::match_flag_type flags = regex_constants::match_default Chris@16: ) Chris@16: : impl_() Chris@16: { Chris@16: if(0 != rex.regex_id()) Chris@16: { Chris@16: this->impl_ = new impl_type_(begin, begin, end, begin, rex, flags, detail::to_vector(subs)); Chris@16: detail::bind_args(args, this->impl_->iter_.what_); Chris@16: this->next_(); Chris@16: } Chris@16: } Chris@16: Chris@16: /// \post *this == that Chris@16: regex_token_iterator(regex_token_iterator const &that) Chris@16: : impl_(that.impl_) // COW Chris@16: { Chris@16: } Chris@16: Chris@16: /// \post *this == that Chris@16: regex_token_iterator &operator =(regex_token_iterator const &that) Chris@16: { Chris@16: this->impl_ = that.impl_; // COW Chris@16: return *this; Chris@16: } Chris@16: Chris@16: friend bool operator ==(regex_token_iterator const &left, regex_token_iterator const &right) Chris@16: { Chris@16: if(!left.impl_ || !right.impl_) Chris@16: { Chris@16: return !left.impl_ && !right.impl_; Chris@16: } Chris@16: Chris@16: return left.impl_->equal_to(*right.impl_); Chris@16: } Chris@16: Chris@16: friend bool operator !=(regex_token_iterator const &left, regex_token_iterator const &right) Chris@16: { Chris@16: return !(left == right); Chris@16: } Chris@16: Chris@16: value_type const &operator *() const Chris@16: { Chris@16: return this->impl_->result_; Chris@16: } Chris@16: Chris@16: value_type const *operator ->() const Chris@16: { Chris@16: return &this->impl_->result_; Chris@16: } Chris@16: Chris@16: /// If N == -1 then sets *this equal to the end of sequence iterator. Chris@16: /// Otherwise if N+1 \< subs.size(), then increments N and sets result equal to Chris@16: /// ((subs[N] == -1) ? value_type(what.prefix().str()) : value_type(what[subs[N]].str())). Chris@16: /// Otherwise if what.prefix().first != what[0].second and if the element match_prev_avail is Chris@16: /// not set in flags then sets it. Then locates the next match as if by calling Chris@16: /// regex_search(what[0].second, end, what, *pre, flags), with the following variation: Chris@16: /// in the event that the previous match found was of zero length (what[0].length() == 0) Chris@16: /// then attempts to find a non-zero length match starting at what[0].second, only if that Chris@16: /// fails and provided what[0].second != suffix().second does it look for a (possibly zero Chris@16: /// length) match starting from what[0].second + 1. If such a match is found then sets N Chris@16: /// equal to zero, and sets result equal to Chris@16: /// ((subs[N] == -1) ? value_type(what.prefix().str()) : value_type(what[subs[N]].str())). Chris@16: /// Otherwise if no further matches were found, then let last_end be the endpoint of the last Chris@16: /// match that was found. Then if last_end != end and subs[0] == -1 sets N equal to -1 and Chris@16: /// sets result equal to value_type(last_end, end). Otherwise sets *this equal to the end Chris@16: /// of sequence iterator. Chris@16: regex_token_iterator &operator ++() Chris@16: { Chris@16: this->fork_(); // un-share the implementation Chris@16: this->next_(); Chris@16: return *this; Chris@16: } Chris@16: Chris@16: regex_token_iterator operator ++(int) Chris@16: { Chris@16: regex_token_iterator tmp(*this); Chris@16: ++*this; Chris@16: return tmp; Chris@16: } Chris@16: Chris@16: private: Chris@16: Chris@16: /// INTERNAL ONLY Chris@16: void fork_() Chris@16: { Chris@16: if(1 != this->impl_->use_count()) Chris@16: { Chris@16: intrusive_ptr clone = new impl_type_ Chris@16: ( Chris@16: this->impl_->iter_.state_.begin_ Chris@16: , this->impl_->iter_.state_.cur_ Chris@16: , this->impl_->iter_.state_.end_ Chris@16: , this->impl_->iter_.state_.next_search_ Chris@16: , this->impl_->iter_.rex_ Chris@16: , this->impl_->iter_.flags_ Chris@16: , this->impl_->subs_ Chris@16: , this->impl_->n_ Chris@16: , this->impl_->iter_.not_null_ Chris@16: ); Chris@16: Chris@16: // only copy the match_results struct if we have to. Note: if the next call Chris@16: // to impl_->next() will return false or call regex_search, we don't need to Chris@16: // copy the match_results struct. Chris@16: if(-1 != this->impl_->n_ && this->impl_->n_ + 1 != static_cast(this->impl_->subs_.size())) Chris@16: { Chris@16: // BUGBUG This is expensive -- it causes the sequence_stack to be cleared. Chris@16: // Find a better way Chris@16: clone->iter_.what_ = this->impl_->iter_.what_; Chris@16: } Chris@16: else Chris@16: { Chris@16: // At the very least, copy the action args Chris@16: detail::core_access::get_action_args(clone->iter_.what_) Chris@16: = detail::core_access::get_action_args(this->impl_->iter_.what_); Chris@16: } Chris@16: Chris@16: this->impl_.swap(clone); Chris@16: } Chris@16: } Chris@16: Chris@16: /// INTERNAL ONLY Chris@16: void next_() Chris@16: { Chris@16: BOOST_ASSERT(this->impl_ && 1 == this->impl_->use_count()); Chris@16: if(!this->impl_->next()) Chris@16: { Chris@16: this->impl_ = 0; Chris@16: } Chris@16: } Chris@16: Chris@16: intrusive_ptr impl_; Chris@16: }; Chris@16: Chris@16: }} // namespace boost::xpressive Chris@16: Chris@16: #endif