Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \file boyer_moore.hpp Chris@16: /// Contains the boyer-moore implementation. Note: this is *not* a general- Chris@16: /// purpose boyer-moore implementation. It truncates the search string at Chris@16: /// 256 characters, but it is sufficient for the needs of xpressive. Chris@16: // Chris@16: // Copyright 2008 Eric Niebler. Distributed under the Boost Chris@16: // Software License, Version 1.0. (See accompanying file Chris@16: // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) Chris@16: Chris@16: #ifndef BOOST_XPRESSIVE_DETAIL_BOYER_MOORE_HPP_EAN_10_04_2005 Chris@16: #define BOOST_XPRESSIVE_DETAIL_BOYER_MOORE_HPP_EAN_10_04_2005 Chris@16: Chris@16: // MS compatible compilers support #pragma once Chris@101: #if defined(_MSC_VER) Chris@16: # pragma once Chris@16: # pragma warning(push) Chris@16: # pragma warning(disable : 4100) // unreferenced formal parameter Chris@16: #endif Chris@16: Chris@16: #include // for UCHAR_MAX Chris@16: #include // for std::ptrdiff_t Chris@16: #include // for std::max Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: Chris@16: namespace boost { namespace xpressive { namespace detail Chris@16: { Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: // boyer_moore Chris@16: // Chris@16: template Chris@16: struct boyer_moore Chris@16: : noncopyable Chris@16: { Chris@16: typedef typename iterator_value::type char_type; Chris@16: typedef Traits traits_type; Chris@16: typedef has_fold_case case_fold; Chris@16: typedef typename Traits::string_type string_type; Chris@16: Chris@16: // initialize the Boyer-Moore search data structure, using the Chris@16: // search sub-sequence to prime the pump. Chris@16: boyer_moore(char_type const *begin, char_type const *end, Traits const &tr, bool icase) Chris@16: : begin_(begin) Chris@16: , last_(begin) Chris@16: , fold_() Chris@16: , find_fun_( Chris@16: icase Chris@16: ? (case_fold() ? &boyer_moore::find_nocase_fold_ : &boyer_moore::find_nocase_) Chris@16: : &boyer_moore::find_ Chris@16: ) Chris@16: { Chris@16: std::ptrdiff_t const uchar_max = UCHAR_MAX; Chris@16: std::ptrdiff_t diff = std::distance(begin, end); Chris@16: this->length_ = static_cast((std::min)(diff, uchar_max)); Chris@16: std::fill_n(static_cast(this->offsets_), uchar_max + 1, this->length_); Chris@16: --this->length_; Chris@16: Chris@16: icase ? this->init_(tr, case_fold()) : this->init_(tr, mpl::false_()); Chris@16: } Chris@16: Chris@16: BidiIter find(BidiIter begin, BidiIter end, Traits const &tr) const Chris@16: { Chris@16: return (this->*this->find_fun_)(begin, end, tr); Chris@16: } Chris@16: Chris@16: private: Chris@16: Chris@16: void init_(Traits const &tr, mpl::false_) Chris@16: { Chris@16: for(unsigned char offset = this->length_; offset; --offset, ++this->last_) Chris@16: { Chris@16: this->offsets_[tr.hash(*this->last_)] = offset; Chris@16: } Chris@16: } Chris@16: Chris@16: void init_(Traits const &tr, mpl::true_) Chris@16: { Chris@16: this->fold_.reserve(this->length_ + 1); Chris@16: for(unsigned char offset = this->length_; offset; --offset, ++this->last_) Chris@16: { Chris@16: this->fold_.push_back(tr.fold_case(*this->last_)); Chris@16: for(typename string_type::const_iterator beg = this->fold_.back().begin(), end = this->fold_.back().end(); Chris@16: beg != end; ++beg) Chris@16: { Chris@16: this->offsets_[tr.hash(*beg)] = offset; Chris@16: } Chris@16: } Chris@16: this->fold_.push_back(tr.fold_case(*this->last_)); Chris@16: } Chris@16: Chris@16: // case-sensitive Boyer-Moore search Chris@16: BidiIter find_(BidiIter begin, BidiIter end, Traits const &tr) const Chris@16: { Chris@16: typedef typename boost::iterator_difference::type diff_type; Chris@16: diff_type const endpos = std::distance(begin, end); Chris@16: diff_type offset = static_cast(this->length_); Chris@16: Chris@16: for(diff_type curpos = offset; curpos < endpos; curpos += offset) Chris@16: { Chris@16: std::advance(begin, offset); Chris@16: Chris@16: char_type const *pat_tmp = this->last_; Chris@16: BidiIter str_tmp = begin; Chris@16: Chris@16: for(; tr.translate(*str_tmp) == *pat_tmp; --pat_tmp, --str_tmp) Chris@16: { Chris@16: if(pat_tmp == this->begin_) Chris@16: { Chris@16: return str_tmp; Chris@16: } Chris@16: } Chris@16: Chris@16: offset = this->offsets_[tr.hash(tr.translate(*begin))]; Chris@16: } Chris@16: Chris@16: return end; Chris@16: } Chris@16: Chris@16: // case-insensitive Boyer-Moore search Chris@16: BidiIter find_nocase_(BidiIter begin, BidiIter end, Traits const &tr) const Chris@16: { Chris@16: typedef typename boost::iterator_difference::type diff_type; Chris@16: diff_type const endpos = std::distance(begin, end); Chris@16: diff_type offset = static_cast(this->length_); Chris@16: Chris@16: for(diff_type curpos = offset; curpos < endpos; curpos += offset) Chris@16: { Chris@16: std::advance(begin, offset); Chris@16: Chris@16: char_type const *pat_tmp = this->last_; Chris@16: BidiIter str_tmp = begin; Chris@16: Chris@16: for(; tr.translate_nocase(*str_tmp) == *pat_tmp; --pat_tmp, --str_tmp) Chris@16: { Chris@16: if(pat_tmp == this->begin_) Chris@16: { Chris@16: return str_tmp; Chris@16: } Chris@16: } Chris@16: Chris@16: offset = this->offsets_[tr.hash(tr.translate_nocase(*begin))]; Chris@16: } Chris@16: Chris@16: return end; Chris@16: } Chris@16: Chris@16: // case-insensitive Boyer-Moore search with case-folding Chris@16: BidiIter find_nocase_fold_(BidiIter begin, BidiIter end, Traits const &tr) const Chris@16: { Chris@16: typedef typename boost::iterator_difference::type diff_type; Chris@16: diff_type const endpos = std::distance(begin, end); Chris@16: diff_type offset = static_cast(this->length_); Chris@16: Chris@16: for(diff_type curpos = offset; curpos < endpos; curpos += offset) Chris@16: { Chris@16: std::advance(begin, offset); Chris@16: Chris@16: string_type const *pat_tmp = &this->fold_.back(); Chris@16: BidiIter str_tmp = begin; Chris@16: Chris@16: for(; pat_tmp->end() != std::find(pat_tmp->begin(), pat_tmp->end(), *str_tmp); Chris@16: --pat_tmp, --str_tmp) Chris@16: { Chris@16: if(pat_tmp == &this->fold_.front()) Chris@16: { Chris@16: return str_tmp; Chris@16: } Chris@16: } Chris@16: Chris@16: offset = this->offsets_[tr.hash(*begin)]; Chris@16: } Chris@16: Chris@16: return end; Chris@16: } Chris@16: Chris@16: private: Chris@16: Chris@16: char_type const *begin_; Chris@16: char_type const *last_; Chris@16: std::vector fold_; Chris@16: BidiIter (boyer_moore::*const find_fun_)(BidiIter, BidiIter, Traits const &) const; Chris@16: unsigned char length_; Chris@16: unsigned char offsets_[UCHAR_MAX + 1]; Chris@16: }; Chris@16: Chris@16: }}} // namespace boost::xpressive::detail Chris@16: Chris@101: #if defined(_MSC_VER) Chris@16: # pragma warning(pop) Chris@16: #endif Chris@16: Chris@16: #endif