annotate DEPENDENCIES/generic/include/boost/xpressive/detail/utility/boyer_moore.hpp @ 133:4acb5d8d80b6 tip

Don't fail environmental check if README.md exists (but .txt and no-suffix don't)
author Chris Cannam
date Tue, 30 Jul 2019 12:25:44 +0100
parents c530137014c0
children
rev   line source
Chris@16 1 ///////////////////////////////////////////////////////////////////////////////
Chris@16 2 /// \file boyer_moore.hpp
Chris@16 3 /// Contains the boyer-moore implementation. Note: this is *not* a general-
Chris@16 4 /// purpose boyer-moore implementation. It truncates the search string at
Chris@16 5 /// 256 characters, but it is sufficient for the needs of xpressive.
Chris@16 6 //
Chris@16 7 // Copyright 2008 Eric Niebler. Distributed under the Boost
Chris@16 8 // Software License, Version 1.0. (See accompanying file
Chris@16 9 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
Chris@16 10
Chris@16 11 #ifndef BOOST_XPRESSIVE_DETAIL_BOYER_MOORE_HPP_EAN_10_04_2005
Chris@16 12 #define BOOST_XPRESSIVE_DETAIL_BOYER_MOORE_HPP_EAN_10_04_2005
Chris@16 13
Chris@16 14 // MS compatible compilers support #pragma once
Chris@101 15 #if defined(_MSC_VER)
Chris@16 16 # pragma once
Chris@16 17 # pragma warning(push)
Chris@16 18 # pragma warning(disable : 4100) // unreferenced formal parameter
Chris@16 19 #endif
Chris@16 20
Chris@16 21 #include <climits> // for UCHAR_MAX
Chris@16 22 #include <cstddef> // for std::ptrdiff_t
Chris@16 23 #include <utility> // for std::max
Chris@16 24 #include <vector>
Chris@16 25 #include <boost/mpl/bool.hpp>
Chris@16 26 #include <boost/noncopyable.hpp>
Chris@16 27 #include <boost/iterator/iterator_traits.hpp>
Chris@16 28 #include <boost/type_traits/is_convertible.hpp>
Chris@16 29 #include <boost/xpressive/detail/detail_fwd.hpp>
Chris@16 30
Chris@16 31 namespace boost { namespace xpressive { namespace detail
Chris@16 32 {
Chris@16 33
Chris@16 34 ///////////////////////////////////////////////////////////////////////////////
Chris@16 35 // boyer_moore
Chris@16 36 //
Chris@16 37 template<typename BidiIter, typename Traits>
Chris@16 38 struct boyer_moore
Chris@16 39 : noncopyable
Chris@16 40 {
Chris@16 41 typedef typename iterator_value<BidiIter>::type char_type;
Chris@16 42 typedef Traits traits_type;
Chris@16 43 typedef has_fold_case<Traits> case_fold;
Chris@16 44 typedef typename Traits::string_type string_type;
Chris@16 45
Chris@16 46 // initialize the Boyer-Moore search data structure, using the
Chris@16 47 // search sub-sequence to prime the pump.
Chris@16 48 boyer_moore(char_type const *begin, char_type const *end, Traits const &tr, bool icase)
Chris@16 49 : begin_(begin)
Chris@16 50 , last_(begin)
Chris@16 51 , fold_()
Chris@16 52 , find_fun_(
Chris@16 53 icase
Chris@16 54 ? (case_fold() ? &boyer_moore::find_nocase_fold_ : &boyer_moore::find_nocase_)
Chris@16 55 : &boyer_moore::find_
Chris@16 56 )
Chris@16 57 {
Chris@16 58 std::ptrdiff_t const uchar_max = UCHAR_MAX;
Chris@16 59 std::ptrdiff_t diff = std::distance(begin, end);
Chris@16 60 this->length_ = static_cast<unsigned char>((std::min)(diff, uchar_max));
Chris@16 61 std::fill_n(static_cast<unsigned char *>(this->offsets_), uchar_max + 1, this->length_);
Chris@16 62 --this->length_;
Chris@16 63
Chris@16 64 icase ? this->init_(tr, case_fold()) : this->init_(tr, mpl::false_());
Chris@16 65 }
Chris@16 66
Chris@16 67 BidiIter find(BidiIter begin, BidiIter end, Traits const &tr) const
Chris@16 68 {
Chris@16 69 return (this->*this->find_fun_)(begin, end, tr);
Chris@16 70 }
Chris@16 71
Chris@16 72 private:
Chris@16 73
Chris@16 74 void init_(Traits const &tr, mpl::false_)
Chris@16 75 {
Chris@16 76 for(unsigned char offset = this->length_; offset; --offset, ++this->last_)
Chris@16 77 {
Chris@16 78 this->offsets_[tr.hash(*this->last_)] = offset;
Chris@16 79 }
Chris@16 80 }
Chris@16 81
Chris@16 82 void init_(Traits const &tr, mpl::true_)
Chris@16 83 {
Chris@16 84 this->fold_.reserve(this->length_ + 1);
Chris@16 85 for(unsigned char offset = this->length_; offset; --offset, ++this->last_)
Chris@16 86 {
Chris@16 87 this->fold_.push_back(tr.fold_case(*this->last_));
Chris@16 88 for(typename string_type::const_iterator beg = this->fold_.back().begin(), end = this->fold_.back().end();
Chris@16 89 beg != end; ++beg)
Chris@16 90 {
Chris@16 91 this->offsets_[tr.hash(*beg)] = offset;
Chris@16 92 }
Chris@16 93 }
Chris@16 94 this->fold_.push_back(tr.fold_case(*this->last_));
Chris@16 95 }
Chris@16 96
Chris@16 97 // case-sensitive Boyer-Moore search
Chris@16 98 BidiIter find_(BidiIter begin, BidiIter end, Traits const &tr) const
Chris@16 99 {
Chris@16 100 typedef typename boost::iterator_difference<BidiIter>::type diff_type;
Chris@16 101 diff_type const endpos = std::distance(begin, end);
Chris@16 102 diff_type offset = static_cast<diff_type>(this->length_);
Chris@16 103
Chris@16 104 for(diff_type curpos = offset; curpos < endpos; curpos += offset)
Chris@16 105 {
Chris@16 106 std::advance(begin, offset);
Chris@16 107
Chris@16 108 char_type const *pat_tmp = this->last_;
Chris@16 109 BidiIter str_tmp = begin;
Chris@16 110
Chris@16 111 for(; tr.translate(*str_tmp) == *pat_tmp; --pat_tmp, --str_tmp)
Chris@16 112 {
Chris@16 113 if(pat_tmp == this->begin_)
Chris@16 114 {
Chris@16 115 return str_tmp;
Chris@16 116 }
Chris@16 117 }
Chris@16 118
Chris@16 119 offset = this->offsets_[tr.hash(tr.translate(*begin))];
Chris@16 120 }
Chris@16 121
Chris@16 122 return end;
Chris@16 123 }
Chris@16 124
Chris@16 125 // case-insensitive Boyer-Moore search
Chris@16 126 BidiIter find_nocase_(BidiIter begin, BidiIter end, Traits const &tr) const
Chris@16 127 {
Chris@16 128 typedef typename boost::iterator_difference<BidiIter>::type diff_type;
Chris@16 129 diff_type const endpos = std::distance(begin, end);
Chris@16 130 diff_type offset = static_cast<diff_type>(this->length_);
Chris@16 131
Chris@16 132 for(diff_type curpos = offset; curpos < endpos; curpos += offset)
Chris@16 133 {
Chris@16 134 std::advance(begin, offset);
Chris@16 135
Chris@16 136 char_type const *pat_tmp = this->last_;
Chris@16 137 BidiIter str_tmp = begin;
Chris@16 138
Chris@16 139 for(; tr.translate_nocase(*str_tmp) == *pat_tmp; --pat_tmp, --str_tmp)
Chris@16 140 {
Chris@16 141 if(pat_tmp == this->begin_)
Chris@16 142 {
Chris@16 143 return str_tmp;
Chris@16 144 }
Chris@16 145 }
Chris@16 146
Chris@16 147 offset = this->offsets_[tr.hash(tr.translate_nocase(*begin))];
Chris@16 148 }
Chris@16 149
Chris@16 150 return end;
Chris@16 151 }
Chris@16 152
Chris@16 153 // case-insensitive Boyer-Moore search with case-folding
Chris@16 154 BidiIter find_nocase_fold_(BidiIter begin, BidiIter end, Traits const &tr) const
Chris@16 155 {
Chris@16 156 typedef typename boost::iterator_difference<BidiIter>::type diff_type;
Chris@16 157 diff_type const endpos = std::distance(begin, end);
Chris@16 158 diff_type offset = static_cast<diff_type>(this->length_);
Chris@16 159
Chris@16 160 for(diff_type curpos = offset; curpos < endpos; curpos += offset)
Chris@16 161 {
Chris@16 162 std::advance(begin, offset);
Chris@16 163
Chris@16 164 string_type const *pat_tmp = &this->fold_.back();
Chris@16 165 BidiIter str_tmp = begin;
Chris@16 166
Chris@16 167 for(; pat_tmp->end() != std::find(pat_tmp->begin(), pat_tmp->end(), *str_tmp);
Chris@16 168 --pat_tmp, --str_tmp)
Chris@16 169 {
Chris@16 170 if(pat_tmp == &this->fold_.front())
Chris@16 171 {
Chris@16 172 return str_tmp;
Chris@16 173 }
Chris@16 174 }
Chris@16 175
Chris@16 176 offset = this->offsets_[tr.hash(*begin)];
Chris@16 177 }
Chris@16 178
Chris@16 179 return end;
Chris@16 180 }
Chris@16 181
Chris@16 182 private:
Chris@16 183
Chris@16 184 char_type const *begin_;
Chris@16 185 char_type const *last_;
Chris@16 186 std::vector<string_type> fold_;
Chris@16 187 BidiIter (boyer_moore::*const find_fun_)(BidiIter, BidiIter, Traits const &) const;
Chris@16 188 unsigned char length_;
Chris@16 189 unsigned char offsets_[UCHAR_MAX + 1];
Chris@16 190 };
Chris@16 191
Chris@16 192 }}} // namespace boost::xpressive::detail
Chris@16 193
Chris@101 194 #if defined(_MSC_VER)
Chris@16 195 # pragma warning(pop)
Chris@16 196 #endif
Chris@16 197
Chris@16 198 #endif