Chris@16
|
1 ///////////////////////////////////////////////////////////////////////////////
|
Chris@16
|
2 /// \file boyer_moore.hpp
|
Chris@16
|
3 /// Contains the boyer-moore implementation. Note: this is *not* a general-
|
Chris@16
|
4 /// purpose boyer-moore implementation. It truncates the search string at
|
Chris@16
|
5 /// 256 characters, but it is sufficient for the needs of xpressive.
|
Chris@16
|
6 //
|
Chris@16
|
7 // Copyright 2008 Eric Niebler. Distributed under the Boost
|
Chris@16
|
8 // Software License, Version 1.0. (See accompanying file
|
Chris@16
|
9 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
Chris@16
|
10
|
Chris@16
|
11 #ifndef BOOST_XPRESSIVE_DETAIL_BOYER_MOORE_HPP_EAN_10_04_2005
|
Chris@16
|
12 #define BOOST_XPRESSIVE_DETAIL_BOYER_MOORE_HPP_EAN_10_04_2005
|
Chris@16
|
13
|
Chris@16
|
14 // MS compatible compilers support #pragma once
|
Chris@101
|
15 #if defined(_MSC_VER)
|
Chris@16
|
16 # pragma once
|
Chris@16
|
17 # pragma warning(push)
|
Chris@16
|
18 # pragma warning(disable : 4100) // unreferenced formal parameter
|
Chris@16
|
19 #endif
|
Chris@16
|
20
|
Chris@16
|
21 #include <climits> // for UCHAR_MAX
|
Chris@16
|
22 #include <cstddef> // for std::ptrdiff_t
|
Chris@16
|
23 #include <utility> // for std::max
|
Chris@16
|
24 #include <vector>
|
Chris@16
|
25 #include <boost/mpl/bool.hpp>
|
Chris@16
|
26 #include <boost/noncopyable.hpp>
|
Chris@16
|
27 #include <boost/iterator/iterator_traits.hpp>
|
Chris@16
|
28 #include <boost/type_traits/is_convertible.hpp>
|
Chris@16
|
29 #include <boost/xpressive/detail/detail_fwd.hpp>
|
Chris@16
|
30
|
Chris@16
|
31 namespace boost { namespace xpressive { namespace detail
|
Chris@16
|
32 {
|
Chris@16
|
33
|
Chris@16
|
34 ///////////////////////////////////////////////////////////////////////////////
|
Chris@16
|
35 // boyer_moore
|
Chris@16
|
36 //
|
Chris@16
|
37 template<typename BidiIter, typename Traits>
|
Chris@16
|
38 struct boyer_moore
|
Chris@16
|
39 : noncopyable
|
Chris@16
|
40 {
|
Chris@16
|
41 typedef typename iterator_value<BidiIter>::type char_type;
|
Chris@16
|
42 typedef Traits traits_type;
|
Chris@16
|
43 typedef has_fold_case<Traits> case_fold;
|
Chris@16
|
44 typedef typename Traits::string_type string_type;
|
Chris@16
|
45
|
Chris@16
|
46 // initialize the Boyer-Moore search data structure, using the
|
Chris@16
|
47 // search sub-sequence to prime the pump.
|
Chris@16
|
48 boyer_moore(char_type const *begin, char_type const *end, Traits const &tr, bool icase)
|
Chris@16
|
49 : begin_(begin)
|
Chris@16
|
50 , last_(begin)
|
Chris@16
|
51 , fold_()
|
Chris@16
|
52 , find_fun_(
|
Chris@16
|
53 icase
|
Chris@16
|
54 ? (case_fold() ? &boyer_moore::find_nocase_fold_ : &boyer_moore::find_nocase_)
|
Chris@16
|
55 : &boyer_moore::find_
|
Chris@16
|
56 )
|
Chris@16
|
57 {
|
Chris@16
|
58 std::ptrdiff_t const uchar_max = UCHAR_MAX;
|
Chris@16
|
59 std::ptrdiff_t diff = std::distance(begin, end);
|
Chris@16
|
60 this->length_ = static_cast<unsigned char>((std::min)(diff, uchar_max));
|
Chris@16
|
61 std::fill_n(static_cast<unsigned char *>(this->offsets_), uchar_max + 1, this->length_);
|
Chris@16
|
62 --this->length_;
|
Chris@16
|
63
|
Chris@16
|
64 icase ? this->init_(tr, case_fold()) : this->init_(tr, mpl::false_());
|
Chris@16
|
65 }
|
Chris@16
|
66
|
Chris@16
|
67 BidiIter find(BidiIter begin, BidiIter end, Traits const &tr) const
|
Chris@16
|
68 {
|
Chris@16
|
69 return (this->*this->find_fun_)(begin, end, tr);
|
Chris@16
|
70 }
|
Chris@16
|
71
|
Chris@16
|
72 private:
|
Chris@16
|
73
|
Chris@16
|
74 void init_(Traits const &tr, mpl::false_)
|
Chris@16
|
75 {
|
Chris@16
|
76 for(unsigned char offset = this->length_; offset; --offset, ++this->last_)
|
Chris@16
|
77 {
|
Chris@16
|
78 this->offsets_[tr.hash(*this->last_)] = offset;
|
Chris@16
|
79 }
|
Chris@16
|
80 }
|
Chris@16
|
81
|
Chris@16
|
82 void init_(Traits const &tr, mpl::true_)
|
Chris@16
|
83 {
|
Chris@16
|
84 this->fold_.reserve(this->length_ + 1);
|
Chris@16
|
85 for(unsigned char offset = this->length_; offset; --offset, ++this->last_)
|
Chris@16
|
86 {
|
Chris@16
|
87 this->fold_.push_back(tr.fold_case(*this->last_));
|
Chris@16
|
88 for(typename string_type::const_iterator beg = this->fold_.back().begin(), end = this->fold_.back().end();
|
Chris@16
|
89 beg != end; ++beg)
|
Chris@16
|
90 {
|
Chris@16
|
91 this->offsets_[tr.hash(*beg)] = offset;
|
Chris@16
|
92 }
|
Chris@16
|
93 }
|
Chris@16
|
94 this->fold_.push_back(tr.fold_case(*this->last_));
|
Chris@16
|
95 }
|
Chris@16
|
96
|
Chris@16
|
97 // case-sensitive Boyer-Moore search
|
Chris@16
|
98 BidiIter find_(BidiIter begin, BidiIter end, Traits const &tr) const
|
Chris@16
|
99 {
|
Chris@16
|
100 typedef typename boost::iterator_difference<BidiIter>::type diff_type;
|
Chris@16
|
101 diff_type const endpos = std::distance(begin, end);
|
Chris@16
|
102 diff_type offset = static_cast<diff_type>(this->length_);
|
Chris@16
|
103
|
Chris@16
|
104 for(diff_type curpos = offset; curpos < endpos; curpos += offset)
|
Chris@16
|
105 {
|
Chris@16
|
106 std::advance(begin, offset);
|
Chris@16
|
107
|
Chris@16
|
108 char_type const *pat_tmp = this->last_;
|
Chris@16
|
109 BidiIter str_tmp = begin;
|
Chris@16
|
110
|
Chris@16
|
111 for(; tr.translate(*str_tmp) == *pat_tmp; --pat_tmp, --str_tmp)
|
Chris@16
|
112 {
|
Chris@16
|
113 if(pat_tmp == this->begin_)
|
Chris@16
|
114 {
|
Chris@16
|
115 return str_tmp;
|
Chris@16
|
116 }
|
Chris@16
|
117 }
|
Chris@16
|
118
|
Chris@16
|
119 offset = this->offsets_[tr.hash(tr.translate(*begin))];
|
Chris@16
|
120 }
|
Chris@16
|
121
|
Chris@16
|
122 return end;
|
Chris@16
|
123 }
|
Chris@16
|
124
|
Chris@16
|
125 // case-insensitive Boyer-Moore search
|
Chris@16
|
126 BidiIter find_nocase_(BidiIter begin, BidiIter end, Traits const &tr) const
|
Chris@16
|
127 {
|
Chris@16
|
128 typedef typename boost::iterator_difference<BidiIter>::type diff_type;
|
Chris@16
|
129 diff_type const endpos = std::distance(begin, end);
|
Chris@16
|
130 diff_type offset = static_cast<diff_type>(this->length_);
|
Chris@16
|
131
|
Chris@16
|
132 for(diff_type curpos = offset; curpos < endpos; curpos += offset)
|
Chris@16
|
133 {
|
Chris@16
|
134 std::advance(begin, offset);
|
Chris@16
|
135
|
Chris@16
|
136 char_type const *pat_tmp = this->last_;
|
Chris@16
|
137 BidiIter str_tmp = begin;
|
Chris@16
|
138
|
Chris@16
|
139 for(; tr.translate_nocase(*str_tmp) == *pat_tmp; --pat_tmp, --str_tmp)
|
Chris@16
|
140 {
|
Chris@16
|
141 if(pat_tmp == this->begin_)
|
Chris@16
|
142 {
|
Chris@16
|
143 return str_tmp;
|
Chris@16
|
144 }
|
Chris@16
|
145 }
|
Chris@16
|
146
|
Chris@16
|
147 offset = this->offsets_[tr.hash(tr.translate_nocase(*begin))];
|
Chris@16
|
148 }
|
Chris@16
|
149
|
Chris@16
|
150 return end;
|
Chris@16
|
151 }
|
Chris@16
|
152
|
Chris@16
|
153 // case-insensitive Boyer-Moore search with case-folding
|
Chris@16
|
154 BidiIter find_nocase_fold_(BidiIter begin, BidiIter end, Traits const &tr) const
|
Chris@16
|
155 {
|
Chris@16
|
156 typedef typename boost::iterator_difference<BidiIter>::type diff_type;
|
Chris@16
|
157 diff_type const endpos = std::distance(begin, end);
|
Chris@16
|
158 diff_type offset = static_cast<diff_type>(this->length_);
|
Chris@16
|
159
|
Chris@16
|
160 for(diff_type curpos = offset; curpos < endpos; curpos += offset)
|
Chris@16
|
161 {
|
Chris@16
|
162 std::advance(begin, offset);
|
Chris@16
|
163
|
Chris@16
|
164 string_type const *pat_tmp = &this->fold_.back();
|
Chris@16
|
165 BidiIter str_tmp = begin;
|
Chris@16
|
166
|
Chris@16
|
167 for(; pat_tmp->end() != std::find(pat_tmp->begin(), pat_tmp->end(), *str_tmp);
|
Chris@16
|
168 --pat_tmp, --str_tmp)
|
Chris@16
|
169 {
|
Chris@16
|
170 if(pat_tmp == &this->fold_.front())
|
Chris@16
|
171 {
|
Chris@16
|
172 return str_tmp;
|
Chris@16
|
173 }
|
Chris@16
|
174 }
|
Chris@16
|
175
|
Chris@16
|
176 offset = this->offsets_[tr.hash(*begin)];
|
Chris@16
|
177 }
|
Chris@16
|
178
|
Chris@16
|
179 return end;
|
Chris@16
|
180 }
|
Chris@16
|
181
|
Chris@16
|
182 private:
|
Chris@16
|
183
|
Chris@16
|
184 char_type const *begin_;
|
Chris@16
|
185 char_type const *last_;
|
Chris@16
|
186 std::vector<string_type> fold_;
|
Chris@16
|
187 BidiIter (boyer_moore::*const find_fun_)(BidiIter, BidiIter, Traits const &) const;
|
Chris@16
|
188 unsigned char length_;
|
Chris@16
|
189 unsigned char offsets_[UCHAR_MAX + 1];
|
Chris@16
|
190 };
|
Chris@16
|
191
|
Chris@16
|
192 }}} // namespace boost::xpressive::detail
|
Chris@16
|
193
|
Chris@101
|
194 #if defined(_MSC_VER)
|
Chris@16
|
195 # pragma warning(pop)
|
Chris@16
|
196 #endif
|
Chris@16
|
197
|
Chris@16
|
198 #endif
|