Chris@16
|
1 ///////////////////////////////////////////////////////////////////////////////
|
Chris@16
|
2 /// \file symbols.hpp
|
Chris@16
|
3 /// Contains the Ternary Search Trie implementation.
|
Chris@16
|
4 /// Based on the following papers:
|
Chris@16
|
5 /// J. Bentley and R. Sedgewick. (1998) Ternary search trees. Dr. Dobbs Journal
|
Chris@16
|
6 /// G. Badr and B.J. Oommen. (2005) Self-Adjusting of Ternary Search Tries Using
|
Chris@16
|
7 /// Conditional Rotations and Randomized Heuristics
|
Chris@16
|
8 //
|
Chris@16
|
9 // Copyright 2007 David Jenkins.
|
Chris@16
|
10 // Copyright 2007 Eric Niebler.
|
Chris@16
|
11 //
|
Chris@16
|
12 // Distributed under the Boost Software License, Version 1.0. (See
|
Chris@16
|
13 // accompanying file LICENSE_1_0.txt or copy at
|
Chris@16
|
14 // http://www.boost.org/LICENSE_1_0.txt)
|
Chris@16
|
15
|
Chris@16
|
16 #ifndef BOOST_XPRESSIVE_DETAIL_SYMBOLS_HPP_DRJ_06_11_2007
|
Chris@16
|
17 #define BOOST_XPRESSIVE_DETAIL_SYMBOLS_HPP_DRJ_06_11_2007
|
Chris@16
|
18
|
Chris@16
|
19 // MS compatible compilers support #pragma once
|
Chris@101
|
20 #if defined(_MSC_VER)
|
Chris@16
|
21 # pragma once
|
Chris@16
|
22 #endif
|
Chris@16
|
23
|
Chris@16
|
24 #include <boost/noncopyable.hpp>
|
Chris@16
|
25 #include <boost/range/begin.hpp>
|
Chris@16
|
26 #include <boost/range/end.hpp>
|
Chris@16
|
27 #include <boost/range/value_type.hpp>
|
Chris@16
|
28 #include <boost/range/const_iterator.hpp>
|
Chris@16
|
29 #include <boost/shared_ptr.hpp>
|
Chris@16
|
30
|
Chris@16
|
31 namespace boost { namespace xpressive { namespace detail
|
Chris@16
|
32 {
|
Chris@16
|
33
|
Chris@16
|
34 ///////////////////////////////////////////////////////////////////////////////
|
Chris@16
|
35 // symbols (using a ternary search trie)
|
Chris@16
|
36 //
|
Chris@16
|
37 template<typename Map>
|
Chris@16
|
38 struct symbols
|
Chris@16
|
39 {
|
Chris@16
|
40 typedef typename range_value<Map>::type::first_type key_type;
|
Chris@16
|
41 typedef typename range_value<Map>::type::second_type value_type;
|
Chris@16
|
42 typedef typename range_value<key_type>::type char_type;
|
Chris@16
|
43 typedef typename range_const_iterator<Map>::type iterator;
|
Chris@16
|
44 typedef typename range_const_iterator<key_type>::type key_iterator;
|
Chris@16
|
45 typedef value_type const *result_type;
|
Chris@16
|
46
|
Chris@16
|
47 // copies of this symbol table share the TST
|
Chris@16
|
48
|
Chris@16
|
49 template<typename Trans>
|
Chris@16
|
50 void load(Map const &map, Trans trans)
|
Chris@16
|
51 {
|
Chris@16
|
52 iterator begin = boost::begin(map);
|
Chris@16
|
53 iterator end = boost::end(map);
|
Chris@16
|
54 node* root_p = this->root.get();
|
Chris@16
|
55 for(; begin != end; ++begin)
|
Chris@16
|
56 {
|
Chris@16
|
57 key_iterator kbegin = boost::begin(begin->first);
|
Chris@16
|
58 key_iterator kend = boost::end(begin->first);
|
Chris@16
|
59 root_p = this->insert(root_p, kbegin, kend, &begin->second, trans);
|
Chris@16
|
60 }
|
Chris@16
|
61 this->root.reset(root_p);
|
Chris@16
|
62 }
|
Chris@16
|
63
|
Chris@16
|
64 template<typename BidiIter, typename Trans>
|
Chris@16
|
65 result_type operator ()(BidiIter &begin, BidiIter end, Trans trans) const
|
Chris@16
|
66 {
|
Chris@16
|
67 return this->search(begin, end, trans, this->root.get());
|
Chris@16
|
68 }
|
Chris@16
|
69
|
Chris@16
|
70 template<typename Sink>
|
Chris@16
|
71 void peek(Sink const &sink) const
|
Chris@16
|
72 {
|
Chris@16
|
73 this->peek_(this->root.get(), sink);
|
Chris@16
|
74 }
|
Chris@16
|
75
|
Chris@16
|
76 private:
|
Chris@16
|
77 ///////////////////////////////////////////////////////////////////////////////
|
Chris@16
|
78 // struct node : a node in the TST.
|
Chris@16
|
79 // The "eq" field stores the result pointer when ch is zero.
|
Chris@16
|
80 //
|
Chris@16
|
81 struct node
|
Chris@16
|
82 : boost::noncopyable
|
Chris@16
|
83 {
|
Chris@16
|
84 node(char_type c)
|
Chris@16
|
85 : ch(c)
|
Chris@16
|
86 , lo(0)
|
Chris@16
|
87 , eq(0)
|
Chris@16
|
88 , hi(0)
|
Chris@16
|
89 #ifdef BOOST_DISABLE_THREADS
|
Chris@16
|
90 , tau(0)
|
Chris@16
|
91 #endif
|
Chris@16
|
92 {}
|
Chris@16
|
93
|
Chris@16
|
94 ~node()
|
Chris@16
|
95 {
|
Chris@16
|
96 delete lo;
|
Chris@16
|
97 if (ch)
|
Chris@16
|
98 delete eq;
|
Chris@16
|
99 delete hi;
|
Chris@16
|
100 }
|
Chris@16
|
101
|
Chris@16
|
102 void swap(node& that)
|
Chris@16
|
103 {
|
Chris@16
|
104 std::swap(ch, that.ch);
|
Chris@16
|
105 std::swap(lo, that.lo);
|
Chris@16
|
106 std::swap(eq, that.eq);
|
Chris@16
|
107 std::swap(hi, that.hi);
|
Chris@16
|
108 #ifdef BOOST_DISABLE_THREADS
|
Chris@16
|
109 std::swap(tau, that.tau);
|
Chris@16
|
110 #endif
|
Chris@16
|
111 }
|
Chris@16
|
112
|
Chris@16
|
113 char_type ch;
|
Chris@16
|
114 node* lo;
|
Chris@16
|
115 union
|
Chris@16
|
116 {
|
Chris@16
|
117 node* eq;
|
Chris@16
|
118 result_type result;
|
Chris@16
|
119 };
|
Chris@16
|
120 node* hi;
|
Chris@16
|
121 #ifdef BOOST_DISABLE_THREADS
|
Chris@16
|
122 long tau;
|
Chris@16
|
123 #endif
|
Chris@16
|
124 };
|
Chris@16
|
125
|
Chris@16
|
126 ///////////////////////////////////////////////////////////////////////////////
|
Chris@16
|
127 // insert : insert a string into the TST
|
Chris@16
|
128 //
|
Chris@16
|
129 template<typename Trans>
|
Chris@16
|
130 node* insert(node* p, key_iterator &begin, key_iterator end, result_type r, Trans trans) const
|
Chris@16
|
131 {
|
Chris@16
|
132 char_type c1 = 0;
|
Chris@16
|
133
|
Chris@16
|
134 if(begin != end)
|
Chris@16
|
135 {
|
Chris@16
|
136 c1 = trans(*begin);
|
Chris@16
|
137 }
|
Chris@16
|
138
|
Chris@16
|
139 if(!p)
|
Chris@16
|
140 {
|
Chris@16
|
141 p = new node(c1);
|
Chris@16
|
142 }
|
Chris@16
|
143
|
Chris@16
|
144 if(c1 < p->ch)
|
Chris@16
|
145 {
|
Chris@16
|
146 p->lo = this->insert(p->lo, begin, end, r, trans);
|
Chris@16
|
147 }
|
Chris@16
|
148 else if(c1 == p->ch)
|
Chris@16
|
149 {
|
Chris@16
|
150 if(0 == c1)
|
Chris@16
|
151 {
|
Chris@16
|
152 p->result = r;
|
Chris@16
|
153 }
|
Chris@16
|
154 else
|
Chris@16
|
155 {
|
Chris@16
|
156 p->eq = this->insert(p->eq, ++begin, end, r, trans);
|
Chris@16
|
157 }
|
Chris@16
|
158 }
|
Chris@16
|
159 else
|
Chris@16
|
160 {
|
Chris@16
|
161 p->hi = this->insert(p->hi, begin, end, r, trans);
|
Chris@16
|
162 }
|
Chris@16
|
163
|
Chris@16
|
164 return p;
|
Chris@16
|
165 }
|
Chris@16
|
166
|
Chris@16
|
167 #ifdef BOOST_DISABLE_THREADS
|
Chris@16
|
168 ///////////////////////////////////////////////////////////////////////////////
|
Chris@16
|
169 // conditional rotation : the goal is to minimize the overall
|
Chris@16
|
170 // weighted path length of each binary search tree
|
Chris@16
|
171 //
|
Chris@16
|
172 bool cond_rotation(bool left, node* const i, node* const j) const
|
Chris@16
|
173 {
|
Chris@16
|
174 // don't rotate top node in binary search tree
|
Chris@16
|
175 if (i == j)
|
Chris@16
|
176 return false;
|
Chris@16
|
177 // calculate psi (the rotation condition)
|
Chris@16
|
178 node* const k = (left ? i->hi : i->lo);
|
Chris@16
|
179 long psi = 2*i->tau - j->tau - (k ? k->tau : 0);
|
Chris@16
|
180 if (psi <= 0)
|
Chris@16
|
181 return false;
|
Chris@16
|
182
|
Chris@16
|
183 // recalculate the tau values
|
Chris@16
|
184 j->tau += -i->tau + (k ? k->tau : 0);
|
Chris@16
|
185 i->tau += j->tau - (k ? k->tau : 0);
|
Chris@16
|
186 // fixup links and swap nodes
|
Chris@16
|
187 if (left)
|
Chris@16
|
188 {
|
Chris@16
|
189 j->lo = k;
|
Chris@16
|
190 i->hi = i;
|
Chris@16
|
191 }
|
Chris@16
|
192 else
|
Chris@16
|
193 {
|
Chris@16
|
194 j->hi = k;
|
Chris@16
|
195 i->lo = i;
|
Chris@16
|
196 }
|
Chris@16
|
197 (*i).swap(*j);
|
Chris@16
|
198 return true;
|
Chris@16
|
199 }
|
Chris@16
|
200 #endif
|
Chris@16
|
201
|
Chris@16
|
202 ///////////////////////////////////////////////////////////////////////////////
|
Chris@16
|
203 // search : find a string in the TST
|
Chris@16
|
204 //
|
Chris@16
|
205 template<typename BidiIter, typename Trans>
|
Chris@16
|
206 result_type search(BidiIter &begin, BidiIter end, Trans trans, node* p) const
|
Chris@16
|
207 {
|
Chris@16
|
208 result_type r = 0;
|
Chris@16
|
209 #ifdef BOOST_DISABLE_THREADS
|
Chris@16
|
210 node* p2 = p;
|
Chris@16
|
211 bool left = false;
|
Chris@16
|
212 #endif
|
Chris@16
|
213 char_type c1 = (begin != end ? trans(*begin) : 0);
|
Chris@16
|
214 while(p)
|
Chris@16
|
215 {
|
Chris@16
|
216 #ifdef BOOST_DISABLE_THREADS
|
Chris@16
|
217 ++p->tau;
|
Chris@16
|
218 #endif
|
Chris@16
|
219 if(c1 == p->ch)
|
Chris@16
|
220 {
|
Chris@16
|
221 // conditional rotation test
|
Chris@16
|
222 #ifdef BOOST_DISABLE_THREADS
|
Chris@16
|
223 if (this->cond_rotation(left, p, p2))
|
Chris@16
|
224 p = p2;
|
Chris@16
|
225 #endif
|
Chris@16
|
226 if (0 == p->ch)
|
Chris@16
|
227 {
|
Chris@16
|
228 // it's a match!
|
Chris@16
|
229 r = p->result;
|
Chris@16
|
230 }
|
Chris@16
|
231 if(begin == end)
|
Chris@16
|
232 break;
|
Chris@16
|
233 ++begin;
|
Chris@16
|
234 p = p->eq;
|
Chris@16
|
235 // search for the longest match first
|
Chris@16
|
236 r = search(begin,end,trans,p);
|
Chris@16
|
237 if (0 == r)
|
Chris@16
|
238 {
|
Chris@16
|
239 // search for a match ending here
|
Chris@16
|
240 r = search(end,end,trans,p);
|
Chris@16
|
241 if (0 == r)
|
Chris@16
|
242 {
|
Chris@16
|
243 --begin;
|
Chris@16
|
244 }
|
Chris@16
|
245 }
|
Chris@16
|
246 break;
|
Chris@16
|
247 }
|
Chris@16
|
248 else if(c1 < p->ch)
|
Chris@16
|
249 {
|
Chris@16
|
250 #ifdef BOOST_DISABLE_THREADS
|
Chris@16
|
251 left = true;
|
Chris@16
|
252 p2 = p;
|
Chris@16
|
253 #endif
|
Chris@16
|
254 p = p->lo;
|
Chris@16
|
255 }
|
Chris@16
|
256 else // (c1 > p->ch)
|
Chris@16
|
257 {
|
Chris@16
|
258 #ifdef BOOST_DISABLE_THREADS
|
Chris@16
|
259 left = false;
|
Chris@16
|
260 p2 = p;
|
Chris@16
|
261 #endif
|
Chris@16
|
262 p = p->hi;
|
Chris@16
|
263 }
|
Chris@16
|
264 }
|
Chris@16
|
265 return r;
|
Chris@16
|
266 }
|
Chris@16
|
267
|
Chris@16
|
268 template<typename Sink>
|
Chris@16
|
269 void peek_(node const *const &p, Sink const &sink) const
|
Chris@16
|
270 {
|
Chris@16
|
271 if(p)
|
Chris@16
|
272 {
|
Chris@16
|
273 sink(p->ch);
|
Chris@16
|
274 this->peek_(p->lo, sink);
|
Chris@16
|
275 this->peek_(p->hi, sink);
|
Chris@16
|
276 }
|
Chris@16
|
277 }
|
Chris@16
|
278
|
Chris@16
|
279 boost::shared_ptr<node> root;
|
Chris@16
|
280 };
|
Chris@16
|
281
|
Chris@16
|
282 }}} // namespace boost::xpressive::detail
|
Chris@16
|
283
|
Chris@16
|
284 #endif
|