Chris@16
|
1 /*=============================================================================
|
Chris@16
|
2 Copyright (c) 2001-2003 Joel de Guzman
|
Chris@16
|
3 http://spirit.sourceforge.net/
|
Chris@16
|
4
|
Chris@16
|
5 Use, modification and distribution is subject to the Boost Software
|
Chris@16
|
6 License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
Chris@16
|
7 http://www.boost.org/LICENSE_1_0.txt)
|
Chris@16
|
8 =============================================================================*/
|
Chris@16
|
9 #ifndef BOOST_SPIRIT_TST_IPP
|
Chris@16
|
10 #define BOOST_SPIRIT_TST_IPP
|
Chris@16
|
11
|
Chris@16
|
12 ///////////////////////////////////////////////////////////////////////////////
|
Chris@16
|
13 #include <memory> // for std::auto_ptr
|
Chris@16
|
14 #include <boost/spirit/home/classic/core/assert.hpp>
|
Chris@16
|
15
|
Chris@16
|
16 ///////////////////////////////////////////////////////////////////////////////
|
Chris@16
|
17 namespace boost { namespace spirit {
|
Chris@16
|
18
|
Chris@16
|
19 BOOST_SPIRIT_CLASSIC_NAMESPACE_BEGIN
|
Chris@16
|
20
|
Chris@16
|
21 namespace impl
|
Chris@16
|
22 {
|
Chris@16
|
23
|
Chris@16
|
24 ///////////////////////////////////////////////////////////////////////////////
|
Chris@16
|
25 //
|
Chris@16
|
26 // tst class
|
Chris@16
|
27 //
|
Chris@16
|
28 // Ternary Search Tree implementation. The data structure is faster than
|
Chris@16
|
29 // hashing for many typical search problems especially when the search
|
Chris@16
|
30 // interface is iterator based. Searching for a string of length k in a
|
Chris@16
|
31 // ternary search tree with n strings will require at most O(log n+k)
|
Chris@16
|
32 // character comparisons. TSTs are many times faster than hash tables
|
Chris@16
|
33 // for unsuccessful searches since mismatches are discovered earlier
|
Chris@16
|
34 // after examining only a few characters. Hash tables always examine an
|
Chris@16
|
35 // entire key when searching.
|
Chris@16
|
36 //
|
Chris@16
|
37 // For details see http://www.cs.princeton.edu/~rs/strings/.
|
Chris@16
|
38 //
|
Chris@16
|
39 // *** This is a low level class and is
|
Chris@16
|
40 // not meant for public consumption ***
|
Chris@16
|
41 //
|
Chris@16
|
42 ///////////////////////////////////////////////////////////////////////////////
|
Chris@16
|
43 template <typename T, typename CharT>
|
Chris@16
|
44 struct tst_node
|
Chris@16
|
45 {
|
Chris@16
|
46 tst_node(CharT value_)
|
Chris@16
|
47 : value(value_)
|
Chris@16
|
48 , left(0)
|
Chris@16
|
49 , right(0)
|
Chris@16
|
50 { middle.link = 0; }
|
Chris@16
|
51
|
Chris@16
|
52 ~tst_node()
|
Chris@16
|
53 {
|
Chris@16
|
54 delete left;
|
Chris@16
|
55 delete right;
|
Chris@16
|
56 if (value)
|
Chris@16
|
57 delete middle.link;
|
Chris@16
|
58 else
|
Chris@16
|
59 delete middle.data;
|
Chris@16
|
60 }
|
Chris@16
|
61
|
Chris@16
|
62 tst_node*
|
Chris@16
|
63 clone() const
|
Chris@16
|
64 {
|
Chris@16
|
65 std::auto_ptr<tst_node> copy(new tst_node(value));
|
Chris@16
|
66
|
Chris@16
|
67 if (left)
|
Chris@16
|
68 copy->left = left->clone();
|
Chris@16
|
69 if (right)
|
Chris@16
|
70 copy->right = right->clone();
|
Chris@16
|
71
|
Chris@16
|
72 if (value && middle.link)
|
Chris@16
|
73 {
|
Chris@16
|
74 copy->middle.link = middle.link->clone();
|
Chris@16
|
75 }
|
Chris@16
|
76 else
|
Chris@16
|
77 {
|
Chris@16
|
78 std::auto_ptr<T> mid_data(new T(*middle.data));
|
Chris@16
|
79 copy->middle.data = mid_data.release();
|
Chris@16
|
80 }
|
Chris@16
|
81
|
Chris@16
|
82 return copy.release();
|
Chris@16
|
83 }
|
Chris@16
|
84
|
Chris@16
|
85 union center {
|
Chris@16
|
86
|
Chris@16
|
87 tst_node* link;
|
Chris@16
|
88 T* data;
|
Chris@16
|
89 };
|
Chris@16
|
90
|
Chris@16
|
91 CharT value;
|
Chris@16
|
92 tst_node* left;
|
Chris@16
|
93 center middle;
|
Chris@16
|
94 tst_node* right;
|
Chris@16
|
95 };
|
Chris@16
|
96
|
Chris@16
|
97 ///////////////////////////////////////////////////////////////////////////
|
Chris@16
|
98 template <typename T, typename CharT>
|
Chris@16
|
99 class tst
|
Chris@16
|
100 {
|
Chris@16
|
101 public:
|
Chris@16
|
102
|
Chris@16
|
103 struct search_info
|
Chris@16
|
104 {
|
Chris@16
|
105 T* data;
|
Chris@16
|
106 std::size_t length;
|
Chris@16
|
107 };
|
Chris@16
|
108
|
Chris@16
|
109 tst()
|
Chris@16
|
110 : root(0) {}
|
Chris@16
|
111
|
Chris@16
|
112 tst(tst const& other)
|
Chris@16
|
113 : root(other.root ? other.root->clone() : 0) {}
|
Chris@16
|
114
|
Chris@16
|
115 ~tst()
|
Chris@16
|
116 { delete root; }
|
Chris@16
|
117
|
Chris@16
|
118 tst&
|
Chris@16
|
119 operator=(tst const& other)
|
Chris@16
|
120 {
|
Chris@16
|
121 if (this != &other)
|
Chris@16
|
122 {
|
Chris@16
|
123 node_t* new_root = other.root ? other.root->clone() : 0;
|
Chris@16
|
124 delete root;
|
Chris@16
|
125 root = new_root;
|
Chris@16
|
126 }
|
Chris@16
|
127 return *this;
|
Chris@16
|
128 }
|
Chris@16
|
129
|
Chris@16
|
130 template <typename IteratorT>
|
Chris@16
|
131 T* add(IteratorT first, IteratorT const& last, T const& data)
|
Chris@16
|
132 {
|
Chris@16
|
133 if (first == last)
|
Chris@16
|
134 return 0;
|
Chris@16
|
135
|
Chris@16
|
136 node_t** np = &root;
|
Chris@16
|
137 CharT ch = *first;
|
Chris@16
|
138
|
Chris@16
|
139 BOOST_SPIRIT_ASSERT((first == last || ch != 0)
|
Chris@16
|
140 && "Won't add string containing null character");
|
Chris@16
|
141
|
Chris@16
|
142 for (;;)
|
Chris@16
|
143 {
|
Chris@16
|
144 if (*np == 0 || ch == 0)
|
Chris@16
|
145 {
|
Chris@16
|
146 node_t* right = 0;
|
Chris@16
|
147 if (np != 0)
|
Chris@16
|
148 right = *np;
|
Chris@16
|
149 *np = new node_t(ch);
|
Chris@16
|
150 if (right)
|
Chris@16
|
151 (**np).right = right;
|
Chris@16
|
152 }
|
Chris@16
|
153
|
Chris@16
|
154 if (ch < (**np).value)
|
Chris@16
|
155 {
|
Chris@16
|
156 np = &(**np).left;
|
Chris@16
|
157 }
|
Chris@16
|
158 else
|
Chris@16
|
159 {
|
Chris@16
|
160 if (ch == (**np).value)
|
Chris@16
|
161 {
|
Chris@16
|
162 if (ch == 0)
|
Chris@16
|
163 {
|
Chris@16
|
164 if ((**np).middle.data == 0)
|
Chris@16
|
165 {
|
Chris@16
|
166 (**np).middle.data = new T(data);
|
Chris@16
|
167 return (**np).middle.data;
|
Chris@16
|
168 }
|
Chris@16
|
169 else
|
Chris@16
|
170 {
|
Chris@16
|
171 // re-addition is disallowed
|
Chris@16
|
172 return 0;
|
Chris@16
|
173 }
|
Chris@16
|
174 }
|
Chris@16
|
175 ++first;
|
Chris@16
|
176 ch = (first == last) ? CharT(0) : *first;
|
Chris@16
|
177 BOOST_SPIRIT_ASSERT((first == last || ch != 0)
|
Chris@16
|
178 && "Won't add string containing null character");
|
Chris@16
|
179 np = &(**np).middle.link;
|
Chris@16
|
180 }
|
Chris@16
|
181 else
|
Chris@16
|
182 {
|
Chris@16
|
183 np = &(**np).right;
|
Chris@16
|
184 }
|
Chris@16
|
185 }
|
Chris@16
|
186 }
|
Chris@16
|
187 }
|
Chris@16
|
188
|
Chris@16
|
189 template <typename ScannerT>
|
Chris@16
|
190 search_info find(ScannerT const& scan) const
|
Chris@16
|
191 {
|
Chris@16
|
192 search_info result = { 0, 0 };
|
Chris@16
|
193 if (scan.at_end()) {
|
Chris@16
|
194 return result;
|
Chris@16
|
195 }
|
Chris@16
|
196
|
Chris@16
|
197 typedef typename ScannerT::iterator_t iterator_t;
|
Chris@16
|
198 node_t* np = root;
|
Chris@16
|
199 CharT ch = *scan;
|
Chris@16
|
200 iterator_t save = scan.first;
|
Chris@16
|
201 iterator_t latest = scan.first;
|
Chris@16
|
202 std::size_t latest_len = 0;
|
Chris@16
|
203
|
Chris@16
|
204 while (np)
|
Chris@16
|
205 {
|
Chris@16
|
206
|
Chris@16
|
207 if (ch < np->value) // => go left!
|
Chris@16
|
208 {
|
Chris@16
|
209 if (np->value == 0)
|
Chris@16
|
210 {
|
Chris@16
|
211 result.data = np->middle.data;
|
Chris@16
|
212 if (result.data)
|
Chris@16
|
213 {
|
Chris@16
|
214 latest = scan.first;
|
Chris@16
|
215 latest_len = result.length;
|
Chris@16
|
216 }
|
Chris@16
|
217 }
|
Chris@16
|
218
|
Chris@16
|
219 np = np->left;
|
Chris@16
|
220 }
|
Chris@16
|
221 else if (ch == np->value) // => go middle!
|
Chris@16
|
222 {
|
Chris@16
|
223 // Matching the null character is not allowed.
|
Chris@16
|
224 if (np->value == 0)
|
Chris@16
|
225 {
|
Chris@16
|
226 result.data = np->middle.data;
|
Chris@16
|
227 if (result.data)
|
Chris@16
|
228 {
|
Chris@16
|
229 latest = scan.first;
|
Chris@16
|
230 latest_len = result.length;
|
Chris@16
|
231 }
|
Chris@16
|
232 break;
|
Chris@16
|
233 }
|
Chris@16
|
234
|
Chris@16
|
235 ++scan;
|
Chris@16
|
236 ch = scan.at_end() ? CharT(0) : *scan;
|
Chris@16
|
237 np = np->middle.link;
|
Chris@16
|
238 ++result.length;
|
Chris@16
|
239 }
|
Chris@16
|
240 else // (ch > np->value) => go right!
|
Chris@16
|
241 {
|
Chris@16
|
242 if (np->value == 0)
|
Chris@16
|
243 {
|
Chris@16
|
244 result.data = np->middle.data;
|
Chris@16
|
245 if (result.data)
|
Chris@16
|
246 {
|
Chris@16
|
247 latest = scan.first;
|
Chris@16
|
248 latest_len = result.length;
|
Chris@16
|
249 }
|
Chris@16
|
250 }
|
Chris@16
|
251
|
Chris@16
|
252 np = np->right;
|
Chris@16
|
253 }
|
Chris@16
|
254 }
|
Chris@16
|
255
|
Chris@16
|
256 if (result.data == 0)
|
Chris@16
|
257 {
|
Chris@16
|
258 scan.first = save;
|
Chris@16
|
259 }
|
Chris@16
|
260 else
|
Chris@16
|
261 {
|
Chris@16
|
262 scan.first = latest;
|
Chris@16
|
263 result.length = latest_len;
|
Chris@16
|
264 }
|
Chris@16
|
265 return result;
|
Chris@16
|
266 }
|
Chris@16
|
267
|
Chris@16
|
268 private:
|
Chris@16
|
269
|
Chris@16
|
270 typedef tst_node<T, CharT> node_t;
|
Chris@16
|
271 node_t* root;
|
Chris@16
|
272 };
|
Chris@16
|
273
|
Chris@16
|
274 ///////////////////////////////////////////////////////////////////////////////
|
Chris@16
|
275 } // namespace impl
|
Chris@16
|
276
|
Chris@16
|
277 BOOST_SPIRIT_CLASSIC_NAMESPACE_END
|
Chris@16
|
278
|
Chris@16
|
279 }} // namespace boost::spirit
|
Chris@16
|
280
|
Chris@16
|
281 #endif
|