Chris@16
|
1 #ifndef BOOST_DATE_TIME_STRING_PARSE_TREE___HPP__
|
Chris@16
|
2 #define BOOST_DATE_TIME_STRING_PARSE_TREE___HPP__
|
Chris@16
|
3
|
Chris@16
|
4 /* Copyright (c) 2004-2005 CrystalClear Software, Inc.
|
Chris@16
|
5 * Use, modification and distribution is subject to the
|
Chris@16
|
6 * Boost Software License, Version 1.0. (See accompanying
|
Chris@16
|
7 * file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
|
Chris@16
|
8 * Author: Jeff Garland, Bart Garst
|
Chris@101
|
9 * $Date$
|
Chris@16
|
10 */
|
Chris@16
|
11
|
Chris@16
|
12
|
Chris@16
|
13 #include "boost/lexical_cast.hpp" //error without?
|
Chris@16
|
14 #include "boost/algorithm/string/case_conv.hpp"
|
Chris@16
|
15 #include <map>
|
Chris@16
|
16 #include <string>
|
Chris@16
|
17 #include <vector>
|
Chris@16
|
18 #include <algorithm>
|
Chris@16
|
19
|
Chris@16
|
20 namespace boost { namespace date_time {
|
Chris@16
|
21
|
Chris@16
|
22
|
Chris@16
|
23 template<typename charT>
|
Chris@16
|
24 struct parse_match_result
|
Chris@16
|
25 {
|
Chris@16
|
26 parse_match_result() :
|
Chris@16
|
27 match_depth(0),
|
Chris@16
|
28 current_match(-1)// -1 is match_not-found value
|
Chris@16
|
29 {}
|
Chris@16
|
30 typedef std::basic_string<charT> string_type;
|
Chris@16
|
31 string_type remaining() const
|
Chris@16
|
32 {
|
Chris@16
|
33 if (match_depth == cache.size()) {
|
Chris@16
|
34 return string_type();
|
Chris@16
|
35 }
|
Chris@16
|
36 if (current_match == -1) {
|
Chris@16
|
37 return cache;
|
Chris@16
|
38 }
|
Chris@16
|
39 //some of the cache was used return the rest
|
Chris@16
|
40 return string_type(cache, match_depth);
|
Chris@16
|
41 }
|
Chris@16
|
42 charT last_char() const
|
Chris@16
|
43 {
|
Chris@16
|
44 return cache[cache.size()-1];
|
Chris@16
|
45 }
|
Chris@16
|
46 //! Returns true if more characters were parsed than was necessary
|
Chris@16
|
47 /*! Should be used in conjunction with last_char()
|
Chris@16
|
48 * to get the remaining character.
|
Chris@16
|
49 */
|
Chris@16
|
50 bool has_remaining() const
|
Chris@16
|
51 {
|
Chris@16
|
52 return (cache.size() > match_depth);
|
Chris@16
|
53 }
|
Chris@16
|
54
|
Chris@16
|
55 // cache will hold characters that have been read from the stream
|
Chris@16
|
56 string_type cache;
|
Chris@16
|
57 unsigned short match_depth;
|
Chris@16
|
58 short current_match;
|
Chris@16
|
59 enum PARSE_STATE { PARSE_ERROR= -1 };
|
Chris@16
|
60 };
|
Chris@16
|
61
|
Chris@16
|
62 //for debug -- really only char streams...
|
Chris@16
|
63 template<typename charT>
|
Chris@16
|
64 std::basic_ostream<charT>&
|
Chris@16
|
65 operator<<(std::basic_ostream<charT>& os, parse_match_result<charT>& mr)
|
Chris@16
|
66 {
|
Chris@16
|
67 os << "cm: " << mr.current_match
|
Chris@16
|
68 << " C: '" << mr.cache
|
Chris@16
|
69 << "' md: " << mr.match_depth
|
Chris@16
|
70 << " R: " << mr.remaining();
|
Chris@16
|
71 return os;
|
Chris@16
|
72 }
|
Chris@16
|
73
|
Chris@16
|
74
|
Chris@16
|
75
|
Chris@16
|
76 //! Recursive data structure to allow efficient parsing of various strings
|
Chris@16
|
77 /*! This class provides a quick lookup by building what amounts to a
|
Chris@16
|
78 * tree data structure. It also features a match function which can
|
Chris@16
|
79 * can handle nasty input interators by caching values as it recurses
|
Chris@16
|
80 * the tree so that it can backtrack as needed.
|
Chris@16
|
81 */
|
Chris@16
|
82 template<typename charT>
|
Chris@16
|
83 struct string_parse_tree
|
Chris@16
|
84 {
|
Chris@16
|
85 #if BOOST_WORKAROUND( __BORLANDC__, BOOST_TESTED_AT(0x581) )
|
Chris@16
|
86 typedef std::multimap<charT, string_parse_tree< charT> > ptree_coll;
|
Chris@16
|
87 #else
|
Chris@16
|
88 typedef std::multimap<charT, string_parse_tree > ptree_coll;
|
Chris@16
|
89 #endif
|
Chris@16
|
90 typedef typename ptree_coll::value_type value_type;
|
Chris@16
|
91 typedef typename ptree_coll::iterator iterator;
|
Chris@16
|
92 typedef typename ptree_coll::const_iterator const_iterator;
|
Chris@16
|
93 typedef std::basic_string<charT> string_type;
|
Chris@16
|
94 typedef std::vector<std::basic_string<charT> > collection_type;
|
Chris@16
|
95 typedef parse_match_result<charT> parse_match_result_type;
|
Chris@16
|
96
|
Chris@16
|
97 /*! Parameter "starting_point" designates where the numbering begins.
|
Chris@16
|
98 * A starting_point of zero will start the numbering at zero
|
Chris@16
|
99 * (Sun=0, Mon=1, ...) were a starting_point of one starts the
|
Chris@16
|
100 * numbering at one (Jan=1, Feb=2, ...). The default is zero,
|
Chris@16
|
101 * negative vaules are not allowed */
|
Chris@16
|
102 string_parse_tree(collection_type names, unsigned int starting_point=0)
|
Chris@16
|
103 {
|
Chris@16
|
104 // iterate thru all the elements and build the tree
|
Chris@16
|
105 unsigned short index = 0;
|
Chris@16
|
106 while (index != names.size() ) {
|
Chris@16
|
107 string_type s = boost::algorithm::to_lower_copy(names[index]);
|
Chris@16
|
108 insert(s, static_cast<unsigned short>(index + starting_point));
|
Chris@16
|
109 index++;
|
Chris@16
|
110 }
|
Chris@16
|
111 //set the last tree node = index+1 indicating a value
|
Chris@16
|
112 index++;
|
Chris@16
|
113 }
|
Chris@16
|
114
|
Chris@16
|
115
|
Chris@16
|
116 string_parse_tree(short value = -1) :
|
Chris@16
|
117 m_value(value)
|
Chris@16
|
118 {}
|
Chris@16
|
119 ptree_coll m_next_chars;
|
Chris@16
|
120 short m_value;
|
Chris@16
|
121
|
Chris@16
|
122 void insert(const string_type& s, unsigned short value)
|
Chris@16
|
123 {
|
Chris@16
|
124 unsigned int i = 0;
|
Chris@16
|
125 iterator ti;
|
Chris@16
|
126 while(i < s.size()) {
|
Chris@16
|
127 if (i==0) {
|
Chris@16
|
128 if (i == (s.size()-1)) {
|
Chris@16
|
129 ti = m_next_chars.insert(value_type(s[i],
|
Chris@16
|
130 string_parse_tree<charT>(value)));
|
Chris@16
|
131 }
|
Chris@16
|
132 else {
|
Chris@16
|
133 ti = m_next_chars.insert(value_type(s[i],
|
Chris@16
|
134 string_parse_tree<charT>()));
|
Chris@16
|
135 }
|
Chris@16
|
136 }
|
Chris@16
|
137 else {
|
Chris@16
|
138 if (i == (s.size()-1)) {
|
Chris@16
|
139 ti = ti->second.m_next_chars.insert(value_type(s[i],
|
Chris@16
|
140 string_parse_tree<charT>(value)));
|
Chris@16
|
141 }
|
Chris@16
|
142
|
Chris@16
|
143 else {
|
Chris@16
|
144 ti = ti->second.m_next_chars.insert(value_type(s[i],
|
Chris@16
|
145 string_parse_tree<charT>()));
|
Chris@16
|
146 }
|
Chris@16
|
147
|
Chris@16
|
148 }
|
Chris@16
|
149 i++;
|
Chris@16
|
150 }
|
Chris@16
|
151 }
|
Chris@16
|
152
|
Chris@16
|
153
|
Chris@16
|
154 //! Recursive function that finds a matching string in the tree.
|
Chris@16
|
155 /*! Must check match_results::has_remaining() after match() is
|
Chris@16
|
156 * called. This is required so the user can determine if
|
Chris@16
|
157 * stream iterator is already pointing to the expected
|
Chris@16
|
158 * character or not (match() might advance sitr to next char in stream).
|
Chris@16
|
159 *
|
Chris@16
|
160 * A parse_match_result that has been returned from a failed match
|
Chris@16
|
161 * attempt can be sent in to the match function of a different
|
Chris@16
|
162 * string_parse_tree to attempt a match there. Use the iterators
|
Chris@16
|
163 * for the partially consumed stream, the parse_match_result object,
|
Chris@16
|
164 * and '0' for the level parameter. */
|
Chris@16
|
165 short
|
Chris@16
|
166 match(std::istreambuf_iterator<charT>& sitr,
|
Chris@16
|
167 std::istreambuf_iterator<charT>& stream_end,
|
Chris@16
|
168 parse_match_result_type& result,
|
Chris@16
|
169 unsigned int& level) const
|
Chris@16
|
170 {
|
Chris@16
|
171
|
Chris@16
|
172 level++;
|
Chris@16
|
173 charT c;
|
Chris@16
|
174 // if we conditionally advance sitr, we won't have
|
Chris@16
|
175 // to consume the next character past the input
|
Chris@16
|
176 bool adv_itr = true;
|
Chris@16
|
177 if (level > result.cache.size()) {
|
Chris@16
|
178 if (sitr == stream_end) return 0; //bail - input exhausted
|
Chris@16
|
179 c = static_cast<charT>(std::tolower(*sitr));
|
Chris@16
|
180 //result.cache += c;
|
Chris@16
|
181 //sitr++;
|
Chris@16
|
182 }
|
Chris@16
|
183 else {
|
Chris@16
|
184 // if we're looking for characters from the cache,
|
Chris@16
|
185 // we don't want to increment sitr
|
Chris@16
|
186 adv_itr = false;
|
Chris@16
|
187 c = static_cast<charT>(std::tolower(result.cache[level-1]));
|
Chris@16
|
188 }
|
Chris@16
|
189 const_iterator litr = m_next_chars.lower_bound(c);
|
Chris@16
|
190 const_iterator uitr = m_next_chars.upper_bound(c);
|
Chris@16
|
191 while (litr != uitr) { // equal if not found
|
Chris@16
|
192 if(adv_itr) {
|
Chris@16
|
193 sitr++;
|
Chris@16
|
194 result.cache += c;
|
Chris@16
|
195 }
|
Chris@16
|
196 if (litr->second.m_value != -1) { // -1 is default value
|
Chris@16
|
197 if (result.match_depth < level) {
|
Chris@16
|
198 result.current_match = litr->second.m_value;
|
Chris@16
|
199 result.match_depth = static_cast<unsigned short>(level);
|
Chris@16
|
200 }
|
Chris@16
|
201 litr->second.match(sitr, stream_end,
|
Chris@16
|
202 result, level);
|
Chris@16
|
203 level--;
|
Chris@16
|
204 }
|
Chris@16
|
205 else {
|
Chris@16
|
206 litr->second.match(sitr, stream_end,
|
Chris@16
|
207 result, level);
|
Chris@16
|
208 level--;
|
Chris@16
|
209 }
|
Chris@16
|
210
|
Chris@16
|
211 if(level <= result.cache.size()) {
|
Chris@16
|
212 adv_itr = false;
|
Chris@16
|
213 }
|
Chris@16
|
214
|
Chris@16
|
215 litr++;
|
Chris@16
|
216 }
|
Chris@16
|
217 return result.current_match;
|
Chris@16
|
218
|
Chris@16
|
219 }
|
Chris@16
|
220
|
Chris@16
|
221 /*! Must check match_results::has_remaining() after match() is
|
Chris@16
|
222 * called. This is required so the user can determine if
|
Chris@16
|
223 * stream iterator is already pointing to the expected
|
Chris@16
|
224 * character or not (match() might advance sitr to next char in stream).
|
Chris@16
|
225 */
|
Chris@16
|
226 parse_match_result_type
|
Chris@16
|
227 match(std::istreambuf_iterator<charT>& sitr,
|
Chris@16
|
228 std::istreambuf_iterator<charT>& stream_end) const
|
Chris@16
|
229 {
|
Chris@16
|
230 // lookup to_lower of char in tree.
|
Chris@16
|
231 unsigned int level = 0;
|
Chris@16
|
232 // string_type cache;
|
Chris@16
|
233 parse_match_result_type result;
|
Chris@16
|
234 match(sitr, stream_end, result, level);
|
Chris@16
|
235 return result;
|
Chris@16
|
236 }
|
Chris@16
|
237
|
Chris@16
|
238 void printme(std::ostream& os, int& level)
|
Chris@16
|
239 {
|
Chris@16
|
240 level++;
|
Chris@16
|
241 iterator itr = m_next_chars.begin();
|
Chris@16
|
242 iterator end = m_next_chars.end();
|
Chris@16
|
243 // os << "starting level: " << level << std::endl;
|
Chris@16
|
244 while (itr != end) {
|
Chris@16
|
245 os << "level: " << level
|
Chris@16
|
246 << " node: " << itr->first
|
Chris@16
|
247 << " value: " << itr->second.m_value
|
Chris@16
|
248 << std::endl;
|
Chris@16
|
249 itr->second.printme(os, level);
|
Chris@16
|
250 itr++;
|
Chris@16
|
251 }
|
Chris@16
|
252 level--;
|
Chris@16
|
253 }
|
Chris@16
|
254
|
Chris@16
|
255 void print(std::ostream& os)
|
Chris@16
|
256 {
|
Chris@16
|
257 int level = 0;
|
Chris@16
|
258 printme(os, level);
|
Chris@16
|
259 }
|
Chris@16
|
260
|
Chris@16
|
261 void printmatch(std::ostream& os, charT c)
|
Chris@16
|
262 {
|
Chris@16
|
263 iterator litr = m_next_chars.lower_bound(c);
|
Chris@16
|
264 iterator uitr = m_next_chars.upper_bound(c);
|
Chris@16
|
265 os << "matches for: " << c << std::endl;
|
Chris@16
|
266 while (litr != uitr) {
|
Chris@16
|
267 os << " node: " << litr->first
|
Chris@16
|
268 << " value: " << litr->second.m_value
|
Chris@16
|
269 << std::endl;
|
Chris@16
|
270 litr++;
|
Chris@16
|
271 }
|
Chris@16
|
272 }
|
Chris@16
|
273
|
Chris@16
|
274 };
|
Chris@16
|
275
|
Chris@16
|
276
|
Chris@16
|
277 } } //namespace
|
Chris@16
|
278 #endif
|