Chris@16
|
1 //
|
Chris@16
|
2 // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
|
Chris@16
|
3 //
|
Chris@16
|
4 // Distributed under the Boost Software License, Version 1.0. (See
|
Chris@16
|
5 // accompanying file LICENSE_1_0.txt or copy at
|
Chris@16
|
6 // http://www.boost.org/LICENSE_1_0.txt)
|
Chris@16
|
7 //
|
Chris@16
|
8 #ifndef BOOST_LOCALE_BOUNDARY_INDEX_HPP_INCLUDED
|
Chris@16
|
9 #define BOOST_LOCALE_BOUNDARY_INDEX_HPP_INCLUDED
|
Chris@16
|
10
|
Chris@16
|
11 #include <boost/locale/config.hpp>
|
Chris@16
|
12 #include <boost/locale/boundary/types.hpp>
|
Chris@16
|
13 #include <boost/locale/boundary/facets.hpp>
|
Chris@16
|
14 #include <boost/locale/boundary/segment.hpp>
|
Chris@16
|
15 #include <boost/locale/boundary/boundary_point.hpp>
|
Chris@16
|
16 #include <boost/iterator/iterator_facade.hpp>
|
Chris@16
|
17 #include <boost/type_traits/is_same.hpp>
|
Chris@16
|
18 #include <boost/shared_ptr.hpp>
|
Chris@16
|
19 #include <boost/cstdint.hpp>
|
Chris@16
|
20 #include <boost/assert.hpp>
|
Chris@16
|
21 #ifdef BOOST_MSVC
|
Chris@16
|
22 # pragma warning(push)
|
Chris@16
|
23 # pragma warning(disable : 4275 4251 4231 4660)
|
Chris@16
|
24 #endif
|
Chris@16
|
25 #include <string>
|
Chris@16
|
26 #include <locale>
|
Chris@16
|
27 #include <vector>
|
Chris@16
|
28 #include <iterator>
|
Chris@16
|
29 #include <algorithm>
|
Chris@16
|
30 #include <stdexcept>
|
Chris@16
|
31
|
Chris@16
|
32 #include <iostream>
|
Chris@16
|
33
|
Chris@16
|
34 namespace boost {
|
Chris@16
|
35
|
Chris@16
|
36 namespace locale {
|
Chris@16
|
37
|
Chris@16
|
38 namespace boundary {
|
Chris@16
|
39 ///
|
Chris@16
|
40 /// \defgroup boundary Boundary Analysis
|
Chris@16
|
41 ///
|
Chris@16
|
42 /// This module contains all operations required for %boundary analysis of text: character, word, line and sentence boundaries
|
Chris@16
|
43 ///
|
Chris@16
|
44 /// @{
|
Chris@16
|
45 ///
|
Chris@16
|
46
|
Chris@16
|
47 /// \cond INTERNAL
|
Chris@16
|
48
|
Chris@16
|
49 namespace details {
|
Chris@16
|
50
|
Chris@16
|
51 template<typename IteratorType,typename CategoryType = typename std::iterator_traits<IteratorType>::iterator_category>
|
Chris@16
|
52 struct mapping_traits {
|
Chris@16
|
53 typedef typename std::iterator_traits<IteratorType>::value_type char_type;
|
Chris@16
|
54 static index_type map(boundary_type t,IteratorType b,IteratorType e,std::locale const &l)
|
Chris@16
|
55 {
|
Chris@16
|
56 std::basic_string<char_type> str(b,e);
|
Chris@16
|
57 return std::use_facet<boundary_indexing<char_type> >(l).map(t,str.c_str(),str.c_str()+str.size());
|
Chris@16
|
58 }
|
Chris@16
|
59 };
|
Chris@16
|
60
|
Chris@16
|
61 template<typename CharType,typename SomeIteratorType>
|
Chris@16
|
62 struct linear_iterator_traits {
|
Chris@16
|
63 static const bool is_linear =
|
Chris@16
|
64 is_same<SomeIteratorType,CharType*>::value
|
Chris@16
|
65 || is_same<SomeIteratorType,CharType const*>::value
|
Chris@16
|
66 || is_same<SomeIteratorType,typename std::basic_string<CharType>::iterator>::value
|
Chris@16
|
67 || is_same<SomeIteratorType,typename std::basic_string<CharType>::const_iterator>::value
|
Chris@16
|
68 || is_same<SomeIteratorType,typename std::vector<CharType>::iterator>::value
|
Chris@16
|
69 || is_same<SomeIteratorType,typename std::vector<CharType>::const_iterator>::value
|
Chris@16
|
70 ;
|
Chris@16
|
71 };
|
Chris@16
|
72
|
Chris@16
|
73
|
Chris@16
|
74
|
Chris@16
|
75 template<typename IteratorType>
|
Chris@16
|
76 struct mapping_traits<IteratorType,std::random_access_iterator_tag> {
|
Chris@16
|
77
|
Chris@16
|
78 typedef typename std::iterator_traits<IteratorType>::value_type char_type;
|
Chris@16
|
79
|
Chris@16
|
80
|
Chris@16
|
81
|
Chris@16
|
82 static index_type map(boundary_type t,IteratorType b,IteratorType e,std::locale const &l)
|
Chris@16
|
83 {
|
Chris@16
|
84 index_type result;
|
Chris@16
|
85
|
Chris@16
|
86 //
|
Chris@16
|
87 // Optimize for most common cases
|
Chris@16
|
88 //
|
Chris@16
|
89 // C++0x requires that string is continious in memory and all known
|
Chris@16
|
90 // string implementations
|
Chris@16
|
91 // do this because of c_str() support.
|
Chris@16
|
92 //
|
Chris@16
|
93
|
Chris@16
|
94 if(linear_iterator_traits<char_type,IteratorType>::is_linear && b!=e)
|
Chris@16
|
95 {
|
Chris@16
|
96 char_type const *begin = &*b;
|
Chris@16
|
97 char_type const *end = begin + (e-b);
|
Chris@16
|
98 index_type tmp=std::use_facet<boundary_indexing<char_type> >(l).map(t,begin,end);
|
Chris@16
|
99 result.swap(tmp);
|
Chris@16
|
100 }
|
Chris@16
|
101 else {
|
Chris@16
|
102 std::basic_string<char_type> str(b,e);
|
Chris@16
|
103 index_type tmp = std::use_facet<boundary_indexing<char_type> >(l).map(t,str.c_str(),str.c_str()+str.size());
|
Chris@16
|
104 result.swap(tmp);
|
Chris@16
|
105 }
|
Chris@16
|
106 return result;
|
Chris@16
|
107 }
|
Chris@16
|
108 };
|
Chris@16
|
109
|
Chris@16
|
110 template<typename BaseIterator>
|
Chris@16
|
111 class mapping {
|
Chris@16
|
112 public:
|
Chris@16
|
113 typedef BaseIterator base_iterator;
|
Chris@16
|
114 typedef typename std::iterator_traits<base_iterator>::value_type char_type;
|
Chris@16
|
115
|
Chris@16
|
116
|
Chris@16
|
117 mapping(boundary_type type,
|
Chris@16
|
118 base_iterator begin,
|
Chris@16
|
119 base_iterator end,
|
Chris@16
|
120 std::locale const &loc)
|
Chris@16
|
121 :
|
Chris@16
|
122 index_(new index_type()),
|
Chris@16
|
123 begin_(begin),
|
Chris@16
|
124 end_(end)
|
Chris@16
|
125 {
|
Chris@16
|
126 index_type idx=details::mapping_traits<base_iterator>::map(type,begin,end,loc);
|
Chris@16
|
127 index_->swap(idx);
|
Chris@16
|
128 }
|
Chris@16
|
129
|
Chris@16
|
130 mapping()
|
Chris@16
|
131 {
|
Chris@16
|
132 }
|
Chris@16
|
133
|
Chris@16
|
134 index_type const &index() const
|
Chris@16
|
135 {
|
Chris@16
|
136 return *index_;
|
Chris@16
|
137 }
|
Chris@16
|
138
|
Chris@16
|
139 base_iterator begin() const
|
Chris@16
|
140 {
|
Chris@16
|
141 return begin_;
|
Chris@16
|
142 }
|
Chris@16
|
143
|
Chris@16
|
144 base_iterator end() const
|
Chris@16
|
145 {
|
Chris@16
|
146 return end_;
|
Chris@16
|
147 }
|
Chris@16
|
148
|
Chris@16
|
149 private:
|
Chris@16
|
150 boost::shared_ptr<index_type> index_;
|
Chris@16
|
151 base_iterator begin_,end_;
|
Chris@16
|
152 };
|
Chris@16
|
153
|
Chris@16
|
154 template<typename BaseIterator>
|
Chris@16
|
155 class segment_index_iterator :
|
Chris@16
|
156 public boost::iterator_facade<
|
Chris@16
|
157 segment_index_iterator<BaseIterator>,
|
Chris@16
|
158 segment<BaseIterator>,
|
Chris@16
|
159 boost::bidirectional_traversal_tag,
|
Chris@16
|
160 segment<BaseIterator> const &
|
Chris@16
|
161 >
|
Chris@16
|
162 {
|
Chris@16
|
163 public:
|
Chris@16
|
164 typedef BaseIterator base_iterator;
|
Chris@16
|
165 typedef mapping<base_iterator> mapping_type;
|
Chris@16
|
166 typedef segment<base_iterator> segment_type;
|
Chris@16
|
167
|
Chris@16
|
168 segment_index_iterator() : current_(0,0),map_(0)
|
Chris@16
|
169 {
|
Chris@16
|
170 }
|
Chris@16
|
171
|
Chris@16
|
172 segment_index_iterator(base_iterator p,mapping_type const *map,rule_type mask,bool full_select) :
|
Chris@16
|
173 map_(map),
|
Chris@16
|
174 mask_(mask),
|
Chris@16
|
175 full_select_(full_select)
|
Chris@16
|
176 {
|
Chris@16
|
177 set(p);
|
Chris@16
|
178 }
|
Chris@16
|
179 segment_index_iterator(bool is_begin,mapping_type const *map,rule_type mask,bool full_select) :
|
Chris@16
|
180 map_(map),
|
Chris@16
|
181 mask_(mask),
|
Chris@16
|
182 full_select_(full_select)
|
Chris@16
|
183 {
|
Chris@16
|
184 if(is_begin)
|
Chris@16
|
185 set_begin();
|
Chris@16
|
186 else
|
Chris@16
|
187 set_end();
|
Chris@16
|
188 }
|
Chris@16
|
189
|
Chris@16
|
190 segment_type const &dereference() const
|
Chris@16
|
191 {
|
Chris@16
|
192 return value_;
|
Chris@16
|
193 }
|
Chris@16
|
194
|
Chris@16
|
195 bool equal(segment_index_iterator const &other) const
|
Chris@16
|
196 {
|
Chris@16
|
197 return map_ == other.map_ && current_.second == other.current_.second;
|
Chris@16
|
198 }
|
Chris@16
|
199
|
Chris@16
|
200 void increment()
|
Chris@16
|
201 {
|
Chris@16
|
202 std::pair<size_t,size_t> next = current_;
|
Chris@16
|
203 if(full_select_) {
|
Chris@16
|
204 next.first = next.second;
|
Chris@16
|
205 while(next.second < size()) {
|
Chris@16
|
206 next.second++;
|
Chris@16
|
207 if(valid_offset(next.second))
|
Chris@16
|
208 break;
|
Chris@16
|
209 }
|
Chris@16
|
210 if(next.second == size())
|
Chris@16
|
211 next.first = next.second - 1;
|
Chris@16
|
212 }
|
Chris@16
|
213 else {
|
Chris@16
|
214 while(next.second < size()) {
|
Chris@16
|
215 next.first = next.second;
|
Chris@16
|
216 next.second++;
|
Chris@16
|
217 if(valid_offset(next.second))
|
Chris@16
|
218 break;
|
Chris@16
|
219 }
|
Chris@16
|
220 }
|
Chris@16
|
221 update_current(next);
|
Chris@16
|
222 }
|
Chris@16
|
223
|
Chris@16
|
224 void decrement()
|
Chris@16
|
225 {
|
Chris@16
|
226 std::pair<size_t,size_t> next = current_;
|
Chris@16
|
227 if(full_select_) {
|
Chris@16
|
228 while(next.second >1) {
|
Chris@16
|
229 next.second--;
|
Chris@16
|
230 if(valid_offset(next.second))
|
Chris@16
|
231 break;
|
Chris@16
|
232 }
|
Chris@16
|
233 next.first = next.second;
|
Chris@16
|
234 while(next.first >0) {
|
Chris@16
|
235 next.first--;
|
Chris@16
|
236 if(valid_offset(next.first))
|
Chris@16
|
237 break;
|
Chris@16
|
238 }
|
Chris@16
|
239 }
|
Chris@16
|
240 else {
|
Chris@16
|
241 while(next.second >1) {
|
Chris@16
|
242 next.second--;
|
Chris@16
|
243 if(valid_offset(next.second))
|
Chris@16
|
244 break;
|
Chris@16
|
245 }
|
Chris@16
|
246 next.first = next.second - 1;
|
Chris@16
|
247 }
|
Chris@16
|
248 update_current(next);
|
Chris@16
|
249 }
|
Chris@16
|
250
|
Chris@16
|
251 private:
|
Chris@16
|
252
|
Chris@16
|
253 void set_end()
|
Chris@16
|
254 {
|
Chris@16
|
255 current_.first = size() - 1;
|
Chris@16
|
256 current_.second = size();
|
Chris@16
|
257 value_ = segment_type(map_->end(),map_->end(),0);
|
Chris@16
|
258 }
|
Chris@16
|
259 void set_begin()
|
Chris@16
|
260 {
|
Chris@16
|
261 current_.first = current_.second = 0;
|
Chris@16
|
262 value_ = segment_type(map_->begin(),map_->begin(),0);
|
Chris@16
|
263 increment();
|
Chris@16
|
264 }
|
Chris@16
|
265
|
Chris@16
|
266 void set(base_iterator p)
|
Chris@16
|
267 {
|
Chris@16
|
268 size_t dist=std::distance(map_->begin(),p);
|
Chris@16
|
269 index_type::const_iterator b=map_->index().begin(),e=map_->index().end();
|
Chris@16
|
270 index_type::const_iterator
|
Chris@16
|
271 boundary_point=std::upper_bound(b,e,break_info(dist));
|
Chris@16
|
272 while(boundary_point != e && (boundary_point->rule & mask_)==0)
|
Chris@16
|
273 boundary_point++;
|
Chris@16
|
274
|
Chris@16
|
275 current_.first = current_.second = boundary_point - b;
|
Chris@16
|
276
|
Chris@16
|
277 if(full_select_) {
|
Chris@16
|
278 while(current_.first > 0) {
|
Chris@16
|
279 current_.first --;
|
Chris@16
|
280 if(valid_offset(current_.first))
|
Chris@16
|
281 break;
|
Chris@16
|
282 }
|
Chris@16
|
283 }
|
Chris@16
|
284 else {
|
Chris@16
|
285 if(current_.first > 0)
|
Chris@16
|
286 current_.first --;
|
Chris@16
|
287 }
|
Chris@16
|
288 value_.first = map_->begin();
|
Chris@16
|
289 std::advance(value_.first,get_offset(current_.first));
|
Chris@16
|
290 value_.second = value_.first;
|
Chris@16
|
291 std::advance(value_.second,get_offset(current_.second) - get_offset(current_.first));
|
Chris@16
|
292
|
Chris@16
|
293 update_rule();
|
Chris@16
|
294 }
|
Chris@16
|
295
|
Chris@16
|
296 void update_current(std::pair<size_t,size_t> pos)
|
Chris@16
|
297 {
|
Chris@16
|
298 std::ptrdiff_t first_diff = get_offset(pos.first) - get_offset(current_.first);
|
Chris@16
|
299 std::ptrdiff_t second_diff = get_offset(pos.second) - get_offset(current_.second);
|
Chris@16
|
300 std::advance(value_.first,first_diff);
|
Chris@16
|
301 std::advance(value_.second,second_diff);
|
Chris@16
|
302 current_ = pos;
|
Chris@16
|
303 update_rule();
|
Chris@16
|
304 }
|
Chris@16
|
305
|
Chris@16
|
306 void update_rule()
|
Chris@16
|
307 {
|
Chris@16
|
308 if(current_.second != size()) {
|
Chris@16
|
309 value_.rule(index()[current_.second].rule);
|
Chris@16
|
310 }
|
Chris@16
|
311 }
|
Chris@16
|
312 size_t get_offset(size_t ind) const
|
Chris@16
|
313 {
|
Chris@16
|
314 if(ind == size())
|
Chris@16
|
315 return index().back().offset;
|
Chris@16
|
316 return index()[ind].offset;
|
Chris@16
|
317 }
|
Chris@16
|
318
|
Chris@16
|
319 bool valid_offset(size_t offset) const
|
Chris@16
|
320 {
|
Chris@16
|
321 return offset == 0
|
Chris@16
|
322 || offset == size() // make sure we not acess index[size]
|
Chris@16
|
323 || (index()[offset].rule & mask_)!=0;
|
Chris@16
|
324 }
|
Chris@16
|
325
|
Chris@16
|
326 size_t size() const
|
Chris@16
|
327 {
|
Chris@16
|
328 return index().size();
|
Chris@16
|
329 }
|
Chris@16
|
330
|
Chris@16
|
331 index_type const &index() const
|
Chris@16
|
332 {
|
Chris@16
|
333 return map_->index();
|
Chris@16
|
334 }
|
Chris@16
|
335
|
Chris@16
|
336
|
Chris@16
|
337 segment_type value_;
|
Chris@16
|
338 std::pair<size_t,size_t> current_;
|
Chris@16
|
339 mapping_type const *map_;
|
Chris@16
|
340 rule_type mask_;
|
Chris@16
|
341 bool full_select_;
|
Chris@16
|
342 };
|
Chris@16
|
343
|
Chris@16
|
344 template<typename BaseIterator>
|
Chris@16
|
345 class boundary_point_index_iterator :
|
Chris@16
|
346 public boost::iterator_facade<
|
Chris@16
|
347 boundary_point_index_iterator<BaseIterator>,
|
Chris@16
|
348 boundary_point<BaseIterator>,
|
Chris@16
|
349 boost::bidirectional_traversal_tag,
|
Chris@16
|
350 boundary_point<BaseIterator> const &
|
Chris@16
|
351 >
|
Chris@16
|
352 {
|
Chris@16
|
353 public:
|
Chris@16
|
354 typedef BaseIterator base_iterator;
|
Chris@16
|
355 typedef mapping<base_iterator> mapping_type;
|
Chris@16
|
356 typedef boundary_point<base_iterator> boundary_point_type;
|
Chris@16
|
357
|
Chris@16
|
358 boundary_point_index_iterator() : current_(0),map_(0)
|
Chris@16
|
359 {
|
Chris@16
|
360 }
|
Chris@16
|
361
|
Chris@16
|
362 boundary_point_index_iterator(bool is_begin,mapping_type const *map,rule_type mask) :
|
Chris@16
|
363 map_(map),
|
Chris@16
|
364 mask_(mask)
|
Chris@16
|
365 {
|
Chris@16
|
366 if(is_begin)
|
Chris@16
|
367 set_begin();
|
Chris@16
|
368 else
|
Chris@16
|
369 set_end();
|
Chris@16
|
370 }
|
Chris@16
|
371 boundary_point_index_iterator(base_iterator p,mapping_type const *map,rule_type mask) :
|
Chris@16
|
372 map_(map),
|
Chris@16
|
373 mask_(mask)
|
Chris@16
|
374 {
|
Chris@16
|
375 set(p);
|
Chris@16
|
376 }
|
Chris@16
|
377
|
Chris@16
|
378 boundary_point_type const &dereference() const
|
Chris@16
|
379 {
|
Chris@16
|
380 return value_;
|
Chris@16
|
381 }
|
Chris@16
|
382
|
Chris@16
|
383 bool equal(boundary_point_index_iterator const &other) const
|
Chris@16
|
384 {
|
Chris@16
|
385 return map_ == other.map_ && current_ == other.current_;
|
Chris@16
|
386 }
|
Chris@16
|
387
|
Chris@16
|
388 void increment()
|
Chris@16
|
389 {
|
Chris@16
|
390 size_t next = current_;
|
Chris@16
|
391 while(next < size()) {
|
Chris@16
|
392 next++;
|
Chris@16
|
393 if(valid_offset(next))
|
Chris@16
|
394 break;
|
Chris@16
|
395 }
|
Chris@16
|
396 update_current(next);
|
Chris@16
|
397 }
|
Chris@16
|
398
|
Chris@16
|
399 void decrement()
|
Chris@16
|
400 {
|
Chris@16
|
401 size_t next = current_;
|
Chris@16
|
402 while(next>0) {
|
Chris@16
|
403 next--;
|
Chris@16
|
404 if(valid_offset(next))
|
Chris@16
|
405 break;
|
Chris@16
|
406 }
|
Chris@16
|
407 update_current(next);
|
Chris@16
|
408 }
|
Chris@16
|
409
|
Chris@16
|
410 private:
|
Chris@16
|
411 void set_end()
|
Chris@16
|
412 {
|
Chris@16
|
413 current_ = size();
|
Chris@16
|
414 value_ = boundary_point_type(map_->end(),0);
|
Chris@16
|
415 }
|
Chris@16
|
416 void set_begin()
|
Chris@16
|
417 {
|
Chris@16
|
418 current_ = 0;
|
Chris@16
|
419 value_ = boundary_point_type(map_->begin(),0);
|
Chris@16
|
420 }
|
Chris@16
|
421
|
Chris@16
|
422 void set(base_iterator p)
|
Chris@16
|
423 {
|
Chris@16
|
424 size_t dist = std::distance(map_->begin(),p);
|
Chris@16
|
425
|
Chris@16
|
426 index_type::const_iterator b=index().begin();
|
Chris@16
|
427 index_type::const_iterator e=index().end();
|
Chris@16
|
428 index_type::const_iterator ptr = std::lower_bound(b,e,break_info(dist));
|
Chris@16
|
429
|
Chris@16
|
430 if(ptr==index().end())
|
Chris@16
|
431 current_=size()-1;
|
Chris@16
|
432 else
|
Chris@16
|
433 current_=ptr - index().begin();
|
Chris@16
|
434
|
Chris@16
|
435 while(!valid_offset(current_))
|
Chris@16
|
436 current_ ++;
|
Chris@16
|
437
|
Chris@16
|
438 std::ptrdiff_t diff = get_offset(current_) - dist;
|
Chris@16
|
439 std::advance(p,diff);
|
Chris@16
|
440 value_.iterator(p);
|
Chris@16
|
441 update_rule();
|
Chris@16
|
442 }
|
Chris@16
|
443
|
Chris@16
|
444 void update_current(size_t pos)
|
Chris@16
|
445 {
|
Chris@16
|
446 std::ptrdiff_t diff = get_offset(pos) - get_offset(current_);
|
Chris@16
|
447 base_iterator i=value_.iterator();
|
Chris@16
|
448 std::advance(i,diff);
|
Chris@16
|
449 current_ = pos;
|
Chris@16
|
450 value_.iterator(i);
|
Chris@16
|
451 update_rule();
|
Chris@16
|
452 }
|
Chris@16
|
453
|
Chris@16
|
454 void update_rule()
|
Chris@16
|
455 {
|
Chris@16
|
456 if(current_ != size()) {
|
Chris@16
|
457 value_.rule(index()[current_].rule);
|
Chris@16
|
458 }
|
Chris@16
|
459 }
|
Chris@16
|
460 size_t get_offset(size_t ind) const
|
Chris@16
|
461 {
|
Chris@16
|
462 if(ind == size())
|
Chris@16
|
463 return index().back().offset;
|
Chris@16
|
464 return index()[ind].offset;
|
Chris@16
|
465 }
|
Chris@16
|
466
|
Chris@16
|
467 bool valid_offset(size_t offset) const
|
Chris@16
|
468 {
|
Chris@16
|
469 return offset == 0
|
Chris@16
|
470 || offset + 1 >= size() // last and first are always valid regardless of mark
|
Chris@16
|
471 || (index()[offset].rule & mask_)!=0;
|
Chris@16
|
472 }
|
Chris@16
|
473
|
Chris@16
|
474 size_t size() const
|
Chris@16
|
475 {
|
Chris@16
|
476 return index().size();
|
Chris@16
|
477 }
|
Chris@16
|
478
|
Chris@16
|
479 index_type const &index() const
|
Chris@16
|
480 {
|
Chris@16
|
481 return map_->index();
|
Chris@16
|
482 }
|
Chris@16
|
483
|
Chris@16
|
484
|
Chris@16
|
485 boundary_point_type value_;
|
Chris@16
|
486 size_t current_;
|
Chris@16
|
487 mapping_type const *map_;
|
Chris@16
|
488 rule_type mask_;
|
Chris@16
|
489 };
|
Chris@16
|
490
|
Chris@16
|
491
|
Chris@16
|
492 } // details
|
Chris@16
|
493
|
Chris@16
|
494 /// \endcond
|
Chris@16
|
495
|
Chris@16
|
496 template<typename BaseIterator>
|
Chris@16
|
497 class segment_index;
|
Chris@16
|
498
|
Chris@16
|
499 template<typename BaseIterator>
|
Chris@16
|
500 class boundary_point_index;
|
Chris@16
|
501
|
Chris@16
|
502
|
Chris@16
|
503 ///
|
Chris@16
|
504 /// \brief This class holds an index of segments in the text range and allows to iterate over them
|
Chris@16
|
505 ///
|
Chris@16
|
506 /// This class provides \ref begin() and \ref end() member functions that return bidirectional iterators
|
Chris@16
|
507 /// to the \ref segment objects.
|
Chris@16
|
508 ///
|
Chris@16
|
509 /// It provides two options on way of selecting segments:
|
Chris@16
|
510 ///
|
Chris@16
|
511 /// - \ref rule(rule_type mask) - a mask that allows to select only specific types of segments according to
|
Chris@16
|
512 /// various masks %as \ref word_any.
|
Chris@16
|
513 /// \n
|
Chris@16
|
514 /// The default is to select any types of boundaries.
|
Chris@16
|
515 /// \n
|
Chris@16
|
516 /// For example: using word %boundary analysis, when the provided mask is \ref word_kana then the iterators
|
Chris@16
|
517 /// would iterate only over the words containing Kana letters and \ref word_any would select all types of
|
Chris@16
|
518 /// words excluding ranges that consist of white space and punctuation marks. So iterating over the text
|
Chris@16
|
519 /// "to be or not to be?" with \ref word_any rule would return segments "to", "be", "or", "not", "to", "be", instead
|
Chris@16
|
520 /// of default "to", " ", "be", " ", "or", " ", "not", " ", "to", " ", "be", "?".
|
Chris@16
|
521 /// - \ref full_select(bool how) - a flag that defines the way a range is selected if the rule of the previous
|
Chris@16
|
522 /// %boundary point does not fit the selected rule.
|
Chris@16
|
523 /// \n
|
Chris@16
|
524 /// For example: We want to fetch all sentences from the following text: "Hello! How\nare you?".
|
Chris@16
|
525 /// \n
|
Chris@16
|
526 /// This text contains three %boundary points separating it to sentences by different rules:
|
Chris@16
|
527 /// - The exclamation mark "!" ends the sentence "Hello!"
|
Chris@16
|
528 /// - The line feed that splits the sentence "How\nare you?" into two parts.
|
Chris@16
|
529 /// - The question mark that ends the second sentence.
|
Chris@16
|
530 /// \n
|
Chris@16
|
531 /// If you would only change the \ref rule() to \ref sentence_term then the segment_index would
|
Chris@16
|
532 /// provide two sentences "Hello!" and "are you?" %as only them actually terminated with required
|
Chris@16
|
533 /// terminator "!" or "?". But changing \ref full_select() to true, the selected segment would include
|
Chris@16
|
534 /// all the text up to previous valid %boundary point and would return two expected sentences:
|
Chris@16
|
535 /// "Hello!" and "How\nare you?".
|
Chris@16
|
536 ///
|
Chris@16
|
537 /// This class allows to find a segment according to the given iterator in range using \ref find() member
|
Chris@16
|
538 /// function.
|
Chris@16
|
539 ///
|
Chris@16
|
540 /// \note
|
Chris@16
|
541 ///
|
Chris@16
|
542 /// - Changing any of the options - \ref rule() or \ref full_select() and of course re-indexing the text
|
Chris@16
|
543 /// invalidates existing iterators and they can't be used any more.
|
Chris@16
|
544 /// - segment_index can be created from boundary_point_index or other segment_index that was created with
|
Chris@16
|
545 /// same \ref boundary_type. This is very fast operation %as they shared same index
|
Chris@16
|
546 /// and it does not require its regeneration.
|
Chris@16
|
547 ///
|
Chris@16
|
548 /// \see
|
Chris@16
|
549 ///
|
Chris@16
|
550 /// - \ref boundary_point_index
|
Chris@16
|
551 /// - \ref segment
|
Chris@16
|
552 /// - \ref boundary_point
|
Chris@16
|
553 ///
|
Chris@16
|
554
|
Chris@16
|
555 template<typename BaseIterator>
|
Chris@16
|
556 class segment_index {
|
Chris@16
|
557 public:
|
Chris@16
|
558
|
Chris@16
|
559 ///
|
Chris@16
|
560 /// The type of the iterator used to iterate over the original text
|
Chris@16
|
561 ///
|
Chris@16
|
562 typedef BaseIterator base_iterator;
|
Chris@16
|
563 #ifdef BOOST_LOCALE_DOXYGEN
|
Chris@16
|
564 ///
|
Chris@16
|
565 /// The bidirectional iterator that iterates over \ref value_type objects.
|
Chris@16
|
566 ///
|
Chris@16
|
567 /// - The iterators may be invalidated by use of any non-const member function
|
Chris@16
|
568 /// including but not limited to \ref rule(rule_type) and \ref full_select(bool).
|
Chris@16
|
569 /// - The returned value_type object is valid %as long %as iterator points to it.
|
Chris@16
|
570 /// So this following code is wrong %as t used after p was updated:
|
Chris@16
|
571 /// \code
|
Chris@16
|
572 /// segment_index<some_iterator>::iterator p=index.begin();
|
Chris@16
|
573 /// segment<some_iterator> &t = *p;
|
Chris@16
|
574 /// ++p;
|
Chris@16
|
575 /// cout << t.str() << endl;
|
Chris@16
|
576 /// \endcode
|
Chris@16
|
577 ///
|
Chris@16
|
578 typedef unspecified_iterator_type iterator;
|
Chris@16
|
579 ///
|
Chris@16
|
580 /// \copydoc iterator
|
Chris@16
|
581 ///
|
Chris@16
|
582 typedef unspecified_iterator_type const_iterator;
|
Chris@16
|
583 #else
|
Chris@16
|
584 typedef details::segment_index_iterator<base_iterator> iterator;
|
Chris@16
|
585 typedef details::segment_index_iterator<base_iterator> const_iterator;
|
Chris@16
|
586 #endif
|
Chris@16
|
587 ///
|
Chris@16
|
588 /// The type dereferenced by the \ref iterator and \ref const_iterator. It is
|
Chris@16
|
589 /// an object that represents selected segment.
|
Chris@16
|
590 ///
|
Chris@16
|
591 typedef segment<base_iterator> value_type;
|
Chris@16
|
592
|
Chris@16
|
593 ///
|
Chris@16
|
594 /// Default constructor.
|
Chris@16
|
595 ///
|
Chris@16
|
596 /// \note
|
Chris@16
|
597 ///
|
Chris@16
|
598 /// When this object is constructed by default it does not include a valid index, thus
|
Chris@16
|
599 /// calling \ref begin(), \ref end() or \ref find() member functions would lead to undefined
|
Chris@16
|
600 /// behavior
|
Chris@16
|
601 ///
|
Chris@16
|
602 segment_index() : mask_(0xFFFFFFFFu),full_select_(false)
|
Chris@16
|
603 {
|
Chris@16
|
604 }
|
Chris@16
|
605 ///
|
Chris@16
|
606 /// Create a segment_index for %boundary analysis \ref boundary_type "type" of the text
|
Chris@16
|
607 /// in range [begin,end) using a rule \a mask for locale \a loc.
|
Chris@16
|
608 ///
|
Chris@16
|
609 segment_index(boundary_type type,
|
Chris@16
|
610 base_iterator begin,
|
Chris@16
|
611 base_iterator end,
|
Chris@16
|
612 rule_type mask,
|
Chris@16
|
613 std::locale const &loc=std::locale())
|
Chris@16
|
614 :
|
Chris@16
|
615 map_(type,begin,end,loc),
|
Chris@16
|
616 mask_(mask),
|
Chris@16
|
617 full_select_(false)
|
Chris@16
|
618 {
|
Chris@16
|
619 }
|
Chris@16
|
620 ///
|
Chris@16
|
621 /// Create a segment_index for %boundary analysis \ref boundary_type "type" of the text
|
Chris@16
|
622 /// in range [begin,end) selecting all possible segments (full mask) for locale \a loc.
|
Chris@16
|
623 ///
|
Chris@16
|
624 segment_index(boundary_type type,
|
Chris@16
|
625 base_iterator begin,
|
Chris@16
|
626 base_iterator end,
|
Chris@16
|
627 std::locale const &loc=std::locale())
|
Chris@16
|
628 :
|
Chris@16
|
629 map_(type,begin,end,loc),
|
Chris@16
|
630 mask_(0xFFFFFFFFu),
|
Chris@16
|
631 full_select_(false)
|
Chris@16
|
632 {
|
Chris@16
|
633 }
|
Chris@16
|
634
|
Chris@16
|
635 ///
|
Chris@16
|
636 /// Create a segment_index from a \ref boundary_point_index. It copies all indexing information
|
Chris@16
|
637 /// and used default rule (all possible segments)
|
Chris@16
|
638 ///
|
Chris@16
|
639 /// This operation is very cheap, so if you use boundary_point_index and segment_index on same text
|
Chris@16
|
640 /// range it is much better to create one from another rather then indexing the same
|
Chris@16
|
641 /// range twice.
|
Chris@16
|
642 ///
|
Chris@16
|
643 /// \note \ref rule() flags are not copied
|
Chris@16
|
644 ///
|
Chris@16
|
645 segment_index(boundary_point_index<base_iterator> const &);
|
Chris@16
|
646 ///
|
Chris@16
|
647 /// Copy an index from a \ref boundary_point_index. It copies all indexing information
|
Chris@16
|
648 /// and uses the default rule (all possible segments)
|
Chris@16
|
649 ///
|
Chris@16
|
650 /// This operation is very cheap, so if you use boundary_point_index and segment_index on same text
|
Chris@16
|
651 /// range it is much better to create one from another rather then indexing the same
|
Chris@16
|
652 /// range twice.
|
Chris@16
|
653 ///
|
Chris@16
|
654 /// \note \ref rule() flags are not copied
|
Chris@16
|
655 ///
|
Chris@16
|
656 segment_index const &operator = (boundary_point_index<base_iterator> const &);
|
Chris@16
|
657
|
Chris@16
|
658
|
Chris@16
|
659 ///
|
Chris@16
|
660 /// Create a new index for %boundary analysis \ref boundary_type "type" of the text
|
Chris@16
|
661 /// in range [begin,end) for locale \a loc.
|
Chris@16
|
662 ///
|
Chris@16
|
663 /// \note \ref rule() and \ref full_select() remain unchanged.
|
Chris@16
|
664 ///
|
Chris@16
|
665 void map(boundary_type type,base_iterator begin,base_iterator end,std::locale const &loc=std::locale())
|
Chris@16
|
666 {
|
Chris@16
|
667 map_ = mapping_type(type,begin,end,loc);
|
Chris@16
|
668 }
|
Chris@16
|
669
|
Chris@16
|
670 ///
|
Chris@16
|
671 /// Get the \ref iterator on the beginning of the segments range.
|
Chris@16
|
672 ///
|
Chris@16
|
673 /// Preconditions: the segment_index should have a mapping
|
Chris@16
|
674 ///
|
Chris@16
|
675 /// \note
|
Chris@16
|
676 ///
|
Chris@16
|
677 /// The returned iterator is invalidated by access to any non-const member functions of this object
|
Chris@16
|
678 ///
|
Chris@16
|
679 iterator begin() const
|
Chris@16
|
680 {
|
Chris@16
|
681 return iterator(true,&map_,mask_,full_select_);
|
Chris@16
|
682 }
|
Chris@16
|
683
|
Chris@16
|
684 ///
|
Chris@16
|
685 /// Get the \ref iterator on the ending of the segments range.
|
Chris@16
|
686 ///
|
Chris@16
|
687 /// Preconditions: the segment_index should have a mapping
|
Chris@16
|
688 ///
|
Chris@16
|
689 /// The returned iterator is invalidated by access to any non-const member functions of this object
|
Chris@16
|
690 ///
|
Chris@16
|
691 iterator end() const
|
Chris@16
|
692 {
|
Chris@16
|
693 return iterator(false,&map_,mask_,full_select_);
|
Chris@16
|
694 }
|
Chris@16
|
695
|
Chris@16
|
696 ///
|
Chris@16
|
697 /// Find a first valid segment following a position \a p.
|
Chris@16
|
698 ///
|
Chris@16
|
699 /// If \a p is inside a valid segment this segment is selected:
|
Chris@16
|
700 ///
|
Chris@16
|
701 /// For example: For \ref word %boundary analysis with \ref word_any rule():
|
Chris@16
|
702 ///
|
Chris@16
|
703 /// - "to| be or ", would point to "be",
|
Chris@16
|
704 /// - "t|o be or ", would point to "to",
|
Chris@16
|
705 /// - "to be or| ", would point to end.
|
Chris@16
|
706 ///
|
Chris@16
|
707 ///
|
Chris@16
|
708 /// Preconditions: the segment_index should have a mapping and \a p should be valid iterator
|
Chris@16
|
709 /// to the text in the mapped range.
|
Chris@16
|
710 ///
|
Chris@16
|
711 /// The returned iterator is invalidated by access to any non-const member functions of this object
|
Chris@16
|
712 ///
|
Chris@16
|
713 iterator find(base_iterator p) const
|
Chris@16
|
714 {
|
Chris@16
|
715 return iterator(p,&map_,mask_,full_select_);
|
Chris@16
|
716 }
|
Chris@16
|
717
|
Chris@16
|
718 ///
|
Chris@16
|
719 /// Get the mask of rules that are used
|
Chris@16
|
720 ///
|
Chris@16
|
721 rule_type rule() const
|
Chris@16
|
722 {
|
Chris@16
|
723 return mask_;
|
Chris@16
|
724 }
|
Chris@16
|
725 ///
|
Chris@16
|
726 /// Set the mask of rules that are used
|
Chris@16
|
727 ///
|
Chris@16
|
728 void rule(rule_type v)
|
Chris@16
|
729 {
|
Chris@16
|
730 mask_ = v;
|
Chris@16
|
731 }
|
Chris@16
|
732
|
Chris@16
|
733 ///
|
Chris@16
|
734 /// Get the full_select property value - should segment include in the range
|
Chris@16
|
735 /// values that not belong to specific \ref rule() or not.
|
Chris@16
|
736 ///
|
Chris@16
|
737 /// The default value is false.
|
Chris@16
|
738 ///
|
Chris@16
|
739 /// For example for \ref sentence %boundary with rule \ref sentence_term the segments
|
Chris@16
|
740 /// of text "Hello! How\nare you?" are "Hello!\", "are you?" when full_select() is false
|
Chris@16
|
741 /// because "How\n" is selected %as sentence by a rule spits the text by line feed. If full_select()
|
Chris@16
|
742 /// is true the returned segments are "Hello! ", "How\nare you?" where "How\n" is joined with the
|
Chris@16
|
743 /// following part "are you?"
|
Chris@16
|
744 ///
|
Chris@16
|
745
|
Chris@16
|
746 bool full_select() const
|
Chris@16
|
747 {
|
Chris@16
|
748 return full_select_;
|
Chris@16
|
749 }
|
Chris@16
|
750
|
Chris@16
|
751 ///
|
Chris@16
|
752 /// Set the full_select property value - should segment include in the range
|
Chris@16
|
753 /// values that not belong to specific \ref rule() or not.
|
Chris@16
|
754 ///
|
Chris@16
|
755 /// The default value is false.
|
Chris@16
|
756 ///
|
Chris@16
|
757 /// For example for \ref sentence %boundary with rule \ref sentence_term the segments
|
Chris@16
|
758 /// of text "Hello! How\nare you?" are "Hello!\", "are you?" when full_select() is false
|
Chris@16
|
759 /// because "How\n" is selected %as sentence by a rule spits the text by line feed. If full_select()
|
Chris@16
|
760 /// is true the returned segments are "Hello! ", "How\nare you?" where "How\n" is joined with the
|
Chris@16
|
761 /// following part "are you?"
|
Chris@16
|
762 ///
|
Chris@16
|
763
|
Chris@16
|
764 void full_select(bool v)
|
Chris@16
|
765 {
|
Chris@16
|
766 full_select_ = v;
|
Chris@16
|
767 }
|
Chris@16
|
768
|
Chris@16
|
769 private:
|
Chris@16
|
770 friend class boundary_point_index<base_iterator>;
|
Chris@16
|
771 typedef details::mapping<base_iterator> mapping_type;
|
Chris@16
|
772 mapping_type map_;
|
Chris@16
|
773 rule_type mask_;
|
Chris@16
|
774 bool full_select_;
|
Chris@16
|
775 };
|
Chris@16
|
776
|
Chris@16
|
777 ///
|
Chris@16
|
778 /// \brief This class holds an index of \ref boundary_point "boundary points" and allows iterating
|
Chris@16
|
779 /// over them.
|
Chris@16
|
780 ///
|
Chris@16
|
781 /// This class is provides \ref begin() and \ref end() member functions that return bidirectional iterators
|
Chris@16
|
782 /// to the \ref boundary_point objects.
|
Chris@16
|
783 ///
|
Chris@16
|
784 /// It provides an option that affects selecting %boundary points according to different rules:
|
Chris@16
|
785 /// using \ref rule(rule_type mask) member function. It allows to set a mask that select only specific
|
Chris@16
|
786 /// types of %boundary points like \ref sentence_term.
|
Chris@16
|
787 ///
|
Chris@16
|
788 /// For example for a sentence %boundary analysis of a text "Hello! How\nare you?" when the default
|
Chris@16
|
789 /// rule is used the %boundary points would be:
|
Chris@16
|
790 ///
|
Chris@16
|
791 /// - "|Hello! How\nare you?"
|
Chris@16
|
792 /// - "Hello! |How\nare you?"
|
Chris@16
|
793 /// - "Hello! How\n|are you?"
|
Chris@16
|
794 /// - "Hello! How\nare you?|"
|
Chris@16
|
795 ///
|
Chris@16
|
796 /// However if \ref rule() is set to \ref sentence_term then the selected %boundary points would be:
|
Chris@16
|
797 ///
|
Chris@16
|
798 /// - "|Hello! How\nare you?"
|
Chris@16
|
799 /// - "Hello! |How\nare you?"
|
Chris@16
|
800 /// - "Hello! How\nare you?|"
|
Chris@16
|
801 ///
|
Chris@16
|
802 /// Such that a %boundary point defined by a line feed character would be ignored.
|
Chris@16
|
803 ///
|
Chris@16
|
804 /// This class allows to find a boundary_point according to the given iterator in range using \ref find() member
|
Chris@16
|
805 /// function.
|
Chris@16
|
806 ///
|
Chris@16
|
807 /// \note
|
Chris@16
|
808 /// - Even an empty text range [x,x) considered to have a one %boundary point x.
|
Chris@16
|
809 /// - \a a and \a b points of the range [a,b) are always considered %boundary points
|
Chris@16
|
810 /// regardless the rules used.
|
Chris@16
|
811 /// - Changing any of the option \ref rule() or course re-indexing the text
|
Chris@16
|
812 /// invalidates existing iterators and they can't be used any more.
|
Chris@16
|
813 /// - boundary_point_index can be created from segment_index or other boundary_point_index that was created with
|
Chris@16
|
814 /// same \ref boundary_type. This is very fast operation %as they shared same index
|
Chris@16
|
815 /// and it does not require its regeneration.
|
Chris@16
|
816 ///
|
Chris@16
|
817 /// \see
|
Chris@16
|
818 ///
|
Chris@16
|
819 /// - \ref segment_index
|
Chris@16
|
820 /// - \ref boundary_point
|
Chris@16
|
821 /// - \ref segment
|
Chris@16
|
822 ///
|
Chris@16
|
823
|
Chris@16
|
824
|
Chris@16
|
825 template<typename BaseIterator>
|
Chris@16
|
826 class boundary_point_index {
|
Chris@16
|
827 public:
|
Chris@16
|
828 ///
|
Chris@16
|
829 /// The type of the iterator used to iterate over the original text
|
Chris@16
|
830 ///
|
Chris@16
|
831 typedef BaseIterator base_iterator;
|
Chris@16
|
832 #ifdef BOOST_LOCALE_DOXYGEN
|
Chris@16
|
833 ///
|
Chris@16
|
834 /// The bidirectional iterator that iterates over \ref value_type objects.
|
Chris@16
|
835 ///
|
Chris@16
|
836 /// - The iterators may be invalidated by use of any non-const member function
|
Chris@16
|
837 /// including but not limited to \ref rule(rule_type) member function.
|
Chris@16
|
838 /// - The returned value_type object is valid %as long %as iterator points to it.
|
Chris@16
|
839 /// So this following code is wrong %as t used after p was updated:
|
Chris@16
|
840 /// \code
|
Chris@16
|
841 /// boundary_point_index<some_iterator>::iterator p=index.begin();
|
Chris@16
|
842 /// boundary_point<some_iterator> &t = *p;
|
Chris@16
|
843 /// ++p;
|
Chris@16
|
844 /// rule_type r = t->rule();
|
Chris@16
|
845 /// \endcode
|
Chris@16
|
846 ///
|
Chris@16
|
847 typedef unspecified_iterator_type iterator;
|
Chris@16
|
848 ///
|
Chris@16
|
849 /// \copydoc iterator
|
Chris@16
|
850 ///
|
Chris@16
|
851 typedef unspecified_iterator_type const_iterator;
|
Chris@16
|
852 #else
|
Chris@16
|
853 typedef details::boundary_point_index_iterator<base_iterator> iterator;
|
Chris@16
|
854 typedef details::boundary_point_index_iterator<base_iterator> const_iterator;
|
Chris@16
|
855 #endif
|
Chris@16
|
856 ///
|
Chris@16
|
857 /// The type dereferenced by the \ref iterator and \ref const_iterator. It is
|
Chris@16
|
858 /// an object that represents the selected \ref boundary_point "boundary point".
|
Chris@16
|
859 ///
|
Chris@16
|
860 typedef boundary_point<base_iterator> value_type;
|
Chris@16
|
861
|
Chris@16
|
862 ///
|
Chris@16
|
863 /// Default constructor.
|
Chris@16
|
864 ///
|
Chris@16
|
865 /// \note
|
Chris@16
|
866 ///
|
Chris@16
|
867 /// When this object is constructed by default it does not include a valid index, thus
|
Chris@16
|
868 /// calling \ref begin(), \ref end() or \ref find() member functions would lead to undefined
|
Chris@16
|
869 /// behavior
|
Chris@16
|
870 ///
|
Chris@16
|
871 boundary_point_index() : mask_(0xFFFFFFFFu)
|
Chris@16
|
872 {
|
Chris@16
|
873 }
|
Chris@16
|
874
|
Chris@16
|
875 ///
|
Chris@16
|
876 /// Create a segment_index for %boundary analysis \ref boundary_type "type" of the text
|
Chris@16
|
877 /// in range [begin,end) using a rule \a mask for locale \a loc.
|
Chris@16
|
878 ///
|
Chris@16
|
879 boundary_point_index(boundary_type type,
|
Chris@16
|
880 base_iterator begin,
|
Chris@16
|
881 base_iterator end,
|
Chris@16
|
882 rule_type mask,
|
Chris@16
|
883 std::locale const &loc=std::locale())
|
Chris@16
|
884 :
|
Chris@16
|
885 map_(type,begin,end,loc),
|
Chris@16
|
886 mask_(mask)
|
Chris@16
|
887 {
|
Chris@16
|
888 }
|
Chris@16
|
889 ///
|
Chris@16
|
890 /// Create a segment_index for %boundary analysis \ref boundary_type "type" of the text
|
Chris@16
|
891 /// in range [begin,end) selecting all possible %boundary points (full mask) for locale \a loc.
|
Chris@16
|
892 ///
|
Chris@16
|
893 boundary_point_index(boundary_type type,
|
Chris@16
|
894 base_iterator begin,
|
Chris@16
|
895 base_iterator end,
|
Chris@16
|
896 std::locale const &loc=std::locale())
|
Chris@16
|
897 :
|
Chris@16
|
898 map_(type,begin,end,loc),
|
Chris@16
|
899 mask_(0xFFFFFFFFu)
|
Chris@16
|
900 {
|
Chris@16
|
901 }
|
Chris@16
|
902
|
Chris@16
|
903 ///
|
Chris@16
|
904 /// Create a boundary_point_index from a \ref segment_index. It copies all indexing information
|
Chris@16
|
905 /// and uses the default rule (all possible %boundary points)
|
Chris@16
|
906 ///
|
Chris@16
|
907 /// This operation is very cheap, so if you use boundary_point_index and segment_index on same text
|
Chris@16
|
908 /// range it is much better to create one from another rather then indexing the same
|
Chris@16
|
909 /// range twice.
|
Chris@16
|
910 ///
|
Chris@16
|
911 /// \note \ref rule() flags are not copied
|
Chris@16
|
912 ///
|
Chris@16
|
913 boundary_point_index(segment_index<base_iterator> const &other);
|
Chris@16
|
914 ///
|
Chris@16
|
915 /// Copy a boundary_point_index from a \ref segment_index. It copies all indexing information
|
Chris@16
|
916 /// and keeps the current \ref rule() unchanged
|
Chris@16
|
917 ///
|
Chris@16
|
918 /// This operation is very cheap, so if you use boundary_point_index and segment_index on same text
|
Chris@16
|
919 /// range it is much better to create one from another rather then indexing the same
|
Chris@16
|
920 /// range twice.
|
Chris@16
|
921 ///
|
Chris@16
|
922 /// \note \ref rule() flags are not copied
|
Chris@16
|
923 ///
|
Chris@16
|
924 boundary_point_index const &operator=(segment_index<base_iterator> const &other);
|
Chris@16
|
925
|
Chris@16
|
926 ///
|
Chris@16
|
927 /// Create a new index for %boundary analysis \ref boundary_type "type" of the text
|
Chris@16
|
928 /// in range [begin,end) for locale \a loc.
|
Chris@16
|
929 ///
|
Chris@16
|
930 /// \note \ref rule() remains unchanged.
|
Chris@16
|
931 ///
|
Chris@16
|
932 void map(boundary_type type,base_iterator begin,base_iterator end,std::locale const &loc=std::locale())
|
Chris@16
|
933 {
|
Chris@16
|
934 map_ = mapping_type(type,begin,end,loc);
|
Chris@16
|
935 }
|
Chris@16
|
936
|
Chris@16
|
937 ///
|
Chris@16
|
938 /// Get the \ref iterator on the beginning of the %boundary points range.
|
Chris@16
|
939 ///
|
Chris@16
|
940 /// Preconditions: this boundary_point_index should have a mapping
|
Chris@16
|
941 ///
|
Chris@16
|
942 /// \note
|
Chris@16
|
943 ///
|
Chris@16
|
944 /// The returned iterator is invalidated by access to any non-const member functions of this object
|
Chris@16
|
945 ///
|
Chris@16
|
946 iterator begin() const
|
Chris@16
|
947 {
|
Chris@16
|
948 return iterator(true,&map_,mask_);
|
Chris@16
|
949 }
|
Chris@16
|
950
|
Chris@16
|
951 ///
|
Chris@16
|
952 /// Get the \ref iterator on the ending of the %boundary points range.
|
Chris@16
|
953 ///
|
Chris@16
|
954 /// Preconditions: this boundary_point_index should have a mapping
|
Chris@16
|
955 ///
|
Chris@16
|
956 /// \note
|
Chris@16
|
957 ///
|
Chris@16
|
958 /// The returned iterator is invalidated by access to any non-const member functions of this object
|
Chris@16
|
959 ///
|
Chris@16
|
960 iterator end() const
|
Chris@16
|
961 {
|
Chris@16
|
962 return iterator(false,&map_,mask_);
|
Chris@16
|
963 }
|
Chris@16
|
964
|
Chris@16
|
965 ///
|
Chris@16
|
966 /// Find a first valid %boundary point on a position \a p or following it.
|
Chris@16
|
967 ///
|
Chris@16
|
968 /// For example: For \ref word %boundary analysis of the text "to be or"
|
Chris@16
|
969 ///
|
Chris@16
|
970 /// - "|to be", would return %boundary point at "|to be",
|
Chris@16
|
971 /// - "t|o be", would point to "to| be"
|
Chris@16
|
972 ///
|
Chris@16
|
973 /// Preconditions: the boundary_point_index should have a mapping and \a p should be valid iterator
|
Chris@16
|
974 /// to the text in the mapped range.
|
Chris@16
|
975 ///
|
Chris@16
|
976 /// The returned iterator is invalidated by access to any non-const member functions of this object
|
Chris@16
|
977 ///
|
Chris@16
|
978 iterator find(base_iterator p) const
|
Chris@16
|
979 {
|
Chris@16
|
980 return iterator(p,&map_,mask_);
|
Chris@16
|
981 }
|
Chris@16
|
982
|
Chris@16
|
983 ///
|
Chris@16
|
984 /// Get the mask of rules that are used
|
Chris@16
|
985 ///
|
Chris@16
|
986 rule_type rule() const
|
Chris@16
|
987 {
|
Chris@16
|
988 return mask_;
|
Chris@16
|
989 }
|
Chris@16
|
990 ///
|
Chris@16
|
991 /// Set the mask of rules that are used
|
Chris@16
|
992 ///
|
Chris@16
|
993 void rule(rule_type v)
|
Chris@16
|
994 {
|
Chris@16
|
995 mask_ = v;
|
Chris@16
|
996 }
|
Chris@16
|
997
|
Chris@16
|
998 private:
|
Chris@16
|
999
|
Chris@16
|
1000 friend class segment_index<base_iterator>;
|
Chris@16
|
1001 typedef details::mapping<base_iterator> mapping_type;
|
Chris@16
|
1002 mapping_type map_;
|
Chris@16
|
1003 rule_type mask_;
|
Chris@16
|
1004 };
|
Chris@16
|
1005
|
Chris@16
|
1006 /// \cond INTERNAL
|
Chris@16
|
1007 template<typename BaseIterator>
|
Chris@16
|
1008 segment_index<BaseIterator>::segment_index(boundary_point_index<BaseIterator> const &other) :
|
Chris@16
|
1009 map_(other.map_),
|
Chris@16
|
1010 mask_(0xFFFFFFFFu),
|
Chris@16
|
1011 full_select_(false)
|
Chris@16
|
1012 {
|
Chris@16
|
1013 }
|
Chris@16
|
1014
|
Chris@16
|
1015 template<typename BaseIterator>
|
Chris@16
|
1016 boundary_point_index<BaseIterator>::boundary_point_index(segment_index<BaseIterator> const &other) :
|
Chris@16
|
1017 map_(other.map_),
|
Chris@16
|
1018 mask_(0xFFFFFFFFu)
|
Chris@16
|
1019 {
|
Chris@16
|
1020 }
|
Chris@16
|
1021
|
Chris@16
|
1022 template<typename BaseIterator>
|
Chris@16
|
1023 segment_index<BaseIterator> const &segment_index<BaseIterator>::operator=(boundary_point_index<BaseIterator> const &other)
|
Chris@16
|
1024 {
|
Chris@16
|
1025 map_ = other.map_;
|
Chris@16
|
1026 return *this;
|
Chris@16
|
1027 }
|
Chris@16
|
1028
|
Chris@16
|
1029 template<typename BaseIterator>
|
Chris@16
|
1030 boundary_point_index<BaseIterator> const &boundary_point_index<BaseIterator>::operator=(segment_index<BaseIterator> const &other)
|
Chris@16
|
1031 {
|
Chris@16
|
1032 map_ = other.map_;
|
Chris@16
|
1033 return *this;
|
Chris@16
|
1034 }
|
Chris@16
|
1035 /// \endcond
|
Chris@16
|
1036
|
Chris@16
|
1037 typedef segment_index<std::string::const_iterator> ssegment_index; ///< convenience typedef
|
Chris@16
|
1038 typedef segment_index<std::wstring::const_iterator> wssegment_index; ///< convenience typedef
|
Chris@16
|
1039 #ifdef BOOST_HAS_CHAR16_T
|
Chris@16
|
1040 typedef segment_index<std::u16string::const_iterator> u16ssegment_index;///< convenience typedef
|
Chris@16
|
1041 #endif
|
Chris@16
|
1042 #ifdef BOOST_HAS_CHAR32_T
|
Chris@16
|
1043 typedef segment_index<std::u32string::const_iterator> u32ssegment_index;///< convenience typedef
|
Chris@16
|
1044 #endif
|
Chris@16
|
1045
|
Chris@16
|
1046 typedef segment_index<char const *> csegment_index; ///< convenience typedef
|
Chris@16
|
1047 typedef segment_index<wchar_t const *> wcsegment_index; ///< convenience typedef
|
Chris@16
|
1048 #ifdef BOOST_HAS_CHAR16_T
|
Chris@16
|
1049 typedef segment_index<char16_t const *> u16csegment_index; ///< convenience typedef
|
Chris@16
|
1050 #endif
|
Chris@16
|
1051 #ifdef BOOST_HAS_CHAR32_T
|
Chris@16
|
1052 typedef segment_index<char32_t const *> u32csegment_index; ///< convenience typedef
|
Chris@16
|
1053 #endif
|
Chris@16
|
1054
|
Chris@16
|
1055 typedef boundary_point_index<std::string::const_iterator> sboundary_point_index;///< convenience typedef
|
Chris@16
|
1056 typedef boundary_point_index<std::wstring::const_iterator> wsboundary_point_index;///< convenience typedef
|
Chris@16
|
1057 #ifdef BOOST_HAS_CHAR16_T
|
Chris@16
|
1058 typedef boundary_point_index<std::u16string::const_iterator> u16sboundary_point_index;///< convenience typedef
|
Chris@16
|
1059 #endif
|
Chris@16
|
1060 #ifdef BOOST_HAS_CHAR32_T
|
Chris@16
|
1061 typedef boundary_point_index<std::u32string::const_iterator> u32sboundary_point_index;///< convenience typedef
|
Chris@16
|
1062 #endif
|
Chris@16
|
1063
|
Chris@16
|
1064 typedef boundary_point_index<char const *> cboundary_point_index; ///< convenience typedef
|
Chris@16
|
1065 typedef boundary_point_index<wchar_t const *> wcboundary_point_index; ///< convenience typedef
|
Chris@16
|
1066 #ifdef BOOST_HAS_CHAR16_T
|
Chris@16
|
1067 typedef boundary_point_index<char16_t const *> u16cboundary_point_index;///< convenience typedef
|
Chris@16
|
1068 #endif
|
Chris@16
|
1069 #ifdef BOOST_HAS_CHAR32_T
|
Chris@16
|
1070 typedef boundary_point_index<char32_t const *> u32cboundary_point_index;///< convenience typedef
|
Chris@16
|
1071 #endif
|
Chris@16
|
1072
|
Chris@16
|
1073
|
Chris@16
|
1074
|
Chris@16
|
1075 } // boundary
|
Chris@16
|
1076
|
Chris@16
|
1077 } // locale
|
Chris@16
|
1078 } // boost
|
Chris@16
|
1079
|
Chris@16
|
1080 ///
|
Chris@16
|
1081 /// \example boundary.cpp
|
Chris@16
|
1082 /// Example of using segment_index
|
Chris@16
|
1083 /// \example wboundary.cpp
|
Chris@16
|
1084 /// Example of using segment_index over wide strings
|
Chris@16
|
1085 ///
|
Chris@16
|
1086
|
Chris@16
|
1087 #ifdef BOOST_MSVC
|
Chris@16
|
1088 #pragma warning(pop)
|
Chris@16
|
1089 #endif
|
Chris@16
|
1090
|
Chris@16
|
1091 #endif
|
Chris@16
|
1092 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
|