annotate DEPENDENCIES/generic/include/boost/token_functions.hpp @ 133:4acb5d8d80b6 tip

Don't fail environmental check if README.md exists (but .txt and no-suffix don't)
author Chris Cannam
date Tue, 30 Jul 2019 12:25:44 +0100
parents c530137014c0
children
rev   line source
Chris@16 1 // Boost token_functions.hpp ------------------------------------------------//
Chris@16 2
Chris@16 3 // Copyright John R. Bandela 2001.
Chris@16 4
Chris@16 5 // Distributed under the Boost Software License, Version 1.0. (See
Chris@16 6 // accompanying file LICENSE_1_0.txt or copy at
Chris@16 7 // http://www.boost.org/LICENSE_1_0.txt)
Chris@16 8
Chris@16 9 // See http://www.boost.org/libs/tokenizer/ for documentation.
Chris@16 10
Chris@16 11 // Revision History:
Chris@16 12 // 01 Oct 2004 Joaquin M Lopez Munoz
Chris@16 13 // Workaround for a problem with string::assign in msvc-stlport
Chris@16 14 // 06 Apr 2004 John Bandela
Chris@16 15 // Fixed a bug involving using char_delimiter with a true input iterator
Chris@16 16 // 28 Nov 2003 Robert Zeh and John Bandela
Chris@16 17 // Converted into "fast" functions that avoid using += when
Chris@16 18 // the supplied iterator isn't an input_iterator; based on
Chris@16 19 // some work done at Archelon and a version that was checked into
Chris@16 20 // the boost CVS for a short period of time.
Chris@16 21 // 20 Feb 2002 John Maddock
Chris@16 22 // Removed using namespace std declarations and added
Chris@16 23 // workaround for BOOST_NO_STDC_NAMESPACE (the library
Chris@16 24 // can be safely mixed with regex).
Chris@16 25 // 06 Feb 2002 Jeremy Siek
Chris@16 26 // Added char_separator.
Chris@16 27 // 02 Feb 2002 Jeremy Siek
Chris@16 28 // Removed tabs and a little cleanup.
Chris@16 29
Chris@16 30
Chris@16 31 #ifndef BOOST_TOKEN_FUNCTIONS_JRB120303_HPP_
Chris@16 32 #define BOOST_TOKEN_FUNCTIONS_JRB120303_HPP_
Chris@16 33
Chris@16 34 #include <vector>
Chris@16 35 #include <stdexcept>
Chris@16 36 #include <string>
Chris@16 37 #include <cctype>
Chris@16 38 #include <algorithm> // for find_if
Chris@16 39 #include <boost/config.hpp>
Chris@16 40 #include <boost/assert.hpp>
Chris@16 41 #include <boost/detail/workaround.hpp>
Chris@16 42 #include <boost/mpl/if.hpp>
Chris@101 43 #include <boost/throw_exception.hpp>
Chris@16 44 #if !defined(BOOST_NO_CWCTYPE)
Chris@16 45 #include <cwctype>
Chris@16 46 #endif
Chris@16 47
Chris@16 48 //
Chris@16 49 // the following must not be macros if we are to prefix them
Chris@16 50 // with std:: (they shouldn't be macros anyway...)
Chris@16 51 //
Chris@16 52 #ifdef ispunct
Chris@16 53 # undef ispunct
Chris@16 54 #endif
Chris@16 55 #ifdef iswpunct
Chris@16 56 # undef iswpunct
Chris@16 57 #endif
Chris@16 58 #ifdef isspace
Chris@16 59 # undef isspace
Chris@16 60 #endif
Chris@16 61 #ifdef iswspace
Chris@16 62 # undef iswspace
Chris@16 63 #endif
Chris@16 64 //
Chris@16 65 // fix namespace problems:
Chris@16 66 //
Chris@16 67 #ifdef BOOST_NO_STDC_NAMESPACE
Chris@16 68 namespace std{
Chris@16 69 using ::ispunct;
Chris@16 70 using ::isspace;
Chris@16 71 #if !defined(BOOST_NO_CWCTYPE)
Chris@16 72 using ::iswpunct;
Chris@16 73 using ::iswspace;
Chris@16 74 #endif
Chris@16 75 }
Chris@16 76 #endif
Chris@16 77
Chris@16 78 namespace boost{
Chris@16 79 //===========================================================================
Chris@16 80 // The escaped_list_separator class, which is a model of TokenizerFunction.
Chris@16 81 // An escaped list is a superset of what is commonly known as a comma
Chris@16 82 // separated value (csv) list. It is separated into fields by a comma or
Chris@16 83 // other character. If the delimiting character is inside quotes, then it is
Chris@16 84 // counted as a regular character. To allow for embedded quotes in a field,
Chris@16 85 // there can be escape sequences using the \ much like C.
Chris@16 86 // The role of the comma, the quotation mark, and the escape
Chris@16 87 // character (backslash \), can be assigned to other characters.
Chris@16 88
// Exception type thrown by escaped_list_separator when it meets a malformed
// escape sequence. It adds nothing to std::runtime_error beyond a distinct
// type that callers can catch.
struct escaped_list_error : public std::runtime_error
{
  // what_arg is forwarded unchanged to std::runtime_error and is what
  // what() reports.
  escaped_list_error(const std::string& what_arg)
    : std::runtime_error(what_arg)
  {
  }
};
Chris@16 92
Chris@16 93
Chris@16 94 // The out of the box GCC 2.95 on cygwin does not have a char_traits class.
Chris@16 95 // MSVC does not like the following typename
Chris@16 96 template <class Char,
Chris@16 97 class Traits = BOOST_DEDUCED_TYPENAME std::basic_string<Char>::traits_type >
Chris@16 98 class escaped_list_separator {
Chris@16 99
Chris@16 100 private:
Chris@16 101 typedef std::basic_string<Char,Traits> string_type;
Chris@16 102 struct char_eq {
Chris@16 103 Char e_;
Chris@16 104 char_eq(Char e):e_(e) { }
Chris@16 105 bool operator()(Char c) {
Chris@16 106 return Traits::eq(e_,c);
Chris@16 107 }
Chris@16 108 };
Chris@16 109 string_type escape_;
Chris@16 110 string_type c_;
Chris@16 111 string_type quote_;
Chris@16 112 bool last_;
Chris@16 113
Chris@16 114 bool is_escape(Char e) {
Chris@16 115 char_eq f(e);
Chris@16 116 return std::find_if(escape_.begin(),escape_.end(),f)!=escape_.end();
Chris@16 117 }
Chris@16 118 bool is_c(Char e) {
Chris@16 119 char_eq f(e);
Chris@16 120 return std::find_if(c_.begin(),c_.end(),f)!=c_.end();
Chris@16 121 }
Chris@16 122 bool is_quote(Char e) {
Chris@16 123 char_eq f(e);
Chris@16 124 return std::find_if(quote_.begin(),quote_.end(),f)!=quote_.end();
Chris@16 125 }
Chris@16 126 template <typename iterator, typename Token>
Chris@16 127 void do_escape(iterator& next,iterator end,Token& tok) {
Chris@16 128 if (++next == end)
Chris@101 129 BOOST_THROW_EXCEPTION(escaped_list_error(std::string("cannot end with escape")));
Chris@16 130 if (Traits::eq(*next,'n')) {
Chris@16 131 tok+='\n';
Chris@16 132 return;
Chris@16 133 }
Chris@16 134 else if (is_quote(*next)) {
Chris@16 135 tok+=*next;
Chris@16 136 return;
Chris@16 137 }
Chris@16 138 else if (is_c(*next)) {
Chris@16 139 tok+=*next;
Chris@16 140 return;
Chris@16 141 }
Chris@16 142 else if (is_escape(*next)) {
Chris@16 143 tok+=*next;
Chris@16 144 return;
Chris@16 145 }
Chris@16 146 else
Chris@101 147 BOOST_THROW_EXCEPTION(escaped_list_error(std::string("unknown escape sequence")));
Chris@16 148 }
Chris@16 149
Chris@16 150 public:
Chris@16 151
Chris@16 152 explicit escaped_list_separator(Char e = '\\',
Chris@16 153 Char c = ',',Char q = '\"')
Chris@16 154 : escape_(1,e), c_(1,c), quote_(1,q), last_(false) { }
Chris@16 155
Chris@16 156 escaped_list_separator(string_type e, string_type c, string_type q)
Chris@16 157 : escape_(e), c_(c), quote_(q), last_(false) { }
Chris@16 158
Chris@16 159 void reset() {last_=false;}
Chris@16 160
Chris@16 161 template <typename InputIterator, typename Token>
Chris@16 162 bool operator()(InputIterator& next,InputIterator end,Token& tok) {
Chris@16 163 bool bInQuote = false;
Chris@16 164 tok = Token();
Chris@16 165
Chris@16 166 if (next == end) {
Chris@16 167 if (last_) {
Chris@16 168 last_ = false;
Chris@16 169 return true;
Chris@16 170 }
Chris@16 171 else
Chris@16 172 return false;
Chris@16 173 }
Chris@16 174 last_ = false;
Chris@16 175 for (;next != end;++next) {
Chris@16 176 if (is_escape(*next)) {
Chris@16 177 do_escape(next,end,tok);
Chris@16 178 }
Chris@16 179 else if (is_c(*next)) {
Chris@16 180 if (!bInQuote) {
Chris@16 181 // If we are not in quote, then we are done
Chris@16 182 ++next;
Chris@16 183 // The last character was a c, that means there is
Chris@16 184 // 1 more blank field
Chris@16 185 last_ = true;
Chris@16 186 return true;
Chris@16 187 }
Chris@16 188 else tok+=*next;
Chris@16 189 }
Chris@16 190 else if (is_quote(*next)) {
Chris@16 191 bInQuote=!bInQuote;
Chris@16 192 }
Chris@16 193 else {
Chris@16 194 tok += *next;
Chris@16 195 }
Chris@16 196 }
Chris@16 197 return true;
Chris@16 198 }
Chris@16 199 };
Chris@16 200
Chris@16 201 //===========================================================================
Chris@16 202 // The classes here are used by offset_separator and char_separator to implement
Chris@16 203 // faster assigning of tokens using assign instead of +=
Chris@16 204
Chris@16 205 namespace tokenizer_detail {
Chris@16 206 //===========================================================================
Chris@16 207 // Tokenizer was broken for wide character separators, at least on Windows, since
Chris@16 208 // CRT functions isspace etc only expect values in [0, 0xFF]. Debug build asserts
Chris@16 209 // if higher values are passed in. The traits extension class should take care of this.
Chris@16 210 // Assuming that the conditional will always get optimized out in the function
Chris@16 211 // implementations, argument types are not a problem since both forms of character classifiers
Chris@16 212 // expect an int.
Chris@16 213
Chris@16 214 #if !defined(BOOST_NO_CWCTYPE)
// Character classification helpers layered on top of a character traits
// class. N is the size in bytes of the character type and selects which
// family of classification functions is used.
//
// Primary template (N != 1): wide characters, classified with the
// <cwctype> functions.
template<typename traits, int N>
struct traits_extension_details : public traits {
  typedef typename traits::char_type char_type;
  // True when c is a whitespace character according to std::iswspace.
  static bool isspace(char_type c)
  {
    return std::iswspace(c) != 0;
  }
  // True when c is a punctuation character according to std::iswpunct.
  static bool ispunct(char_type c)
  {
    return std::iswpunct(c) != 0;
  }
};

// Specialization for one-byte character types, classified with the
// <cctype> functions.
//
// The <cctype> functions require an argument that is representable as
// unsigned char (or EOF). A plain char with the high bit set is negative
// where char is signed, so it is converted to unsigned char first; passing
// it through unchanged is undefined behaviour.
template<typename traits>
struct traits_extension_details<traits, 1> : public traits {
  typedef typename traits::char_type char_type;
  // True when c is a whitespace character according to std::isspace.
  static bool isspace(char_type c)
  {
    return std::isspace(static_cast<unsigned char>(c)) != 0;
  }
  // True when c is a punctuation character according to std::ispunct.
  static bool ispunct(char_type c)
  {
    return std::ispunct(static_cast<unsigned char>(c)) != 0;
  }
};
Chris@16 240 #endif
Chris@16 241
Chris@16 242
Chris@16 243 // In case there is no cwctype header, we implement the checks manually.
Chris@16 244 // We make use of the fact that the tested categories should fit in ASCII.
Chris@16 245 template<typename traits>
Chris@16 246 struct traits_extension : public traits {
Chris@16 247 typedef typename traits::char_type char_type;
Chris@16 248 static bool isspace(char_type c)
Chris@16 249 {
Chris@16 250 #if !defined(BOOST_NO_CWCTYPE)
Chris@16 251 return traits_extension_details<traits, sizeof(char_type)>::isspace(c);
Chris@16 252 #else
Chris@16 253 return static_cast< unsigned >(c) <= 255 && std::isspace(c) != 0;
Chris@16 254 #endif
Chris@16 255 }
Chris@16 256
Chris@16 257 static bool ispunct(char_type c)
Chris@16 258 {
Chris@16 259 #if !defined(BOOST_NO_CWCTYPE)
Chris@16 260 return traits_extension_details<traits, sizeof(char_type)>::ispunct(c);
Chris@16 261 #else
Chris@16 262 return static_cast< unsigned >(c) <= 255 && std::ispunct(c) != 0;
Chris@16 263 #endif
Chris@16 264 }
Chris@16 265 };
Chris@16 266
Chris@16 267 // The assign_or_plus_equal struct contains functions that implement
Chris@16 268 // assign, +=, and clearing based on the iterator type. The
Chris@16 269 // generic case does nothing for plus_equal and clearing, while
Chris@16 270 // passing through the call for assign.
Chris@16 271 //
Chris@16 272 // When an input iterator is being used, the situation is reversed.
Chris@16 273 // The assign method does nothing, plus_equal invokes operator +=,
Chris@16 274 // and the clearing method sets the supplied token to the default
Chris@16 275 // token constructor's result.
Chris@16 276 //
Chris@16 277
// Token-building strategy selected by iterator category.
//
// Generic case: the token is produced in one step from an iterator range,
// so assign() does the work while plus_equal() and clear() are no-ops.
template<class IteratorTag>
struct assign_or_plus_equal
{
  // Replaces the token's contents with the range [first, last).
  template<class Iterator, class Token>
  static void assign(Iterator first, Iterator last, Token &token)
  {
    token.assign(first, last);
  }

  // Nothing to accumulate: assign() supplies the whole token.
  template<class Token, class Value>
  static void plus_equal(Token &, const Value &)
  {
  }

  // Nothing to clear: assign() overwrites the token anyway.
  template<class Token>
  static void clear(Token &)
  {
  }
};

// Input-iterator case: the roles are reversed. The token is accumulated one
// value at a time with +=, so it must be reset first and assign() is a
// no-op.
template <>
struct assign_or_plus_equal<std::input_iterator_tag>
{
  // No-op: the token has already been built by plus_equal().
  template<class Iterator, class Token>
  static void assign(Iterator, Iterator, Token &)
  {
  }

  // Appends one value to the token.
  template<class Token, class Value>
  static void plus_equal(Token &token, const Value &value)
  {
    token += value;
  }

  // Resets the token to a default-constructed Token.
  template<class Token>
  static void clear(Token &token)
  {
    token = Token();
  }
};
Chris@16 308
Chris@16 309
// Iterator category reported for raw pointers: always random access,
// whatever the Iterator argument is.
template<class Iterator>
struct pointer_iterator_category
{
  typedef std::random_access_iterator_tag type;
};
Chris@16 314
Chris@16 315
// Iterator category for class-type iterators: taken from the iterator's own
// nested iterator_category typedef.
template<class Iterator>
struct class_iterator_category
{
  typedef typename Iterator::iterator_category type;
};
Chris@16 320
Chris@16 321
Chris@16 322
Chris@16 323 // This portably gets the iterator_tag without partial template specialization
Chris@16 324 template<class Iterator>
Chris@16 325 struct get_iterator_category{
Chris@16 326 typedef typename mpl::if_<is_pointer<Iterator>,
Chris@16 327 pointer_iterator_category<Iterator>,
Chris@16 328 class_iterator_category<Iterator>
Chris@16 329 >::type cat;
Chris@16 330
Chris@16 331 typedef typename cat::type iterator_category;
Chris@16 332 };
Chris@16 333
Chris@16 334
Chris@16 335 } // namespace tokenizer_detail
Chris@16 336
Chris@16 337
Chris@16 338 //===========================================================================
Chris@16 339 // The offset_separator class, which is a model of TokenizerFunction.
Chris@16 340 // Offset breaks a string into tokens based on a range of offsets
Chris@16 341
Chris@16 342 class offset_separator {
Chris@16 343 private:
Chris@16 344
Chris@16 345 std::vector<int> offsets_;
Chris@16 346 unsigned int current_offset_;
Chris@16 347 bool wrap_offsets_;
Chris@16 348 bool return_partial_last_;
Chris@16 349
Chris@16 350 public:
Chris@16 351 template <typename Iter>
Chris@16 352 offset_separator(Iter begin, Iter end, bool wrap_offsets = true,
Chris@16 353 bool return_partial_last = true)
Chris@16 354 : offsets_(begin,end), current_offset_(0),
Chris@16 355 wrap_offsets_(wrap_offsets),
Chris@16 356 return_partial_last_(return_partial_last) { }
Chris@16 357
Chris@16 358 offset_separator()
Chris@16 359 : offsets_(1,1), current_offset_(),
Chris@16 360 wrap_offsets_(true), return_partial_last_(true) { }
Chris@16 361
Chris@16 362 void reset() {
Chris@16 363 current_offset_ = 0;
Chris@16 364 }
Chris@16 365
Chris@16 366 template <typename InputIterator, typename Token>
Chris@16 367 bool operator()(InputIterator& next, InputIterator end, Token& tok)
Chris@16 368 {
Chris@16 369 typedef tokenizer_detail::assign_or_plus_equal<
Chris@16 370 BOOST_DEDUCED_TYPENAME tokenizer_detail::get_iterator_category<
Chris@16 371 InputIterator
Chris@16 372 >::iterator_category
Chris@16 373 > assigner;
Chris@16 374
Chris@16 375 BOOST_ASSERT(!offsets_.empty());
Chris@16 376
Chris@16 377 assigner::clear(tok);
Chris@16 378 InputIterator start(next);
Chris@16 379
Chris@16 380 if (next == end)
Chris@16 381 return false;
Chris@16 382
Chris@16 383 if (current_offset_ == offsets_.size())
Chris@16 384 {
Chris@16 385 if (wrap_offsets_)
Chris@16 386 current_offset_=0;
Chris@16 387 else
Chris@16 388 return false;
Chris@16 389 }
Chris@16 390
Chris@16 391 int c = offsets_[current_offset_];
Chris@16 392 int i = 0;
Chris@16 393 for (; i < c; ++i) {
Chris@16 394 if (next == end)break;
Chris@16 395 assigner::plus_equal(tok,*next++);
Chris@16 396 }
Chris@16 397 assigner::assign(start,next,tok);
Chris@16 398
Chris@16 399 if (!return_partial_last_)
Chris@16 400 if (i < (c-1) )
Chris@16 401 return false;
Chris@16 402
Chris@16 403 ++current_offset_;
Chris@16 404 return true;
Chris@16 405 }
Chris@16 406 };
Chris@16 407
Chris@16 408
Chris@16 409 //===========================================================================
Chris@16 410 // The char_separator class breaks a sequence of characters into
Chris@16 411 // tokens based on the character delimiters (very much like bad old
Chris@16 412 // strtok). A delimiter character can either be kept or dropped. A
Chris@16 413 // kept delimiter shows up as an output token, whereas a dropped
Chris@16 414 // delimiter does not.
Chris@16 415
Chris@16 416 // This class replaces the char_delimiters_separator class. The
Chris@16 417 // constructor for the char_delimiters_separator class was too
Chris@16 418 // confusing and needed to be deprecated. However, because of the
Chris@16 419 // default arguments to the constructor, adding the new constructor
Chris@16 420 // would cause ambiguity, so instead I deprecated the whole class.
Chris@16 421 // The implementation of the class was also simplified considerably.
Chris@16 422
// Selects how char_separator treats empty tokens: with drop_empty_tokens it
// skips runs of dropped delimiters before each token, with
// keep_empty_tokens it takes the branch that can report empty tokens.
enum empty_token_policy {
  drop_empty_tokens,
  keep_empty_tokens
};
Chris@16 424
Chris@16 425 // The out of the box GCC 2.95 on cygwin does not have a char_traits class.
Chris@16 426 template <typename Char,
Chris@16 427 typename Tr = BOOST_DEDUCED_TYPENAME std::basic_string<Char>::traits_type >
Chris@16 428 class char_separator
Chris@16 429 {
Chris@16 430 typedef tokenizer_detail::traits_extension<Tr> Traits;
Chris@16 431 typedef std::basic_string<Char,Tr> string_type;
Chris@16 432 public:
Chris@16 433 explicit
Chris@16 434 char_separator(const Char* dropped_delims,
Chris@16 435 const Char* kept_delims = 0,
Chris@16 436 empty_token_policy empty_tokens = drop_empty_tokens)
Chris@16 437 : m_dropped_delims(dropped_delims),
Chris@16 438 m_use_ispunct(false),
Chris@16 439 m_use_isspace(false),
Chris@16 440 m_empty_tokens(empty_tokens),
Chris@16 441 m_output_done(false)
Chris@16 442 {
Chris@16 443 // Borland workaround
Chris@16 444 if (kept_delims)
Chris@16 445 m_kept_delims = kept_delims;
Chris@16 446 }
Chris@16 447
Chris@16 448 // use ispunct() for kept delimiters and isspace for dropped.
Chris@16 449 explicit
Chris@16 450 char_separator()
Chris@16 451 : m_use_ispunct(true),
Chris@16 452 m_use_isspace(true),
Chris@16 453 m_empty_tokens(drop_empty_tokens) { }
Chris@16 454
Chris@16 455 void reset() { }
Chris@16 456
Chris@16 457 template <typename InputIterator, typename Token>
Chris@16 458 bool operator()(InputIterator& next, InputIterator end, Token& tok)
Chris@16 459 {
Chris@16 460 typedef tokenizer_detail::assign_or_plus_equal<
Chris@16 461 BOOST_DEDUCED_TYPENAME tokenizer_detail::get_iterator_category<
Chris@16 462 InputIterator
Chris@16 463 >::iterator_category
Chris@16 464 > assigner;
Chris@16 465
Chris@16 466 assigner::clear(tok);
Chris@16 467
Chris@16 468 // skip past all dropped_delims
Chris@16 469 if (m_empty_tokens == drop_empty_tokens)
Chris@16 470 for (; next != end && is_dropped(*next); ++next)
Chris@16 471 { }
Chris@16 472
Chris@16 473 InputIterator start(next);
Chris@16 474
Chris@16 475 if (m_empty_tokens == drop_empty_tokens) {
Chris@16 476
Chris@16 477 if (next == end)
Chris@16 478 return false;
Chris@16 479
Chris@16 480
Chris@16 481 // if we are on a kept_delims move past it and stop
Chris@16 482 if (is_kept(*next)) {
Chris@16 483 assigner::plus_equal(tok,*next);
Chris@16 484 ++next;
Chris@16 485 } else
Chris@16 486 // append all the non delim characters
Chris@16 487 for (; next != end && !is_dropped(*next) && !is_kept(*next); ++next)
Chris@16 488 assigner::plus_equal(tok,*next);
Chris@16 489 }
Chris@16 490 else { // m_empty_tokens == keep_empty_tokens
Chris@16 491
Chris@16 492 // Handle empty token at the end
Chris@16 493 if (next == end)
Chris@16 494 {
Chris@16 495 if (m_output_done == false)
Chris@16 496 {
Chris@16 497 m_output_done = true;
Chris@16 498 assigner::assign(start,next,tok);
Chris@16 499 return true;
Chris@16 500 }
Chris@16 501 else
Chris@16 502 return false;
Chris@16 503 }
Chris@16 504
Chris@16 505 if (is_kept(*next)) {
Chris@16 506 if (m_output_done == false)
Chris@16 507 m_output_done = true;
Chris@16 508 else {
Chris@16 509 assigner::plus_equal(tok,*next);
Chris@16 510 ++next;
Chris@16 511 m_output_done = false;
Chris@16 512 }
Chris@16 513 }
Chris@16 514 else if (m_output_done == false && is_dropped(*next)) {
Chris@16 515 m_output_done = true;
Chris@16 516 }
Chris@16 517 else {
Chris@16 518 if (is_dropped(*next))
Chris@16 519 start=++next;
Chris@16 520 for (; next != end && !is_dropped(*next) && !is_kept(*next); ++next)
Chris@16 521 assigner::plus_equal(tok,*next);
Chris@16 522 m_output_done = true;
Chris@16 523 }
Chris@16 524 }
Chris@16 525 assigner::assign(start,next,tok);
Chris@16 526 return true;
Chris@16 527 }
Chris@16 528
Chris@16 529 private:
Chris@16 530 string_type m_kept_delims;
Chris@16 531 string_type m_dropped_delims;
Chris@16 532 bool m_use_ispunct;
Chris@16 533 bool m_use_isspace;
Chris@16 534 empty_token_policy m_empty_tokens;
Chris@16 535 bool m_output_done;
Chris@16 536
Chris@16 537 bool is_kept(Char E) const
Chris@16 538 {
Chris@16 539 if (m_kept_delims.length())
Chris@16 540 return m_kept_delims.find(E) != string_type::npos;
Chris@16 541 else if (m_use_ispunct) {
Chris@16 542 return Traits::ispunct(E) != 0;
Chris@16 543 } else
Chris@16 544 return false;
Chris@16 545 }
Chris@16 546 bool is_dropped(Char E) const
Chris@16 547 {
Chris@16 548 if (m_dropped_delims.length())
Chris@16 549 return m_dropped_delims.find(E) != string_type::npos;
Chris@16 550 else if (m_use_isspace) {
Chris@16 551 return Traits::isspace(E) != 0;
Chris@16 552 } else
Chris@16 553 return false;
Chris@16 554 }
Chris@16 555 };
Chris@16 556
Chris@16 557 //===========================================================================
Chris@16 558 // The following class is DEPRECATED, use class char_separator instead.
Chris@16 559 //
Chris@16 560 // The char_delimiters_separator class, which is a model of
Chris@16 561 // TokenizerFunction. char_delimiters_separator breaks a string
Chris@16 562 // into tokens based on character delimiters. There are 2 types of
Chris@16 563 // delimiters. returnable delimiters can be returned as
Chris@16 564 // tokens. These are often punctuation. nonreturnable delimiters
Chris@16 565 // cannot be returned as tokens. These are often whitespace
Chris@16 566
Chris@16 567 // The out of the box GCC 2.95 on cygwin does not have a char_traits class.
Chris@16 568 template <class Char,
Chris@16 569 class Tr = BOOST_DEDUCED_TYPENAME std::basic_string<Char>::traits_type >
Chris@16 570 class char_delimiters_separator {
Chris@16 571 private:
Chris@16 572
Chris@16 573 typedef tokenizer_detail::traits_extension<Tr> Traits;
Chris@16 574 typedef std::basic_string<Char,Tr> string_type;
Chris@16 575 string_type returnable_;
Chris@16 576 string_type nonreturnable_;
Chris@16 577 bool return_delims_;
Chris@16 578 bool no_ispunct_;
Chris@16 579 bool no_isspace_;
Chris@16 580
Chris@16 581 bool is_ret(Char E)const
Chris@16 582 {
Chris@16 583 if (returnable_.length())
Chris@16 584 return returnable_.find(E) != string_type::npos;
Chris@16 585 else{
Chris@16 586 if (no_ispunct_) {return false;}
Chris@16 587 else{
Chris@16 588 int r = Traits::ispunct(E);
Chris@16 589 return r != 0;
Chris@16 590 }
Chris@16 591 }
Chris@16 592 }
Chris@16 593 bool is_nonret(Char E)const
Chris@16 594 {
Chris@16 595 if (nonreturnable_.length())
Chris@16 596 return nonreturnable_.find(E) != string_type::npos;
Chris@16 597 else{
Chris@16 598 if (no_isspace_) {return false;}
Chris@16 599 else{
Chris@16 600 int r = Traits::isspace(E);
Chris@16 601 return r != 0;
Chris@16 602 }
Chris@16 603 }
Chris@16 604 }
Chris@16 605
Chris@16 606 public:
Chris@16 607 explicit char_delimiters_separator(bool return_delims = false,
Chris@16 608 const Char* returnable = 0,
Chris@16 609 const Char* nonreturnable = 0)
Chris@16 610 : returnable_(returnable ? returnable : string_type().c_str()),
Chris@16 611 nonreturnable_(nonreturnable ? nonreturnable:string_type().c_str()),
Chris@16 612 return_delims_(return_delims), no_ispunct_(returnable!=0),
Chris@16 613 no_isspace_(nonreturnable!=0) { }
Chris@16 614
Chris@16 615 void reset() { }
Chris@16 616
Chris@16 617 public:
Chris@16 618
Chris@16 619 template <typename InputIterator, typename Token>
Chris@16 620 bool operator()(InputIterator& next, InputIterator end,Token& tok) {
Chris@16 621 tok = Token();
Chris@16 622
Chris@16 623 // skip past all nonreturnable delims
Chris@16 624 // skip past the returnable only if we are not returning delims
Chris@16 625 for (;next!=end && ( is_nonret(*next) || (is_ret(*next)
Chris@16 626 && !return_delims_ ) );++next) { }
Chris@16 627
Chris@16 628 if (next == end) {
Chris@16 629 return false;
Chris@16 630 }
Chris@16 631
Chris@16 632 // if we are to return delims and we are one a returnable one
Chris@16 633 // move past it and stop
Chris@16 634 if (is_ret(*next) && return_delims_) {
Chris@16 635 tok+=*next;
Chris@16 636 ++next;
Chris@16 637 }
Chris@16 638 else
Chris@16 639 // append all the non delim characters
Chris@16 640 for (;next!=end && !is_nonret(*next) && !is_ret(*next);++next)
Chris@16 641 tok+=*next;
Chris@16 642
Chris@16 643
Chris@16 644 return true;
Chris@16 645 }
Chris@16 646 };
Chris@16 647
Chris@16 648
Chris@16 649 } //namespace boost
Chris@16 650
Chris@16 651 #endif