Chris@16
|
1 // (C) Copyright Jeremy Siek 2004
|
Chris@16
|
2 // Distributed under the Boost Software License, Version 1.0. (See
|
Chris@16
|
3 // accompanying file LICENSE_1_0.txt or copy at
|
Chris@16
|
4 // http://www.boost.org/LICENSE_1_0.txt)
|
Chris@16
|
5
|
Chris@16
|
6 #ifndef BOOST_STRINGTOK_HPP
|
Chris@16
|
7 #define BOOST_STRINGTOK_HPP
|
Chris@16
|
8
|
Chris@16
|
9 /*
|
Chris@16
|
10 * stringtok.hpp -- Breaks a string into tokens. This is an example for lib3.
|
Chris@16
|
11 *
|
Chris@16
|
12 * Template function looks like this:
|
Chris@16
|
13 *
|
Chris@16
|
14 * template <typename Container>
|
Chris@16
|
15 * void stringtok (Container &l,
|
Chris@16
|
16 * string const &s,
|
Chris@16
|
17 * char const * const ws = " \t\n");
|
Chris@16
|
18 *
|
Chris@16
|
19 * A nondestructive version of strtok() that handles its own memory and can
|
Chris@16
|
20 * be broken up by any character(s). Does all the work at once rather than
|
Chris@16
|
21 * in an invocation loop like strtok() requires.
|
Chris@16
|
22 *
|
Chris@16
|
23 * Container is any type that supports push_back(a_string), although using
|
Chris@16
|
24 * list<string> and deque<string> are indicated due to their O(1) push_back.
|
Chris@16
|
25 * (I prefer deque<> because op[]/at() is available as well.) The first
|
Chris@16
|
26 * parameter references an existing Container.
|
Chris@16
|
27 *
|
Chris@16
|
28 * s is the string to be tokenized. From the parameter declaration, it can
|
Chris@16
|
29 * be seen that s is not affected. Since references-to-const may refer to
|
Chris@16
|
30 * temporaries, you could use stringtok(some_container, readline("")) when
|
Chris@16
|
31 * using the GNU readline library.
|
Chris@16
|
32 *
|
Chris@16
|
33 * The final parameter is an array of characters that serve as whitespace.
|
Chris@16
|
34 * Whitespace characters default to one or more of tab, space, and newline,
|
Chris@16
|
35 * in any combination.
|
Chris@16
|
36 *
|
Chris@16
|
37 * 'l' need not be empty on entry. On return, 'l' will have the token
|
Chris@16
|
38 * strings appended.
|
Chris@16
|
39 *
|
Chris@16
|
40 *
|
Chris@16
|
41 * [Example:
|
Chris@16
|
42 * list<string> ls;
|
Chris@16
|
43 * stringtok (ls, " this \t is\t\n a test ");
|
Chris@16
|
44 * for (list<string>::const_iterator i = ls.begin();
|
Chris@16
|
45 * i != ls.end(); ++i)
|
Chris@16
|
46 * {
|
Chris@16
|
47 * cerr << ':' << (*i) << ":\n";
|
Chris@16
|
48 * }
|
Chris@16
|
49 *
|
Chris@16
|
50 * would print
|
Chris@16
|
51 * :this:
|
Chris@16
|
52 * :is:
|
Chris@16
|
53 * :a:
|
Chris@16
|
54 * :test:
|
Chris@16
|
55 * -end example]
|
Chris@16
|
56 *
|
Chris@16
|
57 * pedwards@jaj.com May 1999
|
Chris@16
|
58 */
|
Chris@16
|
59
|
Chris@16
|
60
|
Chris@16
|
61 #include <string>
|
Chris@16
|
62 #include <cstring> // for strchr
|
Chris@16
|
63
|
Chris@16
|
64
|
Chris@16
|
/*****************************************************************
 * isws -- reports whether a character is one of the delimiters.
 *
 *   c     the character to classify
 *   wstr  NUL-terminated list of delimiter characters; must not be
 *         a null pointer, since it is handed straight to strchr()
 *
 * Returns true when c occurs in wstr ahead of its terminator, and
 * false otherwise.
 *
 * strchr() considers the terminating NUL to be part of the string
 * it searches, so strchr(wstr, '\0') always succeeds.  The explicit
 * test for '\0' below keeps a NUL character from being reported as
 * whitespace for every possible delimiter list.
 */

inline bool
isws (char c, char const * const wstr)
{
  return (c != '\0') && (std::strchr(wstr, c) != NULL);
}
|
Chris@16
|
76
|
Chris@16
|
77
|
Chris@16
|
namespace boost {

  /*****************************************************************
   * stringtok -- splits s into tokens and appends them to l.
   *
   *   l   container receiving the tokens; only push_back(std::string)
   *       is used, so anything already stored in l is left in place
   *   s   the string to tokenize; taken by reference-to-const and
   *       never modified
   *   ws  NUL-terminated list of delimiter characters (defaults to
   *       space, tab and newline); it is not examined when s is empty
   *
   * A token is a maximal run of characters that do not appear in ws.
   * Leading, trailing and repeated delimiters never produce empty
   * tokens.  Nothing is returned; the result is the growth of l.
   *
   * The scanning is delegated to std::string::find_first_not_of and
   * std::string::find_first_of.  Those compare against the characters
   * of ws only, so a '\0' embedded in s is ordinary token content
   * instead of being mistaken for a delimiter.
   *
   * This is still tied to std::string; templatizing on basic_string
   * (or a more generic string type) remains a possible improvement.
   */
  template <typename Container>
  void
  stringtok (Container &l, std::string const &s, char const * const ws = " \t\n")
  {
    typedef std::string::size_type size_type;
    const size_type npos = std::string::npos;
    const size_type S = s.size();
    size_type i = 0;

    while (i < S) {
      // skip the delimiters in front of the next token
      i = s.find_first_not_of(ws, i);
      if (i == npos) return;     // nothing left but delimiters

      // the token runs up to the next delimiter, or to the end of s
      size_type j = s.find_first_of(ws, i);
      if (j == npos) j = S;

      l.push_back(s.substr(i, j - i));

      // j is either S (loop ends) or a delimiter, which the next
      // pass skips together with any delimiters that follow it
      i = j;
    }
  }

} // namespace boost
|
Chris@16
|
115
|
Chris@16
|
116 #endif // BOOST_STRINGTOK_HPP
|