Chris@16
|
1 //
|
Chris@16
|
2 // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
|
Chris@16
|
3 //
|
Chris@16
|
4 // Distributed under the Boost Software License, Version 1.0. (See
|
Chris@16
|
5 // accompanying file LICENSE_1_0.txt or copy at
|
Chris@16
|
6 // http://www.boost.org/LICENSE_1_0.txt)
|
Chris@16
|
7 //
|
Chris@16
|
8 #ifndef BOOST_LOCALE_COLLATOR_HPP_INCLUDED
|
Chris@16
|
9 #define BOOST_LOCALE_COLLATOR_HPP_INCLUDED
|
Chris@16
|
10
|
Chris@16
|
11 #include <boost/locale/config.hpp>
|
Chris@16
|
12 #ifdef BOOST_MSVC
|
Chris@16
|
13 # pragma warning(push)
|
Chris@16
|
14 # pragma warning(disable : 4275 4251 4231 4660)
|
Chris@16
|
15 #endif
|
#include <cstddef>
#include <locale>
#include <string>
Chris@16
|
17
|
Chris@16
|
18
|
namespace boost {
    namespace locale {

        class info;

        ///
        /// \defgroup collation Collation
        ///
        /// This module introduces collation related classes
        ///
        /// @{

        ///
        /// \brief A base class that holds the collation level flags
        ///
        class collator_base {
        public:
            ///
            /// Unicode collation level types
            ///
            // NOTE: kept as "typedef enum" on purpose; turning it into a named enum
            // could change how some compilers mangle the type in exported symbols.
            typedef enum {
                primary     = 0, ///< 1st collation level: base letters
                secondary   = 1, ///< 2nd collation level: letters and accents
                tertiary    = 2, ///< 3rd collation level: letters, accents and case
                quaternary  = 3, ///< 4th collation level: letters, accents, case and punctuation
                identical   = 4  ///< identical collation level: include code-point comparison
            } level_type;
        };

        ///
        /// \brief Collation facet.
        ///
        /// It reimplements the standard C++ std::collate facet, so a std::locale that carries it
        /// can be used for direct string comparison, and it adds overloads that take an explicit
        /// collation level.
        ///
        /// The class is abstract: a concrete facet has to implement the three level-aware
        /// do_compare(), do_transform() and do_hash() members.
        ///
        template<typename CharType>
        class collator :
            public std::collate<CharType>,
            public collator_base
        {
        public:
            ///
            /// Type of the underlying character
            ///
            typedef CharType char_type;
            ///
            /// Type of string used with this facet
            ///
            typedef std::basic_string<CharType> string_type;

            ///
            /// Compare two strings in the ranges [b1,e1) and [b2,e2) using the collation level \a level.
            /// Calls do_compare
            ///
            /// Returns -1 if the first of the two strings sorts before the second, 1 if it sorts after it
            /// and 0 if they are considered equal.
            ///
            int compare(level_type level,
                        char_type const *b1,char_type const *e1,
                        char_type const *b2,char_type const *e2) const
            {
                return do_compare(level,b1,e1,b2,e2);
            }

            ///
            /// Create a binary string that can be compared to other such strings in order to get the
            /// collation order. The string is created for the text in the range [b,e). It is useful when
            /// the same text takes part in many comparisons.
            ///
            /// The transformation follows these rules:
            /// \code
            ///   compare(level,b1,e1,b2,e2) == sign( transform(level,b1,e1).compare(transform(level,b2,e2)) );
            /// \endcode
            ///
            /// Calls do_transform
            ///
            string_type transform(level_type level,char_type const *b,char_type const *e) const
            {
                return do_transform(level,b,e);
            }

            ///
            /// Calculate a hash of the text in the range [b,e). The value can be used for collation
            /// sensitive string comparison.
            ///
            /// If compare(level,b1,e1,b2,e2) == 0 then hash(level,b1,e1) == hash(level,b2,e2)
            ///
            /// Calls do_hash
            ///
            long hash(level_type level,char_type const *b,char_type const *e) const
            {
                return do_hash(level,b,e);
            }

            ///
            /// Compare two strings \a l and \a r using the collation level \a level
            ///
            /// Returns -1 if the first of the two strings sorts before the second, 1 if it sorts after it
            /// and 0 if they are considered equal.
            ///
            int compare(level_type level,string_type const &l,string_type const &r) const
            {
                return do_compare(level,l.data(),l.data()+l.size(),r.data(),r.data()+r.size());
            }

            ///
            /// Calculate a hash of the string \a s that can be used for collation sensitive string comparison
            ///
            /// If compare(level,s1,s2) == 0 then hash(level,s1) == hash(level,s2)
            ///
            long hash(level_type level,string_type const &s) const
            {
                return do_hash(level,s.data(),s.data()+s.size());
            }

            ///
            /// Create a binary string from the string \a s that can be compared to other such strings;
            /// useful when the same string takes part in many comparisons.
            ///
            /// The transformation follows these rules:
            /// \code
            ///   compare(level,s1,s2) == sign( transform(level,s1).compare(transform(level,s2)) );
            /// \endcode
            ///
            string_type transform(level_type level,string_type const &s) const
            {
                return do_transform(level,s.data(),s.data()+s.size());
            }

        protected:

            ///
            /// Constructor of the collator object; \a refs is forwarded unchanged to std::collate
            ///
            explicit collator(std::size_t refs = 0) : std::collate<CharType>(refs)
            {
            }

            virtual ~collator()
            {
            }

            ///
            /// Replaces the default std::collate comparison hook, which does not take a collation
            /// level into account. Forwards to the level-aware overload with the \c identical level.
            ///
            virtual int do_compare( char_type const *b1,char_type const *e1,
                                    char_type const *b2,char_type const *e2) const
            {
                return do_compare(identical,b1,e1,b2,e2);
            }

            ///
            /// Replaces the default std::collate transformation hook, which does not take a collation
            /// level into account. Forwards to the level-aware overload with the \c identical level.
            ///
            virtual string_type do_transform(char_type const *b,char_type const *e) const
            {
                return do_transform(identical,b,e);
            }

            ///
            /// Replaces the default std::collate hash hook, which does not take a collation
            /// level into account. Forwards to the level-aware overload with the \c identical level.
            ///
            virtual long do_hash(char_type const *b,char_type const *e) const
            {
                return do_hash(identical,b,e);
            }

            ///
            /// Actual function that performs comparison between the strings. For details see the
            /// compare member function. Must be implemented by a concrete facet.
            ///
            virtual int do_compare( level_type level,
                                    char_type const *b1,char_type const *e1,
                                    char_type const *b2,char_type const *e2) const = 0;

            ///
            /// Actual function that performs transformation. For details see the transform member
            /// function. Must be implemented by a concrete facet.
            ///
            virtual string_type do_transform(level_type level,char_type const *b,char_type const *e) const = 0;

            ///
            /// Actual function that calculates the hash. For details see the hash member function.
            /// Must be implemented by a concrete facet.
            ///
            virtual long do_hash(level_type level,char_type const *b,char_type const *e) const = 0;

        };

        ///
        /// \brief This class can be used in STL algorithms and containers for comparison of strings
        /// with an explicitly chosen collation level (\c identical unless specified otherwise)
        ///
        /// For example:
        ///
        /// \code
        ///  std::map<std::string,std::string,comparator<char,collator_base::secondary> > data;
        /// \endcode
        ///
        /// Would create a map the keys of which are sorted using secondary collation level
        ///
        template<typename CharType,collator_base::level_type default_level = collator_base::identical>
        struct comparator
        {
        public:
            ///
            /// Create a comparator class for locale \a l and with collation level \a level
            ///
            /// \note The constructor only stores a copy of \a l; the \ref collator facet is looked up
            /// on every call of operator(), which throws std::bad_cast if \a l does not have it installed
            ///
            comparator(std::locale const &l=std::locale(),collator_base::level_type level=default_level) :
                locale_(l),
                level_(level)
            {
            }

            ///
            /// Compare two strings -- equivalent to return left < right according to collation rules
            ///
            /// \note throws std::bad_cast if the stored locale does not have \ref collator facet installed
            ///
            bool operator()(std::basic_string<CharType> const &left,std::basic_string<CharType> const &right) const
            {
                return std::use_facet<collator<CharType> >(locale_).compare(level_,left,right) < 0;
            }
        private:
            std::locale locale_;
            collator_base::level_type level_;
        };

        ///
        ///@}
        ///

    } // locale
} // boost
Chris@16
|
250
|
Chris@16
|
251 #ifdef BOOST_MSVC
|
Chris@16
|
252 #pragma warning(pop)
|
Chris@16
|
253 #endif
|
Chris@16
|
254
|
Chris@16
|
255
|
Chris@16
|
256 #endif
|
Chris@16
|
257 ///
|
Chris@16
|
258 /// \example collate.cpp
|
Chris@16
|
259 /// Example of using collation functions
|
Chris@16
|
260 ///
|
Chris@16
|
261 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
|