annotate DEPENDENCIES/generic/include/boost/xpressive/traits/cpp_regex_traits.hpp @ 133:4acb5d8d80b6 tip

Don't fail environmental check if README.md exists (but .txt and no-suffix don't)
author Chris Cannam
date Tue, 30 Jul 2019 12:25:44 +0100
parents c530137014c0
children
rev   line source
Chris@16 1 ///////////////////////////////////////////////////////////////////////////////
Chris@16 2 /// \file cpp_regex_traits.hpp
Chris@16 3 /// Contains the definition of the cpp_regex_traits\<\> template, which is a
Chris@16 4 /// wrapper for std::locale that can be used to customize the behavior of
Chris@16 5 /// static and dynamic regexes.
Chris@16 6 //
Chris@16 7 // Copyright 2008 Eric Niebler. Distributed under the Boost
Chris@16 8 // Software License, Version 1.0. (See accompanying file
Chris@16 9 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
Chris@16 10
Chris@16 11 #ifndef BOOST_XPRESSIVE_TRAITS_CPP_REGEX_TRAITS_HPP_EAN_10_04_2005
Chris@16 12 #define BOOST_XPRESSIVE_TRAITS_CPP_REGEX_TRAITS_HPP_EAN_10_04_2005
Chris@16 13
Chris@16 14 // MS compatible compilers support #pragma once
Chris@101 15 #if defined(_MSC_VER)
Chris@16 16 # pragma once
Chris@16 17 #endif
Chris@16 18
Chris@16 19 #include <ios>
Chris@16 20 #include <string>
Chris@16 21 #include <locale>
Chris@16 22 #include <sstream>
Chris@16 23 #include <climits>
Chris@16 24 #include <boost/config.hpp>
Chris@16 25 #include <boost/assert.hpp>
Chris@16 26 #include <boost/integer.hpp>
Chris@16 27 #include <boost/mpl/assert.hpp>
Chris@16 28 #include <boost/static_assert.hpp>
Chris@16 29 #include <boost/detail/workaround.hpp>
Chris@16 30 #include <boost/type_traits/is_same.hpp>
Chris@16 31 #include <boost/xpressive/detail/detail_fwd.hpp>
Chris@16 32 #include <boost/xpressive/detail/utility/literals.hpp>
Chris@16 33
Chris@16 34 // From John Maddock:
Chris@16 35 // Fix for gcc prior to 3.4: std::ctype<wchar_t> doesn't allow masks to be combined, for example:
Chris@16 36 // std::use_facet<std::ctype<wchar_t> >(locale()).is(std::ctype_base::lower|std::ctype_base::upper, L'a');
Chris@16 37 // incorrectly returns false.
Chris@16 38 // NOTE: later versions of gcc define __GLIBCXX__, not __GLIBCPP__, so the check below only matches the older library.
// When BOOST_XPRESSIVE_BUGGY_CTYPE_FACET is defined, the code further down in
// this file queries the ctype facet one category at a time instead of passing
// it a combined mask.
Chris@16 39 #if BOOST_WORKAROUND(__GLIBCPP__, != 0)
Chris@16 40 # define BOOST_XPRESSIVE_BUGGY_CTYPE_FACET
Chris@16 41 #endif
Chris@16 42
Chris@16 43 namespace boost { namespace xpressive
Chris@16 44 {
Chris@16 45
Chris@16 46 namespace detail
Chris@16 47 {
Chris@16 48 // umask_t: an unsigned integral typedef of the same size as std::ctype_base::mask.
// The assertion on the following row rejects, at compile time, any platform
// where the two sizes differ.
Chris@16 49 typedef boost::uint_t<sizeof(std::ctype_base::mask) * CHAR_BIT>::least umask_t;
Chris@16 50 BOOST_MPL_ASSERT_RELATION(sizeof(std::ctype_base::mask), ==, sizeof(umask_t));
Chris@16 51
Chris@16 52 // Calculate what the size of the umaskex_t type should be to fit the 3 extra bitmasks
Chris@16 53 // 11 char categories in ctype_base
Chris@16 54 // + 3 extra categories for xpressive
Chris@16 55 // = 14 total bits needed
// umaskex_bits is therefore the larger of 14 and the bit width of umask_t.
Chris@16 56 int const umaskex_bits = (14 > (sizeof(umask_t) * CHAR_BIT)) ? 14 : sizeof(umask_t) * CHAR_BIT;
Chris@16 57
Chris@16 58 // umaskex_t: an unsigned integral type with at least umaskex_bits bits.
// The assertion checks that it is never smaller than umask_t, so every
// umask_t value can be stored in it.
Chris@16 59 typedef boost::uint_t<umaskex_bits>::fast umaskex_t;
Chris@16 60 BOOST_MPL_ASSERT_RELATION(sizeof(umask_t), <=, sizeof(umaskex_t));
Chris@16 61
Chris@16 62 // mask_cast<Mask>::value is the ctype mask Mask converted to umask_t and stored as a umaskex_t compile-time constant.
Chris@16 63 template<std::ctype_base::mask Mask>
Chris@16 64 struct mask_cast
Chris@16 65 {
Chris@16 66 BOOST_STATIC_CONSTANT(umaskex_t, value = static_cast<umask_t>(Mask));
Chris@16 67 };
Chris@16 68
Chris@16 69 #ifdef __CYGWIN__
Chris@16 70 // Work around a gcc warning on cygwin
// The specialization for ctype_base::print skips the static_cast and spells the
// value as the octal literal 0227. The assertion keeps that literal honest: it
// fails to compile if print ever stops comparing equal to the octal-227
// character literal.
Chris@16 71 template<>
Chris@16 72 struct mask_cast<std::ctype_base::print>
Chris@16 73 {
Chris@16 74 BOOST_MPL_ASSERT_RELATION('\227', ==, std::ctype_base::print);
Chris@16 75 BOOST_STATIC_CONSTANT(umaskex_t, value = 0227);
Chris@16 76 };
Chris@16 77 #endif
Chris@16 78
// Namespace-scope definition of the static constant. It is only emitted when
// BOOST_NO_INCLASS_MEMBER_INITIALIZATION is not defined.
Chris@16 79 #ifndef BOOST_NO_INCLASS_MEMBER_INITIALIZATION
Chris@16 80 template<std::ctype_base::mask Mask>
Chris@16 81 umaskex_t const mask_cast<Mask>::value;
Chris@16 82 #endif
Chris@16 83
Chris@16 84 #ifndef BOOST_XPRESSIVE_BUGGY_CTYPE_FACET
Chris@16 85 // an unsigned integer with the highest bit set (the top bit of umaskex_t)
Chris@16 86 umaskex_t const highest_bit = static_cast<umaskex_t>(1) << (sizeof(umaskex_t) * CHAR_BIT - 1);
Chris@16 87
Chris@16 88 ///////////////////////////////////////////////////////////////////////////////
Chris@16 89 // unused_mask
Chris@16 90 // Compile-time search for a single bit that is not set in In.
// Out is the candidate bit. It starts at highest_bit and is shifted right by
// one position per recursion step. Done is true as soon as (Out & In) == 0.
// The primary template below is the step taken while the candidate is still
// in use: it recurses with the next lower bit.
Chris@16 91 template<umaskex_t In, umaskex_t Out = highest_bit, bool Done = (0 == (Out & In))>
Chris@16 92 struct unused_mask
Chris@16 93 {
// Reaching this point with Out == 1 means the lowest bit is in use as well,
// so there is no free bit left. Fail the build rather than recurse with Out == 0.
Chris@16 94 BOOST_STATIC_ASSERT(1 != Out);
Chris@16 95 BOOST_STATIC_CONSTANT(umaskex_t, value = (unused_mask<In, (Out >> 1)>::value));
Chris@16 96 };
Chris@16 97
// Terminating case: the candidate bit Out is free in In, so it is the result.
Chris@16 98 template<umaskex_t In, umaskex_t Out>
Chris@16 99 struct unused_mask<In, Out, true>
Chris@16 100 {
Chris@16 101 BOOST_STATIC_CONSTANT(umaskex_t, value = Out);
Chris@16 102 };
Chris@16 103
// Namespace-scope definition of the static constant. It is only emitted when
// BOOST_NO_INCLASS_MEMBER_INITIALIZATION is not defined.
Chris@16 104 #ifndef BOOST_NO_INCLASS_MEMBER_INITIALIZATION
Chris@16 105 template<umaskex_t In, umaskex_t Out, bool Done>
Chris@16 106 umaskex_t const unused_mask<In, Out, Done>::value;
Chris@16 107 #endif
Chris@16 108
// The eleven std::ctype_base categories, each converted to umaskex_t through
// mask_cast so they can be combined with the xpressive-specific bits below.
Chris@16 109 umaskex_t const std_ctype_alnum = mask_cast<std::ctype_base::alnum>::value;
Chris@16 110 umaskex_t const std_ctype_alpha = mask_cast<std::ctype_base::alpha>::value;
Chris@16 111 umaskex_t const std_ctype_cntrl = mask_cast<std::ctype_base::cntrl>::value;
Chris@16 112 umaskex_t const std_ctype_digit = mask_cast<std::ctype_base::digit>::value;
Chris@16 113 umaskex_t const std_ctype_graph = mask_cast<std::ctype_base::graph>::value;
Chris@16 114 umaskex_t const std_ctype_lower = mask_cast<std::ctype_base::lower>::value;
Chris@16 115 umaskex_t const std_ctype_print = mask_cast<std::ctype_base::print>::value;
Chris@16 116 umaskex_t const std_ctype_punct = mask_cast<std::ctype_base::punct>::value;
Chris@16 117 umaskex_t const std_ctype_space = mask_cast<std::ctype_base::space>::value;
Chris@16 118 umaskex_t const std_ctype_upper = mask_cast<std::ctype_base::upper>::value;
Chris@16 119 umaskex_t const std_ctype_xdigit = mask_cast<std::ctype_base::xdigit>::value;
Chris@16 120
Chris@16 121 // Reserve some bits for the implementation
// The value depends on which standard library macro is defined. It is OR-ed
// into all_ctype_masks below, so unused_mask never hands these bits out.
// NOTE(review): what each library uses these bits for is not visible here.
Chris@16 122 #if defined(__GLIBCXX__)
Chris@16 123 umaskex_t const std_ctype_reserved = 0x8000;
Chris@16 124 #elif defined(_CPPLIB_VER) && defined(BOOST_WINDOWS)
Chris@16 125 umaskex_t const std_ctype_reserved = 0x8200;
Chris@16 126 #elif defined(_LIBCPP_VERSION)
Chris@16 127 umaskex_t const std_ctype_reserved = 0x8000;
Chris@16 128 #else
Chris@16 129 umaskex_t const std_ctype_reserved = 0;
Chris@16 130 #endif
Chris@16 131
Chris@16 132 // Bitwise-or all the ctype masks together: every bit that is already spoken for
Chris@16 133 umaskex_t const all_ctype_masks = std_ctype_reserved
Chris@16 134 | std_ctype_alnum | std_ctype_alpha | std_ctype_cntrl | std_ctype_digit
Chris@16 135 | std_ctype_graph | std_ctype_lower | std_ctype_print | std_ctype_punct
Chris@16 136 | std_ctype_space | std_ctype_upper | std_ctype_xdigit;
Chris@16 137
// Each extra mask below takes the highest bit that is still free. Every
// unused_mask argument includes the masks allocated before it, so the three
// results are distinct bits.
Chris@16 138 // define a new mask for "underscore" ("word" == alnum | underscore)
Chris@16 139 umaskex_t const non_std_ctype_underscore = unused_mask<all_ctype_masks>::value;
Chris@16 140
Chris@16 141 // define a new mask for "blank"
Chris@16 142 umaskex_t const non_std_ctype_blank = unused_mask<all_ctype_masks | non_std_ctype_underscore>::value;
Chris@16 143
Chris@16 144 // define a new mask for "newline"
Chris@16 145 umaskex_t const non_std_ctype_newline = unused_mask<all_ctype_masks | non_std_ctype_underscore | non_std_ctype_blank>::value;
Chris@16 146
Chris@16 147 #else
Chris@16 148 ///////////////////////////////////////////////////////////////////////////////
Chris@16 149 // Ugly work-around for buggy ctype facets.
// In this configuration the class constants are private bit positions 0..13
// rather than the library's own mask values. Bits 0..10 stand for the standard
// categories and bits 11..13 for the three xpressive-specific classes.
Chris@16 150 umaskex_t const std_ctype_alnum = 1 << 0;
Chris@16 151 umaskex_t const std_ctype_alpha = 1 << 1;
Chris@16 152 umaskex_t const std_ctype_cntrl = 1 << 2;
Chris@16 153 umaskex_t const std_ctype_digit = 1 << 3;
Chris@16 154 umaskex_t const std_ctype_graph = 1 << 4;
Chris@16 155 umaskex_t const std_ctype_lower = 1 << 5;
Chris@16 156 umaskex_t const std_ctype_print = 1 << 6;
Chris@16 157 umaskex_t const std_ctype_punct = 1 << 7;
Chris@16 158 umaskex_t const std_ctype_space = 1 << 8;
Chris@16 159 umaskex_t const std_ctype_upper = 1 << 9;
Chris@16 160 umaskex_t const std_ctype_xdigit = 1 << 10;
Chris@16 161 umaskex_t const non_std_ctype_underscore = 1 << 11;
Chris@16 162 umaskex_t const non_std_ctype_blank = 1 << 12;
Chris@16 163 umaskex_t const non_std_ctype_newline = 1 << 13;
Chris@16 164
// std_masks[k] is the library mask for the category that owns private bit k
// above. The order of the entries must stay in step with the constants:
// cpp_regex_traits_base::is() indexes this array with mylog2(bit).
Chris@16 165 static umaskex_t const std_masks[] =
Chris@16 166 {
Chris@16 167 mask_cast<std::ctype_base::alnum>::value
Chris@16 168 , mask_cast<std::ctype_base::alpha>::value
Chris@16 169 , mask_cast<std::ctype_base::cntrl>::value
Chris@16 170 , mask_cast<std::ctype_base::digit>::value
Chris@16 171 , mask_cast<std::ctype_base::graph>::value
Chris@16 172 , mask_cast<std::ctype_base::lower>::value
Chris@16 173 , mask_cast<std::ctype_base::print>::value
Chris@16 174 , mask_cast<std::ctype_base::punct>::value
Chris@16 175 , mask_cast<std::ctype_base::space>::value
Chris@16 176 , mask_cast<std::ctype_base::upper>::value
Chris@16 177 , mask_cast<std::ctype_base::xdigit>::value
Chris@16 178 };
Chris@16 179
// Returns the zero-based position of the set bit in i, for a value that has
// exactly one bit set among bits 0..11.
//
// Each string literal is a lookup table for one 4-bit nibble of i, indexed by
// the nibble value. Only indices 1, 2, 4 and 8 hold a bit position (written as
// octal escapes); every other entry is zero. The three lookups are added, and
// for a single-bit input at most one of them is non-zero. The shift counts 04
// and 010 are octal for 4 and 8.
//
// NOTE(review): each literal has ten elements, terminator included, so a
// nibble value above 9 would index past its end. The one caller visible in
// this file passes an isolated single bit, which keeps every index in range.
Chris@16 180 inline int mylog2(umaskex_t i)
Chris@16 181 {
Chris@16 182 return "\0\0\1\0\2\0\0\0\3"[i & 0xf]
Chris@16 183 + "\0\4\5\0\6\0\0\0\7"[(i & 0xf0) >> 04]
Chris@16 184 + "\0\10\11\0\12\0\0\0\13"[(i & 0xf00) >> 010];
Chris@16 185 }
Chris@16 186 #endif
Chris@16 187
Chris@16 188 // convenient constant for the extra masks: the union of the underscore, blank and newline bits
Chris@16 189 umaskex_t const non_std_ctype_masks = non_std_ctype_underscore | non_std_ctype_blank | non_std_ctype_newline;
Chris@16 190
Chris@16 191 ///////////////////////////////////////////////////////////////////////////////
Chris@16 192 // cpp_regex_traits_base
Chris@16 193 // BUGBUG this should be replaced with a regex facet that lets you query for
Chris@16 194 // an array of underscore characters and an array of line separator characters.
//
// Primary template: character classification with no cached state. Standard
// classes are answered by the ctype facet on every call. The three
// xpressive-specific classes (blank, underscore, newline) are answered by the
// private helpers at the bottom of the struct.
// SizeOfChar defaults to sizeof(Char); it is only read by is_newline().
Chris@16 195 template<typename Char, std::size_t SizeOfChar = sizeof(Char)>
Chris@16 196 struct cpp_regex_traits_base
Chris@16 197 {
Chris@16 198 protected:
// Nothing to recompute when the locale changes: this template keeps no table.
Chris@16 199 void imbue(std::locale const &)
Chris@16 200 {
Chris@16 201 }
Chris@16 202
// Returns true if ch belongs to at least one of the classes whose bits are
// set in mask.
//   ct    the ctype facet used for the standard classes
//   ch    the character to classify
//   mask  any combination of std_ctype_* and non_std_ctype_* bits
Chris@16 203 static bool is(std::ctype<Char> const &ct, Char ch, umaskex_t mask)
Chris@16 204 {
Chris@16 205 #ifndef BOOST_XPRESSIVE_BUGGY_CTYPE_FACET
Chris@16 206
// Normal path: narrow the mask to umask_t and ask the facet once.
Chris@16 207 if(ct.is((std::ctype_base::mask)(umask_t)mask, ch))
Chris@16 208 {
Chris@16 209 return true;
Chris@16 210 }
Chris@16 211
Chris@16 212 // HACKHACK Cygwin and mingw have buggy ctype facets for wchar_t
// When xdigit was requested, retry with the C classification function for
// values that fit in an unsigned char.
// NOTE(review): std::isxdigit(int) is declared in <cctype>, which is not
// among the includes visible at the top of this file. Confirm it is pulled
// in transitively on these platforms.
Chris@16 213 #if defined(__CYGWIN__) || defined(__MINGW32_VERSION)
Chris@16 214 if (std::ctype_base::xdigit == ((std::ctype_base::mask)(umask_t)mask & std::ctype_base::xdigit))
Chris@16 215 {
Chris@16 216 typename std::char_traits<Char>::int_type i = std::char_traits<Char>::to_int_type(ch);
Chris@16 217 if(UCHAR_MAX >= i && std::isxdigit(static_cast<int>(i)))
Chris@16 218 return true;
Chris@16 219 }
Chris@16 220 #endif
Chris@16 221
Chris@16 222 #else
Chris@16 223
// Buggy-facet path: drop the xpressive-specific bits, then walk the remaining
// bits one at a time, lowest first. (tmp & (~tmp+1)) isolates the lowest set
// bit. Each private bit is translated to the library mask through
// std_masks[mylog2(bit)], so the facet never sees a combined mask.
Chris@16 224 umaskex_t tmp = mask & ~non_std_ctype_masks;
Chris@16 225 for(umaskex_t i; 0 != (i = (tmp & (~tmp+1))); tmp &= ~i)
Chris@16 226 {
Chris@16 227 std::ctype_base::mask m = (std::ctype_base::mask)(umask_t)std_masks[mylog2(i)];
Chris@16 228 if(ct.is(m, ch))
Chris@16 229 {
Chris@16 230 return true;
Chris@16 231 }
Chris@16 232 }
Chris@16 233
Chris@16 234 #endif
Chris@16 235
// No standard class matched: try the xpressive-specific classes that were requested.
Chris@16 236 return ((mask & non_std_ctype_blank) && cpp_regex_traits_base::is_blank(ch))
Chris@16 237 || ((mask & non_std_ctype_underscore) && cpp_regex_traits_base::is_underscore(ch))
Chris@16 238 || ((mask & non_std_ctype_newline) && cpp_regex_traits_base::is_newline(ch));
Chris@16 239 }
Chris@16 240
Chris@16 241 private:
// The helpers below compare ch against wide literals. The compile-time
// assertions check that each narrow literal has the same value as its wide
// counterpart; presumably this is what makes the wide comparison valid for
// narrow Char types too.

// True for space and horizontal tab.
Chris@16 242 static bool is_blank(Char ch)
Chris@16 243 {
Chris@16 244 BOOST_MPL_ASSERT_RELATION('\t', ==, L'\t');
Chris@16 245 BOOST_MPL_ASSERT_RELATION(' ', ==, L' ');
Chris@16 246 return L' ' == ch || L'\t' == ch;
Chris@16 247 }
Chris@16 248
// True for the underscore character only.
Chris@16 249 static bool is_underscore(Char ch)
Chris@16 250 {
Chris@16 251 BOOST_MPL_ASSERT_RELATION('_', ==, L'_');
Chris@16 252 return L'_' == ch;
Chris@16 253 }
Chris@16 254
// True for carriage return, line feed and form feed. When Char is wider than
// one byte, the values 0x2028, 0x2029 and 0x85 also count.
// NOTE(review): those look like the Unicode line separator, paragraph
// separator and next-line code points, which assumes Char holds Unicode values.
Chris@16 255 static bool is_newline(Char ch)
Chris@16 256 {
Chris@16 257 BOOST_MPL_ASSERT_RELATION('\r', ==, L'\r');
Chris@16 258 BOOST_MPL_ASSERT_RELATION('\n', ==, L'\n');
Chris@16 259 BOOST_MPL_ASSERT_RELATION('\f', ==, L'\f');
Chris@16 260 return L'\r' == ch || L'\n' == ch || L'\f' == ch
Chris@16 261 || (1 < SizeOfChar && (0x2028u == ch || 0x2029u == ch || 0x85u == ch));
Chris@16 262 }
Chris@16 263 };
Chris@16 264
Chris@16 265 #ifndef BOOST_XPRESSIVE_BUGGY_CTYPE_FACET
Chris@16 266
// Specialization for one-byte character types. It is selected whenever
// sizeof(Char) == 1, because the primary template defaults SizeOfChar to
// sizeof(Char). Instead of asking the facet on every call, it caches the
// class bits of every possible character value in a table that imbue() fills.
Chris@16 267 template<typename Char>
Chris@16 268 struct cpp_regex_traits_base<Char, 1>
Chris@16 269 {
Chris@16 270 protected:
// Rebuilds masks_ for the given locale.
Chris@16 271 void imbue(std::locale const &loc)
Chris@16 272 {
// Build an array holding every value 0..UCHAR_MAX as a Char.
Chris@16 273 int i = 0;
Chris@16 274 Char allchars[UCHAR_MAX + 1];
Chris@16 275 for(i = 0; i <= static_cast<int>(UCHAR_MAX); ++i)
Chris@16 276 {
Chris@16 277 allchars[i] = static_cast<Char>(i);
Chris@16 278 }
Chris@16 279
// Classify the whole array with one range call to the facet.
Chris@16 280 std::ctype<Char> const &ct = BOOST_USE_FACET(std::ctype<Char>, loc);
Chris@16 281 std::ctype_base::mask tmp[UCHAR_MAX + 1];
Chris@16 282 ct.is(allchars, allchars + UCHAR_MAX + 1, tmp);
Chris@16 283 for(i = 0; i <= static_cast<int>(UCHAR_MAX); ++i)
Chris@16 284 {
Chris@16 285 this->masks_[i] = static_cast<umask_t>(tmp[i]);
// The facet must not report any of the bits set aside for the extra classes.
Chris@16 286 BOOST_ASSERT(0 == (this->masks_[i] & non_std_ctype_masks));
Chris@16 287 }
Chris@16 288
// Add the xpressive-specific class bits for the characters that carry them.
Chris@16 289 this->masks_[static_cast<unsigned char>('_')] |= non_std_ctype_underscore;
Chris@16 290 this->masks_[static_cast<unsigned char>(' ')] |= non_std_ctype_blank;
Chris@16 291 this->masks_[static_cast<unsigned char>('\t')] |= non_std_ctype_blank;
Chris@16 292 this->masks_[static_cast<unsigned char>('\n')] |= non_std_ctype_newline;
Chris@16 293 this->masks_[static_cast<unsigned char>('\r')] |= non_std_ctype_newline;
Chris@16 294 this->masks_[static_cast<unsigned char>('\f')] |= non_std_ctype_newline;
Chris@16 295 }
Chris@16 296
// Returns true if ch belongs to at least one class in mask. This is a pure
// table lookup: the facet argument is ignored, and ch is converted to
// unsigned char to form the index.
Chris@16 297 bool is(std::ctype<Char> const &, Char ch, umaskex_t mask) const
Chris@16 298 {
Chris@16 299 return 0 != (this->masks_[static_cast<unsigned char>(ch)] & mask);
Chris@16 300 }
Chris@16 301
Chris@16 302 private:
// Class bits for each character value, indexed by the value as unsigned char.
// Only written by imbue().
Chris@16 303 umaskex_t masks_[UCHAR_MAX + 1];
Chris@16 304 };
Chris@16 305
Chris@16 306 #endif
Chris@16 307
Chris@16 308 } // namespace detail
Chris@16 309
Chris@16 310
Chris@16 311 ///////////////////////////////////////////////////////////////////////////////
Chris@16 312 // cpp_regex_traits
Chris@16 313 //
Chris@16 314 /// \brief Encapsulates a \c std::locale for use by the
Chris@16 315 /// \c basic_regex\<\> class template.
Chris@16 316 template<typename Char>
Chris@16 317 struct cpp_regex_traits
Chris@16 318 : detail::cpp_regex_traits_base<Char>
Chris@16 319 {
Chris@16 320 typedef Char char_type;
Chris@16 321 typedef std::basic_string<char_type> string_type;
Chris@16 322 typedef std::locale locale_type;
Chris@16 323 typedef detail::umaskex_t char_class_type;
Chris@16 324 typedef regex_traits_version_2_tag version_tag;
Chris@16 325 typedef detail::cpp_regex_traits_base<Char> base_type;
Chris@16 326
Chris@16 327 /// Initialize a cpp_regex_traits object to use the specified std::locale,
Chris@16 328 /// or the global std::locale if none is specified.
Chris@16 329 ///
Chris@16 330 cpp_regex_traits(locale_type const &loc = locale_type())
Chris@16 331 : base_type()
Chris@16 332 , loc_()
Chris@16 333 {
// loc_ starts out default-constructed. imbue() then stores loc, caches its
// ctype facet in ctype_ and lets the base class refresh its own state.
Chris@16 334 this->imbue(loc);
Chris@16 335 }
Chris@16 336
Chris@16 337 /// Checks two cpp_regex_traits objects for equality
Chris@16 338 ///
Chris@16 339 /// \return this->getloc() == that.getloc().
Chris@16 340 bool operator ==(cpp_regex_traits<char_type> const &that) const
Chris@16 341 {
Chris@16 342 return this->loc_ == that.loc_;
Chris@16 343 }
Chris@16 344
Chris@16 345 /// Checks two cpp_regex_traits objects for inequality
Chris@16 346 ///
Chris@16 347 /// \return this->getloc() != that.getloc().
Chris@16 348 bool operator !=(cpp_regex_traits<char_type> const &that) const
Chris@16 349 {
Chris@16 350 return this->loc_ != that.loc_;
Chris@16 351 }
Chris@16 352
Chris@16 353 /// Convert a char to a Char
Chris@16 354 ///
Chris@16 355 /// \param ch The source character.
Chris@16 356 /// \return std::use_facet\<std::ctype\<char_type\> \>(this->getloc()).widen(ch).
Chris@16 357 char_type widen(char ch) const
Chris@16 358 {
Chris@16 359 return this->ctype_->widen(ch);
Chris@16 360 }
Chris@16 361
Chris@16 362 /// Returns a hash value for a Char in the range [0, UCHAR_MAX]
Chris@16 363 ///
Chris@16 364 /// \param ch The source character.
Chris@16 365 /// \return a value between 0 and UCHAR_MAX, inclusive.
Chris@16 366 static unsigned char hash(char_type ch)
Chris@16 367 {
// Generic version: go through char_traits and narrow the result to unsigned
// char. Explicit specializations for the built-in character types follow the
// class definition.
Chris@16 368 return static_cast<unsigned char>(std::char_traits<Char>::to_int_type(ch));
Chris@16 369 }
Chris@16 370
Chris@16 371 /// No-op
Chris@16 372 ///
Chris@16 373 /// \param ch The source character.
Chris@16 374 /// \return ch
Chris@16 375 static char_type translate(char_type ch)
Chris@16 376 {
Chris@16 377 return ch;
Chris@16 378 }
Chris@16 379
Chris@16 380 /// Converts a character to lower-case using the internally-stored std::locale.
Chris@16 381 ///
Chris@16 382 /// \param ch The source character.
Chris@16 383 /// \return std::tolower(ch, this->getloc()).
Chris@16 384 char_type translate_nocase(char_type ch) const
Chris@16 385 {
Chris@16 386 return this->ctype_->tolower(ch);
Chris@16 387 }
Chris@16 388
Chris@16 389 /// Converts a character to lower-case using the internally-stored std::locale.
Chris@16 390 ///
Chris@16 391 /// \param ch The source character.
Chris@16 392 /// \return std::tolower(ch, this->getloc()).
Chris@16 393 char_type tolower(char_type ch) const
Chris@16 394 {
Chris@16 395 return this->ctype_->tolower(ch);
Chris@16 396 }
Chris@16 397
Chris@16 398 /// Converts a character to upper-case using the internally-stored std::locale.
Chris@16 399 ///
Chris@16 400 /// \param ch The source character.
Chris@16 401 /// \return std::toupper(ch, this->getloc()).
Chris@16 402 char_type toupper(char_type ch) const
Chris@16 403 {
Chris@16 404 return this->ctype_->toupper(ch);
Chris@16 405 }
Chris@16 406
Chris@16 407 /// Returns a \c string_type containing all the characters that compare equal
Chris@16 408 /// disregarding case to the one passed in. This function can only be called
Chris@16 409 /// if <tt>has_fold_case\<cpp_regex_traits\<Char\> \>::value</tt> is \c true.
Chris@16 410 ///
Chris@16 411 /// \param ch The source character.
Chris@16 412 /// \return \c string_type containing all chars which are equal to \c ch when disregarding
Chris@16 413 /// case
Chris@16 414 string_type fold_case(char_type ch) const
Chris@16 415 {
// Compile-time guard: this member may only be instantiated for char.
Chris@16 416 BOOST_MPL_ASSERT((is_same<char_type, char>));
// Null-terminated scratch array holding the lower-case form, the upper-case
// form and the terminator.
Chris@16 417 char_type ntcs[] = {
Chris@16 418 this->ctype_->tolower(ch)
Chris@16 419 , this->ctype_->toupper(ch)
Chris@16 420 , 0
Chris@16 421 };
// Both forms are the same character: cut the string short so it is returned once.
Chris@16 422 if(ntcs[1] == ntcs[0])
Chris@16 423 ntcs[1] = 0;
Chris@16 424 return string_type(ntcs);
Chris@16 425 }
Chris@16 426
Chris@16 427 /// Checks to see if a character is within a character range.
Chris@16 428 ///
Chris@16 429 /// \param first The bottom of the range, inclusive.
Chris@16 430 /// \param last The top of the range, inclusive.
Chris@16 431 /// \param ch The source character.
Chris@16 432 /// \return first <= ch && ch <= last.
Chris@16 433 static bool in_range(char_type first, char_type last, char_type ch)
Chris@16 434 {
Chris@16 435 return first <= ch && ch <= last;
Chris@16 436 }
Chris@16 437
Chris@16 438 /// Checks to see if a character is within a character range, regardless of case.
Chris@16 439 ///
Chris@16 440 /// \param first The bottom of the range, inclusive.
Chris@16 441 /// \param last The top of the range, inclusive.
Chris@16 442 /// \param ch The source character.
Chris@16 443 /// \return in_range(first, last, ch) || in_range(first, last, tolower(ch, this->getloc())) ||
Chris@16 444 /// in_range(first, last, toupper(ch, this->getloc()))
Chris@16 445 /// \attention The default implementation doesn't do proper Unicode
Chris@16 446 /// case folding, but this is the best we can do with the standard
Chris@16 447 /// ctype facet.
Chris@16 448 bool in_range_nocase(char_type first, char_type last, char_type ch) const
Chris@16 449 {
Chris@16 450 // NOTE: this default implementation doesn't do proper Unicode
Chris@16 451 // case folding, but this is the best we can do with the standard
Chris@16 452 // std::ctype facet.
Chris@16 453 return this->in_range(first, last, ch)
Chris@16 454 || this->in_range(first, last, this->ctype_->toupper(ch))
Chris@16 455 || this->in_range(first, last, this->ctype_->tolower(ch));
Chris@16 456 }
Chris@16 457
Chris@16 458 /// INTERNAL ONLY
Chris@16 459 //string_type transform(char_type const *begin, char_type const *end) const
Chris@16 460 //{
Chris@16 461 // return this->collate_->transform(begin, end);
Chris@16 462 //}
Chris@16 463
Chris@16 464 /// Returns a sort key for the character sequence designated by the iterator range [F1, F2)
Chris@16 465 /// such that if the character sequence [G1, G2) sorts before the character sequence [H1, H2)
Chris@16 466 /// then v.transform(G1, G2) \< v.transform(H1, H2).
Chris@16 467 ///
Chris@16 468 /// \attention Not currently used
Chris@16 469 template<typename FwdIter>
Chris@16 470 string_type transform(FwdIter, FwdIter) const
Chris@16 471 {
Chris@16 472 //string_type str(begin, end);
Chris@16 473 //return this->transform(str.data(), str.data() + str.size());
Chris@16 474
// Not implemented: asserts when called, and returns an empty string if the
// assertion is compiled out.
Chris@16 475 BOOST_ASSERT(false);
Chris@16 476 return string_type();
Chris@16 477 }
Chris@16 478
Chris@16 479 /// Returns a sort key for the character sequence designated by the iterator range [F1, F2)
Chris@16 480 /// such that if the character sequence [G1, G2) sorts before the character sequence [H1, H2)
Chris@16 481 /// when character case is not considered then
Chris@16 482 /// v.transform_primary(G1, G2) \< v.transform_primary(H1, H2).
Chris@16 483 ///
Chris@16 484 /// \attention Not currently used
Chris@16 485 template<typename FwdIter>
Chris@16 486 string_type transform_primary(FwdIter, FwdIter ) const
Chris@16 487 {
Chris@16 488 BOOST_ASSERT(false); // TODO implement me
Chris@16 489 return string_type();
Chris@16 490 }
Chris@16 491
Chris@16 492 /// Returns a sequence of characters that represents the collating element
Chris@16 493 /// consisting of the character sequence designated by the iterator range [F1, F2).
Chris@16 494 /// Returns an empty string if the character sequence is not a valid collating element.
Chris@16 495 ///
Chris@16 496 /// \attention Not currently used
Chris@16 497 template<typename FwdIter>
Chris@16 498 string_type lookup_collatename(FwdIter, FwdIter) const
Chris@16 499 {
Chris@16 500 BOOST_ASSERT(false); // TODO implement me
Chris@16 501 return string_type();
Chris@16 502 }
Chris@16 503
Chris@16 504 /// For the character class name represented by the specified character sequence,
Chris@16 505 /// return the corresponding bitmask representation.
Chris@16 506 ///
Chris@16 507 /// \param begin A forward iterator to the start of the character sequence representing
Chris@16 508 /// the name of the character class.
Chris@16 509 /// \param end The end of the character sequence.
Chris@16 510 /// \param icase Specifies whether the returned bitmask should represent the case-insensitive
Chris@16 511 /// version of the character class.
Chris@16 512 /// \return A bitmask representing the character class.
Chris@16 513 template<typename FwdIter>
Chris@16 514 char_class_type lookup_classname(FwdIter begin, FwdIter end, bool icase) const
Chris@16 515 {
// The two case classes. Under icase, asking for either one means both.
Chris@16 516 static detail::umaskex_t const icase_masks =
Chris@16 517 detail::std_ctype_lower | detail::std_ctype_upper;
Chris@16 518
Chris@16 519 BOOST_ASSERT(begin != end);
// First try the name exactly as given.
Chris@16 520 char_class_type char_class = this->lookup_classname_impl_(begin, end);
Chris@16 521 if(0 == char_class)
Chris@16 522 {
Chris@16 523 // convert the string to lowercase
Chris@16 524 string_type classname(begin, end);
Chris@16 525 for(typename string_type::size_type i = 0, len = classname.size(); i < len; ++i)
Chris@16 526 {
Chris@16 527 classname[i] = this->translate_nocase(classname[i]);
Chris@16 528 }
// Second attempt with the lower-cased name. The result stays 0 if the name is unknown.
Chris@16 529 char_class = this->lookup_classname_impl_(classname.begin(), classname.end());
Chris@16 530 }
Chris@16 531 // erase case-sensitivity if icase==true
Chris@16 532 if(icase && 0 != (char_class & icase_masks))
Chris@16 533 {
Chris@16 534 char_class |= icase_masks;
Chris@16 535 }
Chris@16 536 return char_class;
Chris@16 537 }
Chris@16 538
Chris@16 539 /// Tests a character against a character class bitmask.
Chris@16 540 ///
Chris@16 541 /// \param ch The character to test.
Chris@16 542 /// \param mask The character class bitmask against which to test.
Chris@16 543 /// \pre mask is a bitmask returned by lookup_classname, or is several such masks bit-or'ed
Chris@16 544 /// together.
Chris@16 545 /// \return true if the character is a member of any of the specified character classes, false
Chris@16 546 /// otherwise.
Chris@16 547 bool isctype(char_type ch, char_class_type mask) const
Chris@16 548 {
Chris@16 549 return this->base_type::is(*this->ctype_, ch, mask);
Chris@16 550 }
Chris@16 551
Chris@16 552 /// Convert a digit character into the integer it represents.
Chris@16 553 ///
Chris@16 554 /// \param ch The digit character.
Chris@16 555 /// \param radix The radix to use for the conversion.
Chris@16 556 /// \pre radix is one of 8, 10, or 16.
Chris@16 557 /// \return -1 if ch is not a digit character, the integer value of the character otherwise.
Chris@16 558 /// The conversion is performed by imbuing a std::stringstream with this-\>getloc();
Chris@16 559 /// setting the radix to one of oct, hex or dec; inserting ch into the stream; and
Chris@16 560 /// extracting an int.
Chris@16 561 int value(char_type ch, int radix) const
Chris@16 562 {
Chris@16 563 BOOST_ASSERT(8 == radix || 10 == radix || 16 == radix);
Chris@16 564 int val = -1;
// A fresh stream is constructed and imbued on every call.
Chris@16 565 std::basic_stringstream<char_type> str;
Chris@16 566 str.imbue(this->getloc());
Chris@16 567 str << (8 == radix ? std::oct : (16 == radix ? std::hex : std::dec));
Chris@16 568 str.put(ch);
Chris@16 569 str >> val;
// A failed extraction is reported as -1 whatever val holds afterwards.
Chris@16 570 return str.fail() ? -1 : val;
Chris@16 571 }
Chris@16 572
Chris@16 573 /// Imbues *this with loc
Chris@16 574 ///
Chris@16 575 /// \param loc A std::locale.
Chris@16 576 /// \return the previous std::locale used by *this.
Chris@16 577 locale_type imbue(locale_type loc)
Chris@16 578 {
Chris@16 579 locale_type old_loc = this->loc_;
Chris@16 580 this->loc_ = loc;
// Cache the facet pointer. It is taken from the stored copy loc_, not from
// the by-value parameter.
Chris@16 581 this->ctype_ = &BOOST_USE_FACET(std::ctype<char_type>, this->loc_);
Chris@16 582 //this->collate_ = &BOOST_USE_FACET(std::collate<char_type>, this->loc_);
// Let the base class refresh whatever per-locale state it keeps.
Chris@16 583 this->base_type::imbue(this->loc_);
Chris@16 584 return old_loc;
Chris@16 585 }
Chris@16 586
Chris@16 587 /// Returns the current std::locale used by *this.
Chris@16 588 ///
Chris@16 589 locale_type getloc() const
Chris@16 590 {
Chris@16 591 return this->loc_;
Chris@16 592 }
Chris@16 593
Chris@16 594 private:
Chris@16 595
Chris@16 596 ///////////////////////////////////////////////////////////////////////////////
Chris@16 597 // char_class_pair
Chris@16 598 /// INTERNAL ONLY
// One entry of the class-name table: a null-terminated name and its bitmask.
Chris@16 599 struct char_class_pair
Chris@16 600 {
Chris@16 601 char_type const *class_name_;
Chris@16 602 char_class_type class_type_;
Chris@16 603 };
Chris@16 604
Chris@16 605 ///////////////////////////////////////////////////////////////////////////////
Chris@16 606 // char_class
Chris@16 607 /// INTERNAL ONLY
// Returns entry j of the table of recognised class names. The final { 0, 0 }
// entry is a sentinel; lookup_classname_impl_ stops when it sees the null
// name. j is not range-checked here.
Chris@16 608 static char_class_pair const &char_class(std::size_t j)
Chris@16 609 {
Chris@16 610 static BOOST_CONSTEXPR_OR_CONST char_class_pair s_char_class_map[] =
Chris@16 611 {
Chris@16 612 { BOOST_XPR_CSTR_(char_type, "alnum"), detail::std_ctype_alnum }
Chris@16 613 , { BOOST_XPR_CSTR_(char_type, "alpha"), detail::std_ctype_alpha }
Chris@16 614 , { BOOST_XPR_CSTR_(char_type, "blank"), detail::non_std_ctype_blank }
Chris@16 615 , { BOOST_XPR_CSTR_(char_type, "cntrl"), detail::std_ctype_cntrl }
Chris@16 616 , { BOOST_XPR_CSTR_(char_type, "d"), detail::std_ctype_digit }
Chris@16 617 , { BOOST_XPR_CSTR_(char_type, "digit"), detail::std_ctype_digit }
Chris@16 618 , { BOOST_XPR_CSTR_(char_type, "graph"), detail::std_ctype_graph }
Chris@16 619 , { BOOST_XPR_CSTR_(char_type, "lower"), detail::std_ctype_lower }
Chris@16 620 , { BOOST_XPR_CSTR_(char_type, "newline"),detail::non_std_ctype_newline }
Chris@16 621 , { BOOST_XPR_CSTR_(char_type, "print"), detail::std_ctype_print }
Chris@16 622 , { BOOST_XPR_CSTR_(char_type, "punct"), detail::std_ctype_punct }
Chris@16 623 , { BOOST_XPR_CSTR_(char_type, "s"), detail::std_ctype_space }
Chris@16 624 , { BOOST_XPR_CSTR_(char_type, "space"), detail::std_ctype_space }
Chris@16 625 , { BOOST_XPR_CSTR_(char_type, "upper"), detail::std_ctype_upper }
Chris@16 626 , { BOOST_XPR_CSTR_(char_type, "w"), detail::std_ctype_alnum | detail::non_std_ctype_underscore }
Chris@16 627 , { BOOST_XPR_CSTR_(char_type, "xdigit"), detail::std_ctype_xdigit }
Chris@16 628 , { 0, 0 }
Chris@16 629 };
Chris@16 630 return s_char_class_map[j];
Chris@16 631 }
Chris@16 632
Chris@16 633 ///////////////////////////////////////////////////////////////////////////////
Chris@16 634 // lookup_classname_impl
Chris@16 635 /// INTERNAL ONLY
// Scans the class-name table from the start and returns the bitmask of the
// first entry whose name equals [begin, end) exactly, or 0 if none does.
Chris@16 636 template<typename FwdIter>
Chris@16 637 static char_class_type lookup_classname_impl_(FwdIter begin, FwdIter end)
Chris@16 638 {
Chris@16 639 // find the classname
Chris@16 640 typedef cpp_regex_traits<Char> this_t;
Chris@16 641 for(std::size_t j = 0; 0 != this_t::char_class(j).class_name_; ++j)
Chris@16 642 {
Chris@16 643 if(this_t::compare_(this_t::char_class(j).class_name_, begin, end))
Chris@16 644 {
Chris@16 645 return this_t::char_class(j).class_type_;
Chris@16 646 }
Chris@16 647 }
Chris@16 648 return 0;
Chris@16 649 }
Chris@16 650
Chris@16 651 /// INTERNAL ONLY
// Returns true only when the null-terminated string name and the range
// [begin, end) have the same length and the same characters.
Chris@16 652 template<typename FwdIter>
Chris@16 653 static bool compare_(char_type const *name, FwdIter begin, FwdIter end)
Chris@16 654 {
Chris@16 655 for(; *name && begin != end; ++name, ++begin)
Chris@16 656 {
Chris@16 657 if(*name != *begin)
Chris@16 658 {
Chris@16 659 return false;
Chris@16 660 }
Chris@16 661 }
// Both sequences must be used up; otherwise one is a proper prefix of the other.
Chris@16 662 return !*name && begin == end;
Chris@16 663 }
Chris@16 664
// loc_ is the locale in use. ctype_ points at the ctype facet obtained from
// loc_; both are set together in imbue().
Chris@16 665 locale_type loc_;
Chris@16 666 std::ctype<char_type> const *ctype_;
Chris@16 667 //std::collate<char_type> const *collate_;
Chris@16 668 };
Chris@16 669
Chris@16 670 ///////////////////////////////////////////////////////////////////////////////
Chris@16 671 // cpp_regex_traits<>::hash specializations
// For these character types the value is converted to unsigned char directly,
// without the char_traits::to_int_type call made by the generic member.
Chris@16 672 template<>
Chris@16 673 inline unsigned char cpp_regex_traits<unsigned char>::hash(unsigned char ch)
Chris@16 674 {
Chris@16 675 return ch;
Chris@16 676 }
Chris@16 677
Chris@16 678 template<>
Chris@16 679 inline unsigned char cpp_regex_traits<char>::hash(char ch)
Chris@16 680 {
Chris@16 681 return static_cast<unsigned char>(ch);
Chris@16 682 }
Chris@16 683
Chris@16 684 template<>
Chris@16 685 inline unsigned char cpp_regex_traits<signed char>::hash(signed char ch)
Chris@16 686 {
Chris@16 687 return static_cast<unsigned char>(ch);
Chris@16 688 }
Chris@16 689
// The wchar_t version is left out when BOOST_XPRESSIVE_NO_WREGEX is defined.
// The cast keeps only the part of the value that fits in an unsigned char.
Chris@16 690 #ifndef BOOST_XPRESSIVE_NO_WREGEX
Chris@16 691 template<>
Chris@16 692 inline unsigned char cpp_regex_traits<wchar_t>::hash(wchar_t ch)
Chris@16 693 {
Chris@16 694 return static_cast<unsigned char>(ch);
Chris@16 695 }
Chris@16 696 #endif
Chris@16 697
Chris@16 698 // Narrow C++ traits has fold_case() member function.
// Only the char instantiation is marked here, which matches the compile-time
// check inside cpp_regex_traits::fold_case() that char_type is char.
Chris@16 699 template<>
Chris@16 700 struct has_fold_case<cpp_regex_traits<char> >
Chris@16 701 : mpl::true_
Chris@16 702 {
Chris@16 703 };
Chris@16 704
Chris@16 705
Chris@16 706 }}
Chris@16 707
Chris@16 708 #endif