annotate DEPENDENCIES/generic/include/boost/xpressive/detail/dynamic/parse_charset.hpp @ 133:4acb5d8d80b6 tip

Don't fail environmental check if README.md exists (but .txt and no-suffix don't)
author Chris Cannam
date Tue, 30 Jul 2019 12:25:44 +0100
parents c530137014c0
children
rev   line source
Chris@16 1 ///////////////////////////////////////////////////////////////////////////////
Chris@16 2 // parse_charset.hpp
Chris@16 3 //
Chris@16 4 // Copyright 2008 Eric Niebler. Distributed under the Boost
Chris@16 5 // Software License, Version 1.0. (See accompanying file
Chris@16 6 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
Chris@16 7
Chris@16 8 #ifndef BOOST_XPRESSIVE_DETAIL_DYNAMIC_PARSE_CHARSET_HPP_EAN_10_04_2005
Chris@16 9 #define BOOST_XPRESSIVE_DETAIL_DYNAMIC_PARSE_CHARSET_HPP_EAN_10_04_2005
Chris@16 10
Chris@16 11 // MS compatible compilers support #pragma once
Chris@101 12 #if defined(_MSC_VER)
Chris@16 13 # pragma once
Chris@16 14 #endif
Chris@16 15
Chris@16 16 #include <boost/config.hpp>
Chris@16 17 #include <boost/integer.hpp>
Chris@16 18 #include <boost/mpl/bool.hpp>
Chris@16 19 #include <boost/throw_exception.hpp>
Chris@16 20 #include <boost/numeric/conversion/converter.hpp>
Chris@16 21 #include <boost/xpressive/detail/detail_fwd.hpp>
Chris@16 22 #include <boost/xpressive/detail/dynamic/parser_enum.hpp>
Chris@16 23 #include <boost/xpressive/detail/utility/literals.hpp>
Chris@16 24 #include <boost/xpressive/detail/utility/chset/chset.hpp>
Chris@16 25 #include <boost/xpressive/regex_constants.hpp>
Chris@16 26
Chris@16 27 namespace boost { namespace xpressive { namespace detail
Chris@16 28 {
Chris@16 29
/// Discriminator stored in escape_value::type_ that tells which kind of
/// escape sequence was recognised.
enum escape_type
{
    escape_char  = 0,   ///< the escape denotes a single literal character
    escape_mark  = 1,   ///< presumably a numbered mark (back-reference); not produced in this header
    escape_class = 2    ///< the escape denotes a character class
};
Chris@16 36
Chris@16 37 ///////////////////////////////////////////////////////////////////////////////
Chris@16 38 // escape_value
Chris@16 39 //
Chris@16 40 template<typename Char, typename Class>
Chris@16 41 struct escape_value
Chris@16 42 {
Chris@16 43 Char ch_;
Chris@16 44 int mark_nbr_;
Chris@16 45 Class class_;
Chris@16 46 escape_type type_;
Chris@16 47 };
Chris@16 48
Chris@16 49 ///////////////////////////////////////////////////////////////////////////////
Chris@16 50 // char_overflow_handler
Chris@16 51 //
Chris@16 52 struct char_overflow_handler
Chris@16 53 {
Chris@16 54 void operator ()(numeric::range_check_result result) const // throw(regex_error)
Chris@16 55 {
Chris@16 56 if(numeric::cInRange != result)
Chris@16 57 {
Chris@16 58 BOOST_THROW_EXCEPTION(
Chris@16 59 regex_error(
Chris@16 60 regex_constants::error_escape
Chris@16 61 , "character escape too large to fit in target character type"
Chris@16 62 )
Chris@16 63 );
Chris@16 64 }
Chris@16 65 }
Chris@16 66 };
Chris@16 67
Chris@16 68 ///////////////////////////////////////////////////////////////////////////////
Chris@16 69 // parse_escape
Chris@16 70 //
Chris@16 71 template<typename FwdIter, typename CompilerTraits>
Chris@16 72 escape_value<typename iterator_value<FwdIter>::type, typename CompilerTraits::regex_traits::char_class_type>
Chris@16 73 parse_escape(FwdIter &begin, FwdIter end, CompilerTraits &tr)
Chris@16 74 {
Chris@16 75 using namespace regex_constants;
Chris@16 76 typedef typename iterator_value<FwdIter>::type char_type;
Chris@16 77 typedef typename CompilerTraits::regex_traits regex_traits;
Chris@16 78 typedef typename regex_traits::char_class_type char_class_type;
Chris@16 79
Chris@16 80 // define an unsigned type the same size as char_type
Chris@16 81 typedef typename boost::uint_t<CHAR_BIT * sizeof(char_type)>::least uchar_t;
Chris@16 82 BOOST_MPL_ASSERT_RELATION(sizeof(uchar_t), ==, sizeof(char_type));
Chris@16 83 typedef numeric::conversion_traits<uchar_t, int> converstion_traits;
Chris@16 84
Chris@16 85 BOOST_XPR_ENSURE_(begin != end, error_escape, "unexpected end of pattern found");
Chris@16 86 numeric::converter<int, uchar_t, converstion_traits, char_overflow_handler> converter;
Chris@16 87 escape_value<char_type,char_class_type> esc = { 0, 0, 0, escape_char };
Chris@16 88 bool const icase = (0 != (regex_constants::icase_ & tr.flags()));
Chris@16 89 regex_traits const &rxtraits = tr.traits();
Chris@16 90 FwdIter tmp;
Chris@16 91
Chris@16 92 esc.class_ = rxtraits.lookup_classname(begin, begin + 1, icase);
Chris@16 93 if(0 != esc.class_)
Chris@16 94 {
Chris@16 95 esc.type_ = escape_class;
Chris@16 96 return esc;
Chris@16 97 }
Chris@16 98
Chris@16 99 if(-1 != rxtraits.value(*begin, 8))
Chris@16 100 {
Chris@16 101 esc.ch_ = converter(toi(begin, end, rxtraits, 8, 0777));
Chris@16 102 return esc;
Chris@16 103 }
Chris@16 104
Chris@16 105 switch(*begin)
Chris@16 106 {
Chris@16 107 // bell character
Chris@16 108 case BOOST_XPR_CHAR_(char_type, 'a'):
Chris@16 109 esc.ch_ = BOOST_XPR_CHAR_(char_type, '\a');
Chris@16 110 ++begin;
Chris@16 111 break;
Chris@16 112 // escape character
Chris@16 113 case BOOST_XPR_CHAR_(char_type, 'e'):
Chris@16 114 esc.ch_ = converter(27);
Chris@16 115 ++begin;
Chris@16 116 break;
Chris@16 117 // control character
Chris@16 118 case BOOST_XPR_CHAR_(char_type, 'c'):
Chris@16 119 BOOST_XPR_ENSURE_(++begin != end, error_escape, "unexpected end of pattern found");
Chris@16 120 BOOST_XPR_ENSURE_
Chris@16 121 (
Chris@16 122 rxtraits.in_range(BOOST_XPR_CHAR_(char_type, 'a'), BOOST_XPR_CHAR_(char_type, 'z'), *begin)
Chris@16 123 || rxtraits.in_range(BOOST_XPR_CHAR_(char_type, 'A'), BOOST_XPR_CHAR_(char_type, 'Z'), *begin)
Chris@16 124 , error_escape
Chris@16 125 , "invalid escape control letter; must be one of a-z or A-Z"
Chris@16 126 );
Chris@16 127 // Convert to character according to ECMA-262, section 15.10.2.10:
Chris@16 128 esc.ch_ = converter(*begin % 32);
Chris@16 129 ++begin;
Chris@16 130 break;
Chris@16 131 // formfeed character
Chris@16 132 case BOOST_XPR_CHAR_(char_type, 'f'):
Chris@16 133 esc.ch_ = BOOST_XPR_CHAR_(char_type, '\f');
Chris@16 134 ++begin;
Chris@16 135 break;
Chris@16 136 // newline
Chris@16 137 case BOOST_XPR_CHAR_(char_type, 'n'):
Chris@16 138 esc.ch_ = BOOST_XPR_CHAR_(char_type, '\n');
Chris@16 139 ++begin;
Chris@16 140 break;
Chris@16 141 // return
Chris@16 142 case BOOST_XPR_CHAR_(char_type, 'r'):
Chris@16 143 esc.ch_ = BOOST_XPR_CHAR_(char_type, '\r');
Chris@16 144 ++begin;
Chris@16 145 break;
Chris@16 146 // horizontal tab
Chris@16 147 case BOOST_XPR_CHAR_(char_type, 't'):
Chris@16 148 esc.ch_ = BOOST_XPR_CHAR_(char_type, '\t');
Chris@16 149 ++begin;
Chris@16 150 break;
Chris@16 151 // vertical tab
Chris@16 152 case BOOST_XPR_CHAR_(char_type, 'v'):
Chris@16 153 esc.ch_ = BOOST_XPR_CHAR_(char_type, '\v');
Chris@16 154 ++begin;
Chris@16 155 break;
Chris@16 156 // hex escape sequence
Chris@16 157 case BOOST_XPR_CHAR_(char_type, 'x'):
Chris@16 158 BOOST_XPR_ENSURE_(++begin != end, error_escape, "unexpected end of pattern found");
Chris@16 159 tmp = begin;
Chris@16 160 esc.ch_ = converter(toi(begin, end, rxtraits, 16, 0xff));
Chris@16 161 BOOST_XPR_ENSURE_(2 == std::distance(tmp, begin), error_escape, "invalid hex escape : "
Chris@16 162 "must be \\x HexDigit HexDigit");
Chris@16 163 break;
Chris@16 164 // Unicode escape sequence
Chris@16 165 case BOOST_XPR_CHAR_(char_type, 'u'):
Chris@16 166 BOOST_XPR_ENSURE_(++begin != end, error_escape, "unexpected end of pattern found");
Chris@16 167 tmp = begin;
Chris@16 168 esc.ch_ = converter(toi(begin, end, rxtraits, 16, 0xffff));
Chris@16 169 BOOST_XPR_ENSURE_(4 == std::distance(tmp, begin), error_escape, "invalid Unicode escape : "
Chris@16 170 "must be \\u HexDigit HexDigit HexDigit HexDigit");
Chris@16 171 break;
Chris@16 172 // backslash
Chris@16 173 case BOOST_XPR_CHAR_(char_type, '\\'):
Chris@16 174 //esc.ch_ = BOOST_XPR_CHAR_(char_type, '\\');
Chris@16 175 //++begin;
Chris@16 176 //break;
Chris@16 177 // all other escaped characters represent themselves
Chris@16 178 default:
Chris@16 179 esc.ch_ = *begin;
Chris@16 180 ++begin;
Chris@16 181 break;
Chris@16 182 }
Chris@16 183
Chris@16 184 return esc;
Chris@16 185 }
Chris@16 186
Chris@16 187 //////////////////////////////////////////////////////////////////////////
Chris@16 188 // parse_charset
Chris@16 189 //
Chris@16 190 template<typename FwdIter, typename RegexTraits, typename CompilerTraits>
Chris@16 191 inline void parse_charset
Chris@16 192 (
Chris@16 193 FwdIter &begin
Chris@16 194 , FwdIter end
Chris@16 195 , compound_charset<RegexTraits> &chset
Chris@16 196 , CompilerTraits &tr
Chris@16 197 )
Chris@16 198 {
Chris@16 199 using namespace regex_constants;
Chris@16 200 typedef typename RegexTraits::char_type char_type;
Chris@16 201 typedef typename RegexTraits::char_class_type char_class_type;
Chris@16 202 BOOST_XPR_ENSURE_(begin != end, error_brack, "unexpected end of pattern found");
Chris@16 203 RegexTraits const &rxtraits = tr.traits();
Chris@16 204 bool const icase = (0 != (regex_constants::icase_ & tr.flags()));
Chris@16 205 FwdIter iprev = FwdIter();
Chris@16 206 escape_value<char_type, char_class_type> esc = {0, 0, 0, escape_char};
Chris@16 207 bool invert = false;
Chris@16 208
Chris@16 209 // check to see if we have an inverse charset
Chris@16 210 if(begin != end && token_charset_invert == tr.get_charset_token(iprev = begin, end))
Chris@16 211 {
Chris@16 212 begin = iprev;
Chris@16 213 invert = true;
Chris@16 214 }
Chris@16 215
Chris@16 216 // skip the end token if-and-only-if it is the first token in the charset
Chris@16 217 if(begin != end && token_charset_end == tr.get_charset_token(iprev = begin, end))
Chris@16 218 {
Chris@16 219 for(; begin != iprev; ++begin)
Chris@16 220 {
Chris@16 221 chset.set_char(*begin, rxtraits, icase);
Chris@16 222 }
Chris@16 223 }
Chris@16 224
Chris@16 225 compiler_token_type tok;
Chris@16 226 char_type ch_prev = char_type(), ch_next = char_type();
Chris@16 227 bool have_prev = false;
Chris@16 228
Chris@16 229 BOOST_XPR_ENSURE_(begin != end, error_brack, "unexpected end of pattern found");
Chris@16 230
Chris@16 231 // remember the current position and grab the next token
Chris@16 232 iprev = begin;
Chris@16 233 tok = tr.get_charset_token(begin, end);
Chris@16 234 do
Chris@16 235 {
Chris@16 236 BOOST_XPR_ENSURE_(begin != end, error_brack, "unexpected end of pattern found");
Chris@16 237
Chris@16 238 if(token_charset_hyphen == tok && have_prev)
Chris@16 239 {
Chris@16 240 // remember the current position
Chris@16 241 FwdIter iprev2 = begin;
Chris@16 242 have_prev = false;
Chris@16 243
Chris@16 244 // ch_prev is lower bound of a range
Chris@16 245 switch(tr.get_charset_token(begin, end))
Chris@16 246 {
Chris@16 247 case token_charset_hyphen:
Chris@16 248 case token_charset_invert:
Chris@16 249 begin = iprev2; // un-get these tokens and fall through
Chris@16 250 BOOST_FALLTHROUGH;
Chris@16 251 case token_literal:
Chris@16 252 ch_next = *begin++;
Chris@16 253 BOOST_XPR_ENSURE_(ch_prev <= ch_next, error_range, "invalid charset range");
Chris@16 254 chset.set_range(ch_prev, ch_next, rxtraits, icase);
Chris@16 255 continue;
Chris@16 256 case token_charset_backspace:
Chris@16 257 ch_next = char_type(8); // backspace
Chris@16 258 BOOST_XPR_ENSURE_(ch_prev <= ch_next, error_range, "invalid charset range");
Chris@16 259 chset.set_range(ch_prev, ch_next, rxtraits, icase);
Chris@16 260 continue;
Chris@16 261 case token_escape:
Chris@16 262 esc = parse_escape(begin, end, tr);
Chris@16 263 if(escape_char == esc.type_)
Chris@16 264 {
Chris@16 265 BOOST_XPR_ENSURE_(ch_prev <= esc.ch_, error_range, "invalid charset range");
Chris@16 266 chset.set_range(ch_prev, esc.ch_, rxtraits, icase);
Chris@16 267 continue;
Chris@16 268 }
Chris@16 269 BOOST_FALLTHROUGH;
Chris@16 270 case token_charset_end:
Chris@16 271 default: // not a range.
Chris@16 272 begin = iprev; // backup to hyphen token
Chris@16 273 chset.set_char(ch_prev, rxtraits, icase);
Chris@16 274 chset.set_char(*begin++, rxtraits, icase);
Chris@16 275 continue;
Chris@16 276 }
Chris@16 277 }
Chris@16 278
Chris@16 279 if(have_prev)
Chris@16 280 {
Chris@16 281 chset.set_char(ch_prev, rxtraits, icase);
Chris@16 282 have_prev = false;
Chris@16 283 }
Chris@16 284
Chris@16 285 switch(tok)
Chris@16 286 {
Chris@16 287 case token_charset_hyphen:
Chris@16 288 case token_charset_invert:
Chris@16 289 case token_charset_end:
Chris@16 290 case token_posix_charset_end:
Chris@16 291 begin = iprev; // un-get these tokens
Chris@16 292 ch_prev = *begin++;
Chris@16 293 have_prev = true;
Chris@16 294 continue;
Chris@16 295
Chris@16 296 case token_charset_backspace:
Chris@16 297 ch_prev = char_type(8); // backspace
Chris@16 298 have_prev = true;
Chris@16 299 continue;
Chris@16 300
Chris@16 301 case token_posix_charset_begin:
Chris@16 302 {
Chris@16 303 FwdIter tmp = begin, start = begin;
Chris@16 304 bool invert = (token_charset_invert == tr.get_charset_token(tmp, end));
Chris@16 305 if(invert)
Chris@16 306 {
Chris@16 307 begin = start = tmp;
Chris@16 308 }
Chris@16 309 while(token_literal == (tok = tr.get_charset_token(begin, end)))
Chris@16 310 {
Chris@16 311 tmp = ++begin;
Chris@16 312 BOOST_XPR_ENSURE_(begin != end, error_brack, "unexpected end of pattern found");
Chris@16 313 }
Chris@16 314 if(token_posix_charset_end == tok)
Chris@16 315 {
Chris@16 316 char_class_type chclass = rxtraits.lookup_classname(start, tmp, icase);
Chris@16 317 BOOST_XPR_ENSURE_(0 != chclass, error_ctype, "unknown class name");
Chris@16 318 chset.set_class(chclass, invert);
Chris@16 319 continue;
Chris@16 320 }
Chris@16 321 begin = iprev; // un-get this token
Chris@16 322 ch_prev = *begin++;
Chris@16 323 have_prev = true;
Chris@16 324 }
Chris@16 325 continue;
Chris@16 326
Chris@16 327 case token_escape:
Chris@16 328 esc = parse_escape(begin, end, tr);
Chris@16 329 if(escape_char == esc.type_)
Chris@16 330 {
Chris@16 331 ch_prev = esc.ch_;
Chris@16 332 have_prev = true;
Chris@16 333 }
Chris@16 334 else if(escape_class == esc.type_)
Chris@16 335 {
Chris@16 336 char_class_type upper_ = lookup_classname(rxtraits, "upper");
Chris@16 337 BOOST_ASSERT(0 != upper_);
Chris@16 338 chset.set_class(esc.class_, rxtraits.isctype(*begin++, upper_));
Chris@16 339 }
Chris@16 340 else
Chris@16 341 {
Chris@16 342 BOOST_ASSERT(false);
Chris@16 343 }
Chris@16 344 continue;
Chris@16 345
Chris@16 346 default:
Chris@16 347 ch_prev = *begin++;
Chris@16 348 have_prev = true;
Chris@16 349 continue;
Chris@16 350 }
Chris@16 351 }
Chris@16 352 while(BOOST_XPR_ENSURE_((iprev = begin) != end, error_brack, "unexpected end of pattern found"),
Chris@16 353 token_charset_end != (tok = tr.get_charset_token(begin, end)));
Chris@16 354
Chris@16 355 if(have_prev)
Chris@16 356 {
Chris@16 357 chset.set_char(ch_prev, rxtraits, icase);
Chris@16 358 }
Chris@16 359
Chris@16 360 if(invert)
Chris@16 361 {
Chris@16 362 chset.inverse();
Chris@16 363 }
Chris@16 364 }
Chris@16 365
Chris@16 366 }}} // namespace boost::xpressive::detail
Chris@16 367
Chris@16 368 #endif