vamp-build-and-test: DEPENDENCIES/generic/include/boost/xpressive/detail/dynamic/parse

annotate DEPENDENCIES/generic/include/boost/xpressive/detail/dynamic/parse_charset.hpp @ 133:4acb5d8d80b6 tip

Don't fail environmental check if README.md exists (but .txt and no-suffix don't)

author	Chris Cannam
date	Tue, 30 Jul 2019 12:25:44 +0100
parents	c530137014c0
children

rev	line source
Chris@16	1 ///////////////////////////////////////////////////////////////////////////////
Chris@16	2 // parse_charset.hpp
Chris@16	3 //
Chris@16	4 // Copyright 2008 Eric Niebler. Distributed under the Boost
Chris@16	5 // Software License, Version 1.0. (See accompanying file
Chris@16	6 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
Chris@16	7
Chris@16	8 #ifndef BOOST_XPRESSIVE_DETAIL_DYNAMIC_PARSE_CHARSET_HPP_EAN_10_04_2005
Chris@16	9 #define BOOST_XPRESSIVE_DETAIL_DYNAMIC_PARSE_CHARSET_HPP_EAN_10_04_2005
Chris@16	10
Chris@16	11 // MS compatible compilers support #pragma once
Chris@101	12 #if defined(_MSC_VER)
Chris@16	13 # pragma once
Chris@16	14 #endif
Chris@16	15
Chris@16	16 #include <boost/config.hpp>
Chris@16	17 #include <boost/integer.hpp>
Chris@16	18 #include <boost/mpl/bool.hpp>
Chris@16	19 #include <boost/throw_exception.hpp>
Chris@16	20 #include <boost/numeric/conversion/converter.hpp>
Chris@16	21 #include <boost/xpressive/detail/detail_fwd.hpp>
Chris@16	22 #include <boost/xpressive/detail/dynamic/parser_enum.hpp>
Chris@16	23 #include <boost/xpressive/detail/utility/literals.hpp>
Chris@16	24 #include <boost/xpressive/detail/utility/chset/chset.hpp>
Chris@16	25 #include <boost/xpressive/regex_constants.hpp>
Chris@16	26
Chris@16	27 namespace boost { namespace xpressive { namespace detail
Chris@16	28 {
Chris@16	29
Chris@16	30 enum escape_type // what kind of entity a parsed escape sequence denotes (stored in escape_value::type_)
Chris@16	31 {
Chris@16	32 escape_char // a single character; parse_escape stores it in escape_value::ch_
Chris@16	33 , escape_mark // presumably a numbered mark / back-reference (escape_value::mark_nbr_); never produced in this file -- TODO confirm
Chris@16	34 , escape_class // a character class; parse_escape stores it in escape_value::class_
Chris@16	35 };
Chris@16	36
Chris@16	37 ///////////////////////////////////////////////////////////////////////////////
Chris@16	38 // escape_value
Chris@16	39 // Plain aggregate returned by parse_escape; type_ says which of the other members carries the result.
Chris@16	40 template<typename Char, typename Class>
Chris@16	41 struct escape_value
Chris@16	42 {
Chris@16	43 Char ch_; // the escaped character; meaningful when type_ == escape_char
Chris@16	44 int mark_nbr_; // presumably a mark number for escape_mark; only ever zero-initialized in this file -- TODO confirm
Chris@16	45 Class class_; // character-class value from lookup_classname; meaningful when type_ == escape_class
Chris@16	46 escape_type type_; // discriminator for the members above
Chris@16	47 };
Chris@16	48
Chris@16	49 ///////////////////////////////////////////////////////////////////////////////
Chris@16	50 // char_overflow_handler
Chris@16	51 // Function object taking a numeric::range_check_result; throws regex_error(error_escape) unless the value was in range. parse_escape passes it to numeric::converter as the overflow handler.
Chris@16	52 struct char_overflow_handler
Chris@16	53 {
Chris@16	54 void operator ()(numeric::range_check_result result) const // throw(regex_error)
Chris@16	55 {
Chris@16	56 if(numeric::cInRange != result) // every outcome other than "in range" is rejected
Chris@16	57 {
Chris@16	58 BOOST_THROW_EXCEPTION(
Chris@16	59 regex_error(
Chris@16	60 regex_constants::error_escape
Chris@16	61 , "character escape too large to fit in target character type"
Chris@16	62 )
Chris@16	63 );
Chris@16	64 }
Chris@16	65 }
Chris@16	66 };
Chris@16	67
Chris@16	68 ///////////////////////////////////////////////////////////////////////////////
Chris@16	69 // parse_escape
Chris@16	70 // Decodes one escape sequence starting at begin (presumably the character just after the escape introducer -- TODO confirm against callers) into an escape_value.
Chris@16	71 template<typename FwdIter, typename CompilerTraits>
Chris@16	72 escape_value<typename iterator_value<FwdIter>::type, typename CompilerTraits::regex_traits::char_class_type> // returns: type_ == escape_class with class_ set, or type_ == escape_char with ch_ set
Chris@16	73 parse_escape(FwdIter &begin, FwdIter end, CompilerTraits &tr) // begin: in/out cursor; end: end of pattern; tr: supplies flags() and traits(). Failed BOOST_XPR_ENSURE_ checks report error_escape.
Chris@16	74 {
Chris@16	75 using namespace regex_constants;
Chris@16	76 typedef typename iterator_value<FwdIter>::type char_type;
Chris@16	77 typedef typename CompilerTraits::regex_traits regex_traits;
Chris@16	78 typedef typename regex_traits::char_class_type char_class_type;
Chris@16	79
Chris@16	80 // define an unsigned type the same size as char_type
Chris@16	81 typedef typename boost::uint_t<CHAR_BIT * sizeof(char_type)>::least uchar_t;
Chris@16	82 BOOST_MPL_ASSERT_RELATION(sizeof(uchar_t), ==, sizeof(char_type));
Chris@16	83 typedef numeric::conversion_traits<uchar_t, int> converstion_traits;
Chris@16	84
Chris@16	85 BOOST_XPR_ENSURE_(begin != end, error_escape, "unexpected end of pattern found");
Chris@16	86 numeric::converter<int, uchar_t, converstion_traits, char_overflow_handler> converter; // range-checked int -> uchar_t conversion; char_overflow_handler throws when the value does not fit
Chris@16	87 escape_value<char_type,char_class_type> esc = { 0, 0, 0, escape_char }; // defaults to a character escape
Chris@16	88 bool const icase = (0 != (regex_constants::icase_ & tr.flags()));
Chris@16	89 regex_traits const &rxtraits = tr.traits();
Chris@16	90 FwdIter tmp; // start of the digit run for the \x and \u length checks below
Chris@16	91
Chris@16	92 esc.class_ = rxtraits.lookup_classname(begin, begin + 1, icase); // single-character class name? NOTE(review): begin + 1 needs operator+ on the iterator, i.e. more than a forward iterator
Chris@16	93 if(0 != esc.class_)
Chris@16	94 {
Chris@16	95 esc.type_ = escape_class;
Chris@16	96 return esc; // begin is NOT advanced here; parse_charset consumes the class letter itself
Chris@16	97 }
Chris@16	98
Chris@16	99 if(-1 != rxtraits.value(*begin, 8)) // *begin is a base-8 digit: octal escape
Chris@16	100 {
Chris@16	101 esc.ch_ = converter(toi(begin, end, rxtraits, 8, 0777)); // toi is defined elsewhere; it advances begin (the \x and \u cases rely on that)
Chris@16	102 return esc;
Chris@16	103 }
Chris@16	104
Chris@16	105 switch(*begin)
Chris@16	106 {
Chris@16	107 // bell character
Chris@16	108 case BOOST_XPR_CHAR_(char_type, 'a'):
Chris@16	109 esc.ch_ = BOOST_XPR_CHAR_(char_type, '\a');
Chris@16	110 ++begin;
Chris@16	111 break;
Chris@16	112 // escape character
Chris@16	113 case BOOST_XPR_CHAR_(char_type, 'e'):
Chris@16	114 esc.ch_ = converter(27);
Chris@16	115 ++begin;
Chris@16	116 break;
Chris@16	117 // control character
Chris@16	118 case BOOST_XPR_CHAR_(char_type, 'c'):
Chris@16	119 BOOST_XPR_ENSURE_(++begin != end, error_escape, "unexpected end of pattern found");
Chris@16	120 BOOST_XPR_ENSURE_
Chris@16	121 (
Chris@16	122 rxtraits.in_range(BOOST_XPR_CHAR_(char_type, 'a'), BOOST_XPR_CHAR_(char_type, 'z'), *begin)
Chris@16	123 || rxtraits.in_range(BOOST_XPR_CHAR_(char_type, 'A'), BOOST_XPR_CHAR_(char_type, 'Z'), *begin)
Chris@16	124 , error_escape
Chris@16	125 , "invalid escape control letter; must be one of a-z or A-Z"
Chris@16	126 );
Chris@16	127 // Convert to character according to ECMA-262, section 15.10.2.10:
Chris@16	128 esc.ch_ = converter(*begin % 32);
Chris@16	129 ++begin;
Chris@16	130 break;
Chris@16	131 // formfeed character
Chris@16	132 case BOOST_XPR_CHAR_(char_type, 'f'):
Chris@16	133 esc.ch_ = BOOST_XPR_CHAR_(char_type, '\f');
Chris@16	134 ++begin;
Chris@16	135 break;
Chris@16	136 // newline
Chris@16	137 case BOOST_XPR_CHAR_(char_type, 'n'):
Chris@16	138 esc.ch_ = BOOST_XPR_CHAR_(char_type, '\n');
Chris@16	139 ++begin;
Chris@16	140 break;
Chris@16	141 // return
Chris@16	142 case BOOST_XPR_CHAR_(char_type, 'r'):
Chris@16	143 esc.ch_ = BOOST_XPR_CHAR_(char_type, '\r');
Chris@16	144 ++begin;
Chris@16	145 break;
Chris@16	146 // horizontal tab
Chris@16	147 case BOOST_XPR_CHAR_(char_type, 't'):
Chris@16	148 esc.ch_ = BOOST_XPR_CHAR_(char_type, '\t');
Chris@16	149 ++begin;
Chris@16	150 break;
Chris@16	151 // vertical tab
Chris@16	152 case BOOST_XPR_CHAR_(char_type, 'v'):
Chris@16	153 esc.ch_ = BOOST_XPR_CHAR_(char_type, '\v');
Chris@16	154 ++begin;
Chris@16	155 break;
Chris@16	156 // hex escape sequence
Chris@16	157 case BOOST_XPR_CHAR_(char_type, 'x'):
Chris@16	158 BOOST_XPR_ENSURE_(++begin != end, error_escape, "unexpected end of pattern found");
Chris@16	159 tmp = begin;
Chris@16	160 esc.ch_ = converter(toi(begin, end, rxtraits, 16, 0xff));
Chris@16	161 BOOST_XPR_ENSURE_(2 == std::distance(tmp, begin), error_escape, "invalid hex escape : "
Chris@16	162 "must be \\x HexDigit HexDigit");
Chris@16	163 break;
Chris@16	164 // Unicode escape sequence
Chris@16	165 case BOOST_XPR_CHAR_(char_type, 'u'):
Chris@16	166 BOOST_XPR_ENSURE_(++begin != end, error_escape, "unexpected end of pattern found");
Chris@16	167 tmp = begin;
Chris@16	168 esc.ch_ = converter(toi(begin, end, rxtraits, 16, 0xffff));
Chris@16	169 BOOST_XPR_ENSURE_(4 == std::distance(tmp, begin), error_escape, "invalid Unicode escape : "
Chris@16	170 "must be \\u HexDigit HexDigit HexDigit HexDigit");
Chris@16	171 break;
Chris@16	172 // backslash
Chris@16	173 case BOOST_XPR_CHAR_(char_type, '\\'): // deliberately shares the default branch below
Chris@16	174 //esc.ch_ = BOOST_XPR_CHAR_(char_type, '\\');
Chris@16	175 //++begin;
Chris@16	176 //break;
Chris@16	177 // all other escaped characters represent themselves
Chris@16	178 default:
Chris@16	179 esc.ch_ = *begin;
Chris@16	180 ++begin;
Chris@16	181 break;
Chris@16	182 }
Chris@16	183
Chris@16	184 return esc;
Chris@16	185 }
Chris@16	186
Chris@16	187 //////////////////////////////////////////////////////////////////////////
Chris@16	188 // parse_charset
Chris@16	189 // Reads charset tokens from [begin, end) until token_charset_end and records the characters, ranges and classes in chset; inverts chset at the end if the set started with the invert token.
Chris@16	190 template<typename FwdIter, typename RegexTraits, typename CompilerTraits>
Chris@16	191 inline void parse_charset
Chris@16	192 (
Chris@16	193 FwdIter &begin // in/out cursor into the pattern
Chris@16	194 , FwdIter end // end of the pattern
Chris@16	195 , compound_charset<RegexTraits> &chset // output: receives set_char / set_range / set_class / inverse calls
Chris@16	196 , CompilerTraits &tr // supplies get_charset_token(), flags() and traits()
Chris@16	197 )
Chris@16	198 {
Chris@16	199 using namespace regex_constants;
Chris@16	200 typedef typename RegexTraits::char_type char_type;
Chris@16	201 typedef typename RegexTraits::char_class_type char_class_type;
Chris@16	202 BOOST_XPR_ENSURE_(begin != end, error_brack, "unexpected end of pattern found");
Chris@16	203 RegexTraits const &rxtraits = tr.traits();
Chris@16	204 bool const icase = (0 != (regex_constants::icase_ & tr.flags()));
Chris@16	205 FwdIter iprev = FwdIter(); // position before the most recently fetched token, used to "un-get" it
Chris@16	206 escape_value<char_type, char_class_type> esc = {0, 0, 0, escape_char};
Chris@16	207 bool invert = false;
Chris@16	208
Chris@16	209 // check to see if we have an inverse charset
Chris@16	210 if(begin != end && token_charset_invert == tr.get_charset_token(iprev = begin, end)) // peek using the copy iprev so begin only moves on a match
Chris@16	211 {
Chris@16	212 begin = iprev; // commit: skip the invert token
Chris@16	213 invert = true;
Chris@16	214 }
Chris@16	215
Chris@16	216 // skip the end token if-and-only-if it is the first token in the charset
Chris@16	217 if(begin != end && token_charset_end == tr.get_charset_token(iprev = begin, end))
Chris@16	218 {
Chris@16	219 for(; begin != iprev; ++begin) // the characters making up that leading end token become literal members
Chris@16	220 {
Chris@16	221 chset.set_char(*begin, rxtraits, icase);
Chris@16	222 }
Chris@16	223 }
Chris@16	224
Chris@16	225 compiler_token_type tok;
Chris@16	226 char_type ch_prev = char_type(), ch_next = char_type();
Chris@16	227 bool have_prev = false; // true while ch_prev is a pending character that may still become the low end of a range
Chris@16	228
Chris@16	229 BOOST_XPR_ENSURE_(begin != end, error_brack, "unexpected end of pattern found");
Chris@16	230
Chris@16	231 // remember the current position and grab the next token
Chris@16	232 iprev = begin;
Chris@16	233 tok = tr.get_charset_token(begin, end);
Chris@16	234 do // every 'continue' below jumps to the while-condition, which records iprev and fetches the next token
Chris@16	235 {
Chris@16	236 BOOST_XPR_ENSURE_(begin != end, error_brack, "unexpected end of pattern found");
Chris@16	237
Chris@16	238 if(token_charset_hyphen == tok && have_prev)
Chris@16	239 {
Chris@16	240 // remember the current position
Chris@16	241 FwdIter iprev2 = begin;
Chris@16	242 have_prev = false;
Chris@16	243
Chris@16	244 // ch_prev is lower bound of a range
Chris@16	245 switch(tr.get_charset_token(begin, end))
Chris@16	246 {
Chris@16	247 case token_charset_hyphen:
Chris@16	248 case token_charset_invert:
Chris@16	249 begin = iprev2; // un-get these tokens and fall through
Chris@16	250 BOOST_FALLTHROUGH;
Chris@16	251 case token_literal:
Chris@16	252 ch_next = *begin++;
Chris@16	253 BOOST_XPR_ENSURE_(ch_prev <= ch_next, error_range, "invalid charset range");
Chris@16	254 chset.set_range(ch_prev, ch_next, rxtraits, icase);
Chris@16	255 continue;
Chris@16	256 case token_charset_backspace:
Chris@16	257 ch_next = char_type(8); // backspace
Chris@16	258 BOOST_XPR_ENSURE_(ch_prev <= ch_next, error_range, "invalid charset range");
Chris@16	259 chset.set_range(ch_prev, ch_next, rxtraits, icase);
Chris@16	260 continue;
Chris@16	261 case token_escape:
Chris@16	262 esc = parse_escape(begin, end, tr);
Chris@16	263 if(escape_char == esc.type_)
Chris@16	264 {
Chris@16	265 BOOST_XPR_ENSURE_(ch_prev <= esc.ch_, error_range, "invalid charset range");
Chris@16	266 chset.set_range(ch_prev, esc.ch_, rxtraits, icase);
Chris@16	267 continue;
Chris@16	268 }
Chris@16	269 BOOST_FALLTHROUGH; // a class escape cannot end a range: treat the hyphen as a literal instead
Chris@16	270 case token_charset_end:
Chris@16	271 default: // not a range.
Chris@16	272 begin = iprev; // backup to hyphen token
Chris@16	273 chset.set_char(ch_prev, rxtraits, icase);
Chris@16	274 chset.set_char(*begin++, rxtraits, icase); // adds the character at the hyphen position; what follows is re-tokenized on the next pass
Chris@16	275 continue;
Chris@16	276 }
Chris@16	277 }
Chris@16	278
Chris@16	279 if(have_prev) // the pending character was not followed by a hyphen, so it is a plain member
Chris@16	280 {
Chris@16	281 chset.set_char(ch_prev, rxtraits, icase);
Chris@16	282 have_prev = false;
Chris@16	283 }
Chris@16	284
Chris@16	285 switch(tok)
Chris@16	286 {
Chris@16	287 case token_charset_hyphen:
Chris@16	288 case token_charset_invert:
Chris@16	289 case token_charset_end:
Chris@16	290 case token_posix_charset_end:
Chris@16	291 begin = iprev; // un-get these tokens
Chris@16	292 ch_prev = *begin++; // and take their first character as a pending literal
Chris@16	293 have_prev = true;
Chris@16	294 continue;
Chris@16	295
Chris@16	296 case token_charset_backspace:
Chris@16	297 ch_prev = char_type(8); // backspace
Chris@16	298 have_prev = true;
Chris@16	299 continue;
Chris@16	300
Chris@16	301 case token_posix_charset_begin:
Chris@16	302 {
Chris@16	303 FwdIter tmp = begin, start = begin; // start: first character of the class name; tmp: one past its last character
Chris@16	304 bool invert = (token_charset_invert == tr.get_charset_token(tmp, end)); // NOTE: shadows the outer 'invert'; applies to this class only
Chris@16	305 if(invert)
Chris@16	306 {
Chris@16	307 begin = start = tmp;
Chris@16	308 }
Chris@16	309 while(token_literal == (tok = tr.get_charset_token(begin, end))) // scan the literal characters of the class name
Chris@16	310 {
Chris@16	311 tmp = ++begin;
Chris@16	312 BOOST_XPR_ENSURE_(begin != end, error_brack, "unexpected end of pattern found");
Chris@16	313 }
Chris@16	314 if(token_posix_charset_end == tok)
Chris@16	315 {
Chris@16	316 char_class_type chclass = rxtraits.lookup_classname(start, tmp, icase);
Chris@16	317 BOOST_XPR_ENSURE_(0 != chclass, error_ctype, "unknown class name");
Chris@16	318 chset.set_class(chclass, invert);
Chris@16	319 continue;
Chris@16	320 }
Chris@16	321 begin = iprev; // un-get this token
Chris@16	322 ch_prev = *begin++; // not a well-formed class: fall back to treating the opening character as a literal
Chris@16	323 have_prev = true;
Chris@16	324 }
Chris@16	325 continue;
Chris@16	326
Chris@16	327 case token_escape:
Chris@16	328 esc = parse_escape(begin, end, tr);
Chris@16	329 if(escape_char == esc.type_)
Chris@16	330 {
Chris@16	331 ch_prev = esc.ch_;
Chris@16	332 have_prev = true;
Chris@16	333 }
Chris@16	334 else if(escape_class == esc.type_)
Chris@16	335 {
Chris@16	336 char_class_type upper_ = lookup_classname(rxtraits, "upper");
Chris@16	337 BOOST_ASSERT(0 != upper_);
Chris@16	338 chset.set_class(esc.class_, rxtraits.isctype(*begin++, upper_)); // consumes the class letter parse_escape left in place; an upper-case letter sets the second (negation) argument
Chris@16	339 }
Chris@16	340 else
Chris@16	341 {
Chris@16	342 BOOST_ASSERT(false); // parse_escape only yields escape_char or escape_class
Chris@16	343 }
Chris@16	344 continue;
Chris@16	345
Chris@16	346 default:
Chris@16	347 ch_prev = *begin++; // ordinary literal: hold it in case a hyphen follows
Chris@16	348 have_prev = true;
Chris@16	349 continue;
Chris@16	350 }
Chris@16	351 }
Chris@16	352 while(BOOST_XPR_ENSURE_((iprev = begin) != end, error_brack, "unexpected end of pattern found"),
Chris@16	353 token_charset_end != (tok = tr.get_charset_token(begin, end)));
Chris@16	354
Chris@16	355 if(have_prev) // flush a character still pending when the end token was reached
Chris@16	356 {
Chris@16	357 chset.set_char(ch_prev, rxtraits, icase);
Chris@16	358 }
Chris@16	359
Chris@16	360 if(invert)
Chris@16	361 {
Chris@16	362 chset.inverse();
Chris@16	363 }
Chris@16	364 }
Chris@16	365
Chris@16	366 }}} // namespace boost::xpressive::detail
Chris@16	367
Chris@16	368 #endif

Mercurial > hg > vamp-build-and-test

annotate DEPENDENCIES/generic/include/boost/xpressive/detail/dynamic/parse_charset.hpp @ 133:4acb5d8d80b6 tip