Chris@16: /* Chris@16: * Chris@16: * Copyright (c) 2004 Chris@16: * John Maddock Chris@16: * Chris@16: * Use, modification and distribution are subject to the Chris@16: * Boost Software License, Version 1.0. (See accompanying file Chris@16: * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) Chris@16: * Chris@16: */ Chris@16: Chris@16: /* Chris@16: * LOCATION: see http://www.boost.org for most recent version. Chris@16: * FILE regex_traits_defaults.hpp Chris@16: * VERSION see Chris@16: * DESCRIPTION: Declares API's for access to regex_traits default properties. Chris@16: */ Chris@16: Chris@16: #ifndef BOOST_REGEX_TRAITS_DEFAULTS_HPP_INCLUDED Chris@16: #define BOOST_REGEX_TRAITS_DEFAULTS_HPP_INCLUDED Chris@16: Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(push) Chris@16: #pragma warning(disable: 4103) Chris@16: #endif Chris@16: #ifdef BOOST_HAS_ABI_HEADERS Chris@16: # include BOOST_ABI_PREFIX Chris@16: #endif Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(pop) Chris@16: #endif Chris@16: Chris@16: #ifndef BOOST_REGEX_SYNTAX_TYPE_HPP Chris@16: #include Chris@16: #endif Chris@16: #ifndef BOOST_REGEX_ERROR_TYPE_HPP Chris@16: #include Chris@16: #endif Chris@16: Chris@16: #ifdef BOOST_NO_STDC_NAMESPACE Chris@16: namespace std{ Chris@16: using ::strlen; Chris@16: } Chris@16: #endif Chris@16: Chris@16: namespace boost{ namespace re_detail{ Chris@16: Chris@16: Chris@16: // Chris@16: // helpers to suppress warnings: Chris@16: // Chris@16: template Chris@16: inline bool is_extended(charT c) Chris@16: { return c > 256; } Chris@16: inline bool is_extended(char) Chris@16: { return false; } Chris@16: Chris@16: Chris@16: BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_syntax(regex_constants::syntax_type n); Chris@16: BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_error_string(regex_constants::error_type n); Chris@16: BOOST_REGEX_DECL regex_constants::syntax_type BOOST_REGEX_CALL get_default_syntax_type(char c); Chris@16: BOOST_REGEX_DECL regex_constants::escape_syntax_type BOOST_REGEX_CALL get_default_escape_syntax_type(char c); Chris@16: Chris@16: // is charT c a combining character? Chris@16: BOOST_REGEX_DECL bool BOOST_REGEX_CALL is_combining_implementation(uint_least16_t s); Chris@16: Chris@16: template Chris@16: inline bool is_combining(charT c) Chris@16: { Chris@16: return (c <= static_cast(0)) ? false : ((c >= static_cast((std::numeric_limits::max)())) ? false : is_combining_implementation(static_cast(c))); Chris@16: } Chris@16: template <> Chris@16: inline bool is_combining(char) Chris@16: { Chris@16: return false; Chris@16: } Chris@16: template <> Chris@16: inline bool is_combining(signed char) Chris@16: { Chris@16: return false; Chris@16: } Chris@16: template <> Chris@16: inline bool is_combining(unsigned char) Chris@16: { Chris@16: return false; Chris@16: } Chris@16: #if !defined(__hpux) && !defined(__WINSCW__) // can't use WCHAR_MAX/MIN in pp-directives Chris@16: #ifdef _MSC_VER Chris@16: template<> Chris@16: inline bool is_combining(wchar_t c) Chris@16: { Chris@16: return is_combining_implementation(static_cast(c)); Chris@16: } Chris@16: #elif !defined(__DECCXX) && !defined(__osf__) && !defined(__OSF__) && defined(WCHAR_MIN) && (WCHAR_MIN == 0) && !defined(BOOST_NO_INTRINSIC_WCHAR_T) Chris@16: #if defined(WCHAR_MAX) && (WCHAR_MAX <= USHRT_MAX) Chris@16: template<> Chris@16: inline bool is_combining(wchar_t c) Chris@16: { Chris@16: return is_combining_implementation(static_cast(c)); Chris@16: } Chris@16: #else Chris@16: template<> Chris@16: inline bool is_combining(wchar_t c) Chris@16: { Chris@16: return (c >= (std::numeric_limits::max)()) ? false : is_combining_implementation(static_cast(c)); Chris@16: } Chris@16: #endif Chris@16: #endif Chris@16: #endif Chris@16: Chris@16: // Chris@16: // is a charT c a line separator? Chris@16: // Chris@16: template Chris@16: inline bool is_separator(charT c) Chris@16: { Chris@16: return BOOST_REGEX_MAKE_BOOL( Chris@16: (c == static_cast('\n')) Chris@16: || (c == static_cast('\r')) Chris@16: || (c == static_cast('\f')) Chris@16: || (static_cast(c) == 0x2028u) Chris@16: || (static_cast(c) == 0x2029u) Chris@16: || (static_cast(c) == 0x85u)); Chris@16: } Chris@16: template <> Chris@16: inline bool is_separator(char c) Chris@16: { Chris@16: return BOOST_REGEX_MAKE_BOOL((c == '\n') || (c == '\r') || (c == '\f')); Chris@16: } Chris@16: Chris@16: // Chris@16: // get a default collating element: Chris@16: // Chris@16: BOOST_REGEX_DECL std::string BOOST_REGEX_CALL lookup_default_collate_name(const std::string& name); Chris@16: Chris@16: // Chris@16: // get the state_id of a character clasification, the individual Chris@16: // traits classes then transform that state_id into a bitmask: Chris@16: // Chris@16: template Chris@16: struct character_pointer_range Chris@16: { Chris@16: const charT* p1; Chris@16: const charT* p2; Chris@16: Chris@16: bool operator < (const character_pointer_range& r)const Chris@16: { Chris@16: return std::lexicographical_compare(p1, p2, r.p1, r.p2); Chris@16: } Chris@16: bool operator == (const character_pointer_range& r)const Chris@16: { Chris@16: // Not only do we check that the ranges are of equal size before Chris@16: // calling std::equal, but there is no other algorithm available: Chris@16: // not even a non-standard MS one. So forward to unchecked_equal Chris@16: // in the MS case. Chris@16: return ((p2 - p1) == (r.p2 - r.p1)) && re_detail::equal(p1, p2, r.p1); Chris@16: } Chris@16: }; Chris@16: template Chris@16: int get_default_class_id(const charT* p1, const charT* p2) Chris@16: { Chris@16: static const charT data[73] = { Chris@16: 'a', 'l', 'n', 'u', 'm', Chris@16: 'a', 'l', 'p', 'h', 'a', Chris@16: 'b', 'l', 'a', 'n', 'k', Chris@16: 'c', 'n', 't', 'r', 'l', Chris@16: 'd', 'i', 'g', 'i', 't', Chris@16: 'g', 'r', 'a', 'p', 'h', Chris@16: 'l', 'o', 'w', 'e', 'r', Chris@16: 'p', 'r', 'i', 'n', 't', Chris@16: 'p', 'u', 'n', 'c', 't', Chris@16: 's', 'p', 'a', 'c', 'e', Chris@16: 'u', 'n', 'i', 'c', 'o', 'd', 'e', Chris@16: 'u', 'p', 'p', 'e', 'r', Chris@16: 'v', Chris@16: 'w', 'o', 'r', 'd', Chris@16: 'x', 'd', 'i', 'g', 'i', 't', Chris@16: }; Chris@16: Chris@16: static const character_pointer_range ranges[21] = Chris@16: { Chris@16: {data+0, data+5,}, // alnum Chris@16: {data+5, data+10,}, // alpha Chris@16: {data+10, data+15,}, // blank Chris@16: {data+15, data+20,}, // cntrl Chris@16: {data+20, data+21,}, // d Chris@16: {data+20, data+25,}, // digit Chris@16: {data+25, data+30,}, // graph Chris@16: {data+29, data+30,}, // h Chris@16: {data+30, data+31,}, // l Chris@16: {data+30, data+35,}, // lower Chris@16: {data+35, data+40,}, // print Chris@16: {data+40, data+45,}, // punct Chris@16: {data+45, data+46,}, // s Chris@16: {data+45, data+50,}, // space Chris@16: {data+57, data+58,}, // u Chris@16: {data+50, data+57,}, // unicode Chris@16: {data+57, data+62,}, // upper Chris@16: {data+62, data+63,}, // v Chris@16: {data+63, data+64,}, // w Chris@16: {data+63, data+67,}, // word Chris@16: {data+67, data+73,}, // xdigit Chris@16: }; Chris@16: static const character_pointer_range* ranges_begin = ranges; Chris@16: static const character_pointer_range* ranges_end = ranges + (sizeof(ranges)/sizeof(ranges[0])); Chris@16: Chris@16: character_pointer_range t = { p1, p2, }; Chris@16: const character_pointer_range* p = std::lower_bound(ranges_begin, ranges_end, t); Chris@16: if((p != ranges_end) && (t == *p)) Chris@16: return static_cast(p - ranges); Chris@16: return -1; Chris@16: } Chris@16: Chris@16: // Chris@16: // helper functions: Chris@16: // Chris@16: template Chris@16: std::ptrdiff_t global_length(const charT* p) Chris@16: { Chris@16: std::ptrdiff_t n = 0; Chris@16: while(*p) Chris@16: { Chris@16: ++p; Chris@16: ++n; Chris@16: } Chris@16: return n; Chris@16: } Chris@16: template<> Chris@16: inline std::ptrdiff_t global_length(const char* p) Chris@16: { Chris@16: return (std::strlen)(p); Chris@16: } Chris@16: #ifndef BOOST_NO_WREGEX Chris@16: template<> Chris@16: inline std::ptrdiff_t global_length(const wchar_t* p) Chris@16: { Chris@16: return (std::wcslen)(p); Chris@16: } Chris@16: #endif Chris@16: template Chris@16: inline charT BOOST_REGEX_CALL global_lower(charT c) Chris@16: { Chris@16: return c; Chris@16: } Chris@16: template Chris@16: inline charT BOOST_REGEX_CALL global_upper(charT c) Chris@16: { Chris@16: return c; Chris@16: } Chris@16: Chris@16: BOOST_REGEX_DECL char BOOST_REGEX_CALL do_global_lower(char c); Chris@16: BOOST_REGEX_DECL char BOOST_REGEX_CALL do_global_upper(char c); Chris@16: #ifndef BOOST_NO_WREGEX Chris@16: BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL do_global_lower(wchar_t c); Chris@16: BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL do_global_upper(wchar_t c); Chris@16: #endif Chris@16: #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T Chris@16: BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL do_global_lower(unsigned short c); Chris@16: BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL do_global_upper(unsigned short c); Chris@16: #endif Chris@16: // Chris@16: // This sucks: declare template specialisations of global_lower/global_upper Chris@16: // that just forward to the non-template implementation functions. We do Chris@16: // this because there is one compiler (Compaq Tru64 C++) that doesn't seem Chris@16: // to differentiate between templates and non-template overloads.... Chris@16: // what's more, the primary template, plus all overloads have to be Chris@16: // defined in the same translation unit (if one is inline they all must be) Chris@16: // otherwise the "local template instantiation" compiler option can pick Chris@16: // the wrong instantiation when linking: Chris@16: // Chris@16: template<> inline char BOOST_REGEX_CALL global_lower(char c){ return do_global_lower(c); } Chris@16: template<> inline char BOOST_REGEX_CALL global_upper(char c){ return do_global_upper(c); } Chris@16: #ifndef BOOST_NO_WREGEX Chris@16: template<> inline wchar_t BOOST_REGEX_CALL global_lower(wchar_t c){ return do_global_lower(c); } Chris@16: template<> inline wchar_t BOOST_REGEX_CALL global_upper(wchar_t c){ return do_global_upper(c); } Chris@16: #endif Chris@16: #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T Chris@16: template<> inline unsigned short BOOST_REGEX_CALL global_lower(unsigned short c){ return do_global_lower(c); } Chris@16: template<> inline unsigned short BOOST_REGEX_CALL global_upper(unsigned short c){ return do_global_upper(c); } Chris@16: #endif Chris@16: Chris@16: template Chris@16: int global_value(charT c) Chris@16: { Chris@16: static const charT zero = '0'; Chris@16: static const charT nine = '9'; Chris@16: static const charT a = 'a'; Chris@16: static const charT f = 'f'; Chris@16: static const charT A = 'A'; Chris@16: static const charT F = 'F'; Chris@16: Chris@16: if(c > f) return -1; Chris@16: if(c >= a) return 10 + (c - a); Chris@16: if(c > F) return -1; Chris@16: if(c >= A) return 10 + (c - A); Chris@16: if(c > nine) return -1; Chris@16: if(c >= zero) return c - zero; Chris@16: return -1; Chris@16: } Chris@16: template Chris@16: int global_toi(const charT*& p1, const charT* p2, int radix, const traits& t) Chris@16: { Chris@16: (void)t; // warning suppression Chris@16: int next_value = t.value(*p1, radix); Chris@16: if((p1 == p2) || (next_value < 0) || (next_value >= radix)) Chris@16: return -1; Chris@16: int result = 0; Chris@16: while(p1 != p2) Chris@16: { Chris@16: next_value = t.value(*p1, radix); Chris@16: if((next_value < 0) || (next_value >= radix)) Chris@16: break; Chris@16: result *= radix; Chris@16: result += next_value; Chris@16: ++p1; Chris@16: } Chris@16: return result; Chris@16: } Chris@16: Chris@16: template Chris@16: inline const charT* get_escape_R_string() Chris@16: { Chris@16: #ifdef BOOST_MSVC Chris@16: # pragma warning(push) Chris@16: # pragma warning(disable:4309 4245) Chris@16: #endif Chris@16: static const charT e1[] = { '(', '?', '>', '\x0D', '\x0A', '?', Chris@16: '|', '[', '\x0A', '\x0B', '\x0C', static_cast('\x85'), '\\', 'x', '{', '2', '0', '2', '8', '}', Chris@16: '\\', 'x', '{', '2', '0', '2', '9', '}', ']', ')', '\0' }; Chris@16: static const charT e2[] = { '(', '?', '>', '\x0D', '\x0A', '?', Chris@16: '|', '[', '\x0A', '\x0B', '\x0C', static_cast('\x85'), ']', ')', '\0' }; Chris@16: Chris@16: charT c = static_cast(0x2029u); Chris@16: bool b = (static_cast(c) == 0x2029u); Chris@16: Chris@16: return (b ? e1 : e2); Chris@16: #ifdef BOOST_MSVC Chris@16: # pragma warning(pop) Chris@16: #endif Chris@16: } Chris@16: Chris@16: template <> Chris@16: inline const char* get_escape_R_string() Chris@16: { Chris@16: #ifdef BOOST_MSVC Chris@16: # pragma warning(push) Chris@16: # pragma warning(disable:4309) Chris@16: #endif Chris@16: static const char e2[] = { '(', '?', '>', '\x0D', '\x0A', '?', Chris@16: '|', '[', '\x0A', '\x0B', '\x0C', '\x85', ']', ')', '\0' }; Chris@16: return e2; Chris@16: #ifdef BOOST_MSVC Chris@16: # pragma warning(pop) Chris@16: #endif Chris@16: } Chris@16: Chris@16: } // re_detail Chris@16: } // boost Chris@16: Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(push) Chris@16: #pragma warning(disable: 4103) Chris@16: #endif Chris@16: #ifdef BOOST_HAS_ABI_HEADERS Chris@16: # include BOOST_ABI_SUFFIX Chris@16: #endif Chris@16: #ifdef BOOST_MSVC Chris@16: #pragma warning(pop) Chris@16: #endif Chris@16: Chris@16: #endif