Chris@16: #ifndef BOOST_DATE_TIME_STRING_PARSE_TREE___HPP__ Chris@16: #define BOOST_DATE_TIME_STRING_PARSE_TREE___HPP__ Chris@16: Chris@16: /* Copyright (c) 2004-2005 CrystalClear Software, Inc. Chris@16: * Use, modification and distribution is subject to the Chris@16: * Boost Software License, Version 1.0. (See accompanying Chris@16: * file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) Chris@16: * Author: Jeff Garland, Bart Garst Chris@101: * $Date$ Chris@16: */ Chris@16: Chris@16: Chris@16: #include "boost/lexical_cast.hpp" //error without? Chris@16: #include "boost/algorithm/string/case_conv.hpp" Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: Chris@16: namespace boost { namespace date_time { Chris@16: Chris@16: Chris@16: template Chris@16: struct parse_match_result Chris@16: { Chris@16: parse_match_result() : Chris@16: match_depth(0), Chris@16: current_match(-1)// -1 is match_not-found value Chris@16: {} Chris@16: typedef std::basic_string string_type; Chris@16: string_type remaining() const Chris@16: { Chris@16: if (match_depth == cache.size()) { Chris@16: return string_type(); Chris@16: } Chris@16: if (current_match == -1) { Chris@16: return cache; Chris@16: } Chris@16: //some of the cache was used return the rest Chris@16: return string_type(cache, match_depth); Chris@16: } Chris@16: charT last_char() const Chris@16: { Chris@16: return cache[cache.size()-1]; Chris@16: } Chris@16: //! Returns true if more characters were parsed than was necessary Chris@16: /*! Should be used in conjunction with last_char() Chris@16: * to get the remaining character. Chris@16: */ Chris@16: bool has_remaining() const Chris@16: { Chris@16: return (cache.size() > match_depth); Chris@16: } Chris@16: Chris@16: // cache will hold characters that have been read from the stream Chris@16: string_type cache; Chris@16: unsigned short match_depth; Chris@16: short current_match; Chris@16: enum PARSE_STATE { PARSE_ERROR= -1 }; Chris@16: }; Chris@16: Chris@16: //for debug -- really only char streams... Chris@16: template Chris@16: std::basic_ostream& Chris@16: operator<<(std::basic_ostream& os, parse_match_result& mr) Chris@16: { Chris@16: os << "cm: " << mr.current_match Chris@16: << " C: '" << mr.cache Chris@16: << "' md: " << mr.match_depth Chris@16: << " R: " << mr.remaining(); Chris@16: return os; Chris@16: } Chris@16: Chris@16: Chris@16: Chris@16: //! Recursive data structure to allow efficient parsing of various strings Chris@16: /*! This class provides a quick lookup by building what amounts to a Chris@16: * tree data structure. It also features a match function which can Chris@16: * can handle nasty input interators by caching values as it recurses Chris@16: * the tree so that it can backtrack as needed. Chris@16: */ Chris@16: template Chris@16: struct string_parse_tree Chris@16: { Chris@16: #if BOOST_WORKAROUND( __BORLANDC__, BOOST_TESTED_AT(0x581) ) Chris@16: typedef std::multimap > ptree_coll; Chris@16: #else Chris@16: typedef std::multimap ptree_coll; Chris@16: #endif Chris@16: typedef typename ptree_coll::value_type value_type; Chris@16: typedef typename ptree_coll::iterator iterator; Chris@16: typedef typename ptree_coll::const_iterator const_iterator; Chris@16: typedef std::basic_string string_type; Chris@16: typedef std::vector > collection_type; Chris@16: typedef parse_match_result parse_match_result_type; Chris@16: Chris@16: /*! Parameter "starting_point" designates where the numbering begins. Chris@16: * A starting_point of zero will start the numbering at zero Chris@16: * (Sun=0, Mon=1, ...) were a starting_point of one starts the Chris@16: * numbering at one (Jan=1, Feb=2, ...). The default is zero, Chris@16: * negative vaules are not allowed */ Chris@16: string_parse_tree(collection_type names, unsigned int starting_point=0) Chris@16: { Chris@16: // iterate thru all the elements and build the tree Chris@16: unsigned short index = 0; Chris@16: while (index != names.size() ) { Chris@16: string_type s = boost::algorithm::to_lower_copy(names[index]); Chris@16: insert(s, static_cast(index + starting_point)); Chris@16: index++; Chris@16: } Chris@16: //set the last tree node = index+1 indicating a value Chris@16: index++; Chris@16: } Chris@16: Chris@16: Chris@16: string_parse_tree(short value = -1) : Chris@16: m_value(value) Chris@16: {} Chris@16: ptree_coll m_next_chars; Chris@16: short m_value; Chris@16: Chris@16: void insert(const string_type& s, unsigned short value) Chris@16: { Chris@16: unsigned int i = 0; Chris@16: iterator ti; Chris@16: while(i < s.size()) { Chris@16: if (i==0) { Chris@16: if (i == (s.size()-1)) { Chris@16: ti = m_next_chars.insert(value_type(s[i], Chris@16: string_parse_tree(value))); Chris@16: } Chris@16: else { Chris@16: ti = m_next_chars.insert(value_type(s[i], Chris@16: string_parse_tree())); Chris@16: } Chris@16: } Chris@16: else { Chris@16: if (i == (s.size()-1)) { Chris@16: ti = ti->second.m_next_chars.insert(value_type(s[i], Chris@16: string_parse_tree(value))); Chris@16: } Chris@16: Chris@16: else { Chris@16: ti = ti->second.m_next_chars.insert(value_type(s[i], Chris@16: string_parse_tree())); Chris@16: } Chris@16: Chris@16: } Chris@16: i++; Chris@16: } Chris@16: } Chris@16: Chris@16: Chris@16: //! Recursive function that finds a matching string in the tree. Chris@16: /*! Must check match_results::has_remaining() after match() is Chris@16: * called. This is required so the user can determine if Chris@16: * stream iterator is already pointing to the expected Chris@16: * character or not (match() might advance sitr to next char in stream). Chris@16: * Chris@16: * A parse_match_result that has been returned from a failed match Chris@16: * attempt can be sent in to the match function of a different Chris@16: * string_parse_tree to attempt a match there. Use the iterators Chris@16: * for the partially consumed stream, the parse_match_result object, Chris@16: * and '0' for the level parameter. */ Chris@16: short Chris@16: match(std::istreambuf_iterator& sitr, Chris@16: std::istreambuf_iterator& stream_end, Chris@16: parse_match_result_type& result, Chris@16: unsigned int& level) const Chris@16: { Chris@16: Chris@16: level++; Chris@16: charT c; Chris@16: // if we conditionally advance sitr, we won't have Chris@16: // to consume the next character past the input Chris@16: bool adv_itr = true; Chris@16: if (level > result.cache.size()) { Chris@16: if (sitr == stream_end) return 0; //bail - input exhausted Chris@16: c = static_cast(std::tolower(*sitr)); Chris@16: //result.cache += c; Chris@16: //sitr++; Chris@16: } Chris@16: else { Chris@16: // if we're looking for characters from the cache, Chris@16: // we don't want to increment sitr Chris@16: adv_itr = false; Chris@16: c = static_cast(std::tolower(result.cache[level-1])); Chris@16: } Chris@16: const_iterator litr = m_next_chars.lower_bound(c); Chris@16: const_iterator uitr = m_next_chars.upper_bound(c); Chris@16: while (litr != uitr) { // equal if not found Chris@16: if(adv_itr) { Chris@16: sitr++; Chris@16: result.cache += c; Chris@16: } Chris@16: if (litr->second.m_value != -1) { // -1 is default value Chris@16: if (result.match_depth < level) { Chris@16: result.current_match = litr->second.m_value; Chris@16: result.match_depth = static_cast(level); Chris@16: } Chris@16: litr->second.match(sitr, stream_end, Chris@16: result, level); Chris@16: level--; Chris@16: } Chris@16: else { Chris@16: litr->second.match(sitr, stream_end, Chris@16: result, level); Chris@16: level--; Chris@16: } Chris@16: Chris@16: if(level <= result.cache.size()) { Chris@16: adv_itr = false; Chris@16: } Chris@16: Chris@16: litr++; Chris@16: } Chris@16: return result.current_match; Chris@16: Chris@16: } Chris@16: Chris@16: /*! Must check match_results::has_remaining() after match() is Chris@16: * called. This is required so the user can determine if Chris@16: * stream iterator is already pointing to the expected Chris@16: * character or not (match() might advance sitr to next char in stream). Chris@16: */ Chris@16: parse_match_result_type Chris@16: match(std::istreambuf_iterator& sitr, Chris@16: std::istreambuf_iterator& stream_end) const Chris@16: { Chris@16: // lookup to_lower of char in tree. Chris@16: unsigned int level = 0; Chris@16: // string_type cache; Chris@16: parse_match_result_type result; Chris@16: match(sitr, stream_end, result, level); Chris@16: return result; Chris@16: } Chris@16: Chris@16: void printme(std::ostream& os, int& level) Chris@16: { Chris@16: level++; Chris@16: iterator itr = m_next_chars.begin(); Chris@16: iterator end = m_next_chars.end(); Chris@16: // os << "starting level: " << level << std::endl; Chris@16: while (itr != end) { Chris@16: os << "level: " << level Chris@16: << " node: " << itr->first Chris@16: << " value: " << itr->second.m_value Chris@16: << std::endl; Chris@16: itr->second.printme(os, level); Chris@16: itr++; Chris@16: } Chris@16: level--; Chris@16: } Chris@16: Chris@16: void print(std::ostream& os) Chris@16: { Chris@16: int level = 0; Chris@16: printme(os, level); Chris@16: } Chris@16: Chris@16: void printmatch(std::ostream& os, charT c) Chris@16: { Chris@16: iterator litr = m_next_chars.lower_bound(c); Chris@16: iterator uitr = m_next_chars.upper_bound(c); Chris@16: os << "matches for: " << c << std::endl; Chris@16: while (litr != uitr) { Chris@16: os << " node: " << litr->first Chris@16: << " value: " << litr->second.m_value Chris@16: << std::endl; Chris@16: litr++; Chris@16: } Chris@16: } Chris@16: Chris@16: }; Chris@16: Chris@16: Chris@16: } } //namespace Chris@16: #endif