Chris@16: /*============================================================================= Chris@16: Copyright (c) 2001-2003 Joel de Guzman Chris@16: http://spirit.sourceforge.net/ Chris@16: Chris@16: Use, modification and distribution is subject to the Boost Software Chris@16: License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at Chris@16: http://www.boost.org/LICENSE_1_0.txt) Chris@16: =============================================================================*/ Chris@16: #ifndef BOOST_SPIRIT_TST_IPP Chris@16: #define BOOST_SPIRIT_TST_IPP Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: #include // for std::auto_ptr Chris@16: #include Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: namespace boost { namespace spirit { Chris@16: Chris@16: BOOST_SPIRIT_CLASSIC_NAMESPACE_BEGIN Chris@16: Chris@16: namespace impl Chris@16: { Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: // Chris@16: // tst class Chris@16: // Chris@16: // Ternary Search Tree implementation. The data structure is faster than Chris@16: // hashing for many typical search problems especially when the search Chris@16: // interface is iterator based. Searching for a string of length k in a Chris@16: // ternary search tree with n strings will require at most O(log n+k) Chris@16: // character comparisons. TSTs are many times faster than hash tables Chris@16: // for unsuccessful searches since mismatches are discovered earlier Chris@16: // after examining only a few characters. Hash tables always examine an Chris@16: // entire key when searching. Chris@16: // Chris@16: // For details see http://www.cs.princeton.edu/~rs/strings/. Chris@16: // Chris@16: // *** This is a low level class and is Chris@16: // not meant for public consumption *** Chris@16: // Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: template Chris@16: struct tst_node Chris@16: { Chris@16: tst_node(CharT value_) Chris@16: : value(value_) Chris@16: , left(0) Chris@16: , right(0) Chris@16: { middle.link = 0; } Chris@16: Chris@16: ~tst_node() Chris@16: { Chris@16: delete left; Chris@16: delete right; Chris@16: if (value) Chris@16: delete middle.link; Chris@16: else Chris@16: delete middle.data; Chris@16: } Chris@16: Chris@16: tst_node* Chris@16: clone() const Chris@16: { Chris@16: std::auto_ptr copy(new tst_node(value)); Chris@16: Chris@16: if (left) Chris@16: copy->left = left->clone(); Chris@16: if (right) Chris@16: copy->right = right->clone(); Chris@16: Chris@16: if (value && middle.link) Chris@16: { Chris@16: copy->middle.link = middle.link->clone(); Chris@16: } Chris@16: else Chris@16: { Chris@16: std::auto_ptr mid_data(new T(*middle.data)); Chris@16: copy->middle.data = mid_data.release(); Chris@16: } Chris@16: Chris@16: return copy.release(); Chris@16: } Chris@16: Chris@16: union center { Chris@16: Chris@16: tst_node* link; Chris@16: T* data; Chris@16: }; Chris@16: Chris@16: CharT value; Chris@16: tst_node* left; Chris@16: center middle; Chris@16: tst_node* right; Chris@16: }; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////// Chris@16: template Chris@16: class tst Chris@16: { Chris@16: public: Chris@16: Chris@16: struct search_info Chris@16: { Chris@16: T* data; Chris@16: std::size_t length; Chris@16: }; Chris@16: Chris@16: tst() Chris@16: : root(0) {} Chris@16: Chris@16: tst(tst const& other) Chris@16: : root(other.root ? other.root->clone() : 0) {} Chris@16: Chris@16: ~tst() Chris@16: { delete root; } Chris@16: Chris@16: tst& Chris@16: operator=(tst const& other) Chris@16: { Chris@16: if (this != &other) Chris@16: { Chris@16: node_t* new_root = other.root ? other.root->clone() : 0; Chris@16: delete root; Chris@16: root = new_root; Chris@16: } Chris@16: return *this; Chris@16: } Chris@16: Chris@16: template Chris@16: T* add(IteratorT first, IteratorT const& last, T const& data) Chris@16: { Chris@16: if (first == last) Chris@16: return 0; Chris@16: Chris@16: node_t** np = &root; Chris@16: CharT ch = *first; Chris@16: Chris@16: BOOST_SPIRIT_ASSERT((first == last || ch != 0) Chris@16: && "Won't add string containing null character"); Chris@16: Chris@16: for (;;) Chris@16: { Chris@16: if (*np == 0 || ch == 0) Chris@16: { Chris@16: node_t* right = 0; Chris@16: if (np != 0) Chris@16: right = *np; Chris@16: *np = new node_t(ch); Chris@16: if (right) Chris@16: (**np).right = right; Chris@16: } Chris@16: Chris@16: if (ch < (**np).value) Chris@16: { Chris@16: np = &(**np).left; Chris@16: } Chris@16: else Chris@16: { Chris@16: if (ch == (**np).value) Chris@16: { Chris@16: if (ch == 0) Chris@16: { Chris@16: if ((**np).middle.data == 0) Chris@16: { Chris@16: (**np).middle.data = new T(data); Chris@16: return (**np).middle.data; Chris@16: } Chris@16: else Chris@16: { Chris@16: // re-addition is disallowed Chris@16: return 0; Chris@16: } Chris@16: } Chris@16: ++first; Chris@16: ch = (first == last) ? CharT(0) : *first; Chris@16: BOOST_SPIRIT_ASSERT((first == last || ch != 0) Chris@16: && "Won't add string containing null character"); Chris@16: np = &(**np).middle.link; Chris@16: } Chris@16: else Chris@16: { Chris@16: np = &(**np).right; Chris@16: } Chris@16: } Chris@16: } Chris@16: } Chris@16: Chris@16: template Chris@16: search_info find(ScannerT const& scan) const Chris@16: { Chris@16: search_info result = { 0, 0 }; Chris@16: if (scan.at_end()) { Chris@16: return result; Chris@16: } Chris@16: Chris@16: typedef typename ScannerT::iterator_t iterator_t; Chris@16: node_t* np = root; Chris@16: CharT ch = *scan; Chris@16: iterator_t save = scan.first; Chris@16: iterator_t latest = scan.first; Chris@16: std::size_t latest_len = 0; Chris@16: Chris@16: while (np) Chris@16: { Chris@16: Chris@16: if (ch < np->value) // => go left! Chris@16: { Chris@16: if (np->value == 0) Chris@16: { Chris@16: result.data = np->middle.data; Chris@16: if (result.data) Chris@16: { Chris@16: latest = scan.first; Chris@16: latest_len = result.length; Chris@16: } Chris@16: } Chris@16: Chris@16: np = np->left; Chris@16: } Chris@16: else if (ch == np->value) // => go middle! Chris@16: { Chris@16: // Matching the null character is not allowed. Chris@16: if (np->value == 0) Chris@16: { Chris@16: result.data = np->middle.data; Chris@16: if (result.data) Chris@16: { Chris@16: latest = scan.first; Chris@16: latest_len = result.length; Chris@16: } Chris@16: break; Chris@16: } Chris@16: Chris@16: ++scan; Chris@16: ch = scan.at_end() ? CharT(0) : *scan; Chris@16: np = np->middle.link; Chris@16: ++result.length; Chris@16: } Chris@16: else // (ch > np->value) => go right! Chris@16: { Chris@16: if (np->value == 0) Chris@16: { Chris@16: result.data = np->middle.data; Chris@16: if (result.data) Chris@16: { Chris@16: latest = scan.first; Chris@16: latest_len = result.length; Chris@16: } Chris@16: } Chris@16: Chris@16: np = np->right; Chris@16: } Chris@16: } Chris@16: Chris@16: if (result.data == 0) Chris@16: { Chris@16: scan.first = save; Chris@16: } Chris@16: else Chris@16: { Chris@16: scan.first = latest; Chris@16: result.length = latest_len; Chris@16: } Chris@16: return result; Chris@16: } Chris@16: Chris@16: private: Chris@16: Chris@16: typedef tst_node node_t; Chris@16: node_t* root; Chris@16: }; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: } // namespace impl Chris@16: Chris@16: BOOST_SPIRIT_CLASSIC_NAMESPACE_END Chris@16: Chris@16: }} // namespace boost::spirit Chris@16: Chris@16: #endif