Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: /// \file symbols.hpp Chris@16: /// Contains the Ternary Search Trie implementation. Chris@16: /// Based on the following papers: Chris@16: /// J. Bentley and R. Sedgewick. (1998) Ternary search trees. Dr. Dobbs Journal Chris@16: /// G. Badr and B.J. Oommen. (2005) Self-Adjusting of Ternary Search Tries Using Chris@16: /// Conditional Rotations and Randomized Heuristics Chris@16: // Chris@16: // Copyright 2007 David Jenkins. Chris@16: // Copyright 2007 Eric Niebler. Chris@16: // Chris@16: // Distributed under the Boost Software License, Version 1.0. (See Chris@16: // accompanying file LICENSE_1_0.txt or copy at Chris@16: // http://www.boost.org/LICENSE_1_0.txt) Chris@16: Chris@16: #ifndef BOOST_XPRESSIVE_DETAIL_SYMBOLS_HPP_DRJ_06_11_2007 Chris@16: #define BOOST_XPRESSIVE_DETAIL_SYMBOLS_HPP_DRJ_06_11_2007 Chris@16: Chris@16: // MS compatible compilers support #pragma once Chris@101: #if defined(_MSC_VER) Chris@16: # pragma once Chris@16: #endif Chris@16: Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: Chris@16: namespace boost { namespace xpressive { namespace detail Chris@16: { Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: // symbols (using a ternary search trie) Chris@16: // Chris@16: template Chris@16: struct symbols Chris@16: { Chris@16: typedef typename range_value::type::first_type key_type; Chris@16: typedef typename range_value::type::second_type value_type; Chris@16: typedef typename range_value::type char_type; Chris@16: typedef typename range_const_iterator::type iterator; Chris@16: typedef typename range_const_iterator::type key_iterator; Chris@16: typedef value_type const *result_type; Chris@16: Chris@16: // copies of this symbol table share the TST Chris@16: Chris@16: template Chris@16: void load(Map const &map, Trans trans) Chris@16: { Chris@16: iterator begin = boost::begin(map); Chris@16: iterator end = boost::end(map); Chris@16: node* root_p = this->root.get(); Chris@16: for(; begin != end; ++begin) Chris@16: { Chris@16: key_iterator kbegin = boost::begin(begin->first); Chris@16: key_iterator kend = boost::end(begin->first); Chris@16: root_p = this->insert(root_p, kbegin, kend, &begin->second, trans); Chris@16: } Chris@16: this->root.reset(root_p); Chris@16: } Chris@16: Chris@16: template Chris@16: result_type operator ()(BidiIter &begin, BidiIter end, Trans trans) const Chris@16: { Chris@16: return this->search(begin, end, trans, this->root.get()); Chris@16: } Chris@16: Chris@16: template Chris@16: void peek(Sink const &sink) const Chris@16: { Chris@16: this->peek_(this->root.get(), sink); Chris@16: } Chris@16: Chris@16: private: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: // struct node : a node in the TST. Chris@16: // The "eq" field stores the result pointer when ch is zero. Chris@16: // Chris@16: struct node Chris@16: : boost::noncopyable Chris@16: { Chris@16: node(char_type c) Chris@16: : ch(c) Chris@16: , lo(0) Chris@16: , eq(0) Chris@16: , hi(0) Chris@16: #ifdef BOOST_DISABLE_THREADS Chris@16: , tau(0) Chris@16: #endif Chris@16: {} Chris@16: Chris@16: ~node() Chris@16: { Chris@16: delete lo; Chris@16: if (ch) Chris@16: delete eq; Chris@16: delete hi; Chris@16: } Chris@16: Chris@16: void swap(node& that) Chris@16: { Chris@16: std::swap(ch, that.ch); Chris@16: std::swap(lo, that.lo); Chris@16: std::swap(eq, that.eq); Chris@16: std::swap(hi, that.hi); Chris@16: #ifdef BOOST_DISABLE_THREADS Chris@16: std::swap(tau, that.tau); Chris@16: #endif Chris@16: } Chris@16: Chris@16: char_type ch; Chris@16: node* lo; Chris@16: union Chris@16: { Chris@16: node* eq; Chris@16: result_type result; Chris@16: }; Chris@16: node* hi; Chris@16: #ifdef BOOST_DISABLE_THREADS Chris@16: long tau; Chris@16: #endif Chris@16: }; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: // insert : insert a string into the TST Chris@16: // Chris@16: template Chris@16: node* insert(node* p, key_iterator &begin, key_iterator end, result_type r, Trans trans) const Chris@16: { Chris@16: char_type c1 = 0; Chris@16: Chris@16: if(begin != end) Chris@16: { Chris@16: c1 = trans(*begin); Chris@16: } Chris@16: Chris@16: if(!p) Chris@16: { Chris@16: p = new node(c1); Chris@16: } Chris@16: Chris@16: if(c1 < p->ch) Chris@16: { Chris@16: p->lo = this->insert(p->lo, begin, end, r, trans); Chris@16: } Chris@16: else if(c1 == p->ch) Chris@16: { Chris@16: if(0 == c1) Chris@16: { Chris@16: p->result = r; Chris@16: } Chris@16: else Chris@16: { Chris@16: p->eq = this->insert(p->eq, ++begin, end, r, trans); Chris@16: } Chris@16: } Chris@16: else Chris@16: { Chris@16: p->hi = this->insert(p->hi, begin, end, r, trans); Chris@16: } Chris@16: Chris@16: return p; Chris@16: } Chris@16: Chris@16: #ifdef BOOST_DISABLE_THREADS Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: // conditional rotation : the goal is to minimize the overall Chris@16: // weighted path length of each binary search tree Chris@16: // Chris@16: bool cond_rotation(bool left, node* const i, node* const j) const Chris@16: { Chris@16: // don't rotate top node in binary search tree Chris@16: if (i == j) Chris@16: return false; Chris@16: // calculate psi (the rotation condition) Chris@16: node* const k = (left ? i->hi : i->lo); Chris@16: long psi = 2*i->tau - j->tau - (k ? k->tau : 0); Chris@16: if (psi <= 0) Chris@16: return false; Chris@16: Chris@16: // recalculate the tau values Chris@16: j->tau += -i->tau + (k ? k->tau : 0); Chris@16: i->tau += j->tau - (k ? k->tau : 0); Chris@16: // fixup links and swap nodes Chris@16: if (left) Chris@16: { Chris@16: j->lo = k; Chris@16: i->hi = i; Chris@16: } Chris@16: else Chris@16: { Chris@16: j->hi = k; Chris@16: i->lo = i; Chris@16: } Chris@16: (*i).swap(*j); Chris@16: return true; Chris@16: } Chris@16: #endif Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: // search : find a string in the TST Chris@16: // Chris@16: template Chris@16: result_type search(BidiIter &begin, BidiIter end, Trans trans, node* p) const Chris@16: { Chris@16: result_type r = 0; Chris@16: #ifdef BOOST_DISABLE_THREADS Chris@16: node* p2 = p; Chris@16: bool left = false; Chris@16: #endif Chris@16: char_type c1 = (begin != end ? trans(*begin) : 0); Chris@16: while(p) Chris@16: { Chris@16: #ifdef BOOST_DISABLE_THREADS Chris@16: ++p->tau; Chris@16: #endif Chris@16: if(c1 == p->ch) Chris@16: { Chris@16: // conditional rotation test Chris@16: #ifdef BOOST_DISABLE_THREADS Chris@16: if (this->cond_rotation(left, p, p2)) Chris@16: p = p2; Chris@16: #endif Chris@16: if (0 == p->ch) Chris@16: { Chris@16: // it's a match! Chris@16: r = p->result; Chris@16: } Chris@16: if(begin == end) Chris@16: break; Chris@16: ++begin; Chris@16: p = p->eq; Chris@16: // search for the longest match first Chris@16: r = search(begin,end,trans,p); Chris@16: if (0 == r) Chris@16: { Chris@16: // search for a match ending here Chris@16: r = search(end,end,trans,p); Chris@16: if (0 == r) Chris@16: { Chris@16: --begin; Chris@16: } Chris@16: } Chris@16: break; Chris@16: } Chris@16: else if(c1 < p->ch) Chris@16: { Chris@16: #ifdef BOOST_DISABLE_THREADS Chris@16: left = true; Chris@16: p2 = p; Chris@16: #endif Chris@16: p = p->lo; Chris@16: } Chris@16: else // (c1 > p->ch) Chris@16: { Chris@16: #ifdef BOOST_DISABLE_THREADS Chris@16: left = false; Chris@16: p2 = p; Chris@16: #endif Chris@16: p = p->hi; Chris@16: } Chris@16: } Chris@16: return r; Chris@16: } Chris@16: Chris@16: template Chris@16: void peek_(node const *const &p, Sink const &sink) const Chris@16: { Chris@16: if(p) Chris@16: { Chris@16: sink(p->ch); Chris@16: this->peek_(p->lo, sink); Chris@16: this->peek_(p->hi, sink); Chris@16: } Chris@16: } Chris@16: Chris@16: boost::shared_ptr root; Chris@16: }; Chris@16: Chris@16: }}} // namespace boost::xpressive::detail Chris@16: Chris@16: #endif