Mercurial > hg > audiodb
changeset 518:ca1ee92c359c multiprobeLSH
Completed initial implementation of LSH MultiProbe class. Now needs to be joined to lshlib.cpp retrieve() method to perform multi-probe queries.
author | mas01mc |
---|---|
date | Mon, 26 Jan 2009 02:50:44 +0000 |
parents | 807c8be7dd45 |
children | fdcd436d7cbd |
files | multiprobe.cpp multiprobe.h |
diffstat | 2 files changed, 145 insertions(+), 71 deletions(-) [+] |
line wrap: on
line diff
--- a/multiprobe.cpp Sun Jan 25 06:10:38 2009 +0000 +++ b/multiprobe.cpp Mon Jan 26 02:50:44 2009 +0000 @@ -1,7 +1,8 @@ /* * MultiProbe C++ class * - * Given a LSH structure, perform lookup by probing nearby hash-function locations + * Given a vector of LSH boundary distances for a query, + * perform lookup by probing nearby hash-function locations * * Implementation using C++ STL * @@ -18,8 +19,16 @@ #include "multiprobe.h" +#define _TEST_MP_LSH -#define _TEST_MP_LSH +MinHeapElement::MinHeapElement(perturbation_set a, double s): + perturbs(a), + score(s) +{ + +} + +MinHeapElement::~MinHeapElement(){;} MultiProbe::MultiProbe(): minHeap(0), @@ -27,17 +36,36 @@ distFuns(0), numHashBoundaries(0) { - minHeap = new min_heap_of_perturbation_set(); - outSets = new vector_of_perturbation_set(); + } MultiProbe::~MultiProbe(){ - // FIXME: Are these arrays ? + cleanup(); +} + +void MultiProbe::initialize(){ + minHeap = new min_heap_of_perturbation_set(); + outSets = new min_heap_of_perturbation_set(); +} + +void MultiProbe::cleanup(){ delete minHeap; + minHeap = 0; delete outSets; + outSets = 0; delete distFuns; + distFuns = 0; } +inline size_t MultiProbe::size(){ + return outSets->size(); +} + +inline bool MultiProbe::empty(){ + return !outSets->size(); +} + + // Generate the optimal T perturbation sets for current query // pre-conditions: // an LSH structure was initialized and passed to constructor @@ -45,50 +73,55 @@ // the query-to-boundary distances are stored in x[hashFunIndex] // // post-conditions: -// // generates an ordered list of perturbation sets (stored as an array of sets) // these are indexes into pi_j=(i,delta) pairs representing x_i(delta) in sort order z_j // data structures are cleared and reset to zeros thereby making them re-entrant // -void MultiProbe::generatePerturbationSets(int T, vector<double>& x){ +void MultiProbe::generatePerturbationSets(vector<double>& x, int T){ perturbation_set ai,as,ae; + double ai_score; + cleanup(); // Make re-entrant + initialize(); makeSortedDistFuns(x); - ai.insert(1); // Initialize for this query - minHeap->push(make_pair(perturbation_set(ai), score(ai))); // unique instance stored in mhe + ai.insert(0); // Initialize for this query + minHeap->push(min_heap_element(ai, score(ai))); // unique instance stored in mhe - for(int i = 0 ; i < T ; i++ ){ + min_heap_element mhe = minHeap->top(); + + for(int i = 0 ; i != T ; i++ ){ do{ - min_heap_element mhe = minHeap->top(); + if(minHeap->empty()) + break; + mhe = minHeap->top(); + ai = mhe.perturbs; + ai_score = mhe.score; minHeap->pop(); - ai = mhe.first; - cout << "ai: "; - dump(ai); - as = perturbation_set(ai); + as=ai; shift(as); - cout << "as: "; - dump(as); - minHeap->push(make_pair(perturbation_set(as), score(as))); - ae = perturbation_set(ai); + minHeap->push(min_heap_element(as, score(as))); + ae=ai; expand(ae); - cout << "ae: "; - dump(ae); - minHeap->push(make_pair(perturbation_set(ae), score(ae))); + minHeap->push(min_heap_element(ae, score(ae))); }while(!valid(ai)); - outSets->push_back(ai); // Ordered list of perturbation sets + outSets->push(mhe); // Ordered list of perturbation sets } } -void MultiProbe::dump(perturbation_set& a){ +void MultiProbe::dump(perturbation_set a){ perturbation_set::iterator it = a.begin(); - while(it != a.end()) - cout << *it++ << " "; - cout << "\n"; + while(it != a.end()){ + cout << "[" << (*distFuns)[*it].second.first << "," << (*distFuns)[*it].second.second << "]" << " " + << (*distFuns)[*it].first << *it << ", "; + it++; + } + cout << "(" << score(a) << ")"; + cout << endl; } // Given the set a, add 1 to last element of the set -perturbation_set& MultiProbe::shift(perturbation_set& a){ +inline perturbation_set& MultiProbe::shift(perturbation_set& a){ perturbation_set::iterator it = a.end(); int val = *(--it) + 1; a.erase(it); @@ -96,7 +129,7 @@ } // Given the set a, add a new element one greater than the max -perturbation_set& MultiProbe::expand(perturbation_set& a){ +inline perturbation_set& MultiProbe::expand(perturbation_set& a){ perturbation_set::reverse_iterator ri = a.rbegin(); a.insert(*ri+1); } @@ -107,9 +140,8 @@ numHashBoundaries = x.size(); // x.size() == 2M delete distFuns; distFuns = new std::vector<sorted_distance_functions>(numHashBoundaries); - for(int i = 0; i != numHashBoundaries ; i++ ){ - (*distFuns)[i] = make_pair(x[i], make_pair(i, i%2?-1:+1)); - } + for(int i = 0; i != numHashBoundaries ; i++ ) + (*distFuns)[i] = make_pair(x[i], make_pair(i, i%2?1:-1)); // SORT sort( distFuns->begin(), distFuns->end() ); } @@ -122,9 +154,9 @@ perturbation_set::iterator it; it = a.begin(); do{ - tmp = (*distFuns)[*it++].first; + tmp = (*distFuns)[*it].first; score += tmp*tmp; - }while( it != a.end() ); + }while( ++it != a.end() ); return score; } @@ -136,27 +168,49 @@ int j; perturbation_set::iterator it = a.begin(); while( it != a.end() ){ - j = *it++; + j = *it; + it++; if( ( j > numHashBoundaries ) || ( a.find( numHashBoundaries + 1 - j ) != a.end() ) ) return false; } return true; } +// copy return next perturbation_set +perturbation_set MultiProbe::getNextPerturbationSet(){ + perturbation_set s = outSets->top().perturbs; + outSets->pop(); + return s; +} + +// Test routine: generate 100 random boundary distance pairs +// call generatePerturbationSets on these distances +// dump output for inspection #ifdef _TEST_MP_LSH int main(const int argc, const char* argv[]){ - MultiProbe mp = MultiProbe(); - vector<double> x(4); - x[0]=0.1; - x[1]=0.9; - x[2]=0.2; - x[3]=0.8; - mp.generatePerturbationSets(2, x); + int N_SAMPS = 100; // Number of random samples + int W = 4; // simulated hash-bucket size + int N_ITER = 100; // How many re-entrant iterations + int T = 10; // Number of multi-probe sets to generate + + MultiProbe mp= MultiProbe(); + vector<double> x(N_SAMPS); + + srand((unsigned)time(0)); + + // Test re-entrance on single instance + for(int j = 0; j< N_ITER ; j++){ + cout << "********** ITERATION " << j << " **********" << endl; + cout.flush(); + for (int i = 0 ; i != x.size()/2 ; i++ ){ + x[2*i] = W*(rand()/(RAND_MAX+1.0)); + x[2*i+1] = W - x[2*i]; + } + // Generate multi-probe sets + mp.generatePerturbationSets(x, T); + // Output contents of multi-probe sets + while(!mp.empty()) + mp.dump(mp.getNextPerturbationSet()); + } } #endif - - - - - -
--- a/multiprobe.h Sun Jan 25 06:10:38 2009 +0000 +++ b/multiprobe.h Mon Jan 26 02:50:44 2009 +0000 @@ -1,7 +1,8 @@ /* * MultiProbe C++ class * - * Given a LSH structure, perform lookup by probing nearby hash-function locations + * Given a vector of LSH boundary distances for a query, + * perform lookup by probing nearby hash-function locations * * Implementation using C++ STL * @@ -16,6 +17,9 @@ * */ +#ifndef __LSH_MULTIPROBE_ +#define __LSH_MULTIPROBE_ + #include <functional> #include <queue> #include <vector> @@ -23,26 +27,31 @@ #include <algorithm> #include <iostream> -using namespace std ; +using namespace std; typedef set<int > perturbation_set ; -typedef vector<perturbation_set > vector_of_perturbation_set ; -typedef pair<perturbation_set, float > min_heap_element ; -typedef vector<min_heap_element > vector_of_min_heap_element ; -typedef priority_queue< - min_heap_element, - vector_of_min_heap_element, - greater<min_heap_element> - > min_heap_of_perturbation_set ; + +typedef class MinHeapElement{ +public: + perturbation_set perturbs; + double score; + MinHeapElement(perturbation_set a, double s); + virtual ~MinHeapElement(); +} min_heap_element; + +typedef priority_queue<min_heap_element, + vector<min_heap_element>, + greater<min_heap_element> + > min_heap_of_perturbation_set ; + typedef pair<double, pair<int, int> > sorted_distance_functions ; -typedef vector<sorted_distance_functions> vector_of_sorted_distance_functions; -bool operator>(const min_heap_element& a, const min_heap_element& b){ - return a.second > b.second; +bool operator> (const min_heap_element& a, const min_heap_element& b){ + return a.score > b.score; } -bool operator<(const min_heap_element& a, const min_heap_element& b){ - return a.second < b.second; +bool operator< (const min_heap_element& a, const min_heap_element& b){ + return a.score < b.score; } bool operator>(const sorted_distance_functions& a, const sorted_distance_functions& b){ @@ -56,25 +65,34 @@ class MultiProbe{ protected: min_heap_of_perturbation_set* minHeap; - vector_of_perturbation_set* outSets; - vector_of_sorted_distance_functions* distFuns; + min_heap_of_perturbation_set* outSets; + vector<sorted_distance_functions>* distFuns; unsigned numHashBoundaries; -public: - MultiProbe(); - ~MultiProbe(); + // data structure initialization and cleanup + void initialize(); + void cleanup(); + // perturbation set operations perturbation_set& shift(perturbation_set&); perturbation_set& expand(perturbation_set&); double score(perturbation_set&); bool valid(perturbation_set&); + void makeSortedDistFuns(vector<double> &); + +public: + MultiProbe(); + ~MultiProbe(); // generation of perturbation sets - void generatePerturbationSets(int, vector<double>&); - void makeSortedDistFuns(vector<double> &); - void dump(perturbation_set& ); + void generatePerturbationSets(vector<double>& vectorOfBounaryDistances, int numSetsToGenerate); + perturbation_set getNextPerturbationSet(); + void dump(perturbation_set); + size_t size(); // Number of perturbation sets are in the output queue + bool empty(); // predicate for empty MultiProbe set }; + /* NOTES: Reference: @@ -113,3 +131,5 @@ end for */ + +#endif // __LSH_MULTIPROBE_