# HG changeset patch # User mas01mc # Date 1232863838 0 # Node ID 807c8be7dd4566e3fc5278853596be94c8570de7 # Parent 2a7bad47a4a721169f84d0fef7d061d6094774fa Completed multiprobe framework for LSH. Requires testing. diff -r 2a7bad47a4a7 -r 807c8be7dd45 multiprobe.cpp --- a/multiprobe.cpp Sat Jan 24 19:51:46 2009 +0000 +++ b/multiprobe.cpp Sun Jan 25 06:10:38 2009 +0000 @@ -21,7 +21,12 @@ #define _TEST_MP_LSH -MultiProbe::MultiProbe(){ +MultiProbe::MultiProbe(): + minHeap(0), + outSets(0), + distFuns(0), + numHashBoundaries(0) +{ minHeap = new min_heap_of_perturbation_set(); outSets = new vector_of_perturbation_set(); } @@ -30,14 +35,7 @@ // FIXME: Are these arrays ? delete minHeap; delete outSets; -} - -perturbation_set& MultiProbe::shift(perturbation_set a){ - // a[a.length]++ -} - -perturbation_set& MultiProbe::expand(perturbation_set a){ - // a[a.length+1]=a[a.length]+1; + delete distFuns; } // Generate the optimal T perturbation sets for current query @@ -52,12 +50,12 @@ // these are indexes into pi_j=(i,delta) pairs representing x_i(delta) in sort order z_j // data structures are cleared and reset to zeros thereby making them re-entrant // -void MultiProbe::generatePerturbationSets(int T, vector& x){ +void MultiProbe::generatePerturbationSets(int T, vector& x){ perturbation_set ai,as,ae; - distFuns = makeSortedDistFuns(x); // NEED TO IMPLEMENT, distances from query to boundaries + makeSortedDistFuns(x); - ai.insert(1); + ai.insert(1); // Initialize for this query minHeap->push(make_pair(perturbation_set(ai), score(ai))); // unique instance stored in mhe for(int i = 0 ; i < T ; i++ ){ @@ -65,47 +63,95 @@ min_heap_element mhe = minHeap->top(); minHeap->pop(); ai = mhe.first; - as = shift(perturbation_set(ai)); + cout << "ai: "; + dump(ai); + as = perturbation_set(ai); + shift(as); + cout << "as: "; + dump(as); minHeap->push(make_pair(perturbation_set(as), score(as))); - ae = expand(perturbation_set(ai)); + ae = perturbation_set(ai); + expand(ae); + cout << "ae: "; + dump(ae); minHeap->push(make_pair(perturbation_set(ae), score(ae))); }while(!valid(ai)); outSets->push_back(ai); // Ordered list of perturbation sets } } +void MultiProbe::dump(perturbation_set& a){ + perturbation_set::iterator it = a.begin(); + while(it != a.end()) + cout << *it++ << " "; + cout << "\n"; +} + +// Given the set a, add 1 to last element of the set +perturbation_set& MultiProbe::shift(perturbation_set& a){ + perturbation_set::iterator it = a.end(); + int val = *(--it) + 1; + a.erase(it); + a.insert(it,val); +} + +// Given the set a, add a new element one greater than the max +perturbation_set& MultiProbe::expand(perturbation_set& a){ + perturbation_set::reverse_iterator ri = a.rbegin(); + a.insert(*ri+1); +} // Take the list of distances (x) assuming list len is 2M and // delta = (-1)^i, i = { 0 .. 2M-1 } -sorted_distance_functions& MultiProbe::makeSortedDistFuns(vector& x){ - // Change these to vector > > - sorted_distance_functions* distFuns = new sorted_distance_functions[x.size()]; - for(int i = 0; i < x.size() ; i++){ - distFuns[i] = make_pair(x[i], make_pair(i, i%2?-1:+1)); +void MultiProbe::makeSortedDistFuns(vector& x){ + numHashBoundaries = x.size(); // x.size() == 2M + delete distFuns; + distFuns = new std::vector(numHashBoundaries); + for(int i = 0; i != numHashBoundaries ; i++ ){ + (*distFuns)[i] = make_pair(x[i], make_pair(i, i%2?-1:+1)); } + // SORT + sort( distFuns->begin(), distFuns->end() ); } +// For a given perturbation set, the score is the +// sum of squares of corresponding distances in x double MultiProbe::score(perturbation_set& a){ - // Needs implementing + //assert(!a.empty()); + double score = 0.0, tmp; + perturbation_set::iterator it; + it = a.begin(); + do{ + tmp = (*distFuns)[*it++].first; + score += tmp*tmp; + }while( it != a.end() ); + return score; } +// A valid set must have at most one +// of the two elements {j, 2M + 1 - j} for every j +// +// A perturbation set containing an element > 2M is invalid bool MultiProbe::valid(perturbation_set& a){ - - - // A valid set must have at most one - // of the two elements {j, 2M + 1 - j} for every j - // - // A perturbation set containing an element > 2M is invalid - - - + int j; + perturbation_set::iterator it = a.begin(); + while( it != a.end() ){ + j = *it++; + if( ( j > numHashBoundaries ) || ( a.find( numHashBoundaries + 1 - j ) != a.end() ) ) + return false; + } + return true; } #ifdef _TEST_MP_LSH int main(const int argc, const char* argv[]){ - - MultiProbe mp(); - + MultiProbe mp = MultiProbe(); + vector x(4); + x[0]=0.1; + x[1]=0.9; + x[2]=0.2; + x[3]=0.8; + mp.generatePerturbationSets(2, x); } #endif diff -r 2a7bad47a4a7 -r 807c8be7dd45 multiprobe.h --- a/multiprobe.h Sat Jan 24 19:51:46 2009 +0000 +++ b/multiprobe.h Sun Jan 25 06:10:38 2009 +0000 @@ -20,6 +20,8 @@ #include #include #include +#include +#include using namespace std ; @@ -33,6 +35,7 @@ greater > min_heap_of_perturbation_set ; typedef pair > sorted_distance_functions ; +typedef vector vector_of_sorted_distance_functions; bool operator>(const min_heap_element& a, const min_heap_element& b){ return a.second > b.second; @@ -43,31 +46,33 @@ } bool operator>(const sorted_distance_functions& a, const sorted_distance_functions& b){ - return a.second > b.second; + return a.first > b.first; } bool operator<(const sorted_distance_functions& a, const sorted_distance_functions& b){ - return a.second < b.second; + return a.first < b.first; } class MultiProbe{ protected: min_heap_of_perturbation_set* minHeap; vector_of_perturbation_set* outSets; - sorted_distance_functions distFuns; + vector_of_sorted_distance_functions* distFuns; + unsigned numHashBoundaries; public: MultiProbe(); ~MultiProbe(); // perturbation set operations - perturbation_set& shift(perturbation_set); - perturbation_set& expand(perturbation_set); + perturbation_set& shift(perturbation_set&); + perturbation_set& expand(perturbation_set&); double score(perturbation_set&); bool valid(perturbation_set&); // generation of perturbation sets - void generatePerturbationSets(int, vector&); - sorted_distance_functions& makeSortedDistFuns(vector &); + void generatePerturbationSets(int, vector&); + void makeSortedDistFuns(vector &); + void dump(perturbation_set& ); }; /* NOTES: