Mercurial > hg > audiodb
view multiprobe.cpp @ 516:2a7bad47a4a7 multiprobeLSH
New branch to implement multiprobe LSH. Copy of trunk:802. Added multiprobe.{cpp,h} source files.
author | mas01mc |
---|---|
date | Sat, 24 Jan 2009 19:51:46 +0000 |
parents | |
children | 807c8be7dd45 |
line wrap: on
line source
/* * MultiProbe C++ class * * Given a LSH structure, perform lookup by probing nearby hash-function locations * * Implementation using C++ STL * * Reference: * Qin Lv, William Josephson, Zhe Wang, Moses Charikar and Kai Li, * "Multi-Probe LSH: Efficient Indexing for High-Dimensional Similarity * Search", Proc. Intl. Conf. VLDB, 2007 * * * Copyright (C) 2009 Michael Casey, Dartmouth College, All Rights Reserved * License: GNU Public License 2.0 * */ #include "multiprobe.h" #define _TEST_MP_LSH MultiProbe::MultiProbe(){ minHeap = new min_heap_of_perturbation_set(); outSets = new vector_of_perturbation_set(); } MultiProbe::~MultiProbe(){ // FIXME: Are these arrays ? delete minHeap; delete outSets; } perturbation_set& MultiProbe::shift(perturbation_set a){ // a[a.length]++ } perturbation_set& MultiProbe::expand(perturbation_set a){ // a[a.length+1]=a[a.length]+1; } // Generate the optimal T perturbation sets for current query // pre-conditions: // an LSH structure was initialized and passed to constructor // a query vector was passed to lsh->compute_hash_functions() // the query-to-boundary distances are stored in x[hashFunIndex] // // post-conditions: // // generates an ordered list of perturbation sets (stored as an array of sets) // these are indexes into pi_j=(i,delta) pairs representing x_i(delta) in sort order z_j // data structures are cleared and reset to zeros thereby making them re-entrant // void MultiProbe::generatePerturbationSets(int T, vector<int>& x){ perturbation_set ai,as,ae; distFuns = makeSortedDistFuns(x); // NEED TO IMPLEMENT, distances from query to boundaries ai.insert(1); minHeap->push(make_pair(perturbation_set(ai), score(ai))); // unique instance stored in mhe for(int i = 0 ; i < T ; i++ ){ do{ min_heap_element mhe = minHeap->top(); minHeap->pop(); ai = mhe.first; as = shift(perturbation_set(ai)); minHeap->push(make_pair(perturbation_set(as), score(as))); ae = expand(perturbation_set(ai)); minHeap->push(make_pair(perturbation_set(ae), score(ae))); }while(!valid(ai)); outSets->push_back(ai); // Ordered list of perturbation sets } } // Take the list of distances (x) assuming list len is 2M and // delta = (-1)^i, i = { 0 .. 2M-1 } sorted_distance_functions& MultiProbe::makeSortedDistFuns(vector<int>& x){ // Change these to vector<pair<double, pair<int,int> > > sorted_distance_functions* distFuns = new sorted_distance_functions[x.size()]; for(int i = 0; i < x.size() ; i++){ distFuns[i] = make_pair(x[i], make_pair(i, i%2?-1:+1)); } } double MultiProbe::score(perturbation_set& a){ // Needs implementing } bool MultiProbe::valid(perturbation_set& a){ // A valid set must have at most one // of the two elements {j, 2M + 1 - j} for every j // // A perturbation set containing an element > 2M is invalid } #ifdef _TEST_MP_LSH int main(const int argc, const char* argv[]){ MultiProbe mp(); } #endif