annotate multiprobe.cpp @ 516:2a7bad47a4a7 multiprobeLSH

New branch to implement multiprobe LSH. Copy of trunk:802. Added multiprobe.{cpp,h} source files.
author mas01mc
date Sat, 24 Jan 2009 19:51:46 +0000
parents
children 807c8be7dd45
rev   line source
mas01mc@516 1 /*
mas01mc@516 2 * MultiProbe C++ class
mas01mc@516 3 *
mas01mc@516 4 * Given a LSH structure, perform lookup by probing nearby hash-function locations
mas01mc@516 5 *
mas01mc@516 6 * Implementation using C++ STL
mas01mc@516 7 *
mas01mc@516 8 * Reference:
mas01mc@516 9 * Qin Lv, William Josephson, Zhe Wang, Moses Charikar and Kai Li,
mas01mc@516 10 * "Multi-Probe LSH: Efficient Indexing for High-Dimensional Similarity
mas01mc@516 11 * Search", Proc. Intl. Conf. VLDB, 2007
mas01mc@516 12 *
mas01mc@516 13 *
mas01mc@516 14 * Copyright (C) 2009 Michael Casey, Dartmouth College, All Rights Reserved
mas01mc@516 15 * License: GNU Public License 2.0
mas01mc@516 16 *
mas01mc@516 17 */
mas01mc@516 18
mas01mc@516 19 #include "multiprobe.h"
mas01mc@516 20
mas01mc@516 21
mas01mc@516 22 #define _TEST_MP_LSH
mas01mc@516 23
mas01mc@516 24 MultiProbe::MultiProbe(){
mas01mc@516 25 minHeap = new min_heap_of_perturbation_set();
mas01mc@516 26 outSets = new vector_of_perturbation_set();
mas01mc@516 27 }
mas01mc@516 28
mas01mc@516 29 MultiProbe::~MultiProbe(){
mas01mc@516 30 // FIXME: Are these arrays ?
mas01mc@516 31 delete minHeap;
mas01mc@516 32 delete outSets;
mas01mc@516 33 }
mas01mc@516 34
mas01mc@516 35 perturbation_set& MultiProbe::shift(perturbation_set a){
mas01mc@516 36 // a[a.length]++
mas01mc@516 37 }
mas01mc@516 38
mas01mc@516 39 perturbation_set& MultiProbe::expand(perturbation_set a){
mas01mc@516 40 // a[a.length+1]=a[a.length]+1;
mas01mc@516 41 }
mas01mc@516 42
mas01mc@516 43 // Generate the optimal T perturbation sets for current query
mas01mc@516 44 // pre-conditions:
mas01mc@516 45 // an LSH structure was initialized and passed to constructor
mas01mc@516 46 // a query vector was passed to lsh->compute_hash_functions()
mas01mc@516 47 // the query-to-boundary distances are stored in x[hashFunIndex]
mas01mc@516 48 //
mas01mc@516 49 // post-conditions:
mas01mc@516 50 //
mas01mc@516 51 // generates an ordered list of perturbation sets (stored as an array of sets)
mas01mc@516 52 // these are indexes into pi_j=(i,delta) pairs representing x_i(delta) in sort order z_j
mas01mc@516 53 // data structures are cleared and reset to zeros thereby making them re-entrant
mas01mc@516 54 //
mas01mc@516 55 void MultiProbe::generatePerturbationSets(int T, vector<int>& x){
mas01mc@516 56 perturbation_set ai,as,ae;
mas01mc@516 57
mas01mc@516 58 distFuns = makeSortedDistFuns(x); // NEED TO IMPLEMENT, distances from query to boundaries
mas01mc@516 59
mas01mc@516 60 ai.insert(1);
mas01mc@516 61 minHeap->push(make_pair(perturbation_set(ai), score(ai))); // unique instance stored in mhe
mas01mc@516 62
mas01mc@516 63 for(int i = 0 ; i < T ; i++ ){
mas01mc@516 64 do{
mas01mc@516 65 min_heap_element mhe = minHeap->top();
mas01mc@516 66 minHeap->pop();
mas01mc@516 67 ai = mhe.first;
mas01mc@516 68 as = shift(perturbation_set(ai));
mas01mc@516 69 minHeap->push(make_pair(perturbation_set(as), score(as)));
mas01mc@516 70 ae = expand(perturbation_set(ai));
mas01mc@516 71 minHeap->push(make_pair(perturbation_set(ae), score(ae)));
mas01mc@516 72 }while(!valid(ai));
mas01mc@516 73 outSets->push_back(ai); // Ordered list of perturbation sets
mas01mc@516 74 }
mas01mc@516 75 }
mas01mc@516 76
mas01mc@516 77
mas01mc@516 78 // Take the list of distances (x) assuming list len is 2M and
mas01mc@516 79 // delta = (-1)^i, i = { 0 .. 2M-1 }
mas01mc@516 80 sorted_distance_functions& MultiProbe::makeSortedDistFuns(vector<int>& x){
mas01mc@516 81 // Change these to vector<pair<double, pair<int,int> > >
mas01mc@516 82 sorted_distance_functions* distFuns = new sorted_distance_functions[x.size()];
mas01mc@516 83 for(int i = 0; i < x.size() ; i++){
mas01mc@516 84 distFuns[i] = make_pair(x[i], make_pair(i, i%2?-1:+1));
mas01mc@516 85 }
mas01mc@516 86 }
mas01mc@516 87
mas01mc@516 88 double MultiProbe::score(perturbation_set& a){
mas01mc@516 89 // Needs implementing
mas01mc@516 90 }
mas01mc@516 91
mas01mc@516 92 bool MultiProbe::valid(perturbation_set& a){
mas01mc@516 93
mas01mc@516 94
mas01mc@516 95 // A valid set must have at most one
mas01mc@516 96 // of the two elements {j, 2M + 1 - j} for every j
mas01mc@516 97 //
mas01mc@516 98 // A perturbation set containing an element > 2M is invalid
mas01mc@516 99
mas01mc@516 100
mas01mc@516 101
mas01mc@516 102 }
mas01mc@516 103
mas01mc@516 104 #ifdef _TEST_MP_LSH
mas01mc@516 105 int main(const int argc, const char* argv[]){
mas01mc@516 106
mas01mc@516 107 MultiProbe mp();
mas01mc@516 108
mas01mc@516 109 }
mas01mc@516 110 #endif
mas01mc@516 111
mas01mc@516 112
mas01mc@516 113
mas01mc@516 114
mas01mc@516 115
mas01mc@516 116