mas01mc@516
|
1 /*
|
mas01mc@516
|
2 * MultiProbe C++ class
|
mas01mc@516
|
3 *
|
mas01mc@516
|
4 * Given a LSH structure, perform lookup by probing nearby hash-function locations
|
mas01mc@516
|
5 *
|
mas01mc@516
|
6 * Implementation using C++ STL
|
mas01mc@516
|
7 *
|
mas01mc@516
|
8 * Reference:
|
mas01mc@516
|
9 * Qin Lv, William Josephson, Zhe Wang, Moses Charikar and Kai Li,
|
mas01mc@516
|
10 * "Multi-Probe LSH: Efficient Indexing for High-Dimensional Similarity
|
mas01mc@516
|
11 * Search", Proc. Intl. Conf. VLDB, 2007
|
mas01mc@516
|
12 *
|
mas01mc@516
|
13 *
|
mas01mc@516
|
14 * Copyright (C) 2009 Michael Casey, Dartmouth College, All Rights Reserved
|
mas01mc@516
|
15 * License: GNU Public License 2.0
|
mas01mc@516
|
16 *
|
mas01mc@516
|
17 */
|
mas01mc@516
|
18
|
mas01mc@516
|
19 #include "multiprobe.h"
|
mas01mc@516
|
20
|
mas01mc@516
|
21
|
mas01mc@516
|
22 #define _TEST_MP_LSH
|
mas01mc@516
|
23
|
mas01mc@516
|
24 MultiProbe::MultiProbe(){
|
mas01mc@516
|
25 minHeap = new min_heap_of_perturbation_set();
|
mas01mc@516
|
26 outSets = new vector_of_perturbation_set();
|
mas01mc@516
|
27 }
|
mas01mc@516
|
28
|
mas01mc@516
|
29 MultiProbe::~MultiProbe(){
|
mas01mc@516
|
30 // FIXME: Are these arrays ?
|
mas01mc@516
|
31 delete minHeap;
|
mas01mc@516
|
32 delete outSets;
|
mas01mc@516
|
33 }
|
mas01mc@516
|
34
|
mas01mc@516
|
35 perturbation_set& MultiProbe::shift(perturbation_set a){
|
mas01mc@516
|
36 // a[a.length]++
|
mas01mc@516
|
37 }
|
mas01mc@516
|
38
|
mas01mc@516
|
39 perturbation_set& MultiProbe::expand(perturbation_set a){
|
mas01mc@516
|
40 // a[a.length+1]=a[a.length]+1;
|
mas01mc@516
|
41 }
|
mas01mc@516
|
42
|
mas01mc@516
|
43 // Generate the optimal T perturbation sets for current query
|
mas01mc@516
|
44 // pre-conditions:
|
mas01mc@516
|
45 // an LSH structure was initialized and passed to constructor
|
mas01mc@516
|
46 // a query vector was passed to lsh->compute_hash_functions()
|
mas01mc@516
|
47 // the query-to-boundary distances are stored in x[hashFunIndex]
|
mas01mc@516
|
48 //
|
mas01mc@516
|
49 // post-conditions:
|
mas01mc@516
|
50 //
|
mas01mc@516
|
51 // generates an ordered list of perturbation sets (stored as an array of sets)
|
mas01mc@516
|
52 // these are indexes into pi_j=(i,delta) pairs representing x_i(delta) in sort order z_j
|
mas01mc@516
|
53 // data structures are cleared and reset to zeros thereby making them re-entrant
|
mas01mc@516
|
54 //
|
mas01mc@516
|
55 void MultiProbe::generatePerturbationSets(int T, vector<int>& x){
|
mas01mc@516
|
56 perturbation_set ai,as,ae;
|
mas01mc@516
|
57
|
mas01mc@516
|
58 distFuns = makeSortedDistFuns(x); // NEED TO IMPLEMENT, distances from query to boundaries
|
mas01mc@516
|
59
|
mas01mc@516
|
60 ai.insert(1);
|
mas01mc@516
|
61 minHeap->push(make_pair(perturbation_set(ai), score(ai))); // unique instance stored in mhe
|
mas01mc@516
|
62
|
mas01mc@516
|
63 for(int i = 0 ; i < T ; i++ ){
|
mas01mc@516
|
64 do{
|
mas01mc@516
|
65 min_heap_element mhe = minHeap->top();
|
mas01mc@516
|
66 minHeap->pop();
|
mas01mc@516
|
67 ai = mhe.first;
|
mas01mc@516
|
68 as = shift(perturbation_set(ai));
|
mas01mc@516
|
69 minHeap->push(make_pair(perturbation_set(as), score(as)));
|
mas01mc@516
|
70 ae = expand(perturbation_set(ai));
|
mas01mc@516
|
71 minHeap->push(make_pair(perturbation_set(ae), score(ae)));
|
mas01mc@516
|
72 }while(!valid(ai));
|
mas01mc@516
|
73 outSets->push_back(ai); // Ordered list of perturbation sets
|
mas01mc@516
|
74 }
|
mas01mc@516
|
75 }
|
mas01mc@516
|
76
|
mas01mc@516
|
77
|
mas01mc@516
|
78 // Take the list of distances (x) assuming list len is 2M and
|
mas01mc@516
|
79 // delta = (-1)^i, i = { 0 .. 2M-1 }
|
mas01mc@516
|
80 sorted_distance_functions& MultiProbe::makeSortedDistFuns(vector<int>& x){
|
mas01mc@516
|
81 // Change these to vector<pair<double, pair<int,int> > >
|
mas01mc@516
|
82 sorted_distance_functions* distFuns = new sorted_distance_functions[x.size()];
|
mas01mc@516
|
83 for(int i = 0; i < x.size() ; i++){
|
mas01mc@516
|
84 distFuns[i] = make_pair(x[i], make_pair(i, i%2?-1:+1));
|
mas01mc@516
|
85 }
|
mas01mc@516
|
86 }
|
mas01mc@516
|
87
|
mas01mc@516
|
88 double MultiProbe::score(perturbation_set& a){
|
mas01mc@516
|
89 // Needs implementing
|
mas01mc@516
|
90 }
|
mas01mc@516
|
91
|
mas01mc@516
|
92 bool MultiProbe::valid(perturbation_set& a){
|
mas01mc@516
|
93
|
mas01mc@516
|
94
|
mas01mc@516
|
95 // A valid set must have at most one
|
mas01mc@516
|
96 // of the two elements {j, 2M + 1 - j} for every j
|
mas01mc@516
|
97 //
|
mas01mc@516
|
98 // A perturbation set containing an element > 2M is invalid
|
mas01mc@516
|
99
|
mas01mc@516
|
100
|
mas01mc@516
|
101
|
mas01mc@516
|
102 }
|
mas01mc@516
|
103
|
mas01mc@516
|
104 #ifdef _TEST_MP_LSH
|
mas01mc@516
|
105 int main(const int argc, const char* argv[]){
|
mas01mc@516
|
106
|
mas01mc@516
|
107 MultiProbe mp();
|
mas01mc@516
|
108
|
mas01mc@516
|
109 }
|
mas01mc@516
|
110 #endif
|
mas01mc@516
|
111
|
mas01mc@516
|
112
|
mas01mc@516
|
113
|
mas01mc@516
|
114
|
mas01mc@516
|
115
|
mas01mc@516
|
116
|