comparison multiprobe.h @ 516:2a7bad47a4a7 multiprobeLSH

New branch to implement multiprobe LSH. Copy of trunk:802. Added multiprobe.{cpp,h} source files.
author mas01mc
date Sat, 24 Jan 2009 19:51:46 +0000
parents
children 807c8be7dd45
comparison
equal deleted inserted replaced
515:c7bdb7913762 516:2a7bad47a4a7
1 /*
2 * MultiProbe C++ class
3 *
4 * Given a LSH structure, perform lookup by probing nearby hash-function locations
5 *
6 * Implementation using C++ STL
7 *
8 * Reference:
9 * Qin Lv, William Josephson, Zhe Wang, Moses Charikar and Kai Li,
10 * "Multi-Probe LSH: Efficient Indexing for High-Dimensional Similarity
11 * Search", Proc. Intl. Conf. VLDB, 2007
12 *
13 *
14 * Copyright (C) 2009 Michael Casey, Dartmouth College, All Rights Reserved
15 * License: GNU Public License 2.0
16 *
17 */
18
19 #include <functional>
20 #include <queue>
21 #include <vector>
22 #include <set>
23
24 using namespace std ;
25
26 typedef set<int > perturbation_set ;
27 typedef vector<perturbation_set > vector_of_perturbation_set ;
28 typedef pair<perturbation_set, float > min_heap_element ;
29 typedef vector<min_heap_element > vector_of_min_heap_element ;
30 typedef priority_queue<
31 min_heap_element,
32 vector_of_min_heap_element,
33 greater<min_heap_element>
34 > min_heap_of_perturbation_set ;
35 typedef pair<double, pair<int, int> > sorted_distance_functions ;
36
37 bool operator>(const min_heap_element& a, const min_heap_element& b){
38 return a.second > b.second;
39 }
40
41 bool operator<(const min_heap_element& a, const min_heap_element& b){
42 return a.second < b.second;
43 }
44
45 bool operator>(const sorted_distance_functions& a, const sorted_distance_functions& b){
46 return a.second > b.second;
47 }
48
49 bool operator<(const sorted_distance_functions& a, const sorted_distance_functions& b){
50 return a.second < b.second;
51 }
52
53 class MultiProbe{
54 protected:
55 min_heap_of_perturbation_set* minHeap;
56 vector_of_perturbation_set* outSets;
57 sorted_distance_functions distFuns;
58 public:
59 MultiProbe();
60 ~MultiProbe();
61
62 // perturbation set operations
63 perturbation_set& shift(perturbation_set);
64 perturbation_set& expand(perturbation_set);
65 double score(perturbation_set&);
66 bool valid(perturbation_set&);
67
68 // generation of perturbation sets
69 void generatePerturbationSets(int, vector<int>&);
70 sorted_distance_functions& makeSortedDistFuns(vector<int> &);
71 };
72
73 /* NOTES:
74
75 Reference:
76 Qin Lv, William Josephson, Zhe Wang, Moses Charikar and Kai Li,
77 "Multi-Probe LSH: Efficient Indexing for High-Dimensional Similarity
78 Search", Proc. Intl. Conf. VLDB, 2007
79
80 i = 1..M (number of hash functions used)
81 f_i(q) = a_i * q + b_i // projection to real line
82 h_i(q) = floor( ( a_i * q + b_i ) / w ) // hash slot
83 delta \in {-1, +1}
84 x_i( delta ) = distance of query to left or right boundary
85 Delta = [delta_1 delta_2 ... delta_M]
86 score(Delta) = sum_{i=1}_M x_i(delta_i).^2
87
88 z = sort(x(delta_i), increasing) // i = 1..2M delta={+1,-1}
89 p_j = (i, delta) if z_j == x_i(delta)
90
91 A_k is index into p_j
92
93 Multi-probe algorithm (after Lv et al. 2007)
94 ---------------------------------------------
95
96 A0 = {1}
97 minHeap.insert(A0, score(A0))
98
99 for i = 1 to T do // T = number of perturbation sets
100 repeat
101 Ai = minHeap.extractMin()
102 As = shift(Ai)
103 minHeap.insert(As, score(As))
104 Ae = expand(Ai)
105 minHeap.insert(Ae, score(Ae))
106 until valid(Ai)
107 output Ai
108 end for
109
110 */