Mercurial > hg > audiodb
comparison multiprobe.h @ 516:2a7bad47a4a7 multiprobeLSH
New branch to implement multiprobe LSH. Copy of trunk:802. Added multiprobe.{cpp,h} source files.
author | mas01mc |
---|---|
date | Sat, 24 Jan 2009 19:51:46 +0000 |
parents | |
children | 807c8be7dd45 |
comparison
equal
deleted
inserted
replaced
515:c7bdb7913762 | 516:2a7bad47a4a7 |
---|---|
1 /* | |
2 * MultiProbe C++ class | |
3 * | |
4 * Given a LSH structure, perform lookup by probing nearby hash-function locations | |
5 * | |
6 * Implementation using C++ STL | |
7 * | |
8 * Reference: | |
9 * Qin Lv, William Josephson, Zhe Wang, Moses Charikar and Kai Li, | |
10 * "Multi-Probe LSH: Efficient Indexing for High-Dimensional Similarity | |
11 * Search", Proc. Intl. Conf. VLDB, 2007 | |
12 * | |
13 * | |
14 * Copyright (C) 2009 Michael Casey, Dartmouth College, All Rights Reserved | |
15 * License: GNU Public License 2.0 | |
16 * | |
17 */ | |
18 | |
19 #include <functional> | |
20 #include <queue> | |
21 #include <vector> | |
22 #include <set> | |
23 | |
24 using namespace std ; | |
25 | |
26 typedef set<int > perturbation_set ; | |
27 typedef vector<perturbation_set > vector_of_perturbation_set ; | |
28 typedef pair<perturbation_set, float > min_heap_element ; | |
29 typedef vector<min_heap_element > vector_of_min_heap_element ; | |
30 typedef priority_queue< | |
31 min_heap_element, | |
32 vector_of_min_heap_element, | |
33 greater<min_heap_element> | |
34 > min_heap_of_perturbation_set ; | |
35 typedef pair<double, pair<int, int> > sorted_distance_functions ; | |
36 | |
37 bool operator>(const min_heap_element& a, const min_heap_element& b){ | |
38 return a.second > b.second; | |
39 } | |
40 | |
41 bool operator<(const min_heap_element& a, const min_heap_element& b){ | |
42 return a.second < b.second; | |
43 } | |
44 | |
45 bool operator>(const sorted_distance_functions& a, const sorted_distance_functions& b){ | |
46 return a.second > b.second; | |
47 } | |
48 | |
49 bool operator<(const sorted_distance_functions& a, const sorted_distance_functions& b){ | |
50 return a.second < b.second; | |
51 } | |
52 | |
53 class MultiProbe{ | |
54 protected: | |
55 min_heap_of_perturbation_set* minHeap; | |
56 vector_of_perturbation_set* outSets; | |
57 sorted_distance_functions distFuns; | |
58 public: | |
59 MultiProbe(); | |
60 ~MultiProbe(); | |
61 | |
62 // perturbation set operations | |
63 perturbation_set& shift(perturbation_set); | |
64 perturbation_set& expand(perturbation_set); | |
65 double score(perturbation_set&); | |
66 bool valid(perturbation_set&); | |
67 | |
68 // generation of perturbation sets | |
69 void generatePerturbationSets(int, vector<int>&); | |
70 sorted_distance_functions& makeSortedDistFuns(vector<int> &); | |
71 }; | |
72 | |
73 /* NOTES: | |
74 | |
75 Reference: | |
76 Qin Lv, William Josephson, Zhe Wang, Moses Charikar and Kai Li, | |
77 "Multi-Probe LSH: Efficient Indexing for High-Dimensional Similarity | |
78 Search", Proc. Intl. Conf. VLDB, 2007 | |
79 | |
80 i = 1..M (number of hash functions used) | |
81 f_i(q) = a_i * q + b_i // projection to real line | |
82 h_i(q) = floor( ( a_i * q + b_i ) / w ) // hash slot | |
83 delta \in {-1, +1} | |
84 x_i( delta ) = distance of query to left or right boundary | |
85 Delta = [delta_1 delta_2 ... delta_M] | |
86 score(Delta) = sum_{i=1}_M x_i(delta_i).^2 | |
87 | |
88 z = sort(x(delta_i), increasing) // i = 1..2M delta={+1,-1} | |
89 p_j = (i, delta) if z_j == x_i(delta) | |
90 | |
91 A_k is index into p_j | |
92 | |
93 Multi-probe algorithm (after Lv et al. 2007) | |
94 --------------------------------------------- | |
95 | |
96 A0 = {1} | |
97 minHeap.insert(A0, score(A0)) | |
98 | |
99 for i = 1 to T do // T = number of perturbation sets | |
100 repeat | |
101 Ai = minHeap.extractMin() | |
102 As = shift(Ai) | |
103 minHeap.insert(As, score(As)) | |
104 Ae = expand(Ai) | |
105 minHeap.insert(Ae, score(Ae)) | |
106 until valid(Ai) | |
107 output Ai | |
108 end for | |
109 | |
110 */ |