Mercurial > hg > audiodb
comparison multiprobe.h @ 754:9bd13c7819ae mkc_lsh_update
Adding mkc_lsh_update branch, trunk candidate with improved LSH: merged trunk 1095 and branch multiprobe_lsh
author | mas01mc |
---|---|
date | Thu, 25 Nov 2010 13:42:40 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
747:fbf16508421f | 754:9bd13c7819ae |
---|---|
1 /* | |
2 * MultiProbe C++ class | |
3 * | |
4 * Given a vector of LSH boundary distances for a query, | |
5 * perform lookup by probing nearby hash-function locations | |
6 * | |
7 * Implementation using C++ STL | |
8 * | |
9 * Reference: | |
10 * Qin Lv, William Josephson, Zhe Wang, Moses Charikar and Kai Li, | |
11 * "Multi-Probe LSH: Efficient Indexing for High-Dimensional Similarity | |
12 * Search", Proc. Intl. Conf. VLDB, 2007 | |
13 * | |
14 * | |
15 * Copyright (C) 2009 Michael Casey, Dartmouth College, All Rights Reserved | |
16 * License: GNU Public License 2.0 | |
17 * | |
18 */ | |
19 | |
20 #ifndef __LSH_MULTIPROBE_ | |
21 #define __LSH_MULTIPROBE_ | |
22 | |
23 #include <functional> | |
24 #include <queue> | |
25 #include <vector> | |
26 #include <set> | |
27 #include <algorithm> | |
28 #include <iostream> | |
29 | |
30 using namespace std; | |
31 | |
32 typedef set<int > perturbation_set ; | |
33 | |
34 typedef class MinHeapElement{ | |
35 public: | |
36 perturbation_set perturbs; | |
37 float score; | |
38 MinHeapElement(perturbation_set a, float s); | |
39 virtual ~MinHeapElement(); | |
40 } min_heap_element; | |
41 | |
42 typedef priority_queue<min_heap_element, | |
43 vector<min_heap_element>, | |
44 greater<min_heap_element> | |
45 > min_heap_of_perturbation_set ; | |
46 | |
47 typedef pair<float, pair<int, int> > sorted_distance_functions ; | |
48 | |
49 class MultiProbe{ | |
50 protected: | |
51 min_heap_of_perturbation_set* minHeap; | |
52 min_heap_of_perturbation_set* outSets; | |
53 vector<sorted_distance_functions>* distFuns; | |
54 unsigned numHashBoundaries; | |
55 | |
56 // data structure initialization and cleanup | |
57 void initialize(); | |
58 void cleanup(); | |
59 | |
60 // perturbation set operations | |
61 perturbation_set& shift(perturbation_set&); | |
62 perturbation_set& expand(perturbation_set&); | |
63 float score(perturbation_set&); | |
64 bool valid(perturbation_set&); | |
65 void makeSortedDistFuns(vector<float> &); | |
66 void makeSortedDistFuns(float* x, unsigned N); | |
67 | |
68 // perturbation set generation algorithm | |
69 void algorithm1(unsigned T); | |
70 | |
71 public: | |
72 MultiProbe(); | |
73 ~MultiProbe(); | |
74 | |
75 // generation of perturbation sets | |
76 void generatePerturbationSets(vector<float>& vectorOfBounaryDistances, unsigned numSetsToGenerate); | |
77 void generatePerturbationSets(float* arrayOfBoundaryDistances, unsigned numDistances, unsigned numSetsToGenerate); | |
78 perturbation_set getNextPerturbationSet(); | |
79 void dump(perturbation_set); | |
80 size_t size(); // Number of perturbation sets are in the output queue | |
81 bool empty(); // predicate for empty MultiProbe set | |
82 int getIndex(perturbation_set::iterator it); // return index of hash function for given set entry | |
83 int getBoundary(perturbation_set::iterator it); // return boundary {-1,+1} for given set entry | |
84 }; | |
85 | |
86 | |
87 /* NOTES: | |
88 | |
89 Reference: | |
90 Qin Lv, William Josephson, Zhe Wang, Moses Charikar and Kai Li, | |
91 "Multi-Probe LSH: Efficient Indexing for High-Dimensional Similarity | |
92 Search", Proc. Intl. Conf. VLDB, 2007 | |
93 | |
94 i = 1..M (number of hash functions used) | |
95 f_i(q) = a_i * q + b_i // projection to real line | |
96 h_i(q) = floor( ( a_i * q + b_i ) / w ) // hash slot | |
97 delta \in {-1, +1} | |
98 x_i( delta ) = distance of query to left or right boundary | |
99 Delta = [delta_1 delta_2 ... delta_M] | |
100 score(Delta) = sum_{i=1}_M x_i(delta_i).^2 | |
101 | |
102 z = sort(x(delta_i), increasing) // i = 1..2M delta={+1,-1} | |
103 p_j = (i, delta) if z_j == x_i(delta) | |
104 | |
105 A_k is index into p_j | |
106 | |
107 Multi-probe algorithm (after Lv et al. 2007) | |
108 --------------------------------------------- | |
109 | |
110 A0 = {1} | |
111 minHeap.insert(A0, score(A0)) | |
112 | |
113 for i = 1 to T do // T = number of perturbation sets | |
114 repeat | |
115 Ai = minHeap.extractMin() | |
116 As = shift(Ai) | |
117 minHeap.insert(As, score(As)) | |
118 Ae = expand(Ai) | |
119 minHeap.insert(Ae, score(Ae)) | |
120 until valid(Ai) | |
121 output Ai | |
122 end for | |
123 | |
124 */ | |
125 | |
126 #endif // __LSH_MULTIPROBE_ |