comparison multiprobe.h @ 518:ca1ee92c359c multiprobeLSH

Completed initial implementation of LSH MultiProbe class. Now needs to be joined to lshlib.cpp retrieve() method to perform multi-probe queries.
author mas01mc
date Mon, 26 Jan 2009 02:50:44 +0000
parents 807c8be7dd45
children fdcd436d7cbd
comparison
equal deleted inserted replaced
517:807c8be7dd45 518:ca1ee92c359c
1 /* 1 /*
2 * MultiProbe C++ class 2 * MultiProbe C++ class
3 * 3 *
4 * Given a LSH structure, perform lookup by probing nearby hash-function locations 4 * Given a vector of LSH boundary distances for a query,
5 * perform lookup by probing nearby hash-function locations
5 * 6 *
6 * Implementation using C++ STL 7 * Implementation using C++ STL
7 * 8 *
8 * Reference: 9 * Reference:
9 * Qin Lv, William Josephson, Zhe Wang, Moses Charikar and Kai Li, 10 * Qin Lv, William Josephson, Zhe Wang, Moses Charikar and Kai Li,
14 * Copyright (C) 2009 Michael Casey, Dartmouth College, All Rights Reserved 15 * Copyright (C) 2009 Michael Casey, Dartmouth College, All Rights Reserved
15 * License: GNU Public License 2.0 16 * License: GNU Public License 2.0
16 * 17 *
17 */ 18 */
18 19
20 #ifndef __LSH_MULTIPROBE_
21 #define __LSH_MULTIPROBE_
22
19 #include <functional> 23 #include <functional>
20 #include <queue> 24 #include <queue>
21 #include <vector> 25 #include <vector>
22 #include <set> 26 #include <set>
23 #include <algorithm> 27 #include <algorithm>
24 #include <iostream> 28 #include <iostream>
25 29
26 using namespace std ; 30 using namespace std;
27 31
28 typedef set<int > perturbation_set ; 32 typedef set<int > perturbation_set ;
29 typedef vector<perturbation_set > vector_of_perturbation_set ; 33
30 typedef pair<perturbation_set, float > min_heap_element ; 34 typedef class MinHeapElement{
31 typedef vector<min_heap_element > vector_of_min_heap_element ; 35 public:
32 typedef priority_queue< 36 perturbation_set perturbs;
33 min_heap_element, 37 double score;
34 vector_of_min_heap_element, 38 MinHeapElement(perturbation_set a, double s);
35 greater<min_heap_element> 39 virtual ~MinHeapElement();
36 > min_heap_of_perturbation_set ; 40 } min_heap_element;
41
42 typedef priority_queue<min_heap_element,
43 vector<min_heap_element>,
44 greater<min_heap_element>
45 > min_heap_of_perturbation_set ;
46
37 typedef pair<double, pair<int, int> > sorted_distance_functions ; 47 typedef pair<double, pair<int, int> > sorted_distance_functions ;
38 typedef vector<sorted_distance_functions> vector_of_sorted_distance_functions;
39 48
40 bool operator>(const min_heap_element& a, const min_heap_element& b){ 49 bool operator> (const min_heap_element& a, const min_heap_element& b){
41 return a.second > b.second; 50 return a.score > b.score;
42 } 51 }
43 52
44 bool operator<(const min_heap_element& a, const min_heap_element& b){ 53 bool operator< (const min_heap_element& a, const min_heap_element& b){
45 return a.second < b.second; 54 return a.score < b.score;
46 } 55 }
47 56
48 bool operator>(const sorted_distance_functions& a, const sorted_distance_functions& b){ 57 bool operator>(const sorted_distance_functions& a, const sorted_distance_functions& b){
49 return a.first > b.first; 58 return a.first > b.first;
50 } 59 }
54 } 63 }
55 64
56 class MultiProbe{ 65 class MultiProbe{
57 protected: 66 protected:
58 min_heap_of_perturbation_set* minHeap; 67 min_heap_of_perturbation_set* minHeap;
59 vector_of_perturbation_set* outSets; 68 min_heap_of_perturbation_set* outSets;
60 vector_of_sorted_distance_functions* distFuns; 69 vector<sorted_distance_functions>* distFuns;
61 unsigned numHashBoundaries; 70 unsigned numHashBoundaries;
62 public:
63 MultiProbe();
64 ~MultiProbe();
65 71
72 // data structure initialization and cleanup
73 void initialize();
74 void cleanup();
75
66 // perturbation set operations 76 // perturbation set operations
67 perturbation_set& shift(perturbation_set&); 77 perturbation_set& shift(perturbation_set&);
68 perturbation_set& expand(perturbation_set&); 78 perturbation_set& expand(perturbation_set&);
69 double score(perturbation_set&); 79 double score(perturbation_set&);
70 bool valid(perturbation_set&); 80 bool valid(perturbation_set&);
81 void makeSortedDistFuns(vector<double> &);
82
83 public:
84 MultiProbe();
85 ~MultiProbe();
71 86
72 // generation of perturbation sets 87 // generation of perturbation sets
73 void generatePerturbationSets(int, vector<double>&); 88 void generatePerturbationSets(vector<double>& vectorOfBounaryDistances, int numSetsToGenerate);
74 void makeSortedDistFuns(vector<double> &); 89 perturbation_set getNextPerturbationSet();
75 void dump(perturbation_set& ); 90 void dump(perturbation_set);
91 size_t size(); // Number of perturbation sets are in the output queue
92 bool empty(); // predicate for empty MultiProbe set
76 }; 93 };
94
77 95
78 /* NOTES: 96 /* NOTES:
79 97
80 Reference: 98 Reference:
81 Qin Lv, William Josephson, Zhe Wang, Moses Charikar and Kai Li, 99 Qin Lv, William Josephson, Zhe Wang, Moses Charikar and Kai Li,
111 until valid(Ai) 129 until valid(Ai)
112 output Ai 130 output Ai
113 end for 131 end for
114 132
115 */ 133 */
134
135 #endif // __LSH_MULTIPROBE_