Mercurial > hg > audiodb
view multiprobe.h @ 518:ca1ee92c359c multiprobeLSH
Completed initial implementation of LSH MultiProbe class. Now needs to be joined to lshlib.cpp retrieve() method to perform multi-probe queries.
author | mas01mc |
---|---|
date | Mon, 26 Jan 2009 02:50:44 +0000 |
parents | 807c8be7dd45 |
children | fdcd436d7cbd |
line wrap: on
line source
/* * MultiProbe C++ class * * Given a vector of LSH boundary distances for a query, * perform lookup by probing nearby hash-function locations * * Implementation using C++ STL * * Reference: * Qin Lv, William Josephson, Zhe Wang, Moses Charikar and Kai Li, * "Multi-Probe LSH: Efficient Indexing for High-Dimensional Similarity * Search", Proc. Intl. Conf. VLDB, 2007 * * * Copyright (C) 2009 Michael Casey, Dartmouth College, All Rights Reserved * License: GNU Public License 2.0 * */ #ifndef __LSH_MULTIPROBE_ #define __LSH_MULTIPROBE_ #include <functional> #include <queue> #include <vector> #include <set> #include <algorithm> #include <iostream> using namespace std; typedef set<int > perturbation_set ; typedef class MinHeapElement{ public: perturbation_set perturbs; double score; MinHeapElement(perturbation_set a, double s); virtual ~MinHeapElement(); } min_heap_element; typedef priority_queue<min_heap_element, vector<min_heap_element>, greater<min_heap_element> > min_heap_of_perturbation_set ; typedef pair<double, pair<int, int> > sorted_distance_functions ; bool operator> (const min_heap_element& a, const min_heap_element& b){ return a.score > b.score; } bool operator< (const min_heap_element& a, const min_heap_element& b){ return a.score < b.score; } bool operator>(const sorted_distance_functions& a, const sorted_distance_functions& b){ return a.first > b.first; } bool operator<(const sorted_distance_functions& a, const sorted_distance_functions& b){ return a.first < b.first; } class MultiProbe{ protected: min_heap_of_perturbation_set* minHeap; min_heap_of_perturbation_set* outSets; vector<sorted_distance_functions>* distFuns; unsigned numHashBoundaries; // data structure initialization and cleanup void initialize(); void cleanup(); // perturbation set operations perturbation_set& shift(perturbation_set&); perturbation_set& expand(perturbation_set&); double score(perturbation_set&); bool valid(perturbation_set&); void makeSortedDistFuns(vector<double> &); public: MultiProbe(); ~MultiProbe(); // generation of perturbation sets void generatePerturbationSets(vector<double>& vectorOfBounaryDistances, int numSetsToGenerate); perturbation_set getNextPerturbationSet(); void dump(perturbation_set); size_t size(); // Number of perturbation sets are in the output queue bool empty(); // predicate for empty MultiProbe set }; /* NOTES: Reference: Qin Lv, William Josephson, Zhe Wang, Moses Charikar and Kai Li, "Multi-Probe LSH: Efficient Indexing for High-Dimensional Similarity Search", Proc. Intl. Conf. VLDB, 2007 i = 1..M (number of hash functions used) f_i(q) = a_i * q + b_i // projection to real line h_i(q) = floor( ( a_i * q + b_i ) / w ) // hash slot delta \in {-1, +1} x_i( delta ) = distance of query to left or right boundary Delta = [delta_1 delta_2 ... delta_M] score(Delta) = sum_{i=1}_M x_i(delta_i).^2 z = sort(x(delta_i), increasing) // i = 1..2M delta={+1,-1} p_j = (i, delta) if z_j == x_i(delta) A_k is index into p_j Multi-probe algorithm (after Lv et al. 2007) --------------------------------------------- A0 = {1} minHeap.insert(A0, score(A0)) for i = 1 to T do // T = number of perturbation sets repeat Ai = minHeap.extractMin() As = shift(Ai) minHeap.insert(As, score(As)) Ae = expand(Ai) minHeap.insert(Ae, score(Ae)) until valid(Ai) output Ai end for */ #endif // __LSH_MULTIPROBE_