view multiprobe.cpp @ 516:2a7bad47a4a7 multiprobeLSH

New branch to implement multiprobe LSH. Copy of trunk:802. Added multiprobe.{cpp,h} source files.
author mas01mc
date Sat, 24 Jan 2009 19:51:46 +0000
parents
children 807c8be7dd45
line wrap: on
line source
/*
 * MultiProbe C++ class
 *
 * Given a LSH structure, perform lookup by probing nearby hash-function locations
 *
 * Implementation using C++ STL
 *
 * Reference:
 * Qin Lv, William Josephson, Zhe Wang, Moses Charikar and Kai Li,
 * "Multi-Probe LSH: Efficient Indexing for High-Dimensional Similarity
 * Search", Proc. Intl. Conf. VLDB, 2007
 *
 *
 * Copyright (C) 2009 Michael Casey, Dartmouth College, All Rights Reserved
 * License: GNU Public License 2.0
 *
 */

#include "multiprobe.h"


#define _TEST_MP_LSH

MultiProbe::MultiProbe(){
  minHeap = new min_heap_of_perturbation_set();
  outSets = new vector_of_perturbation_set();
}

MultiProbe::~MultiProbe(){  
  // FIXME: Are these arrays ?  
  delete minHeap;
  delete outSets;
}

perturbation_set& MultiProbe::shift(perturbation_set a){  
  // a[a.length]++
}

perturbation_set& MultiProbe::expand(perturbation_set a){
  // a[a.length+1]=a[a.length]+1;
}

// Generate the optimal T perturbation sets for current query
// pre-conditions:
//   an LSH structure was initialized and passed to constructor
//   a query vector was passed to lsh->compute_hash_functions()
//   the query-to-boundary distances are stored in x[hashFunIndex]
//
// post-conditions:
//
//  generates an ordered list of perturbation sets (stored as an array of sets)
//  these are indexes into pi_j=(i,delta) pairs representing x_i(delta) in sort order z_j
//  data structures are cleared and reset to zeros thereby making them re-entrant
//
void MultiProbe::generatePerturbationSets(int T, vector<int>& x){
  perturbation_set ai,as,ae;

  distFuns = makeSortedDistFuns(x); // NEED TO IMPLEMENT, distances from query to boundaries

  ai.insert(1);
  minHeap->push(make_pair(perturbation_set(ai), score(ai))); // unique instance stored in mhe

  for(int i = 0 ; i < T ; i++ ){
    do{
      min_heap_element mhe = minHeap->top();
      minHeap->pop();
      ai = mhe.first;
      as = shift(perturbation_set(ai));
      minHeap->push(make_pair(perturbation_set(as), score(as)));
      ae = expand(perturbation_set(ai));
      minHeap->push(make_pair(perturbation_set(ae), score(ae)));
    }while(!valid(ai));
    outSets->push_back(ai); // Ordered list of perturbation sets
  }
}


// Take the list of distances (x) assuming list len is 2M and
// delta = (-1)^i, i = { 0 .. 2M-1 }
sorted_distance_functions& MultiProbe::makeSortedDistFuns(vector<int>& x){
  // Change these to vector<pair<double, pair<int,int> > >
  sorted_distance_functions* distFuns = new sorted_distance_functions[x.size()];
  for(int i = 0; i < x.size() ; i++){  
    distFuns[i] = make_pair(x[i], make_pair(i, i%2?-1:+1));
  }
}

double MultiProbe::score(perturbation_set& a){
  // Needs implementing
}

bool MultiProbe::valid(perturbation_set& a){


  // A valid set must have at most one
  // of the two elements {j, 2M + 1 - j} for every j
  //
  // A perturbation set containing an element > 2M is invalid

  

}

#ifdef _TEST_MP_LSH
int main(const int argc, const char* argv[]){

  MultiProbe mp();

}
#endif