view UNIT_TEST_LSH.cpp @ 584:e3790284fd4a

Merged through librdf storage hooks and apache2 module. Squashed commit of the following: commit a6cfca8f04036e12e7d7fcd55c47224e802582f0 Author: Michael Jewell <mjewell@harrison.(none)> Date: Fri Jul 31 15:23:32 2009 +0100 Removed leftover bits and bobs. commit f1f0dd074d0767de3e24ba636779fd8701d73d9e Author: Michael Jewell <mjewell@harrison.(none)> Date: Fri Jul 31 15:07:20 2009 +0100 Simple test of database creation via librdf. commit 90e6350538e004d8785137e5ff2ac878c22a5d42 Author: Michael Jewell <mjewell@harrison.(none)> Date: Fri Jul 31 15:05:10 2009 +0100 Added the apache2 module which will hook into the librdf storage module commit c75bf53763b7078c83ae97fcf247da2576baa79a Author: Michael Jewell <mjewell@harrison.(none)> Date: Fri Jul 31 15:04:53 2009 +0100 Added sparql librdf source - requires the librdf sources to compile. commit 0646f0190112a73ddb2533537e2cc9832c066b52 Author: Michael Jewell <mjewell@harrison.(none)> Date: Mon Jul 27 12:12:26 2009 +0100 Adding execution to mod_audiodb commit 8f83f27ba4d917278bca0c7cb665d930e28c86df Author: Michael Jewell <mjewell@harrison.(none)> Date: Wed Jul 22 12:15:57 2009 +0100 Some initial returns for the sparql handler. commit dc639aed11943a5b0c379eb47cf293f76908b1b7 Author: Michael Jewell <mjewell@harrison.(none)> Date: Wed Jul 22 12:06:20 2009 +0100 Added a little setup.sh script to do libtoolize/autoconf etc. commit 3a679da499db647fc82cf2797daeb5cc44ed7655 Author: Michael Jewell <mjewell@harrison.(none)> Date: Wed Jul 22 12:03:42 2009 +0100 Adding initial bits for apache mod
author mas01mj
date Fri, 31 Jul 2009 14:36:12 +0000
parents 071a108580a4
children
line wrap: on
line source
// UNIT_TEST_LSH.cpp

#include <vector>
#include "lshlib.h"
#include "reporter.h"

#define LSH_IN_CORE


#define N_POINT_BITS 14
#define POINT_BIT_MASK 0x00003FFF

// Callback method for LSH point retrieval
void add_point(void* reporter, Uns32T pointID, Uns32T qpos, float dist)
{
  ReporterBase* pr = (ReporterBase*)reporter;
  pr->add_point(pointID>>N_POINT_BITS, qpos, pointID&POINT_BIT_MASK, dist);
}

int main(int argc, char* argv[]){

  int nT = 100; // num tracks 
  int nP = 1000;  // num points-per-track
  float w = 4.0;// LSH bucket width
  int k = 10;
  int m = 2;
  int d = 10;
  int N = 100000;
  int C = 200;

  float radius = 0.001;
  char FILENAME[] = "foo.lsh";

  assert(nP>=nT);

  int fid = open(FILENAME,O_RDONLY);
  LSH* lsh;
  bool serialized = false;
  Uns32T trackBase = 0;

  if(fid< 0){ // Make a new serial LSH file
    lsh = new LSH(w,k,m,d,N,C,radius);
    assert(lsh);
    cout << "NEW LSH:" << endl;
    }
  else{
    close(fid); // Load LSH structures from disk
    lsh = new LSH(FILENAME); 
    assert(lsh);
    cout << "MERGE WITH EXISTING LSH:" << FILENAME << endl;
    serialized=true;
    trackBase = (lsh->get_maxp()>>N_POINT_BITS)+1; // Our encoding of tracks and points
  }  
  cout << "k:" << lsh->get_numFuns() << " ";
  cout << "L:" << lsh->get_numTables() << " ";
  cout << "d:" << lsh->get_dataDim() << " ";
  cout << "N:" << lsh->get_numRows() << " ";
  cout << "C:" << lsh->get_numCols() << " ";
  cout << "R:" << lsh->get_radius() << " ";
  cout << "p:" << lsh->get_maxp() << endl;
  cout.flush();

  cout << endl << "Constructing " << nT << " tracks with " << nP << " vectors of dimension " << d << endl;
  cout.flush();
  // Construct sets of database vectors, use one point from each set for testing
  vector< vector<float> > vv = vector< vector<float> >(nP); // track vectors
  vector< vector<float> > qq = vector< vector<float> >(nP);// query vectors
  for(int i=0; i< nP ; i++){
    vv[i]=vector<float>(d);  // allocate vector
    qq[i]=vector<float>(d);  // allocate vector
  }
  
  for(int k = 0 ; k < nT ; k ++){
    cout << "[" << k << "]";
    cout.flush();
    for(int i = 0 ; i< nP ; i++)
      for(int j=0; j< d ; j++)
	vv[i][j] =   genrand_real2() / radius; // MT_19937 random numbers
    lsh->insert_point_set(vv, (trackBase+k)<<N_POINT_BITS);
    qq[k] = vv[k]; // One identity query per set of database vectors
  }
  cout << endl;
  cout.flush();

  cout << "Writing serialized LSH tables..." << endl;
  // TEST SERIALIZED LSH RETRIEVAL
  lsh->serialize(FILENAME);

  // TEST LSH RETRIEVAL IN CORE
  printf("\n********** In-core LSH retrieval from %d track%c **********\n", 
	 (lsh->get_maxp()>>N_POINT_BITS)+1,(lsh->get_maxp()>>N_POINT_BITS)>0?'s':' ');
  fflush(stdout);  
  for(int i = 0; i < nT ; i++ ){
    trackSequenceQueryRadNNReporter* pr = new trackSequenceQueryRadNNReporter(nP,nT,(lsh->get_maxp()>>N_POINT_BITS)+1); 
    lsh->retrieve_point(qq[i], i, &add_point, (void*)pr); // LSH point retrieval from core
    printf("query vector %d] t1:%u t2:%0X\n", i, lsh->get_t1(), lsh->get_t2());
    fflush(stdout);
    pr->report(0,0);
    delete pr;
  }
  delete lsh;

  cout << "Loading Serialized LSH functions from disk ..." << endl;
  cout.flush();
  lsh = new LSH(FILENAME);
  assert(lsh);  
  //  lsh->serial_dump_tables(FILENAME);
  printf("\n********** Serialized LSH retrieval from %d track%c **********\n", (lsh->get_maxp()>>N_POINT_BITS)+1,(lsh->get_maxp()>>N_POINT_BITS)>1?'s':' ');
  fflush(stdout);  
  for(int i= 0; i < nT ; i++ ){
    trackSequenceQueryRadNNReporter* pr = new trackSequenceQueryRadNNReporter(nP,nT,(lsh->get_maxp()>>N_POINT_BITS)+1); 
    lsh->serial_retrieve_point(FILENAME, qq[i], i, &add_point, (void*) pr); // LSH serialized point retrieval method  
    printf("query vector %d] t1:%u t2:%0X\n", i, lsh->get_t1(), lsh->get_t2());
    fflush(stdout);
    pr->report(0,0);
    delete pr;
  }  
  delete lsh;

#ifdef LSH_IN_CORE  
  cout << "Loading Serialized LSH functions and tables from disk ..." << endl;
  cout.flush();
  // Unserialize entire lsh tree to core
  lsh = new LSH(FILENAME,1);
  
  // TEST UNSERIALIZED LSH RETRIEVAL IN CORE
  printf("\n********** Unserialized LSH in-core retrieval from %d track%c **********\n", (lsh->get_maxp()>>N_POINT_BITS)+1,(lsh->get_maxp()>>N_POINT_BITS)>1?'s':' ');
  fflush(stdout);
  for(int i = 0; i < nT ; i++ ){
    trackSequenceQueryRadNNReporter* pr = new trackSequenceQueryRadNNReporter(nP,nT,(lsh->get_maxp()>>N_POINT_BITS)+1); 
    lsh->retrieve_point(qq[i], i, &add_point, (void*) pr); // LSH point retrieval from core
    printf("query vector %d] t1:%u t2:%0X\n", i, lsh->get_t1(), lsh->get_t2());
    fflush(stdout);
    pr->report(0,0);
    delete pr;
  }  
  delete lsh;
#endif

}