view UNIT_TEST_LSH.cpp @ 409:99e6cbad7f76 api-inversion

The lesser of two evils, part 2. Implement paths through audiodb_insert_datum_internal() for databases with O2_FLAG_LARGE_ADB, including in some of the helper functions. Most of the nasty stuff is concentrated in writing out the paths in what is now step 6, and everything else looks much as before, apart from a renumbering of the steps taken. Now we can implement audiodb_insert() for O2_FLAG_LARGE_ADB databases; we need to construct an adb_datum_internal_t from our adb_insert_t, but that's straightforward -- even just about straightforward enough to do it inline. Then audioDB::batchinsert() can be rewritten completely in terms of API functions, and doesn't need any kind of special treatment for the large case. Hooray. The real point of that is of course that we can now delete wodges of dead code, and move out audioDB::insert and audioDB::batchinsert into audioDB.cpp, because all they're doing now is dealing with command-line logic. This point marks the limit of what can be achieved in terms of "API inversion" at this time; the only remaining function, audiodb_query() / audioDB::query cannot be inverted because its API implementation is incomplete. Future plans, in some order: - merge this branch to trunk (check with current API/ABI clients); - complete audiodb_query() implementation; - invert audioDB::query / audiodb_query(); - MORE TESTS; - remove audioDB.cpp from list of files compiled into the library; - implement missing API functions (index, liszt, sample) directly; - source code rearrangement into library and command-line directories; - include bindings to library for some plausible candidate environments (Perl, Python, Lisp, Pd, Max/MSP) as examples; - API documentation.
author mas01cr
date Tue, 09 Dec 2008 22:48:30 +0000
parents 071a108580a4
children
line wrap: on
line source
// UNIT_TEST_LSH.cpp

#include <vector>
#include "lshlib.h"
#include "reporter.h"

#define LSH_IN_CORE


#define N_POINT_BITS 14
#define POINT_BIT_MASK 0x00003FFF

// Callback method for LSH point retrieval
void add_point(void* reporter, Uns32T pointID, Uns32T qpos, float dist)
{
  ReporterBase* pr = (ReporterBase*)reporter;
  pr->add_point(pointID>>N_POINT_BITS, qpos, pointID&POINT_BIT_MASK, dist);
}

int main(int argc, char* argv[]){

  int nT = 100; // num tracks 
  int nP = 1000;  // num points-per-track
  float w = 4.0;// LSH bucket width
  int k = 10;
  int m = 2;
  int d = 10;
  int N = 100000;
  int C = 200;

  float radius = 0.001;
  char FILENAME[] = "foo.lsh";

  assert(nP>=nT);

  int fid = open(FILENAME,O_RDONLY);
  LSH* lsh;
  bool serialized = false;
  Uns32T trackBase = 0;

  if(fid< 0){ // Make a new serial LSH file
    lsh = new LSH(w,k,m,d,N,C,radius);
    assert(lsh);
    cout << "NEW LSH:" << endl;
    }
  else{
    close(fid); // Load LSH structures from disk
    lsh = new LSH(FILENAME); 
    assert(lsh);
    cout << "MERGE WITH EXISTING LSH:" << FILENAME << endl;
    serialized=true;
    trackBase = (lsh->get_maxp()>>N_POINT_BITS)+1; // Our encoding of tracks and points
  }  
  cout << "k:" << lsh->get_numFuns() << " ";
  cout << "L:" << lsh->get_numTables() << " ";
  cout << "d:" << lsh->get_dataDim() << " ";
  cout << "N:" << lsh->get_numRows() << " ";
  cout << "C:" << lsh->get_numCols() << " ";
  cout << "R:" << lsh->get_radius() << " ";
  cout << "p:" << lsh->get_maxp() << endl;
  cout.flush();

  cout << endl << "Constructing " << nT << " tracks with " << nP << " vectors of dimension " << d << endl;
  cout.flush();
  // Construct sets of database vectors, use one point from each set for testing
  vector< vector<float> > vv = vector< vector<float> >(nP); // track vectors
  vector< vector<float> > qq = vector< vector<float> >(nP);// query vectors
  for(int i=0; i< nP ; i++){
    vv[i]=vector<float>(d);  // allocate vector
    qq[i]=vector<float>(d);  // allocate vector
  }
  
  for(int k = 0 ; k < nT ; k ++){
    cout << "[" << k << "]";
    cout.flush();
    for(int i = 0 ; i< nP ; i++)
      for(int j=0; j< d ; j++)
	vv[i][j] =   genrand_real2() / radius; // MT_19937 random numbers
    lsh->insert_point_set(vv, (trackBase+k)<<N_POINT_BITS);
    qq[k] = vv[k]; // One identity query per set of database vectors
  }
  cout << endl;
  cout.flush();

  cout << "Writing serialized LSH tables..." << endl;
  // TEST SERIALIZED LSH RETRIEVAL
  lsh->serialize(FILENAME);

  // TEST LSH RETRIEVAL IN CORE
  printf("\n********** In-core LSH retrieval from %d track%c **********\n", 
	 (lsh->get_maxp()>>N_POINT_BITS)+1,(lsh->get_maxp()>>N_POINT_BITS)>0?'s':' ');
  fflush(stdout);  
  for(int i = 0; i < nT ; i++ ){
    trackSequenceQueryRadNNReporter* pr = new trackSequenceQueryRadNNReporter(nP,nT,(lsh->get_maxp()>>N_POINT_BITS)+1); 
    lsh->retrieve_point(qq[i], i, &add_point, (void*)pr); // LSH point retrieval from core
    printf("query vector %d] t1:%u t2:%0X\n", i, lsh->get_t1(), lsh->get_t2());
    fflush(stdout);
    pr->report(0,0);
    delete pr;
  }
  delete lsh;

  cout << "Loading Serialized LSH functions from disk ..." << endl;
  cout.flush();
  lsh = new LSH(FILENAME);
  assert(lsh);  
  //  lsh->serial_dump_tables(FILENAME);
  printf("\n********** Serialized LSH retrieval from %d track%c **********\n", (lsh->get_maxp()>>N_POINT_BITS)+1,(lsh->get_maxp()>>N_POINT_BITS)>1?'s':' ');
  fflush(stdout);  
  for(int i= 0; i < nT ; i++ ){
    trackSequenceQueryRadNNReporter* pr = new trackSequenceQueryRadNNReporter(nP,nT,(lsh->get_maxp()>>N_POINT_BITS)+1); 
    lsh->serial_retrieve_point(FILENAME, qq[i], i, &add_point, (void*) pr); // LSH serialized point retrieval method  
    printf("query vector %d] t1:%u t2:%0X\n", i, lsh->get_t1(), lsh->get_t2());
    fflush(stdout);
    pr->report(0,0);
    delete pr;
  }  
  delete lsh;

#ifdef LSH_IN_CORE  
  cout << "Loading Serialized LSH functions and tables from disk ..." << endl;
  cout.flush();
  // Unserialize entire lsh tree to core
  lsh = new LSH(FILENAME,1);
  
  // TEST UNSERIALIZED LSH RETRIEVAL IN CORE
  printf("\n********** Unserialized LSH in-core retrieval from %d track%c **********\n", (lsh->get_maxp()>>N_POINT_BITS)+1,(lsh->get_maxp()>>N_POINT_BITS)>1?'s':' ');
  fflush(stdout);
  for(int i = 0; i < nT ; i++ ){
    trackSequenceQueryRadNNReporter* pr = new trackSequenceQueryRadNNReporter(nP,nT,(lsh->get_maxp()>>N_POINT_BITS)+1); 
    lsh->retrieve_point(qq[i], i, &add_point, (void*) pr); // LSH point retrieval from core
    printf("query vector %d] t1:%u t2:%0X\n", i, lsh->get_t1(), lsh->get_t2());
    fflush(stdout);
    pr->report(0,0);
    delete pr;
  }  
  delete lsh;
#endif

}