annotate UNIT_TEST_LSH.cpp @ 770:c54bc2ffbf92 tip

update tags
author convert-repo
date Fri, 16 Dec 2011 11:34:01 +0000
parents 071a108580a4
children
rev   line source
mas01mc@292 1 // UNIT_TEST_LSH.cpp
mas01mc@292 2
mas01mc@292 3 #include <vector>
mas01mc@292 4 #include "lshlib.h"
mas01mc@292 5 #include "reporter.h"
mas01mc@292 6
mas01mc@292 7 #define LSH_IN_CORE
mas01mc@292 8
mas01mc@292 9
mas01mc@292 10 #define N_POINT_BITS 14
mas01mc@292 11 #define POINT_BIT_MASK 0x00003FFF
mas01mc@292 12
mas01mc@292 13 // Callback method for LSH point retrieval
mas01mc@292 14 void add_point(void* reporter, Uns32T pointID, Uns32T qpos, float dist)
mas01mc@292 15 {
mas01mc@292 16 ReporterBase* pr = (ReporterBase*)reporter;
mas01mc@292 17 pr->add_point(pointID>>N_POINT_BITS, qpos, pointID&POINT_BIT_MASK, dist);
mas01mc@292 18 }
mas01mc@292 19
mas01mc@292 20 int main(int argc, char* argv[]){
mas01mc@292 21
mas01mc@292 22 int nT = 100; // num tracks
mas01mc@292 23 int nP = 1000; // num points-per-track
mas01mc@292 24 float w = 4.0;// LSH bucket width
mas01mc@292 25 int k = 10;
mas01mc@292 26 int m = 2;
mas01mc@292 27 int d = 10;
mas01mc@292 28 int N = 100000;
mas01mc@292 29 int C = 200;
mas01mc@292 30
mas01mc@292 31 float radius = 0.001;
mas01mc@292 32 char FILENAME[] = "foo.lsh";
mas01mc@292 33
mas01mc@292 34 assert(nP>=nT);
mas01mc@292 35
mas01mc@292 36 int fid = open(FILENAME,O_RDONLY);
mas01mc@292 37 LSH* lsh;
mas01mc@292 38 bool serialized = false;
mas01mc@292 39 Uns32T trackBase = 0;
mas01mc@292 40
mas01mc@292 41 if(fid< 0){ // Make a new serial LSH file
mas01mc@292 42 lsh = new LSH(w,k,m,d,N,C,radius);
mas01mc@292 43 assert(lsh);
mas01mc@292 44 cout << "NEW LSH:" << endl;
mas01mc@292 45 }
mas01mc@292 46 else{
mas01mc@292 47 close(fid); // Load LSH structures from disk
mas01mc@292 48 lsh = new LSH(FILENAME);
mas01mc@292 49 assert(lsh);
mas01mc@292 50 cout << "MERGE WITH EXISTING LSH:" << FILENAME << endl;
mas01mc@292 51 serialized=true;
mas01mc@292 52 trackBase = (lsh->get_maxp()>>N_POINT_BITS)+1; // Our encoding of tracks and points
mas01mc@292 53 }
mas01mc@294 54 cout << "k:" << lsh->get_numFuns() << " ";
mas01mc@294 55 cout << "L:" << lsh->get_numTables() << " ";
mas01mc@294 56 cout << "d:" << lsh->get_dataDim() << " ";
mas01mc@294 57 cout << "N:" << lsh->get_numRows() << " ";
mas01mc@294 58 cout << "C:" << lsh->get_numCols() << " ";
mas01mc@294 59 cout << "R:" << lsh->get_radius() << " ";
mas01mc@294 60 cout << "p:" << lsh->get_maxp() << endl;
mas01mc@292 61 cout.flush();
mas01mc@292 62
mas01mc@292 63 cout << endl << "Constructing " << nT << " tracks with " << nP << " vectors of dimension " << d << endl;
mas01mc@292 64 cout.flush();
mas01mc@292 65 // Construct sets of database vectors, use one point from each set for testing
mas01mc@292 66 vector< vector<float> > vv = vector< vector<float> >(nP); // track vectors
mas01mc@292 67 vector< vector<float> > qq = vector< vector<float> >(nP);// query vectors
mas01mc@292 68 for(int i=0; i< nP ; i++){
mas01mc@292 69 vv[i]=vector<float>(d); // allocate vector
mas01mc@292 70 qq[i]=vector<float>(d); // allocate vector
mas01mc@292 71 }
mas01mc@292 72
mas01mc@292 73 for(int k = 0 ; k < nT ; k ++){
mas01mc@292 74 cout << "[" << k << "]";
mas01mc@292 75 cout.flush();
mas01mc@292 76 for(int i = 0 ; i< nP ; i++)
mas01mc@292 77 for(int j=0; j< d ; j++)
mas01mc@292 78 vv[i][j] = genrand_real2() / radius; // MT_19937 random numbers
mas01mc@292 79 lsh->insert_point_set(vv, (trackBase+k)<<N_POINT_BITS);
mas01mc@292 80 qq[k] = vv[k]; // One identity query per set of database vectors
mas01mc@292 81 }
mas01mc@292 82 cout << endl;
mas01mc@292 83 cout.flush();
mas01mc@292 84
mas01mc@292 85 cout << "Writing serialized LSH tables..." << endl;
mas01mc@292 86 // TEST SERIALIZED LSH RETRIEVAL
mas01mc@292 87 lsh->serialize(FILENAME);
mas01mc@292 88
mas01mc@292 89 // TEST LSH RETRIEVAL IN CORE
mas01mc@292 90 printf("\n********** In-core LSH retrieval from %d track%c **********\n",
mas01mc@292 91 (lsh->get_maxp()>>N_POINT_BITS)+1,(lsh->get_maxp()>>N_POINT_BITS)>0?'s':' ');
mas01mc@292 92 fflush(stdout);
mas01mc@292 93 for(int i = 0; i < nT ; i++ ){
mas01mc@292 94 trackSequenceQueryRadNNReporter* pr = new trackSequenceQueryRadNNReporter(nP,nT,(lsh->get_maxp()>>N_POINT_BITS)+1);
mas01mc@292 95 lsh->retrieve_point(qq[i], i, &add_point, (void*)pr); // LSH point retrieval from core
mas01mc@292 96 printf("query vector %d] t1:%u t2:%0X\n", i, lsh->get_t1(), lsh->get_t2());
mas01mc@292 97 fflush(stdout);
mas01mc@292 98 pr->report(0,0);
mas01mc@292 99 delete pr;
mas01mc@292 100 }
mas01mc@292 101 delete lsh;
mas01mc@292 102
mas01mc@292 103 cout << "Loading Serialized LSH functions from disk ..." << endl;
mas01mc@292 104 cout.flush();
mas01mc@292 105 lsh = new LSH(FILENAME);
mas01mc@292 106 assert(lsh);
mas01mc@292 107 // lsh->serial_dump_tables(FILENAME);
mas01mc@292 108 printf("\n********** Serialized LSH retrieval from %d track%c **********\n", (lsh->get_maxp()>>N_POINT_BITS)+1,(lsh->get_maxp()>>N_POINT_BITS)>1?'s':' ');
mas01mc@292 109 fflush(stdout);
mas01mc@292 110 for(int i= 0; i < nT ; i++ ){
mas01mc@292 111 trackSequenceQueryRadNNReporter* pr = new trackSequenceQueryRadNNReporter(nP,nT,(lsh->get_maxp()>>N_POINT_BITS)+1);
mas01mc@292 112 lsh->serial_retrieve_point(FILENAME, qq[i], i, &add_point, (void*) pr); // LSH serialized point retrieval method
mas01mc@292 113 printf("query vector %d] t1:%u t2:%0X\n", i, lsh->get_t1(), lsh->get_t2());
mas01mc@292 114 fflush(stdout);
mas01mc@292 115 pr->report(0,0);
mas01mc@292 116 delete pr;
mas01mc@292 117 }
mas01mc@292 118 delete lsh;
mas01mc@292 119
mas01mc@292 120 #ifdef LSH_IN_CORE
mas01mc@292 121 cout << "Loading Serialized LSH functions and tables from disk ..." << endl;
mas01mc@292 122 cout.flush();
mas01mc@292 123 // Unserialize entire lsh tree to core
mas01mc@292 124 lsh = new LSH(FILENAME,1);
mas01mc@292 125
mas01mc@292 126 // TEST UNSERIALIZED LSH RETRIEVAL IN CORE
mas01mc@292 127 printf("\n********** Unserialized LSH in-core retrieval from %d track%c **********\n", (lsh->get_maxp()>>N_POINT_BITS)+1,(lsh->get_maxp()>>N_POINT_BITS)>1?'s':' ');
mas01mc@292 128 fflush(stdout);
mas01mc@292 129 for(int i = 0; i < nT ; i++ ){
mas01mc@292 130 trackSequenceQueryRadNNReporter* pr = new trackSequenceQueryRadNNReporter(nP,nT,(lsh->get_maxp()>>N_POINT_BITS)+1);
mas01mc@292 131 lsh->retrieve_point(qq[i], i, &add_point, (void*) pr); // LSH point retrieval from core
mas01mc@292 132 printf("query vector %d] t1:%u t2:%0X\n", i, lsh->get_t1(), lsh->get_t2());
mas01mc@292 133 fflush(stdout);
mas01mc@292 134 pr->report(0,0);
mas01mc@292 135 delete pr;
mas01mc@292 136 }
mas01mc@292 137 delete lsh;
mas01mc@292 138 #endif
mas01mc@292 139
mas01mc@292 140 }