mas01mc@292
|
1 // UNIT_TEST_LSH.cpp
|
mas01mc@292
|
2
|
mas01mc@292
|
3 #include <vector>
|
mas01mc@292
|
4 #include "lshlib.h"
|
mas01mc@292
|
5 #include "reporter.h"
|
mas01mc@292
|
6
|
mas01mc@292
|
7 #define LSH_IN_CORE
|
mas01mc@292
|
8
|
mas01mc@292
|
9
|
mas01mc@292
|
10 #define N_POINT_BITS 14
|
mas01mc@292
|
11 #define POINT_BIT_MASK 0x00003FFF
|
mas01mc@292
|
12
|
mas01mc@292
|
13 // Callback method for LSH point retrieval
|
mas01mc@292
|
14 void add_point(void* reporter, Uns32T pointID, Uns32T qpos, float dist)
|
mas01mc@292
|
15 {
|
mas01mc@292
|
16 ReporterBase* pr = (ReporterBase*)reporter;
|
mas01mc@292
|
17 pr->add_point(pointID>>N_POINT_BITS, qpos, pointID&POINT_BIT_MASK, dist);
|
mas01mc@292
|
18 }
|
mas01mc@292
|
19
|
mas01mc@292
|
20 int main(int argc, char* argv[]){
|
mas01mc@292
|
21
|
mas01mc@292
|
22 int nT = 100; // num tracks
|
mas01mc@292
|
23 int nP = 1000; // num points-per-track
|
mas01mc@292
|
24 float w = 4.0;// LSH bucket width
|
mas01mc@292
|
25 int k = 10;
|
mas01mc@292
|
26 int m = 2;
|
mas01mc@292
|
27 int d = 10;
|
mas01mc@292
|
28 int N = 100000;
|
mas01mc@292
|
29 int C = 200;
|
mas01mc@292
|
30
|
mas01mc@292
|
31 float radius = 0.001;
|
mas01mc@292
|
32 char FILENAME[] = "foo.lsh";
|
mas01mc@292
|
33
|
mas01mc@292
|
34 assert(nP>=nT);
|
mas01mc@292
|
35
|
mas01mc@292
|
36 int fid = open(FILENAME,O_RDONLY);
|
mas01mc@292
|
37 LSH* lsh;
|
mas01mc@292
|
38 bool serialized = false;
|
mas01mc@292
|
39 Uns32T trackBase = 0;
|
mas01mc@292
|
40
|
mas01mc@292
|
41 if(fid< 0){ // Make a new serial LSH file
|
mas01mc@292
|
42 lsh = new LSH(w,k,m,d,N,C,radius);
|
mas01mc@292
|
43 assert(lsh);
|
mas01mc@292
|
44 cout << "NEW LSH:" << endl;
|
mas01mc@292
|
45 }
|
mas01mc@292
|
46 else{
|
mas01mc@292
|
47 close(fid); // Load LSH structures from disk
|
mas01mc@292
|
48 lsh = new LSH(FILENAME);
|
mas01mc@292
|
49 assert(lsh);
|
mas01mc@292
|
50 cout << "MERGE WITH EXISTING LSH:" << FILENAME << endl;
|
mas01mc@292
|
51 serialized=true;
|
mas01mc@292
|
52 trackBase = (lsh->get_maxp()>>N_POINT_BITS)+1; // Our encoding of tracks and points
|
mas01mc@292
|
53 }
|
mas01mc@294
|
54 cout << "k:" << lsh->get_numFuns() << " ";
|
mas01mc@294
|
55 cout << "L:" << lsh->get_numTables() << " ";
|
mas01mc@294
|
56 cout << "d:" << lsh->get_dataDim() << " ";
|
mas01mc@294
|
57 cout << "N:" << lsh->get_numRows() << " ";
|
mas01mc@294
|
58 cout << "C:" << lsh->get_numCols() << " ";
|
mas01mc@294
|
59 cout << "R:" << lsh->get_radius() << " ";
|
mas01mc@294
|
60 cout << "p:" << lsh->get_maxp() << endl;
|
mas01mc@292
|
61 cout.flush();
|
mas01mc@292
|
62
|
mas01mc@292
|
63 cout << endl << "Constructing " << nT << " tracks with " << nP << " vectors of dimension " << d << endl;
|
mas01mc@292
|
64 cout.flush();
|
mas01mc@292
|
65 // Construct sets of database vectors, use one point from each set for testing
|
mas01mc@292
|
66 vector< vector<float> > vv = vector< vector<float> >(nP); // track vectors
|
mas01mc@292
|
67 vector< vector<float> > qq = vector< vector<float> >(nP);// query vectors
|
mas01mc@292
|
68 for(int i=0; i< nP ; i++){
|
mas01mc@292
|
69 vv[i]=vector<float>(d); // allocate vector
|
mas01mc@292
|
70 qq[i]=vector<float>(d); // allocate vector
|
mas01mc@292
|
71 }
|
mas01mc@292
|
72
|
mas01mc@292
|
73 for(int k = 0 ; k < nT ; k ++){
|
mas01mc@292
|
74 cout << "[" << k << "]";
|
mas01mc@292
|
75 cout.flush();
|
mas01mc@292
|
76 for(int i = 0 ; i< nP ; i++)
|
mas01mc@292
|
77 for(int j=0; j< d ; j++)
|
mas01mc@292
|
78 vv[i][j] = genrand_real2() / radius; // MT_19937 random numbers
|
mas01mc@292
|
79 lsh->insert_point_set(vv, (trackBase+k)<<N_POINT_BITS);
|
mas01mc@292
|
80 qq[k] = vv[k]; // One identity query per set of database vectors
|
mas01mc@292
|
81 }
|
mas01mc@292
|
82 cout << endl;
|
mas01mc@292
|
83 cout.flush();
|
mas01mc@292
|
84
|
mas01mc@292
|
85 cout << "Writing serialized LSH tables..." << endl;
|
mas01mc@292
|
86 // TEST SERIALIZED LSH RETRIEVAL
|
mas01mc@292
|
87 lsh->serialize(FILENAME);
|
mas01mc@292
|
88
|
mas01mc@292
|
89 // TEST LSH RETRIEVAL IN CORE
|
mas01mc@292
|
90 printf("\n********** In-core LSH retrieval from %d track%c **********\n",
|
mas01mc@292
|
91 (lsh->get_maxp()>>N_POINT_BITS)+1,(lsh->get_maxp()>>N_POINT_BITS)>0?'s':' ');
|
mas01mc@292
|
92 fflush(stdout);
|
mas01mc@292
|
93 for(int i = 0; i < nT ; i++ ){
|
mas01mc@292
|
94 trackSequenceQueryRadNNReporter* pr = new trackSequenceQueryRadNNReporter(nP,nT,(lsh->get_maxp()>>N_POINT_BITS)+1);
|
mas01mc@292
|
95 lsh->retrieve_point(qq[i], i, &add_point, (void*)pr); // LSH point retrieval from core
|
mas01mc@292
|
96 printf("query vector %d] t1:%u t2:%0X\n", i, lsh->get_t1(), lsh->get_t2());
|
mas01mc@292
|
97 fflush(stdout);
|
mas01mc@292
|
98 pr->report(0,0);
|
mas01mc@292
|
99 delete pr;
|
mas01mc@292
|
100 }
|
mas01mc@292
|
101 delete lsh;
|
mas01mc@292
|
102
|
mas01mc@292
|
103 cout << "Loading Serialized LSH functions from disk ..." << endl;
|
mas01mc@292
|
104 cout.flush();
|
mas01mc@292
|
105 lsh = new LSH(FILENAME);
|
mas01mc@292
|
106 assert(lsh);
|
mas01mc@292
|
107 // lsh->serial_dump_tables(FILENAME);
|
mas01mc@292
|
108 printf("\n********** Serialized LSH retrieval from %d track%c **********\n", (lsh->get_maxp()>>N_POINT_BITS)+1,(lsh->get_maxp()>>N_POINT_BITS)>1?'s':' ');
|
mas01mc@292
|
109 fflush(stdout);
|
mas01mc@292
|
110 for(int i= 0; i < nT ; i++ ){
|
mas01mc@292
|
111 trackSequenceQueryRadNNReporter* pr = new trackSequenceQueryRadNNReporter(nP,nT,(lsh->get_maxp()>>N_POINT_BITS)+1);
|
mas01mc@292
|
112 lsh->serial_retrieve_point(FILENAME, qq[i], i, &add_point, (void*) pr); // LSH serialized point retrieval method
|
mas01mc@292
|
113 printf("query vector %d] t1:%u t2:%0X\n", i, lsh->get_t1(), lsh->get_t2());
|
mas01mc@292
|
114 fflush(stdout);
|
mas01mc@292
|
115 pr->report(0,0);
|
mas01mc@292
|
116 delete pr;
|
mas01mc@292
|
117 }
|
mas01mc@292
|
118 delete lsh;
|
mas01mc@292
|
119
|
mas01mc@292
|
120 #ifdef LSH_IN_CORE
|
mas01mc@292
|
121 cout << "Loading Serialized LSH functions and tables from disk ..." << endl;
|
mas01mc@292
|
122 cout.flush();
|
mas01mc@292
|
123 // Unserialize entire lsh tree to core
|
mas01mc@292
|
124 lsh = new LSH(FILENAME,1);
|
mas01mc@292
|
125
|
mas01mc@292
|
126 // TEST UNSERIALIZED LSH RETRIEVAL IN CORE
|
mas01mc@292
|
127 printf("\n********** Unserialized LSH in-core retrieval from %d track%c **********\n", (lsh->get_maxp()>>N_POINT_BITS)+1,(lsh->get_maxp()>>N_POINT_BITS)>1?'s':' ');
|
mas01mc@292
|
128 fflush(stdout);
|
mas01mc@292
|
129 for(int i = 0; i < nT ; i++ ){
|
mas01mc@292
|
130 trackSequenceQueryRadNNReporter* pr = new trackSequenceQueryRadNNReporter(nP,nT,(lsh->get_maxp()>>N_POINT_BITS)+1);
|
mas01mc@292
|
131 lsh->retrieve_point(qq[i], i, &add_point, (void*) pr); // LSH point retrieval from core
|
mas01mc@292
|
132 printf("query vector %d] t1:%u t2:%0X\n", i, lsh->get_t1(), lsh->get_t2());
|
mas01mc@292
|
133 fflush(stdout);
|
mas01mc@292
|
134 pr->report(0,0);
|
mas01mc@292
|
135 delete pr;
|
mas01mc@292
|
136 }
|
mas01mc@292
|
137 delete lsh;
|
mas01mc@292
|
138 #endif
|
mas01mc@292
|
139
|
mas01mc@292
|
140 }
|