comparison index.cpp @ 308:896679d8cc39

Added server-side loading of a persistent index (LSH hashtables) via --load_index -d dbName -R radius -l sequenceLength. Queries using these parameters will look up the memory-resident hashtable instead of loading one from disk.
author mas01mc
date Thu, 07 Aug 2008 01:53:38 +0000
parents 8cec6eb40526
children cac5b3465318
comparison
equal deleted inserted replaced
307:d1b8b2dec37e 308:896679d8cc39
48 48
49 if(lshfid<0) 49 if(lshfid<0)
50 return false; 50 return false;
51 else 51 else
52 return true; 52 return true;
53 }
54
55 LSH* audioDB::index_allocate(char* indexName, bool load_hashTables){
56 LSH* gIndx=SERVER_LSH_INDEX_SINGLETON;
57 if(isServer && gIndx && (strncmp(gIndx->get_indexName(), indexName, MAXSTR)==0) )
58 audioDB::lsh = gIndx; // Use the global SERVER resident index
59 else{
60 if(audioDB::lsh)
61 delete audioDB::lsh;
62 audioDB::lsh = new LSH(indexName, load_hashTables);
63 }
64 assert(audioDB::lsh);
65 return audioDB::lsh;
53 } 66 }
54 67
55 vector<vector<float> >* audioDB::index_initialize_shingles(Uns32T sz){ 68 vector<vector<float> >* audioDB::index_initialize_shingles(Uns32T sz){
56 if(vv) 69 if(vv)
57 delete vv; 70 delete vv;
150 index_insert_tracks(0, endTrack, &fvp, &sNorm, &snPtr, &sPower, &spPtr); 163 index_insert_tracks(0, endTrack, &fvp, &sNorm, &snPtr, &sPower, &spPtr);
151 lsh->serialize(newIndexName, lsh_in_core?O2_SERIAL_FILEFORMAT2:O2_SERIAL_FILEFORMAT1); 164 lsh->serialize(newIndexName, lsh_in_core?O2_SERIAL_FILEFORMAT2:O2_SERIAL_FILEFORMAT1);
152 165
153 // Clean up 166 // Clean up
154 delete lsh; 167 delete lsh;
168 lsh = 0;
155 close(lshfid); 169 close(lshfid);
156 } 170 }
157 171
158 // Attempt to open LSH file 172 // Attempt to open LSH file
159 if((lshfid = open(newIndexName,O_RDONLY))>0){ 173 if((lshfid = open(newIndexName,O_RDONLY))>0){
163 // Get the lsh header info and find how many tracks are inserted already 177 // Get the lsh header info and find how many tracks are inserted already
164 lsh = new LSH(newIndexName, false); // lshInCore=false to avoid loading hashTables here 178 lsh = new LSH(newIndexName, false); // lshInCore=false to avoid loading hashTables here
165 assert(lsh); 179 assert(lsh);
166 Uns32T maxs = index_to_trackID(lsh->get_maxp())+1; 180 Uns32T maxs = index_to_trackID(lsh->get_maxp())+1;
167 delete lsh; 181 delete lsh;
182 lsh = 0;
168 183
169 // This allows for updating index after more tracks are inserted into audioDB 184 // This allows for updating index after more tracks are inserted into audioDB
170 for(Uns32T startTrack = maxs; startTrack < dbH->numFiles; startTrack+=lsh_param_b){ 185 for(Uns32T startTrack = maxs; startTrack < dbH->numFiles; startTrack+=lsh_param_b){
171 186
172 Uns32T endTrack = startTrack + lsh_param_b; 187 Uns32T endTrack = startTrack + lsh_param_b;
181 index_insert_tracks(startTrack, endTrack, &fvp, &sNorm, &snPtr, &sPower, &spPtr); 196 index_insert_tracks(startTrack, endTrack, &fvp, &sNorm, &snPtr, &sPower, &spPtr);
182 197
183 // Serialize to file 198 // Serialize to file
184 lsh->serialize(newIndexName, lsh_in_core?O2_SERIAL_FILEFORMAT2:O2_SERIAL_FILEFORMAT1); // Serialize core LSH heap to disk 199 lsh->serialize(newIndexName, lsh_in_core?O2_SERIAL_FILEFORMAT2:O2_SERIAL_FILEFORMAT1); // Serialize core LSH heap to disk
185 delete lsh; 200 delete lsh;
201 lsh = 0;
186 } 202 }
187 203
188 close(lshfid); 204 close(lshfid);
189 printf("INDEX: done constructing LSH index.\n"); 205 printf("INDEX: done constructing LSH index.\n");
190 fflush(stdout); 206 fflush(stdout);
360 if((lshfid = open (indexName, O_RDONLY)) < 0){ 376 if((lshfid = open (indexName, O_RDONLY)) < 0){
361 delete[] indexName; 377 delete[] indexName;
362 return false; 378 return false;
363 } 379 }
364 380
365 printf("INDEX: initializing header\n"); 381 lsh = index_allocate(indexName, false); // Get the header only here
366
367 lsh = new LSH(indexName, false); // Get the header only here
368 assert(lsh);
369 sequenceLength = lsh->get_lshHeader()->dataDim / dbH->dim; // shingleDim / vectorDim 382 sequenceLength = lsh->get_lshHeader()->dataDim / dbH->dim; // shingleDim / vectorDim
370 383
371 384 if(!SERVER_LSH_INDEX_SINGLETON){
372 if( fabs(radius - lsh->get_radius())>fabs(O2_DISTANCE_TOLERANCE)) 385 if( fabs(radius - lsh->get_radius())>fabs(O2_DISTANCE_TOLERANCE))
373 printf("*** Warning: adb_radius (%f) != lsh_radius (%f) ***\n", radius, lsh->get_radius()); 386 printf("*** Warning: adb_radius (%f) != lsh_radius (%f) ***\n", radius, lsh->get_radius());
374 387 printf("INDEX: dim %d\n", dbH->dim);
375 printf("INDEX: dim %d\n", dbH->dim); 388 printf("INDEX: R %f\n", lsh->get_radius());
376 printf("INDEX: R %f\n", lsh->get_radius()); 389 printf("INDEX: seqlen %d\n", sequenceLength);
377 printf("INDEX: seqlen %d\n", sequenceLength); 390 printf("INDEX: w %f\n", lsh->get_lshHeader()->get_binWidth());
378 printf("INDEX: w %f\n", lsh->get_lshHeader()->get_binWidth()); 391 printf("INDEX: k %d\n", lsh->get_lshHeader()->get_numFuns());
379 printf("INDEX: k %d\n", lsh->get_lshHeader()->get_numFuns()); 392 printf("INDEX: L (m*(m-1))/2 %d\n", lsh->get_lshHeader()->get_numTables());
380 printf("INDEX: L (m*(m-1))/2 %d\n", lsh->get_lshHeader()->get_numTables()); 393 printf("INDEX: N %d\n", lsh->get_lshHeader()->get_numRows());
381 printf("INDEX: N %d\n", lsh->get_lshHeader()->get_numRows()); 394 printf("INDEX: s %d\n", index_to_trackID(lsh->get_maxp()));
382 printf("INDEX: s %d\n", index_to_trackID(lsh->get_maxp())); 395 printf("INDEX: Opened LSH index file %s\n", indexName);
383 printf("INDEX: Opened LSH index file %s\n", indexName); 396 fflush(stdout);
384 fflush(stdout); 397 }
385 398
386 // Check to see if we are loading hash tables into core, and do so if true 399 // Check to see if we are loading hash tables into core, and do so if true
387 if((lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2) || lsh_in_core){ 400 if((lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2) || lsh_in_core){
388 printf("INDEX: loading hash tables into core %s\n", (lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2)?"FORMAT2":"FORMAT1"); 401 if(SERVER_LSH_INDEX_SINGLETON)
389 delete lsh; 402 fprintf(stderr,"INDEX: using persistent hash tables: %s\n", lsh->get_indexName());
390 lsh = new LSH(indexName, true); 403 else
404 printf("INDEX: loading hash tables into core %s\n", (lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2)?"FORMAT2":"FORMAT1");
405 lsh = index_allocate(indexName, true);
391 } 406 }
392 407
393 delete[] indexName; 408 delete[] indexName;
394 return true; 409 return true;
395 } 410 }