Mercurial > hg > audiodb
comparison index.cpp @ 308:896679d8cc39
Added server-side loading of a persistent index (LSH hashtables) via --load_index -d dbName -R radius -l sequenceLength. Queries using these parameters will look up the memory-resident hashtable instead of loading one from disk.
author | mas01mc |
---|---|
date | Thu, 07 Aug 2008 01:53:38 +0000 |
parents | 8cec6eb40526 |
children | cac5b3465318 |
comparison
equal
deleted
inserted
replaced
307:d1b8b2dec37e | 308:896679d8cc39 |
---|---|
48 | 48 |
49 if(lshfid<0) | 49 if(lshfid<0) |
50 return false; | 50 return false; |
51 else | 51 else |
52 return true; | 52 return true; |
53 } | |
54 | |
55 LSH* audioDB::index_allocate(char* indexName, bool load_hashTables){ | |
56 LSH* gIndx=SERVER_LSH_INDEX_SINGLETON; | |
57 if(isServer && gIndx && (strncmp(gIndx->get_indexName(), indexName, MAXSTR)==0) ) | |
58 audioDB::lsh = gIndx; // Use the global SERVER resident index | |
59 else{ | |
60 if(audioDB::lsh) | |
61 delete audioDB::lsh; | |
62 audioDB::lsh = new LSH(indexName, load_hashTables); | |
63 } | |
64 assert(audioDB::lsh); | |
65 return audioDB::lsh; | |
53 } | 66 } |
54 | 67 |
55 vector<vector<float> >* audioDB::index_initialize_shingles(Uns32T sz){ | 68 vector<vector<float> >* audioDB::index_initialize_shingles(Uns32T sz){ |
56 if(vv) | 69 if(vv) |
57 delete vv; | 70 delete vv; |
150 index_insert_tracks(0, endTrack, &fvp, &sNorm, &snPtr, &sPower, &spPtr); | 163 index_insert_tracks(0, endTrack, &fvp, &sNorm, &snPtr, &sPower, &spPtr); |
151 lsh->serialize(newIndexName, lsh_in_core?O2_SERIAL_FILEFORMAT2:O2_SERIAL_FILEFORMAT1); | 164 lsh->serialize(newIndexName, lsh_in_core?O2_SERIAL_FILEFORMAT2:O2_SERIAL_FILEFORMAT1); |
152 | 165 |
153 // Clean up | 166 // Clean up |
154 delete lsh; | 167 delete lsh; |
168 lsh = 0; | |
155 close(lshfid); | 169 close(lshfid); |
156 } | 170 } |
157 | 171 |
158 // Attempt to open LSH file | 172 // Attempt to open LSH file |
159 if((lshfid = open(newIndexName,O_RDONLY))>0){ | 173 if((lshfid = open(newIndexName,O_RDONLY))>0){ |
163 // Get the lsh header info and find how many tracks are inserted already | 177 // Get the lsh header info and find how many tracks are inserted already |
164 lsh = new LSH(newIndexName, false); // lshInCore=false to avoid loading hashTables here | 178 lsh = new LSH(newIndexName, false); // lshInCore=false to avoid loading hashTables here |
165 assert(lsh); | 179 assert(lsh); |
166 Uns32T maxs = index_to_trackID(lsh->get_maxp())+1; | 180 Uns32T maxs = index_to_trackID(lsh->get_maxp())+1; |
167 delete lsh; | 181 delete lsh; |
182 lsh = 0; | |
168 | 183 |
169 // This allows for updating index after more tracks are inserted into audioDB | 184 // This allows for updating index after more tracks are inserted into audioDB |
170 for(Uns32T startTrack = maxs; startTrack < dbH->numFiles; startTrack+=lsh_param_b){ | 185 for(Uns32T startTrack = maxs; startTrack < dbH->numFiles; startTrack+=lsh_param_b){ |
171 | 186 |
172 Uns32T endTrack = startTrack + lsh_param_b; | 187 Uns32T endTrack = startTrack + lsh_param_b; |
181 index_insert_tracks(startTrack, endTrack, &fvp, &sNorm, &snPtr, &sPower, &spPtr); | 196 index_insert_tracks(startTrack, endTrack, &fvp, &sNorm, &snPtr, &sPower, &spPtr); |
182 | 197 |
183 // Serialize to file | 198 // Serialize to file |
184 lsh->serialize(newIndexName, lsh_in_core?O2_SERIAL_FILEFORMAT2:O2_SERIAL_FILEFORMAT1); // Serialize core LSH heap to disk | 199 lsh->serialize(newIndexName, lsh_in_core?O2_SERIAL_FILEFORMAT2:O2_SERIAL_FILEFORMAT1); // Serialize core LSH heap to disk |
185 delete lsh; | 200 delete lsh; |
201 lsh = 0; | |
186 } | 202 } |
187 | 203 |
188 close(lshfid); | 204 close(lshfid); |
189 printf("INDEX: done constructing LSH index.\n"); | 205 printf("INDEX: done constructing LSH index.\n"); |
190 fflush(stdout); | 206 fflush(stdout); |
360 if((lshfid = open (indexName, O_RDONLY)) < 0){ | 376 if((lshfid = open (indexName, O_RDONLY)) < 0){ |
361 delete[] indexName; | 377 delete[] indexName; |
362 return false; | 378 return false; |
363 } | 379 } |
364 | 380 |
365 printf("INDEX: initializing header\n"); | 381 lsh = index_allocate(indexName, false); // Get the header only here |
366 | |
367 lsh = new LSH(indexName, false); // Get the header only here | |
368 assert(lsh); | |
369 sequenceLength = lsh->get_lshHeader()->dataDim / dbH->dim; // shingleDim / vectorDim | 382 sequenceLength = lsh->get_lshHeader()->dataDim / dbH->dim; // shingleDim / vectorDim |
370 | 383 |
371 | 384 if(!SERVER_LSH_INDEX_SINGLETON){ |
372 if( fabs(radius - lsh->get_radius())>fabs(O2_DISTANCE_TOLERANCE)) | 385 if( fabs(radius - lsh->get_radius())>fabs(O2_DISTANCE_TOLERANCE)) |
373 printf("*** Warning: adb_radius (%f) != lsh_radius (%f) ***\n", radius, lsh->get_radius()); | 386 printf("*** Warning: adb_radius (%f) != lsh_radius (%f) ***\n", radius, lsh->get_radius()); |
374 | 387 printf("INDEX: dim %d\n", dbH->dim); |
375 printf("INDEX: dim %d\n", dbH->dim); | 388 printf("INDEX: R %f\n", lsh->get_radius()); |
376 printf("INDEX: R %f\n", lsh->get_radius()); | 389 printf("INDEX: seqlen %d\n", sequenceLength); |
377 printf("INDEX: seqlen %d\n", sequenceLength); | 390 printf("INDEX: w %f\n", lsh->get_lshHeader()->get_binWidth()); |
378 printf("INDEX: w %f\n", lsh->get_lshHeader()->get_binWidth()); | 391 printf("INDEX: k %d\n", lsh->get_lshHeader()->get_numFuns()); |
379 printf("INDEX: k %d\n", lsh->get_lshHeader()->get_numFuns()); | 392 printf("INDEX: L (m*(m-1))/2 %d\n", lsh->get_lshHeader()->get_numTables()); |
380 printf("INDEX: L (m*(m-1))/2 %d\n", lsh->get_lshHeader()->get_numTables()); | 393 printf("INDEX: N %d\n", lsh->get_lshHeader()->get_numRows()); |
381 printf("INDEX: N %d\n", lsh->get_lshHeader()->get_numRows()); | 394 printf("INDEX: s %d\n", index_to_trackID(lsh->get_maxp())); |
382 printf("INDEX: s %d\n", index_to_trackID(lsh->get_maxp())); | 395 printf("INDEX: Opened LSH index file %s\n", indexName); |
383 printf("INDEX: Opened LSH index file %s\n", indexName); | 396 fflush(stdout); |
384 fflush(stdout); | 397 } |
385 | 398 |
386 // Check to see if we are loading hash tables into core, and do so if true | 399 // Check to see if we are loading hash tables into core, and do so if true |
387 if((lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2) || lsh_in_core){ | 400 if((lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2) || lsh_in_core){ |
388 printf("INDEX: loading hash tables into core %s\n", (lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2)?"FORMAT2":"FORMAT1"); | 401 if(SERVER_LSH_INDEX_SINGLETON) |
389 delete lsh; | 402 fprintf(stderr,"INDEX: using persistent hash tables: %s\n", lsh->get_indexName()); |
390 lsh = new LSH(indexName, true); | 403 else |
404 printf("INDEX: loading hash tables into core %s\n", (lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2)?"FORMAT2":"FORMAT1"); | |
405 lsh = index_allocate(indexName, true); | |
391 } | 406 } |
392 | 407 |
393 delete[] indexName; | 408 delete[] indexName; |
394 return true; | 409 return true; |
395 } | 410 } |