comparison index.cpp @ 320:a995e5ad999a large_adb

Working LARGE_ADB support. Activate at creation time with -N --ntracks 20001 or greater, or with --datasize 1356 or greater, or both. LARGE_ADB blocks non-indexed QUERY.
author mas01mc
date Wed, 20 Aug 2008 13:50:58 +0000
parents b9eff6896943
children da2272e029b3
comparison
equal deleted inserted replaced
319:b9eff6896943 320:a995e5ad999a
117 } 117 }
118 118
119 119
120 /************************ LSH indexing ***********************************/ 120 /************************ LSH indexing ***********************************/
121 void audioDB::index_index_db(const char* dbName){ 121 void audioDB::index_index_db(const char* dbName){
122
123 char* newIndexName; 122 char* newIndexName;
124 double *fvp = 0, *sNorm = 0, *snPtr = 0, *sPower = 0, *spPtr = 0; 123 double *fvp = 0, *sNorm = 0, *snPtr = 0, *sPower = 0, *spPtr = 0;
125 Uns32T dbVectors = 0; 124 Uns32T dbVectors = 0;
125
126 126
127 printf("INDEX: initializing header\n"); 127 printf("INDEX: initializing header\n");
128 // Check if audioDB exists, initialize header and open database for read 128 // Check if audioDB exists, initialize header and open database for read
129 forWrite = false; 129 forWrite = false;
130 initDBHeader(dbName); 130 initDBHeader(dbName);
131 131
132 if(dbH->flags & O2_FLAG_POWER)
133 usingPower = true;
134
135 if(dbH->flags & O2_FLAG_TIMES)
136 usingTimes = true;
137
132 newIndexName = index_get_name(dbName, radius, sequenceLength); 138 newIndexName = index_get_name(dbName, radius, sequenceLength);
133 139
134 // Set unit norming flag override 140 // Set unit norming flag override
135 audioDB::normalizedDistance = !audioDB::no_unit_norming; 141 audioDB::normalizedDistance = !audioDB::no_unit_norming;
136 142
137 printf("INDEX: dim %d\n", dbH->dim); 143 printf("INDEX: dim %d\n", (int)dbH->dim);
138 printf("INDEX: R %f\n", radius); 144 printf("INDEX: R %f\n", radius);
139 printf("INDEX: seqlen %d\n", sequenceLength); 145 printf("INDEX: seqlen %d\n", sequenceLength);
140 printf("INDEX: lsh_w %f\n", lsh_param_w); 146 printf("INDEX: lsh_w %f\n", lsh_param_w);
141 printf("INDEX: lsh_k %d\n", lsh_param_k); 147 printf("INDEX: lsh_k %d\n", lsh_param_k);
142 printf("INDEX: lsh_m %d\n", lsh_param_m); 148 printf("INDEX: lsh_m %d\n", lsh_param_m);
162 168
163 Uns32T endTrack = lsh_param_b; 169 Uns32T endTrack = lsh_param_b;
164 if( endTrack > dbH->numFiles) 170 if( endTrack > dbH->numFiles)
165 endTrack = dbH->numFiles; 171 endTrack = dbH->numFiles;
166 // Insert up to lsh_param_b tracks 172 // Insert up to lsh_param_b tracks
167 if( dbH->flags & O2_FLAG_LARGE_ADB ){ 173 if( ! (dbH->flags & O2_FLAG_LARGE_ADB) ){
168 }
169 else{
170 index_initialize(&sNorm, &snPtr, &sPower, &spPtr, &dbVectors); 174 index_initialize(&sNorm, &snPtr, &sPower, &spPtr, &dbVectors);
171 index_insert_tracks(0, endTrack, &fvp, &sNorm, &snPtr, &sPower, &spPtr); 175 }
172 } 176 index_insert_tracks(0, endTrack, &fvp, &sNorm, &snPtr, &sPower, &spPtr);
173 lsh->serialize(newIndexName, lsh_in_core?O2_SERIAL_FILEFORMAT2:O2_SERIAL_FILEFORMAT1); 177 lsh->serialize(newIndexName, lsh_in_core?O2_SERIAL_FILEFORMAT2:O2_SERIAL_FILEFORMAT1);
174 178
175 // Clean up 179 // Clean up
176 delete lsh; 180 delete lsh;
177 lsh = 0; 181 lsh = 0;
218 else{ 222 else{
219 error("Something's wrong with LSH index file"); 223 error("Something's wrong with LSH index file");
220 exit(1); 224 exit(1);
221 } 225 }
222 226
223
224 delete[] newIndexName; 227 delete[] newIndexName;
225 if(sNorm) 228 delete[] sNorm;
226 delete[] sNorm; 229 delete[] sPower;
227 if(sPower)
228 delete[] sPower;
229
230
231 } 230 }
232 231
233 232
234 // initialize auxillary track data from filesystem 233 // initialize auxillary track data from filesystem
235 // pre-conditions: 234 // pre-conditions:
256 error("fstat error finding size of power file", powerFileNameTable+trackID*O2_FILETABLE_ENTRY_SIZE, "fstat"); 255 error("fstat error finding size of power file", powerFileNameTable+trackID*O2_FILETABLE_ENTRY_SIZE, "fstat");
257 } 256 }
258 257
259 if( (statbuf.st_size - sizeof(int)) / (sizeof(double)) != trackTable[trackID] ) 258 if( (statbuf.st_size - sizeof(int)) / (sizeof(double)) != trackTable[trackID] )
260 error("Dimension mismatch: numPowers != numVectors", powerFileNameTable+trackID*O2_FILETABLE_ENTRY_SIZE); 259 error("Dimension mismatch: numPowers != numVectors", powerFileNameTable+trackID*O2_FILETABLE_ENTRY_SIZE);
261 260
262 *sPowerp = new double[trackTable[trackID]]; // Allocate memory for power values 261 *sPowerp = new double[trackTable[trackID]]; // Allocate memory for power values
263 assert(*sPowerp); 262 assert(*sPowerp);
264 *spPtrp = *sPowerp; 263 *spPtrp = *sPowerp;
265 insertPowerData(trackTable[trackID], powerfd, *sPowerp); 264 insertPowerData(trackTable[trackID], powerfd, *sPowerp);
266 if (0 < powerfd) { 265 if (0 < powerfd) {
306 305
307 if(!index_insert_track(trackID, fvpp, snPtrp, spPtrp)) 306 if(!index_insert_track(trackID, fvpp, snPtrp, spPtrp))
308 break; 307 break;
309 if ( dbH->flags & O2_FLAG_LARGE_ADB ){ 308 if ( dbH->flags & O2_FLAG_LARGE_ADB ){
310 close(infid); 309 close(infid);
311 delete *sNormpp; 310 delete[] *sNormpp;
312 delete *sPowerp; 311 delete[] *sPowerp;
313 *sNormpp = *sPowerp = *snPtrp = *snPtrp = 0; 312 *sNormpp = *sPowerp = *snPtrp = *snPtrp = 0;
314 } 313 }
315 } // end for(trackID = start_track ; ... ) 314 } // end for(trackID = start_track ; ... )
316 std::cout << "finished inserting." << endl; 315 std::cout << "finished inserting." << endl;
317 } 316 }
466 sequenceLength = lsh->get_lshHeader()->dataDim / dbH->dim; // shingleDim / vectorDim 465 sequenceLength = lsh->get_lshHeader()->dataDim / dbH->dim; // shingleDim / vectorDim
467 466
468 if(lsh!=SERVER_LSH_INDEX_SINGLETON){ 467 if(lsh!=SERVER_LSH_INDEX_SINGLETON){
469 if( fabs(radius - lsh->get_radius())>fabs(O2_DISTANCE_TOLERANCE)) 468 if( fabs(radius - lsh->get_radius())>fabs(O2_DISTANCE_TOLERANCE))
470 printf("*** Warning: adb_radius (%f) != lsh_radius (%f) ***\n", radius, lsh->get_radius()); 469 printf("*** Warning: adb_radius (%f) != lsh_radius (%f) ***\n", radius, lsh->get_radius());
471 printf("INDEX: dim %d\n", dbH->dim); 470 printf("INDEX: dim %d\n", (int)dbH->dim);
472 printf("INDEX: R %f\n", lsh->get_radius()); 471 printf("INDEX: R %f\n", lsh->get_radius());
473 printf("INDEX: seqlen %d\n", sequenceLength); 472 printf("INDEX: seqlen %d\n", sequenceLength);
474 printf("INDEX: w %f\n", lsh->get_lshHeader()->get_binWidth()); 473 printf("INDEX: w %f\n", lsh->get_lshHeader()->get_binWidth());
475 printf("INDEX: k %d\n", lsh->get_lshHeader()->get_numFuns()); 474 printf("INDEX: k %d\n", lsh->get_lshHeader()->get_numFuns());
476 printf("INDEX: L (m*(m-1))/2 %d\n", lsh->get_lshHeader()->get_numTables()); 475 printf("INDEX: L (m*(m-1))/2 %d\n", lsh->get_lshHeader()->get_numTables());
529 528
530 // return 0: if index does not exist 529 // return 0: if index does not exist
531 // return nqv: if index exists 530 // return nqv: if index exists
532 int audioDB::index_query_loop(const char* dbName, Uns32T queryIndex) { 531 int audioDB::index_query_loop(const char* dbName, Uns32T queryIndex) {
533 532
534 unsigned int numVectors; 533 unsigned int numVectors = 0;
535 double *query, *query_data; 534 double *query = 0, *query_data = 0;
536 double *qNorm, *qnPtr, *qPower = 0, *qpPtr = 0; 535 double *qNorm = 0, *qnPtr = 0, *qPower = 0, *qpPtr = 0;
537 double meanQdur; 536 double meanQdur = 0;
538 void (*add_point_func)(void*,Uns32T,Uns32T,float); 537 void (*add_point_func)(void*,Uns32T,Uns32T,float);
539 538
540 // Set the point-reporter callback based on the value of lsh_exact 539 // Set the point-reporter callback based on the value of lsh_exact
541 if(lsh_exact){ 540 if(lsh_exact){
542 initialize_exact_evalutation_queue(); 541 initialize_exact_evalutation_queue();