annotate index.cpp @ 305:8cec6eb40526

Guard against too-short tracks. Fixes a segfault on attempting to index the AWAL.frames1.mfcc20.adb with default parameters.
author mas01cr
date Tue, 05 Aug 2008 15:34:10 +0000
parents f922c234462f
children 896679d8cc39
rev   line source
mas01mc@292 1 // LSH indexing
mas01mc@292 2 //
mas01mc@292 3 // Construct a persistent LSH table structure
mas01mc@292 4 // Store at the same location as dbName
mas01mc@292 5 // Naming convention:
mas01mc@292 6 // dbName.lsh.${radius}.${sequenceLength}
mas01mc@292 7 //
mas01mc@292 8 //
mas01mc@292 9 // Author: Michael Casey
mas01mc@292 10 // Date: 23 June 2008
mas01mc@292 11
mas01mc@292 12 #include "audioDB.h"
mas01mc@292 13 #include "ReporterBase.h"
mas01mc@292 14
mas01mc@292 15
mas01mc@292 16 /************************* LSH point index to audioDB conversion *****************/
mas01mc@292 17 Uns32T audioDB::index_to_trackID(Uns32T lshID){
mas01mc@292 18 return lshID>>LSH_N_POINT_BITS;
mas01mc@292 19 }
mas01mc@292 20
mas01mc@292 21 Uns32T audioDB::index_to_trackPos(Uns32T lshID){
mas01mc@292 22 return lshID&LSH_POINT_MASK;
mas01mc@292 23 }
mas01mc@292 24
mas01mc@292 25 Uns32T audioDB::index_from_trackInfo(Uns32T trackID, Uns32T spos){
mas01mc@292 26 return (trackID << LSH_N_POINT_BITS) | spos;
mas01mc@292 27 }
mas01mc@292 28
mas01mc@292 29 /************************* LSH indexing and query initialization *****************/
mas01mc@292 30
mas01mc@292 31 char* audioDB::index_get_name(const char*dbName, double radius, Uns32T sequenceLength){
mas01mc@292 32 char* indexName = new char[MAXSTR];
mas01mc@292 33 // Attempt to make new file
mas01mc@292 34 if(strlen(dbName) > (MAXSTR - 32))
mas01mc@292 35 error("dbName is too long for LSH index filename appendages");
mas01mc@292 36 strncpy(indexName, dbName, MAXSTR);
mas01mc@292 37 sprintf(indexName+strlen(dbName), ".lsh.%019.9f.%d", radius, sequenceLength);
mas01mc@292 38 return indexName;
mas01mc@292 39 }
mas01mc@292 40
mas01mc@292 41 // return true if index exists else return false
mas01mc@292 42 int audioDB::index_exists(const char* dbName, double radius, Uns32T sequenceLength){
mas01mc@292 43 // Test to see if file exists
mas01mc@292 44 char* indexName = index_get_name(dbName, radius, sequenceLength);
mas01mc@292 45 lshfid = open (indexName, O_RDONLY);
mas01mc@292 46 delete[] indexName;
mas01mc@292 47 close(lshfid);
mas01mc@292 48
mas01mc@292 49 if(lshfid<0)
mas01mc@292 50 return false;
mas01mc@292 51 else
mas01mc@292 52 return true;
mas01mc@292 53 }
mas01mc@292 54
mas01mc@292 55 vector<vector<float> >* audioDB::index_initialize_shingles(Uns32T sz){
mas01mc@292 56 if(vv)
mas01mc@292 57 delete vv;
mas01mc@292 58 vv = new vector<vector<float> >(sz);
mas01mc@292 59 for(Uns32T i=0 ; i < sz ; i++)
mas01mc@292 60 (*vv)[i]=vector<float>(dbH->dim*sequenceLength); // allocate shingle storage
mas01mc@292 61 return vv;
mas01mc@292 62 }
mas01mc@292 63
mas01mc@292 64 /******************** LSH indexing audioDB database access forall s \in {S} ***********************/
mas01mc@292 65
mas01mc@292 66 // Prepare the AudioDB database for read access and allocate auxillary memory
mas01mc@292 67 void audioDB::index_initialize(double **snp, double **vsnp, double **spp, double **vspp, Uns32T *dvp) {
mas01mc@292 68 *dvp = dbH->length / (dbH->dim * sizeof(double)); // number of database vectors
mas01mc@292 69 *snp = new double[*dvp]; // songs norm pointer: L2 norm table for each vector
mas01mc@292 70
mas01mc@292 71 double *snpp = *snp, *sppp = 0;
mas01mc@292 72 memcpy(*snp, l2normTable, *dvp * sizeof(double));
mas01mc@292 73
mas01mc@292 74 if (!(dbH->flags & O2_FLAG_POWER)) {
mas01mc@292 75 error("database not power-enabled", dbName);
mas01mc@292 76 }
mas01mc@292 77 *spp = new double[*dvp]; // song powertable pointer
mas01mc@292 78 sppp = *spp;
mas01mc@292 79 memcpy(*spp, powerTable, *dvp * sizeof(double));
mas01mc@292 80
mas01mc@292 81 for(Uns32T i = 0; i < dbH->numFiles; i++){
mas01mc@292 82 if(trackTable[i] >= sequenceLength) {
mas01mc@292 83 sequence_sum(snpp, trackTable[i], sequenceLength);
mas01mc@292 84 sequence_sqrt(snpp, trackTable[i], sequenceLength);
mas01mc@292 85
mas01mc@292 86 sequence_sum(sppp, trackTable[i], sequenceLength);
mas01mc@292 87 sequence_average(sppp, trackTable[i], sequenceLength);
mas01mc@292 88 }
mas01mc@292 89 snpp += trackTable[i];
mas01mc@292 90 sppp += trackTable[i];
mas01mc@292 91 }
mas01mc@292 92
mas01mc@292 93 *vsnp = *snp;
mas01mc@292 94 *vspp = *spp;
mas01mc@292 95
mas01mc@292 96 // Move the feature vector read pointer to start of fetures in database
mas01mc@292 97 lseek(dbfid, dbH->dataOffset, SEEK_SET);
mas01mc@292 98 }
mas01mc@292 99
mas01mc@292 100
mas01mc@292 101 /************************ LSH indexing ***********************************/
mas01mc@292 102 void audioDB::index_index_db(const char* dbName){
mas01mc@292 103
mas01mc@292 104 char* newIndexName;
mas01mc@292 105 double *fvp = 0, *sNorm = 0, *snPtr = 0, *sPower = 0, *spPtr = 0;
mas01mc@292 106 Uns32T dbVectors = 0;
mas01mc@292 107
mas01mc@292 108 printf("INDEX: initializing header\n");
mas01mc@292 109 // Check if audioDB exists, initialize header and open database for read
mas01mc@292 110 forWrite = false;
mas01mc@292 111 initDBHeader(dbName);
mas01mc@292 112
mas01mc@292 113 newIndexName = index_get_name(dbName, radius, sequenceLength);
mas01mc@292 114
mas01mc@292 115 // Set unit norming flag override
mas01mc@292 116 audioDB::normalizedDistance = !audioDB::no_unit_norming;
mas01mc@292 117
mas01mc@292 118 printf("INDEX: dim %d\n", dbH->dim);
mas01mc@292 119 printf("INDEX: R %f\n", radius);
mas01mc@292 120 printf("INDEX: seqlen %d\n", sequenceLength);
mas01mc@292 121 printf("INDEX: lsh_w %f\n", lsh_param_w);
mas01mc@292 122 printf("INDEX: lsh_k %d\n", lsh_param_k);
mas01mc@292 123 printf("INDEX: lsh_m %d\n", lsh_param_m);
mas01mc@292 124 printf("INDEX: lsh_N %d\n", lsh_param_N);
mas01mc@296 125 printf("INDEX: lsh_C %d\n", lsh_param_ncols);
mas01mc@292 126 printf("INDEX: lsh_b %d\n", lsh_param_b);
mas01mc@292 127 printf("INDEX: normalized? %s\n", normalizedDistance?"true":"false");
mas01mc@292 128 fflush(stdout);
mas01mc@292 129
mas01mc@292 130
mas01mc@292 131 index_initialize(&sNorm, &snPtr, &sPower, &spPtr, &dbVectors);
mas01mc@292 132
mas01mc@292 133 if((lshfid = open(newIndexName,O_RDONLY))<0){
mas01mc@292 134 printf("INDEX: constructing new LSH index\n");
mas01mc@292 135 printf("INDEX: making index file %s\n", newIndexName);
mas01mc@292 136 fflush(stdout);
mas01mc@292 137 // Construct new LSH index
mas01mc@292 138 lsh = new LSH((float)lsh_param_w, lsh_param_k,
mas01mc@292 139 lsh_param_m,
mas01mc@292 140 (Uns32T)(sequenceLength*dbH->dim),
mas01mc@292 141 lsh_param_N,
mas01mc@292 142 lsh_param_ncols,
mas01mc@292 143 (float)radius);
mas01mc@292 144 assert(lsh);
mas01mc@292 145
mas01mc@292 146 Uns32T endTrack = lsh_param_b;
mas01mc@292 147 if( endTrack > dbH->numFiles)
mas01mc@292 148 endTrack = dbH->numFiles;
mas01mc@292 149 // Insert up to lsh_param_b tracks
mas01mc@292 150 index_insert_tracks(0, endTrack, &fvp, &sNorm, &snPtr, &sPower, &spPtr);
mas01mc@292 151 lsh->serialize(newIndexName, lsh_in_core?O2_SERIAL_FILEFORMAT2:O2_SERIAL_FILEFORMAT1);
mas01mc@292 152
mas01mc@292 153 // Clean up
mas01mc@292 154 delete lsh;
mas01mc@292 155 close(lshfid);
mas01mc@292 156 }
mas01mc@292 157
mas01mc@292 158 // Attempt to open LSH file
mas01mc@292 159 if((lshfid = open(newIndexName,O_RDONLY))>0){
mas01mc@292 160 printf("INDEX: merging with existing LSH index\n");
mas01mc@292 161 fflush(stdout);
mas01mc@292 162
mas01mc@292 163 // Get the lsh header info and find how many tracks are inserted already
mas01mc@292 164 lsh = new LSH(newIndexName, false); // lshInCore=false to avoid loading hashTables here
mas01mc@292 165 assert(lsh);
mas01mc@292 166 Uns32T maxs = index_to_trackID(lsh->get_maxp())+1;
mas01mc@292 167 delete lsh;
mas01mc@292 168
mas01mc@292 169 // This allows for updating index after more tracks are inserted into audioDB
mas01mc@292 170 for(Uns32T startTrack = maxs; startTrack < dbH->numFiles; startTrack+=lsh_param_b){
mas01mc@292 171
mas01mc@292 172 Uns32T endTrack = startTrack + lsh_param_b;
mas01mc@292 173 if( endTrack > dbH->numFiles)
mas01mc@292 174 endTrack = dbH->numFiles;
mas01mc@292 175 printf("Indexing track range: %d - %d\n", startTrack, endTrack);
mas01mc@292 176 fflush(stdout);
mas01mc@292 177 lsh = new LSH(newIndexName, lsh_in_core); // Initialize core memory for LSH tables
mas01mc@292 178 assert(lsh);
mas01mc@292 179
mas01mc@292 180 // Insert up to lsh_param_b database tracks
mas01mc@292 181 index_insert_tracks(startTrack, endTrack, &fvp, &sNorm, &snPtr, &sPower, &spPtr);
mas01mc@292 182
mas01mc@292 183 // Serialize to file
mas01mc@292 184 lsh->serialize(newIndexName, lsh_in_core?O2_SERIAL_FILEFORMAT2:O2_SERIAL_FILEFORMAT1); // Serialize core LSH heap to disk
mas01mc@292 185 delete lsh;
mas01mc@292 186 }
mas01mc@292 187
mas01mc@292 188 close(lshfid);
mas01mc@292 189 printf("INDEX: done constructing LSH index.\n");
mas01mc@292 190 fflush(stdout);
mas01mc@292 191
mas01mc@292 192 }
mas01mc@292 193 else{
mas01mc@292 194 error("Something's wrong with LSH index file");
mas01mc@292 195 exit(1);
mas01mc@292 196 }
mas01mc@292 197
mas01mc@292 198
mas01mc@292 199 delete[] newIndexName;
mas01mc@292 200 if(sNorm)
mas01mc@292 201 delete[] sNorm;
mas01mc@292 202 if(sPower)
mas01mc@292 203 delete[] sPower;
mas01mc@292 204
mas01mc@292 205
mas01mc@292 206 }
mas01mc@292 207
mas01mc@292 208 void audioDB::index_insert_tracks(Uns32T start_track, Uns32T end_track,
mas01mc@292 209 double** fvpp, double** sNormpp,double** snPtrp,
mas01mc@292 210 double** sPowerp, double** spPtrp){
mas01mc@292 211 size_t nfv = 0;
mas01mc@292 212 double* fvp = 0; // Keep pointer for memory allocation and free() for track data
mas01mc@292 213 Uns32T trackID = 0;
mas01mc@292 214
mas01mc@292 215 VERB_LOG(1, "indexing tracks...");
mas01mc@292 216
mas01mc@292 217
mas01mc@292 218 for(trackID = start_track ; trackID < end_track ; trackID++ ){
mas01mc@292 219 read_data(trackID, &fvp, &nfv); // over-writes fvp and nfv
mas01mc@292 220 *fvpp = fvp; // Protect memory allocation and free() for track data
mas01mc@292 221 if(!index_insert_track(trackID, fvpp, snPtrp, spPtrp))
mas01mc@292 222 break;
mas01mc@292 223 }
mas01mc@292 224 std::cout << "finished inserting." << endl;
mas01mc@292 225 }
mas01mc@292 226
mas01mc@292 227 int audioDB::index_insert_track(Uns32T trackID, double** fvpp, double** snpp, double** sppp){
mas01mc@292 228 // Loop over the current input track's vectors
mas01cr@305 229 Uns32T numVecs = 0;
mas01cr@305 230 if (trackTable[trackID] > O2_MAXTRACKLEN) {
mas01cr@305 231 if (O2_MAXTRACKLEN < sequenceLength - 1) {
mas01cr@305 232 numVecs = 0;
mas01cr@305 233 } else {
mas01cr@305 234 numVecs = O2_MAXTRACKLEN - sequenceLength + 1;
mas01cr@305 235 }
mas01cr@305 236 } else {
mas01cr@305 237 if (trackTable[trackID] < sequenceLength - 1) {
mas01cr@305 238 numVecs = 0;
mas01cr@305 239 } else {
mas01cr@305 240 numVecs = trackTable[trackID] - sequenceLength + 1;
mas01cr@305 241 }
mas01cr@305 242 }
mas01mc@292 243 vv = index_initialize_shingles(numVecs);
mas01mc@292 244
mas01mc@292 245 for( Uns32T pointID = 0 ; pointID < numVecs; pointID++ )
mas01mc@292 246 index_make_shingle(vv, pointID, *fvpp, dbH->dim, sequenceLength);
mas01mc@292 247
mas01mc@292 248 Uns32T numVecsAboveThreshold = index_norm_shingles(vv, *snpp, *sppp);
mas01mc@292 249 Uns32T collisionCount = index_insert_shingles(vv, trackID, *sppp);
mas01mc@292 250 float meanCollisionCount = numVecsAboveThreshold?(float)collisionCount/numVecsAboveThreshold:0;
mas01mc@292 251
mas01mc@292 252 /* index_norm_shingles() only goes as far as the end of the
mas01mc@292 253 sequence, which is right, but the space allocated is for the
mas01mc@292 254 whole track. */
mas01mc@292 255
mas01mc@292 256 /* But numVecs will be <trackTable[track] if trackTable[track]>O2_MAXTRACKLEN
mas01mc@292 257 * So let's be certain the pointers are in the correct place
mas01mc@292 258 */
mas01mc@292 259
mas01mc@292 260 *snpp += trackTable[trackID];
mas01mc@292 261 *sppp += trackTable[trackID];
mas01mc@292 262 *fvpp += trackTable[trackID] * dbH->dim;
mas01mc@292 263
mas01mc@292 264 std::cout << " n=" << trackTable[trackID] << " n'=" << numVecsAboveThreshold << " E[#c]=" << lsh->get_mean_collision_rate() << " E[#p]=" << meanCollisionCount << endl;
mas01mc@292 265 std::cout.flush();
mas01mc@292 266 return true;
mas01mc@292 267 }
mas01mc@292 268
mas01mc@292 269 Uns32T audioDB::index_insert_shingles(vector<vector<float> >* vv, Uns32T trackID, double* spp){
mas01mc@292 270 Uns32T collisionCount = 0;
mas01mc@292 271 cout << "[" << trackID << "]" << fileTable+trackID*O2_FILETABLE_ENTRY_SIZE;
mas01mc@292 272 for( Uns32T pointID=0 ; pointID < (*vv).size(); pointID++)
mas01mc@292 273 if(!use_absolute_threshold || (use_absolute_threshold && (*spp++ >= absolute_threshold)))
mas01mc@292 274 collisionCount += lsh->insert_point((*vv)[pointID], index_from_trackInfo(trackID, pointID));
mas01mc@292 275 return collisionCount;
mas01mc@292 276 }
mas01mc@292 277
mas01mc@292 278 /********************* LSH shingle construction ***************************/
mas01mc@292 279
mas01mc@292 280 // Construct shingles out of a feature matrix
mas01mc@292 281 // inputs:
mas01mc@292 282 // idx is vector index in feature matrix
mas01mc@292 283 // fvp is base feature matrix pointer double* [numVecs x dbH->dim]
mas01mc@292 284 //
mas01mc@292 285 // pre-conditions:
mas01mc@292 286 // dbH->dim
mas01mc@292 287 // sequenceLength
mas01mc@292 288 // idx < numVectors - sequenceLength + 1
mas01mc@292 289 //
mas01mc@292 290 // post-conditions:
mas01mc@292 291 // (*vv)[idx] contains a shingle with dbH->dim*sequenceLength float values
mas01mc@292 292
mas01mc@292 293 void audioDB::index_make_shingle(vector<vector<float> >* vv, Uns32T idx, double* fvp, Uns32T dim, Uns32T seqLen){
mas01mc@292 294 assert(idx<(*vv).size());
mas01mc@292 295 vector<float>::iterator ve = (*vv)[idx].end();
mas01mc@292 296 vi=(*vv)[idx].begin(); // shingle iterator
mas01mc@292 297 // First feature vector in shingle
mas01mc@292 298 if(idx==0){
mas01mc@292 299 while(vi!=ve)
mas01mc@292 300 *vi++ = (float)(*fvp++);
mas01mc@292 301 }
mas01mc@292 302 // Not first feature vector in shingle
mas01mc@292 303 else{
mas01mc@292 304 vector<float>::iterator ui=(*vv)[idx-1].begin() + dim; // previous shingle iterator
mas01mc@292 305 // Previous seqLen-1 dim-vectors
mas01mc@292 306 while(vi!=ve-dim)
mas01mc@292 307 *vi++=*ui++;
mas01mc@292 308 // Move data pointer to next feature vector
mas01mc@292 309 fvp += ( seqLen + idx - 1 ) * dim ;
mas01mc@292 310 // New d-vector
mas01mc@292 311 while(vi!=ve)
mas01mc@292 312 *vi++ = (float)(*fvp++);
mas01mc@292 313 }
mas01mc@292 314 }
mas01mc@292 315
mas01mc@292 316 // norm shingles
mas01mc@292 317 // in-place norming, no deletions
mas01mc@292 318 // If using power, return number of shingles above power threshold
mas01mc@292 319 int audioDB::index_norm_shingles(vector<vector<float> >* vv, double* snp, double* spp){
mas01mc@292 320 int z = 0; // number of above-threshold shingles
mas01mc@292 321 float l2norm;
mas01mc@292 322 double power;
mas01mc@292 323 float oneOverRadius = 1./(float)sqrt(radius); // Passed radius is really radius^2
mas01mc@292 324 float oneOverSqrtl2NormDivRad = oneOverRadius;
mas01mc@292 325 if(!spp)
mas01mc@292 326 error("LSH indexing and query requires a power feature using -w or -W");
mas01mc@292 327 Uns32T shingleSize = sequenceLength*dbH->dim;
mas01mc@292 328 for(Uns32T a=0; a<(*vv).size(); a++){
mas01mc@292 329 l2norm = (float)(*snp++);
mas01mc@292 330 if(audioDB::normalizedDistance)
mas01mc@292 331 oneOverSqrtl2NormDivRad = (1./l2norm)*oneOverRadius;
mas01mc@292 332
mas01mc@292 333 for(Uns32T b=0; b < shingleSize ; b++)
mas01mc@292 334 (*vv)[a][b]*=oneOverSqrtl2NormDivRad;
mas01mc@292 335
mas01mc@292 336 power = *spp++;
mas01mc@292 337 if(use_absolute_threshold){
mas01mc@292 338 if ( power >= absolute_threshold )
mas01mc@292 339 z++;
mas01mc@292 340 }
mas01mc@292 341 else
mas01mc@292 342 z++;
mas01mc@292 343 }
mas01mc@292 344 return z;
mas01mc@292 345 }
mas01mc@292 346
mas01mc@292 347
mas01mc@292 348 /*********************** LSH retrieval ****************************/
mas01mc@292 349
mas01mc@292 350
mas01mc@292 351 // return true if indexed query performed else return false
mas01mc@292 352 int audioDB::index_init_query(const char* dbName){
mas01mc@292 353
mas01mc@292 354 if(!(index_exists(dbName, radius, sequenceLength)))
mas01mc@292 355 return false;
mas01mc@292 356
mas01mc@292 357 char* indexName = index_get_name(dbName, radius, sequenceLength);
mas01mc@292 358
mas01mc@292 359 // Test to see if file exists
mas01mc@292 360 if((lshfid = open (indexName, O_RDONLY)) < 0){
mas01mc@292 361 delete[] indexName;
mas01mc@292 362 return false;
mas01mc@292 363 }
mas01mc@292 364
mas01mc@292 365 printf("INDEX: initializing header\n");
mas01mc@292 366
mas01mc@292 367 lsh = new LSH(indexName, false); // Get the header only here
mas01mc@292 368 assert(lsh);
mas01mc@292 369 sequenceLength = lsh->get_lshHeader()->dataDim / dbH->dim; // shingleDim / vectorDim
mas01mc@292 370
mas01mc@292 371
mas01mc@292 372 if( fabs(radius - lsh->get_radius())>fabs(O2_DISTANCE_TOLERANCE))
mas01mc@292 373 printf("*** Warning: adb_radius (%f) != lsh_radius (%f) ***\n", radius, lsh->get_radius());
mas01mc@292 374
mas01mc@292 375 printf("INDEX: dim %d\n", dbH->dim);
mas01mc@292 376 printf("INDEX: R %f\n", lsh->get_radius());
mas01mc@292 377 printf("INDEX: seqlen %d\n", sequenceLength);
mas01mc@292 378 printf("INDEX: w %f\n", lsh->get_lshHeader()->get_binWidth());
mas01mc@292 379 printf("INDEX: k %d\n", lsh->get_lshHeader()->get_numFuns());
mas01mc@292 380 printf("INDEX: L (m*(m-1))/2 %d\n", lsh->get_lshHeader()->get_numTables());
mas01mc@292 381 printf("INDEX: N %d\n", lsh->get_lshHeader()->get_numRows());
mas01mc@292 382 printf("INDEX: s %d\n", index_to_trackID(lsh->get_maxp()));
mas01mc@292 383 printf("INDEX: Opened LSH index file %s\n", indexName);
mas01mc@292 384 fflush(stdout);
mas01mc@292 385
mas01mc@292 386 // Check to see if we are loading hash tables into core, and do so if true
mas01mc@292 387 if((lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2) || lsh_in_core){
mas01mc@292 388 printf("INDEX: loading hash tables into core %s\n", (lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2)?"FORMAT2":"FORMAT1");
mas01mc@292 389 delete lsh;
mas01mc@292 390 lsh = new LSH(indexName, true);
mas01mc@292 391 }
mas01mc@292 392
mas01mc@292 393 delete[] indexName;
mas01mc@292 394 return true;
mas01mc@292 395 }
mas01mc@292 396
mas01mc@292 397 // *Static* approximate NN point reporter callback method for lshlib
mas01mc@292 398 void audioDB::index_add_point_approximate(void* instancePtr, Uns32T pointID, Uns32T qpos, float dist){
mas01mc@292 399 assert(instancePtr); // We need an instance for this callback
mas01mc@292 400 audioDB* myself = (audioDB*) instancePtr; // Use explicit cast to recover "this" instance
mas01mc@292 401 Uns32T trackID = index_to_trackID(pointID);
mas01mc@292 402 Uns32T spos = index_to_trackPos(pointID);
mas01mc@292 403 // Skip identity in query_from_key
mas01mc@292 404 if( !myself->query_from_key || (myself->query_from_key && ( trackID != myself->query_from_key_index )) )
mas01mc@292 405 myself->reporter->add_point(trackID, qpos, spos, dist);
mas01mc@292 406 }
mas01mc@292 407
mas01mc@292 408 // *Static* exact NN point reporter callback method for lshlib
mas01mc@292 409 // Maintain a queue of points to pass to query_points() for exact evaluation
mas01mc@292 410 void audioDB::index_add_point_exact(void* instancePtr, Uns32T pointID, Uns32T qpos, float dist){
mas01mc@292 411 assert(instancePtr); // We need an instance for this callback
mas01mc@292 412 audioDB* myself = (audioDB*) instancePtr; // Use explicit cast to recover "this" instance
mas01mc@292 413 Uns32T trackID = index_to_trackID(pointID);
mas01mc@292 414 Uns32T spos = index_to_trackPos(pointID);
mas01mc@292 415 // Skip identity in query_from_key
mas01mc@292 416 if( !myself->query_from_key || (myself->query_from_key && ( trackID != myself->query_from_key_index )) )
mas01mc@292 417 myself->index_insert_exact_evaluation_queue(trackID, qpos, spos);
mas01mc@292 418 }
mas01mc@292 419
mas01mc@292 420 void audioDB::initialize_exact_evalutation_queue(){
mas01mc@292 421 if(exact_evaluation_queue)
mas01mc@292 422 delete exact_evaluation_queue;
mas01mc@292 423 exact_evaluation_queue = new priority_queue<PointPair, std::vector<PointPair>, std::less<PointPair> >;
mas01mc@292 424 }
mas01mc@292 425
mas01mc@292 426 void audioDB::index_insert_exact_evaluation_queue(Uns32T trackID, Uns32T qpos, Uns32T spos){
mas01mc@292 427 PointPair p(trackID, qpos, spos);
mas01mc@292 428 exact_evaluation_queue->push(p);
mas01mc@292 429 }
mas01mc@292 430
mas01mc@292 431 // return 0: if index does not exist
mas01mc@292 432 // return nqv: if index exists
mas01mc@292 433 int audioDB::index_query_loop(const char* dbName, Uns32T queryIndex) {
mas01mc@292 434
mas01mc@292 435 unsigned int numVectors;
mas01mc@292 436 double *query, *query_data;
mas01mc@292 437 double *qNorm, *qnPtr, *qPower = 0, *qpPtr = 0;
mas01mc@292 438 double meanQdur;
mas01mc@292 439 void (*add_point_func)(void*,Uns32T,Uns32T,float);
mas01mc@292 440
mas01mc@292 441 // Set the point-reporter callback based on the value of lsh_exact
mas01mc@292 442 if(lsh_exact){
mas01mc@292 443 initialize_exact_evalutation_queue();
mas01mc@292 444 add_point_func = &index_add_point_exact;
mas01mc@292 445 }
mas01mc@292 446 else
mas01mc@292 447 add_point_func = &index_add_point_approximate;
mas01mc@292 448
mas01mc@292 449 if(!index_init_query(dbName)) // sets-up LSH index structures for querying
mas01mc@292 450 return 0;
mas01mc@292 451
mas01mc@292 452 char* database = index_get_name(dbName, radius, sequenceLength);
mas01mc@292 453
mas01mc@292 454 if(query_from_key)
mas01mc@292 455 set_up_query_from_key(&query_data, &query, &qNorm, &qnPtr, &qPower, &qpPtr, &meanQdur, &numVectors, queryIndex);
mas01mc@292 456 else
mas01mc@292 457 set_up_query(&query_data, &query, &qNorm, &qnPtr, &qPower, &qpPtr, &meanQdur, &numVectors); // get query vectors
mas01mc@292 458
mas01mc@292 459 VERB_LOG(1, "retrieving tracks...");
mas01mc@292 460
mas01mc@292 461 assert(pointNN>0 && pointNN<=O2_MAXNN);
mas01mc@292 462 assert(trackNN>0 && trackNN<=O2_MAXNN);
mas01mc@292 463
mas01mc@292 464 gettimeofday(&tv1, NULL);
mas01mc@292 465 // query vector index
mas01mc@292 466 Uns32T Nq = (numVectors>O2_MAXTRACKLEN?O2_MAXTRACKLEN:numVectors) - sequenceLength + 1;
mas01mc@292 467 vv = index_initialize_shingles(Nq); // allocate memory to copy query vectors to shingles
mas01mc@292 468 cout << "Nq=" << Nq; cout.flush();
mas01mc@292 469 // Construct shingles from query features
mas01mc@292 470 for( Uns32T pointID = 0 ; pointID < Nq ; pointID++ )
mas01mc@292 471 index_make_shingle(vv, pointID, query, dbH->dim, sequenceLength);
mas01mc@292 472
mas01mc@292 473 // Normalize query vectors
mas01mc@292 474 Uns32T numVecsAboveThreshold = index_norm_shingles( vv, qnPtr, qpPtr );
mas01mc@292 475 cout << " Nq'=" << numVecsAboveThreshold << endl; cout.flush();
mas01mc@292 476
mas01mc@292 477 // Nq contains number of inspected points in query file,
mas01mc@292 478 // numVecsAboveThreshold is number of points with power >= absolute_threshold
mas01mc@292 479 double* qpp = qpPtr; // Keep original qpPtr for possible exact evaluation
mas01mc@292 480 if(usingQueryPoint && numVecsAboveThreshold){
mas01mc@292 481 if((lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2) || lsh_in_core)
mas01mc@292 482 lsh->retrieve_point((*vv)[0], queryPoint, add_point_func, (void*)this);
mas01mc@292 483 else
mas01mc@292 484 lsh->serial_retrieve_point(database, (*vv)[0], queryPoint, add_point_func, (void*)this);
mas01mc@292 485 }
mas01mc@292 486 else if(numVecsAboveThreshold)
mas01mc@292 487 for( Uns32T pointID = 0 ; pointID < Nq; pointID++ )
mas01mc@292 488 if(!use_absolute_threshold || (use_absolute_threshold && (*qpp++ >= absolute_threshold)))
mas01mc@292 489 if((lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2) || lsh_in_core)
mas01mc@292 490 lsh->retrieve_point((*vv)[pointID], pointID, add_point_func, (void*)this);
mas01mc@292 491 else
mas01mc@292 492 lsh->serial_retrieve_point(database, (*vv)[pointID], pointID, add_point_func, (void*)this);
mas01mc@292 493
mas01mc@292 494 if(lsh_exact)
mas01mc@292 495 // Perform exact distance computation on point pairs in exact_evaluation_queue
mas01mc@292 496 query_loop_points(query, qnPtr, qpPtr, meanQdur, numVectors);
mas01mc@292 497
mas01mc@292 498 gettimeofday(&tv2,NULL);
mas01mc@292 499 VERB_LOG(1,"elapsed time: %ld msec\n",
mas01mc@292 500 (tv2.tv_sec*1000 + tv2.tv_usec/1000) -
mas01mc@292 501 (tv1.tv_sec*1000 + tv1.tv_usec/1000))
mas01mc@292 502
mas01mc@292 503 // Close the index file
mas01mc@292 504 close(lshfid);
mas01mc@292 505
mas01mc@292 506 // Clean up
mas01mc@292 507 if(query_data)
mas01mc@292 508 delete[] query_data;
mas01mc@292 509 if(qNorm)
mas01mc@292 510 delete[] qNorm;
mas01mc@292 511 if(qPower)
mas01mc@292 512 delete[] qPower;
mas01mc@292 513 if(database)
mas01mc@292 514 delete[] database;
mas01mc@292 515
mas01mc@292 516 return Nq;
mas01mc@292 517 }
mas01mc@292 518