annotate index.cpp @ 369:6564be3109c5 gcc-4.3-cleanups

gcc-4.3 warning cleanups for lshlib.cpp (I do not believe that any of these changes contain significant copyrightable "intellectual property". However, to the extent that they do, the changes are hereby released into the Public Domain, and may be therefore be used by anyone for any purpose without need for consideration of any kind.)
author mas01cr
date Wed, 12 Nov 2008 15:23:32 +0000
parents 6ff688bac7b7
children ef4792df8f93 dc099cd34b5b
rev   line source
mas01mc@292 1 // LSH indexing
mas01mc@292 2 //
mas01mc@292 3 // Construct a persistent LSH table structure
mas01mc@292 4 // Store at the same location as dbName
mas01mc@292 5 // Naming convention:
mas01mc@292 6 // dbName.lsh.${radius}.${sequenceLength}
mas01mc@292 7 //
mas01mc@292 8 //
mas01mc@292 9 // Author: Michael Casey
mas01mc@292 10 // Date: 23 June 2008
mas01mc@324 11 //
mas01mc@324 12 // 19th August 2008 - added O2_FLAG_LARGE_ADB support
mas01mc@292 13
mas01mc@292 14 #include "audioDB.h"
mas01mc@292 15 #include "ReporterBase.h"
mas01mc@292 16
mas01mc@292 17
mas01mc@292 18 /************************* LSH point index to audioDB conversion *****************/
mas01mc@324 19 Uns32T audioDB::index_to_trackID(Uns32T lshID, Uns32T nPntBits){
mas01mc@324 20 assert(nPntBits);
mas01mc@324 21 return lshID>>nPntBits;
mas01mc@292 22 }
mas01mc@292 23
mas01mc@324 24 Uns32T audioDB::index_to_trackPos(Uns32T lshID, Uns32T nPntBits){
mas01mc@324 25 assert(nPntBits);
mas01mc@324 26 return lshID&((1<<nPntBits)-1);
mas01mc@292 27 }
mas01mc@292 28
mas01mc@324 29 Uns32T audioDB::index_from_trackInfo(Uns32T trackID, Uns32T spos, Uns32T nPntBits){
mas01mc@324 30 assert(nPntBits);
mas01mc@324 31 return (trackID << nPntBits) | spos;
mas01mc@292 32 }
mas01mc@292 33
mas01mc@292 34 /************************* LSH indexing and query initialization *****************/
mas01mc@292 35
mas01mc@292 36 char* audioDB::index_get_name(const char*dbName, double radius, Uns32T sequenceLength){
mas01mc@292 37 char* indexName = new char[MAXSTR];
mas01mc@292 38 // Attempt to make new file
mas01mc@292 39 if(strlen(dbName) > (MAXSTR - 32))
mas01mc@292 40 error("dbName is too long for LSH index filename appendages");
mas01mc@292 41 strncpy(indexName, dbName, MAXSTR);
mas01mc@292 42 sprintf(indexName+strlen(dbName), ".lsh.%019.9f.%d", radius, sequenceLength);
mas01mc@292 43 return indexName;
mas01mc@292 44 }
mas01mc@292 45
mas01mc@292 46 // return true if index exists else return false
mas01mc@292 47 int audioDB::index_exists(const char* dbName, double radius, Uns32T sequenceLength){
mas01mc@292 48 // Test to see if file exists
mas01mc@292 49 char* indexName = index_get_name(dbName, radius, sequenceLength);
mas01mc@292 50 lshfid = open (indexName, O_RDONLY);
mas01mc@292 51 delete[] indexName;
mas01mc@292 52 close(lshfid);
mas01mc@292 53
mas01mc@292 54 if(lshfid<0)
mas01mc@292 55 return false;
mas01mc@292 56 else
mas01mc@292 57 return true;
mas01mc@292 58 }
mas01mc@292 59
mas01mc@324 60 // If we are a server and have a memory-resident index, check the indexName against the resident index (using get_indexName())
mas01mc@324 61 // If they match, i.e. path+dbName_resident == path+dbName_requested, use
mas01mc@324 62 // the memory-resident index.
mas01mc@324 63 // Else allocate a new LSH instance and load the index from disk
mas01mc@308 64 LSH* audioDB::index_allocate(char* indexName, bool load_hashTables){
mas01mc@308 65 LSH* gIndx=SERVER_LSH_INDEX_SINGLETON;
mas01mc@308 66 if(isServer && gIndx && (strncmp(gIndx->get_indexName(), indexName, MAXSTR)==0) )
mas01mc@308 67 audioDB::lsh = gIndx; // Use the global SERVER resident index
mas01mc@308 68 else{
mas01mc@308 69 if(audioDB::lsh)
mas01mc@308 70 delete audioDB::lsh;
mas01mc@308 71 audioDB::lsh = new LSH(indexName, load_hashTables);
mas01mc@308 72 }
mas01mc@308 73 assert(audioDB::lsh);
mas01mc@308 74 return audioDB::lsh;
mas01mc@308 75 }
mas01mc@308 76
mas01mc@292 77 vector<vector<float> >* audioDB::index_initialize_shingles(Uns32T sz){
mas01mc@292 78 if(vv)
mas01mc@292 79 delete vv;
mas01mc@292 80 vv = new vector<vector<float> >(sz);
mas01mc@292 81 for(Uns32T i=0 ; i < sz ; i++)
mas01mc@292 82 (*vv)[i]=vector<float>(dbH->dim*sequenceLength); // allocate shingle storage
mas01mc@292 83 return vv;
mas01mc@292 84 }
mas01mc@292 85
mas01mc@292 86 /******************** LSH indexing audioDB database access forall s \in {S} ***********************/
mas01mc@292 87
mas01mc@292 88 // Prepare the AudioDB database for read access and allocate auxillary memory
mas01mc@292 89 void audioDB::index_initialize(double **snp, double **vsnp, double **spp, double **vspp, Uns32T *dvp) {
mas01mc@324 90 if (!(dbH->flags & O2_FLAG_POWER)) {
mas01mc@324 91 error("INDEXed database must be power-enabled", dbName);
mas01mc@324 92 }
mas01mc@324 93
mas01mc@325 94 double *snpp = 0, *sppp = 0;
mas01mc@324 95
mas01mc@292 96 *dvp = dbH->length / (dbH->dim * sizeof(double)); // number of database vectors
mas01mc@292 97 *snp = new double[*dvp]; // songs norm pointer: L2 norm table for each vector
mas01mc@325 98 snpp = *snp;
mas01mc@292 99 *spp = new double[*dvp]; // song powertable pointer
mas01mc@292 100 sppp = *spp;
mas01mc@325 101
mas01mc@324 102 memcpy(*snp, l2normTable, *dvp * sizeof(double));
mas01mc@292 103 memcpy(*spp, powerTable, *dvp * sizeof(double));
mas01mc@324 104
mas01mc@324 105
mas01mc@292 106 for(Uns32T i = 0; i < dbH->numFiles; i++){
mas01mc@292 107 if(trackTable[i] >= sequenceLength) {
mas01mc@292 108 sequence_sum(snpp, trackTable[i], sequenceLength);
mas01mc@292 109 sequence_sqrt(snpp, trackTable[i], sequenceLength);
mas01mc@292 110
mas01mc@292 111 sequence_sum(sppp, trackTable[i], sequenceLength);
mas01mc@292 112 sequence_average(sppp, trackTable[i], sequenceLength);
mas01mc@292 113 }
mas01mc@292 114 snpp += trackTable[i];
mas01mc@292 115 sppp += trackTable[i];
mas01mc@292 116 }
mas01mc@324 117
mas01mc@292 118 *vsnp = *snp;
mas01mc@292 119 *vspp = *spp;
mas01mc@324 120
mas01mc@292 121 // Move the feature vector read pointer to start of fetures in database
mas01mc@292 122 lseek(dbfid, dbH->dataOffset, SEEK_SET);
mas01mc@292 123 }
mas01mc@292 124
mas01mc@292 125
mas01mc@292 126 /************************ LSH indexing ***********************************/
mas01mc@292 127 void audioDB::index_index_db(const char* dbName){
mas01mc@292 128 char* newIndexName;
mas01mc@292 129 double *fvp = 0, *sNorm = 0, *snPtr = 0, *sPower = 0, *spPtr = 0;
mas01mc@292 130 Uns32T dbVectors = 0;
mas01mc@292 131
mas01mc@324 132
mas01mc@292 133 printf("INDEX: initializing header\n");
mas01mc@292 134 // Check if audioDB exists, initialize header and open database for read
mas01mc@292 135 forWrite = false;
mas01mc@292 136 initDBHeader(dbName);
mas01mc@292 137
mas01mc@324 138 if(dbH->flags & O2_FLAG_POWER)
mas01mc@324 139 usingPower = true;
mas01mc@324 140
mas01mc@324 141 if(dbH->flags & O2_FLAG_TIMES)
mas01mc@324 142 usingTimes = true;
mas01mc@324 143
mas01mc@292 144 newIndexName = index_get_name(dbName, radius, sequenceLength);
mas01mc@292 145
mas01mc@292 146 // Set unit norming flag override
mas01mc@292 147 audioDB::normalizedDistance = !audioDB::no_unit_norming;
mas01mc@292 148
mas01mc@327 149 VERB_LOG(1, "INDEX: dim %d\n", (int)dbH->dim);
mas01mc@327 150 VERB_LOG(1, "INDEX: R %f\n", radius);
mas01mc@327 151 VERB_LOG(1, "INDEX: seqlen %d\n", sequenceLength);
mas01mc@327 152 VERB_LOG(1, "INDEX: lsh_w %f\n", lsh_param_w);
mas01mc@327 153 VERB_LOG(1, "INDEX: lsh_k %d\n", lsh_param_k);
mas01mc@327 154 VERB_LOG(1, "INDEX: lsh_m %d\n", lsh_param_m);
mas01mc@327 155 VERB_LOG(1, "INDEX: lsh_N %d\n", lsh_param_N);
mas01mc@327 156 VERB_LOG(1, "INDEX: lsh_C %d\n", lsh_param_ncols);
mas01mc@327 157 VERB_LOG(1, "INDEX: lsh_b %d\n", lsh_param_b);
mas01mc@327 158 VERB_LOG(1, "INDEX: normalized? %s\n", normalizedDistance?"true":"false");
mas01mc@292 159
mas01mc@292 160 if((lshfid = open(newIndexName,O_RDONLY))<0){
mas01mc@292 161 printf("INDEX: constructing new LSH index\n");
mas01mc@292 162 printf("INDEX: making index file %s\n", newIndexName);
mas01mc@292 163 fflush(stdout);
mas01mc@292 164 // Construct new LSH index
mas01mc@292 165 lsh = new LSH((float)lsh_param_w, lsh_param_k,
mas01mc@292 166 lsh_param_m,
mas01mc@292 167 (Uns32T)(sequenceLength*dbH->dim),
mas01mc@292 168 lsh_param_N,
mas01mc@292 169 lsh_param_ncols,
mas01mc@292 170 (float)radius);
mas01mc@292 171 assert(lsh);
mas01mc@292 172
mas01mc@292 173 Uns32T endTrack = lsh_param_b;
mas01mc@292 174 if( endTrack > dbH->numFiles)
mas01mc@292 175 endTrack = dbH->numFiles;
mas01mc@292 176 // Insert up to lsh_param_b tracks
mas01mc@324 177 if( ! (dbH->flags & O2_FLAG_LARGE_ADB) ){
mas01mc@324 178 index_initialize(&sNorm, &snPtr, &sPower, &spPtr, &dbVectors);
mas01mc@324 179 }
mas01mc@324 180 index_insert_tracks(0, endTrack, &fvp, &sNorm, &snPtr, &sPower, &spPtr);
mas01mc@292 181 lsh->serialize(newIndexName, lsh_in_core?O2_SERIAL_FILEFORMAT2:O2_SERIAL_FILEFORMAT1);
mas01mc@292 182
mas01mc@292 183 // Clean up
mas01mc@292 184 delete lsh;
mas01mc@308 185 lsh = 0;
mas01mc@292 186 close(lshfid);
mas01mc@292 187 }
mas01mc@292 188
mas01mc@292 189 // Attempt to open LSH file
mas01mc@292 190 if((lshfid = open(newIndexName,O_RDONLY))>0){
mas01mc@292 191 printf("INDEX: merging with existing LSH index\n");
mas01mc@292 192 fflush(stdout);
mas01mc@340 193 char* mergeIndexName = newIndexName;
mas01mc@292 194
mas01mc@292 195 // Get the lsh header info and find how many tracks are inserted already
mas01mc@340 196 lsh = new LSH(mergeIndexName, false); // lshInCore=false to avoid loading hashTables here
mas01mc@292 197 assert(lsh);
mas01mc@324 198 Uns32T maxs = index_to_trackID(lsh->get_maxp(), lsh_n_point_bits)+1;
mas01mc@292 199 delete lsh;
mas01mc@308 200 lsh = 0;
mas01mc@292 201
mas01mc@340 202 // Insert up to lsh_param_b tracks
mas01mc@340 203 if( !sNorm && !(dbH->flags & O2_FLAG_LARGE_ADB) ){
mas01mc@340 204 index_initialize(&sNorm, &snPtr, &sPower, &spPtr, &dbVectors);
mas01mc@340 205 }
mas01mc@292 206 // This allows for updating index after more tracks are inserted into audioDB
mas01mc@292 207 for(Uns32T startTrack = maxs; startTrack < dbH->numFiles; startTrack+=lsh_param_b){
mas01mc@292 208
mas01mc@292 209 Uns32T endTrack = startTrack + lsh_param_b;
mas01mc@292 210 if( endTrack > dbH->numFiles)
mas01mc@292 211 endTrack = dbH->numFiles;
mas01mc@292 212 printf("Indexing track range: %d - %d\n", startTrack, endTrack);
mas01mc@292 213 fflush(stdout);
mas01mc@340 214 lsh = new LSH(mergeIndexName, false); // Initialize empty LSH tables
mas01mc@292 215 assert(lsh);
mas01mc@292 216
mas01mc@292 217 // Insert up to lsh_param_b database tracks
mas01mc@292 218 index_insert_tracks(startTrack, endTrack, &fvp, &sNorm, &snPtr, &sPower, &spPtr);
mas01mc@292 219
mas01mc@340 220 // Serialize to file (merging is performed here)
mas01mc@340 221 lsh->serialize(mergeIndexName, lsh_in_core?O2_SERIAL_FILEFORMAT2:O2_SERIAL_FILEFORMAT1); // Serialize core LSH heap to disk
mas01mc@292 222 delete lsh;
mas01mc@308 223 lsh = 0;
mas01mc@340 224 }
mas01mc@292 225
mas01mc@292 226 close(lshfid);
mas01mc@292 227 printf("INDEX: done constructing LSH index.\n");
mas01mc@292 228 fflush(stdout);
mas01mc@292 229
mas01mc@292 230 }
mas01mc@292 231 else{
mas01mc@292 232 error("Something's wrong with LSH index file");
mas01mc@292 233 exit(1);
mas01mc@292 234 }
mas01mc@292 235
mas01mc@324 236 delete[] newIndexName;
mas01mc@324 237 delete[] sNorm;
mas01mc@324 238 delete[] sPower;
mas01mc@324 239 }
mas01mc@292 240
mas01mc@292 241
mas01mc@324 242 // initialize auxillary track data from filesystem
mas01mc@324 243 // pre-conditions:
mas01mc@324 244 // dbH->flags & O2_FLAG_LARGE_ADB
mas01mc@324 245 // feature data allocated and copied (fvp)
mas01mc@324 246 //
mas01mc@324 247 // post-conditions:
mas01mc@324 248 // allocated power data
mas01mc@324 249 // allocated l2norm data
mas01mc@324 250 //
mas01mc@324 251 void audioDB::init_track_aux_data(Uns32T trackID, double* fvp, double** sNormpp,double** snPtrp, double** sPowerp, double** spPtrp){
mas01mc@324 252 if( !(dbH->flags & O2_FLAG_LARGE_ADB) )
mas01mc@324 253 error("error: init_track_large_adb required O2_FLAG_LARGE_ADB");
mas01mc@292 254
mas01mc@324 255 // Allocate and read the power sequence
mas01mc@324 256 if(trackTable[trackID]>=sequenceLength){
mas01mc@324 257
mas01mc@324 258 char* prefixedString = new char[O2_MAXFILESTR];
mas01mc@324 259 char* tmpStr = prefixedString;
mas01mc@324 260 // Open and check dimensions of power file
mas01mc@324 261 strncpy(prefixedString, powerFileNameTable+trackID*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR);
mas01mc@324 262 prefix_name((char ** const)&prefixedString, adb_feature_root);
mas01mc@324 263 if(prefixedString!=tmpStr)
mas01mc@324 264 delete[] tmpStr;
mas01mc@324 265 powerfd = open(prefixedString, O_RDONLY);
mas01mc@324 266 if (powerfd < 0) {
mas01mc@324 267 error("failed to open power file", prefixedString);
mas01mc@324 268 }
mas01mc@324 269 if (fstat(powerfd, &statbuf) < 0) {
mas01mc@324 270 error("fstat error finding size of power file", prefixedString, "fstat");
mas01mc@324 271 }
mas01mc@324 272
mas01mc@324 273 if( (statbuf.st_size - sizeof(int)) / (sizeof(double)) != trackTable[trackID] )
mas01mc@324 274 error("Dimension mismatch: numPowers != numVectors", prefixedString);
mas01mc@324 275
mas01mc@324 276 *sPowerp = new double[trackTable[trackID]]; // Allocate memory for power values
mas01mc@324 277 assert(*sPowerp);
mas01mc@324 278 *spPtrp = *sPowerp;
mas01mc@324 279 insertPowerData(trackTable[trackID], powerfd, *sPowerp);
mas01mc@324 280 if (0 < powerfd) {
mas01mc@324 281 close(powerfd);
mas01mc@324 282 }
mas01mc@324 283
mas01mc@324 284 sequence_sum(*sPowerp, trackTable[trackID], sequenceLength);
mas01mc@324 285 sequence_average(*sPowerp, trackTable[trackID], sequenceLength);
mas01mc@324 286 powerTable = 0;
mas01mc@324 287
mas01mc@324 288 // Allocate and calculate the l2norm sequence
mas01mc@324 289 *sNormpp = new double[trackTable[trackID]];
mas01mc@324 290 assert(*sNormpp);
mas01mc@324 291 *snPtrp = *sNormpp;
mas01mc@324 292 unitNorm(fvp, dbH->dim, trackTable[trackID], *sNormpp);
mas01mc@324 293 sequence_sum(*sNormpp, trackTable[trackID], sequenceLength);
mas01mc@324 294 sequence_sqrt(*sNormpp, trackTable[trackID], sequenceLength);
mas01mc@324 295 }
mas01mc@292 296 }
mas01mc@292 297
mas01mc@292 298 void audioDB::index_insert_tracks(Uns32T start_track, Uns32T end_track,
mas01mc@292 299 double** fvpp, double** sNormpp,double** snPtrp,
mas01mc@292 300 double** sPowerp, double** spPtrp){
mas01mc@292 301 size_t nfv = 0;
mas01mc@292 302 double* fvp = 0; // Keep pointer for memory allocation and free() for track data
mas01mc@292 303 Uns32T trackID = 0;
mas01mc@292 304
mas01mc@292 305 VERB_LOG(1, "indexing tracks...");
mas01mc@292 306
mas01mc@324 307 int trackfd = dbfid;
mas01mc@292 308 for(trackID = start_track ; trackID < end_track ; trackID++ ){
mas01mc@324 309 if( dbH->flags & O2_FLAG_LARGE_ADB ){
mas01mc@324 310 char* prefixedString = new char[O2_MAXFILESTR];
mas01mc@324 311 char* tmpStr = prefixedString;
mas01mc@324 312 // Open and check dimensions of feature file
mas01mc@324 313 strncpy(prefixedString, featureFileNameTable+trackID*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR);
mas01mc@324 314 prefix_name((char ** const) &prefixedString, adb_feature_root);
mas01mc@324 315 if(prefixedString!=tmpStr)
mas01mc@324 316 delete[] tmpStr;
mas01mc@324 317 initInputFile(prefixedString, false); // nommap, file pointer at correct position
mas01mc@324 318 trackfd = infid;
mas01mc@324 319 }
mas01mc@324 320 read_data(trackfd, trackID, &fvp, &nfv); // over-writes fvp and nfv
mas01mc@292 321 *fvpp = fvp; // Protect memory allocation and free() for track data
mas01mc@324 322
mas01mc@324 323 if( dbH->flags & O2_FLAG_LARGE_ADB )
mas01mc@324 324 // Load power and calculate power and l2norm sequence sums
mas01mc@324 325 init_track_aux_data(trackID, fvp, sNormpp, snPtrp, sPowerp, spPtrp);
mas01mc@324 326
mas01mc@292 327 if(!index_insert_track(trackID, fvpp, snPtrp, spPtrp))
mas01mc@292 328 break;
mas01mc@324 329 if ( dbH->flags & O2_FLAG_LARGE_ADB ){
mas01mc@324 330 close(infid);
mas01mc@324 331 delete[] *sNormpp;
mas01mc@324 332 delete[] *sPowerp;
mas01mc@324 333 *sNormpp = *sPowerp = *snPtrp = *snPtrp = 0;
mas01mc@324 334 }
mas01mc@324 335 } // end for(trackID = start_track ; ... )
mas01mc@292 336 std::cout << "finished inserting." << endl;
mas01mc@292 337 }
mas01mc@292 338
mas01mc@292 339 int audioDB::index_insert_track(Uns32T trackID, double** fvpp, double** snpp, double** sppp){
mas01mc@292 340 // Loop over the current input track's vectors
mas01cr@305 341 Uns32T numVecs = 0;
mas01cr@305 342 if (trackTable[trackID] > O2_MAXTRACKLEN) {
mas01cr@305 343 if (O2_MAXTRACKLEN < sequenceLength - 1) {
mas01cr@305 344 numVecs = 0;
mas01cr@305 345 } else {
mas01cr@305 346 numVecs = O2_MAXTRACKLEN - sequenceLength + 1;
mas01cr@305 347 }
mas01cr@305 348 } else {
mas01cr@305 349 if (trackTable[trackID] < sequenceLength - 1) {
mas01cr@305 350 numVecs = 0;
mas01cr@305 351 } else {
mas01cr@305 352 numVecs = trackTable[trackID] - sequenceLength + 1;
mas01cr@305 353 }
mas01cr@305 354 }
mas01mc@292 355
mas01mc@324 356 Uns32T numVecsAboveThreshold = 0, collisionCount = 0;
mas01mc@324 357 if(numVecs){
mas01mc@324 358 vv = index_initialize_shingles(numVecs);
mas01mc@324 359
mas01mc@324 360 for( Uns32T pointID = 0 ; pointID < numVecs; pointID++ )
mas01mc@324 361 index_make_shingle(vv, pointID, *fvpp, dbH->dim, sequenceLength);
mas01mc@324 362
mas01mc@324 363 numVecsAboveThreshold = index_norm_shingles(vv, *snpp, *sppp);
mas01mc@324 364 collisionCount = index_insert_shingles(vv, trackID, *sppp);
mas01mc@324 365 }
mas01mc@292 366 float meanCollisionCount = numVecsAboveThreshold?(float)collisionCount/numVecsAboveThreshold:0;
mas01mc@292 367
mas01mc@292 368 /* index_norm_shingles() only goes as far as the end of the
mas01mc@292 369 sequence, which is right, but the space allocated is for the
mas01mc@292 370 whole track. */
mas01mc@292 371
mas01mc@292 372 /* But numVecs will be <trackTable[track] if trackTable[track]>O2_MAXTRACKLEN
mas01mc@292 373 * So let's be certain the pointers are in the correct place
mas01mc@292 374 */
mas01mc@292 375
mas01mc@324 376 if( !(dbH->flags & O2_FLAG_LARGE_ADB) ){
mas01mc@324 377 *snpp += trackTable[trackID];
mas01mc@324 378 *sppp += trackTable[trackID];
mas01mc@324 379 *fvpp += trackTable[trackID] * dbH->dim;
mas01mc@324 380 }
mas01mc@292 381
mas01mc@292 382 std::cout << " n=" << trackTable[trackID] << " n'=" << numVecsAboveThreshold << " E[#c]=" << lsh->get_mean_collision_rate() << " E[#p]=" << meanCollisionCount << endl;
mas01mc@292 383 std::cout.flush();
mas01mc@292 384 return true;
mas01mc@292 385 }
mas01mc@292 386
mas01mc@292 387 Uns32T audioDB::index_insert_shingles(vector<vector<float> >* vv, Uns32T trackID, double* spp){
mas01mc@292 388 Uns32T collisionCount = 0;
mas01mc@292 389 cout << "[" << trackID << "]" << fileTable+trackID*O2_FILETABLE_ENTRY_SIZE;
mas01mc@324 390 for( Uns32T pointID=0 ; pointID < (*vv).size(); pointID+=sequenceHop){
mas01mc@324 391 if(!use_absolute_threshold || (use_absolute_threshold && (*spp >= absolute_threshold)))
mas01mc@324 392 collisionCount += lsh->insert_point((*vv)[pointID], index_from_trackInfo(trackID, pointID, lsh_n_point_bits));
mas01mc@324 393 spp+=sequenceHop;
mas01mc@311 394 }
mas01mc@292 395 return collisionCount;
mas01mc@292 396 }
mas01mc@292 397
mas01mc@292 398 /********************* LSH shingle construction ***************************/
mas01mc@292 399
mas01mc@292 400 // Construct shingles out of a feature matrix
mas01mc@292 401 // inputs:
mas01mc@292 402 // idx is vector index in feature matrix
mas01mc@292 403 // fvp is base feature matrix pointer double* [numVecs x dbH->dim]
mas01mc@292 404 //
mas01mc@292 405 // pre-conditions:
mas01mc@292 406 // dbH->dim
mas01mc@292 407 // sequenceLength
mas01mc@292 408 // idx < numVectors - sequenceLength + 1
mas01mc@292 409 //
mas01mc@292 410 // post-conditions:
mas01mc@292 411 // (*vv)[idx] contains a shingle with dbH->dim*sequenceLength float values
mas01mc@292 412
mas01mc@292 413 void audioDB::index_make_shingle(vector<vector<float> >* vv, Uns32T idx, double* fvp, Uns32T dim, Uns32T seqLen){
mas01mc@292 414 assert(idx<(*vv).size());
mas01mc@292 415 vector<float>::iterator ve = (*vv)[idx].end();
mas01mc@292 416 vi=(*vv)[idx].begin(); // shingle iterator
mas01mc@292 417 // First feature vector in shingle
mas01mc@292 418 if(idx==0){
mas01mc@292 419 while(vi!=ve)
mas01mc@292 420 *vi++ = (float)(*fvp++);
mas01mc@292 421 }
mas01mc@292 422 // Not first feature vector in shingle
mas01mc@292 423 else{
mas01mc@292 424 vector<float>::iterator ui=(*vv)[idx-1].begin() + dim; // previous shingle iterator
mas01mc@292 425 // Previous seqLen-1 dim-vectors
mas01mc@292 426 while(vi!=ve-dim)
mas01mc@292 427 *vi++=*ui++;
mas01mc@292 428 // Move data pointer to next feature vector
mas01mc@292 429 fvp += ( seqLen + idx - 1 ) * dim ;
mas01mc@292 430 // New d-vector
mas01mc@292 431 while(vi!=ve)
mas01mc@292 432 *vi++ = (float)(*fvp++);
mas01mc@292 433 }
mas01mc@292 434 }
mas01mc@292 435
mas01mc@292 436 // norm shingles
mas01mc@292 437 // in-place norming, no deletions
mas01mc@292 438 // If using power, return number of shingles above power threshold
mas01mc@292 439 int audioDB::index_norm_shingles(vector<vector<float> >* vv, double* snp, double* spp){
mas01mc@292 440 int z = 0; // number of above-threshold shingles
mas01mc@292 441 float l2norm;
mas01mc@292 442 double power;
mas01mc@292 443 float oneOverRadius = 1./(float)sqrt(radius); // Passed radius is really radius^2
mas01mc@292 444 float oneOverSqrtl2NormDivRad = oneOverRadius;
mas01mc@292 445 if(!spp)
mas01mc@292 446 error("LSH indexing and query requires a power feature using -w or -W");
mas01mc@292 447 Uns32T shingleSize = sequenceLength*dbH->dim;
mas01mc@292 448 for(Uns32T a=0; a<(*vv).size(); a++){
mas01mc@292 449 l2norm = (float)(*snp++);
mas01mc@292 450 if(audioDB::normalizedDistance)
mas01mc@292 451 oneOverSqrtl2NormDivRad = (1./l2norm)*oneOverRadius;
mas01mc@292 452
mas01mc@292 453 for(Uns32T b=0; b < shingleSize ; b++)
mas01mc@292 454 (*vv)[a][b]*=oneOverSqrtl2NormDivRad;
mas01mc@292 455
mas01mc@292 456 power = *spp++;
mas01mc@292 457 if(use_absolute_threshold){
mas01mc@292 458 if ( power >= absolute_threshold )
mas01mc@292 459 z++;
mas01mc@292 460 }
mas01mc@292 461 else
mas01mc@292 462 z++;
mas01mc@292 463 }
mas01mc@292 464 return z;
mas01mc@292 465 }
mas01mc@292 466
mas01mc@292 467
mas01mc@292 468 /*********************** LSH retrieval ****************************/
mas01mc@292 469
mas01mc@292 470
mas01mc@292 471 // return true if indexed query performed else return false
mas01mc@292 472 int audioDB::index_init_query(const char* dbName){
mas01mc@292 473
mas01mc@292 474 if(!(index_exists(dbName, radius, sequenceLength)))
mas01mc@292 475 return false;
mas01mc@292 476
mas01mc@292 477 char* indexName = index_get_name(dbName, radius, sequenceLength);
mas01mc@292 478
mas01mc@292 479 // Test to see if file exists
mas01mc@292 480 if((lshfid = open (indexName, O_RDONLY)) < 0){
mas01mc@292 481 delete[] indexName;
mas01mc@292 482 return false;
mas01mc@292 483 }
mas01mc@292 484
mas01mc@308 485 lsh = index_allocate(indexName, false); // Get the header only here
mas01mc@292 486 sequenceLength = lsh->get_lshHeader()->dataDim / dbH->dim; // shingleDim / vectorDim
mas01mc@292 487
mas01mc@311 488 if(lsh!=SERVER_LSH_INDEX_SINGLETON){
mas01mc@308 489 if( fabs(radius - lsh->get_radius())>fabs(O2_DISTANCE_TOLERANCE))
mas01mc@308 490 printf("*** Warning: adb_radius (%f) != lsh_radius (%f) ***\n", radius, lsh->get_radius());
mas01mc@327 491 VERB_LOG(1,"INDEX: dim %d\n", (int)dbH->dim);
mas01mc@327 492 VERB_LOG(1,"INDEX: R %f\n", lsh->get_radius());
mas01mc@327 493 VERB_LOG(1,"INDEX: seqlen %d\n", sequenceLength);
mas01mc@327 494 VERB_LOG(1,"INDEX: w %f\n", lsh->get_lshHeader()->get_binWidth());
mas01mc@327 495 VERB_LOG(1,"INDEX: k %d\n", lsh->get_lshHeader()->get_numFuns());
mas01mc@327 496 VERB_LOG(1,"INDEX: L (m*(m-1))/2 %d\n", lsh->get_lshHeader()->get_numTables());
mas01mc@327 497 VERB_LOG(1,"INDEX: N %d\n", lsh->get_lshHeader()->get_numRows());
mas01mc@327 498 VERB_LOG(1,"INDEX: s %d\n", index_to_trackID(lsh->get_maxp(), lsh_n_point_bits));
mas01mc@327 499 VERB_LOG(1,"INDEX: Opened LSH index file %s\n", indexName);
mas01mc@308 500 }
mas01mc@292 501
mas01mc@292 502 // Check to see if we are loading hash tables into core, and do so if true
mas01mc@292 503 if((lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2) || lsh_in_core){
mas01mc@308 504 if(SERVER_LSH_INDEX_SINGLETON)
mas01mc@308 505 fprintf(stderr,"INDEX: using persistent hash tables: %s\n", lsh->get_indexName());
mas01mc@308 506 else
mas01mc@327 507 VERB_LOG(1,"INDEX: loading hash tables into core %s\n", (lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2)?"FORMAT2":"FORMAT1");
mas01mc@308 508 lsh = index_allocate(indexName, true);
mas01mc@292 509 }
mas01mc@292 510
mas01mc@292 511 delete[] indexName;
mas01mc@292 512 return true;
mas01mc@292 513 }
mas01mc@292 514
mas01mc@292 515 // *Static* approximate NN point reporter callback method for lshlib
mas01mc@292 516 void audioDB::index_add_point_approximate(void* instancePtr, Uns32T pointID, Uns32T qpos, float dist){
mas01mc@292 517 assert(instancePtr); // We need an instance for this callback
mas01mc@292 518 audioDB* myself = (audioDB*) instancePtr; // Use explicit cast to recover "this" instance
mas01mc@324 519 Uns32T trackID = index_to_trackID(pointID, myself->lsh_n_point_bits);
mas01mc@324 520 Uns32T spos = index_to_trackPos(pointID, myself->lsh_n_point_bits);
mas01mc@292 521 // Skip identity in query_from_key
mas01mc@292 522 if( !myself->query_from_key || (myself->query_from_key && ( trackID != myself->query_from_key_index )) )
mas01mc@292 523 myself->reporter->add_point(trackID, qpos, spos, dist);
mas01mc@292 524 }
mas01mc@292 525
mas01mc@292 526 // *Static* exact NN point reporter callback method for lshlib
mas01mc@292 527 // Maintain a queue of points to pass to query_points() for exact evaluation
mas01mc@292 528 void audioDB::index_add_point_exact(void* instancePtr, Uns32T pointID, Uns32T qpos, float dist){
mas01mc@292 529 assert(instancePtr); // We need an instance for this callback
mas01mc@292 530 audioDB* myself = (audioDB*) instancePtr; // Use explicit cast to recover "this" instance
mas01mc@324 531 Uns32T trackID = index_to_trackID(pointID, myself->lsh_n_point_bits);
mas01mc@324 532 Uns32T spos = index_to_trackPos(pointID, myself->lsh_n_point_bits);
mas01mc@292 533 // Skip identity in query_from_key
mas01mc@292 534 if( !myself->query_from_key || (myself->query_from_key && ( trackID != myself->query_from_key_index )) )
mas01mc@292 535 myself->index_insert_exact_evaluation_queue(trackID, qpos, spos);
mas01mc@292 536 }
mas01mc@292 537
mas01mc@292 538 void audioDB::initialize_exact_evalutation_queue(){
mas01mc@292 539 if(exact_evaluation_queue)
mas01mc@292 540 delete exact_evaluation_queue;
mas01mc@292 541 exact_evaluation_queue = new priority_queue<PointPair, std::vector<PointPair>, std::less<PointPair> >;
mas01mc@292 542 }
mas01mc@292 543
mas01mc@292 544 void audioDB::index_insert_exact_evaluation_queue(Uns32T trackID, Uns32T qpos, Uns32T spos){
mas01mc@292 545 PointPair p(trackID, qpos, spos);
mas01mc@292 546 exact_evaluation_queue->push(p);
mas01mc@292 547 }
mas01mc@292 548
mas01mc@292 549 // return 0: if index does not exist
mas01mc@292 550 // return nqv: if index exists
mas01mc@292 551 int audioDB::index_query_loop(const char* dbName, Uns32T queryIndex) {
mas01mc@292 552
mas01mc@324 553 unsigned int numVectors = 0;
mas01mc@324 554 double *query = 0, *query_data = 0;
mas01mc@324 555 double *qNorm = 0, *qnPtr = 0, *qPower = 0, *qpPtr = 0;
mas01mc@324 556 double meanQdur = 0;
mas01mc@292 557 void (*add_point_func)(void*,Uns32T,Uns32T,float);
mas01mc@292 558
mas01mc@292 559 // Set the point-reporter callback based on the value of lsh_exact
mas01mc@292 560 if(lsh_exact){
mas01mc@292 561 initialize_exact_evalutation_queue();
mas01mc@292 562 add_point_func = &index_add_point_exact;
mas01mc@292 563 }
mas01mc@292 564 else
mas01mc@292 565 add_point_func = &index_add_point_approximate;
mas01mc@292 566
mas01mc@292 567 if(!index_init_query(dbName)) // sets-up LSH index structures for querying
mas01mc@292 568 return 0;
mas01mc@292 569
mas01mc@292 570 char* database = index_get_name(dbName, radius, sequenceLength);
mas01mc@292 571
mas01mc@292 572 if(query_from_key)
mas01mc@292 573 set_up_query_from_key(&query_data, &query, &qNorm, &qnPtr, &qPower, &qpPtr, &meanQdur, &numVectors, queryIndex);
mas01mc@292 574 else
mas01mc@292 575 set_up_query(&query_data, &query, &qNorm, &qnPtr, &qPower, &qpPtr, &meanQdur, &numVectors); // get query vectors
mas01mc@292 576
mas01mc@292 577 VERB_LOG(1, "retrieving tracks...");
mas01mc@292 578
mas01mc@292 579 assert(pointNN>0 && pointNN<=O2_MAXNN);
mas01mc@292 580 assert(trackNN>0 && trackNN<=O2_MAXNN);
mas01mc@292 581
mas01mc@292 582 gettimeofday(&tv1, NULL);
mas01mc@292 583 // query vector index
mas01mc@292 584 Uns32T Nq = (numVectors>O2_MAXTRACKLEN?O2_MAXTRACKLEN:numVectors) - sequenceLength + 1;
mas01mc@292 585 vv = index_initialize_shingles(Nq); // allocate memory to copy query vectors to shingles
mas01mc@327 586 VERB_LOG(1, "Nq=%d", Nq);
mas01mc@292 587 // Construct shingles from query features
mas01mc@292 588 for( Uns32T pointID = 0 ; pointID < Nq ; pointID++ )
mas01mc@292 589 index_make_shingle(vv, pointID, query, dbH->dim, sequenceLength);
mas01mc@292 590
mas01mc@292 591 // Normalize query vectors
mas01mc@292 592 Uns32T numVecsAboveThreshold = index_norm_shingles( vv, qnPtr, qpPtr );
mas01mc@327 593 VERB_LOG(1, "Nq'=%d\n", numVecsAboveThreshold);
mas01mc@292 594
mas01mc@292 595 // Nq contains number of inspected points in query file,
mas01mc@292 596 // numVecsAboveThreshold is number of points with power >= absolute_threshold
mas01mc@292 597 double* qpp = qpPtr; // Keep original qpPtr for possible exact evaluation
mas01mc@292 598 if(usingQueryPoint && numVecsAboveThreshold){
mas01mc@292 599 if((lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2) || lsh_in_core)
mas01mc@292 600 lsh->retrieve_point((*vv)[0], queryPoint, add_point_func, (void*)this);
mas01mc@292 601 else
mas01mc@292 602 lsh->serial_retrieve_point(database, (*vv)[0], queryPoint, add_point_func, (void*)this);
mas01mc@292 603 }
mas01mc@292 604 else if(numVecsAboveThreshold)
mas01mc@292 605 for( Uns32T pointID = 0 ; pointID < Nq; pointID++ )
mas01cr@365 606 if(!use_absolute_threshold || (use_absolute_threshold && (*qpp++ >= absolute_threshold))) {
mas01cr@365 607 if((lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2) || lsh_in_core) {
mas01mc@292 608 lsh->retrieve_point((*vv)[pointID], pointID, add_point_func, (void*)this);
mas01cr@365 609 } else {
mas01mc@292 610 lsh->serial_retrieve_point(database, (*vv)[pointID], pointID, add_point_func, (void*)this);
mas01cr@365 611 }
mas01cr@365 612 }
mas01mc@292 613
mas01mc@292 614 if(lsh_exact)
mas01mc@292 615 // Perform exact distance computation on point pairs in exact_evaluation_queue
mas01mc@292 616 query_loop_points(query, qnPtr, qpPtr, meanQdur, numVectors);
mas01mc@292 617
mas01mc@292 618 gettimeofday(&tv2,NULL);
mas01mc@292 619 VERB_LOG(1,"elapsed time: %ld msec\n",
mas01mc@292 620 (tv2.tv_sec*1000 + tv2.tv_usec/1000) -
mas01mc@292 621 (tv1.tv_sec*1000 + tv1.tv_usec/1000))
mas01mc@292 622
mas01mc@292 623 // Close the index file
mas01mc@292 624 close(lshfid);
mas01mc@292 625
mas01mc@292 626 // Clean up
mas01mc@292 627 if(query_data)
mas01mc@292 628 delete[] query_data;
mas01mc@292 629 if(qNorm)
mas01mc@292 630 delete[] qNorm;
mas01mc@292 631 if(qPower)
mas01mc@292 632 delete[] qPower;
mas01mc@292 633 if(database)
mas01mc@292 634 delete[] database;
mas01mc@292 635
mas01mc@292 636 return Nq;
mas01mc@292 637 }
mas01mc@292 638