annotate query.cpp @ 411:ad2206c24986 api-inversion

Fix a memory corruption bug. When allocating the adb_t in audiodb_open(), zero the memory; then we're not going to try to free() or delete some arbitrary uninitialized thing if the thing that we're opening turns out not to be an audiodb database.
author mas01cr
date Thu, 11 Dec 2008 08:54:06 +0000
parents ef4792df8f93
children a7d61291fbda
rev   line source
mas01cr@239 1 #include "audioDB.h"
mas01cr@239 2 #include "reporter.h"
mas01cr@239 3
mas01cr@239 4 bool audioDB::powers_acceptable(double p1, double p2) {
mas01cr@239 5 if (use_absolute_threshold) {
mas01cr@239 6 if ((p1 < absolute_threshold) || (p2 < absolute_threshold)) {
mas01cr@239 7 return false;
mas01cr@239 8 }
mas01cr@239 9 }
mas01cr@239 10 if (use_relative_threshold) {
mas01cr@239 11 if (fabs(p1-p2) > fabs(relative_threshold)) {
mas01cr@239 12 return false;
mas01cr@239 13 }
mas01cr@239 14 }
mas01cr@239 15 return true;
mas01cr@239 16 }
mas01cr@239 17
mas01cr@239 18 void audioDB::query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) {
mas01mc@292 19 // init database tables and dbH first
mas01mc@292 20 if(query_from_key)
mas01mc@292 21 initTables(dbName);
mas01mc@292 22 else
mas01mc@292 23 initTables(dbName, inFile);
mas01mc@292 24
mas01mc@292 25 // keyKeyPos requires dbH to be initialized
mas01mc@292 26 if(query_from_key && (!key || (query_from_key_index = getKeyPos((char*)key))==O2_ERR_KEYNOTFOUND))
mas01mc@292 27 error("Query key not found :",key);
mas01mc@292 28
mas01cr@239 29 switch (queryType) {
mas01cr@239 30 case O2_POINT_QUERY:
mas01cr@239 31 sequenceLength = 1;
mas01cr@239 32 normalizedDistance = false;
mas01mc@292 33 reporter = new pointQueryReporter< std::greater < NNresult > >(pointNN);
mas01cr@239 34 break;
mas01cr@239 35 case O2_TRACK_QUERY:
mas01cr@239 36 sequenceLength = 1;
mas01cr@239 37 normalizedDistance = false;
mas01mc@292 38 reporter = new trackAveragingReporter< std::greater< NNresult > >(pointNN, trackNN, dbH->numFiles);
mas01cr@239 39 break;
mas01mc@292 40 case O2_SEQUENCE_QUERY:
mas01mc@292 41 if(no_unit_norming)
mas01mc@292 42 normalizedDistance = false;
mas01cr@239 43 if(radius == 0) {
mas01mc@292 44 reporter = new trackAveragingReporter< std::less< NNresult > >(pointNN, trackNN, dbH->numFiles);
mas01cr@239 45 } else {
mas01mc@292 46 if(index_exists(dbName, radius, sequenceLength)){
mas01mc@292 47 char* indexName = index_get_name(dbName, radius, sequenceLength);
mas01mc@308 48 lsh = index_allocate(indexName, false);
mas01mc@324 49 reporter = new trackSequenceQueryRadReporter(trackNN, index_to_trackID(lsh->get_maxp(), lsh_n_point_bits)+1);
mas01mc@292 50 delete[] indexName;
mas01mc@292 51 }
mas01mc@292 52 else
mas01mc@292 53 reporter = new trackSequenceQueryRadReporter(trackNN, dbH->numFiles);
mas01cr@239 54 }
mas01cr@239 55 break;
mas01mc@292 56 case O2_N_SEQUENCE_QUERY:
mas01mc@292 57 if(no_unit_norming)
mas01mc@292 58 normalizedDistance = false;
mas01mc@248 59 if(radius == 0) {
mas01mc@292 60 reporter = new trackSequenceQueryNNReporter< std::less < NNresult > >(pointNN, trackNN, dbH->numFiles);
mas01mc@248 61 } else {
mas01mc@292 62 if(index_exists(dbName, radius, sequenceLength)){
mas01mc@292 63 char* indexName = index_get_name(dbName, radius, sequenceLength);
mas01mc@308 64 lsh = index_allocate(indexName, false);
mas01mc@324 65 reporter = new trackSequenceQueryRadNNReporter(pointNN,trackNN, index_to_trackID(lsh->get_maxp(), lsh_n_point_bits)+1);
mas01mc@292 66 delete[] indexName;
mas01mc@292 67 }
mas01mc@292 68 else
mas01mc@292 69 reporter = new trackSequenceQueryRadNNReporter(pointNN,trackNN, dbH->numFiles);
mas01mc@248 70 }
mas01mc@248 71 break;
mas01mc@263 72 case O2_ONE_TO_ONE_N_SEQUENCE_QUERY :
mas01mc@263 73 if(radius == 0) {
mas01mc@263 74 error("query-type not yet supported");
mas01mc@263 75 } else {
mas01mc@292 76 reporter = new trackSequenceQueryRadNNReporterOneToOne(pointNN,trackNN, dbH->numFiles);
mas01mc@263 77 }
mas01mc@263 78 break;
mas01cr@239 79 default:
mas01cr@239 80 error("unrecognized queryType in query()");
mas01cr@239 81 }
mas01mc@292 82
mas01mc@292 83 // Test for index (again) here
mas01mc@329 84 if(radius && index_exists(dbName, radius, sequenceLength)){
mas01mc@329 85 VERB_LOG(1, "Calling indexed query on database %s, radius=%f, sequenceLength=%d\n", dbName, radius, sequenceLength);
mas01mc@292 86 index_query_loop(dbName, query_from_key_index);
mas01mc@329 87 }
mas01mc@329 88 else{
mas01mc@329 89 VERB_LOG(1, "Calling brute-force query on database %s\n", dbName);
mas01mc@292 90 query_loop(dbName, query_from_key_index);
mas01mc@329 91 }
mas01mc@292 92
mas01mc@292 93 reporter->report(fileTable, adbQueryResponse);
mas01cr@239 94 }
mas01cr@239 95
mas01cr@239 96 // return ordinal position of key in keyTable
mas01mc@292 97 // this should really be a STL hash map search
mas01cr@239 98 unsigned audioDB::getKeyPos(char* key){
mas01mc@292 99 if(!dbH)
mas01mc@292 100 error("dbH not initialized","getKeyPos");
mas01cr@239 101 for(unsigned k=0; k<dbH->numFiles; k++)
mas01cr@256 102 if(strncmp(fileTable + k*O2_FILETABLE_ENTRY_SIZE, key, strlen(key))==0)
mas01cr@239 103 return k;
mas01cr@239 104 error("Key not found",key);
mas01cr@239 105 return O2_ERR_KEYNOTFOUND;
mas01cr@239 106 }
mas01cr@239 107
mas01cr@239 108 // This is a common pattern in sequence queries: what we are doing is
mas01cr@239 109 // taking a window of length seqlen over a buffer of length length,
mas01cr@239 110 // and placing the sum of the elements in that window in the first
mas01cr@239 111 // element of the window: thus replacing all but the last seqlen
mas01cr@239 112 // elements in the buffer with the corresponding windowed sum.
mas01cr@239 113 void audioDB::sequence_sum(double *buffer, int length, int seqlen) {
mas01cr@239 114 double tmp1, tmp2, *ps;
mas01cr@239 115 int j, w;
mas01cr@239 116
mas01cr@239 117 tmp1 = *buffer;
mas01cr@239 118 j = 1;
mas01cr@239 119 w = seqlen - 1;
mas01cr@239 120 while(w--) {
mas01cr@239 121 *buffer += buffer[j++];
mas01cr@239 122 }
mas01cr@239 123 ps = buffer + 1;
mas01cr@239 124 w = length - seqlen; // +1 - 1
mas01cr@239 125 while(w--) {
mas01cr@239 126 tmp2 = *ps;
mas01cr@239 127 if(isfinite(tmp1)) {
mas01cr@239 128 *ps = *(ps - 1) - tmp1 + *(ps + seqlen - 1);
mas01cr@239 129 } else {
mas01cr@239 130 for(int i = 1; i < seqlen; i++) {
mas01cr@239 131 *ps += *(ps + i);
mas01cr@239 132 }
mas01cr@239 133 }
mas01cr@239 134 tmp1 = tmp2;
mas01cr@239 135 ps++;
mas01cr@239 136 }
mas01cr@239 137 }
mas01cr@239 138
mas01cr@239 139 // In contrast to sequence_sum() above, sequence_sqrt() and
mas01cr@239 140 // sequence_average() below are simple mappers across the sequence.
mas01cr@239 141 void audioDB::sequence_sqrt(double *buffer, int length, int seqlen) {
mas01cr@239 142 int w = length - seqlen + 1;
mas01cr@239 143 while(w--) {
mas01cr@239 144 *buffer = sqrt(*buffer);
mas01cr@239 145 buffer++;
mas01cr@239 146 }
mas01cr@239 147 }
mas01cr@239 148
mas01cr@239 149 void audioDB::sequence_average(double *buffer, int length, int seqlen) {
mas01cr@239 150 int w = length - seqlen + 1;
mas01cr@239 151 while(w--) {
mas01cr@239 152 *buffer /= seqlen;
mas01cr@239 153 buffer++;
mas01cr@239 154 }
mas01cr@239 155 }
mas01cr@239 156
mas01cr@239 157 void audioDB::initialize_arrays(int track, unsigned int numVectors, double *query, double *data_buffer, double **D, double **DD) {
mas01cr@239 158 unsigned int j, k, l, w;
mas01cr@239 159 double *dp, *qp, *sp;
mas01cr@239 160
mas01cr@239 161 const unsigned HOP_SIZE = sequenceHop;
mas01cr@239 162 const unsigned wL = sequenceLength;
mas01cr@239 163
mas01cr@239 164 for(j = 0; j < numVectors; j++) {
mas01cr@239 165 // Sum products matrix
mas01cr@239 166 D[j] = new double[trackTable[track]];
mas01cr@239 167 assert(D[j]);
mas01cr@239 168 // Matched filter matrix
mas01cr@239 169 DD[j]=new double[trackTable[track]];
mas01cr@239 170 assert(DD[j]);
mas01cr@239 171 }
mas01cr@239 172
mas01cr@239 173 // Dot product
mas01cr@239 174 for(j = 0; j < numVectors; j++)
mas01cr@239 175 for(k = 0; k < trackTable[track]; k++){
mas01cr@239 176 qp = query + j * dbH->dim;
mas01cr@239 177 sp = data_buffer + k * dbH->dim;
mas01cr@239 178 DD[j][k] = 0.0; // Initialize matched filter array
mas01cr@239 179 dp = &D[j][k]; // point to correlation cell j,k
mas01cr@239 180 *dp = 0.0; // initialize correlation cell
mas01cr@239 181 l = dbH->dim; // size of vectors
mas01cr@239 182 while(l--)
mas01cr@239 183 *dp += *qp++ * *sp++;
mas01cr@239 184 }
mas01cr@239 185
mas01cr@239 186 // Matched Filter
mas01cr@239 187 // HOP SIZE == 1
mas01cr@239 188 double* spd;
mas01cr@239 189 if(HOP_SIZE == 1) { // HOP_SIZE = shingleHop
mas01cr@239 190 for(w = 0; w < wL; w++) {
mas01cr@239 191 for(j = 0; j < numVectors - w; j++) {
mas01cr@239 192 sp = DD[j];
mas01cr@239 193 spd = D[j+w] + w;
mas01cr@239 194 k = trackTable[track] - w;
mas01mc@292 195 while(k--)
mas01mc@292 196 *sp++ += *spd++;
mas01cr@239 197 }
mas01cr@239 198 }
mas01cr@239 199 } else { // HOP_SIZE != 1
mas01cr@239 200 for(w = 0; w < wL; w++) {
mas01cr@239 201 for(j = 0; j < numVectors - w; j += HOP_SIZE) {
mas01cr@239 202 sp = DD[j];
mas01cr@239 203 spd = D[j+w]+w;
mas01cr@239 204 for(k = 0; k < trackTable[track] - w; k += HOP_SIZE) {
mas01cr@239 205 *sp += *spd;
mas01cr@239 206 sp += HOP_SIZE;
mas01cr@239 207 spd += HOP_SIZE;
mas01cr@239 208 }
mas01cr@239 209 }
mas01cr@239 210 }
mas01cr@239 211 }
mas01cr@239 212 }
mas01cr@239 213
mas01cr@239 214 void audioDB::delete_arrays(int track, unsigned int numVectors, double **D, double **DD) {
mas01cr@239 215 if(D != NULL) {
mas01cr@239 216 for(unsigned int j = 0; j < numVectors; j++) {
mas01cr@239 217 delete[] D[j];
mas01cr@239 218 }
mas01cr@239 219 }
mas01cr@239 220 if(DD != NULL) {
mas01cr@239 221 for(unsigned int j = 0; j < numVectors; j++) {
mas01cr@239 222 delete[] DD[j];
mas01cr@239 223 }
mas01cr@239 224 }
mas01cr@239 225 }
mas01cr@239 226
mas01mc@324 227 void audioDB::read_data(int trkfid, int track, double **data_buffer_p, size_t *data_buffer_size_p) {
mas01cr@239 228 if (trackTable[track] * sizeof(double) * dbH->dim > *data_buffer_size_p) {
mas01cr@239 229 if(*data_buffer_p) {
mas01cr@239 230 free(*data_buffer_p);
mas01cr@239 231 }
mas01cr@239 232 {
mas01cr@239 233 *data_buffer_size_p = trackTable[track] * sizeof(double) * dbH->dim;
mas01cr@239 234 void *tmp = malloc(*data_buffer_size_p);
mas01cr@239 235 if (tmp == NULL) {
mas01cr@239 236 error("error allocating data buffer");
mas01cr@239 237 }
mas01cr@239 238 *data_buffer_p = (double *) tmp;
mas01cr@239 239 }
mas01cr@239 240 }
mas01cr@239 241
mas01cr@370 242 CHECKED_READ(trkfid, *data_buffer_p, trackTable[track] * sizeof(double) * dbH->dim);
mas01cr@239 243 }
mas01cr@239 244
mas01cr@405 245 void audioDB::insertTimeStamps(unsigned numVectors, std::ifstream *timesFile, double *timesdata) {
mas01cr@405 246 assert(usingTimes);
mas01cr@405 247
mas01cr@405 248 unsigned numtimes = 0;
mas01cr@405 249
mas01cr@405 250 if(!timesFile->is_open()) {
mas01cr@405 251 error("problem opening times file on timestamped database", timesFileName);
mas01cr@405 252 }
mas01cr@405 253
mas01cr@405 254 double timepoint, next;
mas01cr@405 255 *timesFile >> timepoint;
mas01cr@405 256 if (timesFile->eof()) {
mas01cr@405 257 error("no entries in times file", timesFileName);
mas01cr@405 258 }
mas01cr@405 259 numtimes++;
mas01cr@405 260 do {
mas01cr@405 261 *timesFile >> next;
mas01cr@405 262 if (timesFile->eof()) {
mas01cr@405 263 break;
mas01cr@405 264 }
mas01cr@405 265 numtimes++;
mas01cr@405 266 timesdata[0] = timepoint;
mas01cr@405 267 timepoint = (timesdata[1] = next);
mas01cr@405 268 timesdata += 2;
mas01cr@405 269 } while (numtimes < numVectors + 1);
mas01cr@405 270
mas01cr@405 271 if (numtimes < numVectors + 1) {
mas01cr@405 272 error("too few timepoints in times file", timesFileName);
mas01cr@405 273 }
mas01cr@405 274
mas01cr@405 275 *timesFile >> next;
mas01cr@405 276 if (!timesFile->eof()) {
mas01cr@405 277 error("too many timepoints in times file", timesFileName);
mas01cr@405 278 }
mas01cr@405 279 }
mas01cr@405 280
mas01cr@239 281 // These names deserve some unpicking. The names starting with a "q"
mas01cr@239 282 // are pointers to the query, norm and power vectors; the names
mas01cr@239 283 // starting with "v" are things that will end up pointing to the
mas01cr@239 284 // actual query point's information. -- CSR, 2007-12-05
mas01cr@239 285 void audioDB::set_up_query(double **qp, double **vqp, double **qnp, double **vqnp, double **qpp, double **vqpp, double *mqdp, unsigned *nvp) {
mas01cr@239 286 *nvp = (statbuf.st_size - sizeof(int)) / (dbH->dim * sizeof(double));
mas01mc@292 287
mas01cr@239 288 if(!(dbH->flags & O2_FLAG_L2NORM)) {
mas01cr@239 289 error("Database must be L2 normed for sequence query","use -L2NORM");
mas01cr@239 290 }
mas01cr@239 291
mas01cr@239 292 if(*nvp < sequenceLength) {
mas01cr@239 293 error("Query shorter than requested sequence length", "maybe use -l");
mas01cr@239 294 }
mas01cr@239 295
mas01cr@239 296 VERB_LOG(1, "performing norms... ");
mas01cr@239 297
mas01cr@239 298 *qp = new double[*nvp * dbH->dim];
mas01cr@239 299 memcpy(*qp, indata+sizeof(int), *nvp * dbH->dim * sizeof(double));
mas01cr@239 300 *qnp = new double[*nvp];
mas01cr@239 301 unitNorm(*qp, dbH->dim, *nvp, *qnp);
mas01cr@239 302
mas01cr@239 303 sequence_sum(*qnp, *nvp, sequenceLength);
mas01cr@239 304 sequence_sqrt(*qnp, *nvp, sequenceLength);
mas01cr@239 305
mas01cr@239 306 if (usingPower) {
mas01cr@239 307 *qpp = new double[*nvp];
mas01cr@239 308 if (lseek(powerfd, sizeof(int), SEEK_SET) == (off_t) -1) {
mas01cr@239 309 error("error seeking to data", powerFileName, "lseek");
mas01cr@239 310 }
mas01cr@239 311 int count = read(powerfd, *qpp, *nvp * sizeof(double));
mas01cr@239 312 if (count == -1) {
mas01cr@239 313 error("error reading data", powerFileName, "read");
mas01cr@239 314 }
mas01cr@239 315 if ((unsigned) count != *nvp * sizeof(double)) {
mas01cr@239 316 error("short read", powerFileName);
mas01cr@239 317 }
mas01cr@239 318
mas01cr@239 319 sequence_sum(*qpp, *nvp, sequenceLength);
mas01cr@239 320 sequence_average(*qpp, *nvp, sequenceLength);
mas01cr@239 321 }
mas01cr@239 322
mas01cr@239 323 if (usingTimes) {
mas01cr@239 324 unsigned int k;
mas01cr@239 325 *mqdp = 0.0;
mas01cr@239 326 double *querydurs = new double[*nvp];
mas01cr@239 327 double *timesdata = new double[*nvp*2];
mas01cr@239 328 insertTimeStamps(*nvp, timesFile, timesdata);
mas01cr@239 329 for(k = 0; k < *nvp; k++) {
mas01cr@239 330 querydurs[k] = timesdata[2*k+1] - timesdata[2*k];
mas01cr@239 331 *mqdp += querydurs[k];
mas01cr@239 332 }
mas01cr@239 333 *mqdp /= k;
mas01cr@239 334
mas01cr@239 335 VERB_LOG(1, "mean query file duration: %f\n", *mqdp);
mas01cr@239 336
mas01cr@239 337 delete [] querydurs;
mas01cr@239 338 delete [] timesdata;
mas01cr@239 339 }
mas01cr@239 340
mas01cr@239 341 // Defaults, for exhaustive search (!usingQueryPoint)
mas01cr@239 342 *vqp = *qp;
mas01cr@239 343 *vqnp = *qnp;
mas01cr@239 344 *vqpp = *qpp;
mas01cr@239 345
mas01cr@239 346 if(usingQueryPoint) {
mas01mc@341 347 if( !(queryPoint < *nvp && queryPoint < *nvp - sequenceLength + 1) ) {
mas01mc@342 348 error("queryPoint >= numVectors-sequenceLength+1 in query");
mas01cr@239 349 } else {
mas01cr@239 350 VERB_LOG(1, "query point: %u\n", queryPoint);
mas01cr@239 351 *vqp = *qp + queryPoint * dbH->dim;
mas01cr@239 352 *vqnp = *qnp + queryPoint;
mas01cr@239 353 if (usingPower) {
mas01cr@239 354 *vqpp = *qpp + queryPoint;
mas01cr@239 355 }
mas01cr@239 356 *nvp = sequenceLength;
mas01cr@239 357 }
mas01cr@239 358 }
mas01cr@239 359 }
mas01cr@239 360
mas01mc@292 361 // Does the same as set_up_query(...) but from database features instead of from a file
mas01mc@292 362 // Constructs the same outputs as set_up_query
mas01mc@292 363 void audioDB::set_up_query_from_key(double **qp, double **vqp, double **qnp, double **vqnp, double **qpp, double **vqpp, double *mqdp, unsigned *nvp, Uns32T queryIndex) {
mas01mc@292 364 if(!trackTable)
mas01mc@292 365 error("trackTable not initialized","set_up_query_from_key");
mas01mc@292 366
mas01mc@292 367 if(!(dbH->flags & O2_FLAG_L2NORM)) {
mas01mc@292 368 error("Database must be L2 normed for sequence query","use -L2NORM");
mas01mc@292 369 }
mas01mc@292 370
mas01mc@292 371 if(dbH->flags & O2_FLAG_POWER)
mas01mc@292 372 usingPower = true;
mas01mc@292 373
mas01mc@292 374 if(dbH->flags & O2_FLAG_TIMES)
mas01mc@292 375 usingTimes = true;
mas01mc@292 376
mas01mc@292 377 *nvp = trackTable[queryIndex];
mas01mc@292 378 if(*nvp < sequenceLength) {
mas01mc@292 379 error("Query shorter than requested sequence length", "maybe use -l");
mas01mc@292 380 }
mas01mc@292 381
mas01mc@292 382 VERB_LOG(1, "performing norms... ");
mas01mc@292 383
mas01mc@324 384 // For LARGE_ADB load query features from file
mas01mc@324 385 if( dbH->flags & O2_FLAG_LARGE_ADB ){
mas01mc@324 386 if(infid>0)
mas01mc@324 387 close(infid);
mas01mc@324 388 char* prefixedString = new char[O2_MAXFILESTR];
mas01mc@324 389 char* tmpStr = prefixedString;
mas01mc@324 390 strncpy(prefixedString, featureFileNameTable+queryIndex*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR);
mas01mc@324 391 prefix_name(&prefixedString, adb_feature_root);
mas01mc@324 392 if(tmpStr!=prefixedString)
mas01mc@324 393 delete[] tmpStr;
mas01mc@324 394 initInputFile(prefixedString, false); // nommap, file pointer at correct position
mas01mc@324 395 size_t allocatedSize = 0;
mas01mc@324 396 read_data(infid, queryIndex, qp, &allocatedSize); // over-writes qp and allocatedSize
mas01mc@324 397 // Consistency check on allocated memory and query feature size
mas01mc@324 398 if(*nvp*sizeof(double)*dbH->dim != allocatedSize)
mas01mc@324 399 error("Query memory allocation failed consitency check","set_up_query_from_key");
mas01mc@324 400 // Allocated and calculate auxillary sequences: l2norm and power
mas01mc@324 401 init_track_aux_data(queryIndex, *qp, qnp, vqnp, qpp, vqpp);
mas01mc@324 402 }
mas01mc@324 403 else{ // Load from self-contained ADB database
mas01mc@324 404 // Read query feature vectors from database
mas01mc@324 405 *qp = NULL;
mas01mc@324 406 lseek(dbfid, dbH->dataOffset + trackOffsetTable[queryIndex] * sizeof(double), SEEK_SET);
mas01mc@324 407 size_t allocatedSize = 0;
mas01mc@324 408 read_data(dbfid, queryIndex, qp, &allocatedSize);
mas01mc@324 409 // Consistency check on allocated memory and query feature size
mas01mc@324 410 if(*nvp*sizeof(double)*dbH->dim != allocatedSize)
mas01mc@324 411 error("Query memory allocation failed consitency check","set_up_query_from_key");
mas01mc@324 412
mas01mc@324 413 Uns32T trackIndexOffset = trackOffsetTable[queryIndex]/dbH->dim; // Convert num data elements to num vectors
mas01mc@324 414 // Copy L2 norm partial-sum coefficients
mas01mc@324 415 assert(*qnp = new double[*nvp]);
mas01mc@324 416 memcpy(*qnp, l2normTable+trackIndexOffset, *nvp*sizeof(double));
mas01mc@324 417 sequence_sum(*qnp, *nvp, sequenceLength);
mas01mc@324 418 sequence_sqrt(*qnp, *nvp, sequenceLength);
mas01mc@324 419
mas01mc@324 420 if( usingPower ){
mas01mc@324 421 // Copy Power partial-sum coefficients
mas01mc@324 422 assert(*qpp = new double[*nvp]);
mas01mc@324 423 memcpy(*qpp, powerTable+trackIndexOffset, *nvp*sizeof(double));
mas01mc@324 424 sequence_sum(*qpp, *nvp, sequenceLength);
mas01mc@324 425 sequence_average(*qpp, *nvp, sequenceLength);
mas01mc@324 426 }
mas01mc@324 427
mas01mc@324 428 if (usingTimes) {
mas01mc@324 429 unsigned int k;
mas01mc@324 430 *mqdp = 0.0;
mas01mc@324 431 double *querydurs = new double[*nvp];
mas01mc@324 432 double *timesdata = new double[*nvp*2];
mas01mc@324 433 assert(querydurs && timesdata);
mas01mc@324 434 memcpy(timesdata, timesTable+trackIndexOffset, *nvp*sizeof(double));
mas01mc@324 435 for(k = 0; k < *nvp; k++) {
mas01mc@324 436 querydurs[k] = timesdata[2*k+1] - timesdata[2*k];
mas01mc@324 437 *mqdp += querydurs[k];
mas01mc@324 438 }
mas01mc@324 439 *mqdp /= k;
mas01mc@324 440
mas01mc@324 441 VERB_LOG(1, "mean query file duration: %f\n", *mqdp);
mas01mc@324 442
mas01mc@324 443 delete [] querydurs;
mas01mc@324 444 delete [] timesdata;
mas01mc@324 445 }
mas01mc@292 446 }
mas01mc@292 447
mas01mc@292 448 // Defaults, for exhaustive search (!usingQueryPoint)
mas01mc@292 449 *vqp = *qp;
mas01mc@292 450 *vqnp = *qnp;
mas01mc@292 451 *vqpp = *qpp;
mas01mc@292 452
mas01mc@292 453 if(usingQueryPoint) {
mas01mc@341 454 if( !(queryPoint < *nvp && queryPoint < *nvp - sequenceLength + 1) ) {
mas01mc@342 455 error("queryPoint >= numVectors-sequenceLength+1 in query");
mas01mc@292 456 } else {
mas01mc@292 457 VERB_LOG(1, "query point: %u\n", queryPoint);
mas01mc@292 458 *vqp = *qp + queryPoint * dbH->dim;
mas01mc@292 459 *vqnp = *qnp + queryPoint;
mas01mc@292 460 if (usingPower) {
mas01mc@292 461 *vqpp = *qpp + queryPoint;
mas01mc@292 462 }
mas01mc@292 463 *nvp = sequenceLength;
mas01mc@292 464 }
mas01mc@292 465 }
mas01mc@292 466 }
mas01mc@292 467
mas01mc@292 468
mas01cr@239 469 // FIXME: this is not the right name; we're not actually setting up
mas01cr@239 470 // the database, but copying various bits of it out of mmap()ed tables
mas01cr@239 471 // in order to reduce seeks.
mas01cr@239 472 void audioDB::set_up_db(double **snp, double **vsnp, double **spp, double **vspp, double **mddp, unsigned int *dvp) {
mas01cr@239 473 *dvp = dbH->length / (dbH->dim * sizeof(double));
mas01cr@239 474 *snp = new double[*dvp];
mas01cr@239 475
mas01cr@239 476 double *snpp = *snp, *sppp = 0;
mas01cr@239 477 memcpy(*snp, l2normTable, *dvp * sizeof(double));
mas01cr@239 478
mas01cr@239 479 if (usingPower) {
mas01cr@239 480 if (!(dbH->flags & O2_FLAG_POWER)) {
mas01cr@239 481 error("database not power-enabled", dbName);
mas01cr@239 482 }
mas01cr@239 483 *spp = new double[*dvp];
mas01cr@239 484 sppp = *spp;
mas01cr@239 485 memcpy(*spp, powerTable, *dvp * sizeof(double));
mas01cr@239 486 }
mas01cr@239 487
mas01cr@239 488 for(unsigned int i = 0; i < dbH->numFiles; i++){
mas01cr@239 489 if(trackTable[i] >= sequenceLength) {
mas01cr@239 490 sequence_sum(snpp, trackTable[i], sequenceLength);
mas01cr@239 491 sequence_sqrt(snpp, trackTable[i], sequenceLength);
mas01cr@239 492
mas01cr@239 493 if (usingPower) {
mas01cr@239 494 sequence_sum(sppp, trackTable[i], sequenceLength);
mas01cr@239 495 sequence_average(sppp, trackTable[i], sequenceLength);
mas01cr@239 496 }
mas01cr@239 497 }
mas01cr@239 498 snpp += trackTable[i];
mas01cr@239 499 if (usingPower) {
mas01cr@239 500 sppp += trackTable[i];
mas01cr@239 501 }
mas01cr@239 502 }
mas01cr@239 503
mas01cr@239 504 if (usingTimes) {
mas01cr@239 505 if(!(dbH->flags & O2_FLAG_TIMES)) {
mas01cr@239 506 error("query timestamps provided for non-timed database", dbName);
mas01cr@239 507 }
mas01cr@239 508
mas01cr@239 509 *mddp = new double[dbH->numFiles];
mas01cr@239 510
mas01cr@239 511 for(unsigned int k = 0; k < dbH->numFiles; k++) {
mas01cr@239 512 unsigned int j;
mas01cr@239 513 (*mddp)[k] = 0.0;
mas01cr@239 514 for(j = 0; j < trackTable[k]; j++) {
mas01cr@239 515 (*mddp)[k] += timesTable[2*j+1] - timesTable[2*j];
mas01cr@239 516 }
mas01cr@239 517 (*mddp)[k] /= j;
mas01cr@239 518 }
mas01cr@239 519 }
mas01cr@239 520
mas01cr@239 521 *vsnp = *snp;
mas01cr@239 522 *vspp = *spp;
mas01cr@239 523 }
mas01cr@239 524
mas01mc@292 525 // query_points()
mas01mc@292 526 //
mas01mc@292 527 // using PointPairs held in the exact_evaluation_queue compute squared distance for each PointPair
mas01mc@292 528 // and insert result into the current reporter.
mas01mc@292 529 //
mas01mc@292 530 // Preconditions:
mas01mc@292 531 // A query inFile has been opened with setup_query(...) and query pointers initialized
mas01mc@292 532 // The database contains some points
mas01mc@292 533 // An exact_evaluation_queue has been allocated and populated
mas01mc@292 534 // A reporter has been allocated
mas01mc@292 535 //
mas01mc@292 536 // Postconditions:
mas01mc@292 537 // reporter contains the points and distances that meet the reporter constraints
mas01mc@292 538
mas01mc@292 539 void audioDB::query_loop_points(double* query, double* qnPtr, double* qpPtr, double meanQdur, Uns32T numVectors){
mas01mc@292 540 unsigned int dbVectors;
mas01mc@315 541 double *sNorm = 0, *snPtr, *sPower = 0, *spPtr = 0;
mas01mc@292 542 double *meanDBdur = 0;
mas01mc@292 543
mas01mc@292 544 // check pre-conditions
mas01mc@292 545 assert(exact_evaluation_queue&&reporter);
mas01mc@292 546 if(!exact_evaluation_queue->size()) // Exit if no points to evaluate
mas01mc@292 547 return;
mas01mc@292 548
mas01mc@292 549 // Compute database info
mas01mc@292 550 // FIXME: we more than likely don't need very much of the database
mas01mc@292 551 // so make a new method to build these values per-track or, even better, per-point
mas01mc@324 552 if( !( dbH->flags & O2_FLAG_LARGE_ADB) )
mas01mc@324 553 set_up_db(&sNorm, &snPtr, &sPower, &spPtr, &meanDBdur, &dbVectors);
mas01mc@292 554
mas01mc@292 555 VERB_LOG(1, "matching points...");
mas01mc@292 556
mas01mc@292 557 assert(pointNN>0 && pointNN<=O2_MAXNN);
mas01mc@292 558 assert(trackNN>0 && trackNN<=O2_MAXNN);
mas01mc@292 559
mas01mc@292 560 // We are guaranteed that the order of points is sorted by:
mas01mc@324 561 // trackID, spos, qpos
mas01mc@292 562 // so we can be relatively efficient in initialization of track data.
mas01mc@292 563 // Here we assume that points don't overlap, so we will use exhaustive dot
mas01mc@324 564 // product evaluation instead of memoization of partial sums which is used
mas01mc@324 565 // for exhaustive brute-force evaluation from smaller databases: e.g. query_loop()
mas01mc@292 566 double dist;
mas01mc@292 567 size_t data_buffer_size = 0;
mas01mc@292 568 double *data_buffer = 0;
mas01mc@324 569 Uns32T trackOffset = 0;
mas01mc@324 570 Uns32T trackIndexOffset = 0;
mas01mc@292 571 Uns32T currentTrack = 0x80000000; // Initialize with a value outside of track index range
mas01mc@292 572 Uns32T npairs = exact_evaluation_queue->size();
mas01mc@292 573 while(npairs--){
mas01mc@292 574 PointPair pp = exact_evaluation_queue->top();
mas01mc@324 575 // Large ADB track data must be loaded here for sPower
mas01mc@324 576 if(dbH->flags & O2_FLAG_LARGE_ADB){
mas01mc@324 577 trackOffset=0;
mas01mc@324 578 trackIndexOffset=0;
mas01mc@292 579 if(currentTrack!=pp.trackID){
mas01mc@324 580 char* prefixedString = new char[O2_MAXFILESTR];
mas01mc@324 581 char* tmpStr = prefixedString;
mas01mc@324 582 // On currentTrack change, allocate and load track data
mas01mc@292 583 currentTrack=pp.trackID;
mas01mc@324 584 SAFE_DELETE_ARRAY(sNorm);
mas01mc@324 585 SAFE_DELETE_ARRAY(sPower);
mas01mc@324 586 if(infid>0)
mas01mc@324 587 close(infid);
mas01mc@324 588 // Open and check dimensions of feature file
mas01mc@324 589 strncpy(prefixedString, featureFileNameTable+pp.trackID*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR);
mas01mc@324 590 prefix_name((char ** const) &prefixedString, adb_feature_root);
mas01mc@324 591 if (prefixedString!=tmpStr)
mas01mc@324 592 delete[] tmpStr;
mas01mc@324 593 initInputFile(prefixedString, false); // nommap, file pointer at correct position
mas01mc@324 594 // Load the feature vector data for current track into data_buffer
mas01mc@324 595 read_data(infid, pp.trackID, &data_buffer, &data_buffer_size);
mas01mc@324 596 // Load power and calculate power and l2norm sequence sums
mas01mc@324 597 init_track_aux_data(pp.trackID, data_buffer, &sNorm, &snPtr, &sPower, &spPtr);
mas01mc@292 598 }
mas01mc@324 599 }
mas01mc@324 600 else{
mas01mc@324 601 // These offsets are w.r.t. the entire database of feature vectors and auxillary variables
mas01mc@324 602 trackOffset=trackOffsetTable[pp.trackID]; // num data elements offset
mas01mc@324 603 trackIndexOffset=trackOffset/dbH->dim; // num vectors offset
mas01mc@324 604 }
mas01mc@324 605 Uns32T qPos = usingQueryPoint?0:pp.qpos;// index for query point
mas01mc@324 606 Uns32T sPos = trackIndexOffset+pp.spos; // index into l2norm table
mas01mc@324 607 // Test power thresholds before computing distance
mas01mc@324 608 if( ( !usingPower || powers_acceptable(qpPtr[qPos], sPower[sPos])) &&
mas01mc@324 609 ( qPos<numVectors-sequenceLength+1 && pp.spos<trackTable[pp.trackID]-sequenceLength+1 ) ){
mas01mc@324 610 // Non-large ADB track data is loaded inside power test for efficiency
mas01mc@324 611 if( !(dbH->flags & O2_FLAG_LARGE_ADB) && (currentTrack!=pp.trackID) ){
mas01mc@324 612 // On currentTrack change, allocate and load track data
mas01mc@324 613 currentTrack=pp.trackID;
mas01mc@324 614 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET);
mas01mc@324 615 read_data(dbfid, currentTrack, &data_buffer, &data_buffer_size);
mas01mc@324 616 }
mas01mc@324 617 // Compute distance
mas01mc@324 618 dist = dot_product_points(query+qPos*dbH->dim, data_buffer+pp.spos*dbH->dim, dbH->dim*sequenceLength);
mas01mc@324 619 double qn = qnPtr[qPos];
mas01mc@324 620 double sn = sNorm[sPos];
mas01mc@292 621 if(normalizedDistance)
mas01mc@324 622 dist = 2 - (2/(qn*sn))*dist;
mas01mc@292 623 else
mas01mc@292 624 if(no_unit_norming)
mas01mc@324 625 dist = qn*qn + sn*sn - 2*dist;
mas01mc@292 626 // else
mas01mc@292 627 // dist = dist;
mas01mc@314 628 if((!radius) || dist <= (O2_LSH_EXACT_MULT*radius+O2_DISTANCE_TOLERANCE))
mas01mc@324 629 reporter->add_point(pp.trackID, pp.qpos, pp.spos, dist);
mas01mc@292 630 }
mas01mc@292 631 exact_evaluation_queue->pop();
mas01mc@292 632 }
mas01mc@315 633 // Cleanup
mas01mc@324 634 SAFE_DELETE_ARRAY(sNorm);
mas01mc@324 635 SAFE_DELETE_ARRAY(sPower);
mas01mc@324 636 SAFE_DELETE_ARRAY(meanDBdur);
mas01mc@292 637 }
mas01mc@292 638
mas01mc@292 639 // A completely unprotected dot-product method
mas01mc@292 640 // Caller is responsible for ensuring that memory is within bounds
mas01mc@292 641 inline double audioDB::dot_product_points(double* q, double* p, Uns32T L){
mas01mc@292 642 double dist = 0.0;
mas01mc@292 643 while(L--)
mas01mc@292 644 dist += *q++ * *p++;
mas01mc@292 645 return dist;
mas01mc@292 646 }
mas01mc@292 647
mas01mc@292 648 void audioDB::query_loop(const char* dbName, Uns32T queryIndex) {
mas01cr@239 649
mas01cr@239 650 unsigned int numVectors;
mas01cr@239 651 double *query, *query_data;
mas01cr@239 652 double *qNorm, *qnPtr, *qPower = 0, *qpPtr = 0;
mas01cr@239 653 double meanQdur;
mas01cr@239 654
mas01mc@324 655 if( dbH->flags & O2_FLAG_LARGE_ADB )
mas01mc@324 656 error("error: LARGE_ADB requires indexed query");
mas01mc@324 657
mas01mc@292 658 if(query_from_key)
mas01mc@292 659 set_up_query_from_key(&query_data, &query, &qNorm, &qnPtr, &qPower, &qpPtr, &meanQdur, &numVectors, queryIndex);
mas01mc@292 660 else
mas01mc@292 661 set_up_query(&query_data, &query, &qNorm, &qnPtr, &qPower, &qpPtr, &meanQdur, &numVectors);
mas01cr@239 662
mas01cr@239 663 unsigned int dbVectors;
mas01cr@239 664 double *sNorm, *snPtr, *sPower = 0, *spPtr = 0;
mas01cr@239 665 double *meanDBdur = 0;
mas01cr@239 666
mas01cr@239 667 set_up_db(&sNorm, &snPtr, &sPower, &spPtr, &meanDBdur, &dbVectors);
mas01cr@239 668
mas01cr@239 669 VERB_LOG(1, "matching tracks...");
mas01cr@239 670
mas01cr@239 671 assert(pointNN>0 && pointNN<=O2_MAXNN);
mas01cr@239 672 assert(trackNN>0 && trackNN<=O2_MAXNN);
mas01cr@239 673
mas01cr@239 674 unsigned j,k,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength;
mas01cr@239 675 double **D = 0; // Differences query and target
mas01cr@239 676 double **DD = 0; // Matched filter distance
mas01cr@239 677
mas01mc@292 678 D = new double*[numVectors]; // pre-allocate
mas01cr@239 679 DD = new double*[numVectors];
mas01cr@239 680
mas01cr@239 681 gettimeofday(&tv1, NULL);
mas01cr@239 682 unsigned processedTracks = 0;
mas01cr@239 683 off_t trackIndexOffset;
mas01cr@239 684 char nextKey[MAXSTR];
mas01cr@239 685
mas01cr@239 686 // Track loop
mas01cr@239 687 size_t data_buffer_size = 0;
mas01cr@239 688 double *data_buffer = 0;
mas01cr@239 689 lseek(dbfid, dbH->dataOffset, SEEK_SET);
mas01cr@239 690
mas01cr@239 691 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++) {
mas01cr@239 692
mas01cr@239 693 trackOffset = trackOffsetTable[track]; // numDoubles offset
mas01cr@239 694
mas01cr@239 695 // get trackID from file if using a control file
mas01cr@239 696 if(trackFile) {
mas01cr@239 697 trackFile->getline(nextKey,MAXSTR);
mas01cr@239 698 if(!trackFile->eof()) {
mas01cr@239 699 track = getKeyPos(nextKey);
mas01cr@239 700 trackOffset = trackOffsetTable[track];
mas01cr@239 701 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET);
mas01cr@239 702 } else {
mas01cr@239 703 break;
mas01cr@239 704 }
mas01cr@239 705 }
mas01cr@239 706
mas01mc@292 707 // skip identity on query_from_key
mas01mc@292 708 if( query_from_key && (track == queryIndex) ) {
mas01mc@292 709 if(queryIndex!=dbH->numFiles-1){
mas01mc@292 710 track++;
mas01mc@292 711 trackOffset = trackOffsetTable[track];
mas01mc@292 712 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET);
mas01mc@292 713 }
mas01mc@292 714 else{
mas01mc@292 715 break;
mas01mc@292 716 }
mas01mc@292 717 }
mas01mc@292 718
mas01cr@239 719 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset
mas01cr@239 720
mas01mc@324 721 read_data(dbfid, track, &data_buffer, &data_buffer_size);
mas01cr@239 722 if(sequenceLength <= trackTable[track]) { // test for short sequences
mas01cr@239 723
mas01cr@239 724 VERB_LOG(7,"%u.%jd.%u | ", track, (intmax_t) trackIndexOffset, trackTable[track]);
mas01cr@239 725
mas01cr@239 726 initialize_arrays(track, numVectors, query, data_buffer, D, DD);
mas01cr@239 727
mas01cr@239 728 if(usingTimes) {
mas01cr@239 729 VERB_LOG(3,"meanQdur=%f meanDBdur=%f\n", meanQdur, meanDBdur[track]);
mas01cr@239 730 }
mas01cr@239 731
mas01cr@239 732 if((!usingTimes) || fabs(meanDBdur[track]-meanQdur) < meanQdur*timesTol) {
mas01cr@239 733 if(usingTimes) {
mas01cr@239 734 VERB_LOG(3,"within duration tolerance.\n");
mas01cr@239 735 }
mas01cr@239 736
mas01cr@239 737 // Search for minimum distance by shingles (concatenated vectors)
mas01cr@239 738 for(j = 0; j <= numVectors - wL; j += HOP_SIZE) {
mas01cr@239 739 for(k = 0; k <= trackTable[track] - wL; k += HOP_SIZE) {
mas01cr@239 740 double thisDist;
mas01mc@292 741 if(normalizedDistance)
mas01cr@239 742 thisDist = 2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k];
mas01mc@292 743 else
mas01mc@292 744 if(no_unit_norming)
mas01mc@292 745 thisDist = qnPtr[j]*qnPtr[j]+sNorm[trackIndexOffset+k]*sNorm[trackIndexOffset+k] - 2*DD[j][k];
mas01mc@292 746 else
mas01mc@292 747 thisDist = DD[j][k];
mas01mc@292 748
mas01cr@239 749 // Power test
mas01cr@239 750 if ((!usingPower) || powers_acceptable(qpPtr[j], sPower[trackIndexOffset + k])) {
mas01cr@239 751 // radius test
mas01mc@292 752 if((!radius) || thisDist <= (radius+O2_DISTANCE_TOLERANCE)) {
mas01cr@239 753 reporter->add_point(track, usingQueryPoint ? queryPoint : j, k, thisDist);
mas01cr@239 754 }
mas01cr@239 755 }
mas01cr@239 756 }
mas01cr@239 757 }
mas01cr@239 758 } // Duration match
mas01cr@239 759 delete_arrays(track, numVectors, D, DD);
mas01cr@239 760 }
mas01cr@239 761 }
mas01cr@239 762
mas01cr@239 763 free(data_buffer);
mas01cr@239 764
mas01cr@239 765 gettimeofday(&tv2,NULL);
mas01cr@239 766 VERB_LOG(1,"elapsed time: %ld msec\n",
mas01cr@239 767 (tv2.tv_sec*1000 + tv2.tv_usec/1000) -
mas01cr@239 768 (tv1.tv_sec*1000 + tv1.tv_usec/1000))
mas01cr@239 769
mas01cr@239 770 // Clean up
mas01cr@239 771 if(query_data)
mas01cr@239 772 delete[] query_data;
mas01cr@239 773 if(qNorm)
mas01cr@239 774 delete[] qNorm;
mas01cr@239 775 if(sNorm)
mas01cr@239 776 delete[] sNorm;
mas01cr@239 777 if(qPower)
mas01cr@239 778 delete[] qPower;
mas01cr@239 779 if(sPower)
mas01cr@239 780 delete[] sPower;
mas01cr@239 781 if(D)
mas01cr@239 782 delete[] D;
mas01cr@239 783 if(DD)
mas01cr@239 784 delete[] DD;
mas01cr@239 785 if(meanDBdur)
mas01cr@239 786 delete[] meanDBdur;
mas01cr@239 787 }
mas01cr@239 788
mas01cr@239 789 // Unit norm block of features
mas01cr@239 790 void audioDB::unitNorm(double* X, unsigned dim, unsigned n, double* qNorm){
mas01cr@239 791 unsigned d;
mas01cr@239 792 double L2, *p;
mas01cr@239 793
mas01cr@239 794 VERB_LOG(2, "norming %u vectors...", n);
mas01cr@239 795 while(n--) {
mas01cr@239 796 p = X;
mas01cr@239 797 L2 = 0.0;
mas01cr@239 798 d = dim;
mas01cr@239 799 while(d--) {
mas01cr@239 800 L2 += *p * *p;
mas01cr@239 801 p++;
mas01cr@239 802 }
mas01cr@239 803 if(qNorm) {
mas01cr@239 804 *qNorm++=L2;
mas01cr@239 805 }
mas01cr@239 806 X += dim;
mas01cr@239 807 }
mas01cr@239 808 VERB_LOG(2, "done.\n");
mas01cr@239 809 }
mas01mc@292 810
mas01mc@292 811