annotate query.cpp @ 427:adaa6a688a04 api-inversion

Move sequence_foo() functions out of audioDB:: namespace... ... and into the internals header, mostly to get them out of the way. That means they have to be inline, which is probably suboptimal but will do for now.
author mas01cr
date Wed, 24 Dec 2008 10:55:24 +0000
parents 4a22a0bdf9a9
children 2d14d21f826b
rev   line source
mas01cr@239 1 #include "audioDB.h"
mas01cr@239 2 #include "reporter.h"
mas01cr@239 3
mas01cr@422 4 #include "audioDB-internals.h"
mas01cr@422 5 #include "accumulators.h"
mas01cr@422 6
mas01cr@425 7 static bool audiodb_powers_acceptable(adb_query_refine_t *r, double p1, double p2) {
mas01cr@425 8 if (r->flags & ADB_REFINE_ABSOLUTE_THRESHOLD) {
mas01cr@425 9 if ((p1 < r->absolute_threshold) || (p2 < r->absolute_threshold)) {
mas01cr@239 10 return false;
mas01cr@239 11 }
mas01cr@239 12 }
mas01cr@425 13 if (r->flags & ADB_REFINE_RELATIVE_THRESHOLD) {
mas01cr@425 14 if (fabs(p1-p2) > fabs(r->relative_threshold)) {
mas01cr@239 15 return false;
mas01cr@239 16 }
mas01cr@239 17 }
mas01cr@239 18 return true;
mas01cr@239 19 }
mas01cr@239 20
mas01cr@239 21 void audioDB::query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) {
mas01cr@425 22
mas01cr@425 23 adb_query_refine_t refine;
mas01cr@425 24 refine.flags = 0;
mas01cr@425 25 /* FIXME: trackFile / ADB_REFINE_KEYLIST */
mas01cr@425 26 if(radius) {
mas01cr@425 27 refine.flags |= ADB_REFINE_RADIUS;
mas01cr@425 28 refine.radius = radius;
mas01cr@425 29 }
mas01cr@425 30 if(use_absolute_threshold) {
mas01cr@425 31 refine.flags |= ADB_REFINE_ABSOLUTE_THRESHOLD;
mas01cr@425 32 refine.absolute_threshold = absolute_threshold;
mas01cr@425 33 }
mas01cr@425 34 if(use_relative_threshold) {
mas01cr@425 35 refine.flags |= ADB_REFINE_RELATIVE_THRESHOLD;
mas01cr@425 36 refine.relative_threshold = relative_threshold;
mas01cr@425 37 }
mas01cr@425 38 if(usingTimes) {
mas01cr@425 39 refine.flags |= ADB_REFINE_DURATION_RATIO;
mas01cr@425 40 refine.duration_ratio = timesTol;
mas01cr@425 41 }
mas01cr@425 42 /* FIXME: not sure about this any more; maybe it belongs in query_id */
mas01cr@425 43 if(sequenceHop != 1) {
mas01cr@425 44 refine.flags |= ADB_REFINE_HOP_SIZE;
mas01cr@425 45 refine.hopsize = sequenceHop;
mas01cr@425 46 }
mas01cr@425 47
mas01mc@292 48 // init database tables and dbH first
mas01mc@292 49 if(query_from_key)
mas01mc@292 50 initTables(dbName);
mas01mc@292 51 else
mas01mc@292 52 initTables(dbName, inFile);
mas01mc@292 53
mas01mc@292 54 // keyKeyPos requires dbH to be initialized
mas01mc@292 55 if(query_from_key && (!key || (query_from_key_index = getKeyPos((char*)key))==O2_ERR_KEYNOTFOUND))
mas01mc@292 56 error("Query key not found :",key);
mas01mc@292 57
mas01cr@239 58 switch (queryType) {
mas01cr@239 59 case O2_POINT_QUERY:
mas01cr@239 60 sequenceLength = 1;
mas01cr@239 61 normalizedDistance = false;
mas01mc@292 62 reporter = new pointQueryReporter< std::greater < NNresult > >(pointNN);
mas01cr@422 63 accumulator = new DBAccumulator<adb_result_dist_gt>(pointNN);
mas01cr@239 64 break;
mas01cr@239 65 case O2_TRACK_QUERY:
mas01cr@239 66 sequenceLength = 1;
mas01cr@239 67 normalizedDistance = false;
mas01mc@292 68 reporter = new trackAveragingReporter< std::greater< NNresult > >(pointNN, trackNN, dbH->numFiles);
mas01cr@422 69 accumulator = new PerTrackAccumulator<adb_result_dist_gt>(pointNN, trackNN);
mas01cr@239 70 break;
mas01mc@292 71 case O2_SEQUENCE_QUERY:
mas01mc@292 72 if(no_unit_norming)
mas01mc@292 73 normalizedDistance = false;
mas01cr@422 74 accumulator = new PerTrackAccumulator<adb_result_dist_lt>(pointNN, trackNN);
mas01cr@425 75 if(!(refine.flags & ADB_REFINE_RADIUS)) {
mas01mc@292 76 reporter = new trackAveragingReporter< std::less< NNresult > >(pointNN, trackNN, dbH->numFiles);
mas01cr@239 77 } else {
mas01mc@292 78 if(index_exists(dbName, radius, sequenceLength)){
mas01mc@292 79 char* indexName = index_get_name(dbName, radius, sequenceLength);
mas01mc@308 80 lsh = index_allocate(indexName, false);
mas01mc@324 81 reporter = new trackSequenceQueryRadReporter(trackNN, index_to_trackID(lsh->get_maxp(), lsh_n_point_bits)+1);
mas01mc@292 82 delete[] indexName;
mas01mc@292 83 }
mas01mc@292 84 else
mas01mc@292 85 reporter = new trackSequenceQueryRadReporter(trackNN, dbH->numFiles);
mas01cr@239 86 }
mas01cr@239 87 break;
mas01mc@292 88 case O2_N_SEQUENCE_QUERY:
mas01mc@292 89 if(no_unit_norming)
mas01mc@292 90 normalizedDistance = false;
mas01cr@422 91 accumulator = new PerTrackAccumulator<adb_result_dist_lt>(pointNN, trackNN);
mas01cr@425 92 if(!(refine.flags & ADB_REFINE_RADIUS)) {
mas01mc@292 93 reporter = new trackSequenceQueryNNReporter< std::less < NNresult > >(pointNN, trackNN, dbH->numFiles);
mas01mc@248 94 } else {
mas01mc@292 95 if(index_exists(dbName, radius, sequenceLength)){
mas01mc@292 96 char* indexName = index_get_name(dbName, radius, sequenceLength);
mas01mc@308 97 lsh = index_allocate(indexName, false);
mas01mc@324 98 reporter = new trackSequenceQueryRadNNReporter(pointNN,trackNN, index_to_trackID(lsh->get_maxp(), lsh_n_point_bits)+1);
mas01mc@292 99 delete[] indexName;
mas01mc@292 100 }
mas01mc@292 101 else
mas01mc@292 102 reporter = new trackSequenceQueryRadNNReporter(pointNN,trackNN, dbH->numFiles);
mas01mc@248 103 }
mas01mc@248 104 break;
mas01mc@263 105 case O2_ONE_TO_ONE_N_SEQUENCE_QUERY :
mas01cr@422 106 accumulator = new NearestAccumulator<adb_result_dist_lt>();
mas01cr@425 107 if(!(refine.flags & ADB_REFINE_RADIUS)) {
mas01mc@263 108 error("query-type not yet supported");
mas01mc@263 109 } else {
mas01mc@292 110 reporter = new trackSequenceQueryRadNNReporterOneToOne(pointNN,trackNN, dbH->numFiles);
mas01mc@263 111 }
mas01mc@263 112 break;
mas01cr@239 113 default:
mas01cr@239 114 error("unrecognized queryType in query()");
mas01cr@239 115 }
mas01mc@292 116
mas01mc@292 117 // Test for index (again) here
mas01cr@425 118 if((refine.flags & ADB_REFINE_RADIUS) && index_exists(dbName, radius, sequenceLength)){
mas01mc@329 119 VERB_LOG(1, "Calling indexed query on database %s, radius=%f, sequenceLength=%d\n", dbName, radius, sequenceLength);
mas01cr@425 120 index_query_loop(&refine, dbName, query_from_key_index);
mas01mc@329 121 }
mas01mc@329 122 else{
mas01mc@329 123 VERB_LOG(1, "Calling brute-force query on database %s\n", dbName);
mas01cr@425 124 query_loop(&refine, query_from_key_index);
mas01mc@329 125 }
mas01mc@292 126
mas01cr@423 127 adb_query_results_t *rs = accumulator->get_points();
mas01cr@423 128 for(unsigned int k = 0; k < rs->nresults; k++) {
mas01cr@423 129 adb_result_t r = rs->results[k];
mas01cr@423 130 reporter->add_point(getKeyPos(r.key), r.qpos, r.ipos, r.dist);
mas01cr@423 131 }
mas01cr@423 132
mas01mc@292 133 reporter->report(fileTable, adbQueryResponse);
mas01cr@239 134 }
mas01cr@239 135
mas01cr@239 136 // return ordinal position of key in keyTable
mas01mc@292 137 // this should really be a STL hash map search
mas01cr@423 138 unsigned audioDB::getKeyPos(const char* key){
mas01mc@292 139 if(!dbH)
mas01mc@292 140 error("dbH not initialized","getKeyPos");
mas01cr@239 141 for(unsigned k=0; k<dbH->numFiles; k++)
mas01cr@256 142 if(strncmp(fileTable + k*O2_FILETABLE_ENTRY_SIZE, key, strlen(key))==0)
mas01cr@239 143 return k;
mas01cr@239 144 error("Key not found",key);
mas01cr@239 145 return O2_ERR_KEYNOTFOUND;
mas01cr@239 146 }
mas01cr@239 147
mas01cr@239 148 void audioDB::initialize_arrays(int track, unsigned int numVectors, double *query, double *data_buffer, double **D, double **DD) {
mas01cr@239 149 unsigned int j, k, l, w;
mas01cr@239 150 double *dp, *qp, *sp;
mas01cr@239 151
mas01cr@239 152 const unsigned HOP_SIZE = sequenceHop;
mas01cr@239 153 const unsigned wL = sequenceLength;
mas01cr@239 154
mas01cr@239 155 for(j = 0; j < numVectors; j++) {
mas01cr@239 156 // Sum products matrix
mas01cr@239 157 D[j] = new double[trackTable[track]];
mas01cr@239 158 assert(D[j]);
mas01cr@239 159 // Matched filter matrix
mas01cr@239 160 DD[j]=new double[trackTable[track]];
mas01cr@239 161 assert(DD[j]);
mas01cr@239 162 }
mas01cr@239 163
mas01cr@239 164 // Dot product
mas01cr@239 165 for(j = 0; j < numVectors; j++)
mas01cr@239 166 for(k = 0; k < trackTable[track]; k++){
mas01cr@239 167 qp = query + j * dbH->dim;
mas01cr@239 168 sp = data_buffer + k * dbH->dim;
mas01cr@239 169 DD[j][k] = 0.0; // Initialize matched filter array
mas01cr@239 170 dp = &D[j][k]; // point to correlation cell j,k
mas01cr@239 171 *dp = 0.0; // initialize correlation cell
mas01cr@239 172 l = dbH->dim; // size of vectors
mas01cr@239 173 while(l--)
mas01cr@239 174 *dp += *qp++ * *sp++;
mas01cr@239 175 }
mas01cr@239 176
mas01cr@239 177 // Matched Filter
mas01cr@239 178 // HOP SIZE == 1
mas01cr@239 179 double* spd;
mas01cr@239 180 if(HOP_SIZE == 1) { // HOP_SIZE = shingleHop
mas01cr@239 181 for(w = 0; w < wL; w++) {
mas01cr@239 182 for(j = 0; j < numVectors - w; j++) {
mas01cr@239 183 sp = DD[j];
mas01cr@239 184 spd = D[j+w] + w;
mas01cr@239 185 k = trackTable[track] - w;
mas01mc@292 186 while(k--)
mas01mc@292 187 *sp++ += *spd++;
mas01cr@239 188 }
mas01cr@239 189 }
mas01cr@239 190 } else { // HOP_SIZE != 1
mas01cr@239 191 for(w = 0; w < wL; w++) {
mas01cr@239 192 for(j = 0; j < numVectors - w; j += HOP_SIZE) {
mas01cr@239 193 sp = DD[j];
mas01cr@239 194 spd = D[j+w]+w;
mas01cr@239 195 for(k = 0; k < trackTable[track] - w; k += HOP_SIZE) {
mas01cr@239 196 *sp += *spd;
mas01cr@239 197 sp += HOP_SIZE;
mas01cr@239 198 spd += HOP_SIZE;
mas01cr@239 199 }
mas01cr@239 200 }
mas01cr@239 201 }
mas01cr@239 202 }
mas01cr@239 203 }
mas01cr@239 204
mas01cr@239 205 void audioDB::delete_arrays(int track, unsigned int numVectors, double **D, double **DD) {
mas01cr@239 206 if(D != NULL) {
mas01cr@239 207 for(unsigned int j = 0; j < numVectors; j++) {
mas01cr@239 208 delete[] D[j];
mas01cr@239 209 }
mas01cr@239 210 }
mas01cr@239 211 if(DD != NULL) {
mas01cr@239 212 for(unsigned int j = 0; j < numVectors; j++) {
mas01cr@239 213 delete[] DD[j];
mas01cr@239 214 }
mas01cr@239 215 }
mas01cr@239 216 }
mas01cr@239 217
mas01mc@324 218 void audioDB::read_data(int trkfid, int track, double **data_buffer_p, size_t *data_buffer_size_p) {
mas01cr@239 219 if (trackTable[track] * sizeof(double) * dbH->dim > *data_buffer_size_p) {
mas01cr@239 220 if(*data_buffer_p) {
mas01cr@239 221 free(*data_buffer_p);
mas01cr@239 222 }
mas01cr@239 223 {
mas01cr@239 224 *data_buffer_size_p = trackTable[track] * sizeof(double) * dbH->dim;
mas01cr@239 225 void *tmp = malloc(*data_buffer_size_p);
mas01cr@239 226 if (tmp == NULL) {
mas01cr@239 227 error("error allocating data buffer");
mas01cr@239 228 }
mas01cr@239 229 *data_buffer_p = (double *) tmp;
mas01cr@239 230 }
mas01cr@239 231 }
mas01cr@239 232
mas01cr@370 233 CHECKED_READ(trkfid, *data_buffer_p, trackTable[track] * sizeof(double) * dbH->dim);
mas01cr@239 234 }
mas01cr@239 235
mas01cr@405 236 void audioDB::insertTimeStamps(unsigned numVectors, std::ifstream *timesFile, double *timesdata) {
mas01cr@405 237 assert(usingTimes);
mas01cr@405 238
mas01cr@405 239 unsigned numtimes = 0;
mas01cr@405 240
mas01cr@405 241 if(!timesFile->is_open()) {
mas01cr@405 242 error("problem opening times file on timestamped database", timesFileName);
mas01cr@405 243 }
mas01cr@405 244
mas01cr@405 245 double timepoint, next;
mas01cr@405 246 *timesFile >> timepoint;
mas01cr@405 247 if (timesFile->eof()) {
mas01cr@405 248 error("no entries in times file", timesFileName);
mas01cr@405 249 }
mas01cr@405 250 numtimes++;
mas01cr@405 251 do {
mas01cr@405 252 *timesFile >> next;
mas01cr@405 253 if (timesFile->eof()) {
mas01cr@405 254 break;
mas01cr@405 255 }
mas01cr@405 256 numtimes++;
mas01cr@405 257 timesdata[0] = timepoint;
mas01cr@405 258 timepoint = (timesdata[1] = next);
mas01cr@405 259 timesdata += 2;
mas01cr@405 260 } while (numtimes < numVectors + 1);
mas01cr@405 261
mas01cr@405 262 if (numtimes < numVectors + 1) {
mas01cr@405 263 error("too few timepoints in times file", timesFileName);
mas01cr@405 264 }
mas01cr@405 265
mas01cr@405 266 *timesFile >> next;
mas01cr@405 267 if (!timesFile->eof()) {
mas01cr@405 268 error("too many timepoints in times file", timesFileName);
mas01cr@405 269 }
mas01cr@405 270 }
mas01cr@405 271
mas01cr@239 272 // These names deserve some unpicking. The names starting with a "q"
mas01cr@239 273 // are pointers to the query, norm and power vectors; the names
mas01cr@239 274 // starting with "v" are things that will end up pointing to the
mas01cr@239 275 // actual query point's information. -- CSR, 2007-12-05
mas01cr@239 276 void audioDB::set_up_query(double **qp, double **vqp, double **qnp, double **vqnp, double **qpp, double **vqpp, double *mqdp, unsigned *nvp) {
mas01cr@239 277 *nvp = (statbuf.st_size - sizeof(int)) / (dbH->dim * sizeof(double));
mas01mc@292 278
mas01cr@239 279 if(!(dbH->flags & O2_FLAG_L2NORM)) {
mas01cr@239 280 error("Database must be L2 normed for sequence query","use -L2NORM");
mas01cr@239 281 }
mas01cr@239 282
mas01cr@239 283 if(*nvp < sequenceLength) {
mas01cr@239 284 error("Query shorter than requested sequence length", "maybe use -l");
mas01cr@239 285 }
mas01cr@239 286
mas01cr@239 287 VERB_LOG(1, "performing norms... ");
mas01cr@239 288
mas01cr@239 289 *qp = new double[*nvp * dbH->dim];
mas01cr@239 290 memcpy(*qp, indata+sizeof(int), *nvp * dbH->dim * sizeof(double));
mas01cr@239 291 *qnp = new double[*nvp];
mas01cr@426 292 audiodb_l2norm_buffer(*qp, dbH->dim, *nvp, *qnp);
mas01cr@239 293
mas01cr@427 294 audiodb_sequence_sum(*qnp, *nvp, sequenceLength);
mas01cr@427 295 audiodb_sequence_sqrt(*qnp, *nvp, sequenceLength);
mas01cr@239 296
mas01cr@239 297 if (usingPower) {
mas01cr@239 298 *qpp = new double[*nvp];
mas01cr@239 299 if (lseek(powerfd, sizeof(int), SEEK_SET) == (off_t) -1) {
mas01cr@239 300 error("error seeking to data", powerFileName, "lseek");
mas01cr@239 301 }
mas01cr@239 302 int count = read(powerfd, *qpp, *nvp * sizeof(double));
mas01cr@239 303 if (count == -1) {
mas01cr@239 304 error("error reading data", powerFileName, "read");
mas01cr@239 305 }
mas01cr@239 306 if ((unsigned) count != *nvp * sizeof(double)) {
mas01cr@239 307 error("short read", powerFileName);
mas01cr@239 308 }
mas01cr@239 309
mas01cr@427 310 audiodb_sequence_sum(*qpp, *nvp, sequenceLength);
mas01cr@427 311 audiodb_sequence_average(*qpp, *nvp, sequenceLength);
mas01cr@239 312 }
mas01cr@239 313
mas01cr@239 314 if (usingTimes) {
mas01cr@239 315 unsigned int k;
mas01cr@239 316 *mqdp = 0.0;
mas01cr@239 317 double *querydurs = new double[*nvp];
mas01cr@239 318 double *timesdata = new double[*nvp*2];
mas01cr@239 319 insertTimeStamps(*nvp, timesFile, timesdata);
mas01cr@239 320 for(k = 0; k < *nvp; k++) {
mas01cr@239 321 querydurs[k] = timesdata[2*k+1] - timesdata[2*k];
mas01cr@239 322 *mqdp += querydurs[k];
mas01cr@239 323 }
mas01cr@239 324 *mqdp /= k;
mas01cr@239 325
mas01cr@239 326 VERB_LOG(1, "mean query file duration: %f\n", *mqdp);
mas01cr@239 327
mas01cr@239 328 delete [] querydurs;
mas01cr@239 329 delete [] timesdata;
mas01cr@239 330 }
mas01cr@239 331
mas01cr@239 332 // Defaults, for exhaustive search (!usingQueryPoint)
mas01cr@239 333 *vqp = *qp;
mas01cr@239 334 *vqnp = *qnp;
mas01cr@239 335 *vqpp = *qpp;
mas01cr@239 336
mas01cr@239 337 if(usingQueryPoint) {
mas01mc@341 338 if( !(queryPoint < *nvp && queryPoint < *nvp - sequenceLength + 1) ) {
mas01mc@342 339 error("queryPoint >= numVectors-sequenceLength+1 in query");
mas01cr@239 340 } else {
mas01cr@239 341 VERB_LOG(1, "query point: %u\n", queryPoint);
mas01cr@239 342 *vqp = *qp + queryPoint * dbH->dim;
mas01cr@239 343 *vqnp = *qnp + queryPoint;
mas01cr@239 344 if (usingPower) {
mas01cr@239 345 *vqpp = *qpp + queryPoint;
mas01cr@239 346 }
mas01cr@239 347 *nvp = sequenceLength;
mas01cr@239 348 }
mas01cr@239 349 }
mas01cr@239 350 }
mas01cr@239 351
mas01mc@292 352 // Does the same as set_up_query(...) but from database features instead of from a file
mas01mc@292 353 // Constructs the same outputs as set_up_query
mas01mc@292 354 void audioDB::set_up_query_from_key(double **qp, double **vqp, double **qnp, double **vqnp, double **qpp, double **vqpp, double *mqdp, unsigned *nvp, Uns32T queryIndex) {
mas01mc@292 355 if(!trackTable)
mas01mc@292 356 error("trackTable not initialized","set_up_query_from_key");
mas01mc@292 357
mas01mc@292 358 if(!(dbH->flags & O2_FLAG_L2NORM)) {
mas01mc@292 359 error("Database must be L2 normed for sequence query","use -L2NORM");
mas01mc@292 360 }
mas01mc@292 361
mas01mc@292 362 if(dbH->flags & O2_FLAG_POWER)
mas01mc@292 363 usingPower = true;
mas01mc@292 364
mas01mc@292 365 if(dbH->flags & O2_FLAG_TIMES)
mas01mc@292 366 usingTimes = true;
mas01mc@292 367
mas01mc@292 368 *nvp = trackTable[queryIndex];
mas01mc@292 369 if(*nvp < sequenceLength) {
mas01mc@292 370 error("Query shorter than requested sequence length", "maybe use -l");
mas01mc@292 371 }
mas01mc@292 372
mas01mc@292 373 VERB_LOG(1, "performing norms... ");
mas01mc@292 374
mas01mc@324 375 // For LARGE_ADB load query features from file
mas01mc@324 376 if( dbH->flags & O2_FLAG_LARGE_ADB ){
mas01mc@324 377 if(infid>0)
mas01mc@324 378 close(infid);
mas01mc@324 379 char* prefixedString = new char[O2_MAXFILESTR];
mas01mc@324 380 char* tmpStr = prefixedString;
mas01mc@324 381 strncpy(prefixedString, featureFileNameTable+queryIndex*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR);
mas01mc@324 382 prefix_name(&prefixedString, adb_feature_root);
mas01mc@324 383 if(tmpStr!=prefixedString)
mas01mc@324 384 delete[] tmpStr;
mas01mc@324 385 initInputFile(prefixedString, false); // nommap, file pointer at correct position
mas01mc@324 386 size_t allocatedSize = 0;
mas01mc@324 387 read_data(infid, queryIndex, qp, &allocatedSize); // over-writes qp and allocatedSize
mas01mc@324 388 // Consistency check on allocated memory and query feature size
mas01mc@324 389 if(*nvp*sizeof(double)*dbH->dim != allocatedSize)
mas01mc@324 390 error("Query memory allocation failed consitency check","set_up_query_from_key");
mas01mc@324 391 // Allocated and calculate auxillary sequences: l2norm and power
mas01mc@324 392 init_track_aux_data(queryIndex, *qp, qnp, vqnp, qpp, vqpp);
mas01mc@324 393 }
mas01mc@324 394 else{ // Load from self-contained ADB database
mas01mc@324 395 // Read query feature vectors from database
mas01mc@324 396 *qp = NULL;
mas01mc@324 397 lseek(dbfid, dbH->dataOffset + trackOffsetTable[queryIndex] * sizeof(double), SEEK_SET);
mas01mc@324 398 size_t allocatedSize = 0;
mas01mc@324 399 read_data(dbfid, queryIndex, qp, &allocatedSize);
mas01mc@324 400 // Consistency check on allocated memory and query feature size
mas01mc@324 401 if(*nvp*sizeof(double)*dbH->dim != allocatedSize)
mas01mc@324 402 error("Query memory allocation failed consitency check","set_up_query_from_key");
mas01mc@324 403
mas01mc@324 404 Uns32T trackIndexOffset = trackOffsetTable[queryIndex]/dbH->dim; // Convert num data elements to num vectors
mas01mc@324 405 // Copy L2 norm partial-sum coefficients
mas01mc@324 406 assert(*qnp = new double[*nvp]);
mas01mc@324 407 memcpy(*qnp, l2normTable+trackIndexOffset, *nvp*sizeof(double));
mas01cr@427 408 audiodb_sequence_sum(*qnp, *nvp, sequenceLength);
mas01cr@427 409 audiodb_sequence_sqrt(*qnp, *nvp, sequenceLength);
mas01mc@324 410
mas01mc@324 411 if( usingPower ){
mas01mc@324 412 // Copy Power partial-sum coefficients
mas01mc@324 413 assert(*qpp = new double[*nvp]);
mas01mc@324 414 memcpy(*qpp, powerTable+trackIndexOffset, *nvp*sizeof(double));
mas01cr@427 415 audiodb_sequence_sum(*qpp, *nvp, sequenceLength);
mas01cr@427 416 audiodb_sequence_average(*qpp, *nvp, sequenceLength);
mas01mc@324 417 }
mas01mc@324 418
mas01mc@324 419 if (usingTimes) {
mas01mc@324 420 unsigned int k;
mas01mc@324 421 *mqdp = 0.0;
mas01mc@324 422 double *querydurs = new double[*nvp];
mas01mc@324 423 double *timesdata = new double[*nvp*2];
mas01mc@324 424 assert(querydurs && timesdata);
mas01mc@324 425 memcpy(timesdata, timesTable+trackIndexOffset, *nvp*sizeof(double));
mas01mc@324 426 for(k = 0; k < *nvp; k++) {
mas01mc@324 427 querydurs[k] = timesdata[2*k+1] - timesdata[2*k];
mas01mc@324 428 *mqdp += querydurs[k];
mas01mc@324 429 }
mas01mc@324 430 *mqdp /= k;
mas01mc@324 431
mas01mc@324 432 VERB_LOG(1, "mean query file duration: %f\n", *mqdp);
mas01mc@324 433
mas01mc@324 434 delete [] querydurs;
mas01mc@324 435 delete [] timesdata;
mas01mc@324 436 }
mas01mc@292 437 }
mas01mc@292 438
mas01mc@292 439 // Defaults, for exhaustive search (!usingQueryPoint)
mas01mc@292 440 *vqp = *qp;
mas01mc@292 441 *vqnp = *qnp;
mas01mc@292 442 *vqpp = *qpp;
mas01mc@292 443
mas01mc@292 444 if(usingQueryPoint) {
mas01mc@341 445 if( !(queryPoint < *nvp && queryPoint < *nvp - sequenceLength + 1) ) {
mas01mc@342 446 error("queryPoint >= numVectors-sequenceLength+1 in query");
mas01mc@292 447 } else {
mas01mc@292 448 VERB_LOG(1, "query point: %u\n", queryPoint);
mas01mc@292 449 *vqp = *qp + queryPoint * dbH->dim;
mas01mc@292 450 *vqnp = *qnp + queryPoint;
mas01mc@292 451 if (usingPower) {
mas01mc@292 452 *vqpp = *qpp + queryPoint;
mas01mc@292 453 }
mas01mc@292 454 *nvp = sequenceLength;
mas01mc@292 455 }
mas01mc@292 456 }
mas01mc@292 457 }
mas01mc@292 458
mas01mc@292 459
mas01cr@239 460 // FIXME: this is not the right name; we're not actually setting up
mas01cr@239 461 // the database, but copying various bits of it out of mmap()ed tables
mas01cr@239 462 // in order to reduce seeks.
mas01cr@239 463 void audioDB::set_up_db(double **snp, double **vsnp, double **spp, double **vspp, double **mddp, unsigned int *dvp) {
mas01cr@239 464 *dvp = dbH->length / (dbH->dim * sizeof(double));
mas01cr@239 465 *snp = new double[*dvp];
mas01cr@239 466
mas01cr@239 467 double *snpp = *snp, *sppp = 0;
mas01cr@239 468 memcpy(*snp, l2normTable, *dvp * sizeof(double));
mas01cr@239 469
mas01cr@239 470 if (usingPower) {
mas01cr@239 471 if (!(dbH->flags & O2_FLAG_POWER)) {
mas01cr@239 472 error("database not power-enabled", dbName);
mas01cr@239 473 }
mas01cr@239 474 *spp = new double[*dvp];
mas01cr@239 475 sppp = *spp;
mas01cr@239 476 memcpy(*spp, powerTable, *dvp * sizeof(double));
mas01cr@239 477 }
mas01cr@239 478
mas01cr@239 479 for(unsigned int i = 0; i < dbH->numFiles; i++){
mas01cr@239 480 if(trackTable[i] >= sequenceLength) {
mas01cr@427 481 audiodb_sequence_sum(snpp, trackTable[i], sequenceLength);
mas01cr@427 482 audiodb_sequence_sqrt(snpp, trackTable[i], sequenceLength);
mas01cr@239 483
mas01cr@239 484 if (usingPower) {
mas01cr@427 485 audiodb_sequence_sum(sppp, trackTable[i], sequenceLength);
mas01cr@427 486 audiodb_sequence_average(sppp, trackTable[i], sequenceLength);
mas01cr@239 487 }
mas01cr@239 488 }
mas01cr@239 489 snpp += trackTable[i];
mas01cr@239 490 if (usingPower) {
mas01cr@239 491 sppp += trackTable[i];
mas01cr@239 492 }
mas01cr@239 493 }
mas01cr@239 494
mas01cr@239 495 if (usingTimes) {
mas01cr@239 496 if(!(dbH->flags & O2_FLAG_TIMES)) {
mas01cr@239 497 error("query timestamps provided for non-timed database", dbName);
mas01cr@239 498 }
mas01cr@239 499
mas01cr@239 500 *mddp = new double[dbH->numFiles];
mas01cr@239 501
mas01cr@239 502 for(unsigned int k = 0; k < dbH->numFiles; k++) {
mas01cr@239 503 unsigned int j;
mas01cr@239 504 (*mddp)[k] = 0.0;
mas01cr@239 505 for(j = 0; j < trackTable[k]; j++) {
mas01cr@239 506 (*mddp)[k] += timesTable[2*j+1] - timesTable[2*j];
mas01cr@239 507 }
mas01cr@239 508 (*mddp)[k] /= j;
mas01cr@239 509 }
mas01cr@239 510 }
mas01cr@239 511
mas01cr@239 512 *vsnp = *snp;
mas01cr@239 513 *vspp = *spp;
mas01cr@239 514 }
mas01cr@239 515
mas01mc@292 516 // query_points()
mas01mc@292 517 //
mas01mc@292 518 // using PointPairs held in the exact_evaluation_queue compute squared distance for each PointPair
mas01mc@292 519 // and insert result into the current reporter.
mas01mc@292 520 //
mas01mc@292 521 // Preconditions:
mas01mc@292 522 // A query inFile has been opened with setup_query(...) and query pointers initialized
mas01mc@292 523 // The database contains some points
mas01mc@292 524 // An exact_evaluation_queue has been allocated and populated
mas01mc@292 525 // A reporter has been allocated
mas01mc@292 526 //
mas01mc@292 527 // Postconditions:
mas01mc@292 528 // reporter contains the points and distances that meet the reporter constraints
mas01mc@292 529
mas01cr@425 530 void audioDB::query_loop_points(double* query, double* qnPtr, double* qpPtr, double meanQdur, Uns32T numVectors, adb_query_refine_t *refine){
mas01mc@292 531 unsigned int dbVectors;
mas01mc@315 532 double *sNorm = 0, *snPtr, *sPower = 0, *spPtr = 0;
mas01mc@292 533 double *meanDBdur = 0;
mas01mc@292 534
mas01mc@292 535 // check pre-conditions
mas01mc@292 536 assert(exact_evaluation_queue&&reporter);
mas01mc@292 537 if(!exact_evaluation_queue->size()) // Exit if no points to evaluate
mas01mc@292 538 return;
mas01mc@292 539
mas01mc@292 540 // Compute database info
mas01mc@292 541 // FIXME: we more than likely don't need very much of the database
mas01mc@292 542 // so make a new method to build these values per-track or, even better, per-point
mas01mc@324 543 if( !( dbH->flags & O2_FLAG_LARGE_ADB) )
mas01mc@324 544 set_up_db(&sNorm, &snPtr, &sPower, &spPtr, &meanDBdur, &dbVectors);
mas01mc@292 545
mas01mc@292 546 VERB_LOG(1, "matching points...");
mas01mc@292 547
mas01mc@292 548 assert(pointNN>0 && pointNN<=O2_MAXNN);
mas01mc@292 549 assert(trackNN>0 && trackNN<=O2_MAXNN);
mas01mc@292 550
mas01mc@292 551 // We are guaranteed that the order of points is sorted by:
mas01mc@324 552 // trackID, spos, qpos
mas01mc@292 553 // so we can be relatively efficient in initialization of track data.
mas01mc@292 554 // Here we assume that points don't overlap, so we will use exhaustive dot
mas01mc@324 555 // product evaluation instead of memoization of partial sums which is used
mas01mc@324 556 // for exhaustive brute-force evaluation from smaller databases: e.g. query_loop()
mas01mc@292 557 double dist;
mas01mc@292 558 size_t data_buffer_size = 0;
mas01mc@292 559 double *data_buffer = 0;
mas01mc@324 560 Uns32T trackOffset = 0;
mas01mc@324 561 Uns32T trackIndexOffset = 0;
mas01mc@292 562 Uns32T currentTrack = 0x80000000; // Initialize with a value outside of track index range
mas01mc@292 563 Uns32T npairs = exact_evaluation_queue->size();
mas01mc@292 564 while(npairs--){
mas01mc@292 565 PointPair pp = exact_evaluation_queue->top();
mas01mc@324 566 // Large ADB track data must be loaded here for sPower
mas01mc@324 567 if(dbH->flags & O2_FLAG_LARGE_ADB){
mas01mc@324 568 trackOffset=0;
mas01mc@324 569 trackIndexOffset=0;
mas01mc@292 570 if(currentTrack!=pp.trackID){
mas01mc@324 571 char* prefixedString = new char[O2_MAXFILESTR];
mas01mc@324 572 char* tmpStr = prefixedString;
mas01mc@324 573 // On currentTrack change, allocate and load track data
mas01mc@292 574 currentTrack=pp.trackID;
mas01mc@324 575 SAFE_DELETE_ARRAY(sNorm);
mas01mc@324 576 SAFE_DELETE_ARRAY(sPower);
mas01mc@324 577 if(infid>0)
mas01mc@324 578 close(infid);
mas01mc@324 579 // Open and check dimensions of feature file
mas01mc@324 580 strncpy(prefixedString, featureFileNameTable+pp.trackID*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR);
mas01mc@324 581 prefix_name((char ** const) &prefixedString, adb_feature_root);
mas01mc@324 582 if (prefixedString!=tmpStr)
mas01mc@324 583 delete[] tmpStr;
mas01mc@324 584 initInputFile(prefixedString, false); // nommap, file pointer at correct position
mas01mc@324 585 // Load the feature vector data for current track into data_buffer
mas01mc@324 586 read_data(infid, pp.trackID, &data_buffer, &data_buffer_size);
mas01mc@324 587 // Load power and calculate power and l2norm sequence sums
mas01mc@324 588 init_track_aux_data(pp.trackID, data_buffer, &sNorm, &snPtr, &sPower, &spPtr);
mas01mc@292 589 }
mas01mc@324 590 }
mas01mc@324 591 else{
mas01mc@324 592 // These offsets are w.r.t. the entire database of feature vectors and auxillary variables
mas01mc@324 593 trackOffset=trackOffsetTable[pp.trackID]; // num data elements offset
mas01mc@324 594 trackIndexOffset=trackOffset/dbH->dim; // num vectors offset
mas01mc@324 595 }
mas01mc@324 596 Uns32T qPos = usingQueryPoint?0:pp.qpos;// index for query point
mas01mc@324 597 Uns32T sPos = trackIndexOffset+pp.spos; // index into l2norm table
mas01mc@324 598 // Test power thresholds before computing distance
mas01cr@425 599 if( ( !usingPower || audiodb_powers_acceptable(refine, qpPtr[qPos], sPower[sPos])) &&
mas01mc@324 600 ( qPos<numVectors-sequenceLength+1 && pp.spos<trackTable[pp.trackID]-sequenceLength+1 ) ){
mas01mc@324 601 // Non-large ADB track data is loaded inside power test for efficiency
mas01mc@324 602 if( !(dbH->flags & O2_FLAG_LARGE_ADB) && (currentTrack!=pp.trackID) ){
mas01mc@324 603 // On currentTrack change, allocate and load track data
mas01mc@324 604 currentTrack=pp.trackID;
mas01mc@324 605 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET);
mas01mc@324 606 read_data(dbfid, currentTrack, &data_buffer, &data_buffer_size);
mas01mc@324 607 }
mas01mc@324 608 // Compute distance
mas01cr@425 609 dist = audiodb_dot_product(query+qPos*dbH->dim, data_buffer+pp.spos*dbH->dim, dbH->dim*sequenceLength);
mas01mc@324 610 double qn = qnPtr[qPos];
mas01mc@324 611 double sn = sNorm[sPos];
mas01mc@292 612 if(normalizedDistance)
mas01mc@324 613 dist = 2 - (2/(qn*sn))*dist;
mas01mc@292 614 else
mas01mc@292 615 if(no_unit_norming)
mas01mc@324 616 dist = qn*qn + sn*sn - 2*dist;
mas01mc@292 617 // else
mas01mc@292 618 // dist = dist;
mas01cr@424 619 if((!radius) || dist <= (O2_LSH_EXACT_MULT*radius+O2_DISTANCE_TOLERANCE)) {
mas01cr@424 620 adb_result_t r;
mas01cr@424 621 r.key = fileTable + pp.trackID * O2_FILETABLE_ENTRY_SIZE;
mas01cr@424 622 r.dist = dist;
mas01cr@424 623 r.qpos = pp.qpos;
mas01cr@424 624 r.ipos = pp.spos;
mas01cr@424 625 accumulator->add_point(&r);
mas01cr@424 626 }
mas01mc@292 627 }
mas01mc@292 628 exact_evaluation_queue->pop();
mas01mc@292 629 }
mas01mc@315 630 // Cleanup
mas01mc@324 631 SAFE_DELETE_ARRAY(sNorm);
mas01mc@324 632 SAFE_DELETE_ARRAY(sPower);
mas01mc@324 633 SAFE_DELETE_ARRAY(meanDBdur);
mas01mc@292 634 }
mas01mc@292 635
mas01cr@425 636 void audioDB::query_loop(adb_query_refine_t *refine, Uns32T queryIndex) {
mas01cr@239 637
mas01cr@239 638 unsigned int numVectors;
mas01cr@239 639 double *query, *query_data;
mas01cr@239 640 double *qNorm, *qnPtr, *qPower = 0, *qpPtr = 0;
mas01cr@239 641 double meanQdur;
mas01cr@239 642
mas01mc@324 643 if( dbH->flags & O2_FLAG_LARGE_ADB )
mas01mc@324 644 error("error: LARGE_ADB requires indexed query");
mas01mc@324 645
mas01mc@292 646 if(query_from_key)
mas01mc@292 647 set_up_query_from_key(&query_data, &query, &qNorm, &qnPtr, &qPower, &qpPtr, &meanQdur, &numVectors, queryIndex);
mas01mc@292 648 else
mas01mc@292 649 set_up_query(&query_data, &query, &qNorm, &qnPtr, &qPower, &qpPtr, &meanQdur, &numVectors);
mas01cr@239 650
mas01cr@239 651 unsigned int dbVectors;
mas01cr@239 652 double *sNorm, *snPtr, *sPower = 0, *spPtr = 0;
mas01cr@239 653 double *meanDBdur = 0;
mas01cr@239 654
mas01cr@239 655 set_up_db(&sNorm, &snPtr, &sPower, &spPtr, &meanDBdur, &dbVectors);
mas01cr@239 656
mas01cr@239 657 VERB_LOG(1, "matching tracks...");
mas01cr@239 658
mas01cr@239 659 assert(pointNN>0 && pointNN<=O2_MAXNN);
mas01cr@239 660 assert(trackNN>0 && trackNN<=O2_MAXNN);
mas01cr@239 661
mas01cr@239 662 unsigned j,k,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength;
mas01cr@239 663 double **D = 0; // Differences query and target
mas01cr@239 664 double **DD = 0; // Matched filter distance
mas01cr@239 665
mas01mc@292 666 D = new double*[numVectors]; // pre-allocate
mas01cr@239 667 DD = new double*[numVectors];
mas01cr@239 668
mas01cr@239 669 gettimeofday(&tv1, NULL);
mas01cr@239 670 unsigned processedTracks = 0;
mas01cr@239 671 off_t trackIndexOffset;
mas01cr@239 672 char nextKey[MAXSTR];
mas01cr@239 673
mas01cr@239 674 // Track loop
mas01cr@239 675 size_t data_buffer_size = 0;
mas01cr@239 676 double *data_buffer = 0;
mas01cr@239 677 lseek(dbfid, dbH->dataOffset, SEEK_SET);
mas01cr@239 678
mas01cr@239 679 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++) {
mas01cr@239 680
mas01cr@239 681 trackOffset = trackOffsetTable[track]; // numDoubles offset
mas01cr@239 682
mas01cr@239 683 // get trackID from file if using a control file
mas01cr@239 684 if(trackFile) {
mas01cr@239 685 trackFile->getline(nextKey,MAXSTR);
mas01cr@239 686 if(!trackFile->eof()) {
mas01cr@239 687 track = getKeyPos(nextKey);
mas01cr@239 688 trackOffset = trackOffsetTable[track];
mas01cr@239 689 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET);
mas01cr@239 690 } else {
mas01cr@239 691 break;
mas01cr@239 692 }
mas01cr@239 693 }
mas01cr@239 694
mas01mc@292 695 // skip identity on query_from_key
mas01mc@292 696 if( query_from_key && (track == queryIndex) ) {
mas01mc@292 697 if(queryIndex!=dbH->numFiles-1){
mas01mc@292 698 track++;
mas01mc@292 699 trackOffset = trackOffsetTable[track];
mas01mc@292 700 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET);
mas01mc@292 701 }
mas01mc@292 702 else{
mas01mc@292 703 break;
mas01mc@292 704 }
mas01mc@292 705 }
mas01mc@292 706
mas01cr@239 707 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset
mas01cr@239 708
mas01mc@324 709 read_data(dbfid, track, &data_buffer, &data_buffer_size);
mas01cr@239 710 if(sequenceLength <= trackTable[track]) { // test for short sequences
mas01cr@239 711
mas01cr@239 712 VERB_LOG(7,"%u.%jd.%u | ", track, (intmax_t) trackIndexOffset, trackTable[track]);
mas01cr@239 713
mas01cr@239 714 initialize_arrays(track, numVectors, query, data_buffer, D, DD);
mas01cr@239 715
mas01cr@425 716 if(refine->flags & ADB_REFINE_DURATION_RATIO) {
mas01cr@239 717 VERB_LOG(3,"meanQdur=%f meanDBdur=%f\n", meanQdur, meanDBdur[track]);
mas01cr@239 718 }
mas01cr@239 719
mas01cr@425 720 if((!(refine->flags & ADB_REFINE_DURATION_RATIO)) || fabs(meanDBdur[track]-meanQdur) < meanQdur*refine->duration_ratio) {
mas01cr@425 721 if(refine->flags & ADB_REFINE_DURATION_RATIO) {
mas01cr@239 722 VERB_LOG(3,"within duration tolerance.\n");
mas01cr@239 723 }
mas01cr@239 724
mas01cr@239 725 // Search for minimum distance by shingles (concatenated vectors)
mas01cr@239 726 for(j = 0; j <= numVectors - wL; j += HOP_SIZE) {
mas01cr@239 727 for(k = 0; k <= trackTable[track] - wL; k += HOP_SIZE) {
mas01cr@239 728 double thisDist;
mas01mc@292 729 if(normalizedDistance)
mas01cr@239 730 thisDist = 2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k];
mas01mc@292 731 else
mas01mc@292 732 if(no_unit_norming)
mas01mc@292 733 thisDist = qnPtr[j]*qnPtr[j]+sNorm[trackIndexOffset+k]*sNorm[trackIndexOffset+k] - 2*DD[j][k];
mas01mc@292 734 else
mas01mc@292 735 thisDist = DD[j][k];
mas01mc@292 736
mas01cr@239 737 // Power test
mas01cr@425 738 if ((!usingPower) || audiodb_powers_acceptable(refine, qpPtr[j], sPower[trackIndexOffset + k])) {
mas01cr@239 739 // radius test
mas01cr@425 740 if((!(refine->flags & ADB_REFINE_RADIUS)) ||
mas01cr@425 741 thisDist <= (refine->radius+O2_DISTANCE_TOLERANCE)) {
mas01cr@423 742 adb_result_t r;
mas01cr@423 743 r.key = fileTable + track * O2_FILETABLE_ENTRY_SIZE;
mas01cr@423 744 r.dist = thisDist;
mas01cr@423 745 r.qpos = usingQueryPoint ? queryPoint : j;
mas01cr@423 746 r.ipos = k;
mas01cr@423 747 accumulator->add_point(&r);
mas01cr@239 748 }
mas01cr@239 749 }
mas01cr@239 750 }
mas01cr@239 751 }
mas01cr@239 752 } // Duration match
mas01cr@239 753 delete_arrays(track, numVectors, D, DD);
mas01cr@239 754 }
mas01cr@239 755 }
mas01cr@239 756
mas01cr@239 757 free(data_buffer);
mas01cr@239 758
mas01cr@239 759 gettimeofday(&tv2,NULL);
mas01cr@239 760 VERB_LOG(1,"elapsed time: %ld msec\n",
mas01cr@239 761 (tv2.tv_sec*1000 + tv2.tv_usec/1000) -
mas01cr@239 762 (tv1.tv_sec*1000 + tv1.tv_usec/1000))
mas01cr@239 763
mas01cr@239 764 // Clean up
mas01cr@239 765 if(query_data)
mas01cr@239 766 delete[] query_data;
mas01cr@239 767 if(qNorm)
mas01cr@239 768 delete[] qNorm;
mas01cr@239 769 if(sNorm)
mas01cr@239 770 delete[] sNorm;
mas01cr@239 771 if(qPower)
mas01cr@239 772 delete[] qPower;
mas01cr@239 773 if(sPower)
mas01cr@239 774 delete[] sPower;
mas01cr@239 775 if(D)
mas01cr@239 776 delete[] D;
mas01cr@239 777 if(DD)
mas01cr@239 778 delete[] DD;
mas01cr@239 779 if(meanDBdur)
mas01cr@239 780 delete[] meanDBdur;
mas01cr@239 781 }