annotate query.cpp @ 439:5294ea1b1bf2 api-inversion

audioDB::initialize_arrays no longer depends on anything in the C++ class. static void audiodb_initialize_arrays() it is.
author mas01cr
date Wed, 24 Dec 2008 10:56:16 +0000
parents 8c1d8a40db91
children cb44e57a96fa
rev   line source
mas01cr@239 1 #include "audioDB.h"
mas01cr@239 2 #include "reporter.h"
mas01cr@239 3
mas01cr@422 4 #include "audioDB-internals.h"
mas01cr@422 5 #include "accumulators.h"
mas01cr@422 6
mas01cr@425 7 static bool audiodb_powers_acceptable(adb_query_refine_t *r, double p1, double p2) {
mas01cr@425 8 if (r->flags & ADB_REFINE_ABSOLUTE_THRESHOLD) {
mas01cr@425 9 if ((p1 < r->absolute_threshold) || (p2 < r->absolute_threshold)) {
mas01cr@239 10 return false;
mas01cr@239 11 }
mas01cr@239 12 }
mas01cr@425 13 if (r->flags & ADB_REFINE_RELATIVE_THRESHOLD) {
mas01cr@425 14 if (fabs(p1-p2) > fabs(r->relative_threshold)) {
mas01cr@239 15 return false;
mas01cr@239 16 }
mas01cr@239 17 }
mas01cr@239 18 return true;
mas01cr@239 19 }
mas01cr@239 20
// Command-level entry point for a query.
//
// Builds an adb_query_spec_t from the audioDB member settings, selects a
// reporter and an accumulator that match queryType, runs either the indexed
// or the brute-force query loop, then feeds every accumulated result to the
// reporter and emits the report.
//
// dbName           - database name; passed to initTables() and to
//                    index_query_loop(), and used in the log messages
// inFile           - query feature file; only handed to initTables() when
//                    query_from_key is false
// adbQueryResponse - forwarded unchanged to reporter->report()
mas01cr@239 21 void audioDB::query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) {
mas01cr@425 22
mas01cr@431 23 // init database tables and dbH first
mas01cr@431 24 if(query_from_key)
mas01cr@431 25 initTables(dbName);
mas01cr@431 26 else
mas01cr@431 27 initTables(dbName, inFile);
mas01cr@431 28
mas01cr@435 29 adb_query_spec_t qspec;
mas01cr@435 30
// Refinement settings: each optional value is written only when its flag
// bit is set, so the remaining refine fields of qspec stay unassigned here.
mas01cr@435 31 qspec.refine.flags = 0;
mas01cr@425 32 /* FIXME: trackFile / ADB_REFINE_KEYLIST */
mas01cr@425 33 if(radius) {
mas01cr@435 34 qspec.refine.flags |= ADB_REFINE_RADIUS;
mas01cr@435 35 qspec.refine.radius = radius;
mas01cr@425 36 }
mas01cr@425 37 if(use_absolute_threshold) {
mas01cr@435 38 qspec.refine.flags |= ADB_REFINE_ABSOLUTE_THRESHOLD;
mas01cr@435 39 qspec.refine.absolute_threshold = absolute_threshold;
mas01cr@425 40 }
mas01cr@425 41 if(use_relative_threshold) {
mas01cr@435 42 qspec.refine.flags |= ADB_REFINE_RELATIVE_THRESHOLD;
mas01cr@435 43 qspec.refine.relative_threshold = relative_threshold;
mas01cr@425 44 }
mas01cr@425 45 if(usingTimes) {
mas01cr@435 46 qspec.refine.flags |= ADB_REFINE_DURATION_RATIO;
mas01cr@435 47 qspec.refine.duration_ratio = timesTol;
mas01cr@425 48 }
mas01cr@439 49 /* FIXME: not sure about this any more; maybe it belongs in
mas01cr@439 50 query_id? Or maybe we just don't need a flag for it? */
// hopsize is always stored; the flag is only raised for a non-unit hop.
mas01cr@439 51 qspec.refine.hopsize = sequenceHop;
mas01cr@425 52 if(sequenceHop != 1) {
mas01cr@435 53 qspec.refine.flags |= ADB_REFINE_HOP_SIZE;
mas01cr@425 54 }
mas01cr@425 55
mas01cr@435 56 /* FIXME qspec.qid.datum */
mas01cr@435 57 qspec.qid.sequence_length = sequenceLength;
mas01cr@435 58 qspec.qid.flags = usingQueryPoint ? 0 : ADB_QUERY_ID_FLAG_EXHAUSTIVE;
mas01cr@435 59 qspec.qid.sequence_start = queryPoint;
mas01cr@435 60
// Per query type: fill in qspec.params and create the matching reporter.
// Point and track queries force a sequence length of 1 and use the dot
// product; sequence queries use a Euclidean distance.
mas01cr@431 61 switch(queryType) {
mas01cr@431 62 case O2_POINT_QUERY:
mas01cr@435 63 qspec.qid.sequence_length = 1;
mas01cr@435 64 qspec.params.accumulation = ADB_ACCUMULATION_DB;
mas01cr@435 65 qspec.params.distance = ADB_DISTANCE_DOT_PRODUCT;
mas01cr@435 66 qspec.params.npoints = pointNN;
mas01cr@435 67 qspec.params.ntracks = 0;
mas01cr@431 68 reporter = new pointQueryReporter< std::greater < NNresult > >(pointNN);
mas01cr@431 69 break;
mas01cr@431 70 case O2_TRACK_QUERY:
mas01cr@435 71 qspec.qid.sequence_length = 1;
mas01cr@435 72 qspec.params.accumulation = ADB_ACCUMULATION_PER_TRACK;
mas01cr@435 73 qspec.params.distance = ADB_DISTANCE_DOT_PRODUCT;
mas01cr@435 74 qspec.params.npoints = pointNN;
mas01cr@435 75 qspec.params.ntracks = trackNN;
mas01cr@431 76 reporter = new trackAveragingReporter< std::greater< NNresult > >(pointNN, trackNN, dbH->numFiles);
mas01cr@431 77 break;
mas01cr@431 78 case O2_SEQUENCE_QUERY:
mas01cr@431 79 case O2_N_SEQUENCE_QUERY:
mas01cr@435 80 qspec.params.accumulation = ADB_ACCUMULATION_PER_TRACK;
mas01cr@435 81 qspec.params.distance = no_unit_norming ? ADB_DISTANCE_EUCLIDEAN : ADB_DISTANCE_EUCLIDEAN_NORMED;
mas01cr@435 82 qspec.params.npoints = pointNN;
mas01cr@435 83 qspec.params.ntracks = trackNN;
// The two sequence query types share the parameters above and differ only
// in the reporter. For radius searches the reporter is sized from the LSH
// index when one exists for this radius and sequence length (the index is
// also loaded into lsh), otherwise from the number of files in the database.
mas01cr@431 84 switch(queryType) {
mas01cr@431 85 case O2_SEQUENCE_QUERY:
mas01cr@435 86 if(!(qspec.refine.flags & ADB_REFINE_RADIUS)) {
mas01cr@431 87 reporter = new trackAveragingReporter< std::less< NNresult > >(pointNN, trackNN, dbH->numFiles);
mas01cr@435 88 } else if (index_exists(adb->path, qspec.refine.radius, qspec.qid.sequence_length)) {
mas01cr@435 89 char* indexName = index_get_name(adb->path, qspec.refine.radius, qspec.qid.sequence_length);
mas01cr@431 90 lsh = index_allocate(indexName, false);
mas01cr@431 91 reporter = new trackSequenceQueryRadReporter(trackNN, index_to_trackID(lsh->get_maxp(), lsh_n_point_bits)+1);
mas01cr@431 92 delete[] indexName;
mas01cr@431 93 } else {
mas01cr@431 94 reporter = new trackSequenceQueryRadReporter(trackNN, dbH->numFiles);
mas01cr@431 95 }
mas01cr@431 96 break;
mas01cr@431 97 case O2_N_SEQUENCE_QUERY:
mas01cr@435 98 if(!(qspec.refine.flags & ADB_REFINE_RADIUS)) {
mas01cr@431 99 reporter = new trackSequenceQueryNNReporter< std::less < NNresult > >(pointNN, trackNN, dbH->numFiles);
mas01cr@435 100 } else if (index_exists(adb->path, qspec.refine.radius, qspec.qid.sequence_length)){
mas01cr@435 101 char* indexName = index_get_name(adb->path, qspec.refine.radius, qspec.qid.sequence_length);
mas01cr@431 102 lsh = index_allocate(indexName, false);
mas01cr@431 103 reporter = new trackSequenceQueryRadNNReporter(pointNN,trackNN, index_to_trackID(lsh->get_maxp(), lsh_n_point_bits)+1);
mas01cr@431 104 delete[] indexName;
mas01cr@431 105 } else {
mas01cr@431 106 reporter = new trackSequenceQueryRadNNReporter(pointNN,trackNN, dbH->numFiles);
mas01cr@431 107 }
mas01cr@431 108 break;
mas01cr@431 109 }
mas01cr@431 110 break;
// NOTE(review): this case assigns no reporter in this function, yet
// reporter is dereferenced unconditionally further down -- confirm it is
// set up elsewhere before this query type is used.
mas01cr@431 111 case O2_ONE_TO_ONE_N_SEQUENCE_QUERY:
mas01cr@435 112 qspec.params.accumulation = ADB_ACCUMULATION_ONE_TO_ONE;
mas01cr@435 113 qspec.params.distance = ADB_DISTANCE_EUCLIDEAN_NORMED;
mas01cr@435 114 qspec.params.npoints = 0;
mas01cr@435 115 qspec.params.ntracks = 0;
mas01cr@431 116 break;
mas01cr@431 117 default:
mas01cr@431 118 error("unrecognized queryType");
mas01cr@431 119 }
mas01mc@292 120
mas01mc@292 121 // keyKeyPos requires dbH to be initialized
// When querying by key, resolve the key to its index; a missing key or a
// failed lookup ((uint32_t) -1) is reported through error().
mas01cr@430 122 if(query_from_key && (!key || (query_from_key_index = audiodb_key_index(adb, key)) == (uint32_t) -1))
mas01cr@430 123 error("Query key not found", key);
mas01cr@431 124
// Accumulator selection: the dot product uses the adb_result_dist_gt
// ordering, the Euclidean distances use adb_result_dist_lt; the
// accumulation mode picks the accumulator class.
mas01cr@435 125 switch(qspec.params.distance) {
mas01cr@431 126 case ADB_DISTANCE_DOT_PRODUCT:
mas01cr@435 127 switch(qspec.params.accumulation) {
mas01cr@431 128 case ADB_ACCUMULATION_DB:
mas01cr@435 129 accumulator = new DBAccumulator<adb_result_dist_gt>(qspec.params.npoints);
mas01cr@431 130 break;
mas01cr@431 131 case ADB_ACCUMULATION_PER_TRACK:
mas01cr@435 132 accumulator = new PerTrackAccumulator<adb_result_dist_gt>(qspec.params.npoints, qspec.params.ntracks);
mas01cr@431 133 break;
mas01cr@431 134 case ADB_ACCUMULATION_ONE_TO_ONE:
mas01cr@431 135 accumulator = new NearestAccumulator<adb_result_dist_gt>();
mas01cr@431 136 break;
mas01cr@431 137 default:
mas01cr@431 138 error("unknown accumulation");
mas01cr@239 139 }
mas01cr@239 140 break;
mas01cr@431 141 case ADB_DISTANCE_EUCLIDEAN_NORMED:
mas01cr@431 142 case ADB_DISTANCE_EUCLIDEAN:
mas01cr@435 143 switch(qspec.params.accumulation) {
mas01cr@431 144 case ADB_ACCUMULATION_DB:
mas01cr@435 145 accumulator = new DBAccumulator<adb_result_dist_lt>(qspec.params.npoints);
mas01cr@431 146 break;
mas01cr@431 147 case ADB_ACCUMULATION_PER_TRACK:
mas01cr@435 148 accumulator = new PerTrackAccumulator<adb_result_dist_lt>(qspec.params.npoints, qspec.params.ntracks);
mas01cr@431 149 break;
mas01cr@431 150 case ADB_ACCUMULATION_ONE_TO_ONE:
mas01cr@431 151 accumulator = new NearestAccumulator<adb_result_dist_lt>();
mas01cr@431 152 break;
mas01cr@431 153 default:
mas01cr@431 154 error("unknown accumulation");
mas01mc@263 155 }
mas01mc@263 156 break;
mas01cr@239 157 default:
mas01cr@431 158 error("unknown distance function");
mas01cr@431 159 }
mas01cr@431 160
mas01mc@292 161 // Test for index (again) here
// A radius search with an existing index goes through the indexed loop;
// everything else falls back to the brute-force loop.
mas01cr@435 162 if((qspec.refine.flags & ADB_REFINE_RADIUS) && index_exists(adb->path, qspec.refine.radius, qspec.qid.sequence_length)){
mas01cr@436 163 VERB_LOG(1, "Calling indexed query on database %s, radius=%f, sequence_length=%d\n", adb->path, qspec.refine.radius, qspec.qid.sequence_length);
mas01cr@435 164 index_query_loop(&qspec, dbName, query_from_key_index);
mas01mc@329 165 }
mas01mc@329 166 else{
mas01mc@329 167 VERB_LOG(1, "Calling brute-force query on database %s\n", dbName);
mas01cr@435 168 query_loop(&qspec, query_from_key_index);
mas01mc@329 169 }
mas01mc@292 170
// Hand each accumulated result to the reporter, translating the result key
// back to a track index with audiodb_key_index().
// NOTE(review): neither rs nor accumulator is released in this function --
// confirm who owns them.
mas01cr@423 171 adb_query_results_t *rs = accumulator->get_points();
mas01cr@423 172 for(unsigned int k = 0; k < rs->nresults; k++) {
mas01cr@423 173 adb_result_t r = rs->results[k];
mas01cr@430 174 reporter->add_point(audiodb_key_index(adb, r.key), r.qpos, r.ipos, r.dist);
mas01cr@423 175 }
mas01cr@423 176
mas01mc@292 177 reporter->report(fileTable, adbQueryResponse);
mas01cr@239 178 }
mas01cr@239 179
mas01cr@439 180 static void audiodb_initialize_arrays(adb_t *adb, adb_query_spec_t *spec, int track, unsigned int numVectors, double *query, double *data_buffer, double **D, double **DD) {
mas01cr@239 181 unsigned int j, k, l, w;
mas01cr@239 182 double *dp, *qp, *sp;
mas01cr@239 183
mas01cr@439 184 const unsigned HOP_SIZE = spec->refine.hopsize;
mas01cr@435 185 const unsigned wL = spec->qid.sequence_length;
mas01cr@239 186
mas01cr@239 187 for(j = 0; j < numVectors; j++) {
mas01cr@239 188 // Sum products matrix
mas01cr@433 189 D[j] = new double[(*adb->track_lengths)[track]];
mas01cr@239 190 assert(D[j]);
mas01cr@239 191 // Matched filter matrix
mas01cr@433 192 DD[j]=new double[(*adb->track_lengths)[track]];
mas01cr@239 193 assert(DD[j]);
mas01cr@239 194 }
mas01cr@239 195
mas01cr@239 196 // Dot product
mas01cr@239 197 for(j = 0; j < numVectors; j++)
mas01cr@433 198 for(k = 0; k < (*adb->track_lengths)[track]; k++){
mas01cr@439 199 qp = query + j * adb->header->dim;
mas01cr@439 200 sp = data_buffer + k * adb->header->dim;
mas01cr@239 201 DD[j][k] = 0.0; // Initialize matched filter array
mas01cr@239 202 dp = &D[j][k]; // point to correlation cell j,k
mas01cr@239 203 *dp = 0.0; // initialize correlation cell
mas01cr@439 204 l = adb->header->dim; // size of vectors
mas01cr@239 205 while(l--)
mas01cr@239 206 *dp += *qp++ * *sp++;
mas01cr@239 207 }
mas01cr@239 208
mas01cr@239 209 // Matched Filter
mas01cr@239 210 // HOP SIZE == 1
mas01cr@239 211 double* spd;
mas01cr@239 212 if(HOP_SIZE == 1) { // HOP_SIZE = shingleHop
mas01cr@239 213 for(w = 0; w < wL; w++) {
mas01cr@239 214 for(j = 0; j < numVectors - w; j++) {
mas01cr@239 215 sp = DD[j];
mas01cr@239 216 spd = D[j+w] + w;
mas01cr@433 217 k = (*adb->track_lengths)[track] - w;
mas01mc@292 218 while(k--)
mas01mc@292 219 *sp++ += *spd++;
mas01cr@239 220 }
mas01cr@239 221 }
mas01cr@239 222 } else { // HOP_SIZE != 1
mas01cr@239 223 for(w = 0; w < wL; w++) {
mas01cr@239 224 for(j = 0; j < numVectors - w; j += HOP_SIZE) {
mas01cr@239 225 sp = DD[j];
mas01cr@239 226 spd = D[j+w]+w;
mas01cr@433 227 for(k = 0; k < (*adb->track_lengths)[track] - w; k += HOP_SIZE) {
mas01cr@239 228 *sp += *spd;
mas01cr@239 229 sp += HOP_SIZE;
mas01cr@239 230 spd += HOP_SIZE;
mas01cr@239 231 }
mas01cr@239 232 }
mas01cr@239 233 }
mas01cr@239 234 }
mas01cr@239 235 }
mas01cr@239 236
/*
 * Release the rows of the D and DD work matrices.
 *
 * track      - unused; kept so existing callers keep compiling.
 * numVectors - number of row pointers stored in D and in DD.
 * D, DD      - arrays of row pointers whose rows were allocated with
 *              new[]; either array pointer may be NULL, in which case it
 *              is skipped. The arrays of pointers themselves are not
 *              freed here.
 *
 * Every released row pointer is reset to NULL so that calling this
 * function again on the same arrays is harmless instead of a double free.
 */
static void audiodb_delete_arrays(int track, unsigned int numVectors, double **D, double **DD) {
  (void) track;
  if(D != NULL) {
    for(unsigned int j = 0; j < numVectors; j++) {
      delete[] D[j];
      D[j] = NULL;
    }
  }
  if(DD != NULL) {
    for(unsigned int j = 0; j < numVectors; j++) {
      delete[] DD[j];
      DD[j] = NULL;
    }
  }
}
mas01cr@239 249
mas01cr@433 250 int audiodb_read_data(adb_t *adb, int trkfid, int track, double **data_buffer_p, size_t *data_buffer_size_p) {
mas01cr@433 251 uint32_t track_length = (*adb->track_lengths)[track];
mas01cr@433 252 size_t track_size = track_length * sizeof(double) * adb->header->dim;
mas01cr@433 253 if (track_size > *data_buffer_size_p) {
mas01cr@239 254 if(*data_buffer_p) {
mas01cr@239 255 free(*data_buffer_p);
mas01cr@239 256 }
mas01cr@239 257 {
mas01cr@433 258 *data_buffer_size_p = track_size;
mas01cr@433 259 void *tmp = malloc(track_size);
mas01cr@239 260 if (tmp == NULL) {
mas01cr@433 261 goto error;
mas01cr@239 262 }
mas01cr@239 263 *data_buffer_p = (double *) tmp;
mas01cr@239 264 }
mas01cr@239 265 }
mas01cr@239 266
mas01cr@433 267 read_or_goto_error(trkfid, *data_buffer_p, track_size);
mas01cr@433 268 return 0;
mas01cr@433 269
mas01cr@433 270 error:
mas01cr@433 271 return 1;
mas01cr@239 272 }
mas01cr@239 273
mas01cr@405 274 void audioDB::insertTimeStamps(unsigned numVectors, std::ifstream *timesFile, double *timesdata) {
mas01cr@405 275 assert(usingTimes);
mas01cr@405 276
mas01cr@405 277 unsigned numtimes = 0;
mas01cr@405 278
mas01cr@405 279 if(!timesFile->is_open()) {
mas01cr@405 280 error("problem opening times file on timestamped database", timesFileName);
mas01cr@405 281 }
mas01cr@405 282
mas01cr@405 283 double timepoint, next;
mas01cr@405 284 *timesFile >> timepoint;
mas01cr@405 285 if (timesFile->eof()) {
mas01cr@405 286 error("no entries in times file", timesFileName);
mas01cr@405 287 }
mas01cr@405 288 numtimes++;
mas01cr@405 289 do {
mas01cr@405 290 *timesFile >> next;
mas01cr@405 291 if (timesFile->eof()) {
mas01cr@405 292 break;
mas01cr@405 293 }
mas01cr@405 294 numtimes++;
mas01cr@405 295 timesdata[0] = timepoint;
mas01cr@405 296 timepoint = (timesdata[1] = next);
mas01cr@405 297 timesdata += 2;
mas01cr@405 298 } while (numtimes < numVectors + 1);
mas01cr@405 299
mas01cr@405 300 if (numtimes < numVectors + 1) {
mas01cr@405 301 error("too few timepoints in times file", timesFileName);
mas01cr@405 302 }
mas01cr@405 303
mas01cr@405 304 *timesFile >> next;
mas01cr@405 305 if (!timesFile->eof()) {
mas01cr@405 306 error("too many timepoints in times file", timesFileName);
mas01cr@405 307 }
mas01cr@405 308 }
mas01cr@405 309
mas01cr@239 310 // These names deserve some unpicking. The names starting with a "q"
mas01cr@239 311 // are pointers to the query, norm and power vectors; the names
mas01cr@239 312 // starting with "v" are things that will end up pointing to the
mas01cr@239 313 // actual query point's information. -- CSR, 2007-12-05
mas01cr@437 314 void audioDB::set_up_query(adb_query_spec_t *spec, double **qp, double **vqp, adb_qpointers_internal_t *qpointers) {
mas01cr@437 315 uint32_t nvectors = (statbuf.st_size - sizeof(int)) / (dbH->dim * sizeof(double));
mas01cr@437 316 qpointers->nvectors = nvectors;
mas01mc@292 317
mas01cr@435 318 uint32_t sequence_length = spec->qid.sequence_length;
mas01cr@239 319 if(!(dbH->flags & O2_FLAG_L2NORM)) {
mas01cr@239 320 error("Database must be L2 normed for sequence query","use -L2NORM");
mas01cr@239 321 }
mas01cr@239 322
mas01cr@437 323 if(nvectors < sequence_length) {
mas01cr@239 324 error("Query shorter than requested sequence length", "maybe use -l");
mas01cr@239 325 }
mas01cr@239 326
mas01cr@239 327 VERB_LOG(1, "performing norms... ");
mas01cr@239 328
mas01cr@437 329 *qp = new double[nvectors * dbH->dim];
mas01cr@437 330 memcpy(*qp, indata+sizeof(int), nvectors * dbH->dim * sizeof(double));
mas01cr@437 331 qpointers->l2norm_data = new double[nvectors];
mas01cr@437 332 audiodb_l2norm_buffer(*qp, dbH->dim, nvectors, qpointers->l2norm_data);
mas01cr@239 333
mas01cr@437 334 audiodb_sequence_sum(qpointers->l2norm_data, nvectors, sequence_length);
mas01cr@437 335 audiodb_sequence_sqrt(qpointers->l2norm_data, nvectors, sequence_length);
mas01cr@239 336
mas01cr@239 337 if (usingPower) {
mas01cr@437 338 qpointers->power_data = new double[nvectors];
mas01cr@239 339 if (lseek(powerfd, sizeof(int), SEEK_SET) == (off_t) -1) {
mas01cr@239 340 error("error seeking to data", powerFileName, "lseek");
mas01cr@239 341 }
mas01cr@437 342 int count = read(powerfd, qpointers->power_data, nvectors * sizeof(double));
mas01cr@239 343 if (count == -1) {
mas01cr@239 344 error("error reading data", powerFileName, "read");
mas01cr@239 345 }
mas01cr@437 346 if ((unsigned) count != nvectors * sizeof(double)) {
mas01cr@239 347 error("short read", powerFileName);
mas01cr@239 348 }
mas01cr@239 349
mas01cr@437 350 audiodb_sequence_sum(qpointers->power_data, nvectors, sequence_length);
mas01cr@437 351 audiodb_sequence_average(qpointers->power_data, nvectors, sequence_length);
mas01cr@239 352 }
mas01cr@239 353
mas01cr@239 354 if (usingTimes) {
mas01cr@239 355 unsigned int k;
mas01cr@437 356 qpointers->mean_duration = new double[1];
mas01cr@437 357 *qpointers->mean_duration = 0.0;
mas01cr@437 358 double *querydurs = new double[nvectors];
mas01cr@437 359 double *timesdata = new double[2*nvectors];
mas01cr@437 360 insertTimeStamps(nvectors, timesFile, timesdata);
mas01cr@437 361 for(k = 0; k < nvectors; k++) {
mas01cr@239 362 querydurs[k] = timesdata[2*k+1] - timesdata[2*k];
mas01cr@437 363 *qpointers->mean_duration += querydurs[k];
mas01cr@239 364 }
mas01cr@437 365 *qpointers->mean_duration /= k;
mas01cr@239 366
mas01cr@437 367 VERB_LOG(1, "mean query file duration: %f\n", *qpointers->mean_duration);
mas01cr@239 368
mas01cr@239 369 delete [] querydurs;
mas01cr@239 370 delete [] timesdata;
mas01cr@239 371 }
mas01cr@239 372
mas01cr@239 373 // Defaults, for exhaustive search (!usingQueryPoint)
mas01cr@239 374 *vqp = *qp;
mas01cr@437 375 qpointers->l2norm = qpointers->l2norm_data;
mas01cr@437 376 qpointers->power = qpointers->power_data;
mas01cr@239 377
mas01cr@239 378 if(usingQueryPoint) {
mas01cr@437 379 if( !(queryPoint < nvectors && queryPoint < nvectors - sequence_length + 1) ) {
mas01cr@437 380 error("queryPoint >= nvectors-sequence_length+1 in query");
mas01cr@239 381 } else {
mas01cr@239 382 VERB_LOG(1, "query point: %u\n", queryPoint);
mas01cr@239 383 *vqp = *qp + queryPoint * dbH->dim;
mas01cr@437 384 qpointers->l2norm = qpointers->l2norm_data + queryPoint;
mas01cr@239 385 if (usingPower) {
mas01cr@437 386 qpointers->power = qpointers->power_data + queryPoint;
mas01cr@239 387 }
mas01cr@437 388 qpointers->nvectors = sequence_length;
mas01cr@239 389 }
mas01cr@239 390 }
mas01cr@239 391 }
mas01cr@239 392
mas01mc@292 393 // Does the same as set_up_query(...) but from database features instead of from a file
mas01mc@292 394 // Constructs the same outputs as set_up_query
mas01cr@437 395 void audioDB::set_up_query_from_key(adb_query_spec_t *spec, double **qp, double **vqp, adb_qpointers_internal_t *qpointers, Uns32T queryIndex) {
mas01cr@435 396 uint32_t sequence_length = spec->qid.sequence_length;
mas01mc@292 397 if(!trackTable)
mas01mc@292 398 error("trackTable not initialized","set_up_query_from_key");
mas01mc@292 399
mas01mc@292 400 if(!(dbH->flags & O2_FLAG_L2NORM)) {
mas01mc@292 401 error("Database must be L2 normed for sequence query","use -L2NORM");
mas01mc@292 402 }
mas01mc@292 403
mas01mc@292 404 if(dbH->flags & O2_FLAG_POWER)
mas01mc@292 405 usingPower = true;
mas01mc@292 406
mas01mc@292 407 if(dbH->flags & O2_FLAG_TIMES)
mas01mc@292 408 usingTimes = true;
mas01mc@292 409
mas01cr@437 410 uint32_t nvectors = trackTable[queryIndex];
mas01cr@437 411 qpointers->nvectors = nvectors;
mas01cr@437 412 if(nvectors < sequence_length) {
mas01mc@292 413 error("Query shorter than requested sequence length", "maybe use -l");
mas01mc@292 414 }
mas01mc@292 415
mas01mc@292 416 VERB_LOG(1, "performing norms... ");
mas01mc@292 417
mas01mc@324 418 // For LARGE_ADB load query features from file
mas01mc@324 419 if( dbH->flags & O2_FLAG_LARGE_ADB ){
mas01mc@324 420 if(infid>0)
mas01mc@324 421 close(infid);
mas01mc@324 422 char* prefixedString = new char[O2_MAXFILESTR];
mas01mc@324 423 char* tmpStr = prefixedString;
mas01mc@324 424 strncpy(prefixedString, featureFileNameTable+queryIndex*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR);
mas01mc@324 425 prefix_name(&prefixedString, adb_feature_root);
mas01mc@324 426 if(tmpStr!=prefixedString)
mas01mc@324 427 delete[] tmpStr;
mas01mc@324 428 initInputFile(prefixedString, false); // nommap, file pointer at correct position
mas01mc@324 429 size_t allocatedSize = 0;
mas01cr@433 430 if(audiodb_read_data(adb, infid, queryIndex, qp, &allocatedSize))
mas01cr@433 431 error("failed to read data"); // over-writes qp and allocatedSize
mas01mc@324 432 // Consistency check on allocated memory and query feature size
mas01cr@437 433 if(nvectors*sizeof(double)*dbH->dim != allocatedSize)
mas01mc@324 434 error("Query memory allocation failed consitency check","set_up_query_from_key");
mas01mc@324 435 // Allocated and calculate auxillary sequences: l2norm and power
mas01cr@437 436 init_track_aux_data(queryIndex, *qp, &qpointers->l2norm_data, &qpointers->l2norm, &qpointers->power_data, &qpointers->power);
mas01mc@324 437 }
mas01mc@324 438 else{ // Load from self-contained ADB database
mas01mc@324 439 // Read query feature vectors from database
mas01mc@324 440 *qp = NULL;
mas01mc@324 441 lseek(dbfid, dbH->dataOffset + trackOffsetTable[queryIndex] * sizeof(double), SEEK_SET);
mas01mc@324 442 size_t allocatedSize = 0;
mas01cr@433 443 if(audiodb_read_data(adb, dbfid, queryIndex, qp, &allocatedSize))
mas01cr@433 444 error("failed to read data");
mas01mc@324 445 // Consistency check on allocated memory and query feature size
mas01cr@437 446 if(nvectors*sizeof(double)*dbH->dim != allocatedSize)
mas01mc@324 447 error("Query memory allocation failed consitency check","set_up_query_from_key");
mas01mc@324 448
mas01mc@324 449 Uns32T trackIndexOffset = trackOffsetTable[queryIndex]/dbH->dim; // Convert num data elements to num vectors
mas01mc@324 450 // Copy L2 norm partial-sum coefficients
mas01cr@437 451 assert(qpointers->l2norm_data = new double[nvectors]);
mas01cr@437 452 memcpy(qpointers->l2norm_data, l2normTable+trackIndexOffset, nvectors*sizeof(double));
mas01cr@437 453 audiodb_sequence_sum(qpointers->l2norm_data, nvectors, sequence_length);
mas01cr@437 454 audiodb_sequence_sqrt(qpointers->l2norm_data, nvectors, sequence_length);
mas01mc@324 455
mas01mc@324 456 if( usingPower ){
mas01mc@324 457 // Copy Power partial-sum coefficients
mas01cr@437 458 assert(qpointers->power_data = new double[nvectors]);
mas01cr@437 459 memcpy(qpointers->power_data, powerTable+trackIndexOffset, nvectors*sizeof(double));
mas01cr@437 460 audiodb_sequence_sum(qpointers->power_data, nvectors, sequence_length);
mas01cr@437 461 audiodb_sequence_average(qpointers->power_data, nvectors, sequence_length);
mas01mc@324 462 }
mas01mc@324 463
mas01mc@324 464 if (usingTimes) {
mas01mc@324 465 unsigned int k;
mas01cr@437 466 qpointers->mean_duration = new double[1];
mas01cr@437 467 *qpointers->mean_duration = 0.0;
mas01cr@437 468 double *querydurs = new double[nvectors];
mas01cr@437 469 double *timesdata = new double[nvectors*2];
mas01mc@324 470 assert(querydurs && timesdata);
mas01cr@437 471 memcpy(timesdata, timesTable+trackIndexOffset, nvectors*sizeof(double));
mas01cr@437 472 for(k = 0; k < nvectors; k++) {
mas01mc@324 473 querydurs[k] = timesdata[2*k+1] - timesdata[2*k];
mas01cr@437 474 *qpointers->mean_duration += querydurs[k];
mas01mc@324 475 }
mas01cr@437 476 *qpointers->mean_duration /= k;
mas01mc@324 477
mas01cr@437 478 VERB_LOG(1, "mean query file duration: %f\n", *qpointers->mean_duration);
mas01mc@324 479
mas01mc@324 480 delete [] querydurs;
mas01mc@324 481 delete [] timesdata;
mas01mc@324 482 }
mas01mc@292 483 }
mas01mc@292 484
mas01mc@292 485 // Defaults, for exhaustive search (!usingQueryPoint)
mas01mc@292 486 *vqp = *qp;
mas01cr@437 487 qpointers->l2norm = qpointers->l2norm_data;
mas01cr@437 488 qpointers->power = qpointers->power_data;
mas01mc@292 489
mas01mc@292 490 if(usingQueryPoint) {
mas01cr@437 491 if( !(queryPoint < nvectors && queryPoint < nvectors - sequence_length + 1) ) {
mas01cr@437 492 error("queryPoint >= nvectors-sequence_length+1 in query");
mas01mc@292 493 } else {
mas01mc@292 494 VERB_LOG(1, "query point: %u\n", queryPoint);
mas01mc@292 495 *vqp = *qp + queryPoint * dbH->dim;
mas01cr@437 496 qpointers->l2norm = qpointers->l2norm_data + queryPoint;
mas01mc@292 497 if (usingPower) {
mas01cr@437 498 qpointers->power = qpointers->power_data + queryPoint;
mas01mc@292 499 }
mas01cr@437 500 qpointers->nvectors = sequence_length;
mas01mc@292 501 }
mas01mc@292 502 }
mas01mc@292 503 }
mas01mc@292 504
mas01mc@292 505
mas01cr@239 506 // FIXME: this is not the right name; we're not actually setting up
mas01cr@239 507 // the database, but copying various bits of it out of mmap()ed tables
mas01cr@239 508 // in order to reduce seeks.
mas01cr@438 509 static int audiodb_set_up_db(adb_t *adb, adb_query_spec_t *spec, adb_qpointers_internal_t *dbpointers) {
mas01cr@438 510 uint32_t nvectors = adb->header->length / (adb->header->dim * sizeof(double));
mas01cr@435 511 uint32_t sequence_length = spec->qid.sequence_length;
mas01cr@438 512
mas01cr@437 513 bool using_power = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD);
mas01cr@437 514 bool using_times = spec->refine.flags & ADB_REFINE_DURATION_RATIO;
mas01cr@437 515 double *times_table = NULL;
mas01cr@435 516
mas01cr@239 517
mas01cr@438 518 dbpointers->nvectors = nvectors;
mas01cr@438 519 dbpointers->l2norm_data = new double[nvectors];
mas01cr@438 520
mas01cr@438 521 double *snpp = dbpointers->l2norm_data, *sppp = 0;
mas01cr@434 522 lseek(adb->fd, adb->header->l2normTableOffset, SEEK_SET);
mas01cr@438 523 read_or_goto_error(adb->fd, dbpointers->l2norm_data, nvectors * sizeof(double));
mas01cr@239 524
mas01cr@437 525 if (using_power) {
mas01cr@434 526 if (!(adb->header->flags & O2_FLAG_POWER)) {
mas01cr@434 527 goto error;
mas01cr@239 528 }
mas01cr@438 529 dbpointers->power_data = new double[nvectors];
mas01cr@438 530 sppp = dbpointers->power_data;
mas01cr@434 531 lseek(adb->fd, adb->header->powerTableOffset, SEEK_SET);
mas01cr@438 532 read_or_goto_error(adb->fd, dbpointers->power_data, nvectors * sizeof(double));
mas01cr@239 533 }
mas01cr@239 534
mas01cr@434 535 for(unsigned int i = 0; i < adb->header->numFiles; i++){
mas01cr@434 536 size_t track_length = (*adb->track_lengths)[i];
mas01cr@435 537 if(track_length >= sequence_length) {
mas01cr@435 538 audiodb_sequence_sum(snpp, track_length, sequence_length);
mas01cr@435 539 audiodb_sequence_sqrt(snpp, track_length, sequence_length);
mas01cr@437 540 if (using_power) {
mas01cr@435 541 audiodb_sequence_sum(sppp, track_length, sequence_length);
mas01cr@435 542 audiodb_sequence_average(sppp, track_length, sequence_length);
mas01cr@239 543 }
mas01cr@239 544 }
mas01cr@434 545 snpp += track_length;
mas01cr@437 546 if (using_power) {
mas01cr@434 547 sppp += track_length;
mas01cr@239 548 }
mas01cr@239 549 }
mas01cr@239 550
mas01cr@437 551 if (using_times) {
mas01cr@434 552 if(!(adb->header->flags & O2_FLAG_TIMES)) {
mas01cr@437 553 goto error;
mas01cr@239 554 }
mas01cr@239 555
mas01cr@438 556 dbpointers->mean_duration = new double[adb->header->numFiles];
mas01cr@239 557
mas01cr@438 558 times_table = (double *) malloc(2 * nvectors * sizeof(double));
mas01cr@437 559 if(!times_table) {
mas01cr@437 560 goto error;
mas01cr@437 561 }
mas01cr@437 562 lseek(adb->fd, adb->header->timesTableOffset, SEEK_SET);
mas01cr@438 563 read_or_goto_error(adb->fd, times_table, 2 * nvectors * sizeof(double));
mas01cr@434 564 for(unsigned int k = 0; k < adb->header->numFiles; k++) {
mas01cr@434 565 size_t track_length = (*adb->track_lengths)[k];
mas01cr@239 566 unsigned int j;
mas01cr@438 567 dbpointers->mean_duration[k] = 0.0;
mas01cr@434 568 for(j = 0; j < track_length; j++) {
mas01cr@438 569 dbpointers->mean_duration[k] += times_table[2*j+1] - times_table[2*j];
mas01cr@239 570 }
mas01cr@438 571 dbpointers->mean_duration[k] /= j;
mas01cr@239 572 }
mas01cr@437 573
mas01cr@437 574 free(times_table);
mas01cr@437 575 times_table = NULL;
mas01cr@239 576 }
mas01cr@239 577
mas01cr@438 578 dbpointers->l2norm = dbpointers->l2norm_data;
mas01cr@438 579 dbpointers->power = dbpointers->power_data;
mas01cr@434 580 return 0;
mas01cr@434 581
mas01cr@434 582 error:
mas01cr@438 583 if(dbpointers->l2norm_data) {
mas01cr@438 584 delete [] dbpointers->l2norm_data;
mas01cr@434 585 }
mas01cr@438 586 if(dbpointers->power_data) {
mas01cr@438 587 delete [] dbpointers->power_data;
mas01cr@434 588 }
mas01cr@438 589 if(dbpointers->mean_duration) {
mas01cr@438 590 delete [] dbpointers->mean_duration;
mas01cr@434 591 }
mas01cr@437 592 if(times_table) {
mas01cr@437 593 free(times_table);
mas01cr@437 594 }
mas01cr@434 595 return 1;
mas01cr@434 596
mas01cr@239 597 }
mas01cr@239 598
mas01mc@292 599 // query_points()
mas01mc@292 600 //
mas01mc@292 601 // using PointPairs held in the exact_evaluation_queue compute squared distance for each PointPair
mas01mc@292 602 // and insert result into the current reporter.
mas01mc@292 603 //
mas01mc@292 604 // Preconditions:
mas01mc@292 605 // A query inFile has been opened with set_up_query(...) and query pointers initialized
mas01mc@292 606 // The database contains some points
mas01mc@292 607 // An exact_evaluation_queue has been allocated and populated
mas01mc@292 608 // A reporter has been allocated
mas01mc@292 609 //
mas01mc@292 610 // Postconditions:
mas01mc@292 611 // reporter contains the points and distances that meet the reporter constraints
mas01mc@292 612
mas01cr@437 613 void audioDB::query_loop_points(adb_query_spec_t *spec, double* query, adb_qpointers_internal_t *qpointers) {
mas01cr@438 614 adb_qpointers_internal_t dbpointers = {0};
mas01mc@292 615
mas01cr@436 616 uint32_t sequence_length = spec->qid.sequence_length;
mas01cr@437 617 bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD);
mas01cr@436 618
mas01mc@292 619 // check pre-conditions
mas01mc@292 620 assert(exact_evaluation_queue&&reporter);
mas01mc@292 621 if(!exact_evaluation_queue->size()) // Exit if no points to evaluate
mas01mc@292 622 return;
mas01mc@292 623
mas01mc@292 624 // Compute database info
mas01mc@292 625 // FIXME: we more than likely don't need very much of the database
mas01mc@292 626 // so make a new method to build these values per-track or, even better, per-point
mas01mc@324 627 if( !( dbH->flags & O2_FLAG_LARGE_ADB) )
mas01cr@438 628 if(audiodb_set_up_db(adb, spec, &dbpointers)) {
mas01cr@434 629 error("failed to set up db");
mas01cr@434 630 }
mas01mc@292 631
mas01mc@292 632 VERB_LOG(1, "matching points...");
mas01mc@292 633
mas01mc@292 634 // We are guaranteed that the order of points is sorted by:
mas01mc@324 635 // trackID, spos, qpos
mas01mc@292 636 // so we can be relatively efficient in initialization of track data.
mas01mc@292 637 // Here we assume that points don't overlap, so we will use exhaustive dot
mas01mc@324 638 // product evaluation instead of memoization of partial sums which is used
mas01mc@324 639 // for exhaustive brute-force evaluation from smaller databases: e.g. query_loop()
mas01mc@292 640 double dist;
mas01mc@292 641 size_t data_buffer_size = 0;
mas01mc@292 642 double *data_buffer = 0;
mas01mc@324 643 Uns32T trackOffset = 0;
mas01mc@324 644 Uns32T trackIndexOffset = 0;
mas01mc@292 645 Uns32T currentTrack = 0x80000000; // Initialize with a value outside of track index range
mas01mc@292 646 Uns32T npairs = exact_evaluation_queue->size();
mas01mc@292 647 while(npairs--){
mas01mc@292 648 PointPair pp = exact_evaluation_queue->top();
mas01mc@324 649 // Large ADB track data must be loaded here for sPower
mas01mc@324 650 if(dbH->flags & O2_FLAG_LARGE_ADB){
mas01mc@324 651 trackOffset=0;
mas01mc@324 652 trackIndexOffset=0;
mas01mc@292 653 if(currentTrack!=pp.trackID){
mas01mc@324 654 char* prefixedString = new char[O2_MAXFILESTR];
mas01mc@324 655 char* tmpStr = prefixedString;
mas01mc@324 656 // On currentTrack change, allocate and load track data
mas01mc@292 657 currentTrack=pp.trackID;
mas01cr@438 658 SAFE_DELETE_ARRAY(dbpointers.l2norm_data);
mas01cr@438 659 SAFE_DELETE_ARRAY(dbpointers.power_data);
mas01mc@324 660 if(infid>0)
mas01mc@324 661 close(infid);
mas01mc@324 662 // Open and check dimensions of feature file
mas01mc@324 663 strncpy(prefixedString, featureFileNameTable+pp.trackID*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR);
mas01mc@324 664 prefix_name((char ** const) &prefixedString, adb_feature_root);
mas01mc@324 665 if (prefixedString!=tmpStr)
mas01mc@324 666 delete[] tmpStr;
mas01mc@324 667 initInputFile(prefixedString, false); // nommap, file pointer at correct position
mas01mc@324 668 // Load the feature vector data for current track into data_buffer
mas01cr@433 669 if(audiodb_read_data(adb, infid, pp.trackID, &data_buffer, &data_buffer_size))
mas01cr@433 670 error("failed to read data");
mas01mc@324 671 // Load power and calculate power and l2norm sequence sums
mas01cr@438 672 init_track_aux_data(pp.trackID, data_buffer, &dbpointers.l2norm_data, &dbpointers.l2norm, &dbpointers.power_data, &dbpointers.power);
mas01mc@292 673 }
mas01mc@324 674 }
mas01mc@324 675 else{
mas01mc@324 676 // These offsets are w.r.t. the entire database of feature vectors and auxiliary variables
mas01mc@324 677 trackOffset=trackOffsetTable[pp.trackID]; // num data elements offset
mas01mc@324 678 trackIndexOffset=trackOffset/dbH->dim; // num vectors offset
mas01mc@324 679 }
mas01mc@324 680 Uns32T qPos = usingQueryPoint?0:pp.qpos;// index for query point
mas01mc@324 681 Uns32T sPos = trackIndexOffset+pp.spos; // index into l2norm table
mas01mc@324 682 // Test power thresholds before computing distance
mas01cr@438 683 if( ( (!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers->power[qPos], dbpointers.power[sPos])) &&
mas01cr@437 684 ( qPos<qpointers->nvectors-sequence_length+1 && pp.spos<trackTable[pp.trackID]-sequence_length+1 ) ){
mas01mc@324 685 // Non-large ADB track data is loaded inside power test for efficiency
mas01mc@324 686 if( !(dbH->flags & O2_FLAG_LARGE_ADB) && (currentTrack!=pp.trackID) ){
mas01mc@324 687 // On currentTrack change, allocate and load track data
mas01mc@324 688 currentTrack=pp.trackID;
mas01mc@324 689 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET);
mas01cr@433 690 if(audiodb_read_data(adb, dbfid, currentTrack, &data_buffer, &data_buffer_size))
mas01cr@433 691 error("failed to read data");
mas01mc@324 692 }
mas01mc@324 693 // Compute distance
mas01cr@436 694 dist = audiodb_dot_product(query+qPos*dbH->dim, data_buffer+pp.spos*dbH->dim, dbH->dim*sequence_length);
mas01cr@437 695 double qn = qpointers->l2norm[qPos];
mas01cr@438 696 double sn = dbpointers.l2norm[sPos];
mas01cr@435 697 switch(spec->params.distance) {
mas01cr@431 698 case ADB_DISTANCE_EUCLIDEAN_NORMED:
mas01mc@324 699 dist = 2 - (2/(qn*sn))*dist;
mas01cr@431 700 break;
mas01cr@431 701 case ADB_DISTANCE_EUCLIDEAN:
mas01cr@431 702 dist = qn*qn + sn*sn - 2*dist;
mas01cr@431 703 break;
mas01cr@431 704 }
mas01cr@424 705 if((!radius) || dist <= (O2_LSH_EXACT_MULT*radius+O2_DISTANCE_TOLERANCE)) {
mas01cr@424 706 adb_result_t r;
mas01cr@424 707 r.key = fileTable + pp.trackID * O2_FILETABLE_ENTRY_SIZE;
mas01cr@424 708 r.dist = dist;
mas01cr@424 709 r.qpos = pp.qpos;
mas01cr@424 710 r.ipos = pp.spos;
mas01cr@424 711 accumulator->add_point(&r);
mas01cr@424 712 }
mas01mc@292 713 }
mas01mc@292 714 exact_evaluation_queue->pop();
mas01mc@292 715 }
mas01mc@315 716 // Cleanup
mas01cr@438 717 SAFE_DELETE_ARRAY(dbpointers.l2norm_data);
mas01cr@438 718 SAFE_DELETE_ARRAY(dbpointers.power_data);
mas01cr@438 719 SAFE_DELETE_ARRAY(dbpointers.mean_duration);
mas01mc@292 720 }
mas01mc@292 721
mas01cr@435 722 void audioDB::query_loop(adb_query_spec_t *spec, Uns32T queryIndex) {
mas01cr@239 723
mas01cr@239 724 double *query, *query_data;
mas01cr@438 725 adb_qpointers_internal_t qpointers = {0}, dbpointers = {0};
mas01cr@437 726
mas01cr@437 727 bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD);
mas01cr@239 728
mas01mc@324 729 if( dbH->flags & O2_FLAG_LARGE_ADB )
mas01mc@324 730 error("error: LARGE_ADB requires indexed query");
mas01mc@324 731
mas01mc@292 732 if(query_from_key)
mas01cr@437 733 set_up_query_from_key(spec, &query_data, &query, &qpointers, queryIndex);
mas01mc@292 734 else
mas01cr@437 735 set_up_query(spec, &query_data, &query, &qpointers);
mas01cr@239 736
mas01cr@438 737 if(audiodb_set_up_db(adb, spec, &dbpointers)) {
mas01cr@434 738 error("failed to set up db");
mas01cr@434 739 }
mas01cr@239 740
mas01cr@239 741 VERB_LOG(1, "matching tracks...");
mas01cr@239 742
mas01cr@435 743 unsigned j,k,track,trackOffset=0, HOP_SIZE=sequenceHop;
mas01cr@435 744 unsigned wL = spec->qid.sequence_length;
mas01cr@239 745 double **D = 0; // Differences query and target
mas01cr@239 746 double **DD = 0; // Matched filter distance
mas01cr@239 747
mas01cr@437 748 D = new double*[qpointers.nvectors]; // pre-allocate
mas01cr@437 749 DD = new double*[qpointers.nvectors];
mas01cr@239 750
mas01cr@239 751 gettimeofday(&tv1, NULL);
mas01cr@239 752 unsigned processedTracks = 0;
mas01cr@239 753 off_t trackIndexOffset;
mas01cr@239 754 char nextKey[MAXSTR];
mas01cr@239 755
mas01cr@239 756 // Track loop
mas01cr@239 757 size_t data_buffer_size = 0;
mas01cr@239 758 double *data_buffer = 0;
mas01cr@239 759 lseek(dbfid, dbH->dataOffset, SEEK_SET);
mas01cr@239 760
mas01cr@239 761 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++) {
mas01cr@239 762
mas01cr@239 763 trackOffset = trackOffsetTable[track]; // numDoubles offset
mas01cr@239 764
mas01cr@239 765 // get trackID from file if using a control file
mas01cr@239 766 if(trackFile) {
mas01cr@239 767 trackFile->getline(nextKey,MAXSTR);
mas01cr@239 768 if(!trackFile->eof()) {
mas01cr@430 769 track = audiodb_key_index(adb, nextKey);
mas01cr@430 770 if(track == (uint32_t) -1) {
mas01cr@430 771 error("key not found", nextKey);
mas01cr@430 772 }
mas01cr@239 773 trackOffset = trackOffsetTable[track];
mas01cr@239 774 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET);
mas01cr@239 775 } else {
mas01cr@239 776 break;
mas01cr@239 777 }
mas01cr@239 778 }
mas01cr@239 779
mas01mc@292 780 // skip identity on query_from_key
mas01mc@292 781 if( query_from_key && (track == queryIndex) ) {
mas01mc@292 782 if(queryIndex!=dbH->numFiles-1){
mas01mc@292 783 track++;
mas01mc@292 784 trackOffset = trackOffsetTable[track];
mas01mc@292 785 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET);
mas01mc@292 786 }
mas01mc@292 787 else{
mas01mc@292 788 break;
mas01mc@292 789 }
mas01mc@292 790 }
mas01mc@292 791
mas01cr@437 792 trackIndexOffset=trackOffset/dbH->dim; // qpointers.nvectors offset
mas01cr@239 793
mas01cr@433 794 if(audiodb_read_data(adb, dbfid, track, &data_buffer, &data_buffer_size))
mas01cr@433 795 error("failed to read data");
mas01cr@435 796 if(wL <= trackTable[track]) { // test for short sequences
mas01cr@239 797
mas01cr@239 798 VERB_LOG(7,"%u.%jd.%u | ", track, (intmax_t) trackIndexOffset, trackTable[track]);
mas01cr@239 799
mas01cr@439 800 audiodb_initialize_arrays(adb, spec, track, qpointers.nvectors, query, data_buffer, D, DD);
mas01cr@239 801
mas01cr@435 802 if(spec->refine.flags & ADB_REFINE_DURATION_RATIO) {
mas01cr@438 803 VERB_LOG(3,"meanQdur=%f meanDBdur=%f\n", qpointers.mean_duration[0], dbpointers.mean_duration[track]);
mas01cr@239 804 }
mas01cr@239 805
mas01cr@437 806 if((!(spec->refine.flags & ADB_REFINE_DURATION_RATIO)) ||
mas01cr@438 807 fabs(dbpointers.mean_duration[track]-qpointers.mean_duration[0]) < qpointers.mean_duration[0]*spec->refine.duration_ratio) {
mas01cr@435 808 if(spec->refine.flags & ADB_REFINE_DURATION_RATIO) {
mas01cr@239 809 VERB_LOG(3,"within duration tolerance.\n");
mas01cr@239 810 }
mas01cr@239 811
mas01cr@239 812 // Search for minimum distance by shingles (concatenated vectors)
mas01cr@437 813 for(j = 0; j <= qpointers.nvectors - wL; j += HOP_SIZE) {
mas01cr@239 814 for(k = 0; k <= trackTable[track] - wL; k += HOP_SIZE) {
mas01cr@431 815 double thisDist = 0;
mas01cr@438 816 double qn = qpointers.l2norm[j];
mas01cr@438 817 double sn = dbpointers.l2norm[trackIndexOffset + k];
mas01cr@435 818 switch(spec->params.distance) {
mas01cr@431 819 case ADB_DISTANCE_EUCLIDEAN_NORMED:
mas01cr@438 820 thisDist = 2-(2/(qn*sn))*DD[j][k];
mas01cr@431 821 break;
mas01cr@431 822 case ADB_DISTANCE_EUCLIDEAN:
mas01cr@438 823 thisDist = qn*qn + sn*sn - 2*DD[j][k];
mas01cr@431 824 break;
mas01cr@431 825 case ADB_DISTANCE_DOT_PRODUCT:
mas01cr@431 826 thisDist = DD[j][k];
mas01cr@431 827 break;
mas01cr@431 828 }
mas01cr@239 829 // Power test
mas01cr@438 830 if ((!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers.power[j], dbpointers.power[trackIndexOffset + k])) {
mas01cr@239 831 // radius test
mas01cr@435 832 if((!(spec->refine.flags & ADB_REFINE_RADIUS)) ||
mas01cr@435 833 thisDist <= (spec->refine.radius+O2_DISTANCE_TOLERANCE)) {
mas01cr@423 834 adb_result_t r;
mas01cr@423 835 r.key = fileTable + track * O2_FILETABLE_ENTRY_SIZE;
mas01cr@423 836 r.dist = thisDist;
mas01cr@423 837 r.qpos = usingQueryPoint ? queryPoint : j;
mas01cr@423 838 r.ipos = k;
mas01cr@423 839 accumulator->add_point(&r);
mas01cr@239 840 }
mas01cr@239 841 }
mas01cr@239 842 }
mas01cr@239 843 }
mas01cr@239 844 } // Duration match
mas01cr@437 845 audiodb_delete_arrays(track, qpointers.nvectors, D, DD);
mas01cr@239 846 }
mas01cr@239 847 }
mas01cr@239 848
mas01cr@239 849 free(data_buffer);
mas01cr@239 850
mas01cr@239 851 gettimeofday(&tv2,NULL);
mas01cr@239 852 VERB_LOG(1,"elapsed time: %ld msec\n",
mas01cr@239 853 (tv2.tv_sec*1000 + tv2.tv_usec/1000) -
mas01cr@239 854 (tv1.tv_sec*1000 + tv1.tv_usec/1000))
mas01cr@239 855
mas01cr@239 856 // Clean up
mas01cr@239 857 if(query_data)
mas01cr@239 858 delete[] query_data;
mas01cr@437 859 if(qpointers.l2norm_data)
mas01cr@437 860 delete[] qpointers.l2norm_data;
mas01cr@437 861 if(qpointers.power_data)
mas01cr@437 862 delete[] qpointers.power_data;
mas01cr@437 863 if(qpointers.mean_duration)
mas01cr@437 864 delete[] qpointers.mean_duration;
mas01cr@438 865 if(dbpointers.power_data)
mas01cr@438 866 delete[] dbpointers.power_data;
mas01cr@438 867 if(dbpointers.l2norm_data)
mas01cr@438 868 delete[] dbpointers.l2norm_data;
mas01cr@239 869 if(D)
mas01cr@239 870 delete[] D;
mas01cr@239 871 if(DD)
mas01cr@239 872 delete[] DD;
mas01cr@438 873 if(dbpointers.mean_duration)
mas01cr@438 874 delete[] dbpointers.mean_duration;
mas01cr@239 875 }