annotate query.cpp @ 453:16a903968d18 api-inversion

Almost finished with audioDB::query_loop. This patch is a little bit noisy, because we rename adb->keys to adb->keymap, introduce a new vector adb->keys (essentially to replace fileTable), and introduce new functionality (both include and exclude keylists in adb_query_refine_t) as well as modifying the query_loop function itself to take advantage of all of these goodies. Oh, and we also fix an embarrassing state bug in adb->track_offsets for insert -- what was I thinking? (Thank you, regression test suites). Since we are on a private branch at the moment, we can take the luxury of renumbering the ADB_REFINE_ flags to include the exclude list at the logical place; once we have an ABI to support, that won't be possible. Now audioDB::query builds up include and exclude lists as appropriate; query_loop does an [O(NlogN) probably] buildup of the keys to consider, and then iterates over tracks sequentially, seeking only if one or more tracks have been excluded. No more trackFile, yay! The only remaining thing to deal with is the accumulator. It's easy enough to pass it around, but I want to read the indexed version before doing so to see how that all fits together.
author mas01cr
date Wed, 24 Dec 2008 10:57:14 +0000
parents 25ee0b77f8ca
children f3b0ddc1ead0
rev   line source
mas01cr@239 1 #include "audioDB.h"
mas01cr@239 2 #include "reporter.h"
mas01cr@239 3
mas01cr@422 4 #include "audioDB-internals.h"
mas01cr@422 5 #include "accumulators.h"
mas01cr@422 6
mas01cr@444 7 bool audiodb_powers_acceptable(adb_query_refine_t *r, double p1, double p2) {
mas01cr@425 8 if (r->flags & ADB_REFINE_ABSOLUTE_THRESHOLD) {
mas01cr@425 9 if ((p1 < r->absolute_threshold) || (p2 < r->absolute_threshold)) {
mas01cr@239 10 return false;
mas01cr@239 11 }
mas01cr@239 12 }
mas01cr@425 13 if (r->flags & ADB_REFINE_RELATIVE_THRESHOLD) {
mas01cr@425 14 if (fabs(p1-p2) > fabs(r->relative_threshold)) {
mas01cr@239 15 return false;
mas01cr@239 16 }
mas01cr@239 17 }
mas01cr@239 18 return true;
mas01cr@239 19 }
mas01cr@239 20
mas01cr@239 21 void audioDB::query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) {
mas01cr@425 22
mas01cr@431 23 // init database tables and dbH first
mas01cr@431 24 if(query_from_key)
mas01cr@431 25 initTables(dbName);
mas01cr@431 26 else
mas01cr@431 27 initTables(dbName, inFile);
mas01cr@431 28
mas01cr@435 29 adb_query_spec_t qspec;
mas01cr@444 30 adb_datum_t datum = {0};
mas01cr@435 31
mas01cr@435 32 qspec.refine.flags = 0;
mas01cr@453 33 if(trackFile) {
mas01cr@453 34 qspec.refine.flags |= ADB_REFINE_INCLUDE_KEYLIST;
mas01cr@453 35 std::vector<const char *> v;
mas01cr@453 36 char *k = new char[MAXSTR];
mas01cr@453 37 trackFile->getline(k, MAXSTR);
mas01cr@453 38 while(!trackFile->eof()) {
mas01cr@453 39 v.push_back(k);
mas01cr@453 40 k = new char[MAXSTR];
mas01cr@453 41 trackFile->getline(k, MAXSTR);
mas01cr@453 42 }
mas01cr@453 43 delete [] k;
mas01cr@453 44 qspec.refine.include.nkeys = v.size();
mas01cr@453 45 qspec.refine.include.keys = new const char *[qspec.refine.include.nkeys];
mas01cr@453 46 for(unsigned int k = 0; k < qspec.refine.include.nkeys; k++) {
mas01cr@453 47 qspec.refine.include.keys[k] = v[k];
mas01cr@453 48 }
mas01cr@453 49 }
mas01cr@453 50 if(query_from_key) {
mas01cr@453 51 qspec.refine.flags |= ADB_REFINE_EXCLUDE_KEYLIST;
mas01cr@453 52 qspec.refine.exclude.nkeys = 1;
mas01cr@453 53 qspec.refine.exclude.keys = &key;
mas01cr@453 54 }
mas01cr@425 55 if(radius) {
mas01cr@435 56 qspec.refine.flags |= ADB_REFINE_RADIUS;
mas01cr@435 57 qspec.refine.radius = radius;
mas01cr@425 58 }
mas01cr@425 59 if(use_absolute_threshold) {
mas01cr@435 60 qspec.refine.flags |= ADB_REFINE_ABSOLUTE_THRESHOLD;
mas01cr@435 61 qspec.refine.absolute_threshold = absolute_threshold;
mas01cr@425 62 }
mas01cr@425 63 if(use_relative_threshold) {
mas01cr@435 64 qspec.refine.flags |= ADB_REFINE_RELATIVE_THRESHOLD;
mas01cr@435 65 qspec.refine.relative_threshold = relative_threshold;
mas01cr@425 66 }
mas01cr@425 67 if(usingTimes) {
mas01cr@435 68 qspec.refine.flags |= ADB_REFINE_DURATION_RATIO;
mas01cr@435 69 qspec.refine.duration_ratio = timesTol;
mas01cr@425 70 }
mas01cr@439 71 /* FIXME: not sure about this any more; maybe it belongs in
mas01cr@439 72 query_id? Or maybe we just don't need a flag for it? */
mas01cr@439 73 qspec.refine.hopsize = sequenceHop;
mas01cr@425 74 if(sequenceHop != 1) {
mas01cr@435 75 qspec.refine.flags |= ADB_REFINE_HOP_SIZE;
mas01cr@425 76 }
mas01cr@425 77
mas01cr@444 78 if(query_from_key) {
mas01cr@444 79 datum.key = key;
mas01cr@444 80 } else {
mas01cr@444 81 int fd;
mas01cr@444 82 struct stat st;
mas01cr@444 83
mas01cr@444 84 /* FIXME: around here there are all sorts of hideous leaks. */
mas01cr@444 85 fd = open(inFile, O_RDONLY);
mas01cr@444 86 if(fd < 0) {
mas01cr@444 87 error("failed to open feature file", inFile);
mas01cr@444 88 }
mas01cr@444 89 fstat(fd, &st);
mas01cr@444 90 read(fd, &datum.dim, sizeof(uint32_t));
mas01cr@444 91 datum.nvectors = (st.st_size - sizeof(uint32_t)) / (datum.dim * sizeof(double));
mas01cr@444 92 datum.data = (double *) malloc(st.st_size - sizeof(uint32_t));
mas01cr@444 93 read(fd, datum.data, st.st_size - sizeof(uint32_t));
mas01cr@444 94 close(fd);
mas01cr@444 95 if(usingPower) {
mas01cr@444 96 uint32_t one;
mas01cr@444 97 fd = open(powerFileName, O_RDONLY);
mas01cr@444 98 if(fd < 0) {
mas01cr@444 99 error("failed to open power file", powerFileName);
mas01cr@444 100 }
mas01cr@444 101 read(fd, &one, sizeof(uint32_t));
mas01cr@444 102 if(one != 1) {
mas01cr@444 103 error("malformed power file dimensionality", powerFileName);
mas01cr@444 104 }
mas01cr@444 105 datum.power = (double *) malloc(datum.nvectors * sizeof(double));
mas01cr@444 106 if(read(fd, datum.power, datum.nvectors * sizeof(double)) != (ssize_t) (datum.nvectors * sizeof(double))) {
mas01cr@444 107 error("malformed power file", powerFileName);
mas01cr@444 108 }
mas01cr@444 109 close(fd);
mas01cr@444 110 }
mas01cr@444 111 if(usingTimes) {
mas01cr@444 112 datum.times = (double *) malloc(2 * datum.nvectors * sizeof(double));
mas01cr@444 113 insertTimeStamps(datum.nvectors, timesFile, datum.times);
mas01cr@444 114 }
mas01cr@444 115 }
mas01cr@444 116
mas01cr@444 117 qspec.qid.datum = &datum;
mas01cr@435 118 qspec.qid.sequence_length = sequenceLength;
mas01cr@435 119 qspec.qid.flags = usingQueryPoint ? 0 : ADB_QUERY_ID_FLAG_EXHAUSTIVE;
mas01cr@435 120 qspec.qid.sequence_start = queryPoint;
mas01cr@435 121
mas01cr@431 122 switch(queryType) {
mas01cr@431 123 case O2_POINT_QUERY:
mas01cr@435 124 qspec.qid.sequence_length = 1;
mas01cr@435 125 qspec.params.accumulation = ADB_ACCUMULATION_DB;
mas01cr@435 126 qspec.params.distance = ADB_DISTANCE_DOT_PRODUCT;
mas01cr@435 127 qspec.params.npoints = pointNN;
mas01cr@435 128 qspec.params.ntracks = 0;
mas01cr@431 129 reporter = new pointQueryReporter< std::greater < NNresult > >(pointNN);
mas01cr@431 130 break;
mas01cr@431 131 case O2_TRACK_QUERY:
mas01cr@435 132 qspec.qid.sequence_length = 1;
mas01cr@435 133 qspec.params.accumulation = ADB_ACCUMULATION_PER_TRACK;
mas01cr@435 134 qspec.params.distance = ADB_DISTANCE_DOT_PRODUCT;
mas01cr@435 135 qspec.params.npoints = pointNN;
mas01cr@435 136 qspec.params.ntracks = trackNN;
mas01cr@431 137 reporter = new trackAveragingReporter< std::greater< NNresult > >(pointNN, trackNN, dbH->numFiles);
mas01cr@431 138 break;
mas01cr@431 139 case O2_SEQUENCE_QUERY:
mas01cr@431 140 case O2_N_SEQUENCE_QUERY:
mas01cr@435 141 qspec.params.accumulation = ADB_ACCUMULATION_PER_TRACK;
mas01cr@435 142 qspec.params.distance = no_unit_norming ? ADB_DISTANCE_EUCLIDEAN : ADB_DISTANCE_EUCLIDEAN_NORMED;
mas01cr@435 143 qspec.params.npoints = pointNN;
mas01cr@435 144 qspec.params.ntracks = trackNN;
mas01cr@431 145 switch(queryType) {
mas01cr@431 146 case O2_SEQUENCE_QUERY:
mas01cr@435 147 if(!(qspec.refine.flags & ADB_REFINE_RADIUS)) {
mas01cr@431 148 reporter = new trackAveragingReporter< std::less< NNresult > >(pointNN, trackNN, dbH->numFiles);
mas01cr@435 149 } else if (index_exists(adb->path, qspec.refine.radius, qspec.qid.sequence_length)) {
mas01cr@435 150 char* indexName = index_get_name(adb->path, qspec.refine.radius, qspec.qid.sequence_length);
mas01cr@431 151 lsh = index_allocate(indexName, false);
mas01cr@431 152 reporter = new trackSequenceQueryRadReporter(trackNN, index_to_trackID(lsh->get_maxp(), lsh_n_point_bits)+1);
mas01cr@431 153 delete[] indexName;
mas01cr@431 154 } else {
mas01cr@431 155 reporter = new trackSequenceQueryRadReporter(trackNN, dbH->numFiles);
mas01cr@431 156 }
mas01cr@431 157 break;
mas01cr@431 158 case O2_N_SEQUENCE_QUERY:
mas01cr@435 159 if(!(qspec.refine.flags & ADB_REFINE_RADIUS)) {
mas01cr@431 160 reporter = new trackSequenceQueryNNReporter< std::less < NNresult > >(pointNN, trackNN, dbH->numFiles);
mas01cr@435 161 } else if (index_exists(adb->path, qspec.refine.radius, qspec.qid.sequence_length)){
mas01cr@435 162 char* indexName = index_get_name(adb->path, qspec.refine.radius, qspec.qid.sequence_length);
mas01cr@431 163 lsh = index_allocate(indexName, false);
mas01cr@431 164 reporter = new trackSequenceQueryRadNNReporter(pointNN,trackNN, index_to_trackID(lsh->get_maxp(), lsh_n_point_bits)+1);
mas01cr@431 165 delete[] indexName;
mas01cr@431 166 } else {
mas01cr@431 167 reporter = new trackSequenceQueryRadNNReporter(pointNN,trackNN, dbH->numFiles);
mas01cr@431 168 }
mas01cr@431 169 break;
mas01cr@431 170 }
mas01cr@431 171 break;
mas01cr@431 172 case O2_ONE_TO_ONE_N_SEQUENCE_QUERY:
mas01cr@435 173 qspec.params.accumulation = ADB_ACCUMULATION_ONE_TO_ONE;
mas01cr@435 174 qspec.params.distance = ADB_DISTANCE_EUCLIDEAN_NORMED;
mas01cr@435 175 qspec.params.npoints = 0;
mas01cr@435 176 qspec.params.ntracks = 0;
mas01cr@431 177 break;
mas01cr@431 178 default:
mas01cr@431 179 error("unrecognized queryType");
mas01cr@431 180 }
mas01mc@292 181
mas01mc@292 182 // keyKeyPos requires dbH to be initialized
mas01cr@430 183 if(query_from_key && (!key || (query_from_key_index = audiodb_key_index(adb, key)) == (uint32_t) -1))
mas01cr@430 184 error("Query key not found", key);
mas01cr@431 185
mas01cr@435 186 switch(qspec.params.distance) {
mas01cr@431 187 case ADB_DISTANCE_DOT_PRODUCT:
mas01cr@435 188 switch(qspec.params.accumulation) {
mas01cr@431 189 case ADB_ACCUMULATION_DB:
mas01cr@435 190 accumulator = new DBAccumulator<adb_result_dist_gt>(qspec.params.npoints);
mas01cr@431 191 break;
mas01cr@431 192 case ADB_ACCUMULATION_PER_TRACK:
mas01cr@435 193 accumulator = new PerTrackAccumulator<adb_result_dist_gt>(qspec.params.npoints, qspec.params.ntracks);
mas01cr@431 194 break;
mas01cr@431 195 case ADB_ACCUMULATION_ONE_TO_ONE:
mas01cr@431 196 accumulator = new NearestAccumulator<adb_result_dist_gt>();
mas01cr@431 197 break;
mas01cr@431 198 default:
mas01cr@431 199 error("unknown accumulation");
mas01cr@239 200 }
mas01cr@239 201 break;
mas01cr@431 202 case ADB_DISTANCE_EUCLIDEAN_NORMED:
mas01cr@431 203 case ADB_DISTANCE_EUCLIDEAN:
mas01cr@435 204 switch(qspec.params.accumulation) {
mas01cr@431 205 case ADB_ACCUMULATION_DB:
mas01cr@435 206 accumulator = new DBAccumulator<adb_result_dist_lt>(qspec.params.npoints);
mas01cr@431 207 break;
mas01cr@431 208 case ADB_ACCUMULATION_PER_TRACK:
mas01cr@435 209 accumulator = new PerTrackAccumulator<adb_result_dist_lt>(qspec.params.npoints, qspec.params.ntracks);
mas01cr@431 210 break;
mas01cr@431 211 case ADB_ACCUMULATION_ONE_TO_ONE:
mas01cr@431 212 accumulator = new NearestAccumulator<adb_result_dist_lt>();
mas01cr@431 213 break;
mas01cr@431 214 default:
mas01cr@431 215 error("unknown accumulation");
mas01mc@263 216 }
mas01mc@263 217 break;
mas01cr@239 218 default:
mas01cr@431 219 error("unknown distance function");
mas01cr@431 220 }
mas01cr@431 221
mas01mc@292 222 // Test for index (again) here
mas01cr@435 223 if((qspec.refine.flags & ADB_REFINE_RADIUS) && index_exists(adb->path, qspec.refine.radius, qspec.qid.sequence_length)){
mas01cr@436 224 VERB_LOG(1, "Calling indexed query on database %s, radius=%f, sequence_length=%d\n", adb->path, qspec.refine.radius, qspec.qid.sequence_length);
mas01cr@435 225 index_query_loop(&qspec, dbName, query_from_key_index);
mas01mc@329 226 }
mas01mc@329 227 else{
mas01mc@329 228 VERB_LOG(1, "Calling brute-force query on database %s\n", dbName);
mas01cr@453 229 if(query_loop(adb, &qspec)) {
mas01cr@452 230 error("query_loop failed");
mas01cr@452 231 }
mas01mc@329 232 }
mas01mc@292 233
mas01cr@423 234 adb_query_results_t *rs = accumulator->get_points();
mas01cr@423 235 for(unsigned int k = 0; k < rs->nresults; k++) {
mas01cr@423 236 adb_result_t r = rs->results[k];
mas01cr@430 237 reporter->add_point(audiodb_key_index(adb, r.key), r.qpos, r.ipos, r.dist);
mas01cr@423 238 }
mas01cr@423 239
mas01mc@292 240 reporter->report(fileTable, adbQueryResponse);
mas01cr@239 241 }
mas01cr@239 242
mas01cr@439 243 static void audiodb_initialize_arrays(adb_t *adb, adb_query_spec_t *spec, int track, unsigned int numVectors, double *query, double *data_buffer, double **D, double **DD) {
mas01cr@239 244 unsigned int j, k, l, w;
mas01cr@239 245 double *dp, *qp, *sp;
mas01cr@239 246
mas01cr@439 247 const unsigned HOP_SIZE = spec->refine.hopsize;
mas01cr@435 248 const unsigned wL = spec->qid.sequence_length;
mas01cr@239 249
mas01cr@239 250 for(j = 0; j < numVectors; j++) {
mas01cr@239 251 // Sum products matrix
mas01cr@433 252 D[j] = new double[(*adb->track_lengths)[track]];
mas01cr@239 253 assert(D[j]);
mas01cr@239 254 // Matched filter matrix
mas01cr@433 255 DD[j]=new double[(*adb->track_lengths)[track]];
mas01cr@239 256 assert(DD[j]);
mas01cr@239 257 }
mas01cr@239 258
mas01cr@239 259 // Dot product
mas01cr@239 260 for(j = 0; j < numVectors; j++)
mas01cr@433 261 for(k = 0; k < (*adb->track_lengths)[track]; k++){
mas01cr@439 262 qp = query + j * adb->header->dim;
mas01cr@439 263 sp = data_buffer + k * adb->header->dim;
mas01cr@239 264 DD[j][k] = 0.0; // Initialize matched filter array
mas01cr@239 265 dp = &D[j][k]; // point to correlation cell j,k
mas01cr@239 266 *dp = 0.0; // initialize correlation cell
mas01cr@439 267 l = adb->header->dim; // size of vectors
mas01cr@239 268 while(l--)
mas01cr@239 269 *dp += *qp++ * *sp++;
mas01cr@239 270 }
mas01cr@239 271
mas01cr@239 272 // Matched Filter
mas01cr@239 273 // HOP SIZE == 1
mas01cr@239 274 double* spd;
mas01cr@239 275 if(HOP_SIZE == 1) { // HOP_SIZE = shingleHop
mas01cr@239 276 for(w = 0; w < wL; w++) {
mas01cr@239 277 for(j = 0; j < numVectors - w; j++) {
mas01cr@239 278 sp = DD[j];
mas01cr@239 279 spd = D[j+w] + w;
mas01cr@433 280 k = (*adb->track_lengths)[track] - w;
mas01mc@292 281 while(k--)
mas01mc@292 282 *sp++ += *spd++;
mas01cr@239 283 }
mas01cr@239 284 }
mas01cr@239 285 } else { // HOP_SIZE != 1
mas01cr@239 286 for(w = 0; w < wL; w++) {
mas01cr@239 287 for(j = 0; j < numVectors - w; j += HOP_SIZE) {
mas01cr@239 288 sp = DD[j];
mas01cr@239 289 spd = D[j+w]+w;
mas01cr@433 290 for(k = 0; k < (*adb->track_lengths)[track] - w; k += HOP_SIZE) {
mas01cr@239 291 *sp += *spd;
mas01cr@239 292 sp += HOP_SIZE;
mas01cr@239 293 spd += HOP_SIZE;
mas01cr@239 294 }
mas01cr@239 295 }
mas01cr@239 296 }
mas01cr@239 297 }
mas01cr@239 298 }
mas01cr@239 299
/* Release the rows allocated by audiodb_initialize_arrays().
 *
 * Deletes the first numVectors rows of D and then of DD; either table
 * may be NULL, in which case it is skipped.  The row-pointer arrays
 * themselves are not released.  track is not used.
 */
static void audiodb_delete_arrays(int track, unsigned int numVectors, double **D, double **DD) {
  (void) track;

  double **tables[2] = { D, DD };
  for(unsigned int t = 0; t < 2; t++) {
    double **rows = tables[t];
    if(rows == NULL) {
      continue;
    }
    for(unsigned int row = 0; row < numVectors; row++) {
      delete[] rows[row];
    }
  }
}
mas01cr@239 312
mas01cr@433 313 int audiodb_read_data(adb_t *adb, int trkfid, int track, double **data_buffer_p, size_t *data_buffer_size_p) {
mas01cr@433 314 uint32_t track_length = (*adb->track_lengths)[track];
mas01cr@433 315 size_t track_size = track_length * sizeof(double) * adb->header->dim;
mas01cr@433 316 if (track_size > *data_buffer_size_p) {
mas01cr@239 317 if(*data_buffer_p) {
mas01cr@239 318 free(*data_buffer_p);
mas01cr@239 319 }
mas01cr@239 320 {
mas01cr@433 321 *data_buffer_size_p = track_size;
mas01cr@433 322 void *tmp = malloc(track_size);
mas01cr@239 323 if (tmp == NULL) {
mas01cr@433 324 goto error;
mas01cr@239 325 }
mas01cr@239 326 *data_buffer_p = (double *) tmp;
mas01cr@239 327 }
mas01cr@239 328 }
mas01cr@239 329
mas01cr@433 330 read_or_goto_error(trkfid, *data_buffer_p, track_size);
mas01cr@433 331 return 0;
mas01cr@433 332
mas01cr@433 333 error:
mas01cr@433 334 return 1;
mas01cr@239 335 }
mas01cr@239 336
mas01cr@405 337 void audioDB::insertTimeStamps(unsigned numVectors, std::ifstream *timesFile, double *timesdata) {
mas01cr@405 338 assert(usingTimes);
mas01cr@405 339
mas01cr@405 340 unsigned numtimes = 0;
mas01cr@405 341
mas01cr@405 342 if(!timesFile->is_open()) {
mas01cr@405 343 error("problem opening times file on timestamped database", timesFileName);
mas01cr@405 344 }
mas01cr@405 345
mas01cr@405 346 double timepoint, next;
mas01cr@405 347 *timesFile >> timepoint;
mas01cr@405 348 if (timesFile->eof()) {
mas01cr@405 349 error("no entries in times file", timesFileName);
mas01cr@405 350 }
mas01cr@405 351 numtimes++;
mas01cr@405 352 do {
mas01cr@405 353 *timesFile >> next;
mas01cr@405 354 if (timesFile->eof()) {
mas01cr@405 355 break;
mas01cr@405 356 }
mas01cr@405 357 numtimes++;
mas01cr@405 358 timesdata[0] = timepoint;
mas01cr@405 359 timepoint = (timesdata[1] = next);
mas01cr@405 360 timesdata += 2;
mas01cr@405 361 } while (numtimes < numVectors + 1);
mas01cr@405 362
mas01cr@405 363 if (numtimes < numVectors + 1) {
mas01cr@405 364 error("too few timepoints in times file", timesFileName);
mas01cr@405 365 }
mas01cr@405 366
mas01cr@405 367 *timesFile >> next;
mas01cr@405 368 if (!timesFile->eof()) {
mas01cr@405 369 error("too many timepoints in times file", timesFileName);
mas01cr@405 370 }
mas01cr@405 371 }
mas01cr@405 372
mas01cr@444 373 int audiodb_query_spec_qpointers(adb_t *adb, adb_query_spec_t *spec, double **vector_data, double **vector, adb_qpointers_internal_t *qpointers) {
mas01cr@443 374 adb_datum_t *datum;
mas01cr@443 375 adb_datum_t d = {0};
mas01cr@443 376 uint32_t nvectors;
mas01cr@443 377 uint32_t sequence_length;
mas01cr@443 378 uint32_t sequence_start;
mas01cr@443 379
mas01cr@443 380 datum = spec->qid.datum;
mas01cr@443 381 sequence_length = spec->qid.sequence_length;
mas01cr@443 382 sequence_start = spec->qid.sequence_start;
mas01cr@443 383
mas01cr@443 384 if(datum->data) {
mas01cr@443 385 if(datum->dim != adb->header->dim) {
mas01cr@443 386 return 1;
mas01cr@443 387 }
mas01cr@443 388 /* initialize d, and mark that nothing needs freeing later. */
mas01cr@443 389 d = *datum;
mas01cr@443 390 datum = &d;
mas01cr@443 391 } else if (datum->key) {
mas01cr@449 392 uint32_t track_id;
mas01cr@449 393 if((track_id = audiodb_key_index(adb, datum->key)) == (uint32_t) -1) {
mas01cr@443 394 return 1;
mas01cr@443 395 }
mas01cr@443 396 off_t track_offset = (*adb->track_offsets)[track_id];
mas01cr@443 397
mas01cr@443 398 if(adb->header->flags & O2_FLAG_LARGE_ADB) {
mas01cr@443 399 /* create a reference/insert, then use adb_insert_create_datum() */
mas01cr@443 400 adb_reference_t reference = {0};
mas01cr@443 401 char features[MAXSTR], power[MAXSTR], times[MAXSTR];
mas01cr@443 402 lseek(adb->fd, adb->header->dataOffset + track_id * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@443 403 /* FIXME: learn not to worry and love the bomb^Wbuffer overflow */
mas01cr@443 404 read(adb->fd, features, MAXSTR);
mas01cr@443 405 reference.features = features;
mas01cr@443 406 if(adb->header->flags & O2_FLAG_POWER) {
mas01cr@443 407 lseek(adb->fd, adb->header->powerTableOffset + track_id * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@443 408 read(adb->fd, power, MAXSTR);
mas01cr@443 409 reference.power = power;
mas01cr@443 410 }
mas01cr@443 411 if(adb->header->flags & O2_FLAG_TIMES) {
mas01cr@443 412 lseek(adb->fd, adb->header->timesTableOffset + track_id * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@443 413 read(adb->fd, times, MAXSTR);
mas01cr@443 414 reference.times = times;
mas01cr@443 415 }
mas01cr@443 416 audiodb_insert_create_datum(&reference, &d);
mas01cr@443 417 } else {
mas01cr@443 418 /* initialize from sources of data that we already have */
mas01cr@443 419 d.nvectors = (*adb->track_lengths)[track_id];
mas01cr@443 420 d.dim = adb->header->dim;
mas01cr@443 421 d.key = datum->key;
mas01cr@443 422 /* read out stuff from the database tables */
mas01cr@443 423 d.data = (double *) malloc(d.nvectors * d.dim * sizeof(double));
mas01cr@443 424 lseek(adb->fd, adb->header->dataOffset + track_offset, SEEK_SET);
mas01cr@443 425 read(adb->fd, d.data, d.nvectors * d.dim * sizeof(double));
mas01cr@443 426 if(adb->header->flags & O2_FLAG_POWER) {
mas01cr@443 427 d.power = (double *) malloc(d.nvectors * sizeof(double));
mas01cr@443 428 lseek(adb->fd, adb->header->powerTableOffset + track_offset / d.dim, SEEK_SET);
mas01cr@443 429 read(adb->fd, d.power, d.nvectors * sizeof(double));
mas01cr@443 430 }
mas01cr@443 431 if(adb->header->flags & O2_FLAG_TIMES) {
mas01cr@443 432 d.times = (double *) malloc(2 * d.nvectors * sizeof(double));
mas01cr@443 433 lseek(adb->fd, adb->header->timesTableOffset + track_offset / d.dim, SEEK_SET);
mas01cr@443 434 read(adb->fd, d.times, 2 * d.nvectors * sizeof(double));
mas01cr@443 435 }
mas01cr@443 436 }
mas01cr@443 437 } else {
mas01cr@443 438 return 1;
mas01cr@443 439 }
mas01cr@443 440
mas01cr@443 441 /* Now we have a full(ish) datum, compute all the qpointery stuff
mas01cr@443 442 that we care about (l2norm/power/mean duration). (This bit could
mas01cr@443 443 conceivably become a new function) */
mas01cr@443 444 nvectors = d.nvectors;
mas01cr@444 445 /* FIXME: check the overflow logic here */
mas01cr@444 446 if(sequence_start + sequence_length > nvectors) {
mas01cr@443 447 /* is there something to free? goto error */
mas01cr@443 448 return 1;
mas01cr@443 449 }
mas01cr@443 450
mas01cr@443 451 qpointers->nvectors = nvectors;
mas01cr@443 452
mas01cr@443 453 size_t vector_size = nvectors * sizeof(double) * d.dim;
mas01cr@445 454 *vector_data = new double[vector_size];
mas01cr@443 455 memcpy(*vector_data, d.data, vector_size);
mas01cr@443 456
mas01cr@445 457 qpointers->l2norm_data = new double[vector_size / d.dim];
mas01cr@443 458 audiodb_l2norm_buffer(*vector_data, d.dim, nvectors, qpointers->l2norm_data);
mas01cr@443 459 audiodb_sequence_sum(qpointers->l2norm_data, nvectors, sequence_length);
mas01cr@443 460 audiodb_sequence_sqrt(qpointers->l2norm_data, nvectors, sequence_length);
mas01cr@443 461
mas01cr@443 462 if(d.power) {
mas01cr@445 463 qpointers->power_data = new double[vector_size / d.dim];
mas01cr@443 464 memcpy(qpointers->power_data, d.power, vector_size / d.dim);
mas01cr@443 465 audiodb_sequence_sum(qpointers->power_data, nvectors, sequence_length);
mas01cr@443 466 audiodb_sequence_average(qpointers->power_data, nvectors, sequence_length);
mas01cr@443 467 }
mas01cr@443 468
mas01cr@443 469 if(d.times) {
mas01cr@445 470 qpointers->mean_duration = new double[1];
mas01cr@445 471 *qpointers->mean_duration = 0;
mas01cr@443 472 for(unsigned int k = 0; k < nvectors; k++) {
mas01cr@443 473 *qpointers->mean_duration += d.times[2*k+1] - d.times[2*k];
mas01cr@443 474 }
mas01cr@443 475 *qpointers->mean_duration /= nvectors;
mas01cr@443 476 }
mas01cr@443 477
mas01cr@443 478
mas01cr@443 479 /* Finally, set up the moving qpointers. */
mas01cr@443 480 if(spec->qid.flags & ADB_QUERY_ID_FLAG_EXHAUSTIVE) {
mas01cr@443 481 *vector = *vector_data;
mas01cr@443 482 qpointers->l2norm = qpointers->l2norm_data;
mas01cr@443 483 qpointers->power = qpointers->power_data;
mas01cr@443 484 } else {
mas01cr@443 485 *vector = *vector_data + spec->qid.sequence_start * d.dim;
mas01cr@443 486 qpointers->l2norm = qpointers->l2norm_data + spec->qid.sequence_start;
mas01cr@444 487 if(d.power) {
mas01cr@444 488 qpointers->power = qpointers->power_data + spec->qid.sequence_start;
mas01cr@444 489 }
mas01cr@444 490 /* FIXME: this is a little bit ugly. No, a lot ugly. But at the
mas01cr@444 491 * moment this is how query_loop() knows when to stop, so for
mas01cr@444 492 * now... */
mas01cr@444 493 qpointers->nvectors = sequence_length;
mas01cr@443 494 }
mas01cr@443 495
mas01cr@443 496 /* Clean up: free any bits of datum that we have ourselves
mas01cr@443 497 * allocated. */
mas01cr@443 498 if(datum != &d) {
mas01cr@443 499 audiodb_free_datum(&d);
mas01cr@443 500 }
mas01cr@444 501
mas01cr@444 502 return 0;
mas01cr@443 503 }
mas01cr@443 504
mas01cr@448 505 static int audiodb_set_up_dbpointers(adb_t *adb, adb_query_spec_t *spec, adb_qpointers_internal_t *dbpointers) {
mas01cr@438 506 uint32_t nvectors = adb->header->length / (adb->header->dim * sizeof(double));
mas01cr@435 507 uint32_t sequence_length = spec->qid.sequence_length;
mas01cr@438 508
mas01cr@437 509 bool using_power = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD);
mas01cr@437 510 bool using_times = spec->refine.flags & ADB_REFINE_DURATION_RATIO;
mas01cr@437 511 double *times_table = NULL;
mas01cr@435 512
mas01cr@239 513
mas01cr@438 514 dbpointers->nvectors = nvectors;
mas01cr@438 515 dbpointers->l2norm_data = new double[nvectors];
mas01cr@438 516
mas01cr@438 517 double *snpp = dbpointers->l2norm_data, *sppp = 0;
mas01cr@434 518 lseek(adb->fd, adb->header->l2normTableOffset, SEEK_SET);
mas01cr@438 519 read_or_goto_error(adb->fd, dbpointers->l2norm_data, nvectors * sizeof(double));
mas01cr@239 520
mas01cr@437 521 if (using_power) {
mas01cr@434 522 if (!(adb->header->flags & O2_FLAG_POWER)) {
mas01cr@434 523 goto error;
mas01cr@239 524 }
mas01cr@438 525 dbpointers->power_data = new double[nvectors];
mas01cr@438 526 sppp = dbpointers->power_data;
mas01cr@434 527 lseek(adb->fd, adb->header->powerTableOffset, SEEK_SET);
mas01cr@438 528 read_or_goto_error(adb->fd, dbpointers->power_data, nvectors * sizeof(double));
mas01cr@239 529 }
mas01cr@239 530
mas01cr@434 531 for(unsigned int i = 0; i < adb->header->numFiles; i++){
mas01cr@434 532 size_t track_length = (*adb->track_lengths)[i];
mas01cr@435 533 if(track_length >= sequence_length) {
mas01cr@435 534 audiodb_sequence_sum(snpp, track_length, sequence_length);
mas01cr@435 535 audiodb_sequence_sqrt(snpp, track_length, sequence_length);
mas01cr@437 536 if (using_power) {
mas01cr@435 537 audiodb_sequence_sum(sppp, track_length, sequence_length);
mas01cr@435 538 audiodb_sequence_average(sppp, track_length, sequence_length);
mas01cr@239 539 }
mas01cr@239 540 }
mas01cr@434 541 snpp += track_length;
mas01cr@437 542 if (using_power) {
mas01cr@434 543 sppp += track_length;
mas01cr@239 544 }
mas01cr@239 545 }
mas01cr@239 546
mas01cr@437 547 if (using_times) {
mas01cr@434 548 if(!(adb->header->flags & O2_FLAG_TIMES)) {
mas01cr@437 549 goto error;
mas01cr@239 550 }
mas01cr@239 551
mas01cr@438 552 dbpointers->mean_duration = new double[adb->header->numFiles];
mas01cr@239 553
mas01cr@438 554 times_table = (double *) malloc(2 * nvectors * sizeof(double));
mas01cr@437 555 if(!times_table) {
mas01cr@437 556 goto error;
mas01cr@437 557 }
mas01cr@437 558 lseek(adb->fd, adb->header->timesTableOffset, SEEK_SET);
mas01cr@438 559 read_or_goto_error(adb->fd, times_table, 2 * nvectors * sizeof(double));
mas01cr@434 560 for(unsigned int k = 0; k < adb->header->numFiles; k++) {
mas01cr@434 561 size_t track_length = (*adb->track_lengths)[k];
mas01cr@239 562 unsigned int j;
mas01cr@438 563 dbpointers->mean_duration[k] = 0.0;
mas01cr@434 564 for(j = 0; j < track_length; j++) {
mas01cr@438 565 dbpointers->mean_duration[k] += times_table[2*j+1] - times_table[2*j];
mas01cr@239 566 }
mas01cr@438 567 dbpointers->mean_duration[k] /= j;
mas01cr@239 568 }
mas01cr@437 569
mas01cr@437 570 free(times_table);
mas01cr@437 571 times_table = NULL;
mas01cr@239 572 }
mas01cr@239 573
mas01cr@438 574 dbpointers->l2norm = dbpointers->l2norm_data;
mas01cr@438 575 dbpointers->power = dbpointers->power_data;
mas01cr@434 576 return 0;
mas01cr@434 577
mas01cr@434 578 error:
mas01cr@438 579 if(dbpointers->l2norm_data) {
mas01cr@438 580 delete [] dbpointers->l2norm_data;
mas01cr@434 581 }
mas01cr@438 582 if(dbpointers->power_data) {
mas01cr@438 583 delete [] dbpointers->power_data;
mas01cr@434 584 }
mas01cr@438 585 if(dbpointers->mean_duration) {
mas01cr@438 586 delete [] dbpointers->mean_duration;
mas01cr@434 587 }
mas01cr@437 588 if(times_table) {
mas01cr@437 589 free(times_table);
mas01cr@437 590 }
mas01cr@434 591 return 1;
mas01cr@434 592
mas01cr@239 593 }
mas01cr@239 594
mas01mc@292 595 // query_points()
mas01mc@292 596 //
mas01mc@292 597 // using PointPairs held in the exact_evaluation_queue compute squared distance for each PointPair
mas01mc@292 598 // and insert result into the current reporter.
mas01mc@292 599 //
mas01mc@292 600 // Preconditions:
mas01mc@292 601 // A query inFile has been opened with setup_query(...) and query pointers initialized
mas01mc@292 602 // The database contains some points
mas01mc@292 603 // An exact_evaluation_queue has been allocated and populated
mas01mc@292 604 // A reporter has been allocated
mas01mc@292 605 //
mas01mc@292 606 // Postconditions:
mas01mc@292 607 // reporter contains the points and distances that meet the reporter constraints
mas01mc@292 608
mas01cr@437 609 void audioDB::query_loop_points(adb_query_spec_t *spec, double* query, adb_qpointers_internal_t *qpointers) {
mas01cr@438 610 adb_qpointers_internal_t dbpointers = {0};
mas01mc@292 611
mas01cr@436 612 uint32_t sequence_length = spec->qid.sequence_length;
mas01cr@437 613 bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD);
mas01cr@436 614
mas01mc@292 615 // check pre-conditions
mas01mc@292 616 assert(exact_evaluation_queue&&reporter);
mas01mc@292 617 if(!exact_evaluation_queue->size()) // Exit if no points to evaluate
mas01mc@292 618 return;
mas01mc@292 619
mas01mc@292 620 // Compute database info
mas01mc@292 621 // FIXME: we more than likely don't need very much of the database
mas01mc@292 622 // so make a new method to build these values per-track or, even better, per-point
mas01mc@324 623 if( !( dbH->flags & O2_FLAG_LARGE_ADB) )
mas01cr@448 624 if(audiodb_set_up_dbpointers(adb, spec, &dbpointers)) {
mas01cr@434 625 error("failed to set up db");
mas01cr@434 626 }
mas01mc@292 627
mas01mc@292 628 // We are guaranteed that the order of points is sorted by:
mas01mc@324 629 // trackID, spos, qpos
mas01mc@292 630 // so we can be relatively efficient in initialization of track data.
mas01mc@292 631 // Here we assume that points don't overlap, so we will use exhaustive dot
mas01mc@324 632 // product evaluation instead of memoization of partial sums which is used
mas01mc@324 633 // for exhaustive brute-force evaluation from smaller databases: e.g. query_loop()
mas01mc@292 634 double dist;
mas01mc@292 635 size_t data_buffer_size = 0;
mas01mc@292 636 double *data_buffer = 0;
mas01mc@324 637 Uns32T trackOffset = 0;
mas01mc@324 638 Uns32T trackIndexOffset = 0;
mas01mc@292 639 Uns32T currentTrack = 0x80000000; // Initialize with a value outside of track index range
mas01mc@292 640 Uns32T npairs = exact_evaluation_queue->size();
mas01mc@292 641 while(npairs--){
mas01mc@292 642 PointPair pp = exact_evaluation_queue->top();
mas01mc@324 643 // Large ADB track data must be loaded here for sPower
mas01mc@324 644 if(dbH->flags & O2_FLAG_LARGE_ADB){
mas01mc@324 645 trackOffset=0;
mas01mc@324 646 trackIndexOffset=0;
mas01mc@292 647 if(currentTrack!=pp.trackID){
mas01mc@324 648 char* prefixedString = new char[O2_MAXFILESTR];
mas01mc@324 649 char* tmpStr = prefixedString;
mas01mc@324 650 // On currentTrack change, allocate and load track data
mas01mc@292 651 currentTrack=pp.trackID;
mas01cr@438 652 SAFE_DELETE_ARRAY(dbpointers.l2norm_data);
mas01cr@438 653 SAFE_DELETE_ARRAY(dbpointers.power_data);
mas01mc@324 654 if(infid>0)
mas01mc@324 655 close(infid);
mas01mc@324 656 // Open and check dimensions of feature file
mas01mc@324 657 strncpy(prefixedString, featureFileNameTable+pp.trackID*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR);
mas01mc@324 658 prefix_name((char ** const) &prefixedString, adb_feature_root);
mas01mc@324 659 if (prefixedString!=tmpStr)
mas01mc@324 660 delete[] tmpStr;
mas01mc@324 661 initInputFile(prefixedString, false); // nommap, file pointer at correct position
mas01mc@324 662 // Load the feature vector data for current track into data_buffer
mas01cr@433 663 if(audiodb_read_data(adb, infid, pp.trackID, &data_buffer, &data_buffer_size))
mas01cr@433 664 error("failed to read data");
mas01mc@324 665 // Load power and calculate power and l2norm sequence sums
mas01cr@438 666 init_track_aux_data(pp.trackID, data_buffer, &dbpointers.l2norm_data, &dbpointers.l2norm, &dbpointers.power_data, &dbpointers.power);
mas01mc@292 667 }
mas01mc@324 668 }
mas01mc@324 669 else{
mas01mc@324 670 // These offsets are w.r.t. the entire database of feature vectors and auxillary variables
mas01cr@450 671 trackOffset = (*adb->track_offsets)[pp.trackID];
mas01cr@450 672 trackIndexOffset = trackOffset/(dbH->dim * sizeof(double)); // num vectors offset
mas01mc@324 673 }
mas01mc@324 674 Uns32T qPos = usingQueryPoint?0:pp.qpos;// index for query point
mas01mc@324 675 Uns32T sPos = trackIndexOffset+pp.spos; // index into l2norm table
mas01mc@324 676 // Test power thresholds before computing distance
mas01cr@438 677 if( ( (!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers->power[qPos], dbpointers.power[sPos])) &&
mas01cr@451 678 ( qPos<qpointers->nvectors-sequence_length+1 && pp.spos<(*adb->track_lengths)[pp.trackID]-sequence_length+1 ) ){
mas01mc@324 679 // Non-large ADB track data is loaded inside power test for efficiency
mas01mc@324 680 if( !(dbH->flags & O2_FLAG_LARGE_ADB) && (currentTrack!=pp.trackID) ){
mas01mc@324 681 // On currentTrack change, allocate and load track data
mas01mc@324 682 currentTrack=pp.trackID;
mas01cr@450 683 lseek(dbfid, dbH->dataOffset + trackOffset, SEEK_SET);
mas01cr@433 684 if(audiodb_read_data(adb, dbfid, currentTrack, &data_buffer, &data_buffer_size))
mas01cr@433 685 error("failed to read data");
mas01mc@324 686 }
mas01mc@324 687 // Compute distance
mas01cr@436 688 dist = audiodb_dot_product(query+qPos*dbH->dim, data_buffer+pp.spos*dbH->dim, dbH->dim*sequence_length);
mas01cr@437 689 double qn = qpointers->l2norm[qPos];
mas01cr@438 690 double sn = dbpointers.l2norm[sPos];
mas01cr@435 691 switch(spec->params.distance) {
mas01cr@431 692 case ADB_DISTANCE_EUCLIDEAN_NORMED:
mas01mc@324 693 dist = 2 - (2/(qn*sn))*dist;
mas01cr@431 694 break;
mas01cr@431 695 case ADB_DISTANCE_EUCLIDEAN:
mas01cr@431 696 dist = qn*qn + sn*sn - 2*dist;
mas01cr@431 697 break;
mas01cr@431 698 }
mas01cr@424 699 if((!radius) || dist <= (O2_LSH_EXACT_MULT*radius+O2_DISTANCE_TOLERANCE)) {
mas01cr@424 700 adb_result_t r;
mas01cr@453 701 r.key = (*adb->keys)[pp.trackID].c_str();
mas01cr@424 702 r.dist = dist;
mas01cr@424 703 r.qpos = pp.qpos;
mas01cr@424 704 r.ipos = pp.spos;
mas01cr@424 705 accumulator->add_point(&r);
mas01cr@424 706 }
mas01mc@292 707 }
mas01mc@292 708 exact_evaluation_queue->pop();
mas01mc@292 709 }
mas01mc@315 710 // Cleanup
mas01cr@438 711 SAFE_DELETE_ARRAY(dbpointers.l2norm_data);
mas01cr@438 712 SAFE_DELETE_ARRAY(dbpointers.power_data);
mas01cr@438 713 SAFE_DELETE_ARRAY(dbpointers.mean_duration);
mas01mc@292 714 }
mas01mc@292 715
mas01cr@453 716 int audioDB::query_loop(adb_t *adb, adb_query_spec_t *spec) {
mas01cr@239 717
mas01cr@239 718 double *query, *query_data;
mas01cr@438 719 adb_qpointers_internal_t qpointers = {0}, dbpointers = {0};
mas01cr@437 720
mas01cr@437 721 bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD);
mas01cr@239 722
mas01cr@453 723 std::set<std::string> keys;
mas01cr@453 724 if(spec->refine.flags & ADB_REFINE_INCLUDE_KEYLIST) {
mas01cr@453 725 for(unsigned int k = 0; k < spec->refine.include.nkeys; k++) {
mas01cr@453 726 keys.insert(spec->refine.include.keys[k]);
mas01cr@453 727 }
mas01cr@453 728 } else {
mas01cr@453 729 for(unsigned int k = 0; k < adb->header->numFiles; k++) {
mas01cr@453 730 keys.insert((*adb->keys)[k]);
mas01cr@453 731 }
mas01cr@453 732 }
mas01cr@453 733 if(spec->refine.flags & ADB_REFINE_EXCLUDE_KEYLIST) {
mas01cr@453 734 for(unsigned int k = 0; k < spec->refine.exclude.nkeys; k++) {
mas01cr@453 735 keys.erase(spec->refine.exclude.keys[k]);
mas01cr@453 736 }
mas01cr@453 737 }
mas01cr@453 738
mas01cr@452 739 if(adb->header->flags & O2_FLAG_LARGE_ADB) {
mas01cr@452 740 /* FIXME: actually it would be nice to support this mode of
mas01cr@452 741 * operation, but for now... */
mas01cr@452 742 return 1;
mas01cr@452 743 }
mas01mc@324 744
mas01cr@444 745 if(audiodb_query_spec_qpointers(adb, spec, &query_data, &query, &qpointers)) {
mas01cr@452 746 return 1;
mas01cr@444 747 }
mas01cr@239 748
mas01cr@448 749 if(audiodb_set_up_dbpointers(adb, spec, &dbpointers)) {
mas01cr@452 750 return 1;
mas01cr@434 751 }
mas01cr@239 752
mas01cr@451 753 unsigned j,k,track,trackOffset=0, HOP_SIZE = spec->refine.hopsize;
mas01cr@435 754 unsigned wL = spec->qid.sequence_length;
mas01cr@239 755 double **D = 0; // Differences query and target
mas01cr@239 756 double **DD = 0; // Matched filter distance
mas01cr@239 757
mas01cr@437 758 D = new double*[qpointers.nvectors]; // pre-allocate
mas01cr@437 759 DD = new double*[qpointers.nvectors];
mas01cr@239 760
mas01cr@239 761 off_t trackIndexOffset;
mas01cr@239 762
mas01cr@239 763 // Track loop
mas01cr@239 764 size_t data_buffer_size = 0;
mas01cr@239 765 double *data_buffer = 0;
mas01cr@451 766 lseek(adb->fd, adb->header->dataOffset, SEEK_SET);
mas01cr@239 767
mas01cr@453 768 for(track = 0; track < adb->header->numFiles; track++) {
mas01cr@453 769 unsigned t = track;
mas01cr@239 770
mas01cr@453 771 while (keys.find((*adb->keys)[track]) == keys.end()) {
mas01cr@453 772 track++;
mas01cr@453 773 if(track == adb->header->numFiles) {
mas01cr@453 774 goto loop_finish;
mas01cr@239 775 }
mas01cr@239 776 }
mas01cr@453 777 trackOffset = (*adb->track_offsets)[track];
mas01cr@453 778 if(track != t) {
mas01cr@453 779 lseek(adb->fd, adb->header->dataOffset + trackOffset, SEEK_SET);
mas01mc@292 780 }
mas01cr@451 781 trackIndexOffset = trackOffset / (adb->header->dim * sizeof(double)); // dbpointers.nvectors offset
mas01cr@239 782
mas01cr@452 783 if(audiodb_read_data(adb, adb->fd, track, &data_buffer, &data_buffer_size)) {
mas01cr@452 784 return 1;
mas01cr@452 785 }
mas01cr@451 786 if(wL <= (*adb->track_lengths)[track]) { // test for short sequences
mas01cr@239 787
mas01cr@439 788 audiodb_initialize_arrays(adb, spec, track, qpointers.nvectors, query, data_buffer, D, DD);
mas01cr@239 789
mas01cr@437 790 if((!(spec->refine.flags & ADB_REFINE_DURATION_RATIO)) ||
mas01cr@438 791 fabs(dbpointers.mean_duration[track]-qpointers.mean_duration[0]) < qpointers.mean_duration[0]*spec->refine.duration_ratio) {
mas01cr@239 792
mas01cr@239 793 // Search for minimum distance by shingles (concatenated vectors)
mas01cr@437 794 for(j = 0; j <= qpointers.nvectors - wL; j += HOP_SIZE) {
mas01cr@451 795 for(k = 0; k <= (*adb->track_lengths)[track] - wL; k += HOP_SIZE) {
mas01cr@431 796 double thisDist = 0;
mas01cr@438 797 double qn = qpointers.l2norm[j];
mas01cr@438 798 double sn = dbpointers.l2norm[trackIndexOffset + k];
mas01cr@435 799 switch(spec->params.distance) {
mas01cr@431 800 case ADB_DISTANCE_EUCLIDEAN_NORMED:
mas01cr@438 801 thisDist = 2-(2/(qn*sn))*DD[j][k];
mas01cr@431 802 break;
mas01cr@431 803 case ADB_DISTANCE_EUCLIDEAN:
mas01cr@438 804 thisDist = qn*qn + sn*sn - 2*DD[j][k];
mas01cr@431 805 break;
mas01cr@431 806 case ADB_DISTANCE_DOT_PRODUCT:
mas01cr@431 807 thisDist = DD[j][k];
mas01cr@431 808 break;
mas01cr@431 809 }
mas01cr@239 810 // Power test
mas01cr@438 811 if ((!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers.power[j], dbpointers.power[trackIndexOffset + k])) {
mas01cr@239 812 // radius test
mas01cr@435 813 if((!(spec->refine.flags & ADB_REFINE_RADIUS)) ||
mas01cr@435 814 thisDist <= (spec->refine.radius+O2_DISTANCE_TOLERANCE)) {
mas01cr@423 815 adb_result_t r;
mas01cr@453 816 r.key = (*adb->keys)[track].c_str();
mas01cr@423 817 r.dist = thisDist;
mas01cr@451 818 if(spec->qid.flags & ADB_QUERY_ID_FLAG_EXHAUSTIVE) {
mas01cr@451 819 r.qpos = j;
mas01cr@451 820 } else {
mas01cr@451 821 r.qpos = spec->qid.sequence_start;
mas01cr@451 822 }
mas01cr@423 823 r.ipos = k;
mas01cr@423 824 accumulator->add_point(&r);
mas01cr@239 825 }
mas01cr@239 826 }
mas01cr@239 827 }
mas01cr@239 828 }
mas01cr@239 829 } // Duration match
mas01cr@437 830 audiodb_delete_arrays(track, qpointers.nvectors, D, DD);
mas01cr@239 831 }
mas01cr@239 832 }
mas01cr@239 833
mas01cr@453 834 loop_finish:
mas01cr@453 835
mas01cr@239 836 free(data_buffer);
mas01cr@239 837
mas01cr@239 838 // Clean up
mas01cr@239 839 if(query_data)
mas01cr@239 840 delete[] query_data;
mas01cr@437 841 if(qpointers.l2norm_data)
mas01cr@437 842 delete[] qpointers.l2norm_data;
mas01cr@437 843 if(qpointers.power_data)
mas01cr@437 844 delete[] qpointers.power_data;
mas01cr@437 845 if(qpointers.mean_duration)
mas01cr@437 846 delete[] qpointers.mean_duration;
mas01cr@438 847 if(dbpointers.power_data)
mas01cr@438 848 delete[] dbpointers.power_data;
mas01cr@438 849 if(dbpointers.l2norm_data)
mas01cr@438 850 delete[] dbpointers.l2norm_data;
mas01cr@239 851 if(D)
mas01cr@239 852 delete[] D;
mas01cr@239 853 if(DD)
mas01cr@239 854 delete[] DD;
mas01cr@438 855 if(dbpointers.mean_duration)
mas01cr@438 856 delete[] dbpointers.mean_duration;
mas01cr@452 857
mas01cr@452 858 return 0;
mas01cr@239 859 }