annotate query.cpp @ 459:fcc6f7c4856b api-inversion

No more global shingle vector of vectors. Convert audioDB::index_initialize_shingles and audioDB::index_norm_shingles to plain old functions. In doing so, the latter in particular acquires a silly argument list; we need that complexity for now because it's called both from audioDB::query (which we're currently inverting) and from audioDB::index (which is out of scope for now). The loss of the global vv thing made me check up on memory discipline [hence the new API function audiodb_query_free_results() as well as the internal audiodb_index_delete_shingles()]. It's not too bad, but there are plenty of leaks for those with time to do AUDIODB="valgrind --leak-check=full ../../audioDB" sh ./run-test.sh on their favourite test case. For example, the Radius reporters leak one triple per hit. (Honestly, C++ memory management is teh suck.)
author mas01cr
date Sun, 28 Dec 2008 22:43:50 +0000
parents 913a95f06998
children 17003dff8127
rev   line source
mas01cr@239 1 #include "audioDB.h"
mas01cr@239 2 #include "reporter.h"
mas01cr@239 3
mas01cr@422 4 #include "audioDB-internals.h"
mas01cr@422 5 #include "accumulators.h"
mas01cr@422 6
mas01cr@444 7 bool audiodb_powers_acceptable(adb_query_refine_t *r, double p1, double p2) {
mas01cr@425 8 if (r->flags & ADB_REFINE_ABSOLUTE_THRESHOLD) {
mas01cr@425 9 if ((p1 < r->absolute_threshold) || (p2 < r->absolute_threshold)) {
mas01cr@239 10 return false;
mas01cr@239 11 }
mas01cr@239 12 }
mas01cr@425 13 if (r->flags & ADB_REFINE_RELATIVE_THRESHOLD) {
mas01cr@425 14 if (fabs(p1-p2) > fabs(r->relative_threshold)) {
mas01cr@239 15 return false;
mas01cr@239 16 }
mas01cr@239 17 }
mas01cr@239 18 return true;
mas01cr@239 19 }
mas01cr@239 20
// audioDB::query -- query entry point driven by the object's option members.
//
// What the code below does, in order:
//   1. calls initTables() (with or without inFile, depending on
//      query_from_key);
//   2. fills in qspec.refine from trackFile, query_from_key/key, radius, the
//      absolute/relative thresholds, usingTimes/timesTol and sequenceHop;
//   3. fills in `datum`, either with just the key or by reading inFile (and,
//      when enabled, powerFileName and timesFile) into malloc'ed buffers;
//   4. sets qspec.qid / qspec.params and allocates `reporter` (and, for
//      indexed radius queries, `lsh`) according to queryType;
//   5. builds the allowed-key set and the accumulator, runs
//      index_query_loop() or query_loop(), and collects the results;
//   6. feeds every result to reporter->add_point(), frees the result set and
//      calls reporter->report(fileTable, adbQueryResponse).
//
// NOTE(review): the code following each error(...) call uses the value that
// was just found to be bad (e.g. fstat on a negative fd), so error() is
// presumably expected not to return -- confirm against its definition.
//
// NOTE(review): nothing in this function releases the key strings / key array
// built from trackFile, nor datum.data / datum.power / datum.times.
mas01cr@239 21 void audioDB::query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) {
mas01cr@425 22
mas01cr@431 23 // init database tables and dbH first
mas01cr@431 24 if(query_from_key)
mas01cr@431 25 initTables(dbName);
mas01cr@431 26 else
mas01cr@431 27 initTables(dbName, inFile);
mas01cr@431 28
mas01cr@435 29 adb_query_spec_t qspec;
mas01cr@444 30 adb_datum_t datum = {0};
mas01cr@435 31
// ---- Refinement flags and their parameters ----
mas01cr@435 32 qspec.refine.flags = 0;
// Optional include list: one key per line of trackFile.  Each line is read
// into its own freshly allocated MAXSTR buffer; the buffer allocated for the
// read that hits EOF is the only one deleted here.
mas01cr@453 33 if(trackFile) {
mas01cr@453 34 qspec.refine.flags |= ADB_REFINE_INCLUDE_KEYLIST;
mas01cr@453 35 std::vector<const char *> v;
mas01cr@453 36 char *k = new char[MAXSTR];
mas01cr@453 37 trackFile->getline(k, MAXSTR);
mas01cr@453 38 while(!trackFile->eof()) {
mas01cr@453 39 v.push_back(k);
mas01cr@453 40 k = new char[MAXSTR];
mas01cr@453 41 trackFile->getline(k, MAXSTR);
mas01cr@453 42 }
mas01cr@453 43 delete [] k;
mas01cr@453 44 qspec.refine.include.nkeys = v.size();
mas01cr@453 45 qspec.refine.include.keys = new const char *[qspec.refine.include.nkeys];
mas01cr@453 46 for(unsigned int k = 0; k < qspec.refine.include.nkeys; k++) {
mas01cr@453 47 qspec.refine.include.keys[k] = v[k];
mas01cr@453 48 }
mas01cr@453 49 }
// When querying by key, that key itself is put on the exclude list.
mas01cr@453 50 if(query_from_key) {
mas01cr@453 51 qspec.refine.flags |= ADB_REFINE_EXCLUDE_KEYLIST;
mas01cr@453 52 qspec.refine.exclude.nkeys = 1;
mas01cr@453 53 qspec.refine.exclude.keys = &key;
mas01cr@453 54 }
mas01cr@425 55 if(radius) {
mas01cr@435 56 qspec.refine.flags |= ADB_REFINE_RADIUS;
mas01cr@435 57 qspec.refine.radius = radius;
mas01cr@425 58 }
mas01cr@425 59 if(use_absolute_threshold) {
mas01cr@435 60 qspec.refine.flags |= ADB_REFINE_ABSOLUTE_THRESHOLD;
mas01cr@435 61 qspec.refine.absolute_threshold = absolute_threshold;
mas01cr@425 62 }
mas01cr@425 63 if(use_relative_threshold) {
mas01cr@435 64 qspec.refine.flags |= ADB_REFINE_RELATIVE_THRESHOLD;
mas01cr@435 65 qspec.refine.relative_threshold = relative_threshold;
mas01cr@425 66 }
mas01cr@425 67 if(usingTimes) {
mas01cr@435 68 qspec.refine.flags |= ADB_REFINE_DURATION_RATIO;
mas01cr@435 69 qspec.refine.duration_ratio = timesTol;
mas01cr@425 70 }
mas01cr@439 71 /* FIXME: not sure about this any more; maybe it belongs in
mas01cr@439 72 query_id? Or maybe we just don't need a flag for it? */
mas01cr@439 73 qspec.refine.hopsize = sequenceHop;
mas01cr@425 74 if(sequenceHop != 1) {
mas01cr@435 75 qspec.refine.flags |= ADB_REFINE_HOP_SIZE;
mas01cr@425 76 }
mas01cr@425 77
// ---- Query datum: either just a key, or data loaded from files ----
mas01cr@444 78 if(query_from_key) {
mas01cr@444 79 datum.key = key;
mas01cr@444 80 } else {
mas01cr@444 81 int fd;
mas01cr@444 82 struct stat st;
mas01cr@444 83
mas01cr@444 84 /* FIXME: around here there are all sorts of hideous leaks. */
// Feature file layout as read here: a uint32_t dimension followed by the
// vector data as doubles; nvectors is derived from the file size.
// NOTE(review): the fstat() and read() return values are not checked, and a
// dimension of 0 read from the file would make the division below divide by
// zero.
mas01cr@444 85 fd = open(inFile, O_RDONLY);
mas01cr@444 86 if(fd < 0) {
mas01cr@444 87 error("failed to open feature file", inFile);
mas01cr@444 88 }
mas01cr@444 89 fstat(fd, &st);
mas01cr@444 90 read(fd, &datum.dim, sizeof(uint32_t));
mas01cr@444 91 datum.nvectors = (st.st_size - sizeof(uint32_t)) / (datum.dim * sizeof(double));
mas01cr@444 92 datum.data = (double *) malloc(st.st_size - sizeof(uint32_t));
mas01cr@444 93 read(fd, datum.data, st.st_size - sizeof(uint32_t));
mas01cr@444 94 close(fd);
// Power file: a uint32_t that must equal 1, then one double per vector.
mas01cr@444 95 if(usingPower) {
mas01cr@444 96 uint32_t one;
mas01cr@444 97 fd = open(powerFileName, O_RDONLY);
mas01cr@444 98 if(fd < 0) {
mas01cr@444 99 error("failed to open power file", powerFileName);
mas01cr@444 100 }
mas01cr@444 101 read(fd, &one, sizeof(uint32_t));
mas01cr@444 102 if(one != 1) {
mas01cr@444 103 error("malformed power file dimensionality", powerFileName);
mas01cr@444 104 }
mas01cr@444 105 datum.power = (double *) malloc(datum.nvectors * sizeof(double));
mas01cr@444 106 if(read(fd, datum.power, datum.nvectors * sizeof(double)) != (ssize_t) (datum.nvectors * sizeof(double))) {
mas01cr@444 107 error("malformed power file", powerFileName);
mas01cr@444 108 }
mas01cr@444 109 close(fd);
mas01cr@444 110 }
// Times: two doubles are reserved per vector and filled by insertTimeStamps().
mas01cr@444 111 if(usingTimes) {
mas01cr@444 112 datum.times = (double *) malloc(2 * datum.nvectors * sizeof(double));
mas01cr@444 113 insertTimeStamps(datum.nvectors, timesFile, datum.times);
mas01cr@444 114 }
mas01cr@444 115 }
mas01cr@444 116
mas01cr@444 117 qspec.qid.datum = &datum;
mas01cr@435 118 qspec.qid.sequence_length = sequenceLength;
mas01cr@435 119 qspec.qid.flags = usingQueryPoint ? 0 : ADB_QUERY_ID_FLAG_EXHAUSTIVE;
mas01cr@435 120 qspec.qid.sequence_start = queryPoint;
mas01cr@435 121
// ---- Per-query-type parameters and reporter selection ----
// Point and track queries force sequence_length to 1 and use the dot-product
// distance; the sequence queries use a Euclidean distance.
mas01cr@431 122 switch(queryType) {
mas01cr@431 123 case O2_POINT_QUERY:
mas01cr@435 124 qspec.qid.sequence_length = 1;
mas01cr@435 125 qspec.params.accumulation = ADB_ACCUMULATION_DB;
mas01cr@435 126 qspec.params.distance = ADB_DISTANCE_DOT_PRODUCT;
mas01cr@435 127 qspec.params.npoints = pointNN;
mas01cr@435 128 qspec.params.ntracks = 0;
mas01cr@431 129 reporter = new pointQueryReporter< std::greater < NNresult > >(pointNN);
mas01cr@431 130 break;
mas01cr@431 131 case O2_TRACK_QUERY:
mas01cr@435 132 qspec.qid.sequence_length = 1;
mas01cr@435 133 qspec.params.accumulation = ADB_ACCUMULATION_PER_TRACK;
mas01cr@435 134 qspec.params.distance = ADB_DISTANCE_DOT_PRODUCT;
mas01cr@435 135 qspec.params.npoints = pointNN;
mas01cr@435 136 qspec.params.ntracks = trackNN;
mas01cr@431 137 reporter = new trackAveragingReporter< std::greater< NNresult > >(pointNN, trackNN, dbH->numFiles);
mas01cr@431 138 break;
mas01cr@431 139 case O2_SEQUENCE_QUERY:
mas01cr@431 140 case O2_N_SEQUENCE_QUERY:
mas01cr@435 141 qspec.params.accumulation = ADB_ACCUMULATION_PER_TRACK;
mas01cr@435 142 qspec.params.distance = no_unit_norming ? ADB_DISTANCE_EUCLIDEAN : ADB_DISTANCE_EUCLIDEAN_NORMED;
mas01cr@435 143 qspec.params.npoints = pointNN;
mas01cr@435 144 qspec.params.ntracks = trackNN;
// The two sequence query types share the parameters above and differ only in
// the reporter.  For radius queries with an existing index, the index is
// opened into `lsh` and the reporter is sized from lsh->get_maxp();
// otherwise it is sized from dbH->numFiles.
mas01cr@431 145 switch(queryType) {
mas01cr@431 146 case O2_SEQUENCE_QUERY:
mas01cr@435 147 if(!(qspec.refine.flags & ADB_REFINE_RADIUS)) {
mas01cr@431 148 reporter = new trackAveragingReporter< std::less< NNresult > >(pointNN, trackNN, dbH->numFiles);
mas01cr@435 149 } else if (index_exists(adb->path, qspec.refine.radius, qspec.qid.sequence_length)) {
mas01cr@435 150 char* indexName = index_get_name(adb->path, qspec.refine.radius, qspec.qid.sequence_length);
mas01cr@431 151 lsh = index_allocate(indexName, false);
mas01cr@458 152 reporter = new trackSequenceQueryRadReporter(trackNN, audiodb_index_to_track_id(lsh->get_maxp(), audiodb_lsh_n_point_bits(adb))+1);
mas01cr@431 153 delete[] indexName;
mas01cr@431 154 } else {
mas01cr@431 155 reporter = new trackSequenceQueryRadReporter(trackNN, dbH->numFiles);
mas01cr@431 156 }
mas01cr@431 157 break;
mas01cr@431 158 case O2_N_SEQUENCE_QUERY:
mas01cr@435 159 if(!(qspec.refine.flags & ADB_REFINE_RADIUS)) {
mas01cr@431 160 reporter = new trackSequenceQueryNNReporter< std::less < NNresult > >(pointNN, trackNN, dbH->numFiles);
mas01cr@435 161 } else if (index_exists(adb->path, qspec.refine.radius, qspec.qid.sequence_length)){
mas01cr@435 162 char* indexName = index_get_name(adb->path, qspec.refine.radius, qspec.qid.sequence_length);
mas01cr@431 163 lsh = index_allocate(indexName, false);
mas01cr@458 164 reporter = new trackSequenceQueryRadNNReporter(pointNN,trackNN, audiodb_index_to_track_id(lsh->get_maxp(), audiodb_lsh_n_point_bits(adb))+1);
mas01cr@431 165 delete[] indexName;
mas01cr@431 166 } else {
mas01cr@431 167 reporter = new trackSequenceQueryRadNNReporter(pointNN,trackNN, dbH->numFiles);
mas01cr@431 168 }
mas01cr@431 169 break;
mas01cr@431 170 }
mas01cr@431 171 break;
// NOTE(review): this case does not assign `reporter`, yet `reporter` is
// dereferenced unconditionally at the end of the function -- confirm that it
// is set up elsewhere for one-to-one queries.
mas01cr@431 172 case O2_ONE_TO_ONE_N_SEQUENCE_QUERY:
mas01cr@435 173 qspec.params.accumulation = ADB_ACCUMULATION_ONE_TO_ONE;
mas01cr@435 174 qspec.params.distance = ADB_DISTANCE_EUCLIDEAN_NORMED;
mas01cr@435 175 qspec.params.npoints = 0;
mas01cr@435 176 qspec.params.ntracks = 0;
mas01cr@431 177 break;
mas01cr@431 178 default:
mas01cr@431 179 error("unrecognized queryType");
mas01cr@431 180 }
mas01mc@292 181
mas01cr@458 182 /* Somewhere around here is where the implementation of
mas01cr@458 183 * audiodb_query_spec() starts. */
mas01cr@458 184
// Allowed keys = (include list if given, otherwise every key in the database)
// minus the exclude list.
mas01cr@458 185 adb_qstate_internal_t qstate;
mas01cr@458 186 qstate.allowed_keys = new std::set<std::string>;
mas01cr@458 187 if(qspec.refine.flags & ADB_REFINE_INCLUDE_KEYLIST) {
mas01cr@458 188 for(unsigned int k = 0; k < qspec.refine.include.nkeys; k++) {
mas01cr@458 189 qstate.allowed_keys->insert(qspec.refine.include.keys[k]);
mas01cr@458 190 }
mas01cr@458 191 } else {
mas01cr@458 192 for(unsigned int k = 0; k < adb->header->numFiles; k++) {
mas01cr@458 193 qstate.allowed_keys->insert((*adb->keys)[k]);
mas01cr@458 194 }
mas01cr@458 195 }
mas01cr@458 196 if(qspec.refine.flags & ADB_REFINE_EXCLUDE_KEYLIST) {
mas01cr@458 197 for(unsigned int k = 0; k < qspec.refine.exclude.nkeys; k++) {
mas01cr@458 198 qstate.allowed_keys->erase(qspec.refine.exclude.keys[k]);
mas01cr@458 199 }
mas01cr@458 200 }
mas01cr@431 201
// Accumulator choice: the dot-product distance uses the adb_result_dist_gt
// comparator, the Euclidean distances use adb_result_dist_lt; the
// accumulation mode picks the accumulator class.
mas01cr@435 202 switch(qspec.params.distance) {
mas01cr@431 203 case ADB_DISTANCE_DOT_PRODUCT:
mas01cr@435 204 switch(qspec.params.accumulation) {
mas01cr@431 205 case ADB_ACCUMULATION_DB:
mas01cr@458 206 qstate.accumulator = new DBAccumulator<adb_result_dist_gt>(qspec.params.npoints);
mas01cr@431 207 break;
mas01cr@431 208 case ADB_ACCUMULATION_PER_TRACK:
mas01cr@458 209 qstate.accumulator = new PerTrackAccumulator<adb_result_dist_gt>(qspec.params.npoints, qspec.params.ntracks);
mas01cr@431 210 break;
mas01cr@431 211 case ADB_ACCUMULATION_ONE_TO_ONE:
mas01cr@458 212 qstate.accumulator = new NearestAccumulator<adb_result_dist_gt>();
mas01cr@431 213 break;
mas01cr@431 214 default:
mas01cr@431 215 error("unknown accumulation");
mas01cr@239 216 }
mas01cr@239 217 break;
mas01cr@431 218 case ADB_DISTANCE_EUCLIDEAN_NORMED:
mas01cr@431 219 case ADB_DISTANCE_EUCLIDEAN:
mas01cr@435 220 switch(qspec.params.accumulation) {
mas01cr@431 221 case ADB_ACCUMULATION_DB:
mas01cr@458 222 qstate.accumulator = new DBAccumulator<adb_result_dist_lt>(qspec.params.npoints);
mas01cr@431 223 break;
mas01cr@431 224 case ADB_ACCUMULATION_PER_TRACK:
mas01cr@458 225 qstate.accumulator = new PerTrackAccumulator<adb_result_dist_lt>(qspec.params.npoints, qspec.params.ntracks);
mas01cr@431 226 break;
mas01cr@431 227 case ADB_ACCUMULATION_ONE_TO_ONE:
mas01cr@458 228 qstate.accumulator = new NearestAccumulator<adb_result_dist_lt>();
mas01cr@431 229 break;
mas01cr@431 230 default:
mas01cr@431 231 error("unknown accumulation");
mas01mc@263 232 }
mas01mc@263 233 break;
mas01cr@239 234 default:
mas01cr@431 235 error("unknown distance function");
mas01cr@431 236 }
mas01cr@431 237
mas01mc@292 238 // Test for index (again) here
mas01cr@435 239 if((qspec.refine.flags & ADB_REFINE_RADIUS) && index_exists(adb->path, qspec.refine.radius, qspec.qid.sequence_length)){
mas01cr@436 240 VERB_LOG(1, "Calling indexed query on database %s, radius=%f, sequence_length=%d\n", adb->path, qspec.refine.radius, qspec.qid.sequence_length);
mas01cr@458 241 index_query_loop(adb, &qspec, &qstate);
mas01mc@329 242 }
mas01mc@329 243 else{
mas01mc@329 244 VERB_LOG(1, "Calling brute-force query on database %s\n", dbName);
mas01cr@458 245 if(query_loop(adb, &qspec, &qstate)) {
mas01cr@452 246 error("query_loop failed");
mas01cr@452 247 }
mas01mc@329 248 }
mas01mc@292 249
// The result set is fetched before the accumulator is deleted; `rs` is
// released further down with audiodb_query_free_results().
mas01cr@458 250 adb_query_results_t *rs = qstate.accumulator->get_points();
mas01cr@458 251
mas01cr@458 252 delete qstate.accumulator;
mas01cr@458 253 delete qstate.allowed_keys;
mas01cr@458 254
mas01cr@458 255 /* End of audiodb_query_spec() function */
mas01cr@458 256
// Hand each result to the reporter, translating its key to a track index.
mas01cr@423 257 for(unsigned int k = 0; k < rs->nresults; k++) {
mas01cr@423 258 adb_result_t r = rs->results[k];
mas01cr@430 259 reporter->add_point(audiodb_key_index(adb, r.key), r.qpos, r.ipos, r.dist);
mas01cr@423 260 }
mas01cr@459 261 audiodb_query_free_results(adb, &qspec, rs);
mas01cr@423 262
mas01mc@292 263 reporter->report(fileTable, adbQueryResponse);
mas01cr@239 264 }
mas01cr@239 265
mas01cr@459 266 int audiodb_query_free_results(adb_t *adb, adb_query_spec_t *spec, adb_query_results_t *rs) {
mas01cr@459 267 free(rs->results);
mas01cr@459 268 free(rs);
mas01cr@459 269 return 0;
mas01cr@459 270 }
mas01cr@459 271
mas01cr@439 272 static void audiodb_initialize_arrays(adb_t *adb, adb_query_spec_t *spec, int track, unsigned int numVectors, double *query, double *data_buffer, double **D, double **DD) {
mas01cr@239 273 unsigned int j, k, l, w;
mas01cr@239 274 double *dp, *qp, *sp;
mas01cr@239 275
mas01cr@439 276 const unsigned HOP_SIZE = spec->refine.hopsize;
mas01cr@435 277 const unsigned wL = spec->qid.sequence_length;
mas01cr@239 278
mas01cr@239 279 for(j = 0; j < numVectors; j++) {
mas01cr@239 280 // Sum products matrix
mas01cr@433 281 D[j] = new double[(*adb->track_lengths)[track]];
mas01cr@239 282 assert(D[j]);
mas01cr@239 283 // Matched filter matrix
mas01cr@433 284 DD[j]=new double[(*adb->track_lengths)[track]];
mas01cr@239 285 assert(DD[j]);
mas01cr@239 286 }
mas01cr@239 287
mas01cr@239 288 // Dot product
mas01cr@239 289 for(j = 0; j < numVectors; j++)
mas01cr@433 290 for(k = 0; k < (*adb->track_lengths)[track]; k++){
mas01cr@439 291 qp = query + j * adb->header->dim;
mas01cr@439 292 sp = data_buffer + k * adb->header->dim;
mas01cr@239 293 DD[j][k] = 0.0; // Initialize matched filter array
mas01cr@239 294 dp = &D[j][k]; // point to correlation cell j,k
mas01cr@239 295 *dp = 0.0; // initialize correlation cell
mas01cr@439 296 l = adb->header->dim; // size of vectors
mas01cr@239 297 while(l--)
mas01cr@239 298 *dp += *qp++ * *sp++;
mas01cr@239 299 }
mas01cr@239 300
mas01cr@239 301 // Matched Filter
mas01cr@239 302 // HOP SIZE == 1
mas01cr@239 303 double* spd;
mas01cr@239 304 if(HOP_SIZE == 1) { // HOP_SIZE = shingleHop
mas01cr@239 305 for(w = 0; w < wL; w++) {
mas01cr@239 306 for(j = 0; j < numVectors - w; j++) {
mas01cr@239 307 sp = DD[j];
mas01cr@239 308 spd = D[j+w] + w;
mas01cr@433 309 k = (*adb->track_lengths)[track] - w;
mas01mc@292 310 while(k--)
mas01mc@292 311 *sp++ += *spd++;
mas01cr@239 312 }
mas01cr@239 313 }
mas01cr@239 314 } else { // HOP_SIZE != 1
mas01cr@239 315 for(w = 0; w < wL; w++) {
mas01cr@239 316 for(j = 0; j < numVectors - w; j += HOP_SIZE) {
mas01cr@239 317 sp = DD[j];
mas01cr@239 318 spd = D[j+w]+w;
mas01cr@433 319 for(k = 0; k < (*adb->track_lengths)[track] - w; k += HOP_SIZE) {
mas01cr@239 320 *sp += *spd;
mas01cr@239 321 sp += HOP_SIZE;
mas01cr@239 322 spd += HOP_SIZE;
mas01cr@239 323 }
mas01cr@239 324 }
mas01cr@239 325 }
mas01cr@239 326 }
mas01cr@239 327 }
mas01cr@239 328
/*
 * Release the rows of the D and DD tables with delete[].
 *
 * Either table pointer may be NULL, in which case it is skipped.  Only the
 * numVectors row arrays are deleted; the outer pointer arrays D and DD
 * themselves are left untouched.  `track` is not used.
 */
static void audiodb_delete_arrays(int track, unsigned int numVectors, double **D, double **DD) {
  double **tables[2] = { D, DD };

  for(unsigned int which = 0; which < 2; which++) {
    double **rows = tables[which];
    if(rows == NULL) {
      continue;
    }
    for(unsigned int row = 0; row < numVectors; row++) {
      delete[] rows[row];
    }
  }
}
mas01cr@239 341
mas01cr@433 342 int audiodb_read_data(adb_t *adb, int trkfid, int track, double **data_buffer_p, size_t *data_buffer_size_p) {
mas01cr@433 343 uint32_t track_length = (*adb->track_lengths)[track];
mas01cr@433 344 size_t track_size = track_length * sizeof(double) * adb->header->dim;
mas01cr@433 345 if (track_size > *data_buffer_size_p) {
mas01cr@239 346 if(*data_buffer_p) {
mas01cr@239 347 free(*data_buffer_p);
mas01cr@239 348 }
mas01cr@239 349 {
mas01cr@433 350 *data_buffer_size_p = track_size;
mas01cr@433 351 void *tmp = malloc(track_size);
mas01cr@239 352 if (tmp == NULL) {
mas01cr@433 353 goto error;
mas01cr@239 354 }
mas01cr@239 355 *data_buffer_p = (double *) tmp;
mas01cr@239 356 }
mas01cr@239 357 }
mas01cr@239 358
mas01cr@433 359 read_or_goto_error(trkfid, *data_buffer_p, track_size);
mas01cr@433 360 return 0;
mas01cr@433 361
mas01cr@433 362 error:
mas01cr@433 363 return 1;
mas01cr@239 364 }
mas01cr@239 365
mas01cr@405 366 void audioDB::insertTimeStamps(unsigned numVectors, std::ifstream *timesFile, double *timesdata) {
mas01cr@405 367 assert(usingTimes);
mas01cr@405 368
mas01cr@405 369 unsigned numtimes = 0;
mas01cr@405 370
mas01cr@405 371 if(!timesFile->is_open()) {
mas01cr@405 372 error("problem opening times file on timestamped database", timesFileName);
mas01cr@405 373 }
mas01cr@405 374
mas01cr@405 375 double timepoint, next;
mas01cr@405 376 *timesFile >> timepoint;
mas01cr@405 377 if (timesFile->eof()) {
mas01cr@405 378 error("no entries in times file", timesFileName);
mas01cr@405 379 }
mas01cr@405 380 numtimes++;
mas01cr@405 381 do {
mas01cr@405 382 *timesFile >> next;
mas01cr@405 383 if (timesFile->eof()) {
mas01cr@405 384 break;
mas01cr@405 385 }
mas01cr@405 386 numtimes++;
mas01cr@405 387 timesdata[0] = timepoint;
mas01cr@405 388 timepoint = (timesdata[1] = next);
mas01cr@405 389 timesdata += 2;
mas01cr@405 390 } while (numtimes < numVectors + 1);
mas01cr@405 391
mas01cr@405 392 if (numtimes < numVectors + 1) {
mas01cr@405 393 error("too few timepoints in times file", timesFileName);
mas01cr@405 394 }
mas01cr@405 395
mas01cr@405 396 *timesFile >> next;
mas01cr@405 397 if (!timesFile->eof()) {
mas01cr@405 398 error("too many timepoints in times file", timesFileName);
mas01cr@405 399 }
mas01cr@405 400 }
mas01cr@405 401
mas01cr@444 402 int audiodb_query_spec_qpointers(adb_t *adb, adb_query_spec_t *spec, double **vector_data, double **vector, adb_qpointers_internal_t *qpointers) {
mas01cr@443 403 adb_datum_t *datum;
mas01cr@443 404 adb_datum_t d = {0};
mas01cr@443 405 uint32_t nvectors;
mas01cr@443 406 uint32_t sequence_length;
mas01cr@443 407 uint32_t sequence_start;
mas01cr@443 408
mas01cr@443 409 datum = spec->qid.datum;
mas01cr@443 410 sequence_length = spec->qid.sequence_length;
mas01cr@443 411 sequence_start = spec->qid.sequence_start;
mas01cr@443 412
mas01cr@443 413 if(datum->data) {
mas01cr@443 414 if(datum->dim != adb->header->dim) {
mas01cr@443 415 return 1;
mas01cr@443 416 }
mas01cr@443 417 /* initialize d, and mark that nothing needs freeing later. */
mas01cr@443 418 d = *datum;
mas01cr@443 419 datum = &d;
mas01cr@443 420 } else if (datum->key) {
mas01cr@449 421 uint32_t track_id;
mas01cr@449 422 if((track_id = audiodb_key_index(adb, datum->key)) == (uint32_t) -1) {
mas01cr@443 423 return 1;
mas01cr@443 424 }
mas01cr@443 425 off_t track_offset = (*adb->track_offsets)[track_id];
mas01cr@443 426
mas01cr@443 427 if(adb->header->flags & O2_FLAG_LARGE_ADB) {
mas01cr@443 428 /* create a reference/insert, then use adb_insert_create_datum() */
mas01cr@443 429 adb_reference_t reference = {0};
mas01cr@443 430 char features[MAXSTR], power[MAXSTR], times[MAXSTR];
mas01cr@443 431 lseek(adb->fd, adb->header->dataOffset + track_id * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@443 432 /* FIXME: learn not to worry and love the bomb^Wbuffer overflow */
mas01cr@443 433 read(adb->fd, features, MAXSTR);
mas01cr@443 434 reference.features = features;
mas01cr@443 435 if(adb->header->flags & O2_FLAG_POWER) {
mas01cr@443 436 lseek(adb->fd, adb->header->powerTableOffset + track_id * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@443 437 read(adb->fd, power, MAXSTR);
mas01cr@443 438 reference.power = power;
mas01cr@443 439 }
mas01cr@443 440 if(adb->header->flags & O2_FLAG_TIMES) {
mas01cr@443 441 lseek(adb->fd, adb->header->timesTableOffset + track_id * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@443 442 read(adb->fd, times, MAXSTR);
mas01cr@443 443 reference.times = times;
mas01cr@443 444 }
mas01cr@443 445 audiodb_insert_create_datum(&reference, &d);
mas01cr@443 446 } else {
mas01cr@443 447 /* initialize from sources of data that we already have */
mas01cr@443 448 d.nvectors = (*adb->track_lengths)[track_id];
mas01cr@443 449 d.dim = adb->header->dim;
mas01cr@443 450 d.key = datum->key;
mas01cr@443 451 /* read out stuff from the database tables */
mas01cr@443 452 d.data = (double *) malloc(d.nvectors * d.dim * sizeof(double));
mas01cr@443 453 lseek(adb->fd, adb->header->dataOffset + track_offset, SEEK_SET);
mas01cr@443 454 read(adb->fd, d.data, d.nvectors * d.dim * sizeof(double));
mas01cr@443 455 if(adb->header->flags & O2_FLAG_POWER) {
mas01cr@443 456 d.power = (double *) malloc(d.nvectors * sizeof(double));
mas01cr@443 457 lseek(adb->fd, adb->header->powerTableOffset + track_offset / d.dim, SEEK_SET);
mas01cr@443 458 read(adb->fd, d.power, d.nvectors * sizeof(double));
mas01cr@443 459 }
mas01cr@443 460 if(adb->header->flags & O2_FLAG_TIMES) {
mas01cr@443 461 d.times = (double *) malloc(2 * d.nvectors * sizeof(double));
mas01cr@443 462 lseek(adb->fd, adb->header->timesTableOffset + track_offset / d.dim, SEEK_SET);
mas01cr@443 463 read(adb->fd, d.times, 2 * d.nvectors * sizeof(double));
mas01cr@443 464 }
mas01cr@443 465 }
mas01cr@443 466 } else {
mas01cr@443 467 return 1;
mas01cr@443 468 }
mas01cr@443 469
mas01cr@443 470 /* Now we have a full(ish) datum, compute all the qpointery stuff
mas01cr@443 471 that we care about (l2norm/power/mean duration). (This bit could
mas01cr@443 472 conceivably become a new function) */
mas01cr@443 473 nvectors = d.nvectors;
mas01cr@444 474 /* FIXME: check the overflow logic here */
mas01cr@444 475 if(sequence_start + sequence_length > nvectors) {
mas01cr@443 476 /* is there something to free? goto error */
mas01cr@443 477 return 1;
mas01cr@443 478 }
mas01cr@443 479
mas01cr@443 480 qpointers->nvectors = nvectors;
mas01cr@443 481
mas01cr@443 482 size_t vector_size = nvectors * sizeof(double) * d.dim;
mas01cr@445 483 *vector_data = new double[vector_size];
mas01cr@443 484 memcpy(*vector_data, d.data, vector_size);
mas01cr@443 485
mas01cr@445 486 qpointers->l2norm_data = new double[vector_size / d.dim];
mas01cr@443 487 audiodb_l2norm_buffer(*vector_data, d.dim, nvectors, qpointers->l2norm_data);
mas01cr@443 488 audiodb_sequence_sum(qpointers->l2norm_data, nvectors, sequence_length);
mas01cr@443 489 audiodb_sequence_sqrt(qpointers->l2norm_data, nvectors, sequence_length);
mas01cr@443 490
mas01cr@443 491 if(d.power) {
mas01cr@445 492 qpointers->power_data = new double[vector_size / d.dim];
mas01cr@443 493 memcpy(qpointers->power_data, d.power, vector_size / d.dim);
mas01cr@443 494 audiodb_sequence_sum(qpointers->power_data, nvectors, sequence_length);
mas01cr@443 495 audiodb_sequence_average(qpointers->power_data, nvectors, sequence_length);
mas01cr@443 496 }
mas01cr@443 497
mas01cr@443 498 if(d.times) {
mas01cr@445 499 qpointers->mean_duration = new double[1];
mas01cr@445 500 *qpointers->mean_duration = 0;
mas01cr@443 501 for(unsigned int k = 0; k < nvectors; k++) {
mas01cr@443 502 *qpointers->mean_duration += d.times[2*k+1] - d.times[2*k];
mas01cr@443 503 }
mas01cr@443 504 *qpointers->mean_duration /= nvectors;
mas01cr@443 505 }
mas01cr@443 506
mas01cr@443 507
mas01cr@443 508 /* Finally, set up the moving qpointers. */
mas01cr@443 509 if(spec->qid.flags & ADB_QUERY_ID_FLAG_EXHAUSTIVE) {
mas01cr@443 510 *vector = *vector_data;
mas01cr@443 511 qpointers->l2norm = qpointers->l2norm_data;
mas01cr@443 512 qpointers->power = qpointers->power_data;
mas01cr@443 513 } else {
mas01cr@443 514 *vector = *vector_data + spec->qid.sequence_start * d.dim;
mas01cr@443 515 qpointers->l2norm = qpointers->l2norm_data + spec->qid.sequence_start;
mas01cr@444 516 if(d.power) {
mas01cr@444 517 qpointers->power = qpointers->power_data + spec->qid.sequence_start;
mas01cr@444 518 }
mas01cr@444 519 /* FIXME: this is a little bit ugly. No, a lot ugly. But at the
mas01cr@444 520 * moment this is how query_loop() knows when to stop, so for
mas01cr@444 521 * now... */
mas01cr@444 522 qpointers->nvectors = sequence_length;
mas01cr@443 523 }
mas01cr@443 524
mas01cr@443 525 /* Clean up: free any bits of datum that we have ourselves
mas01cr@443 526 * allocated. */
mas01cr@443 527 if(datum != &d) {
mas01cr@443 528 audiodb_free_datum(&d);
mas01cr@443 529 }
mas01cr@444 530
mas01cr@444 531 return 0;
mas01cr@443 532 }
mas01cr@443 533
mas01cr@448 534 static int audiodb_set_up_dbpointers(adb_t *adb, adb_query_spec_t *spec, adb_qpointers_internal_t *dbpointers) {
mas01cr@438 535 uint32_t nvectors = adb->header->length / (adb->header->dim * sizeof(double));
mas01cr@435 536 uint32_t sequence_length = spec->qid.sequence_length;
mas01cr@438 537
mas01cr@437 538 bool using_power = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD);
mas01cr@437 539 bool using_times = spec->refine.flags & ADB_REFINE_DURATION_RATIO;
mas01cr@437 540 double *times_table = NULL;
mas01cr@435 541
mas01cr@239 542
mas01cr@438 543 dbpointers->nvectors = nvectors;
mas01cr@438 544 dbpointers->l2norm_data = new double[nvectors];
mas01cr@438 545
mas01cr@438 546 double *snpp = dbpointers->l2norm_data, *sppp = 0;
mas01cr@434 547 lseek(adb->fd, adb->header->l2normTableOffset, SEEK_SET);
mas01cr@438 548 read_or_goto_error(adb->fd, dbpointers->l2norm_data, nvectors * sizeof(double));
mas01cr@239 549
mas01cr@437 550 if (using_power) {
mas01cr@434 551 if (!(adb->header->flags & O2_FLAG_POWER)) {
mas01cr@434 552 goto error;
mas01cr@239 553 }
mas01cr@438 554 dbpointers->power_data = new double[nvectors];
mas01cr@438 555 sppp = dbpointers->power_data;
mas01cr@434 556 lseek(adb->fd, adb->header->powerTableOffset, SEEK_SET);
mas01cr@438 557 read_or_goto_error(adb->fd, dbpointers->power_data, nvectors * sizeof(double));
mas01cr@239 558 }
mas01cr@239 559
mas01cr@434 560 for(unsigned int i = 0; i < adb->header->numFiles; i++){
mas01cr@434 561 size_t track_length = (*adb->track_lengths)[i];
mas01cr@435 562 if(track_length >= sequence_length) {
mas01cr@435 563 audiodb_sequence_sum(snpp, track_length, sequence_length);
mas01cr@435 564 audiodb_sequence_sqrt(snpp, track_length, sequence_length);
mas01cr@437 565 if (using_power) {
mas01cr@435 566 audiodb_sequence_sum(sppp, track_length, sequence_length);
mas01cr@435 567 audiodb_sequence_average(sppp, track_length, sequence_length);
mas01cr@239 568 }
mas01cr@239 569 }
mas01cr@434 570 snpp += track_length;
mas01cr@437 571 if (using_power) {
mas01cr@434 572 sppp += track_length;
mas01cr@239 573 }
mas01cr@239 574 }
mas01cr@239 575
mas01cr@437 576 if (using_times) {
mas01cr@434 577 if(!(adb->header->flags & O2_FLAG_TIMES)) {
mas01cr@437 578 goto error;
mas01cr@239 579 }
mas01cr@239 580
mas01cr@438 581 dbpointers->mean_duration = new double[adb->header->numFiles];
mas01cr@239 582
mas01cr@438 583 times_table = (double *) malloc(2 * nvectors * sizeof(double));
mas01cr@437 584 if(!times_table) {
mas01cr@437 585 goto error;
mas01cr@437 586 }
mas01cr@437 587 lseek(adb->fd, adb->header->timesTableOffset, SEEK_SET);
mas01cr@438 588 read_or_goto_error(adb->fd, times_table, 2 * nvectors * sizeof(double));
mas01cr@434 589 for(unsigned int k = 0; k < adb->header->numFiles; k++) {
mas01cr@434 590 size_t track_length = (*adb->track_lengths)[k];
mas01cr@239 591 unsigned int j;
mas01cr@438 592 dbpointers->mean_duration[k] = 0.0;
mas01cr@434 593 for(j = 0; j < track_length; j++) {
mas01cr@438 594 dbpointers->mean_duration[k] += times_table[2*j+1] - times_table[2*j];
mas01cr@239 595 }
mas01cr@438 596 dbpointers->mean_duration[k] /= j;
mas01cr@239 597 }
mas01cr@437 598
mas01cr@437 599 free(times_table);
mas01cr@437 600 times_table = NULL;
mas01cr@239 601 }
mas01cr@239 602
mas01cr@438 603 dbpointers->l2norm = dbpointers->l2norm_data;
mas01cr@438 604 dbpointers->power = dbpointers->power_data;
mas01cr@434 605 return 0;
mas01cr@434 606
mas01cr@434 607 error:
mas01cr@438 608 if(dbpointers->l2norm_data) {
mas01cr@438 609 delete [] dbpointers->l2norm_data;
mas01cr@434 610 }
mas01cr@438 611 if(dbpointers->power_data) {
mas01cr@438 612 delete [] dbpointers->power_data;
mas01cr@434 613 }
mas01cr@438 614 if(dbpointers->mean_duration) {
mas01cr@438 615 delete [] dbpointers->mean_duration;
mas01cr@434 616 }
mas01cr@437 617 if(times_table) {
mas01cr@437 618 free(times_table);
mas01cr@437 619 }
mas01cr@434 620 return 1;
mas01cr@434 621
mas01cr@239 622 }
mas01cr@239 623
mas01mc@292 624 // query_points()
mas01mc@292 625 //
mas01mc@292 626 // using PointPairs held in the exact_evaluation_queue compute squared distance for each PointPair
mas01mc@292 627 // and insert result into the current reporter.
mas01mc@292 628 //
mas01mc@292 629 // Preconditions:
mas01mc@292 630 // A query inFile has been opened with setup_query(...) and query pointers initialized
mas01mc@292 631 // The database contains some points
mas01mc@292 632 // An exact_evaluation_queue has been allocated and populated
mas01mc@292 633 // A reporter has been allocated
mas01mc@292 634 //
mas01mc@292 635 // Postconditions:
mas01mc@292 636 // reporter contains the points and distances that meet the reporter constraints
mas01mc@292 637
mas01cr@458 638 void audioDB::query_loop_points(adb_t *adb, adb_query_spec_t *spec, adb_qstate_internal_t *qstate, double *query, adb_qpointers_internal_t *qpointers) {
mas01cr@438 639 adb_qpointers_internal_t dbpointers = {0};
mas01mc@292 640
mas01cr@436 641 uint32_t sequence_length = spec->qid.sequence_length;
mas01cr@437 642 bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD);
mas01cr@436 643
mas01cr@458 644 if(qstate->exact_evaluation_queue->size() == 0) {
mas01mc@292 645 return;
mas01cr@455 646 }
mas01mc@292 647
mas01cr@455 648 // Compute database info. FIXME: we more than likely don't need
mas01cr@455 649 // very much of the database so write a new function to build these
mas01cr@455 650 // values per-track or, even better, per-point
mas01cr@455 651 if(!(adb->header->flags & O2_FLAG_LARGE_ADB)) {
mas01cr@448 652 if(audiodb_set_up_dbpointers(adb, spec, &dbpointers)) {
mas01cr@455 653 error("failed to set up dbpointers");
mas01cr@434 654 }
mas01cr@455 655 }
mas01mc@292 656
mas01mc@292 657 // We are guaranteed that the order of points is sorted by:
mas01cr@455 658 // {trackID, spos, qpos} so we can be relatively efficient in
mas01cr@455 659 // initialization of track data. We assume that points usually
mas01cr@455 660 // don't overlap, so we will use exhaustive dot product evaluation
mas01cr@455 661 // instead of memoization of partial sums which is used for
mas01cr@455 662 // exhaustive brute-force evaluation from smaller databases in
mas01cr@455 663 // query_loop()
mas01mc@292 664 double dist;
mas01mc@292 665 size_t data_buffer_size = 0;
mas01mc@292 666 double *data_buffer = 0;
mas01mc@324 667 Uns32T trackOffset = 0;
mas01mc@324 668 Uns32T trackIndexOffset = 0;
mas01mc@292 669 Uns32T currentTrack = 0x80000000; // Initialize with a value outside of track index range
mas01cr@458 670 Uns32T npairs = qstate->exact_evaluation_queue->size();
mas01mc@292 671 while(npairs--){
mas01cr@458 672 PointPair pp = qstate->exact_evaluation_queue->top();
mas01mc@324 673 // Large ADB track data must be loaded here for sPower
mas01cr@455 674 if(adb->header->flags & O2_FLAG_LARGE_ADB) {
mas01mc@324 675 trackOffset=0;
mas01mc@324 676 trackIndexOffset=0;
mas01mc@292 677 if(currentTrack!=pp.trackID){
mas01mc@324 678 char* prefixedString = new char[O2_MAXFILESTR];
mas01mc@324 679 char* tmpStr = prefixedString;
mas01mc@324 680 // On currentTrack change, allocate and load track data
mas01mc@292 681 currentTrack=pp.trackID;
mas01cr@438 682 SAFE_DELETE_ARRAY(dbpointers.l2norm_data);
mas01cr@438 683 SAFE_DELETE_ARRAY(dbpointers.power_data);
mas01mc@324 684 if(infid>0)
mas01mc@324 685 close(infid);
mas01mc@324 686 // Open and check dimensions of feature file
mas01mc@324 687 strncpy(prefixedString, featureFileNameTable+pp.trackID*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR);
mas01mc@324 688 prefix_name((char ** const) &prefixedString, adb_feature_root);
mas01mc@324 689 if (prefixedString!=tmpStr)
mas01mc@324 690 delete[] tmpStr;
mas01cr@454 691 initInputFile(prefixedString);
mas01mc@324 692 // Load the feature vector data for current track into data_buffer
mas01cr@433 693 if(audiodb_read_data(adb, infid, pp.trackID, &data_buffer, &data_buffer_size))
mas01cr@433 694 error("failed to read data");
mas01mc@324 695 // Load power and calculate power and l2norm sequence sums
mas01cr@438 696 init_track_aux_data(pp.trackID, data_buffer, &dbpointers.l2norm_data, &dbpointers.l2norm, &dbpointers.power_data, &dbpointers.power);
mas01mc@292 697 }
mas01mc@324 698 }
mas01mc@324 699 else{
mas01mc@324 700 // These offsets are w.r.t. the entire database of feature vectors and auxillary variables
mas01cr@450 701 trackOffset = (*adb->track_offsets)[pp.trackID];
mas01cr@455 702 trackIndexOffset = trackOffset/(adb->header->dim * sizeof(double)); // num vectors offset
mas01mc@324 703 }
mas01mc@324 704 Uns32T qPos = usingQueryPoint?0:pp.qpos;// index for query point
mas01mc@324 705 Uns32T sPos = trackIndexOffset+pp.spos; // index into l2norm table
mas01mc@324 706 // Test power thresholds before computing distance
mas01cr@438 707 if( ( (!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers->power[qPos], dbpointers.power[sPos])) &&
mas01cr@451 708 ( qPos<qpointers->nvectors-sequence_length+1 && pp.spos<(*adb->track_lengths)[pp.trackID]-sequence_length+1 ) ){
mas01mc@324 709 // Non-large ADB track data is loaded inside power test for efficiency
mas01cr@455 710 if(!(adb->header->flags & O2_FLAG_LARGE_ADB) && (currentTrack!=pp.trackID) ){
mas01mc@324 711 // On currentTrack change, allocate and load track data
mas01mc@324 712 currentTrack=pp.trackID;
mas01cr@455 713 lseek(dbfid, adb->header->dataOffset + trackOffset, SEEK_SET);
mas01cr@433 714 if(audiodb_read_data(adb, dbfid, currentTrack, &data_buffer, &data_buffer_size))
mas01cr@433 715 error("failed to read data");
mas01mc@324 716 }
mas01mc@324 717 // Compute distance
mas01cr@455 718 dist = audiodb_dot_product(query + qPos*adb->header->dim, data_buffer + pp.spos*adb->header->dim, adb->header->dim*sequence_length);
mas01cr@437 719 double qn = qpointers->l2norm[qPos];
mas01cr@438 720 double sn = dbpointers.l2norm[sPos];
mas01cr@435 721 switch(spec->params.distance) {
mas01cr@431 722 case ADB_DISTANCE_EUCLIDEAN_NORMED:
mas01mc@324 723 dist = 2 - (2/(qn*sn))*dist;
mas01cr@431 724 break;
mas01cr@431 725 case ADB_DISTANCE_EUCLIDEAN:
mas01cr@431 726 dist = qn*qn + sn*sn - 2*dist;
mas01cr@431 727 break;
mas01cr@431 728 }
mas01cr@424 729 if((!radius) || dist <= (O2_LSH_EXACT_MULT*radius+O2_DISTANCE_TOLERANCE)) {
mas01cr@424 730 adb_result_t r;
mas01cr@453 731 r.key = (*adb->keys)[pp.trackID].c_str();
mas01cr@424 732 r.dist = dist;
mas01cr@424 733 r.qpos = pp.qpos;
mas01cr@424 734 r.ipos = pp.spos;
mas01cr@458 735 qstate->accumulator->add_point(&r);
mas01cr@424 736 }
mas01mc@292 737 }
mas01cr@458 738 qstate->exact_evaluation_queue->pop();
mas01mc@292 739 }
mas01mc@315 740 // Cleanup
mas01cr@438 741 SAFE_DELETE_ARRAY(dbpointers.l2norm_data);
mas01cr@438 742 SAFE_DELETE_ARRAY(dbpointers.power_data);
mas01cr@438 743 SAFE_DELETE_ARRAY(dbpointers.mean_duration);
mas01cr@458 744 delete qstate->exact_evaluation_queue;
mas01mc@292 745 }
mas01mc@292 746
mas01cr@458 747 int audioDB::query_loop(adb_t *adb, adb_query_spec_t *spec, adb_qstate_internal_t *qstate) {
mas01cr@239 748
mas01cr@239 749 double *query, *query_data;
mas01cr@438 750 adb_qpointers_internal_t qpointers = {0}, dbpointers = {0};
mas01cr@437 751
mas01cr@437 752 bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD);
mas01cr@239 753
mas01cr@452 754 if(adb->header->flags & O2_FLAG_LARGE_ADB) {
mas01cr@452 755 /* FIXME: actually it would be nice to support this mode of
mas01cr@452 756 * operation, but for now... */
mas01cr@452 757 return 1;
mas01cr@452 758 }
mas01mc@324 759
mas01cr@444 760 if(audiodb_query_spec_qpointers(adb, spec, &query_data, &query, &qpointers)) {
mas01cr@452 761 return 1;
mas01cr@444 762 }
mas01cr@239 763
mas01cr@448 764 if(audiodb_set_up_dbpointers(adb, spec, &dbpointers)) {
mas01cr@452 765 return 1;
mas01cr@434 766 }
mas01cr@239 767
mas01cr@451 768 unsigned j,k,track,trackOffset=0, HOP_SIZE = spec->refine.hopsize;
mas01cr@435 769 unsigned wL = spec->qid.sequence_length;
mas01cr@239 770 double **D = 0; // Differences query and target
mas01cr@239 771 double **DD = 0; // Matched filter distance
mas01cr@239 772
mas01cr@437 773 D = new double*[qpointers.nvectors]; // pre-allocate
mas01cr@437 774 DD = new double*[qpointers.nvectors];
mas01cr@239 775
mas01cr@239 776 off_t trackIndexOffset;
mas01cr@239 777
mas01cr@239 778 // Track loop
mas01cr@239 779 size_t data_buffer_size = 0;
mas01cr@239 780 double *data_buffer = 0;
mas01cr@451 781 lseek(adb->fd, adb->header->dataOffset, SEEK_SET);
mas01cr@239 782
mas01cr@458 783 std::set<std::string>::iterator keys_end = qstate->allowed_keys->end();
mas01cr@453 784 for(track = 0; track < adb->header->numFiles; track++) {
mas01cr@453 785 unsigned t = track;
mas01cr@458 786
mas01cr@458 787 while (qstate->allowed_keys->find((*adb->keys)[track]) == keys_end) {
mas01cr@453 788 track++;
mas01cr@453 789 if(track == adb->header->numFiles) {
mas01cr@453 790 goto loop_finish;
mas01cr@239 791 }
mas01cr@239 792 }
mas01cr@453 793 trackOffset = (*adb->track_offsets)[track];
mas01cr@453 794 if(track != t) {
mas01cr@453 795 lseek(adb->fd, adb->header->dataOffset + trackOffset, SEEK_SET);
mas01mc@292 796 }
mas01cr@451 797 trackIndexOffset = trackOffset / (adb->header->dim * sizeof(double)); // dbpointers.nvectors offset
mas01cr@239 798
mas01cr@452 799 if(audiodb_read_data(adb, adb->fd, track, &data_buffer, &data_buffer_size)) {
mas01cr@452 800 return 1;
mas01cr@452 801 }
mas01cr@451 802 if(wL <= (*adb->track_lengths)[track]) { // test for short sequences
mas01cr@239 803
mas01cr@439 804 audiodb_initialize_arrays(adb, spec, track, qpointers.nvectors, query, data_buffer, D, DD);
mas01cr@239 805
mas01cr@437 806 if((!(spec->refine.flags & ADB_REFINE_DURATION_RATIO)) ||
mas01cr@438 807 fabs(dbpointers.mean_duration[track]-qpointers.mean_duration[0]) < qpointers.mean_duration[0]*spec->refine.duration_ratio) {
mas01cr@239 808
mas01cr@239 809 // Search for minimum distance by shingles (concatenated vectors)
mas01cr@437 810 for(j = 0; j <= qpointers.nvectors - wL; j += HOP_SIZE) {
mas01cr@451 811 for(k = 0; k <= (*adb->track_lengths)[track] - wL; k += HOP_SIZE) {
mas01cr@431 812 double thisDist = 0;
mas01cr@438 813 double qn = qpointers.l2norm[j];
mas01cr@438 814 double sn = dbpointers.l2norm[trackIndexOffset + k];
mas01cr@435 815 switch(spec->params.distance) {
mas01cr@431 816 case ADB_DISTANCE_EUCLIDEAN_NORMED:
mas01cr@438 817 thisDist = 2-(2/(qn*sn))*DD[j][k];
mas01cr@431 818 break;
mas01cr@431 819 case ADB_DISTANCE_EUCLIDEAN:
mas01cr@438 820 thisDist = qn*qn + sn*sn - 2*DD[j][k];
mas01cr@431 821 break;
mas01cr@431 822 case ADB_DISTANCE_DOT_PRODUCT:
mas01cr@431 823 thisDist = DD[j][k];
mas01cr@431 824 break;
mas01cr@431 825 }
mas01cr@239 826 // Power test
mas01cr@438 827 if ((!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers.power[j], dbpointers.power[trackIndexOffset + k])) {
mas01cr@239 828 // radius test
mas01cr@435 829 if((!(spec->refine.flags & ADB_REFINE_RADIUS)) ||
mas01cr@435 830 thisDist <= (spec->refine.radius+O2_DISTANCE_TOLERANCE)) {
mas01cr@423 831 adb_result_t r;
mas01cr@453 832 r.key = (*adb->keys)[track].c_str();
mas01cr@423 833 r.dist = thisDist;
mas01cr@451 834 if(spec->qid.flags & ADB_QUERY_ID_FLAG_EXHAUSTIVE) {
mas01cr@451 835 r.qpos = j;
mas01cr@451 836 } else {
mas01cr@451 837 r.qpos = spec->qid.sequence_start;
mas01cr@451 838 }
mas01cr@423 839 r.ipos = k;
mas01cr@458 840 qstate->accumulator->add_point(&r);
mas01cr@239 841 }
mas01cr@239 842 }
mas01cr@239 843 }
mas01cr@239 844 }
mas01cr@239 845 } // Duration match
mas01cr@437 846 audiodb_delete_arrays(track, qpointers.nvectors, D, DD);
mas01cr@239 847 }
mas01cr@239 848 }
mas01cr@239 849
mas01cr@453 850 loop_finish:
mas01cr@453 851
mas01cr@239 852 free(data_buffer);
mas01cr@239 853
mas01cr@239 854 // Clean up
mas01cr@239 855 if(query_data)
mas01cr@239 856 delete[] query_data;
mas01cr@437 857 if(qpointers.l2norm_data)
mas01cr@437 858 delete[] qpointers.l2norm_data;
mas01cr@437 859 if(qpointers.power_data)
mas01cr@437 860 delete[] qpointers.power_data;
mas01cr@437 861 if(qpointers.mean_duration)
mas01cr@437 862 delete[] qpointers.mean_duration;
mas01cr@438 863 if(dbpointers.power_data)
mas01cr@438 864 delete[] dbpointers.power_data;
mas01cr@438 865 if(dbpointers.l2norm_data)
mas01cr@438 866 delete[] dbpointers.l2norm_data;
mas01cr@239 867 if(D)
mas01cr@239 868 delete[] D;
mas01cr@239 869 if(DD)
mas01cr@239 870 delete[] DD;
mas01cr@438 871 if(dbpointers.mean_duration)
mas01cr@438 872 delete[] dbpointers.mean_duration;
mas01cr@452 873
mas01cr@452 874 return 0;
mas01cr@239 875 }