annotate query.cpp @ 601:82d23418d867

Fix some fd leaks in the command-line binary. Strictly speaking, they're not really leaks, because the only codepath that suffers from these leaks exits immediately afterwards. On the other hand, this fix makes valgrind on e.g. tests/0025 happier, going from 5 errors to none.
author mas01cr
date Fri, 14 Aug 2009 16:39:32 +0000
parents b2a941a372fb
children e21a3db643af
rev   line source
mas01cr@509 1 extern "C" {
mas01cr@509 2 #include "audioDB_API.h"
mas01cr@509 3 }
mas01cr@498 4 #include "audioDB-internals.h"
mas01cr@498 5 #include "accumulators.h"
mas01cr@239 6
mas01cr@498 7 bool audiodb_powers_acceptable(const adb_query_refine_t *r, double p1, double p2) {
mas01cr@498 8 if (r->flags & ADB_REFINE_ABSOLUTE_THRESHOLD) {
mas01cr@498 9 if ((p1 < r->absolute_threshold) || (p2 < r->absolute_threshold)) {
mas01cr@239 10 return false;
mas01cr@239 11 }
mas01cr@239 12 }
mas01cr@498 13 if (r->flags & ADB_REFINE_RELATIVE_THRESHOLD) {
mas01cr@498 14 if (fabs(p1-p2) > fabs(r->relative_threshold)) {
mas01cr@239 15 return false;
mas01cr@239 16 }
mas01cr@239 17 }
mas01cr@239 18 return true;
mas01cr@239 19 }
mas01cr@239 20
mas01cr@498 21 adb_query_results_t *audiodb_query_spec(adb_t *adb, const adb_query_spec_t *qspec) {
mas01cr@498 22 adb_qstate_internal_t qstate = {0};
mas01cr@498 23 qstate.allowed_keys = new std::set<std::string>;
mas01cr@498 24 adb_query_results_t *results;
mas01cr@498 25 if(qspec->refine.flags & ADB_REFINE_INCLUDE_KEYLIST) {
mas01cr@498 26 for(unsigned int k = 0; k < qspec->refine.include.nkeys; k++) {
mas01cr@498 27 qstate.allowed_keys->insert(qspec->refine.include.keys[k]);
mas01cr@498 28 }
mas01cr@498 29 } else {
mas01cr@498 30 for(unsigned int k = 0; k < adb->header->numFiles; k++) {
mas01cr@498 31 qstate.allowed_keys->insert((*adb->keys)[k]);
mas01cr@498 32 }
mas01cr@498 33 }
mas01cr@498 34 if(qspec->refine.flags & ADB_REFINE_EXCLUDE_KEYLIST) {
mas01cr@498 35 for(unsigned int k = 0; k < qspec->refine.exclude.nkeys; k++) {
mas01cr@498 36 qstate.allowed_keys->erase(qspec->refine.exclude.keys[k]);
mas01cr@498 37 }
mas01cr@498 38 }
mas01mc@292 39
mas01cr@498 40 switch(qspec->params.distance) {
mas01cr@498 41 case ADB_DISTANCE_DOT_PRODUCT:
mas01cr@498 42 switch(qspec->params.accumulation) {
mas01cr@498 43 case ADB_ACCUMULATION_DB:
mas01cr@498 44 qstate.accumulator = new DBAccumulator<adb_result_dist_gt>(qspec->params.npoints);
mas01cr@498 45 break;
mas01cr@498 46 case ADB_ACCUMULATION_PER_TRACK:
mas01cr@498 47 qstate.accumulator = new PerTrackAccumulator<adb_result_dist_gt>(qspec->params.npoints, qspec->params.ntracks);
mas01cr@498 48 break;
mas01cr@498 49 case ADB_ACCUMULATION_ONE_TO_ONE:
mas01cr@498 50 qstate.accumulator = new NearestAccumulator<adb_result_dist_gt>();
mas01cr@498 51 break;
mas01cr@498 52 default:
mas01cr@498 53 goto error;
mas01cr@239 54 }
mas01cr@239 55 break;
mas01cr@498 56 case ADB_DISTANCE_EUCLIDEAN_NORMED:
mas01cr@498 57 case ADB_DISTANCE_EUCLIDEAN:
mas01cr@498 58 switch(qspec->params.accumulation) {
mas01cr@498 59 case ADB_ACCUMULATION_DB:
mas01cr@498 60 qstate.accumulator = new DBAccumulator<adb_result_dist_lt>(qspec->params.npoints);
mas01cr@498 61 break;
mas01cr@498 62 case ADB_ACCUMULATION_PER_TRACK:
mas01cr@498 63 qstate.accumulator = new PerTrackAccumulator<adb_result_dist_lt>(qspec->params.npoints, qspec->params.ntracks);
mas01cr@498 64 break;
mas01cr@498 65 case ADB_ACCUMULATION_ONE_TO_ONE:
mas01cr@498 66 qstate.accumulator = new NearestAccumulator<adb_result_dist_lt>();
mas01cr@498 67 break;
mas01cr@498 68 default:
mas01cr@498 69 goto error;
mas01mc@263 70 }
mas01mc@263 71 break;
mas01cr@239 72 default:
mas01cr@498 73 goto error;
mas01mc@329 74 }
mas01cr@498 75
mas01cr@498 76 if((qspec->refine.flags & ADB_REFINE_RADIUS) && audiodb_index_exists(adb->path, qspec->refine.radius, qspec->qid.sequence_length)) {
mas01cr@498 77 if(audiodb_index_query_loop(adb, qspec, &qstate) < 0) {
mas01cr@498 78 goto error;
mas01cr@498 79 }
mas01cr@498 80 } else {
mas01cr@498 81 if(audiodb_query_loop(adb, qspec, &qstate)) {
mas01cr@498 82 goto error;
mas01cr@498 83 }
mas01mc@329 84 }
mas01mc@292 85
mas01cr@498 86 results = qstate.accumulator->get_points();
mas01cr@498 87
mas01cr@498 88 delete qstate.accumulator;
mas01cr@498 89 delete qstate.allowed_keys;
mas01cr@498 90
mas01cr@498 91 return results;
mas01cr@498 92
mas01cr@498 93 error:
mas01cr@498 94 if(qstate.accumulator)
mas01cr@498 95 delete qstate.accumulator;
mas01cr@498 96 if(qstate.allowed_keys)
mas01cr@498 97 delete qstate.allowed_keys;
mas01cr@498 98 return NULL;
mas01cr@239 99 }
mas01cr@239 100
mas01cr@498 101 int audiodb_query_free_results(adb_t *adb, const adb_query_spec_t *spec, adb_query_results_t *rs) {
mas01cr@498 102 free(rs->results);
mas01cr@498 103 free(rs);
mas01cr@498 104 return 0;
mas01cr@239 105 }
mas01cr@239 106
mas01cr@589 107 /* FIXME: we should check the return values from allocation */
mas01cr@498 108 static void audiodb_initialize_arrays(adb_t *adb, const adb_query_spec_t *spec, int track, unsigned int numVectors, double *query, double *data_buffer, double **D, double **DD) {
mas01cr@239 109 unsigned int j, k, l, w;
mas01cr@239 110 double *dp, *qp, *sp;
mas01cr@239 111
mas01cr@498 112 const unsigned HOP_SIZE = spec->refine.hopsize;
mas01cr@498 113 const unsigned wL = spec->qid.sequence_length;
mas01cr@239 114
mas01cr@239 115 for(j = 0; j < numVectors; j++) {
mas01cr@239 116 // Sum products matrix
mas01cr@498 117 D[j] = new double[(*adb->track_lengths)[track]];
mas01cr@239 118 // Matched filter matrix
mas01cr@498 119 DD[j]=new double[(*adb->track_lengths)[track]];
mas01cr@239 120 }
mas01cr@239 121
mas01cr@239 122 // Dot product
mas01cr@239 123 for(j = 0; j < numVectors; j++)
mas01cr@498 124 for(k = 0; k < (*adb->track_lengths)[track]; k++){
mas01cr@498 125 qp = query + j * adb->header->dim;
mas01cr@498 126 sp = data_buffer + k * adb->header->dim;
mas01cr@239 127 DD[j][k] = 0.0; // Initialize matched filter array
mas01cr@239 128 dp = &D[j][k]; // point to correlation cell j,k
mas01cr@239 129 *dp = 0.0; // initialize correlation cell
mas01cr@498 130 l = adb->header->dim; // size of vectors
mas01cr@239 131 while(l--)
mas01cr@239 132 *dp += *qp++ * *sp++;
mas01cr@239 133 }
mas01cr@239 134
mas01cr@239 135 // Matched Filter
mas01cr@239 136 // HOP SIZE == 1
mas01cr@239 137 double* spd;
mas01cr@239 138 if(HOP_SIZE == 1) { // HOP_SIZE = shingleHop
mas01cr@239 139 for(w = 0; w < wL; w++) {
mas01cr@239 140 for(j = 0; j < numVectors - w; j++) {
mas01cr@239 141 sp = DD[j];
mas01cr@239 142 spd = D[j+w] + w;
mas01cr@498 143 k = (*adb->track_lengths)[track] - w;
mas01mc@292 144 while(k--)
mas01mc@292 145 *sp++ += *spd++;
mas01cr@239 146 }
mas01cr@239 147 }
mas01cr@239 148 } else { // HOP_SIZE != 1
mas01cr@239 149 for(w = 0; w < wL; w++) {
mas01cr@239 150 for(j = 0; j < numVectors - w; j += HOP_SIZE) {
mas01cr@239 151 sp = DD[j];
mas01cr@239 152 spd = D[j+w]+w;
mas01cr@498 153 for(k = 0; k < (*adb->track_lengths)[track] - w; k += HOP_SIZE) {
mas01cr@239 154 *sp += *spd;
mas01cr@239 155 sp += HOP_SIZE;
mas01cr@239 156 spd += HOP_SIZE;
mas01cr@239 157 }
mas01cr@239 158 }
mas01cr@239 159 }
mas01cr@239 160 }
mas01cr@239 161 }
mas01cr@239 162
/* Release the numVectors row buffers held in D and in DD with
 * delete[].  Either table may be NULL, in which case it is skipped.
 * The tables themselves are not freed and their entries are not
 * cleared; track is unused. */
static void audiodb_delete_arrays(int track, unsigned int numVectors, double **D, double **DD) {
  double **tables[2] = { D, DD };
  for(unsigned int t = 0; t < 2; t++) {
    double **rows = tables[t];
    if(rows == NULL) {
      continue;
    }
    for(unsigned int row = 0; row < numVectors; row++) {
      delete[] rows[row];
    }
  }
}
mas01cr@239 175
mas01cr@498 176 int audiodb_read_data(adb_t *adb, int trkfid, int track, double **data_buffer_p, size_t *data_buffer_size_p) {
mas01cr@498 177 uint32_t track_length = (*adb->track_lengths)[track];
mas01cr@498 178 size_t track_size = track_length * sizeof(double) * adb->header->dim;
mas01cr@498 179 if (track_size > *data_buffer_size_p) {
mas01cr@239 180 if(*data_buffer_p) {
mas01cr@239 181 free(*data_buffer_p);
mas01cr@239 182 }
mas01cr@239 183 {
mas01cr@498 184 *data_buffer_size_p = track_size;
mas01cr@498 185 void *tmp = malloc(track_size);
mas01cr@239 186 if (tmp == NULL) {
mas01cr@498 187 goto error;
mas01cr@239 188 }
mas01cr@239 189 *data_buffer_p = (double *) tmp;
mas01cr@239 190 }
mas01cr@239 191 }
mas01cr@239 192
mas01cr@498 193 read_or_goto_error(trkfid, *data_buffer_p, track_size);
mas01cr@498 194 return 0;
mas01cr@498 195
mas01cr@498 196 error:
mas01cr@498 197 return 1;
mas01cr@239 198 }
mas01cr@239 199
mas01cr@498 200 int audiodb_track_id_datum(adb_t *adb, uint32_t track_id, adb_datum_t *d) {
mas01cr@498 201 off_t track_offset = (*adb->track_offsets)[track_id];
mas01cr@509 202 if(adb->header->flags & ADB_HEADER_FLAG_REFERENCES) {
mas01cr@498 203 /* create a reference/insert, then use adb_insert_create_datum() */
mas01cr@498 204 adb_reference_t reference = {0};
mas01cr@509 205 char features[ADB_MAXSTR], power[ADB_MAXSTR], times[ADB_MAXSTR];
mas01cr@596 206 lseek_set_or_goto_error(adb->fd, adb->header->dataOffset + track_id * ADB_FILETABLE_ENTRY_SIZE);
mas01cr@509 207 read_or_goto_error(adb->fd, features, ADB_MAXSTR);
mas01cr@498 208 reference.features = features;
mas01cr@509 209 if(adb->header->flags & ADB_HEADER_FLAG_POWER) {
mas01cr@596 210 lseek_set_or_goto_error(adb->fd, adb->header->powerTableOffset + track_id * ADB_FILETABLE_ENTRY_SIZE);
mas01cr@509 211 read_or_goto_error(adb->fd, power, ADB_MAXSTR);
mas01cr@498 212 reference.power = power;
mas01cr@498 213 }
mas01cr@509 214 if(adb->header->flags & ADB_HEADER_FLAG_TIMES) {
mas01cr@596 215 lseek_set_or_goto_error(adb->fd, adb->header->timesTableOffset + track_id * ADB_FILETABLE_ENTRY_SIZE);
mas01cr@509 216 read_or_goto_error(adb->fd, times, ADB_MAXSTR);
mas01cr@498 217 reference.times = times;
mas01cr@498 218 }
mas01cr@498 219 return audiodb_insert_create_datum(&reference, d);
mas01cr@498 220 } else {
mas01cr@498 221 /* initialize from sources of data that we already have */
mas01cr@498 222 d->nvectors = (*adb->track_lengths)[track_id];
mas01cr@498 223 d->dim = adb->header->dim;
mas01cr@498 224 d->key = (*adb->keys)[track_id].c_str();
mas01cr@498 225 /* read out stuff from the database tables */
mas01cr@596 226 malloc_and_fill_or_goto_error(double *, d->data, adb->header->dataOffset + track_offset, d->nvectors * d->dim * sizeof(double));
mas01cr@509 227 if(adb->header->flags & ADB_HEADER_FLAG_POWER) {
mas01cr@596 228 malloc_and_fill_or_goto_error(double *, d->power, adb->header->powerTableOffset + track_offset / d->dim, d->nvectors * sizeof(double));
mas01cr@598 229 } else {
mas01cr@598 230 d->power = NULL;
mas01cr@498 231 }
mas01cr@509 232 if(adb->header->flags & ADB_HEADER_FLAG_TIMES) {
mas01cr@596 233 malloc_and_fill_or_goto_error(double *, d->times, adb->header->timesTableOffset + 2 * track_offset / d->dim, 2 * d->nvectors * sizeof(double));
mas01cr@598 234 } else {
mas01cr@598 235 d->times = NULL;
mas01cr@498 236 }
mas01cr@498 237 return 0;
mas01cr@498 238 }
mas01cr@498 239 error:
mas01cr@580 240 audiodb_really_free_datum(d);
mas01cr@498 241 return 1;
mas01cr@498 242 }
mas01mc@292 243
mas01cr@498 244 int audiodb_datum_qpointers(adb_datum_t *d, uint32_t sequence_length, double **vector_data, double **vector, adb_qpointers_internal_t *qpointers) {
mas01cr@498 245 uint32_t nvectors = d->nvectors;
mas01cr@498 246
mas01cr@498 247 qpointers->nvectors = nvectors;
mas01cr@498 248
mas01cr@498 249 size_t vector_size = nvectors * sizeof(double) * d->dim;
mas01cr@498 250 *vector_data = new double[vector_size];
mas01cr@498 251 memcpy(*vector_data, d->data, vector_size);
mas01cr@498 252
mas01cr@498 253 qpointers->l2norm_data = new double[vector_size / d->dim];
mas01cr@498 254 audiodb_l2norm_buffer(*vector_data, d->dim, nvectors, qpointers->l2norm_data);
mas01cr@498 255 audiodb_sequence_sum(qpointers->l2norm_data, nvectors, sequence_length);
mas01cr@498 256 audiodb_sequence_sqrt(qpointers->l2norm_data, nvectors, sequence_length);
mas01cr@498 257
mas01cr@498 258 if(d->power) {
mas01cr@498 259 qpointers->power_data = new double[vector_size / d->dim];
mas01cr@498 260 memcpy(qpointers->power_data, d->power, vector_size / d->dim);
mas01cr@498 261 audiodb_sequence_sum(qpointers->power_data, nvectors, sequence_length);
mas01cr@498 262 audiodb_sequence_average(qpointers->power_data, nvectors, sequence_length);
mas01cr@239 263 }
mas01cr@239 264
mas01cr@498 265 if(d->times) {
mas01cr@498 266 qpointers->mean_duration = new double[1];
mas01cr@498 267 *qpointers->mean_duration = 0;
mas01cr@498 268 for(unsigned int k = 0; k < nvectors; k++) {
mas01cr@498 269 *qpointers->mean_duration += d->times[2*k+1] - d->times[2*k];
mas01cr@239 270 }
mas01cr@498 271 *qpointers->mean_duration /= nvectors;
mas01cr@239 272 }
mas01cr@239 273
mas01cr@498 274 *vector = *vector_data;
mas01cr@498 275 qpointers->l2norm = qpointers->l2norm_data;
mas01cr@498 276 qpointers->power = qpointers->power_data;
mas01cr@498 277 return 0;
mas01cr@498 278 }
mas01cr@498 279
mas01cr@498 280 int audiodb_query_spec_qpointers(adb_t *adb, const adb_query_spec_t *spec, double **vector_data, double **vector, adb_qpointers_internal_t *qpointers) {
mas01cr@498 281 adb_datum_t *datum;
mas01cr@498 282 adb_datum_t d = {0};
mas01cr@498 283 uint32_t sequence_length;
mas01cr@498 284 uint32_t sequence_start;
mas01cr@498 285
mas01cr@498 286 datum = spec->qid.datum;
mas01cr@498 287 sequence_length = spec->qid.sequence_length;
mas01cr@498 288 sequence_start = spec->qid.sequence_start;
mas01cr@498 289
mas01cr@498 290 if(datum->data) {
mas01cr@498 291 if(datum->dim != adb->header->dim) {
mas01cr@498 292 return 1;
mas01cr@239 293 }
mas01cr@498 294 /* initialize d, and mark that nothing needs freeing later. */
mas01cr@498 295 d = *datum;
mas01cr@498 296 datum = &d;
mas01cr@498 297 } else if (datum->key) {
mas01cr@498 298 uint32_t track_id;
mas01cr@498 299 if((track_id = audiodb_key_index(adb, datum->key)) == (uint32_t) -1) {
mas01cr@498 300 return 1;
mas01cr@498 301 }
mas01cr@498 302 audiodb_track_id_datum(adb, track_id, &d);
mas01cr@498 303 } else {
mas01cr@498 304 return 1;
mas01cr@239 305 }
mas01cr@239 306
mas01cr@498 307 /* FIXME: check the overflow logic here */
mas01cr@498 308 if(sequence_start + sequence_length > d.nvectors) {
mas01cr@498 309 if(datum != &d) {
mas01cr@580 310 audiodb_really_free_datum(&d);
mas01cr@498 311 }
mas01cr@498 312 return 1;
mas01cr@498 313 }
mas01cr@239 314
mas01cr@498 315 audiodb_datum_qpointers(&d, sequence_length, vector_data, vector, qpointers);
mas01cr@498 316
mas01cr@498 317 /* Finally, if applicable, set up the moving qpointers. */
mas01cr@498 318 if(spec->qid.flags & ADB_QID_FLAG_EXHAUSTIVE) {
mas01cr@498 319 /* the qpointers are already at the start, and so correct. */
mas01cr@498 320 } else {
mas01cr@498 321 /* adjust the qpointers to point to the correct place in the sequence */
mas01cr@498 322 *vector = *vector_data + spec->qid.sequence_start * d.dim;
mas01cr@498 323 qpointers->l2norm = qpointers->l2norm_data + spec->qid.sequence_start;
mas01cr@498 324 if(d.power) {
mas01cr@498 325 qpointers->power = qpointers->power_data + spec->qid.sequence_start;
mas01cr@239 326 }
mas01cr@498 327 qpointers->nvectors = sequence_length;
mas01cr@239 328 }
mas01cr@498 329
mas01cr@498 330 /* Clean up: free any bits of datum that we have ourselves
mas01cr@498 331 * allocated. */
mas01cr@498 332 if(datum != &d) {
mas01cr@580 333 audiodb_really_free_datum(&d);
mas01cr@498 334 }
mas01cr@498 335
mas01cr@498 336 return 0;
mas01cr@239 337 }
mas01cr@239 338
mas01cr@498 339 static int audiodb_set_up_dbpointers(adb_t *adb, const adb_query_spec_t *spec, adb_qpointers_internal_t *dbpointers) {
mas01cr@498 340 uint32_t nvectors = adb->header->length / (adb->header->dim * sizeof(double));
mas01cr@498 341 uint32_t sequence_length = spec->qid.sequence_length;
mas01mc@292 342
mas01cr@498 343 bool using_power = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD);
mas01cr@498 344 bool using_times = spec->refine.flags & ADB_REFINE_DURATION_RATIO;
mas01cr@498 345 double *times_table = NULL;
mas01cr@498 346
mas01cr@498 347
mas01cr@498 348 dbpointers->nvectors = nvectors;
mas01cr@498 349 dbpointers->l2norm_data = new double[nvectors];
mas01cr@498 350
mas01cr@498 351 double *snpp = dbpointers->l2norm_data, *sppp = 0;
mas01cr@596 352 lseek_set_or_goto_error(adb->fd, adb->header->l2normTableOffset);
mas01cr@498 353 read_or_goto_error(adb->fd, dbpointers->l2norm_data, nvectors * sizeof(double));
mas01cr@498 354
mas01cr@498 355 if (using_power) {
mas01cr@509 356 if (!(adb->header->flags & ADB_HEADER_FLAG_POWER)) {
mas01cr@498 357 goto error;
mas01cr@498 358 }
mas01cr@498 359 dbpointers->power_data = new double[nvectors];
mas01cr@498 360 sppp = dbpointers->power_data;
mas01cr@596 361 lseek_set_or_goto_error(adb->fd, adb->header->powerTableOffset);
mas01cr@498 362 read_or_goto_error(adb->fd, dbpointers->power_data, nvectors * sizeof(double));
mas01mc@292 363 }
mas01mc@292 364
mas01cr@498 365 for(unsigned int i = 0; i < adb->header->numFiles; i++){
mas01cr@498 366 size_t track_length = (*adb->track_lengths)[i];
mas01cr@498 367 if(track_length >= sequence_length) {
mas01cr@498 368 audiodb_sequence_sum(snpp, track_length, sequence_length);
mas01cr@498 369 audiodb_sequence_sqrt(snpp, track_length, sequence_length);
mas01cr@498 370 if (using_power) {
mas01cr@498 371 audiodb_sequence_sum(sppp, track_length, sequence_length);
mas01cr@498 372 audiodb_sequence_average(sppp, track_length, sequence_length);
mas01cr@498 373 }
mas01mc@324 374 }
mas01cr@498 375 snpp += track_length;
mas01cr@498 376 if (using_power) {
mas01cr@498 377 sppp += track_length;
mas01mc@324 378 }
mas01mc@292 379 }
mas01mc@292 380
mas01cr@498 381 if (using_times) {
mas01cr@509 382 if(!(adb->header->flags & ADB_HEADER_FLAG_TIMES)) {
mas01cr@498 383 goto error;
mas01cr@498 384 }
mas01mc@292 385
mas01cr@498 386 dbpointers->mean_duration = new double[adb->header->numFiles];
mas01cr@498 387
mas01cr@596 388 malloc_and_fill_or_goto_error(double *, times_table, adb->header->timesTableOffset, 2 * nvectors * sizeof(double));
mas01cr@498 389 for(unsigned int k = 0; k < adb->header->numFiles; k++) {
mas01cr@498 390 size_t track_length = (*adb->track_lengths)[k];
mas01cr@498 391 unsigned int j;
mas01cr@498 392 dbpointers->mean_duration[k] = 0.0;
mas01cr@498 393 for(j = 0; j < track_length; j++) {
mas01cr@498 394 dbpointers->mean_duration[k] += times_table[2*j+1] - times_table[2*j];
mas01mc@292 395 }
mas01cr@498 396 dbpointers->mean_duration[k] /= j;
mas01mc@292 397 }
mas01cr@498 398
mas01cr@498 399 free(times_table);
mas01cr@498 400 times_table = NULL;
mas01mc@292 401 }
mas01cr@498 402
mas01cr@498 403 dbpointers->l2norm = dbpointers->l2norm_data;
mas01cr@498 404 dbpointers->power = dbpointers->power_data;
mas01cr@498 405 return 0;
mas01cr@498 406
mas01cr@498 407 error:
mas01cr@596 408 maybe_delete_array(dbpointers->l2norm_data);
mas01cr@596 409 maybe_delete_array(dbpointers->power_data);
mas01cr@596 410 maybe_delete_array(dbpointers->mean_duration);
mas01cr@596 411 maybe_free(times_table);
mas01cr@498 412 return 1;
mas01cr@498 413
mas01mc@292 414 }
mas01mc@292 415
mas01cr@498 416 int audiodb_query_queue_loop(adb_t *adb, const adb_query_spec_t *spec, adb_qstate_internal_t *qstate, double *query, adb_qpointers_internal_t *qpointers) {
mas01cr@498 417 adb_qpointers_internal_t dbpointers = {0};
mas01mc@292 418
mas01cr@498 419 uint32_t sequence_length = spec->qid.sequence_length;
mas01cr@498 420 bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD);
mas01cr@239 421
mas01cr@498 422 if(qstate->exact_evaluation_queue->size() == 0) {
mas01cr@498 423 return 0;
mas01cr@239 424 }
mas01cr@239 425
mas01cr@498 426 /* We are guaranteed that the order of points is sorted by:
mas01cr@498 427 * {trackID, spos, qpos} so we can be relatively efficient in
mas01cr@498 428 * initialization of track data. We assume that points usually
mas01cr@498 429 * don't overlap, so we will use exhaustive dot product evaluation
mas01cr@498 430 * (instead of memoization of partial sums, as in query_loop()).
mas01cr@498 431 */
mas01cr@498 432 double dist;
mas01cr@498 433 double *dbdata = 0, *dbdata_pointer;
mas01cr@589 434 uint32_t currentTrack = 0x80000000; // KLUDGE: Initialize with a value outside of track index range
mas01cr@589 435 uint32_t npairs = qstate->exact_evaluation_queue->size();
mas01cr@498 436 while(npairs--) {
mas01cr@498 437 PointPair pp = qstate->exact_evaluation_queue->top();
mas01cr@498 438 if(currentTrack != pp.trackID) {
mas01cr@509 439 maybe_delete_array(dbdata);
mas01cr@509 440 maybe_delete_array(dbpointers.l2norm_data);
mas01cr@509 441 maybe_delete_array(dbpointers.power_data);
mas01cr@509 442 maybe_delete_array(dbpointers.mean_duration);
mas01cr@498 443 currentTrack = pp.trackID;
mas01cr@498 444 adb_datum_t d = {0};
mas01cr@498 445 if(audiodb_track_id_datum(adb, pp.trackID, &d)) {
mas01cr@498 446 delete qstate->exact_evaluation_queue;
mas01cr@498 447 return 1;
mas01cr@498 448 }
mas01cr@498 449 if(audiodb_datum_qpointers(&d, sequence_length, &dbdata, &dbdata_pointer, &dbpointers)) {
mas01cr@498 450 delete qstate->exact_evaluation_queue;
mas01cr@580 451 audiodb_really_free_datum(&d);
mas01cr@498 452 return 1;
mas01cr@498 453 }
mas01cr@580 454 audiodb_really_free_datum(&d);
mas01cr@498 455 }
mas01cr@589 456 uint32_t qPos = (spec->qid.flags & ADB_QID_FLAG_EXHAUSTIVE) ? pp.qpos : 0;
mas01cr@589 457 uint32_t sPos = pp.spos; // index into l2norm table
mas01cr@498 458 // Test power thresholds before computing distance
mas01cr@498 459 if( ( (!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers->power[qPos], dbpointers.power[sPos])) &&
mas01cr@498 460 ( qPos<qpointers->nvectors-sequence_length+1 && sPos<(*adb->track_lengths)[pp.trackID]-sequence_length+1 ) ){
mas01cr@498 461 // Compute distance
mas01cr@498 462 dist = audiodb_dot_product(query + qPos*adb->header->dim, dbdata + sPos*adb->header->dim, adb->header->dim*sequence_length);
mas01cr@498 463 double qn = qpointers->l2norm[qPos];
mas01cr@498 464 double sn = dbpointers.l2norm[sPos];
mas01cr@498 465 switch(spec->params.distance) {
mas01cr@498 466 case ADB_DISTANCE_EUCLIDEAN_NORMED:
mas01cr@498 467 dist = 2 - (2/(qn*sn))*dist;
mas01cr@498 468 break;
mas01cr@498 469 case ADB_DISTANCE_EUCLIDEAN:
mas01cr@498 470 dist = qn*qn + sn*sn - 2*dist;
mas01cr@498 471 break;
mas01cr@498 472 }
mas01cr@498 473 if((!(spec->refine.flags & ADB_REFINE_RADIUS)) ||
mas01cr@509 474 dist <= (spec->refine.radius + ADB_DISTANCE_TOLERANCE)) {
mas01cr@498 475 adb_result_t r;
mas01cr@498 476 r.key = (*adb->keys)[pp.trackID].c_str();
mas01cr@498 477 r.dist = dist;
mas01cr@498 478 r.qpos = pp.qpos;
mas01cr@498 479 r.ipos = pp.spos;
mas01cr@498 480 qstate->accumulator->add_point(&r);
mas01cr@239 481 }
mas01cr@239 482 }
mas01cr@498 483 qstate->exact_evaluation_queue->pop();
mas01mc@292 484 }
mas01mc@474 485
mas01mc@315 486 // Cleanup
mas01cr@509 487 maybe_delete_array(dbdata);
mas01cr@509 488 maybe_delete_array(dbpointers.l2norm_data);
mas01cr@509 489 maybe_delete_array(dbpointers.power_data);
mas01cr@509 490 maybe_delete_array(dbpointers.mean_duration);
mas01cr@498 491 delete qstate->exact_evaluation_queue;
mas01cr@498 492 return 0;
mas01mc@292 493 }
mas01mc@292 494
mas01cr@498 495 int audiodb_query_loop(adb_t *adb, const adb_query_spec_t *spec, adb_qstate_internal_t *qstate) {
mas01cr@498 496
mas01cr@498 497 double *query, *query_data;
mas01cr@498 498 adb_qpointers_internal_t qpointers = {0}, dbpointers = {0};
mas01mc@292 499
mas01cr@498 500 bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD);
mas01cr@239 501
mas01cr@509 502 if(adb->header->flags & ADB_HEADER_FLAG_REFERENCES) {
mas01cr@498 503 /* FIXME: actually it would be nice to support this mode of
mas01cr@498 504 * operation, but for now... */
mas01cr@498 505 return 1;
mas01cr@498 506 }
mas01mc@324 507
mas01cr@498 508 if(audiodb_query_spec_qpointers(adb, spec, &query_data, &query, &qpointers)) {
mas01cr@498 509 return 1;
mas01cr@498 510 }
mas01cr@239 511
mas01cr@498 512 if(audiodb_set_up_dbpointers(adb, spec, &dbpointers)) {
mas01cr@498 513 return 1;
mas01cr@498 514 }
mas01cr@239 515
mas01cr@498 516 unsigned j,k,track,trackOffset=0, HOP_SIZE = spec->refine.hopsize;
mas01cr@498 517 unsigned wL = spec->qid.sequence_length;
mas01cr@239 518 double **D = 0; // Differences query and target
mas01cr@239 519 double **DD = 0; // Matched filter distance
mas01cr@239 520
mas01cr@498 521 D = new double*[qpointers.nvectors]; // pre-allocate
mas01cr@498 522 DD = new double*[qpointers.nvectors];
mas01cr@239 523
mas01cr@239 524 off_t trackIndexOffset;
mas01cr@239 525
mas01cr@239 526 // Track loop
mas01cr@239 527 size_t data_buffer_size = 0;
mas01cr@239 528 double *data_buffer = 0;
mas01cr@498 529 lseek(adb->fd, adb->header->dataOffset, SEEK_SET);
mas01cr@239 530
mas01cr@498 531 std::set<std::string>::iterator keys_end = qstate->allowed_keys->end();
mas01cr@498 532 for(track = 0; track < adb->header->numFiles; track++) {
mas01cr@498 533 unsigned t = track;
mas01cr@498 534
mas01cr@498 535 while (qstate->allowed_keys->find((*adb->keys)[track]) == keys_end) {
mas01cr@498 536 track++;
mas01cr@498 537 if(track == adb->header->numFiles) {
mas01cr@498 538 goto loop_finish;
mas01cr@239 539 }
mas01cr@239 540 }
mas01cr@498 541 trackOffset = (*adb->track_offsets)[track];
mas01cr@498 542 if(track != t) {
mas01cr@498 543 lseek(adb->fd, adb->header->dataOffset + trackOffset, SEEK_SET);
mas01cr@498 544 }
mas01cr@498 545 trackIndexOffset = trackOffset / (adb->header->dim * sizeof(double)); // dbpointers.nvectors offset
mas01cr@239 546
mas01cr@498 547 if(audiodb_read_data(adb, adb->fd, track, &data_buffer, &data_buffer_size)) {
mas01cr@498 548 return 1;
mas01mc@292 549 }
mas01cr@498 550 if(wL <= (*adb->track_lengths)[track]) { // test for short sequences
mas01cr@498 551
mas01cr@498 552 audiodb_initialize_arrays(adb, spec, track, qpointers.nvectors, query, data_buffer, D, DD);
mas01mc@292 553
mas01cr@498 554 if((!(spec->refine.flags & ADB_REFINE_DURATION_RATIO)) ||
mas01cr@498 555 fabs(dbpointers.mean_duration[track]-qpointers.mean_duration[0]) < qpointers.mean_duration[0]*spec->refine.duration_ratio) {
mas01cr@239 556
mas01cr@239 557 // Search for minimum distance by shingles (concatenated vectors)
mas01cr@498 558 for(j = 0; j <= qpointers.nvectors - wL; j += HOP_SIZE) {
mas01cr@498 559 for(k = 0; k <= (*adb->track_lengths)[track] - wL; k += HOP_SIZE) {
mas01cr@498 560 double thisDist = 0;
mas01cr@498 561 double qn = qpointers.l2norm[j];
mas01cr@498 562 double sn = dbpointers.l2norm[trackIndexOffset + k];
mas01cr@498 563 switch(spec->params.distance) {
mas01cr@498 564 case ADB_DISTANCE_EUCLIDEAN_NORMED:
mas01cr@498 565 thisDist = 2-(2/(qn*sn))*DD[j][k];
mas01cr@498 566 break;
mas01cr@498 567 case ADB_DISTANCE_EUCLIDEAN:
mas01cr@498 568 thisDist = qn*qn + sn*sn - 2*DD[j][k];
mas01cr@498 569 break;
mas01cr@498 570 case ADB_DISTANCE_DOT_PRODUCT:
mas01cr@498 571 thisDist = DD[j][k];
mas01cr@498 572 break;
mas01cr@498 573 }
mas01cr@239 574 // Power test
mas01cr@498 575 if ((!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers.power[j], dbpointers.power[trackIndexOffset + k])) {
mas01cr@239 576 // radius test
mas01cr@498 577 if((!(spec->refine.flags & ADB_REFINE_RADIUS)) ||
mas01cr@509 578 thisDist <= (spec->refine.radius + ADB_DISTANCE_TOLERANCE)) {
mas01cr@498 579 adb_result_t r;
mas01cr@498 580 r.key = (*adb->keys)[track].c_str();
mas01cr@498 581 r.dist = thisDist;
mas01cr@498 582 if(spec->qid.flags & ADB_QID_FLAG_EXHAUSTIVE) {
mas01cr@498 583 r.qpos = j;
mas01cr@498 584 } else {
mas01cr@498 585 r.qpos = spec->qid.sequence_start;
mas01cr@498 586 }
mas01cr@498 587 r.ipos = k;
mas01cr@498 588 qstate->accumulator->add_point(&r);
mas01cr@239 589 }
mas01cr@239 590 }
mas01cr@239 591 }
mas01cr@239 592 }
mas01cr@239 593 } // Duration match
mas01cr@498 594 audiodb_delete_arrays(track, qpointers.nvectors, D, DD);
mas01cr@239 595 }
mas01cr@239 596 }
mas01cr@239 597
mas01cr@498 598 loop_finish:
mas01cr@498 599
mas01cr@239 600 free(data_buffer);
mas01cr@596 601 maybe_delete_array(query_data);
mas01cr@596 602 maybe_delete_array(qpointers.power_data);
mas01cr@596 603 maybe_delete_array(qpointers.l2norm_data);
mas01cr@596 604 maybe_delete_array(qpointers.mean_duration);
mas01cr@596 605 maybe_delete_array(dbpointers.power_data);
mas01cr@596 606 maybe_delete_array(dbpointers.l2norm_data);
mas01cr@596 607 maybe_delete_array(dbpointers.mean_duration);
mas01cr@596 608 maybe_delete_array(D);
mas01cr@596 609 maybe_delete_array(DD);
mas01cr@498 610
mas01cr@498 611 return 0;
mas01cr@239 612 }