annotate query.cpp @ 469:d3afc91d205d api-inversion

Move audioDB::query over to audioDB.cpp. At the same time, remove all the abstraction violations in audioDB::query, which came in two flavours: use of dbH->numFiles, which is dealt with by getting the database status instead (and is eventually unnecessary, being only needed now because reporters are implemented in terms of vectors indexed by ID), and use of fileTable in reporter's report functions (dealt with by passing in the adb instead). To actually implement reporting as of now, we continue to use stuff from audioDB-internals.h; maybe someday we will be clean and shiny.
author mas01cr
date Wed, 31 Dec 2008 15:44:16 +0000
parents 4dbd7917bf9e
children 0f96ad351990
rev   line source
mas01cr@239 1 #include "audioDB.h"
mas01cr@422 2 #include "audioDB-internals.h"
mas01cr@422 3 #include "accumulators.h"
mas01cr@422 4
mas01cr@444 5 bool audiodb_powers_acceptable(adb_query_refine_t *r, double p1, double p2) {
mas01cr@425 6 if (r->flags & ADB_REFINE_ABSOLUTE_THRESHOLD) {
mas01cr@425 7 if ((p1 < r->absolute_threshold) || (p2 < r->absolute_threshold)) {
mas01cr@239 8 return false;
mas01cr@239 9 }
mas01cr@239 10 }
mas01cr@425 11 if (r->flags & ADB_REFINE_RELATIVE_THRESHOLD) {
mas01cr@425 12 if (fabs(p1-p2) > fabs(r->relative_threshold)) {
mas01cr@239 13 return false;
mas01cr@239 14 }
mas01cr@239 15 }
mas01cr@239 16 return true;
mas01cr@239 17 }
mas01cr@239 18
mas01cr@469 19 adb_query_results_t *audiodb_query_spec(adb_t *adb, adb_query_spec_t *qspec) {
mas01cr@469 20 adb_qstate_internal_t qstate = {0};
mas01cr@458 21 qstate.allowed_keys = new std::set<std::string>;
mas01cr@469 22 adb_query_results_t *results;
mas01cr@469 23 if(qspec->refine.flags & ADB_REFINE_INCLUDE_KEYLIST) {
mas01cr@469 24 for(unsigned int k = 0; k < qspec->refine.include.nkeys; k++) {
mas01cr@469 25 qstate.allowed_keys->insert(qspec->refine.include.keys[k]);
mas01cr@458 26 }
mas01cr@458 27 } else {
mas01cr@458 28 for(unsigned int k = 0; k < adb->header->numFiles; k++) {
mas01cr@458 29 qstate.allowed_keys->insert((*adb->keys)[k]);
mas01cr@458 30 }
mas01cr@458 31 }
mas01cr@469 32 if(qspec->refine.flags & ADB_REFINE_EXCLUDE_KEYLIST) {
mas01cr@469 33 for(unsigned int k = 0; k < qspec->refine.exclude.nkeys; k++) {
mas01cr@469 34 qstate.allowed_keys->erase(qspec->refine.exclude.keys[k]);
mas01cr@458 35 }
mas01cr@458 36 }
mas01cr@431 37
mas01cr@469 38 switch(qspec->params.distance) {
mas01cr@431 39 case ADB_DISTANCE_DOT_PRODUCT:
mas01cr@469 40 switch(qspec->params.accumulation) {
mas01cr@431 41 case ADB_ACCUMULATION_DB:
mas01cr@469 42 qstate.accumulator = new DBAccumulator<adb_result_dist_gt>(qspec->params.npoints);
mas01cr@431 43 break;
mas01cr@431 44 case ADB_ACCUMULATION_PER_TRACK:
mas01cr@469 45 qstate.accumulator = new PerTrackAccumulator<adb_result_dist_gt>(qspec->params.npoints, qspec->params.ntracks);
mas01cr@431 46 break;
mas01cr@431 47 case ADB_ACCUMULATION_ONE_TO_ONE:
mas01cr@458 48 qstate.accumulator = new NearestAccumulator<adb_result_dist_gt>();
mas01cr@431 49 break;
mas01cr@431 50 default:
mas01cr@469 51 goto error;
mas01cr@239 52 }
mas01cr@239 53 break;
mas01cr@431 54 case ADB_DISTANCE_EUCLIDEAN_NORMED:
mas01cr@431 55 case ADB_DISTANCE_EUCLIDEAN:
mas01cr@469 56 switch(qspec->params.accumulation) {
mas01cr@431 57 case ADB_ACCUMULATION_DB:
mas01cr@469 58 qstate.accumulator = new DBAccumulator<adb_result_dist_lt>(qspec->params.npoints);
mas01cr@431 59 break;
mas01cr@431 60 case ADB_ACCUMULATION_PER_TRACK:
mas01cr@469 61 qstate.accumulator = new PerTrackAccumulator<adb_result_dist_lt>(qspec->params.npoints, qspec->params.ntracks);
mas01cr@431 62 break;
mas01cr@431 63 case ADB_ACCUMULATION_ONE_TO_ONE:
mas01cr@458 64 qstate.accumulator = new NearestAccumulator<adb_result_dist_lt>();
mas01cr@431 65 break;
mas01cr@431 66 default:
mas01cr@469 67 goto error;
mas01mc@263 68 }
mas01mc@263 69 break;
mas01cr@239 70 default:
mas01cr@469 71 goto error;
mas01cr@431 72 }
mas01cr@431 73
mas01cr@469 74 if((qspec->refine.flags & ADB_REFINE_RADIUS) && audiodb_index_exists(adb->path, qspec->refine.radius, qspec->qid.sequence_length)) {
mas01cr@469 75 if(audiodb_index_query_loop(adb, qspec, &qstate) < 0) {
mas01cr@469 76 goto error;
mas01cr@466 77 }
mas01cr@466 78 } else {
mas01cr@469 79 if(audiodb_query_loop(adb, qspec, &qstate)) {
mas01cr@469 80 goto error;
mas01cr@452 81 }
mas01mc@329 82 }
mas01mc@292 83
mas01cr@469 84 results = qstate.accumulator->get_points();
mas01cr@458 85
mas01cr@458 86 delete qstate.accumulator;
mas01cr@458 87 delete qstate.allowed_keys;
mas01cr@458 88
mas01cr@469 89 return results;
mas01cr@458 90
mas01cr@469 91 error:
mas01cr@469 92 if(qstate.accumulator)
mas01cr@469 93 delete qstate.accumulator;
mas01cr@469 94 if(qstate.allowed_keys)
mas01cr@469 95 delete qstate.allowed_keys;
mas01cr@469 96 return NULL;
mas01cr@239 97 }
mas01cr@239 98
mas01cr@459 99 int audiodb_query_free_results(adb_t *adb, adb_query_spec_t *spec, adb_query_results_t *rs) {
mas01cr@459 100 free(rs->results);
mas01cr@459 101 free(rs);
mas01cr@459 102 return 0;
mas01cr@459 103 }
mas01cr@459 104
mas01cr@439 105 static void audiodb_initialize_arrays(adb_t *adb, adb_query_spec_t *spec, int track, unsigned int numVectors, double *query, double *data_buffer, double **D, double **DD) {
mas01cr@239 106 unsigned int j, k, l, w;
mas01cr@239 107 double *dp, *qp, *sp;
mas01cr@239 108
mas01cr@439 109 const unsigned HOP_SIZE = spec->refine.hopsize;
mas01cr@435 110 const unsigned wL = spec->qid.sequence_length;
mas01cr@239 111
mas01cr@239 112 for(j = 0; j < numVectors; j++) {
mas01cr@239 113 // Sum products matrix
mas01cr@433 114 D[j] = new double[(*adb->track_lengths)[track]];
mas01cr@239 115 assert(D[j]);
mas01cr@239 116 // Matched filter matrix
mas01cr@433 117 DD[j]=new double[(*adb->track_lengths)[track]];
mas01cr@239 118 assert(DD[j]);
mas01cr@239 119 }
mas01cr@239 120
mas01cr@239 121 // Dot product
mas01cr@239 122 for(j = 0; j < numVectors; j++)
mas01cr@433 123 for(k = 0; k < (*adb->track_lengths)[track]; k++){
mas01cr@439 124 qp = query + j * adb->header->dim;
mas01cr@439 125 sp = data_buffer + k * adb->header->dim;
mas01cr@239 126 DD[j][k] = 0.0; // Initialize matched filter array
mas01cr@239 127 dp = &D[j][k]; // point to correlation cell j,k
mas01cr@239 128 *dp = 0.0; // initialize correlation cell
mas01cr@439 129 l = adb->header->dim; // size of vectors
mas01cr@239 130 while(l--)
mas01cr@239 131 *dp += *qp++ * *sp++;
mas01cr@239 132 }
mas01cr@239 133
mas01cr@239 134 // Matched Filter
mas01cr@239 135 // HOP SIZE == 1
mas01cr@239 136 double* spd;
mas01cr@239 137 if(HOP_SIZE == 1) { // HOP_SIZE = shingleHop
mas01cr@239 138 for(w = 0; w < wL; w++) {
mas01cr@239 139 for(j = 0; j < numVectors - w; j++) {
mas01cr@239 140 sp = DD[j];
mas01cr@239 141 spd = D[j+w] + w;
mas01cr@433 142 k = (*adb->track_lengths)[track] - w;
mas01mc@292 143 while(k--)
mas01mc@292 144 *sp++ += *spd++;
mas01cr@239 145 }
mas01cr@239 146 }
mas01cr@239 147 } else { // HOP_SIZE != 1
mas01cr@239 148 for(w = 0; w < wL; w++) {
mas01cr@239 149 for(j = 0; j < numVectors - w; j += HOP_SIZE) {
mas01cr@239 150 sp = DD[j];
mas01cr@239 151 spd = D[j+w]+w;
mas01cr@433 152 for(k = 0; k < (*adb->track_lengths)[track] - w; k += HOP_SIZE) {
mas01cr@239 153 *sp += *spd;
mas01cr@239 154 sp += HOP_SIZE;
mas01cr@239 155 spd += HOP_SIZE;
mas01cr@239 156 }
mas01cr@239 157 }
mas01cr@239 158 }
mas01cr@239 159 }
mas01cr@239 160 }
mas01cr@239 161
/*
 * Release the rows of the two work matrices.
 *
 * Deletes D[0..numVectors) and DD[0..numVectors); either table
 * pointer may be NULL, in which case that table is skipped.  The
 * tables themselves (the arrays of row pointers) are not deleted,
 * and the row pointers are left as they were.  track is not used.
 */
static void audiodb_delete_arrays(int track, unsigned int numVectors, double **D, double **DD) {
  double **tables[2] = { D, DD };
  for(int t = 0; t < 2; t++) {
    double **rows = tables[t];
    if(rows == NULL) {
      continue;
    }
    for(unsigned int row = 0; row < numVectors; row++) {
      delete[] rows[row];
    }
  }
}
mas01cr@239 174
mas01cr@433 175 int audiodb_read_data(adb_t *adb, int trkfid, int track, double **data_buffer_p, size_t *data_buffer_size_p) {
mas01cr@433 176 uint32_t track_length = (*adb->track_lengths)[track];
mas01cr@433 177 size_t track_size = track_length * sizeof(double) * adb->header->dim;
mas01cr@433 178 if (track_size > *data_buffer_size_p) {
mas01cr@239 179 if(*data_buffer_p) {
mas01cr@239 180 free(*data_buffer_p);
mas01cr@239 181 }
mas01cr@239 182 {
mas01cr@433 183 *data_buffer_size_p = track_size;
mas01cr@433 184 void *tmp = malloc(track_size);
mas01cr@239 185 if (tmp == NULL) {
mas01cr@433 186 goto error;
mas01cr@239 187 }
mas01cr@239 188 *data_buffer_p = (double *) tmp;
mas01cr@239 189 }
mas01cr@239 190 }
mas01cr@239 191
mas01cr@433 192 read_or_goto_error(trkfid, *data_buffer_p, track_size);
mas01cr@433 193 return 0;
mas01cr@433 194
mas01cr@433 195 error:
mas01cr@433 196 return 1;
mas01cr@239 197 }
mas01cr@239 198
mas01cr@461 199 int audiodb_track_id_datum(adb_t *adb, uint32_t track_id, adb_datum_t *d) {
mas01cr@461 200 off_t track_offset = (*adb->track_offsets)[track_id];
mas01cr@461 201 if(adb->header->flags & O2_FLAG_LARGE_ADB) {
mas01cr@461 202 /* create a reference/insert, then use adb_insert_create_datum() */
mas01cr@461 203 adb_reference_t reference = {0};
mas01cr@461 204 char features[MAXSTR], power[MAXSTR], times[MAXSTR];
mas01cr@461 205 lseek(adb->fd, adb->header->dataOffset + track_id * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@461 206 /* FIXME: learn not to worry and love the bomb^Wbuffer overflow */
mas01cr@461 207 read(adb->fd, features, MAXSTR);
mas01cr@461 208 reference.features = features;
mas01cr@461 209 if(adb->header->flags & O2_FLAG_POWER) {
mas01cr@461 210 lseek(adb->fd, adb->header->powerTableOffset + track_id * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@461 211 read(adb->fd, power, MAXSTR);
mas01cr@461 212 reference.power = power;
mas01cr@461 213 }
mas01cr@461 214 if(adb->header->flags & O2_FLAG_TIMES) {
mas01cr@461 215 lseek(adb->fd, adb->header->timesTableOffset + track_id * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@461 216 read(adb->fd, times, MAXSTR);
mas01cr@461 217 reference.times = times;
mas01cr@461 218 }
mas01cr@461 219 audiodb_insert_create_datum(&reference, d);
mas01cr@461 220 } else {
mas01cr@461 221 /* initialize from sources of data that we already have */
mas01cr@461 222 d->nvectors = (*adb->track_lengths)[track_id];
mas01cr@461 223 d->dim = adb->header->dim;
mas01cr@461 224 d->key = (*adb->keys)[track_id].c_str();
mas01cr@461 225 /* read out stuff from the database tables */
mas01cr@461 226 d->data = (double *) malloc(d->nvectors * d->dim * sizeof(double));
mas01cr@461 227 lseek(adb->fd, adb->header->dataOffset + track_offset, SEEK_SET);
mas01cr@461 228 read(adb->fd, d->data, d->nvectors * d->dim * sizeof(double));
mas01cr@461 229 if(adb->header->flags & O2_FLAG_POWER) {
mas01cr@461 230 d->power = (double *) malloc(d->nvectors * sizeof(double));
mas01cr@461 231 lseek(adb->fd, adb->header->powerTableOffset + track_offset / d->dim, SEEK_SET);
mas01cr@461 232 read(adb->fd, d->power, d->nvectors * sizeof(double));
mas01cr@461 233 }
mas01cr@461 234 if(adb->header->flags & O2_FLAG_TIMES) {
mas01cr@461 235 d->times = (double *) malloc(2 * d->nvectors * sizeof(double));
mas01cr@461 236 lseek(adb->fd, adb->header->timesTableOffset + track_offset / d->dim, SEEK_SET);
mas01cr@461 237 read(adb->fd, d->times, 2 * d->nvectors * sizeof(double));
mas01cr@461 238 }
mas01cr@461 239 }
mas01cr@461 240 return 0;
mas01cr@461 241 }
mas01cr@461 242
mas01cr@461 243 int audiodb_datum_qpointers(adb_datum_t *d, uint32_t sequence_length, double **vector_data, double **vector, adb_qpointers_internal_t *qpointers) {
mas01cr@461 244 uint32_t nvectors = d->nvectors;
mas01cr@461 245
mas01cr@461 246 qpointers->nvectors = nvectors;
mas01cr@461 247
mas01cr@461 248 size_t vector_size = nvectors * sizeof(double) * d->dim;
mas01cr@461 249 *vector_data = new double[vector_size];
mas01cr@461 250 memcpy(*vector_data, d->data, vector_size);
mas01cr@461 251
mas01cr@461 252 qpointers->l2norm_data = new double[vector_size / d->dim];
mas01cr@461 253 audiodb_l2norm_buffer(*vector_data, d->dim, nvectors, qpointers->l2norm_data);
mas01cr@461 254 audiodb_sequence_sum(qpointers->l2norm_data, nvectors, sequence_length);
mas01cr@461 255 audiodb_sequence_sqrt(qpointers->l2norm_data, nvectors, sequence_length);
mas01cr@461 256
mas01cr@461 257 if(d->power) {
mas01cr@461 258 qpointers->power_data = new double[vector_size / d->dim];
mas01cr@461 259 memcpy(qpointers->power_data, d->power, vector_size / d->dim);
mas01cr@461 260 audiodb_sequence_sum(qpointers->power_data, nvectors, sequence_length);
mas01cr@461 261 audiodb_sequence_average(qpointers->power_data, nvectors, sequence_length);
mas01cr@461 262 }
mas01cr@461 263
mas01cr@461 264 if(d->times) {
mas01cr@461 265 qpointers->mean_duration = new double[1];
mas01cr@461 266 *qpointers->mean_duration = 0;
mas01cr@461 267 for(unsigned int k = 0; k < nvectors; k++) {
mas01cr@461 268 *qpointers->mean_duration += d->times[2*k+1] - d->times[2*k];
mas01cr@461 269 }
mas01cr@461 270 *qpointers->mean_duration /= nvectors;
mas01cr@461 271 }
mas01cr@461 272
mas01cr@461 273 *vector = *vector_data;
mas01cr@461 274 qpointers->l2norm = qpointers->l2norm_data;
mas01cr@461 275 qpointers->power = qpointers->power_data;
mas01cr@461 276 return 0;
mas01cr@461 277 }
mas01cr@461 278
mas01cr@444 279 int audiodb_query_spec_qpointers(adb_t *adb, adb_query_spec_t *spec, double **vector_data, double **vector, adb_qpointers_internal_t *qpointers) {
mas01cr@443 280 adb_datum_t *datum;
mas01cr@443 281 adb_datum_t d = {0};
mas01cr@443 282 uint32_t sequence_length;
mas01cr@443 283 uint32_t sequence_start;
mas01cr@443 284
mas01cr@443 285 datum = spec->qid.datum;
mas01cr@443 286 sequence_length = spec->qid.sequence_length;
mas01cr@443 287 sequence_start = spec->qid.sequence_start;
mas01cr@443 288
mas01cr@443 289 if(datum->data) {
mas01cr@443 290 if(datum->dim != adb->header->dim) {
mas01cr@443 291 return 1;
mas01cr@443 292 }
mas01cr@443 293 /* initialize d, and mark that nothing needs freeing later. */
mas01cr@443 294 d = *datum;
mas01cr@443 295 datum = &d;
mas01cr@443 296 } else if (datum->key) {
mas01cr@449 297 uint32_t track_id;
mas01cr@449 298 if((track_id = audiodb_key_index(adb, datum->key)) == (uint32_t) -1) {
mas01cr@443 299 return 1;
mas01cr@443 300 }
mas01cr@461 301 audiodb_track_id_datum(adb, track_id, &d);
mas01cr@443 302 } else {
mas01cr@443 303 return 1;
mas01cr@443 304 }
mas01cr@443 305
mas01cr@444 306 /* FIXME: check the overflow logic here */
mas01cr@461 307 if(sequence_start + sequence_length > d.nvectors) {
mas01cr@461 308 if(datum != &d) {
mas01cr@461 309 audiodb_free_datum(&d);
mas01cr@461 310 }
mas01cr@443 311 return 1;
mas01cr@443 312 }
mas01cr@443 313
mas01cr@461 314 audiodb_datum_qpointers(&d, sequence_length, vector_data, vector, qpointers);
mas01cr@443 315
mas01cr@461 316 /* Finally, if applicable, set up the moving qpointers. */
mas01cr@468 317 if(spec->qid.flags & ADB_QID_FLAG_EXHAUSTIVE) {
mas01cr@461 318 /* the qpointers are already at the start, and so correct. */
mas01cr@443 319 } else {
mas01cr@461 320 /* adjust the qpointers to point to the correct place in the sequence */
mas01cr@443 321 *vector = *vector_data + spec->qid.sequence_start * d.dim;
mas01cr@443 322 qpointers->l2norm = qpointers->l2norm_data + spec->qid.sequence_start;
mas01cr@444 323 if(d.power) {
mas01cr@444 324 qpointers->power = qpointers->power_data + spec->qid.sequence_start;
mas01cr@444 325 }
mas01cr@444 326 qpointers->nvectors = sequence_length;
mas01cr@443 327 }
mas01cr@443 328
mas01cr@443 329 /* Clean up: free any bits of datum that we have ourselves
mas01cr@443 330 * allocated. */
mas01cr@443 331 if(datum != &d) {
mas01cr@443 332 audiodb_free_datum(&d);
mas01cr@443 333 }
mas01cr@444 334
mas01cr@444 335 return 0;
mas01cr@443 336 }
mas01cr@443 337
mas01cr@448 338 static int audiodb_set_up_dbpointers(adb_t *adb, adb_query_spec_t *spec, adb_qpointers_internal_t *dbpointers) {
mas01cr@438 339 uint32_t nvectors = adb->header->length / (adb->header->dim * sizeof(double));
mas01cr@435 340 uint32_t sequence_length = spec->qid.sequence_length;
mas01cr@438 341
mas01cr@437 342 bool using_power = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD);
mas01cr@437 343 bool using_times = spec->refine.flags & ADB_REFINE_DURATION_RATIO;
mas01cr@437 344 double *times_table = NULL;
mas01cr@435 345
mas01cr@239 346
mas01cr@438 347 dbpointers->nvectors = nvectors;
mas01cr@438 348 dbpointers->l2norm_data = new double[nvectors];
mas01cr@438 349
mas01cr@438 350 double *snpp = dbpointers->l2norm_data, *sppp = 0;
mas01cr@434 351 lseek(adb->fd, adb->header->l2normTableOffset, SEEK_SET);
mas01cr@438 352 read_or_goto_error(adb->fd, dbpointers->l2norm_data, nvectors * sizeof(double));
mas01cr@239 353
mas01cr@437 354 if (using_power) {
mas01cr@434 355 if (!(adb->header->flags & O2_FLAG_POWER)) {
mas01cr@434 356 goto error;
mas01cr@239 357 }
mas01cr@438 358 dbpointers->power_data = new double[nvectors];
mas01cr@438 359 sppp = dbpointers->power_data;
mas01cr@434 360 lseek(adb->fd, adb->header->powerTableOffset, SEEK_SET);
mas01cr@438 361 read_or_goto_error(adb->fd, dbpointers->power_data, nvectors * sizeof(double));
mas01cr@239 362 }
mas01cr@239 363
mas01cr@434 364 for(unsigned int i = 0; i < adb->header->numFiles; i++){
mas01cr@434 365 size_t track_length = (*adb->track_lengths)[i];
mas01cr@435 366 if(track_length >= sequence_length) {
mas01cr@435 367 audiodb_sequence_sum(snpp, track_length, sequence_length);
mas01cr@435 368 audiodb_sequence_sqrt(snpp, track_length, sequence_length);
mas01cr@437 369 if (using_power) {
mas01cr@435 370 audiodb_sequence_sum(sppp, track_length, sequence_length);
mas01cr@435 371 audiodb_sequence_average(sppp, track_length, sequence_length);
mas01cr@239 372 }
mas01cr@239 373 }
mas01cr@434 374 snpp += track_length;
mas01cr@437 375 if (using_power) {
mas01cr@434 376 sppp += track_length;
mas01cr@239 377 }
mas01cr@239 378 }
mas01cr@239 379
mas01cr@437 380 if (using_times) {
mas01cr@434 381 if(!(adb->header->flags & O2_FLAG_TIMES)) {
mas01cr@437 382 goto error;
mas01cr@239 383 }
mas01cr@239 384
mas01cr@438 385 dbpointers->mean_duration = new double[adb->header->numFiles];
mas01cr@239 386
mas01cr@438 387 times_table = (double *) malloc(2 * nvectors * sizeof(double));
mas01cr@437 388 if(!times_table) {
mas01cr@437 389 goto error;
mas01cr@437 390 }
mas01cr@437 391 lseek(adb->fd, adb->header->timesTableOffset, SEEK_SET);
mas01cr@438 392 read_or_goto_error(adb->fd, times_table, 2 * nvectors * sizeof(double));
mas01cr@434 393 for(unsigned int k = 0; k < adb->header->numFiles; k++) {
mas01cr@434 394 size_t track_length = (*adb->track_lengths)[k];
mas01cr@239 395 unsigned int j;
mas01cr@438 396 dbpointers->mean_duration[k] = 0.0;
mas01cr@434 397 for(j = 0; j < track_length; j++) {
mas01cr@438 398 dbpointers->mean_duration[k] += times_table[2*j+1] - times_table[2*j];
mas01cr@239 399 }
mas01cr@438 400 dbpointers->mean_duration[k] /= j;
mas01cr@239 401 }
mas01cr@437 402
mas01cr@437 403 free(times_table);
mas01cr@437 404 times_table = NULL;
mas01cr@239 405 }
mas01cr@239 406
mas01cr@438 407 dbpointers->l2norm = dbpointers->l2norm_data;
mas01cr@438 408 dbpointers->power = dbpointers->power_data;
mas01cr@434 409 return 0;
mas01cr@434 410
mas01cr@434 411 error:
mas01cr@438 412 if(dbpointers->l2norm_data) {
mas01cr@438 413 delete [] dbpointers->l2norm_data;
mas01cr@434 414 }
mas01cr@438 415 if(dbpointers->power_data) {
mas01cr@438 416 delete [] dbpointers->power_data;
mas01cr@434 417 }
mas01cr@438 418 if(dbpointers->mean_duration) {
mas01cr@438 419 delete [] dbpointers->mean_duration;
mas01cr@434 420 }
mas01cr@437 421 if(times_table) {
mas01cr@437 422 free(times_table);
mas01cr@437 423 }
mas01cr@434 424 return 1;
mas01cr@434 425
mas01cr@239 426 }
mas01cr@239 427
mas01cr@463 428 int audiodb_query_queue_loop(adb_t *adb, adb_query_spec_t *spec, adb_qstate_internal_t *qstate, double *query, adb_qpointers_internal_t *qpointers) {
mas01cr@438 429 adb_qpointers_internal_t dbpointers = {0};
mas01mc@292 430
mas01cr@436 431 uint32_t sequence_length = spec->qid.sequence_length;
mas01cr@437 432 bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD);
mas01cr@436 433
mas01cr@458 434 if(qstate->exact_evaluation_queue->size() == 0) {
mas01cr@463 435 return 0;
mas01cr@455 436 }
mas01mc@292 437
mas01cr@462 438 /* We are guaranteed that the order of points is sorted by:
mas01cr@462 439 * {trackID, spos, qpos} so we can be relatively efficient in
mas01cr@462 440 * initialization of track data. We assume that points usually
mas01cr@462 441 * don't overlap, so we will use exhaustive dot product evaluation
mas01cr@463 442 * (instead of memoization of partial sums, as in query_loop()).
mas01cr@463 443 */
mas01cr@462 444 double dist;
mas01cr@462 445 double *dbdata = 0, *dbdata_pointer;
mas01cr@462 446 Uns32T currentTrack = 0x80000000; // KLUDGE: Initialize with a value outside of track index range
mas01cr@462 447 Uns32T npairs = qstate->exact_evaluation_queue->size();
mas01cr@462 448 while(npairs--) {
mas01cr@462 449 PointPair pp = qstate->exact_evaluation_queue->top();
mas01cr@462 450 if(currentTrack != pp.trackID) {
mas01cr@462 451 SAFE_DELETE_ARRAY(dbdata);
mas01cr@462 452 SAFE_DELETE_ARRAY(dbpointers.l2norm_data);
mas01cr@462 453 SAFE_DELETE_ARRAY(dbpointers.power_data);
mas01cr@462 454 SAFE_DELETE_ARRAY(dbpointers.mean_duration);
mas01cr@462 455 currentTrack = pp.trackID;
mas01cr@462 456 adb_datum_t d = {0};
mas01cr@462 457 if(audiodb_track_id_datum(adb, pp.trackID, &d)) {
mas01cr@463 458 delete qstate->exact_evaluation_queue;
mas01cr@463 459 return 1;
mas01cr@462 460 }
mas01cr@462 461 if(audiodb_datum_qpointers(&d, sequence_length, &dbdata, &dbdata_pointer, &dbpointers)) {
mas01cr@463 462 delete qstate->exact_evaluation_queue;
mas01cr@462 463 audiodb_free_datum(&d);
mas01cr@463 464 return 1;
mas01cr@462 465 }
mas01cr@462 466 audiodb_free_datum(&d);
mas01cr@434 467 }
mas01cr@468 468 Uns32T qPos = (spec->qid.flags & ADB_QID_FLAG_EXHAUSTIVE) ? pp.qpos : 0;
mas01cr@462 469 Uns32T sPos = pp.spos; // index into l2norm table
mas01mc@324 470 // Test power thresholds before computing distance
mas01cr@438 471 if( ( (!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers->power[qPos], dbpointers.power[sPos])) &&
mas01cr@462 472 ( qPos<qpointers->nvectors-sequence_length+1 && sPos<(*adb->track_lengths)[pp.trackID]-sequence_length+1 ) ){
mas01mc@324 473 // Compute distance
mas01cr@462 474 dist = audiodb_dot_product(query + qPos*adb->header->dim, dbdata + sPos*adb->header->dim, adb->header->dim*sequence_length);
mas01cr@437 475 double qn = qpointers->l2norm[qPos];
mas01cr@438 476 double sn = dbpointers.l2norm[sPos];
mas01cr@435 477 switch(spec->params.distance) {
mas01cr@431 478 case ADB_DISTANCE_EUCLIDEAN_NORMED:
mas01mc@324 479 dist = 2 - (2/(qn*sn))*dist;
mas01cr@431 480 break;
mas01cr@431 481 case ADB_DISTANCE_EUCLIDEAN:
mas01cr@431 482 dist = qn*qn + sn*sn - 2*dist;
mas01cr@431 483 break;
mas01cr@431 484 }
mas01cr@463 485 if((!(spec->refine.flags & ADB_REFINE_RADIUS)) ||
mas01cr@463 486 dist <= (spec->refine.radius+O2_DISTANCE_TOLERANCE)) {
mas01cr@424 487 adb_result_t r;
mas01cr@453 488 r.key = (*adb->keys)[pp.trackID].c_str();
mas01cr@424 489 r.dist = dist;
mas01cr@424 490 r.qpos = pp.qpos;
mas01cr@424 491 r.ipos = pp.spos;
mas01cr@458 492 qstate->accumulator->add_point(&r);
mas01cr@424 493 }
mas01mc@292 494 }
mas01cr@458 495 qstate->exact_evaluation_queue->pop();
mas01mc@292 496 }
mas01mc@315 497 // Cleanup
mas01cr@462 498 SAFE_DELETE_ARRAY(dbdata);
mas01cr@438 499 SAFE_DELETE_ARRAY(dbpointers.l2norm_data);
mas01cr@438 500 SAFE_DELETE_ARRAY(dbpointers.power_data);
mas01cr@438 501 SAFE_DELETE_ARRAY(dbpointers.mean_duration);
mas01cr@458 502 delete qstate->exact_evaluation_queue;
mas01cr@463 503 return 0;
mas01mc@292 504 }
mas01mc@292 505
mas01cr@463 506 int audiodb_query_loop(adb_t *adb, adb_query_spec_t *spec, adb_qstate_internal_t *qstate) {
mas01cr@239 507
mas01cr@239 508 double *query, *query_data;
mas01cr@438 509 adb_qpointers_internal_t qpointers = {0}, dbpointers = {0};
mas01cr@437 510
mas01cr@437 511 bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD);
mas01cr@239 512
mas01cr@452 513 if(adb->header->flags & O2_FLAG_LARGE_ADB) {
mas01cr@452 514 /* FIXME: actually it would be nice to support this mode of
mas01cr@452 515 * operation, but for now... */
mas01cr@452 516 return 1;
mas01cr@452 517 }
mas01mc@324 518
mas01cr@444 519 if(audiodb_query_spec_qpointers(adb, spec, &query_data, &query, &qpointers)) {
mas01cr@452 520 return 1;
mas01cr@444 521 }
mas01cr@239 522
mas01cr@448 523 if(audiodb_set_up_dbpointers(adb, spec, &dbpointers)) {
mas01cr@452 524 return 1;
mas01cr@434 525 }
mas01cr@239 526
mas01cr@451 527 unsigned j,k,track,trackOffset=0, HOP_SIZE = spec->refine.hopsize;
mas01cr@435 528 unsigned wL = spec->qid.sequence_length;
mas01cr@239 529 double **D = 0; // Differences query and target
mas01cr@239 530 double **DD = 0; // Matched filter distance
mas01cr@239 531
mas01cr@437 532 D = new double*[qpointers.nvectors]; // pre-allocate
mas01cr@437 533 DD = new double*[qpointers.nvectors];
mas01cr@239 534
mas01cr@239 535 off_t trackIndexOffset;
mas01cr@239 536
mas01cr@239 537 // Track loop
mas01cr@239 538 size_t data_buffer_size = 0;
mas01cr@239 539 double *data_buffer = 0;
mas01cr@451 540 lseek(adb->fd, adb->header->dataOffset, SEEK_SET);
mas01cr@239 541
mas01cr@458 542 std::set<std::string>::iterator keys_end = qstate->allowed_keys->end();
mas01cr@453 543 for(track = 0; track < adb->header->numFiles; track++) {
mas01cr@453 544 unsigned t = track;
mas01cr@458 545
mas01cr@458 546 while (qstate->allowed_keys->find((*adb->keys)[track]) == keys_end) {
mas01cr@453 547 track++;
mas01cr@453 548 if(track == adb->header->numFiles) {
mas01cr@453 549 goto loop_finish;
mas01cr@239 550 }
mas01cr@239 551 }
mas01cr@453 552 trackOffset = (*adb->track_offsets)[track];
mas01cr@453 553 if(track != t) {
mas01cr@453 554 lseek(adb->fd, adb->header->dataOffset + trackOffset, SEEK_SET);
mas01mc@292 555 }
mas01cr@451 556 trackIndexOffset = trackOffset / (adb->header->dim * sizeof(double)); // dbpointers.nvectors offset
mas01cr@239 557
mas01cr@452 558 if(audiodb_read_data(adb, adb->fd, track, &data_buffer, &data_buffer_size)) {
mas01cr@452 559 return 1;
mas01cr@452 560 }
mas01cr@451 561 if(wL <= (*adb->track_lengths)[track]) { // test for short sequences
mas01cr@239 562
mas01cr@439 563 audiodb_initialize_arrays(adb, spec, track, qpointers.nvectors, query, data_buffer, D, DD);
mas01cr@239 564
mas01cr@437 565 if((!(spec->refine.flags & ADB_REFINE_DURATION_RATIO)) ||
mas01cr@438 566 fabs(dbpointers.mean_duration[track]-qpointers.mean_duration[0]) < qpointers.mean_duration[0]*spec->refine.duration_ratio) {
mas01cr@239 567
mas01cr@239 568 // Search for minimum distance by shingles (concatenated vectors)
mas01cr@437 569 for(j = 0; j <= qpointers.nvectors - wL; j += HOP_SIZE) {
mas01cr@451 570 for(k = 0; k <= (*adb->track_lengths)[track] - wL; k += HOP_SIZE) {
mas01cr@431 571 double thisDist = 0;
mas01cr@438 572 double qn = qpointers.l2norm[j];
mas01cr@438 573 double sn = dbpointers.l2norm[trackIndexOffset + k];
mas01cr@435 574 switch(spec->params.distance) {
mas01cr@431 575 case ADB_DISTANCE_EUCLIDEAN_NORMED:
mas01cr@438 576 thisDist = 2-(2/(qn*sn))*DD[j][k];
mas01cr@431 577 break;
mas01cr@431 578 case ADB_DISTANCE_EUCLIDEAN:
mas01cr@438 579 thisDist = qn*qn + sn*sn - 2*DD[j][k];
mas01cr@431 580 break;
mas01cr@431 581 case ADB_DISTANCE_DOT_PRODUCT:
mas01cr@431 582 thisDist = DD[j][k];
mas01cr@431 583 break;
mas01cr@431 584 }
mas01cr@239 585 // Power test
mas01cr@438 586 if ((!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers.power[j], dbpointers.power[trackIndexOffset + k])) {
mas01cr@239 587 // radius test
mas01cr@435 588 if((!(spec->refine.flags & ADB_REFINE_RADIUS)) ||
mas01cr@435 589 thisDist <= (spec->refine.radius+O2_DISTANCE_TOLERANCE)) {
mas01cr@423 590 adb_result_t r;
mas01cr@453 591 r.key = (*adb->keys)[track].c_str();
mas01cr@423 592 r.dist = thisDist;
mas01cr@468 593 if(spec->qid.flags & ADB_QID_FLAG_EXHAUSTIVE) {
mas01cr@451 594 r.qpos = j;
mas01cr@451 595 } else {
mas01cr@451 596 r.qpos = spec->qid.sequence_start;
mas01cr@451 597 }
mas01cr@423 598 r.ipos = k;
mas01cr@458 599 qstate->accumulator->add_point(&r);
mas01cr@239 600 }
mas01cr@239 601 }
mas01cr@239 602 }
mas01cr@239 603 }
mas01cr@239 604 } // Duration match
mas01cr@437 605 audiodb_delete_arrays(track, qpointers.nvectors, D, DD);
mas01cr@239 606 }
mas01cr@239 607 }
mas01cr@239 608
mas01cr@453 609 loop_finish:
mas01cr@453 610
mas01cr@239 611 free(data_buffer);
mas01cr@239 612
mas01cr@239 613 // Clean up
mas01cr@239 614 if(query_data)
mas01cr@239 615 delete[] query_data;
mas01cr@437 616 if(qpointers.l2norm_data)
mas01cr@437 617 delete[] qpointers.l2norm_data;
mas01cr@437 618 if(qpointers.power_data)
mas01cr@437 619 delete[] qpointers.power_data;
mas01cr@437 620 if(qpointers.mean_duration)
mas01cr@437 621 delete[] qpointers.mean_duration;
mas01cr@438 622 if(dbpointers.power_data)
mas01cr@438 623 delete[] dbpointers.power_data;
mas01cr@438 624 if(dbpointers.l2norm_data)
mas01cr@438 625 delete[] dbpointers.l2norm_data;
mas01cr@239 626 if(D)
mas01cr@239 627 delete[] D;
mas01cr@239 628 if(DD)
mas01cr@239 629 delete[] DD;
mas01cr@438 630 if(dbpointers.mean_duration)
mas01cr@438 631 delete[] dbpointers.mean_duration;
mas01cr@452 632
mas01cr@452 633 return 0;
mas01cr@239 634 }