annotate query.cpp @ 473:b2fd8113d8bc api-inversion

const declarations for some API arguments. This should make it slightly clearer whose responsibility (the user's) it is to manage the memory pointed to by the corresponding arguments. Suggested by Chris Cannam.
author mas01cr
date Tue, 06 Jan 2009 16:27:01 +0000
parents 0f96ad351990
children
rev   line source
mas01cr@239 1 #include "audioDB.h"
mas01cr@422 2 #include "audioDB-internals.h"
mas01cr@422 3 #include "accumulators.h"
mas01cr@422 4
mas01cr@473 5 bool audiodb_powers_acceptable(const adb_query_refine_t *r, double p1, double p2) {
mas01cr@425 6 if (r->flags & ADB_REFINE_ABSOLUTE_THRESHOLD) {
mas01cr@425 7 if ((p1 < r->absolute_threshold) || (p2 < r->absolute_threshold)) {
mas01cr@239 8 return false;
mas01cr@239 9 }
mas01cr@239 10 }
mas01cr@425 11 if (r->flags & ADB_REFINE_RELATIVE_THRESHOLD) {
mas01cr@425 12 if (fabs(p1-p2) > fabs(r->relative_threshold)) {
mas01cr@239 13 return false;
mas01cr@239 14 }
mas01cr@239 15 }
mas01cr@239 16 return true;
mas01cr@239 17 }
mas01cr@239 18
mas01cr@473 19 adb_query_results_t *audiodb_query_spec(adb_t *adb, const adb_query_spec_t *qspec) {
mas01cr@469 20 adb_qstate_internal_t qstate = {0};
mas01cr@458 21 qstate.allowed_keys = new std::set<std::string>;
mas01cr@469 22 adb_query_results_t *results;
mas01cr@469 23 if(qspec->refine.flags & ADB_REFINE_INCLUDE_KEYLIST) {
mas01cr@469 24 for(unsigned int k = 0; k < qspec->refine.include.nkeys; k++) {
mas01cr@469 25 qstate.allowed_keys->insert(qspec->refine.include.keys[k]);
mas01cr@458 26 }
mas01cr@458 27 } else {
mas01cr@458 28 for(unsigned int k = 0; k < adb->header->numFiles; k++) {
mas01cr@458 29 qstate.allowed_keys->insert((*adb->keys)[k]);
mas01cr@458 30 }
mas01cr@458 31 }
mas01cr@469 32 if(qspec->refine.flags & ADB_REFINE_EXCLUDE_KEYLIST) {
mas01cr@469 33 for(unsigned int k = 0; k < qspec->refine.exclude.nkeys; k++) {
mas01cr@469 34 qstate.allowed_keys->erase(qspec->refine.exclude.keys[k]);
mas01cr@458 35 }
mas01cr@458 36 }
mas01cr@431 37
mas01cr@469 38 switch(qspec->params.distance) {
mas01cr@431 39 case ADB_DISTANCE_DOT_PRODUCT:
mas01cr@469 40 switch(qspec->params.accumulation) {
mas01cr@431 41 case ADB_ACCUMULATION_DB:
mas01cr@469 42 qstate.accumulator = new DBAccumulator<adb_result_dist_gt>(qspec->params.npoints);
mas01cr@431 43 break;
mas01cr@431 44 case ADB_ACCUMULATION_PER_TRACK:
mas01cr@469 45 qstate.accumulator = new PerTrackAccumulator<adb_result_dist_gt>(qspec->params.npoints, qspec->params.ntracks);
mas01cr@431 46 break;
mas01cr@431 47 case ADB_ACCUMULATION_ONE_TO_ONE:
mas01cr@458 48 qstate.accumulator = new NearestAccumulator<adb_result_dist_gt>();
mas01cr@431 49 break;
mas01cr@431 50 default:
mas01cr@469 51 goto error;
mas01cr@239 52 }
mas01cr@239 53 break;
mas01cr@431 54 case ADB_DISTANCE_EUCLIDEAN_NORMED:
mas01cr@431 55 case ADB_DISTANCE_EUCLIDEAN:
mas01cr@469 56 switch(qspec->params.accumulation) {
mas01cr@431 57 case ADB_ACCUMULATION_DB:
mas01cr@469 58 qstate.accumulator = new DBAccumulator<adb_result_dist_lt>(qspec->params.npoints);
mas01cr@431 59 break;
mas01cr@431 60 case ADB_ACCUMULATION_PER_TRACK:
mas01cr@469 61 qstate.accumulator = new PerTrackAccumulator<adb_result_dist_lt>(qspec->params.npoints, qspec->params.ntracks);
mas01cr@431 62 break;
mas01cr@431 63 case ADB_ACCUMULATION_ONE_TO_ONE:
mas01cr@458 64 qstate.accumulator = new NearestAccumulator<adb_result_dist_lt>();
mas01cr@431 65 break;
mas01cr@431 66 default:
mas01cr@469 67 goto error;
mas01mc@263 68 }
mas01mc@263 69 break;
mas01cr@239 70 default:
mas01cr@469 71 goto error;
mas01cr@431 72 }
mas01cr@431 73
mas01cr@469 74 if((qspec->refine.flags & ADB_REFINE_RADIUS) && audiodb_index_exists(adb->path, qspec->refine.radius, qspec->qid.sequence_length)) {
mas01cr@469 75 if(audiodb_index_query_loop(adb, qspec, &qstate) < 0) {
mas01cr@469 76 goto error;
mas01cr@466 77 }
mas01cr@466 78 } else {
mas01cr@469 79 if(audiodb_query_loop(adb, qspec, &qstate)) {
mas01cr@469 80 goto error;
mas01cr@452 81 }
mas01mc@329 82 }
mas01mc@292 83
mas01cr@469 84 results = qstate.accumulator->get_points();
mas01cr@458 85
mas01cr@458 86 delete qstate.accumulator;
mas01cr@458 87 delete qstate.allowed_keys;
mas01cr@458 88
mas01cr@469 89 return results;
mas01cr@458 90
mas01cr@469 91 error:
mas01cr@469 92 if(qstate.accumulator)
mas01cr@469 93 delete qstate.accumulator;
mas01cr@469 94 if(qstate.allowed_keys)
mas01cr@469 95 delete qstate.allowed_keys;
mas01cr@469 96 return NULL;
mas01cr@239 97 }
mas01cr@239 98
mas01cr@473 99 int audiodb_query_free_results(adb_t *adb, const adb_query_spec_t *spec, adb_query_results_t *rs) {
mas01cr@459 100 free(rs->results);
mas01cr@459 101 free(rs);
mas01cr@459 102 return 0;
mas01cr@459 103 }
mas01cr@459 104
mas01cr@473 105 static void audiodb_initialize_arrays(adb_t *adb, const adb_query_spec_t *spec, int track, unsigned int numVectors, double *query, double *data_buffer, double **D, double **DD) {
mas01cr@239 106 unsigned int j, k, l, w;
mas01cr@239 107 double *dp, *qp, *sp;
mas01cr@239 108
mas01cr@439 109 const unsigned HOP_SIZE = spec->refine.hopsize;
mas01cr@435 110 const unsigned wL = spec->qid.sequence_length;
mas01cr@239 111
mas01cr@239 112 for(j = 0; j < numVectors; j++) {
mas01cr@239 113 // Sum products matrix
mas01cr@433 114 D[j] = new double[(*adb->track_lengths)[track]];
mas01cr@239 115 assert(D[j]);
mas01cr@239 116 // Matched filter matrix
mas01cr@433 117 DD[j]=new double[(*adb->track_lengths)[track]];
mas01cr@239 118 assert(DD[j]);
mas01cr@239 119 }
mas01cr@239 120
mas01cr@239 121 // Dot product
mas01cr@239 122 for(j = 0; j < numVectors; j++)
mas01cr@433 123 for(k = 0; k < (*adb->track_lengths)[track]; k++){
mas01cr@439 124 qp = query + j * adb->header->dim;
mas01cr@439 125 sp = data_buffer + k * adb->header->dim;
mas01cr@239 126 DD[j][k] = 0.0; // Initialize matched filter array
mas01cr@239 127 dp = &D[j][k]; // point to correlation cell j,k
mas01cr@239 128 *dp = 0.0; // initialize correlation cell
mas01cr@439 129 l = adb->header->dim; // size of vectors
mas01cr@239 130 while(l--)
mas01cr@239 131 *dp += *qp++ * *sp++;
mas01cr@239 132 }
mas01cr@239 133
mas01cr@239 134 // Matched Filter
mas01cr@239 135 // HOP SIZE == 1
mas01cr@239 136 double* spd;
mas01cr@239 137 if(HOP_SIZE == 1) { // HOP_SIZE = shingleHop
mas01cr@239 138 for(w = 0; w < wL; w++) {
mas01cr@239 139 for(j = 0; j < numVectors - w; j++) {
mas01cr@239 140 sp = DD[j];
mas01cr@239 141 spd = D[j+w] + w;
mas01cr@433 142 k = (*adb->track_lengths)[track] - w;
mas01mc@292 143 while(k--)
mas01mc@292 144 *sp++ += *spd++;
mas01cr@239 145 }
mas01cr@239 146 }
mas01cr@239 147 } else { // HOP_SIZE != 1
mas01cr@239 148 for(w = 0; w < wL; w++) {
mas01cr@239 149 for(j = 0; j < numVectors - w; j += HOP_SIZE) {
mas01cr@239 150 sp = DD[j];
mas01cr@239 151 spd = D[j+w]+w;
mas01cr@433 152 for(k = 0; k < (*adb->track_lengths)[track] - w; k += HOP_SIZE) {
mas01cr@239 153 *sp += *spd;
mas01cr@239 154 sp += HOP_SIZE;
mas01cr@239 155 spd += HOP_SIZE;
mas01cr@239 156 }
mas01cr@239 157 }
mas01cr@239 158 }
mas01cr@239 159 }
mas01cr@239 160 }
mas01cr@239 161
/* Release the row buffers of the D and DD work matrices.
 *
 * For each non-null table, the first numVectors row pointers are
 * released with delete[].  The tables themselves (the arrays of row
 * pointers) are not released.  The track argument is unused.
 */
static void audiodb_delete_arrays(int track, unsigned int numVectors, double **D, double **DD) {
  double **tables[2] = { D, DD };
  for(unsigned int t = 0; t < 2; t++) {
    double **rows = tables[t];
    if(!rows) {
      continue;
    }
    for(unsigned int row = 0; row < numVectors; row++) {
      delete[] rows[row];
    }
  }
}
mas01cr@239 174
mas01cr@433 175 int audiodb_read_data(adb_t *adb, int trkfid, int track, double **data_buffer_p, size_t *data_buffer_size_p) {
mas01cr@433 176 uint32_t track_length = (*adb->track_lengths)[track];
mas01cr@433 177 size_t track_size = track_length * sizeof(double) * adb->header->dim;
mas01cr@433 178 if (track_size > *data_buffer_size_p) {
mas01cr@239 179 if(*data_buffer_p) {
mas01cr@239 180 free(*data_buffer_p);
mas01cr@239 181 }
mas01cr@239 182 {
mas01cr@433 183 *data_buffer_size_p = track_size;
mas01cr@433 184 void *tmp = malloc(track_size);
mas01cr@239 185 if (tmp == NULL) {
mas01cr@433 186 goto error;
mas01cr@239 187 }
mas01cr@239 188 *data_buffer_p = (double *) tmp;
mas01cr@239 189 }
mas01cr@239 190 }
mas01cr@239 191
mas01cr@433 192 read_or_goto_error(trkfid, *data_buffer_p, track_size);
mas01cr@433 193 return 0;
mas01cr@433 194
mas01cr@433 195 error:
mas01cr@433 196 return 1;
mas01cr@239 197 }
mas01cr@239 198
mas01cr@461 199 int audiodb_track_id_datum(adb_t *adb, uint32_t track_id, adb_datum_t *d) {
mas01cr@461 200 off_t track_offset = (*adb->track_offsets)[track_id];
mas01cr@461 201 if(adb->header->flags & O2_FLAG_LARGE_ADB) {
mas01cr@461 202 /* create a reference/insert, then use adb_insert_create_datum() */
mas01cr@461 203 adb_reference_t reference = {0};
mas01cr@461 204 char features[MAXSTR], power[MAXSTR], times[MAXSTR];
mas01cr@461 205 lseek(adb->fd, adb->header->dataOffset + track_id * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@472 206 read_or_goto_error(adb->fd, features, MAXSTR);
mas01cr@461 207 reference.features = features;
mas01cr@461 208 if(adb->header->flags & O2_FLAG_POWER) {
mas01cr@461 209 lseek(adb->fd, adb->header->powerTableOffset + track_id * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@472 210 read_or_goto_error(adb->fd, power, MAXSTR);
mas01cr@461 211 reference.power = power;
mas01cr@461 212 }
mas01cr@461 213 if(adb->header->flags & O2_FLAG_TIMES) {
mas01cr@461 214 lseek(adb->fd, adb->header->timesTableOffset + track_id * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@472 215 read_or_goto_error(adb->fd, times, MAXSTR);
mas01cr@461 216 reference.times = times;
mas01cr@461 217 }
mas01cr@472 218 return audiodb_insert_create_datum(&reference, d);
mas01cr@461 219 } else {
mas01cr@461 220 /* initialize from sources of data that we already have */
mas01cr@461 221 d->nvectors = (*adb->track_lengths)[track_id];
mas01cr@461 222 d->dim = adb->header->dim;
mas01cr@461 223 d->key = (*adb->keys)[track_id].c_str();
mas01cr@461 224 /* read out stuff from the database tables */
mas01cr@461 225 d->data = (double *) malloc(d->nvectors * d->dim * sizeof(double));
mas01cr@461 226 lseek(adb->fd, adb->header->dataOffset + track_offset, SEEK_SET);
mas01cr@472 227 read_or_goto_error(adb->fd, d->data, d->nvectors * d->dim * sizeof(double));
mas01cr@461 228 if(adb->header->flags & O2_FLAG_POWER) {
mas01cr@461 229 d->power = (double *) malloc(d->nvectors * sizeof(double));
mas01cr@461 230 lseek(adb->fd, adb->header->powerTableOffset + track_offset / d->dim, SEEK_SET);
mas01cr@472 231 read_or_goto_error(adb->fd, d->power, d->nvectors * sizeof(double));
mas01cr@461 232 }
mas01cr@461 233 if(adb->header->flags & O2_FLAG_TIMES) {
mas01cr@461 234 d->times = (double *) malloc(2 * d->nvectors * sizeof(double));
mas01cr@461 235 lseek(adb->fd, adb->header->timesTableOffset + track_offset / d->dim, SEEK_SET);
mas01cr@472 236 read_or_goto_error(adb->fd, d->times, 2 * d->nvectors * sizeof(double));
mas01cr@461 237 }
mas01cr@472 238 return 0;
mas01cr@461 239 }
mas01cr@472 240 error:
mas01cr@472 241 audiodb_free_datum(d);
mas01cr@472 242 return 1;
mas01cr@461 243 }
mas01cr@461 244
mas01cr@461 245 int audiodb_datum_qpointers(adb_datum_t *d, uint32_t sequence_length, double **vector_data, double **vector, adb_qpointers_internal_t *qpointers) {
mas01cr@461 246 uint32_t nvectors = d->nvectors;
mas01cr@461 247
mas01cr@461 248 qpointers->nvectors = nvectors;
mas01cr@461 249
mas01cr@461 250 size_t vector_size = nvectors * sizeof(double) * d->dim;
mas01cr@461 251 *vector_data = new double[vector_size];
mas01cr@461 252 memcpy(*vector_data, d->data, vector_size);
mas01cr@461 253
mas01cr@461 254 qpointers->l2norm_data = new double[vector_size / d->dim];
mas01cr@461 255 audiodb_l2norm_buffer(*vector_data, d->dim, nvectors, qpointers->l2norm_data);
mas01cr@461 256 audiodb_sequence_sum(qpointers->l2norm_data, nvectors, sequence_length);
mas01cr@461 257 audiodb_sequence_sqrt(qpointers->l2norm_data, nvectors, sequence_length);
mas01cr@461 258
mas01cr@461 259 if(d->power) {
mas01cr@461 260 qpointers->power_data = new double[vector_size / d->dim];
mas01cr@461 261 memcpy(qpointers->power_data, d->power, vector_size / d->dim);
mas01cr@461 262 audiodb_sequence_sum(qpointers->power_data, nvectors, sequence_length);
mas01cr@461 263 audiodb_sequence_average(qpointers->power_data, nvectors, sequence_length);
mas01cr@461 264 }
mas01cr@461 265
mas01cr@461 266 if(d->times) {
mas01cr@461 267 qpointers->mean_duration = new double[1];
mas01cr@461 268 *qpointers->mean_duration = 0;
mas01cr@461 269 for(unsigned int k = 0; k < nvectors; k++) {
mas01cr@461 270 *qpointers->mean_duration += d->times[2*k+1] - d->times[2*k];
mas01cr@461 271 }
mas01cr@461 272 *qpointers->mean_duration /= nvectors;
mas01cr@461 273 }
mas01cr@461 274
mas01cr@461 275 *vector = *vector_data;
mas01cr@461 276 qpointers->l2norm = qpointers->l2norm_data;
mas01cr@461 277 qpointers->power = qpointers->power_data;
mas01cr@461 278 return 0;
mas01cr@461 279 }
mas01cr@461 280
mas01cr@473 281 int audiodb_query_spec_qpointers(adb_t *adb, const adb_query_spec_t *spec, double **vector_data, double **vector, adb_qpointers_internal_t *qpointers) {
mas01cr@443 282 adb_datum_t *datum;
mas01cr@443 283 adb_datum_t d = {0};
mas01cr@443 284 uint32_t sequence_length;
mas01cr@443 285 uint32_t sequence_start;
mas01cr@443 286
mas01cr@443 287 datum = spec->qid.datum;
mas01cr@443 288 sequence_length = spec->qid.sequence_length;
mas01cr@443 289 sequence_start = spec->qid.sequence_start;
mas01cr@443 290
mas01cr@443 291 if(datum->data) {
mas01cr@443 292 if(datum->dim != adb->header->dim) {
mas01cr@443 293 return 1;
mas01cr@443 294 }
mas01cr@443 295 /* initialize d, and mark that nothing needs freeing later. */
mas01cr@443 296 d = *datum;
mas01cr@443 297 datum = &d;
mas01cr@443 298 } else if (datum->key) {
mas01cr@449 299 uint32_t track_id;
mas01cr@449 300 if((track_id = audiodb_key_index(adb, datum->key)) == (uint32_t) -1) {
mas01cr@443 301 return 1;
mas01cr@443 302 }
mas01cr@461 303 audiodb_track_id_datum(adb, track_id, &d);
mas01cr@443 304 } else {
mas01cr@443 305 return 1;
mas01cr@443 306 }
mas01cr@443 307
mas01cr@444 308 /* FIXME: check the overflow logic here */
mas01cr@461 309 if(sequence_start + sequence_length > d.nvectors) {
mas01cr@461 310 if(datum != &d) {
mas01cr@461 311 audiodb_free_datum(&d);
mas01cr@461 312 }
mas01cr@443 313 return 1;
mas01cr@443 314 }
mas01cr@443 315
mas01cr@461 316 audiodb_datum_qpointers(&d, sequence_length, vector_data, vector, qpointers);
mas01cr@443 317
mas01cr@461 318 /* Finally, if applicable, set up the moving qpointers. */
mas01cr@468 319 if(spec->qid.flags & ADB_QID_FLAG_EXHAUSTIVE) {
mas01cr@461 320 /* the qpointers are already at the start, and so correct. */
mas01cr@443 321 } else {
mas01cr@461 322 /* adjust the qpointers to point to the correct place in the sequence */
mas01cr@443 323 *vector = *vector_data + spec->qid.sequence_start * d.dim;
mas01cr@443 324 qpointers->l2norm = qpointers->l2norm_data + spec->qid.sequence_start;
mas01cr@444 325 if(d.power) {
mas01cr@444 326 qpointers->power = qpointers->power_data + spec->qid.sequence_start;
mas01cr@444 327 }
mas01cr@444 328 qpointers->nvectors = sequence_length;
mas01cr@443 329 }
mas01cr@443 330
mas01cr@443 331 /* Clean up: free any bits of datum that we have ourselves
mas01cr@443 332 * allocated. */
mas01cr@443 333 if(datum != &d) {
mas01cr@443 334 audiodb_free_datum(&d);
mas01cr@443 335 }
mas01cr@444 336
mas01cr@444 337 return 0;
mas01cr@443 338 }
mas01cr@443 339
mas01cr@473 340 static int audiodb_set_up_dbpointers(adb_t *adb, const adb_query_spec_t *spec, adb_qpointers_internal_t *dbpointers) {
mas01cr@438 341 uint32_t nvectors = adb->header->length / (adb->header->dim * sizeof(double));
mas01cr@435 342 uint32_t sequence_length = spec->qid.sequence_length;
mas01cr@438 343
mas01cr@437 344 bool using_power = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD);
mas01cr@437 345 bool using_times = spec->refine.flags & ADB_REFINE_DURATION_RATIO;
mas01cr@437 346 double *times_table = NULL;
mas01cr@435 347
mas01cr@239 348
mas01cr@438 349 dbpointers->nvectors = nvectors;
mas01cr@438 350 dbpointers->l2norm_data = new double[nvectors];
mas01cr@438 351
mas01cr@438 352 double *snpp = dbpointers->l2norm_data, *sppp = 0;
mas01cr@434 353 lseek(adb->fd, adb->header->l2normTableOffset, SEEK_SET);
mas01cr@438 354 read_or_goto_error(adb->fd, dbpointers->l2norm_data, nvectors * sizeof(double));
mas01cr@239 355
mas01cr@437 356 if (using_power) {
mas01cr@434 357 if (!(adb->header->flags & O2_FLAG_POWER)) {
mas01cr@434 358 goto error;
mas01cr@239 359 }
mas01cr@438 360 dbpointers->power_data = new double[nvectors];
mas01cr@438 361 sppp = dbpointers->power_data;
mas01cr@434 362 lseek(adb->fd, adb->header->powerTableOffset, SEEK_SET);
mas01cr@438 363 read_or_goto_error(adb->fd, dbpointers->power_data, nvectors * sizeof(double));
mas01cr@239 364 }
mas01cr@239 365
mas01cr@434 366 for(unsigned int i = 0; i < adb->header->numFiles; i++){
mas01cr@434 367 size_t track_length = (*adb->track_lengths)[i];
mas01cr@435 368 if(track_length >= sequence_length) {
mas01cr@435 369 audiodb_sequence_sum(snpp, track_length, sequence_length);
mas01cr@435 370 audiodb_sequence_sqrt(snpp, track_length, sequence_length);
mas01cr@437 371 if (using_power) {
mas01cr@435 372 audiodb_sequence_sum(sppp, track_length, sequence_length);
mas01cr@435 373 audiodb_sequence_average(sppp, track_length, sequence_length);
mas01cr@239 374 }
mas01cr@239 375 }
mas01cr@434 376 snpp += track_length;
mas01cr@437 377 if (using_power) {
mas01cr@434 378 sppp += track_length;
mas01cr@239 379 }
mas01cr@239 380 }
mas01cr@239 381
mas01cr@437 382 if (using_times) {
mas01cr@434 383 if(!(adb->header->flags & O2_FLAG_TIMES)) {
mas01cr@437 384 goto error;
mas01cr@239 385 }
mas01cr@239 386
mas01cr@438 387 dbpointers->mean_duration = new double[adb->header->numFiles];
mas01cr@239 388
mas01cr@438 389 times_table = (double *) malloc(2 * nvectors * sizeof(double));
mas01cr@437 390 if(!times_table) {
mas01cr@437 391 goto error;
mas01cr@437 392 }
mas01cr@437 393 lseek(adb->fd, adb->header->timesTableOffset, SEEK_SET);
mas01cr@438 394 read_or_goto_error(adb->fd, times_table, 2 * nvectors * sizeof(double));
mas01cr@434 395 for(unsigned int k = 0; k < adb->header->numFiles; k++) {
mas01cr@434 396 size_t track_length = (*adb->track_lengths)[k];
mas01cr@239 397 unsigned int j;
mas01cr@438 398 dbpointers->mean_duration[k] = 0.0;
mas01cr@434 399 for(j = 0; j < track_length; j++) {
mas01cr@438 400 dbpointers->mean_duration[k] += times_table[2*j+1] - times_table[2*j];
mas01cr@239 401 }
mas01cr@438 402 dbpointers->mean_duration[k] /= j;
mas01cr@239 403 }
mas01cr@437 404
mas01cr@437 405 free(times_table);
mas01cr@437 406 times_table = NULL;
mas01cr@239 407 }
mas01cr@239 408
mas01cr@438 409 dbpointers->l2norm = dbpointers->l2norm_data;
mas01cr@438 410 dbpointers->power = dbpointers->power_data;
mas01cr@434 411 return 0;
mas01cr@434 412
mas01cr@434 413 error:
mas01cr@438 414 if(dbpointers->l2norm_data) {
mas01cr@438 415 delete [] dbpointers->l2norm_data;
mas01cr@434 416 }
mas01cr@438 417 if(dbpointers->power_data) {
mas01cr@438 418 delete [] dbpointers->power_data;
mas01cr@434 419 }
mas01cr@438 420 if(dbpointers->mean_duration) {
mas01cr@438 421 delete [] dbpointers->mean_duration;
mas01cr@434 422 }
mas01cr@437 423 if(times_table) {
mas01cr@437 424 free(times_table);
mas01cr@437 425 }
mas01cr@434 426 return 1;
mas01cr@434 427
mas01cr@239 428 }
mas01cr@239 429
mas01cr@473 430 int audiodb_query_queue_loop(adb_t *adb, const adb_query_spec_t *spec, adb_qstate_internal_t *qstate, double *query, adb_qpointers_internal_t *qpointers) {
mas01cr@438 431 adb_qpointers_internal_t dbpointers = {0};
mas01mc@292 432
mas01cr@436 433 uint32_t sequence_length = spec->qid.sequence_length;
mas01cr@437 434 bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD);
mas01cr@436 435
mas01cr@458 436 if(qstate->exact_evaluation_queue->size() == 0) {
mas01cr@463 437 return 0;
mas01cr@455 438 }
mas01mc@292 439
mas01cr@462 440 /* We are guaranteed that the order of points is sorted by:
mas01cr@462 441 * {trackID, spos, qpos} so we can be relatively efficient in
mas01cr@462 442 * initialization of track data. We assume that points usually
mas01cr@462 443 * don't overlap, so we will use exhaustive dot product evaluation
mas01cr@463 444 * (instead of memoization of partial sums, as in query_loop()).
mas01cr@463 445 */
mas01cr@462 446 double dist;
mas01cr@462 447 double *dbdata = 0, *dbdata_pointer;
mas01cr@462 448 Uns32T currentTrack = 0x80000000; // KLUDGE: Initialize with a value outside of track index range
mas01cr@462 449 Uns32T npairs = qstate->exact_evaluation_queue->size();
mas01cr@462 450 while(npairs--) {
mas01cr@462 451 PointPair pp = qstate->exact_evaluation_queue->top();
mas01cr@462 452 if(currentTrack != pp.trackID) {
mas01cr@462 453 SAFE_DELETE_ARRAY(dbdata);
mas01cr@462 454 SAFE_DELETE_ARRAY(dbpointers.l2norm_data);
mas01cr@462 455 SAFE_DELETE_ARRAY(dbpointers.power_data);
mas01cr@462 456 SAFE_DELETE_ARRAY(dbpointers.mean_duration);
mas01cr@462 457 currentTrack = pp.trackID;
mas01cr@462 458 adb_datum_t d = {0};
mas01cr@462 459 if(audiodb_track_id_datum(adb, pp.trackID, &d)) {
mas01cr@463 460 delete qstate->exact_evaluation_queue;
mas01cr@463 461 return 1;
mas01cr@462 462 }
mas01cr@462 463 if(audiodb_datum_qpointers(&d, sequence_length, &dbdata, &dbdata_pointer, &dbpointers)) {
mas01cr@463 464 delete qstate->exact_evaluation_queue;
mas01cr@462 465 audiodb_free_datum(&d);
mas01cr@463 466 return 1;
mas01cr@462 467 }
mas01cr@462 468 audiodb_free_datum(&d);
mas01cr@434 469 }
mas01cr@468 470 Uns32T qPos = (spec->qid.flags & ADB_QID_FLAG_EXHAUSTIVE) ? pp.qpos : 0;
mas01cr@462 471 Uns32T sPos = pp.spos; // index into l2norm table
mas01mc@324 472 // Test power thresholds before computing distance
mas01cr@438 473 if( ( (!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers->power[qPos], dbpointers.power[sPos])) &&
mas01cr@462 474 ( qPos<qpointers->nvectors-sequence_length+1 && sPos<(*adb->track_lengths)[pp.trackID]-sequence_length+1 ) ){
mas01mc@324 475 // Compute distance
mas01cr@462 476 dist = audiodb_dot_product(query + qPos*adb->header->dim, dbdata + sPos*adb->header->dim, adb->header->dim*sequence_length);
mas01cr@437 477 double qn = qpointers->l2norm[qPos];
mas01cr@438 478 double sn = dbpointers.l2norm[sPos];
mas01cr@435 479 switch(spec->params.distance) {
mas01cr@431 480 case ADB_DISTANCE_EUCLIDEAN_NORMED:
mas01mc@324 481 dist = 2 - (2/(qn*sn))*dist;
mas01cr@431 482 break;
mas01cr@431 483 case ADB_DISTANCE_EUCLIDEAN:
mas01cr@431 484 dist = qn*qn + sn*sn - 2*dist;
mas01cr@431 485 break;
mas01cr@431 486 }
mas01cr@463 487 if((!(spec->refine.flags & ADB_REFINE_RADIUS)) ||
mas01cr@463 488 dist <= (spec->refine.radius+O2_DISTANCE_TOLERANCE)) {
mas01cr@424 489 adb_result_t r;
mas01cr@453 490 r.key = (*adb->keys)[pp.trackID].c_str();
mas01cr@424 491 r.dist = dist;
mas01cr@424 492 r.qpos = pp.qpos;
mas01cr@424 493 r.ipos = pp.spos;
mas01cr@458 494 qstate->accumulator->add_point(&r);
mas01cr@424 495 }
mas01mc@292 496 }
mas01cr@458 497 qstate->exact_evaluation_queue->pop();
mas01mc@292 498 }
mas01mc@315 499 // Cleanup
mas01cr@462 500 SAFE_DELETE_ARRAY(dbdata);
mas01cr@438 501 SAFE_DELETE_ARRAY(dbpointers.l2norm_data);
mas01cr@438 502 SAFE_DELETE_ARRAY(dbpointers.power_data);
mas01cr@438 503 SAFE_DELETE_ARRAY(dbpointers.mean_duration);
mas01cr@458 504 delete qstate->exact_evaluation_queue;
mas01cr@463 505 return 0;
mas01mc@292 506 }
mas01mc@292 507
mas01cr@473 508 int audiodb_query_loop(adb_t *adb, const adb_query_spec_t *spec, adb_qstate_internal_t *qstate) {
mas01cr@239 509
mas01cr@239 510 double *query, *query_data;
mas01cr@438 511 adb_qpointers_internal_t qpointers = {0}, dbpointers = {0};
mas01cr@437 512
mas01cr@437 513 bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD);
mas01cr@239 514
mas01cr@452 515 if(adb->header->flags & O2_FLAG_LARGE_ADB) {
mas01cr@452 516 /* FIXME: actually it would be nice to support this mode of
mas01cr@452 517 * operation, but for now... */
mas01cr@452 518 return 1;
mas01cr@452 519 }
mas01mc@324 520
mas01cr@444 521 if(audiodb_query_spec_qpointers(adb, spec, &query_data, &query, &qpointers)) {
mas01cr@452 522 return 1;
mas01cr@444 523 }
mas01cr@239 524
mas01cr@448 525 if(audiodb_set_up_dbpointers(adb, spec, &dbpointers)) {
mas01cr@452 526 return 1;
mas01cr@434 527 }
mas01cr@239 528
mas01cr@451 529 unsigned j,k,track,trackOffset=0, HOP_SIZE = spec->refine.hopsize;
mas01cr@435 530 unsigned wL = spec->qid.sequence_length;
mas01cr@239 531 double **D = 0; // Differences query and target
mas01cr@239 532 double **DD = 0; // Matched filter distance
mas01cr@239 533
mas01cr@437 534 D = new double*[qpointers.nvectors]; // pre-allocate
mas01cr@437 535 DD = new double*[qpointers.nvectors];
mas01cr@239 536
mas01cr@239 537 off_t trackIndexOffset;
mas01cr@239 538
mas01cr@239 539 // Track loop
mas01cr@239 540 size_t data_buffer_size = 0;
mas01cr@239 541 double *data_buffer = 0;
mas01cr@451 542 lseek(adb->fd, adb->header->dataOffset, SEEK_SET);
mas01cr@239 543
mas01cr@458 544 std::set<std::string>::iterator keys_end = qstate->allowed_keys->end();
mas01cr@453 545 for(track = 0; track < adb->header->numFiles; track++) {
mas01cr@453 546 unsigned t = track;
mas01cr@458 547
mas01cr@458 548 while (qstate->allowed_keys->find((*adb->keys)[track]) == keys_end) {
mas01cr@453 549 track++;
mas01cr@453 550 if(track == adb->header->numFiles) {
mas01cr@453 551 goto loop_finish;
mas01cr@239 552 }
mas01cr@239 553 }
mas01cr@453 554 trackOffset = (*adb->track_offsets)[track];
mas01cr@453 555 if(track != t) {
mas01cr@453 556 lseek(adb->fd, adb->header->dataOffset + trackOffset, SEEK_SET);
mas01mc@292 557 }
mas01cr@451 558 trackIndexOffset = trackOffset / (adb->header->dim * sizeof(double)); // dbpointers.nvectors offset
mas01cr@239 559
mas01cr@452 560 if(audiodb_read_data(adb, adb->fd, track, &data_buffer, &data_buffer_size)) {
mas01cr@452 561 return 1;
mas01cr@452 562 }
mas01cr@451 563 if(wL <= (*adb->track_lengths)[track]) { // test for short sequences
mas01cr@239 564
mas01cr@439 565 audiodb_initialize_arrays(adb, spec, track, qpointers.nvectors, query, data_buffer, D, DD);
mas01cr@239 566
mas01cr@437 567 if((!(spec->refine.flags & ADB_REFINE_DURATION_RATIO)) ||
mas01cr@438 568 fabs(dbpointers.mean_duration[track]-qpointers.mean_duration[0]) < qpointers.mean_duration[0]*spec->refine.duration_ratio) {
mas01cr@239 569
mas01cr@239 570 // Search for minimum distance by shingles (concatenated vectors)
mas01cr@437 571 for(j = 0; j <= qpointers.nvectors - wL; j += HOP_SIZE) {
mas01cr@451 572 for(k = 0; k <= (*adb->track_lengths)[track] - wL; k += HOP_SIZE) {
mas01cr@431 573 double thisDist = 0;
mas01cr@438 574 double qn = qpointers.l2norm[j];
mas01cr@438 575 double sn = dbpointers.l2norm[trackIndexOffset + k];
mas01cr@435 576 switch(spec->params.distance) {
mas01cr@431 577 case ADB_DISTANCE_EUCLIDEAN_NORMED:
mas01cr@438 578 thisDist = 2-(2/(qn*sn))*DD[j][k];
mas01cr@431 579 break;
mas01cr@431 580 case ADB_DISTANCE_EUCLIDEAN:
mas01cr@438 581 thisDist = qn*qn + sn*sn - 2*DD[j][k];
mas01cr@431 582 break;
mas01cr@431 583 case ADB_DISTANCE_DOT_PRODUCT:
mas01cr@431 584 thisDist = DD[j][k];
mas01cr@431 585 break;
mas01cr@431 586 }
mas01cr@239 587 // Power test
mas01cr@438 588 if ((!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers.power[j], dbpointers.power[trackIndexOffset + k])) {
mas01cr@239 589 // radius test
mas01cr@435 590 if((!(spec->refine.flags & ADB_REFINE_RADIUS)) ||
mas01cr@435 591 thisDist <= (spec->refine.radius+O2_DISTANCE_TOLERANCE)) {
mas01cr@423 592 adb_result_t r;
mas01cr@453 593 r.key = (*adb->keys)[track].c_str();
mas01cr@423 594 r.dist = thisDist;
mas01cr@468 595 if(spec->qid.flags & ADB_QID_FLAG_EXHAUSTIVE) {
mas01cr@451 596 r.qpos = j;
mas01cr@451 597 } else {
mas01cr@451 598 r.qpos = spec->qid.sequence_start;
mas01cr@451 599 }
mas01cr@423 600 r.ipos = k;
mas01cr@458 601 qstate->accumulator->add_point(&r);
mas01cr@239 602 }
mas01cr@239 603 }
mas01cr@239 604 }
mas01cr@239 605 }
mas01cr@239 606 } // Duration match
mas01cr@437 607 audiodb_delete_arrays(track, qpointers.nvectors, D, DD);
mas01cr@239 608 }
mas01cr@239 609 }
mas01cr@239 610
mas01cr@453 611 loop_finish:
mas01cr@453 612
mas01cr@239 613 free(data_buffer);
mas01cr@239 614
mas01cr@239 615 // Clean up
mas01cr@239 616 if(query_data)
mas01cr@239 617 delete[] query_data;
mas01cr@437 618 if(qpointers.l2norm_data)
mas01cr@437 619 delete[] qpointers.l2norm_data;
mas01cr@437 620 if(qpointers.power_data)
mas01cr@437 621 delete[] qpointers.power_data;
mas01cr@437 622 if(qpointers.mean_duration)
mas01cr@437 623 delete[] qpointers.mean_duration;
mas01cr@438 624 if(dbpointers.power_data)
mas01cr@438 625 delete[] dbpointers.power_data;
mas01cr@438 626 if(dbpointers.l2norm_data)
mas01cr@438 627 delete[] dbpointers.l2norm_data;
mas01cr@239 628 if(D)
mas01cr@239 629 delete[] D;
mas01cr@239 630 if(DD)
mas01cr@239 631 delete[] DD;
mas01cr@438 632 if(dbpointers.mean_duration)
mas01cr@438 633 delete[] dbpointers.mean_duration;
mas01cr@452 634
mas01cr@452 635 return 0;
mas01cr@239 636 }