annotate query.cpp @ 507:e7fd50483311

Free bits of the datum constructed in audioDB::query. We're not quite safe: error calls between allocation of some of these bits and pieces and their use will cause failure... but not freeing things here is definitely wrong.
author mas01cr
date Tue, 13 Jan 2009 21:37:10 +0000
parents 342822c2d49a
children cc2b97d020b1
rev   line source
mas01cr@239 1 #include "audioDB.h"
mas01cr@498 2 #include "audioDB-internals.h"
mas01cr@498 3 #include "accumulators.h"
mas01cr@239 4
mas01cr@498 5 bool audiodb_powers_acceptable(const adb_query_refine_t *r, double p1, double p2) {
mas01cr@498 6 if (r->flags & ADB_REFINE_ABSOLUTE_THRESHOLD) {
mas01cr@498 7 if ((p1 < r->absolute_threshold) || (p2 < r->absolute_threshold)) {
mas01cr@239 8 return false;
mas01cr@239 9 }
mas01cr@239 10 }
mas01cr@498 11 if (r->flags & ADB_REFINE_RELATIVE_THRESHOLD) {
mas01cr@498 12 if (fabs(p1-p2) > fabs(r->relative_threshold)) {
mas01cr@239 13 return false;
mas01cr@239 14 }
mas01cr@239 15 }
mas01cr@239 16 return true;
mas01cr@239 17 }
mas01cr@239 18
mas01cr@498 19 adb_query_results_t *audiodb_query_spec(adb_t *adb, const adb_query_spec_t *qspec) {
mas01cr@498 20 adb_qstate_internal_t qstate = {0};
mas01cr@498 21 qstate.allowed_keys = new std::set<std::string>;
mas01cr@498 22 adb_query_results_t *results;
mas01cr@498 23 if(qspec->refine.flags & ADB_REFINE_INCLUDE_KEYLIST) {
mas01cr@498 24 for(unsigned int k = 0; k < qspec->refine.include.nkeys; k++) {
mas01cr@498 25 qstate.allowed_keys->insert(qspec->refine.include.keys[k]);
mas01cr@498 26 }
mas01cr@498 27 } else {
mas01cr@498 28 for(unsigned int k = 0; k < adb->header->numFiles; k++) {
mas01cr@498 29 qstate.allowed_keys->insert((*adb->keys)[k]);
mas01cr@498 30 }
mas01cr@498 31 }
mas01cr@498 32 if(qspec->refine.flags & ADB_REFINE_EXCLUDE_KEYLIST) {
mas01cr@498 33 for(unsigned int k = 0; k < qspec->refine.exclude.nkeys; k++) {
mas01cr@498 34 qstate.allowed_keys->erase(qspec->refine.exclude.keys[k]);
mas01cr@498 35 }
mas01cr@498 36 }
mas01mc@292 37
mas01cr@498 38 switch(qspec->params.distance) {
mas01cr@498 39 case ADB_DISTANCE_DOT_PRODUCT:
mas01cr@498 40 switch(qspec->params.accumulation) {
mas01cr@498 41 case ADB_ACCUMULATION_DB:
mas01cr@498 42 qstate.accumulator = new DBAccumulator<adb_result_dist_gt>(qspec->params.npoints);
mas01cr@498 43 break;
mas01cr@498 44 case ADB_ACCUMULATION_PER_TRACK:
mas01cr@498 45 qstate.accumulator = new PerTrackAccumulator<adb_result_dist_gt>(qspec->params.npoints, qspec->params.ntracks);
mas01cr@498 46 break;
mas01cr@498 47 case ADB_ACCUMULATION_ONE_TO_ONE:
mas01cr@498 48 qstate.accumulator = new NearestAccumulator<adb_result_dist_gt>();
mas01cr@498 49 break;
mas01cr@498 50 default:
mas01cr@498 51 goto error;
mas01cr@239 52 }
mas01cr@239 53 break;
mas01cr@498 54 case ADB_DISTANCE_EUCLIDEAN_NORMED:
mas01cr@498 55 case ADB_DISTANCE_EUCLIDEAN:
mas01cr@498 56 switch(qspec->params.accumulation) {
mas01cr@498 57 case ADB_ACCUMULATION_DB:
mas01cr@498 58 qstate.accumulator = new DBAccumulator<adb_result_dist_lt>(qspec->params.npoints);
mas01cr@498 59 break;
mas01cr@498 60 case ADB_ACCUMULATION_PER_TRACK:
mas01cr@498 61 qstate.accumulator = new PerTrackAccumulator<adb_result_dist_lt>(qspec->params.npoints, qspec->params.ntracks);
mas01cr@498 62 break;
mas01cr@498 63 case ADB_ACCUMULATION_ONE_TO_ONE:
mas01cr@498 64 qstate.accumulator = new NearestAccumulator<adb_result_dist_lt>();
mas01cr@498 65 break;
mas01cr@498 66 default:
mas01cr@498 67 goto error;
mas01mc@263 68 }
mas01mc@263 69 break;
mas01cr@239 70 default:
mas01cr@498 71 goto error;
mas01mc@329 72 }
mas01cr@498 73
mas01cr@498 74 if((qspec->refine.flags & ADB_REFINE_RADIUS) && audiodb_index_exists(adb->path, qspec->refine.radius, qspec->qid.sequence_length)) {
mas01cr@498 75 if(audiodb_index_query_loop(adb, qspec, &qstate) < 0) {
mas01cr@498 76 goto error;
mas01cr@498 77 }
mas01cr@498 78 } else {
mas01cr@498 79 if(audiodb_query_loop(adb, qspec, &qstate)) {
mas01cr@498 80 goto error;
mas01cr@498 81 }
mas01mc@329 82 }
mas01mc@292 83
mas01cr@498 84 results = qstate.accumulator->get_points();
mas01cr@498 85
mas01cr@498 86 delete qstate.accumulator;
mas01cr@498 87 delete qstate.allowed_keys;
mas01cr@498 88
mas01cr@498 89 return results;
mas01cr@498 90
mas01cr@498 91 error:
mas01cr@498 92 if(qstate.accumulator)
mas01cr@498 93 delete qstate.accumulator;
mas01cr@498 94 if(qstate.allowed_keys)
mas01cr@498 95 delete qstate.allowed_keys;
mas01cr@498 96 return NULL;
mas01cr@239 97 }
mas01cr@239 98
mas01cr@498 99 int audiodb_query_free_results(adb_t *adb, const adb_query_spec_t *spec, adb_query_results_t *rs) {
mas01cr@498 100 free(rs->results);
mas01cr@498 101 free(rs);
mas01cr@498 102 return 0;
mas01cr@239 103 }
mas01cr@239 104
mas01cr@498 105 static void audiodb_initialize_arrays(adb_t *adb, const adb_query_spec_t *spec, int track, unsigned int numVectors, double *query, double *data_buffer, double **D, double **DD) {
mas01cr@239 106 unsigned int j, k, l, w;
mas01cr@239 107 double *dp, *qp, *sp;
mas01cr@239 108
mas01cr@498 109 const unsigned HOP_SIZE = spec->refine.hopsize;
mas01cr@498 110 const unsigned wL = spec->qid.sequence_length;
mas01cr@239 111
mas01cr@239 112 for(j = 0; j < numVectors; j++) {
mas01cr@239 113 // Sum products matrix
mas01cr@498 114 D[j] = new double[(*adb->track_lengths)[track]];
mas01cr@239 115 assert(D[j]);
mas01cr@239 116 // Matched filter matrix
mas01cr@498 117 DD[j]=new double[(*adb->track_lengths)[track]];
mas01cr@239 118 assert(DD[j]);
mas01cr@239 119 }
mas01cr@239 120
mas01cr@239 121 // Dot product
mas01cr@239 122 for(j = 0; j < numVectors; j++)
mas01cr@498 123 for(k = 0; k < (*adb->track_lengths)[track]; k++){
mas01cr@498 124 qp = query + j * adb->header->dim;
mas01cr@498 125 sp = data_buffer + k * adb->header->dim;
mas01cr@239 126 DD[j][k] = 0.0; // Initialize matched filter array
mas01cr@239 127 dp = &D[j][k]; // point to correlation cell j,k
mas01cr@239 128 *dp = 0.0; // initialize correlation cell
mas01cr@498 129 l = adb->header->dim; // size of vectors
mas01cr@239 130 while(l--)
mas01cr@239 131 *dp += *qp++ * *sp++;
mas01cr@239 132 }
mas01cr@239 133
mas01cr@239 134 // Matched Filter
mas01cr@239 135 // HOP SIZE == 1
mas01cr@239 136 double* spd;
mas01cr@239 137 if(HOP_SIZE == 1) { // HOP_SIZE = shingleHop
mas01cr@239 138 for(w = 0; w < wL; w++) {
mas01cr@239 139 for(j = 0; j < numVectors - w; j++) {
mas01cr@239 140 sp = DD[j];
mas01cr@239 141 spd = D[j+w] + w;
mas01cr@498 142 k = (*adb->track_lengths)[track] - w;
mas01mc@292 143 while(k--)
mas01mc@292 144 *sp++ += *spd++;
mas01cr@239 145 }
mas01cr@239 146 }
mas01cr@239 147 } else { // HOP_SIZE != 1
mas01cr@239 148 for(w = 0; w < wL; w++) {
mas01cr@239 149 for(j = 0; j < numVectors - w; j += HOP_SIZE) {
mas01cr@239 150 sp = DD[j];
mas01cr@239 151 spd = D[j+w]+w;
mas01cr@498 152 for(k = 0; k < (*adb->track_lengths)[track] - w; k += HOP_SIZE) {
mas01cr@239 153 *sp += *spd;
mas01cr@239 154 sp += HOP_SIZE;
mas01cr@239 155 spd += HOP_SIZE;
mas01cr@239 156 }
mas01cr@239 157 }
mas01cr@239 158 }
mas01cr@239 159 }
mas01cr@239 160 }
mas01cr@239 161
/*
 * Release the rows of the D and DD work matrices.
 *
 * track      - unused.
 * numVectors - number of rows in each matrix.
 * D, DD      - row-pointer tables; either may be NULL, in which case
 *              it is skipped.
 *
 * Only the rows are deleted (with delete[]); the row-pointer tables
 * themselves are left for the caller, and the row pointers are not
 * reset.
 */
static void audiodb_delete_arrays(int track, unsigned int numVectors, double **D, double **DD) {
  (void) track;

  double **tables[2] = { D, DD };
  for(unsigned int t = 0; t < 2; t++) {
    double **rows = tables[t];
    if(!rows) {
      continue;
    }
    for(unsigned int row = 0; row < numVectors; row++) {
      delete[] rows[row];
    }
  }
}
mas01cr@239 174
mas01cr@498 175 int audiodb_read_data(adb_t *adb, int trkfid, int track, double **data_buffer_p, size_t *data_buffer_size_p) {
mas01cr@498 176 uint32_t track_length = (*adb->track_lengths)[track];
mas01cr@498 177 size_t track_size = track_length * sizeof(double) * adb->header->dim;
mas01cr@498 178 if (track_size > *data_buffer_size_p) {
mas01cr@239 179 if(*data_buffer_p) {
mas01cr@239 180 free(*data_buffer_p);
mas01cr@239 181 }
mas01cr@239 182 {
mas01cr@498 183 *data_buffer_size_p = track_size;
mas01cr@498 184 void *tmp = malloc(track_size);
mas01cr@239 185 if (tmp == NULL) {
mas01cr@498 186 goto error;
mas01cr@239 187 }
mas01cr@239 188 *data_buffer_p = (double *) tmp;
mas01cr@239 189 }
mas01cr@239 190 }
mas01cr@239 191
mas01cr@498 192 read_or_goto_error(trkfid, *data_buffer_p, track_size);
mas01cr@498 193 return 0;
mas01cr@498 194
mas01cr@498 195 error:
mas01cr@498 196 return 1;
mas01cr@239 197 }
mas01cr@239 198
mas01cr@498 199 int audiodb_track_id_datum(adb_t *adb, uint32_t track_id, adb_datum_t *d) {
mas01cr@498 200 off_t track_offset = (*adb->track_offsets)[track_id];
mas01cr@498 201 if(adb->header->flags & O2_FLAG_LARGE_ADB) {
mas01cr@498 202 /* create a reference/insert, then use adb_insert_create_datum() */
mas01cr@498 203 adb_reference_t reference = {0};
mas01cr@498 204 char features[MAXSTR], power[MAXSTR], times[MAXSTR];
mas01cr@498 205 lseek(adb->fd, adb->header->dataOffset + track_id * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@498 206 read_or_goto_error(adb->fd, features, MAXSTR);
mas01cr@498 207 reference.features = features;
mas01cr@498 208 if(adb->header->flags & O2_FLAG_POWER) {
mas01cr@498 209 lseek(adb->fd, adb->header->powerTableOffset + track_id * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@498 210 read_or_goto_error(adb->fd, power, MAXSTR);
mas01cr@498 211 reference.power = power;
mas01cr@498 212 }
mas01cr@498 213 if(adb->header->flags & O2_FLAG_TIMES) {
mas01cr@498 214 lseek(adb->fd, adb->header->timesTableOffset + track_id * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@498 215 read_or_goto_error(adb->fd, times, MAXSTR);
mas01cr@498 216 reference.times = times;
mas01cr@498 217 }
mas01cr@498 218 return audiodb_insert_create_datum(&reference, d);
mas01cr@498 219 } else {
mas01cr@498 220 /* initialize from sources of data that we already have */
mas01cr@498 221 d->nvectors = (*adb->track_lengths)[track_id];
mas01cr@498 222 d->dim = adb->header->dim;
mas01cr@498 223 d->key = (*adb->keys)[track_id].c_str();
mas01cr@498 224 /* read out stuff from the database tables */
mas01cr@498 225 d->data = (double *) malloc(d->nvectors * d->dim * sizeof(double));
mas01cr@498 226 lseek(adb->fd, adb->header->dataOffset + track_offset, SEEK_SET);
mas01cr@498 227 read_or_goto_error(adb->fd, d->data, d->nvectors * d->dim * sizeof(double));
mas01cr@498 228 if(adb->header->flags & O2_FLAG_POWER) {
mas01cr@498 229 d->power = (double *) malloc(d->nvectors * sizeof(double));
mas01cr@498 230 lseek(adb->fd, adb->header->powerTableOffset + track_offset / d->dim, SEEK_SET);
mas01cr@498 231 read_or_goto_error(adb->fd, d->power, d->nvectors * sizeof(double));
mas01cr@498 232 }
mas01cr@498 233 if(adb->header->flags & O2_FLAG_TIMES) {
mas01cr@498 234 d->times = (double *) malloc(2 * d->nvectors * sizeof(double));
mas01cr@498 235 lseek(adb->fd, adb->header->timesTableOffset + track_offset / d->dim, SEEK_SET);
mas01cr@498 236 read_or_goto_error(adb->fd, d->times, 2 * d->nvectors * sizeof(double));
mas01cr@498 237 }
mas01cr@498 238 return 0;
mas01cr@498 239 }
mas01cr@498 240 error:
mas01cr@498 241 audiodb_free_datum(d);
mas01cr@498 242 return 1;
mas01cr@498 243 }
mas01mc@292 244
mas01cr@498 245 int audiodb_datum_qpointers(adb_datum_t *d, uint32_t sequence_length, double **vector_data, double **vector, adb_qpointers_internal_t *qpointers) {
mas01cr@498 246 uint32_t nvectors = d->nvectors;
mas01cr@498 247
mas01cr@498 248 qpointers->nvectors = nvectors;
mas01cr@498 249
mas01cr@498 250 size_t vector_size = nvectors * sizeof(double) * d->dim;
mas01cr@498 251 *vector_data = new double[vector_size];
mas01cr@498 252 memcpy(*vector_data, d->data, vector_size);
mas01cr@498 253
mas01cr@498 254 qpointers->l2norm_data = new double[vector_size / d->dim];
mas01cr@498 255 audiodb_l2norm_buffer(*vector_data, d->dim, nvectors, qpointers->l2norm_data);
mas01cr@498 256 audiodb_sequence_sum(qpointers->l2norm_data, nvectors, sequence_length);
mas01cr@498 257 audiodb_sequence_sqrt(qpointers->l2norm_data, nvectors, sequence_length);
mas01cr@498 258
mas01cr@498 259 if(d->power) {
mas01cr@498 260 qpointers->power_data = new double[vector_size / d->dim];
mas01cr@498 261 memcpy(qpointers->power_data, d->power, vector_size / d->dim);
mas01cr@498 262 audiodb_sequence_sum(qpointers->power_data, nvectors, sequence_length);
mas01cr@498 263 audiodb_sequence_average(qpointers->power_data, nvectors, sequence_length);
mas01cr@239 264 }
mas01cr@239 265
mas01cr@498 266 if(d->times) {
mas01cr@498 267 qpointers->mean_duration = new double[1];
mas01cr@498 268 *qpointers->mean_duration = 0;
mas01cr@498 269 for(unsigned int k = 0; k < nvectors; k++) {
mas01cr@498 270 *qpointers->mean_duration += d->times[2*k+1] - d->times[2*k];
mas01cr@239 271 }
mas01cr@498 272 *qpointers->mean_duration /= nvectors;
mas01cr@239 273 }
mas01cr@239 274
mas01cr@498 275 *vector = *vector_data;
mas01cr@498 276 qpointers->l2norm = qpointers->l2norm_data;
mas01cr@498 277 qpointers->power = qpointers->power_data;
mas01cr@498 278 return 0;
mas01cr@498 279 }
mas01cr@498 280
mas01cr@498 281 int audiodb_query_spec_qpointers(adb_t *adb, const adb_query_spec_t *spec, double **vector_data, double **vector, adb_qpointers_internal_t *qpointers) {
mas01cr@498 282 adb_datum_t *datum;
mas01cr@498 283 adb_datum_t d = {0};
mas01cr@498 284 uint32_t sequence_length;
mas01cr@498 285 uint32_t sequence_start;
mas01cr@498 286
mas01cr@498 287 datum = spec->qid.datum;
mas01cr@498 288 sequence_length = spec->qid.sequence_length;
mas01cr@498 289 sequence_start = spec->qid.sequence_start;
mas01cr@498 290
mas01cr@498 291 if(datum->data) {
mas01cr@498 292 if(datum->dim != adb->header->dim) {
mas01cr@498 293 return 1;
mas01cr@239 294 }
mas01cr@498 295 /* initialize d, and mark that nothing needs freeing later. */
mas01cr@498 296 d = *datum;
mas01cr@498 297 datum = &d;
mas01cr@498 298 } else if (datum->key) {
mas01cr@498 299 uint32_t track_id;
mas01cr@498 300 if((track_id = audiodb_key_index(adb, datum->key)) == (uint32_t) -1) {
mas01cr@498 301 return 1;
mas01cr@498 302 }
mas01cr@498 303 audiodb_track_id_datum(adb, track_id, &d);
mas01cr@498 304 } else {
mas01cr@498 305 return 1;
mas01cr@239 306 }
mas01cr@239 307
mas01cr@498 308 /* FIXME: check the overflow logic here */
mas01cr@498 309 if(sequence_start + sequence_length > d.nvectors) {
mas01cr@498 310 if(datum != &d) {
mas01cr@498 311 audiodb_free_datum(&d);
mas01cr@498 312 }
mas01cr@498 313 return 1;
mas01cr@498 314 }
mas01cr@239 315
mas01cr@498 316 audiodb_datum_qpointers(&d, sequence_length, vector_data, vector, qpointers);
mas01cr@498 317
mas01cr@498 318 /* Finally, if applicable, set up the moving qpointers. */
mas01cr@498 319 if(spec->qid.flags & ADB_QID_FLAG_EXHAUSTIVE) {
mas01cr@498 320 /* the qpointers are already at the start, and so correct. */
mas01cr@498 321 } else {
mas01cr@498 322 /* adjust the qpointers to point to the correct place in the sequence */
mas01cr@498 323 *vector = *vector_data + spec->qid.sequence_start * d.dim;
mas01cr@498 324 qpointers->l2norm = qpointers->l2norm_data + spec->qid.sequence_start;
mas01cr@498 325 if(d.power) {
mas01cr@498 326 qpointers->power = qpointers->power_data + spec->qid.sequence_start;
mas01cr@239 327 }
mas01cr@498 328 qpointers->nvectors = sequence_length;
mas01cr@239 329 }
mas01cr@498 330
mas01cr@498 331 /* Clean up: free any bits of datum that we have ourselves
mas01cr@498 332 * allocated. */
mas01cr@498 333 if(datum != &d) {
mas01cr@498 334 audiodb_free_datum(&d);
mas01cr@498 335 }
mas01cr@498 336
mas01cr@498 337 return 0;
mas01cr@239 338 }
mas01cr@239 339
mas01cr@498 340 static int audiodb_set_up_dbpointers(adb_t *adb, const adb_query_spec_t *spec, adb_qpointers_internal_t *dbpointers) {
mas01cr@498 341 uint32_t nvectors = adb->header->length / (adb->header->dim * sizeof(double));
mas01cr@498 342 uint32_t sequence_length = spec->qid.sequence_length;
mas01mc@292 343
mas01cr@498 344 bool using_power = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD);
mas01cr@498 345 bool using_times = spec->refine.flags & ADB_REFINE_DURATION_RATIO;
mas01cr@498 346 double *times_table = NULL;
mas01cr@498 347
mas01cr@498 348
mas01cr@498 349 dbpointers->nvectors = nvectors;
mas01cr@498 350 dbpointers->l2norm_data = new double[nvectors];
mas01cr@498 351
mas01cr@498 352 double *snpp = dbpointers->l2norm_data, *sppp = 0;
mas01cr@498 353 lseek(adb->fd, adb->header->l2normTableOffset, SEEK_SET);
mas01cr@498 354 read_or_goto_error(adb->fd, dbpointers->l2norm_data, nvectors * sizeof(double));
mas01cr@498 355
mas01cr@498 356 if (using_power) {
mas01cr@498 357 if (!(adb->header->flags & O2_FLAG_POWER)) {
mas01cr@498 358 goto error;
mas01cr@498 359 }
mas01cr@498 360 dbpointers->power_data = new double[nvectors];
mas01cr@498 361 sppp = dbpointers->power_data;
mas01cr@498 362 lseek(adb->fd, adb->header->powerTableOffset, SEEK_SET);
mas01cr@498 363 read_or_goto_error(adb->fd, dbpointers->power_data, nvectors * sizeof(double));
mas01mc@292 364 }
mas01mc@292 365
mas01cr@498 366 for(unsigned int i = 0; i < adb->header->numFiles; i++){
mas01cr@498 367 size_t track_length = (*adb->track_lengths)[i];
mas01cr@498 368 if(track_length >= sequence_length) {
mas01cr@498 369 audiodb_sequence_sum(snpp, track_length, sequence_length);
mas01cr@498 370 audiodb_sequence_sqrt(snpp, track_length, sequence_length);
mas01cr@498 371 if (using_power) {
mas01cr@498 372 audiodb_sequence_sum(sppp, track_length, sequence_length);
mas01cr@498 373 audiodb_sequence_average(sppp, track_length, sequence_length);
mas01cr@498 374 }
mas01mc@324 375 }
mas01cr@498 376 snpp += track_length;
mas01cr@498 377 if (using_power) {
mas01cr@498 378 sppp += track_length;
mas01mc@324 379 }
mas01mc@292 380 }
mas01mc@292 381
mas01cr@498 382 if (using_times) {
mas01cr@498 383 if(!(adb->header->flags & O2_FLAG_TIMES)) {
mas01cr@498 384 goto error;
mas01cr@498 385 }
mas01mc@292 386
mas01cr@498 387 dbpointers->mean_duration = new double[adb->header->numFiles];
mas01cr@498 388
mas01cr@498 389 times_table = (double *) malloc(2 * nvectors * sizeof(double));
mas01cr@498 390 if(!times_table) {
mas01cr@498 391 goto error;
mas01cr@498 392 }
mas01cr@498 393 lseek(adb->fd, adb->header->timesTableOffset, SEEK_SET);
mas01cr@498 394 read_or_goto_error(adb->fd, times_table, 2 * nvectors * sizeof(double));
mas01cr@498 395 for(unsigned int k = 0; k < adb->header->numFiles; k++) {
mas01cr@498 396 size_t track_length = (*adb->track_lengths)[k];
mas01cr@498 397 unsigned int j;
mas01cr@498 398 dbpointers->mean_duration[k] = 0.0;
mas01cr@498 399 for(j = 0; j < track_length; j++) {
mas01cr@498 400 dbpointers->mean_duration[k] += times_table[2*j+1] - times_table[2*j];
mas01mc@292 401 }
mas01cr@498 402 dbpointers->mean_duration[k] /= j;
mas01mc@292 403 }
mas01cr@498 404
mas01cr@498 405 free(times_table);
mas01cr@498 406 times_table = NULL;
mas01mc@292 407 }
mas01cr@498 408
mas01cr@498 409 dbpointers->l2norm = dbpointers->l2norm_data;
mas01cr@498 410 dbpointers->power = dbpointers->power_data;
mas01cr@498 411 return 0;
mas01cr@498 412
mas01cr@498 413 error:
mas01cr@498 414 if(dbpointers->l2norm_data) {
mas01cr@498 415 delete [] dbpointers->l2norm_data;
mas01cr@498 416 }
mas01cr@498 417 if(dbpointers->power_data) {
mas01cr@498 418 delete [] dbpointers->power_data;
mas01cr@498 419 }
mas01cr@498 420 if(dbpointers->mean_duration) {
mas01cr@498 421 delete [] dbpointers->mean_duration;
mas01cr@498 422 }
mas01cr@498 423 if(times_table) {
mas01cr@498 424 free(times_table);
mas01cr@498 425 }
mas01cr@498 426 return 1;
mas01cr@498 427
mas01mc@292 428 }
mas01mc@292 429
mas01cr@498 430 int audiodb_query_queue_loop(adb_t *adb, const adb_query_spec_t *spec, adb_qstate_internal_t *qstate, double *query, adb_qpointers_internal_t *qpointers) {
mas01cr@498 431 adb_qpointers_internal_t dbpointers = {0};
mas01mc@292 432
mas01cr@498 433 uint32_t sequence_length = spec->qid.sequence_length;
mas01cr@498 434 bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD);
mas01cr@239 435
mas01cr@498 436 if(qstate->exact_evaluation_queue->size() == 0) {
mas01cr@498 437 return 0;
mas01cr@239 438 }
mas01cr@239 439
mas01cr@498 440 /* We are guaranteed that the order of points is sorted by:
mas01cr@498 441 * {trackID, spos, qpos} so we can be relatively efficient in
mas01cr@498 442 * initialization of track data. We assume that points usually
mas01cr@498 443 * don't overlap, so we will use exhaustive dot product evaluation
mas01cr@498 444 * (instead of memoization of partial sums, as in query_loop()).
mas01cr@498 445 */
mas01cr@498 446 double dist;
mas01cr@498 447 double *dbdata = 0, *dbdata_pointer;
mas01cr@498 448 Uns32T currentTrack = 0x80000000; // KLUDGE: Initialize with a value outside of track index range
mas01cr@498 449 Uns32T npairs = qstate->exact_evaluation_queue->size();
mas01cr@498 450 while(npairs--) {
mas01cr@498 451 PointPair pp = qstate->exact_evaluation_queue->top();
mas01cr@498 452 if(currentTrack != pp.trackID) {
mas01cr@498 453 SAFE_DELETE_ARRAY(dbdata);
mas01cr@498 454 SAFE_DELETE_ARRAY(dbpointers.l2norm_data);
mas01cr@498 455 SAFE_DELETE_ARRAY(dbpointers.power_data);
mas01cr@498 456 SAFE_DELETE_ARRAY(dbpointers.mean_duration);
mas01cr@498 457 currentTrack = pp.trackID;
mas01cr@498 458 adb_datum_t d = {0};
mas01cr@498 459 if(audiodb_track_id_datum(adb, pp.trackID, &d)) {
mas01cr@498 460 delete qstate->exact_evaluation_queue;
mas01cr@498 461 return 1;
mas01cr@498 462 }
mas01cr@498 463 if(audiodb_datum_qpointers(&d, sequence_length, &dbdata, &dbdata_pointer, &dbpointers)) {
mas01cr@498 464 delete qstate->exact_evaluation_queue;
mas01cr@498 465 audiodb_free_datum(&d);
mas01cr@498 466 return 1;
mas01cr@498 467 }
mas01cr@498 468 audiodb_free_datum(&d);
mas01cr@498 469 }
mas01cr@498 470 Uns32T qPos = (spec->qid.flags & ADB_QID_FLAG_EXHAUSTIVE) ? pp.qpos : 0;
mas01cr@498 471 Uns32T sPos = pp.spos; // index into l2norm table
mas01cr@498 472 // Test power thresholds before computing distance
mas01cr@498 473 if( ( (!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers->power[qPos], dbpointers.power[sPos])) &&
mas01cr@498 474 ( qPos<qpointers->nvectors-sequence_length+1 && sPos<(*adb->track_lengths)[pp.trackID]-sequence_length+1 ) ){
mas01cr@498 475 // Compute distance
mas01cr@498 476 dist = audiodb_dot_product(query + qPos*adb->header->dim, dbdata + sPos*adb->header->dim, adb->header->dim*sequence_length);
mas01cr@498 477 double qn = qpointers->l2norm[qPos];
mas01cr@498 478 double sn = dbpointers.l2norm[sPos];
mas01cr@498 479 switch(spec->params.distance) {
mas01cr@498 480 case ADB_DISTANCE_EUCLIDEAN_NORMED:
mas01cr@498 481 dist = 2 - (2/(qn*sn))*dist;
mas01cr@498 482 break;
mas01cr@498 483 case ADB_DISTANCE_EUCLIDEAN:
mas01cr@498 484 dist = qn*qn + sn*sn - 2*dist;
mas01cr@498 485 break;
mas01cr@498 486 }
mas01cr@498 487 if((!(spec->refine.flags & ADB_REFINE_RADIUS)) ||
mas01cr@498 488 dist <= (spec->refine.radius+O2_DISTANCE_TOLERANCE)) {
mas01cr@498 489 adb_result_t r;
mas01cr@498 490 r.key = (*adb->keys)[pp.trackID].c_str();
mas01cr@498 491 r.dist = dist;
mas01cr@498 492 r.qpos = pp.qpos;
mas01cr@498 493 r.ipos = pp.spos;
mas01cr@498 494 qstate->accumulator->add_point(&r);
mas01cr@239 495 }
mas01cr@239 496 }
mas01cr@498 497 qstate->exact_evaluation_queue->pop();
mas01mc@292 498 }
mas01mc@474 499
mas01mc@474 500
mas01mc@315 501 // Cleanup
mas01cr@498 502 SAFE_DELETE_ARRAY(dbdata);
mas01cr@498 503 SAFE_DELETE_ARRAY(dbpointers.l2norm_data);
mas01cr@498 504 SAFE_DELETE_ARRAY(dbpointers.power_data);
mas01cr@498 505 SAFE_DELETE_ARRAY(dbpointers.mean_duration);
mas01cr@498 506 delete qstate->exact_evaluation_queue;
mas01cr@498 507 return 0;
mas01mc@292 508 }
mas01mc@292 509
mas01cr@498 510 int audiodb_query_loop(adb_t *adb, const adb_query_spec_t *spec, adb_qstate_internal_t *qstate) {
mas01cr@498 511
mas01cr@498 512 double *query, *query_data;
mas01cr@498 513 adb_qpointers_internal_t qpointers = {0}, dbpointers = {0};
mas01mc@292 514
mas01cr@498 515 bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD);
mas01cr@239 516
mas01cr@498 517 if(adb->header->flags & O2_FLAG_LARGE_ADB) {
mas01cr@498 518 /* FIXME: actually it would be nice to support this mode of
mas01cr@498 519 * operation, but for now... */
mas01cr@498 520 return 1;
mas01cr@498 521 }
mas01mc@324 522
mas01cr@498 523 if(audiodb_query_spec_qpointers(adb, spec, &query_data, &query, &qpointers)) {
mas01cr@498 524 return 1;
mas01cr@498 525 }
mas01cr@239 526
mas01cr@498 527 if(audiodb_set_up_dbpointers(adb, spec, &dbpointers)) {
mas01cr@498 528 return 1;
mas01cr@498 529 }
mas01cr@239 530
mas01cr@498 531 unsigned j,k,track,trackOffset=0, HOP_SIZE = spec->refine.hopsize;
mas01cr@498 532 unsigned wL = spec->qid.sequence_length;
mas01cr@239 533 double **D = 0; // Differences query and target
mas01cr@239 534 double **DD = 0; // Matched filter distance
mas01cr@239 535
mas01cr@498 536 D = new double*[qpointers.nvectors]; // pre-allocate
mas01cr@498 537 DD = new double*[qpointers.nvectors];
mas01cr@239 538
mas01cr@239 539 off_t trackIndexOffset;
mas01cr@239 540
mas01cr@239 541 // Track loop
mas01cr@239 542 size_t data_buffer_size = 0;
mas01cr@239 543 double *data_buffer = 0;
mas01cr@498 544 lseek(adb->fd, adb->header->dataOffset, SEEK_SET);
mas01cr@239 545
mas01cr@498 546 std::set<std::string>::iterator keys_end = qstate->allowed_keys->end();
mas01cr@498 547 for(track = 0; track < adb->header->numFiles; track++) {
mas01cr@498 548 unsigned t = track;
mas01cr@498 549
mas01cr@498 550 while (qstate->allowed_keys->find((*adb->keys)[track]) == keys_end) {
mas01cr@498 551 track++;
mas01cr@498 552 if(track == adb->header->numFiles) {
mas01cr@498 553 goto loop_finish;
mas01cr@239 554 }
mas01cr@239 555 }
mas01cr@498 556 trackOffset = (*adb->track_offsets)[track];
mas01cr@498 557 if(track != t) {
mas01cr@498 558 lseek(adb->fd, adb->header->dataOffset + trackOffset, SEEK_SET);
mas01cr@498 559 }
mas01cr@498 560 trackIndexOffset = trackOffset / (adb->header->dim * sizeof(double)); // dbpointers.nvectors offset
mas01cr@239 561
mas01cr@498 562 if(audiodb_read_data(adb, adb->fd, track, &data_buffer, &data_buffer_size)) {
mas01cr@498 563 return 1;
mas01mc@292 564 }
mas01cr@498 565 if(wL <= (*adb->track_lengths)[track]) { // test for short sequences
mas01cr@498 566
mas01cr@498 567 audiodb_initialize_arrays(adb, spec, track, qpointers.nvectors, query, data_buffer, D, DD);
mas01mc@292 568
mas01cr@498 569 if((!(spec->refine.flags & ADB_REFINE_DURATION_RATIO)) ||
mas01cr@498 570 fabs(dbpointers.mean_duration[track]-qpointers.mean_duration[0]) < qpointers.mean_duration[0]*spec->refine.duration_ratio) {
mas01cr@239 571
mas01cr@239 572 // Search for minimum distance by shingles (concatenated vectors)
mas01cr@498 573 for(j = 0; j <= qpointers.nvectors - wL; j += HOP_SIZE) {
mas01cr@498 574 for(k = 0; k <= (*adb->track_lengths)[track] - wL; k += HOP_SIZE) {
mas01cr@498 575 double thisDist = 0;
mas01cr@498 576 double qn = qpointers.l2norm[j];
mas01cr@498 577 double sn = dbpointers.l2norm[trackIndexOffset + k];
mas01cr@498 578 switch(spec->params.distance) {
mas01cr@498 579 case ADB_DISTANCE_EUCLIDEAN_NORMED:
mas01cr@498 580 thisDist = 2-(2/(qn*sn))*DD[j][k];
mas01cr@498 581 break;
mas01cr@498 582 case ADB_DISTANCE_EUCLIDEAN:
mas01cr@498 583 thisDist = qn*qn + sn*sn - 2*DD[j][k];
mas01cr@498 584 break;
mas01cr@498 585 case ADB_DISTANCE_DOT_PRODUCT:
mas01cr@498 586 thisDist = DD[j][k];
mas01cr@498 587 break;
mas01cr@498 588 }
mas01cr@239 589 // Power test
mas01cr@498 590 if ((!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers.power[j], dbpointers.power[trackIndexOffset + k])) {
mas01cr@239 591 // radius test
mas01cr@498 592 if((!(spec->refine.flags & ADB_REFINE_RADIUS)) ||
mas01cr@498 593 thisDist <= (spec->refine.radius+O2_DISTANCE_TOLERANCE)) {
mas01cr@498 594 adb_result_t r;
mas01cr@498 595 r.key = (*adb->keys)[track].c_str();
mas01cr@498 596 r.dist = thisDist;
mas01cr@498 597 if(spec->qid.flags & ADB_QID_FLAG_EXHAUSTIVE) {
mas01cr@498 598 r.qpos = j;
mas01cr@498 599 } else {
mas01cr@498 600 r.qpos = spec->qid.sequence_start;
mas01cr@498 601 }
mas01cr@498 602 r.ipos = k;
mas01cr@498 603 qstate->accumulator->add_point(&r);
mas01cr@239 604 }
mas01cr@239 605 }
mas01cr@239 606 }
mas01cr@239 607 }
mas01cr@239 608 } // Duration match
mas01cr@498 609 audiodb_delete_arrays(track, qpointers.nvectors, D, DD);
mas01cr@239 610 }
mas01cr@239 611 }
mas01cr@239 612
mas01cr@498 613 loop_finish:
mas01cr@498 614
mas01cr@239 615 free(data_buffer);
mas01cr@239 616
mas01cr@239 617 // Clean up
mas01cr@239 618 if(query_data)
mas01cr@239 619 delete[] query_data;
mas01cr@498 620 if(qpointers.l2norm_data)
mas01cr@498 621 delete[] qpointers.l2norm_data;
mas01cr@498 622 if(qpointers.power_data)
mas01cr@498 623 delete[] qpointers.power_data;
mas01cr@498 624 if(qpointers.mean_duration)
mas01cr@498 625 delete[] qpointers.mean_duration;
mas01cr@498 626 if(dbpointers.power_data)
mas01cr@498 627 delete[] dbpointers.power_data;
mas01cr@498 628 if(dbpointers.l2norm_data)
mas01cr@498 629 delete[] dbpointers.l2norm_data;
mas01cr@239 630 if(D)
mas01cr@239 631 delete[] D;
mas01cr@239 632 if(DD)
mas01cr@239 633 delete[] DD;
mas01cr@498 634 if(dbpointers.mean_duration)
mas01cr@498 635 delete[] dbpointers.mean_duration;
mas01cr@498 636
mas01cr@498 637 return 0;
mas01cr@239 638 }