annotate query.cpp @ 539:06ed85832c3b multiprobeLSH

Optimized the query_loop_points inner loop for memcpy and I/O efficiency. Uses sparse seeks and reads to perform scattered reads across data set. Current version does not cache fid between open calls to the same trackID.
author mas01mc
date Sat, 07 Feb 2009 01:20:05 +0000
parents ddf763553175
children 52d82badc544
rev   line source
mas01cr@509 1 extern "C" {
mas01cr@509 2 #include "audioDB_API.h"
mas01cr@509 3 }
mas01cr@498 4 #include "audioDB-internals.h"
mas01cr@498 5 #include "accumulators.h"
mas01cr@239 6
mas01cr@498 7 bool audiodb_powers_acceptable(const adb_query_refine_t *r, double p1, double p2) {
mas01cr@498 8 if (r->flags & ADB_REFINE_ABSOLUTE_THRESHOLD) {
mas01cr@498 9 if ((p1 < r->absolute_threshold) || (p2 < r->absolute_threshold)) {
mas01cr@239 10 return false;
mas01cr@239 11 }
mas01cr@239 12 }
mas01cr@498 13 if (r->flags & ADB_REFINE_RELATIVE_THRESHOLD) {
mas01cr@498 14 if (fabs(p1-p2) > fabs(r->relative_threshold)) {
mas01cr@239 15 return false;
mas01cr@239 16 }
mas01cr@239 17 }
mas01cr@239 18 return true;
mas01cr@239 19 }
mas01cr@239 20
mas01cr@498 21 adb_query_results_t *audiodb_query_spec(adb_t *adb, const adb_query_spec_t *qspec) {
mas01cr@498 22 adb_qstate_internal_t qstate = {0};
mas01cr@498 23 qstate.allowed_keys = new std::set<std::string>;
mas01cr@498 24 adb_query_results_t *results;
mas01cr@498 25 if(qspec->refine.flags & ADB_REFINE_INCLUDE_KEYLIST) {
mas01cr@498 26 for(unsigned int k = 0; k < qspec->refine.include.nkeys; k++) {
mas01cr@498 27 qstate.allowed_keys->insert(qspec->refine.include.keys[k]);
mas01cr@498 28 }
mas01cr@498 29 } else {
mas01cr@498 30 for(unsigned int k = 0; k < adb->header->numFiles; k++) {
mas01cr@498 31 qstate.allowed_keys->insert((*adb->keys)[k]);
mas01cr@498 32 }
mas01cr@498 33 }
mas01cr@498 34 if(qspec->refine.flags & ADB_REFINE_EXCLUDE_KEYLIST) {
mas01cr@498 35 for(unsigned int k = 0; k < qspec->refine.exclude.nkeys; k++) {
mas01cr@498 36 qstate.allowed_keys->erase(qspec->refine.exclude.keys[k]);
mas01cr@498 37 }
mas01cr@498 38 }
mas01mc@292 39
mas01cr@498 40 switch(qspec->params.distance) {
mas01cr@498 41 case ADB_DISTANCE_DOT_PRODUCT:
mas01cr@498 42 switch(qspec->params.accumulation) {
mas01cr@498 43 case ADB_ACCUMULATION_DB:
mas01cr@498 44 qstate.accumulator = new DBAccumulator<adb_result_dist_gt>(qspec->params.npoints);
mas01cr@498 45 break;
mas01cr@498 46 case ADB_ACCUMULATION_PER_TRACK:
mas01cr@498 47 qstate.accumulator = new PerTrackAccumulator<adb_result_dist_gt>(qspec->params.npoints, qspec->params.ntracks);
mas01cr@498 48 break;
mas01cr@498 49 case ADB_ACCUMULATION_ONE_TO_ONE:
mas01cr@498 50 qstate.accumulator = new NearestAccumulator<adb_result_dist_gt>();
mas01cr@498 51 break;
mas01cr@498 52 default:
mas01cr@498 53 goto error;
mas01cr@239 54 }
mas01cr@239 55 break;
mas01cr@498 56 case ADB_DISTANCE_EUCLIDEAN_NORMED:
mas01cr@498 57 case ADB_DISTANCE_EUCLIDEAN:
mas01cr@498 58 switch(qspec->params.accumulation) {
mas01cr@498 59 case ADB_ACCUMULATION_DB:
mas01cr@498 60 qstate.accumulator = new DBAccumulator<adb_result_dist_lt>(qspec->params.npoints);
mas01cr@498 61 break;
mas01cr@498 62 case ADB_ACCUMULATION_PER_TRACK:
mas01cr@498 63 qstate.accumulator = new PerTrackAccumulator<adb_result_dist_lt>(qspec->params.npoints, qspec->params.ntracks);
mas01cr@498 64 break;
mas01cr@498 65 case ADB_ACCUMULATION_ONE_TO_ONE:
mas01cr@498 66 qstate.accumulator = new NearestAccumulator<adb_result_dist_lt>();
mas01cr@498 67 break;
mas01cr@498 68 default:
mas01cr@498 69 goto error;
mas01mc@263 70 }
mas01mc@263 71 break;
mas01cr@239 72 default:
mas01cr@498 73 goto error;
mas01mc@329 74 }
mas01cr@498 75
mas01cr@498 76 if((qspec->refine.flags & ADB_REFINE_RADIUS) && audiodb_index_exists(adb->path, qspec->refine.radius, qspec->qid.sequence_length)) {
mas01cr@498 77 if(audiodb_index_query_loop(adb, qspec, &qstate) < 0) {
mas01cr@498 78 goto error;
mas01cr@498 79 }
mas01cr@498 80 } else {
mas01cr@498 81 if(audiodb_query_loop(adb, qspec, &qstate)) {
mas01cr@498 82 goto error;
mas01cr@498 83 }
mas01mc@329 84 }
mas01mc@292 85
mas01cr@498 86 results = qstate.accumulator->get_points();
mas01cr@498 87
mas01cr@498 88 delete qstate.accumulator;
mas01cr@498 89 delete qstate.allowed_keys;
mas01cr@498 90
mas01cr@498 91 return results;
mas01cr@498 92
mas01cr@498 93 error:
mas01cr@498 94 if(qstate.accumulator)
mas01cr@498 95 delete qstate.accumulator;
mas01cr@498 96 if(qstate.allowed_keys)
mas01cr@498 97 delete qstate.allowed_keys;
mas01cr@498 98 return NULL;
mas01cr@239 99 }
mas01cr@239 100
mas01cr@498 101 int audiodb_query_free_results(adb_t *adb, const adb_query_spec_t *spec, adb_query_results_t *rs) {
mas01cr@498 102 free(rs->results);
mas01cr@498 103 free(rs);
mas01cr@498 104 return 0;
mas01cr@239 105 }
mas01cr@239 106
mas01cr@498 107 static void audiodb_initialize_arrays(adb_t *adb, const adb_query_spec_t *spec, int track, unsigned int numVectors, double *query, double *data_buffer, double **D, double **DD) {
mas01cr@239 108 unsigned int j, k, l, w;
mas01cr@239 109 double *dp, *qp, *sp;
mas01cr@239 110
mas01cr@498 111 const unsigned HOP_SIZE = spec->refine.hopsize;
mas01cr@498 112 const unsigned wL = spec->qid.sequence_length;
mas01cr@239 113
mas01cr@239 114 for(j = 0; j < numVectors; j++) {
mas01cr@239 115 // Sum products matrix
mas01cr@498 116 D[j] = new double[(*adb->track_lengths)[track]];
mas01cr@239 117 assert(D[j]);
mas01cr@239 118 // Matched filter matrix
mas01cr@498 119 DD[j]=new double[(*adb->track_lengths)[track]];
mas01cr@239 120 assert(DD[j]);
mas01cr@239 121 }
mas01cr@239 122
mas01cr@239 123 // Dot product
mas01cr@239 124 for(j = 0; j < numVectors; j++)
mas01cr@498 125 for(k = 0; k < (*adb->track_lengths)[track]; k++){
mas01cr@498 126 qp = query + j * adb->header->dim;
mas01cr@498 127 sp = data_buffer + k * adb->header->dim;
mas01cr@239 128 DD[j][k] = 0.0; // Initialize matched filter array
mas01cr@239 129 dp = &D[j][k]; // point to correlation cell j,k
mas01cr@239 130 *dp = 0.0; // initialize correlation cell
mas01cr@498 131 l = adb->header->dim; // size of vectors
mas01cr@239 132 while(l--)
mas01cr@239 133 *dp += *qp++ * *sp++;
mas01cr@239 134 }
mas01cr@239 135
mas01cr@239 136 // Matched Filter
mas01cr@239 137 // HOP SIZE == 1
mas01cr@239 138 double* spd;
mas01cr@239 139 if(HOP_SIZE == 1) { // HOP_SIZE = shingleHop
mas01cr@239 140 for(w = 0; w < wL; w++) {
mas01cr@239 141 for(j = 0; j < numVectors - w; j++) {
mas01cr@239 142 sp = DD[j];
mas01cr@239 143 spd = D[j+w] + w;
mas01cr@498 144 k = (*adb->track_lengths)[track] - w;
mas01mc@292 145 while(k--)
mas01mc@292 146 *sp++ += *spd++;
mas01cr@239 147 }
mas01cr@239 148 }
mas01cr@239 149 } else { // HOP_SIZE != 1
mas01cr@239 150 for(w = 0; w < wL; w++) {
mas01cr@239 151 for(j = 0; j < numVectors - w; j += HOP_SIZE) {
mas01cr@239 152 sp = DD[j];
mas01cr@239 153 spd = D[j+w]+w;
mas01cr@498 154 for(k = 0; k < (*adb->track_lengths)[track] - w; k += HOP_SIZE) {
mas01cr@239 155 *sp += *spd;
mas01cr@239 156 sp += HOP_SIZE;
mas01cr@239 157 spd += HOP_SIZE;
mas01cr@239 158 }
mas01cr@239 159 }
mas01cr@239 160 }
mas01cr@239 161 }
mas01cr@239 162 }
mas01cr@239 163
mas01cr@498 164 static void audiodb_delete_arrays(int track, unsigned int numVectors, double **D, double **DD) {
mas01cr@239 165 if(D != NULL) {
mas01cr@239 166 for(unsigned int j = 0; j < numVectors; j++) {
mas01cr@239 167 delete[] D[j];
mas01cr@239 168 }
mas01cr@239 169 }
mas01cr@239 170 if(DD != NULL) {
mas01cr@239 171 for(unsigned int j = 0; j < numVectors; j++) {
mas01cr@239 172 delete[] DD[j];
mas01cr@239 173 }
mas01cr@239 174 }
mas01cr@239 175 }
mas01cr@239 176
mas01cr@498 177 int audiodb_read_data(adb_t *adb, int trkfid, int track, double **data_buffer_p, size_t *data_buffer_size_p) {
mas01cr@498 178 uint32_t track_length = (*adb->track_lengths)[track];
mas01cr@498 179 size_t track_size = track_length * sizeof(double) * adb->header->dim;
mas01cr@498 180 if (track_size > *data_buffer_size_p) {
mas01cr@239 181 if(*data_buffer_p) {
mas01cr@239 182 free(*data_buffer_p);
mas01cr@239 183 }
mas01cr@239 184 {
mas01cr@498 185 *data_buffer_size_p = track_size;
mas01cr@498 186 void *tmp = malloc(track_size);
mas01cr@239 187 if (tmp == NULL) {
mas01cr@498 188 goto error;
mas01cr@239 189 }
mas01cr@239 190 *data_buffer_p = (double *) tmp;
mas01cr@239 191 }
mas01cr@239 192 }
mas01cr@239 193
mas01cr@498 194 read_or_goto_error(trkfid, *data_buffer_p, track_size);
mas01cr@498 195 return 0;
mas01cr@498 196
mas01cr@498 197 error:
mas01cr@498 198 return 1;
mas01cr@239 199 }
mas01cr@239 200
mas01mc@539 201 int audiodb_track_id_datum(adb_t *adb, uint32_t track_id, adb_datum_t *d, off_t vector_offset=0, size_t num_vectors=0) {
mas01cr@498 202 off_t track_offset = (*adb->track_offsets)[track_id];
mas01cr@509 203 if(adb->header->flags & ADB_HEADER_FLAG_REFERENCES) {
mas01cr@498 204 /* create a reference/insert, then use adb_insert_create_datum() */
mas01cr@498 205 adb_reference_t reference = {0};
mas01cr@509 206 char features[ADB_MAXSTR], power[ADB_MAXSTR], times[ADB_MAXSTR];
mas01cr@509 207 lseek(adb->fd, adb->header->dataOffset + track_id * ADB_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@509 208 read_or_goto_error(adb->fd, features, ADB_MAXSTR);
mas01cr@498 209 reference.features = features;
mas01cr@509 210 if(adb->header->flags & ADB_HEADER_FLAG_POWER) {
mas01cr@509 211 lseek(adb->fd, adb->header->powerTableOffset + track_id * ADB_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@509 212 read_or_goto_error(adb->fd, power, ADB_MAXSTR);
mas01cr@498 213 reference.power = power;
mas01cr@498 214 }
mas01cr@509 215 if(adb->header->flags & ADB_HEADER_FLAG_TIMES) {
mas01cr@509 216 lseek(adb->fd, adb->header->timesTableOffset + track_id * ADB_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@509 217 read_or_goto_error(adb->fd, times, ADB_MAXSTR);
mas01cr@498 218 reference.times = times;
mas01cr@498 219 }
mas01mc@539 220 return audiodb_insert_create_datum(&reference, d, vector_offset*adb->header->dim*sizeof(double), num_vectors*adb->header->dim*sizeof(double));
mas01cr@498 221 } else {
mas01cr@498 222 /* initialize from sources of data that we already have */
mas01mc@539 223 if(num_vectors)
mas01mc@539 224 d->nvectors = num_vectors;
mas01mc@539 225 else
mas01mc@539 226 d->nvectors = (*adb->track_lengths)[track_id];
mas01cr@498 227 d->dim = adb->header->dim;
mas01cr@498 228 d->key = (*adb->keys)[track_id].c_str();
mas01cr@498 229 /* read out stuff from the database tables */
mas01cr@498 230 d->data = (double *) malloc(d->nvectors * d->dim * sizeof(double));
mas01mc@539 231 lseek(adb->fd, adb->header->dataOffset + track_offset + vector_offset*d->dim*sizeof(double), SEEK_SET);
mas01cr@498 232 read_or_goto_error(adb->fd, d->data, d->nvectors * d->dim * sizeof(double));
mas01cr@509 233 if(adb->header->flags & ADB_HEADER_FLAG_POWER) {
mas01cr@498 234 d->power = (double *) malloc(d->nvectors * sizeof(double));
mas01mc@539 235 lseek(adb->fd, adb->header->powerTableOffset + track_offset / d->dim + vector_offset*sizeof(double), SEEK_SET);
mas01cr@498 236 read_or_goto_error(adb->fd, d->power, d->nvectors * sizeof(double));
mas01cr@498 237 }
mas01cr@509 238 if(adb->header->flags & ADB_HEADER_FLAG_TIMES) {
mas01cr@498 239 d->times = (double *) malloc(2 * d->nvectors * sizeof(double));
mas01mc@539 240 lseek(adb->fd, adb->header->timesTableOffset + track_offset / d->dim + 2 * vector_offset*sizeof(double), SEEK_SET);
mas01cr@498 241 read_or_goto_error(adb->fd, d->times, 2 * d->nvectors * sizeof(double));
mas01cr@498 242 }
mas01cr@498 243 return 0;
mas01cr@498 244 }
mas01cr@498 245 error:
mas01cr@498 246 audiodb_free_datum(d);
mas01cr@498 247 return 1;
mas01cr@498 248 }
mas01mc@292 249
mas01cr@498 250 int audiodb_datum_qpointers(adb_datum_t *d, uint32_t sequence_length, double **vector_data, double **vector, adb_qpointers_internal_t *qpointers) {
mas01cr@498 251 uint32_t nvectors = d->nvectors;
mas01cr@498 252
mas01cr@498 253 qpointers->nvectors = nvectors;
mas01cr@498 254
mas01cr@498 255 size_t vector_size = nvectors * sizeof(double) * d->dim;
mas01cr@498 256 *vector_data = new double[vector_size];
mas01cr@498 257 memcpy(*vector_data, d->data, vector_size);
mas01cr@498 258
mas01cr@498 259 qpointers->l2norm_data = new double[vector_size / d->dim];
mas01cr@498 260 audiodb_l2norm_buffer(*vector_data, d->dim, nvectors, qpointers->l2norm_data);
mas01cr@498 261 audiodb_sequence_sum(qpointers->l2norm_data, nvectors, sequence_length);
mas01cr@498 262 audiodb_sequence_sqrt(qpointers->l2norm_data, nvectors, sequence_length);
mas01cr@498 263
mas01cr@498 264 if(d->power) {
mas01cr@498 265 qpointers->power_data = new double[vector_size / d->dim];
mas01cr@498 266 memcpy(qpointers->power_data, d->power, vector_size / d->dim);
mas01cr@498 267 audiodb_sequence_sum(qpointers->power_data, nvectors, sequence_length);
mas01cr@498 268 audiodb_sequence_average(qpointers->power_data, nvectors, sequence_length);
mas01cr@239 269 }
mas01cr@239 270
mas01cr@498 271 if(d->times) {
mas01cr@498 272 qpointers->mean_duration = new double[1];
mas01cr@498 273 *qpointers->mean_duration = 0;
mas01cr@498 274 for(unsigned int k = 0; k < nvectors; k++) {
mas01cr@498 275 *qpointers->mean_duration += d->times[2*k+1] - d->times[2*k];
mas01cr@239 276 }
mas01cr@498 277 *qpointers->mean_duration /= nvectors;
mas01cr@239 278 }
mas01cr@239 279
mas01cr@498 280 *vector = *vector_data;
mas01cr@498 281 qpointers->l2norm = qpointers->l2norm_data;
mas01cr@498 282 qpointers->power = qpointers->power_data;
mas01cr@498 283 return 0;
mas01cr@498 284 }
mas01cr@498 285
mas01mc@528 286 int audiodb_datum_qpointers_partial(adb_datum_t *d, uint32_t sequence_length, double **vector_data,
mas01mc@528 287 double **vector, adb_qpointers_internal_t *qpointers,
mas01mc@528 288 adb_qstate_internal_t *qstate){
mas01mc@528 289 uint32_t nvectors = d->nvectors;
mas01mc@528 290 qpointers->nvectors = nvectors;
mas01mc@528 291
mas01mc@539 292 PointPair pp = (*qstate->exact_evaluation_queue).top();
mas01mc@529 293 #ifdef _LSH_DEBUG_
mas01mc@539 294 cout << "tid=" << pp.trackID << " qpos=" << pp.qpos << " spos=" << pp.spos << endl;
mas01mc@539 295 cout.flush();
mas01mc@529 296 #endif
mas01mc@539 297
mas01mc@539 298 if(d->power) {
mas01mc@539 299 //memcpy(qpointers->power_data, d->power, seq_len_dbl);
mas01mc@539 300 audiodb_sequence_sum(d->power, sequence_length, sequence_length);
mas01mc@539 301 audiodb_sequence_average(d->power, sequence_length, sequence_length);
mas01mc@528 302 }
mas01mc@539 303
mas01mc@528 304 if(d->times) {
mas01mc@528 305 qpointers->mean_duration = new double[1];
mas01mc@528 306 *qpointers->mean_duration = 0;
mas01mc@528 307 for(unsigned int k = 0; k < nvectors; k++) {
mas01mc@528 308 *qpointers->mean_duration += d->times[2*k+1] - d->times[2*k];
mas01mc@528 309 }
mas01mc@528 310 *qpointers->mean_duration /= nvectors;
mas01mc@528 311 }
mas01mc@539 312
mas01mc@531 313 *vector = d->data;
mas01mc@531 314 *vector_data = d->data;
mas01mc@531 315 qpointers->l2norm = 0 ;
mas01mc@539 316 qpointers->power = d->power;
mas01mc@528 317 return 0;
mas01mc@528 318 }
mas01mc@528 319
mas01cr@498 320 int audiodb_query_spec_qpointers(adb_t *adb, const adb_query_spec_t *spec, double **vector_data, double **vector, adb_qpointers_internal_t *qpointers) {
mas01cr@498 321 adb_datum_t *datum;
mas01cr@498 322 adb_datum_t d = {0};
mas01cr@498 323 uint32_t sequence_length;
mas01cr@498 324 uint32_t sequence_start;
mas01cr@498 325
mas01cr@498 326 datum = spec->qid.datum;
mas01cr@498 327 sequence_length = spec->qid.sequence_length;
mas01cr@498 328 sequence_start = spec->qid.sequence_start;
mas01cr@498 329
mas01cr@498 330 if(datum->data) {
mas01cr@498 331 if(datum->dim != adb->header->dim) {
mas01cr@498 332 return 1;
mas01cr@239 333 }
mas01cr@498 334 /* initialize d, and mark that nothing needs freeing later. */
mas01cr@498 335 d = *datum;
mas01cr@498 336 datum = &d;
mas01cr@498 337 } else if (datum->key) {
mas01cr@498 338 uint32_t track_id;
mas01cr@498 339 if((track_id = audiodb_key_index(adb, datum->key)) == (uint32_t) -1) {
mas01cr@498 340 return 1;
mas01cr@498 341 }
mas01cr@498 342 audiodb_track_id_datum(adb, track_id, &d);
mas01cr@498 343 } else {
mas01cr@498 344 return 1;
mas01cr@239 345 }
mas01cr@239 346
mas01cr@498 347 /* FIXME: check the overflow logic here */
mas01cr@498 348 if(sequence_start + sequence_length > d.nvectors) {
mas01cr@498 349 if(datum != &d) {
mas01cr@498 350 audiodb_free_datum(&d);
mas01cr@498 351 }
mas01cr@498 352 return 1;
mas01cr@498 353 }
mas01cr@239 354
mas01cr@498 355 audiodb_datum_qpointers(&d, sequence_length, vector_data, vector, qpointers);
mas01cr@498 356
mas01cr@498 357 /* Finally, if applicable, set up the moving qpointers. */
mas01cr@498 358 if(spec->qid.flags & ADB_QID_FLAG_EXHAUSTIVE) {
mas01cr@498 359 /* the qpointers are already at the start, and so correct. */
mas01cr@498 360 } else {
mas01cr@498 361 /* adjust the qpointers to point to the correct place in the sequence */
mas01cr@498 362 *vector = *vector_data + spec->qid.sequence_start * d.dim;
mas01cr@498 363 qpointers->l2norm = qpointers->l2norm_data + spec->qid.sequence_start;
mas01cr@498 364 if(d.power) {
mas01cr@498 365 qpointers->power = qpointers->power_data + spec->qid.sequence_start;
mas01cr@239 366 }
mas01cr@498 367 qpointers->nvectors = sequence_length;
mas01cr@239 368 }
mas01cr@498 369
mas01cr@498 370 /* Clean up: free any bits of datum that we have ourselves
mas01cr@498 371 * allocated. */
mas01cr@498 372 if(datum != &d) {
mas01cr@498 373 audiodb_free_datum(&d);
mas01cr@498 374 }
mas01cr@498 375
mas01cr@498 376 return 0;
mas01cr@239 377 }
mas01cr@239 378
mas01cr@498 379 static int audiodb_set_up_dbpointers(adb_t *adb, const adb_query_spec_t *spec, adb_qpointers_internal_t *dbpointers) {
mas01cr@498 380 uint32_t nvectors = adb->header->length / (adb->header->dim * sizeof(double));
mas01cr@498 381 uint32_t sequence_length = spec->qid.sequence_length;
mas01mc@292 382
mas01cr@498 383 bool using_power = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD);
mas01cr@498 384 bool using_times = spec->refine.flags & ADB_REFINE_DURATION_RATIO;
mas01cr@498 385 double *times_table = NULL;
mas01cr@498 386
mas01cr@498 387
mas01cr@498 388 dbpointers->nvectors = nvectors;
mas01cr@498 389 dbpointers->l2norm_data = new double[nvectors];
mas01cr@498 390
mas01cr@498 391 double *snpp = dbpointers->l2norm_data, *sppp = 0;
mas01cr@498 392 lseek(adb->fd, adb->header->l2normTableOffset, SEEK_SET);
mas01cr@498 393 read_or_goto_error(adb->fd, dbpointers->l2norm_data, nvectors * sizeof(double));
mas01cr@498 394
mas01cr@498 395 if (using_power) {
mas01cr@509 396 if (!(adb->header->flags & ADB_HEADER_FLAG_POWER)) {
mas01cr@498 397 goto error;
mas01cr@498 398 }
mas01cr@498 399 dbpointers->power_data = new double[nvectors];
mas01cr@498 400 sppp = dbpointers->power_data;
mas01cr@498 401 lseek(adb->fd, adb->header->powerTableOffset, SEEK_SET);
mas01cr@498 402 read_or_goto_error(adb->fd, dbpointers->power_data, nvectors * sizeof(double));
mas01mc@292 403 }
mas01mc@292 404
mas01cr@498 405 for(unsigned int i = 0; i < adb->header->numFiles; i++){
mas01cr@498 406 size_t track_length = (*adb->track_lengths)[i];
mas01cr@498 407 if(track_length >= sequence_length) {
mas01cr@498 408 audiodb_sequence_sum(snpp, track_length, sequence_length);
mas01cr@498 409 audiodb_sequence_sqrt(snpp, track_length, sequence_length);
mas01cr@498 410 if (using_power) {
mas01cr@498 411 audiodb_sequence_sum(sppp, track_length, sequence_length);
mas01cr@498 412 audiodb_sequence_average(sppp, track_length, sequence_length);
mas01cr@498 413 }
mas01mc@324 414 }
mas01cr@498 415 snpp += track_length;
mas01cr@498 416 if (using_power) {
mas01cr@498 417 sppp += track_length;
mas01mc@324 418 }
mas01mc@292 419 }
mas01mc@292 420
mas01cr@498 421 if (using_times) {
mas01cr@509 422 if(!(adb->header->flags & ADB_HEADER_FLAG_TIMES)) {
mas01cr@498 423 goto error;
mas01cr@498 424 }
mas01mc@292 425
mas01cr@498 426 dbpointers->mean_duration = new double[adb->header->numFiles];
mas01cr@498 427
mas01cr@498 428 times_table = (double *) malloc(2 * nvectors * sizeof(double));
mas01cr@498 429 if(!times_table) {
mas01cr@498 430 goto error;
mas01cr@498 431 }
mas01cr@498 432 lseek(adb->fd, adb->header->timesTableOffset, SEEK_SET);
mas01cr@498 433 read_or_goto_error(adb->fd, times_table, 2 * nvectors * sizeof(double));
mas01cr@498 434 for(unsigned int k = 0; k < adb->header->numFiles; k++) {
mas01cr@498 435 size_t track_length = (*adb->track_lengths)[k];
mas01cr@498 436 unsigned int j;
mas01cr@498 437 dbpointers->mean_duration[k] = 0.0;
mas01cr@498 438 for(j = 0; j < track_length; j++) {
mas01cr@498 439 dbpointers->mean_duration[k] += times_table[2*j+1] - times_table[2*j];
mas01mc@292 440 }
mas01cr@498 441 dbpointers->mean_duration[k] /= j;
mas01mc@292 442 }
mas01cr@498 443
mas01cr@498 444 free(times_table);
mas01cr@498 445 times_table = NULL;
mas01mc@292 446 }
mas01cr@498 447
mas01cr@498 448 dbpointers->l2norm = dbpointers->l2norm_data;
mas01cr@498 449 dbpointers->power = dbpointers->power_data;
mas01cr@498 450 return 0;
mas01cr@498 451
mas01cr@498 452 error:
mas01cr@498 453 if(dbpointers->l2norm_data) {
mas01cr@498 454 delete [] dbpointers->l2norm_data;
mas01cr@498 455 }
mas01cr@498 456 if(dbpointers->power_data) {
mas01cr@498 457 delete [] dbpointers->power_data;
mas01cr@498 458 }
mas01cr@498 459 if(dbpointers->mean_duration) {
mas01cr@498 460 delete [] dbpointers->mean_duration;
mas01cr@498 461 }
mas01cr@498 462 if(times_table) {
mas01cr@498 463 free(times_table);
mas01cr@498 464 }
mas01cr@498 465 return 1;
mas01cr@498 466
mas01mc@292 467 }
mas01mc@292 468
mas01cr@498 469 int audiodb_query_queue_loop(adb_t *adb, const adb_query_spec_t *spec, adb_qstate_internal_t *qstate, double *query, adb_qpointers_internal_t *qpointers) {
mas01cr@498 470 adb_qpointers_internal_t dbpointers = {0};
mas01mc@292 471
mas01cr@498 472 uint32_t sequence_length = spec->qid.sequence_length;
mas01cr@498 473 bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD);
mas01cr@239 474
mas01cr@498 475 if(qstate->exact_evaluation_queue->size() == 0) {
mas01cr@498 476 return 0;
mas01cr@239 477 }
mas01cr@239 478
mas01cr@498 479 /* We are guaranteed that the order of points is sorted by:
mas01cr@498 480 * {trackID, spos, qpos} so we can be relatively efficient in
mas01cr@498 481 * initialization of track data. We assume that points usually
mas01cr@498 482 * don't overlap, so we will use exhaustive dot product evaluation
mas01cr@498 483 * (instead of memoization of partial sums, as in query_loop()).
mas01cr@498 484 */
mas01cr@498 485 double dist;
mas01cr@498 486 double *dbdata = 0, *dbdata_pointer;
mas01cr@498 487 Uns32T npairs = qstate->exact_evaluation_queue->size();
mas01mc@527 488 #ifdef _LSH_DEBUG_
mas01mc@527 489 cout << "Num vector pairs to evaluate: " << npairs << "..." << endl;
mas01mc@527 490 cout.flush();
mas01mc@527 491 #endif
mas01mc@531 492 adb_datum_t d = {0};
mas01cr@498 493 while(npairs--) {
mas01cr@498 494 PointPair pp = qstate->exact_evaluation_queue->top();
mas01mc@539 495 maybe_delete_array(dbpointers.mean_duration);
mas01mc@539 496 if(audiodb_track_id_datum(adb, pp.trackID, &d, pp.spos, sequence_length)) {
mas01mc@539 497 delete qstate->exact_evaluation_queue;
mas01mc@539 498 delete qstate->set;
mas01mc@539 499 return 1;
mas01mc@539 500 }
mas01mc@539 501
mas01mc@539 502 if(audiodb_datum_qpointers_partial(&d, sequence_length, &dbdata, &dbdata_pointer, &dbpointers, qstate)) {
mas01mc@539 503 delete qstate->exact_evaluation_queue;
mas01mc@539 504 delete qstate->set;
mas01mc@531 505 audiodb_free_datum(&d);
mas01mc@539 506 return 1;
mas01mc@539 507 }
mas01mc@531 508
mas01cr@498 509 Uns32T qPos = (spec->qid.flags & ADB_QID_FLAG_EXHAUSTIVE) ? pp.qpos : 0;
mas01cr@498 510 // Test power thresholds before computing distance
mas01mc@539 511 if( ( (!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers->power[qPos], dbpointers.power[0])) &&
mas01mc@539 512 ( qPos<qpointers->nvectors-sequence_length+1 && pp.spos<(*adb->track_lengths)[pp.trackID]-sequence_length+1 ) ){
mas01mc@531 513 // Compute distance
mas01mc@539 514 dist = audiodb_dot_product(query + qPos*adb->header->dim, dbdata, adb->header->dim*sequence_length);
mas01mc@531 515 double qn = audiodb_dot_product(query + qPos*adb->header->dim, query + qPos*adb->header->dim, adb->header->dim*sequence_length);
mas01mc@539 516 double sn = audiodb_dot_product(dbdata, dbdata, adb->header->dim*sequence_length);
mas01mc@531 517 qn = sqrt(qn);
mas01mc@531 518 sn = sqrt(sn);
mas01cr@498 519 switch(spec->params.distance) {
mas01cr@498 520 case ADB_DISTANCE_EUCLIDEAN_NORMED:
mas01cr@498 521 dist = 2 - (2/(qn*sn))*dist;
mas01cr@498 522 break;
mas01cr@498 523 case ADB_DISTANCE_EUCLIDEAN:
mas01cr@498 524 dist = qn*qn + sn*sn - 2*dist;
mas01cr@498 525 break;
mas01cr@498 526 }
mas01cr@498 527 if((!(spec->refine.flags & ADB_REFINE_RADIUS)) ||
mas01cr@509 528 dist <= (spec->refine.radius + ADB_DISTANCE_TOLERANCE)) {
mas01cr@498 529 adb_result_t r;
mas01cr@498 530 r.key = (*adb->keys)[pp.trackID].c_str();
mas01cr@498 531 r.dist = dist;
mas01cr@498 532 r.qpos = pp.qpos;
mas01cr@498 533 r.ipos = pp.spos;
mas01cr@498 534 qstate->accumulator->add_point(&r);
mas01cr@239 535 }
mas01cr@239 536 }
mas01cr@498 537 qstate->exact_evaluation_queue->pop();
mas01mc@539 538 audiodb_free_datum(&d);
mas01mc@292 539 }
mas01mc@474 540
mas01mc@315 541 // Cleanup
mas01mc@531 542 // maybe_delete_array(dbdata);
mas01mc@531 543 //maybe_delete_array(dbpointers.l2norm_data);
mas01mc@539 544 //maybe_delete_array(dbpointers.power_data);
mas01cr@509 545 maybe_delete_array(dbpointers.mean_duration);
mas01cr@498 546 delete qstate->exact_evaluation_queue;
mas01mc@529 547 delete qstate->set;
mas01cr@498 548 return 0;
mas01mc@292 549 }
mas01mc@292 550
mas01cr@498 551 int audiodb_query_loop(adb_t *adb, const adb_query_spec_t *spec, adb_qstate_internal_t *qstate) {
mas01cr@498 552
mas01cr@498 553 double *query, *query_data;
mas01cr@498 554 adb_qpointers_internal_t qpointers = {0}, dbpointers = {0};
mas01mc@292 555
mas01cr@498 556 bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD);
mas01cr@239 557
mas01cr@509 558 if(adb->header->flags & ADB_HEADER_FLAG_REFERENCES) {
mas01cr@498 559 /* FIXME: actually it would be nice to support this mode of
mas01cr@498 560 * operation, but for now... */
mas01cr@498 561 return 1;
mas01cr@498 562 }
mas01mc@324 563
mas01cr@498 564 if(audiodb_query_spec_qpointers(adb, spec, &query_data, &query, &qpointers)) {
mas01cr@498 565 return 1;
mas01cr@498 566 }
mas01cr@239 567
mas01cr@498 568 if(audiodb_set_up_dbpointers(adb, spec, &dbpointers)) {
mas01cr@498 569 return 1;
mas01cr@498 570 }
mas01cr@239 571
mas01cr@498 572 unsigned j,k,track,trackOffset=0, HOP_SIZE = spec->refine.hopsize;
mas01cr@498 573 unsigned wL = spec->qid.sequence_length;
mas01cr@239 574 double **D = 0; // Differences query and target
mas01cr@239 575 double **DD = 0; // Matched filter distance
mas01cr@239 576
mas01cr@498 577 D = new double*[qpointers.nvectors]; // pre-allocate
mas01cr@498 578 DD = new double*[qpointers.nvectors];
mas01cr@239 579
mas01cr@239 580 off_t trackIndexOffset;
mas01cr@239 581
mas01cr@239 582 // Track loop
mas01cr@239 583 size_t data_buffer_size = 0;
mas01cr@239 584 double *data_buffer = 0;
mas01cr@498 585 lseek(adb->fd, adb->header->dataOffset, SEEK_SET);
mas01cr@239 586
mas01cr@498 587 std::set<std::string>::iterator keys_end = qstate->allowed_keys->end();
mas01cr@498 588 for(track = 0; track < adb->header->numFiles; track++) {
mas01cr@498 589 unsigned t = track;
mas01cr@498 590
mas01cr@498 591 while (qstate->allowed_keys->find((*adb->keys)[track]) == keys_end) {
mas01cr@498 592 track++;
mas01cr@498 593 if(track == adb->header->numFiles) {
mas01cr@498 594 goto loop_finish;
mas01cr@239 595 }
mas01cr@239 596 }
mas01cr@498 597 trackOffset = (*adb->track_offsets)[track];
mas01cr@498 598 if(track != t) {
mas01cr@498 599 lseek(adb->fd, adb->header->dataOffset + trackOffset, SEEK_SET);
mas01cr@498 600 }
mas01cr@498 601 trackIndexOffset = trackOffset / (adb->header->dim * sizeof(double)); // dbpointers.nvectors offset
mas01cr@239 602
mas01cr@498 603 if(audiodb_read_data(adb, adb->fd, track, &data_buffer, &data_buffer_size)) {
mas01cr@498 604 return 1;
mas01mc@292 605 }
mas01cr@498 606 if(wL <= (*adb->track_lengths)[track]) { // test for short sequences
mas01cr@498 607
mas01cr@498 608 audiodb_initialize_arrays(adb, spec, track, qpointers.nvectors, query, data_buffer, D, DD);
mas01mc@292 609
mas01cr@498 610 if((!(spec->refine.flags & ADB_REFINE_DURATION_RATIO)) ||
mas01cr@498 611 fabs(dbpointers.mean_duration[track]-qpointers.mean_duration[0]) < qpointers.mean_duration[0]*spec->refine.duration_ratio) {
mas01cr@239 612
mas01cr@239 613 // Search for minimum distance by shingles (concatenated vectors)
mas01cr@498 614 for(j = 0; j <= qpointers.nvectors - wL; j += HOP_SIZE) {
mas01cr@498 615 for(k = 0; k <= (*adb->track_lengths)[track] - wL; k += HOP_SIZE) {
mas01cr@498 616 double thisDist = 0;
mas01cr@498 617 double qn = qpointers.l2norm[j];
mas01cr@498 618 double sn = dbpointers.l2norm[trackIndexOffset + k];
mas01cr@498 619 switch(spec->params.distance) {
mas01cr@498 620 case ADB_DISTANCE_EUCLIDEAN_NORMED:
mas01cr@498 621 thisDist = 2-(2/(qn*sn))*DD[j][k];
mas01cr@498 622 break;
mas01cr@498 623 case ADB_DISTANCE_EUCLIDEAN:
mas01cr@498 624 thisDist = qn*qn + sn*sn - 2*DD[j][k];
mas01cr@498 625 break;
mas01cr@498 626 case ADB_DISTANCE_DOT_PRODUCT:
mas01cr@498 627 thisDist = DD[j][k];
mas01cr@498 628 break;
mas01cr@498 629 }
mas01cr@239 630 // Power test
mas01cr@498 631 if ((!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers.power[j], dbpointers.power[trackIndexOffset + k])) {
mas01cr@239 632 // radius test
mas01cr@498 633 if((!(spec->refine.flags & ADB_REFINE_RADIUS)) ||
mas01cr@509 634 thisDist <= (spec->refine.radius + ADB_DISTANCE_TOLERANCE)) {
mas01cr@498 635 adb_result_t r;
mas01cr@498 636 r.key = (*adb->keys)[track].c_str();
mas01cr@498 637 r.dist = thisDist;
mas01cr@498 638 if(spec->qid.flags & ADB_QID_FLAG_EXHAUSTIVE) {
mas01cr@498 639 r.qpos = j;
mas01cr@498 640 } else {
mas01cr@498 641 r.qpos = spec->qid.sequence_start;
mas01cr@498 642 }
mas01cr@498 643 r.ipos = k;
mas01cr@498 644 qstate->accumulator->add_point(&r);
mas01cr@239 645 }
mas01cr@239 646 }
mas01cr@239 647 }
mas01cr@239 648 }
mas01cr@239 649 } // Duration match
mas01cr@498 650 audiodb_delete_arrays(track, qpointers.nvectors, D, DD);
mas01cr@239 651 }
mas01cr@239 652 }
mas01cr@239 653
mas01cr@498 654 loop_finish:
mas01cr@498 655
mas01cr@239 656 free(data_buffer);
mas01cr@239 657
mas01cr@239 658 // Clean up
mas01cr@239 659 if(query_data)
mas01cr@239 660 delete[] query_data;
mas01cr@498 661 if(qpointers.l2norm_data)
mas01cr@498 662 delete[] qpointers.l2norm_data;
mas01cr@498 663 if(qpointers.power_data)
mas01cr@498 664 delete[] qpointers.power_data;
mas01cr@498 665 if(qpointers.mean_duration)
mas01cr@498 666 delete[] qpointers.mean_duration;
mas01cr@498 667 if(dbpointers.power_data)
mas01cr@498 668 delete[] dbpointers.power_data;
mas01cr@498 669 if(dbpointers.l2norm_data)
mas01cr@498 670 delete[] dbpointers.l2norm_data;
mas01cr@239 671 if(D)
mas01cr@239 672 delete[] D;
mas01cr@239 673 if(DD)
mas01cr@239 674 delete[] DD;
mas01cr@498 675 if(dbpointers.mean_duration)
mas01cr@498 676 delete[] dbpointers.mean_duration;
mas01cr@498 677
mas01cr@498 678 return 0;
mas01cr@239 679 }