annotate query.cpp @ 443:cb44e57a96fa api-inversion

New (internal) function audiodb_query_spec_qpointers(). It is intended to replace audioDB::set_up_query and audioDB::set_up_query_from_key; it isn't yet called, but I want to save my work now before messing with stuff, because the function itself is long and complicated: it handles three separate ways of getting data together (datum provided, key provided + LARGE_ADB, key provided + !LARGE_ADB). There is insufficient error checking as yet, but hey.
author mas01cr
date Wed, 24 Dec 2008 10:56:33 +0000
parents 5294ea1b1bf2
children 4fe90fd568fc
rev   line source
mas01cr@239 1 #include "audioDB.h"
mas01cr@239 2 #include "reporter.h"
mas01cr@239 3
mas01cr@422 4 #include "audioDB-internals.h"
mas01cr@422 5 #include "accumulators.h"
mas01cr@422 6
mas01cr@425 7 static bool audiodb_powers_acceptable(adb_query_refine_t *r, double p1, double p2) {
mas01cr@425 8 if (r->flags & ADB_REFINE_ABSOLUTE_THRESHOLD) {
mas01cr@425 9 if ((p1 < r->absolute_threshold) || (p2 < r->absolute_threshold)) {
mas01cr@239 10 return false;
mas01cr@239 11 }
mas01cr@239 12 }
mas01cr@425 13 if (r->flags & ADB_REFINE_RELATIVE_THRESHOLD) {
mas01cr@425 14 if (fabs(p1-p2) > fabs(r->relative_threshold)) {
mas01cr@239 15 return false;
mas01cr@239 16 }
mas01cr@239 17 }
mas01cr@239 18 return true;
mas01cr@239 19 }
mas01cr@239 20
// Query driver: builds an adb_query_spec_t from the audioDB member options,
// selects the reporter and accumulator that match the query type and distance,
// runs either the indexed or the brute-force search loop, and finally feeds
// the accumulated results to the reporter.
//
//   dbName           - database name; passed to initTables(), to
//                      index_query_loop() and to the brute-force log message
//   inFile           - query feature file; only passed to initTables() when
//                      the query is not taken from a key (query_from_key unset)
//   adbQueryResponse - forwarded unchanged to reporter->report()
//
// Failures are reported through error().
mas01cr@239 21 void audioDB::query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) {
mas01cr@425 22
mas01cr@431 23 // init database tables and dbH first
mas01cr@431 24 if(query_from_key)
mas01cr@431 25 initTables(dbName);
mas01cr@431 26 else
mas01cr@431 27 initTables(dbName, inFile);
mas01cr@431 28
mas01cr@435 29 adb_query_spec_t qspec;
mas01cr@435 30
// Translate the refinement-related member options into qspec.refine; each
// value field is only written when its flag is set (hopsize is the exception:
// it is always written, the flag only when the hop differs from 1).
mas01cr@435 31 qspec.refine.flags = 0;
mas01cr@425 32 /* FIXME: trackFile / ADB_REFINE_KEYLIST */
mas01cr@425 33 if(radius) {
mas01cr@435 34 qspec.refine.flags |= ADB_REFINE_RADIUS;
mas01cr@435 35 qspec.refine.radius = radius;
mas01cr@425 36 }
mas01cr@425 37 if(use_absolute_threshold) {
mas01cr@435 38 qspec.refine.flags |= ADB_REFINE_ABSOLUTE_THRESHOLD;
mas01cr@435 39 qspec.refine.absolute_threshold = absolute_threshold;
mas01cr@425 40 }
mas01cr@425 41 if(use_relative_threshold) {
mas01cr@435 42 qspec.refine.flags |= ADB_REFINE_RELATIVE_THRESHOLD;
mas01cr@435 43 qspec.refine.relative_threshold = relative_threshold;
mas01cr@425 44 }
mas01cr@425 45 if(usingTimes) {
mas01cr@435 46 qspec.refine.flags |= ADB_REFINE_DURATION_RATIO;
mas01cr@435 47 qspec.refine.duration_ratio = timesTol;
mas01cr@425 48 }
mas01cr@439 49 /* FIXME: not sure about this any more; maybe it belongs in
mas01cr@439 50 query_id? Or maybe we just don't need a flag for it? */
mas01cr@439 51 qspec.refine.hopsize = sequenceHop;
mas01cr@425 52 if(sequenceHop != 1) {
mas01cr@435 53 qspec.refine.flags |= ADB_REFINE_HOP_SIZE;
mas01cr@425 54 }
mas01cr@425 55
// Query identification. Note that qspec.qid.datum is not assigned anywhere in
// this function (see the FIXME below).
mas01cr@435 56 /* FIXME qspec.qid.datum */
mas01cr@435 57 qspec.qid.sequence_length = sequenceLength;
mas01cr@435 58 qspec.qid.flags = usingQueryPoint ? 0 : ADB_QUERY_ID_FLAG_EXHAUSTIVE;
mas01cr@435 59 qspec.qid.sequence_start = queryPoint;
mas01cr@435 60
// Per query type: fill in qspec.params and create the matching reporter.
// Point and track queries override the sequence length with 1 and use the
// dot-product distance; sequence queries use a Euclidean distance.
mas01cr@431 61 switch(queryType) {
mas01cr@431 62 case O2_POINT_QUERY:
mas01cr@435 63 qspec.qid.sequence_length = 1;
mas01cr@435 64 qspec.params.accumulation = ADB_ACCUMULATION_DB;
mas01cr@435 65 qspec.params.distance = ADB_DISTANCE_DOT_PRODUCT;
mas01cr@435 66 qspec.params.npoints = pointNN;
mas01cr@435 67 qspec.params.ntracks = 0;
mas01cr@431 68 reporter = new pointQueryReporter< std::greater < NNresult > >(pointNN);
mas01cr@431 69 break;
mas01cr@431 70 case O2_TRACK_QUERY:
mas01cr@435 71 qspec.qid.sequence_length = 1;
mas01cr@435 72 qspec.params.accumulation = ADB_ACCUMULATION_PER_TRACK;
mas01cr@435 73 qspec.params.distance = ADB_DISTANCE_DOT_PRODUCT;
mas01cr@435 74 qspec.params.npoints = pointNN;
mas01cr@435 75 qspec.params.ntracks = trackNN;
mas01cr@431 76 reporter = new trackAveragingReporter< std::greater< NNresult > >(pointNN, trackNN, dbH->numFiles);
mas01cr@431 77 break;
mas01cr@431 78 case O2_SEQUENCE_QUERY:
mas01cr@431 79 case O2_N_SEQUENCE_QUERY:
mas01cr@435 80 qspec.params.accumulation = ADB_ACCUMULATION_PER_TRACK;
mas01cr@435 81 qspec.params.distance = no_unit_norming ? ADB_DISTANCE_EUCLIDEAN : ADB_DISTANCE_EUCLIDEAN_NORMED;
mas01cr@435 82 qspec.params.npoints = pointNN;
mas01cr@435 83 qspec.params.ntracks = trackNN;
// The two sequence query types share the parameters above and differ only in
// the reporter. For radius searches with an existing index, the index is
// loaded into `lsh` and the reporter is sized from lsh->get_maxp() instead of
// dbH->numFiles.
mas01cr@431 84 switch(queryType) {
mas01cr@431 85 case O2_SEQUENCE_QUERY:
mas01cr@435 86 if(!(qspec.refine.flags & ADB_REFINE_RADIUS)) {
mas01cr@431 87 reporter = new trackAveragingReporter< std::less< NNresult > >(pointNN, trackNN, dbH->numFiles);
mas01cr@435 88 } else if (index_exists(adb->path, qspec.refine.radius, qspec.qid.sequence_length)) {
mas01cr@435 89 char* indexName = index_get_name(adb->path, qspec.refine.radius, qspec.qid.sequence_length);
mas01cr@431 90 lsh = index_allocate(indexName, false);
mas01cr@431 91 reporter = new trackSequenceQueryRadReporter(trackNN, index_to_trackID(lsh->get_maxp(), lsh_n_point_bits)+1);
mas01cr@431 92 delete[] indexName;
mas01cr@431 93 } else {
mas01cr@431 94 reporter = new trackSequenceQueryRadReporter(trackNN, dbH->numFiles);
mas01cr@431 95 }
mas01cr@431 96 break;
mas01cr@431 97 case O2_N_SEQUENCE_QUERY:
mas01cr@435 98 if(!(qspec.refine.flags & ADB_REFINE_RADIUS)) {
mas01cr@431 99 reporter = new trackSequenceQueryNNReporter< std::less < NNresult > >(pointNN, trackNN, dbH->numFiles);
mas01cr@435 100 } else if (index_exists(adb->path, qspec.refine.radius, qspec.qid.sequence_length)){
mas01cr@435 101 char* indexName = index_get_name(adb->path, qspec.refine.radius, qspec.qid.sequence_length);
mas01cr@431 102 lsh = index_allocate(indexName, false);
mas01cr@431 103 reporter = new trackSequenceQueryRadNNReporter(pointNN,trackNN, index_to_trackID(lsh->get_maxp(), lsh_n_point_bits)+1);
mas01cr@431 104 delete[] indexName;
mas01cr@431 105 } else {
mas01cr@431 106 reporter = new trackSequenceQueryRadNNReporter(pointNN,trackNN, dbH->numFiles);
mas01cr@431 107 }
mas01cr@431 108 break;
mas01cr@431 109 }
mas01cr@431 110 break;
// NOTE(review): this case assigns no reporter, yet reporter->add_point() and
// reporter->report() are called unconditionally at the end of the function --
// confirm that `reporter` is set up elsewhere for one-to-one queries.
mas01cr@431 111 case O2_ONE_TO_ONE_N_SEQUENCE_QUERY:
mas01cr@435 112 qspec.params.accumulation = ADB_ACCUMULATION_ONE_TO_ONE;
mas01cr@435 113 qspec.params.distance = ADB_DISTANCE_EUCLIDEAN_NORMED;
mas01cr@435 114 qspec.params.npoints = 0;
mas01cr@435 115 qspec.params.ntracks = 0;
mas01cr@431 116 break;
mas01cr@431 117 default:
mas01cr@431 118 error("unrecognized queryType");
mas01cr@431 119 }
mas01mc@292 120
// When querying by key, resolve the key to its index; a missing key or a
// lookup result of (uint32_t) -1 is reported as an error.
mas01mc@292 121 // keyKeyPos requires dbH to be initialized
mas01cr@430 122 if(query_from_key && (!key || (query_from_key_index = audiodb_key_index(adb, key)) == (uint32_t) -1))
mas01cr@430 123 error("Query key not found", key);
mas01cr@431 124
// Create the accumulator: the distance selects the comparison functor
// (adb_result_dist_gt for dot product, adb_result_dist_lt for both Euclidean
// variants) and the accumulation mode selects the accumulator class.
mas01cr@435 125 switch(qspec.params.distance) {
mas01cr@431 126 case ADB_DISTANCE_DOT_PRODUCT:
mas01cr@435 127 switch(qspec.params.accumulation) {
mas01cr@431 128 case ADB_ACCUMULATION_DB:
mas01cr@435 129 accumulator = new DBAccumulator<adb_result_dist_gt>(qspec.params.npoints);
mas01cr@431 130 break;
mas01cr@431 131 case ADB_ACCUMULATION_PER_TRACK:
mas01cr@435 132 accumulator = new PerTrackAccumulator<adb_result_dist_gt>(qspec.params.npoints, qspec.params.ntracks);
mas01cr@431 133 break;
mas01cr@431 134 case ADB_ACCUMULATION_ONE_TO_ONE:
mas01cr@431 135 accumulator = new NearestAccumulator<adb_result_dist_gt>();
mas01cr@431 136 break;
mas01cr@431 137 default:
mas01cr@431 138 error("unknown accumulation");
mas01cr@239 139 }
mas01cr@239 140 break;
mas01cr@431 141 case ADB_DISTANCE_EUCLIDEAN_NORMED:
mas01cr@431 142 case ADB_DISTANCE_EUCLIDEAN:
mas01cr@435 143 switch(qspec.params.accumulation) {
mas01cr@431 144 case ADB_ACCUMULATION_DB:
mas01cr@435 145 accumulator = new DBAccumulator<adb_result_dist_lt>(qspec.params.npoints);
mas01cr@431 146 break;
mas01cr@431 147 case ADB_ACCUMULATION_PER_TRACK:
mas01cr@435 148 accumulator = new PerTrackAccumulator<adb_result_dist_lt>(qspec.params.npoints, qspec.params.ntracks);
mas01cr@431 149 break;
mas01cr@431 150 case ADB_ACCUMULATION_ONE_TO_ONE:
mas01cr@431 151 accumulator = new NearestAccumulator<adb_result_dist_lt>();
mas01cr@431 152 break;
mas01cr@431 153 default:
mas01cr@431 154 error("unknown accumulation");
mas01mc@263 155 }
mas01mc@263 156 break;
mas01cr@239 157 default:
mas01cr@431 158 error("unknown distance function");
mas01cr@431 159 }
mas01cr@431 160
// Run the search: the indexed loop when a radius was requested and an index
// exists for this radius and sequence length, the brute-force loop otherwise.
mas01mc@292 161 // Test for index (again) here
mas01cr@435 162 if((qspec.refine.flags & ADB_REFINE_RADIUS) && index_exists(adb->path, qspec.refine.radius, qspec.qid.sequence_length)){
mas01cr@436 163 VERB_LOG(1, "Calling indexed query on database %s, radius=%f, sequence_length=%d\n", adb->path, qspec.refine.radius, qspec.qid.sequence_length);
mas01cr@435 164 index_query_loop(&qspec, dbName, query_from_key_index);
mas01mc@329 165 }
mas01mc@329 166 else{
mas01mc@329 167 VERB_LOG(1, "Calling brute-force query on database %s\n", dbName);
mas01cr@435 168 query_loop(&qspec, query_from_key_index);
mas01mc@329 169 }
mas01mc@292 170
// Hand every accumulated result to the reporter, translating the result key
// back to its index via audiodb_key_index().
// NOTE(review): neither `rs` nor `accumulator` is released in this function --
// confirm who owns them.
mas01cr@423 171 adb_query_results_t *rs = accumulator->get_points();
mas01cr@423 172 for(unsigned int k = 0; k < rs->nresults; k++) {
mas01cr@423 173 adb_result_t r = rs->results[k];
mas01cr@430 174 reporter->add_point(audiodb_key_index(adb, r.key), r.qpos, r.ipos, r.dist);
mas01cr@423 175 }
mas01cr@423 176
mas01mc@292 177 reporter->report(fileTable, adbQueryResponse);
mas01cr@239 178 }
mas01cr@239 179
mas01cr@439 180 static void audiodb_initialize_arrays(adb_t *adb, adb_query_spec_t *spec, int track, unsigned int numVectors, double *query, double *data_buffer, double **D, double **DD) {
mas01cr@239 181 unsigned int j, k, l, w;
mas01cr@239 182 double *dp, *qp, *sp;
mas01cr@239 183
mas01cr@439 184 const unsigned HOP_SIZE = spec->refine.hopsize;
mas01cr@435 185 const unsigned wL = spec->qid.sequence_length;
mas01cr@239 186
mas01cr@239 187 for(j = 0; j < numVectors; j++) {
mas01cr@239 188 // Sum products matrix
mas01cr@433 189 D[j] = new double[(*adb->track_lengths)[track]];
mas01cr@239 190 assert(D[j]);
mas01cr@239 191 // Matched filter matrix
mas01cr@433 192 DD[j]=new double[(*adb->track_lengths)[track]];
mas01cr@239 193 assert(DD[j]);
mas01cr@239 194 }
mas01cr@239 195
mas01cr@239 196 // Dot product
mas01cr@239 197 for(j = 0; j < numVectors; j++)
mas01cr@433 198 for(k = 0; k < (*adb->track_lengths)[track]; k++){
mas01cr@439 199 qp = query + j * adb->header->dim;
mas01cr@439 200 sp = data_buffer + k * adb->header->dim;
mas01cr@239 201 DD[j][k] = 0.0; // Initialize matched filter array
mas01cr@239 202 dp = &D[j][k]; // point to correlation cell j,k
mas01cr@239 203 *dp = 0.0; // initialize correlation cell
mas01cr@439 204 l = adb->header->dim; // size of vectors
mas01cr@239 205 while(l--)
mas01cr@239 206 *dp += *qp++ * *sp++;
mas01cr@239 207 }
mas01cr@239 208
mas01cr@239 209 // Matched Filter
mas01cr@239 210 // HOP SIZE == 1
mas01cr@239 211 double* spd;
mas01cr@239 212 if(HOP_SIZE == 1) { // HOP_SIZE = shingleHop
mas01cr@239 213 for(w = 0; w < wL; w++) {
mas01cr@239 214 for(j = 0; j < numVectors - w; j++) {
mas01cr@239 215 sp = DD[j];
mas01cr@239 216 spd = D[j+w] + w;
mas01cr@433 217 k = (*adb->track_lengths)[track] - w;
mas01mc@292 218 while(k--)
mas01mc@292 219 *sp++ += *spd++;
mas01cr@239 220 }
mas01cr@239 221 }
mas01cr@239 222 } else { // HOP_SIZE != 1
mas01cr@239 223 for(w = 0; w < wL; w++) {
mas01cr@239 224 for(j = 0; j < numVectors - w; j += HOP_SIZE) {
mas01cr@239 225 sp = DD[j];
mas01cr@239 226 spd = D[j+w]+w;
mas01cr@433 227 for(k = 0; k < (*adb->track_lengths)[track] - w; k += HOP_SIZE) {
mas01cr@239 228 *sp += *spd;
mas01cr@239 229 sp += HOP_SIZE;
mas01cr@239 230 spd += HOP_SIZE;
mas01cr@239 231 }
mas01cr@239 232 }
mas01cr@239 233 }
mas01cr@239 234 }
mas01cr@239 235 }
mas01cr@239 236
/* Release the per-row buffers of the two matrices D and DD.
 *
 *   track      - not used by this function
 *   numVectors - number of rows in each matrix
 *   D, DD      - arrays of numVectors row pointers; either may be NULL, in
 *                which case that matrix is skipped
 *
 * Every row is released with delete[]; the arrays of row pointers themselves
 * are left alone.  All rows of D are released before any row of DD. */
static void audiodb_delete_arrays(int track, unsigned int numVectors, double **D, double **DD) {
  double **matrices[2] = { D, DD };

  for(int m = 0; m < 2; ++m) {
    double **rows = matrices[m];
    if(rows == NULL) {
      continue;
    }
    for(double **row = rows; row != rows + numVectors; ++row) {
      delete[] *row;
    }
  }
}
mas01cr@239 249
mas01cr@433 250 int audiodb_read_data(adb_t *adb, int trkfid, int track, double **data_buffer_p, size_t *data_buffer_size_p) {
mas01cr@433 251 uint32_t track_length = (*adb->track_lengths)[track];
mas01cr@433 252 size_t track_size = track_length * sizeof(double) * adb->header->dim;
mas01cr@433 253 if (track_size > *data_buffer_size_p) {
mas01cr@239 254 if(*data_buffer_p) {
mas01cr@239 255 free(*data_buffer_p);
mas01cr@239 256 }
mas01cr@239 257 {
mas01cr@433 258 *data_buffer_size_p = track_size;
mas01cr@433 259 void *tmp = malloc(track_size);
mas01cr@239 260 if (tmp == NULL) {
mas01cr@433 261 goto error;
mas01cr@239 262 }
mas01cr@239 263 *data_buffer_p = (double *) tmp;
mas01cr@239 264 }
mas01cr@239 265 }
mas01cr@239 266
mas01cr@433 267 read_or_goto_error(trkfid, *data_buffer_p, track_size);
mas01cr@433 268 return 0;
mas01cr@433 269
mas01cr@433 270 error:
mas01cr@433 271 return 1;
mas01cr@239 272 }
mas01cr@239 273
mas01cr@405 274 void audioDB::insertTimeStamps(unsigned numVectors, std::ifstream *timesFile, double *timesdata) {
mas01cr@405 275 assert(usingTimes);
mas01cr@405 276
mas01cr@405 277 unsigned numtimes = 0;
mas01cr@405 278
mas01cr@405 279 if(!timesFile->is_open()) {
mas01cr@405 280 error("problem opening times file on timestamped database", timesFileName);
mas01cr@405 281 }
mas01cr@405 282
mas01cr@405 283 double timepoint, next;
mas01cr@405 284 *timesFile >> timepoint;
mas01cr@405 285 if (timesFile->eof()) {
mas01cr@405 286 error("no entries in times file", timesFileName);
mas01cr@405 287 }
mas01cr@405 288 numtimes++;
mas01cr@405 289 do {
mas01cr@405 290 *timesFile >> next;
mas01cr@405 291 if (timesFile->eof()) {
mas01cr@405 292 break;
mas01cr@405 293 }
mas01cr@405 294 numtimes++;
mas01cr@405 295 timesdata[0] = timepoint;
mas01cr@405 296 timepoint = (timesdata[1] = next);
mas01cr@405 297 timesdata += 2;
mas01cr@405 298 } while (numtimes < numVectors + 1);
mas01cr@405 299
mas01cr@405 300 if (numtimes < numVectors + 1) {
mas01cr@405 301 error("too few timepoints in times file", timesFileName);
mas01cr@405 302 }
mas01cr@405 303
mas01cr@405 304 *timesFile >> next;
mas01cr@405 305 if (!timesFile->eof()) {
mas01cr@405 306 error("too many timepoints in times file", timesFileName);
mas01cr@405 307 }
mas01cr@405 308 }
mas01cr@405 309
mas01cr@443 310 static int audiodb_query_spec_qpointers(adb_t *adb, adb_query_spec_t *spec, double **vector_data, double **vector, adb_qpointers_internal_t *qpointers) {
mas01cr@443 311 adb_datum_t *datum;
mas01cr@443 312 adb_datum_t d = {0};
mas01cr@443 313 uint32_t nvectors;
mas01cr@443 314 uint32_t sequence_length;
mas01cr@443 315 uint32_t sequence_start;
mas01cr@443 316
mas01cr@443 317 datum = spec->qid.datum;
mas01cr@443 318 sequence_length = spec->qid.sequence_length;
mas01cr@443 319 sequence_start = spec->qid.sequence_start;
mas01cr@443 320
mas01cr@443 321 if(datum->data) {
mas01cr@443 322 if(datum->dim != adb->header->dim) {
mas01cr@443 323 return 1;
mas01cr@443 324 }
mas01cr@443 325 /* initialize d, and mark that nothing needs freeing later. */
mas01cr@443 326 d = *datum;
mas01cr@443 327 datum = &d;
mas01cr@443 328 } else if (datum->key) {
mas01cr@443 329 std::map<std::string,uint32_t>::iterator it;
mas01cr@443 330 it = adb->keys->find(datum->key);
mas01cr@443 331 if(it == adb->keys->end()) {
mas01cr@443 332 return 1;
mas01cr@443 333 }
mas01cr@443 334 uint32_t track_id = (*it).second;
mas01cr@443 335 off_t track_offset = (*adb->track_offsets)[track_id];
mas01cr@443 336
mas01cr@443 337 if(adb->header->flags & O2_FLAG_LARGE_ADB) {
mas01cr@443 338 /* create a reference/insert, then use adb_insert_create_datum() */
mas01cr@443 339 adb_reference_t reference = {0};
mas01cr@443 340 char features[MAXSTR], power[MAXSTR], times[MAXSTR];
mas01cr@443 341 lseek(adb->fd, adb->header->dataOffset + track_id * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@443 342 /* FIXME: learn not to worry and love the bomb^Wbuffer overflow */
mas01cr@443 343 read(adb->fd, features, MAXSTR);
mas01cr@443 344 reference.features = features;
mas01cr@443 345 if(adb->header->flags & O2_FLAG_POWER) {
mas01cr@443 346 lseek(adb->fd, adb->header->powerTableOffset + track_id * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@443 347 read(adb->fd, power, MAXSTR);
mas01cr@443 348 reference.power = power;
mas01cr@443 349 }
mas01cr@443 350 if(adb->header->flags & O2_FLAG_TIMES) {
mas01cr@443 351 lseek(adb->fd, adb->header->timesTableOffset + track_id * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@443 352 read(adb->fd, times, MAXSTR);
mas01cr@443 353 reference.times = times;
mas01cr@443 354 }
mas01cr@443 355 audiodb_insert_create_datum(&reference, &d);
mas01cr@443 356 } else {
mas01cr@443 357 /* initialize from sources of data that we already have */
mas01cr@443 358 d.nvectors = (*adb->track_lengths)[track_id];
mas01cr@443 359 d.dim = adb->header->dim;
mas01cr@443 360 d.key = datum->key;
mas01cr@443 361 /* read out stuff from the database tables */
mas01cr@443 362 d.data = (double *) malloc(d.nvectors * d.dim * sizeof(double));
mas01cr@443 363 lseek(adb->fd, adb->header->dataOffset + track_offset, SEEK_SET);
mas01cr@443 364 read(adb->fd, d.data, d.nvectors * d.dim * sizeof(double));
mas01cr@443 365 if(adb->header->flags & O2_FLAG_POWER) {
mas01cr@443 366 d.power = (double *) malloc(d.nvectors * sizeof(double));
mas01cr@443 367 lseek(adb->fd, adb->header->powerTableOffset + track_offset / d.dim, SEEK_SET);
mas01cr@443 368 read(adb->fd, d.power, d.nvectors * sizeof(double));
mas01cr@443 369 }
mas01cr@443 370 if(adb->header->flags & O2_FLAG_TIMES) {
mas01cr@443 371 d.times = (double *) malloc(2 * d.nvectors * sizeof(double));
mas01cr@443 372 lseek(adb->fd, adb->header->timesTableOffset + track_offset / d.dim, SEEK_SET);
mas01cr@443 373 read(adb->fd, d.times, 2 * d.nvectors * sizeof(double));
mas01cr@443 374 }
mas01cr@443 375 }
mas01cr@443 376 } else {
mas01cr@443 377 return 1;
mas01cr@443 378 }
mas01cr@443 379
mas01cr@443 380 /* Now we have a full(ish) datum, compute all the qpointery stuff
mas01cr@443 381 that we care about (l2norm/power/mean duration). (This bit could
mas01cr@443 382 conceivably become a new function) */
mas01cr@443 383 nvectors = d.nvectors;
mas01cr@443 384 if(sequence_start > nvectors - sequence_length) {
mas01cr@443 385 /* is there something to free? goto error */
mas01cr@443 386 return 1;
mas01cr@443 387 }
mas01cr@443 388
mas01cr@443 389 qpointers->nvectors = nvectors;
mas01cr@443 390
mas01cr@443 391 size_t vector_size = nvectors * sizeof(double) * d.dim;
mas01cr@443 392 *vector_data = (double *) malloc(vector_size);
mas01cr@443 393 memcpy(*vector_data, d.data, vector_size);
mas01cr@443 394
mas01cr@443 395 qpointers->l2norm_data = (double *) malloc(vector_size / d.dim);
mas01cr@443 396 audiodb_l2norm_buffer(*vector_data, d.dim, nvectors, qpointers->l2norm_data);
mas01cr@443 397 audiodb_sequence_sum(qpointers->l2norm_data, nvectors, sequence_length);
mas01cr@443 398 audiodb_sequence_sqrt(qpointers->l2norm_data, nvectors, sequence_length);
mas01cr@443 399
mas01cr@443 400 if(d.power) {
mas01cr@443 401 qpointers->power_data = (double *) malloc(vector_size / d.dim);
mas01cr@443 402 memcpy(qpointers->power_data, d.power, vector_size / d.dim);
mas01cr@443 403 audiodb_sequence_sum(qpointers->power_data, nvectors, sequence_length);
mas01cr@443 404 audiodb_sequence_average(qpointers->power_data, nvectors, sequence_length);
mas01cr@443 405 }
mas01cr@443 406
mas01cr@443 407 if(d.times) {
mas01cr@443 408 qpointers->mean_duration = (double *) calloc(1, sizeof(double));
mas01cr@443 409 for(unsigned int k = 0; k < nvectors; k++) {
mas01cr@443 410 *qpointers->mean_duration += d.times[2*k+1] - d.times[2*k];
mas01cr@443 411 }
mas01cr@443 412 *qpointers->mean_duration /= nvectors;
mas01cr@443 413 }
mas01cr@443 414
mas01cr@443 415
mas01cr@443 416 /* Finally, set up the moving qpointers. */
mas01cr@443 417 if(spec->qid.flags & ADB_QUERY_ID_FLAG_EXHAUSTIVE) {
mas01cr@443 418 *vector = *vector_data;
mas01cr@443 419 qpointers->l2norm = qpointers->l2norm_data;
mas01cr@443 420 qpointers->power = qpointers->power_data;
mas01cr@443 421 } else {
mas01cr@443 422 *vector = *vector_data + spec->qid.sequence_start * d.dim;
mas01cr@443 423 qpointers->l2norm = qpointers->l2norm_data + spec->qid.sequence_start;
mas01cr@443 424 qpointers->power = qpointers->power_data + spec->qid.sequence_start;
mas01cr@443 425 }
mas01cr@443 426
mas01cr@443 427
mas01cr@443 428 /* Clean up: free any bits of datum that we have ourselves
mas01cr@443 429 * allocated. */
mas01cr@443 430 if(datum != &d) {
mas01cr@443 431 audiodb_free_datum(&d);
mas01cr@443 432 }
mas01cr@443 433 }
mas01cr@443 434
mas01cr@239 435 // These names deserve some unpicking. The names starting with a "q"
mas01cr@239 436 // are pointers to the query, norm and power vectors; the names
mas01cr@239 437 // starting with "v" are things that will end up pointing to the
mas01cr@239 438 // actual query point's information. -- CSR, 2007-12-05
mas01cr@437 439 void audioDB::set_up_query(adb_query_spec_t *spec, double **qp, double **vqp, adb_qpointers_internal_t *qpointers) {
mas01cr@437 440 uint32_t nvectors = (statbuf.st_size - sizeof(int)) / (dbH->dim * sizeof(double));
mas01cr@437 441 qpointers->nvectors = nvectors;
mas01mc@292 442
mas01cr@435 443 uint32_t sequence_length = spec->qid.sequence_length;
mas01cr@239 444 if(!(dbH->flags & O2_FLAG_L2NORM)) {
mas01cr@239 445 error("Database must be L2 normed for sequence query","use -L2NORM");
mas01cr@239 446 }
mas01cr@239 447
mas01cr@437 448 if(nvectors < sequence_length) {
mas01cr@239 449 error("Query shorter than requested sequence length", "maybe use -l");
mas01cr@239 450 }
mas01cr@239 451
mas01cr@239 452 VERB_LOG(1, "performing norms... ");
mas01cr@239 453
mas01cr@437 454 *qp = new double[nvectors * dbH->dim];
mas01cr@437 455 memcpy(*qp, indata+sizeof(int), nvectors * dbH->dim * sizeof(double));
mas01cr@437 456 qpointers->l2norm_data = new double[nvectors];
mas01cr@437 457 audiodb_l2norm_buffer(*qp, dbH->dim, nvectors, qpointers->l2norm_data);
mas01cr@239 458
mas01cr@437 459 audiodb_sequence_sum(qpointers->l2norm_data, nvectors, sequence_length);
mas01cr@437 460 audiodb_sequence_sqrt(qpointers->l2norm_data, nvectors, sequence_length);
mas01cr@239 461
mas01cr@239 462 if (usingPower) {
mas01cr@437 463 qpointers->power_data = new double[nvectors];
mas01cr@239 464 if (lseek(powerfd, sizeof(int), SEEK_SET) == (off_t) -1) {
mas01cr@239 465 error("error seeking to data", powerFileName, "lseek");
mas01cr@239 466 }
mas01cr@437 467 int count = read(powerfd, qpointers->power_data, nvectors * sizeof(double));
mas01cr@239 468 if (count == -1) {
mas01cr@239 469 error("error reading data", powerFileName, "read");
mas01cr@239 470 }
mas01cr@437 471 if ((unsigned) count != nvectors * sizeof(double)) {
mas01cr@239 472 error("short read", powerFileName);
mas01cr@239 473 }
mas01cr@239 474
mas01cr@437 475 audiodb_sequence_sum(qpointers->power_data, nvectors, sequence_length);
mas01cr@437 476 audiodb_sequence_average(qpointers->power_data, nvectors, sequence_length);
mas01cr@239 477 }
mas01cr@239 478
mas01cr@239 479 if (usingTimes) {
mas01cr@239 480 unsigned int k;
mas01cr@437 481 qpointers->mean_duration = new double[1];
mas01cr@437 482 *qpointers->mean_duration = 0.0;
mas01cr@437 483 double *querydurs = new double[nvectors];
mas01cr@437 484 double *timesdata = new double[2*nvectors];
mas01cr@437 485 insertTimeStamps(nvectors, timesFile, timesdata);
mas01cr@437 486 for(k = 0; k < nvectors; k++) {
mas01cr@239 487 querydurs[k] = timesdata[2*k+1] - timesdata[2*k];
mas01cr@437 488 *qpointers->mean_duration += querydurs[k];
mas01cr@239 489 }
mas01cr@437 490 *qpointers->mean_duration /= k;
mas01cr@239 491
mas01cr@437 492 VERB_LOG(1, "mean query file duration: %f\n", *qpointers->mean_duration);
mas01cr@239 493
mas01cr@239 494 delete [] querydurs;
mas01cr@239 495 delete [] timesdata;
mas01cr@239 496 }
mas01cr@239 497
mas01cr@239 498 // Defaults, for exhaustive search (!usingQueryPoint)
mas01cr@239 499 *vqp = *qp;
mas01cr@437 500 qpointers->l2norm = qpointers->l2norm_data;
mas01cr@437 501 qpointers->power = qpointers->power_data;
mas01cr@239 502
mas01cr@239 503 if(usingQueryPoint) {
mas01cr@437 504 if( !(queryPoint < nvectors && queryPoint < nvectors - sequence_length + 1) ) {
mas01cr@437 505 error("queryPoint >= nvectors-sequence_length+1 in query");
mas01cr@239 506 } else {
mas01cr@239 507 VERB_LOG(1, "query point: %u\n", queryPoint);
mas01cr@239 508 *vqp = *qp + queryPoint * dbH->dim;
mas01cr@437 509 qpointers->l2norm = qpointers->l2norm_data + queryPoint;
mas01cr@239 510 if (usingPower) {
mas01cr@437 511 qpointers->power = qpointers->power_data + queryPoint;
mas01cr@239 512 }
mas01cr@437 513 qpointers->nvectors = sequence_length;
mas01cr@239 514 }
mas01cr@239 515 }
mas01cr@239 516 }
mas01cr@239 517
mas01mc@292 518 // Does the same as set_up_query(...) but from database features instead of from a file
mas01mc@292 519 // Constructs the same outputs as set_up_query
mas01cr@437 520 void audioDB::set_up_query_from_key(adb_query_spec_t *spec, double **qp, double **vqp, adb_qpointers_internal_t *qpointers, Uns32T queryIndex) {
mas01cr@435 521 uint32_t sequence_length = spec->qid.sequence_length;
mas01mc@292 522 if(!trackTable)
mas01mc@292 523 error("trackTable not initialized","set_up_query_from_key");
mas01mc@292 524
mas01mc@292 525 if(!(dbH->flags & O2_FLAG_L2NORM)) {
mas01mc@292 526 error("Database must be L2 normed for sequence query","use -L2NORM");
mas01mc@292 527 }
mas01mc@292 528
mas01mc@292 529 if(dbH->flags & O2_FLAG_POWER)
mas01mc@292 530 usingPower = true;
mas01mc@292 531
mas01mc@292 532 if(dbH->flags & O2_FLAG_TIMES)
mas01mc@292 533 usingTimes = true;
mas01mc@292 534
mas01cr@437 535 uint32_t nvectors = trackTable[queryIndex];
mas01cr@437 536 qpointers->nvectors = nvectors;
mas01cr@437 537 if(nvectors < sequence_length) {
mas01mc@292 538 error("Query shorter than requested sequence length", "maybe use -l");
mas01mc@292 539 }
mas01mc@292 540
mas01mc@292 541 VERB_LOG(1, "performing norms... ");
mas01mc@292 542
mas01mc@324 543 // For LARGE_ADB load query features from file
mas01mc@324 544 if( dbH->flags & O2_FLAG_LARGE_ADB ){
mas01mc@324 545 if(infid>0)
mas01mc@324 546 close(infid);
mas01mc@324 547 char* prefixedString = new char[O2_MAXFILESTR];
mas01mc@324 548 char* tmpStr = prefixedString;
mas01mc@324 549 strncpy(prefixedString, featureFileNameTable+queryIndex*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR);
mas01mc@324 550 prefix_name(&prefixedString, adb_feature_root);
mas01mc@324 551 if(tmpStr!=prefixedString)
mas01mc@324 552 delete[] tmpStr;
mas01mc@324 553 initInputFile(prefixedString, false); // nommap, file pointer at correct position
mas01mc@324 554 size_t allocatedSize = 0;
mas01cr@433 555 if(audiodb_read_data(adb, infid, queryIndex, qp, &allocatedSize))
mas01cr@433 556 error("failed to read data"); // over-writes qp and allocatedSize
mas01mc@324 557 // Consistency check on allocated memory and query feature size
mas01cr@437 558 if(nvectors*sizeof(double)*dbH->dim != allocatedSize)
mas01mc@324 559 error("Query memory allocation failed consitency check","set_up_query_from_key");
mas01mc@324 560 // Allocated and calculate auxillary sequences: l2norm and power
mas01cr@437 561 init_track_aux_data(queryIndex, *qp, &qpointers->l2norm_data, &qpointers->l2norm, &qpointers->power_data, &qpointers->power);
mas01mc@324 562 }
mas01mc@324 563 else{ // Load from self-contained ADB database
mas01mc@324 564 // Read query feature vectors from database
mas01mc@324 565 *qp = NULL;
mas01mc@324 566 lseek(dbfid, dbH->dataOffset + trackOffsetTable[queryIndex] * sizeof(double), SEEK_SET);
mas01mc@324 567 size_t allocatedSize = 0;
mas01cr@433 568 if(audiodb_read_data(adb, dbfid, queryIndex, qp, &allocatedSize))
mas01cr@433 569 error("failed to read data");
mas01mc@324 570 // Consistency check on allocated memory and query feature size
mas01cr@437 571 if(nvectors*sizeof(double)*dbH->dim != allocatedSize)
mas01mc@324 572 error("Query memory allocation failed consitency check","set_up_query_from_key");
mas01mc@324 573
mas01mc@324 574 Uns32T trackIndexOffset = trackOffsetTable[queryIndex]/dbH->dim; // Convert num data elements to num vectors
mas01mc@324 575 // Copy L2 norm partial-sum coefficients
mas01cr@437 576 assert(qpointers->l2norm_data = new double[nvectors]);
mas01cr@437 577 memcpy(qpointers->l2norm_data, l2normTable+trackIndexOffset, nvectors*sizeof(double));
mas01cr@437 578 audiodb_sequence_sum(qpointers->l2norm_data, nvectors, sequence_length);
mas01cr@437 579 audiodb_sequence_sqrt(qpointers->l2norm_data, nvectors, sequence_length);
mas01mc@324 580
mas01mc@324 581 if( usingPower ){
mas01mc@324 582 // Copy Power partial-sum coefficients
mas01cr@437 583 assert(qpointers->power_data = new double[nvectors]);
mas01cr@437 584 memcpy(qpointers->power_data, powerTable+trackIndexOffset, nvectors*sizeof(double));
mas01cr@437 585 audiodb_sequence_sum(qpointers->power_data, nvectors, sequence_length);
mas01cr@437 586 audiodb_sequence_average(qpointers->power_data, nvectors, sequence_length);
mas01mc@324 587 }
mas01mc@324 588
mas01mc@324 589 if (usingTimes) {
mas01mc@324 590 unsigned int k;
mas01cr@437 591 qpointers->mean_duration = new double[1];
mas01cr@437 592 *qpointers->mean_duration = 0.0;
mas01cr@437 593 double *querydurs = new double[nvectors];
mas01cr@437 594 double *timesdata = new double[nvectors*2];
mas01mc@324 595 assert(querydurs && timesdata);
mas01cr@437 596 memcpy(timesdata, timesTable+trackIndexOffset, nvectors*sizeof(double));
mas01cr@437 597 for(k = 0; k < nvectors; k++) {
mas01mc@324 598 querydurs[k] = timesdata[2*k+1] - timesdata[2*k];
mas01cr@437 599 *qpointers->mean_duration += querydurs[k];
mas01mc@324 600 }
mas01cr@437 601 *qpointers->mean_duration /= k;
mas01mc@324 602
mas01cr@437 603 VERB_LOG(1, "mean query file duration: %f\n", *qpointers->mean_duration);
mas01mc@324 604
mas01mc@324 605 delete [] querydurs;
mas01mc@324 606 delete [] timesdata;
mas01mc@324 607 }
mas01mc@292 608 }
mas01mc@292 609
mas01mc@292 610 // Defaults, for exhaustive search (!usingQueryPoint)
mas01mc@292 611 *vqp = *qp;
mas01cr@437 612 qpointers->l2norm = qpointers->l2norm_data;
mas01cr@437 613 qpointers->power = qpointers->power_data;
mas01mc@292 614
mas01mc@292 615 if(usingQueryPoint) {
mas01cr@437 616 if( !(queryPoint < nvectors && queryPoint < nvectors - sequence_length + 1) ) {
mas01cr@437 617 error("queryPoint >= nvectors-sequence_length+1 in query");
mas01mc@292 618 } else {
mas01mc@292 619 VERB_LOG(1, "query point: %u\n", queryPoint);
mas01mc@292 620 *vqp = *qp + queryPoint * dbH->dim;
mas01cr@437 621 qpointers->l2norm = qpointers->l2norm_data + queryPoint;
mas01mc@292 622 if (usingPower) {
mas01cr@437 623 qpointers->power = qpointers->power_data + queryPoint;
mas01mc@292 624 }
mas01cr@437 625 qpointers->nvectors = sequence_length;
mas01mc@292 626 }
mas01mc@292 627 }
mas01mc@292 628 }
mas01mc@292 629
mas01mc@292 630
mas01cr@239 631 // FIXME: this is not the right name; we're not actually setting up
mas01cr@239 632 // the database, but copying various bits of it out of mmap()ed tables
mas01cr@239 633 // in order to reduce seeks.
mas01cr@438 634 static int audiodb_set_up_db(adb_t *adb, adb_query_spec_t *spec, adb_qpointers_internal_t *dbpointers) {
mas01cr@438 635 uint32_t nvectors = adb->header->length / (adb->header->dim * sizeof(double));
mas01cr@435 636 uint32_t sequence_length = spec->qid.sequence_length;
mas01cr@438 637
mas01cr@437 638 bool using_power = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD);
mas01cr@437 639 bool using_times = spec->refine.flags & ADB_REFINE_DURATION_RATIO;
mas01cr@437 640 double *times_table = NULL;
mas01cr@435 641
mas01cr@239 642
mas01cr@438 643 dbpointers->nvectors = nvectors;
mas01cr@438 644 dbpointers->l2norm_data = new double[nvectors];
mas01cr@438 645
mas01cr@438 646 double *snpp = dbpointers->l2norm_data, *sppp = 0;
mas01cr@434 647 lseek(adb->fd, adb->header->l2normTableOffset, SEEK_SET);
mas01cr@438 648 read_or_goto_error(adb->fd, dbpointers->l2norm_data, nvectors * sizeof(double));
mas01cr@239 649
mas01cr@437 650 if (using_power) {
mas01cr@434 651 if (!(adb->header->flags & O2_FLAG_POWER)) {
mas01cr@434 652 goto error;
mas01cr@239 653 }
mas01cr@438 654 dbpointers->power_data = new double[nvectors];
mas01cr@438 655 sppp = dbpointers->power_data;
mas01cr@434 656 lseek(adb->fd, adb->header->powerTableOffset, SEEK_SET);
mas01cr@438 657 read_or_goto_error(adb->fd, dbpointers->power_data, nvectors * sizeof(double));
mas01cr@239 658 }
mas01cr@239 659
mas01cr@434 660 for(unsigned int i = 0; i < adb->header->numFiles; i++){
mas01cr@434 661 size_t track_length = (*adb->track_lengths)[i];
mas01cr@435 662 if(track_length >= sequence_length) {
mas01cr@435 663 audiodb_sequence_sum(snpp, track_length, sequence_length);
mas01cr@435 664 audiodb_sequence_sqrt(snpp, track_length, sequence_length);
mas01cr@437 665 if (using_power) {
mas01cr@435 666 audiodb_sequence_sum(sppp, track_length, sequence_length);
mas01cr@435 667 audiodb_sequence_average(sppp, track_length, sequence_length);
mas01cr@239 668 }
mas01cr@239 669 }
mas01cr@434 670 snpp += track_length;
mas01cr@437 671 if (using_power) {
mas01cr@434 672 sppp += track_length;
mas01cr@239 673 }
mas01cr@239 674 }
mas01cr@239 675
mas01cr@437 676 if (using_times) {
mas01cr@434 677 if(!(adb->header->flags & O2_FLAG_TIMES)) {
mas01cr@437 678 goto error;
mas01cr@239 679 }
mas01cr@239 680
mas01cr@438 681 dbpointers->mean_duration = new double[adb->header->numFiles];
mas01cr@239 682
mas01cr@438 683 times_table = (double *) malloc(2 * nvectors * sizeof(double));
mas01cr@437 684 if(!times_table) {
mas01cr@437 685 goto error;
mas01cr@437 686 }
mas01cr@437 687 lseek(adb->fd, adb->header->timesTableOffset, SEEK_SET);
mas01cr@438 688 read_or_goto_error(adb->fd, times_table, 2 * nvectors * sizeof(double));
mas01cr@434 689 for(unsigned int k = 0; k < adb->header->numFiles; k++) {
mas01cr@434 690 size_t track_length = (*adb->track_lengths)[k];
mas01cr@239 691 unsigned int j;
mas01cr@438 692 dbpointers->mean_duration[k] = 0.0;
mas01cr@434 693 for(j = 0; j < track_length; j++) {
mas01cr@438 694 dbpointers->mean_duration[k] += times_table[2*j+1] - times_table[2*j];
mas01cr@239 695 }
mas01cr@438 696 dbpointers->mean_duration[k] /= j;
mas01cr@239 697 }
mas01cr@437 698
mas01cr@437 699 free(times_table);
mas01cr@437 700 times_table = NULL;
mas01cr@239 701 }
mas01cr@239 702
mas01cr@438 703 dbpointers->l2norm = dbpointers->l2norm_data;
mas01cr@438 704 dbpointers->power = dbpointers->power_data;
mas01cr@434 705 return 0;
mas01cr@434 706
mas01cr@434 707 error:
mas01cr@438 708 if(dbpointers->l2norm_data) {
mas01cr@438 709 delete [] dbpointers->l2norm_data;
mas01cr@434 710 }
mas01cr@438 711 if(dbpointers->power_data) {
mas01cr@438 712 delete [] dbpointers->power_data;
mas01cr@434 713 }
mas01cr@438 714 if(dbpointers->mean_duration) {
mas01cr@438 715 delete [] dbpointers->mean_duration;
mas01cr@434 716 }
mas01cr@437 717 if(times_table) {
mas01cr@437 718 free(times_table);
mas01cr@437 719 }
mas01cr@434 720 return 1;
mas01cr@434 721
mas01cr@239 722 }
mas01cr@239 723
mas01mc@292 724 // query_points()
mas01mc@292 725 //
mas01mc@292 726 // using PointPairs held in the exact_evaluation_queue compute squared distance for each PointPair
mas01mc@292 727 // and insert result into the current reporter.
mas01mc@292 728 //
mas01mc@292 729 // Preconditions:
mas01mc@292 730 // A query inFile has been opened with setup_query(...) and query pointers initialized
mas01mc@292 731 // The database contains some points
mas01mc@292 732 // An exact_evaluation_queue has been allocated and populated
mas01mc@292 733 // A reporter has been allocated
mas01mc@292 734 //
mas01mc@292 735 // Postconditions:
mas01mc@292 736 // reporter contains the points and distances that meet the reporter constraints
mas01mc@292 737
mas01cr@437 738 void audioDB::query_loop_points(adb_query_spec_t *spec, double* query, adb_qpointers_internal_t *qpointers) {
mas01cr@438 739 adb_qpointers_internal_t dbpointers = {0};
mas01mc@292 740
mas01cr@436 741 uint32_t sequence_length = spec->qid.sequence_length;
mas01cr@437 742 bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD);
mas01cr@436 743
mas01mc@292 744 // check pre-conditions
mas01mc@292 745 assert(exact_evaluation_queue&&reporter);
mas01mc@292 746 if(!exact_evaluation_queue->size()) // Exit if no points to evaluate
mas01mc@292 747 return;
mas01mc@292 748
mas01mc@292 749 // Compute database info
mas01mc@292 750 // FIXME: we more than likely don't need very much of the database
mas01mc@292 751 // so make a new method to build these values per-track or, even better, per-point
mas01mc@324 752 if( !( dbH->flags & O2_FLAG_LARGE_ADB) )
mas01cr@438 753 if(audiodb_set_up_db(adb, spec, &dbpointers)) {
mas01cr@434 754 error("failed to set up db");
mas01cr@434 755 }
mas01mc@292 756
mas01mc@292 757 VERB_LOG(1, "matching points...");
mas01mc@292 758
mas01mc@292 759 // We are guaranteed that the order of points is sorted by:
mas01mc@324 760 // trackID, spos, qpos
mas01mc@292 761 // so we can be relatively efficient in initialization of track data.
mas01mc@292 762 // Here we assume that points don't overlap, so we will use exhaustive dot
mas01mc@324 763 // product evaluation instead of memoization of partial sums which is used
mas01mc@324 764 // for exhaustive brute-force evaluation from smaller databases: e.g. query_loop()
mas01mc@292 765 double dist;
mas01mc@292 766 size_t data_buffer_size = 0;
mas01mc@292 767 double *data_buffer = 0;
mas01mc@324 768 Uns32T trackOffset = 0;
mas01mc@324 769 Uns32T trackIndexOffset = 0;
mas01mc@292 770 Uns32T currentTrack = 0x80000000; // Initialize with a value outside of track index range
mas01mc@292 771 Uns32T npairs = exact_evaluation_queue->size();
mas01mc@292 772 while(npairs--){
mas01mc@292 773 PointPair pp = exact_evaluation_queue->top();
mas01mc@324 774 // Large ADB track data must be loaded here for sPower
mas01mc@324 775 if(dbH->flags & O2_FLAG_LARGE_ADB){
mas01mc@324 776 trackOffset=0;
mas01mc@324 777 trackIndexOffset=0;
mas01mc@292 778 if(currentTrack!=pp.trackID){
mas01mc@324 779 char* prefixedString = new char[O2_MAXFILESTR];
mas01mc@324 780 char* tmpStr = prefixedString;
mas01mc@324 781 // On currentTrack change, allocate and load track data
mas01mc@292 782 currentTrack=pp.trackID;
mas01cr@438 783 SAFE_DELETE_ARRAY(dbpointers.l2norm_data);
mas01cr@438 784 SAFE_DELETE_ARRAY(dbpointers.power_data);
mas01mc@324 785 if(infid>0)
mas01mc@324 786 close(infid);
mas01mc@324 787 // Open and check dimensions of feature file
mas01mc@324 788 strncpy(prefixedString, featureFileNameTable+pp.trackID*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR);
mas01mc@324 789 prefix_name((char ** const) &prefixedString, adb_feature_root);
mas01mc@324 790 if (prefixedString!=tmpStr)
mas01mc@324 791 delete[] tmpStr;
mas01mc@324 792 initInputFile(prefixedString, false); // nommap, file pointer at correct position
mas01mc@324 793 // Load the feature vector data for current track into data_buffer
mas01cr@433 794 if(audiodb_read_data(adb, infid, pp.trackID, &data_buffer, &data_buffer_size))
mas01cr@433 795 error("failed to read data");
mas01mc@324 796 // Load power and calculate power and l2norm sequence sums
mas01cr@438 797 init_track_aux_data(pp.trackID, data_buffer, &dbpointers.l2norm_data, &dbpointers.l2norm, &dbpointers.power_data, &dbpointers.power);
mas01mc@292 798 }
mas01mc@324 799 }
mas01mc@324 800 else{
mas01mc@324 801 // These offsets are w.r.t. the entire database of feature vectors and auxillary variables
mas01mc@324 802 trackOffset=trackOffsetTable[pp.trackID]; // num data elements offset
mas01mc@324 803 trackIndexOffset=trackOffset/dbH->dim; // num vectors offset
mas01mc@324 804 }
mas01mc@324 805 Uns32T qPos = usingQueryPoint?0:pp.qpos;// index for query point
mas01mc@324 806 Uns32T sPos = trackIndexOffset+pp.spos; // index into l2norm table
mas01mc@324 807 // Test power thresholds before computing distance
mas01cr@438 808 if( ( (!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers->power[qPos], dbpointers.power[sPos])) &&
mas01cr@437 809 ( qPos<qpointers->nvectors-sequence_length+1 && pp.spos<trackTable[pp.trackID]-sequence_length+1 ) ){
mas01mc@324 810 // Non-large ADB track data is loaded inside power test for efficiency
mas01mc@324 811 if( !(dbH->flags & O2_FLAG_LARGE_ADB) && (currentTrack!=pp.trackID) ){
mas01mc@324 812 // On currentTrack change, allocate and load track data
mas01mc@324 813 currentTrack=pp.trackID;
mas01mc@324 814 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET);
mas01cr@433 815 if(audiodb_read_data(adb, dbfid, currentTrack, &data_buffer, &data_buffer_size))
mas01cr@433 816 error("failed to read data");
mas01mc@324 817 }
mas01mc@324 818 // Compute distance
mas01cr@436 819 dist = audiodb_dot_product(query+qPos*dbH->dim, data_buffer+pp.spos*dbH->dim, dbH->dim*sequence_length);
mas01cr@437 820 double qn = qpointers->l2norm[qPos];
mas01cr@438 821 double sn = dbpointers.l2norm[sPos];
mas01cr@435 822 switch(spec->params.distance) {
mas01cr@431 823 case ADB_DISTANCE_EUCLIDEAN_NORMED:
mas01mc@324 824 dist = 2 - (2/(qn*sn))*dist;
mas01cr@431 825 break;
mas01cr@431 826 case ADB_DISTANCE_EUCLIDEAN:
mas01cr@431 827 dist = qn*qn + sn*sn - 2*dist;
mas01cr@431 828 break;
mas01cr@431 829 }
mas01cr@424 830 if((!radius) || dist <= (O2_LSH_EXACT_MULT*radius+O2_DISTANCE_TOLERANCE)) {
mas01cr@424 831 adb_result_t r;
mas01cr@424 832 r.key = fileTable + pp.trackID * O2_FILETABLE_ENTRY_SIZE;
mas01cr@424 833 r.dist = dist;
mas01cr@424 834 r.qpos = pp.qpos;
mas01cr@424 835 r.ipos = pp.spos;
mas01cr@424 836 accumulator->add_point(&r);
mas01cr@424 837 }
mas01mc@292 838 }
mas01mc@292 839 exact_evaluation_queue->pop();
mas01mc@292 840 }
mas01mc@315 841 // Cleanup
mas01cr@438 842 SAFE_DELETE_ARRAY(dbpointers.l2norm_data);
mas01cr@438 843 SAFE_DELETE_ARRAY(dbpointers.power_data);
mas01cr@438 844 SAFE_DELETE_ARRAY(dbpointers.mean_duration);
mas01mc@292 845 }
mas01mc@292 846
mas01cr@435 847 void audioDB::query_loop(adb_query_spec_t *spec, Uns32T queryIndex) {
mas01cr@239 848
mas01cr@239 849 double *query, *query_data;
mas01cr@438 850 adb_qpointers_internal_t qpointers = {0}, dbpointers = {0};
mas01cr@437 851
mas01cr@437 852 bool power_refine = spec->refine.flags & (ADB_REFINE_ABSOLUTE_THRESHOLD|ADB_REFINE_RELATIVE_THRESHOLD);
mas01cr@239 853
mas01mc@324 854 if( dbH->flags & O2_FLAG_LARGE_ADB )
mas01mc@324 855 error("error: LARGE_ADB requires indexed query");
mas01mc@324 856
mas01mc@292 857 if(query_from_key)
mas01cr@437 858 set_up_query_from_key(spec, &query_data, &query, &qpointers, queryIndex);
mas01mc@292 859 else
mas01cr@437 860 set_up_query(spec, &query_data, &query, &qpointers);
mas01cr@239 861
mas01cr@438 862 if(audiodb_set_up_db(adb, spec, &dbpointers)) {
mas01cr@434 863 error("failed to set up db");
mas01cr@434 864 }
mas01cr@239 865
mas01cr@239 866 VERB_LOG(1, "matching tracks...");
mas01cr@239 867
mas01cr@435 868 unsigned j,k,track,trackOffset=0, HOP_SIZE=sequenceHop;
mas01cr@435 869 unsigned wL = spec->qid.sequence_length;
mas01cr@239 870 double **D = 0; // Differences query and target
mas01cr@239 871 double **DD = 0; // Matched filter distance
mas01cr@239 872
mas01cr@437 873 D = new double*[qpointers.nvectors]; // pre-allocate
mas01cr@437 874 DD = new double*[qpointers.nvectors];
mas01cr@239 875
mas01cr@239 876 gettimeofday(&tv1, NULL);
mas01cr@239 877 unsigned processedTracks = 0;
mas01cr@239 878 off_t trackIndexOffset;
mas01cr@239 879 char nextKey[MAXSTR];
mas01cr@239 880
mas01cr@239 881 // Track loop
mas01cr@239 882 size_t data_buffer_size = 0;
mas01cr@239 883 double *data_buffer = 0;
mas01cr@239 884 lseek(dbfid, dbH->dataOffset, SEEK_SET);
mas01cr@239 885
mas01cr@239 886 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++) {
mas01cr@239 887
mas01cr@239 888 trackOffset = trackOffsetTable[track]; // numDoubles offset
mas01cr@239 889
mas01cr@239 890 // get trackID from file if using a control file
mas01cr@239 891 if(trackFile) {
mas01cr@239 892 trackFile->getline(nextKey,MAXSTR);
mas01cr@239 893 if(!trackFile->eof()) {
mas01cr@430 894 track = audiodb_key_index(adb, nextKey);
mas01cr@430 895 if(track == (uint32_t) -1) {
mas01cr@430 896 error("key not found", nextKey);
mas01cr@430 897 }
mas01cr@239 898 trackOffset = trackOffsetTable[track];
mas01cr@239 899 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET);
mas01cr@239 900 } else {
mas01cr@239 901 break;
mas01cr@239 902 }
mas01cr@239 903 }
mas01cr@239 904
mas01mc@292 905 // skip identity on query_from_key
mas01mc@292 906 if( query_from_key && (track == queryIndex) ) {
mas01mc@292 907 if(queryIndex!=dbH->numFiles-1){
mas01mc@292 908 track++;
mas01mc@292 909 trackOffset = trackOffsetTable[track];
mas01mc@292 910 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET);
mas01mc@292 911 }
mas01mc@292 912 else{
mas01mc@292 913 break;
mas01mc@292 914 }
mas01mc@292 915 }
mas01mc@292 916
mas01cr@437 917 trackIndexOffset=trackOffset/dbH->dim; // qpointers.nvectors offset
mas01cr@239 918
mas01cr@433 919 if(audiodb_read_data(adb, dbfid, track, &data_buffer, &data_buffer_size))
mas01cr@433 920 error("failed to read data");
mas01cr@435 921 if(wL <= trackTable[track]) { // test for short sequences
mas01cr@239 922
mas01cr@239 923 VERB_LOG(7,"%u.%jd.%u | ", track, (intmax_t) trackIndexOffset, trackTable[track]);
mas01cr@239 924
mas01cr@439 925 audiodb_initialize_arrays(adb, spec, track, qpointers.nvectors, query, data_buffer, D, DD);
mas01cr@239 926
mas01cr@435 927 if(spec->refine.flags & ADB_REFINE_DURATION_RATIO) {
mas01cr@438 928 VERB_LOG(3,"meanQdur=%f meanDBdur=%f\n", qpointers.mean_duration[0], dbpointers.mean_duration[track]);
mas01cr@239 929 }
mas01cr@239 930
mas01cr@437 931 if((!(spec->refine.flags & ADB_REFINE_DURATION_RATIO)) ||
mas01cr@438 932 fabs(dbpointers.mean_duration[track]-qpointers.mean_duration[0]) < qpointers.mean_duration[0]*spec->refine.duration_ratio) {
mas01cr@435 933 if(spec->refine.flags & ADB_REFINE_DURATION_RATIO) {
mas01cr@239 934 VERB_LOG(3,"within duration tolerance.\n");
mas01cr@239 935 }
mas01cr@239 936
mas01cr@239 937 // Search for minimum distance by shingles (concatenated vectors)
mas01cr@437 938 for(j = 0; j <= qpointers.nvectors - wL; j += HOP_SIZE) {
mas01cr@239 939 for(k = 0; k <= trackTable[track] - wL; k += HOP_SIZE) {
mas01cr@431 940 double thisDist = 0;
mas01cr@438 941 double qn = qpointers.l2norm[j];
mas01cr@438 942 double sn = dbpointers.l2norm[trackIndexOffset + k];
mas01cr@435 943 switch(spec->params.distance) {
mas01cr@431 944 case ADB_DISTANCE_EUCLIDEAN_NORMED:
mas01cr@438 945 thisDist = 2-(2/(qn*sn))*DD[j][k];
mas01cr@431 946 break;
mas01cr@431 947 case ADB_DISTANCE_EUCLIDEAN:
mas01cr@438 948 thisDist = qn*qn + sn*sn - 2*DD[j][k];
mas01cr@431 949 break;
mas01cr@431 950 case ADB_DISTANCE_DOT_PRODUCT:
mas01cr@431 951 thisDist = DD[j][k];
mas01cr@431 952 break;
mas01cr@431 953 }
mas01cr@239 954 // Power test
mas01cr@438 955 if ((!power_refine) || audiodb_powers_acceptable(&spec->refine, qpointers.power[j], dbpointers.power[trackIndexOffset + k])) {
mas01cr@239 956 // radius test
mas01cr@435 957 if((!(spec->refine.flags & ADB_REFINE_RADIUS)) ||
mas01cr@435 958 thisDist <= (spec->refine.radius+O2_DISTANCE_TOLERANCE)) {
mas01cr@423 959 adb_result_t r;
mas01cr@423 960 r.key = fileTable + track * O2_FILETABLE_ENTRY_SIZE;
mas01cr@423 961 r.dist = thisDist;
mas01cr@423 962 r.qpos = usingQueryPoint ? queryPoint : j;
mas01cr@423 963 r.ipos = k;
mas01cr@423 964 accumulator->add_point(&r);
mas01cr@239 965 }
mas01cr@239 966 }
mas01cr@239 967 }
mas01cr@239 968 }
mas01cr@239 969 } // Duration match
mas01cr@437 970 audiodb_delete_arrays(track, qpointers.nvectors, D, DD);
mas01cr@239 971 }
mas01cr@239 972 }
mas01cr@239 973
mas01cr@239 974 free(data_buffer);
mas01cr@239 975
mas01cr@239 976 gettimeofday(&tv2,NULL);
mas01cr@239 977 VERB_LOG(1,"elapsed time: %ld msec\n",
mas01cr@239 978 (tv2.tv_sec*1000 + tv2.tv_usec/1000) -
mas01cr@239 979 (tv1.tv_sec*1000 + tv1.tv_usec/1000))
mas01cr@239 980
mas01cr@239 981 // Clean up
mas01cr@239 982 if(query_data)
mas01cr@239 983 delete[] query_data;
mas01cr@437 984 if(qpointers.l2norm_data)
mas01cr@437 985 delete[] qpointers.l2norm_data;
mas01cr@437 986 if(qpointers.power_data)
mas01cr@437 987 delete[] qpointers.power_data;
mas01cr@437 988 if(qpointers.mean_duration)
mas01cr@437 989 delete[] qpointers.mean_duration;
mas01cr@438 990 if(dbpointers.power_data)
mas01cr@438 991 delete[] dbpointers.power_data;
mas01cr@438 992 if(dbpointers.l2norm_data)
mas01cr@438 993 delete[] dbpointers.l2norm_data;
mas01cr@239 994 if(D)
mas01cr@239 995 delete[] D;
mas01cr@239 996 if(DD)
mas01cr@239 997 delete[] DD;
mas01cr@438 998 if(dbpointers.mean_duration)
mas01cr@438 999 delete[] dbpointers.mean_duration;
mas01cr@239 1000 }