annotate query.cpp @ 314:b671a46873c2

working SIIGRAPH08 version. Fixed powerTable mmap memory leak in WS calls (only showed up in big databases). Implements radius queries over WS with new wsdl file
author mas01mc
date Tue, 12 Aug 2008 01:21:44 +0000
parents 896679d8cc39
children d2c56d4f841e
rev   line source
mas01cr@239 1 #include "audioDB.h"
mas01cr@239 2 #include "reporter.h"
mas01cr@239 3
mas01cr@239 4 bool audioDB::powers_acceptable(double p1, double p2) {
mas01cr@239 5 if (use_absolute_threshold) {
mas01cr@239 6 if ((p1 < absolute_threshold) || (p2 < absolute_threshold)) {
mas01cr@239 7 return false;
mas01cr@239 8 }
mas01cr@239 9 }
mas01cr@239 10 if (use_relative_threshold) {
mas01cr@239 11 if (fabs(p1-p2) > fabs(relative_threshold)) {
mas01cr@239 12 return false;
mas01cr@239 13 }
mas01cr@239 14 }
mas01cr@239 15 return true;
mas01cr@239 16 }
mas01cr@239 17
mas01cr@239 18 void audioDB::query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) {
mas01mc@292 19 // init database tables and dbH first
mas01mc@292 20 if(query_from_key)
mas01mc@292 21 initTables(dbName);
mas01mc@292 22 else
mas01mc@292 23 initTables(dbName, inFile);
mas01mc@292 24
mas01mc@292 25 // keyKeyPos requires dbH to be initialized
mas01mc@292 26 if(query_from_key && (!key || (query_from_key_index = getKeyPos((char*)key))==O2_ERR_KEYNOTFOUND))
mas01mc@292 27 error("Query key not found :",key);
mas01mc@292 28
mas01cr@239 29 switch (queryType) {
mas01cr@239 30 case O2_POINT_QUERY:
mas01cr@239 31 sequenceLength = 1;
mas01cr@239 32 normalizedDistance = false;
mas01mc@292 33 reporter = new pointQueryReporter< std::greater < NNresult > >(pointNN);
mas01cr@239 34 break;
mas01cr@239 35 case O2_TRACK_QUERY:
mas01cr@239 36 sequenceLength = 1;
mas01cr@239 37 normalizedDistance = false;
mas01mc@292 38 reporter = new trackAveragingReporter< std::greater< NNresult > >(pointNN, trackNN, dbH->numFiles);
mas01cr@239 39 break;
mas01mc@292 40 case O2_SEQUENCE_QUERY:
mas01mc@292 41 if(no_unit_norming)
mas01mc@292 42 normalizedDistance = false;
mas01cr@239 43 if(radius == 0) {
mas01mc@292 44 reporter = new trackAveragingReporter< std::less< NNresult > >(pointNN, trackNN, dbH->numFiles);
mas01cr@239 45 } else {
mas01mc@292 46 if(index_exists(dbName, radius, sequenceLength)){
mas01mc@292 47 char* indexName = index_get_name(dbName, radius, sequenceLength);
mas01mc@308 48 lsh = index_allocate(indexName, false);
mas01mc@292 49 reporter = new trackSequenceQueryRadReporter(trackNN, index_to_trackID(lsh->get_maxp())+1);
mas01mc@292 50 delete[] indexName;
mas01mc@292 51 }
mas01mc@292 52 else
mas01mc@292 53 reporter = new trackSequenceQueryRadReporter(trackNN, dbH->numFiles);
mas01cr@239 54 }
mas01cr@239 55 break;
mas01mc@292 56 case O2_N_SEQUENCE_QUERY:
mas01mc@292 57 if(no_unit_norming)
mas01mc@292 58 normalizedDistance = false;
mas01mc@248 59 if(radius == 0) {
mas01mc@292 60 reporter = new trackSequenceQueryNNReporter< std::less < NNresult > >(pointNN, trackNN, dbH->numFiles);
mas01mc@248 61 } else {
mas01mc@292 62 if(index_exists(dbName, radius, sequenceLength)){
mas01mc@292 63 char* indexName = index_get_name(dbName, radius, sequenceLength);
mas01mc@308 64 lsh = index_allocate(indexName, false);
mas01mc@292 65 reporter = new trackSequenceQueryRadNNReporter(pointNN,trackNN, index_to_trackID(lsh->get_maxp())+1);
mas01mc@292 66 delete[] indexName;
mas01mc@292 67 }
mas01mc@292 68 else
mas01mc@292 69 reporter = new trackSequenceQueryRadNNReporter(pointNN,trackNN, dbH->numFiles);
mas01mc@248 70 }
mas01mc@248 71 break;
mas01mc@263 72 case O2_ONE_TO_ONE_N_SEQUENCE_QUERY :
mas01mc@263 73 if(radius == 0) {
mas01mc@263 74 error("query-type not yet supported");
mas01mc@263 75 } else {
mas01mc@292 76 reporter = new trackSequenceQueryRadNNReporterOneToOne(pointNN,trackNN, dbH->numFiles);
mas01mc@263 77 }
mas01mc@263 78 break;
mas01cr@239 79 default:
mas01cr@239 80 error("unrecognized queryType in query()");
mas01cr@239 81 }
mas01mc@292 82
mas01mc@292 83 // Test for index (again) here
mas01mc@292 84 if(radius && index_exists(dbName, radius, sequenceLength))
mas01mc@292 85 index_query_loop(dbName, query_from_key_index);
mas01mc@292 86 else
mas01mc@292 87 query_loop(dbName, query_from_key_index);
mas01mc@292 88
mas01mc@292 89 reporter->report(fileTable, adbQueryResponse);
mas01cr@239 90 }
mas01cr@239 91
mas01cr@239 92 // return ordinal position of key in keyTable
mas01mc@292 93 // this should really be a STL hash map search
mas01cr@239 94 unsigned audioDB::getKeyPos(char* key){
mas01mc@292 95 if(!dbH)
mas01mc@292 96 error("dbH not initialized","getKeyPos");
mas01cr@239 97 for(unsigned k=0; k<dbH->numFiles; k++)
mas01cr@256 98 if(strncmp(fileTable + k*O2_FILETABLE_ENTRY_SIZE, key, strlen(key))==0)
mas01cr@239 99 return k;
mas01cr@239 100 error("Key not found",key);
mas01cr@239 101 return O2_ERR_KEYNOTFOUND;
mas01cr@239 102 }
mas01cr@239 103
mas01cr@239 104 // This is a common pattern in sequence queries: what we are doing is
mas01cr@239 105 // taking a window of length seqlen over a buffer of length length,
mas01cr@239 106 // and placing the sum of the elements in that window in the first
mas01cr@239 107 // element of the window: thus replacing all but the last seqlen
mas01cr@239 108 // elements in the buffer with the corresponding windowed sum.
mas01cr@239 109 void audioDB::sequence_sum(double *buffer, int length, int seqlen) {
mas01cr@239 110 double tmp1, tmp2, *ps;
mas01cr@239 111 int j, w;
mas01cr@239 112
mas01cr@239 113 tmp1 = *buffer;
mas01cr@239 114 j = 1;
mas01cr@239 115 w = seqlen - 1;
mas01cr@239 116 while(w--) {
mas01cr@239 117 *buffer += buffer[j++];
mas01cr@239 118 }
mas01cr@239 119 ps = buffer + 1;
mas01cr@239 120 w = length - seqlen; // +1 - 1
mas01cr@239 121 while(w--) {
mas01cr@239 122 tmp2 = *ps;
mas01cr@239 123 if(isfinite(tmp1)) {
mas01cr@239 124 *ps = *(ps - 1) - tmp1 + *(ps + seqlen - 1);
mas01cr@239 125 } else {
mas01cr@239 126 for(int i = 1; i < seqlen; i++) {
mas01cr@239 127 *ps += *(ps + i);
mas01cr@239 128 }
mas01cr@239 129 }
mas01cr@239 130 tmp1 = tmp2;
mas01cr@239 131 ps++;
mas01cr@239 132 }
mas01cr@239 133 }
mas01cr@239 134
mas01cr@239 135 // In contrast to sequence_sum() above, sequence_sqrt() and
mas01cr@239 136 // sequence_average() below are simple mappers across the sequence.
mas01cr@239 137 void audioDB::sequence_sqrt(double *buffer, int length, int seqlen) {
mas01cr@239 138 int w = length - seqlen + 1;
mas01cr@239 139 while(w--) {
mas01cr@239 140 *buffer = sqrt(*buffer);
mas01cr@239 141 buffer++;
mas01cr@239 142 }
mas01cr@239 143 }
mas01cr@239 144
mas01cr@239 145 void audioDB::sequence_average(double *buffer, int length, int seqlen) {
mas01cr@239 146 int w = length - seqlen + 1;
mas01cr@239 147 while(w--) {
mas01cr@239 148 *buffer /= seqlen;
mas01cr@239 149 buffer++;
mas01cr@239 150 }
mas01cr@239 151 }
mas01cr@239 152
mas01cr@239 153 void audioDB::initialize_arrays(int track, unsigned int numVectors, double *query, double *data_buffer, double **D, double **DD) {
mas01cr@239 154 unsigned int j, k, l, w;
mas01cr@239 155 double *dp, *qp, *sp;
mas01cr@239 156
mas01cr@239 157 const unsigned HOP_SIZE = sequenceHop;
mas01cr@239 158 const unsigned wL = sequenceLength;
mas01cr@239 159
mas01cr@239 160 for(j = 0; j < numVectors; j++) {
mas01cr@239 161 // Sum products matrix
mas01cr@239 162 D[j] = new double[trackTable[track]];
mas01cr@239 163 assert(D[j]);
mas01cr@239 164 // Matched filter matrix
mas01cr@239 165 DD[j]=new double[trackTable[track]];
mas01cr@239 166 assert(DD[j]);
mas01cr@239 167 }
mas01cr@239 168
mas01cr@239 169 // Dot product
mas01cr@239 170 for(j = 0; j < numVectors; j++)
mas01cr@239 171 for(k = 0; k < trackTable[track]; k++){
mas01cr@239 172 qp = query + j * dbH->dim;
mas01cr@239 173 sp = data_buffer + k * dbH->dim;
mas01cr@239 174 DD[j][k] = 0.0; // Initialize matched filter array
mas01cr@239 175 dp = &D[j][k]; // point to correlation cell j,k
mas01cr@239 176 *dp = 0.0; // initialize correlation cell
mas01cr@239 177 l = dbH->dim; // size of vectors
mas01cr@239 178 while(l--)
mas01cr@239 179 *dp += *qp++ * *sp++;
mas01cr@239 180 }
mas01cr@239 181
mas01cr@239 182 // Matched Filter
mas01cr@239 183 // HOP SIZE == 1
mas01cr@239 184 double* spd;
mas01cr@239 185 if(HOP_SIZE == 1) { // HOP_SIZE = shingleHop
mas01cr@239 186 for(w = 0; w < wL; w++) {
mas01cr@239 187 for(j = 0; j < numVectors - w; j++) {
mas01cr@239 188 sp = DD[j];
mas01cr@239 189 spd = D[j+w] + w;
mas01cr@239 190 k = trackTable[track] - w;
mas01mc@292 191 while(k--)
mas01mc@292 192 *sp++ += *spd++;
mas01cr@239 193 }
mas01cr@239 194 }
mas01cr@239 195 } else { // HOP_SIZE != 1
mas01cr@239 196 for(w = 0; w < wL; w++) {
mas01cr@239 197 for(j = 0; j < numVectors - w; j += HOP_SIZE) {
mas01cr@239 198 sp = DD[j];
mas01cr@239 199 spd = D[j+w]+w;
mas01cr@239 200 for(k = 0; k < trackTable[track] - w; k += HOP_SIZE) {
mas01cr@239 201 *sp += *spd;
mas01cr@239 202 sp += HOP_SIZE;
mas01cr@239 203 spd += HOP_SIZE;
mas01cr@239 204 }
mas01cr@239 205 }
mas01cr@239 206 }
mas01cr@239 207 }
mas01cr@239 208 }
mas01cr@239 209
mas01cr@239 210 void audioDB::delete_arrays(int track, unsigned int numVectors, double **D, double **DD) {
mas01cr@239 211 if(D != NULL) {
mas01cr@239 212 for(unsigned int j = 0; j < numVectors; j++) {
mas01cr@239 213 delete[] D[j];
mas01cr@239 214 }
mas01cr@239 215 }
mas01cr@239 216 if(DD != NULL) {
mas01cr@239 217 for(unsigned int j = 0; j < numVectors; j++) {
mas01cr@239 218 delete[] DD[j];
mas01cr@239 219 }
mas01cr@239 220 }
mas01cr@239 221 }
mas01cr@239 222
mas01cr@239 223 void audioDB::read_data(int track, double **data_buffer_p, size_t *data_buffer_size_p) {
mas01cr@239 224 if (trackTable[track] * sizeof(double) * dbH->dim > *data_buffer_size_p) {
mas01cr@239 225 if(*data_buffer_p) {
mas01cr@239 226 free(*data_buffer_p);
mas01cr@239 227 }
mas01cr@239 228 {
mas01cr@239 229 *data_buffer_size_p = trackTable[track] * sizeof(double) * dbH->dim;
mas01cr@239 230 void *tmp = malloc(*data_buffer_size_p);
mas01cr@239 231 if (tmp == NULL) {
mas01cr@239 232 error("error allocating data buffer");
mas01cr@239 233 }
mas01cr@239 234 *data_buffer_p = (double *) tmp;
mas01cr@239 235 }
mas01cr@239 236 }
mas01cr@239 237
mas01cr@239 238 read(dbfid, *data_buffer_p, trackTable[track] * sizeof(double) * dbH->dim);
mas01cr@239 239 }
mas01cr@239 240
mas01cr@239 241 // These names deserve some unpicking. The names starting with a "q"
mas01cr@239 242 // are pointers to the query, norm and power vectors; the names
mas01cr@239 243 // starting with "v" are things that will end up pointing to the
mas01cr@239 244 // actual query point's information. -- CSR, 2007-12-05
mas01cr@239 245 void audioDB::set_up_query(double **qp, double **vqp, double **qnp, double **vqnp, double **qpp, double **vqpp, double *mqdp, unsigned *nvp) {
mas01cr@239 246 *nvp = (statbuf.st_size - sizeof(int)) / (dbH->dim * sizeof(double));
mas01mc@292 247
mas01cr@239 248 if(!(dbH->flags & O2_FLAG_L2NORM)) {
mas01cr@239 249 error("Database must be L2 normed for sequence query","use -L2NORM");
mas01cr@239 250 }
mas01cr@239 251
mas01cr@239 252 if(*nvp < sequenceLength) {
mas01cr@239 253 error("Query shorter than requested sequence length", "maybe use -l");
mas01cr@239 254 }
mas01cr@239 255
mas01cr@239 256 VERB_LOG(1, "performing norms... ");
mas01cr@239 257
mas01cr@239 258 *qp = new double[*nvp * dbH->dim];
mas01cr@239 259 memcpy(*qp, indata+sizeof(int), *nvp * dbH->dim * sizeof(double));
mas01cr@239 260 *qnp = new double[*nvp];
mas01cr@239 261 unitNorm(*qp, dbH->dim, *nvp, *qnp);
mas01cr@239 262
mas01cr@239 263 sequence_sum(*qnp, *nvp, sequenceLength);
mas01cr@239 264 sequence_sqrt(*qnp, *nvp, sequenceLength);
mas01cr@239 265
mas01cr@239 266 if (usingPower) {
mas01cr@239 267 *qpp = new double[*nvp];
mas01cr@239 268 if (lseek(powerfd, sizeof(int), SEEK_SET) == (off_t) -1) {
mas01cr@239 269 error("error seeking to data", powerFileName, "lseek");
mas01cr@239 270 }
mas01cr@239 271 int count = read(powerfd, *qpp, *nvp * sizeof(double));
mas01cr@239 272 if (count == -1) {
mas01cr@239 273 error("error reading data", powerFileName, "read");
mas01cr@239 274 }
mas01cr@239 275 if ((unsigned) count != *nvp * sizeof(double)) {
mas01cr@239 276 error("short read", powerFileName);
mas01cr@239 277 }
mas01cr@239 278
mas01cr@239 279 sequence_sum(*qpp, *nvp, sequenceLength);
mas01cr@239 280 sequence_average(*qpp, *nvp, sequenceLength);
mas01cr@239 281 }
mas01cr@239 282
mas01cr@239 283 if (usingTimes) {
mas01cr@239 284 unsigned int k;
mas01cr@239 285 *mqdp = 0.0;
mas01cr@239 286 double *querydurs = new double[*nvp];
mas01cr@239 287 double *timesdata = new double[*nvp*2];
mas01cr@239 288 insertTimeStamps(*nvp, timesFile, timesdata);
mas01cr@239 289 for(k = 0; k < *nvp; k++) {
mas01cr@239 290 querydurs[k] = timesdata[2*k+1] - timesdata[2*k];
mas01cr@239 291 *mqdp += querydurs[k];
mas01cr@239 292 }
mas01cr@239 293 *mqdp /= k;
mas01cr@239 294
mas01cr@239 295 VERB_LOG(1, "mean query file duration: %f\n", *mqdp);
mas01cr@239 296
mas01cr@239 297 delete [] querydurs;
mas01cr@239 298 delete [] timesdata;
mas01cr@239 299 }
mas01cr@239 300
mas01cr@239 301 // Defaults, for exhaustive search (!usingQueryPoint)
mas01cr@239 302 *vqp = *qp;
mas01cr@239 303 *vqnp = *qnp;
mas01cr@239 304 *vqpp = *qpp;
mas01cr@239 305
mas01cr@239 306 if(usingQueryPoint) {
mas01cr@239 307 if(queryPoint > *nvp || queryPoint > *nvp - sequenceLength + 1) {
mas01cr@239 308 error("queryPoint > numVectors-wL+1 in query");
mas01cr@239 309 } else {
mas01cr@239 310 VERB_LOG(1, "query point: %u\n", queryPoint);
mas01cr@239 311 *vqp = *qp + queryPoint * dbH->dim;
mas01cr@239 312 *vqnp = *qnp + queryPoint;
mas01cr@239 313 if (usingPower) {
mas01cr@239 314 *vqpp = *qpp + queryPoint;
mas01cr@239 315 }
mas01cr@239 316 *nvp = sequenceLength;
mas01cr@239 317 }
mas01cr@239 318 }
mas01cr@239 319 }
mas01cr@239 320
mas01mc@292 321 // Does the same as set_up_query(...) but from database features instead of from a file
mas01mc@292 322 // Constructs the same outputs as set_up_query
mas01mc@292 323 void audioDB::set_up_query_from_key(double **qp, double **vqp, double **qnp, double **vqnp, double **qpp, double **vqpp, double *mqdp, unsigned *nvp, Uns32T queryIndex) {
mas01mc@292 324 if(!trackTable)
mas01mc@292 325 error("trackTable not initialized","set_up_query_from_key");
mas01mc@292 326
mas01mc@292 327 if(!(dbH->flags & O2_FLAG_L2NORM)) {
mas01mc@292 328 error("Database must be L2 normed for sequence query","use -L2NORM");
mas01mc@292 329 }
mas01mc@292 330
mas01mc@292 331 if(dbH->flags & O2_FLAG_POWER)
mas01mc@292 332 usingPower = true;
mas01mc@292 333
mas01mc@292 334 if(dbH->flags & O2_FLAG_TIMES)
mas01mc@292 335 usingTimes = true;
mas01mc@292 336
mas01mc@292 337 *nvp = trackTable[queryIndex];
mas01mc@292 338 if(*nvp < sequenceLength) {
mas01mc@292 339 error("Query shorter than requested sequence length", "maybe use -l");
mas01mc@292 340 }
mas01mc@292 341
mas01mc@292 342 VERB_LOG(1, "performing norms... ");
mas01mc@292 343
mas01mc@292 344 // Read query feature vectors from database
mas01mc@292 345 *qp = NULL;
mas01mc@292 346 lseek(dbfid, dbH->dataOffset + trackOffsetTable[queryIndex] * sizeof(double), SEEK_SET);
mas01mc@292 347 size_t allocatedSize = 0;
mas01mc@292 348 read_data(queryIndex, qp, &allocatedSize);
mas01mc@292 349 // Consistency check on allocated memory and query feature size
mas01mc@292 350 if(*nvp*sizeof(double)*dbH->dim != allocatedSize)
mas01mc@292 351 error("Query memory allocation failed consitency check","set_up_query_from_key");
mas01mc@292 352
mas01mc@292 353 Uns32T trackIndexOffset = trackOffsetTable[queryIndex]/dbH->dim; // Convert num data elements to num vectors
mas01mc@292 354 // Copy L2 norm partial-sum coefficients
mas01mc@292 355 assert(*qnp = new double[*nvp]);
mas01mc@292 356 memcpy(*qnp, l2normTable+trackIndexOffset, *nvp*sizeof(double));
mas01mc@292 357 sequence_sum(*qnp, *nvp, sequenceLength);
mas01mc@292 358 sequence_sqrt(*qnp, *nvp, sequenceLength);
mas01mc@292 359
mas01mc@292 360 if( usingPower ){
mas01mc@292 361 // Copy Power partial-sum coefficients
mas01mc@292 362 assert(*qpp = new double[*nvp]);
mas01mc@292 363 memcpy(*qpp, powerTable+trackIndexOffset, *nvp*sizeof(double));
mas01mc@292 364 sequence_sum(*qpp, *nvp, sequenceLength);
mas01mc@292 365 sequence_average(*qpp, *nvp, sequenceLength);
mas01mc@292 366 }
mas01mc@292 367
mas01mc@292 368 if (usingTimes) {
mas01mc@292 369 unsigned int k;
mas01mc@292 370 *mqdp = 0.0;
mas01mc@292 371 double *querydurs = new double[*nvp];
mas01mc@292 372 double *timesdata = new double[*nvp*2];
mas01mc@292 373 assert(querydurs && timesdata);
mas01mc@292 374 memcpy(timesdata, timesTable+trackIndexOffset, *nvp*sizeof(double));
mas01mc@292 375 for(k = 0; k < *nvp; k++) {
mas01mc@292 376 querydurs[k] = timesdata[2*k+1] - timesdata[2*k];
mas01mc@292 377 *mqdp += querydurs[k];
mas01mc@292 378 }
mas01mc@292 379 *mqdp /= k;
mas01mc@292 380
mas01mc@292 381 VERB_LOG(1, "mean query file duration: %f\n", *mqdp);
mas01mc@292 382
mas01mc@292 383 delete [] querydurs;
mas01mc@292 384 delete [] timesdata;
mas01mc@292 385 }
mas01mc@292 386 // Defaults, for exhaustive search (!usingQueryPoint)
mas01mc@292 387 *vqp = *qp;
mas01mc@292 388 *vqnp = *qnp;
mas01mc@292 389 *vqpp = *qpp;
mas01mc@292 390
mas01mc@292 391 if(usingQueryPoint) {
mas01mc@292 392 if(queryPoint > *nvp || queryPoint > *nvp - sequenceLength + 1) {
mas01mc@292 393 error("queryPoint > numVectors-wL+1 in query");
mas01mc@292 394 } else {
mas01mc@292 395 VERB_LOG(1, "query point: %u\n", queryPoint);
mas01mc@292 396 *vqp = *qp + queryPoint * dbH->dim;
mas01mc@292 397 *vqnp = *qnp + queryPoint;
mas01mc@292 398 if (usingPower) {
mas01mc@292 399 *vqpp = *qpp + queryPoint;
mas01mc@292 400 }
mas01mc@292 401 *nvp = sequenceLength;
mas01mc@292 402 }
mas01mc@292 403 }
mas01mc@292 404 }
mas01mc@292 405
mas01mc@292 406
mas01cr@239 407 // FIXME: this is not the right name; we're not actually setting up
mas01cr@239 408 // the database, but copying various bits of it out of mmap()ed tables
mas01cr@239 409 // in order to reduce seeks.
mas01cr@239 410 void audioDB::set_up_db(double **snp, double **vsnp, double **spp, double **vspp, double **mddp, unsigned int *dvp) {
mas01cr@239 411 *dvp = dbH->length / (dbH->dim * sizeof(double));
mas01cr@239 412 *snp = new double[*dvp];
mas01cr@239 413
mas01cr@239 414 double *snpp = *snp, *sppp = 0;
mas01cr@239 415 memcpy(*snp, l2normTable, *dvp * sizeof(double));
mas01cr@239 416
mas01cr@239 417 if (usingPower) {
mas01cr@239 418 if (!(dbH->flags & O2_FLAG_POWER)) {
mas01cr@239 419 error("database not power-enabled", dbName);
mas01cr@239 420 }
mas01cr@239 421 *spp = new double[*dvp];
mas01cr@239 422 sppp = *spp;
mas01cr@239 423 memcpy(*spp, powerTable, *dvp * sizeof(double));
mas01cr@239 424 }
mas01cr@239 425
mas01cr@239 426 for(unsigned int i = 0; i < dbH->numFiles; i++){
mas01cr@239 427 if(trackTable[i] >= sequenceLength) {
mas01cr@239 428 sequence_sum(snpp, trackTable[i], sequenceLength);
mas01cr@239 429 sequence_sqrt(snpp, trackTable[i], sequenceLength);
mas01cr@239 430
mas01cr@239 431 if (usingPower) {
mas01cr@239 432 sequence_sum(sppp, trackTable[i], sequenceLength);
mas01cr@239 433 sequence_average(sppp, trackTable[i], sequenceLength);
mas01cr@239 434 }
mas01cr@239 435 }
mas01cr@239 436 snpp += trackTable[i];
mas01cr@239 437 if (usingPower) {
mas01cr@239 438 sppp += trackTable[i];
mas01cr@239 439 }
mas01cr@239 440 }
mas01cr@239 441
mas01cr@239 442 if (usingTimes) {
mas01cr@239 443 if(!(dbH->flags & O2_FLAG_TIMES)) {
mas01cr@239 444 error("query timestamps provided for non-timed database", dbName);
mas01cr@239 445 }
mas01cr@239 446
mas01cr@239 447 *mddp = new double[dbH->numFiles];
mas01cr@239 448
mas01cr@239 449 for(unsigned int k = 0; k < dbH->numFiles; k++) {
mas01cr@239 450 unsigned int j;
mas01cr@239 451 (*mddp)[k] = 0.0;
mas01cr@239 452 for(j = 0; j < trackTable[k]; j++) {
mas01cr@239 453 (*mddp)[k] += timesTable[2*j+1] - timesTable[2*j];
mas01cr@239 454 }
mas01cr@239 455 (*mddp)[k] /= j;
mas01cr@239 456 }
mas01cr@239 457 }
mas01cr@239 458
mas01cr@239 459 *vsnp = *snp;
mas01cr@239 460 *vspp = *spp;
mas01cr@239 461 }
mas01cr@239 462
mas01mc@292 463 // query_points()
mas01mc@292 464 //
mas01mc@292 465 // using PointPairs held in the exact_evaluation_queue compute squared distance for each PointPair
mas01mc@292 466 // and insert result into the current reporter.
mas01mc@292 467 //
mas01mc@292 468 // Preconditions:
mas01mc@292 469 // A query inFile has been opened with setup_query(...) and query pointers initialized
mas01mc@292 470 // The database contains some points
mas01mc@292 471 // An exact_evaluation_queue has been allocated and populated
mas01mc@292 472 // A reporter has been allocated
mas01mc@292 473 //
mas01mc@292 474 // Postconditions:
mas01mc@292 475 // reporter contains the points and distances that meet the reporter constraints
mas01mc@292 476
mas01mc@292 477 void audioDB::query_loop_points(double* query, double* qnPtr, double* qpPtr, double meanQdur, Uns32T numVectors){
mas01mc@292 478 unsigned int dbVectors;
mas01mc@292 479 double *sNorm, *snPtr, *sPower = 0, *spPtr = 0;
mas01mc@292 480 double *meanDBdur = 0;
mas01mc@292 481
mas01mc@292 482 // check pre-conditions
mas01mc@292 483 assert(exact_evaluation_queue&&reporter);
mas01mc@292 484 if(!exact_evaluation_queue->size()) // Exit if no points to evaluate
mas01mc@292 485 return;
mas01mc@292 486
mas01mc@292 487 // Compute database info
mas01mc@292 488 // FIXME: we more than likely don't need very much of the database
mas01mc@292 489 // so make a new method to build these values per-track or, even better, per-point
mas01mc@292 490 set_up_db(&sNorm, &snPtr, &sPower, &spPtr, &meanDBdur, &dbVectors);
mas01mc@292 491
mas01mc@292 492 VERB_LOG(1, "matching points...");
mas01mc@292 493
mas01mc@292 494 assert(pointNN>0 && pointNN<=O2_MAXNN);
mas01mc@292 495 assert(trackNN>0 && trackNN<=O2_MAXNN);
mas01mc@292 496
mas01mc@292 497 // We are guaranteed that the order of points is sorted by:
mas01mc@292 498 // qpos, trackID, spos
mas01mc@292 499 // so we can be relatively efficient in initialization of track data.
mas01mc@292 500 // Here we assume that points don't overlap, so we will use exhaustive dot
mas01mc@292 501 // product evaluation over the sequence
mas01mc@292 502 double dist;
mas01mc@292 503 size_t data_buffer_size = 0;
mas01mc@292 504 double *data_buffer = 0;
mas01mc@292 505 Uns32T trackOffset;
mas01mc@292 506 Uns32T trackIndexOffset;
mas01mc@292 507 Uns32T currentTrack = 0x80000000; // Initialize with a value outside of track index range
mas01mc@292 508 Uns32T npairs = exact_evaluation_queue->size();
mas01mc@292 509 while(npairs--){
mas01mc@292 510 PointPair pp = exact_evaluation_queue->top();
mas01mc@292 511 trackOffset=trackOffsetTable[pp.trackID]; // num data elements offset
mas01mc@292 512 trackIndexOffset=trackOffset/dbH->dim; // num vectors offset
mas01mc@292 513 if((!(usingPower) || powers_acceptable(qpPtr[usingQueryPoint?0:pp.qpos], sPower[trackIndexOffset+pp.spos])) &&
mas01mc@292 514 ((usingQueryPoint?0:pp.qpos) < numVectors-sequenceLength+1 && pp.spos < trackTable[pp.trackID]-sequenceLength+1)){
mas01mc@292 515 if(currentTrack!=pp.trackID){
mas01mc@292 516 currentTrack=pp.trackID;
mas01mc@292 517 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET);
mas01mc@292 518 read_data(currentTrack, &data_buffer, &data_buffer_size);
mas01mc@292 519 }
mas01mc@292 520 dist = dot_product_points(query+(usingQueryPoint?0:pp.qpos*dbH->dim), data_buffer+pp.spos*dbH->dim, dbH->dim*sequenceLength);
mas01mc@292 521 if(normalizedDistance)
mas01mc@292 522 dist = 2-(2/(qnPtr[usingQueryPoint?0:pp.qpos]*sNorm[trackIndexOffset+pp.spos]))*dist;
mas01mc@292 523 else
mas01mc@292 524 if(no_unit_norming)
mas01mc@292 525 dist = qnPtr[usingQueryPoint?0:pp.qpos]*qnPtr[usingQueryPoint?0:pp.qpos]+sNorm[trackIndexOffset+pp.spos]*sNorm[trackIndexOffset+pp.spos] - 2*dist;
mas01mc@292 526 // else
mas01mc@292 527 // dist = dist;
mas01mc@314 528 if((!radius) || dist <= (O2_LSH_EXACT_MULT*radius+O2_DISTANCE_TOLERANCE))
mas01mc@292 529 reporter->add_point(pp.trackID, pp.qpos, pp.spos, dist);
mas01mc@292 530 }
mas01mc@292 531 exact_evaluation_queue->pop();
mas01mc@292 532 }
mas01mc@292 533 }
mas01mc@292 534
mas01mc@292 535 // A completely unprotected dot-product method
mas01mc@292 536 // Caller is responsible for ensuring that memory is within bounds
mas01mc@292 537 inline double audioDB::dot_product_points(double* q, double* p, Uns32T L){
mas01mc@292 538 double dist = 0.0;
mas01mc@292 539 while(L--)
mas01mc@292 540 dist += *q++ * *p++;
mas01mc@292 541 return dist;
mas01mc@292 542 }
mas01mc@292 543
mas01mc@292 544 void audioDB::query_loop(const char* dbName, Uns32T queryIndex) {
mas01cr@239 545
mas01cr@239 546 unsigned int numVectors;
mas01cr@239 547 double *query, *query_data;
mas01cr@239 548 double *qNorm, *qnPtr, *qPower = 0, *qpPtr = 0;
mas01cr@239 549 double meanQdur;
mas01cr@239 550
mas01mc@292 551 if(query_from_key)
mas01mc@292 552 set_up_query_from_key(&query_data, &query, &qNorm, &qnPtr, &qPower, &qpPtr, &meanQdur, &numVectors, queryIndex);
mas01mc@292 553 else
mas01mc@292 554 set_up_query(&query_data, &query, &qNorm, &qnPtr, &qPower, &qpPtr, &meanQdur, &numVectors);
mas01cr@239 555
mas01cr@239 556 unsigned int dbVectors;
mas01cr@239 557 double *sNorm, *snPtr, *sPower = 0, *spPtr = 0;
mas01cr@239 558 double *meanDBdur = 0;
mas01cr@239 559
mas01cr@239 560 set_up_db(&sNorm, &snPtr, &sPower, &spPtr, &meanDBdur, &dbVectors);
mas01cr@239 561
mas01cr@239 562 VERB_LOG(1, "matching tracks...");
mas01cr@239 563
mas01cr@239 564 assert(pointNN>0 && pointNN<=O2_MAXNN);
mas01cr@239 565 assert(trackNN>0 && trackNN<=O2_MAXNN);
mas01cr@239 566
mas01cr@239 567 unsigned j,k,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength;
mas01cr@239 568 double **D = 0; // Differences query and target
mas01cr@239 569 double **DD = 0; // Matched filter distance
mas01cr@239 570
mas01mc@292 571 D = new double*[numVectors]; // pre-allocate
mas01cr@239 572 DD = new double*[numVectors];
mas01cr@239 573
mas01cr@239 574 gettimeofday(&tv1, NULL);
mas01cr@239 575 unsigned processedTracks = 0;
mas01cr@239 576 off_t trackIndexOffset;
mas01cr@239 577 char nextKey[MAXSTR];
mas01cr@239 578
mas01cr@239 579 // Track loop
mas01cr@239 580 size_t data_buffer_size = 0;
mas01cr@239 581 double *data_buffer = 0;
mas01cr@239 582 lseek(dbfid, dbH->dataOffset, SEEK_SET);
mas01cr@239 583
mas01cr@239 584 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++) {
mas01cr@239 585
mas01cr@239 586 trackOffset = trackOffsetTable[track]; // numDoubles offset
mas01cr@239 587
mas01cr@239 588 // get trackID from file if using a control file
mas01cr@239 589 if(trackFile) {
mas01cr@239 590 trackFile->getline(nextKey,MAXSTR);
mas01cr@239 591 if(!trackFile->eof()) {
mas01cr@239 592 track = getKeyPos(nextKey);
mas01cr@239 593 trackOffset = trackOffsetTable[track];
mas01cr@239 594 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET);
mas01cr@239 595 } else {
mas01cr@239 596 break;
mas01cr@239 597 }
mas01cr@239 598 }
mas01cr@239 599
mas01mc@292 600 // skip identity on query_from_key
mas01mc@292 601 if( query_from_key && (track == queryIndex) ) {
mas01mc@292 602 if(queryIndex!=dbH->numFiles-1){
mas01mc@292 603 track++;
mas01mc@292 604 trackOffset = trackOffsetTable[track];
mas01mc@292 605 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET);
mas01mc@292 606 }
mas01mc@292 607 else{
mas01mc@292 608 break;
mas01mc@292 609 }
mas01mc@292 610 }
mas01mc@292 611
mas01cr@239 612 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset
mas01cr@239 613
mas01cr@239 614 read_data(track, &data_buffer, &data_buffer_size);
mas01cr@239 615 if(sequenceLength <= trackTable[track]) { // test for short sequences
mas01cr@239 616
mas01cr@239 617 VERB_LOG(7,"%u.%jd.%u | ", track, (intmax_t) trackIndexOffset, trackTable[track]);
mas01cr@239 618
mas01cr@239 619 initialize_arrays(track, numVectors, query, data_buffer, D, DD);
mas01cr@239 620
mas01cr@239 621 if(usingTimes) {
mas01cr@239 622 VERB_LOG(3,"meanQdur=%f meanDBdur=%f\n", meanQdur, meanDBdur[track]);
mas01cr@239 623 }
mas01cr@239 624
mas01cr@239 625 if((!usingTimes) || fabs(meanDBdur[track]-meanQdur) < meanQdur*timesTol) {
mas01cr@239 626 if(usingTimes) {
mas01cr@239 627 VERB_LOG(3,"within duration tolerance.\n");
mas01cr@239 628 }
mas01cr@239 629
mas01cr@239 630 // Search for minimum distance by shingles (concatenated vectors)
mas01cr@239 631 for(j = 0; j <= numVectors - wL; j += HOP_SIZE) {
mas01cr@239 632 for(k = 0; k <= trackTable[track] - wL; k += HOP_SIZE) {
mas01cr@239 633 double thisDist;
mas01mc@292 634 if(normalizedDistance)
mas01cr@239 635 thisDist = 2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k];
mas01mc@292 636 else
mas01mc@292 637 if(no_unit_norming)
mas01mc@292 638 thisDist = qnPtr[j]*qnPtr[j]+sNorm[trackIndexOffset+k]*sNorm[trackIndexOffset+k] - 2*DD[j][k];
mas01mc@292 639 else
mas01mc@292 640 thisDist = DD[j][k];
mas01mc@292 641
mas01cr@239 642 // Power test
mas01cr@239 643 if ((!usingPower) || powers_acceptable(qpPtr[j], sPower[trackIndexOffset + k])) {
mas01cr@239 644 // radius test
mas01mc@292 645 if((!radius) || thisDist <= (radius+O2_DISTANCE_TOLERANCE)) {
mas01cr@239 646 reporter->add_point(track, usingQueryPoint ? queryPoint : j, k, thisDist);
mas01cr@239 647 }
mas01cr@239 648 }
mas01cr@239 649 }
mas01cr@239 650 }
mas01cr@239 651 } // Duration match
mas01cr@239 652 delete_arrays(track, numVectors, D, DD);
mas01cr@239 653 }
mas01cr@239 654 }
mas01cr@239 655
mas01cr@239 656 free(data_buffer);
mas01cr@239 657
mas01cr@239 658 gettimeofday(&tv2,NULL);
mas01cr@239 659 VERB_LOG(1,"elapsed time: %ld msec\n",
mas01cr@239 660 (tv2.tv_sec*1000 + tv2.tv_usec/1000) -
mas01cr@239 661 (tv1.tv_sec*1000 + tv1.tv_usec/1000))
mas01cr@239 662
mas01cr@239 663 // Clean up
mas01cr@239 664 if(query_data)
mas01cr@239 665 delete[] query_data;
mas01cr@239 666 if(qNorm)
mas01cr@239 667 delete[] qNorm;
mas01cr@239 668 if(sNorm)
mas01cr@239 669 delete[] sNorm;
mas01cr@239 670 if(qPower)
mas01cr@239 671 delete[] qPower;
mas01cr@239 672 if(sPower)
mas01cr@239 673 delete[] sPower;
mas01cr@239 674 if(D)
mas01cr@239 675 delete[] D;
mas01cr@239 676 if(DD)
mas01cr@239 677 delete[] DD;
mas01cr@239 678 if(meanDBdur)
mas01cr@239 679 delete[] meanDBdur;
mas01cr@239 680 }
mas01cr@239 681
mas01cr@239 682 // Unit norm block of features
mas01cr@239 683 void audioDB::unitNorm(double* X, unsigned dim, unsigned n, double* qNorm){
mas01cr@239 684 unsigned d;
mas01cr@239 685 double L2, *p;
mas01cr@239 686
mas01cr@239 687 VERB_LOG(2, "norming %u vectors...", n);
mas01cr@239 688 while(n--) {
mas01cr@239 689 p = X;
mas01cr@239 690 L2 = 0.0;
mas01cr@239 691 d = dim;
mas01cr@239 692 while(d--) {
mas01cr@239 693 L2 += *p * *p;
mas01cr@239 694 p++;
mas01cr@239 695 }
mas01cr@239 696 if(qNorm) {
mas01cr@239 697 *qNorm++=L2;
mas01cr@239 698 }
mas01cr@239 699 X += dim;
mas01cr@239 700 }
mas01cr@239 701 VERB_LOG(2, "done.\n");
mas01cr@239 702 }
mas01mc@292 703
mas01mc@292 704