annotate query.cpp @ 369:6564be3109c5 gcc-4.3-cleanups

gcc-4.3 warning cleanups for lshlib.cpp (I do not believe that any of these changes contain significant copyrightable "intellectual property". However, to the extent that they do, the changes are hereby released into the Public Domain, and may be therefore be used by anyone for any purpose without need for consideration of any kind.)
author mas01cr
date Wed, 12 Nov 2008 15:23:32 +0000
parents 521812d63516
children ef4792df8f93 f9d86b1db21c 3171089ecfd7
rev   line source
mas01cr@239 1 #include "audioDB.h"
mas01cr@239 2 #include "reporter.h"
mas01cr@239 3
mas01cr@239 4 bool audioDB::powers_acceptable(double p1, double p2) {
mas01cr@239 5 if (use_absolute_threshold) {
mas01cr@239 6 if ((p1 < absolute_threshold) || (p2 < absolute_threshold)) {
mas01cr@239 7 return false;
mas01cr@239 8 }
mas01cr@239 9 }
mas01cr@239 10 if (use_relative_threshold) {
mas01cr@239 11 if (fabs(p1-p2) > fabs(relative_threshold)) {
mas01cr@239 12 return false;
mas01cr@239 13 }
mas01cr@239 14 }
mas01cr@239 15 return true;
mas01cr@239 16 }
mas01cr@239 17
mas01cr@239 18 void audioDB::query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) {
mas01mc@292 19 // init database tables and dbH first
mas01mc@292 20 if(query_from_key)
mas01mc@292 21 initTables(dbName);
mas01mc@292 22 else
mas01mc@292 23 initTables(dbName, inFile);
mas01mc@292 24
mas01mc@292 25 // keyKeyPos requires dbH to be initialized
mas01mc@292 26 if(query_from_key && (!key || (query_from_key_index = getKeyPos((char*)key))==O2_ERR_KEYNOTFOUND))
mas01mc@292 27 error("Query key not found :",key);
mas01mc@292 28
mas01cr@239 29 switch (queryType) {
mas01cr@239 30 case O2_POINT_QUERY:
mas01cr@239 31 sequenceLength = 1;
mas01cr@239 32 normalizedDistance = false;
mas01mc@292 33 reporter = new pointQueryReporter< std::greater < NNresult > >(pointNN);
mas01cr@239 34 break;
mas01cr@239 35 case O2_TRACK_QUERY:
mas01cr@239 36 sequenceLength = 1;
mas01cr@239 37 normalizedDistance = false;
mas01mc@292 38 reporter = new trackAveragingReporter< std::greater< NNresult > >(pointNN, trackNN, dbH->numFiles);
mas01cr@239 39 break;
mas01mc@292 40 case O2_SEQUENCE_QUERY:
mas01mc@292 41 if(no_unit_norming)
mas01mc@292 42 normalizedDistance = false;
mas01cr@239 43 if(radius == 0) {
mas01mc@292 44 reporter = new trackAveragingReporter< std::less< NNresult > >(pointNN, trackNN, dbH->numFiles);
mas01cr@239 45 } else {
mas01mc@292 46 if(index_exists(dbName, radius, sequenceLength)){
mas01mc@292 47 char* indexName = index_get_name(dbName, radius, sequenceLength);
mas01mc@308 48 lsh = index_allocate(indexName, false);
mas01mc@324 49 reporter = new trackSequenceQueryRadReporter(trackNN, index_to_trackID(lsh->get_maxp(), lsh_n_point_bits)+1);
mas01mc@292 50 delete[] indexName;
mas01mc@292 51 }
mas01mc@292 52 else
mas01mc@292 53 reporter = new trackSequenceQueryRadReporter(trackNN, dbH->numFiles);
mas01cr@239 54 }
mas01cr@239 55 break;
mas01mc@292 56 case O2_N_SEQUENCE_QUERY:
mas01mc@292 57 if(no_unit_norming)
mas01mc@292 58 normalizedDistance = false;
mas01mc@248 59 if(radius == 0) {
mas01mc@292 60 reporter = new trackSequenceQueryNNReporter< std::less < NNresult > >(pointNN, trackNN, dbH->numFiles);
mas01mc@248 61 } else {
mas01mc@292 62 if(index_exists(dbName, radius, sequenceLength)){
mas01mc@292 63 char* indexName = index_get_name(dbName, radius, sequenceLength);
mas01mc@308 64 lsh = index_allocate(indexName, false);
mas01mc@324 65 reporter = new trackSequenceQueryRadNNReporter(pointNN,trackNN, index_to_trackID(lsh->get_maxp(), lsh_n_point_bits)+1);
mas01mc@292 66 delete[] indexName;
mas01mc@292 67 }
mas01mc@292 68 else
mas01mc@292 69 reporter = new trackSequenceQueryRadNNReporter(pointNN,trackNN, dbH->numFiles);
mas01mc@248 70 }
mas01mc@248 71 break;
mas01mc@263 72 case O2_ONE_TO_ONE_N_SEQUENCE_QUERY :
mas01mc@263 73 if(radius == 0) {
mas01mc@263 74 error("query-type not yet supported");
mas01mc@263 75 } else {
mas01mc@292 76 reporter = new trackSequenceQueryRadNNReporterOneToOne(pointNN,trackNN, dbH->numFiles);
mas01mc@263 77 }
mas01mc@263 78 break;
mas01cr@239 79 default:
mas01cr@239 80 error("unrecognized queryType in query()");
mas01cr@239 81 }
mas01mc@292 82
mas01mc@292 83 // Test for index (again) here
mas01mc@329 84 if(radius && index_exists(dbName, radius, sequenceLength)){
mas01mc@329 85 VERB_LOG(1, "Calling indexed query on database %s, radius=%f, sequenceLength=%d\n", dbName, radius, sequenceLength);
mas01mc@292 86 index_query_loop(dbName, query_from_key_index);
mas01mc@329 87 }
mas01mc@329 88 else{
mas01mc@329 89 VERB_LOG(1, "Calling brute-force query on database %s\n", dbName);
mas01mc@292 90 query_loop(dbName, query_from_key_index);
mas01mc@329 91 }
mas01mc@292 92
mas01mc@292 93 reporter->report(fileTable, adbQueryResponse);
mas01cr@239 94 }
mas01cr@239 95
mas01cr@239 96 // return ordinal position of key in keyTable
mas01mc@292 97 // this should really be a STL hash map search
mas01cr@239 98 unsigned audioDB::getKeyPos(char* key){
mas01mc@292 99 if(!dbH)
mas01mc@292 100 error("dbH not initialized","getKeyPos");
mas01cr@239 101 for(unsigned k=0; k<dbH->numFiles; k++)
mas01cr@256 102 if(strncmp(fileTable + k*O2_FILETABLE_ENTRY_SIZE, key, strlen(key))==0)
mas01cr@239 103 return k;
mas01cr@239 104 error("Key not found",key);
mas01cr@239 105 return O2_ERR_KEYNOTFOUND;
mas01cr@239 106 }
mas01cr@239 107
mas01cr@239 108 // This is a common pattern in sequence queries: what we are doing is
mas01cr@239 109 // taking a window of length seqlen over a buffer of length length,
mas01cr@239 110 // and placing the sum of the elements in that window in the first
mas01cr@239 111 // element of the window: thus replacing all but the last seqlen
mas01cr@239 112 // elements in the buffer with the corresponding windowed sum.
mas01cr@239 113 void audioDB::sequence_sum(double *buffer, int length, int seqlen) {
mas01cr@239 114 double tmp1, tmp2, *ps;
mas01cr@239 115 int j, w;
mas01cr@239 116
mas01cr@239 117 tmp1 = *buffer;
mas01cr@239 118 j = 1;
mas01cr@239 119 w = seqlen - 1;
mas01cr@239 120 while(w--) {
mas01cr@239 121 *buffer += buffer[j++];
mas01cr@239 122 }
mas01cr@239 123 ps = buffer + 1;
mas01cr@239 124 w = length - seqlen; // +1 - 1
mas01cr@239 125 while(w--) {
mas01cr@239 126 tmp2 = *ps;
mas01cr@239 127 if(isfinite(tmp1)) {
mas01cr@239 128 *ps = *(ps - 1) - tmp1 + *(ps + seqlen - 1);
mas01cr@239 129 } else {
mas01cr@239 130 for(int i = 1; i < seqlen; i++) {
mas01cr@239 131 *ps += *(ps + i);
mas01cr@239 132 }
mas01cr@239 133 }
mas01cr@239 134 tmp1 = tmp2;
mas01cr@239 135 ps++;
mas01cr@239 136 }
mas01cr@239 137 }
mas01cr@239 138
mas01cr@239 139 // In contrast to sequence_sum() above, sequence_sqrt() and
mas01cr@239 140 // sequence_average() below are simple mappers across the sequence.
mas01cr@239 141 void audioDB::sequence_sqrt(double *buffer, int length, int seqlen) {
mas01cr@239 142 int w = length - seqlen + 1;
mas01cr@239 143 while(w--) {
mas01cr@239 144 *buffer = sqrt(*buffer);
mas01cr@239 145 buffer++;
mas01cr@239 146 }
mas01cr@239 147 }
mas01cr@239 148
mas01cr@239 149 void audioDB::sequence_average(double *buffer, int length, int seqlen) {
mas01cr@239 150 int w = length - seqlen + 1;
mas01cr@239 151 while(w--) {
mas01cr@239 152 *buffer /= seqlen;
mas01cr@239 153 buffer++;
mas01cr@239 154 }
mas01cr@239 155 }
mas01cr@239 156
mas01cr@239 157 void audioDB::initialize_arrays(int track, unsigned int numVectors, double *query, double *data_buffer, double **D, double **DD) {
mas01cr@239 158 unsigned int j, k, l, w;
mas01cr@239 159 double *dp, *qp, *sp;
mas01cr@239 160
mas01cr@239 161 const unsigned HOP_SIZE = sequenceHop;
mas01cr@239 162 const unsigned wL = sequenceLength;
mas01cr@239 163
mas01cr@239 164 for(j = 0; j < numVectors; j++) {
mas01cr@239 165 // Sum products matrix
mas01cr@239 166 D[j] = new double[trackTable[track]];
mas01cr@239 167 assert(D[j]);
mas01cr@239 168 // Matched filter matrix
mas01cr@239 169 DD[j]=new double[trackTable[track]];
mas01cr@239 170 assert(DD[j]);
mas01cr@239 171 }
mas01cr@239 172
mas01cr@239 173 // Dot product
mas01cr@239 174 for(j = 0; j < numVectors; j++)
mas01cr@239 175 for(k = 0; k < trackTable[track]; k++){
mas01cr@239 176 qp = query + j * dbH->dim;
mas01cr@239 177 sp = data_buffer + k * dbH->dim;
mas01cr@239 178 DD[j][k] = 0.0; // Initialize matched filter array
mas01cr@239 179 dp = &D[j][k]; // point to correlation cell j,k
mas01cr@239 180 *dp = 0.0; // initialize correlation cell
mas01cr@239 181 l = dbH->dim; // size of vectors
mas01cr@239 182 while(l--)
mas01cr@239 183 *dp += *qp++ * *sp++;
mas01cr@239 184 }
mas01cr@239 185
mas01cr@239 186 // Matched Filter
mas01cr@239 187 // HOP SIZE == 1
mas01cr@239 188 double* spd;
mas01cr@239 189 if(HOP_SIZE == 1) { // HOP_SIZE = shingleHop
mas01cr@239 190 for(w = 0; w < wL; w++) {
mas01cr@239 191 for(j = 0; j < numVectors - w; j++) {
mas01cr@239 192 sp = DD[j];
mas01cr@239 193 spd = D[j+w] + w;
mas01cr@239 194 k = trackTable[track] - w;
mas01mc@292 195 while(k--)
mas01mc@292 196 *sp++ += *spd++;
mas01cr@239 197 }
mas01cr@239 198 }
mas01cr@239 199 } else { // HOP_SIZE != 1
mas01cr@239 200 for(w = 0; w < wL; w++) {
mas01cr@239 201 for(j = 0; j < numVectors - w; j += HOP_SIZE) {
mas01cr@239 202 sp = DD[j];
mas01cr@239 203 spd = D[j+w]+w;
mas01cr@239 204 for(k = 0; k < trackTable[track] - w; k += HOP_SIZE) {
mas01cr@239 205 *sp += *spd;
mas01cr@239 206 sp += HOP_SIZE;
mas01cr@239 207 spd += HOP_SIZE;
mas01cr@239 208 }
mas01cr@239 209 }
mas01cr@239 210 }
mas01cr@239 211 }
mas01cr@239 212 }
mas01cr@239 213
mas01cr@239 214 void audioDB::delete_arrays(int track, unsigned int numVectors, double **D, double **DD) {
mas01cr@239 215 if(D != NULL) {
mas01cr@239 216 for(unsigned int j = 0; j < numVectors; j++) {
mas01cr@239 217 delete[] D[j];
mas01cr@239 218 }
mas01cr@239 219 }
mas01cr@239 220 if(DD != NULL) {
mas01cr@239 221 for(unsigned int j = 0; j < numVectors; j++) {
mas01cr@239 222 delete[] DD[j];
mas01cr@239 223 }
mas01cr@239 224 }
mas01cr@239 225 }
mas01cr@239 226
mas01mc@324 227 void audioDB::read_data(int trkfid, int track, double **data_buffer_p, size_t *data_buffer_size_p) {
mas01cr@239 228 if (trackTable[track] * sizeof(double) * dbH->dim > *data_buffer_size_p) {
mas01cr@239 229 if(*data_buffer_p) {
mas01cr@239 230 free(*data_buffer_p);
mas01cr@239 231 }
mas01cr@239 232 {
mas01cr@239 233 *data_buffer_size_p = trackTable[track] * sizeof(double) * dbH->dim;
mas01cr@239 234 void *tmp = malloc(*data_buffer_size_p);
mas01cr@239 235 if (tmp == NULL) {
mas01cr@239 236 error("error allocating data buffer");
mas01cr@239 237 }
mas01cr@239 238 *data_buffer_p = (double *) tmp;
mas01cr@239 239 }
mas01cr@239 240 }
mas01cr@239 241
mas01cr@366 242 CHECKED_READ(trkfid, *data_buffer_p, trackTable[track] * sizeof(double) * dbH->dim);
mas01cr@239 243 }
mas01cr@239 244
mas01cr@239 245 // These names deserve some unpicking. The names starting with a "q"
mas01cr@239 246 // are pointers to the query, norm and power vectors; the names
mas01cr@239 247 // starting with "v" are things that will end up pointing to the
mas01cr@239 248 // actual query point's information. -- CSR, 2007-12-05
mas01cr@239 249 void audioDB::set_up_query(double **qp, double **vqp, double **qnp, double **vqnp, double **qpp, double **vqpp, double *mqdp, unsigned *nvp) {
mas01cr@239 250 *nvp = (statbuf.st_size - sizeof(int)) / (dbH->dim * sizeof(double));
mas01mc@292 251
mas01cr@239 252 if(!(dbH->flags & O2_FLAG_L2NORM)) {
mas01cr@239 253 error("Database must be L2 normed for sequence query","use -L2NORM");
mas01cr@239 254 }
mas01cr@239 255
mas01cr@239 256 if(*nvp < sequenceLength) {
mas01cr@239 257 error("Query shorter than requested sequence length", "maybe use -l");
mas01cr@239 258 }
mas01cr@239 259
mas01cr@239 260 VERB_LOG(1, "performing norms... ");
mas01cr@239 261
mas01cr@239 262 *qp = new double[*nvp * dbH->dim];
mas01cr@239 263 memcpy(*qp, indata+sizeof(int), *nvp * dbH->dim * sizeof(double));
mas01cr@239 264 *qnp = new double[*nvp];
mas01cr@239 265 unitNorm(*qp, dbH->dim, *nvp, *qnp);
mas01cr@239 266
mas01cr@239 267 sequence_sum(*qnp, *nvp, sequenceLength);
mas01cr@239 268 sequence_sqrt(*qnp, *nvp, sequenceLength);
mas01cr@239 269
mas01cr@239 270 if (usingPower) {
mas01cr@239 271 *qpp = new double[*nvp];
mas01cr@239 272 if (lseek(powerfd, sizeof(int), SEEK_SET) == (off_t) -1) {
mas01cr@239 273 error("error seeking to data", powerFileName, "lseek");
mas01cr@239 274 }
mas01cr@239 275 int count = read(powerfd, *qpp, *nvp * sizeof(double));
mas01cr@239 276 if (count == -1) {
mas01cr@239 277 error("error reading data", powerFileName, "read");
mas01cr@239 278 }
mas01cr@239 279 if ((unsigned) count != *nvp * sizeof(double)) {
mas01cr@239 280 error("short read", powerFileName);
mas01cr@239 281 }
mas01cr@239 282
mas01cr@239 283 sequence_sum(*qpp, *nvp, sequenceLength);
mas01cr@239 284 sequence_average(*qpp, *nvp, sequenceLength);
mas01cr@239 285 }
mas01cr@239 286
mas01cr@239 287 if (usingTimes) {
mas01cr@239 288 unsigned int k;
mas01cr@239 289 *mqdp = 0.0;
mas01cr@239 290 double *querydurs = new double[*nvp];
mas01cr@239 291 double *timesdata = new double[*nvp*2];
mas01cr@239 292 insertTimeStamps(*nvp, timesFile, timesdata);
mas01cr@239 293 for(k = 0; k < *nvp; k++) {
mas01cr@239 294 querydurs[k] = timesdata[2*k+1] - timesdata[2*k];
mas01cr@239 295 *mqdp += querydurs[k];
mas01cr@239 296 }
mas01cr@239 297 *mqdp /= k;
mas01cr@239 298
mas01cr@239 299 VERB_LOG(1, "mean query file duration: %f\n", *mqdp);
mas01cr@239 300
mas01cr@239 301 delete [] querydurs;
mas01cr@239 302 delete [] timesdata;
mas01cr@239 303 }
mas01cr@239 304
mas01cr@239 305 // Defaults, for exhaustive search (!usingQueryPoint)
mas01cr@239 306 *vqp = *qp;
mas01cr@239 307 *vqnp = *qnp;
mas01cr@239 308 *vqpp = *qpp;
mas01cr@239 309
mas01cr@239 310 if(usingQueryPoint) {
mas01mc@341 311 if( !(queryPoint < *nvp && queryPoint < *nvp - sequenceLength + 1) ) {
mas01mc@342 312 error("queryPoint >= numVectors-sequenceLength+1 in query");
mas01cr@239 313 } else {
mas01cr@239 314 VERB_LOG(1, "query point: %u\n", queryPoint);
mas01cr@239 315 *vqp = *qp + queryPoint * dbH->dim;
mas01cr@239 316 *vqnp = *qnp + queryPoint;
mas01cr@239 317 if (usingPower) {
mas01cr@239 318 *vqpp = *qpp + queryPoint;
mas01cr@239 319 }
mas01cr@239 320 *nvp = sequenceLength;
mas01cr@239 321 }
mas01cr@239 322 }
mas01cr@239 323 }
mas01cr@239 324
mas01mc@292 325 // Does the same as set_up_query(...) but from database features instead of from a file
mas01mc@292 326 // Constructs the same outputs as set_up_query
mas01mc@292 327 void audioDB::set_up_query_from_key(double **qp, double **vqp, double **qnp, double **vqnp, double **qpp, double **vqpp, double *mqdp, unsigned *nvp, Uns32T queryIndex) {
mas01mc@292 328 if(!trackTable)
mas01mc@292 329 error("trackTable not initialized","set_up_query_from_key");
mas01mc@292 330
mas01mc@292 331 if(!(dbH->flags & O2_FLAG_L2NORM)) {
mas01mc@292 332 error("Database must be L2 normed for sequence query","use -L2NORM");
mas01mc@292 333 }
mas01mc@292 334
mas01mc@292 335 if(dbH->flags & O2_FLAG_POWER)
mas01mc@292 336 usingPower = true;
mas01mc@292 337
mas01mc@292 338 if(dbH->flags & O2_FLAG_TIMES)
mas01mc@292 339 usingTimes = true;
mas01mc@292 340
mas01mc@292 341 *nvp = trackTable[queryIndex];
mas01mc@292 342 if(*nvp < sequenceLength) {
mas01mc@292 343 error("Query shorter than requested sequence length", "maybe use -l");
mas01mc@292 344 }
mas01mc@292 345
mas01mc@292 346 VERB_LOG(1, "performing norms... ");
mas01mc@292 347
mas01mc@324 348 // For LARGE_ADB load query features from file
mas01mc@324 349 if( dbH->flags & O2_FLAG_LARGE_ADB ){
mas01mc@324 350 if(infid>0)
mas01mc@324 351 close(infid);
mas01mc@324 352 char* prefixedString = new char[O2_MAXFILESTR];
mas01mc@324 353 char* tmpStr = prefixedString;
mas01mc@324 354 strncpy(prefixedString, featureFileNameTable+queryIndex*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR);
mas01mc@324 355 prefix_name(&prefixedString, adb_feature_root);
mas01mc@324 356 if(tmpStr!=prefixedString)
mas01mc@324 357 delete[] tmpStr;
mas01mc@324 358 initInputFile(prefixedString, false); // nommap, file pointer at correct position
mas01mc@324 359 size_t allocatedSize = 0;
mas01mc@324 360 read_data(infid, queryIndex, qp, &allocatedSize); // over-writes qp and allocatedSize
mas01mc@324 361 // Consistency check on allocated memory and query feature size
mas01mc@324 362 if(*nvp*sizeof(double)*dbH->dim != allocatedSize)
mas01mc@324 363 error("Query memory allocation failed consitency check","set_up_query_from_key");
mas01mc@324 364 // Allocated and calculate auxillary sequences: l2norm and power
mas01mc@324 365 init_track_aux_data(queryIndex, *qp, qnp, vqnp, qpp, vqpp);
mas01mc@324 366 }
mas01mc@324 367 else{ // Load from self-contained ADB database
mas01mc@324 368 // Read query feature vectors from database
mas01mc@324 369 *qp = NULL;
mas01mc@324 370 lseek(dbfid, dbH->dataOffset + trackOffsetTable[queryIndex] * sizeof(double), SEEK_SET);
mas01mc@324 371 size_t allocatedSize = 0;
mas01mc@324 372 read_data(dbfid, queryIndex, qp, &allocatedSize);
mas01mc@324 373 // Consistency check on allocated memory and query feature size
mas01mc@324 374 if(*nvp*sizeof(double)*dbH->dim != allocatedSize)
mas01mc@324 375 error("Query memory allocation failed consitency check","set_up_query_from_key");
mas01mc@324 376
mas01mc@324 377 Uns32T trackIndexOffset = trackOffsetTable[queryIndex]/dbH->dim; // Convert num data elements to num vectors
mas01mc@324 378 // Copy L2 norm partial-sum coefficients
mas01mc@324 379 assert(*qnp = new double[*nvp]);
mas01mc@324 380 memcpy(*qnp, l2normTable+trackIndexOffset, *nvp*sizeof(double));
mas01mc@324 381 sequence_sum(*qnp, *nvp, sequenceLength);
mas01mc@324 382 sequence_sqrt(*qnp, *nvp, sequenceLength);
mas01mc@324 383
mas01mc@324 384 if( usingPower ){
mas01mc@324 385 // Copy Power partial-sum coefficients
mas01mc@324 386 assert(*qpp = new double[*nvp]);
mas01mc@324 387 memcpy(*qpp, powerTable+trackIndexOffset, *nvp*sizeof(double));
mas01mc@324 388 sequence_sum(*qpp, *nvp, sequenceLength);
mas01mc@324 389 sequence_average(*qpp, *nvp, sequenceLength);
mas01mc@324 390 }
mas01mc@324 391
mas01mc@324 392 if (usingTimes) {
mas01mc@324 393 unsigned int k;
mas01mc@324 394 *mqdp = 0.0;
mas01mc@324 395 double *querydurs = new double[*nvp];
mas01mc@324 396 double *timesdata = new double[*nvp*2];
mas01mc@324 397 assert(querydurs && timesdata);
mas01mc@324 398 memcpy(timesdata, timesTable+trackIndexOffset, *nvp*sizeof(double));
mas01mc@324 399 for(k = 0; k < *nvp; k++) {
mas01mc@324 400 querydurs[k] = timesdata[2*k+1] - timesdata[2*k];
mas01mc@324 401 *mqdp += querydurs[k];
mas01mc@324 402 }
mas01mc@324 403 *mqdp /= k;
mas01mc@324 404
mas01mc@324 405 VERB_LOG(1, "mean query file duration: %f\n", *mqdp);
mas01mc@324 406
mas01mc@324 407 delete [] querydurs;
mas01mc@324 408 delete [] timesdata;
mas01mc@324 409 }
mas01mc@292 410 }
mas01mc@292 411
mas01mc@292 412 // Defaults, for exhaustive search (!usingQueryPoint)
mas01mc@292 413 *vqp = *qp;
mas01mc@292 414 *vqnp = *qnp;
mas01mc@292 415 *vqpp = *qpp;
mas01mc@292 416
mas01mc@292 417 if(usingQueryPoint) {
mas01mc@341 418 if( !(queryPoint < *nvp && queryPoint < *nvp - sequenceLength + 1) ) {
mas01mc@342 419 error("queryPoint >= numVectors-sequenceLength+1 in query");
mas01mc@292 420 } else {
mas01mc@292 421 VERB_LOG(1, "query point: %u\n", queryPoint);
mas01mc@292 422 *vqp = *qp + queryPoint * dbH->dim;
mas01mc@292 423 *vqnp = *qnp + queryPoint;
mas01mc@292 424 if (usingPower) {
mas01mc@292 425 *vqpp = *qpp + queryPoint;
mas01mc@292 426 }
mas01mc@292 427 *nvp = sequenceLength;
mas01mc@292 428 }
mas01mc@292 429 }
mas01mc@292 430 }
mas01mc@292 431
mas01mc@292 432
mas01cr@239 433 // FIXME: this is not the right name; we're not actually setting up
mas01cr@239 434 // the database, but copying various bits of it out of mmap()ed tables
mas01cr@239 435 // in order to reduce seeks.
mas01cr@239 436 void audioDB::set_up_db(double **snp, double **vsnp, double **spp, double **vspp, double **mddp, unsigned int *dvp) {
mas01cr@239 437 *dvp = dbH->length / (dbH->dim * sizeof(double));
mas01cr@239 438 *snp = new double[*dvp];
mas01cr@239 439
mas01cr@239 440 double *snpp = *snp, *sppp = 0;
mas01cr@239 441 memcpy(*snp, l2normTable, *dvp * sizeof(double));
mas01cr@239 442
mas01cr@239 443 if (usingPower) {
mas01cr@239 444 if (!(dbH->flags & O2_FLAG_POWER)) {
mas01cr@239 445 error("database not power-enabled", dbName);
mas01cr@239 446 }
mas01cr@239 447 *spp = new double[*dvp];
mas01cr@239 448 sppp = *spp;
mas01cr@239 449 memcpy(*spp, powerTable, *dvp * sizeof(double));
mas01cr@239 450 }
mas01cr@239 451
mas01cr@239 452 for(unsigned int i = 0; i < dbH->numFiles; i++){
mas01cr@239 453 if(trackTable[i] >= sequenceLength) {
mas01cr@239 454 sequence_sum(snpp, trackTable[i], sequenceLength);
mas01cr@239 455 sequence_sqrt(snpp, trackTable[i], sequenceLength);
mas01cr@239 456
mas01cr@239 457 if (usingPower) {
mas01cr@239 458 sequence_sum(sppp, trackTable[i], sequenceLength);
mas01cr@239 459 sequence_average(sppp, trackTable[i], sequenceLength);
mas01cr@239 460 }
mas01cr@239 461 }
mas01cr@239 462 snpp += trackTable[i];
mas01cr@239 463 if (usingPower) {
mas01cr@239 464 sppp += trackTable[i];
mas01cr@239 465 }
mas01cr@239 466 }
mas01cr@239 467
mas01cr@239 468 if (usingTimes) {
mas01cr@239 469 if(!(dbH->flags & O2_FLAG_TIMES)) {
mas01cr@239 470 error("query timestamps provided for non-timed database", dbName);
mas01cr@239 471 }
mas01cr@239 472
mas01cr@239 473 *mddp = new double[dbH->numFiles];
mas01cr@239 474
mas01cr@239 475 for(unsigned int k = 0; k < dbH->numFiles; k++) {
mas01cr@239 476 unsigned int j;
mas01cr@239 477 (*mddp)[k] = 0.0;
mas01cr@239 478 for(j = 0; j < trackTable[k]; j++) {
mas01cr@239 479 (*mddp)[k] += timesTable[2*j+1] - timesTable[2*j];
mas01cr@239 480 }
mas01cr@239 481 (*mddp)[k] /= j;
mas01cr@239 482 }
mas01cr@239 483 }
mas01cr@239 484
mas01cr@239 485 *vsnp = *snp;
mas01cr@239 486 *vspp = *spp;
mas01cr@239 487 }
mas01cr@239 488
mas01mc@292 489 // query_points()
mas01mc@292 490 //
mas01mc@292 491 // using PointPairs held in the exact_evaluation_queue compute squared distance for each PointPair
mas01mc@292 492 // and insert result into the current reporter.
mas01mc@292 493 //
mas01mc@292 494 // Preconditions:
mas01mc@292 495 // A query inFile has been opened with setup_query(...) and query pointers initialized
mas01mc@292 496 // The database contains some points
mas01mc@292 497 // An exact_evaluation_queue has been allocated and populated
mas01mc@292 498 // A reporter has been allocated
mas01mc@292 499 //
mas01mc@292 500 // Postconditions:
mas01mc@292 501 // reporter contains the points and distances that meet the reporter constraints
mas01mc@292 502
mas01mc@292 503 void audioDB::query_loop_points(double* query, double* qnPtr, double* qpPtr, double meanQdur, Uns32T numVectors){
mas01mc@292 504 unsigned int dbVectors;
mas01mc@315 505 double *sNorm = 0, *snPtr, *sPower = 0, *spPtr = 0;
mas01mc@292 506 double *meanDBdur = 0;
mas01mc@292 507
mas01mc@292 508 // check pre-conditions
mas01mc@292 509 assert(exact_evaluation_queue&&reporter);
mas01mc@292 510 if(!exact_evaluation_queue->size()) // Exit if no points to evaluate
mas01mc@292 511 return;
mas01mc@292 512
mas01mc@292 513 // Compute database info
mas01mc@292 514 // FIXME: we more than likely don't need very much of the database
mas01mc@292 515 // so make a new method to build these values per-track or, even better, per-point
mas01mc@324 516 if( !( dbH->flags & O2_FLAG_LARGE_ADB) )
mas01mc@324 517 set_up_db(&sNorm, &snPtr, &sPower, &spPtr, &meanDBdur, &dbVectors);
mas01mc@292 518
mas01mc@292 519 VERB_LOG(1, "matching points...");
mas01mc@292 520
mas01mc@292 521 assert(pointNN>0 && pointNN<=O2_MAXNN);
mas01mc@292 522 assert(trackNN>0 && trackNN<=O2_MAXNN);
mas01mc@292 523
mas01mc@292 524 // We are guaranteed that the order of points is sorted by:
mas01mc@324 525 // trackID, spos, qpos
mas01mc@292 526 // so we can be relatively efficient in initialization of track data.
mas01mc@292 527 // Here we assume that points don't overlap, so we will use exhaustive dot
mas01mc@324 528 // product evaluation instead of memoization of partial sums which is used
mas01mc@324 529 // for exhaustive brute-force evaluation from smaller databases: e.g. query_loop()
mas01mc@292 530 double dist;
mas01mc@292 531 size_t data_buffer_size = 0;
mas01mc@292 532 double *data_buffer = 0;
mas01mc@324 533 Uns32T trackOffset = 0;
mas01mc@324 534 Uns32T trackIndexOffset = 0;
mas01mc@292 535 Uns32T currentTrack = 0x80000000; // Initialize with a value outside of track index range
mas01mc@292 536 Uns32T npairs = exact_evaluation_queue->size();
mas01mc@292 537 while(npairs--){
mas01mc@292 538 PointPair pp = exact_evaluation_queue->top();
mas01mc@324 539 // Large ADB track data must be loaded here for sPower
mas01mc@324 540 if(dbH->flags & O2_FLAG_LARGE_ADB){
mas01mc@324 541 trackOffset=0;
mas01mc@324 542 trackIndexOffset=0;
mas01mc@292 543 if(currentTrack!=pp.trackID){
mas01mc@324 544 char* prefixedString = new char[O2_MAXFILESTR];
mas01mc@324 545 char* tmpStr = prefixedString;
mas01mc@324 546 // On currentTrack change, allocate and load track data
mas01mc@292 547 currentTrack=pp.trackID;
mas01mc@324 548 SAFE_DELETE_ARRAY(sNorm);
mas01mc@324 549 SAFE_DELETE_ARRAY(sPower);
mas01mc@324 550 if(infid>0)
mas01mc@324 551 close(infid);
mas01mc@324 552 // Open and check dimensions of feature file
mas01mc@324 553 strncpy(prefixedString, featureFileNameTable+pp.trackID*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR);
mas01mc@324 554 prefix_name((char ** const) &prefixedString, adb_feature_root);
mas01mc@324 555 if (prefixedString!=tmpStr)
mas01mc@324 556 delete[] tmpStr;
mas01mc@324 557 initInputFile(prefixedString, false); // nommap, file pointer at correct position
mas01mc@324 558 // Load the feature vector data for current track into data_buffer
mas01mc@324 559 read_data(infid, pp.trackID, &data_buffer, &data_buffer_size);
mas01mc@324 560 // Load power and calculate power and l2norm sequence sums
mas01mc@324 561 init_track_aux_data(pp.trackID, data_buffer, &sNorm, &snPtr, &sPower, &spPtr);
mas01mc@292 562 }
mas01mc@324 563 }
mas01mc@324 564 else{
mas01mc@324 565 // These offsets are w.r.t. the entire database of feature vectors and auxillary variables
mas01mc@324 566 trackOffset=trackOffsetTable[pp.trackID]; // num data elements offset
mas01mc@324 567 trackIndexOffset=trackOffset/dbH->dim; // num vectors offset
mas01mc@324 568 }
mas01mc@324 569 Uns32T qPos = usingQueryPoint?0:pp.qpos;// index for query point
mas01mc@324 570 Uns32T sPos = trackIndexOffset+pp.spos; // index into l2norm table
mas01mc@324 571 // Test power thresholds before computing distance
mas01mc@324 572 if( ( !usingPower || powers_acceptable(qpPtr[qPos], sPower[sPos])) &&
mas01mc@324 573 ( qPos<numVectors-sequenceLength+1 && pp.spos<trackTable[pp.trackID]-sequenceLength+1 ) ){
mas01mc@324 574 // Non-large ADB track data is loaded inside power test for efficiency
mas01mc@324 575 if( !(dbH->flags & O2_FLAG_LARGE_ADB) && (currentTrack!=pp.trackID) ){
mas01mc@324 576 // On currentTrack change, allocate and load track data
mas01mc@324 577 currentTrack=pp.trackID;
mas01mc@324 578 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET);
mas01mc@324 579 read_data(dbfid, currentTrack, &data_buffer, &data_buffer_size);
mas01mc@324 580 }
mas01mc@324 581 // Compute distance
mas01mc@324 582 dist = dot_product_points(query+qPos*dbH->dim, data_buffer+pp.spos*dbH->dim, dbH->dim*sequenceLength);
mas01mc@324 583 double qn = qnPtr[qPos];
mas01mc@324 584 double sn = sNorm[sPos];
mas01mc@292 585 if(normalizedDistance)
mas01mc@324 586 dist = 2 - (2/(qn*sn))*dist;
mas01mc@292 587 else
mas01mc@292 588 if(no_unit_norming)
mas01mc@324 589 dist = qn*qn + sn*sn - 2*dist;
mas01mc@292 590 // else
mas01mc@292 591 // dist = dist;
mas01mc@314 592 if((!radius) || dist <= (O2_LSH_EXACT_MULT*radius+O2_DISTANCE_TOLERANCE))
mas01mc@324 593 reporter->add_point(pp.trackID, pp.qpos, pp.spos, dist);
mas01mc@292 594 }
mas01mc@292 595 exact_evaluation_queue->pop();
mas01mc@292 596 }
mas01mc@315 597 // Cleanup
mas01mc@324 598 SAFE_DELETE_ARRAY(sNorm);
mas01mc@324 599 SAFE_DELETE_ARRAY(sPower);
mas01mc@324 600 SAFE_DELETE_ARRAY(meanDBdur);
mas01mc@292 601 }
mas01mc@292 602
mas01mc@292 603 // A completely unprotected dot-product method
mas01mc@292 604 // Caller is responsible for ensuring that memory is within bounds
mas01mc@292 605 inline double audioDB::dot_product_points(double* q, double* p, Uns32T L){
mas01mc@292 606 double dist = 0.0;
mas01mc@292 607 while(L--)
mas01mc@292 608 dist += *q++ * *p++;
mas01mc@292 609 return dist;
mas01mc@292 610 }
mas01mc@292 611
mas01mc@292 612 void audioDB::query_loop(const char* dbName, Uns32T queryIndex) {
mas01cr@239 613
mas01cr@239 614 unsigned int numVectors;
mas01cr@239 615 double *query, *query_data;
mas01cr@239 616 double *qNorm, *qnPtr, *qPower = 0, *qpPtr = 0;
mas01cr@239 617 double meanQdur;
mas01cr@239 618
mas01mc@324 619 if( dbH->flags & O2_FLAG_LARGE_ADB )
mas01mc@324 620 error("error: LARGE_ADB requires indexed query");
mas01mc@324 621
mas01mc@292 622 if(query_from_key)
mas01mc@292 623 set_up_query_from_key(&query_data, &query, &qNorm, &qnPtr, &qPower, &qpPtr, &meanQdur, &numVectors, queryIndex);
mas01mc@292 624 else
mas01mc@292 625 set_up_query(&query_data, &query, &qNorm, &qnPtr, &qPower, &qpPtr, &meanQdur, &numVectors);
mas01cr@239 626
mas01cr@239 627 unsigned int dbVectors;
mas01cr@239 628 double *sNorm, *snPtr, *sPower = 0, *spPtr = 0;
mas01cr@239 629 double *meanDBdur = 0;
mas01cr@239 630
mas01cr@239 631 set_up_db(&sNorm, &snPtr, &sPower, &spPtr, &meanDBdur, &dbVectors);
mas01cr@239 632
mas01cr@239 633 VERB_LOG(1, "matching tracks...");
mas01cr@239 634
mas01cr@239 635 assert(pointNN>0 && pointNN<=O2_MAXNN);
mas01cr@239 636 assert(trackNN>0 && trackNN<=O2_MAXNN);
mas01cr@239 637
mas01cr@239 638 unsigned j,k,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength;
mas01cr@239 639 double **D = 0; // Differences query and target
mas01cr@239 640 double **DD = 0; // Matched filter distance
mas01cr@239 641
mas01mc@292 642 D = new double*[numVectors]; // pre-allocate
mas01cr@239 643 DD = new double*[numVectors];
mas01cr@239 644
mas01cr@239 645 gettimeofday(&tv1, NULL);
mas01cr@239 646 unsigned processedTracks = 0;
mas01cr@239 647 off_t trackIndexOffset;
mas01cr@239 648 char nextKey[MAXSTR];
mas01cr@239 649
mas01cr@239 650 // Track loop
mas01cr@239 651 size_t data_buffer_size = 0;
mas01cr@239 652 double *data_buffer = 0;
mas01cr@239 653 lseek(dbfid, dbH->dataOffset, SEEK_SET);
mas01cr@239 654
mas01cr@239 655 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++) {
mas01cr@239 656
mas01cr@239 657 trackOffset = trackOffsetTable[track]; // numDoubles offset
mas01cr@239 658
mas01cr@239 659 // get trackID from file if using a control file
mas01cr@239 660 if(trackFile) {
mas01cr@239 661 trackFile->getline(nextKey,MAXSTR);
mas01cr@239 662 if(!trackFile->eof()) {
mas01cr@239 663 track = getKeyPos(nextKey);
mas01cr@239 664 trackOffset = trackOffsetTable[track];
mas01cr@239 665 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET);
mas01cr@239 666 } else {
mas01cr@239 667 break;
mas01cr@239 668 }
mas01cr@239 669 }
mas01cr@239 670
mas01mc@292 671 // skip identity on query_from_key
mas01mc@292 672 if( query_from_key && (track == queryIndex) ) {
mas01mc@292 673 if(queryIndex!=dbH->numFiles-1){
mas01mc@292 674 track++;
mas01mc@292 675 trackOffset = trackOffsetTable[track];
mas01mc@292 676 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET);
mas01mc@292 677 }
mas01mc@292 678 else{
mas01mc@292 679 break;
mas01mc@292 680 }
mas01mc@292 681 }
mas01mc@292 682
mas01cr@239 683 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset
mas01cr@239 684
mas01mc@324 685 read_data(dbfid, track, &data_buffer, &data_buffer_size);
mas01cr@239 686 if(sequenceLength <= trackTable[track]) { // test for short sequences
mas01cr@239 687
mas01cr@239 688 VERB_LOG(7,"%u.%jd.%u | ", track, (intmax_t) trackIndexOffset, trackTable[track]);
mas01cr@239 689
mas01cr@239 690 initialize_arrays(track, numVectors, query, data_buffer, D, DD);
mas01cr@239 691
mas01cr@239 692 if(usingTimes) {
mas01cr@239 693 VERB_LOG(3,"meanQdur=%f meanDBdur=%f\n", meanQdur, meanDBdur[track]);
mas01cr@239 694 }
mas01cr@239 695
mas01cr@239 696 if((!usingTimes) || fabs(meanDBdur[track]-meanQdur) < meanQdur*timesTol) {
mas01cr@239 697 if(usingTimes) {
mas01cr@239 698 VERB_LOG(3,"within duration tolerance.\n");
mas01cr@239 699 }
mas01cr@239 700
mas01cr@239 701 // Search for minimum distance by shingles (concatenated vectors)
mas01cr@239 702 for(j = 0; j <= numVectors - wL; j += HOP_SIZE) {
mas01cr@239 703 for(k = 0; k <= trackTable[track] - wL; k += HOP_SIZE) {
mas01cr@239 704 double thisDist;
mas01mc@292 705 if(normalizedDistance)
mas01cr@239 706 thisDist = 2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k];
mas01mc@292 707 else
mas01mc@292 708 if(no_unit_norming)
mas01mc@292 709 thisDist = qnPtr[j]*qnPtr[j]+sNorm[trackIndexOffset+k]*sNorm[trackIndexOffset+k] - 2*DD[j][k];
mas01mc@292 710 else
mas01mc@292 711 thisDist = DD[j][k];
mas01mc@292 712
mas01cr@239 713 // Power test
mas01cr@239 714 if ((!usingPower) || powers_acceptable(qpPtr[j], sPower[trackIndexOffset + k])) {
mas01cr@239 715 // radius test
mas01mc@292 716 if((!radius) || thisDist <= (radius+O2_DISTANCE_TOLERANCE)) {
mas01cr@239 717 reporter->add_point(track, usingQueryPoint ? queryPoint : j, k, thisDist);
mas01cr@239 718 }
mas01cr@239 719 }
mas01cr@239 720 }
mas01cr@239 721 }
mas01cr@239 722 } // Duration match
mas01cr@239 723 delete_arrays(track, numVectors, D, DD);
mas01cr@239 724 }
mas01cr@239 725 }
mas01cr@239 726
mas01cr@239 727 free(data_buffer);
mas01cr@239 728
mas01cr@239 729 gettimeofday(&tv2,NULL);
mas01cr@239 730 VERB_LOG(1,"elapsed time: %ld msec\n",
mas01cr@239 731 (tv2.tv_sec*1000 + tv2.tv_usec/1000) -
mas01cr@239 732 (tv1.tv_sec*1000 + tv1.tv_usec/1000))
mas01cr@239 733
mas01cr@239 734 // Clean up
mas01cr@239 735 if(query_data)
mas01cr@239 736 delete[] query_data;
mas01cr@239 737 if(qNorm)
mas01cr@239 738 delete[] qNorm;
mas01cr@239 739 if(sNorm)
mas01cr@239 740 delete[] sNorm;
mas01cr@239 741 if(qPower)
mas01cr@239 742 delete[] qPower;
mas01cr@239 743 if(sPower)
mas01cr@239 744 delete[] sPower;
mas01cr@239 745 if(D)
mas01cr@239 746 delete[] D;
mas01cr@239 747 if(DD)
mas01cr@239 748 delete[] DD;
mas01cr@239 749 if(meanDBdur)
mas01cr@239 750 delete[] meanDBdur;
mas01cr@239 751 }
mas01cr@239 752
mas01cr@239 753 // Unit norm block of features
mas01cr@239 754 void audioDB::unitNorm(double* X, unsigned dim, unsigned n, double* qNorm){
mas01cr@239 755 unsigned d;
mas01cr@239 756 double L2, *p;
mas01cr@239 757
mas01cr@239 758 VERB_LOG(2, "norming %u vectors...", n);
mas01cr@239 759 while(n--) {
mas01cr@239 760 p = X;
mas01cr@239 761 L2 = 0.0;
mas01cr@239 762 d = dim;
mas01cr@239 763 while(d--) {
mas01cr@239 764 L2 += *p * *p;
mas01cr@239 765 p++;
mas01cr@239 766 }
mas01cr@239 767 if(qNorm) {
mas01cr@239 768 *qNorm++=L2;
mas01cr@239 769 }
mas01cr@239 770 X += dim;
mas01cr@239 771 }
mas01cr@239 772 VERB_LOG(2, "done.\n");
mas01cr@239 773 }
mas01mc@292 774
mas01mc@292 775