annotate query.cpp @ 206:3c7c8b84e4f3 refactoring

Delete pointQuery() and trackPointQuery() New convention for tests: an exit code of 14 means "not applicable", as in n/a = 14/1. Decorate the newly-failing tests with "exit 14".
author mas01cr
date Wed, 28 Nov 2007 17:04:09 +0000
parents 2ea1908707c7
children 861e4bc95547
rev   line source
mas01cr@204 1 #include "audioDB.h"
mas01cr@204 2
mas01cr@204 3 bool audioDB::powers_acceptable(double p1, double p2) {
mas01cr@204 4 if (use_absolute_threshold) {
mas01cr@204 5 if ((p1 < absolute_threshold) || (p2 < absolute_threshold)) {
mas01cr@204 6 return false;
mas01cr@204 7 }
mas01cr@204 8 }
mas01cr@204 9 if (use_relative_threshold) {
mas01cr@204 10 if (fabs(p1-p2) > fabs(relative_threshold)) {
mas01cr@204 11 return false;
mas01cr@204 12 }
mas01cr@204 13 }
mas01cr@204 14 return true;
mas01cr@204 15 }
mas01cr@204 16
mas01cr@206 17 void audioDB::query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) {
mas01cr@206 18 switch(queryType) {
mas01cr@204 19 case O2_SEQUENCE_QUERY:
mas01cr@204 20 if(radius==0)
mas01cr@204 21 trackSequenceQueryNN(dbName, inFile, adbQueryResponse);
mas01cr@204 22 else
mas01cr@204 23 trackSequenceQueryRad(dbName, inFile, adbQueryResponse);
mas01cr@204 24 break;
mas01cr@204 25 default:
mas01cr@204 26 error("unrecognized queryType in query()");
mas01cr@204 27 }
mas01cr@204 28 }
mas01cr@204 29
mas01cr@206 30 // return ordinal position of key in keyTable
mas01cr@204 31 unsigned audioDB::getKeyPos(char* key){
mas01cr@204 32 for(unsigned k=0; k<dbH->numFiles; k++)
mas01cr@204 33 if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0)
mas01cr@204 34 return k;
mas01cr@204 35 error("Key not found",key);
mas01cr@204 36 return O2_ERR_KEYNOTFOUND;
mas01cr@204 37 }
mas01cr@204 38
mas01cr@204 39 // This is a common pattern in sequence queries: what we are doing is
mas01cr@204 40 // taking a window of length seqlen over a buffer of length length,
mas01cr@204 41 // and placing the sum of the elements in that window in the first
mas01cr@204 42 // element of the window: thus replacing all but the last seqlen
mas01cr@204 43 // elements in the buffer the corresponding windowed sum.
mas01cr@204 44 void audioDB::sequence_sum(double *buffer, int length, int seqlen) {
mas01cr@204 45 double tmp1, tmp2, *ps;
mas01cr@204 46 int j, w;
mas01cr@204 47
mas01cr@204 48 tmp1 = *buffer;
mas01cr@204 49 j = 1;
mas01cr@204 50 w = seqlen - 1;
mas01cr@204 51 while(w--) {
mas01cr@204 52 *buffer += buffer[j++];
mas01cr@204 53 }
mas01cr@204 54 ps = buffer + 1;
mas01cr@204 55 w = length - seqlen; // +1 - 1
mas01cr@204 56 while(w--) {
mas01cr@204 57 tmp2 = *ps;
mas01cr@204 58 *ps = *(ps - 1) - tmp1 + *(ps + seqlen - 1);
mas01cr@204 59 tmp1 = tmp2;
mas01cr@204 60 ps++;
mas01cr@204 61 }
mas01cr@204 62 }
mas01cr@204 63
mas01cr@204 64 void audioDB::sequence_sqrt(double *buffer, int length, int seqlen) {
mas01cr@204 65 int w = length - seqlen + 1;
mas01cr@204 66 while(w--) {
mas01cr@204 67 *buffer = sqrt(*buffer);
mas01cr@204 68 buffer++;
mas01cr@204 69 }
mas01cr@204 70 }
mas01cr@204 71
mas01cr@204 72 void audioDB::sequence_average(double *buffer, int length, int seqlen) {
mas01cr@204 73 int w = length - seqlen + 1;
mas01cr@204 74 while(w--) {
mas01cr@204 75 *buffer /= seqlen;
mas01cr@204 76 buffer++;
mas01cr@204 77 }
mas01cr@204 78 }
mas01cr@204 79
mas01cr@204 80 // k nearest-neighbor (k-NN) search between query and target tracks
mas01cr@204 81 // efficient implementation based on matched filter
mas01cr@204 82 // assumes normed shingles
mas01cr@204 83 // outputs distances of retrieved shingles, max retreived = pointNN shingles per per track
mas01cr@204 84 void audioDB::trackSequenceQueryNN(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){
mas01cr@204 85
mas01cr@204 86 initTables(dbName, inFile);
mas01cr@204 87
mas01cr@204 88 // For each input vector, find the closest pointNN matching output vectors and report
mas01cr@204 89 // we use stdout in this stub version
mas01cr@204 90 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@204 91 double* query = (double*)(indata+sizeof(int));
mas01cr@204 92 double* queryCopy = 0;
mas01cr@204 93
mas01cr@204 94 if(!(dbH->flags & O2_FLAG_L2NORM) )
mas01cr@204 95 error("Database must be L2 normed for sequence query","use -L2NORM");
mas01cr@204 96
mas01cr@204 97 if(numVectors<sequenceLength)
mas01cr@204 98 error("Query shorter than requested sequence length", "maybe use -l");
mas01cr@204 99
mas01cr@204 100 if(verbosity>1) {
mas01cr@204 101 std::cerr << "performing norms ... "; std::cerr.flush();
mas01cr@204 102 }
mas01cr@204 103 unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim);
mas01cr@204 104
mas01cr@204 105 // Make a copy of the query
mas01cr@204 106 queryCopy = new double[numVectors*dbH->dim];
mas01cr@204 107 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
mas01cr@204 108 qNorm = new double[numVectors];
mas01cr@204 109 sNorm = new double[dbVectors];
mas01cr@204 110 assert(qNorm&&sNorm&&queryCopy&&sequenceLength);
mas01cr@204 111 unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
mas01cr@204 112 query = queryCopy;
mas01cr@204 113
mas01cr@204 114 // Make norm measurements relative to sequenceLength
mas01cr@204 115 unsigned w = sequenceLength-1;
mas01cr@204 116 unsigned i,j;
mas01cr@204 117
mas01cr@204 118 // Copy the L2 norm values to core to avoid disk random access later on
mas01cr@204 119 memcpy(sNorm, l2normTable, dbVectors*sizeof(double));
mas01cr@204 120 double* qnPtr = qNorm;
mas01cr@204 121 double* snPtr = sNorm;
mas01cr@204 122
mas01cr@204 123 double *sPower = 0, *qPower = 0;
mas01cr@204 124 double *spPtr = 0, *qpPtr = 0;
mas01cr@204 125
mas01cr@204 126 if (usingPower) {
mas01cr@204 127 if (!(dbH->flags & O2_FLAG_POWER)) {
mas01cr@204 128 error("database not power-enabled", dbName);
mas01cr@204 129 }
mas01cr@204 130 sPower = new double[dbVectors];
mas01cr@204 131 spPtr = sPower;
mas01cr@204 132 memcpy(sPower, powerTable, dbVectors * sizeof(double));
mas01cr@204 133 }
mas01cr@204 134
mas01cr@204 135 for(i=0; i<dbH->numFiles; i++){
mas01cr@204 136 if(trackTable[i]>=sequenceLength) {
mas01cr@204 137 sequence_sum(snPtr, trackTable[i], sequenceLength);
mas01cr@204 138 sequence_sqrt(snPtr, trackTable[i], sequenceLength);
mas01cr@204 139
mas01cr@204 140 if (usingPower) {
mas01cr@204 141 sequence_sum(spPtr, trackTable[i], sequenceLength);
mas01cr@204 142 sequence_average(spPtr, trackTable[i], sequenceLength);
mas01cr@204 143 }
mas01cr@204 144 }
mas01cr@204 145 snPtr += trackTable[i];
mas01cr@204 146 if (usingPower) {
mas01cr@204 147 spPtr += trackTable[i];
mas01cr@204 148 }
mas01cr@204 149 }
mas01cr@204 150
mas01cr@204 151 sequence_sum(qnPtr, numVectors, sequenceLength);
mas01cr@204 152 sequence_sqrt(qnPtr, numVectors, sequenceLength);
mas01cr@204 153
mas01cr@204 154 if (usingPower) {
mas01cr@204 155 qPower = new double[numVectors];
mas01cr@204 156 qpPtr = qPower;
mas01cr@204 157 if (lseek(powerfd, sizeof(int), SEEK_SET) == (off_t) -1) {
mas01cr@204 158 error("error seeking to data", powerFileName, "lseek");
mas01cr@204 159 }
mas01cr@204 160 int count = read(powerfd, qPower, numVectors * sizeof(double));
mas01cr@204 161 if (count == -1) {
mas01cr@204 162 error("error reading data", powerFileName, "read");
mas01cr@204 163 }
mas01cr@204 164 if ((unsigned) count != numVectors * sizeof(double)) {
mas01cr@204 165 error("short read", powerFileName);
mas01cr@204 166 }
mas01cr@204 167
mas01cr@204 168 sequence_sum(qpPtr, numVectors, sequenceLength);
mas01cr@204 169 sequence_average(qpPtr, numVectors, sequenceLength);
mas01cr@204 170 }
mas01cr@204 171
mas01cr@204 172 if(verbosity>1) {
mas01cr@204 173 std::cerr << "done." << std::endl;
mas01cr@204 174 }
mas01cr@204 175
mas01cr@204 176 if(verbosity>1) {
mas01cr@204 177 std::cerr << "matching tracks..." << std::endl;
mas01cr@204 178 }
mas01cr@204 179
mas01cr@204 180 assert(pointNN>0 && pointNN<=O2_MAXNN);
mas01cr@204 181 assert(trackNN>0 && trackNN<=O2_MAXNN);
mas01cr@204 182
mas01cr@204 183 // Make temporary dynamic memory for results
mas01cr@204 184 double trackDistances[trackNN];
mas01cr@204 185 unsigned trackIDs[trackNN];
mas01cr@204 186 unsigned trackQIndexes[trackNN];
mas01cr@204 187 unsigned trackSIndexes[trackNN];
mas01cr@204 188
mas01cr@204 189 double distances[pointNN];
mas01cr@204 190 unsigned qIndexes[pointNN];
mas01cr@204 191 unsigned sIndexes[pointNN];
mas01cr@204 192
mas01cr@204 193
mas01cr@204 194 unsigned k,l,m,n,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength;
mas01cr@204 195 double thisDist;
mas01cr@204 196
mas01cr@204 197 for(k=0; k<pointNN; k++){
mas01cr@204 198 distances[k]=1.0e6;
mas01cr@204 199 qIndexes[k]=~0;
mas01cr@204 200 sIndexes[k]=~0;
mas01cr@204 201 }
mas01cr@204 202
mas01cr@204 203 for(k=0; k<trackNN; k++){
mas01cr@204 204 trackDistances[k]=1.0e6;
mas01cr@204 205 trackQIndexes[k]=~0;
mas01cr@204 206 trackSIndexes[k]=~0;
mas01cr@204 207 trackIDs[k]=~0;
mas01cr@204 208 }
mas01cr@204 209
mas01cr@204 210 // Timestamp and durations processing
mas01cr@204 211 double meanQdur = 0;
mas01cr@204 212 double *timesdata = 0;
mas01cr@204 213 double *querydurs = 0;
mas01cr@204 214 double *meanDBdur = 0;
mas01cr@204 215
mas01cr@204 216 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
mas01cr@204 217 std::cerr << "warning: ignoring query timestamps for non-timestamped database" << std::endl;
mas01cr@204 218 usingTimes=0;
mas01cr@204 219 }
mas01cr@204 220
mas01cr@204 221 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@204 222 std::cerr << "warning: no timestamps given for query. Ignoring database timestamps." << std::endl;
mas01cr@204 223
mas01cr@204 224 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
mas01cr@204 225 timesdata = new double[2*numVectors];
mas01cr@204 226 querydurs = new double[numVectors];
mas01cr@204 227
mas01cr@204 228 insertTimeStamps(numVectors, timesFile, timesdata);
mas01cr@204 229 // Calculate durations of points
mas01cr@204 230 for(k=0; k<numVectors-1; k++) {
mas01cr@204 231 querydurs[k] = timesdata[2*k+1] - timesdata[2*k];
mas01cr@204 232 meanQdur += querydurs[k];
mas01cr@204 233 }
mas01cr@204 234 meanQdur/=k;
mas01cr@204 235 if(verbosity>1) {
mas01cr@204 236 std::cerr << "mean query file duration: " << meanQdur << std::endl;
mas01cr@204 237 }
mas01cr@204 238 meanDBdur = new double[dbH->numFiles];
mas01cr@204 239 assert(meanDBdur);
mas01cr@204 240 for(k=0; k<dbH->numFiles; k++){
mas01cr@204 241 meanDBdur[k]=0.0;
mas01cr@204 242 for(j=0; j<trackTable[k]-1 ; j++) {
mas01cr@204 243 meanDBdur[k]+=timesTable[2*j+1]-timesTable[2*j];
mas01cr@204 244 }
mas01cr@204 245 meanDBdur[k]/=j;
mas01cr@204 246 }
mas01cr@204 247 }
mas01cr@204 248
mas01cr@204 249 if(usingQueryPoint)
mas01cr@204 250 if(queryPoint>numVectors || queryPoint>numVectors-wL+1)
mas01cr@204 251 error("queryPoint > numVectors-wL+1 in query");
mas01cr@204 252 else{
mas01cr@204 253 if(verbosity>1) {
mas01cr@204 254 std::cerr << "query point: " << queryPoint << std::endl; std::cerr.flush();
mas01cr@204 255 }
mas01cr@204 256 query = query + queryPoint * dbH->dim;
mas01cr@204 257 qnPtr = qnPtr + queryPoint;
mas01cr@204 258 if (usingPower) {
mas01cr@204 259 qpPtr = qpPtr + queryPoint;
mas01cr@204 260 }
mas01cr@204 261 numVectors=wL;
mas01cr@204 262 }
mas01cr@204 263
mas01cr@204 264 double ** D = 0; // Differences query and target
mas01cr@204 265 double ** DD = 0; // Matched filter distance
mas01cr@204 266
mas01cr@204 267 D = new double*[numVectors];
mas01cr@204 268 assert(D);
mas01cr@204 269 DD = new double*[numVectors];
mas01cr@204 270 assert(DD);
mas01cr@204 271
mas01cr@204 272 gettimeofday(&tv1, NULL);
mas01cr@204 273 unsigned processedTracks = 0;
mas01cr@204 274 unsigned successfulTracks=0;
mas01cr@204 275
mas01cr@204 276 double* qp;
mas01cr@204 277 double* sp;
mas01cr@204 278 double* dp;
mas01cr@204 279
mas01cr@204 280 // build track offset table
mas01cr@204 281 off_t *trackOffsetTable = new off_t[dbH->numFiles];
mas01cr@204 282 unsigned cumTrack=0;
mas01cr@204 283 off_t trackIndexOffset;
mas01cr@204 284 for(k=0; k<dbH->numFiles;k++){
mas01cr@204 285 trackOffsetTable[k]=cumTrack;
mas01cr@204 286 cumTrack+=trackTable[k]*dbH->dim;
mas01cr@204 287 }
mas01cr@204 288
mas01cr@204 289 char nextKey [MAXSTR];
mas01cr@204 290
mas01cr@204 291 // chi^2 statistics
mas01cr@204 292 double sampleCount = 0;
mas01cr@204 293 double sampleSum = 0;
mas01cr@204 294 double logSampleSum = 0;
mas01cr@204 295 double minSample = 1e9;
mas01cr@204 296 double maxSample = 0;
mas01cr@204 297
mas01cr@204 298 // Track loop
mas01cr@204 299 size_t data_buffer_size = 0;
mas01cr@204 300 double *data_buffer = 0;
mas01cr@204 301 lseek(dbfid, dbH->dataOffset, SEEK_SET);
mas01cr@204 302
mas01cr@204 303 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++) {
mas01cr@204 304
mas01cr@204 305 trackOffset = trackOffsetTable[track]; // numDoubles offset
mas01cr@204 306
mas01cr@204 307 // get trackID from file if using a control file
mas01cr@204 308 if(trackFile) {
mas01cr@204 309 trackFile->getline(nextKey,MAXSTR);
mas01cr@204 310 if(!trackFile->eof()) {
mas01cr@204 311 track = getKeyPos(nextKey);
mas01cr@204 312 trackOffset = trackOffsetTable[track];
mas01cr@204 313 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET);
mas01cr@204 314 } else {
mas01cr@204 315 break;
mas01cr@204 316 }
mas01cr@204 317 }
mas01cr@204 318
mas01cr@204 319 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset
mas01cr@204 320
mas01cr@204 321 if(sequenceLength<=trackTable[track]){ // test for short sequences
mas01cr@204 322
mas01cr@204 323 if(verbosity>7) {
mas01cr@204 324 std::cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";std::cerr.flush();
mas01cr@204 325 }
mas01cr@204 326
mas01cr@204 327 // Sum products matrix
mas01cr@204 328 for(j=0; j<numVectors;j++){
mas01cr@204 329 D[j]=new double[trackTable[track]];
mas01cr@204 330 assert(D[j]);
mas01cr@204 331
mas01cr@204 332 }
mas01cr@204 333
mas01cr@204 334 // Matched filter matrix
mas01cr@204 335 for(j=0; j<numVectors;j++){
mas01cr@204 336 DD[j]=new double[trackTable[track]];
mas01cr@204 337 assert(DD[j]);
mas01cr@204 338 }
mas01cr@204 339
mas01cr@204 340 if (trackTable[track] * sizeof(double) * dbH->dim > data_buffer_size) {
mas01cr@204 341 if(data_buffer) {
mas01cr@204 342 free(data_buffer);
mas01cr@204 343 }
mas01cr@204 344 {
mas01cr@204 345 data_buffer_size = trackTable[track] * sizeof(double) * dbH->dim;
mas01cr@204 346 void *tmp = malloc(data_buffer_size);
mas01cr@204 347 if (tmp == NULL) {
mas01cr@204 348 error("error allocating data buffer");
mas01cr@204 349 }
mas01cr@204 350 data_buffer = (double *) tmp;
mas01cr@204 351 }
mas01cr@204 352 }
mas01cr@204 353
mas01cr@204 354 read(dbfid, data_buffer, trackTable[track] * sizeof(double) * dbH->dim);
mas01cr@204 355
mas01cr@204 356 // Dot product
mas01cr@204 357 for(j=0; j<numVectors; j++)
mas01cr@204 358 for(k=0; k<trackTable[track]; k++){
mas01cr@204 359 qp=query+j*dbH->dim;
mas01cr@204 360 sp=data_buffer+k*dbH->dim;
mas01cr@204 361 DD[j][k]=0.0; // Initialize matched filter array
mas01cr@204 362 dp=&D[j][k]; // point to correlation cell j,k
mas01cr@204 363 *dp=0.0; // initialize correlation cell
mas01cr@204 364 l=dbH->dim; // size of vectors
mas01cr@204 365 while(l--)
mas01cr@204 366 *dp+=*qp++**sp++;
mas01cr@204 367 }
mas01cr@204 368
mas01cr@204 369 // Matched Filter
mas01cr@204 370 // HOP SIZE == 1
mas01cr@204 371 double* spd;
mas01cr@204 372 if(HOP_SIZE==1){ // HOP_SIZE = shingleHop
mas01cr@204 373 for(w=0; w<wL; w++)
mas01cr@204 374 for(j=0; j<numVectors-w; j++){
mas01cr@204 375 sp=DD[j];
mas01cr@204 376 spd=D[j+w]+w;
mas01cr@204 377 k=trackTable[track]-w;
mas01cr@204 378 while(k--)
mas01cr@204 379 *sp+++=*spd++;
mas01cr@204 380 }
mas01cr@204 381 }
mas01cr@204 382
mas01cr@204 383 else{ // HOP_SIZE != 1
mas01cr@204 384 for(w=0; w<wL; w++)
mas01cr@204 385 for(j=0; j<numVectors-w; j+=HOP_SIZE){
mas01cr@204 386 sp=DD[j];
mas01cr@204 387 spd=D[j+w]+w;
mas01cr@204 388 for(k=0; k<trackTable[track]-w; k+=HOP_SIZE){
mas01cr@204 389 *sp+=*spd;
mas01cr@204 390 sp+=HOP_SIZE;
mas01cr@204 391 spd+=HOP_SIZE;
mas01cr@204 392 }
mas01cr@204 393 }
mas01cr@204 394 }
mas01cr@204 395
mas01cr@204 396 if(verbosity>3 && usingTimes) {
mas01cr@204 397 std::cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[track] << std::endl;
mas01cr@204 398 std::cerr.flush();
mas01cr@204 399 }
mas01cr@204 400
mas01cr@204 401 if(!usingTimes ||
mas01cr@204 402 (usingTimes
mas01cr@204 403 && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){
mas01cr@204 404
mas01cr@204 405 if(verbosity>3 && usingTimes) {
mas01cr@204 406 std::cerr << "within duration tolerance." << std::endl;
mas01cr@204 407 std::cerr.flush();
mas01cr@204 408 }
mas01cr@204 409
mas01cr@204 410 // Search for minimum distance by shingles (concatenated vectors)
mas01cr@204 411 for(j=0;j<=numVectors-wL;j+=HOP_SIZE)
mas01cr@204 412 for(k=0;k<=trackTable[track]-wL;k+=HOP_SIZE){
mas01cr@204 413 thisDist=2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k];
mas01cr@204 414 if(verbosity>9) {
mas01cr@204 415 std::cerr << thisDist << " " << qnPtr[j] << " " << sNorm[trackIndexOffset+k] << std::endl;
mas01cr@204 416 }
mas01cr@204 417 // Gather chi^2 statistics
mas01cr@204 418 if(thisDist<minSample)
mas01cr@204 419 minSample=thisDist;
mas01cr@204 420 else if(thisDist>maxSample)
mas01cr@204 421 maxSample=thisDist;
mas01cr@204 422 if(thisDist>1e-9){
mas01cr@204 423 sampleCount++;
mas01cr@204 424 sampleSum+=thisDist;
mas01cr@204 425 logSampleSum+=log(thisDist);
mas01cr@204 426 }
mas01cr@204 427
mas01cr@204 428 // diffL2 = fabs(qnPtr[j] - sNorm[trackIndexOffset+k]);
mas01cr@204 429 // Power test
mas01cr@204 430 if (usingPower) {
mas01cr@204 431 if (!(powers_acceptable(qpPtr[j], sPower[trackIndexOffset + k]))) {
mas01cr@204 432 thisDist = 1000000.0;
mas01cr@204 433 }
mas01cr@204 434 }
mas01cr@204 435
mas01cr@204 436 // k-NN match algorithm
mas01cr@204 437 m=pointNN;
mas01cr@204 438 while(m--){
mas01cr@204 439 if(thisDist<=distances[m])
mas01cr@204 440 if(m==0 || thisDist>=distances[m-1]){
mas01cr@204 441 // Shuffle distances up the list
mas01cr@204 442 for(l=pointNN-1; l>m; l--){
mas01cr@204 443 distances[l]=distances[l-1];
mas01cr@204 444 qIndexes[l]=qIndexes[l-1];
mas01cr@204 445 sIndexes[l]=sIndexes[l-1];
mas01cr@204 446 }
mas01cr@204 447 distances[m]=thisDist;
mas01cr@204 448 if(usingQueryPoint)
mas01cr@204 449 qIndexes[m]=queryPoint;
mas01cr@204 450 else
mas01cr@204 451 qIndexes[m]=j;
mas01cr@204 452 sIndexes[m]=k;
mas01cr@204 453 break;
mas01cr@204 454 }
mas01cr@204 455 }
mas01cr@204 456 }
mas01cr@204 457 // Calculate the mean of the N-Best matches
mas01cr@204 458 thisDist=0.0;
mas01cr@204 459 for(m=0; m<pointNN; m++) {
mas01cr@204 460 if (distances[m] == 1000000.0) break;
mas01cr@204 461 thisDist+=distances[m];
mas01cr@204 462 }
mas01cr@204 463 thisDist/=m;
mas01cr@204 464
mas01cr@204 465 // Let's see the distances then...
mas01cr@204 466 if(verbosity>3) {
mas01cr@204 467 std::cerr << fileTable+track*O2_FILETABLESIZE << " " << thisDist << std::endl;
mas01cr@204 468 }
mas01cr@204 469
mas01cr@204 470
mas01cr@204 471 // All the track stuff goes here
mas01cr@204 472 n=trackNN;
mas01cr@204 473 while(n--){
mas01cr@204 474 if(thisDist<=trackDistances[n]){
mas01cr@204 475 if((n==0 || thisDist>=trackDistances[n-1])){
mas01cr@204 476 // Copy all values above up the queue
mas01cr@204 477 for( l=trackNN-1 ; l > n ; l--){
mas01cr@204 478 trackDistances[l]=trackDistances[l-1];
mas01cr@204 479 trackQIndexes[l]=trackQIndexes[l-1];
mas01cr@204 480 trackSIndexes[l]=trackSIndexes[l-1];
mas01cr@204 481 trackIDs[l]=trackIDs[l-1];
mas01cr@204 482 }
mas01cr@204 483 trackDistances[n]=thisDist;
mas01cr@204 484 trackQIndexes[n]=qIndexes[0];
mas01cr@204 485 trackSIndexes[n]=sIndexes[0];
mas01cr@204 486 successfulTracks++;
mas01cr@204 487 trackIDs[n]=track;
mas01cr@204 488 break;
mas01cr@204 489 }
mas01cr@204 490 }
mas01cr@204 491 else
mas01cr@204 492 break;
mas01cr@204 493 }
mas01cr@204 494 } // Duration match
mas01cr@204 495
mas01cr@204 496 // Clean up current track
mas01cr@204 497 if(D!=NULL){
mas01cr@204 498 for(j=0; j<numVectors; j++)
mas01cr@204 499 delete[] D[j];
mas01cr@204 500 }
mas01cr@204 501
mas01cr@204 502 if(DD!=NULL){
mas01cr@204 503 for(j=0; j<numVectors; j++)
mas01cr@204 504 delete[] DD[j];
mas01cr@204 505 }
mas01cr@204 506 }
mas01cr@204 507 // per-track reset array values
mas01cr@204 508 for(unsigned k=0; k<pointNN; k++){
mas01cr@204 509 distances[k]=1.0e6;
mas01cr@204 510 qIndexes[k]=~0;
mas01cr@204 511 sIndexes[k]=~0;
mas01cr@204 512 }
mas01cr@204 513 }
mas01cr@204 514
mas01cr@204 515 free(data_buffer);
mas01cr@204 516
mas01cr@204 517 gettimeofday(&tv2,NULL);
mas01cr@204 518 if(verbosity>1) {
mas01cr@204 519 std::cerr << std::endl << "processed tracks :" << processedTracks << " matched tracks: " << successfulTracks << " elapsed time:"
mas01cr@204 520 << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << std::endl;
mas01cr@204 521 std::cerr << "sampleCount: " << sampleCount << " sampleSum: " << sampleSum << " logSampleSum: " << logSampleSum
mas01cr@204 522 << " minSample: " << minSample << " maxSample: " << maxSample << std::endl;
mas01cr@204 523 }
mas01cr@204 524 if(adbQueryResponse==0){
mas01cr@204 525 if(verbosity>1) {
mas01cr@204 526 std::cerr<<std::endl;
mas01cr@204 527 }
mas01cr@204 528 // Output answer
mas01cr@204 529 // Loop over nearest neighbours
mas01cr@204 530 for(k=0; k < std::min(trackNN,successfulTracks); k++)
mas01cr@204 531 std::cout << fileTable+trackIDs[k]*O2_FILETABLESIZE << " " << trackDistances[k] << " "
mas01cr@204 532 << trackQIndexes[k] << " " << trackSIndexes[k] << std::endl;
mas01cr@204 533 }
mas01cr@204 534 else{ // Process Web Services Query
mas01cr@204 535 int listLen = std::min(trackNN, processedTracks);
mas01cr@204 536 adbQueryResponse->result.__sizeRlist=listLen;
mas01cr@204 537 adbQueryResponse->result.__sizeDist=listLen;
mas01cr@204 538 adbQueryResponse->result.__sizeQpos=listLen;
mas01cr@204 539 adbQueryResponse->result.__sizeSpos=listLen;
mas01cr@204 540 adbQueryResponse->result.Rlist= new char*[listLen];
mas01cr@204 541 adbQueryResponse->result.Dist = new double[listLen];
mas01cr@204 542 adbQueryResponse->result.Qpos = new unsigned int[listLen];
mas01cr@204 543 adbQueryResponse->result.Spos = new unsigned int[listLen];
mas01cr@204 544 for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){
mas01cr@204 545 adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR];
mas01cr@204 546 adbQueryResponse->result.Dist[k]=trackDistances[k];
mas01cr@204 547 adbQueryResponse->result.Qpos[k]=trackQIndexes[k];
mas01cr@204 548 adbQueryResponse->result.Spos[k]=trackSIndexes[k];
mas01cr@204 549 sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE);
mas01cr@204 550 }
mas01cr@204 551 }
mas01cr@204 552
mas01cr@204 553 // Clean up
mas01cr@204 554 if(trackOffsetTable)
mas01cr@204 555 delete[] trackOffsetTable;
mas01cr@204 556 if(queryCopy)
mas01cr@204 557 delete[] queryCopy;
mas01cr@204 558 if(qNorm)
mas01cr@204 559 delete[] qNorm;
mas01cr@204 560 if(sNorm)
mas01cr@204 561 delete[] sNorm;
mas01cr@204 562 if(qPower)
mas01cr@204 563 delete[] qPower;
mas01cr@204 564 if(sPower)
mas01cr@204 565 delete[] sPower;
mas01cr@204 566 if(D)
mas01cr@204 567 delete[] D;
mas01cr@204 568 if(DD)
mas01cr@204 569 delete[] DD;
mas01cr@204 570 if(timesdata)
mas01cr@204 571 delete[] timesdata;
mas01cr@204 572 if(querydurs)
mas01cr@204 573 delete[] querydurs;
mas01cr@204 574 if(meanDBdur)
mas01cr@204 575 delete[] meanDBdur;
mas01cr@204 576 }
mas01cr@204 577
mas01cr@204 578 // Radius search between query and target tracks
mas01cr@204 579 // efficient implementation based on matched filter
mas01cr@204 580 // assumes normed shingles
mas01cr@204 581 // outputs count of retrieved shingles, max retreived = one shingle per query shingle per track
mas01cr@204 582 void audioDB::trackSequenceQueryRad(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){
mas01cr@204 583
mas01cr@204 584 initTables(dbName, inFile);
mas01cr@204 585
mas01cr@204 586 // For each input vector, find the closest pointNN matching output vectors and report
mas01cr@204 587 // we use stdout in this stub version
mas01cr@204 588 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@204 589 double* query = (double*)(indata+sizeof(int));
mas01cr@204 590 double* queryCopy = 0;
mas01cr@204 591
mas01cr@204 592 if(!(dbH->flags & O2_FLAG_L2NORM) )
mas01cr@204 593 error("Database must be L2 normed for sequence query","use -l2norm");
mas01cr@204 594
mas01cr@204 595 if(verbosity>1) {
mas01cr@204 596 std::cerr << "performing norms ... "; std::cerr.flush();
mas01cr@204 597 }
mas01cr@204 598 unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim);
mas01cr@204 599
mas01cr@204 600 // Make a copy of the query
mas01cr@204 601 queryCopy = new double[numVectors*dbH->dim];
mas01cr@204 602 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
mas01cr@204 603 qNorm = new double[numVectors];
mas01cr@204 604 sNorm = new double[dbVectors];
mas01cr@204 605 assert(qNorm&&sNorm&&queryCopy&&sequenceLength);
mas01cr@204 606 unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
mas01cr@204 607 query = queryCopy;
mas01cr@204 608
mas01cr@204 609 // Make norm measurements relative to sequenceLength
mas01cr@204 610 unsigned w = sequenceLength-1;
mas01cr@204 611 unsigned i,j;
mas01cr@204 612
mas01cr@204 613 // Copy the L2 norm values to core to avoid disk random access later on
mas01cr@204 614 memcpy(sNorm, l2normTable, dbVectors*sizeof(double));
mas01cr@204 615 double* snPtr = sNorm;
mas01cr@204 616 double* qnPtr = qNorm;
mas01cr@204 617
mas01cr@204 618 double *sPower = 0, *qPower = 0;
mas01cr@204 619 double *spPtr = 0, *qpPtr = 0;
mas01cr@204 620
mas01cr@204 621 if (usingPower) {
mas01cr@204 622 if(!(dbH->flags & O2_FLAG_POWER)) {
mas01cr@204 623 error("database not power-enabled", dbName);
mas01cr@204 624 }
mas01cr@204 625 sPower = new double[dbVectors];
mas01cr@204 626 spPtr = sPower;
mas01cr@204 627 memcpy(sPower, powerTable, dbVectors * sizeof(double));
mas01cr@204 628 }
mas01cr@204 629
mas01cr@204 630 for(i=0; i<dbH->numFiles; i++){
mas01cr@204 631 if(trackTable[i]>=sequenceLength) {
mas01cr@204 632 sequence_sum(snPtr, trackTable[i], sequenceLength);
mas01cr@204 633 sequence_sqrt(snPtr, trackTable[i], sequenceLength);
mas01cr@204 634 if (usingPower) {
mas01cr@204 635 sequence_sum(spPtr, trackTable[i], sequenceLength);
mas01cr@204 636 sequence_average(spPtr, trackTable[i], sequenceLength);
mas01cr@204 637 }
mas01cr@204 638 }
mas01cr@204 639 snPtr += trackTable[i];
mas01cr@204 640 if (usingPower) {
mas01cr@204 641 spPtr += trackTable[i];
mas01cr@204 642 }
mas01cr@204 643 }
mas01cr@204 644
mas01cr@204 645 sequence_sum(qnPtr, numVectors, sequenceLength);
mas01cr@204 646 sequence_sqrt(qnPtr, numVectors, sequenceLength);
mas01cr@204 647
mas01cr@204 648 if (usingPower) {
mas01cr@204 649 qPower = new double[numVectors];
mas01cr@204 650 qpPtr = qPower;
mas01cr@204 651 if (lseek(powerfd, sizeof(int), SEEK_SET) == (off_t) -1) {
mas01cr@204 652 error("error seeking to data", powerFileName, "lseek");
mas01cr@204 653 }
mas01cr@204 654 int count = read(powerfd, qPower, numVectors * sizeof(double));
mas01cr@204 655 if (count == -1) {
mas01cr@204 656 error("error reading data", powerFileName, "read");
mas01cr@204 657 }
mas01cr@204 658 if ((unsigned) count != numVectors * sizeof(double)) {
mas01cr@204 659 error("short read", powerFileName);
mas01cr@204 660 }
mas01cr@204 661
mas01cr@204 662 sequence_sum(qpPtr, numVectors, sequenceLength);
mas01cr@204 663 sequence_average(qpPtr, numVectors, sequenceLength);
mas01cr@204 664 }
mas01cr@204 665
mas01cr@204 666 if(verbosity>1) {
mas01cr@204 667 std::cerr << "done." << std::endl;
mas01cr@204 668 }
mas01cr@204 669
mas01cr@204 670 if(verbosity>1) {
mas01cr@204 671 std::cerr << "matching tracks..." << std::endl;
mas01cr@204 672 }
mas01cr@204 673
mas01cr@204 674 assert(pointNN>0 && pointNN<=O2_MAXNN);
mas01cr@204 675 assert(trackNN>0 && trackNN<=O2_MAXNN);
mas01cr@204 676
mas01cr@204 677 // Make temporary dynamic memory for results
mas01cr@204 678 double trackDistances[trackNN];
mas01cr@204 679 unsigned trackIDs[trackNN];
mas01cr@204 680 unsigned trackQIndexes[trackNN];
mas01cr@204 681 unsigned trackSIndexes[trackNN];
mas01cr@204 682
mas01cr@204 683 double distances[pointNN];
mas01cr@204 684 unsigned qIndexes[pointNN];
mas01cr@204 685 unsigned sIndexes[pointNN];
mas01cr@204 686
mas01cr@204 687
mas01cr@204 688 unsigned k,l,n,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength;
mas01cr@204 689 double thisDist;
mas01cr@204 690
mas01cr@204 691 for(k=0; k<pointNN; k++){
mas01cr@204 692 distances[k]=0.0;
mas01cr@204 693 qIndexes[k]=~0;
mas01cr@204 694 sIndexes[k]=~0;
mas01cr@204 695 }
mas01cr@204 696
mas01cr@204 697 for(k=0; k<trackNN; k++){
mas01cr@204 698 trackDistances[k]=0.0;
mas01cr@204 699 trackQIndexes[k]=~0;
mas01cr@204 700 trackSIndexes[k]=~0;
mas01cr@204 701 trackIDs[k]=~0;
mas01cr@204 702 }
mas01cr@204 703
mas01cr@204 704 // Timestamp and durations processing
mas01cr@204 705 double meanQdur = 0;
mas01cr@204 706 double *timesdata = 0;
mas01cr@204 707 double *querydurs = 0;
mas01cr@204 708 double *meanDBdur = 0;
mas01cr@204 709
mas01cr@204 710 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
mas01cr@204 711 std::cerr << "warning: ignoring query timestamps for non-timestamped database" << std::endl;
mas01cr@204 712 usingTimes=0;
mas01cr@204 713 }
mas01cr@204 714
mas01cr@204 715 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@204 716 std::cerr << "warning: no timestamps given for query. Ignoring database timestamps." << std::endl;
mas01cr@204 717
mas01cr@204 718 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
mas01cr@204 719 timesdata = new double[2*numVectors];
mas01cr@204 720 querydurs = new double[numVectors];
mas01cr@204 721
mas01cr@204 722 insertTimeStamps(numVectors, timesFile, timesdata);
mas01cr@204 723 // Calculate durations of points
mas01cr@204 724 for(k=0; k<numVectors-1; k++){
mas01cr@204 725 querydurs[k] = timesdata[2*k+1] - timesdata[2*k];
mas01cr@204 726 meanQdur += querydurs[k];
mas01cr@204 727 }
mas01cr@204 728 meanQdur/=k;
mas01cr@204 729 if(verbosity>1) {
mas01cr@204 730 std::cerr << "mean query file duration: " << meanQdur << std::endl;
mas01cr@204 731 }
mas01cr@204 732 meanDBdur = new double[dbH->numFiles];
mas01cr@204 733 assert(meanDBdur);
mas01cr@204 734 for(k=0; k<dbH->numFiles; k++){
mas01cr@204 735 meanDBdur[k]=0.0;
mas01cr@204 736 for(j=0; j<trackTable[k]-1 ; j++) {
mas01cr@204 737 meanDBdur[k]+=timesTable[2*j+1]-timesTable[2*j];
mas01cr@204 738 }
mas01cr@204 739 meanDBdur[k]/=j;
mas01cr@204 740 }
mas01cr@204 741 }
mas01cr@204 742
mas01cr@204 743 if(usingQueryPoint)
mas01cr@204 744 if(queryPoint>numVectors || queryPoint>numVectors-wL+1)
mas01cr@204 745 error("queryPoint > numVectors-wL+1 in query");
mas01cr@204 746 else{
mas01cr@204 747 if(verbosity>1) {
mas01cr@204 748 std::cerr << "query point: " << queryPoint << std::endl; std::cerr.flush();
mas01cr@204 749 }
mas01cr@204 750 query = query + queryPoint*dbH->dim;
mas01cr@204 751 qnPtr = qnPtr + queryPoint;
mas01cr@204 752 if (usingPower) {
mas01cr@204 753 qpPtr = qpPtr + queryPoint;
mas01cr@204 754 }
mas01cr@204 755 numVectors=wL;
mas01cr@204 756 }
mas01cr@204 757
mas01cr@204 758 double ** D = 0; // Differences query and target
mas01cr@204 759 double ** DD = 0; // Matched filter distance
mas01cr@204 760
mas01cr@204 761 D = new double*[numVectors];
mas01cr@204 762 assert(D);
mas01cr@204 763 DD = new double*[numVectors];
mas01cr@204 764 assert(DD);
mas01cr@204 765
mas01cr@204 766 gettimeofday(&tv1, NULL);
mas01cr@204 767 unsigned processedTracks = 0;
mas01cr@204 768 unsigned successfulTracks=0;
mas01cr@204 769
mas01cr@204 770 double* qp;
mas01cr@204 771 double* sp;
mas01cr@204 772 double* dp;
mas01cr@204 773
mas01cr@204 774 // build track offset table
mas01cr@204 775 off_t *trackOffsetTable = new off_t[dbH->numFiles];
mas01cr@204 776 unsigned cumTrack=0;
mas01cr@204 777 off_t trackIndexOffset;
mas01cr@204 778 for(k=0; k<dbH->numFiles;k++){
mas01cr@204 779 trackOffsetTable[k]=cumTrack;
mas01cr@204 780 cumTrack+=trackTable[k]*dbH->dim;
mas01cr@204 781 }
mas01cr@204 782
mas01cr@204 783 char nextKey [MAXSTR];
mas01cr@204 784
mas01cr@204 785 // chi^2 statistics
mas01cr@204 786 double sampleCount = 0;
mas01cr@204 787 double sampleSum = 0;
mas01cr@204 788 double logSampleSum = 0;
mas01cr@204 789 double minSample = 1e9;
mas01cr@204 790 double maxSample = 0;
mas01cr@204 791
mas01cr@204 792 // Track loop
mas01cr@204 793 size_t data_buffer_size = 0;
mas01cr@204 794 double *data_buffer = 0;
mas01cr@204 795 lseek(dbfid, dbH->dataOffset, SEEK_SET);
mas01cr@204 796
mas01cr@204 797 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){
mas01cr@204 798
mas01cr@204 799 trackOffset = trackOffsetTable[track]; // numDoubles offset
mas01cr@204 800
mas01cr@204 801 // get trackID from file if using a control file
mas01cr@204 802 if(trackFile) {
mas01cr@204 803 trackFile->getline(nextKey,MAXSTR);
mas01cr@204 804 if(!trackFile->eof()) {
mas01cr@204 805 track = getKeyPos(nextKey);
mas01cr@204 806 trackOffset = trackOffsetTable[track];
mas01cr@204 807 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET);
mas01cr@204 808 } else {
mas01cr@204 809 break;
mas01cr@204 810 }
mas01cr@204 811 }
mas01cr@204 812
mas01cr@204 813 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset
mas01cr@204 814
mas01cr@204 815 if(sequenceLength<=trackTable[track]){ // test for short sequences
mas01cr@204 816
mas01cr@204 817 if(verbosity>7) {
mas01cr@204 818 std::cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";std::cerr.flush();
mas01cr@204 819 }
mas01cr@204 820
mas01cr@204 821 // Sum products matrix
mas01cr@204 822 for(j=0; j<numVectors;j++){
mas01cr@204 823 D[j]=new double[trackTable[track]];
mas01cr@204 824 assert(D[j]);
mas01cr@204 825
mas01cr@204 826 }
mas01cr@204 827
mas01cr@204 828 // Matched filter matrix
mas01cr@204 829 for(j=0; j<numVectors;j++){
mas01cr@204 830 DD[j]=new double[trackTable[track]];
mas01cr@204 831 assert(DD[j]);
mas01cr@204 832 }
mas01cr@204 833
mas01cr@204 834 if (trackTable[track] * sizeof(double) * dbH->dim > data_buffer_size) {
mas01cr@204 835 if(data_buffer) {
mas01cr@204 836 free(data_buffer);
mas01cr@204 837 }
mas01cr@204 838 {
mas01cr@204 839 data_buffer_size = trackTable[track] * sizeof(double) * dbH->dim;
mas01cr@204 840 void *tmp = malloc(data_buffer_size);
mas01cr@204 841 if (tmp == NULL) {
mas01cr@204 842 error("error allocating data buffer");
mas01cr@204 843 }
mas01cr@204 844 data_buffer = (double *) tmp;
mas01cr@204 845 }
mas01cr@204 846 }
mas01cr@204 847
mas01cr@204 848 read(dbfid, data_buffer, trackTable[track] * sizeof(double) * dbH->dim);
mas01cr@204 849
mas01cr@204 850 // Dot product
mas01cr@204 851 for(j=0; j<numVectors; j++)
mas01cr@204 852 for(k=0; k<trackTable[track]; k++){
mas01cr@204 853 qp=query+j*dbH->dim;
mas01cr@204 854 sp=data_buffer+k*dbH->dim;
mas01cr@204 855 DD[j][k]=0.0; // Initialize matched filter array
mas01cr@204 856 dp=&D[j][k]; // point to correlation cell j,k
mas01cr@204 857 *dp=0.0; // initialize correlation cell
mas01cr@204 858 l=dbH->dim; // size of vectors
mas01cr@204 859 while(l--)
mas01cr@204 860 *dp+=*qp++**sp++;
mas01cr@204 861 }
mas01cr@204 862
mas01cr@204 863 // Matched Filter
mas01cr@204 864 // HOP SIZE == 1
mas01cr@204 865 double* spd;
mas01cr@204 866 if(HOP_SIZE==1){ // HOP_SIZE = shingleHop
mas01cr@204 867 for(w=0; w<wL; w++)
mas01cr@204 868 for(j=0; j<numVectors-w; j++){
mas01cr@204 869 sp=DD[j];
mas01cr@204 870 spd=D[j+w]+w;
mas01cr@204 871 k=trackTable[track]-w;
mas01cr@204 872 while(k--)
mas01cr@204 873 *sp+++=*spd++;
mas01cr@204 874 }
mas01cr@204 875 }
mas01cr@204 876
mas01cr@204 877 else{ // HOP_SIZE != 1
mas01cr@204 878 for(w=0; w<wL; w++)
mas01cr@204 879 for(j=0; j<numVectors-w; j+=HOP_SIZE){
mas01cr@204 880 sp=DD[j];
mas01cr@204 881 spd=D[j+w]+w;
mas01cr@204 882 for(k=0; k<trackTable[track]-w; k+=HOP_SIZE){
mas01cr@204 883 *sp+=*spd;
mas01cr@204 884 sp+=HOP_SIZE;
mas01cr@204 885 spd+=HOP_SIZE;
mas01cr@204 886 }
mas01cr@204 887 }
mas01cr@204 888 }
mas01cr@204 889
mas01cr@204 890 if(verbosity>3 && usingTimes) {
mas01cr@204 891 std::cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[track] << std::endl;
mas01cr@204 892 std::cerr.flush();
mas01cr@204 893 }
mas01cr@204 894
mas01cr@204 895 if(!usingTimes ||
mas01cr@204 896 (usingTimes
mas01cr@204 897 && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){
mas01cr@204 898
mas01cr@204 899 if(verbosity>3 && usingTimes) {
mas01cr@204 900 std::cerr << "within duration tolerance." << std::endl;
mas01cr@204 901 std::cerr.flush();
mas01cr@204 902 }
mas01cr@204 903
mas01cr@204 904 // Search for minimum distance by shingles (concatenated vectors)
mas01cr@204 905 for(j=0;j<=numVectors-wL;j+=HOP_SIZE)
mas01cr@204 906 for(k=0;k<=trackTable[track]-wL;k+=HOP_SIZE){
mas01cr@204 907 thisDist=2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k];
mas01cr@204 908 if(verbosity>9) {
mas01cr@204 909 std::cerr << thisDist << " " << qnPtr[j] << " " << sNorm[trackIndexOffset+k] << std::endl;
mas01cr@204 910 }
mas01cr@204 911 // Gather chi^2 statistics
mas01cr@204 912 if(thisDist<minSample)
mas01cr@204 913 minSample=thisDist;
mas01cr@204 914 else if(thisDist>maxSample)
mas01cr@204 915 maxSample=thisDist;
mas01cr@204 916 if(thisDist>1e-9){
mas01cr@204 917 sampleCount++;
mas01cr@204 918 sampleSum+=thisDist;
mas01cr@204 919 logSampleSum+=log(thisDist);
mas01cr@204 920 }
mas01cr@204 921
mas01cr@204 922 // diffL2 = fabs(qnPtr[j] - sNorm[trackIndexOffset+k]);
mas01cr@204 923 // Power test
mas01cr@204 924 if (usingPower) {
mas01cr@204 925 if (!(powers_acceptable(qpPtr[j], sPower[trackIndexOffset + k]))) {
mas01cr@204 926 thisDist = 1000000.0;
mas01cr@204 927 }
mas01cr@204 928 }
mas01cr@204 929
mas01cr@204 930 if(thisDist>=0 && thisDist<=radius){
mas01cr@204 931 distances[0]++; // increment count
mas01cr@204 932 break; // only need one track point per query point
mas01cr@204 933 }
mas01cr@204 934 }
mas01cr@204 935 // How many points were below threshold ?
mas01cr@204 936 thisDist=distances[0];
mas01cr@204 937
mas01cr@204 938 // Let's see the distances then...
mas01cr@204 939 if(verbosity>3) {
mas01cr@204 940 std::cerr << fileTable+track*O2_FILETABLESIZE << " " << thisDist << std::endl;
mas01cr@204 941 }
mas01cr@204 942
mas01cr@204 943 // All the track stuff goes here
mas01cr@204 944 n=trackNN;
mas01cr@204 945 while(n--){
mas01cr@204 946 if(thisDist>trackDistances[n]){
mas01cr@204 947 if((n==0 || thisDist<=trackDistances[n-1])){
mas01cr@204 948 // Copy all values above up the queue
mas01cr@204 949 for( l=trackNN-1 ; l > n ; l--){
mas01cr@204 950 trackDistances[l]=trackDistances[l-1];
mas01cr@204 951 trackQIndexes[l]=trackQIndexes[l-1];
mas01cr@204 952 trackSIndexes[l]=trackSIndexes[l-1];
mas01cr@204 953 trackIDs[l]=trackIDs[l-1];
mas01cr@204 954 }
mas01cr@204 955 trackDistances[n]=thisDist;
mas01cr@204 956 trackQIndexes[n]=qIndexes[0];
mas01cr@204 957 trackSIndexes[n]=sIndexes[0];
mas01cr@204 958 successfulTracks++;
mas01cr@204 959 trackIDs[n]=track;
mas01cr@204 960 break;
mas01cr@204 961 }
mas01cr@204 962 }
mas01cr@204 963 else
mas01cr@204 964 break;
mas01cr@204 965 }
mas01cr@204 966 } // Duration match
mas01cr@204 967
mas01cr@204 968 // Clean up current track
mas01cr@204 969 if(D!=NULL){
mas01cr@204 970 for(j=0; j<numVectors; j++)
mas01cr@204 971 delete[] D[j];
mas01cr@204 972 }
mas01cr@204 973
mas01cr@204 974 if(DD!=NULL){
mas01cr@204 975 for(j=0; j<numVectors; j++)
mas01cr@204 976 delete[] DD[j];
mas01cr@204 977 }
mas01cr@204 978 }
mas01cr@204 979 // per-track reset array values
mas01cr@204 980 for(unsigned k=0; k<pointNN; k++){
mas01cr@204 981 distances[k]=0.0;
mas01cr@204 982 qIndexes[k]=~0;
mas01cr@204 983 sIndexes[k]=~0;
mas01cr@204 984 }
mas01cr@204 985 }
mas01cr@204 986
mas01cr@204 987 free(data_buffer);
mas01cr@204 988
mas01cr@204 989 gettimeofday(&tv2,NULL);
mas01cr@204 990 if(verbosity>1) {
mas01cr@204 991 std::cerr << std::endl << "processed tracks :" << processedTracks << " matched tracks: " << successfulTracks << " elapsed time:"
mas01cr@204 992 << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << std::endl;
mas01cr@204 993 std::cerr << "sampleCount: " << sampleCount << " sampleSum: " << sampleSum << " logSampleSum: " << logSampleSum
mas01cr@204 994 << " minSample: " << minSample << " maxSample: " << maxSample << std::endl;
mas01cr@204 995 }
mas01cr@204 996
mas01cr@204 997 if(adbQueryResponse==0){
mas01cr@204 998 if(verbosity>1) {
mas01cr@204 999 std::cerr<<std::endl;
mas01cr@204 1000 }
mas01cr@204 1001 // Output answer
mas01cr@204 1002 // Loop over nearest neighbours
mas01cr@204 1003 for(k=0; k < std::min(trackNN,successfulTracks); k++)
mas01cr@204 1004 std::cout << fileTable+trackIDs[k]*O2_FILETABLESIZE << " " << trackDistances[k] << std::endl;
mas01cr@204 1005 }
mas01cr@204 1006 else{ // Process Web Services Query
mas01cr@204 1007 int listLen = std::min(trackNN, processedTracks);
mas01cr@204 1008 adbQueryResponse->result.__sizeRlist=listLen;
mas01cr@204 1009 adbQueryResponse->result.__sizeDist=listLen;
mas01cr@204 1010 adbQueryResponse->result.__sizeQpos=listLen;
mas01cr@204 1011 adbQueryResponse->result.__sizeSpos=listLen;
mas01cr@204 1012 adbQueryResponse->result.Rlist= new char*[listLen];
mas01cr@204 1013 adbQueryResponse->result.Dist = new double[listLen];
mas01cr@204 1014 adbQueryResponse->result.Qpos = new unsigned int[listLen];
mas01cr@204 1015 adbQueryResponse->result.Spos = new unsigned int[listLen];
mas01cr@204 1016 for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){
mas01cr@204 1017 adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR];
mas01cr@204 1018 adbQueryResponse->result.Dist[k]=trackDistances[k];
mas01cr@204 1019 adbQueryResponse->result.Qpos[k]=trackQIndexes[k];
mas01cr@204 1020 adbQueryResponse->result.Spos[k]=trackSIndexes[k];
mas01cr@204 1021 sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE);
mas01cr@204 1022 }
mas01cr@204 1023 }
mas01cr@204 1024
mas01cr@204 1025 // Clean up
mas01cr@204 1026 if(trackOffsetTable)
mas01cr@204 1027 delete[] trackOffsetTable;
mas01cr@204 1028 if(queryCopy)
mas01cr@204 1029 delete[] queryCopy;
mas01cr@204 1030 if(qNorm)
mas01cr@204 1031 delete[] qNorm;
mas01cr@204 1032 if(sNorm)
mas01cr@204 1033 delete[] sNorm;
mas01cr@204 1034 if(qPower)
mas01cr@204 1035 delete[] qPower;
mas01cr@204 1036 if(sPower)
mas01cr@204 1037 delete[] sPower;
mas01cr@204 1038 if(D)
mas01cr@204 1039 delete[] D;
mas01cr@204 1040 if(DD)
mas01cr@204 1041 delete[] DD;
mas01cr@204 1042 if(timesdata)
mas01cr@204 1043 delete[] timesdata;
mas01cr@204 1044 if(querydurs)
mas01cr@204 1045 delete[] querydurs;
mas01cr@204 1046 if(meanDBdur)
mas01cr@204 1047 delete[] meanDBdur;
mas01cr@204 1048 }
mas01cr@204 1049
mas01cr@204 1050 // Unit norm block of features
mas01cr@204 1051 void audioDB::unitNorm(double* X, unsigned dim, unsigned n, double* qNorm){
mas01cr@204 1052 unsigned d;
mas01cr@204 1053 double L2, *p;
mas01cr@204 1054 if(verbosity>2) {
mas01cr@204 1055 std::cerr << "norming " << n << " vectors...";std::cerr.flush();
mas01cr@204 1056 }
mas01cr@204 1057 while(n--){
mas01cr@204 1058 p=X;
mas01cr@204 1059 L2=0.0;
mas01cr@204 1060 d=dim;
mas01cr@204 1061 while(d--){
mas01cr@204 1062 L2+=*p**p;
mas01cr@204 1063 p++;
mas01cr@204 1064 }
mas01cr@204 1065 /* L2=sqrt(L2);*/
mas01cr@204 1066 if(qNorm)
mas01cr@204 1067 *qNorm++=L2;
mas01cr@204 1068 /*
mas01cr@204 1069 oneOverL2 = 1.0/L2;
mas01cr@204 1070 d=dim;
mas01cr@204 1071 while(d--){
mas01cr@204 1072 *X*=oneOverL2;
mas01cr@204 1073 X++;
mas01cr@204 1074 */
mas01cr@204 1075 X+=dim;
mas01cr@204 1076 }
mas01cr@204 1077 if(verbosity>2) {
mas01cr@204 1078 std::cerr << "done..." << std::endl;
mas01cr@204 1079 }
mas01cr@204 1080 }