annotate query.cpp @ 204:2ea1908707c7 refactoring

Filewise refactor. Break apart huge monolithic audioDB.cpp file into seven broadly independent portions: * SOAP * DB creation * insertion * query * dump * common functionality * constructor functions Remove the "using namespace std" from the header file, though that wasn't actually a problem: the problem in question is solved by including adb.nsmap in only soap.cpp. Makefile improvements.
author mas01cr
date Wed, 28 Nov 2007 15:10:28 +0000
parents
children 3c7c8b84e4f3
rev   line source
mas01cr@204 1 #include "audioDB.h"
mas01cr@204 2
mas01cr@204 3 bool audioDB::powers_acceptable(double p1, double p2) {
mas01cr@204 4 if (use_absolute_threshold) {
mas01cr@204 5 if ((p1 < absolute_threshold) || (p2 < absolute_threshold)) {
mas01cr@204 6 return false;
mas01cr@204 7 }
mas01cr@204 8 }
mas01cr@204 9 if (use_relative_threshold) {
mas01cr@204 10 if (fabs(p1-p2) > fabs(relative_threshold)) {
mas01cr@204 11 return false;
mas01cr@204 12 }
mas01cr@204 13 }
mas01cr@204 14 return true;
mas01cr@204 15 }
mas01cr@204 16
mas01cr@204 17 void audioDB::query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){
mas01cr@204 18 switch(queryType){
mas01cr@204 19 case O2_POINT_QUERY:
mas01cr@204 20 pointQuery(dbName, inFile, adbQueryResponse);
mas01cr@204 21 break;
mas01cr@204 22 case O2_SEQUENCE_QUERY:
mas01cr@204 23 if(radius==0)
mas01cr@204 24 trackSequenceQueryNN(dbName, inFile, adbQueryResponse);
mas01cr@204 25 else
mas01cr@204 26 trackSequenceQueryRad(dbName, inFile, adbQueryResponse);
mas01cr@204 27 break;
mas01cr@204 28 case O2_TRACK_QUERY:
mas01cr@204 29 trackPointQuery(dbName, inFile, adbQueryResponse);
mas01cr@204 30 break;
mas01cr@204 31 default:
mas01cr@204 32 error("unrecognized queryType in query()");
mas01cr@204 33
mas01cr@204 34 }
mas01cr@204 35 }
mas01cr@204 36
mas01cr@204 37 //return ordinal position of key in keyTable
mas01cr@204 38 unsigned audioDB::getKeyPos(char* key){
mas01cr@204 39 for(unsigned k=0; k<dbH->numFiles; k++)
mas01cr@204 40 if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0)
mas01cr@204 41 return k;
mas01cr@204 42 error("Key not found",key);
mas01cr@204 43 return O2_ERR_KEYNOTFOUND;
mas01cr@204 44 }
mas01cr@204 45
mas01cr@204 46 // Basic point query engine
mas01cr@204 47 void audioDB::pointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) {
mas01cr@204 48
mas01cr@204 49 initTables(dbName, inFile);
mas01cr@204 50
mas01cr@204 51 // For each input vector, find the closest pointNN matching output vectors and report
mas01cr@204 52 // we use stdout in this stub version
mas01cr@204 53 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@204 54
mas01cr@204 55 double* query = (double*)(indata+sizeof(int));
mas01cr@204 56 CHECKED_MMAP(double *, dataBuf, dbH->dataOffset, dataBufLength);
mas01cr@204 57 double* data = dataBuf;
mas01cr@204 58 double* queryCopy = 0;
mas01cr@204 59
mas01cr@204 60 if( dbH->flags & O2_FLAG_L2NORM ){
mas01cr@204 61 // Make a copy of the query
mas01cr@204 62 queryCopy = new double[numVectors*dbH->dim];
mas01cr@204 63 qNorm = new double[numVectors];
mas01cr@204 64 assert(queryCopy&&qNorm);
mas01cr@204 65 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
mas01cr@204 66 unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
mas01cr@204 67 query = queryCopy;
mas01cr@204 68 }
mas01cr@204 69
mas01cr@204 70 // Make temporary dynamic memory for results
mas01cr@204 71 assert(pointNN>0 && pointNN<=O2_MAXNN);
mas01cr@204 72 double distances[pointNN];
mas01cr@204 73 unsigned qIndexes[pointNN];
mas01cr@204 74 unsigned sIndexes[pointNN];
mas01cr@204 75 for(unsigned k=0; k<pointNN; k++){
mas01cr@204 76 distances[k]=-DBL_MAX;
mas01cr@204 77 qIndexes[k]=~0;
mas01cr@204 78 sIndexes[k]=~0;
mas01cr@204 79 }
mas01cr@204 80
mas01cr@204 81 unsigned j=numVectors;
mas01cr@204 82 unsigned k,l,n;
mas01cr@204 83 double thisDist;
mas01cr@204 84
mas01cr@204 85 unsigned totalVecs=dbH->length/(dbH->dim*sizeof(double));
mas01cr@204 86 double meanQdur = 0;
mas01cr@204 87 double *timesdata = 0;
mas01cr@204 88 double *querydurs = 0;
mas01cr@204 89 double *dbdurs = 0;
mas01cr@204 90
mas01cr@204 91 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
mas01cr@204 92 std::cerr << "warning: ignoring query timestamps for non-timestamped database" << std::endl;
mas01cr@204 93 usingTimes=0;
mas01cr@204 94 }
mas01cr@204 95
mas01cr@204 96 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@204 97 std::cerr << "warning: no timestamps given for query. Ignoring database timestamps." << std::endl;
mas01cr@204 98
mas01cr@204 99 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
mas01cr@204 100 timesdata = new double[2*numVectors];
mas01cr@204 101 querydurs = new double[numVectors];
mas01cr@204 102 insertTimeStamps(numVectors, timesFile, timesdata);
mas01cr@204 103 // Calculate durations of points
mas01cr@204 104 for(k=0; k<numVectors-1; k++){
mas01cr@204 105 querydurs[k]=timesdata[2*k+1]-timesdata[2*k];
mas01cr@204 106 meanQdur+=querydurs[k];
mas01cr@204 107 }
mas01cr@204 108 meanQdur/=k;
mas01cr@204 109 // Individual exhaustive timepoint durations
mas01cr@204 110 dbdurs = new double[totalVecs];
mas01cr@204 111 for(k=0; k<totalVecs-1; k++) {
mas01cr@204 112 dbdurs[k]=timesTable[2*k+1]-timesTable[2*k];
mas01cr@204 113 }
mas01cr@204 114 }
mas01cr@204 115
mas01cr@204 116 if(usingQueryPoint)
mas01cr@204 117 if(queryPoint>numVectors-1)
mas01cr@204 118 error("queryPoint > numVectors in query");
mas01cr@204 119 else{
mas01cr@204 120 if(verbosity>1) {
mas01cr@204 121 std::cerr << "query point: " << queryPoint << std::endl; std::cerr.flush();
mas01cr@204 122 }
mas01cr@204 123 query=query+queryPoint*dbH->dim;
mas01cr@204 124 numVectors=queryPoint+1;
mas01cr@204 125 j=1;
mas01cr@204 126 }
mas01cr@204 127
mas01cr@204 128 gettimeofday(&tv1, NULL);
mas01cr@204 129 while(j--){ // query
mas01cr@204 130 data=dataBuf;
mas01cr@204 131 k=totalVecs; // number of database vectors
mas01cr@204 132 while(k--){ // database
mas01cr@204 133 thisDist=0;
mas01cr@204 134 l=dbH->dim;
mas01cr@204 135 double* q=query;
mas01cr@204 136 while(l--)
mas01cr@204 137 thisDist+=*q++**data++;
mas01cr@204 138 if(!usingTimes ||
mas01cr@204 139 (usingTimes
mas01cr@204 140 && fabs(dbdurs[totalVecs-k-1]-querydurs[numVectors-j-1])<querydurs[numVectors-j-1]*timesTol)){
mas01cr@204 141 n=pointNN;
mas01cr@204 142 while(n--){
mas01cr@204 143 if(thisDist>=distances[n]){
mas01cr@204 144 if((n==0 || thisDist<=distances[n-1])){
mas01cr@204 145 // Copy all values above up the queue
mas01cr@204 146 for( l=pointNN-1 ; l >= n+1 ; l--){
mas01cr@204 147 distances[l]=distances[l-1];
mas01cr@204 148 qIndexes[l]=qIndexes[l-1];
mas01cr@204 149 sIndexes[l]=sIndexes[l-1];
mas01cr@204 150 }
mas01cr@204 151 distances[n]=thisDist;
mas01cr@204 152 qIndexes[n]=numVectors-j-1;
mas01cr@204 153 sIndexes[n]=dbH->length/(sizeof(double)*dbH->dim)-k-1;
mas01cr@204 154 break;
mas01cr@204 155 }
mas01cr@204 156 }
mas01cr@204 157 else
mas01cr@204 158 break;
mas01cr@204 159 }
mas01cr@204 160 }
mas01cr@204 161 }
mas01cr@204 162 // Move query pointer to next query point
mas01cr@204 163 query+=dbH->dim;
mas01cr@204 164 }
mas01cr@204 165
mas01cr@204 166 gettimeofday(&tv2, NULL);
mas01cr@204 167 if(verbosity>1) {
mas01cr@204 168 std::cerr << std::endl << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << std::endl;
mas01cr@204 169 }
mas01cr@204 170
mas01cr@204 171 if(adbQueryResponse==0){
mas01cr@204 172 // Output answer
mas01cr@204 173 // Loop over nearest neighbours
mas01cr@204 174 for(k=0; k < pointNN; k++){
mas01cr@204 175 // Scan for key
mas01cr@204 176 unsigned cumTrack=0;
mas01cr@204 177 for(l=0 ; l<dbH->numFiles; l++){
mas01cr@204 178 cumTrack+=trackTable[l];
mas01cr@204 179 if(sIndexes[k]<cumTrack){
mas01cr@204 180 std::cout << fileTable+l*O2_FILETABLESIZE << " " << distances[k] << " " << qIndexes[k] << " "
mas01cr@204 181 << sIndexes[k]+trackTable[l]-cumTrack << std::endl;
mas01cr@204 182 break;
mas01cr@204 183 }
mas01cr@204 184 }
mas01cr@204 185 }
mas01cr@204 186 }
mas01cr@204 187 else{ // Process Web Services Query
mas01cr@204 188 int listLen;
mas01cr@204 189 for(k = 0; k < pointNN; k++) {
mas01cr@204 190 if(distances[k] == -DBL_MAX)
mas01cr@204 191 break;
mas01cr@204 192 }
mas01cr@204 193 listLen = k;
mas01cr@204 194
mas01cr@204 195 adbQueryResponse->result.__sizeRlist=listLen;
mas01cr@204 196 adbQueryResponse->result.__sizeDist=listLen;
mas01cr@204 197 adbQueryResponse->result.__sizeQpos=listLen;
mas01cr@204 198 adbQueryResponse->result.__sizeSpos=listLen;
mas01cr@204 199 adbQueryResponse->result.Rlist= new char*[listLen];
mas01cr@204 200 adbQueryResponse->result.Dist = new double[listLen];
mas01cr@204 201 adbQueryResponse->result.Qpos = new unsigned int[listLen];
mas01cr@204 202 adbQueryResponse->result.Spos = new unsigned int[listLen];
mas01cr@204 203 for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){
mas01cr@204 204 adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR];
mas01cr@204 205 adbQueryResponse->result.Dist[k]=distances[k];
mas01cr@204 206 adbQueryResponse->result.Qpos[k]=qIndexes[k];
mas01cr@204 207 unsigned cumTrack=0;
mas01cr@204 208 for(l=0 ; l<dbH->numFiles; l++){
mas01cr@204 209 cumTrack+=trackTable[l];
mas01cr@204 210 if(sIndexes[k]<cumTrack){
mas01cr@204 211 sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+l*O2_FILETABLESIZE);
mas01cr@204 212 break;
mas01cr@204 213 }
mas01cr@204 214 }
mas01cr@204 215 adbQueryResponse->result.Spos[k]=sIndexes[k]+trackTable[l]-cumTrack;
mas01cr@204 216 }
mas01cr@204 217 }
mas01cr@204 218
mas01cr@204 219 // Clean up
mas01cr@204 220 if(queryCopy)
mas01cr@204 221 delete queryCopy;
mas01cr@204 222 if(qNorm)
mas01cr@204 223 delete qNorm;
mas01cr@204 224 if(timesdata)
mas01cr@204 225 delete[] timesdata;
mas01cr@204 226 if(querydurs)
mas01cr@204 227 delete[] querydurs;
mas01cr@204 228 if(dbdurs)
mas01cr@204 229 delete dbdurs;
mas01cr@204 230 }
mas01cr@204 231
mas01cr@204 232 // trackPointQuery
mas01cr@204 233 // return the trackNN closest tracks to the query track
mas01cr@204 234 // uses average of pointNN points per track
mas01cr@204 235 void audioDB::trackPointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) {
mas01cr@204 236 initTables(dbName, inFile);
mas01cr@204 237
mas01cr@204 238 // For each input vector, find the closest pointNN matching output vectors and report
mas01cr@204 239 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@204 240 double* query = (double*)(indata+sizeof(int));
mas01cr@204 241 double* data;
mas01cr@204 242 double* queryCopy = 0;
mas01cr@204 243
mas01cr@204 244 if( dbH->flags & O2_FLAG_L2NORM ){
mas01cr@204 245 // Make a copy of the query
mas01cr@204 246 queryCopy = new double[numVectors*dbH->dim];
mas01cr@204 247 qNorm = new double[numVectors];
mas01cr@204 248 assert(queryCopy&&qNorm);
mas01cr@204 249 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
mas01cr@204 250 unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
mas01cr@204 251 query = queryCopy;
mas01cr@204 252 }
mas01cr@204 253
mas01cr@204 254 assert(pointNN>0 && pointNN<=O2_MAXNN);
mas01cr@204 255 assert(trackNN>0 && trackNN<=O2_MAXNN);
mas01cr@204 256
mas01cr@204 257 // Make temporary dynamic memory for results
mas01cr@204 258 double trackDistances[trackNN];
mas01cr@204 259 unsigned trackIDs[trackNN];
mas01cr@204 260 unsigned trackQIndexes[trackNN];
mas01cr@204 261 unsigned trackSIndexes[trackNN];
mas01cr@204 262
mas01cr@204 263 double distances[pointNN];
mas01cr@204 264 unsigned qIndexes[pointNN];
mas01cr@204 265 unsigned sIndexes[pointNN];
mas01cr@204 266
mas01cr@204 267 unsigned j=numVectors; // number of query points
mas01cr@204 268 unsigned k,l,n, track, trackOffset=0, processedTracks=0;
mas01cr@204 269 double thisDist;
mas01cr@204 270
mas01cr@204 271 for(k=0; k<pointNN; k++){
mas01cr@204 272 distances[k]=-DBL_MAX;
mas01cr@204 273 qIndexes[k]=~0;
mas01cr@204 274 sIndexes[k]=~0;
mas01cr@204 275 }
mas01cr@204 276
mas01cr@204 277 for(k=0; k<trackNN; k++){
mas01cr@204 278 trackDistances[k]=-DBL_MAX;
mas01cr@204 279 trackQIndexes[k]=~0;
mas01cr@204 280 trackSIndexes[k]=~0;
mas01cr@204 281 trackIDs[k]=~0;
mas01cr@204 282 }
mas01cr@204 283
mas01cr@204 284 double meanQdur = 0;
mas01cr@204 285 double *timesdata = 0;
mas01cr@204 286 double *querydurs = 0;
mas01cr@204 287 double *meanDBdur = 0;
mas01cr@204 288
mas01cr@204 289 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
mas01cr@204 290 std::cerr << "warning: ignoring query timestamps for non-timestamped database" << std::endl;
mas01cr@204 291 usingTimes=0;
mas01cr@204 292 }
mas01cr@204 293
mas01cr@204 294 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@204 295 std::cerr << "warning: no timestamps given for query. Ignoring database timestamps." << std::endl;
mas01cr@204 296
mas01cr@204 297 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
mas01cr@204 298 timesdata = new double[2*numVectors];
mas01cr@204 299 querydurs = new double[numVectors];
mas01cr@204 300 insertTimeStamps(numVectors, timesFile, timesdata);
mas01cr@204 301 // Calculate durations of points
mas01cr@204 302 for(k=0; k<numVectors-1; k++) {
mas01cr@204 303 querydurs[k] = timesdata[2*k+1] - timesdata[2*k];
mas01cr@204 304 meanQdur += querydurs[k];
mas01cr@204 305 }
mas01cr@204 306 meanQdur/=k;
mas01cr@204 307 meanDBdur = new double[dbH->numFiles];
mas01cr@204 308 for(k=0; k<dbH->numFiles; k++){
mas01cr@204 309 meanDBdur[k]=0.0;
mas01cr@204 310 for(j=0; j<trackTable[k]-1 ; j++) {
mas01cr@204 311 meanDBdur[k]+=timesTable[2*j+1]-timesTable[2*j];
mas01cr@204 312 }
mas01cr@204 313 meanDBdur[k]/=j;
mas01cr@204 314 }
mas01cr@204 315 }
mas01cr@204 316
mas01cr@204 317 if(usingQueryPoint)
mas01cr@204 318 if(queryPoint>numVectors-1)
mas01cr@204 319 error("queryPoint > numVectors in query");
mas01cr@204 320 else{
mas01cr@204 321 if(verbosity>1) {
mas01cr@204 322 std::cerr << "query point: " << queryPoint << std::endl; std::cerr.flush();
mas01cr@204 323 }
mas01cr@204 324 query=query+queryPoint*dbH->dim;
mas01cr@204 325 numVectors=queryPoint+1;
mas01cr@204 326 }
mas01cr@204 327
mas01cr@204 328 // build track offset table
mas01cr@204 329 off_t *trackOffsetTable = new off_t[dbH->numFiles];
mas01cr@204 330 unsigned cumTrack=0;
mas01cr@204 331 off_t trackIndexOffset;
mas01cr@204 332 for(k=0; k<dbH->numFiles;k++){
mas01cr@204 333 trackOffsetTable[k]=cumTrack;
mas01cr@204 334 cumTrack+=trackTable[k]*dbH->dim;
mas01cr@204 335 }
mas01cr@204 336
mas01cr@204 337 char nextKey[MAXSTR];
mas01cr@204 338
mas01cr@204 339 gettimeofday(&tv1, NULL);
mas01cr@204 340
mas01cr@204 341 size_t data_buffer_size = 0;
mas01cr@204 342 double *data_buffer = 0;
mas01cr@204 343 lseek(dbfid, dbH->dataOffset, SEEK_SET);
mas01cr@204 344
mas01cr@204 345 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){
mas01cr@204 346
mas01cr@204 347 trackOffset = trackOffsetTable[track]; // numDoubles offset
mas01cr@204 348
mas01cr@204 349 // get trackID from file if using a control file
mas01cr@204 350 if(trackFile) {
mas01cr@204 351 trackFile->getline(nextKey,MAXSTR);
mas01cr@204 352 if(!trackFile->eof()) {
mas01cr@204 353 track = getKeyPos(nextKey);
mas01cr@204 354 trackOffset = trackOffsetTable[track];
mas01cr@204 355 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET);
mas01cr@204 356 } else {
mas01cr@204 357 break;
mas01cr@204 358 }
mas01cr@204 359 }
mas01cr@204 360
mas01cr@204 361 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset
mas01cr@204 362
mas01cr@204 363 if(verbosity>7) {
mas01cr@204 364 std::cerr << track << "." << trackOffset/(dbH->dim) << "." << trackTable[track] << " | ";std::cerr.flush();
mas01cr@204 365 }
mas01cr@204 366
mas01cr@204 367 if(dbH->flags & O2_FLAG_L2NORM)
mas01cr@204 368 usingQueryPoint?query=queryCopy+queryPoint*dbH->dim:query=queryCopy;
mas01cr@204 369 else
mas01cr@204 370 usingQueryPoint?query=(double*)(indata+sizeof(int))+queryPoint*dbH->dim:query=(double*)(indata+sizeof(int));
mas01cr@204 371 if(usingQueryPoint)
mas01cr@204 372 j=1;
mas01cr@204 373 else
mas01cr@204 374 j=numVectors;
mas01cr@204 375
mas01cr@204 376 if (trackTable[track] * sizeof(double) * dbH->dim > data_buffer_size) {
mas01cr@204 377 if(data_buffer) {
mas01cr@204 378 free(data_buffer);
mas01cr@204 379 }
mas01cr@204 380 {
mas01cr@204 381 data_buffer_size = trackTable[track] * sizeof(double) * dbH->dim;
mas01cr@204 382 void *tmp = malloc(data_buffer_size);
mas01cr@204 383 if (tmp == NULL) {
mas01cr@204 384 error("error allocating data buffer");
mas01cr@204 385 }
mas01cr@204 386 data_buffer = (double *) tmp;
mas01cr@204 387 }
mas01cr@204 388 }
mas01cr@204 389
mas01cr@204 390 read(dbfid, data_buffer, trackTable[track] * sizeof(double) * dbH->dim);
mas01cr@204 391
mas01cr@204 392 while(j--){
mas01cr@204 393 k=trackTable[track]; // number of vectors in track
mas01cr@204 394 data=data_buffer; // data for track
mas01cr@204 395 while(k--){
mas01cr@204 396 thisDist=0;
mas01cr@204 397 l=dbH->dim;
mas01cr@204 398 double* q=query;
mas01cr@204 399 while(l--)
mas01cr@204 400 thisDist+=*q++**data++;
mas01cr@204 401 if(!usingTimes ||
mas01cr@204 402 (usingTimes
mas01cr@204 403 && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){
mas01cr@204 404 n=pointNN;
mas01cr@204 405 while(n--){
mas01cr@204 406 if(thisDist>=distances[n]){
mas01cr@204 407 if((n==0 || thisDist<=distances[n-1])){
mas01cr@204 408 // Copy all values above up the queue
mas01cr@204 409 for( l=pointNN-1 ; l > n ; l--){
mas01cr@204 410 distances[l]=distances[l-1];
mas01cr@204 411 qIndexes[l]=qIndexes[l-1];
mas01cr@204 412 sIndexes[l]=sIndexes[l-1];
mas01cr@204 413 }
mas01cr@204 414 distances[n]=thisDist;
mas01cr@204 415 qIndexes[n]=numVectors-j-1;
mas01cr@204 416 sIndexes[n]=trackTable[track]-k-1;
mas01cr@204 417 break;
mas01cr@204 418 }
mas01cr@204 419 }
mas01cr@204 420 else
mas01cr@204 421 break;
mas01cr@204 422 }
mas01cr@204 423 }
mas01cr@204 424 } // track
mas01cr@204 425 // Move query pointer to next query point
mas01cr@204 426 query+=dbH->dim;
mas01cr@204 427 } // query
mas01cr@204 428 // Take the average of this track's distance
mas01cr@204 429 // Test the track distances
mas01cr@204 430 thisDist=0;
mas01cr@204 431 for (n = 0; n < pointNN; n++) {
mas01cr@204 432 if (distances[n] == -DBL_MAX) break;
mas01cr@204 433 thisDist += distances[n];
mas01cr@204 434 }
mas01cr@204 435 thisDist /= n;
mas01cr@204 436
mas01cr@204 437 n=trackNN;
mas01cr@204 438 while(n--){
mas01cr@204 439 if(thisDist>=trackDistances[n]){
mas01cr@204 440 if((n==0 || thisDist<=trackDistances[n-1])){
mas01cr@204 441 // Copy all values above up the queue
mas01cr@204 442 for( l=trackNN-1 ; l > n ; l--){
mas01cr@204 443 trackDistances[l]=trackDistances[l-1];
mas01cr@204 444 trackQIndexes[l]=trackQIndexes[l-1];
mas01cr@204 445 trackSIndexes[l]=trackSIndexes[l-1];
mas01cr@204 446 trackIDs[l]=trackIDs[l-1];
mas01cr@204 447 }
mas01cr@204 448 trackDistances[n]=thisDist;
mas01cr@204 449 trackQIndexes[n]=qIndexes[0];
mas01cr@204 450 trackSIndexes[n]=sIndexes[0];
mas01cr@204 451 trackIDs[n]=track;
mas01cr@204 452 break;
mas01cr@204 453 }
mas01cr@204 454 }
mas01cr@204 455 else
mas01cr@204 456 break;
mas01cr@204 457 }
mas01cr@204 458 for(unsigned k=0; k<pointNN; k++){
mas01cr@204 459 distances[k]=-DBL_MAX;
mas01cr@204 460 qIndexes[k]=~0;
mas01cr@204 461 sIndexes[k]=~0;
mas01cr@204 462 }
mas01cr@204 463 } // tracks
mas01cr@204 464
mas01cr@204 465 free(data_buffer);
mas01cr@204 466
mas01cr@204 467 gettimeofday(&tv2, NULL);
mas01cr@204 468
mas01cr@204 469 if(verbosity>1) {
mas01cr@204 470 std::cerr << std::endl << "processed tracks :" << processedTracks
mas01cr@204 471 << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << std::endl;
mas01cr@204 472 }
mas01cr@204 473
mas01cr@204 474 if(adbQueryResponse==0){
mas01cr@204 475 if(verbosity>1) {
mas01cr@204 476 std::cerr<<std::endl;
mas01cr@204 477 }
mas01cr@204 478 // Output answer
mas01cr@204 479 // Loop over nearest neighbours
mas01cr@204 480 for(k=0; k < std::min(trackNN,processedTracks); k++)
mas01cr@204 481 std::cout << fileTable+trackIDs[k]*O2_FILETABLESIZE
mas01cr@204 482 << " " << trackDistances[k] << " " << trackQIndexes[k] << " " << trackSIndexes[k] << std::endl;
mas01cr@204 483 }
mas01cr@204 484 else{ // Process Web Services Query
mas01cr@204 485 int listLen = std::min(trackNN, processedTracks);
mas01cr@204 486 adbQueryResponse->result.__sizeRlist=listLen;
mas01cr@204 487 adbQueryResponse->result.__sizeDist=listLen;
mas01cr@204 488 adbQueryResponse->result.__sizeQpos=listLen;
mas01cr@204 489 adbQueryResponse->result.__sizeSpos=listLen;
mas01cr@204 490 adbQueryResponse->result.Rlist= new char*[listLen];
mas01cr@204 491 adbQueryResponse->result.Dist = new double[listLen];
mas01cr@204 492 adbQueryResponse->result.Qpos = new unsigned int[listLen];
mas01cr@204 493 adbQueryResponse->result.Spos = new unsigned int[listLen];
mas01cr@204 494 for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){
mas01cr@204 495 adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR];
mas01cr@204 496 adbQueryResponse->result.Dist[k]=trackDistances[k];
mas01cr@204 497 adbQueryResponse->result.Qpos[k]=trackQIndexes[k];
mas01cr@204 498 adbQueryResponse->result.Spos[k]=trackSIndexes[k];
mas01cr@204 499 sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE);
mas01cr@204 500 }
mas01cr@204 501 }
mas01cr@204 502
mas01cr@204 503 // Clean up
mas01cr@204 504 if(trackOffsetTable)
mas01cr@204 505 delete trackOffsetTable;
mas01cr@204 506 if(queryCopy)
mas01cr@204 507 delete queryCopy;
mas01cr@204 508 if(qNorm)
mas01cr@204 509 delete qNorm;
mas01cr@204 510 if(timesdata)
mas01cr@204 511 delete[] timesdata;
mas01cr@204 512 if(querydurs)
mas01cr@204 513 delete[] querydurs;
mas01cr@204 514 if(meanDBdur)
mas01cr@204 515 delete meanDBdur;
mas01cr@204 516 }
mas01cr@204 517
mas01cr@204 518 // This is a common pattern in sequence queries: what we are doing is
mas01cr@204 519 // taking a window of length seqlen over a buffer of length length,
mas01cr@204 520 // and placing the sum of the elements in that window in the first
mas01cr@204 521 // element of the window: thus replacing all but the last seqlen
mas01cr@204 522 // elements in the buffer the corresponding windowed sum.
mas01cr@204 523 void audioDB::sequence_sum(double *buffer, int length, int seqlen) {
mas01cr@204 524 double tmp1, tmp2, *ps;
mas01cr@204 525 int j, w;
mas01cr@204 526
mas01cr@204 527 tmp1 = *buffer;
mas01cr@204 528 j = 1;
mas01cr@204 529 w = seqlen - 1;
mas01cr@204 530 while(w--) {
mas01cr@204 531 *buffer += buffer[j++];
mas01cr@204 532 }
mas01cr@204 533 ps = buffer + 1;
mas01cr@204 534 w = length - seqlen; // +1 - 1
mas01cr@204 535 while(w--) {
mas01cr@204 536 tmp2 = *ps;
mas01cr@204 537 *ps = *(ps - 1) - tmp1 + *(ps + seqlen - 1);
mas01cr@204 538 tmp1 = tmp2;
mas01cr@204 539 ps++;
mas01cr@204 540 }
mas01cr@204 541 }
mas01cr@204 542
mas01cr@204 543 void audioDB::sequence_sqrt(double *buffer, int length, int seqlen) {
mas01cr@204 544 int w = length - seqlen + 1;
mas01cr@204 545 while(w--) {
mas01cr@204 546 *buffer = sqrt(*buffer);
mas01cr@204 547 buffer++;
mas01cr@204 548 }
mas01cr@204 549 }
mas01cr@204 550
mas01cr@204 551 void audioDB::sequence_average(double *buffer, int length, int seqlen) {
mas01cr@204 552 int w = length - seqlen + 1;
mas01cr@204 553 while(w--) {
mas01cr@204 554 *buffer /= seqlen;
mas01cr@204 555 buffer++;
mas01cr@204 556 }
mas01cr@204 557 }
mas01cr@204 558
mas01cr@204 559 // k nearest-neighbor (k-NN) search between query and target tracks
mas01cr@204 560 // efficient implementation based on matched filter
mas01cr@204 561 // assumes normed shingles
mas01cr@204 562 // outputs distances of retrieved shingles, max retreived = pointNN shingles per per track
mas01cr@204 563 void audioDB::trackSequenceQueryNN(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){
mas01cr@204 564
mas01cr@204 565 initTables(dbName, inFile);
mas01cr@204 566
mas01cr@204 567 // For each input vector, find the closest pointNN matching output vectors and report
mas01cr@204 568 // we use stdout in this stub version
mas01cr@204 569 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@204 570 double* query = (double*)(indata+sizeof(int));
mas01cr@204 571 double* queryCopy = 0;
mas01cr@204 572
mas01cr@204 573 if(!(dbH->flags & O2_FLAG_L2NORM) )
mas01cr@204 574 error("Database must be L2 normed for sequence query","use -L2NORM");
mas01cr@204 575
mas01cr@204 576 if(numVectors<sequenceLength)
mas01cr@204 577 error("Query shorter than requested sequence length", "maybe use -l");
mas01cr@204 578
mas01cr@204 579 if(verbosity>1) {
mas01cr@204 580 std::cerr << "performing norms ... "; std::cerr.flush();
mas01cr@204 581 }
mas01cr@204 582 unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim);
mas01cr@204 583
mas01cr@204 584 // Make a copy of the query
mas01cr@204 585 queryCopy = new double[numVectors*dbH->dim];
mas01cr@204 586 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
mas01cr@204 587 qNorm = new double[numVectors];
mas01cr@204 588 sNorm = new double[dbVectors];
mas01cr@204 589 assert(qNorm&&sNorm&&queryCopy&&sequenceLength);
mas01cr@204 590 unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
mas01cr@204 591 query = queryCopy;
mas01cr@204 592
mas01cr@204 593 // Make norm measurements relative to sequenceLength
mas01cr@204 594 unsigned w = sequenceLength-1;
mas01cr@204 595 unsigned i,j;
mas01cr@204 596
mas01cr@204 597 // Copy the L2 norm values to core to avoid disk random access later on
mas01cr@204 598 memcpy(sNorm, l2normTable, dbVectors*sizeof(double));
mas01cr@204 599 double* qnPtr = qNorm;
mas01cr@204 600 double* snPtr = sNorm;
mas01cr@204 601
mas01cr@204 602 double *sPower = 0, *qPower = 0;
mas01cr@204 603 double *spPtr = 0, *qpPtr = 0;
mas01cr@204 604
mas01cr@204 605 if (usingPower) {
mas01cr@204 606 if (!(dbH->flags & O2_FLAG_POWER)) {
mas01cr@204 607 error("database not power-enabled", dbName);
mas01cr@204 608 }
mas01cr@204 609 sPower = new double[dbVectors];
mas01cr@204 610 spPtr = sPower;
mas01cr@204 611 memcpy(sPower, powerTable, dbVectors * sizeof(double));
mas01cr@204 612 }
mas01cr@204 613
mas01cr@204 614 for(i=0; i<dbH->numFiles; i++){
mas01cr@204 615 if(trackTable[i]>=sequenceLength) {
mas01cr@204 616 sequence_sum(snPtr, trackTable[i], sequenceLength);
mas01cr@204 617 sequence_sqrt(snPtr, trackTable[i], sequenceLength);
mas01cr@204 618
mas01cr@204 619 if (usingPower) {
mas01cr@204 620 sequence_sum(spPtr, trackTable[i], sequenceLength);
mas01cr@204 621 sequence_average(spPtr, trackTable[i], sequenceLength);
mas01cr@204 622 }
mas01cr@204 623 }
mas01cr@204 624 snPtr += trackTable[i];
mas01cr@204 625 if (usingPower) {
mas01cr@204 626 spPtr += trackTable[i];
mas01cr@204 627 }
mas01cr@204 628 }
mas01cr@204 629
mas01cr@204 630 sequence_sum(qnPtr, numVectors, sequenceLength);
mas01cr@204 631 sequence_sqrt(qnPtr, numVectors, sequenceLength);
mas01cr@204 632
mas01cr@204 633 if (usingPower) {
mas01cr@204 634 qPower = new double[numVectors];
mas01cr@204 635 qpPtr = qPower;
mas01cr@204 636 if (lseek(powerfd, sizeof(int), SEEK_SET) == (off_t) -1) {
mas01cr@204 637 error("error seeking to data", powerFileName, "lseek");
mas01cr@204 638 }
mas01cr@204 639 int count = read(powerfd, qPower, numVectors * sizeof(double));
mas01cr@204 640 if (count == -1) {
mas01cr@204 641 error("error reading data", powerFileName, "read");
mas01cr@204 642 }
mas01cr@204 643 if ((unsigned) count != numVectors * sizeof(double)) {
mas01cr@204 644 error("short read", powerFileName);
mas01cr@204 645 }
mas01cr@204 646
mas01cr@204 647 sequence_sum(qpPtr, numVectors, sequenceLength);
mas01cr@204 648 sequence_average(qpPtr, numVectors, sequenceLength);
mas01cr@204 649 }
mas01cr@204 650
mas01cr@204 651 if(verbosity>1) {
mas01cr@204 652 std::cerr << "done." << std::endl;
mas01cr@204 653 }
mas01cr@204 654
mas01cr@204 655 if(verbosity>1) {
mas01cr@204 656 std::cerr << "matching tracks..." << std::endl;
mas01cr@204 657 }
mas01cr@204 658
mas01cr@204 659 assert(pointNN>0 && pointNN<=O2_MAXNN);
mas01cr@204 660 assert(trackNN>0 && trackNN<=O2_MAXNN);
mas01cr@204 661
mas01cr@204 662 // Make temporary dynamic memory for results
mas01cr@204 663 double trackDistances[trackNN];
mas01cr@204 664 unsigned trackIDs[trackNN];
mas01cr@204 665 unsigned trackQIndexes[trackNN];
mas01cr@204 666 unsigned trackSIndexes[trackNN];
mas01cr@204 667
mas01cr@204 668 double distances[pointNN];
mas01cr@204 669 unsigned qIndexes[pointNN];
mas01cr@204 670 unsigned sIndexes[pointNN];
mas01cr@204 671
mas01cr@204 672
mas01cr@204 673 unsigned k,l,m,n,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength;
mas01cr@204 674 double thisDist;
mas01cr@204 675
mas01cr@204 676 for(k=0; k<pointNN; k++){
mas01cr@204 677 distances[k]=1.0e6;
mas01cr@204 678 qIndexes[k]=~0;
mas01cr@204 679 sIndexes[k]=~0;
mas01cr@204 680 }
mas01cr@204 681
mas01cr@204 682 for(k=0; k<trackNN; k++){
mas01cr@204 683 trackDistances[k]=1.0e6;
mas01cr@204 684 trackQIndexes[k]=~0;
mas01cr@204 685 trackSIndexes[k]=~0;
mas01cr@204 686 trackIDs[k]=~0;
mas01cr@204 687 }
mas01cr@204 688
mas01cr@204 689 // Timestamp and durations processing
mas01cr@204 690 double meanQdur = 0;
mas01cr@204 691 double *timesdata = 0;
mas01cr@204 692 double *querydurs = 0;
mas01cr@204 693 double *meanDBdur = 0;
mas01cr@204 694
mas01cr@204 695 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
mas01cr@204 696 std::cerr << "warning: ignoring query timestamps for non-timestamped database" << std::endl;
mas01cr@204 697 usingTimes=0;
mas01cr@204 698 }
mas01cr@204 699
mas01cr@204 700 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@204 701 std::cerr << "warning: no timestamps given for query. Ignoring database timestamps." << std::endl;
mas01cr@204 702
mas01cr@204 703 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
mas01cr@204 704 timesdata = new double[2*numVectors];
mas01cr@204 705 querydurs = new double[numVectors];
mas01cr@204 706
mas01cr@204 707 insertTimeStamps(numVectors, timesFile, timesdata);
mas01cr@204 708 // Calculate durations of points
mas01cr@204 709 for(k=0; k<numVectors-1; k++) {
mas01cr@204 710 querydurs[k] = timesdata[2*k+1] - timesdata[2*k];
mas01cr@204 711 meanQdur += querydurs[k];
mas01cr@204 712 }
mas01cr@204 713 meanQdur/=k;
mas01cr@204 714 if(verbosity>1) {
mas01cr@204 715 std::cerr << "mean query file duration: " << meanQdur << std::endl;
mas01cr@204 716 }
mas01cr@204 717 meanDBdur = new double[dbH->numFiles];
mas01cr@204 718 assert(meanDBdur);
mas01cr@204 719 for(k=0; k<dbH->numFiles; k++){
mas01cr@204 720 meanDBdur[k]=0.0;
mas01cr@204 721 for(j=0; j<trackTable[k]-1 ; j++) {
mas01cr@204 722 meanDBdur[k]+=timesTable[2*j+1]-timesTable[2*j];
mas01cr@204 723 }
mas01cr@204 724 meanDBdur[k]/=j;
mas01cr@204 725 }
mas01cr@204 726 }
mas01cr@204 727
mas01cr@204 728 if(usingQueryPoint)
mas01cr@204 729 if(queryPoint>numVectors || queryPoint>numVectors-wL+1)
mas01cr@204 730 error("queryPoint > numVectors-wL+1 in query");
mas01cr@204 731 else{
mas01cr@204 732 if(verbosity>1) {
mas01cr@204 733 std::cerr << "query point: " << queryPoint << std::endl; std::cerr.flush();
mas01cr@204 734 }
mas01cr@204 735 query = query + queryPoint * dbH->dim;
mas01cr@204 736 qnPtr = qnPtr + queryPoint;
mas01cr@204 737 if (usingPower) {
mas01cr@204 738 qpPtr = qpPtr + queryPoint;
mas01cr@204 739 }
mas01cr@204 740 numVectors=wL;
mas01cr@204 741 }
mas01cr@204 742
mas01cr@204 743 double ** D = 0; // Differences query and target
mas01cr@204 744 double ** DD = 0; // Matched filter distance
mas01cr@204 745
mas01cr@204 746 D = new double*[numVectors];
mas01cr@204 747 assert(D);
mas01cr@204 748 DD = new double*[numVectors];
mas01cr@204 749 assert(DD);
mas01cr@204 750
mas01cr@204 751 gettimeofday(&tv1, NULL);
mas01cr@204 752 unsigned processedTracks = 0;
mas01cr@204 753 unsigned successfulTracks=0;
mas01cr@204 754
mas01cr@204 755 double* qp;
mas01cr@204 756 double* sp;
mas01cr@204 757 double* dp;
mas01cr@204 758
mas01cr@204 759 // build track offset table
mas01cr@204 760 off_t *trackOffsetTable = new off_t[dbH->numFiles];
mas01cr@204 761 unsigned cumTrack=0;
mas01cr@204 762 off_t trackIndexOffset;
mas01cr@204 763 for(k=0; k<dbH->numFiles;k++){
mas01cr@204 764 trackOffsetTable[k]=cumTrack;
mas01cr@204 765 cumTrack+=trackTable[k]*dbH->dim;
mas01cr@204 766 }
mas01cr@204 767
mas01cr@204 768 char nextKey [MAXSTR];
mas01cr@204 769
mas01cr@204 770 // chi^2 statistics
mas01cr@204 771 double sampleCount = 0;
mas01cr@204 772 double sampleSum = 0;
mas01cr@204 773 double logSampleSum = 0;
mas01cr@204 774 double minSample = 1e9;
mas01cr@204 775 double maxSample = 0;
mas01cr@204 776
mas01cr@204 777 // Track loop
mas01cr@204 778 size_t data_buffer_size = 0;
mas01cr@204 779 double *data_buffer = 0;
mas01cr@204 780 lseek(dbfid, dbH->dataOffset, SEEK_SET);
mas01cr@204 781
mas01cr@204 782 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++) {
mas01cr@204 783
mas01cr@204 784 trackOffset = trackOffsetTable[track]; // numDoubles offset
mas01cr@204 785
mas01cr@204 786 // get trackID from file if using a control file
mas01cr@204 787 if(trackFile) {
mas01cr@204 788 trackFile->getline(nextKey,MAXSTR);
mas01cr@204 789 if(!trackFile->eof()) {
mas01cr@204 790 track = getKeyPos(nextKey);
mas01cr@204 791 trackOffset = trackOffsetTable[track];
mas01cr@204 792 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET);
mas01cr@204 793 } else {
mas01cr@204 794 break;
mas01cr@204 795 }
mas01cr@204 796 }
mas01cr@204 797
mas01cr@204 798 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset
mas01cr@204 799
mas01cr@204 800 if(sequenceLength<=trackTable[track]){ // test for short sequences
mas01cr@204 801
mas01cr@204 802 if(verbosity>7) {
mas01cr@204 803 std::cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";std::cerr.flush();
mas01cr@204 804 }
mas01cr@204 805
mas01cr@204 806 // Sum products matrix
mas01cr@204 807 for(j=0; j<numVectors;j++){
mas01cr@204 808 D[j]=new double[trackTable[track]];
mas01cr@204 809 assert(D[j]);
mas01cr@204 810
mas01cr@204 811 }
mas01cr@204 812
mas01cr@204 813 // Matched filter matrix
mas01cr@204 814 for(j=0; j<numVectors;j++){
mas01cr@204 815 DD[j]=new double[trackTable[track]];
mas01cr@204 816 assert(DD[j]);
mas01cr@204 817 }
mas01cr@204 818
mas01cr@204 819 if (trackTable[track] * sizeof(double) * dbH->dim > data_buffer_size) {
mas01cr@204 820 if(data_buffer) {
mas01cr@204 821 free(data_buffer);
mas01cr@204 822 }
mas01cr@204 823 {
mas01cr@204 824 data_buffer_size = trackTable[track] * sizeof(double) * dbH->dim;
mas01cr@204 825 void *tmp = malloc(data_buffer_size);
mas01cr@204 826 if (tmp == NULL) {
mas01cr@204 827 error("error allocating data buffer");
mas01cr@204 828 }
mas01cr@204 829 data_buffer = (double *) tmp;
mas01cr@204 830 }
mas01cr@204 831 }
mas01cr@204 832
mas01cr@204 833 read(dbfid, data_buffer, trackTable[track] * sizeof(double) * dbH->dim);
mas01cr@204 834
mas01cr@204 835 // Dot product
mas01cr@204 836 for(j=0; j<numVectors; j++)
mas01cr@204 837 for(k=0; k<trackTable[track]; k++){
mas01cr@204 838 qp=query+j*dbH->dim;
mas01cr@204 839 sp=data_buffer+k*dbH->dim;
mas01cr@204 840 DD[j][k]=0.0; // Initialize matched filter array
mas01cr@204 841 dp=&D[j][k]; // point to correlation cell j,k
mas01cr@204 842 *dp=0.0; // initialize correlation cell
mas01cr@204 843 l=dbH->dim; // size of vectors
mas01cr@204 844 while(l--)
mas01cr@204 845 *dp+=*qp++**sp++;
mas01cr@204 846 }
mas01cr@204 847
mas01cr@204 848 // Matched Filter
mas01cr@204 849 // HOP SIZE == 1
mas01cr@204 850 double* spd;
mas01cr@204 851 if(HOP_SIZE==1){ // HOP_SIZE = shingleHop
mas01cr@204 852 for(w=0; w<wL; w++)
mas01cr@204 853 for(j=0; j<numVectors-w; j++){
mas01cr@204 854 sp=DD[j];
mas01cr@204 855 spd=D[j+w]+w;
mas01cr@204 856 k=trackTable[track]-w;
mas01cr@204 857 while(k--)
mas01cr@204 858 *sp+++=*spd++;
mas01cr@204 859 }
mas01cr@204 860 }
mas01cr@204 861
mas01cr@204 862 else{ // HOP_SIZE != 1
mas01cr@204 863 for(w=0; w<wL; w++)
mas01cr@204 864 for(j=0; j<numVectors-w; j+=HOP_SIZE){
mas01cr@204 865 sp=DD[j];
mas01cr@204 866 spd=D[j+w]+w;
mas01cr@204 867 for(k=0; k<trackTable[track]-w; k+=HOP_SIZE){
mas01cr@204 868 *sp+=*spd;
mas01cr@204 869 sp+=HOP_SIZE;
mas01cr@204 870 spd+=HOP_SIZE;
mas01cr@204 871 }
mas01cr@204 872 }
mas01cr@204 873 }
mas01cr@204 874
mas01cr@204 875 if(verbosity>3 && usingTimes) {
mas01cr@204 876 std::cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[track] << std::endl;
mas01cr@204 877 std::cerr.flush();
mas01cr@204 878 }
mas01cr@204 879
mas01cr@204 880 if(!usingTimes ||
mas01cr@204 881 (usingTimes
mas01cr@204 882 && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){
mas01cr@204 883
mas01cr@204 884 if(verbosity>3 && usingTimes) {
mas01cr@204 885 std::cerr << "within duration tolerance." << std::endl;
mas01cr@204 886 std::cerr.flush();
mas01cr@204 887 }
mas01cr@204 888
mas01cr@204 889 // Search for minimum distance by shingles (concatenated vectors)
mas01cr@204 890 for(j=0;j<=numVectors-wL;j+=HOP_SIZE)
mas01cr@204 891 for(k=0;k<=trackTable[track]-wL;k+=HOP_SIZE){
mas01cr@204 892 thisDist=2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k];
mas01cr@204 893 if(verbosity>9) {
mas01cr@204 894 std::cerr << thisDist << " " << qnPtr[j] << " " << sNorm[trackIndexOffset+k] << std::endl;
mas01cr@204 895 }
mas01cr@204 896 // Gather chi^2 statistics
mas01cr@204 897 if(thisDist<minSample)
mas01cr@204 898 minSample=thisDist;
mas01cr@204 899 else if(thisDist>maxSample)
mas01cr@204 900 maxSample=thisDist;
mas01cr@204 901 if(thisDist>1e-9){
mas01cr@204 902 sampleCount++;
mas01cr@204 903 sampleSum+=thisDist;
mas01cr@204 904 logSampleSum+=log(thisDist);
mas01cr@204 905 }
mas01cr@204 906
mas01cr@204 907 // diffL2 = fabs(qnPtr[j] - sNorm[trackIndexOffset+k]);
mas01cr@204 908 // Power test
mas01cr@204 909 if (usingPower) {
mas01cr@204 910 if (!(powers_acceptable(qpPtr[j], sPower[trackIndexOffset + k]))) {
mas01cr@204 911 thisDist = 1000000.0;
mas01cr@204 912 }
mas01cr@204 913 }
mas01cr@204 914
mas01cr@204 915 // k-NN match algorithm
mas01cr@204 916 m=pointNN;
mas01cr@204 917 while(m--){
mas01cr@204 918 if(thisDist<=distances[m])
mas01cr@204 919 if(m==0 || thisDist>=distances[m-1]){
mas01cr@204 920 // Shuffle distances up the list
mas01cr@204 921 for(l=pointNN-1; l>m; l--){
mas01cr@204 922 distances[l]=distances[l-1];
mas01cr@204 923 qIndexes[l]=qIndexes[l-1];
mas01cr@204 924 sIndexes[l]=sIndexes[l-1];
mas01cr@204 925 }
mas01cr@204 926 distances[m]=thisDist;
mas01cr@204 927 if(usingQueryPoint)
mas01cr@204 928 qIndexes[m]=queryPoint;
mas01cr@204 929 else
mas01cr@204 930 qIndexes[m]=j;
mas01cr@204 931 sIndexes[m]=k;
mas01cr@204 932 break;
mas01cr@204 933 }
mas01cr@204 934 }
mas01cr@204 935 }
mas01cr@204 936 // Calculate the mean of the N-Best matches
mas01cr@204 937 thisDist=0.0;
mas01cr@204 938 for(m=0; m<pointNN; m++) {
mas01cr@204 939 if (distances[m] == 1000000.0) break;
mas01cr@204 940 thisDist+=distances[m];
mas01cr@204 941 }
mas01cr@204 942 thisDist/=m;
mas01cr@204 943
mas01cr@204 944 // Let's see the distances then...
mas01cr@204 945 if(verbosity>3) {
mas01cr@204 946 std::cerr << fileTable+track*O2_FILETABLESIZE << " " << thisDist << std::endl;
mas01cr@204 947 }
mas01cr@204 948
mas01cr@204 949
mas01cr@204 950 // All the track stuff goes here
mas01cr@204 951 n=trackNN;
mas01cr@204 952 while(n--){
mas01cr@204 953 if(thisDist<=trackDistances[n]){
mas01cr@204 954 if((n==0 || thisDist>=trackDistances[n-1])){
mas01cr@204 955 // Copy all values above up the queue
mas01cr@204 956 for( l=trackNN-1 ; l > n ; l--){
mas01cr@204 957 trackDistances[l]=trackDistances[l-1];
mas01cr@204 958 trackQIndexes[l]=trackQIndexes[l-1];
mas01cr@204 959 trackSIndexes[l]=trackSIndexes[l-1];
mas01cr@204 960 trackIDs[l]=trackIDs[l-1];
mas01cr@204 961 }
mas01cr@204 962 trackDistances[n]=thisDist;
mas01cr@204 963 trackQIndexes[n]=qIndexes[0];
mas01cr@204 964 trackSIndexes[n]=sIndexes[0];
mas01cr@204 965 successfulTracks++;
mas01cr@204 966 trackIDs[n]=track;
mas01cr@204 967 break;
mas01cr@204 968 }
mas01cr@204 969 }
mas01cr@204 970 else
mas01cr@204 971 break;
mas01cr@204 972 }
mas01cr@204 973 } // Duration match
mas01cr@204 974
mas01cr@204 975 // Clean up current track
mas01cr@204 976 if(D!=NULL){
mas01cr@204 977 for(j=0; j<numVectors; j++)
mas01cr@204 978 delete[] D[j];
mas01cr@204 979 }
mas01cr@204 980
mas01cr@204 981 if(DD!=NULL){
mas01cr@204 982 for(j=0; j<numVectors; j++)
mas01cr@204 983 delete[] DD[j];
mas01cr@204 984 }
mas01cr@204 985 }
mas01cr@204 986 // per-track reset array values
mas01cr@204 987 for(unsigned k=0; k<pointNN; k++){
mas01cr@204 988 distances[k]=1.0e6;
mas01cr@204 989 qIndexes[k]=~0;
mas01cr@204 990 sIndexes[k]=~0;
mas01cr@204 991 }
mas01cr@204 992 }
mas01cr@204 993
mas01cr@204 994 free(data_buffer);
mas01cr@204 995
mas01cr@204 996 gettimeofday(&tv2,NULL);
mas01cr@204 997 if(verbosity>1) {
mas01cr@204 998 std::cerr << std::endl << "processed tracks :" << processedTracks << " matched tracks: " << successfulTracks << " elapsed time:"
mas01cr@204 999 << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << std::endl;
mas01cr@204 1000 std::cerr << "sampleCount: " << sampleCount << " sampleSum: " << sampleSum << " logSampleSum: " << logSampleSum
mas01cr@204 1001 << " minSample: " << minSample << " maxSample: " << maxSample << std::endl;
mas01cr@204 1002 }
mas01cr@204 1003 if(adbQueryResponse==0){
mas01cr@204 1004 if(verbosity>1) {
mas01cr@204 1005 std::cerr<<std::endl;
mas01cr@204 1006 }
mas01cr@204 1007 // Output answer
mas01cr@204 1008 // Loop over nearest neighbours
mas01cr@204 1009 for(k=0; k < std::min(trackNN,successfulTracks); k++)
mas01cr@204 1010 std::cout << fileTable+trackIDs[k]*O2_FILETABLESIZE << " " << trackDistances[k] << " "
mas01cr@204 1011 << trackQIndexes[k] << " " << trackSIndexes[k] << std::endl;
mas01cr@204 1012 }
mas01cr@204 1013 else{ // Process Web Services Query
mas01cr@204 1014 int listLen = std::min(trackNN, processedTracks);
mas01cr@204 1015 adbQueryResponse->result.__sizeRlist=listLen;
mas01cr@204 1016 adbQueryResponse->result.__sizeDist=listLen;
mas01cr@204 1017 adbQueryResponse->result.__sizeQpos=listLen;
mas01cr@204 1018 adbQueryResponse->result.__sizeSpos=listLen;
mas01cr@204 1019 adbQueryResponse->result.Rlist= new char*[listLen];
mas01cr@204 1020 adbQueryResponse->result.Dist = new double[listLen];
mas01cr@204 1021 adbQueryResponse->result.Qpos = new unsigned int[listLen];
mas01cr@204 1022 adbQueryResponse->result.Spos = new unsigned int[listLen];
mas01cr@204 1023 for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){
mas01cr@204 1024 adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR];
mas01cr@204 1025 adbQueryResponse->result.Dist[k]=trackDistances[k];
mas01cr@204 1026 adbQueryResponse->result.Qpos[k]=trackQIndexes[k];
mas01cr@204 1027 adbQueryResponse->result.Spos[k]=trackSIndexes[k];
mas01cr@204 1028 sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE);
mas01cr@204 1029 }
mas01cr@204 1030 }
mas01cr@204 1031
mas01cr@204 1032 // Clean up
mas01cr@204 1033 if(trackOffsetTable)
mas01cr@204 1034 delete[] trackOffsetTable;
mas01cr@204 1035 if(queryCopy)
mas01cr@204 1036 delete[] queryCopy;
mas01cr@204 1037 if(qNorm)
mas01cr@204 1038 delete[] qNorm;
mas01cr@204 1039 if(sNorm)
mas01cr@204 1040 delete[] sNorm;
mas01cr@204 1041 if(qPower)
mas01cr@204 1042 delete[] qPower;
mas01cr@204 1043 if(sPower)
mas01cr@204 1044 delete[] sPower;
mas01cr@204 1045 if(D)
mas01cr@204 1046 delete[] D;
mas01cr@204 1047 if(DD)
mas01cr@204 1048 delete[] DD;
mas01cr@204 1049 if(timesdata)
mas01cr@204 1050 delete[] timesdata;
mas01cr@204 1051 if(querydurs)
mas01cr@204 1052 delete[] querydurs;
mas01cr@204 1053 if(meanDBdur)
mas01cr@204 1054 delete[] meanDBdur;
mas01cr@204 1055 }
mas01cr@204 1056
mas01cr@204 1057 // Radius search between query and target tracks
mas01cr@204 1058 // efficient implementation based on matched filter
mas01cr@204 1059 // assumes normed shingles
mas01cr@204 1060 // outputs count of retrieved shingles, max retreived = one shingle per query shingle per track
mas01cr@204 1061 void audioDB::trackSequenceQueryRad(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){
mas01cr@204 1062
mas01cr@204 1063 initTables(dbName, inFile);
mas01cr@204 1064
mas01cr@204 1065 // For each input vector, find the closest pointNN matching output vectors and report
mas01cr@204 1066 // we use stdout in this stub version
mas01cr@204 1067 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
mas01cr@204 1068 double* query = (double*)(indata+sizeof(int));
mas01cr@204 1069 double* queryCopy = 0;
mas01cr@204 1070
mas01cr@204 1071 if(!(dbH->flags & O2_FLAG_L2NORM) )
mas01cr@204 1072 error("Database must be L2 normed for sequence query","use -l2norm");
mas01cr@204 1073
mas01cr@204 1074 if(verbosity>1) {
mas01cr@204 1075 std::cerr << "performing norms ... "; std::cerr.flush();
mas01cr@204 1076 }
mas01cr@204 1077 unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim);
mas01cr@204 1078
mas01cr@204 1079 // Make a copy of the query
mas01cr@204 1080 queryCopy = new double[numVectors*dbH->dim];
mas01cr@204 1081 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
mas01cr@204 1082 qNorm = new double[numVectors];
mas01cr@204 1083 sNorm = new double[dbVectors];
mas01cr@204 1084 assert(qNorm&&sNorm&&queryCopy&&sequenceLength);
mas01cr@204 1085 unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
mas01cr@204 1086 query = queryCopy;
mas01cr@204 1087
mas01cr@204 1088 // Make norm measurements relative to sequenceLength
mas01cr@204 1089 unsigned w = sequenceLength-1;
mas01cr@204 1090 unsigned i,j;
mas01cr@204 1091
mas01cr@204 1092 // Copy the L2 norm values to core to avoid disk random access later on
mas01cr@204 1093 memcpy(sNorm, l2normTable, dbVectors*sizeof(double));
mas01cr@204 1094 double* snPtr = sNorm;
mas01cr@204 1095 double* qnPtr = qNorm;
mas01cr@204 1096
mas01cr@204 1097 double *sPower = 0, *qPower = 0;
mas01cr@204 1098 double *spPtr = 0, *qpPtr = 0;
mas01cr@204 1099
mas01cr@204 1100 if (usingPower) {
mas01cr@204 1101 if(!(dbH->flags & O2_FLAG_POWER)) {
mas01cr@204 1102 error("database not power-enabled", dbName);
mas01cr@204 1103 }
mas01cr@204 1104 sPower = new double[dbVectors];
mas01cr@204 1105 spPtr = sPower;
mas01cr@204 1106 memcpy(sPower, powerTable, dbVectors * sizeof(double));
mas01cr@204 1107 }
mas01cr@204 1108
mas01cr@204 1109 for(i=0; i<dbH->numFiles; i++){
mas01cr@204 1110 if(trackTable[i]>=sequenceLength) {
mas01cr@204 1111 sequence_sum(snPtr, trackTable[i], sequenceLength);
mas01cr@204 1112 sequence_sqrt(snPtr, trackTable[i], sequenceLength);
mas01cr@204 1113 if (usingPower) {
mas01cr@204 1114 sequence_sum(spPtr, trackTable[i], sequenceLength);
mas01cr@204 1115 sequence_average(spPtr, trackTable[i], sequenceLength);
mas01cr@204 1116 }
mas01cr@204 1117 }
mas01cr@204 1118 snPtr += trackTable[i];
mas01cr@204 1119 if (usingPower) {
mas01cr@204 1120 spPtr += trackTable[i];
mas01cr@204 1121 }
mas01cr@204 1122 }
mas01cr@204 1123
mas01cr@204 1124 sequence_sum(qnPtr, numVectors, sequenceLength);
mas01cr@204 1125 sequence_sqrt(qnPtr, numVectors, sequenceLength);
mas01cr@204 1126
mas01cr@204 1127 if (usingPower) {
mas01cr@204 1128 qPower = new double[numVectors];
mas01cr@204 1129 qpPtr = qPower;
mas01cr@204 1130 if (lseek(powerfd, sizeof(int), SEEK_SET) == (off_t) -1) {
mas01cr@204 1131 error("error seeking to data", powerFileName, "lseek");
mas01cr@204 1132 }
mas01cr@204 1133 int count = read(powerfd, qPower, numVectors * sizeof(double));
mas01cr@204 1134 if (count == -1) {
mas01cr@204 1135 error("error reading data", powerFileName, "read");
mas01cr@204 1136 }
mas01cr@204 1137 if ((unsigned) count != numVectors * sizeof(double)) {
mas01cr@204 1138 error("short read", powerFileName);
mas01cr@204 1139 }
mas01cr@204 1140
mas01cr@204 1141 sequence_sum(qpPtr, numVectors, sequenceLength);
mas01cr@204 1142 sequence_average(qpPtr, numVectors, sequenceLength);
mas01cr@204 1143 }
mas01cr@204 1144
mas01cr@204 1145 if(verbosity>1) {
mas01cr@204 1146 std::cerr << "done." << std::endl;
mas01cr@204 1147 }
mas01cr@204 1148
mas01cr@204 1149 if(verbosity>1) {
mas01cr@204 1150 std::cerr << "matching tracks..." << std::endl;
mas01cr@204 1151 }
mas01cr@204 1152
mas01cr@204 1153 assert(pointNN>0 && pointNN<=O2_MAXNN);
mas01cr@204 1154 assert(trackNN>0 && trackNN<=O2_MAXNN);
mas01cr@204 1155
mas01cr@204 1156 // Make temporary dynamic memory for results
mas01cr@204 1157 double trackDistances[trackNN];
mas01cr@204 1158 unsigned trackIDs[trackNN];
mas01cr@204 1159 unsigned trackQIndexes[trackNN];
mas01cr@204 1160 unsigned trackSIndexes[trackNN];
mas01cr@204 1161
mas01cr@204 1162 double distances[pointNN];
mas01cr@204 1163 unsigned qIndexes[pointNN];
mas01cr@204 1164 unsigned sIndexes[pointNN];
mas01cr@204 1165
mas01cr@204 1166
mas01cr@204 1167 unsigned k,l,n,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength;
mas01cr@204 1168 double thisDist;
mas01cr@204 1169
mas01cr@204 1170 for(k=0; k<pointNN; k++){
mas01cr@204 1171 distances[k]=0.0;
mas01cr@204 1172 qIndexes[k]=~0;
mas01cr@204 1173 sIndexes[k]=~0;
mas01cr@204 1174 }
mas01cr@204 1175
mas01cr@204 1176 for(k=0; k<trackNN; k++){
mas01cr@204 1177 trackDistances[k]=0.0;
mas01cr@204 1178 trackQIndexes[k]=~0;
mas01cr@204 1179 trackSIndexes[k]=~0;
mas01cr@204 1180 trackIDs[k]=~0;
mas01cr@204 1181 }
mas01cr@204 1182
mas01cr@204 1183 // Timestamp and durations processing
mas01cr@204 1184 double meanQdur = 0;
mas01cr@204 1185 double *timesdata = 0;
mas01cr@204 1186 double *querydurs = 0;
mas01cr@204 1187 double *meanDBdur = 0;
mas01cr@204 1188
mas01cr@204 1189 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
mas01cr@204 1190 std::cerr << "warning: ignoring query timestamps for non-timestamped database" << std::endl;
mas01cr@204 1191 usingTimes=0;
mas01cr@204 1192 }
mas01cr@204 1193
mas01cr@204 1194 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
mas01cr@204 1195 std::cerr << "warning: no timestamps given for query. Ignoring database timestamps." << std::endl;
mas01cr@204 1196
mas01cr@204 1197 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
mas01cr@204 1198 timesdata = new double[2*numVectors];
mas01cr@204 1199 querydurs = new double[numVectors];
mas01cr@204 1200
mas01cr@204 1201 insertTimeStamps(numVectors, timesFile, timesdata);
mas01cr@204 1202 // Calculate durations of points
mas01cr@204 1203 for(k=0; k<numVectors-1; k++){
mas01cr@204 1204 querydurs[k] = timesdata[2*k+1] - timesdata[2*k];
mas01cr@204 1205 meanQdur += querydurs[k];
mas01cr@204 1206 }
mas01cr@204 1207 meanQdur/=k;
mas01cr@204 1208 if(verbosity>1) {
mas01cr@204 1209 std::cerr << "mean query file duration: " << meanQdur << std::endl;
mas01cr@204 1210 }
mas01cr@204 1211 meanDBdur = new double[dbH->numFiles];
mas01cr@204 1212 assert(meanDBdur);
mas01cr@204 1213 for(k=0; k<dbH->numFiles; k++){
mas01cr@204 1214 meanDBdur[k]=0.0;
mas01cr@204 1215 for(j=0; j<trackTable[k]-1 ; j++) {
mas01cr@204 1216 meanDBdur[k]+=timesTable[2*j+1]-timesTable[2*j];
mas01cr@204 1217 }
mas01cr@204 1218 meanDBdur[k]/=j;
mas01cr@204 1219 }
mas01cr@204 1220 }
mas01cr@204 1221
mas01cr@204 1222 if(usingQueryPoint)
mas01cr@204 1223 if(queryPoint>numVectors || queryPoint>numVectors-wL+1)
mas01cr@204 1224 error("queryPoint > numVectors-wL+1 in query");
mas01cr@204 1225 else{
mas01cr@204 1226 if(verbosity>1) {
mas01cr@204 1227 std::cerr << "query point: " << queryPoint << std::endl; std::cerr.flush();
mas01cr@204 1228 }
mas01cr@204 1229 query = query + queryPoint*dbH->dim;
mas01cr@204 1230 qnPtr = qnPtr + queryPoint;
mas01cr@204 1231 if (usingPower) {
mas01cr@204 1232 qpPtr = qpPtr + queryPoint;
mas01cr@204 1233 }
mas01cr@204 1234 numVectors=wL;
mas01cr@204 1235 }
mas01cr@204 1236
mas01cr@204 1237 double ** D = 0; // Differences query and target
mas01cr@204 1238 double ** DD = 0; // Matched filter distance
mas01cr@204 1239
mas01cr@204 1240 D = new double*[numVectors];
mas01cr@204 1241 assert(D);
mas01cr@204 1242 DD = new double*[numVectors];
mas01cr@204 1243 assert(DD);
mas01cr@204 1244
mas01cr@204 1245 gettimeofday(&tv1, NULL);
mas01cr@204 1246 unsigned processedTracks = 0;
mas01cr@204 1247 unsigned successfulTracks=0;
mas01cr@204 1248
mas01cr@204 1249 double* qp;
mas01cr@204 1250 double* sp;
mas01cr@204 1251 double* dp;
mas01cr@204 1252
mas01cr@204 1253 // build track offset table
mas01cr@204 1254 off_t *trackOffsetTable = new off_t[dbH->numFiles];
mas01cr@204 1255 unsigned cumTrack=0;
mas01cr@204 1256 off_t trackIndexOffset;
mas01cr@204 1257 for(k=0; k<dbH->numFiles;k++){
mas01cr@204 1258 trackOffsetTable[k]=cumTrack;
mas01cr@204 1259 cumTrack+=trackTable[k]*dbH->dim;
mas01cr@204 1260 }
mas01cr@204 1261
mas01cr@204 1262 char nextKey [MAXSTR];
mas01cr@204 1263
mas01cr@204 1264 // chi^2 statistics
mas01cr@204 1265 double sampleCount = 0;
mas01cr@204 1266 double sampleSum = 0;
mas01cr@204 1267 double logSampleSum = 0;
mas01cr@204 1268 double minSample = 1e9;
mas01cr@204 1269 double maxSample = 0;
mas01cr@204 1270
mas01cr@204 1271 // Track loop
mas01cr@204 1272 size_t data_buffer_size = 0;
mas01cr@204 1273 double *data_buffer = 0;
mas01cr@204 1274 lseek(dbfid, dbH->dataOffset, SEEK_SET);
mas01cr@204 1275
mas01cr@204 1276 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){
mas01cr@204 1277
mas01cr@204 1278 trackOffset = trackOffsetTable[track]; // numDoubles offset
mas01cr@204 1279
mas01cr@204 1280 // get trackID from file if using a control file
mas01cr@204 1281 if(trackFile) {
mas01cr@204 1282 trackFile->getline(nextKey,MAXSTR);
mas01cr@204 1283 if(!trackFile->eof()) {
mas01cr@204 1284 track = getKeyPos(nextKey);
mas01cr@204 1285 trackOffset = trackOffsetTable[track];
mas01cr@204 1286 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET);
mas01cr@204 1287 } else {
mas01cr@204 1288 break;
mas01cr@204 1289 }
mas01cr@204 1290 }
mas01cr@204 1291
mas01cr@204 1292 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset
mas01cr@204 1293
mas01cr@204 1294 if(sequenceLength<=trackTable[track]){ // test for short sequences
mas01cr@204 1295
mas01cr@204 1296 if(verbosity>7) {
mas01cr@204 1297 std::cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";std::cerr.flush();
mas01cr@204 1298 }
mas01cr@204 1299
mas01cr@204 1300 // Sum products matrix
mas01cr@204 1301 for(j=0; j<numVectors;j++){
mas01cr@204 1302 D[j]=new double[trackTable[track]];
mas01cr@204 1303 assert(D[j]);
mas01cr@204 1304
mas01cr@204 1305 }
mas01cr@204 1306
mas01cr@204 1307 // Matched filter matrix
mas01cr@204 1308 for(j=0; j<numVectors;j++){
mas01cr@204 1309 DD[j]=new double[trackTable[track]];
mas01cr@204 1310 assert(DD[j]);
mas01cr@204 1311 }
mas01cr@204 1312
mas01cr@204 1313 if (trackTable[track] * sizeof(double) * dbH->dim > data_buffer_size) {
mas01cr@204 1314 if(data_buffer) {
mas01cr@204 1315 free(data_buffer);
mas01cr@204 1316 }
mas01cr@204 1317 {
mas01cr@204 1318 data_buffer_size = trackTable[track] * sizeof(double) * dbH->dim;
mas01cr@204 1319 void *tmp = malloc(data_buffer_size);
mas01cr@204 1320 if (tmp == NULL) {
mas01cr@204 1321 error("error allocating data buffer");
mas01cr@204 1322 }
mas01cr@204 1323 data_buffer = (double *) tmp;
mas01cr@204 1324 }
mas01cr@204 1325 }
mas01cr@204 1326
mas01cr@204 1327 read(dbfid, data_buffer, trackTable[track] * sizeof(double) * dbH->dim);
mas01cr@204 1328
mas01cr@204 1329 // Dot product
mas01cr@204 1330 for(j=0; j<numVectors; j++)
mas01cr@204 1331 for(k=0; k<trackTable[track]; k++){
mas01cr@204 1332 qp=query+j*dbH->dim;
mas01cr@204 1333 sp=data_buffer+k*dbH->dim;
mas01cr@204 1334 DD[j][k]=0.0; // Initialize matched filter array
mas01cr@204 1335 dp=&D[j][k]; // point to correlation cell j,k
mas01cr@204 1336 *dp=0.0; // initialize correlation cell
mas01cr@204 1337 l=dbH->dim; // size of vectors
mas01cr@204 1338 while(l--)
mas01cr@204 1339 *dp+=*qp++**sp++;
mas01cr@204 1340 }
mas01cr@204 1341
mas01cr@204 1342 // Matched Filter
mas01cr@204 1343 // HOP SIZE == 1
mas01cr@204 1344 double* spd;
mas01cr@204 1345 if(HOP_SIZE==1){ // HOP_SIZE = shingleHop
mas01cr@204 1346 for(w=0; w<wL; w++)
mas01cr@204 1347 for(j=0; j<numVectors-w; j++){
mas01cr@204 1348 sp=DD[j];
mas01cr@204 1349 spd=D[j+w]+w;
mas01cr@204 1350 k=trackTable[track]-w;
mas01cr@204 1351 while(k--)
mas01cr@204 1352 *sp+++=*spd++;
mas01cr@204 1353 }
mas01cr@204 1354 }
mas01cr@204 1355
mas01cr@204 1356 else{ // HOP_SIZE != 1
mas01cr@204 1357 for(w=0; w<wL; w++)
mas01cr@204 1358 for(j=0; j<numVectors-w; j+=HOP_SIZE){
mas01cr@204 1359 sp=DD[j];
mas01cr@204 1360 spd=D[j+w]+w;
mas01cr@204 1361 for(k=0; k<trackTable[track]-w; k+=HOP_SIZE){
mas01cr@204 1362 *sp+=*spd;
mas01cr@204 1363 sp+=HOP_SIZE;
mas01cr@204 1364 spd+=HOP_SIZE;
mas01cr@204 1365 }
mas01cr@204 1366 }
mas01cr@204 1367 }
mas01cr@204 1368
mas01cr@204 1369 if(verbosity>3 && usingTimes) {
mas01cr@204 1370 std::cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[track] << std::endl;
mas01cr@204 1371 std::cerr.flush();
mas01cr@204 1372 }
mas01cr@204 1373
mas01cr@204 1374 if(!usingTimes ||
mas01cr@204 1375 (usingTimes
mas01cr@204 1376 && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){
mas01cr@204 1377
mas01cr@204 1378 if(verbosity>3 && usingTimes) {
mas01cr@204 1379 std::cerr << "within duration tolerance." << std::endl;
mas01cr@204 1380 std::cerr.flush();
mas01cr@204 1381 }
mas01cr@204 1382
mas01cr@204 1383 // Search for minimum distance by shingles (concatenated vectors)
mas01cr@204 1384 for(j=0;j<=numVectors-wL;j+=HOP_SIZE)
mas01cr@204 1385 for(k=0;k<=trackTable[track]-wL;k+=HOP_SIZE){
mas01cr@204 1386 thisDist=2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k];
mas01cr@204 1387 if(verbosity>9) {
mas01cr@204 1388 std::cerr << thisDist << " " << qnPtr[j] << " " << sNorm[trackIndexOffset+k] << std::endl;
mas01cr@204 1389 }
mas01cr@204 1390 // Gather chi^2 statistics
mas01cr@204 1391 if(thisDist<minSample)
mas01cr@204 1392 minSample=thisDist;
mas01cr@204 1393 else if(thisDist>maxSample)
mas01cr@204 1394 maxSample=thisDist;
mas01cr@204 1395 if(thisDist>1e-9){
mas01cr@204 1396 sampleCount++;
mas01cr@204 1397 sampleSum+=thisDist;
mas01cr@204 1398 logSampleSum+=log(thisDist);
mas01cr@204 1399 }
mas01cr@204 1400
mas01cr@204 1401 // diffL2 = fabs(qnPtr[j] - sNorm[trackIndexOffset+k]);
mas01cr@204 1402 // Power test
mas01cr@204 1403 if (usingPower) {
mas01cr@204 1404 if (!(powers_acceptable(qpPtr[j], sPower[trackIndexOffset + k]))) {
mas01cr@204 1405 thisDist = 1000000.0;
mas01cr@204 1406 }
mas01cr@204 1407 }
mas01cr@204 1408
mas01cr@204 1409 if(thisDist>=0 && thisDist<=radius){
mas01cr@204 1410 distances[0]++; // increment count
mas01cr@204 1411 break; // only need one track point per query point
mas01cr@204 1412 }
mas01cr@204 1413 }
mas01cr@204 1414 // How many points were below threshold ?
mas01cr@204 1415 thisDist=distances[0];
mas01cr@204 1416
mas01cr@204 1417 // Let's see the distances then...
mas01cr@204 1418 if(verbosity>3) {
mas01cr@204 1419 std::cerr << fileTable+track*O2_FILETABLESIZE << " " << thisDist << std::endl;
mas01cr@204 1420 }
mas01cr@204 1421
mas01cr@204 1422 // All the track stuff goes here
mas01cr@204 1423 n=trackNN;
mas01cr@204 1424 while(n--){
mas01cr@204 1425 if(thisDist>trackDistances[n]){
mas01cr@204 1426 if((n==0 || thisDist<=trackDistances[n-1])){
mas01cr@204 1427 // Copy all values above up the queue
mas01cr@204 1428 for( l=trackNN-1 ; l > n ; l--){
mas01cr@204 1429 trackDistances[l]=trackDistances[l-1];
mas01cr@204 1430 trackQIndexes[l]=trackQIndexes[l-1];
mas01cr@204 1431 trackSIndexes[l]=trackSIndexes[l-1];
mas01cr@204 1432 trackIDs[l]=trackIDs[l-1];
mas01cr@204 1433 }
mas01cr@204 1434 trackDistances[n]=thisDist;
mas01cr@204 1435 trackQIndexes[n]=qIndexes[0];
mas01cr@204 1436 trackSIndexes[n]=sIndexes[0];
mas01cr@204 1437 successfulTracks++;
mas01cr@204 1438 trackIDs[n]=track;
mas01cr@204 1439 break;
mas01cr@204 1440 }
mas01cr@204 1441 }
mas01cr@204 1442 else
mas01cr@204 1443 break;
mas01cr@204 1444 }
mas01cr@204 1445 } // Duration match
mas01cr@204 1446
mas01cr@204 1447 // Clean up current track
mas01cr@204 1448 if(D!=NULL){
mas01cr@204 1449 for(j=0; j<numVectors; j++)
mas01cr@204 1450 delete[] D[j];
mas01cr@204 1451 }
mas01cr@204 1452
mas01cr@204 1453 if(DD!=NULL){
mas01cr@204 1454 for(j=0; j<numVectors; j++)
mas01cr@204 1455 delete[] DD[j];
mas01cr@204 1456 }
mas01cr@204 1457 }
mas01cr@204 1458 // per-track reset array values
mas01cr@204 1459 for(unsigned k=0; k<pointNN; k++){
mas01cr@204 1460 distances[k]=0.0;
mas01cr@204 1461 qIndexes[k]=~0;
mas01cr@204 1462 sIndexes[k]=~0;
mas01cr@204 1463 }
mas01cr@204 1464 }
mas01cr@204 1465
mas01cr@204 1466 free(data_buffer);
mas01cr@204 1467
mas01cr@204 1468 gettimeofday(&tv2,NULL);
mas01cr@204 1469 if(verbosity>1) {
mas01cr@204 1470 std::cerr << std::endl << "processed tracks :" << processedTracks << " matched tracks: " << successfulTracks << " elapsed time:"
mas01cr@204 1471 << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << std::endl;
mas01cr@204 1472 std::cerr << "sampleCount: " << sampleCount << " sampleSum: " << sampleSum << " logSampleSum: " << logSampleSum
mas01cr@204 1473 << " minSample: " << minSample << " maxSample: " << maxSample << std::endl;
mas01cr@204 1474 }
mas01cr@204 1475
mas01cr@204 1476 if(adbQueryResponse==0){
mas01cr@204 1477 if(verbosity>1) {
mas01cr@204 1478 std::cerr<<std::endl;
mas01cr@204 1479 }
mas01cr@204 1480 // Output answer
mas01cr@204 1481 // Loop over nearest neighbours
mas01cr@204 1482 for(k=0; k < std::min(trackNN,successfulTracks); k++)
mas01cr@204 1483 std::cout << fileTable+trackIDs[k]*O2_FILETABLESIZE << " " << trackDistances[k] << std::endl;
mas01cr@204 1484 }
mas01cr@204 1485 else{ // Process Web Services Query
mas01cr@204 1486 int listLen = std::min(trackNN, processedTracks);
mas01cr@204 1487 adbQueryResponse->result.__sizeRlist=listLen;
mas01cr@204 1488 adbQueryResponse->result.__sizeDist=listLen;
mas01cr@204 1489 adbQueryResponse->result.__sizeQpos=listLen;
mas01cr@204 1490 adbQueryResponse->result.__sizeSpos=listLen;
mas01cr@204 1491 adbQueryResponse->result.Rlist= new char*[listLen];
mas01cr@204 1492 adbQueryResponse->result.Dist = new double[listLen];
mas01cr@204 1493 adbQueryResponse->result.Qpos = new unsigned int[listLen];
mas01cr@204 1494 adbQueryResponse->result.Spos = new unsigned int[listLen];
mas01cr@204 1495 for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){
mas01cr@204 1496 adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR];
mas01cr@204 1497 adbQueryResponse->result.Dist[k]=trackDistances[k];
mas01cr@204 1498 adbQueryResponse->result.Qpos[k]=trackQIndexes[k];
mas01cr@204 1499 adbQueryResponse->result.Spos[k]=trackSIndexes[k];
mas01cr@204 1500 sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE);
mas01cr@204 1501 }
mas01cr@204 1502 }
mas01cr@204 1503
mas01cr@204 1504 // Clean up
mas01cr@204 1505 if(trackOffsetTable)
mas01cr@204 1506 delete[] trackOffsetTable;
mas01cr@204 1507 if(queryCopy)
mas01cr@204 1508 delete[] queryCopy;
mas01cr@204 1509 if(qNorm)
mas01cr@204 1510 delete[] qNorm;
mas01cr@204 1511 if(sNorm)
mas01cr@204 1512 delete[] sNorm;
mas01cr@204 1513 if(qPower)
mas01cr@204 1514 delete[] qPower;
mas01cr@204 1515 if(sPower)
mas01cr@204 1516 delete[] sPower;
mas01cr@204 1517 if(D)
mas01cr@204 1518 delete[] D;
mas01cr@204 1519 if(DD)
mas01cr@204 1520 delete[] DD;
mas01cr@204 1521 if(timesdata)
mas01cr@204 1522 delete[] timesdata;
mas01cr@204 1523 if(querydurs)
mas01cr@204 1524 delete[] querydurs;
mas01cr@204 1525 if(meanDBdur)
mas01cr@204 1526 delete[] meanDBdur;
mas01cr@204 1527 }
mas01cr@204 1528
mas01cr@204 1529 // Unit norm block of features
mas01cr@204 1530 void audioDB::unitNorm(double* X, unsigned dim, unsigned n, double* qNorm){
mas01cr@204 1531 unsigned d;
mas01cr@204 1532 double L2, *p;
mas01cr@204 1533 if(verbosity>2) {
mas01cr@204 1534 std::cerr << "norming " << n << " vectors...";std::cerr.flush();
mas01cr@204 1535 }
mas01cr@204 1536 while(n--){
mas01cr@204 1537 p=X;
mas01cr@204 1538 L2=0.0;
mas01cr@204 1539 d=dim;
mas01cr@204 1540 while(d--){
mas01cr@204 1541 L2+=*p**p;
mas01cr@204 1542 p++;
mas01cr@204 1543 }
mas01cr@204 1544 /* L2=sqrt(L2);*/
mas01cr@204 1545 if(qNorm)
mas01cr@204 1546 *qNorm++=L2;
mas01cr@204 1547 /*
mas01cr@204 1548 oneOverL2 = 1.0/L2;
mas01cr@204 1549 d=dim;
mas01cr@204 1550 while(d--){
mas01cr@204 1551 *X*=oneOverL2;
mas01cr@204 1552 X++;
mas01cr@204 1553 */
mas01cr@204 1554 X+=dim;
mas01cr@204 1555 }
mas01cr@204 1556 if(verbosity>2) {
mas01cr@204 1557 std::cerr << "done..." << std::endl;
mas01cr@204 1558 }
mas01cr@204 1559 }