mas01cr@204: #include "audioDB.h" mas01cr@204: mas01cr@204: bool audioDB::powers_acceptable(double p1, double p2) { mas01cr@204: if (use_absolute_threshold) { mas01cr@204: if ((p1 < absolute_threshold) || (p2 < absolute_threshold)) { mas01cr@204: return false; mas01cr@204: } mas01cr@204: } mas01cr@204: if (use_relative_threshold) { mas01cr@204: if (fabs(p1-p2) > fabs(relative_threshold)) { mas01cr@204: return false; mas01cr@204: } mas01cr@204: } mas01cr@204: return true; mas01cr@204: } mas01cr@204: mas01cr@206: void audioDB::query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) { mas01cr@206: switch(queryType) { mas01cr@204: case O2_SEQUENCE_QUERY: mas01cr@204: if(radius==0) mas01cr@204: trackSequenceQueryNN(dbName, inFile, adbQueryResponse); mas01cr@204: else mas01cr@204: trackSequenceQueryRad(dbName, inFile, adbQueryResponse); mas01cr@204: break; mas01cr@204: default: mas01cr@204: error("unrecognized queryType in query()"); mas01cr@204: } mas01cr@204: } mas01cr@204: mas01cr@206: // return ordinal position of key in keyTable mas01cr@204: unsigned audioDB::getKeyPos(char* key){ mas01cr@204: for(unsigned k=0; knumFiles; k++) mas01cr@204: if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0) mas01cr@204: return k; mas01cr@204: error("Key not found",key); mas01cr@204: return O2_ERR_KEYNOTFOUND; mas01cr@204: } mas01cr@204: mas01cr@204: // This is a common pattern in sequence queries: what we are doing is mas01cr@204: // taking a window of length seqlen over a buffer of length length, mas01cr@204: // and placing the sum of the elements in that window in the first mas01cr@204: // element of the window: thus replacing all but the last seqlen mas01cr@204: // elements in the buffer the corresponding windowed sum. mas01cr@204: void audioDB::sequence_sum(double *buffer, int length, int seqlen) { mas01cr@204: double tmp1, tmp2, *ps; mas01cr@204: int j, w; mas01cr@204: mas01cr@204: tmp1 = *buffer; mas01cr@204: j = 1; mas01cr@204: w = seqlen - 1; mas01cr@204: while(w--) { mas01cr@204: *buffer += buffer[j++]; mas01cr@204: } mas01cr@204: ps = buffer + 1; mas01cr@204: w = length - seqlen; // +1 - 1 mas01cr@204: while(w--) { mas01cr@204: tmp2 = *ps; mas01cr@204: *ps = *(ps - 1) - tmp1 + *(ps + seqlen - 1); mas01cr@204: tmp1 = tmp2; mas01cr@204: ps++; mas01cr@204: } mas01cr@204: } mas01cr@204: mas01cr@204: void audioDB::sequence_sqrt(double *buffer, int length, int seqlen) { mas01cr@204: int w = length - seqlen + 1; mas01cr@204: while(w--) { mas01cr@204: *buffer = sqrt(*buffer); mas01cr@204: buffer++; mas01cr@204: } mas01cr@204: } mas01cr@204: mas01cr@204: void audioDB::sequence_average(double *buffer, int length, int seqlen) { mas01cr@204: int w = length - seqlen + 1; mas01cr@204: while(w--) { mas01cr@204: *buffer /= seqlen; mas01cr@204: buffer++; mas01cr@204: } mas01cr@204: } mas01cr@204: mas01cr@208: void audioDB::initialize_arrays(int track, unsigned int numVectors, double *query, double *data_buffer, double **D, double **DD) { mas01cr@208: unsigned int j, k, l, w; mas01cr@208: double *dp, *qp, *sp; mas01cr@208: mas01cr@208: const unsigned HOP_SIZE = sequenceHop; mas01cr@208: const unsigned wL = sequenceLength; mas01cr@208: mas01cr@208: for(j = 0; j < numVectors; j++) { mas01cr@208: // Sum products matrix mas01cr@208: D[j] = new double[trackTable[track]]; mas01cr@208: assert(D[j]); mas01cr@208: // Matched filter matrix mas01cr@208: DD[j]=new double[trackTable[track]]; mas01cr@208: assert(DD[j]); mas01cr@208: } mas01cr@208: mas01cr@208: // Dot product mas01cr@208: for(j = 0; j < numVectors; j++) mas01cr@208: for(k = 0; k < trackTable[track]; k++){ mas01cr@208: qp = query + j * dbH->dim; mas01cr@208: sp = data_buffer + k * dbH->dim; mas01cr@208: DD[j][k] = 0.0; // Initialize matched filter array mas01cr@208: dp = &D[j][k]; // point to correlation cell j,k mas01cr@208: *dp = 0.0; // initialize correlation cell mas01cr@208: l = dbH->dim; // size of vectors mas01cr@208: while(l--) mas01cr@208: *dp += *qp++ * *sp++; mas01cr@208: } mas01cr@208: mas01cr@208: // Matched Filter mas01cr@208: // HOP SIZE == 1 mas01cr@208: double* spd; mas01cr@208: if(HOP_SIZE == 1) { // HOP_SIZE = shingleHop mas01cr@208: for(w = 0; w < wL; w++) mas01cr@208: for(j = 0; j < numVectors - w; j++) { mas01cr@208: sp = DD[j]; mas01cr@208: spd = D[j+w] + w; mas01cr@208: k = trackTable[track] - w; mas01cr@208: while(k--) mas01cr@208: *sp++ += *spd++; mas01cr@208: } mas01cr@208: } else { // HOP_SIZE != 1 mas01cr@208: for(w = 0; w < wL; w++) mas01cr@208: for(j = 0; j < numVectors - w; j += HOP_SIZE) { mas01cr@208: sp = DD[j]; mas01cr@208: spd = D[j+w]+w; mas01cr@208: for(k = 0; k < trackTable[track] - w; k += HOP_SIZE) { mas01cr@208: *sp += *spd; mas01cr@208: sp += HOP_SIZE; mas01cr@208: spd += HOP_SIZE; mas01cr@208: } mas01cr@208: } mas01cr@208: } mas01cr@208: } mas01cr@208: mas01cr@204: void audioDB::trackSequenceQueryNN(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){ mas01cr@204: mas01cr@204: initTables(dbName, inFile); mas01cr@204: mas01cr@204: // For each input vector, find the closest pointNN matching output vectors and report mas01cr@204: // we use stdout in this stub version mas01cr@204: unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); mas01cr@204: double* query = (double*)(indata+sizeof(int)); mas01cr@204: double* queryCopy = 0; mas01cr@204: mas01cr@204: if(!(dbH->flags & O2_FLAG_L2NORM) ) mas01cr@204: error("Database must be L2 normed for sequence query","use -L2NORM"); mas01cr@204: mas01cr@204: if(numVectors1) { mas01cr@204: std::cerr << "performing norms ... "; std::cerr.flush(); mas01cr@204: } mas01cr@204: unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim); mas01cr@204: mas01cr@204: // Make a copy of the query mas01cr@204: queryCopy = new double[numVectors*dbH->dim]; mas01cr@204: memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); mas01cr@204: qNorm = new double[numVectors]; mas01cr@204: sNorm = new double[dbVectors]; mas01cr@204: assert(qNorm&&sNorm&&queryCopy&&sequenceLength); mas01cr@204: unitNorm(queryCopy, dbH->dim, numVectors, qNorm); mas01cr@204: query = queryCopy; mas01cr@204: mas01cr@204: // Make norm measurements relative to sequenceLength mas01cr@204: unsigned i,j; mas01cr@204: mas01cr@204: // Copy the L2 norm values to core to avoid disk random access later on mas01cr@204: memcpy(sNorm, l2normTable, dbVectors*sizeof(double)); mas01cr@204: double* qnPtr = qNorm; mas01cr@204: double* snPtr = sNorm; mas01cr@204: mas01cr@204: double *sPower = 0, *qPower = 0; mas01cr@204: double *spPtr = 0, *qpPtr = 0; mas01cr@204: mas01cr@204: if (usingPower) { mas01cr@204: if (!(dbH->flags & O2_FLAG_POWER)) { mas01cr@204: error("database not power-enabled", dbName); mas01cr@204: } mas01cr@204: sPower = new double[dbVectors]; mas01cr@204: spPtr = sPower; mas01cr@204: memcpy(sPower, powerTable, dbVectors * sizeof(double)); mas01cr@204: } mas01cr@204: mas01cr@204: for(i=0; inumFiles; i++){ mas01cr@204: if(trackTable[i]>=sequenceLength) { mas01cr@204: sequence_sum(snPtr, trackTable[i], sequenceLength); mas01cr@204: sequence_sqrt(snPtr, trackTable[i], sequenceLength); mas01cr@204: mas01cr@204: if (usingPower) { mas01cr@204: sequence_sum(spPtr, trackTable[i], sequenceLength); mas01cr@204: sequence_average(spPtr, trackTable[i], sequenceLength); mas01cr@204: } mas01cr@204: } mas01cr@204: snPtr += trackTable[i]; mas01cr@204: if (usingPower) { mas01cr@204: spPtr += trackTable[i]; mas01cr@204: } mas01cr@204: } mas01cr@204: mas01cr@204: sequence_sum(qnPtr, numVectors, sequenceLength); mas01cr@204: sequence_sqrt(qnPtr, numVectors, sequenceLength); mas01cr@204: mas01cr@204: if (usingPower) { mas01cr@204: qPower = new double[numVectors]; mas01cr@204: qpPtr = qPower; mas01cr@204: if (lseek(powerfd, sizeof(int), SEEK_SET) == (off_t) -1) { mas01cr@204: error("error seeking to data", powerFileName, "lseek"); mas01cr@204: } mas01cr@204: int count = read(powerfd, qPower, numVectors * sizeof(double)); mas01cr@204: if (count == -1) { mas01cr@204: error("error reading data", powerFileName, "read"); mas01cr@204: } mas01cr@204: if ((unsigned) count != numVectors * sizeof(double)) { mas01cr@204: error("short read", powerFileName); mas01cr@204: } mas01cr@204: mas01cr@204: sequence_sum(qpPtr, numVectors, sequenceLength); mas01cr@204: sequence_average(qpPtr, numVectors, sequenceLength); mas01cr@204: } mas01cr@204: mas01cr@204: if(verbosity>1) { mas01cr@204: std::cerr << "done." << std::endl; mas01cr@204: } mas01cr@204: mas01cr@204: if(verbosity>1) { mas01cr@204: std::cerr << "matching tracks..." << std::endl; mas01cr@204: } mas01cr@204: mas01cr@204: assert(pointNN>0 && pointNN<=O2_MAXNN); mas01cr@204: assert(trackNN>0 && trackNN<=O2_MAXNN); mas01cr@204: mas01cr@204: // Make temporary dynamic memory for results mas01cr@204: double trackDistances[trackNN]; mas01cr@204: unsigned trackIDs[trackNN]; mas01cr@204: unsigned trackQIndexes[trackNN]; mas01cr@204: unsigned trackSIndexes[trackNN]; mas01cr@204: mas01cr@204: double distances[pointNN]; mas01cr@204: unsigned qIndexes[pointNN]; mas01cr@204: unsigned sIndexes[pointNN]; mas01cr@204: mas01cr@204: mas01cr@204: unsigned k,l,m,n,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength; mas01cr@204: double thisDist; mas01cr@204: mas01cr@204: for(k=0; kflags & O2_FLAG_TIMES)){ mas01cr@204: std::cerr << "warning: ignoring query timestamps for non-timestamped database" << std::endl; mas01cr@204: usingTimes=0; mas01cr@204: } mas01cr@204: mas01cr@204: else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) mas01cr@204: std::cerr << "warning: no timestamps given for query. Ignoring database timestamps." << std::endl; mas01cr@204: mas01cr@204: else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ mas01cr@204: timesdata = new double[2*numVectors]; mas01cr@204: querydurs = new double[numVectors]; mas01cr@204: mas01cr@204: insertTimeStamps(numVectors, timesFile, timesdata); mas01cr@204: // Calculate durations of points mas01cr@204: for(k=0; k1) { mas01cr@204: std::cerr << "mean query file duration: " << meanQdur << std::endl; mas01cr@204: } mas01cr@204: meanDBdur = new double[dbH->numFiles]; mas01cr@204: assert(meanDBdur); mas01cr@204: for(k=0; knumFiles; k++){ mas01cr@204: meanDBdur[k]=0.0; mas01cr@204: for(j=0; jnumVectors || queryPoint>numVectors-wL+1) mas01cr@204: error("queryPoint > numVectors-wL+1 in query"); mas01cr@204: else{ mas01cr@204: if(verbosity>1) { mas01cr@204: std::cerr << "query point: " << queryPoint << std::endl; std::cerr.flush(); mas01cr@204: } mas01cr@204: query = query + queryPoint * dbH->dim; mas01cr@204: qnPtr = qnPtr + queryPoint; mas01cr@204: if (usingPower) { mas01cr@204: qpPtr = qpPtr + queryPoint; mas01cr@204: } mas01cr@204: numVectors=wL; mas01cr@204: } mas01cr@204: mas01cr@204: double ** D = 0; // Differences query and target mas01cr@204: double ** DD = 0; // Matched filter distance mas01cr@204: mas01cr@204: D = new double*[numVectors]; mas01cr@204: assert(D); mas01cr@204: DD = new double*[numVectors]; mas01cr@204: assert(DD); mas01cr@204: mas01cr@204: gettimeofday(&tv1, NULL); mas01cr@204: unsigned processedTracks = 0; mas01cr@204: unsigned successfulTracks=0; mas01cr@204: mas01cr@204: // build track offset table mas01cr@204: off_t *trackOffsetTable = new off_t[dbH->numFiles]; mas01cr@204: unsigned cumTrack=0; mas01cr@204: off_t trackIndexOffset; mas01cr@204: for(k=0; knumFiles;k++){ mas01cr@204: trackOffsetTable[k]=cumTrack; mas01cr@204: cumTrack+=trackTable[k]*dbH->dim; mas01cr@204: } mas01cr@204: mas01cr@204: char nextKey [MAXSTR]; mas01cr@204: mas01cr@204: // chi^2 statistics mas01cr@204: double sampleCount = 0; mas01cr@204: double sampleSum = 0; mas01cr@204: double logSampleSum = 0; mas01cr@204: double minSample = 1e9; mas01cr@204: double maxSample = 0; mas01cr@204: mas01cr@204: // Track loop mas01cr@204: size_t data_buffer_size = 0; mas01cr@204: double *data_buffer = 0; mas01cr@204: lseek(dbfid, dbH->dataOffset, SEEK_SET); mas01cr@204: mas01cr@204: for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++) { mas01cr@204: mas01cr@204: trackOffset = trackOffsetTable[track]; // numDoubles offset mas01cr@204: mas01cr@204: // get trackID from file if using a control file mas01cr@204: if(trackFile) { mas01cr@204: trackFile->getline(nextKey,MAXSTR); mas01cr@204: if(!trackFile->eof()) { mas01cr@204: track = getKeyPos(nextKey); mas01cr@204: trackOffset = trackOffsetTable[track]; mas01cr@204: lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET); mas01cr@204: } else { mas01cr@204: break; mas01cr@204: } mas01cr@204: } mas01cr@204: mas01cr@204: trackIndexOffset=trackOffset/dbH->dim; // numVectors offset mas01cr@204: mas01cr@204: if(sequenceLength<=trackTable[track]){ // test for short sequences mas01cr@204: mas01cr@204: if(verbosity>7) { mas01cr@204: std::cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";std::cerr.flush(); mas01cr@204: } mas01cr@204: mas01cr@204: if (trackTable[track] * sizeof(double) * dbH->dim > data_buffer_size) { mas01cr@204: if(data_buffer) { mas01cr@204: free(data_buffer); mas01cr@204: } mas01cr@204: { mas01cr@204: data_buffer_size = trackTable[track] * sizeof(double) * dbH->dim; mas01cr@204: void *tmp = malloc(data_buffer_size); mas01cr@204: if (tmp == NULL) { mas01cr@204: error("error allocating data buffer"); mas01cr@204: } mas01cr@204: data_buffer = (double *) tmp; mas01cr@204: } mas01cr@204: } mas01cr@204: mas01cr@204: read(dbfid, data_buffer, trackTable[track] * sizeof(double) * dbH->dim); mas01cr@204: mas01cr@208: initialize_arrays(track, numVectors, query, data_buffer, D, DD); mas01cr@207: mas01cr@204: if(verbosity>3 && usingTimes) { mas01cr@204: std::cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[track] << std::endl; mas01cr@204: std::cerr.flush(); mas01cr@204: } mas01cr@204: mas01cr@204: if(!usingTimes || mas01cr@204: (usingTimes mas01cr@204: && fabs(meanDBdur[track]-meanQdur)3 && usingTimes) { mas01cr@204: std::cerr << "within duration tolerance." << std::endl; mas01cr@204: std::cerr.flush(); mas01cr@204: } mas01cr@204: mas01cr@204: // Search for minimum distance by shingles (concatenated vectors) mas01cr@204: for(j=0;j<=numVectors-wL;j+=HOP_SIZE) mas01cr@204: for(k=0;k<=trackTable[track]-wL;k+=HOP_SIZE){ mas01cr@204: thisDist=2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k]; mas01cr@204: if(verbosity>9) { mas01cr@204: std::cerr << thisDist << " " << qnPtr[j] << " " << sNorm[trackIndexOffset+k] << std::endl; mas01cr@204: } mas01cr@204: // Gather chi^2 statistics mas01cr@204: if(thisDistmaxSample) mas01cr@204: maxSample=thisDist; mas01cr@204: if(thisDist>1e-9){ mas01cr@204: sampleCount++; mas01cr@204: sampleSum+=thisDist; mas01cr@204: logSampleSum+=log(thisDist); mas01cr@204: } mas01cr@204: mas01cr@204: // diffL2 = fabs(qnPtr[j] - sNorm[trackIndexOffset+k]); mas01cr@204: // Power test mas01cr@204: if (usingPower) { mas01cr@204: if (!(powers_acceptable(qpPtr[j], sPower[trackIndexOffset + k]))) { mas01cr@204: thisDist = 1000000.0; mas01cr@204: } mas01cr@204: } mas01cr@204: mas01cr@204: // k-NN match algorithm mas01cr@204: m=pointNN; mas01cr@204: while(m--){ mas01cr@204: if(thisDist<=distances[m]) mas01cr@204: if(m==0 || thisDist>=distances[m-1]){ mas01cr@204: // Shuffle distances up the list mas01cr@204: for(l=pointNN-1; l>m; l--){ mas01cr@204: distances[l]=distances[l-1]; mas01cr@204: qIndexes[l]=qIndexes[l-1]; mas01cr@204: sIndexes[l]=sIndexes[l-1]; mas01cr@204: } mas01cr@204: distances[m]=thisDist; mas01cr@204: if(usingQueryPoint) mas01cr@204: qIndexes[m]=queryPoint; mas01cr@204: else mas01cr@204: qIndexes[m]=j; mas01cr@204: sIndexes[m]=k; mas01cr@204: break; mas01cr@204: } mas01cr@204: } mas01cr@204: } mas01cr@204: // Calculate the mean of the N-Best matches mas01cr@204: thisDist=0.0; mas01cr@204: for(m=0; m3) { mas01cr@204: std::cerr << fileTable+track*O2_FILETABLESIZE << " " << thisDist << std::endl; mas01cr@204: } mas01cr@204: mas01cr@204: mas01cr@204: // All the track stuff goes here mas01cr@204: n=trackNN; mas01cr@204: while(n--){ mas01cr@204: if(thisDist<=trackDistances[n]){ mas01cr@204: if((n==0 || thisDist>=trackDistances[n-1])){ mas01cr@204: // Copy all values above up the queue mas01cr@204: for( l=trackNN-1 ; l > n ; l--){ mas01cr@204: trackDistances[l]=trackDistances[l-1]; mas01cr@204: trackQIndexes[l]=trackQIndexes[l-1]; mas01cr@204: trackSIndexes[l]=trackSIndexes[l-1]; mas01cr@204: trackIDs[l]=trackIDs[l-1]; mas01cr@204: } mas01cr@204: trackDistances[n]=thisDist; mas01cr@204: trackQIndexes[n]=qIndexes[0]; mas01cr@204: trackSIndexes[n]=sIndexes[0]; mas01cr@204: successfulTracks++; mas01cr@204: trackIDs[n]=track; mas01cr@204: break; mas01cr@204: } mas01cr@204: } mas01cr@204: else mas01cr@204: break; mas01cr@204: } mas01cr@204: } // Duration match mas01cr@204: mas01cr@204: // Clean up current track mas01cr@204: if(D!=NULL){ mas01cr@204: for(j=0; j1) { mas01cr@204: std::cerr << std::endl << "processed tracks :" << processedTracks << " matched tracks: " << successfulTracks << " elapsed time:" mas01cr@204: << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << std::endl; mas01cr@204: std::cerr << "sampleCount: " << sampleCount << " sampleSum: " << sampleSum << " logSampleSum: " << logSampleSum mas01cr@204: << " minSample: " << minSample << " maxSample: " << maxSample << std::endl; mas01cr@204: } mas01cr@204: if(adbQueryResponse==0){ mas01cr@204: if(verbosity>1) { mas01cr@204: std::cerr<result.__sizeRlist=listLen; mas01cr@204: adbQueryResponse->result.__sizeDist=listLen; mas01cr@204: adbQueryResponse->result.__sizeQpos=listLen; mas01cr@204: adbQueryResponse->result.__sizeSpos=listLen; mas01cr@204: adbQueryResponse->result.Rlist= new char*[listLen]; mas01cr@204: adbQueryResponse->result.Dist = new double[listLen]; mas01cr@204: adbQueryResponse->result.Qpos = new unsigned int[listLen]; mas01cr@204: adbQueryResponse->result.Spos = new unsigned int[listLen]; mas01cr@204: for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){ mas01cr@204: adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR]; mas01cr@204: adbQueryResponse->result.Dist[k]=trackDistances[k]; mas01cr@204: adbQueryResponse->result.Qpos[k]=trackQIndexes[k]; mas01cr@204: adbQueryResponse->result.Spos[k]=trackSIndexes[k]; mas01cr@204: sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE); mas01cr@204: } mas01cr@204: } mas01cr@204: mas01cr@204: // Clean up mas01cr@204: if(trackOffsetTable) mas01cr@204: delete[] trackOffsetTable; mas01cr@204: if(queryCopy) mas01cr@204: delete[] queryCopy; mas01cr@204: if(qNorm) mas01cr@204: delete[] qNorm; mas01cr@204: if(sNorm) mas01cr@204: delete[] sNorm; mas01cr@204: if(qPower) mas01cr@204: delete[] qPower; mas01cr@204: if(sPower) mas01cr@204: delete[] sPower; mas01cr@204: if(D) mas01cr@204: delete[] D; mas01cr@204: if(DD) mas01cr@204: delete[] DD; mas01cr@204: if(timesdata) mas01cr@204: delete[] timesdata; mas01cr@204: if(querydurs) mas01cr@204: delete[] querydurs; mas01cr@204: if(meanDBdur) mas01cr@204: delete[] meanDBdur; mas01cr@204: } mas01cr@204: mas01cr@204: void audioDB::trackSequenceQueryRad(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){ mas01cr@204: mas01cr@204: initTables(dbName, inFile); mas01cr@204: mas01cr@204: // For each input vector, find the closest pointNN matching output vectors and report mas01cr@204: // we use stdout in this stub version mas01cr@204: unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); mas01cr@204: double* query = (double*)(indata+sizeof(int)); mas01cr@204: double* queryCopy = 0; mas01cr@204: mas01cr@204: if(!(dbH->flags & O2_FLAG_L2NORM) ) mas01cr@204: error("Database must be L2 normed for sequence query","use -l2norm"); mas01cr@204: mas01cr@204: if(verbosity>1) { mas01cr@204: std::cerr << "performing norms ... "; std::cerr.flush(); mas01cr@204: } mas01cr@204: unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim); mas01cr@204: mas01cr@204: // Make a copy of the query mas01cr@204: queryCopy = new double[numVectors*dbH->dim]; mas01cr@204: memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); mas01cr@204: qNorm = new double[numVectors]; mas01cr@204: sNorm = new double[dbVectors]; mas01cr@204: assert(qNorm&&sNorm&&queryCopy&&sequenceLength); mas01cr@204: unitNorm(queryCopy, dbH->dim, numVectors, qNorm); mas01cr@204: query = queryCopy; mas01cr@204: mas01cr@204: // Make norm measurements relative to sequenceLength mas01cr@204: unsigned i,j; mas01cr@204: mas01cr@204: // Copy the L2 norm values to core to avoid disk random access later on mas01cr@204: memcpy(sNorm, l2normTable, dbVectors*sizeof(double)); mas01cr@204: double* snPtr = sNorm; mas01cr@204: double* qnPtr = qNorm; mas01cr@204: mas01cr@204: double *sPower = 0, *qPower = 0; mas01cr@204: double *spPtr = 0, *qpPtr = 0; mas01cr@204: mas01cr@204: if (usingPower) { mas01cr@204: if(!(dbH->flags & O2_FLAG_POWER)) { mas01cr@204: error("database not power-enabled", dbName); mas01cr@204: } mas01cr@204: sPower = new double[dbVectors]; mas01cr@204: spPtr = sPower; mas01cr@204: memcpy(sPower, powerTable, dbVectors * sizeof(double)); mas01cr@204: } mas01cr@204: mas01cr@204: for(i=0; inumFiles; i++){ mas01cr@204: if(trackTable[i]>=sequenceLength) { mas01cr@204: sequence_sum(snPtr, trackTable[i], sequenceLength); mas01cr@204: sequence_sqrt(snPtr, trackTable[i], sequenceLength); mas01cr@204: if (usingPower) { mas01cr@204: sequence_sum(spPtr, trackTable[i], sequenceLength); mas01cr@204: sequence_average(spPtr, trackTable[i], sequenceLength); mas01cr@204: } mas01cr@204: } mas01cr@204: snPtr += trackTable[i]; mas01cr@204: if (usingPower) { mas01cr@204: spPtr += trackTable[i]; mas01cr@204: } mas01cr@204: } mas01cr@204: mas01cr@204: sequence_sum(qnPtr, numVectors, sequenceLength); mas01cr@204: sequence_sqrt(qnPtr, numVectors, sequenceLength); mas01cr@204: mas01cr@204: if (usingPower) { mas01cr@204: qPower = new double[numVectors]; mas01cr@204: qpPtr = qPower; mas01cr@204: if (lseek(powerfd, sizeof(int), SEEK_SET) == (off_t) -1) { mas01cr@204: error("error seeking to data", powerFileName, "lseek"); mas01cr@204: } mas01cr@204: int count = read(powerfd, qPower, numVectors * sizeof(double)); mas01cr@204: if (count == -1) { mas01cr@204: error("error reading data", powerFileName, "read"); mas01cr@204: } mas01cr@204: if ((unsigned) count != numVectors * sizeof(double)) { mas01cr@204: error("short read", powerFileName); mas01cr@204: } mas01cr@204: mas01cr@204: sequence_sum(qpPtr, numVectors, sequenceLength); mas01cr@204: sequence_average(qpPtr, numVectors, sequenceLength); mas01cr@204: } mas01cr@204: mas01cr@204: if(verbosity>1) { mas01cr@204: std::cerr << "done." << std::endl; mas01cr@204: } mas01cr@204: mas01cr@204: if(verbosity>1) { mas01cr@204: std::cerr << "matching tracks..." << std::endl; mas01cr@204: } mas01cr@204: mas01cr@204: assert(pointNN>0 && pointNN<=O2_MAXNN); mas01cr@204: assert(trackNN>0 && trackNN<=O2_MAXNN); mas01cr@204: mas01cr@204: // Make temporary dynamic memory for results mas01cr@204: double trackDistances[trackNN]; mas01cr@204: unsigned trackIDs[trackNN]; mas01cr@204: unsigned trackQIndexes[trackNN]; mas01cr@204: unsigned trackSIndexes[trackNN]; mas01cr@204: mas01cr@204: double distances[pointNN]; mas01cr@204: unsigned qIndexes[pointNN]; mas01cr@204: unsigned sIndexes[pointNN]; mas01cr@204: mas01cr@204: mas01cr@208: unsigned k,l,n,track,trackOffset=0; mas01cr@208: unsigned const HOP_SIZE=sequenceHop; mas01cr@208: unsigned const wL=sequenceLength; mas01cr@204: double thisDist; mas01cr@204: mas01cr@204: for(k=0; kflags & O2_FLAG_TIMES)){ mas01cr@204: std::cerr << "warning: ignoring query timestamps for non-timestamped database" << std::endl; mas01cr@204: usingTimes=0; mas01cr@204: } mas01cr@204: mas01cr@204: else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) mas01cr@204: std::cerr << "warning: no timestamps given for query. Ignoring database timestamps." << std::endl; mas01cr@204: mas01cr@204: else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ mas01cr@204: timesdata = new double[2*numVectors]; mas01cr@204: querydurs = new double[numVectors]; mas01cr@204: mas01cr@204: insertTimeStamps(numVectors, timesFile, timesdata); mas01cr@204: // Calculate durations of points mas01cr@204: for(k=0; k1) { mas01cr@204: std::cerr << "mean query file duration: " << meanQdur << std::endl; mas01cr@204: } mas01cr@204: meanDBdur = new double[dbH->numFiles]; mas01cr@204: assert(meanDBdur); mas01cr@204: for(k=0; knumFiles; k++){ mas01cr@204: meanDBdur[k]=0.0; mas01cr@204: for(j=0; jnumVectors || queryPoint>numVectors-wL+1) mas01cr@204: error("queryPoint > numVectors-wL+1 in query"); mas01cr@204: else{ mas01cr@204: if(verbosity>1) { mas01cr@204: std::cerr << "query point: " << queryPoint << std::endl; std::cerr.flush(); mas01cr@204: } mas01cr@204: query = query + queryPoint*dbH->dim; mas01cr@204: qnPtr = qnPtr + queryPoint; mas01cr@204: if (usingPower) { mas01cr@204: qpPtr = qpPtr + queryPoint; mas01cr@204: } mas01cr@204: numVectors=wL; mas01cr@204: } mas01cr@204: mas01cr@204: double ** D = 0; // Differences query and target mas01cr@204: double ** DD = 0; // Matched filter distance mas01cr@204: mas01cr@204: D = new double*[numVectors]; mas01cr@204: assert(D); mas01cr@204: DD = new double*[numVectors]; mas01cr@204: assert(DD); mas01cr@204: mas01cr@204: gettimeofday(&tv1, NULL); mas01cr@204: unsigned processedTracks = 0; mas01cr@204: unsigned successfulTracks=0; mas01cr@204: mas01cr@204: // build track offset table mas01cr@204: off_t *trackOffsetTable = new off_t[dbH->numFiles]; mas01cr@204: unsigned cumTrack=0; mas01cr@204: off_t trackIndexOffset; mas01cr@204: for(k=0; knumFiles;k++){ mas01cr@204: trackOffsetTable[k]=cumTrack; mas01cr@204: cumTrack+=trackTable[k]*dbH->dim; mas01cr@204: } mas01cr@204: mas01cr@204: char nextKey [MAXSTR]; mas01cr@204: mas01cr@204: // chi^2 statistics mas01cr@204: double sampleCount = 0; mas01cr@204: double sampleSum = 0; mas01cr@204: double logSampleSum = 0; mas01cr@204: double minSample = 1e9; mas01cr@204: double maxSample = 0; mas01cr@204: mas01cr@204: // Track loop mas01cr@204: size_t data_buffer_size = 0; mas01cr@204: double *data_buffer = 0; mas01cr@204: lseek(dbfid, dbH->dataOffset, SEEK_SET); mas01cr@204: mas01cr@204: for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){ mas01cr@204: mas01cr@204: trackOffset = trackOffsetTable[track]; // numDoubles offset mas01cr@204: mas01cr@204: // get trackID from file if using a control file mas01cr@204: if(trackFile) { mas01cr@204: trackFile->getline(nextKey,MAXSTR); mas01cr@204: if(!trackFile->eof()) { mas01cr@204: track = getKeyPos(nextKey); mas01cr@204: trackOffset = trackOffsetTable[track]; mas01cr@204: lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET); mas01cr@204: } else { mas01cr@204: break; mas01cr@204: } mas01cr@204: } mas01cr@204: mas01cr@204: trackIndexOffset=trackOffset/dbH->dim; // numVectors offset mas01cr@204: mas01cr@204: if(sequenceLength<=trackTable[track]){ // test for short sequences mas01cr@204: mas01cr@204: if(verbosity>7) { mas01cr@204: std::cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";std::cerr.flush(); mas01cr@204: } mas01cr@204: mas01cr@204: if (trackTable[track] * sizeof(double) * dbH->dim > data_buffer_size) { mas01cr@204: if(data_buffer) { mas01cr@204: free(data_buffer); mas01cr@204: } mas01cr@204: { mas01cr@204: data_buffer_size = trackTable[track] * sizeof(double) * dbH->dim; mas01cr@204: void *tmp = malloc(data_buffer_size); mas01cr@204: if (tmp == NULL) { mas01cr@204: error("error allocating data buffer"); mas01cr@204: } mas01cr@204: data_buffer = (double *) tmp; mas01cr@204: } mas01cr@204: } mas01cr@204: mas01cr@204: read(dbfid, data_buffer, trackTable[track] * sizeof(double) * dbH->dim); mas01cr@204: mas01cr@208: initialize_arrays(track, numVectors, query, data_buffer, D, DD); mas01cr@207: mas01cr@204: if(verbosity>3 && usingTimes) { mas01cr@204: std::cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[track] << std::endl; mas01cr@204: std::cerr.flush(); mas01cr@204: } mas01cr@204: mas01cr@204: if(!usingTimes || mas01cr@204: (usingTimes mas01cr@204: && fabs(meanDBdur[track]-meanQdur)3 && usingTimes) { mas01cr@204: std::cerr << "within duration tolerance." << std::endl; mas01cr@204: std::cerr.flush(); mas01cr@204: } mas01cr@204: mas01cr@204: // Search for minimum distance by shingles (concatenated vectors) mas01cr@204: for(j=0;j<=numVectors-wL;j+=HOP_SIZE) mas01cr@204: for(k=0;k<=trackTable[track]-wL;k+=HOP_SIZE){ mas01cr@204: thisDist=2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k]; mas01cr@204: if(verbosity>9) { mas01cr@204: std::cerr << thisDist << " " << qnPtr[j] << " " << sNorm[trackIndexOffset+k] << std::endl; mas01cr@204: } mas01cr@204: // Gather chi^2 statistics mas01cr@204: if(thisDistmaxSample) mas01cr@204: maxSample=thisDist; mas01cr@204: if(thisDist>1e-9){ mas01cr@204: sampleCount++; mas01cr@204: sampleSum+=thisDist; mas01cr@204: logSampleSum+=log(thisDist); mas01cr@204: } mas01cr@204: mas01cr@204: // diffL2 = fabs(qnPtr[j] - sNorm[trackIndexOffset+k]); mas01cr@204: // Power test mas01cr@204: if (usingPower) { mas01cr@204: if (!(powers_acceptable(qpPtr[j], sPower[trackIndexOffset + k]))) { mas01cr@204: thisDist = 1000000.0; mas01cr@204: } mas01cr@204: } mas01cr@204: mas01cr@204: if(thisDist>=0 && thisDist<=radius){ mas01cr@204: distances[0]++; // increment count mas01cr@204: break; // only need one track point per query point mas01cr@204: } mas01cr@204: } mas01cr@204: // How many points were below threshold ? mas01cr@204: thisDist=distances[0]; mas01cr@204: mas01cr@204: // Let's see the distances then... mas01cr@204: if(verbosity>3) { mas01cr@204: std::cerr << fileTable+track*O2_FILETABLESIZE << " " << thisDist << std::endl; mas01cr@204: } mas01cr@204: mas01cr@204: // All the track stuff goes here mas01cr@204: n=trackNN; mas01cr@204: while(n--){ mas01cr@204: if(thisDist>trackDistances[n]){ mas01cr@204: if((n==0 || thisDist<=trackDistances[n-1])){ mas01cr@204: // Copy all values above up the queue mas01cr@204: for( l=trackNN-1 ; l > n ; l--){ mas01cr@204: trackDistances[l]=trackDistances[l-1]; mas01cr@204: trackQIndexes[l]=trackQIndexes[l-1]; mas01cr@204: trackSIndexes[l]=trackSIndexes[l-1]; mas01cr@204: trackIDs[l]=trackIDs[l-1]; mas01cr@204: } mas01cr@204: trackDistances[n]=thisDist; mas01cr@204: trackQIndexes[n]=qIndexes[0]; mas01cr@204: trackSIndexes[n]=sIndexes[0]; mas01cr@204: successfulTracks++; mas01cr@204: trackIDs[n]=track; mas01cr@204: break; mas01cr@204: } mas01cr@204: } mas01cr@204: else mas01cr@204: break; mas01cr@204: } mas01cr@204: } // Duration match mas01cr@204: mas01cr@204: // Clean up current track mas01cr@204: if(D!=NULL){ mas01cr@204: for(j=0; j1) { mas01cr@204: std::cerr << std::endl << "processed tracks :" << processedTracks << " matched tracks: " << successfulTracks << " elapsed time:" mas01cr@204: << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << std::endl; mas01cr@204: std::cerr << "sampleCount: " << sampleCount << " sampleSum: " << sampleSum << " logSampleSum: " << logSampleSum mas01cr@204: << " minSample: " << minSample << " maxSample: " << maxSample << std::endl; mas01cr@204: } mas01cr@204: mas01cr@204: if(adbQueryResponse==0){ mas01cr@204: if(verbosity>1) { mas01cr@204: std::cerr<result.__sizeRlist=listLen; mas01cr@204: adbQueryResponse->result.__sizeDist=listLen; mas01cr@204: adbQueryResponse->result.__sizeQpos=listLen; mas01cr@204: adbQueryResponse->result.__sizeSpos=listLen; mas01cr@204: adbQueryResponse->result.Rlist= new char*[listLen]; mas01cr@204: adbQueryResponse->result.Dist = new double[listLen]; mas01cr@204: adbQueryResponse->result.Qpos = new unsigned int[listLen]; mas01cr@204: adbQueryResponse->result.Spos = new unsigned int[listLen]; mas01cr@204: for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){ mas01cr@204: adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR]; mas01cr@204: adbQueryResponse->result.Dist[k]=trackDistances[k]; mas01cr@204: adbQueryResponse->result.Qpos[k]=trackQIndexes[k]; mas01cr@204: adbQueryResponse->result.Spos[k]=trackSIndexes[k]; mas01cr@204: sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE); mas01cr@204: } mas01cr@204: } mas01cr@204: mas01cr@204: // Clean up mas01cr@204: if(trackOffsetTable) mas01cr@204: delete[] trackOffsetTable; mas01cr@204: if(queryCopy) mas01cr@204: delete[] queryCopy; mas01cr@204: if(qNorm) mas01cr@204: delete[] qNorm; mas01cr@204: if(sNorm) mas01cr@204: delete[] sNorm; mas01cr@204: if(qPower) mas01cr@204: delete[] qPower; mas01cr@204: if(sPower) mas01cr@204: delete[] sPower; mas01cr@204: if(D) mas01cr@204: delete[] D; mas01cr@204: if(DD) mas01cr@204: delete[] DD; mas01cr@204: if(timesdata) mas01cr@204: delete[] timesdata; mas01cr@204: if(querydurs) mas01cr@204: delete[] querydurs; mas01cr@204: if(meanDBdur) mas01cr@204: delete[] meanDBdur; mas01cr@204: } mas01cr@204: mas01cr@204: // Unit norm block of features mas01cr@204: void audioDB::unitNorm(double* X, unsigned dim, unsigned n, double* qNorm){ mas01cr@204: unsigned d; mas01cr@204: double L2, *p; mas01cr@204: if(verbosity>2) { mas01cr@204: std::cerr << "norming " << n << " vectors...";std::cerr.flush(); mas01cr@204: } mas01cr@204: while(n--){ mas01cr@204: p=X; mas01cr@204: L2=0.0; mas01cr@204: d=dim; mas01cr@204: while(d--){ mas01cr@204: L2+=*p**p; mas01cr@204: p++; mas01cr@204: } mas01cr@204: /* L2=sqrt(L2);*/ mas01cr@204: if(qNorm) mas01cr@204: *qNorm++=L2; mas01cr@204: /* mas01cr@204: oneOverL2 = 1.0/L2; mas01cr@204: d=dim; mas01cr@204: while(d--){ mas01cr@204: *X*=oneOverL2; mas01cr@204: X++; mas01cr@204: */ mas01cr@204: X+=dim; mas01cr@204: } mas01cr@204: if(verbosity>2) { mas01cr@204: std::cerr << "done..." << std::endl; mas01cr@204: } mas01cr@204: }