# HG changeset patch # User mas01cr # Date 1187258695 0 # Node ID 42498552b30c9e2feb5aff38a40ee420edff63e0 # Parent 4346f4b717e4af7abdfe132750465c17ce891eb6 Merged trunk changes -r30:38 onto audiodb-debian branch diff -r 4346f4b717e4 -r 42498552b30c Makefile --- a/Makefile Thu Aug 02 11:40:22 2007 +0000 +++ b/Makefile Thu Aug 16 10:04:55 2007 +0000 @@ -1,4 +1,4 @@ -CFLAGS=-ggdb +CFLAGS=-O3 LIBS=-lgsoap++ EXECUTABLE=audioDB diff -r 4346f4b717e4 -r 42498552b30c Makefile.osx --- a/Makefile.osx Thu Aug 02 11:40:22 2007 +0000 +++ b/Makefile.osx Thu Aug 16 10:04:55 2007 +0000 @@ -1,5 +1,5 @@ -CFLAGS=-ggdb +CFLAGS=-O3 LIBDIR= LIBS= SOAPDIR=bin_x86_osx/soapcpp-macosx-2.1.6 diff -r 4346f4b717e4 -r 42498552b30c audioDB.cpp --- a/audioDB.cpp Thu Aug 02 11:40:22 2007 +0000 +++ b/audioDB.cpp Thu Aug 16 10:04:55 2007 +0000 @@ -50,9 +50,9 @@ -Q, --QUERY=searchtype content-based search on --database using --features as a query. Optionally restrict the - search to those segments identified in a + search to those tracks identified in a --keyList. (possible values="point", - "segment", "sequence") + "track", "sequence") -p, --qpoint=position ordinal position of query start point in --features file. (default=`0') -e, --exhaustive exhaustive search: iterate through all query @@ -61,7 +61,7 @@ -n, --pointnn=numpoints number of point nearest neighbours to use in retrieval. (default=`10') -R, --radius=DOUBLE radius search, returns all - points/segments/sequences inside given radius. + points/tracks/sequences inside given radius. (default=`1.0') -x, --expandfactor=DOUBLE time compress/expand factor of result length to query length [1.0 .. 100.0]. (default=`1.1') @@ -107,8 +107,8 @@ dbName(0), inFile(0), key(0), - segFile(0), - segFileName(0), + trackFile(0), + trackFileName(0), timesFile(0), timesFileName(0), usingTimes(0), @@ -121,8 +121,8 @@ queryType(O2_FLAG_POINT_QUERY), verbosity(1), pointNN(O2_DEFAULT_POINTNN), - segNN(O2_DEFAULT_SEGNN), - segTable(0), + trackNN(O2_DEFAULT_TRACKNN), + trackTable(0), fileTable(0), dataBuf(0), l2normTable(0), @@ -135,7 +135,8 @@ isClient(0), isServer(0), port(0), - timesTol(0.1){ + timesTol(0.1), + radius(0){ if(processArgs(argc, argv)<0){ printf("No command found.\n"); @@ -226,6 +227,17 @@ } } + if(args_info.radius_given){ + radius=args_info.radius_arg; + if(radius<=0 || radius>1000000000){ + cerr << "Warning: radius out of range" << endl; + exit(1); + } + else + if(verbosity>3) + cerr << "Setting radius to " << radius << endl; + } + if(args_info.SERVER_given){ command=COM_SERVER; port=args_info.SERVER_arg; @@ -292,9 +304,9 @@ /* TO DO: REPLACE WITH if(args_info.keyList_given){ - segFileName=args_info.keyList_arg; - if(strlen(segFileName)>0 && !(segFile = new ifstream(segFileName,ios::in))) - error("Could not open keyList file for reading",segFileName); + trackFileName=args_info.keyList_arg; + if(strlen(trackFileName)>0 && !(trackFile = new ifstream(trackFileName,ios::in))) + error("Could not open keyList file for reading",trackFileName); } AND UPDATE BATCHINSERT() */ @@ -317,9 +329,9 @@ inFile=args_info.features_arg; if(args_info.keyList_given){ - segFileName=args_info.keyList_arg; - if(strlen(segFileName)>0 && !(segFile = new ifstream(segFileName,ios::in))) - error("Could not open keyList file for reading",segFileName); + trackFileName=args_info.keyList_arg; + if(strlen(trackFileName)>0 && !(trackFile = new ifstream(trackFileName,ios::in))) + error("Could not open keyList file for reading",trackFileName); } if(args_info.times_given){ @@ -332,8 +344,8 @@ } // query type - if(strncmp(args_info.QUERY_arg, "segment", MAXSTR)==0) - queryType=O2_FLAG_SEG_QUERY; + if(strncmp(args_info.QUERY_arg, "track", MAXSTR)==0) + queryType=O2_FLAG_TRACK_QUERY; else if(strncmp(args_info.QUERY_arg, "point", MAXSTR)==0) queryType=O2_FLAG_POINT_QUERY; else if(strncmp(args_info.QUERY_arg, "sequence", MAXSTR)==0) @@ -355,8 +367,8 @@ - segNN=args_info.resultlength_arg; - if(segNN<1 || segNN >1000) + trackNN=args_info.resultlength_arg; + if(trackNN<1 || trackNN >10000) error("resultlength out of range: 1 <= resultlength <= 1000"); @@ -383,13 +395,13 @@ --------------------------------------------------------------------------------- - keyTable : list of keys of segments + keyTable : list of keys of tracks -------------------------------------------------------------------------- | key 256 bytes | -------------------------------------------------------------------------- O2_MAXFILES*02_FILENAMELENGTH - segTable : Maps implicit feature index to a feature vector matrix + trackTable : Maps implicit feature index to a feature vector matrix -------------------------------------------------------------------------- | numVectors (4 bytes) | -------------------------------------------------------------------------- @@ -478,8 +490,8 @@ error("error reading db header"); fileTableOffset = O2_HEADERSIZE; - segTableOffset = fileTableOffset + O2_FILETABLESIZE*O2_MAXFILES; - dataoffset = segTableOffset + O2_SEGTABLESIZE*O2_MAXFILES; + trackTableOffset = fileTableOffset + O2_FILETABLESIZE*O2_MAXFILES; + dataoffset = trackTableOffset + O2_TRACKTABLESIZE*O2_MAXFILES; l2normTableOffset = O2_DEFAULTDBSIZE - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double); timesTableOffset = l2normTableOffset - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double); @@ -512,7 +524,7 @@ // Make some handy tables with correct types fileTable= (char*)(db+fileTableOffset); - segTable = (unsigned*)(db+segTableOffset); + trackTable = (unsigned*)(db+trackTableOffset); dataBuf = (double*)(db+dataoffset); l2normTable = (double*)(db+l2normTableOffset); timesTable = (double*)(db+timesTableOffset); @@ -546,7 +558,7 @@ return; } - // Make a segment index table of features to file indexes + // Make a track index table of features to file indexes unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); if(!numVectors){ if(verbosity) @@ -577,9 +589,9 @@ // Copy the header back to the database memcpy (db, dbH, sizeof(dbTableHeaderT)); - // Update segment to file index map - //memcpy (db+segTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned)); - memcpy (segTable+dbH->numFiles-1, &numVectors, sizeof(unsigned)); + // Update track to file index map + //memcpy (db+trackTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned)); + memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned)); // Update the feature database memcpy (db+dataoffset+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int)); @@ -679,8 +691,8 @@ error("Must use timestamps with timestamped database","use --times"); fileTableOffset = O2_HEADERSIZE; - segTableOffset = fileTableOffset + O2_FILETABLESIZE*O2_MAXFILES; - dataoffset = segTableOffset + O2_SEGTABLESIZE*O2_MAXFILES; + trackTableOffset = fileTableOffset + O2_FILETABLESIZE*O2_MAXFILES; + dataoffset = trackTableOffset + O2_TRACKTABLESIZE*O2_MAXFILES; l2normTableOffset = O2_DEFAULTDBSIZE - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double); timesTableOffset = l2normTableOffset - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double); @@ -722,7 +734,7 @@ // Make some handy tables with correct types fileTable= (char*)(db+fileTableOffset); - segTable = (unsigned*)(db+segTableOffset); + trackTable = (unsigned*)(db+trackTableOffset); dataBuf = (double*)(db+dataoffset); l2normTable = (double*)(db+l2normTableOffset); timesTable = (double*)(db+timesTableOffset); @@ -764,7 +776,7 @@ } else{ - // Make a segment index table of features to file indexes + // Make a track index table of features to file indexes unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); if(!numVectors){ if(verbosity) @@ -798,9 +810,9 @@ // Copy the header back to the database memcpy (db, dbH, sizeof(dbTableHeaderT)); - // Update segment to file index map - //memcpy (db+segTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned)); - memcpy (segTable+dbH->numFiles-1, &numVectors, sizeof(unsigned)); + // Update track to file index map + //memcpy (db+trackTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned)); + memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned)); // Update the feature database memcpy (db+dataoffset+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int)); @@ -849,14 +861,14 @@ soap_done(&soap); } -void audioDB::ws_query(const char*dbName, const char *segKey, const char* hostport){ +void audioDB::ws_query(const char*dbName, const char *trackKey, const char* hostport){ struct soap soap; adb__queryResult adbQueryResult; soap_init(&soap); if(soap_call_adb__query(&soap,hostport,NULL, - (char*)dbName,(char*)segKey,(char*)segFileName,(char*)timesFileName, - queryType, queryPoint, pointNN, segNN, sequenceLength, adbQueryResult)==SOAP_OK){ + (char*)dbName,(char*)trackKey,(char*)trackFileName,(char*)timesFileName, + queryType, queryPoint, pointNN, trackNN, sequenceLength, adbQueryResult)==SOAP_OK){ //std::cerr << "result list length:" << adbQueryResult.__sizeRlist << std::endl; for(int i=0; inumFiles; k++){ - if(segTable[k]numFiles; k++) - cout << fileTable+k*O2_FILETABLESIZE << " " << segTable[k] << endl; + for(unsigned k=0, j=0; knumFiles; k++){ + cout << fileTable+k*O2_FILETABLESIZE << " " << trackTable[k] << endl; + j+=trackTable[k]; + } status(dbName); } @@ -930,10 +944,13 @@ pointQuery(dbName, inFile, adbQueryResult); break; case O2_FLAG_SEQUENCE_QUERY: - segSequenceQuery(dbName, inFile, adbQueryResult); + if(radius==0) + trackSequenceQueryNN(dbName, inFile, adbQueryResult); + else + trackSequenceQueryRad(dbName, inFile, adbQueryResult); break; - case O2_FLAG_SEG_QUERY: - segPointQuery(dbName, inFile, adbQueryResult); + case O2_FLAG_TRACK_QUERY: + trackPointQuery(dbName, inFile, adbQueryResult); break; default: error("unrecognized queryType in query()"); @@ -1075,12 +1092,12 @@ // Loop over nearest neighbours for(k=0; k < pointNN; k++){ // Scan for key - unsigned cumSeg=0; + unsigned cumTrack=0; for(l=0 ; lnumFiles; l++){ - cumSeg+=segTable[l]; - if(sIndexes[k]Rlist[k]=new char[O2_MAXFILESTR]; adbQueryResult->Dist[k]=distances[k]; adbQueryResult->Qpos[k]=qIndexes[k]; - unsigned cumSeg=0; + unsigned cumTrack=0; for(l=0 ; lnumFiles; l++){ - cumSeg+=segTable[l]; - if(sIndexes[k]Rlist[k], "%s", fileTable+l*O2_FILETABLESIZE); break; } } - adbQueryResult->Spos[k]=sIndexes[k]+segTable[l]-cumSeg; + adbQueryResult->Spos[k]=sIndexes[k]+trackTable[l]-cumTrack; } } @@ -1123,19 +1140,15 @@ delete dbdurs; } -void audioDB::sequenceQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){ - -} - -// segPointQuery -// return the segNN closest segs to the query seg -// uses average of pointNN points per seg -void audioDB::segPointQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){ +// trackPointQuery +// return the trackNN closest tracks to the query track +// uses average of pointNN points per track +void audioDB::trackPointQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){ initTables(dbName, inFile); // For each input vector, find the closest pointNN matching output vectors and report unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); - unsigned numSegs = dbH->numFiles; + unsigned numTracks = dbH->numFiles; double* query = (double*)(indata+sizeof(int)); double* data = dataBuf; @@ -1152,20 +1165,20 @@ } assert(pointNN>0 && pointNN<=O2_MAXNN); - assert(segNN>0 && segNN<=O2_MAXNN); + assert(trackNN>0 && trackNN<=O2_MAXNN); // Make temporary dynamic memory for results - double segDistances[segNN]; - unsigned segIDs[segNN]; - unsigned segQIndexes[segNN]; - unsigned segSIndexes[segNN]; + double trackDistances[trackNN]; + unsigned trackIDs[trackNN]; + unsigned trackQIndexes[trackNN]; + unsigned trackSIndexes[trackNN]; double distances[pointNN]; unsigned qIndexes[pointNN]; unsigned sIndexes[pointNN]; unsigned j=numVectors; // number of query points - unsigned k,l,n, seg, segOffset=0, processedSegs=0; + unsigned k,l,n, track, trackOffset=0, processedTracks=0; double thisDist; for(k=0; knumFiles]; for(k=0; knumFiles; k++){ meanDBdur[k]=0.0; - for(j=0; jnumFiles]; - unsigned cumSeg=0; - unsigned segIndexOffset; + // build track offset table + unsigned *trackOffsetTable = new unsigned[dbH->numFiles]; + unsigned cumTrack=0; + unsigned trackIndexOffset; for(k=0; knumFiles;k++){ - segOffsetTable[k]=cumSeg; - cumSeg+=segTable[k]*dbH->dim; + trackOffsetTable[k]=cumTrack; + cumTrack+=trackTable[k]*dbH->dim; } char nextKey[MAXSTR]; gettimeofday(&tv1, NULL); - for(processedSegs=0, seg=0 ; processedSegs < dbH->numFiles ; seg++, processedSegs++){ - if(segFile){ - if(!segFile->eof()){ - segFile->getline(nextKey,MAXSTR); - seg=getKeyPos(nextKey); + for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){ + if(trackFile){ + if(!trackFile->eof()){ + trackFile->getline(nextKey,MAXSTR); + track=getKeyPos(nextKey); } else break; } - segOffset=segOffsetTable[seg]; // numDoubles offset - segIndexOffset=segOffset/dbH->dim; // numVectors offset + trackOffset=trackOffsetTable[track]; // numDoubles offset + trackIndexOffset=trackOffset/dbH->dim; // numVectors offset if(verbosity>7) - cerr << seg << "." << segOffset/(dbH->dim) << "." << segTable[seg] << " | ";cerr.flush(); + cerr << track << "." << trackOffset/(dbH->dim) << "." << trackTable[track] << " | ";cerr.flush(); if(dbH->flags & O2_FLAG_L2NORM) usingQueryPoint?query=queryCopy+queryPoint*dbH->dim:query=queryCopy; @@ -1257,8 +1270,8 @@ else j=numVectors; while(j--){ - k=segTable[seg]; // number of vectors in seg - data=dataBuf+segOffset; // data for seg + k=trackTable[track]; // number of vectors in track + data=dataBuf+trackOffset; // data for track while(k--){ thisDist=0; l=dbH->dim; @@ -1267,7 +1280,7 @@ thisDist+=*q++**data++; if(!usingTimes || (usingTimes - && fabs(meanDBdur[seg]-meanQdur)=distances[n]){ @@ -1280,7 +1293,7 @@ } distances[n]=thisDist; qIndexes[n]=numVectors-j-1; - sIndexes[n]=segTable[seg]-k-1; + sIndexes[n]=trackTable[track]-k-1; break; } } @@ -1288,32 +1301,32 @@ break; } } - } // seg + } // track // Move query pointer to next query point query+=dbH->dim; } // query - // Take the average of this seg's distance - // Test the seg distances + // Take the average of this track's distance + // Test the track distances thisDist=0; n=pointNN; while(n--) thisDist+=distances[pointNN-n-1]; thisDist/=pointNN; - n=segNN; + n=trackNN; while(n--){ - if(thisDist>=segDistances[n]){ - if((n==0 || thisDist<=segDistances[n-1])){ + if(thisDist>=trackDistances[n]){ + if((n==0 || thisDist<=trackDistances[n-1])){ // Copy all values above up the queue for( l=pointNN-1 ; l > n ; l--){ - segDistances[l]=segDistances[l-1]; - segQIndexes[l]=segQIndexes[l-1]; - segSIndexes[l]=segSIndexes[l-1]; - segIDs[l]=segIDs[l-1]; + trackDistances[l]=trackDistances[l-1]; + trackQIndexes[l]=trackQIndexes[l-1]; + trackSIndexes[l]=trackSIndexes[l-1]; + trackIDs[l]=trackIDs[l-1]; } - segDistances[n]=thisDist; - segQIndexes[n]=qIndexes[0]; - segSIndexes[n]=sIndexes[0]; - segIDs[n]=seg; + trackDistances[n]=thisDist; + trackQIndexes[n]=qIndexes[0]; + trackSIndexes[n]=sIndexes[0]; + trackIDs[n]=track; break; } } @@ -1325,11 +1338,11 @@ qIndexes[k]=~0; sIndexes[k]=~0; } - } // segs + } // tracks gettimeofday(&tv2, NULL); if(verbosity>1) - cerr << endl << "processed segs :" << processedSegs + cerr << endl << "processed tracks :" << processedTracks << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl; if(adbQueryResult==0){ @@ -1337,12 +1350,12 @@ cerr<__sizeRlist=listLen; adbQueryResult->__sizeDist=listLen; adbQueryResult->__sizeQpos=listLen; @@ -1353,17 +1366,17 @@ adbQueryResult->Spos = new int[listLen]; for(k=0; k__sizeRlist; k++){ adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR]; - adbQueryResult->Dist[k]=segDistances[k]; - adbQueryResult->Qpos[k]=segQIndexes[k]; - adbQueryResult->Spos[k]=segSIndexes[k]; - sprintf(adbQueryResult->Rlist[k], "%s", fileTable+segIDs[k]*O2_FILETABLESIZE); + adbQueryResult->Dist[k]=trackDistances[k]; + adbQueryResult->Qpos[k]=trackQIndexes[k]; + adbQueryResult->Spos[k]=trackSIndexes[k]; + sprintf(adbQueryResult->Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE); } } // Clean up - if(segOffsetTable) - delete segOffsetTable; + if(trackOffsetTable) + delete trackOffsetTable; if(queryCopy) delete queryCopy; if(qNorm) @@ -1374,22 +1387,20 @@ delete meanDBdur; } - -void audioDB::deleteDB(const char* dbName, const char* inFile){ -} -// NBest matched filter distance between query and target segs -// efficient implementation -// outputs average of N minimum matched filter distances -void audioDB::segSequenceQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){ +// k nearest-neighbor (k-NN) search between query and target tracks +// efficient implementation based on matched filter +// assumes normed shingles +// outputs distances of retrieved shingles, max retreived = pointNN shingles per per track +void audioDB::trackSequenceQueryNN(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){ initTables(dbName, inFile); // For each input vector, find the closest pointNN matching output vectors and report // we use stdout in this stub version unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); - unsigned numSegs = dbH->numFiles; + unsigned numTracks = dbH->numFiles; double* query = (double*)(indata+sizeof(int)); double* data = dataBuf; @@ -1408,6 +1419,7 @@ if(verbosity>1) cerr << "performing norms ... "; cerr.flush(); unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim); + // Make a copy of the query queryCopy = new double[numVectors*dbH->dim]; memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); @@ -1417,31 +1429,39 @@ assert(qNorm&&sNorm&&queryCopy&&sMeanL2&&sequenceLength); unitNorm(queryCopy, dbH->dim, numVectors, qNorm); query = queryCopy; + // Make norm measurements relative to sequenceLength unsigned w = sequenceLength-1; unsigned i,j; double* ps; double tmp1,tmp2; + // Copy the L2 norm values to core to avoid disk random access later on memcpy(sNorm, l2normTable, dbVectors*sizeof(double)); double* snPtr = sNorm; for(i=0; inumFiles; i++){ - if(segTable[i]>sequenceLength){ + if(trackTable[i]>=sequenceLength){ tmp1=*snPtr; j=1; w=sequenceLength-1; while(w--) *snPtr+=snPtr[j++]; ps = snPtr+1; - w=segTable[i]-sequenceLength; // +1 - 1 + w=trackTable[i]-sequenceLength; // +1 - 1 while(w--){ tmp2=*ps; - *ps=*(ps-1)-tmp1+*(ps+sequenceLength); + *ps=*(ps-1)-tmp1+*(ps+sequenceLength-1); tmp1=tmp2; ps++; } + ps = snPtr; + w=trackTable[i]-sequenceLength+1; + while(w--){ + *ps=sqrt(*ps); + ps++; + } } - snPtr+=segTable[i]; + snPtr+=trackTable[i]; } double* pn = sMeanL2; @@ -1449,78 +1469,89 @@ while(w--) *pn++=0.0; ps=sNorm; - unsigned processedSegs=0; + unsigned processedTracks=0; for(i=0; inumFiles; i++){ - if(segTable[i]>sequenceLength-1){ - w = segTable[i]-sequenceLength+1; + if(trackTable[i]>sequenceLength-1){ + w = trackTable[i]-sequenceLength; pn = sMeanL2+i; + *pn=0; while(w--) - *pn+=*ps++; - *pn/=segTable[i]-sequenceLength+1; + if(*ps>0) + *pn+=*ps++; + *pn/=trackTable[i]-sequenceLength; SILENCE_THRESH+=*pn; - processedSegs++; + processedTracks++; } - ps = sNorm + segTable[i]; + ps = sNorm + trackTable[i]; } if(verbosity>1) - cerr << "processedSegs: " << processedSegs << endl; - SILENCE_THRESH/=processedSegs; + cerr << "processedTracks: " << processedTracks << endl; + + + SILENCE_THRESH/=processedTracks; USE_THRESH=1; // Turn thresholding on - DIFF_THRESH=SILENCE_THRESH/=2; // 50% of the mean shingle power - SILENCE_THRESH/=10; // 10% of the mean shingle power is SILENCE - + DIFF_THRESH=SILENCE_THRESH; // mean shingle power + SILENCE_THRESH/=5; // 20% of the mean shingle power is SILENCE + if(verbosity>4) + cerr << "silence thresh: " << SILENCE_THRESH; w=sequenceLength-1; i=1; tmp1=*qNorm; while(w--) *qNorm+=qNorm[i++]; ps = qNorm+1; - qMeanL2 = *qNorm; - w=numVectors-sequenceLength; + w=numVectors-sequenceLength; // +1 -1 while(w--){ tmp2=*ps; - *ps=*(ps-1)-tmp1+*(ps+sequenceLength); + *ps=*(ps-1)-tmp1+*(ps+sequenceLength-1); tmp1=tmp2; - qMeanL2+=*ps; - *ps++; + ps++; + } + ps = qNorm; + qMeanL2 = 0; + w=numVectors-sequenceLength+1; + while(w--){ + *ps=sqrt(*ps); + qMeanL2+=*ps++; } qMeanL2 /= numVectors-sequenceLength+1; + if(verbosity>1) cerr << "done." << endl; if(verbosity>1) - cerr << "matching segs..." << endl; + cerr << "matching tracks..." << endl; assert(pointNN>0 && pointNN<=O2_MAXNN); - assert(segNN>0 && segNN<=O2_MAXNN); + assert(trackNN>0 && trackNN<=O2_MAXNN); // Make temporary dynamic memory for results - double segDistances[segNN]; - unsigned segIDs[segNN]; - unsigned segQIndexes[segNN]; - unsigned segSIndexes[segNN]; + double trackDistances[trackNN]; + unsigned trackIDs[trackNN]; + unsigned trackQIndexes[trackNN]; + unsigned trackSIndexes[trackNN]; double distances[pointNN]; unsigned qIndexes[pointNN]; unsigned sIndexes[pointNN]; - unsigned k,l,m,n,seg,segOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength; + unsigned k,l,m,n,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength; double thisDist; double oneOverWL=1.0/wL; for(k=0; knumFiles; k++){ meanDBdur[k]=0.0; - for(j=0; jnumFiles]; - unsigned cumSeg=0; - unsigned segIndexOffset; + // build track offset table + unsigned *trackOffsetTable = new unsigned[dbH->numFiles]; + unsigned cumTrack=0; + unsigned trackIndexOffset; for(k=0; knumFiles;k++){ - segOffsetTable[k]=cumSeg; - cumSeg+=segTable[k]*dbH->dim; + trackOffsetTable[k]=cumTrack; + cumTrack+=trackTable[k]*dbH->dim; } char nextKey [MAXSTR]; - for(processedSegs=0, seg=0 ; processedSegs < dbH->numFiles ; seg++, processedSegs++){ - // get segID from file if using a control file - if(segFile){ - if(!segFile->eof()){ - segFile->getline(nextKey,MAXSTR); - seg=getKeyPos(nextKey); + // chi^2 statistics + double sampleCount = 0; + double sampleSum = 0; + double logSampleSum = 0; + double minSample = 1e9; + double maxSample = 0; + + // Track loop + for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){ + + // get trackID from file if using a control file + if(trackFile){ + if(!trackFile->eof()){ + trackFile->getline(nextKey,MAXSTR); + track=getKeyPos(nextKey); } else break; } - segOffset=segOffsetTable[seg]; // numDoubles offset - segIndexOffset=segOffset/dbH->dim; // numVectors offset + trackOffset=trackOffsetTable[track]; // numDoubles offset + trackIndexOffset=trackOffset/dbH->dim; // numVectors offset - if(sequenceLength7) - cerr << seg << "." << segIndexOffset << "." << segTable[seg] << " | ";cerr.flush(); + cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";cerr.flush(); - // Cross-correlation matrix + // Sum products matrix for(j=0; jdim; - sp=dataBuf+segOffset+k*dbH->dim; + sp=dataBuf+trackOffset+k*dbH->dim; DD[j][k]=0.0; // Initialize matched filter array dp=&D[j][k]; // point to correlation cell j,k *dp=0.0; // initialize correlation cell @@ -1650,17 +1691,18 @@ for(j=0; j3 && usingTimes){ - cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[seg] << endl; + cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[track] << endl; cerr.flush(); } if(!usingTimes || (usingTimes - && fabs(meanDBdur[seg]-meanQdur)3 && usingTimes){ cerr << "within duration tolerance." << endl; @@ -1683,23 +1725,38 @@ } // Search for minimum distance by shingles (concatenated vectors) - for(j=0;j10) + cerr << thisDist << " " << qNorm[j] << " " << sNorm[trackIndexOffset+k] << endl; + // Gather chi^2 statistics + if(thisDistmaxSample) + maxSample=thisDist; + if(thisDist>1e-9){ + sampleCount++; + sampleSum+=thisDist; + logSampleSum+=log(thisDist); + } + + // diffL2 = fabs(qNorm[j] - sNorm[trackIndexOffset+k]); // Power test if(!USE_THRESH || // Threshold on mean L2 of Q and S sequences - (USE_THRESH && qNorm[j]>SILENCE_THRESH && sNorm[k]>SILENCE_THRESH && + (USE_THRESH && qNorm[j]>SILENCE_THRESH && sNorm[trackIndexOffset+k]>SILENCE_THRESH && // Are both query and target windows above mean energy? - (qNorm[j]>qMeanL2 && sNorm[k]>sMeanL2[seg] && diffL2 < DIFF_THRESH ))) - thisDist=DD[j][k]*oneOverWL; + (qNorm[j]>qMeanL2*.25 && sNorm[trackIndexOffset+k]>sMeanL2[track]*.25))) // && diffL2 < DIFF_THRESH ))) + thisDist=thisDist; // Computed above else - thisDist=0.0; - - // NBest match algorithm - for(m=0; m=distances[m]){ + thisDist=1000000.0; + + // k-NN match algorithm + m=pointNN; + while(m--){ + if(thisDist<=distances[m]) + if(m==0 || thisDist>=distances[m-1]){ // Shuffle distances up the list for(l=pointNN-1; l>m; l--){ distances[l]=distances[l-1]; @@ -1713,7 +1770,7 @@ qIndexes[m]=j; sIndexes[m]=k; break; - } + } } } // Calculate the mean of the N-Best matches @@ -1724,25 +1781,26 @@ // Let's see the distances then... if(verbosity>3) - cerr << "d[" << fileTable+seg*O2_FILETABLESIZE << "]=" << thisDist << endl; + cerr << fileTable+track*O2_FILETABLESIZE << " " << thisDist << endl; - // All the seg stuff goes here - n=segNN; + + // All the track stuff goes here + n=trackNN; while(n--){ - if(thisDist>=segDistances[n]){ - if((n==0 || thisDist<=segDistances[n-1])){ + if(thisDist<=trackDistances[n]){ + if((n==0 || thisDist>=trackDistances[n-1])){ // Copy all values above up the queue - for( l=segNN-1 ; l > n ; l--){ - segDistances[l]=segDistances[l-1]; - segQIndexes[l]=segQIndexes[l-1]; - segSIndexes[l]=segSIndexes[l-1]; - segIDs[l]=segIDs[l-1]; + for( l=trackNN-1 ; l > n ; l--){ + trackDistances[l]=trackDistances[l-1]; + trackQIndexes[l]=trackQIndexes[l-1]; + trackSIndexes[l]=trackSIndexes[l-1]; + trackIDs[l]=trackIDs[l-1]; } - segDistances[n]=thisDist; - segQIndexes[n]=qIndexes[0]; - segSIndexes[n]=sIndexes[0]; - successfulSegs++; - segIDs[n]=seg; + trackDistances[n]=thisDist; + trackQIndexes[n]=qIndexes[0]; + trackSIndexes[n]=sIndexes[0]; + successfulTracks++; + trackIDs[n]=track; break; } } @@ -1750,15 +1808,8 @@ break; } } // Duration match - - // per-seg reset array values - for(unsigned k=0; k1) - cerr << endl << "processed segs :" << processedSegs << " matched segments: " << successfulSegs << " elapsed time:" + if(verbosity>1){ + cerr << endl << "processed tracks :" << processedTracks << " matched tracks: " << successfulTracks << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl; - + cerr << "sampleCount: " << sampleCount << " sampleSum: " << sampleSum << " logSampleSum: " << logSampleSum + << " minSample: " << minSample << " maxSample: " << maxSample << endl; + } if(adbQueryResult==0){ if(verbosity>1) cerr<__sizeRlist=listLen; adbQueryResult->__sizeDist=listLen; adbQueryResult->__sizeQpos=listLen; @@ -1796,19 +1856,19 @@ adbQueryResult->Spos = new int[listLen]; for(k=0; k__sizeRlist; k++){ adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR]; - adbQueryResult->Dist[k]=segDistances[k]; - adbQueryResult->Qpos[k]=segQIndexes[k]; - adbQueryResult->Spos[k]=segSIndexes[k]; - sprintf(adbQueryResult->Rlist[k], "%s", fileTable+segIDs[k]*O2_FILETABLESIZE); + adbQueryResult->Dist[k]=trackDistances[k]; + adbQueryResult->Qpos[k]=trackQIndexes[k]; + adbQueryResult->Spos[k]=trackSIndexes[k]; + sprintf(adbQueryResult->Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE); } } // Clean up - if(segOffsetTable) - delete segOffsetTable; + if(trackOffsetTable) + delete[] trackOffsetTable; if(queryCopy) - delete queryCopy; + delete[] queryCopy; //if(qNorm) //delete qNorm; if(D) @@ -1816,9 +1876,482 @@ if(DD) delete[] DD; if(timesdata) - delete timesdata; + delete[] timesdata; if(meanDBdur) - delete meanDBdur; + delete[] meanDBdur; + + +} + +// Radius search between query and target tracks +// efficient implementation based on matched filter +// assumes normed shingles +// outputs count of retrieved shingles, max retreived = one shingle per query shingle per track +void audioDB::trackSequenceQueryRad(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){ + + initTables(dbName, inFile); + + // For each input vector, find the closest pointNN matching output vectors and report + // we use stdout in this stub version + unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); + unsigned numTracks = dbH->numFiles; + + double* query = (double*)(indata+sizeof(int)); + double* data = dataBuf; + double* queryCopy = 0; + + double qMeanL2; + double* sMeanL2; + + unsigned USE_THRESH=0; + double SILENCE_THRESH=0; + double DIFF_THRESH=0; + + if(!(dbH->flags & O2_FLAG_L2NORM) ) + error("Database must be L2 normed for sequence query","use -l2norm"); + + if(verbosity>1) + cerr << "performing norms ... "; cerr.flush(); + unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim); + + // Make a copy of the query + queryCopy = new double[numVectors*dbH->dim]; + memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); + qNorm = new double[numVectors]; + sNorm = new double[dbVectors]; + sMeanL2=new double[dbH->numFiles]; + assert(qNorm&&sNorm&&queryCopy&&sMeanL2&&sequenceLength); + unitNorm(queryCopy, dbH->dim, numVectors, qNorm); + query = queryCopy; + + // Make norm measurements relative to sequenceLength + unsigned w = sequenceLength-1; + unsigned i,j; + double* ps; + double tmp1,tmp2; + + // Copy the L2 norm values to core to avoid disk random access later on + memcpy(sNorm, l2normTable, dbVectors*sizeof(double)); + double* snPtr = sNorm; + for(i=0; inumFiles; i++){ + if(trackTable[i]>=sequenceLength){ + tmp1=*snPtr; + j=1; + w=sequenceLength-1; + while(w--) + *snPtr+=snPtr[j++]; + ps = snPtr+1; + w=trackTable[i]-sequenceLength; // +1 - 1 + while(w--){ + tmp2=*ps; + *ps=*(ps-1)-tmp1+*(ps+sequenceLength-1); + tmp1=tmp2; + ps++; + } + ps = snPtr; + w=trackTable[i]-sequenceLength+1; + while(w--){ + *ps=sqrt(*ps); + ps++; + } + } + snPtr+=trackTable[i]; + } + + double* pn = sMeanL2; + w=dbH->numFiles; + while(w--) + *pn++=0.0; + ps=sNorm; + unsigned processedTracks=0; + for(i=0; inumFiles; i++){ + if(trackTable[i]>sequenceLength-1){ + w = trackTable[i]-sequenceLength; + pn = sMeanL2+i; + *pn=0; + while(w--) + if(*ps>0) + *pn+=*ps++; + *pn/=trackTable[i]-sequenceLength; + SILENCE_THRESH+=*pn; + processedTracks++; + } + ps = sNorm + trackTable[i]; + } + if(verbosity>1) + cerr << "processedTracks: " << processedTracks << endl; + + + SILENCE_THRESH/=processedTracks; + USE_THRESH=1; // Turn thresholding on + DIFF_THRESH=SILENCE_THRESH; // mean shingle power + SILENCE_THRESH/=5; // 20% of the mean shingle power is SILENCE + if(verbosity>4) + cerr << "silence thresh: " << SILENCE_THRESH; + w=sequenceLength-1; + i=1; + tmp1=*qNorm; + while(w--) + *qNorm+=qNorm[i++]; + ps = qNorm+1; + w=numVectors-sequenceLength; // +1 -1 + while(w--){ + tmp2=*ps; + *ps=*(ps-1)-tmp1+*(ps+sequenceLength-1); + tmp1=tmp2; + ps++; + } + ps = qNorm; + qMeanL2 = 0; + w=numVectors-sequenceLength+1; + while(w--){ + *ps=sqrt(*ps); + qMeanL2+=*ps++; + } + qMeanL2 /= numVectors-sequenceLength+1; + + if(verbosity>1) + cerr << "done." << endl; + + + if(verbosity>1) + cerr << "matching tracks..." << endl; + + assert(pointNN>0 && pointNN<=O2_MAXNN); + assert(trackNN>0 && trackNN<=O2_MAXNN); + + // Make temporary dynamic memory for results + double trackDistances[trackNN]; + unsigned trackIDs[trackNN]; + unsigned trackQIndexes[trackNN]; + unsigned trackSIndexes[trackNN]; + + double distances[pointNN]; + unsigned qIndexes[pointNN]; + unsigned sIndexes[pointNN]; + + + unsigned k,l,m,n,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength; + double thisDist; + double oneOverWL=1.0/wL; + + for(k=0; kflags & O2_FLAG_TIMES)){ + cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; + usingTimes=0; + } + + else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) + cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl; + + else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ + timesdata = new double[numVectors]; + assert(timesdata); + insertTimeStamps(numVectors, timesFile, timesdata); + // Calculate durations of points + for(k=0; k1) + cerr << "mean query file duration: " << meanQdur << endl; + meanDBdur = new double[dbH->numFiles]; + assert(meanDBdur); + for(k=0; knumFiles; k++){ + meanDBdur[k]=0.0; + for(j=0; jnumVectors || queryPoint>numVectors-wL+1) + error("queryPoint > numVectors-wL+1 in query"); + else{ + if(verbosity>1) + cerr << "query point: " << queryPoint << endl; cerr.flush(); + query=query+queryPoint*dbH->dim; + qNorm=qNorm+queryPoint; + numVectors=wL; + } + + double ** D = 0; // Differences query and target + double ** DD = 0; // Matched filter distance + + D = new double*[numVectors]; + assert(D); + DD = new double*[numVectors]; + assert(DD); + + gettimeofday(&tv1, NULL); + processedTracks=0; + unsigned successfulTracks=0; + + double* qp; + double* sp; + double* dp; + double diffL2; + + // build track offset table + unsigned *trackOffsetTable = new unsigned[dbH->numFiles]; + unsigned cumTrack=0; + unsigned trackIndexOffset; + for(k=0; knumFiles;k++){ + trackOffsetTable[k]=cumTrack; + cumTrack+=trackTable[k]*dbH->dim; + } + + char nextKey [MAXSTR]; + + // chi^2 statistics + double sampleCount = 0; + double sampleSum = 0; + double logSampleSum = 0; + double minSample = 1e9; + double maxSample = 0; + + // Track loop + for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){ + + // get trackID from file if using a control file + if(trackFile){ + if(!trackFile->eof()){ + trackFile->getline(nextKey,MAXSTR); + track=getKeyPos(nextKey); + } + else + break; + } + + trackOffset=trackOffsetTable[track]; // numDoubles offset + trackIndexOffset=trackOffset/dbH->dim; // numVectors offset + + if(sequenceLength7) + cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";cerr.flush(); + + // Sum products matrix + for(j=0; jdim; + sp=dataBuf+trackOffset+k*dbH->dim; + DD[j][k]=0.0; // Initialize matched filter array + dp=&D[j][k]; // point to correlation cell j,k + *dp=0.0; // initialize correlation cell + l=dbH->dim; // size of vectors + while(l--) + *dp+=*qp++**sp++; + } + + // Matched Filter + // HOP SIZE == 1 + double* spd; + if(HOP_SIZE==1){ // HOP_SIZE = shingleHop + for(w=0; w3 && usingTimes){ + cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[track] << endl; + cerr.flush(); + } + + if(!usingTimes || + (usingTimes + && fabs(meanDBdur[track]-meanQdur)3 && usingTimes){ + cerr << "within duration tolerance." << endl; + cerr.flush(); + } + + // Search for minimum distance by shingles (concatenated vectors) + for(j=0;j10) + cerr << thisDist << " " << qNorm[j] << " " << sNorm[trackIndexOffset+k] << endl; + // Gather chi^2 statistics + if(thisDistmaxSample) + maxSample=thisDist; + if(thisDist>1e-9){ + sampleCount++; + sampleSum+=thisDist; + logSampleSum+=log(thisDist); + } + + // diffL2 = fabs(qNorm[j] - sNorm[trackIndexOffset+k]); + // Power test + if(!USE_THRESH || + // Threshold on mean L2 of Q and S sequences + (USE_THRESH && qNorm[j]>SILENCE_THRESH && sNorm[trackIndexOffset+k]>SILENCE_THRESH && + // Are both query and target windows above mean energy? + (qNorm[j]>qMeanL2*.25 && sNorm[trackIndexOffset+k]>sMeanL2[track]*.25))) // && diffL2 < DIFF_THRESH ))) + thisDist=thisDist; // Computed above + else + thisDist=1000000.0; + if(thisDist>=0 && thisDist<=radius){ + distances[0]++; // increment count + break; // only need one track point per query point + } + } + // How many points were below threshold ? + thisDist=distances[0]; + + // Let's see the distances then... + if(verbosity>3) + cerr << fileTable+track*O2_FILETABLESIZE << " " << thisDist << endl; + + // All the track stuff goes here + n=trackNN; + while(n--){ + if(thisDist>trackDistances[n]){ + if((n==0 || thisDist<=trackDistances[n-1])){ + // Copy all values above up the queue + for( l=trackNN-1 ; l > n ; l--){ + trackDistances[l]=trackDistances[l-1]; + trackQIndexes[l]=trackQIndexes[l-1]; + trackSIndexes[l]=trackSIndexes[l-1]; + trackIDs[l]=trackIDs[l-1]; + } + trackDistances[n]=thisDist; + trackQIndexes[n]=qIndexes[0]; + trackSIndexes[n]=sIndexes[0]; + successfulTracks++; + trackIDs[n]=track; + break; + } + } + else + break; + } + } // Duration match + + // Clean up current track + if(D!=NULL){ + for(j=0; j1){ + cerr << endl << "processed tracks :" << processedTracks << " matched tracks: " << successfulTracks << " elapsed time:" + << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl; + cerr << "sampleCount: " << sampleCount << " sampleSum: " << sampleSum << " logSampleSum: " << logSampleSum + << " minSample: " << minSample << " maxSample: " << maxSample << endl; + } + + if(adbQueryResult==0){ + if(verbosity>1) + cerr<__sizeRlist=listLen; + adbQueryResult->__sizeDist=listLen; + adbQueryResult->__sizeQpos=listLen; + adbQueryResult->__sizeSpos=listLen; + adbQueryResult->Rlist= new char*[listLen]; + adbQueryResult->Dist = new double[listLen]; + adbQueryResult->Qpos = new int[listLen]; + adbQueryResult->Spos = new int[listLen]; + for(k=0; k__sizeRlist; k++){ + adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR]; + adbQueryResult->Dist[k]=trackDistances[k]; + adbQueryResult->Qpos[k]=trackQIndexes[k]; + adbQueryResult->Spos[k]=trackSIndexes[k]; + sprintf(adbQueryResult->Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE); + } + } + + + // Clean up + if(trackOffsetTable) + delete[] trackOffsetTable; + if(queryCopy) + delete[] queryCopy; + //if(qNorm) + //delete qNorm; + if(D) + delete[] D; + if(DD) + delete[] DD; + if(timesdata) + delete[] timesdata; + if(meanDBdur) + delete[] meanDBdur; } @@ -1872,15 +2405,17 @@ L2+=*p**p; p++; } - L2=sqrt(L2); + /* L2=sqrt(L2);*/ if(qNorm) *qNorm++=L2; + /* oneOverL2 = 1.0/L2; d=dim; while(d--){ *X*=oneOverL2; X++; - } + */ + X+=dim; } if(verbosity>2) cerr << "done..." << endl; @@ -1913,13 +2448,16 @@ *l2ptr+=*p**p; p++; } - *l2ptr=sqrt(*l2ptr); - oneOverL2 = 1.0/(*l2ptr++); - d=dim; - while(d--){ + l2ptr++; + /* + oneOverL2 = 1.0/(*l2ptr++); + d=dim; + while(d--){ *X*=oneOverL2; X++; - } + } + */ + X+=dim; } unsigned offset; if(append) @@ -1928,7 +2466,7 @@ offset=0; memcpy(l2normTable+offset, l2buf, n*sizeof(double)); if(l2buf) - delete l2buf; + delete[] l2buf; if(verbosity>2) cerr << "done..." << endl; } @@ -1970,8 +2508,8 @@ // SERVER SIDE int adb__status(struct soap* soap, xsd__string dbName, xsd__int &adbCreateResult){ - char* const argv[]={"audioDB",COM_STATUS,dbName}; - const unsigned argc = 3; + char* const argv[]={"audioDB",COM_STATUS,"-d",dbName}; + const unsigned argc = 4; audioDB(argc,argv); adbCreateResult=100; return SOAP_OK; @@ -1979,7 +2517,7 @@ // Literal translation of command line to web service -int adb__query(struct soap* soap, xsd__string dbName, xsd__string qKey, xsd__string keyList, xsd__string timesFileName, xsd__int qType, xsd__int qPos, xsd__int pointNN, xsd__int segNN, xsd__int seqLen, adb__queryResult &adbQueryResult){ +int adb__query(struct soap* soap, xsd__string dbName, xsd__string qKey, xsd__string keyList, xsd__string timesFileName, xsd__int qType, xsd__int qPos, xsd__int pointNN, xsd__int trackNN, xsd__int seqLen, adb__queryResult &adbQueryResult){ char queryType[256]; for(int k=0; k<256; k++) queryType[k]='\0'; @@ -1987,15 +2525,15 @@ strncpy(queryType, "point", strlen("point")); else if (qType == O2_FLAG_SEQUENCE_QUERY) strncpy(queryType, "sequence", strlen("sequence")); - else if(qType == O2_FLAG_SEG_QUERY) - strncpy(queryType,"segment", strlen("segment")); + else if(qType == O2_FLAG_TRACK_QUERY) + strncpy(queryType,"track", strlen("track")); else strncpy(queryType, "", strlen("")); if(pointNN==0) pointNN=10; - if(segNN==0) - segNN=10; + if(trackNN==0) + trackNN=10; if(seqLen==0) seqLen=16; @@ -2003,8 +2541,8 @@ sprintf(qPosStr, "%d", qPos); char pointNNStr[256]; sprintf(pointNNStr,"%d",pointNN); - char segNNStr[256]; - sprintf(segNNStr,"%d",segNN); + char trackNNStr[256]; + sprintf(trackNNStr,"%d",trackNN); char seqLenStr[256]; sprintf(seqLenStr,"%d",seqLen); @@ -2024,8 +2562,8 @@ qPosStr, COM_POINTNN, pointNNStr, - COM_SEGNN, - segNNStr, // Need to pass a parameter + COM_TRACKNN, + trackNNStr, // Need to pass a parameter COM_SEQLEN, seqLenStr }; @@ -2038,5 +2576,3 @@ int main(const unsigned argc, char* const argv[]){ audioDB(argc, argv); } - - diff -r 4346f4b717e4 -r 42498552b30c audioDB.h --- a/audioDB.h Thu Aug 02 11:40:22 2007 +0000 +++ b/audioDB.h Thu Aug 16 10:04:55 2007 +0000 @@ -16,7 +16,7 @@ -d, --database=filename database name to be used with database commands -N, --new make a new database -S, --status database information - -D, --dump list all segments: index key size + -D, --dump list all tracks: index key size Database Insertion: The following commands process a binary input feature file and optional @@ -46,12 +46,12 @@ database using the named feature vector file as a query -q, --qtype=type the type of search (possible values="point", - "segment", "sequence" default=`sequence') + "track", "sequence" default=`sequence') -p, --qpoint=position ordinal position of query vector (or start of sequence) in feature vector input file (default=`0') -n, --pointnn=numpoints number of point nearest neighbours to use [per - segment in segment and sequence mode] + track in track and sequence mode] (default=`10') -r, --resultlength=length maximum length of the result list (default=`10') @@ -107,7 +107,7 @@ #define COM_SEQLEN "--sequencelength" #define COM_SEQHOP "--sequencehop" #define COM_POINTNN "--pointnn" -#define COM_SEGNN "--resultlength" +#define COM_TRACKNN "--resultlength" #define COM_QPOINT "--qpoint" #define COM_FEATURES "--features" #define COM_QUERYKEY "--key" @@ -117,7 +117,7 @@ #define O2_MAGIC 1111765583 // 'B'<<24|'D'<<16|'2'<<8|'O' reads O2DB in little endian order #define O2_DEFAULT_POINTNN (10U) -#define O2_DEFAULT_SEGNN (10U) +#define O2_DEFAULT_TRACKNN (10U) #define O2_DEFAULTDBSIZE (2000000000) // 2GB table size //#define O2_DEFAULTDBSIZE (1000000000U) // 1GB table size @@ -126,18 +126,18 @@ #define O2_MAXFILES (10000U) // 10,000 files #define O2_MAXFILESTR (256U) #define O2_FILETABLESIZE (O2_MAXFILESTR) -#define O2_SEGTABLESIZE (sizeof(unsigned)) +#define O2_TRACKTABLESIZE (sizeof(unsigned)) #define O2_HEADERSIZE (sizeof(dbTableHeaderT)) #define O2_MEANNUMVECTORS (1000U) #define O2_MAXDIM (1000U) -#define O2_MAXNN (1000U) +#define O2_MAXNN (10000U) // Flags #define O2_FLAG_L2NORM (0x1U) #define O2_FLAG_MINMAX (0x2U) #define O2_FLAG_POINT_QUERY (0x4U) #define O2_FLAG_SEQUENCE_QUERY (0x8U) -#define O2_FLAG_SEG_QUERY (0x10U) +#define O2_FLAG_TRACK_QUERY (0x10U) #define O2_FLAG_TIMES (0x20U) // Error Codes @@ -167,8 +167,8 @@ const char *inFile; const char *hostport; const char *key; - const char* segFileName; - ifstream *segFile; + const char* trackFileName; + ifstream *trackFile; const char *command; const char *timesFileName; ifstream *timesFile; @@ -180,13 +180,13 @@ struct stat statbuf; dbTableHeaderPtr dbH; size_t fileTableOffset; - size_t segTableOffset; + size_t trackTableOffset; size_t dataoffset; size_t l2normTableOffset; size_t timesTableOffset; char *fileTable; - unsigned* segTable; + unsigned* trackTable; double* dataBuf; double* inBuf; double* l2normTable; @@ -198,7 +198,7 @@ unsigned verbosity; // how much do we want to know? unsigned queryType; // point queries default unsigned pointNN; // how many point NNs ? - unsigned segNN; // how many seg NNs ? + unsigned trackNN; // how many track NNs ? unsigned sequenceLength; unsigned sequenceHop; unsigned queryPoint; @@ -208,24 +208,20 @@ unsigned isServer; unsigned port; double timesTol; - unsigned ignoreCols; - + double radius; + // Timers struct timeval tv1; struct timeval tv2; - - - // private methods void error(const char* a, const char* b = ""); void pointQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0); - void sequenceQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0); - void segPointQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0); - void segSequenceQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0); + void trackPointQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0); + void trackSequenceQueryNN(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0); + void trackSequenceQueryRad(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0); void initTables(const char* dbName, const char* inFile); - void NBestMatchedFilter(); void unitNorm(double* X, unsigned d, unsigned n, double* qNorm); void unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append); void normalize(double* X, int dim, int n); @@ -244,12 +240,12 @@ void query(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0); void status(const char* dbName); void ws_status(const char*dbName, char* hostport); - void ws_query(const char*dbName, const char *segKey, const char* hostport); + void ws_query(const char*dbName, const char *trackKey, const char* hostport); void l2norm(const char* dbName); void dump(const char* dbName); - void deleteDB(const char* dbName, const char* inFile); // web services void startServer(); }; + diff -r 4346f4b717e4 -r 42498552b30c audioDBws.h --- a/audioDBws.h Thu Aug 02 11:40:22 2007 +0000 +++ b/audioDBws.h Thu Aug 16 10:04:55 2007 +0000 @@ -1,7 +1,11 @@ // audioDBws.h -- web services interface to audioDB // +//FIXME: this hard-coding of the service location might be right for +//its internal use at Goldsmiths (for now) but really isn't in +//general. Find a way to bind this later (at install time? Or maybe +//just require that the installer edit the resulting wsdl file?) // -// +//gsoap adb service location: http://gibbons.doc.gold.ac.uk:20703/ typedef int xsd__int; typedef double xsd__double; diff -r 4346f4b717e4 -r 42498552b30c docs/TODO.txt --- a/docs/TODO.txt Thu Aug 02 11:40:22 2007 +0000 +++ b/docs/TODO.txt Thu Aug 16 10:04:55 2007 +0000 @@ -2,13 +2,12 @@ audioDB FIXME: o fix segfault when query is zero-length -o use periodic memunmap on batch insert +:-) DONE use periodic memunmap on batch insert o allow keys to be passed as queries -o rename 'segments' in help to 'files' or 'keys' ? +:-) DONE rename 'segments' to 'tracks' in code and help files. o test suite o SOAP to serialize queryFile and keyList o SOAP to serialize files on insert / batch insert ? -M. Casey - 24/7/7 +M. Casey 13/08/07 - diff -r 4346f4b717e4 -r 42498552b30c gengetopt.in --- a/gengetopt.in Thu Aug 02 11:40:22 2007 +0000 +++ b/gengetopt.in Thu Aug 16 10:04:55 2007 +0000 @@ -19,13 +19,13 @@ # -T --timePointsList.txt # -B --BATCHINSERT dbName # -# -Q --QUERY {point|segment|sequence} +# -Q --QUERY {point|track|sequence} # -p --qpoint - ordinal position of query vector in feature input file -# -n --pointnn n - number of nearest neigbour points per segment -# -r --resultlength n - number of segments (nearest neighbours) to return +# -n --pointnn n - number of nearest neigbour points per track +# -r --resultlength n - number of tracks (nearest neighbours) to return # -l --seqlen len - length of sequence # -h --seqhop hop - hop size of sequence -# -R --radius - radius-based search, return all points/segments/sequences < radius (0...Inf) +# -R --radius - radius-based search, return all points/tracks/sequences < radius (0...Inf) # -x --x - time expand (compress) factor (ratio of result length to query length (and visa-versa)) [1..Inf] # -o --rotate - rotate the query feature vectors on search # @@ -54,18 +54,18 @@ option "key" k "unique identifier associated with features." string typestr="identifier" dependon="features" optional text "" option "BATCHINSERT" B "add feature vectors named in a --featureList file (with optional keys in a --keyList file) to the named database." dependon="featureList" optional -option "featureList" F "text file containing list of binary feature vector files to process" string typestr="filename" dependon="database" optional +option "featureList" F "text file containing list of binary feature vector files to process, one per track" string typestr="filename" dependon="database" optional option "timesList" T "text file containing list of ascii --times for each --features file in --featureList." string typestr="filename" dependon="featureList" optional option "keyList" K "text file containing list of unique identifiers associated with --features." string typestr="filename" optional section "Database Search" sectiondesc="Thse commands control the retrieval behaviour.\n" -option "QUERY" Q "content-based search on --database using --features as a query. Optionally restrict the search to those segments identified in a --keyList." values="point","segment","sequence" typestr="searchtype" dependon="database" dependon="features" optional +option "QUERY" Q "content-based search on --database using --features as a query. Optionally restrict the search to those tracks identified in a --keyList." values="point","track","sequence" typestr="searchtype" dependon="database" dependon="features" optional option "qpoint" p "ordinal position of query start point in --features file." int typestr="position" default="0" optional option "exhaustive" e "exhaustive search: iterate through all query vectors in search. Overrides --qpoint." flag off optional hidden option "pointnn" n "number of point nearest neighbours to use in retrieval." int typestr="numpoints" default="10" optional -option "radius" R "radius search, returns all points/segments/sequences inside given radius." double default="1.0" optional hidden +option "radius" R "radius search, returns all points/tracks/sequences inside given radius. (Overrides --pointnn)." double default="1.0" optional option "expandfactor" x "time compress/expand factor of result length to query length [1.0 .. 100.0]." double default="1.1" optional hidden option "rotate" o "rotate query vectors for rotationally invariant search." flag off optional hidden option "resultlength" r "maximum length of the result list." int typestr="length" default="10" optional