Mercurial > hg > audiodb
comparison audioDB.cpp @ 18:999c9c216565
minor revisions: mainly tidying up naming conventions (segment->track)
author | mas01mc |
---|---|
date | Mon, 13 Aug 2007 19:14:33 +0000 |
parents | 6d899df0cfe4 |
children | 0519fc406b29 |
comparison
equal
deleted
inserted
replaced
17:6d899df0cfe4 | 18:999c9c216565 |
---|---|
48 Database Search: | 48 Database Search: |
49 These commands control the retrieval behaviour. | 49 These commands control the retrieval behaviour. |
50 | 50 |
51 -Q, --QUERY=searchtype content-based search on --database using | 51 -Q, --QUERY=searchtype content-based search on --database using |
52 --features as a query. Optionally restrict the | 52 --features as a query. Optionally restrict the |
53 search to those segments identified in a | 53 search to those tracks identified in a |
54 --keyList. (possible values="point", | 54 --keyList. (possible values="point", |
55 "segment", "sequence") | 55 "track", "sequence") |
56 -p, --qpoint=position ordinal position of query start point in | 56 -p, --qpoint=position ordinal position of query start point in |
57 --features file. (default=`0') | 57 --features file. (default=`0') |
58 -e, --exhaustive exhaustive search: iterate through all query | 58 -e, --exhaustive exhaustive search: iterate through all query |
59 vectors in search. Overrides --qpoint. | 59 vectors in search. Overrides --qpoint. |
60 (default=off) | 60 (default=off) |
61 -n, --pointnn=numpoints number of point nearest neighbours to use in | 61 -n, --pointnn=numpoints number of point nearest neighbours to use in |
62 retrieval. (default=`10') | 62 retrieval. (default=`10') |
63 -R, --radius=DOUBLE radius search, returns all | 63 -R, --radius=DOUBLE radius search, returns all |
64 points/segments/sequences inside given radius. | 64 points/tracks/sequences inside given radius. |
65 (default=`1.0') | 65 (default=`1.0') |
66 -x, --expandfactor=DOUBLE time compress/expand factor of result length to | 66 -x, --expandfactor=DOUBLE time compress/expand factor of result length to |
67 query length [1.0 .. 100.0]. (default=`1.1') | 67 query length [1.0 .. 100.0]. (default=`1.1') |
68 -o, --rotate rotate query vectors for rotationally invariant | 68 -o, --rotate rotate query vectors for rotationally invariant |
69 search. (default=off) | 69 search. (default=off) |
105 audioDB::audioDB(const unsigned argc, char* const argv[], adb__queryResult *adbQueryResult): | 105 audioDB::audioDB(const unsigned argc, char* const argv[], adb__queryResult *adbQueryResult): |
106 dim(0), | 106 dim(0), |
107 dbName(0), | 107 dbName(0), |
108 inFile(0), | 108 inFile(0), |
109 key(0), | 109 key(0), |
110 segFile(0), | 110 trackFile(0), |
111 segFileName(0), | 111 trackFileName(0), |
112 timesFile(0), | 112 timesFile(0), |
113 timesFileName(0), | 113 timesFileName(0), |
114 usingTimes(0), | 114 usingTimes(0), |
115 command(0), | 115 command(0), |
116 dbfid(0), | 116 dbfid(0), |
119 infid(0), | 119 infid(0), |
120 indata(0), | 120 indata(0), |
121 queryType(O2_FLAG_POINT_QUERY), | 121 queryType(O2_FLAG_POINT_QUERY), |
122 verbosity(1), | 122 verbosity(1), |
123 pointNN(O2_DEFAULT_POINTNN), | 123 pointNN(O2_DEFAULT_POINTNN), |
124 segNN(O2_DEFAULT_SEGNN), | 124 trackNN(O2_DEFAULT_TRACKNN), |
125 segTable(0), | 125 trackTable(0), |
126 fileTable(0), | 126 fileTable(0), |
127 dataBuf(0), | 127 dataBuf(0), |
128 l2normTable(0), | 128 l2normTable(0), |
129 timesTable(0), | 129 timesTable(0), |
130 qNorm(0), | 130 qNorm(0), |
302 if(args_info.keyList_given) | 302 if(args_info.keyList_given) |
303 key=args_info.keyList_arg; // INCONSISTENT NO CHECK | 303 key=args_info.keyList_arg; // INCONSISTENT NO CHECK |
304 | 304 |
305 /* TO DO: REPLACE WITH | 305 /* TO DO: REPLACE WITH |
306 if(args_info.keyList_given){ | 306 if(args_info.keyList_given){ |
307 segFileName=args_info.keyList_arg; | 307 trackFileName=args_info.keyList_arg; |
308 if(strlen(segFileName)>0 && !(segFile = new ifstream(segFileName,ios::in))) | 308 if(strlen(trackFileName)>0 && !(trackFile = new ifstream(trackFileName,ios::in))) |
309 error("Could not open keyList file for reading",segFileName); | 309 error("Could not open keyList file for reading",trackFileName); |
310 } | 310 } |
311 AND UPDATE BATCHINSERT() | 311 AND UPDATE BATCHINSERT() |
312 */ | 312 */ |
313 | 313 |
314 if(args_info.timesList_given){ | 314 if(args_info.timesList_given){ |
327 command=COM_QUERY; | 327 command=COM_QUERY; |
328 dbName=args_info.database_arg; | 328 dbName=args_info.database_arg; |
329 inFile=args_info.features_arg; | 329 inFile=args_info.features_arg; |
330 | 330 |
331 if(args_info.keyList_given){ | 331 if(args_info.keyList_given){ |
332 segFileName=args_info.keyList_arg; | 332 trackFileName=args_info.keyList_arg; |
333 if(strlen(segFileName)>0 && !(segFile = new ifstream(segFileName,ios::in))) | 333 if(strlen(trackFileName)>0 && !(trackFile = new ifstream(trackFileName,ios::in))) |
334 error("Could not open keyList file for reading",segFileName); | 334 error("Could not open keyList file for reading",trackFileName); |
335 } | 335 } |
336 | 336 |
337 if(args_info.times_given){ | 337 if(args_info.times_given){ |
338 timesFileName=args_info.times_arg; | 338 timesFileName=args_info.times_arg; |
339 if(strlen(timesFileName)>0){ | 339 if(strlen(timesFileName)>0){ |
342 usingTimes=1; | 342 usingTimes=1; |
343 } | 343 } |
344 } | 344 } |
345 | 345 |
346 // query type | 346 // query type |
347 if(strncmp(args_info.QUERY_arg, "segment", MAXSTR)==0) | 347 if(strncmp(args_info.QUERY_arg, "track", MAXSTR)==0) |
348 queryType=O2_FLAG_SEG_QUERY; | 348 queryType=O2_FLAG_TRACK_QUERY; |
349 else if(strncmp(args_info.QUERY_arg, "point", MAXSTR)==0) | 349 else if(strncmp(args_info.QUERY_arg, "point", MAXSTR)==0) |
350 queryType=O2_FLAG_POINT_QUERY; | 350 queryType=O2_FLAG_POINT_QUERY; |
351 else if(strncmp(args_info.QUERY_arg, "sequence", MAXSTR)==0) | 351 else if(strncmp(args_info.QUERY_arg, "sequence", MAXSTR)==0) |
352 queryType=O2_FLAG_SEQUENCE_QUERY; | 352 queryType=O2_FLAG_SEQUENCE_QUERY; |
353 else | 353 else |
365 if(pointNN<1 || pointNN >1000) | 365 if(pointNN<1 || pointNN >1000) |
366 error("pointNN out of range: 1 <= pointNN <= 1000"); | 366 error("pointNN out of range: 1 <= pointNN <= 1000"); |
367 | 367 |
368 | 368 |
369 | 369 |
370 segNN=args_info.resultlength_arg; | 370 trackNN=args_info.resultlength_arg; |
371 if(segNN<1 || segNN >10000) | 371 if(trackNN<1 || trackNN >10000) |
372 error("resultlength out of range: 1 <= resultlength <= 1000"); | 372 error("resultlength out of range: 1 <= resultlength <= 1000"); |
373 | 373 |
374 | 374 |
375 sequenceLength=args_info.sequencelength_arg; | 375 sequenceLength=args_info.sequencelength_arg; |
376 if(sequenceLength<1 || sequenceLength >1000) | 376 if(sequenceLength<1 || sequenceLength >1000) |
393 --------------------------------------------------------------------------------- | 393 --------------------------------------------------------------------------------- |
394 | magic 4 bytes| numFiles 4 bytes | dim 4 bytes | length 4 bytes |flags 4 bytes | | 394 | magic 4 bytes| numFiles 4 bytes | dim 4 bytes | length 4 bytes |flags 4 bytes | |
395 --------------------------------------------------------------------------------- | 395 --------------------------------------------------------------------------------- |
396 | 396 |
397 | 397 |
398 keyTable : list of keys of segments | 398 keyTable : list of keys of tracks |
399 -------------------------------------------------------------------------- | 399 -------------------------------------------------------------------------- |
400 | key 256 bytes | | 400 | key 256 bytes | |
401 -------------------------------------------------------------------------- | 401 -------------------------------------------------------------------------- |
402 O2_MAXFILES*O2_FILENAMELENGTH | 402 O2_MAXFILES*O2_FILENAMELENGTH |
403 | 403 |
404 segTable : Maps implicit feature index to a feature vector matrix | 404 trackTable : Maps implicit feature index to a feature vector matrix |
405 -------------------------------------------------------------------------- | 405 -------------------------------------------------------------------------- |
406 | numVectors (4 bytes) | | 406 | numVectors (4 bytes) | |
407 -------------------------------------------------------------------------- | 407 -------------------------------------------------------------------------- |
408 O2_MAXFILES * O2_MEANNUMFEATURES * sizeof(INT) | 408 O2_MAXFILES * O2_MEANNUMFEATURES * sizeof(INT) |
409 | 409 |
488 | 488 |
489 if(read(dbfid,(char*)dbH,sizeof(dbTableHeaderT))!=sizeof(dbTableHeaderT)) | 489 if(read(dbfid,(char*)dbH,sizeof(dbTableHeaderT))!=sizeof(dbTableHeaderT)) |
490 error("error reading db header"); | 490 error("error reading db header"); |
491 | 491 |
492 fileTableOffset = O2_HEADERSIZE; | 492 fileTableOffset = O2_HEADERSIZE; |
493 segTableOffset = fileTableOffset + O2_FILETABLESIZE*O2_MAXFILES; | 493 trackTableOffset = fileTableOffset + O2_FILETABLESIZE*O2_MAXFILES; |
494 dataoffset = segTableOffset + O2_SEGTABLESIZE*O2_MAXFILES; | 494 dataoffset = trackTableOffset + O2_TRACKTABLESIZE*O2_MAXFILES; |
495 l2normTableOffset = O2_DEFAULTDBSIZE - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double); | 495 l2normTableOffset = O2_DEFAULTDBSIZE - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double); |
496 timesTableOffset = l2normTableOffset - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double); | 496 timesTableOffset = l2normTableOffset - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double); |
497 | 497 |
498 if(dbH->magic!=O2_MAGIC){ | 498 if(dbH->magic!=O2_MAGIC){ |
499 cerr << "expected: " << O2_MAGIC << ", got:" << dbH->magic << endl; | 499 cerr << "expected: " << O2_MAGIC << ", got:" << dbH->magic << endl; |
522 MAP_SHARED, dbfid, 0)) == (caddr_t) -1) | 522 MAP_SHARED, dbfid, 0)) == (caddr_t) -1) |
523 error("mmap error for creating database"); | 523 error("mmap error for creating database"); |
524 | 524 |
525 // Make some handy tables with correct types | 525 // Make some handy tables with correct types |
526 fileTable= (char*)(db+fileTableOffset); | 526 fileTable= (char*)(db+fileTableOffset); |
527 segTable = (unsigned*)(db+segTableOffset); | 527 trackTable = (unsigned*)(db+trackTableOffset); |
528 dataBuf = (double*)(db+dataoffset); | 528 dataBuf = (double*)(db+dataoffset); |
529 l2normTable = (double*)(db+l2normTableOffset); | 529 l2normTable = (double*)(db+l2normTableOffset); |
530 timesTable = (double*)(db+timesTableOffset); | 530 timesTable = (double*)(db+timesTableOffset); |
531 | 531 |
532 } | 532 } |
556 if(verbosity) | 556 if(verbosity) |
557 cerr << "Warning: key already exists in database, ignoring: " <<inFile << endl; | 557 cerr << "Warning: key already exists in database, ignoring: " <<inFile << endl; |
558 return; | 558 return; |
559 } | 559 } |
560 | 560 |
561 // Make a segment index table of features to file indexes | 561 // Make a track index table of features to file indexes |
562 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); | 562 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); |
563 if(!numVectors){ | 563 if(!numVectors){ |
564 if(verbosity) | 564 if(verbosity) |
565 cerr << "Warning: ignoring zero-length feature vector file:" << key << endl; | 565 cerr << "Warning: ignoring zero-length feature vector file:" << key << endl; |
566 // CLEAN UP | 566 // CLEAN UP |
587 dbH->length+=(statbuf.st_size-sizeof(int)); | 587 dbH->length+=(statbuf.st_size-sizeof(int)); |
588 | 588 |
589 // Copy the header back to the database | 589 // Copy the header back to the database |
590 memcpy (db, dbH, sizeof(dbTableHeaderT)); | 590 memcpy (db, dbH, sizeof(dbTableHeaderT)); |
591 | 591 |
592 // Update segment to file index map | 592 // Update track to file index map |
593 //memcpy (db+segTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned)); | 593 //memcpy (db+trackTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned)); |
594 memcpy (segTable+dbH->numFiles-1, &numVectors, sizeof(unsigned)); | 594 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned)); |
595 | 595 |
596 // Update the feature database | 596 // Update the feature database |
597 memcpy (db+dataoffset+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int)); | 597 memcpy (db+dataoffset+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int)); |
598 | 598 |
599 // Norm the vectors on input if the database is already L2 normed | 599 // Norm the vectors on input if the database is already L2 normed |
689 | 689 |
690 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) | 690 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) |
691 error("Must use timestamps with timestamped database","use --times"); | 691 error("Must use timestamps with timestamped database","use --times"); |
692 | 692 |
693 fileTableOffset = O2_HEADERSIZE; | 693 fileTableOffset = O2_HEADERSIZE; |
694 segTableOffset = fileTableOffset + O2_FILETABLESIZE*O2_MAXFILES; | 694 trackTableOffset = fileTableOffset + O2_FILETABLESIZE*O2_MAXFILES; |
695 dataoffset = segTableOffset + O2_SEGTABLESIZE*O2_MAXFILES; | 695 dataoffset = trackTableOffset + O2_TRACKTABLESIZE*O2_MAXFILES; |
696 l2normTableOffset = O2_DEFAULTDBSIZE - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double); | 696 l2normTableOffset = O2_DEFAULTDBSIZE - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double); |
697 timesTableOffset = l2normTableOffset - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double); | 697 timesTableOffset = l2normTableOffset - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double); |
698 | 698 |
699 if(dbH->magic!=O2_MAGIC){ | 699 if(dbH->magic!=O2_MAGIC){ |
700 cerr << "expected:" << O2_MAGIC << ", got:" << dbH->magic << endl; | 700 cerr << "expected:" << O2_MAGIC << ", got:" << dbH->magic << endl; |
732 MAP_SHARED, dbfid, 0)) == (caddr_t) -1) | 732 MAP_SHARED, dbfid, 0)) == (caddr_t) -1) |
733 error("mmap error for creating database"); | 733 error("mmap error for creating database"); |
734 | 734 |
735 // Make some handy tables with correct types | 735 // Make some handy tables with correct types |
736 fileTable= (char*)(db+fileTableOffset); | 736 fileTable= (char*)(db+fileTableOffset); |
737 segTable = (unsigned*)(db+segTableOffset); | 737 trackTable = (unsigned*)(db+trackTableOffset); |
738 dataBuf = (double*)(db+dataoffset); | 738 dataBuf = (double*)(db+dataoffset); |
739 l2normTable = (double*)(db+l2normTableOffset); | 739 l2normTable = (double*)(db+l2normTableOffset); |
740 timesTable = (double*)(db+timesTableOffset); | 740 timesTable = (double*)(db+timesTableOffset); |
741 | 741 |
742 // Check that there is room for at least 1 more file | 742 // Check that there is room for at least 1 more file |
774 if(verbosity) | 774 if(verbosity) |
775 cerr << "Warning: key already exists in database:" << thisKey << endl; | 775 cerr << "Warning: key already exists in database:" << thisKey << endl; |
776 } | 776 } |
777 else{ | 777 else{ |
778 | 778 |
779 // Make a segment index table of features to file indexes | 779 // Make a track index table of features to file indexes |
780 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); | 780 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); |
781 if(!numVectors){ | 781 if(!numVectors){ |
782 if(verbosity) | 782 if(verbosity) |
783 cerr << "Warning: ignoring zero-length feature vector file:" << thisKey << endl; | 783 cerr << "Warning: ignoring zero-length feature vector file:" << thisKey << endl; |
784 } | 784 } |
808 // Update Header information | 808 // Update Header information |
809 dbH->length+=(statbuf.st_size-sizeof(int)); | 809 dbH->length+=(statbuf.st_size-sizeof(int)); |
810 // Copy the header back to the database | 810 // Copy the header back to the database |
811 memcpy (db, dbH, sizeof(dbTableHeaderT)); | 811 memcpy (db, dbH, sizeof(dbTableHeaderT)); |
812 | 812 |
813 // Update segment to file index map | 813 // Update track to file index map |
814 //memcpy (db+segTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned)); | 814 //memcpy (db+trackTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned)); |
815 memcpy (segTable+dbH->numFiles-1, &numVectors, sizeof(unsigned)); | 815 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned)); |
816 | 816 |
817 // Update the feature database | 817 // Update the feature database |
818 memcpy (db+dataoffset+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int)); | 818 memcpy (db+dataoffset+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int)); |
819 | 819 |
820 // Norm the vectors on input if the database is already L2 normed | 820 // Norm the vectors on input if the database is already L2 normed |
859 soap_destroy(&soap); | 859 soap_destroy(&soap); |
860 soap_end(&soap); | 860 soap_end(&soap); |
861 soap_done(&soap); | 861 soap_done(&soap); |
862 } | 862 } |
863 | 863 |
864 void audioDB::ws_query(const char*dbName, const char *segKey, const char* hostport){ | 864 void audioDB::ws_query(const char*dbName, const char *trackKey, const char* hostport){ |
865 struct soap soap; | 865 struct soap soap; |
866 adb__queryResult adbQueryResult; | 866 adb__queryResult adbQueryResult; |
867 | 867 |
868 soap_init(&soap); | 868 soap_init(&soap); |
869 if(soap_call_adb__query(&soap,hostport,NULL, | 869 if(soap_call_adb__query(&soap,hostport,NULL, |
870 (char*)dbName,(char*)segKey,(char*)segFileName,(char*)timesFileName, | 870 (char*)dbName,(char*)trackKey,(char*)trackFileName,(char*)timesFileName, |
871 queryType, queryPoint, pointNN, segNN, sequenceLength, adbQueryResult)==SOAP_OK){ | 871 queryType, queryPoint, pointNN, trackNN, sequenceLength, adbQueryResult)==SOAP_OK){ |
872 //std::cerr << "result list length:" << adbQueryResult.__sizeRlist << std::endl; | 872 //std::cerr << "result list length:" << adbQueryResult.__sizeRlist << std::endl; |
873 for(int i=0; i<adbQueryResult.__sizeRlist; i++) | 873 for(int i=0; i<adbQueryResult.__sizeRlist; i++) |
874 std::cout << adbQueryResult.Rlist[i] << " " << adbQueryResult.Dist[i] | 874 std::cout << adbQueryResult.Rlist[i] << " " << adbQueryResult.Dist[i] |
875 << " " << adbQueryResult.Qpos[i] << " " << adbQueryResult.Spos[i] << std::endl; | 875 << " " << adbQueryResult.Qpos[i] << " " << adbQueryResult.Spos[i] << std::endl; |
876 } | 876 } |
901 cout << "flags:" << dbH->flags << endl; | 901 cout << "flags:" << dbH->flags << endl; |
902 | 902 |
903 unsigned dudCount=0; | 903 unsigned dudCount=0; |
904 unsigned nullCount=0; | 904 unsigned nullCount=0; |
905 for(unsigned k=0; k<dbH->numFiles; k++){ | 905 for(unsigned k=0; k<dbH->numFiles; k++){ |
906 if(segTable[k]<sequenceLength){ | 906 if(trackTable[k]<sequenceLength){ |
907 dudCount++; | 907 dudCount++; |
908 if(!segTable[k]) | 908 if(!trackTable[k]) |
909 nullCount++; | 909 nullCount++; |
910 } | 910 } |
911 } | 911 } |
912 cout << "null count: " << nullCount << " small sequence count " << dudCount-nullCount << endl; | 912 cout << "null count: " << nullCount << " small sequence count " << dudCount-nullCount << endl; |
913 } | 913 } |
916 void audioDB::dump(const char* dbName){ | 916 void audioDB::dump(const char* dbName){ |
917 if(!dbH) | 917 if(!dbH) |
918 initTables(dbName,0); | 918 initTables(dbName,0); |
919 | 919 |
920 for(unsigned k=0, j=0; k<dbH->numFiles; k++){ | 920 for(unsigned k=0, j=0; k<dbH->numFiles; k++){ |
921 cout << fileTable+k*O2_FILETABLESIZE << " " << segTable[k] << endl; | 921 cout << fileTable+k*O2_FILETABLESIZE << " " << trackTable[k] << endl; |
922 j+=segTable[k]; | 922 j+=trackTable[k]; |
923 } | 923 } |
924 | 924 |
925 status(dbName); | 925 status(dbName); |
926 } | 926 } |
927 | 927 |
943 case O2_FLAG_POINT_QUERY: | 943 case O2_FLAG_POINT_QUERY: |
944 pointQuery(dbName, inFile, adbQueryResult); | 944 pointQuery(dbName, inFile, adbQueryResult); |
945 break; | 945 break; |
946 case O2_FLAG_SEQUENCE_QUERY: | 946 case O2_FLAG_SEQUENCE_QUERY: |
947 if(radius==0) | 947 if(radius==0) |
948 segSequenceQuery(dbName, inFile, adbQueryResult); | 948 trackSequenceQuery(dbName, inFile, adbQueryResult); |
949 else | 949 else |
950 segSequenceQueryEuc(dbName, inFile, adbQueryResult); | 950 trackSequenceQueryEuc(dbName, inFile, adbQueryResult); |
951 break; | 951 break; |
952 case O2_FLAG_SEG_QUERY: | 952 case O2_FLAG_TRACK_QUERY: |
953 segPointQuery(dbName, inFile, adbQueryResult); | 953 trackPointQuery(dbName, inFile, adbQueryResult); |
954 break; | 954 break; |
955 default: | 955 default: |
956 error("unrecognized queryType in query()"); | 956 error("unrecognized queryType in query()"); |
957 | 957 |
958 } | 958 } |
1090 if(adbQueryResult==0){ | 1090 if(adbQueryResult==0){ |
1091 // Output answer | 1091 // Output answer |
1092 // Loop over nearest neighbours | 1092 // Loop over nearest neighbours |
1093 for(k=0; k < pointNN; k++){ | 1093 for(k=0; k < pointNN; k++){ |
1094 // Scan for key | 1094 // Scan for key |
1095 unsigned cumSeg=0; | 1095 unsigned cumTrack=0; |
1096 for(l=0 ; l<dbH->numFiles; l++){ | 1096 for(l=0 ; l<dbH->numFiles; l++){ |
1097 cumSeg+=segTable[l]; | 1097 cumTrack+=trackTable[l]; |
1098 if(sIndexes[k]<cumSeg){ | 1098 if(sIndexes[k]<cumTrack){ |
1099 cout << fileTable+l*O2_FILETABLESIZE << " " << distances[k] << " " << qIndexes[k] << " " | 1099 cout << fileTable+l*O2_FILETABLESIZE << " " << distances[k] << " " << qIndexes[k] << " " |
1100 << sIndexes[k]+segTable[l]-cumSeg << endl; | 1100 << sIndexes[k]+trackTable[l]-cumTrack << endl; |
1101 break; | 1101 break; |
1102 } | 1102 } |
1103 } | 1103 } |
1104 } | 1104 } |
1105 } | 1105 } |
1115 adbQueryResult->Spos = new int[listLen]; | 1115 adbQueryResult->Spos = new int[listLen]; |
1116 for(k=0; k<adbQueryResult->__sizeRlist; k++){ | 1116 for(k=0; k<adbQueryResult->__sizeRlist; k++){ |
1117 adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR]; | 1117 adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR]; |
1118 adbQueryResult->Dist[k]=distances[k]; | 1118 adbQueryResult->Dist[k]=distances[k]; |
1119 adbQueryResult->Qpos[k]=qIndexes[k]; | 1119 adbQueryResult->Qpos[k]=qIndexes[k]; |
1120 unsigned cumSeg=0; | 1120 unsigned cumTrack=0; |
1121 for(l=0 ; l<dbH->numFiles; l++){ | 1121 for(l=0 ; l<dbH->numFiles; l++){ |
1122 cumSeg+=segTable[l]; | 1122 cumTrack+=trackTable[l]; |
1123 if(sIndexes[k]<cumSeg){ | 1123 if(sIndexes[k]<cumTrack){ |
1124 sprintf(adbQueryResult->Rlist[k], "%s", fileTable+l*O2_FILETABLESIZE); | 1124 sprintf(adbQueryResult->Rlist[k], "%s", fileTable+l*O2_FILETABLESIZE); |
1125 break; | 1125 break; |
1126 } | 1126 } |
1127 } | 1127 } |
1128 adbQueryResult->Spos[k]=sIndexes[k]+segTable[l]-cumSeg; | 1128 adbQueryResult->Spos[k]=sIndexes[k]+trackTable[l]-cumTrack; |
1129 } | 1129 } |
1130 } | 1130 } |
1131 | 1131 |
1132 // Clean up | 1132 // Clean up |
1133 if(queryCopy) | 1133 if(queryCopy) |
1142 | 1142 |
1143 void audioDB::sequenceQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){ | 1143 void audioDB::sequenceQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){ |
1144 | 1144 |
1145 } | 1145 } |
1146 | 1146 |
1147 // segPointQuery | 1147 // trackPointQuery |
1148 // return the segNN closest segs to the query seg | 1148 // return the trackNN closest tracks to the query track |
1149 // uses average of pointNN points per seg | 1149 // uses average of pointNN points per track |
1150 void audioDB::segPointQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){ | 1150 void audioDB::trackPointQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){ |
1151 initTables(dbName, inFile); | 1151 initTables(dbName, inFile); |
1152 | 1152 |
1153 // For each input vector, find the closest pointNN matching output vectors and report | 1153 // For each input vector, find the closest pointNN matching output vectors and report |
1154 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); | 1154 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); |
1155 unsigned numSegs = dbH->numFiles; | 1155 unsigned numTracks = dbH->numFiles; |
1156 | 1156 |
1157 double* query = (double*)(indata+sizeof(int)); | 1157 double* query = (double*)(indata+sizeof(int)); |
1158 double* data = dataBuf; | 1158 double* data = dataBuf; |
1159 double* queryCopy = 0; | 1159 double* queryCopy = 0; |
1160 | 1160 |
1167 unitNorm(queryCopy, dbH->dim, numVectors, qNorm); | 1167 unitNorm(queryCopy, dbH->dim, numVectors, qNorm); |
1168 query = queryCopy; | 1168 query = queryCopy; |
1169 } | 1169 } |
1170 | 1170 |
1171 assert(pointNN>0 && pointNN<=O2_MAXNN); | 1171 assert(pointNN>0 && pointNN<=O2_MAXNN); |
1172 assert(segNN>0 && segNN<=O2_MAXNN); | 1172 assert(trackNN>0 && trackNN<=O2_MAXNN); |
1173 | 1173 |
1174 // Make temporary dynamic memory for results | 1174 // Make temporary dynamic memory for results |
1175 double segDistances[segNN]; | 1175 double trackDistances[trackNN]; |
1176 unsigned segIDs[segNN]; | 1176 unsigned trackIDs[trackNN]; |
1177 unsigned segQIndexes[segNN]; | 1177 unsigned trackQIndexes[trackNN]; |
1178 unsigned segSIndexes[segNN]; | 1178 unsigned trackSIndexes[trackNN]; |
1179 | 1179 |
1180 double distances[pointNN]; | 1180 double distances[pointNN]; |
1181 unsigned qIndexes[pointNN]; | 1181 unsigned qIndexes[pointNN]; |
1182 unsigned sIndexes[pointNN]; | 1182 unsigned sIndexes[pointNN]; |
1183 | 1183 |
1184 unsigned j=numVectors; // number of query points | 1184 unsigned j=numVectors; // number of query points |
1185 unsigned k,l,n, seg, segOffset=0, processedSegs=0; | 1185 unsigned k,l,n, track, trackOffset=0, processedTracks=0; |
1186 double thisDist; | 1186 double thisDist; |
1187 | 1187 |
1188 for(k=0; k<pointNN; k++){ | 1188 for(k=0; k<pointNN; k++){ |
1189 distances[k]=0.0; | 1189 distances[k]=0.0; |
1190 qIndexes[k]=~0; | 1190 qIndexes[k]=~0; |
1191 sIndexes[k]=~0; | 1191 sIndexes[k]=~0; |
1192 } | 1192 } |
1193 | 1193 |
1194 for(k=0; k<segNN; k++){ | 1194 for(k=0; k<trackNN; k++){ |
1195 segDistances[k]=0.0; | 1195 trackDistances[k]=0.0; |
1196 segQIndexes[k]=~0; | 1196 trackQIndexes[k]=~0; |
1197 segSIndexes[k]=~0; | 1197 trackSIndexes[k]=~0; |
1198 segIDs[k]=~0; | 1198 trackIDs[k]=~0; |
1199 } | 1199 } |
1200 | 1200 |
1201 double meanQdur = 0; | 1201 double meanQdur = 0; |
1202 double* timesdata = 0; | 1202 double* timesdata = 0; |
1203 double* meanDBdur = 0; | 1203 double* meanDBdur = 0; |
1220 } | 1220 } |
1221 meanQdur/=k; | 1221 meanQdur/=k; |
1222 meanDBdur = new double[dbH->numFiles]; | 1222 meanDBdur = new double[dbH->numFiles]; |
1223 for(k=0; k<dbH->numFiles; k++){ | 1223 for(k=0; k<dbH->numFiles; k++){ |
1224 meanDBdur[k]=0.0; | 1224 meanDBdur[k]=0.0; |
1225 for(j=0; j<segTable[k]-1 ; j++) | 1225 for(j=0; j<trackTable[k]-1 ; j++) |
1226 meanDBdur[k]+=timesTable[j+1]-timesTable[j]; | 1226 meanDBdur[k]+=timesTable[j+1]-timesTable[j]; |
1227 meanDBdur[k]/=j; | 1227 meanDBdur[k]/=j; |
1228 } | 1228 } |
1229 } | 1229 } |
1230 | 1230 |
1236 cerr << "query point: " << queryPoint << endl; cerr.flush(); | 1236 cerr << "query point: " << queryPoint << endl; cerr.flush(); |
1237 query=query+queryPoint*dbH->dim; | 1237 query=query+queryPoint*dbH->dim; |
1238 numVectors=queryPoint+1; | 1238 numVectors=queryPoint+1; |
1239 } | 1239 } |
1240 | 1240 |
1241 // build segment offset table | 1241 // build track offset table |
1242 unsigned *segOffsetTable = new unsigned[dbH->numFiles]; | 1242 unsigned *trackOffsetTable = new unsigned[dbH->numFiles]; |
1243 unsigned cumSeg=0; | 1243 unsigned cumTrack=0; |
1244 unsigned segIndexOffset; | 1244 unsigned trackIndexOffset; |
1245 for(k=0; k<dbH->numFiles;k++){ | 1245 for(k=0; k<dbH->numFiles;k++){ |
1246 segOffsetTable[k]=cumSeg; | 1246 trackOffsetTable[k]=cumTrack; |
1247 cumSeg+=segTable[k]*dbH->dim; | 1247 cumTrack+=trackTable[k]*dbH->dim; |
1248 } | 1248 } |
1249 | 1249 |
1250 char nextKey[MAXSTR]; | 1250 char nextKey[MAXSTR]; |
1251 | 1251 |
1252 gettimeofday(&tv1, NULL); | 1252 gettimeofday(&tv1, NULL); |
1253 | 1253 |
1254 for(processedSegs=0, seg=0 ; processedSegs < dbH->numFiles ; seg++, processedSegs++){ | 1254 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){ |
1255 if(segFile){ | 1255 if(trackFile){ |
1256 if(!segFile->eof()){ | 1256 if(!trackFile->eof()){ |
1257 segFile->getline(nextKey,MAXSTR); | 1257 trackFile->getline(nextKey,MAXSTR); |
1258 seg=getKeyPos(nextKey); | 1258 track=getKeyPos(nextKey); |
1259 } | 1259 } |
1260 else | 1260 else |
1261 break; | 1261 break; |
1262 } | 1262 } |
1263 segOffset=segOffsetTable[seg]; // numDoubles offset | 1263 trackOffset=trackOffsetTable[track]; // numDoubles offset |
1264 segIndexOffset=segOffset/dbH->dim; // numVectors offset | 1264 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset |
1265 if(verbosity>7) | 1265 if(verbosity>7) |
1266 cerr << seg << "." << segOffset/(dbH->dim) << "." << segTable[seg] << " | ";cerr.flush(); | 1266 cerr << track << "." << trackOffset/(dbH->dim) << "." << trackTable[track] << " | ";cerr.flush(); |
1267 | 1267 |
1268 if(dbH->flags & O2_FLAG_L2NORM) | 1268 if(dbH->flags & O2_FLAG_L2NORM) |
1269 usingQueryPoint?query=queryCopy+queryPoint*dbH->dim:query=queryCopy; | 1269 usingQueryPoint?query=queryCopy+queryPoint*dbH->dim:query=queryCopy; |
1270 else | 1270 else |
1271 usingQueryPoint?query=(double*)(indata+sizeof(int))+queryPoint*dbH->dim:query=(double*)(indata+sizeof(int)); | 1271 usingQueryPoint?query=(double*)(indata+sizeof(int))+queryPoint*dbH->dim:query=(double*)(indata+sizeof(int)); |
1272 if(usingQueryPoint) | 1272 if(usingQueryPoint) |
1273 j=1; | 1273 j=1; |
1274 else | 1274 else |
1275 j=numVectors; | 1275 j=numVectors; |
1276 while(j--){ | 1276 while(j--){ |
1277 k=segTable[seg]; // number of vectors in seg | 1277 k=trackTable[track]; // number of vectors in track |
1278 data=dataBuf+segOffset; // data for seg | 1278 data=dataBuf+trackOffset; // data for track |
1279 while(k--){ | 1279 while(k--){ |
1280 thisDist=0; | 1280 thisDist=0; |
1281 l=dbH->dim; | 1281 l=dbH->dim; |
1282 double* q=query; | 1282 double* q=query; |
1283 while(l--) | 1283 while(l--) |
1284 thisDist+=*q++**data++; | 1284 thisDist+=*q++**data++; |
1285 if(!usingTimes || | 1285 if(!usingTimes || |
1286 (usingTimes | 1286 (usingTimes |
1287 && fabs(meanDBdur[seg]-meanQdur)<meanQdur*timesTol)){ | 1287 && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){ |
1288 n=pointNN; | 1288 n=pointNN; |
1289 while(n--){ | 1289 while(n--){ |
1290 if(thisDist>=distances[n]){ | 1290 if(thisDist>=distances[n]){ |
1291 if((n==0 || thisDist<=distances[n-1])){ | 1291 if((n==0 || thisDist<=distances[n-1])){ |
1292 // Copy all values above up the queue | 1292 // Copy all values above up the queue |
1295 qIndexes[l]=qIndexes[l-1]; | 1295 qIndexes[l]=qIndexes[l-1]; |
1296 sIndexes[l]=sIndexes[l-1]; | 1296 sIndexes[l]=sIndexes[l-1]; |
1297 } | 1297 } |
1298 distances[n]=thisDist; | 1298 distances[n]=thisDist; |
1299 qIndexes[n]=numVectors-j-1; | 1299 qIndexes[n]=numVectors-j-1; |
1300 sIndexes[n]=segTable[seg]-k-1; | 1300 sIndexes[n]=trackTable[track]-k-1; |
1301 break; | 1301 break; |
1302 } | 1302 } |
1303 } | 1303 } |
1304 else | 1304 else |
1305 break; | 1305 break; |
1306 } | 1306 } |
1307 } | 1307 } |
1308 } // seg | 1308 } // track |
1309 // Move query pointer to next query point | 1309 // Move query pointer to next query point |
1310 query+=dbH->dim; | 1310 query+=dbH->dim; |
1311 } // query | 1311 } // query |
1312 // Take the average of this seg's distance | 1312 // Take the average of this track's distance |
1313 // Test the seg distances | 1313 // Test the track distances |
1314 thisDist=0; | 1314 thisDist=0; |
1315 n=pointNN; | 1315 n=pointNN; |
1316 while(n--) | 1316 while(n--) |
1317 thisDist+=distances[pointNN-n-1]; | 1317 thisDist+=distances[pointNN-n-1]; |
1318 thisDist/=pointNN; | 1318 thisDist/=pointNN; |
1319 n=segNN; | 1319 n=trackNN; |
1320 while(n--){ | 1320 while(n--){ |
1321 if(thisDist>=segDistances[n]){ | 1321 if(thisDist>=trackDistances[n]){ |
1322 if((n==0 || thisDist<=segDistances[n-1])){ | 1322 if((n==0 || thisDist<=trackDistances[n-1])){ |
1323 // Copy all values above up the queue | 1323 // Copy all values above up the queue |
1324 for( l=pointNN-1 ; l > n ; l--){ | 1324 for( l=pointNN-1 ; l > n ; l--){ |
1325 segDistances[l]=segDistances[l-1]; | 1325 trackDistances[l]=trackDistances[l-1]; |
1326 segQIndexes[l]=segQIndexes[l-1]; | 1326 trackQIndexes[l]=trackQIndexes[l-1]; |
1327 segSIndexes[l]=segSIndexes[l-1]; | 1327 trackSIndexes[l]=trackSIndexes[l-1]; |
1328 segIDs[l]=segIDs[l-1]; | 1328 trackIDs[l]=trackIDs[l-1]; |
1329 } | 1329 } |
1330 segDistances[n]=thisDist; | 1330 trackDistances[n]=thisDist; |
1331 segQIndexes[n]=qIndexes[0]; | 1331 trackQIndexes[n]=qIndexes[0]; |
1332 segSIndexes[n]=sIndexes[0]; | 1332 trackSIndexes[n]=sIndexes[0]; |
1333 segIDs[n]=seg; | 1333 trackIDs[n]=track; |
1334 break; | 1334 break; |
1335 } | 1335 } |
1336 } | 1336 } |
1337 else | 1337 else |
1338 break; | 1338 break; |
1340 for(unsigned k=0; k<pointNN; k++){ | 1340 for(unsigned k=0; k<pointNN; k++){ |
1341 distances[k]=0.0; | 1341 distances[k]=0.0; |
1342 qIndexes[k]=~0; | 1342 qIndexes[k]=~0; |
1343 sIndexes[k]=~0; | 1343 sIndexes[k]=~0; |
1344 } | 1344 } |
1345 } // segs | 1345 } // tracks |
1346 gettimeofday(&tv2, NULL); | 1346 gettimeofday(&tv2, NULL); |
1347 | 1347 |
1348 if(verbosity>1) | 1348 if(verbosity>1) |
1349 cerr << endl << "processed segs :" << processedSegs | 1349 cerr << endl << "processed tracks :" << processedTracks |
1350 << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl; | 1350 << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl; |
1351 | 1351 |
1352 if(adbQueryResult==0){ | 1352 if(adbQueryResult==0){ |
1353 if(verbosity>1) | 1353 if(verbosity>1) |
1354 cerr<<endl; | 1354 cerr<<endl; |
1355 // Output answer | 1355 // Output answer |
1356 // Loop over nearest neighbours | 1356 // Loop over nearest neighbours |
1357 for(k=0; k < min(segNN,processedSegs); k++) | 1357 for(k=0; k < min(trackNN,processedTracks); k++) |
1358 cout << fileTable+segIDs[k]*O2_FILETABLESIZE | 1358 cout << fileTable+trackIDs[k]*O2_FILETABLESIZE |
1359 << " " << segDistances[k] << " " << segQIndexes[k] << " " << segSIndexes[k] << endl; | 1359 << " " << trackDistances[k] << " " << trackQIndexes[k] << " " << trackSIndexes[k] << endl; |
1360 } | 1360 } |
1361 else{ // Process Web Services Query | 1361 else{ // Process Web Services Query |
1362 int listLen = min(segNN, processedSegs); | 1362 int listLen = min(trackNN, processedTracks); |
1363 adbQueryResult->__sizeRlist=listLen; | 1363 adbQueryResult->__sizeRlist=listLen; |
1364 adbQueryResult->__sizeDist=listLen; | 1364 adbQueryResult->__sizeDist=listLen; |
1365 adbQueryResult->__sizeQpos=listLen; | 1365 adbQueryResult->__sizeQpos=listLen; |
1366 adbQueryResult->__sizeSpos=listLen; | 1366 adbQueryResult->__sizeSpos=listLen; |
1367 adbQueryResult->Rlist= new char*[listLen]; | 1367 adbQueryResult->Rlist= new char*[listLen]; |
1368 adbQueryResult->Dist = new double[listLen]; | 1368 adbQueryResult->Dist = new double[listLen]; |
1369 adbQueryResult->Qpos = new int[listLen]; | 1369 adbQueryResult->Qpos = new int[listLen]; |
1370 adbQueryResult->Spos = new int[listLen]; | 1370 adbQueryResult->Spos = new int[listLen]; |
1371 for(k=0; k<adbQueryResult->__sizeRlist; k++){ | 1371 for(k=0; k<adbQueryResult->__sizeRlist; k++){ |
1372 adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR]; | 1372 adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR]; |
1373 adbQueryResult->Dist[k]=segDistances[k]; | 1373 adbQueryResult->Dist[k]=trackDistances[k]; |
1374 adbQueryResult->Qpos[k]=segQIndexes[k]; | 1374 adbQueryResult->Qpos[k]=trackQIndexes[k]; |
1375 adbQueryResult->Spos[k]=segSIndexes[k]; | 1375 adbQueryResult->Spos[k]=trackSIndexes[k]; |
1376 sprintf(adbQueryResult->Rlist[k], "%s", fileTable+segIDs[k]*O2_FILETABLESIZE); | 1376 sprintf(adbQueryResult->Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE); |
1377 } | 1377 } |
1378 } | 1378 } |
1379 | 1379 |
1380 | 1380 |
1381 // Clean up | 1381 // Clean up |
1382 if(segOffsetTable) | 1382 if(trackOffsetTable) |
1383 delete segOffsetTable; | 1383 delete trackOffsetTable; |
1384 if(queryCopy) | 1384 if(queryCopy) |
1385 delete queryCopy; | 1385 delete queryCopy; |
1386 if(qNorm) | 1386 if(qNorm) |
1387 delete qNorm; | 1387 delete qNorm; |
1388 if(timesdata) | 1388 if(timesdata) |
1394 | 1394 |
1395 void audioDB::deleteDB(const char* dbName, const char* inFile){ | 1395 void audioDB::deleteDB(const char* dbName, const char* inFile){ |
1396 | 1396 |
1397 } | 1397 } |
1398 | 1398 |
1399 // NBest matched filter distance between query and target segs | 1399 // NBest matched filter distance between query and target tracks |
1400 // efficient implementation | 1400 // efficient implementation |
1401 // outputs average of N minimum matched filter distances | 1401 // outputs average of N minimum matched filter distances |
1402 void audioDB::segSequenceQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){ | 1402 void audioDB::trackSequenceQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){ |
1403 | 1403 |
1404 initTables(dbName, inFile); | 1404 initTables(dbName, inFile); |
1405 | 1405 |
1406 // For each input vector, find the closest pointNN matching output vectors and report | 1406 // For each input vector, find the closest pointNN matching output vectors and report |
1407 // we use stdout in this stub version | 1407 // we use stdout in this stub version |
1408 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); | 1408 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); |
1409 unsigned numSegs = dbH->numFiles; | 1409 unsigned numTracks = dbH->numFiles; |
1410 | 1410 |
1411 double* query = (double*)(indata+sizeof(int)); | 1411 double* query = (double*)(indata+sizeof(int)); |
1412 double* data = dataBuf; | 1412 double* data = dataBuf; |
1413 double* queryCopy = 0; | 1413 double* queryCopy = 0; |
1414 | 1414 |
1441 double tmp1,tmp2; | 1441 double tmp1,tmp2; |
1442 // Copy the L2 norm values to core to avoid disk random access later on | 1442 // Copy the L2 norm values to core to avoid disk random access later on |
1443 memcpy(sNorm, l2normTable, dbVectors*sizeof(double)); | 1443 memcpy(sNorm, l2normTable, dbVectors*sizeof(double)); |
1444 double* snPtr = sNorm; | 1444 double* snPtr = sNorm; |
1445 for(i=0; i<dbH->numFiles; i++){ | 1445 for(i=0; i<dbH->numFiles; i++){ |
1446 if(segTable[i]>sequenceLength){ | 1446 if(trackTable[i]>sequenceLength){ |
1447 tmp1=*snPtr; | 1447 tmp1=*snPtr; |
1448 j=1; | 1448 j=1; |
1449 w=sequenceLength-1; | 1449 w=sequenceLength-1; |
1450 while(w--) | 1450 while(w--) |
1451 *snPtr+=snPtr[j++]; | 1451 *snPtr+=snPtr[j++]; |
1452 ps = snPtr+1; | 1452 ps = snPtr+1; |
1453 w=segTable[i]-sequenceLength; // +1 - 1 | 1453 w=trackTable[i]-sequenceLength; // +1 - 1 |
1454 while(w--){ | 1454 while(w--){ |
1455 tmp2=*ps; | 1455 tmp2=*ps; |
1456 *ps=*(ps-1)-tmp1+*(ps+sequenceLength); | 1456 *ps=*(ps-1)-tmp1+*(ps+sequenceLength); |
1457 tmp1=tmp2; | 1457 tmp1=tmp2; |
1458 ps++; | 1458 ps++; |
1459 } | 1459 } |
1460 } | 1460 } |
1461 snPtr+=segTable[i]; | 1461 snPtr+=trackTable[i]; |
1462 } | 1462 } |
1463 | 1463 |
1464 double* pn = sMeanL2; | 1464 double* pn = sMeanL2; |
1465 w=dbH->numFiles; | 1465 w=dbH->numFiles; |
1466 while(w--) | 1466 while(w--) |
1467 *pn++=0.0; | 1467 *pn++=0.0; |
1468 ps=sNorm; | 1468 ps=sNorm; |
1469 unsigned processedSegs=0; | 1469 unsigned processedTracks=0; |
1470 for(i=0; i<dbH->numFiles; i++){ | 1470 for(i=0; i<dbH->numFiles; i++){ |
1471 if(segTable[i]>sequenceLength-1){ | 1471 if(trackTable[i]>sequenceLength-1){ |
1472 w = segTable[i]-sequenceLength+1; | 1472 w = trackTable[i]-sequenceLength+1; |
1473 pn = sMeanL2+i; | 1473 pn = sMeanL2+i; |
1474 while(w--) | 1474 while(w--) |
1475 *pn+=*ps++; | 1475 *pn+=*ps++; |
1476 *pn/=segTable[i]-sequenceLength+1; | 1476 *pn/=trackTable[i]-sequenceLength+1; |
1477 SILENCE_THRESH+=*pn; | 1477 SILENCE_THRESH+=*pn; |
1478 processedSegs++; | 1478 processedTracks++; |
1479 } | 1479 } |
1480 ps = sNorm + segTable[i]; | 1480 ps = sNorm + trackTable[i]; |
1481 } | 1481 } |
1482 if(verbosity>1) | 1482 if(verbosity>1) |
1483 cerr << "processedSegs: " << processedSegs << endl; | 1483 cerr << "processedTracks: " << processedTracks << endl; |
1484 SILENCE_THRESH/=processedSegs; | 1484 SILENCE_THRESH/=processedTracks; |
1485 USE_THRESH=1; // Turn thresholding on | 1485 USE_THRESH=1; // Turn thresholding on |
1486 DIFF_THRESH=SILENCE_THRESH/2; // 50% of the mean shingle power | 1486 DIFF_THRESH=SILENCE_THRESH/2; // 50% of the mean shingle power |
1487 SILENCE_THRESH/=10; // 10% of the mean shingle power is SILENCE | 1487 SILENCE_THRESH/=10; // 10% of the mean shingle power is SILENCE |
1488 | 1488 |
1489 w=sequenceLength-1; | 1489 w=sequenceLength-1; |
1505 if(verbosity>1) | 1505 if(verbosity>1) |
1506 cerr << "done." << endl; | 1506 cerr << "done." << endl; |
1507 | 1507 |
1508 | 1508 |
1509 if(verbosity>1) | 1509 if(verbosity>1) |
1510 cerr << "matching segs..." << endl; | 1510 cerr << "matching tracks..." << endl; |
1511 | 1511 |
1512 assert(pointNN>0 && pointNN<=O2_MAXNN); | 1512 assert(pointNN>0 && pointNN<=O2_MAXNN); |
1513 assert(segNN>0 && segNN<=O2_MAXNN); | 1513 assert(trackNN>0 && trackNN<=O2_MAXNN); |
1514 | 1514 |
1515 // Make temporary dynamic memory for results | 1515 // Make temporary dynamic memory for results |
1516 double segDistances[segNN]; | 1516 double trackDistances[trackNN]; |
1517 unsigned segIDs[segNN]; | 1517 unsigned trackIDs[trackNN]; |
1518 unsigned segQIndexes[segNN]; | 1518 unsigned trackQIndexes[trackNN]; |
1519 unsigned segSIndexes[segNN]; | 1519 unsigned trackSIndexes[trackNN]; |
1520 | 1520 |
1521 double distances[pointNN]; | 1521 double distances[pointNN]; |
1522 unsigned qIndexes[pointNN]; | 1522 unsigned qIndexes[pointNN]; |
1523 unsigned sIndexes[pointNN]; | 1523 unsigned sIndexes[pointNN]; |
1524 | 1524 |
1525 | 1525 |
1526 unsigned k,l,m,n,seg,segOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength; | 1526 unsigned k,l,m,n,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength; |
1527 double thisDist; | 1527 double thisDist; |
1528 double oneOverWL=1.0/wL; | 1528 double oneOverWL=1.0/wL; |
1529 | 1529 |
1530 for(k=0; k<pointNN; k++){ | 1530 for(k=0; k<pointNN; k++){ |
1531 distances[k]=0.0; | 1531 distances[k]=0.0; |
1532 qIndexes[k]=~0; | 1532 qIndexes[k]=~0; |
1533 sIndexes[k]=~0; | 1533 sIndexes[k]=~0; |
1534 } | 1534 } |
1535 | 1535 |
1536 for(k=0; k<segNN; k++){ | 1536 for(k=0; k<trackNN; k++){ |
1537 segDistances[k]=0.0; | 1537 trackDistances[k]=0.0; |
1538 segQIndexes[k]=~0; | 1538 trackQIndexes[k]=~0; |
1539 segSIndexes[k]=~0; | 1539 trackSIndexes[k]=~0; |
1540 segIDs[k]=~0; | 1540 trackIDs[k]=~0; |
1541 } | 1541 } |
1542 | 1542 |
1543 // Timestamp and durations processing | 1543 // Timestamp and durations processing |
1544 double meanQdur = 0; | 1544 double meanQdur = 0; |
1545 double* timesdata = 0; | 1545 double* timesdata = 0; |
1567 cerr << "mean query file duration: " << meanQdur << endl; | 1567 cerr << "mean query file duration: " << meanQdur << endl; |
1568 meanDBdur = new double[dbH->numFiles]; | 1568 meanDBdur = new double[dbH->numFiles]; |
1569 assert(meanDBdur); | 1569 assert(meanDBdur); |
1570 for(k=0; k<dbH->numFiles; k++){ | 1570 for(k=0; k<dbH->numFiles; k++){ |
1571 meanDBdur[k]=0.0; | 1571 meanDBdur[k]=0.0; |
1572 for(j=0; j<segTable[k]-1 ; j++) | 1572 for(j=0; j<trackTable[k]-1 ; j++) |
1573 meanDBdur[k]+=timesTable[j+1]-timesTable[j]; | 1573 meanDBdur[k]+=timesTable[j+1]-timesTable[j]; |
1574 meanDBdur[k]/=j; | 1574 meanDBdur[k]/=j; |
1575 } | 1575 } |
1576 } | 1576 } |
1577 | 1577 |
1593 assert(D); | 1593 assert(D); |
1594 DD = new double*[numVectors]; | 1594 DD = new double*[numVectors]; |
1595 assert(DD); | 1595 assert(DD); |
1596 | 1596 |
1597 gettimeofday(&tv1, NULL); | 1597 gettimeofday(&tv1, NULL); |
1598 processedSegs=0; | 1598 processedTracks=0; |
1599 unsigned successfulSegs=0; | 1599 unsigned successfulTracks=0; |
1600 | 1600 |
1601 double* qp; | 1601 double* qp; |
1602 double* sp; | 1602 double* sp; |
1603 double* dp; | 1603 double* dp; |
1604 double diffL2; | 1604 double diffL2; |
1605 | 1605 |
1606 // build segment offset table | 1606 // build track offset table |
1607 unsigned *segOffsetTable = new unsigned[dbH->numFiles]; | 1607 unsigned *trackOffsetTable = new unsigned[dbH->numFiles]; |
1608 unsigned cumSeg=0; | 1608 unsigned cumTrack=0; |
1609 unsigned segIndexOffset; | 1609 unsigned trackIndexOffset; |
1610 for(k=0; k<dbH->numFiles;k++){ | 1610 for(k=0; k<dbH->numFiles;k++){ |
1611 segOffsetTable[k]=cumSeg; | 1611 trackOffsetTable[k]=cumTrack; |
1612 cumSeg+=segTable[k]*dbH->dim; | 1612 cumTrack+=trackTable[k]*dbH->dim; |
1613 } | 1613 } |
1614 | 1614 |
1615 char nextKey [MAXSTR]; | 1615 char nextKey [MAXSTR]; |
1616 for(processedSegs=0, seg=0 ; processedSegs < dbH->numFiles ; seg++, processedSegs++){ | 1616 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){ |
1617 | 1617 |
1618 // get segID from file if using a control file | 1618 // get trackID from file if using a control file |
1619 if(segFile){ | 1619 if(trackFile){ |
1620 if(!segFile->eof()){ | 1620 if(!trackFile->eof()){ |
1621 segFile->getline(nextKey,MAXSTR); | 1621 trackFile->getline(nextKey,MAXSTR); |
1622 seg=getKeyPos(nextKey); | 1622 track=getKeyPos(nextKey); |
1623 } | 1623 } |
1624 else | 1624 else |
1625 break; | 1625 break; |
1626 } | 1626 } |
1627 | 1627 |
1628 segOffset=segOffsetTable[seg]; // numDoubles offset | 1628 trackOffset=trackOffsetTable[track]; // numDoubles offset |
1629 segIndexOffset=segOffset/dbH->dim; // numVectors offset | 1629 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset |
1630 | 1630 |
1631 if(sequenceLength<segTable[seg]){ // test for short sequences | 1631 if(sequenceLength<trackTable[track]){ // test for short sequences |
1632 | 1632 |
1633 if(verbosity>7) | 1633 if(verbosity>7) |
1634 cerr << seg << "." << segIndexOffset << "." << segTable[seg] << " | ";cerr.flush(); | 1634 cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";cerr.flush(); |
1635 | 1635 |
1636 // Cross-correlation matrix | 1636 // Cross-correlation matrix |
1637 for(j=0; j<numVectors;j++){ | 1637 for(j=0; j<numVectors;j++){ |
1638 D[j]=new double[segTable[seg]]; | 1638 D[j]=new double[trackTable[track]]; |
1639 assert(D[j]); | 1639 assert(D[j]); |
1640 | 1640 |
1641 } | 1641 } |
1642 | 1642 |
1643 // Matched filter matrix | 1643 // Matched filter matrix |
1644 for(j=0; j<numVectors;j++){ | 1644 for(j=0; j<numVectors;j++){ |
1645 DD[j]=new double[segTable[seg]]; | 1645 DD[j]=new double[trackTable[track]]; |
1646 assert(DD[j]); | 1646 assert(DD[j]); |
1647 } | 1647 } |
1648 | 1648 |
1649 // Cross Correlation | 1649 // Cross Correlation |
1650 for(j=0; j<numVectors; j++) | 1650 for(j=0; j<numVectors; j++) |
1651 for(k=0; k<segTable[seg]; k++){ | 1651 for(k=0; k<trackTable[track]; k++){ |
1652 qp=query+j*dbH->dim; | 1652 qp=query+j*dbH->dim; |
1653 sp=dataBuf+segOffset+k*dbH->dim; | 1653 sp=dataBuf+trackOffset+k*dbH->dim; |
1654 DD[j][k]=0.0; // Initialize matched filter array | 1654 DD[j][k]=0.0; // Initialize matched filter array |
1655 dp=&D[j][k]; // point to correlation cell j,k | 1655 dp=&D[j][k]; // point to correlation cell j,k |
1656 *dp=0.0; // initialize correlation cell | 1656 *dp=0.0; // initialize correlation cell |
1657 l=dbH->dim; // size of vectors | 1657 l=dbH->dim; // size of vectors |
1658 while(l--) | 1658 while(l--) |
1665 if(HOP_SIZE==1){ // HOP_SIZE = shingleHop | 1665 if(HOP_SIZE==1){ // HOP_SIZE = shingleHop |
1666 for(w=0; w<wL; w++) | 1666 for(w=0; w<wL; w++) |
1667 for(j=0; j<numVectors-w; j++){ | 1667 for(j=0; j<numVectors-w; j++){ |
1668 sp=DD[j]; | 1668 sp=DD[j]; |
1669 spd=D[j+w]+w; | 1669 spd=D[j+w]+w; |
1670 k=segTable[seg]-w; | 1670 k=trackTable[track]-w; |
1671 while(k--) | 1671 while(k--) |
1672 *sp+++=*spd++; | 1672 *sp+++=*spd++; |
1673 } | 1673 } |
1674 } | 1674 } |
1675 else{ // HOP_SIZE != 1 | 1675 else{ // HOP_SIZE != 1 |
1676 for(w=0; w<wL; w++) | 1676 for(w=0; w<wL; w++) |
1677 for(j=0; j<numVectors-w; j+=HOP_SIZE){ | 1677 for(j=0; j<numVectors-w; j+=HOP_SIZE){ |
1678 sp=DD[j]; | 1678 sp=DD[j]; |
1679 spd=D[j+w]+w; | 1679 spd=D[j+w]+w; |
1680 for(k=0; k<segTable[seg]-w; k+=HOP_SIZE){ | 1680 for(k=0; k<trackTable[track]-w; k+=HOP_SIZE){ |
1681 *sp+=*spd; | 1681 *sp+=*spd; |
1682 sp+=HOP_SIZE; | 1682 sp+=HOP_SIZE; |
1683 spd+=HOP_SIZE; | 1683 spd+=HOP_SIZE; |
1684 } | 1684 } |
1685 } | 1685 } |
1686 } | 1686 } |
1687 | 1687 |
1688 if(verbosity>3 && usingTimes){ | 1688 if(verbosity>3 && usingTimes){ |
1689 cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[seg] << endl; | 1689 cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[track] << endl; |
1690 cerr.flush(); | 1690 cerr.flush(); |
1691 } | 1691 } |
1692 | 1692 |
1693 if(!usingTimes || | 1693 if(!usingTimes || |
1694 (usingTimes | 1694 (usingTimes |
1695 && fabs(meanDBdur[seg]-meanQdur)<meanQdur*timesTol)){ | 1695 && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){ |
1696 | 1696 |
1697 if(verbosity>3 && usingTimes){ | 1697 if(verbosity>3 && usingTimes){ |
1698 cerr << "within duration tolerance." << endl; | 1698 cerr << "within duration tolerance." << endl; |
1699 cerr.flush(); | 1699 cerr.flush(); |
1700 } | 1700 } |
1701 | 1701 |
1702 // Search for minimum distance by shingles (concatenated vectors) | 1702 // Search for minimum distance by shingles (concatenated vectors) |
1703 for(j=0;j<numVectors-wL+1;j+=HOP_SIZE) | 1703 for(j=0;j<numVectors-wL+1;j+=HOP_SIZE) |
1704 for(k=0;k<segTable[seg]-wL+1;k+=HOP_SIZE){ | 1704 for(k=0;k<trackTable[track]-wL+1;k+=HOP_SIZE){ |
1705 | 1705 |
1706 diffL2 = fabs(qNorm[j] - sNorm[k]); | 1706 diffL2 = fabs(qNorm[j] - sNorm[k]); |
1707 // Power test | 1707 // Power test |
1708 if(!USE_THRESH || | 1708 if(!USE_THRESH || |
1709 // Threshold on mean L2 of Q and S sequences | 1709 // Threshold on mean L2 of Q and S sequences |
1710 (USE_THRESH && qNorm[j]>SILENCE_THRESH && sNorm[k]>SILENCE_THRESH && | 1710 (USE_THRESH && qNorm[j]>SILENCE_THRESH && sNorm[k]>SILENCE_THRESH && |
1711 // Are both query and target windows above mean energy? | 1711 // Are both query and target windows above mean energy? |
1712 (qNorm[j]>qMeanL2 && sNorm[k]>sMeanL2[seg] && diffL2 < DIFF_THRESH ))) | 1712 (qNorm[j]>qMeanL2 && sNorm[k]>sMeanL2[track] && diffL2 < DIFF_THRESH ))) |
1713 thisDist=DD[j][k]*oneOverWL; | 1713 thisDist=DD[j][k]*oneOverWL; |
1714 else | 1714 else |
1715 thisDist=0.0; | 1715 thisDist=0.0; |
1716 | 1716 |
1717 // NBest match algorithm | 1717 // NBest match algorithm |
1744 thisDist+=distances[m]; | 1744 thisDist+=distances[m]; |
1745 thisDist/=pointNN; | 1745 thisDist/=pointNN; |
1746 | 1746 |
1747 // Let's see the distances then... | 1747 // Let's see the distances then... |
1748 if(verbosity>3) | 1748 if(verbosity>3) |
1749 cerr << fileTable+seg*O2_FILETABLESIZE << " " << thisDist << endl; | 1749 cerr << fileTable+track*O2_FILETABLESIZE << " " << thisDist << endl; |
1750 | 1750 |
1751 // All the seg stuff goes here | 1751 // All the track stuff goes here |
1752 n=segNN; | 1752 n=trackNN; |
1753 while(n--){ | 1753 while(n--){ |
1754 if(thisDist>=segDistances[n]){ | 1754 if(thisDist>=trackDistances[n]){ |
1755 if((n==0 || thisDist<=segDistances[n-1])){ | 1755 if((n==0 || thisDist<=trackDistances[n-1])){ |
1756 // Copy all values above up the queue | 1756 // Copy all values above up the queue |
1757 for( l=segNN-1 ; l > n ; l--){ | 1757 for( l=trackNN-1 ; l > n ; l--){ |
1758 segDistances[l]=segDistances[l-1]; | 1758 trackDistances[l]=trackDistances[l-1]; |
1759 segQIndexes[l]=segQIndexes[l-1]; | 1759 trackQIndexes[l]=trackQIndexes[l-1]; |
1760 segSIndexes[l]=segSIndexes[l-1]; | 1760 trackSIndexes[l]=trackSIndexes[l-1]; |
1761 segIDs[l]=segIDs[l-1]; | 1761 trackIDs[l]=trackIDs[l-1]; |
1762 } | 1762 } |
1763 segDistances[n]=thisDist; | 1763 trackDistances[n]=thisDist; |
1764 segQIndexes[n]=qIndexes[0]; | 1764 trackQIndexes[n]=qIndexes[0]; |
1765 segSIndexes[n]=sIndexes[0]; | 1765 trackSIndexes[n]=sIndexes[0]; |
1766 successfulSegs++; | 1766 successfulTracks++; |
1767 segIDs[n]=seg; | 1767 trackIDs[n]=track; |
1768 break; | 1768 break; |
1769 } | 1769 } |
1770 } | 1770 } |
1771 else | 1771 else |
1772 break; | 1772 break; |
1773 } | 1773 } |
1774 } // Duration match | 1774 } // Duration match |
1775 | 1775 |
1776 // per-seg reset array values | 1776 // per-track reset array values |
1777 for(unsigned k=0; k<pointNN; k++){ | 1777 for(unsigned k=0; k<pointNN; k++){ |
1778 distances[k]=0.0; | 1778 distances[k]=0.0; |
1779 qIndexes[k]=~0; | 1779 qIndexes[k]=~0; |
1780 sIndexes[k]=~0; | 1780 sIndexes[k]=~0; |
1781 } | 1781 } |
1782 | 1782 |
1783 // Clean up current seg | 1783 // Clean up current track |
1784 if(D!=NULL){ | 1784 if(D!=NULL){ |
1785 for(j=0; j<numVectors; j++) | 1785 for(j=0; j<numVectors; j++) |
1786 delete[] D[j]; | 1786 delete[] D[j]; |
1787 } | 1787 } |
1788 | 1788 |
1793 } | 1793 } |
1794 } | 1794 } |
1795 | 1795 |
1796 gettimeofday(&tv2,NULL); | 1796 gettimeofday(&tv2,NULL); |
1797 if(verbosity>1) | 1797 if(verbosity>1) |
1798 cerr << endl << "processed segs :" << processedSegs << " matched segments: " << successfulSegs << " elapsed time:" | 1798 cerr << endl << "processed tracks :" << processedTracks << " matched tracks: " << successfulTracks << " elapsed time:" |
1799 << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl; | 1799 << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl; |
1800 | 1800 |
1801 if(adbQueryResult==0){ | 1801 if(adbQueryResult==0){ |
1802 if(verbosity>1) | 1802 if(verbosity>1) |
1803 cerr<<endl; | 1803 cerr<<endl; |
1804 // Output answer | 1804 // Output answer |
1805 // Loop over nearest neighbours | 1805 // Loop over nearest neighbours |
1806 for(k=0; k < min(segNN,successfulSegs); k++) | 1806 for(k=0; k < min(trackNN,successfulTracks); k++) |
1807 cout << fileTable+segIDs[k]*O2_FILETABLESIZE << " " << segDistances[k] << " " << segQIndexes[k] << " " << segSIndexes[k] << endl; | 1807 cout << fileTable+trackIDs[k]*O2_FILETABLESIZE << " " << trackDistances[k] << " " << trackQIndexes[k] << " " << trackSIndexes[k] << endl; |
1808 } | 1808 } |
1809 else{ // Process Web Services Query | 1809 else{ // Process Web Services Query |
1810 int listLen = min(segNN, processedSegs); | 1810 int listLen = min(trackNN, processedTracks); |
1811 adbQueryResult->__sizeRlist=listLen; | 1811 adbQueryResult->__sizeRlist=listLen; |
1812 adbQueryResult->__sizeDist=listLen; | 1812 adbQueryResult->__sizeDist=listLen; |
1813 adbQueryResult->__sizeQpos=listLen; | 1813 adbQueryResult->__sizeQpos=listLen; |
1814 adbQueryResult->__sizeSpos=listLen; | 1814 adbQueryResult->__sizeSpos=listLen; |
1815 adbQueryResult->Rlist= new char*[listLen]; | 1815 adbQueryResult->Rlist= new char*[listLen]; |
1816 adbQueryResult->Dist = new double[listLen]; | 1816 adbQueryResult->Dist = new double[listLen]; |
1817 adbQueryResult->Qpos = new int[listLen]; | 1817 adbQueryResult->Qpos = new int[listLen]; |
1818 adbQueryResult->Spos = new int[listLen]; | 1818 adbQueryResult->Spos = new int[listLen]; |
1819 for(k=0; k<adbQueryResult->__sizeRlist; k++){ | 1819 for(k=0; k<adbQueryResult->__sizeRlist; k++){ |
1820 adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR]; | 1820 adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR]; |
1821 adbQueryResult->Dist[k]=segDistances[k]; | 1821 adbQueryResult->Dist[k]=trackDistances[k]; |
1822 adbQueryResult->Qpos[k]=segQIndexes[k]; | 1822 adbQueryResult->Qpos[k]=trackQIndexes[k]; |
1823 adbQueryResult->Spos[k]=segSIndexes[k]; | 1823 adbQueryResult->Spos[k]=trackSIndexes[k]; |
1824 sprintf(adbQueryResult->Rlist[k], "%s", fileTable+segIDs[k]*O2_FILETABLESIZE); | 1824 sprintf(adbQueryResult->Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE); |
1825 } | 1825 } |
1826 } | 1826 } |
1827 | 1827 |
1828 | 1828 |
1829 // Clean up | 1829 // Clean up |
1830 if(segOffsetTable) | 1830 if(trackOffsetTable) |
1831 delete segOffsetTable; | 1831 delete trackOffsetTable; |
1832 if(queryCopy) | 1832 if(queryCopy) |
1833 delete queryCopy; | 1833 delete queryCopy; |
1834 //if(qNorm) | 1834 //if(qNorm) |
1835 //delete qNorm; | 1835 //delete qNorm; |
1836 if(D) | 1836 if(D) |
1843 delete meanDBdur; | 1843 delete meanDBdur; |
1844 | 1844 |
1845 | 1845 |
1846 } | 1846 } |
1847 | 1847 |
1848 // NBest matched filter distance between query and target segs | 1848 // NBest matched filter distance between query and target tracks |
1849 // efficient implementation | 1849 // efficient implementation |
1850 // outputs average of N minimum matched filter distances | 1850 // outputs average of N minimum matched filter distances |
1851 void audioDB::segSequenceQueryEuc(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){ | 1851 void audioDB::trackSequenceQueryEuc(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){ |
1852 | 1852 |
1853 initTables(dbName, inFile); | 1853 initTables(dbName, inFile); |
1854 | 1854 |
1855 // For each input vector, find the closest pointNN matching output vectors and report | 1855 // For each input vector, find the closest pointNN matching output vectors and report |
1856 // we use stdout in this stub version | 1856 // we use stdout in this stub version |
1857 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); | 1857 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); |
1858 unsigned numSegs = dbH->numFiles; | 1858 unsigned numTracks = dbH->numFiles; |
1859 | 1859 |
1860 double* query = (double*)(indata+sizeof(int)); | 1860 double* query = (double*)(indata+sizeof(int)); |
1861 double* data = dataBuf; | 1861 double* data = dataBuf; |
1862 double* queryCopy = 0; | 1862 double* queryCopy = 0; |
1863 | 1863 |
1872 error("Database must be L2 normed for sequence query","use -l2norm"); | 1872 error("Database must be L2 normed for sequence query","use -l2norm"); |
1873 | 1873 |
1874 if(verbosity>1) | 1874 if(verbosity>1) |
1875 cerr << "performing norms ... "; cerr.flush(); | 1875 cerr << "performing norms ... "; cerr.flush(); |
1876 unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim); | 1876 unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim); |
1877 | |
1877 // Make a copy of the query | 1878 // Make a copy of the query |
1878 queryCopy = new double[numVectors*dbH->dim]; | 1879 queryCopy = new double[numVectors*dbH->dim]; |
1879 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); | 1880 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); |
1880 qNorm = new double[numVectors]; | 1881 qNorm = new double[numVectors]; |
1881 sNorm = new double[dbVectors]; | 1882 sNorm = new double[dbVectors]; |
1882 sMeanL2=new double[dbH->numFiles]; | 1883 sMeanL2=new double[dbH->numFiles]; |
1883 assert(qNorm&&sNorm&&queryCopy&&sMeanL2&&sequenceLength); | 1884 assert(qNorm&&sNorm&&queryCopy&&sMeanL2&&sequenceLength); |
1884 unitNorm(queryCopy, dbH->dim, numVectors, qNorm); | 1885 unitNorm(queryCopy, dbH->dim, numVectors, qNorm); |
1885 query = queryCopy; | 1886 query = queryCopy; |
1887 | |
1886 // Make norm measurements relative to sequenceLength | 1888 // Make norm measurements relative to sequenceLength |
1887 unsigned w = sequenceLength-1; | 1889 unsigned w = sequenceLength-1; |
1888 unsigned i,j; | 1890 unsigned i,j; |
1889 double* ps; | 1891 double* ps; |
1890 double tmp1,tmp2; | 1892 double tmp1,tmp2; |
1893 | |
1891 // Copy the L2 norm values to core to avoid disk random access later on | 1894 // Copy the L2 norm values to core to avoid disk random access later on |
1892 memcpy(sNorm, l2normTable, dbVectors*sizeof(double)); | 1895 memcpy(sNorm, l2normTable, dbVectors*sizeof(double)); |
1893 double* snPtr = sNorm; | 1896 double* snPtr = sNorm; |
1894 for(i=0; i<dbH->numFiles; i++){ | 1897 for(i=0; i<dbH->numFiles; i++){ |
1895 if(segTable[i]>=sequenceLength){ | 1898 if(trackTable[i]>=sequenceLength){ |
1896 tmp1=*snPtr; | 1899 tmp1=*snPtr; |
1897 j=1; | 1900 j=1; |
1898 w=sequenceLength-1; | 1901 w=sequenceLength-1; |
1899 while(w--) | 1902 while(w--) |
1900 *snPtr+=snPtr[j++]; | 1903 *snPtr+=snPtr[j++]; |
1901 ps = snPtr+1; | 1904 ps = snPtr+1; |
1902 w=segTable[i]-sequenceLength; // +1 - 1 | 1905 w=trackTable[i]-sequenceLength; // +1 - 1 |
1903 while(w--){ | 1906 while(w--){ |
1904 tmp2=*ps; | 1907 tmp2=*ps; |
1905 *ps=*(ps-1)-tmp1+*(ps+sequenceLength-1); | 1908 *ps=*(ps-1)-tmp1+*(ps+sequenceLength-1); |
1906 tmp1=tmp2; | 1909 tmp1=tmp2; |
1907 ps++; | 1910 ps++; |
1908 } | 1911 } |
1909 ps = snPtr; | 1912 ps = snPtr; |
1910 w=segTable[i]-sequenceLength+1; | 1913 w=trackTable[i]-sequenceLength+1; |
1911 while(w--){ | 1914 while(w--){ |
1912 *ps=sqrt(*ps); | 1915 *ps=sqrt(*ps); |
1913 ps++; | 1916 ps++; |
1914 } | 1917 } |
1915 } | 1918 } |
1916 snPtr+=segTable[i]; | 1919 snPtr+=trackTable[i]; |
1917 } | 1920 } |
1918 | 1921 |
1919 double* pn = sMeanL2; | 1922 double* pn = sMeanL2; |
1920 w=dbH->numFiles; | 1923 w=dbH->numFiles; |
1921 while(w--) | 1924 while(w--) |
1922 *pn++=0.0; | 1925 *pn++=0.0; |
1923 ps=sNorm; | 1926 ps=sNorm; |
1924 unsigned processedSegs=0; | 1927 unsigned processedTracks=0; |
1925 for(i=0; i<dbH->numFiles; i++){ | 1928 for(i=0; i<dbH->numFiles; i++){ |
1926 if(segTable[i]>sequenceLength-1){ | 1929 if(trackTable[i]>sequenceLength-1){ |
1927 w = segTable[i]-sequenceLength; | 1930 w = trackTable[i]-sequenceLength; |
1928 pn = sMeanL2+i; | 1931 pn = sMeanL2+i; |
1929 *pn=0; | 1932 *pn=0; |
1930 while(w--) | 1933 while(w--) |
1931 if(*ps>0) | 1934 if(*ps>0) |
1932 *pn+=*ps++; | 1935 *pn+=*ps++; |
1933 *pn/=segTable[i]-sequenceLength; | 1936 *pn/=trackTable[i]-sequenceLength; |
1934 SILENCE_THRESH+=*pn; | 1937 SILENCE_THRESH+=*pn; |
1935 processedSegs++; | 1938 processedTracks++; |
1936 } | 1939 } |
1937 ps = sNorm + segTable[i]; | 1940 ps = sNorm + trackTable[i]; |
1938 } | 1941 } |
1939 if(verbosity>1) | 1942 if(verbosity>1) |
1940 cerr << "processedSegs: " << processedSegs << endl; | 1943 cerr << "processedTracks: " << processedTracks << endl; |
1941 | 1944 |
1942 | 1945 |
1943 SILENCE_THRESH/=processedSegs; | 1946 SILENCE_THRESH/=processedTracks; |
1944 USE_THRESH=1; // Turn thresholding on | 1947 USE_THRESH=1; // Turn thresholding on |
1945 DIFF_THRESH=SILENCE_THRESH; // 50% of the mean shingle power | 1948 DIFF_THRESH=SILENCE_THRESH; // mean shingle power |
1946 SILENCE_THRESH/=5; // 20% of the mean shingle power is SILENCE | 1949 SILENCE_THRESH/=5; // 20% of the mean shingle power is SILENCE |
1947 if(verbosity>4) | 1950 if(verbosity>4) |
1948 cerr << "silence thresh: " << SILENCE_THRESH; | 1951 cerr << "silence thresh: " << SILENCE_THRESH; |
1949 w=sequenceLength-1; | 1952 w=sequenceLength-1; |
1950 i=1; | 1953 i=1; |
1971 if(verbosity>1) | 1974 if(verbosity>1) |
1972 cerr << "done." << endl; | 1975 cerr << "done." << endl; |
1973 | 1976 |
1974 | 1977 |
1975 if(verbosity>1) | 1978 if(verbosity>1) |
1976 cerr << "matching segs..." << endl; | 1979 cerr << "matching tracks..." << endl; |
1977 | 1980 |
1978 assert(pointNN>0 && pointNN<=O2_MAXNN); | 1981 assert(pointNN>0 && pointNN<=O2_MAXNN); |
1979 assert(segNN>0 && segNN<=O2_MAXNN); | 1982 assert(trackNN>0 && trackNN<=O2_MAXNN); |
1980 | 1983 |
1981 // Make temporary dynamic memory for results | 1984 // Make temporary dynamic memory for results |
1982 double segDistances[segNN]; | 1985 double trackDistances[trackNN]; |
1983 unsigned segIDs[segNN]; | 1986 unsigned trackIDs[trackNN]; |
1984 unsigned segQIndexes[segNN]; | 1987 unsigned trackQIndexes[trackNN]; |
1985 unsigned segSIndexes[segNN]; | 1988 unsigned trackSIndexes[trackNN]; |
1986 | 1989 |
1987 double distances[pointNN]; | 1990 double distances[pointNN]; |
1988 unsigned qIndexes[pointNN]; | 1991 unsigned qIndexes[pointNN]; |
1989 unsigned sIndexes[pointNN]; | 1992 unsigned sIndexes[pointNN]; |
1990 | 1993 |
1991 | 1994 |
1992 unsigned k,l,m,n,seg,segOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength; | 1995 unsigned k,l,m,n,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength; |
1993 double thisDist; | 1996 double thisDist; |
1994 double oneOverWL=1.0/wL; | 1997 double oneOverWL=1.0/wL; |
1995 | 1998 |
1996 for(k=0; k<pointNN; k++){ | 1999 for(k=0; k<pointNN; k++){ |
1997 distances[k]=0.0; | 2000 distances[k]=0.0; |
1998 qIndexes[k]=~0; | 2001 qIndexes[k]=~0; |
1999 sIndexes[k]=~0; | 2002 sIndexes[k]=~0; |
2000 } | 2003 } |
2001 | 2004 |
2002 for(k=0; k<segNN; k++){ | 2005 for(k=0; k<trackNN; k++){ |
2003 segDistances[k]=0.0; | 2006 trackDistances[k]=0.0; |
2004 segQIndexes[k]=~0; | 2007 trackQIndexes[k]=~0; |
2005 segSIndexes[k]=~0; | 2008 trackSIndexes[k]=~0; |
2006 segIDs[k]=~0; | 2009 trackIDs[k]=~0; |
2007 } | 2010 } |
2008 | 2011 |
2009 // Timestamp and durations processing | 2012 // Timestamp and durations processing |
2010 double meanQdur = 0; | 2013 double meanQdur = 0; |
2011 double* timesdata = 0; | 2014 double* timesdata = 0; |
2033 cerr << "mean query file duration: " << meanQdur << endl; | 2036 cerr << "mean query file duration: " << meanQdur << endl; |
2034 meanDBdur = new double[dbH->numFiles]; | 2037 meanDBdur = new double[dbH->numFiles]; |
2035 assert(meanDBdur); | 2038 assert(meanDBdur); |
2036 for(k=0; k<dbH->numFiles; k++){ | 2039 for(k=0; k<dbH->numFiles; k++){ |
2037 meanDBdur[k]=0.0; | 2040 meanDBdur[k]=0.0; |
2038 for(j=0; j<segTable[k]-1 ; j++) | 2041 for(j=0; j<trackTable[k]-1 ; j++) |
2039 meanDBdur[k]+=timesTable[j+1]-timesTable[j]; | 2042 meanDBdur[k]+=timesTable[j+1]-timesTable[j]; |
2040 meanDBdur[k]/=j; | 2043 meanDBdur[k]/=j; |
2041 } | 2044 } |
2042 } | 2045 } |
2043 | 2046 |
2059 assert(D); | 2062 assert(D); |
2060 DD = new double*[numVectors]; | 2063 DD = new double*[numVectors]; |
2061 assert(DD); | 2064 assert(DD); |
2062 | 2065 |
2063 gettimeofday(&tv1, NULL); | 2066 gettimeofday(&tv1, NULL); |
2064 processedSegs=0; | 2067 processedTracks=0; |
2065 unsigned successfulSegs=0; | 2068 unsigned successfulTracks=0; |
2066 | 2069 |
2067 double* qp; | 2070 double* qp; |
2068 double* sp; | 2071 double* sp; |
2069 double* dp; | 2072 double* dp; |
2070 double diffL2; | 2073 double diffL2; |
2071 | 2074 |
2072 // build segment offset table | 2075 // build track offset table |
2073 unsigned *segOffsetTable = new unsigned[dbH->numFiles]; | 2076 unsigned *trackOffsetTable = new unsigned[dbH->numFiles]; |
2074 unsigned cumSeg=0; | 2077 unsigned cumTrack=0; |
2075 unsigned segIndexOffset; | 2078 unsigned trackIndexOffset; |
2076 for(k=0; k<dbH->numFiles;k++){ | 2079 for(k=0; k<dbH->numFiles;k++){ |
2077 segOffsetTable[k]=cumSeg; | 2080 trackOffsetTable[k]=cumTrack; |
2078 cumSeg+=segTable[k]*dbH->dim; | 2081 cumTrack+=trackTable[k]*dbH->dim; |
2079 } | 2082 } |
2080 | 2083 |
2081 char nextKey [MAXSTR]; | 2084 char nextKey [MAXSTR]; |
2082 | 2085 |
2083 // chi^2 statistics | 2086 // chi^2 statistics |
2086 double logSampleSum = 0; | 2089 double logSampleSum = 0; |
2087 double minSample = 1e9; | 2090 double minSample = 1e9; |
2088 double maxSample = 0; | 2091 double maxSample = 0; |
2089 | 2092 |
2090 // Track loop | 2093 // Track loop |
2091 for(processedSegs=0, seg=0 ; processedSegs < dbH->numFiles ; seg++, processedSegs++){ | 2094 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){ |
2092 | 2095 |
2093 // get segID from file if using a control file | 2096 // get trackID from file if using a control file |
2094 if(segFile){ | 2097 if(trackFile){ |
2095 if(!segFile->eof()){ | 2098 if(!trackFile->eof()){ |
2096 segFile->getline(nextKey,MAXSTR); | 2099 trackFile->getline(nextKey,MAXSTR); |
2097 seg=getKeyPos(nextKey); | 2100 track=getKeyPos(nextKey); |
2098 } | 2101 } |
2099 else | 2102 else |
2100 break; | 2103 break; |
2101 } | 2104 } |
2102 | 2105 |
2103 segOffset=segOffsetTable[seg]; // numDoubles offset | 2106 trackOffset=trackOffsetTable[track]; // numDoubles offset |
2104 segIndexOffset=segOffset/dbH->dim; // numVectors offset | 2107 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset |
2105 | 2108 |
2106 if(sequenceLength<segTable[seg]){ // test for short sequences | 2109 if(sequenceLength<trackTable[track]){ // test for short sequences |
2107 | 2110 |
2108 if(verbosity>7) | 2111 if(verbosity>7) |
2109 cerr << seg << "." << segIndexOffset << "." << segTable[seg] << " | ";cerr.flush(); | 2112 cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";cerr.flush(); |
2110 | 2113 |
2111 // Sum products matrix | 2114 // Sum products matrix |
2112 for(j=0; j<numVectors;j++){ | 2115 for(j=0; j<numVectors;j++){ |
2113 D[j]=new double[segTable[seg]]; | 2116 D[j]=new double[trackTable[track]]; |
2114 assert(D[j]); | 2117 assert(D[j]); |
2115 | 2118 |
2116 } | 2119 } |
2117 | 2120 |
2118 // Matched filter matrix | 2121 // Matched filter matrix |
2119 for(j=0; j<numVectors;j++){ | 2122 for(j=0; j<numVectors;j++){ |
2120 DD[j]=new double[segTable[seg]]; | 2123 DD[j]=new double[trackTable[track]]; |
2121 assert(DD[j]); | 2124 assert(DD[j]); |
2122 } | 2125 } |
2123 | 2126 |
2124 double tmp; | 2127 double tmp; |
2125 // Dot product | 2128 // Dot product |
2126 for(j=0; j<numVectors; j++) | 2129 for(j=0; j<numVectors; j++) |
2127 for(k=0; k<segTable[seg]; k++){ | 2130 for(k=0; k<trackTable[track]; k++){ |
2128 qp=query+j*dbH->dim; | 2131 qp=query+j*dbH->dim; |
2129 sp=dataBuf+segOffset+k*dbH->dim; | 2132 sp=dataBuf+trackOffset+k*dbH->dim; |
2130 DD[j][k]=0.0; // Initialize matched filter array | 2133 DD[j][k]=0.0; // Initialize matched filter array |
2131 dp=&D[j][k]; // point to correlation cell j,k | 2134 dp=&D[j][k]; // point to correlation cell j,k |
2132 *dp=0.0; // initialize correlation cell | 2135 *dp=0.0; // initialize correlation cell |
2133 l=dbH->dim; // size of vectors | 2136 l=dbH->dim; // size of vectors |
2134 while(l--) | 2137 while(l--) |
2141 if(HOP_SIZE==1){ // HOP_SIZE = shingleHop | 2144 if(HOP_SIZE==1){ // HOP_SIZE = shingleHop |
2142 for(w=0; w<wL; w++) | 2145 for(w=0; w<wL; w++) |
2143 for(j=0; j<numVectors-w; j++){ | 2146 for(j=0; j<numVectors-w; j++){ |
2144 sp=DD[j]; | 2147 sp=DD[j]; |
2145 spd=D[j+w]+w; | 2148 spd=D[j+w]+w; |
2146 k=segTable[seg]-w; | 2149 k=trackTable[track]-w; |
2147 while(k--) | 2150 while(k--) |
2148 *sp+++=*spd++; | 2151 *sp+++=*spd++; |
2149 } | 2152 } |
2150 } | 2153 } |
2151 | 2154 |
2152 else{ // HOP_SIZE != 1 | 2155 else{ // HOP_SIZE != 1 |
2153 for(w=0; w<wL; w++) | 2156 for(w=0; w<wL; w++) |
2154 for(j=0; j<numVectors-w; j+=HOP_SIZE){ | 2157 for(j=0; j<numVectors-w; j+=HOP_SIZE){ |
2155 sp=DD[j]; | 2158 sp=DD[j]; |
2156 spd=D[j+w]+w; | 2159 spd=D[j+w]+w; |
2157 for(k=0; k<segTable[seg]-w; k+=HOP_SIZE){ | 2160 for(k=0; k<trackTable[track]-w; k+=HOP_SIZE){ |
2158 *sp+=*spd; | 2161 *sp+=*spd; |
2159 sp+=HOP_SIZE; | 2162 sp+=HOP_SIZE; |
2160 spd+=HOP_SIZE; | 2163 spd+=HOP_SIZE; |
2161 } | 2164 } |
2162 } | 2165 } |
2163 } | 2166 } |
2164 | 2167 |
2165 if(verbosity>3 && usingTimes){ | 2168 if(verbosity>3 && usingTimes){ |
2166 cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[seg] << endl; | 2169 cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[track] << endl; |
2167 cerr.flush(); | 2170 cerr.flush(); |
2168 } | 2171 } |
2169 | 2172 |
2170 if(!usingTimes || | 2173 if(!usingTimes || |
2171 (usingTimes | 2174 (usingTimes |
2172 && fabs(meanDBdur[seg]-meanQdur)<meanQdur*timesTol)){ | 2175 && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){ |
2173 | 2176 |
2174 if(verbosity>3 && usingTimes){ | 2177 if(verbosity>3 && usingTimes){ |
2175 cerr << "within duration tolerance." << endl; | 2178 cerr << "within duration tolerance." << endl; |
2176 cerr.flush(); | 2179 cerr.flush(); |
2177 } | 2180 } |
2178 | 2181 |
2179 // Search for minimum distance by shingles (concatenated vectors) | 2182 // Search for minimum distance by shingles (concatenated vectors) |
2180 for(j=0;j<numVectors-wL;j+=HOP_SIZE) | 2183 for(j=0;j<numVectors-wL;j+=HOP_SIZE) |
2181 for(k=0;k<segTable[seg]-wL;k+=HOP_SIZE){ | 2184 for(k=0;k<trackTable[track]-wL;k+=HOP_SIZE){ |
2182 thisDist=2-(2/(qNorm[j]*sNorm[segIndexOffset+k]))*DD[j][k]; | 2185 thisDist=2-(2/(qNorm[j]*sNorm[trackIndexOffset+k]))*DD[j][k]; |
2183 if(verbosity>10) | 2186 if(verbosity>10) |
2184 cerr << thisDist << " " << qNorm[j] << " " << sNorm[segIndexOffset+k] << endl; | 2187 cerr << thisDist << " " << qNorm[j] << " " << sNorm[trackIndexOffset+k] << endl; |
2185 // Gather chi^2 statistics | 2188 // Gather chi^2 statistics |
2186 if(thisDist<minSample) | 2189 if(thisDist<minSample) |
2187 minSample=thisDist; | 2190 minSample=thisDist; |
2188 else if(thisDist>maxSample) | 2191 else if(thisDist>maxSample) |
2189 maxSample=thisDist; | 2192 maxSample=thisDist; |
2191 sampleCount++; | 2194 sampleCount++; |
2192 sampleSum+=thisDist; | 2195 sampleSum+=thisDist; |
2193 logSampleSum+=log(thisDist); | 2196 logSampleSum+=log(thisDist); |
2194 } | 2197 } |
2195 | 2198 |
2196 diffL2 = fabs(qNorm[j] - sNorm[segIndexOffset+k]); | 2199 // diffL2 = fabs(qNorm[j] - sNorm[trackIndexOffset+k]); |
2197 // Power test | 2200 // Power test |
2198 if(!USE_THRESH || | 2201 if(!USE_THRESH || |
2199 // Threshold on mean L2 of Q and S sequences | 2202 // Threshold on mean L2 of Q and S sequences |
2200 (USE_THRESH && qNorm[j]>SILENCE_THRESH && sNorm[segIndexOffset+k]>SILENCE_THRESH && | 2203 (USE_THRESH && qNorm[j]>SILENCE_THRESH && sNorm[trackIndexOffset+k]>SILENCE_THRESH && |
2201 // Are both query and target windows above mean energy? | 2204 // Are both query and target windows above mean energy? |
2202 (qNorm[j]>qMeanL2*.25 && sNorm[segIndexOffset+k]>sMeanL2[seg]*.25))) // && diffL2 < DIFF_THRESH ))) | 2205 (qNorm[j]>qMeanL2*.25 && sNorm[trackIndexOffset+k]>sMeanL2[track]*.25))) // && diffL2 < DIFF_THRESH ))) |
2203 thisDist=thisDist; // Computed above | 2206 thisDist=thisDist; // Computed above |
2204 else | 2207 else |
2205 thisDist=1000000.0; | 2208 thisDist=1000000.0; |
2206 if(thisDist>=0 && thisDist<=radius){ | 2209 if(thisDist>=0 && thisDist<=radius){ |
2207 distances[0]++; // increment count | 2210 distances[0]++; // increment count |
2208 break; // only need one seg point per query point | 2211 break; // only need one track point per query point |
2209 } | 2212 } |
2210 } | 2213 } |
2211 // How many points were below threshold ? | 2214 // How many points were below threshold ? |
2212 thisDist=distances[0]; | 2215 thisDist=distances[0]; |
2213 | 2216 |
2214 // Let's see the distances then... | 2217 // Let's see the distances then... |
2215 if(verbosity>3) | 2218 if(verbosity>3) |
2216 cerr << fileTable+seg*O2_FILETABLESIZE << " " << thisDist << endl; | 2219 cerr << fileTable+track*O2_FILETABLESIZE << " " << thisDist << endl; |
2217 | 2220 |
2218 // All the seg stuff goes here | 2221 // All the track stuff goes here |
2219 n=segNN; | 2222 n=trackNN; |
2220 while(n--){ | 2223 while(n--){ |
2221 if(thisDist>segDistances[n]){ | 2224 if(thisDist>trackDistances[n]){ |
2222 if((n==0 || thisDist<=segDistances[n-1])){ | 2225 if((n==0 || thisDist<=trackDistances[n-1])){ |
2223 // Copy all values above up the queue | 2226 // Copy all values above up the queue |
2224 for( l=segNN-1 ; l > n ; l--){ | 2227 for( l=trackNN-1 ; l > n ; l--){ |
2225 segDistances[l]=segDistances[l-1]; | 2228 trackDistances[l]=trackDistances[l-1]; |
2226 segQIndexes[l]=segQIndexes[l-1]; | 2229 trackQIndexes[l]=trackQIndexes[l-1]; |
2227 segSIndexes[l]=segSIndexes[l-1]; | 2230 trackSIndexes[l]=trackSIndexes[l-1]; |
2228 segIDs[l]=segIDs[l-1]; | 2231 trackIDs[l]=trackIDs[l-1]; |
2229 } | 2232 } |
2230 segDistances[n]=thisDist; | 2233 trackDistances[n]=thisDist; |
2231 segQIndexes[n]=qIndexes[0]; | 2234 trackQIndexes[n]=qIndexes[0]; |
2232 segSIndexes[n]=sIndexes[0]; | 2235 trackSIndexes[n]=sIndexes[0]; |
2233 successfulSegs++; | 2236 successfulTracks++; |
2234 segIDs[n]=seg; | 2237 trackIDs[n]=track; |
2235 break; | 2238 break; |
2236 } | 2239 } |
2237 } | 2240 } |
2238 else | 2241 else |
2239 break; | 2242 break; |
2240 } | 2243 } |
2241 } // Duration match | 2244 } // Duration match |
2242 | 2245 |
2243 // Clean up current seg | 2246 // Clean up current track |
2244 if(D!=NULL){ | 2247 if(D!=NULL){ |
2245 for(j=0; j<numVectors; j++) | 2248 for(j=0; j<numVectors; j++) |
2246 delete[] D[j]; | 2249 delete[] D[j]; |
2247 } | 2250 } |
2248 | 2251 |
2249 if(DD!=NULL){ | 2252 if(DD!=NULL){ |
2250 for(j=0; j<numVectors; j++) | 2253 for(j=0; j<numVectors; j++) |
2251 delete[] DD[j]; | 2254 delete[] DD[j]; |
2252 } | 2255 } |
2253 } | 2256 } |
2254 // per-seg reset array values | 2257 // per-track reset array values |
2255 for(unsigned k=0; k<pointNN; k++){ | 2258 for(unsigned k=0; k<pointNN; k++){ |
2256 distances[k]=0.0; | 2259 distances[k]=0.0; |
2257 qIndexes[k]=~0; | 2260 qIndexes[k]=~0; |
2258 sIndexes[k]=~0; | 2261 sIndexes[k]=~0; |
2259 } | 2262 } |
2260 } | 2263 } |
2261 | 2264 |
2262 gettimeofday(&tv2,NULL); | 2265 gettimeofday(&tv2,NULL); |
2263 if(verbosity>1){ | 2266 if(verbosity>1){ |
2264 cerr << endl << "processed segs :" << processedSegs << " matched segments: " << successfulSegs << " elapsed time:" | 2267 cerr << endl << "processed tracks :" << processedTracks << " matched tracks: " << successfulTracks << " elapsed time:" |
2265 << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl; | 2268 << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl; |
2266 cerr << "sampleCount: " << sampleCount << " sampleSum: " << sampleSum << " logSampleSum: " << logSampleSum | 2269 cerr << "sampleCount: " << sampleCount << " sampleSum: " << sampleSum << " logSampleSum: " << logSampleSum |
2267 << " minSample: " << minSample << " maxSample: " << maxSample << endl; | 2270 << " minSample: " << minSample << " maxSample: " << maxSample << endl; |
2268 } | 2271 } |
2269 | 2272 |
2270 if(adbQueryResult==0){ | 2273 if(adbQueryResult==0){ |
2271 if(verbosity>1) | 2274 if(verbosity>1) |
2272 cerr<<endl; | 2275 cerr<<endl; |
2273 // Output answer | 2276 // Output answer |
2274 // Loop over nearest neighbours | 2277 // Loop over nearest neighbours |
2275 for(k=0; k < min(segNN,successfulSegs); k++) | 2278 for(k=0; k < min(trackNN,successfulTracks); k++) |
2276 cout << fileTable+segIDs[k]*O2_FILETABLESIZE << " " << segDistances[k] << endl; | 2279 cout << fileTable+trackIDs[k]*O2_FILETABLESIZE << " " << trackDistances[k] << endl; |
2277 } | 2280 } |
2278 else{ // Process Web Services Query | 2281 else{ // Process Web Services Query |
2279 int listLen = min(segNN, processedSegs); | 2282 int listLen = min(trackNN, processedTracks); |
2280 adbQueryResult->__sizeRlist=listLen; | 2283 adbQueryResult->__sizeRlist=listLen; |
2281 adbQueryResult->__sizeDist=listLen; | 2284 adbQueryResult->__sizeDist=listLen; |
2282 adbQueryResult->__sizeQpos=listLen; | 2285 adbQueryResult->__sizeQpos=listLen; |
2283 adbQueryResult->__sizeSpos=listLen; | 2286 adbQueryResult->__sizeSpos=listLen; |
2284 adbQueryResult->Rlist= new char*[listLen]; | 2287 adbQueryResult->Rlist= new char*[listLen]; |
2285 adbQueryResult->Dist = new double[listLen]; | 2288 adbQueryResult->Dist = new double[listLen]; |
2286 adbQueryResult->Qpos = new int[listLen]; | 2289 adbQueryResult->Qpos = new int[listLen]; |
2287 adbQueryResult->Spos = new int[listLen]; | 2290 adbQueryResult->Spos = new int[listLen]; |
2288 for(k=0; k<adbQueryResult->__sizeRlist; k++){ | 2291 for(k=0; k<adbQueryResult->__sizeRlist; k++){ |
2289 adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR]; | 2292 adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR]; |
2290 adbQueryResult->Dist[k]=segDistances[k]; | 2293 adbQueryResult->Dist[k]=trackDistances[k]; |
2291 adbQueryResult->Qpos[k]=segQIndexes[k]; | 2294 adbQueryResult->Qpos[k]=trackQIndexes[k]; |
2292 adbQueryResult->Spos[k]=segSIndexes[k]; | 2295 adbQueryResult->Spos[k]=trackSIndexes[k]; |
2293 sprintf(adbQueryResult->Rlist[k], "%s", fileTable+segIDs[k]*O2_FILETABLESIZE); | 2296 sprintf(adbQueryResult->Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE); |
2294 } | 2297 } |
2295 } | 2298 } |
2296 | 2299 |
2297 | 2300 |
2298 // Clean up | 2301 // Clean up |
2299 if(segOffsetTable) | 2302 if(trackOffsetTable) |
2300 delete[] segOffsetTable; | 2303 delete[] trackOffsetTable; |
2301 if(queryCopy) | 2304 if(queryCopy) |
2302 delete[] queryCopy; | 2305 delete[] queryCopy; |
2303 //if(qNorm) | 2306 //if(qNorm) |
2304 //delete qNorm; | 2307 //delete qNorm; |
2305 if(D) | 2308 if(D) |
2473 return SOAP_OK; | 2476 return SOAP_OK; |
2474 } | 2477 } |
2475 | 2478 |
2476 // Literal translation of command line to web service | 2479 // Literal translation of command line to web service |
2477 | 2480 |
2478 int adb__query(struct soap* soap, xsd__string dbName, xsd__string qKey, xsd__string keyList, xsd__string timesFileName, xsd__int qType, xsd__int qPos, xsd__int pointNN, xsd__int segNN, xsd__int seqLen, adb__queryResult &adbQueryResult){ | 2481 int adb__query(struct soap* soap, xsd__string dbName, xsd__string qKey, xsd__string keyList, xsd__string timesFileName, xsd__int qType, xsd__int qPos, xsd__int pointNN, xsd__int trackNN, xsd__int seqLen, adb__queryResult &adbQueryResult){ |
2479 char queryType[256]; | 2482 char queryType[256]; |
2480 for(int k=0; k<256; k++) | 2483 for(int k=0; k<256; k++) |
2481 queryType[k]='\0'; | 2484 queryType[k]='\0'; |
2482 if(qType == O2_FLAG_POINT_QUERY) | 2485 if(qType == O2_FLAG_POINT_QUERY) |
2483 strncpy(queryType, "point", strlen("point")); | 2486 strncpy(queryType, "point", strlen("point")); |
2484 else if (qType == O2_FLAG_SEQUENCE_QUERY) | 2487 else if (qType == O2_FLAG_SEQUENCE_QUERY) |
2485 strncpy(queryType, "sequence", strlen("sequence")); | 2488 strncpy(queryType, "sequence", strlen("sequence")); |
2486 else if(qType == O2_FLAG_SEG_QUERY) | 2489 else if(qType == O2_FLAG_TRACK_QUERY) |
2487 strncpy(queryType,"segment", strlen("segment")); | 2490 strncpy(queryType,"track", strlen("track")); |
2488 else | 2491 else |
2489 strncpy(queryType, "", strlen("")); | 2492 strncpy(queryType, "", strlen("")); |
2490 | 2493 |
2491 if(pointNN==0) | 2494 if(pointNN==0) |
2492 pointNN=10; | 2495 pointNN=10; |
2493 if(segNN==0) | 2496 if(trackNN==0) |
2494 segNN=10; | 2497 trackNN=10; |
2495 if(seqLen==0) | 2498 if(seqLen==0) |
2496 seqLen=16; | 2499 seqLen=16; |
2497 | 2500 |
2498 char qPosStr[256]; | 2501 char qPosStr[256]; |
2499 sprintf(qPosStr, "%d", qPos); | 2502 sprintf(qPosStr, "%d", qPos); |
2500 char pointNNStr[256]; | 2503 char pointNNStr[256]; |
2501 sprintf(pointNNStr,"%d",pointNN); | 2504 sprintf(pointNNStr,"%d",pointNN); |
2502 char segNNStr[256]; | 2505 char trackNNStr[256]; |
2503 sprintf(segNNStr,"%d",segNN); | 2506 sprintf(trackNNStr,"%d",trackNN); |
2504 char seqLenStr[256]; | 2507 char seqLenStr[256]; |
2505 sprintf(seqLenStr,"%d",seqLen); | 2508 sprintf(seqLenStr,"%d",seqLen); |
2506 | 2509 |
2507 const char* argv[] ={ | 2510 const char* argv[] ={ |
2508 "./audioDB", | 2511 "./audioDB", |
2518 timesFileName==0?"":timesFileName, | 2521 timesFileName==0?"":timesFileName, |
2519 COM_QPOINT, | 2522 COM_QPOINT, |
2520 qPosStr, | 2523 qPosStr, |
2521 COM_POINTNN, | 2524 COM_POINTNN, |
2522 pointNNStr, | 2525 pointNNStr, |
2523 COM_SEGNN, | 2526 COM_TRACKNN, |
2524 segNNStr, // Need to pass a parameter | 2527 trackNNStr, // Need to pass a parameter |
2525 COM_SEQLEN, | 2528 COM_SEQLEN, |
2526 seqLenStr | 2529 seqLenStr |
2527 }; | 2530 }; |
2528 | 2531 |
2529 const unsigned argc = 19; | 2532 const unsigned argc = 19; |