Mercurial > hg > audiodb
comparison audioDB.cpp @ 117:e800eac265c3 endian-neutral
Towards endian-neutrality, step 2.
trackTable[x] is defined to be in network byte order on disk and in
memory, so access it through ntohl() when reading and htonl() when writing.
author | mas01cr |
---|---|
date | Fri, 12 Oct 2007 11:37:38 +0000 |
parents | 531ce5162861 |
children | c0789661f232 |
comparison
equal
deleted
inserted
replaced
116:531ce5162861 | 117:e800eac265c3 |
---|---|
572 | 572 |
573 // Copy the header back to the database | 573 // Copy the header back to the database |
574 memcpy (db, dbH, sizeof(dbTableHeaderT)); | 574 memcpy (db, dbH, sizeof(dbTableHeaderT)); |
575 | 575 |
576 // Update track to file index map | 576 // Update track to file index map |
577 //memcpy (db+trackTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned)); | 577 *(trackTable + ntohl(dbH->numFiles) - 1) = htonl(numVectors); |
578 //memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned)); | |
579 *(trackTable + ntohl(dbH->numFiles) - 1) = numVectors; | |
580 | 578 |
581 // Update the feature database | 579 // Update the feature database |
582 memcpy (db+ntohl(dbH->dataOffset)+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int)); | 580 memcpy (db+ntohl(dbH->dataOffset)+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int)); |
583 | 581 |
584 // Norm the vectors on input if the database is already L2 normed | 582 // Norm the vectors on input if the database is already L2 normed |
793 dbH->length = htonl(ntohl(dbH->length) + (statbuf.st_size-sizeof(int))); | 791 dbH->length = htonl(ntohl(dbH->length) + (statbuf.st_size-sizeof(int))); |
794 // Copy the header back to the database | 792 // Copy the header back to the database |
795 memcpy (db, dbH, sizeof(dbTableHeaderT)); | 793 memcpy (db, dbH, sizeof(dbTableHeaderT)); |
796 | 794 |
797 // Update track to file index map | 795 // Update track to file index map |
798 //memcpy (db+trackTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned)); | 796 *(trackTable + ntohl(dbH->numFiles) - 1) = htonl(numVectors); |
799 //memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned)); | |
800 *(trackTable + ntohl(dbH->numFiles) - 1) = numVectors; | |
801 | 797 |
802 // Update the feature database | 798 // Update the feature database |
803 memcpy (db+ntohl(dbH->dataOffset)+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int)); | 799 memcpy (db+ntohl(dbH->dataOffset)+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int)); |
804 | 800 |
805 // Norm the vectors on input if the database is already L2 normed | 801 // Norm the vectors on input if the database is already L2 normed |
884 initTables(dbName, 0, 0); | 880 initTables(dbName, 0, 0); |
885 | 881 |
886 unsigned dudCount=0; | 882 unsigned dudCount=0; |
887 unsigned nullCount=0; | 883 unsigned nullCount=0; |
888 for(unsigned k=0; k<ntohl(dbH->numFiles); k++){ | 884 for(unsigned k=0; k<ntohl(dbH->numFiles); k++){ |
889 if(trackTable[k]<sequenceLength){ | 885 if(ntohl(trackTable[k])<sequenceLength){ |
890 dudCount++; | 886 dudCount++; |
891 if(!trackTable[k]) | 887 if(!ntohl(trackTable[k])) |
892 nullCount++; | 888 nullCount++; |
893 } | 889 } |
894 } | 890 } |
895 | 891 |
896 if(adbStatusResult == 0) { | 892 if(adbStatusResult == 0) { |
921 void audioDB::dump(const char* dbName){ | 917 void audioDB::dump(const char* dbName){ |
922 if(!dbH) | 918 if(!dbH) |
923 initTables(dbName, 0, 0); | 919 initTables(dbName, 0, 0); |
924 | 920 |
925 for(unsigned k=0, j=0; k<ntohl(dbH->numFiles); k++){ | 921 for(unsigned k=0, j=0; k<ntohl(dbH->numFiles); k++){ |
926 cout << fileTable+k*O2_FILETABLESIZE << " " << trackTable[k] << endl; | 922 cout << fileTable+k*O2_FILETABLESIZE << " " << ntohl(trackTable[k]) << endl; |
927 j+=trackTable[k]; | 923 j+=ntohl(trackTable[k]); |
928 } | 924 } |
929 | 925 |
930 status(dbName); | 926 status(dbName); |
931 } | 927 } |
932 | 928 |
1099 // Loop over nearest neighbours | 1095 // Loop over nearest neighbours |
1100 for(k=0; k < pointNN; k++){ | 1096 for(k=0; k < pointNN; k++){ |
1101 // Scan for key | 1097 // Scan for key |
1102 unsigned cumTrack=0; | 1098 unsigned cumTrack=0; |
1103 for(l=0 ; l<ntohl(dbH->numFiles); l++){ | 1099 for(l=0 ; l<ntohl(dbH->numFiles); l++){ |
1104 cumTrack+=trackTable[l]; | 1100 cumTrack+=ntohl(trackTable[l]); |
1105 if(sIndexes[k]<cumTrack){ | 1101 if(sIndexes[k]<cumTrack){ |
1106 cout << fileTable+l*O2_FILETABLESIZE << " " << distances[k] << " " << qIndexes[k] << " " | 1102 cout << fileTable+l*O2_FILETABLESIZE << " " << distances[k] << " " << qIndexes[k] << " " |
1107 << sIndexes[k]+trackTable[l]-cumTrack << endl; | 1103 << sIndexes[k]+ntohl(trackTable[l])-cumTrack << endl; |
1108 break; | 1104 break; |
1109 } | 1105 } |
1110 } | 1106 } |
1111 } | 1107 } |
1112 } | 1108 } |
1130 adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR]; | 1126 adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR]; |
1131 adbQueryResult->Dist[k]=distances[k]; | 1127 adbQueryResult->Dist[k]=distances[k]; |
1132 adbQueryResult->Qpos[k]=qIndexes[k]; | 1128 adbQueryResult->Qpos[k]=qIndexes[k]; |
1133 unsigned cumTrack=0; | 1129 unsigned cumTrack=0; |
1134 for(l=0 ; l<ntohl(dbH->numFiles); l++){ | 1130 for(l=0 ; l<ntohl(dbH->numFiles); l++){ |
1135 cumTrack+=trackTable[l]; | 1131 cumTrack+=ntohl(trackTable[l]); |
1136 if(sIndexes[k]<cumTrack){ | 1132 if(sIndexes[k]<cumTrack){ |
1137 sprintf(adbQueryResult->Rlist[k], "%s", fileTable+l*O2_FILETABLESIZE); | 1133 sprintf(adbQueryResult->Rlist[k], "%s", fileTable+l*O2_FILETABLESIZE); |
1138 break; | 1134 break; |
1139 } | 1135 } |
1140 } | 1136 } |
1141 adbQueryResult->Spos[k]=sIndexes[k]+trackTable[l]-cumTrack; | 1137 adbQueryResult->Spos[k]=sIndexes[k]+ntohl(trackTable[l])-cumTrack; |
1142 } | 1138 } |
1143 } | 1139 } |
1144 | 1140 |
1145 // Clean up | 1141 // Clean up |
1146 if(queryCopy) | 1142 if(queryCopy) |
1227 } | 1223 } |
1228 meanQdur/=k; | 1224 meanQdur/=k; |
1229 meanDBdur = new double[ntohl(dbH->numFiles)]; | 1225 meanDBdur = new double[ntohl(dbH->numFiles)]; |
1230 for(k=0; k<ntohl(dbH->numFiles); k++){ | 1226 for(k=0; k<ntohl(dbH->numFiles); k++){ |
1231 meanDBdur[k]=0.0; | 1227 meanDBdur[k]=0.0; |
1232 for(j=0; j<trackTable[k]-1 ; j++) | 1228 for(j=0; j<ntohl(trackTable[k])-1 ; j++) |
1233 meanDBdur[k]+=timesTable[j+1]-timesTable[j]; | 1229 meanDBdur[k]+=timesTable[j+1]-timesTable[j]; |
1234 meanDBdur[k]/=j; | 1230 meanDBdur[k]/=j; |
1235 } | 1231 } |
1236 } | 1232 } |
1237 | 1233 |
1250 unsigned *trackOffsetTable = new unsigned[ntohl(dbH->numFiles)]; | 1246 unsigned *trackOffsetTable = new unsigned[ntohl(dbH->numFiles)]; |
1251 unsigned cumTrack=0; | 1247 unsigned cumTrack=0; |
1252 unsigned trackIndexOffset; | 1248 unsigned trackIndexOffset; |
1253 for(k=0; k<ntohl(dbH->numFiles);k++){ | 1249 for(k=0; k<ntohl(dbH->numFiles);k++){ |
1254 trackOffsetTable[k]=cumTrack; | 1250 trackOffsetTable[k]=cumTrack; |
1255 cumTrack+=trackTable[k]*ntohl(dbH->dim); | 1251 cumTrack+=ntohl(trackTable[k])*ntohl(dbH->dim); |
1256 } | 1252 } |
1257 | 1253 |
1258 char nextKey[MAXSTR]; | 1254 char nextKey[MAXSTR]; |
1259 | 1255 |
1260 gettimeofday(&tv1, NULL); | 1256 gettimeofday(&tv1, NULL); |
1269 break; | 1265 break; |
1270 } | 1266 } |
1271 trackOffset=trackOffsetTable[track]; // numDoubles offset | 1267 trackOffset=trackOffsetTable[track]; // numDoubles offset |
1272 trackIndexOffset=trackOffset/ntohl(dbH->dim); // numVectors offset | 1268 trackIndexOffset=trackOffset/ntohl(dbH->dim); // numVectors offset |
1273 if(verbosity>7) { | 1269 if(verbosity>7) { |
1274 cerr << track << "." << trackOffset/(ntohl(dbH->dim)) << "." << trackTable[track] << " | ";cerr.flush(); | 1270 cerr << track << "." << trackOffset/(ntohl(dbH->dim)) << "." << ntohl(trackTable[track]) << " | ";cerr.flush(); |
1275 } | 1271 } |
1276 | 1272 |
1277 if(ntohl(dbH->flags) & O2_FLAG_L2NORM) | 1273 if(ntohl(dbH->flags) & O2_FLAG_L2NORM) |
1278 usingQueryPoint?query=queryCopy+queryPoint*ntohl(dbH->dim):query=queryCopy; | 1274 usingQueryPoint?query=queryCopy+queryPoint*ntohl(dbH->dim):query=queryCopy; |
1279 else | 1275 else |
1281 if(usingQueryPoint) | 1277 if(usingQueryPoint) |
1282 j=1; | 1278 j=1; |
1283 else | 1279 else |
1284 j=numVectors; | 1280 j=numVectors; |
1285 while(j--){ | 1281 while(j--){ |
1286 k=trackTable[track]; // number of vectors in track | 1282 k=ntohl(trackTable[track]); // number of vectors in track |
1287 data=dataBuf+trackOffset; // data for track | 1283 data=dataBuf+trackOffset; // data for track |
1288 while(k--){ | 1284 while(k--){ |
1289 thisDist=0; | 1285 thisDist=0; |
1290 l=ntohl(dbH->dim); | 1286 l=ntohl(dbH->dim); |
1291 double* q=query; | 1287 double* q=query; |
1304 qIndexes[l]=qIndexes[l-1]; | 1300 qIndexes[l]=qIndexes[l-1]; |
1305 sIndexes[l]=sIndexes[l-1]; | 1301 sIndexes[l]=sIndexes[l-1]; |
1306 } | 1302 } |
1307 distances[n]=thisDist; | 1303 distances[n]=thisDist; |
1308 qIndexes[n]=numVectors-j-1; | 1304 qIndexes[n]=numVectors-j-1; |
1309 sIndexes[n]=trackTable[track]-k-1; | 1305 sIndexes[n]=ntohl(trackTable[track])-k-1; |
1310 break; | 1306 break; |
1311 } | 1307 } |
1312 } | 1308 } |
1313 else | 1309 else |
1314 break; | 1310 break; |
1456 | 1452 |
1457 // Copy the L2 norm values to core to avoid disk random access later on | 1453 // Copy the L2 norm values to core to avoid disk random access later on |
1458 memcpy(sNorm, l2normTable, dbVectors*sizeof(double)); | 1454 memcpy(sNorm, l2normTable, dbVectors*sizeof(double)); |
1459 double* snPtr = sNorm; | 1455 double* snPtr = sNorm; |
1460 for(i=0; i<ntohl(dbH->numFiles); i++){ | 1456 for(i=0; i<ntohl(dbH->numFiles); i++){ |
1461 if(trackTable[i]>=sequenceLength){ | 1457 if(ntohl(trackTable[i])>=sequenceLength){ |
1462 tmp1=*snPtr; | 1458 tmp1=*snPtr; |
1463 j=1; | 1459 j=1; |
1464 w=sequenceLength-1; | 1460 w=sequenceLength-1; |
1465 while(w--) | 1461 while(w--) |
1466 *snPtr+=snPtr[j++]; | 1462 *snPtr+=snPtr[j++]; |
1467 ps = snPtr+1; | 1463 ps = snPtr+1; |
1468 w=trackTable[i]-sequenceLength; // +1 - 1 | 1464 w=ntohl(trackTable[i])-sequenceLength; // +1 - 1 |
1469 while(w--){ | 1465 while(w--){ |
1470 tmp2=*ps; | 1466 tmp2=*ps; |
1471 *ps=*(ps-1)-tmp1+*(ps+sequenceLength-1); | 1467 *ps=*(ps-1)-tmp1+*(ps+sequenceLength-1); |
1472 tmp1=tmp2; | 1468 tmp1=tmp2; |
1473 ps++; | 1469 ps++; |
1474 } | 1470 } |
1475 ps = snPtr; | 1471 ps = snPtr; |
1476 w=trackTable[i]-sequenceLength+1; | 1472 w=ntohl(trackTable[i])-sequenceLength+1; |
1477 while(w--){ | 1473 while(w--){ |
1478 *ps=sqrt(*ps); | 1474 *ps=sqrt(*ps); |
1479 ps++; | 1475 ps++; |
1480 } | 1476 } |
1481 } | 1477 } |
1482 snPtr+=trackTable[i]; | 1478 snPtr+=ntohl(trackTable[i]); |
1483 } | 1479 } |
1484 | 1480 |
1485 double* pn = sMeanL2; | 1481 double* pn = sMeanL2; |
1486 w=ntohl(dbH->numFiles); | 1482 w=ntohl(dbH->numFiles); |
1487 while(w--) | 1483 while(w--) |
1488 *pn++=0.0; | 1484 *pn++=0.0; |
1489 ps=sNorm; | 1485 ps=sNorm; |
1490 unsigned processedTracks=0; | 1486 unsigned processedTracks=0; |
1491 for(i=0; i<ntohl(dbH->numFiles); i++){ | 1487 for(i=0; i<ntohl(dbH->numFiles); i++){ |
1492 if(trackTable[i]>sequenceLength-1){ | 1488 if(ntohl(trackTable[i])>sequenceLength-1){ |
1493 w = trackTable[i]-sequenceLength+1; | 1489 w = ntohl(trackTable[i])-sequenceLength+1; |
1494 pn = sMeanL2+i; | 1490 pn = sMeanL2+i; |
1495 *pn=0; | 1491 *pn=0; |
1496 while(w--) | 1492 while(w--) |
1497 if(*ps>0) | 1493 if(*ps>0) |
1498 *pn+=*ps++; | 1494 *pn+=*ps++; |
1499 *pn/=trackTable[i]-sequenceLength+1; | 1495 *pn/=ntohl(trackTable[i])-sequenceLength+1; |
1500 SILENCE_THRESH+=*pn; | 1496 SILENCE_THRESH+=*pn; |
1501 processedTracks++; | 1497 processedTracks++; |
1502 } | 1498 } |
1503 ps = sNorm + trackTable[i]; | 1499 ps = sNorm + ntohl(trackTable[i]); |
1504 } | 1500 } |
1505 if(verbosity>1) { | 1501 if(verbosity>1) { |
1506 cerr << "processedTracks: " << processedTracks << endl; | 1502 cerr << "processedTracks: " << processedTracks << endl; |
1507 } | 1503 } |
1508 | 1504 |
1601 } | 1597 } |
1602 meanDBdur = new double[ntohl(dbH->numFiles)]; | 1598 meanDBdur = new double[ntohl(dbH->numFiles)]; |
1603 assert(meanDBdur); | 1599 assert(meanDBdur); |
1604 for(k=0; k<ntohl(dbH->numFiles); k++){ | 1600 for(k=0; k<ntohl(dbH->numFiles); k++){ |
1605 meanDBdur[k]=0.0; | 1601 meanDBdur[k]=0.0; |
1606 for(j=0; j<trackTable[k]-1 ; j++) | 1602 for(j=0; j<ntohl(trackTable[k])-1 ; j++) |
1607 meanDBdur[k]+=timesTable[j+1]-timesTable[j]; | 1603 meanDBdur[k]+=timesTable[j+1]-timesTable[j]; |
1608 meanDBdur[k]/=j; | 1604 meanDBdur[k]/=j; |
1609 } | 1605 } |
1610 } | 1606 } |
1611 | 1607 |
1641 unsigned *trackOffsetTable = new unsigned[ntohl(dbH->numFiles)]; | 1637 unsigned *trackOffsetTable = new unsigned[ntohl(dbH->numFiles)]; |
1642 unsigned cumTrack=0; | 1638 unsigned cumTrack=0; |
1643 unsigned trackIndexOffset; | 1639 unsigned trackIndexOffset; |
1644 for(k=0; k<ntohl(dbH->numFiles);k++){ | 1640 for(k=0; k<ntohl(dbH->numFiles);k++){ |
1645 trackOffsetTable[k]=cumTrack; | 1641 trackOffsetTable[k]=cumTrack; |
1646 cumTrack+=trackTable[k]*ntohl(dbH->dim); | 1642 cumTrack+=ntohl(trackTable[k])*ntohl(dbH->dim); |
1647 } | 1643 } |
1648 | 1644 |
1649 char nextKey [MAXSTR]; | 1645 char nextKey [MAXSTR]; |
1650 | 1646 |
1651 // chi^2 statistics | 1647 // chi^2 statistics |
1669 } | 1665 } |
1670 | 1666 |
1671 trackOffset=trackOffsetTable[track]; // numDoubles offset | 1667 trackOffset=trackOffsetTable[track]; // numDoubles offset |
1672 trackIndexOffset=trackOffset/ntohl(dbH->dim); // numVectors offset | 1668 trackIndexOffset=trackOffset/ntohl(dbH->dim); // numVectors offset |
1673 | 1669 |
1674 if(sequenceLength<=trackTable[track]){ // test for short sequences | 1670 if(sequenceLength<=ntohl(trackTable[track])){ // test for short sequences |
1675 | 1671 |
1676 if(verbosity>7) { | 1672 if(verbosity>7) { |
1677 cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";cerr.flush(); | 1673 cerr << track << "." << trackIndexOffset << "." << ntohl(trackTable[track]) << " | ";cerr.flush(); |
1678 } | 1674 } |
1679 | 1675 |
1680 // Sum products matrix | 1676 // Sum products matrix |
1681 for(j=0; j<numVectors;j++){ | 1677 for(j=0; j<numVectors;j++){ |
1682 D[j]=new double[trackTable[track]]; | 1678 D[j]=new double[ntohl(trackTable[track])]; |
1683 assert(D[j]); | 1679 assert(D[j]); |
1684 | 1680 |
1685 } | 1681 } |
1686 | 1682 |
1687 // Matched filter matrix | 1683 // Matched filter matrix |
1688 for(j=0; j<numVectors;j++){ | 1684 for(j=0; j<numVectors;j++){ |
1689 DD[j]=new double[trackTable[track]]; | 1685 DD[j]=new double[ntohl(trackTable[track])]; |
1690 assert(DD[j]); | 1686 assert(DD[j]); |
1691 } | 1687 } |
1692 | 1688 |
1693 // Dot product | 1689 // Dot product |
1694 for(j=0; j<numVectors; j++) | 1690 for(j=0; j<numVectors; j++) |
1695 for(k=0; k<trackTable[track]; k++){ | 1691 for(k=0; k<ntohl(trackTable[track]); k++){ |
1696 qp=query+j*ntohl(dbH->dim); | 1692 qp=query+j*ntohl(dbH->dim); |
1697 sp=dataBuf+trackOffset+k*ntohl(dbH->dim); | 1693 sp=dataBuf+trackOffset+k*ntohl(dbH->dim); |
1698 DD[j][k]=0.0; // Initialize matched filter array | 1694 DD[j][k]=0.0; // Initialize matched filter array |
1699 dp=&D[j][k]; // point to correlation cell j,k | 1695 dp=&D[j][k]; // point to correlation cell j,k |
1700 *dp=0.0; // initialize correlation cell | 1696 *dp=0.0; // initialize correlation cell |
1709 if(HOP_SIZE==1){ // HOP_SIZE = shingleHop | 1705 if(HOP_SIZE==1){ // HOP_SIZE = shingleHop |
1710 for(w=0; w<wL; w++) | 1706 for(w=0; w<wL; w++) |
1711 for(j=0; j<numVectors-w; j++){ | 1707 for(j=0; j<numVectors-w; j++){ |
1712 sp=DD[j]; | 1708 sp=DD[j]; |
1713 spd=D[j+w]+w; | 1709 spd=D[j+w]+w; |
1714 k=trackTable[track]-w; | 1710 k=ntohl(trackTable[track])-w; |
1715 while(k--) | 1711 while(k--) |
1716 *sp+++=*spd++; | 1712 *sp+++=*spd++; |
1717 } | 1713 } |
1718 } | 1714 } |
1719 | 1715 |
1720 else{ // HOP_SIZE != 1 | 1716 else{ // HOP_SIZE != 1 |
1721 for(w=0; w<wL; w++) | 1717 for(w=0; w<wL; w++) |
1722 for(j=0; j<numVectors-w; j+=HOP_SIZE){ | 1718 for(j=0; j<numVectors-w; j+=HOP_SIZE){ |
1723 sp=DD[j]; | 1719 sp=DD[j]; |
1724 spd=D[j+w]+w; | 1720 spd=D[j+w]+w; |
1725 for(k=0; k<trackTable[track]-w; k+=HOP_SIZE){ | 1721 for(k=0; k<ntohl(trackTable[track])-w; k+=HOP_SIZE){ |
1726 *sp+=*spd; | 1722 *sp+=*spd; |
1727 sp+=HOP_SIZE; | 1723 sp+=HOP_SIZE; |
1728 spd+=HOP_SIZE; | 1724 spd+=HOP_SIZE; |
1729 } | 1725 } |
1730 } | 1726 } |
1744 cerr.flush(); | 1740 cerr.flush(); |
1745 } | 1741 } |
1746 | 1742 |
1747 // Search for minimum distance by shingles (concatenated vectors) | 1743 // Search for minimum distance by shingles (concatenated vectors) |
1748 for(j=0;j<=numVectors-wL;j+=HOP_SIZE) | 1744 for(j=0;j<=numVectors-wL;j+=HOP_SIZE) |
1749 for(k=0;k<=trackTable[track]-wL;k+=HOP_SIZE){ | 1745 for(k=0;k<=ntohl(trackTable[track])-wL;k+=HOP_SIZE){ |
1750 thisDist=2-(2/(qNorm[j]*sNorm[trackIndexOffset+k]))*DD[j][k]; | 1746 thisDist=2-(2/(qNorm[j]*sNorm[trackIndexOffset+k]))*DD[j][k]; |
1751 if(verbosity>10) { | 1747 if(verbosity>10) { |
1752 cerr << thisDist << " " << qNorm[j] << " " << sNorm[trackIndexOffset+k] << endl; | 1748 cerr << thisDist << " " << qNorm[j] << " " << sNorm[trackIndexOffset+k] << endl; |
1753 } | 1749 } |
1754 // Gather chi^2 statistics | 1750 // Gather chi^2 statistics |
1955 | 1951 |
1956 // Copy the L2 norm values to core to avoid disk random access later on | 1952 // Copy the L2 norm values to core to avoid disk random access later on |
1957 memcpy(sNorm, l2normTable, dbVectors*sizeof(double)); | 1953 memcpy(sNorm, l2normTable, dbVectors*sizeof(double)); |
1958 double* snPtr = sNorm; | 1954 double* snPtr = sNorm; |
1959 for(i=0; i<ntohl(dbH->numFiles); i++){ | 1955 for(i=0; i<ntohl(dbH->numFiles); i++){ |
1960 if(trackTable[i]>=sequenceLength){ | 1956 if(ntohl(trackTable[i])>=sequenceLength){ |
1961 tmp1=*snPtr; | 1957 tmp1=*snPtr; |
1962 j=1; | 1958 j=1; |
1963 w=sequenceLength-1; | 1959 w=sequenceLength-1; |
1964 while(w--) | 1960 while(w--) |
1965 *snPtr+=snPtr[j++]; | 1961 *snPtr+=snPtr[j++]; |
1966 ps = snPtr+1; | 1962 ps = snPtr+1; |
1967 w=trackTable[i]-sequenceLength; // +1 - 1 | 1963 w=ntohl(trackTable[i])-sequenceLength; // +1 - 1 |
1968 while(w--){ | 1964 while(w--){ |
1969 tmp2=*ps; | 1965 tmp2=*ps; |
1970 *ps=*(ps-1)-tmp1+*(ps+sequenceLength-1); | 1966 *ps=*(ps-1)-tmp1+*(ps+sequenceLength-1); |
1971 tmp1=tmp2; | 1967 tmp1=tmp2; |
1972 ps++; | 1968 ps++; |
1973 } | 1969 } |
1974 ps = snPtr; | 1970 ps = snPtr; |
1975 w=trackTable[i]-sequenceLength+1; | 1971 w=ntohl(trackTable[i])-sequenceLength+1; |
1976 while(w--){ | 1972 while(w--){ |
1977 *ps=sqrt(*ps); | 1973 *ps=sqrt(*ps); |
1978 ps++; | 1974 ps++; |
1979 } | 1975 } |
1980 } | 1976 } |
1981 snPtr+=trackTable[i]; | 1977 snPtr+=ntohl(trackTable[i]); |
1982 } | 1978 } |
1983 | 1979 |
1984 double* pn = sMeanL2; | 1980 double* pn = sMeanL2; |
1985 w=ntohl(dbH->numFiles); | 1981 w=ntohl(dbH->numFiles); |
1986 while(w--) | 1982 while(w--) |
1987 *pn++=0.0; | 1983 *pn++=0.0; |
1988 ps=sNorm; | 1984 ps=sNorm; |
1989 unsigned processedTracks=0; | 1985 unsigned processedTracks=0; |
1990 for(i=0; i<ntohl(dbH->numFiles); i++){ | 1986 for(i=0; i<ntohl(dbH->numFiles); i++){ |
1991 if(trackTable[i]>sequenceLength-1){ | 1987 if(ntohl(trackTable[i])>sequenceLength-1){ |
1992 w = trackTable[i]-sequenceLength+1; | 1988 w = ntohl(trackTable[i])-sequenceLength+1; |
1993 pn = sMeanL2+i; | 1989 pn = sMeanL2+i; |
1994 *pn=0; | 1990 *pn=0; |
1995 while(w--) | 1991 while(w--) |
1996 if(*ps>0) | 1992 if(*ps>0) |
1997 *pn+=*ps++; | 1993 *pn+=*ps++; |
1998 *pn/=trackTable[i]-sequenceLength+1; | 1994 *pn/=ntohl(trackTable[i])-sequenceLength+1; |
1999 SILENCE_THRESH+=*pn; | 1995 SILENCE_THRESH+=*pn; |
2000 processedTracks++; | 1996 processedTracks++; |
2001 } | 1997 } |
2002 ps = sNorm + trackTable[i]; | 1998 ps = sNorm + ntohl(trackTable[i]); |
2003 } | 1999 } |
2004 if(verbosity>1) { | 2000 if(verbosity>1) { |
2005 cerr << "processedTracks: " << processedTracks << endl; | 2001 cerr << "processedTracks: " << processedTracks << endl; |
2006 } | 2002 } |
2007 | 2003 |
2100 } | 2096 } |
2101 meanDBdur = new double[ntohl(dbH->numFiles)]; | 2097 meanDBdur = new double[ntohl(dbH->numFiles)]; |
2102 assert(meanDBdur); | 2098 assert(meanDBdur); |
2103 for(k=0; k<ntohl(dbH->numFiles); k++){ | 2099 for(k=0; k<ntohl(dbH->numFiles); k++){ |
2104 meanDBdur[k]=0.0; | 2100 meanDBdur[k]=0.0; |
2105 for(j=0; j<trackTable[k]-1 ; j++) | 2101 for(j=0; j<ntohl(trackTable[k])-1 ; j++) |
2106 meanDBdur[k]+=timesTable[j+1]-timesTable[j]; | 2102 meanDBdur[k]+=timesTable[j+1]-timesTable[j]; |
2107 meanDBdur[k]/=j; | 2103 meanDBdur[k]/=j; |
2108 } | 2104 } |
2109 } | 2105 } |
2110 | 2106 |
2140 unsigned *trackOffsetTable = new unsigned[ntohl(dbH->numFiles)]; | 2136 unsigned *trackOffsetTable = new unsigned[ntohl(dbH->numFiles)]; |
2141 unsigned cumTrack=0; | 2137 unsigned cumTrack=0; |
2142 unsigned trackIndexOffset; | 2138 unsigned trackIndexOffset; |
2143 for(k=0; k<ntohl(dbH->numFiles);k++){ | 2139 for(k=0; k<ntohl(dbH->numFiles);k++){ |
2144 trackOffsetTable[k]=cumTrack; | 2140 trackOffsetTable[k]=cumTrack; |
2145 cumTrack+=trackTable[k]*ntohl(dbH->dim); | 2141 cumTrack+=ntohl(trackTable[k])*ntohl(dbH->dim); |
2146 } | 2142 } |
2147 | 2143 |
2148 char nextKey [MAXSTR]; | 2144 char nextKey [MAXSTR]; |
2149 | 2145 |
2150 // chi^2 statistics | 2146 // chi^2 statistics |
2168 } | 2164 } |
2169 | 2165 |
2170 trackOffset=trackOffsetTable[track]; // numDoubles offset | 2166 trackOffset=trackOffsetTable[track]; // numDoubles offset |
2171 trackIndexOffset=trackOffset/ntohl(dbH->dim); // numVectors offset | 2167 trackIndexOffset=trackOffset/ntohl(dbH->dim); // numVectors offset |
2172 | 2168 |
2173 if(sequenceLength<=trackTable[track]){ // test for short sequences | 2169 if(sequenceLength<=ntohl(trackTable[track])){ // test for short sequences |
2174 | 2170 |
2175 if(verbosity>7) { | 2171 if(verbosity>7) { |
2176 cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";cerr.flush(); | 2172 cerr << track << "." << trackIndexOffset << "." << ntohl(trackTable[track]) << " | ";cerr.flush(); |
2177 } | 2173 } |
2178 | 2174 |
2179 // Sum products matrix | 2175 // Sum products matrix |
2180 for(j=0; j<numVectors;j++){ | 2176 for(j=0; j<numVectors;j++){ |
2181 D[j]=new double[trackTable[track]]; | 2177 D[j]=new double[ntohl(trackTable[track])]; |
2182 assert(D[j]); | 2178 assert(D[j]); |
2183 | 2179 |
2184 } | 2180 } |
2185 | 2181 |
2186 // Matched filter matrix | 2182 // Matched filter matrix |
2187 for(j=0; j<numVectors;j++){ | 2183 for(j=0; j<numVectors;j++){ |
2188 DD[j]=new double[trackTable[track]]; | 2184 DD[j]=new double[ntohl(trackTable[track])]; |
2189 assert(DD[j]); | 2185 assert(DD[j]); |
2190 } | 2186 } |
2191 | 2187 |
2192 // Dot product | 2188 // Dot product |
2193 for(j=0; j<numVectors; j++) | 2189 for(j=0; j<numVectors; j++) |
2194 for(k=0; k<trackTable[track]; k++){ | 2190 for(k=0; k<ntohl(trackTable[track]); k++){ |
2195 qp=query+j*ntohl(dbH->dim); | 2191 qp=query+j*ntohl(dbH->dim); |
2196 sp=dataBuf+trackOffset+k*ntohl(dbH->dim); | 2192 sp=dataBuf+trackOffset+k*ntohl(dbH->dim); |
2197 DD[j][k]=0.0; // Initialize matched filter array | 2193 DD[j][k]=0.0; // Initialize matched filter array |
2198 dp=&D[j][k]; // point to correlation cell j,k | 2194 dp=&D[j][k]; // point to correlation cell j,k |
2199 *dp=0.0; // initialize correlation cell | 2195 *dp=0.0; // initialize correlation cell |
2208 if(HOP_SIZE==1){ // HOP_SIZE = shingleHop | 2204 if(HOP_SIZE==1){ // HOP_SIZE = shingleHop |
2209 for(w=0; w<wL; w++) | 2205 for(w=0; w<wL; w++) |
2210 for(j=0; j<numVectors-w; j++){ | 2206 for(j=0; j<numVectors-w; j++){ |
2211 sp=DD[j]; | 2207 sp=DD[j]; |
2212 spd=D[j+w]+w; | 2208 spd=D[j+w]+w; |
2213 k=trackTable[track]-w; | 2209 k=ntohl(trackTable[track])-w; |
2214 while(k--) | 2210 while(k--) |
2215 *sp+++=*spd++; | 2211 *sp+++=*spd++; |
2216 } | 2212 } |
2217 } | 2213 } |
2218 | 2214 |
2219 else{ // HOP_SIZE != 1 | 2215 else{ // HOP_SIZE != 1 |
2220 for(w=0; w<wL; w++) | 2216 for(w=0; w<wL; w++) |
2221 for(j=0; j<numVectors-w; j+=HOP_SIZE){ | 2217 for(j=0; j<numVectors-w; j+=HOP_SIZE){ |
2222 sp=DD[j]; | 2218 sp=DD[j]; |
2223 spd=D[j+w]+w; | 2219 spd=D[j+w]+w; |
2224 for(k=0; k<trackTable[track]-w; k+=HOP_SIZE){ | 2220 for(k=0; k<ntohl(trackTable[track])-w; k+=HOP_SIZE){ |
2225 *sp+=*spd; | 2221 *sp+=*spd; |
2226 sp+=HOP_SIZE; | 2222 sp+=HOP_SIZE; |
2227 spd+=HOP_SIZE; | 2223 spd+=HOP_SIZE; |
2228 } | 2224 } |
2229 } | 2225 } |
2243 cerr.flush(); | 2239 cerr.flush(); |
2244 } | 2240 } |
2245 | 2241 |
2246 // Search for minimum distance by shingles (concatenated vectors) | 2242 // Search for minimum distance by shingles (concatenated vectors) |
2247 for(j=0;j<=numVectors-wL;j+=HOP_SIZE) | 2243 for(j=0;j<=numVectors-wL;j+=HOP_SIZE) |
2248 for(k=0;k<=trackTable[track]-wL;k+=HOP_SIZE){ | 2244 for(k=0;k<=ntohl(trackTable[track])-wL;k+=HOP_SIZE){ |
2249 thisDist=2-(2/(qNorm[j]*sNorm[trackIndexOffset+k]))*DD[j][k]; | 2245 thisDist=2-(2/(qNorm[j]*sNorm[trackIndexOffset+k]))*DD[j][k]; |
2250 if(verbosity>10) { | 2246 if(verbosity>10) { |
2251 cerr << thisDist << " " << qNorm[j] << " " << sNorm[trackIndexOffset+k] << endl; | 2247 cerr << thisDist << " " << qNorm[j] << " " << sNorm[trackIndexOffset+k] << endl; |
2252 } | 2248 } |
2253 // Gather chi^2 statistics | 2249 // Gather chi^2 statistics |