comparison audioDB.cpp @ 17:6d899df0cfe4

added Euclidean distance for sequences with -R (--radus) (via dot product of unit norm vectors), re-worked L2-norm behaviour, fixed a load of bugs there, fixed shingle norming. Cosine dist sequence match not working now because of L2 norm behaviour
author mas01mc
date Fri, 10 Aug 2007 04:52:33 +0000
parents c633f3819a49
children 999c9c216565
comparison
equal deleted inserted replaced
14:5b9dd1acca40 17:6d899df0cfe4
133 queryPoint(0), 133 queryPoint(0),
134 usingQueryPoint(0), 134 usingQueryPoint(0),
135 isClient(0), 135 isClient(0),
136 isServer(0), 136 isServer(0),
137 port(0), 137 port(0),
138 timesTol(0.1){ 138 timesTol(0.1),
139 radius(0){
139 140
140 if(processArgs(argc, argv)<0){ 141 if(processArgs(argc, argv)<0){
141 printf("No command found.\n"); 142 printf("No command found.\n");
142 cmdline_parser_print_version (); 143 cmdline_parser_print_version ();
143 if (strlen(gengetopt_args_info_purpose) > 0) 144 if (strlen(gengetopt_args_info_purpose) > 0)
224 cerr << "Warning: verbosity out of range, setting to 1" << endl; 225 cerr << "Warning: verbosity out of range, setting to 1" << endl;
225 verbosity=1; 226 verbosity=1;
226 } 227 }
227 } 228 }
228 229
230 if(args_info.radius_given){
231 radius=args_info.radius_arg;
232 if(radius<=0 || radius>1000000000){
233 cerr << "Warning: radius out of range" << endl;
234 exit(1);
235 }
236 else
237 if(verbosity>3)
238 cerr << "Setting radius to " << radius << endl;
239 }
240
229 if(args_info.SERVER_given){ 241 if(args_info.SERVER_given){
230 command=COM_SERVER; 242 command=COM_SERVER;
231 port=args_info.SERVER_arg; 243 port=args_info.SERVER_arg;
232 if(port<100 || port > 100000) 244 if(port<100 || port > 100000)
233 error("port out of range"); 245 error("port out of range");
354 error("pointNN out of range: 1 <= pointNN <= 1000"); 366 error("pointNN out of range: 1 <= pointNN <= 1000");
355 367
356 368
357 369
358 segNN=args_info.resultlength_arg; 370 segNN=args_info.resultlength_arg;
359 if(segNN<1 || segNN >1000) 371 if(segNN<1 || segNN >10000)
360 error("resultlength out of range: 1 <= resultlength <= 1000"); 372 error("resultlength out of range: 1 <= resultlength <= 1000");
361 373
362 374
363 sequenceLength=args_info.sequencelength_arg; 375 sequenceLength=args_info.sequencelength_arg;
364 if(sequenceLength<1 || sequenceLength >1000) 376 if(sequenceLength<1 || sequenceLength >1000)
903 915
904 void audioDB::dump(const char* dbName){ 916 void audioDB::dump(const char* dbName){
905 if(!dbH) 917 if(!dbH)
906 initTables(dbName,0); 918 initTables(dbName,0);
907 919
908 for(unsigned k=0; k<dbH->numFiles; k++) 920 for(unsigned k=0, j=0; k<dbH->numFiles; k++){
909 cout << fileTable+k*O2_FILETABLESIZE << " " << segTable[k] << endl; 921 cout << fileTable+k*O2_FILETABLESIZE << " " << segTable[k] << endl;
922 j+=segTable[k];
923 }
910 924
911 status(dbName); 925 status(dbName);
912 } 926 }
913 927
914 void audioDB::l2norm(const char* dbName){ 928 void audioDB::l2norm(const char* dbName){
928 switch(queryType){ 942 switch(queryType){
929 case O2_FLAG_POINT_QUERY: 943 case O2_FLAG_POINT_QUERY:
930 pointQuery(dbName, inFile, adbQueryResult); 944 pointQuery(dbName, inFile, adbQueryResult);
931 break; 945 break;
932 case O2_FLAG_SEQUENCE_QUERY: 946 case O2_FLAG_SEQUENCE_QUERY:
933 segSequenceQuery(dbName, inFile, adbQueryResult); 947 if(radius==0)
948 segSequenceQuery(dbName, inFile, adbQueryResult);
949 else
950 segSequenceQueryEuc(dbName, inFile, adbQueryResult);
934 break; 951 break;
935 case O2_FLAG_SEG_QUERY: 952 case O2_FLAG_SEG_QUERY:
936 segPointQuery(dbName, inFile, adbQueryResult); 953 segPointQuery(dbName, inFile, adbQueryResult);
937 break; 954 break;
938 default: 955 default:
1464 } 1481 }
1465 if(verbosity>1) 1482 if(verbosity>1)
1466 cerr << "processedSegs: " << processedSegs << endl; 1483 cerr << "processedSegs: " << processedSegs << endl;
1467 SILENCE_THRESH/=processedSegs; 1484 SILENCE_THRESH/=processedSegs;
1468 USE_THRESH=1; // Turn thresholding on 1485 USE_THRESH=1; // Turn thresholding on
1469 DIFF_THRESH=SILENCE_THRESH/=2; // 50% of the mean shingle power 1486 DIFF_THRESH=SILENCE_THRESH/2; // 50% of the mean shingle power
1470 SILENCE_THRESH/=10; // 10% of the mean shingle power is SILENCE 1487 SILENCE_THRESH/=10; // 10% of the mean shingle power is SILENCE
1471 1488
1472 w=sequenceLength-1; 1489 w=sequenceLength-1;
1473 i=1; 1490 i=1;
1474 tmp1=*qNorm; 1491 tmp1=*qNorm;
1717 } 1734 }
1718 } 1735 }
1719 // Calculate the mean of the N-Best matches 1736 // Calculate the mean of the N-Best matches
1720 thisDist=0.0; 1737 thisDist=0.0;
1721 for(m=0; m<pointNN; m++) 1738 for(m=0; m<pointNN; m++)
1722 thisDist+=distances[m]; 1739 if(distances[m]<0.000001){ // Stop rubbish songs getting good scores
1740 thisDist=0.0;
1741 break;
1742 }
1743 else
1744 thisDist+=distances[m];
1723 thisDist/=pointNN; 1745 thisDist/=pointNN;
1724 1746
1725 // Let's see the distances then... 1747 // Let's see the distances then...
1726 if(verbosity>3) 1748 if(verbosity>3)
1727 cerr << "d[" << fileTable+seg*O2_FILETABLESIZE << "]=" << thisDist << endl; 1749 cerr << fileTable+seg*O2_FILETABLESIZE << " " << thisDist << endl;
1728 1750
1729 // All the seg stuff goes here 1751 // All the seg stuff goes here
1730 n=segNN; 1752 n=segNN;
1731 while(n--){ 1753 while(n--){
1732 if(thisDist>=segDistances[n]){ 1754 if(thisDist>=segDistances[n]){
1821 delete meanDBdur; 1843 delete meanDBdur;
1822 1844
1823 1845
1824 } 1846 }
1825 1847
1848 // NBest matched filter distance between query and target segs
1849 // efficient implementation
1850 // outputs average of N minimum matched filter distances
1851 void audioDB::segSequenceQueryEuc(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){
1852
1853 initTables(dbName, inFile);
1854
1855 // For each input vector, find the closest pointNN matching output vectors and report
1856 // we use stdout in this stub version
1857 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
1858 unsigned numSegs = dbH->numFiles;
1859
1860 double* query = (double*)(indata+sizeof(int));
1861 double* data = dataBuf;
1862 double* queryCopy = 0;
1863
1864 double qMeanL2;
1865 double* sMeanL2;
1866
1867 unsigned USE_THRESH=0;
1868 double SILENCE_THRESH=0;
1869 double DIFF_THRESH=0;
1870
1871 if(!(dbH->flags & O2_FLAG_L2NORM) )
1872 error("Database must be L2 normed for sequence query","use -l2norm");
1873
1874 if(verbosity>1)
1875 cerr << "performing norms ... "; cerr.flush();
1876 unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim);
1877 // Make a copy of the query
1878 queryCopy = new double[numVectors*dbH->dim];
1879 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
1880 qNorm = new double[numVectors];
1881 sNorm = new double[dbVectors];
1882 sMeanL2=new double[dbH->numFiles];
1883 assert(qNorm&&sNorm&&queryCopy&&sMeanL2&&sequenceLength);
1884 unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
1885 query = queryCopy;
1886 // Make norm measurements relative to sequenceLength
1887 unsigned w = sequenceLength-1;
1888 unsigned i,j;
1889 double* ps;
1890 double tmp1,tmp2;
1891 // Copy the L2 norm values to core to avoid disk random access later on
1892 memcpy(sNorm, l2normTable, dbVectors*sizeof(double));
1893 double* snPtr = sNorm;
1894 for(i=0; i<dbH->numFiles; i++){
1895 if(segTable[i]>=sequenceLength){
1896 tmp1=*snPtr;
1897 j=1;
1898 w=sequenceLength-1;
1899 while(w--)
1900 *snPtr+=snPtr[j++];
1901 ps = snPtr+1;
1902 w=segTable[i]-sequenceLength; // +1 - 1
1903 while(w--){
1904 tmp2=*ps;
1905 *ps=*(ps-1)-tmp1+*(ps+sequenceLength-1);
1906 tmp1=tmp2;
1907 ps++;
1908 }
1909 ps = snPtr;
1910 w=segTable[i]-sequenceLength+1;
1911 while(w--){
1912 *ps=sqrt(*ps);
1913 ps++;
1914 }
1915 }
1916 snPtr+=segTable[i];
1917 }
1918
1919 double* pn = sMeanL2;
1920 w=dbH->numFiles;
1921 while(w--)
1922 *pn++=0.0;
1923 ps=sNorm;
1924 unsigned processedSegs=0;
1925 for(i=0; i<dbH->numFiles; i++){
1926 if(segTable[i]>sequenceLength-1){
1927 w = segTable[i]-sequenceLength;
1928 pn = sMeanL2+i;
1929 *pn=0;
1930 while(w--)
1931 if(*ps>0)
1932 *pn+=*ps++;
1933 *pn/=segTable[i]-sequenceLength;
1934 SILENCE_THRESH+=*pn;
1935 processedSegs++;
1936 }
1937 ps = sNorm + segTable[i];
1938 }
1939 if(verbosity>1)
1940 cerr << "processedSegs: " << processedSegs << endl;
1941
1942
1943 SILENCE_THRESH/=processedSegs;
1944 USE_THRESH=1; // Turn thresholding on
1945 DIFF_THRESH=SILENCE_THRESH; // 50% of the mean shingle power
1946 SILENCE_THRESH/=5; // 20% of the mean shingle power is SILENCE
1947 if(verbosity>4)
1948 cerr << "silence thresh: " << SILENCE_THRESH;
1949 w=sequenceLength-1;
1950 i=1;
1951 tmp1=*qNorm;
1952 while(w--)
1953 *qNorm+=qNorm[i++];
1954 ps = qNorm+1;
1955 w=numVectors-sequenceLength; // +1 -1
1956 while(w--){
1957 tmp2=*ps;
1958 *ps=*(ps-1)-tmp1+*(ps+sequenceLength-1);
1959 tmp1=tmp2;
1960 ps++;
1961 }
1962 ps = qNorm;
1963 qMeanL2 = 0;
1964 w=numVectors-sequenceLength+1;
1965 while(w--){
1966 *ps=sqrt(*ps);
1967 qMeanL2+=*ps++;
1968 }
1969 qMeanL2 /= numVectors-sequenceLength+1;
1970
1971 if(verbosity>1)
1972 cerr << "done." << endl;
1973
1974
1975 if(verbosity>1)
1976 cerr << "matching segs..." << endl;
1977
1978 assert(pointNN>0 && pointNN<=O2_MAXNN);
1979 assert(segNN>0 && segNN<=O2_MAXNN);
1980
1981 // Make temporary dynamic memory for results
1982 double segDistances[segNN];
1983 unsigned segIDs[segNN];
1984 unsigned segQIndexes[segNN];
1985 unsigned segSIndexes[segNN];
1986
1987 double distances[pointNN];
1988 unsigned qIndexes[pointNN];
1989 unsigned sIndexes[pointNN];
1990
1991
1992 unsigned k,l,m,n,seg,segOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength;
1993 double thisDist;
1994 double oneOverWL=1.0/wL;
1995
1996 for(k=0; k<pointNN; k++){
1997 distances[k]=0.0;
1998 qIndexes[k]=~0;
1999 sIndexes[k]=~0;
2000 }
2001
2002 for(k=0; k<segNN; k++){
2003 segDistances[k]=0.0;
2004 segQIndexes[k]=~0;
2005 segSIndexes[k]=~0;
2006 segIDs[k]=~0;
2007 }
2008
2009 // Timestamp and durations processing
2010 double meanQdur = 0;
2011 double* timesdata = 0;
2012 double* meanDBdur = 0;
2013
2014 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
2015 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl;
2016 usingTimes=0;
2017 }
2018
2019 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
2020 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl;
2021
2022 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
2023 timesdata = new double[numVectors];
2024 assert(timesdata);
2025 insertTimeStamps(numVectors, timesFile, timesdata);
2026 // Calculate durations of points
2027 for(k=0; k<numVectors-1; k++){
2028 timesdata[k]=timesdata[k+1]-timesdata[k];
2029 meanQdur+=timesdata[k];
2030 }
2031 meanQdur/=k;
2032 if(verbosity>1)
2033 cerr << "mean query file duration: " << meanQdur << endl;
2034 meanDBdur = new double[dbH->numFiles];
2035 assert(meanDBdur);
2036 for(k=0; k<dbH->numFiles; k++){
2037 meanDBdur[k]=0.0;
2038 for(j=0; j<segTable[k]-1 ; j++)
2039 meanDBdur[k]+=timesTable[j+1]-timesTable[j];
2040 meanDBdur[k]/=j;
2041 }
2042 }
2043
2044 if(usingQueryPoint)
2045 if(queryPoint>numVectors || queryPoint>numVectors-wL+1)
2046 error("queryPoint > numVectors-wL+1 in query");
2047 else{
2048 if(verbosity>1)
2049 cerr << "query point: " << queryPoint << endl; cerr.flush();
2050 query=query+queryPoint*dbH->dim;
2051 qNorm=qNorm+queryPoint;
2052 numVectors=wL;
2053 }
2054
2055 double ** D = 0; // Differences query and target
2056 double ** DD = 0; // Matched filter distance
2057
2058 D = new double*[numVectors];
2059 assert(D);
2060 DD = new double*[numVectors];
2061 assert(DD);
2062
2063 gettimeofday(&tv1, NULL);
2064 processedSegs=0;
2065 unsigned successfulSegs=0;
2066
2067 double* qp;
2068 double* sp;
2069 double* dp;
2070 double diffL2;
2071
2072 // build segment offset table
2073 unsigned *segOffsetTable = new unsigned[dbH->numFiles];
2074 unsigned cumSeg=0;
2075 unsigned segIndexOffset;
2076 for(k=0; k<dbH->numFiles;k++){
2077 segOffsetTable[k]=cumSeg;
2078 cumSeg+=segTable[k]*dbH->dim;
2079 }
2080
2081 char nextKey [MAXSTR];
2082
2083 // chi^2 statistics
2084 double sampleCount = 0;
2085 double sampleSum = 0;
2086 double logSampleSum = 0;
2087 double minSample = 1e9;
2088 double maxSample = 0;
2089
2090 // Track loop
2091 for(processedSegs=0, seg=0 ; processedSegs < dbH->numFiles ; seg++, processedSegs++){
2092
2093 // get segID from file if using a control file
2094 if(segFile){
2095 if(!segFile->eof()){
2096 segFile->getline(nextKey,MAXSTR);
2097 seg=getKeyPos(nextKey);
2098 }
2099 else
2100 break;
2101 }
2102
2103 segOffset=segOffsetTable[seg]; // numDoubles offset
2104 segIndexOffset=segOffset/dbH->dim; // numVectors offset
2105
2106 if(sequenceLength<segTable[seg]){ // test for short sequences
2107
2108 if(verbosity>7)
2109 cerr << seg << "." << segIndexOffset << "." << segTable[seg] << " | ";cerr.flush();
2110
2111 // Sum products matrix
2112 for(j=0; j<numVectors;j++){
2113 D[j]=new double[segTable[seg]];
2114 assert(D[j]);
2115
2116 }
2117
2118 // Matched filter matrix
2119 for(j=0; j<numVectors;j++){
2120 DD[j]=new double[segTable[seg]];
2121 assert(DD[j]);
2122 }
2123
2124 double tmp;
2125 // Dot product
2126 for(j=0; j<numVectors; j++)
2127 for(k=0; k<segTable[seg]; k++){
2128 qp=query+j*dbH->dim;
2129 sp=dataBuf+segOffset+k*dbH->dim;
2130 DD[j][k]=0.0; // Initialize matched filter array
2131 dp=&D[j][k]; // point to correlation cell j,k
2132 *dp=0.0; // initialize correlation cell
2133 l=dbH->dim; // size of vectors
2134 while(l--)
2135 *dp+=*qp++**sp++;
2136 }
2137
2138 // Matched Filter
2139 // HOP SIZE == 1
2140 double* spd;
2141 if(HOP_SIZE==1){ // HOP_SIZE = shingleHop
2142 for(w=0; w<wL; w++)
2143 for(j=0; j<numVectors-w; j++){
2144 sp=DD[j];
2145 spd=D[j+w]+w;
2146 k=segTable[seg]-w;
2147 while(k--)
2148 *sp+++=*spd++;
2149 }
2150 }
2151
2152 else{ // HOP_SIZE != 1
2153 for(w=0; w<wL; w++)
2154 for(j=0; j<numVectors-w; j+=HOP_SIZE){
2155 sp=DD[j];
2156 spd=D[j+w]+w;
2157 for(k=0; k<segTable[seg]-w; k+=HOP_SIZE){
2158 *sp+=*spd;
2159 sp+=HOP_SIZE;
2160 spd+=HOP_SIZE;
2161 }
2162 }
2163 }
2164
2165 if(verbosity>3 && usingTimes){
2166 cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[seg] << endl;
2167 cerr.flush();
2168 }
2169
2170 if(!usingTimes ||
2171 (usingTimes
2172 && fabs(meanDBdur[seg]-meanQdur)<meanQdur*timesTol)){
2173
2174 if(verbosity>3 && usingTimes){
2175 cerr << "within duration tolerance." << endl;
2176 cerr.flush();
2177 }
2178
2179 // Search for minimum distance by shingles (concatenated vectors)
2180 for(j=0;j<numVectors-wL;j+=HOP_SIZE)
2181 for(k=0;k<segTable[seg]-wL;k+=HOP_SIZE){
2182 thisDist=2-(2/(qNorm[j]*sNorm[segIndexOffset+k]))*DD[j][k];
2183 if(verbosity>10)
2184 cerr << thisDist << " " << qNorm[j] << " " << sNorm[segIndexOffset+k] << endl;
2185 // Gather chi^2 statistics
2186 if(thisDist<minSample)
2187 minSample=thisDist;
2188 else if(thisDist>maxSample)
2189 maxSample=thisDist;
2190 if(thisDist>1e-9){
2191 sampleCount++;
2192 sampleSum+=thisDist;
2193 logSampleSum+=log(thisDist);
2194 }
2195
2196 diffL2 = fabs(qNorm[j] - sNorm[segIndexOffset+k]);
2197 // Power test
2198 if(!USE_THRESH ||
2199 // Threshold on mean L2 of Q and S sequences
2200 (USE_THRESH && qNorm[j]>SILENCE_THRESH && sNorm[segIndexOffset+k]>SILENCE_THRESH &&
2201 // Are both query and target windows above mean energy?
2202 (qNorm[j]>qMeanL2*.25 && sNorm[segIndexOffset+k]>sMeanL2[seg]*.25))) // && diffL2 < DIFF_THRESH )))
2203 thisDist=thisDist; // Computed above
2204 else
2205 thisDist=1000000.0;
2206 if(thisDist>=0 && thisDist<=radius){
2207 distances[0]++; // increment count
2208 break; // only need one seg point per query point
2209 }
2210 }
2211 // How many points were below threshold ?
2212 thisDist=distances[0];
2213
2214 // Let's see the distances then...
2215 if(verbosity>3)
2216 cerr << fileTable+seg*O2_FILETABLESIZE << " " << thisDist << endl;
2217
2218 // All the seg stuff goes here
2219 n=segNN;
2220 while(n--){
2221 if(thisDist>segDistances[n]){
2222 if((n==0 || thisDist<=segDistances[n-1])){
2223 // Copy all values above up the queue
2224 for( l=segNN-1 ; l > n ; l--){
2225 segDistances[l]=segDistances[l-1];
2226 segQIndexes[l]=segQIndexes[l-1];
2227 segSIndexes[l]=segSIndexes[l-1];
2228 segIDs[l]=segIDs[l-1];
2229 }
2230 segDistances[n]=thisDist;
2231 segQIndexes[n]=qIndexes[0];
2232 segSIndexes[n]=sIndexes[0];
2233 successfulSegs++;
2234 segIDs[n]=seg;
2235 break;
2236 }
2237 }
2238 else
2239 break;
2240 }
2241 } // Duration match
2242
2243 // Clean up current seg
2244 if(D!=NULL){
2245 for(j=0; j<numVectors; j++)
2246 delete[] D[j];
2247 }
2248
2249 if(DD!=NULL){
2250 for(j=0; j<numVectors; j++)
2251 delete[] DD[j];
2252 }
2253 }
2254 // per-seg reset array values
2255 for(unsigned k=0; k<pointNN; k++){
2256 distances[k]=0.0;
2257 qIndexes[k]=~0;
2258 sIndexes[k]=~0;
2259 }
2260 }
2261
2262 gettimeofday(&tv2,NULL);
2263 if(verbosity>1){
2264 cerr << endl << "processed segs :" << processedSegs << " matched segments: " << successfulSegs << " elapsed time:"
2265 << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl;
2266 cerr << "sampleCount: " << sampleCount << " sampleSum: " << sampleSum << " logSampleSum: " << logSampleSum
2267 << " minSample: " << minSample << " maxSample: " << maxSample << endl;
2268 }
2269
2270 if(adbQueryResult==0){
2271 if(verbosity>1)
2272 cerr<<endl;
2273 // Output answer
2274 // Loop over nearest neighbours
2275 for(k=0; k < min(segNN,successfulSegs); k++)
2276 cout << fileTable+segIDs[k]*O2_FILETABLESIZE << " " << segDistances[k] << endl;
2277 }
2278 else{ // Process Web Services Query
2279 int listLen = min(segNN, processedSegs);
2280 adbQueryResult->__sizeRlist=listLen;
2281 adbQueryResult->__sizeDist=listLen;
2282 adbQueryResult->__sizeQpos=listLen;
2283 adbQueryResult->__sizeSpos=listLen;
2284 adbQueryResult->Rlist= new char*[listLen];
2285 adbQueryResult->Dist = new double[listLen];
2286 adbQueryResult->Qpos = new int[listLen];
2287 adbQueryResult->Spos = new int[listLen];
2288 for(k=0; k<adbQueryResult->__sizeRlist; k++){
2289 adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR];
2290 adbQueryResult->Dist[k]=segDistances[k];
2291 adbQueryResult->Qpos[k]=segQIndexes[k];
2292 adbQueryResult->Spos[k]=segSIndexes[k];
2293 sprintf(adbQueryResult->Rlist[k], "%s", fileTable+segIDs[k]*O2_FILETABLESIZE);
2294 }
2295 }
2296
2297
2298 // Clean up
2299 if(segOffsetTable)
2300 delete[] segOffsetTable;
2301 if(queryCopy)
2302 delete[] queryCopy;
2303 //if(qNorm)
2304 //delete qNorm;
2305 if(D)
2306 delete[] D;
2307 if(DD)
2308 delete[] DD;
2309 if(timesdata)
2310 delete[] timesdata;
2311 if(meanDBdur)
2312 delete[] meanDBdur;
2313
2314
2315 }
2316
1826 void audioDB::normalize(double* X, int dim, int n){ 2317 void audioDB::normalize(double* X, int dim, int n){
1827 unsigned c = n*dim; 2318 unsigned c = n*dim;
1828 double minval,maxval,v,*p; 2319 double minval,maxval,v,*p;
1829 2320
1830 p=X; 2321 p=X;
1870 d=dim; 2361 d=dim;
1871 while(d--){ 2362 while(d--){
1872 L2+=*p**p; 2363 L2+=*p**p;
1873 p++; 2364 p++;
1874 } 2365 }
1875 L2=sqrt(L2); 2366 /* L2=sqrt(L2);*/
1876 if(qNorm) 2367 if(qNorm)
1877 *qNorm++=L2; 2368 *qNorm++=L2;
2369 /*
1878 oneOverL2 = 1.0/L2; 2370 oneOverL2 = 1.0/L2;
1879 d=dim; 2371 d=dim;
1880 while(d--){ 2372 while(d--){
1881 *X*=oneOverL2; 2373 *X*=oneOverL2;
1882 X++; 2374 X++;
1883 } 2375 */
2376 X+=dim;
1884 } 2377 }
1885 if(verbosity>2) 2378 if(verbosity>2)
1886 cerr << "done..." << endl; 2379 cerr << "done..." << endl;
1887 } 2380 }
1888 2381
1911 d=dim; 2404 d=dim;
1912 while(d--){ 2405 while(d--){
1913 *l2ptr+=*p**p; 2406 *l2ptr+=*p**p;
1914 p++; 2407 p++;
1915 } 2408 }
1916 *l2ptr=sqrt(*l2ptr); 2409 l2ptr++;
1917 oneOverL2 = 1.0/(*l2ptr++); 2410 /*
1918 d=dim; 2411 oneOverL2 = 1.0/(*l2ptr++);
1919 while(d--){ 2412 d=dim;
2413 while(d--){
1920 *X*=oneOverL2; 2414 *X*=oneOverL2;
1921 X++; 2415 X++;
1922 } 2416 }
2417 */
2418 X+=dim;
1923 } 2419 }
1924 unsigned offset; 2420 unsigned offset;
1925 if(append) 2421 if(append)
1926 offset=dbH->length/(dbH->dim*sizeof(double)); // number of vectors 2422 offset=dbH->length/(dbH->dim*sizeof(double)); // number of vectors
1927 else 2423 else
1928 offset=0; 2424 offset=0;
1929 memcpy(l2normTable+offset, l2buf, n*sizeof(double)); 2425 memcpy(l2normTable+offset, l2buf, n*sizeof(double));
1930 if(l2buf) 2426 if(l2buf)
1931 delete l2buf; 2427 delete[] l2buf;
1932 if(verbosity>2) 2428 if(verbosity>2)
1933 cerr << "done..." << endl; 2429 cerr << "done..." << endl;
1934 } 2430 }
1935 2431
1936 2432
2036 } 2532 }
2037 2533
2038 int main(const unsigned argc, char* const argv[]){ 2534 int main(const unsigned argc, char* const argv[]){
2039 audioDB(argc, argv); 2535 audioDB(argc, argv);
2040 } 2536 }
2041
2042