comparison YinUtil.cpp @ 136:7cbf40306c10 vamp-fft-revision

Add state to YinUtil, prepare to use the Vamp FFT (but don't actually use it yet)
author Chris Cannam
date Fri, 19 Aug 2016 12:00:13 +0100
parents 1629209f5bf2
children 109c3a2ad930
comparison
equal deleted inserted replaced
135:2c73618b4067 136:7cbf40306c10
19 #include <cmath> 19 #include <cmath>
20 #include <algorithm> 20 #include <algorithm>
21 21
22 #include <boost/math/distributions.hpp> 22 #include <boost/math/distributions.hpp>
23 23
24 YinUtil::YinUtil(size_t yinBufferSize) :
25 m_yinBufferSize(yinBufferSize),
26 m_fft(yinBufferSize * 2)
27 {
28 }
29
30 YinUtil::~YinUtil()
31 {
32 }
33
24 void 34 void
25 YinUtil::slowDifference(const double *in, double *yinBuffer, const size_t yinBufferSize) 35 YinUtil::slowDifference(const double *in, double *yinBuffer)
26 { 36 {
27 yinBuffer[0] = 0; 37 yinBuffer[0] = 0;
28 double delta ; 38 double delta ;
29 int startPoint = 0; 39 int startPoint = 0;
30 int endPoint = 0; 40 int endPoint = 0;
31 for (int i = 1; i < yinBufferSize; ++i) { 41 for (int i = 1; i < m_yinBufferSize; ++i) {
32 yinBuffer[i] = 0; 42 yinBuffer[i] = 0;
33 startPoint = yinBufferSize/2 - i/2; 43 startPoint = m_yinBufferSize/2 - i/2;
34 endPoint = startPoint + yinBufferSize; 44 endPoint = startPoint + m_yinBufferSize;
35 for (int j = startPoint; j < endPoint; ++j) { 45 for (int j = startPoint; j < endPoint; ++j) {
36 delta = in[i+j] - in[j]; 46 delta = in[i+j] - in[j];
37 yinBuffer[i] += delta * delta; 47 yinBuffer[i] += delta * delta;
38 } 48 }
39 } 49 }
40 } 50 }
41 51
42 void 52 void
43 YinUtil::fastDifference(const double *in, double *yinBuffer, const size_t yinBufferSize) 53 YinUtil::fastDifference(const double *in, double *yinBuffer)
44 { 54 {
45 55
46 // DECLARE AND INITIALISE 56 // DECLARE AND INITIALISE
47 // initialisation of most of the arrays here was done in a separate function, 57 // initialisation of most of the arrays here was done in a separate function,
48 // with all the arrays as members of the class... moved them back here. 58 // with all the arrays as members of the class... moved them back here.
49 59
50 size_t frameSize = 2 * yinBufferSize; 60 size_t frameSize = 2 * m_yinBufferSize;
51 61
52 double *audioTransformedReal = new double[frameSize]; 62 double *audioTransformedReal = new double[frameSize];
53 double *audioTransformedImag = new double[frameSize]; 63 double *audioTransformedImag = new double[frameSize];
54 double *nullImag = new double[frameSize]; 64 double *nullImag = new double[frameSize];
55 double *kernel = new double[frameSize]; 65 double *kernel = new double[frameSize];
56 double *kernelTransformedReal = new double[frameSize]; 66 double *kernelTransformedReal = new double[frameSize];
57 double *kernelTransformedImag = new double[frameSize]; 67 double *kernelTransformedImag = new double[frameSize];
58 double *yinStyleACFReal = new double[frameSize]; 68 double *yinStyleACFReal = new double[frameSize];
59 double *yinStyleACFImag = new double[frameSize]; 69 double *yinStyleACFImag = new double[frameSize];
60 double *powerTerms = new double[yinBufferSize]; 70 double *powerTerms = new double[m_yinBufferSize];
61 71
62 for (size_t j = 0; j < yinBufferSize; ++j) 72 for (size_t j = 0; j < m_yinBufferSize; ++j)
63 { 73 {
64 yinBuffer[j] = 0.; // set to zero 74 yinBuffer[j] = 0.; // set to zero
65 powerTerms[j] = 0.; // set to zero 75 powerTerms[j] = 0.; // set to zero
66 } 76 }
67 77
78 } 88 }
79 89
80 // POWER TERM CALCULATION 90 // POWER TERM CALCULATION
81 // ... for the power terms in equation (7) in the Yin paper 91 // ... for the power terms in equation (7) in the Yin paper
82 powerTerms[0] = 0.0; 92 powerTerms[0] = 0.0;
83 for (size_t j = 0; j < yinBufferSize; ++j) { 93 for (size_t j = 0; j < m_yinBufferSize; ++j) {
84 powerTerms[0] += in[j] * in[j]; 94 powerTerms[0] += in[j] * in[j];
85 } 95 }
86 96
87 // now iteratively calculate all others (saves a few multiplications) 97 // now iteratively calculate all others (saves a few multiplications)
88 for (size_t tau = 1; tau < yinBufferSize; ++tau) { 98 for (size_t tau = 1; tau < m_yinBufferSize; ++tau) {
89 powerTerms[tau] = powerTerms[tau-1] - in[tau-1] * in[tau-1] + in[tau+yinBufferSize] * in[tau+yinBufferSize]; 99 powerTerms[tau] = powerTerms[tau-1] - in[tau-1] * in[tau-1] + in[tau+m_yinBufferSize] * in[tau+m_yinBufferSize];
90 } 100 }
91 101
92 // YIN-STYLE AUTOCORRELATION via FFT 102 // YIN-STYLE AUTOCORRELATION via FFT
93 // 1. data 103 // 1. data
94 Vamp::FFT::forward(frameSize, in, nullImag, audioTransformedReal, audioTransformedImag); 104 Vamp::FFT::forward(frameSize, in, nullImag, audioTransformedReal, audioTransformedImag);
95 105
96 // 2. half of the data, disguised as a convolution kernel 106 // 2. half of the data, disguised as a convolution kernel
97 for (size_t j = 0; j < yinBufferSize; ++j) { 107 for (size_t j = 0; j < m_yinBufferSize; ++j) {
98 kernel[j] = in[yinBufferSize-1-j]; 108 kernel[j] = in[m_yinBufferSize-1-j];
99 } 109 }
100 Vamp::FFT::forward(frameSize, kernel, nullImag, kernelTransformedReal, kernelTransformedImag); 110 Vamp::FFT::forward(frameSize, kernel, nullImag, kernelTransformedReal, kernelTransformedImag);
101 111
102 // 3. convolution via complex multiplication -- written into 112 // 3. convolution via complex multiplication -- written into
103 for (size_t j = 0; j < frameSize; ++j) { 113 for (size_t j = 0; j < frameSize; ++j) {
106 } 116 }
107 Vamp::FFT::inverse(frameSize, yinStyleACFReal, yinStyleACFImag, audioTransformedReal, audioTransformedImag); 117 Vamp::FFT::inverse(frameSize, yinStyleACFReal, yinStyleACFImag, audioTransformedReal, audioTransformedImag);
108 118
109 // CALCULATION OF difference function 119 // CALCULATION OF difference function
110 // ... according to (7) in the Yin paper. 120 // ... according to (7) in the Yin paper.
111 for (size_t j = 0; j < yinBufferSize; ++j) { 121 for (size_t j = 0; j < m_yinBufferSize; ++j) {
112 // taking only the real part 122 // taking only the real part
113 yinBuffer[j] = powerTerms[0] + powerTerms[j] - 2 * audioTransformedReal[j+yinBufferSize-1]; 123 yinBuffer[j] = powerTerms[0] + powerTerms[j] - 2 * audioTransformedReal[j+m_yinBufferSize-1];
114 } 124 }
115 delete [] audioTransformedReal; 125 delete [] audioTransformedReal;
116 delete [] audioTransformedImag; 126 delete [] audioTransformedImag;
117 delete [] nullImag; 127 delete [] nullImag;
118 delete [] kernel; 128 delete [] kernel;
122 delete [] yinStyleACFImag; 132 delete [] yinStyleACFImag;
123 delete [] powerTerms; 133 delete [] powerTerms;
124 } 134 }
125 135
126 void 136 void
127 YinUtil::cumulativeDifference(double *yinBuffer, const size_t yinBufferSize) 137 YinUtil::cumulativeDifference(double *yinBuffer)
128 { 138 {
129 size_t tau; 139 size_t tau;
130 140
131 yinBuffer[0] = 1; 141 yinBuffer[0] = 1;
132 142
133 double runningSum = 0; 143 double runningSum = 0;
134 144
135 for (tau = 1; tau < yinBufferSize; ++tau) { 145 for (tau = 1; tau < m_yinBufferSize; ++tau) {
136 runningSum += yinBuffer[tau]; 146 runningSum += yinBuffer[tau];
137 if (runningSum == 0) 147 if (runningSum == 0)
138 { 148 {
139 yinBuffer[tau] = 1; 149 yinBuffer[tau] = 1;
140 } else { 150 } else {
142 } 152 }
143 } 153 }
144 } 154 }
145 155
146 int 156 int
147 YinUtil::absoluteThreshold(const double *yinBuffer, const size_t yinBufferSize, const double thresh) 157 YinUtil::absoluteThreshold(const double *yinBuffer, const double thresh)
148 { 158 {
149 size_t tau; 159 size_t tau;
150 size_t minTau = 0; 160 size_t minTau = 0;
151 double minVal = 1000.; 161 double minVal = 1000.;
152 162
153 // using Joren Six's "loop construct" from TarsosDSP 163 // using Joren Six's "loop construct" from TarsosDSP
154 tau = 2; 164 tau = 2;
155 while (tau < yinBufferSize) 165 while (tau < m_yinBufferSize)
156 { 166 {
157 if (yinBuffer[tau] < thresh) 167 if (yinBuffer[tau] < thresh)
158 { 168 {
159 while (tau+1 < yinBufferSize && yinBuffer[tau+1] < yinBuffer[tau]) 169 while (tau+1 < m_yinBufferSize && yinBuffer[tau+1] < yinBuffer[tau])
160 { 170 {
161 ++tau; 171 ++tau;
162 } 172 }
163 return tau; 173 return tau;
164 } else { 174 } else {
185 static float single10[100] = {0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000}; 195 static float single10[100] = {0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000};
186 static float single15[100] = {0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000}; 196 static float single15[100] = {0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000};
187 static float single20[100] = {0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000}; 197 static float single20[100] = {0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000};
188 198
189 std::vector<double> 199 std::vector<double>
190 YinUtil::yinProb(const double *yinBuffer, const size_t prior, const size_t yinBufferSize, const size_t minTau0, const size_t maxTau0) 200 YinUtil::yinProb(const double *yinBuffer, const size_t prior, const size_t minTau0, const size_t maxTau0)
191 { 201 {
192 size_t minTau = 2; 202 size_t minTau = 2;
193 size_t maxTau = yinBufferSize; 203 size_t maxTau = m_yinBufferSize;
194 204
195 // adapt period range, if necessary 205 // adapt period range, if necessary
196 if (minTau0 > 0 && minTau0 < maxTau0) minTau = minTau0; 206 if (minTau0 > 0 && minTau0 < maxTau0) minTau = minTau0;
197 if (maxTau0 > 0 && maxTau0 < yinBufferSize && maxTau0 > minTau) maxTau = maxTau0; 207 if (maxTau0 > 0 && maxTau0 < m_yinBufferSize && maxTau0 > minTau) maxTau = maxTau0;
198 208
199 double minWeight = 0.01; 209 double minWeight = 0.01;
200 size_t tau; 210 size_t tau;
201 std::vector<float> thresholds; 211 std::vector<float> thresholds;
202 std::vector<float> distribution; 212 std::vector<float> distribution;
203 std::vector<double> peakProb = std::vector<double>(yinBufferSize); 213 std::vector<double> peakProb = std::vector<double>(m_yinBufferSize);
204 214
205 size_t nThreshold = 100; 215 size_t nThreshold = 100;
206 int nThresholdInt = nThreshold; 216 int nThresholdInt = nThreshold;
207 217
208 for (int i = 0; i < nThresholdInt; ++i) 218 for (int i = 0; i < nThresholdInt; ++i)
301 } 311 }
302 } 312 }
303 313
304 if (peakProb[minInd] > 1) { 314 if (peakProb[minInd] > 1) {
305 std::cerr << "WARNING: yin has prob > 1 ??? I'm returning all zeros instead." << std::endl; 315 std::cerr << "WARNING: yin has prob > 1 ??? I'm returning all zeros instead." << std::endl;
306 return(std::vector<double>(yinBufferSize)); 316 return(std::vector<double>(m_yinBufferSize));
307 } 317 }
308 318
309 double nonPeakProb = 1; 319 double nonPeakProb = 1;
310 if (sumProb > 0) { 320 if (sumProb > 0) {
311 for (size_t i = minTau; i < maxTau; ++i) 321 for (size_t i = minTau; i < maxTau; ++i)
322 332
323 return peakProb; 333 return peakProb;
324 } 334 }
325 335
326 double 336 double
327 YinUtil::parabolicInterpolation(const double *yinBuffer, const size_t tau, const size_t yinBufferSize) 337 YinUtil::parabolicInterpolation(const double *yinBuffer, const size_t tau)
328 { 338 {
329 // this is taken almost literally from Joren Six's Java implementation 339 // this is taken almost literally from Joren Six's Java implementation
330 if (tau == yinBufferSize) // not valid anyway. 340 if (tau == m_yinBufferSize) // not valid anyway.
331 { 341 {
332 return static_cast<double>(tau); 342 return static_cast<double>(tau);
333 } 343 }
334 344
335 double betterTau = 0.0; 345 double betterTau = 0.0;
336 if (tau > 0 && tau < yinBufferSize-1) { 346 if (tau > 0 && tau < m_yinBufferSize-1) {
337 float s0, s1, s2; 347 float s0, s1, s2;
338 s0 = yinBuffer[tau-1]; 348 s0 = yinBuffer[tau-1];
339 s1 = yinBuffer[tau]; 349 s1 = yinBuffer[tau];
340 s2 = yinBuffer[tau+1]; 350 s2 = yinBuffer[tau+1];
341 351