Mercurial > hg > pyin
comparison YinUtil.cpp @ 136:7cbf40306c10 vamp-fft-revision
Add state to YinUtil, prepare to use the Vamp FFT (but don't actually use it yet)
author | Chris Cannam |
---|---|
date | Fri, 19 Aug 2016 12:00:13 +0100 |
parents | 1629209f5bf2 |
children | 109c3a2ad930 |
comparison
equal
deleted
inserted
replaced
135:2c73618b4067 | 136:7cbf40306c10 |
---|---|
19 #include <cmath> | 19 #include <cmath> |
20 #include <algorithm> | 20 #include <algorithm> |
21 | 21 |
22 #include <boost/math/distributions.hpp> | 22 #include <boost/math/distributions.hpp> |
23 | 23 |
24 YinUtil::YinUtil(size_t yinBufferSize) : | |
25 m_yinBufferSize(yinBufferSize), | |
26 m_fft(yinBufferSize * 2) | |
27 { | |
28 } | |
29 | |
30 YinUtil::~YinUtil() | |
31 { | |
32 } | |
33 | |
24 void | 34 void |
25 YinUtil::slowDifference(const double *in, double *yinBuffer, const size_t yinBufferSize) | 35 YinUtil::slowDifference(const double *in, double *yinBuffer) |
26 { | 36 { |
27 yinBuffer[0] = 0; | 37 yinBuffer[0] = 0; |
28 double delta ; | 38 double delta ; |
29 int startPoint = 0; | 39 int startPoint = 0; |
30 int endPoint = 0; | 40 int endPoint = 0; |
31 for (int i = 1; i < yinBufferSize; ++i) { | 41 for (int i = 1; i < m_yinBufferSize; ++i) { |
32 yinBuffer[i] = 0; | 42 yinBuffer[i] = 0; |
33 startPoint = yinBufferSize/2 - i/2; | 43 startPoint = m_yinBufferSize/2 - i/2; |
34 endPoint = startPoint + yinBufferSize; | 44 endPoint = startPoint + m_yinBufferSize; |
35 for (int j = startPoint; j < endPoint; ++j) { | 45 for (int j = startPoint; j < endPoint; ++j) { |
36 delta = in[i+j] - in[j]; | 46 delta = in[i+j] - in[j]; |
37 yinBuffer[i] += delta * delta; | 47 yinBuffer[i] += delta * delta; |
38 } | 48 } |
39 } | 49 } |
40 } | 50 } |
41 | 51 |
42 void | 52 void |
43 YinUtil::fastDifference(const double *in, double *yinBuffer, const size_t yinBufferSize) | 53 YinUtil::fastDifference(const double *in, double *yinBuffer) |
44 { | 54 { |
45 | 55 |
46 // DECLARE AND INITIALISE | 56 // DECLARE AND INITIALISE |
47 // initialisation of most of the arrays here was done in a separate function, | 57 // initialisation of most of the arrays here was done in a separate function, |
48 // with all the arrays as members of the class... moved them back here. | 58 // with all the arrays as members of the class... moved them back here. |
49 | 59 |
50 size_t frameSize = 2 * yinBufferSize; | 60 size_t frameSize = 2 * m_yinBufferSize; |
51 | 61 |
52 double *audioTransformedReal = new double[frameSize]; | 62 double *audioTransformedReal = new double[frameSize]; |
53 double *audioTransformedImag = new double[frameSize]; | 63 double *audioTransformedImag = new double[frameSize]; |
54 double *nullImag = new double[frameSize]; | 64 double *nullImag = new double[frameSize]; |
55 double *kernel = new double[frameSize]; | 65 double *kernel = new double[frameSize]; |
56 double *kernelTransformedReal = new double[frameSize]; | 66 double *kernelTransformedReal = new double[frameSize]; |
57 double *kernelTransformedImag = new double[frameSize]; | 67 double *kernelTransformedImag = new double[frameSize]; |
58 double *yinStyleACFReal = new double[frameSize]; | 68 double *yinStyleACFReal = new double[frameSize]; |
59 double *yinStyleACFImag = new double[frameSize]; | 69 double *yinStyleACFImag = new double[frameSize]; |
60 double *powerTerms = new double[yinBufferSize]; | 70 double *powerTerms = new double[m_yinBufferSize]; |
61 | 71 |
62 for (size_t j = 0; j < yinBufferSize; ++j) | 72 for (size_t j = 0; j < m_yinBufferSize; ++j) |
63 { | 73 { |
64 yinBuffer[j] = 0.; // set to zero | 74 yinBuffer[j] = 0.; // set to zero |
65 powerTerms[j] = 0.; // set to zero | 75 powerTerms[j] = 0.; // set to zero |
66 } | 76 } |
67 | 77 |
78 } | 88 } |
79 | 89 |
80 // POWER TERM CALCULATION | 90 // POWER TERM CALCULATION |
81 // ... for the power terms in equation (7) in the Yin paper | 91 // ... for the power terms in equation (7) in the Yin paper |
82 powerTerms[0] = 0.0; | 92 powerTerms[0] = 0.0; |
83 for (size_t j = 0; j < yinBufferSize; ++j) { | 93 for (size_t j = 0; j < m_yinBufferSize; ++j) { |
84 powerTerms[0] += in[j] * in[j]; | 94 powerTerms[0] += in[j] * in[j]; |
85 } | 95 } |
86 | 96 |
87 // now iteratively calculate all others (saves a few multiplications) | 97 // now iteratively calculate all others (saves a few multiplications) |
88 for (size_t tau = 1; tau < yinBufferSize; ++tau) { | 98 for (size_t tau = 1; tau < m_yinBufferSize; ++tau) { |
89 powerTerms[tau] = powerTerms[tau-1] - in[tau-1] * in[tau-1] + in[tau+yinBufferSize] * in[tau+yinBufferSize]; | 99 powerTerms[tau] = powerTerms[tau-1] - in[tau-1] * in[tau-1] + in[tau+m_yinBufferSize] * in[tau+m_yinBufferSize]; |
90 } | 100 } |
91 | 101 |
92 // YIN-STYLE AUTOCORRELATION via FFT | 102 // YIN-STYLE AUTOCORRELATION via FFT |
93 // 1. data | 103 // 1. data |
94 Vamp::FFT::forward(frameSize, in, nullImag, audioTransformedReal, audioTransformedImag); | 104 Vamp::FFT::forward(frameSize, in, nullImag, audioTransformedReal, audioTransformedImag); |
95 | 105 |
96 // 2. half of the data, disguised as a convolution kernel | 106 // 2. half of the data, disguised as a convolution kernel |
97 for (size_t j = 0; j < yinBufferSize; ++j) { | 107 for (size_t j = 0; j < m_yinBufferSize; ++j) { |
98 kernel[j] = in[yinBufferSize-1-j]; | 108 kernel[j] = in[m_yinBufferSize-1-j]; |
99 } | 109 } |
100 Vamp::FFT::forward(frameSize, kernel, nullImag, kernelTransformedReal, kernelTransformedImag); | 110 Vamp::FFT::forward(frameSize, kernel, nullImag, kernelTransformedReal, kernelTransformedImag); |
101 | 111 |
102 // 3. convolution via complex multiplication -- written into | 112 // 3. convolution via complex multiplication -- written into |
103 for (size_t j = 0; j < frameSize; ++j) { | 113 for (size_t j = 0; j < frameSize; ++j) { |
106 } | 116 } |
107 Vamp::FFT::inverse(frameSize, yinStyleACFReal, yinStyleACFImag, audioTransformedReal, audioTransformedImag); | 117 Vamp::FFT::inverse(frameSize, yinStyleACFReal, yinStyleACFImag, audioTransformedReal, audioTransformedImag); |
108 | 118 |
109 // CALCULATION OF difference function | 119 // CALCULATION OF difference function |
110 // ... according to (7) in the Yin paper. | 120 // ... according to (7) in the Yin paper. |
111 for (size_t j = 0; j < yinBufferSize; ++j) { | 121 for (size_t j = 0; j < m_yinBufferSize; ++j) { |
112 // taking only the real part | 122 // taking only the real part |
113 yinBuffer[j] = powerTerms[0] + powerTerms[j] - 2 * audioTransformedReal[j+yinBufferSize-1]; | 123 yinBuffer[j] = powerTerms[0] + powerTerms[j] - 2 * audioTransformedReal[j+m_yinBufferSize-1]; |
114 } | 124 } |
115 delete [] audioTransformedReal; | 125 delete [] audioTransformedReal; |
116 delete [] audioTransformedImag; | 126 delete [] audioTransformedImag; |
117 delete [] nullImag; | 127 delete [] nullImag; |
118 delete [] kernel; | 128 delete [] kernel; |
122 delete [] yinStyleACFImag; | 132 delete [] yinStyleACFImag; |
123 delete [] powerTerms; | 133 delete [] powerTerms; |
124 } | 134 } |
125 | 135 |
126 void | 136 void |
127 YinUtil::cumulativeDifference(double *yinBuffer, const size_t yinBufferSize) | 137 YinUtil::cumulativeDifference(double *yinBuffer) |
128 { | 138 { |
129 size_t tau; | 139 size_t tau; |
130 | 140 |
131 yinBuffer[0] = 1; | 141 yinBuffer[0] = 1; |
132 | 142 |
133 double runningSum = 0; | 143 double runningSum = 0; |
134 | 144 |
135 for (tau = 1; tau < yinBufferSize; ++tau) { | 145 for (tau = 1; tau < m_yinBufferSize; ++tau) { |
136 runningSum += yinBuffer[tau]; | 146 runningSum += yinBuffer[tau]; |
137 if (runningSum == 0) | 147 if (runningSum == 0) |
138 { | 148 { |
139 yinBuffer[tau] = 1; | 149 yinBuffer[tau] = 1; |
140 } else { | 150 } else { |
142 } | 152 } |
143 } | 153 } |
144 } | 154 } |
145 | 155 |
146 int | 156 int |
147 YinUtil::absoluteThreshold(const double *yinBuffer, const size_t yinBufferSize, const double thresh) | 157 YinUtil::absoluteThreshold(const double *yinBuffer, const double thresh) |
148 { | 158 { |
149 size_t tau; | 159 size_t tau; |
150 size_t minTau = 0; | 160 size_t minTau = 0; |
151 double minVal = 1000.; | 161 double minVal = 1000.; |
152 | 162 |
153 // using Joren Six's "loop construct" from TarsosDSP | 163 // using Joren Six's "loop construct" from TarsosDSP |
154 tau = 2; | 164 tau = 2; |
155 while (tau < yinBufferSize) | 165 while (tau < m_yinBufferSize) |
156 { | 166 { |
157 if (yinBuffer[tau] < thresh) | 167 if (yinBuffer[tau] < thresh) |
158 { | 168 { |
159 while (tau+1 < yinBufferSize && yinBuffer[tau+1] < yinBuffer[tau]) | 169 while (tau+1 < m_yinBufferSize && yinBuffer[tau+1] < yinBuffer[tau]) |
160 { | 170 { |
161 ++tau; | 171 ++tau; |
162 } | 172 } |
163 return tau; | 173 return tau; |
164 } else { | 174 } else { |
185 static float single10[100] = {0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000}; | 195 static float single10[100] = {0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000}; |
186 static float single15[100] = {0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000}; | 196 static float single15[100] = {0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000}; |
187 static float single20[100] = {0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000}; | 197 static float single20[100] = {0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000}; |
188 | 198 |
189 std::vector<double> | 199 std::vector<double> |
190 YinUtil::yinProb(const double *yinBuffer, const size_t prior, const size_t yinBufferSize, const size_t minTau0, const size_t maxTau0) | 200 YinUtil::yinProb(const double *yinBuffer, const size_t prior, const size_t minTau0, const size_t maxTau0) |
191 { | 201 { |
192 size_t minTau = 2; | 202 size_t minTau = 2; |
193 size_t maxTau = yinBufferSize; | 203 size_t maxTau = m_yinBufferSize; |
194 | 204 |
195 // adapt period range, if necessary | 205 // adapt period range, if necessary |
196 if (minTau0 > 0 && minTau0 < maxTau0) minTau = minTau0; | 206 if (minTau0 > 0 && minTau0 < maxTau0) minTau = minTau0; |
197 if (maxTau0 > 0 && maxTau0 < yinBufferSize && maxTau0 > minTau) maxTau = maxTau0; | 207 if (maxTau0 > 0 && maxTau0 < m_yinBufferSize && maxTau0 > minTau) maxTau = maxTau0; |
198 | 208 |
199 double minWeight = 0.01; | 209 double minWeight = 0.01; |
200 size_t tau; | 210 size_t tau; |
201 std::vector<float> thresholds; | 211 std::vector<float> thresholds; |
202 std::vector<float> distribution; | 212 std::vector<float> distribution; |
203 std::vector<double> peakProb = std::vector<double>(yinBufferSize); | 213 std::vector<double> peakProb = std::vector<double>(m_yinBufferSize); |
204 | 214 |
205 size_t nThreshold = 100; | 215 size_t nThreshold = 100; |
206 int nThresholdInt = nThreshold; | 216 int nThresholdInt = nThreshold; |
207 | 217 |
208 for (int i = 0; i < nThresholdInt; ++i) | 218 for (int i = 0; i < nThresholdInt; ++i) |
301 } | 311 } |
302 } | 312 } |
303 | 313 |
304 if (peakProb[minInd] > 1) { | 314 if (peakProb[minInd] > 1) { |
305 std::cerr << "WARNING: yin has prob > 1 ??? I'm returning all zeros instead." << std::endl; | 315 std::cerr << "WARNING: yin has prob > 1 ??? I'm returning all zeros instead." << std::endl; |
306 return(std::vector<double>(yinBufferSize)); | 316 return(std::vector<double>(m_yinBufferSize)); |
307 } | 317 } |
308 | 318 |
309 double nonPeakProb = 1; | 319 double nonPeakProb = 1; |
310 if (sumProb > 0) { | 320 if (sumProb > 0) { |
311 for (size_t i = minTau; i < maxTau; ++i) | 321 for (size_t i = minTau; i < maxTau; ++i) |
322 | 332 |
323 return peakProb; | 333 return peakProb; |
324 } | 334 } |
325 | 335 |
326 double | 336 double |
327 YinUtil::parabolicInterpolation(const double *yinBuffer, const size_t tau, const size_t yinBufferSize) | 337 YinUtil::parabolicInterpolation(const double *yinBuffer, const size_t tau) |
328 { | 338 { |
329 // this is taken almost literally from Joren Six's Java implementation | 339 // this is taken almost literally from Joren Six's Java implementation |
330 if (tau == yinBufferSize) // not valid anyway. | 340 if (tau == m_yinBufferSize) // not valid anyway. |
331 { | 341 { |
332 return static_cast<double>(tau); | 342 return static_cast<double>(tau); |
333 } | 343 } |
334 | 344 |
335 double betterTau = 0.0; | 345 double betterTau = 0.0; |
336 if (tau > 0 && tau < yinBufferSize-1) { | 346 if (tau > 0 && tau < m_yinBufferSize-1) { |
337 float s0, s1, s2; | 347 float s0, s1, s2; |
338 s0 = yinBuffer[tau-1]; | 348 s0 = yinBuffer[tau-1]; |
339 s1 = yinBuffer[tau]; | 349 s1 = yinBuffer[tau]; |
340 s2 = yinBuffer[tau+1]; | 350 s2 = yinBuffer[tau+1]; |
341 | 351 |