d@0
|
1 /*
|
d@0
|
2 ==============================================================================
|
d@0
|
3
|
d@0
|
4 AudioSourceFeatureExtractor.cpp
|
d@0
|
5 Created: 11 Aug 2014 10:41:02am
|
d@0
|
6 Author: david.ronan
|
d@0
|
7
|
d@0
|
8 ==============================================================================
|
d@0
|
9 */
|
d@0
|
10
|
d@0
|
11 #include "AudioSourceFeatureExtractor.h"
|
d@0
|
12 #include "ObservationData.h"
|
d@0
|
13 #include <math.h>
|
d@0
|
14 #include <cmath>
|
d@0
|
15 #include <algorithm>
|
d@0
|
16 #include <string>
|
d@0
|
17 #include <windows.h>
|
d@0
|
18
|
d@0
|
19 //============================================================================== AudioSourceFeatureExtractor
|
d@0
|
20 AudioSourceFeatureExtractor::AudioSourceFeatureExtractor() // m_fft(FFTSIZE)
|
d@0
|
21 {
|
d@0
|
22 m_fft = new FFTW(FFTSIZE);
|
d@0
|
23 m_fInputBuffer = new float[FFTSIZE];
|
d@0
|
24 m_fMagnitudeSpectrum = new float[MAGNITUDESIZE];
|
d@0
|
25 m_fPreviousMagnitudeSpectrum = new float[MAGNITUDESIZE];
|
d@0
|
26 m_OutReal = new float[FFTSIZE];
|
d@0
|
27 m_OutImag = new float[FFTSIZE];
|
d@0
|
28
|
d@0
|
29 memset(m_fInputBuffer, 0, FFTSIZE * sizeof(float));
|
d@0
|
30 memset(m_fMagnitudeSpectrum, 0, MAGNITUDESIZE * sizeof(float));
|
d@0
|
31 memset(m_fPreviousMagnitudeSpectrum, 0, MAGNITUDESIZE * sizeof(float));
|
d@0
|
32
|
d@0
|
33
|
d@0
|
34 }
|
d@0
|
35
|
d@0
|
36 //============================================================================== ~AudioSourceFeatureExtractor
|
d@0
|
37 AudioSourceFeatureExtractor::~AudioSourceFeatureExtractor()
|
d@0
|
38 {
|
d@0
|
39 if(m_fInputBuffer != NULL)
|
d@0
|
40 {
|
d@0
|
41 delete m_fInputBuffer; // memory freed up
|
d@0
|
42 m_fInputBuffer = nullptr;
|
d@0
|
43 }
|
d@0
|
44
|
d@0
|
45 if(m_fMagnitudeSpectrum != NULL)
|
d@0
|
46 {
|
d@0
|
47 delete m_fMagnitudeSpectrum; // memory freed up
|
d@0
|
48 m_fMagnitudeSpectrum = nullptr;
|
d@0
|
49 }
|
d@0
|
50
|
d@0
|
51 if(m_fPreviousMagnitudeSpectrum != NULL)
|
d@0
|
52 {
|
d@0
|
53 delete m_fPreviousMagnitudeSpectrum; // memory freed up
|
d@0
|
54 m_fPreviousMagnitudeSpectrum = nullptr;
|
d@0
|
55 }
|
d@0
|
56
|
d@0
|
57 if (m_OutReal != NULL)
|
d@0
|
58 {
|
d@0
|
59 delete[] m_OutReal;
|
d@0
|
60 m_OutReal = nullptr;
|
d@0
|
61 }
|
d@0
|
62
|
d@0
|
63 if (m_OutImag != NULL)
|
d@0
|
64 {
|
d@0
|
65 delete[] m_OutImag;
|
d@0
|
66 m_OutImag = nullptr;
|
d@0
|
67 }
|
d@0
|
68
|
d@0
|
69 if (m_fft != NULL)
|
d@0
|
70 {
|
d@0
|
71 delete m_fft;
|
d@0
|
72 m_fft = nullptr;
|
d@0
|
73 }
|
d@0
|
74
|
d@0
|
75 }
|
d@0
|
76
|
d@0
|
77 //============================================================================== init
|
d@0
|
78 void AudioSourceFeatureExtractor::Initialise(float fSampleRate)
|
d@0
|
79 {
|
d@0
|
80 m_fSampleRate = fSampleRate;
|
d@0
|
81 m_iWriteIdx = 0;
|
d@0
|
82
|
d@0
|
83 for (int i = 1; i <= MAGNITUDESIZE; i++)
|
d@0
|
84 {
|
d@0
|
85 float win = ((fSampleRate / (2 * MAGNITUDESIZE)) * i);
|
d@0
|
86 m_fFreqBins.push_back(win);
|
d@0
|
87 }
|
d@0
|
88
|
d@0
|
89 m_MFCC.initMFFCvariables(12, MAGNITUDESIZE, m_fSampleRate);
|
d@0
|
90
|
d@0
|
91
|
d@0
|
92 }
|
d@0
|
93
|
d@0
|
94 //============================================================================== process
|
d@0
|
95 std::vector<ObservationData> AudioSourceFeatureExtractor::Process( const float* data, size_t numSamples)
|
d@0
|
96 {
|
d@0
|
97 std::vector<ObservationData> observations = std::vector<ObservationData>();
|
d@0
|
98
|
d@0
|
99 memset(m_fInputBuffer, 0, FFTSIZE * sizeof(float));
|
d@0
|
100 memset(m_fMagnitudeSpectrum, 0, MAGNITUDESIZE * sizeof(float));
|
d@0
|
101 memset(m_fPreviousMagnitudeSpectrum, 0, MAGNITUDESIZE * sizeof(float));
|
d@0
|
102 m_iWriteIdx = 0;
|
d@0
|
103
|
d@0
|
104 float perdiodicity = EstimatePerdiodicity(const_cast<float*>(data), numSamples);
|
d@0
|
105
|
d@0
|
106 float entropyofenergy = EntropyOfEnergy(const_cast<float*>(data), numSamples);
|
d@0
|
107
|
d@0
|
108 for(size_t n = 0; n < numSamples; n++)
|
d@0
|
109 {
|
d@0
|
110 m_fInputBuffer[m_iWriteIdx] = data[n];
|
d@0
|
111 m_iWriteIdx++;
|
d@0
|
112
|
d@0
|
113 //Do normal FFT
|
d@0
|
114
|
d@0
|
115 if(FFTSIZE == m_iWriteIdx)
|
d@0
|
116 {
|
d@0
|
117 //Get the peak amplitude rms and crest factor for the current frame
|
d@0
|
118 float peakvalue = 0;
|
d@0
|
119 float rms = 0;
|
d@0
|
120 float crestfactor = 0;
|
d@0
|
121
|
d@0
|
122 for(int i=0; i < FFTSIZE; i++)
|
d@0
|
123 {
|
d@0
|
124 if(m_fInputBuffer[i] > peakvalue)
|
d@0
|
125 peakvalue = abs(m_fInputBuffer[i]);
|
d@0
|
126
|
d@0
|
127 rms += (m_fInputBuffer[i] * m_fInputBuffer[i]);
|
d@0
|
128 }
|
d@0
|
129
|
d@0
|
130 rms /= FFTSIZE;
|
d@0
|
131 rms = sqrt(rms);
|
d@0
|
132
|
d@0
|
133 crestfactor = peakvalue/(rms + std::numeric_limits<float>::epsilon());
|
d@0
|
134
|
d@0
|
135 float ZCR = 0;
|
d@0
|
136 float inputBuffer2[FFTSIZE] = {0.0};
|
d@0
|
137
|
d@0
|
138 for (int i = 1; i < FFTSIZE; i++)
|
d@0
|
139 {
|
d@0
|
140 inputBuffer2[i] = m_fInputBuffer[i -1];
|
d@0
|
141 }
|
d@0
|
142
|
d@0
|
143 for (int i = 0; i < FFTSIZE; i++)
|
d@0
|
144 {
|
d@0
|
145 ZCR += (abs(Sign(m_fInputBuffer[i]) - Sign(inputBuffer2[i])));
|
d@0
|
146 }
|
d@0
|
147
|
d@0
|
148 ZCR /= (2 * FFTSIZE);
|
d@0
|
149
|
d@0
|
150 //////////////////////////////////////////////////////////////////
|
d@0
|
151 for (int i = 0; i < FFTSIZE; i++)
|
d@0
|
152 {
|
d@0
|
153 float multiplier = (float) (0.5 * (1 - cos(2*PI*i/(FFTSIZE -1))));
|
d@0
|
154 m_fInputBuffer[i] = multiplier * m_fInputBuffer[i];
|
d@0
|
155 }
|
d@0
|
156
|
d@0
|
157 //Compute the FFT
|
d@0
|
158
|
d@0
|
159 memset(m_OutReal, 0, sizeof(float)*FFTSIZE);
|
d@0
|
160 memset(m_OutImag, 0, sizeof(float)*FFTSIZE);
|
d@0
|
161
|
d@0
|
162 m_fft->process(m_fInputBuffer, m_OutReal, m_OutImag);
|
d@0
|
163
|
d@0
|
164 //Compute the Magnitude
|
d@0
|
165 VectorDistance(m_OutReal, 1, m_OutImag, 1, m_fMagnitudeSpectrum, 1, MAGNITUDESIZE);
|
d@0
|
166
|
d@0
|
167 //Compute the spectral features
|
d@0
|
168 float centroid = 0;
|
d@0
|
169 float spread = 0;
|
d@0
|
170 float skewness = 0;
|
d@0
|
171 float kurtosis = 0;
|
d@0
|
172 float brightness = 0;
|
d@0
|
173 float rolloff85 = 0;
|
d@0
|
174 float rolloff95 = 0;
|
d@0
|
175 float spectralentropy = 0;
|
d@0
|
176 float flatness = 0;
|
d@0
|
177 float spectralcf = 0;
|
d@0
|
178 float spectralflux = 0;
|
d@0
|
179 std::vector<float> mfccs;
|
d@0
|
180
|
d@0
|
181 SpectralFeatures(m_fMagnitudeSpectrum, m_fPreviousMagnitudeSpectrum, MAGNITUDESIZE, centroid, spread, skewness, kurtosis, brightness, rolloff85, rolloff95, spectralentropy, flatness, spectralcf, spectralflux);
|
d@0
|
182
|
d@0
|
183 m_MFCC.ComputeMFCC(m_fMagnitudeSpectrum, mfccs);
|
d@0
|
184
|
d@0
|
185 //Update for Spectral Flux
|
d@0
|
186 for(int i = 0; i < MAGNITUDESIZE; i++)
|
d@0
|
187 {
|
d@0
|
188 m_fPreviousMagnitudeSpectrum[i] = m_fMagnitudeSpectrum[i];
|
d@0
|
189 }
|
d@0
|
190
|
d@0
|
191 //50% Hop size
|
d@0
|
192 for(int i = MAGNITUDESIZE; i < FFTSIZE; i++)
|
d@0
|
193 {
|
d@0
|
194 m_fInputBuffer[i - MAGNITUDESIZE] = m_fInputBuffer[i];
|
d@0
|
195 m_iWriteIdx = MAGNITUDESIZE;
|
d@0
|
196 }
|
d@0
|
197
|
d@0
|
198 //create an observation for this window and push it to the list of observations for this 30 sec. audio clip
|
d@0
|
199 ObservationData newObservation = ObservationData(rms, peakvalue, crestfactor, ZCR, centroid, spread, skewness, kurtosis, brightness, rolloff85, rolloff95, spectralentropy, flatness, spectralcf, spectralflux, mfccs, perdiodicity, entropyofenergy);
|
d@0
|
200 observations.push_back(newObservation);
|
d@0
|
201
|
d@0
|
202 }
|
d@0
|
203 }
|
d@0
|
204
|
d@0
|
205 return observations;
|
d@0
|
206 }
|
d@0
|
207
|
d@0
|
208 void AudioSourceFeatureExtractor::Finalize()
|
d@0
|
209 {
|
d@0
|
210 m_MFCC.freeMFCCmemory();
|
d@0
|
211 }
|
d@0
|
212
|
d@0
|
213 void AudioSourceFeatureExtractor::VectorDistance(const float* vIn1, int stride1, const float* vIn2, int stride2, float* &vOut, int strideOut, size_t nElements)
|
d@0
|
214 {
|
d@0
|
215 //Compute remaining samples
|
d@0
|
216 for (size_t i = 0; i < nElements; i++)
|
d@0
|
217 {
|
d@0
|
218 vOut[i* strideOut] = sqrt(vIn1[i*stride1] * vIn1[i*stride1] + vIn2[i*stride2] * vIn2[i*stride2]);
|
d@0
|
219 }
|
d@0
|
220 }
|
d@0
|
221
|
d@0
|
222 void AudioSourceFeatureExtractor::SpectralFeatures(float* &magnitude, float* &prevmagnitude, size_t windowSize, float ¢roid, float &spread, float &skew, float &kurtosis, float &brightness, float &rolloff95, float &rolloff85, float &spectralentropy, float &flatness, float &spectralcf, float &spectralflux)
|
d@0
|
223 {
|
d@0
|
224 float summagbin = 0;
|
d@0
|
225 float summag = 0;
|
d@0
|
226 float sumprevmag = 0;
|
d@0
|
227 float summagspread = 0;
|
d@0
|
228 float summagskewness = 0;
|
d@0
|
229 float summagkurtosis = 0;
|
d@0
|
230 float sumbrightness = 0;
|
d@0
|
231 float maxmag = 0;
|
d@0
|
232
|
d@0
|
233 float geo = 1;
|
d@0
|
234 float totalenergy = 0;
|
d@0
|
235 float currEnergy95 = 0;
|
d@0
|
236 float currEnergy85 = 0;
|
d@0
|
237 int countFFT85 = 0;
|
d@0
|
238 int countFFT95 = 0;
|
d@0
|
239
|
d@0
|
240 float normalisedMagSumEntropy = 0;
|
d@0
|
241
|
d@0
|
242 //Centroid
|
d@0
|
243 //////////////////////////////////////////////////////
|
d@0
|
244
|
d@0
|
245 for (size_t i = 0; i < windowSize; i++)
|
d@0
|
246 {
|
d@0
|
247 //Moments
|
d@0
|
248 summagbin += (m_fFreqBins[i] * magnitude[i]);
|
d@0
|
249 summag += magnitude[i];
|
d@0
|
250 sumprevmag += prevmagnitude[i];
|
d@0
|
251
|
d@0
|
252 //Spectral Crest factor
|
d@0
|
253 if(magnitude[i] > maxmag)
|
d@0
|
254 {
|
d@0
|
255 maxmag = magnitude[i];
|
d@0
|
256 }
|
d@0
|
257
|
d@0
|
258 float energy = magnitude[i] * magnitude[i];
|
d@0
|
259
|
d@0
|
260 //Roll off + Flatness
|
d@0
|
261
|
d@0
|
262 totalenergy += energy;
|
d@0
|
263 float exponent = (float)(1.0/(float)windowSize);
|
d@0
|
264
|
d@0
|
265 if(energy != 0)
|
d@0
|
266 {
|
d@0
|
267 geo *= pow(energy, exponent);
|
d@0
|
268 }
|
d@0
|
269 }
|
d@0
|
270
|
d@0
|
271 summag += std::numeric_limits<float>::epsilon();
|
d@0
|
272
|
d@0
|
273 centroid = summagbin / summag;
|
d@0
|
274
|
d@0
|
275 /////////////////////////////////////////////////////////
|
d@0
|
276
|
d@0
|
277 //Spread
|
d@0
|
278 //////////////////////////////////////////////////////
|
d@0
|
279
|
d@0
|
280 for (size_t i = 0; i < windowSize; i++)
|
d@0
|
281 {
|
d@0
|
282 float norm = magnitude[i] / (summag + std::numeric_limits<float>::epsilon());
|
d@0
|
283 float normprev = prevmagnitude[i] / (sumprevmag + std::numeric_limits<float>::epsilon());
|
d@0
|
284
|
d@0
|
285 //Spectral Flux
|
d@0
|
286 spectralflux += ((norm - normprev) * (norm - normprev));
|
d@0
|
287
|
d@0
|
288 //Entropy
|
d@0
|
289 normalisedMagSumEntropy += (norm * log(norm + std::numeric_limits<float>::epsilon()));
|
d@0
|
290
|
d@0
|
291 //Moments
|
d@0
|
292 summagspread += pow((m_fFreqBins[i] - centroid), 2) * norm;
|
d@0
|
293 summagskewness += pow((m_fFreqBins[i] - centroid), 3) * norm;
|
d@0
|
294 summagkurtosis += pow((m_fFreqBins[i] - centroid), 4) * norm;
|
d@0
|
295
|
d@0
|
296 //Brightness
|
d@0
|
297 if (m_fFreqBins[i] >= 1500)
|
d@0
|
298 {
|
d@0
|
299 sumbrightness += magnitude[i];
|
d@0
|
300 }
|
d@0
|
301 }
|
d@0
|
302
|
d@0
|
303 spread = sqrt(summagspread);
|
d@0
|
304
|
d@0
|
305 /////////////////////////////////////////////////////////
|
d@0
|
306
|
d@0
|
307 //Skewness
|
d@0
|
308 //////////////////////////////////////////////////////
|
d@0
|
309
|
d@0
|
310 float spreadtemp = spread;
|
d@0
|
311 spreadtemp += std::numeric_limits<float>::epsilon();
|
d@0
|
312
|
d@0
|
313 skew = summagskewness / pow(spreadtemp, 3);
|
d@0
|
314
|
d@0
|
315 /////////////////////////////////////////////////////////
|
d@0
|
316
|
d@0
|
317 //Kurtosis
|
d@0
|
318 //////////////////////////////////////////////////////
|
d@0
|
319
|
d@0
|
320 kurtosis = summagkurtosis / pow(spreadtemp, 4);
|
d@0
|
321
|
d@0
|
322 /////////////////////////////////////////////////////////
|
d@0
|
323
|
d@0
|
324 //Brightness
|
d@0
|
325 //////////////////////////////////////////////////////
|
d@0
|
326
|
d@0
|
327 brightness = sumbrightness / summag;
|
d@0
|
328
|
d@0
|
329 /////////////////////////////////////////////////////////
|
d@0
|
330
|
d@0
|
331 //Roll off 85% - 95%
|
d@0
|
332 /////////////////////////////////////////////////////////
|
d@0
|
333
|
d@0
|
334 while ((currEnergy95 <= 0.95 * totalenergy) && (countFFT95 < (int)windowSize))
|
d@0
|
335 {
|
d@0
|
336 currEnergy95 += (magnitude[countFFT95] * magnitude[countFFT95]);
|
d@0
|
337 countFFT95 += 1;
|
d@0
|
338
|
d@0
|
339 if (currEnergy85 <= 0.85 * totalenergy)
|
d@0
|
340 {
|
d@0
|
341 currEnergy85 += (magnitude[countFFT85] * magnitude[countFFT85]);
|
d@0
|
342 countFFT85 += 1;
|
d@0
|
343 }
|
d@0
|
344
|
d@0
|
345 }
|
d@0
|
346
|
d@0
|
347 countFFT85 -= 1;
|
d@0
|
348 countFFT95 -= 1;
|
d@0
|
349
|
d@0
|
350 //Normalization
|
d@0
|
351 if (currEnergy85 <= 0)
|
d@0
|
352 {
|
d@0
|
353 //If we have silence then our value should be 0.
|
d@0
|
354 rolloff85 = 0;
|
d@0
|
355 }
|
d@0
|
356 else
|
d@0
|
357 {
|
d@0
|
358 rolloff85 = (((float)countFFT85) / (float)windowSize);
|
d@0
|
359 }
|
d@0
|
360
|
d@0
|
361 //Normalization
|
d@0
|
362 if (currEnergy95 <= 0)
|
d@0
|
363 {
|
d@0
|
364 //If we have silence then our value should be 0.
|
d@0
|
365 rolloff95 = 0;
|
d@0
|
366 }
|
d@0
|
367 else
|
d@0
|
368 {
|
d@0
|
369 rolloff95 = (((float)countFFT95) / (float)windowSize);
|
d@0
|
370 }
|
d@0
|
371
|
d@0
|
372 /////////////////////////////////////////////////////
|
d@0
|
373
|
d@0
|
374 //Spectral Entropy
|
d@0
|
375 /////////////////////////////////////////////////////////
|
d@0
|
376
|
d@0
|
377 spectralentropy = (float) ((normalisedMagSumEntropy / log((float)windowSize)) * - 1);
|
d@0
|
378
|
d@0
|
379 /////////////////////////////////////////////////////////
|
d@0
|
380
|
d@0
|
381 /////////////////////////////////////////////////////
|
d@0
|
382
|
d@0
|
383 //Flatness
|
d@0
|
384 /////////////////////////////////////////////////////////
|
d@0
|
385 if(totalenergy != 0 && geo != 0)
|
d@0
|
386 {
|
d@0
|
387 flatness = geo / (totalenergy / (float)windowSize);
|
d@0
|
388 }
|
d@0
|
389 else
|
d@0
|
390 {
|
d@0
|
391 flatness = 0;
|
d@0
|
392 }
|
d@0
|
393
|
d@0
|
394 /////////////////////////////////////////////////////////
|
d@0
|
395
|
d@0
|
396 //Spectral Crest Factor
|
d@0
|
397 /////////////////////////////////////////////////////////
|
d@0
|
398
|
d@0
|
399 if(summag != 0)
|
d@0
|
400 {
|
d@0
|
401 spectralcf = maxmag / summag;
|
d@0
|
402 }
|
d@0
|
403 else
|
d@0
|
404 {
|
d@0
|
405 spectralcf = 0;
|
d@0
|
406 }
|
d@0
|
407 }
|
d@0
|
408
|
d@0
|
409 void AudioSourceFeatureExtractor::ConvolveFunction(float* &z, float *x, float *y, size_t &lenz, size_t lenx, size_t leny)
|
d@0
|
410 {
|
d@0
|
411 // computes cross-correlation of two vectors, takes 2 vectors of the same length and computes 2*length-1 lags
|
d@0
|
412
|
d@0
|
413 float *zptr,s,*xp,*yp;
|
d@0
|
414 size_t n_lo, n_hi;
|
d@0
|
415
|
d@0
|
416 lenz = lenx + leny - 1;
|
d@0
|
417
|
d@0
|
418 z = new float[lenz];
|
d@0
|
419
|
d@0
|
420 zptr = z;
|
d@0
|
421
|
d@0
|
422 for (size_t i = 0; i < lenz; i++)
|
d@0
|
423 {
|
d@0
|
424 s = 0.0;
|
d@0
|
425 n_lo = 0 > (i - leny + 1 ) ? 0 : (i - leny + 1);
|
d@0
|
426 n_hi = (lenx - 1) < i ? (lenx - 1): i;
|
d@0
|
427 xp = &x[0] + n_lo;
|
d@0
|
428 yp = &y[0] + i - n_lo;
|
d@0
|
429
|
d@0
|
430 for (size_t n = n_lo; n <= n_hi; n++)
|
d@0
|
431 {
|
d@0
|
432 s += *xp * *yp;
|
d@0
|
433 xp++;
|
d@0
|
434 yp--;
|
d@0
|
435 }
|
d@0
|
436
|
d@0
|
437 *zptr=s;
|
d@0
|
438 zptr++;
|
d@0
|
439 }
|
d@0
|
440 }
|
d@0
|
441
|
d@0
|
442 void AudioSourceFeatureExtractor::XCorr(float *&output, float* input1, float* input2, size_t &outputlen, size_t inputlen, size_t maxLag = 0)
|
d@0
|
443 {
|
d@0
|
444 // TODO: adapt for different sizes
|
d@0
|
445
|
d@0
|
446 // find next power of 2 for efficient FFT
|
d@0
|
447 size_t fftSize = 1;
|
d@0
|
448 while (fftSize < 2*static_cast<size_t>(inputlen)-1)
|
d@0
|
449 {
|
d@0
|
450 fftSize *= 2;
|
d@0
|
451 }
|
d@0
|
452
|
d@0
|
453 FFTW fft1 = FFTW(fftSize);
|
d@0
|
454 FFTW fft2 = FFTW(fftSize);
|
d@0
|
455
|
d@0
|
456 // allocate space for FFTW processing
|
d@0
|
457 float *in1 = new float[fftSize];
|
d@0
|
458 float *in2 = new float[fftSize];
|
d@0
|
459
|
d@0
|
460 // output from FFT is only up to Nyquist frequency
|
d@0
|
461 float *out1Real = new float[fftSize];
|
d@0
|
462 float *out1Imag = new float[fftSize];
|
d@0
|
463 float *out2Real = new float[fftSize];
|
d@0
|
464 float *out2Imag = new float[fftSize];
|
d@0
|
465
|
d@0
|
466 memset(in1, 0, sizeof(float)*fftSize);
|
d@0
|
467 memset(in2, 0, sizeof(float)*fftSize);
|
d@0
|
468 memset(out1Real, 0, sizeof(float)*fftSize);
|
d@0
|
469 memset(out1Imag, 0, sizeof(float)*fftSize);
|
d@0
|
470 memset(out2Real, 0, sizeof(float)*fftSize);
|
d@0
|
471 memset(out2Imag, 0, sizeof(float)*fftSize);
|
d@0
|
472
|
d@0
|
473 // copy input to allocated arrays
|
d@0
|
474 for (size_t i = 0; i < static_cast<size_t>(inputlen); ++i)
|
d@0
|
475 {
|
d@0
|
476 in1[i] = input1[i];
|
d@0
|
477 in2[i] = input2[i];
|
d@0
|
478 }
|
d@0
|
479
|
d@0
|
480 fft1.process(in1, out1Real, out1Imag);
|
d@0
|
481 fft2.process(in2, out2Real, out2Imag);
|
d@0
|
482
|
d@0
|
483 // multiply out1*conj(out2) in FFT domain
|
d@0
|
484 fftwf_complex *product = (fftwf_complex*)fftwf_malloc(sizeof(fftwf_complex)*fftSize);
|
d@0
|
485 for (size_t i = 0; i < fftSize; ++i)
|
d@0
|
486 {
|
d@0
|
487 product[i][0] = (out1Real[i]*out2Real[i] + out1Imag[i]*out2Imag[i]);
|
d@0
|
488 product[i][1] = (out1Imag[i]*out2Real[i] - out1Real[i]*out2Imag[i]);
|
d@0
|
489 }
|
d@0
|
490
|
d@0
|
491 // compute IFFT
|
d@0
|
492
|
d@0
|
493 // do FFT, adapted from iTraktorLib "FFTW.h"
|
d@0
|
494 fftw_iodim dim;
|
d@0
|
495 dim.n = fftSize;
|
d@0
|
496 dim.is = 1;
|
d@0
|
497 dim.os = 1;
|
d@0
|
498
|
d@0
|
499 fftwf_plan plan;
|
d@0
|
500
|
d@0
|
501 float *result = (float*) fftwf_malloc(sizeof(float)*fftSize);
|
d@0
|
502 plan = fftwf_plan_guru_dft_c2r(1, &dim, 0, NULL, product, result, FFTW_ESTIMATE);
|
d@0
|
503 fftwf_execute_dft_c2r(plan, product, result);
|
d@0
|
504 fftwf_destroy_plan(plan);
|
d@0
|
505
|
d@0
|
506 // copy real part of result back to output array, normalize by FFT size
|
d@0
|
507 if (maxLag == 0 || maxLag >= static_cast<size_t>(inputlen))
|
d@0
|
508 {
|
d@0
|
509 maxLag = static_cast<size_t>(inputlen)-1;
|
d@0
|
510 }
|
d@0
|
511
|
d@0
|
512 output = new float[2 * maxLag + 1];
|
d@0
|
513 outputlen = 2 * maxLag + 1;
|
d@0
|
514
|
d@0
|
515 for (size_t i = fftSize - maxLag; i < fftSize; ++i)
|
d@0
|
516 {
|
d@0
|
517 output[i - fftSize + maxLag] = result[i] / fftSize;
|
d@0
|
518 }
|
d@0
|
519
|
d@0
|
520 for (unsigned i = 0; i <= (unsigned)maxLag; ++i)
|
d@0
|
521 {
|
d@0
|
522 output[i + maxLag] = result[i] / fftSize;
|
d@0
|
523 }
|
d@0
|
524
|
d@0
|
525 fftwf_free(result);
|
d@0
|
526 fftwf_free(product);
|
d@0
|
527
|
d@0
|
528 if(in1 != NULL)
|
d@0
|
529 {
|
d@0
|
530 delete[] in1;
|
d@0
|
531 in1 = nullptr;
|
d@0
|
532 }
|
d@0
|
533
|
d@0
|
534 if(in2 != NULL)
|
d@0
|
535 {
|
d@0
|
536 delete[] in2;
|
d@0
|
537 in2 = nullptr;
|
d@0
|
538 }
|
d@0
|
539
|
d@0
|
540 if(out1Real != NULL)
|
d@0
|
541 {
|
d@0
|
542 delete[] out1Real;
|
d@0
|
543 out1Real= nullptr;
|
d@0
|
544 }
|
d@0
|
545
|
d@0
|
546 if(out1Imag != NULL)
|
d@0
|
547 {
|
d@0
|
548 delete[] out1Imag;
|
d@0
|
549 out1Imag= nullptr;
|
d@0
|
550 }
|
d@0
|
551
|
d@0
|
552 if(out2Real != NULL)
|
d@0
|
553 {
|
d@0
|
554 delete[] out2Real;
|
d@0
|
555 out2Real= nullptr;
|
d@0
|
556 }
|
d@0
|
557
|
d@0
|
558 if(out2Imag != NULL)
|
d@0
|
559 {
|
d@0
|
560 delete[] out2Imag;
|
d@0
|
561 out2Imag= nullptr;
|
d@0
|
562 }
|
d@0
|
563 }
|
d@0
|
564
|
d@0
|
565 float AudioSourceFeatureExtractor::EstimatePerdiodicity(float* data, size_t numSamples)
|
d@0
|
566 {
|
d@0
|
567 float periodicity = 0.0;
|
d@0
|
568 float* downsampleddata = new float[numSamples];
|
d@0
|
569 memset(downsampleddata, 0, sizeof(float)*numSamples);
|
d@0
|
570 float* xcorr = nullptr;
|
d@0
|
571
|
d@0
|
572 for(size_t k = 0; k < numSamples; k++)
|
d@0
|
573 {
|
d@0
|
574 downsampleddata[k] = data[k];
|
d@0
|
575 }
|
d@0
|
576
|
d@0
|
577 //DownSampler(data, downsampleddata, numSamples, downsamplelength, m_fSampleRate, 11025.0);
|
d@0
|
578
|
d@0
|
579 EnvelopeCurve(downsampleddata, downsampleddata, numSamples, m_fSampleRate);
|
d@0
|
580
|
d@0
|
581 int startLag = (int)floor((float)(60.0 / 480.0 * m_fSampleRate));
|
d@0
|
582 int endLag = (int)floor((float)(60.0 / 30.0 * m_fSampleRate));
|
d@0
|
583 int thresh = (int)floor((float)(.2 * m_fSampleRate));
|
d@0
|
584
|
d@0
|
585 //Normalise
|
d@0
|
586 //Normalise(downsampleddata, downsampleddata, downsamplelength);
|
d@0
|
587
|
d@0
|
588 size_t xcorrlen = 0;
|
d@0
|
589 XCorr(xcorr, downsampleddata, downsampleddata, xcorrlen, numSamples, endLag);
|
d@0
|
590
|
d@0
|
591 int i = (int)floor(xcorrlen / 2);
|
d@0
|
592
|
d@0
|
593 std::vector<float> temp;
|
d@0
|
594 for(size_t j = i; j < xcorrlen; j++)
|
d@0
|
595 {
|
d@0
|
596 temp.push_back(xcorr[j]);
|
d@0
|
597 }
|
d@0
|
598
|
d@0
|
599 std::vector<float> temp2;
|
d@0
|
600 for(size_t j = (size_t)max(1,startLag); j < (size_t)min(endLag,temp.size()); j++)
|
d@0
|
601 {
|
d@0
|
602 temp2.push_back(temp[j]);
|
d@0
|
603 }
|
d@0
|
604
|
d@0
|
605 int maxIdx = 0;
|
d@0
|
606 float maxVal = std::numeric_limits<float>::lowest();
|
d@0
|
607 for(size_t j =0; j < temp2.size(); j++)
|
d@0
|
608 {
|
d@0
|
609 if(temp2[j] > maxVal)
|
d@0
|
610 {
|
d@0
|
611 maxIdx = j;
|
d@0
|
612 maxVal = temp2[j];
|
d@0
|
613 }
|
d@0
|
614 }
|
d@0
|
615
|
d@0
|
616 int from = max(1, maxIdx-thresh);
|
d@0
|
617 int to = min(temp2.size(), maxIdx+thresh);
|
d@0
|
618
|
d@0
|
619 float minValInRange = FLT_MAX;
|
d@0
|
620
|
d@0
|
621 for(size_t j = (size_t)from; j < to; j++)
|
d@0
|
622 {
|
d@0
|
623 if(temp2[j] < minValInRange)
|
d@0
|
624 {
|
d@0
|
625 minValInRange = temp2[j];
|
d@0
|
626 }
|
d@0
|
627 }
|
d@0
|
628
|
d@0
|
629 periodicity = (maxVal-minValInRange);
|
d@0
|
630
|
d@0
|
631 std::string dave = "Periodicity " + std::to_string(periodicity) + "\n";
|
d@0
|
632 OutputDebugString(dave.c_str());
|
d@0
|
633
|
d@0
|
634 if(xcorr != NULL)
|
d@0
|
635 {
|
d@0
|
636 delete[] xcorr;
|
d@0
|
637 xcorr = nullptr;
|
d@0
|
638 }
|
d@0
|
639
|
d@0
|
640 if(downsampleddata != NULL)
|
d@0
|
641 {
|
d@0
|
642 delete[] downsampleddata;
|
d@0
|
643 downsampleddata = nullptr;
|
d@0
|
644 }
|
d@0
|
645
|
d@0
|
646 return periodicity;
|
d@0
|
647 }
|
d@0
|
648
|
d@0
|
649 void AudioSourceFeatureExtractor::DownSampler(float* data, float* &out, size_t lenIn, size_t &lenOut, float currentSampleRate, float futureSampleRate)
|
d@0
|
650 {
|
d@0
|
651 ////Low pass our data before we decimate
|
d@0
|
652 //const int filterlength = 101;
|
d@0
|
653 //float* tempdata = new float[lenIn];
|
d@0
|
654 //memset(tempdata, 0, sizeof(float)*lenIn);
|
d@0
|
655
|
d@0
|
656 ////Coefficients were taken from Matlab
|
d@0
|
657 //float coeffs[filterlength] ={2.0839274e-05f, 6.6880953e-06f,-4.7252855e-05f, -3.4874138e-05f, 7.8508412e-05f,9.7089069e-05f,-9.9197161e-05f, -0.00020412984f, 8.2261082e-05f, 0.00035689832f, 9.4890293e-06f, -0.00053820928f, -0.00021722882f, 0.00070567406f, 0.00057491515f, -0.00078844035f, -0.0010939316f, 0.00069057313f, 0.0017460365f, -0.00030308162f, -0.0024484061f, -0.00047496572f, 0.0030552838f, 0.0017078921f, -0.0033604759f, -0.0033912712f, 0.0031135236f, 0.0054217125f, -0.0020499325f, -0.0075746416f, -6.7239242e-05f, 0.0094950572f, 0.0034002098f, -0.010703080f, -0.0079918290f, 0.010610434f, 0.013732497f, -0.0085344436f, -0.020346783f, 0.0036776769f, 0.027405130f, 0.0050050775f, -0.034361813f, -0.019287759f, 0.040615436f, 0.043452863f,-0.045583628f, -0.093336113f, 0.048780452f, 0.31394306f, 0.45011634f, 0.31394306f, 0.048780452f,-0.093336113f,-0.045583628f, 0.043452863f,0.040615436f,-0.019287759f, -0.034361813f, 0.0050050775f, 0.027405130f,0.0036776769f,-0.020346783f, -0.0085344436f, 0.013732497f, 0.010610434f, -0.0079918290f, -0.010703080f, 0.0034002098f, 0.0094950572f, -6.7239242e-05f, -0.0075746416f, -0.0020499325f, 0.0054217125f, 0.0031135236f, -0.0033912712f, -0.0033604759f, 0.0017078921f, 0.0030552838f, -0.00047496572f, -0.0024484061f, -0.00030308162f, 0.0017460365f,0.00069057313f, -0.0010939316f, -0.00078844035f, 0.00057491515f, 0.00070567406f, -0.00021722882f, -0.00053820928f, 9.4890293e-06f, 0.00035689832f, 8.2261082e-05f, -0.00020412984f, -9.9197161e-05f, 9.7089069e-05f, 7.8508412e-05f, -3.4874138e-05f, -4.7252855e-05f, 6.6880953e-06f, 2.0839274e-05f};
|
d@0
|
658 //
|
d@0
|
659 //float acc; // accumulator for MACs
|
d@0
|
660 // float* coeffp; // pointer to coefficients
|
d@0
|
661 // float* inputp; // pointer to input samples
|
d@0
|
662 //float* endp = &data[lenIn -1]; // pointer to last sample
|
d@0
|
663 //
|
d@0
|
664 //
|
d@0
|
665 // // apply the filter to each input sample
|
d@0
|
666 // for (size_t n = 0; n < lenIn; n++ )
|
d@0
|
667 //{
|
d@0
|
668 // // calculate output n
|
d@0
|
669 // coeffp = &coeffs[0];
|
d@0
|
670 // inputp = &data[filterlength - 1 + n];
|
d@0
|
671
|
d@0
|
672 // acc = 0.0f;
|
d@0
|
673 // for (int k = 0; k < filterlength; k++ )
|
d@0
|
674 // {
|
d@0
|
675 // if(inputp <= endp)
|
d@0
|
676 // acc += (*coeffp++) * (*inputp--);
|
d@0
|
677 // else
|
d@0
|
678 // {
|
d@0
|
679 // //When we reach the end of the buffer
|
d@0
|
680 // acc += (*coeffp++) * 0.0f;
|
d@0
|
681 // *inputp--;
|
d@0
|
682 // }
|
d@0
|
683 // }
|
d@0
|
684
|
d@0
|
685 // tempdata[n] = acc;
|
d@0
|
686 // }
|
d@0
|
687 //int ratio = (int) (currentSampleRate / futureSampleRate);
|
d@0
|
688 //
|
d@0
|
689 //lenOut = lenIn / ratio;
|
d@0
|
690
|
d@0
|
691 //out = new float[lenOut];
|
d@0
|
692 //memset(out, 0, sizeof(float)*lenOut);
|
d@0
|
693
|
d@0
|
694
|
d@0
|
695 //int idx = 0;
|
d@0
|
696 //for(size_t i = 0; i < lenIn; i = i + ratio)
|
d@0
|
697 //{
|
d@0
|
698 // out[idx] = tempdata[i];
|
d@0
|
699 // idx++;
|
d@0
|
700 //}
|
d@0
|
701
|
d@0
|
702 //if(tempdata != NULL)
|
d@0
|
703 //{
|
d@0
|
704 // delete[] tempdata;
|
d@0
|
705 // tempdata = nullptr;
|
d@0
|
706 //}
|
d@0
|
707 }
|
d@0
|
708
|
d@0
|
709 void AudioSourceFeatureExtractor::EnvelopeCurve(float* data, float* &out, size_t dataLen, float sampleRate)
|
d@0
|
710 {
|
d@0
|
711 float release = 0.02f;
|
d@0
|
712 float envelope = 0.0f;
|
d@0
|
713
|
d@0
|
714 float gr = exp(-1 / (sampleRate * release));
|
d@0
|
715
|
d@0
|
716 for (size_t i = 0; i < dataLen; i++)
|
d@0
|
717 {
|
d@0
|
718 float EnvIn = abs(data[i]);
|
d@0
|
719
|
d@0
|
720 if(envelope < EnvIn)
|
d@0
|
721 {
|
d@0
|
722 envelope = EnvIn;
|
d@0
|
723 }
|
d@0
|
724 else
|
d@0
|
725 {
|
d@0
|
726 envelope = envelope * gr;
|
d@0
|
727 envelope = envelope + (1 - gr) * EnvIn;
|
d@0
|
728 }
|
d@0
|
729
|
d@0
|
730 out[i] = envelope;
|
d@0
|
731 }
|
d@0
|
732 }
|
d@0
|
733
|
d@0
|
734 void AudioSourceFeatureExtractor::Normalise(float* data, float* &out, size_t len)
|
d@0
|
735 {
|
d@0
|
736 float maxvalue = std::numeric_limits<float>::lowest();
|
d@0
|
737
|
d@0
|
738 for(size_t i = 0; i < len; i++)
|
d@0
|
739 {
|
d@0
|
740 if(data[i] > maxvalue)
|
d@0
|
741 maxvalue = data[i];
|
d@0
|
742 }
|
d@0
|
743
|
d@0
|
744 for(size_t i = 0; i < len; i++)
|
d@0
|
745 {
|
d@0
|
746 out[i] = data[i] / maxvalue;
|
d@0
|
747 }
|
d@0
|
748 }
|
d@0
|
749
|
d@0
|
750 float AudioSourceFeatureExtractor::EntropyOfEnergy(float* data, size_t len)
|
d@0
|
751 {
|
d@0
|
752 float totalEnergy = 0.0;
|
d@0
|
753 float totalentropy = 0.0;
|
d@0
|
754 const int numSubFrames = 30;
|
d@0
|
755
|
d@0
|
756 //Calculate Total Energy and normalise it
|
d@0
|
757 for (size_t i =0; i < len; i++)
|
d@0
|
758 {
|
d@0
|
759 totalEnergy += (data[i] * data[i]);
|
d@0
|
760 }
|
d@0
|
761
|
d@0
|
762 int subFrameSize = len / numSubFrames;
|
d@0
|
763
|
d@0
|
764 for (size_t i = 0; i < numSubFrames; i++)
|
d@0
|
765 {
|
d@0
|
766 float val = 0.0;
|
d@0
|
767
|
d@0
|
768 for (size_t j = 0; j < (size_t)subFrameSize; j++)
|
d@0
|
769 {
|
d@0
|
770 val += (data[j + (i * subFrameSize)] * data[j + (i * subFrameSize)]);
|
d@0
|
771 }
|
d@0
|
772
|
d@0
|
773 float frameval = val / (totalEnergy + std::numeric_limits<float>::epsilon());
|
d@0
|
774
|
d@0
|
775 totalentropy += (frameval * Log2(frameval + std::numeric_limits<float>::epsilon()));
|
d@0
|
776 }
|
d@0
|
777
|
d@0
|
778 return (totalentropy * -1);
|
d@0
|
779 }
|
d@0
|
780
|
d@0
|
781 void AudioSourceFeatureExtractor::PDF_getResampleKrnl(std::vector<float> freqVec, float fsProc, int nfft, int nBin, std::vector<float> &outLogFreqVec, std::vector<std::vector<float>> &outKrnl)
|
d@0
|
782 {
|
d@0
|
783 float f1 = freqVec[0];
|
d@0
|
784 float f2 = freqVec[freqVec.size() - 1];
|
d@0
|
785
|
d@0
|
786 outLogFreqVec = LinSpace(log(f1), log(f2), nBin);
|
d@0
|
787 std::vector<float> f(nfft / 2, 0.0f);
|
d@0
|
788
|
d@0
|
789 for (size_t i = 0; i < (size_t)nBin; i++)
|
d@0
|
790 {
|
d@0
|
791 outKrnl.push_back(f);
|
d@0
|
792 }
|
d@0
|
793
|
d@0
|
794 for (size_t i = 0; i < outLogFreqVec.size(); i++)
|
d@0
|
795 {
|
d@0
|
796 outLogFreqVec[i] = exp(outLogFreqVec[i]);
|
d@0
|
797 }
|
d@0
|
798
|
d@0
|
799 for (size_t i = 1; i < (size_t)nBin - 2; i++)
|
d@0
|
800 {
|
d@0
|
801 float freqBinLin = outLogFreqVec[i] / (fsProc / nfft);
|
d@0
|
802
|
d@0
|
803 float nextBinUp = outLogFreqVec[i + 1] / (fsProc / nfft);
|
d@0
|
804
|
d@0
|
805 float nextBinDown = outLogFreqVec[i - 1] / (fsProc / nfft);
|
d@0
|
806
|
d@0
|
807 float filtWid = nextBinUp - nextBinDown;
|
d@0
|
808
|
d@0
|
809 if (filtWid <= 2)
|
d@0
|
810 {
|
d@0
|
811 // log resolution is finer than linear resolution
|
d@0
|
812 // linear interpolation of neighboring bins
|
d@0
|
813
|
d@0
|
814 float binDown = floor(freqBinLin);
|
d@0
|
815 float frac = freqBinLin - binDown;
|
d@0
|
816 std::vector<float> filt(nfft/2, 0.0f);
|
d@0
|
817 filt[((int)binDown) - 1] = 1 - frac;
|
d@0
|
818 filt[((int)binDown + 1) - 1] = frac;
|
d@0
|
819 outKrnl[i][((int)binDown) - 1] = filt[((int)binDown) - 1];
|
d@0
|
820 outKrnl[i][((int)binDown + 1) - 1] = filt[((int)binDown + 1) - 1];
|
d@0
|
821
|
d@0
|
822 }
|
d@0
|
823 else
|
d@0
|
824 {
|
d@0
|
825 float ceilNextBinDown = ceil(nextBinDown);
|
d@0
|
826 float floorFreqBinLin = floor(freqBinLin);
|
d@0
|
827 float ceilFreqBinLin = ceil(freqBinLin);
|
d@0
|
828 float floorNextBinUp = floor(nextBinUp);
|
d@0
|
829
|
d@0
|
830 std::vector<int> idxLow;
|
d@0
|
831 std::vector<int> idxHigh;
|
d@0
|
832 std::vector<int> filtIdx;
|
d@0
|
833
|
d@0
|
834 for (size_t j = (size_t)ceilNextBinDown; j <= (size_t)floorFreqBinLin; j++ )
|
d@0
|
835 {
|
d@0
|
836 idxLow.push_back(j);
|
d@0
|
837 }
|
d@0
|
838
|
d@0
|
839 for (size_t j = (size_t)ceilFreqBinLin; j <= (size_t)floorNextBinUp; j++ )
|
d@0
|
840 {
|
d@0
|
841 idxHigh.push_back(j);
|
d@0
|
842 }
|
d@0
|
843
|
d@0
|
844 std::vector<int>::iterator it;
|
d@0
|
845 it = std::unique (idxLow.begin(), idxLow.end());
|
d@0
|
846 idxLow.resize(std::distance(idxLow.begin(), it));
|
d@0
|
847
|
d@0
|
848 it = std::unique (idxHigh.begin(), idxHigh.end());
|
d@0
|
849 idxHigh.resize(std::distance(idxHigh.begin(), it));
|
d@0
|
850
|
d@0
|
851 filtIdx.reserve(idxLow.size() + idxHigh.size()); // preallocate memory
|
d@0
|
852 filtIdx.insert(filtIdx.end(), idxLow.begin(), idxLow.end());
|
d@0
|
853 filtIdx.insert(filtIdx.end(), idxHigh.begin(), idxHigh.end());
|
d@0
|
854
|
d@0
|
855 std::vector<float> rampUp;
|
d@0
|
856 std::vector<float> rampDown;
|
d@0
|
857 for (size_t j = 0; j < filtIdx.size(); j++)
|
d@0
|
858 {
|
d@0
|
859 rampUp.push_back(1 - (freqBinLin - filtIdx[j]) / (freqBinLin - nextBinDown));
|
d@0
|
860 rampDown.push_back(1 - (filtIdx[j] - freqBinLin) / (nextBinUp - freqBinLin));
|
d@0
|
861 }
|
d@0
|
862
|
d@0
|
863 std::vector<float> filt;
|
d@0
|
864
|
d@0
|
865 for (size_t j = 0; j < rampUp.size(); j++)
|
d@0
|
866 {
|
d@0
|
867 filt.push_back(min(rampUp[j], rampDown[j]));
|
d@0
|
868 }
|
d@0
|
869
|
d@0
|
870 float sumfilt = 0.0f;
|
d@0
|
871 for (size_t j = 0; j < filt.size(); j++)
|
d@0
|
872 {
|
d@0
|
873 sumfilt += filt[j];
|
d@0
|
874 }
|
d@0
|
875
|
d@0
|
876 for (size_t j = 0; j < filt.size(); j++)
|
d@0
|
877 {
|
d@0
|
878 filt[j] /= sumfilt;
|
d@0
|
879 }
|
d@0
|
880
|
d@0
|
881 for (size_t j = 0; j < filtIdx.size(); j++)
|
d@0
|
882 {
|
d@0
|
883 outKrnl[i][filtIdx[j] - 1] = filt[j];
|
d@0
|
884 }
|
d@0
|
885 }
|
d@0
|
886 }
|
d@0
|
887
|
d@0
|
888 // special routine for first bin
|
d@0
|
889 // get frequency in linear bins
|
d@0
|
890
|
d@0
|
891 float freqBinLin = outLogFreqVec[0] / (fsProc/(float)nfft);
|
d@0
|
892 float binDown = floor(freqBinLin);
|
d@0
|
893 float frac = freqBinLin - binDown;
|
d@0
|
894
|
d@0
|
895 std::vector<float> filt(nfft/2, 0.0f);
|
d@0
|
896 filt[((int)binDown) - 1] = 1 - frac;
|
d@0
|
897 filt[((int)binDown + 1) - 1] = frac;
|
d@0
|
898
|
d@0
|
899 outKrnl[0][((int)binDown) - 1] = filt[((int)binDown) - 1];
|
d@0
|
900 outKrnl[0][((int)binDown + 1) - 1] = filt[((int)binDown + 1) - 1];
|
d@0
|
901
|
d@0
|
902 // special routine for last bin
|
d@0
|
903 // get frequency in linear bins
|
d@0
|
904 freqBinLin = outLogFreqVec[outLogFreqVec.size() - 1] / (fsProc/nfft);
|
d@0
|
905
|
d@0
|
906 // get next lower bin
|
d@0
|
907 float nextBinDown = outLogFreqVec[outLogFreqVec.size() - 2] / (fsProc/nfft);
|
d@0
|
908
|
d@0
|
909 float ceilNextBinDown = ceil(nextBinDown); //Subtract by -1 because of array
|
d@0
|
910 float floorFreqBinLin = floor(freqBinLin);
|
d@0
|
911
|
d@0
|
912 std::vector<int> filtIdx;
|
d@0
|
913
|
d@0
|
914 for (size_t i = (size_t)ceilNextBinDown; i <= (size_t)floorFreqBinLin; i++ )
|
d@0
|
915 {
|
d@0
|
916 filtIdx.push_back(i);
|
d@0
|
917 }
|
d@0
|
918
|
d@0
|
919 std::vector<float> filt2;
|
d@0
|
920 for (size_t i = 0; i < filtIdx.size(); i++)
|
d@0
|
921 {
|
d@0
|
922 filt2.push_back(1 - (freqBinLin - filtIdx[i]) / (freqBinLin - nextBinDown));
|
d@0
|
923 }
|
d@0
|
924
|
d@0
|
925 float sumfilt = 0.0f;
|
d@0
|
926 for (size_t i = 0; i < filt2.size(); i++)
|
d@0
|
927 {
|
d@0
|
928 sumfilt += filt2[i];
|
d@0
|
929 }
|
d@0
|
930
|
d@0
|
931 for (size_t i = 0; i < filt2.size(); i++)
|
d@0
|
932 {
|
d@0
|
933 filt2[i] /= sumfilt;
|
d@0
|
934 }
|
d@0
|
935
|
d@0
|
936 for (size_t j = 0; j < filtIdx.size(); j++)
|
d@0
|
937 {
|
d@0
|
938 outKrnl[outKrnl.size() - 1][filtIdx[j] - 1] = filt2[j];
|
d@0
|
939 }
|
d@0
|
940 }
|
d@0
|
941
|
d@0
|
942 float AudioSourceFeatureExtractor::Log2(float n)
|
d@0
|
943 {
|
d@0
|
944 // log(n)/log(2) is log2.
|
d@0
|
945 return logf( n ) / logf( 2 );
|
d@0
|
946 }
|
d@0
|
947
|
d@0
|
948 int AudioSourceFeatureExtractor::Sign(float x)
|
d@0
|
949 {
|
d@0
|
950 int retValue = 0;
|
d@0
|
951 if (x >= 0)
|
d@0
|
952 retValue = 1;
|
d@0
|
953 if (x < 0)
|
d@0
|
954 retValue = -1;
|
d@0
|
955
|
d@0
|
956 return retValue;
|
d@0
|
957 }
|
d@0
|
958
|
d@0
|
959 std::vector<float> AudioSourceFeatureExtractor::LinSpace(float min, float max, int n)
|
d@0
|
960 {
|
d@0
|
961 std::vector<float> result;
|
d@0
|
962 // vector iterator
|
d@0
|
963 int iterator = 0;
|
d@0
|
964
|
d@0
|
965 for (int i = 0; i <= n-2; i++)
|
d@0
|
966 {
|
d@0
|
967 float temp = min + i*(max-min)/(floor((float)n) - 1);
|
d@0
|
968 result.insert(result.begin() + iterator, temp);
|
d@0
|
969 iterator += 1;
|
d@0
|
970 }
|
d@0
|
971
|
d@0
|
972 result.insert(result.begin() + iterator, max);
|
d@0
|
973 return result;
|
d@0
|
974 }
|
d@0
|
975
|
d@0
|
976 std::vector<float> AudioSourceFeatureExtractor::LogSpace(float min, float max, int n)
|
d@0
|
977 {
|
d@0
|
978 float logMin = log(min);
|
d@0
|
979 float logMax = log(max);
|
d@0
|
980 double delta = (logMax - logMin) / n;
|
d@0
|
981 double accDelta = 0;
|
d@0
|
982 std::vector<float> v;
|
d@0
|
983
|
d@0
|
984 for (int i = 0; i <= n; i++)
|
d@0
|
985 {
|
d@0
|
986 v.push_back((float) exp(logMin + accDelta));
|
d@0
|
987 accDelta += delta;// accDelta = delta * i
|
d@0
|
988 }
|
d@0
|
989
|
d@0
|
990 return v;
|
d@0
|
991 }
|
d@0
|
992
|