annotate Source/.svn/text-base/AudioSourceFeatureExtractor.cpp.svn-base @ 15:585caf503ef5 tip

Tidy up for ROLI
author Geogaddi\David <d.m.ronan@qmul.ac.uk>
date Tue, 17 May 2016 18:50:19 +0100
parents 25bf17994ef1
children
rev   line source
d@0 1 /*
d@0 2 ==============================================================================
d@0 3
d@0 4 AudioSourceFeatureExtractor.cpp
d@0 5 Created: 11 Aug 2014 10:41:02am
d@0 6 Author: david.ronan
d@0 7
d@0 8 ==============================================================================
d@0 9 */
d@0 10
d@0 11 #include "AudioSourceFeatureExtractor.h"
d@0 12 #include "ObservationData.h"
d@0 13 #include <math.h>
d@0 14 #include <cmath>
d@0 15 #include <algorithm>
d@0 16 #include <string>
d@0 17 #include <windows.h>
d@0 18
d@0 19 //============================================================================== AudioSourceFeatureExtractor
d@0 20 AudioSourceFeatureExtractor::AudioSourceFeatureExtractor() // m_fft(FFTSIZE)
d@0 21 {
d@0 22 m_fft = new FFTW(FFTSIZE);
d@0 23 m_fInputBuffer = new float[FFTSIZE];
d@0 24 m_fMagnitudeSpectrum = new float[MAGNITUDESIZE];
d@0 25 m_fPreviousMagnitudeSpectrum = new float[MAGNITUDESIZE];
d@0 26 m_OutReal = new float[FFTSIZE];
d@0 27 m_OutImag = new float[FFTSIZE];
d@0 28
d@0 29 memset(m_fInputBuffer, 0, FFTSIZE * sizeof(float));
d@0 30 memset(m_fMagnitudeSpectrum, 0, MAGNITUDESIZE * sizeof(float));
d@0 31 memset(m_fPreviousMagnitudeSpectrum, 0, MAGNITUDESIZE * sizeof(float));
d@0 32
d@0 33
d@0 34 }
d@0 35
d@0 36 //============================================================================== ~AudioSourceFeatureExtractor
d@0 37 AudioSourceFeatureExtractor::~AudioSourceFeatureExtractor()
d@0 38 {
d@0 39 if(m_fInputBuffer != NULL)
d@0 40 {
d@0 41 delete m_fInputBuffer; // memory freed up
d@0 42 m_fInputBuffer = nullptr;
d@0 43 }
d@0 44
d@0 45 if(m_fMagnitudeSpectrum != NULL)
d@0 46 {
d@0 47 delete m_fMagnitudeSpectrum; // memory freed up
d@0 48 m_fMagnitudeSpectrum = nullptr;
d@0 49 }
d@0 50
d@0 51 if(m_fPreviousMagnitudeSpectrum != NULL)
d@0 52 {
d@0 53 delete m_fPreviousMagnitudeSpectrum; // memory freed up
d@0 54 m_fPreviousMagnitudeSpectrum = nullptr;
d@0 55 }
d@0 56
d@0 57 if (m_OutReal != NULL)
d@0 58 {
d@0 59 delete[] m_OutReal;
d@0 60 m_OutReal = nullptr;
d@0 61 }
d@0 62
d@0 63 if (m_OutImag != NULL)
d@0 64 {
d@0 65 delete[] m_OutImag;
d@0 66 m_OutImag = nullptr;
d@0 67 }
d@0 68
d@0 69 if (m_fft != NULL)
d@0 70 {
d@0 71 delete m_fft;
d@0 72 m_fft = nullptr;
d@0 73 }
d@0 74
d@0 75 }
d@0 76
d@0 77 //============================================================================== init
d@0 78 void AudioSourceFeatureExtractor::Initialise(float fSampleRate)
d@0 79 {
d@0 80 m_fSampleRate = fSampleRate;
d@0 81 m_iWriteIdx = 0;
d@0 82
d@0 83 for (int i = 1; i <= MAGNITUDESIZE; i++)
d@0 84 {
d@0 85 float win = ((fSampleRate / (2 * MAGNITUDESIZE)) * i);
d@0 86 m_fFreqBins.push_back(win);
d@0 87 }
d@0 88
d@0 89 m_MFCC.initMFFCvariables(12, MAGNITUDESIZE, m_fSampleRate);
d@0 90
d@0 91
d@0 92 }
d@0 93
d@0 94 //============================================================================== process
d@0 95 std::vector<ObservationData> AudioSourceFeatureExtractor::Process( const float* data, size_t numSamples)
d@0 96 {
d@0 97 std::vector<ObservationData> observations = std::vector<ObservationData>();
d@0 98
d@0 99 memset(m_fInputBuffer, 0, FFTSIZE * sizeof(float));
d@0 100 memset(m_fMagnitudeSpectrum, 0, MAGNITUDESIZE * sizeof(float));
d@0 101 memset(m_fPreviousMagnitudeSpectrum, 0, MAGNITUDESIZE * sizeof(float));
d@0 102 m_iWriteIdx = 0;
d@0 103
d@0 104 float perdiodicity = EstimatePerdiodicity(const_cast<float*>(data), numSamples);
d@0 105
d@0 106 float entropyofenergy = EntropyOfEnergy(const_cast<float*>(data), numSamples);
d@0 107
d@0 108 for(size_t n = 0; n < numSamples; n++)
d@0 109 {
d@0 110 m_fInputBuffer[m_iWriteIdx] = data[n];
d@0 111 m_iWriteIdx++;
d@0 112
d@0 113 //Do normal FFT
d@0 114
d@0 115 if(FFTSIZE == m_iWriteIdx)
d@0 116 {
d@0 117 //Get the peak amplitude rms and crest factor for the current frame
d@0 118 float peakvalue = 0;
d@0 119 float rms = 0;
d@0 120 float crestfactor = 0;
d@0 121
d@0 122 for(int i=0; i < FFTSIZE; i++)
d@0 123 {
d@0 124 if(m_fInputBuffer[i] > peakvalue)
d@0 125 peakvalue = abs(m_fInputBuffer[i]);
d@0 126
d@0 127 rms += (m_fInputBuffer[i] * m_fInputBuffer[i]);
d@0 128 }
d@0 129
d@0 130 rms /= FFTSIZE;
d@0 131 rms = sqrt(rms);
d@0 132
d@0 133 crestfactor = peakvalue/(rms + std::numeric_limits<float>::epsilon());
d@0 134
d@0 135 float ZCR = 0;
d@0 136 float inputBuffer2[FFTSIZE] = {0.0};
d@0 137
d@0 138 for (int i = 1; i < FFTSIZE; i++)
d@0 139 {
d@0 140 inputBuffer2[i] = m_fInputBuffer[i -1];
d@0 141 }
d@0 142
d@0 143 for (int i = 0; i < FFTSIZE; i++)
d@0 144 {
d@0 145 ZCR += (abs(Sign(m_fInputBuffer[i]) - Sign(inputBuffer2[i])));
d@0 146 }
d@0 147
d@0 148 ZCR /= (2 * FFTSIZE);
d@0 149
d@0 150 //////////////////////////////////////////////////////////////////
d@0 151 for (int i = 0; i < FFTSIZE; i++)
d@0 152 {
d@0 153 float multiplier = (float) (0.5 * (1 - cos(2*PI*i/(FFTSIZE -1))));
d@0 154 m_fInputBuffer[i] = multiplier * m_fInputBuffer[i];
d@0 155 }
d@0 156
d@0 157 //Compute the FFT
d@0 158
d@0 159 memset(m_OutReal, 0, sizeof(float)*FFTSIZE);
d@0 160 memset(m_OutImag, 0, sizeof(float)*FFTSIZE);
d@0 161
d@0 162 m_fft->process(m_fInputBuffer, m_OutReal, m_OutImag);
d@0 163
d@0 164 //Compute the Magnitude
d@0 165 VectorDistance(m_OutReal, 1, m_OutImag, 1, m_fMagnitudeSpectrum, 1, MAGNITUDESIZE);
d@0 166
d@0 167 //Compute the spectral features
d@0 168 float centroid = 0;
d@0 169 float spread = 0;
d@0 170 float skewness = 0;
d@0 171 float kurtosis = 0;
d@0 172 float brightness = 0;
d@0 173 float rolloff85 = 0;
d@0 174 float rolloff95 = 0;
d@0 175 float spectralentropy = 0;
d@0 176 float flatness = 0;
d@0 177 float spectralcf = 0;
d@0 178 float spectralflux = 0;
d@0 179 std::vector<float> mfccs;
d@0 180
d@0 181 SpectralFeatures(m_fMagnitudeSpectrum, m_fPreviousMagnitudeSpectrum, MAGNITUDESIZE, centroid, spread, skewness, kurtosis, brightness, rolloff85, rolloff95, spectralentropy, flatness, spectralcf, spectralflux);
d@0 182
d@0 183 m_MFCC.ComputeMFCC(m_fMagnitudeSpectrum, mfccs);
d@0 184
d@0 185 //Update for Spectral Flux
d@0 186 for(int i = 0; i < MAGNITUDESIZE; i++)
d@0 187 {
d@0 188 m_fPreviousMagnitudeSpectrum[i] = m_fMagnitudeSpectrum[i];
d@0 189 }
d@0 190
d@0 191 //50% Hop size
d@0 192 for(int i = MAGNITUDESIZE; i < FFTSIZE; i++)
d@0 193 {
d@0 194 m_fInputBuffer[i - MAGNITUDESIZE] = m_fInputBuffer[i];
d@0 195 m_iWriteIdx = MAGNITUDESIZE;
d@0 196 }
d@0 197
d@0 198 //create an observation for this window and push it to the list of observations for this 30 sec. audio clip
d@0 199 ObservationData newObservation = ObservationData(rms, peakvalue, crestfactor, ZCR, centroid, spread, skewness, kurtosis, brightness, rolloff85, rolloff95, spectralentropy, flatness, spectralcf, spectralflux, mfccs, perdiodicity, entropyofenergy);
d@0 200 observations.push_back(newObservation);
d@0 201
d@0 202 }
d@0 203 }
d@0 204
d@0 205 return observations;
d@0 206 }
d@0 207
d@0 208 void AudioSourceFeatureExtractor::Finalize()
d@0 209 {
d@0 210 m_MFCC.freeMFCCmemory();
d@0 211 }
d@0 212
d@0 213 void AudioSourceFeatureExtractor::VectorDistance(const float* vIn1, int stride1, const float* vIn2, int stride2, float* &vOut, int strideOut, size_t nElements)
d@0 214 {
d@0 215 //Compute remaining samples
d@0 216 for (size_t i = 0; i < nElements; i++)
d@0 217 {
d@0 218 vOut[i* strideOut] = sqrt(vIn1[i*stride1] * vIn1[i*stride1] + vIn2[i*stride2] * vIn2[i*stride2]);
d@0 219 }
d@0 220 }
d@0 221
d@0 222 void AudioSourceFeatureExtractor::SpectralFeatures(float* &magnitude, float* &prevmagnitude, size_t windowSize, float &centroid, float &spread, float &skew, float &kurtosis, float &brightness, float &rolloff95, float &rolloff85, float &spectralentropy, float &flatness, float &spectralcf, float &spectralflux)
d@0 223 {
d@0 224 float summagbin = 0;
d@0 225 float summag = 0;
d@0 226 float sumprevmag = 0;
d@0 227 float summagspread = 0;
d@0 228 float summagskewness = 0;
d@0 229 float summagkurtosis = 0;
d@0 230 float sumbrightness = 0;
d@0 231 float maxmag = 0;
d@0 232
d@0 233 float geo = 1;
d@0 234 float totalenergy = 0;
d@0 235 float currEnergy95 = 0;
d@0 236 float currEnergy85 = 0;
d@0 237 int countFFT85 = 0;
d@0 238 int countFFT95 = 0;
d@0 239
d@0 240 float normalisedMagSumEntropy = 0;
d@0 241
d@0 242 //Centroid
d@0 243 //////////////////////////////////////////////////////
d@0 244
d@0 245 for (size_t i = 0; i < windowSize; i++)
d@0 246 {
d@0 247 //Moments
d@0 248 summagbin += (m_fFreqBins[i] * magnitude[i]);
d@0 249 summag += magnitude[i];
d@0 250 sumprevmag += prevmagnitude[i];
d@0 251
d@0 252 //Spectral Crest factor
d@0 253 if(magnitude[i] > maxmag)
d@0 254 {
d@0 255 maxmag = magnitude[i];
d@0 256 }
d@0 257
d@0 258 float energy = magnitude[i] * magnitude[i];
d@0 259
d@0 260 //Roll off + Flatness
d@0 261
d@0 262 totalenergy += energy;
d@0 263 float exponent = (float)(1.0/(float)windowSize);
d@0 264
d@0 265 if(energy != 0)
d@0 266 {
d@0 267 geo *= pow(energy, exponent);
d@0 268 }
d@0 269 }
d@0 270
d@0 271 summag += std::numeric_limits<float>::epsilon();
d@0 272
d@0 273 centroid = summagbin / summag;
d@0 274
d@0 275 /////////////////////////////////////////////////////////
d@0 276
d@0 277 //Spread
d@0 278 //////////////////////////////////////////////////////
d@0 279
d@0 280 for (size_t i = 0; i < windowSize; i++)
d@0 281 {
d@0 282 float norm = magnitude[i] / (summag + std::numeric_limits<float>::epsilon());
d@0 283 float normprev = prevmagnitude[i] / (sumprevmag + std::numeric_limits<float>::epsilon());
d@0 284
d@0 285 //Spectral Flux
d@0 286 spectralflux += ((norm - normprev) * (norm - normprev));
d@0 287
d@0 288 //Entropy
d@0 289 normalisedMagSumEntropy += (norm * log(norm + std::numeric_limits<float>::epsilon()));
d@0 290
d@0 291 //Moments
d@0 292 summagspread += pow((m_fFreqBins[i] - centroid), 2) * norm;
d@0 293 summagskewness += pow((m_fFreqBins[i] - centroid), 3) * norm;
d@0 294 summagkurtosis += pow((m_fFreqBins[i] - centroid), 4) * norm;
d@0 295
d@0 296 //Brightness
d@0 297 if (m_fFreqBins[i] >= 1500)
d@0 298 {
d@0 299 sumbrightness += magnitude[i];
d@0 300 }
d@0 301 }
d@0 302
d@0 303 spread = sqrt(summagspread);
d@0 304
d@0 305 /////////////////////////////////////////////////////////
d@0 306
d@0 307 //Skewness
d@0 308 //////////////////////////////////////////////////////
d@0 309
d@0 310 float spreadtemp = spread;
d@0 311 spreadtemp += std::numeric_limits<float>::epsilon();
d@0 312
d@0 313 skew = summagskewness / pow(spreadtemp, 3);
d@0 314
d@0 315 /////////////////////////////////////////////////////////
d@0 316
d@0 317 //Kurtosis
d@0 318 //////////////////////////////////////////////////////
d@0 319
d@0 320 kurtosis = summagkurtosis / pow(spreadtemp, 4);
d@0 321
d@0 322 /////////////////////////////////////////////////////////
d@0 323
d@0 324 //Brightness
d@0 325 //////////////////////////////////////////////////////
d@0 326
d@0 327 brightness = sumbrightness / summag;
d@0 328
d@0 329 /////////////////////////////////////////////////////////
d@0 330
d@0 331 //Roll off 85% - 95%
d@0 332 /////////////////////////////////////////////////////////
d@0 333
d@0 334 while ((currEnergy95 <= 0.95 * totalenergy) && (countFFT95 < (int)windowSize))
d@0 335 {
d@0 336 currEnergy95 += (magnitude[countFFT95] * magnitude[countFFT95]);
d@0 337 countFFT95 += 1;
d@0 338
d@0 339 if (currEnergy85 <= 0.85 * totalenergy)
d@0 340 {
d@0 341 currEnergy85 += (magnitude[countFFT85] * magnitude[countFFT85]);
d@0 342 countFFT85 += 1;
d@0 343 }
d@0 344
d@0 345 }
d@0 346
d@0 347 countFFT85 -= 1;
d@0 348 countFFT95 -= 1;
d@0 349
d@0 350 //Normalization
d@0 351 if (currEnergy85 <= 0)
d@0 352 {
d@0 353 //If we have silence then our value should be 0.
d@0 354 rolloff85 = 0;
d@0 355 }
d@0 356 else
d@0 357 {
d@0 358 rolloff85 = (((float)countFFT85) / (float)windowSize);
d@0 359 }
d@0 360
d@0 361 //Normalization
d@0 362 if (currEnergy95 <= 0)
d@0 363 {
d@0 364 //If we have silence then our value should be 0.
d@0 365 rolloff95 = 0;
d@0 366 }
d@0 367 else
d@0 368 {
d@0 369 rolloff95 = (((float)countFFT95) / (float)windowSize);
d@0 370 }
d@0 371
d@0 372 /////////////////////////////////////////////////////
d@0 373
d@0 374 //Spectral Entropy
d@0 375 /////////////////////////////////////////////////////////
d@0 376
d@0 377 spectralentropy = (float) ((normalisedMagSumEntropy / log((float)windowSize)) * - 1);
d@0 378
d@0 379 /////////////////////////////////////////////////////////
d@0 380
d@0 381 /////////////////////////////////////////////////////
d@0 382
d@0 383 //Flatness
d@0 384 /////////////////////////////////////////////////////////
d@0 385 if(totalenergy != 0 && geo != 0)
d@0 386 {
d@0 387 flatness = geo / (totalenergy / (float)windowSize);
d@0 388 }
d@0 389 else
d@0 390 {
d@0 391 flatness = 0;
d@0 392 }
d@0 393
d@0 394 /////////////////////////////////////////////////////////
d@0 395
d@0 396 //Spectral Crest Factor
d@0 397 /////////////////////////////////////////////////////////
d@0 398
d@0 399 if(summag != 0)
d@0 400 {
d@0 401 spectralcf = maxmag / summag;
d@0 402 }
d@0 403 else
d@0 404 {
d@0 405 spectralcf = 0;
d@0 406 }
d@0 407 }
d@0 408
d@0 409 void AudioSourceFeatureExtractor::ConvolveFunction(float* &z, float *x, float *y, size_t &lenz, size_t lenx, size_t leny)
d@0 410 {
d@0 411 // computes cross-correlation of two vectors, takes 2 vectors of the same length and computes 2*length-1 lags
d@0 412
d@0 413 float *zptr,s,*xp,*yp;
d@0 414 size_t n_lo, n_hi;
d@0 415
d@0 416 lenz = lenx + leny - 1;
d@0 417
d@0 418 z = new float[lenz];
d@0 419
d@0 420 zptr = z;
d@0 421
d@0 422 for (size_t i = 0; i < lenz; i++)
d@0 423 {
d@0 424 s = 0.0;
d@0 425 n_lo = 0 > (i - leny + 1 ) ? 0 : (i - leny + 1);
d@0 426 n_hi = (lenx - 1) < i ? (lenx - 1): i;
d@0 427 xp = &x[0] + n_lo;
d@0 428 yp = &y[0] + i - n_lo;
d@0 429
d@0 430 for (size_t n = n_lo; n <= n_hi; n++)
d@0 431 {
d@0 432 s += *xp * *yp;
d@0 433 xp++;
d@0 434 yp--;
d@0 435 }
d@0 436
d@0 437 *zptr=s;
d@0 438 zptr++;
d@0 439 }
d@0 440 }
d@0 441
d@0 442 void AudioSourceFeatureExtractor::XCorr(float *&output, float* input1, float* input2, size_t &outputlen, size_t inputlen, size_t maxLag = 0)
d@0 443 {
d@0 444 // TODO: adapt for different sizes
d@0 445
d@0 446 // find next power of 2 for efficient FFT
d@0 447 size_t fftSize = 1;
d@0 448 while (fftSize < 2*static_cast<size_t>(inputlen)-1)
d@0 449 {
d@0 450 fftSize *= 2;
d@0 451 }
d@0 452
d@0 453 FFTW fft1 = FFTW(fftSize);
d@0 454 FFTW fft2 = FFTW(fftSize);
d@0 455
d@0 456 // allocate space for FFTW processing
d@0 457 float *in1 = new float[fftSize];
d@0 458 float *in2 = new float[fftSize];
d@0 459
d@0 460 // output from FFT is only up to Nyquist frequency
d@0 461 float *out1Real = new float[fftSize];
d@0 462 float *out1Imag = new float[fftSize];
d@0 463 float *out2Real = new float[fftSize];
d@0 464 float *out2Imag = new float[fftSize];
d@0 465
d@0 466 memset(in1, 0, sizeof(float)*fftSize);
d@0 467 memset(in2, 0, sizeof(float)*fftSize);
d@0 468 memset(out1Real, 0, sizeof(float)*fftSize);
d@0 469 memset(out1Imag, 0, sizeof(float)*fftSize);
d@0 470 memset(out2Real, 0, sizeof(float)*fftSize);
d@0 471 memset(out2Imag, 0, sizeof(float)*fftSize);
d@0 472
d@0 473 // copy input to allocated arrays
d@0 474 for (size_t i = 0; i < static_cast<size_t>(inputlen); ++i)
d@0 475 {
d@0 476 in1[i] = input1[i];
d@0 477 in2[i] = input2[i];
d@0 478 }
d@0 479
d@0 480 fft1.process(in1, out1Real, out1Imag);
d@0 481 fft2.process(in2, out2Real, out2Imag);
d@0 482
d@0 483 // multiply out1*conj(out2) in FFT domain
d@0 484 fftwf_complex *product = (fftwf_complex*)fftwf_malloc(sizeof(fftwf_complex)*fftSize);
d@0 485 for (size_t i = 0; i < fftSize; ++i)
d@0 486 {
d@0 487 product[i][0] = (out1Real[i]*out2Real[i] + out1Imag[i]*out2Imag[i]);
d@0 488 product[i][1] = (out1Imag[i]*out2Real[i] - out1Real[i]*out2Imag[i]);
d@0 489 }
d@0 490
d@0 491 // compute IFFT
d@0 492
d@0 493 // do FFT, adapted from iTraktorLib "FFTW.h"
d@0 494 fftw_iodim dim;
d@0 495 dim.n = fftSize;
d@0 496 dim.is = 1;
d@0 497 dim.os = 1;
d@0 498
d@0 499 fftwf_plan plan;
d@0 500
d@0 501 float *result = (float*) fftwf_malloc(sizeof(float)*fftSize);
d@0 502 plan = fftwf_plan_guru_dft_c2r(1, &dim, 0, NULL, product, result, FFTW_ESTIMATE);
d@0 503 fftwf_execute_dft_c2r(plan, product, result);
d@0 504 fftwf_destroy_plan(plan);
d@0 505
d@0 506 // copy real part of result back to output array, normalize by FFT size
d@0 507 if (maxLag == 0 || maxLag >= static_cast<size_t>(inputlen))
d@0 508 {
d@0 509 maxLag = static_cast<size_t>(inputlen)-1;
d@0 510 }
d@0 511
d@0 512 output = new float[2 * maxLag + 1];
d@0 513 outputlen = 2 * maxLag + 1;
d@0 514
d@0 515 for (size_t i = fftSize - maxLag; i < fftSize; ++i)
d@0 516 {
d@0 517 output[i - fftSize + maxLag] = result[i] / fftSize;
d@0 518 }
d@0 519
d@0 520 for (unsigned i = 0; i <= (unsigned)maxLag; ++i)
d@0 521 {
d@0 522 output[i + maxLag] = result[i] / fftSize;
d@0 523 }
d@0 524
d@0 525 fftwf_free(result);
d@0 526 fftwf_free(product);
d@0 527
d@0 528 if(in1 != NULL)
d@0 529 {
d@0 530 delete[] in1;
d@0 531 in1 = nullptr;
d@0 532 }
d@0 533
d@0 534 if(in2 != NULL)
d@0 535 {
d@0 536 delete[] in2;
d@0 537 in2 = nullptr;
d@0 538 }
d@0 539
d@0 540 if(out1Real != NULL)
d@0 541 {
d@0 542 delete[] out1Real;
d@0 543 out1Real= nullptr;
d@0 544 }
d@0 545
d@0 546 if(out1Imag != NULL)
d@0 547 {
d@0 548 delete[] out1Imag;
d@0 549 out1Imag= nullptr;
d@0 550 }
d@0 551
d@0 552 if(out2Real != NULL)
d@0 553 {
d@0 554 delete[] out2Real;
d@0 555 out2Real= nullptr;
d@0 556 }
d@0 557
d@0 558 if(out2Imag != NULL)
d@0 559 {
d@0 560 delete[] out2Imag;
d@0 561 out2Imag= nullptr;
d@0 562 }
d@0 563 }
d@0 564
d@0 565 float AudioSourceFeatureExtractor::EstimatePerdiodicity(float* data, size_t numSamples)
d@0 566 {
d@0 567 float periodicity = 0.0;
d@0 568 float* downsampleddata = new float[numSamples];
d@0 569 memset(downsampleddata, 0, sizeof(float)*numSamples);
d@0 570 float* xcorr = nullptr;
d@0 571
d@0 572 for(size_t k = 0; k < numSamples; k++)
d@0 573 {
d@0 574 downsampleddata[k] = data[k];
d@0 575 }
d@0 576
d@0 577 //DownSampler(data, downsampleddata, numSamples, downsamplelength, m_fSampleRate, 11025.0);
d@0 578
d@0 579 EnvelopeCurve(downsampleddata, downsampleddata, numSamples, m_fSampleRate);
d@0 580
d@0 581 int startLag = (int)floor((float)(60.0 / 480.0 * m_fSampleRate));
d@0 582 int endLag = (int)floor((float)(60.0 / 30.0 * m_fSampleRate));
d@0 583 int thresh = (int)floor((float)(.2 * m_fSampleRate));
d@0 584
d@0 585 //Normalise
d@0 586 //Normalise(downsampleddata, downsampleddata, downsamplelength);
d@0 587
d@0 588 size_t xcorrlen = 0;
d@0 589 XCorr(xcorr, downsampleddata, downsampleddata, xcorrlen, numSamples, endLag);
d@0 590
d@0 591 int i = (int)floor(xcorrlen / 2);
d@0 592
d@0 593 std::vector<float> temp;
d@0 594 for(size_t j = i; j < xcorrlen; j++)
d@0 595 {
d@0 596 temp.push_back(xcorr[j]);
d@0 597 }
d@0 598
d@0 599 std::vector<float> temp2;
d@0 600 for(size_t j = (size_t)max(1,startLag); j < (size_t)min(endLag,temp.size()); j++)
d@0 601 {
d@0 602 temp2.push_back(temp[j]);
d@0 603 }
d@0 604
d@0 605 int maxIdx = 0;
d@0 606 float maxVal = std::numeric_limits<float>::lowest();
d@0 607 for(size_t j =0; j < temp2.size(); j++)
d@0 608 {
d@0 609 if(temp2[j] > maxVal)
d@0 610 {
d@0 611 maxIdx = j;
d@0 612 maxVal = temp2[j];
d@0 613 }
d@0 614 }
d@0 615
d@0 616 int from = max(1, maxIdx-thresh);
d@0 617 int to = min(temp2.size(), maxIdx+thresh);
d@0 618
d@0 619 float minValInRange = FLT_MAX;
d@0 620
d@0 621 for(size_t j = (size_t)from; j < to; j++)
d@0 622 {
d@0 623 if(temp2[j] < minValInRange)
d@0 624 {
d@0 625 minValInRange = temp2[j];
d@0 626 }
d@0 627 }
d@0 628
d@0 629 periodicity = (maxVal-minValInRange);
d@0 630
d@0 631 std::string dave = "Periodicity " + std::to_string(periodicity) + "\n";
d@0 632 OutputDebugString(dave.c_str());
d@0 633
d@0 634 if(xcorr != NULL)
d@0 635 {
d@0 636 delete[] xcorr;
d@0 637 xcorr = nullptr;
d@0 638 }
d@0 639
d@0 640 if(downsampleddata != NULL)
d@0 641 {
d@0 642 delete[] downsampleddata;
d@0 643 downsampleddata = nullptr;
d@0 644 }
d@0 645
d@0 646 return periodicity;
d@0 647 }
d@0 648
d@0 649 void AudioSourceFeatureExtractor::DownSampler(float* data, float* &out, size_t lenIn, size_t &lenOut, float currentSampleRate, float futureSampleRate)
d@0 650 {
d@0 651 ////Low pass our data before we decimate
d@0 652 //const int filterlength = 101;
d@0 653 //float* tempdata = new float[lenIn];
d@0 654 //memset(tempdata, 0, sizeof(float)*lenIn);
d@0 655
d@0 656 ////Coefficients were taken from Matlab
d@0 657 //float coeffs[filterlength] ={2.0839274e-05f, 6.6880953e-06f,-4.7252855e-05f, -3.4874138e-05f, 7.8508412e-05f,9.7089069e-05f,-9.9197161e-05f, -0.00020412984f, 8.2261082e-05f, 0.00035689832f, 9.4890293e-06f, -0.00053820928f, -0.00021722882f, 0.00070567406f, 0.00057491515f, -0.00078844035f, -0.0010939316f, 0.00069057313f, 0.0017460365f, -0.00030308162f, -0.0024484061f, -0.00047496572f, 0.0030552838f, 0.0017078921f, -0.0033604759f, -0.0033912712f, 0.0031135236f, 0.0054217125f, -0.0020499325f, -0.0075746416f, -6.7239242e-05f, 0.0094950572f, 0.0034002098f, -0.010703080f, -0.0079918290f, 0.010610434f, 0.013732497f, -0.0085344436f, -0.020346783f, 0.0036776769f, 0.027405130f, 0.0050050775f, -0.034361813f, -0.019287759f, 0.040615436f, 0.043452863f,-0.045583628f, -0.093336113f, 0.048780452f, 0.31394306f, 0.45011634f, 0.31394306f, 0.048780452f,-0.093336113f,-0.045583628f, 0.043452863f,0.040615436f,-0.019287759f, -0.034361813f, 0.0050050775f, 0.027405130f,0.0036776769f,-0.020346783f, -0.0085344436f, 0.013732497f, 0.010610434f, -0.0079918290f, -0.010703080f, 0.0034002098f, 0.0094950572f, -6.7239242e-05f, -0.0075746416f, -0.0020499325f, 0.0054217125f, 0.0031135236f, -0.0033912712f, -0.0033604759f, 0.0017078921f, 0.0030552838f, -0.00047496572f, -0.0024484061f, -0.00030308162f, 0.0017460365f,0.00069057313f, -0.0010939316f, -0.00078844035f, 0.00057491515f, 0.00070567406f, -0.00021722882f, -0.00053820928f, 9.4890293e-06f, 0.00035689832f, 8.2261082e-05f, -0.00020412984f, -9.9197161e-05f, 9.7089069e-05f, 7.8508412e-05f, -3.4874138e-05f, -4.7252855e-05f, 6.6880953e-06f, 2.0839274e-05f};
d@0 658 //
d@0 659 //float acc; // accumulator for MACs
d@0 660 // float* coeffp; // pointer to coefficients
d@0 661 // float* inputp; // pointer to input samples
d@0 662 //float* endp = &data[lenIn -1]; // pointer to last sample
d@0 663 //
d@0 664 //
d@0 665 // // apply the filter to each input sample
d@0 666 // for (size_t n = 0; n < lenIn; n++ )
d@0 667 //{
d@0 668 // // calculate output n
d@0 669 // coeffp = &coeffs[0];
d@0 670 // inputp = &data[filterlength - 1 + n];
d@0 671
d@0 672 // acc = 0.0f;
d@0 673 // for (int k = 0; k < filterlength; k++ )
d@0 674 // {
d@0 675 // if(inputp <= endp)
d@0 676 // acc += (*coeffp++) * (*inputp--);
d@0 677 // else
d@0 678 // {
d@0 679 // //When we reach the end of the buffer
d@0 680 // acc += (*coeffp++) * 0.0f;
d@0 681 // *inputp--;
d@0 682 // }
d@0 683 // }
d@0 684
d@0 685 // tempdata[n] = acc;
d@0 686 // }
d@0 687 //int ratio = (int) (currentSampleRate / futureSampleRate);
d@0 688 //
d@0 689 //lenOut = lenIn / ratio;
d@0 690
d@0 691 //out = new float[lenOut];
d@0 692 //memset(out, 0, sizeof(float)*lenOut);
d@0 693
d@0 694
d@0 695 //int idx = 0;
d@0 696 //for(size_t i = 0; i < lenIn; i = i + ratio)
d@0 697 //{
d@0 698 // out[idx] = tempdata[i];
d@0 699 // idx++;
d@0 700 //}
d@0 701
d@0 702 //if(tempdata != NULL)
d@0 703 //{
d@0 704 // delete[] tempdata;
d@0 705 // tempdata = nullptr;
d@0 706 //}
d@0 707 }
d@0 708
d@0 709 void AudioSourceFeatureExtractor::EnvelopeCurve(float* data, float* &out, size_t dataLen, float sampleRate)
d@0 710 {
d@0 711 float release = 0.02f;
d@0 712 float envelope = 0.0f;
d@0 713
d@0 714 float gr = exp(-1 / (sampleRate * release));
d@0 715
d@0 716 for (size_t i = 0; i < dataLen; i++)
d@0 717 {
d@0 718 float EnvIn = abs(data[i]);
d@0 719
d@0 720 if(envelope < EnvIn)
d@0 721 {
d@0 722 envelope = EnvIn;
d@0 723 }
d@0 724 else
d@0 725 {
d@0 726 envelope = envelope * gr;
d@0 727 envelope = envelope + (1 - gr) * EnvIn;
d@0 728 }
d@0 729
d@0 730 out[i] = envelope;
d@0 731 }
d@0 732 }
d@0 733
d@0 734 void AudioSourceFeatureExtractor::Normalise(float* data, float* &out, size_t len)
d@0 735 {
d@0 736 float maxvalue = std::numeric_limits<float>::lowest();
d@0 737
d@0 738 for(size_t i = 0; i < len; i++)
d@0 739 {
d@0 740 if(data[i] > maxvalue)
d@0 741 maxvalue = data[i];
d@0 742 }
d@0 743
d@0 744 for(size_t i = 0; i < len; i++)
d@0 745 {
d@0 746 out[i] = data[i] / maxvalue;
d@0 747 }
d@0 748 }
d@0 749
d@0 750 float AudioSourceFeatureExtractor::EntropyOfEnergy(float* data, size_t len)
d@0 751 {
d@0 752 float totalEnergy = 0.0;
d@0 753 float totalentropy = 0.0;
d@0 754 const int numSubFrames = 30;
d@0 755
d@0 756 //Calculate Total Energy and normalise it
d@0 757 for (size_t i =0; i < len; i++)
d@0 758 {
d@0 759 totalEnergy += (data[i] * data[i]);
d@0 760 }
d@0 761
d@0 762 int subFrameSize = len / numSubFrames;
d@0 763
d@0 764 for (size_t i = 0; i < numSubFrames; i++)
d@0 765 {
d@0 766 float val = 0.0;
d@0 767
d@0 768 for (size_t j = 0; j < (size_t)subFrameSize; j++)
d@0 769 {
d@0 770 val += (data[j + (i * subFrameSize)] * data[j + (i * subFrameSize)]);
d@0 771 }
d@0 772
d@0 773 float frameval = val / (totalEnergy + std::numeric_limits<float>::epsilon());
d@0 774
d@0 775 totalentropy += (frameval * Log2(frameval + std::numeric_limits<float>::epsilon()));
d@0 776 }
d@0 777
d@0 778 return (totalentropy * -1);
d@0 779 }
d@0 780
d@0 781 void AudioSourceFeatureExtractor::PDF_getResampleKrnl(std::vector<float> freqVec, float fsProc, int nfft, int nBin, std::vector<float> &outLogFreqVec, std::vector<std::vector<float>> &outKrnl)
d@0 782 {
d@0 783 float f1 = freqVec[0];
d@0 784 float f2 = freqVec[freqVec.size() - 1];
d@0 785
d@0 786 outLogFreqVec = LinSpace(log(f1), log(f2), nBin);
d@0 787 std::vector<float> f(nfft / 2, 0.0f);
d@0 788
d@0 789 for (size_t i = 0; i < (size_t)nBin; i++)
d@0 790 {
d@0 791 outKrnl.push_back(f);
d@0 792 }
d@0 793
d@0 794 for (size_t i = 0; i < outLogFreqVec.size(); i++)
d@0 795 {
d@0 796 outLogFreqVec[i] = exp(outLogFreqVec[i]);
d@0 797 }
d@0 798
d@0 799 for (size_t i = 1; i < (size_t)nBin - 2; i++)
d@0 800 {
d@0 801 float freqBinLin = outLogFreqVec[i] / (fsProc / nfft);
d@0 802
d@0 803 float nextBinUp = outLogFreqVec[i + 1] / (fsProc / nfft);
d@0 804
d@0 805 float nextBinDown = outLogFreqVec[i - 1] / (fsProc / nfft);
d@0 806
d@0 807 float filtWid = nextBinUp - nextBinDown;
d@0 808
d@0 809 if (filtWid <= 2)
d@0 810 {
d@0 811 // log resolution is finer than linear resolution
d@0 812 // linear interpolation of neighboring bins
d@0 813
d@0 814 float binDown = floor(freqBinLin);
d@0 815 float frac = freqBinLin - binDown;
d@0 816 std::vector<float> filt(nfft/2, 0.0f);
d@0 817 filt[((int)binDown) - 1] = 1 - frac;
d@0 818 filt[((int)binDown + 1) - 1] = frac;
d@0 819 outKrnl[i][((int)binDown) - 1] = filt[((int)binDown) - 1];
d@0 820 outKrnl[i][((int)binDown + 1) - 1] = filt[((int)binDown + 1) - 1];
d@0 821
d@0 822 }
d@0 823 else
d@0 824 {
d@0 825 float ceilNextBinDown = ceil(nextBinDown);
d@0 826 float floorFreqBinLin = floor(freqBinLin);
d@0 827 float ceilFreqBinLin = ceil(freqBinLin);
d@0 828 float floorNextBinUp = floor(nextBinUp);
d@0 829
d@0 830 std::vector<int> idxLow;
d@0 831 std::vector<int> idxHigh;
d@0 832 std::vector<int> filtIdx;
d@0 833
d@0 834 for (size_t j = (size_t)ceilNextBinDown; j <= (size_t)floorFreqBinLin; j++ )
d@0 835 {
d@0 836 idxLow.push_back(j);
d@0 837 }
d@0 838
d@0 839 for (size_t j = (size_t)ceilFreqBinLin; j <= (size_t)floorNextBinUp; j++ )
d@0 840 {
d@0 841 idxHigh.push_back(j);
d@0 842 }
d@0 843
d@0 844 std::vector<int>::iterator it;
d@0 845 it = std::unique (idxLow.begin(), idxLow.end());
d@0 846 idxLow.resize(std::distance(idxLow.begin(), it));
d@0 847
d@0 848 it = std::unique (idxHigh.begin(), idxHigh.end());
d@0 849 idxHigh.resize(std::distance(idxHigh.begin(), it));
d@0 850
d@0 851 filtIdx.reserve(idxLow.size() + idxHigh.size()); // preallocate memory
d@0 852 filtIdx.insert(filtIdx.end(), idxLow.begin(), idxLow.end());
d@0 853 filtIdx.insert(filtIdx.end(), idxHigh.begin(), idxHigh.end());
d@0 854
d@0 855 std::vector<float> rampUp;
d@0 856 std::vector<float> rampDown;
d@0 857 for (size_t j = 0; j < filtIdx.size(); j++)
d@0 858 {
d@0 859 rampUp.push_back(1 - (freqBinLin - filtIdx[j]) / (freqBinLin - nextBinDown));
d@0 860 rampDown.push_back(1 - (filtIdx[j] - freqBinLin) / (nextBinUp - freqBinLin));
d@0 861 }
d@0 862
d@0 863 std::vector<float> filt;
d@0 864
d@0 865 for (size_t j = 0; j < rampUp.size(); j++)
d@0 866 {
d@0 867 filt.push_back(min(rampUp[j], rampDown[j]));
d@0 868 }
d@0 869
d@0 870 float sumfilt = 0.0f;
d@0 871 for (size_t j = 0; j < filt.size(); j++)
d@0 872 {
d@0 873 sumfilt += filt[j];
d@0 874 }
d@0 875
d@0 876 for (size_t j = 0; j < filt.size(); j++)
d@0 877 {
d@0 878 filt[j] /= sumfilt;
d@0 879 }
d@0 880
d@0 881 for (size_t j = 0; j < filtIdx.size(); j++)
d@0 882 {
d@0 883 outKrnl[i][filtIdx[j] - 1] = filt[j];
d@0 884 }
d@0 885 }
d@0 886 }
d@0 887
d@0 888 // special routine for first bin
d@0 889 // get frequency in linear bins
d@0 890
d@0 891 float freqBinLin = outLogFreqVec[0] / (fsProc/(float)nfft);
d@0 892 float binDown = floor(freqBinLin);
d@0 893 float frac = freqBinLin - binDown;
d@0 894
d@0 895 std::vector<float> filt(nfft/2, 0.0f);
d@0 896 filt[((int)binDown) - 1] = 1 - frac;
d@0 897 filt[((int)binDown + 1) - 1] = frac;
d@0 898
d@0 899 outKrnl[0][((int)binDown) - 1] = filt[((int)binDown) - 1];
d@0 900 outKrnl[0][((int)binDown + 1) - 1] = filt[((int)binDown + 1) - 1];
d@0 901
d@0 902 // special routine for last bin
d@0 903 // get frequency in linear bins
d@0 904 freqBinLin = outLogFreqVec[outLogFreqVec.size() - 1] / (fsProc/nfft);
d@0 905
d@0 906 // get next lower bin
d@0 907 float nextBinDown = outLogFreqVec[outLogFreqVec.size() - 2] / (fsProc/nfft);
d@0 908
d@0 909 float ceilNextBinDown = ceil(nextBinDown); //Subtract by -1 because of array
d@0 910 float floorFreqBinLin = floor(freqBinLin);
d@0 911
d@0 912 std::vector<int> filtIdx;
d@0 913
d@0 914 for (size_t i = (size_t)ceilNextBinDown; i <= (size_t)floorFreqBinLin; i++ )
d@0 915 {
d@0 916 filtIdx.push_back(i);
d@0 917 }
d@0 918
d@0 919 std::vector<float> filt2;
d@0 920 for (size_t i = 0; i < filtIdx.size(); i++)
d@0 921 {
d@0 922 filt2.push_back(1 - (freqBinLin - filtIdx[i]) / (freqBinLin - nextBinDown));
d@0 923 }
d@0 924
d@0 925 float sumfilt = 0.0f;
d@0 926 for (size_t i = 0; i < filt2.size(); i++)
d@0 927 {
d@0 928 sumfilt += filt2[i];
d@0 929 }
d@0 930
d@0 931 for (size_t i = 0; i < filt2.size(); i++)
d@0 932 {
d@0 933 filt2[i] /= sumfilt;
d@0 934 }
d@0 935
d@0 936 for (size_t j = 0; j < filtIdx.size(); j++)
d@0 937 {
d@0 938 outKrnl[outKrnl.size() - 1][filtIdx[j] - 1] = filt2[j];
d@0 939 }
d@0 940 }
d@0 941
d@0 942 float AudioSourceFeatureExtractor::Log2(float n)
d@0 943 {
d@0 944 // log(n)/log(2) is log2.
d@0 945 return logf( n ) / logf( 2 );
d@0 946 }
d@0 947
d@0 948 int AudioSourceFeatureExtractor::Sign(float x)
d@0 949 {
d@0 950 int retValue = 0;
d@0 951 if (x >= 0)
d@0 952 retValue = 1;
d@0 953 if (x < 0)
d@0 954 retValue = -1;
d@0 955
d@0 956 return retValue;
d@0 957 }
d@0 958
d@0 959 std::vector<float> AudioSourceFeatureExtractor::LinSpace(float min, float max, int n)
d@0 960 {
d@0 961 std::vector<float> result;
d@0 962 // vector iterator
d@0 963 int iterator = 0;
d@0 964
d@0 965 for (int i = 0; i <= n-2; i++)
d@0 966 {
d@0 967 float temp = min + i*(max-min)/(floor((float)n) - 1);
d@0 968 result.insert(result.begin() + iterator, temp);
d@0 969 iterator += 1;
d@0 970 }
d@0 971
d@0 972 result.insert(result.begin() + iterator, max);
d@0 973 return result;
d@0 974 }
d@0 975
d@0 976 std::vector<float> AudioSourceFeatureExtractor::LogSpace(float min, float max, int n)
d@0 977 {
d@0 978 float logMin = log(min);
d@0 979 float logMax = log(max);
d@0 980 double delta = (logMax - logMin) / n;
d@0 981 double accDelta = 0;
d@0 982 std::vector<float> v;
d@0 983
d@0 984 for (int i = 0; i <= n; i++)
d@0 985 {
d@0 986 v.push_back((float) exp(logMin + accDelta));
d@0 987 accDelta += delta;// accDelta = delta * i
d@0 988 }
d@0 989
d@0 990 return v;
d@0 991 }
d@0 992