annotate Source/AudioSourceFeatureExtractor.cpp @ 15:585caf503ef5 tip

Tidy up for ROLI
author Geogaddi\David <d.m.ronan@qmul.ac.uk>
date Tue, 17 May 2016 18:50:19 +0100
parents 761289a660c6
children
rev   line source
d@0 1 /*
d@0 2 ==============================================================================
d@0 3
d@0 4 AudioSourceFeatureExtractor.cpp
d@0 5 Created: 11 Aug 2014 10:41:02am
d@0 6 Author: david.ronan
d@0 7
d@0 8 ==============================================================================
d@0 9 */
d@0 10
d@0 11 #include "AudioSourceFeatureExtractor.h"
d@0 12 #include "ObservationData.h"
d@0 13 #include <math.h>
d@0 14 #include <cmath>
d@0 15 #include <algorithm>
d@4 16 #include <vector>
d@7 17 #include <memory>
d@4 18
d@1 19 //#include <windows.h>
d@0 20
d@0 21 //============================================================================== AudioSourceFeatureExtractor
d@15 22 AudioSourceFeatureExtractor::AudioSourceFeatureExtractor()
d@0 23 {
d@4 24 m_fft = std::make_unique<FFTW>(FFTSIZE);
d@4 25 m_fInputBuffer = std::vector<float>(FFTSIZE, 0);;
d@4 26 m_fMagnitudeSpectrum = std::vector<float>(MAGNITUDESIZE, 0);
d@4 27 m_fPreviousMagnitudeSpectrum = std::vector<float>(MAGNITUDESIZE, 0);
d@4 28 m_OutReal = std::vector<float>(FFTSIZE, 0);
d@4 29 m_OutImag = std::vector<float>(FFTSIZE, 0);
d@0 30 }
d@0 31
d@0 32 //============================================================================== ~AudioSourceFeatureExtractor
d@0 33 AudioSourceFeatureExtractor::~AudioSourceFeatureExtractor()
d@0 34 {
d@0 35
d@0 36 }
d@0 37
d@0 38 //============================================================================== init
d@0 39 void AudioSourceFeatureExtractor::Initialise(float fSampleRate)
d@1 40 {
d@0 41 m_fSampleRate = fSampleRate;
d@1 42 m_iWriteIdx = 0;
d@0 43
d@0 44 for (int i = 1; i <= MAGNITUDESIZE; i++)
d@0 45 {
d@0 46 float win = ((fSampleRate / (2 * MAGNITUDESIZE)) * i);
d@0 47 m_fFreqBins.push_back(win);
d@0 48 }
d@0 49
d@0 50 m_MFCC.initMFFCvariables(12, MAGNITUDESIZE, m_fSampleRate);
d@10 51 m_SpectralContrast.initSpectralContrastVariables(FFTSIZE, m_fSampleRate);
d@0 52 }
d@0 53
d@0 54 //============================================================================== process
d@9 55 std::vector<ObservationData> AudioSourceFeatureExtractor::Process(const std::vector<float> data)
d@1 56 {
d@0 57 std::vector<ObservationData> observations = std::vector<ObservationData>();
d@0 58
d@4 59 m_fInputBuffer = std::vector<float>(FFTSIZE, 0);;
d@4 60 m_fMagnitudeSpectrum = std::vector<float>(MAGNITUDESIZE, 0);
d@4 61 m_fPreviousMagnitudeSpectrum = std::vector<float>(MAGNITUDESIZE, 0);
d@0 62 m_iWriteIdx = 0;
d@0 63
d@9 64 float perdiodicity = EstimatePerdiodicity(data);
d@0 65
d@9 66 float entropyofenergy = EntropyOfEnergy(data);
d@0 67
d@9 68 for (size_t n = 0; n < data.size(); n++)
d@0 69 {
d@0 70 m_fInputBuffer[m_iWriteIdx] = data[n];
d@0 71 m_iWriteIdx++;
d@0 72
d@0 73 //Do normal FFT
d@0 74
d@4 75 if (FFTSIZE == m_iWriteIdx)
d@0 76 {
d@0 77 //Get the peak amplitude rms and crest factor for the current frame
d@4 78 float peakvalue = 0;
d@4 79 float rms = 0;
d@12 80 float crestfactor;
d@0 81
d@4 82 for (int i = 0; i < FFTSIZE; i++)
d@0 83 {
d@4 84 if (m_fInputBuffer[i] > peakvalue)
d@0 85 peakvalue = abs(m_fInputBuffer[i]);
d@0 86
d@0 87 rms += (m_fInputBuffer[i] * m_fInputBuffer[i]);
d@0 88 }
d@1 89
d@0 90 rms /= FFTSIZE;
d@1 91 rms = sqrt(rms);
d@0 92
d@4 93 crestfactor = peakvalue / (rms + std::numeric_limits<float>::epsilon());
d@0 94
d@0 95 float ZCR = 0;
d@0 96 float inputBuffer2[FFTSIZE] = {0.0};
d@0 97
d@1 98 for (int i = 1; i < FFTSIZE; i++)
d@0 99 {
d@4 100 inputBuffer2[i] = m_fInputBuffer[i - 1];
d@0 101 }
d@0 102
d@1 103 for (int i = 0; i < FFTSIZE; i++)
d@0 104 {
d@0 105 ZCR += (abs(Sign(m_fInputBuffer[i]) - Sign(inputBuffer2[i])));
d@0 106 }
d@0 107
d@0 108 ZCR /= (2 * FFTSIZE);
d@0 109
d@0 110 //////////////////////////////////////////////////////////////////
d@1 111 for (int i = 0; i < FFTSIZE; i++)
d@0 112 {
d@4 113 float multiplier = static_cast<float>(0.5 * (1 - cos(2 * PI * i / (FFTSIZE - 1))));
d@0 114 m_fInputBuffer[i] = multiplier * m_fInputBuffer[i];
d@0 115 }
d@0 116
d@0 117 //Compute the FFT
d@4 118 m_OutReal = std::vector<float>(FFTSIZE, 0);
d@4 119 m_OutImag = std::vector<float>(FFTSIZE, 0);
d@4 120
d@0 121
d@0 122 m_fft->process(m_fInputBuffer, m_OutReal, m_OutImag);
d@0 123
d@0 124 //Compute the Magnitude
d@0 125 VectorDistance(m_OutReal, 1, m_OutImag, 1, m_fMagnitudeSpectrum, 1, MAGNITUDESIZE);
d@0 126
d@0 127 //Compute the spectral features
d@4 128 float centroid = 0;
d@4 129 float spread = 0;
d@4 130 float skewness = 0;
d@4 131 float kurtosis = 0;
d@0 132 float brightness = 0;
d@0 133 float rolloff85 = 0;
d@0 134 float rolloff95 = 0;
d@0 135 float spectralentropy = 0;
d@0 136 float flatness = 0;
d@0 137 float spectralcf = 0;
d@1 138 float spectralflux = 0;
d@0 139 std::vector<float> mfccs;
d@10 140 std::vector<float> spectralContrast;
d@10 141 std::vector<float> spectralValleys;
d@0 142
d@1 143 SpectralFeatures(m_fMagnitudeSpectrum, m_fPreviousMagnitudeSpectrum, MAGNITUDESIZE, centroid, spread, skewness, kurtosis, brightness, rolloff85, rolloff95, spectralentropy, flatness, spectralcf, spectralflux);
d@1 144
d@0 145 m_MFCC.ComputeMFCC(m_fMagnitudeSpectrum, mfccs);
d@0 146
d@10 147 m_SpectralContrast.computeSpectralContrast(m_fMagnitudeSpectrum, spectralContrast, spectralValleys);
d@10 148
d@0 149 //Update for Spectral Flux
d@4 150 for (int i = 0; i < MAGNITUDESIZE; i++)
d@0 151 {
d@0 152 m_fPreviousMagnitudeSpectrum[i] = m_fMagnitudeSpectrum[i];
d@1 153 }
d@0 154
d@0 155 //50% Hop size
d@4 156 for (int i = MAGNITUDESIZE; i < FFTSIZE; i++)
d@0 157 {
d@0 158 m_fInputBuffer[i - MAGNITUDESIZE] = m_fInputBuffer[i];
d@0 159 m_iWriteIdx = MAGNITUDESIZE;
d@0 160 }
d@1 161
d@0 162 //create an observation for this window and push it to the list of observations for this 30 sec. audio clip
d@10 163 ObservationData newObservation = ObservationData(rms, peakvalue, crestfactor, ZCR, centroid, spread, skewness, kurtosis, brightness, rolloff85, rolloff95, spectralentropy, flatness, spectralcf, spectralflux, mfccs, spectralContrast, spectralValleys, perdiodicity, entropyofenergy);
d@1 164 observations.push_back(newObservation);
d@0 165 }
d@0 166 }
d@1 167
d@0 168 return observations;
d@0 169 }
d@0 170
d@0 171 void AudioSourceFeatureExtractor::Finalize()
d@0 172 {
d@0 173 }
d@0 174
d@4 175 void AudioSourceFeatureExtractor::VectorDistance(std::vector<float> vIn1, int stride1, std::vector<float> vIn2, int stride2, std::vector<float> &vOut, int strideOut, size_t nElements)
d@0 176 {
d@4 177 //Compute remaining samples
d@0 178 for (size_t i = 0; i < nElements; i++)
d@0 179 {
d@4 180 vOut[i * strideOut] = sqrt(vIn1[i * stride1] * vIn1[i * stride1] + vIn2[i * stride2] * vIn2[i * stride2]);
d@0 181 }
d@0 182 }
d@0 183
d@4 184 void AudioSourceFeatureExtractor::SpectralFeatures(std::vector<float> &magnitude, std::vector<float> &prevmagnitude, size_t windowSize, float& centroid, float& spread, float& skew, float& kurtosis, float& brightness, float& rolloff95, float& rolloff85, float& spectralentropy, float& flatness, float& spectralcf, float& spectralflux)
d@0 185 {
d@0 186 float summagbin = 0;
d@0 187 float summag = 0;
d@0 188 float sumprevmag = 0;
d@0 189 float summagspread = 0;
d@0 190 float summagskewness = 0;
d@0 191 float summagkurtosis = 0;
d@0 192 float sumbrightness = 0;
d@0 193 float maxmag = 0;
d@0 194
d@0 195 float geo = 1;
d@0 196 float totalenergy = 0;
d@0 197 float currEnergy95 = 0;
d@0 198 float currEnergy85 = 0;
d@0 199 int countFFT85 = 0;
d@0 200 int countFFT95 = 0;
d@0 201
d@0 202 float normalisedMagSumEntropy = 0;
d@0 203
d@0 204 //Centroid
d@0 205 //////////////////////////////////////////////////////
d@0 206
d@0 207 for (size_t i = 0; i < windowSize; i++)
d@0 208 {
d@1 209 //Moments
d@0 210 summagbin += (m_fFreqBins[i] * magnitude[i]);
d@0 211 summag += magnitude[i];
d@0 212 sumprevmag += prevmagnitude[i];
d@0 213
d@0 214 //Spectral Crest factor
d@4 215 if (magnitude[i] > maxmag)
d@0 216 {
d@0 217 maxmag = magnitude[i];
d@0 218 }
d@0 219
d@0 220 float energy = magnitude[i] * magnitude[i];
d@1 221
d@0 222 //Roll off + Flatness
d@1 223
d@0 224 totalenergy += energy;
d@4 225 float exponent = static_cast<float>(1.0 / static_cast<float>(windowSize));
d@1 226
d@4 227 if (energy != 0)
d@0 228 {
d@1 229 geo *= pow(energy, exponent);
d@0 230 }
d@0 231 }
d@0 232
d@0 233 summag += std::numeric_limits<float>::epsilon();
d@0 234
d@0 235 centroid = summagbin / summag;
d@0 236
d@0 237 /////////////////////////////////////////////////////////
d@0 238
d@0 239 //Spread
d@0 240 //////////////////////////////////////////////////////
d@0 241
d@0 242 for (size_t i = 0; i < windowSize; i++)
d@0 243 {
d@0 244 float norm = magnitude[i] / (summag + std::numeric_limits<float>::epsilon());
d@0 245 float normprev = prevmagnitude[i] / (sumprevmag + std::numeric_limits<float>::epsilon());
d@0 246
d@0 247 //Spectral Flux
d@0 248 spectralflux += ((norm - normprev) * (norm - normprev));
d@0 249
d@1 250 //Entropy
d@0 251 normalisedMagSumEntropy += (norm * log(norm + std::numeric_limits<float>::epsilon()));
d@0 252
d@0 253 //Moments
d@4 254 summagspread += pow((m_fFreqBins[i] - centroid), 2) * norm;
d@4 255 summagskewness += pow((m_fFreqBins[i] - centroid), 3) * norm;
d@4 256 summagkurtosis += pow((m_fFreqBins[i] - centroid), 4) * norm;
d@0 257
d@0 258 //Brightness
d@0 259 if (m_fFreqBins[i] >= 1500)
d@0 260 {
d@0 261 sumbrightness += magnitude[i];
d@0 262 }
d@0 263 }
d@0 264
d@0 265 spread = sqrt(summagspread);
d@0 266
d@0 267 /////////////////////////////////////////////////////////
d@0 268
d@0 269 //Skewness
d@0 270 //////////////////////////////////////////////////////
d@0 271
d@0 272 float spreadtemp = spread;
d@0 273 spreadtemp += std::numeric_limits<float>::epsilon();
d@0 274
d@4 275 skew = summagskewness / pow(spreadtemp, 3);
d@0 276
d@0 277 /////////////////////////////////////////////////////////
d@0 278
d@0 279 //Kurtosis
d@0 280 //////////////////////////////////////////////////////
d@0 281
d@0 282 kurtosis = summagkurtosis / pow(spreadtemp, 4);
d@0 283
d@0 284 /////////////////////////////////////////////////////////
d@0 285
d@0 286 //Brightness
d@0 287 //////////////////////////////////////////////////////
d@0 288
d@0 289 brightness = sumbrightness / summag;
d@0 290
d@0 291 /////////////////////////////////////////////////////////
d@0 292
d@0 293 //Roll off 85% - 95%
d@0 294 /////////////////////////////////////////////////////////
d@0 295
d@3 296 while ((currEnergy95 <= 0.95 * totalenergy) && (countFFT95 < static_cast<int>(windowSize)))
d@0 297 {
d@0 298 currEnergy95 += (magnitude[countFFT95] * magnitude[countFFT95]);
d@0 299 countFFT95 += 1;
d@0 300
d@0 301 if (currEnergy85 <= 0.85 * totalenergy)
d@0 302 {
d@0 303 currEnergy85 += (magnitude[countFFT85] * magnitude[countFFT85]);
d@0 304 countFFT85 += 1;
d@0 305 }
d@0 306 }
d@0 307
d@0 308 countFFT85 -= 1;
d@0 309 countFFT95 -= 1;
d@0 310
d@0 311 //Normalization
d@0 312 if (currEnergy85 <= 0)
d@0 313 {
d@0 314 //If we have silence then our value should be 0.
d@0 315 rolloff85 = 0;
d@0 316 }
d@0 317 else
d@0 318 {
d@3 319 rolloff85 = (static_cast<float>(countFFT85) / static_cast<float>(windowSize));
d@0 320 }
d@0 321
d@0 322 //Normalization
d@0 323 if (currEnergy95 <= 0)
d@0 324 {
d@0 325 //If we have silence then our value should be 0.
d@0 326 rolloff95 = 0;
d@0 327 }
d@0 328 else
d@0 329 {
d@3 330 rolloff95 = (static_cast<float>(countFFT95) / static_cast<float>(windowSize));
d@0 331 }
d@0 332
d@0 333 /////////////////////////////////////////////////////
d@0 334
d@0 335 //Spectral Entropy
d@0 336 /////////////////////////////////////////////////////////
d@0 337
d@3 338 spectralentropy = static_cast<float>((normalisedMagSumEntropy / log(static_cast<float>(windowSize))) * - 1);
d@0 339
d@0 340 /////////////////////////////////////////////////////////
d@0 341
d@0 342 /////////////////////////////////////////////////////
d@0 343
d@0 344 //Flatness
d@0 345 /////////////////////////////////////////////////////////
d@4 346 if (totalenergy != 0 && geo != 0)
d@0 347 {
d@3 348 flatness = geo / (totalenergy / static_cast<float>(windowSize));
d@0 349 }
d@0 350 else
d@0 351 {
d@0 352 flatness = 0;
d@0 353 }
d@0 354
d@0 355 /////////////////////////////////////////////////////////
d@0 356
d@0 357 //Spectral Crest Factor
d@0 358 /////////////////////////////////////////////////////////
d@0 359
d@4 360 if (summag != 0)
d@0 361 {
d@0 362 spectralcf = maxmag / summag;
d@0 363 }
d@0 364 else
d@0 365 {
d@0 366 spectralcf = 0;
d@0 367 }
d@0 368 }
d@0 369
d@8 370 void AudioSourceFeatureExtractor::XCorr(std::vector<float>& output, std::vector<float> input1, std::vector<float> input2, size_t maxLag = 0)
d@0 371 {
d@4 372 // TODO: adapt for different sizes
d@0 373
d@4 374 // find next power of 2 for efficient FFT
d@4 375 size_t fftSize = 1;
d@8 376 while (fftSize < 2 * static_cast<size_t>(input1.size()) - 1)
d@4 377 {
d@4 378 fftSize *= 2;
d@4 379 }
d@1 380
d@0 381 FFTW fft1 = FFTW(fftSize);
d@0 382 FFTW fft2 = FFTW(fftSize);
d@1 383
d@4 384 // allocate space for FFTW processing
d@4 385 std::vector<float> in1 = std::vector<float>(fftSize, 0);;
d@4 386 std::vector<float> in2 = std::vector<float>(fftSize, 0);;
d@0 387
d@4 388 // output from FFT is only up to Nyquist frequency
d@4 389 std::vector<float> out1Real = std::vector<float>(fftSize, 0);
d@4 390 std::vector<float> out1Imag = std::vector<float>(fftSize, 0);
d@4 391 std::vector<float> out2Real = std::vector<float>(fftSize, 0);
d@4 392 std::vector<float> out2Imag = std::vector<float>(fftSize, 0);
d@0 393
d@4 394 // copy input to allocated arrays
d@8 395 for (size_t i = 0; i < static_cast<size_t>(input1.size()); ++i)
d@4 396 {
d@4 397 in1[i] = input1[i];
d@4 398 in2[i] = input2[i];
d@4 399 }
d@0 400
d@0 401 fft1.process(in1, out1Real, out1Imag);
d@0 402 fft2.process(in2, out2Real, out2Imag);
d@0 403
d@4 404 // multiply out1*conj(out2) in FFT domain
d@4 405 fftwf_complex* product = static_cast<fftwf_complex*>(fftwf_malloc(sizeof(fftwf_complex) * fftSize));
d@4 406 for (size_t i = 0; i < fftSize; ++i)
d@4 407 {
d@4 408 product[i][0] = (out1Real[i] * out2Real[i] + out1Imag[i] * out2Imag[i]);
d@4 409 product[i][1] = (out1Imag[i] * out2Real[i] - out1Real[i] * out2Imag[i]);
d@4 410 }
d@0 411
d@4 412 // compute IFFT
d@0 413
d@0 414 // do FFT, adapted from iTraktorLib "FFTW.h"
d@4 415 fftw_iodim dim;
d@4 416 dim.n = fftSize;
d@4 417 dim.is = 1;
d@4 418 dim.os = 1;
d@0 419
d@4 420 fftwf_plan plan;
d@0 421
d@4 422 float* result = static_cast<float*>(fftwf_malloc(sizeof(float) * fftSize));
d@4 423 plan = fftwf_plan_guru_dft_c2r(1, &dim, 0, nullptr, product, result, FFTW_ESTIMATE);
d@4 424 fftwf_execute_dft_c2r(plan, product, result);
d@4 425 fftwf_destroy_plan(plan);
d@0 426
d@4 427 // copy real part of result back to output array, normalize by FFT size
d@8 428 if (maxLag == 0 || maxLag >= static_cast<size_t>(input1.size()))
d@4 429 {
d@8 430 maxLag = static_cast<size_t>(input1.size()) - 1;
d@4 431 }
d@0 432
d@8 433 output = std::vector<float>(2 * maxLag + 1, 0);
d@0 434
d@4 435 for (size_t i = fftSize - maxLag; i < fftSize; ++i)
d@4 436 {
d@4 437 output[i - fftSize + maxLag] = result[i] / fftSize;
d@4 438 }
d@0 439
d@4 440 for (unsigned i = 0; i <= static_cast<unsigned>(maxLag); ++i)
d@4 441 {
d@4 442 output[i + maxLag] = result[i] / fftSize;
d@4 443 }
d@0 444
d@0 445 fftwf_free(result);
d@0 446 fftwf_free(product);
d@0 447 }
d@0 448
d@9 449 float AudioSourceFeatureExtractor::EstimatePerdiodicity(std::vector<float> data)
d@0 450 {
d@12 451 float periodicity;
d@8 452
d@9 453 std::vector<float> downsampleddata = std::vector<float>(data.size(), 0);
d@8 454
d@8 455 std::vector<float> xcorr;
d@0 456
d@9 457 for (size_t k = 0; k < data.size(); k++)
d@0 458 {
d@0 459 downsampleddata[k] = data[k];
d@0 460 }
d@8 461
d@8 462 EnvelopeCurve(downsampleddata, downsampleddata, m_fSampleRate);
d@0 463
d@3 464 int startLag = static_cast<int>(floor(static_cast<float>(60.0 / 480.0 * m_fSampleRate)));
d@3 465 int endLag = static_cast<int>(floor(static_cast<float>(60.0 / 30.0 * m_fSampleRate)));
d@8 466 int thresh = static_cast<int>(floor(static_cast<float>(0.2 * m_fSampleRate)));
d@0 467
d@12 468
d@8 469 XCorr(xcorr, downsampleddata, downsampleddata, endLag);
d@12 470 size_t xcorrlen = xcorr.size();
d@0 471
d@3 472 int i = static_cast<int>(floor(xcorrlen / 2));
d@0 473
d@0 474 std::vector<float> temp;
d@4 475 for (size_t j = i; j < xcorrlen; j++)
d@0 476 {
d@0 477 temp.push_back(xcorr[j]);
d@0 478 }
d@0 479
d@0 480 std::vector<float> temp2;
d@4 481 for (size_t j = static_cast<size_t>(std::max(1, startLag)); j < static_cast<size_t>(std::min(endLag, static_cast<int>(temp.size()))); j++)
d@0 482 {
d@0 483 temp2.push_back(temp[j]);
d@0 484 }
d@0 485
d@0 486 int maxIdx = 0;
d@0 487 float maxVal = std::numeric_limits<float>::lowest();
d@4 488 for (size_t j = 0; j < temp2.size(); j++)
d@0 489 {
d@4 490 if (temp2[j] > maxVal)
d@0 491 {
d@0 492 maxIdx = j;
d@0 493 maxVal = temp2[j];
d@0 494 }
d@0 495 }
d@0 496
d@4 497 int from = std::max(1, maxIdx - thresh);
d@4 498 int to = std::min(static_cast<int>(temp2.size()), maxIdx + thresh);
d@0 499
d@1 500 float minValInRange = std::numeric_limits<float>::max();
d@0 501
d@9 502 for (size_t j = static_cast<size_t>(from); j < static_cast<size_t>(to); j++)
d@1 503 {
d@4 504 if (temp2[j] < minValInRange)
d@0 505 {
d@0 506 minValInRange = temp2[j];
d@0 507 }
d@0 508 }
d@1 509
d@4 510 periodicity = (maxVal - minValInRange);
d@0 511
d@12 512 //std::string dave = "Periodicity " + std::to_string(periodicity) + "\n";
d@1 513 //OutputDebugString(dave.c_str());
d@0 514
d@0 515 return periodicity;
d@0 516 }
d@0 517
d@8 518 void AudioSourceFeatureExtractor::EnvelopeCurve(std::vector<float> data, std::vector<float> & out, float sampleRate)
d@0 519 {
d@4 520 float release = 0.02f;
d@0 521 float envelope = 0.0f;
d@0 522
d@1 523 float gr = exp(-1 / (sampleRate * release));
d@0 524
d@8 525 for (size_t i = 0; i < data.size(); i++)
d@1 526 {
d@0 527 float EnvIn = abs(data[i]);
d@0 528
d@4 529 if (envelope < EnvIn)
d@0 530 {
d@0 531 envelope = EnvIn;
d@0 532 }
d@0 533 else
d@0 534 {
d@0 535 envelope = envelope * gr;
d@0 536 envelope = envelope + (1 - gr) * EnvIn;
d@1 537 }
d@1 538
d@1 539 out[i] = envelope;
d@0 540 }
d@0 541 }
d@0 542
d@9 543 float AudioSourceFeatureExtractor::EntropyOfEnergy(std::vector<float> data)
d@0 544 {
d@0 545 float totalEnergy = 0.0;
d@0 546 float totalentropy = 0.0;
d@0 547 const int numSubFrames = 30;
d@0 548
d@0 549 //Calculate Total Energy and normalise it
d@9 550 for (size_t i = 0; i < data.size(); i++)
d@0 551 {
d@0 552 totalEnergy += (data[i] * data[i]);
d@0 553 }
d@0 554
d@9 555 int subFrameSize = data.size() / numSubFrames;
d@0 556
d@0 557 for (size_t i = 0; i < numSubFrames; i++)
d@0 558 {
d@0 559 float val = 0.0;
d@0 560
d@3 561 for (size_t j = 0; j < static_cast<size_t>(subFrameSize); j++)
d@0 562 {
d@0 563 val += (data[j + (i * subFrameSize)] * data[j + (i * subFrameSize)]);
d@0 564 }
d@0 565
d@0 566 float frameval = val / (totalEnergy + std::numeric_limits<float>::epsilon());
d@0 567
d@0 568 totalentropy += (frameval * Log2(frameval + std::numeric_limits<float>::epsilon()));
d@0 569 }
d@0 570
d@0 571 return (totalentropy * -1);
d@0 572 }
d@0 573
d@1 574 float AudioSourceFeatureExtractor::Log2(float n)
d@1 575 {
d@1 576 // log(n)/log(2) is log2.
d@4 577 return logf(n) / logf(2);
d@0 578 }
d@0 579
d@0 580 int AudioSourceFeatureExtractor::Sign(float x)
d@0 581 {
d@0 582 int retValue = 0;
d@0 583 if (x >= 0)
d@0 584 retValue = 1;
d@0 585 if (x < 0)
d@0 586 retValue = -1;
d@0 587
d@0 588 return retValue;
d@0 589 }
d@0 590
d@0 591 std::vector<float> AudioSourceFeatureExtractor::LinSpace(float min, float max, int n)
d@0 592 {
d@0 593 std::vector<float> result;
d@0 594 // vector iterator
d@0 595 int iterator = 0;
d@0 596
d@4 597 for (int i = 0; i <= n - 2; i++)
d@0 598 {
d@12 599 float temp = min + i * (max - min) / (floor(static_cast<float>(n)) - 1);
d@0 600 result.insert(result.begin() + iterator, temp);
d@0 601 iterator += 1;
d@0 602 }
d@0 603
d@0 604 result.insert(result.begin() + iterator, max);
d@0 605 return result;
d@0 606 }
d@0 607
d@0 608 std::vector<float> AudioSourceFeatureExtractor::LogSpace(float min, float max, int n)
d@0 609 {
d@0 610 float logMin = log(min);
d@0 611 float logMax = log(max);
d@0 612 double delta = (logMax - logMin) / n;
d@0 613 double accDelta = 0;
d@0 614 std::vector<float> v;
d@0 615
d@0 616 for (int i = 0; i <= n; i++)
d@0 617 {
d@4 618 v.push_back(static_cast<float>(exp(logMin + accDelta)));
d@0 619 accDelta += delta;// accDelta = delta * i
d@0 620 }
d@0 621
d@0 622 return v;
d@0 623 }
d@8 624
d@8 625 //void AudioSourceFeatureExtractor::ConvolveFunction(float* & z, float* x, float* y, size_t& lenz, size_t lenx, size_t leny)
d@8 626 //{
d@8 627 // // computes cross-correlation of two vectors, takes 2 vectors of the same length and computes 2*length-1 lags
d@8 628 //
d@8 629 // //float *zptr, s, *xp, *yp;
d@8 630 // //size_t n_lo, n_hi;
d@8 631 //
d@8 632 // //lenz = lenx + leny - 1;
d@8 633 //
d@8 634 // //z = new float[lenz];
d@8 635 //
d@8 636 // //zptr = z;
d@8 637 //
d@8 638 // //for (size_t i = 0; i < lenz; i++)
d@8 639 // //{
d@8 640 // // s = 0.0;
d@8 641 // // n_lo = 0 > (i - leny + 1) ? 0 : (i - leny + 1);
d@8 642 // // n_hi = (lenx - 1) < i ? (lenx - 1) : i;
d@8 643 // // xp = &x[0] + n_lo;
d@8 644 // // yp = &y[0] + i - n_lo;
d@8 645 //
d@8 646 // // for (size_t n = n_lo; n <= n_hi; n++)
d@8 647 // // {
d@8 648 // // s += *xp * *yp;
d@8 649 // // xp++;
d@8 650 // // yp--;
d@8 651 // // }
d@8 652 //
d@8 653 // // *zptr = s;
d@8 654 // // zptr++;
d@8 655 // //}
d@8 656 //}
d@8 657
d@8 658 //void AudioSourceFeatureExtractor::DownSampler(float* data, float* & out, size_t lenIn, size_t& lenOut, float currentSampleRate, float futureSampleRate)
d@8 659 //{
d@8 660 // ////Low pass our data before we decimate
d@8 661 // //const int filterlength = 101;
d@8 662 // //float* tempdata = new float[lenIn];
d@8 663 // //memset(tempdata, 0, sizeof(float)*lenIn);
d@8 664 //
d@8 665 // ////Coefficients were taken from Matlab
d@8 666 // //float coeffs[filterlength] ={2.0839274e-05f, 6.6880953e-06f,-4.7252855e-05f, -3.4874138e-05f, 7.8508412e-05f,9.7089069e-05f,-9.9197161e-05f, -0.00020412984f, 8.2261082e-05f, 0.00035689832f, 9.4890293e-06f, -0.00053820928f, -0.00021722882f, 0.00070567406f, 0.00057491515f, -0.00078844035f, -0.0010939316f, 0.00069057313f, 0.0017460365f, -0.00030308162f, -0.0024484061f, -0.00047496572f, 0.0030552838f, 0.0017078921f, -0.0033604759f, -0.0033912712f, 0.0031135236f, 0.0054217125f, -0.0020499325f, -0.0075746416f, -6.7239242e-05f, 0.0094950572f, 0.0034002098f, -0.010703080f, -0.0079918290f, 0.010610434f, 0.013732497f, -0.0085344436f, -0.020346783f, 0.0036776769f, 0.027405130f, 0.0050050775f, -0.034361813f, -0.019287759f, 0.040615436f, 0.043452863f,-0.045583628f, -0.093336113f, 0.048780452f, 0.31394306f, 0.45011634f, 0.31394306f, 0.048780452f,-0.093336113f,-0.045583628f, 0.043452863f,0.040615436f,-0.019287759f, -0.034361813f, 0.0050050775f, 0.027405130f,0.0036776769f,-0.020346783f, -0.0085344436f, 0.013732497f, 0.010610434f, -0.0079918290f, -0.010703080f, 0.0034002098f, 0.0094950572f, -6.7239242e-05f, -0.0075746416f, -0.0020499325f, 0.0054217125f, 0.0031135236f, -0.0033912712f, -0.0033604759f, 0.0017078921f, 0.0030552838f, -0.00047496572f, -0.0024484061f, -0.00030308162f, 0.0017460365f,0.00069057313f, -0.0010939316f, -0.00078844035f, 0.00057491515f, 0.00070567406f, -0.00021722882f, -0.00053820928f, 9.4890293e-06f, 0.00035689832f, 8.2261082e-05f, -0.00020412984f, -9.9197161e-05f, 9.7089069e-05f, 7.8508412e-05f, -3.4874138e-05f, -4.7252855e-05f, 6.6880953e-06f, 2.0839274e-05f};
d@8 667 // //
d@8 668 // //float acc; // accumulator for MACs
d@8 669 // // float* coeffp; // pointer to coefficients
d@8 670 // // float* inputp; // pointer to input samples
d@8 671 // //float* endp = &data[lenIn -1]; // pointer to last sample
d@8 672 // //
d@8 673 // //
d@8 674 // // // apply the filter to each input sample
d@8 675 // // for (size_t n = 0; n < lenIn; n++ )
d@8 676 // //{
d@8 677 // // // calculate output n
d@8 678 // // coeffp = &coeffs[0];
d@8 679 // // inputp = &data[filterlength - 1 + n];
d@8 680 //
d@8 681 // // acc = 0.0f;
d@8 682 // // for (int k = 0; k < filterlength; k++ )
d@8 683 // // {
d@8 684 // // if(inputp <= endp)
d@8 685 // // acc += (*coeffp++) * (*inputp--);
d@8 686 // // else
d@8 687 // // {
d@8 688 // // //When we reach the end of the buffer
d@8 689 // // acc += (*coeffp++) * 0.0f;
d@8 690 // // *inputp--;
d@8 691 // // }
d@8 692 // // }
d@8 693 //
d@8 694 // // tempdata[n] = acc;
d@8 695 // // }
d@8 696 // //int ratio = (int) (currentSampleRate / futureSampleRate);
d@8 697 // //
d@8 698 // //lenOut = lenIn / ratio;
d@8 699 //
d@8 700 // //out = new float[lenOut];
d@8 701 // //memset(out, 0, sizeof(float)*lenOut);
d@8 702 //
d@8 703 //
d@8 704 // //int idx = 0;
d@8 705 // //for(size_t i = 0; i < lenIn; i = i + ratio)
d@8 706 // //{
d@8 707 // // out[idx] = tempdata[i];
d@8 708 // // idx++;
d@8 709 // //}
d@8 710 //
d@8 711 // //if(tempdata != NULL)
d@8 712 // //{
d@8 713 // // delete[] tempdata;
d@8 714 // // tempdata = nullptr;
d@8 715 // //}
d@8 716 //}
d@8 717
d@8 718 //void AudioSourceFeatureExtractor::Normalise(float* data, float* & out, size_t len)
d@8 719 //{
d@8 720 // //float maxvalue = std::numeric_limits<float>::lowest();
d@8 721 //
d@8 722 // //for (size_t i = 0; i < len; i++)
d@8 723 // //{
d@8 724 // // if (data[i] > maxvalue)
d@8 725 // // maxvalue = data[i];
d@8 726 // //}
d@8 727 //
d@8 728 // //for (size_t i = 0; i < len; i++)
d@8 729 // //{
d@8 730 // // out[i] = data[i] / maxvalue;
d@8 731 // //}
d@8 732 //}
d@8 733
d@8 734 //void AudioSourceFeatureExtractor::PDF_getResampleKrnl(std::vector<float> freqVec, float fsProc, int nfft, int nBin, std::vector<float>& outLogFreqVec, std::vector<std::vector<float>>& outKrnl)
d@8 735 //{
d@8 736 // //float f1 = freqVec[0];
d@8 737 // //float f2 = freqVec[freqVec.size() - 1];
d@8 738 //
d@8 739 // //outLogFreqVec = LinSpace(log(f1), log(f2), nBin);
d@8 740 // //std::vector<float> f(nfft / 2, 0.0f);
d@8 741 //
d@8 742 // //for (size_t i = 0; i < static_cast<size_t>(nBin); i++)
d@8 743 // //{
d@8 744 // // outKrnl.push_back(f);
d@8 745 // //}
d@8 746 //
d@8 747 // //for (size_t i = 0; i < outLogFreqVec.size(); i++)
d@8 748 // //{
d@8 749 // // outLogFreqVec[i] = exp(outLogFreqVec[i]);
d@8 750 // //}
d@8 751 //
d@8 752 // //for (size_t i = 1; i < static_cast<size_t>(nBin) - 2; i++)
d@8 753 // //{
d@8 754 // // float freqBinLin = outLogFreqVec[i] / (fsProc / nfft);
d@8 755 //
d@8 756 // // float nextBinUp = outLogFreqVec[i + 1] / (fsProc / nfft);
d@8 757 //
d@8 758 // // float nextBinDown = outLogFreqVec[i - 1] / (fsProc / nfft);
d@8 759 //
d@8 760 // // float filtWid = nextBinUp - nextBinDown;
d@8 761 //
d@8 762 // // if (filtWid <= 2)
d@8 763 // // {
d@8 764 // // // log resolution is finer than linear resolution
d@8 765 // // // linear interpolation of neighboring bins
d@8 766 //
d@8 767 // // float binDown = floor(freqBinLin);
d@8 768 // // float frac = freqBinLin - binDown;
d@8 769 // // std::vector<float> filt(nfft / 2, 0.0f);
d@8 770 // // filt[((int)binDown) - 1] = 1 - frac;
d@8 771 // // filt[((int)binDown + 1) - 1] = frac;
d@8 772 // // outKrnl[i][((int)binDown) - 1] = filt[((int)binDown) - 1];
d@8 773 // // outKrnl[i][((int)binDown + 1) - 1] = filt[((int)binDown + 1) - 1];
d@8 774 // // }
d@8 775 // // else
d@8 776 // // {
d@8 777 // // float ceilNextBinDown = ceil(nextBinDown);
d@8 778 // // float floorFreqBinLin = floor(freqBinLin);
d@8 779 // // float ceilFreqBinLin = ceil(freqBinLin);
d@8 780 // // float floorNextBinUp = floor(nextBinUp);
d@8 781 //
d@8 782 // // std::vector<int> idxLow;
d@8 783 // // std::vector<int> idxHigh;
d@8 784 // // std::vector<int> filtIdx;
d@8 785 //
d@8 786 // // for (size_t j = (size_t)ceilNextBinDown; j <= (size_t)floorFreqBinLin; j++)
d@8 787 // // {
d@8 788 // // idxLow.push_back(j);
d@8 789 // // }
d@8 790 //
d@8 791 // // for (size_t j = (size_t)ceilFreqBinLin; j <= (size_t)floorNextBinUp; j++)
d@8 792 // // {
d@8 793 // // idxHigh.push_back(j);
d@8 794 // // }
d@8 795 //
d@8 796 // // std::vector<int>::iterator it;
d@8 797 // // it = std::unique(idxLow.begin(), idxLow.end());
d@8 798 // // idxLow.resize(std::distance(idxLow.begin(), it));
d@8 799 //
d@8 800 // // it = std::unique(idxHigh.begin(), idxHigh.end());
d@8 801 // // idxHigh.resize(std::distance(idxHigh.begin(), it));
d@8 802 //
d@8 803 // // filtIdx.reserve(idxLow.size() + idxHigh.size()); // preallocate memory
d@8 804 // // filtIdx.insert(filtIdx.end(), idxLow.begin(), idxLow.end());
d@8 805 // // filtIdx.insert(filtIdx.end(), idxHigh.begin(), idxHigh.end());
d@8 806 //
d@8 807 // // std::vector<float> rampUp;
d@8 808 // // std::vector<float> rampDown;
d@8 809 // // for (size_t j = 0; j < filtIdx.size(); j++)
d@8 810 // // {
d@8 811 // // rampUp.push_back(1 - (freqBinLin - filtIdx[j]) / (freqBinLin - nextBinDown));
d@8 812 // // rampDown.push_back(1 - (filtIdx[j] - freqBinLin) / (nextBinUp - freqBinLin));
d@8 813 // // }
d@8 814 //
d@8 815 // // std::vector<float> filt;
d@8 816 //
d@8 817 // // for (size_t j = 0; j < rampUp.size(); j++)
d@8 818 // // {
d@8 819 // // filt.push_back(std::min(rampUp[j], rampDown[j]));
d@8 820 // // }
d@8 821 //
d@8 822 // // float sumfilt = 0.0f;
d@8 823 // // for (size_t j = 0; j < filt.size(); j++)
d@8 824 // // {
d@8 825 // // sumfilt += filt[j];
d@8 826 // // }
d@8 827 //
d@8 828 // // for (size_t j = 0; j < filt.size(); j++)
d@8 829 // // {
d@8 830 // // filt[j] /= sumfilt;
d@8 831 // // }
d@8 832 //
d@8 833 // // for (size_t j = 0; j < filtIdx.size(); j++)
d@8 834 // // {
d@8 835 // // outKrnl[i][filtIdx[j] - 1] = filt[j];
d@8 836 // // }
d@8 837 // // }
d@8 838 // //}
d@8 839 //
d@8 840 // // special routine for first bin
d@8 841 // // get frequency in linear bins
d@8 842 //
d@8 843 // //float freqBinLin = outLogFreqVec[0] / (fsProc / (float)nfft);
d@8 844 // //float binDown = floor(freqBinLin);
d@8 845 // //float frac = freqBinLin - binDown;
d@8 846 //
d@8 847 // //std::vector<float> filt(nfft / 2, 0.0f);
d@8 848 // //filt[((int)binDown) - 1] = 1 - frac;
d@8 849 // //filt[((int)binDown + 1) - 1] = frac;
d@8 850 //
d@8 851 // //outKrnl[0][((int)binDown) - 1] = filt[((int)binDown) - 1];
d@8 852 // //outKrnl[0][((int)binDown + 1) - 1] = filt[((int)binDown + 1) - 1];
d@8 853 //
d@8 854 // //// special routine for last bin
d@8 855 // //// get frequency in linear bins
d@8 856 // //freqBinLin = outLogFreqVec[outLogFreqVec.size() - 1] / (fsProc / nfft);
d@8 857 //
d@8 858 // //// get next lower bin
d@8 859 // //float nextBinDown = outLogFreqVec[outLogFreqVec.size() - 2] / (fsProc / nfft);
d@8 860 //
d@8 861 // //float ceilNextBinDown = ceil(nextBinDown); //Subtract by -1 because of array
d@8 862 // //float floorFreqBinLin = floor(freqBinLin);
d@8 863 //
d@8 864 // //std::vector<int> filtIdx;
d@8 865 //
d@8 866 // //for (size_t i = (size_t)ceilNextBinDown; i <= (size_t)floorFreqBinLin; i++)
d@8 867 // //{
d@8 868 // // filtIdx.push_back(i);
d@8 869 // //}
d@8 870 //
d@8 871 // //std::vector<float> filt2;
d@8 872 // //for (size_t i = 0; i < filtIdx.size(); i++)
d@8 873 // //{
d@8 874 // // filt2.push_back(1 - (freqBinLin - filtIdx[i]) / (freqBinLin - nextBinDown));
d@8 875 // //}
d@8 876 //
d@8 877 // //float sumfilt = 0.0f;
d@8 878 // //for (size_t i = 0; i < filt2.size(); i++)
d@8 879 // //{
d@8 880 // // sumfilt += filt2[i];
d@8 881 // //}
d@8 882 //
d@8 883 // //for (size_t i = 0; i < filt2.size(); i++)
d@8 884 // //{
d@8 885 // // filt2[i] /= sumfilt;
d@8 886 // //}
d@8 887 //
d@8 888 // //for (size_t j = 0; j < filtIdx.size(); j++)
d@8 889 // //{
d@8 890 // // outKrnl[outKrnl.size() - 1][filtIdx[j] - 1] = filt2[j];
d@8 891 // //}
d@8 892 //}
d@8 893
d@8 894