Mercurial > hg > batch-feature-extraction-tool
changeset 4:345acbd06029
Vectorised most things to make lifer easier. Still no debug version though. Weird.
author | Geogaddi\David <d.m.ronan@qmul.ac.uk> |
---|---|
date | Fri, 10 Jul 2015 03:04:11 +0100 |
parents | 005e311b5e62 |
children | a6bfbc7cb449 |
files | Source/AudioSourceFeatureExtractor.cpp Source/AudioSourceFeatureExtractor.h Source/FFTW.h Source/MFCC.cpp Source/MFCC.h |
diffstat | 5 files changed, 308 insertions(+), 401 deletions(-) [+] |
line wrap: on
line diff
--- a/Source/AudioSourceFeatureExtractor.cpp Fri Jul 10 00:33:15 2015 +0100 +++ b/Source/AudioSourceFeatureExtractor.cpp Fri Jul 10 03:04:11 2015 +0100 @@ -14,63 +14,24 @@ #include <cmath> #include <algorithm> #include <string> +#include <vector> + //#include <windows.h> //============================================================================== AudioSourceFeatureExtractor AudioSourceFeatureExtractor::AudioSourceFeatureExtractor() // m_fft(FFTSIZE) { - m_fft = new FFTW(FFTSIZE); - m_fInputBuffer = new float[FFTSIZE]; - m_fMagnitudeSpectrum = new float[MAGNITUDESIZE]; - m_fPreviousMagnitudeSpectrum = new float[MAGNITUDESIZE]; - m_OutReal = new float[FFTSIZE]; - m_OutImag = new float[FFTSIZE]; - - memset(m_fInputBuffer, 0, FFTSIZE * sizeof(float)); - memset(m_fMagnitudeSpectrum, 0, MAGNITUDESIZE * sizeof(float)); - memset(m_fPreviousMagnitudeSpectrum, 0, MAGNITUDESIZE * sizeof(float)); - - + m_fft = std::make_unique<FFTW>(FFTSIZE); + m_fInputBuffer = std::vector<float>(FFTSIZE, 0);; + m_fMagnitudeSpectrum = std::vector<float>(MAGNITUDESIZE, 0); + m_fPreviousMagnitudeSpectrum = std::vector<float>(MAGNITUDESIZE, 0); + m_OutReal = std::vector<float>(FFTSIZE, 0); + m_OutImag = std::vector<float>(FFTSIZE, 0); } //============================================================================== ~AudioSourceFeatureExtractor AudioSourceFeatureExtractor::~AudioSourceFeatureExtractor() { - if(m_fInputBuffer != nullptr) - { - delete m_fInputBuffer; // memory freed up - m_fInputBuffer = nullptr; - } - - if(m_fMagnitudeSpectrum != nullptr) - { - delete m_fMagnitudeSpectrum; // memory freed up - m_fMagnitudeSpectrum = nullptr; - } - - if(m_fPreviousMagnitudeSpectrum != nullptr) - { - delete m_fPreviousMagnitudeSpectrum; // memory freed up - m_fPreviousMagnitudeSpectrum = nullptr; - } - - if (m_OutReal != nullptr) - { - delete[] m_OutReal; - m_OutReal = nullptr; - } - - if (m_OutImag != nullptr) - { - delete[] m_OutImag; - m_OutImag = nullptr; - } - - if (m_fft != nullptr) - { - delete m_fft; - m_fft = nullptr; - } } @@ -90,36 +51,36 @@ } //============================================================================== process -std::vector<ObservationData> AudioSourceFeatureExtractor::Process( const float* data, size_t numSamples) +std::vector<ObservationData> AudioSourceFeatureExtractor::Process(const float* data, size_t numSamples) { std::vector<ObservationData> observations = std::vector<ObservationData>(); - memset(m_fInputBuffer, 0, FFTSIZE * sizeof(float)); - memset(m_fMagnitudeSpectrum, 0, MAGNITUDESIZE * sizeof(float)); - memset(m_fPreviousMagnitudeSpectrum, 0, MAGNITUDESIZE * sizeof(float)); + m_fInputBuffer = std::vector<float>(FFTSIZE, 0);; + m_fMagnitudeSpectrum = std::vector<float>(MAGNITUDESIZE, 0); + m_fPreviousMagnitudeSpectrum = std::vector<float>(MAGNITUDESIZE, 0); m_iWriteIdx = 0; float perdiodicity = EstimatePerdiodicity(const_cast<float*>(data), numSamples); float entropyofenergy = EntropyOfEnergy(const_cast<float*>(data), numSamples); - for(size_t n = 0; n < numSamples; n++) + for (size_t n = 0; n < numSamples; n++) { m_fInputBuffer[m_iWriteIdx] = data[n]; m_iWriteIdx++; //Do normal FFT - if(FFTSIZE == m_iWriteIdx) + if (FFTSIZE == m_iWriteIdx) { //Get the peak amplitude rms and crest factor for the current frame - float peakvalue = 0; - float rms = 0; - float crestfactor = 0; + float peakvalue = 0; + float rms = 0; + float crestfactor = 0; - for(int i=0; i < FFTSIZE; i++) + for (int i = 0; i < FFTSIZE; i++) { - if(m_fInputBuffer[i] > peakvalue) + if (m_fInputBuffer[i] > peakvalue) peakvalue = abs(m_fInputBuffer[i]); rms += (m_fInputBuffer[i] * m_fInputBuffer[i]); @@ -128,14 +89,14 @@ rms /= FFTSIZE; rms = sqrt(rms); - crestfactor = peakvalue/(rms + std::numeric_limits<float>::epsilon()); + crestfactor = peakvalue / (rms + std::numeric_limits<float>::epsilon()); float ZCR = 0; float inputBuffer2[FFTSIZE] = {0.0}; for (int i = 1; i < FFTSIZE; i++) { - inputBuffer2[i] = m_fInputBuffer[i -1]; + inputBuffer2[i] = m_fInputBuffer[i - 1]; } for (int i = 0; i < FFTSIZE; i++) @@ -148,14 +109,14 @@ ////////////////////////////////////////////////////////////////// for (int i = 0; i < FFTSIZE; i++) { - float multiplier = static_cast<float>(0.5 * (1 - cos(2*PI*i/(FFTSIZE -1)))); + float multiplier = static_cast<float>(0.5 * (1 - cos(2 * PI * i / (FFTSIZE - 1)))); m_fInputBuffer[i] = multiplier * m_fInputBuffer[i]; } //Compute the FFT - - memset(m_OutReal, 0, sizeof(float)*FFTSIZE); - memset(m_OutImag, 0, sizeof(float)*FFTSIZE); + m_OutReal = std::vector<float>(FFTSIZE, 0); + m_OutImag = std::vector<float>(FFTSIZE, 0); + m_fft->process(m_fInputBuffer, m_OutReal, m_OutImag); @@ -163,10 +124,10 @@ VectorDistance(m_OutReal, 1, m_OutImag, 1, m_fMagnitudeSpectrum, 1, MAGNITUDESIZE); //Compute the spectral features - float centroid = 0; - float spread = 0; - float skewness = 0; - float kurtosis = 0; + float centroid = 0; + float spread = 0; + float skewness = 0; + float kurtosis = 0; float brightness = 0; float rolloff85 = 0; float rolloff95 = 0; @@ -181,13 +142,13 @@ m_MFCC.ComputeMFCC(m_fMagnitudeSpectrum, mfccs); //Update for Spectral Flux - for(int i = 0; i < MAGNITUDESIZE; i++) + for (int i = 0; i < MAGNITUDESIZE; i++) { m_fPreviousMagnitudeSpectrum[i] = m_fMagnitudeSpectrum[i]; } //50% Hop size - for(int i = MAGNITUDESIZE; i < FFTSIZE; i++) + for (int i = MAGNITUDESIZE; i < FFTSIZE; i++) { m_fInputBuffer[i - MAGNITUDESIZE] = m_fInputBuffer[i]; m_iWriteIdx = MAGNITUDESIZE; @@ -196,7 +157,6 @@ //create an observation for this window and push it to the list of observations for this 30 sec. audio clip ObservationData newObservation = ObservationData(rms, peakvalue, crestfactor, ZCR, centroid, spread, skewness, kurtosis, brightness, rolloff85, rolloff95, spectralentropy, flatness, spectralcf, spectralflux, mfccs, perdiodicity, entropyofenergy); observations.push_back(newObservation); - } } @@ -208,16 +168,16 @@ m_MFCC.freeMFCCmemory(); } -void AudioSourceFeatureExtractor::VectorDistance(const float* vIn1, int stride1, const float* vIn2, int stride2, float* &vOut, int strideOut, size_t nElements) +void AudioSourceFeatureExtractor::VectorDistance(std::vector<float> vIn1, int stride1, std::vector<float> vIn2, int stride2, std::vector<float> &vOut, int strideOut, size_t nElements) { - //Compute remaining samples + //Compute remaining samples for (size_t i = 0; i < nElements; i++) { - vOut[i* strideOut] = sqrt(vIn1[i*stride1] * vIn1[i*stride1] + vIn2[i*stride2] * vIn2[i*stride2]); + vOut[i * strideOut] = sqrt(vIn1[i * stride1] * vIn1[i * stride1] + vIn2[i * stride2] * vIn2[i * stride2]); } } -void AudioSourceFeatureExtractor::SpectralFeatures(float* &magnitude, float* &prevmagnitude, size_t windowSize, float ¢roid, float &spread, float &skew, float &kurtosis, float &brightness, float &rolloff95, float &rolloff85, float &spectralentropy, float &flatness, float &spectralcf, float &spectralflux) +void AudioSourceFeatureExtractor::SpectralFeatures(std::vector<float> &magnitude, std::vector<float> &prevmagnitude, size_t windowSize, float& centroid, float& spread, float& skew, float& kurtosis, float& brightness, float& rolloff95, float& rolloff85, float& spectralentropy, float& flatness, float& spectralcf, float& spectralflux) { float summagbin = 0; float summag = 0; @@ -248,7 +208,7 @@ sumprevmag += prevmagnitude[i]; //Spectral Crest factor - if(magnitude[i] > maxmag) + if (magnitude[i] > maxmag) { maxmag = magnitude[i]; } @@ -258,9 +218,9 @@ //Roll off + Flatness totalenergy += energy; - float exponent = static_cast<float>(1.0/static_cast<float>(windowSize)); + float exponent = static_cast<float>(1.0 / static_cast<float>(windowSize)); - if(energy != 0) + if (energy != 0) { geo *= pow(energy, exponent); } @@ -287,9 +247,9 @@ normalisedMagSumEntropy += (norm * log(norm + std::numeric_limits<float>::epsilon())); //Moments - summagspread += pow((m_fFreqBins[i] - centroid), 2) * norm; - summagskewness += pow((m_fFreqBins[i] - centroid), 3) * norm; - summagkurtosis += pow((m_fFreqBins[i] - centroid), 4) * norm; + summagspread += pow((m_fFreqBins[i] - centroid), 2) * norm; + summagskewness += pow((m_fFreqBins[i] - centroid), 3) * norm; + summagkurtosis += pow((m_fFreqBins[i] - centroid), 4) * norm; //Brightness if (m_fFreqBins[i] >= 1500) @@ -308,7 +268,7 @@ float spreadtemp = spread; spreadtemp += std::numeric_limits<float>::epsilon(); - skew = summagskewness / pow(spreadtemp, 3); + skew = summagskewness / pow(spreadtemp, 3); ///////////////////////////////////////////////////////// @@ -339,7 +299,6 @@ currEnergy85 += (magnitude[countFFT85] * magnitude[countFFT85]); countFFT85 += 1; } - } countFFT85 -= 1; @@ -380,7 +339,7 @@ //Flatness ///////////////////////////////////////////////////////// - if(totalenergy != 0 && geo != 0) + if (totalenergy != 0 && geo != 0) { flatness = geo / (totalenergy / static_cast<float>(windowSize)); } @@ -394,7 +353,7 @@ //Spectral Crest Factor ///////////////////////////////////////////////////////// - if(summag != 0) + if (summag != 0) { spectralcf = maxmag / summag; } @@ -404,170 +363,127 @@ } } -void AudioSourceFeatureExtractor::ConvolveFunction(float* &z, float *x, float *y, size_t &lenz, size_t lenx, size_t leny) +void AudioSourceFeatureExtractor::ConvolveFunction(float* & z, float* x, float* y, size_t& lenz, size_t lenx, size_t leny) { // computes cross-correlation of two vectors, takes 2 vectors of the same length and computes 2*length-1 lags - float *zptr,s,*xp,*yp; - size_t n_lo, n_hi; + //float *zptr, s, *xp, *yp; + //size_t n_lo, n_hi; - lenz = lenx + leny - 1; + //lenz = lenx + leny - 1; - z = new float[lenz]; + //z = new float[lenz]; - zptr = z; + //zptr = z; - for (size_t i = 0; i < lenz; i++) - { - s = 0.0; - n_lo = 0 > (i - leny + 1 ) ? 0 : (i - leny + 1); - n_hi = (lenx - 1) < i ? (lenx - 1): i; - xp = &x[0] + n_lo; - yp = &y[0] + i - n_lo; + //for (size_t i = 0; i < lenz; i++) + //{ + // s = 0.0; + // n_lo = 0 > (i - leny + 1) ? 0 : (i - leny + 1); + // n_hi = (lenx - 1) < i ? (lenx - 1) : i; + // xp = &x[0] + n_lo; + // yp = &y[0] + i - n_lo; - for (size_t n = n_lo; n <= n_hi; n++) - { - s += *xp * *yp; - xp++; - yp--; - } + // for (size_t n = n_lo; n <= n_hi; n++) + // { + // s += *xp * *yp; + // xp++; + // yp--; + // } - *zptr=s; - zptr++; - } + // *zptr = s; + // zptr++; + //} } -void AudioSourceFeatureExtractor::XCorr(float *&output, float* input1, float* input2, size_t &outputlen, size_t inputlen, size_t maxLag = 0) +void AudioSourceFeatureExtractor::XCorr(float*& output, float* input1, float* input2, size_t& outputlen, size_t inputlen, size_t maxLag = 0) { - // TODO: adapt for different sizes + // TODO: adapt for different sizes - // find next power of 2 for efficient FFT - size_t fftSize = 1; - while (fftSize < 2*static_cast<size_t>(inputlen)-1) - { - fftSize *= 2; - } + // find next power of 2 for efficient FFT + size_t fftSize = 1; + while (fftSize < 2 * static_cast<size_t>(inputlen) - 1) + { + fftSize *= 2; + } FFTW fft1 = FFTW(fftSize); FFTW fft2 = FFTW(fftSize); - // allocate space for FFTW processing - float *in1 = new float[fftSize]; - float *in2 = new float[fftSize]; + // allocate space for FFTW processing + std::vector<float> in1 = std::vector<float>(fftSize, 0);; + std::vector<float> in2 = std::vector<float>(fftSize, 0);; - // output from FFT is only up to Nyquist frequency - float *out1Real = new float[fftSize]; - float *out1Imag = new float[fftSize]; - float *out2Real = new float[fftSize]; - float *out2Imag = new float[fftSize]; + // output from FFT is only up to Nyquist frequency + std::vector<float> out1Real = std::vector<float>(fftSize, 0); + std::vector<float> out1Imag = std::vector<float>(fftSize, 0); + std::vector<float> out2Real = std::vector<float>(fftSize, 0); + std::vector<float> out2Imag = std::vector<float>(fftSize, 0); - memset(in1, 0, sizeof(float)*fftSize); - memset(in2, 0, sizeof(float)*fftSize); - memset(out1Real, 0, sizeof(float)*fftSize); - memset(out1Imag, 0, sizeof(float)*fftSize); - memset(out2Real, 0, sizeof(float)*fftSize); - memset(out2Imag, 0, sizeof(float)*fftSize); - - // copy input to allocated arrays - for (size_t i = 0; i < static_cast<size_t>(inputlen); ++i) - { - in1[i] = input1[i]; - in2[i] = input2[i]; - } + // copy input to allocated arrays + for (size_t i = 0; i < static_cast<size_t>(inputlen); ++i) + { + in1[i] = input1[i]; + in2[i] = input2[i]; + } fft1.process(in1, out1Real, out1Imag); fft2.process(in2, out2Real, out2Imag); - // multiply out1*conj(out2) in FFT domain - fftwf_complex *product = static_cast<fftwf_complex*>(fftwf_malloc(sizeof(fftwf_complex)*fftSize)); - for (size_t i = 0; i < fftSize; ++i) - { - product[i][0] = (out1Real[i]*out2Real[i] + out1Imag[i]*out2Imag[i]); - product[i][1] = (out1Imag[i]*out2Real[i] - out1Real[i]*out2Imag[i]); - } + // multiply out1*conj(out2) in FFT domain + fftwf_complex* product = static_cast<fftwf_complex*>(fftwf_malloc(sizeof(fftwf_complex) * fftSize)); + for (size_t i = 0; i < fftSize; ++i) + { + product[i][0] = (out1Real[i] * out2Real[i] + out1Imag[i] * out2Imag[i]); + product[i][1] = (out1Imag[i] * out2Real[i] - out1Real[i] * out2Imag[i]); + } - // compute IFFT + // compute IFFT // do FFT, adapted from iTraktorLib "FFTW.h" - fftw_iodim dim; - dim.n = fftSize; - dim.is = 1; - dim.os = 1; + fftw_iodim dim; + dim.n = fftSize; + dim.is = 1; + dim.os = 1; - fftwf_plan plan; + fftwf_plan plan; - float *result = static_cast<float*>(fftwf_malloc(sizeof(float)*fftSize)); - plan = fftwf_plan_guru_dft_c2r(1, &dim, 0, nullptr, product, result, FFTW_ESTIMATE); - fftwf_execute_dft_c2r(plan, product, result); - fftwf_destroy_plan(plan); + float* result = static_cast<float*>(fftwf_malloc(sizeof(float) * fftSize)); + plan = fftwf_plan_guru_dft_c2r(1, &dim, 0, nullptr, product, result, FFTW_ESTIMATE); + fftwf_execute_dft_c2r(plan, product, result); + fftwf_destroy_plan(plan); - // copy real part of result back to output array, normalize by FFT size - if (maxLag == 0 || maxLag >= static_cast<size_t>(inputlen)) - { - maxLag = static_cast<size_t>(inputlen)-1; - } + // copy real part of result back to output array, normalize by FFT size + if (maxLag == 0 || maxLag >= static_cast<size_t>(inputlen)) + { + maxLag = static_cast<size_t>(inputlen) - 1; + } - output = new float[2 * maxLag + 1]; + output = new float[2 * maxLag + 1]; outputlen = 2 * maxLag + 1; - for (size_t i = fftSize - maxLag; i < fftSize; ++i) - { - output[i - fftSize + maxLag] = result[i] / fftSize; - } + for (size_t i = fftSize - maxLag; i < fftSize; ++i) + { + output[i - fftSize + maxLag] = result[i] / fftSize; + } - for (unsigned i = 0; i <= static_cast<unsigned>(maxLag); ++i) - { - output[i + maxLag] = result[i] / fftSize; - } + for (unsigned i = 0; i <= static_cast<unsigned>(maxLag); ++i) + { + output[i + maxLag] = result[i] / fftSize; + } fftwf_free(result); fftwf_free(product); - - if(in1 != nullptr) - { - delete[] in1; - in1 = nullptr; - } - - if(in2 != nullptr) - { - delete[] in2; - in2 = nullptr; - } - - if(out1Real != nullptr) - { - delete[] out1Real; - out1Real= nullptr; - } - - if(out1Imag != nullptr) - { - delete[] out1Imag; - out1Imag= nullptr; - } - - if(out2Real != nullptr) - { - delete[] out2Real; - out2Real= nullptr; - } - - if(out2Imag != nullptr) - { - delete[] out2Imag; - out2Imag= nullptr; - } } float AudioSourceFeatureExtractor::EstimatePerdiodicity(float* data, size_t numSamples) { float periodicity = 0.0; float* downsampleddata = new float[numSamples]; - memset(downsampleddata, 0, sizeof(float)*numSamples); + memset(downsampleddata, 0, sizeof(float) * numSamples); float* xcorr = nullptr; - for(size_t k = 0; k < numSamples; k++) + for (size_t k = 0; k < numSamples; k++) { downsampleddata[k] = data[k]; } @@ -577,7 +493,7 @@ int startLag = static_cast<int>(floor(static_cast<float>(60.0 / 480.0 * m_fSampleRate))); int endLag = static_cast<int>(floor(static_cast<float>(60.0 / 30.0 * m_fSampleRate))); - int thresh = static_cast<int>(floor(static_cast<float>(.2 * m_fSampleRate))); + int thresh = static_cast<int>(floor(static_cast<float>(.2 * m_fSampleRate))); size_t xcorrlen = 0; XCorr(xcorr, downsampleddata, downsampleddata, xcorrlen, numSamples, endLag); @@ -585,53 +501,53 @@ int i = static_cast<int>(floor(xcorrlen / 2)); std::vector<float> temp; - for(size_t j = i; j < xcorrlen; j++) + for (size_t j = i; j < xcorrlen; j++) { temp.push_back(xcorr[j]); } std::vector<float> temp2; - for(size_t j = static_cast<size_t>(std::max(1,startLag)); j < static_cast<size_t>(std::min(endLag,static_cast<int>(temp.size()))); j++) + for (size_t j = static_cast<size_t>(std::max(1, startLag)); j < static_cast<size_t>(std::min(endLag, static_cast<int>(temp.size()))); j++) { temp2.push_back(temp[j]); } int maxIdx = 0; float maxVal = std::numeric_limits<float>::lowest(); - for(size_t j =0; j < temp2.size(); j++) + for (size_t j = 0; j < temp2.size(); j++) { - if(temp2[j] > maxVal) + if (temp2[j] > maxVal) { maxIdx = j; maxVal = temp2[j]; } } - int from = std::max(1, maxIdx-thresh); - int to = std::min(static_cast<int>(temp2.size()), maxIdx+thresh); + int from = std::max(1, maxIdx - thresh); + int to = std::min(static_cast<int>(temp2.size()), maxIdx + thresh); float minValInRange = std::numeric_limits<float>::max(); - for(size_t j = static_cast<size_t>(from); j < to; j++) + for (size_t j = static_cast<size_t>(from); j < to; j++) { - if(temp2[j] < minValInRange) + if (temp2[j] < minValInRange) { minValInRange = temp2[j]; } } - periodicity = (maxVal-minValInRange); + periodicity = (maxVal - minValInRange); std::string dave = "Periodicity " + std::to_string(periodicity) + "\n"; //OutputDebugString(dave.c_str()); - if(xcorr != nullptr) + if (xcorr != nullptr) { delete[] xcorr; xcorr = nullptr; } - if(downsampleddata != nullptr) + if (downsampleddata != nullptr) { delete[] downsampleddata; downsampleddata = nullptr; @@ -640,7 +556,7 @@ return periodicity; } -void AudioSourceFeatureExtractor::DownSampler(float* data, float* &out, size_t lenIn, size_t &lenOut, float currentSampleRate, float futureSampleRate) +void AudioSourceFeatureExtractor::DownSampler(float* data, float* & out, size_t lenIn, size_t& lenOut, float currentSampleRate, float futureSampleRate) { ////Low pass our data before we decimate //const int filterlength = 101; @@ -651,17 +567,17 @@ //float coeffs[filterlength] ={2.0839274e-05f, 6.6880953e-06f,-4.7252855e-05f, -3.4874138e-05f, 7.8508412e-05f,9.7089069e-05f,-9.9197161e-05f, -0.00020412984f, 8.2261082e-05f, 0.00035689832f, 9.4890293e-06f, -0.00053820928f, -0.00021722882f, 0.00070567406f, 0.00057491515f, -0.00078844035f, -0.0010939316f, 0.00069057313f, 0.0017460365f, -0.00030308162f, -0.0024484061f, -0.00047496572f, 0.0030552838f, 0.0017078921f, -0.0033604759f, -0.0033912712f, 0.0031135236f, 0.0054217125f, -0.0020499325f, -0.0075746416f, -6.7239242e-05f, 0.0094950572f, 0.0034002098f, -0.010703080f, -0.0079918290f, 0.010610434f, 0.013732497f, -0.0085344436f, -0.020346783f, 0.0036776769f, 0.027405130f, 0.0050050775f, -0.034361813f, -0.019287759f, 0.040615436f, 0.043452863f,-0.045583628f, -0.093336113f, 0.048780452f, 0.31394306f, 0.45011634f, 0.31394306f, 0.048780452f,-0.093336113f,-0.045583628f, 0.043452863f,0.040615436f,-0.019287759f, -0.034361813f, 0.0050050775f, 0.027405130f,0.0036776769f,-0.020346783f, -0.0085344436f, 0.013732497f, 0.010610434f, -0.0079918290f, -0.010703080f, 0.0034002098f, 0.0094950572f, -6.7239242e-05f, -0.0075746416f, -0.0020499325f, 0.0054217125f, 0.0031135236f, -0.0033912712f, -0.0033604759f, 0.0017078921f, 0.0030552838f, -0.00047496572f, -0.0024484061f, -0.00030308162f, 0.0017460365f,0.00069057313f, -0.0010939316f, -0.00078844035f, 0.00057491515f, 0.00070567406f, -0.00021722882f, -0.00053820928f, 9.4890293e-06f, 0.00035689832f, 8.2261082e-05f, -0.00020412984f, -9.9197161e-05f, 9.7089069e-05f, 7.8508412e-05f, -3.4874138e-05f, -4.7252855e-05f, 6.6880953e-06f, 2.0839274e-05f}; // //float acc; // accumulator for MACs - // float* coeffp; // pointer to coefficients - // float* inputp; // pointer to input samples + // float* coeffp; // pointer to coefficients + // float* inputp; // pointer to input samples //float* endp = &data[lenIn -1]; // pointer to last sample - // - // - // // apply the filter to each input sample - // for (size_t n = 0; n < lenIn; n++ ) + // + // + // // apply the filter to each input sample + // for (size_t n = 0; n < lenIn; n++ ) //{ - // // calculate output n - // coeffp = &coeffs[0]; - // inputp = &data[filterlength - 1 + n]; + // // calculate output n + // coeffp = &coeffs[0]; + // inputp = &data[filterlength - 1 + n]; // acc = 0.0f; // for (int k = 0; k < filterlength; k++ ) @@ -677,7 +593,7 @@ // } // tempdata[n] = acc; - // } + // } //int ratio = (int) (currentSampleRate / futureSampleRate); // //lenOut = lenIn / ratio; @@ -700,18 +616,18 @@ //} } -void AudioSourceFeatureExtractor::EnvelopeCurve(float* data, float* &out, size_t dataLen, float sampleRate) +void AudioSourceFeatureExtractor::EnvelopeCurve(float* data, float* & out, size_t dataLen, float sampleRate) { - float release = 0.02f; + float release = 0.02f; float envelope = 0.0f; float gr = exp(-1 / (sampleRate * release)); - for (size_t i = 0; i < dataLen; i++) + for (size_t i = 0; i < dataLen; i++) { float EnvIn = abs(data[i]); - if(envelope < EnvIn) + if (envelope < EnvIn) { envelope = EnvIn; } @@ -725,20 +641,20 @@ } } -void AudioSourceFeatureExtractor::Normalise(float* data, float* &out, size_t len) +void AudioSourceFeatureExtractor::Normalise(float* data, float* & out, size_t len) { - float maxvalue = std::numeric_limits<float>::lowest(); + //float maxvalue = std::numeric_limits<float>::lowest(); - for(size_t i = 0; i < len; i++) - { - if(data[i] > maxvalue) - maxvalue = data[i]; - } + //for (size_t i = 0; i < len; i++) + //{ + // if (data[i] > maxvalue) + // maxvalue = data[i]; + //} - for(size_t i = 0; i < len; i++) - { - out[i] = data[i] / maxvalue; - } + //for (size_t i = 0; i < len; i++) + //{ + // out[i] = data[i] / maxvalue; + //} } float AudioSourceFeatureExtractor::EntropyOfEnergy(float* data, size_t len) @@ -748,7 +664,7 @@ const int numSubFrames = 30; //Calculate Total Energy and normalise it - for (size_t i =0; i < len; i++) + for (size_t i = 0; i < len; i++) { totalEnergy += (data[i] * data[i]); } @@ -772,171 +688,170 @@ return (totalentropy * -1); } -void AudioSourceFeatureExtractor::PDF_getResampleKrnl(std::vector<float> freqVec, float fsProc, int nfft, int nBin, std::vector<float> &outLogFreqVec, std::vector<std::vector<float>> &outKrnl) +void AudioSourceFeatureExtractor::PDF_getResampleKrnl(std::vector<float> freqVec, float fsProc, int nfft, int nBin, std::vector<float>& outLogFreqVec, std::vector<std::vector<float>>& outKrnl) { - float f1 = freqVec[0]; - float f2 = freqVec[freqVec.size() - 1]; + //float f1 = freqVec[0]; + //float f2 = freqVec[freqVec.size() - 1]; - outLogFreqVec = LinSpace(log(f1), log(f2), nBin); - std::vector<float> f(nfft / 2, 0.0f); + //outLogFreqVec = LinSpace(log(f1), log(f2), nBin); + //std::vector<float> f(nfft / 2, 0.0f); - for (size_t i = 0; i < static_cast<size_t>(nBin); i++) - { - outKrnl.push_back(f); - } + //for (size_t i = 0; i < static_cast<size_t>(nBin); i++) + //{ + // outKrnl.push_back(f); + //} - for (size_t i = 0; i < outLogFreqVec.size(); i++) - { - outLogFreqVec[i] = exp(outLogFreqVec[i]); - } + //for (size_t i = 0; i < outLogFreqVec.size(); i++) + //{ + // outLogFreqVec[i] = exp(outLogFreqVec[i]); + //} - for (size_t i = 1; i < static_cast<size_t>(nBin) - 2; i++) - { - float freqBinLin = outLogFreqVec[i] / (fsProc / nfft); + //for (size_t i = 1; i < static_cast<size_t>(nBin) - 2; i++) + //{ + // float freqBinLin = outLogFreqVec[i] / (fsProc / nfft); - float nextBinUp = outLogFreqVec[i + 1] / (fsProc / nfft); + // float nextBinUp = outLogFreqVec[i + 1] / (fsProc / nfft); - float nextBinDown = outLogFreqVec[i - 1] / (fsProc / nfft); + // float nextBinDown = outLogFreqVec[i - 1] / (fsProc / nfft); - float filtWid = nextBinUp - nextBinDown; + // float filtWid = nextBinUp - nextBinDown; - if (filtWid <= 2) - { - // log resolution is finer than linear resolution - // linear interpolation of neighboring bins + // if (filtWid <= 2) + // { + // // log resolution is finer than linear resolution + // // linear interpolation of neighboring bins - float binDown = floor(freqBinLin); - float frac = freqBinLin - binDown; - std::vector<float> filt(nfft/2, 0.0f); - filt[((int)binDown) - 1] = 1 - frac; - filt[((int)binDown + 1) - 1] = frac; - outKrnl[i][((int)binDown) - 1] = filt[((int)binDown) - 1]; - outKrnl[i][((int)binDown + 1) - 1] = filt[((int)binDown + 1) - 1]; + // float binDown = floor(freqBinLin); + // float frac = freqBinLin - binDown; + // std::vector<float> filt(nfft / 2, 0.0f); + // filt[((int)binDown) - 1] = 1 - frac; + // filt[((int)binDown + 1) - 1] = frac; + // outKrnl[i][((int)binDown) - 1] = filt[((int)binDown) - 1]; + // outKrnl[i][((int)binDown + 1) - 1] = filt[((int)binDown + 1) - 1]; + // } + // else + // { + // float ceilNextBinDown = ceil(nextBinDown); + // float floorFreqBinLin = floor(freqBinLin); + // float ceilFreqBinLin = ceil(freqBinLin); + // float floorNextBinUp = floor(nextBinUp); - } - else - { - float ceilNextBinDown = ceil(nextBinDown); - float floorFreqBinLin = floor(freqBinLin); - float ceilFreqBinLin = ceil(freqBinLin); - float floorNextBinUp = floor(nextBinUp); + // std::vector<int> idxLow; + // std::vector<int> idxHigh; + // std::vector<int> filtIdx; - std::vector<int> idxLow; - std::vector<int> idxHigh; - std::vector<int> filtIdx; + // for (size_t j = (size_t)ceilNextBinDown; j <= (size_t)floorFreqBinLin; j++) + // { + // idxLow.push_back(j); + // } - for (size_t j = (size_t)ceilNextBinDown; j <= (size_t)floorFreqBinLin; j++ ) - { - idxLow.push_back(j); - } + // for (size_t j = (size_t)ceilFreqBinLin; j <= (size_t)floorNextBinUp; j++) + // { + // idxHigh.push_back(j); + // } - for (size_t j = (size_t)ceilFreqBinLin; j <= (size_t)floorNextBinUp; j++ ) - { - idxHigh.push_back(j); - } + // std::vector<int>::iterator it; + // it = std::unique(idxLow.begin(), idxLow.end()); + // idxLow.resize(std::distance(idxLow.begin(), it)); - std::vector<int>::iterator it; - it = std::unique (idxLow.begin(), idxLow.end()); - idxLow.resize(std::distance(idxLow.begin(), it)); + // it = std::unique(idxHigh.begin(), idxHigh.end()); + // idxHigh.resize(std::distance(idxHigh.begin(), it)); - it = std::unique (idxHigh.begin(), idxHigh.end()); - idxHigh.resize(std::distance(idxHigh.begin(), it)); + // filtIdx.reserve(idxLow.size() + idxHigh.size()); // preallocate memory + // filtIdx.insert(filtIdx.end(), idxLow.begin(), idxLow.end()); + // filtIdx.insert(filtIdx.end(), idxHigh.begin(), idxHigh.end()); - filtIdx.reserve(idxLow.size() + idxHigh.size()); // preallocate memory - filtIdx.insert(filtIdx.end(), idxLow.begin(), idxLow.end()); - filtIdx.insert(filtIdx.end(), idxHigh.begin(), idxHigh.end()); + // std::vector<float> rampUp; + // std::vector<float> rampDown; + // for (size_t j = 0; j < filtIdx.size(); j++) + // { + // rampUp.push_back(1 - (freqBinLin - filtIdx[j]) / (freqBinLin - nextBinDown)); + // rampDown.push_back(1 - (filtIdx[j] - freqBinLin) / (nextBinUp - freqBinLin)); + // } - std::vector<float> rampUp; - std::vector<float> rampDown; - for (size_t j = 0; j < filtIdx.size(); j++) - { - rampUp.push_back(1 - (freqBinLin - filtIdx[j]) / (freqBinLin - nextBinDown)); - rampDown.push_back(1 - (filtIdx[j] - freqBinLin) / (nextBinUp - freqBinLin)); - } + // std::vector<float> filt; - std::vector<float> filt; + // for (size_t j = 0; j < rampUp.size(); j++) + // { + // filt.push_back(std::min(rampUp[j], rampDown[j])); + // } - for (size_t j = 0; j < rampUp.size(); j++) - { - filt.push_back(std::min(rampUp[j], rampDown[j])); - } + // float sumfilt = 0.0f; + // for (size_t j = 0; j < filt.size(); j++) + // { + // sumfilt += filt[j]; + // } - float sumfilt = 0.0f; - for (size_t j = 0; j < filt.size(); j++) - { - sumfilt += filt[j]; - } + // for (size_t j = 0; j < filt.size(); j++) + // { + // filt[j] /= sumfilt; + // } - for (size_t j = 0; j < filt.size(); j++) - { - filt[j] /= sumfilt; - } - - for (size_t j = 0; j < filtIdx.size(); j++) - { - outKrnl[i][filtIdx[j] - 1] = filt[j]; - } - } - } + // for (size_t j = 0; j < filtIdx.size(); j++) + // { + // outKrnl[i][filtIdx[j] - 1] = filt[j]; + // } + // } + //} // special routine for first bin // get frequency in linear bins - float freqBinLin = outLogFreqVec[0] / (fsProc/(float)nfft); - float binDown = floor(freqBinLin); - float frac = freqBinLin - binDown; + //float freqBinLin = outLogFreqVec[0] / (fsProc / (float)nfft); + //float binDown = floor(freqBinLin); + //float frac = freqBinLin - binDown; - std::vector<float> filt(nfft/2, 0.0f); - filt[((int)binDown) - 1] = 1 - frac; - filt[((int)binDown + 1) - 1] = frac; + //std::vector<float> filt(nfft / 2, 0.0f); + //filt[((int)binDown) - 1] = 1 - frac; + //filt[((int)binDown + 1) - 1] = frac; - outKrnl[0][((int)binDown) - 1] = filt[((int)binDown) - 1]; - outKrnl[0][((int)binDown + 1) - 1] = filt[((int)binDown + 1) - 1]; + //outKrnl[0][((int)binDown) - 1] = filt[((int)binDown) - 1]; + //outKrnl[0][((int)binDown + 1) - 1] = filt[((int)binDown + 1) - 1]; - // special routine for last bin - // get frequency in linear bins - freqBinLin = outLogFreqVec[outLogFreqVec.size() - 1] / (fsProc/nfft); + //// special routine for last bin + //// get frequency in linear bins + //freqBinLin = outLogFreqVec[outLogFreqVec.size() - 1] / (fsProc / nfft); - // get next lower bin - float nextBinDown = outLogFreqVec[outLogFreqVec.size() - 2] / (fsProc/nfft); + //// get next lower bin + //float nextBinDown = outLogFreqVec[outLogFreqVec.size() - 2] / (fsProc / nfft); - float ceilNextBinDown = ceil(nextBinDown); //Subtract by -1 because of array - float floorFreqBinLin = floor(freqBinLin); + //float ceilNextBinDown = ceil(nextBinDown); //Subtract by -1 because of array + //float floorFreqBinLin = floor(freqBinLin); - std::vector<int> filtIdx; + //std::vector<int> filtIdx; - for (size_t i = (size_t)ceilNextBinDown; i <= (size_t)floorFreqBinLin; i++ ) - { - filtIdx.push_back(i); - } + //for (size_t i = (size_t)ceilNextBinDown; i <= (size_t)floorFreqBinLin; i++) + //{ + // filtIdx.push_back(i); + //} - std::vector<float> filt2; - for (size_t i = 0; i < filtIdx.size(); i++) - { - filt2.push_back(1 - (freqBinLin - filtIdx[i]) / (freqBinLin - nextBinDown)); - } + //std::vector<float> filt2; + //for (size_t i = 0; i < filtIdx.size(); i++) + //{ + // filt2.push_back(1 - (freqBinLin - filtIdx[i]) / (freqBinLin - nextBinDown)); + //} - float sumfilt = 0.0f; - for (size_t i = 0; i < filt2.size(); i++) - { - sumfilt += filt2[i]; - } + //float sumfilt = 0.0f; + //for (size_t i = 0; i < filt2.size(); i++) + //{ + // sumfilt += filt2[i]; + //} - for (size_t i = 0; i < filt2.size(); i++) - { - filt2[i] /= sumfilt; - } + //for (size_t i = 0; i < filt2.size(); i++) + //{ + // filt2[i] /= sumfilt; + //} - for (size_t j = 0; j < filtIdx.size(); j++) - { - outKrnl[outKrnl.size() - 1][filtIdx[j] - 1] = filt2[j]; - } + //for (size_t j = 0; j < filtIdx.size(); j++) + //{ + // outKrnl[outKrnl.size() - 1][filtIdx[j] - 1] = filt2[j]; + //} } float AudioSourceFeatureExtractor::Log2(float n) { // log(n)/log(2) is log2. - return logf( n ) / logf( 2 ); + return logf(n) / logf(2); } int AudioSourceFeatureExtractor::Sign(float x) @@ -956,9 +871,9 @@ // vector iterator int iterator = 0; - for (int i = 0; i <= n-2; i++) + for (int i = 0; i <= n - 2; i++) { - float temp = min + i*(max-min)/(floor((float)n) - 1); + float temp = min + i * (max - min) / (floor((float)n) - 1); result.insert(result.begin() + iterator, temp); iterator += 1; } @@ -977,10 +892,9 @@ for (int i = 0; i <= n; i++) { - v.push_back((float) exp(logMin + accDelta)); + v.push_back(static_cast<float>(exp(logMin + accDelta))); accDelta += delta;// accDelta = delta * i } return v; } -
--- a/Source/AudioSourceFeatureExtractor.h Fri Jul 10 00:33:15 2015 +0100 +++ b/Source/AudioSourceFeatureExtractor.h Fri Jul 10 03:04:11 2015 +0100 @@ -35,11 +35,11 @@ private: //Computes the magnitude of FFT - void VectorDistance(const float* vIn1, int stride1, const float* vIn2, int stride2, float* &vOut, int strideOut, size_t nElements); + void VectorDistance(std::vector<float> vIn1, int stride1, std::vector<float>, int stride2, std::vector<float> &vOut, int strideOut, size_t nElements); - void SpectralFeatures(float* &magnitude, float* &previousmagnitude, size_t windowSize, float ¢roid, float &spread, float &skew, float &kurtosis, float &brightness, float &rolloff95, float &rolloff85, float &spectralentropy, float &flatness, float &spectralcf, float &spectralflux); + void SpectralFeatures(std::vector<float> &magnitude, std::vector<float> &previousmagnitude, size_t windowSize, float ¢roid, float &spread, float &skew, float &kurtosis, float &brightness, float &rolloff95, float &rolloff85, float &spectralentropy, float &flatness, float &spectralcf, float &spectralflux); - void MFCCs(float* &magnitude, size_t windowSize, float sampleRate); + void MFCCs(std::vector<float> &magnitude, size_t windowSize, float sampleRate); float EstimatePerdiodicity(float* data, size_t numSamples); @@ -68,13 +68,13 @@ size_t m_iWriteIdx; size_t m_iFlucIdx; float m_fSampleRate; - FFTW *m_fft; - float *m_OutReal; - float *m_OutImag; + std::unique_ptr<FFTW> m_fft; + std::vector<float> m_OutReal; + std::vector<float> m_OutImag; MFCC m_MFCC; - float* m_fInputBuffer; - float* m_fMagnitudeSpectrum; - float* m_fPreviousMagnitudeSpectrum; + std::vector<float> m_fInputBuffer; + std::vector<float> m_fMagnitudeSpectrum; + std::vector<float> m_fPreviousMagnitudeSpectrum; std::vector<float> m_fFreqBins; };
--- a/Source/FFTW.h Fri Jul 10 00:33:15 2015 +0100 +++ b/Source/FFTW.h Fri Jul 10 03:04:11 2015 +0100 @@ -16,6 +16,7 @@ #include "fftw3.h" #include <vector> +#include <memory> #include <stdio.h> //#include <windows.h> #include <string.h> @@ -35,23 +36,16 @@ { m_iFFTSize = fftLength; - float* tempInput = new float[fftLength]; - memset(tempInput,0, fftLength * sizeof(float)); - float* tempReal = new float[fftLength]; - memset(tempReal,0, fftLength * sizeof(float)); - float* tempImag = new float[fftLength]; - memset(tempImag,0, fftLength * sizeof(float)); + std::vector<float> tempInput = std::vector<float>(fftLength, 0); + std::vector<float> tempReal = std::vector<float>(fftLength, 0); + std::vector<float> tempImag = std::vector<float>(fftLength, 0); + + fftwf_iodim dim; dim.n = fftLength; dim.is = 1; dim.os = 1; - m_plan = fftwf_plan_guru_split_dft_r2c( 1, &dim, 0, nullptr, tempInput, tempReal, tempImag, FFTW_ESTIMATE ); - delete[] tempInput; - tempInput = nullptr; - delete[] tempReal; - tempReal = nullptr; - delete[] tempImag; - tempImag = nullptr; + m_plan = fftwf_plan_guru_split_dft_r2c( 1, &dim, 0, nullptr, tempInput.data(), tempReal.data(), tempImag.data(), FFTW_ESTIMATE ); } @@ -64,11 +58,10 @@ } - void process( const float* input , float* realPart , float* imagPart ) + void process(std::vector<float> input, std::vector<float> &realPart, std::vector<float> &imagPart) { - float* nonConstInput = const_cast<float*>(input); // fftw does not take const input even though the data not be manipulated! - - fftwf_execute_split_dft_r2c( m_plan, nonConstInput, realPart, imagPart); + + fftwf_execute_split_dft_r2c( m_plan, input.data(), realPart.data(), imagPart.data()); //Multiply results by 2 to match the iOS output for(size_t i = 0; i < m_iFFTSize; i++)
--- a/Source/MFCC.cpp Fri Jul 10 00:33:15 2015 +0100 +++ b/Source/MFCC.cpp Fri Jul 10 03:04:11 2015 +0100 @@ -13,13 +13,13 @@ //#include <windows.h> //----------------------------------------------------------------------------- ComputeMFCC -void MFCC::ComputeMFCC(float* magnitude, std::vector<float> &mfccs) +void MFCC::ComputeMFCC(std::vector<float> magnitude, std::vector<float> &mfccs) { //Apply the Mel Filters to the spectrum magnitude: for(int i=0; i<m_iTotalMFCCFilters; i++) { //Multiply spectrum with spectral mask - vDSP_vmul(magnitude, 1, m_ppMFCCFilters[i], 1, m_pfTempMelFilterResult, 1, m_pMFCCFilterLength[i]); + vDSP_vmul(magnitude.data(), 1, m_ppMFCCFilters[i], 1, m_pfTempMelFilterResult, 1, m_pMFCCFilterLength[i]); //Sum the values of each bins m_fMelFilteredFFT[i] = vDSP_sve(m_pfTempMelFilterResult, 1, m_pMFCCFilterLength[i]); @@ -220,4 +220,4 @@ vout[m]=fProdSum; } -} +}
--- a/Source/MFCC.h Fri Jul 10 00:33:15 2015 +0100 +++ b/Source/MFCC.h Fri Jul 10 03:04:11 2015 +0100 @@ -17,7 +17,7 @@ { public: void initMFFCvariables(int NCoeffs, int Nfft, float fSampleRate); - void ComputeMFCC(float* magnitude, std::vector<float> &mfccs); + void ComputeMFCC(std::vector<float> magnitude, std::vector<float> &mfccs); void freeMFCCmemory(); private: // -------- MFFC computation