d@0: /* d@0: ============================================================================== d@0: d@0: AudioSourceFeatureExtractor.cpp d@0: Created: 11 Aug 2014 10:41:02am d@0: Author: david.ronan d@0: d@0: ============================================================================== d@0: */ d@0: d@0: #include "AudioSourceFeatureExtractor.h" d@0: #include "ObservationData.h" d@0: #include d@0: #include d@0: #include d@0: #include d@0: #include d@0: d@0: //============================================================================== AudioSourceFeatureExtractor d@0: AudioSourceFeatureExtractor::AudioSourceFeatureExtractor() // m_fft(FFTSIZE) d@0: { d@0: m_fft = new FFTW(FFTSIZE); d@0: m_fInputBuffer = new float[FFTSIZE]; d@0: m_fMagnitudeSpectrum = new float[MAGNITUDESIZE]; d@0: m_fPreviousMagnitudeSpectrum = new float[MAGNITUDESIZE]; d@0: m_OutReal = new float[FFTSIZE]; d@0: m_OutImag = new float[FFTSIZE]; d@0: d@0: memset(m_fInputBuffer, 0, FFTSIZE * sizeof(float)); d@0: memset(m_fMagnitudeSpectrum, 0, MAGNITUDESIZE * sizeof(float)); d@0: memset(m_fPreviousMagnitudeSpectrum, 0, MAGNITUDESIZE * sizeof(float)); d@0: d@0: d@0: } d@0: d@0: //============================================================================== ~AudioSourceFeatureExtractor d@0: AudioSourceFeatureExtractor::~AudioSourceFeatureExtractor() d@0: { d@0: if(m_fInputBuffer != NULL) d@0: { d@0: delete m_fInputBuffer; // memory freed up d@0: m_fInputBuffer = nullptr; d@0: } d@0: d@0: if(m_fMagnitudeSpectrum != NULL) d@0: { d@0: delete m_fMagnitudeSpectrum; // memory freed up d@0: m_fMagnitudeSpectrum = nullptr; d@0: } d@0: d@0: if(m_fPreviousMagnitudeSpectrum != NULL) d@0: { d@0: delete m_fPreviousMagnitudeSpectrum; // memory freed up d@0: m_fPreviousMagnitudeSpectrum = nullptr; d@0: } d@0: d@0: if (m_OutReal != NULL) d@0: { d@0: delete[] m_OutReal; d@0: m_OutReal = nullptr; d@0: } d@0: d@0: if (m_OutImag != NULL) d@0: { d@0: delete[] m_OutImag; d@0: m_OutImag = nullptr; d@0: } d@0: d@0: if (m_fft != NULL) d@0: { d@0: delete m_fft; d@0: m_fft = nullptr; d@0: } d@0: d@0: } d@0: d@0: //============================================================================== init d@0: void AudioSourceFeatureExtractor::Initialise(float fSampleRate) d@0: { d@0: m_fSampleRate = fSampleRate; d@0: m_iWriteIdx = 0; d@0: d@0: for (int i = 1; i <= MAGNITUDESIZE; i++) d@0: { d@0: float win = ((fSampleRate / (2 * MAGNITUDESIZE)) * i); d@0: m_fFreqBins.push_back(win); d@0: } d@0: d@0: m_MFCC.initMFFCvariables(12, MAGNITUDESIZE, m_fSampleRate); d@0: d@0: d@0: } d@0: d@0: //============================================================================== process d@0: std::vector AudioSourceFeatureExtractor::Process( const float* data, size_t numSamples) d@0: { d@0: std::vector observations = std::vector(); d@0: d@0: memset(m_fInputBuffer, 0, FFTSIZE * sizeof(float)); d@0: memset(m_fMagnitudeSpectrum, 0, MAGNITUDESIZE * sizeof(float)); d@0: memset(m_fPreviousMagnitudeSpectrum, 0, MAGNITUDESIZE * sizeof(float)); d@0: m_iWriteIdx = 0; d@0: d@0: float perdiodicity = EstimatePerdiodicity(const_cast(data), numSamples); d@0: d@0: float entropyofenergy = EntropyOfEnergy(const_cast(data), numSamples); d@0: d@0: for(size_t n = 0; n < numSamples; n++) d@0: { d@0: m_fInputBuffer[m_iWriteIdx] = data[n]; d@0: m_iWriteIdx++; d@0: d@0: //Do normal FFT d@0: d@0: if(FFTSIZE == m_iWriteIdx) d@0: { d@0: //Get the peak amplitude rms and crest factor for the current frame d@0: float peakvalue = 0; d@0: float rms = 0; d@0: float crestfactor = 0; d@0: d@0: for(int i=0; i < FFTSIZE; i++) d@0: { d@0: if(m_fInputBuffer[i] > peakvalue) d@0: peakvalue = abs(m_fInputBuffer[i]); d@0: d@0: rms += (m_fInputBuffer[i] * m_fInputBuffer[i]); d@0: } d@0: d@0: rms /= FFTSIZE; d@0: rms = sqrt(rms); d@0: d@0: crestfactor = peakvalue/(rms + std::numeric_limits::epsilon()); d@0: d@0: float ZCR = 0; d@0: float inputBuffer2[FFTSIZE] = {0.0}; d@0: d@0: for (int i = 1; i < FFTSIZE; i++) d@0: { d@0: inputBuffer2[i] = m_fInputBuffer[i -1]; d@0: } d@0: d@0: for (int i = 0; i < FFTSIZE; i++) d@0: { d@0: ZCR += (abs(Sign(m_fInputBuffer[i]) - Sign(inputBuffer2[i]))); d@0: } d@0: d@0: ZCR /= (2 * FFTSIZE); d@0: d@0: ////////////////////////////////////////////////////////////////// d@0: for (int i = 0; i < FFTSIZE; i++) d@0: { d@0: float multiplier = (float) (0.5 * (1 - cos(2*PI*i/(FFTSIZE -1)))); d@0: m_fInputBuffer[i] = multiplier * m_fInputBuffer[i]; d@0: } d@0: d@0: //Compute the FFT d@0: d@0: memset(m_OutReal, 0, sizeof(float)*FFTSIZE); d@0: memset(m_OutImag, 0, sizeof(float)*FFTSIZE); d@0: d@0: m_fft->process(m_fInputBuffer, m_OutReal, m_OutImag); d@0: d@0: //Compute the Magnitude d@0: VectorDistance(m_OutReal, 1, m_OutImag, 1, m_fMagnitudeSpectrum, 1, MAGNITUDESIZE); d@0: d@0: //Compute the spectral features d@0: float centroid = 0; d@0: float spread = 0; d@0: float skewness = 0; d@0: float kurtosis = 0; d@0: float brightness = 0; d@0: float rolloff85 = 0; d@0: float rolloff95 = 0; d@0: float spectralentropy = 0; d@0: float flatness = 0; d@0: float spectralcf = 0; d@0: float spectralflux = 0; d@0: std::vector mfccs; d@0: d@0: SpectralFeatures(m_fMagnitudeSpectrum, m_fPreviousMagnitudeSpectrum, MAGNITUDESIZE, centroid, spread, skewness, kurtosis, brightness, rolloff85, rolloff95, spectralentropy, flatness, spectralcf, spectralflux); d@0: d@0: m_MFCC.ComputeMFCC(m_fMagnitudeSpectrum, mfccs); d@0: d@0: //Update for Spectral Flux d@0: for(int i = 0; i < MAGNITUDESIZE; i++) d@0: { d@0: m_fPreviousMagnitudeSpectrum[i] = m_fMagnitudeSpectrum[i]; d@0: } d@0: d@0: //50% Hop size d@0: for(int i = MAGNITUDESIZE; i < FFTSIZE; i++) d@0: { d@0: m_fInputBuffer[i - MAGNITUDESIZE] = m_fInputBuffer[i]; d@0: m_iWriteIdx = MAGNITUDESIZE; d@0: } d@0: d@0: //create an observation for this window and push it to the list of observations for this 30 sec. audio clip d@0: ObservationData newObservation = ObservationData(rms, peakvalue, crestfactor, ZCR, centroid, spread, skewness, kurtosis, brightness, rolloff85, rolloff95, spectralentropy, flatness, spectralcf, spectralflux, mfccs, perdiodicity, entropyofenergy); d@0: observations.push_back(newObservation); d@0: d@0: } d@0: } d@0: d@0: return observations; d@0: } d@0: d@0: void AudioSourceFeatureExtractor::Finalize() d@0: { d@0: m_MFCC.freeMFCCmemory(); d@0: } d@0: d@0: void AudioSourceFeatureExtractor::VectorDistance(const float* vIn1, int stride1, const float* vIn2, int stride2, float* &vOut, int strideOut, size_t nElements) d@0: { d@0: //Compute remaining samples d@0: for (size_t i = 0; i < nElements; i++) d@0: { d@0: vOut[i* strideOut] = sqrt(vIn1[i*stride1] * vIn1[i*stride1] + vIn2[i*stride2] * vIn2[i*stride2]); d@0: } d@0: } d@0: d@0: void AudioSourceFeatureExtractor::SpectralFeatures(float* &magnitude, float* &prevmagnitude, size_t windowSize, float ¢roid, float &spread, float &skew, float &kurtosis, float &brightness, float &rolloff95, float &rolloff85, float &spectralentropy, float &flatness, float &spectralcf, float &spectralflux) d@0: { d@0: float summagbin = 0; d@0: float summag = 0; d@0: float sumprevmag = 0; d@0: float summagspread = 0; d@0: float summagskewness = 0; d@0: float summagkurtosis = 0; d@0: float sumbrightness = 0; d@0: float maxmag = 0; d@0: d@0: float geo = 1; d@0: float totalenergy = 0; d@0: float currEnergy95 = 0; d@0: float currEnergy85 = 0; d@0: int countFFT85 = 0; d@0: int countFFT95 = 0; d@0: d@0: float normalisedMagSumEntropy = 0; d@0: d@0: //Centroid d@0: ////////////////////////////////////////////////////// d@0: d@0: for (size_t i = 0; i < windowSize; i++) d@0: { d@0: //Moments d@0: summagbin += (m_fFreqBins[i] * magnitude[i]); d@0: summag += magnitude[i]; d@0: sumprevmag += prevmagnitude[i]; d@0: d@0: //Spectral Crest factor d@0: if(magnitude[i] > maxmag) d@0: { d@0: maxmag = magnitude[i]; d@0: } d@0: d@0: float energy = magnitude[i] * magnitude[i]; d@0: d@0: //Roll off + Flatness d@0: d@0: totalenergy += energy; d@0: float exponent = (float)(1.0/(float)windowSize); d@0: d@0: if(energy != 0) d@0: { d@0: geo *= pow(energy, exponent); d@0: } d@0: } d@0: d@0: summag += std::numeric_limits::epsilon(); d@0: d@0: centroid = summagbin / summag; d@0: d@0: ///////////////////////////////////////////////////////// d@0: d@0: //Spread d@0: ////////////////////////////////////////////////////// d@0: d@0: for (size_t i = 0; i < windowSize; i++) d@0: { d@0: float norm = magnitude[i] / (summag + std::numeric_limits::epsilon()); d@0: float normprev = prevmagnitude[i] / (sumprevmag + std::numeric_limits::epsilon()); d@0: d@0: //Spectral Flux d@0: spectralflux += ((norm - normprev) * (norm - normprev)); d@0: d@0: //Entropy d@0: normalisedMagSumEntropy += (norm * log(norm + std::numeric_limits::epsilon())); d@0: d@0: //Moments d@0: summagspread += pow((m_fFreqBins[i] - centroid), 2) * norm; d@0: summagskewness += pow((m_fFreqBins[i] - centroid), 3) * norm; d@0: summagkurtosis += pow((m_fFreqBins[i] - centroid), 4) * norm; d@0: d@0: //Brightness d@0: if (m_fFreqBins[i] >= 1500) d@0: { d@0: sumbrightness += magnitude[i]; d@0: } d@0: } d@0: d@0: spread = sqrt(summagspread); d@0: d@0: ///////////////////////////////////////////////////////// d@0: d@0: //Skewness d@0: ////////////////////////////////////////////////////// d@0: d@0: float spreadtemp = spread; d@0: spreadtemp += std::numeric_limits::epsilon(); d@0: d@0: skew = summagskewness / pow(spreadtemp, 3); d@0: d@0: ///////////////////////////////////////////////////////// d@0: d@0: //Kurtosis d@0: ////////////////////////////////////////////////////// d@0: d@0: kurtosis = summagkurtosis / pow(spreadtemp, 4); d@0: d@0: ///////////////////////////////////////////////////////// d@0: d@0: //Brightness d@0: ////////////////////////////////////////////////////// d@0: d@0: brightness = sumbrightness / summag; d@0: d@0: ///////////////////////////////////////////////////////// d@0: d@0: //Roll off 85% - 95% d@0: ///////////////////////////////////////////////////////// d@0: d@0: while ((currEnergy95 <= 0.95 * totalenergy) && (countFFT95 < (int)windowSize)) d@0: { d@0: currEnergy95 += (magnitude[countFFT95] * magnitude[countFFT95]); d@0: countFFT95 += 1; d@0: d@0: if (currEnergy85 <= 0.85 * totalenergy) d@0: { d@0: currEnergy85 += (magnitude[countFFT85] * magnitude[countFFT85]); d@0: countFFT85 += 1; d@0: } d@0: d@0: } d@0: d@0: countFFT85 -= 1; d@0: countFFT95 -= 1; d@0: d@0: //Normalization d@0: if (currEnergy85 <= 0) d@0: { d@0: //If we have silence then our value should be 0. d@0: rolloff85 = 0; d@0: } d@0: else d@0: { d@0: rolloff85 = (((float)countFFT85) / (float)windowSize); d@0: } d@0: d@0: //Normalization d@0: if (currEnergy95 <= 0) d@0: { d@0: //If we have silence then our value should be 0. d@0: rolloff95 = 0; d@0: } d@0: else d@0: { d@0: rolloff95 = (((float)countFFT95) / (float)windowSize); d@0: } d@0: d@0: ///////////////////////////////////////////////////// d@0: d@0: //Spectral Entropy d@0: ///////////////////////////////////////////////////////// d@0: d@0: spectralentropy = (float) ((normalisedMagSumEntropy / log((float)windowSize)) * - 1); d@0: d@0: ///////////////////////////////////////////////////////// d@0: d@0: ///////////////////////////////////////////////////// d@0: d@0: //Flatness d@0: ///////////////////////////////////////////////////////// d@0: if(totalenergy != 0 && geo != 0) d@0: { d@0: flatness = geo / (totalenergy / (float)windowSize); d@0: } d@0: else d@0: { d@0: flatness = 0; d@0: } d@0: d@0: ///////////////////////////////////////////////////////// d@0: d@0: //Spectral Crest Factor d@0: ///////////////////////////////////////////////////////// d@0: d@0: if(summag != 0) d@0: { d@0: spectralcf = maxmag / summag; d@0: } d@0: else d@0: { d@0: spectralcf = 0; d@0: } d@0: } d@0: d@0: void AudioSourceFeatureExtractor::ConvolveFunction(float* &z, float *x, float *y, size_t &lenz, size_t lenx, size_t leny) d@0: { d@0: // computes cross-correlation of two vectors, takes 2 vectors of the same length and computes 2*length-1 lags d@0: d@0: float *zptr,s,*xp,*yp; d@0: size_t n_lo, n_hi; d@0: d@0: lenz = lenx + leny - 1; d@0: d@0: z = new float[lenz]; d@0: d@0: zptr = z; d@0: d@0: for (size_t i = 0; i < lenz; i++) d@0: { d@0: s = 0.0; d@0: n_lo = 0 > (i - leny + 1 ) ? 0 : (i - leny + 1); d@0: n_hi = (lenx - 1) < i ? (lenx - 1): i; d@0: xp = &x[0] + n_lo; d@0: yp = &y[0] + i - n_lo; d@0: d@0: for (size_t n = n_lo; n <= n_hi; n++) d@0: { d@0: s += *xp * *yp; d@0: xp++; d@0: yp--; d@0: } d@0: d@0: *zptr=s; d@0: zptr++; d@0: } d@0: } d@0: d@0: void AudioSourceFeatureExtractor::XCorr(float *&output, float* input1, float* input2, size_t &outputlen, size_t inputlen, size_t maxLag = 0) d@0: { d@0: // TODO: adapt for different sizes d@0: d@0: // find next power of 2 for efficient FFT d@0: size_t fftSize = 1; d@0: while (fftSize < 2*static_cast(inputlen)-1) d@0: { d@0: fftSize *= 2; d@0: } d@0: d@0: FFTW fft1 = FFTW(fftSize); d@0: FFTW fft2 = FFTW(fftSize); d@0: d@0: // allocate space for FFTW processing d@0: float *in1 = new float[fftSize]; d@0: float *in2 = new float[fftSize]; d@0: d@0: // output from FFT is only up to Nyquist frequency d@0: float *out1Real = new float[fftSize]; d@0: float *out1Imag = new float[fftSize]; d@0: float *out2Real = new float[fftSize]; d@0: float *out2Imag = new float[fftSize]; d@0: d@0: memset(in1, 0, sizeof(float)*fftSize); d@0: memset(in2, 0, sizeof(float)*fftSize); d@0: memset(out1Real, 0, sizeof(float)*fftSize); d@0: memset(out1Imag, 0, sizeof(float)*fftSize); d@0: memset(out2Real, 0, sizeof(float)*fftSize); d@0: memset(out2Imag, 0, sizeof(float)*fftSize); d@0: d@0: // copy input to allocated arrays d@0: for (size_t i = 0; i < static_cast(inputlen); ++i) d@0: { d@0: in1[i] = input1[i]; d@0: in2[i] = input2[i]; d@0: } d@0: d@0: fft1.process(in1, out1Real, out1Imag); d@0: fft2.process(in2, out2Real, out2Imag); d@0: d@0: // multiply out1*conj(out2) in FFT domain d@0: fftwf_complex *product = (fftwf_complex*)fftwf_malloc(sizeof(fftwf_complex)*fftSize); d@0: for (size_t i = 0; i < fftSize; ++i) d@0: { d@0: product[i][0] = (out1Real[i]*out2Real[i] + out1Imag[i]*out2Imag[i]); d@0: product[i][1] = (out1Imag[i]*out2Real[i] - out1Real[i]*out2Imag[i]); d@0: } d@0: d@0: // compute IFFT d@0: d@0: // do FFT, adapted from iTraktorLib "FFTW.h" d@0: fftw_iodim dim; d@0: dim.n = fftSize; d@0: dim.is = 1; d@0: dim.os = 1; d@0: d@0: fftwf_plan plan; d@0: d@0: float *result = (float*) fftwf_malloc(sizeof(float)*fftSize); d@0: plan = fftwf_plan_guru_dft_c2r(1, &dim, 0, NULL, product, result, FFTW_ESTIMATE); d@0: fftwf_execute_dft_c2r(plan, product, result); d@0: fftwf_destroy_plan(plan); d@0: d@0: // copy real part of result back to output array, normalize by FFT size d@0: if (maxLag == 0 || maxLag >= static_cast(inputlen)) d@0: { d@0: maxLag = static_cast(inputlen)-1; d@0: } d@0: d@0: output = new float[2 * maxLag + 1]; d@0: outputlen = 2 * maxLag + 1; d@0: d@0: for (size_t i = fftSize - maxLag; i < fftSize; ++i) d@0: { d@0: output[i - fftSize + maxLag] = result[i] / fftSize; d@0: } d@0: d@0: for (unsigned i = 0; i <= (unsigned)maxLag; ++i) d@0: { d@0: output[i + maxLag] = result[i] / fftSize; d@0: } d@0: d@0: fftwf_free(result); d@0: fftwf_free(product); d@0: d@0: if(in1 != NULL) d@0: { d@0: delete[] in1; d@0: in1 = nullptr; d@0: } d@0: d@0: if(in2 != NULL) d@0: { d@0: delete[] in2; d@0: in2 = nullptr; d@0: } d@0: d@0: if(out1Real != NULL) d@0: { d@0: delete[] out1Real; d@0: out1Real= nullptr; d@0: } d@0: d@0: if(out1Imag != NULL) d@0: { d@0: delete[] out1Imag; d@0: out1Imag= nullptr; d@0: } d@0: d@0: if(out2Real != NULL) d@0: { d@0: delete[] out2Real; d@0: out2Real= nullptr; d@0: } d@0: d@0: if(out2Imag != NULL) d@0: { d@0: delete[] out2Imag; d@0: out2Imag= nullptr; d@0: } d@0: } d@0: d@0: float AudioSourceFeatureExtractor::EstimatePerdiodicity(float* data, size_t numSamples) d@0: { d@0: float periodicity = 0.0; d@0: float* downsampleddata = new float[numSamples]; d@0: memset(downsampleddata, 0, sizeof(float)*numSamples); d@0: float* xcorr = nullptr; d@0: d@0: for(size_t k = 0; k < numSamples; k++) d@0: { d@0: downsampleddata[k] = data[k]; d@0: } d@0: d@0: //DownSampler(data, downsampleddata, numSamples, downsamplelength, m_fSampleRate, 11025.0); d@0: d@0: EnvelopeCurve(downsampleddata, downsampleddata, numSamples, m_fSampleRate); d@0: d@0: int startLag = (int)floor((float)(60.0 / 480.0 * m_fSampleRate)); d@0: int endLag = (int)floor((float)(60.0 / 30.0 * m_fSampleRate)); d@0: int thresh = (int)floor((float)(.2 * m_fSampleRate)); d@0: d@0: //Normalise d@0: //Normalise(downsampleddata, downsampleddata, downsamplelength); d@0: d@0: size_t xcorrlen = 0; d@0: XCorr(xcorr, downsampleddata, downsampleddata, xcorrlen, numSamples, endLag); d@0: d@0: int i = (int)floor(xcorrlen / 2); d@0: d@0: std::vector temp; d@0: for(size_t j = i; j < xcorrlen; j++) d@0: { d@0: temp.push_back(xcorr[j]); d@0: } d@0: d@0: std::vector temp2; d@0: for(size_t j = (size_t)max(1,startLag); j < (size_t)min(endLag,temp.size()); j++) d@0: { d@0: temp2.push_back(temp[j]); d@0: } d@0: d@0: int maxIdx = 0; d@0: float maxVal = std::numeric_limits::lowest(); d@0: for(size_t j =0; j < temp2.size(); j++) d@0: { d@0: if(temp2[j] > maxVal) d@0: { d@0: maxIdx = j; d@0: maxVal = temp2[j]; d@0: } d@0: } d@0: d@0: int from = max(1, maxIdx-thresh); d@0: int to = min(temp2.size(), maxIdx+thresh); d@0: d@0: float minValInRange = FLT_MAX; d@0: d@0: for(size_t j = (size_t)from; j < to; j++) d@0: { d@0: if(temp2[j] < minValInRange) d@0: { d@0: minValInRange = temp2[j]; d@0: } d@0: } d@0: d@0: periodicity = (maxVal-minValInRange); d@0: d@0: std::string dave = "Periodicity " + std::to_string(periodicity) + "\n"; d@0: OutputDebugString(dave.c_str()); d@0: d@0: if(xcorr != NULL) d@0: { d@0: delete[] xcorr; d@0: xcorr = nullptr; d@0: } d@0: d@0: if(downsampleddata != NULL) d@0: { d@0: delete[] downsampleddata; d@0: downsampleddata = nullptr; d@0: } d@0: d@0: return periodicity; d@0: } d@0: d@0: void AudioSourceFeatureExtractor::DownSampler(float* data, float* &out, size_t lenIn, size_t &lenOut, float currentSampleRate, float futureSampleRate) d@0: { d@0: ////Low pass our data before we decimate d@0: //const int filterlength = 101; d@0: //float* tempdata = new float[lenIn]; d@0: //memset(tempdata, 0, sizeof(float)*lenIn); d@0: d@0: ////Coefficients were taken from Matlab d@0: //float coeffs[filterlength] ={2.0839274e-05f, 6.6880953e-06f,-4.7252855e-05f, -3.4874138e-05f, 7.8508412e-05f,9.7089069e-05f,-9.9197161e-05f, -0.00020412984f, 8.2261082e-05f, 0.00035689832f, 9.4890293e-06f, -0.00053820928f, -0.00021722882f, 0.00070567406f, 0.00057491515f, -0.00078844035f, -0.0010939316f, 0.00069057313f, 0.0017460365f, -0.00030308162f, -0.0024484061f, -0.00047496572f, 0.0030552838f, 0.0017078921f, -0.0033604759f, -0.0033912712f, 0.0031135236f, 0.0054217125f, -0.0020499325f, -0.0075746416f, -6.7239242e-05f, 0.0094950572f, 0.0034002098f, -0.010703080f, -0.0079918290f, 0.010610434f, 0.013732497f, -0.0085344436f, -0.020346783f, 0.0036776769f, 0.027405130f, 0.0050050775f, -0.034361813f, -0.019287759f, 0.040615436f, 0.043452863f,-0.045583628f, -0.093336113f, 0.048780452f, 0.31394306f, 0.45011634f, 0.31394306f, 0.048780452f,-0.093336113f,-0.045583628f, 0.043452863f,0.040615436f,-0.019287759f, -0.034361813f, 0.0050050775f, 0.027405130f,0.0036776769f,-0.020346783f, -0.0085344436f, 0.013732497f, 0.010610434f, -0.0079918290f, -0.010703080f, 0.0034002098f, 0.0094950572f, -6.7239242e-05f, -0.0075746416f, -0.0020499325f, 0.0054217125f, 0.0031135236f, -0.0033912712f, -0.0033604759f, 0.0017078921f, 0.0030552838f, -0.00047496572f, -0.0024484061f, -0.00030308162f, 0.0017460365f,0.00069057313f, -0.0010939316f, -0.00078844035f, 0.00057491515f, 0.00070567406f, -0.00021722882f, -0.00053820928f, 9.4890293e-06f, 0.00035689832f, 8.2261082e-05f, -0.00020412984f, -9.9197161e-05f, 9.7089069e-05f, 7.8508412e-05f, -3.4874138e-05f, -4.7252855e-05f, 6.6880953e-06f, 2.0839274e-05f}; d@0: // d@0: //float acc; // accumulator for MACs d@0: // float* coeffp; // pointer to coefficients d@0: // float* inputp; // pointer to input samples d@0: //float* endp = &data[lenIn -1]; // pointer to last sample d@0: // d@0: // d@0: // // apply the filter to each input sample d@0: // for (size_t n = 0; n < lenIn; n++ ) d@0: //{ d@0: // // calculate output n d@0: // coeffp = &coeffs[0]; d@0: // inputp = &data[filterlength - 1 + n]; d@0: d@0: // acc = 0.0f; d@0: // for (int k = 0; k < filterlength; k++ ) d@0: // { d@0: // if(inputp <= endp) d@0: // acc += (*coeffp++) * (*inputp--); d@0: // else d@0: // { d@0: // //When we reach the end of the buffer d@0: // acc += (*coeffp++) * 0.0f; d@0: // *inputp--; d@0: // } d@0: // } d@0: d@0: // tempdata[n] = acc; d@0: // } d@0: //int ratio = (int) (currentSampleRate / futureSampleRate); d@0: // d@0: //lenOut = lenIn / ratio; d@0: d@0: //out = new float[lenOut]; d@0: //memset(out, 0, sizeof(float)*lenOut); d@0: d@0: d@0: //int idx = 0; d@0: //for(size_t i = 0; i < lenIn; i = i + ratio) d@0: //{ d@0: // out[idx] = tempdata[i]; d@0: // idx++; d@0: //} d@0: d@0: //if(tempdata != NULL) d@0: //{ d@0: // delete[] tempdata; d@0: // tempdata = nullptr; d@0: //} d@0: } d@0: d@0: void AudioSourceFeatureExtractor::EnvelopeCurve(float* data, float* &out, size_t dataLen, float sampleRate) d@0: { d@0: float release = 0.02f; d@0: float envelope = 0.0f; d@0: d@0: float gr = exp(-1 / (sampleRate * release)); d@0: d@0: for (size_t i = 0; i < dataLen; i++) d@0: { d@0: float EnvIn = abs(data[i]); d@0: d@0: if(envelope < EnvIn) d@0: { d@0: envelope = EnvIn; d@0: } d@0: else d@0: { d@0: envelope = envelope * gr; d@0: envelope = envelope + (1 - gr) * EnvIn; d@0: } d@0: d@0: out[i] = envelope; d@0: } d@0: } d@0: d@0: void AudioSourceFeatureExtractor::Normalise(float* data, float* &out, size_t len) d@0: { d@0: float maxvalue = std::numeric_limits::lowest(); d@0: d@0: for(size_t i = 0; i < len; i++) d@0: { d@0: if(data[i] > maxvalue) d@0: maxvalue = data[i]; d@0: } d@0: d@0: for(size_t i = 0; i < len; i++) d@0: { d@0: out[i] = data[i] / maxvalue; d@0: } d@0: } d@0: d@0: float AudioSourceFeatureExtractor::EntropyOfEnergy(float* data, size_t len) d@0: { d@0: float totalEnergy = 0.0; d@0: float totalentropy = 0.0; d@0: const int numSubFrames = 30; d@0: d@0: //Calculate Total Energy and normalise it d@0: for (size_t i =0; i < len; i++) d@0: { d@0: totalEnergy += (data[i] * data[i]); d@0: } d@0: d@0: int subFrameSize = len / numSubFrames; d@0: d@0: for (size_t i = 0; i < numSubFrames; i++) d@0: { d@0: float val = 0.0; d@0: d@0: for (size_t j = 0; j < (size_t)subFrameSize; j++) d@0: { d@0: val += (data[j + (i * subFrameSize)] * data[j + (i * subFrameSize)]); d@0: } d@0: d@0: float frameval = val / (totalEnergy + std::numeric_limits::epsilon()); d@0: d@0: totalentropy += (frameval * Log2(frameval + std::numeric_limits::epsilon())); d@0: } d@0: d@0: return (totalentropy * -1); d@0: } d@0: d@0: void AudioSourceFeatureExtractor::PDF_getResampleKrnl(std::vector freqVec, float fsProc, int nfft, int nBin, std::vector &outLogFreqVec, std::vector> &outKrnl) d@0: { d@0: float f1 = freqVec[0]; d@0: float f2 = freqVec[freqVec.size() - 1]; d@0: d@0: outLogFreqVec = LinSpace(log(f1), log(f2), nBin); d@0: std::vector f(nfft / 2, 0.0f); d@0: d@0: for (size_t i = 0; i < (size_t)nBin; i++) d@0: { d@0: outKrnl.push_back(f); d@0: } d@0: d@0: for (size_t i = 0; i < outLogFreqVec.size(); i++) d@0: { d@0: outLogFreqVec[i] = exp(outLogFreqVec[i]); d@0: } d@0: d@0: for (size_t i = 1; i < (size_t)nBin - 2; i++) d@0: { d@0: float freqBinLin = outLogFreqVec[i] / (fsProc / nfft); d@0: d@0: float nextBinUp = outLogFreqVec[i + 1] / (fsProc / nfft); d@0: d@0: float nextBinDown = outLogFreqVec[i - 1] / (fsProc / nfft); d@0: d@0: float filtWid = nextBinUp - nextBinDown; d@0: d@0: if (filtWid <= 2) d@0: { d@0: // log resolution is finer than linear resolution d@0: // linear interpolation of neighboring bins d@0: d@0: float binDown = floor(freqBinLin); d@0: float frac = freqBinLin - binDown; d@0: std::vector filt(nfft/2, 0.0f); d@0: filt[((int)binDown) - 1] = 1 - frac; d@0: filt[((int)binDown + 1) - 1] = frac; d@0: outKrnl[i][((int)binDown) - 1] = filt[((int)binDown) - 1]; d@0: outKrnl[i][((int)binDown + 1) - 1] = filt[((int)binDown + 1) - 1]; d@0: d@0: } d@0: else d@0: { d@0: float ceilNextBinDown = ceil(nextBinDown); d@0: float floorFreqBinLin = floor(freqBinLin); d@0: float ceilFreqBinLin = ceil(freqBinLin); d@0: float floorNextBinUp = floor(nextBinUp); d@0: d@0: std::vector idxLow; d@0: std::vector idxHigh; d@0: std::vector filtIdx; d@0: d@0: for (size_t j = (size_t)ceilNextBinDown; j <= (size_t)floorFreqBinLin; j++ ) d@0: { d@0: idxLow.push_back(j); d@0: } d@0: d@0: for (size_t j = (size_t)ceilFreqBinLin; j <= (size_t)floorNextBinUp; j++ ) d@0: { d@0: idxHigh.push_back(j); d@0: } d@0: d@0: std::vector::iterator it; d@0: it = std::unique (idxLow.begin(), idxLow.end()); d@0: idxLow.resize(std::distance(idxLow.begin(), it)); d@0: d@0: it = std::unique (idxHigh.begin(), idxHigh.end()); d@0: idxHigh.resize(std::distance(idxHigh.begin(), it)); d@0: d@0: filtIdx.reserve(idxLow.size() + idxHigh.size()); // preallocate memory d@0: filtIdx.insert(filtIdx.end(), idxLow.begin(), idxLow.end()); d@0: filtIdx.insert(filtIdx.end(), idxHigh.begin(), idxHigh.end()); d@0: d@0: std::vector rampUp; d@0: std::vector rampDown; d@0: for (size_t j = 0; j < filtIdx.size(); j++) d@0: { d@0: rampUp.push_back(1 - (freqBinLin - filtIdx[j]) / (freqBinLin - nextBinDown)); d@0: rampDown.push_back(1 - (filtIdx[j] - freqBinLin) / (nextBinUp - freqBinLin)); d@0: } d@0: d@0: std::vector filt; d@0: d@0: for (size_t j = 0; j < rampUp.size(); j++) d@0: { d@0: filt.push_back(min(rampUp[j], rampDown[j])); d@0: } d@0: d@0: float sumfilt = 0.0f; d@0: for (size_t j = 0; j < filt.size(); j++) d@0: { d@0: sumfilt += filt[j]; d@0: } d@0: d@0: for (size_t j = 0; j < filt.size(); j++) d@0: { d@0: filt[j] /= sumfilt; d@0: } d@0: d@0: for (size_t j = 0; j < filtIdx.size(); j++) d@0: { d@0: outKrnl[i][filtIdx[j] - 1] = filt[j]; d@0: } d@0: } d@0: } d@0: d@0: // special routine for first bin d@0: // get frequency in linear bins d@0: d@0: float freqBinLin = outLogFreqVec[0] / (fsProc/(float)nfft); d@0: float binDown = floor(freqBinLin); d@0: float frac = freqBinLin - binDown; d@0: d@0: std::vector filt(nfft/2, 0.0f); d@0: filt[((int)binDown) - 1] = 1 - frac; d@0: filt[((int)binDown + 1) - 1] = frac; d@0: d@0: outKrnl[0][((int)binDown) - 1] = filt[((int)binDown) - 1]; d@0: outKrnl[0][((int)binDown + 1) - 1] = filt[((int)binDown + 1) - 1]; d@0: d@0: // special routine for last bin d@0: // get frequency in linear bins d@0: freqBinLin = outLogFreqVec[outLogFreqVec.size() - 1] / (fsProc/nfft); d@0: d@0: // get next lower bin d@0: float nextBinDown = outLogFreqVec[outLogFreqVec.size() - 2] / (fsProc/nfft); d@0: d@0: float ceilNextBinDown = ceil(nextBinDown); //Subtract by -1 because of array d@0: float floorFreqBinLin = floor(freqBinLin); d@0: d@0: std::vector filtIdx; d@0: d@0: for (size_t i = (size_t)ceilNextBinDown; i <= (size_t)floorFreqBinLin; i++ ) d@0: { d@0: filtIdx.push_back(i); d@0: } d@0: d@0: std::vector filt2; d@0: for (size_t i = 0; i < filtIdx.size(); i++) d@0: { d@0: filt2.push_back(1 - (freqBinLin - filtIdx[i]) / (freqBinLin - nextBinDown)); d@0: } d@0: d@0: float sumfilt = 0.0f; d@0: for (size_t i = 0; i < filt2.size(); i++) d@0: { d@0: sumfilt += filt2[i]; d@0: } d@0: d@0: for (size_t i = 0; i < filt2.size(); i++) d@0: { d@0: filt2[i] /= sumfilt; d@0: } d@0: d@0: for (size_t j = 0; j < filtIdx.size(); j++) d@0: { d@0: outKrnl[outKrnl.size() - 1][filtIdx[j] - 1] = filt2[j]; d@0: } d@0: } d@0: d@0: float AudioSourceFeatureExtractor::Log2(float n) d@0: { d@0: // log(n)/log(2) is log2. d@0: return logf( n ) / logf( 2 ); d@0: } d@0: d@0: int AudioSourceFeatureExtractor::Sign(float x) d@0: { d@0: int retValue = 0; d@0: if (x >= 0) d@0: retValue = 1; d@0: if (x < 0) d@0: retValue = -1; d@0: d@0: return retValue; d@0: } d@0: d@0: std::vector AudioSourceFeatureExtractor::LinSpace(float min, float max, int n) d@0: { d@0: std::vector result; d@0: // vector iterator d@0: int iterator = 0; d@0: d@0: for (int i = 0; i <= n-2; i++) d@0: { d@0: float temp = min + i*(max-min)/(floor((float)n) - 1); d@0: result.insert(result.begin() + iterator, temp); d@0: iterator += 1; d@0: } d@0: d@0: result.insert(result.begin() + iterator, max); d@0: return result; d@0: } d@0: d@0: std::vector AudioSourceFeatureExtractor::LogSpace(float min, float max, int n) d@0: { d@0: float logMin = log(min); d@0: float logMax = log(max); d@0: double delta = (logMax - logMin) / n; d@0: double accDelta = 0; d@0: std::vector v; d@0: d@0: for (int i = 0; i <= n; i++) d@0: { d@0: v.push_back((float) exp(logMin + accDelta)); d@0: accDelta += delta;// accDelta = delta * i d@0: } d@0: d@0: return v; d@0: } d@0: