batch-feature-extraction-tool: Source/.svn/text-base/AudioSourceFeatureExtractor.cpp.svn-base annotate

annotate Source/.svn/text-base/AudioSourceFeatureExtractor.cpp.svn-base @ 15:585caf503ef5 tip

Tidy up for ROLI

author	Geogaddi\David <d.m.ronan@qmul.ac.uk>
date	Tue, 17 May 2016 18:50:19 +0100
parents	25bf17994ef1
children

rev	line source
d@0	1 /*
d@0	2 ==============================================================================
d@0	3
d@0	4 AudioSourceFeatureExtractor.cpp
d@0	5 Created: 11 Aug 2014 10:41:02am
d@0	6 Author: david.ronan
d@0	7
d@0	8 ==============================================================================
d@0	9 */
d@0	10
d@0	11 #include "AudioSourceFeatureExtractor.h"
d@0	12 #include "ObservationData.h"
d@0	13 #include <math.h>
d@0	14 #include <cmath>
d@0	15 #include <algorithm>
d@0	16 #include <string>
d@0	17 #include <windows.h>
d@0	18
d@0	19 //============================================================================== AudioSourceFeatureExtractor
d@0	20 AudioSourceFeatureExtractor::AudioSourceFeatureExtractor() // m_fft(FFTSIZE)
d@0	21 {
d@0	22 m_fft = new FFTW(FFTSIZE);
d@0	23 m_fInputBuffer = new float[FFTSIZE];
d@0	24 m_fMagnitudeSpectrum = new float[MAGNITUDESIZE];
d@0	25 m_fPreviousMagnitudeSpectrum = new float[MAGNITUDESIZE];
d@0	26 m_OutReal = new float[FFTSIZE];
d@0	27 m_OutImag = new float[FFTSIZE];
d@0	28
d@0	29 memset(m_fInputBuffer, 0, FFTSIZE * sizeof(float));
d@0	30 memset(m_fMagnitudeSpectrum, 0, MAGNITUDESIZE * sizeof(float));
d@0	31 memset(m_fPreviousMagnitudeSpectrum, 0, MAGNITUDESIZE * sizeof(float));
d@0	32
d@0	33
d@0	34 }
d@0	35
d@0	36 //============================================================================== ~AudioSourceFeatureExtractor
d@0	37 AudioSourceFeatureExtractor::~AudioSourceFeatureExtractor()
d@0	38 {
d@0	39 if(m_fInputBuffer != NULL)
d@0	40 {
d@0	41 delete m_fInputBuffer; // memory freed up
d@0	42 m_fInputBuffer = nullptr;
d@0	43 }
d@0	44
d@0	45 if(m_fMagnitudeSpectrum != NULL)
d@0	46 {
d@0	47 delete m_fMagnitudeSpectrum; // memory freed up
d@0	48 m_fMagnitudeSpectrum = nullptr;
d@0	49 }
d@0	50
d@0	51 if(m_fPreviousMagnitudeSpectrum != NULL)
d@0	52 {
d@0	53 delete m_fPreviousMagnitudeSpectrum; // memory freed up
d@0	54 m_fPreviousMagnitudeSpectrum = nullptr;
d@0	55 }
d@0	56
d@0	57 if (m_OutReal != NULL)
d@0	58 {
d@0	59 delete[] m_OutReal;
d@0	60 m_OutReal = nullptr;
d@0	61 }
d@0	62
d@0	63 if (m_OutImag != NULL)
d@0	64 {
d@0	65 delete[] m_OutImag;
d@0	66 m_OutImag = nullptr;
d@0	67 }
d@0	68
d@0	69 if (m_fft != NULL)
d@0	70 {
d@0	71 delete m_fft;
d@0	72 m_fft = nullptr;
d@0	73 }
d@0	74
d@0	75 }
d@0	76
d@0	77 //============================================================================== init
d@0	78 void AudioSourceFeatureExtractor::Initialise(float fSampleRate)
d@0	79 {
d@0	80 m_fSampleRate = fSampleRate;
d@0	81 m_iWriteIdx = 0;
d@0	82
d@0	83 for (int i = 1; i <= MAGNITUDESIZE; i++)
d@0	84 {
d@0	85 float win = ((fSampleRate / (2 * MAGNITUDESIZE)) * i);
d@0	86 m_fFreqBins.push_back(win);
d@0	87 }
d@0	88
d@0	89 m_MFCC.initMFFCvariables(12, MAGNITUDESIZE, m_fSampleRate);
d@0	90
d@0	91
d@0	92 }
d@0	93
d@0	94 //============================================================================== process
d@0	95 std::vector<ObservationData> AudioSourceFeatureExtractor::Process( const float* data, size_t numSamples)
d@0	96 {
d@0	97 std::vector<ObservationData> observations = std::vector<ObservationData>();
d@0	98
d@0	99 memset(m_fInputBuffer, 0, FFTSIZE * sizeof(float));
d@0	100 memset(m_fMagnitudeSpectrum, 0, MAGNITUDESIZE * sizeof(float));
d@0	101 memset(m_fPreviousMagnitudeSpectrum, 0, MAGNITUDESIZE * sizeof(float));
d@0	102 m_iWriteIdx = 0;
d@0	103
d@0	104 float perdiodicity = EstimatePerdiodicity(const_cast<float*>(data), numSamples);
d@0	105
d@0	106 float entropyofenergy = EntropyOfEnergy(const_cast<float*>(data), numSamples);
d@0	107
d@0	108 for(size_t n = 0; n < numSamples; n++)
d@0	109 {
d@0	110 m_fInputBuffer[m_iWriteIdx] = data[n];
d@0	111 m_iWriteIdx++;
d@0	112
d@0	113 //Do normal FFT
d@0	114
d@0	115 if(FFTSIZE == m_iWriteIdx)
d@0	116 {
d@0	117 //Get the peak amplitude rms and crest factor for the current frame
d@0	118 float peakvalue = 0;
d@0	119 float rms = 0;
d@0	120 float crestfactor = 0;
d@0	121
d@0	122 for(int i=0; i < FFTSIZE; i++)
d@0	123 {
d@0	124 if(m_fInputBuffer[i] > peakvalue)
d@0	125 peakvalue = abs(m_fInputBuffer[i]);
d@0	126
d@0	127 rms += (m_fInputBuffer[i] * m_fInputBuffer[i]);
d@0	128 }
d@0	129
d@0	130 rms /= FFTSIZE;
d@0	131 rms = sqrt(rms);
d@0	132
d@0	133 crestfactor = peakvalue/(rms + std::numeric_limits<float>::epsilon());
d@0	134
d@0	135 float ZCR = 0;
d@0	136 float inputBuffer2[FFTSIZE] = {0.0};
d@0	137
d@0	138 for (int i = 1; i < FFTSIZE; i++)
d@0	139 {
d@0	140 inputBuffer2[i] = m_fInputBuffer[i -1];
d@0	141 }
d@0	142
d@0	143 for (int i = 0; i < FFTSIZE; i++)
d@0	144 {
d@0	145 ZCR += (abs(Sign(m_fInputBuffer[i]) - Sign(inputBuffer2[i])));
d@0	146 }
d@0	147
d@0	148 ZCR /= (2 * FFTSIZE);
d@0	149
d@0	150 //////////////////////////////////////////////////////////////////
d@0	151 for (int i = 0; i < FFTSIZE; i++)
d@0	152 {
d@0	153 float multiplier = (float) (0.5 * (1 - cos(2PIi/(FFTSIZE -1))));
d@0	154 m_fInputBuffer[i] = multiplier * m_fInputBuffer[i];
d@0	155 }
d@0	156
d@0	157 //Compute the FFT
d@0	158
d@0	159 memset(m_OutReal, 0, sizeof(float)*FFTSIZE);
d@0	160 memset(m_OutImag, 0, sizeof(float)*FFTSIZE);
d@0	161
d@0	162 m_fft->process(m_fInputBuffer, m_OutReal, m_OutImag);
d@0	163
d@0	164 //Compute the Magnitude
d@0	165 VectorDistance(m_OutReal, 1, m_OutImag, 1, m_fMagnitudeSpectrum, 1, MAGNITUDESIZE);
d@0	166
d@0	167 //Compute the spectral features
d@0	168 float centroid = 0;
d@0	169 float spread = 0;
d@0	170 float skewness = 0;
d@0	171 float kurtosis = 0;
d@0	172 float brightness = 0;
d@0	173 float rolloff85 = 0;
d@0	174 float rolloff95 = 0;
d@0	175 float spectralentropy = 0;
d@0	176 float flatness = 0;
d@0	177 float spectralcf = 0;
d@0	178 float spectralflux = 0;
d@0	179 std::vector<float> mfccs;
d@0	180
d@0	181 SpectralFeatures(m_fMagnitudeSpectrum, m_fPreviousMagnitudeSpectrum, MAGNITUDESIZE, centroid, spread, skewness, kurtosis, brightness, rolloff85, rolloff95, spectralentropy, flatness, spectralcf, spectralflux);
d@0	182
d@0	183 m_MFCC.ComputeMFCC(m_fMagnitudeSpectrum, mfccs);
d@0	184
d@0	185 //Update for Spectral Flux
d@0	186 for(int i = 0; i < MAGNITUDESIZE; i++)
d@0	187 {
d@0	188 m_fPreviousMagnitudeSpectrum[i] = m_fMagnitudeSpectrum[i];
d@0	189 }
d@0	190
d@0	191 //50% Hop size
d@0	192 for(int i = MAGNITUDESIZE; i < FFTSIZE; i++)
d@0	193 {
d@0	194 m_fInputBuffer[i - MAGNITUDESIZE] = m_fInputBuffer[i];
d@0	195 m_iWriteIdx = MAGNITUDESIZE;
d@0	196 }
d@0	197
d@0	198 //create an observation for this window and push it to the list of observations for this 30 sec. audio clip
d@0	199 ObservationData newObservation = ObservationData(rms, peakvalue, crestfactor, ZCR, centroid, spread, skewness, kurtosis, brightness, rolloff85, rolloff95, spectralentropy, flatness, spectralcf, spectralflux, mfccs, perdiodicity, entropyofenergy);
d@0	200 observations.push_back(newObservation);
d@0	201
d@0	202 }
d@0	203 }
d@0	204
d@0	205 return observations;
d@0	206 }
d@0	207
d@0	208 void AudioSourceFeatureExtractor::Finalize()
d@0	209 {
d@0	210 m_MFCC.freeMFCCmemory();
d@0	211 }
d@0	212
d@0	213 void AudioSourceFeatureExtractor::VectorDistance(const float* vIn1, int stride1, const float* vIn2, int stride2, float* &vOut, int strideOut, size_t nElements)
d@0	214 {
d@0	215 //Compute remaining samples
d@0	216 for (size_t i = 0; i < nElements; i++)
d@0	217 {
d@0	218 vOut[i* strideOut] = sqrt(vIn1[istride1] vIn1[istride1] + vIn2[istride2] * vIn2[i*stride2]);
d@0	219 }
d@0	220 }
d@0	221
d@0	222 void AudioSourceFeatureExtractor::SpectralFeatures(float* &magnitude, float* &prevmagnitude, size_t windowSize, float &centroid, float &spread, float &skew, float &kurtosis, float &brightness, float &rolloff95, float &rolloff85, float &spectralentropy, float &flatness, float &spectralcf, float &spectralflux)
d@0	223 {
d@0	224 float summagbin = 0;
d@0	225 float summag = 0;
d@0	226 float sumprevmag = 0;
d@0	227 float summagspread = 0;
d@0	228 float summagskewness = 0;
d@0	229 float summagkurtosis = 0;
d@0	230 float sumbrightness = 0;
d@0	231 float maxmag = 0;
d@0	232
d@0	233 float geo = 1;
d@0	234 float totalenergy = 0;
d@0	235 float currEnergy95 = 0;
d@0	236 float currEnergy85 = 0;
d@0	237 int countFFT85 = 0;
d@0	238 int countFFT95 = 0;
d@0	239
d@0	240 float normalisedMagSumEntropy = 0;
d@0	241
d@0	242 //Centroid
d@0	243 //////////////////////////////////////////////////////
d@0	244
d@0	245 for (size_t i = 0; i < windowSize; i++)
d@0	246 {
d@0	247 //Moments
d@0	248 summagbin += (m_fFreqBins[i] * magnitude[i]);
d@0	249 summag += magnitude[i];
d@0	250 sumprevmag += prevmagnitude[i];
d@0	251
d@0	252 //Spectral Crest factor
d@0	253 if(magnitude[i] > maxmag)
d@0	254 {
d@0	255 maxmag = magnitude[i];
d@0	256 }
d@0	257
d@0	258 float energy = magnitude[i] * magnitude[i];
d@0	259
d@0	260 //Roll off + Flatness
d@0	261
d@0	262 totalenergy += energy;
d@0	263 float exponent = (float)(1.0/(float)windowSize);
d@0	264
d@0	265 if(energy != 0)
d@0	266 {
d@0	267 geo *= pow(energy, exponent);
d@0	268 }
d@0	269 }
d@0	270
d@0	271 summag += std::numeric_limits<float>::epsilon();
d@0	272
d@0	273 centroid = summagbin / summag;
d@0	274
d@0	275 /////////////////////////////////////////////////////////
d@0	276
d@0	277 //Spread
d@0	278 //////////////////////////////////////////////////////
d@0	279
d@0	280 for (size_t i = 0; i < windowSize; i++)
d@0	281 {
d@0	282 float norm = magnitude[i] / (summag + std::numeric_limits<float>::epsilon());
d@0	283 float normprev = prevmagnitude[i] / (sumprevmag + std::numeric_limits<float>::epsilon());
d@0	284
d@0	285 //Spectral Flux
d@0	286 spectralflux += ((norm - normprev) * (norm - normprev));
d@0	287
d@0	288 //Entropy
d@0	289 normalisedMagSumEntropy += (norm * log(norm + std::numeric_limits<float>::epsilon()));
d@0	290
d@0	291 //Moments
d@0	292 summagspread += pow((m_fFreqBins[i] - centroid), 2) * norm;
d@0	293 summagskewness += pow((m_fFreqBins[i] - centroid), 3) * norm;
d@0	294 summagkurtosis += pow((m_fFreqBins[i] - centroid), 4) * norm;
d@0	295
d@0	296 //Brightness
d@0	297 if (m_fFreqBins[i] >= 1500)
d@0	298 {
d@0	299 sumbrightness += magnitude[i];
d@0	300 }
d@0	301 }
d@0	302
d@0	303 spread = sqrt(summagspread);
d@0	304
d@0	305 /////////////////////////////////////////////////////////
d@0	306
d@0	307 //Skewness
d@0	308 //////////////////////////////////////////////////////
d@0	309
d@0	310 float spreadtemp = spread;
d@0	311 spreadtemp += std::numeric_limits<float>::epsilon();
d@0	312
d@0	313 skew = summagskewness / pow(spreadtemp, 3);
d@0	314
d@0	315 /////////////////////////////////////////////////////////
d@0	316
d@0	317 //Kurtosis
d@0	318 //////////////////////////////////////////////////////
d@0	319
d@0	320 kurtosis = summagkurtosis / pow(spreadtemp, 4);
d@0	321
d@0	322 /////////////////////////////////////////////////////////
d@0	323
d@0	324 //Brightness
d@0	325 //////////////////////////////////////////////////////
d@0	326
d@0	327 brightness = sumbrightness / summag;
d@0	328
d@0	329 /////////////////////////////////////////////////////////
d@0	330
d@0	331 //Roll off 85% - 95%
d@0	332 /////////////////////////////////////////////////////////
d@0	333
d@0	334 while ((currEnergy95 <= 0.95 * totalenergy) && (countFFT95 < (int)windowSize))
d@0	335 {
d@0	336 currEnergy95 += (magnitude[countFFT95] * magnitude[countFFT95]);
d@0	337 countFFT95 += 1;
d@0	338
d@0	339 if (currEnergy85 <= 0.85 * totalenergy)
d@0	340 {
d@0	341 currEnergy85 += (magnitude[countFFT85] * magnitude[countFFT85]);
d@0	342 countFFT85 += 1;
d@0	343 }
d@0	344
d@0	345 }
d@0	346
d@0	347 countFFT85 -= 1;
d@0	348 countFFT95 -= 1;
d@0	349
d@0	350 //Normalization
d@0	351 if (currEnergy85 <= 0)
d@0	352 {
d@0	353 //If we have silence then our value should be 0.
d@0	354 rolloff85 = 0;
d@0	355 }
d@0	356 else
d@0	357 {
d@0	358 rolloff85 = (((float)countFFT85) / (float)windowSize);
d@0	359 }
d@0	360
d@0	361 //Normalization
d@0	362 if (currEnergy95 <= 0)
d@0	363 {
d@0	364 //If we have silence then our value should be 0.
d@0	365 rolloff95 = 0;
d@0	366 }
d@0	367 else
d@0	368 {
d@0	369 rolloff95 = (((float)countFFT95) / (float)windowSize);
d@0	370 }
d@0	371
d@0	372 /////////////////////////////////////////////////////
d@0	373
d@0	374 //Spectral Entropy
d@0	375 /////////////////////////////////////////////////////////
d@0	376
d@0	377 spectralentropy = (float) ((normalisedMagSumEntropy / log((float)windowSize)) * - 1);
d@0	378
d@0	379 /////////////////////////////////////////////////////////
d@0	380
d@0	381 /////////////////////////////////////////////////////
d@0	382
d@0	383 //Flatness
d@0	384 /////////////////////////////////////////////////////////
d@0	385 if(totalenergy != 0 && geo != 0)
d@0	386 {
d@0	387 flatness = geo / (totalenergy / (float)windowSize);
d@0	388 }
d@0	389 else
d@0	390 {
d@0	391 flatness = 0;
d@0	392 }
d@0	393
d@0	394 /////////////////////////////////////////////////////////
d@0	395
d@0	396 //Spectral Crest Factor
d@0	397 /////////////////////////////////////////////////////////
d@0	398
d@0	399 if(summag != 0)
d@0	400 {
d@0	401 spectralcf = maxmag / summag;
d@0	402 }
d@0	403 else
d@0	404 {
d@0	405 spectralcf = 0;
d@0	406 }
d@0	407 }
d@0	408
d@0	409 void AudioSourceFeatureExtractor::ConvolveFunction(float* &z, float x, float y, size_t &lenz, size_t lenx, size_t leny)
d@0	410 {
d@0	411 // computes cross-correlation of two vectors, takes 2 vectors of the same length and computes 2*length-1 lags
d@0	412
d@0	413 float zptr,s,xp,*yp;
d@0	414 size_t n_lo, n_hi;
d@0	415
d@0	416 lenz = lenx + leny - 1;
d@0	417
d@0	418 z = new float[lenz];
d@0	419
d@0	420 zptr = z;
d@0	421
d@0	422 for (size_t i = 0; i < lenz; i++)
d@0	423 {
d@0	424 s = 0.0;
d@0	425 n_lo = 0 > (i - leny + 1 ) ? 0 : (i - leny + 1);
d@0	426 n_hi = (lenx - 1) < i ? (lenx - 1): i;
d@0	427 xp = &x[0] + n_lo;
d@0	428 yp = &y[0] + i - n_lo;
d@0	429
d@0	430 for (size_t n = n_lo; n <= n_hi; n++)
d@0	431 {
d@0	432 s += xp *yp;
d@0	433 xp++;
d@0	434 yp--;
d@0	435 }
d@0	436
d@0	437 *zptr=s;
d@0	438 zptr++;
d@0	439 }
d@0	440 }
d@0	441
d@0	442 void AudioSourceFeatureExtractor::XCorr(float &output, float input1, float* input2, size_t &outputlen, size_t inputlen, size_t maxLag = 0)
d@0	443 {
d@0	444 // TODO: adapt for different sizes
d@0	445
d@0	446 // find next power of 2 for efficient FFT
d@0	447 size_t fftSize = 1;
d@0	448 while (fftSize < 2*static_cast<size_t>(inputlen)-1)
d@0	449 {
d@0	450 fftSize *= 2;
d@0	451 }
d@0	452
d@0	453 FFTW fft1 = FFTW(fftSize);
d@0	454 FFTW fft2 = FFTW(fftSize);
d@0	455
d@0	456 // allocate space for FFTW processing
d@0	457 float *in1 = new float[fftSize];
d@0	458 float *in2 = new float[fftSize];
d@0	459
d@0	460 // output from FFT is only up to Nyquist frequency
d@0	461 float *out1Real = new float[fftSize];
d@0	462 float *out1Imag = new float[fftSize];
d@0	463 float *out2Real = new float[fftSize];
d@0	464 float *out2Imag = new float[fftSize];
d@0	465
d@0	466 memset(in1, 0, sizeof(float)*fftSize);
d@0	467 memset(in2, 0, sizeof(float)*fftSize);
d@0	468 memset(out1Real, 0, sizeof(float)*fftSize);
d@0	469 memset(out1Imag, 0, sizeof(float)*fftSize);
d@0	470 memset(out2Real, 0, sizeof(float)*fftSize);
d@0	471 memset(out2Imag, 0, sizeof(float)*fftSize);
d@0	472
d@0	473 // copy input to allocated arrays
d@0	474 for (size_t i = 0; i < static_cast<size_t>(inputlen); ++i)
d@0	475 {
d@0	476 in1[i] = input1[i];
d@0	477 in2[i] = input2[i];
d@0	478 }
d@0	479
d@0	480 fft1.process(in1, out1Real, out1Imag);
d@0	481 fft2.process(in2, out2Real, out2Imag);
d@0	482
d@0	483 // multiply out1*conj(out2) in FFT domain
d@0	484 fftwf_complex product = (fftwf_complex)fftwf_malloc(sizeof(fftwf_complex)*fftSize);
d@0	485 for (size_t i = 0; i < fftSize; ++i)
d@0	486 {
d@0	487 product[i][0] = (out1Real[i]out2Real[i] + out1Imag[i]out2Imag[i]);
d@0	488 product[i][1] = (out1Imag[i]out2Real[i] - out1Real[i]out2Imag[i]);
d@0	489 }
d@0	490
d@0	491 // compute IFFT
d@0	492
d@0	493 // do FFT, adapted from iTraktorLib "FFTW.h"
d@0	494 fftw_iodim dim;
d@0	495 dim.n = fftSize;
d@0	496 dim.is = 1;
d@0	497 dim.os = 1;
d@0	498
d@0	499 fftwf_plan plan;
d@0	500
d@0	501 float result = (float) fftwf_malloc(sizeof(float)*fftSize);
d@0	502 plan = fftwf_plan_guru_dft_c2r(1, &dim, 0, NULL, product, result, FFTW_ESTIMATE);
d@0	503 fftwf_execute_dft_c2r(plan, product, result);
d@0	504 fftwf_destroy_plan(plan);
d@0	505
d@0	506 // copy real part of result back to output array, normalize by FFT size
d@0	507 if (maxLag == 0 \|\| maxLag >= static_cast<size_t>(inputlen))
d@0	508 {
d@0	509 maxLag = static_cast<size_t>(inputlen)-1;
d@0	510 }
d@0	511
d@0	512 output = new float[2 * maxLag + 1];
d@0	513 outputlen = 2 * maxLag + 1;
d@0	514
d@0	515 for (size_t i = fftSize - maxLag; i < fftSize; ++i)
d@0	516 {
d@0	517 output[i - fftSize + maxLag] = result[i] / fftSize;
d@0	518 }
d@0	519
d@0	520 for (unsigned i = 0; i <= (unsigned)maxLag; ++i)
d@0	521 {
d@0	522 output[i + maxLag] = result[i] / fftSize;
d@0	523 }
d@0	524
d@0	525 fftwf_free(result);
d@0	526 fftwf_free(product);
d@0	527
d@0	528 if(in1 != NULL)
d@0	529 {
d@0	530 delete[] in1;
d@0	531 in1 = nullptr;
d@0	532 }
d@0	533
d@0	534 if(in2 != NULL)
d@0	535 {
d@0	536 delete[] in2;
d@0	537 in2 = nullptr;
d@0	538 }
d@0	539
d@0	540 if(out1Real != NULL)
d@0	541 {
d@0	542 delete[] out1Real;
d@0	543 out1Real= nullptr;
d@0	544 }
d@0	545
d@0	546 if(out1Imag != NULL)
d@0	547 {
d@0	548 delete[] out1Imag;
d@0	549 out1Imag= nullptr;
d@0	550 }
d@0	551
d@0	552 if(out2Real != NULL)
d@0	553 {
d@0	554 delete[] out2Real;
d@0	555 out2Real= nullptr;
d@0	556 }
d@0	557
d@0	558 if(out2Imag != NULL)
d@0	559 {
d@0	560 delete[] out2Imag;
d@0	561 out2Imag= nullptr;
d@0	562 }
d@0	563 }
d@0	564
d@0	565 float AudioSourceFeatureExtractor::EstimatePerdiodicity(float* data, size_t numSamples)
d@0	566 {
d@0	567 float periodicity = 0.0;
d@0	568 float* downsampleddata = new float[numSamples];
d@0	569 memset(downsampleddata, 0, sizeof(float)*numSamples);
d@0	570 float* xcorr = nullptr;
d@0	571
d@0	572 for(size_t k = 0; k < numSamples; k++)
d@0	573 {
d@0	574 downsampleddata[k] = data[k];
d@0	575 }
d@0	576
d@0	577 //DownSampler(data, downsampleddata, numSamples, downsamplelength, m_fSampleRate, 11025.0);
d@0	578
d@0	579 EnvelopeCurve(downsampleddata, downsampleddata, numSamples, m_fSampleRate);
d@0	580
d@0	581 int startLag = (int)floor((float)(60.0 / 480.0 * m_fSampleRate));
d@0	582 int endLag = (int)floor((float)(60.0 / 30.0 * m_fSampleRate));
d@0	583 int thresh = (int)floor((float)(.2 * m_fSampleRate));
d@0	584
d@0	585 //Normalise
d@0	586 //Normalise(downsampleddata, downsampleddata, downsamplelength);
d@0	587
d@0	588 size_t xcorrlen = 0;
d@0	589 XCorr(xcorr, downsampleddata, downsampleddata, xcorrlen, numSamples, endLag);
d@0	590
d@0	591 int i = (int)floor(xcorrlen / 2);
d@0	592
d@0	593 std::vector<float> temp;
d@0	594 for(size_t j = i; j < xcorrlen; j++)
d@0	595 {
d@0	596 temp.push_back(xcorr[j]);
d@0	597 }
d@0	598
d@0	599 std::vector<float> temp2;
d@0	600 for(size_t j = (size_t)max(1,startLag); j < (size_t)min(endLag,temp.size()); j++)
d@0	601 {
d@0	602 temp2.push_back(temp[j]);
d@0	603 }
d@0	604
d@0	605 int maxIdx = 0;
d@0	606 float maxVal = std::numeric_limits<float>::lowest();
d@0	607 for(size_t j =0; j < temp2.size(); j++)
d@0	608 {
d@0	609 if(temp2[j] > maxVal)
d@0	610 {
d@0	611 maxIdx = j;
d@0	612 maxVal = temp2[j];
d@0	613 }
d@0	614 }
d@0	615
d@0	616 int from = max(1, maxIdx-thresh);
d@0	617 int to = min(temp2.size(), maxIdx+thresh);
d@0	618
d@0	619 float minValInRange = FLT_MAX;
d@0	620
d@0	621 for(size_t j = (size_t)from; j < to; j++)
d@0	622 {
d@0	623 if(temp2[j] < minValInRange)
d@0	624 {
d@0	625 minValInRange = temp2[j];
d@0	626 }
d@0	627 }
d@0	628
d@0	629 periodicity = (maxVal-minValInRange);
d@0	630
d@0	631 std::string dave = "Periodicity " + std::to_string(periodicity) + "\n";
d@0	632 OutputDebugString(dave.c_str());
d@0	633
d@0	634 if(xcorr != NULL)
d@0	635 {
d@0	636 delete[] xcorr;
d@0	637 xcorr = nullptr;
d@0	638 }
d@0	639
d@0	640 if(downsampleddata != NULL)
d@0	641 {
d@0	642 delete[] downsampleddata;
d@0	643 downsampleddata = nullptr;
d@0	644 }
d@0	645
d@0	646 return periodicity;
d@0	647 }
d@0	648
d@0	649 void AudioSourceFeatureExtractor::DownSampler(float* data, float* &out, size_t lenIn, size_t &lenOut, float currentSampleRate, float futureSampleRate)
d@0	650 {
d@0	651 ////Low pass our data before we decimate
d@0	652 //const int filterlength = 101;
d@0	653 //float* tempdata = new float[lenIn];
d@0	654 //memset(tempdata, 0, sizeof(float)*lenIn);
d@0	655
d@0	656 ////Coefficients were taken from Matlab
d@0	657 //float coeffs[filterlength] ={2.0839274e-05f, 6.6880953e-06f,-4.7252855e-05f, -3.4874138e-05f, 7.8508412e-05f,9.7089069e-05f,-9.9197161e-05f, -0.00020412984f, 8.2261082e-05f, 0.00035689832f, 9.4890293e-06f, -0.00053820928f, -0.00021722882f, 0.00070567406f, 0.00057491515f, -0.00078844035f, -0.0010939316f, 0.00069057313f, 0.0017460365f, -0.00030308162f, -0.0024484061f, -0.00047496572f, 0.0030552838f, 0.0017078921f, -0.0033604759f, -0.0033912712f, 0.0031135236f, 0.0054217125f, -0.0020499325f, -0.0075746416f, -6.7239242e-05f, 0.0094950572f, 0.0034002098f, -0.010703080f, -0.0079918290f, 0.010610434f, 0.013732497f, -0.0085344436f, -0.020346783f, 0.0036776769f, 0.027405130f, 0.0050050775f, -0.034361813f, -0.019287759f, 0.040615436f, 0.043452863f,-0.045583628f, -0.093336113f, 0.048780452f, 0.31394306f, 0.45011634f, 0.31394306f, 0.048780452f,-0.093336113f,-0.045583628f, 0.043452863f,0.040615436f,-0.019287759f, -0.034361813f, 0.0050050775f, 0.027405130f,0.0036776769f,-0.020346783f, -0.0085344436f, 0.013732497f, 0.010610434f, -0.0079918290f, -0.010703080f, 0.0034002098f, 0.0094950572f, -6.7239242e-05f, -0.0075746416f, -0.0020499325f, 0.0054217125f, 0.0031135236f, -0.0033912712f, -0.0033604759f, 0.0017078921f, 0.0030552838f, -0.00047496572f, -0.0024484061f, -0.00030308162f, 0.0017460365f,0.00069057313f, -0.0010939316f, -0.00078844035f, 0.00057491515f, 0.00070567406f, -0.00021722882f, -0.00053820928f, 9.4890293e-06f, 0.00035689832f, 8.2261082e-05f, -0.00020412984f, -9.9197161e-05f, 9.7089069e-05f, 7.8508412e-05f, -3.4874138e-05f, -4.7252855e-05f, 6.6880953e-06f, 2.0839274e-05f};
d@0	658 //
d@0	659 //float acc; // accumulator for MACs
d@0	660 // float* coeffp; // pointer to coefficients
d@0	661 // float* inputp; // pointer to input samples
d@0	662 //float* endp = &data[lenIn -1]; // pointer to last sample
d@0	663 //
d@0	664 //
d@0	665 // // apply the filter to each input sample
d@0	666 // for (size_t n = 0; n < lenIn; n++ )
d@0	667 //{
d@0	668 // // calculate output n
d@0	669 // coeffp = &coeffs[0];
d@0	670 // inputp = &data[filterlength - 1 + n];
d@0	671
d@0	672 // acc = 0.0f;
d@0	673 // for (int k = 0; k < filterlength; k++ )
d@0	674 // {
d@0	675 // if(inputp <= endp)
d@0	676 // acc += (coeffp++) (*inputp--);
d@0	677 // else
d@0	678 // {
d@0	679 // //When we reach the end of the buffer
d@0	680 // acc += (coeffp++) 0.0f;
d@0	681 // *inputp--;
d@0	682 // }
d@0	683 // }
d@0	684
d@0	685 // tempdata[n] = acc;
d@0	686 // }
d@0	687 //int ratio = (int) (currentSampleRate / futureSampleRate);
d@0	688 //
d@0	689 //lenOut = lenIn / ratio;
d@0	690
d@0	691 //out = new float[lenOut];
d@0	692 //memset(out, 0, sizeof(float)*lenOut);
d@0	693
d@0	694
d@0	695 //int idx = 0;
d@0	696 //for(size_t i = 0; i < lenIn; i = i + ratio)
d@0	697 //{
d@0	698 // out[idx] = tempdata[i];
d@0	699 // idx++;
d@0	700 //}
d@0	701
d@0	702 //if(tempdata != NULL)
d@0	703 //{
d@0	704 // delete[] tempdata;
d@0	705 // tempdata = nullptr;
d@0	706 //}
d@0	707 }
d@0	708
d@0	709 void AudioSourceFeatureExtractor::EnvelopeCurve(float* data, float* &out, size_t dataLen, float sampleRate)
d@0	710 {
d@0	711 float release = 0.02f;
d@0	712 float envelope = 0.0f;
d@0	713
d@0	714 float gr = exp(-1 / (sampleRate * release));
d@0	715
d@0	716 for (size_t i = 0; i < dataLen; i++)
d@0	717 {
d@0	718 float EnvIn = abs(data[i]);
d@0	719
d@0	720 if(envelope < EnvIn)
d@0	721 {
d@0	722 envelope = EnvIn;
d@0	723 }
d@0	724 else
d@0	725 {
d@0	726 envelope = envelope * gr;
d@0	727 envelope = envelope + (1 - gr) * EnvIn;
d@0	728 }
d@0	729
d@0	730 out[i] = envelope;
d@0	731 }
d@0	732 }
d@0	733
d@0	734 void AudioSourceFeatureExtractor::Normalise(float* data, float* &out, size_t len)
d@0	735 {
d@0	736 float maxvalue = std::numeric_limits<float>::lowest();
d@0	737
d@0	738 for(size_t i = 0; i < len; i++)
d@0	739 {
d@0	740 if(data[i] > maxvalue)
d@0	741 maxvalue = data[i];
d@0	742 }
d@0	743
d@0	744 for(size_t i = 0; i < len; i++)
d@0	745 {
d@0	746 out[i] = data[i] / maxvalue;
d@0	747 }
d@0	748 }
d@0	749
d@0	750 float AudioSourceFeatureExtractor::EntropyOfEnergy(float* data, size_t len)
d@0	751 {
d@0	752 float totalEnergy = 0.0;
d@0	753 float totalentropy = 0.0;
d@0	754 const int numSubFrames = 30;
d@0	755
d@0	756 //Calculate Total Energy and normalise it
d@0	757 for (size_t i =0; i < len; i++)
d@0	758 {
d@0	759 totalEnergy += (data[i] * data[i]);
d@0	760 }
d@0	761
d@0	762 int subFrameSize = len / numSubFrames;
d@0	763
d@0	764 for (size_t i = 0; i < numSubFrames; i++)
d@0	765 {
d@0	766 float val = 0.0;
d@0	767
d@0	768 for (size_t j = 0; j < (size_t)subFrameSize; j++)
d@0	769 {
d@0	770 val += (data[j + (i * subFrameSize)] * data[j + (i * subFrameSize)]);
d@0	771 }
d@0	772
d@0	773 float frameval = val / (totalEnergy + std::numeric_limits<float>::epsilon());
d@0	774
d@0	775 totalentropy += (frameval * Log2(frameval + std::numeric_limits<float>::epsilon()));
d@0	776 }
d@0	777
d@0	778 return (totalentropy * -1);
d@0	779 }
d@0	780
d@0	781 void AudioSourceFeatureExtractor::PDF_getResampleKrnl(std::vector<float> freqVec, float fsProc, int nfft, int nBin, std::vector<float> &outLogFreqVec, std::vector<std::vector<float>> &outKrnl)
d@0	782 {
d@0	783 float f1 = freqVec[0];
d@0	784 float f2 = freqVec[freqVec.size() - 1];
d@0	785
d@0	786 outLogFreqVec = LinSpace(log(f1), log(f2), nBin);
d@0	787 std::vector<float> f(nfft / 2, 0.0f);
d@0	788
d@0	789 for (size_t i = 0; i < (size_t)nBin; i++)
d@0	790 {
d@0	791 outKrnl.push_back(f);
d@0	792 }
d@0	793
d@0	794 for (size_t i = 0; i < outLogFreqVec.size(); i++)
d@0	795 {
d@0	796 outLogFreqVec[i] = exp(outLogFreqVec[i]);
d@0	797 }
d@0	798
d@0	799 for (size_t i = 1; i < (size_t)nBin - 2; i++)
d@0	800 {
d@0	801 float freqBinLin = outLogFreqVec[i] / (fsProc / nfft);
d@0	802
d@0	803 float nextBinUp = outLogFreqVec[i + 1] / (fsProc / nfft);
d@0	804
d@0	805 float nextBinDown = outLogFreqVec[i - 1] / (fsProc / nfft);
d@0	806
d@0	807 float filtWid = nextBinUp - nextBinDown;
d@0	808
d@0	809 if (filtWid <= 2)
d@0	810 {
d@0	811 // log resolution is finer than linear resolution
d@0	812 // linear interpolation of neighboring bins
d@0	813
d@0	814 float binDown = floor(freqBinLin);
d@0	815 float frac = freqBinLin - binDown;
d@0	816 std::vector<float> filt(nfft/2, 0.0f);
d@0	817 filt[((int)binDown) - 1] = 1 - frac;
d@0	818 filt[((int)binDown + 1) - 1] = frac;
d@0	819 outKrnl[i][((int)binDown) - 1] = filt[((int)binDown) - 1];
d@0	820 outKrnl[i][((int)binDown + 1) - 1] = filt[((int)binDown + 1) - 1];
d@0	821
d@0	822 }
d@0	823 else
d@0	824 {
d@0	825 float ceilNextBinDown = ceil(nextBinDown);
d@0	826 float floorFreqBinLin = floor(freqBinLin);
d@0	827 float ceilFreqBinLin = ceil(freqBinLin);
d@0	828 float floorNextBinUp = floor(nextBinUp);
d@0	829
d@0	830 std::vector<int> idxLow;
d@0	831 std::vector<int> idxHigh;
d@0	832 std::vector<int> filtIdx;
d@0	833
d@0	834 for (size_t j = (size_t)ceilNextBinDown; j <= (size_t)floorFreqBinLin; j++ )
d@0	835 {
d@0	836 idxLow.push_back(j);
d@0	837 }
d@0	838
d@0	839 for (size_t j = (size_t)ceilFreqBinLin; j <= (size_t)floorNextBinUp; j++ )
d@0	840 {
d@0	841 idxHigh.push_back(j);
d@0	842 }
d@0	843
d@0	844 std::vector<int>::iterator it;
d@0	845 it = std::unique (idxLow.begin(), idxLow.end());
d@0	846 idxLow.resize(std::distance(idxLow.begin(), it));
d@0	847
d@0	848 it = std::unique (idxHigh.begin(), idxHigh.end());
d@0	849 idxHigh.resize(std::distance(idxHigh.begin(), it));
d@0	850
d@0	851 filtIdx.reserve(idxLow.size() + idxHigh.size()); // preallocate memory
d@0	852 filtIdx.insert(filtIdx.end(), idxLow.begin(), idxLow.end());
d@0	853 filtIdx.insert(filtIdx.end(), idxHigh.begin(), idxHigh.end());
d@0	854
d@0	855 std::vector<float> rampUp;
d@0	856 std::vector<float> rampDown;
d@0	857 for (size_t j = 0; j < filtIdx.size(); j++)
d@0	858 {
d@0	859 rampUp.push_back(1 - (freqBinLin - filtIdx[j]) / (freqBinLin - nextBinDown));
d@0	860 rampDown.push_back(1 - (filtIdx[j] - freqBinLin) / (nextBinUp - freqBinLin));
d@0	861 }
d@0	862
d@0	863 std::vector<float> filt;
d@0	864
d@0	865 for (size_t j = 0; j < rampUp.size(); j++)
d@0	866 {
d@0	867 filt.push_back(min(rampUp[j], rampDown[j]));
d@0	868 }
d@0	869
d@0	870 float sumfilt = 0.0f;
d@0	871 for (size_t j = 0; j < filt.size(); j++)
d@0	872 {
d@0	873 sumfilt += filt[j];
d@0	874 }
d@0	875
d@0	876 for (size_t j = 0; j < filt.size(); j++)
d@0	877 {
d@0	878 filt[j] /= sumfilt;
d@0	879 }
d@0	880
d@0	881 for (size_t j = 0; j < filtIdx.size(); j++)
d@0	882 {
d@0	883 outKrnl[i][filtIdx[j] - 1] = filt[j];
d@0	884 }
d@0	885 }
d@0	886 }
d@0	887
d@0	888 // special routine for first bin
d@0	889 // get frequency in linear bins
d@0	890
d@0	891 float freqBinLin = outLogFreqVec[0] / (fsProc/(float)nfft);
d@0	892 float binDown = floor(freqBinLin);
d@0	893 float frac = freqBinLin - binDown;
d@0	894
d@0	895 std::vector<float> filt(nfft/2, 0.0f);
d@0	896 filt[((int)binDown) - 1] = 1 - frac;
d@0	897 filt[((int)binDown + 1) - 1] = frac;
d@0	898
d@0	899 outKrnl[0][((int)binDown) - 1] = filt[((int)binDown) - 1];
d@0	900 outKrnl[0][((int)binDown + 1) - 1] = filt[((int)binDown + 1) - 1];
d@0	901
d@0	902 // special routine for last bin
d@0	903 // get frequency in linear bins
d@0	904 freqBinLin = outLogFreqVec[outLogFreqVec.size() - 1] / (fsProc/nfft);
d@0	905
d@0	906 // get next lower bin
d@0	907 float nextBinDown = outLogFreqVec[outLogFreqVec.size() - 2] / (fsProc/nfft);
d@0	908
d@0	909 float ceilNextBinDown = ceil(nextBinDown); //Subtract by -1 because of array
d@0	910 float floorFreqBinLin = floor(freqBinLin);
d@0	911
d@0	912 std::vector<int> filtIdx;
d@0	913
d@0	914 for (size_t i = (size_t)ceilNextBinDown; i <= (size_t)floorFreqBinLin; i++ )
d@0	915 {
d@0	916 filtIdx.push_back(i);
d@0	917 }
d@0	918
d@0	919 std::vector<float> filt2;
d@0	920 for (size_t i = 0; i < filtIdx.size(); i++)
d@0	921 {
d@0	922 filt2.push_back(1 - (freqBinLin - filtIdx[i]) / (freqBinLin - nextBinDown));
d@0	923 }
d@0	924
d@0	925 float sumfilt = 0.0f;
d@0	926 for (size_t i = 0; i < filt2.size(); i++)
d@0	927 {
d@0	928 sumfilt += filt2[i];
d@0	929 }
d@0	930
d@0	931 for (size_t i = 0; i < filt2.size(); i++)
d@0	932 {
d@0	933 filt2[i] /= sumfilt;
d@0	934 }
d@0	935
d@0	936 for (size_t j = 0; j < filtIdx.size(); j++)
d@0	937 {
d@0	938 outKrnl[outKrnl.size() - 1][filtIdx[j] - 1] = filt2[j];
d@0	939 }
d@0	940 }
d@0	941
d@0	942 float AudioSourceFeatureExtractor::Log2(float n)
d@0	943 {
d@0	944 // log(n)/log(2) is log2.
d@0	945 return logf( n ) / logf( 2 );
d@0	946 }
d@0	947
d@0	948 int AudioSourceFeatureExtractor::Sign(float x)
d@0	949 {
d@0	950 int retValue = 0;
d@0	951 if (x >= 0)
d@0	952 retValue = 1;
d@0	953 if (x < 0)
d@0	954 retValue = -1;
d@0	955
d@0	956 return retValue;
d@0	957 }
d@0	958
d@0	959 std::vector<float> AudioSourceFeatureExtractor::LinSpace(float min, float max, int n)
d@0	960 {
d@0	961 std::vector<float> result;
d@0	962 // vector iterator
d@0	963 int iterator = 0;
d@0	964
d@0	965 for (int i = 0; i <= n-2; i++)
d@0	966 {
d@0	967 float temp = min + i*(max-min)/(floor((float)n) - 1);
d@0	968 result.insert(result.begin() + iterator, temp);
d@0	969 iterator += 1;
d@0	970 }
d@0	971
d@0	972 result.insert(result.begin() + iterator, max);
d@0	973 return result;
d@0	974 }
d@0	975
d@0	976 std::vector<float> AudioSourceFeatureExtractor::LogSpace(float min, float max, int n)
d@0	977 {
d@0	978 float logMin = log(min);
d@0	979 float logMax = log(max);
d@0	980 double delta = (logMax - logMin) / n;
d@0	981 double accDelta = 0;
d@0	982 std::vector<float> v;
d@0	983
d@0	984 for (int i = 0; i <= n; i++)
d@0	985 {
d@0	986 v.push_back((float) exp(logMin + accDelta));
d@0	987 accDelta += delta;// accDelta = delta * i
d@0	988 }
d@0	989
d@0	990 return v;
d@0	991 }
d@0	992

Mercurial > hg > batch-feature-extraction-tool

annotate Source/.svn/text-base/AudioSourceFeatureExtractor.cpp.svn-base @ 15:585caf503ef5 tip