Mercurial > hg > qm-dsp
view dsp/mfcc/MFCC.cpp @ 254:52c1a295d775
...
author | Chris Cannam <c.cannam@qmul.ac.uk> |
---|---|
date | Wed, 16 Jan 2008 18:02:31 +0000 |
parents | c3600d3cfe5c |
children | 9edaa3ce62e8 |
line wrap: on
line source
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ /* QM DSP Library Centre for Digital Music, Queen Mary, University of London. This file copyright 2005 Nicolas Chetry, copyright 2008 QMUL. All rights reserved. */ #include <cmath> #include <cstdlib> #include "MFCC.h" #include "dsp/transforms/FFT.h" #include "base/Window.h" MFCC::MFCC(MFCCConfig config) { int i,j; /* Calculate at startup */ double *freqs, *lower, *center, *upper, *triangleHeight, *fftFreqs; lowestFrequency = 66.6666666; linearFilters = 13; linearSpacing = 66.66666666; logFilters = 27; logSpacing = 1.0711703; /* FFT and analysis window sizes */ fftSize = config.fftsize; totalFilters = linearFilters + logFilters; samplingRate = config.FS; /* The number of cepstral componenents */ nceps = config.nceps; /* Set if user want C0 */ WANT_C0 = (config.want_c0 ? 1 : 0); /* Allocate space for feature vector */ if (WANT_C0 == 1) { ceps = (double*)calloc(nceps+1, sizeof(double)); } else { ceps = (double*)calloc(nceps, sizeof(double)); } /* Allocate space for local vectors */ mfccDCTMatrix = (double**)calloc(nceps+1, sizeof(double*)); for (i=0;i<nceps+1; i++) { mfccDCTMatrix[i]= (double*)calloc(totalFilters, sizeof(double)); } mfccFilterWeights = (double**)calloc(totalFilters, sizeof(double*)); for (i=0;i<totalFilters; i++) { mfccFilterWeights[i] = (double*)calloc(fftSize, sizeof(double)); } freqs = (double*)calloc(totalFilters+2,sizeof(double)); lower = (double*)calloc(totalFilters,sizeof(double)); center = (double*)calloc(totalFilters,sizeof(double)); upper = (double*)calloc(totalFilters,sizeof(double)); triangleHeight = (double*)calloc(totalFilters,sizeof(double)); fftFreqs = (double*)calloc(fftSize,sizeof(double)); for (i=0;i<linearFilters;i++) { freqs[i] = lowestFrequency + ((double)i) * linearSpacing; } for (i=linearFilters; i<totalFilters+2; i++) { freqs[i] = freqs[linearFilters-1] * pow(logSpacing, (double)(i-linearFilters+1)); } /* Define lower, center and upper */ memcpy(lower, freqs,totalFilters*sizeof(double)); memcpy(center, &freqs[1],totalFilters*sizeof(double)); memcpy(upper, &freqs[2],totalFilters*sizeof(double)); for (i=0;i<totalFilters;i++){ triangleHeight[i] = 2./(upper[i]-lower[i]); } for (i=0;i<fftSize;i++){ fftFreqs[i] = ((double) i / ((double) fftSize ) * (double) samplingRate); } /* Build now the mccFilterWeight matrix */ for (i=0;i<totalFilters;i++){ for (j=0;j<fftSize;j++) { if ((fftFreqs[j] > lower[i]) && (fftFreqs[j] <= center[i])) { mfccFilterWeights[i][j] = triangleHeight[i] * (fftFreqs[j]-lower[i]) / (center[i]-lower[i]); } else { mfccFilterWeights[i][j] = 0.0; } if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) { mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + triangleHeight[i] * (upper[i]-fftFreqs[j]) / (upper[i]-center[i]); } else { mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + 0.0; } } } /* * We calculate now mfccDCT matrix * NB: +1 because of the DC component */ const double pi = 3.14159265358979323846264338327950288; for (i=0; i<nceps+1; i++) { for (j=0; j<totalFilters; j++) { mfccDCTMatrix[i][j] = (1./sqrt((double) totalFilters / 2.)) * cos((double) i * ((double) j + 0.5) / (double) totalFilters * pi); } } for (j=0;j<totalFilters;j++){ mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfccDCTMatrix[0][j]; } /* The analysis window */ window = new Window<double>(HammingWindow, fftSize); /* Allocate memory for the FFT */ imagIn = (double*)calloc(fftSize,sizeof(double)); realOut = (double*)calloc(fftSize,sizeof(double)); imagOut = (double*)calloc(fftSize,sizeof(double)); free(freqs); free(lower); free(center); free(upper); free(triangleHeight); free(fftFreqs); } MFCC::~MFCC() { int i; /* Free the structure */ for (i=0;i<nceps+1;i++) { free(mfccDCTMatrix[i]); } free(mfccDCTMatrix); for (i=0;i<totalFilters; i++) { free(mfccFilterWeights[i]); } free(mfccFilterWeights); /* Free the feature vector */ free(ceps); /* The analysis window */ delete window; /* Free the FFT */ free(imagIn); free(realOut); free(imagOut); } /* * * Extract the MFCC on the input frame * * looks like we have to have length = fftSize ?????? * */ int MFCC::process(int length, double *inframe, double *outceps) { int i,j; double *fftMag; double *earMag; double *inputData; double tmp; earMag = (double*)calloc(totalFilters, sizeof(double)); inputData = (double*)calloc(fftSize, sizeof(double)); fftMag = (double*)calloc(fftSize, sizeof(double)); /* Zero-pad if needed */ memcpy(inputData, inframe, length*sizeof(double)); //!!! window->cut(inputData); /* Calculate the fft on the input frame */ FFT::process(fftSize, 0, inputData, imagIn, realOut, imagOut); for (i = 0; i < fftSize; ++i) { fftMag[i] = sqrt(realOut[i] * realOut[i] + imagOut[i] * imagOut[i]); } /* Multiply by mfccFilterWeights */ for (i=0;i<totalFilters;i++) { tmp = 0.; for(j=0;j<fftSize/2; j++) { tmp = tmp + (mfccFilterWeights[i][j]*fftMag[j]); } if (tmp>0) earMag[i] = log10(tmp); else earMag[i] = 0.0; } /* * * Calculate now the cepstral coefficients * with or without the DC component * */ if (WANT_C0==1) { for (i=0;i<nceps+1;i++) { tmp = 0.; for (j=0;j<totalFilters;j++){ tmp = tmp + mfccDCTMatrix[i][j]*earMag[j]; } outceps[i] = tmp; } } else { for (i=1;i<nceps+1;i++) { tmp = 0.; for (j=0;j<totalFilters;j++){ tmp = tmp + mfccDCTMatrix[i][j]*earMag[j]; } outceps[i-1] = tmp; } } free(fftMag); free(earMag); free(inputData); return nceps; }