Mercurial > hg > qm-dsp
diff dsp/mfcc/MFCC.cpp @ 26:d096a79fa772
* Add timbral (MFCC) feature option to segmenter
author | cannam |
---|---|
date | Thu, 10 Jan 2008 16:41:33 +0000 |
parents | dsp/mfcc/MFCC.c@54a962727271 |
children | 1c9c4d2c0592 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dsp/mfcc/MFCC.cpp Thu Jan 10 16:41:33 2008 +0000 @@ -0,0 +1,264 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + QM DSP Library + + Centre for Digital Music, Queen Mary, University of London. + This file copyright 2005 Nicolas Chetry, copyright 2008 QMUL. + All rights reserved. +*/ + +#include <cmath> +#include <cstdlib> + +#include "MFCC.h" +#include "dsp/transforms/FFT.h" +#include "base/Window.h" + +MFCC::MFCC(MFCCConfig config) +{ + int i,j; + + /* Calculate at startup */ + double *freqs, *lower, *center, *upper, *triangleHeight, *fftFreqs; + + lowestFrequency = 66.6666666; + linearFilters = 13; + linearSpacing = 66.66666666; + logFilters = 27; + logSpacing = 1.0711703; + + /* FFT and analysis window sizes */ + fftSize = config.fftsize; + + totalFilters = linearFilters + logFilters; + + samplingRate = config.FS; + + /* The number of cepstral componenents */ + nceps = config.nceps; + + /* Set if user want C0 */ + WANT_C0 = (config.want_c0 ? 1 : 0); + + /* Allocate space for feature vector */ + if (WANT_C0 == 1) { + ceps = (double*)calloc(nceps+1, sizeof(double)); + } else { + ceps = (double*)calloc(nceps, sizeof(double)); + } + + /* Allocate space for local vectors */ + mfccDCTMatrix = (double**)calloc(nceps+1, sizeof(double*)); + for (i=0;i<nceps+1; i++) { + mfccDCTMatrix[i]= (double*)calloc(totalFilters, sizeof(double)); + } + + mfccFilterWeights = (double**)calloc(totalFilters, sizeof(double*)); + for (i=0;i<totalFilters; i++) { + mfccFilterWeights[i] = (double*)calloc(fftSize, sizeof(double)); + } + + freqs = (double*)calloc(totalFilters+2,sizeof(double)); + + lower = (double*)calloc(totalFilters,sizeof(double)); + center = (double*)calloc(totalFilters,sizeof(double)); + upper = (double*)calloc(totalFilters,sizeof(double)); + + triangleHeight = (double*)calloc(totalFilters,sizeof(double)); + fftFreqs = (double*)calloc(fftSize,sizeof(double)); + + for (i=0;i<linearFilters;i++) { + freqs[i] = lowestFrequency + ((double)i) * linearSpacing; + } + + for (i=linearFilters; i<totalFilters+2; i++) { + freqs[i] = freqs[linearFilters-1] * + pow(logSpacing, (double)(i-linearFilters+1)); + } + + /* Define lower, center and upper */ + memcpy(lower, freqs,totalFilters*sizeof(double)); + memcpy(center, &freqs[1],totalFilters*sizeof(double)); + memcpy(upper, &freqs[2],totalFilters*sizeof(double)); + + for (i=0;i<totalFilters;i++){ + triangleHeight[i] = 2./(upper[i]-lower[i]); + } + + for (i=0;i<fftSize;i++){ + fftFreqs[i] = ((double) i / ((double) fftSize ) * + (double) samplingRate); + } + + /* Build now the mccFilterWeight matrix */ + for (i=0;i<totalFilters;i++){ + + for (j=0;j<fftSize;j++) { + + if ((fftFreqs[j] > lower[i]) && (fftFreqs[j] <= center[i])) { + + mfccFilterWeights[i][j] = triangleHeight[i] * + (fftFreqs[j]-lower[i]) / (center[i]-lower[i]); + + } + else + { + mfccFilterWeights[i][j] = 0.0; + } + + if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) { + + mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + triangleHeight[i] * (upper[i]-fftFreqs[j]) + / (upper[i]-center[i]); + } + else + { + mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + 0.0; + } + } + + } + + /* + * We calculate now mfccDCT matrix + * NB: +1 because of the DC component + */ + + for (i=0; i<nceps+1; i++) { + for (j=0; j<totalFilters; j++) { + mfccDCTMatrix[i][j] = (1./sqrt((double) totalFilters / 2.)) + * cos((double) i * ((double) j + 0.5) / (double) totalFilters * M_PI); + } + } + + for (j=0;j<totalFilters;j++){ + mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfccDCTMatrix[0][j]; + } + + /* The analysis window */ + window = new Window<double>(HammingWindow, fftSize); + + /* Allocate memory for the FFT */ + imagIn = (double*)calloc(fftSize,sizeof(double)); + realOut = (double*)calloc(fftSize,sizeof(double)); + imagOut = (double*)calloc(fftSize,sizeof(double)); + + free(freqs); + free(lower); + free(center); + free(upper); + free(triangleHeight); + free(fftFreqs); +} + +MFCC::~MFCC() +{ + int i; + + /* Free the structure */ + for (i=0;i<nceps+1;i++) { + free(mfccDCTMatrix[i]); + } + free(mfccDCTMatrix); + + for (i=0;i<totalFilters; i++) { + free(mfccFilterWeights[i]); + } + free(mfccFilterWeights); + + /* Free the feature vector */ + free(ceps); + + /* The analysis window */ + delete window; + + /* Free the FFT */ + free(imagIn); + free(realOut); + free(imagOut); +} + + +/* + * + * Extract the MFCC on the input frame + * + * looks like we have to have length = fftSize ?????? + * + */ +int MFCC::process(int length, double *inframe, double *outceps) +{ + int i,j; + + double *fftMag; + double *earMag; + + double *inputData; + + double tmp; + + earMag = (double*)calloc(totalFilters, sizeof(double)); + inputData = (double*)calloc(fftSize, sizeof(double)); + fftMag = (double*)calloc(fftSize, sizeof(double)); + + /* Zero-pad if needed */ + memcpy(inputData, inframe, length*sizeof(double)); + + window->cut(inputData); + + /* Calculate the fft on the input frame */ + FFT::process(fftSize, 0, inputData, imagIn, realOut, imagOut); + + for (i = 0; i < fftSize; ++i) { + fftMag[i] = sqrt(realOut[i] * realOut[i] + + imagOut[i] * imagOut[i]); + } + + /* Multiply by mfccFilterWeights */ + for (i=0;i<totalFilters;i++) { + tmp = 0.; + for(j=0;j<fftSize/2; j++) { + tmp = tmp + (mfccFilterWeights[i][j]*fftMag[j]); + } + if (tmp>0) + earMag[i] = log10(tmp); + else + earMag[i] = 0.0; + } + + /* + * + * Calculate now the cepstral coefficients + * with or without the DC component + * + */ + + if (WANT_C0==1) { + + for (i=0;i<nceps+1;i++) { + tmp = 0.; + for (j=0;j<totalFilters;j++){ + tmp = tmp + mfccDCTMatrix[i][j]*earMag[j]; + } + outceps[i] = tmp; + } + } + else + { + for (i=1;i<nceps+1;i++) { + tmp = 0.; + for (j=0;j<totalFilters;j++){ + tmp = tmp + mfccDCTMatrix[i][j]*earMag[j]; + } + outceps[i-1] = tmp; + } + } + + free(fftMag); + free(earMag); + free(inputData); + + return nceps; +} +