Mercurial > hg > qm-dsp
changeset 255:9edaa3ce62e8
* Make MFCC able to accept already-FFT'd input, and simplify API a bit
* Add log power value to MFCC, restore windowing, and avoid some heap allocs
* In HMM, bail out of iteration if loglik hits NaN
author | Chris Cannam <c.cannam@qmul.ac.uk> |
---|---|
date | Fri, 18 Jan 2008 13:24:12 +0000 |
parents | 52c1a295d775 |
children | 43943a4382ef |
files | dsp/mfcc/MFCC.cpp dsp/mfcc/MFCC.h dsp/segmentation/ClusterMeltSegmenter.cpp dsp/transforms/FFT.cpp dsp/transforms/FFT.h hmm/hmm.c |
diffstat | 6 files changed, 104 insertions(+), 73 deletions(-) [+] |
line wrap: on
line diff
--- a/dsp/mfcc/MFCC.cpp Wed Jan 16 18:02:31 2008 +0000 +++ b/dsp/mfcc/MFCC.cpp Fri Jan 18 13:24:12 2008 +0000 @@ -32,6 +32,7 @@ fftSize = config.fftsize; totalFilters = linearFilters + logFilters; + logPower = config.logpower; samplingRate = config.FS; @@ -50,12 +51,12 @@ /* Allocate space for local vectors */ mfccDCTMatrix = (double**)calloc(nceps+1, sizeof(double*)); - for (i=0;i<nceps+1; i++) { + for (i = 0; i < nceps+1; i++) { mfccDCTMatrix[i]= (double*)calloc(totalFilters, sizeof(double)); } mfccFilterWeights = (double**)calloc(totalFilters, sizeof(double*)); - for (i=0;i<totalFilters; i++) { + for (i = 0; i < totalFilters; i++) { mfccFilterWeights[i] = (double*)calloc(fftSize, sizeof(double)); } @@ -68,11 +69,11 @@ triangleHeight = (double*)calloc(totalFilters,sizeof(double)); fftFreqs = (double*)calloc(fftSize,sizeof(double)); - for (i=0;i<linearFilters;i++) { + for (i = 0; i < linearFilters; i++) { freqs[i] = lowestFrequency + ((double)i) * linearSpacing; } - for (i=linearFilters; i<totalFilters+2; i++) { + for (i = linearFilters; i < totalFilters+2; i++) { freqs[i] = freqs[linearFilters-1] * pow(logSpacing, (double)(i-linearFilters+1)); } @@ -109,7 +110,8 @@ if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) { - mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + triangleHeight[i] * (upper[i]-fftFreqs[j]) + mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + + triangleHeight[i] * (upper[i]-fftFreqs[j]) / (upper[i]-center[i]); } else @@ -127,24 +129,27 @@ const double pi = 3.14159265358979323846264338327950288; - for (i=0; i<nceps+1; i++) { - for (j=0; j<totalFilters; j++) { + for (i = 0; i < nceps+1; i++) { + for (j = 0; j < totalFilters; j++) { mfccDCTMatrix[i][j] = (1./sqrt((double) totalFilters / 2.)) * cos((double) i * ((double) j + 0.5) / (double) totalFilters * pi); } } - for (j=0;j<totalFilters;j++){ - mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfccDCTMatrix[0][j]; + for (j = 0; j < totalFilters; j++){ + mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfccDCTMatrix[0][j]; } /* The analysis window */ window = new Window<double>(HammingWindow, fftSize); /* Allocate memory for the FFT */ - imagIn = (double*)calloc(fftSize,sizeof(double)); - realOut = (double*)calloc(fftSize,sizeof(double)); - imagOut = (double*)calloc(fftSize,sizeof(double)); + imagIn = (double*)calloc(fftSize, sizeof(double)); + realOut = (double*)calloc(fftSize, sizeof(double)); + imagOut = (double*)calloc(fftSize, sizeof(double)); + + earMag = (double*)calloc(totalFilters, sizeof(double)); + fftMag = (double*)calloc(fftSize/2, sizeof(double)); free(freqs); free(lower); @@ -159,12 +164,12 @@ int i; /* Free the structure */ - for (i=0;i<nceps+1;i++) { + for (i = 0; i < nceps+1; i++) { free(mfccDCTMatrix[i]); } free(mfccDCTMatrix); - for (i=0;i<totalFilters; i++) { + for (i = 0; i < totalFilters; i++) { free(mfccFilterWeights[i]); } free(mfccFilterWeights); @@ -174,6 +179,9 @@ /* The analysis window */ delete window; + + free(earMag); + free(fftMag); /* Free the FFT */ free(imagIn); @@ -185,48 +193,47 @@ /* * * Extract the MFCC on the input frame - * - * looks like we have to have length = fftSize ?????? * */ -int MFCC::process(int length, double *inframe, double *outceps) +int MFCC::process(const double *inframe, double *outceps) { - int i,j; - - double *fftMag; - double *earMag; + double *inputData = (double *)malloc(fftSize * sizeof(double)); + for (int i = 0; i < fftSize; ++i) inputData[i] = inframe[i]; - double *inputData; - - double tmp; - - earMag = (double*)calloc(totalFilters, sizeof(double)); - inputData = (double*)calloc(fftSize, sizeof(double)); - fftMag = (double*)calloc(fftSize, sizeof(double)); - - /* Zero-pad if needed */ - memcpy(inputData, inframe, length*sizeof(double)); - -//!!! window->cut(inputData); + window->cut(inputData); /* Calculate the fft on the input frame */ FFT::process(fftSize, 0, inputData, imagIn, realOut, imagOut); - for (i = 0; i < fftSize; ++i) { - fftMag[i] = sqrt(realOut[i] * realOut[i] + - imagOut[i] * imagOut[i]); + free(inputData); + + return process(realOut, imagOut, outceps); +} + +int MFCC::process(const double *real, const double *imag, double *outceps) +{ + int i, j; + + for (i = 0; i < fftSize/2; ++i) { + fftMag[i] = sqrt(real[i] * real[i] + imag[i] * imag[i]); + } + + for (i = 0; i < totalFilters; ++i) { + earMag[i] = 0.0; } /* Multiply by mfccFilterWeights */ - for (i=0;i<totalFilters;i++) { - tmp = 0.; - for(j=0;j<fftSize/2; j++) { - tmp = tmp + (mfccFilterWeights[i][j]*fftMag[j]); + for (i = 0; i < totalFilters; i++) { + double tmp = 0.0; + for (j = 0; j < fftSize/2; j++) { + tmp = tmp + (mfccFilterWeights[i][j] * fftMag[j]); } - if (tmp>0) - earMag[i] = log10(tmp); - else - earMag[i] = 0.0; + if (tmp > 0) earMag[i] = log10(tmp); + else earMag[i] = 0.0; + + if (logPower != 1.0) { + earMag[i] = pow(earMag[i], logPower); + } } /* @@ -236,31 +243,27 @@ * */ - if (WANT_C0==1) { + if (WANT_C0 == 1) { - for (i=0;i<nceps+1;i++) { - tmp = 0.; - for (j=0;j<totalFilters;j++){ - tmp = tmp + mfccDCTMatrix[i][j]*earMag[j]; + for (i = 0; i < nceps+1; i++) { + double tmp = 0.; + for (j = 0; j < totalFilters; j++){ + tmp = tmp + mfccDCTMatrix[i][j] * earMag[j]; } outceps[i] = tmp; } } else { - for (i=1;i<nceps+1;i++) { - tmp = 0.; - for (j=0;j<totalFilters;j++){ - tmp = tmp + mfccDCTMatrix[i][j]*earMag[j]; + for (i = 1; i < nceps+1; i++) { + double tmp = 0.; + for (j = 0; j < totalFilters; j++){ + tmp = tmp + mfccDCTMatrix[i][j] * earMag[j]; } outceps[i-1] = tmp; } } - free(fftMag); - free(earMag); - free(inputData); - return nceps; }
--- a/dsp/mfcc/MFCC.h Wed Jan 16 18:02:31 2008 +0000 +++ b/dsp/mfcc/MFCC.h Fri Jan 18 13:24:12 2008 +0000 @@ -17,7 +17,10 @@ int FS; int fftsize; int nceps; + double logpower; bool want_c0; + MFCCConfig(int _FS) : + FS(_FS), fftsize(2048), nceps(19), logpower(1.0), want_c0(true) { } }; class MFCC @@ -26,7 +29,20 @@ MFCC(MFCCConfig config); virtual ~MFCC(); - int process(int length, double *inframe, double *outceps); + /** + * Process time-domain input data. inframe must contain + * getfftlength() samples. outceps must contain space for nceps + * values, plus one if want_c0 is specified. + */ + int process(const double *inframe, double *outceps); + + /** + * Process time-domain input data. real and imag must contain + * getfftlength()/2+1 elements (i.e. the conjugate half of the FFT + * is not expected). outceps must contain space for nceps values, + * plus one if want_c0 is specified. + */ + int process(const double *real, const double *imag, double *outceps); int getfftlength() const { return fftSize; } @@ -42,6 +58,7 @@ int fftSize; int totalFilters; + double logPower; /* Misc. */ int samplingRate; @@ -57,10 +74,12 @@ Window<double> *window; /* For the FFT */ - double* imagIn; // always zero - double* realOut; - double* imagOut; - + double *imagIn; // always zero + double *realOut; + double *imagOut; + double *fftMag; + double *earMag; + /* Set if user want C0 */ int WANT_C0; };
--- a/dsp/segmentation/ClusterMeltSegmenter.cpp Wed Jan 16 18:02:31 2008 +0000 +++ b/dsp/segmentation/ClusterMeltSegmenter.cpp Fri Jan 18 13:24:12 2008 +0000 @@ -92,8 +92,7 @@ decimator = new Decimator(getWindowsize(), decimationFactor); } - MFCCConfig config; - config.FS = samplerate / decimationFactor; + MFCCConfig config(samplerate / decimationFactor); config.fftsize = 2048; config.nceps = 19; config.want_c0 = true; @@ -278,7 +277,7 @@ } } - mfcc->process(fftsize, frame, ccout); + mfcc->process(frame, ccout); for (int i = 0; i < ncoeff; ++i) { cc[i] += ccout[i];
--- a/dsp/transforms/FFT.cpp Wed Jan 16 18:02:31 2008 +0000 +++ b/dsp/transforms/FFT.cpp Fri Jan 18 13:24:12 2008 +0000 @@ -24,7 +24,9 @@ } -void FFT::process(unsigned int p_nSamples, bool p_bInverseTransform, double *p_lpRealIn, double *p_lpImagIn, double *p_lpRealOut, double *p_lpImagOut) +void FFT::process(unsigned int p_nSamples, bool p_bInverseTransform, + const double *p_lpRealIn, const double *p_lpImagIn, + double *p_lpRealOut, double *p_lpImagOut) { if(!p_lpRealIn || !p_lpRealOut || !p_lpImagOut) return;
--- a/dsp/transforms/FFT.h Wed Jan 16 18:02:31 2008 +0000 +++ b/dsp/transforms/FFT.h Fri Jan 18 13:24:12 2008 +0000 @@ -13,16 +13,16 @@ class FFT { public: - static void process(unsigned int p_nSamples, bool p_bInverseTransform, - double *p_lpRealIn, double *p_lpImagIn, - double *p_lpRealOut, double *p_lpImagOut); + static void process(unsigned int nSamples, bool bInverseTransform, + const double *lpRealIn, const double *lpImagIn, + double *lpRealOut, double *lpImagOut); FFT(); virtual ~FFT(); protected: - static unsigned int reverseBits(unsigned int p_nIndex, unsigned int p_nBits); - static unsigned int numberOfBitsNeeded( unsigned int p_nSamples ); - static bool isPowerOfTwo( unsigned int p_nX ); + static unsigned int reverseBits(unsigned int nIndex, unsigned int nBits); + static unsigned int numberOfBitsNeeded( unsigned int nSamples ); + static bool isPowerOfTwo( unsigned int nX ); }; #endif
--- a/hmm/hmm.c Wed Jan 16 18:02:31 2008 +0000 +++ b/hmm/hmm.c Fri Jan 18 13:24:12 2008 +0000 @@ -159,7 +159,9 @@ int niter = 50; int iter = 0; double loglik1, loglik2; - while(iter < niter && !(iter > 1 && (loglik - loglik1) < thresh * (loglik1 - loglik2))) + int foundnan = 0; + + while (iter < niter && !foundnan && !(iter > 1 && (loglik - loglik1) < thresh * (loglik1 - loglik2))) { ++iter; @@ -199,6 +201,11 @@ fprintf(stderr, "iteration %d: loglik = %f\n", iter, loglik); fprintf(stderr, "re-estimation...\n"); fflush(stderr); + + if (isnan(loglik)) { + foundnan = 1; + continue; + } baum_welch(p0, a, mu, cov, N, T, L, x, xi, gamma); @@ -278,6 +285,7 @@ for (j = 0; j < N; j++) { a[i][j] = 0; + if (sum_gamma[i] == 0.) continue; for (t = 0; t < T-1; t++) a[i][j] += xi[t][i][j]; //s += a[i][j];