# HG changeset patch # User Chris Cannam # Date 1200662652 0 # Node ID 9edaa3ce62e87ee29ca3abfe08bb2a41ff4082d6 # Parent 52c1a295d77575ed6b7c6b3ffb8f0a8c92c156f7 * Make MFCC able to accept already-FFT'd input, and simplify API a bit * Add log power value to MFCC, restore windowing, and avoid some heap allocs * In HMM, bail out of iteration if loglik hits NaN diff -r 52c1a295d775 -r 9edaa3ce62e8 dsp/mfcc/MFCC.cpp --- a/dsp/mfcc/MFCC.cpp Wed Jan 16 18:02:31 2008 +0000 +++ b/dsp/mfcc/MFCC.cpp Fri Jan 18 13:24:12 2008 +0000 @@ -32,6 +32,7 @@ fftSize = config.fftsize; totalFilters = linearFilters + logFilters; + logPower = config.logpower; samplingRate = config.FS; @@ -50,12 +51,12 @@ /* Allocate space for local vectors */ mfccDCTMatrix = (double**)calloc(nceps+1, sizeof(double*)); - for (i=0;icenter[i]) && (fftFreqs[j](HammingWindow, fftSize); /* Allocate memory for the FFT */ - imagIn = (double*)calloc(fftSize,sizeof(double)); - realOut = (double*)calloc(fftSize,sizeof(double)); - imagOut = (double*)calloc(fftSize,sizeof(double)); + imagIn = (double*)calloc(fftSize, sizeof(double)); + realOut = (double*)calloc(fftSize, sizeof(double)); + imagOut = (double*)calloc(fftSize, sizeof(double)); + + earMag = (double*)calloc(totalFilters, sizeof(double)); + fftMag = (double*)calloc(fftSize/2, sizeof(double)); free(freqs); free(lower); @@ -159,12 +164,12 @@ int i; /* Free the structure */ - for (i=0;icut(inputData); + window->cut(inputData); /* Calculate the fft on the input frame */ FFT::process(fftSize, 0, inputData, imagIn, realOut, imagOut); - for (i = 0; i < fftSize; ++i) { - fftMag[i] = sqrt(realOut[i] * realOut[i] + - imagOut[i] * imagOut[i]); + free(inputData); + + return process(realOut, imagOut, outceps); +} + +int MFCC::process(const double *real, const double *imag, double *outceps) +{ + int i, j; + + for (i = 0; i < fftSize/2; ++i) { + fftMag[i] = sqrt(real[i] * real[i] + imag[i] * imag[i]); + } + + for (i = 0; i < totalFilters; ++i) { + earMag[i] = 0.0; } /* Multiply by mfccFilterWeights */ - for (i=0;i0) - earMag[i] = log10(tmp); - else - earMag[i] = 0.0; + if (tmp > 0) earMag[i] = log10(tmp); + else earMag[i] = 0.0; + + if (logPower != 1.0) { + earMag[i] = pow(earMag[i], logPower); + } } /* @@ -236,31 +243,27 @@ * */ - if (WANT_C0==1) { + if (WANT_C0 == 1) { - for (i=0;i *window; /* For the FFT */ - double* imagIn; // always zero - double* realOut; - double* imagOut; - + double *imagIn; // always zero + double *realOut; + double *imagOut; + double *fftMag; + double *earMag; + /* Set if user want C0 */ int WANT_C0; }; diff -r 52c1a295d775 -r 9edaa3ce62e8 dsp/segmentation/ClusterMeltSegmenter.cpp --- a/dsp/segmentation/ClusterMeltSegmenter.cpp Wed Jan 16 18:02:31 2008 +0000 +++ b/dsp/segmentation/ClusterMeltSegmenter.cpp Fri Jan 18 13:24:12 2008 +0000 @@ -92,8 +92,7 @@ decimator = new Decimator(getWindowsize(), decimationFactor); } - MFCCConfig config; - config.FS = samplerate / decimationFactor; + MFCCConfig config(samplerate / decimationFactor); config.fftsize = 2048; config.nceps = 19; config.want_c0 = true; @@ -278,7 +277,7 @@ } } - mfcc->process(fftsize, frame, ccout); + mfcc->process(frame, ccout); for (int i = 0; i < ncoeff; ++i) { cc[i] += ccout[i]; diff -r 52c1a295d775 -r 9edaa3ce62e8 dsp/transforms/FFT.cpp --- a/dsp/transforms/FFT.cpp Wed Jan 16 18:02:31 2008 +0000 +++ b/dsp/transforms/FFT.cpp Fri Jan 18 13:24:12 2008 +0000 @@ -24,7 +24,9 @@ } -void FFT::process(unsigned int p_nSamples, bool p_bInverseTransform, double *p_lpRealIn, double *p_lpImagIn, double *p_lpRealOut, double *p_lpImagOut) +void FFT::process(unsigned int p_nSamples, bool p_bInverseTransform, + const double *p_lpRealIn, const double *p_lpImagIn, + double *p_lpRealOut, double *p_lpImagOut) { if(!p_lpRealIn || !p_lpRealOut || !p_lpImagOut) return; diff -r 52c1a295d775 -r 9edaa3ce62e8 dsp/transforms/FFT.h --- a/dsp/transforms/FFT.h Wed Jan 16 18:02:31 2008 +0000 +++ b/dsp/transforms/FFT.h Fri Jan 18 13:24:12 2008 +0000 @@ -13,16 +13,16 @@ class FFT { public: - static void process(unsigned int p_nSamples, bool p_bInverseTransform, - double *p_lpRealIn, double *p_lpImagIn, - double *p_lpRealOut, double *p_lpImagOut); + static void process(unsigned int nSamples, bool bInverseTransform, + const double *lpRealIn, const double *lpImagIn, + double *lpRealOut, double *lpImagOut); FFT(); virtual ~FFT(); protected: - static unsigned int reverseBits(unsigned int p_nIndex, unsigned int p_nBits); - static unsigned int numberOfBitsNeeded( unsigned int p_nSamples ); - static bool isPowerOfTwo( unsigned int p_nX ); + static unsigned int reverseBits(unsigned int nIndex, unsigned int nBits); + static unsigned int numberOfBitsNeeded( unsigned int nSamples ); + static bool isPowerOfTwo( unsigned int nX ); }; #endif diff -r 52c1a295d775 -r 9edaa3ce62e8 hmm/hmm.c --- a/hmm/hmm.c Wed Jan 16 18:02:31 2008 +0000 +++ b/hmm/hmm.c Fri Jan 18 13:24:12 2008 +0000 @@ -159,7 +159,9 @@ int niter = 50; int iter = 0; double loglik1, loglik2; - while(iter < niter && !(iter > 1 && (loglik - loglik1) < thresh * (loglik1 - loglik2))) + int foundnan = 0; + + while (iter < niter && !foundnan && !(iter > 1 && (loglik - loglik1) < thresh * (loglik1 - loglik2))) { ++iter; @@ -199,6 +201,11 @@ fprintf(stderr, "iteration %d: loglik = %f\n", iter, loglik); fprintf(stderr, "re-estimation...\n"); fflush(stderr); + + if (isnan(loglik)) { + foundnan = 1; + continue; + } baum_welch(p0, a, mu, cov, N, T, L, x, xi, gamma); @@ -278,6 +285,7 @@ for (j = 0; j < N; j++) { a[i][j] = 0; + if (sum_gamma[i] == 0.) continue; for (t = 0; t < T-1; t++) a[i][j] += xi[t][i][j]; //s += a[i][j];