diff dsp/mfcc/MFCC.cpp @ 255:9edaa3ce62e8

* Make MFCC able to accept already-FFT'd input, and simplify API a bit * Add log power value to MFCC, restore windowing, and avoid some heap allocs * In HMM, bail out of iteration if loglik hits NaN
author Chris Cannam <c.cannam@qmul.ac.uk>
date Fri, 18 Jan 2008 13:24:12 +0000
parents 52c1a295d775
children 9619d6995b73
line wrap: on
line diff
--- a/dsp/mfcc/MFCC.cpp	Wed Jan 16 18:02:31 2008 +0000
+++ b/dsp/mfcc/MFCC.cpp	Fri Jan 18 13:24:12 2008 +0000
@@ -32,6 +32,7 @@
     fftSize           = config.fftsize;
 
     totalFilters      = linearFilters + logFilters;
+    logPower          = config.logpower;
   
     samplingRate      = config.FS;
   
@@ -50,12 +51,12 @@
  
     /* Allocate space for local vectors */
     mfccDCTMatrix     = (double**)calloc(nceps+1, sizeof(double*));
-    for (i=0;i<nceps+1; i++) {
+    for (i = 0; i < nceps+1; i++) {
         mfccDCTMatrix[i]= (double*)calloc(totalFilters, sizeof(double)); 
     }
 
     mfccFilterWeights = (double**)calloc(totalFilters, sizeof(double*));
-    for (i=0;i<totalFilters; i++) {
+    for (i = 0; i < totalFilters; i++) {
         mfccFilterWeights[i] = (double*)calloc(fftSize, sizeof(double)); 
     }
     
@@ -68,11 +69,11 @@
     triangleHeight = (double*)calloc(totalFilters,sizeof(double));
     fftFreqs       = (double*)calloc(fftSize,sizeof(double));
   
-    for (i=0;i<linearFilters;i++) {
+    for (i = 0; i < linearFilters; i++) {
         freqs[i] = lowestFrequency + ((double)i) * linearSpacing;
     }
   
-    for (i=linearFilters; i<totalFilters+2; i++) {
+    for (i = linearFilters; i < totalFilters+2; i++) {
         freqs[i] = freqs[linearFilters-1] * 
             pow(logSpacing, (double)(i-linearFilters+1));
     }
@@ -109,7 +110,8 @@
 
             if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) {
 
-                mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + triangleHeight[i] * (upper[i]-fftFreqs[j]) 
+                mfccFilterWeights[i][j] = mfccFilterWeights[i][j]
+                    + triangleHeight[i] * (upper[i]-fftFreqs[j]) 
                     / (upper[i]-center[i]);
             }
             else
@@ -127,24 +129,27 @@
 
     const double pi = 3.14159265358979323846264338327950288;
   
-    for (i=0; i<nceps+1; i++) {
-        for (j=0; j<totalFilters; j++) {
+    for (i = 0; i < nceps+1; i++) {
+        for (j = 0; j < totalFilters; j++) {
             mfccDCTMatrix[i][j] = (1./sqrt((double) totalFilters / 2.))  
                 * cos((double) i * ((double) j + 0.5) / (double) totalFilters * pi);
         }
     }
 
-    for (j=0;j<totalFilters;j++){
-        mfccDCTMatrix[0][j]     =  (sqrt(2.)/2.) * mfccDCTMatrix[0][j];
+    for (j = 0; j < totalFilters; j++){
+        mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfccDCTMatrix[0][j];
     }
    
     /* The analysis window */
     window = new Window<double>(HammingWindow, fftSize);
 
     /* Allocate memory for the FFT */
-    imagIn      = (double*)calloc(fftSize,sizeof(double));
-    realOut     = (double*)calloc(fftSize,sizeof(double));
-    imagOut     = (double*)calloc(fftSize,sizeof(double));
+    imagIn      = (double*)calloc(fftSize, sizeof(double));
+    realOut     = (double*)calloc(fftSize, sizeof(double));
+    imagOut     = (double*)calloc(fftSize, sizeof(double));
+
+    earMag      = (double*)calloc(totalFilters, sizeof(double));
+    fftMag      = (double*)calloc(fftSize/2, sizeof(double));
   
     free(freqs);
     free(lower);
@@ -159,12 +164,12 @@
     int i;
   
     /* Free the structure */
-    for (i=0;i<nceps+1;i++) {
+    for (i = 0; i < nceps+1; i++) {
         free(mfccDCTMatrix[i]);
     }
     free(mfccDCTMatrix);
     
-    for (i=0;i<totalFilters; i++) {
+    for (i = 0; i < totalFilters; i++) {
         free(mfccFilterWeights[i]);
     }
     free(mfccFilterWeights);
@@ -174,6 +179,9 @@
     
     /* The analysis window */
     delete window;
+
+    free(earMag);
+    free(fftMag);
     
     /* Free the FFT */
     free(imagIn);
@@ -185,48 +193,47 @@
 /*
  * 
  * Extract the MFCC on the input frame 
- *
- * looks like we have to have length = fftSize ??????
  * 
  */ 
-int MFCC::process(int length, double *inframe, double *outceps)
+int MFCC::process(const double *inframe, double *outceps)
 {
-    int i,j;
-  
-    double *fftMag;
-    double *earMag;
+    double *inputData = (double *)malloc(fftSize * sizeof(double));
+    for (int i = 0; i < fftSize; ++i) inputData[i] = inframe[i];
 
-    double *inputData;
-  
-    double tmp;
-
-    earMag    = (double*)calloc(totalFilters, sizeof(double));
-    inputData = (double*)calloc(fftSize, sizeof(double)); 
-    fftMag    = (double*)calloc(fftSize, sizeof(double)); 
-    
-    /* Zero-pad if needed */
-    memcpy(inputData, inframe, length*sizeof(double));
-
-//!!!    window->cut(inputData);
+    window->cut(inputData);
   
     /* Calculate the fft on the input frame */
     FFT::process(fftSize, 0, inputData, imagIn, realOut, imagOut);
 
-    for (i = 0; i < fftSize; ++i) {
-        fftMag[i] = sqrt(realOut[i] * realOut[i] +
-                         imagOut[i] * imagOut[i]);
+    free(inputData);
+
+    return process(realOut, imagOut, outceps);
+}
+
+int MFCC::process(const double *real, const double *imag, double *outceps)
+{
+    int i, j;
+
+    for (i = 0; i < fftSize/2; ++i) {
+        fftMag[i] = sqrt(real[i] * real[i] + imag[i] * imag[i]);
+    }
+
+    for (i = 0; i < totalFilters; ++i) {
+        earMag[i] = 0.0;
     }
 
     /* Multiply by mfccFilterWeights */
-    for (i=0;i<totalFilters;i++) {
-        tmp = 0.;
-        for(j=0;j<fftSize/2; j++) {
-            tmp = tmp + (mfccFilterWeights[i][j]*fftMag[j]);
+    for (i = 0; i < totalFilters; i++) {
+        double tmp = 0.0;
+        for (j = 0; j < fftSize/2; j++) {
+            tmp = tmp + (mfccFilterWeights[i][j] * fftMag[j]);
         }
-        if (tmp>0)
-            earMag[i] = log10(tmp);
-	else
-            earMag[i] = 0.0;
+        if (tmp > 0) earMag[i] = log10(tmp);
+	else earMag[i] = 0.0;
+
+        if (logPower != 1.0) {
+            earMag[i] = pow(earMag[i], logPower);
+        }
     }
 
     /*
@@ -236,31 +243,27 @@
      *
      */
   
-    if (WANT_C0==1) {
+    if (WANT_C0 == 1) {
      
-        for (i=0;i<nceps+1;i++) {
-            tmp = 0.;
-            for (j=0;j<totalFilters;j++){
-                tmp = tmp + mfccDCTMatrix[i][j]*earMag[j];
+        for (i = 0; i < nceps+1; i++) {
+            double tmp = 0.;
+            for (j = 0; j < totalFilters; j++){
+                tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
             }
             outceps[i] = tmp;
         }
     }
     else 
     {  
-        for (i=1;i<nceps+1;i++) {
-            tmp = 0.;
-            for (j=0;j<totalFilters;j++){
-                tmp = tmp + mfccDCTMatrix[i][j]*earMag[j];
+        for (i = 1; i < nceps+1; i++) {
+            double tmp = 0.;
+            for (j = 0; j < totalFilters; j++){
+                tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
             }
             outceps[i-1] = tmp;
         }
     }
     
-    free(fftMag);
-    free(earMag);
-    free(inputData);
-
     return nceps;
 }