diff dsp/mfcc/MFCC.cpp @ 26:d096a79fa772

* Add timbral (MFCC) feature option to segmenter
author cannam
date Thu, 10 Jan 2008 16:41:33 +0000
parents dsp/mfcc/MFCC.c@54a962727271
children 1c9c4d2c0592
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dsp/mfcc/MFCC.cpp	Thu Jan 10 16:41:33 2008 +0000
@@ -0,0 +1,264 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
+
+/*
+    QM DSP Library
+
+    Centre for Digital Music, Queen Mary, University of London.
+    This file copyright 2005 Nicolas Chetry, copyright 2008 QMUL.
+    All rights reserved.
+*/
+
+#include <cmath>
+#include <cstdlib>
+
+#include "MFCC.h"
+#include "dsp/transforms/FFT.h"
+#include "base/Window.h"
+
+MFCC::MFCC(MFCCConfig config)
+{
+    int i,j;
+
+    /* Calculate at startup */
+    double *freqs, *lower, *center, *upper, *triangleHeight, *fftFreqs;
+  
+    lowestFrequency   = 66.6666666;
+    linearFilters     = 13;
+    linearSpacing     = 66.66666666;
+    logFilters        = 27;
+    logSpacing        = 1.0711703;
+  
+    /* FFT and analysis window sizes */
+    fftSize           = config.fftsize;
+
+    totalFilters      = linearFilters + logFilters;
+  
+    samplingRate      = config.FS;
+  
+    /* The number of cepstral componenents */
+    nceps             = config.nceps;
+
+    /* Set if user want C0 */
+    WANT_C0           = (config.want_c0 ? 1 : 0);
+  
+    /* Allocate space for feature vector */
+    if (WANT_C0 == 1) {
+        ceps              = (double*)calloc(nceps+1, sizeof(double));
+    } else {
+        ceps              = (double*)calloc(nceps, sizeof(double));
+    }
+ 
+    /* Allocate space for local vectors */
+    mfccDCTMatrix     = (double**)calloc(nceps+1, sizeof(double*));
+    for (i=0;i<nceps+1; i++) {
+        mfccDCTMatrix[i]= (double*)calloc(totalFilters, sizeof(double)); 
+    }
+
+    mfccFilterWeights = (double**)calloc(totalFilters, sizeof(double*));
+    for (i=0;i<totalFilters; i++) {
+        mfccFilterWeights[i] = (double*)calloc(fftSize, sizeof(double)); 
+    }
+    
+    freqs  = (double*)calloc(totalFilters+2,sizeof(double));
+    
+    lower  = (double*)calloc(totalFilters,sizeof(double));
+    center = (double*)calloc(totalFilters,sizeof(double));
+    upper  = (double*)calloc(totalFilters,sizeof(double));
+    
+    triangleHeight = (double*)calloc(totalFilters,sizeof(double));
+    fftFreqs       = (double*)calloc(fftSize,sizeof(double));
+  
+    for (i=0;i<linearFilters;i++) {
+        freqs[i] = lowestFrequency + ((double)i) * linearSpacing;
+    }
+  
+    for (i=linearFilters; i<totalFilters+2; i++) {
+        freqs[i] = freqs[linearFilters-1] * 
+            pow(logSpacing, (double)(i-linearFilters+1));
+    }
+  
+    /* Define lower, center and upper */
+    memcpy(lower,  freqs,totalFilters*sizeof(double));
+    memcpy(center, &freqs[1],totalFilters*sizeof(double));
+    memcpy(upper,  &freqs[2],totalFilters*sizeof(double));
+    
+    for (i=0;i<totalFilters;i++){
+        triangleHeight[i] = 2./(upper[i]-lower[i]);
+    }
+  
+    for (i=0;i<fftSize;i++){
+        fftFreqs[i] = ((double) i / ((double) fftSize ) * 
+                       (double) samplingRate);
+    }
+
+    /* Build now the mccFilterWeight matrix */
+    for (i=0;i<totalFilters;i++){
+
+        for (j=0;j<fftSize;j++) {
+      
+            if ((fftFreqs[j] > lower[i]) && (fftFreqs[j] <= center[i])) {
+          
+                mfccFilterWeights[i][j] = triangleHeight[i] * 
+                    (fftFreqs[j]-lower[i]) / (center[i]-lower[i]); 
+          
+            }
+            else
+            {
+                mfccFilterWeights[i][j] = 0.0;
+            }
+
+            if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) {
+
+                mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + triangleHeight[i] * (upper[i]-fftFreqs[j]) 
+                    / (upper[i]-center[i]);
+            }
+            else
+            {
+                mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + 0.0;
+            }
+        }
+
+    }
+
+    /*
+     * We calculate now mfccDCT matrix 
+     * NB: +1 because of the DC component
+     */
+  
+    for (i=0; i<nceps+1; i++) {
+        for (j=0; j<totalFilters; j++) {
+            mfccDCTMatrix[i][j] = (1./sqrt((double) totalFilters / 2.))  
+                * cos((double) i * ((double) j + 0.5) / (double) totalFilters * M_PI);
+        }
+    }
+
+    for (j=0;j<totalFilters;j++){
+        mfccDCTMatrix[0][j]     =  (sqrt(2.)/2.) * mfccDCTMatrix[0][j];
+    }
+   
+    /* The analysis window */
+    window = new Window<double>(HammingWindow, fftSize);
+
+    /* Allocate memory for the FFT */
+    imagIn      = (double*)calloc(fftSize,sizeof(double));
+    realOut     = (double*)calloc(fftSize,sizeof(double));
+    imagOut     = (double*)calloc(fftSize,sizeof(double));
+  
+    free(freqs);
+    free(lower);
+    free(center);
+    free(upper);
+    free(triangleHeight);
+    free(fftFreqs);
+}
+
+MFCC::~MFCC()
+{
+    int i;
+  
+    /* Free the structure */
+    for (i=0;i<nceps+1;i++) {
+        free(mfccDCTMatrix[i]);
+    }
+    free(mfccDCTMatrix);
+    
+    for (i=0;i<totalFilters; i++) {
+        free(mfccFilterWeights[i]);
+    }
+    free(mfccFilterWeights);
+    
+    /* Free the feature vector */
+    free(ceps);
+    
+    /* The analysis window */
+    delete window;
+    
+    /* Free the FFT */
+    free(imagIn);
+    free(realOut);
+    free(imagOut);
+}
+
+
+/*
+ * 
+ * Extract the MFCC on the input frame 
+ *
+ * looks like we have to have length = fftSize ??????
+ * 
+ */ 
+int MFCC::process(int length, double *inframe, double *outceps)
+{
+    int i,j;
+  
+    double *fftMag;
+    double *earMag;
+
+    double *inputData;
+  
+    double tmp;
+
+    earMag    = (double*)calloc(totalFilters, sizeof(double));
+    inputData = (double*)calloc(fftSize, sizeof(double)); 
+    fftMag    = (double*)calloc(fftSize, sizeof(double)); 
+    
+    /* Zero-pad if needed */
+    memcpy(inputData, inframe, length*sizeof(double));
+
+    window->cut(inputData);
+  
+    /* Calculate the fft on the input frame */
+    FFT::process(fftSize, 0, inputData, imagIn, realOut, imagOut);
+
+    for (i = 0; i < fftSize; ++i) {
+        fftMag[i] = sqrt(realOut[i] * realOut[i] +
+                         imagOut[i] * imagOut[i]);
+    }
+
+    /* Multiply by mfccFilterWeights */
+    for (i=0;i<totalFilters;i++) {
+        tmp = 0.;
+        for(j=0;j<fftSize/2; j++) {
+            tmp = tmp + (mfccFilterWeights[i][j]*fftMag[j]);
+        }
+        if (tmp>0)
+            earMag[i] = log10(tmp);
+	else
+            earMag[i] = 0.0;
+    }
+
+    /*
+     * 
+     * Calculate now the cepstral coefficients 
+     * with or without the DC component
+     *
+     */
+  
+    if (WANT_C0==1) {
+     
+        for (i=0;i<nceps+1;i++) {
+            tmp = 0.;
+            for (j=0;j<totalFilters;j++){
+                tmp = tmp + mfccDCTMatrix[i][j]*earMag[j];
+            }
+            outceps[i] = tmp;
+        }
+    }
+    else 
+    {  
+        for (i=1;i<nceps+1;i++) {
+            tmp = 0.;
+            for (j=0;j<totalFilters;j++){
+                tmp = tmp + mfccDCTMatrix[i][j]*earMag[j];
+            }
+            outceps[i-1] = tmp;
+        }
+    }
+    
+    free(fftMag);
+    free(earMag);
+    free(inputData);
+
+    return nceps;
+}
+