annotate dsp/mfcc/MFCC.cpp @ 255:9edaa3ce62e8

* Make MFCC able to accept already-FFT'd input, and simplify API a bit * Add log power value to MFCC, restore windowing, and avoid some heap allocs * In HMM, bail out of iteration if loglik hits NaN
author Chris Cannam <c.cannam@qmul.ac.uk>
date Fri, 18 Jan 2008 13:24:12 +0000
parents 52c1a295d775
children 9619d6995b73
rev   line source
c@251 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
c@251 2
c@251 3 /*
c@251 4 QM DSP Library
c@251 5
c@251 6 Centre for Digital Music, Queen Mary, University of London.
c@251 7 This file copyright 2005 Nicolas Chetry, copyright 2008 QMUL.
c@251 8 All rights reserved.
c@251 9 */
c@251 10
c@251 11 #include <cmath>
c@251 12 #include <cstdlib>
c@251 13
c@251 14 #include "MFCC.h"
c@251 15 #include "dsp/transforms/FFT.h"
c@251 16 #include "base/Window.h"
c@251 17
c@251 18 MFCC::MFCC(MFCCConfig config)
c@251 19 {
c@251 20 int i,j;
c@251 21
c@251 22 /* Calculate at startup */
c@251 23 double *freqs, *lower, *center, *upper, *triangleHeight, *fftFreqs;
c@251 24
c@251 25 lowestFrequency = 66.6666666;
c@251 26 linearFilters = 13;
c@251 27 linearSpacing = 66.66666666;
c@251 28 logFilters = 27;
c@251 29 logSpacing = 1.0711703;
c@251 30
c@251 31 /* FFT and analysis window sizes */
c@251 32 fftSize = config.fftsize;
c@251 33
c@251 34 totalFilters = linearFilters + logFilters;
c@255 35 logPower = config.logpower;
c@251 36
c@251 37 samplingRate = config.FS;
c@251 38
c@251 39 /* The number of cepstral componenents */
c@251 40 nceps = config.nceps;
c@251 41
c@251 42 /* Set if user want C0 */
c@251 43 WANT_C0 = (config.want_c0 ? 1 : 0);
c@251 44
c@251 45 /* Allocate space for feature vector */
c@251 46 if (WANT_C0 == 1) {
c@251 47 ceps = (double*)calloc(nceps+1, sizeof(double));
c@251 48 } else {
c@251 49 ceps = (double*)calloc(nceps, sizeof(double));
c@251 50 }
c@251 51
c@251 52 /* Allocate space for local vectors */
c@251 53 mfccDCTMatrix = (double**)calloc(nceps+1, sizeof(double*));
c@255 54 for (i = 0; i < nceps+1; i++) {
c@251 55 mfccDCTMatrix[i]= (double*)calloc(totalFilters, sizeof(double));
c@251 56 }
c@251 57
c@251 58 mfccFilterWeights = (double**)calloc(totalFilters, sizeof(double*));
c@255 59 for (i = 0; i < totalFilters; i++) {
c@251 60 mfccFilterWeights[i] = (double*)calloc(fftSize, sizeof(double));
c@251 61 }
c@251 62
c@251 63 freqs = (double*)calloc(totalFilters+2,sizeof(double));
c@251 64
c@251 65 lower = (double*)calloc(totalFilters,sizeof(double));
c@251 66 center = (double*)calloc(totalFilters,sizeof(double));
c@251 67 upper = (double*)calloc(totalFilters,sizeof(double));
c@251 68
c@251 69 triangleHeight = (double*)calloc(totalFilters,sizeof(double));
c@251 70 fftFreqs = (double*)calloc(fftSize,sizeof(double));
c@251 71
c@255 72 for (i = 0; i < linearFilters; i++) {
c@251 73 freqs[i] = lowestFrequency + ((double)i) * linearSpacing;
c@251 74 }
c@251 75
c@255 76 for (i = linearFilters; i < totalFilters+2; i++) {
c@251 77 freqs[i] = freqs[linearFilters-1] *
c@251 78 pow(logSpacing, (double)(i-linearFilters+1));
c@251 79 }
c@251 80
c@251 81 /* Define lower, center and upper */
c@251 82 memcpy(lower, freqs,totalFilters*sizeof(double));
c@251 83 memcpy(center, &freqs[1],totalFilters*sizeof(double));
c@251 84 memcpy(upper, &freqs[2],totalFilters*sizeof(double));
c@251 85
c@251 86 for (i=0;i<totalFilters;i++){
c@251 87 triangleHeight[i] = 2./(upper[i]-lower[i]);
c@251 88 }
c@251 89
c@251 90 for (i=0;i<fftSize;i++){
c@251 91 fftFreqs[i] = ((double) i / ((double) fftSize ) *
c@251 92 (double) samplingRate);
c@251 93 }
c@251 94
c@251 95 /* Build now the mccFilterWeight matrix */
c@251 96 for (i=0;i<totalFilters;i++){
c@251 97
c@251 98 for (j=0;j<fftSize;j++) {
c@251 99
c@251 100 if ((fftFreqs[j] > lower[i]) && (fftFreqs[j] <= center[i])) {
c@251 101
c@251 102 mfccFilterWeights[i][j] = triangleHeight[i] *
c@251 103 (fftFreqs[j]-lower[i]) / (center[i]-lower[i]);
c@251 104
c@251 105 }
c@251 106 else
c@251 107 {
c@251 108 mfccFilterWeights[i][j] = 0.0;
c@251 109 }
c@251 110
c@251 111 if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) {
c@251 112
c@255 113 mfccFilterWeights[i][j] = mfccFilterWeights[i][j]
c@255 114 + triangleHeight[i] * (upper[i]-fftFreqs[j])
c@251 115 / (upper[i]-center[i]);
c@251 116 }
c@251 117 else
c@251 118 {
c@251 119 mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + 0.0;
c@251 120 }
c@251 121 }
c@251 122
c@251 123 }
c@251 124
c@251 125 /*
c@251 126 * We calculate now mfccDCT matrix
c@251 127 * NB: +1 because of the DC component
c@251 128 */
c@254 129
c@254 130 const double pi = 3.14159265358979323846264338327950288;
c@251 131
c@255 132 for (i = 0; i < nceps+1; i++) {
c@255 133 for (j = 0; j < totalFilters; j++) {
c@251 134 mfccDCTMatrix[i][j] = (1./sqrt((double) totalFilters / 2.))
c@254 135 * cos((double) i * ((double) j + 0.5) / (double) totalFilters * pi);
c@251 136 }
c@251 137 }
c@251 138
c@255 139 for (j = 0; j < totalFilters; j++){
c@255 140 mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfccDCTMatrix[0][j];
c@251 141 }
c@251 142
c@251 143 /* The analysis window */
c@251 144 window = new Window<double>(HammingWindow, fftSize);
c@251 145
c@251 146 /* Allocate memory for the FFT */
c@255 147 imagIn = (double*)calloc(fftSize, sizeof(double));
c@255 148 realOut = (double*)calloc(fftSize, sizeof(double));
c@255 149 imagOut = (double*)calloc(fftSize, sizeof(double));
c@255 150
c@255 151 earMag = (double*)calloc(totalFilters, sizeof(double));
c@255 152 fftMag = (double*)calloc(fftSize/2, sizeof(double));
c@251 153
c@251 154 free(freqs);
c@251 155 free(lower);
c@251 156 free(center);
c@251 157 free(upper);
c@251 158 free(triangleHeight);
c@251 159 free(fftFreqs);
c@251 160 }
c@251 161
c@251 162 MFCC::~MFCC()
c@251 163 {
c@251 164 int i;
c@251 165
c@251 166 /* Free the structure */
c@255 167 for (i = 0; i < nceps+1; i++) {
c@251 168 free(mfccDCTMatrix[i]);
c@251 169 }
c@251 170 free(mfccDCTMatrix);
c@251 171
c@255 172 for (i = 0; i < totalFilters; i++) {
c@251 173 free(mfccFilterWeights[i]);
c@251 174 }
c@251 175 free(mfccFilterWeights);
c@251 176
c@251 177 /* Free the feature vector */
c@251 178 free(ceps);
c@251 179
c@251 180 /* The analysis window */
c@251 181 delete window;
c@255 182
c@255 183 free(earMag);
c@255 184 free(fftMag);
c@251 185
c@251 186 /* Free the FFT */
c@251 187 free(imagIn);
c@251 188 free(realOut);
c@251 189 free(imagOut);
c@251 190 }
c@251 191
c@251 192
c@251 193 /*
c@251 194 *
c@251 195 * Extract the MFCC on the input frame
c@251 196 *
c@251 197 */
c@255 198 int MFCC::process(const double *inframe, double *outceps)
c@251 199 {
c@255 200 double *inputData = (double *)malloc(fftSize * sizeof(double));
c@255 201 for (int i = 0; i < fftSize; ++i) inputData[i] = inframe[i];
c@251 202
c@255 203 window->cut(inputData);
c@251 204
c@251 205 /* Calculate the fft on the input frame */
c@251 206 FFT::process(fftSize, 0, inputData, imagIn, realOut, imagOut);
c@251 207
c@255 208 free(inputData);
c@255 209
c@255 210 return process(realOut, imagOut, outceps);
c@255 211 }
c@255 212
c@255 213 int MFCC::process(const double *real, const double *imag, double *outceps)
c@255 214 {
c@255 215 int i, j;
c@255 216
c@255 217 for (i = 0; i < fftSize/2; ++i) {
c@255 218 fftMag[i] = sqrt(real[i] * real[i] + imag[i] * imag[i]);
c@255 219 }
c@255 220
c@255 221 for (i = 0; i < totalFilters; ++i) {
c@255 222 earMag[i] = 0.0;
c@251 223 }
c@251 224
c@251 225 /* Multiply by mfccFilterWeights */
c@255 226 for (i = 0; i < totalFilters; i++) {
c@255 227 double tmp = 0.0;
c@255 228 for (j = 0; j < fftSize/2; j++) {
c@255 229 tmp = tmp + (mfccFilterWeights[i][j] * fftMag[j]);
c@251 230 }
c@255 231 if (tmp > 0) earMag[i] = log10(tmp);
c@255 232 else earMag[i] = 0.0;
c@255 233
c@255 234 if (logPower != 1.0) {
c@255 235 earMag[i] = pow(earMag[i], logPower);
c@255 236 }
c@251 237 }
c@251 238
c@251 239 /*
c@251 240 *
c@251 241 * Calculate now the cepstral coefficients
c@251 242 * with or without the DC component
c@251 243 *
c@251 244 */
c@251 245
c@255 246 if (WANT_C0 == 1) {
c@251 247
c@255 248 for (i = 0; i < nceps+1; i++) {
c@255 249 double tmp = 0.;
c@255 250 for (j = 0; j < totalFilters; j++){
c@255 251 tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
c@251 252 }
c@251 253 outceps[i] = tmp;
c@251 254 }
c@251 255 }
c@251 256 else
c@251 257 {
c@255 258 for (i = 1; i < nceps+1; i++) {
c@255 259 double tmp = 0.;
c@255 260 for (j = 0; j < totalFilters; j++){
c@255 261 tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
c@251 262 }
c@251 263 outceps[i-1] = tmp;
c@251 264 }
c@251 265 }
c@251 266
c@251 267 return nceps;
c@251 268 }
c@251 269