annotate dsp/mfcc/MFCC.cpp @ 289:befe5aa6b450

* Refactor FFT a little bit so as to separate construction and processing rather than have a single static method -- will make it easier to use a different implementation * pull in KissFFT implementation (not hooked up yet)
author Chris Cannam <c.cannam@qmul.ac.uk>
date Wed, 13 May 2009 09:19:12 +0000
parents 330c2e11f8a9
children d5014ab8b0e5
rev   line source
c@251 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
c@251 2
c@251 3 /*
c@251 4 QM DSP Library
c@251 5
c@251 6 Centre for Digital Music, Queen Mary, University of London.
c@251 7 This file copyright 2005 Nicolas Chetry, copyright 2008 QMUL.
c@251 8 All rights reserved.
c@251 9 */
c@251 10
c@251 11 #include <cmath>
c@251 12 #include <cstdlib>
c@272 13 #include <cstring>
c@251 14
c@251 15 #include "MFCC.h"
c@251 16 #include "dsp/transforms/FFT.h"
c@251 17 #include "base/Window.h"
c@251 18
c@251 19 MFCC::MFCC(MFCCConfig config)
c@251 20 {
c@251 21 int i,j;
c@251 22
c@251 23 /* Calculate at startup */
c@251 24 double *freqs, *lower, *center, *upper, *triangleHeight, *fftFreqs;
c@251 25
c@251 26 lowestFrequency = 66.6666666;
c@251 27 linearFilters = 13;
c@251 28 linearSpacing = 66.66666666;
c@251 29 logFilters = 27;
c@251 30 logSpacing = 1.0711703;
c@251 31
c@251 32 /* FFT and analysis window sizes */
c@251 33 fftSize = config.fftsize;
c@289 34 fft = new FFTReal(fftSize);
c@251 35
c@251 36 totalFilters = linearFilters + logFilters;
c@255 37 logPower = config.logpower;
c@251 38
c@251 39 samplingRate = config.FS;
c@251 40
c@251 41 /* The number of cepstral componenents */
c@251 42 nceps = config.nceps;
c@251 43
c@251 44 /* Set if user want C0 */
c@251 45 WANT_C0 = (config.want_c0 ? 1 : 0);
c@251 46
c@251 47 /* Allocate space for feature vector */
c@251 48 if (WANT_C0 == 1) {
c@251 49 ceps = (double*)calloc(nceps+1, sizeof(double));
c@251 50 } else {
c@251 51 ceps = (double*)calloc(nceps, sizeof(double));
c@251 52 }
c@251 53
c@251 54 /* Allocate space for local vectors */
c@251 55 mfccDCTMatrix = (double**)calloc(nceps+1, sizeof(double*));
c@255 56 for (i = 0; i < nceps+1; i++) {
c@251 57 mfccDCTMatrix[i]= (double*)calloc(totalFilters, sizeof(double));
c@251 58 }
c@251 59
c@251 60 mfccFilterWeights = (double**)calloc(totalFilters, sizeof(double*));
c@255 61 for (i = 0; i < totalFilters; i++) {
c@251 62 mfccFilterWeights[i] = (double*)calloc(fftSize, sizeof(double));
c@251 63 }
c@251 64
c@251 65 freqs = (double*)calloc(totalFilters+2,sizeof(double));
c@251 66
c@251 67 lower = (double*)calloc(totalFilters,sizeof(double));
c@251 68 center = (double*)calloc(totalFilters,sizeof(double));
c@251 69 upper = (double*)calloc(totalFilters,sizeof(double));
c@251 70
c@251 71 triangleHeight = (double*)calloc(totalFilters,sizeof(double));
c@251 72 fftFreqs = (double*)calloc(fftSize,sizeof(double));
c@251 73
c@255 74 for (i = 0; i < linearFilters; i++) {
c@251 75 freqs[i] = lowestFrequency + ((double)i) * linearSpacing;
c@251 76 }
c@251 77
c@255 78 for (i = linearFilters; i < totalFilters+2; i++) {
c@251 79 freqs[i] = freqs[linearFilters-1] *
c@251 80 pow(logSpacing, (double)(i-linearFilters+1));
c@251 81 }
c@251 82
c@251 83 /* Define lower, center and upper */
c@251 84 memcpy(lower, freqs,totalFilters*sizeof(double));
c@251 85 memcpy(center, &freqs[1],totalFilters*sizeof(double));
c@251 86 memcpy(upper, &freqs[2],totalFilters*sizeof(double));
c@251 87
c@251 88 for (i=0;i<totalFilters;i++){
c@251 89 triangleHeight[i] = 2./(upper[i]-lower[i]);
c@251 90 }
c@251 91
c@251 92 for (i=0;i<fftSize;i++){
c@251 93 fftFreqs[i] = ((double) i / ((double) fftSize ) *
c@251 94 (double) samplingRate);
c@251 95 }
c@251 96
c@251 97 /* Build now the mccFilterWeight matrix */
c@251 98 for (i=0;i<totalFilters;i++){
c@251 99
c@251 100 for (j=0;j<fftSize;j++) {
c@251 101
c@251 102 if ((fftFreqs[j] > lower[i]) && (fftFreqs[j] <= center[i])) {
c@251 103
c@251 104 mfccFilterWeights[i][j] = triangleHeight[i] *
c@251 105 (fftFreqs[j]-lower[i]) / (center[i]-lower[i]);
c@251 106
c@251 107 }
c@251 108 else
c@251 109 {
c@251 110 mfccFilterWeights[i][j] = 0.0;
c@251 111 }
c@251 112
c@251 113 if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) {
c@251 114
c@255 115 mfccFilterWeights[i][j] = mfccFilterWeights[i][j]
c@255 116 + triangleHeight[i] * (upper[i]-fftFreqs[j])
c@251 117 / (upper[i]-center[i]);
c@251 118 }
c@251 119 else
c@251 120 {
c@251 121 mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + 0.0;
c@251 122 }
c@251 123 }
c@251 124
c@251 125 }
c@251 126
c@251 127 /*
c@251 128 * We calculate now mfccDCT matrix
c@251 129 * NB: +1 because of the DC component
c@251 130 */
c@254 131
c@254 132 const double pi = 3.14159265358979323846264338327950288;
c@251 133
c@255 134 for (i = 0; i < nceps+1; i++) {
c@255 135 for (j = 0; j < totalFilters; j++) {
c@251 136 mfccDCTMatrix[i][j] = (1./sqrt((double) totalFilters / 2.))
c@254 137 * cos((double) i * ((double) j + 0.5) / (double) totalFilters * pi);
c@251 138 }
c@251 139 }
c@251 140
c@255 141 for (j = 0; j < totalFilters; j++){
c@255 142 mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfccDCTMatrix[0][j];
c@251 143 }
c@251 144
c@251 145 /* The analysis window */
c@257 146 window = new Window<double>(config.window, fftSize);
c@251 147
c@251 148 /* Allocate memory for the FFT */
c@255 149 realOut = (double*)calloc(fftSize, sizeof(double));
c@255 150 imagOut = (double*)calloc(fftSize, sizeof(double));
c@255 151
c@255 152 earMag = (double*)calloc(totalFilters, sizeof(double));
c@255 153 fftMag = (double*)calloc(fftSize/2, sizeof(double));
c@251 154
c@251 155 free(freqs);
c@251 156 free(lower);
c@251 157 free(center);
c@251 158 free(upper);
c@251 159 free(triangleHeight);
c@251 160 free(fftFreqs);
c@251 161 }
c@251 162
c@251 163 MFCC::~MFCC()
c@251 164 {
c@251 165 int i;
c@251 166
c@251 167 /* Free the structure */
c@255 168 for (i = 0; i < nceps+1; i++) {
c@251 169 free(mfccDCTMatrix[i]);
c@251 170 }
c@251 171 free(mfccDCTMatrix);
c@251 172
c@255 173 for (i = 0; i < totalFilters; i++) {
c@251 174 free(mfccFilterWeights[i]);
c@251 175 }
c@251 176 free(mfccFilterWeights);
c@251 177
c@251 178 /* Free the feature vector */
c@251 179 free(ceps);
c@251 180
c@251 181 /* The analysis window */
c@251 182 delete window;
c@255 183
c@255 184 free(earMag);
c@255 185 free(fftMag);
c@251 186
c@251 187 /* Free the FFT */
c@251 188 free(realOut);
c@251 189 free(imagOut);
c@289 190
c@289 191 delete fft;
c@251 192 }
c@251 193
c@251 194
c@251 195 /*
c@251 196 *
c@251 197 * Extract the MFCC on the input frame
c@251 198 *
c@251 199 */
c@255 200 int MFCC::process(const double *inframe, double *outceps)
c@251 201 {
c@255 202 double *inputData = (double *)malloc(fftSize * sizeof(double));
c@255 203 for (int i = 0; i < fftSize; ++i) inputData[i] = inframe[i];
c@251 204
c@255 205 window->cut(inputData);
c@251 206
c@251 207 /* Calculate the fft on the input frame */
c@289 208 fft->process(0, inputData, realOut, imagOut);
c@251 209
c@255 210 free(inputData);
c@255 211
c@255 212 return process(realOut, imagOut, outceps);
c@255 213 }
c@255 214
c@255 215 int MFCC::process(const double *real, const double *imag, double *outceps)
c@255 216 {
c@255 217 int i, j;
c@255 218
c@255 219 for (i = 0; i < fftSize/2; ++i) {
c@255 220 fftMag[i] = sqrt(real[i] * real[i] + imag[i] * imag[i]);
c@255 221 }
c@255 222
c@255 223 for (i = 0; i < totalFilters; ++i) {
c@255 224 earMag[i] = 0.0;
c@251 225 }
c@251 226
c@251 227 /* Multiply by mfccFilterWeights */
c@255 228 for (i = 0; i < totalFilters; i++) {
c@255 229 double tmp = 0.0;
c@255 230 for (j = 0; j < fftSize/2; j++) {
c@255 231 tmp = tmp + (mfccFilterWeights[i][j] * fftMag[j]);
c@251 232 }
c@255 233 if (tmp > 0) earMag[i] = log10(tmp);
c@255 234 else earMag[i] = 0.0;
c@255 235
c@255 236 if (logPower != 1.0) {
c@255 237 earMag[i] = pow(earMag[i], logPower);
c@255 238 }
c@251 239 }
c@251 240
c@251 241 /*
c@251 242 *
c@251 243 * Calculate now the cepstral coefficients
c@251 244 * with or without the DC component
c@251 245 *
c@251 246 */
c@251 247
c@255 248 if (WANT_C0 == 1) {
c@251 249
c@255 250 for (i = 0; i < nceps+1; i++) {
c@255 251 double tmp = 0.;
c@255 252 for (j = 0; j < totalFilters; j++){
c@255 253 tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
c@251 254 }
c@251 255 outceps[i] = tmp;
c@251 256 }
c@251 257 }
c@251 258 else
c@251 259 {
c@255 260 for (i = 1; i < nceps+1; i++) {
c@255 261 double tmp = 0.;
c@255 262 for (j = 0; j < totalFilters; j++){
c@255 263 tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
c@251 264 }
c@251 265 outceps[i-1] = tmp;
c@251 266 }
c@251 267 }
c@251 268
c@251 269 return nceps;
c@251 270 }
c@251 271