annotate dsp/mfcc/MFCC.cpp @ 254:52c1a295d775

...
author Chris Cannam <c.cannam@qmul.ac.uk>
date Wed, 16 Jan 2008 18:02:31 +0000
parents c3600d3cfe5c
children 9edaa3ce62e8
rev   line source
c@251 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
c@251 2
c@251 3 /*
c@251 4 QM DSP Library
c@251 5
c@251 6 Centre for Digital Music, Queen Mary, University of London.
c@251 7 This file copyright 2005 Nicolas Chetry, copyright 2008 QMUL.
c@251 8 All rights reserved.
c@251 9 */
c@251 10
c@251 11 #include <cmath>
c@251 12 #include <cstdlib>
c@251 13
c@251 14 #include "MFCC.h"
c@251 15 #include "dsp/transforms/FFT.h"
c@251 16 #include "base/Window.h"
c@251 17
c@251 18 MFCC::MFCC(MFCCConfig config)
c@251 19 {
c@251 20 int i,j;
c@251 21
c@251 22 /* Calculate at startup */
c@251 23 double *freqs, *lower, *center, *upper, *triangleHeight, *fftFreqs;
c@251 24
c@251 25 lowestFrequency = 66.6666666;
c@251 26 linearFilters = 13;
c@251 27 linearSpacing = 66.66666666;
c@251 28 logFilters = 27;
c@251 29 logSpacing = 1.0711703;
c@251 30
c@251 31 /* FFT and analysis window sizes */
c@251 32 fftSize = config.fftsize;
c@251 33
c@251 34 totalFilters = linearFilters + logFilters;
c@251 35
c@251 36 samplingRate = config.FS;
c@251 37
c@251 38 /* The number of cepstral componenents */
c@251 39 nceps = config.nceps;
c@251 40
c@251 41 /* Set if user want C0 */
c@251 42 WANT_C0 = (config.want_c0 ? 1 : 0);
c@251 43
c@251 44 /* Allocate space for feature vector */
c@251 45 if (WANT_C0 == 1) {
c@251 46 ceps = (double*)calloc(nceps+1, sizeof(double));
c@251 47 } else {
c@251 48 ceps = (double*)calloc(nceps, sizeof(double));
c@251 49 }
c@251 50
c@251 51 /* Allocate space for local vectors */
c@251 52 mfccDCTMatrix = (double**)calloc(nceps+1, sizeof(double*));
c@251 53 for (i=0;i<nceps+1; i++) {
c@251 54 mfccDCTMatrix[i]= (double*)calloc(totalFilters, sizeof(double));
c@251 55 }
c@251 56
c@251 57 mfccFilterWeights = (double**)calloc(totalFilters, sizeof(double*));
c@251 58 for (i=0;i<totalFilters; i++) {
c@251 59 mfccFilterWeights[i] = (double*)calloc(fftSize, sizeof(double));
c@251 60 }
c@251 61
c@251 62 freqs = (double*)calloc(totalFilters+2,sizeof(double));
c@251 63
c@251 64 lower = (double*)calloc(totalFilters,sizeof(double));
c@251 65 center = (double*)calloc(totalFilters,sizeof(double));
c@251 66 upper = (double*)calloc(totalFilters,sizeof(double));
c@251 67
c@251 68 triangleHeight = (double*)calloc(totalFilters,sizeof(double));
c@251 69 fftFreqs = (double*)calloc(fftSize,sizeof(double));
c@251 70
c@251 71 for (i=0;i<linearFilters;i++) {
c@251 72 freqs[i] = lowestFrequency + ((double)i) * linearSpacing;
c@251 73 }
c@251 74
c@251 75 for (i=linearFilters; i<totalFilters+2; i++) {
c@251 76 freqs[i] = freqs[linearFilters-1] *
c@251 77 pow(logSpacing, (double)(i-linearFilters+1));
c@251 78 }
c@251 79
c@251 80 /* Define lower, center and upper */
c@251 81 memcpy(lower, freqs,totalFilters*sizeof(double));
c@251 82 memcpy(center, &freqs[1],totalFilters*sizeof(double));
c@251 83 memcpy(upper, &freqs[2],totalFilters*sizeof(double));
c@251 84
c@251 85 for (i=0;i<totalFilters;i++){
c@251 86 triangleHeight[i] = 2./(upper[i]-lower[i]);
c@251 87 }
c@251 88
c@251 89 for (i=0;i<fftSize;i++){
c@251 90 fftFreqs[i] = ((double) i / ((double) fftSize ) *
c@251 91 (double) samplingRate);
c@251 92 }
c@251 93
c@251 94 /* Build now the mccFilterWeight matrix */
c@251 95 for (i=0;i<totalFilters;i++){
c@251 96
c@251 97 for (j=0;j<fftSize;j++) {
c@251 98
c@251 99 if ((fftFreqs[j] > lower[i]) && (fftFreqs[j] <= center[i])) {
c@251 100
c@251 101 mfccFilterWeights[i][j] = triangleHeight[i] *
c@251 102 (fftFreqs[j]-lower[i]) / (center[i]-lower[i]);
c@251 103
c@251 104 }
c@251 105 else
c@251 106 {
c@251 107 mfccFilterWeights[i][j] = 0.0;
c@251 108 }
c@251 109
c@251 110 if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) {
c@251 111
c@251 112 mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + triangleHeight[i] * (upper[i]-fftFreqs[j])
c@251 113 / (upper[i]-center[i]);
c@251 114 }
c@251 115 else
c@251 116 {
c@251 117 mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + 0.0;
c@251 118 }
c@251 119 }
c@251 120
c@251 121 }
c@251 122
c@251 123 /*
c@251 124 * We calculate now mfccDCT matrix
c@251 125 * NB: +1 because of the DC component
c@251 126 */
c@254 127
c@254 128 const double pi = 3.14159265358979323846264338327950288;
c@251 129
c@251 130 for (i=0; i<nceps+1; i++) {
c@251 131 for (j=0; j<totalFilters; j++) {
c@251 132 mfccDCTMatrix[i][j] = (1./sqrt((double) totalFilters / 2.))
c@254 133 * cos((double) i * ((double) j + 0.5) / (double) totalFilters * pi);
c@251 134 }
c@251 135 }
c@251 136
c@251 137 for (j=0;j<totalFilters;j++){
c@251 138 mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfccDCTMatrix[0][j];
c@251 139 }
c@251 140
c@251 141 /* The analysis window */
c@251 142 window = new Window<double>(HammingWindow, fftSize);
c@251 143
c@251 144 /* Allocate memory for the FFT */
c@251 145 imagIn = (double*)calloc(fftSize,sizeof(double));
c@251 146 realOut = (double*)calloc(fftSize,sizeof(double));
c@251 147 imagOut = (double*)calloc(fftSize,sizeof(double));
c@251 148
c@251 149 free(freqs);
c@251 150 free(lower);
c@251 151 free(center);
c@251 152 free(upper);
c@251 153 free(triangleHeight);
c@251 154 free(fftFreqs);
c@251 155 }
c@251 156
c@251 157 MFCC::~MFCC()
c@251 158 {
c@251 159 int i;
c@251 160
c@251 161 /* Free the structure */
c@251 162 for (i=0;i<nceps+1;i++) {
c@251 163 free(mfccDCTMatrix[i]);
c@251 164 }
c@251 165 free(mfccDCTMatrix);
c@251 166
c@251 167 for (i=0;i<totalFilters; i++) {
c@251 168 free(mfccFilterWeights[i]);
c@251 169 }
c@251 170 free(mfccFilterWeights);
c@251 171
c@251 172 /* Free the feature vector */
c@251 173 free(ceps);
c@251 174
c@251 175 /* The analysis window */
c@251 176 delete window;
c@251 177
c@251 178 /* Free the FFT */
c@251 179 free(imagIn);
c@251 180 free(realOut);
c@251 181 free(imagOut);
c@251 182 }
c@251 183
c@251 184
c@251 185 /*
c@251 186 *
c@251 187 * Extract the MFCC on the input frame
c@251 188 *
c@251 189 * looks like we have to have length = fftSize ??????
c@251 190 *
c@251 191 */
c@251 192 int MFCC::process(int length, double *inframe, double *outceps)
c@251 193 {
c@251 194 int i,j;
c@251 195
c@251 196 double *fftMag;
c@251 197 double *earMag;
c@251 198
c@251 199 double *inputData;
c@251 200
c@251 201 double tmp;
c@251 202
c@251 203 earMag = (double*)calloc(totalFilters, sizeof(double));
c@251 204 inputData = (double*)calloc(fftSize, sizeof(double));
c@251 205 fftMag = (double*)calloc(fftSize, sizeof(double));
c@251 206
c@251 207 /* Zero-pad if needed */
c@251 208 memcpy(inputData, inframe, length*sizeof(double));
c@251 209
c@254 210 //!!! window->cut(inputData);
c@251 211
c@251 212 /* Calculate the fft on the input frame */
c@251 213 FFT::process(fftSize, 0, inputData, imagIn, realOut, imagOut);
c@251 214
c@251 215 for (i = 0; i < fftSize; ++i) {
c@251 216 fftMag[i] = sqrt(realOut[i] * realOut[i] +
c@251 217 imagOut[i] * imagOut[i]);
c@251 218 }
c@251 219
c@251 220 /* Multiply by mfccFilterWeights */
c@251 221 for (i=0;i<totalFilters;i++) {
c@251 222 tmp = 0.;
c@251 223 for(j=0;j<fftSize/2; j++) {
c@251 224 tmp = tmp + (mfccFilterWeights[i][j]*fftMag[j]);
c@251 225 }
c@251 226 if (tmp>0)
c@251 227 earMag[i] = log10(tmp);
c@251 228 else
c@251 229 earMag[i] = 0.0;
c@251 230 }
c@251 231
c@251 232 /*
c@251 233 *
c@251 234 * Calculate now the cepstral coefficients
c@251 235 * with or without the DC component
c@251 236 *
c@251 237 */
c@251 238
c@251 239 if (WANT_C0==1) {
c@251 240
c@251 241 for (i=0;i<nceps+1;i++) {
c@251 242 tmp = 0.;
c@251 243 for (j=0;j<totalFilters;j++){
c@251 244 tmp = tmp + mfccDCTMatrix[i][j]*earMag[j];
c@251 245 }
c@251 246 outceps[i] = tmp;
c@251 247 }
c@251 248 }
c@251 249 else
c@251 250 {
c@251 251 for (i=1;i<nceps+1;i++) {
c@251 252 tmp = 0.;
c@251 253 for (j=0;j<totalFilters;j++){
c@251 254 tmp = tmp + mfccDCTMatrix[i][j]*earMag[j];
c@251 255 }
c@251 256 outceps[i-1] = tmp;
c@251 257 }
c@251 258 }
c@251 259
c@251 260 free(fftMag);
c@251 261 free(earMag);
c@251 262 free(inputData);
c@251 263
c@251 264 return nceps;
c@251 265 }
c@251 266