annotate dsp/mfcc/MFCC.cpp @ 29:1c9c4d2c0592

...
author cannam
date Wed, 16 Jan 2008 18:02:31 +0000
parents d096a79fa772
children a251fb0de594
rev   line source
cannam@26 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
cannam@25 2
cannam@25 3 /*
cannam@26 4 QM DSP Library
cannam@25 5
cannam@26 6 Centre for Digital Music, Queen Mary, University of London.
cannam@26 7 This file copyright 2005 Nicolas Chetry, copyright 2008 QMUL.
cannam@26 8 All rights reserved.
cannam@26 9 */
cannam@25 10
cannam@26 11 #include <cmath>
cannam@26 12 #include <cstdlib>
cannam@26 13
cannam@26 14 #include "MFCC.h"
cannam@26 15 #include "dsp/transforms/FFT.h"
cannam@26 16 #include "base/Window.h"
cannam@26 17
cannam@26 18 MFCC::MFCC(MFCCConfig config)
cannam@26 19 {
cannam@26 20 int i,j;
cannam@26 21
cannam@26 22 /* Calculate at startup */
cannam@26 23 double *freqs, *lower, *center, *upper, *triangleHeight, *fftFreqs;
cannam@25 24
cannam@26 25 lowestFrequency = 66.6666666;
cannam@26 26 linearFilters = 13;
cannam@26 27 linearSpacing = 66.66666666;
cannam@26 28 logFilters = 27;
cannam@26 29 logSpacing = 1.0711703;
cannam@25 30
cannam@26 31 /* FFT and analysis window sizes */
cannam@26 32 fftSize = config.fftsize;
cannam@26 33
cannam@26 34 totalFilters = linearFilters + logFilters;
cannam@25 35
cannam@26 36 samplingRate = config.FS;
cannam@26 37
cannam@26 38 /* The number of cepstral componenents */
cannam@26 39 nceps = config.nceps;
cannam@25 40
cannam@26 41 /* Set if user want C0 */
cannam@26 42 WANT_C0 = (config.want_c0 ? 1 : 0);
cannam@25 43
cannam@26 44 /* Allocate space for feature vector */
cannam@26 45 if (WANT_C0 == 1) {
cannam@26 46 ceps = (double*)calloc(nceps+1, sizeof(double));
cannam@26 47 } else {
cannam@26 48 ceps = (double*)calloc(nceps, sizeof(double));
cannam@26 49 }
cannam@26 50
cannam@26 51 /* Allocate space for local vectors */
cannam@26 52 mfccDCTMatrix = (double**)calloc(nceps+1, sizeof(double*));
cannam@26 53 for (i=0;i<nceps+1; i++) {
cannam@26 54 mfccDCTMatrix[i]= (double*)calloc(totalFilters, sizeof(double));
cannam@26 55 }
cannam@26 56
cannam@26 57 mfccFilterWeights = (double**)calloc(totalFilters, sizeof(double*));
cannam@26 58 for (i=0;i<totalFilters; i++) {
cannam@26 59 mfccFilterWeights[i] = (double*)calloc(fftSize, sizeof(double));
cannam@26 60 }
cannam@26 61
cannam@26 62 freqs = (double*)calloc(totalFilters+2,sizeof(double));
cannam@26 63
cannam@26 64 lower = (double*)calloc(totalFilters,sizeof(double));
cannam@26 65 center = (double*)calloc(totalFilters,sizeof(double));
cannam@26 66 upper = (double*)calloc(totalFilters,sizeof(double));
cannam@26 67
cannam@26 68 triangleHeight = (double*)calloc(totalFilters,sizeof(double));
cannam@26 69 fftFreqs = (double*)calloc(fftSize,sizeof(double));
cannam@25 70
cannam@26 71 for (i=0;i<linearFilters;i++) {
cannam@26 72 freqs[i] = lowestFrequency + ((double)i) * linearSpacing;
cannam@26 73 }
cannam@26 74
cannam@26 75 for (i=linearFilters; i<totalFilters+2; i++) {
cannam@26 76 freqs[i] = freqs[linearFilters-1] *
cannam@26 77 pow(logSpacing, (double)(i-linearFilters+1));
cannam@26 78 }
cannam@26 79
cannam@26 80 /* Define lower, center and upper */
cannam@26 81 memcpy(lower, freqs,totalFilters*sizeof(double));
cannam@26 82 memcpy(center, &freqs[1],totalFilters*sizeof(double));
cannam@26 83 memcpy(upper, &freqs[2],totalFilters*sizeof(double));
cannam@26 84
cannam@26 85 for (i=0;i<totalFilters;i++){
cannam@26 86 triangleHeight[i] = 2./(upper[i]-lower[i]);
cannam@26 87 }
cannam@26 88
cannam@26 89 for (i=0;i<fftSize;i++){
cannam@26 90 fftFreqs[i] = ((double) i / ((double) fftSize ) *
cannam@26 91 (double) samplingRate);
cannam@26 92 }
cannam@25 93
cannam@26 94 /* Build now the mccFilterWeight matrix */
cannam@26 95 for (i=0;i<totalFilters;i++){
cannam@25 96
cannam@26 97 for (j=0;j<fftSize;j++) {
cannam@26 98
cannam@26 99 if ((fftFreqs[j] > lower[i]) && (fftFreqs[j] <= center[i])) {
cannam@26 100
cannam@26 101 mfccFilterWeights[i][j] = triangleHeight[i] *
cannam@26 102 (fftFreqs[j]-lower[i]) / (center[i]-lower[i]);
cannam@26 103
cannam@26 104 }
cannam@26 105 else
cannam@26 106 {
cannam@26 107 mfccFilterWeights[i][j] = 0.0;
cannam@26 108 }
cannam@25 109
cannam@26 110 if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) {
cannam@25 111
cannam@26 112 mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + triangleHeight[i] * (upper[i]-fftFreqs[j])
cannam@26 113 / (upper[i]-center[i]);
cannam@26 114 }
cannam@26 115 else
cannam@26 116 {
cannam@26 117 mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + 0.0;
cannam@26 118 }
cannam@25 119 }
cannam@25 120
cannam@26 121 }
cannam@25 122
cannam@26 123 /*
cannam@26 124 * We calculate now mfccDCT matrix
cannam@26 125 * NB: +1 because of the DC component
cannam@26 126 */
cannam@29 127
cannam@29 128 const double pi = 3.14159265358979323846264338327950288;
cannam@26 129
cannam@26 130 for (i=0; i<nceps+1; i++) {
cannam@26 131 for (j=0; j<totalFilters; j++) {
cannam@26 132 mfccDCTMatrix[i][j] = (1./sqrt((double) totalFilters / 2.))
cannam@29 133 * cos((double) i * ((double) j + 0.5) / (double) totalFilters * pi);
cannam@25 134 }
cannam@25 135 }
cannam@25 136
cannam@26 137 for (j=0;j<totalFilters;j++){
cannam@26 138 mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfccDCTMatrix[0][j];
cannam@26 139 }
cannam@26 140
cannam@26 141 /* The analysis window */
cannam@26 142 window = new Window<double>(HammingWindow, fftSize);
cannam@25 143
cannam@26 144 /* Allocate memory for the FFT */
cannam@26 145 imagIn = (double*)calloc(fftSize,sizeof(double));
cannam@26 146 realOut = (double*)calloc(fftSize,sizeof(double));
cannam@26 147 imagOut = (double*)calloc(fftSize,sizeof(double));
cannam@25 148
cannam@26 149 free(freqs);
cannam@26 150 free(lower);
cannam@26 151 free(center);
cannam@26 152 free(upper);
cannam@26 153 free(triangleHeight);
cannam@26 154 free(fftFreqs);
cannam@25 155 }
cannam@25 156
cannam@26 157 MFCC::~MFCC()
cannam@26 158 {
cannam@26 159 int i;
cannam@26 160
cannam@26 161 /* Free the structure */
cannam@26 162 for (i=0;i<nceps+1;i++) {
cannam@26 163 free(mfccDCTMatrix[i]);
cannam@26 164 }
cannam@26 165 free(mfccDCTMatrix);
cannam@26 166
cannam@26 167 for (i=0;i<totalFilters; i++) {
cannam@26 168 free(mfccFilterWeights[i]);
cannam@26 169 }
cannam@26 170 free(mfccFilterWeights);
cannam@26 171
cannam@26 172 /* Free the feature vector */
cannam@26 173 free(ceps);
cannam@26 174
cannam@26 175 /* The analysis window */
cannam@26 176 delete window;
cannam@26 177
cannam@26 178 /* Free the FFT */
cannam@26 179 free(imagIn);
cannam@26 180 free(realOut);
cannam@26 181 free(imagOut);
cannam@26 182 }
cannam@25 183
cannam@25 184
cannam@25 185 /*
cannam@25 186 *
cannam@25 187 * Extract the MFCC on the input frame
cannam@26 188 *
cannam@26 189 * looks like we have to have length = fftSize ??????
cannam@25 190 *
cannam@25 191 */
cannam@26 192 int MFCC::process(int length, double *inframe, double *outceps)
cannam@26 193 {
cannam@26 194 int i,j;
cannam@26 195
cannam@26 196 double *fftMag;
cannam@26 197 double *earMag;
cannam@25 198
cannam@26 199 double *inputData;
cannam@26 200
cannam@26 201 double tmp;
cannam@25 202
cannam@26 203 earMag = (double*)calloc(totalFilters, sizeof(double));
cannam@26 204 inputData = (double*)calloc(fftSize, sizeof(double));
cannam@26 205 fftMag = (double*)calloc(fftSize, sizeof(double));
cannam@26 206
cannam@26 207 /* Zero-pad if needed */
cannam@26 208 memcpy(inputData, inframe, length*sizeof(double));
cannam@25 209
cannam@29 210 //!!! window->cut(inputData);
cannam@26 211
cannam@26 212 /* Calculate the fft on the input frame */
cannam@26 213 FFT::process(fftSize, 0, inputData, imagIn, realOut, imagOut);
cannam@25 214
cannam@26 215 for (i = 0; i < fftSize; ++i) {
cannam@26 216 fftMag[i] = sqrt(realOut[i] * realOut[i] +
cannam@26 217 imagOut[i] * imagOut[i]);
cannam@25 218 }
cannam@25 219
cannam@26 220 /* Multiply by mfccFilterWeights */
cannam@26 221 for (i=0;i<totalFilters;i++) {
cannam@26 222 tmp = 0.;
cannam@26 223 for(j=0;j<fftSize/2; j++) {
cannam@26 224 tmp = tmp + (mfccFilterWeights[i][j]*fftMag[j]);
cannam@26 225 }
cannam@26 226 if (tmp>0)
cannam@26 227 earMag[i] = log10(tmp);
cannam@26 228 else
cannam@26 229 earMag[i] = 0.0;
cannam@26 230 }
cannam@26 231
cannam@26 232 /*
cannam@26 233 *
cannam@26 234 * Calculate now the cepstral coefficients
cannam@26 235 * with or without the DC component
cannam@26 236 *
cannam@26 237 */
cannam@26 238
cannam@26 239 if (WANT_C0==1) {
cannam@26 240
cannam@26 241 for (i=0;i<nceps+1;i++) {
cannam@26 242 tmp = 0.;
cannam@26 243 for (j=0;j<totalFilters;j++){
cannam@26 244 tmp = tmp + mfccDCTMatrix[i][j]*earMag[j];
cannam@26 245 }
cannam@26 246 outceps[i] = tmp;
cannam@26 247 }
cannam@26 248 }
cannam@25 249 else
cannam@26 250 {
cannam@26 251 for (i=1;i<nceps+1;i++) {
cannam@26 252 tmp = 0.;
cannam@26 253 for (j=0;j<totalFilters;j++){
cannam@26 254 tmp = tmp + mfccDCTMatrix[i][j]*earMag[j];
cannam@26 255 }
cannam@26 256 outceps[i-1] = tmp;
cannam@25 257 }
cannam@26 258 }
cannam@25 259
cannam@26 260 free(fftMag);
cannam@26 261 free(earMag);
cannam@26 262 free(inputData);
cannam@25 263
cannam@26 264 return nceps;
cannam@25 265 }
cannam@25 266