annotate dsp/mfcc/MFCC.cpp @ 278:833ca65b0820

* Update with fixes from Matthew's newer code
author Chris Cannam <c.cannam@qmul.ac.uk>
date Mon, 09 Feb 2009 16:05:32 +0000
parents 330c2e11f8a9
children befe5aa6b450
rev   line source
c@251 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
c@251 2
c@251 3 /*
c@251 4 QM DSP Library
c@251 5
c@251 6 Centre for Digital Music, Queen Mary, University of London.
c@251 7 This file copyright 2005 Nicolas Chetry, copyright 2008 QMUL.
c@251 8 All rights reserved.
c@251 9 */
c@251 10
c@251 11 #include <cmath>
c@251 12 #include <cstdlib>
c@272 13 #include <cstring>
c@251 14
c@251 15 #include "MFCC.h"
c@251 16 #include "dsp/transforms/FFT.h"
c@251 17 #include "base/Window.h"
c@251 18
c@251 19 MFCC::MFCC(MFCCConfig config)
c@251 20 {
c@251 21 int i,j;
c@251 22
c@251 23 /* Calculate at startup */
c@251 24 double *freqs, *lower, *center, *upper, *triangleHeight, *fftFreqs;
c@251 25
c@251 26 lowestFrequency = 66.6666666;
c@251 27 linearFilters = 13;
c@251 28 linearSpacing = 66.66666666;
c@251 29 logFilters = 27;
c@251 30 logSpacing = 1.0711703;
c@251 31
c@251 32 /* FFT and analysis window sizes */
c@251 33 fftSize = config.fftsize;
c@251 34
c@251 35 totalFilters = linearFilters + logFilters;
c@255 36 logPower = config.logpower;
c@251 37
c@251 38 samplingRate = config.FS;
c@251 39
c@251 40 /* The number of cepstral componenents */
c@251 41 nceps = config.nceps;
c@251 42
c@251 43 /* Set if user want C0 */
c@251 44 WANT_C0 = (config.want_c0 ? 1 : 0);
c@251 45
c@251 46 /* Allocate space for feature vector */
c@251 47 if (WANT_C0 == 1) {
c@251 48 ceps = (double*)calloc(nceps+1, sizeof(double));
c@251 49 } else {
c@251 50 ceps = (double*)calloc(nceps, sizeof(double));
c@251 51 }
c@251 52
c@251 53 /* Allocate space for local vectors */
c@251 54 mfccDCTMatrix = (double**)calloc(nceps+1, sizeof(double*));
c@255 55 for (i = 0; i < nceps+1; i++) {
c@251 56 mfccDCTMatrix[i]= (double*)calloc(totalFilters, sizeof(double));
c@251 57 }
c@251 58
c@251 59 mfccFilterWeights = (double**)calloc(totalFilters, sizeof(double*));
c@255 60 for (i = 0; i < totalFilters; i++) {
c@251 61 mfccFilterWeights[i] = (double*)calloc(fftSize, sizeof(double));
c@251 62 }
c@251 63
c@251 64 freqs = (double*)calloc(totalFilters+2,sizeof(double));
c@251 65
c@251 66 lower = (double*)calloc(totalFilters,sizeof(double));
c@251 67 center = (double*)calloc(totalFilters,sizeof(double));
c@251 68 upper = (double*)calloc(totalFilters,sizeof(double));
c@251 69
c@251 70 triangleHeight = (double*)calloc(totalFilters,sizeof(double));
c@251 71 fftFreqs = (double*)calloc(fftSize,sizeof(double));
c@251 72
c@255 73 for (i = 0; i < linearFilters; i++) {
c@251 74 freqs[i] = lowestFrequency + ((double)i) * linearSpacing;
c@251 75 }
c@251 76
c@255 77 for (i = linearFilters; i < totalFilters+2; i++) {
c@251 78 freqs[i] = freqs[linearFilters-1] *
c@251 79 pow(logSpacing, (double)(i-linearFilters+1));
c@251 80 }
c@251 81
c@251 82 /* Define lower, center and upper */
c@251 83 memcpy(lower, freqs,totalFilters*sizeof(double));
c@251 84 memcpy(center, &freqs[1],totalFilters*sizeof(double));
c@251 85 memcpy(upper, &freqs[2],totalFilters*sizeof(double));
c@251 86
c@251 87 for (i=0;i<totalFilters;i++){
c@251 88 triangleHeight[i] = 2./(upper[i]-lower[i]);
c@251 89 }
c@251 90
c@251 91 for (i=0;i<fftSize;i++){
c@251 92 fftFreqs[i] = ((double) i / ((double) fftSize ) *
c@251 93 (double) samplingRate);
c@251 94 }
c@251 95
c@251 96 /* Build now the mccFilterWeight matrix */
c@251 97 for (i=0;i<totalFilters;i++){
c@251 98
c@251 99 for (j=0;j<fftSize;j++) {
c@251 100
c@251 101 if ((fftFreqs[j] > lower[i]) && (fftFreqs[j] <= center[i])) {
c@251 102
c@251 103 mfccFilterWeights[i][j] = triangleHeight[i] *
c@251 104 (fftFreqs[j]-lower[i]) / (center[i]-lower[i]);
c@251 105
c@251 106 }
c@251 107 else
c@251 108 {
c@251 109 mfccFilterWeights[i][j] = 0.0;
c@251 110 }
c@251 111
c@251 112 if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) {
c@251 113
c@255 114 mfccFilterWeights[i][j] = mfccFilterWeights[i][j]
c@255 115 + triangleHeight[i] * (upper[i]-fftFreqs[j])
c@251 116 / (upper[i]-center[i]);
c@251 117 }
c@251 118 else
c@251 119 {
c@251 120 mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + 0.0;
c@251 121 }
c@251 122 }
c@251 123
c@251 124 }
c@251 125
c@251 126 /*
c@251 127 * We calculate now mfccDCT matrix
c@251 128 * NB: +1 because of the DC component
c@251 129 */
c@254 130
c@254 131 const double pi = 3.14159265358979323846264338327950288;
c@251 132
c@255 133 for (i = 0; i < nceps+1; i++) {
c@255 134 for (j = 0; j < totalFilters; j++) {
c@251 135 mfccDCTMatrix[i][j] = (1./sqrt((double) totalFilters / 2.))
c@254 136 * cos((double) i * ((double) j + 0.5) / (double) totalFilters * pi);
c@251 137 }
c@251 138 }
c@251 139
c@255 140 for (j = 0; j < totalFilters; j++){
c@255 141 mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfccDCTMatrix[0][j];
c@251 142 }
c@251 143
c@251 144 /* The analysis window */
c@257 145 window = new Window<double>(config.window, fftSize);
c@251 146
c@251 147 /* Allocate memory for the FFT */
c@255 148 imagIn = (double*)calloc(fftSize, sizeof(double));
c@255 149 realOut = (double*)calloc(fftSize, sizeof(double));
c@255 150 imagOut = (double*)calloc(fftSize, sizeof(double));
c@255 151
c@255 152 earMag = (double*)calloc(totalFilters, sizeof(double));
c@255 153 fftMag = (double*)calloc(fftSize/2, sizeof(double));
c@251 154
c@251 155 free(freqs);
c@251 156 free(lower);
c@251 157 free(center);
c@251 158 free(upper);
c@251 159 free(triangleHeight);
c@251 160 free(fftFreqs);
c@251 161 }
c@251 162
c@251 163 MFCC::~MFCC()
c@251 164 {
c@251 165 int i;
c@251 166
c@251 167 /* Free the structure */
c@255 168 for (i = 0; i < nceps+1; i++) {
c@251 169 free(mfccDCTMatrix[i]);
c@251 170 }
c@251 171 free(mfccDCTMatrix);
c@251 172
c@255 173 for (i = 0; i < totalFilters; i++) {
c@251 174 free(mfccFilterWeights[i]);
c@251 175 }
c@251 176 free(mfccFilterWeights);
c@251 177
c@251 178 /* Free the feature vector */
c@251 179 free(ceps);
c@251 180
c@251 181 /* The analysis window */
c@251 182 delete window;
c@255 183
c@255 184 free(earMag);
c@255 185 free(fftMag);
c@251 186
c@251 187 /* Free the FFT */
c@251 188 free(imagIn);
c@251 189 free(realOut);
c@251 190 free(imagOut);
c@251 191 }
c@251 192
c@251 193
c@251 194 /*
c@251 195 *
c@251 196 * Extract the MFCC on the input frame
c@251 197 *
c@251 198 */
c@255 199 int MFCC::process(const double *inframe, double *outceps)
c@251 200 {
c@255 201 double *inputData = (double *)malloc(fftSize * sizeof(double));
c@255 202 for (int i = 0; i < fftSize; ++i) inputData[i] = inframe[i];
c@251 203
c@255 204 window->cut(inputData);
c@251 205
c@251 206 /* Calculate the fft on the input frame */
c@251 207 FFT::process(fftSize, 0, inputData, imagIn, realOut, imagOut);
c@251 208
c@255 209 free(inputData);
c@255 210
c@255 211 return process(realOut, imagOut, outceps);
c@255 212 }
c@255 213
c@255 214 int MFCC::process(const double *real, const double *imag, double *outceps)
c@255 215 {
c@255 216 int i, j;
c@255 217
c@255 218 for (i = 0; i < fftSize/2; ++i) {
c@255 219 fftMag[i] = sqrt(real[i] * real[i] + imag[i] * imag[i]);
c@255 220 }
c@255 221
c@255 222 for (i = 0; i < totalFilters; ++i) {
c@255 223 earMag[i] = 0.0;
c@251 224 }
c@251 225
c@251 226 /* Multiply by mfccFilterWeights */
c@255 227 for (i = 0; i < totalFilters; i++) {
c@255 228 double tmp = 0.0;
c@255 229 for (j = 0; j < fftSize/2; j++) {
c@255 230 tmp = tmp + (mfccFilterWeights[i][j] * fftMag[j]);
c@251 231 }
c@255 232 if (tmp > 0) earMag[i] = log10(tmp);
c@255 233 else earMag[i] = 0.0;
c@255 234
c@255 235 if (logPower != 1.0) {
c@255 236 earMag[i] = pow(earMag[i], logPower);
c@255 237 }
c@251 238 }
c@251 239
c@251 240 /*
c@251 241 *
c@251 242 * Calculate now the cepstral coefficients
c@251 243 * with or without the DC component
c@251 244 *
c@251 245 */
c@251 246
c@255 247 if (WANT_C0 == 1) {
c@251 248
c@255 249 for (i = 0; i < nceps+1; i++) {
c@255 250 double tmp = 0.;
c@255 251 for (j = 0; j < totalFilters; j++){
c@255 252 tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
c@251 253 }
c@251 254 outceps[i] = tmp;
c@251 255 }
c@251 256 }
c@251 257 else
c@251 258 {
c@255 259 for (i = 1; i < nceps+1; i++) {
c@255 260 double tmp = 0.;
c@255 261 for (j = 0; j < totalFilters; j++){
c@255 262 tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
c@251 263 }
c@251 264 outceps[i-1] = tmp;
c@251 265 }
c@251 266 }
c@251 267
c@251 268 return nceps;
c@251 269 }
c@251 270