annotate dsp/mfcc/MFCC.cpp @ 30:a251fb0de594

* Make MFCC able to accept already-FFT'd input, and simplify API a bit * Add log power value to MFCC, restore windowing, and avoid some heap allocs * In HMM, bail out of iteration if loglik hits NaN
author cannam
date Fri, 18 Jan 2008 13:24:12 +0000
parents 1c9c4d2c0592
children 8bb764969d50
rev   line source
cannam@26 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
cannam@25 2
cannam@25 3 /*
cannam@26 4 QM DSP Library
cannam@25 5
cannam@26 6 Centre for Digital Music, Queen Mary, University of London.
cannam@26 7 This file copyright 2005 Nicolas Chetry, copyright 2008 QMUL.
cannam@26 8 All rights reserved.
cannam@26 9 */
cannam@25 10
cannam@26 11 #include <cmath>
cannam@26 12 #include <cstdlib>
cannam@26 13
cannam@26 14 #include "MFCC.h"
cannam@26 15 #include "dsp/transforms/FFT.h"
cannam@26 16 #include "base/Window.h"
cannam@26 17
cannam@26 18 MFCC::MFCC(MFCCConfig config)
cannam@26 19 {
cannam@26 20 int i,j;
cannam@26 21
cannam@26 22 /* Calculate at startup */
cannam@26 23 double *freqs, *lower, *center, *upper, *triangleHeight, *fftFreqs;
cannam@25 24
cannam@26 25 lowestFrequency = 66.6666666;
cannam@26 26 linearFilters = 13;
cannam@26 27 linearSpacing = 66.66666666;
cannam@26 28 logFilters = 27;
cannam@26 29 logSpacing = 1.0711703;
cannam@25 30
cannam@26 31 /* FFT and analysis window sizes */
cannam@26 32 fftSize = config.fftsize;
cannam@26 33
cannam@26 34 totalFilters = linearFilters + logFilters;
cannam@30 35 logPower = config.logpower;
cannam@25 36
cannam@26 37 samplingRate = config.FS;
cannam@26 38
cannam@26 39 /* The number of cepstral componenents */
cannam@26 40 nceps = config.nceps;
cannam@25 41
cannam@26 42 /* Set if user want C0 */
cannam@26 43 WANT_C0 = (config.want_c0 ? 1 : 0);
cannam@25 44
cannam@26 45 /* Allocate space for feature vector */
cannam@26 46 if (WANT_C0 == 1) {
cannam@26 47 ceps = (double*)calloc(nceps+1, sizeof(double));
cannam@26 48 } else {
cannam@26 49 ceps = (double*)calloc(nceps, sizeof(double));
cannam@26 50 }
cannam@26 51
cannam@26 52 /* Allocate space for local vectors */
cannam@26 53 mfccDCTMatrix = (double**)calloc(nceps+1, sizeof(double*));
cannam@30 54 for (i = 0; i < nceps+1; i++) {
cannam@26 55 mfccDCTMatrix[i]= (double*)calloc(totalFilters, sizeof(double));
cannam@26 56 }
cannam@26 57
cannam@26 58 mfccFilterWeights = (double**)calloc(totalFilters, sizeof(double*));
cannam@30 59 for (i = 0; i < totalFilters; i++) {
cannam@26 60 mfccFilterWeights[i] = (double*)calloc(fftSize, sizeof(double));
cannam@26 61 }
cannam@26 62
cannam@26 63 freqs = (double*)calloc(totalFilters+2,sizeof(double));
cannam@26 64
cannam@26 65 lower = (double*)calloc(totalFilters,sizeof(double));
cannam@26 66 center = (double*)calloc(totalFilters,sizeof(double));
cannam@26 67 upper = (double*)calloc(totalFilters,sizeof(double));
cannam@26 68
cannam@26 69 triangleHeight = (double*)calloc(totalFilters,sizeof(double));
cannam@26 70 fftFreqs = (double*)calloc(fftSize,sizeof(double));
cannam@25 71
cannam@30 72 for (i = 0; i < linearFilters; i++) {
cannam@26 73 freqs[i] = lowestFrequency + ((double)i) * linearSpacing;
cannam@26 74 }
cannam@26 75
cannam@30 76 for (i = linearFilters; i < totalFilters+2; i++) {
cannam@26 77 freqs[i] = freqs[linearFilters-1] *
cannam@26 78 pow(logSpacing, (double)(i-linearFilters+1));
cannam@26 79 }
cannam@26 80
cannam@26 81 /* Define lower, center and upper */
cannam@26 82 memcpy(lower, freqs,totalFilters*sizeof(double));
cannam@26 83 memcpy(center, &freqs[1],totalFilters*sizeof(double));
cannam@26 84 memcpy(upper, &freqs[2],totalFilters*sizeof(double));
cannam@26 85
cannam@26 86 for (i=0;i<totalFilters;i++){
cannam@26 87 triangleHeight[i] = 2./(upper[i]-lower[i]);
cannam@26 88 }
cannam@26 89
cannam@26 90 for (i=0;i<fftSize;i++){
cannam@26 91 fftFreqs[i] = ((double) i / ((double) fftSize ) *
cannam@26 92 (double) samplingRate);
cannam@26 93 }
cannam@25 94
cannam@26 95 /* Build now the mccFilterWeight matrix */
cannam@26 96 for (i=0;i<totalFilters;i++){
cannam@25 97
cannam@26 98 for (j=0;j<fftSize;j++) {
cannam@26 99
cannam@26 100 if ((fftFreqs[j] > lower[i]) && (fftFreqs[j] <= center[i])) {
cannam@26 101
cannam@26 102 mfccFilterWeights[i][j] = triangleHeight[i] *
cannam@26 103 (fftFreqs[j]-lower[i]) / (center[i]-lower[i]);
cannam@26 104
cannam@26 105 }
cannam@26 106 else
cannam@26 107 {
cannam@26 108 mfccFilterWeights[i][j] = 0.0;
cannam@26 109 }
cannam@25 110
cannam@26 111 if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) {
cannam@25 112
cannam@30 113 mfccFilterWeights[i][j] = mfccFilterWeights[i][j]
cannam@30 114 + triangleHeight[i] * (upper[i]-fftFreqs[j])
cannam@26 115 / (upper[i]-center[i]);
cannam@26 116 }
cannam@26 117 else
cannam@26 118 {
cannam@26 119 mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + 0.0;
cannam@26 120 }
cannam@25 121 }
cannam@25 122
cannam@26 123 }
cannam@25 124
cannam@26 125 /*
cannam@26 126 * We calculate now mfccDCT matrix
cannam@26 127 * NB: +1 because of the DC component
cannam@26 128 */
cannam@29 129
cannam@29 130 const double pi = 3.14159265358979323846264338327950288;
cannam@26 131
cannam@30 132 for (i = 0; i < nceps+1; i++) {
cannam@30 133 for (j = 0; j < totalFilters; j++) {
cannam@26 134 mfccDCTMatrix[i][j] = (1./sqrt((double) totalFilters / 2.))
cannam@29 135 * cos((double) i * ((double) j + 0.5) / (double) totalFilters * pi);
cannam@25 136 }
cannam@25 137 }
cannam@25 138
cannam@30 139 for (j = 0; j < totalFilters; j++){
cannam@30 140 mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfccDCTMatrix[0][j];
cannam@26 141 }
cannam@26 142
cannam@26 143 /* The analysis window */
cannam@26 144 window = new Window<double>(HammingWindow, fftSize);
cannam@25 145
cannam@26 146 /* Allocate memory for the FFT */
cannam@30 147 imagIn = (double*)calloc(fftSize, sizeof(double));
cannam@30 148 realOut = (double*)calloc(fftSize, sizeof(double));
cannam@30 149 imagOut = (double*)calloc(fftSize, sizeof(double));
cannam@30 150
cannam@30 151 earMag = (double*)calloc(totalFilters, sizeof(double));
cannam@30 152 fftMag = (double*)calloc(fftSize/2, sizeof(double));
cannam@25 153
cannam@26 154 free(freqs);
cannam@26 155 free(lower);
cannam@26 156 free(center);
cannam@26 157 free(upper);
cannam@26 158 free(triangleHeight);
cannam@26 159 free(fftFreqs);
cannam@25 160 }
cannam@25 161
cannam@26 162 MFCC::~MFCC()
cannam@26 163 {
cannam@26 164 int i;
cannam@26 165
cannam@26 166 /* Free the structure */
cannam@30 167 for (i = 0; i < nceps+1; i++) {
cannam@26 168 free(mfccDCTMatrix[i]);
cannam@26 169 }
cannam@26 170 free(mfccDCTMatrix);
cannam@26 171
cannam@30 172 for (i = 0; i < totalFilters; i++) {
cannam@26 173 free(mfccFilterWeights[i]);
cannam@26 174 }
cannam@26 175 free(mfccFilterWeights);
cannam@26 176
cannam@26 177 /* Free the feature vector */
cannam@26 178 free(ceps);
cannam@26 179
cannam@26 180 /* The analysis window */
cannam@26 181 delete window;
cannam@30 182
cannam@30 183 free(earMag);
cannam@30 184 free(fftMag);
cannam@26 185
cannam@26 186 /* Free the FFT */
cannam@26 187 free(imagIn);
cannam@26 188 free(realOut);
cannam@26 189 free(imagOut);
cannam@26 190 }
cannam@25 191
cannam@25 192
cannam@25 193 /*
cannam@25 194 *
cannam@25 195 * Extract the MFCC on the input frame
cannam@25 196 *
cannam@25 197 */
cannam@30 198 int MFCC::process(const double *inframe, double *outceps)
cannam@26 199 {
cannam@30 200 double *inputData = (double *)malloc(fftSize * sizeof(double));
cannam@30 201 for (int i = 0; i < fftSize; ++i) inputData[i] = inframe[i];
cannam@25 202
cannam@30 203 window->cut(inputData);
cannam@26 204
cannam@26 205 /* Calculate the fft on the input frame */
cannam@26 206 FFT::process(fftSize, 0, inputData, imagIn, realOut, imagOut);
cannam@25 207
cannam@30 208 free(inputData);
cannam@30 209
cannam@30 210 return process(realOut, imagOut, outceps);
cannam@30 211 }
cannam@30 212
cannam@30 213 int MFCC::process(const double *real, const double *imag, double *outceps)
cannam@30 214 {
cannam@30 215 int i, j;
cannam@30 216
cannam@30 217 for (i = 0; i < fftSize/2; ++i) {
cannam@30 218 fftMag[i] = sqrt(real[i] * real[i] + imag[i] * imag[i]);
cannam@30 219 }
cannam@30 220
cannam@30 221 for (i = 0; i < totalFilters; ++i) {
cannam@30 222 earMag[i] = 0.0;
cannam@25 223 }
cannam@25 224
cannam@26 225 /* Multiply by mfccFilterWeights */
cannam@30 226 for (i = 0; i < totalFilters; i++) {
cannam@30 227 double tmp = 0.0;
cannam@30 228 for (j = 0; j < fftSize/2; j++) {
cannam@30 229 tmp = tmp + (mfccFilterWeights[i][j] * fftMag[j]);
cannam@26 230 }
cannam@30 231 if (tmp > 0) earMag[i] = log10(tmp);
cannam@30 232 else earMag[i] = 0.0;
cannam@30 233
cannam@30 234 if (logPower != 1.0) {
cannam@30 235 earMag[i] = pow(earMag[i], logPower);
cannam@30 236 }
cannam@26 237 }
cannam@26 238
cannam@26 239 /*
cannam@26 240 *
cannam@26 241 * Calculate now the cepstral coefficients
cannam@26 242 * with or without the DC component
cannam@26 243 *
cannam@26 244 */
cannam@26 245
cannam@30 246 if (WANT_C0 == 1) {
cannam@26 247
cannam@30 248 for (i = 0; i < nceps+1; i++) {
cannam@30 249 double tmp = 0.;
cannam@30 250 for (j = 0; j < totalFilters; j++){
cannam@30 251 tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
cannam@26 252 }
cannam@26 253 outceps[i] = tmp;
cannam@26 254 }
cannam@26 255 }
cannam@25 256 else
cannam@26 257 {
cannam@30 258 for (i = 1; i < nceps+1; i++) {
cannam@30 259 double tmp = 0.;
cannam@30 260 for (j = 0; j < totalFilters; j++){
cannam@30 261 tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
cannam@26 262 }
cannam@26 263 outceps[i-1] = tmp;
cannam@25 264 }
cannam@26 265 }
cannam@25 266
cannam@26 267 return nceps;
cannam@25 268 }
cannam@25 269