annotate dsp/mfcc/MFCC.cpp @ 251:c3600d3cfe5c

* Add timbral (MFCC) feature option to segmenter
author Chris Cannam <c.cannam@qmul.ac.uk>
date Thu, 10 Jan 2008 16:41:33 +0000
parents
children 52c1a295d775
rev   line source
c@251 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
c@251 2
c@251 3 /*
c@251 4 QM DSP Library
c@251 5
c@251 6 Centre for Digital Music, Queen Mary, University of London.
c@251 7 This file copyright 2005 Nicolas Chetry, copyright 2008 QMUL.
c@251 8 All rights reserved.
c@251 9 */
c@251 10
c@251 11 #include <cmath>
c@251 12 #include <cstdlib>
c@251 13
c@251 14 #include "MFCC.h"
c@251 15 #include "dsp/transforms/FFT.h"
c@251 16 #include "base/Window.h"
c@251 17
c@251 18 MFCC::MFCC(MFCCConfig config)
c@251 19 {
c@251 20 int i,j;
c@251 21
c@251 22 /* Calculate at startup */
c@251 23 double *freqs, *lower, *center, *upper, *triangleHeight, *fftFreqs;
c@251 24
c@251 25 lowestFrequency = 66.6666666;
c@251 26 linearFilters = 13;
c@251 27 linearSpacing = 66.66666666;
c@251 28 logFilters = 27;
c@251 29 logSpacing = 1.0711703;
c@251 30
c@251 31 /* FFT and analysis window sizes */
c@251 32 fftSize = config.fftsize;
c@251 33
c@251 34 totalFilters = linearFilters + logFilters;
c@251 35
c@251 36 samplingRate = config.FS;
c@251 37
c@251 38 /* The number of cepstral componenents */
c@251 39 nceps = config.nceps;
c@251 40
c@251 41 /* Set if user want C0 */
c@251 42 WANT_C0 = (config.want_c0 ? 1 : 0);
c@251 43
c@251 44 /* Allocate space for feature vector */
c@251 45 if (WANT_C0 == 1) {
c@251 46 ceps = (double*)calloc(nceps+1, sizeof(double));
c@251 47 } else {
c@251 48 ceps = (double*)calloc(nceps, sizeof(double));
c@251 49 }
c@251 50
c@251 51 /* Allocate space for local vectors */
c@251 52 mfccDCTMatrix = (double**)calloc(nceps+1, sizeof(double*));
c@251 53 for (i=0;i<nceps+1; i++) {
c@251 54 mfccDCTMatrix[i]= (double*)calloc(totalFilters, sizeof(double));
c@251 55 }
c@251 56
c@251 57 mfccFilterWeights = (double**)calloc(totalFilters, sizeof(double*));
c@251 58 for (i=0;i<totalFilters; i++) {
c@251 59 mfccFilterWeights[i] = (double*)calloc(fftSize, sizeof(double));
c@251 60 }
c@251 61
c@251 62 freqs = (double*)calloc(totalFilters+2,sizeof(double));
c@251 63
c@251 64 lower = (double*)calloc(totalFilters,sizeof(double));
c@251 65 center = (double*)calloc(totalFilters,sizeof(double));
c@251 66 upper = (double*)calloc(totalFilters,sizeof(double));
c@251 67
c@251 68 triangleHeight = (double*)calloc(totalFilters,sizeof(double));
c@251 69 fftFreqs = (double*)calloc(fftSize,sizeof(double));
c@251 70
c@251 71 for (i=0;i<linearFilters;i++) {
c@251 72 freqs[i] = lowestFrequency + ((double)i) * linearSpacing;
c@251 73 }
c@251 74
c@251 75 for (i=linearFilters; i<totalFilters+2; i++) {
c@251 76 freqs[i] = freqs[linearFilters-1] *
c@251 77 pow(logSpacing, (double)(i-linearFilters+1));
c@251 78 }
c@251 79
c@251 80 /* Define lower, center and upper */
c@251 81 memcpy(lower, freqs,totalFilters*sizeof(double));
c@251 82 memcpy(center, &freqs[1],totalFilters*sizeof(double));
c@251 83 memcpy(upper, &freqs[2],totalFilters*sizeof(double));
c@251 84
c@251 85 for (i=0;i<totalFilters;i++){
c@251 86 triangleHeight[i] = 2./(upper[i]-lower[i]);
c@251 87 }
c@251 88
c@251 89 for (i=0;i<fftSize;i++){
c@251 90 fftFreqs[i] = ((double) i / ((double) fftSize ) *
c@251 91 (double) samplingRate);
c@251 92 }
c@251 93
c@251 94 /* Build now the mccFilterWeight matrix */
c@251 95 for (i=0;i<totalFilters;i++){
c@251 96
c@251 97 for (j=0;j<fftSize;j++) {
c@251 98
c@251 99 if ((fftFreqs[j] > lower[i]) && (fftFreqs[j] <= center[i])) {
c@251 100
c@251 101 mfccFilterWeights[i][j] = triangleHeight[i] *
c@251 102 (fftFreqs[j]-lower[i]) / (center[i]-lower[i]);
c@251 103
c@251 104 }
c@251 105 else
c@251 106 {
c@251 107 mfccFilterWeights[i][j] = 0.0;
c@251 108 }
c@251 109
c@251 110 if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) {
c@251 111
c@251 112 mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + triangleHeight[i] * (upper[i]-fftFreqs[j])
c@251 113 / (upper[i]-center[i]);
c@251 114 }
c@251 115 else
c@251 116 {
c@251 117 mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + 0.0;
c@251 118 }
c@251 119 }
c@251 120
c@251 121 }
c@251 122
c@251 123 /*
c@251 124 * We calculate now mfccDCT matrix
c@251 125 * NB: +1 because of the DC component
c@251 126 */
c@251 127
c@251 128 for (i=0; i<nceps+1; i++) {
c@251 129 for (j=0; j<totalFilters; j++) {
c@251 130 mfccDCTMatrix[i][j] = (1./sqrt((double) totalFilters / 2.))
c@251 131 * cos((double) i * ((double) j + 0.5) / (double) totalFilters * M_PI);
c@251 132 }
c@251 133 }
c@251 134
c@251 135 for (j=0;j<totalFilters;j++){
c@251 136 mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfccDCTMatrix[0][j];
c@251 137 }
c@251 138
c@251 139 /* The analysis window */
c@251 140 window = new Window<double>(HammingWindow, fftSize);
c@251 141
c@251 142 /* Allocate memory for the FFT */
c@251 143 imagIn = (double*)calloc(fftSize,sizeof(double));
c@251 144 realOut = (double*)calloc(fftSize,sizeof(double));
c@251 145 imagOut = (double*)calloc(fftSize,sizeof(double));
c@251 146
c@251 147 free(freqs);
c@251 148 free(lower);
c@251 149 free(center);
c@251 150 free(upper);
c@251 151 free(triangleHeight);
c@251 152 free(fftFreqs);
c@251 153 }
c@251 154
c@251 155 MFCC::~MFCC()
c@251 156 {
c@251 157 int i;
c@251 158
c@251 159 /* Free the structure */
c@251 160 for (i=0;i<nceps+1;i++) {
c@251 161 free(mfccDCTMatrix[i]);
c@251 162 }
c@251 163 free(mfccDCTMatrix);
c@251 164
c@251 165 for (i=0;i<totalFilters; i++) {
c@251 166 free(mfccFilterWeights[i]);
c@251 167 }
c@251 168 free(mfccFilterWeights);
c@251 169
c@251 170 /* Free the feature vector */
c@251 171 free(ceps);
c@251 172
c@251 173 /* The analysis window */
c@251 174 delete window;
c@251 175
c@251 176 /* Free the FFT */
c@251 177 free(imagIn);
c@251 178 free(realOut);
c@251 179 free(imagOut);
c@251 180 }
c@251 181
c@251 182
c@251 183 /*
c@251 184 *
c@251 185 * Extract the MFCC on the input frame
c@251 186 *
c@251 187 * looks like we have to have length = fftSize ??????
c@251 188 *
c@251 189 */
c@251 190 int MFCC::process(int length, double *inframe, double *outceps)
c@251 191 {
c@251 192 int i,j;
c@251 193
c@251 194 double *fftMag;
c@251 195 double *earMag;
c@251 196
c@251 197 double *inputData;
c@251 198
c@251 199 double tmp;
c@251 200
c@251 201 earMag = (double*)calloc(totalFilters, sizeof(double));
c@251 202 inputData = (double*)calloc(fftSize, sizeof(double));
c@251 203 fftMag = (double*)calloc(fftSize, sizeof(double));
c@251 204
c@251 205 /* Zero-pad if needed */
c@251 206 memcpy(inputData, inframe, length*sizeof(double));
c@251 207
c@251 208 window->cut(inputData);
c@251 209
c@251 210 /* Calculate the fft on the input frame */
c@251 211 FFT::process(fftSize, 0, inputData, imagIn, realOut, imagOut);
c@251 212
c@251 213 for (i = 0; i < fftSize; ++i) {
c@251 214 fftMag[i] = sqrt(realOut[i] * realOut[i] +
c@251 215 imagOut[i] * imagOut[i]);
c@251 216 }
c@251 217
c@251 218 /* Multiply by mfccFilterWeights */
c@251 219 for (i=0;i<totalFilters;i++) {
c@251 220 tmp = 0.;
c@251 221 for(j=0;j<fftSize/2; j++) {
c@251 222 tmp = tmp + (mfccFilterWeights[i][j]*fftMag[j]);
c@251 223 }
c@251 224 if (tmp>0)
c@251 225 earMag[i] = log10(tmp);
c@251 226 else
c@251 227 earMag[i] = 0.0;
c@251 228 }
c@251 229
c@251 230 /*
c@251 231 *
c@251 232 * Calculate now the cepstral coefficients
c@251 233 * with or without the DC component
c@251 234 *
c@251 235 */
c@251 236
c@251 237 if (WANT_C0==1) {
c@251 238
c@251 239 for (i=0;i<nceps+1;i++) {
c@251 240 tmp = 0.;
c@251 241 for (j=0;j<totalFilters;j++){
c@251 242 tmp = tmp + mfccDCTMatrix[i][j]*earMag[j];
c@251 243 }
c@251 244 outceps[i] = tmp;
c@251 245 }
c@251 246 }
c@251 247 else
c@251 248 {
c@251 249 for (i=1;i<nceps+1;i++) {
c@251 250 tmp = 0.;
c@251 251 for (j=0;j<totalFilters;j++){
c@251 252 tmp = tmp + mfccDCTMatrix[i][j]*earMag[j];
c@251 253 }
c@251 254 outceps[i-1] = tmp;
c@251 255 }
c@251 256 }
c@251 257
c@251 258 free(fftMag);
c@251 259 free(earMag);
c@251 260 free(inputData);
c@251 261
c@251 262 return nceps;
c@251 263 }
c@251 264