annotate dsp/mfcc/MFCC.c @ 250:a106e551e9a4

...
author Chris Cannam <c.cannam@qmul.ac.uk>
date Thu, 10 Jan 2008 15:16:08 +0000
parents
children
rev   line source
c@250 1 #include <stdio.h>
c@250 2 #include <stdlib.h>
c@250 3 #include <string.h>
c@250 4 #include <math.h>
c@250 5
c@250 6 #include "mfcc.h"
c@250 7 #include "SBFFT.h"
c@250 8 #include "windows.h"
c@250 9
c@250 10 /*
c@250 11 *
c@250 12 * Initialise the MFCC structure and return a pointer to a
c@250 13 * feature vector
c@250 14 *
c@250 15 */
c@250 16
c@250 17 extern mfcc_t* init_mfcc(int fftSize, int nceps , int samplingRate, int WANT_C0){
c@250 18
c@250 19 int i,j;
c@250 20 /* Calculate at startup */
c@250 21 double *freqs, *lower, *center, *upper, *triangleHeight, *fftFreqs;
c@250 22
c@250 23 /* Allocate space for the structure */
c@250 24 mfcc_t* mfcc_p = (mfcc_t*)malloc(sizeof(mfcc_t));
c@250 25
c@250 26 mfcc_p->lowestFrequency = 66.6666666;
c@250 27 mfcc_p->linearFilters = 13;
c@250 28 mfcc_p->linearSpacing = 66.66666666;
c@250 29 mfcc_p->logFilters = 27;
c@250 30 mfcc_p->logSpacing = 1.0711703;
c@250 31
c@250 32 /* FFT and analysis window sizes */
c@250 33 mfcc_p->fftSize = fftSize;
c@250 34
c@250 35 mfcc_p->totalFilters = mfcc_p->linearFilters + mfcc_p->logFilters;
c@250 36
c@250 37 mfcc_p->samplingRate = samplingRate;
c@250 38
c@250 39 /* The number of cepstral componenents */
c@250 40 mfcc_p->nceps = nceps;
c@250 41
c@250 42 /* Set if user want C0 */
c@250 43 mfcc_p->WANT_C0 = WANT_C0;
c@250 44
c@250 45 /* Allocate space for feature vector */
c@250 46 if (mfcc_p->WANT_C0==1) {
c@250 47 mfcc_p->ceps = (double*)calloc(nceps+1,sizeof(double));
c@250 48 } else {
c@250 49 mfcc_p->ceps = (double*)calloc(nceps,sizeof(double));
c@250 50 }
c@250 51
c@250 52 /* Allocate space for local vectors */
c@250 53 mfcc_p->mfccDCTMatrix = (double**)calloc(mfcc_p->nceps+1, sizeof(double*));
c@250 54 for (i=0;i<mfcc_p->nceps+1; i++) {
c@250 55 mfcc_p->mfccDCTMatrix[i]= (double*)calloc(mfcc_p->totalFilters, sizeof(double));
c@250 56 }
c@250 57
c@250 58 mfcc_p->mfccFilterWeights = (double**)calloc(mfcc_p->totalFilters, sizeof(double*));
c@250 59 for (i=0;i<mfcc_p->totalFilters; i++) {
c@250 60 mfcc_p->mfccFilterWeights[i] = (double*)calloc(mfcc_p->fftSize, sizeof(double));
c@250 61 }
c@250 62
c@250 63 freqs = (double*)calloc(mfcc_p->totalFilters+2,sizeof(double));
c@250 64
c@250 65 lower = (double*)calloc(mfcc_p->totalFilters,sizeof(double));
c@250 66 center = (double*)calloc(mfcc_p->totalFilters,sizeof(double));
c@250 67 upper = (double*)calloc(mfcc_p->totalFilters,sizeof(double));
c@250 68
c@250 69 triangleHeight = (double*)calloc(mfcc_p->totalFilters,sizeof(double));
c@250 70 fftFreqs = (double*)calloc(mfcc_p->fftSize,sizeof(double));
c@250 71
c@250 72 for (i=0;i<mfcc_p->linearFilters;i++) {
c@250 73 freqs[i] = mfcc_p->lowestFrequency + ((double)i) * mfcc_p->linearSpacing;
c@250 74 }
c@250 75
c@250 76 for (i=mfcc_p->linearFilters; i<mfcc_p->totalFilters+2; i++) {
c@250 77 freqs[i] = freqs[mfcc_p->linearFilters-1] *
c@250 78 pow(mfcc_p->logSpacing, (double)(i-mfcc_p->linearFilters+1));
c@250 79 }
c@250 80
c@250 81 /* Define lower, center and upper */
c@250 82 memcpy(lower, freqs,mfcc_p->totalFilters*sizeof(double));
c@250 83 memcpy(center, &freqs[1],mfcc_p->totalFilters*sizeof(double));
c@250 84 memcpy(upper, &freqs[2],mfcc_p->totalFilters*sizeof(double));
c@250 85
c@250 86 for (i=0;i<mfcc_p->totalFilters;i++){
c@250 87 triangleHeight[i] = 2./(upper[i]-lower[i]);
c@250 88 }
c@250 89
c@250 90 for (i=0;i<mfcc_p->fftSize;i++){
c@250 91 fftFreqs[i] = ((double) i / ((double) mfcc_p->fftSize ) *
c@250 92 (double) mfcc_p->samplingRate);
c@250 93 }
c@250 94
c@250 95 /* Build now the mccFilterWeight matrix */
c@250 96 for (i=0;i<mfcc_p->totalFilters;i++){
c@250 97
c@250 98 for (j=0;j<mfcc_p->fftSize;j++) {
c@250 99
c@250 100 if ((fftFreqs[j] > lower[i]) && (fftFreqs[j] <= center[i])) {
c@250 101
c@250 102 mfcc_p->mfccFilterWeights[i][j] = triangleHeight[i] *
c@250 103 (fftFreqs[j]-lower[i]) / (center[i]-lower[i]);
c@250 104
c@250 105 }
c@250 106 else
c@250 107 {
c@250 108
c@250 109 mfcc_p->mfccFilterWeights[i][j] = 0.0;
c@250 110
c@250 111 }
c@250 112
c@250 113 if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) {
c@250 114
c@250 115 mfcc_p->mfccFilterWeights[i][j] = mfcc_p->mfccFilterWeights[i][j] + triangleHeight[i] * (upper[i]-fftFreqs[j])
c@250 116 / (upper[i]-center[i]);
c@250 117 }
c@250 118 else
c@250 119 {
c@250 120
c@250 121 mfcc_p->mfccFilterWeights[i][j] = mfcc_p->mfccFilterWeights[i][j] + 0.0;
c@250 122
c@250 123 }
c@250 124 }
c@250 125
c@250 126 }
c@250 127
c@250 128 #ifndef PI
c@250 129 #define PI 3.14159265358979323846264338327950288
c@250 130 #endif
c@250 131
c@250 132 /*
c@250 133 *
c@250 134 * We calculate now mfccDCT matrix
c@250 135 * NB: +1 because of the DC component
c@250 136 *
c@250 137 */
c@250 138
c@250 139 for (i=0; i<nceps+1; i++) {
c@250 140 for (j=0; j<mfcc_p->totalFilters; j++) {
c@250 141 mfcc_p->mfccDCTMatrix[i][j] = (1./sqrt((double) mfcc_p->totalFilters / 2.))
c@250 142 * cos((double) i * ((double) j + 0.5) / (double) mfcc_p->totalFilters * PI);
c@250 143 }
c@250 144 }
c@250 145
c@250 146 for (j=0;j<mfcc_p->totalFilters;j++){
c@250 147 mfcc_p->mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfcc_p->mfccDCTMatrix[0][j];
c@250 148 }
c@250 149
c@250 150 /* The analysis window */
c@250 151 mfcc_p->window = hamming(mfcc_p->fftSize);
c@250 152
c@250 153 /* Allocate memory for the FFT */
c@250 154 mfcc_p->imagIn = (double*)calloc(mfcc_p->fftSize,sizeof(double));
c@250 155 mfcc_p->realOut = (double*)calloc(mfcc_p->fftSize,sizeof(double));
c@250 156 mfcc_p->imagOut = (double*)calloc(mfcc_p->fftSize,sizeof(double));
c@250 157
c@250 158 free(freqs);
c@250 159 free(lower);
c@250 160 free(center);
c@250 161 free(upper);
c@250 162 free(triangleHeight);
c@250 163 free(fftFreqs);
c@250 164
c@250 165 return mfcc_p;
c@250 166
c@250 167 }
c@250 168
c@250 169 /*
c@250 170 *
c@250 171 * Free the memory that has been allocated
c@250 172 *
c@250 173 */
c@250 174
c@250 175 extern void close_mfcc(mfcc_t* mfcc_p) {
c@250 176
c@250 177 int i;
c@250 178
c@250 179 /* Free the structure */
c@250 180 for (i=0;i<mfcc_p->nceps+1;i++) {
c@250 181 free(mfcc_p->mfccDCTMatrix[i]);
c@250 182 }
c@250 183 free(mfcc_p->mfccDCTMatrix);
c@250 184
c@250 185 for (i=0;i<mfcc_p->totalFilters; i++) {
c@250 186 free(mfcc_p->mfccFilterWeights[i]);
c@250 187 }
c@250 188 free(mfcc_p->mfccFilterWeights);
c@250 189
c@250 190 /* Free the feature vector */
c@250 191 free(mfcc_p->ceps);
c@250 192
c@250 193 /* The analysis window */
c@250 194 free(mfcc_p->window);
c@250 195
c@250 196 /* Free the FFT */
c@250 197 free(mfcc_p->imagIn);
c@250 198 free(mfcc_p->realOut);
c@250 199 free(mfcc_p->imagOut);
c@250 200
c@250 201 /* Free the structure itself */
c@250 202 free(mfcc_p);
c@250 203 mfcc_p = NULL;
c@250 204
c@250 205 }
c@250 206
c@250 207 /*
c@250 208 *
c@250 209 * Extract the MFCC on the input frame
c@250 210 *
c@250 211 */
c@250 212
c@250 213
c@250 214 // looks like we have to have length = mfcc_p->fftSize ??????
c@250 215
c@250 216 extern int do_mfcc(mfcc_t* mfcc_p, double* frame, int length){
c@250 217
c@250 218 int i,j;
c@250 219
c@250 220 double *fftMag;
c@250 221 double *earMag;
c@250 222
c@250 223 double *inputData;
c@250 224
c@250 225 double tmp;
c@250 226
c@250 227 earMag = (double*)calloc(mfcc_p->totalFilters, sizeof(double));
c@250 228 inputData = (double*)calloc(mfcc_p->fftSize, sizeof(double));
c@250 229
c@250 230 /* Zero-pad if needed */
c@250 231 memcpy(inputData, frame, length*sizeof(double));
c@250 232
c@250 233 /* Calculate the fft on the input frame */
c@250 234 fft_process(mfcc_p->fftSize, 0, inputData, mfcc_p->imagIn, mfcc_p->realOut, mfcc_p->imagOut);
c@250 235
c@250 236 /* Get the magnitude */
c@250 237 fftMag = abs_fft(mfcc_p->realOut, mfcc_p->imagOut, mfcc_p->fftSize);
c@250 238
c@250 239 /* Multiply by mfccFilterWeights */
c@250 240 for (i=0;i<mfcc_p->totalFilters;i++) {
c@250 241 tmp = 0.;
c@250 242 for(j=0;j<mfcc_p->fftSize/2; j++) {
c@250 243 tmp = tmp + (mfcc_p->mfccFilterWeights[i][j]*fftMag[j]);
c@250 244 }
c@250 245 if (tmp>0)
c@250 246 earMag[i] = log10(tmp);
c@250 247 else
c@250 248 earMag[i] = 0.0;
c@250 249 }
c@250 250
c@250 251 /*
c@250 252 *
c@250 253 * Calculate now the ceptral coefficients
c@250 254 * with or without the DC component
c@250 255 *
c@250 256 */
c@250 257
c@250 258 if (mfcc_p->WANT_C0==1) {
c@250 259
c@250 260 for (i=0;i<mfcc_p->nceps+1;i++) {
c@250 261 tmp = 0.;
c@250 262 for (j=0;j<mfcc_p->totalFilters;j++){
c@250 263 tmp = tmp + mfcc_p->mfccDCTMatrix[i][j]*earMag[j];
c@250 264 }
c@250 265 /* Send to workspace */
c@250 266 mfcc_p->ceps[i] = tmp;
c@250 267 }
c@250 268
c@250 269 }
c@250 270 else
c@250 271 {
c@250 272 for (i=1;i<mfcc_p->nceps+1;i++) {
c@250 273 tmp = 0.;
c@250 274 for (j=0;j<mfcc_p->totalFilters;j++){
c@250 275 tmp = tmp + mfcc_p->mfccDCTMatrix[i][j]*earMag[j];
c@250 276 }
c@250 277 /* Send to workspace */
c@250 278 mfcc_p->ceps[i-1] = tmp;
c@250 279 }
c@250 280 }
c@250 281
c@250 282 free(fftMag);
c@250 283 free(earMag);
c@250 284 free(inputData);
c@250 285
c@250 286 return mfcc_p->nceps;
c@250 287
c@250 288 }
c@250 289
c@250 290
c@250 291
c@250 292