annotate dsp/mfcc/mfcc.c @ 247:a98a8fe967c0

* Add decimation filter for 8x decimation
author Chris Cannam <c.cannam@qmul.ac.uk>
date Thu, 10 Jan 2008 15:14:11 +0000
parents 0105e9b916a9
children
rev   line source
c@246 1 #include <stdio.h>
c@246 2 #include <stdlib.h>
c@246 3 #include <string.h>
c@246 4 #include <math.h>
c@246 5
c@246 6 #include "mfcc.h"
c@246 7 #include "SBFFT.h"
c@246 8 #include "windows.h"
c@246 9
c@246 10 /*
c@246 11 *
c@246 12 * Initialise the MFCC structure and return a pointer to a
c@246 13 * feature vector
c@246 14 *
c@246 15 */
c@246 16
c@246 17 extern mfcc_t* init_mfcc(int fftSize, int nceps , int samplingRate, int WANT_C0){
c@246 18
c@246 19 int i,j;
c@246 20 /* Calculate at startup */
c@246 21 double *freqs, *lower, *center, *upper, *triangleHeight, *fftFreqs;
c@246 22
c@246 23 /* Allocate space for the structure */
c@246 24 mfcc_t* mfcc_p = (mfcc_t*)malloc(sizeof(mfcc_t));
c@246 25
c@246 26 mfcc_p->lowestFrequency = 66.6666666;
c@246 27 mfcc_p->linearFilters = 13;
c@246 28 mfcc_p->linearSpacing = 66.66666666;
c@246 29 mfcc_p->logFilters = 27;
c@246 30 mfcc_p->logSpacing = 1.0711703;
c@246 31
c@246 32 /* FFT and analysis window sizes */
c@246 33 mfcc_p->fftSize = fftSize;
c@246 34
c@246 35 mfcc_p->totalFilters = mfcc_p->linearFilters + mfcc_p->logFilters;
c@246 36
c@246 37 mfcc_p->samplingRate = samplingRate;
c@246 38
c@246 39 /* The number of cepstral componenents */
c@246 40 mfcc_p->nceps = nceps;
c@246 41
c@246 42 /* Set if user want C0 */
c@246 43 mfcc_p->WANT_C0 = WANT_C0;
c@246 44
c@246 45 /* Allocate space for feature vector */
c@246 46 if (mfcc_p->WANT_C0==1) {
c@246 47 mfcc_p->ceps = (double*)calloc(nceps+1,sizeof(double));
c@246 48 } else {
c@246 49 mfcc_p->ceps = (double*)calloc(nceps,sizeof(double));
c@246 50 }
c@246 51
c@246 52 /* Allocate space for local vectors */
c@246 53 mfcc_p->mfccDCTMatrix = (double**)calloc(mfcc_p->nceps+1, sizeof(double*));
c@246 54 for (i=0;i<mfcc_p->nceps+1; i++) {
c@246 55 mfcc_p->mfccDCTMatrix[i]= (double*)calloc(mfcc_p->totalFilters, sizeof(double));
c@246 56 }
c@246 57
c@246 58 mfcc_p->mfccFilterWeights = (double**)calloc(mfcc_p->totalFilters, sizeof(double*));
c@246 59 for (i=0;i<mfcc_p->totalFilters; i++) {
c@246 60 mfcc_p->mfccFilterWeights[i] = (double*)calloc(mfcc_p->fftSize, sizeof(double));
c@246 61 }
c@246 62
c@246 63 freqs = (double*)calloc(mfcc_p->totalFilters+2,sizeof(double));
c@246 64
c@246 65 lower = (double*)calloc(mfcc_p->totalFilters,sizeof(double));
c@246 66 center = (double*)calloc(mfcc_p->totalFilters,sizeof(double));
c@246 67 upper = (double*)calloc(mfcc_p->totalFilters,sizeof(double));
c@246 68
c@246 69 triangleHeight = (double*)calloc(mfcc_p->totalFilters,sizeof(double));
c@246 70 fftFreqs = (double*)calloc(mfcc_p->fftSize,sizeof(double));
c@246 71
c@246 72 for (i=0;i<mfcc_p->linearFilters;i++) {
c@246 73 freqs[i] = mfcc_p->lowestFrequency + ((double)i) * mfcc_p->linearSpacing;
c@246 74 }
c@246 75
c@246 76 for (i=mfcc_p->linearFilters; i<mfcc_p->totalFilters+2; i++) {
c@246 77 freqs[i] = freqs[mfcc_p->linearFilters-1] *
c@246 78 pow(mfcc_p->logSpacing, (double)(i-mfcc_p->linearFilters+1));
c@246 79 }
c@246 80
c@246 81 /* Define lower, center and upper */
c@246 82 memcpy(lower, freqs,mfcc_p->totalFilters*sizeof(double));
c@246 83 memcpy(center, &freqs[1],mfcc_p->totalFilters*sizeof(double));
c@246 84 memcpy(upper, &freqs[2],mfcc_p->totalFilters*sizeof(double));
c@246 85
c@246 86 for (i=0;i<mfcc_p->totalFilters;i++){
c@246 87 triangleHeight[i] = 2./(upper[i]-lower[i]);
c@246 88 }
c@246 89
c@246 90 for (i=0;i<mfcc_p->fftSize;i++){
c@246 91 fftFreqs[i] = ((double) i / ((double) mfcc_p->fftSize ) *
c@246 92 (double) mfcc_p->samplingRate);
c@246 93 }
c@246 94
c@246 95 /* Build now the mccFilterWeight matrix */
c@246 96 for (i=0;i<mfcc_p->totalFilters;i++){
c@246 97
c@246 98 for (j=0;j<mfcc_p->fftSize;j++) {
c@246 99
c@246 100 if ((fftFreqs[j] > lower[i]) && (fftFreqs[j] <= center[i])) {
c@246 101
c@246 102 mfcc_p->mfccFilterWeights[i][j] = triangleHeight[i] *
c@246 103 (fftFreqs[j]-lower[i]) / (center[i]-lower[i]);
c@246 104
c@246 105 }
c@246 106 else
c@246 107 {
c@246 108
c@246 109 mfcc_p->mfccFilterWeights[i][j] = 0.0;
c@246 110
c@246 111 }
c@246 112
c@246 113 if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) {
c@246 114
c@246 115 mfcc_p->mfccFilterWeights[i][j] = mfcc_p->mfccFilterWeights[i][j] + triangleHeight[i] * (upper[i]-fftFreqs[j])
c@246 116 / (upper[i]-center[i]);
c@246 117 }
c@246 118 else
c@246 119 {
c@246 120
c@246 121 mfcc_p->mfccFilterWeights[i][j] = mfcc_p->mfccFilterWeights[i][j] + 0.0;
c@246 122
c@246 123 }
c@246 124 }
c@246 125
c@246 126 }
c@246 127
c@246 128 #ifndef PI
c@246 129 #define PI 3.14159265358979323846264338327950288
c@246 130 #endif
c@246 131
c@246 132 /*
c@246 133 *
c@246 134 * We calculate now mfccDCT matrix
c@246 135 * NB: +1 because of the DC component
c@246 136 *
c@246 137 */
c@246 138
c@246 139 for (i=0; i<nceps+1; i++) {
c@246 140 for (j=0; j<mfcc_p->totalFilters; j++) {
c@246 141 mfcc_p->mfccDCTMatrix[i][j] = (1./sqrt((double) mfcc_p->totalFilters / 2.))
c@246 142 * cos((double) i * ((double) j + 0.5) / (double) mfcc_p->totalFilters * PI);
c@246 143 }
c@246 144 }
c@246 145
c@246 146 for (j=0;j<mfcc_p->totalFilters;j++){
c@246 147 mfcc_p->mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfcc_p->mfccDCTMatrix[0][j];
c@246 148 }
c@246 149
c@246 150 /* The analysis window */
c@246 151 mfcc_p->window = hamming(mfcc_p->fftSize);
c@246 152
c@246 153 /* Allocate memory for the FFT */
c@246 154 mfcc_p->imagIn = (double*)calloc(mfcc_p->fftSize,sizeof(double));
c@246 155 mfcc_p->realOut = (double*)calloc(mfcc_p->fftSize,sizeof(double));
c@246 156 mfcc_p->imagOut = (double*)calloc(mfcc_p->fftSize,sizeof(double));
c@246 157
c@246 158 free(freqs);
c@246 159 free(lower);
c@246 160 free(center);
c@246 161 free(upper);
c@246 162 free(triangleHeight);
c@246 163 free(fftFreqs);
c@246 164
c@246 165 return mfcc_p;
c@246 166
c@246 167 }
c@246 168
c@246 169 /*
c@246 170 *
c@246 171 * Free the memory that has been allocated
c@246 172 *
c@246 173 */
c@246 174
c@246 175 extern void close_mfcc(mfcc_t* mfcc_p) {
c@246 176
c@246 177 int i;
c@246 178
c@246 179 /* Free the structure */
c@246 180 for (i=0;i<mfcc_p->nceps+1;i++) {
c@246 181 free(mfcc_p->mfccDCTMatrix[i]);
c@246 182 }
c@246 183 free(mfcc_p->mfccDCTMatrix);
c@246 184
c@246 185 for (i=0;i<mfcc_p->totalFilters; i++) {
c@246 186 free(mfcc_p->mfccFilterWeights[i]);
c@246 187 }
c@246 188 free(mfcc_p->mfccFilterWeights);
c@246 189
c@246 190 /* Free the feature vector */
c@246 191 free(mfcc_p->ceps);
c@246 192
c@246 193 /* The analysis window */
c@246 194 free(mfcc_p->window);
c@246 195
c@246 196 /* Free the FFT */
c@246 197 free(mfcc_p->imagIn);
c@246 198 free(mfcc_p->realOut);
c@246 199 free(mfcc_p->imagOut);
c@246 200
c@246 201 /* Free the structure itself */
c@246 202 free(mfcc_p);
c@246 203 mfcc_p = NULL;
c@246 204
c@246 205 }
c@246 206
c@246 207 /*
c@246 208 *
c@246 209 * Extract the MFCC on the input frame
c@246 210 *
c@246 211 */
c@246 212
c@246 213
c@246 214 // looks like we have to have length = mfcc_p->fftSize ??????
c@246 215
c@246 216 extern int do_mfcc(mfcc_t* mfcc_p, double* frame, int length){
c@246 217
c@246 218 int i,j;
c@246 219
c@246 220 double *fftMag;
c@246 221 double *earMag;
c@246 222
c@246 223 double *inputData;
c@246 224
c@246 225 double tmp;
c@246 226
c@246 227 earMag = (double*)calloc(mfcc_p->totalFilters, sizeof(double));
c@246 228 inputData = (double*)calloc(mfcc_p->fftSize, sizeof(double));
c@246 229
c@246 230 /* Zero-pad if needed */
c@246 231 memcpy(inputData, frame, length*sizeof(double));
c@246 232
c@246 233 /* Calculate the fft on the input frame */
c@246 234 fft_process(mfcc_p->fftSize, 0, inputData, mfcc_p->imagIn, mfcc_p->realOut, mfcc_p->imagOut);
c@246 235
c@246 236 /* Get the magnitude */
c@246 237 fftMag = abs_fft(mfcc_p->realOut, mfcc_p->imagOut, mfcc_p->fftSize);
c@246 238
c@246 239 /* Multiply by mfccFilterWeights */
c@246 240 for (i=0;i<mfcc_p->totalFilters;i++) {
c@246 241 tmp = 0.;
c@246 242 for(j=0;j<mfcc_p->fftSize/2; j++) {
c@246 243 tmp = tmp + (mfcc_p->mfccFilterWeights[i][j]*fftMag[j]);
c@246 244 }
c@246 245 if (tmp>0)
c@246 246 earMag[i] = log10(tmp);
c@246 247 else
c@246 248 earMag[i] = 0.0;
c@246 249 }
c@246 250
c@246 251 /*
c@246 252 *
c@246 253 * Calculate now the ceptral coefficients
c@246 254 * with or without the DC component
c@246 255 *
c@246 256 */
c@246 257
c@246 258 if (mfcc_p->WANT_C0==1) {
c@246 259
c@246 260 for (i=0;i<mfcc_p->nceps+1;i++) {
c@246 261 tmp = 0.;
c@246 262 for (j=0;j<mfcc_p->totalFilters;j++){
c@246 263 tmp = tmp + mfcc_p->mfccDCTMatrix[i][j]*earMag[j];
c@246 264 }
c@246 265 /* Send to workspace */
c@246 266 mfcc_p->ceps[i] = tmp;
c@246 267 }
c@246 268
c@246 269 }
c@246 270 else
c@246 271 {
c@246 272 for (i=1;i<mfcc_p->nceps+1;i++) {
c@246 273 tmp = 0.;
c@246 274 for (j=0;j<mfcc_p->totalFilters;j++){
c@246 275 tmp = tmp + mfcc_p->mfccDCTMatrix[i][j]*earMag[j];
c@246 276 }
c@246 277 /* Send to workspace */
c@246 278 mfcc_p->ceps[i-1] = tmp;
c@246 279 }
c@246 280 }
c@246 281
c@246 282 free(fftMag);
c@246 283 free(earMag);
c@246 284 free(inputData);
c@246 285
c@246 286 return mfcc_p->nceps;
c@246 287
c@246 288 }
c@246 289
c@246 290
c@246 291
c@246 292