comparison dsp/mfcc/MFCC.cpp @ 255:9edaa3ce62e8

* Make MFCC able to accept already-FFT'd input, and simplify API a bit * Add log power value to MFCC, restore windowing, and avoid some heap allocs * In HMM, bail out of iteration if loglik hits NaN
author Chris Cannam <c.cannam@qmul.ac.uk>
date Fri, 18 Jan 2008 13:24:12 +0000
parents 52c1a295d775
children 9619d6995b73
comparison
equal deleted inserted replaced
254:52c1a295d775 255:9edaa3ce62e8
30 30
31 /* FFT and analysis window sizes */ 31 /* FFT and analysis window sizes */
32 fftSize = config.fftsize; 32 fftSize = config.fftsize;
33 33
34 totalFilters = linearFilters + logFilters; 34 totalFilters = linearFilters + logFilters;
35 logPower = config.logpower;
35 36
36 samplingRate = config.FS; 37 samplingRate = config.FS;
37 38
38 /* The number of cepstral componenents */ 39 /* The number of cepstral componenents */
39 nceps = config.nceps; 40 nceps = config.nceps;
48 ceps = (double*)calloc(nceps, sizeof(double)); 49 ceps = (double*)calloc(nceps, sizeof(double));
49 } 50 }
50 51
51 /* Allocate space for local vectors */ 52 /* Allocate space for local vectors */
52 mfccDCTMatrix = (double**)calloc(nceps+1, sizeof(double*)); 53 mfccDCTMatrix = (double**)calloc(nceps+1, sizeof(double*));
53 for (i=0;i<nceps+1; i++) { 54 for (i = 0; i < nceps+1; i++) {
54 mfccDCTMatrix[i]= (double*)calloc(totalFilters, sizeof(double)); 55 mfccDCTMatrix[i]= (double*)calloc(totalFilters, sizeof(double));
55 } 56 }
56 57
57 mfccFilterWeights = (double**)calloc(totalFilters, sizeof(double*)); 58 mfccFilterWeights = (double**)calloc(totalFilters, sizeof(double*));
58 for (i=0;i<totalFilters; i++) { 59 for (i = 0; i < totalFilters; i++) {
59 mfccFilterWeights[i] = (double*)calloc(fftSize, sizeof(double)); 60 mfccFilterWeights[i] = (double*)calloc(fftSize, sizeof(double));
60 } 61 }
61 62
62 freqs = (double*)calloc(totalFilters+2,sizeof(double)); 63 freqs = (double*)calloc(totalFilters+2,sizeof(double));
63 64
66 upper = (double*)calloc(totalFilters,sizeof(double)); 67 upper = (double*)calloc(totalFilters,sizeof(double));
67 68
68 triangleHeight = (double*)calloc(totalFilters,sizeof(double)); 69 triangleHeight = (double*)calloc(totalFilters,sizeof(double));
69 fftFreqs = (double*)calloc(fftSize,sizeof(double)); 70 fftFreqs = (double*)calloc(fftSize,sizeof(double));
70 71
71 for (i=0;i<linearFilters;i++) { 72 for (i = 0; i < linearFilters; i++) {
72 freqs[i] = lowestFrequency + ((double)i) * linearSpacing; 73 freqs[i] = lowestFrequency + ((double)i) * linearSpacing;
73 } 74 }
74 75
75 for (i=linearFilters; i<totalFilters+2; i++) { 76 for (i = linearFilters; i < totalFilters+2; i++) {
76 freqs[i] = freqs[linearFilters-1] * 77 freqs[i] = freqs[linearFilters-1] *
77 pow(logSpacing, (double)(i-linearFilters+1)); 78 pow(logSpacing, (double)(i-linearFilters+1));
78 } 79 }
79 80
80 /* Define lower, center and upper */ 81 /* Define lower, center and upper */
107 mfccFilterWeights[i][j] = 0.0; 108 mfccFilterWeights[i][j] = 0.0;
108 } 109 }
109 110
110 if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) { 111 if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) {
111 112
112 mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + triangleHeight[i] * (upper[i]-fftFreqs[j]) 113 mfccFilterWeights[i][j] = mfccFilterWeights[i][j]
114 + triangleHeight[i] * (upper[i]-fftFreqs[j])
113 / (upper[i]-center[i]); 115 / (upper[i]-center[i]);
114 } 116 }
115 else 117 else
116 { 118 {
117 mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + 0.0; 119 mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + 0.0;
125 * NB: +1 because of the DC component 127 * NB: +1 because of the DC component
126 */ 128 */
127 129
128 const double pi = 3.14159265358979323846264338327950288; 130 const double pi = 3.14159265358979323846264338327950288;
129 131
130 for (i=0; i<nceps+1; i++) { 132 for (i = 0; i < nceps+1; i++) {
131 for (j=0; j<totalFilters; j++) { 133 for (j = 0; j < totalFilters; j++) {
132 mfccDCTMatrix[i][j] = (1./sqrt((double) totalFilters / 2.)) 134 mfccDCTMatrix[i][j] = (1./sqrt((double) totalFilters / 2.))
133 * cos((double) i * ((double) j + 0.5) / (double) totalFilters * pi); 135 * cos((double) i * ((double) j + 0.5) / (double) totalFilters * pi);
134 } 136 }
135 } 137 }
136 138
137 for (j=0;j<totalFilters;j++){ 139 for (j = 0; j < totalFilters; j++){
138 mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfccDCTMatrix[0][j]; 140 mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfccDCTMatrix[0][j];
139 } 141 }
140 142
141 /* The analysis window */ 143 /* The analysis window */
142 window = new Window<double>(HammingWindow, fftSize); 144 window = new Window<double>(HammingWindow, fftSize);
143 145
144 /* Allocate memory for the FFT */ 146 /* Allocate memory for the FFT */
145 imagIn = (double*)calloc(fftSize,sizeof(double)); 147 imagIn = (double*)calloc(fftSize, sizeof(double));
146 realOut = (double*)calloc(fftSize,sizeof(double)); 148 realOut = (double*)calloc(fftSize, sizeof(double));
147 imagOut = (double*)calloc(fftSize,sizeof(double)); 149 imagOut = (double*)calloc(fftSize, sizeof(double));
150
151 earMag = (double*)calloc(totalFilters, sizeof(double));
152 fftMag = (double*)calloc(fftSize/2, sizeof(double));
148 153
149 free(freqs); 154 free(freqs);
150 free(lower); 155 free(lower);
151 free(center); 156 free(center);
152 free(upper); 157 free(upper);
157 MFCC::~MFCC() 162 MFCC::~MFCC()
158 { 163 {
159 int i; 164 int i;
160 165
161 /* Free the structure */ 166 /* Free the structure */
162 for (i=0;i<nceps+1;i++) { 167 for (i = 0; i < nceps+1; i++) {
163 free(mfccDCTMatrix[i]); 168 free(mfccDCTMatrix[i]);
164 } 169 }
165 free(mfccDCTMatrix); 170 free(mfccDCTMatrix);
166 171
167 for (i=0;i<totalFilters; i++) { 172 for (i = 0; i < totalFilters; i++) {
168 free(mfccFilterWeights[i]); 173 free(mfccFilterWeights[i]);
169 } 174 }
170 free(mfccFilterWeights); 175 free(mfccFilterWeights);
171 176
172 /* Free the feature vector */ 177 /* Free the feature vector */
173 free(ceps); 178 free(ceps);
174 179
175 /* The analysis window */ 180 /* The analysis window */
176 delete window; 181 delete window;
182
183 free(earMag);
184 free(fftMag);
177 185
178 /* Free the FFT */ 186 /* Free the FFT */
179 free(imagIn); 187 free(imagIn);
180 free(realOut); 188 free(realOut);
181 free(imagOut); 189 free(imagOut);
183 191
184 192
185 /* 193 /*
186 * 194 *
187 * Extract the MFCC on the input frame 195 * Extract the MFCC on the input frame
188 *
189 * looks like we have to have length = fftSize ??????
190 * 196 *
191 */ 197 */
192 int MFCC::process(int length, double *inframe, double *outceps) 198 int MFCC::process(const double *inframe, double *outceps)
193 { 199 {
194 int i,j; 200 double *inputData = (double *)malloc(fftSize * sizeof(double));
195 201 for (int i = 0; i < fftSize; ++i) inputData[i] = inframe[i];
196 double *fftMag; 202
197 double *earMag; 203 window->cut(inputData);
198
199 double *inputData;
200
201 double tmp;
202
203 earMag = (double*)calloc(totalFilters, sizeof(double));
204 inputData = (double*)calloc(fftSize, sizeof(double));
205 fftMag = (double*)calloc(fftSize, sizeof(double));
206
207 /* Zero-pad if needed */
208 memcpy(inputData, inframe, length*sizeof(double));
209
210 //!!! window->cut(inputData);
211 204
212 /* Calculate the fft on the input frame */ 205 /* Calculate the fft on the input frame */
213 FFT::process(fftSize, 0, inputData, imagIn, realOut, imagOut); 206 FFT::process(fftSize, 0, inputData, imagIn, realOut, imagOut);
214 207
215 for (i = 0; i < fftSize; ++i) { 208 free(inputData);
216 fftMag[i] = sqrt(realOut[i] * realOut[i] + 209
217 imagOut[i] * imagOut[i]); 210 return process(realOut, imagOut, outceps);
211 }
212
213 int MFCC::process(const double *real, const double *imag, double *outceps)
214 {
215 int i, j;
216
217 for (i = 0; i < fftSize/2; ++i) {
218 fftMag[i] = sqrt(real[i] * real[i] + imag[i] * imag[i]);
219 }
220
221 for (i = 0; i < totalFilters; ++i) {
222 earMag[i] = 0.0;
218 } 223 }
219 224
220 /* Multiply by mfccFilterWeights */ 225 /* Multiply by mfccFilterWeights */
221 for (i=0;i<totalFilters;i++) { 226 for (i = 0; i < totalFilters; i++) {
222 tmp = 0.; 227 double tmp = 0.0;
223 for(j=0;j<fftSize/2; j++) { 228 for (j = 0; j < fftSize/2; j++) {
224 tmp = tmp + (mfccFilterWeights[i][j]*fftMag[j]); 229 tmp = tmp + (mfccFilterWeights[i][j] * fftMag[j]);
225 } 230 }
226 if (tmp>0) 231 if (tmp > 0) earMag[i] = log10(tmp);
227 earMag[i] = log10(tmp); 232 else earMag[i] = 0.0;
228 else 233
229 earMag[i] = 0.0; 234 if (logPower != 1.0) {
235 earMag[i] = pow(earMag[i], logPower);
236 }
230 } 237 }
231 238
232 /* 239 /*
233 * 240 *
234 * Calculate now the cepstral coefficients 241 * Calculate now the cepstral coefficients
235 * with or without the DC component 242 * with or without the DC component
236 * 243 *
237 */ 244 */
238 245
239 if (WANT_C0==1) { 246 if (WANT_C0 == 1) {
240 247
241 for (i=0;i<nceps+1;i++) { 248 for (i = 0; i < nceps+1; i++) {
242 tmp = 0.; 249 double tmp = 0.;
243 for (j=0;j<totalFilters;j++){ 250 for (j = 0; j < totalFilters; j++){
244 tmp = tmp + mfccDCTMatrix[i][j]*earMag[j]; 251 tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
245 } 252 }
246 outceps[i] = tmp; 253 outceps[i] = tmp;
247 } 254 }
248 } 255 }
249 else 256 else
250 { 257 {
251 for (i=1;i<nceps+1;i++) { 258 for (i = 1; i < nceps+1; i++) {
252 tmp = 0.; 259 double tmp = 0.;
253 for (j=0;j<totalFilters;j++){ 260 for (j = 0; j < totalFilters; j++){
254 tmp = tmp + mfccDCTMatrix[i][j]*earMag[j]; 261 tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
255 } 262 }
256 outceps[i-1] = tmp; 263 outceps[i-1] = tmp;
257 } 264 }
258 } 265 }
259 266
260 free(fftMag);
261 free(earMag);
262 free(inputData);
263
264 return nceps; 267 return nceps;
265 } 268 }
266 269