Mercurial > hg > qm-dsp
comparison dsp/mfcc/MFCC.cpp @ 30:a251fb0de594
* Make MFCC able to accept already-FFT'd input, and simplify API a bit
* Add log power value to MFCC, restore windowing, and avoid some heap allocs
* In HMM, bail out of iteration if loglik hits NaN
author | cannam |
---|---|
date | Fri, 18 Jan 2008 13:24:12 +0000 |
parents | 1c9c4d2c0592 |
children | 8bb764969d50 |
comparison
equal
deleted
inserted
replaced
29:1c9c4d2c0592 | 30:a251fb0de594 |
---|---|
30 | 30 |
31 /* FFT and analysis window sizes */ | 31 /* FFT and analysis window sizes */ |
32 fftSize = config.fftsize; | 32 fftSize = config.fftsize; |
33 | 33 |
34 totalFilters = linearFilters + logFilters; | 34 totalFilters = linearFilters + logFilters; |
35 logPower = config.logpower; | |
35 | 36 |
36 samplingRate = config.FS; | 37 samplingRate = config.FS; |
37 | 38 |
38 /* The number of cepstral componenents */ | 39 /* The number of cepstral componenents */ |
39 nceps = config.nceps; | 40 nceps = config.nceps; |
48 ceps = (double*)calloc(nceps, sizeof(double)); | 49 ceps = (double*)calloc(nceps, sizeof(double)); |
49 } | 50 } |
50 | 51 |
51 /* Allocate space for local vectors */ | 52 /* Allocate space for local vectors */ |
52 mfccDCTMatrix = (double**)calloc(nceps+1, sizeof(double*)); | 53 mfccDCTMatrix = (double**)calloc(nceps+1, sizeof(double*)); |
53 for (i=0;i<nceps+1; i++) { | 54 for (i = 0; i < nceps+1; i++) { |
54 mfccDCTMatrix[i]= (double*)calloc(totalFilters, sizeof(double)); | 55 mfccDCTMatrix[i]= (double*)calloc(totalFilters, sizeof(double)); |
55 } | 56 } |
56 | 57 |
57 mfccFilterWeights = (double**)calloc(totalFilters, sizeof(double*)); | 58 mfccFilterWeights = (double**)calloc(totalFilters, sizeof(double*)); |
58 for (i=0;i<totalFilters; i++) { | 59 for (i = 0; i < totalFilters; i++) { |
59 mfccFilterWeights[i] = (double*)calloc(fftSize, sizeof(double)); | 60 mfccFilterWeights[i] = (double*)calloc(fftSize, sizeof(double)); |
60 } | 61 } |
61 | 62 |
62 freqs = (double*)calloc(totalFilters+2,sizeof(double)); | 63 freqs = (double*)calloc(totalFilters+2,sizeof(double)); |
63 | 64 |
66 upper = (double*)calloc(totalFilters,sizeof(double)); | 67 upper = (double*)calloc(totalFilters,sizeof(double)); |
67 | 68 |
68 triangleHeight = (double*)calloc(totalFilters,sizeof(double)); | 69 triangleHeight = (double*)calloc(totalFilters,sizeof(double)); |
69 fftFreqs = (double*)calloc(fftSize,sizeof(double)); | 70 fftFreqs = (double*)calloc(fftSize,sizeof(double)); |
70 | 71 |
71 for (i=0;i<linearFilters;i++) { | 72 for (i = 0; i < linearFilters; i++) { |
72 freqs[i] = lowestFrequency + ((double)i) * linearSpacing; | 73 freqs[i] = lowestFrequency + ((double)i) * linearSpacing; |
73 } | 74 } |
74 | 75 |
75 for (i=linearFilters; i<totalFilters+2; i++) { | 76 for (i = linearFilters; i < totalFilters+2; i++) { |
76 freqs[i] = freqs[linearFilters-1] * | 77 freqs[i] = freqs[linearFilters-1] * |
77 pow(logSpacing, (double)(i-linearFilters+1)); | 78 pow(logSpacing, (double)(i-linearFilters+1)); |
78 } | 79 } |
79 | 80 |
80 /* Define lower, center and upper */ | 81 /* Define lower, center and upper */ |
107 mfccFilterWeights[i][j] = 0.0; | 108 mfccFilterWeights[i][j] = 0.0; |
108 } | 109 } |
109 | 110 |
110 if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) { | 111 if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) { |
111 | 112 |
112 mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + triangleHeight[i] * (upper[i]-fftFreqs[j]) | 113 mfccFilterWeights[i][j] = mfccFilterWeights[i][j] |
114 + triangleHeight[i] * (upper[i]-fftFreqs[j]) | |
113 / (upper[i]-center[i]); | 115 / (upper[i]-center[i]); |
114 } | 116 } |
115 else | 117 else |
116 { | 118 { |
117 mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + 0.0; | 119 mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + 0.0; |
125 * NB: +1 because of the DC component | 127 * NB: +1 because of the DC component |
126 */ | 128 */ |
127 | 129 |
128 const double pi = 3.14159265358979323846264338327950288; | 130 const double pi = 3.14159265358979323846264338327950288; |
129 | 131 |
130 for (i=0; i<nceps+1; i++) { | 132 for (i = 0; i < nceps+1; i++) { |
131 for (j=0; j<totalFilters; j++) { | 133 for (j = 0; j < totalFilters; j++) { |
132 mfccDCTMatrix[i][j] = (1./sqrt((double) totalFilters / 2.)) | 134 mfccDCTMatrix[i][j] = (1./sqrt((double) totalFilters / 2.)) |
133 * cos((double) i * ((double) j + 0.5) / (double) totalFilters * pi); | 135 * cos((double) i * ((double) j + 0.5) / (double) totalFilters * pi); |
134 } | 136 } |
135 } | 137 } |
136 | 138 |
137 for (j=0;j<totalFilters;j++){ | 139 for (j = 0; j < totalFilters; j++){ |
138 mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfccDCTMatrix[0][j]; | 140 mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfccDCTMatrix[0][j]; |
139 } | 141 } |
140 | 142 |
141 /* The analysis window */ | 143 /* The analysis window */ |
142 window = new Window<double>(HammingWindow, fftSize); | 144 window = new Window<double>(HammingWindow, fftSize); |
143 | 145 |
144 /* Allocate memory for the FFT */ | 146 /* Allocate memory for the FFT */ |
145 imagIn = (double*)calloc(fftSize,sizeof(double)); | 147 imagIn = (double*)calloc(fftSize, sizeof(double)); |
146 realOut = (double*)calloc(fftSize,sizeof(double)); | 148 realOut = (double*)calloc(fftSize, sizeof(double)); |
147 imagOut = (double*)calloc(fftSize,sizeof(double)); | 149 imagOut = (double*)calloc(fftSize, sizeof(double)); |
150 | |
151 earMag = (double*)calloc(totalFilters, sizeof(double)); | |
152 fftMag = (double*)calloc(fftSize/2, sizeof(double)); | |
148 | 153 |
149 free(freqs); | 154 free(freqs); |
150 free(lower); | 155 free(lower); |
151 free(center); | 156 free(center); |
152 free(upper); | 157 free(upper); |
157 MFCC::~MFCC() | 162 MFCC::~MFCC() |
158 { | 163 { |
159 int i; | 164 int i; |
160 | 165 |
161 /* Free the structure */ | 166 /* Free the structure */ |
162 for (i=0;i<nceps+1;i++) { | 167 for (i = 0; i < nceps+1; i++) { |
163 free(mfccDCTMatrix[i]); | 168 free(mfccDCTMatrix[i]); |
164 } | 169 } |
165 free(mfccDCTMatrix); | 170 free(mfccDCTMatrix); |
166 | 171 |
167 for (i=0;i<totalFilters; i++) { | 172 for (i = 0; i < totalFilters; i++) { |
168 free(mfccFilterWeights[i]); | 173 free(mfccFilterWeights[i]); |
169 } | 174 } |
170 free(mfccFilterWeights); | 175 free(mfccFilterWeights); |
171 | 176 |
172 /* Free the feature vector */ | 177 /* Free the feature vector */ |
173 free(ceps); | 178 free(ceps); |
174 | 179 |
175 /* The analysis window */ | 180 /* The analysis window */ |
176 delete window; | 181 delete window; |
182 | |
183 free(earMag); | |
184 free(fftMag); | |
177 | 185 |
178 /* Free the FFT */ | 186 /* Free the FFT */ |
179 free(imagIn); | 187 free(imagIn); |
180 free(realOut); | 188 free(realOut); |
181 free(imagOut); | 189 free(imagOut); |
183 | 191 |
184 | 192 |
185 /* | 193 /* |
186 * | 194 * |
187 * Extract the MFCC on the input frame | 195 * Extract the MFCC on the input frame |
188 * | |
189 * looks like we have to have length = fftSize ?????? | |
190 * | 196 * |
191 */ | 197 */ |
192 int MFCC::process(int length, double *inframe, double *outceps) | 198 int MFCC::process(const double *inframe, double *outceps) |
193 { | 199 { |
194 int i,j; | 200 double *inputData = (double *)malloc(fftSize * sizeof(double)); |
195 | 201 for (int i = 0; i < fftSize; ++i) inputData[i] = inframe[i]; |
196 double *fftMag; | 202 |
197 double *earMag; | 203 window->cut(inputData); |
198 | |
199 double *inputData; | |
200 | |
201 double tmp; | |
202 | |
203 earMag = (double*)calloc(totalFilters, sizeof(double)); | |
204 inputData = (double*)calloc(fftSize, sizeof(double)); | |
205 fftMag = (double*)calloc(fftSize, sizeof(double)); | |
206 | |
207 /* Zero-pad if needed */ | |
208 memcpy(inputData, inframe, length*sizeof(double)); | |
209 | |
210 //!!! window->cut(inputData); | |
211 | 204 |
212 /* Calculate the fft on the input frame */ | 205 /* Calculate the fft on the input frame */ |
213 FFT::process(fftSize, 0, inputData, imagIn, realOut, imagOut); | 206 FFT::process(fftSize, 0, inputData, imagIn, realOut, imagOut); |
214 | 207 |
215 for (i = 0; i < fftSize; ++i) { | 208 free(inputData); |
216 fftMag[i] = sqrt(realOut[i] * realOut[i] + | 209 |
217 imagOut[i] * imagOut[i]); | 210 return process(realOut, imagOut, outceps); |
211 } | |
212 | |
213 int MFCC::process(const double *real, const double *imag, double *outceps) | |
214 { | |
215 int i, j; | |
216 | |
217 for (i = 0; i < fftSize/2; ++i) { | |
218 fftMag[i] = sqrt(real[i] * real[i] + imag[i] * imag[i]); | |
219 } | |
220 | |
221 for (i = 0; i < totalFilters; ++i) { | |
222 earMag[i] = 0.0; | |
218 } | 223 } |
219 | 224 |
220 /* Multiply by mfccFilterWeights */ | 225 /* Multiply by mfccFilterWeights */ |
221 for (i=0;i<totalFilters;i++) { | 226 for (i = 0; i < totalFilters; i++) { |
222 tmp = 0.; | 227 double tmp = 0.0; |
223 for(j=0;j<fftSize/2; j++) { | 228 for (j = 0; j < fftSize/2; j++) { |
224 tmp = tmp + (mfccFilterWeights[i][j]*fftMag[j]); | 229 tmp = tmp + (mfccFilterWeights[i][j] * fftMag[j]); |
225 } | 230 } |
226 if (tmp>0) | 231 if (tmp > 0) earMag[i] = log10(tmp); |
227 earMag[i] = log10(tmp); | 232 else earMag[i] = 0.0; |
228 else | 233 |
229 earMag[i] = 0.0; | 234 if (logPower != 1.0) { |
235 earMag[i] = pow(earMag[i], logPower); | |
236 } | |
230 } | 237 } |
231 | 238 |
232 /* | 239 /* |
233 * | 240 * |
234 * Calculate now the cepstral coefficients | 241 * Calculate now the cepstral coefficients |
235 * with or without the DC component | 242 * with or without the DC component |
236 * | 243 * |
237 */ | 244 */ |
238 | 245 |
239 if (WANT_C0==1) { | 246 if (WANT_C0 == 1) { |
240 | 247 |
241 for (i=0;i<nceps+1;i++) { | 248 for (i = 0; i < nceps+1; i++) { |
242 tmp = 0.; | 249 double tmp = 0.; |
243 for (j=0;j<totalFilters;j++){ | 250 for (j = 0; j < totalFilters; j++){ |
244 tmp = tmp + mfccDCTMatrix[i][j]*earMag[j]; | 251 tmp = tmp + mfccDCTMatrix[i][j] * earMag[j]; |
245 } | 252 } |
246 outceps[i] = tmp; | 253 outceps[i] = tmp; |
247 } | 254 } |
248 } | 255 } |
249 else | 256 else |
250 { | 257 { |
251 for (i=1;i<nceps+1;i++) { | 258 for (i = 1; i < nceps+1; i++) { |
252 tmp = 0.; | 259 double tmp = 0.; |
253 for (j=0;j<totalFilters;j++){ | 260 for (j = 0; j < totalFilters; j++){ |
254 tmp = tmp + mfccDCTMatrix[i][j]*earMag[j]; | 261 tmp = tmp + mfccDCTMatrix[i][j] * earMag[j]; |
255 } | 262 } |
256 outceps[i-1] = tmp; | 263 outceps[i-1] = tmp; |
257 } | 264 } |
258 } | 265 } |
259 | 266 |
260 free(fftMag); | |
261 free(earMag); | |
262 free(inputData); | |
263 | |
264 return nceps; | 267 return nceps; |
265 } | 268 } |
266 | 269 |