cannam@26
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
cannam@25
|
2
|
cannam@25
|
3 /*
|
cannam@26
|
4 QM DSP Library
|
cannam@25
|
5
|
cannam@26
|
6 Centre for Digital Music, Queen Mary, University of London.
|
cannam@26
|
7 This file copyright 2005 Nicolas Chetry, copyright 2008 QMUL.
|
cannam@26
|
8 All rights reserved.
|
cannam@26
|
9 */
|
cannam@25
|
10
|
cannam@26
|
11 #include <cmath>
|
cannam@26
|
12 #include <cstdlib>
|
cannam@26
|
13
|
cannam@26
|
14 #include "MFCC.h"
|
cannam@26
|
15 #include "dsp/transforms/FFT.h"
|
cannam@26
|
16 #include "base/Window.h"
|
cannam@26
|
17
|
cannam@26
|
18 MFCC::MFCC(MFCCConfig config)
|
cannam@26
|
19 {
|
cannam@26
|
20 int i,j;
|
cannam@26
|
21
|
cannam@26
|
22 /* Calculate at startup */
|
cannam@26
|
23 double *freqs, *lower, *center, *upper, *triangleHeight, *fftFreqs;
|
cannam@25
|
24
|
cannam@26
|
25 lowestFrequency = 66.6666666;
|
cannam@26
|
26 linearFilters = 13;
|
cannam@26
|
27 linearSpacing = 66.66666666;
|
cannam@26
|
28 logFilters = 27;
|
cannam@26
|
29 logSpacing = 1.0711703;
|
cannam@25
|
30
|
cannam@26
|
31 /* FFT and analysis window sizes */
|
cannam@26
|
32 fftSize = config.fftsize;
|
cannam@26
|
33
|
cannam@26
|
34 totalFilters = linearFilters + logFilters;
|
cannam@25
|
35
|
cannam@26
|
36 samplingRate = config.FS;
|
cannam@26
|
37
|
cannam@26
|
38 /* The number of cepstral componenents */
|
cannam@26
|
39 nceps = config.nceps;
|
cannam@25
|
40
|
cannam@26
|
41 /* Set if user want C0 */
|
cannam@26
|
42 WANT_C0 = (config.want_c0 ? 1 : 0);
|
cannam@25
|
43
|
cannam@26
|
44 /* Allocate space for feature vector */
|
cannam@26
|
45 if (WANT_C0 == 1) {
|
cannam@26
|
46 ceps = (double*)calloc(nceps+1, sizeof(double));
|
cannam@26
|
47 } else {
|
cannam@26
|
48 ceps = (double*)calloc(nceps, sizeof(double));
|
cannam@26
|
49 }
|
cannam@26
|
50
|
cannam@26
|
51 /* Allocate space for local vectors */
|
cannam@26
|
52 mfccDCTMatrix = (double**)calloc(nceps+1, sizeof(double*));
|
cannam@26
|
53 for (i=0;i<nceps+1; i++) {
|
cannam@26
|
54 mfccDCTMatrix[i]= (double*)calloc(totalFilters, sizeof(double));
|
cannam@26
|
55 }
|
cannam@26
|
56
|
cannam@26
|
57 mfccFilterWeights = (double**)calloc(totalFilters, sizeof(double*));
|
cannam@26
|
58 for (i=0;i<totalFilters; i++) {
|
cannam@26
|
59 mfccFilterWeights[i] = (double*)calloc(fftSize, sizeof(double));
|
cannam@26
|
60 }
|
cannam@26
|
61
|
cannam@26
|
62 freqs = (double*)calloc(totalFilters+2,sizeof(double));
|
cannam@26
|
63
|
cannam@26
|
64 lower = (double*)calloc(totalFilters,sizeof(double));
|
cannam@26
|
65 center = (double*)calloc(totalFilters,sizeof(double));
|
cannam@26
|
66 upper = (double*)calloc(totalFilters,sizeof(double));
|
cannam@26
|
67
|
cannam@26
|
68 triangleHeight = (double*)calloc(totalFilters,sizeof(double));
|
cannam@26
|
69 fftFreqs = (double*)calloc(fftSize,sizeof(double));
|
cannam@25
|
70
|
cannam@26
|
71 for (i=0;i<linearFilters;i++) {
|
cannam@26
|
72 freqs[i] = lowestFrequency + ((double)i) * linearSpacing;
|
cannam@26
|
73 }
|
cannam@26
|
74
|
cannam@26
|
75 for (i=linearFilters; i<totalFilters+2; i++) {
|
cannam@26
|
76 freqs[i] = freqs[linearFilters-1] *
|
cannam@26
|
77 pow(logSpacing, (double)(i-linearFilters+1));
|
cannam@26
|
78 }
|
cannam@26
|
79
|
cannam@26
|
80 /* Define lower, center and upper */
|
cannam@26
|
81 memcpy(lower, freqs,totalFilters*sizeof(double));
|
cannam@26
|
82 memcpy(center, &freqs[1],totalFilters*sizeof(double));
|
cannam@26
|
83 memcpy(upper, &freqs[2],totalFilters*sizeof(double));
|
cannam@26
|
84
|
cannam@26
|
85 for (i=0;i<totalFilters;i++){
|
cannam@26
|
86 triangleHeight[i] = 2./(upper[i]-lower[i]);
|
cannam@26
|
87 }
|
cannam@26
|
88
|
cannam@26
|
89 for (i=0;i<fftSize;i++){
|
cannam@26
|
90 fftFreqs[i] = ((double) i / ((double) fftSize ) *
|
cannam@26
|
91 (double) samplingRate);
|
cannam@26
|
92 }
|
cannam@25
|
93
|
cannam@26
|
94 /* Build now the mccFilterWeight matrix */
|
cannam@26
|
95 for (i=0;i<totalFilters;i++){
|
cannam@25
|
96
|
cannam@26
|
97 for (j=0;j<fftSize;j++) {
|
cannam@26
|
98
|
cannam@26
|
99 if ((fftFreqs[j] > lower[i]) && (fftFreqs[j] <= center[i])) {
|
cannam@26
|
100
|
cannam@26
|
101 mfccFilterWeights[i][j] = triangleHeight[i] *
|
cannam@26
|
102 (fftFreqs[j]-lower[i]) / (center[i]-lower[i]);
|
cannam@26
|
103
|
cannam@26
|
104 }
|
cannam@26
|
105 else
|
cannam@26
|
106 {
|
cannam@26
|
107 mfccFilterWeights[i][j] = 0.0;
|
cannam@26
|
108 }
|
cannam@25
|
109
|
cannam@26
|
110 if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) {
|
cannam@25
|
111
|
cannam@26
|
112 mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + triangleHeight[i] * (upper[i]-fftFreqs[j])
|
cannam@26
|
113 / (upper[i]-center[i]);
|
cannam@26
|
114 }
|
cannam@26
|
115 else
|
cannam@26
|
116 {
|
cannam@26
|
117 mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + 0.0;
|
cannam@26
|
118 }
|
cannam@25
|
119 }
|
cannam@25
|
120
|
cannam@26
|
121 }
|
cannam@25
|
122
|
cannam@26
|
123 /*
|
cannam@26
|
124 * We calculate now mfccDCT matrix
|
cannam@26
|
125 * NB: +1 because of the DC component
|
cannam@26
|
126 */
|
cannam@26
|
127
|
cannam@26
|
128 for (i=0; i<nceps+1; i++) {
|
cannam@26
|
129 for (j=0; j<totalFilters; j++) {
|
cannam@26
|
130 mfccDCTMatrix[i][j] = (1./sqrt((double) totalFilters / 2.))
|
cannam@26
|
131 * cos((double) i * ((double) j + 0.5) / (double) totalFilters * M_PI);
|
cannam@25
|
132 }
|
cannam@25
|
133 }
|
cannam@25
|
134
|
cannam@26
|
135 for (j=0;j<totalFilters;j++){
|
cannam@26
|
136 mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfccDCTMatrix[0][j];
|
cannam@26
|
137 }
|
cannam@26
|
138
|
cannam@26
|
139 /* The analysis window */
|
cannam@26
|
140 window = new Window<double>(HammingWindow, fftSize);
|
cannam@25
|
141
|
cannam@26
|
142 /* Allocate memory for the FFT */
|
cannam@26
|
143 imagIn = (double*)calloc(fftSize,sizeof(double));
|
cannam@26
|
144 realOut = (double*)calloc(fftSize,sizeof(double));
|
cannam@26
|
145 imagOut = (double*)calloc(fftSize,sizeof(double));
|
cannam@25
|
146
|
cannam@26
|
147 free(freqs);
|
cannam@26
|
148 free(lower);
|
cannam@26
|
149 free(center);
|
cannam@26
|
150 free(upper);
|
cannam@26
|
151 free(triangleHeight);
|
cannam@26
|
152 free(fftFreqs);
|
cannam@25
|
153 }
|
cannam@25
|
154
|
cannam@26
|
155 MFCC::~MFCC()
|
cannam@26
|
156 {
|
cannam@26
|
157 int i;
|
cannam@26
|
158
|
cannam@26
|
159 /* Free the structure */
|
cannam@26
|
160 for (i=0;i<nceps+1;i++) {
|
cannam@26
|
161 free(mfccDCTMatrix[i]);
|
cannam@26
|
162 }
|
cannam@26
|
163 free(mfccDCTMatrix);
|
cannam@26
|
164
|
cannam@26
|
165 for (i=0;i<totalFilters; i++) {
|
cannam@26
|
166 free(mfccFilterWeights[i]);
|
cannam@26
|
167 }
|
cannam@26
|
168 free(mfccFilterWeights);
|
cannam@26
|
169
|
cannam@26
|
170 /* Free the feature vector */
|
cannam@26
|
171 free(ceps);
|
cannam@26
|
172
|
cannam@26
|
173 /* The analysis window */
|
cannam@26
|
174 delete window;
|
cannam@26
|
175
|
cannam@26
|
176 /* Free the FFT */
|
cannam@26
|
177 free(imagIn);
|
cannam@26
|
178 free(realOut);
|
cannam@26
|
179 free(imagOut);
|
cannam@26
|
180 }
|
cannam@25
|
181
|
cannam@25
|
182
|
cannam@25
|
183 /*
|
cannam@25
|
184 *
|
cannam@25
|
185 * Extract the MFCC on the input frame
|
cannam@26
|
186 *
|
cannam@26
|
187 * looks like we have to have length = fftSize ??????
|
cannam@25
|
188 *
|
cannam@25
|
189 */
|
cannam@26
|
190 int MFCC::process(int length, double *inframe, double *outceps)
|
cannam@26
|
191 {
|
cannam@26
|
192 int i,j;
|
cannam@26
|
193
|
cannam@26
|
194 double *fftMag;
|
cannam@26
|
195 double *earMag;
|
cannam@25
|
196
|
cannam@26
|
197 double *inputData;
|
cannam@26
|
198
|
cannam@26
|
199 double tmp;
|
cannam@25
|
200
|
cannam@26
|
201 earMag = (double*)calloc(totalFilters, sizeof(double));
|
cannam@26
|
202 inputData = (double*)calloc(fftSize, sizeof(double));
|
cannam@26
|
203 fftMag = (double*)calloc(fftSize, sizeof(double));
|
cannam@26
|
204
|
cannam@26
|
205 /* Zero-pad if needed */
|
cannam@26
|
206 memcpy(inputData, inframe, length*sizeof(double));
|
cannam@25
|
207
|
cannam@26
|
208 window->cut(inputData);
|
cannam@26
|
209
|
cannam@26
|
210 /* Calculate the fft on the input frame */
|
cannam@26
|
211 FFT::process(fftSize, 0, inputData, imagIn, realOut, imagOut);
|
cannam@25
|
212
|
cannam@26
|
213 for (i = 0; i < fftSize; ++i) {
|
cannam@26
|
214 fftMag[i] = sqrt(realOut[i] * realOut[i] +
|
cannam@26
|
215 imagOut[i] * imagOut[i]);
|
cannam@25
|
216 }
|
cannam@25
|
217
|
cannam@26
|
218 /* Multiply by mfccFilterWeights */
|
cannam@26
|
219 for (i=0;i<totalFilters;i++) {
|
cannam@26
|
220 tmp = 0.;
|
cannam@26
|
221 for(j=0;j<fftSize/2; j++) {
|
cannam@26
|
222 tmp = tmp + (mfccFilterWeights[i][j]*fftMag[j]);
|
cannam@26
|
223 }
|
cannam@26
|
224 if (tmp>0)
|
cannam@26
|
225 earMag[i] = log10(tmp);
|
cannam@26
|
226 else
|
cannam@26
|
227 earMag[i] = 0.0;
|
cannam@26
|
228 }
|
cannam@26
|
229
|
cannam@26
|
230 /*
|
cannam@26
|
231 *
|
cannam@26
|
232 * Calculate now the cepstral coefficients
|
cannam@26
|
233 * with or without the DC component
|
cannam@26
|
234 *
|
cannam@26
|
235 */
|
cannam@26
|
236
|
cannam@26
|
237 if (WANT_C0==1) {
|
cannam@26
|
238
|
cannam@26
|
239 for (i=0;i<nceps+1;i++) {
|
cannam@26
|
240 tmp = 0.;
|
cannam@26
|
241 for (j=0;j<totalFilters;j++){
|
cannam@26
|
242 tmp = tmp + mfccDCTMatrix[i][j]*earMag[j];
|
cannam@26
|
243 }
|
cannam@26
|
244 outceps[i] = tmp;
|
cannam@26
|
245 }
|
cannam@26
|
246 }
|
cannam@25
|
247 else
|
cannam@26
|
248 {
|
cannam@26
|
249 for (i=1;i<nceps+1;i++) {
|
cannam@26
|
250 tmp = 0.;
|
cannam@26
|
251 for (j=0;j<totalFilters;j++){
|
cannam@26
|
252 tmp = tmp + mfccDCTMatrix[i][j]*earMag[j];
|
cannam@26
|
253 }
|
cannam@26
|
254 outceps[i-1] = tmp;
|
cannam@25
|
255 }
|
cannam@26
|
256 }
|
cannam@25
|
257
|
cannam@26
|
258 free(fftMag);
|
cannam@26
|
259 free(earMag);
|
cannam@26
|
260 free(inputData);
|
cannam@25
|
261
|
cannam@26
|
262 return nceps;
|
cannam@25
|
263 }
|
cannam@25
|
264
|