cannam@26
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
cannam@25
|
2
|
cannam@25
|
3 /*
|
cannam@26
|
4 QM DSP Library
|
cannam@25
|
5
|
cannam@26
|
6 Centre for Digital Music, Queen Mary, University of London.
|
cannam@26
|
7 This file copyright 2005 Nicolas Chetry, copyright 2008 QMUL.
|
cannam@26
|
8 All rights reserved.
|
cannam@26
|
9 */
|
cannam@25
|
10
|
cannam@26
|
11 #include <cmath>
|
cannam@26
|
12 #include <cstdlib>
|
cannam@26
|
13
|
cannam@26
|
14 #include "MFCC.h"
|
cannam@26
|
15 #include "dsp/transforms/FFT.h"
|
cannam@26
|
16 #include "base/Window.h"
|
cannam@26
|
17
|
cannam@26
|
18 MFCC::MFCC(MFCCConfig config)
|
cannam@26
|
19 {
|
cannam@26
|
20 int i,j;
|
cannam@26
|
21
|
cannam@26
|
22 /* Calculate at startup */
|
cannam@26
|
23 double *freqs, *lower, *center, *upper, *triangleHeight, *fftFreqs;
|
cannam@25
|
24
|
cannam@26
|
25 lowestFrequency = 66.6666666;
|
cannam@26
|
26 linearFilters = 13;
|
cannam@26
|
27 linearSpacing = 66.66666666;
|
cannam@26
|
28 logFilters = 27;
|
cannam@26
|
29 logSpacing = 1.0711703;
|
cannam@25
|
30
|
cannam@26
|
31 /* FFT and analysis window sizes */
|
cannam@26
|
32 fftSize = config.fftsize;
|
cannam@26
|
33
|
cannam@26
|
34 totalFilters = linearFilters + logFilters;
|
cannam@25
|
35
|
cannam@26
|
36 samplingRate = config.FS;
|
cannam@26
|
37
|
cannam@26
|
38 /* The number of cepstral componenents */
|
cannam@26
|
39 nceps = config.nceps;
|
cannam@25
|
40
|
cannam@26
|
41 /* Set if user want C0 */
|
cannam@26
|
42 WANT_C0 = (config.want_c0 ? 1 : 0);
|
cannam@25
|
43
|
cannam@26
|
44 /* Allocate space for feature vector */
|
cannam@26
|
45 if (WANT_C0 == 1) {
|
cannam@26
|
46 ceps = (double*)calloc(nceps+1, sizeof(double));
|
cannam@26
|
47 } else {
|
cannam@26
|
48 ceps = (double*)calloc(nceps, sizeof(double));
|
cannam@26
|
49 }
|
cannam@26
|
50
|
cannam@26
|
51 /* Allocate space for local vectors */
|
cannam@26
|
52 mfccDCTMatrix = (double**)calloc(nceps+1, sizeof(double*));
|
cannam@26
|
53 for (i=0;i<nceps+1; i++) {
|
cannam@26
|
54 mfccDCTMatrix[i]= (double*)calloc(totalFilters, sizeof(double));
|
cannam@26
|
55 }
|
cannam@26
|
56
|
cannam@26
|
57 mfccFilterWeights = (double**)calloc(totalFilters, sizeof(double*));
|
cannam@26
|
58 for (i=0;i<totalFilters; i++) {
|
cannam@26
|
59 mfccFilterWeights[i] = (double*)calloc(fftSize, sizeof(double));
|
cannam@26
|
60 }
|
cannam@26
|
61
|
cannam@26
|
62 freqs = (double*)calloc(totalFilters+2,sizeof(double));
|
cannam@26
|
63
|
cannam@26
|
64 lower = (double*)calloc(totalFilters,sizeof(double));
|
cannam@26
|
65 center = (double*)calloc(totalFilters,sizeof(double));
|
cannam@26
|
66 upper = (double*)calloc(totalFilters,sizeof(double));
|
cannam@26
|
67
|
cannam@26
|
68 triangleHeight = (double*)calloc(totalFilters,sizeof(double));
|
cannam@26
|
69 fftFreqs = (double*)calloc(fftSize,sizeof(double));
|
cannam@25
|
70
|
cannam@26
|
71 for (i=0;i<linearFilters;i++) {
|
cannam@26
|
72 freqs[i] = lowestFrequency + ((double)i) * linearSpacing;
|
cannam@26
|
73 }
|
cannam@26
|
74
|
cannam@26
|
75 for (i=linearFilters; i<totalFilters+2; i++) {
|
cannam@26
|
76 freqs[i] = freqs[linearFilters-1] *
|
cannam@26
|
77 pow(logSpacing, (double)(i-linearFilters+1));
|
cannam@26
|
78 }
|
cannam@26
|
79
|
cannam@26
|
80 /* Define lower, center and upper */
|
cannam@26
|
81 memcpy(lower, freqs,totalFilters*sizeof(double));
|
cannam@26
|
82 memcpy(center, &freqs[1],totalFilters*sizeof(double));
|
cannam@26
|
83 memcpy(upper, &freqs[2],totalFilters*sizeof(double));
|
cannam@26
|
84
|
cannam@26
|
85 for (i=0;i<totalFilters;i++){
|
cannam@26
|
86 triangleHeight[i] = 2./(upper[i]-lower[i]);
|
cannam@26
|
87 }
|
cannam@26
|
88
|
cannam@26
|
89 for (i=0;i<fftSize;i++){
|
cannam@26
|
90 fftFreqs[i] = ((double) i / ((double) fftSize ) *
|
cannam@26
|
91 (double) samplingRate);
|
cannam@26
|
92 }
|
cannam@25
|
93
|
cannam@26
|
94 /* Build now the mccFilterWeight matrix */
|
cannam@26
|
95 for (i=0;i<totalFilters;i++){
|
cannam@25
|
96
|
cannam@26
|
97 for (j=0;j<fftSize;j++) {
|
cannam@26
|
98
|
cannam@26
|
99 if ((fftFreqs[j] > lower[i]) && (fftFreqs[j] <= center[i])) {
|
cannam@26
|
100
|
cannam@26
|
101 mfccFilterWeights[i][j] = triangleHeight[i] *
|
cannam@26
|
102 (fftFreqs[j]-lower[i]) / (center[i]-lower[i]);
|
cannam@26
|
103
|
cannam@26
|
104 }
|
cannam@26
|
105 else
|
cannam@26
|
106 {
|
cannam@26
|
107 mfccFilterWeights[i][j] = 0.0;
|
cannam@26
|
108 }
|
cannam@25
|
109
|
cannam@26
|
110 if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) {
|
cannam@25
|
111
|
cannam@26
|
112 mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + triangleHeight[i] * (upper[i]-fftFreqs[j])
|
cannam@26
|
113 / (upper[i]-center[i]);
|
cannam@26
|
114 }
|
cannam@26
|
115 else
|
cannam@26
|
116 {
|
cannam@26
|
117 mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + 0.0;
|
cannam@26
|
118 }
|
cannam@25
|
119 }
|
cannam@25
|
120
|
cannam@26
|
121 }
|
cannam@25
|
122
|
cannam@26
|
123 /*
|
cannam@26
|
124 * We calculate now mfccDCT matrix
|
cannam@26
|
125 * NB: +1 because of the DC component
|
cannam@26
|
126 */
|
cannam@29
|
127
|
cannam@29
|
128 const double pi = 3.14159265358979323846264338327950288;
|
cannam@26
|
129
|
cannam@26
|
130 for (i=0; i<nceps+1; i++) {
|
cannam@26
|
131 for (j=0; j<totalFilters; j++) {
|
cannam@26
|
132 mfccDCTMatrix[i][j] = (1./sqrt((double) totalFilters / 2.))
|
cannam@29
|
133 * cos((double) i * ((double) j + 0.5) / (double) totalFilters * pi);
|
cannam@25
|
134 }
|
cannam@25
|
135 }
|
cannam@25
|
136
|
cannam@26
|
137 for (j=0;j<totalFilters;j++){
|
cannam@26
|
138 mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfccDCTMatrix[0][j];
|
cannam@26
|
139 }
|
cannam@26
|
140
|
cannam@26
|
141 /* The analysis window */
|
cannam@26
|
142 window = new Window<double>(HammingWindow, fftSize);
|
cannam@25
|
143
|
cannam@26
|
144 /* Allocate memory for the FFT */
|
cannam@26
|
145 imagIn = (double*)calloc(fftSize,sizeof(double));
|
cannam@26
|
146 realOut = (double*)calloc(fftSize,sizeof(double));
|
cannam@26
|
147 imagOut = (double*)calloc(fftSize,sizeof(double));
|
cannam@25
|
148
|
cannam@26
|
149 free(freqs);
|
cannam@26
|
150 free(lower);
|
cannam@26
|
151 free(center);
|
cannam@26
|
152 free(upper);
|
cannam@26
|
153 free(triangleHeight);
|
cannam@26
|
154 free(fftFreqs);
|
cannam@25
|
155 }
|
cannam@25
|
156
|
cannam@26
|
157 MFCC::~MFCC()
|
cannam@26
|
158 {
|
cannam@26
|
159 int i;
|
cannam@26
|
160
|
cannam@26
|
161 /* Free the structure */
|
cannam@26
|
162 for (i=0;i<nceps+1;i++) {
|
cannam@26
|
163 free(mfccDCTMatrix[i]);
|
cannam@26
|
164 }
|
cannam@26
|
165 free(mfccDCTMatrix);
|
cannam@26
|
166
|
cannam@26
|
167 for (i=0;i<totalFilters; i++) {
|
cannam@26
|
168 free(mfccFilterWeights[i]);
|
cannam@26
|
169 }
|
cannam@26
|
170 free(mfccFilterWeights);
|
cannam@26
|
171
|
cannam@26
|
172 /* Free the feature vector */
|
cannam@26
|
173 free(ceps);
|
cannam@26
|
174
|
cannam@26
|
175 /* The analysis window */
|
cannam@26
|
176 delete window;
|
cannam@26
|
177
|
cannam@26
|
178 /* Free the FFT */
|
cannam@26
|
179 free(imagIn);
|
cannam@26
|
180 free(realOut);
|
cannam@26
|
181 free(imagOut);
|
cannam@26
|
182 }
|
cannam@25
|
183
|
cannam@25
|
184
|
cannam@25
|
185 /*
|
cannam@25
|
186 *
|
cannam@25
|
187 * Extract the MFCC on the input frame
|
cannam@26
|
188 *
|
cannam@26
|
189 * looks like we have to have length = fftSize ??????
|
cannam@25
|
190 *
|
cannam@25
|
191 */
|
cannam@26
|
192 int MFCC::process(int length, double *inframe, double *outceps)
|
cannam@26
|
193 {
|
cannam@26
|
194 int i,j;
|
cannam@26
|
195
|
cannam@26
|
196 double *fftMag;
|
cannam@26
|
197 double *earMag;
|
cannam@25
|
198
|
cannam@26
|
199 double *inputData;
|
cannam@26
|
200
|
cannam@26
|
201 double tmp;
|
cannam@25
|
202
|
cannam@26
|
203 earMag = (double*)calloc(totalFilters, sizeof(double));
|
cannam@26
|
204 inputData = (double*)calloc(fftSize, sizeof(double));
|
cannam@26
|
205 fftMag = (double*)calloc(fftSize, sizeof(double));
|
cannam@26
|
206
|
cannam@26
|
207 /* Zero-pad if needed */
|
cannam@26
|
208 memcpy(inputData, inframe, length*sizeof(double));
|
cannam@25
|
209
|
cannam@29
|
210 //!!! window->cut(inputData);
|
cannam@26
|
211
|
cannam@26
|
212 /* Calculate the fft on the input frame */
|
cannam@26
|
213 FFT::process(fftSize, 0, inputData, imagIn, realOut, imagOut);
|
cannam@25
|
214
|
cannam@26
|
215 for (i = 0; i < fftSize; ++i) {
|
cannam@26
|
216 fftMag[i] = sqrt(realOut[i] * realOut[i] +
|
cannam@26
|
217 imagOut[i] * imagOut[i]);
|
cannam@25
|
218 }
|
cannam@25
|
219
|
cannam@26
|
220 /* Multiply by mfccFilterWeights */
|
cannam@26
|
221 for (i=0;i<totalFilters;i++) {
|
cannam@26
|
222 tmp = 0.;
|
cannam@26
|
223 for(j=0;j<fftSize/2; j++) {
|
cannam@26
|
224 tmp = tmp + (mfccFilterWeights[i][j]*fftMag[j]);
|
cannam@26
|
225 }
|
cannam@26
|
226 if (tmp>0)
|
cannam@26
|
227 earMag[i] = log10(tmp);
|
cannam@26
|
228 else
|
cannam@26
|
229 earMag[i] = 0.0;
|
cannam@26
|
230 }
|
cannam@26
|
231
|
cannam@26
|
232 /*
|
cannam@26
|
233 *
|
cannam@26
|
234 * Calculate now the cepstral coefficients
|
cannam@26
|
235 * with or without the DC component
|
cannam@26
|
236 *
|
cannam@26
|
237 */
|
cannam@26
|
238
|
cannam@26
|
239 if (WANT_C0==1) {
|
cannam@26
|
240
|
cannam@26
|
241 for (i=0;i<nceps+1;i++) {
|
cannam@26
|
242 tmp = 0.;
|
cannam@26
|
243 for (j=0;j<totalFilters;j++){
|
cannam@26
|
244 tmp = tmp + mfccDCTMatrix[i][j]*earMag[j];
|
cannam@26
|
245 }
|
cannam@26
|
246 outceps[i] = tmp;
|
cannam@26
|
247 }
|
cannam@26
|
248 }
|
cannam@25
|
249 else
|
cannam@26
|
250 {
|
cannam@26
|
251 for (i=1;i<nceps+1;i++) {
|
cannam@26
|
252 tmp = 0.;
|
cannam@26
|
253 for (j=0;j<totalFilters;j++){
|
cannam@26
|
254 tmp = tmp + mfccDCTMatrix[i][j]*earMag[j];
|
cannam@26
|
255 }
|
cannam@26
|
256 outceps[i-1] = tmp;
|
cannam@25
|
257 }
|
cannam@26
|
258 }
|
cannam@25
|
259
|
cannam@26
|
260 free(fftMag);
|
cannam@26
|
261 free(earMag);
|
cannam@26
|
262 free(inputData);
|
cannam@25
|
263
|
cannam@26
|
264 return nceps;
|
cannam@25
|
265 }
|
cannam@25
|
266
|