c@251
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
c@251
|
2
|
c@251
|
3 /*
|
c@251
|
4 QM DSP Library
|
c@251
|
5
|
c@251
|
6 Centre for Digital Music, Queen Mary, University of London.
|
c@251
|
7 This file copyright 2005 Nicolas Chetry, copyright 2008 QMUL.
|
c@309
|
8
|
c@309
|
9 This program is free software; you can redistribute it and/or
|
c@309
|
10 modify it under the terms of the GNU General Public License as
|
c@309
|
11 published by the Free Software Foundation; either version 2 of the
|
c@309
|
12 License, or (at your option) any later version. See the file
|
c@309
|
13 COPYING included with this distribution for more information.
|
c@251
|
14 */
|
c@251
|
15
|
c@251
|
16 #include <cmath>
|
c@251
|
17 #include <cstdlib>
|
c@272
|
18 #include <cstring>
|
c@251
|
19
|
c@251
|
20 #include "MFCC.h"
|
c@251
|
21 #include "dsp/transforms/FFT.h"
|
c@251
|
22 #include "base/Window.h"
|
c@251
|
23
|
c@251
|
24 MFCC::MFCC(MFCCConfig config)
|
c@251
|
25 {
|
c@251
|
26 int i,j;
|
c@251
|
27
|
c@251
|
28 /* Calculate at startup */
|
c@251
|
29 double *freqs, *lower, *center, *upper, *triangleHeight, *fftFreqs;
|
c@251
|
30
|
c@251
|
31 lowestFrequency = 66.6666666;
|
c@251
|
32 linearFilters = 13;
|
c@251
|
33 linearSpacing = 66.66666666;
|
c@251
|
34 logFilters = 27;
|
c@251
|
35 logSpacing = 1.0711703;
|
c@251
|
36
|
c@251
|
37 /* FFT and analysis window sizes */
|
c@251
|
38 fftSize = config.fftsize;
|
c@289
|
39 fft = new FFTReal(fftSize);
|
c@251
|
40
|
c@251
|
41 totalFilters = linearFilters + logFilters;
|
c@255
|
42 logPower = config.logpower;
|
c@251
|
43
|
c@251
|
44 samplingRate = config.FS;
|
c@251
|
45
|
c@251
|
46 /* The number of cepstral componenents */
|
c@251
|
47 nceps = config.nceps;
|
c@251
|
48
|
c@251
|
49 /* Set if user want C0 */
|
c@251
|
50 WANT_C0 = (config.want_c0 ? 1 : 0);
|
c@251
|
51
|
c@251
|
52 /* Allocate space for feature vector */
|
c@251
|
53 if (WANT_C0 == 1) {
|
c@251
|
54 ceps = (double*)calloc(nceps+1, sizeof(double));
|
c@251
|
55 } else {
|
c@251
|
56 ceps = (double*)calloc(nceps, sizeof(double));
|
c@251
|
57 }
|
c@251
|
58
|
c@251
|
59 /* Allocate space for local vectors */
|
c@251
|
60 mfccDCTMatrix = (double**)calloc(nceps+1, sizeof(double*));
|
c@255
|
61 for (i = 0; i < nceps+1; i++) {
|
c@251
|
62 mfccDCTMatrix[i]= (double*)calloc(totalFilters, sizeof(double));
|
c@251
|
63 }
|
c@251
|
64
|
c@251
|
65 mfccFilterWeights = (double**)calloc(totalFilters, sizeof(double*));
|
c@255
|
66 for (i = 0; i < totalFilters; i++) {
|
c@251
|
67 mfccFilterWeights[i] = (double*)calloc(fftSize, sizeof(double));
|
c@251
|
68 }
|
c@251
|
69
|
c@251
|
70 freqs = (double*)calloc(totalFilters+2,sizeof(double));
|
c@251
|
71
|
c@251
|
72 lower = (double*)calloc(totalFilters,sizeof(double));
|
c@251
|
73 center = (double*)calloc(totalFilters,sizeof(double));
|
c@251
|
74 upper = (double*)calloc(totalFilters,sizeof(double));
|
c@251
|
75
|
c@251
|
76 triangleHeight = (double*)calloc(totalFilters,sizeof(double));
|
c@251
|
77 fftFreqs = (double*)calloc(fftSize,sizeof(double));
|
c@251
|
78
|
c@255
|
79 for (i = 0; i < linearFilters; i++) {
|
c@251
|
80 freqs[i] = lowestFrequency + ((double)i) * linearSpacing;
|
c@251
|
81 }
|
c@251
|
82
|
c@255
|
83 for (i = linearFilters; i < totalFilters+2; i++) {
|
c@251
|
84 freqs[i] = freqs[linearFilters-1] *
|
c@251
|
85 pow(logSpacing, (double)(i-linearFilters+1));
|
c@251
|
86 }
|
c@251
|
87
|
c@251
|
88 /* Define lower, center and upper */
|
c@251
|
89 memcpy(lower, freqs,totalFilters*sizeof(double));
|
c@251
|
90 memcpy(center, &freqs[1],totalFilters*sizeof(double));
|
c@251
|
91 memcpy(upper, &freqs[2],totalFilters*sizeof(double));
|
c@251
|
92
|
c@251
|
93 for (i=0;i<totalFilters;i++){
|
c@251
|
94 triangleHeight[i] = 2./(upper[i]-lower[i]);
|
c@251
|
95 }
|
c@251
|
96
|
c@251
|
97 for (i=0;i<fftSize;i++){
|
c@251
|
98 fftFreqs[i] = ((double) i / ((double) fftSize ) *
|
c@251
|
99 (double) samplingRate);
|
c@251
|
100 }
|
c@251
|
101
|
c@251
|
102 /* Build now the mccFilterWeight matrix */
|
c@251
|
103 for (i=0;i<totalFilters;i++){
|
c@251
|
104
|
c@251
|
105 for (j=0;j<fftSize;j++) {
|
c@251
|
106
|
c@251
|
107 if ((fftFreqs[j] > lower[i]) && (fftFreqs[j] <= center[i])) {
|
c@251
|
108
|
c@251
|
109 mfccFilterWeights[i][j] = triangleHeight[i] *
|
c@251
|
110 (fftFreqs[j]-lower[i]) / (center[i]-lower[i]);
|
c@251
|
111
|
c@251
|
112 }
|
c@251
|
113 else
|
c@251
|
114 {
|
c@251
|
115 mfccFilterWeights[i][j] = 0.0;
|
c@251
|
116 }
|
c@251
|
117
|
c@251
|
118 if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) {
|
c@251
|
119
|
c@255
|
120 mfccFilterWeights[i][j] = mfccFilterWeights[i][j]
|
c@255
|
121 + triangleHeight[i] * (upper[i]-fftFreqs[j])
|
c@251
|
122 / (upper[i]-center[i]);
|
c@251
|
123 }
|
c@251
|
124 else
|
c@251
|
125 {
|
c@251
|
126 mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + 0.0;
|
c@251
|
127 }
|
c@251
|
128 }
|
c@251
|
129
|
c@251
|
130 }
|
c@251
|
131
|
c@251
|
132 /*
|
c@251
|
133 * We calculate now mfccDCT matrix
|
c@251
|
134 * NB: +1 because of the DC component
|
c@251
|
135 */
|
c@254
|
136
|
c@254
|
137 const double pi = 3.14159265358979323846264338327950288;
|
c@251
|
138
|
c@255
|
139 for (i = 0; i < nceps+1; i++) {
|
c@255
|
140 for (j = 0; j < totalFilters; j++) {
|
c@251
|
141 mfccDCTMatrix[i][j] = (1./sqrt((double) totalFilters / 2.))
|
c@254
|
142 * cos((double) i * ((double) j + 0.5) / (double) totalFilters * pi);
|
c@251
|
143 }
|
c@251
|
144 }
|
c@251
|
145
|
c@255
|
146 for (j = 0; j < totalFilters; j++){
|
c@255
|
147 mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfccDCTMatrix[0][j];
|
c@251
|
148 }
|
c@251
|
149
|
c@251
|
150 /* The analysis window */
|
c@257
|
151 window = new Window<double>(config.window, fftSize);
|
c@251
|
152
|
c@251
|
153 /* Allocate memory for the FFT */
|
c@255
|
154 realOut = (double*)calloc(fftSize, sizeof(double));
|
c@255
|
155 imagOut = (double*)calloc(fftSize, sizeof(double));
|
c@255
|
156
|
c@255
|
157 earMag = (double*)calloc(totalFilters, sizeof(double));
|
c@255
|
158 fftMag = (double*)calloc(fftSize/2, sizeof(double));
|
c@251
|
159
|
c@251
|
160 free(freqs);
|
c@251
|
161 free(lower);
|
c@251
|
162 free(center);
|
c@251
|
163 free(upper);
|
c@251
|
164 free(triangleHeight);
|
c@251
|
165 free(fftFreqs);
|
c@251
|
166 }
|
c@251
|
167
|
c@251
|
168 MFCC::~MFCC()
|
c@251
|
169 {
|
c@251
|
170 int i;
|
c@251
|
171
|
c@251
|
172 /* Free the structure */
|
c@255
|
173 for (i = 0; i < nceps+1; i++) {
|
c@251
|
174 free(mfccDCTMatrix[i]);
|
c@251
|
175 }
|
c@251
|
176 free(mfccDCTMatrix);
|
c@251
|
177
|
c@255
|
178 for (i = 0; i < totalFilters; i++) {
|
c@251
|
179 free(mfccFilterWeights[i]);
|
c@251
|
180 }
|
c@251
|
181 free(mfccFilterWeights);
|
c@251
|
182
|
c@251
|
183 /* Free the feature vector */
|
c@251
|
184 free(ceps);
|
c@251
|
185
|
c@251
|
186 /* The analysis window */
|
c@251
|
187 delete window;
|
c@255
|
188
|
c@255
|
189 free(earMag);
|
c@255
|
190 free(fftMag);
|
c@251
|
191
|
c@251
|
192 /* Free the FFT */
|
c@251
|
193 free(realOut);
|
c@251
|
194 free(imagOut);
|
c@289
|
195
|
c@289
|
196 delete fft;
|
c@251
|
197 }
|
c@251
|
198
|
c@251
|
199
|
c@251
|
200 /*
|
c@251
|
201 *
|
c@251
|
202 * Extract the MFCC on the input frame
|
c@251
|
203 *
|
c@251
|
204 */
|
c@255
|
205 int MFCC::process(const double *inframe, double *outceps)
|
c@251
|
206 {
|
c@255
|
207 double *inputData = (double *)malloc(fftSize * sizeof(double));
|
c@255
|
208 for (int i = 0; i < fftSize; ++i) inputData[i] = inframe[i];
|
c@251
|
209
|
c@255
|
210 window->cut(inputData);
|
c@251
|
211
|
c@251
|
212 /* Calculate the fft on the input frame */
|
c@289
|
213 fft->process(0, inputData, realOut, imagOut);
|
c@251
|
214
|
c@255
|
215 free(inputData);
|
c@255
|
216
|
c@255
|
217 return process(realOut, imagOut, outceps);
|
c@255
|
218 }
|
c@255
|
219
|
c@255
|
220 int MFCC::process(const double *real, const double *imag, double *outceps)
|
c@255
|
221 {
|
c@255
|
222 int i, j;
|
c@255
|
223
|
c@255
|
224 for (i = 0; i < fftSize/2; ++i) {
|
c@255
|
225 fftMag[i] = sqrt(real[i] * real[i] + imag[i] * imag[i]);
|
c@255
|
226 }
|
c@255
|
227
|
c@255
|
228 for (i = 0; i < totalFilters; ++i) {
|
c@255
|
229 earMag[i] = 0.0;
|
c@251
|
230 }
|
c@251
|
231
|
c@251
|
232 /* Multiply by mfccFilterWeights */
|
c@255
|
233 for (i = 0; i < totalFilters; i++) {
|
c@255
|
234 double tmp = 0.0;
|
c@255
|
235 for (j = 0; j < fftSize/2; j++) {
|
c@255
|
236 tmp = tmp + (mfccFilterWeights[i][j] * fftMag[j]);
|
c@251
|
237 }
|
c@255
|
238 if (tmp > 0) earMag[i] = log10(tmp);
|
c@255
|
239 else earMag[i] = 0.0;
|
c@255
|
240
|
c@255
|
241 if (logPower != 1.0) {
|
c@255
|
242 earMag[i] = pow(earMag[i], logPower);
|
c@255
|
243 }
|
c@251
|
244 }
|
c@251
|
245
|
c@251
|
246 /*
|
c@251
|
247 *
|
c@251
|
248 * Calculate now the cepstral coefficients
|
c@251
|
249 * with or without the DC component
|
c@251
|
250 *
|
c@251
|
251 */
|
c@251
|
252
|
c@255
|
253 if (WANT_C0 == 1) {
|
c@251
|
254
|
c@255
|
255 for (i = 0; i < nceps+1; i++) {
|
c@255
|
256 double tmp = 0.;
|
c@255
|
257 for (j = 0; j < totalFilters; j++){
|
c@255
|
258 tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
|
c@251
|
259 }
|
c@251
|
260 outceps[i] = tmp;
|
c@251
|
261 }
|
c@251
|
262 }
|
c@251
|
263 else
|
c@251
|
264 {
|
c@255
|
265 for (i = 1; i < nceps+1; i++) {
|
c@255
|
266 double tmp = 0.;
|
c@255
|
267 for (j = 0; j < totalFilters; j++){
|
c@255
|
268 tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
|
c@251
|
269 }
|
c@251
|
270 outceps[i-1] = tmp;
|
c@251
|
271 }
|
c@251
|
272 }
|
c@251
|
273
|
c@251
|
274 return nceps;
|
c@251
|
275 }
|
c@251
|
276
|