c@251
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
c@251
|
2
|
c@251
|
3 /*
|
c@251
|
4 QM DSP Library
|
c@251
|
5
|
c@251
|
6 Centre for Digital Music, Queen Mary, University of London.
|
c@251
|
7 This file copyright 2005 Nicolas Chetry, copyright 2008 QMUL.
|
c@251
|
8 All rights reserved.
|
c@251
|
9 */
|
c@251
|
10
|
c@251
|
11 #include <cmath>
|
c@251
|
12 #include <cstdlib>
|
c@272
|
13 #include <cstring>
|
c@251
|
14
|
c@251
|
15 #include "MFCC.h"
|
c@251
|
16 #include "dsp/transforms/FFT.h"
|
c@251
|
17 #include "base/Window.h"
|
c@251
|
18
|
c@251
|
19 MFCC::MFCC(MFCCConfig config)
|
c@251
|
20 {
|
c@251
|
21 int i,j;
|
c@251
|
22
|
c@251
|
23 /* Calculate at startup */
|
c@251
|
24 double *freqs, *lower, *center, *upper, *triangleHeight, *fftFreqs;
|
c@251
|
25
|
c@251
|
26 lowestFrequency = 66.6666666;
|
c@251
|
27 linearFilters = 13;
|
c@251
|
28 linearSpacing = 66.66666666;
|
c@251
|
29 logFilters = 27;
|
c@251
|
30 logSpacing = 1.0711703;
|
c@251
|
31
|
c@251
|
32 /* FFT and analysis window sizes */
|
c@251
|
33 fftSize = config.fftsize;
|
c@289
|
34 fft = new FFTReal(fftSize);
|
c@251
|
35
|
c@251
|
36 totalFilters = linearFilters + logFilters;
|
c@255
|
37 logPower = config.logpower;
|
c@251
|
38
|
c@251
|
39 samplingRate = config.FS;
|
c@251
|
40
|
c@251
|
41 /* The number of cepstral componenents */
|
c@251
|
42 nceps = config.nceps;
|
c@251
|
43
|
c@251
|
44 /* Set if user want C0 */
|
c@251
|
45 WANT_C0 = (config.want_c0 ? 1 : 0);
|
c@251
|
46
|
c@251
|
47 /* Allocate space for feature vector */
|
c@251
|
48 if (WANT_C0 == 1) {
|
c@251
|
49 ceps = (double*)calloc(nceps+1, sizeof(double));
|
c@251
|
50 } else {
|
c@251
|
51 ceps = (double*)calloc(nceps, sizeof(double));
|
c@251
|
52 }
|
c@251
|
53
|
c@251
|
54 /* Allocate space for local vectors */
|
c@251
|
55 mfccDCTMatrix = (double**)calloc(nceps+1, sizeof(double*));
|
c@255
|
56 for (i = 0; i < nceps+1; i++) {
|
c@251
|
57 mfccDCTMatrix[i]= (double*)calloc(totalFilters, sizeof(double));
|
c@251
|
58 }
|
c@251
|
59
|
c@251
|
60 mfccFilterWeights = (double**)calloc(totalFilters, sizeof(double*));
|
c@255
|
61 for (i = 0; i < totalFilters; i++) {
|
c@251
|
62 mfccFilterWeights[i] = (double*)calloc(fftSize, sizeof(double));
|
c@251
|
63 }
|
c@251
|
64
|
c@251
|
65 freqs = (double*)calloc(totalFilters+2,sizeof(double));
|
c@251
|
66
|
c@251
|
67 lower = (double*)calloc(totalFilters,sizeof(double));
|
c@251
|
68 center = (double*)calloc(totalFilters,sizeof(double));
|
c@251
|
69 upper = (double*)calloc(totalFilters,sizeof(double));
|
c@251
|
70
|
c@251
|
71 triangleHeight = (double*)calloc(totalFilters,sizeof(double));
|
c@251
|
72 fftFreqs = (double*)calloc(fftSize,sizeof(double));
|
c@251
|
73
|
c@255
|
74 for (i = 0; i < linearFilters; i++) {
|
c@251
|
75 freqs[i] = lowestFrequency + ((double)i) * linearSpacing;
|
c@251
|
76 }
|
c@251
|
77
|
c@255
|
78 for (i = linearFilters; i < totalFilters+2; i++) {
|
c@251
|
79 freqs[i] = freqs[linearFilters-1] *
|
c@251
|
80 pow(logSpacing, (double)(i-linearFilters+1));
|
c@251
|
81 }
|
c@251
|
82
|
c@251
|
83 /* Define lower, center and upper */
|
c@251
|
84 memcpy(lower, freqs,totalFilters*sizeof(double));
|
c@251
|
85 memcpy(center, &freqs[1],totalFilters*sizeof(double));
|
c@251
|
86 memcpy(upper, &freqs[2],totalFilters*sizeof(double));
|
c@251
|
87
|
c@251
|
88 for (i=0;i<totalFilters;i++){
|
c@251
|
89 triangleHeight[i] = 2./(upper[i]-lower[i]);
|
c@251
|
90 }
|
c@251
|
91
|
c@251
|
92 for (i=0;i<fftSize;i++){
|
c@251
|
93 fftFreqs[i] = ((double) i / ((double) fftSize ) *
|
c@251
|
94 (double) samplingRate);
|
c@251
|
95 }
|
c@251
|
96
|
c@251
|
97 /* Build now the mccFilterWeight matrix */
|
c@251
|
98 for (i=0;i<totalFilters;i++){
|
c@251
|
99
|
c@251
|
100 for (j=0;j<fftSize;j++) {
|
c@251
|
101
|
c@251
|
102 if ((fftFreqs[j] > lower[i]) && (fftFreqs[j] <= center[i])) {
|
c@251
|
103
|
c@251
|
104 mfccFilterWeights[i][j] = triangleHeight[i] *
|
c@251
|
105 (fftFreqs[j]-lower[i]) / (center[i]-lower[i]);
|
c@251
|
106
|
c@251
|
107 }
|
c@251
|
108 else
|
c@251
|
109 {
|
c@251
|
110 mfccFilterWeights[i][j] = 0.0;
|
c@251
|
111 }
|
c@251
|
112
|
c@251
|
113 if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) {
|
c@251
|
114
|
c@255
|
115 mfccFilterWeights[i][j] = mfccFilterWeights[i][j]
|
c@255
|
116 + triangleHeight[i] * (upper[i]-fftFreqs[j])
|
c@251
|
117 / (upper[i]-center[i]);
|
c@251
|
118 }
|
c@251
|
119 else
|
c@251
|
120 {
|
c@251
|
121 mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + 0.0;
|
c@251
|
122 }
|
c@251
|
123 }
|
c@251
|
124
|
c@251
|
125 }
|
c@251
|
126
|
c@251
|
127 /*
|
c@251
|
128 * We calculate now mfccDCT matrix
|
c@251
|
129 * NB: +1 because of the DC component
|
c@251
|
130 */
|
c@254
|
131
|
c@254
|
132 const double pi = 3.14159265358979323846264338327950288;
|
c@251
|
133
|
c@255
|
134 for (i = 0; i < nceps+1; i++) {
|
c@255
|
135 for (j = 0; j < totalFilters; j++) {
|
c@251
|
136 mfccDCTMatrix[i][j] = (1./sqrt((double) totalFilters / 2.))
|
c@254
|
137 * cos((double) i * ((double) j + 0.5) / (double) totalFilters * pi);
|
c@251
|
138 }
|
c@251
|
139 }
|
c@251
|
140
|
c@255
|
141 for (j = 0; j < totalFilters; j++){
|
c@255
|
142 mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfccDCTMatrix[0][j];
|
c@251
|
143 }
|
c@251
|
144
|
c@251
|
145 /* The analysis window */
|
c@257
|
146 window = new Window<double>(config.window, fftSize);
|
c@251
|
147
|
c@251
|
148 /* Allocate memory for the FFT */
|
c@255
|
149 realOut = (double*)calloc(fftSize, sizeof(double));
|
c@255
|
150 imagOut = (double*)calloc(fftSize, sizeof(double));
|
c@255
|
151
|
c@255
|
152 earMag = (double*)calloc(totalFilters, sizeof(double));
|
c@255
|
153 fftMag = (double*)calloc(fftSize/2, sizeof(double));
|
c@251
|
154
|
c@251
|
155 free(freqs);
|
c@251
|
156 free(lower);
|
c@251
|
157 free(center);
|
c@251
|
158 free(upper);
|
c@251
|
159 free(triangleHeight);
|
c@251
|
160 free(fftFreqs);
|
c@251
|
161 }
|
c@251
|
162
|
c@251
|
163 MFCC::~MFCC()
|
c@251
|
164 {
|
c@251
|
165 int i;
|
c@251
|
166
|
c@251
|
167 /* Free the structure */
|
c@255
|
168 for (i = 0; i < nceps+1; i++) {
|
c@251
|
169 free(mfccDCTMatrix[i]);
|
c@251
|
170 }
|
c@251
|
171 free(mfccDCTMatrix);
|
c@251
|
172
|
c@255
|
173 for (i = 0; i < totalFilters; i++) {
|
c@251
|
174 free(mfccFilterWeights[i]);
|
c@251
|
175 }
|
c@251
|
176 free(mfccFilterWeights);
|
c@251
|
177
|
c@251
|
178 /* Free the feature vector */
|
c@251
|
179 free(ceps);
|
c@251
|
180
|
c@251
|
181 /* The analysis window */
|
c@251
|
182 delete window;
|
c@255
|
183
|
c@255
|
184 free(earMag);
|
c@255
|
185 free(fftMag);
|
c@251
|
186
|
c@251
|
187 /* Free the FFT */
|
c@251
|
188 free(realOut);
|
c@251
|
189 free(imagOut);
|
c@289
|
190
|
c@289
|
191 delete fft;
|
c@251
|
192 }
|
c@251
|
193
|
c@251
|
194
|
c@251
|
195 /*
|
c@251
|
196 *
|
c@251
|
197 * Extract the MFCC on the input frame
|
c@251
|
198 *
|
c@251
|
199 */
|
c@255
|
200 int MFCC::process(const double *inframe, double *outceps)
|
c@251
|
201 {
|
c@255
|
202 double *inputData = (double *)malloc(fftSize * sizeof(double));
|
c@255
|
203 for (int i = 0; i < fftSize; ++i) inputData[i] = inframe[i];
|
c@251
|
204
|
c@255
|
205 window->cut(inputData);
|
c@251
|
206
|
c@251
|
207 /* Calculate the fft on the input frame */
|
c@289
|
208 fft->process(0, inputData, realOut, imagOut);
|
c@251
|
209
|
c@255
|
210 free(inputData);
|
c@255
|
211
|
c@255
|
212 return process(realOut, imagOut, outceps);
|
c@255
|
213 }
|
c@255
|
214
|
c@255
|
215 int MFCC::process(const double *real, const double *imag, double *outceps)
|
c@255
|
216 {
|
c@255
|
217 int i, j;
|
c@255
|
218
|
c@255
|
219 for (i = 0; i < fftSize/2; ++i) {
|
c@255
|
220 fftMag[i] = sqrt(real[i] * real[i] + imag[i] * imag[i]);
|
c@255
|
221 }
|
c@255
|
222
|
c@255
|
223 for (i = 0; i < totalFilters; ++i) {
|
c@255
|
224 earMag[i] = 0.0;
|
c@251
|
225 }
|
c@251
|
226
|
c@251
|
227 /* Multiply by mfccFilterWeights */
|
c@255
|
228 for (i = 0; i < totalFilters; i++) {
|
c@255
|
229 double tmp = 0.0;
|
c@255
|
230 for (j = 0; j < fftSize/2; j++) {
|
c@255
|
231 tmp = tmp + (mfccFilterWeights[i][j] * fftMag[j]);
|
c@251
|
232 }
|
c@255
|
233 if (tmp > 0) earMag[i] = log10(tmp);
|
c@255
|
234 else earMag[i] = 0.0;
|
c@255
|
235
|
c@255
|
236 if (logPower != 1.0) {
|
c@255
|
237 earMag[i] = pow(earMag[i], logPower);
|
c@255
|
238 }
|
c@251
|
239 }
|
c@251
|
240
|
c@251
|
241 /*
|
c@251
|
242 *
|
c@251
|
243 * Calculate now the cepstral coefficients
|
c@251
|
244 * with or without the DC component
|
c@251
|
245 *
|
c@251
|
246 */
|
c@251
|
247
|
c@255
|
248 if (WANT_C0 == 1) {
|
c@251
|
249
|
c@255
|
250 for (i = 0; i < nceps+1; i++) {
|
c@255
|
251 double tmp = 0.;
|
c@255
|
252 for (j = 0; j < totalFilters; j++){
|
c@255
|
253 tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
|
c@251
|
254 }
|
c@251
|
255 outceps[i] = tmp;
|
c@251
|
256 }
|
c@251
|
257 }
|
c@251
|
258 else
|
c@251
|
259 {
|
c@255
|
260 for (i = 1; i < nceps+1; i++) {
|
c@255
|
261 double tmp = 0.;
|
c@255
|
262 for (j = 0; j < totalFilters; j++){
|
c@255
|
263 tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
|
c@251
|
264 }
|
c@251
|
265 outceps[i-1] = tmp;
|
c@251
|
266 }
|
c@251
|
267 }
|
c@251
|
268
|
c@251
|
269 return nceps;
|
c@251
|
270 }
|
c@251
|
271
|