c@251
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
c@251
|
2
|
c@251
|
3 /*
|
c@251
|
4 QM DSP Library
|
c@251
|
5
|
c@251
|
6 Centre for Digital Music, Queen Mary, University of London.
|
c@251
|
7 This file copyright 2005 Nicolas Chetry, copyright 2008 QMUL.
|
c@251
|
8 All rights reserved.
|
c@251
|
9 */
|
c@251
|
10
|
c@251
|
11 #include <cmath>
|
c@251
|
12 #include <cstdlib>
|
c@251
|
13
|
c@251
|
14 #include "MFCC.h"
|
c@251
|
15 #include "dsp/transforms/FFT.h"
|
c@251
|
16 #include "base/Window.h"
|
c@251
|
17
|
c@251
|
18 MFCC::MFCC(MFCCConfig config)
|
c@251
|
19 {
|
c@251
|
20 int i,j;
|
c@251
|
21
|
c@251
|
22 /* Calculate at startup */
|
c@251
|
23 double *freqs, *lower, *center, *upper, *triangleHeight, *fftFreqs;
|
c@251
|
24
|
c@251
|
25 lowestFrequency = 66.6666666;
|
c@251
|
26 linearFilters = 13;
|
c@251
|
27 linearSpacing = 66.66666666;
|
c@251
|
28 logFilters = 27;
|
c@251
|
29 logSpacing = 1.0711703;
|
c@251
|
30
|
c@251
|
31 /* FFT and analysis window sizes */
|
c@251
|
32 fftSize = config.fftsize;
|
c@251
|
33
|
c@251
|
34 totalFilters = linearFilters + logFilters;
|
c@255
|
35 logPower = config.logpower;
|
c@251
|
36
|
c@251
|
37 samplingRate = config.FS;
|
c@251
|
38
|
c@251
|
39 /* The number of cepstral componenents */
|
c@251
|
40 nceps = config.nceps;
|
c@251
|
41
|
c@251
|
42 /* Set if user want C0 */
|
c@251
|
43 WANT_C0 = (config.want_c0 ? 1 : 0);
|
c@251
|
44
|
c@251
|
45 /* Allocate space for feature vector */
|
c@251
|
46 if (WANT_C0 == 1) {
|
c@251
|
47 ceps = (double*)calloc(nceps+1, sizeof(double));
|
c@251
|
48 } else {
|
c@251
|
49 ceps = (double*)calloc(nceps, sizeof(double));
|
c@251
|
50 }
|
c@251
|
51
|
c@251
|
52 /* Allocate space for local vectors */
|
c@251
|
53 mfccDCTMatrix = (double**)calloc(nceps+1, sizeof(double*));
|
c@255
|
54 for (i = 0; i < nceps+1; i++) {
|
c@251
|
55 mfccDCTMatrix[i]= (double*)calloc(totalFilters, sizeof(double));
|
c@251
|
56 }
|
c@251
|
57
|
c@251
|
58 mfccFilterWeights = (double**)calloc(totalFilters, sizeof(double*));
|
c@255
|
59 for (i = 0; i < totalFilters; i++) {
|
c@251
|
60 mfccFilterWeights[i] = (double*)calloc(fftSize, sizeof(double));
|
c@251
|
61 }
|
c@251
|
62
|
c@251
|
63 freqs = (double*)calloc(totalFilters+2,sizeof(double));
|
c@251
|
64
|
c@251
|
65 lower = (double*)calloc(totalFilters,sizeof(double));
|
c@251
|
66 center = (double*)calloc(totalFilters,sizeof(double));
|
c@251
|
67 upper = (double*)calloc(totalFilters,sizeof(double));
|
c@251
|
68
|
c@251
|
69 triangleHeight = (double*)calloc(totalFilters,sizeof(double));
|
c@251
|
70 fftFreqs = (double*)calloc(fftSize,sizeof(double));
|
c@251
|
71
|
c@255
|
72 for (i = 0; i < linearFilters; i++) {
|
c@251
|
73 freqs[i] = lowestFrequency + ((double)i) * linearSpacing;
|
c@251
|
74 }
|
c@251
|
75
|
c@255
|
76 for (i = linearFilters; i < totalFilters+2; i++) {
|
c@251
|
77 freqs[i] = freqs[linearFilters-1] *
|
c@251
|
78 pow(logSpacing, (double)(i-linearFilters+1));
|
c@251
|
79 }
|
c@251
|
80
|
c@251
|
81 /* Define lower, center and upper */
|
c@251
|
82 memcpy(lower, freqs,totalFilters*sizeof(double));
|
c@251
|
83 memcpy(center, &freqs[1],totalFilters*sizeof(double));
|
c@251
|
84 memcpy(upper, &freqs[2],totalFilters*sizeof(double));
|
c@251
|
85
|
c@251
|
86 for (i=0;i<totalFilters;i++){
|
c@251
|
87 triangleHeight[i] = 2./(upper[i]-lower[i]);
|
c@251
|
88 }
|
c@251
|
89
|
c@251
|
90 for (i=0;i<fftSize;i++){
|
c@251
|
91 fftFreqs[i] = ((double) i / ((double) fftSize ) *
|
c@251
|
92 (double) samplingRate);
|
c@251
|
93 }
|
c@251
|
94
|
c@251
|
95 /* Build now the mccFilterWeight matrix */
|
c@251
|
96 for (i=0;i<totalFilters;i++){
|
c@251
|
97
|
c@251
|
98 for (j=0;j<fftSize;j++) {
|
c@251
|
99
|
c@251
|
100 if ((fftFreqs[j] > lower[i]) && (fftFreqs[j] <= center[i])) {
|
c@251
|
101
|
c@251
|
102 mfccFilterWeights[i][j] = triangleHeight[i] *
|
c@251
|
103 (fftFreqs[j]-lower[i]) / (center[i]-lower[i]);
|
c@251
|
104
|
c@251
|
105 }
|
c@251
|
106 else
|
c@251
|
107 {
|
c@251
|
108 mfccFilterWeights[i][j] = 0.0;
|
c@251
|
109 }
|
c@251
|
110
|
c@251
|
111 if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) {
|
c@251
|
112
|
c@255
|
113 mfccFilterWeights[i][j] = mfccFilterWeights[i][j]
|
c@255
|
114 + triangleHeight[i] * (upper[i]-fftFreqs[j])
|
c@251
|
115 / (upper[i]-center[i]);
|
c@251
|
116 }
|
c@251
|
117 else
|
c@251
|
118 {
|
c@251
|
119 mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + 0.0;
|
c@251
|
120 }
|
c@251
|
121 }
|
c@251
|
122
|
c@251
|
123 }
|
c@251
|
124
|
c@251
|
125 /*
|
c@251
|
126 * We calculate now mfccDCT matrix
|
c@251
|
127 * NB: +1 because of the DC component
|
c@251
|
128 */
|
c@254
|
129
|
c@254
|
130 const double pi = 3.14159265358979323846264338327950288;
|
c@251
|
131
|
c@255
|
132 for (i = 0; i < nceps+1; i++) {
|
c@255
|
133 for (j = 0; j < totalFilters; j++) {
|
c@251
|
134 mfccDCTMatrix[i][j] = (1./sqrt((double) totalFilters / 2.))
|
c@254
|
135 * cos((double) i * ((double) j + 0.5) / (double) totalFilters * pi);
|
c@251
|
136 }
|
c@251
|
137 }
|
c@251
|
138
|
c@255
|
139 for (j = 0; j < totalFilters; j++){
|
c@255
|
140 mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfccDCTMatrix[0][j];
|
c@251
|
141 }
|
c@251
|
142
|
c@251
|
143 /* The analysis window */
|
c@257
|
144 window = new Window<double>(config.window, fftSize);
|
c@251
|
145
|
c@251
|
146 /* Allocate memory for the FFT */
|
c@255
|
147 imagIn = (double*)calloc(fftSize, sizeof(double));
|
c@255
|
148 realOut = (double*)calloc(fftSize, sizeof(double));
|
c@255
|
149 imagOut = (double*)calloc(fftSize, sizeof(double));
|
c@255
|
150
|
c@255
|
151 earMag = (double*)calloc(totalFilters, sizeof(double));
|
c@255
|
152 fftMag = (double*)calloc(fftSize/2, sizeof(double));
|
c@251
|
153
|
c@251
|
154 free(freqs);
|
c@251
|
155 free(lower);
|
c@251
|
156 free(center);
|
c@251
|
157 free(upper);
|
c@251
|
158 free(triangleHeight);
|
c@251
|
159 free(fftFreqs);
|
c@251
|
160 }
|
c@251
|
161
|
c@251
|
162 MFCC::~MFCC()
|
c@251
|
163 {
|
c@251
|
164 int i;
|
c@251
|
165
|
c@251
|
166 /* Free the structure */
|
c@255
|
167 for (i = 0; i < nceps+1; i++) {
|
c@251
|
168 free(mfccDCTMatrix[i]);
|
c@251
|
169 }
|
c@251
|
170 free(mfccDCTMatrix);
|
c@251
|
171
|
c@255
|
172 for (i = 0; i < totalFilters; i++) {
|
c@251
|
173 free(mfccFilterWeights[i]);
|
c@251
|
174 }
|
c@251
|
175 free(mfccFilterWeights);
|
c@251
|
176
|
c@251
|
177 /* Free the feature vector */
|
c@251
|
178 free(ceps);
|
c@251
|
179
|
c@251
|
180 /* The analysis window */
|
c@251
|
181 delete window;
|
c@255
|
182
|
c@255
|
183 free(earMag);
|
c@255
|
184 free(fftMag);
|
c@251
|
185
|
c@251
|
186 /* Free the FFT */
|
c@251
|
187 free(imagIn);
|
c@251
|
188 free(realOut);
|
c@251
|
189 free(imagOut);
|
c@251
|
190 }
|
c@251
|
191
|
c@251
|
192
|
c@251
|
193 /*
|
c@251
|
194 *
|
c@251
|
195 * Extract the MFCC on the input frame
|
c@251
|
196 *
|
c@251
|
197 */
|
c@255
|
198 int MFCC::process(const double *inframe, double *outceps)
|
c@251
|
199 {
|
c@255
|
200 double *inputData = (double *)malloc(fftSize * sizeof(double));
|
c@255
|
201 for (int i = 0; i < fftSize; ++i) inputData[i] = inframe[i];
|
c@251
|
202
|
c@255
|
203 window->cut(inputData);
|
c@251
|
204
|
c@251
|
205 /* Calculate the fft on the input frame */
|
c@251
|
206 FFT::process(fftSize, 0, inputData, imagIn, realOut, imagOut);
|
c@251
|
207
|
c@255
|
208 free(inputData);
|
c@255
|
209
|
c@255
|
210 return process(realOut, imagOut, outceps);
|
c@255
|
211 }
|
c@255
|
212
|
c@255
|
213 int MFCC::process(const double *real, const double *imag, double *outceps)
|
c@255
|
214 {
|
c@255
|
215 int i, j;
|
c@255
|
216
|
c@255
|
217 for (i = 0; i < fftSize/2; ++i) {
|
c@255
|
218 fftMag[i] = sqrt(real[i] * real[i] + imag[i] * imag[i]);
|
c@255
|
219 }
|
c@255
|
220
|
c@255
|
221 for (i = 0; i < totalFilters; ++i) {
|
c@255
|
222 earMag[i] = 0.0;
|
c@251
|
223 }
|
c@251
|
224
|
c@251
|
225 /* Multiply by mfccFilterWeights */
|
c@255
|
226 for (i = 0; i < totalFilters; i++) {
|
c@255
|
227 double tmp = 0.0;
|
c@255
|
228 for (j = 0; j < fftSize/2; j++) {
|
c@255
|
229 tmp = tmp + (mfccFilterWeights[i][j] * fftMag[j]);
|
c@251
|
230 }
|
c@255
|
231 if (tmp > 0) earMag[i] = log10(tmp);
|
c@255
|
232 else earMag[i] = 0.0;
|
c@255
|
233
|
c@255
|
234 if (logPower != 1.0) {
|
c@255
|
235 earMag[i] = pow(earMag[i], logPower);
|
c@255
|
236 }
|
c@251
|
237 }
|
c@251
|
238
|
c@251
|
239 /*
|
c@251
|
240 *
|
c@251
|
241 * Calculate now the cepstral coefficients
|
c@251
|
242 * with or without the DC component
|
c@251
|
243 *
|
c@251
|
244 */
|
c@251
|
245
|
c@255
|
246 if (WANT_C0 == 1) {
|
c@251
|
247
|
c@255
|
248 for (i = 0; i < nceps+1; i++) {
|
c@255
|
249 double tmp = 0.;
|
c@255
|
250 for (j = 0; j < totalFilters; j++){
|
c@255
|
251 tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
|
c@251
|
252 }
|
c@251
|
253 outceps[i] = tmp;
|
c@251
|
254 }
|
c@251
|
255 }
|
c@251
|
256 else
|
c@251
|
257 {
|
c@255
|
258 for (i = 1; i < nceps+1; i++) {
|
c@255
|
259 double tmp = 0.;
|
c@255
|
260 for (j = 0; j < totalFilters; j++){
|
c@255
|
261 tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
|
c@251
|
262 }
|
c@251
|
263 outceps[i-1] = tmp;
|
c@251
|
264 }
|
c@251
|
265 }
|
c@251
|
266
|
c@251
|
267 return nceps;
|
c@251
|
268 }
|
c@251
|
269
|