cannam@26
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
cannam@25
|
2
|
cannam@25
|
3 /*
|
cannam@26
|
4 QM DSP Library
|
cannam@25
|
5
|
cannam@26
|
6 Centre for Digital Music, Queen Mary, University of London.
|
cannam@26
|
7 This file copyright 2005 Nicolas Chetry, copyright 2008 QMUL.
|
cannam@26
|
8 All rights reserved.
|
cannam@26
|
9 */
|
cannam@25
|
10
|
cannam@26
|
11 #include <cmath>
|
cannam@26
|
12 #include <cstdlib>
|
cannam@47
|
13 #include <cstring>
|
cannam@26
|
14
|
cannam@26
|
15 #include "MFCC.h"
|
cannam@26
|
16 #include "dsp/transforms/FFT.h"
|
cannam@26
|
17 #include "base/Window.h"
|
cannam@26
|
18
|
cannam@26
|
19 MFCC::MFCC(MFCCConfig config)
|
cannam@26
|
20 {
|
cannam@26
|
21 int i,j;
|
cannam@26
|
22
|
cannam@26
|
23 /* Calculate at startup */
|
cannam@26
|
24 double *freqs, *lower, *center, *upper, *triangleHeight, *fftFreqs;
|
cannam@25
|
25
|
cannam@26
|
26 lowestFrequency = 66.6666666;
|
cannam@26
|
27 linearFilters = 13;
|
cannam@26
|
28 linearSpacing = 66.66666666;
|
cannam@26
|
29 logFilters = 27;
|
cannam@26
|
30 logSpacing = 1.0711703;
|
cannam@25
|
31
|
cannam@26
|
32 /* FFT and analysis window sizes */
|
cannam@26
|
33 fftSize = config.fftsize;
|
cannam@26
|
34
|
cannam@26
|
35 totalFilters = linearFilters + logFilters;
|
cannam@30
|
36 logPower = config.logpower;
|
cannam@25
|
37
|
cannam@26
|
38 samplingRate = config.FS;
|
cannam@26
|
39
|
cannam@26
|
40 /* The number of cepstral componenents */
|
cannam@26
|
41 nceps = config.nceps;
|
cannam@25
|
42
|
cannam@26
|
43 /* Set if user want C0 */
|
cannam@26
|
44 WANT_C0 = (config.want_c0 ? 1 : 0);
|
cannam@25
|
45
|
cannam@26
|
46 /* Allocate space for feature vector */
|
cannam@26
|
47 if (WANT_C0 == 1) {
|
cannam@26
|
48 ceps = (double*)calloc(nceps+1, sizeof(double));
|
cannam@26
|
49 } else {
|
cannam@26
|
50 ceps = (double*)calloc(nceps, sizeof(double));
|
cannam@26
|
51 }
|
cannam@26
|
52
|
cannam@26
|
53 /* Allocate space for local vectors */
|
cannam@26
|
54 mfccDCTMatrix = (double**)calloc(nceps+1, sizeof(double*));
|
cannam@30
|
55 for (i = 0; i < nceps+1; i++) {
|
cannam@26
|
56 mfccDCTMatrix[i]= (double*)calloc(totalFilters, sizeof(double));
|
cannam@26
|
57 }
|
cannam@26
|
58
|
cannam@26
|
59 mfccFilterWeights = (double**)calloc(totalFilters, sizeof(double*));
|
cannam@30
|
60 for (i = 0; i < totalFilters; i++) {
|
cannam@26
|
61 mfccFilterWeights[i] = (double*)calloc(fftSize, sizeof(double));
|
cannam@26
|
62 }
|
cannam@26
|
63
|
cannam@26
|
64 freqs = (double*)calloc(totalFilters+2,sizeof(double));
|
cannam@26
|
65
|
cannam@26
|
66 lower = (double*)calloc(totalFilters,sizeof(double));
|
cannam@26
|
67 center = (double*)calloc(totalFilters,sizeof(double));
|
cannam@26
|
68 upper = (double*)calloc(totalFilters,sizeof(double));
|
cannam@26
|
69
|
cannam@26
|
70 triangleHeight = (double*)calloc(totalFilters,sizeof(double));
|
cannam@26
|
71 fftFreqs = (double*)calloc(fftSize,sizeof(double));
|
cannam@25
|
72
|
cannam@30
|
73 for (i = 0; i < linearFilters; i++) {
|
cannam@26
|
74 freqs[i] = lowestFrequency + ((double)i) * linearSpacing;
|
cannam@26
|
75 }
|
cannam@26
|
76
|
cannam@30
|
77 for (i = linearFilters; i < totalFilters+2; i++) {
|
cannam@26
|
78 freqs[i] = freqs[linearFilters-1] *
|
cannam@26
|
79 pow(logSpacing, (double)(i-linearFilters+1));
|
cannam@26
|
80 }
|
cannam@26
|
81
|
cannam@26
|
82 /* Define lower, center and upper */
|
cannam@26
|
83 memcpy(lower, freqs,totalFilters*sizeof(double));
|
cannam@26
|
84 memcpy(center, &freqs[1],totalFilters*sizeof(double));
|
cannam@26
|
85 memcpy(upper, &freqs[2],totalFilters*sizeof(double));
|
cannam@26
|
86
|
cannam@26
|
87 for (i=0;i<totalFilters;i++){
|
cannam@26
|
88 triangleHeight[i] = 2./(upper[i]-lower[i]);
|
cannam@26
|
89 }
|
cannam@26
|
90
|
cannam@26
|
91 for (i=0;i<fftSize;i++){
|
cannam@26
|
92 fftFreqs[i] = ((double) i / ((double) fftSize ) *
|
cannam@26
|
93 (double) samplingRate);
|
cannam@26
|
94 }
|
cannam@25
|
95
|
cannam@26
|
96 /* Build now the mccFilterWeight matrix */
|
cannam@26
|
97 for (i=0;i<totalFilters;i++){
|
cannam@25
|
98
|
cannam@26
|
99 for (j=0;j<fftSize;j++) {
|
cannam@26
|
100
|
cannam@26
|
101 if ((fftFreqs[j] > lower[i]) && (fftFreqs[j] <= center[i])) {
|
cannam@26
|
102
|
cannam@26
|
103 mfccFilterWeights[i][j] = triangleHeight[i] *
|
cannam@26
|
104 (fftFreqs[j]-lower[i]) / (center[i]-lower[i]);
|
cannam@26
|
105
|
cannam@26
|
106 }
|
cannam@26
|
107 else
|
cannam@26
|
108 {
|
cannam@26
|
109 mfccFilterWeights[i][j] = 0.0;
|
cannam@26
|
110 }
|
cannam@25
|
111
|
cannam@26
|
112 if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) {
|
cannam@25
|
113
|
cannam@30
|
114 mfccFilterWeights[i][j] = mfccFilterWeights[i][j]
|
cannam@30
|
115 + triangleHeight[i] * (upper[i]-fftFreqs[j])
|
cannam@26
|
116 / (upper[i]-center[i]);
|
cannam@26
|
117 }
|
cannam@26
|
118 else
|
cannam@26
|
119 {
|
cannam@26
|
120 mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + 0.0;
|
cannam@26
|
121 }
|
cannam@25
|
122 }
|
cannam@25
|
123
|
cannam@26
|
124 }
|
cannam@25
|
125
|
cannam@26
|
126 /*
|
cannam@26
|
127 * We calculate now mfccDCT matrix
|
cannam@26
|
128 * NB: +1 because of the DC component
|
cannam@26
|
129 */
|
cannam@29
|
130
|
cannam@29
|
131 const double pi = 3.14159265358979323846264338327950288;
|
cannam@26
|
132
|
cannam@30
|
133 for (i = 0; i < nceps+1; i++) {
|
cannam@30
|
134 for (j = 0; j < totalFilters; j++) {
|
cannam@26
|
135 mfccDCTMatrix[i][j] = (1./sqrt((double) totalFilters / 2.))
|
cannam@29
|
136 * cos((double) i * ((double) j + 0.5) / (double) totalFilters * pi);
|
cannam@25
|
137 }
|
cannam@25
|
138 }
|
cannam@25
|
139
|
cannam@30
|
140 for (j = 0; j < totalFilters; j++){
|
cannam@30
|
141 mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfccDCTMatrix[0][j];
|
cannam@26
|
142 }
|
cannam@26
|
143
|
cannam@26
|
144 /* The analysis window */
|
cannam@32
|
145 window = new Window<double>(config.window, fftSize);
|
cannam@25
|
146
|
cannam@26
|
147 /* Allocate memory for the FFT */
|
cannam@30
|
148 imagIn = (double*)calloc(fftSize, sizeof(double));
|
cannam@30
|
149 realOut = (double*)calloc(fftSize, sizeof(double));
|
cannam@30
|
150 imagOut = (double*)calloc(fftSize, sizeof(double));
|
cannam@30
|
151
|
cannam@30
|
152 earMag = (double*)calloc(totalFilters, sizeof(double));
|
cannam@30
|
153 fftMag = (double*)calloc(fftSize/2, sizeof(double));
|
cannam@25
|
154
|
cannam@26
|
155 free(freqs);
|
cannam@26
|
156 free(lower);
|
cannam@26
|
157 free(center);
|
cannam@26
|
158 free(upper);
|
cannam@26
|
159 free(triangleHeight);
|
cannam@26
|
160 free(fftFreqs);
|
cannam@25
|
161 }
|
cannam@25
|
162
|
cannam@26
|
163 MFCC::~MFCC()
|
cannam@26
|
164 {
|
cannam@26
|
165 int i;
|
cannam@26
|
166
|
cannam@26
|
167 /* Free the structure */
|
cannam@30
|
168 for (i = 0; i < nceps+1; i++) {
|
cannam@26
|
169 free(mfccDCTMatrix[i]);
|
cannam@26
|
170 }
|
cannam@26
|
171 free(mfccDCTMatrix);
|
cannam@26
|
172
|
cannam@30
|
173 for (i = 0; i < totalFilters; i++) {
|
cannam@26
|
174 free(mfccFilterWeights[i]);
|
cannam@26
|
175 }
|
cannam@26
|
176 free(mfccFilterWeights);
|
cannam@26
|
177
|
cannam@26
|
178 /* Free the feature vector */
|
cannam@26
|
179 free(ceps);
|
cannam@26
|
180
|
cannam@26
|
181 /* The analysis window */
|
cannam@26
|
182 delete window;
|
cannam@30
|
183
|
cannam@30
|
184 free(earMag);
|
cannam@30
|
185 free(fftMag);
|
cannam@26
|
186
|
cannam@26
|
187 /* Free the FFT */
|
cannam@26
|
188 free(imagIn);
|
cannam@26
|
189 free(realOut);
|
cannam@26
|
190 free(imagOut);
|
cannam@26
|
191 }
|
cannam@25
|
192
|
cannam@25
|
193
|
cannam@25
|
194 /*
|
cannam@25
|
195 *
|
cannam@25
|
196 * Extract the MFCC on the input frame
|
cannam@25
|
197 *
|
cannam@25
|
198 */
|
cannam@30
|
199 int MFCC::process(const double *inframe, double *outceps)
|
cannam@26
|
200 {
|
cannam@30
|
201 double *inputData = (double *)malloc(fftSize * sizeof(double));
|
cannam@30
|
202 for (int i = 0; i < fftSize; ++i) inputData[i] = inframe[i];
|
cannam@25
|
203
|
cannam@30
|
204 window->cut(inputData);
|
cannam@26
|
205
|
cannam@26
|
206 /* Calculate the fft on the input frame */
|
cannam@26
|
207 FFT::process(fftSize, 0, inputData, imagIn, realOut, imagOut);
|
cannam@25
|
208
|
cannam@30
|
209 free(inputData);
|
cannam@30
|
210
|
cannam@30
|
211 return process(realOut, imagOut, outceps);
|
cannam@30
|
212 }
|
cannam@30
|
213
|
cannam@30
|
214 int MFCC::process(const double *real, const double *imag, double *outceps)
|
cannam@30
|
215 {
|
cannam@30
|
216 int i, j;
|
cannam@30
|
217
|
cannam@30
|
218 for (i = 0; i < fftSize/2; ++i) {
|
cannam@30
|
219 fftMag[i] = sqrt(real[i] * real[i] + imag[i] * imag[i]);
|
cannam@30
|
220 }
|
cannam@30
|
221
|
cannam@30
|
222 for (i = 0; i < totalFilters; ++i) {
|
cannam@30
|
223 earMag[i] = 0.0;
|
cannam@25
|
224 }
|
cannam@25
|
225
|
cannam@26
|
226 /* Multiply by mfccFilterWeights */
|
cannam@30
|
227 for (i = 0; i < totalFilters; i++) {
|
cannam@30
|
228 double tmp = 0.0;
|
cannam@30
|
229 for (j = 0; j < fftSize/2; j++) {
|
cannam@30
|
230 tmp = tmp + (mfccFilterWeights[i][j] * fftMag[j]);
|
cannam@26
|
231 }
|
cannam@30
|
232 if (tmp > 0) earMag[i] = log10(tmp);
|
cannam@30
|
233 else earMag[i] = 0.0;
|
cannam@30
|
234
|
cannam@30
|
235 if (logPower != 1.0) {
|
cannam@30
|
236 earMag[i] = pow(earMag[i], logPower);
|
cannam@30
|
237 }
|
cannam@26
|
238 }
|
cannam@26
|
239
|
cannam@26
|
240 /*
|
cannam@26
|
241 *
|
cannam@26
|
242 * Calculate now the cepstral coefficients
|
cannam@26
|
243 * with or without the DC component
|
cannam@26
|
244 *
|
cannam@26
|
245 */
|
cannam@26
|
246
|
cannam@30
|
247 if (WANT_C0 == 1) {
|
cannam@26
|
248
|
cannam@30
|
249 for (i = 0; i < nceps+1; i++) {
|
cannam@30
|
250 double tmp = 0.;
|
cannam@30
|
251 for (j = 0; j < totalFilters; j++){
|
cannam@30
|
252 tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
|
cannam@26
|
253 }
|
cannam@26
|
254 outceps[i] = tmp;
|
cannam@26
|
255 }
|
cannam@26
|
256 }
|
cannam@25
|
257 else
|
cannam@26
|
258 {
|
cannam@30
|
259 for (i = 1; i < nceps+1; i++) {
|
cannam@30
|
260 double tmp = 0.;
|
cannam@30
|
261 for (j = 0; j < totalFilters; j++){
|
cannam@30
|
262 tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
|
cannam@26
|
263 }
|
cannam@26
|
264 outceps[i-1] = tmp;
|
cannam@25
|
265 }
|
cannam@26
|
266 }
|
cannam@25
|
267
|
cannam@26
|
268 return nceps;
|
cannam@25
|
269 }
|
cannam@25
|
270
|