comparison dsp/mfcc/MFCC.cpp @ 251:c3600d3cfe5c

* Add timbral (MFCC) feature option to segmenter
author Chris Cannam <c.cannam@qmul.ac.uk>
date Thu, 10 Jan 2008 16:41:33 +0000
parents
children 52c1a295d775
comparison
equal deleted inserted replaced
250:a106e551e9a4 251:c3600d3cfe5c
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2
3 /*
4 QM DSP Library
5
6 Centre for Digital Music, Queen Mary, University of London.
7 This file copyright 2005 Nicolas Chetry, copyright 2008 QMUL.
8 All rights reserved.
9 */
10
11 #include <cmath>
12 #include <cstdlib>
13
14 #include "MFCC.h"
15 #include "dsp/transforms/FFT.h"
16 #include "base/Window.h"
17
18 MFCC::MFCC(MFCCConfig config)
19 {
20 int i,j;
21
22 /* Calculate at startup */
23 double *freqs, *lower, *center, *upper, *triangleHeight, *fftFreqs;
24
25 lowestFrequency = 66.6666666;
26 linearFilters = 13;
27 linearSpacing = 66.66666666;
28 logFilters = 27;
29 logSpacing = 1.0711703;
30
31 /* FFT and analysis window sizes */
32 fftSize = config.fftsize;
33
34 totalFilters = linearFilters + logFilters;
35
36 samplingRate = config.FS;
37
38 /* The number of cepstral componenents */
39 nceps = config.nceps;
40
41 /* Set if user want C0 */
42 WANT_C0 = (config.want_c0 ? 1 : 0);
43
44 /* Allocate space for feature vector */
45 if (WANT_C0 == 1) {
46 ceps = (double*)calloc(nceps+1, sizeof(double));
47 } else {
48 ceps = (double*)calloc(nceps, sizeof(double));
49 }
50
51 /* Allocate space for local vectors */
52 mfccDCTMatrix = (double**)calloc(nceps+1, sizeof(double*));
53 for (i=0;i<nceps+1; i++) {
54 mfccDCTMatrix[i]= (double*)calloc(totalFilters, sizeof(double));
55 }
56
57 mfccFilterWeights = (double**)calloc(totalFilters, sizeof(double*));
58 for (i=0;i<totalFilters; i++) {
59 mfccFilterWeights[i] = (double*)calloc(fftSize, sizeof(double));
60 }
61
62 freqs = (double*)calloc(totalFilters+2,sizeof(double));
63
64 lower = (double*)calloc(totalFilters,sizeof(double));
65 center = (double*)calloc(totalFilters,sizeof(double));
66 upper = (double*)calloc(totalFilters,sizeof(double));
67
68 triangleHeight = (double*)calloc(totalFilters,sizeof(double));
69 fftFreqs = (double*)calloc(fftSize,sizeof(double));
70
71 for (i=0;i<linearFilters;i++) {
72 freqs[i] = lowestFrequency + ((double)i) * linearSpacing;
73 }
74
75 for (i=linearFilters; i<totalFilters+2; i++) {
76 freqs[i] = freqs[linearFilters-1] *
77 pow(logSpacing, (double)(i-linearFilters+1));
78 }
79
80 /* Define lower, center and upper */
81 memcpy(lower, freqs,totalFilters*sizeof(double));
82 memcpy(center, &freqs[1],totalFilters*sizeof(double));
83 memcpy(upper, &freqs[2],totalFilters*sizeof(double));
84
85 for (i=0;i<totalFilters;i++){
86 triangleHeight[i] = 2./(upper[i]-lower[i]);
87 }
88
89 for (i=0;i<fftSize;i++){
90 fftFreqs[i] = ((double) i / ((double) fftSize ) *
91 (double) samplingRate);
92 }
93
94 /* Build now the mccFilterWeight matrix */
95 for (i=0;i<totalFilters;i++){
96
97 for (j=0;j<fftSize;j++) {
98
99 if ((fftFreqs[j] > lower[i]) && (fftFreqs[j] <= center[i])) {
100
101 mfccFilterWeights[i][j] = triangleHeight[i] *
102 (fftFreqs[j]-lower[i]) / (center[i]-lower[i]);
103
104 }
105 else
106 {
107 mfccFilterWeights[i][j] = 0.0;
108 }
109
110 if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) {
111
112 mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + triangleHeight[i] * (upper[i]-fftFreqs[j])
113 / (upper[i]-center[i]);
114 }
115 else
116 {
117 mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + 0.0;
118 }
119 }
120
121 }
122
123 /*
124 * We calculate now mfccDCT matrix
125 * NB: +1 because of the DC component
126 */
127
128 for (i=0; i<nceps+1; i++) {
129 for (j=0; j<totalFilters; j++) {
130 mfccDCTMatrix[i][j] = (1./sqrt((double) totalFilters / 2.))
131 * cos((double) i * ((double) j + 0.5) / (double) totalFilters * M_PI);
132 }
133 }
134
135 for (j=0;j<totalFilters;j++){
136 mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfccDCTMatrix[0][j];
137 }
138
139 /* The analysis window */
140 window = new Window<double>(HammingWindow, fftSize);
141
142 /* Allocate memory for the FFT */
143 imagIn = (double*)calloc(fftSize,sizeof(double));
144 realOut = (double*)calloc(fftSize,sizeof(double));
145 imagOut = (double*)calloc(fftSize,sizeof(double));
146
147 free(freqs);
148 free(lower);
149 free(center);
150 free(upper);
151 free(triangleHeight);
152 free(fftFreqs);
153 }
154
155 MFCC::~MFCC()
156 {
157 int i;
158
159 /* Free the structure */
160 for (i=0;i<nceps+1;i++) {
161 free(mfccDCTMatrix[i]);
162 }
163 free(mfccDCTMatrix);
164
165 for (i=0;i<totalFilters; i++) {
166 free(mfccFilterWeights[i]);
167 }
168 free(mfccFilterWeights);
169
170 /* Free the feature vector */
171 free(ceps);
172
173 /* The analysis window */
174 delete window;
175
176 /* Free the FFT */
177 free(imagIn);
178 free(realOut);
179 free(imagOut);
180 }
181
182
183 /*
184 *
185 * Extract the MFCC on the input frame
186 *
187 * looks like we have to have length = fftSize ??????
188 *
189 */
190 int MFCC::process(int length, double *inframe, double *outceps)
191 {
192 int i,j;
193
194 double *fftMag;
195 double *earMag;
196
197 double *inputData;
198
199 double tmp;
200
201 earMag = (double*)calloc(totalFilters, sizeof(double));
202 inputData = (double*)calloc(fftSize, sizeof(double));
203 fftMag = (double*)calloc(fftSize, sizeof(double));
204
205 /* Zero-pad if needed */
206 memcpy(inputData, inframe, length*sizeof(double));
207
208 window->cut(inputData);
209
210 /* Calculate the fft on the input frame */
211 FFT::process(fftSize, 0, inputData, imagIn, realOut, imagOut);
212
213 for (i = 0; i < fftSize; ++i) {
214 fftMag[i] = sqrt(realOut[i] * realOut[i] +
215 imagOut[i] * imagOut[i]);
216 }
217
218 /* Multiply by mfccFilterWeights */
219 for (i=0;i<totalFilters;i++) {
220 tmp = 0.;
221 for(j=0;j<fftSize/2; j++) {
222 tmp = tmp + (mfccFilterWeights[i][j]*fftMag[j]);
223 }
224 if (tmp>0)
225 earMag[i] = log10(tmp);
226 else
227 earMag[i] = 0.0;
228 }
229
230 /*
231 *
232 * Calculate now the cepstral coefficients
233 * with or without the DC component
234 *
235 */
236
237 if (WANT_C0==1) {
238
239 for (i=0;i<nceps+1;i++) {
240 tmp = 0.;
241 for (j=0;j<totalFilters;j++){
242 tmp = tmp + mfccDCTMatrix[i][j]*earMag[j];
243 }
244 outceps[i] = tmp;
245 }
246 }
247 else
248 {
249 for (i=1;i<nceps+1;i++) {
250 tmp = 0.;
251 for (j=0;j<totalFilters;j++){
252 tmp = tmp + mfccDCTMatrix[i][j]*earMag[j];
253 }
254 outceps[i-1] = tmp;
255 }
256 }
257
258 free(fftMag);
259 free(earMag);
260 free(inputData);
261
262 return nceps;
263 }
264