Mercurial > hg > qm-dsp
comparison dsp/mfcc/MFCC.cpp @ 251:c3600d3cfe5c
* Add timbral (MFCC) feature option to segmenter
author | Chris Cannam <c.cannam@qmul.ac.uk> |
---|---|
date | Thu, 10 Jan 2008 16:41:33 +0000 |
parents | |
children | 52c1a295d775 |
comparison
equal
deleted
inserted
replaced
250:a106e551e9a4 | 251:c3600d3cfe5c |
---|---|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ | |
2 | |
3 /* | |
4 QM DSP Library | |
5 | |
6 Centre for Digital Music, Queen Mary, University of London. | |
7 This file copyright 2005 Nicolas Chetry, copyright 2008 QMUL. | |
8 All rights reserved. | |
9 */ | |
10 | |
11 #include <cmath> | |
12 #include <cstdlib> | |
13 | |
14 #include "MFCC.h" | |
15 #include "dsp/transforms/FFT.h" | |
16 #include "base/Window.h" | |
17 | |
18 MFCC::MFCC(MFCCConfig config) | |
19 { | |
20 int i,j; | |
21 | |
22 /* Calculate at startup */ | |
23 double *freqs, *lower, *center, *upper, *triangleHeight, *fftFreqs; | |
24 | |
25 lowestFrequency = 66.6666666; | |
26 linearFilters = 13; | |
27 linearSpacing = 66.66666666; | |
28 logFilters = 27; | |
29 logSpacing = 1.0711703; | |
30 | |
31 /* FFT and analysis window sizes */ | |
32 fftSize = config.fftsize; | |
33 | |
34 totalFilters = linearFilters + logFilters; | |
35 | |
36 samplingRate = config.FS; | |
37 | |
38 /* The number of cepstral componenents */ | |
39 nceps = config.nceps; | |
40 | |
41 /* Set if user want C0 */ | |
42 WANT_C0 = (config.want_c0 ? 1 : 0); | |
43 | |
44 /* Allocate space for feature vector */ | |
45 if (WANT_C0 == 1) { | |
46 ceps = (double*)calloc(nceps+1, sizeof(double)); | |
47 } else { | |
48 ceps = (double*)calloc(nceps, sizeof(double)); | |
49 } | |
50 | |
51 /* Allocate space for local vectors */ | |
52 mfccDCTMatrix = (double**)calloc(nceps+1, sizeof(double*)); | |
53 for (i=0;i<nceps+1; i++) { | |
54 mfccDCTMatrix[i]= (double*)calloc(totalFilters, sizeof(double)); | |
55 } | |
56 | |
57 mfccFilterWeights = (double**)calloc(totalFilters, sizeof(double*)); | |
58 for (i=0;i<totalFilters; i++) { | |
59 mfccFilterWeights[i] = (double*)calloc(fftSize, sizeof(double)); | |
60 } | |
61 | |
62 freqs = (double*)calloc(totalFilters+2,sizeof(double)); | |
63 | |
64 lower = (double*)calloc(totalFilters,sizeof(double)); | |
65 center = (double*)calloc(totalFilters,sizeof(double)); | |
66 upper = (double*)calloc(totalFilters,sizeof(double)); | |
67 | |
68 triangleHeight = (double*)calloc(totalFilters,sizeof(double)); | |
69 fftFreqs = (double*)calloc(fftSize,sizeof(double)); | |
70 | |
71 for (i=0;i<linearFilters;i++) { | |
72 freqs[i] = lowestFrequency + ((double)i) * linearSpacing; | |
73 } | |
74 | |
75 for (i=linearFilters; i<totalFilters+2; i++) { | |
76 freqs[i] = freqs[linearFilters-1] * | |
77 pow(logSpacing, (double)(i-linearFilters+1)); | |
78 } | |
79 | |
80 /* Define lower, center and upper */ | |
81 memcpy(lower, freqs,totalFilters*sizeof(double)); | |
82 memcpy(center, &freqs[1],totalFilters*sizeof(double)); | |
83 memcpy(upper, &freqs[2],totalFilters*sizeof(double)); | |
84 | |
85 for (i=0;i<totalFilters;i++){ | |
86 triangleHeight[i] = 2./(upper[i]-lower[i]); | |
87 } | |
88 | |
89 for (i=0;i<fftSize;i++){ | |
90 fftFreqs[i] = ((double) i / ((double) fftSize ) * | |
91 (double) samplingRate); | |
92 } | |
93 | |
94 /* Build now the mccFilterWeight matrix */ | |
95 for (i=0;i<totalFilters;i++){ | |
96 | |
97 for (j=0;j<fftSize;j++) { | |
98 | |
99 if ((fftFreqs[j] > lower[i]) && (fftFreqs[j] <= center[i])) { | |
100 | |
101 mfccFilterWeights[i][j] = triangleHeight[i] * | |
102 (fftFreqs[j]-lower[i]) / (center[i]-lower[i]); | |
103 | |
104 } | |
105 else | |
106 { | |
107 mfccFilterWeights[i][j] = 0.0; | |
108 } | |
109 | |
110 if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) { | |
111 | |
112 mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + triangleHeight[i] * (upper[i]-fftFreqs[j]) | |
113 / (upper[i]-center[i]); | |
114 } | |
115 else | |
116 { | |
117 mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + 0.0; | |
118 } | |
119 } | |
120 | |
121 } | |
122 | |
123 /* | |
124 * We calculate now mfccDCT matrix | |
125 * NB: +1 because of the DC component | |
126 */ | |
127 | |
128 for (i=0; i<nceps+1; i++) { | |
129 for (j=0; j<totalFilters; j++) { | |
130 mfccDCTMatrix[i][j] = (1./sqrt((double) totalFilters / 2.)) | |
131 * cos((double) i * ((double) j + 0.5) / (double) totalFilters * M_PI); | |
132 } | |
133 } | |
134 | |
135 for (j=0;j<totalFilters;j++){ | |
136 mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfccDCTMatrix[0][j]; | |
137 } | |
138 | |
139 /* The analysis window */ | |
140 window = new Window<double>(HammingWindow, fftSize); | |
141 | |
142 /* Allocate memory for the FFT */ | |
143 imagIn = (double*)calloc(fftSize,sizeof(double)); | |
144 realOut = (double*)calloc(fftSize,sizeof(double)); | |
145 imagOut = (double*)calloc(fftSize,sizeof(double)); | |
146 | |
147 free(freqs); | |
148 free(lower); | |
149 free(center); | |
150 free(upper); | |
151 free(triangleHeight); | |
152 free(fftFreqs); | |
153 } | |
154 | |
155 MFCC::~MFCC() | |
156 { | |
157 int i; | |
158 | |
159 /* Free the structure */ | |
160 for (i=0;i<nceps+1;i++) { | |
161 free(mfccDCTMatrix[i]); | |
162 } | |
163 free(mfccDCTMatrix); | |
164 | |
165 for (i=0;i<totalFilters; i++) { | |
166 free(mfccFilterWeights[i]); | |
167 } | |
168 free(mfccFilterWeights); | |
169 | |
170 /* Free the feature vector */ | |
171 free(ceps); | |
172 | |
173 /* The analysis window */ | |
174 delete window; | |
175 | |
176 /* Free the FFT */ | |
177 free(imagIn); | |
178 free(realOut); | |
179 free(imagOut); | |
180 } | |
181 | |
182 | |
183 /* | |
184 * | |
185 * Extract the MFCC on the input frame | |
186 * | |
187 * looks like we have to have length = fftSize ?????? | |
188 * | |
189 */ | |
190 int MFCC::process(int length, double *inframe, double *outceps) | |
191 { | |
192 int i,j; | |
193 | |
194 double *fftMag; | |
195 double *earMag; | |
196 | |
197 double *inputData; | |
198 | |
199 double tmp; | |
200 | |
201 earMag = (double*)calloc(totalFilters, sizeof(double)); | |
202 inputData = (double*)calloc(fftSize, sizeof(double)); | |
203 fftMag = (double*)calloc(fftSize, sizeof(double)); | |
204 | |
205 /* Zero-pad if needed */ | |
206 memcpy(inputData, inframe, length*sizeof(double)); | |
207 | |
208 window->cut(inputData); | |
209 | |
210 /* Calculate the fft on the input frame */ | |
211 FFT::process(fftSize, 0, inputData, imagIn, realOut, imagOut); | |
212 | |
213 for (i = 0; i < fftSize; ++i) { | |
214 fftMag[i] = sqrt(realOut[i] * realOut[i] + | |
215 imagOut[i] * imagOut[i]); | |
216 } | |
217 | |
218 /* Multiply by mfccFilterWeights */ | |
219 for (i=0;i<totalFilters;i++) { | |
220 tmp = 0.; | |
221 for(j=0;j<fftSize/2; j++) { | |
222 tmp = tmp + (mfccFilterWeights[i][j]*fftMag[j]); | |
223 } | |
224 if (tmp>0) | |
225 earMag[i] = log10(tmp); | |
226 else | |
227 earMag[i] = 0.0; | |
228 } | |
229 | |
230 /* | |
231 * | |
232 * Calculate now the cepstral coefficients | |
233 * with or without the DC component | |
234 * | |
235 */ | |
236 | |
237 if (WANT_C0==1) { | |
238 | |
239 for (i=0;i<nceps+1;i++) { | |
240 tmp = 0.; | |
241 for (j=0;j<totalFilters;j++){ | |
242 tmp = tmp + mfccDCTMatrix[i][j]*earMag[j]; | |
243 } | |
244 outceps[i] = tmp; | |
245 } | |
246 } | |
247 else | |
248 { | |
249 for (i=1;i<nceps+1;i++) { | |
250 tmp = 0.; | |
251 for (j=0;j<totalFilters;j++){ | |
252 tmp = tmp + mfccDCTMatrix[i][j]*earMag[j]; | |
253 } | |
254 outceps[i-1] = tmp; | |
255 } | |
256 } | |
257 | |
258 free(fftMag); | |
259 free(earMag); | |
260 free(inputData); | |
261 | |
262 return nceps; | |
263 } | |
264 |