MFCC.cpp
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2 
3 /*
4  QM DSP Library
5 
6  Centre for Digital Music, Queen Mary, University of London.
7  This file copyright 2005 Nicolas Chetry, copyright 2008 QMUL.
8 
9  This program is free software; you can redistribute it and/or
10  modify it under the terms of the GNU General Public License as
11  published by the Free Software Foundation; either version 2 of the
12  License, or (at your option) any later version. See the file
13  COPYING included with this distribution for more information.
14 */
15 
16 #include <cmath>
17 #include <cstdlib>
18 #include <cstring>
19 
20 #include "MFCC.h"
21 #include "dsp/transforms/FFT.h"
22 #include "base/Window.h"
23 
25 {
26  int i,j;
27 
28  /* Calculate at startup */
29  double *freqs, *lower, *center, *upper, *triangleHeight, *fftFreqs;
30 
31  lowestFrequency = 66.6666666;
32  linearFilters = 13;
33  linearSpacing = 66.66666666;
34  logFilters = 27;
35  logSpacing = 1.0711703;
36 
37  /* FFT and analysis window sizes */
38  fftSize = config.fftsize;
39  fft = new FFTReal(fftSize);
40 
42  logPower = config.logpower;
43 
44  samplingRate = config.FS;
45 
46  /* The number of cepstral componenents */
47  nceps = config.nceps;
48 
49  /* Set if user want C0 */
50  WANT_C0 = (config.want_c0 ? 1 : 0);
51 
52  /* Allocate space for feature vector */
53  if (WANT_C0 == 1) {
54  ceps = (double*)calloc(nceps+1, sizeof(double));
55  } else {
56  ceps = (double*)calloc(nceps, sizeof(double));
57  }
58 
59  /* Allocate space for local vectors */
60  mfccDCTMatrix = (double**)calloc(nceps+1, sizeof(double*));
61  for (i = 0; i < nceps+1; i++) {
62  mfccDCTMatrix[i]= (double*)calloc(totalFilters, sizeof(double));
63  }
64 
65  mfccFilterWeights = (double**)calloc(totalFilters, sizeof(double*));
66  for (i = 0; i < totalFilters; i++) {
67  mfccFilterWeights[i] = (double*)calloc(fftSize, sizeof(double));
68  }
69 
70  freqs = (double*)calloc(totalFilters+2,sizeof(double));
71 
72  lower = (double*)calloc(totalFilters,sizeof(double));
73  center = (double*)calloc(totalFilters,sizeof(double));
74  upper = (double*)calloc(totalFilters,sizeof(double));
75 
76  triangleHeight = (double*)calloc(totalFilters,sizeof(double));
77  fftFreqs = (double*)calloc(fftSize,sizeof(double));
78 
79  for (i = 0; i < linearFilters; i++) {
80  freqs[i] = lowestFrequency + ((double)i) * linearSpacing;
81  }
82 
83  for (i = linearFilters; i < totalFilters+2; i++) {
84  freqs[i] = freqs[linearFilters-1] *
85  pow(logSpacing, (double)(i-linearFilters+1));
86  }
87 
88  /* Define lower, center and upper */
89  memcpy(lower, freqs,totalFilters*sizeof(double));
90  memcpy(center, &freqs[1],totalFilters*sizeof(double));
91  memcpy(upper, &freqs[2],totalFilters*sizeof(double));
92 
93  for (i=0;i<totalFilters;i++){
94  triangleHeight[i] = 2./(upper[i]-lower[i]);
95  }
96 
97  for (i=0;i<fftSize;i++){
98  fftFreqs[i] = ((double) i / ((double) fftSize ) *
99  (double) samplingRate);
100  }
101 
102  /* Build now the mccFilterWeight matrix */
103  for (i=0;i<totalFilters;i++){
104 
105  for (j=0;j<fftSize;j++) {
106 
107  if ((fftFreqs[j] > lower[i]) && (fftFreqs[j] <= center[i])) {
108 
109  mfccFilterWeights[i][j] = triangleHeight[i] *
110  (fftFreqs[j]-lower[i]) / (center[i]-lower[i]);
111 
112  }
113  else
114  {
115  mfccFilterWeights[i][j] = 0.0;
116  }
117 
118  if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) {
119 
121  + triangleHeight[i] * (upper[i]-fftFreqs[j])
122  / (upper[i]-center[i]);
123  }
124  else
125  {
126  mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + 0.0;
127  }
128  }
129 
130  }
131 
132  /*
133  * We calculate now mfccDCT matrix
134  * NB: +1 because of the DC component
135  */
136 
137  const double pi = 3.14159265358979323846264338327950288;
138 
139  for (i = 0; i < nceps+1; i++) {
140  for (j = 0; j < totalFilters; j++) {
141  mfccDCTMatrix[i][j] = (1./sqrt((double) totalFilters / 2.))
142  * cos((double) i * ((double) j + 0.5) / (double) totalFilters * pi);
143  }
144  }
145 
146  for (j = 0; j < totalFilters; j++){
147  mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfccDCTMatrix[0][j];
148  }
149 
150  /* The analysis window */
151  window = new Window<double>(config.window, fftSize);
152 
153  /* Allocate memory for the FFT */
154  realOut = (double*)calloc(fftSize, sizeof(double));
155  imagOut = (double*)calloc(fftSize, sizeof(double));
156 
157  earMag = (double*)calloc(totalFilters, sizeof(double));
158  fftMag = (double*)calloc(fftSize/2, sizeof(double));
159 
160  free(freqs);
161  free(lower);
162  free(center);
163  free(upper);
164  free(triangleHeight);
165  free(fftFreqs);
166 }
167 
169 {
170  int i;
171 
172  /* Free the structure */
173  for (i = 0; i < nceps+1; i++) {
174  free(mfccDCTMatrix[i]);
175  }
176  free(mfccDCTMatrix);
177 
178  for (i = 0; i < totalFilters; i++) {
179  free(mfccFilterWeights[i]);
180  }
181  free(mfccFilterWeights);
182 
183  /* Free the feature vector */
184  free(ceps);
185 
186  /* The analysis window */
187  delete window;
188 
189  free(earMag);
190  free(fftMag);
191 
192  /* Free the FFT */
193  free(realOut);
194  free(imagOut);
195 
196  delete fft;
197 }
198 
199 
200 /*
201  *
202  * Extract the MFCC on the input frame
203  *
204  */
205 int MFCC::process(const double *inframe, double *outceps)
206 {
207  double *inputData = (double *)malloc(fftSize * sizeof(double));
208  for (int i = 0; i < fftSize; ++i) inputData[i] = inframe[i];
209 
210  window->cut(inputData);
211 
212  /* Calculate the fft on the input frame */
213  fft->forward(inputData, realOut, imagOut);
214 
215  free(inputData);
216 
217  return process(realOut, imagOut, outceps);
218 }
219 
220 int MFCC::process(const double *real, const double *imag, double *outceps)
221 {
222  int i, j;
223 
224  for (i = 0; i < fftSize/2; ++i) {
225  fftMag[i] = sqrt(real[i] * real[i] + imag[i] * imag[i]);
226  }
227 
228  for (i = 0; i < totalFilters; ++i) {
229  earMag[i] = 0.0;
230  }
231 
232  /* Multiply by mfccFilterWeights */
233  for (i = 0; i < totalFilters; i++) {
234  double tmp = 0.0;
235  for (j = 0; j < fftSize/2; j++) {
236  tmp = tmp + (mfccFilterWeights[i][j] * fftMag[j]);
237  }
238  if (tmp > 0) earMag[i] = log10(tmp);
239  else earMag[i] = 0.0;
240 
241  if (logPower != 1.0) {
242  earMag[i] = pow(earMag[i], logPower);
243  }
244  }
245 
246  /*
247  *
248  * Calculate now the cepstral coefficients
249  * with or without the DC component
250  *
251  */
252 
253  if (WANT_C0 == 1) {
254 
255  for (i = 0; i < nceps+1; i++) {
256  double tmp = 0.;
257  for (j = 0; j < totalFilters; j++){
258  tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
259  }
260  outceps[i] = tmp;
261  }
262  } else {
263  for (i = 1; i < nceps+1; i++) {
264  double tmp = 0.;
265  for (j = 0; j < totalFilters; j++){
266  tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
267  }
268  outceps[i-1] = tmp;
269  }
270  }
271 
272  return nceps;
273 }
274 
int fftsize
Definition: MFCC.h:25
MFCC(MFCCConfig config)
Definition: MFCC.cpp:24
int FS
Definition: MFCC.h:24
double ** mfccFilterWeights
Definition: MFCC.h:80
int nceps
Definition: MFCC.h:74
int process(const double *inframe, double *outceps)
Process time-domain input data.
Definition: MFCC.cpp:205
double logpower
Definition: MFCC.h:27
double * ceps
Definition: MFCC.h:77
int logFilters
Definition: MFCC.h:63
int nceps
Definition: MFCC.h:26
virtual ~MFCC()
Definition: MFCC.cpp:168
double * realOut
Definition: MFCC.h:86
double logPower
Definition: MFCC.h:70
double ** mfccDCTMatrix
Definition: MFCC.h:79
void forward(const double *realIn, double *realOut, double *imagOut)
Carry out a forward real-to-complex transform of size nsamples, where nsamples is the value provided ...
Definition: FFT.cpp:184
double linearSpacing
Definition: MFCC.h:62
int fftSize
Definition: MFCC.h:67
int linearFilters
Definition: MFCC.h:61
void cut(T *src) const
Definition: Window.h:61
int totalFilters
Definition: MFCC.h:69
int samplingRate
Definition: MFCC.h:73
double lowestFrequency
Definition: MFCC.h:60
Definition: FFT.h:52
double * fftMag
Definition: MFCC.h:88
double logSpacing
Definition: MFCC.h:64
double * imagOut
Definition: MFCC.h:87
int WANT_C0
Definition: MFCC.h:93
bool want_c0
Definition: MFCC.h:28
WindowType window
Definition: MFCC.h:29
Window< double > * window
Definition: MFCC.h:83
double * earMag
Definition: MFCC.h:89
FFTReal * fft
Definition: MFCC.h:90