annotate dsp/mfcc/MFCC.cpp @ 84:e5907ae6de17

* Add GPL and README; some tidying
author Chris Cannam
date Mon, 13 Dec 2010 14:55:28 +0000
parents 6cb2b3cd5356
children f6ccde089491
rev   line source
cannam@26 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
cannam@25 2
cannam@25 3 /*
cannam@26 4 QM DSP Library
cannam@25 5
cannam@26 6 Centre for Digital Music, Queen Mary, University of London.
cannam@26 7 This file copyright 2005 Nicolas Chetry, copyright 2008 QMUL.
Chris@84 8
Chris@84 9 This program is free software; you can redistribute it and/or
Chris@84 10 modify it under the terms of the GNU General Public License as
Chris@84 11 published by the Free Software Foundation; either version 2 of the
Chris@84 12 License, or (at your option) any later version. See the file
Chris@84 13 COPYING included with this distribution for more information.
cannam@26 14 */
cannam@25 15
cannam@26 16 #include <cmath>
cannam@26 17 #include <cstdlib>
cannam@47 18 #include <cstring>
cannam@26 19
cannam@26 20 #include "MFCC.h"
cannam@26 21 #include "dsp/transforms/FFT.h"
cannam@26 22 #include "base/Window.h"
cannam@26 23
cannam@26 24 MFCC::MFCC(MFCCConfig config)
cannam@26 25 {
cannam@26 26 int i,j;
cannam@26 27
cannam@26 28 /* Calculate at startup */
cannam@26 29 double *freqs, *lower, *center, *upper, *triangleHeight, *fftFreqs;
cannam@25 30
cannam@26 31 lowestFrequency = 66.6666666;
cannam@26 32 linearFilters = 13;
cannam@26 33 linearSpacing = 66.66666666;
cannam@26 34 logFilters = 27;
cannam@26 35 logSpacing = 1.0711703;
cannam@25 36
cannam@26 37 /* FFT and analysis window sizes */
cannam@26 38 fftSize = config.fftsize;
cannam@64 39 fft = new FFTReal(fftSize);
cannam@26 40
cannam@26 41 totalFilters = linearFilters + logFilters;
cannam@30 42 logPower = config.logpower;
cannam@25 43
cannam@26 44 samplingRate = config.FS;
cannam@26 45
cannam@26 46 /* The number of cepstral componenents */
cannam@26 47 nceps = config.nceps;
cannam@25 48
cannam@26 49 /* Set if user want C0 */
cannam@26 50 WANT_C0 = (config.want_c0 ? 1 : 0);
cannam@25 51
cannam@26 52 /* Allocate space for feature vector */
cannam@26 53 if (WANT_C0 == 1) {
cannam@26 54 ceps = (double*)calloc(nceps+1, sizeof(double));
cannam@26 55 } else {
cannam@26 56 ceps = (double*)calloc(nceps, sizeof(double));
cannam@26 57 }
cannam@26 58
cannam@26 59 /* Allocate space for local vectors */
cannam@26 60 mfccDCTMatrix = (double**)calloc(nceps+1, sizeof(double*));
cannam@30 61 for (i = 0; i < nceps+1; i++) {
cannam@26 62 mfccDCTMatrix[i]= (double*)calloc(totalFilters, sizeof(double));
cannam@26 63 }
cannam@26 64
cannam@26 65 mfccFilterWeights = (double**)calloc(totalFilters, sizeof(double*));
cannam@30 66 for (i = 0; i < totalFilters; i++) {
cannam@26 67 mfccFilterWeights[i] = (double*)calloc(fftSize, sizeof(double));
cannam@26 68 }
cannam@26 69
cannam@26 70 freqs = (double*)calloc(totalFilters+2,sizeof(double));
cannam@26 71
cannam@26 72 lower = (double*)calloc(totalFilters,sizeof(double));
cannam@26 73 center = (double*)calloc(totalFilters,sizeof(double));
cannam@26 74 upper = (double*)calloc(totalFilters,sizeof(double));
cannam@26 75
cannam@26 76 triangleHeight = (double*)calloc(totalFilters,sizeof(double));
cannam@26 77 fftFreqs = (double*)calloc(fftSize,sizeof(double));
cannam@25 78
cannam@30 79 for (i = 0; i < linearFilters; i++) {
cannam@26 80 freqs[i] = lowestFrequency + ((double)i) * linearSpacing;
cannam@26 81 }
cannam@26 82
cannam@30 83 for (i = linearFilters; i < totalFilters+2; i++) {
cannam@26 84 freqs[i] = freqs[linearFilters-1] *
cannam@26 85 pow(logSpacing, (double)(i-linearFilters+1));
cannam@26 86 }
cannam@26 87
cannam@26 88 /* Define lower, center and upper */
cannam@26 89 memcpy(lower, freqs,totalFilters*sizeof(double));
cannam@26 90 memcpy(center, &freqs[1],totalFilters*sizeof(double));
cannam@26 91 memcpy(upper, &freqs[2],totalFilters*sizeof(double));
cannam@26 92
cannam@26 93 for (i=0;i<totalFilters;i++){
cannam@26 94 triangleHeight[i] = 2./(upper[i]-lower[i]);
cannam@26 95 }
cannam@26 96
cannam@26 97 for (i=0;i<fftSize;i++){
cannam@26 98 fftFreqs[i] = ((double) i / ((double) fftSize ) *
cannam@26 99 (double) samplingRate);
cannam@26 100 }
cannam@25 101
cannam@26 102 /* Build now the mccFilterWeight matrix */
cannam@26 103 for (i=0;i<totalFilters;i++){
cannam@25 104
cannam@26 105 for (j=0;j<fftSize;j++) {
cannam@26 106
cannam@26 107 if ((fftFreqs[j] > lower[i]) && (fftFreqs[j] <= center[i])) {
cannam@26 108
cannam@26 109 mfccFilterWeights[i][j] = triangleHeight[i] *
cannam@26 110 (fftFreqs[j]-lower[i]) / (center[i]-lower[i]);
cannam@26 111
cannam@26 112 }
cannam@26 113 else
cannam@26 114 {
cannam@26 115 mfccFilterWeights[i][j] = 0.0;
cannam@26 116 }
cannam@25 117
cannam@26 118 if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) {
cannam@25 119
cannam@30 120 mfccFilterWeights[i][j] = mfccFilterWeights[i][j]
cannam@30 121 + triangleHeight[i] * (upper[i]-fftFreqs[j])
cannam@26 122 / (upper[i]-center[i]);
cannam@26 123 }
cannam@26 124 else
cannam@26 125 {
cannam@26 126 mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + 0.0;
cannam@26 127 }
cannam@25 128 }
cannam@25 129
cannam@26 130 }
cannam@25 131
cannam@26 132 /*
cannam@26 133 * We calculate now mfccDCT matrix
cannam@26 134 * NB: +1 because of the DC component
cannam@26 135 */
cannam@29 136
cannam@29 137 const double pi = 3.14159265358979323846264338327950288;
cannam@26 138
cannam@30 139 for (i = 0; i < nceps+1; i++) {
cannam@30 140 for (j = 0; j < totalFilters; j++) {
cannam@26 141 mfccDCTMatrix[i][j] = (1./sqrt((double) totalFilters / 2.))
cannam@29 142 * cos((double) i * ((double) j + 0.5) / (double) totalFilters * pi);
cannam@25 143 }
cannam@25 144 }
cannam@25 145
cannam@30 146 for (j = 0; j < totalFilters; j++){
cannam@30 147 mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfccDCTMatrix[0][j];
cannam@26 148 }
cannam@26 149
cannam@26 150 /* The analysis window */
cannam@32 151 window = new Window<double>(config.window, fftSize);
cannam@25 152
cannam@26 153 /* Allocate memory for the FFT */
cannam@30 154 realOut = (double*)calloc(fftSize, sizeof(double));
cannam@30 155 imagOut = (double*)calloc(fftSize, sizeof(double));
cannam@30 156
cannam@30 157 earMag = (double*)calloc(totalFilters, sizeof(double));
cannam@30 158 fftMag = (double*)calloc(fftSize/2, sizeof(double));
cannam@25 159
cannam@26 160 free(freqs);
cannam@26 161 free(lower);
cannam@26 162 free(center);
cannam@26 163 free(upper);
cannam@26 164 free(triangleHeight);
cannam@26 165 free(fftFreqs);
cannam@25 166 }
cannam@25 167
cannam@26 168 MFCC::~MFCC()
cannam@26 169 {
cannam@26 170 int i;
cannam@26 171
cannam@26 172 /* Free the structure */
cannam@30 173 for (i = 0; i < nceps+1; i++) {
cannam@26 174 free(mfccDCTMatrix[i]);
cannam@26 175 }
cannam@26 176 free(mfccDCTMatrix);
cannam@26 177
cannam@30 178 for (i = 0; i < totalFilters; i++) {
cannam@26 179 free(mfccFilterWeights[i]);
cannam@26 180 }
cannam@26 181 free(mfccFilterWeights);
cannam@26 182
cannam@26 183 /* Free the feature vector */
cannam@26 184 free(ceps);
cannam@26 185
cannam@26 186 /* The analysis window */
cannam@26 187 delete window;
cannam@30 188
cannam@30 189 free(earMag);
cannam@30 190 free(fftMag);
cannam@26 191
cannam@26 192 /* Free the FFT */
cannam@26 193 free(realOut);
cannam@26 194 free(imagOut);
cannam@64 195
cannam@64 196 delete fft;
cannam@26 197 }
cannam@25 198
cannam@25 199
cannam@25 200 /*
cannam@25 201 *
cannam@25 202 * Extract the MFCC on the input frame
cannam@25 203 *
cannam@25 204 */
cannam@30 205 int MFCC::process(const double *inframe, double *outceps)
cannam@26 206 {
cannam@30 207 double *inputData = (double *)malloc(fftSize * sizeof(double));
cannam@30 208 for (int i = 0; i < fftSize; ++i) inputData[i] = inframe[i];
cannam@25 209
cannam@30 210 window->cut(inputData);
cannam@26 211
cannam@26 212 /* Calculate the fft on the input frame */
cannam@64 213 fft->process(0, inputData, realOut, imagOut);
cannam@25 214
cannam@30 215 free(inputData);
cannam@30 216
cannam@30 217 return process(realOut, imagOut, outceps);
cannam@30 218 }
cannam@30 219
cannam@30 220 int MFCC::process(const double *real, const double *imag, double *outceps)
cannam@30 221 {
cannam@30 222 int i, j;
cannam@30 223
cannam@30 224 for (i = 0; i < fftSize/2; ++i) {
cannam@30 225 fftMag[i] = sqrt(real[i] * real[i] + imag[i] * imag[i]);
cannam@30 226 }
cannam@30 227
cannam@30 228 for (i = 0; i < totalFilters; ++i) {
cannam@30 229 earMag[i] = 0.0;
cannam@25 230 }
cannam@25 231
cannam@26 232 /* Multiply by mfccFilterWeights */
cannam@30 233 for (i = 0; i < totalFilters; i++) {
cannam@30 234 double tmp = 0.0;
cannam@30 235 for (j = 0; j < fftSize/2; j++) {
cannam@30 236 tmp = tmp + (mfccFilterWeights[i][j] * fftMag[j]);
cannam@26 237 }
cannam@30 238 if (tmp > 0) earMag[i] = log10(tmp);
cannam@30 239 else earMag[i] = 0.0;
cannam@30 240
cannam@30 241 if (logPower != 1.0) {
cannam@30 242 earMag[i] = pow(earMag[i], logPower);
cannam@30 243 }
cannam@26 244 }
cannam@26 245
cannam@26 246 /*
cannam@26 247 *
cannam@26 248 * Calculate now the cepstral coefficients
cannam@26 249 * with or without the DC component
cannam@26 250 *
cannam@26 251 */
cannam@26 252
cannam@30 253 if (WANT_C0 == 1) {
cannam@26 254
cannam@30 255 for (i = 0; i < nceps+1; i++) {
cannam@30 256 double tmp = 0.;
cannam@30 257 for (j = 0; j < totalFilters; j++){
cannam@30 258 tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
cannam@26 259 }
cannam@26 260 outceps[i] = tmp;
cannam@26 261 }
cannam@26 262 }
cannam@25 263 else
cannam@26 264 {
cannam@30 265 for (i = 1; i < nceps+1; i++) {
cannam@30 266 double tmp = 0.;
cannam@30 267 for (j = 0; j < totalFilters; j++){
cannam@30 268 tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
cannam@26 269 }
cannam@26 270 outceps[i-1] = tmp;
cannam@25 271 }
cannam@26 272 }
cannam@25 273
cannam@26 274 return nceps;
cannam@25 275 }
cannam@25 276