annotate dsp/mfcc/MFCC.cpp @ 298:255e431ae3d4

* Key detector: when returning key strengths, use the peak value of the three underlying chromagram correlations (from 36-bin chromagram) corresponding to each key, instead of the mean. Rationale: This is the same method as used when returning the key value, and it's nice to have the same results in both returned value and plot. The peak performed better than the sum with a simple test set of triads, so it seems reasonable to change the plot to match the key output rather than the other way around. * FFT: kiss_fftr returns only the non-conjugate bins, synthesise the rest rather than leaving them (perhaps dangerously) undefined. Fixes an uninitialised data error in chromagram that could cause garbage results from key detector. * Constant Q: remove precalculated values again, I reckon they're not proving such a good tradeoff.
author Chris Cannam <c.cannam@qmul.ac.uk>
date Fri, 05 Jun 2009 15:12:39 +0000
parents befe5aa6b450
children d5014ab8b0e5
rev   line source
c@251 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
c@251 2
c@251 3 /*
c@251 4 QM DSP Library
c@251 5
c@251 6 Centre for Digital Music, Queen Mary, University of London.
c@251 7 This file copyright 2005 Nicolas Chetry, copyright 2008 QMUL.
c@251 8 All rights reserved.
c@251 9 */
c@251 10
c@251 11 #include <cmath>
c@251 12 #include <cstdlib>
c@272 13 #include <cstring>
c@251 14
c@251 15 #include "MFCC.h"
c@251 16 #include "dsp/transforms/FFT.h"
c@251 17 #include "base/Window.h"
c@251 18
c@251 19 MFCC::MFCC(MFCCConfig config)
c@251 20 {
c@251 21 int i,j;
c@251 22
c@251 23 /* Calculate at startup */
c@251 24 double *freqs, *lower, *center, *upper, *triangleHeight, *fftFreqs;
c@251 25
c@251 26 lowestFrequency = 66.6666666;
c@251 27 linearFilters = 13;
c@251 28 linearSpacing = 66.66666666;
c@251 29 logFilters = 27;
c@251 30 logSpacing = 1.0711703;
c@251 31
c@251 32 /* FFT and analysis window sizes */
c@251 33 fftSize = config.fftsize;
c@289 34 fft = new FFTReal(fftSize);
c@251 35
c@251 36 totalFilters = linearFilters + logFilters;
c@255 37 logPower = config.logpower;
c@251 38
c@251 39 samplingRate = config.FS;
c@251 40
c@251 41 /* The number of cepstral componenents */
c@251 42 nceps = config.nceps;
c@251 43
c@251 44 /* Set if user want C0 */
c@251 45 WANT_C0 = (config.want_c0 ? 1 : 0);
c@251 46
c@251 47 /* Allocate space for feature vector */
c@251 48 if (WANT_C0 == 1) {
c@251 49 ceps = (double*)calloc(nceps+1, sizeof(double));
c@251 50 } else {
c@251 51 ceps = (double*)calloc(nceps, sizeof(double));
c@251 52 }
c@251 53
c@251 54 /* Allocate space for local vectors */
c@251 55 mfccDCTMatrix = (double**)calloc(nceps+1, sizeof(double*));
c@255 56 for (i = 0; i < nceps+1; i++) {
c@251 57 mfccDCTMatrix[i]= (double*)calloc(totalFilters, sizeof(double));
c@251 58 }
c@251 59
c@251 60 mfccFilterWeights = (double**)calloc(totalFilters, sizeof(double*));
c@255 61 for (i = 0; i < totalFilters; i++) {
c@251 62 mfccFilterWeights[i] = (double*)calloc(fftSize, sizeof(double));
c@251 63 }
c@251 64
c@251 65 freqs = (double*)calloc(totalFilters+2,sizeof(double));
c@251 66
c@251 67 lower = (double*)calloc(totalFilters,sizeof(double));
c@251 68 center = (double*)calloc(totalFilters,sizeof(double));
c@251 69 upper = (double*)calloc(totalFilters,sizeof(double));
c@251 70
c@251 71 triangleHeight = (double*)calloc(totalFilters,sizeof(double));
c@251 72 fftFreqs = (double*)calloc(fftSize,sizeof(double));
c@251 73
c@255 74 for (i = 0; i < linearFilters; i++) {
c@251 75 freqs[i] = lowestFrequency + ((double)i) * linearSpacing;
c@251 76 }
c@251 77
c@255 78 for (i = linearFilters; i < totalFilters+2; i++) {
c@251 79 freqs[i] = freqs[linearFilters-1] *
c@251 80 pow(logSpacing, (double)(i-linearFilters+1));
c@251 81 }
c@251 82
c@251 83 /* Define lower, center and upper */
c@251 84 memcpy(lower, freqs,totalFilters*sizeof(double));
c@251 85 memcpy(center, &freqs[1],totalFilters*sizeof(double));
c@251 86 memcpy(upper, &freqs[2],totalFilters*sizeof(double));
c@251 87
c@251 88 for (i=0;i<totalFilters;i++){
c@251 89 triangleHeight[i] = 2./(upper[i]-lower[i]);
c@251 90 }
c@251 91
c@251 92 for (i=0;i<fftSize;i++){
c@251 93 fftFreqs[i] = ((double) i / ((double) fftSize ) *
c@251 94 (double) samplingRate);
c@251 95 }
c@251 96
c@251 97 /* Build now the mccFilterWeight matrix */
c@251 98 for (i=0;i<totalFilters;i++){
c@251 99
c@251 100 for (j=0;j<fftSize;j++) {
c@251 101
c@251 102 if ((fftFreqs[j] > lower[i]) && (fftFreqs[j] <= center[i])) {
c@251 103
c@251 104 mfccFilterWeights[i][j] = triangleHeight[i] *
c@251 105 (fftFreqs[j]-lower[i]) / (center[i]-lower[i]);
c@251 106
c@251 107 }
c@251 108 else
c@251 109 {
c@251 110 mfccFilterWeights[i][j] = 0.0;
c@251 111 }
c@251 112
c@251 113 if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) {
c@251 114
c@255 115 mfccFilterWeights[i][j] = mfccFilterWeights[i][j]
c@255 116 + triangleHeight[i] * (upper[i]-fftFreqs[j])
c@251 117 / (upper[i]-center[i]);
c@251 118 }
c@251 119 else
c@251 120 {
c@251 121 mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + 0.0;
c@251 122 }
c@251 123 }
c@251 124
c@251 125 }
c@251 126
c@251 127 /*
c@251 128 * We calculate now mfccDCT matrix
c@251 129 * NB: +1 because of the DC component
c@251 130 */
c@254 131
c@254 132 const double pi = 3.14159265358979323846264338327950288;
c@251 133
c@255 134 for (i = 0; i < nceps+1; i++) {
c@255 135 for (j = 0; j < totalFilters; j++) {
c@251 136 mfccDCTMatrix[i][j] = (1./sqrt((double) totalFilters / 2.))
c@254 137 * cos((double) i * ((double) j + 0.5) / (double) totalFilters * pi);
c@251 138 }
c@251 139 }
c@251 140
c@255 141 for (j = 0; j < totalFilters; j++){
c@255 142 mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfccDCTMatrix[0][j];
c@251 143 }
c@251 144
c@251 145 /* The analysis window */
c@257 146 window = new Window<double>(config.window, fftSize);
c@251 147
c@251 148 /* Allocate memory for the FFT */
c@255 149 realOut = (double*)calloc(fftSize, sizeof(double));
c@255 150 imagOut = (double*)calloc(fftSize, sizeof(double));
c@255 151
c@255 152 earMag = (double*)calloc(totalFilters, sizeof(double));
c@255 153 fftMag = (double*)calloc(fftSize/2, sizeof(double));
c@251 154
c@251 155 free(freqs);
c@251 156 free(lower);
c@251 157 free(center);
c@251 158 free(upper);
c@251 159 free(triangleHeight);
c@251 160 free(fftFreqs);
c@251 161 }
c@251 162
c@251 163 MFCC::~MFCC()
c@251 164 {
c@251 165 int i;
c@251 166
c@251 167 /* Free the structure */
c@255 168 for (i = 0; i < nceps+1; i++) {
c@251 169 free(mfccDCTMatrix[i]);
c@251 170 }
c@251 171 free(mfccDCTMatrix);
c@251 172
c@255 173 for (i = 0; i < totalFilters; i++) {
c@251 174 free(mfccFilterWeights[i]);
c@251 175 }
c@251 176 free(mfccFilterWeights);
c@251 177
c@251 178 /* Free the feature vector */
c@251 179 free(ceps);
c@251 180
c@251 181 /* The analysis window */
c@251 182 delete window;
c@255 183
c@255 184 free(earMag);
c@255 185 free(fftMag);
c@251 186
c@251 187 /* Free the FFT */
c@251 188 free(realOut);
c@251 189 free(imagOut);
c@289 190
c@289 191 delete fft;
c@251 192 }
c@251 193
c@251 194
c@251 195 /*
c@251 196 *
c@251 197 * Extract the MFCC on the input frame
c@251 198 *
c@251 199 */
c@255 200 int MFCC::process(const double *inframe, double *outceps)
c@251 201 {
c@255 202 double *inputData = (double *)malloc(fftSize * sizeof(double));
c@255 203 for (int i = 0; i < fftSize; ++i) inputData[i] = inframe[i];
c@251 204
c@255 205 window->cut(inputData);
c@251 206
c@251 207 /* Calculate the fft on the input frame */
c@289 208 fft->process(0, inputData, realOut, imagOut);
c@251 209
c@255 210 free(inputData);
c@255 211
c@255 212 return process(realOut, imagOut, outceps);
c@255 213 }
c@255 214
c@255 215 int MFCC::process(const double *real, const double *imag, double *outceps)
c@255 216 {
c@255 217 int i, j;
c@255 218
c@255 219 for (i = 0; i < fftSize/2; ++i) {
c@255 220 fftMag[i] = sqrt(real[i] * real[i] + imag[i] * imag[i]);
c@255 221 }
c@255 222
c@255 223 for (i = 0; i < totalFilters; ++i) {
c@255 224 earMag[i] = 0.0;
c@251 225 }
c@251 226
c@251 227 /* Multiply by mfccFilterWeights */
c@255 228 for (i = 0; i < totalFilters; i++) {
c@255 229 double tmp = 0.0;
c@255 230 for (j = 0; j < fftSize/2; j++) {
c@255 231 tmp = tmp + (mfccFilterWeights[i][j] * fftMag[j]);
c@251 232 }
c@255 233 if (tmp > 0) earMag[i] = log10(tmp);
c@255 234 else earMag[i] = 0.0;
c@255 235
c@255 236 if (logPower != 1.0) {
c@255 237 earMag[i] = pow(earMag[i], logPower);
c@255 238 }
c@251 239 }
c@251 240
c@251 241 /*
c@251 242 *
c@251 243 * Calculate now the cepstral coefficients
c@251 244 * with or without the DC component
c@251 245 *
c@251 246 */
c@251 247
c@255 248 if (WANT_C0 == 1) {
c@251 249
c@255 250 for (i = 0; i < nceps+1; i++) {
c@255 251 double tmp = 0.;
c@255 252 for (j = 0; j < totalFilters; j++){
c@255 253 tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
c@251 254 }
c@251 255 outceps[i] = tmp;
c@251 256 }
c@251 257 }
c@251 258 else
c@251 259 {
c@255 260 for (i = 1; i < nceps+1; i++) {
c@255 261 double tmp = 0.;
c@255 262 for (j = 0; j < totalFilters; j++){
c@255 263 tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
c@251 264 }
c@251 265 outceps[i-1] = tmp;
c@251 266 }
c@251 267 }
c@251 268
c@251 269 return nceps;
c@251 270 }
c@251 271