annotate dsp/mfcc/MFCC.cpp @ 73:dcb555b90924

* Key detector: when returning key strengths, use the peak value of the three underlying chromagram correlations (from 36-bin chromagram) corresponding to each key, instead of the mean. Rationale: This is the same method as used when returning the key value, and it's nice to have the same results in both returned value and plot. The peak performed better than the sum with a simple test set of triads, so it seems reasonable to change the plot to match the key output rather than the other way around. * FFT: kiss_fftr returns only the non-conjugate bins, synthesise the rest rather than leaving them (perhaps dangerously) undefined. Fixes an uninitialised data error in chromagram that could cause garbage results from key detector. * Constant Q: remove precalculated values again, I reckon they're not proving such a good tradeoff.
author cannam
date Fri, 05 Jun 2009 15:12:39 +0000
parents 6cb2b3cd5356
children e5907ae6de17
rev   line source
cannam@26 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
cannam@25 2
cannam@25 3 /*
cannam@26 4 QM DSP Library
cannam@25 5
cannam@26 6 Centre for Digital Music, Queen Mary, University of London.
cannam@26 7 This file copyright 2005 Nicolas Chetry, copyright 2008 QMUL.
cannam@26 8 All rights reserved.
cannam@26 9 */
cannam@25 10
cannam@26 11 #include <cmath>
cannam@26 12 #include <cstdlib>
cannam@47 13 #include <cstring>
cannam@26 14
cannam@26 15 #include "MFCC.h"
cannam@26 16 #include "dsp/transforms/FFT.h"
cannam@26 17 #include "base/Window.h"
cannam@26 18
cannam@26 19 MFCC::MFCC(MFCCConfig config)
cannam@26 20 {
cannam@26 21 int i,j;
cannam@26 22
cannam@26 23 /* Calculate at startup */
cannam@26 24 double *freqs, *lower, *center, *upper, *triangleHeight, *fftFreqs;
cannam@25 25
cannam@26 26 lowestFrequency = 66.6666666;
cannam@26 27 linearFilters = 13;
cannam@26 28 linearSpacing = 66.66666666;
cannam@26 29 logFilters = 27;
cannam@26 30 logSpacing = 1.0711703;
cannam@25 31
cannam@26 32 /* FFT and analysis window sizes */
cannam@26 33 fftSize = config.fftsize;
cannam@64 34 fft = new FFTReal(fftSize);
cannam@26 35
cannam@26 36 totalFilters = linearFilters + logFilters;
cannam@30 37 logPower = config.logpower;
cannam@25 38
cannam@26 39 samplingRate = config.FS;
cannam@26 40
cannam@26 41 /* The number of cepstral componenents */
cannam@26 42 nceps = config.nceps;
cannam@25 43
cannam@26 44 /* Set if user want C0 */
cannam@26 45 WANT_C0 = (config.want_c0 ? 1 : 0);
cannam@25 46
cannam@26 47 /* Allocate space for feature vector */
cannam@26 48 if (WANT_C0 == 1) {
cannam@26 49 ceps = (double*)calloc(nceps+1, sizeof(double));
cannam@26 50 } else {
cannam@26 51 ceps = (double*)calloc(nceps, sizeof(double));
cannam@26 52 }
cannam@26 53
cannam@26 54 /* Allocate space for local vectors */
cannam@26 55 mfccDCTMatrix = (double**)calloc(nceps+1, sizeof(double*));
cannam@30 56 for (i = 0; i < nceps+1; i++) {
cannam@26 57 mfccDCTMatrix[i]= (double*)calloc(totalFilters, sizeof(double));
cannam@26 58 }
cannam@26 59
cannam@26 60 mfccFilterWeights = (double**)calloc(totalFilters, sizeof(double*));
cannam@30 61 for (i = 0; i < totalFilters; i++) {
cannam@26 62 mfccFilterWeights[i] = (double*)calloc(fftSize, sizeof(double));
cannam@26 63 }
cannam@26 64
cannam@26 65 freqs = (double*)calloc(totalFilters+2,sizeof(double));
cannam@26 66
cannam@26 67 lower = (double*)calloc(totalFilters,sizeof(double));
cannam@26 68 center = (double*)calloc(totalFilters,sizeof(double));
cannam@26 69 upper = (double*)calloc(totalFilters,sizeof(double));
cannam@26 70
cannam@26 71 triangleHeight = (double*)calloc(totalFilters,sizeof(double));
cannam@26 72 fftFreqs = (double*)calloc(fftSize,sizeof(double));
cannam@25 73
cannam@30 74 for (i = 0; i < linearFilters; i++) {
cannam@26 75 freqs[i] = lowestFrequency + ((double)i) * linearSpacing;
cannam@26 76 }
cannam@26 77
cannam@30 78 for (i = linearFilters; i < totalFilters+2; i++) {
cannam@26 79 freqs[i] = freqs[linearFilters-1] *
cannam@26 80 pow(logSpacing, (double)(i-linearFilters+1));
cannam@26 81 }
cannam@26 82
cannam@26 83 /* Define lower, center and upper */
cannam@26 84 memcpy(lower, freqs,totalFilters*sizeof(double));
cannam@26 85 memcpy(center, &freqs[1],totalFilters*sizeof(double));
cannam@26 86 memcpy(upper, &freqs[2],totalFilters*sizeof(double));
cannam@26 87
cannam@26 88 for (i=0;i<totalFilters;i++){
cannam@26 89 triangleHeight[i] = 2./(upper[i]-lower[i]);
cannam@26 90 }
cannam@26 91
cannam@26 92 for (i=0;i<fftSize;i++){
cannam@26 93 fftFreqs[i] = ((double) i / ((double) fftSize ) *
cannam@26 94 (double) samplingRate);
cannam@26 95 }
cannam@25 96
cannam@26 97 /* Build now the mccFilterWeight matrix */
cannam@26 98 for (i=0;i<totalFilters;i++){
cannam@25 99
cannam@26 100 for (j=0;j<fftSize;j++) {
cannam@26 101
cannam@26 102 if ((fftFreqs[j] > lower[i]) && (fftFreqs[j] <= center[i])) {
cannam@26 103
cannam@26 104 mfccFilterWeights[i][j] = triangleHeight[i] *
cannam@26 105 (fftFreqs[j]-lower[i]) / (center[i]-lower[i]);
cannam@26 106
cannam@26 107 }
cannam@26 108 else
cannam@26 109 {
cannam@26 110 mfccFilterWeights[i][j] = 0.0;
cannam@26 111 }
cannam@25 112
cannam@26 113 if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) {
cannam@25 114
cannam@30 115 mfccFilterWeights[i][j] = mfccFilterWeights[i][j]
cannam@30 116 + triangleHeight[i] * (upper[i]-fftFreqs[j])
cannam@26 117 / (upper[i]-center[i]);
cannam@26 118 }
cannam@26 119 else
cannam@26 120 {
cannam@26 121 mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + 0.0;
cannam@26 122 }
cannam@25 123 }
cannam@25 124
cannam@26 125 }
cannam@25 126
cannam@26 127 /*
cannam@26 128 * We calculate now mfccDCT matrix
cannam@26 129 * NB: +1 because of the DC component
cannam@26 130 */
cannam@29 131
cannam@29 132 const double pi = 3.14159265358979323846264338327950288;
cannam@26 133
cannam@30 134 for (i = 0; i < nceps+1; i++) {
cannam@30 135 for (j = 0; j < totalFilters; j++) {
cannam@26 136 mfccDCTMatrix[i][j] = (1./sqrt((double) totalFilters / 2.))
cannam@29 137 * cos((double) i * ((double) j + 0.5) / (double) totalFilters * pi);
cannam@25 138 }
cannam@25 139 }
cannam@25 140
cannam@30 141 for (j = 0; j < totalFilters; j++){
cannam@30 142 mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfccDCTMatrix[0][j];
cannam@26 143 }
cannam@26 144
cannam@26 145 /* The analysis window */
cannam@32 146 window = new Window<double>(config.window, fftSize);
cannam@25 147
cannam@26 148 /* Allocate memory for the FFT */
cannam@30 149 realOut = (double*)calloc(fftSize, sizeof(double));
cannam@30 150 imagOut = (double*)calloc(fftSize, sizeof(double));
cannam@30 151
cannam@30 152 earMag = (double*)calloc(totalFilters, sizeof(double));
cannam@30 153 fftMag = (double*)calloc(fftSize/2, sizeof(double));
cannam@25 154
cannam@26 155 free(freqs);
cannam@26 156 free(lower);
cannam@26 157 free(center);
cannam@26 158 free(upper);
cannam@26 159 free(triangleHeight);
cannam@26 160 free(fftFreqs);
cannam@25 161 }
cannam@25 162
cannam@26 163 MFCC::~MFCC()
cannam@26 164 {
cannam@26 165 int i;
cannam@26 166
cannam@26 167 /* Free the structure */
cannam@30 168 for (i = 0; i < nceps+1; i++) {
cannam@26 169 free(mfccDCTMatrix[i]);
cannam@26 170 }
cannam@26 171 free(mfccDCTMatrix);
cannam@26 172
cannam@30 173 for (i = 0; i < totalFilters; i++) {
cannam@26 174 free(mfccFilterWeights[i]);
cannam@26 175 }
cannam@26 176 free(mfccFilterWeights);
cannam@26 177
cannam@26 178 /* Free the feature vector */
cannam@26 179 free(ceps);
cannam@26 180
cannam@26 181 /* The analysis window */
cannam@26 182 delete window;
cannam@30 183
cannam@30 184 free(earMag);
cannam@30 185 free(fftMag);
cannam@26 186
cannam@26 187 /* Free the FFT */
cannam@26 188 free(realOut);
cannam@26 189 free(imagOut);
cannam@64 190
cannam@64 191 delete fft;
cannam@26 192 }
cannam@25 193
cannam@25 194
cannam@25 195 /*
cannam@25 196 *
cannam@25 197 * Extract the MFCC on the input frame
cannam@25 198 *
cannam@25 199 */
cannam@30 200 int MFCC::process(const double *inframe, double *outceps)
cannam@26 201 {
cannam@30 202 double *inputData = (double *)malloc(fftSize * sizeof(double));
cannam@30 203 for (int i = 0; i < fftSize; ++i) inputData[i] = inframe[i];
cannam@25 204
cannam@30 205 window->cut(inputData);
cannam@26 206
cannam@26 207 /* Calculate the fft on the input frame */
cannam@64 208 fft->process(0, inputData, realOut, imagOut);
cannam@25 209
cannam@30 210 free(inputData);
cannam@30 211
cannam@30 212 return process(realOut, imagOut, outceps);
cannam@30 213 }
cannam@30 214
cannam@30 215 int MFCC::process(const double *real, const double *imag, double *outceps)
cannam@30 216 {
cannam@30 217 int i, j;
cannam@30 218
cannam@30 219 for (i = 0; i < fftSize/2; ++i) {
cannam@30 220 fftMag[i] = sqrt(real[i] * real[i] + imag[i] * imag[i]);
cannam@30 221 }
cannam@30 222
cannam@30 223 for (i = 0; i < totalFilters; ++i) {
cannam@30 224 earMag[i] = 0.0;
cannam@25 225 }
cannam@25 226
cannam@26 227 /* Multiply by mfccFilterWeights */
cannam@30 228 for (i = 0; i < totalFilters; i++) {
cannam@30 229 double tmp = 0.0;
cannam@30 230 for (j = 0; j < fftSize/2; j++) {
cannam@30 231 tmp = tmp + (mfccFilterWeights[i][j] * fftMag[j]);
cannam@26 232 }
cannam@30 233 if (tmp > 0) earMag[i] = log10(tmp);
cannam@30 234 else earMag[i] = 0.0;
cannam@30 235
cannam@30 236 if (logPower != 1.0) {
cannam@30 237 earMag[i] = pow(earMag[i], logPower);
cannam@30 238 }
cannam@26 239 }
cannam@26 240
cannam@26 241 /*
cannam@26 242 *
cannam@26 243 * Calculate now the cepstral coefficients
cannam@26 244 * with or without the DC component
cannam@26 245 *
cannam@26 246 */
cannam@26 247
cannam@30 248 if (WANT_C0 == 1) {
cannam@26 249
cannam@30 250 for (i = 0; i < nceps+1; i++) {
cannam@30 251 double tmp = 0.;
cannam@30 252 for (j = 0; j < totalFilters; j++){
cannam@30 253 tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
cannam@26 254 }
cannam@26 255 outceps[i] = tmp;
cannam@26 256 }
cannam@26 257 }
cannam@25 258 else
cannam@26 259 {
cannam@30 260 for (i = 1; i < nceps+1; i++) {
cannam@30 261 double tmp = 0.;
cannam@30 262 for (j = 0; j < totalFilters; j++){
cannam@30 263 tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
cannam@26 264 }
cannam@26 265 outceps[i-1] = tmp;
cannam@25 266 }
cannam@26 267 }
cannam@25 268
cannam@26 269 return nceps;
cannam@25 270 }
cannam@25 271