annotate dsp/mfcc/MFCC.cpp @ 321:f1e6be2de9a5

A threshold (delta) is added in the peak picking parameters structure (PPickParams). It is used as an offset when computing the smoothed detection function. A constructor for the structure PPickParams is also added to set the parameters to 0 when a structure instance is created. Hence programmes using the peak picking parameter structure and which do not set the delta parameter (e.g. QM Vamp note onset detector) won't be affected by the modifications. Functions modified: - dsp/onsets/PeakPicking.cpp - dsp/onsets/PeakPicking.h - dsp/signalconditioning/DFProcess.cpp - dsp/signalconditioning/DFProcess.h
author mathieub <mathieu.barthet@eecs.qmul.ac.uk>
date Mon, 20 Jun 2011 19:01:48 +0100
parents d5014ab8b0e5
children 9c8ee77db9de
rev   line source
c@251 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
c@251 2
c@251 3 /*
c@251 4 QM DSP Library
c@251 5
c@251 6 Centre for Digital Music, Queen Mary, University of London.
c@251 7 This file copyright 2005 Nicolas Chetry, copyright 2008 QMUL.
c@309 8
c@309 9 This program is free software; you can redistribute it and/or
c@309 10 modify it under the terms of the GNU General Public License as
c@309 11 published by the Free Software Foundation; either version 2 of the
c@309 12 License, or (at your option) any later version. See the file
c@309 13 COPYING included with this distribution for more information.
c@251 14 */
c@251 15
c@251 16 #include <cmath>
c@251 17 #include <cstdlib>
c@272 18 #include <cstring>
c@251 19
c@251 20 #include "MFCC.h"
c@251 21 #include "dsp/transforms/FFT.h"
c@251 22 #include "base/Window.h"
c@251 23
c@251 24 MFCC::MFCC(MFCCConfig config)
c@251 25 {
c@251 26 int i,j;
c@251 27
c@251 28 /* Calculate at startup */
c@251 29 double *freqs, *lower, *center, *upper, *triangleHeight, *fftFreqs;
c@251 30
c@251 31 lowestFrequency = 66.6666666;
c@251 32 linearFilters = 13;
c@251 33 linearSpacing = 66.66666666;
c@251 34 logFilters = 27;
c@251 35 logSpacing = 1.0711703;
c@251 36
c@251 37 /* FFT and analysis window sizes */
c@251 38 fftSize = config.fftsize;
c@289 39 fft = new FFTReal(fftSize);
c@251 40
c@251 41 totalFilters = linearFilters + logFilters;
c@255 42 logPower = config.logpower;
c@251 43
c@251 44 samplingRate = config.FS;
c@251 45
c@251 46 /* The number of cepstral componenents */
c@251 47 nceps = config.nceps;
c@251 48
c@251 49 /* Set if user want C0 */
c@251 50 WANT_C0 = (config.want_c0 ? 1 : 0);
c@251 51
c@251 52 /* Allocate space for feature vector */
c@251 53 if (WANT_C0 == 1) {
c@251 54 ceps = (double*)calloc(nceps+1, sizeof(double));
c@251 55 } else {
c@251 56 ceps = (double*)calloc(nceps, sizeof(double));
c@251 57 }
c@251 58
c@251 59 /* Allocate space for local vectors */
c@251 60 mfccDCTMatrix = (double**)calloc(nceps+1, sizeof(double*));
c@255 61 for (i = 0; i < nceps+1; i++) {
c@251 62 mfccDCTMatrix[i]= (double*)calloc(totalFilters, sizeof(double));
c@251 63 }
c@251 64
c@251 65 mfccFilterWeights = (double**)calloc(totalFilters, sizeof(double*));
c@255 66 for (i = 0; i < totalFilters; i++) {
c@251 67 mfccFilterWeights[i] = (double*)calloc(fftSize, sizeof(double));
c@251 68 }
c@251 69
c@251 70 freqs = (double*)calloc(totalFilters+2,sizeof(double));
c@251 71
c@251 72 lower = (double*)calloc(totalFilters,sizeof(double));
c@251 73 center = (double*)calloc(totalFilters,sizeof(double));
c@251 74 upper = (double*)calloc(totalFilters,sizeof(double));
c@251 75
c@251 76 triangleHeight = (double*)calloc(totalFilters,sizeof(double));
c@251 77 fftFreqs = (double*)calloc(fftSize,sizeof(double));
c@251 78
c@255 79 for (i = 0; i < linearFilters; i++) {
c@251 80 freqs[i] = lowestFrequency + ((double)i) * linearSpacing;
c@251 81 }
c@251 82
c@255 83 for (i = linearFilters; i < totalFilters+2; i++) {
c@251 84 freqs[i] = freqs[linearFilters-1] *
c@251 85 pow(logSpacing, (double)(i-linearFilters+1));
c@251 86 }
c@251 87
c@251 88 /* Define lower, center and upper */
c@251 89 memcpy(lower, freqs,totalFilters*sizeof(double));
c@251 90 memcpy(center, &freqs[1],totalFilters*sizeof(double));
c@251 91 memcpy(upper, &freqs[2],totalFilters*sizeof(double));
c@251 92
c@251 93 for (i=0;i<totalFilters;i++){
c@251 94 triangleHeight[i] = 2./(upper[i]-lower[i]);
c@251 95 }
c@251 96
c@251 97 for (i=0;i<fftSize;i++){
c@251 98 fftFreqs[i] = ((double) i / ((double) fftSize ) *
c@251 99 (double) samplingRate);
c@251 100 }
c@251 101
c@251 102 /* Build now the mccFilterWeight matrix */
c@251 103 for (i=0;i<totalFilters;i++){
c@251 104
c@251 105 for (j=0;j<fftSize;j++) {
c@251 106
c@251 107 if ((fftFreqs[j] > lower[i]) && (fftFreqs[j] <= center[i])) {
c@251 108
c@251 109 mfccFilterWeights[i][j] = triangleHeight[i] *
c@251 110 (fftFreqs[j]-lower[i]) / (center[i]-lower[i]);
c@251 111
c@251 112 }
c@251 113 else
c@251 114 {
c@251 115 mfccFilterWeights[i][j] = 0.0;
c@251 116 }
c@251 117
c@251 118 if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) {
c@251 119
c@255 120 mfccFilterWeights[i][j] = mfccFilterWeights[i][j]
c@255 121 + triangleHeight[i] * (upper[i]-fftFreqs[j])
c@251 122 / (upper[i]-center[i]);
c@251 123 }
c@251 124 else
c@251 125 {
c@251 126 mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + 0.0;
c@251 127 }
c@251 128 }
c@251 129
c@251 130 }
c@251 131
c@251 132 /*
c@251 133 * We calculate now mfccDCT matrix
c@251 134 * NB: +1 because of the DC component
c@251 135 */
c@254 136
c@254 137 const double pi = 3.14159265358979323846264338327950288;
c@251 138
c@255 139 for (i = 0; i < nceps+1; i++) {
c@255 140 for (j = 0; j < totalFilters; j++) {
c@251 141 mfccDCTMatrix[i][j] = (1./sqrt((double) totalFilters / 2.))
c@254 142 * cos((double) i * ((double) j + 0.5) / (double) totalFilters * pi);
c@251 143 }
c@251 144 }
c@251 145
c@255 146 for (j = 0; j < totalFilters; j++){
c@255 147 mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfccDCTMatrix[0][j];
c@251 148 }
c@251 149
c@251 150 /* The analysis window */
c@257 151 window = new Window<double>(config.window, fftSize);
c@251 152
c@251 153 /* Allocate memory for the FFT */
c@255 154 realOut = (double*)calloc(fftSize, sizeof(double));
c@255 155 imagOut = (double*)calloc(fftSize, sizeof(double));
c@255 156
c@255 157 earMag = (double*)calloc(totalFilters, sizeof(double));
c@255 158 fftMag = (double*)calloc(fftSize/2, sizeof(double));
c@251 159
c@251 160 free(freqs);
c@251 161 free(lower);
c@251 162 free(center);
c@251 163 free(upper);
c@251 164 free(triangleHeight);
c@251 165 free(fftFreqs);
c@251 166 }
c@251 167
c@251 168 MFCC::~MFCC()
c@251 169 {
c@251 170 int i;
c@251 171
c@251 172 /* Free the structure */
c@255 173 for (i = 0; i < nceps+1; i++) {
c@251 174 free(mfccDCTMatrix[i]);
c@251 175 }
c@251 176 free(mfccDCTMatrix);
c@251 177
c@255 178 for (i = 0; i < totalFilters; i++) {
c@251 179 free(mfccFilterWeights[i]);
c@251 180 }
c@251 181 free(mfccFilterWeights);
c@251 182
c@251 183 /* Free the feature vector */
c@251 184 free(ceps);
c@251 185
c@251 186 /* The analysis window */
c@251 187 delete window;
c@255 188
c@255 189 free(earMag);
c@255 190 free(fftMag);
c@251 191
c@251 192 /* Free the FFT */
c@251 193 free(realOut);
c@251 194 free(imagOut);
c@289 195
c@289 196 delete fft;
c@251 197 }
c@251 198
c@251 199
c@251 200 /*
c@251 201 *
c@251 202 * Extract the MFCC on the input frame
c@251 203 *
c@251 204 */
c@255 205 int MFCC::process(const double *inframe, double *outceps)
c@251 206 {
c@255 207 double *inputData = (double *)malloc(fftSize * sizeof(double));
c@255 208 for (int i = 0; i < fftSize; ++i) inputData[i] = inframe[i];
c@251 209
c@255 210 window->cut(inputData);
c@251 211
c@251 212 /* Calculate the fft on the input frame */
c@289 213 fft->process(0, inputData, realOut, imagOut);
c@251 214
c@255 215 free(inputData);
c@255 216
c@255 217 return process(realOut, imagOut, outceps);
c@255 218 }
c@255 219
c@255 220 int MFCC::process(const double *real, const double *imag, double *outceps)
c@255 221 {
c@255 222 int i, j;
c@255 223
c@255 224 for (i = 0; i < fftSize/2; ++i) {
c@255 225 fftMag[i] = sqrt(real[i] * real[i] + imag[i] * imag[i]);
c@255 226 }
c@255 227
c@255 228 for (i = 0; i < totalFilters; ++i) {
c@255 229 earMag[i] = 0.0;
c@251 230 }
c@251 231
c@251 232 /* Multiply by mfccFilterWeights */
c@255 233 for (i = 0; i < totalFilters; i++) {
c@255 234 double tmp = 0.0;
c@255 235 for (j = 0; j < fftSize/2; j++) {
c@255 236 tmp = tmp + (mfccFilterWeights[i][j] * fftMag[j]);
c@251 237 }
c@255 238 if (tmp > 0) earMag[i] = log10(tmp);
c@255 239 else earMag[i] = 0.0;
c@255 240
c@255 241 if (logPower != 1.0) {
c@255 242 earMag[i] = pow(earMag[i], logPower);
c@255 243 }
c@251 244 }
c@251 245
c@251 246 /*
c@251 247 *
c@251 248 * Calculate now the cepstral coefficients
c@251 249 * with or without the DC component
c@251 250 *
c@251 251 */
c@251 252
c@255 253 if (WANT_C0 == 1) {
c@251 254
c@255 255 for (i = 0; i < nceps+1; i++) {
c@255 256 double tmp = 0.;
c@255 257 for (j = 0; j < totalFilters; j++){
c@255 258 tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
c@251 259 }
c@251 260 outceps[i] = tmp;
c@251 261 }
c@251 262 }
c@251 263 else
c@251 264 {
c@255 265 for (i = 1; i < nceps+1; i++) {
c@255 266 double tmp = 0.;
c@255 267 for (j = 0; j < totalFilters; j++){
c@255 268 tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
c@251 269 }
c@251 270 outceps[i-1] = tmp;
c@251 271 }
c@251 272 }
c@251 273
c@251 274 return nceps;
c@251 275 }
c@251 276