annotate dsp/mfcc/MFCC.cpp @ 209:ccd2019190bf msvc

Some MSVC fixes, including (temporarily, probably) renaming the FFT source file to avoid getting it mixed up with the Vamp SDK one in our object dir
author Chris Cannam
date Thu, 01 Feb 2018 16:34:08 +0000
parents f6ccde089491
children
rev   line source
cannam@26 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
cannam@25 2
cannam@25 3 /*
cannam@26 4 QM DSP Library
cannam@25 5
cannam@26 6 Centre for Digital Music, Queen Mary, University of London.
cannam@26 7 This file copyright 2005 Nicolas Chetry, copyright 2008 QMUL.
Chris@84 8
Chris@84 9 This program is free software; you can redistribute it and/or
Chris@84 10 modify it under the terms of the GNU General Public License as
Chris@84 11 published by the Free Software Foundation; either version 2 of the
Chris@84 12 License, or (at your option) any later version. See the file
Chris@84 13 COPYING included with this distribution for more information.
cannam@26 14 */
cannam@25 15
cannam@26 16 #include <cmath>
cannam@26 17 #include <cstdlib>
cannam@47 18 #include <cstring>
cannam@26 19
cannam@26 20 #include "MFCC.h"
cannam@26 21 #include "dsp/transforms/FFT.h"
cannam@26 22 #include "base/Window.h"
cannam@26 23
cannam@26 24 MFCC::MFCC(MFCCConfig config)
cannam@26 25 {
cannam@26 26 int i,j;
cannam@26 27
cannam@26 28 /* Calculate at startup */
cannam@26 29 double *freqs, *lower, *center, *upper, *triangleHeight, *fftFreqs;
cannam@25 30
cannam@26 31 lowestFrequency = 66.6666666;
cannam@26 32 linearFilters = 13;
cannam@26 33 linearSpacing = 66.66666666;
cannam@26 34 logFilters = 27;
cannam@26 35 logSpacing = 1.0711703;
cannam@25 36
cannam@26 37 /* FFT and analysis window sizes */
cannam@26 38 fftSize = config.fftsize;
cannam@64 39 fft = new FFTReal(fftSize);
cannam@26 40
cannam@26 41 totalFilters = linearFilters + logFilters;
cannam@30 42 logPower = config.logpower;
cannam@25 43
cannam@26 44 samplingRate = config.FS;
cannam@26 45
cannam@26 46 /* The number of cepstral componenents */
cannam@26 47 nceps = config.nceps;
cannam@25 48
cannam@26 49 /* Set if user want C0 */
cannam@26 50 WANT_C0 = (config.want_c0 ? 1 : 0);
cannam@25 51
cannam@26 52 /* Allocate space for feature vector */
cannam@26 53 if (WANT_C0 == 1) {
cannam@26 54 ceps = (double*)calloc(nceps+1, sizeof(double));
cannam@26 55 } else {
cannam@26 56 ceps = (double*)calloc(nceps, sizeof(double));
cannam@26 57 }
cannam@26 58
cannam@26 59 /* Allocate space for local vectors */
cannam@26 60 mfccDCTMatrix = (double**)calloc(nceps+1, sizeof(double*));
cannam@30 61 for (i = 0; i < nceps+1; i++) {
cannam@26 62 mfccDCTMatrix[i]= (double*)calloc(totalFilters, sizeof(double));
cannam@26 63 }
cannam@26 64
cannam@26 65 mfccFilterWeights = (double**)calloc(totalFilters, sizeof(double*));
cannam@30 66 for (i = 0; i < totalFilters; i++) {
cannam@26 67 mfccFilterWeights[i] = (double*)calloc(fftSize, sizeof(double));
cannam@26 68 }
cannam@26 69
cannam@26 70 freqs = (double*)calloc(totalFilters+2,sizeof(double));
cannam@26 71
cannam@26 72 lower = (double*)calloc(totalFilters,sizeof(double));
cannam@26 73 center = (double*)calloc(totalFilters,sizeof(double));
cannam@26 74 upper = (double*)calloc(totalFilters,sizeof(double));
cannam@26 75
cannam@26 76 triangleHeight = (double*)calloc(totalFilters,sizeof(double));
cannam@26 77 fftFreqs = (double*)calloc(fftSize,sizeof(double));
cannam@25 78
cannam@30 79 for (i = 0; i < linearFilters; i++) {
cannam@26 80 freqs[i] = lowestFrequency + ((double)i) * linearSpacing;
cannam@26 81 }
cannam@26 82
cannam@30 83 for (i = linearFilters; i < totalFilters+2; i++) {
cannam@26 84 freqs[i] = freqs[linearFilters-1] *
cannam@26 85 pow(logSpacing, (double)(i-linearFilters+1));
cannam@26 86 }
cannam@26 87
cannam@26 88 /* Define lower, center and upper */
cannam@26 89 memcpy(lower, freqs,totalFilters*sizeof(double));
cannam@26 90 memcpy(center, &freqs[1],totalFilters*sizeof(double));
cannam@26 91 memcpy(upper, &freqs[2],totalFilters*sizeof(double));
cannam@26 92
cannam@26 93 for (i=0;i<totalFilters;i++){
cannam@26 94 triangleHeight[i] = 2./(upper[i]-lower[i]);
cannam@26 95 }
cannam@26 96
cannam@26 97 for (i=0;i<fftSize;i++){
cannam@26 98 fftFreqs[i] = ((double) i / ((double) fftSize ) *
cannam@26 99 (double) samplingRate);
cannam@26 100 }
cannam@25 101
cannam@26 102 /* Build now the mccFilterWeight matrix */
cannam@26 103 for (i=0;i<totalFilters;i++){
cannam@25 104
cannam@26 105 for (j=0;j<fftSize;j++) {
cannam@26 106
cannam@26 107 if ((fftFreqs[j] > lower[i]) && (fftFreqs[j] <= center[i])) {
cannam@26 108
cannam@26 109 mfccFilterWeights[i][j] = triangleHeight[i] *
cannam@26 110 (fftFreqs[j]-lower[i]) / (center[i]-lower[i]);
cannam@26 111
cannam@26 112 }
cannam@26 113 else
cannam@26 114 {
cannam@26 115 mfccFilterWeights[i][j] = 0.0;
cannam@26 116 }
cannam@25 117
cannam@26 118 if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) {
cannam@25 119
cannam@30 120 mfccFilterWeights[i][j] = mfccFilterWeights[i][j]
cannam@30 121 + triangleHeight[i] * (upper[i]-fftFreqs[j])
cannam@26 122 / (upper[i]-center[i]);
cannam@26 123 }
cannam@26 124 else
cannam@26 125 {
cannam@26 126 mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + 0.0;
cannam@26 127 }
cannam@25 128 }
cannam@25 129
cannam@26 130 }
cannam@25 131
cannam@26 132 /*
cannam@26 133 * We calculate now mfccDCT matrix
cannam@26 134 * NB: +1 because of the DC component
cannam@26 135 */
cannam@29 136
cannam@29 137 const double pi = 3.14159265358979323846264338327950288;
cannam@26 138
cannam@30 139 for (i = 0; i < nceps+1; i++) {
cannam@30 140 for (j = 0; j < totalFilters; j++) {
cannam@26 141 mfccDCTMatrix[i][j] = (1./sqrt((double) totalFilters / 2.))
cannam@29 142 * cos((double) i * ((double) j + 0.5) / (double) totalFilters * pi);
cannam@25 143 }
cannam@25 144 }
cannam@25 145
cannam@30 146 for (j = 0; j < totalFilters; j++){
cannam@30 147 mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfccDCTMatrix[0][j];
cannam@26 148 }
cannam@26 149
cannam@26 150 /* The analysis window */
cannam@32 151 window = new Window<double>(config.window, fftSize);
cannam@25 152
cannam@26 153 /* Allocate memory for the FFT */
cannam@30 154 realOut = (double*)calloc(fftSize, sizeof(double));
cannam@30 155 imagOut = (double*)calloc(fftSize, sizeof(double));
cannam@30 156
cannam@30 157 earMag = (double*)calloc(totalFilters, sizeof(double));
cannam@30 158 fftMag = (double*)calloc(fftSize/2, sizeof(double));
cannam@25 159
cannam@26 160 free(freqs);
cannam@26 161 free(lower);
cannam@26 162 free(center);
cannam@26 163 free(upper);
cannam@26 164 free(triangleHeight);
cannam@26 165 free(fftFreqs);
cannam@25 166 }
cannam@25 167
cannam@26 168 MFCC::~MFCC()
cannam@26 169 {
cannam@26 170 int i;
cannam@26 171
cannam@26 172 /* Free the structure */
cannam@30 173 for (i = 0; i < nceps+1; i++) {
cannam@26 174 free(mfccDCTMatrix[i]);
cannam@26 175 }
cannam@26 176 free(mfccDCTMatrix);
cannam@26 177
cannam@30 178 for (i = 0; i < totalFilters; i++) {
cannam@26 179 free(mfccFilterWeights[i]);
cannam@26 180 }
cannam@26 181 free(mfccFilterWeights);
cannam@26 182
cannam@26 183 /* Free the feature vector */
cannam@26 184 free(ceps);
cannam@26 185
cannam@26 186 /* The analysis window */
cannam@26 187 delete window;
cannam@30 188
cannam@30 189 free(earMag);
cannam@30 190 free(fftMag);
cannam@26 191
cannam@26 192 /* Free the FFT */
cannam@26 193 free(realOut);
cannam@26 194 free(imagOut);
cannam@64 195
cannam@64 196 delete fft;
cannam@26 197 }
cannam@25 198
cannam@25 199
cannam@25 200 /*
cannam@25 201 *
cannam@25 202 * Extract the MFCC on the input frame
cannam@25 203 *
cannam@25 204 */
cannam@30 205 int MFCC::process(const double *inframe, double *outceps)
cannam@26 206 {
cannam@30 207 double *inputData = (double *)malloc(fftSize * sizeof(double));
cannam@30 208 for (int i = 0; i < fftSize; ++i) inputData[i] = inframe[i];
cannam@25 209
cannam@30 210 window->cut(inputData);
cannam@26 211
cannam@26 212 /* Calculate the fft on the input frame */
Chris@114 213 fft->forward(inputData, realOut, imagOut);
cannam@25 214
cannam@30 215 free(inputData);
cannam@30 216
cannam@30 217 return process(realOut, imagOut, outceps);
cannam@30 218 }
cannam@30 219
cannam@30 220 int MFCC::process(const double *real, const double *imag, double *outceps)
cannam@30 221 {
cannam@30 222 int i, j;
cannam@30 223
cannam@30 224 for (i = 0; i < fftSize/2; ++i) {
cannam@30 225 fftMag[i] = sqrt(real[i] * real[i] + imag[i] * imag[i]);
cannam@30 226 }
cannam@30 227
cannam@30 228 for (i = 0; i < totalFilters; ++i) {
cannam@30 229 earMag[i] = 0.0;
cannam@25 230 }
cannam@25 231
cannam@26 232 /* Multiply by mfccFilterWeights */
cannam@30 233 for (i = 0; i < totalFilters; i++) {
cannam@30 234 double tmp = 0.0;
cannam@30 235 for (j = 0; j < fftSize/2; j++) {
cannam@30 236 tmp = tmp + (mfccFilterWeights[i][j] * fftMag[j]);
cannam@26 237 }
cannam@30 238 if (tmp > 0) earMag[i] = log10(tmp);
cannam@30 239 else earMag[i] = 0.0;
cannam@30 240
cannam@30 241 if (logPower != 1.0) {
cannam@30 242 earMag[i] = pow(earMag[i], logPower);
cannam@30 243 }
cannam@26 244 }
cannam@26 245
cannam@26 246 /*
cannam@26 247 *
cannam@26 248 * Calculate now the cepstral coefficients
cannam@26 249 * with or without the DC component
cannam@26 250 *
cannam@26 251 */
cannam@26 252
cannam@30 253 if (WANT_C0 == 1) {
cannam@26 254
cannam@30 255 for (i = 0; i < nceps+1; i++) {
cannam@30 256 double tmp = 0.;
cannam@30 257 for (j = 0; j < totalFilters; j++){
cannam@30 258 tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
cannam@26 259 }
cannam@26 260 outceps[i] = tmp;
cannam@26 261 }
cannam@26 262 }
cannam@25 263 else
cannam@26 264 {
cannam@30 265 for (i = 1; i < nceps+1; i++) {
cannam@30 266 double tmp = 0.;
cannam@30 267 for (j = 0; j < totalFilters; j++){
cannam@30 268 tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
cannam@26 269 }
cannam@26 270 outceps[i-1] = tmp;
cannam@25 271 }
cannam@26 272 }
cannam@25 273
cannam@26 274 return nceps;
cannam@25 275 }
cannam@25 276