qm-dsp: dsp/mfcc/MFCC.cpp annotate

annotate dsp/mfcc/MFCC.cpp @ 289:befe5aa6b450

* Refactor FFT a little bit so as to separate construction and processing rather than have a single static method -- will make it easier to use a different implementation * pull in KissFFT implementation (not hooked up yet)

author	Chris Cannam <c.cannam@qmul.ac.uk>
date	Wed, 13 May 2009 09:19:12 +0000
parents	330c2e11f8a9
children	d5014ab8b0e5

rev	line source
c@251	1 /* -- c-basic-offset: 4 indent-tabs-mode: nil -- vi:set ts=8 sts=4 sw=4: */
c@251	2
c@251	3 /*
c@251	4 QM DSP Library
c@251	5
c@251	6 Centre for Digital Music, Queen Mary, University of London.
c@251	7 This file copyright 2005 Nicolas Chetry, copyright 2008 QMUL.
c@251	8 All rights reserved.
c@251	9 */
c@251	10
c@251	11 #include <cmath>
c@251	12 #include <cstdlib>
c@272	13 #include <cstring>
c@251	14
c@251	15 #include "MFCC.h"
c@251	16 #include "dsp/transforms/FFT.h"
c@251	17 #include "base/Window.h"
c@251	18
c@251	19 MFCC::MFCC(MFCCConfig config)
c@251	20 {
c@251	21 int i,j;
c@251	22
c@251	23 /* Calculate at startup */
c@251	24 double freqs, lower, center, upper, triangleHeight, fftFreqs;
c@251	25
c@251	26 lowestFrequency = 66.6666666;
c@251	27 linearFilters = 13;
c@251	28 linearSpacing = 66.66666666;
c@251	29 logFilters = 27;
c@251	30 logSpacing = 1.0711703;
c@251	31
c@251	32 /* FFT and analysis window sizes */
c@251	33 fftSize = config.fftsize;
c@289	34 fft = new FFTReal(fftSize);
c@251	35
c@251	36 totalFilters = linearFilters + logFilters;
c@255	37 logPower = config.logpower;
c@251	38
c@251	39 samplingRate = config.FS;
c@251	40
c@251	41 /* The number of cepstral componenents */
c@251	42 nceps = config.nceps;
c@251	43
c@251	44 /* Set if user want C0 */
c@251	45 WANT_C0 = (config.want_c0 ? 1 : 0);
c@251	46
c@251	47 /* Allocate space for feature vector */
c@251	48 if (WANT_C0 == 1) {
c@251	49 ceps = (double*)calloc(nceps+1, sizeof(double));
c@251	50 } else {
c@251	51 ceps = (double*)calloc(nceps, sizeof(double));
c@251	52 }
c@251	53
c@251	54 /* Allocate space for local vectors */
c@251	55 mfccDCTMatrix = (double*)calloc(nceps+1, sizeof(double));
c@255	56 for (i = 0; i < nceps+1; i++) {
c@251	57 mfccDCTMatrix[i]= (double*)calloc(totalFilters, sizeof(double));
c@251	58 }
c@251	59
c@251	60 mfccFilterWeights = (double*)calloc(totalFilters, sizeof(double));
c@255	61 for (i = 0; i < totalFilters; i++) {
c@251	62 mfccFilterWeights[i] = (double*)calloc(fftSize, sizeof(double));
c@251	63 }
c@251	64
c@251	65 freqs = (double*)calloc(totalFilters+2,sizeof(double));
c@251	66
c@251	67 lower = (double*)calloc(totalFilters,sizeof(double));
c@251	68 center = (double*)calloc(totalFilters,sizeof(double));
c@251	69 upper = (double*)calloc(totalFilters,sizeof(double));
c@251	70
c@251	71 triangleHeight = (double*)calloc(totalFilters,sizeof(double));
c@251	72 fftFreqs = (double*)calloc(fftSize,sizeof(double));
c@251	73
c@255	74 for (i = 0; i < linearFilters; i++) {
c@251	75 freqs[i] = lowestFrequency + ((double)i) * linearSpacing;
c@251	76 }
c@251	77
c@255	78 for (i = linearFilters; i < totalFilters+2; i++) {
c@251	79 freqs[i] = freqs[linearFilters-1] *
c@251	80 pow(logSpacing, (double)(i-linearFilters+1));
c@251	81 }
c@251	82
c@251	83 /* Define lower, center and upper */
c@251	84 memcpy(lower, freqs,totalFilters*sizeof(double));
c@251	85 memcpy(center, &freqs[1],totalFilters*sizeof(double));
c@251	86 memcpy(upper, &freqs[2],totalFilters*sizeof(double));
c@251	87
c@251	88 for (i=0;i<totalFilters;i++){
c@251	89 triangleHeight[i] = 2./(upper[i]-lower[i]);
c@251	90 }
c@251	91
c@251	92 for (i=0;i<fftSize;i++){
c@251	93 fftFreqs[i] = ((double) i / ((double) fftSize ) *
c@251	94 (double) samplingRate);
c@251	95 }
c@251	96
c@251	97 /* Build now the mccFilterWeight matrix */
c@251	98 for (i=0;i<totalFilters;i++){
c@251	99
c@251	100 for (j=0;j<fftSize;j++) {
c@251	101
c@251	102 if ((fftFreqs[j] > lower[i]) && (fftFreqs[j] <= center[i])) {
c@251	103
c@251	104 mfccFilterWeights[i][j] = triangleHeight[i] *
c@251	105 (fftFreqs[j]-lower[i]) / (center[i]-lower[i]);
c@251	106
c@251	107 }
c@251	108 else
c@251	109 {
c@251	110 mfccFilterWeights[i][j] = 0.0;
c@251	111 }
c@251	112
c@251	113 if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) {
c@251	114
c@255	115 mfccFilterWeights[i][j] = mfccFilterWeights[i][j]
c@255	116 + triangleHeight[i] * (upper[i]-fftFreqs[j])
c@251	117 / (upper[i]-center[i]);
c@251	118 }
c@251	119 else
c@251	120 {
c@251	121 mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + 0.0;
c@251	122 }
c@251	123 }
c@251	124
c@251	125 }
c@251	126
c@251	127 /*
c@251	128 * We calculate now mfccDCT matrix
c@251	129 * NB: +1 because of the DC component
c@251	130 */
c@254	131
c@254	132 const double pi = 3.14159265358979323846264338327950288;
c@251	133
c@255	134 for (i = 0; i < nceps+1; i++) {
c@255	135 for (j = 0; j < totalFilters; j++) {
c@251	136 mfccDCTMatrix[i][j] = (1./sqrt((double) totalFilters / 2.))
c@254	137 * cos((double) i * ((double) j + 0.5) / (double) totalFilters * pi);
c@251	138 }
c@251	139 }
c@251	140
c@255	141 for (j = 0; j < totalFilters; j++){
c@255	142 mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfccDCTMatrix[0][j];
c@251	143 }
c@251	144
c@251	145 /* The analysis window */
c@257	146 window = new Window<double>(config.window, fftSize);
c@251	147
c@251	148 /* Allocate memory for the FFT */
c@255	149 realOut = (double*)calloc(fftSize, sizeof(double));
c@255	150 imagOut = (double*)calloc(fftSize, sizeof(double));
c@255	151
c@255	152 earMag = (double*)calloc(totalFilters, sizeof(double));
c@255	153 fftMag = (double*)calloc(fftSize/2, sizeof(double));
c@251	154
c@251	155 free(freqs);
c@251	156 free(lower);
c@251	157 free(center);
c@251	158 free(upper);
c@251	159 free(triangleHeight);
c@251	160 free(fftFreqs);
c@251	161 }
c@251	162
c@251	163 MFCC::~MFCC()
c@251	164 {
c@251	165 int i;
c@251	166
c@251	167 /* Free the structure */
c@255	168 for (i = 0; i < nceps+1; i++) {
c@251	169 free(mfccDCTMatrix[i]);
c@251	170 }
c@251	171 free(mfccDCTMatrix);
c@251	172
c@255	173 for (i = 0; i < totalFilters; i++) {
c@251	174 free(mfccFilterWeights[i]);
c@251	175 }
c@251	176 free(mfccFilterWeights);
c@251	177
c@251	178 /* Free the feature vector */
c@251	179 free(ceps);
c@251	180
c@251	181 /* The analysis window */
c@251	182 delete window;
c@255	183
c@255	184 free(earMag);
c@255	185 free(fftMag);
c@251	186
c@251	187 /* Free the FFT */
c@251	188 free(realOut);
c@251	189 free(imagOut);
c@289	190
c@289	191 delete fft;
c@251	192 }
c@251	193
c@251	194
c@251	195 /*
c@251	196 *
c@251	197 * Extract the MFCC on the input frame
c@251	198 *
c@251	199 */
c@255	200 int MFCC::process(const double inframe, double outceps)
c@251	201 {
c@255	202 double inputData = (double )malloc(fftSize * sizeof(double));
c@255	203 for (int i = 0; i < fftSize; ++i) inputData[i] = inframe[i];
c@251	204
c@255	205 window->cut(inputData);
c@251	206
c@251	207 /* Calculate the fft on the input frame */
c@289	208 fft->process(0, inputData, realOut, imagOut);
c@251	209
c@255	210 free(inputData);
c@255	211
c@255	212 return process(realOut, imagOut, outceps);
c@255	213 }
c@255	214
c@255	215 int MFCC::process(const double real, const double imag, double *outceps)
c@255	216 {
c@255	217 int i, j;
c@255	218
c@255	219 for (i = 0; i < fftSize/2; ++i) {
c@255	220 fftMag[i] = sqrt(real[i] * real[i] + imag[i] * imag[i]);
c@255	221 }
c@255	222
c@255	223 for (i = 0; i < totalFilters; ++i) {
c@255	224 earMag[i] = 0.0;
c@251	225 }
c@251	226
c@251	227 /* Multiply by mfccFilterWeights */
c@255	228 for (i = 0; i < totalFilters; i++) {
c@255	229 double tmp = 0.0;
c@255	230 for (j = 0; j < fftSize/2; j++) {
c@255	231 tmp = tmp + (mfccFilterWeights[i][j] * fftMag[j]);
c@251	232 }
c@255	233 if (tmp > 0) earMag[i] = log10(tmp);
c@255	234 else earMag[i] = 0.0;
c@255	235
c@255	236 if (logPower != 1.0) {
c@255	237 earMag[i] = pow(earMag[i], logPower);
c@255	238 }
c@251	239 }
c@251	240
c@251	241 /*
c@251	242 *
c@251	243 * Calculate now the cepstral coefficients
c@251	244 * with or without the DC component
c@251	245 *
c@251	246 */
c@251	247
c@255	248 if (WANT_C0 == 1) {
c@251	249
c@255	250 for (i = 0; i < nceps+1; i++) {
c@255	251 double tmp = 0.;
c@255	252 for (j = 0; j < totalFilters; j++){
c@255	253 tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
c@251	254 }
c@251	255 outceps[i] = tmp;
c@251	256 }
c@251	257 }
c@251	258 else
c@251	259 {
c@255	260 for (i = 1; i < nceps+1; i++) {
c@255	261 double tmp = 0.;
c@255	262 for (j = 0; j < totalFilters; j++){
c@255	263 tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
c@251	264 }
c@251	265 outceps[i-1] = tmp;
c@251	266 }
c@251	267 }
c@251	268
c@251	269 return nceps;
c@251	270 }
c@251	271

Mercurial > hg > qm-dsp

annotate dsp/mfcc/MFCC.cpp @ 289:befe5aa6b450