annotate dsp/mfcc/MFCC.h @ 73:dcb555b90924

* Key detector: when returning key strengths, use the peak value of the three underlying chromagram correlations (from 36-bin chromagram) corresponding to each key, instead of the mean. Rationale: This is the same method as used when returning the key value, and it's nice to have the same results in both returned value and plot. The peak performed better than the sum with a simple test set of triads, so it seems reasonable to change the plot to match the key output rather than the other way around. * FFT: kiss_fftr returns only the non-conjugate bins, synthesise the rest rather than leaving them (perhaps dangerously) undefined. Fixes an uninitialised data error in chromagram that could cause garbage results from key detector. * Constant Q: remove precalculated values again, I reckon they're not proving such a good tradeoff.
author cannam
date Fri, 05 Jun 2009 15:12:39 +0000
parents 6cb2b3cd5356
children e5907ae6de17
rev   line source
cannam@26 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
cannam@25 2
cannam@26 3 /*
cannam@26 4 QM DSP Library
cannam@25 5
cannam@26 6 Centre for Digital Music, Queen Mary, University of London.
cannam@26 7 This file copyright 2005 Nicolas Chetry, copyright 2008 QMUL.
cannam@26 8 All rights reserved.
cannam@26 9 */
cannam@25 10
cannam@26 11 #ifndef MFCC_H
cannam@26 12 #define MFCC_H
cannam@26 13
cannam@26 14 #include "base/Window.h"
cannam@26 15
cannam@64 16 class FFTReal;
cannam@64 17
cannam@26 18 struct MFCCConfig {
/*
 * Parameter bundle handed (by value) to the MFCC constructor.
 * Only FS must be supplied by the caller; the constructor below fills in
 * every other field with a default, which callers may overwrite before
 * constructing an MFCC.
 */
cannam@26 19 int FS; /* set from the constructor argument; presumably the sample rate in Hz -- confirm against MFCC.cpp */
cannam@26 20 int fftsize; /* defaults to 2048; presumably the FFT length reported by MFCC::getfftlength() -- confirm */
cannam@26 21 int nceps; /* defaults to 19; number of output values, not counting the optional c0 */
cannam@30 22 double logpower; /* defaults to 1.0; exact use is defined in the implementation file */
cannam@26 23 bool want_c0; /* defaults to true; when set, one extra output value is produced */
cannam@32 24 WindowType window; /* defaults to HammingWindow; presumably the analysis window shape -- confirm */
cannam@30 25 MFCCConfig(int _FS) :
cannam@32 26 FS(_FS), fftsize(2048), nceps(19),
cannam@32 27 logpower(1.0), want_c0(true), window(HammingWindow) { }
cannam@26 28 };
cannam@26 29
cannam@26 30 class MFCC
cannam@26 31 {
/*
 * Produces a vector of cepstral coefficients per input frame, either from
 * a time-domain frame or from an already-computed half spectrum (see the
 * two process() overloads). The output holds nceps values, plus one more
 * when c0 was requested in the configuration.
 */
cannam@26 32 public:
cannam@26 33 MFCC(MFCCConfig config); /* config is taken by value */
cannam@26 34 virtual ~MFCC();
cannam@26 35
cannam@30 36 /**
cannam@30 37 * Process time-domain input data. inframe must contain
cannam@30 38 * getfftlength() samples. outceps must contain space for nceps
cannam@30 39 * values, plus one if want_c0 is specified.
cannam@30 40 */
cannam@30 41 int process(const double *inframe, double *outceps); /* meaning of the int result is not documented in this header -- see the implementation */
cannam@30 42
cannam@30 43 /**
cannam@30 44 * Process frequency-domain input data. real and imag must contain
cannam@30 45 * getfftlength()/2+1 elements (i.e. the conjugate half of the FFT
cannam@30 46 * is not expected). outceps must contain space for nceps values,
cannam@30 47 * plus one if want_c0 is specified.
cannam@30 48 */
cannam@30 49 int process(const double *real, const double *imag, double *outceps); /* meaning of the int result is not documented in this header -- see the implementation */
cannam@26 50
cannam@26 51 int getfftlength() const { return fftSize; } /* frame length expected by process(inframe, ...) */
cannam@26 52
cannam@26 53 private:
cannam@26 54 /* Filter bank parameters */
cannam@26 55 double lowestFrequency;
cannam@26 56 int linearFilters;
cannam@26 57 double linearSpacing;
cannam@26 58 int logFilters;
cannam@26 59 double logSpacing;
cannam@26 60
cannam@26 61 /* FFT length */
cannam@26 62 int fftSize;
cannam@26 63
cannam@26 64 int totalFilters; /* presumably linearFilters + logFilters -- confirm in the implementation */
cannam@30 65 double logPower;
cannam@26 66
cannam@26 67 /* Misc. */
cannam@26 68 int samplingRate;
cannam@26 69 int nceps;
cannam@26 70
cannam@26 71 /* MFCC vector */
cannam@26 72 double *ceps;
cannam@26 73
cannam@26 74 double **mfccDCTMatrix;
cannam@26 75 double **mfccFilterWeights;
cannam@26 76
cannam@26 77 /* The analysis window */
cannam@26 78 Window<double> *window;
cannam@26 79
cannam@26 80 /* For the FFT */
cannam@30 81 double *realOut;
cannam@30 82 double *imagOut;
cannam@30 83 double *fftMag;
cannam@30 84 double *earMag;
cannam@64 85 FFTReal *fft;
cannam@30 86
cannam@26 87 /* Set if the user wants C0 (held as an int flag, unlike the bool want_c0 in MFCCConfig) */
cannam@26 88 int WANT_C0;
cannam@26 89 };
cannam@26 90
cannam@25 91
cannam@25 92 #endif
cannam@25 93