Mercurial > hg > qm-dsp
changeset 26:d096a79fa772
* Add timbral (MFCC) feature option to segmenter
author | cannam |
---|---|
date | Thu, 10 Jan 2008 16:41:33 +0000 |
parents | 54a962727271 |
children | b678e72323df |
files | base/Window.h dsp/mfcc/MFCC.c dsp/mfcc/MFCC.cpp dsp/mfcc/MFCC.h dsp/segmentation/ClusterMeltSegmenter.cpp dsp/segmentation/ClusterMeltSegmenter.h dsp/segmentation/segment.h qm-dsp.pro |
diffstat | 8 files changed, 441 insertions(+), 375 deletions(-) [+] |
line wrap: on
line diff
--- a/base/Window.h Thu Jan 10 15:16:08 2008 +0000 +++ b/base/Window.h Thu Jan 10 16:41:33 2008 +0000 @@ -40,7 +40,7 @@ encache(); return *this; } - virtual ~Window() { delete m_cache; } + virtual ~Window() { delete[] m_cache; } void cut(T *src) const { cut(src, src); } void cut(T *src, T *dst) const {
--- a/dsp/mfcc/MFCC.c Thu Jan 10 15:16:08 2008 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,292 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <math.h> - -#include "mfcc.h" -#include "SBFFT.h" -#include "windows.h" - -/* - * - * Initialise the MFCC structure and return a pointer to a - * feature vector - * - */ - -extern mfcc_t* init_mfcc(int fftSize, int nceps , int samplingRate, int WANT_C0){ - - int i,j; - /* Calculate at startup */ - double *freqs, *lower, *center, *upper, *triangleHeight, *fftFreqs; - - /* Allocate space for the structure */ - mfcc_t* mfcc_p = (mfcc_t*)malloc(sizeof(mfcc_t)); - - mfcc_p->lowestFrequency = 66.6666666; - mfcc_p->linearFilters = 13; - mfcc_p->linearSpacing = 66.66666666; - mfcc_p->logFilters = 27; - mfcc_p->logSpacing = 1.0711703; - - /* FFT and analysis window sizes */ - mfcc_p->fftSize = fftSize; - - mfcc_p->totalFilters = mfcc_p->linearFilters + mfcc_p->logFilters; - - mfcc_p->samplingRate = samplingRate; - - /* The number of cepstral componenents */ - mfcc_p->nceps = nceps; - - /* Set if user want C0 */ - mfcc_p->WANT_C0 = WANT_C0; - - /* Allocate space for feature vector */ - if (mfcc_p->WANT_C0==1) { - mfcc_p->ceps = (double*)calloc(nceps+1,sizeof(double)); - } else { - mfcc_p->ceps = (double*)calloc(nceps,sizeof(double)); - } - - /* Allocate space for local vectors */ - mfcc_p->mfccDCTMatrix = (double**)calloc(mfcc_p->nceps+1, sizeof(double*)); - for (i=0;i<mfcc_p->nceps+1; i++) { - mfcc_p->mfccDCTMatrix[i]= (double*)calloc(mfcc_p->totalFilters, sizeof(double)); - } - - mfcc_p->mfccFilterWeights = (double**)calloc(mfcc_p->totalFilters, sizeof(double*)); - for (i=0;i<mfcc_p->totalFilters; i++) { - mfcc_p->mfccFilterWeights[i] = (double*)calloc(mfcc_p->fftSize, sizeof(double)); - } - - freqs = (double*)calloc(mfcc_p->totalFilters+2,sizeof(double)); - - lower = (double*)calloc(mfcc_p->totalFilters,sizeof(double)); - center = (double*)calloc(mfcc_p->totalFilters,sizeof(double)); - upper = (double*)calloc(mfcc_p->totalFilters,sizeof(double)); - - triangleHeight = (double*)calloc(mfcc_p->totalFilters,sizeof(double)); - fftFreqs = (double*)calloc(mfcc_p->fftSize,sizeof(double)); - - for (i=0;i<mfcc_p->linearFilters;i++) { - freqs[i] = mfcc_p->lowestFrequency + ((double)i) * mfcc_p->linearSpacing; - } - - for (i=mfcc_p->linearFilters; i<mfcc_p->totalFilters+2; i++) { - freqs[i] = freqs[mfcc_p->linearFilters-1] * - pow(mfcc_p->logSpacing, (double)(i-mfcc_p->linearFilters+1)); - } - - /* Define lower, center and upper */ - memcpy(lower, freqs,mfcc_p->totalFilters*sizeof(double)); - memcpy(center, &freqs[1],mfcc_p->totalFilters*sizeof(double)); - memcpy(upper, &freqs[2],mfcc_p->totalFilters*sizeof(double)); - - for (i=0;i<mfcc_p->totalFilters;i++){ - triangleHeight[i] = 2./(upper[i]-lower[i]); - } - - for (i=0;i<mfcc_p->fftSize;i++){ - fftFreqs[i] = ((double) i / ((double) mfcc_p->fftSize ) * - (double) mfcc_p->samplingRate); - } - - /* Build now the mccFilterWeight matrix */ - for (i=0;i<mfcc_p->totalFilters;i++){ - - for (j=0;j<mfcc_p->fftSize;j++) { - - if ((fftFreqs[j] > lower[i]) && (fftFreqs[j] <= center[i])) { - - mfcc_p->mfccFilterWeights[i][j] = triangleHeight[i] * - (fftFreqs[j]-lower[i]) / (center[i]-lower[i]); - - } - else - { - - mfcc_p->mfccFilterWeights[i][j] = 0.0; - - } - - if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) { - - mfcc_p->mfccFilterWeights[i][j] = mfcc_p->mfccFilterWeights[i][j] + triangleHeight[i] * (upper[i]-fftFreqs[j]) - / (upper[i]-center[i]); - } - else - { - - mfcc_p->mfccFilterWeights[i][j] = mfcc_p->mfccFilterWeights[i][j] + 0.0; - - } - } - - } - -#ifndef PI -#define PI 3.14159265358979323846264338327950288 -#endif - - /* - * - * We calculate now mfccDCT matrix - * NB: +1 because of the DC component - * - */ - - for (i=0; i<nceps+1; i++) { - for (j=0; j<mfcc_p->totalFilters; j++) { - mfcc_p->mfccDCTMatrix[i][j] = (1./sqrt((double) mfcc_p->totalFilters / 2.)) - * cos((double) i * ((double) j + 0.5) / (double) mfcc_p->totalFilters * PI); - } - } - - for (j=0;j<mfcc_p->totalFilters;j++){ - mfcc_p->mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfcc_p->mfccDCTMatrix[0][j]; - } - - /* The analysis window */ - mfcc_p->window = hamming(mfcc_p->fftSize); - - /* Allocate memory for the FFT */ - mfcc_p->imagIn = (double*)calloc(mfcc_p->fftSize,sizeof(double)); - mfcc_p->realOut = (double*)calloc(mfcc_p->fftSize,sizeof(double)); - mfcc_p->imagOut = (double*)calloc(mfcc_p->fftSize,sizeof(double)); - - free(freqs); - free(lower); - free(center); - free(upper); - free(triangleHeight); - free(fftFreqs); - - return mfcc_p; - -} - -/* - * - * Free the memory that has been allocated - * - */ - -extern void close_mfcc(mfcc_t* mfcc_p) { - - int i; - - /* Free the structure */ - for (i=0;i<mfcc_p->nceps+1;i++) { - free(mfcc_p->mfccDCTMatrix[i]); - } - free(mfcc_p->mfccDCTMatrix); - - for (i=0;i<mfcc_p->totalFilters; i++) { - free(mfcc_p->mfccFilterWeights[i]); - } - free(mfcc_p->mfccFilterWeights); - - /* Free the feature vector */ - free(mfcc_p->ceps); - - /* The analysis window */ - free(mfcc_p->window); - - /* Free the FFT */ - free(mfcc_p->imagIn); - free(mfcc_p->realOut); - free(mfcc_p->imagOut); - - /* Free the structure itself */ - free(mfcc_p); - mfcc_p = NULL; - -} - -/* - * - * Extract the MFCC on the input frame - * - */ - - -// looks like we have to have length = mfcc_p->fftSize ?????? - -extern int do_mfcc(mfcc_t* mfcc_p, double* frame, int length){ - - int i,j; - - double *fftMag; - double *earMag; - - double *inputData; - - double tmp; - - earMag = (double*)calloc(mfcc_p->totalFilters, sizeof(double)); - inputData = (double*)calloc(mfcc_p->fftSize, sizeof(double)); - - /* Zero-pad if needed */ - memcpy(inputData, frame, length*sizeof(double)); - - /* Calculate the fft on the input frame */ - fft_process(mfcc_p->fftSize, 0, inputData, mfcc_p->imagIn, mfcc_p->realOut, mfcc_p->imagOut); - - /* Get the magnitude */ - fftMag = abs_fft(mfcc_p->realOut, mfcc_p->imagOut, mfcc_p->fftSize); - - /* Multiply by mfccFilterWeights */ - for (i=0;i<mfcc_p->totalFilters;i++) { - tmp = 0.; - for(j=0;j<mfcc_p->fftSize/2; j++) { - tmp = tmp + (mfcc_p->mfccFilterWeights[i][j]*fftMag[j]); - } - if (tmp>0) - earMag[i] = log10(tmp); - else - earMag[i] = 0.0; - } - - /* - * - * Calculate now the ceptral coefficients - * with or without the DC component - * - */ - - if (mfcc_p->WANT_C0==1) { - - for (i=0;i<mfcc_p->nceps+1;i++) { - tmp = 0.; - for (j=0;j<mfcc_p->totalFilters;j++){ - tmp = tmp + mfcc_p->mfccDCTMatrix[i][j]*earMag[j]; - } - /* Send to workspace */ - mfcc_p->ceps[i] = tmp; - } - - } - else - { - for (i=1;i<mfcc_p->nceps+1;i++) { - tmp = 0.; - for (j=0;j<mfcc_p->totalFilters;j++){ - tmp = tmp + mfcc_p->mfccDCTMatrix[i][j]*earMag[j]; - } - /* Send to workspace */ - mfcc_p->ceps[i-1] = tmp; - } - } - - free(fftMag); - free(earMag); - free(inputData); - - return mfcc_p->nceps; - -} - - - -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dsp/mfcc/MFCC.cpp Thu Jan 10 16:41:33 2008 +0000 @@ -0,0 +1,264 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + QM DSP Library + + Centre for Digital Music, Queen Mary, University of London. + This file copyright 2005 Nicolas Chetry, copyright 2008 QMUL. + All rights reserved. +*/ + +#include <cmath> +#include <cstdlib> + +#include "MFCC.h" +#include "dsp/transforms/FFT.h" +#include "base/Window.h" + +MFCC::MFCC(MFCCConfig config) +{ + int i,j; + + /* Calculate at startup */ + double *freqs, *lower, *center, *upper, *triangleHeight, *fftFreqs; + + lowestFrequency = 66.6666666; + linearFilters = 13; + linearSpacing = 66.66666666; + logFilters = 27; + logSpacing = 1.0711703; + + /* FFT and analysis window sizes */ + fftSize = config.fftsize; + + totalFilters = linearFilters + logFilters; + + samplingRate = config.FS; + + /* The number of cepstral componenents */ + nceps = config.nceps; + + /* Set if user want C0 */ + WANT_C0 = (config.want_c0 ? 1 : 0); + + /* Allocate space for feature vector */ + if (WANT_C0 == 1) { + ceps = (double*)calloc(nceps+1, sizeof(double)); + } else { + ceps = (double*)calloc(nceps, sizeof(double)); + } + + /* Allocate space for local vectors */ + mfccDCTMatrix = (double**)calloc(nceps+1, sizeof(double*)); + for (i=0;i<nceps+1; i++) { + mfccDCTMatrix[i]= (double*)calloc(totalFilters, sizeof(double)); + } + + mfccFilterWeights = (double**)calloc(totalFilters, sizeof(double*)); + for (i=0;i<totalFilters; i++) { + mfccFilterWeights[i] = (double*)calloc(fftSize, sizeof(double)); + } + + freqs = (double*)calloc(totalFilters+2,sizeof(double)); + + lower = (double*)calloc(totalFilters,sizeof(double)); + center = (double*)calloc(totalFilters,sizeof(double)); + upper = (double*)calloc(totalFilters,sizeof(double)); + + triangleHeight = (double*)calloc(totalFilters,sizeof(double)); + fftFreqs = (double*)calloc(fftSize,sizeof(double)); + + for (i=0;i<linearFilters;i++) { + freqs[i] = lowestFrequency + ((double)i) * linearSpacing; + } + + for (i=linearFilters; i<totalFilters+2; i++) { + freqs[i] = freqs[linearFilters-1] * + pow(logSpacing, (double)(i-linearFilters+1)); + } + + /* Define lower, center and upper */ + memcpy(lower, freqs,totalFilters*sizeof(double)); + memcpy(center, &freqs[1],totalFilters*sizeof(double)); + memcpy(upper, &freqs[2],totalFilters*sizeof(double)); + + for (i=0;i<totalFilters;i++){ + triangleHeight[i] = 2./(upper[i]-lower[i]); + } + + for (i=0;i<fftSize;i++){ + fftFreqs[i] = ((double) i / ((double) fftSize ) * + (double) samplingRate); + } + + /* Build now the mccFilterWeight matrix */ + for (i=0;i<totalFilters;i++){ + + for (j=0;j<fftSize;j++) { + + if ((fftFreqs[j] > lower[i]) && (fftFreqs[j] <= center[i])) { + + mfccFilterWeights[i][j] = triangleHeight[i] * + (fftFreqs[j]-lower[i]) / (center[i]-lower[i]); + + } + else + { + mfccFilterWeights[i][j] = 0.0; + } + + if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) { + + mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + triangleHeight[i] * (upper[i]-fftFreqs[j]) + / (upper[i]-center[i]); + } + else + { + mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + 0.0; + } + } + + } + + /* + * We calculate now mfccDCT matrix + * NB: +1 because of the DC component + */ + + for (i=0; i<nceps+1; i++) { + for (j=0; j<totalFilters; j++) { + mfccDCTMatrix[i][j] = (1./sqrt((double) totalFilters / 2.)) + * cos((double) i * ((double) j + 0.5) / (double) totalFilters * M_PI); + } + } + + for (j=0;j<totalFilters;j++){ + mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfccDCTMatrix[0][j]; + } + + /* The analysis window */ + window = new Window<double>(HammingWindow, fftSize); + + /* Allocate memory for the FFT */ + imagIn = (double*)calloc(fftSize,sizeof(double)); + realOut = (double*)calloc(fftSize,sizeof(double)); + imagOut = (double*)calloc(fftSize,sizeof(double)); + + free(freqs); + free(lower); + free(center); + free(upper); + free(triangleHeight); + free(fftFreqs); +} + +MFCC::~MFCC() +{ + int i; + + /* Free the structure */ + for (i=0;i<nceps+1;i++) { + free(mfccDCTMatrix[i]); + } + free(mfccDCTMatrix); + + for (i=0;i<totalFilters; i++) { + free(mfccFilterWeights[i]); + } + free(mfccFilterWeights); + + /* Free the feature vector */ + free(ceps); + + /* The analysis window */ + delete window; + + /* Free the FFT */ + free(imagIn); + free(realOut); + free(imagOut); +} + + +/* + * + * Extract the MFCC on the input frame + * + * looks like we have to have length = fftSize ?????? + * + */ +int MFCC::process(int length, double *inframe, double *outceps) +{ + int i,j; + + double *fftMag; + double *earMag; + + double *inputData; + + double tmp; + + earMag = (double*)calloc(totalFilters, sizeof(double)); + inputData = (double*)calloc(fftSize, sizeof(double)); + fftMag = (double*)calloc(fftSize, sizeof(double)); + + /* Zero-pad if needed */ + memcpy(inputData, inframe, length*sizeof(double)); + + window->cut(inputData); + + /* Calculate the fft on the input frame */ + FFT::process(fftSize, 0, inputData, imagIn, realOut, imagOut); + + for (i = 0; i < fftSize; ++i) { + fftMag[i] = sqrt(realOut[i] * realOut[i] + + imagOut[i] * imagOut[i]); + } + + /* Multiply by mfccFilterWeights */ + for (i=0;i<totalFilters;i++) { + tmp = 0.; + for(j=0;j<fftSize/2; j++) { + tmp = tmp + (mfccFilterWeights[i][j]*fftMag[j]); + } + if (tmp>0) + earMag[i] = log10(tmp); + else + earMag[i] = 0.0; + } + + /* + * + * Calculate now the cepstral coefficients + * with or without the DC component + * + */ + + if (WANT_C0==1) { + + for (i=0;i<nceps+1;i++) { + tmp = 0.; + for (j=0;j<totalFilters;j++){ + tmp = tmp + mfccDCTMatrix[i][j]*earMag[j]; + } + outceps[i] = tmp; + } + } + else + { + for (i=1;i<nceps+1;i++) { + tmp = 0.; + for (j=0;j<totalFilters;j++){ + tmp = tmp + mfccDCTMatrix[i][j]*earMag[j]; + } + outceps[i-1] = tmp; + } + } + + free(fftMag); + free(earMag); + free(inputData); + + return nceps; +} +
--- a/dsp/mfcc/MFCC.h Thu Jan 10 15:16:08 2008 +0000 +++ b/dsp/mfcc/MFCC.h Thu Jan 10 16:41:33 2008 +0000 @@ -1,51 +1,70 @@ -#ifndef _LIB_MFCC_H -#define _LIB_MFCC_H +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ -#define MFCC 6 +/* + QM DSP Library -typedef struct mfcc_t { - - /* Filter bank parameters */ - double lowestFrequency; - int linearFilters; - double linearSpacing; - int logFilters; - double logSpacing; - - /* FFT length */ - int fftSize; - - /* Analysis window length*/ - int windowSize; - - int totalFilters; - - /* Misc. */ - int samplingRate; - int nceps; - - /* MFCC vector */ - double *ceps; - - double **mfccDCTMatrix; - double **mfccFilterWeights; - - /* The analysis window */ - double *window; - - /* For the FFT */ - double* imagIn; // always zero - double* realOut; - double* imagOut; - - /* Set if user want C0 */ - int WANT_C0; - -} mfcc_t; + Centre for Digital Music, Queen Mary, University of London. + This file copyright 2005 Nicolas Chetry, copyright 2008 QMUL. + All rights reserved. +*/ -extern mfcc_t* init_mfcc(int fftSize, int nceps , int samplingRate, int WANT_C0); -extern int do_mfcc(mfcc_t* mfcc_p, double* frame, int length); -extern void close_mfcc(mfcc_t* mfcc_p); +#ifndef MFCC_H +#define MFCC_H + +#include "base/Window.h" + +struct MFCCConfig { + int FS; + int fftsize; + int nceps; + bool want_c0; +}; + +class MFCC +{ +public: + MFCC(MFCCConfig config); + virtual ~MFCC(); + + int process(int length, double *inframe, double *outceps); + + int getfftlength() const { return fftSize; } + +private: + /* Filter bank parameters */ + double lowestFrequency; + int linearFilters; + double linearSpacing; + int logFilters; + double logSpacing; + + /* FFT length */ + int fftSize; + + int totalFilters; + + /* Misc. */ + int samplingRate; + int nceps; + + /* MFCC vector */ + double *ceps; + + double **mfccDCTMatrix; + double **mfccFilterWeights; + + /* The analysis window */ + Window<double> *window; + + /* For the FFT */ + double* imagIn; // always zero + double* realOut; + double* imagOut; + + /* Set if user want C0 */ + int WANT_C0; +}; + #endif
--- a/dsp/segmentation/ClusterMeltSegmenter.cpp Thu Jan 10 15:16:08 2008 +0000 +++ b/dsp/segmentation/ClusterMeltSegmenter.cpp Thu Jan 10 16:41:33 2008 +0000 @@ -18,10 +18,12 @@ #include "dsp/transforms/FFT.h" #include "dsp/chromagram/ConstantQ.h" #include "dsp/rateconversion/Decimator.h" +#include "dsp/mfcc/MFCC.h" ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) : window(NULL), constq(NULL), + mfcc(NULL), featureType(params.featureType), hopSize(params.hopSize), windowSize(params.windowSize), @@ -33,7 +35,7 @@ nclusters(params.nclusters), histogramLength(params.histogramLength), neighbourhoodLimit(params.neighbourhoodLimit), - decimator(0) + decimator(NULL) { } @@ -41,9 +43,10 @@ { samplerate = fs; - if (featureType != FEATURE_TYPE_UNKNOWN) - { - // always run internal processing at 11025 or thereabouts + if (featureType == FEATURE_TYPE_CONSTQ || + featureType == FEATURE_TYPE_CHROMA) { + + // run internal processing at 11025 or thereabouts int internalRate = 11025; int decimationFactor = samplerate / internalRate; if (decimationFactor < 1) decimationFactor = 1; @@ -68,8 +71,19 @@ constq = new ConstantQ(config); constq->sparsekernel(); + + ncoeff = constq->getK(); + + } else if (featureType == FEATURE_TYPE_MFCC) { - ncoeff = constq->getK(); + MFCCConfig config; + config.FS = samplerate; + config.fftsize = 1024; + config.nceps = 20; + config.want_c0 = false; + + mfcc = new MFCC(config); + ncoeff = config.nceps; } } @@ -83,24 +97,6 @@ int ClusterMeltSegmenter::getWindowsize() { - if (featureType != FEATURE_TYPE_UNKNOWN) { - - if (constq) { -/* - std::cerr << "ClusterMeltSegmenter::getWindowsize: " - << "rate = " << samplerate - << ", dec factor = " << (decimator ? decimator->getFactor() : 1) - << ", fft length = " << constq->getfftlength() - << ", fmin = " << fmin - << ", fmax = " << fmax - << ", nbins = " << nbins - << ", K = " << constq->getK() - << ", Q = " << constq->getQ() - << std::endl; -*/ - } - } - return static_cast<int>(windowSize * samplerate); } @@ -112,9 +108,19 @@ void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples) { + if (featureType == FEATURE_TYPE_CONSTQ || + featureType == FEATURE_TYPE_CHROMA) { + extractFeaturesConstQ(samples, nsamples); + } else if (featureType == FEATURE_TYPE_MFCC) { + extractFeaturesMFCC(samples, nsamples); + } +} + +void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsamples) +{ if (!constq) { - std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: " - << "Cannot run unknown feature type (or initialise not called)" + std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesConstQ: " + << "No const-q: initialise not called?" << std::endl; return; } @@ -198,16 +204,77 @@ delete [] frame; for (int i = 0; i < ncoeff; ++i) { -// std::cerr << cq[i] << " "; cq[i] /= frames; } -// std::cerr << std::endl; if (decimator) delete[] psource; features.push_back(cq); } +void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsamples) +{ + if (!mfcc) { + std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesMFCC: " + << "No mfcc: initialise not called?" + << std::endl; + return; + } + + if (nsamples < getWindowsize()) { + std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl; + return; + } + + int fftsize = mfcc->getfftlength(); + + vector<double> cc(ncoeff); + + for (int i = 0; i < ncoeff; ++i) cc[i] = 0.0; + + const double *psource = samples; + int pcount = nsamples; + + int origin = 0; + int frames = 0; + + double *frame = new double[fftsize]; + double *ccout = new double[ncoeff]; + + while (origin <= pcount) { + + // always need at least one fft window per block, but after + // that we want to avoid having any incomplete ones + if (origin > 0 && origin + fftsize >= pcount) break; + + for (int i = 0; i < fftsize; ++i) { + if (origin + i < pcount) { + frame[i] = psource[origin + i]; + } else { + frame[i] = 0.0; + } + } + + mfcc->process(fftsize, frame, ccout); + + for (int i = 0; i < ncoeff; ++i) { + cc[i] += ccout[i]; + } + ++frames; + + origin += fftsize/2; + } + + delete [] ccout; + delete [] frame; + + for (int i = 0; i < ncoeff; ++i) { + cc[i] /= frames; + } + + features.push_back(cc); +} + void ClusterMeltSegmenter::segment(int m) { nclusters = m; @@ -222,13 +289,12 @@ void ClusterMeltSegmenter::segment() { - if (constq) - { - delete constq; - constq = 0; - delete decimator; - decimator = 0; - } + delete constq; + constq = 0; + delete mfcc; + mfcc = 0; + delete decimator; + decimator = 0; std::cerr << "ClusterMeltSegmenter::segment: have " << features.size() << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl; @@ -250,7 +316,8 @@ q = new int[features.size()]; - if (featureType == FEATURE_TYPE_UNKNOWN) + if (featureType == FEATURE_TYPE_UNKNOWN || + featureType == FEATURE_TYPE_MFCC) cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength, nclusters, neighbourhoodLimit); else
--- a/dsp/segmentation/ClusterMeltSegmenter.h Thu Jan 10 15:16:08 2008 +0000 +++ b/dsp/segmentation/ClusterMeltSegmenter.h Thu Jan 10 16:41:33 2008 +0000 @@ -19,6 +19,7 @@ class Decimator; class ConstantQ; +class MFCC; class ClusterMeltSegmenterParams // defaults are sensible for 11025Hz with 0.2 second hopsize @@ -66,8 +67,12 @@ protected: void makeSegmentation(int* q, int len); + void extractFeaturesConstQ(const double *, int); + void extractFeaturesMFCC(const double *, int); + Window<double> *window; - ConstantQ* constq; + ConstantQ* constq; + MFCC* mfcc; model_t* model; // the HMM int* q; // the decoded HMM state sequence vector<vector<double> > histograms;
--- a/dsp/segmentation/segment.h Thu Jan 10 15:16:08 2008 +0000 +++ b/dsp/segmentation/segment.h Thu Jan 10 16:41:33 2008 +0000 @@ -33,7 +33,8 @@ { FEATURE_TYPE_UNKNOWN = 0, FEATURE_TYPE_CONSTQ = 1, - FEATURE_TYPE_CHROMA + FEATURE_TYPE_CHROMA = 2, + FEATURE_TYPE_MFCC = 3 } feature_types; #ifdef __cplusplus
--- a/qm-dsp.pro Thu Jan 10 15:16:08 2008 +0000 +++ b/qm-dsp.pro Thu Jan 10 16:41:33 2008 +0000 @@ -24,6 +24,7 @@ dsp/chromagram/ChromaProcess.h \ dsp/chromagram/ConstantQ.h \ dsp/keydetection/GetKeyMode.h \ + dsp/mfcc/MFCC.h \ dsp/onsets/DetectionFunction.h \ dsp/onsets/PeakPicking.h \ dsp/phasevocoder/PhaseVocoder.h \ @@ -55,6 +56,7 @@ dsp/chromagram/ChromaProcess.cpp \ dsp/chromagram/ConstantQ.cpp \ dsp/keydetection/GetKeyMode.cpp \ + dsp/mfcc/MFCC.cpp \ dsp/onsets/DetectionFunction.cpp \ dsp/onsets/PeakPicking.cpp \ dsp/phasevocoder/PhaseVocoder.cpp \