Mercurial > hg > qm-dsp
diff dsp/segmentation/ClusterMeltSegmenter.cpp @ 251:c3600d3cfe5c
* Add timbral (MFCC) feature option to segmenter
author | Chris Cannam <c.cannam@qmul.ac.uk> |
---|---|
date | Thu, 10 Jan 2008 16:41:33 +0000 |
parents | 18a0dffa5c1a |
children | b678e72323df |
line wrap: on
line diff
--- a/dsp/segmentation/ClusterMeltSegmenter.cpp Thu Jan 10 15:16:08 2008 +0000 +++ b/dsp/segmentation/ClusterMeltSegmenter.cpp Thu Jan 10 16:41:33 2008 +0000 @@ -18,10 +18,12 @@ #include "dsp/transforms/FFT.h" #include "dsp/chromagram/ConstantQ.h" #include "dsp/rateconversion/Decimator.h" +#include "dsp/mfcc/MFCC.h" ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) : window(NULL), constq(NULL), + mfcc(NULL), featureType(params.featureType), hopSize(params.hopSize), windowSize(params.windowSize), @@ -33,7 +35,7 @@ nclusters(params.nclusters), histogramLength(params.histogramLength), neighbourhoodLimit(params.neighbourhoodLimit), - decimator(0) + decimator(NULL) { } @@ -41,9 +43,10 @@ { samplerate = fs; - if (featureType != FEATURE_TYPE_UNKNOWN) - { - // always run internal processing at 11025 or thereabouts + if (featureType == FEATURE_TYPE_CONSTQ || + featureType == FEATURE_TYPE_CHROMA) { + + // run internal processing at 11025 or thereabouts int internalRate = 11025; int decimationFactor = samplerate / internalRate; if (decimationFactor < 1) decimationFactor = 1; @@ -68,8 +71,19 @@ constq = new ConstantQ(config); constq->sparsekernel(); + + ncoeff = constq->getK(); + + } else if (featureType == FEATURE_TYPE_MFCC) { - ncoeff = constq->getK(); + MFCCConfig config; + config.FS = samplerate; + config.fftsize = 1024; + config.nceps = 20; + config.want_c0 = false; + + mfcc = new MFCC(config); + ncoeff = config.nceps; } } @@ -83,24 +97,6 @@ int ClusterMeltSegmenter::getWindowsize() { - if (featureType != FEATURE_TYPE_UNKNOWN) { - - if (constq) { -/* - std::cerr << "ClusterMeltSegmenter::getWindowsize: " - << "rate = " << samplerate - << ", dec factor = " << (decimator ? decimator->getFactor() : 1) - << ", fft length = " << constq->getfftlength() - << ", fmin = " << fmin - << ", fmax = " << fmax - << ", nbins = " << nbins - << ", K = " << constq->getK() - << ", Q = " << constq->getQ() - << std::endl; -*/ - } - } - return static_cast<int>(windowSize * samplerate); } @@ -112,9 +108,19 @@ void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples) { + if (featureType == FEATURE_TYPE_CONSTQ || + featureType == FEATURE_TYPE_CHROMA) { + extractFeaturesConstQ(samples, nsamples); + } else if (featureType == FEATURE_TYPE_MFCC) { + extractFeaturesMFCC(samples, nsamples); + } +} + +void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsamples) +{ if (!constq) { - std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: " - << "Cannot run unknown feature type (or initialise not called)" + std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesConstQ: " + << "No const-q: initialise not called?" << std::endl; return; } @@ -198,16 +204,77 @@ delete [] frame; for (int i = 0; i < ncoeff; ++i) { -// std::cerr << cq[i] << " "; cq[i] /= frames; } -// std::cerr << std::endl; if (decimator) delete[] psource; features.push_back(cq); } +void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsamples) +{ + if (!mfcc) { + std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesMFCC: " + << "No mfcc: initialise not called?" + << std::endl; + return; + } + + if (nsamples < getWindowsize()) { + std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl; + return; + } + + int fftsize = mfcc->getfftlength(); + + vector<double> cc(ncoeff); + + for (int i = 0; i < ncoeff; ++i) cc[i] = 0.0; + + const double *psource = samples; + int pcount = nsamples; + + int origin = 0; + int frames = 0; + + double *frame = new double[fftsize]; + double *ccout = new double[ncoeff]; + + while (origin <= pcount) { + + // always need at least one fft window per block, but after + // that we want to avoid having any incomplete ones + if (origin > 0 && origin + fftsize >= pcount) break; + + for (int i = 0; i < fftsize; ++i) { + if (origin + i < pcount) { + frame[i] = psource[origin + i]; + } else { + frame[i] = 0.0; + } + } + + mfcc->process(fftsize, frame, ccout); + + for (int i = 0; i < ncoeff; ++i) { + cc[i] += ccout[i]; + } + ++frames; + + origin += fftsize/2; + } + + delete [] ccout; + delete [] frame; + + for (int i = 0; i < ncoeff; ++i) { + cc[i] /= frames; + } + + features.push_back(cc); +} + void ClusterMeltSegmenter::segment(int m) { nclusters = m; @@ -222,13 +289,12 @@ void ClusterMeltSegmenter::segment() { - if (constq) - { - delete constq; - constq = 0; - delete decimator; - decimator = 0; - } + delete constq; + constq = 0; + delete mfcc; + mfcc = 0; + delete decimator; + decimator = 0; std::cerr << "ClusterMeltSegmenter::segment: have " << features.size() << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl; @@ -250,7 +316,8 @@ q = new int[features.size()]; - if (featureType == FEATURE_TYPE_UNKNOWN) + if (featureType == FEATURE_TYPE_UNKNOWN || + featureType == FEATURE_TYPE_MFCC) cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength, nclusters, neighbourhoodLimit); else