Mercurial > hg > qm-dsp
changeset 249:18a0dffa5c1a
* Various fixes to segmentation code
author | Chris Cannam <c.cannam@qmul.ac.uk> |
---|---|
date | Thu, 10 Jan 2008 15:14:53 +0000 |
parents | fd68f25b9949 |
children | a106e551e9a4 |
files | dsp/chromagram/ConstantQ.cpp dsp/segmentation/ClusterMeltSegmenter.cpp dsp/segmentation/ClusterMeltSegmenter.h dsp/segmentation/Segmenter.h |
diffstat | 4 files changed, 306 insertions(+), 241 deletions(-) [+] |
line wrap: on
line diff
--- a/dsp/chromagram/ConstantQ.cpp Thu Jan 10 15:14:30 2008 +0000 +++ b/dsp/chromagram/ConstantQ.cpp Thu Jan 10 15:14:53 2008 +0000 @@ -170,14 +170,14 @@ m_dQ = 1/(pow(2,(1/(double)m_BPO))-1); // Work out Q value for Filter bank m_uK = (unsigned int) ceil(m_BPO * log(m_FMax/m_FMin)/log(2.0)); // No. of constant Q bins - std::cerr << "ConstantQ::initialise: rate = " << m_FS << ", fmin = " << m_FMin << ", fmax = " << m_FMax << ", bpo = " << m_BPO << ", K = " << m_uK << ", Q = " << m_dQ << std::endl; +// std::cerr << "ConstantQ::initialise: rate = " << m_FS << ", fmin = " << m_FMin << ", fmax = " << m_FMax << ", bpo = " << m_BPO << ", K = " << m_uK << ", Q = " << m_dQ << std::endl; // work out length of fft required for this constant Q Filter bank m_FFTLength = (int) pow(2, nextpow2(ceil( m_dQ*m_FS/m_FMin ))); m_hop = m_FFTLength/8; // <------ hop size is window length divided by 32 - std::cerr << "ConstantQ::initialise: -> fft length = " << m_FFTLength << ", hop = " << m_hop << std::endl; +// std::cerr << "ConstantQ::initialise: -> fft length = " << m_FFTLength << ", hop = " << m_hop << std::endl; // allocate memory for cqdata m_CQdata = new double [2*m_uK]; @@ -208,7 +208,7 @@ const unsigned col = fftbin[i]; const double & r1 = real[i]; const double & i1 = imag[i]; - const double & r2 = FFTRe[ m_FFTLength- col]; + const double & r2 = FFTRe[ m_FFTLength - col]; const double & i2 = FFTIm[ m_FFTLength - col]; // add the multiplication CQRe[ row ] += (r1*r2 - i1*i2);
--- a/dsp/segmentation/ClusterMeltSegmenter.cpp Thu Jan 10 15:14:30 2008 +0000 +++ b/dsp/segmentation/ClusterMeltSegmenter.cpp Thu Jan 10 15:14:53 2008 +0000 @@ -1,10 +1,11 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + /* - * ClusterMeltSegmenter.cpp - * soundbite + * ClusterMeltSegmenter.cpp * - * Created by Mark Levy on 23/03/2006. - * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved. - * + * Created by Mark Levy on 23/03/2006. + * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. + * All rights reserved. */ #include <cfloat> @@ -15,230 +16,281 @@ #include "segment.h" #include "dsp/transforms/FFT.h" +#include "dsp/chromagram/ConstantQ.h" +#include "dsp/rateconversion/Decimator.h" -ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) : window(NULL), -constq(NULL), -featureType(params.featureType), -hopSize(params.hopSize), -windowSize(params.windowSize), -fmin(params.fmin), -fmax(params.fmax), -nbins(params.nbins), -ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c -nHMMStates(params.nHMMStates), -nclusters(params.nclusters), -histogramLength(params.histogramLength), -neighbourhoodLimit(params.neighbourhoodLimit) +ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) : + window(NULL), + constq(NULL), + featureType(params.featureType), + hopSize(params.hopSize), + windowSize(params.windowSize), + fmin(params.fmin), + fmax(params.fmax), + nbins(params.nbins), + ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c + nHMMStates(params.nHMMStates), + nclusters(params.nclusters), + histogramLength(params.histogramLength), + neighbourhoodLimit(params.neighbourhoodLimit), + decimator(0) { } void ClusterMeltSegmenter::initialise(int fs) { - samplerate = fs; - if (featureType != FEATURE_TYPE_UNKNOWN) - { -//!!! ncoeff = static_cast<int>(ceil(nbins * (log(fmax / static_cast<double>(fmin))) / log(2.0))); - CQConfig config; - config.FS = samplerate; - config.min = fmin; - config.max = fmax; - config.BPO = nbins; - config.CQThresh = 0.0054; - constq = new ConstantQ(config); -//!!! constq = init_constQ(fmin, fmax, nbins, samplerate, ncoeff); - ncoeff = constq->getK(); - } + samplerate = fs; + + if (featureType != FEATURE_TYPE_UNKNOWN) + { + // always run internal processing at 11025 or thereabouts + int internalRate = 11025; + int decimationFactor = samplerate / internalRate; + if (decimationFactor < 1) decimationFactor = 1; + + // must be a power of two + while (decimationFactor & (decimationFactor - 1)) ++decimationFactor; + + if (decimationFactor > Decimator::getHighestSupportedFactor()) { + decimationFactor = Decimator::getHighestSupportedFactor(); + } + + if (decimationFactor > 1) { + decimator = new Decimator(getWindowsize(), decimationFactor); + } + + CQConfig config; + config.FS = samplerate / decimationFactor; + config.min = fmin; + config.max = fmax; + config.BPO = nbins; + config.CQThresh = 0.0054; + + constq = new ConstantQ(config); + constq->sparsekernel(); + + ncoeff = constq->getK(); + } } ClusterMeltSegmenter::~ClusterMeltSegmenter() { - delete window; - delete constq; -//!!! if (constq) -// close_constQ(constq); + delete window; + delete constq; + delete decimator; } int ClusterMeltSegmenter::getWindowsize() { - if (featureType != FEATURE_TYPE_UNKNOWN) { - std::cerr << "rate = " << samplerate << ", fft length = " << constq->getfftlength() << ", fmin = " << fmin << ", fmax = " << fmax << ", nbins = " << nbins << ", K = " << constq->getK() << ", Q = " << constq->getQ() << std::endl; - return constq->getfftlength(); - } else { - return static_cast<int>(windowSize * samplerate); - } + if (featureType != FEATURE_TYPE_UNKNOWN) { + + if (constq) { +/* + std::cerr << "ClusterMeltSegmenter::getWindowsize: " + << "rate = " << samplerate + << ", dec factor = " << (decimator ? decimator->getFactor() : 1) + << ", fft length = " << constq->getfftlength() + << ", fmin = " << fmin + << ", fmax = " << fmax + << ", nbins = " << nbins + << ", K = " << constq->getK() + << ", Q = " << constq->getQ() + << std::endl; +*/ + } + } + + return static_cast<int>(windowSize * samplerate); } int ClusterMeltSegmenter::getHopsize() { - return static_cast<int>(hopSize * samplerate); + return static_cast<int>(hopSize * samplerate); } -void ClusterMeltSegmenter::extractFeatures(double* samples, int nsamples) +void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples) { - // create a new window if needed -/*!!! - if (!window || nsamples != windowLength) - { - if (window) - delete [] window; -// Window<double>(HammingWindow, nsamples).cut -//!!! window = hamming_p(nsamples); - windowLength = nsamples; - } -*/ - if (!window || window->getSize() != nsamples) { - delete window; - window = new Window<double>(HammingWindow, nsamples); - } + if (!constq) { + std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: " + << "Cannot run unknown feature type (or initialise not called)" + << std::endl; + return; + } - // copy the samples before windowing in case we need them for something else - double* frame = new double[nsamples]; -// for (int i = 0; i < nsamples; i++) -// frame[i] = samples[i] * window[i]; - window->cut(frame); + if (nsamples < getWindowsize()) { + std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl; + return; + } + + int fftsize = constq->getfftlength(); + + if (!window || window->getSize() != fftsize) { + delete window; + window = new Window<double>(HammingWindow, fftsize); + } + + vector<double> cq(ncoeff); + + for (int i = 0; i < ncoeff; ++i) cq[i] = 0.0; + + const double *psource = samples; + int pcount = nsamples; + + if (decimator) { + pcount = nsamples / decimator->getFactor(); + double *decout = new double[pcount]; + decimator->process(samples, decout); + psource = decout; + } + + int origin = 0; + +// std::cerr << "nsamples = " << nsamples << ", pcount = " << pcount << std::endl; + + int frames = 0; + + double *frame = new double[fftsize]; + double *real = new double[fftsize]; + double *imag = new double[fftsize]; + double *cqre = new double[ncoeff]; + double *cqim = new double[ncoeff]; + + while (origin <= pcount) { + + // always need at least one fft window per block, but after + // that we want to avoid having any incomplete ones + if (origin > 0 && origin + fftsize >= pcount) break; + + for (int i = 0; i < fftsize; ++i) { + if (origin + i < pcount) { + frame[i] = psource[origin + i]; + } else { + frame[i] = 0.0; + } + } + + for (int i = 0; i < fftsize/2; ++i) { + double value = frame[i]; + frame[i] = frame[i + fftsize/2]; + frame[i + fftsize/2] = value; + } + + window->cut(frame); + + FFT::process(fftsize, false, frame, 0, real, imag); + + constq->process(real, imag, cqre, cqim); - std::cerr << "nsamples = " << nsamples << std::endl; + for (int i = 0; i < ncoeff; ++i) { + cq[i] += sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]); + } + ++frames; - double *real = new double[nsamples]; - double *imag = new double[nsamples]; + origin += fftsize/2; + } - FFT::process(nsamples, false, frame, 0, real, imag); + delete [] cqre; + delete [] cqim; + delete [] real; + delete [] imag; + delete [] frame; - double *cqre = new double[ncoeff]; - double *cqim = new double[ncoeff]; + for (int i = 0; i < ncoeff; ++i) { +// std::cerr << cq[i] << " "; + cq[i] /= frames; + } +// std::cerr << std::endl; - constq->process(real, imag, cqre, cqim); + if (decimator) delete[] psource; - // extract const-Q -//!!! do_constQ(constq, frame, nsamples); -// int ncq = constq->ncoeff; - - delete [] frame; - delete [] real; - delete [] imag; - -//!!! if (ncq == ncoeff) // else feature extraction failed -// { -// vector<double> cq(ncq); -// for (int i = 0; i < ncq; i++) -// cq[i] = constq->absconstQtransform[i]; - vector<double> cq(ncoeff); - for (int i = 0; i < ncoeff; ++i) { - cq[i] = sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]); - } - features.push_back(cq); -// } - - delete[] cqre; - delete[] cqim; + features.push_back(cq); } void ClusterMeltSegmenter::segment(int m) { - nclusters = m; - segment(); + nclusters = m; + segment(); } void ClusterMeltSegmenter::setFeatures(const vector<vector<double> >& f) { - features = f; - featureType = FEATURE_TYPE_UNKNOWN; + features = f; + featureType = FEATURE_TYPE_UNKNOWN; } void ClusterMeltSegmenter::segment() { - if (constq) - { -//!!! close_constQ(constq); // finished extracting features - delete constq; - constq = NULL; - } + if (constq) + { + delete constq; + constq = 0; + delete decimator; + decimator = 0; + } - // for now copy the features to a native array and use the existing C segmenter... - double** arrFeatures = new double*[features.size()]; - for (int i = 0; i < features.size(); i++) - { - if (featureType == FEATURE_TYPE_UNKNOWN) - arrFeatures[i] = new double[features[0].size()]; - else - arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope - for (int j = 0; j < ncoeff; j++) - arrFeatures[i][j] = features[i][j]; - } + std::cerr << "ClusterMeltSegmenter::segment: have " << features.size() + << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl; + + // copy the features to a native array and use the existing C segmenter... + double** arrFeatures = new double*[features.size()]; + for (int i = 0; i < features.size(); i++) + { + if (featureType == FEATURE_TYPE_UNKNOWN) { + arrFeatures[i] = new double[features[0].size()]; + for (int j = 0; j < features[0].size(); j++) + arrFeatures[i][j] = features[i][j]; + } else { + arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope + for (int j = 0; j < ncoeff; j++) + arrFeatures[i][j] = features[i][j]; + } + } - q = new int[features.size()]; + q = new int[features.size()]; - if (featureType == FEATURE_TYPE_UNKNOWN) - cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength, - nclusters, neighbourhoodLimit); - else - constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType, - nHMMStates, histogramLength, nclusters, neighbourhoodLimit); + if (featureType == FEATURE_TYPE_UNKNOWN) + cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength, + nclusters, neighbourhoodLimit); + else + constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType, + nHMMStates, histogramLength, nclusters, neighbourhoodLimit); - // convert the cluster assignment sequence to a segmentation - makeSegmentation(q, features.size()); + // convert the cluster assignment sequence to a segmentation + makeSegmentation(q, features.size()); - // de-allocate arrays - delete [] q; - for (int i = 0; i < features.size(); i++) - delete [] arrFeatures[i]; - delete [] arrFeatures; + // de-allocate arrays + delete [] q; + for (int i = 0; i < features.size(); i++) + delete [] arrFeatures[i]; + delete [] arrFeatures; - // clear the features - clear(); + // clear the features + clear(); } void ClusterMeltSegmenter::makeSegmentation(int* q, int len) { - segmentation.segments.clear(); - segmentation.nsegtypes = nclusters; - segmentation.samplerate = samplerate; + segmentation.segments.clear(); + segmentation.nsegtypes = nclusters; + segmentation.samplerate = samplerate; - Segment segment; - segment.start = 0; - segment.type = q[0]; + Segment segment; + segment.start = 0; + segment.type = q[0]; - for (int i = 1; i < len; i++) - { - if (q[i] != q[i-1]) - { - segment.end = i * getHopsize(); - segmentation.segments.push_back(segment); - segment.type = q[i]; - segment.start = segment.end; - } - } - segment.end = len * getHopsize(); - segmentation.segments.push_back(segment); + for (int i = 1; i < len; i++) + { + if (q[i] != q[i-1]) + { + segment.end = i * getHopsize(); + segmentation.segments.push_back(segment); + segment.type = q[i]; + segment.start = segment.end; + } + } + segment.end = len * getHopsize(); + segmentation.segments.push_back(segment); } -/* -void ClusterMeltSegmenter::mpeg7ConstQ() -{ - // convert to dB scale - for (int i = 0; i < features.size(); i++) - for (int j = 0; j < ncoeff; j++) - features[i][j] = 10.0 * log10(features[i][j] + DBL_EPSILON); - - // normalise features and add the norm at the end as an extra feature dimension - double maxnorm = 0; // track the max of the norms - for (int i = 0; i < features.size(); i++) - { - double norm = 0; - for (int j = 0; j < ncoeff; j++) - norm += features[i][j] * features[i][j]; - norm = sqrt(norm); - for (int j = 0; j < ncoeff; j++) - features[i][j] /= norm; - features[i].push_back(norm); - if (norm > maxnorm) - maxnorm = norm; - } - - // normalise the norms - for (int i = 0; i < features.size(); i++) - features[i][ncoeff] /= maxnorm; -} -*/
--- a/dsp/segmentation/ClusterMeltSegmenter.h Thu Jan 10 15:14:30 2008 +0000 +++ b/dsp/segmentation/ClusterMeltSegmenter.h Thu Jan 10 15:14:53 2008 +0000 @@ -1,10 +1,11 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + /* - * ClusterMeltSegmenter.h - * soundbite + * ClusterMeltSegmenter.h * - * Created by Mark Levy on 23/03/2006. - * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved. - * + * Created by Mark Levy on 23/03/2006. + * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. + * All rights reserved. */ #include <vector> @@ -13,72 +14,84 @@ #include "Segmenter.h" #include "hmm/hmm.h" #include "base/Window.h" -#include "dsp/chromagram/ConstantQ.h" using std::vector; -class ClusterMeltSegmenterParams // defaults are sensible for 11025Hz with 0.2 second hopsize +class Decimator; +class ConstantQ; + +class ClusterMeltSegmenterParams +// defaults are sensible for 11025Hz with 0.2 second hopsize { public: - ClusterMeltSegmenterParams() : featureType(FEATURE_TYPE_CONSTQ), hopSize(0.2), windowSize(0.6), fmin(62), fmax(16000), - nbins(8), ncomponents(20), nHMMStates(40), nclusters(10), histogramLength(15), neighbourhoodLimit(20) { } - feature_types featureType; - double hopSize; // in secs - double windowSize; // in secs - int fmin; - int fmax; - int nbins; - int ncomponents; - int nHMMStates; - int nclusters; - int histogramLength; - int neighbourhoodLimit; + ClusterMeltSegmenterParams() : + featureType(FEATURE_TYPE_CONSTQ), + hopSize(0.2), + windowSize(0.6), + fmin(62), + fmax(16000), + nbins(8), + ncomponents(20), + nHMMStates(40), + nclusters(10), + histogramLength(15), + neighbourhoodLimit(20) { } + feature_types featureType; + double hopSize; // in secs + double windowSize; // in secs + int fmin; + int fmax; + int nbins; + int ncomponents; + int nHMMStates; + int nclusters; + int histogramLength; + int neighbourhoodLimit; }; class ClusterMeltSegmenter : public Segmenter { public: - ClusterMeltSegmenter(ClusterMeltSegmenterParams params); - virtual ~ClusterMeltSegmenter(); - virtual void initialise(int samplerate); - virtual int getWindowsize(); - virtual int getHopsize(); - virtual void extractFeatures(double* samples, int nsamples); - void setFeatures(const vector<vector<double> >& f); // provide the features yourself - virtual void segment(); // segment into default number of segment-types - void segment(int m); // segment into m segment-types - int getNSegmentTypes() { return nclusters; } + ClusterMeltSegmenter(ClusterMeltSegmenterParams params); + virtual ~ClusterMeltSegmenter(); + virtual void initialise(int samplerate); + virtual int getWindowsize(); + virtual int getHopsize(); + virtual void extractFeatures(const double* samples, int nsamples); + void setFeatures(const vector<vector<double> >& f); // provide the features yourself + virtual void segment(); // segment into default number of segment-types + void segment(int m); // segment into m segment-types + int getNSegmentTypes() { return nclusters; } + protected: - //void mpeg7ConstQ(); - void makeSegmentation(int* q, int len); + void makeSegmentation(int* q, int len); - Window<double> *window; -// int windowLength; // in samples - ConstantQ* constq; - model_t* model; // the HMM - //vector<int> stateSequence; - //vector<int> segmentTypeSequence; - int* q; // the decoded HMM state sequence - vector<vector<double> > histograms; - - feature_types featureType; - double hopSize; // in seconds - double windowSize; // in seconds - - // constant-Q parameters - int fmin; - int fmax; - int nbins; - int ncoeff; - - // PCA parameters - int ncomponents; - - // HMM parameters - int nHMMStates; - - // clustering parameters - int nclusters; - int histogramLength; - int neighbourhoodLimit; + Window<double> *window; + ConstantQ* constq; + model_t* model; // the HMM + int* q; // the decoded HMM state sequence + vector<vector<double> > histograms; + + feature_types featureType; + double hopSize; // in seconds + double windowSize; // in seconds + + // constant-Q parameters + int fmin; + int fmax; + int nbins; + int ncoeff; + + // PCA parameters + int ncomponents; + + // HMM parameters + int nHMMStates; + + // clustering parameters + int nclusters; + int histogramLength; + int neighbourhoodLimit; + + Decimator *decimator; };
--- a/dsp/segmentation/Segmenter.h Thu Jan 10 15:14:30 2008 +0000 +++ b/dsp/segmentation/Segmenter.h Thu Jan 10 15:14:53 2008 +0000 @@ -42,7 +42,7 @@ virtual void initialise(int samplerate) = 0; // must be called before any other methods virtual int getWindowsize() = 0; // required window size for calls to extractFeatures() virtual int getHopsize() = 0; // required hop size for calls to extractFeatures() - virtual void extractFeatures(double* samples, int nsamples) = 0; + virtual void extractFeatures(const double* samples, int nsamples) = 0; virtual void segment() = 0; // call once all the features have been extracted virtual void segment(int m) = 0; // specify desired number of segment-types virtual void clear() { features.clear(); }