c@249: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ c@249: c@243: /* c@249: * ClusterMeltSegmenter.cpp c@243: * c@249: * Created by Mark Levy on 23/03/2006. c@249: * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. c@249: * All rights reserved. c@243: */ c@243: c@243: #include c@243: #include c@243: c@243: #include "ClusterMeltSegmenter.h" c@243: #include "cluster_segmenter.h" c@243: #include "segment.h" c@243: c@245: #include "dsp/transforms/FFT.h" c@249: #include "dsp/chromagram/ConstantQ.h" c@249: #include "dsp/rateconversion/Decimator.h" c@251: #include "dsp/mfcc/MFCC.h" c@245: c@249: ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) : c@249: window(NULL), c@249: constq(NULL), c@251: mfcc(NULL), c@249: featureType(params.featureType), c@249: hopSize(params.hopSize), c@249: windowSize(params.windowSize), c@249: fmin(params.fmin), c@249: fmax(params.fmax), c@249: nbins(params.nbins), c@249: ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c c@249: nHMMStates(params.nHMMStates), c@249: nclusters(params.nclusters), c@249: histogramLength(params.histogramLength), c@249: neighbourhoodLimit(params.neighbourhoodLimit), c@251: decimator(NULL) c@243: { c@243: } c@243: c@243: void ClusterMeltSegmenter::initialise(int fs) c@243: { c@249: samplerate = fs; c@249: c@251: if (featureType == FEATURE_TYPE_CONSTQ || c@251: featureType == FEATURE_TYPE_CHROMA) { c@251: c@251: // run internal processing at 11025 or thereabouts c@249: int internalRate = 11025; c@249: int decimationFactor = samplerate / internalRate; c@249: if (decimationFactor < 1) decimationFactor = 1; c@249: c@249: // must be a power of two c@249: while (decimationFactor & (decimationFactor - 1)) ++decimationFactor; c@249: c@249: if (decimationFactor > Decimator::getHighestSupportedFactor()) { c@249: decimationFactor = Decimator::getHighestSupportedFactor(); c@249: } c@249: c@249: if (decimationFactor > 1) { c@249: decimator = new Decimator(getWindowsize(), decimationFactor); c@249: } c@249: c@249: CQConfig config; c@249: config.FS = samplerate / decimationFactor; c@249: config.min = fmin; c@249: config.max = fmax; c@249: config.BPO = nbins; c@249: config.CQThresh = 0.0054; c@249: c@249: constq = new ConstantQ(config); c@249: constq->sparsekernel(); c@251: c@251: ncoeff = constq->getK(); c@251: c@251: } else if (featureType == FEATURE_TYPE_MFCC) { c@249: c@252: // run internal processing at 22050 or thereabouts c@252: int internalRate = 22050; c@252: int decimationFactor = samplerate / internalRate; c@252: if (decimationFactor < 1) decimationFactor = 1; c@252: c@252: // must be a power of two c@252: while (decimationFactor & (decimationFactor - 1)) ++decimationFactor; c@252: c@252: if (decimationFactor > Decimator::getHighestSupportedFactor()) { c@252: decimationFactor = Decimator::getHighestSupportedFactor(); c@252: } c@252: c@252: if (decimationFactor > 1) { c@252: decimator = new Decimator(getWindowsize(), decimationFactor); c@252: } c@252: c@251: MFCCConfig config; c@252: config.FS = samplerate / decimationFactor; c@252: config.fftsize = 2048; c@252: config.nceps = 19; c@252: config.want_c0 = true; c@251: c@251: mfcc = new MFCC(config); c@252: ncoeff = config.nceps + 1; c@249: } c@243: } c@243: c@243: ClusterMeltSegmenter::~ClusterMeltSegmenter() c@243: { c@249: delete window; c@249: delete constq; c@249: delete decimator; c@245: } c@245: c@245: int c@245: ClusterMeltSegmenter::getWindowsize() c@245: { c@249: return static_cast(windowSize * samplerate); c@245: } c@245: c@245: int c@245: ClusterMeltSegmenter::getHopsize() c@245: { c@249: return static_cast(hopSize * samplerate); c@243: } c@243: c@249: void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples) c@243: { c@251: if (featureType == FEATURE_TYPE_CONSTQ || c@251: featureType == FEATURE_TYPE_CHROMA) { c@251: extractFeaturesConstQ(samples, nsamples); c@251: } else if (featureType == FEATURE_TYPE_MFCC) { c@251: extractFeaturesMFCC(samples, nsamples); c@251: } c@251: } c@251: c@251: void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsamples) c@251: { c@249: if (!constq) { c@251: std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesConstQ: " c@251: << "No const-q: initialise not called?" c@249: << std::endl; c@249: return; c@249: } c@245: c@249: if (nsamples < getWindowsize()) { c@249: std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl; c@249: return; c@249: } c@249: c@249: int fftsize = constq->getfftlength(); c@249: c@249: if (!window || window->getSize() != fftsize) { c@249: delete window; c@249: window = new Window(HammingWindow, fftsize); c@249: } c@249: c@249: vector cq(ncoeff); c@249: c@249: for (int i = 0; i < ncoeff; ++i) cq[i] = 0.0; c@249: c@249: const double *psource = samples; c@249: int pcount = nsamples; c@249: c@249: if (decimator) { c@249: pcount = nsamples / decimator->getFactor(); c@249: double *decout = new double[pcount]; c@249: decimator->process(samples, decout); c@249: psource = decout; c@249: } c@249: c@249: int origin = 0; c@249: c@249: // std::cerr << "nsamples = " << nsamples << ", pcount = " << pcount << std::endl; c@249: c@249: int frames = 0; c@249: c@249: double *frame = new double[fftsize]; c@249: double *real = new double[fftsize]; c@249: double *imag = new double[fftsize]; c@249: double *cqre = new double[ncoeff]; c@249: double *cqim = new double[ncoeff]; c@249: c@249: while (origin <= pcount) { c@249: c@249: // always need at least one fft window per block, but after c@249: // that we want to avoid having any incomplete ones c@249: if (origin > 0 && origin + fftsize >= pcount) break; c@249: c@249: for (int i = 0; i < fftsize; ++i) { c@249: if (origin + i < pcount) { c@249: frame[i] = psource[origin + i]; c@249: } else { c@249: frame[i] = 0.0; c@249: } c@249: } c@249: c@249: for (int i = 0; i < fftsize/2; ++i) { c@249: double value = frame[i]; c@249: frame[i] = frame[i + fftsize/2]; c@249: frame[i + fftsize/2] = value; c@249: } c@249: c@249: window->cut(frame); c@249: c@249: FFT::process(fftsize, false, frame, 0, real, imag); c@249: c@249: constq->process(real, imag, cqre, cqim); c@243: c@249: for (int i = 0; i < ncoeff; ++i) { c@249: cq[i] += sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]); c@249: } c@249: ++frames; c@245: c@249: origin += fftsize/2; c@249: } c@245: c@249: delete [] cqre; c@249: delete [] cqim; c@249: delete [] real; c@249: delete [] imag; c@249: delete [] frame; c@245: c@249: for (int i = 0; i < ncoeff; ++i) { c@249: cq[i] /= frames; c@249: } c@245: c@249: if (decimator) delete[] psource; c@245: c@249: features.push_back(cq); c@243: } c@243: c@251: void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsamples) c@251: { c@251: if (!mfcc) { c@251: std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesMFCC: " c@251: << "No mfcc: initialise not called?" c@251: << std::endl; c@251: return; c@251: } c@251: c@251: if (nsamples < getWindowsize()) { c@251: std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl; c@251: return; c@251: } c@251: c@251: int fftsize = mfcc->getfftlength(); c@251: c@251: vector cc(ncoeff); c@251: c@251: for (int i = 0; i < ncoeff; ++i) cc[i] = 0.0; c@251: c@251: const double *psource = samples; c@251: int pcount = nsamples; c@251: c@252: if (decimator) { c@252: pcount = nsamples / decimator->getFactor(); c@252: double *decout = new double[pcount]; c@252: decimator->process(samples, decout); c@252: psource = decout; c@252: } c@252: c@251: int origin = 0; c@251: int frames = 0; c@251: c@251: double *frame = new double[fftsize]; c@251: double *ccout = new double[ncoeff]; c@251: c@251: while (origin <= pcount) { c@251: c@251: // always need at least one fft window per block, but after c@251: // that we want to avoid having any incomplete ones c@251: if (origin > 0 && origin + fftsize >= pcount) break; c@251: c@251: for (int i = 0; i < fftsize; ++i) { c@251: if (origin + i < pcount) { c@251: frame[i] = psource[origin + i]; c@251: } else { c@251: frame[i] = 0.0; c@251: } c@251: } c@251: c@251: mfcc->process(fftsize, frame, ccout); c@251: c@251: for (int i = 0; i < ncoeff; ++i) { c@251: cc[i] += ccout[i]; c@251: } c@251: ++frames; c@251: c@251: origin += fftsize/2; c@251: } c@251: c@251: delete [] ccout; c@251: delete [] frame; c@251: c@251: for (int i = 0; i < ncoeff; ++i) { c@251: cc[i] /= frames; c@251: } c@251: c@252: if (decimator) delete[] psource; c@252: c@251: features.push_back(cc); c@251: } c@251: c@243: void ClusterMeltSegmenter::segment(int m) c@243: { c@249: nclusters = m; c@249: segment(); c@243: } c@243: c@243: void ClusterMeltSegmenter::setFeatures(const vector >& f) c@243: { c@249: features = f; c@249: featureType = FEATURE_TYPE_UNKNOWN; c@243: } c@243: c@243: void ClusterMeltSegmenter::segment() c@243: { c@251: delete constq; c@251: constq = 0; c@251: delete mfcc; c@251: mfcc = 0; c@251: delete decimator; c@251: decimator = 0; c@243: c@249: std::cerr << "ClusterMeltSegmenter::segment: have " << features.size() c@249: << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl; c@249: c@249: // copy the features to a native array and use the existing C segmenter... c@249: double** arrFeatures = new double*[features.size()]; c@249: for (int i = 0; i < features.size(); i++) c@249: { c@249: if (featureType == FEATURE_TYPE_UNKNOWN) { c@249: arrFeatures[i] = new double[features[0].size()]; c@249: for (int j = 0; j < features[0].size(); j++) c@249: arrFeatures[i][j] = features[i][j]; c@249: } else { c@249: arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope c@249: for (int j = 0; j < ncoeff; j++) c@249: arrFeatures[i][j] = features[i][j]; c@249: } c@249: } c@243: c@249: q = new int[features.size()]; c@243: c@251: if (featureType == FEATURE_TYPE_UNKNOWN || c@251: featureType == FEATURE_TYPE_MFCC) c@249: cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength, c@249: nclusters, neighbourhoodLimit); c@249: else c@249: constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType, c@249: nHMMStates, histogramLength, nclusters, neighbourhoodLimit); c@243: c@249: // convert the cluster assignment sequence to a segmentation c@249: makeSegmentation(q, features.size()); c@243: c@249: // de-allocate arrays c@249: delete [] q; c@249: for (int i = 0; i < features.size(); i++) c@249: delete [] arrFeatures[i]; c@249: delete [] arrFeatures; c@243: c@249: // clear the features c@249: clear(); c@243: } c@243: c@243: void ClusterMeltSegmenter::makeSegmentation(int* q, int len) c@243: { c@249: segmentation.segments.clear(); c@249: segmentation.nsegtypes = nclusters; c@249: segmentation.samplerate = samplerate; c@243: c@249: Segment segment; c@249: segment.start = 0; c@249: segment.type = q[0]; c@243: c@249: for (int i = 1; i < len; i++) c@249: { c@249: if (q[i] != q[i-1]) c@249: { c@249: segment.end = i * getHopsize(); c@249: segmentation.segments.push_back(segment); c@249: segment.type = q[i]; c@249: segment.start = segment.end; c@249: } c@249: } c@249: segment.end = len * getHopsize(); c@249: segmentation.segments.push_back(segment); c@243: } c@243: