cannam@24: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ cannam@24: cannam@18: /* cannam@24: * ClusterMeltSegmenter.cpp cannam@18: * cannam@24: * Created by Mark Levy on 23/03/2006. cannam@24: * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. cannam@24: * All rights reserved. cannam@18: */ cannam@18: cannam@18: #include cannam@18: #include cannam@18: cannam@18: #include "ClusterMeltSegmenter.h" cannam@18: #include "cluster_segmenter.h" cannam@18: #include "segment.h" cannam@18: cannam@20: #include "dsp/transforms/FFT.h" cannam@24: #include "dsp/chromagram/ConstantQ.h" cannam@24: #include "dsp/rateconversion/Decimator.h" cannam@20: cannam@24: ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) : cannam@24: window(NULL), cannam@24: constq(NULL), cannam@24: featureType(params.featureType), cannam@24: hopSize(params.hopSize), cannam@24: windowSize(params.windowSize), cannam@24: fmin(params.fmin), cannam@24: fmax(params.fmax), cannam@24: nbins(params.nbins), cannam@24: ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c cannam@24: nHMMStates(params.nHMMStates), cannam@24: nclusters(params.nclusters), cannam@24: histogramLength(params.histogramLength), cannam@24: neighbourhoodLimit(params.neighbourhoodLimit), cannam@24: decimator(0) cannam@18: { cannam@18: } cannam@18: cannam@18: void ClusterMeltSegmenter::initialise(int fs) cannam@18: { cannam@24: samplerate = fs; cannam@24: cannam@24: if (featureType != FEATURE_TYPE_UNKNOWN) cannam@24: { cannam@24: // always run internal processing at 11025 or thereabouts cannam@24: int internalRate = 11025; cannam@24: int decimationFactor = samplerate / internalRate; cannam@24: if (decimationFactor < 1) decimationFactor = 1; cannam@24: cannam@24: // must be a power of two cannam@24: while (decimationFactor & (decimationFactor - 1)) ++decimationFactor; cannam@24: cannam@24: if (decimationFactor > Decimator::getHighestSupportedFactor()) { cannam@24: decimationFactor = Decimator::getHighestSupportedFactor(); cannam@24: } cannam@24: cannam@24: if (decimationFactor > 1) { cannam@24: decimator = new Decimator(getWindowsize(), decimationFactor); cannam@24: } cannam@24: cannam@24: CQConfig config; cannam@24: config.FS = samplerate / decimationFactor; cannam@24: config.min = fmin; cannam@24: config.max = fmax; cannam@24: config.BPO = nbins; cannam@24: config.CQThresh = 0.0054; cannam@24: cannam@24: constq = new ConstantQ(config); cannam@24: constq->sparsekernel(); cannam@24: cannam@24: ncoeff = constq->getK(); cannam@24: } cannam@18: } cannam@18: cannam@18: ClusterMeltSegmenter::~ClusterMeltSegmenter() cannam@18: { cannam@24: delete window; cannam@24: delete constq; cannam@24: delete decimator; cannam@20: } cannam@20: cannam@20: int cannam@20: ClusterMeltSegmenter::getWindowsize() cannam@20: { cannam@24: if (featureType != FEATURE_TYPE_UNKNOWN) { cannam@24: cannam@24: if (constq) { cannam@24: /* cannam@24: std::cerr << "ClusterMeltSegmenter::getWindowsize: " cannam@24: << "rate = " << samplerate cannam@24: << ", dec factor = " << (decimator ? decimator->getFactor() : 1) cannam@24: << ", fft length = " << constq->getfftlength() cannam@24: << ", fmin = " << fmin cannam@24: << ", fmax = " << fmax cannam@24: << ", nbins = " << nbins cannam@24: << ", K = " << constq->getK() cannam@24: << ", Q = " << constq->getQ() cannam@24: << std::endl; cannam@24: */ cannam@24: } cannam@24: } cannam@24: cannam@24: return static_cast(windowSize * samplerate); cannam@20: } cannam@20: cannam@20: int cannam@20: ClusterMeltSegmenter::getHopsize() cannam@20: { cannam@24: return static_cast(hopSize * samplerate); cannam@18: } cannam@18: cannam@24: void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples) cannam@18: { cannam@24: if (!constq) { cannam@24: std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: " cannam@24: << "Cannot run unknown feature type (or initialise not called)" cannam@24: << std::endl; cannam@24: return; cannam@24: } cannam@20: cannam@24: if (nsamples < getWindowsize()) { cannam@24: std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl; cannam@24: return; cannam@24: } cannam@24: cannam@24: int fftsize = constq->getfftlength(); cannam@24: cannam@24: if (!window || window->getSize() != fftsize) { cannam@24: delete window; cannam@24: window = new Window(HammingWindow, fftsize); cannam@24: } cannam@24: cannam@24: vector cq(ncoeff); cannam@24: cannam@24: for (int i = 0; i < ncoeff; ++i) cq[i] = 0.0; cannam@24: cannam@24: const double *psource = samples; cannam@24: int pcount = nsamples; cannam@24: cannam@24: if (decimator) { cannam@24: pcount = nsamples / decimator->getFactor(); cannam@24: double *decout = new double[pcount]; cannam@24: decimator->process(samples, decout); cannam@24: psource = decout; cannam@24: } cannam@24: cannam@24: int origin = 0; cannam@24: cannam@24: // std::cerr << "nsamples = " << nsamples << ", pcount = " << pcount << std::endl; cannam@24: cannam@24: int frames = 0; cannam@24: cannam@24: double *frame = new double[fftsize]; cannam@24: double *real = new double[fftsize]; cannam@24: double *imag = new double[fftsize]; cannam@24: double *cqre = new double[ncoeff]; cannam@24: double *cqim = new double[ncoeff]; cannam@24: cannam@24: while (origin <= pcount) { cannam@24: cannam@24: // always need at least one fft window per block, but after cannam@24: // that we want to avoid having any incomplete ones cannam@24: if (origin > 0 && origin + fftsize >= pcount) break; cannam@24: cannam@24: for (int i = 0; i < fftsize; ++i) { cannam@24: if (origin + i < pcount) { cannam@24: frame[i] = psource[origin + i]; cannam@24: } else { cannam@24: frame[i] = 0.0; cannam@24: } cannam@24: } cannam@24: cannam@24: for (int i = 0; i < fftsize/2; ++i) { cannam@24: double value = frame[i]; cannam@24: frame[i] = frame[i + fftsize/2]; cannam@24: frame[i + fftsize/2] = value; cannam@24: } cannam@24: cannam@24: window->cut(frame); cannam@24: cannam@24: FFT::process(fftsize, false, frame, 0, real, imag); cannam@24: cannam@24: constq->process(real, imag, cqre, cqim); cannam@18: cannam@24: for (int i = 0; i < ncoeff; ++i) { cannam@24: cq[i] += sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]); cannam@24: } cannam@24: ++frames; cannam@20: cannam@24: origin += fftsize/2; cannam@24: } cannam@20: cannam@24: delete [] cqre; cannam@24: delete [] cqim; cannam@24: delete [] real; cannam@24: delete [] imag; cannam@24: delete [] frame; cannam@20: cannam@24: for (int i = 0; i < ncoeff; ++i) { cannam@24: // std::cerr << cq[i] << " "; cannam@24: cq[i] /= frames; cannam@24: } cannam@24: // std::cerr << std::endl; cannam@20: cannam@24: if (decimator) delete[] psource; cannam@20: cannam@24: features.push_back(cq); cannam@18: } cannam@18: cannam@18: void ClusterMeltSegmenter::segment(int m) cannam@18: { cannam@24: nclusters = m; cannam@24: segment(); cannam@18: } cannam@18: cannam@18: void ClusterMeltSegmenter::setFeatures(const vector >& f) cannam@18: { cannam@24: features = f; cannam@24: featureType = FEATURE_TYPE_UNKNOWN; cannam@18: } cannam@18: cannam@18: void ClusterMeltSegmenter::segment() cannam@18: { cannam@24: if (constq) cannam@24: { cannam@24: delete constq; cannam@24: constq = 0; cannam@24: delete decimator; cannam@24: decimator = 0; cannam@24: } cannam@18: cannam@24: std::cerr << "ClusterMeltSegmenter::segment: have " << features.size() cannam@24: << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl; cannam@24: cannam@24: // copy the features to a native array and use the existing C segmenter... cannam@24: double** arrFeatures = new double*[features.size()]; cannam@24: for (int i = 0; i < features.size(); i++) cannam@24: { cannam@24: if (featureType == FEATURE_TYPE_UNKNOWN) { cannam@24: arrFeatures[i] = new double[features[0].size()]; cannam@24: for (int j = 0; j < features[0].size(); j++) cannam@24: arrFeatures[i][j] = features[i][j]; cannam@24: } else { cannam@24: arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope cannam@24: for (int j = 0; j < ncoeff; j++) cannam@24: arrFeatures[i][j] = features[i][j]; cannam@24: } cannam@24: } cannam@18: cannam@24: q = new int[features.size()]; cannam@18: cannam@24: if (featureType == FEATURE_TYPE_UNKNOWN) cannam@24: cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength, cannam@24: nclusters, neighbourhoodLimit); cannam@24: else cannam@24: constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType, cannam@24: nHMMStates, histogramLength, nclusters, neighbourhoodLimit); cannam@18: cannam@24: // convert the cluster assignment sequence to a segmentation cannam@24: makeSegmentation(q, features.size()); cannam@18: cannam@24: // de-allocate arrays cannam@24: delete [] q; cannam@24: for (int i = 0; i < features.size(); i++) cannam@24: delete [] arrFeatures[i]; cannam@24: delete [] arrFeatures; cannam@18: cannam@24: // clear the features cannam@24: clear(); cannam@18: } cannam@18: cannam@18: void ClusterMeltSegmenter::makeSegmentation(int* q, int len) cannam@18: { cannam@24: segmentation.segments.clear(); cannam@24: segmentation.nsegtypes = nclusters; cannam@24: segmentation.samplerate = samplerate; cannam@18: cannam@24: Segment segment; cannam@24: segment.start = 0; cannam@24: segment.type = q[0]; cannam@18: cannam@24: for (int i = 1; i < len; i++) cannam@24: { cannam@24: if (q[i] != q[i-1]) cannam@24: { cannam@24: segment.end = i * getHopsize(); cannam@24: segmentation.segments.push_back(segment); cannam@24: segment.type = q[i]; cannam@24: segment.start = segment.end; cannam@24: } cannam@24: } cannam@24: segment.end = len * getHopsize(); cannam@24: segmentation.segments.push_back(segment); cannam@18: } cannam@18: