cannam@24: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ cannam@24: cannam@18: /* cannam@24: * ClusterMeltSegmenter.cpp cannam@18: * cannam@24: * Created by Mark Levy on 23/03/2006. cannam@24: * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. Chris@84: Chris@84: This program is free software; you can redistribute it and/or Chris@84: modify it under the terms of the GNU General Public License as Chris@84: published by the Free Software Foundation; either version 2 of the Chris@84: License, or (at your option) any later version. See the file Chris@84: COPYING included with this distribution for more information. cannam@18: */ cannam@18: cannam@18: #include cannam@18: #include cannam@18: cannam@18: #include "ClusterMeltSegmenter.h" cannam@18: #include "cluster_segmenter.h" cannam@18: #include "segment.h" cannam@18: cannam@20: #include "dsp/transforms/FFT.h" cannam@24: #include "dsp/chromagram/ConstantQ.h" cannam@24: #include "dsp/rateconversion/Decimator.h" cannam@26: #include "dsp/mfcc/MFCC.h" cannam@20: cannam@24: ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) : cannam@24: window(NULL), cannam@64: fft(NULL), cannam@24: constq(NULL), cannam@26: mfcc(NULL), cannam@24: featureType(params.featureType), cannam@24: hopSize(params.hopSize), cannam@24: windowSize(params.windowSize), cannam@24: fmin(params.fmin), cannam@24: fmax(params.fmax), cannam@24: nbins(params.nbins), cannam@24: ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c cannam@24: nHMMStates(params.nHMMStates), cannam@24: nclusters(params.nclusters), cannam@24: histogramLength(params.histogramLength), cannam@24: neighbourhoodLimit(params.neighbourhoodLimit), cannam@26: decimator(NULL) cannam@18: { cannam@18: } cannam@18: cannam@18: void ClusterMeltSegmenter::initialise(int fs) cannam@18: { cannam@24: samplerate = fs; cannam@24: cannam@26: if (featureType == FEATURE_TYPE_CONSTQ || cannam@26: featureType == FEATURE_TYPE_CHROMA) { cannam@26: cannam@26: // run internal processing at 11025 or thereabouts cannam@24: int internalRate = 11025; cannam@24: int decimationFactor = samplerate / internalRate; cannam@24: if (decimationFactor < 1) decimationFactor = 1; cannam@24: cannam@24: // must be a power of two cannam@24: while (decimationFactor & (decimationFactor - 1)) ++decimationFactor; cannam@24: cannam@24: if (decimationFactor > Decimator::getHighestSupportedFactor()) { cannam@24: decimationFactor = Decimator::getHighestSupportedFactor(); cannam@24: } cannam@24: cannam@24: if (decimationFactor > 1) { cannam@24: decimator = new Decimator(getWindowsize(), decimationFactor); cannam@24: } cannam@24: cannam@24: CQConfig config; cannam@24: config.FS = samplerate / decimationFactor; cannam@24: config.min = fmin; cannam@24: config.max = fmax; cannam@24: config.BPO = nbins; cannam@24: config.CQThresh = 0.0054; cannam@24: cannam@24: constq = new ConstantQ(config); cannam@24: constq->sparsekernel(); cannam@26: cannam@26: ncoeff = constq->getK(); cannam@64: cannam@64: fft = new FFTReal(constq->getfftlength()); cannam@26: cannam@26: } else if (featureType == FEATURE_TYPE_MFCC) { cannam@24: cannam@27: // run internal processing at 22050 or thereabouts cannam@27: int internalRate = 22050; cannam@27: int decimationFactor = samplerate / internalRate; cannam@27: if (decimationFactor < 1) decimationFactor = 1; cannam@27: cannam@27: // must be a power of two cannam@27: while (decimationFactor & (decimationFactor - 1)) ++decimationFactor; cannam@27: cannam@27: if (decimationFactor > Decimator::getHighestSupportedFactor()) { cannam@27: decimationFactor = Decimator::getHighestSupportedFactor(); cannam@27: } cannam@27: cannam@27: if (decimationFactor > 1) { cannam@27: decimator = new Decimator(getWindowsize(), decimationFactor); cannam@27: } cannam@27: cannam@30: MFCCConfig config(samplerate / decimationFactor); cannam@27: config.fftsize = 2048; cannam@27: config.nceps = 19; cannam@27: config.want_c0 = true; cannam@26: cannam@26: mfcc = new MFCC(config); cannam@27: ncoeff = config.nceps + 1; cannam@24: } cannam@18: } cannam@18: cannam@18: ClusterMeltSegmenter::~ClusterMeltSegmenter() cannam@18: { cannam@24: delete window; cannam@24: delete constq; cannam@24: delete decimator; cannam@64: delete fft; cannam@20: } cannam@20: cannam@20: int cannam@20: ClusterMeltSegmenter::getWindowsize() cannam@20: { cannam@44: return static_cast(windowSize * samplerate + 0.001); cannam@20: } cannam@20: cannam@20: int cannam@20: ClusterMeltSegmenter::getHopsize() cannam@20: { cannam@44: return static_cast(hopSize * samplerate + 0.001); cannam@18: } cannam@18: cannam@24: void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples) cannam@18: { cannam@26: if (featureType == FEATURE_TYPE_CONSTQ || cannam@26: featureType == FEATURE_TYPE_CHROMA) { cannam@26: extractFeaturesConstQ(samples, nsamples); cannam@26: } else if (featureType == FEATURE_TYPE_MFCC) { cannam@26: extractFeaturesMFCC(samples, nsamples); cannam@26: } cannam@26: } cannam@26: cannam@26: void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsamples) cannam@26: { cannam@24: if (!constq) { cannam@26: std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesConstQ: " cannam@26: << "No const-q: initialise not called?" cannam@24: << std::endl; cannam@24: return; cannam@24: } cannam@20: cannam@24: if (nsamples < getWindowsize()) { cannam@24: std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl; cannam@24: return; cannam@24: } cannam@24: cannam@24: int fftsize = constq->getfftlength(); cannam@24: cannam@24: if (!window || window->getSize() != fftsize) { cannam@24: delete window; cannam@24: window = new Window(HammingWindow, fftsize); cannam@24: } cannam@24: cannam@24: vector cq(ncoeff); cannam@24: cannam@24: for (int i = 0; i < ncoeff; ++i) cq[i] = 0.0; cannam@24: cannam@24: const double *psource = samples; cannam@24: int pcount = nsamples; cannam@24: cannam@24: if (decimator) { cannam@24: pcount = nsamples / decimator->getFactor(); cannam@24: double *decout = new double[pcount]; cannam@24: decimator->process(samples, decout); cannam@24: psource = decout; cannam@24: } cannam@24: cannam@24: int origin = 0; cannam@24: cannam@24: // std::cerr << "nsamples = " << nsamples << ", pcount = " << pcount << std::endl; cannam@24: cannam@24: int frames = 0; cannam@24: cannam@24: double *frame = new double[fftsize]; cannam@24: double *real = new double[fftsize]; cannam@24: double *imag = new double[fftsize]; cannam@24: double *cqre = new double[ncoeff]; cannam@24: double *cqim = new double[ncoeff]; cannam@24: cannam@24: while (origin <= pcount) { cannam@24: cannam@24: // always need at least one fft window per block, but after cannam@24: // that we want to avoid having any incomplete ones cannam@24: if (origin > 0 && origin + fftsize >= pcount) break; cannam@24: cannam@24: for (int i = 0; i < fftsize; ++i) { cannam@24: if (origin + i < pcount) { cannam@24: frame[i] = psource[origin + i]; cannam@24: } else { cannam@24: frame[i] = 0.0; cannam@24: } cannam@24: } cannam@24: cannam@24: for (int i = 0; i < fftsize/2; ++i) { cannam@24: double value = frame[i]; cannam@24: frame[i] = frame[i + fftsize/2]; cannam@24: frame[i + fftsize/2] = value; cannam@24: } cannam@24: cannam@24: window->cut(frame); cannam@24: cannam@64: fft->process(false, frame, real, imag); cannam@24: cannam@24: constq->process(real, imag, cqre, cqim); cannam@18: cannam@24: for (int i = 0; i < ncoeff; ++i) { cannam@24: cq[i] += sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]); cannam@24: } cannam@24: ++frames; cannam@20: cannam@24: origin += fftsize/2; cannam@24: } cannam@20: cannam@24: delete [] cqre; cannam@24: delete [] cqim; cannam@24: delete [] real; cannam@24: delete [] imag; cannam@24: delete [] frame; cannam@20: cannam@24: for (int i = 0; i < ncoeff; ++i) { cannam@24: cq[i] /= frames; cannam@24: } cannam@20: cannam@24: if (decimator) delete[] psource; cannam@20: cannam@24: features.push_back(cq); cannam@18: } cannam@18: cannam@26: void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsamples) cannam@26: { cannam@26: if (!mfcc) { cannam@26: std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesMFCC: " cannam@26: << "No mfcc: initialise not called?" cannam@26: << std::endl; cannam@26: return; cannam@26: } cannam@26: cannam@26: if (nsamples < getWindowsize()) { cannam@26: std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl; cannam@26: return; cannam@26: } cannam@26: cannam@26: int fftsize = mfcc->getfftlength(); cannam@26: cannam@26: vector cc(ncoeff); cannam@26: cannam@26: for (int i = 0; i < ncoeff; ++i) cc[i] = 0.0; cannam@26: cannam@26: const double *psource = samples; cannam@26: int pcount = nsamples; cannam@26: cannam@27: if (decimator) { cannam@27: pcount = nsamples / decimator->getFactor(); cannam@27: double *decout = new double[pcount]; cannam@27: decimator->process(samples, decout); cannam@27: psource = decout; cannam@27: } cannam@27: cannam@26: int origin = 0; cannam@26: int frames = 0; cannam@26: cannam@26: double *frame = new double[fftsize]; cannam@26: double *ccout = new double[ncoeff]; cannam@26: cannam@26: while (origin <= pcount) { cannam@26: cannam@26: // always need at least one fft window per block, but after cannam@26: // that we want to avoid having any incomplete ones cannam@26: if (origin > 0 && origin + fftsize >= pcount) break; cannam@26: cannam@26: for (int i = 0; i < fftsize; ++i) { cannam@26: if (origin + i < pcount) { cannam@26: frame[i] = psource[origin + i]; cannam@26: } else { cannam@26: frame[i] = 0.0; cannam@26: } cannam@26: } cannam@26: cannam@30: mfcc->process(frame, ccout); cannam@26: cannam@26: for (int i = 0; i < ncoeff; ++i) { cannam@26: cc[i] += ccout[i]; cannam@26: } cannam@26: ++frames; cannam@26: cannam@26: origin += fftsize/2; cannam@26: } cannam@26: cannam@26: delete [] ccout; cannam@26: delete [] frame; cannam@26: cannam@26: for (int i = 0; i < ncoeff; ++i) { cannam@26: cc[i] /= frames; cannam@26: } cannam@26: cannam@27: if (decimator) delete[] psource; cannam@27: cannam@26: features.push_back(cc); cannam@26: } cannam@26: cannam@18: void ClusterMeltSegmenter::segment(int m) cannam@18: { cannam@24: nclusters = m; cannam@24: segment(); cannam@18: } cannam@18: cannam@18: void ClusterMeltSegmenter::setFeatures(const vector >& f) cannam@18: { cannam@24: features = f; cannam@24: featureType = FEATURE_TYPE_UNKNOWN; cannam@18: } cannam@18: cannam@18: void ClusterMeltSegmenter::segment() cannam@18: { cannam@26: delete constq; cannam@26: constq = 0; cannam@26: delete mfcc; cannam@26: mfcc = 0; cannam@26: delete decimator; cannam@26: decimator = 0; cannam@58: cannam@58: if (features.size() < histogramLength) return; cannam@58: /* cannam@24: std::cerr << "ClusterMeltSegmenter::segment: have " << features.size() cannam@24: << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl; cannam@58: */ cannam@24: // copy the features to a native array and use the existing C segmenter... cannam@24: double** arrFeatures = new double*[features.size()]; cannam@24: for (int i = 0; i < features.size(); i++) cannam@24: { cannam@24: if (featureType == FEATURE_TYPE_UNKNOWN) { cannam@24: arrFeatures[i] = new double[features[0].size()]; cannam@24: for (int j = 0; j < features[0].size(); j++) cannam@24: arrFeatures[i][j] = features[i][j]; cannam@24: } else { cannam@24: arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope cannam@24: for (int j = 0; j < ncoeff; j++) cannam@24: arrFeatures[i][j] = features[i][j]; cannam@24: } cannam@24: } cannam@18: cannam@24: q = new int[features.size()]; cannam@18: cannam@26: if (featureType == FEATURE_TYPE_UNKNOWN || cannam@26: featureType == FEATURE_TYPE_MFCC) cannam@24: cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength, cannam@24: nclusters, neighbourhoodLimit); cannam@24: else cannam@24: constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType, cannam@24: nHMMStates, histogramLength, nclusters, neighbourhoodLimit); cannam@18: cannam@24: // convert the cluster assignment sequence to a segmentation cannam@24: makeSegmentation(q, features.size()); cannam@18: cannam@24: // de-allocate arrays cannam@24: delete [] q; cannam@24: for (int i = 0; i < features.size(); i++) cannam@24: delete [] arrFeatures[i]; cannam@24: delete [] arrFeatures; cannam@18: cannam@24: // clear the features cannam@24: clear(); cannam@18: } cannam@18: cannam@18: void ClusterMeltSegmenter::makeSegmentation(int* q, int len) cannam@18: { cannam@24: segmentation.segments.clear(); cannam@24: segmentation.nsegtypes = nclusters; cannam@24: segmentation.samplerate = samplerate; cannam@18: cannam@24: Segment segment; cannam@24: segment.start = 0; cannam@24: segment.type = q[0]; cannam@18: cannam@24: for (int i = 1; i < len; i++) cannam@24: { cannam@24: if (q[i] != q[i-1]) cannam@24: { cannam@24: segment.end = i * getHopsize(); cannam@24: segmentation.segments.push_back(segment); cannam@24: segment.type = q[i]; cannam@24: segment.start = segment.end; cannam@24: } cannam@24: } cannam@24: segment.end = len * getHopsize(); cannam@24: segmentation.segments.push_back(segment); cannam@18: } cannam@18: