c@243: /* c@243: * ClusterMeltSegmenter.cpp c@243: * soundbite c@243: * c@243: * Created by Mark Levy on 23/03/2006. c@243: * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved. c@243: * c@243: */ c@243: c@243: #include c@243: #include c@243: c@243: #include "ClusterMeltSegmenter.h" c@243: #include "cluster_segmenter.h" c@243: #include "segment.h" c@243: c@245: #include "dsp/transforms/FFT.h" c@245: c@243: ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) : window(NULL), c@243: constq(NULL), c@243: featureType(params.featureType), c@245: hopSize(params.hopSize), c@243: windowSize(params.windowSize), c@243: fmin(params.fmin), c@243: fmax(params.fmax), c@243: nbins(params.nbins), c@243: ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c c@243: nHMMStates(params.nHMMStates), c@243: nclusters(params.nclusters), c@243: histogramLength(params.histogramLength), c@243: neighbourhoodLimit(params.neighbourhoodLimit) c@243: { c@243: } c@243: c@243: void ClusterMeltSegmenter::initialise(int fs) c@243: { c@243: samplerate = fs; c@243: if (featureType != FEATURE_TYPE_UNKNOWN) c@243: { c@245: //!!! ncoeff = static_cast(ceil(nbins * (log(fmax / static_cast(fmin))) / log(2.0))); c@245: CQConfig config; c@245: config.FS = samplerate; c@245: config.min = fmin; c@245: config.max = fmax; c@245: config.BPO = nbins; c@245: config.CQThresh = 0.0054; c@245: constq = new ConstantQ(config); c@245: //!!! constq = init_constQ(fmin, fmax, nbins, samplerate, ncoeff); c@245: ncoeff = constq->getK(); c@243: } c@243: } c@243: c@243: ClusterMeltSegmenter::~ClusterMeltSegmenter() c@243: { c@245: delete window; c@245: delete constq; c@245: //!!! if (constq) c@245: // close_constQ(constq); c@245: } c@245: c@245: int c@245: ClusterMeltSegmenter::getWindowsize() c@245: { c@245: if (featureType != FEATURE_TYPE_UNKNOWN) { c@245: std::cerr << "rate = " << samplerate << ", fft length = " << constq->getfftlength() << ", fmin = " << fmin << ", fmax = " << fmax << ", nbins = " << nbins << ", K = " << constq->getK() << ", Q = " << constq->getQ() << std::endl; c@245: return constq->getfftlength(); c@245: } else { c@245: return static_cast(windowSize * samplerate); c@245: } c@245: } c@245: c@245: int c@245: ClusterMeltSegmenter::getHopsize() c@245: { c@245: return static_cast(hopSize * samplerate); c@243: } c@243: c@243: void ClusterMeltSegmenter::extractFeatures(double* samples, int nsamples) c@243: { c@243: // create a new window if needed c@245: /*!!! c@243: if (!window || nsamples != windowLength) c@243: { c@243: if (window) c@243: delete [] window; c@245: // Window(HammingWindow, nsamples).cut c@245: //!!! window = hamming_p(nsamples); c@243: windowLength = nsamples; c@243: } c@245: */ c@245: if (!window || window->getSize() != nsamples) { c@245: delete window; c@245: window = new Window(HammingWindow, nsamples); c@245: } c@245: c@243: // copy the samples before windowing in case we need them for something else c@243: double* frame = new double[nsamples]; c@245: // for (int i = 0; i < nsamples; i++) c@245: // frame[i] = samples[i] * window[i]; c@245: window->cut(frame); c@243: c@245: std::cerr << "nsamples = " << nsamples << std::endl; c@245: c@245: double *real = new double[nsamples]; c@245: double *imag = new double[nsamples]; c@245: c@245: FFT::process(nsamples, false, frame, 0, real, imag); c@245: c@245: double *cqre = new double[ncoeff]; c@245: double *cqim = new double[ncoeff]; c@245: c@245: constq->process(real, imag, cqre, cqim); c@245: c@243: // extract const-Q c@245: //!!! do_constQ(constq, frame, nsamples); c@245: // int ncq = constq->ncoeff; c@245: c@245: delete [] frame; c@245: delete [] real; c@245: delete [] imag; c@243: c@245: //!!! if (ncq == ncoeff) // else feature extraction failed c@245: // { c@245: // vector cq(ncq); c@245: // for (int i = 0; i < ncq; i++) c@245: // cq[i] = constq->absconstQtransform[i]; c@245: vector cq(ncoeff); c@245: for (int i = 0; i < ncoeff; ++i) { c@245: cq[i] = sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]); c@245: } c@243: features.push_back(cq); c@245: // } c@245: c@245: delete[] cqre; c@245: delete[] cqim; c@243: } c@243: c@243: void ClusterMeltSegmenter::segment(int m) c@243: { c@243: nclusters = m; c@243: segment(); c@243: } c@243: c@243: void ClusterMeltSegmenter::setFeatures(const vector >& f) c@243: { c@243: features = f; c@243: featureType = FEATURE_TYPE_UNKNOWN; c@243: } c@243: c@243: void ClusterMeltSegmenter::segment() c@243: { c@243: if (constq) c@243: { c@245: //!!! close_constQ(constq); // finished extracting features c@245: delete constq; c@243: constq = NULL; c@243: } c@243: c@243: // for now copy the features to a native array and use the existing C segmenter... c@243: double** arrFeatures = new double*[features.size()]; c@243: for (int i = 0; i < features.size(); i++) c@243: { c@243: if (featureType == FEATURE_TYPE_UNKNOWN) c@243: arrFeatures[i] = new double[features[0].size()]; c@243: else c@243: arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope c@243: for (int j = 0; j < ncoeff; j++) c@243: arrFeatures[i][j] = features[i][j]; c@243: } c@243: c@243: q = new int[features.size()]; c@243: c@243: if (featureType == FEATURE_TYPE_UNKNOWN) c@243: cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength, c@243: nclusters, neighbourhoodLimit); c@243: else c@243: constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType, c@243: nHMMStates, histogramLength, nclusters, neighbourhoodLimit); c@243: c@243: // convert the cluster assignment sequence to a segmentation c@243: makeSegmentation(q, features.size()); c@243: c@243: // de-allocate arrays c@243: delete [] q; c@243: for (int i = 0; i < features.size(); i++) c@243: delete [] arrFeatures[i]; c@243: delete [] arrFeatures; c@243: c@243: // clear the features c@243: clear(); c@243: } c@243: c@243: void ClusterMeltSegmenter::makeSegmentation(int* q, int len) c@243: { c@243: segmentation.segments.clear(); c@243: segmentation.nsegtypes = nclusters; c@243: segmentation.samplerate = samplerate; c@243: c@243: Segment segment; c@243: segment.start = 0; c@243: segment.type = q[0]; c@243: c@243: for (int i = 1; i < len; i++) c@243: { c@243: if (q[i] != q[i-1]) c@243: { c@243: segment.end = i * getHopsize(); c@243: segmentation.segments.push_back(segment); c@243: segment.type = q[i]; c@243: segment.start = segment.end; c@243: } c@243: } c@243: segment.end = len * getHopsize(); c@243: segmentation.segments.push_back(segment); c@243: } c@243: c@243: /* c@243: void ClusterMeltSegmenter::mpeg7ConstQ() c@243: { c@243: // convert to dB scale c@243: for (int i = 0; i < features.size(); i++) c@243: for (int j = 0; j < ncoeff; j++) c@243: features[i][j] = 10.0 * log10(features[i][j] + DBL_EPSILON); c@243: c@243: // normalise features and add the norm at the end as an extra feature dimension c@243: double maxnorm = 0; // track the max of the norms c@243: for (int i = 0; i < features.size(); i++) c@243: { c@243: double norm = 0; c@243: for (int j = 0; j < ncoeff; j++) c@243: norm += features[i][j] * features[i][j]; c@243: norm = sqrt(norm); c@243: for (int j = 0; j < ncoeff; j++) c@243: features[i][j] /= norm; c@243: features[i].push_back(norm); c@243: if (norm > maxnorm) c@243: maxnorm = norm; c@243: } c@243: c@243: // normalise the norms c@243: for (int i = 0; i < features.size(); i++) c@243: features[i][ncoeff] /= maxnorm; c@243: } c@243: */