cannam@24: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
cannam@24: 
cannam@18: /*
cannam@24:  * ClusterMeltSegmenter.cpp
cannam@18:  *
cannam@24:  * Created by Mark Levy on 23/03/2006.
cannam@24:  * Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
Chris@84: 
Chris@84:     This program is free software; you can redistribute it and/or
Chris@84:     modify it under the terms of the GNU General Public License as
Chris@84:     published by the Free Software Foundation; either version 2 of the
Chris@84:     License, or (at your option) any later version.  See the file
Chris@84:     COPYING included with this distribution for more information.
cannam@18:  */
cannam@18: 
cannam@18: #include <cfloat>
cannam@18: #include <cmath>
cannam@18: 
cannam@18: #include "ClusterMeltSegmenter.h"
cannam@18: #include "cluster_segmenter.h"
cannam@18: #include "segment.h"
cannam@18: 
cannam@20: #include "dsp/transforms/FFT.h"
cannam@24: #include "dsp/chromagram/ConstantQ.h"
cannam@24: #include "dsp/rateconversion/Decimator.h"
cannam@26: #include "dsp/mfcc/MFCC.h"
cannam@20: 
cannam@24: ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) :
cannam@24:     window(NULL),
cannam@64:     fft(NULL),
cannam@24:     constq(NULL),
cannam@26:     mfcc(NULL),
cannam@24:     featureType(params.featureType),
cannam@24:     hopSize(params.hopSize),
cannam@24:     windowSize(params.windowSize),
cannam@24:     fmin(params.fmin),
cannam@24:     fmax(params.fmax),
cannam@24:     nbins(params.nbins),
cannam@24:     ncomponents(params.ncomponents),	// NB currently not passed - no. of PCA components is set in cluser_segmenter.c
cannam@24:     nHMMStates(params.nHMMStates),
cannam@24:     nclusters(params.nclusters),
cannam@24:     histogramLength(params.histogramLength),
cannam@24:     neighbourhoodLimit(params.neighbourhoodLimit),
cannam@26:     decimator(NULL)
cannam@18: {
cannam@18: }
cannam@18: 
cannam@18: void ClusterMeltSegmenter::initialise(int fs)
cannam@18: {
cannam@24:     samplerate = fs;
cannam@24: 
cannam@26:     if (featureType == FEATURE_TYPE_CONSTQ ||
cannam@26:         featureType == FEATURE_TYPE_CHROMA) {
cannam@26:         
cannam@26:         // run internal processing at 11025 or thereabouts
cannam@24:         int internalRate = 11025;
cannam@24:         int decimationFactor = samplerate / internalRate;
cannam@24:         if (decimationFactor < 1) decimationFactor = 1;
cannam@24: 
cannam@24:         // must be a power of two
cannam@24:         while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
cannam@24: 
cannam@24:         if (decimationFactor > Decimator::getHighestSupportedFactor()) {
cannam@24:             decimationFactor = Decimator::getHighestSupportedFactor();
cannam@24:         }
cannam@24: 
cannam@24:         if (decimationFactor > 1) {
cannam@24:             decimator = new Decimator(getWindowsize(), decimationFactor);
cannam@24:         }
cannam@24: 
cannam@24:         CQConfig config;
cannam@24:         config.FS = samplerate / decimationFactor;
cannam@24:         config.min = fmin;
cannam@24:         config.max = fmax;
cannam@24:         config.BPO = nbins;
cannam@24:         config.CQThresh = 0.0054;
cannam@24: 
cannam@24:         constq = new ConstantQ(config);
cannam@24:         constq->sparsekernel();
cannam@26:         
cannam@26:         ncoeff = constq->getK();
cannam@64: 
cannam@64:         fft = new FFTReal(constq->getfftlength());
cannam@26:         
cannam@26:     } else if (featureType == FEATURE_TYPE_MFCC) {
cannam@24: 
cannam@27:         // run internal processing at 22050 or thereabouts
cannam@27:         int internalRate = 22050;
cannam@27:         int decimationFactor = samplerate / internalRate;
cannam@27:         if (decimationFactor < 1) decimationFactor = 1;
cannam@27: 
cannam@27:         // must be a power of two
cannam@27:         while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
cannam@27: 
cannam@27:         if (decimationFactor > Decimator::getHighestSupportedFactor()) {
cannam@27:             decimationFactor = Decimator::getHighestSupportedFactor();
cannam@27:         }
cannam@27: 
cannam@27:         if (decimationFactor > 1) {
cannam@27:             decimator = new Decimator(getWindowsize(), decimationFactor);
cannam@27:         }
cannam@27: 
cannam@30:         MFCCConfig config(samplerate / decimationFactor);
cannam@27:         config.fftsize = 2048;
cannam@27:         config.nceps = 19;
cannam@27:         config.want_c0 = true;
cannam@26: 
cannam@26:         mfcc = new MFCC(config);
cannam@27:         ncoeff = config.nceps + 1;
cannam@24:     }
cannam@18: }
cannam@18: 
cannam@18: ClusterMeltSegmenter::~ClusterMeltSegmenter() 
cannam@18: {
cannam@24:     delete window;
cannam@24:     delete constq;
cannam@24:     delete decimator;
cannam@64:     delete fft;
cannam@20: }
cannam@20: 
cannam@20: int
cannam@20: ClusterMeltSegmenter::getWindowsize()
cannam@20: {
cannam@44:     return static_cast<int>(windowSize * samplerate + 0.001);
cannam@20: }
cannam@20: 
cannam@20: int
cannam@20: ClusterMeltSegmenter::getHopsize()
cannam@20: {
cannam@44:     return static_cast<int>(hopSize * samplerate + 0.001);
cannam@18: }
cannam@18: 
cannam@24: void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples)
cannam@18: {
cannam@26:     if (featureType == FEATURE_TYPE_CONSTQ ||
cannam@26:         featureType == FEATURE_TYPE_CHROMA) {
cannam@26:         extractFeaturesConstQ(samples, nsamples);
cannam@26:     } else if (featureType == FEATURE_TYPE_MFCC) {
cannam@26:         extractFeaturesMFCC(samples, nsamples);
cannam@26:     }
cannam@26: }
cannam@26: 
cannam@26: void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsamples)
cannam@26: {
cannam@24:     if (!constq) {
cannam@26:         std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesConstQ: "
cannam@26:                   << "No const-q: initialise not called?"
cannam@24:                   << std::endl;
cannam@24:         return;
cannam@24:     }
cannam@20: 
cannam@24:     if (nsamples < getWindowsize()) {
cannam@24:         std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
cannam@24:         return;
cannam@24:     }
cannam@24: 
cannam@24:     int fftsize = constq->getfftlength();
cannam@24: 
cannam@24:     if (!window || window->getSize() != fftsize) {
cannam@24:         delete window;
cannam@24:         window = new Window<double>(HammingWindow, fftsize);
cannam@24:     }
cannam@24: 
cannam@24:     vector<double> cq(ncoeff);
cannam@24: 
cannam@24:     for (int i = 0; i < ncoeff; ++i) cq[i] = 0.0;
cannam@24:     
cannam@24:     const double *psource = samples;
cannam@24:     int pcount = nsamples;
cannam@24: 
cannam@24:     if (decimator) {
cannam@24:         pcount = nsamples / decimator->getFactor();
cannam@24:         double *decout = new double[pcount];
cannam@24:         decimator->process(samples, decout);
cannam@24:         psource = decout;
cannam@24:     }
cannam@24:     
cannam@24:     int origin = 0;
cannam@24:     
cannam@24: //    std::cerr << "nsamples = " << nsamples << ", pcount = " << pcount << std::endl;
cannam@24: 
cannam@24:     int frames = 0;
cannam@24: 
cannam@24:     double *frame = new double[fftsize];
cannam@24:     double *real = new double[fftsize];
cannam@24:     double *imag = new double[fftsize];
cannam@24:     double *cqre = new double[ncoeff];
cannam@24:     double *cqim = new double[ncoeff];
cannam@24: 
cannam@24:     while (origin <= pcount) {
cannam@24: 
cannam@24:         // always need at least one fft window per block, but after
cannam@24:         // that we want to avoid having any incomplete ones
cannam@24:         if (origin > 0 && origin + fftsize >= pcount) break;
cannam@24: 
cannam@24:         for (int i = 0; i < fftsize; ++i) {
cannam@24:             if (origin + i < pcount) {
cannam@24:                 frame[i] = psource[origin + i];
cannam@24:             } else {
cannam@24:                 frame[i] = 0.0;
cannam@24:             }
cannam@24:         }
cannam@24: 
cannam@24:         for (int i = 0; i < fftsize/2; ++i) {
cannam@24:             double value = frame[i];
cannam@24:             frame[i] = frame[i + fftsize/2];
cannam@24:             frame[i + fftsize/2] = value;
cannam@24:         }
cannam@24: 
cannam@24:         window->cut(frame);
cannam@24:         
cannam@64:         fft->process(false, frame, real, imag);
cannam@24:         
cannam@24:         constq->process(real, imag, cqre, cqim);
cannam@18: 	
cannam@24:         for (int i = 0; i < ncoeff; ++i) {
cannam@24:             cq[i] += sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]);
cannam@24:         }
cannam@24:         ++frames;
cannam@20: 
cannam@24:         origin += fftsize/2;
cannam@24:     }
cannam@20: 
cannam@24:     delete [] cqre;
cannam@24:     delete [] cqim;
cannam@24:     delete [] real;
cannam@24:     delete [] imag;
cannam@24:     delete [] frame;
cannam@20: 
cannam@24:     for (int i = 0; i < ncoeff; ++i) {
cannam@24:         cq[i] /= frames;
cannam@24:     }
cannam@20: 
cannam@24:     if (decimator) delete[] psource;
cannam@20: 
cannam@24:     features.push_back(cq);
cannam@18: }
cannam@18: 
cannam@26: void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsamples)
cannam@26: {
cannam@26:     if (!mfcc) {
cannam@26:         std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesMFCC: "
cannam@26:                   << "No mfcc: initialise not called?"
cannam@26:                   << std::endl;
cannam@26:         return;
cannam@26:     }
cannam@26: 
cannam@26:     if (nsamples < getWindowsize()) {
cannam@26:         std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
cannam@26:         return;
cannam@26:     }
cannam@26: 
cannam@26:     int fftsize = mfcc->getfftlength();
cannam@26: 
cannam@26:     vector<double> cc(ncoeff);
cannam@26: 
cannam@26:     for (int i = 0; i < ncoeff; ++i) cc[i] = 0.0;
cannam@26:     
cannam@26:     const double *psource = samples;
cannam@26:     int pcount = nsamples;
cannam@26: 
cannam@27:     if (decimator) {
cannam@27:         pcount = nsamples / decimator->getFactor();
cannam@27:         double *decout = new double[pcount];
cannam@27:         decimator->process(samples, decout);
cannam@27:         psource = decout;
cannam@27:     }
cannam@27: 
cannam@26:     int origin = 0;
cannam@26:     int frames = 0;
cannam@26: 
cannam@26:     double *frame = new double[fftsize];
cannam@26:     double *ccout = new double[ncoeff];
cannam@26: 
cannam@26:     while (origin <= pcount) {
cannam@26: 
cannam@26:         // always need at least one fft window per block, but after
cannam@26:         // that we want to avoid having any incomplete ones
cannam@26:         if (origin > 0 && origin + fftsize >= pcount) break;
cannam@26: 
cannam@26:         for (int i = 0; i < fftsize; ++i) {
cannam@26:             if (origin + i < pcount) {
cannam@26:                 frame[i] = psource[origin + i];
cannam@26:             } else {
cannam@26:                 frame[i] = 0.0;
cannam@26:             }
cannam@26:         }
cannam@26: 
cannam@30:         mfcc->process(frame, ccout);
cannam@26: 	
cannam@26:         for (int i = 0; i < ncoeff; ++i) {
cannam@26:             cc[i] += ccout[i];
cannam@26:         }
cannam@26:         ++frames;
cannam@26: 
cannam@26:         origin += fftsize/2;
cannam@26:     }
cannam@26: 
cannam@26:     delete [] ccout;
cannam@26:     delete [] frame;
cannam@26: 
cannam@26:     for (int i = 0; i < ncoeff; ++i) {
cannam@26:         cc[i] /= frames;
cannam@26:     }
cannam@26: 
cannam@27:     if (decimator) delete[] psource;
cannam@27: 
cannam@26:     features.push_back(cc);
cannam@26: }
cannam@26: 
cannam@18: void ClusterMeltSegmenter::segment(int m)
cannam@18: {
cannam@24:     nclusters = m;
cannam@24:     segment();
cannam@18: }
cannam@18: 
cannam@18: void ClusterMeltSegmenter::setFeatures(const vector<vector<double> >& f)
cannam@18: {
cannam@24:     features = f;
cannam@24:     featureType = FEATURE_TYPE_UNKNOWN;
cannam@18: }
cannam@18: 
cannam@18: void ClusterMeltSegmenter::segment()
cannam@18: {
cannam@26:     delete constq;
cannam@26:     constq = 0;
cannam@26:     delete mfcc;
cannam@26:     mfcc = 0;
cannam@26:     delete decimator;
cannam@26:     decimator = 0;
cannam@58: 
cannam@58:     if (features.size() < histogramLength) return;
cannam@58: /*    
cannam@24:     std::cerr << "ClusterMeltSegmenter::segment: have " << features.size()
cannam@24:               << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl;
cannam@58: */
cannam@24:     // copy the features to a native array and use the existing C segmenter...
cannam@24:     double** arrFeatures = new double*[features.size()];	
cannam@24:     for (int i = 0; i < features.size(); i++)
cannam@24:     {
cannam@24:         if (featureType == FEATURE_TYPE_UNKNOWN) {
cannam@24:             arrFeatures[i] = new double[features[0].size()];
cannam@24:             for (int j = 0; j < features[0].size(); j++)
cannam@24:                 arrFeatures[i][j] = features[i][j];	
cannam@24:         } else {
cannam@24:             arrFeatures[i] = new double[ncoeff+1];	// allow space for the normalised envelope
cannam@24:             for (int j = 0; j < ncoeff; j++)
cannam@24:                 arrFeatures[i][j] = features[i][j];	
cannam@24:         }
cannam@24:     }
cannam@18: 	
cannam@24:     q = new int[features.size()];
cannam@18: 	
cannam@26:     if (featureType == FEATURE_TYPE_UNKNOWN ||
cannam@26:         featureType == FEATURE_TYPE_MFCC)
cannam@24:         cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength, 
cannam@24:                         nclusters, neighbourhoodLimit);
cannam@24:     else
cannam@24:         constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType, 
cannam@24:                        nHMMStates, histogramLength, nclusters, neighbourhoodLimit);
cannam@18: 	
cannam@24:     // convert the cluster assignment sequence to a segmentation
cannam@24:     makeSegmentation(q, features.size());		
cannam@18: 	
cannam@24:     // de-allocate arrays
cannam@24:     delete [] q;
cannam@24:     for (int i = 0; i < features.size(); i++)
cannam@24:         delete [] arrFeatures[i];
cannam@24:     delete [] arrFeatures;
cannam@18: 	
cannam@24:     // clear the features
cannam@24:     clear();
cannam@18: }
cannam@18: 
cannam@18: void ClusterMeltSegmenter::makeSegmentation(int* q, int len)
cannam@18: {
cannam@24:     segmentation.segments.clear();
cannam@24:     segmentation.nsegtypes = nclusters;
cannam@24:     segmentation.samplerate = samplerate;
cannam@18: 	
cannam@24:     Segment segment;
cannam@24:     segment.start = 0;
cannam@24:     segment.type = q[0];
cannam@18: 	
cannam@24:     for (int i = 1; i < len; i++)
cannam@24:     {
cannam@24:         if (q[i] != q[i-1])
cannam@24:         {
cannam@24:             segment.end = i * getHopsize();
cannam@24:             segmentation.segments.push_back(segment);
cannam@24:             segment.type = q[i];
cannam@24:             segment.start = segment.end;
cannam@24:         }
cannam@24:     }
cannam@24:     segment.end = len * getHopsize();
cannam@24:     segmentation.segments.push_back(segment);
cannam@18: }
cannam@18: