qm-dsp: dsp/segmentation/ClusterMeltSegmenter.cpp annotate

annotate dsp/segmentation/ClusterMeltSegmenter.cpp @ 26:d096a79fa772

* Add timbral (MFCC) feature option to segmenter

author	cannam
date	Thu, 10 Jan 2008 16:41:33 +0000
parents	2b74bd60c61f
children	b678e72323df

rev	line source
cannam@24	1 /* -- c-basic-offset: 4 indent-tabs-mode: nil -- vi:set ts=8 sts=4 sw=4: */
cannam@24	2
cannam@18	3 /*
cannam@24	4 * ClusterMeltSegmenter.cpp
cannam@18	5 *
cannam@24	6 * Created by Mark Levy on 23/03/2006.
cannam@24	7 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
cannam@24	8 * All rights reserved.
cannam@18	9 */
cannam@18	10
cannam@18	11 #include <cfloat>
cannam@18	12 #include <cmath>
cannam@18	13
cannam@18	14 #include "ClusterMeltSegmenter.h"
cannam@18	15 #include "cluster_segmenter.h"
cannam@18	16 #include "segment.h"
cannam@18	17
cannam@20	18 #include "dsp/transforms/FFT.h"
cannam@24	19 #include "dsp/chromagram/ConstantQ.h"
cannam@24	20 #include "dsp/rateconversion/Decimator.h"
cannam@26	21 #include "dsp/mfcc/MFCC.h"
cannam@20	22
cannam@24	23 ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) :
cannam@24	24 window(NULL),
cannam@24	25 constq(NULL),
cannam@26	26 mfcc(NULL),
cannam@24	27 featureType(params.featureType),
cannam@24	28 hopSize(params.hopSize),
cannam@24	29 windowSize(params.windowSize),
cannam@24	30 fmin(params.fmin),
cannam@24	31 fmax(params.fmax),
cannam@24	32 nbins(params.nbins),
cannam@24	33 ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c
cannam@24	34 nHMMStates(params.nHMMStates),
cannam@24	35 nclusters(params.nclusters),
cannam@24	36 histogramLength(params.histogramLength),
cannam@24	37 neighbourhoodLimit(params.neighbourhoodLimit),
cannam@26	38 decimator(NULL)
cannam@18	39 {
cannam@18	40 }
cannam@18	41
cannam@18	42 void ClusterMeltSegmenter::initialise(int fs)
cannam@18	43 {
cannam@24	44 samplerate = fs;
cannam@24	45
cannam@26	46 if (featureType == FEATURE_TYPE_CONSTQ \|\|
cannam@26	47 featureType == FEATURE_TYPE_CHROMA) {
cannam@26	48
cannam@26	49 // run internal processing at 11025 or thereabouts
cannam@24	50 int internalRate = 11025;
cannam@24	51 int decimationFactor = samplerate / internalRate;
cannam@24	52 if (decimationFactor < 1) decimationFactor = 1;
cannam@24	53
cannam@24	54 // must be a power of two
cannam@24	55 while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
cannam@24	56
cannam@24	57 if (decimationFactor > Decimator::getHighestSupportedFactor()) {
cannam@24	58 decimationFactor = Decimator::getHighestSupportedFactor();
cannam@24	59 }
cannam@24	60
cannam@24	61 if (decimationFactor > 1) {
cannam@24	62 decimator = new Decimator(getWindowsize(), decimationFactor);
cannam@24	63 }
cannam@24	64
cannam@24	65 CQConfig config;
cannam@24	66 config.FS = samplerate / decimationFactor;
cannam@24	67 config.min = fmin;
cannam@24	68 config.max = fmax;
cannam@24	69 config.BPO = nbins;
cannam@24	70 config.CQThresh = 0.0054;
cannam@24	71
cannam@24	72 constq = new ConstantQ(config);
cannam@24	73 constq->sparsekernel();
cannam@26	74
cannam@26	75 ncoeff = constq->getK();
cannam@26	76
cannam@26	77 } else if (featureType == FEATURE_TYPE_MFCC) {
cannam@24	78
cannam@26	79 MFCCConfig config;
cannam@26	80 config.FS = samplerate;
cannam@26	81 config.fftsize = 1024;
cannam@26	82 config.nceps = 20;
cannam@26	83 config.want_c0 = false;
cannam@26	84
cannam@26	85 mfcc = new MFCC(config);
cannam@26	86 ncoeff = config.nceps;
cannam@24	87 }
cannam@18	88 }
cannam@18	89
cannam@18	90 ClusterMeltSegmenter::~ClusterMeltSegmenter()
cannam@18	91 {
cannam@24	92 delete window;
cannam@24	93 delete constq;
cannam@24	94 delete decimator;
cannam@20	95 }
cannam@20	96
cannam@20	97 int
cannam@20	98 ClusterMeltSegmenter::getWindowsize()
cannam@20	99 {
cannam@24	100 return static_cast<int>(windowSize * samplerate);
cannam@20	101 }
cannam@20	102
cannam@20	103 int
cannam@20	104 ClusterMeltSegmenter::getHopsize()
cannam@20	105 {
cannam@24	106 return static_cast<int>(hopSize * samplerate);
cannam@18	107 }
cannam@18	108
cannam@24	109 void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples)
cannam@18	110 {
cannam@26	111 if (featureType == FEATURE_TYPE_CONSTQ \|\|
cannam@26	112 featureType == FEATURE_TYPE_CHROMA) {
cannam@26	113 extractFeaturesConstQ(samples, nsamples);
cannam@26	114 } else if (featureType == FEATURE_TYPE_MFCC) {
cannam@26	115 extractFeaturesMFCC(samples, nsamples);
cannam@26	116 }
cannam@26	117 }
cannam@26	118
cannam@26	119 void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsamples)
cannam@26	120 {
cannam@24	121 if (!constq) {
cannam@26	122 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesConstQ: "
cannam@26	123 << "No const-q: initialise not called?"
cannam@24	124 << std::endl;
cannam@24	125 return;
cannam@24	126 }
cannam@20	127
cannam@24	128 if (nsamples < getWindowsize()) {
cannam@24	129 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
cannam@24	130 return;
cannam@24	131 }
cannam@24	132
cannam@24	133 int fftsize = constq->getfftlength();
cannam@24	134
cannam@24	135 if (!window \|\| window->getSize() != fftsize) {
cannam@24	136 delete window;
cannam@24	137 window = new Window<double>(HammingWindow, fftsize);
cannam@24	138 }
cannam@24	139
cannam@24	140 vector<double> cq(ncoeff);
cannam@24	141
cannam@24	142 for (int i = 0; i < ncoeff; ++i) cq[i] = 0.0;
cannam@24	143
cannam@24	144 const double *psource = samples;
cannam@24	145 int pcount = nsamples;
cannam@24	146
cannam@24	147 if (decimator) {
cannam@24	148 pcount = nsamples / decimator->getFactor();
cannam@24	149 double *decout = new double[pcount];
cannam@24	150 decimator->process(samples, decout);
cannam@24	151 psource = decout;
cannam@24	152 }
cannam@24	153
cannam@24	154 int origin = 0;
cannam@24	155
cannam@24	156 // std::cerr << "nsamples = " << nsamples << ", pcount = " << pcount << std::endl;
cannam@24	157
cannam@24	158 int frames = 0;
cannam@24	159
cannam@24	160 double *frame = new double[fftsize];
cannam@24	161 double *real = new double[fftsize];
cannam@24	162 double *imag = new double[fftsize];
cannam@24	163 double *cqre = new double[ncoeff];
cannam@24	164 double *cqim = new double[ncoeff];
cannam@24	165
cannam@24	166 while (origin <= pcount) {
cannam@24	167
cannam@24	168 // always need at least one fft window per block, but after
cannam@24	169 // that we want to avoid having any incomplete ones
cannam@24	170 if (origin > 0 && origin + fftsize >= pcount) break;
cannam@24	171
cannam@24	172 for (int i = 0; i < fftsize; ++i) {
cannam@24	173 if (origin + i < pcount) {
cannam@24	174 frame[i] = psource[origin + i];
cannam@24	175 } else {
cannam@24	176 frame[i] = 0.0;
cannam@24	177 }
cannam@24	178 }
cannam@24	179
cannam@24	180 for (int i = 0; i < fftsize/2; ++i) {
cannam@24	181 double value = frame[i];
cannam@24	182 frame[i] = frame[i + fftsize/2];
cannam@24	183 frame[i + fftsize/2] = value;
cannam@24	184 }
cannam@24	185
cannam@24	186 window->cut(frame);
cannam@24	187
cannam@24	188 FFT::process(fftsize, false, frame, 0, real, imag);
cannam@24	189
cannam@24	190 constq->process(real, imag, cqre, cqim);
cannam@18	191
cannam@24	192 for (int i = 0; i < ncoeff; ++i) {
cannam@24	193 cq[i] += sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]);
cannam@24	194 }
cannam@24	195 ++frames;
cannam@20	196
cannam@24	197 origin += fftsize/2;
cannam@24	198 }
cannam@20	199
cannam@24	200 delete [] cqre;
cannam@24	201 delete [] cqim;
cannam@24	202 delete [] real;
cannam@24	203 delete [] imag;
cannam@24	204 delete [] frame;
cannam@20	205
cannam@24	206 for (int i = 0; i < ncoeff; ++i) {
cannam@24	207 cq[i] /= frames;
cannam@24	208 }
cannam@20	209
cannam@24	210 if (decimator) delete[] psource;
cannam@20	211
cannam@24	212 features.push_back(cq);
cannam@18	213 }
cannam@18	214
cannam@26	215 void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsamples)
cannam@26	216 {
cannam@26	217 if (!mfcc) {
cannam@26	218 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesMFCC: "
cannam@26	219 << "No mfcc: initialise not called?"
cannam@26	220 << std::endl;
cannam@26	221 return;
cannam@26	222 }
cannam@26	223
cannam@26	224 if (nsamples < getWindowsize()) {
cannam@26	225 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
cannam@26	226 return;
cannam@26	227 }
cannam@26	228
cannam@26	229 int fftsize = mfcc->getfftlength();
cannam@26	230
cannam@26	231 vector<double> cc(ncoeff);
cannam@26	232
cannam@26	233 for (int i = 0; i < ncoeff; ++i) cc[i] = 0.0;
cannam@26	234
cannam@26	235 const double *psource = samples;
cannam@26	236 int pcount = nsamples;
cannam@26	237
cannam@26	238 int origin = 0;
cannam@26	239 int frames = 0;
cannam@26	240
cannam@26	241 double *frame = new double[fftsize];
cannam@26	242 double *ccout = new double[ncoeff];
cannam@26	243
cannam@26	244 while (origin <= pcount) {
cannam@26	245
cannam@26	246 // always need at least one fft window per block, but after
cannam@26	247 // that we want to avoid having any incomplete ones
cannam@26	248 if (origin > 0 && origin + fftsize >= pcount) break;
cannam@26	249
cannam@26	250 for (int i = 0; i < fftsize; ++i) {
cannam@26	251 if (origin + i < pcount) {
cannam@26	252 frame[i] = psource[origin + i];
cannam@26	253 } else {
cannam@26	254 frame[i] = 0.0;
cannam@26	255 }
cannam@26	256 }
cannam@26	257
cannam@26	258 mfcc->process(fftsize, frame, ccout);
cannam@26	259
cannam@26	260 for (int i = 0; i < ncoeff; ++i) {
cannam@26	261 cc[i] += ccout[i];
cannam@26	262 }
cannam@26	263 ++frames;
cannam@26	264
cannam@26	265 origin += fftsize/2;
cannam@26	266 }
cannam@26	267
cannam@26	268 delete [] ccout;
cannam@26	269 delete [] frame;
cannam@26	270
cannam@26	271 for (int i = 0; i < ncoeff; ++i) {
cannam@26	272 cc[i] /= frames;
cannam@26	273 }
cannam@26	274
cannam@26	275 features.push_back(cc);
cannam@26	276 }
cannam@26	277
cannam@18	278 void ClusterMeltSegmenter::segment(int m)
cannam@18	279 {
cannam@24	280 nclusters = m;
cannam@24	281 segment();
cannam@18	282 }
cannam@18	283
cannam@18	284 void ClusterMeltSegmenter::setFeatures(const vector<vector<double> >& f)
cannam@18	285 {
cannam@24	286 features = f;
cannam@24	287 featureType = FEATURE_TYPE_UNKNOWN;
cannam@18	288 }
cannam@18	289
cannam@18	290 void ClusterMeltSegmenter::segment()
cannam@18	291 {
cannam@26	292 delete constq;
cannam@26	293 constq = 0;
cannam@26	294 delete mfcc;
cannam@26	295 mfcc = 0;
cannam@26	296 delete decimator;
cannam@26	297 decimator = 0;
cannam@18	298
cannam@24	299 std::cerr << "ClusterMeltSegmenter::segment: have " << features.size()
cannam@24	300 << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl;
cannam@24	301
cannam@24	302 // copy the features to a native array and use the existing C segmenter...
cannam@24	303 double** arrFeatures = new double*[features.size()];
cannam@24	304 for (int i = 0; i < features.size(); i++)
cannam@24	305 {
cannam@24	306 if (featureType == FEATURE_TYPE_UNKNOWN) {
cannam@24	307 arrFeatures[i] = new double[features[0].size()];
cannam@24	308 for (int j = 0; j < features[0].size(); j++)
cannam@24	309 arrFeatures[i][j] = features[i][j];
cannam@24	310 } else {
cannam@24	311 arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope
cannam@24	312 for (int j = 0; j < ncoeff; j++)
cannam@24	313 arrFeatures[i][j] = features[i][j];
cannam@24	314 }
cannam@24	315 }
cannam@18	316
cannam@24	317 q = new int[features.size()];
cannam@18	318
cannam@26	319 if (featureType == FEATURE_TYPE_UNKNOWN \|\|
cannam@26	320 featureType == FEATURE_TYPE_MFCC)
cannam@24	321 cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength,
cannam@24	322 nclusters, neighbourhoodLimit);
cannam@24	323 else
cannam@24	324 constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType,
cannam@24	325 nHMMStates, histogramLength, nclusters, neighbourhoodLimit);
cannam@18	326
cannam@24	327 // convert the cluster assignment sequence to a segmentation
cannam@24	328 makeSegmentation(q, features.size());
cannam@18	329
cannam@24	330 // de-allocate arrays
cannam@24	331 delete [] q;
cannam@24	332 for (int i = 0; i < features.size(); i++)
cannam@24	333 delete [] arrFeatures[i];
cannam@24	334 delete [] arrFeatures;
cannam@18	335
cannam@24	336 // clear the features
cannam@24	337 clear();
cannam@18	338 }
cannam@18	339
cannam@18	340 void ClusterMeltSegmenter::makeSegmentation(int* q, int len)
cannam@18	341 {
cannam@24	342 segmentation.segments.clear();
cannam@24	343 segmentation.nsegtypes = nclusters;
cannam@24	344 segmentation.samplerate = samplerate;
cannam@18	345
cannam@24	346 Segment segment;
cannam@24	347 segment.start = 0;
cannam@24	348 segment.type = q[0];
cannam@18	349
cannam@24	350 for (int i = 1; i < len; i++)
cannam@24	351 {
cannam@24	352 if (q[i] != q[i-1])
cannam@24	353 {
cannam@24	354 segment.end = i * getHopsize();
cannam@24	355 segmentation.segments.push_back(segment);
cannam@24	356 segment.type = q[i];
cannam@24	357 segment.start = segment.end;
cannam@24	358 }
cannam@24	359 }
cannam@24	360 segment.end = len * getHopsize();
cannam@24	361 segmentation.segments.push_back(segment);
cannam@18	362 }
cannam@18	363

Mercurial > hg > qm-dsp

annotate dsp/segmentation/ClusterMeltSegmenter.cpp @ 26:d096a79fa772