qm-dsp: dsp/segmentation/ClusterMeltSegmenter.cpp annotate

annotate dsp/segmentation/ClusterMeltSegmenter.cpp @ 64:6cb2b3cd5356

* Refactor FFT a little bit so as to separate construction and processing rather than have a single static method -- will make it easier to use a different implementation * pull in KissFFT implementation (not hooked up yet)

author	cannam
date	Wed, 13 May 2009 09:19:12 +0000
parents	d72fcd34d9a7
children	e5907ae6de17

rev	line source
cannam@24	1 /* -- c-basic-offset: 4 indent-tabs-mode: nil -- vi:set ts=8 sts=4 sw=4: */
cannam@24	2
cannam@18	3 /*
cannam@24	4 * ClusterMeltSegmenter.cpp
cannam@18	5 *
cannam@24	6 * Created by Mark Levy on 23/03/2006.
cannam@24	7 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
cannam@24	8 * All rights reserved.
cannam@18	9 */
cannam@18	10
cannam@18	11 #include <cfloat>
cannam@18	12 #include <cmath>
cannam@18	13
cannam@18	14 #include "ClusterMeltSegmenter.h"
cannam@18	15 #include "cluster_segmenter.h"
cannam@18	16 #include "segment.h"
cannam@18	17
cannam@20	18 #include "dsp/transforms/FFT.h"
cannam@24	19 #include "dsp/chromagram/ConstantQ.h"
cannam@24	20 #include "dsp/rateconversion/Decimator.h"
cannam@26	21 #include "dsp/mfcc/MFCC.h"
cannam@20	22
cannam@24	23 ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) :
cannam@24	24 window(NULL),
cannam@64	25 fft(NULL),
cannam@24	26 constq(NULL),
cannam@26	27 mfcc(NULL),
cannam@24	28 featureType(params.featureType),
cannam@24	29 hopSize(params.hopSize),
cannam@24	30 windowSize(params.windowSize),
cannam@24	31 fmin(params.fmin),
cannam@24	32 fmax(params.fmax),
cannam@24	33 nbins(params.nbins),
cannam@24	34 ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c
cannam@24	35 nHMMStates(params.nHMMStates),
cannam@24	36 nclusters(params.nclusters),
cannam@24	37 histogramLength(params.histogramLength),
cannam@24	38 neighbourhoodLimit(params.neighbourhoodLimit),
cannam@26	39 decimator(NULL)
cannam@18	40 {
cannam@18	41 }
cannam@18	42
cannam@18	43 void ClusterMeltSegmenter::initialise(int fs)
cannam@18	44 {
cannam@24	45 samplerate = fs;
cannam@24	46
cannam@26	47 if (featureType == FEATURE_TYPE_CONSTQ \|\|
cannam@26	48 featureType == FEATURE_TYPE_CHROMA) {
cannam@26	49
cannam@26	50 // run internal processing at 11025 or thereabouts
cannam@24	51 int internalRate = 11025;
cannam@24	52 int decimationFactor = samplerate / internalRate;
cannam@24	53 if (decimationFactor < 1) decimationFactor = 1;
cannam@24	54
cannam@24	55 // must be a power of two
cannam@24	56 while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
cannam@24	57
cannam@24	58 if (decimationFactor > Decimator::getHighestSupportedFactor()) {
cannam@24	59 decimationFactor = Decimator::getHighestSupportedFactor();
cannam@24	60 }
cannam@24	61
cannam@24	62 if (decimationFactor > 1) {
cannam@24	63 decimator = new Decimator(getWindowsize(), decimationFactor);
cannam@24	64 }
cannam@24	65
cannam@24	66 CQConfig config;
cannam@24	67 config.FS = samplerate / decimationFactor;
cannam@24	68 config.min = fmin;
cannam@24	69 config.max = fmax;
cannam@24	70 config.BPO = nbins;
cannam@24	71 config.CQThresh = 0.0054;
cannam@24	72
cannam@24	73 constq = new ConstantQ(config);
cannam@24	74 constq->sparsekernel();
cannam@26	75
cannam@26	76 ncoeff = constq->getK();
cannam@64	77
cannam@64	78 fft = new FFTReal(constq->getfftlength());
cannam@26	79
cannam@26	80 } else if (featureType == FEATURE_TYPE_MFCC) {
cannam@24	81
cannam@27	82 // run internal processing at 22050 or thereabouts
cannam@27	83 int internalRate = 22050;
cannam@27	84 int decimationFactor = samplerate / internalRate;
cannam@27	85 if (decimationFactor < 1) decimationFactor = 1;
cannam@27	86
cannam@27	87 // must be a power of two
cannam@27	88 while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
cannam@27	89
cannam@27	90 if (decimationFactor > Decimator::getHighestSupportedFactor()) {
cannam@27	91 decimationFactor = Decimator::getHighestSupportedFactor();
cannam@27	92 }
cannam@27	93
cannam@27	94 if (decimationFactor > 1) {
cannam@27	95 decimator = new Decimator(getWindowsize(), decimationFactor);
cannam@27	96 }
cannam@27	97
cannam@30	98 MFCCConfig config(samplerate / decimationFactor);
cannam@27	99 config.fftsize = 2048;
cannam@27	100 config.nceps = 19;
cannam@27	101 config.want_c0 = true;
cannam@26	102
cannam@26	103 mfcc = new MFCC(config);
cannam@27	104 ncoeff = config.nceps + 1;
cannam@24	105 }
cannam@18	106 }
cannam@18	107
cannam@18	108 ClusterMeltSegmenter::~ClusterMeltSegmenter()
cannam@18	109 {
cannam@24	110 delete window;
cannam@24	111 delete constq;
cannam@24	112 delete decimator;
cannam@64	113 delete fft;
cannam@20	114 }
cannam@20	115
cannam@20	116 int
cannam@20	117 ClusterMeltSegmenter::getWindowsize()
cannam@20	118 {
cannam@44	119 return static_cast<int>(windowSize * samplerate + 0.001);
cannam@20	120 }
cannam@20	121
cannam@20	122 int
cannam@20	123 ClusterMeltSegmenter::getHopsize()
cannam@20	124 {
cannam@44	125 return static_cast<int>(hopSize * samplerate + 0.001);
cannam@18	126 }
cannam@18	127
cannam@24	128 void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples)
cannam@18	129 {
cannam@26	130 if (featureType == FEATURE_TYPE_CONSTQ \|\|
cannam@26	131 featureType == FEATURE_TYPE_CHROMA) {
cannam@26	132 extractFeaturesConstQ(samples, nsamples);
cannam@26	133 } else if (featureType == FEATURE_TYPE_MFCC) {
cannam@26	134 extractFeaturesMFCC(samples, nsamples);
cannam@26	135 }
cannam@26	136 }
cannam@26	137
cannam@26	138 void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsamples)
cannam@26	139 {
cannam@24	140 if (!constq) {
cannam@26	141 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesConstQ: "
cannam@26	142 << "No const-q: initialise not called?"
cannam@24	143 << std::endl;
cannam@24	144 return;
cannam@24	145 }
cannam@20	146
cannam@24	147 if (nsamples < getWindowsize()) {
cannam@24	148 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
cannam@24	149 return;
cannam@24	150 }
cannam@24	151
cannam@24	152 int fftsize = constq->getfftlength();
cannam@24	153
cannam@24	154 if (!window \|\| window->getSize() != fftsize) {
cannam@24	155 delete window;
cannam@24	156 window = new Window<double>(HammingWindow, fftsize);
cannam@24	157 }
cannam@24	158
cannam@24	159 vector<double> cq(ncoeff);
cannam@24	160
cannam@24	161 for (int i = 0; i < ncoeff; ++i) cq[i] = 0.0;
cannam@24	162
cannam@24	163 const double *psource = samples;
cannam@24	164 int pcount = nsamples;
cannam@24	165
cannam@24	166 if (decimator) {
cannam@24	167 pcount = nsamples / decimator->getFactor();
cannam@24	168 double *decout = new double[pcount];
cannam@24	169 decimator->process(samples, decout);
cannam@24	170 psource = decout;
cannam@24	171 }
cannam@24	172
cannam@24	173 int origin = 0;
cannam@24	174
cannam@24	175 // std::cerr << "nsamples = " << nsamples << ", pcount = " << pcount << std::endl;
cannam@24	176
cannam@24	177 int frames = 0;
cannam@24	178
cannam@24	179 double *frame = new double[fftsize];
cannam@24	180 double *real = new double[fftsize];
cannam@24	181 double *imag = new double[fftsize];
cannam@24	182 double *cqre = new double[ncoeff];
cannam@24	183 double *cqim = new double[ncoeff];
cannam@24	184
cannam@24	185 while (origin <= pcount) {
cannam@24	186
cannam@24	187 // always need at least one fft window per block, but after
cannam@24	188 // that we want to avoid having any incomplete ones
cannam@24	189 if (origin > 0 && origin + fftsize >= pcount) break;
cannam@24	190
cannam@24	191 for (int i = 0; i < fftsize; ++i) {
cannam@24	192 if (origin + i < pcount) {
cannam@24	193 frame[i] = psource[origin + i];
cannam@24	194 } else {
cannam@24	195 frame[i] = 0.0;
cannam@24	196 }
cannam@24	197 }
cannam@24	198
cannam@24	199 for (int i = 0; i < fftsize/2; ++i) {
cannam@24	200 double value = frame[i];
cannam@24	201 frame[i] = frame[i + fftsize/2];
cannam@24	202 frame[i + fftsize/2] = value;
cannam@24	203 }
cannam@24	204
cannam@24	205 window->cut(frame);
cannam@24	206
cannam@64	207 fft->process(false, frame, real, imag);
cannam@24	208
cannam@24	209 constq->process(real, imag, cqre, cqim);
cannam@18	210
cannam@24	211 for (int i = 0; i < ncoeff; ++i) {
cannam@24	212 cq[i] += sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]);
cannam@24	213 }
cannam@24	214 ++frames;
cannam@20	215
cannam@24	216 origin += fftsize/2;
cannam@24	217 }
cannam@20	218
cannam@24	219 delete [] cqre;
cannam@24	220 delete [] cqim;
cannam@24	221 delete [] real;
cannam@24	222 delete [] imag;
cannam@24	223 delete [] frame;
cannam@20	224
cannam@24	225 for (int i = 0; i < ncoeff; ++i) {
cannam@24	226 cq[i] /= frames;
cannam@24	227 }
cannam@20	228
cannam@24	229 if (decimator) delete[] psource;
cannam@20	230
cannam@24	231 features.push_back(cq);
cannam@18	232 }
cannam@18	233
cannam@26	234 void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsamples)
cannam@26	235 {
cannam@26	236 if (!mfcc) {
cannam@26	237 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesMFCC: "
cannam@26	238 << "No mfcc: initialise not called?"
cannam@26	239 << std::endl;
cannam@26	240 return;
cannam@26	241 }
cannam@26	242
cannam@26	243 if (nsamples < getWindowsize()) {
cannam@26	244 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
cannam@26	245 return;
cannam@26	246 }
cannam@26	247
cannam@26	248 int fftsize = mfcc->getfftlength();
cannam@26	249
cannam@26	250 vector<double> cc(ncoeff);
cannam@26	251
cannam@26	252 for (int i = 0; i < ncoeff; ++i) cc[i] = 0.0;
cannam@26	253
cannam@26	254 const double *psource = samples;
cannam@26	255 int pcount = nsamples;
cannam@26	256
cannam@27	257 if (decimator) {
cannam@27	258 pcount = nsamples / decimator->getFactor();
cannam@27	259 double *decout = new double[pcount];
cannam@27	260 decimator->process(samples, decout);
cannam@27	261 psource = decout;
cannam@27	262 }
cannam@27	263
cannam@26	264 int origin = 0;
cannam@26	265 int frames = 0;
cannam@26	266
cannam@26	267 double *frame = new double[fftsize];
cannam@26	268 double *ccout = new double[ncoeff];
cannam@26	269
cannam@26	270 while (origin <= pcount) {
cannam@26	271
cannam@26	272 // always need at least one fft window per block, but after
cannam@26	273 // that we want to avoid having any incomplete ones
cannam@26	274 if (origin > 0 && origin + fftsize >= pcount) break;
cannam@26	275
cannam@26	276 for (int i = 0; i < fftsize; ++i) {
cannam@26	277 if (origin + i < pcount) {
cannam@26	278 frame[i] = psource[origin + i];
cannam@26	279 } else {
cannam@26	280 frame[i] = 0.0;
cannam@26	281 }
cannam@26	282 }
cannam@26	283
cannam@30	284 mfcc->process(frame, ccout);
cannam@26	285
cannam@26	286 for (int i = 0; i < ncoeff; ++i) {
cannam@26	287 cc[i] += ccout[i];
cannam@26	288 }
cannam@26	289 ++frames;
cannam@26	290
cannam@26	291 origin += fftsize/2;
cannam@26	292 }
cannam@26	293
cannam@26	294 delete [] ccout;
cannam@26	295 delete [] frame;
cannam@26	296
cannam@26	297 for (int i = 0; i < ncoeff; ++i) {
cannam@26	298 cc[i] /= frames;
cannam@26	299 }
cannam@26	300
cannam@27	301 if (decimator) delete[] psource;
cannam@27	302
cannam@26	303 features.push_back(cc);
cannam@26	304 }
cannam@26	305
cannam@18	306 void ClusterMeltSegmenter::segment(int m)
cannam@18	307 {
cannam@24	308 nclusters = m;
cannam@24	309 segment();
cannam@18	310 }
cannam@18	311
cannam@18	312 void ClusterMeltSegmenter::setFeatures(const vector<vector<double> >& f)
cannam@18	313 {
cannam@24	314 features = f;
cannam@24	315 featureType = FEATURE_TYPE_UNKNOWN;
cannam@18	316 }
cannam@18	317
cannam@18	318 void ClusterMeltSegmenter::segment()
cannam@18	319 {
cannam@26	320 delete constq;
cannam@26	321 constq = 0;
cannam@26	322 delete mfcc;
cannam@26	323 mfcc = 0;
cannam@26	324 delete decimator;
cannam@26	325 decimator = 0;
cannam@58	326
cannam@58	327 if (features.size() < histogramLength) return;
cannam@58	328 /*
cannam@24	329 std::cerr << "ClusterMeltSegmenter::segment: have " << features.size()
cannam@24	330 << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl;
cannam@58	331 */
cannam@24	332 // copy the features to a native array and use the existing C segmenter...
cannam@24	333 double** arrFeatures = new double*[features.size()];
cannam@24	334 for (int i = 0; i < features.size(); i++)
cannam@24	335 {
cannam@24	336 if (featureType == FEATURE_TYPE_UNKNOWN) {
cannam@24	337 arrFeatures[i] = new double[features[0].size()];
cannam@24	338 for (int j = 0; j < features[0].size(); j++)
cannam@24	339 arrFeatures[i][j] = features[i][j];
cannam@24	340 } else {
cannam@24	341 arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope
cannam@24	342 for (int j = 0; j < ncoeff; j++)
cannam@24	343 arrFeatures[i][j] = features[i][j];
cannam@24	344 }
cannam@24	345 }
cannam@18	346
cannam@24	347 q = new int[features.size()];
cannam@18	348
cannam@26	349 if (featureType == FEATURE_TYPE_UNKNOWN \|\|
cannam@26	350 featureType == FEATURE_TYPE_MFCC)
cannam@24	351 cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength,
cannam@24	352 nclusters, neighbourhoodLimit);
cannam@24	353 else
cannam@24	354 constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType,
cannam@24	355 nHMMStates, histogramLength, nclusters, neighbourhoodLimit);
cannam@18	356
cannam@24	357 // convert the cluster assignment sequence to a segmentation
cannam@24	358 makeSegmentation(q, features.size());
cannam@18	359
cannam@24	360 // de-allocate arrays
cannam@24	361 delete [] q;
cannam@24	362 for (int i = 0; i < features.size(); i++)
cannam@24	363 delete [] arrFeatures[i];
cannam@24	364 delete [] arrFeatures;
cannam@18	365
cannam@24	366 // clear the features
cannam@24	367 clear();
cannam@18	368 }
cannam@18	369
cannam@18	370 void ClusterMeltSegmenter::makeSegmentation(int* q, int len)
cannam@18	371 {
cannam@24	372 segmentation.segments.clear();
cannam@24	373 segmentation.nsegtypes = nclusters;
cannam@24	374 segmentation.samplerate = samplerate;
cannam@18	375
cannam@24	376 Segment segment;
cannam@24	377 segment.start = 0;
cannam@24	378 segment.type = q[0];
cannam@18	379
cannam@24	380 for (int i = 1; i < len; i++)
cannam@24	381 {
cannam@24	382 if (q[i] != q[i-1])
cannam@24	383 {
cannam@24	384 segment.end = i * getHopsize();
cannam@24	385 segmentation.segments.push_back(segment);
cannam@24	386 segment.type = q[i];
cannam@24	387 segment.start = segment.end;
cannam@24	388 }
cannam@24	389 }
cannam@24	390 segment.end = len * getHopsize();
cannam@24	391 segmentation.segments.push_back(segment);
cannam@18	392 }
cannam@18	393

Mercurial > hg > qm-dsp

annotate dsp/segmentation/ClusterMeltSegmenter.cpp @ 64:6cb2b3cd5356