qm-dsp: dsp/segmentation/ClusterMeltSegmenter.cpp annotate

annotate dsp/segmentation/ClusterMeltSegmenter.cpp @ 114:f6ccde089491 pvoc

Tidy real-to-complex FFT -- forward and inverse have different arguments, so make them separate functions; document

author	Chris Cannam
date	Wed, 02 Oct 2013 15:04:38 +0100
parents	e5907ae6de17
children	e4a57215ddee

rev	line source
cannam@24	1 /* -- c-basic-offset: 4 indent-tabs-mode: nil -- vi:set ts=8 sts=4 sw=4: */
cannam@24	2
cannam@18	3 /*
cannam@24	4 * ClusterMeltSegmenter.cpp
cannam@18	5 *
cannam@24	6 * Created by Mark Levy on 23/03/2006.
cannam@24	7 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
Chris@84	8
Chris@84	9 This program is free software; you can redistribute it and/or
Chris@84	10 modify it under the terms of the GNU General Public License as
Chris@84	11 published by the Free Software Foundation; either version 2 of the
Chris@84	12 License, or (at your option) any later version. See the file
Chris@84	13 COPYING included with this distribution for more information.
cannam@18	14 */
cannam@18	15
cannam@18	16 #include <cfloat>
cannam@18	17 #include <cmath>
cannam@18	18
cannam@18	19 #include "ClusterMeltSegmenter.h"
cannam@18	20 #include "cluster_segmenter.h"
cannam@18	21 #include "segment.h"
cannam@18	22
cannam@20	23 #include "dsp/transforms/FFT.h"
cannam@24	24 #include "dsp/chromagram/ConstantQ.h"
cannam@24	25 #include "dsp/rateconversion/Decimator.h"
cannam@26	26 #include "dsp/mfcc/MFCC.h"
cannam@20	27
cannam@24	28 ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) :
cannam@24	29 window(NULL),
cannam@64	30 fft(NULL),
cannam@24	31 constq(NULL),
cannam@26	32 mfcc(NULL),
cannam@24	33 featureType(params.featureType),
cannam@24	34 hopSize(params.hopSize),
cannam@24	35 windowSize(params.windowSize),
cannam@24	36 fmin(params.fmin),
cannam@24	37 fmax(params.fmax),
cannam@24	38 nbins(params.nbins),
cannam@24	39 ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c
cannam@24	40 nHMMStates(params.nHMMStates),
cannam@24	41 nclusters(params.nclusters),
cannam@24	42 histogramLength(params.histogramLength),
cannam@24	43 neighbourhoodLimit(params.neighbourhoodLimit),
cannam@26	44 decimator(NULL)
cannam@18	45 {
cannam@18	46 }
cannam@18	47
cannam@18	48 void ClusterMeltSegmenter::initialise(int fs)
cannam@18	49 {
cannam@24	50 samplerate = fs;
cannam@24	51
cannam@26	52 if (featureType == FEATURE_TYPE_CONSTQ \|\|
cannam@26	53 featureType == FEATURE_TYPE_CHROMA) {
cannam@26	54
cannam@26	55 // run internal processing at 11025 or thereabouts
cannam@24	56 int internalRate = 11025;
cannam@24	57 int decimationFactor = samplerate / internalRate;
cannam@24	58 if (decimationFactor < 1) decimationFactor = 1;
cannam@24	59
cannam@24	60 // must be a power of two
cannam@24	61 while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
cannam@24	62
cannam@24	63 if (decimationFactor > Decimator::getHighestSupportedFactor()) {
cannam@24	64 decimationFactor = Decimator::getHighestSupportedFactor();
cannam@24	65 }
cannam@24	66
cannam@24	67 if (decimationFactor > 1) {
cannam@24	68 decimator = new Decimator(getWindowsize(), decimationFactor);
cannam@24	69 }
cannam@24	70
cannam@24	71 CQConfig config;
cannam@24	72 config.FS = samplerate / decimationFactor;
cannam@24	73 config.min = fmin;
cannam@24	74 config.max = fmax;
cannam@24	75 config.BPO = nbins;
cannam@24	76 config.CQThresh = 0.0054;
cannam@24	77
cannam@24	78 constq = new ConstantQ(config);
cannam@24	79 constq->sparsekernel();
cannam@26	80
cannam@26	81 ncoeff = constq->getK();
cannam@64	82
cannam@64	83 fft = new FFTReal(constq->getfftlength());
cannam@26	84
cannam@26	85 } else if (featureType == FEATURE_TYPE_MFCC) {
cannam@24	86
cannam@27	87 // run internal processing at 22050 or thereabouts
cannam@27	88 int internalRate = 22050;
cannam@27	89 int decimationFactor = samplerate / internalRate;
cannam@27	90 if (decimationFactor < 1) decimationFactor = 1;
cannam@27	91
cannam@27	92 // must be a power of two
cannam@27	93 while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
cannam@27	94
cannam@27	95 if (decimationFactor > Decimator::getHighestSupportedFactor()) {
cannam@27	96 decimationFactor = Decimator::getHighestSupportedFactor();
cannam@27	97 }
cannam@27	98
cannam@27	99 if (decimationFactor > 1) {
cannam@27	100 decimator = new Decimator(getWindowsize(), decimationFactor);
cannam@27	101 }
cannam@27	102
cannam@30	103 MFCCConfig config(samplerate / decimationFactor);
cannam@27	104 config.fftsize = 2048;
cannam@27	105 config.nceps = 19;
cannam@27	106 config.want_c0 = true;
cannam@26	107
cannam@26	108 mfcc = new MFCC(config);
cannam@27	109 ncoeff = config.nceps + 1;
cannam@24	110 }
cannam@18	111 }
cannam@18	112
cannam@18	113 ClusterMeltSegmenter::~ClusterMeltSegmenter()
cannam@18	114 {
cannam@24	115 delete window;
cannam@24	116 delete constq;
cannam@24	117 delete decimator;
cannam@64	118 delete fft;
cannam@20	119 }
cannam@20	120
cannam@20	121 int
cannam@20	122 ClusterMeltSegmenter::getWindowsize()
cannam@20	123 {
cannam@44	124 return static_cast<int>(windowSize * samplerate + 0.001);
cannam@20	125 }
cannam@20	126
cannam@20	127 int
cannam@20	128 ClusterMeltSegmenter::getHopsize()
cannam@20	129 {
cannam@44	130 return static_cast<int>(hopSize * samplerate + 0.001);
cannam@18	131 }
cannam@18	132
cannam@24	133 void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples)
cannam@18	134 {
cannam@26	135 if (featureType == FEATURE_TYPE_CONSTQ \|\|
cannam@26	136 featureType == FEATURE_TYPE_CHROMA) {
cannam@26	137 extractFeaturesConstQ(samples, nsamples);
cannam@26	138 } else if (featureType == FEATURE_TYPE_MFCC) {
cannam@26	139 extractFeaturesMFCC(samples, nsamples);
cannam@26	140 }
cannam@26	141 }
cannam@26	142
cannam@26	143 void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsamples)
cannam@26	144 {
cannam@24	145 if (!constq) {
cannam@26	146 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesConstQ: "
cannam@26	147 << "No const-q: initialise not called?"
cannam@24	148 << std::endl;
cannam@24	149 return;
cannam@24	150 }
cannam@20	151
cannam@24	152 if (nsamples < getWindowsize()) {
cannam@24	153 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
cannam@24	154 return;
cannam@24	155 }
cannam@24	156
cannam@24	157 int fftsize = constq->getfftlength();
cannam@24	158
cannam@24	159 if (!window \|\| window->getSize() != fftsize) {
cannam@24	160 delete window;
cannam@24	161 window = new Window<double>(HammingWindow, fftsize);
cannam@24	162 }
cannam@24	163
cannam@24	164 vector<double> cq(ncoeff);
cannam@24	165
cannam@24	166 for (int i = 0; i < ncoeff; ++i) cq[i] = 0.0;
cannam@24	167
cannam@24	168 const double *psource = samples;
cannam@24	169 int pcount = nsamples;
cannam@24	170
cannam@24	171 if (decimator) {
cannam@24	172 pcount = nsamples / decimator->getFactor();
cannam@24	173 double *decout = new double[pcount];
cannam@24	174 decimator->process(samples, decout);
cannam@24	175 psource = decout;
cannam@24	176 }
cannam@24	177
cannam@24	178 int origin = 0;
cannam@24	179
cannam@24	180 // std::cerr << "nsamples = " << nsamples << ", pcount = " << pcount << std::endl;
cannam@24	181
cannam@24	182 int frames = 0;
cannam@24	183
cannam@24	184 double *frame = new double[fftsize];
cannam@24	185 double *real = new double[fftsize];
cannam@24	186 double *imag = new double[fftsize];
cannam@24	187 double *cqre = new double[ncoeff];
cannam@24	188 double *cqim = new double[ncoeff];
cannam@24	189
cannam@24	190 while (origin <= pcount) {
cannam@24	191
cannam@24	192 // always need at least one fft window per block, but after
cannam@24	193 // that we want to avoid having any incomplete ones
cannam@24	194 if (origin > 0 && origin + fftsize >= pcount) break;
cannam@24	195
cannam@24	196 for (int i = 0; i < fftsize; ++i) {
cannam@24	197 if (origin + i < pcount) {
cannam@24	198 frame[i] = psource[origin + i];
cannam@24	199 } else {
cannam@24	200 frame[i] = 0.0;
cannam@24	201 }
cannam@24	202 }
cannam@24	203
cannam@24	204 for (int i = 0; i < fftsize/2; ++i) {
cannam@24	205 double value = frame[i];
cannam@24	206 frame[i] = frame[i + fftsize/2];
cannam@24	207 frame[i + fftsize/2] = value;
cannam@24	208 }
cannam@24	209
cannam@24	210 window->cut(frame);
cannam@24	211
Chris@114	212 fft->forward(frame, real, imag);
cannam@24	213
cannam@24	214 constq->process(real, imag, cqre, cqim);
cannam@18	215
cannam@24	216 for (int i = 0; i < ncoeff; ++i) {
cannam@24	217 cq[i] += sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]);
cannam@24	218 }
cannam@24	219 ++frames;
cannam@20	220
cannam@24	221 origin += fftsize/2;
cannam@24	222 }
cannam@20	223
cannam@24	224 delete [] cqre;
cannam@24	225 delete [] cqim;
cannam@24	226 delete [] real;
cannam@24	227 delete [] imag;
cannam@24	228 delete [] frame;
cannam@20	229
cannam@24	230 for (int i = 0; i < ncoeff; ++i) {
cannam@24	231 cq[i] /= frames;
cannam@24	232 }
cannam@20	233
cannam@24	234 if (decimator) delete[] psource;
cannam@20	235
cannam@24	236 features.push_back(cq);
cannam@18	237 }
cannam@18	238
cannam@26	239 void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsamples)
cannam@26	240 {
cannam@26	241 if (!mfcc) {
cannam@26	242 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesMFCC: "
cannam@26	243 << "No mfcc: initialise not called?"
cannam@26	244 << std::endl;
cannam@26	245 return;
cannam@26	246 }
cannam@26	247
cannam@26	248 if (nsamples < getWindowsize()) {
cannam@26	249 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
cannam@26	250 return;
cannam@26	251 }
cannam@26	252
cannam@26	253 int fftsize = mfcc->getfftlength();
cannam@26	254
cannam@26	255 vector<double> cc(ncoeff);
cannam@26	256
cannam@26	257 for (int i = 0; i < ncoeff; ++i) cc[i] = 0.0;
cannam@26	258
cannam@26	259 const double *psource = samples;
cannam@26	260 int pcount = nsamples;
cannam@26	261
cannam@27	262 if (decimator) {
cannam@27	263 pcount = nsamples / decimator->getFactor();
cannam@27	264 double *decout = new double[pcount];
cannam@27	265 decimator->process(samples, decout);
cannam@27	266 psource = decout;
cannam@27	267 }
cannam@27	268
cannam@26	269 int origin = 0;
cannam@26	270 int frames = 0;
cannam@26	271
cannam@26	272 double *frame = new double[fftsize];
cannam@26	273 double *ccout = new double[ncoeff];
cannam@26	274
cannam@26	275 while (origin <= pcount) {
cannam@26	276
cannam@26	277 // always need at least one fft window per block, but after
cannam@26	278 // that we want to avoid having any incomplete ones
cannam@26	279 if (origin > 0 && origin + fftsize >= pcount) break;
cannam@26	280
cannam@26	281 for (int i = 0; i < fftsize; ++i) {
cannam@26	282 if (origin + i < pcount) {
cannam@26	283 frame[i] = psource[origin + i];
cannam@26	284 } else {
cannam@26	285 frame[i] = 0.0;
cannam@26	286 }
cannam@26	287 }
cannam@26	288
cannam@30	289 mfcc->process(frame, ccout);
cannam@26	290
cannam@26	291 for (int i = 0; i < ncoeff; ++i) {
cannam@26	292 cc[i] += ccout[i];
cannam@26	293 }
cannam@26	294 ++frames;
cannam@26	295
cannam@26	296 origin += fftsize/2;
cannam@26	297 }
cannam@26	298
cannam@26	299 delete [] ccout;
cannam@26	300 delete [] frame;
cannam@26	301
cannam@26	302 for (int i = 0; i < ncoeff; ++i) {
cannam@26	303 cc[i] /= frames;
cannam@26	304 }
cannam@26	305
cannam@27	306 if (decimator) delete[] psource;
cannam@27	307
cannam@26	308 features.push_back(cc);
cannam@26	309 }
cannam@26	310
cannam@18	311 void ClusterMeltSegmenter::segment(int m)
cannam@18	312 {
cannam@24	313 nclusters = m;
cannam@24	314 segment();
cannam@18	315 }
cannam@18	316
cannam@18	317 void ClusterMeltSegmenter::setFeatures(const vector<vector<double> >& f)
cannam@18	318 {
cannam@24	319 features = f;
cannam@24	320 featureType = FEATURE_TYPE_UNKNOWN;
cannam@18	321 }
cannam@18	322
cannam@18	323 void ClusterMeltSegmenter::segment()
cannam@18	324 {
cannam@26	325 delete constq;
cannam@26	326 constq = 0;
cannam@26	327 delete mfcc;
cannam@26	328 mfcc = 0;
cannam@26	329 delete decimator;
cannam@26	330 decimator = 0;
cannam@58	331
cannam@58	332 if (features.size() < histogramLength) return;
cannam@58	333 /*
cannam@24	334 std::cerr << "ClusterMeltSegmenter::segment: have " << features.size()
cannam@24	335 << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl;
cannam@58	336 */
cannam@24	337 // copy the features to a native array and use the existing C segmenter...
cannam@24	338 double** arrFeatures = new double*[features.size()];
cannam@24	339 for (int i = 0; i < features.size(); i++)
cannam@24	340 {
cannam@24	341 if (featureType == FEATURE_TYPE_UNKNOWN) {
cannam@24	342 arrFeatures[i] = new double[features[0].size()];
cannam@24	343 for (int j = 0; j < features[0].size(); j++)
cannam@24	344 arrFeatures[i][j] = features[i][j];
cannam@24	345 } else {
cannam@24	346 arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope
cannam@24	347 for (int j = 0; j < ncoeff; j++)
cannam@24	348 arrFeatures[i][j] = features[i][j];
cannam@24	349 }
cannam@24	350 }
cannam@18	351
cannam@24	352 q = new int[features.size()];
cannam@18	353
cannam@26	354 if (featureType == FEATURE_TYPE_UNKNOWN \|\|
cannam@26	355 featureType == FEATURE_TYPE_MFCC)
cannam@24	356 cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength,
cannam@24	357 nclusters, neighbourhoodLimit);
cannam@24	358 else
cannam@24	359 constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType,
cannam@24	360 nHMMStates, histogramLength, nclusters, neighbourhoodLimit);
cannam@18	361
cannam@24	362 // convert the cluster assignment sequence to a segmentation
cannam@24	363 makeSegmentation(q, features.size());
cannam@18	364
cannam@24	365 // de-allocate arrays
cannam@24	366 delete [] q;
cannam@24	367 for (int i = 0; i < features.size(); i++)
cannam@24	368 delete [] arrFeatures[i];
cannam@24	369 delete [] arrFeatures;
cannam@18	370
cannam@24	371 // clear the features
cannam@24	372 clear();
cannam@18	373 }
cannam@18	374
cannam@18	375 void ClusterMeltSegmenter::makeSegmentation(int* q, int len)
cannam@18	376 {
cannam@24	377 segmentation.segments.clear();
cannam@24	378 segmentation.nsegtypes = nclusters;
cannam@24	379 segmentation.samplerate = samplerate;
cannam@18	380
cannam@24	381 Segment segment;
cannam@24	382 segment.start = 0;
cannam@24	383 segment.type = q[0];
cannam@18	384
cannam@24	385 for (int i = 1; i < len; i++)
cannam@24	386 {
cannam@24	387 if (q[i] != q[i-1])
cannam@24	388 {
cannam@24	389 segment.end = i * getHopsize();
cannam@24	390 segmentation.segments.push_back(segment);
cannam@24	391 segment.type = q[i];
cannam@24	392 segment.start = segment.end;
cannam@24	393 }
cannam@24	394 }
cannam@24	395 segment.end = len * getHopsize();
cannam@24	396 segmentation.segments.push_back(segment);
cannam@18	397 }
cannam@18	398

Mercurial > hg > qm-dsp

annotate dsp/segmentation/ClusterMeltSegmenter.cpp @ 114:f6ccde089491 pvoc