annotate dsp/segmentation/ClusterMeltSegmenter.cpp @ 249:18a0dffa5c1a

* Various fixes to segmentation code
author Chris Cannam <c.cannam@qmul.ac.uk>
date Thu, 10 Jan 2008 15:14:53 +0000
parents cdfd0948a852
children d096a79fa772
rev   line source
c@249 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
c@249 2
c@243 3 /*
c@249 4 * ClusterMeltSegmenter.cpp
c@243 5 *
c@249 6 * Created by Mark Levy on 23/03/2006.
c@249 7 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
c@249 8 * All rights reserved.
c@243 9 */
c@243 10
c@243 11 #include <cfloat>
c@243 12 #include <cmath>
c@243 13
c@243 14 #include "ClusterMeltSegmenter.h"
c@243 15 #include "cluster_segmenter.h"
c@243 16 #include "segment.h"
c@243 17
c@245 18 #include "dsp/transforms/FFT.h"
c@249 19 #include "dsp/chromagram/ConstantQ.h"
c@249 20 #include "dsp/rateconversion/Decimator.h"
c@245 21
c@249 22 ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) :
c@249 23 window(NULL),
c@249 24 constq(NULL),
c@249 25 featureType(params.featureType),
c@249 26 hopSize(params.hopSize),
c@249 27 windowSize(params.windowSize),
c@249 28 fmin(params.fmin),
c@249 29 fmax(params.fmax),
c@249 30 nbins(params.nbins),
c@249 31 ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c
c@249 32 nHMMStates(params.nHMMStates),
c@249 33 nclusters(params.nclusters),
c@249 34 histogramLength(params.histogramLength),
c@249 35 neighbourhoodLimit(params.neighbourhoodLimit),
c@249 36 decimator(0)
c@243 37 {
c@243 38 }
c@243 39
c@243 40 void ClusterMeltSegmenter::initialise(int fs)
c@243 41 {
c@249 42 samplerate = fs;
c@249 43
c@249 44 if (featureType != FEATURE_TYPE_UNKNOWN)
c@249 45 {
c@249 46 // always run internal processing at 11025 or thereabouts
c@249 47 int internalRate = 11025;
c@249 48 int decimationFactor = samplerate / internalRate;
c@249 49 if (decimationFactor < 1) decimationFactor = 1;
c@249 50
c@249 51 // must be a power of two
c@249 52 while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
c@249 53
c@249 54 if (decimationFactor > Decimator::getHighestSupportedFactor()) {
c@249 55 decimationFactor = Decimator::getHighestSupportedFactor();
c@249 56 }
c@249 57
c@249 58 if (decimationFactor > 1) {
c@249 59 decimator = new Decimator(getWindowsize(), decimationFactor);
c@249 60 }
c@249 61
c@249 62 CQConfig config;
c@249 63 config.FS = samplerate / decimationFactor;
c@249 64 config.min = fmin;
c@249 65 config.max = fmax;
c@249 66 config.BPO = nbins;
c@249 67 config.CQThresh = 0.0054;
c@249 68
c@249 69 constq = new ConstantQ(config);
c@249 70 constq->sparsekernel();
c@249 71
c@249 72 ncoeff = constq->getK();
c@249 73 }
c@243 74 }
c@243 75
c@243 76 ClusterMeltSegmenter::~ClusterMeltSegmenter()
c@243 77 {
c@249 78 delete window;
c@249 79 delete constq;
c@249 80 delete decimator;
c@245 81 }
c@245 82
c@245 83 int
c@245 84 ClusterMeltSegmenter::getWindowsize()
c@245 85 {
c@249 86 if (featureType != FEATURE_TYPE_UNKNOWN) {
c@249 87
c@249 88 if (constq) {
c@249 89 /*
c@249 90 std::cerr << "ClusterMeltSegmenter::getWindowsize: "
c@249 91 << "rate = " << samplerate
c@249 92 << ", dec factor = " << (decimator ? decimator->getFactor() : 1)
c@249 93 << ", fft length = " << constq->getfftlength()
c@249 94 << ", fmin = " << fmin
c@249 95 << ", fmax = " << fmax
c@249 96 << ", nbins = " << nbins
c@249 97 << ", K = " << constq->getK()
c@249 98 << ", Q = " << constq->getQ()
c@249 99 << std::endl;
c@249 100 */
c@249 101 }
c@249 102 }
c@249 103
c@249 104 return static_cast<int>(windowSize * samplerate);
c@245 105 }
c@245 106
c@245 107 int
c@245 108 ClusterMeltSegmenter::getHopsize()
c@245 109 {
c@249 110 return static_cast<int>(hopSize * samplerate);
c@243 111 }
c@243 112
c@249 113 void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples)
c@243 114 {
c@249 115 if (!constq) {
c@249 116 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: "
c@249 117 << "Cannot run unknown feature type (or initialise not called)"
c@249 118 << std::endl;
c@249 119 return;
c@249 120 }
c@245 121
c@249 122 if (nsamples < getWindowsize()) {
c@249 123 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
c@249 124 return;
c@249 125 }
c@249 126
c@249 127 int fftsize = constq->getfftlength();
c@249 128
c@249 129 if (!window || window->getSize() != fftsize) {
c@249 130 delete window;
c@249 131 window = new Window<double>(HammingWindow, fftsize);
c@249 132 }
c@249 133
c@249 134 vector<double> cq(ncoeff);
c@249 135
c@249 136 for (int i = 0; i < ncoeff; ++i) cq[i] = 0.0;
c@249 137
c@249 138 const double *psource = samples;
c@249 139 int pcount = nsamples;
c@249 140
c@249 141 if (decimator) {
c@249 142 pcount = nsamples / decimator->getFactor();
c@249 143 double *decout = new double[pcount];
c@249 144 decimator->process(samples, decout);
c@249 145 psource = decout;
c@249 146 }
c@249 147
c@249 148 int origin = 0;
c@249 149
c@249 150 // std::cerr << "nsamples = " << nsamples << ", pcount = " << pcount << std::endl;
c@249 151
c@249 152 int frames = 0;
c@249 153
c@249 154 double *frame = new double[fftsize];
c@249 155 double *real = new double[fftsize];
c@249 156 double *imag = new double[fftsize];
c@249 157 double *cqre = new double[ncoeff];
c@249 158 double *cqim = new double[ncoeff];
c@249 159
c@249 160 while (origin <= pcount) {
c@249 161
c@249 162 // always need at least one fft window per block, but after
c@249 163 // that we want to avoid having any incomplete ones
c@249 164 if (origin > 0 && origin + fftsize >= pcount) break;
c@249 165
c@249 166 for (int i = 0; i < fftsize; ++i) {
c@249 167 if (origin + i < pcount) {
c@249 168 frame[i] = psource[origin + i];
c@249 169 } else {
c@249 170 frame[i] = 0.0;
c@249 171 }
c@249 172 }
c@249 173
c@249 174 for (int i = 0; i < fftsize/2; ++i) {
c@249 175 double value = frame[i];
c@249 176 frame[i] = frame[i + fftsize/2];
c@249 177 frame[i + fftsize/2] = value;
c@249 178 }
c@249 179
c@249 180 window->cut(frame);
c@249 181
c@249 182 FFT::process(fftsize, false, frame, 0, real, imag);
c@249 183
c@249 184 constq->process(real, imag, cqre, cqim);
c@243 185
c@249 186 for (int i = 0; i < ncoeff; ++i) {
c@249 187 cq[i] += sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]);
c@249 188 }
c@249 189 ++frames;
c@245 190
c@249 191 origin += fftsize/2;
c@249 192 }
c@245 193
c@249 194 delete [] cqre;
c@249 195 delete [] cqim;
c@249 196 delete [] real;
c@249 197 delete [] imag;
c@249 198 delete [] frame;
c@245 199
c@249 200 for (int i = 0; i < ncoeff; ++i) {
c@249 201 // std::cerr << cq[i] << " ";
c@249 202 cq[i] /= frames;
c@249 203 }
c@249 204 // std::cerr << std::endl;
c@245 205
c@249 206 if (decimator) delete[] psource;
c@245 207
c@249 208 features.push_back(cq);
c@243 209 }
c@243 210
c@243 211 void ClusterMeltSegmenter::segment(int m)
c@243 212 {
c@249 213 nclusters = m;
c@249 214 segment();
c@243 215 }
c@243 216
c@243 217 void ClusterMeltSegmenter::setFeatures(const vector<vector<double> >& f)
c@243 218 {
c@249 219 features = f;
c@249 220 featureType = FEATURE_TYPE_UNKNOWN;
c@243 221 }
c@243 222
c@243 223 void ClusterMeltSegmenter::segment()
c@243 224 {
c@249 225 if (constq)
c@249 226 {
c@249 227 delete constq;
c@249 228 constq = 0;
c@249 229 delete decimator;
c@249 230 decimator = 0;
c@249 231 }
c@243 232
c@249 233 std::cerr << "ClusterMeltSegmenter::segment: have " << features.size()
c@249 234 << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl;
c@249 235
c@249 236 // copy the features to a native array and use the existing C segmenter...
c@249 237 double** arrFeatures = new double*[features.size()];
c@249 238 for (int i = 0; i < features.size(); i++)
c@249 239 {
c@249 240 if (featureType == FEATURE_TYPE_UNKNOWN) {
c@249 241 arrFeatures[i] = new double[features[0].size()];
c@249 242 for (int j = 0; j < features[0].size(); j++)
c@249 243 arrFeatures[i][j] = features[i][j];
c@249 244 } else {
c@249 245 arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope
c@249 246 for (int j = 0; j < ncoeff; j++)
c@249 247 arrFeatures[i][j] = features[i][j];
c@249 248 }
c@249 249 }
c@243 250
c@249 251 q = new int[features.size()];
c@243 252
c@249 253 if (featureType == FEATURE_TYPE_UNKNOWN)
c@249 254 cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength,
c@249 255 nclusters, neighbourhoodLimit);
c@249 256 else
c@249 257 constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType,
c@249 258 nHMMStates, histogramLength, nclusters, neighbourhoodLimit);
c@243 259
c@249 260 // convert the cluster assignment sequence to a segmentation
c@249 261 makeSegmentation(q, features.size());
c@243 262
c@249 263 // de-allocate arrays
c@249 264 delete [] q;
c@249 265 for (int i = 0; i < features.size(); i++)
c@249 266 delete [] arrFeatures[i];
c@249 267 delete [] arrFeatures;
c@243 268
c@249 269 // clear the features
c@249 270 clear();
c@243 271 }
c@243 272
c@243 273 void ClusterMeltSegmenter::makeSegmentation(int* q, int len)
c@243 274 {
c@249 275 segmentation.segments.clear();
c@249 276 segmentation.nsegtypes = nclusters;
c@249 277 segmentation.samplerate = samplerate;
c@243 278
c@249 279 Segment segment;
c@249 280 segment.start = 0;
c@249 281 segment.type = q[0];
c@243 282
c@249 283 for (int i = 1; i < len; i++)
c@249 284 {
c@249 285 if (q[i] != q[i-1])
c@249 286 {
c@249 287 segment.end = i * getHopsize();
c@249 288 segmentation.segments.push_back(segment);
c@249 289 segment.type = q[i];
c@249 290 segment.start = segment.end;
c@249 291 }
c@249 292 }
c@249 293 segment.end = len * getHopsize();
c@249 294 segmentation.segments.push_back(segment);
c@243 295 }
c@243 296