| c@249 | 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */ | 
| c@249 | 2 | 
| c@243 | 3 /* | 
| c@249 | 4  * ClusterMeltSegmenter.cpp | 
| c@243 | 5  * | 
| c@249 | 6  * Created by Mark Levy on 23/03/2006. | 
| c@249 | 7  * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. | 
| c@309 | 8 | 
| c@309 | 9     This program is free software; you can redistribute it and/or | 
| c@309 | 10     modify it under the terms of the GNU General Public License as | 
| c@309 | 11     published by the Free Software Foundation; either version 2 of the | 
| c@309 | 12     License, or (at your option) any later version.  See the file | 
| c@309 | 13     COPYING included with this distribution for more information. | 
| c@243 | 14  */ | 
| c@243 | 15 | 
| c@243 | 16 #include <cfloat> | 
| c@243 | 17 #include <cmath> | 
| c@243 | 18 | 
| c@243 | 19 #include "ClusterMeltSegmenter.h" | 
| c@243 | 20 #include "cluster_segmenter.h" | 
| c@243 | 21 #include "segment.h" | 
| c@243 | 22 | 
| c@245 | 23 #include "dsp/transforms/FFT.h" | 
| c@249 | 24 #include "dsp/chromagram/ConstantQ.h" | 
| c@249 | 25 #include "dsp/rateconversion/Decimator.h" | 
| c@251 | 26 #include "dsp/mfcc/MFCC.h" | 
| c@245 | 27 | 
| c@249 | 28 ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) : | 
| c@249 | 29     window(NULL), | 
| c@289 | 30     fft(NULL), | 
| c@249 | 31     constq(NULL), | 
| c@251 | 32     mfcc(NULL), | 
| c@249 | 33     featureType(params.featureType), | 
| c@249 | 34     hopSize(params.hopSize), | 
| c@249 | 35     windowSize(params.windowSize), | 
| c@249 | 36     fmin(params.fmin), | 
| c@249 | 37     fmax(params.fmax), | 
| c@249 | 38     nbins(params.nbins), | 
| c@249 | 39     ncomponents(params.ncomponents),	// NB currently not passed - no. of PCA components is set in cluser_segmenter.c | 
| c@249 | 40     nHMMStates(params.nHMMStates), | 
| c@249 | 41     nclusters(params.nclusters), | 
| c@249 | 42     histogramLength(params.histogramLength), | 
| c@249 | 43     neighbourhoodLimit(params.neighbourhoodLimit), | 
| c@251 | 44     decimator(NULL) | 
| c@243 | 45 { | 
| c@243 | 46 } | 
| c@243 | 47 | 
| c@243 | 48 void ClusterMeltSegmenter::initialise(int fs) | 
| c@243 | 49 { | 
| c@249 | 50     samplerate = fs; | 
| c@249 | 51 | 
| c@251 | 52     if (featureType == FEATURE_TYPE_CONSTQ || | 
| c@251 | 53         featureType == FEATURE_TYPE_CHROMA) { | 
| c@251 | 54 | 
| c@251 | 55         // run internal processing at 11025 or thereabouts | 
| c@249 | 56         int internalRate = 11025; | 
| c@249 | 57         int decimationFactor = samplerate / internalRate; | 
| c@249 | 58         if (decimationFactor < 1) decimationFactor = 1; | 
| c@249 | 59 | 
| c@249 | 60         // must be a power of two | 
| c@249 | 61         while (decimationFactor & (decimationFactor - 1)) ++decimationFactor; | 
| c@249 | 62 | 
| c@249 | 63         if (decimationFactor > Decimator::getHighestSupportedFactor()) { | 
| c@249 | 64             decimationFactor = Decimator::getHighestSupportedFactor(); | 
| c@249 | 65         } | 
| c@249 | 66 | 
| c@249 | 67         if (decimationFactor > 1) { | 
| c@249 | 68             decimator = new Decimator(getWindowsize(), decimationFactor); | 
| c@249 | 69         } | 
| c@249 | 70 | 
| c@249 | 71         CQConfig config; | 
| c@249 | 72         config.FS = samplerate / decimationFactor; | 
| c@249 | 73         config.min = fmin; | 
| c@249 | 74         config.max = fmax; | 
| c@249 | 75         config.BPO = nbins; | 
| c@249 | 76         config.CQThresh = 0.0054; | 
| c@249 | 77 | 
| c@249 | 78         constq = new ConstantQ(config); | 
| c@249 | 79         constq->sparsekernel(); | 
| c@251 | 80 | 
| c@251 | 81         ncoeff = constq->getK(); | 
| c@289 | 82 | 
| c@289 | 83         fft = new FFTReal(constq->getfftlength()); | 
| c@251 | 84 | 
| c@251 | 85     } else if (featureType == FEATURE_TYPE_MFCC) { | 
| c@249 | 86 | 
| c@252 | 87         // run internal processing at 22050 or thereabouts | 
| c@252 | 88         int internalRate = 22050; | 
| c@252 | 89         int decimationFactor = samplerate / internalRate; | 
| c@252 | 90         if (decimationFactor < 1) decimationFactor = 1; | 
| c@252 | 91 | 
| c@252 | 92         // must be a power of two | 
| c@252 | 93         while (decimationFactor & (decimationFactor - 1)) ++decimationFactor; | 
| c@252 | 94 | 
| c@252 | 95         if (decimationFactor > Decimator::getHighestSupportedFactor()) { | 
| c@252 | 96             decimationFactor = Decimator::getHighestSupportedFactor(); | 
| c@252 | 97         } | 
| c@252 | 98 | 
| c@252 | 99         if (decimationFactor > 1) { | 
| c@252 | 100             decimator = new Decimator(getWindowsize(), decimationFactor); | 
| c@252 | 101         } | 
| c@252 | 102 | 
| c@255 | 103         MFCCConfig config(samplerate / decimationFactor); | 
| c@252 | 104         config.fftsize = 2048; | 
| c@252 | 105         config.nceps = 19; | 
| c@252 | 106         config.want_c0 = true; | 
| c@251 | 107 | 
| c@251 | 108         mfcc = new MFCC(config); | 
| c@252 | 109         ncoeff = config.nceps + 1; | 
| c@249 | 110     } | 
| c@243 | 111 } | 
| c@243 | 112 | 
| c@243 | 113 ClusterMeltSegmenter::~ClusterMeltSegmenter() | 
| c@243 | 114 { | 
| c@249 | 115     delete window; | 
| c@249 | 116     delete constq; | 
| c@249 | 117     delete decimator; | 
| c@289 | 118     delete fft; | 
| c@245 | 119 } | 
| c@245 | 120 | 
| c@245 | 121 int | 
| c@245 | 122 ClusterMeltSegmenter::getWindowsize() | 
| c@245 | 123 { | 
| c@269 | 124     return static_cast<int>(windowSize * samplerate + 0.001); | 
| c@245 | 125 } | 
| c@245 | 126 | 
| c@245 | 127 int | 
| c@245 | 128 ClusterMeltSegmenter::getHopsize() | 
| c@245 | 129 { | 
| c@269 | 130     return static_cast<int>(hopSize * samplerate + 0.001); | 
| c@243 | 131 } | 
| c@243 | 132 | 
| c@249 | 133 void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples) | 
| c@243 | 134 { | 
| c@251 | 135     if (featureType == FEATURE_TYPE_CONSTQ || | 
| c@251 | 136         featureType == FEATURE_TYPE_CHROMA) { | 
| c@251 | 137         extractFeaturesConstQ(samples, nsamples); | 
| c@251 | 138     } else if (featureType == FEATURE_TYPE_MFCC) { | 
| c@251 | 139         extractFeaturesMFCC(samples, nsamples); | 
| c@251 | 140     } | 
| c@251 | 141 } | 
| c@251 | 142 | 
| c@251 | 143 void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsamples) | 
| c@251 | 144 { | 
| c@249 | 145     if (!constq) { | 
| c@251 | 146         std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesConstQ: " | 
| c@251 | 147                   << "No const-q: initialise not called?" | 
| c@249 | 148                   << std::endl; | 
| c@249 | 149         return; | 
| c@249 | 150     } | 
| c@245 | 151 | 
| c@249 | 152     if (nsamples < getWindowsize()) { | 
| c@249 | 153         std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl; | 
| c@249 | 154         return; | 
| c@249 | 155     } | 
| c@249 | 156 | 
| c@249 | 157     int fftsize = constq->getfftlength(); | 
| c@249 | 158 | 
| c@249 | 159     if (!window || window->getSize() != fftsize) { | 
| c@249 | 160         delete window; | 
| c@249 | 161         window = new Window<double>(HammingWindow, fftsize); | 
| c@249 | 162     } | 
| c@249 | 163 | 
| c@249 | 164     vector<double> cq(ncoeff); | 
| c@249 | 165 | 
| c@249 | 166     for (int i = 0; i < ncoeff; ++i) cq[i] = 0.0; | 
| c@249 | 167 | 
| c@249 | 168     const double *psource = samples; | 
| c@249 | 169     int pcount = nsamples; | 
| c@249 | 170 | 
| c@249 | 171     if (decimator) { | 
| c@249 | 172         pcount = nsamples / decimator->getFactor(); | 
| c@249 | 173         double *decout = new double[pcount]; | 
| c@249 | 174         decimator->process(samples, decout); | 
| c@249 | 175         psource = decout; | 
| c@249 | 176     } | 
| c@249 | 177 | 
| c@249 | 178     int origin = 0; | 
| c@249 | 179 | 
| c@249 | 180 //    std::cerr << "nsamples = " << nsamples << ", pcount = " << pcount << std::endl; | 
| c@249 | 181 | 
| c@249 | 182     int frames = 0; | 
| c@249 | 183 | 
| c@249 | 184     double *frame = new double[fftsize]; | 
| c@249 | 185     double *real = new double[fftsize]; | 
| c@249 | 186     double *imag = new double[fftsize]; | 
| c@249 | 187     double *cqre = new double[ncoeff]; | 
| c@249 | 188     double *cqim = new double[ncoeff]; | 
| c@249 | 189 | 
| c@249 | 190     while (origin <= pcount) { | 
| c@249 | 191 | 
| c@249 | 192         // always need at least one fft window per block, but after | 
| c@249 | 193         // that we want to avoid having any incomplete ones | 
| c@249 | 194         if (origin > 0 && origin + fftsize >= pcount) break; | 
| c@249 | 195 | 
| c@249 | 196         for (int i = 0; i < fftsize; ++i) { | 
| c@249 | 197             if (origin + i < pcount) { | 
| c@249 | 198                 frame[i] = psource[origin + i]; | 
| c@249 | 199             } else { | 
| c@249 | 200                 frame[i] = 0.0; | 
| c@249 | 201             } | 
| c@249 | 202         } | 
| c@249 | 203 | 
| c@249 | 204         for (int i = 0; i < fftsize/2; ++i) { | 
| c@249 | 205             double value = frame[i]; | 
| c@249 | 206             frame[i] = frame[i + fftsize/2]; | 
| c@249 | 207             frame[i + fftsize/2] = value; | 
| c@249 | 208         } | 
| c@249 | 209 | 
| c@249 | 210         window->cut(frame); | 
| c@249 | 211 | 
| c@339 | 212         fft->forward(frame, real, imag); | 
| c@249 | 213 | 
| c@249 | 214         constq->process(real, imag, cqre, cqim); | 
| c@243 | 215 | 
| c@249 | 216         for (int i = 0; i < ncoeff; ++i) { | 
| c@249 | 217             cq[i] += sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]); | 
| c@249 | 218         } | 
| c@249 | 219         ++frames; | 
| c@245 | 220 | 
| c@249 | 221         origin += fftsize/2; | 
| c@249 | 222     } | 
| c@245 | 223 | 
| c@249 | 224     delete [] cqre; | 
| c@249 | 225     delete [] cqim; | 
| c@249 | 226     delete [] real; | 
| c@249 | 227     delete [] imag; | 
| c@249 | 228     delete [] frame; | 
| c@245 | 229 | 
| c@249 | 230     for (int i = 0; i < ncoeff; ++i) { | 
| c@249 | 231         cq[i] /= frames; | 
| c@249 | 232     } | 
| c@245 | 233 | 
| c@249 | 234     if (decimator) delete[] psource; | 
| c@245 | 235 | 
| c@249 | 236     features.push_back(cq); | 
| c@243 | 237 } | 
| c@243 | 238 | 
| c@251 | 239 void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsamples) | 
| c@251 | 240 { | 
| c@251 | 241     if (!mfcc) { | 
| c@251 | 242         std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesMFCC: " | 
| c@251 | 243                   << "No mfcc: initialise not called?" | 
| c@251 | 244                   << std::endl; | 
| c@251 | 245         return; | 
| c@251 | 246     } | 
| c@251 | 247 | 
| c@251 | 248     if (nsamples < getWindowsize()) { | 
| c@251 | 249         std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl; | 
| c@251 | 250         return; | 
| c@251 | 251     } | 
| c@251 | 252 | 
| c@251 | 253     int fftsize = mfcc->getfftlength(); | 
| c@251 | 254 | 
| c@251 | 255     vector<double> cc(ncoeff); | 
| c@251 | 256 | 
| c@251 | 257     for (int i = 0; i < ncoeff; ++i) cc[i] = 0.0; | 
| c@251 | 258 | 
| c@251 | 259     const double *psource = samples; | 
| c@251 | 260     int pcount = nsamples; | 
| c@251 | 261 | 
| c@252 | 262     if (decimator) { | 
| c@252 | 263         pcount = nsamples / decimator->getFactor(); | 
| c@252 | 264         double *decout = new double[pcount]; | 
| c@252 | 265         decimator->process(samples, decout); | 
| c@252 | 266         psource = decout; | 
| c@252 | 267     } | 
| c@252 | 268 | 
| c@251 | 269     int origin = 0; | 
| c@251 | 270     int frames = 0; | 
| c@251 | 271 | 
| c@251 | 272     double *frame = new double[fftsize]; | 
| c@251 | 273     double *ccout = new double[ncoeff]; | 
| c@251 | 274 | 
| c@251 | 275     while (origin <= pcount) { | 
| c@251 | 276 | 
| c@251 | 277         // always need at least one fft window per block, but after | 
| c@251 | 278         // that we want to avoid having any incomplete ones | 
| c@251 | 279         if (origin > 0 && origin + fftsize >= pcount) break; | 
| c@251 | 280 | 
| c@251 | 281         for (int i = 0; i < fftsize; ++i) { | 
| c@251 | 282             if (origin + i < pcount) { | 
| c@251 | 283                 frame[i] = psource[origin + i]; | 
| c@251 | 284             } else { | 
| c@251 | 285                 frame[i] = 0.0; | 
| c@251 | 286             } | 
| c@251 | 287         } | 
| c@251 | 288 | 
| c@255 | 289         mfcc->process(frame, ccout); | 
| c@251 | 290 | 
| c@251 | 291         for (int i = 0; i < ncoeff; ++i) { | 
| c@251 | 292             cc[i] += ccout[i]; | 
| c@251 | 293         } | 
| c@251 | 294         ++frames; | 
| c@251 | 295 | 
| c@251 | 296         origin += fftsize/2; | 
| c@251 | 297     } | 
| c@251 | 298 | 
| c@251 | 299     delete [] ccout; | 
| c@251 | 300     delete [] frame; | 
| c@251 | 301 | 
| c@251 | 302     for (int i = 0; i < ncoeff; ++i) { | 
| c@251 | 303         cc[i] /= frames; | 
| c@251 | 304     } | 
| c@251 | 305 | 
| c@252 | 306     if (decimator) delete[] psource; | 
| c@252 | 307 | 
| c@251 | 308     features.push_back(cc); | 
| c@251 | 309 } | 
| c@251 | 310 | 
| c@243 | 311 void ClusterMeltSegmenter::segment(int m) | 
| c@243 | 312 { | 
| c@249 | 313     nclusters = m; | 
| c@249 | 314     segment(); | 
| c@243 | 315 } | 
| c@243 | 316 | 
| c@243 | 317 void ClusterMeltSegmenter::setFeatures(const vector<vector<double> >& f) | 
| c@243 | 318 { | 
| c@249 | 319     features = f; | 
| c@249 | 320     featureType = FEATURE_TYPE_UNKNOWN; | 
| c@243 | 321 } | 
| c@243 | 322 | 
| c@243 | 323 void ClusterMeltSegmenter::segment() | 
| c@243 | 324 { | 
| c@251 | 325     delete constq; | 
| c@251 | 326     constq = 0; | 
| c@251 | 327     delete mfcc; | 
| c@251 | 328     mfcc = 0; | 
| c@251 | 329     delete decimator; | 
| c@251 | 330     decimator = 0; | 
| c@283 | 331 | 
| c@283 | 332     if (features.size() < histogramLength) return; | 
| c@283 | 333 /* | 
| c@249 | 334     std::cerr << "ClusterMeltSegmenter::segment: have " << features.size() | 
| c@249 | 335               << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl; | 
| c@283 | 336 */ | 
| c@249 | 337     // copy the features to a native array and use the existing C segmenter... | 
| c@249 | 338     double** arrFeatures = new double*[features.size()]; | 
| c@249 | 339     for (int i = 0; i < features.size(); i++) | 
| c@249 | 340     { | 
| c@249 | 341         if (featureType == FEATURE_TYPE_UNKNOWN) { | 
| c@249 | 342             arrFeatures[i] = new double[features[0].size()]; | 
| c@249 | 343             for (int j = 0; j < features[0].size(); j++) | 
| c@249 | 344                 arrFeatures[i][j] = features[i][j]; | 
| c@249 | 345         } else { | 
| c@249 | 346             arrFeatures[i] = new double[ncoeff+1];	// allow space for the normalised envelope | 
| c@249 | 347             for (int j = 0; j < ncoeff; j++) | 
| c@249 | 348                 arrFeatures[i][j] = features[i][j]; | 
| c@249 | 349         } | 
| c@249 | 350     } | 
| c@243 | 351 | 
| c@249 | 352     q = new int[features.size()]; | 
| c@243 | 353 | 
| c@251 | 354     if (featureType == FEATURE_TYPE_UNKNOWN || | 
| c@251 | 355         featureType == FEATURE_TYPE_MFCC) | 
| c@249 | 356         cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength, | 
| c@249 | 357                         nclusters, neighbourhoodLimit); | 
| c@249 | 358     else | 
| c@249 | 359         constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType, | 
| c@249 | 360                        nHMMStates, histogramLength, nclusters, neighbourhoodLimit); | 
| c@243 | 361 | 
| c@249 | 362     // convert the cluster assignment sequence to a segmentation | 
| c@249 | 363     makeSegmentation(q, features.size()); | 
| c@243 | 364 | 
| c@249 | 365     // de-allocate arrays | 
| c@249 | 366     delete [] q; | 
| c@249 | 367     for (int i = 0; i < features.size(); i++) | 
| c@249 | 368         delete [] arrFeatures[i]; | 
| c@249 | 369     delete [] arrFeatures; | 
| c@243 | 370 | 
| c@249 | 371     // clear the features | 
| c@249 | 372     clear(); | 
| c@243 | 373 } | 
| c@243 | 374 | 
| c@243 | 375 void ClusterMeltSegmenter::makeSegmentation(int* q, int len) | 
| c@243 | 376 { | 
| c@249 | 377     segmentation.segments.clear(); | 
| c@249 | 378     segmentation.nsegtypes = nclusters; | 
| c@249 | 379     segmentation.samplerate = samplerate; | 
| c@243 | 380 | 
| c@249 | 381     Segment segment; | 
| c@249 | 382     segment.start = 0; | 
| c@249 | 383     segment.type = q[0]; | 
| c@243 | 384 | 
| c@249 | 385     for (int i = 1; i < len; i++) | 
| c@249 | 386     { | 
| c@249 | 387         if (q[i] != q[i-1]) | 
| c@249 | 388         { | 
| c@249 | 389             segment.end = i * getHopsize(); | 
| c@249 | 390             segmentation.segments.push_back(segment); | 
| c@249 | 391             segment.type = q[i]; | 
| c@249 | 392             segment.start = segment.end; | 
| c@249 | 393         } | 
| c@249 | 394     } | 
| c@249 | 395     segment.end = len * getHopsize(); | 
| c@249 | 396     segmentation.segments.push_back(segment); | 
| c@243 | 397 } | 
| c@243 | 398 |