annotate dsp/segmentation/ClusterMeltSegmenter.h @ 298:255e431ae3d4

* Key detector: when returning key strengths, use the peak value of the three underlying chromagram correlations (from 36-bin chromagram) corresponding to each key, instead of the mean. Rationale: This is the same method as used when returning the key value, and it's nice to have the same results in both returned value and plot. The peak performed better than the sum with a simple test set of triads, so it seems reasonable to change the plot to match the key output rather than the other way around. * FFT: kiss_fftr returns only the non-conjugate bins, synthesise the rest rather than leaving them (perhaps dangerously) undefined. Fixes an uninitialised data error in chromagram that could cause garbage results from key detector. * Constant Q: remove precalculated values again, I reckon they're not proving such a good tradeoff.
author Chris Cannam <c.cannam@qmul.ac.uk>
date Fri, 05 Jun 2009 15:12:39 +0000
parents befe5aa6b450
children e5907ae6de17
rev   line source
c@249 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
c@249 2
c@243 3 /*
c@249 4 * ClusterMeltSegmenter.h
c@243 5 *
c@249 6 * Created by Mark Levy on 23/03/2006.
c@249 7 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
c@249 8 * All rights reserved.
c@243 9 */
c@243 10
c@243 11 #include <vector>
c@243 12
c@243 13 #include "segment.h"
c@243 14 #include "Segmenter.h"
c@245 15 #include "hmm/hmm.h"
c@245 16 #include "base/Window.h"
c@243 17
c@243 18 using std::vector;
c@243 19
c@249 20 class Decimator;
c@249 21 class ConstantQ;
c@251 22 class MFCC;
c@289 23 class FFTReal;
c@249 24
// Plain bundle of public configuration fields for ClusterMeltSegmenter, which
// takes one of these by value in its constructor. The default constructor
// below fills in every field; callers may overwrite any of them afterwards.
c@249 25 class ClusterMeltSegmenterParams
c@249 26 // defaults are sensible for 11025Hz with 0.2 second hopsize
c@243 27 {
c@243 28 public:
// Default constructor: initialises every field, in declaration order.
c@249 29 ClusterMeltSegmenterParams() :
c@249 30 featureType(FEATURE_TYPE_CONSTQ),
c@249 31 hopSize(0.2),
c@249 32 windowSize(0.6),
c@249 33 fmin(62),
c@249 34 fmax(16000),
c@249 35 nbins(8),
c@249 36 ncomponents(20),
c@249 37 nHMMStates(40),
c@249 38 nclusters(10),
c@249 39 histogramLength(15),
c@249 40 neighbourhoodLimit(20) { }
// Which feature extractor to use; feature_types and FEATURE_TYPE_CONSTQ are
// declared outside this header.
c@249 41 feature_types featureType;
c@249 42 double hopSize; // in secs
c@249 43 double windowSize; // in secs
// fmin, fmax and nbins are grouped as "constant-Q parameters" in
// ClusterMeltSegmenter.
// NOTE(review): fmin/fmax are presumably frequencies in Hz -- confirm.
// NOTE(review): the default fmax (16000) is above half of the 11025Hz rate
// mentioned at the top of this class -- confirm how it is limited downstream.
c@249 44 int fmin;
c@249 45 int fmax;
c@249 46 int nbins;
// Grouped as a "PCA parameter" in ClusterMeltSegmenter.
c@249 47 int ncomponents;
// Grouped as an "HMM parameter" in ClusterMeltSegmenter.
c@249 48 int nHMMStates;
// The next three are grouped as "clustering parameters" in
// ClusterMeltSegmenter; nclusters is what getNSegmentTypes() reports there.
c@249 49 int nclusters;
c@249 50 int histogramLength;
c@249 51 int neighbourhoodLimit;
c@243 52 };
c@243 53
// Segmenter subclass configured from a ClusterMeltSegmenterParams. Only
// getNSegmentTypes() is defined inline; every other member function is
// defined outside this header.
// NOTE(review): the class holds several raw pointers and declares a
// destructor but no copy constructor or assignment operator. If the
// destructor frees those pointers (not visible here), copying an instance
// would be unsafe -- confirm against the implementation file.
c@243 54 class ClusterMeltSegmenter : public Segmenter
c@243 55 {
c@243 56 public:
// Takes the parameter bundle by value.
c@249 57 ClusterMeltSegmenter(ClusterMeltSegmenterParams params);
c@249 58 virtual ~ClusterMeltSegmenter();
// NOTE(review): the virtual functions below presumably override the
// Segmenter interface -- Segmenter.h is not visible from here.
c@249 59 virtual void initialise(int samplerate);
c@249 60 virtual int getWindowsize();
c@249 61 virtual int getHopsize();
// samples: pointer to the input samples; nsamples: how many there are.
c@249 62 virtual void extractFeatures(const double* samples, int nsamples);
c@249 63 void setFeatures(const vector<vector<double> >& f); // provide the features yourself
c@249 64 virtual void segment(); // segment into default number of segment-types
c@249 65 void segment(int m); // segment into m segment-types
// Returns the current value of the nclusters member.
c@249 66 int getNSegmentTypes() { return nclusters; }
c@249 67
c@243 68 protected:
// NOTE(review): the q parameter shares its name with the member q ("the
// decoded HMM state sequence"); len is presumably its length -- confirm.
c@249 69 void makeSegmentation(int* q, int len);
c@243 70
// Per-feature-type extraction helpers taking the same argument types as
// extractFeatures().
// NOTE(review): presumably chosen according to featureType -- confirm.
c@251 71 void extractFeaturesConstQ(const double *, int);
c@251 72 void extractFeaturesMFCC(const double *, int);
c@251 73
// Helper objects held by raw pointer. Decimator, ConstantQ, MFCC and FFTReal
// are only forward-declared in this header.
// NOTE(review): ownership and lifetime are not visible here.
c@249 74 Window<double> *window;
c@289 75 FFTReal *fft;
c@251 76 ConstantQ* constq;
c@251 77 MFCC* mfcc;
c@249 78 model_t* model; // the HMM
c@249 79 int* q; // the decoded HMM state sequence
c@249 80 vector<vector<double> > histograms;
c@249 81
// The fields below share names with ClusterMeltSegmenterParams fields, except
// ncoeff, which has no counterpart there.
c@249 82 feature_types featureType;
c@249 83 double hopSize; // in seconds
c@249 84 double windowSize; // in seconds
c@249 85
c@249 86 // constant-Q parameters
c@249 87 int fmin;
c@249 88 int fmax;
c@249 89 int nbins;
// NOTE(review): the meaning of ncoeff is not established in this header.
c@249 90 int ncoeff;
c@249 91
c@249 92 // PCA parameters
c@249 93 int ncomponents;
c@249 94
c@249 95 // HMM parameters
c@249 96 int nHMMStates;
c@249 97
c@249 98 // clustering parameters
c@249 99 int nclusters;
c@249 100 int histogramLength;
c@249 101 int neighbourhoodLimit;
c@249 102
c@249 103 Decimator *decimator;
c@243 104 };