annotate dsp/segmentation/ClusterMeltSegmenter.cpp @ 245:cdfd0948a852

* First cut at properly integrating the segmenter and making it work right
author Chris Cannam <c.cannam@qmul.ac.uk>
date Wed, 09 Jan 2008 16:50:04 +0000
parents dc30e3864ceb
children 2b74bd60c61f
rev   line source
c@243 1 /*
c@243 2 * ClusterMeltSegmenter.cpp
c@243 3 * soundbite
c@243 4 *
c@243 5 * Created by Mark Levy on 23/03/2006.
c@243 6 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved.
c@243 7 *
c@243 8 */
c@243 9
c@243 10 #include <cfloat>
c@243 11 #include <cmath>
c@243 12
c@243 13 #include "ClusterMeltSegmenter.h"
c@243 14 #include "cluster_segmenter.h"
c@243 15 #include "segment.h"
c@243 16
c@245 17 #include "dsp/transforms/FFT.h"
c@245 18
c@243 19 ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) : window(NULL),
c@243 20 constq(NULL),
c@243 21 featureType(params.featureType),
c@245 22 hopSize(params.hopSize),
c@243 23 windowSize(params.windowSize),
c@243 24 fmin(params.fmin),
c@243 25 fmax(params.fmax),
c@243 26 nbins(params.nbins),
c@243 27 ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c
c@243 28 nHMMStates(params.nHMMStates),
c@243 29 nclusters(params.nclusters),
c@243 30 histogramLength(params.histogramLength),
c@243 31 neighbourhoodLimit(params.neighbourhoodLimit)
c@243 32 {
c@243 33 }
c@243 34
c@243 35 void ClusterMeltSegmenter::initialise(int fs)
c@243 36 {
c@243 37 samplerate = fs;
c@243 38 if (featureType != FEATURE_TYPE_UNKNOWN)
c@243 39 {
c@245 40 //!!! ncoeff = static_cast<int>(ceil(nbins * (log(fmax / static_cast<double>(fmin))) / log(2.0)));
c@245 41 CQConfig config;
c@245 42 config.FS = samplerate;
c@245 43 config.min = fmin;
c@245 44 config.max = fmax;
c@245 45 config.BPO = nbins;
c@245 46 config.CQThresh = 0.0054;
c@245 47 constq = new ConstantQ(config);
c@245 48 //!!! constq = init_constQ(fmin, fmax, nbins, samplerate, ncoeff);
c@245 49 ncoeff = constq->getK();
c@243 50 }
c@243 51 }
c@243 52
c@243 53 ClusterMeltSegmenter::~ClusterMeltSegmenter()
c@243 54 {
c@245 55 delete window;
c@245 56 delete constq;
c@245 57 //!!! if (constq)
c@245 58 // close_constQ(constq);
c@245 59 }
c@245 60
c@245 61 int
c@245 62 ClusterMeltSegmenter::getWindowsize()
c@245 63 {
c@245 64 if (featureType != FEATURE_TYPE_UNKNOWN) {
c@245 65 std::cerr << "rate = " << samplerate << ", fft length = " << constq->getfftlength() << ", fmin = " << fmin << ", fmax = " << fmax << ", nbins = " << nbins << ", K = " << constq->getK() << ", Q = " << constq->getQ() << std::endl;
c@245 66 return constq->getfftlength();
c@245 67 } else {
c@245 68 return static_cast<int>(windowSize * samplerate);
c@245 69 }
c@245 70 }
c@245 71
c@245 72 int
c@245 73 ClusterMeltSegmenter::getHopsize()
c@245 74 {
c@245 75 return static_cast<int>(hopSize * samplerate);
c@243 76 }
c@243 77
c@243 78 void ClusterMeltSegmenter::extractFeatures(double* samples, int nsamples)
c@243 79 {
c@243 80 // create a new window if needed
c@245 81 /*!!!
c@243 82 if (!window || nsamples != windowLength)
c@243 83 {
c@243 84 if (window)
c@243 85 delete [] window;
c@245 86 // Window<double>(HammingWindow, nsamples).cut
c@245 87 //!!! window = hamming_p(nsamples);
c@243 88 windowLength = nsamples;
c@243 89 }
c@245 90 */
c@245 91 if (!window || window->getSize() != nsamples) {
c@245 92 delete window;
c@245 93 window = new Window<double>(HammingWindow, nsamples);
c@245 94 }
c@245 95
c@243 96 // copy the samples before windowing in case we need them for something else
c@243 97 double* frame = new double[nsamples];
c@245 98 // for (int i = 0; i < nsamples; i++)
c@245 99 // frame[i] = samples[i] * window[i];
c@245 100 window->cut(frame);
c@243 101
c@245 102 std::cerr << "nsamples = " << nsamples << std::endl;
c@245 103
c@245 104 double *real = new double[nsamples];
c@245 105 double *imag = new double[nsamples];
c@245 106
c@245 107 FFT::process(nsamples, false, frame, 0, real, imag);
c@245 108
c@245 109 double *cqre = new double[ncoeff];
c@245 110 double *cqim = new double[ncoeff];
c@245 111
c@245 112 constq->process(real, imag, cqre, cqim);
c@245 113
c@243 114 // extract const-Q
c@245 115 //!!! do_constQ(constq, frame, nsamples);
c@245 116 // int ncq = constq->ncoeff;
c@245 117
c@245 118 delete [] frame;
c@245 119 delete [] real;
c@245 120 delete [] imag;
c@243 121
c@245 122 //!!! if (ncq == ncoeff) // else feature extraction failed
c@245 123 // {
c@245 124 // vector<double> cq(ncq);
c@245 125 // for (int i = 0; i < ncq; i++)
c@245 126 // cq[i] = constq->absconstQtransform[i];
c@245 127 vector<double> cq(ncoeff);
c@245 128 for (int i = 0; i < ncoeff; ++i) {
c@245 129 cq[i] = sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]);
c@245 130 }
c@243 131 features.push_back(cq);
c@245 132 // }
c@245 133
c@245 134 delete[] cqre;
c@245 135 delete[] cqim;
c@243 136 }
c@243 137
c@243 138 void ClusterMeltSegmenter::segment(int m)
c@243 139 {
c@243 140 nclusters = m;
c@243 141 segment();
c@243 142 }
c@243 143
c@243 144 void ClusterMeltSegmenter::setFeatures(const vector<vector<double> >& f)
c@243 145 {
c@243 146 features = f;
c@243 147 featureType = FEATURE_TYPE_UNKNOWN;
c@243 148 }
c@243 149
c@243 150 void ClusterMeltSegmenter::segment()
c@243 151 {
c@243 152 if (constq)
c@243 153 {
c@245 154 //!!! close_constQ(constq); // finished extracting features
c@245 155 delete constq;
c@243 156 constq = NULL;
c@243 157 }
c@243 158
c@243 159 // for now copy the features to a native array and use the existing C segmenter...
c@243 160 double** arrFeatures = new double*[features.size()];
c@243 161 for (int i = 0; i < features.size(); i++)
c@243 162 {
c@243 163 if (featureType == FEATURE_TYPE_UNKNOWN)
c@243 164 arrFeatures[i] = new double[features[0].size()];
c@243 165 else
c@243 166 arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope
c@243 167 for (int j = 0; j < ncoeff; j++)
c@243 168 arrFeatures[i][j] = features[i][j];
c@243 169 }
c@243 170
c@243 171 q = new int[features.size()];
c@243 172
c@243 173 if (featureType == FEATURE_TYPE_UNKNOWN)
c@243 174 cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength,
c@243 175 nclusters, neighbourhoodLimit);
c@243 176 else
c@243 177 constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType,
c@243 178 nHMMStates, histogramLength, nclusters, neighbourhoodLimit);
c@243 179
c@243 180 // convert the cluster assignment sequence to a segmentation
c@243 181 makeSegmentation(q, features.size());
c@243 182
c@243 183 // de-allocate arrays
c@243 184 delete [] q;
c@243 185 for (int i = 0; i < features.size(); i++)
c@243 186 delete [] arrFeatures[i];
c@243 187 delete [] arrFeatures;
c@243 188
c@243 189 // clear the features
c@243 190 clear();
c@243 191 }
c@243 192
c@243 193 void ClusterMeltSegmenter::makeSegmentation(int* q, int len)
c@243 194 {
c@243 195 segmentation.segments.clear();
c@243 196 segmentation.nsegtypes = nclusters;
c@243 197 segmentation.samplerate = samplerate;
c@243 198
c@243 199 Segment segment;
c@243 200 segment.start = 0;
c@243 201 segment.type = q[0];
c@243 202
c@243 203 for (int i = 1; i < len; i++)
c@243 204 {
c@243 205 if (q[i] != q[i-1])
c@243 206 {
c@243 207 segment.end = i * getHopsize();
c@243 208 segmentation.segments.push_back(segment);
c@243 209 segment.type = q[i];
c@243 210 segment.start = segment.end;
c@243 211 }
c@243 212 }
c@243 213 segment.end = len * getHopsize();
c@243 214 segmentation.segments.push_back(segment);
c@243 215 }
c@243 216
c@243 217 /*
c@243 218 void ClusterMeltSegmenter::mpeg7ConstQ()
c@243 219 {
c@243 220 // convert to dB scale
c@243 221 for (int i = 0; i < features.size(); i++)
c@243 222 for (int j = 0; j < ncoeff; j++)
c@243 223 features[i][j] = 10.0 * log10(features[i][j] + DBL_EPSILON);
c@243 224
c@243 225 // normalise features and add the norm at the end as an extra feature dimension
c@243 226 double maxnorm = 0; // track the max of the norms
c@243 227 for (int i = 0; i < features.size(); i++)
c@243 228 {
c@243 229 double norm = 0;
c@243 230 for (int j = 0; j < ncoeff; j++)
c@243 231 norm += features[i][j] * features[i][j];
c@243 232 norm = sqrt(norm);
c@243 233 for (int j = 0; j < ncoeff; j++)
c@243 234 features[i][j] /= norm;
c@243 235 features[i].push_back(norm);
c@243 236 if (norm > maxnorm)
c@243 237 maxnorm = norm;
c@243 238 }
c@243 239
c@243 240 // normalise the norms
c@243 241 for (int i = 0; i < features.size(); i++)
c@243 242 features[i][ncoeff] /= maxnorm;
c@243 243 }
c@243 244 */