annotate dsp/segmentation/ClusterMeltSegmenter.cpp @ 243:dc30e3864ceb

* merge in segmentation code from soundbite plugin/library repository
author Chris Cannam <c.cannam@qmul.ac.uk>
date Wed, 09 Jan 2008 10:46:25 +0000
parents
children 8bdbda7fb893
rev   line source
c@243 1 /*
c@243 2 * ClusterMeltSegmenter.cpp
c@243 3 * soundbite
c@243 4 *
c@243 5 * Created by Mark Levy on 23/03/2006.
c@243 6 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved.
c@243 7 *
c@243 8 */
c@243 9
c@243 10 #include <cfloat>
c@243 11 #include <cmath>
c@243 12
c@243 13 #include "ClusterMeltSegmenter.h"
c@243 14 #include "lib_constQ.h"
c@243 15 #include "cluster_segmenter.h"
c@243 16 #include "segment.h"
c@243 17
c@243 18 ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) : window(NULL),
c@243 19 constq(NULL),
c@243 20 featureType(params.featureType),
c@243 21 windowSize(params.windowSize),
c@243 22 hopSize(params.hopSize),
c@243 23 fmin(params.fmin),
c@243 24 fmax(params.fmax),
c@243 25 nbins(params.nbins),
c@243 26 ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c
c@243 27 nHMMStates(params.nHMMStates),
c@243 28 nclusters(params.nclusters),
c@243 29 histogramLength(params.histogramLength),
c@243 30 neighbourhoodLimit(params.neighbourhoodLimit)
c@243 31 {
c@243 32 }
c@243 33
c@243 34 void ClusterMeltSegmenter::initialise(int fs)
c@243 35 {
c@243 36 samplerate = fs;
c@243 37 if (featureType != FEATURE_TYPE_UNKNOWN)
c@243 38 {
c@243 39 ncoeff = static_cast<int>(ceil(nbins * (log(fmax / static_cast<double>(fmin))) / log(2.0)));
c@243 40 constq = init_constQ(fmin, fmax, nbins, samplerate, ncoeff);
c@243 41 }
c@243 42 }
c@243 43
c@243 44 ClusterMeltSegmenter::~ClusterMeltSegmenter()
c@243 45 {
c@243 46 delete [] window;
c@243 47 if (constq)
c@243 48 close_constQ(constq);
c@243 49 }
c@243 50
c@243 51 void ClusterMeltSegmenter::extractFeatures(double* samples, int nsamples)
c@243 52 {
c@243 53 // create a new window if needed
c@243 54 if (!window || nsamples != windowLength)
c@243 55 {
c@243 56 if (window)
c@243 57 delete [] window;
c@243 58 window = hamming_p(nsamples);
c@243 59 windowLength = nsamples;
c@243 60 }
c@243 61
c@243 62 // copy the samples before windowing in case we need them for something else
c@243 63 double* frame = new double[nsamples];
c@243 64 for (int i = 0; i < nsamples; i++)
c@243 65 frame[i] = samples[i] * window[i];
c@243 66
c@243 67 // extract const-Q
c@243 68 do_constQ(constq, frame, nsamples);
c@243 69 int ncq = constq->ncoeff;
c@243 70
c@243 71 delete [] frame;
c@243 72
c@243 73 if (ncq == ncoeff) // else feature extraction failed
c@243 74 {
c@243 75 vector<double> cq(ncq);
c@243 76 for (int i = 0; i < ncq; i++)
c@243 77 cq[i] = constq->absconstQtransform[i];
c@243 78 features.push_back(cq);
c@243 79 }
c@243 80 }
c@243 81
c@243 82 void ClusterMeltSegmenter::segment(int m)
c@243 83 {
c@243 84 nclusters = m;
c@243 85 segment();
c@243 86 }
c@243 87
c@243 88 void ClusterMeltSegmenter::setFeatures(const vector<vector<double> >& f)
c@243 89 {
c@243 90 features = f;
c@243 91 featureType = FEATURE_TYPE_UNKNOWN;
c@243 92 }
c@243 93
c@243 94 void ClusterMeltSegmenter::segment()
c@243 95 {
c@243 96 if (constq)
c@243 97 {
c@243 98 close_constQ(constq); // finished extracting features
c@243 99 constq = NULL;
c@243 100 }
c@243 101
c@243 102 // for now copy the features to a native array and use the existing C segmenter...
c@243 103 double** arrFeatures = new double*[features.size()];
c@243 104 for (int i = 0; i < features.size(); i++)
c@243 105 {
c@243 106 if (featureType == FEATURE_TYPE_UNKNOWN)
c@243 107 arrFeatures[i] = new double[features[0].size()];
c@243 108 else
c@243 109 arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope
c@243 110 for (int j = 0; j < ncoeff; j++)
c@243 111 arrFeatures[i][j] = features[i][j];
c@243 112 }
c@243 113
c@243 114 q = new int[features.size()];
c@243 115
c@243 116 if (featureType == FEATURE_TYPE_UNKNOWN)
c@243 117 cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength,
c@243 118 nclusters, neighbourhoodLimit);
c@243 119 else
c@243 120 constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType,
c@243 121 nHMMStates, histogramLength, nclusters, neighbourhoodLimit);
c@243 122
c@243 123 // convert the cluster assignment sequence to a segmentation
c@243 124 makeSegmentation(q, features.size());
c@243 125
c@243 126 // de-allocate arrays
c@243 127 delete [] q;
c@243 128 for (int i = 0; i < features.size(); i++)
c@243 129 delete [] arrFeatures[i];
c@243 130 delete [] arrFeatures;
c@243 131
c@243 132 // clear the features
c@243 133 clear();
c@243 134 }
c@243 135
c@243 136 void ClusterMeltSegmenter::makeSegmentation(int* q, int len)
c@243 137 {
c@243 138 segmentation.segments.clear();
c@243 139 segmentation.nsegtypes = nclusters;
c@243 140 segmentation.samplerate = samplerate;
c@243 141
c@243 142 Segment segment;
c@243 143 segment.start = 0;
c@243 144 segment.type = q[0];
c@243 145
c@243 146 for (int i = 1; i < len; i++)
c@243 147 {
c@243 148 if (q[i] != q[i-1])
c@243 149 {
c@243 150 segment.end = i * getHopsize();
c@243 151 segmentation.segments.push_back(segment);
c@243 152 segment.type = q[i];
c@243 153 segment.start = segment.end;
c@243 154 }
c@243 155 }
c@243 156 segment.end = len * getHopsize();
c@243 157 segmentation.segments.push_back(segment);
c@243 158 }
c@243 159
c@243 160 /*
c@243 161 void ClusterMeltSegmenter::mpeg7ConstQ()
c@243 162 {
c@243 163 // convert to dB scale
c@243 164 for (int i = 0; i < features.size(); i++)
c@243 165 for (int j = 0; j < ncoeff; j++)
c@243 166 features[i][j] = 10.0 * log10(features[i][j] + DBL_EPSILON);
c@243 167
c@243 168 // normalise features and add the norm at the end as an extra feature dimension
c@243 169 double maxnorm = 0; // track the max of the norms
c@243 170 for (int i = 0; i < features.size(); i++)
c@243 171 {
c@243 172 double norm = 0;
c@243 173 for (int j = 0; j < ncoeff; j++)
c@243 174 norm += features[i][j] * features[i][j];
c@243 175 norm = sqrt(norm);
c@243 176 for (int j = 0; j < ncoeff; j++)
c@243 177 features[i][j] /= norm;
c@243 178 features[i].push_back(norm);
c@243 179 if (norm > maxnorm)
c@243 180 maxnorm = norm;
c@243 181 }
c@243 182
c@243 183 // normalise the norms
c@243 184 for (int i = 0; i < features.size(); i++)
c@243 185 features[i][ncoeff] /= maxnorm;
c@243 186 }
c@243 187 */