/*
 *  ClusterMeltSegmenter.cpp
 *  soundbite
 *
 *  Created by Mark Levy on 23/03/2006.
 *  Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved.
 *
 */

#include <cfloat>
#include <cmath>

#include "ClusterMeltSegmenter.h"
#include "lib_constQ.h"
#include "cluster_segmenter.h"
#include "segment.h"

ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) :
    window(NULL),
    constq(NULL),
    featureType(params.featureType),
    windowSize(params.windowSize),
    hopSize(params.hopSize),
    fmin(params.fmin),
    fmax(params.fmax),
    nbins(params.nbins),
    ncomponents(params.ncomponents),    // NB currently not passed - no. of PCA components is set in cluster_segmenter.c
    nHMMStates(params.nHMMStates),
    nclusters(params.nclusters),
    histogramLength(params.histogramLength),
    neighbourhoodLimit(params.neighbourhoodLimit)
{
}

void ClusterMeltSegmenter::initialise(int fs)
{
    samplerate = fs;
    if (featureType != FEATURE_TYPE_UNKNOWN)
    {
        // number of constant-Q coefficients needed to span fmin..fmax at nbins bins per octave
        ncoeff = static_cast<int>(ceil(nbins * (log(fmax / static_cast<double>(fmin))) / log(2.0)));
        constq = init_constQ(fmin, fmax, nbins, samplerate, ncoeff);
    }
}

ClusterMeltSegmenter::~ClusterMeltSegmenter()
{
    delete [] window;
    if (constq)
        close_constQ(constq);
}

void ClusterMeltSegmenter::extractFeatures(double* samples, int nsamples)
{
    // create a new analysis window if needed
    if (!window || nsamples != windowLength)
    {
        if (window)
            delete [] window;
        window = hamming_p(nsamples);
        windowLength = nsamples;
    }

    // window the samples into a temporary frame so the caller's buffer is left untouched
    double* frame = new double[nsamples];
    for (int i = 0; i < nsamples; i++)
        frame[i] = samples[i] * window[i];

    // extract the constant-Q spectrum for this frame
    do_constQ(constq, frame, nsamples);
    int ncq = constq->ncoeff;

    delete [] frame;

    if (ncq == ncoeff)  // else feature extraction failed
    {
        vector<double> cq(ncq);
        for (int i = 0; i < ncq; i++)
            cq[i] = constq->absconstQtransform[i];
        features.push_back(cq);
    }
}

void ClusterMeltSegmenter::segment(int m)
{
    nclusters = m;
    segment();
}

void ClusterMeltSegmenter::setFeatures(const vector<vector<double> >& f)
{
    features = f;
    featureType = FEATURE_TYPE_UNKNOWN;
}

void ClusterMeltSegmenter::segment()
{
    if (constq)
    {
        close_constQ(constq);   // finished extracting features
        constq = NULL;
    }

    // for now copy the features to a native array and use the existing C segmenter...
    double** arrFeatures = new double*[features.size()];
    for (int i = 0; i < features.size(); i++)
    {
        if (featureType == FEATURE_TYPE_UNKNOWN)
        {
            // externally supplied features: copy the full feature vector
            arrFeatures[i] = new double[features[0].size()];
            for (int j = 0; j < features[0].size(); j++)
                arrFeatures[i][j] = features[i][j];
        }
        else
        {
            // constant-Q features: allow space for the normalised envelope
            arrFeatures[i] = new double[ncoeff+1];
            for (int j = 0; j < ncoeff; j++)
                arrFeatures[i][j] = features[i][j];
        }
    }

    q = new int[features.size()];

    if (featureType == FEATURE_TYPE_UNKNOWN)
        cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength,
                        nclusters, neighbourhoodLimit);
    else
        constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType,
                       nHMMStates, histogramLength, nclusters, neighbourhoodLimit);

    // convert the cluster assignment sequence to a segmentation
    makeSegmentation(q, features.size());

    // de-allocate arrays
    delete [] q;
    for (int i = 0; i < features.size(); i++)
        delete [] arrFeatures[i];
    delete [] arrFeatures;

    // clear the features
    clear();
}

void ClusterMeltSegmenter::makeSegmentation(int* q, int len)
{
    segmentation.segments.clear();
    segmentation.nsegtypes = nclusters;
    segmentation.samplerate = samplerate;

    Segment segment;
    segment.start = 0;
    segment.type = q[0];

    // start a new segment wherever the cluster assignment changes
    for (int i = 1; i < len; i++)
    {
        if (q[i] != q[i-1])
        {
            segment.end = i * getHopsize();
            segmentation.segments.push_back(segment);
            segment.type = q[i];
            segment.start = segment.end;
        }
    }
    segment.end = len * getHopsize();
    segmentation.segments.push_back(segment);
}

/*
void ClusterMeltSegmenter::mpeg7ConstQ()
{
    // convert to dB scale
    for (int i = 0; i < features.size(); i++)
        for (int j = 0; j < ncoeff; j++)
            features[i][j] = 10.0 * log10(features[i][j] + DBL_EPSILON);

    // normalise features and add the norm at the end as an extra feature dimension
    double maxnorm = 0;     // track the max of the norms
    for (int i = 0; i < features.size(); i++)
    {
        double norm = 0;
        for (int j = 0; j < ncoeff; j++)
            norm += features[i][j] * features[i][j];
        norm = sqrt(norm);
        for (int j = 0; j < ncoeff; j++)
            features[i][j] /= norm;
        features[i].push_back(norm);
        if (norm > maxnorm)
            maxnorm = norm;
    }

    // normalise the norms
    for (int i = 0; i < features.size(); i++)
        features[i][ncoeff] /= maxnorm;
}
*/