cannam@484
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
c@243
|
2 /*
|
c@243
|
3 * cluster_segmenter.c
|
c@243
|
4 * soundbite
|
c@243
|
5 *
|
c@243
|
6 * Created by Mark Levy on 06/04/2006.
|
c@309
|
7 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
|
c@309
|
8
|
cannam@480
|
9 This program is free software; you can redistribute it and/or
|
cannam@480
|
10 modify it under the terms of the GNU General Public License as
|
cannam@480
|
11 published by the Free Software Foundation; either version 2 of the
|
cannam@480
|
12 License, or (at your option) any later version. See the file
|
cannam@480
|
13 COPYING included with this distribution for more information.
|
c@243
|
14 *
|
c@243
|
15 */
|
c@243
|
16
|
c@243
|
17 #include "cluster_segmenter.h"
|
c@243
|
18
|
c@243
|
19 extern int readmatarray_size(const char *filepath, int n_array, int* t, int* d);
|
c@243
|
20 extern int readmatarray(const char *filepath, int n_array, int t, int d, double** arr);
|
c@243
|
21
|
c@243
|
22 /* converts constant-Q features to normalised chroma */
|
c@243
|
23 void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma)
|
c@243
|
24 {
|
cannam@480
|
25 int noct = ncoeff / bins; /* number of complete octaves in constant-Q */
|
cannam@480
|
26 int t, b, oct, ix;
|
cannam@480
|
27
|
cannam@480
|
28 for (t = 0; t < nframes; t++) {
|
cannam@480
|
29 for (b = 0; b < bins; b++) {
|
cannam@480
|
30 chroma[t][b] = 0;
|
cannam@480
|
31 }
|
cannam@480
|
32 for (oct = 0; oct < noct; oct++) {
|
cannam@480
|
33 ix = oct * bins;
|
cannam@480
|
34 for (b = 0; b < bins; b++) {
|
cannam@480
|
35 chroma[t][b] += fabs(cq[t][ix+b]);
|
cannam@480
|
36 }
|
cannam@480
|
37 }
|
cannam@480
|
38 }
|
c@243
|
39 }
|
c@243
|
40
|
c@243
|
41 /* applies MPEG-7 normalisation to constant-Q features, storing normalised envelope (norm) in last feature dimension */
|
c@243
|
42 void mpeg7_constq(double** features, int nframes, int ncoeff)
|
c@243
|
43 {
|
cannam@480
|
44 int i, j;
|
cannam@480
|
45 double ss;
|
cannam@480
|
46 double env;
|
cannam@480
|
47 double maxenv = 0;
|
cannam@480
|
48
|
cannam@480
|
49 /* convert const-Q features to dB scale */
|
cannam@480
|
50 for (i = 0; i < nframes; i++) {
|
cannam@480
|
51 for (j = 0; j < ncoeff; j++) {
|
cannam@480
|
52 features[i][j] = 10.0 * log10(features[i][j]+DBL_EPSILON);
|
cannam@480
|
53 }
|
cannam@480
|
54 }
|
cannam@480
|
55
|
cannam@480
|
56 /* normalise each feature vector and add the norm as an extra feature dimension */
|
cannam@480
|
57 for (i = 0; i < nframes; i++) {
|
cannam@480
|
58 ss = 0;
|
cannam@480
|
59 for (j = 0; j < ncoeff; j++) {
|
cannam@480
|
60 ss += features[i][j] * features[i][j];
|
cannam@480
|
61 }
|
cannam@480
|
62 env = sqrt(ss);
|
cannam@480
|
63 for (j = 0; j < ncoeff; j++) {
|
cannam@480
|
64 features[i][j] /= env;
|
cannam@480
|
65 }
|
cannam@480
|
66 features[i][ncoeff] = env;
|
cannam@480
|
67 if (env > maxenv) {
|
cannam@480
|
68 maxenv = env;
|
cannam@480
|
69 }
|
cannam@480
|
70 }
|
cannam@480
|
71 /* normalise the envelopes */
|
cannam@480
|
72 for (i = 0; i < nframes; i++) {
|
cannam@480
|
73 features[i][ncoeff] /= maxenv;
|
cannam@480
|
74 }
|
c@243
|
75 }
|
c@243
|
76
|
c@243
|
77 /* return histograms h[nx*m] of data x[nx] into m bins using a sliding window of length h_len (MUST BE ODD) */
|
c@243
|
78 /* NB h is a vector in row major order, as required by cluster_melt() */
|
c@243
|
79 /* for historical reasons we normalise the histograms by their norm (not to sum to one) */
|
c@243
|
80 void create_histograms(int* x, int nx, int m, int hlen, double* h)
|
c@243
|
81 {
|
cannam@480
|
82 int i, j, t;
|
cannam@480
|
83 double norm;
|
c@266
|
84
|
cannam@480
|
85 for (i = 0; i < nx*m; i++) {
|
cannam@480
|
86 h[i] = 0;
|
cannam@480
|
87 }
|
c@266
|
88
|
cannam@480
|
89 for (i = hlen/2; i < nx-hlen/2; i++) {
|
cannam@480
|
90 for (j = 0; j < m; j++) {
|
cannam@480
|
91 h[i*m+j] = 0;
|
cannam@480
|
92 }
|
cannam@480
|
93 for (t = i-hlen/2; t <= i+hlen/2; t++) {
|
cannam@480
|
94 ++h[i*m+x[t]];
|
cannam@480
|
95 }
|
cannam@480
|
96 norm = 0;
|
cannam@480
|
97 for (j = 0; j < m; j++) {
|
cannam@480
|
98 norm += h[i*m+j] * h[i*m+j];
|
cannam@480
|
99 }
|
cannam@480
|
100 for (j = 0; j < m; j++) {
|
cannam@480
|
101 h[i*m+j] /= norm;
|
cannam@480
|
102 }
|
cannam@480
|
103 }
|
cannam@480
|
104
|
cannam@480
|
105 /* duplicate histograms at beginning and end to create one histogram for each data value supplied */
|
cannam@480
|
106 for (i = 0; i < hlen/2; i++) {
|
cannam@480
|
107 for (j = 0; j < m; j++) {
|
cannam@480
|
108 h[i*m+j] = h[hlen/2*m+j];
|
cannam@480
|
109 }
|
cannam@480
|
110 }
|
cannam@480
|
111 for (i = nx-hlen/2; i < nx; i++) {
|
cannam@480
|
112 for (j = 0; j < m; j++) {
|
cannam@480
|
113 h[i*m+j] = h[(nx-hlen/2-1)*m+j];
|
cannam@480
|
114 }
|
cannam@480
|
115 }
|
c@243
|
116 }
|
c@243
|
117
|
c@243
|
118 /* segment using HMM and then histogram clustering */
|
c@243
|
119 void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states,
|
cannam@480
|
120 int histogram_length, int nclusters, int neighbour_limit)
|
c@243
|
121 {
|
cannam@480
|
122 int i, j;
|
cannam@480
|
123
|
cannam@480
|
124 /*****************************/
|
cannam@480
|
125 if (0) {
|
cannam@480
|
126 /* try just using the predominant bin number as a 'decoded state' */
|
cannam@480
|
127 nHMM_states = feature_length + 1; /* allow a 'zero' state */
|
cannam@480
|
128 double chroma_thresh = 0.05;
|
cannam@480
|
129 double maxval;
|
cannam@480
|
130 int maxbin;
|
cannam@480
|
131 for (i = 0; i < frames_read; i++) {
|
cannam@480
|
132 maxval = 0;
|
cannam@480
|
133 for (j = 0; j < feature_length; j++) {
|
cannam@480
|
134 if (features[i][j] > maxval) {
|
cannam@480
|
135 maxval = features[i][j];
|
cannam@480
|
136 maxbin = j;
|
cannam@480
|
137 }
|
cannam@480
|
138 }
|
cannam@480
|
139 if (maxval > chroma_thresh) {
|
cannam@480
|
140 q[i] = maxbin;
|
cannam@480
|
141 } else {
|
cannam@480
|
142 q[i] = feature_length;
|
cannam@480
|
143 }
|
cannam@480
|
144 }
|
cannam@480
|
145
|
cannam@480
|
146 }
|
cannam@480
|
147 if (1) {
|
cannam@480
|
148 /*****************************/
|
cannam@480
|
149
|
cannam@480
|
150 /* scale all the features to 'balance covariances' during HMM training */
|
cannam@480
|
151 double scale = 10;
|
cannam@480
|
152 for (i = 0; i < frames_read; i++)
|
cannam@480
|
153 for (j = 0; j < feature_length; j++)
|
cannam@480
|
154 features[i][j] *= scale;
|
cannam@480
|
155
|
cannam@480
|
156 /* train an HMM on the features */
|
cannam@480
|
157
|
cannam@480
|
158 /* create a model */
|
cannam@480
|
159 model_t* model = hmm_init(features, frames_read, feature_length, nHMM_states);
|
cannam@480
|
160
|
cannam@480
|
161 /* train the model */
|
cannam@480
|
162 hmm_train(features, frames_read, model);
|
cannam@480
|
163 /*
|
cannam@480
|
164 printf("\n\nafter training:\n");
|
cannam@480
|
165 hmm_print(model);
|
cannam@480
|
166 */
|
cannam@480
|
167 /* decode the hidden state sequence */
|
cannam@480
|
168 viterbi_decode(features, frames_read, model, q);
|
cannam@480
|
169 hmm_close(model);
|
cannam@480
|
170
|
cannam@480
|
171 /*****************************/
|
cannam@480
|
172 }
|
cannam@480
|
173 /*****************************/
|
c@243
|
174
|
c@283
|
175 /*
|
cannam@480
|
176 fprintf(stderr, "HMM state sequence:\n");
|
cannam@480
|
177 for (i = 0; i < frames_read; i++)
|
cannam@480
|
178 fprintf(stderr, "%d ", q[i]);
|
cannam@480
|
179 fprintf(stderr, "\n\n");
|
c@283
|
180 */
|
cannam@480
|
181
|
cannam@480
|
182 /* create histograms of states */
|
cannam@480
|
183 double* h = (double*) malloc(frames_read*nHMM_states*sizeof(double)); /* vector in row major order */
|
cannam@480
|
184 create_histograms(q, frames_read, nHMM_states, histogram_length, h);
|
cannam@480
|
185
|
cannam@480
|
186 /* cluster the histograms */
|
cannam@480
|
187 int nbsched = 20; /* length of inverse temperature schedule */
|
cannam@480
|
188 double* bsched = (double*) malloc(nbsched*sizeof(double)); /* inverse temperature schedule */
|
cannam@480
|
189 double b0 = 100;
|
cannam@480
|
190 double alpha = 0.7;
|
cannam@480
|
191 bsched[0] = b0;
|
cannam@480
|
192 for (i = 1; i < nbsched; i++) {
|
cannam@480
|
193 bsched[i] = alpha * bsched[i-1];
|
cannam@480
|
194 }
|
cannam@480
|
195 cluster_melt(h, nHMM_states, frames_read, bsched, nbsched, nclusters, neighbour_limit, q);
|
cannam@480
|
196
|
cannam@480
|
197 /* now q holds a sequence of cluster assignments */
|
cannam@480
|
198
|
cannam@480
|
199 free(h);
|
cannam@480
|
200 free(bsched);
|
c@243
|
201 }
|
c@243
|
202
|
c@243
|
203 /* segment constant-Q or chroma features */
|
c@243
|
204 void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type,
|
cannam@480
|
205 int nHMM_states, int histogram_length, int nclusters, int neighbour_limit)
|
c@243
|
206 {
|
cannam@480
|
207 int feature_length;
|
cannam@480
|
208 double** chroma;
|
cannam@480
|
209 int i;
|
cannam@480
|
210
|
cannam@480
|
211 if (feature_type == FEATURE_TYPE_CONSTQ) {
|
cannam@480
|
212
|
cannam@480
|
213 mpeg7_constq(features, frames_read, ncoeff);
|
cannam@480
|
214
|
cannam@480
|
215 /* do PCA on the features (but not the envelope) */
|
cannam@480
|
216 int ncomponents = 20;
|
cannam@480
|
217 pca_project(features, frames_read, ncoeff, ncomponents);
|
cannam@480
|
218
|
cannam@480
|
219 /* copy the envelope so that it immediatly follows the chosen components */
|
cannam@480
|
220 for (i = 0; i < frames_read; i++) {
|
cannam@480
|
221 features[i][ncomponents] = features[i][ncoeff];
|
cannam@480
|
222 }
|
cannam@480
|
223
|
cannam@480
|
224 feature_length = ncomponents + 1;
|
cannam@480
|
225
|
cannam@480
|
226 cluster_segment(q, features, frames_read, feature_length,
|
cannam@480
|
227 nHMM_states, histogram_length, nclusters,
|
cannam@480
|
228 neighbour_limit);
|
cannam@480
|
229 }
|
cannam@480
|
230
|
cannam@480
|
231 if (feature_type == FEATURE_TYPE_CHROMA) {
|
cannam@480
|
232
|
cannam@480
|
233 /* convert constant-Q to normalised chroma features */
|
cannam@480
|
234 chroma = (double**) malloc(frames_read*sizeof(double*));
|
cannam@480
|
235 for (i = 0; i < frames_read; i++) {
|
cannam@480
|
236 chroma[i] = (double*) malloc(bins*sizeof(double));
|
cannam@480
|
237 }
|
cannam@480
|
238
|
cannam@480
|
239 cq2chroma(features, frames_read, ncoeff, bins, chroma);
|
cannam@480
|
240
|
cannam@480
|
241 feature_length = bins;
|
cannam@480
|
242
|
cannam@480
|
243 cluster_segment(q, chroma, frames_read, feature_length,
|
cannam@480
|
244 nHMM_states, histogram_length, nclusters,
|
cannam@480
|
245 neighbour_limit);
|
cannam@480
|
246
|
cannam@480
|
247 for (i = 0; i < frames_read; i++)
|
cannam@480
|
248 free(chroma[i]);
|
cannam@480
|
249 free(chroma);
|
cannam@480
|
250 }
|
c@243
|
251 }
|
c@243
|
252
|
c@243
|
253
|
c@243
|
254
|