comparison dsp/segmentation/cluster_segmenter.c @ 505:930b5b0f707d

Merge branch 'codestyle-and-tidy'
author Chris Cannam <cannam@all-day-breakfast.com>
date Wed, 05 Jun 2019 12:55:15 +0100
parents d48276a3ae24
children
comparison
equal deleted inserted replaced
471:e3335cb213da 505:930b5b0f707d
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
1 /* 2 /*
2 * cluster_segmenter.c 3 * cluster_segmenter.c
3 * soundbite 4 * soundbite
4 * 5 *
5 * Created by Mark Levy on 06/04/2006. 6 * Created by Mark Levy on 06/04/2006.
6 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. 7 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
7 8
8 This program is free software; you can redistribute it and/or 9 This program is free software; you can redistribute it and/or
9 modify it under the terms of the GNU General Public License as 10 modify it under the terms of the GNU General Public License as
10 published by the Free Software Foundation; either version 2 of the 11 published by the Free Software Foundation; either version 2 of the
11 License, or (at your option) any later version. See the file 12 License, or (at your option) any later version. See the file
12 COPYING included with this distribution for more information. 13 COPYING included with this distribution for more information.
13 * 14 *
14 */ 15 */
15 16
16 #include "cluster_segmenter.h" 17 #include "cluster_segmenter.h"
17 18
19 extern int readmatarray(const char *filepath, int n_array, int t, int d, double** arr); 20 extern int readmatarray(const char *filepath, int n_array, int t, int d, double** arr);
20 21
21 /* converts constant-Q features to normalised chroma */ 22 /* converts constant-Q features to normalised chroma */
22 void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma) 23 void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma)
23 { 24 {
24 int noct = ncoeff / bins; /* number of complete octaves in constant-Q */ 25 int noct = ncoeff / bins; /* number of complete octaves in constant-Q */
25 int t, b, oct, ix; 26 int t, b, oct, ix;
26 //double maxchroma; /* max chroma value at each time, for normalisation */ 27
27 //double sum; /* for normalisation */ 28 for (t = 0; t < nframes; t++) {
28 29 for (b = 0; b < bins; b++) {
29 for (t = 0; t < nframes; t++) 30 chroma[t][b] = 0;
30 { 31 }
31 for (b = 0; b < bins; b++) 32 for (oct = 0; oct < noct; oct++) {
32 chroma[t][b] = 0; 33 ix = oct * bins;
33 for (oct = 0; oct < noct; oct++) 34 for (b = 0; b < bins; b++) {
34 { 35 chroma[t][b] += fabs(cq[t][ix+b]);
35 ix = oct * bins; 36 }
36 for (b = 0; b < bins; b++) 37 }
37 chroma[t][b] += fabs(cq[t][ix+b]); 38 }
38 }
39 /* normalise to unit sum
40 sum = 0;
41 for (b = 0; b < bins; b++)
42 sum += chroma[t][b];
43 for (b = 0; b < bins; b++)
44 chroma[t][b] /= sum;
45 */
46 /* normalise to unit max - NO this made results much worse!
47 maxchroma = 0;
48 for (b = 0; b < bins; b++)
49 if (chroma[t][b] > maxchroma)
50 maxchroma = chroma[t][b];
51 if (maxchroma > 0)
52 for (b = 0; b < bins; b++)
53 chroma[t][b] /= maxchroma;
54 */
55 }
56 } 39 }
57 40
58 /* applies MPEG-7 normalisation to constant-Q features, storing normalised envelope (norm) in last feature dimension */ 41 /* applies MPEG-7 normalisation to constant-Q features, storing normalised envelope (norm) in last feature dimension */
59 void mpeg7_constq(double** features, int nframes, int ncoeff) 42 void mpeg7_constq(double** features, int nframes, int ncoeff)
60 { 43 {
61 int i, j; 44 int i, j;
62 double ss; 45 double ss;
63 double env; 46 double env;
64 double maxenv = 0; 47 double maxenv = 0;
65 48
66 /* convert const-Q features to dB scale */ 49 /* convert const-Q features to dB scale */
67 for (i = 0; i < nframes; i++) 50 for (i = 0; i < nframes; i++) {
68 for (j = 0; j < ncoeff; j++) 51 for (j = 0; j < ncoeff; j++) {
69 features[i][j] = 10.0 * log10(features[i][j]+DBL_EPSILON); 52 features[i][j] = 10.0 * log10(features[i][j]+DBL_EPSILON);
70 53 }
71 /* normalise each feature vector and add the norm as an extra feature dimension */ 54 }
72 for (i = 0; i < nframes; i++) 55
73 { 56 /* normalise each feature vector and add the norm as an extra feature dimension */
74 ss = 0; 57 for (i = 0; i < nframes; i++) {
75 for (j = 0; j < ncoeff; j++) 58 ss = 0;
76 ss += features[i][j] * features[i][j]; 59 for (j = 0; j < ncoeff; j++) {
77 env = sqrt(ss); 60 ss += features[i][j] * features[i][j];
78 for (j = 0; j < ncoeff; j++) 61 }
79 features[i][j] /= env; 62 env = sqrt(ss);
80 features[i][ncoeff] = env; 63 for (j = 0; j < ncoeff; j++) {
81 if (env > maxenv) 64 features[i][j] /= env;
82 maxenv = env; 65 }
83 } 66 features[i][ncoeff] = env;
84 /* normalise the envelopes */ 67 if (env > maxenv) {
85 for (i = 0; i < nframes; i++) 68 maxenv = env;
86 features[i][ncoeff] /= maxenv; 69 }
70 }
71 /* normalise the envelopes */
72 for (i = 0; i < nframes; i++) {
73 features[i][ncoeff] /= maxenv;
74 }
87 } 75 }
88 76
89 /* return histograms h[nx*m] of data x[nx] into m bins using a sliding window of length h_len (MUST BE ODD) */ 77 /* return histograms h[nx*m] of data x[nx] into m bins using a sliding window of length h_len (MUST BE ODD) */
90 /* NB h is a vector in row major order, as required by cluster_melt() */ 78 /* NB h is a vector in row major order, as required by cluster_melt() */
91 /* for historical reasons we normalise the histograms by their norm (not to sum to one) */ 79 /* for historical reasons we normalise the histograms by their norm (not to sum to one) */
92 void create_histograms(int* x, int nx, int m, int hlen, double* h) 80 void create_histograms(int* x, int nx, int m, int hlen, double* h)
93 { 81 {
94 int i, j, t; 82 int i, j, t;
95 double norm; 83 double norm;
96 84
97 for (i = 0; i < nx*m; i++) 85 for (i = 0; i < nx*m; i++) {
98 h[i] = 0; 86 h[i] = 0;
99 87 }
100 for (i = hlen/2; i < nx-hlen/2; i++) 88
101 { 89 for (i = hlen/2; i < nx-hlen/2; i++) {
102 for (j = 0; j < m; j++) 90 for (j = 0; j < m; j++) {
103 h[i*m+j] = 0; 91 h[i*m+j] = 0;
104 for (t = i-hlen/2; t <= i+hlen/2; t++) 92 }
105 ++h[i*m+x[t]]; 93 for (t = i-hlen/2; t <= i+hlen/2; t++) {
106 norm = 0; 94 ++h[i*m+x[t]];
107 for (j = 0; j < m; j++) 95 }
108 norm += h[i*m+j] * h[i*m+j]; 96 norm = 0;
109 for (j = 0; j < m; j++) 97 for (j = 0; j < m; j++) {
110 h[i*m+j] /= norm; 98 norm += h[i*m+j] * h[i*m+j];
111 } 99 }
112 100 for (j = 0; j < m; j++) {
113 /* duplicate histograms at beginning and end to create one histogram for each data value supplied */ 101 h[i*m+j] /= norm;
114 for (i = 0; i < hlen/2; i++) 102 }
115 for (j = 0; j < m; j++) 103 }
116 h[i*m+j] = h[hlen/2*m+j]; 104
117 for (i = nx-hlen/2; i < nx; i++) 105 /* duplicate histograms at beginning and end to create one histogram for each data value supplied */
118 for (j = 0; j < m; j++) 106 for (i = 0; i < hlen/2; i++) {
119 h[i*m+j] = h[(nx-hlen/2-1)*m+j]; 107 for (j = 0; j < m; j++) {
108 h[i*m+j] = h[hlen/2*m+j];
109 }
110 }
111 for (i = nx-hlen/2; i < nx; i++) {
112 for (j = 0; j < m; j++) {
113 h[i*m+j] = h[(nx-hlen/2-1)*m+j];
114 }
115 }
120 } 116 }
121 117
122 /* segment using HMM and then histogram clustering */ 118 /* segment using HMM and then histogram clustering */
123 void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states, 119 void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states,
124 int histogram_length, int nclusters, int neighbour_limit) 120 int histogram_length, int nclusters, int neighbour_limit)
125 { 121 {
126 int i, j; 122 int i, j;
127 123
128 /*****************************/ 124 /*****************************/
129 if (0) { 125 if (0) {
130 /* try just using the predominant bin number as a 'decoded state' */ 126 /* try just using the predominant bin number as a 'decoded state' */
131 nHMM_states = feature_length + 1; /* allow a 'zero' state */ 127 nHMM_states = feature_length + 1; /* allow a 'zero' state */
132 double chroma_thresh = 0.05; 128 double chroma_thresh = 0.05;
133 double maxval; 129 double maxval;
134 int maxbin; 130 int maxbin;
135 for (i = 0; i < frames_read; i++) 131 for (i = 0; i < frames_read; i++) {
136 { 132 maxval = 0;
137 maxval = 0; 133 for (j = 0; j < feature_length; j++) {
138 for (j = 0; j < feature_length; j++) 134 if (features[i][j] > maxval) {
139 { 135 maxval = features[i][j];
140 if (features[i][j] > maxval) 136 maxbin = j;
141 { 137 }
142 maxval = features[i][j]; 138 }
143 maxbin = j; 139 if (maxval > chroma_thresh) {
144 } 140 q[i] = maxbin;
145 } 141 } else {
146 if (maxval > chroma_thresh) 142 q[i] = feature_length;
147 q[i] = maxbin; 143 }
148 else 144 }
149 q[i] = feature_length; 145
150 } 146 }
151 147 if (1) {
152 } 148 /*****************************/
153 if (1) { 149
154 /*****************************/ 150 /* scale all the features to 'balance covariances' during HMM training */
155 151 double scale = 10;
156 152 for (i = 0; i < frames_read; i++)
157 /* scale all the features to 'balance covariances' during HMM training */ 153 for (j = 0; j < feature_length; j++)
158 double scale = 10; 154 features[i][j] *= scale;
159 for (i = 0; i < frames_read; i++) 155
160 for (j = 0; j < feature_length; j++) 156 /* train an HMM on the features */
161 features[i][j] *= scale; 157
162 158 /* create a model */
163 /* train an HMM on the features */ 159 model_t* model = hmm_init(features, frames_read, feature_length, nHMM_states);
164 160
165 /* create a model */ 161 /* train the model */
166 model_t* model = hmm_init(features, frames_read, feature_length, nHMM_states); 162 hmm_train(features, frames_read, model);
167 163 /*
168 /* train the model */ 164 printf("\n\nafter training:\n");
169 hmm_train(features, frames_read, model); 165 hmm_print(model);
170 /* 166 */
171 printf("\n\nafter training:\n"); 167 /* decode the hidden state sequence */
172 hmm_print(model); 168 viterbi_decode(features, frames_read, model, q);
173 */ 169 hmm_close(model);
174 /* decode the hidden state sequence */ 170
175 viterbi_decode(features, frames_read, model, q); 171 /*****************************/
176 hmm_close(model); 172 }
177 173 /*****************************/
178 /*****************************/
179 }
180 /*****************************/
181
182 174
183 /* 175 /*
184 fprintf(stderr, "HMM state sequence:\n"); 176 fprintf(stderr, "HMM state sequence:\n");
185 for (i = 0; i < frames_read; i++) 177 for (i = 0; i < frames_read; i++)
186 fprintf(stderr, "%d ", q[i]); 178 fprintf(stderr, "%d ", q[i]);
187 fprintf(stderr, "\n\n"); 179 fprintf(stderr, "\n\n");
188 */ 180 */
189 181
190 /* create histograms of states */ 182 /* create histograms of states */
191 double* h = (double*) malloc(frames_read*nHMM_states*sizeof(double)); /* vector in row major order */ 183 double* h = (double*) malloc(frames_read*nHMM_states*sizeof(double)); /* vector in row major order */
192 create_histograms(q, frames_read, nHMM_states, histogram_length, h); 184 create_histograms(q, frames_read, nHMM_states, histogram_length, h);
193 185
194 /* cluster the histograms */ 186 /* cluster the histograms */
195 int nbsched = 20; /* length of inverse temperature schedule */ 187 int nbsched = 20; /* length of inverse temperature schedule */
196 double* bsched = (double*) malloc(nbsched*sizeof(double)); /* inverse temperature schedule */ 188 double* bsched = (double*) malloc(nbsched*sizeof(double)); /* inverse temperature schedule */
197 double b0 = 100; 189 double b0 = 100;
198 double alpha = 0.7; 190 double alpha = 0.7;
199 bsched[0] = b0; 191 bsched[0] = b0;
200 for (i = 1; i < nbsched; i++) 192 for (i = 1; i < nbsched; i++) {
201 bsched[i] = alpha * bsched[i-1]; 193 bsched[i] = alpha * bsched[i-1];
202 cluster_melt(h, nHMM_states, frames_read, bsched, nbsched, nclusters, neighbour_limit, q); 194 }
203 195 cluster_melt(h, nHMM_states, frames_read, bsched, nbsched, nclusters, neighbour_limit, q);
204 /* now q holds a sequence of cluster assignments */ 196
205 197 /* now q holds a sequence of cluster assignments */
206 free(h); 198
207 free(bsched); 199 free(h);
200 free(bsched);
208 } 201 }
209 202
210 /* segment constant-Q or chroma features */ 203 /* segment constant-Q or chroma features */
211 void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type, 204 void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type,
212 int nHMM_states, int histogram_length, int nclusters, int neighbour_limit) 205 int nHMM_states, int histogram_length, int nclusters, int neighbour_limit)
213 { 206 {
214 int feature_length; 207 int feature_length;
215 double** chroma; 208 double** chroma;
216 int i; 209 int i;
217 210
218 if (feature_type == FEATURE_TYPE_CONSTQ) 211 if (feature_type == FEATURE_TYPE_CONSTQ) {
219 { 212
220 /* fprintf(stderr, "Converting to dB and normalising...\n"); 213 mpeg7_constq(features, frames_read, ncoeff);
221 */ 214
222 mpeg7_constq(features, frames_read, ncoeff); 215 /* do PCA on the features (but not the envelope) */
223 /* 216 int ncomponents = 20;
224 fprintf(stderr, "Running PCA...\n"); 217 pca_project(features, frames_read, ncoeff, ncomponents);
225 */ 218
226 /* do PCA on the features (but not the envelope) */ 219 /* copy the envelope so that it immediatly follows the chosen components */
227 int ncomponents = 20; 220 for (i = 0; i < frames_read; i++) {
228 pca_project(features, frames_read, ncoeff, ncomponents); 221 features[i][ncomponents] = features[i][ncoeff];
229 222 }
230 /* copy the envelope so that it immediatly follows the chosen components */ 223
231 for (i = 0; i < frames_read; i++) 224 feature_length = ncomponents + 1;
232 features[i][ncomponents] = features[i][ncoeff]; 225
233 226 cluster_segment(q, features, frames_read, feature_length,
234 feature_length = ncomponents + 1; 227 nHMM_states, histogram_length, nclusters,
235 228 neighbour_limit);
236 /************************************** 229 }
237 //TEST 230
238 // feature file name 231 if (feature_type == FEATURE_TYPE_CHROMA) {
239 char* dir = "/Users/mark/documents/semma/audio/"; 232
240 char* file_name = (char*) malloc((strlen(dir) + strlen(trackname) + strlen("_features_c20r8h0.2f0.6.mat") + 1)*sizeof(char)); 233 /* convert constant-Q to normalised chroma features */
241 strcpy(file_name, dir); 234 chroma = (double**) malloc(frames_read*sizeof(double*));
242 strcat(file_name, trackname); 235 for (i = 0; i < frames_read; i++) {
243 strcat(file_name, "_features_c20r8h0.2f0.6.mat"); 236 chroma[i] = (double*) malloc(bins*sizeof(double));
244 237 }
245 // get the features from Matlab from mat-file 238
246 int frames_in_file; 239 cq2chroma(features, frames_read, ncoeff, bins, chroma);
247 readmatarray_size(file_name, 2, &frames_in_file, &feature_length); 240
248 readmatarray(file_name, 2, frames_in_file, feature_length, features); 241 feature_length = bins;
249 // copy final frame to ensure that we get as many as we expected 242
250 int missing_frames = frames_read - frames_in_file; 243 cluster_segment(q, chroma, frames_read, feature_length,
251 while (missing_frames > 0) 244 nHMM_states, histogram_length, nclusters,
252 { 245 neighbour_limit);
253 for (i = 0; i < feature_length; i++) 246
254 features[frames_read-missing_frames][i] = features[frames_read-missing_frames-1][i]; 247 for (i = 0; i < frames_read; i++)
255 --missing_frames; 248 free(chroma[i]);
256 } 249 free(chroma);
257 250 }
258 free(file_name); 251 }
259 ******************************************/ 252
260 253
261 cluster_segment(q, features, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit); 254
262 }
263
264 if (feature_type == FEATURE_TYPE_CHROMA)
265 {
266 /*
267 fprintf(stderr, "Converting to chroma features...\n");
268 */
269 /* convert constant-Q to normalised chroma features */
270 chroma = (double**) malloc(frames_read*sizeof(double*));
271 for (i = 0; i < frames_read; i++)
272 chroma[i] = (double*) malloc(bins*sizeof(double));
273 cq2chroma(features, frames_read, ncoeff, bins, chroma);
274 feature_length = bins;
275
276 cluster_segment(q, chroma, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit);
277
278 for (i = 0; i < frames_read; i++)
279 free(chroma[i]);
280 free(chroma);
281 }
282 }
283
284
285