comparison dsp/segmentation/cluster_segmenter.c @ 480:175e51ae78eb

Untabify, indent, tidy
author Chris Cannam <cannam@all-day-breakfast.com>
date Fri, 31 May 2019 10:53:39 +0100
parents d5014ab8b0e5
children d48276a3ae24
comparison
equal deleted inserted replaced
479:7e52c034cf62 480:175e51ae78eb
3 * soundbite 3 * soundbite
4 * 4 *
5 * Created by Mark Levy on 06/04/2006. 5 * Created by Mark Levy on 06/04/2006.
6 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. 6 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
7 7
8 This program is free software; you can redistribute it and/or 8 This program is free software; you can redistribute it and/or
9 modify it under the terms of the GNU General Public License as 9 modify it under the terms of the GNU General Public License as
10 published by the Free Software Foundation; either version 2 of the 10 published by the Free Software Foundation; either version 2 of the
11 License, or (at your option) any later version. See the file 11 License, or (at your option) any later version. See the file
12 COPYING included with this distribution for more information. 12 COPYING included with this distribution for more information.
13 * 13 *
14 */ 14 */
15 15
16 #include "cluster_segmenter.h" 16 #include "cluster_segmenter.h"
17 17
19 extern int readmatarray(const char *filepath, int n_array, int t, int d, double** arr); 19 extern int readmatarray(const char *filepath, int n_array, int t, int d, double** arr);
20 20
21 /* converts constant-Q features to normalised chroma */ 21 /* converts constant-Q features to normalised chroma */
22 void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma) 22 void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma)
23 { 23 {
24 int noct = ncoeff / bins; /* number of complete octaves in constant-Q */ 24 int noct = ncoeff / bins; /* number of complete octaves in constant-Q */
25 int t, b, oct, ix; 25 int t, b, oct, ix;
26 //double maxchroma; /* max chroma value at each time, for normalisation */ 26
27 //double sum; /* for normalisation */ 27 for (t = 0; t < nframes; t++) {
28 28 for (b = 0; b < bins; b++) {
29 for (t = 0; t < nframes; t++) 29 chroma[t][b] = 0;
30 { 30 }
31 for (b = 0; b < bins; b++) 31 for (oct = 0; oct < noct; oct++) {
32 chroma[t][b] = 0; 32 ix = oct * bins;
33 for (oct = 0; oct < noct; oct++) 33 for (b = 0; b < bins; b++) {
34 { 34 chroma[t][b] += fabs(cq[t][ix+b]);
35 ix = oct * bins; 35 }
36 for (b = 0; b < bins; b++) 36 }
37 chroma[t][b] += fabs(cq[t][ix+b]); 37 }
38 }
39 /* normalise to unit sum
40 sum = 0;
41 for (b = 0; b < bins; b++)
42 sum += chroma[t][b];
43 for (b = 0; b < bins; b++)
44 chroma[t][b] /= sum;
45 */
46 /* normalise to unit max - NO this made results much worse!
47 maxchroma = 0;
48 for (b = 0; b < bins; b++)
49 if (chroma[t][b] > maxchroma)
50 maxchroma = chroma[t][b];
51 if (maxchroma > 0)
52 for (b = 0; b < bins; b++)
53 chroma[t][b] /= maxchroma;
54 */
55 }
56 } 38 }
57 39
58 /* applies MPEG-7 normalisation to constant-Q features, storing normalised envelope (norm) in last feature dimension */ 40 /* applies MPEG-7 normalisation to constant-Q features, storing normalised envelope (norm) in last feature dimension */
59 void mpeg7_constq(double** features, int nframes, int ncoeff) 41 void mpeg7_constq(double** features, int nframes, int ncoeff)
60 { 42 {
61 int i, j; 43 int i, j;
62 double ss; 44 double ss;
63 double env; 45 double env;
64 double maxenv = 0; 46 double maxenv = 0;
65 47
66 /* convert const-Q features to dB scale */ 48 /* convert const-Q features to dB scale */
67 for (i = 0; i < nframes; i++) 49 for (i = 0; i < nframes; i++) {
68 for (j = 0; j < ncoeff; j++) 50 for (j = 0; j < ncoeff; j++) {
69 features[i][j] = 10.0 * log10(features[i][j]+DBL_EPSILON); 51 features[i][j] = 10.0 * log10(features[i][j]+DBL_EPSILON);
70 52 }
71 /* normalise each feature vector and add the norm as an extra feature dimension */ 53 }
72 for (i = 0; i < nframes; i++) 54
73 { 55 /* normalise each feature vector and add the norm as an extra feature dimension */
74 ss = 0; 56 for (i = 0; i < nframes; i++) {
75 for (j = 0; j < ncoeff; j++) 57 ss = 0;
76 ss += features[i][j] * features[i][j]; 58 for (j = 0; j < ncoeff; j++) {
77 env = sqrt(ss); 59 ss += features[i][j] * features[i][j];
78 for (j = 0; j < ncoeff; j++) 60 }
79 features[i][j] /= env; 61 env = sqrt(ss);
80 features[i][ncoeff] = env; 62 for (j = 0; j < ncoeff; j++) {
81 if (env > maxenv) 63 features[i][j] /= env;
82 maxenv = env; 64 }
83 } 65 features[i][ncoeff] = env;
84 /* normalise the envelopes */ 66 if (env > maxenv) {
85 for (i = 0; i < nframes; i++) 67 maxenv = env;
86 features[i][ncoeff] /= maxenv; 68 }
69 }
70 /* normalise the envelopes */
71 for (i = 0; i < nframes; i++) {
72 features[i][ncoeff] /= maxenv;
73 }
87 } 74 }
88 75
89 /* return histograms h[nx*m] of data x[nx] into m bins using a sliding window of length h_len (MUST BE ODD) */ 76 /* return histograms h[nx*m] of data x[nx] into m bins using a sliding window of length h_len (MUST BE ODD) */
90 /* NB h is a vector in row major order, as required by cluster_melt() */ 77 /* NB h is a vector in row major order, as required by cluster_melt() */
91 /* for historical reasons we normalise the histograms by their norm (not to sum to one) */ 78 /* for historical reasons we normalise the histograms by their norm (not to sum to one) */
92 void create_histograms(int* x, int nx, int m, int hlen, double* h) 79 void create_histograms(int* x, int nx, int m, int hlen, double* h)
93 { 80 {
94 int i, j, t; 81 int i, j, t;
95 double norm; 82 double norm;
96 83
97 for (i = 0; i < nx*m; i++) 84 for (i = 0; i < nx*m; i++) {
98 h[i] = 0; 85 h[i] = 0;
99 86 }
100 for (i = hlen/2; i < nx-hlen/2; i++) 87
101 { 88 for (i = hlen/2; i < nx-hlen/2; i++) {
102 for (j = 0; j < m; j++) 89 for (j = 0; j < m; j++) {
103 h[i*m+j] = 0; 90 h[i*m+j] = 0;
104 for (t = i-hlen/2; t <= i+hlen/2; t++) 91 }
105 ++h[i*m+x[t]]; 92 for (t = i-hlen/2; t <= i+hlen/2; t++) {
106 norm = 0; 93 ++h[i*m+x[t]];
107 for (j = 0; j < m; j++) 94 }
108 norm += h[i*m+j] * h[i*m+j]; 95 norm = 0;
109 for (j = 0; j < m; j++) 96 for (j = 0; j < m; j++) {
110 h[i*m+j] /= norm; 97 norm += h[i*m+j] * h[i*m+j];
111 } 98 }
112 99 for (j = 0; j < m; j++) {
113 /* duplicate histograms at beginning and end to create one histogram for each data value supplied */ 100 h[i*m+j] /= norm;
114 for (i = 0; i < hlen/2; i++) 101 }
115 for (j = 0; j < m; j++) 102 }
116 h[i*m+j] = h[hlen/2*m+j]; 103
117 for (i = nx-hlen/2; i < nx; i++) 104 /* duplicate histograms at beginning and end to create one histogram for each data value supplied */
118 for (j = 0; j < m; j++) 105 for (i = 0; i < hlen/2; i++) {
119 h[i*m+j] = h[(nx-hlen/2-1)*m+j]; 106 for (j = 0; j < m; j++) {
107 h[i*m+j] = h[hlen/2*m+j];
108 }
109 }
110 for (i = nx-hlen/2; i < nx; i++) {
111 for (j = 0; j < m; j++) {
112 h[i*m+j] = h[(nx-hlen/2-1)*m+j];
113 }
114 }
120 } 115 }
121 116
122 /* segment using HMM and then histogram clustering */ 117 /* segment using HMM and then histogram clustering */
123 void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states, 118 void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states,
124 int histogram_length, int nclusters, int neighbour_limit) 119 int histogram_length, int nclusters, int neighbour_limit)
125 { 120 {
126 int i, j; 121 int i, j;
127 122
128 /*****************************/ 123 /*****************************/
129 if (0) { 124 if (0) {
130 /* try just using the predominant bin number as a 'decoded state' */ 125 /* try just using the predominant bin number as a 'decoded state' */
131 nHMM_states = feature_length + 1; /* allow a 'zero' state */ 126 nHMM_states = feature_length + 1; /* allow a 'zero' state */
132 double chroma_thresh = 0.05; 127 double chroma_thresh = 0.05;
133 double maxval; 128 double maxval;
134 int maxbin; 129 int maxbin;
135 for (i = 0; i < frames_read; i++) 130 for (i = 0; i < frames_read; i++) {
136 { 131 maxval = 0;
137 maxval = 0; 132 for (j = 0; j < feature_length; j++) {
138 for (j = 0; j < feature_length; j++) 133 if (features[i][j] > maxval) {
139 { 134 maxval = features[i][j];
140 if (features[i][j] > maxval) 135 maxbin = j;
141 { 136 }
142 maxval = features[i][j]; 137 }
143 maxbin = j; 138 if (maxval > chroma_thresh) {
144 } 139 q[i] = maxbin;
145 } 140 } else {
146 if (maxval > chroma_thresh) 141 q[i] = feature_length;
147 q[i] = maxbin; 142 }
148 else 143 }
149 q[i] = feature_length; 144
150 } 145 }
151 146 if (1) {
152 } 147 /*****************************/
153 if (1) { 148
154 /*****************************/ 149 /* scale all the features to 'balance covariances' during HMM training */
155 150 double scale = 10;
156 151 for (i = 0; i < frames_read; i++)
157 /* scale all the features to 'balance covariances' during HMM training */ 152 for (j = 0; j < feature_length; j++)
158 double scale = 10; 153 features[i][j] *= scale;
159 for (i = 0; i < frames_read; i++) 154
160 for (j = 0; j < feature_length; j++) 155 /* train an HMM on the features */
161 features[i][j] *= scale; 156
162 157 /* create a model */
163 /* train an HMM on the features */ 158 model_t* model = hmm_init(features, frames_read, feature_length, nHMM_states);
164 159
165 /* create a model */ 160 /* train the model */
166 model_t* model = hmm_init(features, frames_read, feature_length, nHMM_states); 161 hmm_train(features, frames_read, model);
167 162 /*
168 /* train the model */ 163 printf("\n\nafter training:\n");
169 hmm_train(features, frames_read, model); 164 hmm_print(model);
170 /* 165 */
171 printf("\n\nafter training:\n"); 166 /* decode the hidden state sequence */
172 hmm_print(model); 167 viterbi_decode(features, frames_read, model, q);
173 */ 168 hmm_close(model);
174 /* decode the hidden state sequence */ 169
175 viterbi_decode(features, frames_read, model, q); 170 /*****************************/
176 hmm_close(model); 171 }
177 172 /*****************************/
178 /*****************************/
179 }
180 /*****************************/
181
182 173
183 /* 174 /*
184 fprintf(stderr, "HMM state sequence:\n"); 175 fprintf(stderr, "HMM state sequence:\n");
185 for (i = 0; i < frames_read; i++) 176 for (i = 0; i < frames_read; i++)
186 fprintf(stderr, "%d ", q[i]); 177 fprintf(stderr, "%d ", q[i]);
187 fprintf(stderr, "\n\n"); 178 fprintf(stderr, "\n\n");
188 */ 179 */
189 180
190 /* create histograms of states */ 181 /* create histograms of states */
191 double* h = (double*) malloc(frames_read*nHMM_states*sizeof(double)); /* vector in row major order */ 182 double* h = (double*) malloc(frames_read*nHMM_states*sizeof(double)); /* vector in row major order */
192 create_histograms(q, frames_read, nHMM_states, histogram_length, h); 183 create_histograms(q, frames_read, nHMM_states, histogram_length, h);
193 184
194 /* cluster the histograms */ 185 /* cluster the histograms */
195 int nbsched = 20; /* length of inverse temperature schedule */ 186 int nbsched = 20; /* length of inverse temperature schedule */
196 double* bsched = (double*) malloc(nbsched*sizeof(double)); /* inverse temperature schedule */ 187 double* bsched = (double*) malloc(nbsched*sizeof(double)); /* inverse temperature schedule */
197 double b0 = 100; 188 double b0 = 100;
198 double alpha = 0.7; 189 double alpha = 0.7;
199 bsched[0] = b0; 190 bsched[0] = b0;
200 for (i = 1; i < nbsched; i++) 191 for (i = 1; i < nbsched; i++) {
201 bsched[i] = alpha * bsched[i-1]; 192 bsched[i] = alpha * bsched[i-1];
202 cluster_melt(h, nHMM_states, frames_read, bsched, nbsched, nclusters, neighbour_limit, q); 193 }
203 194 cluster_melt(h, nHMM_states, frames_read, bsched, nbsched, nclusters, neighbour_limit, q);
204 /* now q holds a sequence of cluster assignments */ 195
205 196 /* now q holds a sequence of cluster assignments */
206 free(h); 197
207 free(bsched); 198 free(h);
199 free(bsched);
208 } 200 }
209 201
210 /* segment constant-Q or chroma features */ 202 /* segment constant-Q or chroma features */
211 void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type, 203 void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type,
212 int nHMM_states, int histogram_length, int nclusters, int neighbour_limit) 204 int nHMM_states, int histogram_length, int nclusters, int neighbour_limit)
213 { 205 {
214 int feature_length; 206 int feature_length;
215 double** chroma; 207 double** chroma;
216 int i; 208 int i;
217 209
218 if (feature_type == FEATURE_TYPE_CONSTQ) 210 if (feature_type == FEATURE_TYPE_CONSTQ) {
219 { 211
220 /* fprintf(stderr, "Converting to dB and normalising...\n"); 212 mpeg7_constq(features, frames_read, ncoeff);
221 */ 213
222 mpeg7_constq(features, frames_read, ncoeff); 214 /* do PCA on the features (but not the envelope) */
223 /* 215 int ncomponents = 20;
224 fprintf(stderr, "Running PCA...\n"); 216 pca_project(features, frames_read, ncoeff, ncomponents);
225 */ 217
226 /* do PCA on the features (but not the envelope) */ 218 /* copy the envelope so that it immediatly follows the chosen components */
227 int ncomponents = 20; 219 for (i = 0; i < frames_read; i++) {
228 pca_project(features, frames_read, ncoeff, ncomponents); 220 features[i][ncomponents] = features[i][ncoeff];
229 221 }
230 /* copy the envelope so that it immediatly follows the chosen components */ 222
231 for (i = 0; i < frames_read; i++) 223 feature_length = ncomponents + 1;
232 features[i][ncomponents] = features[i][ncoeff]; 224
233 225 cluster_segment(q, features, frames_read, feature_length,
234 feature_length = ncomponents + 1; 226 nHMM_states, histogram_length, nclusters,
235 227 neighbour_limit);
236 /************************************** 228 }
237 //TEST 229
238 // feature file name 230 if (feature_type == FEATURE_TYPE_CHROMA) {
239 char* dir = "/Users/mark/documents/semma/audio/"; 231
240 char* file_name = (char*) malloc((strlen(dir) + strlen(trackname) + strlen("_features_c20r8h0.2f0.6.mat") + 1)*sizeof(char)); 232 /* convert constant-Q to normalised chroma features */
241 strcpy(file_name, dir); 233 chroma = (double**) malloc(frames_read*sizeof(double*));
242 strcat(file_name, trackname); 234 for (i = 0; i < frames_read; i++) {
243 strcat(file_name, "_features_c20r8h0.2f0.6.mat"); 235 chroma[i] = (double*) malloc(bins*sizeof(double));
244 236 }
245 // get the features from Matlab from mat-file 237
246 int frames_in_file; 238 cq2chroma(features, frames_read, ncoeff, bins, chroma);
247 readmatarray_size(file_name, 2, &frames_in_file, &feature_length); 239
248 readmatarray(file_name, 2, frames_in_file, feature_length, features); 240 feature_length = bins;
249 // copy final frame to ensure that we get as many as we expected 241
250 int missing_frames = frames_read - frames_in_file; 242 cluster_segment(q, chroma, frames_read, feature_length,
251 while (missing_frames > 0) 243 nHMM_states, histogram_length, nclusters,
252 { 244 neighbour_limit);
253 for (i = 0; i < feature_length; i++) 245
254 features[frames_read-missing_frames][i] = features[frames_read-missing_frames-1][i]; 246 for (i = 0; i < frames_read; i++)
255 --missing_frames; 247 free(chroma[i]);
256 } 248 free(chroma);
257 249 }
258 free(file_name); 250 }
259 ******************************************/ 251
260 252
261 cluster_segment(q, features, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit); 253
262 }
263
264 if (feature_type == FEATURE_TYPE_CHROMA)
265 {
266 /*
267 fprintf(stderr, "Converting to chroma features...\n");
268 */
269 /* convert constant-Q to normalised chroma features */
270 chroma = (double**) malloc(frames_read*sizeof(double*));
271 for (i = 0; i < frames_read; i++)
272 chroma[i] = (double*) malloc(bins*sizeof(double));
273 cq2chroma(features, frames_read, ncoeff, bins, chroma);
274 feature_length = bins;
275
276 cluster_segment(q, chroma, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit);
277
278 for (i = 0; i < frames_read; i++)
279 free(chroma[i]);
280 free(chroma);
281 }
282 }
283
284
285