Mercurial > hg > qm-dsp
comparison dsp/segmentation/cluster_segmenter.c @ 505:930b5b0f707d
Merge branch 'codestyle-and-tidy'
author | Chris Cannam <cannam@all-day-breakfast.com> |
---|---|
date | Wed, 05 Jun 2019 12:55:15 +0100 |
parents | d48276a3ae24 |
children |
comparison
equal
deleted
inserted
replaced
471:e3335cb213da | 505:930b5b0f707d |
---|---|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ | |
1 /* | 2 /* |
2 * cluster_segmenter.c | 3 * cluster_segmenter.c |
3 * soundbite | 4 * soundbite |
4 * | 5 * |
5 * Created by Mark Levy on 06/04/2006. | 6 * Created by Mark Levy on 06/04/2006. |
6 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. | 7 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. |
7 | 8 |
8 This program is free software; you can redistribute it and/or | 9 This program is free software; you can redistribute it and/or |
9 modify it under the terms of the GNU General Public License as | 10 modify it under the terms of the GNU General Public License as |
10 published by the Free Software Foundation; either version 2 of the | 11 published by the Free Software Foundation; either version 2 of the |
11 License, or (at your option) any later version. See the file | 12 License, or (at your option) any later version. See the file |
12 COPYING included with this distribution for more information. | 13 COPYING included with this distribution for more information. |
13 * | 14 * |
14 */ | 15 */ |
15 | 16 |
16 #include "cluster_segmenter.h" | 17 #include "cluster_segmenter.h" |
17 | 18 |
19 extern int readmatarray(const char *filepath, int n_array, int t, int d, double** arr); | 20 extern int readmatarray(const char *filepath, int n_array, int t, int d, double** arr); |
20 | 21 |
21 /* converts constant-Q features to normalised chroma */ | 22 /* converts constant-Q features to normalised chroma */ |
22 void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma) | 23 void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma) |
23 { | 24 { |
24 int noct = ncoeff / bins; /* number of complete octaves in constant-Q */ | 25 int noct = ncoeff / bins; /* number of complete octaves in constant-Q */ |
25 int t, b, oct, ix; | 26 int t, b, oct, ix; |
26 //double maxchroma; /* max chroma value at each time, for normalisation */ | 27 |
27 //double sum; /* for normalisation */ | 28 for (t = 0; t < nframes; t++) { |
28 | 29 for (b = 0; b < bins; b++) { |
29 for (t = 0; t < nframes; t++) | 30 chroma[t][b] = 0; |
30 { | 31 } |
31 for (b = 0; b < bins; b++) | 32 for (oct = 0; oct < noct; oct++) { |
32 chroma[t][b] = 0; | 33 ix = oct * bins; |
33 for (oct = 0; oct < noct; oct++) | 34 for (b = 0; b < bins; b++) { |
34 { | 35 chroma[t][b] += fabs(cq[t][ix+b]); |
35 ix = oct * bins; | 36 } |
36 for (b = 0; b < bins; b++) | 37 } |
37 chroma[t][b] += fabs(cq[t][ix+b]); | 38 } |
38 } | |
39 /* normalise to unit sum | |
40 sum = 0; | |
41 for (b = 0; b < bins; b++) | |
42 sum += chroma[t][b]; | |
43 for (b = 0; b < bins; b++) | |
44 chroma[t][b] /= sum; | |
45 */ | |
46 /* normalise to unit max - NO this made results much worse! | |
47 maxchroma = 0; | |
48 for (b = 0; b < bins; b++) | |
49 if (chroma[t][b] > maxchroma) | |
50 maxchroma = chroma[t][b]; | |
51 if (maxchroma > 0) | |
52 for (b = 0; b < bins; b++) | |
53 chroma[t][b] /= maxchroma; | |
54 */ | |
55 } | |
56 } | 39 } |
57 | 40 |
58 /* applies MPEG-7 normalisation to constant-Q features, storing normalised envelope (norm) in last feature dimension */ | 41 /* applies MPEG-7 normalisation to constant-Q features, storing normalised envelope (norm) in last feature dimension */ |
59 void mpeg7_constq(double** features, int nframes, int ncoeff) | 42 void mpeg7_constq(double** features, int nframes, int ncoeff) |
60 { | 43 { |
61 int i, j; | 44 int i, j; |
62 double ss; | 45 double ss; |
63 double env; | 46 double env; |
64 double maxenv = 0; | 47 double maxenv = 0; |
65 | 48 |
66 /* convert const-Q features to dB scale */ | 49 /* convert const-Q features to dB scale */ |
67 for (i = 0; i < nframes; i++) | 50 for (i = 0; i < nframes; i++) { |
68 for (j = 0; j < ncoeff; j++) | 51 for (j = 0; j < ncoeff; j++) { |
69 features[i][j] = 10.0 * log10(features[i][j]+DBL_EPSILON); | 52 features[i][j] = 10.0 * log10(features[i][j]+DBL_EPSILON); |
70 | 53 } |
71 /* normalise each feature vector and add the norm as an extra feature dimension */ | 54 } |
72 for (i = 0; i < nframes; i++) | 55 |
73 { | 56 /* normalise each feature vector and add the norm as an extra feature dimension */ |
74 ss = 0; | 57 for (i = 0; i < nframes; i++) { |
75 for (j = 0; j < ncoeff; j++) | 58 ss = 0; |
76 ss += features[i][j] * features[i][j]; | 59 for (j = 0; j < ncoeff; j++) { |
77 env = sqrt(ss); | 60 ss += features[i][j] * features[i][j]; |
78 for (j = 0; j < ncoeff; j++) | 61 } |
79 features[i][j] /= env; | 62 env = sqrt(ss); |
80 features[i][ncoeff] = env; | 63 for (j = 0; j < ncoeff; j++) { |
81 if (env > maxenv) | 64 features[i][j] /= env; |
82 maxenv = env; | 65 } |
83 } | 66 features[i][ncoeff] = env; |
84 /* normalise the envelopes */ | 67 if (env > maxenv) { |
85 for (i = 0; i < nframes; i++) | 68 maxenv = env; |
86 features[i][ncoeff] /= maxenv; | 69 } |
70 } | |
71 /* normalise the envelopes */ | |
72 for (i = 0; i < nframes; i++) { | |
73 features[i][ncoeff] /= maxenv; | |
74 } | |
87 } | 75 } |
88 | 76 |
89 /* return histograms h[nx*m] of data x[nx] into m bins using a sliding window of length h_len (MUST BE ODD) */ | 77 /* return histograms h[nx*m] of data x[nx] into m bins using a sliding window of length h_len (MUST BE ODD) */ |
90 /* NB h is a vector in row major order, as required by cluster_melt() */ | 78 /* NB h is a vector in row major order, as required by cluster_melt() */ |
91 /* for historical reasons we normalise the histograms by their norm (not to sum to one) */ | 79 /* for historical reasons we normalise the histograms by their norm (not to sum to one) */ |
92 void create_histograms(int* x, int nx, int m, int hlen, double* h) | 80 void create_histograms(int* x, int nx, int m, int hlen, double* h) |
93 { | 81 { |
94 int i, j, t; | 82 int i, j, t; |
95 double norm; | 83 double norm; |
96 | 84 |
97 for (i = 0; i < nx*m; i++) | 85 for (i = 0; i < nx*m; i++) { |
98 h[i] = 0; | 86 h[i] = 0; |
99 | 87 } |
100 for (i = hlen/2; i < nx-hlen/2; i++) | 88 |
101 { | 89 for (i = hlen/2; i < nx-hlen/2; i++) { |
102 for (j = 0; j < m; j++) | 90 for (j = 0; j < m; j++) { |
103 h[i*m+j] = 0; | 91 h[i*m+j] = 0; |
104 for (t = i-hlen/2; t <= i+hlen/2; t++) | 92 } |
105 ++h[i*m+x[t]]; | 93 for (t = i-hlen/2; t <= i+hlen/2; t++) { |
106 norm = 0; | 94 ++h[i*m+x[t]]; |
107 for (j = 0; j < m; j++) | 95 } |
108 norm += h[i*m+j] * h[i*m+j]; | 96 norm = 0; |
109 for (j = 0; j < m; j++) | 97 for (j = 0; j < m; j++) { |
110 h[i*m+j] /= norm; | 98 norm += h[i*m+j] * h[i*m+j]; |
111 } | 99 } |
112 | 100 for (j = 0; j < m; j++) { |
113 /* duplicate histograms at beginning and end to create one histogram for each data value supplied */ | 101 h[i*m+j] /= norm; |
114 for (i = 0; i < hlen/2; i++) | 102 } |
115 for (j = 0; j < m; j++) | 103 } |
116 h[i*m+j] = h[hlen/2*m+j]; | 104 |
117 for (i = nx-hlen/2; i < nx; i++) | 105 /* duplicate histograms at beginning and end to create one histogram for each data value supplied */ |
118 for (j = 0; j < m; j++) | 106 for (i = 0; i < hlen/2; i++) { |
119 h[i*m+j] = h[(nx-hlen/2-1)*m+j]; | 107 for (j = 0; j < m; j++) { |
108 h[i*m+j] = h[hlen/2*m+j]; | |
109 } | |
110 } | |
111 for (i = nx-hlen/2; i < nx; i++) { | |
112 for (j = 0; j < m; j++) { | |
113 h[i*m+j] = h[(nx-hlen/2-1)*m+j]; | |
114 } | |
115 } | |
120 } | 116 } |
121 | 117 |
122 /* segment using HMM and then histogram clustering */ | 118 /* segment using HMM and then histogram clustering */ |
123 void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states, | 119 void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states, |
124 int histogram_length, int nclusters, int neighbour_limit) | 120 int histogram_length, int nclusters, int neighbour_limit) |
125 { | 121 { |
126 int i, j; | 122 int i, j; |
127 | 123 |
128 /*****************************/ | 124 /*****************************/ |
129 if (0) { | 125 if (0) { |
130 /* try just using the predominant bin number as a 'decoded state' */ | 126 /* try just using the predominant bin number as a 'decoded state' */ |
131 nHMM_states = feature_length + 1; /* allow a 'zero' state */ | 127 nHMM_states = feature_length + 1; /* allow a 'zero' state */ |
132 double chroma_thresh = 0.05; | 128 double chroma_thresh = 0.05; |
133 double maxval; | 129 double maxval; |
134 int maxbin; | 130 int maxbin; |
135 for (i = 0; i < frames_read; i++) | 131 for (i = 0; i < frames_read; i++) { |
136 { | 132 maxval = 0; |
137 maxval = 0; | 133 for (j = 0; j < feature_length; j++) { |
138 for (j = 0; j < feature_length; j++) | 134 if (features[i][j] > maxval) { |
139 { | 135 maxval = features[i][j]; |
140 if (features[i][j] > maxval) | 136 maxbin = j; |
141 { | 137 } |
142 maxval = features[i][j]; | 138 } |
143 maxbin = j; | 139 if (maxval > chroma_thresh) { |
144 } | 140 q[i] = maxbin; |
145 } | 141 } else { |
146 if (maxval > chroma_thresh) | 142 q[i] = feature_length; |
147 q[i] = maxbin; | 143 } |
148 else | 144 } |
149 q[i] = feature_length; | 145 |
150 } | 146 } |
151 | 147 if (1) { |
152 } | 148 /*****************************/ |
153 if (1) { | 149 |
154 /*****************************/ | 150 /* scale all the features to 'balance covariances' during HMM training */ |
155 | 151 double scale = 10; |
156 | 152 for (i = 0; i < frames_read; i++) |
157 /* scale all the features to 'balance covariances' during HMM training */ | 153 for (j = 0; j < feature_length; j++) |
158 double scale = 10; | 154 features[i][j] *= scale; |
159 for (i = 0; i < frames_read; i++) | 155 |
160 for (j = 0; j < feature_length; j++) | 156 /* train an HMM on the features */ |
161 features[i][j] *= scale; | 157 |
162 | 158 /* create a model */ |
163 /* train an HMM on the features */ | 159 model_t* model = hmm_init(features, frames_read, feature_length, nHMM_states); |
164 | 160 |
165 /* create a model */ | 161 /* train the model */ |
166 model_t* model = hmm_init(features, frames_read, feature_length, nHMM_states); | 162 hmm_train(features, frames_read, model); |
167 | 163 /* |
168 /* train the model */ | 164 printf("\n\nafter training:\n"); |
169 hmm_train(features, frames_read, model); | 165 hmm_print(model); |
170 /* | 166 */ |
171 printf("\n\nafter training:\n"); | 167 /* decode the hidden state sequence */ |
172 hmm_print(model); | 168 viterbi_decode(features, frames_read, model, q); |
173 */ | 169 hmm_close(model); |
174 /* decode the hidden state sequence */ | 170 |
175 viterbi_decode(features, frames_read, model, q); | 171 /*****************************/ |
176 hmm_close(model); | 172 } |
177 | 173 /*****************************/ |
178 /*****************************/ | |
179 } | |
180 /*****************************/ | |
181 | |
182 | 174 |
183 /* | 175 /* |
184 fprintf(stderr, "HMM state sequence:\n"); | 176 fprintf(stderr, "HMM state sequence:\n"); |
185 for (i = 0; i < frames_read; i++) | 177 for (i = 0; i < frames_read; i++) |
186 fprintf(stderr, "%d ", q[i]); | 178 fprintf(stderr, "%d ", q[i]); |
187 fprintf(stderr, "\n\n"); | 179 fprintf(stderr, "\n\n"); |
188 */ | 180 */ |
189 | 181 |
190 /* create histograms of states */ | 182 /* create histograms of states */ |
191 double* h = (double*) malloc(frames_read*nHMM_states*sizeof(double)); /* vector in row major order */ | 183 double* h = (double*) malloc(frames_read*nHMM_states*sizeof(double)); /* vector in row major order */ |
192 create_histograms(q, frames_read, nHMM_states, histogram_length, h); | 184 create_histograms(q, frames_read, nHMM_states, histogram_length, h); |
193 | 185 |
194 /* cluster the histograms */ | 186 /* cluster the histograms */ |
195 int nbsched = 20; /* length of inverse temperature schedule */ | 187 int nbsched = 20; /* length of inverse temperature schedule */ |
196 double* bsched = (double*) malloc(nbsched*sizeof(double)); /* inverse temperature schedule */ | 188 double* bsched = (double*) malloc(nbsched*sizeof(double)); /* inverse temperature schedule */ |
197 double b0 = 100; | 189 double b0 = 100; |
198 double alpha = 0.7; | 190 double alpha = 0.7; |
199 bsched[0] = b0; | 191 bsched[0] = b0; |
200 for (i = 1; i < nbsched; i++) | 192 for (i = 1; i < nbsched; i++) { |
201 bsched[i] = alpha * bsched[i-1]; | 193 bsched[i] = alpha * bsched[i-1]; |
202 cluster_melt(h, nHMM_states, frames_read, bsched, nbsched, nclusters, neighbour_limit, q); | 194 } |
203 | 195 cluster_melt(h, nHMM_states, frames_read, bsched, nbsched, nclusters, neighbour_limit, q); |
204 /* now q holds a sequence of cluster assignments */ | 196 |
205 | 197 /* now q holds a sequence of cluster assignments */ |
206 free(h); | 198 |
207 free(bsched); | 199 free(h); |
200 free(bsched); | |
208 } | 201 } |
209 | 202 |
210 /* segment constant-Q or chroma features */ | 203 /* segment constant-Q or chroma features */ |
211 void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type, | 204 void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type, |
212 int nHMM_states, int histogram_length, int nclusters, int neighbour_limit) | 205 int nHMM_states, int histogram_length, int nclusters, int neighbour_limit) |
213 { | 206 { |
214 int feature_length; | 207 int feature_length; |
215 double** chroma; | 208 double** chroma; |
216 int i; | 209 int i; |
217 | 210 |
218 if (feature_type == FEATURE_TYPE_CONSTQ) | 211 if (feature_type == FEATURE_TYPE_CONSTQ) { |
219 { | 212 |
220 /* fprintf(stderr, "Converting to dB and normalising...\n"); | 213 mpeg7_constq(features, frames_read, ncoeff); |
221 */ | 214 |
222 mpeg7_constq(features, frames_read, ncoeff); | 215 /* do PCA on the features (but not the envelope) */ |
223 /* | 216 int ncomponents = 20; |
224 fprintf(stderr, "Running PCA...\n"); | 217 pca_project(features, frames_read, ncoeff, ncomponents); |
225 */ | 218 |
226 /* do PCA on the features (but not the envelope) */ | 219 /* copy the envelope so that it immediatly follows the chosen components */ |
227 int ncomponents = 20; | 220 for (i = 0; i < frames_read; i++) { |
228 pca_project(features, frames_read, ncoeff, ncomponents); | 221 features[i][ncomponents] = features[i][ncoeff]; |
229 | 222 } |
230 /* copy the envelope so that it immediatly follows the chosen components */ | 223 |
231 for (i = 0; i < frames_read; i++) | 224 feature_length = ncomponents + 1; |
232 features[i][ncomponents] = features[i][ncoeff]; | 225 |
233 | 226 cluster_segment(q, features, frames_read, feature_length, |
234 feature_length = ncomponents + 1; | 227 nHMM_states, histogram_length, nclusters, |
235 | 228 neighbour_limit); |
236 /************************************** | 229 } |
237 //TEST | 230 |
238 // feature file name | 231 if (feature_type == FEATURE_TYPE_CHROMA) { |
239 char* dir = "/Users/mark/documents/semma/audio/"; | 232 |
240 char* file_name = (char*) malloc((strlen(dir) + strlen(trackname) + strlen("_features_c20r8h0.2f0.6.mat") + 1)*sizeof(char)); | 233 /* convert constant-Q to normalised chroma features */ |
241 strcpy(file_name, dir); | 234 chroma = (double**) malloc(frames_read*sizeof(double*)); |
242 strcat(file_name, trackname); | 235 for (i = 0; i < frames_read; i++) { |
243 strcat(file_name, "_features_c20r8h0.2f0.6.mat"); | 236 chroma[i] = (double*) malloc(bins*sizeof(double)); |
244 | 237 } |
245 // get the features from Matlab from mat-file | 238 |
246 int frames_in_file; | 239 cq2chroma(features, frames_read, ncoeff, bins, chroma); |
247 readmatarray_size(file_name, 2, &frames_in_file, &feature_length); | 240 |
248 readmatarray(file_name, 2, frames_in_file, feature_length, features); | 241 feature_length = bins; |
249 // copy final frame to ensure that we get as many as we expected | 242 |
250 int missing_frames = frames_read - frames_in_file; | 243 cluster_segment(q, chroma, frames_read, feature_length, |
251 while (missing_frames > 0) | 244 nHMM_states, histogram_length, nclusters, |
252 { | 245 neighbour_limit); |
253 for (i = 0; i < feature_length; i++) | 246 |
254 features[frames_read-missing_frames][i] = features[frames_read-missing_frames-1][i]; | 247 for (i = 0; i < frames_read; i++) |
255 --missing_frames; | 248 free(chroma[i]); |
256 } | 249 free(chroma); |
257 | 250 } |
258 free(file_name); | 251 } |
259 ******************************************/ | 252 |
260 | 253 |
261 cluster_segment(q, features, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit); | 254 |
262 } | |
263 | |
264 if (feature_type == FEATURE_TYPE_CHROMA) | |
265 { | |
266 /* | |
267 fprintf(stderr, "Converting to chroma features...\n"); | |
268 */ | |
269 /* convert constant-Q to normalised chroma features */ | |
270 chroma = (double**) malloc(frames_read*sizeof(double*)); | |
271 for (i = 0; i < frames_read; i++) | |
272 chroma[i] = (double*) malloc(bins*sizeof(double)); | |
273 cq2chroma(features, frames_read, ncoeff, bins, chroma); | |
274 feature_length = bins; | |
275 | |
276 cluster_segment(q, chroma, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit); | |
277 | |
278 for (i = 0; i < frames_read; i++) | |
279 free(chroma[i]); | |
280 free(chroma); | |
281 } | |
282 } | |
283 | |
284 | |
285 |