Mercurial > hg > qm-dsp
comparison dsp/segmentation/cluster_segmenter.c @ 480:175e51ae78eb
Untabify, indent, tidy
author | Chris Cannam <cannam@all-day-breakfast.com> |
---|---|
date | Fri, 31 May 2019 10:53:39 +0100 |
parents | d5014ab8b0e5 |
children | d48276a3ae24 |
comparison
equal
deleted
inserted
replaced
479:7e52c034cf62 | 480:175e51ae78eb |
---|---|
3 * soundbite | 3 * soundbite |
4 * | 4 * |
5 * Created by Mark Levy on 06/04/2006. | 5 * Created by Mark Levy on 06/04/2006. |
6 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. | 6 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. |
7 | 7 |
8 This program is free software; you can redistribute it and/or | 8 This program is free software; you can redistribute it and/or |
9 modify it under the terms of the GNU General Public License as | 9 modify it under the terms of the GNU General Public License as |
10 published by the Free Software Foundation; either version 2 of the | 10 published by the Free Software Foundation; either version 2 of the |
11 License, or (at your option) any later version. See the file | 11 License, or (at your option) any later version. See the file |
12 COPYING included with this distribution for more information. | 12 COPYING included with this distribution for more information. |
13 * | 13 * |
14 */ | 14 */ |
15 | 15 |
16 #include "cluster_segmenter.h" | 16 #include "cluster_segmenter.h" |
17 | 17 |
19 extern int readmatarray(const char *filepath, int n_array, int t, int d, double** arr); | 19 extern int readmatarray(const char *filepath, int n_array, int t, int d, double** arr); |
20 | 20 |
21 /* converts constant-Q features to normalised chroma */ | 21 /* converts constant-Q features to normalised chroma */ |
22 void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma) | 22 void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma) |
23 { | 23 { |
24 int noct = ncoeff / bins; /* number of complete octaves in constant-Q */ | 24 int noct = ncoeff / bins; /* number of complete octaves in constant-Q */ |
25 int t, b, oct, ix; | 25 int t, b, oct, ix; |
26 //double maxchroma; /* max chroma value at each time, for normalisation */ | 26 |
27 //double sum; /* for normalisation */ | 27 for (t = 0; t < nframes; t++) { |
28 | 28 for (b = 0; b < bins; b++) { |
29 for (t = 0; t < nframes; t++) | 29 chroma[t][b] = 0; |
30 { | 30 } |
31 for (b = 0; b < bins; b++) | 31 for (oct = 0; oct < noct; oct++) { |
32 chroma[t][b] = 0; | 32 ix = oct * bins; |
33 for (oct = 0; oct < noct; oct++) | 33 for (b = 0; b < bins; b++) { |
34 { | 34 chroma[t][b] += fabs(cq[t][ix+b]); |
35 ix = oct * bins; | 35 } |
36 for (b = 0; b < bins; b++) | 36 } |
37 chroma[t][b] += fabs(cq[t][ix+b]); | 37 } |
38 } | |
39 /* normalise to unit sum | |
40 sum = 0; | |
41 for (b = 0; b < bins; b++) | |
42 sum += chroma[t][b]; | |
43 for (b = 0; b < bins; b++) | |
44 chroma[t][b] /= sum; | |
45 */ | |
46 /* normalise to unit max - NO this made results much worse! | |
47 maxchroma = 0; | |
48 for (b = 0; b < bins; b++) | |
49 if (chroma[t][b] > maxchroma) | |
50 maxchroma = chroma[t][b]; | |
51 if (maxchroma > 0) | |
52 for (b = 0; b < bins; b++) | |
53 chroma[t][b] /= maxchroma; | |
54 */ | |
55 } | |
56 } | 38 } |
57 | 39 |
58 /* applies MPEG-7 normalisation to constant-Q features, storing normalised envelope (norm) in last feature dimension */ | 40 /* applies MPEG-7 normalisation to constant-Q features, storing normalised envelope (norm) in last feature dimension */ |
59 void mpeg7_constq(double** features, int nframes, int ncoeff) | 41 void mpeg7_constq(double** features, int nframes, int ncoeff) |
60 { | 42 { |
61 int i, j; | 43 int i, j; |
62 double ss; | 44 double ss; |
63 double env; | 45 double env; |
64 double maxenv = 0; | 46 double maxenv = 0; |
65 | 47 |
66 /* convert const-Q features to dB scale */ | 48 /* convert const-Q features to dB scale */ |
67 for (i = 0; i < nframes; i++) | 49 for (i = 0; i < nframes; i++) { |
68 for (j = 0; j < ncoeff; j++) | 50 for (j = 0; j < ncoeff; j++) { |
69 features[i][j] = 10.0 * log10(features[i][j]+DBL_EPSILON); | 51 features[i][j] = 10.0 * log10(features[i][j]+DBL_EPSILON); |
70 | 52 } |
71 /* normalise each feature vector and add the norm as an extra feature dimension */ | 53 } |
72 for (i = 0; i < nframes; i++) | 54 |
73 { | 55 /* normalise each feature vector and add the norm as an extra feature dimension */ |
74 ss = 0; | 56 for (i = 0; i < nframes; i++) { |
75 for (j = 0; j < ncoeff; j++) | 57 ss = 0; |
76 ss += features[i][j] * features[i][j]; | 58 for (j = 0; j < ncoeff; j++) { |
77 env = sqrt(ss); | 59 ss += features[i][j] * features[i][j]; |
78 for (j = 0; j < ncoeff; j++) | 60 } |
79 features[i][j] /= env; | 61 env = sqrt(ss); |
80 features[i][ncoeff] = env; | 62 for (j = 0; j < ncoeff; j++) { |
81 if (env > maxenv) | 63 features[i][j] /= env; |
82 maxenv = env; | 64 } |
83 } | 65 features[i][ncoeff] = env; |
84 /* normalise the envelopes */ | 66 if (env > maxenv) { |
85 for (i = 0; i < nframes; i++) | 67 maxenv = env; |
86 features[i][ncoeff] /= maxenv; | 68 } |
69 } | |
70 /* normalise the envelopes */ | |
71 for (i = 0; i < nframes; i++) { | |
72 features[i][ncoeff] /= maxenv; | |
73 } | |
87 } | 74 } |
88 | 75 |
89 /* return histograms h[nx*m] of data x[nx] into m bins using a sliding window of length h_len (MUST BE ODD) */ | 76 /* return histograms h[nx*m] of data x[nx] into m bins using a sliding window of length h_len (MUST BE ODD) */ |
90 /* NB h is a vector in row major order, as required by cluster_melt() */ | 77 /* NB h is a vector in row major order, as required by cluster_melt() */ |
91 /* for historical reasons we normalise the histograms by their norm (not to sum to one) */ | 78 /* for historical reasons we normalise the histograms by their norm (not to sum to one) */ |
92 void create_histograms(int* x, int nx, int m, int hlen, double* h) | 79 void create_histograms(int* x, int nx, int m, int hlen, double* h) |
93 { | 80 { |
94 int i, j, t; | 81 int i, j, t; |
95 double norm; | 82 double norm; |
96 | 83 |
97 for (i = 0; i < nx*m; i++) | 84 for (i = 0; i < nx*m; i++) { |
98 h[i] = 0; | 85 h[i] = 0; |
99 | 86 } |
100 for (i = hlen/2; i < nx-hlen/2; i++) | 87 |
101 { | 88 for (i = hlen/2; i < nx-hlen/2; i++) { |
102 for (j = 0; j < m; j++) | 89 for (j = 0; j < m; j++) { |
103 h[i*m+j] = 0; | 90 h[i*m+j] = 0; |
104 for (t = i-hlen/2; t <= i+hlen/2; t++) | 91 } |
105 ++h[i*m+x[t]]; | 92 for (t = i-hlen/2; t <= i+hlen/2; t++) { |
106 norm = 0; | 93 ++h[i*m+x[t]]; |
107 for (j = 0; j < m; j++) | 94 } |
108 norm += h[i*m+j] * h[i*m+j]; | 95 norm = 0; |
109 for (j = 0; j < m; j++) | 96 for (j = 0; j < m; j++) { |
110 h[i*m+j] /= norm; | 97 norm += h[i*m+j] * h[i*m+j]; |
111 } | 98 } |
112 | 99 for (j = 0; j < m; j++) { |
113 /* duplicate histograms at beginning and end to create one histogram for each data value supplied */ | 100 h[i*m+j] /= norm; |
114 for (i = 0; i < hlen/2; i++) | 101 } |
115 for (j = 0; j < m; j++) | 102 } |
116 h[i*m+j] = h[hlen/2*m+j]; | 103 |
117 for (i = nx-hlen/2; i < nx; i++) | 104 /* duplicate histograms at beginning and end to create one histogram for each data value supplied */ |
118 for (j = 0; j < m; j++) | 105 for (i = 0; i < hlen/2; i++) { |
119 h[i*m+j] = h[(nx-hlen/2-1)*m+j]; | 106 for (j = 0; j < m; j++) { |
107 h[i*m+j] = h[hlen/2*m+j]; | |
108 } | |
109 } | |
110 for (i = nx-hlen/2; i < nx; i++) { | |
111 for (j = 0; j < m; j++) { | |
112 h[i*m+j] = h[(nx-hlen/2-1)*m+j]; | |
113 } | |
114 } | |
120 } | 115 } |
121 | 116 |
122 /* segment using HMM and then histogram clustering */ | 117 /* segment using HMM and then histogram clustering */ |
123 void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states, | 118 void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states, |
124 int histogram_length, int nclusters, int neighbour_limit) | 119 int histogram_length, int nclusters, int neighbour_limit) |
125 { | 120 { |
126 int i, j; | 121 int i, j; |
127 | 122 |
128 /*****************************/ | 123 /*****************************/ |
129 if (0) { | 124 if (0) { |
130 /* try just using the predominant bin number as a 'decoded state' */ | 125 /* try just using the predominant bin number as a 'decoded state' */ |
131 nHMM_states = feature_length + 1; /* allow a 'zero' state */ | 126 nHMM_states = feature_length + 1; /* allow a 'zero' state */ |
132 double chroma_thresh = 0.05; | 127 double chroma_thresh = 0.05; |
133 double maxval; | 128 double maxval; |
134 int maxbin; | 129 int maxbin; |
135 for (i = 0; i < frames_read; i++) | 130 for (i = 0; i < frames_read; i++) { |
136 { | 131 maxval = 0; |
137 maxval = 0; | 132 for (j = 0; j < feature_length; j++) { |
138 for (j = 0; j < feature_length; j++) | 133 if (features[i][j] > maxval) { |
139 { | 134 maxval = features[i][j]; |
140 if (features[i][j] > maxval) | 135 maxbin = j; |
141 { | 136 } |
142 maxval = features[i][j]; | 137 } |
143 maxbin = j; | 138 if (maxval > chroma_thresh) { |
144 } | 139 q[i] = maxbin; |
145 } | 140 } else { |
146 if (maxval > chroma_thresh) | 141 q[i] = feature_length; |
147 q[i] = maxbin; | 142 } |
148 else | 143 } |
149 q[i] = feature_length; | 144 |
150 } | 145 } |
151 | 146 if (1) { |
152 } | 147 /*****************************/ |
153 if (1) { | 148 |
154 /*****************************/ | 149 /* scale all the features to 'balance covariances' during HMM training */ |
155 | 150 double scale = 10; |
156 | 151 for (i = 0; i < frames_read; i++) |
157 /* scale all the features to 'balance covariances' during HMM training */ | 152 for (j = 0; j < feature_length; j++) |
158 double scale = 10; | 153 features[i][j] *= scale; |
159 for (i = 0; i < frames_read; i++) | 154 |
160 for (j = 0; j < feature_length; j++) | 155 /* train an HMM on the features */ |
161 features[i][j] *= scale; | 156 |
162 | 157 /* create a model */ |
163 /* train an HMM on the features */ | 158 model_t* model = hmm_init(features, frames_read, feature_length, nHMM_states); |
164 | 159 |
165 /* create a model */ | 160 /* train the model */ |
166 model_t* model = hmm_init(features, frames_read, feature_length, nHMM_states); | 161 hmm_train(features, frames_read, model); |
167 | 162 /* |
168 /* train the model */ | 163 printf("\n\nafter training:\n"); |
169 hmm_train(features, frames_read, model); | 164 hmm_print(model); |
170 /* | 165 */ |
171 printf("\n\nafter training:\n"); | 166 /* decode the hidden state sequence */ |
172 hmm_print(model); | 167 viterbi_decode(features, frames_read, model, q); |
173 */ | 168 hmm_close(model); |
174 /* decode the hidden state sequence */ | 169 |
175 viterbi_decode(features, frames_read, model, q); | 170 /*****************************/ |
176 hmm_close(model); | 171 } |
177 | 172 /*****************************/ |
178 /*****************************/ | |
179 } | |
180 /*****************************/ | |
181 | |
182 | 173 |
183 /* | 174 /* |
184 fprintf(stderr, "HMM state sequence:\n"); | 175 fprintf(stderr, "HMM state sequence:\n"); |
185 for (i = 0; i < frames_read; i++) | 176 for (i = 0; i < frames_read; i++) |
186 fprintf(stderr, "%d ", q[i]); | 177 fprintf(stderr, "%d ", q[i]); |
187 fprintf(stderr, "\n\n"); | 178 fprintf(stderr, "\n\n"); |
188 */ | 179 */ |
189 | 180 |
190 /* create histograms of states */ | 181 /* create histograms of states */ |
191 double* h = (double*) malloc(frames_read*nHMM_states*sizeof(double)); /* vector in row major order */ | 182 double* h = (double*) malloc(frames_read*nHMM_states*sizeof(double)); /* vector in row major order */ |
192 create_histograms(q, frames_read, nHMM_states, histogram_length, h); | 183 create_histograms(q, frames_read, nHMM_states, histogram_length, h); |
193 | 184 |
194 /* cluster the histograms */ | 185 /* cluster the histograms */ |
195 int nbsched = 20; /* length of inverse temperature schedule */ | 186 int nbsched = 20; /* length of inverse temperature schedule */ |
196 double* bsched = (double*) malloc(nbsched*sizeof(double)); /* inverse temperature schedule */ | 187 double* bsched = (double*) malloc(nbsched*sizeof(double)); /* inverse temperature schedule */ |
197 double b0 = 100; | 188 double b0 = 100; |
198 double alpha = 0.7; | 189 double alpha = 0.7; |
199 bsched[0] = b0; | 190 bsched[0] = b0; |
200 for (i = 1; i < nbsched; i++) | 191 for (i = 1; i < nbsched; i++) { |
201 bsched[i] = alpha * bsched[i-1]; | 192 bsched[i] = alpha * bsched[i-1]; |
202 cluster_melt(h, nHMM_states, frames_read, bsched, nbsched, nclusters, neighbour_limit, q); | 193 } |
203 | 194 cluster_melt(h, nHMM_states, frames_read, bsched, nbsched, nclusters, neighbour_limit, q); |
204 /* now q holds a sequence of cluster assignments */ | 195 |
205 | 196 /* now q holds a sequence of cluster assignments */ |
206 free(h); | 197 |
207 free(bsched); | 198 free(h); |
199 free(bsched); | |
208 } | 200 } |
209 | 201 |
210 /* segment constant-Q or chroma features */ | 202 /* segment constant-Q or chroma features */ |
211 void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type, | 203 void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type, |
212 int nHMM_states, int histogram_length, int nclusters, int neighbour_limit) | 204 int nHMM_states, int histogram_length, int nclusters, int neighbour_limit) |
213 { | 205 { |
214 int feature_length; | 206 int feature_length; |
215 double** chroma; | 207 double** chroma; |
216 int i; | 208 int i; |
217 | 209 |
218 if (feature_type == FEATURE_TYPE_CONSTQ) | 210 if (feature_type == FEATURE_TYPE_CONSTQ) { |
219 { | 211 |
220 /* fprintf(stderr, "Converting to dB and normalising...\n"); | 212 mpeg7_constq(features, frames_read, ncoeff); |
221 */ | 213 |
222 mpeg7_constq(features, frames_read, ncoeff); | 214 /* do PCA on the features (but not the envelope) */ |
223 /* | 215 int ncomponents = 20; |
224 fprintf(stderr, "Running PCA...\n"); | 216 pca_project(features, frames_read, ncoeff, ncomponents); |
225 */ | 217 |
226 /* do PCA on the features (but not the envelope) */ | 218 /* copy the envelope so that it immediatly follows the chosen components */ |
227 int ncomponents = 20; | 219 for (i = 0; i < frames_read; i++) { |
228 pca_project(features, frames_read, ncoeff, ncomponents); | 220 features[i][ncomponents] = features[i][ncoeff]; |
229 | 221 } |
230 /* copy the envelope so that it immediatly follows the chosen components */ | 222 |
231 for (i = 0; i < frames_read; i++) | 223 feature_length = ncomponents + 1; |
232 features[i][ncomponents] = features[i][ncoeff]; | 224 |
233 | 225 cluster_segment(q, features, frames_read, feature_length, |
234 feature_length = ncomponents + 1; | 226 nHMM_states, histogram_length, nclusters, |
235 | 227 neighbour_limit); |
236 /************************************** | 228 } |
237 //TEST | 229 |
238 // feature file name | 230 if (feature_type == FEATURE_TYPE_CHROMA) { |
239 char* dir = "/Users/mark/documents/semma/audio/"; | 231 |
240 char* file_name = (char*) malloc((strlen(dir) + strlen(trackname) + strlen("_features_c20r8h0.2f0.6.mat") + 1)*sizeof(char)); | 232 /* convert constant-Q to normalised chroma features */ |
241 strcpy(file_name, dir); | 233 chroma = (double**) malloc(frames_read*sizeof(double*)); |
242 strcat(file_name, trackname); | 234 for (i = 0; i < frames_read; i++) { |
243 strcat(file_name, "_features_c20r8h0.2f0.6.mat"); | 235 chroma[i] = (double*) malloc(bins*sizeof(double)); |
244 | 236 } |
245 // get the features from Matlab from mat-file | 237 |
246 int frames_in_file; | 238 cq2chroma(features, frames_read, ncoeff, bins, chroma); |
247 readmatarray_size(file_name, 2, &frames_in_file, &feature_length); | 239 |
248 readmatarray(file_name, 2, frames_in_file, feature_length, features); | 240 feature_length = bins; |
249 // copy final frame to ensure that we get as many as we expected | 241 |
250 int missing_frames = frames_read - frames_in_file; | 242 cluster_segment(q, chroma, frames_read, feature_length, |
251 while (missing_frames > 0) | 243 nHMM_states, histogram_length, nclusters, |
252 { | 244 neighbour_limit); |
253 for (i = 0; i < feature_length; i++) | 245 |
254 features[frames_read-missing_frames][i] = features[frames_read-missing_frames-1][i]; | 246 for (i = 0; i < frames_read; i++) |
255 --missing_frames; | 247 free(chroma[i]); |
256 } | 248 free(chroma); |
257 | 249 } |
258 free(file_name); | 250 } |
259 ******************************************/ | 251 |
260 | 252 |
261 cluster_segment(q, features, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit); | 253 |
262 } | |
263 | |
264 if (feature_type == FEATURE_TYPE_CHROMA) | |
265 { | |
266 /* | |
267 fprintf(stderr, "Converting to chroma features...\n"); | |
268 */ | |
269 /* convert constant-Q to normalised chroma features */ | |
270 chroma = (double**) malloc(frames_read*sizeof(double*)); | |
271 for (i = 0; i < frames_read; i++) | |
272 chroma[i] = (double*) malloc(bins*sizeof(double)); | |
273 cq2chroma(features, frames_read, ncoeff, bins, chroma); | |
274 feature_length = bins; | |
275 | |
276 cluster_segment(q, chroma, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit); | |
277 | |
278 for (i = 0; i < frames_read; i++) | |
279 free(chroma[i]); | |
280 free(chroma); | |
281 } | |
282 } | |
283 | |
284 | |
285 |