cannam@18
|
1 /*
|
cannam@18
|
2 * cluster_segmenter.c
|
cannam@18
|
3 * soundbite
|
cannam@18
|
4 *
|
cannam@18
|
5 * Created by Mark Levy on 06/04/2006.
|
cannam@18
|
6 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved.
|
cannam@18
|
7 *
|
cannam@18
|
8 */
|
cannam@18
|
9
|
cannam@18
|
10 #include "cluster_segmenter.h"
|
cannam@18
|
11
|
cannam@18
|
12 extern int readmatarray_size(const char *filepath, int n_array, int* t, int* d);
|
cannam@18
|
13 extern int readmatarray(const char *filepath, int n_array, int t, int d, double** arr);
|
cannam@18
|
14
|
cannam@18
|
15 /* converts constant-Q features to normalised chroma */
|
cannam@18
|
16 void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma)
|
cannam@18
|
17 {
|
cannam@18
|
18 int noct = ncoeff / bins; /* number of complete octaves in constant-Q */
|
cannam@18
|
19 int t, b, oct, ix;
|
cannam@18
|
20 //double maxchroma; /* max chroma value at each time, for normalisation */
|
cannam@18
|
21 //double sum; /* for normalisation */
|
cannam@18
|
22
|
cannam@18
|
23 for (t = 0; t < nframes; t++)
|
cannam@18
|
24 {
|
cannam@18
|
25 for (b = 0; b < bins; b++)
|
cannam@18
|
26 chroma[t][b] = 0;
|
cannam@18
|
27 for (oct = 0; oct < noct; oct++)
|
cannam@18
|
28 {
|
cannam@18
|
29 ix = oct * bins;
|
cannam@18
|
30 for (b = 0; b < bins; b++)
|
cannam@18
|
31 chroma[t][b] += fabs(cq[t][ix+b]);
|
cannam@18
|
32 }
|
cannam@18
|
33 /* normalise to unit sum
|
cannam@18
|
34 sum = 0;
|
cannam@18
|
35 for (b = 0; b < bins; b++)
|
cannam@18
|
36 sum += chroma[t][b];
|
cannam@18
|
37 for (b = 0; b < bins; b++)
|
cannam@18
|
38 chroma[t][b] /= sum;
|
cannam@20
|
39 */
|
cannam@18
|
40 /* normalise to unit max - NO this made results much worse!
|
cannam@18
|
41 maxchroma = 0;
|
cannam@18
|
42 for (b = 0; b < bins; b++)
|
cannam@18
|
43 if (chroma[t][b] > maxchroma)
|
cannam@18
|
44 maxchroma = chroma[t][b];
|
cannam@18
|
45 if (maxchroma > 0)
|
cannam@18
|
46 for (b = 0; b < bins; b++)
|
cannam@18
|
47 chroma[t][b] /= maxchroma;
|
cannam@18
|
48 */
|
cannam@18
|
49 }
|
cannam@18
|
50 }
|
cannam@18
|
51
|
cannam@18
|
52 /* applies MPEG-7 normalisation to constant-Q features, storing normalised envelope (norm) in last feature dimension */
|
cannam@18
|
53 void mpeg7_constq(double** features, int nframes, int ncoeff)
|
cannam@18
|
54 {
|
cannam@18
|
55 int i, j;
|
cannam@18
|
56 double ss;
|
cannam@18
|
57 double env;
|
cannam@18
|
58 double maxenv = 0;
|
cannam@18
|
59
|
cannam@18
|
60 /* convert const-Q features to dB scale */
|
cannam@18
|
61 for (i = 0; i < nframes; i++)
|
cannam@18
|
62 for (j = 0; j < ncoeff; j++)
|
cannam@18
|
63 features[i][j] = 10.0 * log10(features[i][j]+DBL_EPSILON);
|
cannam@18
|
64
|
cannam@18
|
65 /* normalise each feature vector and add the norm as an extra feature dimension */
|
cannam@18
|
66 for (i = 0; i < nframes; i++)
|
cannam@18
|
67 {
|
cannam@18
|
68 ss = 0;
|
cannam@18
|
69 for (j = 0; j < ncoeff; j++)
|
cannam@18
|
70 ss += features[i][j] * features[i][j];
|
cannam@18
|
71 env = sqrt(ss);
|
cannam@18
|
72 for (j = 0; j < ncoeff; j++)
|
cannam@18
|
73 features[i][j] /= env;
|
cannam@18
|
74 features[i][ncoeff] = env;
|
cannam@18
|
75 if (env > maxenv)
|
cannam@18
|
76 maxenv = env;
|
cannam@18
|
77 }
|
cannam@18
|
78 /* normalise the envelopes */
|
cannam@18
|
79 for (i = 0; i < nframes; i++)
|
cannam@18
|
80 features[i][ncoeff] /= maxenv;
|
cannam@18
|
81 }
|
cannam@18
|
82
|
cannam@18
|
83 /* return histograms h[nx*m] of data x[nx] into m bins using a sliding window of length h_len (MUST BE ODD) */
|
cannam@18
|
84 /* NB h is a vector in row major order, as required by cluster_melt() */
|
cannam@18
|
85 /* for historical reasons we normalise the histograms by their norm (not to sum to one) */
|
cannam@18
|
86 void create_histograms(int* x, int nx, int m, int hlen, double* h)
|
cannam@18
|
87 {
|
cannam@18
|
88 int i, j, t;
|
cannam@18
|
89 double norm;
|
cannam@41
|
90
|
cannam@41
|
91 for (i = 0; i < nx*m; i++)
|
cannam@41
|
92 h[i] = 0;
|
cannam@41
|
93
|
cannam@18
|
94 for (i = hlen/2; i < nx-hlen/2; i++)
|
cannam@18
|
95 {
|
cannam@18
|
96 for (j = 0; j < m; j++)
|
cannam@18
|
97 h[i*m+j] = 0;
|
cannam@18
|
98 for (t = i-hlen/2; t <= i+hlen/2; t++)
|
cannam@18
|
99 ++h[i*m+x[t]];
|
cannam@18
|
100 norm = 0;
|
cannam@18
|
101 for (j = 0; j < m; j++)
|
cannam@18
|
102 norm += h[i*m+j] * h[i*m+j];
|
cannam@18
|
103 for (j = 0; j < m; j++)
|
cannam@18
|
104 h[i*m+j] /= norm;
|
cannam@18
|
105 }
|
cannam@18
|
106
|
cannam@18
|
107 /* duplicate histograms at beginning and end to create one histogram for each data value supplied */
|
cannam@18
|
108 for (i = 0; i < hlen/2; i++)
|
cannam@18
|
109 for (j = 0; j < m; j++)
|
cannam@18
|
110 h[i*m+j] = h[hlen/2*m+j];
|
cannam@18
|
111 for (i = nx-hlen/2; i < nx; i++)
|
cannam@18
|
112 for (j = 0; j < m; j++)
|
cannam@18
|
113 h[i*m+j] = h[(nx-hlen/2-1)*m+j];
|
cannam@18
|
114 }
|
cannam@18
|
115
|
cannam@18
|
116 /* segment using HMM and then histogram clustering */
|
cannam@18
|
117 void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states,
|
cannam@18
|
118 int histogram_length, int nclusters, int neighbour_limit)
|
cannam@18
|
119 {
|
cannam@18
|
120 int i, j;
|
cannam@18
|
121
|
cannam@18
|
122 /*****************************/
|
cannam@18
|
123 if (0) {
|
cannam@18
|
124 /* try just using the predominant bin number as a 'decoded state' */
|
cannam@18
|
125 nHMM_states = feature_length + 1; /* allow a 'zero' state */
|
cannam@18
|
126 double chroma_thresh = 0.05;
|
cannam@18
|
127 double maxval;
|
cannam@18
|
128 int maxbin;
|
cannam@18
|
129 for (i = 0; i < frames_read; i++)
|
cannam@18
|
130 {
|
cannam@18
|
131 maxval = 0;
|
cannam@18
|
132 for (j = 0; j < feature_length; j++)
|
cannam@18
|
133 {
|
cannam@18
|
134 if (features[i][j] > maxval)
|
cannam@18
|
135 {
|
cannam@18
|
136 maxval = features[i][j];
|
cannam@18
|
137 maxbin = j;
|
cannam@18
|
138 }
|
cannam@18
|
139 }
|
cannam@18
|
140 if (maxval > chroma_thresh)
|
cannam@18
|
141 q[i] = maxbin;
|
cannam@18
|
142 else
|
cannam@18
|
143 q[i] = feature_length;
|
cannam@18
|
144 }
|
cannam@18
|
145
|
cannam@18
|
146 }
|
cannam@18
|
147 if (1) {
|
cannam@18
|
148 /*****************************/
|
cannam@18
|
149
|
cannam@18
|
150
|
cannam@18
|
151 /* scale all the features to 'balance covariances' during HMM training */
|
cannam@18
|
152 double scale = 10;
|
cannam@18
|
153 for (i = 0; i < frames_read; i++)
|
cannam@18
|
154 for (j = 0; j < feature_length; j++)
|
cannam@18
|
155 features[i][j] *= scale;
|
cannam@18
|
156
|
cannam@18
|
157 /* train an HMM on the features */
|
cannam@18
|
158
|
cannam@18
|
159 /* create a model */
|
cannam@18
|
160 model_t* model = hmm_init(features, frames_read, feature_length, nHMM_states);
|
cannam@18
|
161
|
cannam@18
|
162 /* train the model */
|
cannam@18
|
163 hmm_train(features, frames_read, model);
|
cannam@18
|
164
|
cannam@18
|
165 printf("\n\nafter training:\n");
|
cannam@18
|
166 hmm_print(model);
|
cannam@18
|
167
|
cannam@18
|
168 /* decode the hidden state sequence */
|
cannam@18
|
169 viterbi_decode(features, frames_read, model, q);
|
cannam@18
|
170 hmm_close(model);
|
cannam@18
|
171
|
cannam@18
|
172 /*****************************/
|
cannam@18
|
173 }
|
cannam@18
|
174 /*****************************/
|
cannam@18
|
175
|
cannam@18
|
176
|
cannam@18
|
177 fprintf(stderr, "HMM state sequence:\n");
|
cannam@18
|
178 for (i = 0; i < frames_read; i++)
|
cannam@18
|
179 fprintf(stderr, "%d ", q[i]);
|
cannam@18
|
180 fprintf(stderr, "\n\n");
|
cannam@18
|
181
|
cannam@18
|
182 /* create histograms of states */
|
cannam@18
|
183 double* h = (double*) malloc(frames_read*nHMM_states*sizeof(double)); /* vector in row major order */
|
cannam@18
|
184 create_histograms(q, frames_read, nHMM_states, histogram_length, h);
|
cannam@18
|
185
|
cannam@18
|
186 /* cluster the histograms */
|
cannam@18
|
187 int nbsched = 20; /* length of inverse temperature schedule */
|
cannam@18
|
188 double* bsched = (double*) malloc(nbsched*sizeof(double)); /* inverse temperature schedule */
|
cannam@18
|
189 double b0 = 100;
|
cannam@18
|
190 double alpha = 0.7;
|
cannam@18
|
191 bsched[0] = b0;
|
cannam@18
|
192 for (i = 1; i < nbsched; i++)
|
cannam@18
|
193 bsched[i] = alpha * bsched[i-1];
|
cannam@18
|
194 cluster_melt(h, nHMM_states, frames_read, bsched, nbsched, nclusters, neighbour_limit, q);
|
cannam@18
|
195
|
cannam@18
|
196 /* now q holds a sequence of cluster assignments */
|
cannam@18
|
197
|
cannam@18
|
198 free(h);
|
cannam@18
|
199 free(bsched);
|
cannam@18
|
200 }
|
cannam@18
|
201
|
cannam@18
|
202 /* segment constant-Q or chroma features */
|
cannam@18
|
203 void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type,
|
cannam@18
|
204 int nHMM_states, int histogram_length, int nclusters, int neighbour_limit)
|
cannam@18
|
205 {
|
cannam@18
|
206 int feature_length;
|
cannam@18
|
207 double** chroma;
|
cannam@18
|
208 int i;
|
cannam@18
|
209
|
cannam@18
|
210 if (feature_type == FEATURE_TYPE_CONSTQ)
|
cannam@18
|
211 {
|
cannam@18
|
212 fprintf(stderr, "Converting to dB and normalising...\n");
|
cannam@18
|
213
|
cannam@18
|
214 mpeg7_constq(features, frames_read, ncoeff);
|
cannam@18
|
215
|
cannam@18
|
216 fprintf(stderr, "Running PCA...\n");
|
cannam@18
|
217
|
cannam@18
|
218 /* do PCA on the features (but not the envelope) */
|
cannam@18
|
219 int ncomponents = 20;
|
cannam@18
|
220 pca_project(features, frames_read, ncoeff, ncomponents);
|
cannam@18
|
221
|
cannam@18
|
222 /* copy the envelope so that it immediatly follows the chosen components */
|
cannam@18
|
223 for (i = 0; i < frames_read; i++)
|
cannam@18
|
224 features[i][ncomponents] = features[i][ncoeff];
|
cannam@18
|
225
|
cannam@18
|
226 feature_length = ncomponents + 1;
|
cannam@18
|
227
|
cannam@18
|
228 /**************************************
|
cannam@18
|
229 //TEST
|
cannam@18
|
230 // feature file name
|
cannam@18
|
231 char* dir = "/Users/mark/documents/semma/audio/";
|
cannam@18
|
232 char* file_name = (char*) malloc((strlen(dir) + strlen(trackname) + strlen("_features_c20r8h0.2f0.6.mat") + 1)*sizeof(char));
|
cannam@18
|
233 strcpy(file_name, dir);
|
cannam@18
|
234 strcat(file_name, trackname);
|
cannam@18
|
235 strcat(file_name, "_features_c20r8h0.2f0.6.mat");
|
cannam@18
|
236
|
cannam@18
|
237 // get the features from Matlab from mat-file
|
cannam@18
|
238 int frames_in_file;
|
cannam@18
|
239 readmatarray_size(file_name, 2, &frames_in_file, &feature_length);
|
cannam@18
|
240 readmatarray(file_name, 2, frames_in_file, feature_length, features);
|
cannam@18
|
241 // copy final frame to ensure that we get as many as we expected
|
cannam@18
|
242 int missing_frames = frames_read - frames_in_file;
|
cannam@18
|
243 while (missing_frames > 0)
|
cannam@18
|
244 {
|
cannam@18
|
245 for (i = 0; i < feature_length; i++)
|
cannam@18
|
246 features[frames_read-missing_frames][i] = features[frames_read-missing_frames-1][i];
|
cannam@18
|
247 --missing_frames;
|
cannam@18
|
248 }
|
cannam@18
|
249
|
cannam@18
|
250 free(file_name);
|
cannam@18
|
251 ******************************************/
|
cannam@18
|
252
|
cannam@18
|
253 cluster_segment(q, features, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit);
|
cannam@18
|
254 }
|
cannam@18
|
255
|
cannam@18
|
256 if (feature_type == FEATURE_TYPE_CHROMA)
|
cannam@18
|
257 {
|
cannam@18
|
258 fprintf(stderr, "Converting to chroma features...\n");
|
cannam@18
|
259
|
cannam@18
|
260 /* convert constant-Q to normalised chroma features */
|
cannam@18
|
261 chroma = (double**) malloc(frames_read*sizeof(double*));
|
cannam@18
|
262 for (i = 0; i < frames_read; i++)
|
cannam@18
|
263 chroma[i] = (double*) malloc(bins*sizeof(double));
|
cannam@18
|
264 cq2chroma(features, frames_read, ncoeff, bins, chroma);
|
cannam@18
|
265 feature_length = bins;
|
cannam@18
|
266
|
cannam@18
|
267 cluster_segment(q, chroma, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit);
|
cannam@18
|
268
|
cannam@18
|
269 for (i = 0; i < frames_read; i++)
|
cannam@18
|
270 free(chroma[i]);
|
cannam@18
|
271 free(chroma);
|
cannam@18
|
272 }
|
cannam@18
|
273 }
|
cannam@18
|
274
|
cannam@18
|
275
|
cannam@18
|
276
|