comparison dsp/segmentation/cluster_segmenter.c @ 243:dc30e3864ceb

* merge in segmentation code from soundbite plugin/library repository
author Chris Cannam <c.cannam@qmul.ac.uk>
date Wed, 09 Jan 2008 10:46:25 +0000
parents
children 8bdbda7fb893
comparison
equal deleted inserted replaced
242:6060110dc3c6 243:dc30e3864ceb
1 /*
2 * cluster_segmenter.c
3 * soundbite
4 *
5 * Created by Mark Levy on 06/04/2006.
6 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved.
7 *
8 */
9
10 #include "cluster_segmenter.h"
11
12 extern int readmatarray_size(const char *filepath, int n_array, int* t, int* d);
13 extern int readmatarray(const char *filepath, int n_array, int t, int d, double** arr);
14
15 /* converts constant-Q features to normalised chroma */
16 void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma)
17 {
18 int noct = ncoeff / bins; /* number of complete octaves in constant-Q */
19 int t, b, oct, ix;
20 //double maxchroma; /* max chroma value at each time, for normalisation */
21 //double sum; /* for normalisation */
22
23 for (t = 0; t < nframes; t++)
24 {
25 for (b = 0; b < bins; b++)
26 chroma[t][b] = 0;
27 for (oct = 0; oct < noct; oct++)
28 {
29 ix = oct * bins;
30 for (b = 0; b < bins; b++)
31 chroma[t][b] += fabs(cq[t][ix+b]);
32 }
33 /* normalise to unit sum
34 sum = 0;
35 for (b = 0; b < bins; b++)
36 sum += chroma[t][b];
37 for (b = 0; b < bins; b++)
38 chroma[t][b] /= sum;
39 /* normalise to unit max - NO this made results much worse!
40 maxchroma = 0;
41 for (b = 0; b < bins; b++)
42 if (chroma[t][b] > maxchroma)
43 maxchroma = chroma[t][b];
44 if (maxchroma > 0)
45 for (b = 0; b < bins; b++)
46 chroma[t][b] /= maxchroma;
47 */
48 }
49 }
50
51 /* applies MPEG-7 normalisation to constant-Q features, storing normalised envelope (norm) in last feature dimension */
52 void mpeg7_constq(double** features, int nframes, int ncoeff)
53 {
54 int i, j;
55 double ss;
56 double env;
57 double maxenv = 0;
58
59 /* convert const-Q features to dB scale */
60 for (i = 0; i < nframes; i++)
61 for (j = 0; j < ncoeff; j++)
62 features[i][j] = 10.0 * log10(features[i][j]+DBL_EPSILON);
63
64 /* normalise each feature vector and add the norm as an extra feature dimension */
65 for (i = 0; i < nframes; i++)
66 {
67 ss = 0;
68 for (j = 0; j < ncoeff; j++)
69 ss += features[i][j] * features[i][j];
70 env = sqrt(ss);
71 for (j = 0; j < ncoeff; j++)
72 features[i][j] /= env;
73 features[i][ncoeff] = env;
74 if (env > maxenv)
75 maxenv = env;
76 }
77 /* normalise the envelopes */
78 for (i = 0; i < nframes; i++)
79 features[i][ncoeff] /= maxenv;
80 }
81
82 /* return histograms h[nx*m] of data x[nx] into m bins using a sliding window of length h_len (MUST BE ODD) */
83 /* NB h is a vector in row major order, as required by cluster_melt() */
84 /* for historical reasons we normalise the histograms by their norm (not to sum to one) */
85 void create_histograms(int* x, int nx, int m, int hlen, double* h)
86 {
87 int i, j, t;
88 double norm;
89 for (i = hlen/2; i < nx-hlen/2; i++)
90 {
91 for (j = 0; j < m; j++)
92 h[i*m+j] = 0;
93 for (t = i-hlen/2; t <= i+hlen/2; t++)
94 ++h[i*m+x[t]];
95 norm = 0;
96 for (j = 0; j < m; j++)
97 norm += h[i*m+j] * h[i*m+j];
98 for (j = 0; j < m; j++)
99 h[i*m+j] /= norm;
100 }
101
102 /* duplicate histograms at beginning and end to create one histogram for each data value supplied */
103 for (i = 0; i < hlen/2; i++)
104 for (j = 0; j < m; j++)
105 h[i*m+j] = h[hlen/2*m+j];
106 for (i = nx-hlen/2; i < nx; i++)
107 for (j = 0; j < m; j++)
108 h[i*m+j] = h[(nx-hlen/2-1)*m+j];
109 }
110
111 /* segment using HMM and then histogram clustering */
112 void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states,
113 int histogram_length, int nclusters, int neighbour_limit)
114 {
115 int i, j;
116
117 /*****************************/
118 if (0) {
119 /* try just using the predominant bin number as a 'decoded state' */
120 nHMM_states = feature_length + 1; /* allow a 'zero' state */
121 double chroma_thresh = 0.05;
122 double maxval;
123 int maxbin;
124 for (i = 0; i < frames_read; i++)
125 {
126 maxval = 0;
127 for (j = 0; j < feature_length; j++)
128 {
129 if (features[i][j] > maxval)
130 {
131 maxval = features[i][j];
132 maxbin = j;
133 }
134 }
135 if (maxval > chroma_thresh)
136 q[i] = maxbin;
137 else
138 q[i] = feature_length;
139 }
140
141 }
142 if (1) {
143 /*****************************/
144
145
146 /* scale all the features to 'balance covariances' during HMM training */
147 double scale = 10;
148 for (i = 0; i < frames_read; i++)
149 for (j = 0; j < feature_length; j++)
150 features[i][j] *= scale;
151
152 /* train an HMM on the features */
153
154 /* create a model */
155 model_t* model = hmm_init(features, frames_read, feature_length, nHMM_states);
156
157 /* train the model */
158 hmm_train(features, frames_read, model);
159
160 printf("\n\nafter training:\n");
161 hmm_print(model);
162
163 /* decode the hidden state sequence */
164 viterbi_decode(features, frames_read, model, q);
165 hmm_close(model);
166
167 /*****************************/
168 }
169 /*****************************/
170
171
172 fprintf(stderr, "HMM state sequence:\n");
173 for (i = 0; i < frames_read; i++)
174 fprintf(stderr, "%d ", q[i]);
175 fprintf(stderr, "\n\n");
176
177 /* create histograms of states */
178 double* h = (double*) malloc(frames_read*nHMM_states*sizeof(double)); /* vector in row major order */
179 create_histograms(q, frames_read, nHMM_states, histogram_length, h);
180
181 /* cluster the histograms */
182 int nbsched = 20; /* length of inverse temperature schedule */
183 double* bsched = (double*) malloc(nbsched*sizeof(double)); /* inverse temperature schedule */
184 double b0 = 100;
185 double alpha = 0.7;
186 bsched[0] = b0;
187 for (i = 1; i < nbsched; i++)
188 bsched[i] = alpha * bsched[i-1];
189 cluster_melt(h, nHMM_states, frames_read, bsched, nbsched, nclusters, neighbour_limit, q);
190
191 /* now q holds a sequence of cluster assignments */
192
193 free(h);
194 free(bsched);
195 }
196
197 /* segment constant-Q or chroma features */
198 void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type,
199 int nHMM_states, int histogram_length, int nclusters, int neighbour_limit)
200 {
201 int feature_length;
202 double** chroma;
203 int i;
204
205 if (feature_type == FEATURE_TYPE_CONSTQ)
206 {
207 fprintf(stderr, "Converting to dB and normalising...\n");
208
209 mpeg7_constq(features, frames_read, ncoeff);
210
211 fprintf(stderr, "Running PCA...\n");
212
213 /* do PCA on the features (but not the envelope) */
214 int ncomponents = 20;
215 pca_project(features, frames_read, ncoeff, ncomponents);
216
217 /* copy the envelope so that it immediatly follows the chosen components */
218 for (i = 0; i < frames_read; i++)
219 features[i][ncomponents] = features[i][ncoeff];
220
221 feature_length = ncomponents + 1;
222
223 /**************************************
224 //TEST
225 // feature file name
226 char* dir = "/Users/mark/documents/semma/audio/";
227 char* file_name = (char*) malloc((strlen(dir) + strlen(trackname) + strlen("_features_c20r8h0.2f0.6.mat") + 1)*sizeof(char));
228 strcpy(file_name, dir);
229 strcat(file_name, trackname);
230 strcat(file_name, "_features_c20r8h0.2f0.6.mat");
231
232 // get the features from Matlab from mat-file
233 int frames_in_file;
234 readmatarray_size(file_name, 2, &frames_in_file, &feature_length);
235 readmatarray(file_name, 2, frames_in_file, feature_length, features);
236 // copy final frame to ensure that we get as many as we expected
237 int missing_frames = frames_read - frames_in_file;
238 while (missing_frames > 0)
239 {
240 for (i = 0; i < feature_length; i++)
241 features[frames_read-missing_frames][i] = features[frames_read-missing_frames-1][i];
242 --missing_frames;
243 }
244
245 free(file_name);
246 ******************************************/
247
248 cluster_segment(q, features, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit);
249 }
250
251 if (feature_type == FEATURE_TYPE_CHROMA)
252 {
253 fprintf(stderr, "Converting to chroma features...\n");
254
255 /* convert constant-Q to normalised chroma features */
256 chroma = (double**) malloc(frames_read*sizeof(double*));
257 for (i = 0; i < frames_read; i++)
258 chroma[i] = (double*) malloc(bins*sizeof(double));
259 cq2chroma(features, frames_read, ncoeff, bins, chroma);
260 feature_length = bins;
261
262 cluster_segment(q, chroma, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit);
263
264 for (i = 0; i < frames_read; i++)
265 free(chroma[i]);
266 free(chroma);
267 }
268 }
269
270
271