Mercurial > hg > qm-dsp
comparison dsp/segmentation/cluster_segmenter.c @ 243:dc30e3864ceb
* merge in segmentation code from soundbite plugin/library repository
author | Chris Cannam <c.cannam@qmul.ac.uk> |
---|---|
date | Wed, 09 Jan 2008 10:46:25 +0000 |
parents | |
children | 8bdbda7fb893 |
comparison
equal
deleted
inserted
replaced
242:6060110dc3c6 | 243:dc30e3864ceb |
---|---|
1 /* | |
2 * cluster_segmenter.c | |
3 * soundbite | |
4 * | |
5 * Created by Mark Levy on 06/04/2006. | |
6 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved. | |
7 * | |
8 */ | |
9 | |
10 #include "cluster_segmenter.h" | |
11 | |
12 extern int readmatarray_size(const char *filepath, int n_array, int* t, int* d); | |
13 extern int readmatarray(const char *filepath, int n_array, int t, int d, double** arr); | |
14 | |
15 /* converts constant-Q features to normalised chroma */ | |
16 void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma) | |
17 { | |
18 int noct = ncoeff / bins; /* number of complete octaves in constant-Q */ | |
19 int t, b, oct, ix; | |
20 //double maxchroma; /* max chroma value at each time, for normalisation */ | |
21 //double sum; /* for normalisation */ | |
22 | |
23 for (t = 0; t < nframes; t++) | |
24 { | |
25 for (b = 0; b < bins; b++) | |
26 chroma[t][b] = 0; | |
27 for (oct = 0; oct < noct; oct++) | |
28 { | |
29 ix = oct * bins; | |
30 for (b = 0; b < bins; b++) | |
31 chroma[t][b] += fabs(cq[t][ix+b]); | |
32 } | |
33 /* normalise to unit sum | |
34 sum = 0; | |
35 for (b = 0; b < bins; b++) | |
36 sum += chroma[t][b]; | |
37 for (b = 0; b < bins; b++) | |
38 chroma[t][b] /= sum; | |
39 /* normalise to unit max - NO this made results much worse! | |
40 maxchroma = 0; | |
41 for (b = 0; b < bins; b++) | |
42 if (chroma[t][b] > maxchroma) | |
43 maxchroma = chroma[t][b]; | |
44 if (maxchroma > 0) | |
45 for (b = 0; b < bins; b++) | |
46 chroma[t][b] /= maxchroma; | |
47 */ | |
48 } | |
49 } | |
50 | |
51 /* applies MPEG-7 normalisation to constant-Q features, storing normalised envelope (norm) in last feature dimension */ | |
52 void mpeg7_constq(double** features, int nframes, int ncoeff) | |
53 { | |
54 int i, j; | |
55 double ss; | |
56 double env; | |
57 double maxenv = 0; | |
58 | |
59 /* convert const-Q features to dB scale */ | |
60 for (i = 0; i < nframes; i++) | |
61 for (j = 0; j < ncoeff; j++) | |
62 features[i][j] = 10.0 * log10(features[i][j]+DBL_EPSILON); | |
63 | |
64 /* normalise each feature vector and add the norm as an extra feature dimension */ | |
65 for (i = 0; i < nframes; i++) | |
66 { | |
67 ss = 0; | |
68 for (j = 0; j < ncoeff; j++) | |
69 ss += features[i][j] * features[i][j]; | |
70 env = sqrt(ss); | |
71 for (j = 0; j < ncoeff; j++) | |
72 features[i][j] /= env; | |
73 features[i][ncoeff] = env; | |
74 if (env > maxenv) | |
75 maxenv = env; | |
76 } | |
77 /* normalise the envelopes */ | |
78 for (i = 0; i < nframes; i++) | |
79 features[i][ncoeff] /= maxenv; | |
80 } | |
81 | |
82 /* return histograms h[nx*m] of data x[nx] into m bins using a sliding window of length h_len (MUST BE ODD) */ | |
83 /* NB h is a vector in row major order, as required by cluster_melt() */ | |
84 /* for historical reasons we normalise the histograms by their norm (not to sum to one) */ | |
85 void create_histograms(int* x, int nx, int m, int hlen, double* h) | |
86 { | |
87 int i, j, t; | |
88 double norm; | |
89 for (i = hlen/2; i < nx-hlen/2; i++) | |
90 { | |
91 for (j = 0; j < m; j++) | |
92 h[i*m+j] = 0; | |
93 for (t = i-hlen/2; t <= i+hlen/2; t++) | |
94 ++h[i*m+x[t]]; | |
95 norm = 0; | |
96 for (j = 0; j < m; j++) | |
97 norm += h[i*m+j] * h[i*m+j]; | |
98 for (j = 0; j < m; j++) | |
99 h[i*m+j] /= norm; | |
100 } | |
101 | |
102 /* duplicate histograms at beginning and end to create one histogram for each data value supplied */ | |
103 for (i = 0; i < hlen/2; i++) | |
104 for (j = 0; j < m; j++) | |
105 h[i*m+j] = h[hlen/2*m+j]; | |
106 for (i = nx-hlen/2; i < nx; i++) | |
107 for (j = 0; j < m; j++) | |
108 h[i*m+j] = h[(nx-hlen/2-1)*m+j]; | |
109 } | |
110 | |
111 /* segment using HMM and then histogram clustering */ | |
112 void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states, | |
113 int histogram_length, int nclusters, int neighbour_limit) | |
114 { | |
115 int i, j; | |
116 | |
117 /*****************************/ | |
118 if (0) { | |
119 /* try just using the predominant bin number as a 'decoded state' */ | |
120 nHMM_states = feature_length + 1; /* allow a 'zero' state */ | |
121 double chroma_thresh = 0.05; | |
122 double maxval; | |
123 int maxbin; | |
124 for (i = 0; i < frames_read; i++) | |
125 { | |
126 maxval = 0; | |
127 for (j = 0; j < feature_length; j++) | |
128 { | |
129 if (features[i][j] > maxval) | |
130 { | |
131 maxval = features[i][j]; | |
132 maxbin = j; | |
133 } | |
134 } | |
135 if (maxval > chroma_thresh) | |
136 q[i] = maxbin; | |
137 else | |
138 q[i] = feature_length; | |
139 } | |
140 | |
141 } | |
142 if (1) { | |
143 /*****************************/ | |
144 | |
145 | |
146 /* scale all the features to 'balance covariances' during HMM training */ | |
147 double scale = 10; | |
148 for (i = 0; i < frames_read; i++) | |
149 for (j = 0; j < feature_length; j++) | |
150 features[i][j] *= scale; | |
151 | |
152 /* train an HMM on the features */ | |
153 | |
154 /* create a model */ | |
155 model_t* model = hmm_init(features, frames_read, feature_length, nHMM_states); | |
156 | |
157 /* train the model */ | |
158 hmm_train(features, frames_read, model); | |
159 | |
160 printf("\n\nafter training:\n"); | |
161 hmm_print(model); | |
162 | |
163 /* decode the hidden state sequence */ | |
164 viterbi_decode(features, frames_read, model, q); | |
165 hmm_close(model); | |
166 | |
167 /*****************************/ | |
168 } | |
169 /*****************************/ | |
170 | |
171 | |
172 fprintf(stderr, "HMM state sequence:\n"); | |
173 for (i = 0; i < frames_read; i++) | |
174 fprintf(stderr, "%d ", q[i]); | |
175 fprintf(stderr, "\n\n"); | |
176 | |
177 /* create histograms of states */ | |
178 double* h = (double*) malloc(frames_read*nHMM_states*sizeof(double)); /* vector in row major order */ | |
179 create_histograms(q, frames_read, nHMM_states, histogram_length, h); | |
180 | |
181 /* cluster the histograms */ | |
182 int nbsched = 20; /* length of inverse temperature schedule */ | |
183 double* bsched = (double*) malloc(nbsched*sizeof(double)); /* inverse temperature schedule */ | |
184 double b0 = 100; | |
185 double alpha = 0.7; | |
186 bsched[0] = b0; | |
187 for (i = 1; i < nbsched; i++) | |
188 bsched[i] = alpha * bsched[i-1]; | |
189 cluster_melt(h, nHMM_states, frames_read, bsched, nbsched, nclusters, neighbour_limit, q); | |
190 | |
191 /* now q holds a sequence of cluster assignments */ | |
192 | |
193 free(h); | |
194 free(bsched); | |
195 } | |
196 | |
197 /* segment constant-Q or chroma features */ | |
198 void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type, | |
199 int nHMM_states, int histogram_length, int nclusters, int neighbour_limit) | |
200 { | |
201 int feature_length; | |
202 double** chroma; | |
203 int i; | |
204 | |
205 if (feature_type == FEATURE_TYPE_CONSTQ) | |
206 { | |
207 fprintf(stderr, "Converting to dB and normalising...\n"); | |
208 | |
209 mpeg7_constq(features, frames_read, ncoeff); | |
210 | |
211 fprintf(stderr, "Running PCA...\n"); | |
212 | |
213 /* do PCA on the features (but not the envelope) */ | |
214 int ncomponents = 20; | |
215 pca_project(features, frames_read, ncoeff, ncomponents); | |
216 | |
217 /* copy the envelope so that it immediatly follows the chosen components */ | |
218 for (i = 0; i < frames_read; i++) | |
219 features[i][ncomponents] = features[i][ncoeff]; | |
220 | |
221 feature_length = ncomponents + 1; | |
222 | |
223 /************************************** | |
224 //TEST | |
225 // feature file name | |
226 char* dir = "/Users/mark/documents/semma/audio/"; | |
227 char* file_name = (char*) malloc((strlen(dir) + strlen(trackname) + strlen("_features_c20r8h0.2f0.6.mat") + 1)*sizeof(char)); | |
228 strcpy(file_name, dir); | |
229 strcat(file_name, trackname); | |
230 strcat(file_name, "_features_c20r8h0.2f0.6.mat"); | |
231 | |
232 // get the features from Matlab from mat-file | |
233 int frames_in_file; | |
234 readmatarray_size(file_name, 2, &frames_in_file, &feature_length); | |
235 readmatarray(file_name, 2, frames_in_file, feature_length, features); | |
236 // copy final frame to ensure that we get as many as we expected | |
237 int missing_frames = frames_read - frames_in_file; | |
238 while (missing_frames > 0) | |
239 { | |
240 for (i = 0; i < feature_length; i++) | |
241 features[frames_read-missing_frames][i] = features[frames_read-missing_frames-1][i]; | |
242 --missing_frames; | |
243 } | |
244 | |
245 free(file_name); | |
246 ******************************************/ | |
247 | |
248 cluster_segment(q, features, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit); | |
249 } | |
250 | |
251 if (feature_type == FEATURE_TYPE_CHROMA) | |
252 { | |
253 fprintf(stderr, "Converting to chroma features...\n"); | |
254 | |
255 /* convert constant-Q to normalised chroma features */ | |
256 chroma = (double**) malloc(frames_read*sizeof(double*)); | |
257 for (i = 0; i < frames_read; i++) | |
258 chroma[i] = (double*) malloc(bins*sizeof(double)); | |
259 cq2chroma(features, frames_read, ncoeff, bins, chroma); | |
260 feature_length = bins; | |
261 | |
262 cluster_segment(q, chroma, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit); | |
263 | |
264 for (i = 0; i < frames_read; i++) | |
265 free(chroma[i]); | |
266 free(chroma); | |
267 } | |
268 } | |
269 | |
270 | |
271 |