cannam@18
|
1 /*
|
cannam@18
|
2 * cluster.c
|
cannam@18
|
3 * cluster_melt
|
cannam@18
|
4 *
|
cannam@18
|
5 * Created by Mark Levy on 21/02/2006.
|
cannam@18
|
6 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved.
|
cannam@18
|
7 *
|
cannam@18
|
8 */
|
cannam@18
|
9
|
cannam@18
|
10 #include <stdlib.h>
|
cannam@18
|
11
|
cannam@18
|
12 #include "cluster_melt.h"
|
cannam@18
|
13
|
cannam@18
|
14 #define DEFAULT_LAMBDA 0.02;
|
cannam@18
|
15 #define DEFAULT_LIMIT 20;
|
cannam@18
|
16
|
cannam@18
|
17 double kldist(double* a, double* b, int n) {
|
cannam@18
|
18 /* NB assume that all a[i], b[i] are non-negative
|
cannam@18
|
19 because a, b represent probability distributions */
|
cannam@18
|
20 double q, d;
|
cannam@18
|
21 int i;
|
cannam@18
|
22
|
cannam@18
|
23 d = 0;
|
cannam@18
|
24 for (i = 0; i < n; i++)
|
cannam@18
|
25 {
|
cannam@18
|
26 q = (a[i] + b[i]) / 2.0;
|
cannam@18
|
27 if (q > 0)
|
cannam@18
|
28 {
|
cannam@18
|
29 if (a[i] > 0)
|
cannam@18
|
30 d += a[i] * log(a[i] / q);
|
cannam@18
|
31 if (b[i] > 0)
|
cannam@18
|
32 d += b[i] * log(b[i] / q);
|
cannam@18
|
33 }
|
cannam@18
|
34 }
|
cannam@18
|
35 return d;
|
cannam@18
|
36 }
|
cannam@18
|
37
|
cannam@18
|
38 void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l, int *c) {
|
cannam@18
|
39 double lambda, sum, beta, logsumexp, maxlp;
|
cannam@18
|
40 int i, j, a, b, b0, b1, limit, B, it, maxiter, maxiter0, maxiter1;
|
cannam@18
|
41 double** cl; /* reference histograms for each cluster */
|
cannam@18
|
42 int** nc; /* neighbour counts for each histogram */
|
cannam@18
|
43 double** lp; /* soft assignment probs for each histogram */
|
cannam@18
|
44 int* oldc; /* previous hard assignments (to check convergence) */
|
cannam@18
|
45
|
cannam@18
|
46 /* NB h is passed as a 1d row major array */
|
cannam@18
|
47
|
cannam@18
|
48 /* parameter values */
|
cannam@18
|
49 lambda = DEFAULT_LAMBDA;
|
cannam@18
|
50 if (l > 0)
|
cannam@18
|
51 limit = l;
|
cannam@18
|
52 else
|
cannam@18
|
53 limit = DEFAULT_LIMIT; /* use default if no valid neighbourhood limit supplied */
|
cannam@18
|
54 B = 2 * limit + 1;
|
cannam@18
|
55 maxiter0 = 20; /* number of iterations at initial temperature */
|
cannam@18
|
56 maxiter1 = 5; /* number of iterations at subsequent temperatures */
|
cannam@18
|
57
|
cannam@18
|
58 /* allocate memory */
|
cannam@18
|
59 cl = (double**) malloc(k*sizeof(double*));
|
cannam@18
|
60 for (i= 0; i < k; i++)
|
cannam@18
|
61 cl[i] = (double*) malloc(m*sizeof(double));
|
cannam@18
|
62
|
cannam@18
|
63 nc = (int**) malloc(n*sizeof(int*));
|
cannam@18
|
64 for (i= 0; i < n; i++)
|
cannam@18
|
65 nc[i] = (int*) malloc(k*sizeof(int));
|
cannam@18
|
66
|
cannam@18
|
67 lp = (double**) malloc(n*sizeof(double*));
|
cannam@18
|
68 for (i= 0; i < n; i++)
|
cannam@18
|
69 lp[i] = (double*) malloc(k*sizeof(double));
|
cannam@18
|
70
|
cannam@18
|
71 oldc = (int*) malloc(n * sizeof(int));
|
cannam@18
|
72
|
cannam@18
|
73 /* initialise */
|
cannam@18
|
74 for (i = 0; i < k; i++)
|
cannam@18
|
75 {
|
cannam@18
|
76 sum = 0;
|
cannam@18
|
77 for (j = 0; j < m; j++)
|
cannam@18
|
78 {
|
cannam@18
|
79 cl[i][j] = rand(); /* random initial reference histograms */
|
cannam@18
|
80 sum += cl[i][j] * cl[i][j];
|
cannam@18
|
81 }
|
cannam@18
|
82 sum = sqrt(sum);
|
cannam@18
|
83 for (j = 0; j < m; j++)
|
cannam@18
|
84 {
|
cannam@18
|
85 cl[i][j] /= sum; /* normalise */
|
cannam@18
|
86 }
|
cannam@18
|
87 }
|
cannam@18
|
88 //print_array(cl, k, m);
|
cannam@18
|
89
|
cannam@18
|
90 for (i = 0; i < n; i++)
|
cannam@18
|
91 c[i] = 1; /* initially assign all histograms to cluster 1 */
|
cannam@18
|
92
|
cannam@18
|
93 for (a = 0; a < t; a++)
|
cannam@18
|
94 {
|
cannam@18
|
95 beta = Bsched[a];
|
cannam@18
|
96
|
cannam@18
|
97 if (a == 0)
|
cannam@18
|
98 maxiter = maxiter0;
|
cannam@18
|
99 else
|
cannam@18
|
100 maxiter = maxiter1;
|
cannam@18
|
101
|
cannam@18
|
102 for (it = 0; it < maxiter; it++)
|
cannam@18
|
103 {
|
cannam@18
|
104 //if (it == maxiter - 1)
|
cannam@18
|
105 // mexPrintf("hasn't converged after %d iterations\n", maxiter);
|
cannam@18
|
106
|
cannam@18
|
107 for (i = 0; i < n; i++)
|
cannam@18
|
108 {
|
cannam@18
|
109 /* save current hard assignments */
|
cannam@18
|
110 oldc[i] = c[i];
|
cannam@18
|
111
|
cannam@18
|
112 /* calculate soft assignment logprobs for each cluster */
|
cannam@18
|
113 sum = 0;
|
cannam@18
|
114 for (j = 0; j < k; j++)
|
cannam@18
|
115 {
|
cannam@18
|
116 lp[i][ j] = -beta * kldist(cl[j], &h[i*m], m);
|
cannam@18
|
117
|
cannam@18
|
118 /* update matching neighbour counts for this histogram, based on current hard assignments */
|
cannam@18
|
119 /* old version:
|
cannam@18
|
120 nc[i][j] = 0;
|
cannam@18
|
121 if (i >= limit && i <= n - 1 - limit)
|
cannam@18
|
122 {
|
cannam@18
|
123 for (b = i - limit; b <= i + limit; b++)
|
cannam@18
|
124 {
|
cannam@18
|
125 if (c[b] == j+1)
|
cannam@18
|
126 nc[i][j]++;
|
cannam@18
|
127 }
|
cannam@18
|
128 nc[i][j] = B - nc[i][j];
|
cannam@18
|
129 }
|
cannam@18
|
130 */
|
cannam@18
|
131 b0 = i - limit;
|
cannam@18
|
132 if (b0 < 0)
|
cannam@18
|
133 b0 = 0;
|
cannam@18
|
134 b1 = i + limit;
|
cannam@18
|
135 if (b1 >= n)
|
cannam@18
|
136 b1 = n - 1;
|
cannam@18
|
137 nc[i][j] = b1 - b0 + 1; /* = B except at edges */
|
cannam@18
|
138 for (b = b0; b <= b1; b++)
|
cannam@18
|
139 if (c[b] == j+1)
|
cannam@18
|
140 nc[i][j]--;
|
cannam@18
|
141
|
cannam@18
|
142 sum += exp(lp[i][j]);
|
cannam@18
|
143 }
|
cannam@18
|
144
|
cannam@18
|
145 /* normalise responsibilities and add duration logprior */
|
cannam@18
|
146 logsumexp = log(sum);
|
cannam@18
|
147 for (j = 0; j < k; j++)
|
cannam@18
|
148 lp[i][j] -= logsumexp + lambda * nc[i][j];
|
cannam@18
|
149 }
|
cannam@18
|
150 //print_array(lp, n, k);
|
cannam@18
|
151 /*
|
cannam@18
|
152 for (i = 0; i < n; i++)
|
cannam@18
|
153 {
|
cannam@18
|
154 for (j = 0; j < k; j++)
|
cannam@18
|
155 mexPrintf("%d ", nc[i][j]);
|
cannam@18
|
156 mexPrintf("\n");
|
cannam@18
|
157 }
|
cannam@18
|
158 */
|
cannam@18
|
159
|
cannam@18
|
160
|
cannam@18
|
161 /* update the assignments now that we know the duration priors
|
cannam@18
|
162 based on the current assignments */
|
cannam@18
|
163 for (i = 0; i < n; i++)
|
cannam@18
|
164 {
|
cannam@18
|
165 maxlp = lp[i][0];
|
cannam@18
|
166 c[i] = 1;
|
cannam@18
|
167 for (j = 1; j < k; j++)
|
cannam@18
|
168 if (lp[i][j] > maxlp)
|
cannam@18
|
169 {
|
cannam@18
|
170 maxlp = lp[i][j];
|
cannam@18
|
171 c[i] = j+1;
|
cannam@18
|
172 }
|
cannam@18
|
173 }
|
cannam@18
|
174
|
cannam@18
|
175 /* break if assignments haven't changed */
|
cannam@18
|
176 i = 0;
|
cannam@18
|
177 while (i < n && oldc[i] == c[i])
|
cannam@18
|
178 i++;
|
cannam@18
|
179 if (i == n)
|
cannam@18
|
180 break;
|
cannam@18
|
181
|
cannam@18
|
182 /* update reference histograms now we know new responsibilities */
|
cannam@18
|
183 for (j = 0; j < k; j++)
|
cannam@18
|
184 {
|
cannam@18
|
185 for (b = 0; b < m; b++)
|
cannam@18
|
186 {
|
cannam@18
|
187 cl[j][b] = 0;
|
cannam@18
|
188 for (i = 0; i < n; i++)
|
cannam@18
|
189 {
|
cannam@18
|
190 cl[j][b] += exp(lp[i][j]) * h[i*m+b];
|
cannam@18
|
191 }
|
cannam@18
|
192 }
|
cannam@18
|
193
|
cannam@18
|
194 sum = 0;
|
cannam@18
|
195 for (i = 0; i < n; i++)
|
cannam@18
|
196 sum += exp(lp[i][j]);
|
cannam@18
|
197 for (b = 0; b < m; b++)
|
cannam@18
|
198 cl[j][b] /= sum; /* normalise */
|
cannam@18
|
199 }
|
cannam@18
|
200
|
cannam@18
|
201 //print_array(cl, k, m);
|
cannam@18
|
202 //mexPrintf("\n\n");
|
cannam@18
|
203 }
|
cannam@18
|
204 }
|
cannam@18
|
205
|
cannam@18
|
206 /* free memory */
|
cannam@18
|
207 for (i = 0; i < k; i++)
|
cannam@18
|
208 free(cl[i]);
|
cannam@18
|
209 free(cl);
|
cannam@18
|
210 for (i = 0; i < n; i++)
|
cannam@18
|
211 free(nc[i]);
|
cannam@18
|
212 free(nc);
|
cannam@18
|
213 for (i = 0; i < n; i++)
|
cannam@18
|
214 free(lp[i]);
|
cannam@18
|
215 free(lp);
|
cannam@18
|
216 free(oldc);
|
cannam@18
|
217 }
|
cannam@18
|
218
|
cannam@18
|
219
|