cannam@484
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
c@243
|
2 /*
|
c@243
|
3 * cluster.c
|
c@243
|
4 * cluster_melt
|
c@243
|
5 *
|
c@243
|
6 * Created by Mark Levy on 21/02/2006.
|
c@309
|
7 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
|
c@309
|
8
|
c@309
|
9 This program is free software; you can redistribute it and/or
|
c@309
|
10 modify it under the terms of the GNU General Public License as
|
c@309
|
11 published by the Free Software Foundation; either version 2 of the
|
c@309
|
12 License, or (at your option) any later version. See the file
|
c@309
|
13 COPYING included with this distribution for more information.
|
c@243
|
14 *
|
c@243
|
15 */
|
c@243
|
16
|
c@243
|
17 #include <stdlib.h>
|
c@243
|
18
|
c@243
|
19 #include "cluster_melt.h"
|
c@243
|
20
|
c@243
|
21 #define DEFAULT_LAMBDA 0.02;
|
c@243
|
22 #define DEFAULT_LIMIT 20;
|
c@243
|
23
|
c@243
|
24 double kldist(double* a, double* b, int n) {
|
cannam@480
|
25 /* NB assume that all a[i], b[i] are non-negative
|
cannam@480
|
26 because a, b represent probability distributions */
|
cannam@480
|
27 double q, d;
|
cannam@480
|
28 int i;
|
cannam@480
|
29
|
cannam@480
|
30 d = 0;
|
cannam@480
|
31 for (i = 0; i < n; i++) {
|
cannam@480
|
32 q = (a[i] + b[i]) / 2.0;
|
cannam@480
|
33 if (q > 0) {
|
cannam@480
|
34 if (a[i] > 0) {
|
cannam@480
|
35 d += a[i] * log(a[i] / q);
|
cannam@480
|
36 }
|
cannam@480
|
37 if (b[i] > 0) {
|
cannam@480
|
38 d += b[i] * log(b[i] / q);
|
cannam@480
|
39 }
|
cannam@480
|
40 }
|
cannam@480
|
41 }
|
cannam@480
|
42 return d;
|
cannam@480
|
43 }
|
c@243
|
44
|
c@243
|
45 void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l, int *c) {
|
cannam@480
|
46 double lambda, sum, beta, logsumexp, maxlp;
|
cannam@480
|
47 int i, j, a, b, b0, b1, limit, /* B, */ it, maxiter, maxiter0, maxiter1;
|
cannam@480
|
48 double** cl; /* reference histograms for each cluster */
|
cannam@480
|
49 int** nc; /* neighbour counts for each histogram */
|
cannam@480
|
50 double** lp; /* soft assignment probs for each histogram */
|
cannam@480
|
51 int* oldc; /* previous hard assignments (to check convergence) */
|
cannam@480
|
52
|
cannam@480
|
53 /* NB h is passed as a 1d row major array */
|
cannam@480
|
54
|
cannam@480
|
55 /* parameter values */
|
cannam@480
|
56 lambda = DEFAULT_LAMBDA;
|
cannam@480
|
57 if (l > 0) {
|
cannam@480
|
58 limit = l;
|
cannam@480
|
59 } else {
|
cannam@480
|
60 limit = DEFAULT_LIMIT; /* use default if no valid neighbourhood limit supplied */
|
cannam@480
|
61 }
|
cannam@480
|
62
|
cannam@480
|
63 maxiter0 = 20; /* number of iterations at initial temperature */
|
cannam@480
|
64 maxiter1 = 5; /* number of iterations at subsequent temperatures */
|
cannam@480
|
65
|
cannam@480
|
66 /* allocate memory */
|
cannam@480
|
67 cl = (double**) malloc(k*sizeof(double*));
|
cannam@480
|
68 for (i= 0; i < k; i++) {
|
cannam@480
|
69 cl[i] = (double*) malloc(m*sizeof(double));
|
cannam@480
|
70 }
|
cannam@480
|
71
|
cannam@480
|
72 nc = (int**) malloc(n*sizeof(int*));
|
cannam@480
|
73 for (i= 0; i < n; i++) {
|
cannam@480
|
74 nc[i] = (int*) malloc(k*sizeof(int));
|
cannam@480
|
75 }
|
cannam@480
|
76
|
cannam@480
|
77 lp = (double**) malloc(n*sizeof(double*));
|
cannam@480
|
78 for (i= 0; i < n; i++) {
|
cannam@480
|
79 lp[i] = (double*) malloc(k*sizeof(double));
|
cannam@480
|
80 }
|
cannam@480
|
81
|
cannam@480
|
82 oldc = (int*) malloc(n * sizeof(int));
|
cannam@480
|
83
|
cannam@480
|
84 /* initialise */
|
cannam@480
|
85 for (i = 0; i < k; i++) {
|
cannam@480
|
86 sum = 0;
|
cannam@480
|
87 for (j = 0; j < m; j++) {
|
cannam@480
|
88 cl[i][j] = rand(); /* random initial reference histograms */
|
cannam@480
|
89 sum += cl[i][j] * cl[i][j];
|
cannam@480
|
90 }
|
cannam@480
|
91 sum = sqrt(sum);
|
cannam@480
|
92 for (j = 0; j < m; j++) {
|
cannam@480
|
93 cl[i][j] /= sum; /* normalise */
|
cannam@480
|
94 }
|
cannam@480
|
95 }
|
cannam@480
|
96
|
cannam@480
|
97 for (i = 0; i < n; i++) {
|
cannam@480
|
98 c[i] = 1; /* initially assign all histograms to cluster 1 */
|
cannam@480
|
99 }
|
cannam@480
|
100
|
cannam@480
|
101 for (a = 0; a < t; a++) {
|
cannam@480
|
102
|
cannam@480
|
103 beta = Bsched[a];
|
cannam@480
|
104
|
cannam@480
|
105 if (a == 0) {
|
cannam@480
|
106 maxiter = maxiter0;
|
cannam@480
|
107 } else {
|
cannam@480
|
108 maxiter = maxiter1;
|
cannam@480
|
109 }
|
cannam@480
|
110
|
cannam@480
|
111 for (it = 0; it < maxiter; it++) {
|
cannam@480
|
112
|
cannam@480
|
113 //if (it == maxiter - 1)
|
cannam@480
|
114 // mexPrintf("hasn't converged after %d iterations\n", maxiter);
|
cannam@480
|
115
|
cannam@480
|
116 for (i = 0; i < n; i++) {
|
cannam@480
|
117
|
cannam@480
|
118 /* save current hard assignments */
|
cannam@480
|
119 oldc[i] = c[i];
|
cannam@480
|
120
|
cannam@480
|
121 /* calculate soft assignment logprobs for each cluster */
|
cannam@480
|
122 sum = 0;
|
cannam@480
|
123
|
cannam@480
|
124 for (j = 0; j < k; j++) {
|
cannam@480
|
125
|
cannam@480
|
126 lp[i][ j] = -beta * kldist(cl[j], &h[i*m], m);
|
cannam@480
|
127
|
cannam@480
|
128 /* update matching neighbour counts for this histogram, based on current hard assignments */
|
cannam@480
|
129
|
cannam@480
|
130 b0 = i - limit;
|
cannam@480
|
131 if (b0 < 0) {
|
cannam@480
|
132 b0 = 0;
|
cannam@480
|
133 }
|
cannam@480
|
134 b1 = i + limit;
|
cannam@480
|
135 if (b1 >= n) {
|
cannam@480
|
136 b1 = n - 1;
|
cannam@480
|
137 }
|
cannam@480
|
138 nc[i][j] = b1 - b0 + 1; /* = B except at edges */
|
cannam@480
|
139 for (b = b0; b <= b1; b++) {
|
cannam@480
|
140 if (c[b] == j+1) {
|
cannam@480
|
141 nc[i][j]--;
|
cannam@480
|
142 }
|
cannam@480
|
143 }
|
cannam@480
|
144
|
cannam@480
|
145 sum += exp(lp[i][j]);
|
cannam@480
|
146 }
|
cannam@480
|
147
|
cannam@480
|
148 /* normalise responsibilities and add duration logprior */
|
cannam@480
|
149 logsumexp = log(sum);
|
cannam@481
|
150 for (j = 0; j < k; j++) {
|
cannam@480
|
151 lp[i][j] -= logsumexp + lambda * nc[i][j];
|
cannam@480
|
152 }
|
cannam@480
|
153 }
|
cannam@480
|
154
|
cannam@480
|
155 /* update the assignments now that we know the duration priors
|
cannam@480
|
156 based on the current assignments */
|
cannam@480
|
157 for (i = 0; i < n; i++) {
|
cannam@480
|
158 maxlp = lp[i][0];
|
cannam@480
|
159 c[i] = 1;
|
cannam@480
|
160 for (j = 1; j < k; j++) {
|
cannam@480
|
161 if (lp[i][j] > maxlp) {
|
cannam@480
|
162 maxlp = lp[i][j];
|
cannam@480
|
163 c[i] = j+1;
|
cannam@480
|
164 }
|
cannam@480
|
165 }
|
cannam@480
|
166 }
|
cannam@480
|
167
|
cannam@480
|
168 /* break if assignments haven't changed */
|
cannam@480
|
169 i = 0;
|
cannam@480
|
170 while (i < n && oldc[i] == c[i]) {
|
cannam@480
|
171 i++;
|
cannam@480
|
172 }
|
cannam@480
|
173 if (i == n) {
|
cannam@480
|
174 break;
|
cannam@480
|
175 }
|
cannam@480
|
176
|
cannam@480
|
177 /* update reference histograms now we know new responsibilities */
|
cannam@480
|
178 for (j = 0; j < k; j++) {
|
cannam@480
|
179 for (b = 0; b < m; b++) {
|
cannam@480
|
180 cl[j][b] = 0;
|
cannam@480
|
181 for (i = 0; i < n; i++) {
|
cannam@480
|
182 cl[j][b] += exp(lp[i][j]) * h[i*m+b];
|
cannam@480
|
183 }
|
cannam@480
|
184 }
|
cannam@480
|
185
|
cannam@480
|
186 sum = 0;
|
cannam@480
|
187 for (i = 0; i < n; i++) {
|
cannam@480
|
188 sum += exp(lp[i][j]);
|
cannam@480
|
189 }
|
cannam@480
|
190 for (b = 0; b < m; b++) {
|
cannam@480
|
191 cl[j][b] /= sum; /* normalise */
|
cannam@480
|
192 }
|
cannam@480
|
193 }
|
cannam@480
|
194 }
|
cannam@480
|
195 }
|
cannam@480
|
196
|
cannam@480
|
197 /* free memory */
|
cannam@480
|
198 for (i = 0; i < k; i++) {
|
cannam@480
|
199 free(cl[i]);
|
cannam@480
|
200 }
|
cannam@480
|
201 free(cl);
|
cannam@480
|
202 for (i = 0; i < n; i++) {
|
cannam@480
|
203 free(nc[i]);
|
cannam@480
|
204 }
|
cannam@480
|
205 free(nc);
|
cannam@480
|
206 for (i = 0; i < n; i++) {
|
cannam@480
|
207 free(lp[i]);
|
cannam@480
|
208 }
|
cannam@480
|
209 free(lp);
|
cannam@480
|
210 free(oldc);
|
c@243
|
211 }
|
c@243
|
212
|
c@243
|
213
|