function cwr = cwr_em(X, Y, nc, varargin)
% CWR_EM Fit the parameters of a cluster weighted regression model using EM
% function cwr = cwr_em(X, Y, nc, ...)
%
% X(:, t) is the t'th input example
% Y(:, t) is the t'th output example
% nc is the number of clusters
%
% Optional arguments are given as 'name', value pairs [default in brackets]:
%
% max_iter           - maximum number of EM iterations [10]
% thresh             - threshold handed to em_converged [1e-2]
% cov_typeX          - 'cov_type' handed to mixgauss_init/mixgauss_Mstep for X ['full']
% cov_typeY          - 'cov_type' handed to mixgauss_init/mixgauss_Mstep/clg_Mstep for Y ['full']
% cov_priorX         - 'cov_prior' handed to mixgauss_Mstep for X [ [] ]
% cov_priorY         - 'cov_prior' handed to mixgauss_Mstep/clg_Mstep for Y [ [] ]
% create_init_params - if true, initial parameters come from mixgauss_init;
%                      otherwise muX, muY, SigmaX, SigmaY, weightsY and
%                      priorC are taken from the options below [1]
% muX, muY, SigmaX, SigmaY, weightsY, priorC - user supplied parameters [ [] ]
% clamp_covX         - if true, cwr.SigmaX is reset to SigmaX after every M step [0]
% clamp_covY         - if true, cwr.SigmaY is reset to SigmaY after every M step [0]
% clamp_weights      - if true, cwr.weightsY is reset to weightsY after every
%                      M step and is also passed to clg_Mstep as
%                      'clamped_weights' [0]
% regress            - when nc == 1, selects linear regression (1) or an
%                      unconditional Gaussian on Y (0); it also gates the
%                      "dim X > nsamples" warning [1]
% verbose            - if true, print the log-likelihood at every iteration [0]
%
% The result is a struct with fields priorC, SigmaX, muY, SigmaY and
% weightsY. The field muX is only set when nc > 1.
%
% Kevin Murphy, May 2003

[max_iter, thresh, cov_typeX, cov_typeY, clamp_weights, ...
 muX, muY, SigmaX, SigmaY, weightsY, priorC, create_init_params, ...
 cov_priorX, cov_priorY, verbose, regress, clamp_covX, clamp_covY] = process_options(...
    varargin, 'max_iter', 10, 'thresh', 1e-2, 'cov_typeX', 'full', ...
    'cov_typeY', 'full', 'clamp_weights', 0, ...
    'muX', [], 'muY', [], 'SigmaX', [], 'SigmaY', [], 'weightsY', [], 'priorC', [], ...
    'create_init_params', 1, 'cov_priorX', [], 'cov_priorY', [], 'verbose', 0, ...
    'regress', 1, 'clamp_covX', 0, 'clamp_covY', 0);

[nx, N] = size(X);
[ny, N2] = size(Y);
if N ~= N2
  error('nsamples X (%d) ~= nsamples Y (%d)', N, N2);
end
if (N < nx) && regress
  fprintf('cwr_em warning: dim X = %d, nsamples X = %d\n', nx, N);
end
if (N < ny)
  fprintf('cwr_em warning: dim Y = %d, nsamples Y = %d\n', ny, N);
end
if (nc > N)
  error('cwr_em: more centers (%d) than data', nc);
end

if nc==1
  % No latent variable, so there is a closed-form solution
  w = 1/N; % every sample gets the same weight
  WYbig = Y*w;
  WYY = WYbig * Y';
  WY = sum(WYbig, 2);
  % Same value as sum(diag(WYbig' * Y)), i.e. trace(WYbig' * Y), but
  % without building an N-by-N matrix.
  WYTY = sum(WYbig(:) .* Y(:));
  cwr.priorC = 1;
  cwr.SigmaX = [];
  if ~regress
    % This is just fitting an unconditional Gaussian
    cwr.weightsY = [];
    [cwr.muY, cwr.SigmaY] = ...
        mixgauss_Mstep(1, WY, WYY, WYTY, ...
                       'cov_type', cov_typeY, 'cov_prior', cov_priorY);
    % There is a much easier way (mean(Y') and cov(Y')). The debugging
    % asserts that used to compare against cov(Y') + 0.01*eye(ny) were
    % removed: they hard-coded a full covariance with a 0.01*I prior,
    % whereas cov_typeY and cov_priorY are chosen by the caller.
  else
    % This is just linear regression
    WXbig = X*w;
    WXX = WXbig * X';
    WX = sum(WXbig, 2);
    WXY = WXbig * Y';
    [cwr.muY, cwr.SigmaY, cwr.weightsY] = ...
        clg_Mstep(1, WY, WYY, WYTY, WX, WXX, WXY, ...
                  'cov_type', cov_typeY, 'cov_prior', cov_priorY);
  end
  if clamp_covY, cwr.SigmaY = SigmaY; end
  if clamp_weights, cwr.weightsY = weightsY; end
  return;
end


if create_init_params
  [cwr.muX, cwr.SigmaX] = mixgauss_init(nc, X, cov_typeX);
  [cwr.muY, cwr.SigmaY] = mixgauss_init(nc, Y, cov_typeY);
  cwr.weightsY = zeros(ny, nx, nc);
  cwr.priorC = normalize(ones(nc,1));
else
  cwr.muX = muX;  cwr.muY = muY; cwr.SigmaX = SigmaX;  cwr.SigmaY = SigmaY;
  cwr.weightsY = weightsY; cwr.priorC = priorC;
end

% Clamped quantities always take the user supplied value, even when the
% other parameters were initialised by mixgauss_init.
if clamp_covY, cwr.SigmaY = SigmaY; end
if clamp_covX, cwr.SigmaX = SigmaX; end
if clamp_weights, cwr.weightsY = weightsY; end

previous_loglik = -inf;
num_iter = 1;
converged = 0;

while (num_iter <= max_iter) && ~converged

  % E step

  [likXandY, likYgivenX, post] = cwr_prob(cwr, X, Y);
  loglik = sum(log(likXandY));
  % extract expected sufficient statistics
  w = sum(post,2);  % post(c,t)
  WYY = zeros(ny, ny, nc);
  WY = zeros(ny, nc);
  WYTY = zeros(nc,1);

  WXX = zeros(nx, nx, nc);
  WX = zeros(nx, nc);
  WXTX = zeros(nc, 1);
  WXY = zeros(nx,ny,nc);
  for c=1:nc
    weights = repmat(post(c,:), ny, 1); % weights(ny, nsamples)
    WYbig = Y .* weights;
    WYY(:,:,c) = WYbig * Y';          % sum_t post(c,t) Y(:,t) Y(:,t)'
    WY(:,c) = sum(WYbig, 2);          % sum_t post(c,t) Y(:,t)
    % sum_t post(c,t) Y(:,t)'*Y(:,t); equals sum(diag(WYbig' * Y)) but
    % avoids the N-by-N intermediate.
    WYTY(c) = sum(WYbig(:) .* Y(:));

    weights = repmat(post(c,:), nx, 1); % weights(nx, nsamples)
    WXbig = X .* weights;
    WXX(:,:,c) = WXbig * X';
    WX(:,c) = sum(WXbig, 2);
    WXTX(c) = sum(WXbig(:) .* X(:));
    WXY(:,:,c) = WXbig * Y';
  end

  % M step
  % Q -> X is called Q->Y in Mstep_clg
  [cwr.muX, cwr.SigmaX] = mixgauss_Mstep(w, WX, WXX, WXTX, ...
                                         'cov_type', cov_typeX, 'cov_prior', cov_priorX);
  for c=1:nc
    assert(is_psd(cwr.SigmaX(:,:,c)))
  end

  if clamp_weights % affects estimate of mu and Sigma
    W = cwr.weightsY;
  else
    W = [];
  end
  [cwr.muY, cwr.SigmaY, cwr.weightsY] = ...
      clg_Mstep(w, WY, WYY, WYTY, WX, WXX, WXY, ...
                'cov_type', cov_typeY, 'clamped_weights', W, ...
                'cov_prior', cov_priorY);

  cwr.priorC = normalize(w);

  for c=1:nc
    assert(is_psd(cwr.SigmaY(:,:,c)))
  end

  % Undo the M step for anything the caller asked to keep fixed
  if clamp_covY, cwr.SigmaY = SigmaY; end
  if clamp_covX, cwr.SigmaX = SigmaX; end
  if clamp_weights, cwr.weightsY = weightsY; end

  if verbose, fprintf(1, 'iteration %d, loglik = %f\n', num_iter, loglik); end
  num_iter = num_iter + 1;
  % loglik was evaluated in the E step, i.e. under the parameters that
  % were current before this iteration's M step.
  converged = em_converged(loglik, previous_loglik, thresh);
  previous_loglik = loglik;

end