function [net, options, errlog] = gtmem(net, t, options)
%GTMEM EM algorithm for Generative Topographic Mapping.
%
%	Description
%	[NET, OPTIONS, ERRLOG] = GTMEM(NET, T, OPTIONS) uses the Expectation
%	Maximization algorithm to estimate the parameters of a GTM defined by
%	a data structure NET. The matrix T represents the data whose
%	expectation is maximized, with each row corresponding to a vector.
%	It is assumed that the latent data NET.X has been set following a
%	call to GTMINIT, for example. The optional parameters have the
%	following interpretations.
%
%	OPTIONS(1) is set to 1 to display error values; also logs error
%	values in the return argument ERRLOG. If OPTIONS(1) is set to 0, then
%	only warning messages are displayed. If OPTIONS(1) is -1, then
%	nothing is displayed.
%
%	OPTIONS(3) is a measure of the absolute precision required of the
%	error function at the solution. If the change in log likelihood
%	between two steps of the EM algorithm is less than this value, then
%	the function terminates.
%
%	OPTIONS(14) is the maximum number of iterations; default 100.
%
%	The optional return value OPTIONS contains the final error value
%	(i.e. data log likelihood) in OPTIONS(8). On early termination the
%	unused tail of ERRLOG remains zero.
%
%	See also
%	GTM, GTMINIT
%

%	Copyright (c) Ian T Nabney (1996-2001)

% Check that inputs are consistent
errstring = consist(net, 'gtm', t);
if ~isempty(errstring)
  error(errstring);
end

% Sort out the options
if (options(14))
  niters = options(14);
else
  niters = 100;		% Default number of EM cycles
end

display = options(1);
store = 0;
if (nargout > 2)
  store = 1;	% Store the error values to return them
  errlog = zeros(1, niters);
end
test = 0;
if options(3) > 0.0
  test = 1;	% Test log likelihood for termination
end

% Calculate various quantities that remain constant during training
[ndata, tdim] = size(t);
ND = ndata*tdim;
[net.gmmnet.centres, Phi] = rbffwd(net.rbfnet, net.X);
% Append a column of ones so that the output-layer bias is estimated
% together with the second-layer weights in the M-step solve.
Phi = [Phi ones(size(net.X, 1), 1)];
PhiT = Phi';
[K, Mplus1] = size(Phi);

% Use a sparse representation for the weight regularising matrix.
% The last diagonal entry corresponds to the bias, which is not
% penalised, so it is zeroed.
if (net.rbfnet.alpha > 0)
  Alpha = net.rbfnet.alpha*speye(Mplus1);
  Alpha(Mplus1, Mplus1) = 0;
end

for n = 1:niters
  % E-step: calculate responsibilities of each latent point for each
  % data point, plus the component activations.
  [R, act] = gtmpost(net, t);
  % Calculate error value if needed
  if (display || store || test)
    prob = act*(net.gmmnet.priors)';
    % Error value is negative log likelihood of data; max(.,eps)
    % guards against taking log of zero.
    e = - sum(log(max(prob,eps)));
    if store
      errlog(n) = e;
    end
    if display > 0
      fprintf(1, 'Cycle %4d Error %11.6f\n', n, e);
    end
    if test
      if (n > 1 && abs(e - eold) < options(3))
        options(8) = e;
        return;
      else
        eold = e;
      end
    end
  end

  % M-step: calculate matrix to be inverted (Phi'*G*Phi + alpha*I in
  % the papers). Sparse representation of G normally executes faster
  % and saves memory.
  if (net.rbfnet.alpha > 0)
    A = full(PhiT*spdiags(sum(R)', 0, K, K)*Phi + ...
      (Alpha.*net.gmmnet.covars(1)));
  else
    A = full(PhiT*spdiags(sum(R)', 0, K, K)*Phi);
  end
  % A is a symmetric matrix likely to be positive definite, so try
  % fast Cholesky decomposition to calculate W, otherwise fall back on
  % the pseudo-inverse.  (PhiT*(R'*t)) is computed right-to-left, as R
  % and t are normally (much) larger than PhiT.
  [cholDcmp, singular] = chol(A);
  if (singular)
    % Warnings are shown for display >= 0 only: the documented contract
    % is that OPTIONS(1) = -1 suppresses all output, while 0 still
    % shows warning messages.
    if (display >= 0)
      fprintf(1, ...
        'gtmem: Warning -- M-Step matrix singular, using pinv.\n');
    end
    W = pinv(A)*(PhiT*(R'*t));
  else
    W = cholDcmp \ (cholDcmp' \ (PhiT*(R'*t)));
  end
  % Put new weights into network to calculate responsibilities.
  % Equivalent to netunpak(net.rbfnet, W), but assigns the weight and
  % bias slices directly.
  net.rbfnet.w2 = W(1:net.rbfnet.nhidden, :);
  net.rbfnet.b2 = W(net.rbfnet.nhidden+1, :);
  % Calculate new squared distances from data to mixture centres
  d = dist2(t, Phi*W);

  % Calculate new value for beta: the responsibility-weighted mean
  % squared distance, shared by all mixture components.
  net.gmmnet.covars = ones(1, net.gmmnet.ncentres)*(sum(sum(d.*R))/ND);
end

% Iteration limit reached without convergence: record the final error
% and, unless display is -1, say so.
options(8) = -sum(log(gtmprob(net, t)));
if (display >= 0)
  disp(maxitmess);
end