function [net, options, errlog] = gtmem(net, t, options)
%GTMEM	EM algorithm for Generative Topographic Mapping.
%
%	Description
%	[NET, OPTIONS, ERRLOG] = GTMEM(NET, T, OPTIONS) uses the Expectation
%	Maximization algorithm to estimate the parameters of a GTM defined by
%	a data structure NET. The matrix T represents the data whose
%	expectation is maximized, with each row corresponding to a vector.
%	It is assumed that the latent data NET.X has been set following a
%	call to GTMINIT, for example. The optional parameters have the
%	following interpretations.
%
%	OPTIONS(1) is set to 1 to display error values; also logs error
%	values in the return argument ERRLOG. If OPTIONS(1) is set to 0, then
%	only warning messages are displayed. If OPTIONS(1) is -1, then
%	nothing is displayed.
%
%	OPTIONS(3) is a measure of the absolute precision required of the
%	error function at the solution. If the change in log likelihood
%	between two steps of the EM algorithm is less than this value, then
%	the function terminates.
%
%	OPTIONS(14) is the maximum number of iterations; default 100.
%
%	The optional return value OPTIONS contains the final error value
%	(i.e. data log likelihood) in OPTIONS(8).
%
%	See also
%	GTM, GTMINIT
%

%	Copyright (c) Ian T Nabney (1996-2001)

% Check that inputs are consistent
errstring = consist(net, 'gtm', t);
if ~isempty(errstring)
  error(errstring);
end

% Sort out the options: maximum number of EM cycles
if (options(14))
  niters = options(14);
else
  niters = 100;
end

display = options(1);
store = 0;
if (nargout > 2)
  store = 1;	% Store the error values to return them
  errlog = zeros(1, niters);
end
test = 0;
if options(3) > 0.0
  test = 1;	% Test log likelihood for termination
end

% Calculate various quantities that remain constant during training
[ndata, tdim] = size(t);
ND = ndata*tdim;
% Mixture centres are the RBF mapping of the (fixed) latent points NET.X;
% Phi is the matrix of basis-function activations, augmented with a
% column of ones for the output-layer bias.
[net.gmmnet.centres, Phi] = rbffwd(net.rbfnet, net.X);
Phi = [Phi ones(size(net.X, 1), 1)];
PhiT = Phi';
[K, Mplus1] = size(Phi);

% Use a sparse representation for the weight regularizing matrix.
% The bias weights (last row/column) are not regularized.
if (net.rbfnet.alpha > 0)
  Alpha = net.rbfnet.alpha*speye(Mplus1);
  Alpha(Mplus1, Mplus1) = 0;
end

for n = 1:niters
  % E-step: calculate responsibilities of each mixture component
  % for each data point.
  [R, act] = gtmpost(net, t);
  % Calculate error value if needed
  if (display || store || test)
    prob = act*(net.gmmnet.priors)';
    % Error value is negative log likelihood of data; eps floor guards
    % against log(0) for points with vanishing probability.
    e = - sum(log(max(prob,eps)));
    if store
      errlog(n) = e;
    end
    if display > 0
      fprintf(1, 'Cycle %4d Error %11.6f\n', n, e);
    end
    if test
      % Terminate when the change in negative log likelihood between
      % successive cycles falls below the requested precision.
      if (n > 1 && abs(e - eold) < options(3))
        options(8) = e;
        return;
      else
        eold = e;
      end
    end
  end

  % M-step: calculate matrix to be inverted (Phi'*G*Phi + alpha*I in the
  % papers), where G is the diagonal matrix of summed responsibilities.
  % Sparse representation of G normally executes faster and saves memory.
  if (net.rbfnet.alpha > 0)
    A = full(PhiT*spdiags(sum(R)', 0, K, K)*Phi + ...
      (Alpha.*net.gmmnet.covars(1)));
  else
    A = full(PhiT*spdiags(sum(R)', 0, K, K)*Phi);
  end
  % A is a symmetric matrix likely to be positive definite, so try
  % fast Cholesky decomposition to calculate W, otherwise fall back
  % to the pseudo-inverse.
  % (PhiT*(R'*t)) is computed right-to-left, as R
  % and t are normally (much) larger than PhiT.
  [cholDcmp, singular] = chol(A);
  if (singular)
    if (display)
      fprintf(1, ...
        'gtmem: Warning -- M-Step matrix singular, using pinv.\n');
    end
    W = pinv(A)*(PhiT*(R'*t));
  else
    % Solve A*W = PhiT*R'*t via two triangular solves.
    W = cholDcmp \ (cholDcmp' \ (PhiT*(R'*t)));
  end
  % Put new weights into network to calculate responsibilities
  % (equivalent to netunpak on net.rbfnet).
  net.rbfnet.w2 = W(1:net.rbfnet.nhidden, :);
  net.rbfnet.b2 = W(net.rbfnet.nhidden+1, :);
  % Calculate new distances between data and mixture centres
  d = dist2(t, Phi*W);

  % Calculate new value for beta: covariance (1/beta) is the
  % responsibility-weighted mean squared distance, shared by all centres.
  net.gmmnet.covars = ones(1, net.gmmnet.ncentres)*(sum(sum(d.*R))/ND);
end

% Maximum number of iterations reached without convergence: record the
% final negative log likelihood and warn.
options(8) = -sum(log(gtmprob(net, t)));
if (display >= 0)
  disp(maxitmess);
end