function [net, options, errlog] = gtmem(net, t, options)
%GTMEM	EM algorithm for Generative Topographic Mapping.
%
%	Description
%	[NET, OPTIONS, ERRLOG] = GTMEM(NET, T, OPTIONS) uses the Expectation
%	Maximization algorithm to estimate the parameters of a GTM defined by
%	a data structure NET. The matrix T represents the data whose
%	expectation is maximized, with each row corresponding to a vector.
%	It is assumed that the latent data NET.X has been set following a
%	call to GTMINIT, for example. The optional parameters have the
%	following interpretations.
%
%	OPTIONS(1) is set to 1 to display error values; also logs error
%	values in the return argument ERRLOG. If OPTIONS(1) is set to 0, then
%	only warning messages are displayed. If OPTIONS(1) is -1, then
%	nothing is displayed.
%
%	OPTIONS(3) is a measure of the absolute precision required of the
%	error function at the solution. If the change in log likelihood
%	between two steps of the EM algorithm is less than this value, then
%	the function terminates.
%
%	OPTIONS(14) is the maximum number of iterations; default 100.
%
%	The optional return value OPTIONS contains the final error value
%	(i.e. data log likelihood) in OPTIONS(8).
%
%	See also
%	GTM, GTMINIT
%

%	Copyright (c) Ian T Nabney (1996-2001)

% Check that inputs are consistent
errstring = consist(net, 'gtm', t);
if ~isempty(errstring)
  error(errstring);
end

% Sort out the options: maximum number of EM cycles
if (options(14))
  niters = options(14);
else
  niters = 100;
end

display = options(1);
store = 0;
if (nargout > 2)
  store = 1;	% Store the error values to return them
  errlog = zeros(1, niters);
end
test = 0;
if options(3) > 0.0
  test = 1;	% Test log likelihood for termination
end

% Calculate various quantities that remain constant during training
[ndata, tdim] = size(t);
ND = ndata*tdim;
% Mixture centres are the RBF mapping of the (fixed) latent points NET.X;
% Phi is the matrix of basis-function activations, augmented with a
% column of ones for the output-layer bias.
[net.gmmnet.centres, Phi] = rbffwd(net.rbfnet, net.X);
Phi = [Phi ones(size(net.X, 1), 1)];
PhiT = Phi';
[K, Mplus1] = size(Phi);

% Use a sparse representation for the weight regularizing matrix.
% The bias weights (last row/column) are not regularized.
if (net.rbfnet.alpha > 0)
  Alpha = net.rbfnet.alpha*speye(Mplus1);
  Alpha(Mplus1, Mplus1) = 0;
end

for n = 1:niters
  % E-step: calculate responsibilities of each mixture component
  % for each data point.
  [R, act] = gtmpost(net, t);
  % Calculate error value if needed
  if (display || store || test)
    prob = act*(net.gmmnet.priors)';
    % Error value is negative log likelihood of data; eps floor guards
    % against log(0) for points with vanishing probability.
    e = - sum(log(max(prob,eps)));
    if store
      errlog(n) = e;
    end
    if display > 0
      fprintf(1, 'Cycle %4d Error %11.6f\n', n, e);
    end
    if test
      % Terminate when the change in negative log likelihood between
      % successive cycles falls below the requested precision.
      if (n > 1 && abs(e - eold) < options(3))
        options(8) = e;
        return;
      else
        eold = e;
      end
    end
  end

  % M-step: calculate matrix to be inverted (Phi'*G*Phi + alpha*I in the
  % papers), where G is the diagonal matrix of summed responsibilities.
  % Sparse representation of G normally executes faster and saves memory.
  if (net.rbfnet.alpha > 0)
    A = full(PhiT*spdiags(sum(R)', 0, K, K)*Phi + ...
      (Alpha.*net.gmmnet.covars(1)));
  else
    A = full(PhiT*spdiags(sum(R)', 0, K, K)*Phi);
  end
  % A is a symmetric matrix likely to be positive definite, so try
  % fast Cholesky decomposition to calculate W, otherwise fall back
  % to the pseudo-inverse.
  % (PhiT*(R'*t)) is computed right-to-left, as R
  % and t are normally (much) larger than PhiT.
  [cholDcmp, singular] = chol(A);
  if (singular)
    if (display)
      fprintf(1, ...
        'gtmem: Warning -- M-Step matrix singular, using pinv.\n');
    end
    W = pinv(A)*(PhiT*(R'*t));
  else
    % Solve A*W = PhiT*R'*t via two triangular solves.
    W = cholDcmp \ (cholDcmp' \ (PhiT*(R'*t)));
  end
  % Put new weights into network to calculate responsibilities
  % (equivalent to netunpak on net.rbfnet).
  net.rbfnet.w2 = W(1:net.rbfnet.nhidden, :);
  net.rbfnet.b2 = W(net.rbfnet.nhidden+1, :);
  % Calculate new distances between data and mixture centres
  d = dist2(t, Phi*W);

  % Calculate new value for beta: covariance (1/beta) is the
  % responsibility-weighted mean squared distance, shared by all centres.
  net.gmmnet.covars = ones(1, net.gmmnet.ncentres)*(sum(sum(d.*R))/ND);
end

% Maximum number of iterations reached without convergence: record the
% final negative log likelihood and warn.
options(8) = -sum(log(gtmprob(net, t)));
if (display >= 0)
  disp(maxitmess);
end