function [net, options] = glmtrain_weighted(net, options, x, t, eso_w, alfa)
%GLMTRAIN_WEIGHTED Specialised training of generalized linear model on weighted data
%
% Description
% NET = GLMTRAIN_WEIGHTED(NET, OPTIONS, X, T, ESO_W, ALFA) uses a
% weighted form of the iterative reweighted least squares (IRLS)
% algorithm to set the weights in the generalized linear model
% structure NET. This is a more efficient alternative to using
% GLMERR_WEIGHTED and a non-linear optimisation routine through
% NETOPT. The error function value at the final set of weights is
% returned in OPTIONS(8). Each row of X corresponds to one input
% vector and each row of T corresponds to one target vector. ESO_W is
% a column vector whose Nth element weights the error contribution of
% the Nth pattern, and ALFA is the step length used for the
% exact-Hessian update. Note that, unlike GLMTRAIN, this function
% only supports networks with softmax outputs.
%
% The optional parameters have the following interpretations.
%
% OPTIONS(1) is set to 1 to display error values during training. If
% OPTIONS(1) is set to 0, then only warning messages are displayed. If
% OPTIONS(1) is -1, then nothing is displayed.
%
% OPTIONS(2) is a measure of the precision required for the value of
% the weights W at the solution.
%
% OPTIONS(3) is a measure of the precision required of the objective
% function at the solution. Both this and the previous condition must
% be satisfied for termination.
%
% OPTIONS(5) is set to 1 if an approximation to the Hessian (which
% assumes that all outputs are independent) is used for softmax
% outputs. With the default value of 0 the exact Hessian (which is
% more expensive to compute) is used.
%
% OPTIONS(14) is the maximum number of iterations for the IRLS
% algorithm; default 100.
%
% See also
% GLM, GLMTRAIN, GLMERR_WEIGHTED, GLMHESS_WEIGHTED
%

% Copyright (c) Christopher M Bishop, Ian T Nabney (1996, 1997)

% Check arguments for consistency
errstring = consist(net, 'glm', x, t);
if ~isempty(errstring)
  error(errstring);
end

if (~options(14))
  options(14) = 100;
end

display = options(1);

test = (options(2) | options(3));  % Do we need to test for termination?
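
% For softmax outputs, each IRLS cycle forms the adjusted dependent
% variable z = act + (t - p)./(p.*(1-p)) and then, for every output
% unit independently, solves the weighted least-squares problem
%   minimise  sum over n of  p_n.*(1 - p_n).*(inputs_n*w - z_n).^2
% using the backslash operator (see the loop over outputs below).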

ndata = size(x, 1);

inputs = [x ones(ndata, 1)];  % Add a column of ones for the bias

% Use weighted iterative reweighted least squares (WIRLS)
e = ones(1, net.nin+1);  % Row of ones used to replicate a weight column
for n = 1:options(14)

  switch net.outfn
    case 'softmax'
      if n == 1
        % Initialise model: ensure that the row sums of p are one no
        % matter how many classes there are
        p = (t + (1/size(t, 2)))/2;
        act = log(p./(1-p));
      end
      if options(5) == 1 | n == 1
        % Approximate (independent-outputs) method; always used to
        % initialise the weights on the first cycle
        link_deriv = p.*(1-p);
        weights = sqrt(link_deriv);  % Square root of the IRLS weights
        if (min(min(weights)) < eps)
          fprintf(1, 'Warning: ill-conditioned weights in glmtrain_weighted\n')
          return
        end
        z = act + (t-p)./link_deriv;
        % Treat each output independently with the relevant set of weights
        for j = 1:net.nout
          indep = inputs.*(weights(:,j)*e);
          dep = z(:,j).*weights(:,j);
          temp = indep\dep;
          net.w1(:,j) = temp(1:net.nin);
          net.b1(j) = temp(net.nin+1);
        end
        [err, edata, eprior, p, act] = glmerr_weighted(net, x, t, eso_w);
        if n == 1
          errold = err;
          wold = netpak(net);
        else
          w = netpak(net);
        end
      else
        % Exact method of calculation after the weights are first initialised
        % Start by working out the Hessian
        Hessian = glmhess_weighted(net, x, t, eso_w);
        temp = p - t;
        % Weight each pattern's gradient contribution by eso_w
        for m = 1:ndata
          temp(m,:) = eso_w(m,1)*temp(m,:);
        end
        gw1 = x'*temp;
        gb1 = sum(temp, 1);
        gradient = [gw1(:)', gb1];
        % Now compute the modification to the weights
        deltaw = -gradient*pinv(Hessian);
        w = wold + alfa*deltaw;
        net = glmunpak(net, w);
        [err, edata, eprior, p] = glmerr_weighted(net, x, t, eso_w);
      end
    otherwise
      error(['Unknown output function ', net.outfn]);
  end  % switch

  if display == 1
    fprintf(1, 'Cycle %4d  Error %11.6f\n', n, err)
  end
  % Terminate if the error has increased, reverting to the previous weights
  if err > errold
    net = glmunpak(net, wold);
    options(8) = errold;
    fprintf(1, 'Error has increased: terminating\n')
    return;
  end
  % Test for termination, then roll the weight and error history forward
  if n > 1
    if (test & max(abs(w - wold)) < options(2) & abs(err-errold) < options(3))
      options(8) = err;
      return;
    end
    errold = err;
    wold = w;
  end
end

options(8) = err;
if (display > 0)
  disp('Warning: Maximum number of iterations has been exceeded');
end
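
% Example usage (a minimal sketch, not part of the original file): it
% assumes the standard Netlab constructor GLM and 1-of-N coded targets;
% X, T, NIN and NCLASSES are illustrative names.
%
%   net = glm(nin, nclasses, 'softmax');  % GLM with softmax outputs
%   options = zeros(1, 18);               % standard Netlab options vector
%   options(1) = 1;                       % display error values
%   options(14) = 50;                     % maximum number of IRLS cycles
%   eso_w = ones(size(x, 1), 1);          % uniform per-pattern error weights
%   alfa = 1;                             % full step for the exact-Hessian update
%   [net, options] = glmtrain_weighted(net, options, x, t, eso_w, alfa);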