annotate toolboxes/FullBNT-1.0.7/netlabKPM/glmtrain_weighted.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
rev   line source
function [net, options] = glmtrain_weighted(net, options, x, t, eso_w, alfa)
%GLMTRAIN_WEIGHTED Specialised training of generalized linear model with weighted examples
%
%	Description
%	[NET, OPTIONS] = GLMTRAIN_WEIGHTED(NET, OPTIONS, X, T, ESO_W, ALFA)
%	uses a weighted variant of the iterative reweighted least squares
%	(IRLS) algorithm to set the weights in the generalized linear model
%	structure NET. Only NET.OUTFN equal to 'softmax' is handled; any other
%	output function raises an error. Each row of X corresponds to one
%	input vector and each row of T corresponds to one target vector.
%	The error function value at the final set of weights is returned in
%	OPTIONS(8).
%
%	ESO_W is passed unchanged to GLMERR_WEIGHTED and GLMHESS_WEIGHTED; in
%	addition ESO_W(M,1) scales the contribution of data row M to the
%	gradient used by the exact-Hessian update.
%
%	ALFA is the step size applied to the Newton update in the
%	exact-Hessian branch (W = WOLD + ALFA*DELTAW). If ALFA is omitted or
%	empty it defaults to 1, i.e. a full Newton step.
%
%	The optional parameters have the following interpretations.
%
%	OPTIONS(1) is set to 1 to display error values during training. If
%	OPTIONS(1) is greater than 0, a warning is displayed when the maximum
%	number of iterations is reached.
%
%	OPTIONS(2) is a measure of the precision required for the value of
%	the weights W at the solution.
%
%	OPTIONS(3) is a measure of the precision required of the objective
%	function at the solution.  Both this and the previous condition must
%	be satisfied for termination.
%
%	OPTIONS(5) is set to 1 if an approximation to the Hessian (which
%	assumes that all outputs are independent) is used for softmax
%	outputs. With the default value of 0 the exact Hessian (which is more
%	expensive to compute) is used after the first iteration.
%
%	OPTIONS(14) is the maximum number of iterations for the IRLS
%	algorithm;  default 100.
%
%	See also
%	GLM, GLMERR, GLMGRAD
%

%	Copyright (c) Christopher M Bishop, Ian T Nabney (1996, 1997)

% Check arguments for consistency. CONSIST returns an empty string when
% everything is fine, so the test must be on emptiness: negating a
% non-empty char array yields all zeros and would never trigger.
errstring = consist(net, 'glm', x, t);
if ~isempty(errstring)
  error(errstring);
end

% Default to a full Newton step when no step size is supplied
if nargin < 6 || isempty(alfa)
  alfa = 1;
end

if ~options(14)
  options(14) = 100;
end

test = (options(2) | options(3));	% Do we need to test for termination?

ndata = size(x, 1);

inputs = [x ones(ndata, 1)];		% Add a column of ones for the bias

% Use weighted iterative reweighted least squares (WIRLS)
e = ones(1, net.nin+1);
for n = 1:options(14)

  switch net.outfn
    case 'softmax'
      if n == 1
        % Initialise model: ensure that row sum of p is one no matter
        % how many classes there are
        p = (t + (1/size(t, 2)))/2;
        act = log(p./(1-p));
      end
      if options(5) == 1 || n == 1
        link_deriv = p.*(1-p);
        weights = sqrt(link_deriv);	% sqrt of weights
        if (min(min(weights)) < eps)
          fprintf(1, 'Warning: ill-conditioned weights in glmtrain\n')
          return
        end
        z = act + (t-p)./link_deriv;
        % Treat each output independently with relevant set of weights.
        % Note that eso_w does not enter this least-squares fit; it is
        % only used by the error evaluation below.
        for j = 1:net.nout
          indep = inputs.*(weights(:,j)*e);
          dep = z(:,j).*weights(:,j);
          temp = indep\dep;
          net.w1(:,j) = temp(1:net.nin);
          net.b1(j) = temp(net.nin+1);
        end
        [err, edata, eprior, p, act] = glmerr_weighted(net, x, t, eso_w);
        w = netpak(net);
        if n == 1
          % First pass: nothing to compare against yet
          errold = err;
          wold = w;
        end
      else
        % Exact method of calculation after w first initialised
        % Start by working out Hessian
        Hessian = glmhess_weighted(net, x, t, eso_w);
        % Scale each row of the residual by its example weight
        temp = (p - t) .* (eso_w(1:ndata, 1) * ones(1, size(t, 2)));
        gw1 = x'*temp;
        gb1 = sum(temp, 1);
        gradient = [gw1(:)', gb1];
        % Now compute modification to weights
        deltaw = -gradient*pinv(Hessian);
        w = wold + alfa*deltaw;
        net = glmunpak(net, w);
        [err, edata, eprior, p] = glmerr_weighted(net, x, t, eso_w);
      end
    otherwise
      % The switch is on net.outfn, so that is the value to report
      error(['Unknown activation function ', net.outfn]);
  end

  if options(1) == 1
    fprintf(1, 'Cycle %4d Error %11.6f\n', n, err)
  end

  % Terminate if error increases. The network is returned with the
  % weights of the current (higher-error) iteration; it is not rolled back.
  if err > errold
    options(8) = err;
    fprintf(1, 'Error has increased: terminating\n')
    return;
  end

  % Test for termination
  if test && n > 1
    if (max(abs(w - wold)) < options(2) && abs(err-errold) < options(3))
      options(8) = err;
      return;
    end
  end

  % Always remember the previous iterate. Updating these only when the
  % termination test is enabled would make the exact-Hessian step above
  % add each new Newton delta to the stale first-iteration weights.
  errold = err;
  wold = w;
end

options(8) = err;
if (options(1) > 0)
  disp('Warning: Maximum number of iterations has been exceeded');
end