function [net, options] = rbftrain(net, options, x, t)
%RBFTRAIN Two stage training of RBF network.
%
% Description
% NET = RBFTRAIN(NET, OPTIONS, X, T) uses a two stage training
% algorithm to set the weights in the RBF model structure NET. Each row
% of X corresponds to one input vector and each row of T contains the
% corresponding target vector. The centres are determined by fitting a
% Gaussian mixture model with circular covariances using the EM
% algorithm through a call to RBFSETBF. (The mixture model is
% initialised using a small number of iterations of the K-means
% algorithm.) If the activation functions are Gaussians, then the basis
% function widths are set to the maximum inter-centre squared distance.
%
% For linear outputs, the hidden to output weights that give rise to
% the least squares solution can then be determined using the pseudo-
% inverse. For neuroscale outputs, the hidden to output weights are
% determined using the iterative shadow targets algorithm. Although
% this two stage procedure may not give solutions with as low an error
% as using general purpose non-linear optimisers, it is much faster.
%
% The options vector may have two rows: if this is the case, then the
% second row is passed to RBFSETBF, which allows the user to specify a
% different number of iterations for RBF and GMM training. The optional
% parameters to RBFTRAIN have the following interpretations.
%
% OPTIONS(1) is set to 1 to display error values during EM training.
%
% OPTIONS(2) is a measure of the precision required for the value of
% the weights W at the solution.
%
% OPTIONS(3) is a measure of the precision required of the objective
% function at the solution. Both this and the previous condition must
% be satisfied for termination.
%
% OPTIONS(5) is set to 1 if the basis function parameters should
% remain unchanged; default 0.
%
% OPTIONS(6) is set to 1 if the output layer weights should be set
% using PCA. This is only relevant for Neuroscale outputs; default 0.
%
% OPTIONS(14) is the maximum number of iterations for the shadow
% targets algorithm; default 100.
%
% See also
% RBF, RBFERR, RBFFWD, RBFGRAD, RBFPAK, RBFUNPAK, RBFSETBF
%
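% Example
% A minimal sketch of the linear-output case. The data and network size
% are invented for illustration, and FOPTIONS (used throughout the
% Netlab demos) supplies a default options vector:
%
%	x = randn(100, 2);               % 100 input vectors in 2-D
%	t = sin(x(:,1)) + x(:,2);        % invented regression targets
%	net = rbf(2, 5, 1, 'gaussian');  % 2 inputs, 5 centres, 1 output
%	options = foptions;              % default options vector
%	options(1) = 1;                  % display error values
%	[net, options] = rbftrain(net, options, x, t);
%	pred = rbffwd(net, x);           % outputs of the trained network
%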

% Copyright (c) Ian T Nabney (1996-2001)

% Check arguments for consistency
switch net.outfn
case 'linear'
  errstring = consist(net, 'rbf', x, t);
case 'neuroscale'
  errstring = consist(net, 'rbf', x);
otherwise
  error(['Unknown output function ', net.outfn]);
end
if ~isempty(errstring)
  error(errstring);
end

% Allow options to have two rows: if this is the case, then the second row
% is passed to rbfsetbf
if size(options, 1) == 2
  setbfoptions = options(2, :);
  options = options(1, :);
else
  setbfoptions = options;
end

% Default to 100 iterations of shadow targets if options(14) is unset
if ~options(14)
  options(14) = 100;
end
% Do we need to test for termination?
test = (options(2) | options(3));

% Set up the basis function parameters to model the input data density
% unless options(5) is set.
if ~logical(options(5))
  net = rbfsetbf(net, setbfoptions, x);
end

% Compute the design (or activations) matrix
[y, act] = rbffwd(net, x);
ndata = size(x, 1);

if strcmp(net.outfn, 'neuroscale') & options(6)
  % Initialise output layer weights by projecting data with PCA
  mu = mean(x);
  [pcvals, pcvecs] = pca(x, net.nout);
  xproj = (x - ones(ndata, 1)*mu)*pcvecs;
  % Now use projected data as targets to compute output layer weights
  temp = pinv([act ones(ndata, 1)]) * xproj;
  net.w2 = temp(1:net.nhidden, :);
  net.b2 = temp(net.nhidden+1, :);
  % Propagate again to compute revised outputs
  [y, act] = rbffwd(net, x);
end
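% (When OPTIONS(6) is set, the shadow targets algorithm below therefore
% starts from the variance-preserving PCA projection rather than from the
% weights the network was created with.)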

switch net.outfn
case 'linear'
  % Sum of squares error function in regression model
  % Solve for the weights and biases using pseudo-inverse from activations
  Phi = [act ones(ndata, 1)];
  if ~isfield(net, 'alpha')
    % Solve for the weights and biases using the pseudo-inverse
    temp = pinv(Phi)*t;
  elseif isequal(size(net.alpha), [1 1])
    % Use normal form equation with scalar weight decay alpha
    hessian = Phi'*Phi + net.alpha*eye(net.nhidden+1);
    temp = pinv(hessian)*(Phi'*t);
  else
    error('Only scalar alpha allowed');
  end
  net.w2 = temp(1:net.nhidden, :);
  net.b2 = temp(net.nhidden+1, :);
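
  % In matrix form, with the stacked parameter matrix W = [w2; b2], the
  % two solves above are
  %   no regulariser:  W = pinv(Phi)*t, the minimiser of ||Phi*W - t||^2
  %   scalar alpha:    W = pinv(Phi'*Phi + alpha*I)*(Phi'*t)
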
case 'neuroscale'
  % Use the shadow targets training algorithm
  if nargin < 4
    % If optional input distances not passed in, then use
    % Euclidean distance
    x_dist = sqrt(dist2(x, x));
  else
    x_dist = t;
  end
  Phi = [act, ones(ndata, 1)];
  % Compute the pseudo-inverse of Phi
  PhiDag = pinv(Phi);
  % Compute y_dist, distances between image points
  y_dist = sqrt(dist2(y, y));
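
  % The loop below implements the shadow targets scheme: it minimises the
  % stress
  %   E = 0.5 * sum_ij (x_dist(i,j) - y_dist(i,j))^2
  % by alternately (i) stepping the current outputs y a distance eta along
  % the negative stress gradient to form 'shadow' targets t, and then (ii)
  % refitting the linear output layer to those targets with the fixed
  % pseudo-inverse PhiDag computed above.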

  % Save old weights so that we can check the termination criterion
  wold = netpak(net);
  % Compute initial error (stress) value
  errold = 0.5*(sum(sum((x_dist - y_dist).^2)));

  % Initial value for eta
  eta = 0.1;
  k_up = 1.2;
  k_down = 0.1;
  success = 1; % Force initial gradient calculation
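
  % eta is adapted like a 'bold driver' learning rate: scaled up by k_up
  % after each successful step, and cut by k_down (with the step undone)
  % whenever the stress increases.
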
  for j = 1:options(14)
    if success
      % Compute the negative error gradient with respect to network outputs
      D = (x_dist - y_dist)./(y_dist+(y_dist==0));
      temp = y';
      neg_gradient = -2.*sum(kron(D, ones(1, net.nout)) .* ...
        (repmat(y, 1, ndata) - repmat((temp(:))', ndata, 1)), 1);
      neg_gradient = (reshape(neg_gradient, net.nout, ndata))';
    end
    % Compute the shadow targets
    t = y + eta*neg_gradient;
    % Solve for the weights and biases
    temp = PhiDag * t;
    net.w2 = temp(1:net.nhidden, :);
    net.b2 = temp(net.nhidden+1, :);

    % Do housekeeping and test for convergence
    ynew = rbffwd(net, x);
    y_distnew = sqrt(dist2(ynew, ynew));
    err = 0.5.*(sum(sum((x_dist-y_distnew).^2)));
    if err > errold
      success = 0;
      % Restore previous weights
      net = netunpak(net, wold);
      err = errold;
      eta = eta * k_down;
    else
      success = 1;
      eta = eta * k_up;
      % Test for termination before errold and wold are updated
      if test & j > 1
        w = netpak(net);
        if (max(abs(w - wold)) < options(2) & abs(err - errold) < options(3))
          options(8) = err;
          return;
        end
      end
      errold = err;
      y = ynew;
      y_dist = y_distnew;
      wold = netpak(net);
    end
    if options(1)
      fprintf(1, 'Cycle %4d Error %11.6f\n', j, err)
    end
  end
  options(8) = errold;
  if (options(1) >= 0)
    disp('Warning: Maximum number of iterations has been exceeded');
  end
otherwise
  error(['Unknown output function ', net.outfn]);

end