function [net, options] = rbftrain(net, options, x, t)
%RBFTRAIN Two stage training of RBF network.
%
% Description
% NET = RBFTRAIN(NET, OPTIONS, X, T) uses a two stage training
% algorithm to set the weights in the RBF model structure NET. Each row
% of X corresponds to one input vector and each row of T contains the
% corresponding target vector. The centres are determined by fitting a
% Gaussian mixture model with circular covariances using the EM
% algorithm through a call to RBFSETBF. (The mixture model is
% initialised using a small number of iterations of the K-means
% algorithm.) If the activation functions are Gaussians, then the basis
% function widths are set to the maximum inter-centre squared distance.
%
% For linear outputs, the hidden to output weights that give rise to
% the least squares solution can then be determined using the pseudo-
% inverse. For neuroscale outputs, the hidden to output weights are
% determined using the iterative shadow targets algorithm. Although
% this two stage procedure may not give solutions with as low an error
% as using general purpose non-linear optimisers, it is much faster.
%
% The options vector may have two rows: if this is the case, then the
% second row is passed to RBFSETBF, which allows the user to specify a
% different number of iterations for RBF and GMM training. The optional
% parameters to RBFTRAIN have the following interpretations.
%
% OPTIONS(1) is set to 1 to display error values during EM training.
%
% OPTIONS(2) is a measure of the precision required for the value of
% the weights W at the solution.
%
% OPTIONS(3) is a measure of the precision required of the objective
% function at the solution. Both this and the previous condition must
% be satisfied for termination.
%
% OPTIONS(5) is set to 1 if the basis function parameters should
% remain unchanged; default 0.
%
% OPTIONS(6) is set to 1 if the output layer weights should be set
% using PCA. This is only relevant for Neuroscale outputs; default 0.
%
% OPTIONS(14) is the maximum number of iterations for the shadow
% targets algorithm; default 100.
%
% See also
% RBF, RBFERR, RBFFWD, RBFGRAD, RBFPAK, RBFUNPAK, RBFSETBF
%
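% Example
% A minimal sketch of the linear-output case. The data and network size
% are invented for illustration, and FOPTIONS (used throughout the
% Netlab demos) supplies a default options vector:
%
%	x = randn(100, 2);               % 100 input vectors in 2-D
%	t = sin(x(:,1)) + x(:,2);        % invented regression targets
%	net = rbf(2, 5, 1, 'gaussian');  % 2 inputs, 5 centres, 1 output
%	options = foptions;              % default options vector
%	options(1) = 1;                  % display error values
%	[net, options] = rbftrain(net, options, x, t);
%	pred = rbffwd(net, x);           % outputs of the trained network
%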

% Copyright (c) Ian T Nabney (1996-2001)

% Check arguments for consistency
switch net.outfn
case 'linear'
  errstring = consist(net, 'rbf', x, t);
case 'neuroscale'
  errstring = consist(net, 'rbf', x);
otherwise
  error(['Unknown output function ', net.outfn]);
end
if ~isempty(errstring)
  error(errstring);
end

% Allow options to have two rows: if this is the case, then the second row
% is passed to rbfsetbf
if size(options, 1) == 2
  setbfoptions = options(2, :);
  options = options(1, :);
else
  setbfoptions = options;
end

% Default to 100 iterations of shadow targets if options(14) is unset
if ~options(14)
  options(14) = 100;
end
% Do we need to test for termination?
test = (options(2) | options(3));

% Set up the basis function parameters to model the input data density
% unless options(5) is set.
if ~logical(options(5))
  net = rbfsetbf(net, setbfoptions, x);
end

% Compute the design (or activations) matrix
[y, act] = rbffwd(net, x);
ndata = size(x, 1);

if strcmp(net.outfn, 'neuroscale') & options(6)
  % Initialise output layer weights by projecting data with PCA
  mu = mean(x);
  [pcvals, pcvecs] = pca(x, net.nout);
  xproj = (x - ones(ndata, 1)*mu)*pcvecs;
  % Now use projected data as targets to compute output layer weights
  temp = pinv([act ones(ndata, 1)]) * xproj;
  net.w2 = temp(1:net.nhidden, :);
  net.b2 = temp(net.nhidden+1, :);
  % Propagate again to compute revised outputs
  [y, act] = rbffwd(net, x);
end
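% (When OPTIONS(6) is set, the shadow targets algorithm below therefore
% starts from the variance-preserving PCA projection rather than from the
% weights the network was created with.)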

switch net.outfn
case 'linear'
  % Sum of squares error function in regression model
  % Solve for the weights and biases using pseudo-inverse from activations
  Phi = [act ones(ndata, 1)];
  if ~isfield(net, 'alpha')
    % Solve for the weights and biases using the pseudo-inverse
    temp = pinv(Phi)*t;
  elseif isequal(size(net.alpha), [1 1])
    % Use normal form equation with scalar weight decay alpha
    hessian = Phi'*Phi + net.alpha*eye(net.nhidden+1);
    temp = pinv(hessian)*(Phi'*t);
  else
    error('Only scalar alpha allowed');
  end
  net.w2 = temp(1:net.nhidden, :);
  net.b2 = temp(net.nhidden+1, :);
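
  % In matrix form, with the stacked parameter matrix W = [w2; b2], the
  % two solves above are
  %   no regulariser:  W = pinv(Phi)*t, the minimiser of ||Phi*W - t||^2
  %   scalar alpha:    W = pinv(Phi'*Phi + alpha*I)*(Phi'*t)
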
case 'neuroscale'
  % Use the shadow targets training algorithm
  if nargin < 4
    % If optional input distances not passed in, then use
    % Euclidean distance
    x_dist = sqrt(dist2(x, x));
  else
    x_dist = t;
  end
  Phi = [act, ones(ndata, 1)];
  % Compute the pseudo-inverse of Phi
  PhiDag = pinv(Phi);
  % Compute y_dist, distances between image points
  y_dist = sqrt(dist2(y, y));
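
  % The loop below implements the shadow targets scheme: it minimises the
  % stress
  %   E = 0.5 * sum_ij (x_dist(i,j) - y_dist(i,j))^2
  % by alternately (i) stepping the current outputs y a distance eta along
  % the negative stress gradient to form 'shadow' targets t, and then (ii)
  % refitting the linear output layer to those targets with the fixed
  % pseudo-inverse PhiDag computed above.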

  % Save old weights so that we can check the termination criterion
  wold = netpak(net);
  % Compute initial error (stress) value
  errold = 0.5*(sum(sum((x_dist - y_dist).^2)));

  % Initial value for eta
  eta = 0.1;
  k_up = 1.2;
  k_down = 0.1;
  success = 1; % Force initial gradient calculation
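
  % eta is adapted like a 'bold driver' learning rate: scaled up by k_up
  % after each successful step, and cut by k_down (with the step undone)
  % whenever the stress increases.
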
  for j = 1:options(14)
    if success
      % Compute the negative error gradient with respect to network outputs
      D = (x_dist - y_dist)./(y_dist+(y_dist==0));
      temp = y';
      neg_gradient = -2.*sum(kron(D, ones(1, net.nout)) .* ...
        (repmat(y, 1, ndata) - repmat((temp(:))', ndata, 1)), 1);
      neg_gradient = (reshape(neg_gradient, net.nout, ndata))';
    end
    % Compute the shadow targets
    t = y + eta*neg_gradient;
    % Solve for the weights and biases
    temp = PhiDag * t;
    net.w2 = temp(1:net.nhidden, :);
    net.b2 = temp(net.nhidden+1, :);

    % Do housekeeping and test for convergence
    ynew = rbffwd(net, x);
    y_distnew = sqrt(dist2(ynew, ynew));
    err = 0.5.*(sum(sum((x_dist-y_distnew).^2)));
    if err > errold
      success = 0;
      % Restore previous weights
      net = netunpak(net, wold);
      err = errold;
      eta = eta * k_down;
    else
      success = 1;
      eta = eta * k_up;
      % Test for termination before errold and wold are updated
      if test & j > 1
        w = netpak(net);
        if (max(abs(w - wold)) < options(2) & abs(err - errold) < options(3))
          options(8) = err;
          return;
        end
      end
      errold = err;
      y = ynew;
      y_dist = y_distnew;
      wold = netpak(net);
    end
    if options(1)
      fprintf(1, 'Cycle %4d Error %11.6f\n', j, err)
    end
  end
  options(8) = errold;
  if (options(1) >= 0)
    disp('Warning: Maximum number of iterations has been exceeded');
  end
otherwise
  error(['Unknown output function ', net.outfn]);

end