comparison: toolboxes/FullBNT-1.0.7/netlab3.3/rbftrain.m @ 0:e9a9cd732c1e (tip), repository camir-aes2014
first hg version after svn
| author | wolffd |
|---|---|
| date | Tue, 10 Feb 2015 15:05:51 +0000 |
| parents | |
| children | |
comparing -1:000000000000 with 0:e9a9cd732c1e
function [net, options, errlog] = rbftrain(net, options, x, t)
%RBFTRAIN Two stage training of RBF network.
%
% Description
% NET = RBFTRAIN(NET, OPTIONS, X, T) uses a two stage training
% algorithm to set the weights in the RBF model structure NET. Each row
% of X corresponds to one input vector and each row of T contains the
% corresponding target vector. The centres are determined by fitting a
% Gaussian mixture model with circular covariances using the EM
% algorithm through a call to RBFSETBF. (The mixture model is
% initialised using a small number of iterations of the K-means
% algorithm.) If the activation functions are Gaussians, the basis
% function widths are then set to the maximum inter-centre squared
% distance.
%
% For linear outputs, the hidden to output weights that give rise to
% the least squares solution can then be determined using the pseudo-
% inverse. For neuroscale outputs, the hidden to output weights are
% determined using the iterative shadow targets algorithm. Although
% this two stage procedure may not give solutions with as low an error
% as using general purpose non-linear optimisers, it is much faster.
%
% [NET, OPTIONS, ERRLOG] = RBFTRAIN(NET, OPTIONS, X, T) also returns a
% log of the error value after each cycle of shadow targets training.
%
% The options vector may have two rows: if this is the case, then the
% second row is passed to RBFSETBF, which allows the user to specify a
% different number of iterations for RBF and GMM training. The optional
% parameters to RBFTRAIN have the following interpretations.
%
% OPTIONS(1) is set to 1 to display error values during EM training.
%
% OPTIONS(2) is a measure of the precision required for the value of
% the weights W at the solution.
%
% OPTIONS(3) is a measure of the precision required of the objective
% function at the solution. Both this and the previous condition must
% be satisfied for termination.
%
% OPTIONS(5) is set to 1 if the basis function parameters should
% remain unchanged; default 0.
%
% OPTIONS(6) is set to 1 if the output layer weights should be set
% using PCA. This is only relevant for Neuroscale outputs; default
% 0.
%
% OPTIONS(14) is the maximum number of iterations for the shadow
% targets algorithm; default 100.
%
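% Example
% A minimal usage sketch (the data, network sizes and option settings
% below are illustrative and are not part of the original Netlab help):
%
%   x = randn(100, 2);                  % 100 two-dimensional inputs
%   t = sin(x(:,1)) + 0.1*randn(100,1); % noisy scalar targets
%   net = rbf(2, 5, 1, 'gaussian');     % 2 inputs, 5 centres, 1 output
%   options = foptions;                 % default options vector
%   options(1) = 1;                     % display error values
%   net = rbftrain(net, options, x, t);
%   y = rbffwd(net, x);                 % forward propagate trained net
%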
% See also
% RBF, RBFERR, RBFFWD, RBFGRAD, RBFPAK, RBFUNPAK, RBFSETBF
%

% Copyright (c) Ian T Nabney (1996-2001)

% Check arguments for consistency
switch net.outfn
  case 'linear'
    errstring = consist(net, 'rbf', x, t);
  case 'neuroscale'
    errstring = consist(net, 'rbf', x);
  otherwise
    error(['Unknown output function ', net.outfn]);
end
if ~isempty(errstring)
  error(errstring);
end

% Allow options to have two rows: if this is the case, then the second row
% is passed to rbfsetbf
if size(options, 1) == 2
  setbfoptions = options(2, :);
  options = options(1, :);
else
  setbfoptions = options;
end
errlog = [];  % Error log: only filled in during shadow targets training

if ~options(14)
  options(14) = 100;
end
% Do we need to test for termination?
test = (options(2) | options(3));

% Set up the basis function parameters to model the input data density
% unless options(5) is set.
if ~logical(options(5))
  net = rbfsetbf(net, setbfoptions, x);
end

% Compute the design (or activations) matrix
[y, act] = rbffwd(net, x);
ndata = size(x, 1);

if strcmp(net.outfn, 'neuroscale') && options(6)
  % Initialise output layer weights by projecting data with PCA
  mu = mean(x);
  [pcvals, pcvecs] = pca(x, net.nout);
  xproj = (x - ones(ndata, 1)*mu)*pcvecs;
  % Now use projected data as targets to compute output layer weights
  temp = pinv([act ones(ndata, 1)]) * xproj;
  net.w2 = temp(1:net.nhidden, :);
  net.b2 = temp(net.nhidden+1, :);
  % Propagate again to compute revised outputs
  [y, act] = rbffwd(net, x);
end

switch net.outfn
  case 'linear'
    % Sum of squares error function in regression model
    % Solve for the weights and biases using pseudo-inverse from activations
    Phi = [act ones(ndata, 1)];
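    % With Phi the ndata x (nhidden+1) design matrix, the regularised
    % solution below is w = inv(Phi'*Phi + alpha*I) * Phi'*t; without a
    % regulariser this reduces to the pseudo-inverse solution w = pinv(Phi)*t.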
    if ~isfield(net, 'alpha')
      % Solve for the weights and biases using the pseudo-inverse
      temp = pinv(Phi)*t;
    elseif isequal(size(net.alpha), [1 1])
      % Use normal form equation
      hessian = Phi'*Phi + net.alpha*eye(net.nhidden+1);
      temp = pinv(hessian)*(Phi'*t);
    else
      error('Only scalar alpha allowed');
    end
    net.w2 = temp(1:net.nhidden, :);
    net.b2 = temp(net.nhidden+1, :);

  case 'neuroscale'
    % Use the shadow targets training algorithm
    if nargin < 4
      % If optional input distances not passed in, then use
      % Euclidean distance
      x_dist = sqrt(dist2(x, x));
    else
      x_dist = t;
    end
    Phi = [act, ones(ndata, 1)];
    % Compute the pseudo-inverse of Phi
    PhiDag = pinv(Phi);
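    % Phi is fixed throughout shadow targets training (the basis function
    % parameters do not change inside the loop), so its pseudo-inverse
    % need only be computed once.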
    % Compute y_dist, distances between image points
    y_dist = sqrt(dist2(y, y));

    % Save old weights so that we can check the termination criterion
    wold = netpak(net);
    % Compute initial error (stress) value
    errold = 0.5*(sum(sum((x_dist - y_dist).^2)));

    % Initial value for eta
    eta = 0.1;
    k_up = 1.2;
    k_down = 0.1;
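    % eta is adapted with a 'bold driver' style rule: it is increased by
    % the factor k_up after each successful step and cut by the factor
    % k_down after each unsuccessful one.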
    success = 1; % Force initial gradient calculation

    for j = 1:options(14)
      if success
        % Compute the negative error gradient with respect to network outputs
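        % For the stress E = 0.5*sum_ij (d*_ij - d_ij)^2, where d*_ij are
        % the input-space distances and d_ij the distances between image
        % points, the negative gradient with respect to image point y_i is
        % 2*sum_j D_ij*(y_i - y_j), with D_ij = (d*_ij - d_ij)/d_ij
        % (the division guarded against d_ij = 0 below).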
        D = (x_dist - y_dist)./(y_dist+(y_dist==0));
        temp = y';
        neg_gradient = -2.*sum(kron(D, ones(1, net.nout)) .* ...
          (repmat(y, 1, ndata) - repmat((temp(:))', ndata, 1)), 1);
        neg_gradient = (reshape(neg_gradient, net.nout, ndata))';
      end
      % Compute the shadow targets
      t = y + eta*neg_gradient;
      % Solve for the weights and biases
      temp = PhiDag * t;
      net.w2 = temp(1:net.nhidden, :);
      net.b2 = temp(net.nhidden+1, :);

      % Do housekeeping and test for convergence
      ynew = rbffwd(net, x);
      y_distnew = sqrt(dist2(ynew, ynew));
      err = 0.5*(sum(sum((x_dist-y_distnew).^2)));
      if err > errold
        success = 0;
        % Restore previous weights
        net = netunpak(net, wold);
        err = errold;
        eta = eta * k_down;
      else
        success = 1;
        eta = eta * k_up;
        y = ynew;
        y_dist = y_distnew;
        if test && j > 1
          w = netpak(net);
          % errold must still hold the previous error value here for the
          % error criterion to be meaningful, so it is updated afterwards
          if (max(abs(w - wold)) < options(2) && abs(err - errold) < options(3))
            options(8) = err;
            return;
          end
        end
        errold = err;
        wold = netpak(net);
      end
      if options(1)
        fprintf(1, 'Cycle %4d Error %11.6f\n', j, err);
      end
      if nargout >= 3
        errlog(j) = err;
      end
    end
    options(8) = errold;
    if (options(1) >= 0)
      disp('Warning: Maximum number of iterations has been exceeded');
    end
  otherwise
    error(['Unknown output function ', net.outfn]);

end