wolffd@0
|
function [h, hdata] = rbfhess(net, x, t, hdata)
%RBFHESS Evaluate the Hessian matrix for RBF network.
%
%	Usage
%	H = RBFHESS(NET, X, T)
%	[H, HDATA] = RBFHESS(NET, X, T)
%	H = RBFHESS(NET, X, T, HDATA)
%
%	Description
%	NET is an RBF network data structure, X a matrix of input values and
%	T a matrix of target values.  The result H is the full Hessian
%	matrix, i.e. the second derivatives of the negative log posterior
%	distribution at the weight and bias values currently held in NET.
%	At present only the output layer weights are handled.
%
%	The optional second output HDATA is the part of the Hessian that
%	comes from the data dependent term.
%
%	When HDATA is passed in as a fourth argument it is used as given
%	instead of being recomputed.  This saves computation time, but is
%	only valid if HDATA was evaluated for the current weight and bias
%	values.
%
%	See also
%	MLPHESS, HESSCHEK, EVIDENCE
%

%	Copyright (c) Ian T Nabney (1996-2001)

% Validate the network and data; a non-empty message means a mismatch.
msg = consist(net, 'rbf', x, t);
if ~isempty(msg)
  error(msg);
end

% Exactly three arguments means no precomputed data term was supplied,
% so build it from the second output of the forward pass.
if nargin == 3
  [y, act] = rbffwd(net, x);
  hdata = datahess(net, act, t);
end

% Combine the data term with the regularisation contribution.
[h, hdata] = hbayes(net, hdata);
|
wolffd@0
|
45
|
wolffd@0
|
46 % Sub-function to compute data part of Hessian
|
wolffd@0
|
function hdata = datahess(net, z, t)
%DATAHESS Data dependent part of the Hessian (output layer only).
%
%	HDATA = DATAHESS(NET, Z, T) returns a NET.NWTS-by-NET.NWTS matrix that
%	is zero except for the entries filled in by REARRANGE_HESS for each
%	of the NET.NOUT outputs.  Z has one row per data point; a column of
%	ones is appended to it before the product Z'*Z is formed.
%	T is not used by this function.
%
%	An error is raised unless NET has a 'mask' field and every mask
%	entry before the last NET.NOUT*(NET.NHIDDEN+1) parameters is zero.

% Only works for output layer Hessian currently.
% The short-circuit && ensures net.mask is read only when the field exists.
if isfield(net, 'mask') && ...
    ~any(net.mask(1:(net.nwts - net.nout*(net.nhidden+1))))
  hdata = zeros(net.nwts);
  ndata = size(z, 1);
  % Build the augmented matrix [z 1] once and reuse it on both sides.
  z_aug = [z ones(ndata, 1)];
  out_hess = z_aug'*z_aug;
  % The same block is placed for every output.
  for j = 1:net.nout
    hdata = rearrange_hess(net, j, out_hess, hdata);
  end
else
  error('Output layer Hessian only.');
end
|
wolffd@0
|
62
|
wolffd@0
|
63 % Sub-function to rearrange Hessian matrix
|
wolffd@0
|
function hdata = rearrange_hess(net, j, out_hess, hdata)
%REARRANGE_HESS Copy one output's Hessian block into the network Hessian.
%
%	HDATA = REARRANGE_HESS(NET, J, OUT_HESS, HDATA) writes the leading
%	(NET.NHIDDEN+1)-square part of OUT_HESS into HDATA at the rows and
%	columns belonging to output J, and returns the updated HDATA.
%
%	All the biases are stored after all the weights, so the rows and
%	columns for one output are not contiguous: NET.NHIDDEN weight
%	positions followed by a single bias position further on.  The
%	outputs are assumed to be independent, so only entries that pair
%	parameters of output J with each other are written.

n_hid = net.nhidden;

% Position of the first weight feeding output j; the output layer
% occupies the last nout*(nhidden+1) of the nwts parameters.
first_w = net.nwts - net.nout*(n_hid + 1) + (j - 1)*n_hid + 1;
% Position of the bias of output j; the biases are the last nout entries.
bias_pos = (net.nwts - net.nout + 1) + (j - 1);

% Weight positions followed by the bias position, matching the row and
% column order of out_hess.
idx = [first_w:(first_w + n_hid - 1), bias_pos];

% A single assignment places the weight/weight block, both cross terms
% and the bias/bias entry.
hdata(idx, idx) = out_hess(1:(n_hid + 1), 1:(n_hid + 1));