function g = gpgrad(net, x, t)
%GPGRAD Evaluate error gradient for Gaussian Process.
%
%   Description
%   G = GPGRAD(NET, X, T) takes a Gaussian Process data structure NET
%   together with a matrix X of input vectors and a matrix T of target
%   vectors, and evaluates the error gradient G.  Each row of X
%   corresponds to one input vector and each row of T corresponds to one
%   target vector.
%
%   G is a row vector whose entries are ordered as
%   [bias, noise, input weights, covariance function parameters].
%   If NET has a field PR_MEAN, the gradient of the prior term (built
%   from NET.PR_MEAN, NET.PR_VAR and, for grouped priors, NET.INDEX) is
%   added to the data term.
%
%   An error is raised if CONSIST rejects the arguments or if
%   NET.COVAR_FN is neither 'sqexp' nor 'ratquad'.
%
%   See also
%   GP, GPCOVAR, GPFWD, GPERR
%

%   Copyright (c) Ian T Nabney (1996-2001)

errstring = consist(net, 'gp', x, t);
if ~isempty(errstring)
  error(errstring);
end

% Evaluate derivatives with respect to each hyperparameter in turn.
% Every data-term derivative below has the form
%   trace(inv(C)*dC) - (inv(C)*t)'*dC*(inv(C)*t)
% for the matching derivative matrix dC; the common factor 0.5 is
% applied once at the end.
ndata = size(x, 1);
% cn is the first output of GPCOVAR and covf the second; presumably the
% full covariance and its function-only part -- confirm against GPCOVAR.
[cn, covf] = gpcovar(net, x);
cninv = inv(cn);
trcninv = trace(cninv);
cninvt = cninv*t;

% Function parameters
switch net.covar_fn

  case 'sqexp'		% Squared exponential
    gfpar = trace(cninv*covf) - cninvt'*covf*cninvt;

  case 'ratquad'	% Rational quadratic
    gfpar = zeros(1, 2);
    beta = diag(exp(net.inweights));
    gfpar(1) = trace(cninv*covf) - cninvt'*covf*cninvt;
    % Weighted squared distances between all pairs of rows of x.
    D2 = (x.*x)*beta*ones(net.nin, ndata) - 2*x*beta*x' ...
      + ones(ndata, net.nin)*beta*(x.*x)';
    E = ones(size(D2));
    L = - exp(net.fpar(2)) * covf .* log(E + D2); % d(cn)/d(nu)
    gfpar(2) = trace(cninv*L) - cninvt'*L*cninvt;
    % Element-wise factor reused by every input weight derivative below.
    F = (exp(net.fpar(2))*E)./(E + D2);

  otherwise
    error(['Unknown covariance function ', net.covar_fn]);
end

% Bias derivative
fac = exp(net.bias)*ones(ndata);
gbias = trace(cninv*fac) - cninvt'*fac*cninvt;

% Noise derivative
gnoise = exp(net.noise)*(trcninv - cninvt'*cninvt);

% Input weight derivatives
nparams = length(net.inweights);
% Preallocate so that gw1 is defined (as an empty row) even when there
% are no input weights, and so that it is not grown inside the loop.
gw1 = zeros(1, nparams);
for l = 1 : nparams
  vect = x(:, l);
  % Squared differences between all pairs of points along input l.
  matx = (vect.*vect)*ones(1, ndata) ...
    - 2.0*vect*vect' ...
    + ones(ndata, 1)*(vect.*vect)';
  switch net.covar_fn
    case 'sqexp'	% Squared exponential
      dmat = -0.5*exp(net.inweights(l))*covf.*matx;

    case 'ratquad'	% Rational quadratic
      dmat = - exp(net.inweights(l))*covf.*matx.*F;

    otherwise
      error(['Unknown covariance function ', net.covar_fn]);
  end

  gw1(l) = trace(cninv*dmat) - cninvt'*dmat*cninvt;
end

g1 = [gbias, gnoise, gw1, gfpar];
g1 = 0.5*g1;

% Evaluate the prior contribution to the gradient.
if isfield(net, 'pr_mean')
  w = gppak(net);
  m = repmat(net.pr_mean, size(w));
  if isscalar(net.pr_mean)
    % Single prior shared by all hyperparameters.
    g2 = (w - m)/net.pr_var;
  else
    % One prior per group; net.index' masks each group's rows
    % (presumably a 0/1 membership matrix -- confirm against GP setup).
    ngroups = size(net.pr_mean, 1);
    gprior = net.index'.*(ones(ngroups, 1)*w - m);
    g2 = (1./net.pr_var)'*gprior;
  end
else
  g2 = 0;
end

g = g1 + g2;