wolffd@0: function g = gpgrad(net, x, t)
wolffd@0: %GPGRAD	Evaluate error gradient for Gaussian Process.
wolffd@0: %
wolffd@0: %	Description
wolffd@0: %	G = GPGRAD(NET, X, T) takes a Gaussian Process data structure NET
wolffd@0: %	together  with a matrix X of input vectors and a matrix T of target
wolffd@0: %	vectors, and evaluates the error gradient G. Each row of X
wolffd@0: %	corresponds to one input vector and each row of T corresponds to one
wolffd@0: %	target vector.
wolffd@0: %
wolffd@0: %	See also
wolffd@0: %	GP, GPCOVAR, GPFWD, GPERR
wolffd@0: %
wolffd@0: 
wolffd@0: %	Copyright (c) Ian T Nabney (1996-2001)
wolffd@0: 
wolffd@0: errstring = consist(net, 'gp', x, t);
wolffd@0: if ~isempty(errstring);
wolffd@0:   error(errstring);
wolffd@0: end
wolffd@0: 
wolffd@0: % Evaluate derivatives with respect to each hyperparameter in turn.
wolffd@0: ndata = size(x, 1);
wolffd@0: [cov, covf] = gpcovar(net, x);
wolffd@0: cninv = inv(cov);
wolffd@0: trcninv = trace(cninv);
wolffd@0: cninvt = cninv*t;
wolffd@0: 
wolffd@0: % Function parameters
wolffd@0: switch net.covar_fn
wolffd@0: 
wolffd@0:   case 'sqexp'		% Squared exponential
wolffd@0:     gfpar = trace(cninv*covf) - cninvt'*covf*cninvt;
wolffd@0: 
wolffd@0:   case 'ratquad' 	% Rational quadratic
wolffd@0:     beta = diag(exp(net.inweights));
wolffd@0:     gfpar(1) = trace(cninv*covf) - cninvt'*covf*cninvt;
wolffd@0:     D2 = (x.*x)*beta*ones(net.nin, ndata) - 2*x*beta*x' ... 
wolffd@0:       + ones(ndata, net.nin)*beta*(x.*x)';
wolffd@0:     E = ones(size(D2));
wolffd@0:     L = - exp(net.fpar(2)) * covf .* log(E + D2); % d(cn)/d(nu)
wolffd@0:     gfpar(2) = trace(cninv*L) - cninvt'*L*cninvt;
wolffd@0: 
wolffd@0:   otherwise
wolffd@0:     error(['Unknown covariance function ', net.covar_fn]);
wolffd@0: end
wolffd@0: 
wolffd@0: % Bias derivative
wolffd@0: ndata = size(x, 1);
wolffd@0: fac = exp(net.bias)*ones(ndata);
wolffd@0: gbias = trace(cninv*fac) - cninvt'*fac*cninvt;
wolffd@0: 
wolffd@0: % Noise derivative
wolffd@0: gnoise = exp(net.noise)*(trcninv - cninvt'*cninvt);
wolffd@0: 
wolffd@0: % Input weight derivatives
wolffd@0: if strcmp(net.covar_fn, 'ratquad')
wolffd@0:   F = (exp(net.fpar(2))*E)./(E + D2);
wolffd@0: end
wolffd@0: 
wolffd@0: nparams = length(net.inweights);
wolffd@0: for l = 1 : nparams
wolffd@0:   vect = x(:, l);
wolffd@0:   matx = (vect.*vect)*ones(1, ndata) ... 
wolffd@0: 	- 2.0*vect*vect' ... 
wolffd@0: 	+ ones(ndata, 1)*(vect.*vect)';
wolffd@0:   switch net.covar_fn
wolffd@0:     case 'sqexp'	% Squared exponential
wolffd@0:       dmat = -0.5*exp(net.inweights(l))*covf.*matx;
wolffd@0:       
wolffd@0:     case 'ratquad'	% Rational quadratic
wolffd@0:       dmat = - exp(net.inweights(l))*covf.*matx.*F;
wolffd@0:     otherwise
wolffd@0:       error(['Unknown covariance function ', net.covar_fn]);
wolffd@0:   end
wolffd@0: 
wolffd@0:   gw1(l) = trace(cninv*dmat) - cninvt'*dmat*cninvt;
wolffd@0: end
wolffd@0: 
wolffd@0: g1 = [gbias, gnoise, gw1, gfpar];
wolffd@0: g1 = 0.5*g1;
wolffd@0: 
wolffd@0: % Evaluate the prior contribution to the gradient.
wolffd@0: if isfield(net, 'pr_mean')
wolffd@0:   w = gppak(net);
wolffd@0:   m = repmat(net.pr_mean, size(w));
wolffd@0:   if size(net.pr_mean) == [1 1]
wolffd@0:     gprior = w - m;
wolffd@0:     g2 = gprior/net.pr_var;
wolffd@0:   else
wolffd@0:     ngroups = size(net.pr_mean, 1);
wolffd@0:     gprior = net.index'.*(ones(ngroups, 1)*w - m);
wolffd@0:     g2 = (1./net.pr_var)'*gprior;
wolffd@0:   end
wolffd@0: else
wolffd@0:   gprior = 0;
wolffd@0:   g2 = 0;
wolffd@0: end
wolffd@0: 
wolffd@0: g = g1 + g2;