wolffd@0: function [net, gamma, logev] = evidence_weighted(net, x, t, eso_w, num) wolffd@0: %EVIDENCE Re-estimate hyperparameters using evidence approximation. wolffd@0: % wolffd@0: % Description wolffd@0: % [NET] = EVIDENCE(NET, X, T) re-estimates the hyperparameters ALPHA wolffd@0: % and BETA by applying Bayesian re-estimation formulae for NUM wolffd@0: % iterations. The hyperparameter ALPHA can be a simple scalar wolffd@0: % associated with an isotropic prior on the weights, or can be a vector wolffd@0: % in which each component is associated with a group of weights as wolffd@0: % defined by the INDEX matrix in the NET data structure. These more wolffd@0: % complex priors can be set up for an MLP using MLPPRIOR. Initial wolffd@0: % values for the iterative re-estimation are taken from the network wolffd@0: % data structure NET passed as an input argument, while the return wolffd@0: % argument NET contains the re-estimated values. wolffd@0: % wolffd@0: % [NET, GAMMA, LOGEV] = EVIDENCE(NET, X, T, NUM) allows the re- wolffd@0: % estimation formula to be applied for NUM cycles in which the re- wolffd@0: % estimated values for the hyperparameters from each cycle are used to wolffd@0: % re-evaluate the Hessian matrix for the next cycle. The return value wolffd@0: % GAMMA is the number of well-determined parameters and LOGEV is the wolffd@0: % log of the evidence. wolffd@0: % wolffd@0: % See also wolffd@0: % MLPPRIOR, NETGRAD, NETHESS, DEMEV1, DEMARD wolffd@0: % wolffd@0: wolffd@0: % Copyright (c) Ian T Nabney (1996-9) wolffd@0: wolffd@0: errstring = consist(net, '', x, t); wolffd@0: if ~isempty(errstring) wolffd@0: error(errstring); wolffd@0: end wolffd@0: wolffd@0: ndata = size(x, 1); wolffd@0: if nargin == 4 wolffd@0: num = 1; wolffd@0: end wolffd@0: wolffd@0: if isfield(net,'beta') wolffd@0: beta = net.beta; wolffd@0: else wolffd@0: beta = 1; wolffd@0: end; wolffd@0: wolffd@0: % Extract weights from network wolffd@0: pakstr = [net.type, 'pak']; wolffd@0: w = feval(pakstr, net); wolffd@0: wolffd@0: % Evaluate data-dependent contribution to the Hessian matrix. wolffd@0: [h, dh] = nethess_weighted(w, net, x, t, eso_w); wolffd@0: wolffd@0: % Now set the negative eigenvalues to zero. wolffd@0: [evec, evl] = eig(dh); wolffd@0: evl = evl.*(evl > 0); wolffd@0: % safe_evl is used to avoid taking log of zero wolffd@0: safe_evl = evl + eps.*(evl <= 0); wolffd@0: wolffd@0: % Do the re-estimation. wolffd@0: for k = 1 : num wolffd@0: [e, edata, eprior] = neterr_weighted(w, net, x, t, eso_w); wolffd@0: h = nethess_weighted(w, net, x, t, eso_w, dh); wolffd@0: % Re-estimate alpha. wolffd@0: if size(net.alpha) == [1 1] wolffd@0: % Evaluate number of well-determined parameters. wolffd@0: if k == 1 wolffd@0: % Form vector of eigenvalues wolffd@0: evl = diag(evl); wolffd@0: safe_evl = diag(safe_evl); wolffd@0: end wolffd@0: B = beta*evl; wolffd@0: gamma = sum(B./(B + net.alpha)); wolffd@0: net.alpha = 0.5*gamma/eprior; wolffd@0: wolffd@0: % Partially evaluate log evidence wolffd@0: logev = e - 0.5*sum(log(safe_evl)) + 0.5*net.nwts*log(net.alpha) - ... wolffd@0: 0.5*ndata*log(2*pi); wolffd@0: else wolffd@0: ngroups = size(net.alpha, 1); wolffd@0: gams = zeros(1, ngroups); wolffd@0: logas = zeros(1, ngroups); wolffd@0: traces = zeros(1, ngroups); wolffd@0: % Reconstruct data hessian with negative eigenvalues set to zero. wolffd@0: dh = evec*evl*evec'; wolffd@0: hinv = inv(nethess_weighted(w, net, x, t, eso_w, dh)); wolffd@0: for m = 1 : ngroups wolffd@0: group_nweights = sum(net.index(:, m)); wolffd@0: gams(m) = group_nweights - ... wolffd@0: net.alpha(m)*sum(diag(hinv).*net.index(:,m)); wolffd@0: net.alpha(m) = real(gams(m)/(2*eprior(m))); wolffd@0: % Weight alphas by number of weights in group wolffd@0: logas(m) = 0.5*group_nweights*log(net.alpha(m)); wolffd@0: % Compute sum of evalues corresponding to group wolffd@0: traces(m) = sum(log(safe_evl*net.index(:,m))); wolffd@0: end wolffd@0: gamma = sum(gams, 2); wolffd@0: logev = e - 0.5*sum(traces) + sum(logas) - 0.5*ndata*log(2*pi); wolffd@0: end wolffd@0: % Re-estimate beta. wolffd@0: if isfield(net, 'beta') wolffd@0: net.beta = 0.5*(net.nout*ndata - gamma)/edata; wolffd@0: end wolffd@0: logev = logev + 0.5*ndata*log(beta); wolffd@0: end wolffd@0: