diff toolboxes/FullBNT-1.0.7/netlab3.3/demard.m @ 0:e9a9cd732c1e tip
first hg version after svn
| author   | wolffd                          |
|----------|---------------------------------|
| date     | Tue, 10 Feb 2015 15:05:51 +0000 |
| parents  |                                 |
| children |                                 |
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/toolboxes/FullBNT-1.0.7/netlab3.3/demard.m	Tue Feb 10 15:05:51 2015 +0000
@@ -0,0 +1,157 @@
+%DEMARD Automatic relevance determination using the MLP.
+%
+% Description
+% This script demonstrates the technique of automatic relevance
+% determination (ARD) using a synthetic problem having three input
+% variables: X1 is sampled uniformly from the range (0,1) and has a low
+% level of added Gaussian noise, X2 is a copy of X1 with a higher level
+% of added noise, and X3 is sampled randomly from a Gaussian
+% distribution. The single target variable is determined by
+% SIN(2*PI*X1) with additive Gaussian noise. Thus X1 is very relevant
+% for determining the target value, X2 is of some relevance, while X3
+% is irrelevant. The prior over weights is given by the ARD Gaussian
+% prior with a separate hyper-parameter for the group of weights
+% associated with each input. A multi-layer perceptron is trained on
+% this data, with re-estimation of the hyper-parameters using EVIDENCE.
+% The final values for the hyper-parameters reflect the relative
+% importance of the three inputs.
+%
+% See also
+% DEMMLP1, DEMEV1, MLP, EVIDENCE
+%
+
+% Copyright (c) Ian T Nabney (1996-2001)
+
+clc;
+disp('This demonstration illustrates the technique of automatic relevance')
+disp('determination (ARD) using a multi-layer perceptron.')
+disp(' ');
+disp('First, we set up a synthetic data set involving three input variables:')
+disp('x1 is sampled uniformly from the range (0,1) and has a low level of')
+disp('added Gaussian noise, x2 is a copy of x1 with a higher level of added')
+disp('noise, and x3 is sampled randomly from a Gaussian distribution. The')
+disp('single target variable is given by t = sin(2*pi*x1) with additive')
+disp('Gaussian noise. Thus x1 is very relevant for determining the target')
+disp('value, x2 is of some relevance, while x3 should in principle be')
+disp('irrelevant.')
+disp(' ');
+disp('Press any key to see a plot of t against x1.')
+pause;
+
+% Generate the data set.
+randn('state', 0);
+rand('state', 0);
+ndata = 100;
+noise = 0.05;
+x1 = rand(ndata, 1) + 0.002*randn(ndata, 1);
+x2 = x1 + 0.02*randn(ndata, 1);
+x3 = 0.5 + 0.2*randn(ndata, 1);
+x = [x1, x2, x3];
+t = sin(2*pi*x1) + noise*randn(ndata, 1);
+
+% Plot the data and the original function.
+h = figure;
+plotvals = linspace(0, 1, 200)';
+plot(x1, t, 'ob')
+hold on
+axis([0 1 -1.5 1.5])
+[fx, fy] = fplot('sin(2*pi*x)', [0 1]);
+plot(fx, fy, '-g', 'LineWidth', 2);
+legend('data', 'function');
+
+disp(' ');
+disp('Press any key to continue')
+pause; clc;
+
+disp('The prior over weights is given by the ARD Gaussian prior with a')
+disp('separate hyper-parameter for the group of weights associated with each')
+disp('input. This prior is set up using the utility MLPPRIOR. The network is')
+disp('trained by error minimization using scaled conjugate gradient function')
+disp('SCG. There are two cycles of training, and at the end of each cycle')
+disp('the hyper-parameters are re-estimated using EVIDENCE.')
+disp(' ');
+disp('Press any key to create and train the network.')
+disp(' ');
+pause;
+
+% Set up network parameters.
+nin = 3;                  % Number of inputs.
+nhidden = 2;              % Number of hidden units.
+nout = 1;                 % Number of outputs.
+aw1 = 0.01*ones(1, nin);  % First-layer ARD hyperparameters.
+ab1 = 0.01;               % Hyperparameter for hidden unit biases.
+aw2 = 0.01;               % Hyperparameter for second-layer weights.
+ab2 = 0.01;               % Hyperparameter for output unit biases.
+beta = 50.0;             % Coefficient of data error.
+
+% Create and initialize network.
+prior = mlpprior(nin, nhidden, nout, aw1, ab1, aw2, ab2);
+net = mlp(nin, nhidden, nout, 'linear', prior, beta);
+
+% Set up vector of options for the optimiser.
+nouter = 2;               % Number of outer loops.
+ninner = 10;              % Number of inner loops.
+options = zeros(1,18);    % Default options vector.
+options(1) = 1;           % This provides display of error values.
+options(2) = 1.0e-7;      % This ensures that convergence must occur.
+options(3) = 1.0e-7;
+options(14) = 300;        % Number of training cycles in inner loop.
+
+% Train using scaled conjugate gradients, re-estimating alpha and beta.
+for k = 1:nouter
+  net = netopt(net, options, x, t, 'scg');
+  [net, gamma] = evidence(net, x, t, ninner);
+  fprintf(1, '\n\nRe-estimation cycle %d:\n', k);
+  disp('The first three alphas are the hyperparameters for the corresponding');
+  disp('input to hidden unit weights. The remainder are the hyperparameters');
+  disp('for the hidden unit biases, second layer weights and output unit')
+  disp('biases, respectively.')
+  fprintf(1, '  alpha = %8.5f\n', net.alpha);
+  fprintf(1, '  beta  = %8.5f\n', net.beta);
+  fprintf(1, '  gamma = %8.5f\n\n', gamma);
+  disp(' ')
+  disp('Press any key to continue.')
+  pause
+end
+
+% Plot the function corresponding to the trained network.
+figure(h); hold on;
+[y, z] = mlpfwd(net, plotvals*ones(1,3));
+plot(plotvals, y, '-r', 'LineWidth', 2)
+legend('data', 'function', 'network');
+
+disp('Press any key to continue.');
+pause; clc;
+
+disp('We can now read off the hyperparameter values corresponding to the')
+disp('three inputs x1, x2 and x3:')
+disp(' ');
+fprintf(1, '  alpha1: %8.5f\n', net.alpha(1));
+fprintf(1, '  alpha2: %8.5f\n', net.alpha(2));
+fprintf(1, '  alpha3: %8.5f\n', net.alpha(3));
+disp(' ');
+disp('Since each alpha corresponds to an inverse variance, we see that the')
+disp('posterior variance for weights associated with input x1 is large, that')
+disp('of x2 has an intermediate value and the variance of weights associated')
+disp('with x3 is small.')
+disp(' ')
+disp('Press any key to continue.')
+disp(' ')
+pause
+disp('This is confirmed by looking at the corresponding weight values:')
+disp(' ');
+fprintf(1, '  %8.5f  %8.5f\n', net.w1');
+disp(' ');
+disp('where the three rows correspond to weights associated with x1, x2 and')
+disp('x3 respectively. We see that the network is giving greatest emphasis')
+disp('to x1 and least emphasis to x3, with intermediate emphasis on')
+disp('x2. Since the target t is statistically independent of x3 we might')
+disp('expect that the weights associated with this input would go to')
+disp('zero. However, for any finite data set there may be some chance')
+disp('correlation between x3 and t, and so the corresponding alpha remains')
+disp('finite.')
+
+disp(' ');
+disp('Press any key to end.')
+pause; clc; close(h); clear all
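
The closing text explains that each alpha is an inverse prior variance, so the smallest alpha marks the most relevant input. A minimal sketch of turning that observation into an explicit relevance ranking is shown below; it assumes the script above has just been run, so that `net` still holds the trained network with the fields `net.alpha` and `net.w1` that the script itself prints, and it is an illustrative addition rather than part of demard.m.

% Rank the three inputs by relevance after running demard.m.
% Assumes net.alpha(1:3) are the first-layer ARD hyperparameters,
% as in the fprintf calls above. Since alpha is an inverse prior
% variance, a small alpha (large 1/alpha) indicates a relevant input.
relevance = 1 ./ net.alpha(1:3);              % larger value = more relevant
[vals, order] = sort(relevance, 'descend');   % most relevant input first
for i = 1:3
  fprintf(1, 'input x%d: alpha = %8.5f, 1/alpha = %8.5f\n', ...
          order(i), net.alpha(order(i)), vals(i));
end

With the random seeds fixed at the top of the script, this ordering should come out as x1, then x2, then x3, matching the weight magnitudes in the rows of net.w1.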