annotate toolboxes/FullBNT-1.0.7/netlab3.3/demard.m @ 0:cc4b1211e677 tip

initial commit to HG from Changeset: 646 (e263d8a21543) added further path and more save "camirversion.m"
author Daniel Wolff
date Fri, 19 Aug 2016 13:07:06 +0200
parents
children
rev   line source
Daniel@0 1 %DEMARD Automatic relevance determination using the MLP.
Daniel@0 2 %
Daniel@0 3 % Description
Daniel@0 4 % This script demonstrates the technique of automatic relevance
Daniel@0 5 % determination (ARD) using a synthetic problem having three input
Daniel@0 6 % variables: X1 is sampled uniformly from the range (0,1) and has a low
Daniel@0 7 % level of added Gaussian noise, X2 is a copy of X1 with a higher level
Daniel@0 8 % of added noise, and X3 is sampled randomly from a Gaussian
Daniel@0 9 % distribution. The single target variable is determined by
Daniel@0 10 % SIN(2*PI*X1) with additive Gaussian noise. Thus X1 is very relevant
Daniel@0 11 % for determining the target value, X2 is of some relevance, while X3
Daniel@0 12 % is irrelevant. The prior over weights is given by the ARD Gaussian
Daniel@0 13 % prior with a separate hyper-parameter for the group of weights
Daniel@0 14 % associated with each input. A multi-layer perceptron is trained on
Daniel@0 15 % this data, with re-estimation of the hyper-parameters using EVIDENCE.
Daniel@0 16 % The final values for the hyper-parameters reflect the relative
Daniel@0 17 % importance of the three inputs.
Daniel@0 18 %
Daniel@0 19 % See also
Daniel@0 20 % DEMMLP1, DEMEV1, MLP, EVIDENCE
Daniel@0 21 %
Daniel@0 22
Daniel@0 23 % Copyright (c) Ian T Nabney (1996-2001)
Daniel@0 24
% Clear the command window and walk the user through what the demo will
% show, before any data is generated.  Everything in this section is
% purely informational console output.
clc;
disp('This demonstration illustrates the technique of automatic relevance')
disp('determination (ARD) using a multi-layer perceptron.')
disp(' ');
disp('First, we set up a synthetic data set involving three input variables:')
disp('x1 is sampled uniformly from the range (0,1) and has a low level of')
disp('added Gaussian noise, x2 is a copy of x1 with a higher level of added')
disp('noise, and x3 is sampled randomly from a Gaussian distribution. The')
disp('single target variable is given by t = sin(2*pi*x1) with additive')
disp('Gaussian noise. Thus x1 is very relevant for determining the target')
disp('value, x2 is of some relevance, while x3 should in principle be')
disp('irrelevant.')
disp(' ');
disp('Press any key to see a plot of t against x1.')
pause;
Daniel@0 40
% Generate the data set.
% Both generator states are fixed so that the demo is reproducible from
% run to run (legacy-style seeding, consistent with the rest of Netlab).
randn('state', 0);
rand('state', 0);
ndata = 100;                                  % Number of data points.
noise = 0.05;                                 % Std dev of target noise.
x1 = rand(ndata, 1) + 0.002*randn(ndata, 1);  % Uniform + tiny noise: highly relevant input.
x2 = x1 + 0.02*randn(ndata, 1);               % Noisier copy of x1: partially relevant.
x3 = 0.5 + 0.2*randn(ndata, 1);               % Independent Gaussian: irrelevant to t.
x = [x1, x2, x3];                             % ndata x 3 input matrix.
t = sin(2*pi*x1) + noise*randn(ndata, 1);     % Target depends on x1 only.
Daniel@0 51
% Plot the data and the original function.
h = figure;
plotvals = linspace(0, 1, 200)';  % Dense grid; reused later for network predictions.
plot(x1, t, 'ob')
hold on
axis([0 1 -1.5 1.5])
% Evaluate the true underlying function directly on the plotting grid.
% The original call [fx, fy] = fplot('sin(2*pi*x)', [0 1]) relied on
% fplot output arguments and character-vector expression input, both of
% which have been removed/deprecated in recent MATLAB releases; explicit
% evaluation produces the same green curve on [0, 1].
fx = plotvals;
fy = sin(2*pi*fx);
plot(fx, fy, '-g', 'LineWidth', 2);
legend('data', 'function');

disp(' ');
disp('Press any key to continue')
pause; clc;
Daniel@0 65
% Explain the ARD prior and the training procedure to the user before
% the network is created.  Informational console output only.
disp('The prior over weights is given by the ARD Gaussian prior with a')
disp('separate hyper-parameter for the group of weights associated with each')
disp('input. This prior is set up using the utility MLPPRIOR. The network is')
disp('trained by error minimization using scaled conjugate gradient function')
disp('SCG. There are two cycles of training, and at the end of each cycle')
disp('the hyper-parameters are re-estimated using EVIDENCE.')
disp(' ');
disp('Press any key to create and train the network.')
disp(' ');
pause;
Daniel@0 76
% Set up network parameters.
nin = 3;                 % Number of inputs.
nhidden = 2;             % Number of hidden units.
nout = 1;                % Number of outputs.
aw1 = 0.01*ones(1, nin); % First-layer ARD hyperparameters: one alpha per input,
                         % all starting at the same weak value.
ab1 = 0.01;              % Hyperparameter for hidden unit biases.
aw2 = 0.01;              % Hyperparameter for second-layer weights.
ab2 = 0.01;              % Hyperparameter for output unit biases.
beta = 50.0;             % Coefficient of data error (inverse noise variance).

% Create and initialize network.
% MLPPRIOR packages the alphas into the grouped ARD prior structure;
% the MLP uses a linear output activation for this regression problem.
prior = mlpprior(nin, nhidden, nout, aw1, ab1, aw2, ab2);
net = mlp(nin, nhidden, nout, 'linear', prior, beta);
Daniel@0 90
% Set up vector of options for the optimiser.
% Netlab options vectors are positional: (1) = verbosity, (2)/(3) =
% termination tolerances on parameters and error, (14) = max iterations.
nouter = 2;            % Number of outer loops (train + evidence cycles).
ninner = 10;           % Number of inner loops (evidence re-estimations per cycle).
options = zeros(1,18); % Default options vector.
options(1) = 1;        % This provides display of error values.
options(2) = 1.0e-7;   % This ensures that convergence must occur
options(3) = 1.0e-7;   % (both tolerances must be met before termination).
options(14) = 300;     % Number of training cycles in inner loop.
Daniel@0 99
% Train using scaled conjugate gradients, re-estimating alpha and beta.
% Each outer cycle first fits the weights with the current
% hyperparameters fixed, then lets EVIDENCE update alpha and beta.
for k = 1:nouter
  % Optimise network weights by SCG under the current ARD prior.
  net = netopt(net, options, x, t, 'scg');
  % Re-estimate the hyperparameters; gamma is the value EVIDENCE reports
  % alongside them (the effective number of well-determined parameters,
  % per Netlab's EVIDENCE — see its documentation).
  [net, gamma] = evidence(net, x, t, ninner);
  fprintf(1, '\n\nRe-estimation cycle %d:\n', k);
  disp('The first three alphas are the hyperparameters for the corresponding');
  disp('input to hidden unit weights. The remainder are the hyperparameters');
  disp('for the hidden unit biases, second layer weights and output unit')
  disp('biases, respectively.')
  fprintf(1, ' alpha = %8.5f\n', net.alpha);
  fprintf(1, ' beta = %8.5f\n', net.beta);
  fprintf(1, ' gamma = %8.5f\n\n', gamma);
  disp(' ')
  disp('Press any key to continue.')
  pause
end
Daniel@0 116
% Plot the function corresponding to the trained network.
figure(h); hold on;
% The original call requested mlpfwd's second output (the hidden unit
% activations) into z, which was never used anywhere in this script;
% request only the network outputs.  The same grid value is fed to all
% three inputs so the curve can be drawn against plotvals.
y = mlpfwd(net, plotvals*ones(1,3));
plot(plotvals, y, '-r', 'LineWidth', 2)
legend('data', 'function', 'network');

disp('Press any key to continue.');
pause; clc;
Daniel@0 125
% Report the fitted ARD hyperparameters and interpret them for the user.
% A small alpha means a broad prior (large weight variance), so relevant
% inputs end up with small alphas and large first-layer weights.
disp('We can now read off the hyperparameter values corresponding to the')
disp('three inputs x1, x2 and x3:')
disp(' ');
fprintf(1, ' alpha1: %8.5f\n', net.alpha(1));
fprintf(1, ' alpha2: %8.5f\n', net.alpha(2));
fprintf(1, ' alpha3: %8.5f\n', net.alpha(3));
disp(' ');
disp('Since each alpha corresponds to an inverse variance, we see that the')
disp('posterior variance for weights associated with input x1 is large, that')
disp('of x2 has an intermediate value and the variance of weights associated')
disp('with x3 is small.')
disp(' ')
disp('Press any key to continue.')
disp(' ')
pause
disp('This is confirmed by looking at the corresponding weight values:')
disp(' ');
% net.w1 is nin x nhidden, so each printed row shows the two weights
% leaving one input.
fprintf(1, ' %8.5f %8.5f\n', net.w1');
disp(' ');
% Typo fixed in the message below: 'asssociated' -> 'associated'.
disp('where the three rows correspond to weights associated with x1, x2 and')
disp('x3 respectively. We see that the network is giving greatest emphasis')
disp('to x1 and least emphasis to x3, with intermediate emphasis on')
disp('x2. Since the target t is statistically independent of x3 we might')
disp('expect the weights associated with this input would go to')
disp('zero. However, for any finite data set there may be some chance')
disp('correlation between x3 and t, and so the corresponding alpha remains')
disp('finite.')

disp(' ');
disp('Press any key to end.')
% Use 'clear' rather than 'clear all': the script only needs to remove
% its workspace variables, and MathWorks recommends against 'clear all'
% in programs (it also clears compiled functions, globals and more).
pause; clc; close(h); clear
Daniel@0 157