annotate toolboxes/FullBNT-1.0.7/netlab3.3/demev1.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
rev   line source
wolffd@0 1 %DEMEV1 Demonstrate Bayesian regression for the MLP.
wolffd@0 2 %
wolffd@0 3 % Description
wolffd@0 4 % The problem consists of an input variable X which is sampled from a
wolffd@0 5 % Gaussian distribution, and a target variable T generated by computing
wolffd@0 6 % SIN(2*PI*X) and adding Gaussian noise. A 2-layer network with linear
wolffd@0 7 % outputs is trained by minimizing a sum-of-squares error function with
wolffd@0 8 % isotropic Gaussian regularizer, using the scaled conjugate gradient
wolffd@0 9 % optimizer. The hyperparameters ALPHA and BETA are re-estimated using
wolffd@0 10 % the function EVIDENCE. A graph is plotted of the original function,
wolffd@0 11 % the training data, the trained network function, and the error bars.
wolffd@0 12 %
wolffd@0 13 % See also
wolffd@0 14 % EVIDENCE, MLP, SCG, DEMARD, DEMMLP1
wolffd@0 15 %
wolffd@0 16
wolffd@0 17 % Copyright (c) Ian T Nabney (1996-2001)
wolffd@0 18
wolffd@0 19 clc;
wolffd@0 20 disp('This demonstration illustrates the application of Bayesian')
wolffd@0 21 disp('re-estimation to determine the hyperparameters in a simple regression')
wolffd@0 22 disp('problem. It is based on a local quadratic approximation to a mode of')
wolffd@0 23 disp('the posterior distribution and the evidence maximization framework of')
wolffd@0 24 disp('MacKay.')
wolffd@0 25 disp(' ')
wolffd@0 26 disp('First, we generate a synthetic data set consisting of a single input')
wolffd@0 27 disp('variable x sampled from a Gaussian distribution, and a target variable')
wolffd@0 28 disp('t obtained by evaluating sin(2*pi*x) and adding Gaussian noise.')
wolffd@0 29 disp(' ')
wolffd@0 30 disp('Press any key to see a plot of the data together with the sine function.')
wolffd@0 31 pause;
wolffd@0 32
wolffd@0 33 % Generate the matrix of inputs x and targets t.
wolffd@0 34
wolffd@0 35 ndata = 16; % Number of data points.
wolffd@0 36 noise = 0.1; % Standard deviation of noise distribution.
wolffd@0 37 randn('state', 0); % Fix the seed for reproducible results (legacy RNG syntax).
wolffd@0 38 x = 0.25 + 0.07*randn(ndata, 1); % Gaussian inputs centred on 0.25.
wolffd@0 39 t = sin(2*pi*x) + noise*randn(size(x)); % Noisy samples of sin(2*pi*x).
wolffd@0 40
wolffd@0 41 % Plot the data and the original sine function.
wolffd@0 42 h = figure; % Keep the handle: the trained network fit is added to this figure later.
wolffd@0 43 nplot = 200; % Number of points at which the smooth curves are evaluated.
wolffd@0 44 plotvals = linspace(0, 1, nplot)'; % Column vector of plotting inputs on [0, 1].
wolffd@0 45 plot(x, t, 'ok')
wolffd@0 46 xlabel('Input')
wolffd@0 47 ylabel('Target')
wolffd@0 48 hold on
wolffd@0 49 axis([0 1 -1.5 1.5])
wolffd@0 50 fplot('sin(2*pi*x)', [0 1], '-g')
wolffd@0 51 legend('data', 'function');
wolffd@0 52
wolffd@0 53 disp(' ')
wolffd@0 54 disp('Press any key to continue')
wolffd@0 55 pause; clc;
wolffd@0 56
% Explain the model and the training procedure to the user.
% Fix: "hyperparmeter" -> "hyperparameter" in the printed text.
wolffd@0 57 disp('Next we create a two-layer MLP network having 3 hidden units and one')
wolffd@0 58 disp('linear output. The model assumes Gaussian target noise governed by an')
wolffd@0 59 disp('inverse variance hyperparameter beta, and uses a simple Gaussian prior')
wolffd@0 60 disp('distribution governed by an inverse variance hyperparameter alpha.')
wolffd@0 61 disp(' ');
wolffd@0 62 disp('The network weights and the hyperparameters are initialised and then')
wolffd@0 63 disp('the weights are optimized with the scaled conjugate gradient')
wolffd@0 64 disp('algorithm using the SCG function, with the hyperparameters kept')
wolffd@0 65 disp('fixed. After a maximum of 500 iterations, the hyperparameters are')
wolffd@0 66 disp('re-estimated using the EVIDENCE function. The process of optimizing')
wolffd@0 67 disp('the weights with fixed hyperparameters and then re-estimating the')
wolffd@0 68 disp('hyperparameters is repeated for a total of 3 cycles.')
wolffd@0 69 disp(' ')
wolffd@0 70 disp('Press any key to train the network and determine the hyperparameters.')
wolffd@0 71 pause;
wolffd@0 72
wolffd@0 73 % Set up network parameters.
wolffd@0 74 nin = 1; % Number of inputs.
wolffd@0 75 nhidden = 3; % Number of hidden units.
wolffd@0 76 nout = 1; % Number of outputs.
wolffd@0 77 alpha = 0.01; % Initial prior hyperparameter.
wolffd@0 78 beta_init = 50.0; % Initial noise hyperparameter.
wolffd@0 79
wolffd@0 80 % Create and initialize network weight vector.
wolffd@0 81 net = mlp(nin, nhidden, nout, 'linear', alpha, beta_init);
wolffd@0 82
wolffd@0 83 % Set up vector of options for the optimiser.
wolffd@0 84 nouter = 3; % Number of outer loops.
wolffd@0 85 ninner = 1; % Number of inner loops.
wolffd@0 86 options = zeros(1,18); % Default options vector.
wolffd@0 87 options(1) = 1; % This provides display of error values.
wolffd@0 88 options(2) = 1.0e-7; % Absolute precision for weights.
wolffd@0 89 options(3) = 1.0e-7; % Precision for objective function.
wolffd@0 90 options(14) = 500; % Number of training cycles in inner loop.
wolffd@0 92 % Train using scaled conjugate gradients, re-estimating alpha and beta.
wolffd@0 93 for k = 1:nouter
wolffd@0 94 net = netopt(net, options, x, t, 'scg'); % Optimize weights with hyperparameters held fixed.
wolffd@0 95 [net, gamma] = evidence(net, x, t, ninner); % Re-estimate alpha and beta; see EVIDENCE for gamma's definition.
wolffd@0 96 fprintf(1, '\nRe-estimation cycle %d:\n', k);
wolffd@0 97 fprintf(1, ' alpha = %8.5f\n', net.alpha);
wolffd@0 98 fprintf(1, ' beta = %8.5f\n', net.beta);
wolffd@0 99 fprintf(1, ' gamma = %8.5f\n\n', gamma);
wolffd@0 100 disp(' ')
wolffd@0 101 disp('Press any key to continue.')
wolffd@0 102 pause;
wolffd@0 103 end
wolffd@0 104
wolffd@0 105 fprintf(1, 'true beta: %f\n', 1/(noise*noise)); % beta is the inverse noise variance, so true beta = 1/noise^2.
wolffd@0 106
wolffd@0 107 disp(' ')
wolffd@0 108 disp('Network training and hyperparameter re-estimation are now complete.')
wolffd@0 109 disp('Compare the final value for the hyperparameter beta with the true')
wolffd@0 110 disp('value.')
wolffd@0 111 disp(' ')
wolffd@0 112 disp('Notice that the final error value is close to the number of data')
wolffd@0 113 disp(['points (', num2str(ndata),') divided by two.'])
wolffd@0 114 disp(' ')
wolffd@0 115 disp('Press any key to continue.')
wolffd@0 116 pause; clc;
wolffd@0 117 disp('We can now plot the function represented by the trained network. This')
wolffd@0 118 disp('corresponds to the mean of the predictive distribution. We can also')
wolffd@0 119 disp('plot ''error bars'' representing one standard deviation of the')
wolffd@0 120 disp('predictive distribution around the mean.')
wolffd@0 121 disp(' ')
wolffd@0 122 disp('Press any key to add the network function and error bars to the plot.')
wolffd@0 123 pause;
wolffd@0 124
wolffd@0 125 % Evaluate error bars.
wolffd@0 126 [y, sig2] = netevfwd(mlppak(net), net, x, t, plotvals); % sig2: predictive variance at each of plotvals.
wolffd@0 127 sig = sqrt(sig2); % One standard deviation of the predictive distribution.
wolffd@0 128
wolffd@0 129 % Plot the data, the original function, and the trained network function.
wolffd@0 130 [y, z] = mlpfwd(net, plotvals); % NOTE(review): overwrites y from netevfwd; z is never used below -- presumably intermediate activations, confirm against MLPFWD.
wolffd@0 131 figure(h); hold on; % Reuse the figure created for the data plot.
wolffd@0 132 plot(plotvals, y, '-r')
wolffd@0 133 xlabel('Input')
wolffd@0 134 ylabel('Target')
wolffd@0 135 plot(plotvals, y + sig, '-b'); % Upper error bar: mean + one std dev.
wolffd@0 136 plot(plotvals, y - sig, '-b'); % Lower error bar: mean - one std dev.
wolffd@0 137 legend('data', 'function', 'network', 'error bars');
wolffd@0 138
wolffd@0 139 disp(' ')
wolffd@0 140 disp('Notice how the confidence interval spanned by the ''error bars'' is')
wolffd@0 141 disp('smaller in the region of input space where the data density is high,')
wolffd@0 142 disp('and becomes larger in regions away from the data.')
wolffd@0 143 disp(' ')
wolffd@0 144 disp('Press any key to end.')
wolffd@0 145 pause; clc; close(h);
wolffd@0 146 %clear all