diff toolboxes/FullBNT-1.0.7/netlab3.3/demev2.m @ 0:e9a9cd732c1e tip
first hg version after svn
author   | wolffd
date     | Tue, 10 Feb 2015 15:05:51 +0000
parents  |
children |
--- /dev/null    Thu Jan 01 00:00:00 1970 +0000
+++ b/toolboxes/FullBNT-1.0.7/netlab3.3/demev2.m    Tue Feb 10 15:05:51 2015 +0000
@@ -0,0 +1,248 @@
+%DEMEV2 Demonstrate Bayesian classification for the MLP.
+%
+% Description
+% A synthetic two class two-dimensional dataset X is sampled from a
+% mixture of four Gaussians. Each class is associated with two of the
+% Gaussians so that the optimal decision boundary is non-linear. A 2-
+% layer network with logistic outputs is trained by minimizing the
+% cross-entropy error function with an isotropic Gaussian regularizer (one
+% hyperparameter for each of the four standard weight groups), using
+% the scaled conjugate gradient optimizer. The hyperparameter vectors
+% ALPHA and BETA are re-estimated using the function EVIDENCE. A graph
+% is plotted of the optimal, regularised, and unregularised decision
+% boundaries. A further plot of the moderated versus unmoderated
+% contours is generated.
+%
+% See also
+% EVIDENCE, MLP, SCG, DEMARD, DEMMLP2
+%
+
+% Copyright (c) Ian T Nabney (1996-2001)
+
+
+clc;
+
+disp('This program demonstrates the use of the evidence procedure on')
+disp('a two-class problem. It also shows the improved generalisation')
+disp('performance that can be achieved with moderated outputs; that is')
+disp('predictions where an approximate integration over the true')
+disp('posterior distribution is carried out.')
+disp(' ')
+disp('First we generate a synthetic dataset with two-dimensional input')
+disp('sampled from a mixture of four Gaussians. Each class is')
+disp('associated with two of the Gaussians so that the optimal decision')
+disp('boundary is non-linear.')
+disp(' ')
+disp('Press any key to see a plot of the data.')
+pause;
+
+% Generate the matrix of inputs x and targets t.
+
+rand('state', 423);
+randn('state', 423);
+
+ClassSymbol1 = 'r.';
+ClassSymbol2 = 'y.';
+PointSize = 12;
+titleSize = 10;
+
+fh1 = figure;
+set(fh1, 'Name', 'True Data Distribution');
+whitebg(fh1, 'k');
+
+%
+% Generate the data
+%
+n=200;
+
+% Set up mixture model: 2d data with four centres
+% Class 1 is first two centres, class 2 from the other two
+mix = gmm(2, 4, 'full');
+mix.priors = [0.25 0.25 0.25 0.25];
+mix.centres = [0 -0.1; 1.5 0; 1 1; 1 -1];
+mix.covars(:,:,1) = [0.625 -0.2165; -0.2165 0.875];
+mix.covars(:,:,2) = [0.25 0; 0 0.25];
+mix.covars(:,:,3) = [0.2241 -0.1368; -0.1368 0.9759];
+mix.covars(:,:,4) = [0.2375 0.1516; 0.1516 0.4125];
+
+[data, label] = gmmsamp(mix, n);
+
+%
+% Calculate some useful axis limits
+%
+x0 = min(data(:,1));
+x1 = max(data(:,1));
+y0 = min(data(:,2));
+y1 = max(data(:,2));
+dx = x1-x0;
+dy = y1-y0;
+expand = 5/100;     % Add on 5 percent each way
+x0 = x0 - dx*expand;
+x1 = x1 + dx*expand;
+y0 = y0 - dy*expand;
+y1 = y1 + dy*expand;
+resolution = 100;
+step = dx/resolution;
+xrange = [x0:step:x1];
+yrange = [y0:step:y1];
+%
+% Generate the grid
+%
+[X Y]=meshgrid([x0:step:x1],[y0:step:y1]);
+%
+% Calculate the class conditional densities, the unconditional densities and
+% the posterior probabilities
+%
+px_j = gmmactiv(mix, [X(:) Y(:)]);
+px = reshape(px_j*(mix.priors)',size(X));
+post = gmmpost(mix, [X(:) Y(:)]);
+p1_x = reshape(post(:, 1) + post(:, 2), size(X));
+p2_x = reshape(post(:, 3) + post(:, 4), size(X));
+
+plot(data((label<=2),1),data(label<=2,2),ClassSymbol1, 'MarkerSize', ...
+  PointSize)
+hold on
+axis([x0 x1 y0 y1])
+plot(data((label>2),1),data(label>2,2),ClassSymbol2, 'MarkerSize', ...
+  PointSize)
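% The optimal boundary above is obtained by applying Bayes' theorem to the
% true mixture: the class 1 posterior is the sum of the posteriors of its two
% components. A minimal check at a single query point (xq is an arbitrary
% illustrative point, not part of the original demo):
xq    = [0.5 0.0];
act   = gmmactiv(mix, xq);                    % p(xq | component j), 1 x 4
pxq   = act * mix.priors';                    % p(xq) = sum_j p(xq|j) P(j)
p1q   = (act(1:2) * mix.priors(1:2)') / pxq;  % P(class 1 | xq) via Bayes
postq = gmmpost(mix, xq);                     % Netlab's component posteriors
% p1q matches postq(1) + postq(2), the quantity gridded in p1_x above.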
+
+% Convert targets to 0-1 encoding
+target=[label<=2];
+disp(' ')
+disp('Press any key to continue')
+pause; clc;
+
+disp('Next we create a two-layer MLP network with 6 hidden units and')
+disp('one logistic output. We use a separate inverse variance')
+disp('hyperparameter for each group of weights (inputs, input bias,')
+disp('outputs, output bias) and the weights are optimised with the')
+disp('scaled conjugate gradient algorithm. After each 100 iterations')
+disp('the hyperparameters are re-estimated twice. There are eight')
+disp('cycles of the whole algorithm.')
+disp(' ')
+disp('Press any key to train the network and determine the hyperparameters.')
+pause;
+
+% Set up network parameters.
+nin = 2;          % Number of inputs.
+nhidden = 6;      % Number of hidden units.
+nout = 1;         % Number of outputs.
+alpha = 0.01;     % Initial prior hyperparameter.
+aw1 = 0.01;
+ab1 = 0.01;
+aw2 = 0.01;
+ab2 = 0.01;
+
+% Create and initialize network weight vector.
+prior = mlpprior(nin, nhidden, nout, aw1, ab1, aw2, ab2);
+net = mlp(nin, nhidden, nout, 'logistic', prior);
+
+% Set up vector of options for the optimiser.
+nouter = 8;           % Number of outer loops.
+ninner = 2;           % Number of inner loops.
+options = foptions;   % Default options vector.
+options(1) = 1;       % This provides display of error values.
+options(2) = 1.0e-5;  % Absolute precision for weights.
+options(3) = 1.0e-5;  % Precision for objective function.
+options(14) = 100;    % Number of training cycles in inner loop.
+
+% Train using scaled conjugate gradients, re-estimating alpha and beta.
+for k = 1:nouter
+  net = netopt(net, options, data, target, 'scg');
+  [net, gamma] = evidence(net, data, target, ninner);
+  fprintf(1, '\nRe-estimation cycle %d:\n', k);
+  disp(['  alpha = ', num2str(net.alpha')]);
+  fprintf(1, '  gamma = %8.5f\n\n', gamma);
+  disp(' ')
+  disp('Press any key to continue.')
+  pause;
+end
+
+disp(' ')
+disp('Network training and hyperparameter re-estimation are now complete.')
+disp('Notice that the final error value is close to the number of data')
+disp(['points (', num2str(n), ') divided by two.'])
+disp('Also, the hyperparameter values differ, which suggests that a single')
+disp('hyperparameter would not be so effective.')
+disp(' ')
+disp('First we train an MLP without Bayesian regularisation on the')
+disp('same dataset using 400 iterations of scaled conjugate gradient')
+disp(' ')
+disp('Press any key to train the network by maximum likelihood.')
+pause;
+% Train standard network
+net2 = mlp(nin, nhidden, nout, 'logistic');
+options(14) = 400;
+net2 = netopt(net2, options, data, target, 'scg');
+y2g = mlpfwd(net2, [X(:), Y(:)]);
+y2g = reshape(y2g(:, 1), size(X));
+
+disp(' ')
+disp('We can now plot the function represented by the trained networks.')
+disp('We show the decision boundaries (output = 0.5) and the optimal')
+disp('decision boundary given by applying Bayes'' theorem to the true')
+disp('data model.')
+disp(' ')
+disp('Press any key to add the boundaries to the plot.')
+pause;
+
+% Evaluate predictions.
+[yg, ymodg] = mlpevfwd(net, data, target, [X(:) Y(:)]);
+yg = reshape(yg(:,1),size(X));
+ymodg = reshape(ymodg(:,1),size(X));
+
+% Bayesian decision boundary
+[cB, hB] = contour(xrange,yrange,p1_x,[0.5 0.5],'b-');
+[cNb, hNb] = contour(xrange,yrange,yg,[0.5 0.5],'r-');
+[cN, hN] = contour(xrange,yrange,y2g,[0.5 0.5],'g-');
+set(hB, 'LineWidth', 2);
+set(hNb, 'LineWidth', 2);
+set(hN, 'LineWidth', 2);
+Chandles = [hB(1) hNb(1) hN(1)];
+legend(Chandles, 'Bayes', ...
+  'Reg. Network', 'Network', 3);
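% The improved generalisation of the regularised network can be checked
% numerically. A rough sketch, assuming a fresh draw from the same mixture is
% used as a test set (this test set is not part of the original demo):
[tdata, tlabel] = gmmsamp(mix, 1000);
ttarget = (tlabel <= 2);                      % same 0-1 encoding as target
yt_reg  = mlpfwd(net,  tdata);                % evidence-trained network
yt_ml   = mlpfwd(net2, tdata);                % maximum-likelihood network
fprintf('Test misclassification: regularised %.3f, unregularised %.3f\n', ...
    mean((yt_reg > 0.5) ~= ttarget), mean((yt_ml > 0.5) ~= ttarget));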
+
+disp(' ')
+disp('Note how the regularised network predictions are closer to the')
+disp('optimal decision boundary, while the unregularised network is')
+disp('overtrained.')
+
+disp(' ')
+disp('We will now compare moderated and unmoderated outputs for the');
+disp('regularised network by showing the contour plot of the posterior')
+disp('probability estimates.')
+disp(' ')
+disp('The first plot shows the regularised (moderated) predictions')
+disp('and the second shows the standard predictions from the same network.')
+disp('These agree at the level 0.5.')
+disp('Press any key to continue')
+pause
+levels = 0:0.1:1;
+fh4 = figure;
+set(fh4, 'Name', 'Moderated outputs');
+hold on
+plot(data((label<=2),1),data(label<=2,2),'r.', 'MarkerSize', PointSize)
+plot(data((label>2),1),data(label>2,2),'y.', 'MarkerSize', PointSize)
+
+[cNby, hNby] = contour(xrange, yrange, ymodg, levels, 'k-');
+set(hNby, 'LineWidth', 1);
+
+fh5 = figure;
+set(fh5, 'Name', 'Unmoderated outputs');
+hold on
+plot(data((label<=2),1),data(label<=2,2),'r.', 'MarkerSize', PointSize)
+plot(data((label>2),1),data(label>2,2),'y.', 'MarkerSize', PointSize)
+
+[cNbm, hNbm] = contour(xrange, yrange, yg, levels, 'k-');
+set(hNbm, 'LineWidth', 1);
+
+disp(' ')
+disp('Note how the moderated contours are more widely spaced. This shows')
+disp('that there is a larger region where the outputs are close to 0.5')
+disp('and a smaller region where the outputs are close to 0 or 1.')
+disp(' ')
+disp('Press any key to exit')
+pause
+close(fh1);
+close(fh4);
+close(fh5);
\ No newline at end of file
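% Moderation scales the logit of each prediction towards zero, so a moderated
% output can only move towards 0.5, never away from it; this is why the
% moderated contours are more widely spaced. A minimal numerical check,
% assuming the grids yg and ymodg from the listing are still in the workspace:
pulled_in = abs(ymodg(:) - 0.5) <= abs(yg(:) - 0.5) + 1e-12;
fprintf('Moderated output is no further from 0.5 at %.1f%% of grid points\n', ...
    100 * mean(pulled_in));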