wolffd@0
|
1 %DEMEV2 Demonstrate Bayesian classification for the MLP.
|
wolffd@0
|
2 %
|
wolffd@0
|
3 % Description
|
wolffd@0
|
4 % A synthetic two class two-dimensional dataset X is sampled from a
|
wolffd@0
|
5 % mixture of four Gaussians. Each class is associated with two of the
|
wolffd@0
|
6 % Gaussians so that the optimal decision boundary is non-linear. A 2-
|
wolffd@0
|
7 % layer network with logistic outputs is trained by minimizing the
|
wolffd@0
|
8 % cross-entropy error function with isotropic Gaussian regularizer (one
|
wolffd@0
|
9 % hyperparameter for each of the four standard weight groups), using
|
wolffd@0
|
10 % the scaled conjugate gradient optimizer. The hyperparameter vectors
|
wolffd@0
|
11 % ALPHA and BETA are re-estimated using the function EVIDENCE. A graph
|
wolffd@0
|
12 % is plotted of the optimal, regularised, and unregularised decision
|
wolffd@0
|
13 % boundaries. A further plot of the moderated versus unmoderated
|
wolffd@0
|
14 % contours is generated.
|
wolffd@0
|
15 %
|
wolffd@0
|
16 % See also
|
wolffd@0
|
17 % EVIDENCE, MLP, SCG, DEMARD, DEMMLP2
|
wolffd@0
|
18 %
|
wolffd@0
|
19
|
wolffd@0
|
20 % Copyright (c) Ian T Nabney (1996-2001)
|
wolffd@0
|
21
|
wolffd@0
|
22
|
wolffd@0
|
% Clear the command window and describe what the demo will show.
clc;

disp('This program demonstrates the use of the evidence procedure on')
disp('a two-class problem.  It also shows the improved generalisation')
disp('performance that can be achieved with moderated outputs; that is')
disp('predictions where an approximate integration over the true')
disp('posterior distribution is carried out.')
disp(' ')
disp('First we generate a synthetic dataset with two-dimensional input')
disp('sampled from a mixture of four Gaussians.  Each class is')
disp('associated with two of the Gaussians so that the optimal decision')
disp('boundary is non-linear.')
disp(' ')
disp('Press any key to see a plot of the data.')
pause;
|
wolffd@0
|
38
|
wolffd@0
|
% Generate the matrix of inputs x and targets t.

% Fix the legacy RNG seeds so the demo is reproducible from run to run.
rand('state', 423);
randn('state', 423);

% Plotting attributes: class 1 in red, class 2 in yellow.
ClassSymbol1 = 'r.';
ClassSymbol2 = 'y.';
PointSize = 12;
titleSize = 10;

fh1 = figure;
set(fh1, 'Name', 'True Data Distribution');
whitebg(fh1, 'k');	% Light symbols show up better on a dark background.

%
% Generate the data
%
n=200;	% Number of points sampled from the mixture.

% Set up mixture model: 2d data with four centres
% Class 1 is first two centres, class 2 from the other two
mix = gmm(2, 4, 'full');
mix.priors = [0.25 0.25 0.25 0.25];
mix.centres = [0 -0.1; 1.5 0; 1 1; 1 -1];
mix.covars(:,:,1) = [0.625 -0.2165; -0.2165 0.875];
mix.covars(:,:,2) = [0.25 0; 0 0.25];
mix.covars(:,:,3) = [0.2241 -0.1368; -0.1368 0.9759];
mix.covars(:,:,4) = [0.2375 0.1516; 0.1516 0.4125];

% Sample n points; label(i) records which component generated data(i,:).
[data, label] = gmmsamp(mix, n);
|
wolffd@0
|
69
|
wolffd@0
|
%
% Calculate some useful axis limits from the sampled data.
%
x0 = min(data(:,1));
x1 = max(data(:,1));
y0 = min(data(:,2));
y1 = max(data(:,2));
dx = x1-x0;
dy = y1-y0;
expand = 5/100;			% Add on 5 percent each way
x0 = x0 - dx*expand;
x1 = x1 + dx*expand;
y0 = y0 - dy*expand;
y1 = y1 + dy*expand;
resolution = 100;
% NOTE(review): the x-extent alone sets the sampling step for BOTH axes,
% so the grid is square-celled but the y-axis point count varies with dy.
step = dx/resolution;
xrange = [x0:step:x1];
yrange = [y0:step:y1];
%
% Generate the grid
%
% Reuse xrange/yrange rather than repeating the range expressions, so the
% grid is guaranteed to stay consistent with the ranges passed to contour()
% later in the demo.
[X Y]=meshgrid(xrange, yrange);
%
% Calculate the class conditional densities, the unconditional densities and
% the posterior probabilities
%
px_j = gmmactiv(mix, [X(:) Y(:)]);
px = reshape(px_j*(mix.priors)',size(X));
post = gmmpost(mix, [X(:) Y(:)]);
% Class 1 comes from mixture components 1 and 2; class 2 from 3 and 4.
p1_x = reshape(post(:, 1) + post(:, 2), size(X));
p2_x = reshape(post(:, 3) + post(:, 4), size(X));
|
wolffd@0
|
101
|
wolffd@0
|
% Scatter-plot the sampled data, one symbol/colour per class.
plot(data((label<=2),1),data(label<=2,2),ClassSymbol1, 'MarkerSize', ...
  PointSize)
hold on
axis([x0 x1 y0 y1])
plot(data((label>2),1),data(label>2,2),ClassSymbol2, 'MarkerSize', ...
  PointSize)

% Convert targets to 0-1 encoding
% (1 = class 1, i.e. mixture components 1 and 2; 0 = class 2).
target=[label<=2];
disp(' ')
disp('Press any key to continue')
pause; clc;
|
wolffd@0
|
114
|
wolffd@0
|
disp('Next we create a two-layer MLP network with 6 hidden units and')
disp('one logistic output.  We use a separate inverse variance')
disp('hyperparameter for each group of weights (inputs, input bias,')
disp('outputs, output bias) and the weights are optimised with the')
disp('scaled conjugate gradient algorithm.  After each 100 iterations')
disp('the hyperparameters are re-estimated twice.  There are eight')
disp('cycles of the whole algorithm.')
disp(' ')
disp('Press any key to train the network and determine the hyperparameters.')
pause;

% Set up network parameters.
nin = 2;		% Number of inputs.
nhidden = 6;		% Number of hidden units.
nout = 1;		% Number of outputs.
alpha = 0.01;		% Initial prior hyperparameter.
aw1 = 0.01;		% Initial hyperparameters, one per weight group:
ab1 = 0.01;		% first-layer weights / first-layer biases /
aw2 = 0.01;		% second-layer weights / second-layer biases.
ab2 = 0.01;

% Create and initialize network weight vector, drawing the initial
% weights from the Gaussian prior defined by the four hyperparameters.
prior = mlpprior(nin, nhidden, nout, aw1, ab1, aw2, ab2);
net = mlp(nin, nhidden, nout, 'logistic', prior);

% Set up vector of options for the optimiser.
nouter = 8;			% Number of outer loops.
ninner = 2;			% Number of inner loops.
options = foptions;		% Default options vector.
options(1) = 1;			% This provides display of error values.
options(2) = 1.0e-5;		% Absolute precision for weights.
options(3) = 1.0e-5;		% Precision for objective function.
options(14) = 100;		% Number of training cycles in inner loop.

% Train using scaled conjugate gradients, re-estimating alpha and beta.
% Each outer cycle: optimise weights, then re-estimate the hyperparameters
% (ninner times) with the evidence procedure; gamma is the effective
% number of well-determined parameters reported by EVIDENCE.
for k = 1:nouter
  net = netopt(net, options, data, target, 'scg');
  [net, gamma] = evidence(net, data, target, ninner);
  fprintf(1, '\nRe-estimation cycle %d:\n', k);
  disp([' alpha = ', num2str(net.alpha')]);
  fprintf(1, ' gamma = %8.5f\n\n', gamma);
  disp(' ')
  disp('Press any key to continue.')
  pause;
end
|
wolffd@0
|
160
|
wolffd@0
|
disp(' ')
disp('Network training and hyperparameter re-estimation are now complete.')
disp('Notice that the final error value is close to the number of data')
disp(['points (', num2str(n), ') divided by two.'])
disp('Also, the hyperparameter values differ, which suggests that a single')
disp('hyperparameter would not be so effective.')
disp(' ')
% Fix: the Bayesian training has already happened at this point, so the
% original message's "First we train ..." was misleading.
disp('Now we train an MLP without Bayesian regularisation on the')
disp('same dataset using 400 iterations of scaled conjugate gradient')
disp(' ')
disp('Press any key to train the network by maximum likelihood.')
pause;
% Train standard (unregularised, maximum likelihood) network.
net2 = mlp(nin, nhidden, nout, 'logistic');
options(14) = 400;
net2 = netopt(net2, options, data, target, 'scg');
% Evaluate the unregularised network over the grid for the contour plots.
y2g = mlpfwd(net2, [X(:), Y(:)]);
y2g = reshape(y2g(:, 1), size(X));
|
wolffd@0
|
179
|
wolffd@0
|
disp(' ')
disp('We can now plot the function represented by the trained networks.')
disp('We show the decision boundaries (output = 0.5) and the optimal')
disp('decision boundary given by applying Bayes'' theorem to the true')
disp('data model.')
disp(' ')
disp('Press any key to add the boundaries to the plot.')
pause;

% Evaluate predictions.
% yg are the plain network outputs; ymodg are the moderated outputs
% (approximate integration over the weight posterior) from MLPEVFWD.
[yg, ymodg] = mlpevfwd(net, data, target, [X(:) Y(:)]);
yg = reshape(yg(:,1),size(X));
ymodg = reshape(ymodg(:,1),size(X));

% Bayesian decision boundary: the 0.5 contour of the true posterior,
% followed by the regularised and unregularised network boundaries.
[cB, hB] = contour(xrange,yrange,p1_x,[0.5 0.5],'b-');
[cNb, hNb] = contour(xrange,yrange,yg,[0.5 0.5],'r-');
[cN, hN] = contour(xrange,yrange,y2g,[0.5 0.5],'g-');
set(hB, 'LineWidth', 2);
set(hNb, 'LineWidth', 2);
set(hN, 'LineWidth', 2);
Chandles = [hB(1) hNb(1) hN(1)];
% The trailing 3 is the legacy legend position code (lower-left corner).
legend(Chandles, 'Bayes', ...
  'Reg. Network', 'Network', 3);
|
wolffd@0
|
204
|
wolffd@0
|
disp(' ')
disp('Note how the regularised network predictions are closer to the')
disp('optimal decision boundary, while the unregularised network is')
disp('overtrained.')

disp(' ')
disp('We will now compare moderated and unmoderated outputs for the');
disp('regularised network by showing the contour plot of the posterior')
disp('probability estimates.')
disp(' ')
disp('The first plot shows the regularised (moderated) predictions')
disp('and the second shows the standard predictions from the same network.')
disp('These agree at the level 0.5.')
disp('Press any key to continue')
pause
% Contour levels for the probability maps: 0 to 1 in steps of 0.1.
levels = 0:0.1:1;
fh4 = figure;
set(fh4, 'Name', 'Moderated outputs');
hold on
plot(data((label<=2),1),data(label<=2,2),'r.', 'MarkerSize', PointSize)
plot(data((label>2),1),data(label>2,2),'y.', 'MarkerSize', PointSize)

% Moderated predictions (ymodg) from the regularised network.
[cNby, hNby] = contour(xrange, yrange, ymodg, levels, 'k-');
set(hNby, 'LineWidth', 1);

fh5 = figure;
set(fh5, 'Name', 'Unmoderated outputs');
hold on
plot(data((label<=2),1),data(label<=2,2),'r.', 'MarkerSize', PointSize)
plot(data((label>2),1),data(label>2,2),'y.', 'MarkerSize', PointSize)

% Unmoderated predictions (yg) from the same regularised network.
[cNbm, hNbm] = contour(xrange, yrange, yg, levels, 'k-');
set(hNbm, 'LineWidth', 1);

disp(' ')
disp('Note how the moderated contours are more widely spaced.  This shows')
disp('that there is a larger region where the outputs are close to 0.5')
disp('and a smaller region where the outputs are close to 0 or 1.')
disp(' ')
disp('Press any key to exit')
pause
% Tidy up: close all figures opened by the demo.
close(fh1);
close(fh4);
close(fh5);