%DEMMLP2 Demonstrate simple classification using a multi-layer perceptron
%
% Description
% The problem consists of input data in two dimensions drawn from a
% mixture of three Gaussians: two of which are assigned to a single
% class. An MLP with logistic outputs trained with a quasi-Newton
% optimisation algorithm is compared with the optimal Bayesian decision
% rule.
%
% See also
% MLP, MLPFWD, NETERR, QUASINEW
%

% Copyright (c) Ian T Nabney (1996-2001)


% Set up some figure parameters
AxisShift = 0.05;
ClassSymbol1 = 'r.';
ClassSymbol2 = 'y.';
PointSize = 12;
titleSize = 10;

% Fix the seeds
rand('state', 423);
randn('state', 423);

clc
disp('This demonstration shows how an MLP with logistic outputs and')
disp('cross entropy error function can be trained to model the')
disp('posterior class probabilities in a classification problem.')
disp('The results are compared with the optimal Bayes rule classifier,')
disp('which can be computed exactly as we know the form of the generating')
disp('distribution.')
disp(' ')
disp('Press any key to continue.')
pause

fh1 = figure;
set(fh1, 'Name', 'True Data Distribution');
whitebg(fh1, 'k');

%
% Generate the data
%
n=200;

% Set up mixture model: 2d data with three centres
% Class 1 is first centre, class 2 from the other two
mix = gmm(2, 3, 'full');
mix.priors = [0.5 0.25 0.25];
mix.centres = [0 -0.1; 1 1; 1 -1];
mix.covars(:,:,1) = [0.625 -0.2165; -0.2165 0.875];
mix.covars(:,:,2) = [0.2241 -0.1368; -0.1368 0.9759];
mix.covars(:,:,3) = [0.2375 0.1516; 0.1516 0.4125];

[data, label] = gmmsamp(mix, n);

%
% Calculate some useful axis limits
%
x0 = min(data(:,1));
x1 = max(data(:,1));
y0 = min(data(:,2));
y1 = max(data(:,2));
dx = x1-x0;
dy = y1-y0;
expand = 5/100;   % Add on 5 percent each way
x0 = x0 - dx*expand;
x1 = x1 + dx*expand;
y0 = y0 - dy*expand;
y1 = y1 + dy*expand;
resolution = 100;
step = dx/resolution;
xrange = [x0:step:x1];
yrange = [y0:step:y1];
%
% Generate the grid
%
[X Y]=meshgrid(xrange, yrange);
%
% Calculate the class conditional densities, the unconditional densities and
% the posterior probabilities
%
px_j = gmmactiv(mix, [X(:) Y(:)]);
px = reshape(px_j*(mix.priors)',size(X));
post = gmmpost(mix, [X(:) Y(:)]);
p1_x = reshape(post(:, 1), size(X));
p2_x = reshape(post(:, 2) + post(:, 3), size(X));
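
% Optional sanity check (illustrative): by Bayes' rule the two class
% posteriors must sum to one at every grid point, so the maps p1_x and
% p2_x computed above should be exactly complementary.
if max(max(abs(p1_x + p2_x - 1))) > 1e-6
  warning('Class posteriors do not sum to one');
end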

%
% Generate some pretty pictures !!
%
colormap(hot)
colorbar
subplot(1,2,1)
hold on
plot(data((label==1),1),data(label==1,2),ClassSymbol1, 'MarkerSize', PointSize)
plot(data((label>1),1),data(label>1,2),ClassSymbol2, 'MarkerSize', PointSize)
contour(xrange,yrange,p1_x,[0.5 0.5],'w-');
axis([x0 x1 y0 y1])
set(gca,'Box','On')
title('The Sampled Data');
rect=get(gca,'Position');
rect(1)=rect(1)-AxisShift;
rect(3)=rect(3)+AxisShift;
set(gca,'Position',rect)
hold off

subplot(1,2,2)
imagesc(X(:),Y(:),px);
hold on
[cB, hB] = contour(xrange,yrange,p1_x,[0.5 0.5],'w:');
set(hB,'LineWidth', 2);
axis([x0 x1 y0 y1])
set(gca,'YDir','normal')
title('Probability Density p(x)')
hold off

drawnow;
clc;
disp('The first figure shows the data sampled from a mixture of three')
disp('Gaussians, the first of which (whose centre is near the origin) is')
disp('labelled red and the other two are labelled yellow. The second plot')
disp('shows the unconditional density of the data with the optimal Bayesian')
disp('decision boundary superimposed.')
disp(' ')
disp('Press any key to continue.')
pause
fh2 = figure;
set(fh2, 'Name', 'Class-conditional Densities and Posterior Probabilities');
whitebg(fh2, 'w');

subplot(2,2,1)
p1=reshape(px_j(:,1),size(X));
imagesc(X(:),Y(:),p1);
colormap hot
colorbar
axis(axis)
set(gca,'YDir','normal')
hold on
plot(mix.centres(:,1),mix.centres(:,2),'b+','MarkerSize',8,'LineWidth',2)
title('Density p(x|red)')
hold off

subplot(2,2,2)
% Components 2 and 3 make up the yellow class with equal priors, so the
% class-conditional density is the average of the two component densities
p2=reshape(0.5*(px_j(:,2)+px_j(:,3)),size(X));
imagesc(X(:),Y(:),p2);
colorbar
set(gca,'YDir','normal')
hold on
plot(mix.centres(:,1),mix.centres(:,2),'b+','MarkerSize',8,'LineWidth',2)
title('Density p(x|yellow)')
hold off

subplot(2,2,3)
imagesc(X(:),Y(:),p1_x);
set(gca,'YDir','normal')
colorbar
title('Posterior Probability p(red|x)')
hold on
plot(mix.centres(:,1),mix.centres(:,2),'b+','MarkerSize',8,'LineWidth',2)
hold off

subplot(2,2,4)
imagesc(X(:),Y(:),p2_x);
set(gca,'YDir','normal')
colorbar
title('Posterior Probability p(yellow|x)')
hold on
plot(mix.centres(:,1),mix.centres(:,2),'b+','MarkerSize',8,'LineWidth',2)
hold off

% Now set up and train the MLP
nhidden=6;
nout=1;
alpha = 0.2;    % Weight decay
ncycles = 60;   % Number of training cycles
% Set up MLP network
net = mlp(2, nhidden, nout, 'logistic', alpha);
options = zeros(1,18);
options(1) = 1;   % Print out error values
options(14) = ncycles;
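
% With a 'logistic' output unit, Netlab pairs the MLP with the cross
% entropy error E = -sum(t.*log(y) + (1-t).*log(1-y)), plus the weight
% decay penalty 0.5*alpha*(w*w').  A minimal illustrative sketch of the
% data term, kept commented out so it does not affect the demo:
%
%   ytmp  = mlpfwd(net, data);
%   t01   = (label == 1);
%   Edata = -sum(t01.*log(ytmp) + (1-t01).*log(1-ytmp));
%   % mlperr(net, data, t01) should return Edata plus the decay term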

mlpstring = ['We now set up an MLP with ', num2str(nhidden), ...
  ' hidden units, logistic output and cross'];
trainstring = ['entropy error function, and train it for ', ...
  num2str(ncycles), ' cycles using the'];
wdstring = ['quasi-Newton optimisation algorithm with weight decay of ', ...
  num2str(alpha), '.'];

% Force out the figure before training the MLP
drawnow;
disp(' ')
disp('The second figure shows the class conditional densities and posterior')
disp('probabilities for each class. The blue crosses mark the centres of')
disp('the three Gaussians.')
disp(' ')
disp(mlpstring)
disp(trainstring)
disp(wdstring)
disp(' ')
disp('Press any key to continue.')
pause

% Convert targets to 0-1 encoding
target=[label==1];

% Train using quasi-Newton.
[net] = netopt(net, options, data, target, 'quasinew');
y = mlpfwd(net, data);
yg = mlpfwd(net, [X(:) Y(:)]);
yg = reshape(yg(:,1),size(X));

fh3 = figure;
set(fh3, 'Name', 'Network Output');
whitebg(fh3, 'k')
subplot(1, 2, 1)
hold on
plot(data((label==1),1),data(label==1,2),'r.', 'MarkerSize', PointSize)
plot(data((label>1),1),data(label>1,2),'y.', 'MarkerSize', PointSize)
% Bayesian decision boundary
[cB, hB] = contour(xrange,yrange,p1_x,[0.5 0.5],'b-');
[cN, hN] = contour(xrange,yrange,yg,[0.5 0.5],'r-');
set(hB, 'LineWidth', 2);
set(hN, 'LineWidth', 2);
Chandles = [hB(1) hN(1)];
legend(Chandles, 'Bayes', ...
  'Network', 3);

axis([x0 x1 y0 y1])
set(gca,'Box','on','XTick',[],'YTick',[])

title('Training Data','FontSize',titleSize);
hold off

subplot(1, 2, 2)
imagesc(X(:),Y(:),yg);
colormap hot
colorbar
axis(axis)
set(gca,'YDir','normal','XTick',[],'YTick',[])
title('Network Output','FontSize',titleSize)

clc
disp('This figure shows the training data with the decision boundary')
disp('produced by the trained network and the network''s prediction of')
disp('the posterior probability of the red class.')
disp(' ')
disp('Press any key to continue.')
pause

%
% Now generate and classify a test data set
%
[testdata testlabel] = gmmsamp(mix, n);
testlab=[testlabel==1 testlabel>1];

% This is the Bayesian classification
tpx_j = gmmpost(mix, testdata);
Bpost = [tpx_j(:,1), tpx_j(:,2)+tpx_j(:,3)];
[Bcon Brate]=confmat(Bpost, testlab);

% Compute network classification
yt = mlpfwd(net, testdata);
% Convert single output to posteriors for both classes
testpost = [yt 1-yt];
[C trate]=confmat(testpost, testlab);
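
% CONFMAT takes the largest entry in each row of the posterior matrix
% as the predicted class, which for this two-class problem amounts to
% thresholding the network output at 0.5.  A hand-rolled equivalent
% (illustrative sketch only, kept commented out):
%
%   pred = 1 + (yt <= 0.5);            % 1 = red, 2 = yellow
%   tcls = 1 + (testlabel > 1);
%   Ccheck = zeros(2);
%   for i = 1:length(pred)
%     Ccheck(tcls(i), pred(i)) = Ccheck(tcls(i), pred(i)) + 1;
%   end
%   % Ccheck should match C (rows are true classes, columns predicted)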

fh4 = figure;
set(fh4, 'Name', 'Decision Boundaries');
whitebg(fh4, 'k');
hold on
plot(testdata((testlabel==1),1),testdata((testlabel==1),2),...
  ClassSymbol1, 'MarkerSize', PointSize)
plot(testdata((testlabel>1),1),testdata((testlabel>1),2),...
  ClassSymbol2, 'MarkerSize', PointSize)
% Bayesian decision boundary
[cB, hB] = contour(xrange,yrange,p1_x,[0.5 0.5],'b-');
set(hB, 'LineWidth', 2);
% Network decision boundary
[cN, hN] = contour(xrange,yrange,yg,[0.5 0.5],'r-');
set(hN, 'LineWidth', 2);
Chandles = [hB(1) hN(1)];
legend(Chandles, 'Bayes decision boundary', ...
  'Network decision boundary', -1);
axis([x0 x1 y0 y1])
title('Test Data')
set(gca,'Box','On','XTick',[],'YTick',[])

clc
disp('This figure shows the test data with the decision boundary')
disp('produced by the trained network and the optimal Bayes rule.')
disp(' ')
disp('Press any key to continue.')
pause

fh5 = figure;
set(fh5, 'Name', 'Test Set Performance');
whitebg(fh5, 'w');
% Bayes rule performance
subplot(1,2,1)
plotmat(Bcon,'b','k',12)
set(gca,'XTick',[0.5 1.5])
set(gca,'YTick',[0.5 1.5])
grid('off')
set(gca,'XTickLabel',['Red   ' ; 'Yellow'])
set(gca,'YTickLabel',['Yellow' ; 'Red   '])
ylabel('True')
xlabel('Predicted')
title(['Bayes Confusion Matrix (' num2str(Brate(1)) '%)'])

% Network performance
subplot(1,2,2)
plotmat(C,'b','k',12)
set(gca,'XTick',[0.5 1.5])
set(gca,'YTick',[0.5 1.5])
grid('off')
set(gca,'XTickLabel',['Red   ' ; 'Yellow'])
set(gca,'YTickLabel',['Yellow' ; 'Red   '])
ylabel('True')
xlabel('Predicted')
title(['Network Confusion Matrix (' num2str(trate(1)) '%)'])

disp('The final figure shows the confusion matrices for the')
disp('two rules on the test set.')
disp(' ')
disp('Press any key to exit.')
pause
whitebg(fh1, 'w');
whitebg(fh2, 'w');
whitebg(fh3, 'w');
whitebg(fh4, 'w');
whitebg(fh5, 'w');
close(fh1); close(fh2); close(fh3);
close(fh4); close(fh5);
clear all;