%DEMGTM2 Demonstrate GTM for visualisation.
%
%	Description
%	This script demonstrates the use of a GTM with a two-dimensional
%	latent space to visualise data in a higher dimensional space. This is
%	done through the use of the mean responsibility and magnification
%	factors.
%
%	The script is interactive: it pauses for a key press between stages,
%	opens three figures, closes them on exit and finally clears the
%	workspace.
%
%	See also
%	DEMGTM1, GTM, GTMEM, GTMPOST
%

%	Copyright (c) Ian T Nabney (1996-2001)


% Fix seeds for reproducible results
rand('state', 420);
randn('state', 420);

% Number of data points to sample (terminated with ';' so the value is
% not echoed to the command window)
ndata = 300;
clc;
disp('This demonstration shows how a Generative Topographic Mapping')
disp('can be used to model and visualise high dimensional data. The')
disp('data is generated from a mixture of two spherical Gaussians in')
dstring = ['four dimensional space. ', num2str(ndata), ...
    ' data points are generated.'];
disp(dstring);
disp(' ');
disp('Press any key to continue.')
pause

% Create data: a two-component spherical Gaussian mixture in data space
data_dim = 4;
latent_dim = 2;
mix = gmm(data_dim, 2, 'spherical');
mix.centres = [1 1 1 1; 0 0 0 0];
mix.priors = [0.5 0.5];
mix.covars = [0.1 0.1];

% labels holds the index of the mixture component each sample came from
[data, labels] = gmmsamp(mix, ndata);

latent_shape = [15 15];         % Number of latent points in each dimension
nlatent = prod(latent_shape);   % Number of latent points
num_rbf_centres = 16;

clc;
dstring = ['Next we generate and initialise the GTM. There are ', ...
    num2str(nlatent), ' latent points'];
disp(dstring);
dstring = ['arranged in a square of ', num2str(latent_shape(1)), ...
    ' points on a side. There are ', num2str(num_rbf_centres), ...
    ' centres in the'];
disp(dstring);
disp('RBF model, which has Gaussian activation functions.')
disp(' ')
disp('Once the model is created, the latent data sample')
disp('and RBF centres are placed uniformly in the square [-1 1 -1 1].')
disp('The output weights of the RBF are computed to map the latent');
disp('space to the two dimensional PCA subspace of the data.');
disp(' ')
disp('Press any key to continue.');
pause;

% Create and initialise GTM model
net = gtm(latent_dim, nlatent, data_dim, num_rbf_centres, ...
    'gaussian', 0.1);

options = foptions;
options(1) = -1;
options(7) = 1;     % Set width factor of RBF
% The 16 RBF centres are laid out on a regular 4 x 4 grid
net = gtminit(net, options, data, 'regular', latent_shape, [4 4]);

% Fresh options vector for training
options = foptions;
options(14) = 30;   % Number of EM iterations (reported in the text below)
options(1) = 1;

clc;
dstring = ['We now train the model with ', num2str(options(14)), ...
    ' iterations of'];
disp(dstring)
disp('the EM algorithm for the GTM.')
disp(' ')
disp('Press any key to continue.')
pause;

[net, options] = gtmem(net, data, options);

disp(' ')
disp('Press any key to continue.')
pause;

clc;
disp('We now visualise the data by plotting, for each data point,');
disp('the posterior mean and mode (in latent space). These give');
disp('a summary of the entire posterior distribution in latent space.')
disp('The corresponding values are joined by a line to aid the')
disp('interpretation.')
disp(' ')
disp('Press any key to continue.');
pause;

% Plot posterior means
means = gtmlmean(net, data);
modes = gtmlmode(net, data);
PointSize = 12;
ClassSymbol1 = 'r.';
ClassSymbol2 = 'b.';
fh1 = figure;
hold on;
title('Visualisation in latent space')
plot(means((labels==1),1), means(labels==1,2), ...
    ClassSymbol1, 'MarkerSize', PointSize)
plot(means((labels>1),1), means(labels>1,2), ...
    ClassSymbol2, 'MarkerSize', PointSize)

% Plot posterior modes as circles in the same class colours
ClassSymbol1 = 'ro';
ClassSymbol2 = 'bo';
plot(modes(labels==1,1), modes(labels==1,2), ...
    ClassSymbol1)
plot(modes(labels>1,1), modes(labels>1,2), ...
    ClassSymbol2)

% Join up means and modes
for n = 1:ndata
    plot([means(n,1); modes(n,1)], [means(n,2); modes(n,2)], 'g-')
end

% Place legend outside data plot.  The named 'Location' form replaces the
% legacy numeric code -1, which newer MATLAB releases no longer accept.
% Only the first four plotted series (the means and modes) are labelled.
legend('Mean (class 1)', 'Mean (class 2)', 'Mode (class 1)', ...
    'Mode (class 2)', 'Location', 'NorthEastOutside');

% Display posterior for a data point
% Choose an interesting one with a large distance between mean and
% mode
[distance, point] = max(sum((means-modes).^2, 2));
resp = gtmpost(net, data(point, :));

disp(' ')
disp('For more detailed information, the full posterior distribution')
disp('(or responsibility) can be plotted in latent space for a')
disp('single data point. This point has been chosen as the one')
disp('with the largest distance between mean and mode.')
disp(' ')
disp('Press any key to continue.');
pause;

% Reshape the responsibilities into grid form for display
R = reshape(resp, fliplr(latent_shape));

fh2 = figure;
% NOTE(review): imagesc is given the full latent coordinate columns as its
% axis vectors; it is expected to use only their end points to set the
% axis limits -- confirm against the imagesc documentation.
imagesc(net.X(:, 1), net.X(:,2), R);
hold on;
tstr = ['Responsibility for point ', num2str(point)];
title(tstr);
set(gca,'YDir','normal')
colormap(hot);
colorbar
disp(' ');
disp('Press any key to continue.')
pause

clc
disp('Finally, we visualise the data with the posterior means in')
disp('latent space as before, but superimpose the magnification')
disp('factors to highlight the separation between clusters.')
disp(' ')
disp('Note the large magnitude factors down the centre of the')
disp('graph, showing that the manifold is stretched more in')
disp('this region than within each of the two clusters.')
% Class 1 is drawn in green here rather than red as in the first figure
ClassSymbol1 = 'g.';
ClassSymbol2 = 'b.';

fh3 = figure;
mags = gtmmag(net, net.X);
% Reshape into grid form
Mags = reshape(mags, fliplr(latent_shape));
imagesc(net.X(:, 1), net.X(:,2), Mags);
hold on
title('Dataset visualisation with magnification factors')
set(gca,'YDir','normal')
colormap(hot);
colorbar
hold on;    % Else the magnification plot disappears
plot(means(labels==1,1), means(labels==1,2), ...
    ClassSymbol1, 'MarkerSize', PointSize)
plot(means(labels>1,1), means(labels>1,2), ...
    ClassSymbol2, 'MarkerSize', PointSize)

disp(' ')
disp('Press any key to exit.')
pause

% Tidy up: close the demo figures and wipe the workspace.
% NOTE: 'clear all' removes every variable in the caller's workspace,
% not only those created by this demo.
close(fh1);
close(fh2);
close(fh3);
clear all;