%DEMGMM4 Demonstrate density modelling with a Gaussian mixture model.
%
% Description
% The problem consists of modelling data generated by a mixture of
% three Gaussians in 2 dimensions with a mixture model using full
% covariance matrices. The priors are 0.3, 0.5 and 0.2; the centres
% are (2, 3.5), (0, 0) and (0, 2); the variances are (0.16, 0.64) axis
% aligned, (0.25, 1) rotated by 30 degrees, and the identity matrix. The
% first figure contains a scatter plot of the data.
%
% A Gaussian mixture model with three components is trained using EM.
% The model parameters are printed before training and after training.
% The user should press any key to continue at these points. The
% parameters consist of the priors (the first column) and the centres
% (given as (x, y) pairs in the next two columns). The covariance
% matrices are printed separately.
%
% The second figure is a 3-dimensional view of the density function,
% while the third shows the axes of the 1-standard-deviation ellipses
% for the three components of the mixture model.
%
% See also
% GMM, GMMINIT, GMMEM, GMMPROB, GMMUNPAK
%

% Copyright (c) Ian T Nabney (1996-2001)


% Generate the data

ndata = 500;

% Fix the seeds for reproducible results
randn('state', 42);
rand('state', 42);
data = randn(ndata, 2);
prior = [0.3 0.5 0.2];
% The generator parameters below (datap, datac, datacov) list clusters 1 and 3
% in swapped order relative to the sampling code that follows
datap = [0.2 0.5 0.3];
datac = [0 2; 0 0; 2 3.5];
datacov = repmat(eye(2), [1 1 3]);
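% All three generator covariances start as the identity; components 2 and 3
% are overwritten below, so component 1 (centre (0, 2)) keeps unit variance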
data1 = data(1:prior(1)*ndata, :);
data2 = data(prior(1)*ndata+1:(prior(2)+prior(1))*ndata, :);
data3 = data((prior(1)+prior(2))*ndata+1:ndata, :);

% First cluster has axis aligned variance and centre (2, 3.5)
data1(:, 1) = data1(:, 1)*0.4 + 2.0;
data1(:, 2) = data1(:, 2)*0.8 + 3.5;
datacov(:, :, 3) = [0.4*0.4 0; 0 0.8*0.8];

% Second cluster has variance axes rotated by 30 degrees and centre (0, 0)
rotn = [cos(pi/6) -sin(pi/6); sin(pi/6) cos(pi/6)];
data2(:, 1) = data2(:, 1)*0.5;
data2 = data2*rotn;
datacov(:, :, 2) = rotn' * [0.25 0; 0 1] * rotn;
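% Data points are stored as rows, so the rotation acts as data2*rotn; the
% matching covariance for this cluster is therefore rotn' * [0.25 0; 0 1] * rotn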

% Third cluster is at (0, 2)
data3 = data3 + repmat([0 2], prior(3)*ndata, 1);

% Put the dataset together again
data = [data1; data2; data3];

clc
disp('This demonstration illustrates the use of a Gaussian mixture model')
disp('with full covariance matrices to approximate the unconditional')
disp('probability density of data in a two-dimensional space.')
disp('We begin by generating the data from a mixture of three Gaussians and')
disp('plotting it.')
disp(' ')
disp('The first cluster has axis aligned variance and centre (2, 3.5).')
disp('The second cluster has variance axes rotated by 30 degrees')
disp('and centre (0, 0). The third cluster has unit variance and centre')
disp('(0, 2).')
disp(' ')
disp('Press any key to continue.')
pause

fh1 = figure;
plot(data(:, 1), data(:, 2), 'o')
set(gca, 'Box', 'on')

% Set up mixture model
ncentres = 3;
input_dim = 2;
mix = gmm(input_dim, ncentres, 'full');
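% gmm creates the mixture structure whose priors, centres and covars fields
% are initialised and trained below (three components, full 2x2 covariances)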

% Initialise the model parameters from the data
options = foptions;
options(14) = 5;   % Just use 5 iterations of k-means in initialisation
mix = gmminit(mix, data, options);
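% gmminit places the centres with a short k-means run and then sets the
% priors and covariances from the resulting clusters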

% Print out model
clc
disp('The mixture model has three components and full covariance')
disp('matrices. The model parameters after initialisation using the')
disp('k-means algorithm are as follows')
disp('    Priors      Centres')
disp([mix.priors' mix.centres])
disp('Covariance matrices are')
disp(mix.covars)
disp('Press any key to continue.')
pause

% Set up vector of options for EM trainer
options = zeros(1, 18);
options(1) = 1;   % Prints out error values.
options(14) = 50; % Number of iterations.

disp('We now train the model using the EM algorithm for 50 iterations.')
disp(' ')
disp('Press any key to continue.')
pause
[mix, options, errlog] = gmmem(mix, data, options);
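% gmmem returns the fitted mixture, the updated options vector and errlog,
% which records the error (negative log likelihood of the data) at each cycle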

% Print out model
disp(' ')
disp('The trained model has priors and centres:')
disp('    Priors      Centres')
disp([mix.priors' mix.centres])
disp('The data generator has priors and centres')
disp('    Priors      Centres')
disp([datap' datac])
disp('Model covariance matrices are')
disp(mix.covars(:, :, 1))
disp(mix.covars(:, :, 2))
disp(mix.covars(:, :, 3))
disp('Data generator covariance matrices are')
disp(datacov(:, :, 1))
disp(datacov(:, :, 2))
disp(datacov(:, :, 3))
disp('Note the close correspondence between these parameters and those')
disp('of the distribution used to generate the data. The match for')
disp('covariance matrices is not that close, but would be improved with')
disp('more iterations of the training algorithm.')
disp(' ')
disp('Press any key to continue.')
pause

clc
disp('We now plot the density given by the mixture model as a surface plot.')
disp(' ')
disp('Press any key to continue.')
pause

% Plot the result
x = -4.0:0.2:5.0;
y = -4.0:0.2:5.0;
[X, Y] = meshgrid(x, y);
X = X(:);
Y = Y(:);
grid = [X Y];
Z = gmmprob(mix, grid);
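% gmmprob evaluates the mixture density p(x) at each row of grid, giving one
% value per grid point; these are reshaped back onto the grid for plotting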
Z = reshape(Z, length(x), length(y));
c = mesh(x, y, Z);
hold on
title('Surface plot of probability density')
hold off
drawnow

clc
disp('The final plot shows the centres and widths, given by one standard')
disp('deviation, of the three components of the mixture model. The axes')
disp('of the ellipses of constant density are shown.')
disp(' ')
disp('Press any key to continue.')
pause

% Try to calculate a sensible position for the second figure, below the first
fig1_pos = get(fh1, 'Position');
fig2_pos = fig1_pos;
fig2_pos(2) = fig2_pos(2) - fig1_pos(4) - 30;
fh2 = figure('Position', fig2_pos);

h3 = plot(data(:, 1), data(:, 2), 'bo');
axis equal;
hold on
title('Plot of data and covariances')
for i = 1:ncentres
  [v, d] = eig(mix.covars(:, :, i));
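  % Columns of v are the principal axes of this covariance matrix and the
  % diagonal of d holds the variances along them, so sqrt(d(j,j)) is one
  % standard deviation along the axis v(:,j)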
  for j = 1:2
    % Ensure that eigenvector has unit length
    v(:, j) = v(:, j)/norm(v(:, j));
    start = mix.centres(i, :) - sqrt(d(j, j))*(v(:, j)');
    endpt = mix.centres(i, :) + sqrt(d(j, j))*(v(:, j)');
    linex = [start(1) endpt(1)];
    liney = [start(2) endpt(2)];
    line(linex, liney, 'Color', 'k', 'LineWidth', 3)
  end
  % Plot ellipses of one standard deviation
  theta = 0:0.02:2*pi;
  x = sqrt(d(1, 1))*cos(theta);
  y = sqrt(d(2, 2))*sin(theta);
  % Rotate ellipse axes
  ellipse = (v*([x; y]))';
  % Adjust centre
  ellipse = ellipse + ones(length(theta), 1)*mix.centres(i, :);
  plot(ellipse(:, 1), ellipse(:, 2), 'r-');
end
hold off

disp('Note how the data cluster positions and widths are captured by')
disp('the mixture model.')
disp(' ')
disp('Press any key to end.')
pause

close(fh1);
close(fh2);
clear all;