wolffd@0
|
1 %DEMKMEAN Demonstrate simple clustering model trained with K-means.
|
wolffd@0
|
2 %
|
wolffd@0
|
3 % Description
|
wolffd@0
|
4 % The problem consists of data in a two-dimensional space. The data is
|
wolffd@0
|
5 % drawn from three spherical Gaussian distributions with priors 0.3,
|
wolffd@0
|
6 % 0.5 and 0.2; centres (2, 3.5), (0, 0) and (0,2); and standard
|
wolffd@0
|
7 % deviations 0.2, 0.5 and 1.0. The first figure contains a scatter plot
|
wolffd@0
|
8 % of the data. The data is the same as in DEMGMM1.
|
wolffd@0
|
9 %
|
wolffd@0
|
10 % A cluster model with three components is trained using the batch K-
|
wolffd@0
|
11 % means algorithm. The matrix of centres is printed after training. The
|
wolffd@0
|
12 % second figure shows the data labelled with a colour derived from the
|
wolffd@0
|
13 % corresponding cluster.
|
wolffd@0
|
14 %
|
wolffd@0
|
15 % See also
|
wolffd@0
|
16 % DEM2DDAT, DEMGMM1, KNN1, KMEANS
|
wolffd@0
|
17 %
|
wolffd@0
|
18
|
wolffd@0
|
19 % Copyright (c) Ian T Nabney (1996-2001)
|
wolffd@0
|
20
|
wolffd@0
|
% Build the demonstration data set.  Both legacy generator states are
% pinned to a fixed seed so that every run of the demo draws the same
% sample and the same row permutation.
randn('state', 42);
rand('state', 42);

ndata = 250;                 % number of points requested from DEM2DDAT
data = dem2ddat(ndata);

% Put the rows into a random order
data = data(randperm(ndata), :);
|
wolffd@0
|
29
|
wolffd@0
|
% Clear the command window, print the introduction and wait for a key press
clc
msg = { ...
    'This demonstration illustrates the use of a cluster model to', ...
    'find centres that reflect the distribution of data points.', ...
    'We begin by generating the data from a mixture of three Gaussians', ...
    'in two-dimensional space and plotting it.', ...
    ' ', ...
    'Press any key to continue.'};
for k = 1:numel(msg)
  disp(msg{k})
end
pause

% First figure: scatter plot of the raw (unlabelled) data.  The handle is
% kept so the window can be closed when the demo finishes.
fh1 = figure;
plot(data(:, 1), data(:, 2), 'o')
title('Data')
set(gca, 'Box', 'on')
|
wolffd@0
|
43
|
wolffd@0
|
% Cluster model: CENTRES holds one row per centre and one column per data
% dimension.  The zeros are only a placeholder of the right size.
ncentres = 3;
centres = zeros(ncentres, 2);

% Options vector for the kmeans trainer, starting from the FOPTIONS defaults:
%   options(1)  = 1   print out error values during training
%   options(5)  = 1   initialise the centres from the data (the text shown
%                     before training says they start at randomly selected
%                     data points)
%   options(14) = 10  maximum number of iterations
options = foptions;
options([1 5 14]) = [1 1 10];
|
wolffd@0
|
53
|
wolffd@0
|
% Describe the training run and wait for the user
clc
msg = { ...
    'The model is chosen to have three centres, which are initialised', ...
    'at randomly selected data points.  We now train the model using', ...
    'the batch K-means algorithm with a maximum of 10 iterations and', ...
    'stopping tolerance of 1e-4.', ...
    ' ', ...
    'Press any key to continue.'};
for k = 1:numel(msg)
  disp(msg{k})
end
pause

% Train the centres from the data.  POST is used later to recover which
% centre each data point was assigned to.
% NOTE(review): this calling convention (centres, data, options) is not the
% one used by the Statistics Toolbox function of the same name -- confirm
% that the intended KMEANS is first on the path.
[centres, options, post] = kmeans(centres, data, options);

% Report the trained model
msg = { ...
    ' ', ...
    'Note that training has terminated before 10 iterations as there', ...
    'has been no change in the centres or error function.', ...
    ' ', ...
    'The trained model has centres:'};
for k = 1:numel(msg)
  disp(msg{k})
end
disp(centres);
disp('Press any key to continue.')
pause
|
wolffd@0
|
75
|
wolffd@0
|
clc
disp('We now plot each data point coloured according to its classification')
disp('given by the nearest cluster centre. The cluster centres are denoted')
disp('by black crosses.')

% Second figure: data labelled by cluster, with the trained centres on top
fh2 = figure;
hold on

% One marker style per cluster; styles are reused cyclically if there are
% more centres than styles.
colours = ['b.'; 'r.'; 'g.'];
nclusters = size(centres, 1);

% Each non-zero entry of POST pairs a data point (row index, tempi) with
% the cluster it was assigned to (column index, tempj).
[tempi, tempj] = find(post);

clear hp                     % discard any stale handles left in the workspace
labels = cell(1, nclusters);
for i = 1:nclusters
  % Select data points closest to ith centre
  members = tempi(tempj == i);
  thisX = data(members, 1);
  thisY = data(members, 2);
  style = colours(mod(i - 1, size(colours, 1)) + 1, :);
  hp(i) = plot(thisX, thisY, style);
  set(hp(i), 'MarkerSize', 12);
  labels{i} = sprintf('Class %d', i);
end
set(gca, 'Box', 'on')

% Cluster centres as black crosses
plot(centres(:, 1), centres(:, 2), 'k+', 'LineWidth', 2, ...
  'MarkerSize', 8)

% Build the legend from the class handles only, after all plotting is done,
% so the centre markers never appear as an extra legend entry.  The
% 'Location' form replaces the old numeric position code 2 (upper left),
% which current releases no longer accept.
legend(hp, labels, 'Location', 'NorthWest')
title('Centres and data labels')
hold off
|
wolffd@0
|
103
|
wolffd@0
|
disp(' ')
disp('Press any key to end.')
pause

% Close the demo figures.  The user may already have closed a window by
% hand during one of the pauses; closing a stale handle raises an error and
% would abort the script before the cleanup below, so check first.
if ishandle(fh1)
  close(fh1);
end
if ishandle(fh2)
  close(fh2);
end

% Remove everything the demo created (note: this clears the whole workspace)
clear all;
|
wolffd@0
|
111
|