wolffd@0
|
1 function P = sammon(D, P, varargin)
|
wolffd@0
|
2
|
wolffd@0
|
3 %SAMMON Computes Sammon's mapping of a data set.
|
wolffd@0
|
4 %
|
wolffd@0
|
5 % P = sammon(D, P, [value], [mode], [alpha], [Mdist])
|
wolffd@0
|
6 %
|
wolffd@0
|
7 % P = sammon(D,2); % projection to 2-dim space
|
wolffd@0
|
8 % P = sammon(sMap,3); % projects the codebook vectors
|
wolffd@0
|
9 % P = sammon(sMap,3,[],[],[],Md) % uses distance matrix Md
|
wolffd@0
|
10 % som_grid(sMap,'Coord',P) % visualization of map projection
|
wolffd@0
|
11 %
|
wolffd@0
|
12 % Input and output arguments ([]'s are optional):
|
wolffd@0
|
13 % D (matrix) size dlen x dim, data to be projected
|
wolffd@0
|
14 % (struct) data or map struct
|
wolffd@0
|
15 % P (scalar) output dimension
|
wolffd@0
|
16 % (matrix) size dlen x odim, initial projection matrix
|
wolffd@0
|
17 % [value] (scalar) all different modes (the next argument) require
|
wolffd@0
|
18 % a value, default = 100
|
wolffd@0
|
19 % [mode] (string) 'steps' or 'errlimit' or 'errchange' or 'seconds',
|
wolffd@0
|
20 % see below, default is 'steps'
|
wolffd@0
|
21 % [alpha] (scalar) iteration step size, default = 0.2
|
wolffd@0
|
22 % [Dist] (matrix) pairwise distance matrix, size dlen x dlen.
|
wolffd@0
|
23 % If the distances in the input space should
|
wolffd@0
|
24 % be calculated otherwise than as euclidian
|
wolffd@0
|
25 % distances, the distance from each vector
|
wolffd@0
|
26 % to each other vector can be given here,
|
wolffd@0
|
27 % size dlen x dlen. For example PDIST
|
wolffd@0
|
28 % function can be used to calculate the
|
wolffd@0
|
29 % distances: Dist = squareform(pdist(D,'mahal'));
|
wolffd@0
|
30 %
|
wolffd@0
|
31 % P (matrix) size dlen x odim, the projections
|
wolffd@0
|
32 %
|
wolffd@0
|
33 % The output dimension must be 2 or higher but (naturally) lower
|
wolffd@0
|
34 % than data set dimension.
|
wolffd@0
|
35 %
|
wolffd@0
|
36 % The mode argument determines the end condition for iteration. If
|
wolffd@0
|
37 % the mode argument is used, also the value argument has to be
|
wolffd@0
|
38 % specified. Different mode possibilities are:
|
wolffd@0
|
39 % 'steps' the iteration is terminated when it is run <value>
|
wolffd@0
|
40 % 'errlimit' steps, the iteration is terminated when projection error
|
wolffd@0
|
41 % is lower than <value>,
|
wolffd@0
|
42 % 'errchange' the iteration is terminated when change between
|
wolffd@0
|
43 % projection error on two successive iteration rounds
|
wolffd@0
|
44 % is less than <value> percent of total error, and
|
wolffd@0
|
45 % 'seconds' the iteration is terminated after <value> seconds
|
wolffd@0
|
46 % of iteration.
|
wolffd@0
|
47 %
|
wolffd@0
|
48 % See also CCA, PCAPROJ, SOM_GRID.
|
wolffd@0
|
49
|
wolffd@0
|
50 % Reference: Sammon, J.W. Jr., "A nonlinear mapping for data
|
wolffd@0
|
51 % structure analysis", IEEE Transactions on Computers, vol. C-18,
|
wolffd@0
|
52 % no. 5, 1969, pp. 401-409.
|
wolffd@0
|
53
|
wolffd@0
|
54 % Contributed to SOM Toolbox vs2, February 2nd, 2000 by Juha Vesanto
|
wolffd@0
|
55 % Copyright (c) by Juha Vesanto
|
wolffd@0
|
56 % http://www.cis.hut.fi/projects/somtoolbox/
|
wolffd@0
|
57
|
wolffd@0
|
58 % juuso 040100
|
wolffd@0
|
59
|
wolffd@0
|
60 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
wolffd@0
|
61 %% check arguments
|
wolffd@0
|
62
|
wolffd@0
|
63 error(nargchk(2, 6, nargin)); % check no. of input arguments is correct
|
wolffd@0
|
64
|
wolffd@0
|
65 % input data
|
wolffd@0
|
66 if isstruct(D),
|
wolffd@0
|
67 if isfield(D, 'data'), D = D.data; % data struct
|
wolffd@0
|
68 elseif isfield(D, 'codebook'), D = D.codebook; % map struct
|
wolffd@0
|
69 else error('Invalid structure');
|
wolffd@0
|
70 end
|
wolffd@0
|
71 end
|
wolffd@0
|
72 if any(isnan(D(:))),
|
wolffd@0
|
73 error('Cannot make Sammon''s projection for data with unknown components')
|
wolffd@0
|
74 end
|
wolffd@0
|
75
|
wolffd@0
|
76 % compute data dimensions
|
wolffd@0
|
77 orig_si = size(D);
|
wolffd@0
|
78 dim = orig_si(end);
|
wolffd@0
|
79 noc = prod(orig_si)/dim;
|
wolffd@0
|
80 if length(orig_si)>2, D = reshape(D,[noc dim]); end
|
wolffd@0
|
81
|
wolffd@0
|
82 % output dimension / initial projection matrix
|
wolffd@0
|
83 if prod(size(P))==1,
|
wolffd@0
|
84 odim = P;
|
wolffd@0
|
85 P = rand(noc,odim)-0.5;
|
wolffd@0
|
86 else
|
wolffd@0
|
87 si = size(P);
|
wolffd@0
|
88 odim = si(end);
|
wolffd@0
|
89 if prod(si) ~= noc*odim,
|
wolffd@0
|
90 error('Initial projection matrix size does not match data size');
|
wolffd@0
|
91 end
|
wolffd@0
|
92 if length(si)>2, P = reshape(P,[noc odim]); end
|
wolffd@0
|
93 inds = find(isnan(P));
|
wolffd@0
|
94 if length(inds), P(inds) = rand(size(inds)); end
|
wolffd@0
|
95 end
|
wolffd@0
|
96 if odim > dim | odim < 2,
|
wolffd@0
|
97 error('Output dimension must be within [2, dimension of data]');
|
wolffd@0
|
98 end
|
wolffd@0
|
99
|
wolffd@0
|
100 % determine operating mode
|
wolffd@0
|
101 if nargin < 3 | isempty(varargin{1}) | isnan(varargin{1}), value=100;
|
wolffd@0
|
102 else value = varargin{1};
|
wolffd@0
|
103 end
|
wolffd@0
|
104
|
wolffd@0
|
105 if nargin < 4 | isempty(varargin{2}) | isnan(varargin{2}), mode='steps';
|
wolffd@0
|
106 else mode = varargin{2};
|
wolffd@0
|
107 end
|
wolffd@0
|
108 switch mode,
|
wolffd@0
|
109 case 'steps', runlen = value;
|
wolffd@0
|
110 case 'errlimit', errlimit = value;
|
wolffd@0
|
111 case 'errchange', errchange = value; e_prev = 0;
|
wolffd@0
|
112 case 'seconds', endtime = value;
|
wolffd@0
|
113 otherwise, error(['Illegal mode: ' mode]);
|
wolffd@0
|
114 end
|
wolffd@0
|
115
|
wolffd@0
|
116 % iteration step size
|
wolffd@0
|
117 if nargin > 4, alpha = varargin{3}; else alpha = NaN; end
|
wolffd@0
|
118 if isempty(alpha) | isnan(alpha), alpha = 0.2; end
|
wolffd@0
|
119
|
wolffd@0
|
120 % mutual distances
|
wolffd@0
|
121 if nargin > 5, Mdist = varargin{4}; else Mdist = []; end
|
wolffd@0
|
122
|
wolffd@0
|
123 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
wolffd@0
|
124 %% initialization
|
wolffd@0
|
125
|
wolffd@0
|
126 % these are used quite frequently
|
wolffd@0
|
127 noc_x_1 = ones(noc, 1);
|
wolffd@0
|
128 odim_x_1 = ones(odim,1);
|
wolffd@0
|
129
|
wolffd@0
|
130 % compute mutual distances between vectors
|
wolffd@0
|
131 if isempty(Mdist) | all(isnan(Mdist(:))),
|
wolffd@0
|
132 fprintf(2, 'computing mutual distances\r');
|
wolffd@0
|
133 dim_x_1 = ones(dim,1);
|
wolffd@0
|
134 for i = 1:noc,
|
wolffd@0
|
135 x = D(i,:);
|
wolffd@0
|
136 Diff = D - x(noc_x_1,:);
|
wolffd@0
|
137 N = isnan(Diff);
|
wolffd@0
|
138 Diff(find(N)) = 0;
|
wolffd@0
|
139 Mdist(:,i) = sqrt((Diff.^2)*dim_x_1);
|
wolffd@0
|
140 N = find(sum(N')==dim); %mutual distance unknown
|
wolffd@0
|
141 if ~isempty(N), Mdist(N,i) = NaN; end
|
wolffd@0
|
142 end
|
wolffd@0
|
143 else
|
wolffd@0
|
144 % if the distance matrix is output from PDIST function
|
wolffd@0
|
145 if size(Mdist,1)==1, Mdist = squareform(Mdist); end
|
wolffd@0
|
146 if size(Mdist,1)~=noc,
|
wolffd@0
|
147 error('Mutual distance matrix size and data set size do not match');
|
wolffd@0
|
148 end
|
wolffd@0
|
149 end
|
wolffd@0
|
150
|
wolffd@0
|
151 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
wolffd@0
|
152 %% action
|
wolffd@0
|
153
|
wolffd@0
|
154 if strcmp(mode, 'seconds'), tic; end;
|
wolffd@0
|
155 fprintf(2, 'iterating \r');
|
wolffd@0
|
156
|
wolffd@0
|
157 % sammon iteration
|
wolffd@0
|
158
|
wolffd@0
|
159 x = P ;
|
wolffd@0
|
160 xu = zeros(noc, odim);
|
wolffd@0
|
161 xd = zeros(noc, odim);
|
wolffd@0
|
162 dq = zeros(noc, 1);
|
wolffd@0
|
163 dr = zeros(noc, 1);
|
wolffd@0
|
164
|
wolffd@0
|
165 i = 0;
|
wolffd@0
|
166 ready = 0;
|
wolffd@0
|
167 while ~ready
|
wolffd@0
|
168 for j = 1:noc,
|
wolffd@0
|
169 xd = -x + x(j*noc_x_1,:);
|
wolffd@0
|
170 xd2 = xd.^2;
|
wolffd@0
|
171 dpj = sqrt(sum(xd2'))';
|
wolffd@0
|
172 dq = Mdist(:,j) - dpj;
|
wolffd@0
|
173 dr = Mdist(:,j) .* dpj;
|
wolffd@0
|
174 ind = find(dr ~= 0);
|
wolffd@0
|
175 term = dq(ind) ./ dr(ind);
|
wolffd@0
|
176 e1 = sum(xd(ind,:) .* term(:,odim_x_1));
|
wolffd@0
|
177 term2 = ((1.0 + dq(ind) ./ dpj(ind)) ./ dpj(ind)) ./ dr(ind);
|
wolffd@0
|
178 e2 = sum(term) - sum(xd2(ind,:) .* term2(:,odim_x_1));
|
wolffd@0
|
179 xu(j,:) = x(j,:) + alpha * e1 ./ abs(e2);
|
wolffd@0
|
180 end
|
wolffd@0
|
181
|
wolffd@0
|
182 % move the center of mass to the center
|
wolffd@0
|
183
|
wolffd@0
|
184 c = sum(xu) / noc;
|
wolffd@0
|
185 x = xu - c(noc_x_1, :);
|
wolffd@0
|
186
|
wolffd@0
|
187 i = i + 1;
|
wolffd@0
|
188
|
wolffd@0
|
189 % compute mapping error
|
wolffd@0
|
190 % doing this adds about 25% to computing time
|
wolffd@0
|
191 if 0,
|
wolffd@0
|
192 e = 0; tot = 0;
|
wolffd@0
|
193 for j = 2:noc,
|
wolffd@0
|
194 d = Mdist(1:(j - 1), j);
|
wolffd@0
|
195 tot = tot + sum(d);
|
wolffd@0
|
196 ind = find(d ~= 0);
|
wolffd@0
|
197 xd = -x(1:(j - 1), :) + x(j * ones(j - 1, 1), :);
|
wolffd@0
|
198 ee = d - sqrt(sum(xd'.^2))';
|
wolffd@0
|
199 e = e + sum(ee(ind).^2 ./ d(ind));
|
wolffd@0
|
200 end
|
wolffd@0
|
201 e = e/tot;
|
wolffd@0
|
202 fprintf(2, '\r%d iterations, error %f', i, e);
|
wolffd@0
|
203 else
|
wolffd@0
|
204 fprintf(2, '\r%d iterations', i);
|
wolffd@0
|
205 end
|
wolffd@0
|
206
|
wolffd@0
|
207 % determine is the iteration ready
|
wolffd@0
|
208
|
wolffd@0
|
209 switch mode
|
wolffd@0
|
210 case 'steps',
|
wolffd@0
|
211 if i == runlen, ready = 1; end;
|
wolffd@0
|
212 case 'errlimit',
|
wolffd@0
|
213 if e < errlimit, ready = 1; end;
|
wolffd@0
|
214 case 'errchange',
|
wolffd@0
|
215 if i > 1
|
wolffd@0
|
216 change = 100 * abs(e - e_prev) / e_prev;
|
wolffd@0
|
217 if change < errchange, ready = 1; end;
|
wolffd@0
|
218 fprintf(2, ', change of error %f %% ', change);
|
wolffd@0
|
219 end
|
wolffd@0
|
220 e_prev = e;
|
wolffd@0
|
221 case 'seconds'
|
wolffd@0
|
222 if toc > endtime, ready = 1; end;
|
wolffd@0
|
223 fprintf(2, ', elapsed time %f seconds ', toc);
|
wolffd@0
|
224 end
|
wolffd@0
|
225 fprintf(2, ' ');
|
wolffd@0
|
226
|
wolffd@0
|
227 % If you want to see the Sammon's projection plotted (in 2-D and 3-D case),
|
wolffd@0
|
228 % execute the code below; it is not in use by default to speed up
|
wolffd@0
|
229 % computation.
|
wolffd@0
|
230 if 0,
|
wolffd@0
|
231 clf
|
wolffd@0
|
232 if odim == 1, plot(x(:,1), noc_x_1, 'o');
|
wolffd@0
|
233 elseif odim == 2, plot(x(:,1), x(:,2), 'o');
|
wolffd@0
|
234 else plot3(x(:,1), x(:,2), x(:,3), 'o')
|
wolffd@0
|
235 end
|
wolffd@0
|
236 drawnow
|
wolffd@0
|
237 end
|
wolffd@0
|
238 end
|
wolffd@0
|
239
|
wolffd@0
|
240 fprintf(2, '\n');
|
wolffd@0
|
241
|
wolffd@0
|
242 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
wolffd@0
|
243 %% clean up
|
wolffd@0
|
244
|
wolffd@0
|
245 % reshape
|
wolffd@0
|
246 orig_si(end) = odim;
|
wolffd@0
|
247 P = reshape(x, orig_si);
|
wolffd@0
|
248
|
wolffd@0
|
249 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |