annotate toolboxes/MIRtoolbox1.3.2/somtoolbox/sammon.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
rev   line source
wolffd@0 1 function P = sammon(D, P, varargin)
wolffd@0 2
wolffd@0 3 %SAMMON Computes Sammon's mapping of a data set.
wolffd@0 4 %
wolffd@0 5 % P = sammon(D, P, [value], [mode], [alpha], [Mdist])
wolffd@0 6 %
wolffd@0 7 % P = sammon(D,2); % projection to 2-dim space
wolffd@0 8 % P = sammon(sMap,3); % projects the codebook vectors
wolffd@0 9 % P = sammon(sMap,3,[],[],[],Md) % uses distance matrix Md
wolffd@0 10 % som_grid(sMap,'Coord',P) % visualization of map projection
wolffd@0 11 %
wolffd@0 12 % Input and output arguments ([]'s are optional):
wolffd@0 13 % D (matrix) size dlen x dim, data to be projected
wolffd@0 14 % (struct) data or map struct
wolffd@0 15 % P (scalar) output dimension
wolffd@0 16 % (matrix) size dlen x odim, initial projection matrix
wolffd@0 17 % [value] (scalar) all different modes (the next argument) require
wolffd@0 18 % a value, default = 100
wolffd@0 19 % [mode] (string) 'steps' or 'errlimit' or 'errchange' or 'seconds',
wolffd@0 20 % see below, default is 'steps'
wolffd@0 21 % [alpha] (scalar) iteration step size, default = 0.2
wolffd@0 22 % [Dist] (matrix) pairwise distance matrix, size dlen x dlen.
wolffd@0 23 % If the distances in the input space should
wolffd@0 24 % be calculated otherwise than as euclidian
wolffd@0 25 % distances, the distance from each vector
wolffd@0 26 % to each other vector can be given here,
wolffd@0 27 % size dlen x dlen. For example PDIST
wolffd@0 28 % function can be used to calculate the
wolffd@0 29 % distances: Dist = squareform(pdist(D,'mahal'));
wolffd@0 30 %
wolffd@0 31 % P (matrix) size dlen x odim, the projections
wolffd@0 32 %
wolffd@0 33 % The output dimension must be 2 or higher but (naturally) lower
wolffd@0 34 % than data set dimension.
wolffd@0 35 %
wolffd@0 36 % The mode argument determines the end condition for iteration. If
wolffd@0 37 % the mode argument is used, also the value argument has to be
wolffd@0 38 % specified. Different mode possibilities are:
wolffd@0 39 % 'steps' the iteration is terminated when it is run <value>
wolffd@0 40 % 'errlimit' steps, the iteration is terminated when projection error
wolffd@0 41 % is lower than <value>,
wolffd@0 42 % 'errchange' the iteration is terminated when change between
wolffd@0 43 % projection error on two successive iteration rounds
wolffd@0 44 % is less than <value> percent of total error, and
wolffd@0 45 % 'seconds' the iteration is terminated after <value> seconds
wolffd@0 46 % of iteration.
wolffd@0 47 %
wolffd@0 48 % See also CCA, PCAPROJ, SOM_GRID.
wolffd@0 49
wolffd@0 50 % Reference: Sammon, J.W. Jr., "A nonlinear mapping for data
wolffd@0 51 % structure analysis", IEEE Transactions on Computers, vol. C-18,
wolffd@0 52 % no. 5, 1969, pp. 401-409.
wolffd@0 53
wolffd@0 54 % Contributed to SOM Toolbox vs2, February 2nd, 2000 by Juha Vesanto
wolffd@0 55 % Copyright (c) by Juha Vesanto
wolffd@0 56 % http://www.cis.hut.fi/projects/somtoolbox/
wolffd@0 57
wolffd@0 58 % juuso 040100
wolffd@0 59
wolffd@0 60 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
wolffd@0 61 %% check arguments
wolffd@0 62
wolffd@0 63 error(nargchk(2, 6, nargin)); % check no. of input arguments is correct
wolffd@0 64
wolffd@0 65 % input data
wolffd@0 66 if isstruct(D),
wolffd@0 67 if isfield(D, 'data'), D = D.data; % data struct
wolffd@0 68 elseif isfield(D, 'codebook'), D = D.codebook; % map struct
wolffd@0 69 else error('Invalid structure');
wolffd@0 70 end
wolffd@0 71 end
wolffd@0 72 if any(isnan(D(:))),
wolffd@0 73 error('Cannot make Sammon''s projection for data with unknown components')
wolffd@0 74 end
wolffd@0 75
wolffd@0 76 % compute data dimensions
wolffd@0 77 orig_si = size(D);
wolffd@0 78 dim = orig_si(end);
wolffd@0 79 noc = prod(orig_si)/dim;
wolffd@0 80 if length(orig_si)>2, D = reshape(D,[noc dim]); end
wolffd@0 81
wolffd@0 82 % output dimension / initial projection matrix
wolffd@0 83 if prod(size(P))==1,
wolffd@0 84 odim = P;
wolffd@0 85 P = rand(noc,odim)-0.5;
wolffd@0 86 else
wolffd@0 87 si = size(P);
wolffd@0 88 odim = si(end);
wolffd@0 89 if prod(si) ~= noc*odim,
wolffd@0 90 error('Initial projection matrix size does not match data size');
wolffd@0 91 end
wolffd@0 92 if length(si)>2, P = reshape(P,[noc odim]); end
wolffd@0 93 inds = find(isnan(P));
wolffd@0 94 if length(inds), P(inds) = rand(size(inds)); end
wolffd@0 95 end
wolffd@0 96 if odim > dim | odim < 2,
wolffd@0 97 error('Output dimension must be within [2, dimension of data]');
wolffd@0 98 end
wolffd@0 99
wolffd@0 100 % determine operating mode
wolffd@0 101 if nargin < 3 | isempty(varargin{1}) | isnan(varargin{1}), value=100;
wolffd@0 102 else value = varargin{1};
wolffd@0 103 end
wolffd@0 104
wolffd@0 105 if nargin < 4 | isempty(varargin{2}) | isnan(varargin{2}), mode='steps';
wolffd@0 106 else mode = varargin{2};
wolffd@0 107 end
wolffd@0 108 switch mode,
wolffd@0 109 case 'steps', runlen = value;
wolffd@0 110 case 'errlimit', errlimit = value;
wolffd@0 111 case 'errchange', errchange = value; e_prev = 0;
wolffd@0 112 case 'seconds', endtime = value;
wolffd@0 113 otherwise, error(['Illegal mode: ' mode]);
wolffd@0 114 end
wolffd@0 115
wolffd@0 116 % iteration step size
wolffd@0 117 if nargin > 4, alpha = varargin{3}; else alpha = NaN; end
wolffd@0 118 if isempty(alpha) | isnan(alpha), alpha = 0.2; end
wolffd@0 119
wolffd@0 120 % mutual distances
wolffd@0 121 if nargin > 5, Mdist = varargin{4}; else Mdist = []; end
wolffd@0 122
wolffd@0 123 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
wolffd@0 124 %% initialization
wolffd@0 125
wolffd@0 126 % these are used quite frequently
wolffd@0 127 noc_x_1 = ones(noc, 1);
wolffd@0 128 odim_x_1 = ones(odim,1);
wolffd@0 129
wolffd@0 130 % compute mutual distances between vectors
wolffd@0 131 if isempty(Mdist) | all(isnan(Mdist(:))),
wolffd@0 132 fprintf(2, 'computing mutual distances\r');
wolffd@0 133 dim_x_1 = ones(dim,1);
wolffd@0 134 for i = 1:noc,
wolffd@0 135 x = D(i,:);
wolffd@0 136 Diff = D - x(noc_x_1,:);
wolffd@0 137 N = isnan(Diff);
wolffd@0 138 Diff(find(N)) = 0;
wolffd@0 139 Mdist(:,i) = sqrt((Diff.^2)*dim_x_1);
wolffd@0 140 N = find(sum(N')==dim); %mutual distance unknown
wolffd@0 141 if ~isempty(N), Mdist(N,i) = NaN; end
wolffd@0 142 end
wolffd@0 143 else
wolffd@0 144 % if the distance matrix is output from PDIST function
wolffd@0 145 if size(Mdist,1)==1, Mdist = squareform(Mdist); end
wolffd@0 146 if size(Mdist,1)~=noc,
wolffd@0 147 error('Mutual distance matrix size and data set size do not match');
wolffd@0 148 end
wolffd@0 149 end
wolffd@0 150
wolffd@0 151 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
wolffd@0 152 %% action
wolffd@0 153
wolffd@0 154 if strcmp(mode, 'seconds'), tic; end;
wolffd@0 155 fprintf(2, 'iterating \r');
wolffd@0 156
wolffd@0 157 % sammon iteration
wolffd@0 158
wolffd@0 159 x = P ;
wolffd@0 160 xu = zeros(noc, odim);
wolffd@0 161 xd = zeros(noc, odim);
wolffd@0 162 dq = zeros(noc, 1);
wolffd@0 163 dr = zeros(noc, 1);
wolffd@0 164
wolffd@0 165 i = 0;
wolffd@0 166 ready = 0;
wolffd@0 167 while ~ready
wolffd@0 168 for j = 1:noc,
wolffd@0 169 xd = -x + x(j*noc_x_1,:);
wolffd@0 170 xd2 = xd.^2;
wolffd@0 171 dpj = sqrt(sum(xd2'))';
wolffd@0 172 dq = Mdist(:,j) - dpj;
wolffd@0 173 dr = Mdist(:,j) .* dpj;
wolffd@0 174 ind = find(dr ~= 0);
wolffd@0 175 term = dq(ind) ./ dr(ind);
wolffd@0 176 e1 = sum(xd(ind,:) .* term(:,odim_x_1));
wolffd@0 177 term2 = ((1.0 + dq(ind) ./ dpj(ind)) ./ dpj(ind)) ./ dr(ind);
wolffd@0 178 e2 = sum(term) - sum(xd2(ind,:) .* term2(:,odim_x_1));
wolffd@0 179 xu(j,:) = x(j,:) + alpha * e1 ./ abs(e2);
wolffd@0 180 end
wolffd@0 181
wolffd@0 182 % move the center of mass to the center
wolffd@0 183
wolffd@0 184 c = sum(xu) / noc;
wolffd@0 185 x = xu - c(noc_x_1, :);
wolffd@0 186
wolffd@0 187 i = i + 1;
wolffd@0 188
wolffd@0 189 % compute mapping error
wolffd@0 190 % doing this adds about 25% to computing time
wolffd@0 191 if 0,
wolffd@0 192 e = 0; tot = 0;
wolffd@0 193 for j = 2:noc,
wolffd@0 194 d = Mdist(1:(j - 1), j);
wolffd@0 195 tot = tot + sum(d);
wolffd@0 196 ind = find(d ~= 0);
wolffd@0 197 xd = -x(1:(j - 1), :) + x(j * ones(j - 1, 1), :);
wolffd@0 198 ee = d - sqrt(sum(xd'.^2))';
wolffd@0 199 e = e + sum(ee(ind).^2 ./ d(ind));
wolffd@0 200 end
wolffd@0 201 e = e/tot;
wolffd@0 202 fprintf(2, '\r%d iterations, error %f', i, e);
wolffd@0 203 else
wolffd@0 204 fprintf(2, '\r%d iterations', i);
wolffd@0 205 end
wolffd@0 206
wolffd@0 207 % determine is the iteration ready
wolffd@0 208
wolffd@0 209 switch mode
wolffd@0 210 case 'steps',
wolffd@0 211 if i == runlen, ready = 1; end;
wolffd@0 212 case 'errlimit',
wolffd@0 213 if e < errlimit, ready = 1; end;
wolffd@0 214 case 'errchange',
wolffd@0 215 if i > 1
wolffd@0 216 change = 100 * abs(e - e_prev) / e_prev;
wolffd@0 217 if change < errchange, ready = 1; end;
wolffd@0 218 fprintf(2, ', change of error %f %% ', change);
wolffd@0 219 end
wolffd@0 220 e_prev = e;
wolffd@0 221 case 'seconds'
wolffd@0 222 if toc > endtime, ready = 1; end;
wolffd@0 223 fprintf(2, ', elapsed time %f seconds ', toc);
wolffd@0 224 end
wolffd@0 225 fprintf(2, ' ');
wolffd@0 226
wolffd@0 227 % If you want to see the Sammon's projection plotted (in 2-D and 3-D case),
wolffd@0 228 % execute the code below; it is not in use by default to speed up
wolffd@0 229 % computation.
wolffd@0 230 if 0,
wolffd@0 231 clf
wolffd@0 232 if odim == 1, plot(x(:,1), noc_x_1, 'o');
wolffd@0 233 elseif odim == 2, plot(x(:,1), x(:,2), 'o');
wolffd@0 234 else plot3(x(:,1), x(:,2), x(:,3), 'o')
wolffd@0 235 end
wolffd@0 236 drawnow
wolffd@0 237 end
wolffd@0 238 end
wolffd@0 239
wolffd@0 240 fprintf(2, '\n');
wolffd@0 241
wolffd@0 242 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
wolffd@0 243 %% clean up
wolffd@0 244
wolffd@0 245 % reshape
wolffd@0 246 orig_si(end) = odim;
wolffd@0 247 P = reshape(x, orig_si);
wolffd@0 248
wolffd@0 249 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%