wolffd@0: function P = sammon(D, P, varargin) wolffd@0: wolffd@0: %SAMMON Computes Sammon's mapping of a data set. wolffd@0: % wolffd@0: % P = sammon(D, P, [value], [mode], [alpha], [Mdist]) wolffd@0: % wolffd@0: % P = sammon(D,2); % projection to 2-dim space wolffd@0: % P = sammon(sMap,3); % projects the codebook vectors wolffd@0: % P = sammon(sMap,3,[],[],[],Md) % uses distance matrix Md wolffd@0: % som_grid(sMap,'Coord',P) % visualization of map projection wolffd@0: % wolffd@0: % Input and output arguments ([]'s are optional): wolffd@0: % D (matrix) size dlen x dim, data to be projected wolffd@0: % (struct) data or map struct wolffd@0: % P (scalar) output dimension wolffd@0: % (matrix) size dlen x odim, initial projection matrix wolffd@0: % [value] (scalar) all different modes (the next argument) require wolffd@0: % a value, default = 100 wolffd@0: % [mode] (string) 'steps' or 'errlimit' or 'errchange' or 'seconds', wolffd@0: % see below, default is 'steps' wolffd@0: % [alpha] (scalar) iteration step size, default = 0.2 wolffd@0: % [Dist] (matrix) pairwise distance matrix, size dlen x dlen. wolffd@0: % If the distances in the input space should wolffd@0: % be calculated otherwise than as euclidian wolffd@0: % distances, the distance from each vector wolffd@0: % to each other vector can be given here, wolffd@0: % size dlen x dlen. For example PDIST wolffd@0: % function can be used to calculate the wolffd@0: % distances: Dist = squareform(pdist(D,'mahal')); wolffd@0: % wolffd@0: % P (matrix) size dlen x odim, the projections wolffd@0: % wolffd@0: % The output dimension must be 2 or higher but (naturally) lower wolffd@0: % than data set dimension. wolffd@0: % wolffd@0: % The mode argument determines the end condition for iteration. If wolffd@0: % the mode argument is used, also the value argument has to be wolffd@0: % specified. Different mode possibilities are: wolffd@0: % 'steps' the iteration is terminated when it is run wolffd@0: % 'errlimit' steps, the iteration is terminated when projection error wolffd@0: % is lower than , wolffd@0: % 'errchange' the iteration is terminated when change between wolffd@0: % projection error on two successive iteration rounds wolffd@0: % is less than percent of total error, and wolffd@0: % 'seconds' the iteration is terminated after seconds wolffd@0: % of iteration. wolffd@0: % wolffd@0: % See also CCA, PCAPROJ, SOM_GRID. wolffd@0: wolffd@0: % Reference: Sammon, J.W. Jr., "A nonlinear mapping for data wolffd@0: % structure analysis", IEEE Transactions on Computers, vol. C-18, wolffd@0: % no. 5, 1969, pp. 401-409. wolffd@0: wolffd@0: % Contributed to SOM Toolbox vs2, February 2nd, 2000 by Juha Vesanto wolffd@0: % Copyright (c) by Juha Vesanto wolffd@0: % http://www.cis.hut.fi/projects/somtoolbox/ wolffd@0: wolffd@0: % juuso 040100 wolffd@0: wolffd@0: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% wolffd@0: %% check arguments wolffd@0: wolffd@0: error(nargchk(2, 6, nargin)); % check no. of input arguments is correct wolffd@0: wolffd@0: % input data wolffd@0: if isstruct(D), wolffd@0: if isfield(D, 'data'), D = D.data; % data struct wolffd@0: elseif isfield(D, 'codebook'), D = D.codebook; % map struct wolffd@0: else error('Invalid structure'); wolffd@0: end wolffd@0: end wolffd@0: if any(isnan(D(:))), wolffd@0: error('Cannot make Sammon''s projection for data with unknown components') wolffd@0: end wolffd@0: wolffd@0: % compute data dimensions wolffd@0: orig_si = size(D); wolffd@0: dim = orig_si(end); wolffd@0: noc = prod(orig_si)/dim; wolffd@0: if length(orig_si)>2, D = reshape(D,[noc dim]); end wolffd@0: wolffd@0: % output dimension / initial projection matrix wolffd@0: if prod(size(P))==1, wolffd@0: odim = P; wolffd@0: P = rand(noc,odim)-0.5; wolffd@0: else wolffd@0: si = size(P); wolffd@0: odim = si(end); wolffd@0: if prod(si) ~= noc*odim, wolffd@0: error('Initial projection matrix size does not match data size'); wolffd@0: end wolffd@0: if length(si)>2, P = reshape(P,[noc odim]); end wolffd@0: inds = find(isnan(P)); wolffd@0: if length(inds), P(inds) = rand(size(inds)); end wolffd@0: end wolffd@0: if odim > dim | odim < 2, wolffd@0: error('Output dimension must be within [2, dimension of data]'); wolffd@0: end wolffd@0: wolffd@0: % determine operating mode wolffd@0: if nargin < 3 | isempty(varargin{1}) | isnan(varargin{1}), value=100; wolffd@0: else value = varargin{1}; wolffd@0: end wolffd@0: wolffd@0: if nargin < 4 | isempty(varargin{2}) | isnan(varargin{2}), mode='steps'; wolffd@0: else mode = varargin{2}; wolffd@0: end wolffd@0: switch mode, wolffd@0: case 'steps', runlen = value; wolffd@0: case 'errlimit', errlimit = value; wolffd@0: case 'errchange', errchange = value; e_prev = 0; wolffd@0: case 'seconds', endtime = value; wolffd@0: otherwise, error(['Illegal mode: ' mode]); wolffd@0: end wolffd@0: wolffd@0: % iteration step size wolffd@0: if nargin > 4, alpha = varargin{3}; else alpha = NaN; end wolffd@0: if isempty(alpha) | isnan(alpha), alpha = 0.2; end wolffd@0: wolffd@0: % mutual distances wolffd@0: if nargin > 5, Mdist = varargin{4}; else Mdist = []; end wolffd@0: wolffd@0: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% wolffd@0: %% initialization wolffd@0: wolffd@0: % these are used quite frequently wolffd@0: noc_x_1 = ones(noc, 1); wolffd@0: odim_x_1 = ones(odim,1); wolffd@0: wolffd@0: % compute mutual distances between vectors wolffd@0: if isempty(Mdist) | all(isnan(Mdist(:))), wolffd@0: fprintf(2, 'computing mutual distances\r'); wolffd@0: dim_x_1 = ones(dim,1); wolffd@0: for i = 1:noc, wolffd@0: x = D(i,:); wolffd@0: Diff = D - x(noc_x_1,:); wolffd@0: N = isnan(Diff); wolffd@0: Diff(find(N)) = 0; wolffd@0: Mdist(:,i) = sqrt((Diff.^2)*dim_x_1); wolffd@0: N = find(sum(N')==dim); %mutual distance unknown wolffd@0: if ~isempty(N), Mdist(N,i) = NaN; end wolffd@0: end wolffd@0: else wolffd@0: % if the distance matrix is output from PDIST function wolffd@0: if size(Mdist,1)==1, Mdist = squareform(Mdist); end wolffd@0: if size(Mdist,1)~=noc, wolffd@0: error('Mutual distance matrix size and data set size do not match'); wolffd@0: end wolffd@0: end wolffd@0: wolffd@0: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% wolffd@0: %% action wolffd@0: wolffd@0: if strcmp(mode, 'seconds'), tic; end; wolffd@0: fprintf(2, 'iterating \r'); wolffd@0: wolffd@0: % sammon iteration wolffd@0: wolffd@0: x = P ; wolffd@0: xu = zeros(noc, odim); wolffd@0: xd = zeros(noc, odim); wolffd@0: dq = zeros(noc, 1); wolffd@0: dr = zeros(noc, 1); wolffd@0: wolffd@0: i = 0; wolffd@0: ready = 0; wolffd@0: while ~ready wolffd@0: for j = 1:noc, wolffd@0: xd = -x + x(j*noc_x_1,:); wolffd@0: xd2 = xd.^2; wolffd@0: dpj = sqrt(sum(xd2'))'; wolffd@0: dq = Mdist(:,j) - dpj; wolffd@0: dr = Mdist(:,j) .* dpj; wolffd@0: ind = find(dr ~= 0); wolffd@0: term = dq(ind) ./ dr(ind); wolffd@0: e1 = sum(xd(ind,:) .* term(:,odim_x_1)); wolffd@0: term2 = ((1.0 + dq(ind) ./ dpj(ind)) ./ dpj(ind)) ./ dr(ind); wolffd@0: e2 = sum(term) - sum(xd2(ind,:) .* term2(:,odim_x_1)); wolffd@0: xu(j,:) = x(j,:) + alpha * e1 ./ abs(e2); wolffd@0: end wolffd@0: wolffd@0: % move the center of mass to the center wolffd@0: wolffd@0: c = sum(xu) / noc; wolffd@0: x = xu - c(noc_x_1, :); wolffd@0: wolffd@0: i = i + 1; wolffd@0: wolffd@0: % compute mapping error wolffd@0: % doing this adds about 25% to computing time wolffd@0: if 0, wolffd@0: e = 0; tot = 0; wolffd@0: for j = 2:noc, wolffd@0: d = Mdist(1:(j - 1), j); wolffd@0: tot = tot + sum(d); wolffd@0: ind = find(d ~= 0); wolffd@0: xd = -x(1:(j - 1), :) + x(j * ones(j - 1, 1), :); wolffd@0: ee = d - sqrt(sum(xd'.^2))'; wolffd@0: e = e + sum(ee(ind).^2 ./ d(ind)); wolffd@0: end wolffd@0: e = e/tot; wolffd@0: fprintf(2, '\r%d iterations, error %f', i, e); wolffd@0: else wolffd@0: fprintf(2, '\r%d iterations', i); wolffd@0: end wolffd@0: wolffd@0: % determine is the iteration ready wolffd@0: wolffd@0: switch mode wolffd@0: case 'steps', wolffd@0: if i == runlen, ready = 1; end; wolffd@0: case 'errlimit', wolffd@0: if e < errlimit, ready = 1; end; wolffd@0: case 'errchange', wolffd@0: if i > 1 wolffd@0: change = 100 * abs(e - e_prev) / e_prev; wolffd@0: if change < errchange, ready = 1; end; wolffd@0: fprintf(2, ', change of error %f %% ', change); wolffd@0: end wolffd@0: e_prev = e; wolffd@0: case 'seconds' wolffd@0: if toc > endtime, ready = 1; end; wolffd@0: fprintf(2, ', elapsed time %f seconds ', toc); wolffd@0: end wolffd@0: fprintf(2, ' '); wolffd@0: wolffd@0: % If you want to see the Sammon's projection plotted (in 2-D and 3-D case), wolffd@0: % execute the code below; it is not in use by default to speed up wolffd@0: % computation. wolffd@0: if 0, wolffd@0: clf wolffd@0: if odim == 1, plot(x(:,1), noc_x_1, 'o'); wolffd@0: elseif odim == 2, plot(x(:,1), x(:,2), 'o'); wolffd@0: else plot3(x(:,1), x(:,2), x(:,3), 'o') wolffd@0: end wolffd@0: drawnow wolffd@0: end wolffd@0: end wolffd@0: wolffd@0: fprintf(2, '\n'); wolffd@0: wolffd@0: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% wolffd@0: %% clean up wolffd@0: wolffd@0: % reshape wolffd@0: orig_si(end) = odim; wolffd@0: P = reshape(x, orig_si); wolffd@0: wolffd@0: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%