Mercurial > hg > camir-aes2014
diff toolboxes/MIRtoolbox1.3.2/somtoolbox/sammon.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/toolboxes/MIRtoolbox1.3.2/somtoolbox/sammon.m Tue Feb 10 15:05:51 2015 +0000 @@ -0,0 +1,249 @@ +function P = sammon(D, P, varargin) + +%SAMMON Computes Sammon's mapping of a data set. +% +% P = sammon(D, P, [value], [mode], [alpha], [Mdist]) +% +% P = sammon(D,2); % projection to 2-dim space +% P = sammon(sMap,3); % projects the codebook vectors +% P = sammon(sMap,3,[],[],[],Md) % uses distance matrix Md +% som_grid(sMap,'Coord',P) % visualization of map projection +% +% Input and output arguments ([]'s are optional): +% D (matrix) size dlen x dim, data to be projected +% (struct) data or map struct +% P (scalar) output dimension +% (matrix) size dlen x odim, initial projection matrix +% [value] (scalar) all different modes (the next argument) require +% a value, default = 100 +% [mode] (string) 'steps' or 'errlimit' or 'errchange' or 'seconds', +% see below, default is 'steps' +% [alpha] (scalar) iteration step size, default = 0.2 +% [Dist] (matrix) pairwise distance matrix, size dlen x dlen. +% If the distances in the input space should +% be calculated otherwise than as euclidian +% distances, the distance from each vector +% to each other vector can be given here, +% size dlen x dlen. For example PDIST +% function can be used to calculate the +% distances: Dist = squareform(pdist(D,'mahal')); +% +% P (matrix) size dlen x odim, the projections +% +% The output dimension must be 2 or higher but (naturally) lower +% than data set dimension. +% +% The mode argument determines the end condition for iteration. If +% the mode argument is used, also the value argument has to be +% specified. Different mode possibilities are: +% 'steps' the iteration is terminated when it is run <value> +% 'errlimit' steps, the iteration is terminated when projection error +% is lower than <value>, +% 'errchange' the iteration is terminated when change between +% projection error on two successive iteration rounds +% is less than <value> percent of total error, and +% 'seconds' the iteration is terminated after <value> seconds +% of iteration. +% +% See also CCA, PCAPROJ, SOM_GRID. + +% Reference: Sammon, J.W. Jr., "A nonlinear mapping for data +% structure analysis", IEEE Transactions on Computers, vol. C-18, +% no. 5, 1969, pp. 401-409. + +% Contributed to SOM Toolbox vs2, February 2nd, 2000 by Juha Vesanto +% Copyright (c) by Juha Vesanto +% http://www.cis.hut.fi/projects/somtoolbox/ + +% juuso 040100 + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% check arguments + +error(nargchk(2, 6, nargin)); % check no. of input arguments is correct + +% input data +if isstruct(D), + if isfield(D, 'data'), D = D.data; % data struct + elseif isfield(D, 'codebook'), D = D.codebook; % map struct + else error('Invalid structure'); + end +end +if any(isnan(D(:))), + error('Cannot make Sammon''s projection for data with unknown components') +end + +% compute data dimensions +orig_si = size(D); +dim = orig_si(end); +noc = prod(orig_si)/dim; +if length(orig_si)>2, D = reshape(D,[noc dim]); end + +% output dimension / initial projection matrix +if prod(size(P))==1, + odim = P; + P = rand(noc,odim)-0.5; +else + si = size(P); + odim = si(end); + if prod(si) ~= noc*odim, + error('Initial projection matrix size does not match data size'); + end + if length(si)>2, P = reshape(P,[noc odim]); end + inds = find(isnan(P)); + if length(inds), P(inds) = rand(size(inds)); end +end +if odim > dim | odim < 2, + error('Output dimension must be within [2, dimension of data]'); +end + +% determine operating mode +if nargin < 3 | isempty(varargin{1}) | isnan(varargin{1}), value=100; +else value = varargin{1}; +end + +if nargin < 4 | isempty(varargin{2}) | isnan(varargin{2}), mode='steps'; +else mode = varargin{2}; +end +switch mode, +case 'steps', runlen = value; +case 'errlimit', errlimit = value; +case 'errchange', errchange = value; e_prev = 0; +case 'seconds', endtime = value; +otherwise, error(['Illegal mode: ' mode]); +end + +% iteration step size +if nargin > 4, alpha = varargin{3}; else alpha = NaN; end +if isempty(alpha) | isnan(alpha), alpha = 0.2; end + +% mutual distances +if nargin > 5, Mdist = varargin{4}; else Mdist = []; end + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% initialization + +% these are used quite frequently +noc_x_1 = ones(noc, 1); +odim_x_1 = ones(odim,1); + +% compute mutual distances between vectors +if isempty(Mdist) | all(isnan(Mdist(:))), + fprintf(2, 'computing mutual distances\r'); + dim_x_1 = ones(dim,1); + for i = 1:noc, + x = D(i,:); + Diff = D - x(noc_x_1,:); + N = isnan(Diff); + Diff(find(N)) = 0; + Mdist(:,i) = sqrt((Diff.^2)*dim_x_1); + N = find(sum(N')==dim); %mutual distance unknown + if ~isempty(N), Mdist(N,i) = NaN; end + end +else + % if the distance matrix is output from PDIST function + if size(Mdist,1)==1, Mdist = squareform(Mdist); end + if size(Mdist,1)~=noc, + error('Mutual distance matrix size and data set size do not match'); + end +end + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% action + +if strcmp(mode, 'seconds'), tic; end; +fprintf(2, 'iterating \r'); + +% sammon iteration + +x = P ; +xu = zeros(noc, odim); +xd = zeros(noc, odim); +dq = zeros(noc, 1); +dr = zeros(noc, 1); + +i = 0; +ready = 0; +while ~ready + for j = 1:noc, + xd = -x + x(j*noc_x_1,:); + xd2 = xd.^2; + dpj = sqrt(sum(xd2'))'; + dq = Mdist(:,j) - dpj; + dr = Mdist(:,j) .* dpj; + ind = find(dr ~= 0); + term = dq(ind) ./ dr(ind); + e1 = sum(xd(ind,:) .* term(:,odim_x_1)); + term2 = ((1.0 + dq(ind) ./ dpj(ind)) ./ dpj(ind)) ./ dr(ind); + e2 = sum(term) - sum(xd2(ind,:) .* term2(:,odim_x_1)); + xu(j,:) = x(j,:) + alpha * e1 ./ abs(e2); + end + + % move the center of mass to the center + + c = sum(xu) / noc; + x = xu - c(noc_x_1, :); + + i = i + 1; + + % compute mapping error + % doing this adds about 25% to computing time + if 0, + e = 0; tot = 0; + for j = 2:noc, + d = Mdist(1:(j - 1), j); + tot = tot + sum(d); + ind = find(d ~= 0); + xd = -x(1:(j - 1), :) + x(j * ones(j - 1, 1), :); + ee = d - sqrt(sum(xd'.^2))'; + e = e + sum(ee(ind).^2 ./ d(ind)); + end + e = e/tot; + fprintf(2, '\r%d iterations, error %f', i, e); + else + fprintf(2, '\r%d iterations', i); + end + + % determine is the iteration ready + + switch mode + case 'steps', + if i == runlen, ready = 1; end; + case 'errlimit', + if e < errlimit, ready = 1; end; + case 'errchange', + if i > 1 + change = 100 * abs(e - e_prev) / e_prev; + if change < errchange, ready = 1; end; + fprintf(2, ', change of error %f %% ', change); + end + e_prev = e; + case 'seconds' + if toc > endtime, ready = 1; end; + fprintf(2, ', elapsed time %f seconds ', toc); + end + fprintf(2, ' '); + + % If you want to see the Sammon's projection plotted (in 2-D and 3-D case), + % execute the code below; it is not in use by default to speed up + % computation. + if 0, + clf + if odim == 1, plot(x(:,1), noc_x_1, 'o'); + elseif odim == 2, plot(x(:,1), x(:,2), 'o'); + else plot3(x(:,1), x(:,2), x(:,3), 'o') + end + drawnow + end +end + +fprintf(2, '\n'); + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% clean up + +% reshape +orig_si(end) = odim; +P = reshape(x, orig_si); + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \ No newline at end of file