camir-aes2014: toolboxes/MIRtoolbox1.3.2/somtoolbox/som

comparison toolboxes/MIRtoolbox1.3.2/somtoolbox/som_linkage.m @ 0:e9a9cd732c1e tip

first hg version after svn

author	wolffd
date	Tue, 10 Feb 2015 15:05:51 +0000
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:e9a9cd732c1e
+function [Z,order,Md] = som_linkage(sM,varargin)
+%SOM_LINKAGE Make a hierarchical linkage of the SOM map units.
+%
+% [Z,order,Dist] = som_linkage(sM, [[argID,] value, ...])
+%
+%  Z = som_linkage(sM);
+%  Z = som_linkage(D,'complete');
+%  Z = som_linkage(sM,'single','ignore',find(~som_hits(sM,D)));
+%  Z = som_linkage(sM,pdist(sM.codebook,'mahal'));
+%  som_dendrogram(Z);
+%
+%  Input and output arguments ([]'s are optional):
+%   sM       (struct) map or data struct to be clustered
+%            (matrix) size dlen x dim, a data set: the matrix must not
+%                     contain any NaN's!
+%   [argID,  (string) See below. The values which are unambiguous can
+%    value]  (varies) be given without the preceeding argID.
+%
+%   Z        (matrix) size dlen-1 x 3, the linkage info
+%                     Z(i,1) and Z(i,2) hold the indeces of clusters
+%                     combined on level i (starting from bottom). The new
+%                     cluster has index dlen+i. The initial cluster
+%                     index of each unit is its linear index in the
+%                     original data matrix. Z(i,3) is the distance
+%                     between the combined clusters. See LINKAGE
+%                     function in the Statistics Toolbox.
+%                     The ignored samples are listed at the
+%                     end of the Z-matrix and have Z(*,3) == Inf
+%   Dist     (matrix) size dlen x dlen, pairwise distance matrix
+%
+% Here are the valid argument IDs and corresponding values. The values
+% which are unambiguous (marked with '*') can be given without the
+% preceeding argID.
+%   'linkage' *(string) the linkage criteria to use: 'single' (the
+%                       default), 'average' or 'complete'
+%   'topol'   *(struct) topology struct
+%   'connect' *(string) 'neighbors' or 'any' (default), whether the
+%                       connections should be allowed only between
+%                       neighbors or between any vectors
+%              (matrix) size dlen x dlen indicating the connections
+%                       between vectors
+%              (scalar) the N-neighborhood upto which the connections
+%                       should be formed (implies 'neighbors')
+%   'ignore'   (vector) the units/vectors which should be ignored
+%   'dist'     (matrix) size dlen x dlen, pairwise distance matrix to
+%                       be used instead of euclidian distances
+%              (vector) as the output of PDIST function
+%              (scalar) distance norm to use (euclidian = 2)
+%   'mask'     (vector) size dim x 1, the search mask used to
+%                       weight distance calculation. By default
+%                       sM.mask or a vector of ones is used.
+%
+% Note that together 'connect'='neighbors' and 'ignore' may form
+% areas on the map which will never be connected: connections
+% across the ignored map units simply do not exist.
+%
+% See also KMEANS_CLUSTERS, LINKAGE, PDIST, DENDROGRAM.
+% Copyright (c) 2000 by Juha Vesanto
+% Contributed to SOM Toolbox on June 16th, 2000 by Juha Vesanto
+% http://www.cis.hut.fi/projects/somtoolbox/
+% Version 2.0beta juuso 160600
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% input arguments
+% the data
+if isstruct(sM),
+if isfield(sM,'data'), D = sM.data; sT = []; mask = [];
+else D = sM.codebook; sT = sM.topol; mask = sM.mask;
+end
+else
+D = sM; sT = []; mask = [];
+end
+[dlen dim] = size(D);
+if isempty(mask), mask = ones(dim,1); end
+if any(isnan(D(:))), error('Data matrix must not have any NaNs.'); end
+% varargin
+Md = 2;
+linkage = 'single';
+ignore_units = [];
+constrained = 0;
+i=1;
+while i<=length(varargin),
+argok = 1;
+if ischar(varargin{i}),
+switch varargin{i},
+% argument IDs
+case {'topol','som_topol','sTopol'}, i=i+1; sT = varargin{i};
+case 'connect', i=i+1;
+if ischar(varargin{i}), constrained = ~strcmp(varargin{i},'any');
+else constrained = varargin{i}; end
+case 'ignore',  i=i+1; ignore_units = varargin{i};
+case 'dist',    i=i+1; Md = varargin{i};
+case 'linkage', i=i+1; linkage = varargin{i};
+case 'mask',    i=i+1; mask = varargin{i};
+case 'tracking',i=i+1; tracking = varargin{i};
+% unambiguous values
+case 'neighbors', constrained = 1;
+case 'any',       constrained = 0;
+case {'single','average','complete'}, linkage = varargin{i};
+otherwise argok=0;
+end
+elseif isstruct(varargin{i}) & isfield(varargin{i},'type'),
+switch varargin{i}(1).type,
+case 'som_topol', sTopol = varargin{i};
+otherwise argok=0;
+end
+else
+argok = 0;
+end
+if ~argok,
+disp(['(som_linkage) Ignoring invalid argument #' num2str(i+1)]);
+end
+i = i+1;
+end
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% distance matrix
+% given distance matrix % jh corrected this place totally 27.3. 03
+if (prod(size(Md))==1), % no explicit distance matrix, set flag
+q=2; % 17.2.03 kr added some brackets
+else
+if (prod(size(Md))<dlen^2), % check pdist form
+Md = squareform(Md);   % transform to ordinary square diastance matrix
+end
+% jh: 27.3. 03 "calculate pairwise dist. matrix" see approx. 20 lines below
+% sets self-distance to Inf! This must be set here also,
+% otherwise clustering fails for user defined dist. matrix!
+Md(eye(dlen)==1)=Inf;
+end
+% neighborhood constraint
+if length(constrained)==1 & constrained>0,
+Ne1 = som_unit_neighs(sT);
+Conn = som_neighborhood(Ne1,constrained);
+Conn(~isfinite(Conn(:))) = 0;
+else Conn = constrained; end
+if ~isempty(Conn), for i=1:dlen, C(i,i) = 1; end, end
+% pairwise distance matrix across connected units
+n = size(D,1);
+if prod(size(Md))>1,
+% remove distances between non-neighbors
+if constrained, for i = 1:n, Md(i,find(Conn(i,:)==0)) = Inf; end, end
+else
+% calculate pairwise distance matrix
+q = Md;
+Md = zeros(n,n)+Inf;
+if ~constrained & q==2, % fast for the default case
+for i = 1:n-1,
+x = D(i,:);
+inds = [(i+1):n];
+Diff = D(inds,:) - x(ones(n-i,1),:);
+Md(inds,i) = sqrt((Diff.^2)*mask);
+Md(i,inds) = Md(inds,i)';
+end
+else
+for i = 1:n-1,
+inds = find(Conn(i,:)==1);
+inds = inds(find(inds>i));
+Diff = abs(D(inds,:) - D(i*ones(length(inds),1),:));
+switch q,
+case 1,    dist = Diff*mask;
+case 2,    dist = sqrt((Diff.^2)*mask);
+case Inf,  dist = max(Diff,[],2);
+otherwise, dist = ((Diff.^q)*mask).^(1/q);
+end
+Md(inds,i) = dist;
+Md(i,inds) = dist';
+end
+end
+end
+% set distances to ignored units to Inf
+if ~isempty(ignore_units),
+Md(ignore_units,:) = Inf;
+Md(:,ignore_units) = Inf;
+end
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% construct dendrogram
+Z = zeros(n-1,3)+NaN;  % merged clusters and distance for each step
+clusters = 1:dlen;     % each vector is at first in its own cluster
+Cd = Md;               % distances between clusters
+h = waitbar(0,'Constructing hierarchical clustering');
+for i=1:n-1,
+% tracking
+waitbar(i/(n-1),h);
+%% combine two closest clusters
+% find the clusters which are closest to each other (c1 and c2)
+[d,ind] = min(min(Cd));
+if ~isfinite(d), break; end   % no more connected clusters
+[d,c1] = min(Cd(:,ind));      % cluster1
+c2 = clusters(ind);           % cluster2
+% combine the two clusters
+c1_inds = find(clusters==c1); % vectors belonging to c1
+c2_inds = find(clusters==c2); % vectors belonging to c2
+c_inds = [c1_inds, c2_inds];  % members of the new cluster
+% new cluster index = bigger cluster
+if length(c2_inds)>length(c1_inds), c=c2; k=c1; else c=c1; k=c2; end
+clusters(c_inds) = c;         % update cluster info
+Z(i,:) = [c, k, d];           % save info into Z
+%% update cluster distances
+% remove the subclusters from the Cd table
+Cd(c_inds,c_inds) = Inf;      % distance of the cluster to its members = Inf
+k_inds = c_inds(c_inds ~= c); % vectors of the smaller cluster
+Cd(k_inds,:) = Inf;           % distance of the subclusters to
+Cd(:,k_inds) = Inf;           % other clusters = Inf
+% update the distance of this cluster to the other clusters
+cl = unique(clusters(clusters ~= c)); % indeces of all other clusters
+if ~isempty(cl), % added since v6.5 works differently than 6.1
+for l=cl,
+o_inds = find(clusters==l); % indeces belonging to cluster k
+vd = Md(c_inds,o_inds);     % distances between vectors in c and k
+vd = vd(isfinite(vd(:)));   % remove infinite distances (no connection)
+len = length(vd);
+if ~len, % if the two clusters are not connected, their distance in Inf
+	sd = Inf;
+else   % otherwise, calculate the distance between them
+	switch linkage,
+	 case 'single',   sd = min(vd);
+	 case 'average',  sd = sum(vd)/len;
+	 case 'complete', sd = max(vd);
+	 otherwise, error(['Unknown set distance: ' linkage]);
+	end
+end
+Cd(c,l) = sd; Cd(l,c) = sd;
+end
+end
+end
+close(h);
+last = Z(i,1);
+if isnan(last),
+last = Z(i-1,1);
+rest = setdiff(unique(clusters),last);
+Z(i:n-1,1) = rest';
+Z(i:n-1,2) = last;
+Z(i:n-1,3) = Inf;
+i = i - 1;
+else
+rest = [];
+end
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% return values
+% calculate the order of samples
+order = last;
+% go through the combination matrix from top to down
+for k=i:-1:1,
+c = Z(k,1); k = Z(k,2);                     % what (k) change to what (c)
+j = find(order==c);                         % the occurance of c in order
+if j == length(order), order = [order k];   % put k behind c
+else order = [order(1:j) k order(j+1:end)];
+end
+end
+order = [rest, order];
+% to maintain compatibility with Statistics Toolbox, the values in
+% Z must be yet transformed so that they are similar to the output
+% of LINKAGE function
+Zs = Z;
+current_cluster = 1:dlen;
+for i=1:size(Z,1),
+Zs(i,1) = current_cluster(Z(i,1));
+Zs(i,2) = current_cluster(Z(i,2));
+current_cluster(Z(i,[1 2])) = dlen+i;
+end
+Z = Zs;
+return;
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

Mercurial > hg > camir-aes2014

comparison toolboxes/MIRtoolbox1.3.2/somtoolbox/som_linkage.m @ 0:e9a9cd732c1e tip