wolffd@0: function [Z,order,Md] = som_linkage(sM,varargin) wolffd@0: wolffd@0: %SOM_LINKAGE Make a hierarchical linkage of the SOM map units. wolffd@0: % wolffd@0: % [Z,order,Dist] = som_linkage(sM, [[argID,] value, ...]) wolffd@0: % wolffd@0: % Z = som_linkage(sM); wolffd@0: % Z = som_linkage(D,'complete'); wolffd@0: % Z = som_linkage(sM,'single','ignore',find(~som_hits(sM,D))); wolffd@0: % Z = som_linkage(sM,pdist(sM.codebook,'mahal')); wolffd@0: % som_dendrogram(Z); wolffd@0: % wolffd@0: % Input and output arguments ([]'s are optional): wolffd@0: % sM (struct) map or data struct to be clustered wolffd@0: % (matrix) size dlen x dim, a data set: the matrix must not wolffd@0: % contain any NaN's! wolffd@0: % [argID, (string) See below. The values which are unambiguous can wolffd@0: % value] (varies) be given without the preceeding argID. wolffd@0: % wolffd@0: % Z (matrix) size dlen-1 x 3, the linkage info wolffd@0: % Z(i,1) and Z(i,2) hold the indeces of clusters wolffd@0: % combined on level i (starting from bottom). The new wolffd@0: % cluster has index dlen+i. The initial cluster wolffd@0: % index of each unit is its linear index in the wolffd@0: % original data matrix. Z(i,3) is the distance wolffd@0: % between the combined clusters. See LINKAGE wolffd@0: % function in the Statistics Toolbox. wolffd@0: % The ignored samples are listed at the wolffd@0: % end of the Z-matrix and have Z(*,3) == Inf wolffd@0: % Dist (matrix) size dlen x dlen, pairwise distance matrix wolffd@0: % wolffd@0: % Here are the valid argument IDs and corresponding values. The values wolffd@0: % which are unambiguous (marked with '*') can be given without the wolffd@0: % preceeding argID. wolffd@0: % 'linkage' *(string) the linkage criteria to use: 'single' (the wolffd@0: % default), 'average' or 'complete' wolffd@0: % 'topol' *(struct) topology struct wolffd@0: % 'connect' *(string) 'neighbors' or 'any' (default), whether the wolffd@0: % connections should be allowed only between wolffd@0: % neighbors or between any vectors wolffd@0: % (matrix) size dlen x dlen indicating the connections wolffd@0: % between vectors wolffd@0: % (scalar) the N-neighborhood upto which the connections wolffd@0: % should be formed (implies 'neighbors') wolffd@0: % 'ignore' (vector) the units/vectors which should be ignored wolffd@0: % 'dist' (matrix) size dlen x dlen, pairwise distance matrix to wolffd@0: % be used instead of euclidian distances wolffd@0: % (vector) as the output of PDIST function wolffd@0: % (scalar) distance norm to use (euclidian = 2) wolffd@0: % 'mask' (vector) size dim x 1, the search mask used to wolffd@0: % weight distance calculation. By default wolffd@0: % sM.mask or a vector of ones is used. wolffd@0: % wolffd@0: % Note that together 'connect'='neighbors' and 'ignore' may form wolffd@0: % areas on the map which will never be connected: connections wolffd@0: % across the ignored map units simply do not exist. wolffd@0: % wolffd@0: % See also KMEANS_CLUSTERS, LINKAGE, PDIST, DENDROGRAM. wolffd@0: wolffd@0: % Copyright (c) 2000 by Juha Vesanto wolffd@0: % Contributed to SOM Toolbox on June 16th, 2000 by Juha Vesanto wolffd@0: % http://www.cis.hut.fi/projects/somtoolbox/ wolffd@0: wolffd@0: % Version 2.0beta juuso 160600 wolffd@0: wolffd@0: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% wolffd@0: %% input arguments wolffd@0: wolffd@0: % the data wolffd@0: if isstruct(sM), wolffd@0: if isfield(sM,'data'), D = sM.data; sT = []; mask = []; wolffd@0: else D = sM.codebook; sT = sM.topol; mask = sM.mask; wolffd@0: end wolffd@0: else wolffd@0: D = sM; sT = []; mask = []; wolffd@0: end wolffd@0: [dlen dim] = size(D); wolffd@0: if isempty(mask), mask = ones(dim,1); end wolffd@0: if any(isnan(D(:))), error('Data matrix must not have any NaNs.'); end wolffd@0: wolffd@0: % varargin wolffd@0: Md = 2; wolffd@0: linkage = 'single'; wolffd@0: ignore_units = []; wolffd@0: constrained = 0; wolffd@0: i=1; wolffd@0: while i<=length(varargin), wolffd@0: argok = 1; wolffd@0: if ischar(varargin{i}), wolffd@0: switch varargin{i}, wolffd@0: % argument IDs wolffd@0: case {'topol','som_topol','sTopol'}, i=i+1; sT = varargin{i}; wolffd@0: case 'connect', i=i+1; wolffd@0: if ischar(varargin{i}), constrained = ~strcmp(varargin{i},'any'); wolffd@0: else constrained = varargin{i}; end wolffd@0: case 'ignore', i=i+1; ignore_units = varargin{i}; wolffd@0: case 'dist', i=i+1; Md = varargin{i}; wolffd@0: case 'linkage', i=i+1; linkage = varargin{i}; wolffd@0: case 'mask', i=i+1; mask = varargin{i}; wolffd@0: case 'tracking',i=i+1; tracking = varargin{i}; wolffd@0: % unambiguous values wolffd@0: case 'neighbors', constrained = 1; wolffd@0: case 'any', constrained = 0; wolffd@0: case {'single','average','complete'}, linkage = varargin{i}; wolffd@0: otherwise argok=0; wolffd@0: end wolffd@0: elseif isstruct(varargin{i}) & isfield(varargin{i},'type'), wolffd@0: switch varargin{i}(1).type, wolffd@0: case 'som_topol', sTopol = varargin{i}; wolffd@0: otherwise argok=0; wolffd@0: end wolffd@0: else wolffd@0: argok = 0; wolffd@0: end wolffd@0: if ~argok, wolffd@0: disp(['(som_linkage) Ignoring invalid argument #' num2str(i+1)]); wolffd@0: end wolffd@0: i = i+1; wolffd@0: end wolffd@0: wolffd@0: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% wolffd@0: %% distance matrix wolffd@0: wolffd@0: % given distance matrix % jh corrected this place totally 27.3. 03 wolffd@0: if (prod(size(Md))==1), % no explicit distance matrix, set flag wolffd@0: q=2; % 17.2.03 kr added some brackets wolffd@0: else wolffd@0: if (prod(size(Md))0, wolffd@0: Ne1 = som_unit_neighs(sT); wolffd@0: Conn = som_neighborhood(Ne1,constrained); wolffd@0: Conn(~isfinite(Conn(:))) = 0; wolffd@0: else Conn = constrained; end wolffd@0: if ~isempty(Conn), for i=1:dlen, C(i,i) = 1; end, end wolffd@0: wolffd@0: % pairwise distance matrix across connected units wolffd@0: n = size(D,1); wolffd@0: if prod(size(Md))>1, wolffd@0: % remove distances between non-neighbors wolffd@0: if constrained, for i = 1:n, Md(i,find(Conn(i,:)==0)) = Inf; end, end wolffd@0: else wolffd@0: % calculate pairwise distance matrix wolffd@0: q = Md; wolffd@0: Md = zeros(n,n)+Inf; wolffd@0: if ~constrained & q==2, % fast for the default case wolffd@0: for i = 1:n-1, wolffd@0: x = D(i,:); wolffd@0: inds = [(i+1):n]; wolffd@0: Diff = D(inds,:) - x(ones(n-i,1),:); wolffd@0: Md(inds,i) = sqrt((Diff.^2)*mask); wolffd@0: Md(i,inds) = Md(inds,i)'; wolffd@0: end wolffd@0: else wolffd@0: for i = 1:n-1, wolffd@0: inds = find(Conn(i,:)==1); wolffd@0: inds = inds(find(inds>i)); wolffd@0: Diff = abs(D(inds,:) - D(i*ones(length(inds),1),:)); wolffd@0: switch q, wolffd@0: case 1, dist = Diff*mask; wolffd@0: case 2, dist = sqrt((Diff.^2)*mask); wolffd@0: case Inf, dist = max(Diff,[],2); wolffd@0: otherwise, dist = ((Diff.^q)*mask).^(1/q); wolffd@0: end wolffd@0: Md(inds,i) = dist; wolffd@0: Md(i,inds) = dist'; wolffd@0: end wolffd@0: end wolffd@0: end wolffd@0: wolffd@0: % set distances to ignored units to Inf wolffd@0: if ~isempty(ignore_units), wolffd@0: Md(ignore_units,:) = Inf; wolffd@0: Md(:,ignore_units) = Inf; wolffd@0: end wolffd@0: wolffd@0: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% wolffd@0: %% construct dendrogram wolffd@0: wolffd@0: Z = zeros(n-1,3)+NaN; % merged clusters and distance for each step wolffd@0: clusters = 1:dlen; % each vector is at first in its own cluster wolffd@0: Cd = Md; % distances between clusters wolffd@0: wolffd@0: h = waitbar(0,'Constructing hierarchical clustering'); wolffd@0: wolffd@0: for i=1:n-1, wolffd@0: wolffd@0: % tracking wolffd@0: waitbar(i/(n-1),h); wolffd@0: wolffd@0: %% combine two closest clusters wolffd@0: % find the clusters which are closest to each other (c1 and c2) wolffd@0: [d,ind] = min(min(Cd)); wolffd@0: if ~isfinite(d), break; end % no more connected clusters wolffd@0: [d,c1] = min(Cd(:,ind)); % cluster1 wolffd@0: c2 = clusters(ind); % cluster2 wolffd@0: % combine the two clusters wolffd@0: c1_inds = find(clusters==c1); % vectors belonging to c1 wolffd@0: c2_inds = find(clusters==c2); % vectors belonging to c2 wolffd@0: c_inds = [c1_inds, c2_inds]; % members of the new cluster wolffd@0: % new cluster index = bigger cluster wolffd@0: if length(c2_inds)>length(c1_inds), c=c2; k=c1; else c=c1; k=c2; end wolffd@0: clusters(c_inds) = c; % update cluster info wolffd@0: Z(i,:) = [c, k, d]; % save info into Z wolffd@0: wolffd@0: %% update cluster distances wolffd@0: % remove the subclusters from the Cd table wolffd@0: Cd(c_inds,c_inds) = Inf; % distance of the cluster to its members = Inf wolffd@0: k_inds = c_inds(c_inds ~= c); % vectors of the smaller cluster wolffd@0: Cd(k_inds,:) = Inf; % distance of the subclusters to wolffd@0: Cd(:,k_inds) = Inf; % other clusters = Inf wolffd@0: % update the distance of this cluster to the other clusters wolffd@0: cl = unique(clusters(clusters ~= c)); % indeces of all other clusters wolffd@0: if ~isempty(cl), % added since v6.5 works differently than 6.1 wolffd@0: for l=cl, wolffd@0: o_inds = find(clusters==l); % indeces belonging to cluster k wolffd@0: vd = Md(c_inds,o_inds); % distances between vectors in c and k wolffd@0: vd = vd(isfinite(vd(:))); % remove infinite distances (no connection) wolffd@0: len = length(vd); wolffd@0: if ~len, % if the two clusters are not connected, their distance in Inf wolffd@0: sd = Inf; wolffd@0: else % otherwise, calculate the distance between them wolffd@0: switch linkage, wolffd@0: case 'single', sd = min(vd); wolffd@0: case 'average', sd = sum(vd)/len; wolffd@0: case 'complete', sd = max(vd); wolffd@0: otherwise, error(['Unknown set distance: ' linkage]); wolffd@0: end wolffd@0: end wolffd@0: Cd(c,l) = sd; Cd(l,c) = sd; wolffd@0: end wolffd@0: end wolffd@0: end wolffd@0: close(h); wolffd@0: wolffd@0: last = Z(i,1); wolffd@0: if isnan(last), wolffd@0: last = Z(i-1,1); wolffd@0: rest = setdiff(unique(clusters),last); wolffd@0: Z(i:n-1,1) = rest'; wolffd@0: Z(i:n-1,2) = last; wolffd@0: Z(i:n-1,3) = Inf; wolffd@0: i = i - 1; wolffd@0: else wolffd@0: rest = []; wolffd@0: end wolffd@0: wolffd@0: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% wolffd@0: %% return values wolffd@0: wolffd@0: % calculate the order of samples wolffd@0: order = last; wolffd@0: % go through the combination matrix from top to down wolffd@0: for k=i:-1:1, wolffd@0: c = Z(k,1); k = Z(k,2); % what (k) change to what (c) wolffd@0: j = find(order==c); % the occurance of c in order wolffd@0: if j == length(order), order = [order k]; % put k behind c wolffd@0: else order = [order(1:j) k order(j+1:end)]; wolffd@0: end wolffd@0: end wolffd@0: order = [rest, order]; wolffd@0: wolffd@0: % to maintain compatibility with Statistics Toolbox, the values in wolffd@0: % Z must be yet transformed so that they are similar to the output wolffd@0: % of LINKAGE function wolffd@0: wolffd@0: Zs = Z; wolffd@0: current_cluster = 1:dlen; wolffd@0: for i=1:size(Z,1), wolffd@0: Zs(i,1) = current_cluster(Z(i,1)); wolffd@0: Zs(i,2) = current_cluster(Z(i,2)); wolffd@0: current_cluster(Z(i,[1 2])) = dlen+i; wolffd@0: end wolffd@0: wolffd@0: Z = Zs; wolffd@0: wolffd@0: return; wolffd@0: wolffd@0: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% wolffd@0: wolffd@0: wolffd@0: wolffd@0: