function [codes,clusters,err] = som_kmeans(method, D, k, epochs, verbose)

% SOM_KMEANS K-means algorithm.
%
% [codes,clusters,err] = som_kmeans(method, D, k, [epochs], [verbose])
%
%  Input and output arguments ([]'s are optional):
%    method     (string) k-means algorithm type: 'batch' or 'seq'
%    D          (matrix) data matrix, one sample per row
%               (struct) data or map struct (type 'som_data' or 'som_map')
%    k          (scalar) number of centroids, at most the number of samples
%    [epochs]   (scalar) number of training epochs, default 100
%    [verbose]  (scalar) if nonzero, display additional information
%
%    codes      (matrix) codebook vectors, one centroid per row
%    clusters   (vector) cluster number for each sample
%    err        (scalar) total quantization error for the data set
%                        (sum of squared distances to the assigned centroid)
%
% Notes:
%  - Initial centroids are k distinct samples picked at random; the random
%    generator is reseeded from the clock on every call.
%  - Data with a single column is handed to SCALAR_KMEANS before 'method'
%    is looked at, so 'method' has no effect in that case and all three
%    outputs are whatever SCALAR_KMEANS returns.
%  - An unrecognised 'method' only prints 'Unknown method' on stderr; the
%    function then returns the random initial centroids, an all-zero
%    'clusters' vector and err = 0.
%
% See also KMEANS_CLUSTERS, SOM_MAKE, SOM_BATCHTRAIN, SOM_SEQTRAIN.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Function has been renamed by Kimmo Raivio, because matlab65 also has a
% kmeans function 1.10.02
%% input arguments

if isstruct(D)
  switch D.type
   case 'som_map',  data = D.codebook;
   case 'som_data', data = D.data;
   otherwise
    % without this branch 'data' would simply be undefined further down
    error('som_kmeans: struct input must be of type ''som_map'' or ''som_data''.');
  end
else
  data = D;
end
[l, dim] = size(data);

if nargin < 4 || isempty(epochs) || any(isnan(epochs(:))), epochs = 100; end
if nargin < 5 || isempty(verbose), verbose = 0; end

% the initial centroids are k distinct samples, so k cannot exceed l
if k > l
  error('som_kmeans: k (%d) exceeds the number of samples (%d).', k, l);
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% action

rand('state', sum(100*clock)); % init rand generator

lr        = 0.5;               % initial learning rate for sequential k-means
temp      = randperm(l);
centroids = data(temp(1:k),:); % k distinct random samples as starting centroids
clusters  = zeros(1, l);

if dim==1
  [codes,clusters,err] = scalar_kmeans(data,k,epochs);
  return;
end

switch method
 case 'seq'
  len    = epochs * l;
  l_rate = linspace(lr,0,len);       % learning rate decays linearly to zero
  order  = randperm(l);
  for iter = 1:len
    % take the sample from 'data' (not 'D', which may be a struct)
    x  = data(order(rem(iter,l)+1),:);
    dx = x(ones(k,1),:) - centroids;
    [dist, nearest] = min(sum(dx.^2,2));
    % move only the winning centroid towards the sample
    centroids(nearest,:) = centroids(nearest,:) + l_rate(iter)*dx(nearest,:);
  end
  clusters = nearest_centroid(data, centroids);

 case 'batch'
  iter         = 0;
  old_clusters = zeros(1, l);
  while iter<epochs

    clusters = nearest_centroid(data, centroids);

    for i = 1:k
      f = find(clusters==i);
      s = length(f);
      % explicit dimension: a one-member cluster must still be averaged
      % column-wise; empty clusters keep their previous centroid
      if s, centroids(i,:) = sum(data(f,:), 1) / s; end
    end

    % converged when no sample changed its cluster since the last pass
    if iter > 0 && all(old_clusters==clusters)
      if verbose, fprintf(1, 'Convergence in %d iterations\n', iter); end
      break;
    end

    old_clusters = clusters;
    iter = iter + 1;
  end

  % final assignment against the last centroid update
  clusters = nearest_centroid(data, centroids);

 otherwise
  fprintf(2, 'Unknown method\n');
end

% total quantization error: squared distances summed over all clusters
err = 0;
for i = 1:k
  f = find(clusters==i);
  s = length(f);
  if s, err = err + sum(sum((data(f,:)-ones(s,1)*centroids(i,:)).^2,2)); end
end

codes = centroids;
return;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function idx = nearest_centroid(data, centroids)

% NEAREST_CENTROID Index of the closest centroid for every sample.
%
%  data       (matrix) l x dim, one sample per row
%  centroids  (matrix) k x dim, one centroid per row
%  idx        (vector) 1 x l, row number of the centroid with the smallest
%                      squared Euclidean distance to each sample

l = size(data,1);
k = size(centroids,1);

% |x|^2 + |c|^2 - 2*x*c' expanded to an l x k matrix
d = sum(data.^2,2)*ones(1,k) + ones(l,1)*sum(centroids.^2,2)' - ...
    2.*(data*(centroids'));

% the dimension argument keeps this a per-sample minimum even when k == 1
[dummy, idx] = min(d, [], 2);
idx = idx';
return;
wolffd@0: 
wolffd@0: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
wolffd@0: 
function [y,bm,qe] = scalar_kmeans(x,k,maxepochs)

% SCALAR_KMEANS K-means for a vector of scalar samples.
%
% [y,bm,qe] = scalar_kmeans(x,k,maxepochs)
%
%    x          (vector) samples (expected as a column); non-finite
%                        entries (NaN, Inf) are left out of the clustering
%    k          (scalar) number of centroids
%    maxepochs  (scalar) maximum number of iterations
%
%    y          (vector) k x 1 centroids, started evenly spaced between
%                        the smallest and largest finite sample
%    bm         (vector) centroid index for each sample, NaN where the
%                        sample was non-finite
%    qe         (scalar) mean absolute distance between the finite
%                        samples and their centroids

    nans = ~isfinite(x);
    x(nans) = [];
    n = length(x);

    % nothing to cluster: report "unknown" everywhere rather than failing
    % inside linspace on empty limits
    if n == 0
        y  = zeros(k,1) + NaN;
        bm = zeros(size(nans)) + NaN;
        qe = NaN;
        return;
    end

    mi = min(x); ma = max(x);
    y = linspace(mi,ma,k)';
    bm = ones(n,1);
    bmold = zeros(n,1);
    i = 0;
    while ~all(bm==bmold) && i<maxepochs
        bmold   = bm;
        % bins are delimited by the midpoints between neighbouring
        % centroids; (:) keeps the edge list a column even when k == 1
        mids    = (y(2:end)+y(1:end-1))/2;
        [c, bm] = histc(x,[-Inf; mids(:); Inf]);
        % per-bin sums, then divide by the bin counts to get the means
        y       = full(sum(sparse(bm,1:n,x,k,n),2));
        zh      = (c(1:end-1)==0);
        y(~zh)  = y(~zh)./c(~zh);
        % empty bins: place the centroid just above its left neighbour
        inds    = find(zh)';
        for j=inds, if j==1, y(j) = mi; else y(j) = y(j-1) + eps; end, end
        i=i+1;
    end
    % the loop ran out of epochs: refresh the assignment for the last y
    if i==maxepochs
        mids    = (y(2:end)+y(1:end-1))/2;
        [c, bm] = histc(x,[-Inf; mids(:); Inf]);
    end
    if nargout>2, qe = sum(abs(x-y(bm)))/n; end

    % only bm is per-sample, so only bm is expanded back to the original
    % length; y (k centroids) and qe (scalar) are returned unchanged
    if any(nans)
        found     = bm;
        bm        = zeros(size(nans)) + NaN;
        bm(~nans) = found;
    end

    return;
wolffd@0: