view toolboxes/MIRtoolbox1.3.2/somtoolbox/som_make.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
line wrap: on
line source
function sMap = som_make(D, varargin)

%SOM_MAKE Create, initialize and train Self-Organizing Map.
%
% sMap = som_make(D, [[argID,] value, ...])
%
%  sMap = som_make(D);
%  sMap = som_make(D, 'munits', 20);
%  sMap = som_make(D, 'munits', 20, 'hexa', 'sheet');
%  sMap = som_make(D, 'msize', [4 6 7], 'lattice', 'rect');
%
%  Input and output arguments ([]'s are optional): 
%   D        (matrix) training data, size dlen x dim
%            (struct) data struct
%   [argID,  (string) See below. The values which are unambiguous can 
%    value]  (varies) be given without the preceding argID.
%
%   sMap     (struct) map struct
%
% Here are the valid argument IDs and corresponding values. The values 
% which are unambiguous (marked with '*') can be given without the
% preceding argID.
%   'init'       *(string) initialization: 'randinit' or 'lininit' (default)
%   'algorithm'  *(string) training: 'seq' or 'batch' (default) or 'sompak'
%   'munits'      (scalar) the preferred number of map units
%   'msize'       (vector) map grid size
%   'mapsize'    *(string) do you want a 'small', 'normal' or 'big' map
%                          Any explicit settings of munits or msize override this.
%   'lattice'    *(string) map lattice, 'hexa' or 'rect'
%   'shape'      *(string) map shape, 'sheet', 'cyl' or 'toroid'
%   'neigh'      *(string) neighborhood function, 'gaussian', 'cutgauss',
%                          'ep' or 'bubble'
%   'topol'      *(struct) topology struct
%   'som_topol','sTopol' = 'topol'
%   'mask'        (vector) BMU search mask, size dim x 1
%   'name'        (string) map name
%   'comp_names'  (string array / cellstr) component names, size dim x 1
%   'tracking'    (scalar) how much to report, default = 1
%   'training'    (string) 'short', 'default' or 'long'
%                 (vector) size 1 x 2, first length of rough training in epochs, 
%                          and then length of finetuning in epochs
%
% For more help, try 'type som_make' or check out online documentation.
% See also SOM_MAP_STRUCT, SOM_TOPOL_STRUCT, SOM_TRAIN_STRUCT,
%          SOM_RANDINIT, SOM_LININIT, SOM_SEQTRAIN, SOM_BATCHTRAIN.          

%%%%%%%%%%%%% DETAILED DESCRIPTION %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% som_make
%
% PURPOSE
%
% Creates, initializes and trains a SOM using default parameters.
%
% SYNTAX
%
%  sMap = som_make(D);
%  sMap = som_make(...,'argID',value,...);
%  sMap = som_make(...,value,...);
%
% DESCRIPTION
%
% Creates, initializes and trains a SOM with default parameters. Uses functions
% SOM_TOPOL_STRUCT, SOM_TRAIN_STRUCT, SOM_DATA_STRUCT and SOM_MAP_STRUCT to come
% up with the default values.
%
% First, the number of map units is determined. Unless they are
% explicitly defined, function SOM_TOPOL_STRUCT is used to determine this.
% It uses a heuristic formula of 'munits = 5*dlen^0.54321'. The 'mapsize'
% argument influences the final number of map units: a 'big' map has 
% x4 the default number of map units and a 'small' map has x0.25 the
% default number of map units. 
%
% After the number of map units has been determined, the map size is 
% determined. Basically, the two biggest eigenvalues of the training
% data are calculated and the ratio between sidelengths of the map grid
% is set to this ratio. The actual sidelengths are then set so that 
% their product is as close to the desired number of map units as
% possible.
%
% Then the SOM is initialized. First, linear initialization along two
% greatest eigenvectors is tried, but if this can't be done (the
% eigenvectors cannot be calculated), random initialization is used
% instead.  After initialization, the SOM is trained in two phases:
% first rough training and then fine-tuning. If the 'tracking'
% argument is greater than zero, the average quantization error and
% topographic error of the final map are calculated.
%
% REQUIRED INPUT ARGUMENTS
%
%  D           The data to use in the training.
%     (struct) A data struct. If a struct is given, '.comp_names' field as 
%              well as '.comp_norm' field is copied to the map struct.
%     (matrix) A data matrix, size dlen x dim. The data matrix may
%              contain unknown values, indicated by NaNs. 
%  
% OPTIONAL INPUT ARGUMENTS 
%
%  argID (string) Argument identifier string (see below).
%  value (varies) Value for the argument (see below).
%
% Here are the valid argument IDs and corresponding values. The values 
% which are unambiguous (marked with '*') can be given without the
% preceding argID.
%   'init'       *(string) initialization: 'randinit' or 'lininit' (default)
%   'algorithm'  *(string) training: 'seq' or 'batch' (default) or 'sompak'
%   'munits'      (scalar) the preferred number of map units
%   'msize'       (vector) map grid size
%   'mapsize'    *(string) do you want a 'small', 'normal' or 'big' map
%                          Any explicit settings of munits or msize override this.
%   'lattice'    *(string) map lattice, 'hexa' or 'rect'
%   'shape'      *(string) map shape, 'sheet', 'cyl' or 'toroid'
%   'neigh'      *(string) neighborhood function, 'gaussian', 'cutgauss',
%                          'ep' or 'bubble'
%   'topol'      *(struct) topology struct
%   'som_topol','sTopol' = 'topol'
%   'mask'        (vector) BMU search mask, size dim x 1
%   'name'        (string) map name
%   'comp_names'  (string array / cellstr) component names, size dim x 1
%   'tracking'    (scalar) how much to report, default = 1
%   'training'    (string) 'short', 'default' or 'long'
%                 (vector) size 1 x 2, first length of rough training in epochs, 
%                          and then length of finetuning in epochs
%
% OUTPUT ARGUMENTS
% 
%  sMap (struct) the trained map struct
%
% ERRORS AND WARNINGS
%
%  An error is raised, before any initialization or training is done, if
%   - the training data is empty,
%   - an argument ID is the last argument and thus has no value,
%   - 'algorithm' is not one of 'seq', 'batch' or 'sompak',
%   - 'training' is numeric but has fewer than two elements.
%  A warning is given if 'init' is neither 'randinit' nor 'lininit'; in
%  that case neither initialization function is called and training
%  proceeds with the map struct as created by SOM_MAP_STRUCT.
%  Other unrecognized arguments are reported with DISP and ignored.
%
% EXAMPLES
%
%  To simply train a map with default parameters: 
%
%   sMap = som_make(D); 
%  
%  With the optional arguments, the initialization and training can be
%  influenced. To change map size, use 'msize', 'munits' or 'mapsize'
%  arguments:  
%
%   sMap = som_make(D,'mapsize','big'); or sMap=som_make(D,'big');
%   sMap = som_make(D,'munits', 100);
%   sMap = som_make(D,'msize', [20 10]); 
%
%  Argument 'algorithm' can be used to switch between 'seq' and 'batch'
%  algorithms. 'batch' is the default, so to use 'seq' algorithm: 
%
%   sMap = som_make(D,'algorithm','seq'); or sMap = som_make(D,'seq'); 
%
%  The 'tracking' argument can be used to control the amount of reporting
%  during training. The argument is used in this function, and it is
%  passed to the training functions. To make the function work silently
%  set it to 0.
%
%   sMap = som_make(D,'tracking',0); 
%
% SEE ALSO
% 
%  som_map_struct   Create a map struct.
%  som_topol_struct Default values for SOM topology.
%  som_train_struct Default values for SOM training parameters.
%  som_randinit     Random initialization algorithm.
%  som_lininit      Linear initialization algorithm.
%  som_seqtrain     Sequential training algorithm.
%  som_batchtrain   Batch training algorithm.

% Copyright (c) 1999-2000 by the SOM toolbox programming team.
% http://www.cis.hut.fi/projects/somtoolbox/

% Version 2.0beta juuso 111199

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% check arguments

% D: either a data struct (names and normalization are taken from it)
% or a plain matrix (names and normalization come from a dummy struct)
is_data_struct = isstruct(D);
if is_data_struct, 
  data_name = D.name; 
  comp_names = D.comp_names;
  comp_norm = D.comp_norm;
  D = D.data;
else 
  data_name = inputname(1);
end
% fail with a clear message instead of an indexing error on D(1,:)
if isempty(D), 
  error('(som_make) Training data is empty.'); 
end
if ~is_data_struct, 
  % only one row is needed to get default component names and norms
  sDummy = som_data_struct(D(1,:)); 
  comp_names = sDummy.comp_names;
  comp_norm = sDummy.comp_norm;
end
[dlen, dim] = size(D);

% defaults
mapsize = '';
sM = som_map_struct(dim); 
sTopol = sM.topol;
munits = prod(sTopol.msize); % should be zero
mask = sM.mask; 
name = sM.name; 
neigh = sM.neigh; 
tracking = 1;
algorithm = 'batch'; 
initalg = 'lininit';
training = 'default'; 

% argument IDs which consume the next element of varargin as their value
value_ids = {'mask','munits','msize','mapsize','name','comp_names', ...
             'lattice','shape','topol','som_topol','sTopol','neigh', ...
             'tracking','algorithm','init','training'};

% varargin
nargs = length(varargin);
i=1; 
while i<=nargs, 
  argok = 1; 
  if ischar(varargin{i}), 
    % an argument ID in the last position has no value to read
    if i==nargs & any(strcmp(varargin{i},value_ids)), 
      error(['(som_make) Argument ID ''' varargin{i} ...
             ''' must be followed by a value.']); 
    end
    switch varargin{i}, 
      % argument IDs
     case 'mask',       i=i+1; mask = varargin{i}; 
     case 'munits',     i=i+1; munits = varargin{i}; 
     case 'msize',      i=i+1; sTopol.msize = varargin{i}; 
                        munits = prod(sTopol.msize); 
     case 'mapsize',    i=i+1; mapsize = varargin{i}; 
     case 'name',       i=i+1; name = varargin{i};
     case 'comp_names', i=i+1; comp_names = varargin{i}; 
     case 'lattice',    i=i+1; sTopol.lattice = varargin{i};
     case 'shape',      i=i+1; sTopol.shape = varargin{i}; 
     case {'topol','som_topol','sTopol'}, 
                        i=i+1; sTopol = varargin{i}; munits = prod(sTopol.msize); 
     case 'neigh',      i=i+1; neigh = varargin{i};
     case 'tracking',   i=i+1; tracking = varargin{i};
     case 'algorithm',  i=i+1; algorithm = varargin{i}; 
     case 'init',       i=i+1; initalg = varargin{i};
     case 'training',   i=i+1; training = varargin{i}; 
      % unambiguous values
     case {'hexa','rect'}, sTopol.lattice = varargin{i};
     case {'sheet','cyl','toroid'}, sTopol.shape = varargin{i}; 
     case {'gaussian','cutgauss','ep','bubble'}, neigh = varargin{i};
     case {'seq','batch','sompak'}, algorithm = varargin{i}; 
     case {'small','normal','big'}, mapsize = varargin{i}; 
     case {'randinit','lininit'}, initalg = varargin{i};
     case {'short','default','long'}, training = varargin{i}; 
     otherwise argok=0; 
    end
  elseif isstruct(varargin{i}) & isfield(varargin{i},'type'), 
    switch varargin{i}(1).type, 
     case 'som_topol', sTopol = varargin{i}; 
     otherwise argok=0; 
    end
  else
    argok = 0; 
  end
  if ~argok, 
    % D is argument #1, so varargin{i} is argument #(i+1) of the call
    disp(['(som_make) Ignoring invalid argument #' num2str(i+1)]); 
  end
  i = i+1; 
end

% Validate the values that select code paths below, so that problems are
% reported before any (possibly long) initialization or training is run.
if ~(ischar(algorithm) & any(strcmp(algorithm,{'seq','batch','sompak'}))), 
  % otherwise no training function would be called and an untrained
  % map would be returned without any notice
  error('(som_make) Unknown training algorithm: use ''seq'', ''batch'' or ''sompak''.'); 
end
if isnumeric(training) & length(training)<2, 
  % training(2) is needed by the finetuning phase
  error('(som_make) Numeric ''training'' must have two elements: rough and finetuning lengths.'); 
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% make the map struct

%% map size
if isempty(sTopol.msize) | ~prod(sTopol.msize), 
  if tracking>0, fprintf(1,'Determining map size...\n'); end
  if ~munits,     
    % no explicit number of units: start from the default for this
    % amount of data and scale it according to 'mapsize'
    sTemp = som_topol_struct('dlen',dlen);
    munits = prod(sTemp.msize);
    switch mapsize,
     case 'small', munits = max(9,ceil(munits/4));
     case 'big',   munits = munits*4;
     otherwise % nil
    end
  end
  sTemp = som_topol_struct('data',D,'munits',munits);
  sTopol.msize = sTemp.msize;
  if tracking>0, 
    fprintf(1,' map size [%d, %d]\n',sTopol.msize(1), sTopol.msize(2));   
  end
end

% map struct
sMap = som_map_struct(dim,sTopol,neigh,'mask',mask,'name',name, ...
                      'comp_names', comp_names, 'comp_norm', comp_norm); 
       
% function: 'sompak' is trained with som_sompaktrain, but the training
% parameters are requested for the 'seq' algorithm
if strcmp(algorithm,'sompak'), 
  algorithm = 'seq';
  func = 'sompak';
else
  func = algorithm;
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% initialization

if tracking>0, fprintf(1,'Initialization...\n'); end

switch initalg, 
 case 'randinit', sMap = som_randinit(D, sMap);
 case 'lininit', sMap = som_lininit(D, sMap); 
 otherwise
  % keep going as before, but do not stay silent about it
  warning('(som_make) Unknown initialization algorithm, initialization skipped.'); 
end
sMap.trainhist(1) = som_set(sMap.trainhist(1),'data_name',data_name);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% training

if tracking>0, fprintf(1,'Training using %s algorithm...\n',algorithm); end

% rough train
if tracking>0, fprintf(1,'Rough training phase...\n'); end
sTrain = som_train_struct(sMap,'dlen',dlen,'algorithm',algorithm,'phase','rough');
sTrain = som_set(sTrain,'data_name',data_name);
if isnumeric(training), sTrain.trainlen = training(1); 
else
  % 'default' (or any other string) keeps the length from som_train_struct
  switch training, 
   case 'short', sTrain.trainlen = max(1,sTrain.trainlen/4);
   case 'long',  sTrain.trainlen = sTrain.trainlen*4;
  end
end
switch func,
 case 'seq',    sMap = som_seqtrain(sMap,D,sTrain,'tracking',tracking,'mask',mask);
 case 'sompak', sMap = som_sompaktrain(sMap,D,sTrain,'tracking',tracking,'mask',mask);
 case 'batch',  sMap = som_batchtrain(sMap,D,sTrain,'tracking',tracking,'mask',mask);
end

% finetune
if tracking>0, fprintf(1,'Finetuning phase...\n'); end
sTrain = som_train_struct(sMap,'dlen',dlen,'phase','finetune');
sTrain = som_set(sTrain,'data_name',data_name,'algorithm',algorithm);
if isnumeric(training), sTrain.trainlen = training(2); 
else
  switch training, 
   case 'short', sTrain.trainlen = max(1,sTrain.trainlen/4);
   case 'long',  sTrain.trainlen = sTrain.trainlen*4;
  end
end
switch func,
 case 'seq',    sMap = som_seqtrain(sMap,D,sTrain,'tracking',tracking,'mask',mask);
 case 'sompak', sMap = som_sompaktrain(sMap,D,sTrain,'tracking',tracking,'mask',mask);
 case 'batch',  sMap = som_batchtrain(sMap,D,sTrain,'tracking',tracking,'mask',mask);
end

% quality
if tracking>0, 
  [mqe,tge] = som_quality(sMap,D);
  fprintf(1,'Final quantization error: %5.3f\n',mqe)
  fprintf(1,'Final topographic error:  %5.3f\n',tge)
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%