wolffd@0: function sMap = som_make(D, varargin)
wolffd@0: 
wolffd@0: %SOM_MAKE Create, initialize and train Self-Organizing Map.
wolffd@0: %
wolffd@0: % sMap = som_make(D, [[argID,] value, ...])
wolffd@0: %
wolffd@0: %  sMap = som_make(D);
wolffd@0: %  sMap = som_make(D, 'munits', 20);
wolffd@0: %  sMap = som_make(D, 'munits', 20, 'hexa', 'sheet');
wolffd@0: %  sMap = som_make(D, 'msize', [4 6 7], 'lattice', 'rect');
wolffd@0: %
wolffd@0: %  Input and output arguments ([]'s are optional): 
wolffd@0: %   D        (matrix) training data, size dlen x dim
wolffd@0: %            (struct) data struct
wolffd@0: %   [argID,  (string) See below. The values which are unambiguous can 
wolffd@0: %    value]  (varies) be given without the preceding argID.
wolffd@0: %
wolffd@0: %   sMap     (struct) map struct
wolffd@0: %
wolffd@0: % Here are the valid argument IDs and corresponding values. The values 
wolffd@0: % which are unambiguous (marked with '*') can be given without the
wolffd@0: % preceding argID.
wolffd@0: %   'init'       *(string) initialization: 'randinit' or 'lininit' (default)
wolffd@0: %   'algorithm'  *(string) training: 'seq' or 'batch' (default) or 'sompak'
wolffd@0: %   'munits'      (scalar) the preferred number of map units
wolffd@0: %   'msize'       (vector) map grid size
wolffd@0: %   'mapsize'    *(string) do you want a 'small', 'normal' or 'big' map
wolffd@0: %                          Any explicit settings of munits or msize override this.
wolffd@0: %   'lattice'    *(string) map lattice, 'hexa' or 'rect'
wolffd@0: %   'shape'      *(string) map shape, 'sheet', 'cyl' or 'toroid'
wolffd@0: %   'neigh'      *(string) neighborhood function, 'gaussian', 'cutgauss',
wolffd@0: %                          'ep' or 'bubble'
wolffd@0: %   'topol'      *(struct) topology struct
wolffd@0: %   'som_topol','sTopol' = 'topol'
wolffd@0: %   'mask'        (vector) BMU search mask, size dim x 1
wolffd@0: %   'name'        (string) map name
wolffd@0: %   'comp_names'  (string array / cellstr) component names, size dim x 1
wolffd@0: %   'tracking'    (scalar) how much to report, default = 1
wolffd@0: %   'training'   *(string) 'short', 'default' or 'long'
wolffd@0: %                 (vector) size 1 x 2, first length of rough training in epochs, 
wolffd@0: %                          and then length of finetuning in epochs
wolffd@0: %
wolffd@0: % For more help, try 'type som_make' or check out online documentation.
wolffd@0: % See also SOM_MAP_STRUCT, SOM_TOPOL_STRUCT, SOM_TRAIN_STRUCT,
wolffd@0: %          SOM_RANDINIT, SOM_LININIT, SOM_SEQTRAIN, SOM_BATCHTRAIN.          
wolffd@0: 
wolffd@0: %%%%%%%%%%%%% DETAILED DESCRIPTION %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
wolffd@0: %
wolffd@0: % som_make
wolffd@0: %
wolffd@0: % PURPOSE
wolffd@0: %
wolffd@0: % Creates, initializes and trains a SOM using default parameters.
wolffd@0: %
wolffd@0: % SYNTAX
wolffd@0: %
wolffd@0: %  sMap = som_make(D);
wolffd@0: %  sMap = som_make(...,'argID',value,...);
wolffd@0: %  sMap = som_make(...,value,...);
wolffd@0: %
wolffd@0: % DESCRIPTION
wolffd@0: %
wolffd@0: % Creates, initializes and trains a SOM with default parameters. Uses functions
wolffd@0: % SOM_TOPOL_STRUCT, SOM_TRAIN_STRUCT, SOM_DATA_STRUCT and SOM_MAP_STRUCT to come
wolffd@0: % up with the default values.
wolffd@0: %
wolffd@0: % First, the number of map units is determined. Unless they are
wolffd@0: % explicitly defined, function SOM_TOPOL_STRUCT is used to determine this.
wolffd@0: % It uses a heuristic formula of 'munits = 5*dlen^0.54321'. The 'mapsize'
wolffd@0: % argument influences the final number of map units: a 'big' map has 
wolffd@0: % x4 the default number of map units and a 'small' map has x0.25 the
wolffd@0: % default number of map units. 
wolffd@0: %
wolffd@0: % After the number of map units has been determined, the map size is 
wolffd@0: % determined. Basically, the two biggest eigenvalues of the training
wolffd@0: % data are calculated and the ratio between sidelengths of the map grid
wolffd@0: % is set to this ratio. The actual sidelengths are then set so that 
wolffd@0: % their product is as close to the desired number of map units as
wolffd@0: % possible.
wolffd@0: %
wolffd@0: % Then the SOM is initialized. First, linear initialization along two
wolffd@0: % greatest eigenvectors is tried, but if this can't be done (the
wolffd@0: % eigenvectors cannot be calculated), random initialization is used
wolffd@0: % instead.  After initialization, the SOM is trained in two phases:
wolffd@0: % first rough training and then fine-tuning. If the 'tracking'
wolffd@0: % argument is greater than zero, the average quantization error and
wolffd@0: % topographic error of the final map are calculated.
wolffd@0: %
wolffd@0: % REQUIRED INPUT ARGUMENTS
wolffd@0: %
wolffd@0: %  D           The data to use in the training.
wolffd@0: %     (struct) A data struct. If a struct is given, '.comp_names' field as 
wolffd@0: %              well as '.comp_norm' field is copied to the map struct.
wolffd@0: %     (matrix) A data matrix, size dlen x dim. The data matrix may
wolffd@0: %              contain unknown values, indicated by NaNs. 
wolffd@0: %  
wolffd@0: % OPTIONAL INPUT ARGUMENTS 
wolffd@0: %
wolffd@0: %  argID (string) Argument identifier string (see below).
wolffd@0: %  value (varies) Value for the argument (see below).
wolffd@0: %
wolffd@0: % Here are the valid argument IDs and corresponding values. The values 
wolffd@0: % which are unambiguous (marked with '*') can be given without the
wolffd@0: % preceding argID.
wolffd@0: %   'init'       *(string) initialization: 'randinit' or 'lininit' (default)
wolffd@0: %   'algorithm'  *(string) training: 'seq' or 'batch' (default) or 'sompak'
wolffd@0: %   'munits'      (scalar) the preferred number of map units
wolffd@0: %   'msize'       (vector) map grid size
wolffd@0: %   'mapsize'    *(string) do you want a 'small', 'normal' or 'big' map
wolffd@0: %                          Any explicit settings of munits or msize override this.
wolffd@0: %   'lattice'    *(string) map lattice, 'hexa' or 'rect'
wolffd@0: %   'shape'      *(string) map shape, 'sheet', 'cyl' or 'toroid'
wolffd@0: %   'neigh'      *(string) neighborhood function, 'gaussian', 'cutgauss',
wolffd@0: %                          'ep' or 'bubble'
wolffd@0: %   'topol'      *(struct) topology struct
wolffd@0: %   'som_topol','sTopol' = 'topol'
wolffd@0: %   'mask'        (vector) BMU search mask, size dim x 1
wolffd@0: %   'name'        (string) map name
wolffd@0: %   'comp_names'  (string array / cellstr) component names, size dim x 1
wolffd@0: %   'tracking'    (scalar) how much to report, default = 1
wolffd@0: %   'training'   *(string) 'short', 'default' or 'long'
wolffd@0: %                 (vector) size 1 x 2, first length of rough training in epochs, 
wolffd@0: %                          and then length of finetuning in epochs
wolffd@0: %
wolffd@0: % OUTPUT ARGUMENTS
wolffd@0: % 
wolffd@0: %  sMap (struct) the trained map struct
wolffd@0: %
wolffd@0: % ERRORS
wolffd@0: %
wolffd@0: %  An error is raised if 
wolffd@0: %   - a data matrix D has no rows,
wolffd@0: %   - an argument ID is the last argument, i.e. it has no value,
wolffd@0: %   - the 'init' value is not 'randinit' or 'lininit',
wolffd@0: %   - the 'algorithm' value is not 'seq', 'batch' or 'sompak', or
wolffd@0: %   - a numeric 'training' value has fewer than two elements.
wolffd@0: %  Other unrecognized arguments are reported and ignored.
wolffd@0: %
wolffd@0: % EXAMPLES
wolffd@0: %
wolffd@0: %  To simply train a map with default parameters: 
wolffd@0: %
wolffd@0: %   sMap = som_make(D); 
wolffd@0: %  
wolffd@0: %  With the optional arguments, the initialization and training can be
wolffd@0: %  influenced. To change map size, use 'msize', 'munits' or 'mapsize'
wolffd@0: %  arguments:  
wolffd@0: %
wolffd@0: %   sMap = som_make(D,'mapsize','big'); or sMap=som_make(D,'big');
wolffd@0: %   sMap = som_make(D,'munits', 100);
wolffd@0: %   sMap = som_make(D,'msize', [20 10]); 
wolffd@0: %
wolffd@0: %  Argument 'algorithm' can be used to switch between 'seq' and 'batch'
wolffd@0: %  algorithms. 'batch' is the default, so to use 'seq' algorithm: 
wolffd@0: %
wolffd@0: %   sMap = som_make(D,'algorithm','seq'); or sMap = som_make(D,'seq'); 
wolffd@0: %
wolffd@0: %  The 'tracking' argument can be used to control the amount of reporting
wolffd@0: %  during training. The argument is used in this function, and it is
wolffd@0: %  passed to the training functions. To make the function work silently
wolffd@0: %  set it to 0.
wolffd@0: %
wolffd@0: %   sMap = som_make(D,'tracking',0); 
wolffd@0: %
wolffd@0: % SEE ALSO
wolffd@0: % 
wolffd@0: %  som_map_struct   Create a map struct.
wolffd@0: %  som_topol_struct Default values for SOM topology.
wolffd@0: %  som_train_struct Default values for SOM training parameters.
wolffd@0: %  som_randinit     Random initialization algorithm.
wolffd@0: %  som_lininit      Linear initialization algorithm.
wolffd@0: %  som_seqtrain     Sequential training algorithm.
wolffd@0: %  som_batchtrain   Batch training algorithm.
wolffd@0: 
wolffd@0: % Copyright (c) 1999-2000 by the SOM toolbox programming team.
wolffd@0: % http://www.cis.hut.fi/projects/somtoolbox/
wolffd@0: 
wolffd@0: % Version 2.0beta juuso 111199
wolffd@0: 
wolffd@0: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
wolffd@0: %% check arguments
wolffd@0: 
wolffd@0: % D
wolffd@0: if isstruct(D) 
wolffd@0:   % data struct: pick up its name, component names and normalizations,
wolffd@0:   % and from here on work with the plain data matrix only
wolffd@0:   data_name = D.name; 
wolffd@0:   comp_names = D.comp_names;
wolffd@0:   comp_norm = D.comp_norm;
wolffd@0:   D = D.data;
wolffd@0: else 
wolffd@0:   % the first row is indexed below, so a matrix without rows cannot be used
wolffd@0:   if size(D,1)<1, 
wolffd@0:     error('(som_make) The training data matrix D has no rows.');
wolffd@0:   end
wolffd@0:   % plain matrix: the data name is the caller's variable name, and a
wolffd@0:   % one-row dummy data struct supplies default comp_names / comp_norm
wolffd@0:   data_name = inputname(1);
wolffd@0:   sDummy = som_data_struct(D(1,:)); 
wolffd@0:   comp_names = sDummy.comp_names;
wolffd@0:   comp_norm = sDummy.comp_norm;
wolffd@0: end
wolffd@0: [dlen dim] = size(D);
wolffd@0: 
wolffd@0: % defaults
wolffd@0: mapsize = '';
wolffd@0: sM = som_map_struct(dim); 
wolffd@0: sTopol = sM.topol;
wolffd@0: munits = prod(sTopol.msize); % should be zero
wolffd@0: mask = sM.mask; 
wolffd@0: name = sM.name; 
wolffd@0: neigh = sM.neigh; 
wolffd@0: tracking = 1;
wolffd@0: algorithm = 'batch'; 
wolffd@0: initalg = 'lininit';
wolffd@0: training = 'default'; 
wolffd@0: 
wolffd@0: % argument IDs which consume the next element of varargin as their value
wolffd@0: valueIDs = {'mask','munits','msize','mapsize','name','comp_names', ...
wolffd@0:             'lattice','shape','topol','som_topol','sTopol','neigh', ...
wolffd@0:             'tracking','algorithm','init','training'};
wolffd@0: 
wolffd@0: % varargin
wolffd@0: i=1; 
wolffd@0: while i<=length(varargin), 
wolffd@0:   argok = 1; 
wolffd@0:   if ischar(varargin{i}), 
wolffd@0:     % an argument ID given as the very last argument has no value to read;
wolffd@0:     % report that clearly instead of failing on an out-of-range index
wolffd@0:     if any(strcmp(varargin{i},valueIDs)) & i>=length(varargin), 
wolffd@0:       error(['(som_make) Argument ID ''' varargin{i} ''' must be followed by a value.']);
wolffd@0:     end
wolffd@0:     switch varargin{i}, 
wolffd@0:       % argument IDs
wolffd@0:      case 'mask',       i=i+1; mask = varargin{i}; 
wolffd@0:      case 'munits',     i=i+1; munits = varargin{i}; 
wolffd@0:      case 'msize',      i=i+1; sTopol.msize = varargin{i}; 
wolffd@0:                         % prod([]) is 1, so an empty msize must not be
wolffd@0:                         % turned into a request for a one-unit map
wolffd@0:                         if ~isempty(sTopol.msize), 
wolffd@0:                           munits = prod(sTopol.msize); 
wolffd@0:                         end
wolffd@0:      case 'mapsize',    i=i+1; mapsize = varargin{i}; 
wolffd@0:      case 'name',       i=i+1; name = varargin{i};
wolffd@0:      case 'comp_names', i=i+1; comp_names = varargin{i}; 
wolffd@0:      case 'lattice',    i=i+1; sTopol.lattice = varargin{i};
wolffd@0:      case 'shape',      i=i+1; sTopol.shape = varargin{i}; 
wolffd@0:      case {'topol','som_topol','sTopol'}, 
wolffd@0:                         i=i+1; sTopol = varargin{i}; 
wolffd@0:                         % same guard as for 'msize' above
wolffd@0:                         if ~isempty(sTopol.msize), 
wolffd@0:                           munits = prod(sTopol.msize); 
wolffd@0:                         end
wolffd@0:      case 'neigh',      i=i+1; neigh = varargin{i};
wolffd@0:      case 'tracking',   i=i+1; tracking = varargin{i};
wolffd@0:      case 'algorithm',  i=i+1; algorithm = varargin{i}; 
wolffd@0:      case 'init',       i=i+1; initalg = varargin{i};
wolffd@0:      case 'training',   i=i+1; training = varargin{i}; 
wolffd@0:       % unambiguous values
wolffd@0:      case {'hexa','rect'}, sTopol.lattice = varargin{i};
wolffd@0:      case {'sheet','cyl','toroid'}, sTopol.shape = varargin{i}; 
wolffd@0:      case {'gaussian','cutgauss','ep','bubble'}, neigh = varargin{i};
wolffd@0:      case {'seq','batch','sompak'}, algorithm = varargin{i}; 
wolffd@0:      case {'small','normal','big'}, mapsize = varargin{i}; 
wolffd@0:      case {'randinit','lininit'}, initalg = varargin{i};
wolffd@0:      case {'short','default','long'}, training = varargin{i}; 
wolffd@0:      otherwise argok=0; 
wolffd@0:     end
wolffd@0:   elseif isstruct(varargin{i}) & isfield(varargin{i},'type'), 
wolffd@0:     % a struct given without an argID is recognized by its .type field
wolffd@0:     switch varargin{i}(1).type, 
wolffd@0:      case 'som_topol', sTopol = varargin{i}; 
wolffd@0:      otherwise argok=0; 
wolffd@0:     end
wolffd@0:   else
wolffd@0:     argok = 0; 
wolffd@0:   end
wolffd@0:   if ~argok, 
wolffd@0:     % i+1 because D is the first argument of the function
wolffd@0:     disp(['(som_make) Ignoring invalid argument #' num2str(i+1)]); 
wolffd@0:   end
wolffd@0:   i = i+1; 
wolffd@0: end
wolffd@0: 
wolffd@0: % Validate the choices that are dispatched on with switch statements
wolffd@0: % below. Without this, an unknown name would match no case and the map
wolffd@0: % would be returned uninitialized or untrained without any notice.
wolffd@0: if ~any(strcmp(initalg,{'randinit','lininit'})), 
wolffd@0:   error('(som_make) Unknown initialization: use ''randinit'' or ''lininit''.');
wolffd@0: end
wolffd@0: if ~any(strcmp(algorithm,{'seq','batch','sompak'})), 
wolffd@0:   error('(som_make) Unknown training algorithm: use ''seq'', ''batch'' or ''sompak''.');
wolffd@0: end
wolffd@0: % numeric training lengths are read as training(1) and training(2); check
wolffd@0: % this now rather than failing only after the rough phase has been run
wolffd@0: if isnumeric(training) & length(training)<2, 
wolffd@0:   error('(som_make) Numeric ''training'' must have two elements: [rough finetune].');
wolffd@0: end
wolffd@0: 
wolffd@0: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
wolffd@0: %% make the map struct
wolffd@0: 
wolffd@0: %% map size
wolffd@0: if isempty(sTopol.msize) | ~prod(sTopol.msize), 
wolffd@0:   if tracking>0, fprintf(1,'Determining map size...\n'); end
wolffd@0:   if ~munits,     
wolffd@0:     % no explicit number of units: take the default for this amount of
wolffd@0:     % data and scale it according to the 'mapsize' argument
wolffd@0:     sTemp = som_topol_struct('dlen',dlen);
wolffd@0:     munits = prod(sTemp.msize);
wolffd@0:     switch mapsize,
wolffd@0:      case 'small', munits = max(9,ceil(munits/4));
wolffd@0:      case 'big',   munits = munits*4;
wolffd@0:      otherwise % nil
wolffd@0:     end
wolffd@0:   end
wolffd@0:   % let som_topol_struct pick the grid side lengths for this many units
wolffd@0:   sTemp = som_topol_struct('data',D,'munits',munits);
wolffd@0:   sTopol.msize = sTemp.msize;
wolffd@0:   if tracking>0, 
wolffd@0:     fprintf(1,' map size [%d, %d]\n',sTopol.msize(1), sTopol.msize(2));   
wolffd@0:   end
wolffd@0: end
wolffd@0: 
wolffd@0: % map struct
wolffd@0: sMap = som_map_struct(dim,sTopol,neigh,'mask',mask,'name',name, ...
wolffd@0:                       'comp_names', comp_names, 'comp_norm', comp_norm); 
wolffd@0:        
wolffd@0: % function
wolffd@0: % 'sompak' is trained with som_sompaktrain, but its training parameters
wolffd@0: % are set up as for the 'seq' algorithm
wolffd@0: if strcmp(algorithm,'sompak'), 
wolffd@0:   algorithm = 'seq';
wolffd@0:   func = 'sompak';
wolffd@0: else
wolffd@0:   func = algorithm;
wolffd@0: end
wolffd@0: 
wolffd@0: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
wolffd@0: %% initialization
wolffd@0: 
wolffd@0: if tracking>0, fprintf(1,'Initialization...\n'); end
wolffd@0: 
wolffd@0: switch initalg, 
wolffd@0:  case 'randinit', sMap = som_randinit(D, sMap);
wolffd@0:  case 'lininit', sMap = som_lininit(D, sMap); 
wolffd@0: end
wolffd@0: sMap.trainhist(1) = som_set(sMap.trainhist(1),'data_name',data_name);
wolffd@0: 
wolffd@0: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
wolffd@0: %% training
wolffd@0: 
wolffd@0: if tracking>0, fprintf(1,'Training using %s algorithm...\n',algorithm); end
wolffd@0: 
wolffd@0: % rough train
wolffd@0: if tracking>0, fprintf(1,'Rough training phase...\n'); end
wolffd@0: sTrain = som_train_struct(sMap,'dlen',dlen,'algorithm',algorithm,'phase','rough');
wolffd@0: sTrain = som_set(sTrain,'data_name',data_name);
wolffd@0: % an explicit [rough finetune] vector overrides the default length;
wolffd@0: % 'short' / 'long' scale the default by 1/4 (at least 1) / 4
wolffd@0: if isnumeric(training), sTrain.trainlen = training(1); 
wolffd@0: else
wolffd@0:   switch training, 
wolffd@0:    case 'short', sTrain.trainlen = max(1,sTrain.trainlen/4);
wolffd@0:    case 'long',  sTrain.trainlen = sTrain.trainlen*4;
wolffd@0:   end
wolffd@0: end
wolffd@0: switch func,
wolffd@0:  case 'seq',    sMap = som_seqtrain(sMap,D,sTrain,'tracking',tracking,'mask',mask);
wolffd@0:  case 'sompak', sMap = som_sompaktrain(sMap,D,sTrain,'tracking',tracking,'mask',mask);
wolffd@0:  case 'batch',  sMap = som_batchtrain(sMap,D,sTrain,'tracking',tracking,'mask',mask);
wolffd@0: end
wolffd@0: 
wolffd@0: % finetune
wolffd@0: if tracking>0, fprintf(1,'Finetuning phase...\n'); end
wolffd@0: sTrain = som_train_struct(sMap,'dlen',dlen,'phase','finetune');
wolffd@0: sTrain = som_set(sTrain,'data_name',data_name,'algorithm',algorithm);
wolffd@0: % same length rules as in the rough phase, using the second element
wolffd@0: if isnumeric(training), sTrain.trainlen = training(2); 
wolffd@0: else
wolffd@0:   switch training, 
wolffd@0:    case 'short', sTrain.trainlen = max(1,sTrain.trainlen/4);
wolffd@0:    case 'long',  sTrain.trainlen = sTrain.trainlen*4;
wolffd@0:   end
wolffd@0: end
wolffd@0: switch func,
wolffd@0:  case 'seq',    sMap = som_seqtrain(sMap,D,sTrain,'tracking',tracking,'mask',mask);
wolffd@0:  case 'sompak', sMap = som_sompaktrain(sMap,D,sTrain,'tracking',tracking,'mask',mask);
wolffd@0:  case 'batch',  sMap = som_batchtrain(sMap,D,sTrain,'tracking',tracking,'mask',mask);
wolffd@0: end
wolffd@0: 
wolffd@0: % quality
wolffd@0: if tracking>0, 
wolffd@0:   [mqe,tge] = som_quality(sMap,D);
wolffd@0:   fprintf(1,'Final quantization error: %5.3f\n',mqe)
wolffd@0:   fprintf(1,'Final topographic error:  %5.3f\n',tge)
wolffd@0: end  
wolffd@0: 
wolffd@0: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
wolffd@0: