Mercurial > hg > camir-aes2014
diff toolboxes/MIRtoolbox1.3.2/somtoolbox/som_make.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/toolboxes/MIRtoolbox1.3.2/somtoolbox/som_make.m Tue Feb 10 15:05:51 2015 +0000 @@ -0,0 +1,339 @@ +function sMap = som_make(D, varargin) + +%SOM_MAKE Create, initialize and train Self-Organizing Map. +% +% sMap = som_make(D, [[argID,] value, ...]) +% +% sMap = som_make(D); +% sMap = som_make(D, 'munits', 20); +% sMap = som_make(D, 'munits', 20, 'hexa', 'sheet'); +% sMap = som_make(D, 'msize', [4 6 7], 'lattice', 'rect'); +% +% Input and output arguments ([]'s are optional): +% D (matrix) training data, size dlen x dim +% (struct) data struct +% [argID, (string) See below. The values which are unambiguous can +% value] (varies) be given without the preceding argID. +% +% sMap (struct) map struct +% +% Here are the valid argument IDs and corresponding values. The values +% which are unambiguous (marked with '*') can be given without the +% preceding argID. +% 'init' *(string) initialization: 'randinit' or 'lininit' (default) +% 'algorithm' *(string) training: 'seq' or 'batch' (default) or 'sompak' +% 'munits' (scalar) the preferred number of map units +% 'msize' (vector) map grid size +% 'mapsize' *(string) do you want a 'small', 'normal' or 'big' map +% Any explicit settings of munits or msize override this. +% 'lattice' *(string) map lattice, 'hexa' or 'rect' +% 'shape' *(string) map shape, 'sheet', 'cyl' or 'toroid' +% 'neigh' *(string) neighborhood function, 'gaussian', 'cutgauss', +% 'ep' or 'bubble' +% 'topol' *(struct) topology struct +% 'som_topol','sTopol' = 'topol' +% 'mask' (vector) BMU search mask, size dim x 1 +% 'name' (string) map name +% 'comp_names' (string array / cellstr) component names, size dim x 1 +% 'tracking' (scalar) how much to report, default = 1 +% 'training' (string) 'short', 'default', 'long' +% (vector) size 1 x 2, first length of rough training in epochs, +% and then length of finetuning in epochs +% +% For more help, try 'type som_make' or check out online documentation. 
+% See also SOM_MAP_STRUCT, SOM_TOPOL_STRUCT, SOM_TRAIN_STRUCT, +% SOM_RANDINIT, SOM_LININIT, SOM_SEQTRAIN, SOM_BATCHTRAIN. + +%%%%%%%%%%%%% DETAILED DESCRIPTION %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% +% som_make +% +% PURPOSE +% +% Creates, initializes and trains a SOM using default parameters. +% +% SYNTAX +% +% sMap = som_make(D); +% sMap = som_make(...,'argID',value,...); +% sMap = som_make(...,value,...); +% +% DESCRIPTION +% +% Creates, initializes and trains a SOM with default parameters. Uses functions +% SOM_TOPOL_STRUCT, SOM_TRAIN_STRUCT, SOM_DATA_STRUCT and SOM_MAP_STRUCT to come +% up with the default values. +% +% First, the number of map units is determined. Unless they are +% explicitly defined, function SOM_TOPOL_STRUCT is used to determine this. +% It uses a heuristic formula of 'munits = 5*dlen^0.54321'. The 'mapsize' +% argument influences the final number of map units: a 'big' map has +% x4 the default number of map units and a 'small' map has x0.25 the +% default number of map units. +% +% After the number of map units has been determined, the map size is +% determined. Basically, the two biggest eigenvalues of the training +% data are calculated and the ratio between sidelengths of the map grid +% is set to this ratio. The actual sidelengths are then set so that +% their product is as close to the desired number of map units as +% possible. +% +% Then the SOM is initialized. First, linear initialization along two +% greatest eigenvectors is tried, but if this can't be done (the +% eigenvectors cannot be calculated), random initialization is used +% instead. After initialization, the SOM is trained in two phases: +% first rough training and then fine-tuning. If the 'tracking' +% argument is greater than zero, the average quantization error and +% topographic error of the final map are calculated. +% +% REQUIRED INPUT ARGUMENTS +% +% D The data to use in the training. +% (struct) A data struct. 
If a struct is given, '.comp_names' field as +% well as '.comp_norm' field is copied to the map struct. +% (matrix) A data matrix, size dlen x dim. The data matrix may +% contain unknown values, indicated by NaNs. +% +% OPTIONAL INPUT ARGUMENTS +% +% argID (string) Argument identifier string (see below). +% value (varies) Value for the argument (see below). +% +% Here are the valid argument IDs and corresponding values. The values +% which are unambiguous (marked with '*') can be given without the +% preceding argID. +% 'init' *(string) initialization: 'randinit' or 'lininit' (default) +% 'algorithm' *(string) training: 'seq' or 'batch' (default) or 'sompak' +% 'munits' (scalar) the preferred number of map units +% 'msize' (vector) map grid size +% 'mapsize' *(string) do you want a 'small', 'normal' or 'big' map +% Any explicit settings of munits or msize override this. +% 'lattice' *(string) map lattice, 'hexa' or 'rect' +% 'shape' *(string) map shape, 'sheet', 'cyl' or 'toroid' +% 'neigh' *(string) neighborhood function, 'gaussian', 'cutgauss', +% 'ep' or 'bubble' +% 'topol' *(struct) topology struct +% 'som_topol','sTopol' = 'topol' +% 'mask' (vector) BMU search mask, size dim x 1 +% 'name' (string) map name +% 'comp_names' (string array / cellstr) component names, size dim x 1 +% 'tracking' (scalar) how much to report, default = 1 +% 'training' (string) 'short', 'default' or 'long' +% (vector) size 1 x 2, first length of rough training in epochs, +% and then length of finetuning in epochs +% +% OUTPUT ARGUMENTS +% +% sMap (struct) the trained map struct +% +% EXAMPLES +% +% To simply train a map with default parameters: +% +% sMap = som_make(D); +% +% With the optional arguments, the initialization and training can be +% influenced. 
To change map size, use 'msize', 'munits' or 'mapsize' +% arguments: +% +% sMap = som_make(D,'mapsize','big'); or sMap=som_make(D,'big'); +% sMap = som_make(D,'munits', 100); +% sMap = som_make(D,'msize', [20 10]); +% +% Argument 'algorithm' can be used to switch between 'seq' and 'batch' +% algorithms. 'batch' is the default, so to use 'seq' algorithm: +% +% sMap = som_make(D,'algorithm','seq'); or sMap = som_make(D,'seq'); +% +% The 'tracking' argument can be used to control the amount of reporting +% during training. The argument is used in this function, and it is +% passed to the training functions. To make the function work silently +% set it to 0. +% +% sMap = som_make(D,'tracking',0); +% +% SEE ALSO +% +% som_map_struct Create a map struct. +% som_topol_struct Default values for SOM topology. +% som_train_struct Default values for SOM training parameters. +% som_randinit Random initialization algorithm. +% som_lininit Linear initialization algorithm. +% som_seqtrain Sequential training algorithm. +% som_batchtrain Batch training algorithm. + +% Copyright (c) 1999-2000 by the SOM toolbox programming team. 
+% http://www.cis.hut.fi/projects/somtoolbox/ + +% Version 2.0beta juuso 111199 + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% check arguments + +% D: a data struct is unpacked into its name, component names, normalization +% and data matrix; for a plain matrix the name comes from inputname and the +% component names and normalization from a dummy data struct built on row 1 +if isstruct(D) + data_name = D.name; + comp_names = D.comp_names; + comp_norm = D.comp_norm; + D = D.data; +else + data_name = inputname(1); + sDummy = som_data_struct(D(1,:)); + comp_names = sDummy.comp_names; + comp_norm = sDummy.comp_norm; +end +[dlen dim] = size(D); + +% defaults: topology, mask, name and neighborhood come from som_map_struct(dim) +mapsize = ''; +sM = som_map_struct(dim); +sTopol = sM.topol; +munits = prod(sTopol.msize); % should be zero +mask = sM.mask; +name = sM.name; +neigh = sM.neigh; +tracking = 1; +algorithm = 'batch'; +initalg = 'lininit'; +training = 'default'; + +% varargin: an argument ID consumes the next element as its value, while +% unambiguous strings and topology structs are accepted on their own +i=1; +while i<=length(varargin), + argok = 1; + if ischar(varargin{i}), + switch varargin{i}, + % argument IDs (each is followed by its value) + case 'mask', i=i+1; mask = varargin{i}; + case 'munits', i=i+1; munits = varargin{i}; + case 'msize', i=i+1; sTopol.msize = varargin{i}; + munits = prod(sTopol.msize); + case 'mapsize', i=i+1; mapsize = varargin{i}; + case 'name', i=i+1; name = varargin{i}; + case 'comp_names', i=i+1; comp_names = varargin{i}; + case 'lattice', i=i+1; sTopol.lattice = varargin{i}; + case 'shape', i=i+1; sTopol.shape = varargin{i}; + case {'topol','som_topol','sTopol'}, + i=i+1; sTopol = varargin{i}; munits = prod(sTopol.msize); + case 'neigh', i=i+1; neigh = varargin{i}; + case 'tracking', i=i+1; tracking = varargin{i}; + case 'algorithm', i=i+1; algorithm = varargin{i}; + case 'init', i=i+1; initalg = varargin{i}; + case 'training', i=i+1; training = varargin{i}; + % unambiguous values (the string itself is the value) + case {'hexa','rect'}, sTopol.lattice = varargin{i}; + case {'sheet','cyl','toroid'}, sTopol.shape = varargin{i}; + case {'gaussian','cutgauss','ep','bubble'}, neigh = varargin{i}; + case {'seq','batch','sompak'}, algorithm = varargin{i}; + case {'small','normal','big'}, mapsize = varargin{i}; + case {'randinit','lininit'}, initalg = varargin{i}; + case 
{'short','default','long'}, training = varargin{i}; + otherwise argok=0; + end + elseif isstruct(varargin{i}) & isfield(varargin{i},'type'), + switch varargin{i}(1).type, + case 'som_topol', sTopol = varargin{i}; + otherwise argok=0; + end + else + argok = 0; + end + if ~argok, + % the reported number counts D as argument #1, hence the offset of one + disp(['(som_make) Ignoring invalid argument #' num2str(i+1)]); + end + i = i+1; +end + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% make the map struct + +%% map size (only determined here when msize is empty or has a zero factor) +if isempty(sTopol.msize) | ~prod(sTopol.msize), + if tracking>0, fprintf(1,'Determining map size...\n'); end + % no unit count given: take the som_topol_struct default for dlen and + % scale it according to 'mapsize' + if ~munits, + sTemp = som_topol_struct('dlen',dlen); + munits = prod(sTemp.msize); + switch mapsize, + case 'small', munits = max(9,ceil(munits/4)); + case 'big', munits = munits*4; + otherwise % nil (any other mapsize leaves munits unchanged) + end + end + sTemp = som_topol_struct('data',D,'munits',munits); + sTopol.msize = sTemp.msize; + if tracking>0, + fprintf(1,' map size [%d, %d]\n',sTopol.msize(1), sTopol.msize(2)); + end +end + +% map struct +sMap = som_map_struct(dim,sTopol,neigh,'mask',mask,'name',name, ... 
+ 'comp_names', comp_names, 'comp_norm', comp_norm); + +% function: 'sompak' selects som_sompaktrain as the trainer while the train +% structs below are built for the 'seq' algorithm +if strcmp(algorithm,'sompak'), + algorithm = 'seq'; + func = 'sompak'; +else + func = algorithm; +end + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% initialization + +if tracking>0, fprintf(1,'Initialization...\n'); end + +% any initalg value other than the two below leaves sMap as created by +% som_map_struct +switch initalg, + case 'randinit', sMap = som_randinit(D, sMap); + case 'lininit', sMap = som_lininit(D, sMap); +end +sMap.trainhist(1) = som_set(sMap.trainhist(1),'data_name',data_name); + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% training + +if tracking>0, fprintf(1,'Training using %s algorithm...\n',algorithm); end + +% rough train: a numeric 'training' sets the epoch count directly, while +% 'short' and 'long' scale the default trainlen by 1/4 (at least 1) and by 4 +if tracking>0, fprintf(1,'Rough training phase...\n'); end +sTrain = som_train_struct(sMap,'dlen',dlen,'algorithm',algorithm,'phase','rough'); +sTrain = som_set(sTrain,'data_name',data_name); +if isnumeric(training), sTrain.trainlen = training(1); +else + switch training, + case 'short', sTrain.trainlen = max(1,sTrain.trainlen/4); + case 'long', sTrain.trainlen = sTrain.trainlen*4; + end +end +switch func, + case 'seq', sMap = som_seqtrain(sMap,D,sTrain,'tracking',tracking,'mask',mask); + case 'sompak', sMap = som_sompaktrain(sMap,D,sTrain,'tracking',tracking,'mask',mask); + case 'batch', sMap = som_batchtrain(sMap,D,sTrain,'tracking',tracking,'mask',mask); +end + +% finetune: same trainlen rules as the rough phase, with training(2) used +% for numeric input +if tracking>0, fprintf(1,'Finetuning phase...\n'); end +sTrain = som_train_struct(sMap,'dlen',dlen,'phase','finetune'); +sTrain = som_set(sTrain,'data_name',data_name,'algorithm',algorithm); +if isnumeric(training), sTrain.trainlen = training(2); +else + switch training, + case 'short', sTrain.trainlen = max(1,sTrain.trainlen/4); + case 'long', sTrain.trainlen = sTrain.trainlen*4; + end +end +switch func, + case 'seq', sMap = som_seqtrain(sMap,D,sTrain,'tracking',tracking,'mask',mask); + case 'sompak', sMap = som_sompaktrain(sMap,D,sTrain,'tracking',tracking,'mask',mask); + case 'batch', sMap = 
som_batchtrain(sMap,D,sTrain,'tracking',tracking,'mask',mask); +end + +% quality: print the two error measures returned by som_quality when +% tracking is on +if tracking>0, + [mqe,tge] = som_quality(sMap,D); + fprintf(1,'Final quantization error: %5.3f\n',mqe) + fprintf(1,'Final topographic error: %5.3f\n',tge) +end + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +