annotate toolboxes/MIRtoolbox1.3.2/somtoolbox/som_make.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
rev   line source
wolffd@0 1 function sMap = som_make(D, varargin)
wolffd@0 2
wolffd@0 3 %SOM_MAKE Create, initialize and train Self-Organizing Map.
wolffd@0 4 %
wolffd@0 5 % sMap = som_make(D, [[argID,] value, ...])
wolffd@0 6 %
wolffd@0 7 % sMap = som_make(D);
wolffd@0 8 % sMap = som_make(D, 'munits', 20);
wolffd@0 9 % sMap = som_make(D, 'munits', 20, 'hexa', 'sheet');
wolffd@0 10 % sMap = som_make(D, 'msize', [4 6 7], 'lattice', 'rect');
wolffd@0 11 %
wolffd@0 12 % Input and output arguments ([]'s are optional):
wolffd@0 13 % D (matrix) training data, size dlen x dim
wolffd@0 14 % (struct) data struct
wolffd@0 15 % [argID, (string) See below. The values which are unambiguous can
wolffd@0 16 % value] (varies) be given without the preceding argID.
wolffd@0 17 %
wolffd@0 18 % sMap (struct) map struct
wolffd@0 19 %
wolffd@0 20 % Here are the valid argument IDs and corresponding values. The values
wolffd@0 21 % which are unambiguous (marked with '*') can be given without the
wolffd@0 22 % preceding argID.
wolffd@0 23 % 'init' *(string) initialization: 'randinit' or 'lininit' (default)
wolffd@0 24 % 'algorithm' *(string) training: 'seq' or 'batch' (default) or 'sompak'
wolffd@0 25 % 'munits' (scalar) the preferred number of map units
wolffd@0 26 % 'msize' (vector) map grid size
wolffd@0 27 % 'mapsize' *(string) do you want a 'small', 'normal' or 'big' map
wolffd@0 28 % Any explicit settings of munits or msize override this.
wolffd@0 29 % 'lattice' *(string) map lattice, 'hexa' or 'rect'
wolffd@0 30 % 'shape' *(string) map shape, 'sheet', 'cyl' or 'toroid'
wolffd@0 31 % 'neigh' *(string) neighborhood function, 'gaussian', 'cutgauss',
wolffd@0 32 % 'ep' or 'bubble'
wolffd@0 33 % 'topol' *(struct) topology struct
wolffd@0 34 % 'som_topol','sTopol' = 'topol'
wolffd@0 35 % 'mask' (vector) BMU search mask, size dim x 1
wolffd@0 36 % 'name' (string) map name
wolffd@0 37 % 'comp_names' (string array / cellstr) component names, size dim x 1
wolffd@0 38 % 'tracking' (scalar) how much to report, default = 1
wolffd@0 39 % 'training' (string) 'short', 'default', 'long'
wolffd@0 40 % (vector) size 1 x 2, first length of rough training in epochs,
wolffd@0 41 % and then length of finetuning in epochs
wolffd@0 42 %
wolffd@0 43 % For more help, try 'type som_make' or check out online documentation.
wolffd@0 44 % See also SOM_MAP_STRUCT, SOM_TOPOL_STRUCT, SOM_TRAIN_STRUCT,
wolffd@0 45 % SOM_RANDINIT, SOM_LININIT, SOM_SEQTRAIN, SOM_BATCHTRAIN.
wolffd@0 46
wolffd@0 47 %%%%%%%%%%%%% DETAILED DESCRIPTION %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
wolffd@0 48 %
wolffd@0 49 % som_make
wolffd@0 50 %
wolffd@0 51 % PURPOSE
wolffd@0 52 %
wolffd@0 53 % Creates, initializes and trains a SOM using default parameters.
wolffd@0 54 %
wolffd@0 55 % SYNTAX
wolffd@0 56 %
wolffd@0 57 % sMap = som_make(D);
wolffd@0 58 % sMap = som_make(...,'argID',value,...);
wolffd@0 59 % sMap = som_make(...,value,...);
wolffd@0 60 %
wolffd@0 61 % DESCRIPTION
wolffd@0 62 %
wolffd@0 63 % Creates, initializes and trains a SOM with default parameters. Uses functions
wolffd@0 64 % SOM_TOPOL_STRUCT, SOM_TRAIN_STRUCT, SOM_DATA_STRUCT and SOM_MAP_STRUCT to come
wolffd@0 65 % up with the default values.
wolffd@0 66 %
wolffd@0 67 % First, the number of map units is determined. Unless they are
wolffd@0 68 % explicitly defined, function SOM_TOPOL_STRUCT is used to determine this.
wolffd@0 69 % It uses a heuristic formula of 'munits = 5*dlen^0.54321'. The 'mapsize'
wolffd@0 70 % argument influences the final number of map units: a 'big' map has
wolffd@0 71 % x4 the default number of map units and a 'small' map has x0.25 the
wolffd@0 72 % default number of map units.
wolffd@0 73 %
wolffd@0 74 % After the number of map units has been determined, the map size is
wolffd@0 75 % determined. Basically, the two biggest eigenvalues of the training
wolffd@0 76 % data are calculated and the ratio between sidelengths of the map grid
wolffd@0 77 % is set to this ratio. The actual sidelengths are then set so that
wolffd@0 78 % their product is as close to the desired number of map units as
wolffd@0 79 % possible.
wolffd@0 80 %
wolffd@0 81 % Then the SOM is initialized. First, linear initialization along two
wolffd@0 82 % greatest eigenvectors is tried, but if this can't be done (the
wolffd@0 83 % eigenvectors cannot be calculated), random initialization is used
wolffd@0 84 % instead. After initialization, the SOM is trained in two phases:
wolffd@0 85 % first rough training and then fine-tuning. If the 'tracking'
wolffd@0 86 % argument is greater than zero, the average quantization error and
wolffd@0 87 % topographic error of the final map are calculated.
wolffd@0 88 %
wolffd@0 89 % REQUIRED INPUT ARGUMENTS
wolffd@0 90 %
wolffd@0 91 % D The data to use in the training.
wolffd@0 92 % (struct) A data struct. If a struct is given, '.comp_names' field as
wolffd@0 93 % well as '.comp_norm' field is copied to the map struct.
wolffd@0 94 % (matrix) A data matrix, size dlen x dim. The data matrix may
wolffd@0 95 % contain unknown values, indicated by NaNs.
wolffd@0 96 %
wolffd@0 97 % OPTIONAL INPUT ARGUMENTS
wolffd@0 98 %
wolffd@0 99 % argID (string) Argument identifier string (see below).
wolffd@0 100 % value (varies) Value for the argument (see below).
wolffd@0 101 %
wolffd@0 102 % Here are the valid argument IDs and corresponding values. The values
wolffd@0 103 % which are unambiguous (marked with '*') can be given without the
wolffd@0 104 % preceding argID.
wolffd@0 105 % 'init' *(string) initialization: 'randinit' or 'lininit' (default)
wolffd@0 106 % 'algorithm' *(string) training: 'seq' or 'batch' (default) or 'sompak'
wolffd@0 107 % 'munits' (scalar) the preferred number of map units
wolffd@0 108 % 'msize' (vector) map grid size
wolffd@0 109 % 'mapsize' *(string) do you want a 'small', 'normal' or 'big' map
wolffd@0 110 % Any explicit settings of munits or msize override this.
wolffd@0 111 % 'lattice' *(string) map lattice, 'hexa' or 'rect'
wolffd@0 112 % 'shape' *(string) map shape, 'sheet', 'cyl' or 'toroid'
wolffd@0 113 % 'neigh' *(string) neighborhood function, 'gaussian', 'cutgauss',
wolffd@0 114 % 'ep' or 'bubble'
wolffd@0 115 % 'topol' *(struct) topology struct
wolffd@0 116 % 'som_topol','sTopol' = 'topol'
wolffd@0 117 % 'mask' (vector) BMU search mask, size dim x 1
wolffd@0 118 % 'name' (string) map name
wolffd@0 119 % 'comp_names' (string array / cellstr) component names, size dim x 1
wolffd@0 120 % 'tracking' (scalar) how much to report, default = 1
wolffd@0 121 % 'training' (string) 'short', 'default' or 'long'
wolffd@0 122 % (vector) size 1 x 2, first length of rough training in epochs,
wolffd@0 123 % and then length of finetuning in epochs
wolffd@0 124 %
wolffd@0 125 % OUTPUT ARGUMENTS
wolffd@0 126 %
wolffd@0 127 % sMap (struct) the trained map struct
wolffd@0 128 %
wolffd@0 129 % EXAMPLES
wolffd@0 130 %
wolffd@0 131 % To simply train a map with default parameters:
wolffd@0 132 %
wolffd@0 133 % sMap = som_make(D);
wolffd@0 134 %
wolffd@0 135 % With the optional arguments, the initialization and training can be
wolffd@0 136 % influenced. To change map size, use 'msize', 'munits' or 'mapsize'
wolffd@0 137 % arguments:
wolffd@0 138 %
wolffd@0 139 % sMap = som_make(D,'mapsize','big'); or sMap=som_make(D,'big');
wolffd@0 140 % sMap = som_make(D,'munits', 100);
wolffd@0 141 % sMap = som_make(D,'msize', [20 10]);
wolffd@0 142 %
wolffd@0 143 % Argument 'algorithm' can be used to switch between 'seq' and 'batch'
wolffd@0 144 % algorithms. 'batch' is the default, so to use 'seq' algorithm:
wolffd@0 145 %
wolffd@0 146 % sMap = som_make(D,'algorithm','seq'); or sMap = som_make(D,'seq');
wolffd@0 147 %
wolffd@0 148 % The 'tracking' argument can be used to control the amount of reporting
wolffd@0 149 % during training. The argument is used in this function, and it is
wolffd@0 150 % passed to the training functions. To make the function work silently
wolffd@0 151 % set it to 0.
wolffd@0 152 %
wolffd@0 153 % sMap = som_make(D,'tracking',0);
wolffd@0 154 %
wolffd@0 155 % SEE ALSO
wolffd@0 156 %
wolffd@0 157 % som_map_struct Create a map struct.
wolffd@0 158 % som_topol_struct Default values for SOM topology.
wolffd@0 159 % som_train_struct Default values for SOM training parameters.
wolffd@0 160 % som_randinit Random initialization algorithm.
wolffd@0 161 % som_lininit Linear initialization algorithm.
wolffd@0 162 % som_seqtrain Sequential training algorithm.
wolffd@0 163 % som_batchtrain Batch training algorithm.
wolffd@0 164
wolffd@0 165 % Copyright (c) 1999-2000 by the SOM toolbox programming team.
wolffd@0 166 % http://www.cis.hut.fi/projects/somtoolbox/
wolffd@0 167
wolffd@0 168 % Version 2.0beta juuso 111199
wolffd@0 169
wolffd@0 170 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% check arguments

% D: the training data, given either as a data struct or as a plain matrix
if isstruct(D)
  % data struct: name, component names and normalization come from the struct
  data_name = D.name;
  comp_names = D.comp_names;
  comp_norm = D.comp_norm;
  D = D.data;
else
  % plain matrix: the data name is the name of the caller's variable, and a
  % dummy data struct built from the first row supplies the default
  % component names and normalization
  data_name = inputname(1);
  sDummy = som_data_struct(D(1,:));
  comp_names = sDummy.comp_names;
  comp_norm = sDummy.comp_norm;
end
[dlen dim] = size(D);

% defaults
mapsize = '';
sM = som_map_struct(dim);
sTopol = sM.topol;
munits = prod(sTopol.msize); % should be zero
mask = sM.mask;
name = sM.name;
neigh = sM.neigh;
tracking = 1;
algorithm = 'batch';
initalg = 'lininit';
training = 'default';

% argument IDs which must be followed by a value
value_ids = {'mask','munits','msize','mapsize','name','comp_names', ...
             'lattice','shape','topol','som_topol','sTopol','neigh', ...
             'tracking','algorithm','init','training'};

% varargin
i=1;
while i<=length(varargin),
  argok = 1;
  if ischar(varargin{i}),
    % An argument ID given as the very last argument has no value to read;
    % report that explicitly instead of indexing past the end of varargin.
    % (Argument numbers are i+1 because D is argument #1 of som_make.)
    if any(strcmp(varargin{i},value_ids)) & i>=length(varargin),
      error(['(som_make) Argument ID ''' varargin{i} ''' (argument #' ...
             num2str(i+1) ') is not followed by a value.']);
    end
    switch varargin{i},
     % argument IDs
     case 'mask',       i=i+1; mask = varargin{i};
     case 'munits',     i=i+1; munits = varargin{i};
     case 'msize',      i=i+1; sTopol.msize = varargin{i};
                        munits = prod(sTopol.msize);
     case 'mapsize',    i=i+1; mapsize = varargin{i};
     case 'name',       i=i+1; name = varargin{i};
     case 'comp_names', i=i+1; comp_names = varargin{i};
     case 'lattice',    i=i+1; sTopol.lattice = varargin{i};
     case 'shape',      i=i+1; sTopol.shape = varargin{i};
     case {'topol','som_topol','sTopol'},
                        i=i+1; sTopol = varargin{i}; munits = prod(sTopol.msize);
     case 'neigh',      i=i+1; neigh = varargin{i};
     case 'tracking',   i=i+1; tracking = varargin{i};
     case 'algorithm',  i=i+1; algorithm = varargin{i};
     case 'init',       i=i+1; initalg = varargin{i};
     case 'training',   i=i+1; training = varargin{i};
     % unambiguous values
     case {'hexa','rect'}, sTopol.lattice = varargin{i};
     case {'sheet','cyl','toroid'}, sTopol.shape = varargin{i};
     case {'gaussian','cutgauss','ep','bubble'}, neigh = varargin{i};
     case {'seq','batch','sompak'}, algorithm = varargin{i};
     case {'small','normal','big'}, mapsize = varargin{i};
     case {'randinit','lininit'}, initalg = varargin{i};
     case {'short','default','long'}, training = varargin{i};
     otherwise argok=0;
    end
  elseif isstruct(varargin{i}) & isfield(varargin{i},'type'),
    % a struct given without an argument ID is accepted only if it is a
    % topology struct
    switch varargin{i}(1).type,
     case 'som_topol', sTopol = varargin{i};
     otherwise argok=0;
    end
  else
    argok = 0;
  end
  if ~argok,
    disp(['(som_make) Ignoring invalid argument #' num2str(i+1)]);
  end
  i = i+1;
end
wolffd@0 246
wolffd@0 247 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% make the map struct

%% map size
% The grid size is worked out here only when no usable msize has been
% supplied, i.e. when it is empty or its product is zero.
if isempty(sTopol.msize) | ~prod(sTopol.msize),
  if tracking>0,
    fprintf(1,'Determining map size...\n');
  end

  % No explicit unit count either: start from the default count that
  % som_topol_struct gives for this data length and scale it by 'mapsize'.
  if ~munits,
    sDefault = som_topol_struct('dlen',dlen);
    munits = prod(sDefault.msize);
    switch mapsize,
     case 'big',
      munits = munits*4;
     case 'small',
      munits = max(9,ceil(munits/4));
    end
  end

  % Let som_topol_struct choose the side lengths for that unit count.
  sFit = som_topol_struct('data',D,'munits',munits);
  sTopol.msize = sFit.msize;

  if tracking>0,
    fprintf(1,' map size [%d, %d]\n',sTopol.msize(1), sTopol.msize(2));
  end
end

% map struct
sMap = som_map_struct(dim, sTopol, neigh, ...
                      'mask', mask, ...
                      'name', name, ...
                      'comp_names', comp_names, ...
                      'comp_norm', comp_norm);

% function
% 'func' names the training routine that is called later; for 'sompak' the
% training parameters are set up as for the 'seq' algorithm.
func = algorithm;
if strcmp(func,'sompak'),
  algorithm = 'seq';
end
wolffd@0 280
wolffd@0 281 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% initialization

if tracking>0, fprintf(1,'Initialization...\n'); end

% Run the requested initialization function on the map struct.
switch initalg,
 case 'randinit', sMap = som_randinit(D, sMap);
 case 'lininit',  sMap = som_lininit(D, sMap);
 otherwise
  % An unrecognized 'init' value used to be skipped without any notice;
  % the map is still left exactly as som_map_struct created it, but the
  % caller is now told that no initialization function was run.
  warning(['(som_make) Unknown initialization algorithm; ' ...
           'neither som_randinit nor som_lininit was run.']);
end
% record the name of the data in the first training history entry
sMap.trainhist(1) = som_set(sMap.trainhist(1),'data_name',data_name);
wolffd@0 291
wolffd@0 292 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% training

% A numeric 'training' argument holds two epoch counts: element 1 is used
% for rough training and element 2 for finetuning. Check this before any
% training starts, so that a too-short vector does not fail on training(2)
% only after the rough phase has already been run.
if isnumeric(training) & length(training)<2,
  error(['(som_make) A numeric ''training'' argument must have two ' ...
         'elements: rough training and finetuning lengths in epochs.']);
end

if tracking>0, fprintf(1,'Training using %s algorithm...\n',algorithm); end

% rough train
if tracking>0, fprintf(1,'Rough training phase...\n'); end
sTrain = som_train_struct(sMap,'dlen',dlen,'algorithm',algorithm,'phase','rough');
sTrain = som_set(sTrain,'data_name',data_name);
if isnumeric(training), sTrain.trainlen = training(1);
else
  % 'short' quarters the training length from som_train_struct (but keeps
  % it at least 1), 'long' quadruples it, anything else leaves it as is
  switch training,
   case 'short', sTrain.trainlen = max(1,sTrain.trainlen/4);
   case 'long',  sTrain.trainlen = sTrain.trainlen*4;
  end
end
switch func,
 case 'seq',    sMap = som_seqtrain(sMap,D,sTrain,'tracking',tracking,'mask',mask);
 case 'sompak', sMap = som_sompaktrain(sMap,D,sTrain,'tracking',tracking,'mask',mask);
 case 'batch',  sMap = som_batchtrain(sMap,D,sTrain,'tracking',tracking,'mask',mask);
end

% finetune
if tracking>0, fprintf(1,'Finetuning phase...\n'); end
sTrain = som_train_struct(sMap,'dlen',dlen,'phase','finetune');
sTrain = som_set(sTrain,'data_name',data_name,'algorithm',algorithm);
if isnumeric(training), sTrain.trainlen = training(2);
else
  % same length scaling as in the rough phase
  switch training,
   case 'short', sTrain.trainlen = max(1,sTrain.trainlen/4);
   case 'long',  sTrain.trainlen = sTrain.trainlen*4;
  end
end
switch func,
 case 'seq',    sMap = som_seqtrain(sMap,D,sTrain,'tracking',tracking,'mask',mask);
 case 'sompak', sMap = som_sompaktrain(sMap,D,sTrain,'tracking',tracking,'mask',mask);
 case 'batch',  sMap = som_batchtrain(sMap,D,sTrain,'tracking',tracking,'mask',mask);
end

% quality
% the final errors are computed and printed only when reporting is on
if tracking>0,
  [mqe,tge] = som_quality(sMap,D);
  fprintf(1,'Final quantization error: %5.3f\n',mqe);
  fprintf(1,'Final topographic error: %5.3f\n',tge);
end
wolffd@0 337
wolffd@0 338 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
wolffd@0 339