comparison toolboxes/MIRtoolbox1.3.2/somtoolbox/som_make.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e9a9cd732c1e
function sMap = som_make(D, varargin)

%SOM_MAKE Create, initialize and train Self-Organizing Map.
%
% sMap = som_make(D, [[argID,] value, ...])
%
%  sMap = som_make(D);
%  sMap = som_make(D, 'munits', 20);
%  sMap = som_make(D, 'munits', 20, 'hexa', 'sheet');
%  sMap = som_make(D, 'msize', [4 6 7], 'lattice', 'rect');
%
%  Input and output arguments ([]'s are optional):
%   D        (matrix) training data, size dlen x dim
%            (struct) data struct
%   [argID,  (string) See below. The values which are unambiguous can
%    value]  (varies) be given without the preceding argID.
%
%   sMap     (struct) map struct
%
% Here are the valid argument IDs and corresponding values. The values
% which are unambiguous (marked with '*') can be given without the
% preceding argID.
%   'init'       *(string) initialization: 'randinit' or 'lininit' (default)
%   'algorithm'  *(string) training: 'seq' or 'batch' (default) or 'sompak'
%   'munits'      (scalar) the preferred number of map units
%   'msize'       (vector) map grid size
%   'mapsize'    *(string) do you want a 'small', 'normal' or 'big' map
%                          Any explicit settings of munits or msize override this.
%   'lattice'    *(string) map lattice, 'hexa' or 'rect'
%   'shape'      *(string) map shape, 'sheet', 'cyl' or 'toroid'
%   'neigh'      *(string) neighborhood function, 'gaussian', 'cutgauss',
%                          'ep' or 'bubble'
%   'topol'      *(struct) topology struct
%   'som_topol','sTopol' = 'topol'
%   'mask'        (vector) BMU search mask, size dim x 1
%   'name'        (string) map name
%   'comp_names'  (string array / cellstr) component names, size dim x 1
%   'tracking'    (scalar) how much to report, default = 1
%   'training'    (string) 'short', 'default', 'long'
%                 (vector) size 1 x 2, first length of rough training in epochs,
%                          and then length of finetuning in epochs
%
% For more help, try 'type som_make' or check out online documentation.
% See also SOM_MAP_STRUCT, SOM_TOPOL_STRUCT, SOM_TRAIN_STRUCT,
%          SOM_RANDINIT, SOM_LININIT, SOM_SEQTRAIN, SOM_BATCHTRAIN.

%%%%%%%%%%%%% DETAILED DESCRIPTION %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% som_make
%
% PURPOSE
%
% Creates, initializes and trains a SOM using default parameters.
%
% SYNTAX
%
%  sMap = som_make(D);
%  sMap = som_make(...,'argID',value,...);
%  sMap = som_make(...,value,...);
%
% DESCRIPTION
%
% Creates, initializes and trains a SOM with default parameters. Uses functions
% SOM_TOPOL_STRUCT, SOM_TRAIN_STRUCT, SOM_DATA_STRUCT and SOM_MAP_STRUCT to come
% up with the default values.
%
% First, the number of map units is determined. Unless they are
% explicitly defined, function SOM_TOPOL_STRUCT is used to determine this.
% It uses a heuristic formula of 'munits = 5*dlen^0.54321'. The 'mapsize'
% argument influences the final number of map units: a 'big' map has
% x4 the default number of map units and a 'small' map has x0.25 the
% default number of map units.
%
% After the number of map units has been determined, the map size is
% determined. Basically, the two biggest eigenvalues of the training
% data are calculated and the ratio between sidelengths of the map grid
% is set to this ratio. The actual sidelengths are then set so that
% their product is as close to the desired number of map units as
% possible.
%
% Then the SOM is initialized. First, linear initialization along two
% greatest eigenvectors is tried, but if this can't be done (the
% eigenvectors cannot be calculated), random initialization is used
% instead.  After initialization, the SOM is trained in two phases:
% first rough training and then fine-tuning. If the 'tracking'
% argument is greater than zero, the average quantization error and
% topographic error of the final map are calculated.
%
% REQUIRED INPUT ARGUMENTS
%
%  D           The data to use in the training.
%     (struct) A data struct. If a struct is given, '.comp_names' field as
%              well as '.comp_norm' field is copied to the map struct.
%     (matrix) A data matrix, size dlen x dim. The data matrix may
%              contain unknown values, indicated by NaNs.
%
% OPTIONAL INPUT ARGUMENTS
%
%  argID (string) Argument identifier string (see below).
%  value (varies) Value for the argument (see below).
%
% Here are the valid argument IDs and corresponding values. The values
% which are unambiguous (marked with '*') can be given without the
% preceding argID.
%   'init'       *(string) initialization: 'randinit' or 'lininit' (default)
%   'algorithm'  *(string) training: 'seq' or 'batch' (default) or 'sompak'
%   'munits'      (scalar) the preferred number of map units
%   'msize'       (vector) map grid size
%   'mapsize'    *(string) do you want a 'small', 'normal' or 'big' map
%                          Any explicit settings of munits or msize override this.
%   'lattice'    *(string) map lattice, 'hexa' or 'rect'
%   'shape'      *(string) map shape, 'sheet', 'cyl' or 'toroid'
%   'neigh'      *(string) neighborhood function, 'gaussian', 'cutgauss',
%                          'ep' or 'bubble'
%   'topol'      *(struct) topology struct
%   'som_topol','sTopol' = 'topol'
%   'mask'        (vector) BMU search mask, size dim x 1
%   'name'        (string) map name
%   'comp_names'  (string array / cellstr) component names, size dim x 1
%   'tracking'    (scalar) how much to report, default = 1
%   'training'    (string) 'short', 'default' or 'long'
%                 (vector) size 1 x 2, first length of rough training in epochs,
%                          and then length of finetuning in epochs
%
% An argument ID that is not followed by a value raises an error. An
% unrecognized 'init' or 'algorithm' value produces a warning, because
% no initialization / training function matches it.
%
% OUTPUT ARGUMENTS
%
%  sMap (struct) the trained map struct
%
% EXAMPLES
%
%  To simply train a map with default parameters:
%
%   sMap = som_make(D);
%
%  With the optional arguments, the initialization and training can be
%  influenced. To change map size, use 'msize', 'munits' or 'mapsize'
%  arguments:
%
%   sMap = som_make(D,'mapsize','big'); or sMap=som_make(D,'big');
%   sMap = som_make(D,'munits', 100);
%   sMap = som_make(D,'msize', [20 10]);
%
%  Argument 'algorithm' can be used to switch between 'seq' and 'batch'
%  algorithms. 'batch' is the default, so to use 'seq' algorithm:
%
%   sMap = som_make(D,'algorithm','seq'); or sMap = som_make(D,'seq');
%
%  The 'tracking' argument can be used to control the amount of reporting
%  during training. The argument is used in this function, and it is
%  passed to the training functions. To make the function work silently
%  set it to 0.
%
%   sMap = som_make(D,'tracking',0);
%
% SEE ALSO
%
%  som_map_struct    Create a map struct.
%  som_topol_struct  Default values for SOM topology.
%  som_train_struct  Default values for SOM training parameters.
%  som_randinit      Random initialization algorithm.
%  som_lininit       Linear initialization algorithm.
%  som_seqtrain      Sequential training algorithm.
%  som_batchtrain    Batch training algorithm.

% Copyright (c) 1999-2000 by the SOM toolbox programming team.
% http://www.cis.hut.fi/projects/somtoolbox/

% Version 2.0beta juuso 111199

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% check arguments

% D: accept either a data struct or a plain data matrix
if isstruct(D)
  data_name = D.name;
  comp_names = D.comp_names;
  comp_norm = D.comp_norm;
  D = D.data;
else
  data_name = inputname(1);
  % build a throw-away data struct from the first row just to obtain
  % default component names and normalization info
  sDummy = som_data_struct(D(1,:));
  comp_names = sDummy.comp_names;
  comp_norm = sDummy.comp_norm;
end
[dlen dim] = size(D);

% defaults
mapsize = '';
sM = som_map_struct(dim);
sTopol = sM.topol;
munits = prod(sTopol.msize); % should be zero
mask = sM.mask;
name = sM.name;
neigh = sM.neigh;
tracking = 1;
algorithm = 'batch';
initalg = 'lininit';
training = 'default';

% argument IDs which consume the following element of varargin as value
valued_ids = {'mask','munits','msize','mapsize','name','comp_names', ...
              'lattice','shape','topol','som_topol','sTopol','neigh', ...
              'tracking','algorithm','init','training'};

% varargin
i=1;
while i<=length(varargin),
  argok = 1;
  if ischar(varargin{i}),
    % an argument ID in the last position has no value to go with it;
    % fail with a clear message instead of indexing past the end
    if any(strcmp(varargin{i},valued_ids)) & i==length(varargin),
      error(['(som_make) Argument ID ''' varargin{i} ...
             ''' must be followed by a value.']);
    end
    switch varargin{i},
      % argument IDs
     case 'mask',       i=i+1; mask = varargin{i};
     case 'munits',     i=i+1; munits = varargin{i};
     case 'msize',      i=i+1; sTopol.msize = varargin{i};
                        munits = prod(sTopol.msize);
     case 'mapsize',    i=i+1; mapsize = varargin{i};
     case 'name',       i=i+1; name = varargin{i};
     case 'comp_names', i=i+1; comp_names = varargin{i};
     case 'lattice',    i=i+1; sTopol.lattice = varargin{i};
     case 'shape',      i=i+1; sTopol.shape = varargin{i};
     case {'topol','som_topol','sTopol'},
                        i=i+1; sTopol = varargin{i}; munits = prod(sTopol.msize);
     case 'neigh',      i=i+1; neigh = varargin{i};
     case 'tracking',   i=i+1; tracking = varargin{i};
     case 'algorithm',  i=i+1; algorithm = varargin{i};
     case 'init',       i=i+1; initalg = varargin{i};
     case 'training',   i=i+1; training = varargin{i};
      % unambiguous values
     case {'hexa','rect'}, sTopol.lattice = varargin{i};
     case {'sheet','cyl','toroid'}, sTopol.shape = varargin{i};
     case {'gaussian','cutgauss','ep','bubble'}, neigh = varargin{i};
     case {'seq','batch','sompak'}, algorithm = varargin{i};
     case {'small','normal','big'}, mapsize = varargin{i};
     case {'randinit','lininit'}, initalg = varargin{i};
     case {'short','default','long'}, training = varargin{i};
     otherwise argok=0;
    end
  elseif isstruct(varargin{i}) & isfield(varargin{i},'type'),
    switch varargin{i}(1).type,
     case 'som_topol', sTopol = varargin{i};
     otherwise argok=0;
    end
  else
    argok = 0;
  end
  if ~argok,
    % i+1 because D is the first argument of som_make
    disp(['(som_make) Ignoring invalid argument #' num2str(i+1)]);
  end
  i = i+1;
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% make the map struct

%% map size
% only determined here when no usable (non-zero) msize has been given
if isempty(sTopol.msize) | ~prod(sTopol.msize),
  if tracking>0, fprintf(1,'Determining map size...\n'); end
  if ~munits,
    sTemp = som_topol_struct('dlen',dlen);
    munits = prod(sTemp.msize);
    switch mapsize,
     case 'small', munits = max(9,ceil(munits/4));
     case 'big',   munits = munits*4;
     otherwise % nil
    end
  end
  sTemp = som_topol_struct('data',D,'munits',munits);
  sTopol.msize = sTemp.msize;
  if tracking>0,
    fprintf(1,' map size [%d, %d]\n',sTopol.msize(1), sTopol.msize(2));
  end
end

% map struct
sMap = som_map_struct(dim,sTopol,neigh,'mask',mask,'name',name, ...
                      'comp_names', comp_names, 'comp_norm', comp_norm);

% function
% 'sompak' is trained through som_sompaktrain, but the training
% parameters are requested for the 'seq' algorithm
if strcmp(algorithm,'sompak'),
  algorithm = 'seq';
  func = 'sompak';
else
  func = algorithm;
end
% none of the training switches below has a branch for other values, so
% tell the user instead of silently skipping both training phases
if ~any(strcmp(func,{'seq','sompak','batch'})),
  warning('(som_make) Unknown training algorithm: no training function matches it.');
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% initialization

if tracking>0, fprintf(1,'Initialization...\n'); end

switch initalg,
 case 'randinit', sMap = som_randinit(D, sMap);
 case 'lininit',  sMap = som_lininit(D, sMap);
 otherwise
  % previously skipped without notice; keep going but say so
  warning('(som_make) Unknown initialization algorithm: no initialization function was run.');
end
sMap.trainhist(1) = som_set(sMap.trainhist(1),'data_name',data_name);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% training

if tracking>0, fprintf(1,'Training using %s algorithm...\n',algorithm); end

% rough train
if tracking>0, fprintf(1,'Rough training phase...\n'); end
sTrain = som_train_struct(sMap,'dlen',dlen,'algorithm',algorithm,'phase','rough');
sTrain = som_set(sTrain,'data_name',data_name);
% a numeric 'training' gives the epoch counts directly: [rough finetune]
if isnumeric(training), sTrain.trainlen = training(1);
else
  switch training,
   case 'short', sTrain.trainlen = max(1,sTrain.trainlen/4);
   case 'long',  sTrain.trainlen = sTrain.trainlen*4;
  end
end
switch func,
 case 'seq',    sMap = som_seqtrain(sMap,D,sTrain,'tracking',tracking,'mask',mask);
 case 'sompak', sMap = som_sompaktrain(sMap,D,sTrain,'tracking',tracking,'mask',mask);
 case 'batch',  sMap = som_batchtrain(sMap,D,sTrain,'tracking',tracking,'mask',mask);
end

% finetune
if tracking>0, fprintf(1,'Finetuning phase...\n'); end
sTrain = som_train_struct(sMap,'dlen',dlen,'phase','finetune');
sTrain = som_set(sTrain,'data_name',data_name,'algorithm',algorithm);
if isnumeric(training), sTrain.trainlen = training(2);
else
  switch training,
   case 'short', sTrain.trainlen = max(1,sTrain.trainlen/4);
   case 'long',  sTrain.trainlen = sTrain.trainlen*4;
  end
end
switch func,
 case 'seq',    sMap = som_seqtrain(sMap,D,sTrain,'tracking',tracking,'mask',mask);
 case 'sompak', sMap = som_sompaktrain(sMap,D,sTrain,'tracking',tracking,'mask',mask);
 case 'batch',  sMap = som_batchtrain(sMap,D,sTrain,'tracking',tracking,'mask',mask);
end

% quality
if tracking>0,
  [mqe,tge] = som_quality(sMap,D);
  fprintf(1,'Final quantization error: %5.3f\n',mqe);
  fprintf(1,'Final topographic error: %5.3f\n',tge);
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%