Mercurial > hg > camir-aes2014
comparison toolboxes/MIRtoolbox1.3.2/somtoolbox/som_make.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e9a9cd732c1e |
---|---|
function sMap = som_make(D, varargin)

%SOM_MAKE Create, initialize and train Self-Organizing Map.
%
% sMap = som_make(D, [[argID,] value, ...])
%
%  sMap = som_make(D);
%  sMap = som_make(D, 'munits', 20);
%  sMap = som_make(D, 'munits', 20, 'hexa', 'sheet');
%  sMap = som_make(D, 'msize', [4 6 7], 'lattice', 'rect');
%
%  Input and output arguments ([]'s are optional):
%   D        (matrix) training data, size dlen x dim
%            (struct) data struct
%   [argID,  (string) See below. The values which are unambiguous can
%    value]  (varies) be given without the preceding argID.
%
%   sMap     (struct) map struct
%
% Here are the valid argument IDs and corresponding values. The values
% which are unambiguous (marked with '*') can be given without the
% preceding argID.
%   'init'       *(string) initialization: 'randinit' or 'lininit' (default)
%   'algorithm'  *(string) training: 'seq' or 'batch' (default) or 'sompak'
%   'munits'      (scalar) the preferred number of map units
%   'msize'       (vector) map grid size
%   'mapsize'    *(string) do you want a 'small', 'normal' or 'big' map
%                 Any explicit settings of munits or msize override this.
%   'lattice'    *(string) map lattice, 'hexa' or 'rect'
%   'shape'      *(string) map shape, 'sheet', 'cyl' or 'toroid'
%   'neigh'      *(string) neighborhood function, 'gaussian', 'cutgauss',
%                          'ep' or 'bubble'
%   'topol'      *(struct) topology struct
%   'som_topol','sTopol' = 'topol'
%   'mask'        (vector) BMU search mask, size dim x 1
%   'name'        (string) map name
%   'comp_names'  (string array / cellstr) component names, size dim x 1
%   'tracking'    (scalar) how much to report, default = 1
%   'training'    (string) 'short', 'default', 'long'
%                 (vector) size 1 x 2, first length of rough training in epochs,
%                          and then length of finetuning in epochs
%
% An error is raised if an argument ID is given without a following
% value, if 'init' or 'algorithm' is not one of the listed strings, or
% if a numeric 'training' has fewer than two elements.
%
% For more help, try 'type som_make' or check out online documentation.
% See also SOM_MAP_STRUCT, SOM_TOPOL_STRUCT, SOM_TRAIN_STRUCT,
%          SOM_RANDINIT, SOM_LININIT, SOM_SEQTRAIN, SOM_BATCHTRAIN.

%%%%%%%%%%%%% DETAILED DESCRIPTION %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% som_make
%
% PURPOSE
%
% Creates, initializes and trains a SOM using default parameters.
%
% SYNTAX
%
%  sMap = som_make(D);
%  sMap = som_make(...,'argID',value,...);
%  sMap = som_make(...,value,...);
%
% DESCRIPTION
%
% Creates, initializes and trains a SOM with default parameters. Uses functions
% SOM_TOPOL_STRUCT, SOM_TRAIN_STRUCT, SOM_DATA_STRUCT and SOM_MAP_STRUCT to come
% up with the default values.
%
% First, the number of map units is determined. Unless they are
% explicitly defined, function SOM_TOPOL_STRUCT is used to determine this.
% It uses a heuristic formula of 'munits = 5*dlen^0.54321'. The 'mapsize'
% argument influences the final number of map units: a 'big' map has
% x4 the default number of map units and a 'small' map has x0.25 the
% default number of map units.
%
% After the number of map units has been determined, the map size is
% determined. Basically, the two biggest eigenvalues of the training
% data are calculated and the ratio between sidelengths of the map grid
% is set to this ratio. The actual sidelengths are then set so that
% their product is as close to the desired number of map units as
% possible.
%
% Then the SOM is initialized. First, linear initialization along two
% greatest eigenvectors is tried, but if this can't be done (the
% eigenvectors cannot be calculated), random initialization is used
% instead.  After initialization, the SOM is trained in two phases:
% first rough training and then fine-tuning. If the 'tracking'
% argument is greater than zero, the average quantization error and
% topographic error of the final map are calculated.
%
% REQUIRED INPUT ARGUMENTS
%
%  D           The data to use in the training.
%     (struct) A data struct. If a struct is given, '.comp_names' field as
%              well as '.comp_norm' field is copied to the map struct.
%     (matrix) A data matrix, size dlen x dim. The data matrix may
%              contain unknown values, indicated by NaNs.
%
% OPTIONAL INPUT ARGUMENTS
%
%  argID (string) Argument identifier string (see below).
%  value (varies) Value for the argument (see below).
%
% Here are the valid argument IDs and corresponding values. The values
% which are unambiguous (marked with '*') can be given without the
% preceding argID.
%   'init'       *(string) initialization: 'randinit' or 'lininit' (default)
%   'algorithm'  *(string) training: 'seq' or 'batch' (default) or 'sompak'
%   'munits'      (scalar) the preferred number of map units
%   'msize'       (vector) map grid size
%   'mapsize'    *(string) do you want a 'small', 'normal' or 'big' map
%                 Any explicit settings of munits or msize override this.
%   'lattice'    *(string) map lattice, 'hexa' or 'rect'
%   'shape'      *(string) map shape, 'sheet', 'cyl' or 'toroid'
%   'neigh'      *(string) neighborhood function, 'gaussian', 'cutgauss',
%                          'ep' or 'bubble'
%   'topol'      *(struct) topology struct
%   'som_topol','sTopol' = 'topol'
%   'mask'        (vector) BMU search mask, size dim x 1
%   'name'        (string) map name
%   'comp_names'  (string array / cellstr) component names, size dim x 1
%   'tracking'    (scalar) how much to report, default = 1
%   'training'    (string) 'short', 'default' or 'long'
%                 (vector) size 1 x 2, first length of rough training in epochs,
%                          and then length of finetuning in epochs
%
% OUTPUT ARGUMENTS
%
%  sMap (struct) the trained map struct
%
% EXAMPLES
%
%  To simply train a map with default parameters:
%
%   sMap = som_make(D);
%
%  With the optional arguments, the initialization and training can be
%  influenced. To change map size, use 'msize', 'munits' or 'mapsize'
%  arguments:
%
%   sMap = som_make(D,'mapsize','big'); or sMap=som_make(D,'big');
%   sMap = som_make(D,'munits', 100);
%   sMap = som_make(D,'msize', [20 10]);
%
%  Argument 'algorithm' can be used to switch between 'seq' and 'batch'
%  algorithms. 'batch' is the default, so to use 'seq' algorithm:
%
%   sMap = som_make(D,'algorithm','seq'); or sMap = som_make(D,'seq');
%
%  The 'tracking' argument can be used to control the amount of reporting
%  during training. The argument is used in this function, and it is
%  passed to the training functions. To make the function work silently
%  set it to 0.
%
%   sMap = som_make(D,'tracking',0);
%
% SEE ALSO
%
%  som_map_struct    Create a map struct.
%  som_topol_struct  Default values for SOM topology.
%  som_train_struct  Default values for SOM training parameters.
%  som_randinit      Random initialization algorithm.
%  som_lininit       Linear initialization algorithm.
%  som_seqtrain      Sequential training algorithm.
%  som_batchtrain    Batch training algorithm.

% Copyright (c) 1999-2000 by the SOM toolbox programming team.
% http://www.cis.hut.fi/projects/somtoolbox/

% Version 2.0beta juuso 111199

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% check arguments

% D: either a data struct (fields are copied out) or a plain matrix
if isstruct(D)
  data_name = D.name;
  comp_names = D.comp_names;
  comp_norm = D.comp_norm;
  D = D.data;
else
  data_name = inputname(1);
  % a one-row dummy data struct is built only to obtain the default
  % component names and normalizations
  sDummy = som_data_struct(D(1,:));
  comp_names = sDummy.comp_names;
  comp_norm = sDummy.comp_norm;
end
[dlen dim] = size(D);

% defaults
mapsize = '';
sM = som_map_struct(dim);
sTopol = sM.topol;
munits = prod(sTopol.msize); % should be zero
mask = sM.mask;
name = sM.name;
neigh = sM.neigh;
tracking = 1;
algorithm = 'batch';
initalg = 'lininit';
training = 'default';

% argument IDs which consume the next element of varargin as their value
valueIDs = {'mask','munits','msize','mapsize','name','comp_names', ...
            'lattice','shape','topol','som_topol','sTopol','neigh', ...
            'tracking','algorithm','init','training'};

% varargin
nargs = length(varargin);
i=1;
while i<=nargs,
  argok = 1;
  if ischar(varargin{i}),
    argID = varargin{i};
    % an argument ID at the very end has no value to read; report that
    % instead of failing on an out-of-range index into varargin
    if any(strcmp(argID,valueIDs)) & i==nargs,
      error(['(som_make) Argument ''' argID ''' must be followed by a value.']);
    end
    switch argID,
      % argument IDs
     case 'mask',       i=i+1; mask = varargin{i};
     case 'munits',     i=i+1; munits = varargin{i};
     case 'msize',      i=i+1; sTopol.msize = varargin{i};
                        munits = prod(sTopol.msize);
     case 'mapsize',    i=i+1; mapsize = varargin{i};
     case 'name',       i=i+1; name = varargin{i};
     case 'comp_names', i=i+1; comp_names = varargin{i};
     case 'lattice',    i=i+1; sTopol.lattice = varargin{i};
     case 'shape',      i=i+1; sTopol.shape = varargin{i};
     case {'topol','som_topol','sTopol'},
                        i=i+1; sTopol = varargin{i}; munits = prod(sTopol.msize);
     case 'neigh',      i=i+1; neigh = varargin{i};
     case 'tracking',   i=i+1; tracking = varargin{i};
     case 'algorithm',  i=i+1; algorithm = varargin{i};
     case 'init',       i=i+1; initalg = varargin{i};
     case 'training',   i=i+1; training = varargin{i};
      % unambiguous values
     case {'hexa','rect'}, sTopol.lattice = varargin{i};
     case {'sheet','cyl','toroid'}, sTopol.shape = varargin{i};
     case {'gaussian','cutgauss','ep','bubble'}, neigh = varargin{i};
     case {'seq','batch','sompak'}, algorithm = varargin{i};
     case {'small','normal','big'}, mapsize = varargin{i};
     case {'randinit','lininit'}, initalg = varargin{i};
     case {'short','default','long'}, training = varargin{i};
     otherwise argok=0;
    end
  elseif isstruct(varargin{i}) & isfield(varargin{i},'type'),
    switch varargin{i}(1).type,
     case 'som_topol', sTopol = varargin{i};
     otherwise argok=0;
    end
  else
    argok = 0;
  end
  if ~argok,
    % +1 because D is the first argument of the call
    disp(['(som_make) Ignoring invalid argument #' num2str(i+1)]);
  end
  i = i+1;
end

% Validate the choices that select code paths below. Without these checks
% an unknown 'init' or 'algorithm' matches no switch case further down, so
% the initialization or the training would be skipped without any notice.
if ~som_make_isoneof(initalg,{'randinit','lininit'}),
  error('(som_make) Unknown initialization: ''init'' must be ''randinit'' or ''lininit''.');
end
if ~som_make_isoneof(algorithm,{'seq','batch','sompak'}),
  error('(som_make) Unknown algorithm: ''algorithm'' must be ''seq'', ''batch'' or ''sompak''.');
end
% a numeric 'training' is indexed with 1 (rough) and 2 (finetune); check
% its length now rather than failing after the rough phase has been run
if isnumeric(training),
  if length(training)<2,
    error('(som_make) Numeric ''training'' must have two elements: [rough finetune].');
  end
elseif ~ischar(training),
  error('(som_make) ''training'' must be a string or a numeric vector.');
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% make the map struct

%% map size
if isempty(sTopol.msize) | ~prod(sTopol.msize),
  if tracking>0, fprintf(1,'Determining map size...\n'); end
  if ~munits,
    % no explicit number of units: take the default for this data length
    % and scale it according to 'mapsize'
    sTemp = som_topol_struct('dlen',dlen);
    munits = prod(sTemp.msize);
    switch mapsize,
     case 'small', munits = max(9,ceil(munits/4));
     case 'big',   munits = munits*4;
     otherwise % nil
    end
  end
  sTemp = som_topol_struct('data',D,'munits',munits);
  sTopol.msize = sTemp.msize;
  if tracking>0,
    fprintf(1,' map size [%d, %d]\n',sTopol.msize(1), sTopol.msize(2));
  end
end

% map struct
sMap = som_map_struct(dim,sTopol,neigh,'mask',mask,'name',name, ...
                      'comp_names', comp_names, 'comp_norm', comp_norm);

% function: 'sompak' is trained with som_sompaktrain, but the training
% structs are created for the 'seq' algorithm
if strcmp(algorithm,'sompak'),
  algorithm = 'seq';
  func = 'sompak';
else
  func = algorithm;
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% initialization

if tracking>0, fprintf(1,'Initialization...\n'); end

switch initalg,
 case 'randinit', sMap = som_randinit(D, sMap);
 case 'lininit', sMap = som_lininit(D, sMap);
end
sMap.trainhist(1) = som_set(sMap.trainhist(1),'data_name',data_name);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% training

if tracking>0, fprintf(1,'Training using %s algorithm...\n',algorithm); end

% rough train
if tracking>0, fprintf(1,'Rough training phase...\n'); end
sTrain = som_train_struct(sMap,'dlen',dlen,'algorithm',algorithm,'phase','rough');
sTrain = som_set(sTrain,'data_name',data_name);
sTrain.trainlen = som_make_trainlen(sTrain.trainlen,training,1);
sMap = som_make_train(func,sMap,D,sTrain,tracking,mask);

% finetune
if tracking>0, fprintf(1,'Finetuning phase...\n'); end
sTrain = som_train_struct(sMap,'dlen',dlen,'phase','finetune');
sTrain = som_set(sTrain,'data_name',data_name,'algorithm',algorithm);
sTrain.trainlen = som_make_trainlen(sTrain.trainlen,training,2);
sMap = som_make_train(func,sMap,D,sTrain,tracking,mask);

% quality
if tracking>0,
  [mqe,tge] = som_quality(sMap,D);
  fprintf(1,'Final quantization error: %5.3f\n',mqe)
  fprintf(1,'Final topographic error: %5.3f\n',tge)
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% subfunctions

function ok = som_make_isoneof(value, choices)
%SOM_MAKE_ISONEOF True when value is a string equal to one of choices.
% Non-string values give false instead of being passed to strcmp.
ok = 0;
if ischar(value), ok = any(strcmp(value,choices)); end

function trainlen = som_make_trainlen(trainlen, training, phase)
%SOM_MAKE_TRAINLEN Training length for one phase (1 = rough, 2 = finetune).
% A numeric 'training' gives the length directly; 'short' divides the
% default length by four (but not below 1), 'long' multiplies it by
% four, and any other string leaves the default unchanged.
if isnumeric(training),
  trainlen = training(phase);
else
  switch training,
   case 'short', trainlen = max(1,trainlen/4);
   case 'long',  trainlen = trainlen*4;
  end
end

function sMap = som_make_train(func, sMap, D, sTrain, tracking, mask)
%SOM_MAKE_TRAIN Run one training phase with the selected training function.
switch func,
 case 'seq',    sMap = som_seqtrain(sMap,D,sTrain,'tracking',tracking,'mask',mask);
 case 'sompak', sMap = som_sompaktrain(sMap,D,sTrain,'tracking',tracking,'mask',mask);
 case 'batch',  sMap = som_batchtrain(sMap,D,sTrain,'tracking',tracking,'mask',mask);
end