Mercurial > hg > camir-aes2014
comparison toolboxes/MIRtoolbox1.3.2/somtoolbox/som_sompaktrain.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e9a9cd732c1e |
---|---|
function [sMap, sTrain] = som_sompaktrain(sMap, D, varargin)

%SOM_SOMPAKTRAIN  Use SOM_PAK to train the Self-Organizing Map.
%
% [sM,sT] = som_sompaktrain(sM, D, [[argID,] value, ...])
% 
%  sM     = som_sompaktrain(sM,D);
%  sM     = som_sompaktrain(sM,sD,'alpha_type','inv');
%  [M,sT] = som_sompaktrain(M,D,'bubble','trainlen',10,'inv','hexa');
%
%  Input and output arguments ([]'s are optional): 
%   sM      (struct) map struct, the trained and updated map is returned
%           (matrix) codebook matrix of a self-organizing map
%                    size munits x dim or  msize(1) x ... x msize(k) x dim
%                    The trained map codebook is returned.
%   D       (struct) training data; data struct
%           (matrix) training data, size dlen x dim
%           (string) name of data file
%   [argID, (string) See below. The values which are unambiguous can 
%    value] (varies) be given without the preceding argID.
%
%   sT      (struct) learning parameters used during the training
%
% Here are the valid argument IDs and corresponding values. The values which
% are unambiguous (marked with '*') can be given without the preceding argID.
%   'msize'        (vector) map size
%   'radius_ini'   (scalar) neighborhood radius
%   'radius'       = 'radius_ini'
%   'alpha_ini'    (scalar) initial learning rate
%   'alpha'        = 'alpha_ini'
%   'trainlen'     (scalar) training length
%   'seed'         (scalar) seed for random number generator
%   'snapfile'     (string) base name for snapshot files
%   'snapshotname' = 'snapfile'
%   'snapinterval' (scalar) snapshot interval
%   'snapshotinterval' = 'snapinterval'
%   'tlen_type'   *(string) is the given trainlen 'samples' or 'epochs'
%   'train'       *(struct) train struct, parameters for training
%   'sTrain','som_train' = 'train'
%   'alpha_type'  *(string) learning rate function, 'inv' or 'linear'
%   'neigh'       *(string) neighborhood function, 'gaussian' or 'bubble'
%   'topol'       *(struct) topology struct
%   'som_topol','sTopol' = 'topol'
%   'lattice'     *(string) map lattice, 'hexa' or 'rect'
%
% For more help, try 'type som_sompaktrain' or check out online documentation.
% See also  SOM_MAKE, SOM_SEQTRAIN, SOM_BATCHTRAIN, SOM_TRAIN_STRUCT.

%%%%%%%%%%%%% DETAILED DESCRIPTION %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% som_sompaktrain
%
% PURPOSE
%
% Use SOM_PAK to train the Self-Organizing Map.
%
% SYNTAX
%
%  sM = som_sompaktrain(sM,D);
%  sM = som_sompaktrain(sM,sD);
%  sM = som_sompaktrain(...,'argID',value,...);
%  sM = som_sompaktrain(...,value,...);
%  [sM,sT] = som_sompaktrain(M,D,...);
%
% DESCRIPTION
%
% Trains the given SOM (sM or M above) with the given training data (sD or
% D) using SOM_PAK. If no optional arguments (argID, value) are
% given, a default training is done, the parameters are obtained from
% SOM_TRAIN_STRUCT function.  Using optional arguments the training
% parameters can be specified. Returns the trained and updated SOM and a
% train struct which contains information on the training.
%
% Notice that the SOM_PAK program 'vsom' must be in the search path of your
% shell. Alternatively, you can set a variable 'SOM_PAKDIR' in the Matlab
% workspace to tell the som_sompaktrain where to find the 'vsom' program.
%
% Notice also that many of the training parameters are much more limited in
% values than when using SOM Toolbox function for training:
%   - the map shape is always 'sheet'
%   - only initial value for neighborhood radius can be given
%   - neighborhood function can only be 'bubble' or 'gaussian'
%   - only initial value for learning rate can be given
%   - learning rate can only be 'linear' or 'inv'
%   - mask cannot be used: all variables are always used in BMU search
% Any parameters not conforming to these restrictions will be converted
% so that they do before training. On the other hand, there are some 
% additional options that are not present in the SOM Toolbox: 
%   - random seed
%   - snapshot file and interval
%
% REQUIRED INPUT ARGUMENTS
%
%  sM          The map to be trained. 
%     (struct) map struct
%     (matrix) codebook matrix (field .data of map struct)
%              Size is either [munits dim], in which case the map grid 
%              dimensions (msize) should be specified with optional arguments,
%              or [msize(1) ... msize(k) dim] in which case the map 
%              grid dimensions are taken from the size of the matrix. 
%              Lattice, by default, is 'rect' and shape 'sheet'.
%  D           Training data.
%     (struct) data struct
%     (matrix) data matrix, size [dlen dim]
%     (string) name of data file. A data file given by name is only read;
%              it is not removed after the training.
%
% OPTIONAL INPUT ARGUMENTS 
%
%  argID (string) Argument identifier string (see below).
%  value (varies) Value for the argument (see below).
%
%  The optional arguments can be given as 'argID',value -pairs. If an
%  argument is given value multiple times, the last one is
%  used. The valid IDs and corresponding values are listed below. The values 
%  which are unambiguous (marked with '*') can be given without the 
%  preceding argID.
%
%   'msize'        (vector) map grid dimensions. Default is the one
%                           in sM (field sM.topol.msize) or 
%                           'si = size(sM); msize = si(1:end-1);' 
%                           if only a codebook matrix was given. 
%   'radius_ini'   (scalar) initial neighborhood radius 
%   'radius'       (scalar) = 'radius_ini'
%   'alpha_ini'    (scalar) initial learning rate
%   'alpha'        (scalar) = 'alpha_ini'
%   'trainlen'     (scalar) training length (see also 'tlen_type')
%   'seed'         (scalar) seed for random number generator
%   'snapfile'     (string) base name for snapshot files
%   'snapshotname' (string) = 'snapfile'
%   'snapinterval' (scalar) snapshot interval
%   'snapshotinterval' (scalar) = 'snapinterval'
%   'tlen_type'   *(string) is the trainlen argument given in 'epochs' or
%                           in 'samples'. Default is 'epochs'.
%   'train'       *(struct) train struct, parameters for training. 
%                           Default parameters, unless specified, 
%                           are acquired using SOM_TRAIN_STRUCT (this 
%                           also applies for 'trainlen', 'alpha_type', 
%                           'alpha_ini', 'radius_ini' and 'radius_fin').
%   'sTrain', 'som_train' (struct) = 'train'
%   'neigh'       *(string) The used neighborhood function. Default is 
%                           the one in sM (field '.neigh') or 'gaussian'
%                           if only a codebook matrix was given. The other 
%                           possible value is 'bubble'.
%   'topol'       *(struct) topology of the map. Default is the one
%                           in sM (field '.topol').
%   'sTopol', 'som_topol' (struct) = 'topol'
%   'alpha_type'  *(string) learning rate function, 'inv' or 'linear'
%   'lattice'     *(string) map lattice. Default is the one in sM
%                           (field sM.topol.lattice) or 'rect' 
%                           if only a codebook matrix was given. 
%   
% OUTPUT ARGUMENTS
% 
%  sM          the trained map
%     (struct) if a map struct was given as input argument, a 
%              map struct is also returned. The current training 
%              is added to the training history (sM.trainhist).
%              The 'neigh' and 'mask' fields of the map struct
%              are updated to match those of the training.
%     (matrix) if a matrix was given as input argument, a matrix
%              is also returned with the same size as the input 
%              argument.
%  sT (struct) train struct; information of the accomplished training
%  
% EXAMPLES
%
% Simplest case:
%  sM = som_sompaktrain(sM,D);  
%  sM = som_sompaktrain(sM,sD);  
%
% To change training parameters, the optional arguments 'train', 
% 'neigh','trainlen','radius','radius_ini', 'alpha', 
% 'alpha_type' and 'alpha_ini' are used. 
%  sM = som_sompaktrain(sM,D,'bubble','trainlen',10,'radius_ini',3);
%
% Another way to specify training parameters is to create a train struct:
%  sTrain = som_train_struct(sM,'dlen',size(D,1),'algorithm','seq');
%  sTrain = som_set(sTrain,'neigh','gaussian');
%  sM     = som_sompaktrain(sM,D,sTrain);
%
% You don't necessarily have to use the map struct, but you can operate
% directly with codebook matrices. However, in this case you have to
% specify the topology of the map in the optional arguments. The
% following commands are identical (M is originally a 200 x dim sized matrix):
%  M = som_sompaktrain(M,D,'msize',[20 10],'lattice','hexa');
%
%  M = som_sompaktrain(M,D,'msize',[20 10],'hexa');
%
%  sT= som_set('som_topol','msize',[20 10],'lattice','hexa');
%  M = som_sompaktrain(M,D,sT);
%
%  M = reshape(M,[20 10 dim]);
%  M = som_sompaktrain(M,D,'hexa');
%
% The som_sompaktrain also returns a train struct with information on the 
% accomplished training. This is the same one as is added to the end of the 
% trainhist field of map struct, in case a map struct is given.
%  [M,sTrain] = som_sompaktrain(M,D,'msize',[20 10]);
%
%  [sM,sTrain] = som_sompaktrain(sM,D); % sM.trainhist(end)==sTrain
%
% SEE ALSO
% 
%  som_make         Initialize and train a SOM using default parameters.
%  som_seqtrain     Train SOM with sequential algorithm.
%  som_batchtrain   Train SOM with batch algorithm.
%  som_train_struct Determine default training parameters.

% Copyright (c) 1999-2000 by the SOM toolbox programming team.
% http://www.cis.hut.fi/projects/somtoolbox/

% Version 2.0beta juuso 151199

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Check arguments

error(nargchk(2, Inf, nargin));  % check the number of input arguments

% map 
struct_mode = isstruct(sMap);
if struct_mode, 
  sTopol = sMap.topol;
else  
  % a plain codebook matrix was given: wrap it into a map struct
  orig_size = size(sMap);
  if ndims(sMap) > 2, 
    % [msize(1) ... msize(k) dim]: grid size comes from the matrix shape
    si = size(sMap); dim = si(end); msize = si(1:end-1);
    M = reshape(sMap,[prod(msize) dim]);
  else
    % [munits dim]: grid size defaults to munits x 1 unless 'msize' is given
    msize = [orig_size(1) 1]; 
    dim = orig_size(2);
    M = sMap;
  end
  sMap   = som_map_struct(dim,'msize',msize);
  % som_map_struct only receives dim and msize, so the codebook supplied by
  % the caller has to be stored explicitly; otherwise it would never reach
  % the codebook file written for vsom below
  sMap.codebook = M;
  sTopol = sMap.topol;
end
[munits dim] = size(sMap.codebook);

% data
givendatafile = '';
if ischar(D), 
  % data is read by vsom directly from the named file; its length is unknown
  data_name = D; 
  givendatafile = D; 
  D = [];
  dlen = NaN; 
else
  if isstruct(D), 
    data_name = D.name; 
    D = D.data; 
  else 
    data_name = inputname(2); 
  end
  D = D(find(sum(isnan(D),2) < dim),:); % remove empty vectors from the data
  [dlen ddim] = size(D);                % check input dimension
  if ddim ~= dim, error('Map and data dimensions must agree.'); end
end

% varargin
sTrain = som_set('som_train','algorithm','seq',...
                 'neigh',sMap.neigh,...
                 'mask',ones(dim,1),...
                 'data_name',data_name);
tlen_type        = 'epochs';
random_seed      = 0; 
snapshotname     = ''; 
snapshotinterval = 0;

i=1; 
while i<=length(varargin), 
  argok = 1; 
  if ischar(varargin{i}), 
    switch varargin{i}, 
     % argument IDs
     case 'msize',       i=i+1; sTopol.msize = varargin{i}; 
     case 'lattice',     i=i+1; sTopol.lattice = varargin{i};
     case 'neigh',       i=i+1; sTrain.neigh = varargin{i};
     case 'trainlen',    i=i+1; sTrain.trainlen = varargin{i};
     case 'tlen_type',   i=i+1; tlen_type = varargin{i}; 
     case 'radius_ini',  i=i+1; sTrain.radius_ini = varargin{i};
     case 'radius',      i=i+1; sTrain.radius_ini = varargin{i}(1);
     case 'alpha_type',  i=i+1; sTrain.alpha_type = varargin{i};
     case 'alpha_ini',   i=i+1; sTrain.alpha_ini = varargin{i};
     case 'alpha',       i=i+1; sTrain.alpha_ini = varargin{i}(1);
     case 'seed',        i=i+1; random_seed = varargin{i};
     % both the documented IDs ('snapfile','snapinterval') and the
     % longer historical ones are accepted
     case {'snapfile','snapshotname'},
      i=i+1; snapshotname = varargin{i};
     case {'snapinterval','snapshotinterval'},
      i=i+1; snapshotinterval = varargin{i};
     case {'sTrain','train','som_train'}, i=i+1; sTrain = varargin{i};
     case {'topol','sTopol','som_topol'}, 
      i=i+1; 
      sTopol = varargin{i};
      if prod(sTopol.msize) ~= munits, 
        error('Given map grid size does not match the codebook size.');
      end
      % unambiguous values
     case {'inv','linear'}, sTrain.alpha_type = varargin{i}; 
     case {'hexa','rect'}, sTopol.lattice = varargin{i};
     case {'gaussian','bubble'}, sTrain.neigh = varargin{i}; 
     case {'epochs','samples'}, tlen_type = varargin{i};
     otherwise argok=0; 
    end
  elseif isstruct(varargin{i}) & isfield(varargin{i},'type'), 
    switch varargin{i}(1).type, 
     case 'som_topol', 
      sTopol = varargin{i}; 
      if prod(sTopol.msize) ~= munits, 
        error('Given map grid size does not match the codebook size.');
      end
     case 'som_train', sTrain = varargin{i};
     otherwise argok=0; 
    end
  else
    argok = 0; 
  end
  if ~argok, 
    disp(['(som_sompaktrain) Ignoring invalid argument #' num2str(i+2)]); 
  end
  i = i+1; 
end

% check topology
if struct_mode, 
  if ~strcmp(sTopol.lattice,sMap.topol.lattice) | ...
        ~strcmp(sTopol.shape,sMap.topol.shape) | ...
        any(sTopol.msize ~= sMap.topol.msize), 
    warning('Changing the original map topology.');
  end
end
sMap.topol = sTopol; 

% complement the training struct
if ~isnan(dlen), 
  sTrain = som_train_struct(sTrain,sMap,'dlen',dlen);
else
  sTrain = som_train_struct(sTrain,sMap);
end
if isempty(sTrain.mask), sTrain.mask = ones(dim,1); end

% training length
if strcmp(tlen_type,'epochs'), 
  if isnan(dlen), 
    error('Training length given as epochs, but data length is not known.');
  else
    rlen = sTrain.trainlen*dlen;
  end
else
  rlen = sTrain.trainlen;
  % stored in epochs; this is NaN when the data length is unknown (data file)
  sTrain.trainlen = sTrain.trainlen/dlen;   
end
% rlen is written with an integer format on the vsom command line
rlen = round(rlen);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% check arguments

% mask
if any(sTrain.mask~=1), 
  sTrain.mask = ones(dim,1); 
  fprintf(1,'Ignoring given mask.\n');
end

% learning rate
if strcmp(sTrain.alpha_type,'power'), 
  sTrain.alpha_type = 'inv';
  fprintf(1,'Using ''inv'' learning rate type instead of ''power''\n');
end

% neighborhood
if any(strcmp(sTrain.neigh,{'cutgauss','ep'})), 
  fprintf(1,'Using ''gaussian'' neighborhood function instead of %s.\n',sTrain.neigh);
  sTrain.neigh = 'gaussian'; 
end

% map shape
if ~strcmp(sMap.topol.shape,'sheet'), 
  fprintf(1,'Using ''sheet'' map shape instead of %s.\n',sMap.topol.shape);
  sMap.topol.shape = 'sheet'; 
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Action

% write files
% tempfiles lists only the files created here; a data file supplied by the
% caller must never be removed
tempfiles = {};
if ~isempty(givendatafile), 
  temp_din = givendatafile; 
else 
  temp_din = tempname; 
  som_write_data(D, temp_din, 'x')
  tempfiles{end+1} = temp_din;
end
temp_cin  = tempname; 
som_write_cod(sMap, temp_cin)
tempfiles{end+1} = temp_cin;
temp_cout = tempname; 
tempfiles{end+1} = temp_cout;

% check if the variable 'SOM_PAKDIR' has been defined in the base workspace
if any(strcmp('SOM_PAKDIR', evalin('base', 'who')))
  som_pak_dir = evalin('base', 'SOM_PAKDIR');
else
  som_pak_dir = '';
end
% make sure the directory ends with a path separator
if ~isempty(som_pak_dir) & ~any(som_pak_dir(end) == '/\')
  som_pak_dir(end + 1) = '/';
end

aini  = sTrain.alpha_ini; 
atype = sTrain.alpha_type; 
if strcmp(atype,'inv'), atype = 'inverse_t'; end  % name used by vsom
rad   = sTrain.radius_ini;
str = [som_pak_dir 'vsom ' ...
       sprintf('-cin %s -din %s -cout %s', temp_cin, temp_din, temp_cout) ...
       sprintf(' -rlen %d -alpha %g -alpha_type %s', rlen, aini, atype) ...
       sprintf(' -radius %g -rand %g ',rad,random_seed)];
if ~isempty(snapshotname) & snapshotinterval>0, 
  str = [str, sprintf(' -snapfile %s -snapinterval %d',snapshotname,snapshotinterval)];
end

fprintf(1,'Execute: %s\n',str);
if isunix, 
  [status,w] = unix(str); 
else 
  [status,w] = dos(str);
end
if status, fprintf(1,'Execution failed.\n'); end
if ~isempty(w), fprintf(1,'%s\n',w); end

% without an output codebook there is nothing to read: clean up first so
% that the temporary files do not accumulate, then report the failure
if ~exist(temp_cout,'file'),
  remove_files(tempfiles);
  error('(som_sompaktrain) vsom did not produce an output codebook file.');
end

sMap_temp = som_read_cod(temp_cout);
M = sMap_temp.codebook;

remove_files(tempfiles);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Build / clean up the return arguments

% update structures
sTrain = som_set(sTrain,'time',datestr(now,0));
if struct_mode, 
  sMap = som_set(sMap,'codebook',M,'mask',sTrain.mask,'neigh',sTrain.neigh);
  tl = length(sMap.trainhist);
  sMap.trainhist(tl+1) = sTrain;
else
  sMap = reshape(M,orig_size);
end

return;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% subfunctions

function remove_files(files)

% REMOVE_FILES  Delete each file named in the cell array, if it exists.

for k=1:length(files),
  if exist(files{k},'file'), delete(files{k}); end
end

return;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%