comparison toolboxes/MIRtoolbox1.3.2/somtoolbox/som_sompaktrain.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e9a9cd732c1e
function [sMap, sTrain] = som_sompaktrain(sMap, D, varargin)

%SOM_SOMPAKTRAIN Use SOM_PAK to train the Self-Organizing Map.
%
% [sM,sT] = som_sompaktrain(sM, D, [[argID,] value, ...])
%
%  sM     = som_sompaktrain(sM,D);
%  sM     = som_sompaktrain(sM,sD,'alpha_type','inv');
%  [M,sT] = som_sompaktrain(M,D,'bubble','trainlen',10,'inv','hexa');
%
%  Input and output arguments ([]'s are optional):
%   sM      (struct) map struct, the trained and updated map is returned
%           (matrix) codebook matrix of a self-organizing map
%                    size munits x dim or  msize(1) x ... x msize(k) x dim
%                    The trained map codebook is returned.
%   D       (struct) training data; data struct
%           (matrix) training data, size dlen x dim
%           (string) name of data file
%   [argID, (string) See below. The values which are unambiguous can
%    value] (varies) be given without the preceding argID.
%
%   sT      (struct) learning parameters used during the training
%
% Here are the valid argument IDs and corresponding values. The values which
% are unambiguous (marked with '*') can be given without the preceding argID.
%   'msize'        (vector) map size
%   'radius_ini'   (scalar) neighborhood radius
%   'radius'       = 'radius_ini'
%   'alpha_ini'    (scalar) initial learning rate
%   'alpha'        = 'alpha_ini'
%   'trainlen'     (scalar) training length
%   'seed'         (scalar) seed for random number generator
%   'snapfile'     (string) base name for snapshot files
%   'snapshotname' = 'snapfile'
%   'snapinterval' (scalar) snapshot interval
%   'snapshotinterval' = 'snapinterval'
%   'tlen_type'   *(string) is the given trainlen 'samples' or 'epochs'
%   'train'       *(struct) train struct, parameters for training
%   'sTrain','som_train'  = 'train'
%   'alpha_type'  *(string) learning rate function, 'inv' or 'linear'
%   'neigh'       *(string) neighborhood function, 'gaussian' or 'bubble'
%   'topol'       *(struct) topology struct
%   'som_topol','sTopol'  = 'topol'
%   'lattice'     *(string) map lattice, 'hexa' or 'rect'
%
% For more help, try 'type som_sompaktrain' or check out online documentation.
% See also  SOM_MAKE, SOM_SEQTRAIN, SOM_BATCHTRAIN, SOM_TRAIN_STRUCT.

%%%%%%%%%%%%% DETAILED DESCRIPTION %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% som_sompaktrain
%
% PURPOSE
%
% Use SOM_PAK to train the Self-Organizing Map.
%
% SYNTAX
%
%  sM = som_sompaktrain(sM,D);
%  sM = som_sompaktrain(sM,sD);
%  sM = som_sompaktrain(...,'argID',value,...);
%  sM = som_sompaktrain(...,value,...);
%  [sM,sT] = som_sompaktrain(M,D,...);
%
% DESCRIPTION
%
% Trains the given SOM (sM or M above) with the given training data (sD or
% D) using SOM_PAK. If no optional arguments (argID, value) are
% given, a default training is done, the parameters are obtained from
% SOM_TRAIN_STRUCT function. Using optional arguments the training
% parameters can be specified. Returns the trained and updated SOM and a
% train struct which contains information on the training.
%
% Notice that the SOM_PAK program 'vsom' must be in the search path of your
% shell. Alternatively, you can set a variable 'SOM_PAKDIR' in the Matlab
% base workspace to tell som_sompaktrain where to find the 'vsom' program.
%
% Notice also that many of the training parameters are much more limited in
% values than when using SOM Toolbox function for training:
%   - the map shape is always 'sheet'
%   - only initial value for neighborhood radius can be given
%   - neighborhood function can only be 'bubble' or 'gaussian'
%   - only initial value for learning rate can be given
%   - learning rate can only be 'linear' or 'inv'
%   - mask cannot be used: all variables are always used in BMU search
% Any parameters not conforming to these restrictions will be converted
% so that they do before training. On the other hand, there are some
% additional options that are not present in the SOM Toolbox:
%   - random seed
%   - snapshot file and interval
%
% If D is the name of a data file, that file is passed to 'vsom' as such
% and is left untouched afterwards; only the temporary files created by
% this function are removed. Because the number of samples in a data file
% is not known, the training length must then be given in 'samples'.
%
% REQUIRED INPUT ARGUMENTS
%
%  sM          The map to be trained.
%     (struct) map struct
%     (matrix) codebook matrix (field .codebook of map struct)
%              Size is either [munits dim], in which case the map grid
%              dimensions (msize) should be specified with optional arguments,
%              or [msize(1) ... msize(k) dim] in which case the map
%              grid dimensions are taken from the size of the matrix.
%              Lattice, by default, is 'rect' and shape 'sheet'.
%  D           Training data.
%     (struct) data struct
%     (matrix) data matrix, size [dlen dim]
%     (string) name of data file
%
% OPTIONAL INPUT ARGUMENTS
%
%  argID (string) Argument identifier string (see below).
%  value (varies) Value for the argument (see below).
%
%  The optional arguments can be given as 'argID',value -pairs. If an
%  argument is given value multiple times, the last one is
%  used. The valid IDs and corresponding values are listed below. The values
%  which are unambiguous (marked with '*') can be given without the
%  preceding argID.
%
%   'msize'        (vector) map grid dimensions. Default is the one
%                           in sM (field sM.topol.msize) or
%                           'si = size(sM); msize = si(1:end-1);'
%                           if only a codebook matrix was given.
%   'radius_ini'   (scalar) initial neighborhood radius
%   'radius'       (scalar) = 'radius_ini'
%   'alpha_ini'    (scalar) initial learning rate
%   'alpha'        (scalar) = 'alpha_ini'
%   'trainlen'     (scalar) training length (see also 'tlen_type')
%   'seed'         (scalar) seed for random number generator
%   'snapfile'     (string) base name for snapshot files
%   'snapshotname' (string) = 'snapfile'
%   'snapinterval' (scalar) snapshot interval
%   'snapshotinterval' (scalar) = 'snapinterval'
%   'tlen_type'   *(string) is the trainlen argument given in 'epochs' or
%                           in 'samples'. Default is 'epochs'.
%   'train'       *(struct) train struct, parameters for training.
%                           Default parameters, unless specified,
%                           are acquired using SOM_TRAIN_STRUCT (this
%                           also applies for 'trainlen', 'alpha_type',
%                           'alpha_ini', 'radius_ini' and 'radius_fin').
%   'sTrain', 'som_train' (struct) = 'train'
%   'neigh'       *(string) The used neighborhood function. Default is
%                           the one in sM (field '.neigh') or 'gaussian'
%                           if only a codebook matrix was given. The other
%                           possible value is 'bubble'.
%   'topol'       *(struct) topology of the map. Default is the one
%                           in sM (field '.topol').
%   'sTopol', 'som_topol' (struct) = 'topol'
%   'alpha_type'  *(string) learning rate function, 'inv' or 'linear'
%   'lattice'     *(string) map lattice. Default is the one in sM
%                           (field sM.topol.lattice) or 'rect'
%                           if only a codebook matrix was given.
%
% OUTPUT ARGUMENTS
%
%  sM          the trained map
%     (struct) if a map struct was given as input argument, a
%              map struct is also returned. The current training
%              is added to the training history (sM.trainhist).
%              The 'neigh' and 'mask' fields of the map struct
%              are updated to match those of the training.
%     (matrix) if a matrix was given as input argument, a matrix
%              is also returned with the same size as the input
%              argument.
%  sT (struct) train struct; information of the accomplished training
%
% EXAMPLES
%
% Simplest case:
%  sM = som_sompaktrain(sM,D);
%  sM = som_sompaktrain(sM,sD);
%
% To change training parameters, the optional arguments 'train',
% 'neigh','trainlen','radius','radius_ini', 'alpha',
% 'alpha_type' and 'alpha_ini' are used.
%  sM = som_sompaktrain(sM,D,'bubble','trainlen',10,'radius_ini',3);
%
% Another way to specify training parameters is to create a train struct:
%  sTrain = som_train_struct(sM,'dlen',size(D,1),'algorithm','seq');
%  sTrain = som_set(sTrain,'neigh','gaussian');
%  sM     = som_sompaktrain(sM,D,sTrain);
%
% You don't necessarily have to use the map struct, but you can operate
% directly with codebook matrices. However, in this case you have to
% specify the topology of the map in the optional arguments. The
% following commands are identical (M is originally a 200 x dim sized matrix):
%  M = som_sompaktrain(M,D,'msize',[20 10],'lattice','hexa');
%
%  M = som_sompaktrain(M,D,'msize',[20 10],'hexa');
%
%  sT= som_set('som_topol','msize',[20 10],'lattice','hexa');
%  M = som_sompaktrain(M,D,sT);
%
%  M = reshape(M,[20 10 dim]);
%  M = som_sompaktrain(M,D,'hexa');
%
% The som_sompaktrain also returns a train struct with information on the
% accomplished training. This is the same one as is added to the end of the
% trainhist field of map struct, in case a map struct is given.
%  [M,sTrain] = som_sompaktrain(M,D,'msize',[20 10]);
%
%  [sM,sTrain] = som_sompaktrain(sM,D);  % sM.trainhist(end)==sTrain
%
% SEE ALSO
%
%  som_make         Initialize and train a SOM using default parameters.
%  som_seqtrain     Train SOM with sequential algorithm.
%  som_batchtrain   Train SOM with batch algorithm.
%  som_train_struct Determine default training parameters.

% Copyright (c) 1999-2000 by the SOM toolbox programming team.
% http://www.cis.hut.fi/projects/somtoolbox/

% Version 2.0beta juuso 151199

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Check arguments

error(nargchk(2, Inf, nargin));  % check the number of input arguments

% map
struct_mode = isstruct(sMap);
if struct_mode,
  sTopol = sMap.topol;
else
  % A bare codebook matrix was given: wrap it into a temporary map struct.
  orig_size = size(sMap);
  if ndims(sMap) > 2,
    si = size(sMap); dim = si(end); msize = si(1:end-1);
    M = reshape(sMap,[prod(msize) dim]);
  else
    msize = [orig_size(1) 1];
    dim = orig_size(2);
    M = sMap;
  end
  sMap = som_map_struct(dim,'msize',msize);
  % Keep the caller's codebook; without this the struct created above would
  % be written to the SOM_PAK input file with its own initial codebook and
  % the given matrix would never reach 'vsom'.
  sMap.codebook = M;
  sTopol = sMap.topol;
end
[munits dim] = size(sMap.codebook);

% data
givendatafile = '';
if ischar(D),
  % Data is already in a file: it is handed to 'vsom' as such, so the
  % number of samples is not known here.
  data_name = D;
  givendatafile = D;
  D = [];
  dlen = NaN;
else
  if isstruct(D),
    data_name = D.name;
    D = D.data;
  else
    data_name = inputname(2);
  end
  D = D(find(sum(isnan(D),2) < dim),:); % remove empty vectors from the data
  [dlen ddim] = size(D);                % check input dimension
  if ddim ~= dim, error('Map and data dimensions must agree.'); end
end

% varargin
sTrain = som_set('som_train','algorithm','seq',...
                 'neigh',sMap.neigh,...
                 'mask',ones(dim,1),...
                 'data_name',data_name);
tlen_type        = 'epochs';
random_seed      = 0;
snapshotname     = '';
snapshotinterval = 0;

i=1;
while i<=length(varargin),
  argok = 1;
  if ischar(varargin{i}),
    switch varargin{i},
     % argument IDs
     case 'msize',      i=i+1; sTopol.msize = varargin{i};
     case 'lattice',    i=i+1; sTopol.lattice = varargin{i};
     case 'neigh',      i=i+1; sTrain.neigh = varargin{i};
     case 'trainlen',   i=i+1; sTrain.trainlen = varargin{i};
     case 'tlen_type',  i=i+1; tlen_type = varargin{i};
     case 'radius_ini', i=i+1; sTrain.radius_ini = varargin{i};
     case 'radius',     i=i+1; sTrain.radius_ini = varargin{i}(1);
     case 'alpha_type', i=i+1; sTrain.alpha_type = varargin{i};
     case 'alpha_ini',  i=i+1; sTrain.alpha_ini = varargin{i};
     case 'alpha',      i=i+1; sTrain.alpha_ini = varargin{i}(1);
     case 'seed',       i=i+1; random_seed = varargin{i};
     % both the documented IDs ('snapfile', 'snapinterval') and the
     % longer historical ones are accepted
     case {'snapfile','snapshotname'},
      i=i+1; snapshotname = varargin{i};
     case {'snapinterval','snapshotinterval'},
      i=i+1; snapshotinterval = varargin{i};
     case {'sTrain','train','som_train'}, i=i+1; sTrain = varargin{i};
     case {'topol','sTopol','som_topol'},
      i=i+1;
      sTopol = varargin{i};
      if prod(sTopol.msize) ~= munits,
        error('Given map grid size does not match the codebook size.');
      end
     % unambiguous values
     case {'inv','linear'}, sTrain.alpha_type = varargin{i};
     case {'hexa','rect'}, sTopol.lattice = varargin{i};
     case {'gaussian','bubble'}, sTrain.neigh = varargin{i};
     case {'epochs','samples'}, tlen_type = varargin{i};
     otherwise argok=0;
    end
  elseif isstruct(varargin{i}) & isfield(varargin{i},'type'),
    switch varargin{i}(1).type,
     case 'som_topol',
      sTopol = varargin{i};
      if prod(sTopol.msize) ~= munits,
        error('Given map grid size does not match the codebook size.');
      end
     case 'som_train', sTrain = varargin{i};
     otherwise argok=0;
    end
  else
    argok = 0;
  end
  if ~argok,
    disp(['(som_sompaktrain) Ignoring invalid argument #' num2str(i+2)]);
  end
  i = i+1;
end

% check topology
% (also covers a grid size given with 'msize', which is not checked above)
if prod(sTopol.msize) ~= munits,
  error('Given map grid size does not match the codebook size.');
end
if struct_mode,
  % isequal is used for msize so that grids of different dimensionality
  % are reported as a change instead of causing a size-mismatch error
  if ~strcmp(sTopol.lattice,sMap.topol.lattice) | ...
     ~strcmp(sTopol.shape,sMap.topol.shape) | ...
     ~isequal(sTopol.msize,sMap.topol.msize),
    warning('Changing the original map topology.');
  end
end
sMap.topol = sTopol;

% complement the training struct
if ~isnan(dlen),
  sTrain = som_train_struct(sTrain,sMap,'dlen',dlen);
else
  sTrain = som_train_struct(sTrain,sMap);
end
if isempty(sTrain.mask), sTrain.mask = ones(dim,1); end

% training length: rlen is the number of training steps given to 'vsom'
if strcmp(tlen_type,'epochs'),
  if isnan(dlen),
    error('Training length given as epochs, but data length is not known.');
  end
  rlen = sTrain.trainlen*dlen;
else
  rlen = sTrain.trainlen;
  sTrain.trainlen = sTrain.trainlen/dlen;  % NaN if data length is unknown
end
% sprintf('%d',x) switches to exponent notation for non-integer x, which
% would corrupt the '-rlen' argument, so make sure rlen is an integer
rlen = round(rlen);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% check arguments

% mask
if any(sTrain.mask~=1),
  sTrain.mask = ones(dim,1);
  fprintf(1,'Ignoring given mask.\n');
end

% learning rate
if strcmp(sTrain.alpha_type,'power'),
  sTrain.alpha_type = 'inv';
  fprintf(1,'Using ''inv'' learning rate type instead of ''power''\n');
end

% neighborhood
if any(strcmp(sTrain.neigh,{'cutgauss','ep'})),
  fprintf(1,'Using ''gaussian'' neighborhood function instead of %s.\n',sTrain.neigh);
  sTrain.neigh = 'gaussian';
end

% map shape
if ~strcmp(sMap.topol.shape,'sheet'),
  fprintf(1,'Using ''sheet'' map shape instead of %s.\n',sMap.topol.shape);
  sMap.topol.shape = 'sheet';
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Action

% write files
% tempfiles lists only the files created here; a data file given by the
% caller is never added to it and therefore never deleted.
tempfiles = {};
if ~isempty(givendatafile),
  temp_din = givendatafile;
else
  temp_din = tempname;
  tempfiles{end+1} = temp_din;
  som_write_data(D, temp_din, 'x')
end
temp_cin = tempname;
tempfiles{end+1} = temp_cin;
som_write_cod(sMap, temp_cin)
temp_cout = tempname;
tempfiles{end+1} = temp_cout;

% check if the variable 'SOM_PAKDIR' has been defined in the base workspace
if any(strcmp('SOM_PAKDIR', evalin('base', 'who')))
  som_pak_dir = evalin('base', 'SOM_PAKDIR');
else
  som_pak_dir = '';
end
if ~isempty(som_pak_dir),
  if ~strncmp(som_pak_dir(end), '/', 1)
    som_pak_dir(end + 1) = '/';
  end
end

% build the 'vsom' command line
aini  = sTrain.alpha_ini;
atype = sTrain.alpha_type;
if strcmp(atype,'inv'), atype = 'inverse_t'; end
rad = sTrain.radius_ini;
str = [som_pak_dir 'vsom ' ...
       sprintf('-cin %s -din %s -cout %s', temp_cin, temp_din, temp_cout) ...
       sprintf(' -rlen %d -alpha %g -alpha_type %s', rlen, aini, atype) ...
       sprintf(' -radius %g -rand %g ',rad,random_seed)];
if ~isempty(snapshotname),
  if snapshotinterval>0,
    str = [str, sprintf(' -snapfile %s -snapinterval %d',snapshotname,snapshotinterval)];
  end
end

fprintf(1,'Execute: %s\n',str);
if isunix,
  [status,w] = unix(str);
else
  [status,w] = dos(str);
end
if status, fprintf(1,'Execution failed.\n'); end
if ~isempty(w), fprintf(1,'%s\n',w); end

% read the result; the temporary files are removed also when reading
% fails (e.g. because 'vsom' did not produce an output file)
try
  sMap_temp = som_read_cod(temp_cout);
catch
  msg = lasterr;
  remove_tempfiles(tempfiles);
  error(msg);
end
remove_tempfiles(tempfiles);
M = sMap_temp.codebook;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Build / clean up the return arguments

% update structures
sTrain = som_set(sTrain,'time',datestr(now,0));
if struct_mode,
  sMap = som_set(sMap,'codebook',M,'mask',sTrain.mask,'neigh',sTrain.neigh);
  tl = length(sMap.trainhist);
  sMap.trainhist(tl+1) = sTrain;
else
  sMap = reshape(M,orig_size);
end

return;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% subfunctions

function remove_tempfiles(files)
%REMOVE_TEMPFILES Delete those of the listed files that exist.
%
%  files  (cell array of strings) names of the files to delete

for k = 1:length(files),
  if exist(files{k},'file') == 2,
    delete(files{k});
  end
end

return;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
443