Mercurial > hg > camir-aes2014
diff toolboxes/MIRtoolbox1.3.2/somtoolbox/som_sompaktrain.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/toolboxes/MIRtoolbox1.3.2/somtoolbox/som_sompaktrain.m Tue Feb 10 15:05:51 2015 +0000 @@ -0,0 +1,443 @@ +function [sMap, sTrain] = som_sompaktrain(sMap, D, varargin) + +%SOM_SOMPAKTRAIN Use SOM_PAK to train the Self-Organizing Map. +% +% [sM,sT] = som_sompaktrain(sM, D, [[argID,] value, ...]) +% +% sM = som_sompaktrain(sM,D); +% sM = som_sompaktrain(sM,sD,'alpha_type','inv'); +% [M,sT] = som_sompaktrain(M,D,'bubble','trainlen',10,'inv','hexa'); +% +% Input and output arguments ([]'s are optional): +% sM (struct) map struct, the trained and updated map is returned +% (matrix) codebook matrix of a self-organizing map +% size munits x dim or msize(1) x ... x msize(k) x dim +% The trained map codebook is returned. +% D (struct) training data; data struct +% (matrix) training data, size dlen x dim +% (string) name of data file +% [argID, (string) See below. The values which are unambiguous can +% value] (varies) be given without the preceeding argID. +% +% sT (struct) learning parameters used during the training +% +% Here are the valid argument IDs and corresponding values. The values which +% are unambiguous (marked with '*') can be given without the preceeding argID. +% 'msize' (vector) map size +% 'radius_ini' (scalar) neighborhood radius +% 'radius' = 'radius_ini' +% 'alpha_ini' (scalar) initial learning rate +% 'alpha' = 'alpha_ini' +% 'trainlen' (scalar) training length +% 'seed' (scalar) seed for random number generator +% 'snapfile' (string) base name for snapshot files +% 'snapinterval' (scalar) snapshot interval +% 'tlen_type' *(string) is the given trainlen 'samples' or 'epochs' +% 'train' *(struct) train struct, parameters for training +% 'sTrain','som_train' = 'train' +% 'alpha_type' *(string) learning rate function, 'inv' or 'linear' +% 'neigh' *(string) neighborhood function, 'gaussian' or 'bubble' +% 'topol' *(struct) topology struct +% 'som_topol','sTopol' = 'topol' +% 'lattice' *(string) map lattice, 'hexa' or 'rect' +% +% For more help, try 'type som_sompaktrain' or check out online documentation. +% See also SOM_MAKE, SOM_SEQTRAIN, SOM_BATCHTRAIN, SOM_TRAIN_STRUCT. + +%%%%%%%%%%%%% DETAILED DESCRIPTION %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% +% som_sompaktrain +% +% PURPOSE +% +% Use SOM_PAK to train the Self-Organizing Map. +% +% SYNTAX +% +% sM = som_sompaktrain(sM,D); +% sM = som_sompaktrain(sM,sD); +% sM = som_sompaktrain(...,'argID',value,...); +% sM = som_sompaktrain(...,value,...); +% [sM,sT] = som_sompaktrain(M,D,...); +% +% DESCRIPTION +% +% Trains the given SOM (sM or M above) with the given training data (sD or +% D) using SOM_PAK. If no optional arguments (argID, value) are +% given, a default training is done, the parameters are obtained from +% SOM_TRAIN_STRUCT function. Using optional arguments the training +% parameters can be specified. Returns the trained and updated SOM and a +% train struct which contains information on the training. +% +% Notice that the SOM_PAK program 'vsom' must be in the search path of your +% shell. Alternatively, you can set a variable 'SOM_PAKDIR' in the Matlab +% workspace to tell the som_sompaktrain where to find the 'vsom' program. +% +% Notice also that many of the training parameters are much more limited in +% values than when using SOM Toolbox function for training: +% - the map shape is always 'sheet' +% - only initial value for neighborhood radius can be given +% - neighborhood function can only be 'bubble' or 'gaussian' +% - only initial value for learning rate can be given +% - learning rate can only be 'linear' or 'inv' +% - mask cannot be used: all variables are always used in BMU search +% Any parameters not confirming to these restrictions will be converted +% so that they do before training. On the other hand, there are some +% additional options that are not present in the SOM Toolbox: +% - random seed +% - snapshot file and interval +% +% REQUIRED INPUT ARGUMENTS +% +% sM The map to be trained. +% (struct) map struct +% (matrix) codebook matrix (field .data of map struct) +% Size is either [munits dim], in which case the map grid +% dimensions (msize) should be specified with optional arguments, +% or [msize(1) ... msize(k) dim] in which case the map +% grid dimensions are taken from the size of the matrix. +% Lattice, by default, is 'rect' and shape 'sheet'. +% D Training data. +% (struct) data struct +% (matrix) data matrix, size [dlen dim] +% (string) name of data file +% +% OPTIONAL INPUT ARGUMENTS +% +% argID (string) Argument identifier string (see below). +% value (varies) Value for the argument (see below). +% +% The optional arguments can be given as 'argID',value -pairs. If an +% argument is given value multiple times, the last one is +% used. The valid IDs and corresponding values are listed below. The values +% which are unambiguous (marked with '*') can be given without the +% preceeding argID. +% +% 'msize' (vector) map grid dimensions. Default is the one +% in sM (field sM.topol.msize) or +% 'si = size(sM); msize = si(1:end-1);' +% if only a codebook matrix was given. +% 'radius_ini' (scalar) initial neighborhood radius +% 'radius' (scalar) = 'radius_ini' +% 'alpha_ini' (vector) initial learning rate +% 'alpha' (scalar) = 'alpha_ini' +% 'trainlen' (scalar) training length (see also 'tlen_type') +% 'seed' (scalar) seed for random number generator +% 'snapfile' (string) base name for snapshot files +% 'snapinterval' (scalar) snapshot interval +% 'tlen_type' *(string) is the trainlen argument given in 'epochs' or +% in 'samples'. Default is 'epochs'. +% 'train' *(struct) train struct, parameters for training. +% Default parameters, unless specified, +% are acquired using SOM_TRAIN_STRUCT (this +% also applies for 'trainlen', 'alpha_type', +% 'alpha_ini', 'radius_ini' and 'radius_fin'). +% 'sTrain', 'som_topol' (struct) = 'train' +% 'neigh' *(string) The used neighborhood function. Default is +% the one in sM (field '.neigh') or 'gaussian' +% if only a codebook matrix was given. The other +% possible value is 'bubble'. +% 'topol' *(struct) topology of the map. Default is the one +% in sM (field '.topol'). +% 'sTopol', 'som_topol' (struct) = 'topol' +% 'alpha_type' *(string) learning rate function, 'inv' or 'linear' +% 'lattice' *(string) map lattice. Default is the one in sM +% (field sM.topol.lattice) or 'rect' +% if only a codebook matrix was given. +% +% OUTPUT ARGUMENTS +% +% sM the trained map +% (struct) if a map struct was given as input argument, a +% map struct is also returned. The current training +% is added to the training history (sM.trainhist). +% The 'neigh' and 'mask' fields of the map struct +% are updated to match those of the training. +% (matrix) if a matrix was given as input argument, a matrix +% is also returned with the same size as the input +% argument. +% sT (struct) train struct; information of the accomplished training +% +% EXAMPLES +% +% Simplest case: +% sM = som_sompaktrain(sM,D); +% sM = som_sompaktrain(sM,sD); +% +% The change training parameters, the optional arguments 'train', +% 'neigh','mask','trainlen','radius','radius_ini', 'alpha', +% 'alpha_type' and 'alpha_ini' are used. +% sM = som_sompaktrain(sM,D,'bubble','trainlen',10,'radius_ini',3); +% +% Another way to specify training parameters is to create a train struct: +% sTrain = som_train_struct(sM,'dlen',size(D,1),'algorithm','seq'); +% sTrain = som_set(sTrain,'neigh','gaussian'); +% sM = som_sompaktrain(sM,D,sTrain); +% +% You don't necessarily have to use the map struct, but you can operate +% directly with codebook matrices. However, in this case you have to +% specify the topology of the map in the optional arguments. The +% following commads are identical (M is originally a 200 x dim sized matrix): +% M = som_sompaktrain(M,D,'msize',[20 10],'lattice','hexa'); +% +% M = som_sompaktrain(M,D,'msize',[20 10],'hexa'); +% +% sT= som_set('som_topol','msize',[20 10],'lattice','hexa'); +% M = som_sompaktrain(M,D,sT); +% +% M = reshape(M,[20 10 dim]); +% M = som_sompaktrain(M,D,'hexa'); +% +% The som_sompaktrain also returns a train struct with information on the +% accomplished training. This is the same one as is added to the end of the +% trainhist field of map struct, in case a map struct is given. +% [M,sTrain] = som_sompaktrain(M,D,'msize',[20 10]); +% +% [sM,sTrain] = som_sompaktrain(sM,D); % sM.trainhist(end)==sTrain +% +% SEE ALSO +% +% som_make Initialize and train a SOM using default parameters. +% som_seqtrain Train SOM with sequential algorithm. +% som_batchtrain Train SOM with batch algorithm. +% som_train_struct Determine default training parameters. + +% Copyright (c) 1999-2000 by the SOM toolbox programming team. +% http://www.cis.hut.fi/projects/somtoolbox/ + +% Version 2.0beta juuso 151199 + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Check arguments + +error(nargchk(2, Inf, nargin)); % check the number of input arguments + +% map +struct_mode = isstruct(sMap); +if struct_mode, + sTopol = sMap.topol; +else + orig_size = size(sMap); + if ndims(sMap) > 2, + si = size(sMap); dim = si(end); msize = si(1:end-1); + M = reshape(sMap,[prod(msize) dim]); + else + msize = [orig_size(1) 1]; + dim = orig_size(2); + end + sMap = som_map_struct(dim,'msize',msize); + sTopol = sMap.topol; +end +[munits dim] = size(sMap.codebook); + +% data +givendatafile = ''; +if ischar(D), + data_name = D; + givendatafile = D; + D = []; + dlen = NaN; +else + if isstruct(D), + data_name = D.name; + D = D.data; + else + data_name = inputname(2); + end + D = D(find(sum(isnan(D),2) < dim),:); % remove empty vectors from the data + [dlen ddim] = size(D); % check input dimension + if ddim ~= dim, error('Map and data dimensions must agree.'); end +end + +% varargin +sTrain = som_set('som_train','algorithm','seq',... + 'neigh',sMap.neigh,... + 'mask',ones(dim,1),... + 'data_name',data_name); +tlen_type = 'epochs'; +random_seed = 0; +snapshotname = ''; +snapshotinterval = 0; + +i=1; +while i<=length(varargin), + argok = 1; + if ischar(varargin{i}), + switch varargin{i}, + % argument IDs + case 'msize', i=i+1; sTopol.msize = varargin{i}; + case 'lattice', i=i+1; sTopol.lattice = varargin{i}; + case 'neigh', i=i+1; sTrain.neigh = varargin{i}; + case 'trainlen', i=i+1; sTrain.trainlen = varargin{i}; + case 'tlen_type', i=i+1; tlen_type = varargin{i}; + case 'radius_ini', i=i+1; sTrain.radius_ini = varargin{i}; + case 'radius', i=i+1; sTrain.radius_ini = varargin{i}(1); + case 'alpha_type', i=i+1; sTrain.alpha_type = varargin{i}; + case 'alpha_ini', i=i+1; sTrain.alpha_ini = varargin{i}; + case 'alpha', i=i+1; sTrain.alpha_ini = varargin{i}(1); + case 'seed', i=i+1; random_seed = varargin{i}; + case 'snapshotname',i=i+1; snapshotname = varargin{i}; + case 'snapshotinterval',i=i+1; snapshotinterval = varargin{i}; + case {'sTrain','train','som_train'}, i=i+1; sTrain = varargin{i}; + case {'topol','sTopol','som_topol'}, + i=i+1; + sTopol = varargin{i}; + if prod(sTopol.msize) ~= munits, + error('Given map grid size does not match the codebook size.'); + end + % unambiguous values + case {'inv','linear'}, sTrain.alpha_type = varargin{i}; + case {'hexa','rect'}, sTopol.lattice = varargin{i}; + case {'gaussian','bubble'}, sTrain.neigh = varargin{i}; + case {'epochs','samples'}, tlen_type = varargin{i}; + otherwise argok=0; + end + elseif isstruct(varargin{i}) & isfield(varargin{i},'type'), + switch varargin{i}(1).type, + case 'som_topol', + sTopol = varargin{i}; + if prod(sTopol.msize) ~= munits, + error('Given map grid size does not match the codebook size.'); + end + case 'som_train', sTrain = varargin{i}; + otherwise argok=0; + end + else + argok = 0; + end + if ~argok, + disp(['(som_sompaktrain) Ignoring invalid argument #' num2str(i+2)]); + end + i = i+1; +end + +% check topology +if struct_mode, + if ~strcmp(sTopol.lattice,sMap.topol.lattice) | ... + ~strcmp(sTopol.shape,sMap.topol.shape) | ... + any(sTopol.msize ~= sMap.topol.msize), + warning('Changing the original map topology.'); + end +end +sMap.topol = sTopol; + +% complement the training struct +if ~isnan(dlen), + sTrain = som_train_struct(sTrain,sMap,'dlen',dlen); +else + sTrain = som_train_struct(sTrain,sMap); +end +if isempty(sTrain.mask), sTrain.mask = ones(dim,1); end + +% training length +if strcmp(tlen_type,'epochs'), + if isnan(dlen), + error('Training length given as epochs, but data length is not known.\n'); + else + rlen = sTrain.trainlen*dlen; + end +else + rlen = sTrain.trainlen; + sTrain.trainlen = sTrain.trainlen/dlen; +end + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% check arguments + +% mask +if any(sTrain.mask~=1), + sTrain.mask = ones(dim,1); + fprintf(1,'Ignoring given mask.\n'); +end + +% learning rate +if strcmp(sTrain.alpha_type,'power'), + sTrain.alpha_type = 'inv'; + fprintf(1,'Using ''inv'' learning rate type instead of ''power''\n'); +end + +% neighborhood +if any(strcmp(sTrain.neigh,{'cutgauss','ep'})), + fprintf(1,'Using ''gaussian'' neighborhood function instead of %s.\n',sTrain.neigh); + sTrain.neigh = 'gaussian'; +end + +% map shape +if ~strcmp(sMap.topol.shape,'sheet'), + fprintf(1,'Using ''sheet'' map shape of %s.\n',sMap.topol.shape); + sMap.topol.shape = 'sheet'; +end + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Action + +% write files +if ~isempty(givendatafile), + temp_din = givendatafile; +else + temp_din = tempname; + som_write_data(D, temp_din, 'x') +end +temp_cin = tempname; +som_write_cod(sMap, temp_cin) +temp_cout = tempname; + +% check if the environment variable 'SOM_PAKDIR' has been defined +if any(strcmp('SOM_PAKDIR', evalin('base', 'who'))) + som_pak_dir = evalin('base', 'SOM_PAKDIR'); +else + som_pak_dir = ''; +end +if ~isempty(som_pak_dir) & ~strncmp(som_pak_dir(end), '/', 1) + som_pak_dir(end + 1) = '/'; +end + +aini = sTrain.alpha_ini; +atype = sTrain.alpha_type; +if strcmp(atype,'inv'), atype = 'inverse_t'; end +rad = sTrain.radius_ini; +str = [som_pak_dir 'vsom ' ... + sprintf('-cin %s -din %s -cout %s', temp_cin, temp_din, temp_cout) ... + sprintf(' -rlen %d -alpha %g -alpha_type %s', rlen, aini, atype) ... + sprintf(' -radius %g -rand %g ',rad,random_seed)]; +if ~isempty(snapshotname) & snapinterval>0, + str = [str, sprintf(' -snapfile %s -snapinterval %d',snapshotname,snapshotinterval)]; +end + +fprintf(1,'Execute: %s\n',str); +if isunix, + [status,w] = unix(str); + if status, fprintf(1,'Execution failed.\n'); end + if ~isempty(w), fprintf(1,'%s\n',w); end +else + [status,w] = dos(str); + if status, fprintf(1,'Execution failed.\n'); end + if ~isempty(w), fprintf(1,'%s\n',w); end +end + +sMap_temp = som_read_cod(temp_cout); +M = sMap_temp.codebook; + +if isunix + unix(['/bin/rm -f ' temp_din ' ' temp_cin ' ' temp_cout]); +else + dos(['del ' temp_din ' ' temp_cin ' ' temp_cout]); +end + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Build / clean up the return arguments + +% update structures +sTrain = som_set(sTrain,'time',datestr(now,0)); +if struct_mode, + sMap = som_set(sMap,'codebook',M,'mask',sTrain.mask,'neigh',sTrain.neigh); + tl = length(sMap.trainhist); + sMap.trainhist(tl+1) = sTrain; +else + sMap = reshape(M,orig_size); +end + +return; + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +