diff toolboxes/MIRtoolbox1.3.2/somtoolbox/som_norm_variable.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/toolboxes/MIRtoolbox1.3.2/somtoolbox/som_norm_variable.m	Tue Feb 10 15:05:51 2015 +0000
@@ -0,0 +1,533 @@
+function [x,sNorm] = som_norm_variable(x, method, operation)
+
+%SOM_NORM_VARIABLE Normalize or denormalize a scalar variable.
+%
+% [x,sNorm] = som_norm_variable(x, method, operation)
+%
+%   xnew = som_norm_variable(x,'var','do');
+%   [dummy,sN] = som_norm_variable(x,'log','init');
+%   [xnew,sN]  = som_norm_variable(x,sN,'do');
+%   xorig      = som_norm_variable(xnew,sN,'undo');
+%
+%  Input and output arguments: 
+%   x         (vector) a set of values of a scalar variable for
+%                      which the (de)normalization is performed.
+%                      The processed values are returned.
+%   method    (string) identifier for a normalization method: 'var',
+%                      'range', 'log', 'logistic', 'histD', or 'histC'.
+%                      A normalization struct with default values is created.
+%             (struct) normalization struct, or an array of such
+%             (cellstr) first string gives normalization operation, and the
+%                      second gives denormalization operation, with x 
+%                      representing the variable, for example: 
+%                      {'x+2','x-2}, or {'exp(-x)','-log(x)'} or {'round(x)'}.
+%                      Note that in the last case, no denorm operation is 
+%                      defined. 
+%   operation (string) the operation to be performed: 'init', 'do' or 'undo'
+%                     
+%   sNorm     (struct) updated normalization struct/struct array
+%
+% For more help, try 'type som_norm_variable' or check out online documentation.
+% See also SOM_NORMALIZE, SOM_DENORMALIZE.
+
+%%%%%%%%%%%%% DETAILED DESCRIPTION %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%
+% som_norm_variable
+%
+% PURPOSE
+%
+% Initialize, apply and undo normalizations on a given vector of
+% scalar values.
+%
+% SYNTAX
+%
+%  xnew = som_norm_variable(x,method,operation)
+%  xnew = som_norm_variable(x,sNorm,operation)
+%  [xnew,sNorm] = som_norm_variable(...)
+%
+% DESCRIPTION
+%
+% This function is used to initialize, apply and undo normalizations
+% on scalar variables. It is the low-level function that upper-level
+% functions SOM_NORMALIZE and SOM_DENORMALIZE utilize to actually (un)do
+% the normalizations.
+%
+% Normalizations are typically performed to control the variance of 
+% vector components. If some vector components have variance which is
+% significantly higher than the variance of other components, those
+% components will dominate the map organization. Normalization of 
+% the variance of vector components (method 'var') is used to prevent 
+% that. In addition to variance normalization, other methods have
+% been implemented as well (see list below). 
+%
+% Usually normalizations convert the variable values so that they no 
+% longer make any sense: the values are still ordered, but their range 
+% may have changed so radically that interpreting the numbers in the 
+% original context is very hard. For this reason all implemented methods
+% are (more or less) revertible. The normalizations are monotonic
+% and information is saved so that they can be undone. Also, the saved
+% information makes it possible to apply the EXACTLY SAME normalization
+% to another set of values. The normalization information is determined
+% with 'init' operation, while 'do' and 'undo' operations are used to
+% apply or revert the normalization. 
+%
+% The normalization information is saved in a normalization struct, 
+% which is returned as the second argument of this function. Note that 
+% normalization operations may be stacked. In this case, normalization 
+% structs are positioned in a struct array. When applied, the array is 
+% gone through from start to end, and when undone, in reverse order.
+%
+%    method  description
+%
+%    'var'   Variance normalization. A linear transformation which 
+%            scales the values such that their variance=1. This is
+%            convenient way to use Mahalanobis distance measure without
+%            actually changing the distance calculation procedure.
+%
+%    'range' Normalization of range of values. A linear transformation
+%            which scales the values between [0,1]. 
+%
+%    'log'   Logarithmic normalization. In many cases the values of
+%            a vector component are exponentially distributed. This 
+%            normalization is a good way to get more resolution to
+%            (the low end of) that vector component. What this 
+%            actually does is a non-linear transformation: 
+%               x_new = log(x_old - m + 1) 
+%            where m=min(x_old) and log is the natural logarithm. 
+%            Applying the transformation to a value which is lower 
+%            than m-1 will give problems, as the result is then complex.
+%            If the minimum for values is known a priori, 
+%            it might be a good idea to initialize the normalization with
+%              [dummy,sN] = som_norm_variable(minimum,'log','init');
+%            and normalize only after this: 
+%              x_new = som_norm_variable(x,sN,'do');
+%
+%    'logistic' or softmax normalization. This normalization ensures
+%            that all values in the future, too, are within the range
+%            [0,1]. The transformation is more-or-less linear in the 
+%            middle range (around mean value), and has a smooth 
+%            nonlinearity at both ends which ensures that all values
+%            are within the range. The data is first scaled as in 
+%            variance normalization: 
+%               x_scaled = (x_old - mean(x_old))/std(x_old)
+%            and then transformed with the logistic function
+%               x_new = 1/(1+exp(-x_scaled))
+% 
+%    'histD' Discrete histogram equalization. Non-linear. Orders the 
+%            values and replaces each value by its ordinal number. 
+%            Finally, scales the values such that they are between [0,1].
+%            Useful for both discrete and continuous variables, but as 
+%            the saved normalization information consists of all 
+%            unique values of the initialization data set, it may use
+%            considerable amounts of memory. If the variable can get
+%            more than a few values (say, 20), it might be better to
+%            use 'histC' method below. Another important note is that
+%            this method is not exactly revertible if it is applied
+%            to values which are not part of the original value set.
+%            
+%    'histC' Continuous histogram equalization. Actually, a partially
+%            linear transformation which tries to do something like 
+%            histogram equalization. The value range is divided to 
+%            a number of bins such that the number of values in each
+%            bin is (almost) the same. The values are transformed 
+%            linearly in each bin. For example, values in bin number 3
+%            are scaled between [3,4[. Finally, all values are scaled
+%            between [0,1]. The number of bins is the square root
+%            of the number of unique values in the initialization set,
+%            rounded up. The resulting histogram equalization is not
+%            as good as the one that 'histD' makes, but the benefit
+%            is that it is exactly revertible - even outside the 
+%            original value range (although the results may be funny).
+%
+%    'eval'  With this method, freeform normalization operations can be 
+%            specified. The parameter field contains strings to be 
+%            evaluated with 'eval' function, with variable name 'x'
+%            representing the variable itself. The first string is 
+%            the normalization operation, and the second is a 
+%            denormalization operation. If the denormalization operation
+%            is empty, it is ignored.
+% 
+% INPUT ARGUMENTS
+%
+%   x          (vector) The scalar values to which the normalization      
+%                       operation is applied.
+%                     
+%   method              The normalization specification.
+%              (string) Identifier for a normalization method: 'var', 
+%                       'range', 'log', 'logistic', 'histD' or 'histC'. 
+%                       Corresponding default normalization struct is created.
+%              (struct) normalization struct 
+%              (struct array) of normalization structs, applied to 
+%                       x one after the other
+%              (cellstr) of length 
+%              (cellstr array) first string gives normalization operation, and 
+%                       the second gives denormalization operation, with x 
+%                       representing the variable, for example: 
+%                       {'x+2','x-2}, or {'exp(-x)','-log(x)'} or {'round(x)'}.
+%                       Note that in the last case, no denorm operation is 
+%                       defined. 
+%
+%               note: if the method is given as struct(s), it is
+%                     applied (done or undone, as specified by operation)
+%                     regardless of what the value of '.status' field
+%                     is in the struct(s). Only if the status is
+%                     'uninit', the undoing operation is halted.
+%                     Anyhow, the '.status' fields in the returned 
+%                     normalization struct(s) is set to approriate value.
+%   
+%   operation  (string) The operation to perform: 'init' to initialize
+%                       the normalization struct, 'do' to perform the 
+%                       normalization, 'undo' to undo the normalization, 
+%                       if possible. If operation 'do' is given, but the
+%                       normalization struct has not yet been initialized,
+%                       it is initialized using the given data (x).
+%
+% OUTPUT ARGUMENTS
+% 
+%   x        (vector) Appropriately processed values. 
+%
+%   sNorm    (struct) Updated normalization struct/struct array. If any,
+%                     the '.status' and '.params' fields are updated.
+% 
+% EXAMPLES
+%
+%  To initialize and apply a normalization on a set of scalar values: 
+%
+%    [x_new,sN] = som_norm_variable(x_old,'var','do'); 
+%
+%  To just initialize, use: 
+% 
+%    [dummy,sN] = som_norm_variable(x_old,'var','init'); 
+% 
+%  To undo the normalization(s): 
+%
+%    x_orig = som_norm_variable(x_new,sN,'undo');
+%
+%  Typically, normalizations of data structs/sets are handled using
+%  functions SOM_NORMALIZE and SOM_DENORMALIZE. However, when only the
+%  values of a single variable are of interest, SOM_NORM_VARIABLE may 
+%  be useful. For example, assume one wants to apply the normalization
+%  done on a component (i) of a data struct (sD) to a new set of values 
+%  (x) of that component. With SOM_NORM_VARIABLE this can be done with: 
+%
+%    x_new = som_norm_variable(x,sD.comp_norm{i},'do'); 
+% 
+%  Now, as the normalizations in sD.comp_norm{i} have already been 
+%  initialized with the original data set (presumably sD.data), 
+%  the EXACTLY SAME normalization(s) can be applied to the new values.
+%  The same thing can be done with SOM_NORMALIZE function, too: 
+%
+%    x_new = som_normalize(x,sD.comp_norm{i}); 
+%
+%  Or, if the new data set were in variable D - a matrix of same
+%  dimension as the original data set: 
+%
+%    D_new = som_normalize(D,sD,i);
+%
+% SEE ALSO
+%  
+%  som_normalize    Add/apply/redo normalizations for a data struct/set.
+%  som_denormalize  Undo normalizations of a data struct/set.
+
+% Copyright (c) 1998-2000 by the SOM toolbox programming team.
+% http://www.cis.hut.fi/projects/somtoolbox/
+
+% Version 2.0beta juuso 151199 170400 150500
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% check arguments
+
+error(nargchk(3, 3, nargin));  % check no. of input arguments is correct
+
+% method
+sNorm = []; 
+if ischar(method) 
+  if any(strcmp(method,{'var','range','log','logistic','histD','histC'})), 
+    sNorm = som_set('som_norm','method',method);
+  else 
+    method = cellstr(method); 
+  end
+end
+if iscell(method), 
+  if length(method)==1 & isstruct(method{1}), sNorm = method{1}; 
+  else
+    if length(method)==1 | isempty(method{2}), method{2} = 'x'; end
+    sNorm = som_set('som_norm','method','eval','params',method);
+  end
+else 
+  sNorm = method; 
+end
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% action
+
+order = [1:length(sNorm)]; 
+if length(order)>1 & strcmp(operation,'undo'), order = order(end:-1:1); end
+
+for i=order, 
+
+  % initialize
+  if strcmp(operation,'init') | ...
+     (strcmp(operation,'do') & strcmp(sNorm(i).status,'uninit')), 
+
+    % case method = 'hist'
+    if strcmp(sNorm(i).method,'hist'), 
+      inds = find(~isnan(x) & ~isinf(x));
+      if length(unique(x(inds)))>20, sNorm(i).method = 'histC'; 
+      else sNorm{i}.method = 'histD'; end
+    end
+
+    switch(sNorm(i).method), 
+    case 'var',   params = norm_variance_init(x);
+    case 'range', params = norm_scale01_init(x);
+    case 'log',   params = norm_log_init(x);
+    case 'logistic', params = norm_logistic_init(x);
+    case 'histD', params = norm_histeqD_init(x);
+    case 'histC', params = norm_histeqC_init(x);
+    case 'eval',  params = sNorm(i).params; 
+    otherwise, 
+      error(['Unrecognized method: ' sNorm(i).method]); 
+    end
+    sNorm(i).params = params;
+    sNorm(i).status = 'undone';
+  end
+
+  % do / undo
+  if strcmp(operation,'do'), 
+    switch(sNorm(i).method), 
+    case 'var',   x = norm_scale_do(x,sNorm(i).params);
+    case 'range', x = norm_scale_do(x,sNorm(i).params);
+    case 'log',   x = norm_log_do(x,sNorm(i).params);
+    case 'logistic', x = norm_logistic_do(x,sNorm(i).params);
+    case 'histD', x = norm_histeqD_do(x,sNorm(i).params);
+    case 'histC', x = norm_histeqC_do(x,sNorm(i).params);
+    case 'eval',  x = norm_eval_do(x,sNorm(i).params);
+    otherwise, 
+      error(['Unrecognized method: ' sNorm(i).method]);
+    end
+    sNorm(i).status = 'done';
+
+  elseif strcmp(operation,'undo'), 
+
+    if strcmp(sNorm(i).status,'uninit'), 
+      warning('Could not undo: uninitialized normalization struct.')
+      break;
+    end
+    switch(sNorm(i).method), 
+    case 'var',   x = norm_scale_undo(x,sNorm(i).params);
+    case 'range', x = norm_scale_undo(x,sNorm(i).params);
+    case 'log',   x = norm_log_undo(x,sNorm(i).params);    
+    case 'logistic', x = norm_logistic_undo(x,sNorm(i).params);
+    case 'histD', x = norm_histeqD_undo(x,sNorm(i).params);
+    case 'histC', x = norm_histeqC_undo(x,sNorm(i).params);
+    case 'eval',  x = norm_eval_undo(x,sNorm(i).params);
+    otherwise, 
+      error(['Unrecognized method: ' sNorm(i).method]);
+    end
+    sNorm(i).status = 'undone';
+
+  elseif ~strcmp(operation,'init'),
+
+    error(['Unrecognized operation: ' operation])
+
+  end
+end  
+
+return;
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% subfunctions
+
+% linear scaling
+
+function p = norm_variance_init(x)
+  inds = find(~isnan(x) & isfinite(x));
+  p = [mean(x(inds)), std(x(inds))];
+  if p(2) == 0, p(2) = 1; end
+  %end of norm_variance_init
+
+function p = norm_scale01_init(x)
+  inds = find(~isnan(x) & isfinite(x));
+  mi = min(x(inds)); 
+  ma = max(x(inds));
+  if mi == ma, p = [mi, 1]; else p = [mi, ma-mi]; end
+  %end of norm_scale01_init  
+
+function x = norm_scale_do(x,p)
+  x = (x - p(1)) / p(2);
+  % end of norm_scale_do
+
+function x = norm_scale_undo(x,p)
+  x = x * p(2) + p(1);
+  % end of norm_scale_undo
+
+% logarithm
+
+function p = norm_log_init(x)
+  inds = find(~isnan(x) & isfinite(x));
+  p = min(x(inds));
+  % end of norm_log_init
+
+function x = norm_log_do(x,p)
+  x = log(x - p +1); 
+  % if any(~isreal(x)), ok = 0; end
+  % end of norm_log_do 
+
+function x = norm_log_undo(x,p)
+  x = exp(x) -1 + p; 
+  % end of norm_log_undo 
+
+% logistic
+
+function p = norm_logistic_init(x)
+  inds = find(~isnan(x) & isfinite(x));
+  p = [mean(x(inds)), std(x(inds))];
+  if p(2)==0, p(2) = 1; end
+  % end of norm_logistic_init
+
+function x = norm_logistic_do(x,p)
+  x = (x-p(1))/p(2);
+  x = 1./(1+exp(-x));
+  % end of norm_logistic_do
+
+function x = norm_logistic_undo(x,p)
+  x = log(x./(1-x));
+  x = x*p(2)+p(1);
+  % end of norm_logistic_undo
+
+% histogram equalization for discrete values
+
+function p = norm_histeqD_init(x)
+  inds = find(~isnan(x) & ~isinf(x));
+  p = unique(x(inds));
+  % end of norm_histeqD_init
+
+function x = norm_histeqD_do(x,p)
+  bins = length(p);
+  inds = find(~isnan(x) & ~isinf(x))';
+  for i = inds, 
+    [dummy ind] = min(abs(x(i) - p));
+    % data item closer to the left-hand bin wall is indexed after RH wall
+    if x(i) > p(ind) & ind < bins, 
+      x(i) = ind + 1;  
+    else 
+      x(i) = ind;
+    end
+  end
+  x = (x-1)/(bins-1); % normalization between [0,1]
+  % end of norm_histeqD_do 
+
+function x = norm_histeqD_undo(x,p)
+  bins = length(p);
+  x = round(x*(bins-1)+1);
+  inds = find(~isnan(x) & ~isinf(x));
+  x(inds) = p(x(inds));
+  % end of norm_histeqD_undo
+
+% histogram equalization with partially linear functions
+
+function p = norm_histeqC_init(x)
+  % investigate x
+  inds = find(~isnan(x) & ~isinf(x));
+  samples = length(inds);
+  xs = unique(x(inds));
+  mi = xs(1);
+  ma = xs(end);
+  % decide number of limits
+  lims = ceil(sqrt(length(xs))); % 2->2,100->10,1000->32,10000->100
+  % decide limits
+  if lims==1,     
+    p = [mi, mi+1];
+    lims = 2; 
+  elseif lims==2, 
+    p = [mi, ma];
+  else
+    p = zeros(lims,1);   
+    p(1) = mi; 
+    p(end) = ma;
+    binsize = zeros(lims-1,1); b = 1; avebinsize = samples/(lims-1);
+    for i=1:(length(xs)-1), 
+      binsize(b) = binsize(b) + sum(x==xs(i)); 
+      if binsize(b) >= avebinsize, 
+        b = b + 1;
+        p(b) = (xs(i)+xs(i+1))/2;
+      end
+      if b==(lims-1), 
+        binsize(b) = samples-sum(binsize); break;
+      else
+        avebinsize = (samples-sum(binsize))/(lims-1-b);
+      end
+    end
+  end
+  % end of norm_histeqC_init
+
+function x = norm_histeqC_do(x,p)
+  xnew = x; 
+  lims = length(p);
+  % handle values below minimum
+  r = p(2)-p(1); 
+  inds = find(x<=p(1) & isfinite(x)); 
+  if any(inds), xnew(inds) = 0-(p(1)-x(inds))/r; end 
+  % handle values above maximum
+  r = p(end)-p(end-1); 
+  inds = find(x>p(end) & isfinite(x)); 
+  if any(inds), xnew(inds) = lims-1+(x(inds)-p(end))/r; end
+  % handle all other values
+  for i=1:(lims-1), 
+    r0 = p(i); r1 = p(i+1); r = r1-r0; 
+    inds = find(x>r0 & x<=r1); 
+    if any(inds), xnew(inds) = i-1+(x(inds)-r0)/r; end
+  end
+  % scale so that minimum and maximum correspond to 0 and 1
+  x = xnew/(lims-1);
+  % end of norm_histeqC_do
+
+function x = norm_histeqC_undo(x,p)
+  xnew = x; 
+  lims = length(p); 
+  % scale so that 0 and 1 correspond to minimum and maximum
+  x = x*(lims-1);
+
+  % handle values below minimum
+  r = p(2)-p(1); 
+  inds = find(x<=0 & isfinite(x)); 
+  if any(inds), xnew(inds) = x(inds)*r + p(1); end 
+  % handle values above maximum
+  r = p(end)-p(end-1); 
+  inds = find(x>lims-1 & isfinite(x)); 
+  if any(inds), xnew(inds) = (x(inds)-(lims-1))*r+p(end); end
+  % handle all other values
+  for i=1:(lims-1), 
+    r0 = p(i); r1 = p(i+1); r = r1-r0; 
+    inds = find(x>i-1 & x<=i); 
+    if any(inds), xnew(inds) = (x(inds)-(i-1))*r + r0; end
+  end
+  x = xnew;
+  % end of norm_histeqC_undo
+
+% eval
+
+function p = norm_eval_init(method)
+  p = method;
+  %end of norm_eval_init
+
+function x = norm_eval_do(x,p)
+  x_tmp = eval(p{1});
+  if size(x_tmp,1)>=1 & size(x,1)>=1 & ...
+     size(x_tmp,2)==1 & size(x,2)==1,
+    x = x_tmp;
+  end
+  %end of norm_eval_do
+
+function x = norm_eval_undo(x,p)
+  x_tmp = eval(p{2});
+  if size(x_tmp,1)>=1 & size(x,1)>=1 & ...
+     size(x_tmp,2)==1 & size(x,2)==1,
+    x = x_tmp;
+  end
+  %end of norm_eval_undo
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+
+