Mercurial > hg > camir-aes2014
comparison toolboxes/MIRtoolbox1.3.2/somtoolbox/som_norm_variable.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e9a9cd732c1e |
---|---|
function [x,sNorm] = som_norm_variable(x, method, operation)

%SOM_NORM_VARIABLE Normalize or denormalize a scalar variable.
%
% [x,sNorm] = som_norm_variable(x, method, operation)
%
%   xnew = som_norm_variable(x,'var','do');
%   [dummy,sN] = som_norm_variable(x,'log','init');
%   [xnew,sN]  = som_norm_variable(x,sN,'do');
%   xorig      = som_norm_variable(xnew,sN,'undo');
%
%  Input and output arguments:
%   x         (vector)  a set of values of a scalar variable for
%                       which the (de)normalization is performed.
%                       The processed values are returned.
%   method    (string)  identifier for a normalization method: 'var',
%                       'range', 'log', 'logistic', 'histD', or 'histC'.
%                       A normalization struct with default values is created.
%             (struct)  normalization struct, or an array of such
%             (cellstr) first string gives normalization operation, and the
%                       second gives denormalization operation, with x
%                       representing the variable, for example:
%                       {'x+2','x-2'}, or {'exp(-x)','-log(x)'} or {'round(x)'}.
%                       Note that in the last case, no denorm operation is
%                       defined.
%   operation (string)  the operation to be performed: 'init', 'do' or 'undo'
%
%   sNorm     (struct)  updated normalization struct/struct array
%
% For more help, try 'type som_norm_variable' or check out online documentation.
% See also SOM_NORMALIZE, SOM_DENORMALIZE.

%%%%%%%%%%%%% DETAILED DESCRIPTION %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% som_norm_variable
%
% PURPOSE
%
% Initialize, apply and undo normalizations on a given vector of
% scalar values.
%
% SYNTAX
%
%  xnew = som_norm_variable(x,method,operation)
%  xnew = som_norm_variable(x,sNorm,operation)
%  [xnew,sNorm] = som_norm_variable(...)
%
% DESCRIPTION
%
% This function is used to initialize, apply and undo normalizations
% on scalar variables. It is the low-level function that upper-level
% functions SOM_NORMALIZE and SOM_DENORMALIZE utilize to actually (un)do
% the normalizations.
%
% Normalizations are typically performed to control the variance of
% vector components. If some vector components have variance which is
% significantly higher than the variance of other components, those
% components will dominate the map organization. Normalization of
% the variance of vector components (method 'var') is used to prevent
% that. In addition to variance normalization, other methods have
% been implemented as well (see list below).
%
% Usually normalizations convert the variable values so that they no
% longer make any sense: the values are still ordered, but their range
% may have changed so radically that interpreting the numbers in the
% original context is very hard. For this reason all implemented methods
% are (more or less) revertible. The normalizations are monotonic
% and information is saved so that they can be undone. Also, the saved
% information makes it possible to apply the EXACTLY SAME normalization
% to another set of values. The normalization information is determined
% with 'init' operation, while 'do' and 'undo' operations are used to
% apply or revert the normalization.
%
% The normalization information is saved in a normalization struct,
% which is returned as the second argument of this function. Note that
% normalization operations may be stacked. In this case, normalization
% structs are positioned in a struct array. When applied, the array is
% gone through from start to end, and when undone, in reverse order.
%
%    method  description
%
%    'var'   Variance normalization. A linear transformation which
%            scales the values such that their variance=1. This is
%            convenient way to use Mahalanobis distance measure without
%            actually changing the distance calculation procedure.
%
%    'range' Normalization of range of values. A linear transformation
%            which scales the values between [0,1].
%
%    'log'   Logarithmic normalization. In many cases the values of
%            a vector component are exponentially distributed. This
%            normalization is a good way to get more resolution to
%            (the low end of) that vector component. What this
%            actually does is a non-linear transformation:
%               x_new = log(x_old - m + 1)
%            where m=min(x_old) and log is the natural logarithm.
%            Applying the transformation to a value which is lower
%            than m-1 will give problems, as the result is then complex.
%            If the minimum for values is known a priori,
%            it might be a good idea to initialize the normalization with
%              [dummy,sN] = som_norm_variable(minimum,'log','init');
%            and normalize only after this:
%              x_new = som_norm_variable(x,sN,'do');
%
%    'logistic' or softmax normalization. This normalization ensures
%            that all values in the future, too, are within the range
%            [0,1]. The transformation is more-or-less linear in the
%            middle range (around mean value), and has a smooth
%            nonlinearity at both ends which ensures that all values
%            are within the range. The data is first scaled as in
%            variance normalization:
%               x_scaled = (x_old - mean(x_old))/std(x_old)
%            and then transformed with the logistic function
%               x_new = 1/(1+exp(-x_scaled))
%
%    'histD' Discrete histogram equalization. Non-linear. Orders the
%            values and replaces each value by its ordinal number.
%            Finally, scales the values such that they are between [0,1].
%            Useful for both discrete and continuous variables, but as
%            the saved normalization information consists of all
%            unique values of the initialization data set, it may use
%            considerable amounts of memory. If the variable can get
%            more than a few values (say, 20), it might be better to
%            use 'histC' method below. Another important note is that
%            this method is not exactly revertible if it is applied
%            to values which are not part of the original value set.
%
%    'histC' Continuous histogram equalization. Actually, a partially
%            linear transformation which tries to do something like
%            histogram equalization. The value range is divided to
%            a number of bins such that the number of values in each
%            bin is (almost) the same. The values are transformed
%            linearly in each bin. For example, values in bin number 3
%            are scaled between [3,4[. Finally, all values are scaled
%            between [0,1]. The number of bins is the square root
%            of the number of unique values in the initialization set,
%            rounded up. The resulting histogram equalization is not
%            as good as the one that 'histD' makes, but the benefit
%            is that it is exactly revertible - even outside the
%            original value range (although the results may be funny).
%
%    'eval'  With this method, freeform normalization operations can be
%            specified. The parameter field contains strings to be
%            evaluated with 'eval' function, with variable name 'x'
%            representing the variable itself. The first string is
%            the normalization operation, and the second is a
%            denormalization operation. If the denormalization operation
%            is empty, it is ignored.
%
% INPUT ARGUMENTS
%
%   x          (vector) The scalar values to which the normalization
%                       operation is applied.
%
%   method              The normalization specification.
%              (string) Identifier for a normalization method: 'var',
%                       'range', 'log', 'logistic', 'histD' or 'histC'.
%                       Corresponding default normalization struct is created.
%              (struct) normalization struct
%              (struct array) of normalization structs, applied to
%                       x one after the other
%              (cellstr) of one or two strings: the first string gives
%                       the normalization operation, and the second
%                       gives the denormalization operation, with x
%                       representing the variable, for example:
%                       {'x+2','x-2'}, or {'exp(-x)','-log(x)'} or {'round(x)'}.
%                       Note that in the last case, no denorm operation is
%                       defined.
%
%               note: if the method is given as struct(s), it is
%                     applied (done or undone, as specified by operation)
%                     regardless of what the value of '.status' field
%                     is in the struct(s). Only if the status is
%                     'uninit', the undoing operation is halted.
%                     Anyhow, the '.status' fields in the returned
%                     normalization struct(s) is set to appropriate value.
%
%   operation  (string) The operation to perform: 'init' to initialize
%                       the normalization struct, 'do' to perform the
%                       normalization, 'undo' to undo the normalization,
%                       if possible. If operation 'do' is given, but the
%                       normalization struct has not yet been initialized,
%                       it is initialized using the given data (x).
%
% OUTPUT ARGUMENTS
%
%   x        (vector) Appropriately processed values.
%
%   sNorm    (struct) Updated normalization struct/struct array. If any,
%                     the '.status' and '.params' fields are updated.
%
% EXAMPLES
%
%  To initialize and apply a normalization on a set of scalar values:
%
%    [x_new,sN] = som_norm_variable(x_old,'var','do');
%
%  To just initialize, use:
%
%    [dummy,sN] = som_norm_variable(x_old,'var','init');
%
%  To undo the normalization(s):
%
%    x_orig = som_norm_variable(x_new,sN,'undo');
%
%  Typically, normalizations of data structs/sets are handled using
%  functions SOM_NORMALIZE and SOM_DENORMALIZE. However, when only the
%  values of a single variable are of interest, SOM_NORM_VARIABLE may
%  be useful. For example, assume one wants to apply the normalization
%  done on a component (i) of a data struct (sD) to a new set of values
%  (x) of that component. With SOM_NORM_VARIABLE this can be done with:
%
%    x_new = som_norm_variable(x,sD.comp_norm{i},'do');
%
%  Now, as the normalizations in sD.comp_norm{i} have already been
%  initialized with the original data set (presumably sD.data),
%  the EXACTLY SAME normalization(s) can be applied to the new values.
%  The same thing can be done with SOM_NORMALIZE function, too:
%
%    x_new = som_normalize(x,sD.comp_norm{i});
%
%  Or, if the new data set were in variable D - a matrix of same
%  dimension as the original data set:
%
%    D_new = som_normalize(D,sD,i);
%
% SEE ALSO
%
%  som_normalize    Add/apply/redo normalizations for a data struct/set.
%  som_denormalize  Undo normalizations of a data struct/set.

% Copyright (c) 1998-2000 by the SOM toolbox programming team.
% http://www.cis.hut.fi/projects/somtoolbox/

% Version 2.0beta juuso 151199 170400 150500

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% check arguments

error(nargchk(3, 3, nargin));  % check no. of input arguments is correct

% method: turn whatever was given into a normalization struct (array)
sNorm = [];
if ischar(method)
  if any(strcmp(method,{'var','range','log','logistic','histD','histC'})),
    % recognised identifier: default normalization struct for that method
    sNorm = som_set('som_norm','method',method);
  else
    % any other string is taken as a freeform 'eval' normalization
    method = cellstr(method);
  end
end
if iscell(method),
  if length(method)==1 & isstruct(method{1}), sNorm = method{1};
  else
    % no (or empty) denormalization string: use the identity 'x'
    if length(method)==1 | isempty(method{2}), method{2} = 'x'; end
    sNorm = som_set('som_norm','method','eval','params',method);
  end
elseif ~ischar(method)
  % struct / struct array (or empty) is used as such. A recognised method
  % string must NOT end up here, otherwise the struct created above would
  % be overwritten by the plain string.
  sNorm = method;
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% action

% stacked normalizations are applied first-to-last and undone last-to-first
order = [1:length(sNorm)];
if length(order)>1 & strcmp(operation,'undo'), order = order(end:-1:1); end

for i=order,

  % initialize
  if strcmp(operation,'init') | ...
     (strcmp(operation,'do') & strcmp(sNorm(i).status,'uninit')),

    % case method = 'hist': pick the continuous variant when there are
    % more than 20 distinct finite values, the discrete one otherwise
    if strcmp(sNorm(i).method,'hist'),
      inds = find(~isnan(x) & ~isinf(x));
      if length(unique(x(inds)))>20, sNorm(i).method = 'histC';
      else sNorm(i).method = 'histD'; end
    end

    switch(sNorm(i).method),
    case 'var',      params = norm_variance_init(x);
    case 'range',    params = norm_scale01_init(x);
    case 'log',      params = norm_log_init(x);
    case 'logistic', params = norm_logistic_init(x);
    case 'histD',    params = norm_histeqD_init(x);
    case 'histC',    params = norm_histeqC_init(x);
    case 'eval',     params = sNorm(i).params;
    otherwise,
      error(['Unrecognized method: ' sNorm(i).method]);
    end
    sNorm(i).params = params;
    sNorm(i).status = 'undone';
  end

  % do / undo
  if strcmp(operation,'do'),
    switch(sNorm(i).method),
    case 'var',      x = norm_scale_do(x,sNorm(i).params);
    case 'range',    x = norm_scale_do(x,sNorm(i).params);
    case 'log',      x = norm_log_do(x,sNorm(i).params);
    case 'logistic', x = norm_logistic_do(x,sNorm(i).params);
    case 'histD',    x = norm_histeqD_do(x,sNorm(i).params);
    case 'histC',    x = norm_histeqC_do(x,sNorm(i).params);
    case 'eval',     x = norm_eval_do(x,sNorm(i).params);
    otherwise,
      error(['Unrecognized method: ' sNorm(i).method]);
    end
    sNorm(i).status = 'done';

  elseif strcmp(operation,'undo'),

    if strcmp(sNorm(i).status,'uninit'),
      warning('Could not undo: uninitialized normalization struct.')
      break;
    end
    switch(sNorm(i).method),
    case 'var',      x = norm_scale_undo(x,sNorm(i).params);
    case 'range',    x = norm_scale_undo(x,sNorm(i).params);
    case 'log',      x = norm_log_undo(x,sNorm(i).params);
    case 'logistic', x = norm_logistic_undo(x,sNorm(i).params);
    case 'histD',    x = norm_histeqD_undo(x,sNorm(i).params);
    case 'histC',    x = norm_histeqC_undo(x,sNorm(i).params);
    case 'eval',     x = norm_eval_undo(x,sNorm(i).params);
    otherwise,
      error(['Unrecognized method: ' sNorm(i).method]);
    end
    sNorm(i).status = 'undone';

  elseif ~strcmp(operation,'init'),

    error(['Unrecognized operation: ' operation])

  end
end

return;
337 | |
338 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | |
339 %% subfunctions | |
340 | |
341 % linear scaling | |
342 | |
function p = norm_variance_init(x)
% Parameters [mean, std] for variance normalization, computed from the
% finite entries of x only (NaN and +/-Inf are left out).
valid = x(isfinite(x));
p = [mean(valid), std(valid)];
% a constant variable has zero deviation: scale by 1 instead
if p(2) == 0
  p(2) = 1;
end
% end of norm_variance_init
348 | |
function p = norm_scale01_init(x)
% Parameters [minimum, range] for scaling to [0,1], computed from the
% finite entries of x only.
valid = x(isfinite(x));
lo = min(valid);
hi = max(valid);
p = [lo, hi-lo];
% all values equal: use a range of 1 instead of 0
if lo == hi
  p = [lo, 1];
end
% end of norm_scale01_init
355 | |
function x = norm_scale_do(x,p)
% Linear scaling: subtract the offset p(1), then divide by the scale p(2).
offset = p(1);
scale = p(2);
x = (x - offset) / scale;
% end of norm_scale_do
359 | |
function x = norm_scale_undo(x,p)
% Inverse of norm_scale_do: multiply by the scale p(2), then add the offset p(1).
offset = p(1);
scale = p(2);
x = x * scale + offset;
% end of norm_scale_undo
363 | |
364 % logarithm | |
365 | |
function p = norm_log_init(x)
% Parameter for the logarithmic normalization: the smallest finite value of x.
valid = x(isfinite(x));
p = min(valid);
% end of norm_log_init
370 | |
function x = norm_log_do(x,p)
% Logarithmic normalization: natural logarithm of x shifted so that the
% stored minimum p maps to log(1) = 0.
shifted = x - p + 1;
x = log(shifted);
% if any(~isreal(x)), ok = 0; end
% end of norm_log_do
375 | |
function x = norm_log_undo(x,p)
% Inverse of norm_log_do: exponentiate and shift back by the stored minimum p.
expanded = exp(x);
x = (expanded - 1) + p;
% end of norm_log_undo
379 | |
380 % logistic | |
381 | |
function p = norm_logistic_init(x)
% Parameters [mean, std] for the logistic normalization, computed from
% the finite entries of x only.
valid = x(isfinite(x));
p = [mean(valid), std(valid)];
% zero deviation would make the scaling divide by zero: use 1 instead
if p(2) == 0
  p(2) = 1;
end
% end of norm_logistic_init
387 | |
function x = norm_logistic_do(x,p)
% Logistic normalization: scale with p = [offset, scale], then squash
% with the logistic function 1/(1+exp(-z)).
z = (x - p(1)) / p(2);
x = 1 ./ (1 + exp(-z));
% end of norm_logistic_do
392 | |
function x = norm_logistic_undo(x,p)
% Inverse of norm_logistic_do: logit first, then undo the linear scaling
% with p = [offset, scale].
z = log(x ./ (1 - x));
x = z * p(2) + p(1);
% end of norm_logistic_undo
397 | |
398 % histogram equalization for discrete values | |
399 | |
function p = norm_histeqD_init(x)
% Parameters for discrete histogram equalization: the sorted distinct
% finite values of x.
valid = x(isfinite(x));
p = unique(valid);
% end of norm_histeqD_init
404 | |
function x = norm_histeqD_do(x,p)
% Discrete histogram equalization.
%   x  values to normalize; NaN and +/-Inf entries are not re-indexed
%   p  sorted distinct values stored at initialization (norm_histeqD_init)
% Each finite value is replaced by the ordinal number of a stored value
% next to it and the ordinals are then scaled to [0,1].
bins = length(p);
inds = find(~isnan(x) & ~isinf(x));
% force a row so that the loop visits one index at a time for both
% row and column vector inputs
for i = inds(:)',
  [dummy ind] = min(abs(x(i) - p));
  % data item closer to the left-hand bin wall is indexed after RH wall
  if x(i) > p(ind) & ind < bins,
    x(i) = ind + 1;
  else
    x(i) = ind;
  end
end
% normalization between [0,1]; with a single stored value the ordinal is
% always 1 and is mapped to 0 instead of evaluating 0/0
if bins == 1,
  x = x - 1;
else
  x = (x-1)/(bins-1);
end
% end of norm_histeqD_do
419 | |
function x = norm_histeqD_undo(x,p)
% Inverse of norm_histeqD_do: scale back to ordinal numbers, round to the
% nearest one and look the original value up in p.
nvals = length(p);
x = round(x*(nvals-1)+1);
% NaN and +/-Inf entries cannot be used as indices and are left as they are
usable = ~isnan(x) & ~isinf(x);
x(usable) = p(x(usable));
% end of norm_histeqD_undo
426 | |
427 % histogram equalization with partially linear functions | |
428 | |
function p = norm_histeqC_init(x)
% Parameters for continuous histogram equalization: bin limits chosen so
% that each bin holds roughly the same number of the finite samples of x.
% The returned limits are strictly increasing, from the smallest to the
% largest finite value (or [mi, mi+1] when there is only one distinct value).

% investigate x
inds = find(~isnan(x) & ~isinf(x));
samples = length(inds);
xs = unique(x(inds));
mi = xs(1);
ma = xs(end);
% decide number of limits
lims = ceil(sqrt(length(xs))); % 2->2,100->10,1000->32,10000->100
% decide limits
if lims==1,
  p = [mi, mi+1];
elseif lims==2,
  p = [mi, ma];
else
  p = zeros(lims,1);
  p(1) = mi;
  p(end) = ma;
  binsize = zeros(lims-1,1); b = 1; avebinsize = samples/(lims-1);
  for i=1:(length(xs)-1),
    binsize(b) = binsize(b) + sum(x==xs(i));
    if binsize(b) >= avebinsize,
      % current bin is full: close it halfway to the next distinct value
      b = b + 1;
      p(b) = (xs(i)+xs(i+1))/2;
    end
    if b==(lims-1),
      binsize(b) = samples-sum(binsize); break;
    else
      avebinsize = (samples-sum(binsize))/(lims-1-b);
    end
  end
  % Only p(1:b) and p(end) have been assigned. When most samples share the
  % largest value the loop ends before all interior limits are set; drop
  % the unset ones (still zero) so the limits stay strictly increasing.
  p = [p(1:b); p(end)];
end
% end of norm_histeqC_init
463 | |
function x = norm_histeqC_do(x,p)
% Continuous histogram equalization: piecewise linear mapping defined by
% the bin limits p. Bin number k is mapped onto [k-1,k], values outside
% the limits are extrapolated with the slope of the nearest bin, and the
% result is divided by the number of bins.
nlims = length(p);
mapped = x;

% finite values at or below the first limit
width = p(2)-p(1);
below = (x<=p(1)) & isfinite(x);
if any(below(:))
  mapped(below) = 0-(p(1)-x(below))/width;
end

% finite values above the last limit
width = p(end)-p(end-1);
above = (x>p(end)) & isfinite(x);
if any(above(:))
  mapped(above) = nlims-1+(x(above)-p(end))/width;
end

% values inside the limits, one bin at a time
for k=1:(nlims-1)
  lo = p(k);
  hi = p(k+1);
  width = hi-lo;
  inside = (x>lo) & (x<=hi);
  if any(inside(:))
    mapped(inside) = k-1+(x(inside)-lo)/width;
  end
end

% scale so that minimum and maximum correspond to 0 and 1
x = mapped/(nlims-1);
% end of norm_histeqC_do
484 | |
function x = norm_histeqC_undo(x,p)
% Inverse of norm_histeqC_do: map normalized values back through the
% piecewise linear function defined by the bin limits p.
restored = x;   % entries matched by no rule below keep their input value
nlims = length(p);

% scale so that 0 and 1 correspond to minimum and maximum
x = x*(nlims-1);

% finite values at or below the first limit
width = p(2)-p(1);
below = (x<=0) & isfinite(x);
if any(below(:))
  restored(below) = x(below)*width + p(1);
end

% finite values above the last limit
width = p(end)-p(end-1);
above = (x>nlims-1) & isfinite(x);
if any(above(:))
  restored(above) = (x(above)-(nlims-1))*width+p(end);
end

% values inside the limits, one bin at a time
for k=1:(nlims-1)
  lo = p(k);
  hi = p(k+1);
  width = hi-lo;
  inside = (x>k-1) & (x<=k);
  if any(inside(:))
    restored(inside) = (x(inside)-(k-1))*width + lo;
  end
end

x = restored;
% end of norm_histeqC_undo
507 | |
508 % eval | |
509 | |
function p = norm_eval_init(method)
% The 'eval' method needs no data-dependent initialization: the given
% value is returned unchanged as the parameters.
p = method;
% end of norm_eval_init
513 | |
function x = norm_eval_do(x,p)
% Freeform normalization: evaluate the expression string p{1}, in which
% 'x' stands for the data.
% NOTE: the string is executed as MATLAB code in this workspace, so the
% variable names used here are kept as they are and p must come from a
% trusted source.
x_tmp = eval(p{1});
% the result is taken into use only when both it and x have exactly one
% column and at least one row; otherwise x is returned untouched
tmp_ok = size(x_tmp,1)>=1 & size(x_tmp,2)==1;
x_ok = size(x,1)>=1 & size(x,2)==1;
if tmp_ok & x_ok
  x = x_tmp;
end
% end of norm_eval_do
521 | |
function x = norm_eval_undo(x,p)
% Freeform denormalization: evaluate the expression string p{2}, in which
% 'x' stands for the data.
% NOTE: the string is executed as MATLAB code in this workspace, so the
% variable names used here are kept as they are and p must come from a
% trusted source.
x_tmp = eval(p{2});
% the result is taken into use only when both it and x have exactly one
% column and at least one row; otherwise x is returned untouched
tmp_ok = size(x_tmp,1)>=1 & size(x_tmp,2)==1;
x_ok = size(x,1)>=1 & size(x,2)==1;
if tmp_ok & x_ok
  x = x_tmp;
end
% end of norm_eval_undo
529 | |
530 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | |
531 | |
532 | |
533 |