annotate toolboxes/MIRtoolbox1.3.2/somtoolbox/cca.m @ 0:cc4b1211e677 tip

initial commit to HG from Changeset: 646 (e263d8a21543) added further path and more save "camirversion.m"
author Daniel Wolff
date Fri, 19 Aug 2016 13:07:06 +0200
parents
children
rev   line source
Daniel@0 1 function [P] = cca(D, P, epochs, Mdist, alpha0, lambda0)
Daniel@0 2
Daniel@0 3 %CCA Projects data vectors using Curvilinear Component Analysis.
Daniel@0 4 %
Daniel@0 5 % P = cca(D, P, epochs, [Dist], [alpha0], [lambda0])
Daniel@0 6 %
Daniel@0 7 % P = cca(D,2,10); % projects the given data to a plane
Daniel@0 8 % P = cca(D,pcaproj(D,2),5); % same, but with PCA initialization
Daniel@0 9 % P = cca(D, 2, 10, Dist); % same, but the given distance matrix is used
Daniel@0 10 %
Daniel@0 11 % Input and output arguments ([]'s are optional):
Daniel@0 12 % D (matrix) the data matrix, size dlen x dim
Daniel@0 13 % (struct) data or map struct
Daniel@0 14 % P (scalar) output dimension
Daniel@0 15 % (matrix) size dlen x odim, the initial projection
Daniel@0 16 % epochs (scalar) training length
Daniel@0 17 % [Dist] (matrix) pairwise distance matrix, size dlen x dlen.
Daniel@0 18 % If the distances in the input space should
Daniel@0 19 % be calculated otherwise than as euclidian
Daniel@0 20 % distances, the distance from each vector
Daniel@0 21 % to each other vector can be given here,
Daniel@0 22 % size dlen x dlen. For example PDIST
Daniel@0 23 % function can be used to calculate the
Daniel@0 24 % distances: Dist = squareform(pdist(D,'mahal'));
Daniel@0 25 % [alpha0] (scalar) initial step size, 0.5 by default
Daniel@0 26 % [lambda0] (scalar) initial radius of influence, 3*max(std(D)) by default
Daniel@0 27 %
Daniel@0 28 % P (matrix) size dlen x odim, the projections
Daniel@0 29 %
Daniel@0 30 % Unknown values (NaN's) in the data: projections of vectors with
Daniel@0 31 % unknown components tend to drift towards the center of the
Daniel@0 32 % projection distribution. Projections of totally unknown vectors are
Daniel@0 33 % set to unknown (NaN).
Daniel@0 34 %
Daniel@0 35 % See also SAMMON, PCAPROJ.
Daniel@0 36
Daniel@0 37 % Reference: Demartines, P., Herault, J., "Curvilinear Component
Daniel@0 38 % Analysis: a Self-Organizing Neural Network for Nonlinear
Daniel@0 39 % Mapping of Data Sets", IEEE Transactions on Neural Networks,
Daniel@0 40 % vol 8, no 1, 1997, pp. 148-154.
Daniel@0 41
Daniel@0 42 % Contributed to SOM Toolbox 2.0, February 2nd, 2000 by Juha Vesanto
Daniel@0 43 % Copyright (c) by Juha Vesanto
Daniel@0 44 % http://www.cis.hut.fi/projects/somtoolbox/
Daniel@0 45
Daniel@0 46 % juuso 171297 040100
Daniel@0 47
Daniel@0 48 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Daniel@0 49 %% Check arguments
Daniel@0 50
Daniel@0 51 error(nargchk(3, 6, nargin)); % check the number of input arguments
Daniel@0 52
Daniel@0 53 % input data
Daniel@0 54 if isstruct(D),
Daniel@0 55 if strcmp(D.type,'som_map'), D = D.codebook; else D = D.data; end
Daniel@0 56 end
Daniel@0 57 [noc dim] = size(D);
Daniel@0 58 noc_x_1 = ones(noc, 1); % used frequently
Daniel@0 59 me = zeros(1,dim); st = zeros(1,dim);
Daniel@0 60 for i=1:dim,
Daniel@0 61 me(i) = mean(D(find(isfinite(D(:,i))),i));
Daniel@0 62 st(i) = std(D(find(isfinite(D(:,i))),i));
Daniel@0 63 end
Daniel@0 64
Daniel@0 65 % initial projection
Daniel@0 66 if prod(size(P))==1,
Daniel@0 67 P = (2*rand(noc,P)-1).*st(noc_x_1,1:P) + me(noc_x_1,1:P);
Daniel@0 68 else
Daniel@0 69 % replace unknown projections with known values
Daniel@0 70 inds = find(isnan(P)); P(inds) = rand(size(inds));
Daniel@0 71 end
Daniel@0 72 [dummy odim] = size(P);
Daniel@0 73 odim_x_1 = ones(odim, 1); % this is used frequently
Daniel@0 74
Daniel@0 75 % training length
Daniel@0 76 train_len = epochs*noc;
Daniel@0 77
Daniel@0 78 % random sample order
Daniel@0 79 rand('state',sum(100*clock));
Daniel@0 80 sample_inds = ceil(noc*rand(train_len,1));
Daniel@0 81
Daniel@0 82 % mutual distances
Daniel@0 83 if nargin<4 | isempty(Mdist) | all(isnan(Mdist(:))),
Daniel@0 84 fprintf(2, 'computing mutual distances\r');
Daniel@0 85 dim_x_1 = ones(dim,1);
Daniel@0 86 for i = 1:noc,
Daniel@0 87 x = D(i,:);
Daniel@0 88 Diff = D - x(noc_x_1,:);
Daniel@0 89 N = isnan(Diff);
Daniel@0 90 Diff(find(N)) = 0;
Daniel@0 91 Mdist(:,i) = sqrt((Diff.^2)*dim_x_1);
Daniel@0 92 N = find(sum(N')==dim); %mutual distance unknown
Daniel@0 93 if ~isempty(N), Mdist(N,i) = NaN; end
Daniel@0 94 end
Daniel@0 95 else
Daniel@0 96 % if the distance matrix is output from PDIST function
Daniel@0 97 if size(Mdist,1)==1, Mdist = squareform(Mdist); end
Daniel@0 98 if size(Mdist,1)~=noc,
Daniel@0 99 error('Mutual distance matrix size and data set size do not match');
Daniel@0 100 end
Daniel@0 101 end
Daniel@0 102
Daniel@0 103 % alpha and lambda
Daniel@0 104 if nargin<5 | isempty(alpha0) | isnan(alpha0), alpha0 = 0.5; end
Daniel@0 105 alpha = potency_curve(alpha0,alpha0/100,train_len);
Daniel@0 106
Daniel@0 107 if nargin<6 | isempty(lambda0) | isnan(lambda0), lambda0 = max(st)*3; end
Daniel@0 108 lambda = potency_curve(lambda0,0.01,train_len);
Daniel@0 109
Daniel@0 110 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Daniel@0 111 %% Action
Daniel@0 112
Daniel@0 113 k=0; fprintf(2, 'iterating: %d / %d epochs\r',k,epochs);
Daniel@0 114
Daniel@0 115 for i=1:train_len,
Daniel@0 116
Daniel@0 117 ind = sample_inds(i); % sample index
Daniel@0 118 dx = Mdist(:,ind); % mutual distances in input space
Daniel@0 119 known = find(~isnan(dx)); % known distances
Daniel@0 120
Daniel@0 121 if ~isempty(known),
Daniel@0 122 % sample vector's projection
Daniel@0 123 y = P(ind,:);
Daniel@0 124
Daniel@0 125 % distances in output space
Daniel@0 126 Dy = P(known,:) - y(noc_x_1(known),:);
Daniel@0 127 dy = sqrt((Dy.^2)*odim_x_1);
Daniel@0 128
Daniel@0 129 % relative effect
Daniel@0 130 dy(find(dy==0)) = 1; % to get rid of div-by-zero's
Daniel@0 131 fy = exp(-dy/lambda(i)) .* (dx(known) ./ dy - 1);
Daniel@0 132
Daniel@0 133 % Note that the function F here is e^(-dy/lambda))
Daniel@0 134 % instead of the bubble function 1(lambda-dy) used in the
Daniel@0 135 % paper.
Daniel@0 136
Daniel@0 137 % Note that here a simplification has been made: the derivatives of the
Daniel@0 138 % F function have been ignored in calculating the gradient of error
Daniel@0 139 % function w.r.t. to changes in dy.
Daniel@0 140
Daniel@0 141 % update
Daniel@0 142 P(known,:) = P(known,:) + alpha(i)*fy(:,odim_x_1).*Dy;
Daniel@0 143 end
Daniel@0 144
Daniel@0 145 % track
Daniel@0 146 if rem(i,noc)==0,
Daniel@0 147 k=k+1; fprintf(2, 'iterating: %d / %d epochs\r',k,epochs);
Daniel@0 148 end
Daniel@0 149
Daniel@0 150 end
Daniel@0 151
Daniel@0 152 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Daniel@0 153 %% clear up
Daniel@0 154
Daniel@0 155 % calculate error
Daniel@0 156 error = cca_error(P,Mdist,lambda(train_len));
Daniel@0 157 fprintf(2,'%d iterations, error %f \n', epochs, error);
Daniel@0 158
Daniel@0 159 % set projections of totally unknown vectors as unknown
Daniel@0 160 unknown = find(sum(isnan(D)')==dim);
Daniel@0 161 P(unknown,:) = NaN;
Daniel@0 162
Daniel@0 163 return;
Daniel@0 164
Daniel@0 165
Daniel@0 166 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Daniel@0 167 %% tips
Daniel@0 168
Daniel@0 169 % to plot the results, use the code below
Daniel@0 170
Daniel@0 171 %subplot(2,1,1),
Daniel@0 172 %switch(odim),
Daniel@0 173 % case 1, plot(P(:,1),ones(dlen,1),'x')
Daniel@0 174 % case 2, plot(P(:,1),P(:,2),'x');
Daniel@0 175 % otherwise, plot3(P(:,1),P(:,2),P(:,3),'x'); rotate3d on
Daniel@0 176 %end
Daniel@0 177 %subplot(2,1,2), dydxplot(P,Mdist);
Daniel@0 178
Daniel@0 179 % to a project a new point x in the input space to the output space
Daniel@0 180 % do the following:
Daniel@0 181
Daniel@0 182 % Diff = D - x(noc_x_1,:); Diff(find(isnan(Diff))) = 0;
Daniel@0 183 % dx = sqrt((Diff.^2)*dim_x_1);
Daniel@0 184 % p = project_point(P,x,dx); % this function can be found from below
Daniel@0 185 % tlen = size(p,1);
Daniel@0 186 % plot(P(:,1),P(:,2),'bx',p(tlen,1),p(tlen,2),'ro',p(:,1),p(:,2),'r-')
Daniel@0 187
Daniel@0 188 % similar trick can be made to the other direction
Daniel@0 189
Daniel@0 190
Daniel@0 191 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Daniel@0 192 %% subfunctions
Daniel@0 193
Daniel@0 194 function vals = potency_curve(v0,vn,l)
Daniel@0 195
Daniel@0 196 % curve that decreases from v0 to vn with a rate that is
Daniel@0 197 % somewhere between linear and 1/t
Daniel@0 198 vals = v0 * (vn/v0).^([0:(l-1)]/(l-1));
Daniel@0 199
Daniel@0 200
Daniel@0 201 function error = cca_error(P,Mdist,lambda)
Daniel@0 202
Daniel@0 203 [noc odim] = size(P);
Daniel@0 204 noc_x_1 = ones(noc,1);
Daniel@0 205 odim_x_1 = ones(odim,1);
Daniel@0 206
Daniel@0 207 error = 0;
Daniel@0 208 for i=1:noc,
Daniel@0 209 known = find(~isnan(Mdist(:,i)));
Daniel@0 210 if ~isempty(known),
Daniel@0 211 y = P(i,:);
Daniel@0 212 Dy = P(known,:) - y(noc_x_1(known),:);
Daniel@0 213 dy = sqrt((Dy.^2)*odim_x_1);
Daniel@0 214 fy = exp(-dy/lambda);
Daniel@0 215 error = error + sum(((Mdist(known,i) - dy).^2).*fy);
Daniel@0 216 end
Daniel@0 217 end
Daniel@0 218 error = error/2;
Daniel@0 219
Daniel@0 220
Daniel@0 221 function [] = dydxplot(P,Mdist)
Daniel@0 222
Daniel@0 223 [noc odim] = size(P);
Daniel@0 224 noc_x_1 = ones(noc,1);
Daniel@0 225 odim_x_1 = ones(odim,1);
Daniel@0 226 Pdist = zeros(noc,noc);
Daniel@0 227
Daniel@0 228 for i=1:noc,
Daniel@0 229 y = P(i,:);
Daniel@0 230 Dy = P - y(noc_x_1,:);
Daniel@0 231 Pdist(:,i) = sqrt((Dy.^2)*odim_x_1);
Daniel@0 232 end
Daniel@0 233
Daniel@0 234 Pdist = tril(Pdist,-1);
Daniel@0 235 inds = find(Pdist > 0);
Daniel@0 236 n = length(inds);
Daniel@0 237 plot(Pdist(inds),Mdist(inds),'.');
Daniel@0 238 xlabel('dy'), ylabel('dx')
Daniel@0 239
Daniel@0 240
Daniel@0 241 function p = project_point(P,x,dx)
Daniel@0 242
Daniel@0 243 [noc odim] = size(P);
Daniel@0 244 noc_x_1 = ones(noc,1);
Daniel@0 245 odim_x_1 = ones(odim,1);
Daniel@0 246
Daniel@0 247 % initial projection
Daniel@0 248 [dummy,i] = min(dx);
Daniel@0 249 y = P(i,:)+rand(1,odim)*norm(P(i,:))/20;
Daniel@0 250
Daniel@0 251 % lambda
Daniel@0 252 lambda = norm(std(P));
Daniel@0 253
Daniel@0 254 % termination
Daniel@0 255 eps = 1e-3; i_max = noc*10;
Daniel@0 256
Daniel@0 257 i=1; p(i,:) = y;
Daniel@0 258 ready = 0;
Daniel@0 259 while ~ready,
Daniel@0 260
Daniel@0 261 % mutual distances
Daniel@0 262 Dy = P - y(noc_x_1,:); % differences in output space
Daniel@0 263 dy = sqrt((Dy.^2)*odim_x_1); % distances in output space
Daniel@0 264 f = exp(-dy/lambda);
Daniel@0 265
Daniel@0 266 fprintf(2,'iteration %d, error %g \r',i,sum(((dx - dy).^2).*f));
Daniel@0 267
Daniel@0 268 % all the other vectors push the projected one
Daniel@0 269 fy = f .* (dx ./ dy - 1) / sum(f);
Daniel@0 270
Daniel@0 271 % update
Daniel@0 272 step = - sum(fy(:,odim_x_1).*Dy);
Daniel@0 273 y = y + step;
Daniel@0 274
Daniel@0 275 i=i+1;
Daniel@0 276 p(i,:) = y;
Daniel@0 277 ready = (norm(step)/norm(y) < eps | i > i_max);
Daniel@0 278
Daniel@0 279 end
Daniel@0 280 fprintf(2,'\n');
Daniel@0 281
Daniel@0 282