annotate toolboxes/FullBNT-1.0.7/KPMstats/logistK.m @ 0:cc4b1211e677 tip

initial commit to HG from Changeset: 646 (e263d8a21543) added further path and more save "camirversion.m"
author Daniel Wolff
date Fri, 19 Aug 2016 13:07:06 +0200
parents
children
rev   line source
function [beta,post,lli] = logistK(x,y,w,beta)
% [beta,post,lli] = logistK(x,y,w,beta)
%
% k-class logistic regression with optional sample weights
%
% k = number of classes
% n = number of samples
% d = dimensionality of samples
%
% INPUT
%   x       dxn matrix of n input column vectors
%   y       kxn matrix of class assignments (one column per sample);
%           columns whose entries do not sum to 1 are rescaled to unit
%           L1-norm (all-zero columns are left untouched)
%   [w]     1xn vector of sample weights (default: all ones; an empty
%           value also selects the default)
%   [beta]  dxk matrix of starting model coefficients; beta(:,k) must
%           be all zero (default: small random values; an empty value
%           also selects the default)
%
% OUTPUT
%   beta    dxk matrix of fitted model coefficients
%           (beta(:,k) are fixed at 0)
%   post    kxn matrix of fitted class posteriors
%   lli     log likelihood
%
% Let p(i,j) = exp(beta(:,j)'*x(:,i)),
% Class j posterior for observation i is:
%   post(j,i) = p(i,j) / (p(i,1) + ... p(i,k))
%
% The fit is a Newton-Raphson ascent on the weighted log likelihood
% (at most 100 iterations) with step-size halving and a
% Levenberg-Marquardt style conditioning of the Hessian.
%
% See also logistK_eval.
%
% David Martin <dmartin@eecs.berkeley.edu>
% May 3, 2002

% Copyright (C) 2002 David R. Martin <dmartin@eecs.berkeley.edu>
%
% This program is free software; you can redistribute it and/or
% modify it under the terms of the GNU General Public License as
% published by the Free Software Foundation; either version 2 of the
% License, or (at your option) any later version.
%
% This program is distributed in the hope that it will be useful, but
% WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
% General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program; if not, write to the Free Software
% Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
% 02111-1307, USA, or see http://www.gnu.org/copyleft/gpl.html.

% TODO - this code would be faster if x were transposed

error(nargchk(2,4,nargin));

debug = 0;
if debug>0,
  h=figure(1);
  set(h,'DoubleBuffer','on');
end

% get sizes
[d,nx] = size(x);
[k,ny] = size(y);

% check sizes
if k < 2,
  error('Input y must encode at least 2 classes.');
end
if nx ~= ny,
  error('Inputs x,y not the same length.');
end

n = nx;

% make sure class assignments have unit L1-norm.
% The test is per column: a bare vector condition in "if" is true only
% when ALL elements are nonzero, which would skip the normalization
% whenever at least one column already sums to 1.  All-zero columns are
% left alone so that no 0/0 = NaN is introduced.
sumy = sum(y,1);
offnorm = (abs(1-sumy) > eps) & (sumy ~= 0);
if any(offnorm),
  y(:,offnorm) = y(:,offnorm) ./ repmat(sumy(offnorm),k,1);
end
clear sumy offnorm;

% if sample weights weren't specified (or are empty), set them to 1
if nargin < 3 || isempty(w),
  w = ones(1,n);
end

% normalize sample weights so max is 1
w = w / max(w);

% if starting beta wasn't specified (or is empty), initialize randomly
if nargin < 4 || isempty(beta),
  beta = 1e-3*rand(d,k);
  beta(:,k) = 0; % fix beta for class k at zero
else
  % every entry must be zero; a plain sum could cancel to zero
  if any(beta(:,k) ~= 0),
    error('beta(:,k) ~= 0');
  end
end

stepsize = 1;
minstepsize = 1e-2;

post = computePost(beta,x);
lli = computeLogLik(post,y,w);

for iter = 1:100,
  %disp(sprintf(' logist iter=%d lli=%g',iter,lli));
  vis(x,y,beta,lli,d,k,iter,debug);

  % gradient and hessian
  [g,h] = derivs(post,x,y,w);

  % make sure Hessian is well conditioned
  if rcond(h) < eps,
    % condition with Levenberg-Marquardt method:
    % scale the diagonal by (1 + 10^i), leave off-diagonal entries as is
    for i = -16:16,
      h2 = h .* ((1 + 10^i)*eye(size(h)) + (1-eye(size(h))));
      if rcond(h2) > eps, break, end
    end
    if rcond(h2) < eps,
      warning(['Stopped at iteration ' num2str(iter) ...
               ' because Hessian can''t be conditioned']);
      break
    end
    h = h2;
  end

  % save lli before update
  lli_prev = lli;

  % Newton-Raphson with step-size halving
  while stepsize >= minstepsize,
    % Newton-Raphson update step (only the first k-1 columns are free)
    step = stepsize * (h \ g);
    beta2 = beta;
    beta2(:,1:k-1) = beta2(:,1:k-1) - reshape(step,d,k-1);

    % get the new log likelihood
    post2 = computePost(beta2,x);
    lli2 = computeLogLik(post2,y,w);

    % if the log likelihood increased, then stop
    if lli2 > lli,
      post = post2; lli = lli2; beta = beta2;
      break
    end

    % otherwise, reduce step size by half
    stepsize = 0.5 * stepsize;
  end

  % stop if the average log likelihood has gotten small enough
  if 1-exp(lli/n) < 1e-2, break, end

  % stop if the log likelihood changed by a small enough fraction
  dlli = (lli_prev-lli) / lli;
  if abs(dlli) < 1e-3, break, end

  % stop if the step size has gotten too small
  if stepsize < minstepsize, break, end

  % stop if the log likelihood has decreased; this shouldn't happen
  if lli < lli_prev,
    warning(['Stopped at iteration ' num2str(iter) ...
             ' because the log likelihood decreased from ' ...
             num2str(lli_prev) ' to ' num2str(lli) '.' ...
             ' This may be a bug.']);
    break
  end
end

if debug>0,
  vis(x,y,beta,lli,d,k,iter,2);
end
Daniel@0 173
Daniel@0 174 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Daniel@0 175 %% class posteriors
function post = computePost(beta,x)
% Class posteriors for every sample.
%   beta  coefficient matrix, one column per class
%   x     sample matrix, one column per sample
%   post  (classes x samples) matrix; post(c,i) is
%         1 / sum_m exp(beta(:,m)'*x(:,i) - beta(:,c)'*x(:,i))
nSamples = size(x,2);
nClasses = size(beta,2);

% linear score of every class for every sample
score = zeros(nClasses,nSamples);
for c = 1:nClasses
  score(c,:) = beta(:,c)'*x;
end

% normalize using score differences relative to the class of interest
post = zeros(nClasses,nSamples);
for c = 1:nClasses
  shifted = score - repmat(score(c,:),nClasses,1);
  denom = sum(exp(shifted),1);
  post(c,:) = 1 ./ denom;
end
Daniel@0 187
Daniel@0 188 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Daniel@0 189 %% log likelihood
function lli = computeLogLik(post,y,w)
% Weighted log likelihood of the class assignments y under the
% posteriors post.  eps is added inside the log so that a zero
% posterior does not produce -Inf.  Raises an error if the result
% is NaN.
nClasses = size(post,1);

lli = 0;
for c = 1:nClasses
  logp = log(post(c,:)+eps);
  contrib = w.*y(c,:).*logp;
  lli = lli + sum(contrib);
end

if isnan(lli)
  error('lli is nan');
end
Daniel@0 199
Daniel@0 200 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Daniel@0 201 %% gradient and hessian
Daniel@0 202 %% These are computed in what seems a verbose manner, but it is
Daniel@0 203 %% done this way to use minimal memory. x should be transposed
Daniel@0 204 %% to make it faster.
function [g,h] = derivs(post,x,y,w)
% Gradient and Hessian of the weighted log likelihood with respect to
% the coefficients of the first k-1 classes.
%   g  (d*(k-1)) x 1 gradient, class blocks of length d stacked
%   h  (d*(k-1)) x (d*(k-1)) Hessian made of d x d class blocks
% Everything is accumulated with row-wise dot products rather than
% large temporaries to keep memory use small.

k = size(post,1);
d = size(x,1);
nFree = k-1;

% first derivative of likelihood w.r.t. beta
g = zeros(d,nFree);
for c = 1:nFree
  resid = w .* (y(c,:) - post(c,:));
  for r = 1:d
    g(r,c) = x(r,:) * resid';
  end
end
g = reshape(g,d*nFree,1);

% hessian of likelihood w.r.t. beta
h = zeros(d*nFree,d*nFree);

% diagonal blocks
for c = 1:nFree
  span = (c-1)*d+1 : c*d;
  blk = weightedOuterSum(x, w .* post(c,:) .* (1 - post(c,:)), d);
  h(span,span) = -blk;
end

% off-diagonal blocks (filled symmetrically)
for c1 = 1:nFree
  span1 = (c1-1)*d+1 : c1*d;
  for c2 = c1+1:nFree
    span2 = (c2-1)*d+1 : c2*d;
    blk = weightedOuterSum(x, w .* post(c2,:) .* post(c1,:), d);
    h(span1,span2) = blk;
    h(span2,span1) = blk;
  end
end

%% symmetric d x d matrix with entries sum_i wt(i)*x(a,i)*x(b,i)
function m = weightedOuterSum(x,wt,d)
m = zeros(d,d);
for a = 1:d
  wxa = wt .* x(a,:);
  for b = a:d
    v = wxa * x(b,:)';
    m(a,b) = v;
    m(b,a) = v;
  end
end
Daniel@0 251
Daniel@0 252 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Daniel@0 253 %% debug/visualization
function vis (x,y,beta,lli,d,k,iter,debug)
% Debug output for the fitting loop.
%   debug <= 0 : do nothing
%   debug == 1 : print the iteration number and log likelihood
%   debug >= 2 : additionally, when d == 3 and k <= 10, draw the
%                highest-scoring class over a 2-D grid in figure 1 and
%                overlay the samples, marked by their strongest class

if debug <= 0
  return
end

disp(['iter=' num2str(iter) ' lli=' num2str(lli)]);
if debug <= 1
  return
end

% plotting is only done for 3-row inputs with at most 10 classes
if ~(d == 3 && k <= 10)
  return
end

figure(1);
gridN = 100;
extent = abs(max(max(x)));
axisVals = linspace(-extent,extent,gridN);
[gx,gy] = meshgrid(axisVals,axisVals);
gxv = gx(:);
gyv = gy(:);

% grid points get a constant 1 as their third coordinate
gridPts = [gxv' ; gyv' ; ones(1,gridN*gridN)];
score = zeros(k,gridN*gridN);
for c = 1:k
  score(c,:) = exp(beta(:,c)'*gridPts);
end
[best,winner] = max(score,[],1);

hold off;
imagesc(gxv,gyv,reshape(winner,gridN,gridN));
hold on;

markers = {'w.' 'wx' 'w+' 'wo' 'w*' 'ws' 'wd' 'wv' 'w^' 'w<'};
% strongest class of each sample (does not change across the loop)
[peak,label] = max(y,[],1);
for c = 1:k
  members = find(label==c);
  plot(x(1,members),x(2,members),markers{c});
end
pause(0.1);
Daniel@0 286
Daniel@0 287 % eof