Mercurial > hg > camir-aes2014
diff toolboxes/FullBNT-1.0.7/KPMstats/logist2.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
line wrap: on
line diff
function [beta,p,lli] = logist2(y,x,w)
% [beta,p,lli] = logist2(y,x,w)
%
% 2-class logistic regression, fit by Newton-Raphson / IRLS
% (Hastie, Tibshirani & Friedman, "Elements of Statistical
% Learning", Section 4.4).
%
% INPUT
% 	y	Nx1 column vector of 0|1 class assignments
% 	x	NxK matrix of input vectors as rows
% 	[w]	Nx1 vector of sample weights (optional; default all-equal)
%
% OUTPUT
% 	beta	Kx1 column vector of model coefficients
% 	p	Nx1 column vector of fitted class 1 posteriors
% 	lli	log likelihood at the fitted beta
%
% Class 1 posterior is 1 / (1 + exp(-x*beta))
%
% David Martin <dmartin@eecs.berkeley.edu>
% April 16, 2002

% Copyright (C) 2002 David R. Martin <dmartin@eecs.berkeley.edu>
%
% This program is free software; you can redistribute it and/or
% modify it under the terms of the GNU General Public License as
% published by the Free Software Foundation; either version 2 of the
% License, or (at your option) any later version.
%
% This program is distributed in the hope that it will be useful, but
% WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
% General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program; if not, write to the Free Software
% Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
% 02111-1307, USA, or see http://www.gnu.org/copyleft/gpl.html.

error(nargchk(2,3,nargin));

% check inputs
if size(y,2) ~= 1,
  error('Input y not a column vector.');
end
if size(y,1) ~= size(x,1),
  error('Input x,y sizes mismatched.');
end

% get sizes
[N,k] = size(x);

% if sample weights weren't specified, set them to 1
% NOTE(review): a user-supplied w is not checked to be Nx1; a wrong-length
% w will surface as a dimension error inside the loop -- confirm callers.
if nargin < 3,
  w = 1;
end

% normalize sample weights so max is 1
w = w / max(w);

% initial guess for beta: all zeros
beta = zeros(k,1);

% Newton-Raphson via IRLS,
% taken from Hastie/Tibshirani/Friedman Section 4.4.
iter = 0;
lli = 0;
while true,
  iter = iter + 1;

  % fitted probabilities (class 1 posterior under current beta)
  p = 1 ./ (1 + exp(-x*beta));

  % weighted log likelihood; eps guards log(0) when p saturates
  lli_prev = lli;
  lli = sum( w .* (y.*log(p+eps) + (1-y).*log(1-p+eps)) );

  % least-squares weights for IRLS
  wt = w .* p .* (1-p);

  % gradient of the log likelihood w.r.t. beta
  deriv = x'*(w.*(y-p));

  % Hessian of the log likelihood w.r.t. beta: -x'*diag(wt)*x.
  % Built column-pair-wise to be memory efficient and fast,
  % exploiting symmetry (hess(i,j) == hess(j,i)).
  hess = zeros(k,k);
  for i = 1:k,
    wxi = wt .* x(:,i);
    for j = i:k,
      hij = wxi' * x(:,j);
      hess(i,j) = -hij;
      hess(j,i) = -hij;
    end
  end

  % abort if the Hessian has become numerically singular
  % (error() does not return, so no break is needed here)
  if (rcond(hess) < eps),
    error(['Stopped at iteration ' num2str(iter) ...
           ' because Hessian is poorly conditioned.']);
  end;

  % Newton-Raphson update step: beta <- beta - hess \ deriv
  step = hess\deriv;
  beta = beta - step;

  % termination criterion based on derivatives
  tol = 1e-6;
  if abs(deriv'*step/k) < tol, break; end;

  % termination criterion based on log likelihood
%  tol = 1e-4;
%  if abs((lli-lli_prev)/(lli+lli_prev)) < 0.5*tol, break; end;
end;