comparison toolboxes/FullBNT-1.0.7/KPMstats/logist2.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e9a9cd732c1e
function [beta,p,lli] = logist2(y,x,w)
% [beta,p,lli] = logist2(y,x,w)
%
% 2-class logistic regression via Newton-Raphson / IRLS.
%
% INPUT
%   y     Nx1 column vector of 0|1 class assignments
%   x     NxK matrix of input vectors as rows
%   [w]   Nx1 vector of sample weights (optional; default uniform)
%
% OUTPUT
%   beta  Kx1 column vector of model coefficients
%   p     Nx1 column vector of fitted class 1 posteriors
%   lli   weighted log likelihood at the fitted beta
%
% Class 1 posterior is 1 / (1 + exp(-x*beta))
%
% Raises an error if x/y sizes mismatch, y is not a column vector,
% or the Hessian becomes poorly conditioned during iteration.
%
% David Martin <dmartin@eecs.berkeley.edu>
% April 16, 2002

% Copyright (C) 2002 David R. Martin <dmartin@eecs.berkeley.edu>
%
% This program is free software; you can redistribute it and/or
% modify it under the terms of the GNU General Public License as
% published by the Free Software Foundation; either version 2 of the
% License, or (at your option) any later version.
%

% This program is distributed in the hope that it will be useful, but
% WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
% General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program; if not, write to the Free Software
% Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
% 02111-1307, USA, or see http://www.gnu.org/copyleft/gpl.html.

error(nargchk(2,3,nargin));

% check inputs
if size(y,2) ~= 1,
  error('Input y not a column vector.');
end
if size(y,1) ~= size(x,1),
  error('Input x,y sizes mismatched.');
end

% get sizes
[N,k] = size(x);

% if sample weights weren't specified, set them to 1
if nargin < 3,
  w = 1;
end

% normalize sample weights so max is 1
w = w / max(w);

% initial guess for beta: all zeros
beta = zeros(k,1);

% Newton-Raphson via IRLS,
% taken from Hastie/Tibshirani/Friedman Section 4.4.
tol = 1e-6;  % convergence tolerance (loop-invariant; hoisted out of loop)
iter = 0;
lli = 0;
while 1==1,
  iter = iter + 1;

  % fitted class-1 probabilities under current beta
  p = 1 ./ (1 + exp(-x*beta));

  % weighted Bernoulli log likelihood; eps guards against log(0)
  lli_prev = lli;
  lli = sum( w .* (y.*log(p+eps) + (1-y).*log(1-p+eps)) );

  % IRLS least-squares weights
  wt = w .* p .* (1-p);

  % gradient of log likelihood w.r.t. beta
  deriv = x'*(w.*(y-p));

  % Hessian of log likelihood w.r.t. beta:
  %   hess = -x'*W*x, where W = diag(wt)
  % (the IRLS weights wt, not the sample weights w).
  % Built entry-by-entry, exploiting symmetry, to be memory
  % efficient and fast.
  hess = zeros(k,k);
  for i = 1:k,
    wxi = wt .* x(:,i);
    for j = i:k,
      hij = wxi' * x(:,j);
      hess(i,j) = -hij;
      hess(j,i) = -hij;
    end
  end

  % abort if the Hessian is too ill-conditioned to solve reliably
  % (error terminates; the unreachable break after it was removed)
  if (rcond(hess) < eps),
    error(['Stopped at iteration ' num2str(iter) ...
           ' because Hessian is poorly conditioned.']);
  end;

  % Newton-Raphson update step; hess is the negated X'WX, so
  % beta - hess\deriv is the standard ascent update
  step = hess\deriv;
  beta = beta - step;

  % termination criterion based on derivatives
  if abs(deriv'*step/k) < tol, break; end;

  % termination criterion based on log likelihood
  % tol = 1e-4;
  % if abs((lli-lli_prev)/(lli+lli_prev)) < 0.5*tol, break; end;
end;