wolffd@0
|
1 function [beta,p,lli] = logist2(y,x,w)
|
wolffd@0
|
2 % [beta,p,lli] = logist2(y,x)
|
wolffd@0
|
3 %
|
wolffd@0
|
4 % 2-class logistic regression.
|
wolffd@0
|
5 %
|
wolffd@0
|
6 % INPUT
|
wolffd@0
|
7 % y Nx1 colum vector of 0|1 class assignments
|
wolffd@0
|
8 % x NxK matrix of input vectors as rows
|
wolffd@0
|
9 % [w] Nx1 vector of sample weights
|
wolffd@0
|
10 %
|
wolffd@0
|
11 % OUTPUT
|
wolffd@0
|
12 % beta Kx1 column vector of model coefficients
|
wolffd@0
|
13 % p Nx1 column vector of fitted class 1 posteriors
|
wolffd@0
|
14 % lli log likelihood
|
wolffd@0
|
15 %
|
wolffd@0
|
16 % Class 1 posterior is 1 / (1 + exp(-x*beta))
|
wolffd@0
|
17 %
|
wolffd@0
|
18 % David Martin <dmartin@eecs.berkeley.edu>
|
wolffd@0
|
19 % April 16, 2002
|
wolffd@0
|
20
|
wolffd@0
|
21 % Copyright (C) 2002 David R. Martin <dmartin@eecs.berkeley.edu>
|
wolffd@0
|
22 %
|
wolffd@0
|
23 % This program is free software; you can redistribute it and/or
|
wolffd@0
|
24 % modify it under the terms of the GNU General Public License as
|
wolffd@0
|
25 % published by the Free Software Foundation; either version 2 of the
|
wolffd@0
|
26 % License, or (at your option) any later version.
|
wolffd@0
|
27 %
|
wolffd@0
|
28
|
wolffd@0
|
29 % This program is distributed in the hope that it will be useful, but
|
wolffd@0
|
30 % WITHOUT ANY WARRANTY; without even the implied warranty of
|
wolffd@0
|
31 % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
wolffd@0
|
32 % General Public License for more details.
|
wolffd@0
|
33 %
|
wolffd@0
|
34 % You should have received a copy of the GNU General Public License
|
wolffd@0
|
35 % along with this program; if not, write to the Free Software
|
wolffd@0
|
36 % Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
|
wolffd@0
|
37 % 02111-1307, USA, or see http://www.gnu.org/copyleft/gpl.html.
|
wolffd@0
|
38
|
wolffd@0
|
39 error(nargchk(2,3,nargin));
|
wolffd@0
|
40
|
wolffd@0
|
41 % check inputs
|
wolffd@0
|
42 if size(y,2) ~= 1,
|
wolffd@0
|
43 error('Input y not a column vector.');
|
wolffd@0
|
44 end
|
wolffd@0
|
45 if size(y,1) ~= size(x,1),
|
wolffd@0
|
46 error('Input x,y sizes mismatched.');
|
wolffd@0
|
47 end
|
wolffd@0
|
48
|
wolffd@0
|
49 % get sizes
|
wolffd@0
|
50 [N,k] = size(x);
|
wolffd@0
|
51
|
wolffd@0
|
52 % if sample weights weren't specified, set them to 1
|
wolffd@0
|
53 if nargin < 3,
|
wolffd@0
|
54 w = 1;
|
wolffd@0
|
55 end
|
wolffd@0
|
56
|
wolffd@0
|
57 % normalize sample weights so max is 1
|
wolffd@0
|
58 w = w / max(w);
|
wolffd@0
|
59
|
wolffd@0
|
60 % initial guess for beta: all zeros
|
wolffd@0
|
61 beta = zeros(k,1);
|
wolffd@0
|
62
|
wolffd@0
|
63 % Newton-Raphson via IRLS,
|
wolffd@0
|
64 % taken from Hastie/Tibshirani/Friedman Section 4.4.
|
wolffd@0
|
65 iter = 0;
|
wolffd@0
|
66 lli = 0;
|
wolffd@0
|
67 while 1==1,
|
wolffd@0
|
68 iter = iter + 1;
|
wolffd@0
|
69
|
wolffd@0
|
70 % fitted probabilities
|
wolffd@0
|
71 p = 1 ./ (1 + exp(-x*beta));
|
wolffd@0
|
72
|
wolffd@0
|
73 % log likelihood
|
wolffd@0
|
74 lli_prev = lli;
|
wolffd@0
|
75 lli = sum( w .* (y.*log(p+eps) + (1-y).*log(1-p+eps)) );
|
wolffd@0
|
76
|
wolffd@0
|
77 % least-squares weights
|
wolffd@0
|
78 wt = w .* p .* (1-p);
|
wolffd@0
|
79
|
wolffd@0
|
80 % derivatives of likelihood w.r.t. beta
|
wolffd@0
|
81 deriv = x'*(w.*(y-p));
|
wolffd@0
|
82
|
wolffd@0
|
83 % Hessian of likelihood w.r.t. beta
|
wolffd@0
|
84 % hessian = x'Wx, where W=diag(w)
|
wolffd@0
|
85 % Do it this way to be memory efficient and fast.
|
wolffd@0
|
86 hess = zeros(k,k);
|
wolffd@0
|
87 for i = 1:k,
|
wolffd@0
|
88 wxi = wt .* x(:,i);
|
wolffd@0
|
89 for j = i:k,
|
wolffd@0
|
90 hij = wxi' * x(:,j);
|
wolffd@0
|
91 hess(i,j) = -hij;
|
wolffd@0
|
92 hess(j,i) = -hij;
|
wolffd@0
|
93 end
|
wolffd@0
|
94 end
|
wolffd@0
|
95
|
wolffd@0
|
96 % make sure Hessian is well conditioned
|
wolffd@0
|
97 if (rcond(hess) < eps),
|
wolffd@0
|
98 error(['Stopped at iteration ' num2str(iter) ...
|
wolffd@0
|
99 ' because Hessian is poorly conditioned.']);
|
wolffd@0
|
100 break;
|
wolffd@0
|
101 end;
|
wolffd@0
|
102
|
wolffd@0
|
103 % Newton-Raphson update step
|
wolffd@0
|
104 step = hess\deriv;
|
wolffd@0
|
105 beta = beta - step;
|
wolffd@0
|
106
|
wolffd@0
|
107 % termination criterion based on derivatives
|
wolffd@0
|
108 tol = 1e-6;
|
wolffd@0
|
109 if abs(deriv'*step/k) < tol, break; end;
|
wolffd@0
|
110
|
wolffd@0
|
111 % termination criterion based on log likelihood
|
wolffd@0
|
112 % tol = 1e-4;
|
wolffd@0
|
113 % if abs((lli-lli_prev)/(lli+lli_prev)) < 0.5*tol, break; end;
|
wolffd@0
|
114 end;
|
wolffd@0
|
115
|