comparison pyCSalgos/BP/l1eq_pd.py @ 62:e684f76c1969

Added l1eq_pd(), l1 minimizer with equality constraints from l1-magic
author Nic Cleju <nikcleju@gmail.com>
date Mon, 12 Mar 2012 11:29:32 +0200
parents
children 2fc28e2ae0a2

import numpy
import scipy.linalg
import math

class l1ecNotImplementedError(Exception):
    pass

#function xp = l1eq_pd(x0, A, At, b, pdtol, pdmaxiter, cgtol, cgmaxiter)
def l1eq_pd(x0, A, At, b, pdtol=1e-3, pdmaxiter=50, cgtol=1e-8, cgmaxiter=200, verbose=False):

    # Solve
    # min_x ||x||_1  s.t.  Ax = b
    #
    # Recast as linear program
    # min_{x,u} sum(u)  s.t.  -u <= x <= u,  Ax=b
    # and use primal-dual interior point method
    #
    # Usage: xp = l1eq_pd(x0, A, At, b, pdtol, pdmaxiter, cgtol, cgmaxiter)
    #
    # x0 - Nx1 vector, initial point.
    #
    # A - Either a handle to a function that takes an N vector and returns a K
    #     vector, or a KxN matrix. If A is a function handle, the algorithm
    #     operates in "largescale" mode, solving the Newton systems via the
    #     Conjugate Gradients algorithm.
    #
    # At - Handle to a function that takes a K vector and returns an N vector.
    #      If A is a KxN matrix, At is ignored.
    #
    # b - Kx1 vector of observations.
    #
    # pdtol - Tolerance for primal-dual algorithm (algorithm terminates if
    #     the duality gap is less than pdtol).
    #     Default = 1e-3.
    #
    # pdmaxiter - Maximum number of primal-dual iterations.
    #     Default = 50.
    #
    # cgtol - Tolerance for Conjugate Gradients; ignored if A is a matrix.
    #     Default = 1e-8.
    #
    # cgmaxiter - Maximum number of iterations for Conjugate Gradients; ignored
    #     if A is a matrix.
    #     Default = 200.
    #
    # Written by: Justin Romberg, Caltech
    # Email: jrom@acm.caltech.edu
    # Created: October 2005


    #---------------------
    # Original Matlab code:

    #largescale = isa(A,'function_handle');
    #
    #if (nargin < 5), pdtol = 1e-3; end
    #if (nargin < 6), pdmaxiter = 50; end
    #if (nargin < 7), cgtol = 1e-8; end
    #if (nargin < 8), cgmaxiter = 200; end
    #
    #N = length(x0);
    #
    #alpha = 0.01;
    #beta = 0.5;
    #mu = 10;
    #
    #gradf0 = [zeros(N,1); ones(N,1)];
    #
    ## starting point --- make sure that it is feasible
    #if (largescale)
    # if (norm(A(x0)-b)/norm(b) > cgtol)
    # disp('Starting point infeasible; using x0 = At*inv(AAt)*y.');
    # AAt = @(z) A(At(z));
    # [w, cgres, cgiter] = cgsolve(AAt, b, cgtol, cgmaxiter, 0);
    # if (cgres > 1/2)
    # disp('A*At is ill-conditioned: cannot find starting point');
    # xp = x0;
    # return;
    # end
    # x0 = At(w);
    # end
    #else
    # if (norm(A*x0-b)/norm(b) > cgtol)
    # disp('Starting point infeasible; using x0 = At*inv(AAt)*y.');
    # opts.POSDEF = true; opts.SYM = true;
    # [w, hcond] = linsolve(A*A', b, opts);
    # if (hcond < 1e-14)
    # disp('A*At is ill-conditioned: cannot find starting point');
    # xp = x0;
    # return;
    # end
    # x0 = A'*w;
    # end
    #end
    #x = x0;
    #u = (0.95)*abs(x0) + (0.10)*max(abs(x0));
    #
    ## set up for the first iteration
    #fu1 = x - u;
    #fu2 = -x - u;
    #lamu1 = -1./fu1;
    #lamu2 = -1./fu2;
    #if (largescale)
    # v = -A(lamu1-lamu2);
    # Atv = At(v);
    # rpri = A(x) - b;
    #else
    # v = -A*(lamu1-lamu2);
    # Atv = A'*v;
    # rpri = A*x - b;
    #end
    #
    #sdg = -(fu1'*lamu1 + fu2'*lamu2);
    #tau = mu*2*N/sdg;
    #
    #rcent = [-lamu1.*fu1; -lamu2.*fu2] - (1/tau);
    #rdual = gradf0 + [lamu1-lamu2; -lamu1-lamu2] + [Atv; zeros(N,1)];
    #resnorm = norm([rdual; rcent; rpri]);
    #
    #pditer = 0;
    #done = (sdg < pdtol) | (pditer >= pdmaxiter);
    #while (~done)
    #
    # pditer = pditer + 1;
    #
    # w1 = -1/tau*(-1./fu1 + 1./fu2) - Atv;
    # w2 = -1 - 1/tau*(1./fu1 + 1./fu2);
    # w3 = -rpri;
    #
    # sig1 = -lamu1./fu1 - lamu2./fu2;
    # sig2 = lamu1./fu1 - lamu2./fu2;
    # sigx = sig1 - sig2.^2./sig1;
    #
    # if (largescale)
    # w1p = w3 - A(w1./sigx - w2.*sig2./(sigx.*sig1));
    # h11pfun = @(z) -A(1./sigx.*At(z));
    # [dv, cgres, cgiter] = cgsolve(h11pfun, w1p, cgtol, cgmaxiter, 0);
    # if (cgres > 1/2)
    # disp('Cannot solve system. Returning previous iterate. (See Section 4 of notes for more information.)');
    # xp = x;
    # return
    # end
    # dx = (w1 - w2.*sig2./sig1 - At(dv))./sigx;
    # Adx = A(dx);
    # Atdv = At(dv);
    # else
    # w1p = -(w3 - A*(w1./sigx - w2.*sig2./(sigx.*sig1)));
    # H11p = A*(sparse(diag(1./sigx))*A');
    # opts.POSDEF = true; opts.SYM = true;
    # [dv,hcond] = linsolve(H11p, w1p, opts);
    # if (hcond < 1e-14)
    # disp('Matrix ill-conditioned. Returning previous iterate. (See Section 4 of notes for more information.)');
    # xp = x;
    # return
    # end
    # dx = (w1 - w2.*sig2./sig1 - A'*dv)./sigx;
    # Adx = A*dx;
    # Atdv = A'*dv;
    # end
    #
    # du = (w2 - sig2.*dx)./sig1;
    #
    # dlamu1 = (lamu1./fu1).*(-dx+du) - lamu1 - (1/tau)*1./fu1;
    # dlamu2 = (lamu2./fu2).*(dx+du) - lamu2 - 1/tau*1./fu2;
    #
    # # make sure that the step is feasible: keeps lamu1,lamu2 > 0, fu1,fu2 < 0
    # indp = find(dlamu1 < 0); indn = find(dlamu2 < 0);
    # s = min([1; -lamu1(indp)./dlamu1(indp); -lamu2(indn)./dlamu2(indn)]);
    # indp = find((dx-du) > 0); indn = find((-dx-du) > 0);
    # s = (0.99)*min([s; -fu1(indp)./(dx(indp)-du(indp)); -fu2(indn)./(-dx(indn)-du(indn))]);
    #
    # # backtracking line search
    # suffdec = 0;
    # backiter = 0;
    # while (~suffdec)
    # xp = x + s*dx; up = u + s*du;
    # vp = v + s*dv; Atvp = Atv + s*Atdv;
    # lamu1p = lamu1 + s*dlamu1; lamu2p = lamu2 + s*dlamu2;
    # fu1p = xp - up; fu2p = -xp - up;
    # rdp = gradf0 + [lamu1p-lamu2p; -lamu1p-lamu2p] + [Atvp; zeros(N,1)];
    # rcp = [-lamu1p.*fu1p; -lamu2p.*fu2p] - (1/tau);
    # rpp = rpri + s*Adx;
    # suffdec = (norm([rdp; rcp; rpp]) <= (1-alpha*s)*resnorm);
    # s = beta*s;
    # backiter = backiter + 1;
    # if (backiter > 32)
    # disp('Stuck backtracking, returning last iterate. (See Section 4 of notes for more information.)')
    # xp = x;
    # return
    # end
    # end
    #
    #
    # # next iteration
    # x = xp; u = up;
    # v = vp; Atv = Atvp;
    # lamu1 = lamu1p; lamu2 = lamu2p;
    # fu1 = fu1p; fu2 = fu2p;
    #
    # # surrogate duality gap
    # sdg = -(fu1'*lamu1 + fu2'*lamu2);
    # tau = mu*2*N/sdg;
    # rpri = rpp;
    # rcent = [-lamu1.*fu1; -lamu2.*fu2] - (1/tau);
    # rdual = gradf0 + [lamu1-lamu2; -lamu1-lamu2] + [Atv; zeros(N,1)];
    # resnorm = norm([rdual; rcent; rpri]);
    #
    # done = (sdg < pdtol) | (pditer >= pdmaxiter);
    #
    # disp(sprintf('Iteration = #d, tau = #8.3e, Primal = #8.3e, PDGap = #8.3e, Dual res = #8.3e, Primal res = #8.3e',...
    # pditer, tau, sum(u), sdg, norm(rdual), norm(rpri)));
    # if (largescale)
    # disp(sprintf(' CG Res = #8.3e, CG Iter = #d', cgres, cgiter));
    # else
    # disp(sprintf(' H11p condition number = #8.3e', hcond));
    # end
    #
    #end

    # End of original Matlab code
    #----------------------------

    #largescale = isa(A,'function_handle');
    if hasattr(A, '__call__'):
        largescale = True
    else:
        largescale = False

    #N = length(x0);
    N = x0.size

    alpha = 0.01
    beta = 0.5
    mu = 10

    #gradf0 = [zeros(N,1); ones(N,1)];
    gradf0 = numpy.hstack((numpy.zeros(N), numpy.ones(N)))

    # starting point --- make sure that it is feasible
    #if (largescale)
    if largescale:
        raise l1ecNotImplementedError('Largescale not implemented yet!')
    else:
        #if (norm(A*x0-b)/norm(b) > cgtol)
        if numpy.linalg.norm(numpy.dot(A,x0)-b) / numpy.linalg.norm(b) > cgtol:
            #disp('Starting point infeasible; using x0 = At*inv(AAt)*y.');
            if verbose:
                print 'Starting point infeasible; using x0 = At*inv(AAt)*y.'
            #opts.POSDEF = true; opts.SYM = true;
            #[w, hcond] = linsolve(A*A', b, opts);
            #if (hcond < 1e-14)
            #  disp('A*At is ill-conditioned: cannot find starting point');
            #  xp = x0;
            #  return;
            #end
            #x0 = A'*w;
            try:
                w = scipy.linalg.solve(numpy.dot(A,A.T), b, sym_pos=True)
                hcond = 1.0/numpy.linalg.cond(numpy.dot(A,A.T))
            except scipy.linalg.LinAlgError:
                if verbose:
                    print 'A*At is ill-conditioned: cannot find starting point'
                xp = x0.copy()
                return xp
            if hcond < 1e-14:
                if verbose:
                    print 'A*At is ill-conditioned: cannot find starting point'
                xp = x0.copy()
                return xp
            x0 = numpy.dot(A.T, w)
        #end
    #end
    x = x0.copy()
    #u = (0.95)*abs(x0) + (0.10)*max(abs(x0));
    u = (0.95)*numpy.abs(x0) + (0.10)*numpy.abs(x0).max()
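    # LP variables are (x, u); fu1 = x - u <= 0 and fu2 = -x - u <= 0 are the
    # inequality constraints, lamu1/lamu2 are their dual variables, and v is the
    # dual variable of the equality constraint Ax = b.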

    # set up for the first iteration
    fu1 = x - u
    fu2 = -x - u
    lamu1 = -1/fu1
    lamu2 = -1/fu2
    if (largescale):
        #v = -A(lamu1-lamu2);
        #Atv = At(v);
        #rpri = A(x) - b;
        raise l1ecNotImplementedError('Largescale not implemented yet!')
    else:
        #v = -A*(lamu1-lamu2);
        #Atv = A'*v;
        #rpri = A*x - b;
        v = numpy.dot(-A, lamu1-lamu2)
        Atv = numpy.dot(A.T, v)
        rpri = numpy.dot(A,x) - b
    #end

    #sdg = -(fu1'*lamu1 + fu2'*lamu2);
    sdg = -(numpy.dot(fu1,lamu1) + numpy.dot(fu2,lamu2))
    tau = mu*2*N/sdg

    #rcent = [-lamu1.*fu1; -lamu2.*fu2] - (1/tau);
    rcent = numpy.hstack((-lamu1*fu1, -lamu2*fu2)) - (1/tau)
    #rdual = gradf0 + [lamu1-lamu2; -lamu1-lamu2] + [Atv; zeros(N,1)];
    rdual = gradf0 + numpy.hstack((lamu1-lamu2, -lamu1-lamu2)) + numpy.hstack((Atv, numpy.zeros(N)))
    #resnorm = norm([rdual; rcent; rpri]);
    resnorm = numpy.linalg.norm(numpy.hstack((rdual, rcent, rpri)))
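    # Each primal-dual iteration below computes a Newton step for the residuals
    # (rdual, rcent, rpri): dv is obtained from the KxK Schur-complement system
    # H11p = A*diag(1/sigx)*A', then dx, du, dlamu1, dlamu2 follow by
    # back-substitution; the step length s is capped to keep lamu1, lamu2 > 0 and
    # fu1, fu2 < 0, and reduced by backtracking until the residual norm decreases.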

    pditer = 0
    #done = (sdg < pdtol) | (pditer >= pdmaxiter);
    done = (sdg < pdtol) or (pditer >= pdmaxiter)
    #while (~done)
    while not done:

        pditer = pditer + 1

        #w1 = -1/tau*(-1./fu1 + 1./fu2) - Atv;
        w1 = -1/tau*(-1/fu1 + 1/fu2) - Atv
        w2 = -1 - 1/tau*(1/fu1 + 1/fu2)
        w3 = -rpri

        #sig1 = -lamu1./fu1 - lamu2./fu2;
        sig1 = -lamu1/fu1 - lamu2/fu2
        sig2 = lamu1/fu1 - lamu2/fu2
        #sigx = sig1 - sig2.^2./sig1;
        sigx = sig1 - sig2**2/sig1

        if largescale:
            #w1p = w3 - A(w1./sigx - w2.*sig2./(sigx.*sig1));
            #h11pfun = @(z) -A(1./sigx.*At(z));
            #[dv, cgres, cgiter] = cgsolve(h11pfun, w1p, cgtol, cgmaxiter, 0);
            #if (cgres > 1/2)
            #  disp('Cannot solve system. Returning previous iterate. (See Section 4 of notes for more information.)');
            #  xp = x;
            #  return
            #end
            #dx = (w1 - w2.*sig2./sig1 - At(dv))./sigx;
            #Adx = A(dx);
            #Atdv = At(dv);
            raise l1ecNotImplementedError('Largescale not implemented yet!')
        else:
            #w1p = -(w3 - A*(w1./sigx - w2.*sig2./(sigx.*sig1)));
            w1p = -(w3 - numpy.dot(A,(w1/sigx - w2*sig2/(sigx*sig1))))
            #H11p = A*(sparse(diag(1./sigx))*A');
            H11p = numpy.dot(A, numpy.dot(numpy.diag(1/sigx),A.T))
            #opts.POSDEF = true; opts.SYM = true;
            #[dv,hcond] = linsolve(H11p, w1p, opts);
            try:
                dv = scipy.linalg.solve(H11p, w1p, sym_pos=True)
                hcond = 1.0/numpy.linalg.cond(H11p)
            except scipy.linalg.LinAlgError:
                if verbose:
                    print 'Matrix ill-conditioned. Returning previous iterate. (See Section 4 of notes for more information.)'
                xp = x.copy()
                return xp
            if hcond < 1e-14:
                if verbose:
                    print 'Matrix ill-conditioned. Returning previous iterate. (See Section 4 of notes for more information.)'
                xp = x.copy()
                return xp
            #if (hcond < 1e-14)
            #  disp('Matrix ill-conditioned. Returning previous iterate. (See Section 4 of notes for more information.)');
            #  xp = x;
            #  return
            #end

            #dx = (w1 - w2.*sig2./sig1 - A'*dv)./sigx;
            dx = (w1 - w2*sig2/sig1 - numpy.dot(A.T,dv))/sigx
            #Adx = A*dx;
            Adx = numpy.dot(A,dx)
            #Atdv = A'*dv;
            Atdv = numpy.dot(A.T,dv)
        #end

        #du = (w2 - sig2.*dx)./sig1;
        du = (w2 - sig2*dx)/sig1

        #dlamu1 = (lamu1./fu1).*(-dx+du) - lamu1 - (1/tau)*1./fu1;
        dlamu1 = (lamu1/fu1)*(-dx+du) - lamu1 - (1/tau)*1/fu1
        dlamu2 = (lamu2/fu2)*(dx+du) - lamu2 - 1/tau*1/fu2

        # make sure that the step is feasible: keeps lamu1,lamu2 > 0, fu1,fu2 < 0
        #indp = find(dlamu1 < 0); indn = find(dlamu2 < 0);
        indp = numpy.nonzero(dlamu1 < 0)
        indn = numpy.nonzero(dlamu2 < 0)
        #s = min([1; -lamu1(indp)./dlamu1(indp); -lamu2(indn)./dlamu2(indn)]);
        s = numpy.min(numpy.hstack((numpy.array([1]), -lamu1[indp]/dlamu1[indp], -lamu2[indn]/dlamu2[indn])))
        #indp = find((dx-du) > 0); indn = find((-dx-du) > 0);
        indp = numpy.nonzero((dx-du) > 0)
        indn = numpy.nonzero((-dx-du) > 0)
        #s = (0.99)*min([s; -fu1(indp)./(dx(indp)-du(indp)); -fu2(indn)./(-dx(indn)-du(indn))]);
        s = (0.99)*numpy.min(numpy.hstack((numpy.array([s]), -fu1[indp]/(dx[indp]-du[indp]), -fu2[indn]/(-dx[indn]-du[indn]))))

        # backtracking line search
        suffdec = 0
        backiter = 0
        #while (~suffdec)
        while not suffdec:
            #xp = x + s*dx; up = u + s*du;
            xp = x + s*dx
            up = u + s*du
            #vp = v + s*dv; Atvp = Atv + s*Atdv;
            vp = v + s*dv
            Atvp = Atv + s*Atdv
            #lamu1p = lamu1 + s*dlamu1; lamu2p = lamu2 + s*dlamu2;
            lamu1p = lamu1 + s*dlamu1
            lamu2p = lamu2 + s*dlamu2
            #fu1p = xp - up; fu2p = -xp - up;
            fu1p = xp - up
            fu2p = -xp - up
            #rdp = gradf0 + [lamu1p-lamu2p; -lamu1p-lamu2p] + [Atvp; zeros(N,1)];
            rdp = gradf0 + numpy.hstack((lamu1p-lamu2p, -lamu1p-lamu2p)) + numpy.hstack((Atvp, numpy.zeros(N)))
            #rcp = [-lamu1p.*fu1p; -lamu2p.*fu2p] - (1/tau);
            rcp = numpy.hstack((-lamu1p*fu1p, -lamu2p*fu2p)) - (1/tau)
            #rpp = rpri + s*Adx;
            rpp = rpri + s*Adx
            #suffdec = (norm([rdp; rcp; rpp]) <= (1-alpha*s)*resnorm);
            suffdec = (numpy.linalg.norm(numpy.hstack((rdp, rcp, rpp))) <= (1-alpha*s)*resnorm)
            s = beta*s
            backiter = backiter + 1
            if (backiter > 32):
                if verbose:
                    print 'Stuck backtracking, returning last iterate. (See Section 4 of notes for more information.)'
                xp = x.copy()
                return xp
            #end
        #end


        # next iteration
        #x = xp; u = up;
        x = xp.copy()
        u = up.copy()
        #v = vp; Atv = Atvp;
        v = vp.copy()
        Atv = Atvp.copy()
        #lamu1 = lamu1p; lamu2 = lamu2p;
        lamu1 = lamu1p.copy()
        lamu2 = lamu2p.copy()
        #fu1 = fu1p; fu2 = fu2p;
        fu1 = fu1p.copy()
        fu2 = fu2p.copy()

        # surrogate duality gap
        #sdg = -(fu1'*lamu1 + fu2'*lamu2);
        sdg = -(numpy.dot(fu1,lamu1) + numpy.dot(fu2,lamu2))
        tau = mu*2*N/sdg
        rpri = rpp.copy()
        #rcent = [-lamu1.*fu1; -lamu2.*fu2] - (1/tau);
        rcent = numpy.hstack((-lamu1*fu1, -lamu2*fu2)) - (1/tau)
        #rdual = gradf0 + [lamu1-lamu2; -lamu1-lamu2] + [Atv; zeros(N,1)];
        rdual = gradf0 + numpy.hstack((lamu1-lamu2, -lamu1-lamu2)) + numpy.hstack((Atv, numpy.zeros(N)))
        #resnorm = norm([rdual; rcent; rpri]);
        resnorm = numpy.linalg.norm(numpy.hstack((rdual, rcent, rpri)))

        #done = (sdg < pdtol) | (pditer >= pdmaxiter);
        done = (sdg < pdtol) or (pditer >= pdmaxiter)

        if verbose:
            print 'Iteration =',pditer,', tau =',tau,', Primal =',numpy.sum(u),', PDGap =',sdg,', Dual res =',numpy.linalg.norm(rdual),', Primal res =',numpy.linalg.norm(rpri)
        if largescale:
            #disp(sprintf(' CG Res = #8.3e, CG Iter = #d', cgres, cgiter));
            raise l1ecNotImplementedError('Largescale not implemented yet!')
        else:
            #disp(sprintf(' H11p condition number = #8.3e', hcond));
            if verbose:
                print ' H11p condition number =',hcond
        #end

    #end

    return xp
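

# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original l1-magic port): recover
# a sparse vector from random Gaussian measurements.  The problem sizes, seed
# and sparsity level below are arbitrary demonstration choices.
# ---------------------------------------------------------------------------
if __name__ == "__main__":

    numpy.random.seed(0)
    N = 128    # signal length
    K = 64     # number of measurements
    S = 8      # number of nonzero entries in the true signal

    # Random measurement matrix and an S-sparse ground truth
    A = numpy.random.randn(K, N)
    x_true = numpy.zeros(N)
    x_true[numpy.random.permutation(N)[:S]] = numpy.random.randn(S)
    b = numpy.dot(A, x_true)

    # Feasible starting point: minimum-energy solution x0 = A'*inv(A*A')*b
    x0 = numpy.dot(A.T, scipy.linalg.solve(numpy.dot(A, A.T), b))

    # At is ignored when A is given as a matrix (see the docstring above)
    xp = l1eq_pd(x0, A, A.T, b, verbose=True)

    err = numpy.linalg.norm(xp - x_true) / numpy.linalg.norm(x_true)
    print 'Relative reconstruction error:', err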