nikcleju@37: # -*- coding: utf-8 -*- nikcleju@37: """ nikcleju@37: Created on Thu Nov 17 15:47:36 2011 nikcleju@37: nikcleju@37: Solve l1 minimization with quadratic AND equality constraints nikcleju@37: nikcleju@37: @author: ncleju nikcleju@37: """ nikcleju@37: nikcleju@37: nikcleju@37: import numpy as np nikcleju@37: import scipy.linalg nikcleju@37: import math nikcleju@37: nikcleju@37: class l1qecInputValueError(Exception): nikcleju@37: pass nikcleju@37: nikcleju@37: # This is not normally used, so it is not tested, probably doesn't work nikcleju@37: def cgsolve(A, b, tol, maxiter, verbose=1): nikcleju@37: raise Exception('Shouldn\'t run cgsolve(), as this is absolutely not tested. Comment this if you really want to proceed.') nikcleju@37: nikcleju@37: nikcleju@37: #if (nargin < 5), verbose = 1; end nikcleju@37: # Optional argument nikcleju@37: nikcleju@37: #implicit = isa(A,'function_handle'); nikcleju@37: if hasattr(A, '__call__'): nikcleju@37: implicit = True nikcleju@37: else: nikcleju@37: implicit = False nikcleju@37: nikcleju@37: x = np.zeros(b.size) nikcleju@37: r = b.copy() nikcleju@37: d = r.copy() nikcleju@37: delta = np.vdot(r,r) nikcleju@37: delta0 = np.vdot(b,b) nikcleju@37: numiter = 0 nikcleju@37: bestx = x.copy() nikcleju@37: bestres = math.sqrt(delta/delta0) nikcleju@37: while (numiter < maxiter) and (delta > tol**2*delta0): nikcleju@37: nikcleju@37: # q = A*d nikcleju@37: #if (implicit), q = A(d); else q = A*d; end nikcleju@37: if implicit: nikcleju@37: q = A(d) nikcleju@37: else: nikcleju@37: q = np.dot(A,d) nikcleju@37: nikcleju@37: alpha = delta/np.vdot(d,q) nikcleju@37: x = x + alpha*d nikcleju@37: nikcleju@37: if divmod(numiter+1,50)[1] == 0: nikcleju@37: # r = b - Aux*x nikcleju@37: #if (implicit), r = b - A(x); else r = b - A*x; end nikcleju@37: if implicit: nikcleju@37: r = b - A(x) nikcleju@37: else: nikcleju@37: r = b - np.dot(A,x) nikcleju@37: else: nikcleju@37: r = r - alpha*q nikcleju@37: #end nikcleju@37: nikcleju@37: deltaold = delta; nikcleju@37: delta = np.vdot(r,r) nikcleju@37: beta = delta/deltaold; nikcleju@37: d = r + beta*d nikcleju@37: numiter = numiter + 1 nikcleju@37: if (math.sqrt(delta/delta0) < bestres): nikcleju@37: bestx = x.copy() nikcleju@37: bestres = math.sqrt(delta/delta0) nikcleju@37: #end nikcleju@37: nikcleju@37: if ((verbose) and (divmod(numiter,verbose)[1]==0)): nikcleju@37: #disp(sprintf('cg: Iter = #d, Best residual = #8.3e, Current residual = #8.3e', ... nikcleju@37: # numiter, bestres, sqrt(delta/delta0))); nikcleju@37: print 'cg: Iter = ',numiter,', Best residual = ',bestres,', Current residual = ',math.sqrt(delta/delta0) nikcleju@37: #end nikcleju@37: nikcleju@37: #end nikcleju@37: nikcleju@37: if (verbose): nikcleju@37: #disp(sprintf('cg: Iterations = #d, best residual = #14.8e', numiter, bestres)); nikcleju@37: print 'cg: Iterations = ',numiter,', best residual = ',bestres nikcleju@37: #end nikcleju@37: x = bestx.copy() nikcleju@37: res = bestres nikcleju@37: iter = numiter nikcleju@37: nikcleju@37: return x,res,iter nikcleju@37: nikcleju@37: nikcleju@37: nikcleju@37: def l1qec_newton(x0, u0, A, At, b, epsilon, Aexact, tau, newtontol, newtonmaxiter, cgtol, cgmaxiter, verbose=False): nikcleju@37: nikcleju@37: # check if the matrix A is implicit or explicit nikcleju@37: #largescale = isa(A,'function_handle'); nikcleju@37: if hasattr(A, '__call__'): nikcleju@37: largescale = True nikcleju@37: else: nikcleju@37: largescale = False nikcleju@37: nikcleju@37: # line search parameters nikcleju@37: alpha = 0.01 nikcleju@37: beta = 0.5 nikcleju@37: nikcleju@37: #if (~largescale), AtA = A'*A; end nikcleju@37: if not largescale: nikcleju@37: AtA = np.dot(A.T,A) nikcleju@37: nikcleju@37: # initial point nikcleju@37: x = x0.copy() nikcleju@37: u = u0.copy() nikcleju@37: #if (largescale), r = A(x) - b; else r = A*x - b; end nikcleju@37: if largescale: nikcleju@37: r = A(x) - b nikcleju@37: else: nikcleju@37: r = np.dot(A,x) - b nikcleju@37: nikcleju@37: fu1 = x - u nikcleju@37: fu2 = -x - u nikcleju@37: fe = 1.0/2*(np.vdot(r,r) - epsilon**2) nikcleju@37: f = u.sum() - (1.0/tau)*(np.log(-fu1).sum() + np.log(-fu2).sum() + math.log(-fe)) nikcleju@37: nikcleju@37: niter = 0 nikcleju@37: done = 0 nikcleju@37: while not done: nikcleju@37: nikcleju@37: #if (largescale), atr = At(r); else atr = A'*r; end nikcleju@37: if largescale: nikcleju@37: atr = At(r) nikcleju@37: else: nikcleju@37: atr = np.dot(A.T,r) nikcleju@37: nikcleju@37: #ntgz = 1./fu1 - 1./fu2 + 1/fe*atr; nikcleju@37: ntgz = 1.0/fu1 - 1.0/fu2 + 1.0/fe*atr nikcleju@37: #ntgu = -tau - 1./fu1 - 1./fu2; nikcleju@37: ntgu = -tau - 1.0/fu1 - 1.0/fu2 nikcleju@37: #gradf = -(1/tau)*[ntgz; ntgu]; nikcleju@37: gradf = -(1.0/tau)*np.concatenate((ntgz, ntgu),0) nikcleju@37: nikcleju@37: #sig11 = 1./fu1.^2 + 1./fu2.^2; nikcleju@37: sig11 = 1.0/(fu1**2) + 1.0/(fu2**2) nikcleju@37: #sig12 = -1./fu1.^2 + 1./fu2.^2; nikcleju@37: sig12 = -1.0/(fu1**2) + 1.0/(fu2**2) nikcleju@37: #sigx = sig11 - sig12.^2./sig11; nikcleju@37: sigx = sig11 - (sig12**2)/sig11 nikcleju@37: nikcleju@37: #w1p = ntgz - sig12./sig11.*ntgu; nikcleju@37: w1p = ntgz - sig12/sig11*ntgu nikcleju@37: if largescale: nikcleju@37: #h11pfun = @(z) sigx.*z - (1/fe)*At(A(z)) + 1/fe^2*(atr'*z)*atr; nikcleju@37: h11pfun = lambda z: sigx*z - (1.0/fe)*At(A(z)) + 1.0/(fe**2)*np.dot(np.dot(atr.T,z),atr) nikcleju@37: dx,cgres,cgiter = cgsolve(h11pfun, w1p, cgtol, cgmaxiter, 0) nikcleju@37: if (cgres > 1.0/2): nikcleju@37: if verbose: nikcleju@37: print 'Cannot solve system. Returning previous iterate. (See Section 4 of notes for more information.)' nikcleju@37: xp = x.copy() nikcleju@37: up = u.copy() nikcleju@37: return xp,up,niter nikcleju@37: #end nikcleju@37: Adx = A(dx) nikcleju@37: else: nikcleju@37: #H11p = diag(sigx) - (1/fe)*AtA + (1/fe)^2*atr*atr'; nikcleju@37: # Attention: atr is column vector, so atr*atr' means outer(atr,atr) nikcleju@37: H11p = np.diag(sigx) - (1.0/fe)*AtA + (1.0/fe)**2*np.outer(atr,atr) nikcleju@37: #opts.POSDEF = true; opts.SYM = true; nikcleju@37: #[dx,hcond] = linsolve(H11p, w1p, opts); nikcleju@37: #if (hcond < 1e-14) nikcleju@37: # disp('Matrix ill-conditioned. Returning previous iterate. (See Section 4 of notes for more information.)'); nikcleju@37: # xp = x; up = u; nikcleju@37: # return nikcleju@37: #end nikcleju@37: nikcleju@37: # Nic says: from tveq_newton.m nikcleju@37: K = Aexact.shape[0] nikcleju@37: afac = (np.diag(H11p)).max() nikcleju@37: #Hp = [H11p afac*A'; afac*A zeros(K)]) nikcleju@37: Hp = np.vstack(( np.hstack((H11p,afac*Aexact.T)) , np.hstack((afac*Aexact,np.zeros((K,K)))) )) nikcleju@37: wp = np.concatenate((w1p , np.zeros(K))) nikcleju@37: try: nikcleju@37: #dx = scipy.linalg.solve(H11p, w1p, sym_pos=True) nikcleju@37: #hcond = 1.0/np.linalg.cond(H11p) nikcleju@37: dxv = scipy.linalg.solve(Hp, wp, sym_pos=False) # Only symmetric, not posdef nikcleju@37: dx = dxv[:x0.size] nikcleju@37: hcond = 1.0/np.linalg.cond(Hp) nikcleju@37: except scipy.linalg.LinAlgError: nikcleju@37: if verbose: nikcleju@37: print 'Matrix ill-conditioned. Returning previous iterate. (See Section 4 of notes for more information.)' nikcleju@37: xp = x.copy() nikcleju@37: up = u.copy() nikcleju@37: return xp,up,niter nikcleju@37: if hcond < 1e-14: nikcleju@37: if verbose: nikcleju@37: print 'Matrix ill-conditioned. Returning previous iterate. (See Section 4 of notes for more information.)' nikcleju@37: xp = x.copy() nikcleju@37: up = u.copy() nikcleju@37: return xp,up,niter nikcleju@37: nikcleju@37: #Adx = A*dx; nikcleju@37: Adx = np.dot(A,dx) nikcleju@37: #end nikcleju@37: #du = (1./sig11).*ntgu - (sig12./sig11).*dx; nikcleju@37: du = (1.0/sig11)*ntgu - (sig12/sig11)*dx; nikcleju@37: nikcleju@37: # minimum step size that stays in the interior nikcleju@37: #ifu1 = find((dx-du) > 0); ifu2 = find((-dx-du) > 0); nikcleju@37: ifu1 = np.nonzero((dx-du)>0) nikcleju@37: ifu2 = np.nonzero((-dx-du)>0) nikcleju@37: #aqe = Adx'*Adx; bqe = 2*r'*Adx; cqe = r'*r - epsilon^2; nikcleju@37: aqe = np.dot(Adx.T,Adx) nikcleju@37: bqe = 2*np.dot(r.T,Adx) nikcleju@37: cqe = np.vdot(r,r) - epsilon**2 nikcleju@37: #smax = min(1,min([... nikcleju@37: # -fu1(ifu1)./(dx(ifu1)-du(ifu1)); -fu2(ifu2)./(-dx(ifu2)-du(ifu2)); ... nikcleju@37: # (-bqe+sqrt(bqe^2-4*aqe*cqe))/(2*aqe) nikcleju@37: # ])); nikcleju@37: smax = min(1,np.concatenate( (-fu1[ifu1]/(dx[ifu1]-du[ifu1]) , -fu2[ifu2]/(-dx[ifu2]-du[ifu2]) , np.array([ (-bqe + math.sqrt(bqe**2-4*aqe*cqe))/(2*aqe) ]) ) , 0).min()) nikcleju@37: nikcleju@37: s = 0.99 * smax nikcleju@37: nikcleju@37: # backtracking line search nikcleju@37: suffdec = 0 nikcleju@37: backiter = 0 nikcleju@37: while not suffdec: nikcleju@37: #xp = x + s*dx; up = u + s*du; rp = r + s*Adx; nikcleju@37: xp = x + s*dx nikcleju@37: up = u + s*du nikcleju@37: rp = r + s*Adx nikcleju@37: #fu1p = xp - up; fu2p = -xp - up; fep = 1/2*(rp'*rp - epsilon^2); nikcleju@37: fu1p = xp - up nikcleju@37: fu2p = -xp - up nikcleju@37: fep = 0.5*(np.vdot(rp,rp) - epsilon**2) nikcleju@37: #fp = sum(up) - (1/tau)*(sum(log(-fu1p)) + sum(log(-fu2p)) + log(-fep)); nikcleju@37: fp = up.sum() - (1.0/tau)*(np.log(-fu1p).sum() + np.log(-fu2p).sum() + math.log(-fep)) nikcleju@37: #flin = f + alpha*s*(gradf'*[dx; du]); nikcleju@37: flin = f + alpha*s*np.dot(gradf.T , np.concatenate((dx,du),0)) nikcleju@37: #suffdec = (fp <= flin); nikcleju@37: if fp <= flin: nikcleju@37: suffdec = True nikcleju@37: else: nikcleju@37: suffdec = False nikcleju@37: nikcleju@37: s = beta*s nikcleju@37: backiter = backiter + 1 nikcleju@37: if (backiter > 32): nikcleju@37: if verbose: nikcleju@37: print 'Stuck on backtracking line search, returning previous iterate. (See Section 4 of notes for more information.)' nikcleju@37: xp = x.copy() nikcleju@37: up = u.copy() nikcleju@37: return xp,up,niter nikcleju@37: #end nikcleju@37: #end nikcleju@37: nikcleju@37: # set up for next iteration nikcleju@37: #x = xp; u = up; r = rp; nikcleju@37: x = xp.copy() nikcleju@37: u = up.copy() nikcleju@37: r = rp.copy() nikcleju@37: #fu1 = fu1p; fu2 = fu2p; fe = fep; f = fp; nikcleju@37: fu1 = fu1p.copy() nikcleju@37: fu2 = fu2p.copy() nikcleju@37: fe = fep nikcleju@37: f = fp nikcleju@37: nikcleju@37: #lambda2 = -(gradf'*[dx; du]); nikcleju@37: lambda2 = -np.dot(gradf.T , np.concatenate((dx,du),0)) nikcleju@37: #stepsize = s*norm([dx; du]); nikcleju@37: stepsize = s * np.linalg.norm(np.concatenate((dx,du),0)) nikcleju@37: niter = niter + 1 nikcleju@37: #done = (lambda2/2 < newtontol) | (niter >= newtonmaxiter); nikcleju@37: if lambda2/2.0 < newtontol or niter >= newtonmaxiter: nikcleju@37: done = 1 nikcleju@37: else: nikcleju@37: done = 0 nikcleju@37: nikcleju@37: #disp(sprintf('Newton iter = #d, Functional = #8.3f, Newton decrement = #8.3f, Stepsize = #8.3e', ... nikcleju@37: if verbose: nikcleju@37: print 'Newton iter = ',niter,', Functional = ',f,', Newton decrement = ',lambda2/2.0,', Stepsize = ',stepsize nikcleju@37: nikcleju@37: if verbose: nikcleju@37: if largescale: nikcleju@37: print ' CG Res = ',cgres,', CG Iter = ',cgiter nikcleju@37: else: nikcleju@37: print ' H11p condition number = ',hcond nikcleju@37: #end nikcleju@37: nikcleju@37: #end nikcleju@37: return xp,up,niter nikcleju@37: nikcleju@37: def l1qec_logbarrier(x0, A, At, b, epsilon, Aexact, Atexact, bexact, lbtol=1e-3, mu=10, cgtol=1e-8, cgmaxiter=200, verbose=False): nikcleju@37: nikcleju@37: # Check if epsilon > 0. If epsilon is 0, the algorithm fails. You should run the algo with equality constraint instead nikcleju@37: if epsilon == 0: nikcleju@37: raise l1qecInputValueError('Epsilon should be > 0!') nikcleju@37: nikcleju@37: #largescale = isa(A,'function_handle'); nikcleju@37: if hasattr(A, '__call__'): nikcleju@37: largescale = True nikcleju@37: else: nikcleju@37: largescale = False nikcleju@37: nikcleju@37: # if (nargin < 6), lbtol = 1e-3; end nikcleju@37: # if (nargin < 7), mu = 10; end nikcleju@37: # if (nargin < 8), cgtol = 1e-8; end nikcleju@37: # if (nargin < 9), cgmaxiter = 200; end nikcleju@37: # Nic: added them as optional parameteres nikcleju@37: nikcleju@37: newtontol = lbtol nikcleju@37: newtonmaxiter = 50 nikcleju@37: nikcleju@37: #N = length(x0); nikcleju@37: N = x0.size nikcleju@37: nikcleju@37: # starting point --- make sure that it is feasible nikcleju@37: if largescale: nikcleju@37: if np.linalg.norm(A(x0) - b) > epsilon or np.linalg.norm( np.dot(Aexact,x0) - bexact ) > 1e-15: nikcleju@37: if verbose: nikcleju@37: print 'Starting point infeasible; using x0 = At*inv(AAt)*y.' nikcleju@37: #AAt = @(z) A(At(z)); nikcleju@37: AAt = lambda z: A(At(z)) nikcleju@37: # TODO: implement cgsolve nikcleju@37: w,cgres,cgiter = cgsolve(AAt, b, cgtol, cgmaxiter, 0) nikcleju@37: if (cgres > 1.0/2): nikcleju@37: if verbose: nikcleju@37: print 'A*At is ill-conditioned: cannot find starting point' nikcleju@37: xp = x0.copy() nikcleju@37: return xp nikcleju@37: #end nikcleju@37: x0 = At(w) nikcleju@37: #end nikcleju@37: else: nikcleju@37: # Nic: add test for np.dot(Aexact,x0) - bexact ) > 1e-15 nikcleju@37: if np.linalg.norm( np.dot(A,x0) - b ) > epsilon or np.linalg.norm( np.dot(Aexact,x0) - bexact ) > 1e-15: nikcleju@37: if verbose: nikcleju@37: print 'Starting point infeasible; using x0 = At*inv(AAt)*y.' nikcleju@37: nikcleju@37: #Nic: stack A and Aexact, b and bexact, and use them instead of A and b nikcleju@37: Abig = np.vstack((A,Aexact)) nikcleju@37: bbig = np.concatenate((b,bexact)) nikcleju@37: try: nikcleju@37: w = scipy.linalg.solve(np.dot(Abig,Abig.T), bbig, sym_pos=True) nikcleju@37: #w = np.linalg.solve(np.dot(A,A.T), b) nikcleju@37: hcond = 1.0/np.linalg.cond(np.dot(Abig,Abig.T)) nikcleju@37: except scipy.linalg.LinAlgError: nikcleju@37: if verbose: nikcleju@37: print 'A*At is ill-conditioned: cannot find starting point' nikcleju@37: xp = x0.copy() nikcleju@37: return xp nikcleju@37: if hcond < 1e-14: nikcleju@37: if verbose: nikcleju@37: print 'A*At is ill-conditioned: cannot find starting point' nikcleju@37: xp = x0.copy() nikcleju@37: return xp nikcleju@37: x0 = np.dot(Abig.T, w) nikcleju@37: # try: nikcleju@37: # w = scipy.linalg.solve(np.dot(A,A.T), b, sym_pos=True) nikcleju@37: # #w = np.linalg.solve(np.dot(A,A.T), b) nikcleju@37: # hcond = 1.0/scipy.linalg.cond(np.dot(A,A.T)) nikcleju@37: # except scipy.linalg.LinAlgError: nikcleju@37: # print 'A*At is ill-conditioned: cannot find starting point' nikcleju@37: # xp = x0.copy() nikcleju@37: # return xp nikcleju@37: # if hcond < 1e-14: nikcleju@37: # print 'A*At is ill-conditioned: cannot find starting point' nikcleju@37: # xp = x0.copy() nikcleju@37: # return xp nikcleju@37: # #x0 = A'*w; nikcleju@37: # x0 = np.dot(A.T, w) nikcleju@37: #end nikcleju@37: #end nikcleju@37: x = x0.copy() nikcleju@37: u = (0.95)*np.abs(x0) + (0.10)*np.abs(x0).max() nikcleju@37: nikcleju@37: #disp(sprintf('Original l1 norm = #.3f, original functional = #.3f', sum(abs(x0)), sum(u))); nikcleju@37: if verbose: nikcleju@37: print 'Original l1 norm = ',np.abs(x0).sum(),'original functional = ',u.sum() nikcleju@37: nikcleju@37: # choose initial value of tau so that the duality gap after the first nikcleju@37: # step will be about the origial norm nikcleju@37: tau = max(((2*N+1.0)/np.abs(x0).sum()), 1) nikcleju@37: nikcleju@37: lbiter = math.ceil((math.log(2*N+1)-math.log(lbtol)-math.log(tau))/math.log(mu)) nikcleju@37: #disp(sprintf('Number of log barrier iterations = #d\n', lbiter)); nikcleju@37: if verbose: nikcleju@37: print 'Number of log barrier iterations = ',lbiter nikcleju@37: nikcleju@37: totaliter = 0 nikcleju@37: nikcleju@37: # Added by Nic, to fix some crashing nikcleju@37: if lbiter == 0: nikcleju@37: xp = np.zeros(x0.size) nikcleju@37: #end nikcleju@37: nikcleju@37: #for ii = 1:lbiter nikcleju@37: for ii in np.arange(lbiter): nikcleju@37: nikcleju@37: xp,up,ntiter = l1qec_newton(x, u, A, At, b, epsilon, Aexact, tau, newtontol, newtonmaxiter, cgtol, cgmaxiter) nikcleju@37: totaliter = totaliter + ntiter nikcleju@37: nikcleju@37: #disp(sprintf('\nLog barrier iter = #d, l1 = #.3f, functional = #8.3f, tau = #8.3e, total newton iter = #d\n', ... nikcleju@37: # ii, sum(abs(xp)), sum(up), tau, totaliter)); nikcleju@37: if verbose: nikcleju@37: print 'Log barrier iter = ',ii,', l1 = ',np.abs(xp).sum(),', functional = ',up.sum(),', tau = ',tau,', total newton iter = ',totaliter nikcleju@37: x = xp.copy() nikcleju@37: u = up.copy() nikcleju@37: nikcleju@37: tau = mu*tau nikcleju@37: nikcleju@37: #end nikcleju@37: return xp nikcleju@37: