Mercurial > hg > pycsalgos
view pyCSalgos/NESTA/NESTA.py @ 51:eb4c66488ddf
Split algos.py and stdparams.py, added nesta to std1, 2, 3, 4
author | nikcleju |
---|---|
date | Wed, 07 Dec 2011 12:44:19 +0000 |
parents | f6824da67b51 |
children |
line wrap: on
line source
# -*- coding: utf-8 -*- """ Created on Tue Nov 29 16:55:20 2011 @author: ncleju """ import numpy import math class NestaError(Exception): pass #function [xk,niter,residuals,outputData,opts] =NESTA(A,At,b,muf,delta,opts) def NESTA(A,At,b,muf,delta,opts=None): # [xk,niter,residuals,outputData] =NESTA(A,At,b,muf,delta,opts) # # Solves a L1 minimization problem under a quadratic constraint using the # Nesterov algorithm, with continuation: # # min_x || U x ||_1 s.t. ||y - Ax||_2 <= delta # # Continuation is performed by sequentially applying Nesterov's algorithm # with a decreasing sequence of values of mu0 >= mu >= muf # # The primal prox-function is also adapted by accounting for a first guess # xplug that also tends towards x_muf # # The observation matrix A is a projector # # Inputs: A and At - measurement matrix and adjoint (either a matrix, in which # case At is unused, or function handles). m x n dimensions. # b - Observed data, a m x 1 array # muf - The desired value of mu at the last continuation step. # A smaller mu leads to higher accuracy. # delta - l2 error bound. This enforces how close the variable # must fit the observations b, i.e. || y - Ax ||_2 <= delta # If delta = 0, enforces y = Ax # Common heuristic: delta = sqrt(m + 2*sqrt(2*m))*sigma; # where sigma=std(noise). # opts - # This is a structure that contains additional options, # some of which are optional. # The fieldnames are case insensitive. Below # are the possible fieldnames: # # opts.xplug - the first guess for the primal prox-function, and # also the initial point for xk. By default, xplug = At(b) # opts.U and opts.Ut - Analysis/Synthesis operators # (either matrices of function handles). # opts.normU - if opts.U is provided, this should be norm(U) # otherwise it will have to be calculated (potentially # expensive) # opts.MaxIntIter - number of continuation steps. # default is 5 # opts.maxiter - max number of iterations in an inner loop. # default is 10,000 # opts.TolVar - tolerance for the stopping criteria # opts.stopTest - which stopping criteria to apply # opts.stopTest == 1 : stop when the relative # change in the objective function is less than # TolVar # opts.stopTest == 2 : stop with the l_infinity norm # of difference in the xk variable is less # than TolVar # opts.TypeMin - if this is 'L1' (default), then # minimizes a smoothed version of the l_1 norm. # If this is 'tv', then minimizes a smoothed # version of the total-variation norm. # The string is case insensitive. # opts.Verbose - if this is 0 or false, then very # little output is displayed. If this is 1 or true, # then output every iteration is displayed. # If this is a number p greater than 1, then # output is displayed every pth iteration. # opts.fid - if this is 1 (default), the display is # the usual Matlab screen. If this is the file-id # of a file opened with fopen, then the display # will be redirected to this file. # opts.errFcn - if this is a function handle, # then the program will evaluate opts.errFcn(xk) # at every iteration and display the result. # ex. opts.errFcn = @(x) norm( x - x_true ) # opts.outFcn - if this is a function handle, # then then program will evaluate opts.outFcn(xk) # at every iteration and save the results in outputData. # If the result is a vector (as opposed to a scalar), # it should be a row vector and not a column vector. # ex. opts.outFcn = @(x) [norm( x - xtrue, 'inf' ),... # norm( x - xtrue) / norm(xtrue)] # opts.AAtinv - this is an experimental new option. AAtinv # is the inverse of AA^*. This allows the use of a # matrix A which is not a projection, but only # for the noiseless (i.e. delta = 0) case. # opts.USV - another experimental option. This supercedes # the AAtinv option, so it is recommended that you # do not define AAtinv. This allows the use of a matrix # A which is not a projection, and works for the # noisy ( i.e. delta > 0 ) case. # opts.USV should contain three fields: # opts.USV.U is the U from [U,S,V] = svd(A) # likewise, opts.USV.S and opts.USV.V are S and V # from svd(A). S may be a matrix or a vector. # # Outputs: # xk - estimate of the solution x # niter - number of iterations # residuals - first column is the residual at every step, # second column is the value of f_mu at every step # outputData - a matrix, where each row r is the output # from opts.outFcn, if supplied. # opts - the structure containing the options that were used # # Written by: Jerome Bobin, Caltech # Email: bobin@acm.caltech.edu # Created: February 2009 # Modified (version 1.0): May 2009, Jerome Bobin and Stephen Becker, Caltech # Modified (version 1.1): Nov 2009, Stephen Becker, Caltech # # NESTA Version 1.1 # See also Core_Nesterov #--------------------- # Original Matab code: # #if nargin < 6, opts = []; end #if isempty(opts) && isnumeric(opts), opts = struct; end # ##---- Set defaults #fid = setOpts('fid',1); #Verbose = setOpts('Verbose',true); #function printf(varargin), fprintf(fid,varargin{:}); end #MaxIntIter = setOpts('MaxIntIter',5,1); #TypeMin = setOpts('TypeMin','L1'); #TolVar = setOpts('tolvar',1e-5); #[U,U_userSet] = setOpts('U', @(x) x ); #if ~isa(U,'function_handle') # Ut = setOpts('Ut',[]); #else # Ut = setOpts('Ut', @(x) x ); #end #xplug = setOpts('xplug',[]); #normU = setOpts('normU',[]); # so we can tell if it's been set # #residuals = []; outputData = []; #AAtinv = setOpts('AAtinv',[]); #USV = setOpts('USV',[]); #if ~isempty(USV) # if isstruct(USV) # Q = USV.U; # we can't use "U" as the variable name # # since "U" already refers to the analysis operator # S = USV.S; # if isvector(S), s = S; #S = diag(s); # else s = diag(S); end # #V = USV.V; # else # error('opts.USV must be a structure'); # end #end # ## -- We can handle non-projections IF a (fast) routine for computing ## the psuedo-inverse is available. ## We can handle a nonzero delta, but we need the full SVD #if isempty(AAtinv) && isempty(USV) # # Check if A is a partial isometry, i.e. if AA' = I # z = randn(size(b)); # if isa(A,'function_handle'), AAtz = A(At(z)); # else AAtz = A*(A'*z); end # if norm( AAtz - z )/norm(z) > 1e-8 # error('Measurement matrix A must be a partial isometry: AA''=I'); # end #end # ## -- Find a initial guess if not already provided. ## Use least-squares solution: x_ref = A'*inv(A*A')*b ## If A is a projection, the least squares solution is trivial #if isempty(xplug) || norm(xplug) < 1e-12 # if ~isempty(USV) && isempty(AAtinv) # AAtinv = Q*diag( s.^(-2) )*Q'; # end # if ~isempty(AAtinv) # if delta > 0 && isempty(USV) # error('delta must be zero for non-projections'); # end # if isa(AAtinv,'function_handle') # x_ref = AAtinv(b); # else # x_ref = AAtinv * b; # end # else # x_ref = b; # end # # if isa(A,'function_handle') # x_ref=At(x_ref); # else # x_ref = A'*x_ref; # end # # if isempty(xplug) # xplug = x_ref; # end # # x_ref itself is used to calculate mu_0 # # in the case that xplug has very small norm #else # x_ref = xplug; #end # ## use x_ref, not xplug, to find mu_0 #if isa(U,'function_handle') # Ux_ref = U(x_ref); #else # Ux_ref = U*x_ref; #end #switch lower(TypeMin) # case 'l1' # mu0 = 0.9*max(abs(Ux_ref)); # case 'tv' # mu0 = ValMUTv(Ux_ref); #end # ## -- If U was set by the user and normU not supplied, then calcuate norm(U) #if U_userSet && isempty(normU) # # simple case: U*U' = I or U'*U = I, in which case norm(U) = 1 # z = randn(size(xplug)); # if isa(U,'function_handle'), UtUz = Ut(U(z)); else UtUz = U'*(U*z); end # if norm( UtUz - z )/norm(z) < 1e-8 # normU = 1; # else # z = randn(size(Ux_ref)); # if isa(U,'function_handle') # UUtz = U(Ut(z)); # else # UUtz = U*(U'*z); # end # if norm( UUtz - z )/norm(z) < 1e-8 # normU = 1; # end # end # # if isempty(normU) # # have to actually calculate the norm # if isa(U,'function_handle') # [normU,cnt] = my_normest(U,Ut,length(xplug),1e-3,30); # if cnt == 30, printf('Warning: norm(U) may be inaccurate\n'); end # else # [mU,nU] = size(U); # if mU < nU, UU = U*U'; else UU = U'*U; end # # last resort is to call MATLAB's "norm", which is slow # if norm( UU - diag(diag(UU)),'fro') < 100*eps # # this means the matrix is diagonal, so norm is easy: # normU = sqrt( max(abs(diag(UU))) ); # elseif issparse(UU) # normU = sqrt( normest(UU) ); # else # if min(size(U)) > 2000 # # norm(randn(2000)) takes about 5 seconds on my PC # printf('Warning: calculation of norm(U) may be slow\n'); # end # normU = sqrt( norm(UU) ); # end # end # end # opts.normU = normU; #end # # #niter = 0; #Gamma = (muf/mu0)^(1/MaxIntIter); #mu = mu0; #Gammat= (TolVar/0.1)^(1/MaxIntIter); #TolVar = 0.1; # #for nl=1:MaxIntIter # # mu = mu*Gamma; # TolVar=TolVar*Gammat; opts.TolVar = TolVar; # opts.xplug = xplug; # if Verbose, printf('\tBeginning #s Minimization; mu = #g\n',opts.TypeMin,mu); end # [xk,niter_int,res,out,optsOut] = Core_Nesterov(... # A,At,b,mu,delta,opts); # # xplug = xk; # niter = niter_int + niter; # # residuals = [residuals; res]; # outputData = [outputData; out]; # #end #opts = optsOut; # End of original Matab code: #--------------------- #if isempty(opts) && isnumeric(opts), opts = struct; end #---- Set defaults #fid = setOpts('fid',1); opts,Verbose,userSet = setOpts(opts,'Verbose',True); #function printf(varargin), fprintf(fid,varargin{:}); end opts,MaxIntIter,userSet = setOpts(opts,'MaxIntIter',5,1); opts,TypeMin,userSet = setOpts(opts,'TypeMin','L1'); opts,TolVar,userSet = setOpts(opts,'tolvar',1e-5); #[U,U_userSet] = setOpts('U', @(x) x ); opts,U,U_userSet = setOpts(opts,'U', lambda x: x ); #if ~isa(U,'function_handle') if not hasattr(U, '__call__'): opts,Ut,userSet = setOpts(opts,'Ut',None) else: opts,Ut,userSet = setOpts(opts,'Ut', lambda x: x ) #end opts,xplug,userSet = setOpts(opts,'xplug',None); opts,normU,userSet = setOpts(opts,'normU',None); # so we can tell if it's been set #residuals = []; outputData = []; residuals = numpy.zeros((0,2)) outputData = numpy.zeros(0) opts,AAtinv,userSet = setOpts(opts,'AAtinv',None); opts,USV,userSet = setOpts(opts,'USV',None); #if ~isempty(USV) if len(USV.keys()): #if isstruct(USV) Q = USV['U'] # we can't use "U" as the variable name # since "U" already refers to the analysis operator S = USV['S'] if S.ndim is 1: s = S else: s = numpy.diag(S) V = USV['V']; #else # error('opts.USV must be a structure'); #end #end # -- We can handle non-projections IF a (fast) routine for computing # the psuedo-inverse is available. # We can handle a nonzero delta, but we need the full SVD #if isempty(AAtinv) && isempty(USV) if (AAtinv is None) and (USV is None): # Check if A is a partial isometry, i.e. if AA' = I #z = randn(size(b)); z = numpy.random.randn(b.shape) #if isa(A,'function_handle'), AAtz = A(At(z)); #else AAtz = A*(A'*z); end if hasattr(A, '__call__'): AAtz = A(At(z)) else: #AAtz = A*(A'*z) AAtz = numpy.dot(A, numpy.dot(A.T,z)) #if norm( AAtz - z )/norm(z) > 1e-8 if numpy.linalg.norm(AAtz - z) / numpy.linalg.norm(z) > 1e-8: #error('Measurement matrix A must be a partial isometry: AA''=I'); print 'Measurement matrix A must be a partial isometry: AA''=I' raise NestaError('Measurement matrix A must be a partial isometry: AA''=I') #end #end # -- Find a initial guess if not already provided. # Use least-squares solution: x_ref = A'*inv(A*A')*b # If A is a projection, the least squares solution is trivial #if isempty(xplug) || norm(xplug) < 1e-12 if xplug is None or numpy.linalg.norm(xplug) < 1e-12: #if ~isempty(USV) && isempty(AAtinv) if USV is not None and AAtinv is None: #AAtinv = Q*diag( s.^(-2) )*Q'; AAtinv = numpy.dot(Q, numpy.dot(numpy.diag(s ** -2), Q.T)) #end #if ~isempty(AAtinv) if AAtinv is not None: #if delta > 0 && isempty(USV) if delta > 0 and USV is None: #error('delta must be zero for non-projections'); print 'delta must be zero for non-projections' raise NesteError('delta must be zero for non-projections') #end #if isa(AAtinv,'function_handle') if hasattr(AAtinv,'__call__'): x_ref = AAtinv(b) else: x_ref = numpy.dot(AAtinv , b) #end else: x_ref = b #end #if isa(A,'function_handle') if hasattr(A,'__call__'): x_ref=At(x_ref); else: #x_ref = A'*x_ref; x_ref = numpy.dot(A.T, x_ref) #end #if isempty(xplug) if xplug is None: xplug = x_ref; #end # x_ref itself is used to calculate mu_0 # in the case that xplug has very small norm else: x_ref = xplug; #end # use x_ref, not xplug, to find mu_0 #if isa(U,'function_handle') if hasattr(U,'__call__'): Ux_ref = U(x_ref); else: Ux_ref = numpy.dot(U,x_ref) #end #switch lower(TypeMin) # case 'l1' # mu0 = 0.9*max(abs(Ux_ref)); # case 'tv' # mu0 = ValMUTv(Ux_ref); #end if TypeMin.lower() == 'l1': mu0 = 0.9*max(abs(Ux_ref)) elif TypeMin.lower() == 'tv': #mu0 = ValMUTv(Ux_ref); print 'Nic: TODO: not implemented yet' raise NestaError('Nic: TODO: not implemented yet') # -- If U was set by the user and normU not supplied, then calcuate norm(U) #if U_userSet && isempty(normU) if U_userSet and normU is None: # simple case: U*U' = I or U'*U = I, in which case norm(U) = 1 #z = randn(size(xplug)); z = numpy.random.standard_normal(xplug.shape) #if isa(U,'function_handle'), UtUz = Ut(U(z)); else UtUz = U'*(U*z); end if hasattr(U,'__call__'): UtUz = Ut(U(z)) else: UtUz = numpy.dot(U.T, numpy.dot(U,z)) if numpy.linalg.norm( UtUz - z )/numpy.linalg.norm(z) < 1e-8: normU = 1; else: z = numpy.random.standard_normal(Ux_ref.shape) #if isa(U,'function_handle'): if hasattr(U,'__call__'): UUtz = U(Ut(z)); else: #UUtz = U*(U'*z); UUtz = numpy.dot(U, numpy.dot(U.T,z)) #end if numpy.linalg.norm( UUtz - z )/numpy.linalg.norm(z) < 1e-8: normU = 1; #end #end #if isempty(normU) if normU is None: # have to actually calculate the norm #if isa(U,'function_handle') if hasattr(U,'__call__'): #[normU,cnt] = my_normest(U,Ut,length(xplug),1e-3,30); normU,cnt = my_normest(U,Ut,xplug.size,1e-3,30) #if cnt == 30, printf('Warning: norm(U) may be inaccurate\n'); end if cnt == 30: print 'Warning: norm(U) may be inaccurate' else: mU,nU = U.shape if mU < nU: UU = numpy.dot(U,U.T) else: UU = numpy.dot(U.T,U) # last resort is to call MATLAB's "norm", which is slow #if norm( UU - diag(diag(UU)),'fro') < 100*eps if numpy.linalg.norm( UU - numpy.diag(numpy.diag(UU)),'fro') < 100*numpy.finfo(float).eps: # this means the matrix is diagonal, so norm is easy: #normU = sqrt( max(abs(diag(UU))) ); normU = math.sqrt( max(abs(numpy.diag(UU))) ) # Nic: TODO: sparse not implemented #elif issparse(UU) # normU = sqrt( normest(UU) ); else: if min(U.shape) > 2000: # norm(randn(2000)) takes about 5 seconds on my PC #printf('Warning: calculation of norm(U) may be slow\n'); print 'Warning: calculation of norm(U) may be slow' #end normU = math.sqrt( numpy.linalg.norm(UU, 2) ); #end #end #end #opts.normU = normU; opts['normU'] = normU #end niter = 0; Gamma = (muf/mu0)**(1.0/MaxIntIter); mu = mu0; Gammat = (TolVar/0.1)**(1.0/MaxIntIter); TolVar = 0.1; #for nl=1:MaxIntIter for n1 in numpy.arange(MaxIntIter): mu = mu*Gamma; TolVar=TolVar*Gammat; opts['TolVar'] = TolVar; opts['xplug'] = xplug; #if Verbose, printf('\tBeginning #s Minimization; mu = #g\n',opts.TypeMin,mu); end if Verbose: #printf('\tBeginning #s Minimization; mu = #g\n',opts.TypeMin,mu) print ' Beginning', opts['TypeMin'],'Minimization; mu =',mu #[xk,niter_int,res,out,optsOut] = Core_Nesterov(A,At,b,mu,delta,opts); xk,niter_int,res,out,optsOut = Core_Nesterov(A,At,b,mu,delta,opts) xplug = xk.copy(); niter = niter_int + niter; #residuals = [residuals; res]; residuals = numpy.vstack((residuals,res)) #outputData = [outputData; out]; if out is not None: outputData = numpy.vstack((outputData, out)) #end opts = optsOut.copy() return xk,niter,residuals,outputData,opts #---- internal routine for setting defaults #function [var,userSet] = setOpts(field,default,mn,mx) def setOpts(opts,field,default,mn=None,mx=None): var = default # has the option already been set? #if ~isfield(opts,field) if field in opts.keys(): # see if there is a capitalization problem: #names = fieldnames(opts); #for i = 1:length(names) for key in opts.keys(): #if strcmpi(names{i},field) if key.lower() == field.lower(): #opts.(field) = opts.(names{i}); opts[field] = opts[key] #opts = rmfield(opts,names{i}); # Don't delete because it is copied by reference! #del opts[key] break #end #end #end #if isfield(opts,field) && ~isempty(opts.(field)) if field in opts.keys() and (opts[field] is not None): #var = opts.(field); # override the default var = opts[field] userSet = True else: userSet = False #end # perform error checking, if desired #if nargin >= 3 && ~isempty(mn) if mn is not None: if var < mn: #printf('Variable #s is #f, should be at least #f\n',... # field,var,mn); error('variable out-of-bounds'); print 'Variable',field,'is',var,', should be at least',mn raise NestaError('setOpts error: value too small') #end #end #if nargin >= 4 && ~isempty(mx) if mx is not None: if var > mx: #printf('Variable #s is #f, should be at least #f\n',... # field,var,mn); error('variable out-of-bounds'); print 'Variable',field,'is',var,', should be at most',mx raise NestaError('setOpts error: value too large') #end #end #opts.(field) = var; opts[field] = var return opts,var,userSet # Nic: TODO: implement TV #---- internal routine for setting mu0 in the tv minimization case #function th=ValMUTv(x) # #N = length(x);n = floor(sqrt(N)); # N = b.size # n = floor(sqrt(N)) # Dv = spdiags([reshape([-ones(n-1,n); zeros(1,n)],N,1) ... # reshape([zeros(1,n); ones(n-1,n)],N,1)], [0 1], N, N); # Dh = spdiags([reshape([-ones(n,n-1) zeros(n,1)],N,1) ... # reshape([zeros(n,1) ones(n,n-1)],N,1)], [0 n], N, N); # D = sparse([Dh;Dv]); # # # Dhx = Dh*x; # Dvx = Dv*x; # # sk = sqrt(abs(Dhx).^2 + abs(Dvx).^2); # th = max(sk); # #end #end #-- end of NESTA function ############ POWER METHOD TO ESTIMATE NORM ############### # Copied from MATLAB's "normest" function, but allows function handles, not just sparse matrices #function [e,cnt] = my_normest(S,St,n,tol, maxiter) def my_normest(S,St,n,tol=1e-6, maxiter=20): #MY_NORMEST Estimate the matrix 2-norm via power method. #if nargin < 4, tol = 1.e-6; end #if nargin < 5, maxiter = 20; end #if isempty(St) if S is None: St = S # we assume the matrix is symmetric; #end x = numpy.ones(n); cnt = 0; e = numpy.linalg.norm(x); #if e == 0, return, end if e == 0: return e,cnt x = x/e; e0 = 0; while abs(e-e0) > tol*e and cnt < maxiter: e0 = e; Sx = S(x); #if nnz(Sx) == 0 if (Sx!=0).sum() == 0: Sx = numpy.random.rand(Sx.size); #end e = numpy.linalg.norm(Sx); x = St(Sx); x = x/numpy.linalg.norm(x); cnt = cnt+1; #end #end #function [xk,niter,residuals,outputData,opts] = Core_Nesterov(A,At,b,mu,delta,opts) def Core_Nesterov(A,At,b,mu,delta,opts): # [xk,niter,residuals,outputData,opts] =Core_Nesterov(A,At,b,mu,delta,opts) # # Solves a L1 minimization problem under a quadratic constraint using the # Nesterov algorithm, without continuation: # # min_x || U x ||_1 s.t. ||y - Ax||_2 <= delta # # If continuation is desired, see the function NESTA.m # # The primal prox-function is also adapted by accounting for a first guess # xplug that also tends towards x_muf # # The observation matrix A is a projector # # Inputs: A and At - measurement matrix and adjoint (either a matrix, in which # case At is unused, or function handles). m x n dimensions. # b - Observed data, a m x 1 array # muf - The desired value of mu at the last continuation step. # A smaller mu leads to higher accuracy. # delta - l2 error bound. This enforces how close the variable # must fit the observations b, i.e. || y - Ax ||_2 <= delta # If delta = 0, enforces y = Ax # Common heuristic: delta = sqrt(m + 2*sqrt(2*m))*sigma; # where sigma=std(noise). # opts - # This is a structure that contains additional options, # some of which are optional. # The fieldnames are case insensitive. Below # are the possible fieldnames: # # opts.xplug - the first guess for the primal prox-function, and # also the initial point for xk. By default, xplug = At(b) # opts.U and opts.Ut - Analysis/Synthesis operators # (either matrices of function handles). # opts.normU - if opts.U is provided, this should be norm(U) # opts.maxiter - max number of iterations in an inner loop. # default is 10,000 # opts.TolVar - tolerance for the stopping criteria # opts.stopTest - which stopping criteria to apply # opts.stopTest == 1 : stop when the relative # change in the objective function is less than # TolVar # opts.stopTest == 2 : stop with the l_infinity norm # of difference in the xk variable is less # than TolVar # opts.TypeMin - if this is 'L1' (default), then # minimizes a smoothed version of the l_1 norm. # If this is 'tv', then minimizes a smoothed # version of the total-variation norm. # The string is case insensitive. # opts.Verbose - if this is 0 or false, then very # little output is displayed. If this is 1 or true, # then output every iteration is displayed. # If this is a number p greater than 1, then # output is displayed every pth iteration. # opts.fid - if this is 1 (default), the display is # the usual Matlab screen. If this is the file-id # of a file opened with fopen, then the display # will be redirected to this file. # opts.errFcn - if this is a function handle, # then the program will evaluate opts.errFcn(xk) # at every iteration and display the result. # ex. opts.errFcn = @(x) norm( x - x_true ) # opts.outFcn - if this is a function handle, # then then program will evaluate opts.outFcn(xk) # at every iteration and save the results in outputData. # If the result is a vector (as opposed to a scalar), # it should be a row vector and not a column vector. # ex. opts.outFcn = @(x) [norm( x - xtrue, 'inf' ),... # norm( x - xtrue) / norm(xtrue)] # opts.AAtinv - this is an experimental new option. AAtinv # is the inverse of AA^*. This allows the use of a # matrix A which is not a projection, but only # for the noiseless (i.e. delta = 0) case. # If the SVD of A is U*S*V', then AAtinv = U*(S^{-2})*U'. # opts.USV - another experimental option. This supercedes # the AAtinv option, so it is recommended that you # do not define AAtinv. This allows the use of a matrix # A which is not a projection, and works for the # noisy ( i.e. delta > 0 ) case. # opts.USV should contain three fields: # opts.USV.U is the U from [U,S,V] = svd(A) # likewise, opts.USV.S and opts.USV.V are S and V # from svd(A). S may be a matrix or a vector. # Outputs: # xk - estimate of the solution x # niter - number of iterations # residuals - first column is the residual at every step, # second column is the value of f_mu at every step # outputData - a matrix, where each row r is the output # from opts.outFcn, if supplied. # opts - the structure containing the options that were used # # Written by: Jerome Bobin, Caltech # Email: bobin@acm.caltech.edu # Created: February 2009 # Modified: May 2009, Jerome Bobin and Stephen Becker, Caltech # Modified: Nov 2009, Stephen Becker # # NESTA Version 1.1 # See also NESTA #---- Set defaults # opts = []; #--------------------- # Original Matab code: #fid = setOpts('fid',1); #function printf(varargin), fprintf(fid,varargin{:}); end #maxiter = setOpts('maxiter',10000,0); #TolVar = setOpts('TolVar',1e-5); #TypeMin = setOpts('TypeMin','L1'); #Verbose = setOpts('Verbose',true); #errFcn = setOpts('errFcn',[]); #outFcn = setOpts('outFcn',[]); #stopTest = setOpts('stopTest',1,1,2); #U = setOpts('U', @(x) x ); #if ~isa(U,'function_handle') # Ut = setOpts('Ut',[]); #else # Ut = setOpts('Ut', @(x) x ); #end #xplug = setOpts('xplug',[]); #normU = setOpts('normU',1); # #if delta < 0, error('delta must be greater or equal to zero'); end # #if isa(A,'function_handle') # Atfun = At; # Afun = A; #else # Atfun = @(x) A'*x; # Afun = @(x) A*x; #end #Atb = Atfun(b); # #AAtinv = setOpts('AAtinv',[]); #USV = setOpts('USV',[]); #if ~isempty(USV) # if isstruct(USV) # Q = USV.U; # we can't use "U" as the variable name # # since "U" already refers to the analysis operator # S = USV.S; # if isvector(S), s = S; S = diag(s); # else s = diag(S); end # V = USV.V; # else # error('opts.USV must be a structure'); # end # if isempty(AAtinv) # AAtinv = Q*diag( s.^(-2) )*Q'; # end #end ## --- for A not a projection (experimental) #if ~isempty(AAtinv) # if isa(AAtinv,'function_handle') # AAtinv_fun = AAtinv; # else # AAtinv_fun = @(x) AAtinv * x; # end # # AtAAtb = Atfun( AAtinv_fun(b) ); # #else # # We assume it's a projection # AtAAtb = Atb; # AAtinv_fun = @(x) x; #end # #if isempty(xplug) # xplug = AtAAtb; #end # ##---- Initialization #N = length(xplug); #wk = zeros(N,1); #xk = xplug; # # ##---- Init Variables #Ak= 0; #Lmu = normU/mu; #yk = xk; #zk = xk; #fmean = realmin/10; #OK = 0; #n = floor(sqrt(N)); # ##---- Computing Atb #Atb = Atfun(b); #Axk = Afun(xk);# only needed if you want to see the residuals ## Axplug = Axk; # # ##---- TV Minimization #if strcmpi(TypeMin,'TV') # Lmu = 8*Lmu; # Dv = spdiags([reshape([-ones(n-1,n); zeros(1,n)],N,1) ... # reshape([zeros(1,n); ones(n-1,n)],N,1)], [0 1], N, N); # Dh = spdiags([reshape([-ones(n,n-1) zeros(n,1)],N,1) ... # reshape([zeros(n,1) ones(n,n-1)],N,1)], [0 n], N, N); # D = sparse([Dh;Dv]); #end # # #Lmu1 = 1/Lmu; ## SLmu = sqrt(Lmu); ## SLmu1 = 1/sqrt(Lmu); #lambdaY = 0; #lambdaZ = 0; # ##---- setup data storage variables #[DISPLAY_ERROR, RECORD_DATA] = deal(false); #outputData = deal([]); #residuals = zeros(maxiter,2); #if ~isempty(errFcn), DISPLAY_ERROR = true; end #if ~isempty(outFcn) && nargout >= 4 # RECORD_DATA = true; # outputData = zeros(maxiter, size(outFcn(xplug),2) ); #end # #for k = 0:maxiter-1, # # #---- Dual problem # # if strcmpi(TypeMin,'L1') [df,fx,val,uk] = Perform_L1_Constraint(xk,mu,U,Ut);end # # if strcmpi(TypeMin,'TV') [df,fx] = Perform_TV_Constraint(xk,mu,Dv,Dh,D,U,Ut);end # # #---- Primal Problem # # #---- Updating yk # # # # # yk = Argmin_x Lmu/2 ||x - xk||_l2^2 + <df,x-xk> s.t. ||b-Ax||_l2 < delta # # Let xp be sqrt(Lmu) (x-xk), dfp be df/sqrt(Lmu), bp be sqrt(Lmu)(b- Axk) and deltap be sqrt(Lmu)delta # # yk = xk + 1/sqrt(Lmu) Argmin_xp 1/2 || xp ||_2^2 + <dfp,xp> s.t. || bp - Axp ||_2 < deltap # # # # # cp = xk - 1/Lmu*df; # this is "q" in eq. (3.7) in the paper # # Acp = Afun( cp ); # if ~isempty(AAtinv) && isempty(USV) # AtAcp = Atfun( AAtinv_fun( Acp ) ); # else # AtAcp = Atfun( Acp ); # end # # residuals(k+1,1) = norm( b-Axk); # the residual # residuals(k+1,2) = fx; # the value of the objective # #--- if user has supplied a function, apply it to the iterate # if RECORD_DATA # outputData(k+1,:) = outFcn(xk); # end # # if delta > 0 # if ~isempty(USV) # # there are more efficient methods, but we're assuming # # that A is negligible compared to U and Ut. # # Here we make the change of variables x <-- x - xk # # and df <-- df/L # dfp = -Lmu1*df; Adfp = -(Axk - Acp); # bp = b - Axk; # deltap = delta; # # Check if we even need to project: # if norm( Adfp - bp ) < deltap # lambdaY = 0; projIter = 0; # # i.e. projection = dfp; # yk = xk + dfp; # Ayk = Axk + Adfp; # else # lambdaY_old = lambdaY; # [projection,projIter,lambdaY] = fastProjection(Q,S,V,dfp,bp,... # deltap, .999*lambdaY_old ); # if lambdaY > 0, disp('lambda is positive!'); keyboard; end # yk = xk + projection; # Ayk = Afun(yk); # # DEBUGGING ## if projIter == 50 ## fprintf('\n Maxed out iterations at y\n'); ## keyboard ## end # end # else # lambda = max(0,Lmu*(norm(b-Acp)/delta - 1));gamma = lambda/(lambda + Lmu); # yk = lambda/Lmu*(1-gamma)*Atb + cp - gamma*AtAcp; # # for calculating the residual, we'll avoid calling A() # # by storing A(yk) here (using A'*A = I): # Ayk = lambda/Lmu*(1-gamma)*b + Acp - gamma*Acp; # end # else # # if delta is 0, the projection is simplified: # yk = AtAAtb + cp - AtAcp; # Ayk = b; # end # # # DEBUGGING ## if norm( Ayk - b ) > (1.05)*delta ## fprintf('\nAyk failed projection test\n'); ## keyboard; ## end # # #--- Stopping criterion # qp = abs(fx - mean(fmean))/mean(fmean); # # switch stopTest # case 1 # # look at the relative change in function value # if qp <= TolVar && OK; break;end # if qp <= TolVar && ~OK; OK=1; end # case 2 # # look at the l_inf change from previous iterate # if k >= 1 && norm( xk - xold, 'inf' ) <= TolVar # break # end # end # fmean = [fx,fmean]; # if (length(fmean) > 10) fmean = fmean(1:10);end # # # # #--- Updating zk # # apk =0.5*(k+1); # Ak = Ak + apk; # tauk = 2/(k+3); # # wk = apk*df + wk; # # # # # zk = Argmin_x Lmu/2 ||b - Ax||_l2^2 + Lmu/2||x - xplug ||_2^2+ <wk,x-xk> # # s.t. ||b-Ax||_l2 < delta # # # # cp = xplug - 1/Lmu*wk; # # Acp = Afun( cp ); # if ~isempty( AAtinv ) && isempty(USV) # AtAcp = Atfun( AAtinv_fun( Acp ) ); # else # AtAcp = Atfun( Acp ); # end # # if delta > 0 # if ~isempty(USV) # # Make the substitution wk <-- wk/K # ## dfp = (xplug - Lmu1*wk); # = cp ## Adfp= (Axplug - Acp); # dfp = cp; Adfp = Acp; # bp = b; # deltap = delta; ## dfp = SLmu*xplug - SLmu1*wk; ## bp = SLmu*b; ## deltap = SLmu*delta; # # # See if we even need to project: # if norm( Adfp - bp ) < deltap # zk = dfp; # Azk = Adfp; # else # [projection,projIter,lambdaZ] = fastProjection(Q,S,V,dfp,bp,... # deltap, .999*lambdaZ ); # if lambdaZ > 0, disp('lambda is positive!'); keyboard; end # zk = projection; # # zk = SLmu1*projection; # Azk = Afun(zk); # # # DEBUGGING: ## if projIter == 50 ## fprintf('\n Maxed out iterations at z\n'); ## keyboard ## end # end # else # lambda = max(0,Lmu*(norm(b-Acp)/delta - 1));gamma = lambda/(lambda + Lmu); # zk = lambda/Lmu*(1-gamma)*Atb + cp - gamma*AtAcp; # # for calculating the residual, we'll avoid calling A() # # by storing A(zk) here (using A'*A = I): # Azk = lambda/Lmu*(1-gamma)*b + Acp - gamma*Acp; # end # else # # if delta is 0, this is simplified: # zk = AtAAtb + cp - AtAcp; # Azk = b; # end # # # DEBUGGING ## if norm( Ayk - b ) > (1.05)*delta ## fprintf('\nAzk failed projection test\n'); ## keyboard; ## end # # #--- Updating xk # # xkp = tauk*zk + (1-tauk)*yk; # xold = xk; # xk=xkp; # Axk = tauk*Azk + (1-tauk)*Ayk; # # if ~mod(k,10), Axk = Afun(xk); end # otherwise slowly lose precision # # DEBUG ## if norm(Axk - Afun(xk) ) > 1e-6, disp('error with Axk'); keyboard; end # # #--- display progress if desired # if ~mod(k+1,Verbose ) # printf('Iter: #3d ~ fmu: #.3e ~ Rel. Variation of fmu: #.2e ~ Residual: #.2e',... # k+1,fx,qp,residuals(k+1,1) ); # #--- if user has supplied a function to calculate the error, # # apply it to the current iterate and dislay the output: # if DISPLAY_ERROR, printf(' ~ Error: #.2e',errFcn(xk)); end # printf('\n'); # end # if abs(fx)>1e20 || abs(residuals(k+1,1)) >1e20 || isnan(fx) # error('Nesta: possible divergence or NaN. Bad estimate of ||A''A||?'); # end # #end # #niter = k+1; # ##-- truncate output vectors #residuals = residuals(1:niter,:); #if RECORD_DATA, outputData = outputData(1:niter,:); end # End of original Matab code #--------------------- #fid = setOpts('fid',1); #function printf(varargin), fprintf(fid,varargin{:}); end opts,maxiter,userSet = setOpts(opts,'maxiter',10000,0); opts,TolVar,userSet = setOpts(opts,'TolVar',1e-5); opts,TypeMin,userSet = setOpts(opts,'TypeMin','L1'); opts,Verbose,userSet = setOpts(opts,'Verbose',True); opts,errFcn,userSet = setOpts(opts,'errFcn',None); opts,outFcn,userSet = setOpts(opts,'outFcn',None); opts,stopTest,userSet = setOpts(opts,'stopTest',1,1,2); opts,U,userSet = setOpts(opts,'U',lambda x: x ); #if ~isa(U,'function_handle') if not hasattr(U,'__call__'): opts,Ut,userSet = setOpts(opts,'Ut',None); else: opts,Ut,userSet = setOpts(opts,'Ut', lambda x: x ); #end opts,xplug,userSet = setOpts(opts,'xplug',None); opts,normU,userSet = setOpts(opts,'normU',1); if delta < 0: print 'delta must be greater or equal to zero' raise NestaError('delta must be greater or equal to zero') if hasattr(A,'__call__'): Atfun = At; Afun = A; else: Atfun = lambda x: numpy.dot(A.T,x) Afun = lambda x: numpy.dot(A,x) #end Atb = Atfun(b); opts,AAtinv,userSet = setOpts(opts,'AAtinv',None); opts,USV,userSet = setOpts(opts,'USV',None); if USV is not None: #if isstruct(USV) Q = USV['U']; # we can't use "U" as the variable name # since "U" already refers to the analysis operator S = USV['S']; #if isvector(S), s = S; S = diag(s); #else s = diag(S); end if S.ndim is 1: s = S S = numpy.diag(s) else: s = numpy.diag(S) V = USV['V']; #else # error('opts.USV must be a structure'); #end #if isempty(AAtinv) if AAtinv is None: #AAtinv = Q*diag( s.^(-2) )*Q'; AAtinv = numpy.dot(Q, numpy.dot(numpy.diag(s ** -2), Q.T)) #end #end # --- for A not a projection (experimental) #if ~isempty(AAtinv) if AAtinv is not None: #if isa(AAtinv,'function_handle') if hasattr(AAtinv, '__call__'): AAtinv_fun = AAtinv; else: AAtinv_fun = lambda x: numpy.dot(AAtinv,x) #end AtAAtb = Atfun( AAtinv_fun(b) ); else: # We assume it's a projection AtAAtb = Atb; AAtinv_fun = lambda x: x; #end if xplug == None: xplug = AtAAtb.copy(); #end #---- Initialization #N = length(xplug); N = len(xplug) #wk = zeros(N,1); wk = numpy.zeros(N) xk = xplug.copy() #---- Init Variables Ak = 0.0; Lmu = normU/mu; yk = xk.copy(); zk = xk.copy(); fmean = numpy.finfo(float).tiny/10.0; OK = 0; n = math.floor(math.sqrt(N)); #---- Computing Atb Atb = Atfun(b); Axk = Afun(xk);# only needed if you want to see the residuals # Axplug = Axk; #---- TV Minimization if TypeMin == 'TV': print 'Nic:TODO: TV minimization not yet implemented!' raise NestaError('Nic:TODO: TV minimization not yet implemented!') #if strcmpi(TypeMin,'TV') # Lmu = 8*Lmu; # Dv = spdiags([reshape([-ones(n-1,n); zeros(1,n)],N,1) ... # reshape([zeros(1,n); ones(n-1,n)],N,1)], [0 1], N, N); # Dh = spdiags([reshape([-ones(n,n-1) zeros(n,1)],N,1) ... # reshape([zeros(n,1) ones(n,n-1)],N,1)], [0 n], N, N); # D = sparse([Dh;Dv]); #end Lmu1 = 1.0/Lmu; # SLmu = sqrt(Lmu); # SLmu1 = 1/sqrt(Lmu); lambdaY = 0.; lambdaZ = 0.; #---- setup data storage variables #[DISPLAY_ERROR, RECORD_DATA] = deal(false); DISPLAY_ERROR = False RECORD_DATA = False #outputData = deal([]); outputData = None residuals = numpy.zeros((maxiter,2)) #if ~isempty(errFcn), DISPLAY_ERROR = true; end if errFcn is not None: DISPLAY_ERROR = True #if ~isempty(outFcn) && nargout >= 4 if outFcn is not None: # Output max number of arguments RECORD_DATA = True outputData = numpy.zeros(maxiter, outFcn(xplug).shape[1]); #end #for k = 0:maxiter-1, for k in numpy.arange(maxiter): #---- Dual problem #if strcmpi(TypeMin,'L1') [df,fx,val,uk] = Perform_L1_Constraint(xk,mu,U,Ut);end if TypeMin == 'L1': df,fx,val,uk = Perform_L1_Constraint(xk,mu,U,Ut) # Nic: TODO: TV not implemented yet ! #if strcmpi(TypeMin,'TV') [df,fx] = Perform_TV_Constraint(xk,mu,Dv,Dh,D,U,Ut);end #---- Primal Problem #---- Updating yk # # yk = Argmin_x Lmu/2 ||x - xk||_l2^2 + <df,x-xk> s.t. ||b-Ax||_l2 < delta # Let xp be sqrt(Lmu) (x-xk), dfp be df/sqrt(Lmu), bp be sqrt(Lmu)(b- Axk) and deltap be sqrt(Lmu)delta # yk = xk + 1/sqrt(Lmu) Argmin_xp 1/2 || xp ||_2^2 + <dfp,xp> s.t. || bp - Axp ||_2 < deltap # cp = xk - 1./Lmu*df; # this is "q" in eq. (3.7) in the paper Acp = Afun( cp ); #if ~isempty(AAtinv) && isempty(USV) if AAtinv is not None and USV is None: AtAcp = Atfun( AAtinv_fun( Acp ) ); else: AtAcp = Atfun( Acp ); #end #residuals(k+1,1) = norm( b-Axk); # the residual residuals[k,0] = numpy.linalg.norm(b-Axk) #residuals(k+1,2) = fx; # the value of the objective residuals[k,1] = fx #--- if user has supplied a function, apply it to the iterate if RECORD_DATA: outputData[k,:] = outFcn(xk); #end if delta > 0: #if ~isempty(USV) if USV is not None: # there are more efficient methods, but we're assuming # that A is negligible compared to U and Ut. # Here we make the change of variables x <-- x - xk # and df <-- df/L dfp = -Lmu1*df; Adfp = -(Axk - Acp); bp = b - Axk; deltap = delta; # Check if we even need to project: if numpy.linalg.norm( Adfp - bp ) < deltap: lambdaY = 0. projIter = 0; # i.e. projection = dfp; yk = xk + dfp; Ayk = Axk + Adfp; else: lambdaY_old = lambdaY; #[projection,projIter,lambdaY] = fastProjection(Q,S,V,dfp,bp,deltap, .999*lambdaY_old ); projection,projIter,lambdaY = fastProjection(Q,S,V,dfp,bp,deltap, .999*lambdaY_old ) #if lambdaY > 0, disp('lambda is positive!'); keyboard; end if lambdaY > 0: print 'lambda is positive!' raise NestaError('lambda is positive!') yk = xk + projection; Ayk = Afun(yk); # DEBUGGING # if projIter == 50 # fprintf('\n Maxed out iterations at y\n'); # keyboard # end #end else: lambdaa = max(0,Lmu*(numpy.linalg.norm(b-Acp)/delta - 1)) gamma = lambdaa/(lambdaa + Lmu); yk = lambdaa/Lmu*(1-gamma)*Atb + cp - gamma*AtAcp; # for calculating the residual, we'll avoid calling A() # by storing A(yk) here (using A'*A = I): Ayk = lambdaa/Lmu*(1-gamma)*b + Acp - gamma*Acp; #end else: # if delta is 0, the projection is simplified: yk = AtAAtb + cp - AtAcp; Ayk = b.copy(); #end # DEBUGGING # if norm( Ayk - b ) > (1.05)*delta # fprintf('\nAyk failed projection test\n'); # keyboard; # end #--- Stopping criterion if fmean.size == 1: qp = numpy.inf else: qp = abs(fx - numpy.mean(fmean))/numpy.mean(fmean); #switch stopTest # case 1 if stopTest == 1: # look at the relative change in function value #if qp <= TolVar && OK; break;end if qp <= TolVar and OK: break #if qp <= TolVar && ~OK; OK=1; end if qp <= TolVar and not OK: OK = 1 elif stopTest == 2: # look at the l_inf change from previous iterate if k >= 1 and numpy.linalg.norm( xk - xold, 'inf' ) <= TolVar: break #end #end #fmean = [fx,fmean]; fmean = numpy.hstack((fx,fmean)); if (len(fmean) > 10): fmean = fmean[:10] #--- Updating zk apk = 0.5*(k+1); Ak = Ak + apk; tauk = 2.0/(k+3); wk = apk*df + wk; # # zk = Argmin_x Lmu/2 ||b - Ax||_l2^2 + Lmu/2||x - xplug ||_2^2+ <wk,x-xk> # s.t. ||b-Ax||_l2 < delta # cp = xplug - 1.0/Lmu*wk; Acp = Afun( cp ); #if ~isempty( AAtinv ) && isempty(USV) if AAtinv is not None and USV is None: AtAcp = Atfun( AAtinv_fun( Acp ) ); else: AtAcp = Atfun( Acp ); #end if delta > 0: #if ~isempty(USV) if USV is not None: # Make the substitution wk <-- wk/K # dfp = (xplug - Lmu1*wk); # = cp # Adfp= (Axplug - Acp); dfp = cp.copy() Adfp = Acp.copy() bp = b.copy(); deltap = delta; # dfp = SLmu*xplug - SLmu1*wk; # bp = SLmu*b; # deltap = SLmu*delta; # See if we even need to project: if numpy.linalg.norm( Adfp - bp ) < deltap: zk = dfp.copy(); Azk = Adfp.copy(); else: projection,projIter,lambdaZ = fastProjection(Q,S,V,dfp,bp,deltap, .999*lambdaZ ) if lambdaZ > 0: print 'lambda is positive!' raise NestaError('lambda is positive!') zk = projection.copy(); # zk = SLmu1*projection; Azk = Afun(zk); # DEBUGGING: # if projIter == 50 # fprintf('\n Maxed out iterations at z\n'); # keyboard # end #end else: lambdaa = max(0,Lmu*(numpy.linalg.norm(b-Acp)/delta - 1)); gamma = lambdaa/(lambdaa + Lmu); zk = lambdaa/Lmu*(1-gamma)*Atb + cp - gamma*AtAcp; # for calculating the residual, we'll avoid calling A() # by storing A(zk) here (using A'*A = I): Azk = lambdaa/Lmu*(1-gamma)*b + Acp - gamma*Acp; #end else: # if delta is 0, this is simplified: zk = AtAAtb + cp - AtAcp; Azk = b; #end # DEBUGGING # if norm( Ayk - b ) > (1.05)*delta # fprintf('\nAzk failed projection test\n'); # keyboard; # end #--- Updating xk xkp = tauk*zk + (1-tauk)*yk; xold = xk.copy(); xk = xkp.copy(); Axk = tauk*Azk + (1-tauk)*Ayk; #if ~mod(k,10), Axk = Afun(xk); end # otherwise slowly lose precision if not numpy.mod(k,10): Axk = Afun(xk) # DEBUG # if norm(Axk - Afun(xk) ) > 1e-6, disp('error with Axk'); keyboard; end #--- display progress if desired #if ~mod(k+1,Verbose ) if Verbose and not numpy.mod(k+1,Verbose): #printf('Iter: #3d ~ fmu: #.3e ~ Rel. Variation of fmu: #.2e ~ Residual: #.2e',k+1,fx,qp,residuals(k+1,1) ); print 'Iter: ',k+1,' ~ fmu: ',fx,' ~ Rel. Variation of fmu: ',qp,' ~ Residual:',residuals[k,0] #--- if user has supplied a function to calculate the error, # apply it to the current iterate and dislay the output: #if DISPLAY_ERROR, printf(' ~ Error: #.2e',errFcn(xk)); end if DISPLAY_ERROR: print ' ~ Error:',errFcn(xk) #end if abs(fx)>1e20 or abs(residuals[k,0]) >1e20 or numpy.isnan(fx): #error('Nesta: possible divergence or NaN. Bad estimate of ||A''A||?'); print 'Nesta: possible divergence or NaN. Bad estimate of ||A''A||?' raise NestaError('Nesta: possible divergence or NaN. Bad estimate of ||A''A||?') #end #end niter = k+1; #-- truncate output vectors residuals = residuals[:niter,:] #if RECORD_DATA, outputData = outputData(1:niter,:); end if RECORD_DATA: outputData = outputData[:niter,:] return xk,niter,residuals,outputData,opts ############ PERFORM THE L1 CONSTRAINT ################## #function [df,fx,val,uk] = Perform_L1_Constraint(xk,mu,U,Ut) def Perform_L1_Constraint(xk,mu,U,Ut): #if isa(U,'function_handle') if hasattr(U,'__call__'): uk = U(xk); else: uk = numpy.dot(U,xk) #end fx = uk.copy() #uk = uk./max(mu,abs(uk)); uk = uk / numpy.maximum(mu,abs(uk)) #val = real(uk'*fx); val = numpy.real(numpy.vdot(uk,fx)) #fx = real(uk'*fx - mu/2*norm(uk)^2); fx = numpy.real(numpy.vdot(uk,fx) - mu/2.*numpy.linalg.norm(uk)**2); #if isa(Ut,'function_handle') if hasattr(U,'__call__'): df = Ut(uk); else: #df = U'*uk; df = numpy.dot(U.T,uk) #end return df,fx,val,uk #end # Nic: TODO: TV not implemented yet! ############ PERFORM THE TV CONSTRAINT ################## #function [df,fx] = Perform_TV_Constraint(xk,mu,Dv,Dh,D,U,Ut) # if isa(U,'function_handle') # x = U(xk); # else # x = U*xk; # end # df = zeros(size(x)); # # Dhx = Dh*x; # Dvx = Dv*x; # # tvx = sum(sqrt(abs(Dhx).^2+abs(Dvx).^2)); # w = max(mu,sqrt(abs(Dhx).^2 + abs(Dvx).^2)); # uh = Dhx ./ w; # uv = Dvx ./ w; # u = [uh;uv]; # fx = real(u'*D*x - mu/2 * 1/numel(u)*sum(u'*u)); # if isa(Ut,'function_handle') # df = Ut(D'*u); # else # df = U'*(D'*u); # end #end #function [x,k,l] = fastProjection( U, S, V, y, b, epsilon, lambda0, DISP ) def fastProjection( U, S, V, y, b, epsilon, lambda0=0, DISP=False ): # [x,niter,lambda] = fastProjection(U, S, V, y, b, epsilon, [lambda0], [DISP] ) # # minimizes || x - y || # such that || Ax - b || <= epsilon # # where USV' = A (i.e the SVD of A) # # The optional input "lambda0" is a guess for the Lagrange parameter # # Warning: for speed, does not calculate A(y) to see if x = y is feasible # # NESTA Version 1.1 # See also Core_Nesterov # Written by Stephen Becker, September 2009, srbecker@caltech.edu #--------------------- # Original Matab code: #DEBUG = true; #if nargin < 8 # DISP = false; #end ## -- Parameters for Newton's method -- #MAXIT = 70; #TOL = 1e-8 * epsilon; ## TOL = max( TOL, 10*eps ); # we can't do better than machine precision # #m = size(U,1); #n = size(V,1); #mn = min([m,n]); #if numel(S) > mn^2, S = diag(diag(S)); end # S should be a small square matrix #r = size(S); #if size(U,2) > r, U = U(:,1:r); end #if size(V,2) > r, V = V(:,1:r); end # #s = diag(S); #s2 = s.^2; # ## What we want to do: ## b = b - A*y; ## bb = U'*b; # ## if A doesn't have full row rank, then b may not be in the range #if size(U,1) > size(U,2) # bRange = U*(U'*b); # bNull = b - bRange; # epsilon = sqrt( epsilon^2 - norm(bNull)^2 ); #end #b = U'*b - S*(V'*y); # parenthesis is very important! This is expensive. # ## b2 = b.^2; #b2 = abs(b).^2; # for complex data #bs2 = b2.*s2; #epsilon2 = epsilon^2; # ## The following routine need to be fast ## For efficiency (at cost of transparency), we are writing the calculations ## in a way that minimize number of operations. The functions "f" ## and "fp" represent f and its derivative. # ## f = @(lambda) sum( b2 .*(1-lambda*s2).^(-2) ) - epsilon^2; ## fp = @(lambda) 2*sum( bs2 .*(1-lambda*s2).^(-3) ); #if nargin < 7, lambda0 = 0; end #l = lambda0; oldff = 0; #one = ones(m,1); #alpha = 1; # take full Newton steps #for k = 1:MAXIT # # make f(l) and fp(l) as efficient as possible: # ls = one./(one-l*s2); # ls2 = ls.^2; # ls3 = ls2.*ls; # ff = b2.'*ls2; # should be .', not ', even for complex data # ff = ff - epsilon2; # fpl = 2*( bs2.'*ls3 ); # should be .', not ', even for complex data ## ff = f(l); # this is a little slower ## fpl = fp(l); # this is a little slower # d = -ff/fpl; # if DISP, fprintf('#2d, lambda is #5.2f, f(lambda) is #.2e, f''(lambda) is #.2e\n',... # k,l,ff,fpl ); end # if abs(ff) < TOL, break; end # stopping criteria # l_old = l; # if k>2 && ( abs(ff) > 10*abs(oldff+100) ) #|| abs(d) > 1e13 ) # l = 0; alpha = 1/2; ## oldff = f(0); # oldff = sum(b2); oldff = oldff - epsilon2; # if DISP, disp('restarting'); end # else # if alpha < 1, alpha = (alpha+1)/2; end # l = l + alpha*d; # oldff = ff; # if l > 0 # l = 0; # shouldn't be positive # oldff = sum(b2); oldff = oldff - epsilon2; # end # end # if l_old == l && l == 0 # if DISP, disp('Making no progress; x = y is probably feasible'); end # break; # end #end ## if k == MAXIT && DEBUG, disp('maxed out iterations'); end #if l < 0 # xhat = -l*s.*b./( 1 - l*s2 ); # x = V*xhat + y; #else # # y is already feasible, so no need to project # l = 0; # x = y; #end # End of original Matab code #--------------------- DEBUG = True; #if nargin < 8 # DISP = false; #end # -- Parameters for Newton's method -- MAXIT = 70; TOL = 1e-8 * epsilon; # TOL = max( TOL, 10*eps ); # we can't do better than machine precision #m = size(U,1); #n = size(V,1); m = U.shape[0] n = V.shape[0] mn = min(m,n); #if numel(S) > mn^2, S = diag(diag(S)); end # S should be a small square matrix if S.size > mn**2: S = numpy.diag(numpy.diag(S)) #r = size(S); r = S.shape[0] # S is square #if size(U,2) > r, U = U(:,1:r); end if U.shape[1] > r: U = U[:,:r] #if size(V,2) > r, V = V(:,1:r); end if V.shape[1] > r: V = V[:,:r] s = numpy.diag(S); s2 = s**2; # What we want to do: # b = b - A*y; # bb = U'*b; # if A doesn't have full row rank, then b may not be in the range #if size(U,1) > size(U,2) if U.shape[0] > U.shape[1]: #bRange = U*(U'*b); bRange = numpy.dot(U, numpy.dot(U.T, b)) bNull = b - bRange; epsilon = math.sqrt( epsilon**2 - numpy.linalg.norm(bNull)**2 ); #end #b = U'*b - S*(V'*y); # parenthesis is very important! This is expensive. b = numpy.dot(U.T,b) - numpy.dot(S, numpy.dot(V.T,y)) # b2 = b.^2; b2 = abs(b)**2; # for complex data bs2 = b2*s2; epsilon2 = epsilon**2; # The following routine need to be fast # For efficiency (at cost of transparency), we are writing the calculations # in a way that minimize number of operations. The functions "f" # and "fp" represent f and its derivative. # f = @(lambda) sum( b2 .*(1-lambda*s2).^(-2) ) - epsilon^2; # fp = @(lambda) 2*sum( bs2 .*(1-lambda*s2).^(-3) ); #if nargin < 7, lambda0 = 0; end l = lambda0; oldff = 0; one = numpy.ones(m); alpha = 1; # take full Newton steps #for k = 1:MAXIT for k in numpy.arange(MAXIT): # make f(l) and fp(l) as efficient as possible: #ls = one./(one-l*s2); ls = one/(one-l*s2) ls2 = ls**2; ls3 = ls2*ls; #ff = b2.'*ls2; # should be .', not ', even for complex data ff = numpy.dot(b2.conj(), ls2) ff = ff - epsilon2; #fpl = 2*( bs2.'*ls3 ); # should be .', not ', even for complex data fpl = 2 * numpy.dot(bs2.conj(),ls3) # ff = f(l); # this is a little slower # fpl = fp(l); # this is a little slower d = -ff/fpl; # if DISP, fprintf('#2d, lambda is #5.2f, f(lambda) is #.2e, f''(lambda) is #.2e\n',k,l,ff,fpl ); end if DISP: print k,', lambda is ',l,', f(lambda) is ',ff,', f''(lambda) is',fpl #if abs(ff) < TOL, break; end # stopping criteria if abs(ff) < TOL: break l_old = l if k>2 and ( abs(ff) > 10*abs(oldff+100) ): #|| abs(d) > 1e13 ) l = 0; alpha = 1.0/2.0; # oldff = f(0); oldff = b2.sum(); oldff = oldff - epsilon2; if DISP: print 'restarting' else: if alpha < 1: alpha = (alpha+1.0)/2.0 l = l + alpha*d; oldff = ff if l > 0: l = 0; # shouldn't be positive oldff = b2.sum() oldff = oldff - epsilon2; #end #end if l_old == l and l == 0: #if DISP, disp('Making no progress; x = y is probably feasible'); end if DISP: print 'Making no progress; x = y is probably feasible' break; #end #end # if k == MAXIT && DEBUG, disp('maxed out iterations'); end if l < 0: #xhat = -l*s.*b./( 1 - l*s2 ); xhat = numpy.dot(-l, s*b/( 1. - numpy.dot(l,s2) ) ) #x = V*xhat + y; x = numpy.dot(V,xhat) + y else: # y is already feasible, so no need to project l = 0; x = y.copy(); #end return x,k,l