comparison pyCSalgos/NESTA/NESTA.py @ 46:88f0ebe1667a

Finished NESTA implementation. Not tested yet
author nikcleju
date Tue, 29 Nov 2011 22:06:20 +0000
parents 7524d7749456
children e6a5f2173015
325 if S.ndim is 1:
326 s = S
327 else:
328 s = numpy.diag(S)
329
330 V = USV['V'];    (changed from: #V = USV.V;)
331 #else
332 # error('opts.USV must be a structure');
333 #end
334 #end
335
635 x = St(Sx);
636 x = x/numpy.linalg.norm(x);
637 cnt = cnt+1;
638 #end
639 #end
640
641
642
643 #function [xk,niter,residuals,outputData,opts] = Core_Nesterov(A,At,b,mu,delta,opts)
644 def Core_Nesterov(A,At,b,mu,delta,opts):
645 # [xk,niter,residuals,outputData,opts] =Core_Nesterov(A,At,b,mu,delta,opts)
646 #
647 # Solves a L1 minimization problem under a quadratic constraint using the
648 # Nesterov algorithm, without continuation:
649 #
650 # min_x || U x ||_1 s.t. ||y - Ax||_2 <= delta
651 #
652 # If continuation is desired, see the function NESTA.m
653 #
654 # The primal prox-function is also adapted by accounting for a first guess
655 # xplug that also tends towards x_muf
656 #
657 # The observation matrix A is a projector
658 #
659 # Inputs: A and At - measurement matrix and adjoint (either a matrix, in which
660 # case At is unused, or function handles). m x n dimensions.
661 # b - Observed data, a m x 1 array
662 # muf - The desired value of mu at the last continuation step.
663 # A smaller mu leads to higher accuracy.
664 # delta - l2 error bound. This enforces how close the variable
665 # must fit the observations b, i.e. || y - Ax ||_2 <= delta
666 # If delta = 0, enforces y = Ax
667 # Common heuristic: delta = sqrt(m + 2*sqrt(2*m))*sigma;
668 # where sigma=std(noise).
669 # opts -
670 # This is a structure that contains additional options,
671 # some of which are optional.
672 # The fieldnames are case insensitive. Below
673 # are the possible fieldnames:
674 #
675 # opts.xplug - the first guess for the primal prox-function, and
676 # also the initial point for xk. By default, xplug = At(b)
677 # opts.U and opts.Ut - Analysis/Synthesis operators
678 # (either matrices or function handles).
679 # opts.normU - if opts.U is provided, this should be norm(U)
680 # opts.maxiter - max number of iterations in an inner loop.
681 # default is 10,000
682 # opts.TolVar - tolerance for the stopping criteria
683 # opts.stopTest - which stopping criteria to apply
684 # opts.stopTest == 1 : stop when the relative
685 # change in the objective function is less than
686 # TolVar
687 # opts.stopTest == 2 : stop when the l_infinity norm
688 # of difference in the xk variable is less
689 # than TolVar
690 # opts.TypeMin - if this is 'L1' (default), then
691 # minimizes a smoothed version of the l_1 norm.
692 # If this is 'tv', then minimizes a smoothed
693 # version of the total-variation norm.
694 # The string is case insensitive.
695 # opts.Verbose - if this is 0 or false, then very
696 # little output is displayed. If this is 1 or true,
697 # then output every iteration is displayed.
698 # If this is a number p greater than 1, then
699 # output is displayed every pth iteration.
700 # opts.fid - if this is 1 (default), the display is
701 # the usual Matlab screen. If this is the file-id
702 # of a file opened with fopen, then the display
703 # will be redirected to this file.
704 # opts.errFcn - if this is a function handle,
705 # then the program will evaluate opts.errFcn(xk)
706 # at every iteration and display the result.
707 # ex. opts.errFcn = @(x) norm( x - x_true )
708 # opts.outFcn - if this is a function handle,
709 # then the program will evaluate opts.outFcn(xk)
710 # at every iteration and save the results in outputData.
711 # If the result is a vector (as opposed to a scalar),
712 # it should be a row vector and not a column vector.
713 # ex. opts.outFcn = @(x) [norm( x - xtrue, 'inf' ),...
714 # norm( x - xtrue) / norm(xtrue)]
715 # opts.AAtinv - this is an experimental new option. AAtinv
716 # is the inverse of AA^*. This allows the use of a
717 # matrix A which is not a projection, but only
718 # for the noiseless (i.e. delta = 0) case.
719 # If the SVD of A is U*S*V', then AAtinv = U*(S^{-2})*U'.
720 # opts.USV - another experimental option. This supersedes
721 # the AAtinv option, so it is recommended that you
722 # do not define AAtinv. This allows the use of a matrix
723 # A which is not a projection, and works for the
724 # noisy ( i.e. delta > 0 ) case.
725 # opts.USV should contain three fields:
726 # opts.USV.U is the U from [U,S,V] = svd(A)
727 # likewise, opts.USV.S and opts.USV.V are S and V
728 # from svd(A). S may be a matrix or a vector.
729 # Outputs:
730 # xk - estimate of the solution x
731 # niter - number of iterations
732 # residuals - first column is the residual at every step,
733 # second column is the value of f_mu at every step
734 # outputData - a matrix, where each row r is the output
735 # from opts.outFcn, if supplied.
736 # opts - the structure containing the options that were used
737 #
738 # Written by: Jerome Bobin, Caltech
739 # Email: bobin@acm.caltech.edu
740 # Created: February 2009
741 # Modified: May 2009, Jerome Bobin and Stephen Becker, Caltech
742 # Modified: Nov 2009, Stephen Becker
743 #
744 # NESTA Version 1.1
745 # See also NESTA
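# ----------------------------------------------------------------------------
# Illustrative usage sketch, assuming opts is a plain dict read by setOpts()
# with the key names listed above, and that A, b, sigma and x_true are
# supplied by the caller; adapt to the actual setOpts() convention if needed.
#
#   import numpy, math
#   m = b.size
#   delta = math.sqrt(m + 2*math.sqrt(2*m)) * sigma    # heuristic quoted above
#   opts = {'maxiter': 5000,
#           'TolVar': 1e-6,
#           'Verbose': 10,                             # report every 10th iteration
#           'errFcn': lambda x: numpy.linalg.norm(x - x_true)}
#   xk, niter, residuals, outputData, opts = Core_Nesterov(A, None, b, 1e-3, delta, opts)
# ----------------------------------------------------------------------------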
746
747 #---- Set defaults
748 # opts = [];
749
750 #---------------------
751 # Original Matlab code:
752
753 #fid = setOpts('fid',1);
754 #function printf(varargin), fprintf(fid,varargin{:}); end
755 #maxiter = setOpts('maxiter',10000,0);
756 #TolVar = setOpts('TolVar',1e-5);
757 #TypeMin = setOpts('TypeMin','L1');
758 #Verbose = setOpts('Verbose',true);
759 #errFcn = setOpts('errFcn',[]);
760 #outFcn = setOpts('outFcn',[]);
761 #stopTest = setOpts('stopTest',1,1,2);
762 #U = setOpts('U', @(x) x );
763 #if ~isa(U,'function_handle')
764 # Ut = setOpts('Ut',[]);
765 #else
766 # Ut = setOpts('Ut', @(x) x );
767 #end
768 #xplug = setOpts('xplug',[]);
769 #normU = setOpts('normU',1);
770 #
771 #if delta < 0, error('delta must be greater or equal to zero'); end
772 #
773 #if isa(A,'function_handle')
774 # Atfun = At;
775 # Afun = A;
776 #else
777 # Atfun = @(x) A'*x;
778 # Afun = @(x) A*x;
779 #end
780 #Atb = Atfun(b);
781 #
782 #AAtinv = setOpts('AAtinv',[]);
783 #USV = setOpts('USV',[]);
784 #if ~isempty(USV)
785 # if isstruct(USV)
786 # Q = USV.U; # we can't use "U" as the variable name
787 # # since "U" already refers to the analysis operator
788 # S = USV.S;
789 # if isvector(S), s = S; S = diag(s);
790 # else s = diag(S); end
791 # V = USV.V;
792 # else
793 # error('opts.USV must be a structure');
794 # end
795 # if isempty(AAtinv)
796 # AAtinv = Q*diag( s.^(-2) )*Q';
797 # end
798 #end
799 ## --- for A not a projection (experimental)
800 #if ~isempty(AAtinv)
801 # if isa(AAtinv,'function_handle')
802 # AAtinv_fun = AAtinv;
803 # else
804 # AAtinv_fun = @(x) AAtinv * x;
805 # end
806 #
807 # AtAAtb = Atfun( AAtinv_fun(b) );
808 #
809 #else
810 # # We assume it's a projection
811 # AtAAtb = Atb;
812 # AAtinv_fun = @(x) x;
813 #end
814 #
815 #if isempty(xplug)
816 # xplug = AtAAtb;
817 #end
818 #
819 ##---- Initialization
820 #N = length(xplug);
821 #wk = zeros(N,1);
822 #xk = xplug;
823 #
824 #
825 ##---- Init Variables
826 #Ak= 0;
827 #Lmu = normU/mu;
828 #yk = xk;
829 #zk = xk;
830 #fmean = realmin/10;
831 #OK = 0;
832 #n = floor(sqrt(N));
833 #
834 ##---- Computing Atb
835 #Atb = Atfun(b);
836 #Axk = Afun(xk);# only needed if you want to see the residuals
837 ## Axplug = Axk;
838 #
839 #
840 ##---- TV Minimization
841 #if strcmpi(TypeMin,'TV')
842 # Lmu = 8*Lmu;
843 # Dv = spdiags([reshape([-ones(n-1,n); zeros(1,n)],N,1) ...
844 # reshape([zeros(1,n); ones(n-1,n)],N,1)], [0 1], N, N);
845 # Dh = spdiags([reshape([-ones(n,n-1) zeros(n,1)],N,1) ...
846 # reshape([zeros(n,1) ones(n,n-1)],N,1)], [0 n], N, N);
847 # D = sparse([Dh;Dv]);
848 #end
849 #
850 #
851 #Lmu1 = 1/Lmu;
852 ## SLmu = sqrt(Lmu);
853 ## SLmu1 = 1/sqrt(Lmu);
854 #lambdaY = 0;
855 #lambdaZ = 0;
856 #
857 ##---- setup data storage variables
858 #[DISPLAY_ERROR, RECORD_DATA] = deal(false);
859 #outputData = deal([]);
860 #residuals = zeros(maxiter,2);
861 #if ~isempty(errFcn), DISPLAY_ERROR = true; end
862 #if ~isempty(outFcn) && nargout >= 4
863 # RECORD_DATA = true;
864 # outputData = zeros(maxiter, size(outFcn(xplug),2) );
865 #end
866 #
867 #for k = 0:maxiter-1,
868 #
869 # #---- Dual problem
870 #
871 # if strcmpi(TypeMin,'L1') [df,fx,val,uk] = Perform_L1_Constraint(xk,mu,U,Ut);end
872 #
873 # if strcmpi(TypeMin,'TV') [df,fx] = Perform_TV_Constraint(xk,mu,Dv,Dh,D,U,Ut);end
874 #
875 # #---- Primal Problem
876 #
877 # #---- Updating yk
878 #
879 # #
880 # # yk = Argmin_x Lmu/2 ||x - xk||_l2^2 + <df,x-xk> s.t. ||b-Ax||_l2 < delta
881 # # Let xp be sqrt(Lmu) (x-xk), dfp be df/sqrt(Lmu), bp be sqrt(Lmu)(b- Axk) and deltap be sqrt(Lmu)delta
882 # # yk = xk + 1/sqrt(Lmu) Argmin_xp 1/2 || xp ||_2^2 + <dfp,xp> s.t. || bp - Axp ||_2 < deltap
883 # #
884 #
885 #
886 # cp = xk - 1/Lmu*df; # this is "q" in eq. (3.7) in the paper
887 #
888 # Acp = Afun( cp );
889 # if ~isempty(AAtinv) && isempty(USV)
890 # AtAcp = Atfun( AAtinv_fun( Acp ) );
891 # else
892 # AtAcp = Atfun( Acp );
893 # end
894 #
895 # residuals(k+1,1) = norm( b-Axk); # the residual
896 # residuals(k+1,2) = fx; # the value of the objective
897 # #--- if user has supplied a function, apply it to the iterate
898 # if RECORD_DATA
899 # outputData(k+1,:) = outFcn(xk);
900 # end
901 #
902 # if delta > 0
903 # if ~isempty(USV)
904 # # there are more efficient methods, but we're assuming
905 # # that A is negligible compared to U and Ut.
906 # # Here we make the change of variables x <-- x - xk
907 # # and df <-- df/L
908 # dfp = -Lmu1*df; Adfp = -(Axk - Acp);
909 # bp = b - Axk;
910 # deltap = delta;
911 # # Check if we even need to project:
912 # if norm( Adfp - bp ) < deltap
913 # lambdaY = 0; projIter = 0;
914 # # i.e. projection = dfp;
915 # yk = xk + dfp;
916 # Ayk = Axk + Adfp;
917 # else
918 # lambdaY_old = lambdaY;
919 # [projection,projIter,lambdaY] = fastProjection(Q,S,V,dfp,bp,...
920 # deltap, .999*lambdaY_old );
921 # if lambdaY > 0, disp('lambda is positive!'); keyboard; end
922 # yk = xk + projection;
923 # Ayk = Afun(yk);
924 # # DEBUGGING
925 ## if projIter == 50
926 ## fprintf('\n Maxed out iterations at y\n');
927 ## keyboard
928 ## end
929 # end
930 # else
931 # lambda = max(0,Lmu*(norm(b-Acp)/delta - 1));gamma = lambda/(lambda + Lmu);
932 # yk = lambda/Lmu*(1-gamma)*Atb + cp - gamma*AtAcp;
933 # # for calculating the residual, we'll avoid calling A()
934 # # by storing A(yk) here (using A'*A = I):
935 # Ayk = lambda/Lmu*(1-gamma)*b + Acp - gamma*Acp;
936 # end
937 # else
938 # # if delta is 0, the projection is simplified:
939 # yk = AtAAtb + cp - AtAcp;
940 # Ayk = b;
941 # end
942 #
943 # # DEBUGGING
944 ## if norm( Ayk - b ) > (1.05)*delta
945 ## fprintf('\nAyk failed projection test\n');
946 ## keyboard;
947 ## end
948 #
949 # #--- Stopping criterion
950 # qp = abs(fx - mean(fmean))/mean(fmean);
951 #
952 # switch stopTest
953 # case 1
954 # # look at the relative change in function value
955 # if qp <= TolVar && OK; break;end
956 # if qp <= TolVar && ~OK; OK=1; end
957 # case 2
958 # # look at the l_inf change from previous iterate
959 # if k >= 1 && norm( xk - xold, 'inf' ) <= TolVar
960 # break
961 # end
962 # end
963 # fmean = [fx,fmean];
964 # if (length(fmean) > 10) fmean = fmean(1:10);end
965 #
966 #
967 #
968 # #--- Updating zk
969 #
970 # apk =0.5*(k+1);
971 # Ak = Ak + apk;
972 # tauk = 2/(k+3);
973 #
974 # wk = apk*df + wk;
975 #
976 # #
977 # # zk = Argmin_x Lmu/2 ||b - Ax||_l2^2 + Lmu/2||x - xplug ||_2^2+ <wk,x-xk>
978 # # s.t. ||b-Ax||_l2 < delta
979 # #
980 #
981 # cp = xplug - 1/Lmu*wk;
982 #
983 # Acp = Afun( cp );
984 # if ~isempty( AAtinv ) && isempty(USV)
985 # AtAcp = Atfun( AAtinv_fun( Acp ) );
986 # else
987 # AtAcp = Atfun( Acp );
988 # end
989 #
990 # if delta > 0
991 # if ~isempty(USV)
992 # # Make the substitution wk <-- wk/K
993 #
994 ## dfp = (xplug - Lmu1*wk); # = cp
995 ## Adfp= (Axplug - Acp);
996 # dfp = cp; Adfp = Acp;
997 # bp = b;
998 # deltap = delta;
999 ## dfp = SLmu*xplug - SLmu1*wk;
1000 ## bp = SLmu*b;
1001 ## deltap = SLmu*delta;
1002 #
1003 # # See if we even need to project:
1004 # if norm( Adfp - bp ) < deltap
1005 # zk = dfp;
1006 # Azk = Adfp;
1007 # else
1008 # [projection,projIter,lambdaZ] = fastProjection(Q,S,V,dfp,bp,...
1009 # deltap, .999*lambdaZ );
1010 # if lambdaZ > 0, disp('lambda is positive!'); keyboard; end
1011 # zk = projection;
1012 # # zk = SLmu1*projection;
1013 # Azk = Afun(zk);
1014 #
1015 # # DEBUGGING:
1016 ## if projIter == 50
1017 ## fprintf('\n Maxed out iterations at z\n');
1018 ## keyboard
1019 ## end
1020 # end
1021 # else
1022 # lambda = max(0,Lmu*(norm(b-Acp)/delta - 1));gamma = lambda/(lambda + Lmu);
1023 # zk = lambda/Lmu*(1-gamma)*Atb + cp - gamma*AtAcp;
1024 # # for calculating the residual, we'll avoid calling A()
1025 # # by storing A(zk) here (using A'*A = I):
1026 # Azk = lambda/Lmu*(1-gamma)*b + Acp - gamma*Acp;
1027 # end
1028 # else
1029 # # if delta is 0, this is simplified:
1030 # zk = AtAAtb + cp - AtAcp;
1031 # Azk = b;
1032 # end
1033 #
1034 # # DEBUGGING
1035 ## if norm( Ayk - b ) > (1.05)*delta
1036 ## fprintf('\nAzk failed projection test\n');
1037 ## keyboard;
1038 ## end
1039 #
1040 # #--- Updating xk
1041 #
1042 # xkp = tauk*zk + (1-tauk)*yk;
1043 # xold = xk;
1044 # xk=xkp;
1045 # Axk = tauk*Azk + (1-tauk)*Ayk;
1046 #
1047 # if ~mod(k,10), Axk = Afun(xk); end # otherwise slowly lose precision
1048 # # DEBUG
1049 ## if norm(Axk - Afun(xk) ) > 1e-6, disp('error with Axk'); keyboard; end
1050 #
1051 # #--- display progress if desired
1052 # if ~mod(k+1,Verbose )
1053 # printf('Iter: #3d ~ fmu: #.3e ~ Rel. Variation of fmu: #.2e ~ Residual: #.2e',...
1054 # k+1,fx,qp,residuals(k+1,1) );
1055 # #--- if user has supplied a function to calculate the error,
1056 # # apply it to the current iterate and dislay the output:
1057 # if DISPLAY_ERROR, printf(' ~ Error: #.2e',errFcn(xk)); end
1058 # printf('\n');
1059 # end
1060 # if abs(fx)>1e20 || abs(residuals(k+1,1)) >1e20 || isnan(fx)
1061 # error('Nesta: possible divergence or NaN. Bad estimate of ||A''A||?');
1062 # end
1063 #
1064 #end
1065 #
1066 #niter = k+1;
1067 #
1068 ##-- truncate output vectors
1069 #residuals = residuals(1:niter,:);
1070 #if RECORD_DATA, outputData = outputData(1:niter,:); end
1071
1072 # End of original Matlab code
1073 #---------------------
1074
1075 #fid = setOpts('fid',1);
1076 #function printf(varargin), fprintf(fid,varargin{:}); end
1077 opts,maxiter,userSet = setOpts(opts,'maxiter',10000,0);
1078 opts,TolVar,userSet = setOpts(opts,'TolVar',1e-5);
1079 opts,TypeMin,userSet = setOpts(opts,'TypeMin','L1');
1080 opts,Verbose,userSet = setOpts(opts,'Verbose',True);
1081 opts,errFcn,userSet = setOpts(opts,'errFcn',None);
1082 opts,outFcn,userSet = setOpts(opts,'outFcn',None);
1083 opts,stopTest,userSet = setOpts(opts,'stopTest',1,1,2);
1084 opts,U,userSet = setOpts(opts,'U',lambda x: x );
1085 #if ~isa(U,'function_handle')
1086 if not hasattr(U,'__call__'):
1087 opts,Ut,userSet = setOpts(opts,'Ut',None);
1088 else:
1089 opts,Ut,userSet = setOpts(opts,'Ut', lambda x: x );
1090 #end
1091 opts,xplug,userSet = setOpts(opts,'xplug',None);
1092 opts,normU,userSet = setOpts(opts,'normU',1);
1093
1094 if delta < 0:
1095 print 'delta must be greater or equal to zero'
1096 raise ValueError('delta must be greater or equal to zero')
1097
1098 if hasattr(A,'__call__'):
1099 Atfun = At;
1100 Afun = A;
1101 else:
1102 Atfun = lambda x: numpy.dot(A.T,x)
1103 Afun = lambda x: numpy.dot(A,x)
1104 #end
1105 Atb = Atfun(b);
1106
1107 opts,AAtinv,userSet = setOpts(opts,'AAtinv',None);
1108 opts,USV,userSet = setOpts(opts,'USV',None);
1109 if USV is not None:
1110 #if isstruct(USV)
1111 Q = USV['U']; # we can't use "U" as the variable name
1112 # since "U" already refers to the analysis operator
1113 S = USV['S'];
1114 #if isvector(S), s = S; S = diag(s);
1115 #else s = diag(S); end
1116 if S.ndim == 1:
1117 s = S; S = numpy.diag(s)  # as in the Matlab original: if isvector(S), s = S; S = diag(s)
1118 else:
1119 s = numpy.diag(S)
1120
1121 V = USV['V'];
1122 #else
1123 # error('opts.USV must be a structure');
1124 #end
1125 #if isempty(AAtinv)
1126 if AAtinv is None:
1127 #AAtinv = Q*diag( s.^(-2) )*Q';
1128 AAtinv = numpy.dot(Q, numpy.dot(numpy.diag(s ** -2), Q.T))
1129 #end
1130 #end
1131 # --- for A not a projection (experimental)
1132 #if ~isempty(AAtinv)
1133 if AAtinv is not None:
1134 #if isa(AAtinv,'function_handle')
1135 if hasattr(AAtinv, '__call__'):
1136 AAtinv_fun = AAtinv;
1137 else:
1138 AAtinv_fun = lambda x: numpy.dot(AAtinv,x)
1139 #end
1140
1141 AtAAtb = Atfun( AAtinv_fun(b) );
1142
1143 else:
1144 # We assume it's a projection
1145 AtAAtb = Atb;
1146 AAtinv_fun = lambda x: x;
1147 #end
1148
1149 if xplug is None:
1150 xplug = AtAAtb.copy();
1151 #end
1152
1153 #---- Initialization
1154 #N = length(xplug);
1155 N = len(xplug)
1156 #wk = zeros(N,1);
1157 wk = numpy.zeros(N)
1158 xk = xplug.copy()
1159
1160
1161 #---- Init Variables
1162 Ak = 0.0;
1163 Lmu = normU/mu;
1164 yk = xk.copy();
1165 zk = xk.copy();
1166 fmean = numpy.finfo(float).tiny/10.0;
1167 OK = 0;
1168 n = math.floor(math.sqrt(N));
1169
1170 #---- Computing Atb
1171 Atb = Atfun(b);
1172 Axk = Afun(xk);# only needed if you want to see the residuals
1173 # Axplug = Axk;
1174
1175
1176 #---- TV Minimization
1177 if TypeMin == 'TV':
1178 print 'Nic:TODO: TV minimization not yet implemented!'
1179 raise NotImplementedError('TV minimization not yet implemented')
1180 #if strcmpi(TypeMin,'TV')
1181 # Lmu = 8*Lmu;
1182 # Dv = spdiags([reshape([-ones(n-1,n); zeros(1,n)],N,1) ...
1183 # reshape([zeros(1,n); ones(n-1,n)],N,1)], [0 1], N, N);
1184 # Dh = spdiags([reshape([-ones(n,n-1) zeros(n,1)],N,1) ...
1185 # reshape([zeros(n,1) ones(n,n-1)],N,1)], [0 n], N, N);
1186 # D = sparse([Dh;Dv]);
1187 #end
1188
1189
1190 Lmu1 = 1.0/Lmu;
1191 # SLmu = sqrt(Lmu);
1192 # SLmu1 = 1/sqrt(Lmu);
1193 lambdaY = 0.;
1194 lambdaZ = 0.;
1195
1196 #---- setup data storage variables
1197 #[DISPLAY_ERROR, RECORD_DATA] = deal(false);
1198 DISPLAY_ERROR = False
1199 RECORD_DATA = False
1200 #outputData = deal([]);
1201 outputData = None
1202 residuals = numpy.zeros((maxiter,2))
1203 #if ~isempty(errFcn), DISPLAY_ERROR = true; end
1204 if errFcn is not None:
1205 DISPLAY_ERROR = True
1206 #if ~isempty(outFcn) && nargout >= 4
1207 if outFcn is not None: # (the Matlab version also required nargout >= 4)
1208 RECORD_DATA = True
1209 outputData = numpy.zeros((maxiter, numpy.atleast_2d(outFcn(xplug)).shape[1]))  # shape must be a tuple; outFcn may return a 1-D row
1210 #end
1211
1212 #for k = 0:maxiter-1,
1213 for k in numpy.arange(maxiter):
1214
1215 #---- Dual problem
1216
1217 #if strcmpi(TypeMin,'L1') [df,fx,val,uk] = Perform_L1_Constraint(xk,mu,U,Ut);end
1218 if TypeMin == 'L1':
1219 df,fx,val,uk = Perform_L1_Constraint(xk,mu,U,Ut)
1220
1221 # Nic: TODO: TV not implemented yet !
1222 #if strcmpi(TypeMin,'TV') [df,fx] = Perform_TV_Constraint(xk,mu,Dv,Dh,D,U,Ut);end
1223
1224 #---- Primal Problem
1225
1226 #---- Updating yk
1227
1228 #
1229 # yk = Argmin_x Lmu/2 ||x - xk||_l2^2 + <df,x-xk> s.t. ||b-Ax||_l2 < delta
1230 # Let xp be sqrt(Lmu) (x-xk), dfp be df/sqrt(Lmu), bp be sqrt(Lmu)(b- Axk) and deltap be sqrt(Lmu)delta
1231 # yk = xk + 1/sqrt(Lmu) Argmin_xp 1/2 || xp ||_2^2 + <dfp,xp> s.t. || bp - Axp ||_2 < deltap
1232 #
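# (Note: when delta > 0 and opts.USV is not given, the code below uses the
#  closed-form solution of this subproblem for a projector A:
#  lambda = max(0, Lmu*(||b - A*cp||/delta - 1)), gamma = lambda/(lambda + Lmu),
#  yk = lambda/Lmu*(1-gamma)*A'*b + cp - gamma*A'*(A*cp).)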
1233
1234
1235 cp = xk - 1./Lmu*df; # this is "q" in eq. (3.7) in the paper
1236
1237 Acp = Afun( cp );
1238 #if ~isempty(AAtinv) && isempty(USV)
1239 if AAtinv is not None and USV is None:
1240 AtAcp = Atfun( AAtinv_fun( Acp ) );
1241 else:
1242 AtAcp = Atfun( Acp );
1243 #end
1244
1245 #residuals(k+1,1) = norm( b-Axk); # the residual
1246 residuals[k,0] = numpy.linalg.norm(b-Axk)
1247 #residuals(k+1,2) = fx; # the value of the objective
1248 residuals[k,1] = fx
1249 #--- if user has supplied a function, apply it to the iterate
1250 if RECORD_DATA:
1251 outputData[k,:] = outFcn(xk);  # k is already 0-based in this loop
1252 #end
1253
1254 if delta > 0:
1255 #if ~isempty(USV)
1256 if USV is not None:
1257 # there are more efficient methods, but we're assuming
1258 # that A is negligible compared to U and Ut.
1259 # Here we make the change of variables x <-- x - xk
1260 # and df <-- df/L
1261 dfp = -Lmu1*df;
1262 Adfp = -(Axk - Acp);
1263 bp = b - Axk;
1264 deltap = delta;
1265 # Check if we even need to project:
1266 if numpy.linalg.norm( Adfp - bp ) < deltap:
1267 lambdaY = 0.
1268 projIter = 0;
1269 # i.e. projection = dfp;
1270 yk = xk + dfp;
1271 Ayk = Axk + Adfp;
1272 else:
1273 lambdaY_old = lambdaY;  # lambdaY is a scalar
1274 #[projection,projIter,lambdaY] = fastProjection(Q,S,V,dfp,bp,deltap, .999*lambdaY_old );
1275 projection,projIter,lambdaY = fastProjection(Q,S,V,dfp,bp,deltap, .999*lambdaY_old )
1276 #if lambdaY > 0, disp('lambda is positive!'); keyboard; end
1277 if lambdaY > 0:
1278 print 'lambda is positive!'
1279 raise RuntimeError('lambda is positive!')
1280 yk = xk + projection;
1281 Ayk = Afun(yk);
1282 # DEBUGGING
1283 # if projIter == 50
1284 # fprintf('\n Maxed out iterations at y\n');
1285 # keyboard
1286 # end
1287 #end
1288 else:
1289 lambdaa = max(0,Lmu*(numpy.linalg.norm(b-Acp)/delta - 1))
1290 gamma = lambdaa/(lambdaa + Lmu);
1291 yk = lambdaa/Lmu*(1-gamma)*Atb + cp - gamma*AtAcp;
1292 # for calculating the residual, we'll avoid calling A()
1293 # by storing A(yk) here (using A'*A = I):
1294 Ayk = lambdaa/Lmu*(1-gamma)*b + Acp - gamma*Acp;
1295 #end
1296 else:
1297 # if delta is 0, the projection is simplified:
1298 yk = AtAAtb + cp - AtAcp;
1299 Ayk = b.copy();
1300 #end
1301
1302 # DEBUGGING
1303 # if norm( Ayk - b ) > (1.05)*delta
1304 # fprintf('\nAyk failed projection test\n');
1305 # keyboard;
1306 # end
1307
1308 #--- Stopping criterion
1309 qp = abs(fx - numpy.mean(fmean))/numpy.mean(fmean);
1310
1311 #switch stopTest
1312 # case 1
1313 if stopTest == 1:
1314 # look at the relative change in function value
1315 #if qp <= TolVar && OK; break;end
1316 if qp <= TolVar and OK:
1317 break
1318 #if qp <= TolVar && ~OK; OK=1; end
1319 if qp <= TolVar and not OK:
1320 OK = 1
1321 elif stopTest == 2:
1322 # look at the l_inf change from previous iterate
1323 if k >= 1 and numpy.linalg.norm( xk - xold, numpy.inf ) <= TolVar:
1324 break
1325 #end
1326 #end
1327 #fmean = [fx,fmean];
1328 fmean = numpy.hstack((fx,fmean));
1329 if (len(fmean) > 10):
1330 fmean = fmean[:10]
1331
1332
1333
1334 #--- Updating zk
1335
1336 apk = 0.5*(k+1);
1337 Ak = Ak + apk;
1338 tauk = 2.0/(k+3);  # force float division (Python 2)
1339
1340 wk = apk*df + wk;
1341
1342 #
1343 # zk = Argmin_x Lmu/2 ||b - Ax||_l2^2 + Lmu/2||x - xplug ||_2^2+ <wk,x-xk>
1344 # s.t. ||b-Ax||_l2 < delta
1345 #
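# (This is the analogue of the yk update above, but the prox-centre is xplug
#  and the driving gradient is the accumulated wk instead of df.)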
1346
1347 cp = xplug - 1.0/Lmu*wk;
1348
1349 Acp = Afun( cp );
1350 #if ~isempty( AAtinv ) && isempty(USV)
1351 if AAtinv is not None and USV is None:
1352 AtAcp = Atfun( AAtinv_fun( Acp ) );
1353 else:
1354 AtAcp = Atfun( Acp );
1355 #end
1356
1357 if delta > 0:
1358 #if ~isempty(USV)
1359 if USV is not None:
1360 # Make the substitution wk <-- wk/K
1361
1362 # dfp = (xplug - Lmu1*wk); # = cp
1363 # Adfp= (Axplug - Acp);
1364 dfp = cp.copy()
1365 Adfp = Acp.copy()
1366 bp = b.copy();
1367 deltap = delta;
1368 # dfp = SLmu*xplug - SLmu1*wk;
1369 # bp = SLmu*b;
1370 # deltap = SLmu*delta;
1371
1372 # See if we even need to project:
1373 if numpy.linalg.norm( Adfp - bp ) < deltap:
1374 zk = dfp.copy();
1375 Azk = Adfp.copy();
1376 else:
1377 projection,projIter,lambdaZ = fastProjection(Q,S,V,dfp,bp,deltap, .999*lambdaZ )
1378 if lambdaZ > 0:
1379 print 'lambda is positive!'
1380 raise RuntimeError('lambda is positive!')
1381 zk = projection.copy();
1382 # zk = SLmu1*projection;
1383 Azk = Afun(zk);
1384
1385 # DEBUGGING:
1386 # if projIter == 50
1387 # fprintf('\n Maxed out iterations at z\n');
1388 # keyboard
1389 # end
1390 #end
1391 else:
1392 lambdaa = max(0,Lmu*(numpy.linalg.norm(b-Acp)/delta - 1));
1393 gamma = lambdaa/(lambdaa + Lmu);
1394 zk = lambdaa/Lmu*(1-gamma)*Atb + cp - gamma*AtAcp;
1395 # for calculating the residual, we'll avoid calling A()
1396 # by storing A(zk) here (using A'*A = I):
1397 Azk = lambdaa/Lmu*(1-gamma)*b + Acp - gamma*Acp;
1398 #end
1399 else:
1400 # if delta is 0, this is simplified:
1401 zk = AtAAtb + cp - AtAcp;
1402 Azk = b;
1403 #end
1404
1405 # DEBUGGING
1406 # if norm( Ayk - b ) > (1.05)*delta
1407 # fprintf('\nAzk failed projection test\n');
1408 # keyboard;
1409 # end
1410
1411 #--- Updating xk
1412
1413 xkp = tauk*zk + (1-tauk)*yk;
1414 xold = xk.copy();
1415 xk = xkp.copy();
1416 Axk = tauk*Azk + (1-tauk)*Ayk;
1417
1418 #if ~mod(k,10), Axk = Afun(xk); end # otherwise slowly lose precision
1419 if not numpy.mod(k,10):
1420 Axk = Afun(xk)
1421 # DEBUG
1422 # if norm(Axk - Afun(xk) ) > 1e-6, disp('error with Axk'); keyboard; end
1423
1424 #--- display progress if desired
1425 #if ~mod(k+1,Verbose )
1426 if Verbose and not numpy.mod(k+1,Verbose):  # Verbose == 0/False disables output
1427 #printf('Iter: #3d ~ fmu: #.3e ~ Rel. Variation of fmu: #.2e ~ Residual: #.2e',k+1,fx,qp,residuals(k+1,1) );
1428 print 'Iter: ',k+1,' ~ fmu: ',fx,' ~ Rel. Variation of fmu: ',qp,' ~ Residual:',residuals[k,0]
1429 #--- if user has supplied a function to calculate the error,
1430 # apply it to the current iterate and display the output:
1431 #if DISPLAY_ERROR, printf(' ~ Error: #.2e',errFcn(xk)); end
1432 if DISPLAY_ERROR:
1433 print ' ~ Error:',errFcn(xk)
1434 #end
1435 if abs(fx)>1e20 or abs(residuals[k,0]) >1e20 or numpy.isnan(fx):
1436 #error('Nesta: possible divergence or NaN. Bad estimate of ||A''A||?');
1437 print 'Nesta: possible divergence or NaN. Bad estimate of ||A''A||?'
1438 raise RuntimeError("Nesta: possible divergence or NaN. Bad estimate of ||A'A||?")
1439 #end
1440
1441 #end
1442
1443 niter = k+1;
1444
1445 #-- truncate output vectors
1446 residuals = residuals[:niter,:]
1447 #if RECORD_DATA, outputData = outputData(1:niter,:); end
1448 if RECORD_DATA:
1449 outputData = outputData[:niter,:]
1450
1451 return xk,niter,residuals,outputData,opts
1452
1453
1454 ############ PERFORM THE L1 CONSTRAINT ##################
1455
1456 #function [df,fx,val,uk] = Perform_L1_Constraint(xk,mu,U,Ut)
1457 def Perform_L1_Constraint(xk,mu,U,Ut):
1458
1459 #if isa(U,'function_handle')
1460 if hasattr(U,'__call__'):
1461 uk = U(xk);
1462 else:
1463 uk = numpy.dot(U,xk)
1464 #end
1465 fx = uk.copy()
1466
1467 #uk = uk./max(mu,abs(uk));
1468 uk = uk / numpy.maximum(mu, numpy.abs(uk))  # element-wise, as in Matlab uk./max(mu,abs(uk))
1469 #val = real(uk'*fx);
1470 val = numpy.real(numpy.vdot(uk,fx))
1471 #fx = real(uk'*fx - mu/2*norm(uk)^2);
1472 fx = numpy.real(numpy.vdot(uk,fx) - mu/2.*numpy.linalg.norm(uk)**2);
1473
1474 #if isa(Ut,'function_handle')
1475 if hasattr(Ut,'__call__'):
1476 df = Ut(uk);
1477 else:
1478 #df = U'*uk;
1479 df = numpy.dot(U.T,uk)
1480 #end
1481 return df,fx,val,uk
1482 #end
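# ----------------------------------------------------------------------------
# Small sanity check (illustrative, assuming U and Ut are identity operators):
# the smoothed l1 value of x = [3, 0.05] with mu = 0.1 is
# (3 - mu/2) + 0.05**2/(2*mu) = 2.95 + 0.0125 = 2.9625, and the dual vector uk
# saturates to sign(x) for entries whose magnitude exceeds mu.
#
#   I = lambda x: x
#   df, fx, val, uk = Perform_L1_Constraint(numpy.array([3.0, 0.05]), 0.1, I, I)
#   # expected: fx ~= 2.9625, uk ~= [1.0, 0.5], df == uk
# ----------------------------------------------------------------------------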
1483
1484 # Nic: TODO: TV not implemented yet!
1485 ############ PERFORM THE TV CONSTRAINT ##################
1486 #function [df,fx] = Perform_TV_Constraint(xk,mu,Dv,Dh,D,U,Ut)
1487 # if isa(U,'function_handle')
1488 # x = U(xk);
1489 # else
1490 # x = U*xk;
1491 # end
1492 # df = zeros(size(x));
1493 #
1494 # Dhx = Dh*x;
1495 # Dvx = Dv*x;
1496 #
1497 # tvx = sum(sqrt(abs(Dhx).^2+abs(Dvx).^2));
1498 # w = max(mu,sqrt(abs(Dhx).^2 + abs(Dvx).^2));
1499 # uh = Dhx ./ w;
1500 # uv = Dvx ./ w;
1501 # u = [uh;uv];
1502 # fx = real(u'*D*x - mu/2 * 1/numel(u)*sum(u'*u));
1503 # if isa(Ut,'function_handle')
1504 # df = Ut(D'*u);
1505 # else
1506 # df = U'*(D'*u);
1507 # end
1508 #end
1509
1510
1511 #function [x,k,l] = fastProjection( U, S, V, y, b, epsilon, lambda0, DISP )
1512 def fastProjection( U, S, V, y, b, epsilon, lambda0=0, DISP=False ):
1513 # [x,niter,lambda] = fastProjection(U, S, V, y, b, epsilon, [lambda0], [DISP] )
1514 #
1515 # minimizes || x - y ||
1516 # such that || Ax - b || <= epsilon
1517 #
1518 # where USV' = A (i.e the SVD of A)
1519 #
1520 # The optional input "lambda0" is a guess for the Lagrange parameter
1521 #
1522 # Warning: for speed, does not calculate A(y) to see if x = y is feasible
1523 #
1524 # NESTA Version 1.1
1525 # See also Core_Nesterov
1526
1527 # Written by Stephen Becker, September 2009, srbecker@caltech.edu
1528
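# ----------------------------------------------------------------------------
# Illustrative usage sketch (with made-up data): project a point y onto
# {x : ||A*x - b||_2 <= eps} by feeding fastProjection the SVD factors of A.
# numpy.linalg.svd returns V transposed and the singular values as a vector,
# so they are converted below.
#
#   import numpy
#   numpy.random.seed(0)
#   A = numpy.random.randn(5, 20)
#   y = numpy.random.randn(20)
#   b = numpy.random.randn(5)
#   Usvd, svals, Vt = numpy.linalg.svd(A, full_matrices=False)
#   x, niter, lam = fastProjection(Usvd, numpy.diag(svals), Vt.T, y, b, 0.1)
#   # afterwards ||A.dot(x) - b|| should be <= 0.1 (up to the Newton tolerance)
# ----------------------------------------------------------------------------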
1529 #---------------------
1530 # Original Matlab code:
1531
1532 #DEBUG = true;
1533 #if nargin < 8
1534 # DISP = false;
1535 #end
1536 ## -- Parameters for Newton's method --
1537 #MAXIT = 70;
1538 #TOL = 1e-8 * epsilon;
1539 ## TOL = max( TOL, 10*eps ); # we can't do better than machine precision
1540 #
1541 #m = size(U,1);
1542 #n = size(V,1);
1543 #mn = min([m,n]);
1544 #if numel(S) > mn^2, S = diag(diag(S)); end # S should be a small square matrix
1545 #r = size(S);
1546 #if size(U,2) > r, U = U(:,1:r); end
1547 #if size(V,2) > r, V = V(:,1:r); end
1548 #
1549 #s = diag(S);
1550 #s2 = s.^2;
1551 #
1552 ## What we want to do:
1553 ## b = b - A*y;
1554 ## bb = U'*b;
1555 #
1556 ## if A doesn't have full row rank, then b may not be in the range
1557 #if size(U,1) > size(U,2)
1558 # bRange = U*(U'*b);
1559 # bNull = b - bRange;
1560 # epsilon = sqrt( epsilon^2 - norm(bNull)^2 );
1561 #end
1562 #b = U'*b - S*(V'*y); # parenthesis is very important! This is expensive.
1563 #
1564 ## b2 = b.^2;
1565 #b2 = abs(b).^2; # for complex data
1566 #bs2 = b2.*s2;
1567 #epsilon2 = epsilon^2;
1568 #
1569 ## The following routine need to be fast
1570 ## For efficiency (at cost of transparency), we are writing the calculations
1571 ## in a way that minimize number of operations. The functions "f"
1572 ## and "fp" represent f and its derivative.
1573 #
1574 ## f = @(lambda) sum( b2 .*(1-lambda*s2).^(-2) ) - epsilon^2;
1575 ## fp = @(lambda) 2*sum( bs2 .*(1-lambda*s2).^(-3) );
1576 #if nargin < 7, lambda0 = 0; end
1577 #l = lambda0; oldff = 0;
1578 #one = ones(m,1);
1579 #alpha = 1; # take full Newton steps
1580 #for k = 1:MAXIT
1581 # # make f(l) and fp(l) as efficient as possible:
1582 # ls = one./(one-l*s2);
1583 # ls2 = ls.^2;
1584 # ls3 = ls2.*ls;
1585 # ff = b2.'*ls2; # should be .', not ', even for complex data
1586 # ff = ff - epsilon2;
1587 # fpl = 2*( bs2.'*ls3 ); # should be .', not ', even for complex data
1588 ## ff = f(l); # this is a little slower
1589 ## fpl = fp(l); # this is a little slower
1590 # d = -ff/fpl;
1591 # if DISP, fprintf('#2d, lambda is #5.2f, f(lambda) is #.2e, f''(lambda) is #.2e\n',...
1592 # k,l,ff,fpl ); end
1593 # if abs(ff) < TOL, break; end # stopping criteria
1594 # l_old = l;
1595 # if k>2 && ( abs(ff) > 10*abs(oldff+100) ) #|| abs(d) > 1e13 )
1596 # l = 0; alpha = 1/2;
1597 ## oldff = f(0);
1598 # oldff = sum(b2); oldff = oldff - epsilon2;
1599 # if DISP, disp('restarting'); end
1600 # else
1601 # if alpha < 1, alpha = (alpha+1)/2; end
1602 # l = l + alpha*d;
1603 # oldff = ff;
1604 # if l > 0
1605 # l = 0; # shouldn't be positive
1606 # oldff = sum(b2); oldff = oldff - epsilon2;
1607 # end
1608 # end
1609 # if l_old == l && l == 0
1610 # if DISP, disp('Making no progress; x = y is probably feasible'); end
1611 # break;
1612 # end
1613 #end
1614 ## if k == MAXIT && DEBUG, disp('maxed out iterations'); end
1615 #if l < 0
1616 # xhat = -l*s.*b./( 1 - l*s2 );
1617 # x = V*xhat + y;
1618 #else
1619 # # y is already feasible, so no need to project
1620 # l = 0;
1621 # x = y;
1622 #end
1623
1624 # End of original Matlab code
1625 #---------------------
1626
1627 DEBUG = True;
1628 #if nargin < 8
1629 # DISP = false;
1630 #end
1631 # -- Parameters for Newton's method --
1632 MAXIT = 70;
1633 TOL = 1e-8 * epsilon;
1634 # TOL = max( TOL, 10*eps ); # we can't do better than machine precision
1635
1636 #m = size(U,1);
1637 #n = size(V,1);
1638 m = U.shape[0]
1639 n = V.shape[0]
1640 mn = min(m,n);
1641 #if numel(S) > mn^2, S = diag(diag(S)); end # S should be a small square matrix
1642 if S.size > mn**2:
1643 S = numpy.diag(numpy.diag(S))
1644 #r = size(S);
1645 r = S.shape[0]
1646 #if size(U,2) > r, U = U(:,1:r); end
1647 if U.shape[1] > r:
1648 U = U[:,:r]
1649 #if size(V,2) > r, V = V(:,1:r); end
1650 if V.shape[1] > r:
1651 V = V[:,:r]
1652
1653 s = numpy.diag(S);
1654 s2 = s**2;
1655
1656 # What we want to do:
1657 # b = b - A*y;
1658 # bb = U'*b;
1659
1660 # if A doesn't have full row rank, then b may not be in the range
1661 #if size(U,1) > size(U,2)
1662 if U.shape[0] > U.shape[1]:
1663 #bRange = U*(U'*b);
1664 bRange = numpy.dot(U, numpy.dot(U.T, b))
1665 bNull = b - bRange;
1666 epsilon = math.sqrt( epsilon**2 - numpy.linalg.norm(bNull)**2 );
1667 #end
1668 #b = U'*b - S*(V'*y); # parenthesis is very important! This is expensive.
1669 b = numpy.dot(U.T,b) - numpy.dot(S, numpy.dot(V.T,y))
1670
1671 # b2 = b.^2;
1672 b2 = abs(b)**2; # for complex data
1673 bs2 = b2*s2;  # element-wise product (Matlab b2.*s2), not a power
1674 epsilon2 = epsilon**2;
1675
1676 # The following routine needs to be fast.
1677 # For efficiency (at the cost of transparency), we write the calculations
1678 # in a way that minimizes the number of operations. The functions "f"
1679 # and "fp" represent f and its derivative.
1680
1681 # f = @(lambda) sum( b2 .*(1-lambda*s2).^(-2) ) - epsilon^2;
1682 # fp = @(lambda) 2*sum( bs2 .*(1-lambda*s2).^(-3) );
1683
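# The Newton iteration below solves f(lambda) = 0 for the (non-positive)
# Lagrange multiplier of the constraint ||Ax - b|| <= epsilon, with
# f(lambda) = sum_i b_i^2/(1 - lambda*s_i^2)^2 - epsilon^2 as defined above
# (the b_i here are the transformed values computed a few lines earlier).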
1684 #if nargin < 7, lambda0 = 0; end
1685 l = lambda0;
1686 oldff = 0;
1687 one = numpy.ones(m);
1688 alpha = 1; # take full Newton steps
1689 #for k = 1:MAXIT
1690 for k in numpy.arange(MAXIT):
1691 # make f(l) and fp(l) as efficient as possible:
1692 #ls = one./(one-l*s2);
1693 ls = one/(one-l*s2)
1694 ls2 = ls**2;
1695 ls3 = ls2*ls;  # element-wise product (Matlab ls2.*ls), not a power
1696 #ff = b2.'*ls2; # should be .', not ', even for complex data
1697 ff = numpy.dot(b2, ls2)  # plain (non-conjugated) dot, matching Matlab's .'
1698 ff = ff - epsilon2;
1699 #fpl = 2*( bs2.'*ls3 ); # should be .', not ', even for complex data
1700 fpl = 2 * numpy.dot(bs2, ls3)  # plain (non-conjugated) dot, matching Matlab's .'
1701 # ff = f(l); # this is a little slower
1702 # fpl = fp(l); # this is a little slower
1703 d = -ff/fpl;
1704 # if DISP, fprintf('#2d, lambda is #5.2f, f(lambda) is #.2e, f''(lambda) is #.2e\n',k,l,ff,fpl ); end
1705 if DISP:
1706 print k,', lambda is ',l,', f(lambda) is ',ff,", f'(lambda) is ",fpl
1707 #if abs(ff) < TOL, break; end # stopping criteria
1708 if abs(ff) < TOL:
1709 break
1710 l_old = l  # l is a scalar
1711 if k>2 and ( abs(ff) > 10*abs(oldff+100) ): #|| abs(d) > 1e13 )
1712 l = 0;
1713 alpha = 1.0/2.0;
1714 # oldff = f(0);
1715 oldff = b2.sum(); oldff = oldff - epsilon2;
1716 if DISP:
1717 print 'restarting'
1718 else:
1719 if alpha < 1:
1720 alpha = (alpha+1.0)/2.0
1721 l = l + alpha*d;
1722 oldff = ff
1723 if l > 0:
1724 l = 0; # shouldn't be positive
1725 oldff = b2.sum()
1726 oldff = oldff - epsilon2;
1727 #end
1728 #end
1729 if l_old == l and l == 0:
1730 #if DISP, disp('Making no progress; x = y is probably feasible'); end
1731 if DISP:
1732 print 'Making no progress; x = y is probably feasible'
1733 break;
1734 #end
1735 #end
1736 # if k == MAXIT && DEBUG, disp('maxed out iterations'); end
1737 if l < 0:
1738 #xhat = -l*s.*b./( 1 - l*s2 );
1739 xhat = -l*s*b/( 1. - l*s2 )  # l is a scalar, so plain scalar multiplication
1740 #x = V*xhat + y;
1741 x = numpy.dot(V,xhat) + y
1742 else:
1743 # y is already feasible, so no need to project
1744 l = 0;
1745 x = y.copy();
1746 #end
1747 return x,k,l