changeset 46:88f0ebe1667a

Finished NESTA implementation. Not tested yet
author nikcleju
date Tue, 29 Nov 2011 22:06:20 +0000
parents 7524d7749456
children e6a5f2173015
files matlab/NESTA/fastProjection.m pyCSalgos/NESTA/NESTA.py
diffstat 2 files changed, 1217 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/matlab/NESTA/fastProjection.m	Tue Nov 29 22:06:20 2011 +0000
@@ -0,0 +1,108 @@
+function [x,k,l] = fastProjection( U, S, V, y, b, epsilon, lambda0, DISP )
+% [x,niter,lambda] = fastProjection(U, S, V, y, b, epsilon, [lambda0], [DISP] )
+%
+% minimizes || x - y ||
+%   such that || Ax - b || <= epsilon
+%
+% where USV' = A (i.e the SVD of A)
+%
+% The optional input "lambda0" is a guess for the Lagrange parameter
+%
+% Warning: for speed, does not calculate A(y) to see if x = y is feasible
+%
+% NESTA Version 1.1
+%   See also Core_Nesterov
+
+% Written by Stephen Becker, September 2009, srbecker@caltech.edu
+
+DEBUG = true;
+if nargin < 8
+    DISP = false;
+end
+% -- Parameters for Newton's method --
+MAXIT = 70;
+TOL = 1e-8 * epsilon;
+% TOL = max( TOL, 10*eps );  % we can't do better than machine precision
+
+m = size(U,1);
+n = size(V,1);
+mn = min([m,n]);
+if numel(S) > mn^2, S = diag(diag(S)); end  % S should be a small square matrix
+r = size(S);
+if size(U,2) > r, U = U(:,1:r); end
+if size(V,2) > r, V = V(:,1:r); end
+
+s = diag(S);
+s2 = s.^2;
+
+% What we want to do:
+%   b = b - A*y;
+%   bb = U'*b;
+
+% if A doesn't have full row rank, then b may not be in the range
+if size(U,1) > size(U,2)
+    bRange = U*(U'*b);
+    bNull = b - bRange;
+    epsilon = sqrt( epsilon^2 - norm(bNull)^2 );
+end
+b = U'*b - S*(V'*y);  % parenthesis is very important!  This is expensive.
+    
+% b2 = b.^2;
+b2 = abs(b).^2;  % for complex data
+bs2 = b2.*s2;
+epsilon2 = epsilon^2;
+
+% The following routine need to be fast
+% For efficiency (at cost of transparency), we are writing the calculations
+% in a way that minimize number of operations.  The functions "f"
+% and "fp" represent f and its derivative.
+
+% f = @(lambda) sum( b2 .*(1-lambda*s2).^(-2) ) - epsilon^2;
+% fp = @(lambda) 2*sum( bs2 .*(1-lambda*s2).^(-3) );
+if nargin < 7, lambda0 = 0; end
+l = lambda0; oldff = 0;
+one = ones(m,1);
+alpha = 1;      % take full Newton steps
+for k = 1:MAXIT
+    % make f(l) and fp(l) as efficient as possible:
+    ls = one./(one-l*s2);
+    ls2 = ls.^2;
+    ls3 = ls2.*ls;
+    ff = b2.'*ls2; % should be .', not ', even for complex data
+    ff = ff - epsilon2;
+    fpl = 2*( bs2.'*ls3 );  % should be .', not ', even for complex data
+%     ff = f(l);    % this is a little slower
+%     fpl = fp(l);  % this is a little slower
+    d = -ff/fpl;
+    if DISP, fprintf('%2d, lambda is %5.2f, f(lambda) is %.2e, f''(lambda) is %.2e\n',...
+            k,l,ff,fpl ); end
+    if abs(ff) < TOL, break; end        % stopping criteria
+    l_old = l;
+    if k>2 && ( abs(ff) > 10*abs(oldff+100) ) %|| abs(d) > 1e13 )
+        l = 0; alpha = 1/2;  
+%         oldff = f(0);
+        oldff = sum(b2); oldff = oldff - epsilon2;
+        if DISP, disp('restarting'); end
+    else
+        if alpha < 1, alpha = (alpha+1)/2; end
+        l = l + alpha*d;
+        oldff = ff;
+        if l > 0
+            l = 0;  % shouldn't be positive
+            oldff = sum(b2);  oldff = oldff - epsilon2;
+        end
+    end
+    if l_old == l && l == 0
+        if DISP, disp('Making no progress; x = y is probably feasible'); end
+        break;
+    end
+end
+% if k == MAXIT && DEBUG, disp('maxed out iterations'); end
+if l < 0
+    xhat = -l*s.*b./( 1 - l*s2 );
+    x = V*xhat + y;
+else
+    % y is already feasible, so no need to project
+    l = 0;
+    x = y;
+end
\ No newline at end of file
--- a/pyCSalgos/NESTA/NESTA.py	Tue Nov 29 18:13:17 2011 +0000
+++ b/pyCSalgos/NESTA/NESTA.py	Tue Nov 29 22:06:20 2011 +0000
@@ -327,7 +327,7 @@
       else:
         s = numpy.diag(S)
       
-      #V = USV.V;
+      V = USV['V'];
       #else
       #    error('opts.USV must be a structure');
       #end
@@ -637,3 +637,1111 @@
        cnt = cnt+1;
     #end
 #end
+
+
+
+#function [xk,niter,residuals,outputData,opts] = Core_Nesterov(A,At,b,mu,delta,opts)
+def Core_Nesterov(A,At,b,mu,delta,opts):
+# [xk,niter,residuals,outputData,opts] =Core_Nesterov(A,At,b,mu,delta,opts)
+#
+# Solves a L1 minimization problem under a quadratic constraint using the
+# Nesterov algorithm, without continuation:
+#
+#     min_x || U x ||_1 s.t. ||y - Ax||_2 <= delta
+# 
+# If continuation is desired, see the function NESTA.m
+#
+# The primal prox-function is also adapted by accounting for a first guess
+# xplug that also tends towards x_muf 
+#
+# The observation matrix A is a projector
+#
+# Inputs:   A and At - measurement matrix and adjoint (either a matrix, in which
+#               case At is unused, or function handles).  m x n dimensions.
+#           b   - Observed data, a m x 1 array
+#           muf - The desired value of mu at the last continuation step.
+#               A smaller mu leads to higher accuracy.
+#           delta - l2 error bound.  This enforces how close the variable
+#               must fit the observations b, i.e. || y - Ax ||_2 <= delta
+#               If delta = 0, enforces y = Ax
+#               Common heuristic: delta = sqrt(m + 2*sqrt(2*m))*sigma;
+#               where sigma=std(noise).
+#           opts -
+#               This is a structure that contains additional options,
+#               some of which are optional.
+#               The fieldnames are case insensitive.  Below
+#               are the possible fieldnames:
+#               
+#               opts.xplug - the first guess for the primal prox-function, and
+#                 also the initial point for xk.  By default, xplug = At(b)
+#               opts.U and opts.Ut - Analysis/Synthesis operators
+#                 (either matrices of function handles).
+#               opts.normU - if opts.U is provided, this should be norm(U)
+#               opts.maxiter - max number of iterations in an inner loop.
+#                 default is 10,000
+#               opts.TolVar - tolerance for the stopping criteria
+#               opts.stopTest - which stopping criteria to apply
+#                   opts.stopTest == 1 : stop when the relative
+#                       change in the objective function is less than
+#                       TolVar
+#                   opts.stopTest == 2 : stop with the l_infinity norm
+#                       of difference in the xk variable is less
+#                       than TolVar
+#               opts.TypeMin - if this is 'L1' (default), then
+#                   minimizes a smoothed version of the l_1 norm.
+#                   If this is 'tv', then minimizes a smoothed
+#                   version of the total-variation norm.
+#                   The string is case insensitive.
+#               opts.Verbose - if this is 0 or false, then very
+#                   little output is displayed.  If this is 1 or true,
+#                   then output every iteration is displayed.
+#                   If this is a number p greater than 1, then
+#                   output is displayed every pth iteration.
+#               opts.fid - if this is 1 (default), the display is
+#                   the usual Matlab screen.  If this is the file-id
+#                   of a file opened with fopen, then the display
+#                   will be redirected to this file.
+#               opts.errFcn - if this is a function handle,
+#                   then the program will evaluate opts.errFcn(xk)
+#                   at every iteration and display the result.
+#                   ex.  opts.errFcn = @(x) norm( x - x_true )
+#               opts.outFcn - if this is a function handle, 
+#                   then then program will evaluate opts.outFcn(xk)
+#                   at every iteration and save the results in outputData.
+#                   If the result is a vector (as opposed to a scalar),
+#                   it should be a row vector and not a column vector.
+#                   ex. opts.outFcn = @(x) [norm( x - xtrue, 'inf' ),...
+#                                           norm( x - xtrue) / norm(xtrue)]
+#               opts.AAtinv - this is an experimental new option.  AAtinv
+#                   is the inverse of AA^*.  This allows the use of a 
+#                   matrix A which is not a projection, but only
+#                   for the noiseless (i.e. delta = 0) case.
+#                   If the SVD of A is U*S*V', then AAtinv = U*(S^{-2})*U'.
+#               opts.USV - another experimental option.  This supercedes
+#                   the AAtinv option, so it is recommended that you
+#                   do not define AAtinv.  This allows the use of a matrix
+#                   A which is not a projection, and works for the
+#                   noisy ( i.e. delta > 0 ) case.
+#                   opts.USV should contain three fields: 
+#                   opts.USV.U  is the U from [U,S,V] = svd(A)
+#                   likewise, opts.USV.S and opts.USV.V are S and V
+#                   from svd(A).  S may be a matrix or a vector.
+#  Outputs:
+#           xk  - estimate of the solution x
+#           niter - number of iterations
+#           residuals - first column is the residual at every step,
+#               second column is the value of f_mu at every step
+#           outputData - a matrix, where each row r is the output
+#               from opts.outFcn, if supplied.
+#           opts - the structure containing the options that were used      
+#
+# Written by: Jerome Bobin, Caltech
+# Email: bobin@acm.caltech.edu
+# Created: February 2009
+# Modified: May 2009, Jerome Bobin and Stephen Becker, Caltech
+# Modified: Nov 2009, Stephen Becker
+#
+# NESTA Version 1.1
+#   See also NESTA
+
+#---- Set defaults
+# opts = [];
+
+  #---------------------
+  # Original Matab code:
+
+  #fid = setOpts('fid',1);
+  #function printf(varargin), fprintf(fid,varargin{:}); end
+  #maxiter = setOpts('maxiter',10000,0);
+  #TolVar = setOpts('TolVar',1e-5);
+  #TypeMin = setOpts('TypeMin','L1');
+  #Verbose = setOpts('Verbose',true);
+  #errFcn = setOpts('errFcn',[]);
+  #outFcn = setOpts('outFcn',[]);
+  #stopTest = setOpts('stopTest',1,1,2);
+  #U = setOpts('U', @(x) x );
+  #if ~isa(U,'function_handle')
+  #    Ut = setOpts('Ut',[]);
+  #else
+  #    Ut = setOpts('Ut', @(x) x );
+  #end
+  #xplug = setOpts('xplug',[]);
+  #normU = setOpts('normU',1);
+  #
+  #if delta < 0, error('delta must be greater or equal to zero'); end
+  #
+  #if isa(A,'function_handle')
+  #    Atfun = At;
+  #    Afun = A;
+  #else
+  #    Atfun = @(x) A'*x;
+  #    Afun = @(x) A*x;
+  #end
+  #Atb = Atfun(b);
+  #
+  #AAtinv = setOpts('AAtinv',[]);
+  #USV = setOpts('USV',[]);
+  #if ~isempty(USV)
+  #    if isstruct(USV)
+  #        Q = USV.U;  # we can't use "U" as the variable name
+  #                    # since "U" already refers to the analysis operator
+  #        S = USV.S;
+  #        if isvector(S), s = S; S = diag(s);
+  #        else s = diag(S); end
+  #        V = USV.V;
+  #    else
+  #        error('opts.USV must be a structure');
+  #    end
+  #    if isempty(AAtinv)
+  #        AAtinv = Q*diag( s.^(-2) )*Q';
+  #    end
+  #end
+  ## --- for A not a projection (experimental)
+  #if ~isempty(AAtinv)
+  #    if isa(AAtinv,'function_handle')
+  #        AAtinv_fun = AAtinv;
+  #    else
+  #        AAtinv_fun = @(x) AAtinv * x;
+  #    end
+  #    
+  #    AtAAtb = Atfun( AAtinv_fun(b) );
+  #
+  #else
+  #    # We assume it's a projection
+  #    AtAAtb = Atb;
+  #    AAtinv_fun = @(x) x;
+  #end
+  #
+  #if isempty(xplug)
+  #    xplug = AtAAtb; 
+  #end
+  #
+  ##---- Initialization
+  #N = length(xplug);
+  #wk = zeros(N,1); 
+  #xk = xplug;
+  #
+  #
+  ##---- Init Variables
+  #Ak= 0;
+  #Lmu = normU/mu;
+  #yk = xk;
+  #zk = xk;
+  #fmean = realmin/10;
+  #OK = 0;
+  #n = floor(sqrt(N));
+  #
+  ##---- Computing Atb
+  #Atb = Atfun(b);
+  #Axk = Afun(xk);# only needed if you want to see the residuals
+  ## Axplug = Axk;
+  #
+  #
+  ##---- TV Minimization
+  #if strcmpi(TypeMin,'TV')
+  #    Lmu = 8*Lmu;
+  #    Dv = spdiags([reshape([-ones(n-1,n); zeros(1,n)],N,1) ...
+  #        reshape([zeros(1,n); ones(n-1,n)],N,1)], [0 1], N, N);
+  #    Dh = spdiags([reshape([-ones(n,n-1) zeros(n,1)],N,1) ...
+  #        reshape([zeros(n,1) ones(n,n-1)],N,1)], [0 n], N, N);
+  #    D = sparse([Dh;Dv]);
+  #end
+  #
+  #
+  #Lmu1 = 1/Lmu;
+  ## SLmu = sqrt(Lmu);
+  ## SLmu1 = 1/sqrt(Lmu);
+  #lambdaY = 0;
+  #lambdaZ = 0;
+  #
+  ##---- setup data storage variables
+  #[DISPLAY_ERROR, RECORD_DATA] = deal(false);
+  #outputData = deal([]);
+  #residuals = zeros(maxiter,2);
+  #if ~isempty(errFcn), DISPLAY_ERROR = true; end
+  #if ~isempty(outFcn) && nargout >= 4
+  #    RECORD_DATA = true;
+  #    outputData = zeros(maxiter, size(outFcn(xplug),2) );
+  #end
+  #
+  #for k = 0:maxiter-1,
+  #    
+  #   #---- Dual problem
+  #   
+  #   if strcmpi(TypeMin,'L1')  [df,fx,val,uk] = Perform_L1_Constraint(xk,mu,U,Ut);end
+  #   
+  #   if strcmpi(TypeMin,'TV')  [df,fx] = Perform_TV_Constraint(xk,mu,Dv,Dh,D,U,Ut);end
+  #   
+  #   #---- Primal Problem
+  #   
+  #   #---- Updating yk 
+  #    
+  #    #
+  #    # yk = Argmin_x Lmu/2 ||x - xk||_l2^2 + <df,x-xk> s.t. ||b-Ax||_l2 < delta
+  #    # Let xp be sqrt(Lmu) (x-xk), dfp be df/sqrt(Lmu), bp be sqrt(Lmu)(b- Axk) and deltap be sqrt(Lmu)delta
+  #    # yk =  xk + 1/sqrt(Lmu) Argmin_xp 1/2 || xp ||_2^2 + <dfp,xp> s.t. || bp - Axp ||_2 < deltap
+  #    #
+  #    
+  #    
+  #    cp = xk - 1/Lmu*df;  # this is "q" in eq. (3.7) in the paper
+  #    
+  #    Acp = Afun( cp );
+  #    if ~isempty(AAtinv) && isempty(USV)
+  #        AtAcp = Atfun( AAtinv_fun( Acp ) );
+  #    else
+  #        AtAcp = Atfun( Acp );
+  #    end
+  #    
+  #    residuals(k+1,1) = norm( b-Axk);    # the residual
+  #    residuals(k+1,2) = fx;              # the value of the objective
+  #    #--- if user has supplied a function, apply it to the iterate
+  #    if RECORD_DATA
+  #        outputData(k+1,:) = outFcn(xk);
+  #    end
+  #    
+  #    if delta > 0
+  #        if ~isempty(USV)
+  #            # there are more efficient methods, but we're assuming
+  #            # that A is negligible compared to U and Ut.
+  #            # Here we make the change of variables x <-- x - xk
+  #            #       and                            df <-- df/L
+  #            dfp = -Lmu1*df;  Adfp = -(Axk - Acp);
+  #            bp = b - Axk;
+  #            deltap = delta;
+  #            # Check if we even need to project:
+  #            if norm( Adfp - bp ) < deltap
+  #                lambdaY = 0;  projIter = 0;
+  #                # i.e. projection = dfp;
+  #                yk = xk + dfp;
+  #                Ayk = Axk + Adfp;
+  #            else
+  #                lambdaY_old = lambdaY;
+  #                [projection,projIter,lambdaY] = fastProjection(Q,S,V,dfp,bp,...
+  #                    deltap, .999*lambdaY_old );
+  #                if lambdaY > 0, disp('lambda is positive!'); keyboard; end
+  #                yk = xk + projection;
+  #                Ayk = Afun(yk);
+  #                # DEBUGGING
+  ##                 if projIter == 50
+  ##                     fprintf('\n Maxed out iterations at y\n');
+  ##                     keyboard
+  ##                 end
+  #            end
+  #        else
+  #            lambda = max(0,Lmu*(norm(b-Acp)/delta - 1));gamma = lambda/(lambda + Lmu);
+  #            yk = lambda/Lmu*(1-gamma)*Atb + cp - gamma*AtAcp;
+  #            # for calculating the residual, we'll avoid calling A()
+  #            # by storing A(yk) here (using A'*A = I):
+  #            Ayk = lambda/Lmu*(1-gamma)*b + Acp - gamma*Acp;
+  #        end
+  #    else
+  #        # if delta is 0, the projection is simplified:
+  #        yk = AtAAtb + cp - AtAcp;
+  #        Ayk = b;
+  #    end
+  #
+  #    # DEBUGGING
+  ##     if norm( Ayk - b ) > (1.05)*delta
+  ##         fprintf('\nAyk failed projection test\n');
+  ##         keyboard;
+  ##     end
+  #    
+  #    #--- Stopping criterion
+  #    qp = abs(fx - mean(fmean))/mean(fmean);
+  #    
+  #    switch stopTest
+  #        case 1
+  #            # look at the relative change in function value
+  #            if qp <= TolVar && OK; break;end
+  #            if qp <= TolVar && ~OK; OK=1; end
+  #        case 2
+  #            # look at the l_inf change from previous iterate
+  #            if k >= 1 && norm( xk - xold, 'inf' ) <= TolVar
+  #                break
+  #            end
+  #    end
+  #    fmean = [fx,fmean];
+  #    if (length(fmean) > 10) fmean = fmean(1:10);end
+  #    
+  #
+  #    
+  #    #--- Updating zk
+  #  
+  #    apk =0.5*(k+1);
+  #    Ak = Ak + apk; 
+  #    tauk = 2/(k+3); 
+  #    
+  #    wk =  apk*df + wk;
+  #    
+  #    #
+  #    # zk = Argmin_x Lmu/2 ||b - Ax||_l2^2 + Lmu/2||x - xplug ||_2^2+ <wk,x-xk> 
+  #    #   s.t. ||b-Ax||_l2 < delta
+  #    #
+  #    
+  #    cp = xplug - 1/Lmu*wk;
+  #    
+  #    Acp = Afun( cp );
+  #    if ~isempty( AAtinv ) && isempty(USV)
+  #        AtAcp = Atfun( AAtinv_fun( Acp ) );
+  #    else
+  #        AtAcp = Atfun( Acp );
+  #    end
+  #    
+  #    if delta > 0
+  #        if ~isempty(USV)
+  #            # Make the substitution wk <-- wk/K
+  #                 
+  ##             dfp = (xplug - Lmu1*wk);  # = cp
+  ##             Adfp= (Axplug - Acp);
+  #            dfp = cp; Adfp = Acp; 
+  #            bp = b;
+  #            deltap = delta;            
+  ##             dfp = SLmu*xplug - SLmu1*wk;
+  ##             bp = SLmu*b;
+  ##             deltap = SLmu*delta;
+  #
+  #            # See if we even need to project:
+  #            if norm( Adfp - bp ) < deltap
+  #                zk = dfp;
+  #                Azk = Adfp;
+  #            else
+  #                [projection,projIter,lambdaZ] = fastProjection(Q,S,V,dfp,bp,...
+  #                    deltap, .999*lambdaZ );
+  #                if lambdaZ > 0, disp('lambda is positive!'); keyboard; end
+  #                zk = projection;
+  #                #             zk = SLmu1*projection;
+  #                Azk = Afun(zk);
+  #            
+  #                # DEBUGGING:
+  ##                 if projIter == 50
+  ##                     fprintf('\n Maxed out iterations at z\n');
+  ##                     keyboard
+  ##                 end
+  #            end
+  #        else
+  #            lambda = max(0,Lmu*(norm(b-Acp)/delta - 1));gamma = lambda/(lambda + Lmu);
+  #            zk = lambda/Lmu*(1-gamma)*Atb + cp - gamma*AtAcp;
+  #            # for calculating the residual, we'll avoid calling A()
+  #            # by storing A(zk) here (using A'*A = I):
+  #            Azk = lambda/Lmu*(1-gamma)*b + Acp - gamma*Acp;
+  #        end
+  #    else
+  #        # if delta is 0, this is simplified:
+  #        zk = AtAAtb + cp - AtAcp;
+  #        Azk = b;
+  #    end
+  #    
+  #    # DEBUGGING
+  ##     if norm( Ayk - b ) > (1.05)*delta
+  ##         fprintf('\nAzk failed projection test\n');
+  ##         keyboard;
+  ##     end
+  #
+  #    #--- Updating xk
+  #    
+  #    xkp = tauk*zk + (1-tauk)*yk;
+  #    xold = xk;
+  #    xk=xkp; 
+  #    Axk = tauk*Azk + (1-tauk)*Ayk;
+  #    
+  #    if ~mod(k,10), Axk = Afun(xk); end   # otherwise slowly lose precision
+  #    # DEBUG
+  ##     if norm(Axk - Afun(xk) ) > 1e-6, disp('error with Axk'); keyboard; end
+  #    
+  #    #--- display progress if desired
+  #    if ~mod(k+1,Verbose )
+  #        printf('Iter: #3d  ~ fmu: #.3e ~ Rel. Variation of fmu: #.2e ~ Residual: #.2e',...
+  #            k+1,fx,qp,residuals(k+1,1) ); 
+  #        #--- if user has supplied a function to calculate the error,
+  #        # apply it to the current iterate and dislay the output:
+  #        if DISPLAY_ERROR, printf(' ~ Error: #.2e',errFcn(xk)); end
+  #        printf('\n');
+  #    end
+  #    if abs(fx)>1e20 || abs(residuals(k+1,1)) >1e20 || isnan(fx)
+  #        error('Nesta: possible divergence or NaN.  Bad estimate of ||A''A||?');
+  #    end
+  #
+  #end
+  #
+  #niter = k+1; 
+  #
+  ##-- truncate output vectors
+  #residuals = residuals(1:niter,:);
+  #if RECORD_DATA,     outputData = outputData(1:niter,:); end
+
+  # End of original Matab code
+  #---------------------
+
+  #fid = setOpts('fid',1);
+  #function printf(varargin), fprintf(fid,varargin{:}); end
+  opts,maxiter,userSet = setOpts(opts,'maxiter',10000,0);
+  opts,TolVar,userSet = setOpts(opts,'TolVar',1e-5);
+  opts,TypeMin,userSet = setOpts(opts,'TypeMin','L1');
+  opts,Verbose,userSet = setOpts(opts,'Verbose',True);
+  opts,errFcn,userSet = setOpts(opts,'errFcn',None);
+  opts,outFcn,userSet = setOpts(opts,'outFcn',None);
+  opts,stopTest,userSet = setOpts(opts,'stopTest',1,1,2);
+  opts,U,userSet = setOpts(opts,'U',lambda x: x );
+  #if ~isa(U,'function_handle')
+  if hasattr(U,'__call__'):
+      opts,Ut,userSet = setOpts(opts,'Ut',None);
+  else:
+      opts,Ut,userSet = setOpts(opts,'Ut', lambda x: x );
+  #end
+  opts,xplug,userSet = setOpts(opts,'xplug',None);
+  opts,normU,userSet = setOpts(opts,'normU',1);
+  
+  if delta < 0:
+    print 'delta must be greater or equal to zero'
+    raise
+  
+  if hasattr(A,'__call__'):
+      Atfun = At;
+      Afun = A;
+  else:
+      Atfun = lambda x: numpy.dot(A.T,x)
+      Afun = lambda x: numpy.dot(A,x)
+  #end
+  Atb = Atfun(b);
+  
+  opts,AAtinv,userSet = setOpts(opts,'AAtinv',None);
+  opts,USV,userSet = setOpts(opts,'USV',None);
+  if USV is not None:
+      #if isstruct(USV)
+      Q = USV['U'];  # we can't use "U" as the variable name
+                  # since "U" already refers to the analysis operator
+      S = USV['S'];
+      #if isvector(S), s = S; S = diag(s);
+      #else s = diag(S); end
+      if S.ndim is 1:
+        s = S
+      else:
+        s = numpy.diag(S)
+        
+      V = USV['V'];
+      #else
+      #    error('opts.USV must be a structure');
+      #end
+      #if isempty(AAtinv)
+      if AAtinv is None:
+          #AAtinv = Q*diag( s.^(-2) )*Q';
+          AAtinv = numpy.dot(Q, numpy.dot(numpy.diag(s ** -2), Q.T))
+      #end
+  #end
+  # --- for A not a projection (experimental)
+  #if ~isempty(AAtinv)
+  if AAtinv is not None:
+      #if isa(AAtinv,'function_handle')
+      if hasattr(AAtinv, '__call__'):
+          AAtinv_fun = AAtinv;
+      else:
+          AAtinv_fun = lambda x: numpy.dot(AAtinv,x)
+      #end
+      
+      AtAAtb = Atfun( AAtinv_fun(b) );
+  
+  else:
+      # We assume it's a projection
+      AtAAtb = Atb;
+      AAtinv_fun = lambda x: x;
+  #end
+  
+  if xplug == None:
+      xplug = AtAAtb.copy(); 
+  #end
+  
+  #---- Initialization
+  #N = length(xplug);
+  N = len(xplug)
+  #wk = zeros(N,1); 
+  wk = numpy.zeros(N)
+  xk = xplug.copy()
+  
+  
+  #---- Init Variables
+  Ak = 0.0;
+  Lmu = normU/mu;
+  yk = xk.copy();
+  zk = xk.copy();
+  fmean = numpy.finfo(float).tiny/10.0;
+  OK = 0;
+  n = math.floor(math.sqrt(N));
+  
+  #---- Computing Atb
+  Atb = Atfun(b);
+  Axk = Afun(xk);# only needed if you want to see the residuals
+  # Axplug = Axk;
+  
+  
+  #---- TV Minimization
+  if TypeMin == 'TV':
+    print 'Nic:TODO: TV minimization not yet implemented!'
+    raise
+  #if strcmpi(TypeMin,'TV')
+  #    Lmu = 8*Lmu;
+  #    Dv = spdiags([reshape([-ones(n-1,n); zeros(1,n)],N,1) ...
+  #        reshape([zeros(1,n); ones(n-1,n)],N,1)], [0 1], N, N);
+  #    Dh = spdiags([reshape([-ones(n,n-1) zeros(n,1)],N,1) ...
+  #        reshape([zeros(n,1) ones(n,n-1)],N,1)], [0 n], N, N);
+  #    D = sparse([Dh;Dv]);
+  #end
+  
+  
+  Lmu1 = 1.0/Lmu;
+  # SLmu = sqrt(Lmu);
+  # SLmu1 = 1/sqrt(Lmu);
+  lambdaY = 0.;
+  lambdaZ = 0.;
+  
+  #---- setup data storage variables
+  #[DISPLAY_ERROR, RECORD_DATA] = deal(false);
+  DISPLAY_ERROR = False
+  RECORD_DATA = False
+  #outputData = deal([]);
+  outputData = None
+  residuals = numpy.zeros((maxiter,2))
+  #if ~isempty(errFcn), DISPLAY_ERROR = true; end
+  if errFcn is not None:
+    DISPLAY_ERROR = True
+  #if ~isempty(outFcn) && nargout >= 4
+  if outFcn is not None:  # Output max number of arguments
+      RECORD_DATA = True
+      outputData = numpy.zeros(maxiter, outFcn(xplug).shape[1]);
+  #end
+  
+  #for k = 0:maxiter-1,
+  for k in numpy.arange(maxiter):
+      
+      #---- Dual problem
+     
+      #if strcmpi(TypeMin,'L1')  [df,fx,val,uk] = Perform_L1_Constraint(xk,mu,U,Ut);end
+      if TypeMin == 'L1':
+        df,fx,val,uk = Perform_L1_Constraint(xk,mu,U,Ut)
+     
+      # Nic: TODO: TV not implemented yet !     
+      #if strcmpi(TypeMin,'TV')  [df,fx] = Perform_TV_Constraint(xk,mu,Dv,Dh,D,U,Ut);end
+     
+      #---- Primal Problem
+     
+      #---- Updating yk 
+      
+      #
+      # yk = Argmin_x Lmu/2 ||x - xk||_l2^2 + <df,x-xk> s.t. ||b-Ax||_l2 < delta
+      # Let xp be sqrt(Lmu) (x-xk), dfp be df/sqrt(Lmu), bp be sqrt(Lmu)(b- Axk) and deltap be sqrt(Lmu)delta
+      # yk =  xk + 1/sqrt(Lmu) Argmin_xp 1/2 || xp ||_2^2 + <dfp,xp> s.t. || bp - Axp ||_2 < deltap
+      #
+      
+      
+      cp = xk - 1./Lmu*df;  # this is "q" in eq. (3.7) in the paper
+      
+      Acp = Afun( cp );
+      #if ~isempty(AAtinv) && isempty(USV)
+      if AAtinv is not None and USV is None:
+          AtAcp = Atfun( AAtinv_fun( Acp ) );
+      else:
+          AtAcp = Atfun( Acp );
+      #end
+      
+      #residuals(k+1,1) = norm( b-Axk);    # the residual
+      residuals[k,0] = numpy.linalg.norm(b-Axk)
+      #residuals(k+1,2) = fx;              # the value of the objective
+      residuals[k,1] = fx
+      #--- if user has supplied a function, apply it to the iterate
+      if RECORD_DATA:
+          outputData[k+1,:] = outFcn(xk);
+      #end
+      
+      if delta > 0:
+          #if ~isempty(USV)
+          if USV is not None:
+              # there are more efficient methods, but we're assuming
+              # that A is negligible compared to U and Ut.
+              # Here we make the change of variables x <-- x - xk
+              #       and                            df <-- df/L
+              dfp = -Lmu1*df;
+              Adfp = -(Axk - Acp);
+              bp = b - Axk;
+              deltap = delta;
+              # Check if we even need to project:
+              if numpy.linalg.norm( Adfp - bp ) < deltap:
+                  lambdaY = 0.
+                  projIter = 0;
+                  # i.e. projection = dfp;
+                  yk = xk + dfp;
+                  Ayk = Axk + Adfp;
+              else:
+                  lambdaY_old = lambdaY.copy();
+                  #[projection,projIter,lambdaY] = fastProjection(Q,S,V,dfp,bp,deltap, .999*lambdaY_old );
+                  projection,projIter,lambdaY = fastProjection(Q,S,V,dfp,bp,deltap, .999*lambdaY_old )
+                  #if lambdaY > 0, disp('lambda is positive!'); keyboard; end
+                  if lambdaY > 0:
+                    print 'lambda is positive!'
+                    raise
+                  yk = xk + projection;
+                  Ayk = Afun(yk);
+                  # DEBUGGING
+  #                 if projIter == 50
+  #                     fprintf('\n Maxed out iterations at y\n');
+  #                     keyboard
+  #                 end
+              #end
+          else:
+              lambdaa = max(0,Lmu*(numpy.linalg.norm(b-Acp)/delta - 1))
+              gamma = lambdaa/(lambdaa + Lmu);
+              yk = lambdaa/Lmu*(1-gamma)*Atb + cp - gamma*AtAcp;
+              # for calculating the residual, we'll avoid calling A()
+              # by storing A(yk) here (using A'*A = I):
+              Ayk = lambdaa/Lmu*(1-gamma)*b + Acp - gamma*Acp;
+          #end
+      else:
+          # if delta is 0, the projection is simplified:
+          yk = AtAAtb + cp - AtAcp;
+          Ayk = b.copy();
+      #end
+  
+      # DEBUGGING
+  #     if norm( Ayk - b ) > (1.05)*delta
+  #         fprintf('\nAyk failed projection test\n');
+  #         keyboard;
+  #     end
+      
+      #--- Stopping criterion
+      qp = abs(fx - numpy.mean(fmean))/numpy.mean(fmean);
+      
+      #switch stopTest
+      #    case 1
+      if stopTest == 1:
+              # look at the relative change in function value
+              #if qp <= TolVar && OK; break;end
+              if qp <= TolVar and OK:
+                break
+              #if qp <= TolVar && ~OK; OK=1; end
+              if qp <= TolVar and not OK:
+                OK = 1
+      elif stopTest == 2:
+              # look at the l_inf change from previous iterate
+              if k >= 1 and numpy.linalg.norm( xk - xold, 'inf' ) <= TolVar:
+                  break
+              #end
+      #end
+      #fmean = [fx,fmean];
+      fmean = numpy.hstack((fx,fmean));
+      if (len(fmean) > 10):
+        fmean = fmean[:10]
+      
+  
+      
+      #--- Updating zk
+    
+      apk = 0.5*(k+1);
+      Ak = Ak + apk; 
+      tauk = 2/(k+3); 
+      
+      wk =  apk*df + wk;
+      
+      #
+      # zk = Argmin_x Lmu/2 ||b - Ax||_l2^2 + Lmu/2||x - xplug ||_2^2+ <wk,x-xk> 
+      #   s.t. ||b-Ax||_l2 < delta
+      #
+      
+      cp = xplug - 1.0/Lmu*wk;
+      
+      Acp = Afun( cp );
+      #if ~isempty( AAtinv ) && isempty(USV)
+      if AAtinv is not None and USV is None:
+          AtAcp = Atfun( AAtinv_fun( Acp ) );
+      else:
+          AtAcp = Atfun( Acp );
+      #end
+      
+      if delta > 0:
+          #if ~isempty(USV)
+          if USV is not None:
+              # Make the substitution wk <-- wk/K
+                   
+  #             dfp = (xplug - Lmu1*wk);  # = cp
+  #             Adfp= (Axplug - Acp);
+              dfp = cp.copy()
+              Adfp = Acp.copy() 
+              bp = b.copy();
+              deltap = delta;            
+  #             dfp = SLmu*xplug - SLmu1*wk;
+  #             bp = SLmu*b;
+  #             deltap = SLmu*delta;
+  
+              # See if we even need to project:
+              if numpy.linalg.norm( Adfp - bp ) < deltap:
+                  zk = dfp.copy();
+                  Azk = Adfp.copy();
+              else:
+                  projection,projIter,lambdaZ = fastProjection(Q,S,V,dfp,bp,deltap, .999*lambdaZ )
+                  if lambdaZ > 0:
+                    print 'lambda is positive!'
+                    raise
+                  zk = projection.copy();
+                  #             zk = SLmu1*projection;
+                  Azk = Afun(zk);
+              
+                  # DEBUGGING:
+  #                 if projIter == 50
+  #                     fprintf('\n Maxed out iterations at z\n');
+  #                     keyboard
+  #                 end
+              #end
+          else:
+              lambdaa = max(0,Lmu*(numpy.linalg.norm(b-Acp)/delta - 1));
+              gamma = lambdaa/(lambdaa + Lmu);
+              zk = lambdaa/Lmu*(1-gamma)*Atb + cp - gamma*AtAcp;
+              # for calculating the residual, we'll avoid calling A()
+              # by storing A(zk) here (using A'*A = I):
+              Azk = lambdaa/Lmu*(1-gamma)*b + Acp - gamma*Acp;
+          #end
+      else:
+          # if delta is 0, this is simplified:
+          zk = AtAAtb + cp - AtAcp;
+          Azk = b;
+      #end
+      
+      # DEBUGGING
+  #     if norm( Ayk - b ) > (1.05)*delta
+  #         fprintf('\nAzk failed projection test\n');
+  #         keyboard;
+  #     end
+  
+      #--- Updating xk
+      
+      xkp = tauk*zk + (1-tauk)*yk;
+      xold = xk.copy();
+      xk = xkp.copy(); 
+      Axk = tauk*Azk + (1-tauk)*Ayk;
+      
+      #if ~mod(k,10), Axk = Afun(xk); end   # otherwise slowly lose precision
+      if not numpy.mod(k,10):
+        Axk = Afun(xk)
+      # DEBUG
+  #     if norm(Axk - Afun(xk) ) > 1e-6, disp('error with Axk'); keyboard; end
+      
+      #--- display progress if desired
+      #if ~mod(k+1,Verbose )
+      if not numpy.mod(k+1,Verbose):
+          #printf('Iter: #3d  ~ fmu: #.3e ~ Rel. Variation of fmu: #.2e ~ Residual: #.2e',k+1,fx,qp,residuals(k+1,1) ); 
+          print 'Iter: ',k+1,'  ~ fmu: ',fx,' ~ Rel. Variation of fmu: ',qp,' ~ Residual:',residuals[k+1,0]
+          #--- if user has supplied a function to calculate the error,
+          # apply it to the current iterate and dislay the output:
+          #if DISPLAY_ERROR, printf(' ~ Error: #.2e',errFcn(xk)); end
+          if DISPLAY_ERROR:
+            print ' ~ Error:',errFcn(xk)
+      #end
+      if abs(fx)>1e20 or abs(residuals[k,0]) >1e20 or numpy.isnan(fx):
+          #error('Nesta: possible divergence or NaN.  Bad estimate of ||A''A||?');
+          print 'Nesta: possible divergence or NaN.  Bad estimate of ||A''A||?'
+          raise
+      #end
+  
+  #end
+  
+  niter = k+1; 
+  
+  #-- truncate output vectors
+  residuals = residuals[:niter,:]
+  #if RECORD_DATA,     outputData = outputData(1:niter,:); end
+  if RECORD_DATA:
+    outputData = outputData[:niter,:]
+    
+  return xk,niter,residuals,outputData,opts
+
+
+############ PERFORM THE L1 CONSTRAINT ##################
+
+#function [df,fx,val,uk] = Perform_L1_Constraint(xk,mu,U,Ut)
+def Perform_L1_Constraint(xk,mu,U,Ut):
+
+    #if isa(U,'function_handle')
+    if hasattr(U,'__call__'):
+        uk = U(xk);
+    else:
+        uk = numpy.dot(U,xk)
+    #end
+    fx = uk.copy()
+
+    #uk = uk./max(mu,abs(uk));
+    uk = uk / max(mu,abs(uk))
+    #val = real(uk'*fx);
+    val = numpy.real(numpy.vdot(uk,fx))
+    #fx = real(uk'*fx - mu/2*norm(uk)^2);
+    fx = numpy.real(numpy.vdot(uk,fx) - mu/2.*numpy.linalg.norm(uk)**2);
+
+    #if isa(Ut,'function_handle')
+    if hasattr(U,'__call__'):
+        df = Ut(uk);
+    else:
+        #df = U'*uk;
+        df = numpy.dot(U.T,uk)
+    #end
+    return df,fx,val,uk
+#end
+
+# Nic: TODO: TV not implemented yet!
+############ PERFORM THE TV CONSTRAINT ##################
+#function [df,fx] = Perform_TV_Constraint(xk,mu,Dv,Dh,D,U,Ut)
+#    if isa(U,'function_handle')
+#        x = U(xk);
+#    else
+#        x = U*xk;
+#    end
+#    df = zeros(size(x));
+#
+#    Dhx = Dh*x;
+#    Dvx = Dv*x;
+#            
+#    tvx = sum(sqrt(abs(Dhx).^2+abs(Dvx).^2));
+#    w = max(mu,sqrt(abs(Dhx).^2 + abs(Dvx).^2));
+#    uh = Dhx ./ w;
+#    uv = Dvx ./ w;
+#    u = [uh;uv];
+#    fx = real(u'*D*x - mu/2 * 1/numel(u)*sum(u'*u));
+#    if isa(Ut,'function_handle')
+#        df = Ut(D'*u);
+#    else
+#        df = U'*(D'*u);
+#    end
+#end
+
+
+#function [x,k,l] = fastProjection( U, S, V, y, b, epsilon, lambda0, DISP )
+def fastProjection( U, S, V, y, b, epsilon, lambda0=0, DISP=False ):
+  # [x,niter,lambda] = fastProjection(U, S, V, y, b, epsilon, [lambda0], [DISP] )
+  #
+  # minimizes || x - y ||
+  #   such that || Ax - b || <= epsilon
+  #
+  # where USV' = A (i.e the SVD of A)
+  #
+  # The optional input "lambda0" is a guess for the Lagrange parameter
+  #
+  # Warning: for speed, does not calculate A(y) to see if x = y is feasible
+  #
+  # NESTA Version 1.1
+  #   See also Core_Nesterov
+  
+  # Written by Stephen Becker, September 2009, srbecker@caltech.edu
+  
+  #---------------------
+  # Original Matab code:
+  
+  #DEBUG = true;
+  #if nargin < 8
+  #    DISP = false;
+  #end
+  ## -- Parameters for Newton's method --
+  #MAXIT = 70;
+  #TOL = 1e-8 * epsilon;
+  ## TOL = max( TOL, 10*eps );  # we can't do better than machine precision
+  #
+  #m = size(U,1);
+  #n = size(V,1);
+  #mn = min([m,n]);
+  #if numel(S) > mn^2, S = diag(diag(S)); end  # S should be a small square matrix
+  #r = size(S);
+  #if size(U,2) > r, U = U(:,1:r); end
+  #if size(V,2) > r, V = V(:,1:r); end
+  #
+  #s = diag(S);
+  #s2 = s.^2;
+  #
+  ## What we want to do:
+  ##   b = b - A*y;
+  ##   bb = U'*b;
+  #
+  ## if A doesn't have full row rank, then b may not be in the range
+  #if size(U,1) > size(U,2)
+  #    bRange = U*(U'*b);
+  #    bNull = b - bRange;
+  #    epsilon = sqrt( epsilon^2 - norm(bNull)^2 );
+  #end
+  #b = U'*b - S*(V'*y);  # parenthesis is very important!  This is expensive.
+  #    
+  ## b2 = b.^2;
+  #b2 = abs(b).^2;  # for complex data
+  #bs2 = b2.*s2;
+  #epsilon2 = epsilon^2;
+  #
+  ## The following routine need to be fast
+  ## For efficiency (at cost of transparency), we are writing the calculations
+  ## in a way that minimize number of operations.  The functions "f"
+  ## and "fp" represent f and its derivative.
+  #
+  ## f = @(lambda) sum( b2 .*(1-lambda*s2).^(-2) ) - epsilon^2;
+  ## fp = @(lambda) 2*sum( bs2 .*(1-lambda*s2).^(-3) );
+  #if nargin < 7, lambda0 = 0; end
+  #l = lambda0; oldff = 0;
+  #one = ones(m,1);
+  #alpha = 1;      # take full Newton steps
+  #for k = 1:MAXIT
+  #    # make f(l) and fp(l) as efficient as possible:
+  #    ls = one./(one-l*s2);
+  #    ls2 = ls.^2;
+  #    ls3 = ls2.*ls;
+  #    ff = b2.'*ls2; # should be .', not ', even for complex data
+  #    ff = ff - epsilon2;
+  #    fpl = 2*( bs2.'*ls3 );  # should be .', not ', even for complex data
+  ##     ff = f(l);    # this is a little slower
+  ##     fpl = fp(l);  # this is a little slower
+  #    d = -ff/fpl;
+  #    if DISP, fprintf('#2d, lambda is #5.2f, f(lambda) is #.2e, f''(lambda) is #.2e\n',...
+  #            k,l,ff,fpl ); end
+  #    if abs(ff) < TOL, break; end        # stopping criteria
+  #    l_old = l;
+  #    if k>2 && ( abs(ff) > 10*abs(oldff+100) ) #|| abs(d) > 1e13 )
+  #        l = 0; alpha = 1/2;  
+  ##         oldff = f(0);
+  #        oldff = sum(b2); oldff = oldff - epsilon2;
+  #        if DISP, disp('restarting'); end
+  #    else
+  #        if alpha < 1, alpha = (alpha+1)/2; end
+  #        l = l + alpha*d;
+  #        oldff = ff;
+  #        if l > 0
+  #            l = 0;  # shouldn't be positive
+  #            oldff = sum(b2);  oldff = oldff - epsilon2;
+  #        end
+  #    end
+  #    if l_old == l && l == 0
+  #        if DISP, disp('Making no progress; x = y is probably feasible'); end
+  #        break;
+  #    end
+  #end
+  ## if k == MAXIT && DEBUG, disp('maxed out iterations'); end
+  #if l < 0
+  #    xhat = -l*s.*b./( 1 - l*s2 );
+  #    x = V*xhat + y;
+  #else
+  #    # y is already feasible, so no need to project
+  #    l = 0;
+  #    x = y;
+  #end
+  
+  # End of original Matab code
+  #---------------------
+  
+  DEBUG = True;
+  #if nargin < 8
+  #    DISP = false;
+  #end
+  # -- Parameters for Newton's method --
+  MAXIT = 70;
+  TOL = 1e-8 * epsilon;
+  # TOL = max( TOL, 10*eps );  # we can't do better than machine precision
+  
+  #m = size(U,1);
+  #n = size(V,1);
+  m = U.shape[0]
+  n = V.shape[0]
+  mn = min(m,n);
+  #if numel(S) > mn^2, S = diag(diag(S)); end  # S should be a small square matrix
+  if S.size > mn**2:
+    S = numpy.diag(numpy.diag(S))
+  #r = size(S);
+  r = S.shape
+  #if size(U,2) > r, U = U(:,1:r); end
+  if U.shape[1] > r:
+    U = U[:,r]
+  #if size(V,2) > r, V = V(:,1:r); end
+  if V.shape[1] > r:
+    V = V[:,r]
+  
+  s = numpy.diag(S);
+  s2 = s**2;
+  
+  # What we want to do:
+  #   b = b - A*y;
+  #   bb = U'*b;
+  
+  # if A doesn't have full row rank, then b may not be in the range
+  #if size(U,1) > size(U,2)
+  if U.shape[0] > U.shape[1]:
+      #bRange = U*(U'*b);
+      bRange = numpy.dot(U, numpy.dot(U.T, b))
+      bNull = b - bRange;
+      epsilon = math.sqrt( epsilon**2 - numpy.linalg.norm(bNull)**2 );
+  #end
+  #b = U'*b - S*(V'*y);  # parenthesis is very important!  This is expensive.
+  b = numpy.dot(U.T,b) - numpy.dot(S, numpy.dot(V.T,y))
+      
+  # b2 = b.^2;
+  b2 = abs(b)**2;  # for complex data
+  bs2 = b2**s2;
+  epsilon2 = epsilon**2;
+  
+  # The following routine need to be fast
+  # For efficiency (at cost of transparency), we are writing the calculations
+  # in a way that minimize number of operations.  The functions "f"
+  # and "fp" represent f and its derivative.
+  
+  # f = @(lambda) sum( b2 .*(1-lambda*s2).^(-2) ) - epsilon^2;
+  # fp = @(lambda) 2*sum( bs2 .*(1-lambda*s2).^(-3) );
+  
+  #if nargin < 7, lambda0 = 0; end
+  l = lambda0;
+  oldff = 0;
+  one = numpy.ones(m);
+  alpha = 1;      # take full Newton steps
+  #for k = 1:MAXIT
+  for k in numpy.arange(MAXIT):
+      # make f(l) and fp(l) as efficient as possible:
+      #ls = one./(one-l*s2);
+      ls = one/(one-l*s2)
+      ls2 = ls**2;
+      ls3 = ls2**ls;
+      #ff = b2.'*ls2; # should be .', not ', even for complex data
+      ff = numpy.dot(b2.conj(), ls2)
+      ff = ff - epsilon2;
+      #fpl = 2*( bs2.'*ls3 );  # should be .', not ', even for complex data
+      fpl = 2 * numpy.dot(bs2.conj(),ls3)
+      #     ff = f(l);    # this is a little slower
+      #     fpl = fp(l);  # this is a little slower
+      d = -ff/fpl;
+      #      if DISP, fprintf('#2d, lambda is #5.2f, f(lambda) is #.2e, f''(lambda) is #.2e\n',k,l,ff,fpl ); end
+      if DISP:
+        print k,', lambda is ',l,', f(lambda) is ',ff,', f''(lambda) is',fpl
+      #if abs(ff) < TOL, break; end        # stopping criteria
+      if abs(ff) < TOL:
+        break
+      l_old = l.copy();
+      if k>2 and ( abs(ff) > 10*abs(oldff+100) ): #|| abs(d) > 1e13 )
+          l = 0;
+          alpha = 1.0/2.0;  
+          #         oldff = f(0);
+          oldff = b2.sum(); oldff = oldff - epsilon2;
+          if DISP:
+            print 'restarting'
+      else:
+          if alpha < 1:
+            alpha = (alpha+1.0)/2.0
+          l = l + alpha*d;
+          oldff = ff
+          if l > 0:
+              l = 0;  # shouldn't be positive
+              oldff = b2.sum()
+              oldff = oldff - epsilon2;
+          #end
+      #end
+      if l_old == l and l == 0:
+          #if DISP, disp('Making no progress; x = y is probably feasible'); end
+          if DISP:
+            print 'Making no progress; x = y is probably feasible'
+          break;
+      #end
+  #end
+  # if k == MAXIT && DEBUG, disp('maxed out iterations'); end
+  if l < 0:
+      #xhat = -l*s.*b./( 1 - l*s2 );
+      xhat = numpy.dot(-l, s*b/( 1. - numpy.dot(l,s2) ) )
+      #x = V*xhat + y;
+      x = numpy.dot(V,xhat) + y
+  else:
+      # y is already feasible, so no need to project
+      l = 0;
+      x = y.copy();
+  #end 
+  return x,k,l
\ No newline at end of file