comparison pyCSalgos/NESTA/NESTA.py @ 46:88f0ebe1667a

Finished NESTA implementation. Not tested yet
author nikcleju
date Tue, 29 Nov 2011 22:06:20 +0000
parents 7524d7749456
children e6a5f2173015
325 if S.ndim is 1:
326 s = S
327 else:
328 s = numpy.diag(S)
329
330 V = USV['V'];    (changed from: #V = USV.V;)
331 #else
332 # error('opts.USV must be a structure');
333 #end
334 #end
335
635 x = St(Sx);
636 x = x/numpy.linalg.norm(x);
637 cnt = cnt+1;
638 #end
639 #end
640
641
642
643 #function [xk,niter,residuals,outputData,opts] = Core_Nesterov(A,At,b,mu,delta,opts)
644 def Core_Nesterov(A,At,b,mu,delta,opts):
645 # [xk,niter,residuals,outputData,opts] =Core_Nesterov(A,At,b,mu,delta,opts)
646 #
647 # Solves a L1 minimization problem under a quadratic constraint using the
648 # Nesterov algorithm, without continuation:
649 #
650 # min_x || U x ||_1 s.t. ||y - Ax||_2 <= delta
651 #
652 # If continuation is desired, see the function NESTA.m
653 #
654 # The primal prox-function is also adapted by accounting for a first guess
655 # xplug that also tends towards x_muf
656 #
657 # The observation matrix A is a projector
658 #
659 # Inputs: A and At - measurement matrix and adjoint (either a matrix, in which
660 # case At is unused, or function handles). m x n dimensions.
661 # b - Observed data, a m x 1 array
662 # muf - The desired value of mu at the last continuation step.
663 # A smaller mu leads to higher accuracy.
664 # delta - l2 error bound. This enforces how close the variable
665 # must fit the observations b, i.e. || y - Ax ||_2 <= delta
666 # If delta = 0, enforces y = Ax
667 # Common heuristic: delta = sqrt(m + 2*sqrt(2*m))*sigma;
668 # where sigma=std(noise).
669 # opts -
670 # This is a structure that contains additional options,
671 # some of which are optional.
672 # The fieldnames are case insensitive. Below
673 # are the possible fieldnames:
674 #
675 # opts.xplug - the first guess for the primal prox-function, and
676 # also the initial point for xk. By default, xplug = At(b)
677 # opts.U and opts.Ut - Analysis/Synthesis operators
678 # (either matrices or function handles).
679 # opts.normU - if opts.U is provided, this should be norm(U)
680 # opts.maxiter - max number of iterations in an inner loop.
681 # default is 10,000
682 # opts.TolVar - tolerance for the stopping criteria
683 # opts.stopTest - which stopping criteria to apply
684 # opts.stopTest == 1 : stop when the relative
685 # change in the objective function is less than
686 # TolVar
687 # opts.stopTest == 2 : stop when the l_infinity norm
688 # of difference in the xk variable is less
689 # than TolVar
690 # opts.TypeMin - if this is 'L1' (default), then
691 # minimizes a smoothed version of the l_1 norm.
692 # If this is 'tv', then minimizes a smoothed
693 # version of the total-variation norm.
694 # The string is case insensitive.
695 # opts.Verbose - if this is 0 or false, then very
696 # little output is displayed. If this is 1 or true,
697 # then output every iteration is displayed.
698 # If this is a number p greater than 1, then
699 # output is displayed every pth iteration.
700 # opts.fid - if this is 1 (default), the display is
701 # the usual Matlab screen. If this is the file-id
702 # of a file opened with fopen, then the display
703 # will be redirected to this file.
704 # opts.errFcn - if this is a function handle,
705 # then the program will evaluate opts.errFcn(xk)
706 # at every iteration and display the result.
707 # ex. opts.errFcn = @(x) norm( x - x_true )
708 # opts.outFcn - if this is a function handle,
709 # then the program will evaluate opts.outFcn(xk)
710 # at every iteration and save the results in outputData.
711 # If the result is a vector (as opposed to a scalar),
712 # it should be a row vector and not a column vector.
713 # ex. opts.outFcn = @(x) [norm( x - xtrue, 'inf' ),...
714 # norm( x - xtrue) / norm(xtrue)]
715 # opts.AAtinv - this is an experimental new option. AAtinv
716 # is the inverse of AA^*. This allows the use of a
717 # matrix A which is not a projection, but only
718 # for the noiseless (i.e. delta = 0) case.
719 # If the SVD of A is U*S*V', then AAtinv = U*(S^{-2})*U'.
720 # opts.USV - another experimental option. This supersedes
721 # the AAtinv option, so it is recommended that you
722 # do not define AAtinv. This allows the use of a matrix
723 # A which is not a projection, and works for the
724 # noisy ( i.e. delta > 0 ) case.
725 # opts.USV should contain three fields:
726 # opts.USV.U is the U from [U,S,V] = svd(A)
727 # likewise, opts.USV.S and opts.USV.V are S and V
728 # from svd(A). S may be a matrix or a vector.
729 # Outputs:
730 # xk - estimate of the solution x
731 # niter - number of iterations
732 # residuals - first column is the residual at every step,
733 # second column is the value of f_mu at every step
734 # outputData - a matrix, where each row r is the output
735 # from opts.outFcn, if supplied.
736 # opts - the structure containing the options that were used
737 #
738 # Written by: Jerome Bobin, Caltech
739 # Email: bobin@acm.caltech.edu
740 # Created: February 2009
741 # Modified: May 2009, Jerome Bobin and Stephen Becker, Caltech
742 # Modified: Nov 2009, Stephen Becker
743 #
744 # NESTA Version 1.1
745 # See also NESTA
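# ----------------------------------------------------------------------------
# Illustrative usage sketch, assuming opts is a plain dict read by setOpts()
# with the key names listed above, and that A, b, sigma and x_true are
# supplied by the caller; adapt to the actual setOpts() convention if needed.
#
#   import numpy, math
#   m = b.size
#   delta = math.sqrt(m + 2*math.sqrt(2*m)) * sigma    # heuristic quoted above
#   opts = {'maxiter': 5000,
#           'TolVar': 1e-6,
#           'Verbose': 10,                             # report every 10th iteration
#           'errFcn': lambda x: numpy.linalg.norm(x - x_true)}
#   xk, niter, residuals, outputData, opts = Core_Nesterov(A, None, b, 1e-3, delta, opts)
# ----------------------------------------------------------------------------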
746
747 #---- Set defaults
748 # opts = [];
749
750 #---------------------
751 # Original Matlab code:
752
753 #fid = setOpts('fid',1);
754 #function printf(varargin), fprintf(fid,varargin{:}); end
755 #maxiter = setOpts('maxiter',10000,0);
756 #TolVar = setOpts('TolVar',1e-5);
757 #TypeMin = setOpts('TypeMin','L1');
758 #Verbose = setOpts('Verbose',true);
759 #errFcn = setOpts('errFcn',[]);
760 #outFcn = setOpts('outFcn',[]);
761 #stopTest = setOpts('stopTest',1,1,2);
762 #U = setOpts('U', @(x) x );
763 #if ~isa(U,'function_handle')
764 # Ut = setOpts('Ut',[]);
765 #else
766 # Ut = setOpts('Ut', @(x) x );
767 #end
768 #xplug = setOpts('xplug',[]);
769 #normU = setOpts('normU',1);
770 #
771 #if delta < 0, error('delta must be greater or equal to zero'); end
772 #
773 #if isa(A,'function_handle')
774 # Atfun = At;
775 # Afun = A;
776 #else
777 # Atfun = @(x) A'*x;
778 # Afun = @(x) A*x;
779 #end
780 #Atb = Atfun(b);
781 #
782 #AAtinv = setOpts('AAtinv',[]);
783 #USV = setOpts('USV',[]);
784 #if ~isempty(USV)
785 # if isstruct(USV)
786 # Q = USV.U; # we can't use "U" as the variable name
787 # # since "U" already refers to the analysis operator
788 # S = USV.S;
789 # if isvector(S), s = S; S = diag(s);
790 # else s = diag(S); end
791 # V = USV.V;
792 # else
793 # error('opts.USV must be a structure');
794 # end
795 # if isempty(AAtinv)
796 # AAtinv = Q*diag( s.^(-2) )*Q';
797 # end
798 #end
799 ## --- for A not a projection (experimental)
800 #if ~isempty(AAtinv)
801 # if isa(AAtinv,'function_handle')
802 # AAtinv_fun = AAtinv;
803 # else
804 # AAtinv_fun = @(x) AAtinv * x;
805 # end
806 #
807 # AtAAtb = Atfun( AAtinv_fun(b) );
808 #
809 #else
810 # # We assume it's a projection
811 # AtAAtb = Atb;
812 # AAtinv_fun = @(x) x;
813 #end
814 #
815 #if isempty(xplug)
816 # xplug = AtAAtb;
817 #end
818 #
819 ##---- Initialization
820 #N = length(xplug);
821 #wk = zeros(N,1);
822 #xk = xplug;
823 #
824 #
825 ##---- Init Variables
826 #Ak= 0;
827 #Lmu = normU/mu;
828 #yk = xk;
829 #zk = xk;
830 #fmean = realmin/10;
831 #OK = 0;
832 #n = floor(sqrt(N));
833 #
834 ##---- Computing Atb
835 #Atb = Atfun(b);
836 #Axk = Afun(xk);# only needed if you want to see the residuals
837 ## Axplug = Axk;
838 #
839 #
840 ##---- TV Minimization
841 #if strcmpi(TypeMin,'TV')
842 # Lmu = 8*Lmu;
843 # Dv = spdiags([reshape([-ones(n-1,n); zeros(1,n)],N,1) ...
844 # reshape([zeros(1,n); ones(n-1,n)],N,1)], [0 1], N, N);
845 # Dh = spdiags([reshape([-ones(n,n-1) zeros(n,1)],N,1) ...
846 # reshape([zeros(n,1) ones(n,n-1)],N,1)], [0 n], N, N);
847 # D = sparse([Dh;Dv]);
848 #end
849 #
850 #
851 #Lmu1 = 1/Lmu;
852 ## SLmu = sqrt(Lmu);
853 ## SLmu1 = 1/sqrt(Lmu);
854 #lambdaY = 0;
855 #lambdaZ = 0;
856 #
857 ##---- setup data storage variables
858 #[DISPLAY_ERROR, RECORD_DATA] = deal(false);
859 #outputData = deal([]);
860 #residuals = zeros(maxiter,2);
861 #if ~isempty(errFcn), DISPLAY_ERROR = true; end
862 #if ~isempty(outFcn) && nargout >= 4
863 # RECORD_DATA = true;
864 # outputData = zeros(maxiter, size(outFcn(xplug),2) );
865 #end
866 #
867 #for k = 0:maxiter-1,
868 #
869 # #---- Dual problem
870 #
871 # if strcmpi(TypeMin,'L1') [df,fx,val,uk] = Perform_L1_Constraint(xk,mu,U,Ut);end
872 #
873 # if strcmpi(TypeMin,'TV') [df,fx] = Perform_TV_Constraint(xk,mu,Dv,Dh,D,U,Ut);end
874 #
875 # #---- Primal Problem
876 #
877 # #---- Updating yk
878 #
879 # #
880 # # yk = Argmin_x Lmu/2 ||x - xk||_l2^2 + <df,x-xk> s.t. ||b-Ax||_l2 < delta
881 # # Let xp be sqrt(Lmu) (x-xk), dfp be df/sqrt(Lmu), bp be sqrt(Lmu)(b- Axk) and deltap be sqrt(Lmu)delta
882 # # yk = xk + 1/sqrt(Lmu) Argmin_xp 1/2 || xp ||_2^2 + <dfp,xp> s.t. || bp - Axp ||_2 < deltap
883 # #
884 #
885 #
886 # cp = xk - 1/Lmu*df; # this is "q" in eq. (3.7) in the paper
887 #
888 # Acp = Afun( cp );
889 # if ~isempty(AAtinv) && isempty(USV)
890 # AtAcp = Atfun( AAtinv_fun( Acp ) );
891 # else
892 # AtAcp = Atfun( Acp );
893 # end
894 #
895 # residuals(k+1,1) = norm( b-Axk); # the residual
896 # residuals(k+1,2) = fx; # the value of the objective
897 # #--- if user has supplied a function, apply it to the iterate
898 # if RECORD_DATA
899 # outputData(k+1,:) = outFcn(xk);
900 # end
901 #
902 # if delta > 0
903 # if ~isempty(USV)
904 # # there are more efficient methods, but we're assuming
905 # # that A is negligible compared to U and Ut.
906 # # Here we make the change of variables x <-- x - xk
907 # # and df <-- df/L
908 # dfp = -Lmu1*df; Adfp = -(Axk - Acp);
909 # bp = b - Axk;
910 # deltap = delta;
911 # # Check if we even need to project:
912 # if norm( Adfp - bp ) < deltap
913 # lambdaY = 0; projIter = 0;
914 # # i.e. projection = dfp;
915 # yk = xk + dfp;
916 # Ayk = Axk + Adfp;
917 # else
918 # lambdaY_old = lambdaY;
919 # [projection,projIter,lambdaY] = fastProjection(Q,S,V,dfp,bp,...
920 # deltap, .999*lambdaY_old );
921 # if lambdaY > 0, disp('lambda is positive!'); keyboard; end
922 # yk = xk + projection;
923 # Ayk = Afun(yk);
924 # # DEBUGGING
925 ## if projIter == 50
926 ## fprintf('\n Maxed out iterations at y\n');
927 ## keyboard
928 ## end
929 # end
930 # else
931 # lambda = max(0,Lmu*(norm(b-Acp)/delta - 1));gamma = lambda/(lambda + Lmu);
932 # yk = lambda/Lmu*(1-gamma)*Atb + cp - gamma*AtAcp;
933 # # for calculating the residual, we'll avoid calling A()
934 # # by storing A(yk) here (using A'*A = I):
935 # Ayk = lambda/Lmu*(1-gamma)*b + Acp - gamma*Acp;
936 # end
937 # else
938 # # if delta is 0, the projection is simplified:
939 # yk = AtAAtb + cp - AtAcp;
940 # Ayk = b;
941 # end
942 #
943 # # DEBUGGING
944 ## if norm( Ayk - b ) > (1.05)*delta
945 ## fprintf('\nAyk failed projection test\n');
946 ## keyboard;
947 ## end
948 #
949 # #--- Stopping criterion
950 # qp = abs(fx - mean(fmean))/mean(fmean);
951 #
952 # switch stopTest
953 # case 1
954 # # look at the relative change in function value
955 # if qp <= TolVar && OK; break;end
956 # if qp <= TolVar && ~OK; OK=1; end
957 # case 2
958 # # look at the l_inf change from previous iterate
959 # if k >= 1 && norm( xk - xold, 'inf' ) <= TolVar
960 # break
961 # end
962 # end
963 # fmean = [fx,fmean];
964 # if (length(fmean) > 10) fmean = fmean(1:10);end
965 #
966 #
967 #
968 # #--- Updating zk
969 #
970 # apk =0.5*(k+1);
971 # Ak = Ak + apk;
972 # tauk = 2/(k+3);
973 #
974 # wk = apk*df + wk;
975 #
976 # #
977 # # zk = Argmin_x Lmu/2 ||b - Ax||_l2^2 + Lmu/2||x - xplug ||_2^2+ <wk,x-xk>
978 # # s.t. ||b-Ax||_l2 < delta
979 # #
980 #
981 # cp = xplug - 1/Lmu*wk;
982 #
983 # Acp = Afun( cp );
984 # if ~isempty( AAtinv ) && isempty(USV)
985 # AtAcp = Atfun( AAtinv_fun( Acp ) );
986 # else
987 # AtAcp = Atfun( Acp );
988 # end
989 #
990 # if delta > 0
991 # if ~isempty(USV)
992 # # Make the substitution wk <-- wk/K
993 #
994 ## dfp = (xplug - Lmu1*wk); # = cp
995 ## Adfp= (Axplug - Acp);
996 # dfp = cp; Adfp = Acp;
997 # bp = b;
998 # deltap = delta;
999 ## dfp = SLmu*xplug - SLmu1*wk;
1000 ## bp = SLmu*b;
1001 ## deltap = SLmu*delta;
1002 #
1003 # # See if we even need to project:
1004 # if norm( Adfp - bp ) < deltap
1005 # zk = dfp;
1006 # Azk = Adfp;
1007 # else
1008 # [projection,projIter,lambdaZ] = fastProjection(Q,S,V,dfp,bp,...
1009 # deltap, .999*lambdaZ );
1010 # if lambdaZ > 0, disp('lambda is positive!'); keyboard; end
1011 # zk = projection;
1012 # # zk = SLmu1*projection;
1013 # Azk = Afun(zk);
1014 #
1015 # # DEBUGGING:
1016 ## if projIter == 50
1017 ## fprintf('\n Maxed out iterations at z\n');
1018 ## keyboard
1019 ## end
1020 # end
1021 # else
1022 # lambda = max(0,Lmu*(norm(b-Acp)/delta - 1));gamma = lambda/(lambda + Lmu);
1023 # zk = lambda/Lmu*(1-gamma)*Atb + cp - gamma*AtAcp;
1024 # # for calculating the residual, we'll avoid calling A()
1025 # # by storing A(zk) here (using A'*A = I):
1026 # Azk = lambda/Lmu*(1-gamma)*b + Acp - gamma*Acp;
1027 # end
1028 # else
1029 # # if delta is 0, this is simplified:
1030 # zk = AtAAtb + cp - AtAcp;
1031 # Azk = b;
1032 # end
1033 #
1034 # # DEBUGGING
1035 ## if norm( Ayk - b ) > (1.05)*delta
1036 ## fprintf('\nAzk failed projection test\n');
1037 ## keyboard;
1038 ## end
1039 #
1040 # #--- Updating xk
1041 #
1042 # xkp = tauk*zk + (1-tauk)*yk;
1043 # xold = xk;
1044 # xk=xkp;
1045 # Axk = tauk*Azk + (1-tauk)*Ayk;
1046 #
1047 # if ~mod(k,10), Axk = Afun(xk); end # otherwise slowly lose precision
1048 # # DEBUG
1049 ## if norm(Axk - Afun(xk) ) > 1e-6, disp('error with Axk'); keyboard; end
1050 #
1051 # #--- display progress if desired
1052 # if ~mod(k+1,Verbose )
1053 # printf('Iter: #3d ~ fmu: #.3e ~ Rel. Variation of fmu: #.2e ~ Residual: #.2e',...
1054 # k+1,fx,qp,residuals(k+1,1) );
1055 # #--- if user has supplied a function to calculate the error,
1056 # # apply it to the current iterate and dislay the output:
1057 # if DISPLAY_ERROR, printf(' ~ Error: #.2e',errFcn(xk)); end
1058 # printf('\n');
1059 # end
1060 # if abs(fx)>1e20 || abs(residuals(k+1,1)) >1e20 || isnan(fx)
1061 # error('Nesta: possible divergence or NaN. Bad estimate of ||A''A||?');
1062 # end
1063 #
1064 #end
1065 #
1066 #niter = k+1;
1067 #
1068 ##-- truncate output vectors
1069 #residuals = residuals(1:niter,:);
1070 #if RECORD_DATA, outputData = outputData(1:niter,:); end
1071
1072 # End of original Matlab code
1073 #---------------------
1074
1075 #fid = setOpts('fid',1);
1076 #function printf(varargin), fprintf(fid,varargin{:}); end
1077 opts,maxiter,userSet = setOpts(opts,'maxiter',10000,0);
1078 opts,TolVar,userSet = setOpts(opts,'TolVar',1e-5);
1079 opts,TypeMin,userSet = setOpts(opts,'TypeMin','L1');
1080 opts,Verbose,userSet = setOpts(opts,'Verbose',True);
1081 opts,errFcn,userSet = setOpts(opts,'errFcn',None);
1082 opts,outFcn,userSet = setOpts(opts,'outFcn',None);
1083 opts,stopTest,userSet = setOpts(opts,'stopTest',1,1,2);
1084 opts,U,userSet = setOpts(opts,'U',lambda x: x );
1085 #if ~isa(U,'function_handle')
1086 if not hasattr(U,'__call__'):
1087 opts,Ut,userSet = setOpts(opts,'Ut',None);
1088 else:
1089 opts,Ut,userSet = setOpts(opts,'Ut', lambda x: x );
1090 #end
1091 opts,xplug,userSet = setOpts(opts,'xplug',None);
1092 opts,normU,userSet = setOpts(opts,'normU',1);
1093
1094 if delta < 0:
1095 print 'delta must be greater or equal to zero'
1096 raise ValueError('delta must be greater or equal to zero')
1097
1098 if hasattr(A,'__call__'):
1099 Atfun = At;
1100 Afun = A;
1101 else:
1102 Atfun = lambda x: numpy.dot(A.T,x)
1103 Afun = lambda x: numpy.dot(A,x)
1104 #end
1105 Atb = Atfun(b);
1106
1107 opts,AAtinv,userSet = setOpts(opts,'AAtinv',None);
1108 opts,USV,userSet = setOpts(opts,'USV',None);
1109 if USV is not None:
1110 #if isstruct(USV)
1111 Q = USV['U']; # we can't use "U" as the variable name
1112 # since "U" already refers to the analysis operator
1113 S = USV['S'];
1114 #if isvector(S), s = S; S = diag(s);
1115 #else s = diag(S); end
1116 if S.ndim == 1:
1117 s = S; S = numpy.diag(s)  # as in the Matlab original: if isvector(S), s = S; S = diag(s)
1118 else:
1119 s = numpy.diag(S)
1120
1121 V = USV['V'];
1122 #else
1123 # error('opts.USV must be a structure');
1124 #end
1125 #if isempty(AAtinv)
1126 if AAtinv is None:
1127 #AAtinv = Q*diag( s.^(-2) )*Q';
1128 AAtinv = numpy.dot(Q, numpy.dot(numpy.diag(s ** -2), Q.T))
1129 #end
1130 #end
1131 # --- for A not a projection (experimental)
1132 #if ~isempty(AAtinv)
1133 if AAtinv is not None:
1134 #if isa(AAtinv,'function_handle')
1135 if hasattr(AAtinv, '__call__'):
1136 AAtinv_fun = AAtinv;
1137 else:
1138 AAtinv_fun = lambda x: numpy.dot(AAtinv,x)
1139 #end
1140
1141 AtAAtb = Atfun( AAtinv_fun(b) );
1142
1143 else:
1144 # We assume it's a projection
1145 AtAAtb = Atb;
1146 AAtinv_fun = lambda x: x;
1147 #end
1148
1149 if xplug is None:
1150 xplug = AtAAtb.copy();
1151 #end
1152
1153 #---- Initialization
1154 #N = length(xplug);
1155 N = len(xplug)
1156 #wk = zeros(N,1);
1157 wk = numpy.zeros(N)
1158 xk = xplug.copy()
1159
1160
1161 #---- Init Variables
1162 Ak = 0.0;
1163 Lmu = normU/mu;
1164 yk = xk.copy();
1165 zk = xk.copy();
1166 fmean = numpy.finfo(float).tiny/10.0;
1167 OK = 0;
1168 n = math.floor(math.sqrt(N));
1169
1170 #---- Computing Atb
1171 Atb = Atfun(b);
1172 Axk = Afun(xk);# only needed if you want to see the residuals
1173 # Axplug = Axk;
1174
1175
1176 #---- TV Minimization
1177 if TypeMin == 'TV':
1178 print 'Nic:TODO: TV minimization not yet implemented!'
1179 raise NotImplementedError('TV minimization not yet implemented')
1180 #if strcmpi(TypeMin,'TV')
1181 # Lmu = 8*Lmu;
1182 # Dv = spdiags([reshape([-ones(n-1,n); zeros(1,n)],N,1) ...
1183 # reshape([zeros(1,n); ones(n-1,n)],N,1)], [0 1], N, N);
1184 # Dh = spdiags([reshape([-ones(n,n-1) zeros(n,1)],N,1) ...
1185 # reshape([zeros(n,1) ones(n,n-1)],N,1)], [0 n], N, N);
1186 # D = sparse([Dh;Dv]);
1187 #end
1188
1189
1190 Lmu1 = 1.0/Lmu;
1191 # SLmu = sqrt(Lmu);
1192 # SLmu1 = 1/sqrt(Lmu);
1193 lambdaY = 0.;
1194 lambdaZ = 0.;
1195
1196 #---- setup data storage variables
1197 #[DISPLAY_ERROR, RECORD_DATA] = deal(false);
1198 DISPLAY_ERROR = False
1199 RECORD_DATA = False
1200 #outputData = deal([]);
1201 outputData = None
1202 residuals = numpy.zeros((maxiter,2))
1203 #if ~isempty(errFcn), DISPLAY_ERROR = true; end
1204 if errFcn is not None:
1205 DISPLAY_ERROR = True
1206 #if ~isempty(outFcn) && nargout >= 4
1207 if outFcn is not None: # (the Matlab version also required nargout >= 4)
1208 RECORD_DATA = True
1209 outputData = numpy.zeros((maxiter, numpy.atleast_2d(outFcn(xplug)).shape[1]))  # shape must be a tuple; outFcn may return a 1-D row
1210 #end
1211
1212 #for k = 0:maxiter-1,
1213 for k in numpy.arange(maxiter):
1214
1215 #---- Dual problem
1216
1217 #if strcmpi(TypeMin,'L1') [df,fx,val,uk] = Perform_L1_Constraint(xk,mu,U,Ut);end
1218 if TypeMin == 'L1':
1219 df,fx,val,uk = Perform_L1_Constraint(xk,mu,U,Ut)
1220
1221 # Nic: TODO: TV not implemented yet !
1222 #if strcmpi(TypeMin,'TV') [df,fx] = Perform_TV_Constraint(xk,mu,Dv,Dh,D,U,Ut);end
1223
1224 #---- Primal Problem
1225
1226 #---- Updating yk
1227
1228 #
1229 # yk = Argmin_x Lmu/2 ||x - xk||_l2^2 + <df,x-xk> s.t. ||b-Ax||_l2 < delta
1230 # Let xp be sqrt(Lmu) (x-xk), dfp be df/sqrt(Lmu), bp be sqrt(Lmu)(b- Axk) and deltap be sqrt(Lmu)delta
1231 # yk = xk + 1/sqrt(Lmu) Argmin_xp 1/2 || xp ||_2^2 + <dfp,xp> s.t. || bp - Axp ||_2 < deltap
1232 #
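# (Note: when delta > 0 and opts.USV is not given, the code below uses the
#  closed-form solution of this subproblem for a projector A:
#  lambda = max(0, Lmu*(||b - A*cp||/delta - 1)), gamma = lambda/(lambda + Lmu),
#  yk = lambda/Lmu*(1-gamma)*A'*b + cp - gamma*A'*(A*cp).)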
1233
1234
1235 cp = xk - 1./Lmu*df; # this is "q" in eq. (3.7) in the paper
1236
1237 Acp = Afun( cp );
1238 #if ~isempty(AAtinv) && isempty(USV)
1239 if AAtinv is not None and USV is None:
1240 AtAcp = Atfun( AAtinv_fun( Acp ) );
1241 else:
1242 AtAcp = Atfun( Acp );
1243 #end
1244
1245 #residuals(k+1,1) = norm( b-Axk); # the residual
1246 residuals[k,0] = numpy.linalg.norm(b-Axk)
1247 #residuals(k+1,2) = fx; # the value of the objective
1248 residuals[k,1] = fx
1249 #--- if user has supplied a function, apply it to the iterate
1250 if RECORD_DATA:
1251 outputData[k,:] = outFcn(xk);  # k is already 0-based in this loop
1252 #end
1253
1254 if delta > 0:
1255 #if ~isempty(USV)
1256 if USV is not None:
1257 # there are more efficient methods, but we're assuming
1258 # that A is negligible compared to U and Ut.
1259 # Here we make the change of variables x <-- x - xk
1260 # and df <-- df/L
1261 dfp = -Lmu1*df;
1262 Adfp = -(Axk - Acp);
1263 bp = b - Axk;
1264 deltap = delta;
1265 # Check if we even need to project:
1266 if numpy.linalg.norm( Adfp - bp ) < deltap:
1267 lambdaY = 0.
1268 projIter = 0;
1269 # i.e. projection = dfp;
1270 yk = xk + dfp;
1271 Ayk = Axk + Adfp;
1272 else:
1273 lambdaY_old = lambdaY;  # lambdaY is a scalar
1274 #[projection,projIter,lambdaY] = fastProjection(Q,S,V,dfp,bp,deltap, .999*lambdaY_old );
1275 projection,projIter,lambdaY = fastProjection(Q,S,V,dfp,bp,deltap, .999*lambdaY_old )
1276 #if lambdaY > 0, disp('lambda is positive!'); keyboard; end
1277 if lambdaY > 0:
1278 print 'lambda is positive!'
1279 raise RuntimeError('lambda is positive!')
1280 yk = xk + projection;
1281 Ayk = Afun(yk);
1282 # DEBUGGING
1283 # if projIter == 50
1284 # fprintf('\n Maxed out iterations at y\n');
1285 # keyboard
1286 # end
1287 #end
1288 else:
1289 lambdaa = max(0,Lmu*(numpy.linalg.norm(b-Acp)/delta - 1))
1290 gamma = lambdaa/(lambdaa + Lmu);
1291 yk = lambdaa/Lmu*(1-gamma)*Atb + cp - gamma*AtAcp;
1292 # for calculating the residual, we'll avoid calling A()
1293 # by storing A(yk) here (using A'*A = I):
1294 Ayk = lambdaa/Lmu*(1-gamma)*b + Acp - gamma*Acp;
1295 #end
1296 else:
1297 # if delta is 0, the projection is simplified:
1298 yk = AtAAtb + cp - AtAcp;
1299 Ayk = b.copy();
1300 #end
1301
1302 # DEBUGGING
1303 # if norm( Ayk - b ) > (1.05)*delta
1304 # fprintf('\nAyk failed projection test\n');
1305 # keyboard;
1306 # end
1307
1308 #--- Stopping criterion
1309 qp = abs(fx - numpy.mean(fmean))/numpy.mean(fmean);
1310
1311 #switch stopTest
1312 # case 1
1313 if stopTest == 1:
1314 # look at the relative change in function value
1315 #if qp <= TolVar && OK; break;end
1316 if qp <= TolVar and OK:
1317 break
1318 #if qp <= TolVar && ~OK; OK=1; end
1319 if qp <= TolVar and not OK:
1320 OK = 1
1321 elif stopTest == 2:
1322 # look at the l_inf change from previous iterate
1323 if k >= 1 and numpy.linalg.norm( xk - xold, numpy.inf ) <= TolVar:
1324 break
1325 #end
1326 #end
1327 #fmean = [fx,fmean];
1328 fmean = numpy.hstack((fx,fmean));
1329 if (len(fmean) > 10):
1330 fmean = fmean[:10]
1331
1332
1333
1334 #--- Updating zk
1335
1336 apk = 0.5*(k+1);
1337 Ak = Ak + apk;
1338 tauk = 2.0/(k+3);  # force float division (Python 2)
1339
1340 wk = apk*df + wk;
1341
1342 #
1343 # zk = Argmin_x Lmu/2 ||b - Ax||_l2^2 + Lmu/2||x - xplug ||_2^2+ <wk,x-xk>
1344 # s.t. ||b-Ax||_l2 < delta
1345 #
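# (This is the analogue of the yk update above, but the prox-centre is xplug
#  and the driving gradient is the accumulated wk instead of df.)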
1346
1347 cp = xplug - 1.0/Lmu*wk;
1348
1349 Acp = Afun( cp );
1350 #if ~isempty( AAtinv ) && isempty(USV)
1351 if AAtinv is not None and USV is None:
1352 AtAcp = Atfun( AAtinv_fun( Acp ) );
1353 else:
1354 AtAcp = Atfun( Acp );
1355 #end
1356
1357 if delta > 0:
1358 #if ~isempty(USV)
1359 if USV is not None:
1360 # Make the substitution wk <-- wk/K
1361
1362 # dfp = (xplug - Lmu1*wk); # = cp
1363 # Adfp= (Axplug - Acp);
1364 dfp = cp.copy()
1365 Adfp = Acp.copy()
1366 bp = b.copy();
1367 deltap = delta;
1368 # dfp = SLmu*xplug - SLmu1*wk;
1369 # bp = SLmu*b;
1370 # deltap = SLmu*delta;
1371
1372 # See if we even need to project:
1373 if numpy.linalg.norm( Adfp - bp ) < deltap:
1374 zk = dfp.copy();
1375 Azk = Adfp.copy();
1376 else:
1377 projection,projIter,lambdaZ = fastProjection(Q,S,V,dfp,bp,deltap, .999*lambdaZ )
1378 if lambdaZ > 0:
1379 print 'lambda is positive!'
1380 raise RuntimeError('lambda is positive!')
1381 zk = projection.copy();
1382 # zk = SLmu1*projection;
1383 Azk = Afun(zk);
1384
1385 # DEBUGGING:
1386 # if projIter == 50
1387 # fprintf('\n Maxed out iterations at z\n');
1388 # keyboard
1389 # end
1390 #end
1391 else:
1392 lambdaa = max(0,Lmu*(numpy.linalg.norm(b-Acp)/delta - 1));
1393 gamma = lambdaa/(lambdaa + Lmu);
1394 zk = lambdaa/Lmu*(1-gamma)*Atb + cp - gamma*AtAcp;
1395 # for calculating the residual, we'll avoid calling A()
1396 # by storing A(zk) here (using A'*A = I):
1397 Azk = lambdaa/Lmu*(1-gamma)*b + Acp - gamma*Acp;
1398 #end
1399 else:
1400 # if delta is 0, this is simplified:
1401 zk = AtAAtb + cp - AtAcp;
1402 Azk = b;
1403 #end
1404
1405 # DEBUGGING
1406 # if norm( Ayk - b ) > (1.05)*delta
1407 # fprintf('\nAzk failed projection test\n');
1408 # keyboard;
1409 # end
1410
1411 #--- Updating xk
1412
1413 xkp = tauk*zk + (1-tauk)*yk;
1414 xold = xk.copy();
1415 xk = xkp.copy();
1416 Axk = tauk*Azk + (1-tauk)*Ayk;
1417
1418 #if ~mod(k,10), Axk = Afun(xk); end # otherwise slowly lose precision
1419 if not numpy.mod(k,10):
1420 Axk = Afun(xk)
1421 # DEBUG
1422 # if norm(Axk - Afun(xk) ) > 1e-6, disp('error with Axk'); keyboard; end
1423
1424 #--- display progress if desired
1425 #if ~mod(k+1,Verbose )
1426 if Verbose and not numpy.mod(k+1,Verbose):  # Verbose == 0/False disables output
1427 #printf('Iter: #3d ~ fmu: #.3e ~ Rel. Variation of fmu: #.2e ~ Residual: #.2e',k+1,fx,qp,residuals(k+1,1) );
1428 print 'Iter: ',k+1,' ~ fmu: ',fx,' ~ Rel. Variation of fmu: ',qp,' ~ Residual:',residuals[k,0]
1429 #--- if user has supplied a function to calculate the error,
1430 # apply it to the current iterate and display the output:
1431 #if DISPLAY_ERROR, printf(' ~ Error: #.2e',errFcn(xk)); end
1432 if DISPLAY_ERROR:
1433 print ' ~ Error:',errFcn(xk)
1434 #end
1435 if abs(fx)>1e20 or abs(residuals[k,0]) >1e20 or numpy.isnan(fx):
1436 #error('Nesta: possible divergence or NaN. Bad estimate of ||A''A||?');
1437 print 'Nesta: possible divergence or NaN. Bad estimate of ||A''A||?'
1438 raise RuntimeError("Nesta: possible divergence or NaN. Bad estimate of ||A'A||?")
1439 #end
1440
1441 #end
1442
1443 niter = k+1;
1444
1445 #-- truncate output vectors
1446 residuals = residuals[:niter,:]
1447 #if RECORD_DATA, outputData = outputData(1:niter,:); end
1448 if RECORD_DATA:
1449 outputData = outputData[:niter,:]
1450
1451 return xk,niter,residuals,outputData,opts
1452
1453
1454 ############ PERFORM THE L1 CONSTRAINT ##################
1455
1456 #function [df,fx,val,uk] = Perform_L1_Constraint(xk,mu,U,Ut)
1457 def Perform_L1_Constraint(xk,mu,U,Ut):
1458
1459 #if isa(U,'function_handle')
1460 if hasattr(U,'__call__'):
1461 uk = U(xk);
1462 else:
1463 uk = numpy.dot(U,xk)
1464 #end
1465 fx = uk.copy()
1466
1467 #uk = uk./max(mu,abs(uk));
1468 uk = uk / numpy.maximum(mu, numpy.abs(uk))  # element-wise, as in Matlab uk./max(mu,abs(uk))
1469 #val = real(uk'*fx);
1470 val = numpy.real(numpy.vdot(uk,fx))
1471 #fx = real(uk'*fx - mu/2*norm(uk)^2);
1472 fx = numpy.real(numpy.vdot(uk,fx) - mu/2.*numpy.linalg.norm(uk)**2);
1473
1474 #if isa(Ut,'function_handle')
1475 if hasattr(Ut,'__call__'):
1476 df = Ut(uk);
1477 else:
1478 #df = U'*uk;
1479 df = numpy.dot(U.T,uk)
1480 #end
1481 return df,fx,val,uk
1482 #end
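# ----------------------------------------------------------------------------
# Small sanity check (illustrative, assuming U and Ut are identity operators):
# the smoothed l1 value of x = [3, 0.05] with mu = 0.1 is
# (3 - mu/2) + 0.05**2/(2*mu) = 2.95 + 0.0125 = 2.9625, and the dual vector uk
# saturates to sign(x) for entries whose magnitude exceeds mu.
#
#   I = lambda x: x
#   df, fx, val, uk = Perform_L1_Constraint(numpy.array([3.0, 0.05]), 0.1, I, I)
#   # expected: fx ~= 2.9625, uk ~= [1.0, 0.5], df == uk
# ----------------------------------------------------------------------------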
1483
1484 # Nic: TODO: TV not implemented yet!
1485 ############ PERFORM THE TV CONSTRAINT ##################
1486 #function [df,fx] = Perform_TV_Constraint(xk,mu,Dv,Dh,D,U,Ut)
1487 # if isa(U,'function_handle')
1488 # x = U(xk);
1489 # else
1490 # x = U*xk;
1491 # end
1492 # df = zeros(size(x));
1493 #
1494 # Dhx = Dh*x;
1495 # Dvx = Dv*x;
1496 #
1497 # tvx = sum(sqrt(abs(Dhx).^2+abs(Dvx).^2));
1498 # w = max(mu,sqrt(abs(Dhx).^2 + abs(Dvx).^2));
1499 # uh = Dhx ./ w;
1500 # uv = Dvx ./ w;
1501 # u = [uh;uv];
1502 # fx = real(u'*D*x - mu/2 * 1/numel(u)*sum(u'*u));
1503 # if isa(Ut,'function_handle')
1504 # df = Ut(D'*u);
1505 # else
1506 # df = U'*(D'*u);
1507 # end
1508 #end
1509
1510
1511 #function [x,k,l] = fastProjection( U, S, V, y, b, epsilon, lambda0, DISP )
1512 def fastProjection( U, S, V, y, b, epsilon, lambda0=0, DISP=False ):
1513 # [x,niter,lambda] = fastProjection(U, S, V, y, b, epsilon, [lambda0], [DISP] )
1514 #
1515 # minimizes || x - y ||
1516 # such that || Ax - b || <= epsilon
1517 #
1518 # where USV' = A (i.e the SVD of A)
1519 #
1520 # The optional input "lambda0" is a guess for the Lagrange parameter
1521 #
1522 # Warning: for speed, does not calculate A(y) to see if x = y is feasible
1523 #
1524 # NESTA Version 1.1
1525 # See also Core_Nesterov
1526
1527 # Written by Stephen Becker, September 2009, srbecker@caltech.edu
1528
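# ----------------------------------------------------------------------------
# Illustrative usage sketch (with made-up data): project a point y onto
# {x : ||A*x - b||_2 <= eps} by feeding fastProjection the SVD factors of A.
# numpy.linalg.svd returns V transposed and the singular values as a vector,
# so they are converted below.
#
#   import numpy
#   numpy.random.seed(0)
#   A = numpy.random.randn(5, 20)
#   y = numpy.random.randn(20)
#   b = numpy.random.randn(5)
#   Usvd, svals, Vt = numpy.linalg.svd(A, full_matrices=False)
#   x, niter, lam = fastProjection(Usvd, numpy.diag(svals), Vt.T, y, b, 0.1)
#   # afterwards ||A.dot(x) - b|| should be <= 0.1 (up to the Newton tolerance)
# ----------------------------------------------------------------------------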
1529 #---------------------
1530 # Original Matlab code:
1531
1532 #DEBUG = true;
1533 #if nargin < 8
1534 # DISP = false;
1535 #end
1536 ## -- Parameters for Newton's method --
1537 #MAXIT = 70;
1538 #TOL = 1e-8 * epsilon;
1539 ## TOL = max( TOL, 10*eps ); # we can't do better than machine precision
1540 #
1541 #m = size(U,1);
1542 #n = size(V,1);
1543 #mn = min([m,n]);
1544 #if numel(S) > mn^2, S = diag(diag(S)); end # S should be a small square matrix
1545 #r = size(S);
1546 #if size(U,2) > r, U = U(:,1:r); end
1547 #if size(V,2) > r, V = V(:,1:r); end
1548 #
1549 #s = diag(S);
1550 #s2 = s.^2;
1551 #
1552 ## What we want to do:
1553 ## b = b - A*y;
1554 ## bb = U'*b;
1555 #
1556 ## if A doesn't have full row rank, then b may not be in the range
1557 #if size(U,1) > size(U,2)
1558 # bRange = U*(U'*b);
1559 # bNull = b - bRange;
1560 # epsilon = sqrt( epsilon^2 - norm(bNull)^2 );
1561 #end
1562 #b = U'*b - S*(V'*y); # parenthesis is very important! This is expensive.
1563 #
1564 ## b2 = b.^2;
1565 #b2 = abs(b).^2; # for complex data
1566 #bs2 = b2.*s2;
1567 #epsilon2 = epsilon^2;
1568 #
1569 ## The following routine need to be fast
1570 ## For efficiency (at cost of transparency), we are writing the calculations
1571 ## in a way that minimize number of operations. The functions "f"
1572 ## and "fp" represent f and its derivative.
1573 #
1574 ## f = @(lambda) sum( b2 .*(1-lambda*s2).^(-2) ) - epsilon^2;
1575 ## fp = @(lambda) 2*sum( bs2 .*(1-lambda*s2).^(-3) );
1576 #if nargin < 7, lambda0 = 0; end
1577 #l = lambda0; oldff = 0;
1578 #one = ones(m,1);
1579 #alpha = 1; # take full Newton steps
1580 #for k = 1:MAXIT
1581 # # make f(l) and fp(l) as efficient as possible:
1582 # ls = one./(one-l*s2);
1583 # ls2 = ls.^2;
1584 # ls3 = ls2.*ls;
1585 # ff = b2.'*ls2; # should be .', not ', even for complex data
1586 # ff = ff - epsilon2;
1587 # fpl = 2*( bs2.'*ls3 ); # should be .', not ', even for complex data
1588 ## ff = f(l); # this is a little slower
1589 ## fpl = fp(l); # this is a little slower
1590 # d = -ff/fpl;
1591 # if DISP, fprintf('#2d, lambda is #5.2f, f(lambda) is #.2e, f''(lambda) is #.2e\n',...
1592 # k,l,ff,fpl ); end
1593 # if abs(ff) < TOL, break; end # stopping criteria
1594 # l_old = l;
1595 # if k>2 && ( abs(ff) > 10*abs(oldff+100) ) #|| abs(d) > 1e13 )
1596 # l = 0; alpha = 1/2;
1597 ## oldff = f(0);
1598 # oldff = sum(b2); oldff = oldff - epsilon2;
1599 # if DISP, disp('restarting'); end
1600 # else
1601 # if alpha < 1, alpha = (alpha+1)/2; end
1602 # l = l + alpha*d;
1603 # oldff = ff;
1604 # if l > 0
1605 # l = 0; # shouldn't be positive
1606 # oldff = sum(b2); oldff = oldff - epsilon2;
1607 # end
1608 # end
1609 # if l_old == l && l == 0
1610 # if DISP, disp('Making no progress; x = y is probably feasible'); end
1611 # break;
1612 # end
1613 #end
1614 ## if k == MAXIT && DEBUG, disp('maxed out iterations'); end
1615 #if l < 0
1616 # xhat = -l*s.*b./( 1 - l*s2 );
1617 # x = V*xhat + y;
1618 #else
1619 # # y is already feasible, so no need to project
1620 # l = 0;
1621 # x = y;
1622 #end
1623
1624 # End of original Matlab code
1625 #---------------------
1626
1627 DEBUG = True;
1628 #if nargin < 8
1629 # DISP = false;
1630 #end
1631 # -- Parameters for Newton's method --
1632 MAXIT = 70;
1633 TOL = 1e-8 * epsilon;
1634 # TOL = max( TOL, 10*eps ); # we can't do better than machine precision
1635
1636 #m = size(U,1);
1637 #n = size(V,1);
1638 m = U.shape[0]
1639 n = V.shape[0]
1640 mn = min(m,n);
1641 #if numel(S) > mn^2, S = diag(diag(S)); end # S should be a small square matrix
1642 if S.size > mn**2:
1643 S = numpy.diag(numpy.diag(S))
1644 #r = size(S);
1645 r = S.shape[0]
1646 #if size(U,2) > r, U = U(:,1:r); end
1647 if U.shape[1] > r:
1648 U = U[:,:r]
1649 #if size(V,2) > r, V = V(:,1:r); end
1650 if V.shape[1] > r:
1651 V = V[:,:r]
1652
1653 s = numpy.diag(S);
1654 s2 = s**2;
1655
1656 # What we want to do:
1657 # b = b - A*y;
1658 # bb = U'*b;
1659
1660 # if A doesn't have full row rank, then b may not be in the range
1661 #if size(U,1) > size(U,2)
1662 if U.shape[0] > U.shape[1]:
1663 #bRange = U*(U'*b);
1664 bRange = numpy.dot(U, numpy.dot(U.T, b))
1665 bNull = b - bRange;
1666 epsilon = math.sqrt( epsilon**2 - numpy.linalg.norm(bNull)**2 );
1667 #end
1668 #b = U'*b - S*(V'*y); # parenthesis is very important! This is expensive.
1669 b = numpy.dot(U.T,b) - numpy.dot(S, numpy.dot(V.T,y))
1670
1671 # b2 = b.^2;
1672 b2 = abs(b)**2; # for complex data
1673 bs2 = b2*s2;  # element-wise product (Matlab b2.*s2), not a power
1674 epsilon2 = epsilon**2;
1675
1676 # The following routine needs to be fast.
1677 # For efficiency (at the cost of transparency), we write the calculations
1678 # in a way that minimizes the number of operations. The functions "f"
1679 # and "fp" represent f and its derivative.
1680
1681 # f = @(lambda) sum( b2 .*(1-lambda*s2).^(-2) ) - epsilon^2;
1682 # fp = @(lambda) 2*sum( bs2 .*(1-lambda*s2).^(-3) );
1683
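# The Newton iteration below solves f(lambda) = 0 for the (non-positive)
# Lagrange multiplier of the constraint ||Ax - b|| <= epsilon, with
# f(lambda) = sum_i b_i^2/(1 - lambda*s_i^2)^2 - epsilon^2 as defined above
# (the b_i here are the transformed values computed a few lines earlier).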
1684 #if nargin < 7, lambda0 = 0; end
1685 l = lambda0;
1686 oldff = 0;
1687 one = numpy.ones(m);
1688 alpha = 1; # take full Newton steps
1689 #for k = 1:MAXIT
1690 for k in numpy.arange(MAXIT):
1691 # make f(l) and fp(l) as efficient as possible:
1692 #ls = one./(one-l*s2);
1693 ls = one/(one-l*s2)
1694 ls2 = ls**2;
1695 ls3 = ls2*ls;  # element-wise product (Matlab ls2.*ls), not a power
1696 #ff = b2.'*ls2; # should be .', not ', even for complex data
1697 ff = numpy.dot(b2, ls2)  # plain (non-conjugated) dot, matching Matlab's .'
1698 ff = ff - epsilon2;
1699 #fpl = 2*( bs2.'*ls3 ); # should be .', not ', even for complex data
1700 fpl = 2 * numpy.dot(bs2, ls3)  # plain (non-conjugated) dot, matching Matlab's .'
1701 # ff = f(l); # this is a little slower
1702 # fpl = fp(l); # this is a little slower
1703 d = -ff/fpl;
1704 # if DISP, fprintf('#2d, lambda is #5.2f, f(lambda) is #.2e, f''(lambda) is #.2e\n',k,l,ff,fpl ); end
1705 if DISP:
1706 print k,', lambda is ',l,', f(lambda) is ',ff,", f'(lambda) is ",fpl
1707 #if abs(ff) < TOL, break; end # stopping criteria
1708 if abs(ff) < TOL:
1709 break
1710 l_old = l  # l is a scalar
1711 if k>2 and ( abs(ff) > 10*abs(oldff+100) ): #|| abs(d) > 1e13 )
1712 l = 0;
1713 alpha = 1.0/2.0;
1714 # oldff = f(0);
1715 oldff = b2.sum(); oldff = oldff - epsilon2;
1716 if DISP:
1717 print 'restarting'
1718 else:
1719 if alpha < 1:
1720 alpha = (alpha+1.0)/2.0
1721 l = l + alpha*d;
1722 oldff = ff
1723 if l > 0:
1724 l = 0; # shouldn't be positive
1725 oldff = b2.sum()
1726 oldff = oldff - epsilon2;
1727 #end
1728 #end
1729 if l_old == l and l == 0:
1730 #if DISP, disp('Making no progress; x = y is probably feasible'); end
1731 if DISP:
1732 print 'Making no progress; x = y is probably feasible'
1733 break;
1734 #end
1735 #end
1736 # if k == MAXIT && DEBUG, disp('maxed out iterations'); end
1737 if l < 0:
1738 #xhat = -l*s.*b./( 1 - l*s2 );
1739 xhat = -l*s*b/( 1. - l*s2 )  # l is a scalar, so plain scalar multiplication
1740 #x = V*xhat + y;
1741 x = numpy.dot(V,xhat) + y
1742 else:
1743 # y is already feasible, so no need to project
1744 l = 0;
1745 x = y.copy();
1746 #end
1747 return x,k,l