Mercurial > hg > pycsalgos
comparison pyCSalgos/NESTA/NESTA.py @ 46:88f0ebe1667a
Finished NESTA implementation. Not tested yet
author | nikcleju |
---|---|
date | Tue, 29 Nov 2011 22:06:20 +0000 |
parents | 7524d7749456 |
children | e6a5f2173015 |
comparison
equal
deleted
inserted
replaced
45:7524d7749456 | 46:88f0ebe1667a |
---|---|
325 if S.ndim is 1: | 325 if S.ndim is 1: |
326 s = S | 326 s = S |
327 else: | 327 else: |
328 s = numpy.diag(S) | 328 s = numpy.diag(S) |
329 | 329 |
330 #V = USV.V; | 330 V = USV['V']; |
331 #else | 331 #else |
332 # error('opts.USV must be a structure'); | 332 # error('opts.USV must be a structure'); |
333 #end | 333 #end |
334 #end | 334 #end |
335 | 335 |
635 x = St(Sx); | 635 x = St(Sx); |
636 x = x/numpy.linalg.norm(x); | 636 x = x/numpy.linalg.norm(x); |
637 cnt = cnt+1; | 637 cnt = cnt+1; |
638 #end | 638 #end |
639 #end | 639 #end |
640 | |
641 | |
642 | |
643 #function [xk,niter,residuals,outputData,opts] = Core_Nesterov(A,At,b,mu,delta,opts) | |
644 def Core_Nesterov(A,At,b,mu,delta,opts): | |
645 # [xk,niter,residuals,outputData,opts] =Core_Nesterov(A,At,b,mu,delta,opts) | |
646 # | |
647 # Solves a L1 minimization problem under a quadratic constraint using the | |
648 # Nesterov algorithm, without continuation: | |
649 # | |
650 # min_x || U x ||_1 s.t. ||y - Ax||_2 <= delta | |
651 # | |
652 # If continuation is desired, see the function NESTA.m | |
653 # | |
654 # The primal prox-function is also adapted by accounting for a first guess | |
655 # xplug that also tends towards x_muf | |
656 # | |
657 # The observation matrix A is a projector | |
658 # | |
659 # Inputs: A and At - measurement matrix and adjoint (either a matrix, in which | |
660 # case At is unused, or function handles). m x n dimensions. | |
661 # b - Observed data, a m x 1 array | |
662 # muf - The desired value of mu at the last continuation step. | |
663 # A smaller mu leads to higher accuracy. | |
664 # delta - l2 error bound. This enforces how close the variable | |
665 # must fit the observations b, i.e. || y - Ax ||_2 <= delta | |
666 # If delta = 0, enforces y = Ax | |
667 # Common heuristic: delta = sqrt(m + 2*sqrt(2*m))*sigma; | |
668 # where sigma=std(noise). | |
669 # opts - | |
670 # This is a structure that contains additional options, | |
671 # some of which are optional. | |
672 # The fieldnames are case insensitive. Below | |
673 # are the possible fieldnames: | |
674 # | |
675 # opts.xplug - the first guess for the primal prox-function, and | |
676 # also the initial point for xk. By default, xplug = At(b) | |
677 # opts.U and opts.Ut - Analysis/Synthesis operators | |
678 # (either matrices of function handles). | |
679 # opts.normU - if opts.U is provided, this should be norm(U) | |
680 # opts.maxiter - max number of iterations in an inner loop. | |
681 # default is 10,000 | |
682 # opts.TolVar - tolerance for the stopping criteria | |
683 # opts.stopTest - which stopping criteria to apply | |
684 # opts.stopTest == 1 : stop when the relative | |
685 # change in the objective function is less than | |
686 # TolVar | |
687 # opts.stopTest == 2 : stop with the l_infinity norm | |
688 # of difference in the xk variable is less | |
689 # than TolVar | |
690 # opts.TypeMin - if this is 'L1' (default), then | |
691 # minimizes a smoothed version of the l_1 norm. | |
692 # If this is 'tv', then minimizes a smoothed | |
693 # version of the total-variation norm. | |
694 # The string is case insensitive. | |
695 # opts.Verbose - if this is 0 or false, then very | |
696 # little output is displayed. If this is 1 or true, | |
697 # then output every iteration is displayed. | |
698 # If this is a number p greater than 1, then | |
699 # output is displayed every pth iteration. | |
700 # opts.fid - if this is 1 (default), the display is | |
701 # the usual Matlab screen. If this is the file-id | |
702 # of a file opened with fopen, then the display | |
703 # will be redirected to this file. | |
704 # opts.errFcn - if this is a function handle, | |
705 # then the program will evaluate opts.errFcn(xk) | |
706 # at every iteration and display the result. | |
707 # ex. opts.errFcn = @(x) norm( x - x_true ) | |
708 # opts.outFcn - if this is a function handle, | |
709 # then then program will evaluate opts.outFcn(xk) | |
710 # at every iteration and save the results in outputData. | |
711 # If the result is a vector (as opposed to a scalar), | |
712 # it should be a row vector and not a column vector. | |
713 # ex. opts.outFcn = @(x) [norm( x - xtrue, 'inf' ),... | |
714 # norm( x - xtrue) / norm(xtrue)] | |
715 # opts.AAtinv - this is an experimental new option. AAtinv | |
716 # is the inverse of AA^*. This allows the use of a | |
717 # matrix A which is not a projection, but only | |
718 # for the noiseless (i.e. delta = 0) case. | |
719 # If the SVD of A is U*S*V', then AAtinv = U*(S^{-2})*U'. | |
720 # opts.USV - another experimental option. This supercedes | |
721 # the AAtinv option, so it is recommended that you | |
722 # do not define AAtinv. This allows the use of a matrix | |
723 # A which is not a projection, and works for the | |
724 # noisy ( i.e. delta > 0 ) case. | |
725 # opts.USV should contain three fields: | |
726 # opts.USV.U is the U from [U,S,V] = svd(A) | |
727 # likewise, opts.USV.S and opts.USV.V are S and V | |
728 # from svd(A). S may be a matrix or a vector. | |
729 # Outputs: | |
730 # xk - estimate of the solution x | |
731 # niter - number of iterations | |
732 # residuals - first column is the residual at every step, | |
733 # second column is the value of f_mu at every step | |
734 # outputData - a matrix, where each row r is the output | |
735 # from opts.outFcn, if supplied. | |
736 # opts - the structure containing the options that were used | |
737 # | |
738 # Written by: Jerome Bobin, Caltech | |
739 # Email: bobin@acm.caltech.edu | |
740 # Created: February 2009 | |
741 # Modified: May 2009, Jerome Bobin and Stephen Becker, Caltech | |
742 # Modified: Nov 2009, Stephen Becker | |
743 # | |
744 # NESTA Version 1.1 | |
745 # See also NESTA | |
746 | |
747 #---- Set defaults | |
748 # opts = []; | |
749 | |
750 #--------------------- | |
751 # Original Matab code: | |
752 | |
753 #fid = setOpts('fid',1); | |
754 #function printf(varargin), fprintf(fid,varargin{:}); end | |
755 #maxiter = setOpts('maxiter',10000,0); | |
756 #TolVar = setOpts('TolVar',1e-5); | |
757 #TypeMin = setOpts('TypeMin','L1'); | |
758 #Verbose = setOpts('Verbose',true); | |
759 #errFcn = setOpts('errFcn',[]); | |
760 #outFcn = setOpts('outFcn',[]); | |
761 #stopTest = setOpts('stopTest',1,1,2); | |
762 #U = setOpts('U', @(x) x ); | |
763 #if ~isa(U,'function_handle') | |
764 # Ut = setOpts('Ut',[]); | |
765 #else | |
766 # Ut = setOpts('Ut', @(x) x ); | |
767 #end | |
768 #xplug = setOpts('xplug',[]); | |
769 #normU = setOpts('normU',1); | |
770 # | |
771 #if delta < 0, error('delta must be greater or equal to zero'); end | |
772 # | |
773 #if isa(A,'function_handle') | |
774 # Atfun = At; | |
775 # Afun = A; | |
776 #else | |
777 # Atfun = @(x) A'*x; | |
778 # Afun = @(x) A*x; | |
779 #end | |
780 #Atb = Atfun(b); | |
781 # | |
782 #AAtinv = setOpts('AAtinv',[]); | |
783 #USV = setOpts('USV',[]); | |
784 #if ~isempty(USV) | |
785 # if isstruct(USV) | |
786 # Q = USV.U; # we can't use "U" as the variable name | |
787 # # since "U" already refers to the analysis operator | |
788 # S = USV.S; | |
789 # if isvector(S), s = S; S = diag(s); | |
790 # else s = diag(S); end | |
791 # V = USV.V; | |
792 # else | |
793 # error('opts.USV must be a structure'); | |
794 # end | |
795 # if isempty(AAtinv) | |
796 # AAtinv = Q*diag( s.^(-2) )*Q'; | |
797 # end | |
798 #end | |
799 ## --- for A not a projection (experimental) | |
800 #if ~isempty(AAtinv) | |
801 # if isa(AAtinv,'function_handle') | |
802 # AAtinv_fun = AAtinv; | |
803 # else | |
804 # AAtinv_fun = @(x) AAtinv * x; | |
805 # end | |
806 # | |
807 # AtAAtb = Atfun( AAtinv_fun(b) ); | |
808 # | |
809 #else | |
810 # # We assume it's a projection | |
811 # AtAAtb = Atb; | |
812 # AAtinv_fun = @(x) x; | |
813 #end | |
814 # | |
815 #if isempty(xplug) | |
816 # xplug = AtAAtb; | |
817 #end | |
818 # | |
819 ##---- Initialization | |
820 #N = length(xplug); | |
821 #wk = zeros(N,1); | |
822 #xk = xplug; | |
823 # | |
824 # | |
825 ##---- Init Variables | |
826 #Ak= 0; | |
827 #Lmu = normU/mu; | |
828 #yk = xk; | |
829 #zk = xk; | |
830 #fmean = realmin/10; | |
831 #OK = 0; | |
832 #n = floor(sqrt(N)); | |
833 # | |
834 ##---- Computing Atb | |
835 #Atb = Atfun(b); | |
836 #Axk = Afun(xk);# only needed if you want to see the residuals | |
837 ## Axplug = Axk; | |
838 # | |
839 # | |
840 ##---- TV Minimization | |
841 #if strcmpi(TypeMin,'TV') | |
842 # Lmu = 8*Lmu; | |
843 # Dv = spdiags([reshape([-ones(n-1,n); zeros(1,n)],N,1) ... | |
844 # reshape([zeros(1,n); ones(n-1,n)],N,1)], [0 1], N, N); | |
845 # Dh = spdiags([reshape([-ones(n,n-1) zeros(n,1)],N,1) ... | |
846 # reshape([zeros(n,1) ones(n,n-1)],N,1)], [0 n], N, N); | |
847 # D = sparse([Dh;Dv]); | |
848 #end | |
849 # | |
850 # | |
851 #Lmu1 = 1/Lmu; | |
852 ## SLmu = sqrt(Lmu); | |
853 ## SLmu1 = 1/sqrt(Lmu); | |
854 #lambdaY = 0; | |
855 #lambdaZ = 0; | |
856 # | |
857 ##---- setup data storage variables | |
858 #[DISPLAY_ERROR, RECORD_DATA] = deal(false); | |
859 #outputData = deal([]); | |
860 #residuals = zeros(maxiter,2); | |
861 #if ~isempty(errFcn), DISPLAY_ERROR = true; end | |
862 #if ~isempty(outFcn) && nargout >= 4 | |
863 # RECORD_DATA = true; | |
864 # outputData = zeros(maxiter, size(outFcn(xplug),2) ); | |
865 #end | |
866 # | |
867 #for k = 0:maxiter-1, | |
868 # | |
869 # #---- Dual problem | |
870 # | |
871 # if strcmpi(TypeMin,'L1') [df,fx,val,uk] = Perform_L1_Constraint(xk,mu,U,Ut);end | |
872 # | |
873 # if strcmpi(TypeMin,'TV') [df,fx] = Perform_TV_Constraint(xk,mu,Dv,Dh,D,U,Ut);end | |
874 # | |
875 # #---- Primal Problem | |
876 # | |
877 # #---- Updating yk | |
878 # | |
879 # # | |
880 # # yk = Argmin_x Lmu/2 ||x - xk||_l2^2 + <df,x-xk> s.t. ||b-Ax||_l2 < delta | |
881 # # Let xp be sqrt(Lmu) (x-xk), dfp be df/sqrt(Lmu), bp be sqrt(Lmu)(b- Axk) and deltap be sqrt(Lmu)delta | |
882 # # yk = xk + 1/sqrt(Lmu) Argmin_xp 1/2 || xp ||_2^2 + <dfp,xp> s.t. || bp - Axp ||_2 < deltap | |
883 # # | |
884 # | |
885 # | |
886 # cp = xk - 1/Lmu*df; # this is "q" in eq. (3.7) in the paper | |
887 # | |
888 # Acp = Afun( cp ); | |
889 # if ~isempty(AAtinv) && isempty(USV) | |
890 # AtAcp = Atfun( AAtinv_fun( Acp ) ); | |
891 # else | |
892 # AtAcp = Atfun( Acp ); | |
893 # end | |
894 # | |
895 # residuals(k+1,1) = norm( b-Axk); # the residual | |
896 # residuals(k+1,2) = fx; # the value of the objective | |
897 # #--- if user has supplied a function, apply it to the iterate | |
898 # if RECORD_DATA | |
899 # outputData(k+1,:) = outFcn(xk); | |
900 # end | |
901 # | |
902 # if delta > 0 | |
903 # if ~isempty(USV) | |
904 # # there are more efficient methods, but we're assuming | |
905 # # that A is negligible compared to U and Ut. | |
906 # # Here we make the change of variables x <-- x - xk | |
907 # # and df <-- df/L | |
908 # dfp = -Lmu1*df; Adfp = -(Axk - Acp); | |
909 # bp = b - Axk; | |
910 # deltap = delta; | |
911 # # Check if we even need to project: | |
912 # if norm( Adfp - bp ) < deltap | |
913 # lambdaY = 0; projIter = 0; | |
914 # # i.e. projection = dfp; | |
915 # yk = xk + dfp; | |
916 # Ayk = Axk + Adfp; | |
917 # else | |
918 # lambdaY_old = lambdaY; | |
919 # [projection,projIter,lambdaY] = fastProjection(Q,S,V,dfp,bp,... | |
920 # deltap, .999*lambdaY_old ); | |
921 # if lambdaY > 0, disp('lambda is positive!'); keyboard; end | |
922 # yk = xk + projection; | |
923 # Ayk = Afun(yk); | |
924 # # DEBUGGING | |
925 ## if projIter == 50 | |
926 ## fprintf('\n Maxed out iterations at y\n'); | |
927 ## keyboard | |
928 ## end | |
929 # end | |
930 # else | |
931 # lambda = max(0,Lmu*(norm(b-Acp)/delta - 1));gamma = lambda/(lambda + Lmu); | |
932 # yk = lambda/Lmu*(1-gamma)*Atb + cp - gamma*AtAcp; | |
933 # # for calculating the residual, we'll avoid calling A() | |
934 # # by storing A(yk) here (using A'*A = I): | |
935 # Ayk = lambda/Lmu*(1-gamma)*b + Acp - gamma*Acp; | |
936 # end | |
937 # else | |
938 # # if delta is 0, the projection is simplified: | |
939 # yk = AtAAtb + cp - AtAcp; | |
940 # Ayk = b; | |
941 # end | |
942 # | |
943 # # DEBUGGING | |
944 ## if norm( Ayk - b ) > (1.05)*delta | |
945 ## fprintf('\nAyk failed projection test\n'); | |
946 ## keyboard; | |
947 ## end | |
948 # | |
949 # #--- Stopping criterion | |
950 # qp = abs(fx - mean(fmean))/mean(fmean); | |
951 # | |
952 # switch stopTest | |
953 # case 1 | |
954 # # look at the relative change in function value | |
955 # if qp <= TolVar && OK; break;end | |
956 # if qp <= TolVar && ~OK; OK=1; end | |
957 # case 2 | |
958 # # look at the l_inf change from previous iterate | |
959 # if k >= 1 && norm( xk - xold, 'inf' ) <= TolVar | |
960 # break | |
961 # end | |
962 # end | |
963 # fmean = [fx,fmean]; | |
964 # if (length(fmean) > 10) fmean = fmean(1:10);end | |
965 # | |
966 # | |
967 # | |
968 # #--- Updating zk | |
969 # | |
970 # apk =0.5*(k+1); | |
971 # Ak = Ak + apk; | |
972 # tauk = 2/(k+3); | |
973 # | |
974 # wk = apk*df + wk; | |
975 # | |
976 # # | |
977 # # zk = Argmin_x Lmu/2 ||b - Ax||_l2^2 + Lmu/2||x - xplug ||_2^2+ <wk,x-xk> | |
978 # # s.t. ||b-Ax||_l2 < delta | |
979 # # | |
980 # | |
981 # cp = xplug - 1/Lmu*wk; | |
982 # | |
983 # Acp = Afun( cp ); | |
984 # if ~isempty( AAtinv ) && isempty(USV) | |
985 # AtAcp = Atfun( AAtinv_fun( Acp ) ); | |
986 # else | |
987 # AtAcp = Atfun( Acp ); | |
988 # end | |
989 # | |
990 # if delta > 0 | |
991 # if ~isempty(USV) | |
992 # # Make the substitution wk <-- wk/K | |
993 # | |
994 ## dfp = (xplug - Lmu1*wk); # = cp | |
995 ## Adfp= (Axplug - Acp); | |
996 # dfp = cp; Adfp = Acp; | |
997 # bp = b; | |
998 # deltap = delta; | |
999 ## dfp = SLmu*xplug - SLmu1*wk; | |
1000 ## bp = SLmu*b; | |
1001 ## deltap = SLmu*delta; | |
1002 # | |
1003 # # See if we even need to project: | |
1004 # if norm( Adfp - bp ) < deltap | |
1005 # zk = dfp; | |
1006 # Azk = Adfp; | |
1007 # else | |
1008 # [projection,projIter,lambdaZ] = fastProjection(Q,S,V,dfp,bp,... | |
1009 # deltap, .999*lambdaZ ); | |
1010 # if lambdaZ > 0, disp('lambda is positive!'); keyboard; end | |
1011 # zk = projection; | |
1012 # # zk = SLmu1*projection; | |
1013 # Azk = Afun(zk); | |
1014 # | |
1015 # # DEBUGGING: | |
1016 ## if projIter == 50 | |
1017 ## fprintf('\n Maxed out iterations at z\n'); | |
1018 ## keyboard | |
1019 ## end | |
1020 # end | |
1021 # else | |
1022 # lambda = max(0,Lmu*(norm(b-Acp)/delta - 1));gamma = lambda/(lambda + Lmu); | |
1023 # zk = lambda/Lmu*(1-gamma)*Atb + cp - gamma*AtAcp; | |
1024 # # for calculating the residual, we'll avoid calling A() | |
1025 # # by storing A(zk) here (using A'*A = I): | |
1026 # Azk = lambda/Lmu*(1-gamma)*b + Acp - gamma*Acp; | |
1027 # end | |
1028 # else | |
1029 # # if delta is 0, this is simplified: | |
1030 # zk = AtAAtb + cp - AtAcp; | |
1031 # Azk = b; | |
1032 # end | |
1033 # | |
1034 # # DEBUGGING | |
1035 ## if norm( Ayk - b ) > (1.05)*delta | |
1036 ## fprintf('\nAzk failed projection test\n'); | |
1037 ## keyboard; | |
1038 ## end | |
1039 # | |
1040 # #--- Updating xk | |
1041 # | |
1042 # xkp = tauk*zk + (1-tauk)*yk; | |
1043 # xold = xk; | |
1044 # xk=xkp; | |
1045 # Axk = tauk*Azk + (1-tauk)*Ayk; | |
1046 # | |
1047 # if ~mod(k,10), Axk = Afun(xk); end # otherwise slowly lose precision | |
1048 # # DEBUG | |
1049 ## if norm(Axk - Afun(xk) ) > 1e-6, disp('error with Axk'); keyboard; end | |
1050 # | |
1051 # #--- display progress if desired | |
1052 # if ~mod(k+1,Verbose ) | |
1053 # printf('Iter: #3d ~ fmu: #.3e ~ Rel. Variation of fmu: #.2e ~ Residual: #.2e',... | |
1054 # k+1,fx,qp,residuals(k+1,1) ); | |
1055 # #--- if user has supplied a function to calculate the error, | |
1056 # # apply it to the current iterate and dislay the output: | |
1057 # if DISPLAY_ERROR, printf(' ~ Error: #.2e',errFcn(xk)); end | |
1058 # printf('\n'); | |
1059 # end | |
1060 # if abs(fx)>1e20 || abs(residuals(k+1,1)) >1e20 || isnan(fx) | |
1061 # error('Nesta: possible divergence or NaN. Bad estimate of ||A''A||?'); | |
1062 # end | |
1063 # | |
1064 #end | |
1065 # | |
1066 #niter = k+1; | |
1067 # | |
1068 ##-- truncate output vectors | |
1069 #residuals = residuals(1:niter,:); | |
1070 #if RECORD_DATA, outputData = outputData(1:niter,:); end | |
1071 | |
1072 # End of original Matab code | |
1073 #--------------------- | |
1074 | |
1075 #fid = setOpts('fid',1); | |
1076 #function printf(varargin), fprintf(fid,varargin{:}); end | |
1077 opts,maxiter,userSet = setOpts(opts,'maxiter',10000,0); | |
1078 opts,TolVar,userSet = setOpts(opts,'TolVar',1e-5); | |
1079 opts,TypeMin,userSet = setOpts(opts,'TypeMin','L1'); | |
1080 opts,Verbose,userSet = setOpts(opts,'Verbose',True); | |
1081 opts,errFcn,userSet = setOpts(opts,'errFcn',None); | |
1082 opts,outFcn,userSet = setOpts(opts,'outFcn',None); | |
1083 opts,stopTest,userSet = setOpts(opts,'stopTest',1,1,2); | |
1084 opts,U,userSet = setOpts(opts,'U',lambda x: x ); | |
1085 #if ~isa(U,'function_handle') | |
1086 if hasattr(U,'__call__'): | |
1087 opts,Ut,userSet = setOpts(opts,'Ut',None); | |
1088 else: | |
1089 opts,Ut,userSet = setOpts(opts,'Ut', lambda x: x ); | |
1090 #end | |
1091 opts,xplug,userSet = setOpts(opts,'xplug',None); | |
1092 opts,normU,userSet = setOpts(opts,'normU',1); | |
1093 | |
1094 if delta < 0: | |
1095 print 'delta must be greater or equal to zero' | |
1096 raise | |
1097 | |
1098 if hasattr(A,'__call__'): | |
1099 Atfun = At; | |
1100 Afun = A; | |
1101 else: | |
1102 Atfun = lambda x: numpy.dot(A.T,x) | |
1103 Afun = lambda x: numpy.dot(A,x) | |
1104 #end | |
1105 Atb = Atfun(b); | |
1106 | |
1107 opts,AAtinv,userSet = setOpts(opts,'AAtinv',None); | |
1108 opts,USV,userSet = setOpts(opts,'USV',None); | |
1109 if USV is not None: | |
1110 #if isstruct(USV) | |
1111 Q = USV['U']; # we can't use "U" as the variable name | |
1112 # since "U" already refers to the analysis operator | |
1113 S = USV['S']; | |
1114 #if isvector(S), s = S; S = diag(s); | |
1115 #else s = diag(S); end | |
1116 if S.ndim is 1: | |
1117 s = S | |
1118 else: | |
1119 s = numpy.diag(S) | |
1120 | |
1121 V = USV['V']; | |
1122 #else | |
1123 # error('opts.USV must be a structure'); | |
1124 #end | |
1125 #if isempty(AAtinv) | |
1126 if AAtinv is None: | |
1127 #AAtinv = Q*diag( s.^(-2) )*Q'; | |
1128 AAtinv = numpy.dot(Q, numpy.dot(numpy.diag(s ** -2), Q.T)) | |
1129 #end | |
1130 #end | |
1131 # --- for A not a projection (experimental) | |
1132 #if ~isempty(AAtinv) | |
1133 if AAtinv is not None: | |
1134 #if isa(AAtinv,'function_handle') | |
1135 if hasattr(AAtinv, '__call__'): | |
1136 AAtinv_fun = AAtinv; | |
1137 else: | |
1138 AAtinv_fun = lambda x: numpy.dot(AAtinv,x) | |
1139 #end | |
1140 | |
1141 AtAAtb = Atfun( AAtinv_fun(b) ); | |
1142 | |
1143 else: | |
1144 # We assume it's a projection | |
1145 AtAAtb = Atb; | |
1146 AAtinv_fun = lambda x: x; | |
1147 #end | |
1148 | |
1149 if xplug == None: | |
1150 xplug = AtAAtb.copy(); | |
1151 #end | |
1152 | |
1153 #---- Initialization | |
1154 #N = length(xplug); | |
1155 N = len(xplug) | |
1156 #wk = zeros(N,1); | |
1157 wk = numpy.zeros(N) | |
1158 xk = xplug.copy() | |
1159 | |
1160 | |
1161 #---- Init Variables | |
1162 Ak = 0.0; | |
1163 Lmu = normU/mu; | |
1164 yk = xk.copy(); | |
1165 zk = xk.copy(); | |
1166 fmean = numpy.finfo(float).tiny/10.0; | |
1167 OK = 0; | |
1168 n = math.floor(math.sqrt(N)); | |
1169 | |
1170 #---- Computing Atb | |
1171 Atb = Atfun(b); | |
1172 Axk = Afun(xk);# only needed if you want to see the residuals | |
1173 # Axplug = Axk; | |
1174 | |
1175 | |
1176 #---- TV Minimization | |
1177 if TypeMin == 'TV': | |
1178 print 'Nic:TODO: TV minimization not yet implemented!' | |
1179 raise | |
1180 #if strcmpi(TypeMin,'TV') | |
1181 # Lmu = 8*Lmu; | |
1182 # Dv = spdiags([reshape([-ones(n-1,n); zeros(1,n)],N,1) ... | |
1183 # reshape([zeros(1,n); ones(n-1,n)],N,1)], [0 1], N, N); | |
1184 # Dh = spdiags([reshape([-ones(n,n-1) zeros(n,1)],N,1) ... | |
1185 # reshape([zeros(n,1) ones(n,n-1)],N,1)], [0 n], N, N); | |
1186 # D = sparse([Dh;Dv]); | |
1187 #end | |
1188 | |
1189 | |
1190 Lmu1 = 1.0/Lmu; | |
1191 # SLmu = sqrt(Lmu); | |
1192 # SLmu1 = 1/sqrt(Lmu); | |
1193 lambdaY = 0.; | |
1194 lambdaZ = 0.; | |
1195 | |
1196 #---- setup data storage variables | |
1197 #[DISPLAY_ERROR, RECORD_DATA] = deal(false); | |
1198 DISPLAY_ERROR = False | |
1199 RECORD_DATA = False | |
1200 #outputData = deal([]); | |
1201 outputData = None | |
1202 residuals = numpy.zeros((maxiter,2)) | |
1203 #if ~isempty(errFcn), DISPLAY_ERROR = true; end | |
1204 if errFcn is not None: | |
1205 DISPLAY_ERROR = True | |
1206 #if ~isempty(outFcn) && nargout >= 4 | |
1207 if outFcn is not None: # Output max number of arguments | |
1208 RECORD_DATA = True | |
1209 outputData = numpy.zeros(maxiter, outFcn(xplug).shape[1]); | |
1210 #end | |
1211 | |
1212 #for k = 0:maxiter-1, | |
1213 for k in numpy.arange(maxiter): | |
1214 | |
1215 #---- Dual problem | |
1216 | |
1217 #if strcmpi(TypeMin,'L1') [df,fx,val,uk] = Perform_L1_Constraint(xk,mu,U,Ut);end | |
1218 if TypeMin == 'L1': | |
1219 df,fx,val,uk = Perform_L1_Constraint(xk,mu,U,Ut) | |
1220 | |
1221 # Nic: TODO: TV not implemented yet ! | |
1222 #if strcmpi(TypeMin,'TV') [df,fx] = Perform_TV_Constraint(xk,mu,Dv,Dh,D,U,Ut);end | |
1223 | |
1224 #---- Primal Problem | |
1225 | |
1226 #---- Updating yk | |
1227 | |
1228 # | |
1229 # yk = Argmin_x Lmu/2 ||x - xk||_l2^2 + <df,x-xk> s.t. ||b-Ax||_l2 < delta | |
1230 # Let xp be sqrt(Lmu) (x-xk), dfp be df/sqrt(Lmu), bp be sqrt(Lmu)(b- Axk) and deltap be sqrt(Lmu)delta | |
1231 # yk = xk + 1/sqrt(Lmu) Argmin_xp 1/2 || xp ||_2^2 + <dfp,xp> s.t. || bp - Axp ||_2 < deltap | |
1232 # | |
1233 | |
1234 | |
1235 cp = xk - 1./Lmu*df; # this is "q" in eq. (3.7) in the paper | |
1236 | |
1237 Acp = Afun( cp ); | |
1238 #if ~isempty(AAtinv) && isempty(USV) | |
1239 if AAtinv is not None and USV is None: | |
1240 AtAcp = Atfun( AAtinv_fun( Acp ) ); | |
1241 else: | |
1242 AtAcp = Atfun( Acp ); | |
1243 #end | |
1244 | |
1245 #residuals(k+1,1) = norm( b-Axk); # the residual | |
1246 residuals[k,0] = numpy.linalg.norm(b-Axk) | |
1247 #residuals(k+1,2) = fx; # the value of the objective | |
1248 residuals[k,1] = fx | |
1249 #--- if user has supplied a function, apply it to the iterate | |
1250 if RECORD_DATA: | |
1251 outputData[k+1,:] = outFcn(xk); | |
1252 #end | |
1253 | |
1254 if delta > 0: | |
1255 #if ~isempty(USV) | |
1256 if USV is not None: | |
1257 # there are more efficient methods, but we're assuming | |
1258 # that A is negligible compared to U and Ut. | |
1259 # Here we make the change of variables x <-- x - xk | |
1260 # and df <-- df/L | |
1261 dfp = -Lmu1*df; | |
1262 Adfp = -(Axk - Acp); | |
1263 bp = b - Axk; | |
1264 deltap = delta; | |
1265 # Check if we even need to project: | |
1266 if numpy.linalg.norm( Adfp - bp ) < deltap: | |
1267 lambdaY = 0. | |
1268 projIter = 0; | |
1269 # i.e. projection = dfp; | |
1270 yk = xk + dfp; | |
1271 Ayk = Axk + Adfp; | |
1272 else: | |
1273 lambdaY_old = lambdaY.copy(); | |
1274 #[projection,projIter,lambdaY] = fastProjection(Q,S,V,dfp,bp,deltap, .999*lambdaY_old ); | |
1275 projection,projIter,lambdaY = fastProjection(Q,S,V,dfp,bp,deltap, .999*lambdaY_old ) | |
1276 #if lambdaY > 0, disp('lambda is positive!'); keyboard; end | |
1277 if lambdaY > 0: | |
1278 print 'lambda is positive!' | |
1279 raise | |
1280 yk = xk + projection; | |
1281 Ayk = Afun(yk); | |
1282 # DEBUGGING | |
1283 # if projIter == 50 | |
1284 # fprintf('\n Maxed out iterations at y\n'); | |
1285 # keyboard | |
1286 # end | |
1287 #end | |
1288 else: | |
1289 lambdaa = max(0,Lmu*(numpy.linalg.norm(b-Acp)/delta - 1)) | |
1290 gamma = lambdaa/(lambdaa + Lmu); | |
1291 yk = lambdaa/Lmu*(1-gamma)*Atb + cp - gamma*AtAcp; | |
1292 # for calculating the residual, we'll avoid calling A() | |
1293 # by storing A(yk) here (using A'*A = I): | |
1294 Ayk = lambdaa/Lmu*(1-gamma)*b + Acp - gamma*Acp; | |
1295 #end | |
1296 else: | |
1297 # if delta is 0, the projection is simplified: | |
1298 yk = AtAAtb + cp - AtAcp; | |
1299 Ayk = b.copy(); | |
1300 #end | |
1301 | |
1302 # DEBUGGING | |
1303 # if norm( Ayk - b ) > (1.05)*delta | |
1304 # fprintf('\nAyk failed projection test\n'); | |
1305 # keyboard; | |
1306 # end | |
1307 | |
1308 #--- Stopping criterion | |
1309 qp = abs(fx - numpy.mean(fmean))/numpy.mean(fmean); | |
1310 | |
1311 #switch stopTest | |
1312 # case 1 | |
1313 if stopTest == 1: | |
1314 # look at the relative change in function value | |
1315 #if qp <= TolVar && OK; break;end | |
1316 if qp <= TolVar and OK: | |
1317 break | |
1318 #if qp <= TolVar && ~OK; OK=1; end | |
1319 if qp <= TolVar and not OK: | |
1320 OK = 1 | |
1321 elif stopTest == 2: | |
1322 # look at the l_inf change from previous iterate | |
1323 if k >= 1 and numpy.linalg.norm( xk - xold, 'inf' ) <= TolVar: | |
1324 break | |
1325 #end | |
1326 #end | |
1327 #fmean = [fx,fmean]; | |
1328 fmean = numpy.hstack((fx,fmean)); | |
1329 if (len(fmean) > 10): | |
1330 fmean = fmean[:10] | |
1331 | |
1332 | |
1333 | |
1334 #--- Updating zk | |
1335 | |
1336 apk = 0.5*(k+1); | |
1337 Ak = Ak + apk; | |
1338 tauk = 2/(k+3); | |
1339 | |
1340 wk = apk*df + wk; | |
1341 | |
1342 # | |
1343 # zk = Argmin_x Lmu/2 ||b - Ax||_l2^2 + Lmu/2||x - xplug ||_2^2+ <wk,x-xk> | |
1344 # s.t. ||b-Ax||_l2 < delta | |
1345 # | |
1346 | |
1347 cp = xplug - 1.0/Lmu*wk; | |
1348 | |
1349 Acp = Afun( cp ); | |
1350 #if ~isempty( AAtinv ) && isempty(USV) | |
1351 if AAtinv is not None and USV is None: | |
1352 AtAcp = Atfun( AAtinv_fun( Acp ) ); | |
1353 else: | |
1354 AtAcp = Atfun( Acp ); | |
1355 #end | |
1356 | |
1357 if delta > 0: | |
1358 #if ~isempty(USV) | |
1359 if USV is not None: | |
1360 # Make the substitution wk <-- wk/K | |
1361 | |
1362 # dfp = (xplug - Lmu1*wk); # = cp | |
1363 # Adfp= (Axplug - Acp); | |
1364 dfp = cp.copy() | |
1365 Adfp = Acp.copy() | |
1366 bp = b.copy(); | |
1367 deltap = delta; | |
1368 # dfp = SLmu*xplug - SLmu1*wk; | |
1369 # bp = SLmu*b; | |
1370 # deltap = SLmu*delta; | |
1371 | |
1372 # See if we even need to project: | |
1373 if numpy.linalg.norm( Adfp - bp ) < deltap: | |
1374 zk = dfp.copy(); | |
1375 Azk = Adfp.copy(); | |
1376 else: | |
1377 projection,projIter,lambdaZ = fastProjection(Q,S,V,dfp,bp,deltap, .999*lambdaZ ) | |
1378 if lambdaZ > 0: | |
1379 print 'lambda is positive!' | |
1380 raise | |
1381 zk = projection.copy(); | |
1382 # zk = SLmu1*projection; | |
1383 Azk = Afun(zk); | |
1384 | |
1385 # DEBUGGING: | |
1386 # if projIter == 50 | |
1387 # fprintf('\n Maxed out iterations at z\n'); | |
1388 # keyboard | |
1389 # end | |
1390 #end | |
1391 else: | |
1392 lambdaa = max(0,Lmu*(numpy.linalg.norm(b-Acp)/delta - 1)); | |
1393 gamma = lambdaa/(lambdaa + Lmu); | |
1394 zk = lambdaa/Lmu*(1-gamma)*Atb + cp - gamma*AtAcp; | |
1395 # for calculating the residual, we'll avoid calling A() | |
1396 # by storing A(zk) here (using A'*A = I): | |
1397 Azk = lambdaa/Lmu*(1-gamma)*b + Acp - gamma*Acp; | |
1398 #end | |
1399 else: | |
1400 # if delta is 0, this is simplified: | |
1401 zk = AtAAtb + cp - AtAcp; | |
1402 Azk = b; | |
1403 #end | |
1404 | |
1405 # DEBUGGING | |
1406 # if norm( Ayk - b ) > (1.05)*delta | |
1407 # fprintf('\nAzk failed projection test\n'); | |
1408 # keyboard; | |
1409 # end | |
1410 | |
1411 #--- Updating xk | |
1412 | |
1413 xkp = tauk*zk + (1-tauk)*yk; | |
1414 xold = xk.copy(); | |
1415 xk = xkp.copy(); | |
1416 Axk = tauk*Azk + (1-tauk)*Ayk; | |
1417 | |
1418 #if ~mod(k,10), Axk = Afun(xk); end # otherwise slowly lose precision | |
1419 if not numpy.mod(k,10): | |
1420 Axk = Afun(xk) | |
1421 # DEBUG | |
1422 # if norm(Axk - Afun(xk) ) > 1e-6, disp('error with Axk'); keyboard; end | |
1423 | |
1424 #--- display progress if desired | |
1425 #if ~mod(k+1,Verbose ) | |
1426 if not numpy.mod(k+1,Verbose): | |
1427 #printf('Iter: #3d ~ fmu: #.3e ~ Rel. Variation of fmu: #.2e ~ Residual: #.2e',k+1,fx,qp,residuals(k+1,1) ); | |
1428 print 'Iter: ',k+1,' ~ fmu: ',fx,' ~ Rel. Variation of fmu: ',qp,' ~ Residual:',residuals[k+1,0] | |
1429 #--- if user has supplied a function to calculate the error, | |
1430 # apply it to the current iterate and dislay the output: | |
1431 #if DISPLAY_ERROR, printf(' ~ Error: #.2e',errFcn(xk)); end | |
1432 if DISPLAY_ERROR: | |
1433 print ' ~ Error:',errFcn(xk) | |
1434 #end | |
1435 if abs(fx)>1e20 or abs(residuals[k,0]) >1e20 or numpy.isnan(fx): | |
1436 #error('Nesta: possible divergence or NaN. Bad estimate of ||A''A||?'); | |
1437 print 'Nesta: possible divergence or NaN. Bad estimate of ||A''A||?' | |
1438 raise | |
1439 #end | |
1440 | |
1441 #end | |
1442 | |
1443 niter = k+1; | |
1444 | |
1445 #-- truncate output vectors | |
1446 residuals = residuals[:niter,:] | |
1447 #if RECORD_DATA, outputData = outputData(1:niter,:); end | |
1448 if RECORD_DATA: | |
1449 outputData = outputData[:niter,:] | |
1450 | |
1451 return xk,niter,residuals,outputData,opts | |
1452 | |
1453 | |
1454 ############ PERFORM THE L1 CONSTRAINT ################## | |
1455 | |
1456 #function [df,fx,val,uk] = Perform_L1_Constraint(xk,mu,U,Ut) | |
1457 def Perform_L1_Constraint(xk,mu,U,Ut): | |
1458 | |
1459 #if isa(U,'function_handle') | |
1460 if hasattr(U,'__call__'): | |
1461 uk = U(xk); | |
1462 else: | |
1463 uk = numpy.dot(U,xk) | |
1464 #end | |
1465 fx = uk.copy() | |
1466 | |
1467 #uk = uk./max(mu,abs(uk)); | |
1468 uk = uk / max(mu,abs(uk)) | |
1469 #val = real(uk'*fx); | |
1470 val = numpy.real(numpy.vdot(uk,fx)) | |
1471 #fx = real(uk'*fx - mu/2*norm(uk)^2); | |
1472 fx = numpy.real(numpy.vdot(uk,fx) - mu/2.*numpy.linalg.norm(uk)**2); | |
1473 | |
1474 #if isa(Ut,'function_handle') | |
1475 if hasattr(U,'__call__'): | |
1476 df = Ut(uk); | |
1477 else: | |
1478 #df = U'*uk; | |
1479 df = numpy.dot(U.T,uk) | |
1480 #end | |
1481 return df,fx,val,uk | |
1482 #end | |
1483 | |
1484 # Nic: TODO: TV not implemented yet! | |
1485 ############ PERFORM THE TV CONSTRAINT ################## | |
1486 #function [df,fx] = Perform_TV_Constraint(xk,mu,Dv,Dh,D,U,Ut) | |
1487 # if isa(U,'function_handle') | |
1488 # x = U(xk); | |
1489 # else | |
1490 # x = U*xk; | |
1491 # end | |
1492 # df = zeros(size(x)); | |
1493 # | |
1494 # Dhx = Dh*x; | |
1495 # Dvx = Dv*x; | |
1496 # | |
1497 # tvx = sum(sqrt(abs(Dhx).^2+abs(Dvx).^2)); | |
1498 # w = max(mu,sqrt(abs(Dhx).^2 + abs(Dvx).^2)); | |
1499 # uh = Dhx ./ w; | |
1500 # uv = Dvx ./ w; | |
1501 # u = [uh;uv]; | |
1502 # fx = real(u'*D*x - mu/2 * 1/numel(u)*sum(u'*u)); | |
1503 # if isa(Ut,'function_handle') | |
1504 # df = Ut(D'*u); | |
1505 # else | |
1506 # df = U'*(D'*u); | |
1507 # end | |
1508 #end | |
1509 | |
1510 | |
1511 #function [x,k,l] = fastProjection( U, S, V, y, b, epsilon, lambda0, DISP ) | |
1512 def fastProjection( U, S, V, y, b, epsilon, lambda0=0, DISP=False ): | |
1513 # [x,niter,lambda] = fastProjection(U, S, V, y, b, epsilon, [lambda0], [DISP] ) | |
1514 # | |
1515 # minimizes || x - y || | |
1516 # such that || Ax - b || <= epsilon | |
1517 # | |
1518 # where USV' = A (i.e the SVD of A) | |
1519 # | |
1520 # The optional input "lambda0" is a guess for the Lagrange parameter | |
1521 # | |
1522 # Warning: for speed, does not calculate A(y) to see if x = y is feasible | |
1523 # | |
1524 # NESTA Version 1.1 | |
1525 # See also Core_Nesterov | |
1526 | |
1527 # Written by Stephen Becker, September 2009, srbecker@caltech.edu | |
1528 | |
1529 #--------------------- | |
1530 # Original Matab code: | |
1531 | |
1532 #DEBUG = true; | |
1533 #if nargin < 8 | |
1534 # DISP = false; | |
1535 #end | |
1536 ## -- Parameters for Newton's method -- | |
1537 #MAXIT = 70; | |
1538 #TOL = 1e-8 * epsilon; | |
1539 ## TOL = max( TOL, 10*eps ); # we can't do better than machine precision | |
1540 # | |
1541 #m = size(U,1); | |
1542 #n = size(V,1); | |
1543 #mn = min([m,n]); | |
1544 #if numel(S) > mn^2, S = diag(diag(S)); end # S should be a small square matrix | |
1545 #r = size(S); | |
1546 #if size(U,2) > r, U = U(:,1:r); end | |
1547 #if size(V,2) > r, V = V(:,1:r); end | |
1548 # | |
1549 #s = diag(S); | |
1550 #s2 = s.^2; | |
1551 # | |
1552 ## What we want to do: | |
1553 ## b = b - A*y; | |
1554 ## bb = U'*b; | |
1555 # | |
1556 ## if A doesn't have full row rank, then b may not be in the range | |
1557 #if size(U,1) > size(U,2) | |
1558 # bRange = U*(U'*b); | |
1559 # bNull = b - bRange; | |
1560 # epsilon = sqrt( epsilon^2 - norm(bNull)^2 ); | |
1561 #end | |
1562 #b = U'*b - S*(V'*y); # parenthesis is very important! This is expensive. | |
1563 # | |
1564 ## b2 = b.^2; | |
1565 #b2 = abs(b).^2; # for complex data | |
1566 #bs2 = b2.*s2; | |
1567 #epsilon2 = epsilon^2; | |
1568 # | |
1569 ## The following routine need to be fast | |
1570 ## For efficiency (at cost of transparency), we are writing the calculations | |
1571 ## in a way that minimize number of operations. The functions "f" | |
1572 ## and "fp" represent f and its derivative. | |
1573 # | |
1574 ## f = @(lambda) sum( b2 .*(1-lambda*s2).^(-2) ) - epsilon^2; | |
1575 ## fp = @(lambda) 2*sum( bs2 .*(1-lambda*s2).^(-3) ); | |
1576 #if nargin < 7, lambda0 = 0; end | |
1577 #l = lambda0; oldff = 0; | |
1578 #one = ones(m,1); | |
1579 #alpha = 1; # take full Newton steps | |
1580 #for k = 1:MAXIT | |
1581 # # make f(l) and fp(l) as efficient as possible: | |
1582 # ls = one./(one-l*s2); | |
1583 # ls2 = ls.^2; | |
1584 # ls3 = ls2.*ls; | |
1585 # ff = b2.'*ls2; # should be .', not ', even for complex data | |
1586 # ff = ff - epsilon2; | |
1587 # fpl = 2*( bs2.'*ls3 ); # should be .', not ', even for complex data | |
1588 ## ff = f(l); # this is a little slower | |
1589 ## fpl = fp(l); # this is a little slower | |
1590 # d = -ff/fpl; | |
1591 # if DISP, fprintf('#2d, lambda is #5.2f, f(lambda) is #.2e, f''(lambda) is #.2e\n',... | |
1592 # k,l,ff,fpl ); end | |
1593 # if abs(ff) < TOL, break; end # stopping criteria | |
1594 # l_old = l; | |
1595 # if k>2 && ( abs(ff) > 10*abs(oldff+100) ) #|| abs(d) > 1e13 ) | |
1596 # l = 0; alpha = 1/2; | |
1597 ## oldff = f(0); | |
1598 # oldff = sum(b2); oldff = oldff - epsilon2; | |
1599 # if DISP, disp('restarting'); end | |
1600 # else | |
1601 # if alpha < 1, alpha = (alpha+1)/2; end | |
1602 # l = l + alpha*d; | |
1603 # oldff = ff; | |
1604 # if l > 0 | |
1605 # l = 0; # shouldn't be positive | |
1606 # oldff = sum(b2); oldff = oldff - epsilon2; | |
1607 # end | |
1608 # end | |
1609 # if l_old == l && l == 0 | |
1610 # if DISP, disp('Making no progress; x = y is probably feasible'); end | |
1611 # break; | |
1612 # end | |
1613 #end | |
1614 ## if k == MAXIT && DEBUG, disp('maxed out iterations'); end | |
1615 #if l < 0 | |
1616 # xhat = -l*s.*b./( 1 - l*s2 ); | |
1617 # x = V*xhat + y; | |
1618 #else | |
1619 # # y is already feasible, so no need to project | |
1620 # l = 0; | |
1621 # x = y; | |
1622 #end | |
1623 | |
1624 # End of original Matab code | |
1625 #--------------------- | |
1626 | |
1627 DEBUG = True; | |
1628 #if nargin < 8 | |
1629 # DISP = false; | |
1630 #end | |
1631 # -- Parameters for Newton's method -- | |
1632 MAXIT = 70; | |
1633 TOL = 1e-8 * epsilon; | |
1634 # TOL = max( TOL, 10*eps ); # we can't do better than machine precision | |
1635 | |
1636 #m = size(U,1); | |
1637 #n = size(V,1); | |
1638 m = U.shape[0] | |
1639 n = V.shape[0] | |
1640 mn = min(m,n); | |
1641 #if numel(S) > mn^2, S = diag(diag(S)); end # S should be a small square matrix | |
1642 if S.size > mn**2: | |
1643 S = numpy.diag(numpy.diag(S)) | |
1644 #r = size(S); | |
1645 r = S.shape | |
1646 #if size(U,2) > r, U = U(:,1:r); end | |
1647 if U.shape[1] > r: | |
1648 U = U[:,r] | |
1649 #if size(V,2) > r, V = V(:,1:r); end | |
1650 if V.shape[1] > r: | |
1651 V = V[:,r] | |
1652 | |
1653 s = numpy.diag(S); | |
1654 s2 = s**2; | |
1655 | |
1656 # What we want to do: | |
1657 # b = b - A*y; | |
1658 # bb = U'*b; | |
1659 | |
1660 # if A doesn't have full row rank, then b may not be in the range | |
1661 #if size(U,1) > size(U,2) | |
1662 if U.shape[0] > U.shape[1]: | |
1663 #bRange = U*(U'*b); | |
1664 bRange = numpy.dot(U, numpy.dot(U.T, b)) | |
1665 bNull = b - bRange; | |
1666 epsilon = math.sqrt( epsilon**2 - numpy.linalg.norm(bNull)**2 ); | |
1667 #end | |
1668 #b = U'*b - S*(V'*y); # parenthesis is very important! This is expensive. | |
1669 b = numpy.dot(U.T,b) - numpy.dot(S, numpy.dot(V.T,y)) | |
1670 | |
1671 # b2 = b.^2; | |
1672 b2 = abs(b)**2; # for complex data | |
1673 bs2 = b2**s2; | |
1674 epsilon2 = epsilon**2; | |
1675 | |
1676 # The following routine need to be fast | |
1677 # For efficiency (at cost of transparency), we are writing the calculations | |
1678 # in a way that minimize number of operations. The functions "f" | |
1679 # and "fp" represent f and its derivative. | |
1680 | |
1681 # f = @(lambda) sum( b2 .*(1-lambda*s2).^(-2) ) - epsilon^2; | |
1682 # fp = @(lambda) 2*sum( bs2 .*(1-lambda*s2).^(-3) ); | |
1683 | |
1684 #if nargin < 7, lambda0 = 0; end | |
1685 l = lambda0; | |
1686 oldff = 0; | |
1687 one = numpy.ones(m); | |
1688 alpha = 1; # take full Newton steps | |
1689 #for k = 1:MAXIT | |
1690 for k in numpy.arange(MAXIT): | |
1691 # make f(l) and fp(l) as efficient as possible: | |
1692 #ls = one./(one-l*s2); | |
1693 ls = one/(one-l*s2) | |
1694 ls2 = ls**2; | |
1695 ls3 = ls2**ls; | |
1696 #ff = b2.'*ls2; # should be .', not ', even for complex data | |
1697 ff = numpy.dot(b2.conj(), ls2) | |
1698 ff = ff - epsilon2; | |
1699 #fpl = 2*( bs2.'*ls3 ); # should be .', not ', even for complex data | |
1700 fpl = 2 * numpy.dot(bs2.conj(),ls3) | |
1701 # ff = f(l); # this is a little slower | |
1702 # fpl = fp(l); # this is a little slower | |
1703 d = -ff/fpl; | |
1704 # if DISP, fprintf('#2d, lambda is #5.2f, f(lambda) is #.2e, f''(lambda) is #.2e\n',k,l,ff,fpl ); end | |
1705 if DISP: | |
1706 print k,', lambda is ',l,', f(lambda) is ',ff,', f''(lambda) is',fpl | |
1707 #if abs(ff) < TOL, break; end # stopping criteria | |
1708 if abs(ff) < TOL: | |
1709 break | |
1710 l_old = l.copy(); | |
1711 if k>2 and ( abs(ff) > 10*abs(oldff+100) ): #|| abs(d) > 1e13 ) | |
1712 l = 0; | |
1713 alpha = 1.0/2.0; | |
1714 # oldff = f(0); | |
1715 oldff = b2.sum(); oldff = oldff - epsilon2; | |
1716 if DISP: | |
1717 print 'restarting' | |
1718 else: | |
1719 if alpha < 1: | |
1720 alpha = (alpha+1.0)/2.0 | |
1721 l = l + alpha*d; | |
1722 oldff = ff | |
1723 if l > 0: | |
1724 l = 0; # shouldn't be positive | |
1725 oldff = b2.sum() | |
1726 oldff = oldff - epsilon2; | |
1727 #end | |
1728 #end | |
1729 if l_old == l and l == 0: | |
1730 #if DISP, disp('Making no progress; x = y is probably feasible'); end | |
1731 if DISP: | |
1732 print 'Making no progress; x = y is probably feasible' | |
1733 break; | |
1734 #end | |
1735 #end | |
1736 # if k == MAXIT && DEBUG, disp('maxed out iterations'); end | |
1737 if l < 0: | |
1738 #xhat = -l*s.*b./( 1 - l*s2 ); | |
1739 xhat = numpy.dot(-l, s*b/( 1. - numpy.dot(l,s2) ) ) | |
1740 #x = V*xhat + y; | |
1741 x = numpy.dot(V,xhat) + y | |
1742 else: | |
1743 # y is already feasible, so no need to project | |
1744 l = 0; | |
1745 x = y.copy(); | |
1746 #end | |
1747 return x,k,l |