# HG changeset patch # User Daniele Barchiesi # Date 1317908021 -3600 # Node ID 68fb71aa5339c271914c8e679f845517fd981448 # Parent 290cca7d346912ea544acb1ed287e83b8ac3c07f Added dictionary decorrelation functions and test script for Letters paper. diff -r 290cca7d3469 -r 68fb71aa5339 DL/two-step DL/SMALL_two_step_DL.m --- a/DL/two-step DL/SMALL_two_step_DL.m Thu Sep 29 09:46:52 2011 +0100 +++ b/DL/two-step DL/SMALL_two_step_DL.m Thu Oct 06 14:33:41 2011 +0100 @@ -114,18 +114,16 @@ [dico, solver.solution] = dico_update(dico, sig, solver.solution, ... typeUpdate, flow, learningRate); dico = normcols(dico); - switch DL.param.decFcn - case 'mailhe' - dico = dico_decorr_symetric(dico, mu, solver.solution); - case 'tropp' + switch lower(DL.param.decFcn) + case 'ink-svd' + dico = dico_decorr_symetric(dico,mu,solver.solution); + case 'grassmannian' [n m] = size(dico); - dico = grassmannian(n,m,[],[],[],dico); - case 'barchiesi' - [n m] = size(dico); - params.nIter = 100; - dico = iterativeprojections(n,m,[],[],[],dico); - [~, ~, W] = rotatematrix(Problem.b,dico*solver.solution,'conjgradlie',params); - dico = W*dico; + dico = grassmannian(n,m,[],0.9,0.99,dico); + case 'shrinkgram' + dico = shrinkgram(dico,mu); + case 'iterproj' + dico = iterativeprojections(dico,mu,Problem.b1,solver.solution); otherwise end diff -r 290cca7d3469 -r 68fb71aa5339 examples/SMALL_test_coherence2.m --- a/examples/SMALL_test_coherence2.m Thu Sep 29 09:46:52 2011 +0100 +++ b/examples/SMALL_test_coherence2.m Thu Oct 06 14:33:41 2011 +0100 @@ -1,13 +1,13 @@ clc, clear, close all %% Parameteres +nTrials = 10; %number of trials of the experiment + % Dictionary learning parameters toolbox = 'TwoStepDL'; %dictionary learning toolbox -dicUpdate = {'ksvd'}; %dictionary learning updates -dicDecorr = {'none','mailhe','tropp','barchiesi'}; %dictionary decorrelation methods -minCoherence = linspace(0.1,1,1); %coherence levels -minCoherence = 0.4; -%dicDecorr = {'barchiesi'}; +dicUpdate = 'ksvd'; %dictionary learning updates +dicDecorr = {'iterproj','ink-svd','shrinkgram'}; %dictionary decorrelation methods +minCoherence = linspace(0.1,1,10); %coherence levels iterNum = 20; %number of iterations epsilon = 1e-6; %tolerance level @@ -19,20 +19,16 @@ blockSize = 256; %size of audio frames overlap = 0.5; %overlap between consecutive frames - % Dependent parameters nActiveAtoms = fix(blockSize/100*percActiveAtoms); %number of active atoms % Initial dictionaries -dctDict = dictionary('dct',blockSize,dictSize); -dctDict = dctDict.phi; gaborParam = struct('N',blockSize,'redundancyFactor',2,'wd',@rectwin); gaborDict = Gabor_Dictionary(gaborParam); -initDicts = {[],dctDict,gaborDict}; -initDicts = {[]}; +initDicts = {[],gaborDict}; %% Generate audio approximation problem -signal = buffer(signal,blockSize,blockSize*overlap,@rectwin); +signal = buffer(signal,blockSize,blockSize*overlap,@rectwin); %buffer frames of audio into columns of the matrix S SMALL.Problem.b = signal.S; SMALL.Problem.b1 = SMALL.Problem.b; % copy signals from training set b to test set b1 (needed for later functions) @@ -42,20 +38,19 @@ %% Test -nDicUpdates = length(dicUpdate); %number of dictionary updates nDecorrAlgs = length(dicDecorr); %number of decorrelation algorithms nCorrLevels = length(minCoherence); %number of coherence levels nInitDicts = length(initDicts); %number of initial dictionaries -SMALL.DL(nInitDicts,nCorrLevels,nDecorrAlgs,nDicUpdates) = SMALL_init_DL(toolbox); %create dictionary learning structures -for iInitDicts=1:nInitDicts - for 
iCorrLevels=1:nCorrLevels - for iDecorrAlgs=1:nDecorrAlgs - for iDicUpdates=1:nDicUpdates - SMALL.DL(iInitDicts,iCorrLevels,iDecorrAlgs,iDicUpdates).toolbox = toolbox; - SMALL.DL(iInitDicts,iCorrLevels,iDecorrAlgs,iDicUpdates).name = dicUpdate{iDicUpdates}; - SMALL.DL(iInitDicts,iCorrLevels,iDecorrAlgs,iDicUpdates).profile = true; - SMALL.DL(iInitDicts,iCorrLevels,iDecorrAlgs,iDicUpdates).param = ... +SMALL.DL(nTrials,nInitDicts,nCorrLevels,nDecorrAlgs) = SMALL_init_DL(toolbox); %create dictionary learning structures +for iTrial=1:nTrials + for iInitDicts=1:nInitDicts + for iCorrLevels=1:nCorrLevels + for iDecorrAlgs=1:nDecorrAlgs + SMALL.DL(iTrial,iInitDicts,iCorrLevels,iDecorrAlgs).toolbox = toolbox; + SMALL.DL(iTrial,iInitDicts,iCorrLevels,iDecorrAlgs).name = dicUpdate; + SMALL.DL(iTrial,iInitDicts,iCorrLevels,iDecorrAlgs).profile = true; + SMALL.DL(iTrial,iInitDicts,iCorrLevels,iDecorrAlgs).param = ... struct( 'data',SMALL.Problem.b,... 'Tdata',nActiveAtoms,... 'dictsize',dictSize,... @@ -66,8 +61,9 @@ 'coherence',minCoherence(iCorrLevels),... 'initdict',initDicts(iInitDicts)); - SMALL.DL(iInitDicts,iCorrLevels,iDecorrAlgs,iDicUpdates) = ... - SMALL_learn(SMALL.Problem,SMALL.DL(iInitDicts,iCorrLevels,iDecorrAlgs,iDicUpdates)); + SMALL.DL(iTrial,iInitDicts,iCorrLevels,iDecorrAlgs) = ... + SMALL_learn(SMALL.Problem,SMALL.DL(iTrial,iInitDicts,iCorrLevels,iDecorrAlgs)); + save('SMALL_DL','SMALL'); end end end @@ -75,23 +71,23 @@ %% Evaluate coherence and snr of representation for the various methods sr = zeros(size(SMALL.DL)); %signal to noise ratio -mu = zeros(size(SMALL.DL)); %coherence +mu = zeros(iTrial,iInitDicts,iCorrLevels,iDecorrAlgs,blockSize); %cumulative coherence dic(size(SMALL.DL)) = dictionary; %initialise dictionary objects -for iInitDict=1:nInitDicts - for iCorrLevels=1:nCorrLevels - for iDecorrAlgs=1:nDecorrAlgs - for iDicUpdates=1:nDicUpdates +for iTrial=1:nTrials + for iInitDicts=1:nInitDicts + for iCorrLevels=1:nCorrLevels + for iDecorrAlgs=1:nDecorrAlgs %Sparse representation - SMALL.Problem.A = SMALL.DL(iInitDicts,iCorrLevels,iDecorrAlgs,iDicUpdates).D; + SMALL.Problem.A = SMALL.DL(iTrial,iInitDicts,iCorrLevels,iDecorrAlgs).D; tempSolver = SMALL_solve(SMALL.Problem,solver); %calculate snr - sr(iInitDicts,iCorrLevels,iDecorrAlgs,iDicUpdates) = ... - snr(SMALL.Problem.b,SMALL.DL(iInitDicts,iCorrLevels,iDecorrAlgs,iDicUpdates).D*tempSolver.solution); + sr(iTrial,iInitDicts,iCorrLevels,iDecorrAlgs) = ... + snr(SMALL.Problem.b,SMALL.DL(iTrial,iInitDicts,iCorrLevels,iDecorrAlgs).D*tempSolver.solution); %calculate mu - dic(iInitDicts,iCorrLevels,iDecorrAlgs,iDicUpdates) = ... - dictionary(SMALL.DL(iInitDicts,iCorrLevels,iDecorrAlgs,iDicUpdates).D); - mu(iInitDicts,iCorrLevels,iDecorrAlgs,iDicUpdates) = ... - dic(iInitDicts,iCorrLevels,iDecorrAlgs,iDicUpdates).coherence; + dic(iTrial,iInitDicts,iCorrLevels,iDecorrAlgs) = ... + dictionary(SMALL.DL(iTrial,iInitDicts,iCorrLevels,iDecorrAlgs).D); + mu(iTrial,iInitDicts,iCorrLevels,iDecorrAlgs,:) = ... + dic(iTrial,iInitDicts,iCorrLevels,iDecorrAlgs).cumcoherence; end end end @@ -99,15 +95,15 @@ %% Plot results minMu = sqrt((dictSize-blockSize)/(blockSize*(dictSize-1))); %lowe bound on coherence -initDictsNames = {'Data','DCT','Gabor'}; -dicDecorrNames = {'K-SVD','INK-SVD','Grassmannian','New'}; -lineStyles = {'ks-','kd-','ko-','k*-'}; +initDictsNames = {'Data','Gabor'}; +dicDecorrNames = {'Iter. Proj. + Rotation','INK-SVD','Iter. 
Proj.'}; +lineStyles = {'ks-','kd-','ko-'}; for iInitDict=1:nInitDicts figure, hold on, grid on title([initDictsNames{iInitDict} ' Initialisation']); plot([1 1],[0 25],'k-'); for iDecorrAlgs=1:nDecorrAlgs - plot(mu(iInitDicts,:,iDecorrAlgs,1),sr(iInitDicts,:,iDecorrAlgs,1),... + plot(mu(1,iInitDict,:,iDecorrAlgs,1),sr(1,iInitDict,:,iDecorrAlgs),... lineStyles{iDecorrAlgs}); end plot([minMu minMu],[0 25],'k--') diff -r 290cca7d3469 -r 68fb71aa5339 util/classes/@dictionary/plotcumcoherence.m --- a/util/classes/@dictionary/plotcumcoherence.m Thu Sep 29 09:46:52 2011 +0100 +++ b/util/classes/@dictionary/plotcumcoherence.m Thu Oct 06 14:33:41 2011 +0100 @@ -1,10 +1,42 @@ -function plotcumcoherence(obj) -mu = cumcoherence(obj); -v = conv(mu,[1 1]); -ind = find(v<1, 1, 'last'); -plot(1:obj.len,mu); -hold on -line([ind ind], [min(mu) max(mu)],'Color','red'); -title(['Minimum allowed sparsity (Tanner):' num2str(ind/obj.len)]); -grid on -end \ No newline at end of file +function plotcumcoherence(obj,mu) +if ~exist('mu','var') || isempty(mu), mu = cumcoherence(obj); end +[d N] = size(obj.phi); + +% Plot cumulative coherence with lower and upper bounds +mumin = (1:d)*sqrt((N-d)/(d*(N-1))); +mumax = (1:d); +figure, +subplot(1,6,1:2) +hold on, grid on +plot(1,mumax(1),'k-s'); +plot(1,mu(1),'ko'); +plot(1,mumin(1),'k-d') +set(gca,'XScale','log','YScale','log'); +axis tight +ylabel('\mu'); +ylim([mumin(1) 10]) + +subplot(1,6,3:6) +hold on, grid on +plot(2:d,mumax(2:end),'k-s'); +plot(2:d,mu(2:end),'k-o'); +plot(2:d,mumin(2:end),'k-d'); +set(gca,'XScale','log','YScale','log'); +axis tight +xlabel('k'); +ylabel('\mu(k)'); +ylim([mumin(1) 10]) + +% Plot recovery bounds +plot(2:d,1-mu(1:d-1),'r-o') +plot(2:d,1-mumin(1:d-1)','r-d') +plot([2 d],[1/3 1/3],'b'); +legend('\mu_{max}(k)','\mu(k)','\mu_{min}(k)','Exact-Sparse \mu','Exact-Sparse \mu_{min}','Sparse'); + + +% v = conv(mu,[1 1]); +% ind = find(v<1, 1, 'last'); +% +% line([ind ind], [min(mu) max(mu)],'Color','red'); +% title(['Minimum allowed sparsity (Tanner):' num2str(ind/obj.len)]); +end diff -r 290cca7d3469 -r 68fb71aa5339 util/classes/dictionaryMatrices/grassmannian.m --- a/util/classes/dictionaryMatrices/grassmannian.m Thu Sep 29 09:46:52 2011 +0100 +++ b/util/classes/dictionaryMatrices/grassmannian.m Thu Oct 06 14:33:41 2011 +0100 @@ -12,7 +12,7 @@ if ~exist('verb','var') || isempty(verb), verb = false; end %verbose output if ~exist('initA','var') || isempty(initA), initA = randn(n,m); end %initial matrix -if ~exist('dd2','var') || isempty(dd2), dd2 = 0.99; end %shrinking factor +if ~exist('dd2','var') || isempty(dd2), dd2 = 0.9; end %shrinking factor if ~exist('dd1','var') || isempty(dd1), dd1 = 0.9; end %percentage of coherences to be shrinked if ~exist('nIter','var') || isempty(nIter), nIter = 10; end %number of iterations @@ -42,19 +42,6 @@ end end -% [~, Sigma_gram V_gram] = svd(G); %calculate svd decomposition of gramian - -% A = normc(A); %normalise dictionary - [V_gram Sigma_gram] = svd(G); %calculate svd decomposition of gramian Sigma_new = sqrt(Sigma_gram(1:n,:)).*sign(Sigma); %calculate singular values of dictionary A = Uinit*Sigma_new*V_gram'; %update dictionary - -% param.step = 0.01; -% param.reg = 0.01; -% param.nIter = 20; -% A = rotatematrix(initA,A,'linesearchlie',param); - -% %% Debug visualization function -% function plotcart2d(A) -% compass(A(1,:),A(2,:)); diff -r 290cca7d3469 -r 68fb71aa5339 util/classes/dictionaryMatrices/iterativeprojections.m --- a/util/classes/dictionaryMatrices/iterativeprojections.m Thu Sep 29 09:46:52 
2011 +0100 +++ b/util/classes/dictionaryMatrices/iterativeprojections.m Thu Oct 06 14:33:41 2011 +0100 @@ -1,60 +1,97 @@ -function [A G res muMin] = grassmannian(n,m,nIter,dd1,dd2,initA,verb) +function [dic mus srs] = iterativeprojections(dic,mu,Y,X,params) % grassmanian attempts to create an n by m matrix with minimal mutual % coherence using an iterative projection method. % -% [A G res] = grassmanian(n,m,nIter,dd1,dd2,initA) +% REFERENCE % -% REFERENCE -% M. Elad, Sparse and Redundant Representations, Springer 2010. %% Parameters and Defaults -error(nargchk(2,7,nargin)); +if ~nargin, testiterativeprojections; return; end -if ~exist('verb','var') || isempty(verb), verb = false; end %verbose output -if ~exist('initA','var') || isempty(initA), initA = randn(n,m); end %initial matrix -if ~exist('dd2','var') || isempty(dd2), dd2 = 0.99; end %shrinking factor -if ~exist('dd1','var') || isempty(dd1), dd1 = 0.9; end %percentage of coherences to be shrinked -if ~exist('nIter','var') || isempty(nIter), nIter = 10; end %number of iterations +if ~exist('params','var') || isempty(param), params = struct; end +if ~isfield(params,'nIter'), params.nIter = 10; end %number of iterations +if ~isfield(params,'eps'), params.eps = 1e-9; end %tolerance level +[n m] = size(dic); -%% Main algo -A = normc(initA); %normalise columns -[Uinit Sigma] = svd(A); -G = A'*A; %gram matrix +SNR = @(dic) snr(Y,dic*X); %SNR function +MU = @(dic) max(max(abs((dic'*dic)-diag(diag(dic'*dic))))); %coherence function -muMin = sqrt((m-n)/(n*(m-1))); %Lower bound on mutual coherence (equiangular tight frame) -res = zeros(nIter,1); -if verb - fprintf(1,'Iter mu_min mu \n'); +%% Main algorithm +dic = normc(dic); %normalise columns +alpha = m/n; %ratio between number of atoms and ambient dimension + +mus = zeros(params.nIter,1); %coherence at each iteration +srs = zeros(params.nIter,1); %signal to noise ratio at each iteration +iIter = 1; +while iIter<=params.nIter && MU(dic)>mu + fprintf(1,'Iteration number %u\n', iIter); + % calculate snr and coherence + mus(iIter) = MU(dic); + srs(iIter) = SNR(dic); + + % calculate gram matrix + G = dic'*dic; + + % project into the structural constraint set + H = zeros(size(G)); %initialise matrix + ind1 = find(abs(G(:))<=mu); %find elements smaller than mu + ind2 = find(abs(G(:))>mu); %find elements bigger than mu + H(ind1) = G(ind1); %copy elements belonging to ind1 + H(ind2) = mu*sign(G(ind2)); %threshold elements belonging to ind2 + H(1:m+1:end) = 1; %set diagonal to one + + % project into spectral constraint set + [~ , S, V] = svd(H); + %G = alpha*(V(:,1:n)*V(:,1:n)'); + G = V(:,1:n)*S(1:n,1:n)*V(:,1:n)'; + + % calculate dictionary + [~, S V] = svd(G); + dic = sqrt(S(1:n,:))*V'; + + % rotate dictionary + options = struct('nIter',100,'step',0.001); + [~, ~, W] = rotatematrix(Y,dic*X,'conjgradlie',options); + dic = W*dic; + + iIter = iIter+1; end +if iIter=gg(round(dd1*(m^2-m))) & abs(G(:)-1)>1e-6); %find large elements of gram matrix - G(pos) = G(pos)*dd2; %shrink large elements of gram matrix - [U S V] = svd(G); %compute new SVD of gram matrix - S(n+1:end,1+n:end) = 0; %set small eigenvalues to zero (this ensures rank(G)<=d) - G = U*S*V'; %update gram matrix - G = diag(1./abs(sqrt(diag(G))))*G*diag(1./abs(sqrt(diag(G)))); %normalise gram matrix diagonal - if verb - Geye = G - eye(size(G)); - fprintf(1,'%6i %12.8f %12.8f \n',iIter,muMin,max(abs(Geye(:)))); - end -end +%initialise data +X = sprandn(m,N,1); %matrix of coefficients +phi = normc(randn(n,m)); %dictionary +temp = randn(n); +W = 
expm(0.5*(temp-temp')); %rotation matrix +Y = W*phi*X; %observed signals -% [~, Sigma_gram V_gram] = svd(G); %calculate svd decomposition of gramian +%optimise dictionary +[~, mus srs] = iterativeprojections(phi,0.2,Y,X); -% A = normc(A); %normalise dictionary +%plot results +nIter = length(mus); -[V_gram Sigma_gram] = svd(G); %calculate svd decomposition of gramian -Sigma_new = sqrt(Sigma_gram(1:n,:)).*sign(Sigma); %calculate singular values of dictionary -A = Uinit*Sigma_new*V_gram'; %update dictionary +figure, +subplot(2,1,1) +plot(1:nIter,srs,'kd-'); +xlabel('nIter'); +ylabel('snr (dB)'); +grid on -% param.step = 0.01; -% param.reg = 0.01; -% param.nIter = 20; -% A = rotatematrix(initA,A,'linesearchlie',param); +subplot(2,1,2), hold on +plot(1:nIter,mus,'ko-'); +plot([1 nIter],[mu_min mu_min],'k') +grid on +legend('\mu','\mu_{min}'); -% %% Debug visualization function -% function plotcart2d(A) -% compass(A(1,:),A(2,:)); diff -r 290cca7d3469 -r 68fb71aa5339 util/classes/dictionaryMatrices/rotatematrix.m --- a/util/classes/dictionaryMatrices/rotatematrix.m Thu Sep 29 09:46:52 2011 +0100 +++ b/util/classes/dictionaryMatrices/rotatematrix.m Thu Oct 06 14:33:41 2011 +0100 @@ -5,54 +5,66 @@ % REFERENCE % M.D. Plumbley, Geometrical Methods for Non-Negative ICA: Manifolds, Lie % Groups and Toral Subalgebra, Neurocomputing + +%% Parse inputs and set defaults if ~nargin, testrotatematrix; return, end +if ~exist('param','var') || isempty(param), param = struct; end +if ~exist('method','var') || isempty(method), method = 'conjgradLie'; end +if ~isfield(param,'nIter'), param.nIter = 100; end %number of iterations +if ~isfield(param,'eps'), param.eps = 1e-9; end %tolerance level +if ~isfield(param,'step'), param.step = 0.01; end -if ~exist('method','var') || isempty(method), method = 'unconstrained'; end +J = @(W) 0.5*norm(D-W*Phi,'fro'); %cost function -J = @(W) 0.5*norm(D-W*Phi,'fro'); -cost = zeros(param.nIter,1); +% Initialise variables +cost = zeros(param.nIter,1); %cost at each iteration +W = eye(size(Phi,1)); %rotation matrix +grad = ones(size(W)); %gradient +t = param.step; %step size +Gprev = 0; %previous gradient +Hprev = 0; %previous Lie search direction +iIter = 1; %iteration counter -W = eye(size(Phi,1)); -t = 0; -Gprev = 0; -Hprev = 0; -for i=1:param.nIter - cost(i) = J(W); - grad = (W*Phi-D)*Phi'; +%% Main algorithm +while iIter<=param.nIter && norm(grad,'fro')>eps + cost(iIter) = J(W); %calculate cost + grad = (W*Phi-D)*Phi'; %calculate gradient switch method case 'unconstrained' % gradient descent eta = param.step; W = W - eta*grad; % update W by steepest descent case 'tangent' % self correcting tangent eta = param.step; - mu = param.reg; - W = W - 0.5*eta*(grad - W*grad'*W + mu*W*(W'*W-eye(size(W)))); - case 'steepestlie' + W = W - 0.5*eta*(grad - W*grad'*W); + [U , ~, V] = svd(W); + W = U*V'; + case 'steepestlie' %steepest descent in Lie algebra eta = param.step; - B = 2*skew(grad*W'); % calculate gradient in lie algebra + B = 2*skew(grad*W'); % calculate gradient in Lie algebra W = expm(-eta*B)*W; % update W by steepest descent - case 'linesearchlie' - B = 2*skew(grad*W'); % calculate gradient in lie algebra + case 'linesearchlie' % line search in Lie algebra + B = 2*skew(grad*W'); % calculate gradient in Lie algebra H = -B; % calculate direction as negative gradient - t = searchline(J,H,W,t);% line search in one-parameter lie subalgebra + t = searchline(J,H,W,t);% line search in one-parameter Lie subalgebra W = expm(t*H)*W; % update W by line search - case 'conjgradlie' - G = 
2*skew(grad*W'); % calculate gradient in lie algebra + case 'conjgradlie' % conjugate gradient in Lie algebra + G = 2*skew(grad*W'); % calculate gradient in Lie algebra H = -G + polakRibiere(G,Gprev)*Hprev; %calculate conjugate gradient direction - t = searchline(J,H,W,t);% line search in one-parameter lie subalgebra + t = searchline(J,H,W,t);% line search in one-parameter Lie subalgebra W = expm(t*H)*W; % update W by line search - Hprev = H; % % save search direction - Gprev = G; % % save gradient + Hprev = H; % save search direction + Gprev = G; % save gradient end + iIter = iIter+1; % update iteration counter end -Dhat = W*Phi; +Dhat = W*Phi; %rotate matrix +cost(iIter:end) = cost(iIter-1); %pad cost vector end -% function C = matcomm(A,B) -% %Matrix commutator -% C = A*B-B*A; +%% Support functions function gamma = polakRibiere(G,Gprev) +%Polak-Ribiere rule for conjugate direction calculation gamma = G(:)'*(G(:)-Gprev(:))/(norm(Gprev(:))^2); if isnan(gamma) || isinf(gamma) gamma = 0; @@ -60,36 +72,41 @@ end function t = searchline(J,H,W,t) +%Line search in one-parameter Lie subalgebra t = fminsearch(@(x) J(expm(x*H)*W),t); end function B = skew(A) +%Skew-symmetric matrix B = 0.5*(A - A'); end +%% Test function function testrotatematrix clear, clc, close all -n = 256; -m = 512; -disp('A random matrix...'); -Phi = randn(n,m); -disp('And its rotated mate...'); -Qtrue = expm(skew(randn(n))); -D = Qtrue*Phi; -disp('Now, lets try to find the right rotation...'); -param.nIter = 1000; -param.step = 0.001; +n = 256; %ambient dimension +m = 512; %number of atoms +param.nIter = 300; %number of iterations +param.step = 0.001; %step size +param.mu = 0.01; %regularization factor (for tangent method) +methods = {'unconstrained','tangent','linesearchlie','conjgradlie'}; -cost = zeros(param.nIter,4); -[~, cost(:,1)] = rotatematrix(D,Phi,'unconstrained',param); -[~, cost(:,2)] = rotatematrix(D,Phi,'steepestlie',param); -[~, cost(:,3)] = rotatematrix(D,Phi,'linesearchlie',param); -[~, cost(:,4)] = rotatematrix(D,Phi,'conjgradlie',param); +Phi = randn(n,m); %initial dictionary +Qtrue = expm(skew(randn(n))); %rotation matrix +D = Qtrue*Phi; %target dictionary + +cost = zeros(param.nIter,length(methods)); +for iIter=1:length(methods) + tic + [~, cost(:,iIter)] = rotatematrix(D,Phi,methods{iIter},param); + time = toc; + sprintf('Method %s completed in %f seconds \n',methods{iIter},time) +end figure, plot(cost) set(gca,'XScale','log','Yscale','log') -legend({'uncons','settpestlie','linesearchlie','conjgradlie'}) +legend(methods) grid on xlabel('number of iterations') ylabel('J(W)')
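
Note (illustrative, not part of the changeset): the decorrelation routines added above all revolve around the same inner step, alternately projecting the Gram matrix onto a coherence constraint and back onto the set of rank-n Gram matrices, as in iterativeprojections.m. The sketch below isolates that step under assumed names (dic0, muTarget, nIter are examples, not identifiers from this patch) and omits the data-fitting rotation handled by rotatematrix.m.

% Illustrative sketch: alternating projections to reduce mutual coherence.
% Assumed example parameters, not code from this changeset.
n = 64; m = 128;                       %ambient dimension and number of atoms
dic0 = normc(randn(n,m));              %random initial dictionary, unit-norm columns
muTarget = 0.3;                        %target maximal off-diagonal Gram entry
nIter = 10;                            %fixed number of passes (for the example)

dic = dic0;
for iIter = 1:nIter
    G = dic'*dic;                      %gram matrix of current dictionary
    H = G;
    off = abs(G) > muTarget;           %entries exceeding the coherence target
    H(off) = muTarget*sign(G(off));    %clip them at +/- muTarget
    H(1:m+1:end) = 1;                  %restore unit diagonal
    [~, S, V] = svd(H);                %spectral projection onto rank-n matrices
    G = V(:,1:n)*S(1:n,1:n)*V(:,1:n)';
    dic = sqrt(S(1:n,1:n))*V(:,1:n)';  %factor G back into an n-by-m dictionary
    dic = normc(dic);                  %re-normalise columns
end

maxCoherence = max(max(abs(dic'*dic - eye(m))));
fprintf('coherence after %d passes: %f\n', nIter, maxCoherence);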