Chris@5: function [w,h,z,u,xa] = cplcaMT( x, K, T, R, w, h, z, u, iter, sw, sh, sz, su, lw, lh, lz, lu, pa)
Chris@5: % function [w,h,xa2] = cplcaMT( x, K, T, R, w, h, z, u, iter, sw, sh, sz, su, lw, lh, lz, lu)
Chris@5: %
Chris@5: % Perform multiple-source, multiple-template SIPLCA for transcription
Chris@5: %
Chris@5: % Inputs:
Chris@5: %  x     input distribution
Chris@5: %  K     number of components
Chris@5: %  T     size of components
Chris@5: %  R     size of sources
Chris@5: %  w     initial value of p(w) [default = random]
Chris@5: %  h     initial value of p(h) [default = random]
Chris@5: %  z     initial value of p(z) [default = random]
Chris@5: %  iter  number of EM iterations [default = 10]
Chris@5: %  sw    sparsity parameter for w [default = 1]
Chris@5: %  sh    sparsity parameter for h [default = 1]
Chris@5: %  sz    sparsity parameter for z [default = 1]
Chris@5: %  lw    flag to update w [default = 1]
Chris@5: %  lh    flag to update h [default = 1]
Chris@5: %  lh    flag to update h [default = 1]
Chris@5: %  pa    source-component activity range [Rx2]
Chris@5: %
Chris@5: % Outputs: 
Chris@5: %  w   p(w) - spectral bases
Chris@5: %  h   p(h) - pitch impulse
Chris@5: %  z   p(z) - mixing matrix for p(h)
Chris@5: %  xa  approximation of input
Chris@5: 
Chris@5: % Emmanouil Benetos 2011, based on cplca code by Paris Smaragdis
Chris@5: 
Chris@5: 
Chris@5: %% for the transcription application,
Chris@5: %% x -> noise-reduced constant Q. In the application this is a 2-sec,
Chris@5: %%   100-col segment with 2 zeros at top and bottom, so 549x100
Chris@5: %% K -> 88, number of notes
Chris@5: %% T -> [545 1], a two-element array: 545 is the length of each
Chris@5: %%   template, but why 1?
Chris@5: %% R -> 10, number of instruments
Chris@5: %% w -> a 10x88 cell array, in which w{instrument,note} is a 545x1
Chris@5: %%   array containing the template for the given instrument and note
Chris@5: %%   number
Chris@5: %% h -> empty
Chris@5: %% z -> empty
Chris@5: %% u -> empty
Chris@5: %% iter -> a parameter for the program, 12 in the mirex submission
Chris@5: %% sw -> 1
Chris@5: %% sh -> 1
Chris@5: %% sz -> 1.1
Chris@5: %% su -> 1.3, not documented above, presumably sparsity for u
Chris@5: %% lw -> 0, don't update w
Chris@5: %% lh -> 1, do update h
Chris@5: %% lz -> 1, do update z
Chris@5: %% lu -> 1, not documented above, presumably do update u
Chris@5: %% pa -> a 10x2 array in which pa(instrument,1) is the lowest note
Chris@5: %%   expected for that instrument and pa(instrument,2) is the highest
Chris@5: 
Chris@5: 
Chris@5: % Sort out the sizes
Chris@5: 
Chris@5: wc = 2*size(x)-T; %% works out to 553x199
Chris@5: hc = size(x)+T-1; %% works out to 1093x100
Chris@5: 
Chris@5: % Default training iterations
Chris@5: if ~exist( 'iter')
Chris@5: 	iter = 10;
Chris@5: end
Chris@5: 
Chris@5: 
Chris@5: % Initialize
Chris@5: sumx = sum(x); %% for later normalisation
Chris@5: 
Chris@5: if ~exist( 'w') || isempty( w)
Chris@5:     %% doesn't happen, w was provided (it's the template data)
Chris@5:     w = cell(R, K);
Chris@5: 	for k = 1:K
Chris@5:         for r=1:R
Chris@5:             w{r,k} = rand( T);
Chris@5:             w{r,k} = w{r,k} / sum( w{r,k}(:));
Chris@5:         end
Chris@5:     end
Chris@5: end
Chris@5: if ~exist( 'h') || isempty( h)
Chris@5:     %% does happen, h was not provided
Chris@5:     h = cell(1, K);
Chris@5: 	for k = 1:K
Chris@5: 		h{k} = rand( size(x)-T+1);
Chris@5: 		h{k} = h{k};
Chris@5: 	end
Chris@5:     %% h is now a 1x88 cell, h{note} is a 5x100 array of random values.
Chris@5:     %% The 5 comes from the height of the CQ array minus the length of
Chris@5:     %% a template, plus 1. I guess this is space to allow for the
Chris@5:     %% 5-bins-per-semitone pitch shift.
Chris@5: end
Chris@5: if ~exist( 'z') || isempty( z)
Chris@5:     %% does happen, z was not provided
Chris@5:     z = cell(1, K);
Chris@5: 	for k = 1:K
Chris@5: 		z{k} = rand( size(x,2),1);
Chris@5: 		z{k} = z{k};
Chris@5: 	end
Chris@5:     %% z is a 1x88 cell, z{note} is a 100x1 array of random values.
Chris@5: end
Chris@5: if ~exist( 'u') || isempty( u)
Chris@5:     %% does happen, u was not provided
Chris@5:     u = cell(R, K);
Chris@5: 	for k = 1:K
Chris@5:         for r=1:R
Chris@5:             if( (pa(r,1) <= k &&  k <= pa(r,2)) )
Chris@5:                 u{r,k} = ones( size(x,2),1);
Chris@5:             else
Chris@5:                 u{r,k} = zeros( size(x,2),1);
Chris@5:             end
Chris@5:         end;
Chris@5: 	end
Chris@5:     %% u is a 10x88 cell, u{instrument,note} is a 100x1 double containing
Chris@5:     %% all 1s if note is in-range for instrument and all 0s otherwise
Chris@5: end
Chris@5: 
Chris@6: fh = cell(1, K); %% 1x88
Chris@6: fw = cell(R, K); %% 10x88
Chris@5: for k = 1:K
Chris@5:     fh{k} = ones(wc) + 1i*ones(wc);
Chris@5:     for r=1:R
Chris@5:         fw{r,k} = ones(wc) + 1i*ones(wc);
Chris@5:     end;
Chris@5: end;
Chris@5: 
Chris@6: %% now fh is a 1x88 cell, and fh{note} is a 553x199 array initialised
Chris@6: %% with all complex values 1 + 1i
Chris@6: 
Chris@6: %% fw is a 10x88 cell, and fw{instrument,note} is a 553x199 array
Chris@6: %% likewise
Chris@5: 
Chris@5: 
Chris@8: %% The MIREX paper describes the model as
Chris@8: %%
Chris@8: %% P(w,t) = P(t) sum[p,f,s]( P(w-f|s,p) P(f|p,t) P(s|p,t) P(p|t) )
Chris@8: %%
Chris@8: %% where 
Chris@8: %%   w = log-frequency bin index
Chris@8: %%   t = time
Chris@8: %%   s = instrument number
Chris@8: %%   p = MIDI pitch number
Chris@8: %%   f = frequency offset (pitch-adjustment convolution)
Chris@8: %% so
Chris@8: %%   P(w,t) = the input distribution (constant q spectrum)
Chris@8: %%   P(t) = overall energy by time
Chris@8: %%   P(w-f|s,p) = spectral template for instrument s, pitch p with offset f
Chris@8: %%   P(f|p,t) = frequency offset for pitch p, time t?
Chris@8: %%   P(s|p,t) = instrument contribution for pitch p, time t
Chris@8: %%   P(p|t) = pitch probability for time t
Chris@8: %% the outputs we want to produce are P(p|t) (the transcription matrix)
Chris@8: %% and P(s|p,t) (the instrument classification).
Chris@8: %%
Chris@8: %% In this program,
Chris@8: %%   x -> P(w,t), the input distribution
Chris@8: %%   w -> P(w|s,p), the templates
Chris@8: %%   h -> P(f|p,t), the pitch shift component
Chris@8: %%   z -> P(p|t), the pitch probabilities, the main return value
Chris@8: %%   u -> P(s|p,t), the source contribution, the secondary return value
Chris@8: %%
Chris@8: %% The paper gives the update rule for the expectation step as
Chris@8: %% 
Chris@8: %% P(p,f,s|w,t) =       P(w-f|s,p) P(f|p,t) P(s|p,t) P(p|t)
Chris@8: %%                --------------------------------------------------
Chris@8: %%                sum[p,f,s] ( P(w-f|s,p) P(f|p,t) P(s|p,t) P(p|t) )
Chris@8: %%
Chris@8: %% and the update equations for the maximization step as
Chris@8: %%
Chris@8: %% P(f|p,t) =  sum[w,s] ( P(p,f,s|w,t) P(w,t) )
Chris@8: %%  or h      ----------------------------------
Chris@8: %%            sum[f,w,s] ( P(p,f,s|w,t) P(w,t) )
Chris@8: %%
Chris@8: %% P(s|p,t) =  sum[w,f] ( P(p,f,s|w,t) P(w,t) )
Chris@8: %%  or u      ----------------------------------
Chris@8: %%            sum[s,w,f] ( P(p,f,s|w,t) P(w,t) )
Chris@8: %%
Chris@8: %% P(p|t) =  sum[w,f,s] ( P(p,f,s|w,t) P(w,t) )
Chris@8: %%  or z    ------------------------------------
Chris@8: %%          sum[p,w,f,s] ( P(p,f,s|w,t) P(w,t) )
Chris@8: %%
Chris@8: %% (there is also an update equation for x, or P(w|s,p) but we
Chris@15: %% don't want that as it's the input -- one paper proposes an 89th
Chris@15: %% template to learn the noise component but... not yet)
Chris@8: 
Chris@8: 
Chris@8: 
Chris@5: % Make commands for subsequent multidim operations and initialize fw
Chris@6: 
Chris@5: fnh = 'c(hc(1)-(T(1)+(1:size(h{k},1))-2),hc(2)-(T(2)+(1:size(h{k},2))-2))';
Chris@5: xai = 'xa(1:size(x,1),1:size(x,2))';
Chris@5: flz = 'xbar(end:-1:1,end:-1:1)';
Chris@5: 
Chris@5: for k = 1:K
Chris@5:     for r=1:R
Chris@5:         if( (pa(r,1) <= k &&  k <= pa(r,2)) )
Chris@6: 
Chris@6: 	  %% fftn(X,siz) takes an N-dimensional FFT (same number of
Chris@6: 	  %% dimensions as siz) of X, padding or truncating X
Chris@6: 	  %% beforehand so that it is of size siz. Here w{r,k} is the
Chris@6: 	  %% 545x1 template for instrument r and note k, and wc is
Chris@6: 	  %% 553x199.
Chris@6: 
Chris@6: 	  %% I believe this is equivalent to performing a 553-point
Chris@6: 	  %% FFT of each column of the input (with w{r,k} in the first
Chris@15: 	  %% 545 elements of the first column of that input).
Chris@6: 
Chris@6: 	  %% The output is of course complex.
Chris@6: 
Chris@8: 	  %% The purpose of this is to support convolution for pitch
Chris@8: 	  %% shifting. w{r,k} are the templates, and fw{r,k} are ffts
Chris@8: 	  %% of the templates which will be multiplied by fh, the
Chris@8: 	  %% equivalent ffts of the pitch shift contributions, later
Chris@8: 
Chris@5:             fw{r,k} = fftn( w{r,k}, wc);
Chris@5:         end;
Chris@5:     end;
Chris@5: end;
Chris@5: 
Chris@5: % Iterate
Chris@5: for it = 1:iter
Chris@5:     
Chris@5:     %disp(['Iteration: ' num2str(it)]);
Chris@5:     
Chris@5:     % E-step
Chris@13:     xa = eps; %% tiny non-zero initialiser as we'll be dividing by this later
Chris@6:     for k = 16:73  %% overall note range found in instrument set
Chris@8:         fh{k} = fftn( h{k}, wc); %% this and the subsequent ifftn are for the pitch-shift convolution step I think
Chris@12:         for r=1:R  %% instruments
Chris@5:             if( (pa(r,1) <= k &&  k <= pa(r,2)) )
Chris@5:                 xa1 = abs( real( ifftn( fw{r,k} .* fh{k})));                
Chris@5:                 xa = xa + xa1(1:size(x,1),1:size(x,2)) .*repmat(z{k},1,size(x,1))'.*repmat(u{r,k},1,size(x,1))';
Chris@8: 
Chris@8: 		%% so xa is the accumulation of the element-by-element
Chris@8: 		%% product of: the pitch-shifted templates (xa1); the
Chris@8: 		%% pitch probabilities (z); and the source
Chris@8: 		%% contributions (u); across all instruments and notes
Chris@8: 
Chris@8: 		%% note that xa1 is resized to match x, the input,
Chris@8: 		%% which is possible because fw and fh were
Chris@8: 		%% constructed at (just over) the same width and
Chris@8: 		%% (almost) twice the height (553x199 if x is
Chris@8: 		%% 549x100). 
Chris@8: 
Chris@8: 		%% the other components are 100x1, i.e. one value per
Chris@8: 		%% time step, so they are tiled up to 100x549 and then
Chris@8: 		%% transposed for the multiplication
Chris@8: 
Chris@5:                 clear xa1;
Chris@5:             end
Chris@5:         end
Chris@5:     end
Chris@5:     
Chris@5:     xbar = x ./ xa;
Chris@5:     xbar = eval( flz);
Chris@15: 
Chris@29:     %% xbar now contains the summation of Eqn 8 in the CMJ paper, i.e.
Chris@29:     %% it is a time-frequency distribution consisting of the
Chris@29:     %% five-dimensional distribution Pt(p,f,s|w) summed across p, f,
Chris@29:     %% and s. In the M step, for each parameter we want to update, we
Chris@29:     %% take the same five-dimensional distribution and marginalise it
Chris@29:     %% across the other parameters that that one does not depend on.
Chris@29:     %% The M-step is a bit tricky for me to follow here, with the
Chris@29:     %% convolution and use of MATLAB eval macros.
Chris@15: 
Chris@5:     fx = fftn( xbar, wc);
Chris@5:     
Chris@5:     
Chris@5:     % M-step
Chris@5:     for k = 16:73
Chris@5:         
Chris@5:         
Chris@8:       %% Throughout here, r is an instrument number (1..R) and k is a
Chris@8:       %% note number (1..K)
Chris@8: 
Chris@5:         % Update h, z, u
Chris@5:         nh=eps;
Chris@5:         for r=1:R
Chris@5:             if( (pa(r,1) <= k &&  k <= pa(r,2)) )
Chris@14: 
Chris@14: 	        %% so we're accumulating to nh (which is per-note but
Chris@14:                 %% across all instruments) here
Chris@14: 
Chris@14: 	        %% this is a convolution of the error (xbar) with w,
Chris@14: 	        %% carried out as a frequency-domain multiplication.
Chris@14: 	        %% so it's like xbar-for-all-w
Chris@5:                 c = abs( real( ifftn( fx .* fw{r,k} )));
Chris@14: 
Chris@14:                 nh1 = eval( fnh); %% this one is highly mysterious
Chris@14: 
Chris@14: 		%% take the 100x1 note range matrix, repeat to 100x5
Chris@14: 		%% (as h{k} is 5x100), transpose, multiply nh1 by that
Chris@14:                 nh1 = nh1 .* repmat(u{r,k},1,size(h{k},1))';
Chris@14:                 nh = nh + nh1; %% so nh will presumably be 100x5 too
Chris@5:                 
Chris@15:                 nhu = eval( fnh); %% more magic
Chris@14: 
Chris@5:                 nhu = nhu .* h{k};
Chris@14: 
Chris@5:                 nu = sum(nhu)';
Chris@5:                 nu = u{r,k} .* nu + eps;
Chris@5:                 if lu
Chris@5:                     u{r,k} = nu;
Chris@5:                 end;
Chris@5:                 
Chris@5:             end;
Chris@5:         end
Chris@5:         nh = h{k} .* (nh.^sh);
Chris@5:         nz = sum(nh)';
Chris@5:         nz = z{k} .* nz + eps;
Chris@5:         
Chris@5:         
Chris@5:         % Assign and normalize
Chris@5:         if lh
Chris@5:             h{k} = nh;
Chris@5:         end
Chris@5:         if lz
Chris@5:             z{k} = nz;
Chris@5:         end
Chris@5:         
Chris@5:         
Chris@5:     end
Chris@5:     
Chris@5:     % Normalize z over t
Chris@5:     if lz
Chris@5:         Z=[]; for i=1:K Z=[Z z{i}]; end;
Chris@5:         Z = Z.^sz;
Chris@5:         Z(1:end,1:15)=0;
Chris@5:         Z(1:end,74:88)=0;
Chris@5:         Z = Z./repmat(sum(Z,2),1,K); z = num2cell(Z,1); %figure; imagesc(imrotate(Z,90));
Chris@5:     end
Chris@5:     
Chris@5:     % Normalize u over z,t
Chris@5:     if lu
Chris@5:         U=[]; for r=1:R U(r,:,:) = cell2mat(u(r,:)); end;
Chris@5:         for i=1:size(U,2) for j=1:size(U,3) U(:,i,j) = U(:,i,j).^su; U(:,i,j) = U(:,i,j) ./ sum(U(:,i,j)); end; end;
Chris@5:         U0 = permute(U,[2 1 3]); u = squeeze(num2cell(U0,1));
Chris@5:     end
Chris@5:     
Chris@5:     % Normalize h over z,t
Chris@5:     H=[]; for k=1:K H(k,:,:) = cell2mat(h(k)); end; H0 = permute(H,[2 1 3]);
Chris@5:     for i=1:size(H0,2) for j=1:size(H0,3) H0(:,i,j) = sumx(j)* (H0(:,i,j) ./ sum(H0(:,i,j))); end; end;
Chris@5:     h = squeeze(num2cell(squeeze(H0),[1 3])); for k=1:K h{k} = squeeze(h{k}); end;
Chris@5:     
Chris@5:     %figure; imagesc(imrotate(xa',90));
Chris@5:     
Chris@5: end
Chris@5: 
Chris@5: %figure; imagesc(imrotate(xa',90));