changeset 2:8017dd4a650d

Add MIREX 2012 code
author Chris Cannam
date Wed, 19 Mar 2014 09:09:23 +0000
parents 662b0a8b17b9
children dd71b54edc9a
files mirex2012-matlab/README.txt mirex2012-matlab/cell2sparse.m mirex2012-matlab/computeCQT.m mirex2012-matlab/cplcaMT.m mirex2012-matlab/cqt.m mirex2012-matlab/doMultiF0.m mirex2012-matlab/genCQTkernel.m mirex2012-matlab/getCQT.m mirex2012-matlab/noteTemplatesBassoon.mat mirex2012-matlab/noteTemplatesCello.mat mirex2012-matlab/noteTemplatesClarinet.mat mirex2012-matlab/noteTemplatesFlute.mat mirex2012-matlab/noteTemplatesGuitar.mat mirex2012-matlab/noteTemplatesHorn.mat mirex2012-matlab/noteTemplatesOboe.mat mirex2012-matlab/noteTemplatesSptkBGCl.mat mirex2012-matlab/noteTemplatesTenorSax.mat mirex2012-matlab/noteTemplatesViolin.mat mirex2012-matlab/transcriptionMultipleTemplates.m
diffstat 19 files changed, 740 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mirex2012-matlab/README.txt	Wed Mar 19 09:09:23 2014 +0000
@@ -0,0 +1,16 @@
+MIREX 2012 Multiple Fundamental Frequency Estimation & Tracking Task 2 (note tracking)
+Emmanouil Benetos and Simon Dixon
+
+Environment: MATLAB 7.x 32-bit (Windows/Mac OS X/Linux version) or MATLAB 7.x 64-bit (Mac OS X/Linux version)
+
+Command line calling format: doMultiF0('path/to/inputFile.wav','path/to/outputFile.F0')
+
+Example: doMultiF0('mix_var5a.wav','mix_var5a.F0')
+
+Expected Memory Footprint: 450 MB
+
+Number of threads/cores used: 1
+
+Expected Runtime: 50 x RealTime (eg. for a 30sec audio file, expected runtime is 25min)
+
+NOTE: An exit statement is contained at the end of doMultiF0.m
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mirex2012-matlab/cell2sparse.m	Wed Mar 19 09:09:23 2014 +0000
@@ -0,0 +1,78 @@
+function spCQT = cell2sparse(Xcq,octaves,bins,firstcenter,atomHOP,atomNr)
+%Generates a sparse matrix containing the CQT coefficients (rasterized).
+%
+%The sparse matrix representation of the CQT coefficients contains all
+%computed coefficients at the corresponding time-frequency location
+%(similar to a spectrogram). For lower frequencies this means, that
+%each coefficient is followed by zeros stemming from the fact, that the time
+%resolution for lower frequencies decreases as the frequency resolution
+%increases. Due to the design of the CQT kernel, however, the coefficients
+%of different octaves are synchronised, meaning that for the second highest
+%octave each coefficient is followed by one zero, for the next octave down
+%two zeros are inserted, for the next octave four zeros are inserted and so
+%on.
+%
+%INPUT:
+%   Xcq         ... Cell array consisting of all coefficients for all octaves
+%   octaves     ... Number of octaves processed
+%   bins        ... Number of bins per octave
+%   firstcenter ... Location of the leftmost atom-stack in the temporal
+%                   kernel
+%   atomHOP     ... Spacing of two consecutive atom stacks
+%   atomNr      ... Number of atoms per bin within the kernel
+%
+%Christian Schörkhuber, Anssi Klapuri 2010-06
+
+if 0
+%% this version has big memory consumption but is very fast
+    emptyHops = firstcenter/atomHOP;
+    drop = emptyHops*2^(octaves-1)-emptyHops; %distance between first value in highest octave and first value in lowest octave
+    spCQT = zeros(bins*octaves,size(Xcq{1},2)*atomNr-drop);
+
+    for i=1:octaves
+        drop = emptyHops*2^(octaves-i)-emptyHops; %first coefficients of all octaves have to be in synchrony
+        X = Xcq{i}; 
+        if  atomNr > 1 %more than one atom per bin --> reshape
+            Xoct = zeros(bins,atomNr*size(X,2)-drop);
+            for u=1:bins %reshape to continous windows for each bin (for the case of several wins per frame)
+               octX_bin = X((u-1)*atomNr+1:u*atomNr,:);
+               Xcont = reshape(octX_bin,1,size(octX_bin,1)*size(octX_bin,2));
+               Xoct(u,:) = Xcont(1+drop:end);
+            end
+            X = Xoct;
+        else
+            X = X(:,1+drop:end);
+        end
+        binVec = bins*octaves-bins*i+1:bins*octaves-bins*(i-1);
+        spCQT(binVec,1:2^(i-1):size(X,2)*2^(i-1)) = X;
+
+    end
+    spCQT = sparse(spCQT); %storing as sparse matrix at the end is the fastest way. Big memory consumption though!
+
+else
+%% this version uses less memory but is noticable slower
+    emptyHops = firstcenter/atomHOP;
+    drops = emptyHops*2.^(octaves-(1:octaves))-emptyHops;
+    len = max(((atomNr*cellfun('size',Xcq,2)-drops).*2.^(0:octaves-1))); %number of columns of output matrix
+    spCQT = [];
+
+    for i=octaves:-1:1
+        drop = emptyHops*2^(octaves-i)-emptyHops; %first coefficients of all octaves have to be in synchrony
+        X = Xcq{i}; 
+        if  atomNr > 1 %more than one atom per bin --> reshape
+            Xoct = zeros(bins,atomNr*size(X,2)-drop);
+            for u=1:bins %reshape to continous windows for each bin (for the case of several wins per frame)
+               octX_bin = X((u-1)*atomNr+1:u*atomNr,:);
+               Xcont = reshape(octX_bin,1,size(octX_bin,1)*size(octX_bin,2));
+               Xoct(u,:) = Xcont(1+drop:end);
+            end
+            X = Xoct;
+        else
+            X = X(:,1+drop:end);
+        end
+        X = upsample(X.',2^(i-1)).';
+        X = [X zeros(bins,len-size(X,2))];
+        spCQT = sparse([spCQT; X]);  
+    end
+
+end
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mirex2012-matlab/computeCQT.m	Wed Mar 19 09:09:23 2014 +0000
@@ -0,0 +1,30 @@
+function [intCQT] = computeCQT(filename)
+% Settings for computing CQT for music signals (by E. Benetos)
+
+% Load .wav file
+[x fs bits] = wavread(filename);
+if (size(x,2) == 2) y = mean(x')'; clear('x'); else y=x; clear('x'); end;
+if (fs ~= 44100) y = resample(y,44100,fs); end;
+y = 0.5*y/max(y);
+fs = 44100;
+
+
+% Compute CQT
+Xcqt = cqt(y,27.5,fs/3,60,fs,'q',0.80,'atomHopFactor',0.3,'thresh',0.0005,'win','hann');
+%Xcqt = cqt(y,27.5,fs/3,120,fs,'q',0.35,'atomHopFactor',0.3,'thresh',0.0005,'win','hann'); % old resolution
+absCQT = getCQT(Xcqt,'all','all');
+
+% Crop CQT to useful time regions
+emptyHops = Xcqt.intParams.firstcenter/Xcqt.intParams.atomHOP;
+maxDrop = emptyHops*2^(Xcqt.octaveNr-1)-emptyHops;
+droppedSamples = (maxDrop-1)*Xcqt.intParams.atomHOP + Xcqt.intParams.firstcenter;
+outputTimeVec = (1:size(absCQT,2))*Xcqt.intParams.atomHOP-Xcqt.intParams.preZeros+droppedSamples;
+
+lowerLim = find(outputTimeVec>0,1);
+upperLim = find(outputTimeVec>length(y),1)-1;
+
+%intCQT = absCQT(112:1200,lowerLim:upperLim); % old resolution
+intCQT = absCQT(56:600,lowerLim:upperLim);
+
+
+%figure; imagesc(imrotate(abs(intCQT),90));
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mirex2012-matlab/cplcaMT.m	Wed Mar 19 09:09:23 2014 +0000
@@ -0,0 +1,192 @@
+function [w,h,z,u,xa] = cplcaMT( x, K, T, R, w, h, z, u, iter, sw, sh, sz, su, lw, lh, lz, lu, pa)
+% function [w,h,xa2] = cplcaMT( x, K, T, R, w, h, z, u, iter, sw, sh, sz, su, lw, lh, lz, lu)
+%
+% Perform multiple-source, multiple-template SIPLCA for transcription
+%
+% Inputs:
+%  x     input distribution
+%  K     number of components
+%  T     size of components
+%  R     size of sources
+%  w     initial value of p(w) [default = random]
+%  h     initial value of p(h) [default = random]
+%  z     initial value of p(z) [default = random]
+%  iter  number of EM iterations [default = 10]
+%  sw    sparsity parameter for w [default = 1]
+%  sh    sparsity parameter for h [default = 1]
+%  sz    sparsity parameter for z [default = 1]
+%  lw    flag to update w [default = 1]
+%  lh    flag to update h [default = 1]
+%  lh    flag to update h [default = 1]
+%  pa    source-component activity range [Rx2]
+%
+% Outputs: 
+%  w   p(w) - spectral bases
+%  h   p(h) - pitch impulse
+%  z   p(z) - mixing matrix for p(h)
+%  xa  approximation of input
+
+% Emmanouil Benetos 2011, based on cplca code by Paris Smaragdis
+
+
+% Sort out the sizes
+wc = 2*size(x)-T;
+hc = size(x)+T-1;
+
+% Default training iterations
+if ~exist( 'iter')
+	iter = 10;
+end
+
+
+% Initialize
+sumx = sum(x);
+
+if ~exist( 'w') || isempty( w)
+    w = cell(R, K);
+	for k = 1:K
+        for r=1:R
+            w{r,k} = rand( T);
+            w{r,k} = w{r,k} / sum( w{r,k}(:));
+        end
+    end
+end
+if ~exist( 'h') || isempty( h)
+    h = cell(1, K);
+	for k = 1:K
+		h{k} = rand( size(x)-T+1);
+		h{k} = h{k};
+	end
+end
+if ~exist( 'z') || isempty( z)
+    z = cell(1, K);
+	for k = 1:K
+		z{k} = rand( size(x,2),1);
+		z{k} = z{k};
+	end
+end
+if ~exist( 'u') || isempty( u)
+    u = cell(R, K);
+	for k = 1:K
+        for r=1:R
+            if( (pa(r,1) <= k &&  k <= pa(r,2)) )
+                u{r,k} = ones( size(x,2),1);
+            else
+                u{r,k} = zeros( size(x,2),1);
+            end
+        end;
+	end
+end
+
+fh = cell(1, K);
+fw = cell(R, K);
+for k = 1:K
+    fh{k} = ones(wc) + 1i*ones(wc);
+    for r=1:R
+        fw{r,k} = ones(wc) + 1i*ones(wc);
+    end;
+end;
+
+
+
+% Make commands for subsequent multidim operations and initialize fw
+fnh = 'c(hc(1)-(T(1)+(1:size(h{k},1))-2),hc(2)-(T(2)+(1:size(h{k},2))-2))';
+xai = 'xa(1:size(x,1),1:size(x,2))';
+flz = 'xbar(end:-1:1,end:-1:1)';
+
+for k = 1:K
+    for r=1:R
+        if( (pa(r,1) <= k &&  k <= pa(r,2)) )
+            fw{r,k} = fftn( w{r,k}, wc);
+        end;
+    end;
+end;
+
+% Iterate
+for it = 1:iter
+    
+    %disp(['Iteration: ' num2str(it)]);
+    
+    % E-step
+    xa = eps;
+    for k = 16:73
+        fh{k} = fftn( h{k}, wc);
+        for r=1:R
+            if( (pa(r,1) <= k &&  k <= pa(r,2)) )
+                xa1 = abs( real( ifftn( fw{r,k} .* fh{k})));                
+                xa = xa + xa1(1:size(x,1),1:size(x,2)) .*repmat(z{k},1,size(x,1))'.*repmat(u{r,k},1,size(x,1))';
+                clear xa1;
+            end
+        end
+    end
+    
+    xbar = x ./ xa;
+    xbar = eval( flz);
+    fx = fftn( xbar, wc);
+    
+    
+    % M-step
+    for k = 16:73
+        
+        
+        % Update h, z, u
+        nh=eps;
+        for r=1:R
+            if( (pa(r,1) <= k &&  k <= pa(r,2)) )
+                c = abs( real( ifftn( fx .* fw{r,k} )));
+                nh1 = eval( fnh);
+                nh1 = nh1 .*repmat(u{r,k},1,size(h{k},1))';
+                nh = nh + nh1;
+                
+                nhu = eval( fnh);
+                nhu = nhu .* h{k};
+                nu = sum(nhu)';
+                nu = u{r,k} .* nu + eps;
+                if lu
+                    u{r,k} = nu;
+                end;
+                
+            end;
+        end
+        nh = h{k} .* (nh.^sh);
+        nz = sum(nh)';
+        nz = z{k} .* nz + eps;
+        
+        
+        % Assign and normalize
+        if lh
+            h{k} = nh;
+        end
+        if lz
+            z{k} = nz;
+        end
+        
+        
+    end
+    
+    % Normalize z over t
+    if lz
+        Z=[]; for i=1:K Z=[Z z{i}]; end;
+        Z = Z.^sz;
+        Z(1:end,1:15)=0;
+        Z(1:end,74:88)=0;
+        Z = Z./repmat(sum(Z,2),1,K); z = num2cell(Z,1); %figure; imagesc(imrotate(Z,90));
+    end
+    
+    % Normalize u over z,t
+    if lu
+        U=[]; for r=1:R U(r,:,:) = cell2mat(u(r,:)); end;
+        for i=1:size(U,2) for j=1:size(U,3) U(:,i,j) = U(:,i,j).^su; U(:,i,j) = U(:,i,j) ./ sum(U(:,i,j)); end; end;
+        U0 = permute(U,[2 1 3]); u = squeeze(num2cell(U0,1));
+    end
+    
+    % Normalize h over z,t
+    H=[]; for k=1:K H(k,:,:) = cell2mat(h(k)); end; H0 = permute(H,[2 1 3]);
+    for i=1:size(H0,2) for j=1:size(H0,3) H0(:,i,j) = sumx(j)* (H0(:,i,j) ./ sum(H0(:,i,j))); end; end;
+    h = squeeze(num2cell(squeeze(H0),[1 3])); for k=1:K h{k} = squeeze(h{k}); end;
+    
+    %figure; imagesc(imrotate(xa',90));
+    
+end
+
+%figure; imagesc(imrotate(xa',90));
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mirex2012-matlab/cqt.m	Wed Mar 19 09:09:23 2014 +0000
@@ -0,0 +1,114 @@
+function Xcqt = cqt(x,fmin,fmax,bins,fs,varargin) 
+%Xcqt = cqt(x,fmin,fmax,bins,fs,varargin) calculates the constant-Q transform of the input signal x.
+%
+%INPUT:
+%   fmin      ... lowest frequency of interest
+%   fmax      ... highest frequency of interest
+%   bins      ... frequency bins per octave
+%   fs        ... sampling rate
+%
+%   optional input parameters (parameter name/value pairs):
+%
+%   'atomHopFactor'   ... overlap of temporal atoms in percent. Default: 0.25.
+%    
+%   'q'       ... the maximum value for optimal reconstruction is q=1.
+%                 For values smaller than 1 the bandwidths of the spectral
+%                 atoms (filter) are increased retaining their center
+%                 frequencies (frequency 'smearing', frequency domain redundancy 
+%                 increases, time resolutin improves). Default: 1.
+%   'thresh'  ... all values in the cqt kernel smaller than tresh are
+%                 rounded to zero. A high value for thresh yields a
+%                 very sparse kernel (fast) but introduces a bigger error. 
+%                 The default value is chosen so that the error due to rounding is negligible.
+%   'kernel'  ... if the cqt kernel structure has been precomputed
+%                 (using function 'genCQTkernel'), the computation of the kernel
+%                 will be by-passed below).
+%   'win'     ... defines which window will be used for the CQT. Valid
+%                 values are: 'blackman','hann' and 'blackmanharris'. To
+%                 use the square root of each window use the prefix 'sqrt_'
+%                 (i.e. 'sqrt_blackman'). Default: 'sqrt_blackmanharris'
+%   'coeffB',
+%   'coeffA'  ... Filter coefficients for the anti-aliasing filter, where
+%                 'coeffB' is the numerator and 'coeffA' is the
+%                 denominator (listed in descending powers of z). 
+%                                                  
+%OUTPUT:
+%   Xcqt      ... struct that comprises various fields: 
+%              spCQT: CQT coefficients in the form of a sparse matrix 
+%                    (rasterized, not interpolated)
+%              fKernel: spectral Kernel 
+%              fmin: frequency of the lowest bin
+%              fmax: frequency of the hiqhest bin
+%              octaveNr: number of octaves processed
+%              bins: number of bins per octave
+%              intParams: structure containing additional parameters for the inverse transform   
+%
+%Christian Schörkhuber, Anssi Klapuri 2010-06
+
+%% input checking
+if size(x,2) > 1 && size(x,1) > 1, error('cqt requires one-dimensional input!'); end;
+if size(x,2) > 1, x = x(:); end; %column vector
+
+%% input parameters
+q = 1; %default value
+atomHopFactor = 0.25; %default value
+thresh = 0.0005; %default value
+winFlag = 'sqrt_blackmanharris';
+
+for ain = 1:1:length(varargin)
+    if strcmp(varargin{ain},'q'), q = varargin{ain+1}; end;
+    if strcmp(varargin{ain},'atomHopFactor'), atomHopFactor = varargin{ain+1}; end;
+    if strcmp(varargin{ain},'thresh'), thresh = varargin{ain+1}; end;
+    if strcmp(varargin{ain},'kernel'), cqtKernel = varargin{ain+1}; end;
+    if strcmp(varargin{ain},'win'), winFlag = varargin{ain+1}; end;
+    if strcmp(varargin{ain},'coeffB'), B = varargin{ain+1}; end;
+    if strcmp(varargin{ain},'coeffA'), A = varargin{ain+1}; end;    
+end
+
+%% define
+octaveNr = ceil(log2(fmax/fmin));
+xlen_init = length(x);
+
+%% design lowpass filter
+if ~exist('B','var') || ~exist('A','var')
+    LPorder = 6; %order of the anti-aliasing filter
+    cutoff = 0.5;
+    [B A] = butter(LPorder,cutoff,'low'); %design f_nyquist/2-lowpass filter
+end
+
+%% design kernel for one octave 
+if ~exist('cqtKernel','var')
+    cqtKernel = genCQTkernel(fmax, bins,fs,'q',q,'atomHopFactor',atomHopFactor,'thresh',thresh,'win',winFlag);
+end
+
+%% calculate CQT
+cellCQT = cell(1,octaveNr);
+maxBlock = cqtKernel.fftLEN * 2^(octaveNr-1); %largest FFT Block (virtual)
+suffixZeros = maxBlock; 
+prefixZeros = maxBlock;
+x = [zeros(prefixZeros,1); x; zeros(suffixZeros,1)]; %zeropadding
+OVRLP = cqtKernel.fftLEN - cqtKernel.fftHOP;
+K = cqtKernel.fKernel'; %conjugate spectral kernel for cqt transformation
+
+for i = 1:octaveNr
+    xx = buffer(x,cqtKernel.fftLEN, OVRLP,'nodelay'); %generating FFT blocks
+    XX = fft(xx); %applying fft to each column (each FFT frame)
+    cellCQT{i} = K*XX; %calculating cqt coefficients for all FFT frames for this octave
+
+    if i~=octaveNr
+        x = filtfilt(B,A,x); %anti aliasing filter
+        x = x(1:2:end); %drop samplerate by 2
+    end
+end
+
+%% map to sparse matrix representation
+spCQT = cell2sparse(cellCQT,octaveNr,bins,cqtKernel.firstcenter,cqtKernel.atomHOP,cqtKernel.atomNr);
+
+%% return
+intParam = struct('sufZeros',suffixZeros,'preZeros',prefixZeros,'xlen_init',xlen_init,'fftLEN',cqtKernel.fftLEN,'fftHOP',cqtKernel.fftHOP,...
+    'q',q,'filtCoeffA',A,'filtCoeffB',B,'firstcenter',cqtKernel.firstcenter,'atomHOP',cqtKernel.atomHOP,...
+    'atomNr',cqtKernel.atomNr,'Nk_max',cqtKernel.Nk_max,'Q',cqtKernel.Q,'rast',0);
+
+Xcqt = struct('spCQT',spCQT,'fKernel',cqtKernel.fKernel,'fmax',fmax,'fmin',fmin*2^(1/bins),'octaveNr',octaveNr,'bins',cqtKernel.bins,'intParams',intParam);
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mirex2012-matlab/doMultiF0.m	Wed Mar 19 09:09:23 2014 +0000
@@ -0,0 +1,57 @@
+function  []  = doMultiF0(inputFile,outputFile)
+
+
+% Transcribe file
+fprintf('%s',['Preprocessing............']);
+[ph pz sumY] = transcriptionMultipleTemplates(inputFile,12,1.1,1.3);
+fprintf('\n');
+fprintf('%s',['Postprocessing...........']);
+pianoRoll = repmat(sumY,88,1).*pz(1:88,:);
+pianoRoll = pianoRoll';
+for j=[1:15 74:88] pianoRoll(:,j)=0; end;
+pianoRoll = medfilt1(pianoRoll,3);
+
+
+% Max polyphony 5
+[B,IX] =sort(pianoRoll,2,'descend');  
+tempPianoRoll = zeros(size(pianoRoll,1),88);
+for j=1:size(pianoRoll,1) for k=1:5 tempPianoRoll(j,IX(j,k)) = B(j,k); end; end;
+pianoRoll = tempPianoRoll;
+
+
+% Expand piano-roll and perform thresholding
+expandedPianoRoll = zeros(4*size(pianoRoll,1),88);
+for j=1:4*size(pianoRoll,1)
+    expandedPianoRoll(j,:) = pianoRoll(floor((j-1)/4)+1,:);    
+end;
+finalPianoRoll = (expandedPianoRoll>4.8)';
+
+
+% Create output and perform minimum duration pruning
+auxPianoRoll = diff([zeros(1,88); finalPianoRoll'; zeros(1,88);],1); k=0;
+for i=1:88
+    onsets = find(auxPianoRoll(:,i)==1);
+    offsets = find(auxPianoRoll(:,i)==-1);
+    for j=1:length(onsets)           
+        if((offsets(j)/100-0.01) - (onsets(j)/100) > 0.05)
+            k=k+1;
+            nmat(k,1) = onsets(j)/100;       
+            nmat(k,2) = offsets(j)/100-0.01;    
+            nmat(k,3) = 27.5*2.^((( i-1)*10 )/120);
+        end;
+    end;
+end;
+nmat = sortrows(nmat,1);
+
+
+% Print output
+fid=fopen(outputFile,'w');
+for i=1:size(nmat,1)
+    fprintf(fid,'%.2f\t%.2f\t%.2f\n',nmat(i,1),nmat(i,2),nmat(i,3));
+end;
+fclose(fid);
+fprintf('%s','done');
+fprintf('\n');
+
+
+exit;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mirex2012-matlab/genCQTkernel.m	Wed Mar 19 09:09:23 2014 +0000
@@ -0,0 +1,135 @@
+function cqtKernel = genCQTkernel(fmax, bins, fs, varargin)
+%Calculating the CQT Kernel for one octave. All atoms are center-stacked.
+%Atoms are placed so that the stacks of lower octaves are centered at the
+%same positions in time, however, their amount is reduced by factor two for
+%each octave down.
+%
+%INPUT:
+%   fmax          ... highest frequency of interest
+%   bins          ... number of bins per octave
+%   fs            ... sampling frequency
+%
+%optional input parameters (parameter name/value pairs):
+%
+%   'q'             ... Q scaling factor. Default: 1.
+%   'atomHopFactor' ... relative hop size corresponding to the shortest
+%                       temporal atom. Default: 0.25.
+%   'thresh'        ... values smaller than 'tresh' in the spectral kernel are rounded to
+%                       zero. Default: 0.0005.
+%   'win'           ... defines which window will be used for the CQT. Valid
+%                       values are: 'blackman','hann' and 'blackmanharris'. To
+%                       use the square root of each window use the prefix 'sqrt_'
+%                      (i.e. 'sqrt_blackman'). Default: 'sqrt_blackmanharris'
+%   'perfRast'      ... if set to 1 the kernel is designed in order to
+%                       enable perfect rasterization using the function
+%                       cqtPerfectRast() (Default: perRast=0). See documentation of
+%                       'cqtPerfectRast' for further information.
+%
+%OUTPUT:
+%   cqtKernel   ... Structure that contains the spectral kernel 'fKernel'
+%                   additional design parameters used in cqt(), cqtPerfectRast() and icqt().
+%
+%Christian Schörkhuber, Anssi Klapuri 2010-06
+
+%% input parameters
+q = 1; %default value
+atomHopFactor = 0.25; %default value
+thresh = 0.0005; %default value
+winFlag = 'sqrt_blackmanharris'; %default value
+perfRast = 0; %default value
+
+for ain = 1:length(varargin)
+    if strcmp(varargin{ain},'q'), q = varargin{ain+1}; end;
+    if strcmp(varargin{ain},'atomHopFactor'), atomHopFactor = varargin{ain+1}; end;
+    if strcmp(varargin{ain},'thresh'), thresh = varargin{ain+1}; end;
+    if strcmp(varargin{ain},'win'), winFlag = varargin{ain+1}; end;
+    if strcmp(varargin{ain},'perfRast'), perfRast = varargin{ain+1}; end;
+end
+
+%% define
+fmin = (fmax/2)*2^(1/bins);
+Q = 1/(2^(1/bins)-1);
+Q = Q*q;
+Nk_max = Q * fs / fmin; 
+Nk_max = round(Nk_max); %length of the largest atom [samples]
+
+
+%% Compute FFT size, FFT hop, atom hop, ...
+Nk_min = round( Q * fs / (fmin*2^((bins-1)/bins)) ); %length of the shortest atom [samples]
+atomHOP = round(Nk_min*atomHopFactor); %atom hop size
+first_center = ceil(Nk_max/2); %first possible center position within the frame
+first_center = atomHOP * ceil(first_center/atomHOP); %lock the first center to an integer multiple of the atom hop size
+FFTLen = 2^nextpow2(first_center+ceil(Nk_max/2)); %use smallest possible FFT size (increase sparsity)
+
+if perfRast
+    winNr = floor((FFTLen-ceil(Nk_max/2)-first_center)/atomHOP); %number of temporal atoms per FFT Frame
+    if winNr == 0
+        FFTLen = FFTLen * 2;
+        winNr = floor((FFTLen-ceil(Nk_max/2)-first_center)/atomHOP);
+    end
+else
+    winNr = floor((FFTLen-ceil(Nk_max/2)-first_center)/atomHOP)+1; %number of temporal atoms per FFT Frame
+end
+
+last_center = first_center + (winNr-1)*atomHOP;
+fftHOP = (last_center + atomHOP) - first_center; % hop size of FFT frames
+fftOLP = (FFTLen-fftHOP/FFTLen)*100; %overlap of FFT frames in percent ***AK:needed?
+
+%% init variables
+tempKernel= zeros(1,FFTLen); 
+sparKernel= []; 
+
+%% Compute kernel
+atomInd = 0;
+for k = 1:bins
+    
+   Nk = round( Q * fs / (fmin*2^((k-1)/bins)) ); %N[k] = (fs/fk)*Q. Rounding will be omitted in future versions
+   
+   switch winFlag
+       case 'sqrt_blackmanharris'
+            winFct = sqrt(blackmanharris(Nk));
+       case 'blackmanharris'
+            winFct = blackmanharris(Nk);
+       case 'sqrt_hann'
+            winFct = sqrt(hann(Nk,'periodic'));
+       case 'hann'
+            winFct = hann(Nk,'periodic');
+       case 'sqrt_blackman'
+            winFct = sqrt(hann(blackman,'periodic'));
+       case 'blackman'
+            winFct = blackman(Nk,'periodic');
+       otherwise
+            winFct = sqrt(blackmanharris(Nk));
+            if k==1, warning('CQT:INPUT','Non-existing window function. Default window is used!'); end;
+   end
+   
+   fk = fmin*2^((k-1)/bins);
+   tempKernelBin = (winFct/Nk) .* exp(2*pi*1i*fk*(0:Nk-1)'/fs);
+   atomOffset = first_center - ceil(Nk/2);
+
+   for i = 1:winNr 
+       shift = atomOffset + ((i-1) * atomHOP);
+       tempKernel(1+shift:Nk+shift) = tempKernelBin; 
+       atomInd = atomInd+1;
+       specKernel= fft(tempKernel);
+       specKernel(abs(specKernel)<=thresh)= 0;   
+       sparKernel= sparse([sparKernel; specKernel]);
+       tempKernel = zeros(1,FFTLen); %reset window     
+   end
+end
+sparKernel = (sparKernel.')/FFTLen;
+
+%% Normalize the magnitudes of the atoms
+[ignore,wx1]=max(sparKernel(:,1));
+[ignore,wx2]=max(sparKernel(:,end));
+wK=sparKernel(wx1:wx2,:);
+wK = diag(wK * wK');
+wK = wK(round(1/q)+1:(end-round(1/q)-2));
+weight = 1./mean(abs(wK));
+weight = weight.*(fftHOP/FFTLen); 
+weight = sqrt(weight); %sqrt because the same weight is applied in icqt again
+sparKernel = weight.*sparKernel;
+
+%% return
+cqtKernel = struct('fKernel',sparKernel,'fftLEN',FFTLen,'fftHOP',fftHOP,'fftOverlap',fftOLP,'perfRast',perfRast,...
+    'bins',bins,'firstcenter',first_center,'atomHOP',atomHOP,'atomNr',winNr,'Nk_max',Nk_max,'Q',Q,'fmin',fmin);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mirex2012-matlab/getCQT.m	Wed Mar 19 09:09:23 2014 +0000
@@ -0,0 +1,47 @@
+function intCQT = getCQT(Xcqt,fSlice,tSlice,iFlag)
+%outCQ = getCQT(Xcqt,fSlice,tSlice,iFlag) computes a rasterized representation of 
+%the amplitudes of the calculated CQT coefficients for the frequency bins definded in vector fSlice and the 
+%points in time (time frames) defined in vector tSlice using the interpolation method defined in iFlag. 
+%Valid values for iFlag are:
+%
+%'linear'  ... linear interpolation (default)
+%'spline'  ... spline interpolation
+%'nearest' ... nearest neighbor interpolation
+%'cubic'   ... piecewise cubic interpolation
+%
+%If the entire CQT representation should be rasterized, set fSlice and
+%tSlice to 'all'.
+%The input parameter Xcqt is the structure gained using cqt(...).
+%The output parameter 'intCQT' is the same size as Xcqt.spCQT but is no
+%longer sparse since the zeros between two coefficients are replaced by
+%the interpolated values. The coefficients stored in 'intCQT' are now
+%real-valued since only the absolute values of the coefficients are
+%interpolated. If a spectrogram-like (rasterized) version of the CQT
+%coefficients including phase information is required, use the function
+%cqtPerfectRast() (see documentation for further information)
+%
+%Christian Schörkhuber, Anssi Klapuri 2010-06
+
+
+if ischar(fSlice), fSlice = 1:(Xcqt.bins*Xcqt.octaveNr); end;
+if ischar(tSlice)
+    lastEnt = find(Xcqt.spCQT(1,:),1,'last');
+    tSlice = 1:lastEnt;
+end
+if nargin < 4, iFlag = 'linear'; end;
+
+intCQT = zeros(length(fSlice),length(tSlice));
+bins = Xcqt.bins;
+spCQT = Xcqt.spCQT;
+octaveNr = Xcqt.octaveNr;
+spCQT = spCQT.';
+
+for k=1:length(fSlice)
+   oct = octaveNr-floor((fSlice(k)-0.1)/bins);
+   stepVec = 1:2^(oct-1):size(spCQT,1);
+   Xbin = full(spCQT(stepVec,fSlice(k)));
+   intCQT(k,:) = interp1(stepVec,abs(Xbin),tSlice,iFlag);
+end
+
+
+
Binary file mirex2012-matlab/noteTemplatesBassoon.mat has changed
Binary file mirex2012-matlab/noteTemplatesCello.mat has changed
Binary file mirex2012-matlab/noteTemplatesClarinet.mat has changed
Binary file mirex2012-matlab/noteTemplatesFlute.mat has changed
Binary file mirex2012-matlab/noteTemplatesGuitar.mat has changed
Binary file mirex2012-matlab/noteTemplatesHorn.mat has changed
Binary file mirex2012-matlab/noteTemplatesOboe.mat has changed
Binary file mirex2012-matlab/noteTemplatesSptkBGCl.mat has changed
Binary file mirex2012-matlab/noteTemplatesTenorSax.mat has changed
Binary file mirex2012-matlab/noteTemplatesViolin.mat has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mirex2012-matlab/transcriptionMultipleTemplates.m	Wed Mar 19 09:09:23 2014 +0000
@@ -0,0 +1,71 @@
+function [ph pz sumY] = transcriptionMultipleTemplates(filename,iter,sz,su)
+
+
+% Load note templates
+load('noteTemplatesBassoon');      W(:,:,1) = noteTemplatesBassoon;
+load('noteTemplatesCello');        W(:,:,2) = noteTemplatesCello;
+load('noteTemplatesClarinet');     W(:,:,3) = noteTemplatesClarinet;
+load('noteTemplatesFlute');        W(:,:,4) = noteTemplatesFlute;
+load('noteTemplatesGuitar');       W(:,:,5) = noteTemplatesGuitar;
+load('noteTemplatesHorn');         W(:,:,6) = noteTemplatesHorn;
+load('noteTemplatesOboe');         W(:,:,7) = noteTemplatesOboe;
+load('noteTemplatesTenorSax');     W(:,:,8) = noteTemplatesTenorSax;
+load('noteTemplatesViolin');       W(:,:,9) = noteTemplatesViolin;
+load('noteTemplatesSptkBGCl');     W(:,:,10) = noteTemplatesSptkBGCl;
+
+
+%pitchActivity = [14 16 30 40 20 21 38 24 35 1; 52 61 69 76 56 57 71 55 80 88]';
+pitchActivity = [16 16 30 40 20 21 38 24 35 16; 52 61 69 73 56 57 71 55 73 73]';
+
+
+W = permute(W,[2 1 3]);
+W0 = squeeze(num2cell(W,1))';
+clear('noteTemplatesBassoon','noteTemplatesCello','noteTemplatesClarinet','noteTemplatesFlute','noteTemplatesGuitar',...
+    'noteTemplatesHorn','noteTemplatesOboe','noteTemplatesTenorSax','noteTemplatesViolin','noteTemplatesSptkBGCl','W');
+
+
+% Compute CQT
+[intCQT] = computeCQT(filename);
+X = intCQT(:,round(1:7.1128:size(intCQT,2)))';
+noiseLevel1 = medfilt1(X',40);
+noiseLevel2 = medfilt1(min(X',noiseLevel1),40);
+X = max(X-noiseLevel2',0);
+Y = X(1:4:size(X,1),:);  % 40ms step
+sumY = sum(Y');
+clear('intCQT','X','noiseLevel1','noiseLevel2');
+
+fprintf('%s','done');
+fprintf('\n');
+fprintf('%s',['Estimating F0s...........']);
+
+% For each 2sec segment, perform SIPLCA with fixed W0
+ph = zeros(440,size(Y,1));
+pz = zeros(88,size(Y,1));
+
+for j=1:floor(size(Y,1)/100)
+
+    x=[zeros(2,100); Y(1+(j-1)*100:j*100,:)'; zeros(2,100)];
+    [w,h,z,u,xa] = cplcaMT( x, 88, [545 1], 10, W0, [], [], [], iter, 1, 1, sz, su, 0, 1, 1, 1, pitchActivity);
+   
+    H=[]; for i=1:88 H=[H; h{i}]; end;
+    ph(:,1+(j-1)*100:j*100) = H;
+    Z=[]; for i=1:88 Z=[Z z{i}]; end;
+    pz(:,1+(j-1)*100:j*100) = Z';   
+    perc = 100*(j/(floor(size(Y,1)/100)+1));
+    fprintf('\n');
+    fprintf('%.2f%% complete',perc);
+end;
+
+len=size(Y,1)-j*100; % Final segment
+
+if (len >0)
+x=[zeros(2,len); Y(1+j*100:end,:)'; zeros(2,len)];
+[w,h,z,u,xa] = cplcaMT( x, 88, [545 1], 10, W0, [], [], [], iter, 1, 1, sz, su, 0, 1, 1, 1, pitchActivity);
+fprintf('\n');
+fprintf('100%% complete');
+
+H=[]; for i=1:88 H=[H; h{i}]; end;
+ph(:,1+j*100:end) = H;
+Z=[]; for i=1:88 Z=[Z z{i}]; end;
+pz(:,1+j*100:end) = Z'; 
+end;
\ No newline at end of file