Mercurial > hg > silvet
changeset 2:8017dd4a650d
Add MIREX 2012 code
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mirex2012-matlab/README.txt Wed Mar 19 09:09:23 2014 +0000 @@ -0,0 +1,16 @@ +MIREX 2012 Multiple Fundamental Frequency Estimation & Tracking Task 2 (note tracking) +Emmanouil Benetos and Simon Dixon + +Environment: MATLAB 7.x 32-bit (Windows/Mac OS X/Linux version) or MATLAB 7.x 64-bit (Mac OS X/Linux version) + +Command line calling format: doMultiF0('path/to/inputFile.wav','path/to/outputFile.F0') + +Example: doMultiF0('mix_var5a.wav','mix_var5a.F0') + +Expected Memory Footprint: 450 MB + +Number of threads/cores used: 1 + +Expected Runtime: 50 x RealTime (eg. for a 30sec audio file, expected runtime is 25min) + +NOTE: An exit statement is contained at the end of doMultiF0.m
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mirex2012-matlab/cell2sparse.m Wed Mar 19 09:09:23 2014 +0000 @@ -0,0 +1,78 @@ +function spCQT = cell2sparse(Xcq,octaves,bins,firstcenter,atomHOP,atomNr) +%Generates a sparse matrix containing the CQT coefficients (rasterized). +% +%The sparse matrix representation of the CQT coefficients contains all +%computed coefficients at the corresponding time-frequency location +%(similar to a spectrogram). For lower frequencies this means, that +%each coefficient is followed by zeros stemming from the fact, that the time +%resolution for lower frequencies decreases as the frequency resolution +%increases. Due to the design of the CQT kernel, however, the coefficients +%of different octaves are synchronised, meaning that for the second highest +%octave each coefficient is followed by one zero, for the next octave down +%two zeros are inserted, for the next octave four zeros are inserted and so +%on. +% +%INPUT: +% Xcq ... Cell array consisting of all coefficients for all octaves +% octaves ... Number of octaves processed +% bins ... Number of bins per octave +% firstcenter ... Location of the leftmost atom-stack in the temporal +% kernel +% atomHOP ... Spacing of two consecutive atom stacks +% atomNr ... Number of atoms per bin within the kernel +% +%Christian Schörkhuber, Anssi Klapuri 2010-06 + +if 0 +%% this version has big memory consumption but is very fast + emptyHops = firstcenter/atomHOP; + drop = emptyHops*2^(octaves-1)-emptyHops; %distance between first value in highest octave and first value in lowest octave + spCQT = zeros(bins*octaves,size(Xcq{1},2)*atomNr-drop); + + for i=1:octaves + drop = emptyHops*2^(octaves-i)-emptyHops; %first coefficients of all octaves have to be in synchrony + X = Xcq{i}; + if atomNr > 1 %more than one atom per bin --> reshape + Xoct = zeros(bins,atomNr*size(X,2)-drop); + for u=1:bins %reshape to continous windows for each bin (for the case of several wins per frame) + octX_bin = X((u-1)*atomNr+1:u*atomNr,:); + Xcont = reshape(octX_bin,1,size(octX_bin,1)*size(octX_bin,2)); + Xoct(u,:) = Xcont(1+drop:end); + end + X = Xoct; + else + X = X(:,1+drop:end); + end + binVec = bins*octaves-bins*i+1:bins*octaves-bins*(i-1); + spCQT(binVec,1:2^(i-1):size(X,2)*2^(i-1)) = X; + + end + spCQT = sparse(spCQT); %storing as sparse matrix at the end is the fastest way. Big memory consumption though! + +else +%% this version uses less memory but is noticable slower + emptyHops = firstcenter/atomHOP; + drops = emptyHops*2.^(octaves-(1:octaves))-emptyHops; + len = max(((atomNr*cellfun('size',Xcq,2)-drops).*2.^(0:octaves-1))); %number of columns of output matrix + spCQT = []; + + for i=octaves:-1:1 + drop = emptyHops*2^(octaves-i)-emptyHops; %first coefficients of all octaves have to be in synchrony + X = Xcq{i}; + if atomNr > 1 %more than one atom per bin --> reshape + Xoct = zeros(bins,atomNr*size(X,2)-drop); + for u=1:bins %reshape to continous windows for each bin (for the case of several wins per frame) + octX_bin = X((u-1)*atomNr+1:u*atomNr,:); + Xcont = reshape(octX_bin,1,size(octX_bin,1)*size(octX_bin,2)); + Xoct(u,:) = Xcont(1+drop:end); + end + X = Xoct; + else + X = X(:,1+drop:end); + end + X = upsample(X.',2^(i-1)).'; + X = [X zeros(bins,len-size(X,2))]; + spCQT = sparse([spCQT; X]); + end + +end
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mirex2012-matlab/computeCQT.m Wed Mar 19 09:09:23 2014 +0000 @@ -0,0 +1,30 @@ +function [intCQT] = computeCQT(filename) +% Settings for computing CQT for music signals (by E. Benetos) + +% Load .wav file +[x fs bits] = wavread(filename); +if (size(x,2) == 2) y = mean(x')'; clear('x'); else y=x; clear('x'); end; +if (fs ~= 44100) y = resample(y,44100,fs); end; +y = 0.5*y/max(y); +fs = 44100; + + +% Compute CQT +Xcqt = cqt(y,27.5,fs/3,60,fs,'q',0.80,'atomHopFactor',0.3,'thresh',0.0005,'win','hann'); +%Xcqt = cqt(y,27.5,fs/3,120,fs,'q',0.35,'atomHopFactor',0.3,'thresh',0.0005,'win','hann'); % old resolution +absCQT = getCQT(Xcqt,'all','all'); + +% Crop CQT to useful time regions +emptyHops = Xcqt.intParams.firstcenter/Xcqt.intParams.atomHOP; +maxDrop = emptyHops*2^(Xcqt.octaveNr-1)-emptyHops; +droppedSamples = (maxDrop-1)*Xcqt.intParams.atomHOP + Xcqt.intParams.firstcenter; +outputTimeVec = (1:size(absCQT,2))*Xcqt.intParams.atomHOP-Xcqt.intParams.preZeros+droppedSamples; + +lowerLim = find(outputTimeVec>0,1); +upperLim = find(outputTimeVec>length(y),1)-1; + +%intCQT = absCQT(112:1200,lowerLim:upperLim); % old resolution +intCQT = absCQT(56:600,lowerLim:upperLim); + + +%figure; imagesc(imrotate(abs(intCQT),90));
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mirex2012-matlab/cplcaMT.m Wed Mar 19 09:09:23 2014 +0000 @@ -0,0 +1,192 @@ +function [w,h,z,u,xa] = cplcaMT( x, K, T, R, w, h, z, u, iter, sw, sh, sz, su, lw, lh, lz, lu, pa) +% function [w,h,xa2] = cplcaMT( x, K, T, R, w, h, z, u, iter, sw, sh, sz, su, lw, lh, lz, lu) +% +% Perform multiple-source, multiple-template SIPLCA for transcription +% +% Inputs: +% x input distribution +% K number of components +% T size of components +% R size of sources +% w initial value of p(w) [default = random] +% h initial value of p(h) [default = random] +% z initial value of p(z) [default = random] +% iter number of EM iterations [default = 10] +% sw sparsity parameter for w [default = 1] +% sh sparsity parameter for h [default = 1] +% sz sparsity parameter for z [default = 1] +% lw flag to update w [default = 1] +% lh flag to update h [default = 1] +% lh flag to update h [default = 1] +% pa source-component activity range [Rx2] +% +% Outputs: +% w p(w) - spectral bases +% h p(h) - pitch impulse +% z p(z) - mixing matrix for p(h) +% xa approximation of input + +% Emmanouil Benetos 2011, based on cplca code by Paris Smaragdis + + +% Sort out the sizes +wc = 2*size(x)-T; +hc = size(x)+T-1; + +% Default training iterations +if ~exist( 'iter') + iter = 10; +end + + +% Initialize +sumx = sum(x); + +if ~exist( 'w') || isempty( w) + w = cell(R, K); + for k = 1:K + for r=1:R + w{r,k} = rand( T); + w{r,k} = w{r,k} / sum( w{r,k}(:)); + end + end +end +if ~exist( 'h') || isempty( h) + h = cell(1, K); + for k = 1:K + h{k} = rand( size(x)-T+1); + h{k} = h{k}; + end +end +if ~exist( 'z') || isempty( z) + z = cell(1, K); + for k = 1:K + z{k} = rand( size(x,2),1); + z{k} = z{k}; + end +end +if ~exist( 'u') || isempty( u) + u = cell(R, K); + for k = 1:K + for r=1:R + if( (pa(r,1) <= k && k <= pa(r,2)) ) + u{r,k} = ones( size(x,2),1); + else + u{r,k} = zeros( size(x,2),1); + end + end; + end +end + +fh = cell(1, K); +fw = cell(R, K); +for k = 1:K + fh{k} = ones(wc) + 1i*ones(wc); + for r=1:R + fw{r,k} = ones(wc) + 1i*ones(wc); + end; +end; + + + +% Make commands for subsequent multidim operations and initialize fw +fnh = 'c(hc(1)-(T(1)+(1:size(h{k},1))-2),hc(2)-(T(2)+(1:size(h{k},2))-2))'; +xai = 'xa(1:size(x,1),1:size(x,2))'; +flz = 'xbar(end:-1:1,end:-1:1)'; + +for k = 1:K + for r=1:R + if( (pa(r,1) <= k && k <= pa(r,2)) ) + fw{r,k} = fftn( w{r,k}, wc); + end; + end; +end; + +% Iterate +for it = 1:iter + + %disp(['Iteration: ' num2str(it)]); + + % E-step + xa = eps; + for k = 16:73 + fh{k} = fftn( h{k}, wc); + for r=1:R + if( (pa(r,1) <= k && k <= pa(r,2)) ) + xa1 = abs( real( ifftn( fw{r,k} .* fh{k}))); + xa = xa + xa1(1:size(x,1),1:size(x,2)) .*repmat(z{k},1,size(x,1))'.*repmat(u{r,k},1,size(x,1))'; + clear xa1; + end + end + end + + xbar = x ./ xa; + xbar = eval( flz); + fx = fftn( xbar, wc); + + + % M-step + for k = 16:73 + + + % Update h, z, u + nh=eps; + for r=1:R + if( (pa(r,1) <= k && k <= pa(r,2)) ) + c = abs( real( ifftn( fx .* fw{r,k} ))); + nh1 = eval( fnh); + nh1 = nh1 .*repmat(u{r,k},1,size(h{k},1))'; + nh = nh + nh1; + + nhu = eval( fnh); + nhu = nhu .* h{k}; + nu = sum(nhu)'; + nu = u{r,k} .* nu + eps; + if lu + u{r,k} = nu; + end; + + end; + end + nh = h{k} .* (nh.^sh); + nz = sum(nh)'; + nz = z{k} .* nz + eps; + + + % Assign and normalize + if lh + h{k} = nh; + end + if lz + z{k} = nz; + end + + + end + + % Normalize z over t + if lz + Z=[]; for i=1:K Z=[Z z{i}]; end; + Z = Z.^sz; + Z(1:end,1:15)=0; + Z(1:end,74:88)=0; + Z = Z./repmat(sum(Z,2),1,K); z = num2cell(Z,1); %figure; imagesc(imrotate(Z,90)); + end + + % Normalize u over z,t + if lu + U=[]; for r=1:R U(r,:,:) = cell2mat(u(r,:)); end; + for i=1:size(U,2) for j=1:size(U,3) U(:,i,j) = U(:,i,j).^su; U(:,i,j) = U(:,i,j) ./ sum(U(:,i,j)); end; end; + U0 = permute(U,[2 1 3]); u = squeeze(num2cell(U0,1)); + end + + % Normalize h over z,t + H=[]; for k=1:K H(k,:,:) = cell2mat(h(k)); end; H0 = permute(H,[2 1 3]); + for i=1:size(H0,2) for j=1:size(H0,3) H0(:,i,j) = sumx(j)* (H0(:,i,j) ./ sum(H0(:,i,j))); end; end; + h = squeeze(num2cell(squeeze(H0),[1 3])); for k=1:K h{k} = squeeze(h{k}); end; + + %figure; imagesc(imrotate(xa',90)); + +end + +%figure; imagesc(imrotate(xa',90)); \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mirex2012-matlab/cqt.m Wed Mar 19 09:09:23 2014 +0000 @@ -0,0 +1,114 @@ +function Xcqt = cqt(x,fmin,fmax,bins,fs,varargin) +%Xcqt = cqt(x,fmin,fmax,bins,fs,varargin) calculates the constant-Q transform of the input signal x. +% +%INPUT: +% fmin ... lowest frequency of interest +% fmax ... highest frequency of interest +% bins ... frequency bins per octave +% fs ... sampling rate +% +% optional input parameters (parameter name/value pairs): +% +% 'atomHopFactor' ... overlap of temporal atoms in percent. Default: 0.25. +% +% 'q' ... the maximum value for optimal reconstruction is q=1. +% For values smaller than 1 the bandwidths of the spectral +% atoms (filter) are increased retaining their center +% frequencies (frequency 'smearing', frequency domain redundancy +% increases, time resolutin improves). Default: 1. +% 'thresh' ... all values in the cqt kernel smaller than tresh are +% rounded to zero. A high value for thresh yields a +% very sparse kernel (fast) but introduces a bigger error. +% The default value is chosen so that the error due to rounding is negligible. +% 'kernel' ... if the cqt kernel structure has been precomputed +% (using function 'genCQTkernel'), the computation of the kernel +% will be by-passed below). +% 'win' ... defines which window will be used for the CQT. Valid +% values are: 'blackman','hann' and 'blackmanharris'. To +% use the square root of each window use the prefix 'sqrt_' +% (i.e. 'sqrt_blackman'). Default: 'sqrt_blackmanharris' +% 'coeffB', +% 'coeffA' ... Filter coefficients for the anti-aliasing filter, where +% 'coeffB' is the numerator and 'coeffA' is the +% denominator (listed in descending powers of z). +% +%OUTPUT: +% Xcqt ... struct that comprises various fields: +% spCQT: CQT coefficients in the form of a sparse matrix +% (rasterized, not interpolated) +% fKernel: spectral Kernel +% fmin: frequency of the lowest bin +% fmax: frequency of the hiqhest bin +% octaveNr: number of octaves processed +% bins: number of bins per octave +% intParams: structure containing additional parameters for the inverse transform +% +%Christian Schörkhuber, Anssi Klapuri 2010-06 + +%% input checking +if size(x,2) > 1 && size(x,1) > 1, error('cqt requires one-dimensional input!'); end; +if size(x,2) > 1, x = x(:); end; %column vector + +%% input parameters +q = 1; %default value +atomHopFactor = 0.25; %default value +thresh = 0.0005; %default value +winFlag = 'sqrt_blackmanharris'; + +for ain = 1:1:length(varargin) + if strcmp(varargin{ain},'q'), q = varargin{ain+1}; end; + if strcmp(varargin{ain},'atomHopFactor'), atomHopFactor = varargin{ain+1}; end; + if strcmp(varargin{ain},'thresh'), thresh = varargin{ain+1}; end; + if strcmp(varargin{ain},'kernel'), cqtKernel = varargin{ain+1}; end; + if strcmp(varargin{ain},'win'), winFlag = varargin{ain+1}; end; + if strcmp(varargin{ain},'coeffB'), B = varargin{ain+1}; end; + if strcmp(varargin{ain},'coeffA'), A = varargin{ain+1}; end; +end + +%% define +octaveNr = ceil(log2(fmax/fmin)); +xlen_init = length(x); + +%% design lowpass filter +if ~exist('B','var') || ~exist('A','var') + LPorder = 6; %order of the anti-aliasing filter + cutoff = 0.5; + [B A] = butter(LPorder,cutoff,'low'); %design f_nyquist/2-lowpass filter +end + +%% design kernel for one octave +if ~exist('cqtKernel','var') + cqtKernel = genCQTkernel(fmax, bins,fs,'q',q,'atomHopFactor',atomHopFactor,'thresh',thresh,'win',winFlag); +end + +%% calculate CQT +cellCQT = cell(1,octaveNr); +maxBlock = cqtKernel.fftLEN * 2^(octaveNr-1); %largest FFT Block (virtual) +suffixZeros = maxBlock; +prefixZeros = maxBlock; +x = [zeros(prefixZeros,1); x; zeros(suffixZeros,1)]; %zeropadding +OVRLP = cqtKernel.fftLEN - cqtKernel.fftHOP; +K = cqtKernel.fKernel'; %conjugate spectral kernel for cqt transformation + +for i = 1:octaveNr + xx = buffer(x,cqtKernel.fftLEN, OVRLP,'nodelay'); %generating FFT blocks + XX = fft(xx); %applying fft to each column (each FFT frame) + cellCQT{i} = K*XX; %calculating cqt coefficients for all FFT frames for this octave + + if i~=octaveNr + x = filtfilt(B,A,x); %anti aliasing filter + x = x(1:2:end); %drop samplerate by 2 + end +end + +%% map to sparse matrix representation +spCQT = cell2sparse(cellCQT,octaveNr,bins,cqtKernel.firstcenter,cqtKernel.atomHOP,cqtKernel.atomNr); + +%% return +intParam = struct('sufZeros',suffixZeros,'preZeros',prefixZeros,'xlen_init',xlen_init,'fftLEN',cqtKernel.fftLEN,'fftHOP',cqtKernel.fftHOP,... + 'q',q,'filtCoeffA',A,'filtCoeffB',B,'firstcenter',cqtKernel.firstcenter,'atomHOP',cqtKernel.atomHOP,... + 'atomNr',cqtKernel.atomNr,'Nk_max',cqtKernel.Nk_max,'Q',cqtKernel.Q,'rast',0); + +Xcqt = struct('spCQT',spCQT,'fKernel',cqtKernel.fKernel,'fmax',fmax,'fmin',fmin*2^(1/bins),'octaveNr',octaveNr,'bins',cqtKernel.bins,'intParams',intParam); + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mirex2012-matlab/doMultiF0.m Wed Mar 19 09:09:23 2014 +0000 @@ -0,0 +1,57 @@ +function [] = doMultiF0(inputFile,outputFile) + + +% Transcribe file +fprintf('%s',['Preprocessing............']); +[ph pz sumY] = transcriptionMultipleTemplates(inputFile,12,1.1,1.3); +fprintf('\n'); +fprintf('%s',['Postprocessing...........']); +pianoRoll = repmat(sumY,88,1).*pz(1:88,:); +pianoRoll = pianoRoll'; +for j=[1:15 74:88] pianoRoll(:,j)=0; end; +pianoRoll = medfilt1(pianoRoll,3); + + +% Max polyphony 5 +[B,IX] =sort(pianoRoll,2,'descend'); +tempPianoRoll = zeros(size(pianoRoll,1),88); +for j=1:size(pianoRoll,1) for k=1:5 tempPianoRoll(j,IX(j,k)) = B(j,k); end; end; +pianoRoll = tempPianoRoll; + + +% Expand piano-roll and perform thresholding +expandedPianoRoll = zeros(4*size(pianoRoll,1),88); +for j=1:4*size(pianoRoll,1) + expandedPianoRoll(j,:) = pianoRoll(floor((j-1)/4)+1,:); +end; +finalPianoRoll = (expandedPianoRoll>4.8)'; + + +% Create output and perform minimum duration pruning +auxPianoRoll = diff([zeros(1,88); finalPianoRoll'; zeros(1,88);],1); k=0; +for i=1:88 + onsets = find(auxPianoRoll(:,i)==1); + offsets = find(auxPianoRoll(:,i)==-1); + for j=1:length(onsets) + if((offsets(j)/100-0.01) - (onsets(j)/100) > 0.05) + k=k+1; + nmat(k,1) = onsets(j)/100; + nmat(k,2) = offsets(j)/100-0.01; + nmat(k,3) = 27.5*2.^((( i-1)*10 )/120); + end; + end; +end; +nmat = sortrows(nmat,1); + + +% Print output +fid=fopen(outputFile,'w'); +for i=1:size(nmat,1) + fprintf(fid,'%.2f\t%.2f\t%.2f\n',nmat(i,1),nmat(i,2),nmat(i,3)); +end; +fclose(fid); +fprintf('%s','done'); +fprintf('\n'); + + +exit;
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mirex2012-matlab/genCQTkernel.m Wed Mar 19 09:09:23 2014 +0000 @@ -0,0 +1,135 @@ +function cqtKernel = genCQTkernel(fmax, bins, fs, varargin) +%Calculating the CQT Kernel for one octave. All atoms are center-stacked. +%Atoms are placed so that the stacks of lower octaves are centered at the +%same positions in time, however, their amount is reduced by factor two for +%each octave down. +% +%INPUT: +% fmax ... highest frequency of interest +% bins ... number of bins per octave +% fs ... sampling frequency +% +%optional input parameters (parameter name/value pairs): +% +% 'q' ... Q scaling factor. Default: 1. +% 'atomHopFactor' ... relative hop size corresponding to the shortest +% temporal atom. Default: 0.25. +% 'thresh' ... values smaller than 'tresh' in the spectral kernel are rounded to +% zero. Default: 0.0005. +% 'win' ... defines which window will be used for the CQT. Valid +% values are: 'blackman','hann' and 'blackmanharris'. To +% use the square root of each window use the prefix 'sqrt_' +% (i.e. 'sqrt_blackman'). Default: 'sqrt_blackmanharris' +% 'perfRast' ... if set to 1 the kernel is designed in order to +% enable perfect rasterization using the function +% cqtPerfectRast() (Default: perRast=0). See documentation of +% 'cqtPerfectRast' for further information. +% +%OUTPUT: +% cqtKernel ... Structure that contains the spectral kernel 'fKernel' +% additional design parameters used in cqt(), cqtPerfectRast() and icqt(). +% +%Christian Schörkhuber, Anssi Klapuri 2010-06 + +%% input parameters +q = 1; %default value +atomHopFactor = 0.25; %default value +thresh = 0.0005; %default value +winFlag = 'sqrt_blackmanharris'; %default value +perfRast = 0; %default value + +for ain = 1:length(varargin) + if strcmp(varargin{ain},'q'), q = varargin{ain+1}; end; + if strcmp(varargin{ain},'atomHopFactor'), atomHopFactor = varargin{ain+1}; end; + if strcmp(varargin{ain},'thresh'), thresh = varargin{ain+1}; end; + if strcmp(varargin{ain},'win'), winFlag = varargin{ain+1}; end; + if strcmp(varargin{ain},'perfRast'), perfRast = varargin{ain+1}; end; +end + +%% define +fmin = (fmax/2)*2^(1/bins); +Q = 1/(2^(1/bins)-1); +Q = Q*q; +Nk_max = Q * fs / fmin; +Nk_max = round(Nk_max); %length of the largest atom [samples] + + +%% Compute FFT size, FFT hop, atom hop, ... +Nk_min = round( Q * fs / (fmin*2^((bins-1)/bins)) ); %length of the shortest atom [samples] +atomHOP = round(Nk_min*atomHopFactor); %atom hop size +first_center = ceil(Nk_max/2); %first possible center position within the frame +first_center = atomHOP * ceil(first_center/atomHOP); %lock the first center to an integer multiple of the atom hop size +FFTLen = 2^nextpow2(first_center+ceil(Nk_max/2)); %use smallest possible FFT size (increase sparsity) + +if perfRast + winNr = floor((FFTLen-ceil(Nk_max/2)-first_center)/atomHOP); %number of temporal atoms per FFT Frame + if winNr == 0 + FFTLen = FFTLen * 2; + winNr = floor((FFTLen-ceil(Nk_max/2)-first_center)/atomHOP); + end +else + winNr = floor((FFTLen-ceil(Nk_max/2)-first_center)/atomHOP)+1; %number of temporal atoms per FFT Frame +end + +last_center = first_center + (winNr-1)*atomHOP; +fftHOP = (last_center + atomHOP) - first_center; % hop size of FFT frames +fftOLP = (FFTLen-fftHOP/FFTLen)*100; %overlap of FFT frames in percent ***AK:needed? + +%% init variables +tempKernel= zeros(1,FFTLen); +sparKernel= []; + +%% Compute kernel +atomInd = 0; +for k = 1:bins + + Nk = round( Q * fs / (fmin*2^((k-1)/bins)) ); %N[k] = (fs/fk)*Q. Rounding will be omitted in future versions + + switch winFlag + case 'sqrt_blackmanharris' + winFct = sqrt(blackmanharris(Nk)); + case 'blackmanharris' + winFct = blackmanharris(Nk); + case 'sqrt_hann' + winFct = sqrt(hann(Nk,'periodic')); + case 'hann' + winFct = hann(Nk,'periodic'); + case 'sqrt_blackman' + winFct = sqrt(hann(blackman,'periodic')); + case 'blackman' + winFct = blackman(Nk,'periodic'); + otherwise + winFct = sqrt(blackmanharris(Nk)); + if k==1, warning('CQT:INPUT','Non-existing window function. Default window is used!'); end; + end + + fk = fmin*2^((k-1)/bins); + tempKernelBin = (winFct/Nk) .* exp(2*pi*1i*fk*(0:Nk-1)'/fs); + atomOffset = first_center - ceil(Nk/2); + + for i = 1:winNr + shift = atomOffset + ((i-1) * atomHOP); + tempKernel(1+shift:Nk+shift) = tempKernelBin; + atomInd = atomInd+1; + specKernel= fft(tempKernel); + specKernel(abs(specKernel)<=thresh)= 0; + sparKernel= sparse([sparKernel; specKernel]); + tempKernel = zeros(1,FFTLen); %reset window + end +end +sparKernel = (sparKernel.')/FFTLen; + +%% Normalize the magnitudes of the atoms +[ignore,wx1]=max(sparKernel(:,1)); +[ignore,wx2]=max(sparKernel(:,end)); +wK=sparKernel(wx1:wx2,:); +wK = diag(wK * wK'); +wK = wK(round(1/q)+1:(end-round(1/q)-2)); +weight = 1./mean(abs(wK)); +weight = weight.*(fftHOP/FFTLen); +weight = sqrt(weight); %sqrt because the same weight is applied in icqt again +sparKernel = weight.*sparKernel; + +%% return +cqtKernel = struct('fKernel',sparKernel,'fftLEN',FFTLen,'fftHOP',fftHOP,'fftOverlap',fftOLP,'perfRast',perfRast,... + 'bins',bins,'firstcenter',first_center,'atomHOP',atomHOP,'atomNr',winNr,'Nk_max',Nk_max,'Q',Q,'fmin',fmin);
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mirex2012-matlab/getCQT.m Wed Mar 19 09:09:23 2014 +0000 @@ -0,0 +1,47 @@ +function intCQT = getCQT(Xcqt,fSlice,tSlice,iFlag) +%outCQ = getCQT(Xcqt,fSlice,tSlice,iFlag) computes a rasterized representation of +%the amplitudes of the calculated CQT coefficients for the frequency bins definded in vector fSlice and the +%points in time (time frames) defined in vector tSlice using the interpolation method defined in iFlag. +%Valid values for iFlag are: +% +%'linear' ... linear interpolation (default) +%'spline' ... spline interpolation +%'nearest' ... nearest neighbor interpolation +%'cubic' ... piecewise cubic interpolation +% +%If the entire CQT representation should be rasterized, set fSlice and +%tSlice to 'all'. +%The input parameter Xcqt is the structure gained using cqt(...). +%The output parameter 'intCQT' is the same size as Xcqt.spCQT but is no +%longer sparse since the zeros between two coefficients are replaced by +%the interpolated values. The coefficients stored in 'intCQT' are now +%real-valued since only the absolute values of the coefficients are +%interpolated. If a spectrogram-like (rasterized) version of the CQT +%coefficients including phase information is required, use the function +%cqtPerfectRast() (see documentation for further information) +% +%Christian Schörkhuber, Anssi Klapuri 2010-06 + + +if ischar(fSlice), fSlice = 1:(Xcqt.bins*Xcqt.octaveNr); end; +if ischar(tSlice) + lastEnt = find(Xcqt.spCQT(1,:),1,'last'); + tSlice = 1:lastEnt; +end +if nargin < 4, iFlag = 'linear'; end; + +intCQT = zeros(length(fSlice),length(tSlice)); +bins = Xcqt.bins; +spCQT = Xcqt.spCQT; +octaveNr = Xcqt.octaveNr; +spCQT = spCQT.'; + +for k=1:length(fSlice) + oct = octaveNr-floor((fSlice(k)-0.1)/bins); + stepVec = 1:2^(oct-1):size(spCQT,1); + Xbin = full(spCQT(stepVec,fSlice(k))); + intCQT(k,:) = interp1(stepVec,abs(Xbin),tSlice,iFlag); +end + + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mirex2012-matlab/transcriptionMultipleTemplates.m Wed Mar 19 09:09:23 2014 +0000 @@ -0,0 +1,71 @@ +function [ph pz sumY] = transcriptionMultipleTemplates(filename,iter,sz,su) + + +% Load note templates +load('noteTemplatesBassoon'); W(:,:,1) = noteTemplatesBassoon; +load('noteTemplatesCello'); W(:,:,2) = noteTemplatesCello; +load('noteTemplatesClarinet'); W(:,:,3) = noteTemplatesClarinet; +load('noteTemplatesFlute'); W(:,:,4) = noteTemplatesFlute; +load('noteTemplatesGuitar'); W(:,:,5) = noteTemplatesGuitar; +load('noteTemplatesHorn'); W(:,:,6) = noteTemplatesHorn; +load('noteTemplatesOboe'); W(:,:,7) = noteTemplatesOboe; +load('noteTemplatesTenorSax'); W(:,:,8) = noteTemplatesTenorSax; +load('noteTemplatesViolin'); W(:,:,9) = noteTemplatesViolin; +load('noteTemplatesSptkBGCl'); W(:,:,10) = noteTemplatesSptkBGCl; + + +%pitchActivity = [14 16 30 40 20 21 38 24 35 1; 52 61 69 76 56 57 71 55 80 88]'; +pitchActivity = [16 16 30 40 20 21 38 24 35 16; 52 61 69 73 56 57 71 55 73 73]'; + + +W = permute(W,[2 1 3]); +W0 = squeeze(num2cell(W,1))'; +clear('noteTemplatesBassoon','noteTemplatesCello','noteTemplatesClarinet','noteTemplatesFlute','noteTemplatesGuitar',... + 'noteTemplatesHorn','noteTemplatesOboe','noteTemplatesTenorSax','noteTemplatesViolin','noteTemplatesSptkBGCl','W'); + + +% Compute CQT +[intCQT] = computeCQT(filename); +X = intCQT(:,round(1:7.1128:size(intCQT,2)))'; +noiseLevel1 = medfilt1(X',40); +noiseLevel2 = medfilt1(min(X',noiseLevel1),40); +X = max(X-noiseLevel2',0); +Y = X(1:4:size(X,1),:); % 40ms step +sumY = sum(Y'); +clear('intCQT','X','noiseLevel1','noiseLevel2'); + +fprintf('%s','done'); +fprintf('\n'); +fprintf('%s',['Estimating F0s...........']); + +% For each 2sec segment, perform SIPLCA with fixed W0 +ph = zeros(440,size(Y,1)); +pz = zeros(88,size(Y,1)); + +for j=1:floor(size(Y,1)/100) + + x=[zeros(2,100); Y(1+(j-1)*100:j*100,:)'; zeros(2,100)]; + [w,h,z,u,xa] = cplcaMT( x, 88, [545 1], 10, W0, [], [], [], iter, 1, 1, sz, su, 0, 1, 1, 1, pitchActivity); + + H=[]; for i=1:88 H=[H; h{i}]; end; + ph(:,1+(j-1)*100:j*100) = H; + Z=[]; for i=1:88 Z=[Z z{i}]; end; + pz(:,1+(j-1)*100:j*100) = Z'; + perc = 100*(j/(floor(size(Y,1)/100)+1)); + fprintf('\n'); + fprintf('%.2f%% complete',perc); +end; + +len=size(Y,1)-j*100; % Final segment + +if (len >0) +x=[zeros(2,len); Y(1+j*100:end,:)'; zeros(2,len)]; +[w,h,z,u,xa] = cplcaMT( x, 88, [545 1], 10, W0, [], [], [], iter, 1, 1, sz, su, 0, 1, 1, 1, pitchActivity); +fprintf('\n'); +fprintf('100%% complete'); + +H=[]; for i=1:88 H=[H; h{i}]; end; +ph(:,1+j*100:end) = H; +Z=[]; for i=1:88 Z=[Z z{i}]; end; +pz(:,1+j*100:end) = Z'; +end; \ No newline at end of file