Mercurial > hg > yjdafx13bpr
changeset 1:d8c7b69cb4c9
(none)
author | Yannick JACOB <y.jacob@se12.qmul.ac.uk> |
---|---|
date | Tue, 03 Sep 2013 15:32:51 +0100 |
parents | 2cd427e000b0 |
children | 13ec2fa02a26 |
files | Sirtassa/311CLNOF.WAV Sirtassa/311CLNOF.mid Sirtassa/Lussier_bassoon.mid Sirtassa/Lussier_bassoon.wav Sirtassa/Lussier_mix.wav Sirtassa/Lussier_trumpet.mid Sirtassa/Lussier_trumpet.wav Sirtassa/Sirtassa.m |
diffstat | 8 files changed, 0 insertions(+), 257 deletions(-) [+] |
line wrap: on
line diff
--- a/Sirtassa/Sirtassa.m Tue Sep 03 12:53:16 2013 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,257 +0,0 @@ -%% Default Values -%Audio and score information -if(~exist('midiCell','var')) - midiCell = cell(2,1); - midiCell{1} = 'Lussier_bassoon.mid'; - midiCell{2} = 'Lussier_trumpet.mid'; - mixName = 'Lussier_mix.wav'; - %mixName = '311CLNOF.WAV'; - %midiCell{1} = '311CLNOF.mid'; -end -if(~exist('blockSize','var')) blockSize = 2048; end -if(~exist('overlap','var')) overlap = 2; end -%Maximum frequency of the filter -if(~exist('bandPassFilterMaxFreq','var')) bandPassFilterMaxFreq = 10000; end -%Number of band pass filters in the filter -if(~exist('nbBandPassFilters','var')) nbBandPassFilters = 20;end -%Maximum updates per frame -if(~exist('maxUpdates','var')) maxUpdates = 2;end -%Number of partials in the model -if(~exist('nbPartials','var')) nbPartials = 8;end -if(~exist('plotRefinedScore','var')) plotRefinedScore = 0;end -if(~exist('plotPartial','var')) plotPartial = 0;end -if(~exist('plotFilter','var')) plotFilter = 0;end -%Nummber of instrument in the mix. -nbInstrument = length(midiCell); -fftSize = blockSize; - -%Weights of the harmonic series -partialsWeights = ones(nbPartials,nbInstrument); - -colors = 'rwm'; -%% Initialisation -hopSize =blockSize/overlap; %Hopsize -HFFTS = fftSize/2+1; %Size of the usefull part of the fft (half+1) -[audio, fs] = wavread(mixName); - -% zeropadding the audio length -audio(end:end+hopSize-mod(length(audio),hopSize)) = 0; -audioLength = length(audio); - -scoreLength = audioLength/hopSize - overlap+1; %Number of frames -wdw = hamming(blockSize); - -tic -%% Create filters -filterBasis = createBPFilters(bandPassFilterMaxFreq,nbBandPassFilters,fftSize,fftSize/fs); -filterCoefficients = ones(nbBandPassFilters,nbInstrument); -%% Creating the score -originalScore = createScore(scoreLength,midiCell,fs/hopSize,fftSize/fs); -originalScore(scoreLength+1:end,:) = []; -gains = double(originalScore'>0); -%% Creating spectrogram -magnitude = zeros(fftSize, scoreLength); -phase = zeros(fftSize, scoreLength); -audioVector = zeros(fftSize,1); -for curFrame = 1:scoreLength - audioVector(1:blockSize) = audio(hopSize*(curFrame-1)+1:hopSize*(curFrame-1)+blockSize,1).*wdw; - spectrogramVector = fft(audioVector); - magnitude(:,curFrame) = abs(spectrogramVector); - phase(:,curFrame) = angle(spectrogramVector); -end -magnitude(HFFTS+1:end,:) = []; -phase(HFFTS+1:end,:) = []; -maxSizeWindows = 400; -hammingWindows = cell(1,maxSizeWindows); -for len = 1:maxSizeWindows - hammingWindows{len} = hamming(len); -end - -%% Refine Score and create Excitation Spectrum -refinedScore = originalScore; %score to be refiened -basicSpec = zeros(HFFTS, scoreLength); %Basic theoretical spectrogram -excitationSpectrums = cell(nbInstrument,scoreLength); %cell for each frame -largerES = cell(nbInstrument,scoreLength); %cell for each frame - -for ins = 1:nbInstrument %for each isntrument - for curFrame = 1: scoreLength %for each frame - excitationSpectrums{ins,curFrame} = sparse(zeros(HFFTS,nbPartials)); - largerES{ins,curFrame} = sparse(zeros(HFFTS,nbPartials)); - if(originalScore(curFrame,ins)) %if there is a note - %Refine score and creates Excitation Spectrum - [refinedScore(curFrame,ins), width] = refineScore(magnitude(:,curFrame),originalScore(curFrame,ins)); - excitationSpectrums{ins,curFrame} = sparse(createExcitationSpectrums(refinedScore(curFrame,ins),HFFTS,nbPartials,partialsWeights(:,ins),width,hammingWindows)); - largerES{ins,curFrame} = sparse(createExcitationSpectrums(refinedScore(curFrame,ins),HFFTS,nbPartials,partialsWeights(:,ins),2*width,hammingWindows)); - end - basicSpec(:,curFrame) = basicSpec(:,curFrame) + (excitationSpectrums{ins,curFrame}*partialsWeights(:,ins))*gains(ins,curFrame); %Spectrogram without update - end -end -%% Plot spectrogram and add original and refined score -if (plotRefinedScore) - figure(1),imagesc(log(10*magnitude(1:fftSize/16,:)+1)), axis xy - plotableScore = originalScore+1; - plotableScore(plotableScore==1) = 0; - hold on ,plot(plotableScore,'.k'), - - for harm = 1 : nbPartials - for ins = nbInstrument:-1:1 - plot(refinedScore(:,ins)*harm+1,['.' colors(ins)]), colorbar - end - end -end -%% Online Processing -%Values that can be changed by the model. -maskForChangeableValues = basicSpec > 10^-4; -maskedSpectrogram = magnitude.*maskForChangeableValues; -bufferSize = 100; -nextProcess = 1; -for curFrame = 1:scoreLength - if sum(refinedScore(curFrame,:),2) - UpdateGainCoefficientsOL2 - if (curFrame==nextProcess) - first = max(curFrame-bufferSize+1,1); - currentMag = magnitude(:,first:curFrame); - tmpMaskForChangeableValues = basicSpec(:,first:curFrame) > 10^-4; - tmpMaskedSpectrogram = currentMag.*tmpMaskForChangeableValues; - len = curFrame-first+1; - for nbUpdates = 1:maxUpdates %update loop - UpdatePartialCoefficientsOL - UpdateFilterCoefficientsOL - if (plotFilter) - %%%Filter - figure(4),plot(outlook(1:blockSize/8,:)), title('Filter') - end - if (plotPartial) - %%%Decay - figure(5),stem(partialsWeights), title('Partial Weights') - end - end - nextProcess = min(ceil(curFrame*1.2),scoreLength) - %bufferSize = round(max(bufferSize,curFrame*0.5)); - if (plotFilter) - %%%Filter - figure(4),plot(outlook(1:blockSize/8,:)), title('Filter') - end - if (plotPartial) - %%%Decay - figure(5),stem(partialsWeights), title('Partial Weights') - - end - end - end -end -timeV = toc; -fprintf('Time: %6.3g s, %6.2g times real time \t \n ', timeV, timeV*fs/audioLength); - - -% Remodel Spectrogram -ECA = cell(nbInstrument,1); -xHat = zeros(HFFTS,scoreLength); -for ins = 1:nbInstrument - ECA{ins} = sparse(zeros(HFFTS,scoreLength)); - for frame = 1:scoreLength - ECA{ins}(:,frame) = filterBasis * filterCoefficients(:,ins).*(largerES{ins,frame}*partialsWeights(:,ins)); - end - xHat = xHat + ECA{ins}.*repmat(gains(ins,:),HFFTS,1); -end -%% Final plot -%%%Spectrogram (comparison with basic and final) -% % bMax = max(max(log(10*basicSpec+1))); -% % xMax = max(max(log(10*xHat+1))); -% % mMax = max(max(log(10*magnitude+1))); -% % tMax = max([bMax mMax xMax]); -% % figure(3), subplot(3,1,1), imagesc(log(10*basicSpec(1:hopSize/2,:)+1)), axis xy, linkaxes, caxis([0 tMax]), colorbar, title('Basic Model') -% % subplot(3,1,2), imagesc(log(10*xHat(1:hopSize/2,:)+1)), axis xy, linkaxes, caxis([0 tMax]), colorbar, title('Optimised Spectrogram') -% % subplot(3,1,3), imagesc(log(10*magnitude(1:hopSize/2,:)+1)), axis xy, linkaxes, caxis([0 tMax]), colorbar, title('Actual Spectrogram') -% % -% % -% % %%%Filter -% % figure(4),plot(outlook(1:blockSize/8,:)), title('Filter') -% % -% % %%%Decay -% % figure(5),stem(partialsWeights), title('Partial Weights') - -%% Writing -%extraInfo = [sprintf('_%d_%d_%d_%d_%d_%d',blockSize,overlap,maxUpdates,nbPartials,nbBandPassFilters,bandPassFilterMaxFreq) specExp]; -finalScale = zeros(audioLength,1); -freq = zeros(fftSize,1); -finalAudio = cell(nbInstrument+1,1); -finalAudioMasked = cell(nbInstrument,1); - -wdw2 = hamming(fftSize); -nbMaskingTechniques = 4; -maskingCell = cell(1,nbMaskingTechniques); -maskingCell{1} = 'raw'; -maskingCell{2} = 'per'; -maskingCell{3} = 'sha'; -maskingCell{4} = '10p'; -finalMask = cell(nbInstrument,nbMaskingTechniques); %4 is the number of different masking techniques -for ins = 1:nbInstrument - finalMask{ins,1} = sparse(zeros(HFFTS,scoreLength)); - finalMask{ins,4} = sparse(zeros(HFFTS,scoreLength)); - for curFrame = 1:scoreLength - finalMask{ins,1}(:,curFrame) = filterBasis * filterCoefficients(:,ins).*(excitationSpectrums{ins,curFrame}*partialsWeights(:,ins)); - finalMask{ins,4}(:,curFrame) = filterBasis * filterCoefficients(:,ins).*(largerES{ins,curFrame}*partialsWeights(:,ins)); - end - - finalMask{ins,1} = finalMask{ins,1}.*repmat(gains(ins,:),HFFTS,1); - finalMask{ins,4} = finalMask{ins,4}.*repmat(gains(ins,:),HFFTS,1); - - % masking strategy 1 - finalMask{ins,2} = magnitude .* (finalMask{ins,4} ./ (xHat+eps)); - - % masking strategy 2 - finalMask{ins,3} = magnitude .* ( (finalMask{ins,4}+eps) ./ (xHat+eps)); - - % masking strategy 3 - finalMask{ins,4} = magnitude .* ( (finalMask{ins,4}+0.1 * eps) ./ (xHat+eps)); - - finalAudio{ins} = zeros(audioLength,1); - finalAudioMasked{ins} = zeros(audioLength,1); -end - -% sonifying the model -finalAudio{nbInstrument+1} = zeros(audioLength,1); -%Overlap and add -for curFrame = 1:scoreLength - freq(1:HFFTS) = xHat(:,curFrame).*exp(1i*phase(:,curFrame)); - freq(HFFTS+1:fftSize) = freq(HFFTS-1:-1:2); - faudio = real(ifft(freq)).*wdw2; - finalAudio{nbInstrument+1}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) = finalAudio{nbInstrument+1}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) + faudio; - for ins = 1:nbInstrument - freq(1:HFFTS) = finalMask{ins,1}(:,curFrame).*exp(1i*phase(:,curFrame)); - freq(HFFTS+1:fftSize) = freq(HFFTS-1:-1:2); - faudio = real(ifft(freq)).*wdw2; - finalAudio{ins}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) = finalAudio{ins}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) + faudio; - end - finalScale((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) = finalScale((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) + wdw2; -end - -for ins = 1:nbInstrument - finalAudio{ins} = finalAudio{ins}./finalScale; - % wavwrite(finalAudio{ins},fs,16,[midiCell{ins}(1:4) '_' num2str(ins) '_' maskingCell{1} extraInfo '.wav']); -end -finalAudio{nbInstrument+1} = finalAudio{nbInstrument+1}./finalScale; -%wavwrite(finalAudio{nbInstrument+1},fs,16,[mixName(1:4) '_0_' maskingCell{1} extraInfo '.wav']); - -% sonifying the masked version -%Overlap and add -for tech = 2:nbMaskingTechniques - finalScale = zeros(audioLength,1); - for curFrame = 1:scoreLength - for ins = 1:nbInstrument - freq(1:HFFTS) = finalMask{ins,tech}(:,curFrame).*exp(i*phase(:,curFrame)); - freq(HFFTS+1:fftSize) = freq(HFFTS-1:-1:2); - faudio = real(ifft(freq)).*wdw2; - finalAudioMasked{ins}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) = finalAudioMasked{ins}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) + faudio; - end - finalScale((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) = finalScale((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) + wdw2; - end - for ins = 1:nbInstrument - finalAudioMasked{ins} = finalAudioMasked{ins}./finalScale; - % wavwrite(finalAudioMasked{ins},fs,16,[midiCell{ins}(1:4) '_' num2str(ins) '_' maskingCell{tech} extraInfo '.wav']); - end -end - - -