Mercurial > hg > yjdafx13bpr
changeset 2:13ec2fa02a26 tip
(none)
author | Yannick JACOB <y.jacob@se12.qmul.ac.uk> |
---|---|
date | Tue, 03 Sep 2013 15:33:42 +0100 |
parents | d8c7b69cb4c9 |
children | |
files | Sirtassa/Main.m |
diffstat | 1 files changed, 257 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Sirtassa/Main.m Tue Sep 03 15:33:42 2013 +0100 @@ -0,0 +1,257 @@ +%% Default Values +%Audio and score information +if(~exist('midiCell','var')) + midiCell = cell(2,1); + midiCell{1} = 'Lussier_bassoon.mid'; + midiCell{2} = 'Lussier_trumpet.mid'; + mixName = 'Lussier_mix.wav'; + %mixName = '311CLNOF.WAV'; + %midiCell{1} = '311CLNOF.mid'; +end +if(~exist('blockSize','var')) blockSize = 2048; end +if(~exist('overlap','var')) overlap = 2; end +%Maximum frequency of the filter +if(~exist('bandPassFilterMaxFreq','var')) bandPassFilterMaxFreq = 10000; end +%Number of band pass filters in the filter +if(~exist('nbBandPassFilters','var')) nbBandPassFilters = 20;end +%Maximum updates per frame +if(~exist('maxUpdates','var')) maxUpdates = 2;end +%Number of partials in the model +if(~exist('nbPartials','var')) nbPartials = 8;end +if(~exist('plotRefinedScore','var')) plotRefinedScore = 0;end +if(~exist('plotPartial','var')) plotPartial = 0;end +if(~exist('plotFilter','var')) plotFilter = 0;end +%Nummber of instrument in the mix. +nbInstrument = length(midiCell); +fftSize = blockSize; + +%Weights of the harmonic series +partialsWeights = ones(nbPartials,nbInstrument); + +colors = 'rwm'; +%% Initialisation +hopSize =blockSize/overlap; %Hopsize +HFFTS = fftSize/2+1; %Size of the usefull part of the fft (half+1) +[audio, fs] = wavread(mixName); + +% zeropadding the audio length +audio(end:end+hopSize-mod(length(audio),hopSize)) = 0; +audioLength = length(audio); + +scoreLength = audioLength/hopSize - overlap+1; %Number of frames +wdw = hamming(blockSize); + +tic +%% Create filters +filterBasis = createBPFilters(bandPassFilterMaxFreq,nbBandPassFilters,fftSize,fftSize/fs); +filterCoefficients = ones(nbBandPassFilters,nbInstrument); +%% Creating the score +originalScore = createScore(scoreLength,midiCell,fs/hopSize,fftSize/fs); +originalScore(scoreLength+1:end,:) = []; +gains = double(originalScore'>0); +%% Creating spectrogram +magnitude = zeros(fftSize, scoreLength); +phase = zeros(fftSize, scoreLength); +audioVector = zeros(fftSize,1); +for curFrame = 1:scoreLength + audioVector(1:blockSize) = audio(hopSize*(curFrame-1)+1:hopSize*(curFrame-1)+blockSize,1).*wdw; + spectrogramVector = fft(audioVector); + magnitude(:,curFrame) = abs(spectrogramVector); + phase(:,curFrame) = angle(spectrogramVector); +end +magnitude(HFFTS+1:end,:) = []; +phase(HFFTS+1:end,:) = []; +maxSizeWindows = 400; +hammingWindows = cell(1,maxSizeWindows); +for len = 1:maxSizeWindows + hammingWindows{len} = hamming(len); +end + +%% Refine Score and create Excitation Spectrum +refinedScore = originalScore; %score to be refiened +basicSpec = zeros(HFFTS, scoreLength); %Basic theoretical spectrogram +excitationSpectrums = cell(nbInstrument,scoreLength); %cell for each frame +largerES = cell(nbInstrument,scoreLength); %cell for each frame + +for ins = 1:nbInstrument %for each isntrument + for curFrame = 1: scoreLength %for each frame + excitationSpectrums{ins,curFrame} = sparse(zeros(HFFTS,nbPartials)); + largerES{ins,curFrame} = sparse(zeros(HFFTS,nbPartials)); + if(originalScore(curFrame,ins)) %if there is a note + %Refine score and creates Excitation Spectrum + [refinedScore(curFrame,ins), width] = refineScore(magnitude(:,curFrame),originalScore(curFrame,ins)); + excitationSpectrums{ins,curFrame} = sparse(createExcitationSpectrums(refinedScore(curFrame,ins),HFFTS,nbPartials,partialsWeights(:,ins),width,hammingWindows)); + largerES{ins,curFrame} = sparse(createExcitationSpectrums(refinedScore(curFrame,ins),HFFTS,nbPartials,partialsWeights(:,ins),2*width,hammingWindows)); + end + basicSpec(:,curFrame) = basicSpec(:,curFrame) + (excitationSpectrums{ins,curFrame}*partialsWeights(:,ins))*gains(ins,curFrame); %Spectrogram without update + end +end +%% Plot spectrogram and add original and refined score +if (plotRefinedScore) + figure(1),imagesc(log(10*magnitude(1:fftSize/16,:)+1)), axis xy + plotableScore = originalScore+1; + plotableScore(plotableScore==1) = 0; + hold on ,plot(plotableScore,'.k'), + + for harm = 1 : nbPartials + for ins = nbInstrument:-1:1 + plot(refinedScore(:,ins)*harm+1,['.' colors(ins)]), colorbar + end + end +end +%% Online Processing +%Values that can be changed by the model. +maskForChangeableValues = basicSpec > 10^-4; +maskedSpectrogram = magnitude.*maskForChangeableValues; +bufferSize = 100; +nextProcess = 1; +for curFrame = 1:scoreLength + if sum(refinedScore(curFrame,:),2) + UpdateGainCoefficientsOL2 + if (curFrame==nextProcess) + first = max(curFrame-bufferSize+1,1); + currentMag = magnitude(:,first:curFrame); + tmpMaskForChangeableValues = basicSpec(:,first:curFrame) > 10^-4; + tmpMaskedSpectrogram = currentMag.*tmpMaskForChangeableValues; + len = curFrame-first+1; + for nbUpdates = 1:maxUpdates %update loop + UpdatePartialCoefficientsOL + UpdateFilterCoefficientsOL + if (plotFilter) + %%%Filter + figure(4),plot(outlook(1:blockSize/8,:)), title('Filter') + end + if (plotPartial) + %%%Decay + figure(5),stem(partialsWeights), title('Partial Weights') + end + end + nextProcess = min(ceil(curFrame*1.2),scoreLength) + %bufferSize = round(max(bufferSize,curFrame*0.5)); + if (plotFilter) + %%%Filter + figure(4),plot(outlook(1:blockSize/8,:)), title('Filter') + end + if (plotPartial) + %%%Decay + figure(5),stem(partialsWeights), title('Partial Weights') + + end + end + end +end +timeV = toc; +fprintf('Time: %6.3g s, %6.2g times real time \t \n ', timeV, timeV*fs/audioLength); + + +% Remodel Spectrogram +ECA = cell(nbInstrument,1); +xHat = zeros(HFFTS,scoreLength); +for ins = 1:nbInstrument + ECA{ins} = sparse(zeros(HFFTS,scoreLength)); + for frame = 1:scoreLength + ECA{ins}(:,frame) = filterBasis * filterCoefficients(:,ins).*(largerES{ins,frame}*partialsWeights(:,ins)); + end + xHat = xHat + ECA{ins}.*repmat(gains(ins,:),HFFTS,1); +end +%% Final plot +%%%Spectrogram (comparison with basic and final) +% % bMax = max(max(log(10*basicSpec+1))); +% % xMax = max(max(log(10*xHat+1))); +% % mMax = max(max(log(10*magnitude+1))); +% % tMax = max([bMax mMax xMax]); +% % figure(3), subplot(3,1,1), imagesc(log(10*basicSpec(1:hopSize/2,:)+1)), axis xy, linkaxes, caxis([0 tMax]), colorbar, title('Basic Model') +% % subplot(3,1,2), imagesc(log(10*xHat(1:hopSize/2,:)+1)), axis xy, linkaxes, caxis([0 tMax]), colorbar, title('Optimised Spectrogram') +% % subplot(3,1,3), imagesc(log(10*magnitude(1:hopSize/2,:)+1)), axis xy, linkaxes, caxis([0 tMax]), colorbar, title('Actual Spectrogram') +% % +% % +% % %%%Filter +% % figure(4),plot(outlook(1:blockSize/8,:)), title('Filter') +% % +% % %%%Decay +% % figure(5),stem(partialsWeights), title('Partial Weights') + +%% Writing +%extraInfo = [sprintf('_%d_%d_%d_%d_%d_%d',blockSize,overlap,maxUpdates,nbPartials,nbBandPassFilters,bandPassFilterMaxFreq) specExp]; +finalScale = zeros(audioLength,1); +freq = zeros(fftSize,1); +finalAudio = cell(nbInstrument+1,1); +finalAudioMasked = cell(nbInstrument,1); + +wdw2 = hamming(fftSize); +nbMaskingTechniques = 4; +maskingCell = cell(1,nbMaskingTechniques); +maskingCell{1} = 'raw'; +maskingCell{2} = 'per'; +maskingCell{3} = 'sha'; +maskingCell{4} = '10p'; +finalMask = cell(nbInstrument,nbMaskingTechniques); %4 is the number of different masking techniques +for ins = 1:nbInstrument + finalMask{ins,1} = sparse(zeros(HFFTS,scoreLength)); + finalMask{ins,4} = sparse(zeros(HFFTS,scoreLength)); + for curFrame = 1:scoreLength + finalMask{ins,1}(:,curFrame) = filterBasis * filterCoefficients(:,ins).*(excitationSpectrums{ins,curFrame}*partialsWeights(:,ins)); + finalMask{ins,4}(:,curFrame) = filterBasis * filterCoefficients(:,ins).*(largerES{ins,curFrame}*partialsWeights(:,ins)); + end + + finalMask{ins,1} = finalMask{ins,1}.*repmat(gains(ins,:),HFFTS,1); + finalMask{ins,4} = finalMask{ins,4}.*repmat(gains(ins,:),HFFTS,1); + + % masking strategy 1 + finalMask{ins,2} = magnitude .* (finalMask{ins,4} ./ (xHat+eps)); + + % masking strategy 2 + finalMask{ins,3} = magnitude .* ( (finalMask{ins,4}+eps) ./ (xHat+eps)); + + % masking strategy 3 + finalMask{ins,4} = magnitude .* ( (finalMask{ins,4}+0.1 * eps) ./ (xHat+eps)); + + finalAudio{ins} = zeros(audioLength,1); + finalAudioMasked{ins} = zeros(audioLength,1); +end + +% sonifying the model +finalAudio{nbInstrument+1} = zeros(audioLength,1); +%Overlap and add +for curFrame = 1:scoreLength + freq(1:HFFTS) = xHat(:,curFrame).*exp(1i*phase(:,curFrame)); + freq(HFFTS+1:fftSize) = freq(HFFTS-1:-1:2); + faudio = real(ifft(freq)).*wdw2; + finalAudio{nbInstrument+1}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) = finalAudio{nbInstrument+1}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) + faudio; + for ins = 1:nbInstrument + freq(1:HFFTS) = finalMask{ins,1}(:,curFrame).*exp(1i*phase(:,curFrame)); + freq(HFFTS+1:fftSize) = freq(HFFTS-1:-1:2); + faudio = real(ifft(freq)).*wdw2; + finalAudio{ins}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) = finalAudio{ins}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) + faudio; + end + finalScale((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) = finalScale((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) + wdw2; +end + +for ins = 1:nbInstrument + finalAudio{ins} = finalAudio{ins}./finalScale; + % wavwrite(finalAudio{ins},fs,16,[midiCell{ins}(1:4) '_' num2str(ins) '_' maskingCell{1} extraInfo '.wav']); +end +finalAudio{nbInstrument+1} = finalAudio{nbInstrument+1}./finalScale; +%wavwrite(finalAudio{nbInstrument+1},fs,16,[mixName(1:4) '_0_' maskingCell{1} extraInfo '.wav']); + +% sonifying the masked version +%Overlap and add +for tech = 2:nbMaskingTechniques + finalScale = zeros(audioLength,1); + for curFrame = 1:scoreLength + for ins = 1:nbInstrument + freq(1:HFFTS) = finalMask{ins,tech}(:,curFrame).*exp(i*phase(:,curFrame)); + freq(HFFTS+1:fftSize) = freq(HFFTS-1:-1:2); + faudio = real(ifft(freq)).*wdw2; + finalAudioMasked{ins}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) = finalAudioMasked{ins}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) + faudio; + end + finalScale((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) = finalScale((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) + wdw2; + end + for ins = 1:nbInstrument + finalAudioMasked{ins} = finalAudioMasked{ins}./finalScale; + % wavwrite(finalAudioMasked{ins},fs,16,[midiCell{ins}(1:4) '_' num2str(ins) '_' maskingCell{tech} extraInfo '.wav']); + end +end + + +