Mercurial > hg > yjdafx13bpr
view Sirtassa/Main.m @ 2:13ec2fa02a26 tip
(none)
author | Yannick JACOB <y.jacob@se12.qmul.ac.uk> |
---|---|
date | Tue, 03 Sep 2013 15:33:42 +0100 |
parents | |
children |
line wrap: on
line source
%% Default Values %Audio and score information if(~exist('midiCell','var')) midiCell = cell(2,1); midiCell{1} = 'Lussier_bassoon.mid'; midiCell{2} = 'Lussier_trumpet.mid'; mixName = 'Lussier_mix.wav'; %mixName = '311CLNOF.WAV'; %midiCell{1} = '311CLNOF.mid'; end if(~exist('blockSize','var')) blockSize = 2048; end if(~exist('overlap','var')) overlap = 2; end %Maximum frequency of the filter if(~exist('bandPassFilterMaxFreq','var')) bandPassFilterMaxFreq = 10000; end %Number of band pass filters in the filter if(~exist('nbBandPassFilters','var')) nbBandPassFilters = 20;end %Maximum updates per frame if(~exist('maxUpdates','var')) maxUpdates = 2;end %Number of partials in the model if(~exist('nbPartials','var')) nbPartials = 8;end if(~exist('plotRefinedScore','var')) plotRefinedScore = 0;end if(~exist('plotPartial','var')) plotPartial = 0;end if(~exist('plotFilter','var')) plotFilter = 0;end %Nummber of instrument in the mix. nbInstrument = length(midiCell); fftSize = blockSize; %Weights of the harmonic series partialsWeights = ones(nbPartials,nbInstrument); colors = 'rwm'; %% Initialisation hopSize =blockSize/overlap; %Hopsize HFFTS = fftSize/2+1; %Size of the usefull part of the fft (half+1) [audio, fs] = wavread(mixName); % zeropadding the audio length audio(end:end+hopSize-mod(length(audio),hopSize)) = 0; audioLength = length(audio); scoreLength = audioLength/hopSize - overlap+1; %Number of frames wdw = hamming(blockSize); tic %% Create filters filterBasis = createBPFilters(bandPassFilterMaxFreq,nbBandPassFilters,fftSize,fftSize/fs); filterCoefficients = ones(nbBandPassFilters,nbInstrument); %% Creating the score originalScore = createScore(scoreLength,midiCell,fs/hopSize,fftSize/fs); originalScore(scoreLength+1:end,:) = []; gains = double(originalScore'>0); %% Creating spectrogram magnitude = zeros(fftSize, scoreLength); phase = zeros(fftSize, scoreLength); audioVector = zeros(fftSize,1); for curFrame = 1:scoreLength audioVector(1:blockSize) = audio(hopSize*(curFrame-1)+1:hopSize*(curFrame-1)+blockSize,1).*wdw; spectrogramVector = fft(audioVector); magnitude(:,curFrame) = abs(spectrogramVector); phase(:,curFrame) = angle(spectrogramVector); end magnitude(HFFTS+1:end,:) = []; phase(HFFTS+1:end,:) = []; maxSizeWindows = 400; hammingWindows = cell(1,maxSizeWindows); for len = 1:maxSizeWindows hammingWindows{len} = hamming(len); end %% Refine Score and create Excitation Spectrum refinedScore = originalScore; %score to be refiened basicSpec = zeros(HFFTS, scoreLength); %Basic theoretical spectrogram excitationSpectrums = cell(nbInstrument,scoreLength); %cell for each frame largerES = cell(nbInstrument,scoreLength); %cell for each frame for ins = 1:nbInstrument %for each isntrument for curFrame = 1: scoreLength %for each frame excitationSpectrums{ins,curFrame} = sparse(zeros(HFFTS,nbPartials)); largerES{ins,curFrame} = sparse(zeros(HFFTS,nbPartials)); if(originalScore(curFrame,ins)) %if there is a note %Refine score and creates Excitation Spectrum [refinedScore(curFrame,ins), width] = refineScore(magnitude(:,curFrame),originalScore(curFrame,ins)); excitationSpectrums{ins,curFrame} = sparse(createExcitationSpectrums(refinedScore(curFrame,ins),HFFTS,nbPartials,partialsWeights(:,ins),width,hammingWindows)); largerES{ins,curFrame} = sparse(createExcitationSpectrums(refinedScore(curFrame,ins),HFFTS,nbPartials,partialsWeights(:,ins),2*width,hammingWindows)); end basicSpec(:,curFrame) = basicSpec(:,curFrame) + (excitationSpectrums{ins,curFrame}*partialsWeights(:,ins))*gains(ins,curFrame); %Spectrogram without update end end %% Plot spectrogram and add original and refined score if (plotRefinedScore) figure(1),imagesc(log(10*magnitude(1:fftSize/16,:)+1)), axis xy plotableScore = originalScore+1; plotableScore(plotableScore==1) = 0; hold on ,plot(plotableScore,'.k'), for harm = 1 : nbPartials for ins = nbInstrument:-1:1 plot(refinedScore(:,ins)*harm+1,['.' colors(ins)]), colorbar end end end %% Online Processing %Values that can be changed by the model. maskForChangeableValues = basicSpec > 10^-4; maskedSpectrogram = magnitude.*maskForChangeableValues; bufferSize = 100; nextProcess = 1; for curFrame = 1:scoreLength if sum(refinedScore(curFrame,:),2) UpdateGainCoefficientsOL2 if (curFrame==nextProcess) first = max(curFrame-bufferSize+1,1); currentMag = magnitude(:,first:curFrame); tmpMaskForChangeableValues = basicSpec(:,first:curFrame) > 10^-4; tmpMaskedSpectrogram = currentMag.*tmpMaskForChangeableValues; len = curFrame-first+1; for nbUpdates = 1:maxUpdates %update loop UpdatePartialCoefficientsOL UpdateFilterCoefficientsOL if (plotFilter) %%%Filter figure(4),plot(outlook(1:blockSize/8,:)), title('Filter') end if (plotPartial) %%%Decay figure(5),stem(partialsWeights), title('Partial Weights') end end nextProcess = min(ceil(curFrame*1.2),scoreLength) %bufferSize = round(max(bufferSize,curFrame*0.5)); if (plotFilter) %%%Filter figure(4),plot(outlook(1:blockSize/8,:)), title('Filter') end if (plotPartial) %%%Decay figure(5),stem(partialsWeights), title('Partial Weights') end end end end timeV = toc; fprintf('Time: %6.3g s, %6.2g times real time \t \n ', timeV, timeV*fs/audioLength); % Remodel Spectrogram ECA = cell(nbInstrument,1); xHat = zeros(HFFTS,scoreLength); for ins = 1:nbInstrument ECA{ins} = sparse(zeros(HFFTS,scoreLength)); for frame = 1:scoreLength ECA{ins}(:,frame) = filterBasis * filterCoefficients(:,ins).*(largerES{ins,frame}*partialsWeights(:,ins)); end xHat = xHat + ECA{ins}.*repmat(gains(ins,:),HFFTS,1); end %% Final plot %%%Spectrogram (comparison with basic and final) % % bMax = max(max(log(10*basicSpec+1))); % % xMax = max(max(log(10*xHat+1))); % % mMax = max(max(log(10*magnitude+1))); % % tMax = max([bMax mMax xMax]); % % figure(3), subplot(3,1,1), imagesc(log(10*basicSpec(1:hopSize/2,:)+1)), axis xy, linkaxes, caxis([0 tMax]), colorbar, title('Basic Model') % % subplot(3,1,2), imagesc(log(10*xHat(1:hopSize/2,:)+1)), axis xy, linkaxes, caxis([0 tMax]), colorbar, title('Optimised Spectrogram') % % subplot(3,1,3), imagesc(log(10*magnitude(1:hopSize/2,:)+1)), axis xy, linkaxes, caxis([0 tMax]), colorbar, title('Actual Spectrogram') % % % % % % %%%Filter % % figure(4),plot(outlook(1:blockSize/8,:)), title('Filter') % % % % %%%Decay % % figure(5),stem(partialsWeights), title('Partial Weights') %% Writing %extraInfo = [sprintf('_%d_%d_%d_%d_%d_%d',blockSize,overlap,maxUpdates,nbPartials,nbBandPassFilters,bandPassFilterMaxFreq) specExp]; finalScale = zeros(audioLength,1); freq = zeros(fftSize,1); finalAudio = cell(nbInstrument+1,1); finalAudioMasked = cell(nbInstrument,1); wdw2 = hamming(fftSize); nbMaskingTechniques = 4; maskingCell = cell(1,nbMaskingTechniques); maskingCell{1} = 'raw'; maskingCell{2} = 'per'; maskingCell{3} = 'sha'; maskingCell{4} = '10p'; finalMask = cell(nbInstrument,nbMaskingTechniques); %4 is the number of different masking techniques for ins = 1:nbInstrument finalMask{ins,1} = sparse(zeros(HFFTS,scoreLength)); finalMask{ins,4} = sparse(zeros(HFFTS,scoreLength)); for curFrame = 1:scoreLength finalMask{ins,1}(:,curFrame) = filterBasis * filterCoefficients(:,ins).*(excitationSpectrums{ins,curFrame}*partialsWeights(:,ins)); finalMask{ins,4}(:,curFrame) = filterBasis * filterCoefficients(:,ins).*(largerES{ins,curFrame}*partialsWeights(:,ins)); end finalMask{ins,1} = finalMask{ins,1}.*repmat(gains(ins,:),HFFTS,1); finalMask{ins,4} = finalMask{ins,4}.*repmat(gains(ins,:),HFFTS,1); % masking strategy 1 finalMask{ins,2} = magnitude .* (finalMask{ins,4} ./ (xHat+eps)); % masking strategy 2 finalMask{ins,3} = magnitude .* ( (finalMask{ins,4}+eps) ./ (xHat+eps)); % masking strategy 3 finalMask{ins,4} = magnitude .* ( (finalMask{ins,4}+0.1 * eps) ./ (xHat+eps)); finalAudio{ins} = zeros(audioLength,1); finalAudioMasked{ins} = zeros(audioLength,1); end % sonifying the model finalAudio{nbInstrument+1} = zeros(audioLength,1); %Overlap and add for curFrame = 1:scoreLength freq(1:HFFTS) = xHat(:,curFrame).*exp(1i*phase(:,curFrame)); freq(HFFTS+1:fftSize) = freq(HFFTS-1:-1:2); faudio = real(ifft(freq)).*wdw2; finalAudio{nbInstrument+1}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) = finalAudio{nbInstrument+1}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) + faudio; for ins = 1:nbInstrument freq(1:HFFTS) = finalMask{ins,1}(:,curFrame).*exp(1i*phase(:,curFrame)); freq(HFFTS+1:fftSize) = freq(HFFTS-1:-1:2); faudio = real(ifft(freq)).*wdw2; finalAudio{ins}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) = finalAudio{ins}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) + faudio; end finalScale((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) = finalScale((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) + wdw2; end for ins = 1:nbInstrument finalAudio{ins} = finalAudio{ins}./finalScale; % wavwrite(finalAudio{ins},fs,16,[midiCell{ins}(1:4) '_' num2str(ins) '_' maskingCell{1} extraInfo '.wav']); end finalAudio{nbInstrument+1} = finalAudio{nbInstrument+1}./finalScale; %wavwrite(finalAudio{nbInstrument+1},fs,16,[mixName(1:4) '_0_' maskingCell{1} extraInfo '.wav']); % sonifying the masked version %Overlap and add for tech = 2:nbMaskingTechniques finalScale = zeros(audioLength,1); for curFrame = 1:scoreLength for ins = 1:nbInstrument freq(1:HFFTS) = finalMask{ins,tech}(:,curFrame).*exp(i*phase(:,curFrame)); freq(HFFTS+1:fftSize) = freq(HFFTS-1:-1:2); faudio = real(ifft(freq)).*wdw2; finalAudioMasked{ins}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) = finalAudioMasked{ins}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) + faudio; end finalScale((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) = finalScale((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) + wdw2; end for ins = 1:nbInstrument finalAudioMasked{ins} = finalAudioMasked{ins}./finalScale; % wavwrite(finalAudioMasked{ins},fs,16,[midiCell{ins}(1:4) '_' num2str(ins) '_' maskingCell{tech} extraInfo '.wav']); end end