annotate Sirtassa/Main.m @ 2:13ec2fa02a26 tip

(none)
author Yannick JACOB <y.jacob@se12.qmul.ac.uk>
date Tue, 03 Sep 2013 15:33:42 +0100
parents
children
rev   line source
y@2 1 %% Default Values
y@2 2 %Audio and score information
y@2 3 if(~exist('midiCell','var'))
y@2 4 midiCell = cell(2,1);
y@2 5 midiCell{1} = 'Lussier_bassoon.mid';
y@2 6 midiCell{2} = 'Lussier_trumpet.mid';
y@2 7 mixName = 'Lussier_mix.wav';
y@2 8 %mixName = '311CLNOF.WAV';
y@2 9 %midiCell{1} = '311CLNOF.mid';
y@2 10 end
y@2 11 if(~exist('blockSize','var')) blockSize = 2048; end
y@2 12 if(~exist('overlap','var')) overlap = 2; end
y@2 13 %Maximum frequency of the filter
y@2 14 if(~exist('bandPassFilterMaxFreq','var')) bandPassFilterMaxFreq = 10000; end
y@2 15 %Number of band pass filters in the filter
y@2 16 if(~exist('nbBandPassFilters','var')) nbBandPassFilters = 20;end
y@2 17 %Maximum updates per frame
y@2 18 if(~exist('maxUpdates','var')) maxUpdates = 2;end
y@2 19 %Number of partials in the model
y@2 20 if(~exist('nbPartials','var')) nbPartials = 8;end
y@2 21 if(~exist('plotRefinedScore','var')) plotRefinedScore = 0;end
y@2 22 if(~exist('plotPartial','var')) plotPartial = 0;end
y@2 23 if(~exist('plotFilter','var')) plotFilter = 0;end
y@2 24 %Nummber of instrument in the mix.
y@2 25 nbInstrument = length(midiCell);
y@2 26 fftSize = blockSize;
y@2 27
y@2 28 %Weights of the harmonic series
y@2 29 partialsWeights = ones(nbPartials,nbInstrument);
y@2 30
y@2 31 colors = 'rwm';
y@2 32 %% Initialisation
y@2 33 hopSize =blockSize/overlap; %Hopsize
y@2 34 HFFTS = fftSize/2+1; %Size of the usefull part of the fft (half+1)
y@2 35 [audio, fs] = wavread(mixName);
y@2 36
y@2 37 % zeropadding the audio length
y@2 38 audio(end:end+hopSize-mod(length(audio),hopSize)) = 0;
y@2 39 audioLength = length(audio);
y@2 40
y@2 41 scoreLength = audioLength/hopSize - overlap+1; %Number of frames
y@2 42 wdw = hamming(blockSize);
y@2 43
y@2 44 tic
y@2 45 %% Create filters
y@2 46 filterBasis = createBPFilters(bandPassFilterMaxFreq,nbBandPassFilters,fftSize,fftSize/fs);
y@2 47 filterCoefficients = ones(nbBandPassFilters,nbInstrument);
y@2 48 %% Creating the score
y@2 49 originalScore = createScore(scoreLength,midiCell,fs/hopSize,fftSize/fs);
y@2 50 originalScore(scoreLength+1:end,:) = [];
y@2 51 gains = double(originalScore'>0);
y@2 52 %% Creating spectrogram
y@2 53 magnitude = zeros(fftSize, scoreLength);
y@2 54 phase = zeros(fftSize, scoreLength);
y@2 55 audioVector = zeros(fftSize,1);
y@2 56 for curFrame = 1:scoreLength
y@2 57 audioVector(1:blockSize) = audio(hopSize*(curFrame-1)+1:hopSize*(curFrame-1)+blockSize,1).*wdw;
y@2 58 spectrogramVector = fft(audioVector);
y@2 59 magnitude(:,curFrame) = abs(spectrogramVector);
y@2 60 phase(:,curFrame) = angle(spectrogramVector);
y@2 61 end
y@2 62 magnitude(HFFTS+1:end,:) = [];
y@2 63 phase(HFFTS+1:end,:) = [];
y@2 64 maxSizeWindows = 400;
y@2 65 hammingWindows = cell(1,maxSizeWindows);
y@2 66 for len = 1:maxSizeWindows
y@2 67 hammingWindows{len} = hamming(len);
y@2 68 end
y@2 69
y@2 70 %% Refine Score and create Excitation Spectrum
y@2 71 refinedScore = originalScore; %score to be refiened
y@2 72 basicSpec = zeros(HFFTS, scoreLength); %Basic theoretical spectrogram
y@2 73 excitationSpectrums = cell(nbInstrument,scoreLength); %cell for each frame
y@2 74 largerES = cell(nbInstrument,scoreLength); %cell for each frame
y@2 75
y@2 76 for ins = 1:nbInstrument %for each isntrument
y@2 77 for curFrame = 1: scoreLength %for each frame
y@2 78 excitationSpectrums{ins,curFrame} = sparse(zeros(HFFTS,nbPartials));
y@2 79 largerES{ins,curFrame} = sparse(zeros(HFFTS,nbPartials));
y@2 80 if(originalScore(curFrame,ins)) %if there is a note
y@2 81 %Refine score and creates Excitation Spectrum
y@2 82 [refinedScore(curFrame,ins), width] = refineScore(magnitude(:,curFrame),originalScore(curFrame,ins));
y@2 83 excitationSpectrums{ins,curFrame} = sparse(createExcitationSpectrums(refinedScore(curFrame,ins),HFFTS,nbPartials,partialsWeights(:,ins),width,hammingWindows));
y@2 84 largerES{ins,curFrame} = sparse(createExcitationSpectrums(refinedScore(curFrame,ins),HFFTS,nbPartials,partialsWeights(:,ins),2*width,hammingWindows));
y@2 85 end
y@2 86 basicSpec(:,curFrame) = basicSpec(:,curFrame) + (excitationSpectrums{ins,curFrame}*partialsWeights(:,ins))*gains(ins,curFrame); %Spectrogram without update
y@2 87 end
y@2 88 end
y@2 89 %% Plot spectrogram and add original and refined score
y@2 90 if (plotRefinedScore)
y@2 91 figure(1),imagesc(log(10*magnitude(1:fftSize/16,:)+1)), axis xy
y@2 92 plotableScore = originalScore+1;
y@2 93 plotableScore(plotableScore==1) = 0;
y@2 94 hold on ,plot(plotableScore,'.k'),
y@2 95
y@2 96 for harm = 1 : nbPartials
y@2 97 for ins = nbInstrument:-1:1
y@2 98 plot(refinedScore(:,ins)*harm+1,['.' colors(ins)]), colorbar
y@2 99 end
y@2 100 end
y@2 101 end
y@2 102 %% Online Processing
y@2 103 %Values that can be changed by the model.
y@2 104 maskForChangeableValues = basicSpec > 10^-4;
y@2 105 maskedSpectrogram = magnitude.*maskForChangeableValues;
y@2 106 bufferSize = 100;
y@2 107 nextProcess = 1;
y@2 108 for curFrame = 1:scoreLength
y@2 109 if sum(refinedScore(curFrame,:),2)
y@2 110 UpdateGainCoefficientsOL2
y@2 111 if (curFrame==nextProcess)
y@2 112 first = max(curFrame-bufferSize+1,1);
y@2 113 currentMag = magnitude(:,first:curFrame);
y@2 114 tmpMaskForChangeableValues = basicSpec(:,first:curFrame) > 10^-4;
y@2 115 tmpMaskedSpectrogram = currentMag.*tmpMaskForChangeableValues;
y@2 116 len = curFrame-first+1;
y@2 117 for nbUpdates = 1:maxUpdates %update loop
y@2 118 UpdatePartialCoefficientsOL
y@2 119 UpdateFilterCoefficientsOL
y@2 120 if (plotFilter)
y@2 121 %%%Filter
y@2 122 figure(4),plot(outlook(1:blockSize/8,:)), title('Filter')
y@2 123 end
y@2 124 if (plotPartial)
y@2 125 %%%Decay
y@2 126 figure(5),stem(partialsWeights), title('Partial Weights')
y@2 127 end
y@2 128 end
y@2 129 nextProcess = min(ceil(curFrame*1.2),scoreLength)
y@2 130 %bufferSize = round(max(bufferSize,curFrame*0.5));
y@2 131 if (plotFilter)
y@2 132 %%%Filter
y@2 133 figure(4),plot(outlook(1:blockSize/8,:)), title('Filter')
y@2 134 end
y@2 135 if (plotPartial)
y@2 136 %%%Decay
y@2 137 figure(5),stem(partialsWeights), title('Partial Weights')
y@2 138
y@2 139 end
y@2 140 end
y@2 141 end
y@2 142 end
y@2 143 timeV = toc;
y@2 144 fprintf('Time: %6.3g s, %6.2g times real time \t \n ', timeV, timeV*fs/audioLength);
y@2 145
y@2 146
y@2 147 % Remodel Spectrogram
y@2 148 ECA = cell(nbInstrument,1);
y@2 149 xHat = zeros(HFFTS,scoreLength);
y@2 150 for ins = 1:nbInstrument
y@2 151 ECA{ins} = sparse(zeros(HFFTS,scoreLength));
y@2 152 for frame = 1:scoreLength
y@2 153 ECA{ins}(:,frame) = filterBasis * filterCoefficients(:,ins).*(largerES{ins,frame}*partialsWeights(:,ins));
y@2 154 end
y@2 155 xHat = xHat + ECA{ins}.*repmat(gains(ins,:),HFFTS,1);
y@2 156 end
y@2 157 %% Final plot
y@2 158 %%%Spectrogram (comparison with basic and final)
y@2 159 % % bMax = max(max(log(10*basicSpec+1)));
y@2 160 % % xMax = max(max(log(10*xHat+1)));
y@2 161 % % mMax = max(max(log(10*magnitude+1)));
y@2 162 % % tMax = max([bMax mMax xMax]);
y@2 163 % % figure(3), subplot(3,1,1), imagesc(log(10*basicSpec(1:hopSize/2,:)+1)), axis xy, linkaxes, caxis([0 tMax]), colorbar, title('Basic Model')
y@2 164 % % subplot(3,1,2), imagesc(log(10*xHat(1:hopSize/2,:)+1)), axis xy, linkaxes, caxis([0 tMax]), colorbar, title('Optimised Spectrogram')
y@2 165 % % subplot(3,1,3), imagesc(log(10*magnitude(1:hopSize/2,:)+1)), axis xy, linkaxes, caxis([0 tMax]), colorbar, title('Actual Spectrogram')
y@2 166 % %
y@2 167 % %
y@2 168 % % %%%Filter
y@2 169 % % figure(4),plot(outlook(1:blockSize/8,:)), title('Filter')
y@2 170 % %
y@2 171 % % %%%Decay
y@2 172 % % figure(5),stem(partialsWeights), title('Partial Weights')
y@2 173
y@2 174 %% Writing
y@2 175 %extraInfo = [sprintf('_%d_%d_%d_%d_%d_%d',blockSize,overlap,maxUpdates,nbPartials,nbBandPassFilters,bandPassFilterMaxFreq) specExp];
y@2 176 finalScale = zeros(audioLength,1);
y@2 177 freq = zeros(fftSize,1);
y@2 178 finalAudio = cell(nbInstrument+1,1);
y@2 179 finalAudioMasked = cell(nbInstrument,1);
y@2 180
y@2 181 wdw2 = hamming(fftSize);
y@2 182 nbMaskingTechniques = 4;
y@2 183 maskingCell = cell(1,nbMaskingTechniques);
y@2 184 maskingCell{1} = 'raw';
y@2 185 maskingCell{2} = 'per';
y@2 186 maskingCell{3} = 'sha';
y@2 187 maskingCell{4} = '10p';
y@2 188 finalMask = cell(nbInstrument,nbMaskingTechniques); %4 is the number of different masking techniques
y@2 189 for ins = 1:nbInstrument
y@2 190 finalMask{ins,1} = sparse(zeros(HFFTS,scoreLength));
y@2 191 finalMask{ins,4} = sparse(zeros(HFFTS,scoreLength));
y@2 192 for curFrame = 1:scoreLength
y@2 193 finalMask{ins,1}(:,curFrame) = filterBasis * filterCoefficients(:,ins).*(excitationSpectrums{ins,curFrame}*partialsWeights(:,ins));
y@2 194 finalMask{ins,4}(:,curFrame) = filterBasis * filterCoefficients(:,ins).*(largerES{ins,curFrame}*partialsWeights(:,ins));
y@2 195 end
y@2 196
y@2 197 finalMask{ins,1} = finalMask{ins,1}.*repmat(gains(ins,:),HFFTS,1);
y@2 198 finalMask{ins,4} = finalMask{ins,4}.*repmat(gains(ins,:),HFFTS,1);
y@2 199
y@2 200 % masking strategy 1
y@2 201 finalMask{ins,2} = magnitude .* (finalMask{ins,4} ./ (xHat+eps));
y@2 202
y@2 203 % masking strategy 2
y@2 204 finalMask{ins,3} = magnitude .* ( (finalMask{ins,4}+eps) ./ (xHat+eps));
y@2 205
y@2 206 % masking strategy 3
y@2 207 finalMask{ins,4} = magnitude .* ( (finalMask{ins,4}+0.1 * eps) ./ (xHat+eps));
y@2 208
y@2 209 finalAudio{ins} = zeros(audioLength,1);
y@2 210 finalAudioMasked{ins} = zeros(audioLength,1);
y@2 211 end
y@2 212
y@2 213 % sonifying the model
y@2 214 finalAudio{nbInstrument+1} = zeros(audioLength,1);
y@2 215 %Overlap and add
y@2 216 for curFrame = 1:scoreLength
y@2 217 freq(1:HFFTS) = xHat(:,curFrame).*exp(1i*phase(:,curFrame));
y@2 218 freq(HFFTS+1:fftSize) = freq(HFFTS-1:-1:2);
y@2 219 faudio = real(ifft(freq)).*wdw2;
y@2 220 finalAudio{nbInstrument+1}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) = finalAudio{nbInstrument+1}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) + faudio;
y@2 221 for ins = 1:nbInstrument
y@2 222 freq(1:HFFTS) = finalMask{ins,1}(:,curFrame).*exp(1i*phase(:,curFrame));
y@2 223 freq(HFFTS+1:fftSize) = freq(HFFTS-1:-1:2);
y@2 224 faudio = real(ifft(freq)).*wdw2;
y@2 225 finalAudio{ins}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) = finalAudio{ins}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) + faudio;
y@2 226 end
y@2 227 finalScale((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) = finalScale((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) + wdw2;
y@2 228 end
y@2 229
y@2 230 for ins = 1:nbInstrument
y@2 231 finalAudio{ins} = finalAudio{ins}./finalScale;
y@2 232 % wavwrite(finalAudio{ins},fs,16,[midiCell{ins}(1:4) '_' num2str(ins) '_' maskingCell{1} extraInfo '.wav']);
y@2 233 end
y@2 234 finalAudio{nbInstrument+1} = finalAudio{nbInstrument+1}./finalScale;
y@2 235 %wavwrite(finalAudio{nbInstrument+1},fs,16,[mixName(1:4) '_0_' maskingCell{1} extraInfo '.wav']);
y@2 236
y@2 237 % sonifying the masked version
y@2 238 %Overlap and add
y@2 239 for tech = 2:nbMaskingTechniques
y@2 240 finalScale = zeros(audioLength,1);
y@2 241 for curFrame = 1:scoreLength
y@2 242 for ins = 1:nbInstrument
y@2 243 freq(1:HFFTS) = finalMask{ins,tech}(:,curFrame).*exp(i*phase(:,curFrame));
y@2 244 freq(HFFTS+1:fftSize) = freq(HFFTS-1:-1:2);
y@2 245 faudio = real(ifft(freq)).*wdw2;
y@2 246 finalAudioMasked{ins}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) = finalAudioMasked{ins}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) + faudio;
y@2 247 end
y@2 248 finalScale((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) = finalScale((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) + wdw2;
y@2 249 end
y@2 250 for ins = 1:nbInstrument
y@2 251 finalAudioMasked{ins} = finalAudioMasked{ins}./finalScale;
y@2 252 % wavwrite(finalAudioMasked{ins},fs,16,[midiCell{ins}(1:4) '_' num2str(ins) '_' maskingCell{tech} extraInfo '.wav']);
y@2 253 end
y@2 254 end
y@2 255
y@2 256
y@2 257