Mercurial > hg > yjdafx13bpr
comparison Sirtassa/Main.m @ 2:13ec2fa02a26 tip
(none)
author | Yannick JACOB <y.jacob@se12.qmul.ac.uk> |
---|---|
date | Tue, 03 Sep 2013 15:33:42 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
1:d8c7b69cb4c9 | 2:13ec2fa02a26 |
---|---|
1 %% Default Values | |
2 %Audio and score information | |
3 if(~exist('midiCell','var')) | |
4 midiCell = cell(2,1); | |
5 midiCell{1} = 'Lussier_bassoon.mid'; | |
6 midiCell{2} = 'Lussier_trumpet.mid'; | |
7 mixName = 'Lussier_mix.wav'; | |
8 %mixName = '311CLNOF.WAV'; | |
9 %midiCell{1} = '311CLNOF.mid'; | |
10 end | |
11 if(~exist('blockSize','var')) blockSize = 2048; end | |
12 if(~exist('overlap','var')) overlap = 2; end | |
13 %Maximum frequency of the filter | |
14 if(~exist('bandPassFilterMaxFreq','var')) bandPassFilterMaxFreq = 10000; end | |
15 %Number of band pass filters in the filter | |
16 if(~exist('nbBandPassFilters','var')) nbBandPassFilters = 20;end | |
17 %Maximum updates per frame | |
18 if(~exist('maxUpdates','var')) maxUpdates = 2;end | |
19 %Number of partials in the model | |
20 if(~exist('nbPartials','var')) nbPartials = 8;end | |
21 if(~exist('plotRefinedScore','var')) plotRefinedScore = 0;end | |
22 if(~exist('plotPartial','var')) plotPartial = 0;end | |
23 if(~exist('plotFilter','var')) plotFilter = 0;end | |
24 %Nummber of instrument in the mix. | |
25 nbInstrument = length(midiCell); | |
26 fftSize = blockSize; | |
27 | |
28 %Weights of the harmonic series | |
29 partialsWeights = ones(nbPartials,nbInstrument); | |
30 | |
31 colors = 'rwm'; | |
32 %% Initialisation | |
33 hopSize =blockSize/overlap; %Hopsize | |
34 HFFTS = fftSize/2+1; %Size of the usefull part of the fft (half+1) | |
35 [audio, fs] = wavread(mixName); | |
36 | |
37 % zeropadding the audio length | |
38 audio(end:end+hopSize-mod(length(audio),hopSize)) = 0; | |
39 audioLength = length(audio); | |
40 | |
41 scoreLength = audioLength/hopSize - overlap+1; %Number of frames | |
42 wdw = hamming(blockSize); | |
43 | |
44 tic | |
45 %% Create filters | |
46 filterBasis = createBPFilters(bandPassFilterMaxFreq,nbBandPassFilters,fftSize,fftSize/fs); | |
47 filterCoefficients = ones(nbBandPassFilters,nbInstrument); | |
48 %% Creating the score | |
49 originalScore = createScore(scoreLength,midiCell,fs/hopSize,fftSize/fs); | |
50 originalScore(scoreLength+1:end,:) = []; | |
51 gains = double(originalScore'>0); | |
52 %% Creating spectrogram | |
53 magnitude = zeros(fftSize, scoreLength); | |
54 phase = zeros(fftSize, scoreLength); | |
55 audioVector = zeros(fftSize,1); | |
56 for curFrame = 1:scoreLength | |
57 audioVector(1:blockSize) = audio(hopSize*(curFrame-1)+1:hopSize*(curFrame-1)+blockSize,1).*wdw; | |
58 spectrogramVector = fft(audioVector); | |
59 magnitude(:,curFrame) = abs(spectrogramVector); | |
60 phase(:,curFrame) = angle(spectrogramVector); | |
61 end | |
62 magnitude(HFFTS+1:end,:) = []; | |
63 phase(HFFTS+1:end,:) = []; | |
64 maxSizeWindows = 400; | |
65 hammingWindows = cell(1,maxSizeWindows); | |
66 for len = 1:maxSizeWindows | |
67 hammingWindows{len} = hamming(len); | |
68 end | |
69 | |
70 %% Refine Score and create Excitation Spectrum | |
71 refinedScore = originalScore; %score to be refiened | |
72 basicSpec = zeros(HFFTS, scoreLength); %Basic theoretical spectrogram | |
73 excitationSpectrums = cell(nbInstrument,scoreLength); %cell for each frame | |
74 largerES = cell(nbInstrument,scoreLength); %cell for each frame | |
75 | |
76 for ins = 1:nbInstrument %for each isntrument | |
77 for curFrame = 1: scoreLength %for each frame | |
78 excitationSpectrums{ins,curFrame} = sparse(zeros(HFFTS,nbPartials)); | |
79 largerES{ins,curFrame} = sparse(zeros(HFFTS,nbPartials)); | |
80 if(originalScore(curFrame,ins)) %if there is a note | |
81 %Refine score and creates Excitation Spectrum | |
82 [refinedScore(curFrame,ins), width] = refineScore(magnitude(:,curFrame),originalScore(curFrame,ins)); | |
83 excitationSpectrums{ins,curFrame} = sparse(createExcitationSpectrums(refinedScore(curFrame,ins),HFFTS,nbPartials,partialsWeights(:,ins),width,hammingWindows)); | |
84 largerES{ins,curFrame} = sparse(createExcitationSpectrums(refinedScore(curFrame,ins),HFFTS,nbPartials,partialsWeights(:,ins),2*width,hammingWindows)); | |
85 end | |
86 basicSpec(:,curFrame) = basicSpec(:,curFrame) + (excitationSpectrums{ins,curFrame}*partialsWeights(:,ins))*gains(ins,curFrame); %Spectrogram without update | |
87 end | |
88 end | |
89 %% Plot spectrogram and add original and refined score | |
90 if (plotRefinedScore) | |
91 figure(1),imagesc(log(10*magnitude(1:fftSize/16,:)+1)), axis xy | |
92 plotableScore = originalScore+1; | |
93 plotableScore(plotableScore==1) = 0; | |
94 hold on ,plot(plotableScore,'.k'), | |
95 | |
96 for harm = 1 : nbPartials | |
97 for ins = nbInstrument:-1:1 | |
98 plot(refinedScore(:,ins)*harm+1,['.' colors(ins)]), colorbar | |
99 end | |
100 end | |
101 end | |
102 %% Online Processing | |
103 %Values that can be changed by the model. | |
104 maskForChangeableValues = basicSpec > 10^-4; | |
105 maskedSpectrogram = magnitude.*maskForChangeableValues; | |
106 bufferSize = 100; | |
107 nextProcess = 1; | |
108 for curFrame = 1:scoreLength | |
109 if sum(refinedScore(curFrame,:),2) | |
110 UpdateGainCoefficientsOL2 | |
111 if (curFrame==nextProcess) | |
112 first = max(curFrame-bufferSize+1,1); | |
113 currentMag = magnitude(:,first:curFrame); | |
114 tmpMaskForChangeableValues = basicSpec(:,first:curFrame) > 10^-4; | |
115 tmpMaskedSpectrogram = currentMag.*tmpMaskForChangeableValues; | |
116 len = curFrame-first+1; | |
117 for nbUpdates = 1:maxUpdates %update loop | |
118 UpdatePartialCoefficientsOL | |
119 UpdateFilterCoefficientsOL | |
120 if (plotFilter) | |
121 %%%Filter | |
122 figure(4),plot(outlook(1:blockSize/8,:)), title('Filter') | |
123 end | |
124 if (plotPartial) | |
125 %%%Decay | |
126 figure(5),stem(partialsWeights), title('Partial Weights') | |
127 end | |
128 end | |
129 nextProcess = min(ceil(curFrame*1.2),scoreLength) | |
130 %bufferSize = round(max(bufferSize,curFrame*0.5)); | |
131 if (plotFilter) | |
132 %%%Filter | |
133 figure(4),plot(outlook(1:blockSize/8,:)), title('Filter') | |
134 end | |
135 if (plotPartial) | |
136 %%%Decay | |
137 figure(5),stem(partialsWeights), title('Partial Weights') | |
138 | |
139 end | |
140 end | |
141 end | |
142 end | |
143 timeV = toc; | |
144 fprintf('Time: %6.3g s, %6.2g times real time \t \n ', timeV, timeV*fs/audioLength); | |
145 | |
146 | |
147 % Remodel Spectrogram | |
148 ECA = cell(nbInstrument,1); | |
149 xHat = zeros(HFFTS,scoreLength); | |
150 for ins = 1:nbInstrument | |
151 ECA{ins} = sparse(zeros(HFFTS,scoreLength)); | |
152 for frame = 1:scoreLength | |
153 ECA{ins}(:,frame) = filterBasis * filterCoefficients(:,ins).*(largerES{ins,frame}*partialsWeights(:,ins)); | |
154 end | |
155 xHat = xHat + ECA{ins}.*repmat(gains(ins,:),HFFTS,1); | |
156 end | |
157 %% Final plot | |
158 %%%Spectrogram (comparison with basic and final) | |
159 % % bMax = max(max(log(10*basicSpec+1))); | |
160 % % xMax = max(max(log(10*xHat+1))); | |
161 % % mMax = max(max(log(10*magnitude+1))); | |
162 % % tMax = max([bMax mMax xMax]); | |
163 % % figure(3), subplot(3,1,1), imagesc(log(10*basicSpec(1:hopSize/2,:)+1)), axis xy, linkaxes, caxis([0 tMax]), colorbar, title('Basic Model') | |
164 % % subplot(3,1,2), imagesc(log(10*xHat(1:hopSize/2,:)+1)), axis xy, linkaxes, caxis([0 tMax]), colorbar, title('Optimised Spectrogram') | |
165 % % subplot(3,1,3), imagesc(log(10*magnitude(1:hopSize/2,:)+1)), axis xy, linkaxes, caxis([0 tMax]), colorbar, title('Actual Spectrogram') | |
166 % % | |
167 % % | |
168 % % %%%Filter | |
169 % % figure(4),plot(outlook(1:blockSize/8,:)), title('Filter') | |
170 % % | |
171 % % %%%Decay | |
172 % % figure(5),stem(partialsWeights), title('Partial Weights') | |
173 | |
174 %% Writing | |
175 %extraInfo = [sprintf('_%d_%d_%d_%d_%d_%d',blockSize,overlap,maxUpdates,nbPartials,nbBandPassFilters,bandPassFilterMaxFreq) specExp]; | |
176 finalScale = zeros(audioLength,1); | |
177 freq = zeros(fftSize,1); | |
178 finalAudio = cell(nbInstrument+1,1); | |
179 finalAudioMasked = cell(nbInstrument,1); | |
180 | |
181 wdw2 = hamming(fftSize); | |
182 nbMaskingTechniques = 4; | |
183 maskingCell = cell(1,nbMaskingTechniques); | |
184 maskingCell{1} = 'raw'; | |
185 maskingCell{2} = 'per'; | |
186 maskingCell{3} = 'sha'; | |
187 maskingCell{4} = '10p'; | |
188 finalMask = cell(nbInstrument,nbMaskingTechniques); %4 is the number of different masking techniques | |
189 for ins = 1:nbInstrument | |
190 finalMask{ins,1} = sparse(zeros(HFFTS,scoreLength)); | |
191 finalMask{ins,4} = sparse(zeros(HFFTS,scoreLength)); | |
192 for curFrame = 1:scoreLength | |
193 finalMask{ins,1}(:,curFrame) = filterBasis * filterCoefficients(:,ins).*(excitationSpectrums{ins,curFrame}*partialsWeights(:,ins)); | |
194 finalMask{ins,4}(:,curFrame) = filterBasis * filterCoefficients(:,ins).*(largerES{ins,curFrame}*partialsWeights(:,ins)); | |
195 end | |
196 | |
197 finalMask{ins,1} = finalMask{ins,1}.*repmat(gains(ins,:),HFFTS,1); | |
198 finalMask{ins,4} = finalMask{ins,4}.*repmat(gains(ins,:),HFFTS,1); | |
199 | |
200 % masking strategy 1 | |
201 finalMask{ins,2} = magnitude .* (finalMask{ins,4} ./ (xHat+eps)); | |
202 | |
203 % masking strategy 2 | |
204 finalMask{ins,3} = magnitude .* ( (finalMask{ins,4}+eps) ./ (xHat+eps)); | |
205 | |
206 % masking strategy 3 | |
207 finalMask{ins,4} = magnitude .* ( (finalMask{ins,4}+0.1 * eps) ./ (xHat+eps)); | |
208 | |
209 finalAudio{ins} = zeros(audioLength,1); | |
210 finalAudioMasked{ins} = zeros(audioLength,1); | |
211 end | |
212 | |
213 % sonifying the model | |
214 finalAudio{nbInstrument+1} = zeros(audioLength,1); | |
215 %Overlap and add | |
216 for curFrame = 1:scoreLength | |
217 freq(1:HFFTS) = xHat(:,curFrame).*exp(1i*phase(:,curFrame)); | |
218 freq(HFFTS+1:fftSize) = freq(HFFTS-1:-1:2); | |
219 faudio = real(ifft(freq)).*wdw2; | |
220 finalAudio{nbInstrument+1}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) = finalAudio{nbInstrument+1}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) + faudio; | |
221 for ins = 1:nbInstrument | |
222 freq(1:HFFTS) = finalMask{ins,1}(:,curFrame).*exp(1i*phase(:,curFrame)); | |
223 freq(HFFTS+1:fftSize) = freq(HFFTS-1:-1:2); | |
224 faudio = real(ifft(freq)).*wdw2; | |
225 finalAudio{ins}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) = finalAudio{ins}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) + faudio; | |
226 end | |
227 finalScale((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) = finalScale((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) + wdw2; | |
228 end | |
229 | |
230 for ins = 1:nbInstrument | |
231 finalAudio{ins} = finalAudio{ins}./finalScale; | |
232 % wavwrite(finalAudio{ins},fs,16,[midiCell{ins}(1:4) '_' num2str(ins) '_' maskingCell{1} extraInfo '.wav']); | |
233 end | |
234 finalAudio{nbInstrument+1} = finalAudio{nbInstrument+1}./finalScale; | |
235 %wavwrite(finalAudio{nbInstrument+1},fs,16,[mixName(1:4) '_0_' maskingCell{1} extraInfo '.wav']); | |
236 | |
237 % sonifying the masked version | |
238 %Overlap and add | |
239 for tech = 2:nbMaskingTechniques | |
240 finalScale = zeros(audioLength,1); | |
241 for curFrame = 1:scoreLength | |
242 for ins = 1:nbInstrument | |
243 freq(1:HFFTS) = finalMask{ins,tech}(:,curFrame).*exp(i*phase(:,curFrame)); | |
244 freq(HFFTS+1:fftSize) = freq(HFFTS-1:-1:2); | |
245 faudio = real(ifft(freq)).*wdw2; | |
246 finalAudioMasked{ins}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) = finalAudioMasked{ins}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) + faudio; | |
247 end | |
248 finalScale((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) = finalScale((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) + wdw2; | |
249 end | |
250 for ins = 1:nbInstrument | |
251 finalAudioMasked{ins} = finalAudioMasked{ins}./finalScale; | |
252 % wavwrite(finalAudioMasked{ins},fs,16,[midiCell{ins}(1:4) '_' num2str(ins) '_' maskingCell{tech} extraInfo '.wav']); | |
253 end | |
254 end | |
255 | |
256 | |
257 |