comparison Sirtassa/Main.m @ 2:13ec2fa02a26 tip

(none)
author Yannick JACOB <y.jacob@se12.qmul.ac.uk>
date Tue, 03 Sep 2013 15:33:42 +0100
parents
children
comparison
equal deleted inserted replaced
1:d8c7b69cb4c9 2:13ec2fa02a26
1 %% Default Values
2 %Audio and score information
3 if(~exist('midiCell','var'))
4 midiCell = cell(2,1);
5 midiCell{1} = 'Lussier_bassoon.mid';
6 midiCell{2} = 'Lussier_trumpet.mid';
7 mixName = 'Lussier_mix.wav';
8 %mixName = '311CLNOF.WAV';
9 %midiCell{1} = '311CLNOF.mid';
10 end
11 if(~exist('blockSize','var')) blockSize = 2048; end
12 if(~exist('overlap','var')) overlap = 2; end
13 %Maximum frequency of the filter
14 if(~exist('bandPassFilterMaxFreq','var')) bandPassFilterMaxFreq = 10000; end
15 %Number of band pass filters in the filter
16 if(~exist('nbBandPassFilters','var')) nbBandPassFilters = 20;end
17 %Maximum updates per frame
18 if(~exist('maxUpdates','var')) maxUpdates = 2;end
19 %Number of partials in the model
20 if(~exist('nbPartials','var')) nbPartials = 8;end
21 if(~exist('plotRefinedScore','var')) plotRefinedScore = 0;end
22 if(~exist('plotPartial','var')) plotPartial = 0;end
23 if(~exist('plotFilter','var')) plotFilter = 0;end
24 %Nummber of instrument in the mix.
25 nbInstrument = length(midiCell);
26 fftSize = blockSize;
27
28 %Weights of the harmonic series
29 partialsWeights = ones(nbPartials,nbInstrument);
30
31 colors = 'rwm';
32 %% Initialisation
33 hopSize =blockSize/overlap; %Hopsize
34 HFFTS = fftSize/2+1; %Size of the usefull part of the fft (half+1)
35 [audio, fs] = wavread(mixName);
36
37 % zeropadding the audio length
38 audio(end:end+hopSize-mod(length(audio),hopSize)) = 0;
39 audioLength = length(audio);
40
41 scoreLength = audioLength/hopSize - overlap+1; %Number of frames
42 wdw = hamming(blockSize);
43
44 tic
45 %% Create filters
46 filterBasis = createBPFilters(bandPassFilterMaxFreq,nbBandPassFilters,fftSize,fftSize/fs);
47 filterCoefficients = ones(nbBandPassFilters,nbInstrument);
48 %% Creating the score
49 originalScore = createScore(scoreLength,midiCell,fs/hopSize,fftSize/fs);
50 originalScore(scoreLength+1:end,:) = [];
51 gains = double(originalScore'>0);
52 %% Creating spectrogram
53 magnitude = zeros(fftSize, scoreLength);
54 phase = zeros(fftSize, scoreLength);
55 audioVector = zeros(fftSize,1);
56 for curFrame = 1:scoreLength
57 audioVector(1:blockSize) = audio(hopSize*(curFrame-1)+1:hopSize*(curFrame-1)+blockSize,1).*wdw;
58 spectrogramVector = fft(audioVector);
59 magnitude(:,curFrame) = abs(spectrogramVector);
60 phase(:,curFrame) = angle(spectrogramVector);
61 end
62 magnitude(HFFTS+1:end,:) = [];
63 phase(HFFTS+1:end,:) = [];
64 maxSizeWindows = 400;
65 hammingWindows = cell(1,maxSizeWindows);
66 for len = 1:maxSizeWindows
67 hammingWindows{len} = hamming(len);
68 end
69
70 %% Refine Score and create Excitation Spectrum
71 refinedScore = originalScore; %score to be refiened
72 basicSpec = zeros(HFFTS, scoreLength); %Basic theoretical spectrogram
73 excitationSpectrums = cell(nbInstrument,scoreLength); %cell for each frame
74 largerES = cell(nbInstrument,scoreLength); %cell for each frame
75
76 for ins = 1:nbInstrument %for each isntrument
77 for curFrame = 1: scoreLength %for each frame
78 excitationSpectrums{ins,curFrame} = sparse(zeros(HFFTS,nbPartials));
79 largerES{ins,curFrame} = sparse(zeros(HFFTS,nbPartials));
80 if(originalScore(curFrame,ins)) %if there is a note
81 %Refine score and creates Excitation Spectrum
82 [refinedScore(curFrame,ins), width] = refineScore(magnitude(:,curFrame),originalScore(curFrame,ins));
83 excitationSpectrums{ins,curFrame} = sparse(createExcitationSpectrums(refinedScore(curFrame,ins),HFFTS,nbPartials,partialsWeights(:,ins),width,hammingWindows));
84 largerES{ins,curFrame} = sparse(createExcitationSpectrums(refinedScore(curFrame,ins),HFFTS,nbPartials,partialsWeights(:,ins),2*width,hammingWindows));
85 end
86 basicSpec(:,curFrame) = basicSpec(:,curFrame) + (excitationSpectrums{ins,curFrame}*partialsWeights(:,ins))*gains(ins,curFrame); %Spectrogram without update
87 end
88 end
89 %% Plot spectrogram and add original and refined score
90 if (plotRefinedScore)
91 figure(1),imagesc(log(10*magnitude(1:fftSize/16,:)+1)), axis xy
92 plotableScore = originalScore+1;
93 plotableScore(plotableScore==1) = 0;
94 hold on ,plot(plotableScore,'.k'),
95
96 for harm = 1 : nbPartials
97 for ins = nbInstrument:-1:1
98 plot(refinedScore(:,ins)*harm+1,['.' colors(ins)]), colorbar
99 end
100 end
101 end
102 %% Online Processing
103 %Values that can be changed by the model.
104 maskForChangeableValues = basicSpec > 10^-4;
105 maskedSpectrogram = magnitude.*maskForChangeableValues;
106 bufferSize = 100;
107 nextProcess = 1;
108 for curFrame = 1:scoreLength
109 if sum(refinedScore(curFrame,:),2)
110 UpdateGainCoefficientsOL2
111 if (curFrame==nextProcess)
112 first = max(curFrame-bufferSize+1,1);
113 currentMag = magnitude(:,first:curFrame);
114 tmpMaskForChangeableValues = basicSpec(:,first:curFrame) > 10^-4;
115 tmpMaskedSpectrogram = currentMag.*tmpMaskForChangeableValues;
116 len = curFrame-first+1;
117 for nbUpdates = 1:maxUpdates %update loop
118 UpdatePartialCoefficientsOL
119 UpdateFilterCoefficientsOL
120 if (plotFilter)
121 %%%Filter
122 figure(4),plot(outlook(1:blockSize/8,:)), title('Filter')
123 end
124 if (plotPartial)
125 %%%Decay
126 figure(5),stem(partialsWeights), title('Partial Weights')
127 end
128 end
129 nextProcess = min(ceil(curFrame*1.2),scoreLength)
130 %bufferSize = round(max(bufferSize,curFrame*0.5));
131 if (plotFilter)
132 %%%Filter
133 figure(4),plot(outlook(1:blockSize/8,:)), title('Filter')
134 end
135 if (plotPartial)
136 %%%Decay
137 figure(5),stem(partialsWeights), title('Partial Weights')
138
139 end
140 end
141 end
142 end
143 timeV = toc;
144 fprintf('Time: %6.3g s, %6.2g times real time \t \n ', timeV, timeV*fs/audioLength);
145
146
147 % Remodel Spectrogram
148 ECA = cell(nbInstrument,1);
149 xHat = zeros(HFFTS,scoreLength);
150 for ins = 1:nbInstrument
151 ECA{ins} = sparse(zeros(HFFTS,scoreLength));
152 for frame = 1:scoreLength
153 ECA{ins}(:,frame) = filterBasis * filterCoefficients(:,ins).*(largerES{ins,frame}*partialsWeights(:,ins));
154 end
155 xHat = xHat + ECA{ins}.*repmat(gains(ins,:),HFFTS,1);
156 end
157 %% Final plot
158 %%%Spectrogram (comparison with basic and final)
159 % % bMax = max(max(log(10*basicSpec+1)));
160 % % xMax = max(max(log(10*xHat+1)));
161 % % mMax = max(max(log(10*magnitude+1)));
162 % % tMax = max([bMax mMax xMax]);
163 % % figure(3), subplot(3,1,1), imagesc(log(10*basicSpec(1:hopSize/2,:)+1)), axis xy, linkaxes, caxis([0 tMax]), colorbar, title('Basic Model')
164 % % subplot(3,1,2), imagesc(log(10*xHat(1:hopSize/2,:)+1)), axis xy, linkaxes, caxis([0 tMax]), colorbar, title('Optimised Spectrogram')
165 % % subplot(3,1,3), imagesc(log(10*magnitude(1:hopSize/2,:)+1)), axis xy, linkaxes, caxis([0 tMax]), colorbar, title('Actual Spectrogram')
166 % %
167 % %
168 % % %%%Filter
169 % % figure(4),plot(outlook(1:blockSize/8,:)), title('Filter')
170 % %
171 % % %%%Decay
172 % % figure(5),stem(partialsWeights), title('Partial Weights')
173
174 %% Writing
175 %extraInfo = [sprintf('_%d_%d_%d_%d_%d_%d',blockSize,overlap,maxUpdates,nbPartials,nbBandPassFilters,bandPassFilterMaxFreq) specExp];
176 finalScale = zeros(audioLength,1);
177 freq = zeros(fftSize,1);
178 finalAudio = cell(nbInstrument+1,1);
179 finalAudioMasked = cell(nbInstrument,1);
180
181 wdw2 = hamming(fftSize);
182 nbMaskingTechniques = 4;
183 maskingCell = cell(1,nbMaskingTechniques);
184 maskingCell{1} = 'raw';
185 maskingCell{2} = 'per';
186 maskingCell{3} = 'sha';
187 maskingCell{4} = '10p';
188 finalMask = cell(nbInstrument,nbMaskingTechniques); %4 is the number of different masking techniques
189 for ins = 1:nbInstrument
190 finalMask{ins,1} = sparse(zeros(HFFTS,scoreLength));
191 finalMask{ins,4} = sparse(zeros(HFFTS,scoreLength));
192 for curFrame = 1:scoreLength
193 finalMask{ins,1}(:,curFrame) = filterBasis * filterCoefficients(:,ins).*(excitationSpectrums{ins,curFrame}*partialsWeights(:,ins));
194 finalMask{ins,4}(:,curFrame) = filterBasis * filterCoefficients(:,ins).*(largerES{ins,curFrame}*partialsWeights(:,ins));
195 end
196
197 finalMask{ins,1} = finalMask{ins,1}.*repmat(gains(ins,:),HFFTS,1);
198 finalMask{ins,4} = finalMask{ins,4}.*repmat(gains(ins,:),HFFTS,1);
199
200 % masking strategy 1
201 finalMask{ins,2} = magnitude .* (finalMask{ins,4} ./ (xHat+eps));
202
203 % masking strategy 2
204 finalMask{ins,3} = magnitude .* ( (finalMask{ins,4}+eps) ./ (xHat+eps));
205
206 % masking strategy 3
207 finalMask{ins,4} = magnitude .* ( (finalMask{ins,4}+0.1 * eps) ./ (xHat+eps));
208
209 finalAudio{ins} = zeros(audioLength,1);
210 finalAudioMasked{ins} = zeros(audioLength,1);
211 end
212
213 % sonifying the model
214 finalAudio{nbInstrument+1} = zeros(audioLength,1);
215 %Overlap and add
216 for curFrame = 1:scoreLength
217 freq(1:HFFTS) = xHat(:,curFrame).*exp(1i*phase(:,curFrame));
218 freq(HFFTS+1:fftSize) = freq(HFFTS-1:-1:2);
219 faudio = real(ifft(freq)).*wdw2;
220 finalAudio{nbInstrument+1}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) = finalAudio{nbInstrument+1}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) + faudio;
221 for ins = 1:nbInstrument
222 freq(1:HFFTS) = finalMask{ins,1}(:,curFrame).*exp(1i*phase(:,curFrame));
223 freq(HFFTS+1:fftSize) = freq(HFFTS-1:-1:2);
224 faudio = real(ifft(freq)).*wdw2;
225 finalAudio{ins}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) = finalAudio{ins}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) + faudio;
226 end
227 finalScale((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) = finalScale((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) + wdw2;
228 end
229
230 for ins = 1:nbInstrument
231 finalAudio{ins} = finalAudio{ins}./finalScale;
232 % wavwrite(finalAudio{ins},fs,16,[midiCell{ins}(1:4) '_' num2str(ins) '_' maskingCell{1} extraInfo '.wav']);
233 end
234 finalAudio{nbInstrument+1} = finalAudio{nbInstrument+1}./finalScale;
235 %wavwrite(finalAudio{nbInstrument+1},fs,16,[mixName(1:4) '_0_' maskingCell{1} extraInfo '.wav']);
236
237 % sonifying the masked version
238 %Overlap and add
239 for tech = 2:nbMaskingTechniques
240 finalScale = zeros(audioLength,1);
241 for curFrame = 1:scoreLength
242 for ins = 1:nbInstrument
243 freq(1:HFFTS) = finalMask{ins,tech}(:,curFrame).*exp(i*phase(:,curFrame));
244 freq(HFFTS+1:fftSize) = freq(HFFTS-1:-1:2);
245 faudio = real(ifft(freq)).*wdw2;
246 finalAudioMasked{ins}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) = finalAudioMasked{ins}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) + faudio;
247 end
248 finalScale((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) = finalScale((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) + wdw2;
249 end
250 for ins = 1:nbInstrument
251 finalAudioMasked{ins} = finalAudioMasked{ins}./finalScale;
252 % wavwrite(finalAudioMasked{ins},fs,16,[midiCell{ins}(1:4) '_' num2str(ins) '_' maskingCell{tech} extraInfo '.wav']);
253 end
254 end
255
256
257