y@2
|
1 %% Default Values
|
y@2
|
2 %Audio and score information
|
y@2
|
3 if(~exist('midiCell','var'))
|
y@2
|
4 midiCell = cell(2,1);
|
y@2
|
5 midiCell{1} = 'Lussier_bassoon.mid';
|
y@2
|
6 midiCell{2} = 'Lussier_trumpet.mid';
|
y@2
|
7 mixName = 'Lussier_mix.wav';
|
y@2
|
8 %mixName = '311CLNOF.WAV';
|
y@2
|
9 %midiCell{1} = '311CLNOF.mid';
|
y@2
|
10 end
|
y@2
|
11 if(~exist('blockSize','var')) blockSize = 2048; end
|
y@2
|
12 if(~exist('overlap','var')) overlap = 2; end
|
y@2
|
13 %Maximum frequency of the filter
|
y@2
|
14 if(~exist('bandPassFilterMaxFreq','var')) bandPassFilterMaxFreq = 10000; end
|
y@2
|
15 %Number of band pass filters in the filter
|
y@2
|
16 if(~exist('nbBandPassFilters','var')) nbBandPassFilters = 20;end
|
y@2
|
17 %Maximum updates per frame
|
y@2
|
18 if(~exist('maxUpdates','var')) maxUpdates = 2;end
|
y@2
|
19 %Number of partials in the model
|
y@2
|
20 if(~exist('nbPartials','var')) nbPartials = 8;end
|
y@2
|
21 if(~exist('plotRefinedScore','var')) plotRefinedScore = 0;end
|
y@2
|
22 if(~exist('plotPartial','var')) plotPartial = 0;end
|
y@2
|
23 if(~exist('plotFilter','var')) plotFilter = 0;end
|
y@2
|
24 %Nummber of instrument in the mix.
|
y@2
|
25 nbInstrument = length(midiCell);
|
y@2
|
26 fftSize = blockSize;
|
y@2
|
27
|
y@2
|
28 %Weights of the harmonic series
|
y@2
|
29 partialsWeights = ones(nbPartials,nbInstrument);
|
y@2
|
30
|
y@2
|
31 colors = 'rwm';
|
y@2
|
32 %% Initialisation
|
y@2
|
33 hopSize =blockSize/overlap; %Hopsize
|
y@2
|
34 HFFTS = fftSize/2+1; %Size of the usefull part of the fft (half+1)
|
y@2
|
35 [audio, fs] = wavread(mixName);
|
y@2
|
36
|
y@2
|
37 % zeropadding the audio length
|
y@2
|
38 audio(end:end+hopSize-mod(length(audio),hopSize)) = 0;
|
y@2
|
39 audioLength = length(audio);
|
y@2
|
40
|
y@2
|
41 scoreLength = audioLength/hopSize - overlap+1; %Number of frames
|
y@2
|
42 wdw = hamming(blockSize);
|
y@2
|
43
|
y@2
|
44 tic
|
y@2
|
45 %% Create filters
|
y@2
|
46 filterBasis = createBPFilters(bandPassFilterMaxFreq,nbBandPassFilters,fftSize,fftSize/fs);
|
y@2
|
47 filterCoefficients = ones(nbBandPassFilters,nbInstrument);
|
y@2
|
48 %% Creating the score
|
y@2
|
49 originalScore = createScore(scoreLength,midiCell,fs/hopSize,fftSize/fs);
|
y@2
|
50 originalScore(scoreLength+1:end,:) = [];
|
y@2
|
51 gains = double(originalScore'>0);
|
y@2
|
52 %% Creating spectrogram
|
y@2
|
53 magnitude = zeros(fftSize, scoreLength);
|
y@2
|
54 phase = zeros(fftSize, scoreLength);
|
y@2
|
55 audioVector = zeros(fftSize,1);
|
y@2
|
56 for curFrame = 1:scoreLength
|
y@2
|
57 audioVector(1:blockSize) = audio(hopSize*(curFrame-1)+1:hopSize*(curFrame-1)+blockSize,1).*wdw;
|
y@2
|
58 spectrogramVector = fft(audioVector);
|
y@2
|
59 magnitude(:,curFrame) = abs(spectrogramVector);
|
y@2
|
60 phase(:,curFrame) = angle(spectrogramVector);
|
y@2
|
61 end
|
y@2
|
62 magnitude(HFFTS+1:end,:) = [];
|
y@2
|
63 phase(HFFTS+1:end,:) = [];
|
y@2
|
64 maxSizeWindows = 400;
|
y@2
|
65 hammingWindows = cell(1,maxSizeWindows);
|
y@2
|
66 for len = 1:maxSizeWindows
|
y@2
|
67 hammingWindows{len} = hamming(len);
|
y@2
|
68 end
|
y@2
|
69
|
y@2
|
70 %% Refine Score and create Excitation Spectrum
|
y@2
|
71 refinedScore = originalScore; %score to be refiened
|
y@2
|
72 basicSpec = zeros(HFFTS, scoreLength); %Basic theoretical spectrogram
|
y@2
|
73 excitationSpectrums = cell(nbInstrument,scoreLength); %cell for each frame
|
y@2
|
74 largerES = cell(nbInstrument,scoreLength); %cell for each frame
|
y@2
|
75
|
y@2
|
76 for ins = 1:nbInstrument %for each isntrument
|
y@2
|
77 for curFrame = 1: scoreLength %for each frame
|
y@2
|
78 excitationSpectrums{ins,curFrame} = sparse(zeros(HFFTS,nbPartials));
|
y@2
|
79 largerES{ins,curFrame} = sparse(zeros(HFFTS,nbPartials));
|
y@2
|
80 if(originalScore(curFrame,ins)) %if there is a note
|
y@2
|
81 %Refine score and creates Excitation Spectrum
|
y@2
|
82 [refinedScore(curFrame,ins), width] = refineScore(magnitude(:,curFrame),originalScore(curFrame,ins));
|
y@2
|
83 excitationSpectrums{ins,curFrame} = sparse(createExcitationSpectrums(refinedScore(curFrame,ins),HFFTS,nbPartials,partialsWeights(:,ins),width,hammingWindows));
|
y@2
|
84 largerES{ins,curFrame} = sparse(createExcitationSpectrums(refinedScore(curFrame,ins),HFFTS,nbPartials,partialsWeights(:,ins),2*width,hammingWindows));
|
y@2
|
85 end
|
y@2
|
86 basicSpec(:,curFrame) = basicSpec(:,curFrame) + (excitationSpectrums{ins,curFrame}*partialsWeights(:,ins))*gains(ins,curFrame); %Spectrogram without update
|
y@2
|
87 end
|
y@2
|
88 end
|
y@2
|
89 %% Plot spectrogram and add original and refined score
|
y@2
|
90 if (plotRefinedScore)
|
y@2
|
91 figure(1),imagesc(log(10*magnitude(1:fftSize/16,:)+1)), axis xy
|
y@2
|
92 plotableScore = originalScore+1;
|
y@2
|
93 plotableScore(plotableScore==1) = 0;
|
y@2
|
94 hold on ,plot(plotableScore,'.k'),
|
y@2
|
95
|
y@2
|
96 for harm = 1 : nbPartials
|
y@2
|
97 for ins = nbInstrument:-1:1
|
y@2
|
98 plot(refinedScore(:,ins)*harm+1,['.' colors(ins)]), colorbar
|
y@2
|
99 end
|
y@2
|
100 end
|
y@2
|
101 end
|
y@2
|
102 %% Online Processing
|
y@2
|
103 %Values that can be changed by the model.
|
y@2
|
104 maskForChangeableValues = basicSpec > 10^-4;
|
y@2
|
105 maskedSpectrogram = magnitude.*maskForChangeableValues;
|
y@2
|
106 bufferSize = 100;
|
y@2
|
107 nextProcess = 1;
|
y@2
|
108 for curFrame = 1:scoreLength
|
y@2
|
109 if sum(refinedScore(curFrame,:),2)
|
y@2
|
110 UpdateGainCoefficientsOL2
|
y@2
|
111 if (curFrame==nextProcess)
|
y@2
|
112 first = max(curFrame-bufferSize+1,1);
|
y@2
|
113 currentMag = magnitude(:,first:curFrame);
|
y@2
|
114 tmpMaskForChangeableValues = basicSpec(:,first:curFrame) > 10^-4;
|
y@2
|
115 tmpMaskedSpectrogram = currentMag.*tmpMaskForChangeableValues;
|
y@2
|
116 len = curFrame-first+1;
|
y@2
|
117 for nbUpdates = 1:maxUpdates %update loop
|
y@2
|
118 UpdatePartialCoefficientsOL
|
y@2
|
119 UpdateFilterCoefficientsOL
|
y@2
|
120 if (plotFilter)
|
y@2
|
121 %%%Filter
|
y@2
|
122 figure(4),plot(outlook(1:blockSize/8,:)), title('Filter')
|
y@2
|
123 end
|
y@2
|
124 if (plotPartial)
|
y@2
|
125 %%%Decay
|
y@2
|
126 figure(5),stem(partialsWeights), title('Partial Weights')
|
y@2
|
127 end
|
y@2
|
128 end
|
y@2
|
129 nextProcess = min(ceil(curFrame*1.2),scoreLength)
|
y@2
|
130 %bufferSize = round(max(bufferSize,curFrame*0.5));
|
y@2
|
131 if (plotFilter)
|
y@2
|
132 %%%Filter
|
y@2
|
133 figure(4),plot(outlook(1:blockSize/8,:)), title('Filter')
|
y@2
|
134 end
|
y@2
|
135 if (plotPartial)
|
y@2
|
136 %%%Decay
|
y@2
|
137 figure(5),stem(partialsWeights), title('Partial Weights')
|
y@2
|
138
|
y@2
|
139 end
|
y@2
|
140 end
|
y@2
|
141 end
|
y@2
|
142 end
|
y@2
|
143 timeV = toc;
|
y@2
|
144 fprintf('Time: %6.3g s, %6.2g times real time \t \n ', timeV, timeV*fs/audioLength);
|
y@2
|
145
|
y@2
|
146
|
y@2
|
147 % Remodel Spectrogram
|
y@2
|
148 ECA = cell(nbInstrument,1);
|
y@2
|
149 xHat = zeros(HFFTS,scoreLength);
|
y@2
|
150 for ins = 1:nbInstrument
|
y@2
|
151 ECA{ins} = sparse(zeros(HFFTS,scoreLength));
|
y@2
|
152 for frame = 1:scoreLength
|
y@2
|
153 ECA{ins}(:,frame) = filterBasis * filterCoefficients(:,ins).*(largerES{ins,frame}*partialsWeights(:,ins));
|
y@2
|
154 end
|
y@2
|
155 xHat = xHat + ECA{ins}.*repmat(gains(ins,:),HFFTS,1);
|
y@2
|
156 end
|
y@2
|
157 %% Final plot
|
y@2
|
158 %%%Spectrogram (comparison with basic and final)
|
y@2
|
159 % % bMax = max(max(log(10*basicSpec+1)));
|
y@2
|
160 % % xMax = max(max(log(10*xHat+1)));
|
y@2
|
161 % % mMax = max(max(log(10*magnitude+1)));
|
y@2
|
162 % % tMax = max([bMax mMax xMax]);
|
y@2
|
163 % % figure(3), subplot(3,1,1), imagesc(log(10*basicSpec(1:hopSize/2,:)+1)), axis xy, linkaxes, caxis([0 tMax]), colorbar, title('Basic Model')
|
y@2
|
164 % % subplot(3,1,2), imagesc(log(10*xHat(1:hopSize/2,:)+1)), axis xy, linkaxes, caxis([0 tMax]), colorbar, title('Optimised Spectrogram')
|
y@2
|
165 % % subplot(3,1,3), imagesc(log(10*magnitude(1:hopSize/2,:)+1)), axis xy, linkaxes, caxis([0 tMax]), colorbar, title('Actual Spectrogram')
|
y@2
|
166 % %
|
y@2
|
167 % %
|
y@2
|
168 % % %%%Filter
|
y@2
|
169 % % figure(4),plot(outlook(1:blockSize/8,:)), title('Filter')
|
y@2
|
170 % %
|
y@2
|
171 % % %%%Decay
|
y@2
|
172 % % figure(5),stem(partialsWeights), title('Partial Weights')
|
y@2
|
173
|
y@2
|
174 %% Writing
|
y@2
|
175 %extraInfo = [sprintf('_%d_%d_%d_%d_%d_%d',blockSize,overlap,maxUpdates,nbPartials,nbBandPassFilters,bandPassFilterMaxFreq) specExp];
|
y@2
|
176 finalScale = zeros(audioLength,1);
|
y@2
|
177 freq = zeros(fftSize,1);
|
y@2
|
178 finalAudio = cell(nbInstrument+1,1);
|
y@2
|
179 finalAudioMasked = cell(nbInstrument,1);
|
y@2
|
180
|
y@2
|
181 wdw2 = hamming(fftSize);
|
y@2
|
182 nbMaskingTechniques = 4;
|
y@2
|
183 maskingCell = cell(1,nbMaskingTechniques);
|
y@2
|
184 maskingCell{1} = 'raw';
|
y@2
|
185 maskingCell{2} = 'per';
|
y@2
|
186 maskingCell{3} = 'sha';
|
y@2
|
187 maskingCell{4} = '10p';
|
y@2
|
188 finalMask = cell(nbInstrument,nbMaskingTechniques); %4 is the number of different masking techniques
|
y@2
|
189 for ins = 1:nbInstrument
|
y@2
|
190 finalMask{ins,1} = sparse(zeros(HFFTS,scoreLength));
|
y@2
|
191 finalMask{ins,4} = sparse(zeros(HFFTS,scoreLength));
|
y@2
|
192 for curFrame = 1:scoreLength
|
y@2
|
193 finalMask{ins,1}(:,curFrame) = filterBasis * filterCoefficients(:,ins).*(excitationSpectrums{ins,curFrame}*partialsWeights(:,ins));
|
y@2
|
194 finalMask{ins,4}(:,curFrame) = filterBasis * filterCoefficients(:,ins).*(largerES{ins,curFrame}*partialsWeights(:,ins));
|
y@2
|
195 end
|
y@2
|
196
|
y@2
|
197 finalMask{ins,1} = finalMask{ins,1}.*repmat(gains(ins,:),HFFTS,1);
|
y@2
|
198 finalMask{ins,4} = finalMask{ins,4}.*repmat(gains(ins,:),HFFTS,1);
|
y@2
|
199
|
y@2
|
200 % masking strategy 1
|
y@2
|
201 finalMask{ins,2} = magnitude .* (finalMask{ins,4} ./ (xHat+eps));
|
y@2
|
202
|
y@2
|
203 % masking strategy 2
|
y@2
|
204 finalMask{ins,3} = magnitude .* ( (finalMask{ins,4}+eps) ./ (xHat+eps));
|
y@2
|
205
|
y@2
|
206 % masking strategy 3
|
y@2
|
207 finalMask{ins,4} = magnitude .* ( (finalMask{ins,4}+0.1 * eps) ./ (xHat+eps));
|
y@2
|
208
|
y@2
|
209 finalAudio{ins} = zeros(audioLength,1);
|
y@2
|
210 finalAudioMasked{ins} = zeros(audioLength,1);
|
y@2
|
211 end
|
y@2
|
212
|
y@2
|
213 % sonifying the model
|
y@2
|
214 finalAudio{nbInstrument+1} = zeros(audioLength,1);
|
y@2
|
215 %Overlap and add
|
y@2
|
216 for curFrame = 1:scoreLength
|
y@2
|
217 freq(1:HFFTS) = xHat(:,curFrame).*exp(1i*phase(:,curFrame));
|
y@2
|
218 freq(HFFTS+1:fftSize) = freq(HFFTS-1:-1:2);
|
y@2
|
219 faudio = real(ifft(freq)).*wdw2;
|
y@2
|
220 finalAudio{nbInstrument+1}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) = finalAudio{nbInstrument+1}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) + faudio;
|
y@2
|
221 for ins = 1:nbInstrument
|
y@2
|
222 freq(1:HFFTS) = finalMask{ins,1}(:,curFrame).*exp(1i*phase(:,curFrame));
|
y@2
|
223 freq(HFFTS+1:fftSize) = freq(HFFTS-1:-1:2);
|
y@2
|
224 faudio = real(ifft(freq)).*wdw2;
|
y@2
|
225 finalAudio{ins}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) = finalAudio{ins}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) + faudio;
|
y@2
|
226 end
|
y@2
|
227 finalScale((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) = finalScale((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) + wdw2;
|
y@2
|
228 end
|
y@2
|
229
|
y@2
|
230 for ins = 1:nbInstrument
|
y@2
|
231 finalAudio{ins} = finalAudio{ins}./finalScale;
|
y@2
|
232 % wavwrite(finalAudio{ins},fs,16,[midiCell{ins}(1:4) '_' num2str(ins) '_' maskingCell{1} extraInfo '.wav']);
|
y@2
|
233 end
|
y@2
|
234 finalAudio{nbInstrument+1} = finalAudio{nbInstrument+1}./finalScale;
|
y@2
|
235 %wavwrite(finalAudio{nbInstrument+1},fs,16,[mixName(1:4) '_0_' maskingCell{1} extraInfo '.wav']);
|
y@2
|
236
|
y@2
|
237 % sonifying the masked version
|
y@2
|
238 %Overlap and add
|
y@2
|
239 for tech = 2:nbMaskingTechniques
|
y@2
|
240 finalScale = zeros(audioLength,1);
|
y@2
|
241 for curFrame = 1:scoreLength
|
y@2
|
242 for ins = 1:nbInstrument
|
y@2
|
243 freq(1:HFFTS) = finalMask{ins,tech}(:,curFrame).*exp(i*phase(:,curFrame));
|
y@2
|
244 freq(HFFTS+1:fftSize) = freq(HFFTS-1:-1:2);
|
y@2
|
245 faudio = real(ifft(freq)).*wdw2;
|
y@2
|
246 finalAudioMasked{ins}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) = finalAudioMasked{ins}((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) + faudio;
|
y@2
|
247 end
|
y@2
|
248 finalScale((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) = finalScale((curFrame-1)*hopSize+1:(curFrame-1)*hopSize+fftSize,1) + wdw2;
|
y@2
|
249 end
|
y@2
|
250 for ins = 1:nbInstrument
|
y@2
|
251 finalAudioMasked{ins} = finalAudioMasked{ins}./finalScale;
|
y@2
|
252 % wavwrite(finalAudioMasked{ins},fs,16,[midiCell{ins}(1:4) '_' num2str(ins) '_' maskingCell{tech} extraInfo '.wav']);
|
y@2
|
253 end
|
y@2
|
254 end
|
y@2
|
255
|
y@2
|
256
|
y@2
|
257
|