Chris@0
|
function [f_pitch,sideinfo] = audio_to_pitch_via_FB(f_audio,parameter,sideinfo)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Name: audio_to_pitch_via_FB
% Date of Revision: 2011-03
% Programmer: Meinard Mueller, Sebastian Ewert
%
% Description:
% Computes (and optionally saves) pitch features via a pre-designed
% filterbank. For each window length specified via parameter.winLenSTMSP
% the following is computed:
% - STMSP (short-time mean-square power) for each MIDI pitch between
%   parameter.midiMin and parameter.midiMax
% - STMSP subbands are stored in f_pitch, where f_pitch(p,:) contains
%   STMSP of subband of pitch p (f_pitch has 120 rows; rows of pitches
%   that were not requested stay zero)
% - sideinfo contains information of original pcm, which is saved along
%   with f_pitch into a single mat-file
% - If parameter.save == 1, f_pitch and sideinfo are stored once per window
%   length in a mat-file:
%   save(strcat(parameter.saveDir,filename),'f_pitch','sideinfo');
%   where filename is parameter.saveFilename followed by '_pitch_', the
%   window length and (unless parameter.saveAsTuned is set) a suffix
%   naming the filterbank shift.
% - The returned f_pitch / sideinfo always describe the LAST entry of
%   parameter.winLenSTMSP.
%
% Processing outline:
% - The audio is used at 22050 Hz for pitches 96..120, downsampled by 5
%   (4410 Hz) for pitches 60..95 and by 25 (882 Hz) for pitches 21..59.
% - Each pitch subband is obtained with filtfilt using the coefficients
%   h(p).b / h(p).a loaded from the filterbank mat-file, then squared and
%   summed over half-overlapping windows.
%
% Input:
% f_audio                  audio samples; a row vector is converted to a
%                          column vector
% parameter.winLenSTMSP = 4410;   window length(s) in samples at parameter.fs
% parameter.shiftFB = 0;          filterbank selection, one of 0..5:
%                                 0 none, 1 minusQuarter, 2 minusThird,
%                                 3 minusHalf, 4 minusTwoThird,
%                                 5 minusThreeQuarters
% parameter.midiMin = 21;         lowest MIDI pitch (integer, >= 21)
% parameter.midiMax = 108;        highest MIDI pitch (integer, <= 120)
% parameter.save = 0;             1: write one mat-file per window length
% parameter.saveDir = '';
% parameter.saveFilename = '';
% parameter.saveAsTuned = 0;      nonzero: mark features as tuned and save
%                                 without the shift suffix in the filename
% parameter.fs = 22050;           only 22050 is supported
% parameter.visualize = 0;        1: call visualizePitch per window length
% sideinfo                        optional struct that is extended with
%                                 the field 'pitch'
%
% Required files:
% 'MIDI_FB_ellip_pitch_60_96_22050_Q25.mat'
% 'MIDI_FB_ellip_pitch_60_96_22050_Q25_minusHalf.mat'
% 'MIDI_FB_ellip_pitch_60_96_22050_Q25_minusQuarter.mat'
% 'MIDI_FB_ellip_pitch_60_96_22050_Q25_minusThird.mat'
% 'MIDI_FB_ellip_pitch_60_96_22050_Q25_minusThreeQuarters.mat'
% 'MIDI_FB_ellip_pitch_60_96_22050_Q25_minusTwoThird.mat'
% Each file must provide a struct array h with fields b and a, indexed by
% MIDI pitch.
%
% Output:
% f_pitch
% sideinfo
%
%
% License:
% This file is part of 'Chroma Toolbox'.
%
% 'Chroma Toolbox' is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 2 of the License, or
% (at your option) any later version.
%
% 'Chroma Toolbox' is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with 'Chroma Toolbox'. If not, see <http://www.gnu.org/licenses/>.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Check parameters
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

if nargin<3
    sideinfo=[];
end

if nargin<2
    parameter=[];
end
if ~isfield(parameter,'visualize')
    parameter.visualize = 0;
end
if ~isfield(parameter,'save')
    parameter.save = 0;
end
if ~isfield(parameter,'saveDir')
    parameter.saveDir = '';
end
if ~isfield(parameter,'saveFilename')
    parameter.saveFilename = '';
end
if ~isfield(parameter,'saveAsTuned')
    parameter.saveAsTuned = 0;
end
if ~isfield(parameter,'fs')
    parameter.fs = 22050;
elseif parameter.fs ~= 22050
    error('audio_to_pitch_via_FB not implemented yet for sample rates other than 22050.');
end
if ~isfield(parameter,'midiMin')
    parameter.midiMin = 21;
end
if ~isfield(parameter,'midiMax')
    parameter.midiMax = 108;
end
if ~isfield(parameter,'winLenSTMSP')
    parameter.winLenSTMSP = 4410;
    %parameter.winLenSTMSP = [882 4410];
end
if ~isfield(parameter,'shiftFB')
    parameter.shiftFB = 0;
end

% Only pitches 21..120 have a sampling rate assigned below. Reject anything
% else up front instead of failing later with a cryptic indexing error.
% An empty range (midiMin > midiMax) is still accepted and yields zeros.
pitches = parameter.midiMin:parameter.midiMax;
if any(pitches < 21 | pitches > 120 | pitches ~= round(pitches))
    error('audio_to_pitch_via_FB:invalidMidiRange', ...
        'parameter.midiMin/midiMax must be integers within 21..120 (got %g..%g).', ...
        parameter.midiMin, parameter.midiMax);
end

% Filterbank file and save-filename suffix per shiftFB value (0..5).
% Note: the save suffix for shift 5 is '_minusThreeQuarter' (no trailing
% 's'), kept as is so previously written feature files are still found.
fbFiles = { ...
    'MIDI_FB_ellip_pitch_60_96_22050_Q25.mat', ...
    'MIDI_FB_ellip_pitch_60_96_22050_Q25_minusQuarter.mat', ...
    'MIDI_FB_ellip_pitch_60_96_22050_Q25_minusThird.mat', ...
    'MIDI_FB_ellip_pitch_60_96_22050_Q25_minusHalf.mat', ...
    'MIDI_FB_ellip_pitch_60_96_22050_Q25_minusTwoThird.mat', ...
    'MIDI_FB_ellip_pitch_60_96_22050_Q25_minusThreeQuarters.mat'};
shiftSuffix = {'', '_minusQuarter', '_minusThird', '_minusHalf', ...
    '_minusTwoThird', '_minusThreeQuarter'};

shiftFB = parameter.shiftFB;
if ~((isnumeric(shiftFB) || islogical(shiftFB)) && isscalar(shiftFB) && any(shiftFB == 0:5))
    error('Wrong shift parameter!')
end
shiftIdx = double(shiftFB) + 1;

% A row vector would otherwise be treated as a one-sample signal below
% (wav_size = size(f_audio,1)).
if isvector(f_audio) && size(f_audio,1) == 1
    f_audio = f_audio(:);
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Main program
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Load only the filter coefficients into a struct, so that nothing stored
% in the mat-file can overwrite variables of this function.
fb = load(fbFiles{shiftIdx},'h');
h = fb.h;

% Sampling rate used for each MIDI pitch, and the matching entry of pcm_ds
fs_pitch = zeros(1,128);
fs_index = zeros(1,128);

fs_pitch(21:59) = 882;
fs_pitch(60:95) = 4410;
fs_pitch(96:120) = 22050;

fs_index(21:59) = 3;
fs_index(60:95) = 2;
fs_index(96:120) = 1;

% Audio at 22050 Hz, 4410 Hz and 882 Hz (two successive downsamplings by 5)
pcm_ds = cell(3,1);
pcm_ds{1} = f_audio;
pcm_ds{2} = resample(pcm_ds{1},1,5,100);
pcm_ds{3} = resample(pcm_ds{2},1,5,100);

fprintf('Computing subbands and STMSP for all pitches: (%i-%i): %4i',parameter.midiMin,parameter.midiMax,0);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Compute features for all pitches
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

winLenSTMSP = parameter.winLenSTMSP;
winOvSTMSP = round(winLenSTMSP/2);
featureRate = parameter.fs./(winLenSTMSP-winOvSTMSP); %formerly win_res
wav_size = size(f_audio,1);

% Segment boundaries (in samples at parameter.fs) for every window length
num_window = length(winLenSTMSP);
f_pitch_energy = cell(num_window,1);
seg_pcm_num = cell(num_window,1);
seg_pcm_start = cell(num_window,1);
seg_pcm_stop = cell(num_window,1);
for w=1:num_window
    step_size = winLenSTMSP(w)-winOvSTMSP(w);
    group_delay = round(winLenSTMSP(w)/2);
    % The first segment is a shortened one covering samples 1..group_delay;
    % the regular segments follow with hop size step_size.
    seg_pcm_start{w} = [1 1:step_size:wav_size]'; %group delay is adjusted
    seg_pcm_stop{w} = min(seg_pcm_start{w}+winLenSTMSP(w),wav_size);
    seg_pcm_stop{w}(1) = min(group_delay,wav_size);
    seg_pcm_num{w} = size(seg_pcm_start{w},1);
    f_pitch_energy{w} = zeros(120,seg_pcm_num{w});
end

for p = pitches
    fprintf('\b\b\b\b');fprintf('%4i',p);
    f_filtfilt = filtfilt(h(p).b, h(p).a, pcm_ds{fs_index(p)});
    f_square = f_filtfilt.^2;
    factor = (parameter.fs/fs_pitch(p)); %adjustment for sampling rate

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % f_pitch_energy
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    for w=1:num_window
        % Convert segment boundaries to sample indices at this pitch's
        % sampling rate. The order of operations is kept as
        % (x/fs)*fs_pitch so the ceil/floor results are unchanged.
        seg_start = ceil((seg_pcm_start{w}/parameter.fs)*fs_pitch(p));
        seg_stop = floor((seg_pcm_stop{w}/parameter.fs)*fs_pitch(p));
        for k=1:seg_pcm_num{w}
            % An empty range (seg_stop < seg_start) contributes 0
            f_pitch_energy{w}(p,k) = sum(f_square(seg_start(k):seg_stop(k)))*factor;
        end
    end
end
fprintf('\n');

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Save f_pitch_energy for each window size separately as f_pitch
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
sideinfo.pitch.version = 1;
sideinfo.pitch.midiMin = parameter.midiMin;
sideinfo.pitch.midiMax = parameter.midiMax;
if parameter.save == 1
    for w=1:num_window
        f_pitch = f_pitch_energy{w};
        sideinfo.pitch.winLenSTMSP = winLenSTMSP(w);
        sideinfo.pitch.winOvSTMSP = winOvSTMSP(w);
        sideinfo.pitch.featureRate = featureRate(w);
        sideinfo.pitch.shiftFB = parameter.shiftFB;
        sideinfo.pitch.featuresAreTuned = 0;
        filename = strcat(parameter.saveFilename,'_pitch_',num2str(winLenSTMSP(w)));
        if parameter.saveAsTuned
            % Tuned features are stored under the unshifted filename
            sideinfo.pitch.featuresAreTuned = 1;
        else
            filename = strcat(filename,shiftSuffix{shiftIdx});
        end
        save(strcat(parameter.saveDir,filename),'f_pitch','sideinfo');
    end
else
    % Without saving, only the last window length is returned
    f_pitch = f_pitch_energy{num_window};
    sideinfo.pitch.winLenSTMSP = winLenSTMSP(num_window);
    sideinfo.pitch.winOvSTMSP = winOvSTMSP(num_window);
    sideinfo.pitch.featureRate = featureRate(num_window);
    sideinfo.pitch.shiftFB = parameter.shiftFB;
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Visualization
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if parameter.visualize == 1
    for w=1:num_window
        parameterVis.featureRate = featureRate(w);
        visualizePitch(f_pitch_energy{w},parameterVis);
    end
end

end
|
Chris@0
|
248
|
Chris@0
|
249
|