Mercurial > hg > camir-aes2014
comparison toolboxes/MIRtoolbox1.3.2/MIRToolbox/@miraudio/miraudio.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e9a9cd732c1e |
---|---|
function varargout = miraudio(orig,varargin)
%MIRAUDIO Build a miraudio object, or apply audio operations to one.
%   a = miraudio('filename') reads the sound file 'filename' (WAV or AU
%       format) and returns it as a miraudio object.
%   a = miraudio('Folder') reads every sound file of the CURRENT folder
%       into a miraudio object.
%   a = miraudio(v,sr) wraps the column vector v into a miraudio object
%       whose sampling frequency is sr Hertz (44100 Hz when sr is omitted).
%   a = miraudio(b, ...), with b already a miraudio object, applies to b
%       the operations selected by the optional arguments listed below.
%
%   Transformation options:
%       miraudio(...,'Mono',0) keeps every channel of the original sound
%           file separately instead of summing them into a single mono
%           track (which is the default behaviour).
%       miraudio(...,'Center') centers the signals.
%       miraudio(...,'Sampling',r) resamples at the rate r (in Hz).
%           (Requires the Signal Processing Toolbox.)
%       miraudio(...,'Normal') normalizes with respect to RMS energy.
%   Extraction options:
%       miraudio(...,'Extract',t1,t2,u,f) (or 'Excerpt') keeps the signal
%           between the dates t1 and t2, expressed in the unit u.
%           Possible values for u:
%               's' (seconds, by default),
%               'sp' (sample index, starting from 1).
%           The optional argument f selects the origin of the temporal
%           positions. Possible values for f:
%               'Start' (by default),
%               'Middle' (of the sequence),
%               'End' (of the sequence).
%           With 'Middle' or 'End', negative values of t1 or t2 denote
%           positions before the middle or the end of the sequence.
%       miraudio(...,'Trim') removes the pseudo-silence at the beginning
%           and at the end of the audio file. Silent frames are frames
%           with RMS below t times the medium RMS of the whole audio file.
%               Default value: t = 0.06
%           Instead of 'Trim':
%               'TrimStart' (or 'TrimBegin') only trims the beginning,
%               'TrimEnd' only trims the end.
%       miraudio(...,'TrimThreshold',t) sets the trimming threshold t.
%       miraudio(...,'Channel',c) or miraudio(...,'Channels',c) keeps the
%           channels given by the (array of) integer(s) c.
%   Labeling option:
%       miraudio(...,'Label',l) labels the audio signal(s) following the
%           label(s) l.
%           If l is a (series of) number(s), each signal is labelled with
%           the substring of index l of its file name. If l = 0, the
%           whole file name is used.


% --- Direct import of numeric data: no option parsing is performed. ---
if isnumeric(orig)
    if size(orig,2) > 1 || size(orig,3) > 1
        mirerror('MIRAUDIO','Only column vectors can be imported into mirtoolbox.');
    end
    sr = 44100;                         % default sampling rate
    if nargin > 1
        sr = varargin{1};
    end
    nbits = 32;
    % Only inputs with at most one column reach this point, so this
    % transposition concerns single-row inputs such as a scalar.
    if size(orig,1) == 1
        orig = orig';
    end
    tp = (0:size(orig,1)-1)'/sr;        % time axis, in seconds
    t = mirtemporal([],'Time',{{tp}},'Data',{{orig}},...
                    'FramePos',{{tp([1 end])}},'Sampling',{sr},...
                    'Name',{inputname(1)},'Label',{{}},'Clusters',{{}},...
                    'Channels',[],'Centered',0,'NBits',{nbits},...
                    'Title','Audio signal',...
                    'PeakPos',{{{}}},'PeakVal',{{{}}},'PeakMode',{{{}}});
    aa = struct('fresh',1);
    varargout = {class(aa,'miraudio',t)};
    return
end


% --- Option specifications handed over to mirfunction. ---
% Cell-valued fields are wrapped in an extra pair of braces so that
% struct() stores the cell itself instead of building a struct array.
option.center = struct('key','Center','type','Boolean',...
                       'default',0,'when','After');

option.normal = struct('key','Normal','type','Boolean',...
                       'default',0,'when','After');

option.extract = struct('key',{{'Extract','Excerpt'}},'type','Integer',...
                        'number',2,'default',[],...
                        'unit',{{'s','sp'}},'defaultunit','s',...
                        'from',{{'Start','Middle','End'}},...
                        'defaultfrom','Start');

option.trim = struct('type','String',...
                     'choice',{{'NoTrim','Trim','TrimBegin','TrimStart','TrimEnd'}},...
                     'default','NoTrim','when','After');

option.trimthreshold = struct('key','TrimThreshold','type','Integer',...
                              'default',.06,'when','After');

option.label = struct('key','Label','default','','when','After');

option.sampling = struct('key','Sampling','type','Integer',...
                         'default',0,'when','Both');

% A 'Segment' option used to be declared here; it is disabled:
%   segment.key = 'Segment';
%   segment.type = 'Integer';
%   segment.default = [];
%   segment.when = 'After';
%   option.segment = segment;

option.reverse = struct('key','Reverse','type','Boolean',...
                        'default',0,'when','After');

% NaN marks "not specified by the user".
option.mono = struct('key','Mono','type','Boolean',...
                     'default',NaN,'when','After');

option.separate = struct('key','SeparateChannels','type','Boolean',...
                         'default',0);

option.Ch = struct('key',{{'Channel','Channels'}},'type','Integer',...
                   'default',[],'when','After');

specif = struct('option',option,...
                'beforechunk',{{@beforechunk,'normal'}},...
                'eachchunk',@eachchunk,...
                'combinechunk',@combinechunk);

% --- Dispatch: immediate evaluation ('Now') or the mirfunction path. ---
immediate = nargin > 1 && ischar(varargin{1}) && strcmp(varargin{1},'Now');
if not(immediate)
    varargout = mirfunction(@miraudio,orig,varargin,nargout,specif,@init,@main);
else
    % Optional third argument: the excerpt forwarded to main.
    if nargin > 2
        excerpt = varargin{2};
    else
        excerpt = [];
    end
    varargout = {main(orig,[],[],[],excerpt)};
end
if isempty(varargout)
    varargout = {{}};
end
173 | |
174 | |
function [x,type] = init(x,option)
%INIT Initialisation step passed to mirfunction by miraudio.
%   The returned type is always 'miraudio'. When x is a mirdesign object
%   and a sampling rate was requested, the request is recorded on x
%   through setresampling; any other x is returned untouched.
type = 'miraudio';
if not(isa(x,'mirdesign'))
    return
end
if option.sampling
    x = setresampling(x,option.sampling);
end
182 | |
183 | |
function a = main(orig,option,after,index,extract)
%MAIN Core routine of miraudio: load or wrap the input, then post-process.
%   orig    file name (char), or already loaded data / miraudio object;
%           a cell input is replaced by its first element.
%   option  parsed options (may be empty); only option.extract is read here.
%   after   post-processing parameters forwarded to post (may be empty).
%   index   not used in this function.
%   extract excerpt specification forwarded to mirread when a file is
%           read; defaults to [] when omitted.
if iscell(orig)
    orig = orig{1};
end
if not(ischar(orig))
    % Data already in memory: a requested excerpt is handled by post.
    if not(isempty(option)) && not(isempty(option.extract))
        if not(isstruct(after))
            after = struct;
        end
        after.extract = option.extract;
    end
    if isa(orig,'miraudio')
        a = orig;
    else
        wrapper = struct('fresh',1);
        a = class(wrapper,'miraudio',orig);
    end
else
    % File name: read the audio from disk.
    if nargin < 5
        extract = [];
    end
    [data{1},time{1},framepos{1},rate{1},nbits{1},name{1},chan{1}] = ...
        mirread(extract,orig,1,0);
    nfiles = length(data);
    t = mirtemporal([],'Time',time,'Data',data,'FramePos',framepos,...
                    'Sampling',rate,'Name',name,...
                    'Label',cell(1,nfiles),...
                    'Clusters',cell(1,nfiles),...
                    'Channels',chan,'Centered',0,'NBits',nbits);
    t = set(t,'Title','Audio waveform');
    wrapper = struct('fresh',1);        % freshly loaded object
    a = class(wrapper,'miraudio',t);
end
if not(isempty(after))
    a = post(a,after);
end
217 | |
218 | |
function a = post(a,para)
%POST Apply the post-processing options in para to the miraudio object a.
%   Fields of para that are read (each one is optional unless noted):
%     mono          1 sums the channels through mirsum(a,'Mean'). On a
%                   fresh object a NaN value is treated as 1.
%     extract       4-element vector [t1 t2 unit from]: unit ~= 0 means
%                   seconds, unit == 0 means sample indices; from is 0
%                   (start), 1 (middle) or 2 (end of the sequence).
%     Ch            indices of the channels to keep.
%     center        non-zero centers the data with center().
%     normal        non-zero divides the data by its RMS value.
%     trim          'Trim', 'TrimStart', 'TrimBegin' or 'TrimEnd';
%                   requires para.trimthreshold (0 is replaced by 0.06).
%     sampling      target sampling rate; 0 disables resampling.
%     label         numeric, cell or char labelling specification.
%   Data and time are stored as d{h}{k} / t{h}{k}, h being the file index
%   and k the segment index; the sampling rates are stored per file.
if a.fresh && isfield(para,'mono')
    a.fresh = 0;
    if isnan(para.mono)
        para.mono = 1;
    end
end
if isfield(para,'mono') && para.mono == 1
    a = mirsum(a,'Mean');
end
d = get(a,'Data');
t = get(a,'Time');
ac = get(a,'AcrossChunks');
f = get(a,'Sampling');
cl = get(a,'Clusters');
for h = 1:length(d)
    % Set when file h must be given the new sampling rate. f{h} itself is
    % only updated after all its segments are processed, so that every
    % segment is resampled from the original rate.
    newrate = false;
    for k = 1:length(d{h})
        tk = t{h}{k};
        dk = d{h}{k};

        % --- Excerpt extraction ---
        if isfield(para,'extract') && not(isempty(para.extract))
            t1 = para.extract(1);
            t2 = para.extract(2);
            if para.extract(4)
                % Positions are relative to the middle (1) or the end (2).
                if para.extract(4) == 1
                    shift = round(size(tk,1)/2);
                elseif para.extract(4) == 2
                    shift = size(tk,1);
                end
                if para.extract(3)
                    % Convert the sample shift into a time position.
                    shift = tk(shift,1,1);
                end
                t1 = t1+shift;
                t2 = t2+shift;
            end
            if para.extract(3)  % in seconds
                ft = find(tk>=t1 & tk<=t2);
            else                % in samples
                if not(t1)
                    warning('WARNING IN MIRAUDIO: Extract sample positions should be real positive integers.')
                    display('Positions incremented by one.');
                    t1 = t1+1;
                    t2 = t2+1;
                end
                ft = t1:t2;
            end
            tk = tk(ft,:,:);
            dk = dk(ft,:,:);
        end

        % --- Channel selection ---
        if isfield(para,'Ch') && not(isempty(para.Ch))
            dk = dk(:,:,para.Ch);
        end

        % --- Centering ---
        if isfield(para,'center') && para.center
            dk = center(dk);
            a = set(a,'Centered',1);
        end

        % --- RMS normalisation ---
        if isfield(para,'normal') && para.normal
            nl = size(dk,1);
            nc = size(dk,3);
            if isempty(ac)
                ee = 0;
                for j = 1:nc
                    ee = ee+sum(dk(:,:,j).^2);
                end
                ee = sqrt(ee/nl/nc);
            else
                % NOTE(review): ac.sqrsum is accumulated by beforechunk as
                % a sum of squares; squaring it again here does not match
                % the RMS computed in the branch above -- confirm intent.
                ee = sqrt(sum(ac.sqrsum.^2)/ac.samples);
            end
            dk = dk./repmat(ee,[nl,1,nc]);
        end

        % --- Trimming of the pseudo-silent beginning and/or end ---
        if isfield(para,'trim') && not(isequal(para.trim,0)) ...
                && not(strcmpi(para.trim,'NoTrim'))
            if not(para.trimthreshold)
                para.trimthreshold = 0.06;
            end
            trimframe = 100;    % frame length, in samples
            trimhop = 10;       % hop between frames, in samples
            nframes = floor((length(tk)-trimframe)/trimhop)+1;
            rms = zeros(1,nframes);
            for j = 1:nframes
                st = floor((j-1)*trimhop)+1;
                for z = 1:size(dk,3)
                    rms(1,j,z) = norm(dk(st:st+trimframe-1,1,z))/sqrt(trimframe);
                end
            end
            % Rescale the frame RMS values to the range [0,1].
            rms = (rms-repmat(min(rms),[1,size(rms,2),1]))...
                    ./repmat(max(rms)-min(rms),[1,size(rms,2),1]);
            nosil = find(rms>para.trimthreshold);
            if strcmpi(para.trim,'Trim') || strcmpi(para.trim,'TrimStart') ...
                    || strcmpi(para.trim,'TrimBegin')
                nosil1 = min(nosil);
                if nosil1 > 1
                    nosil1 = nosil1-1;  % keep one extra frame as margin
                end
                n1 = floor((nosil1-1)*trimhop)+1;
            else
                n1 = 1;
            end
            if strcmpi(para.trim,'Trim') || strcmpi(para.trim,'TrimEnd')
                nosil2 = max(nosil);
                if nosil2 < length(rms)
                    nosil2 = nosil2+1;  % keep one extra frame as margin
                end
                n2 = floor((nosil2-1)*trimhop)+1;
            else
                n2 = length(tk);
            end
            % A Hann fade-in/fade-out window used to be computed here but
            % was never applied to the data; that dead computation (and
            % its call to hann) has been removed.
            tk = tk(n1:n2);
            dk = dk(n1:n2,1,:);
        end

        % --- Resampling ---
        if isfield(para,'sampling') && para.sampling
            % The sampling rates are stored per file, hence indexed by h.
            if and(f{h}, not(f{h} == para.sampling))
                nch = size(dk,3);
                % Fresh container for each segment, so that no data from
                % a previously resampled segment can leak in.
                rk = cell(1,1,nch);
                for j = 1:nch
                    rk{j} = resample(dk(:,:,j),para.sampling,f{h});
                end
                dk = cat(3,rk{:});
                tk = repmat((0:size(dk,1)-1)',[1 1 size(tk,3)])...
                        /para.sampling + tk(1,:,:);
            end
            newrate = true;
        end
        d{h}{k} = dk;
        t{h}{k} = tk;
        %if isfield(para,'reverse') && para.reverse
        %    d{h}{k} = flipdim(d{h}{k},1);
        %end
    end
    if newrate
        f{h} = para.sampling;
    end
end
a = set(a,'Data',d,'Time',t,'Sampling',f,'Clusters',cl);

% --- Labelling ---
if isfield(para,'label')
    if isnumeric(para.label)
        % Label from the file name: substring of given indices, or the
        % whole name when the label is 0.
        n = get(a,'Name');
        l = cell(1,length(d));
        for k = 1:length(d)
            if para.label
                l{k} = n{k}(para.label);
            else
                l{k} = n{k};
            end
        end
        a = set(a,'Label',l);
    elseif iscell(para.label)
        % Pick the label matching the object's index, cycling through
        % the list.
        idx = mod(get(a,'Index'),length(para.label));
        if not(idx)
            idx = length(para.label);
        end
        a = set(a,'Label',para.label{idx});
    elseif ischar(para.label)
        % Same label for every file.
        l = cell(1,length(d));
        for k = 1:length(d)
            l{k} = para.label;
        end
        a = set(a,'Label',l);
    end
end
378 | |
379 | |
function [new,orig] = beforechunk(orig,option,missing)
%BEFORECHUNK Accumulate, across chunks, the statistics used by 'Normal'.
%   The chunk is processed by miraudio with normalisation switched off,
%   its sum of squares and sample count are computed by crossum, and both
%   are added to the totals already stored in the 'AcrossChunks' field of
%   orig (taken as zero when that field is empty).
%   The third input is not used.
option.normal = 0;
chunk = miraudio(orig,option);
chunkdata = get(chunk,'Data');
previous = get(orig,'AcrossChunks');
if isempty(previous)
    previous = struct('sqrsum',0,'samples',0);
end
stats = mircompute(@crossum,chunkdata);
new = stats{1}{1};
new.sqrsum = previous.sqrsum + new.sqrsum;
new.samples = previous.samples + new.samples;
393 | |
394 | |
function s = crossum(d)
%CROSSUM Sum of squares and length of the data block d.
%   s.sqrsum  is sum(d.^2);
%   s.samples is length(d).
s = struct('sqrsum',sum(d.^2),'samples',length(d));
398 | |
399 | |
function [y,orig] = eachchunk(orig,option,missing)
%EACHCHUNK Process one chunk: miraudio applied to orig with the given
%   options. orig is returned unchanged; the third input is not used.
y = miraudio(orig,option);
402 | |
403 | |
function y = combinechunk(old,new)
%COMBINECHUNK Append the chunk new to the already combined result old.
%   The first data block and the first time axis of both objects are
%   concatenated vertically and stored in a copy of old.
olddata = get(old,'Data');
oldtime = get(old,'Time');
newdata = get(new,'Data');
newtime = get(new,'Time');
alldata = vertcat(olddata{1}{1},newdata{1}{1});
alltime = vertcat(oldtime{1}{1},newtime{1}{1});
y = set(old,'Data',{{alldata}},'Time',{{alltime}});