Mercurial > hg > camir-aes2014
comparison core/magnatagatune/MTTAudioFeatureBasicSm.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e9a9cd732c1e |
---|---|
1 classdef MTTAudioFeatureBasicSm < MTTAudioFeature & handle | |
2 % --- | |
3 % the MTTAudioFeatureBasicSm Class contains | |
4 % a basic summary of chroma, mfcc and tempo features | |
5 % a few common chroma and mfcc vectors are concatenated | |
6 % along with some clip-wide variance | |
7 % a metric / rhythm fingerprint is added | |
8 % | |
9 % The usual workflow for these features consists of three steps | |
10 % 1. extract: extracts the basic single-file dependent features | |
11 % 2. define_global_transform: calculates the global feature | |
12 % transformation parameters | |
13 % 3. finalise: applies the common transformations to a specific feature | |
14 % --- | |
15 | |
16 properties(Constant = true) | |
17 | |
18 % svn hook | |
% my_revision is parsed from the SVN keyword string below; extract()
% stores it in data.info.creatorrev of every feature it creates.
% NOTE(review): substr is not a MATLAB builtin, presumably a project
% helper - confirm it is on the path. With the keyword left unexpanded
% ('$Rev$') the parsed value is presumably NaN - confirm.
19 my_revision = str2double(substr('$Rev$', 5, -1)); | |
20 end | |
21 | |
22 properties | |
23 % --- | |
24 % Set default parameters | |
% my_params is read by extract, define_global_transform and finalise,
% and extract copies it into data.info.params of each feature.
25 % --- | |
26 my_params = struct(... | |
27 'nchromas', 4, ... % 4 chroma vectors | |
28 'chroma_var', 0, ... % chroma variance (>= 1 enables the per-cluster variance) | |
29 'norm_chromas', 0, ... % not implemented, chromas already rel. | |
30 'min_kshift_chromas', 0.1, ... % threshold for key shift. set to 1 for no shift (0-1) | |
31 ... | |
32 'ntimbres', 4, ... % number of timbre (mfcc) vectors | |
33 'timbre_var', 0, ... % timbre variance (>= 1 enables the per-cluster variance) | |
34 'norm_timbres', 1, ... % scale timbre means with the global factors in finalise | |
35 'clip_timbres', 0.85, ... % percentile of data which has to be inside 0-1 bounds | |
36 ... | |
37 'norm_weights',0, ... % globally norm weights for chroma times? | |
38 'norm_interval',1, ... % NOTE: both settings currently store the same interval in extract | |
39 'max_iter',100, ... % max iterations for chroma and timbre knn | |
40 ... | |
41 'nrhythms', 0, ... % 0: no rhythm data, >= 1: eighths, >= 2: eighths and sixteenths | |
42 'nints', 11, ... % number of intervals kept in each autocorrelation | |
43 'energy_sr', 1000, ... % sample rate for energy curve | |
44 'norm_acorr', 1 ... % normalise acorr locally-> shape imp... energy is normalised anyways | |
45 ); | |
46 end | |
47 | |
48 % --- | |
49 % member functions | |
50 % --- | |
51 methods | |
52 | |
53 % --- | |
54 % constructor: pointer to feature in database | |
55 % --- | |
56 function feature = MTTAudioFeatureBasicSm(varargin) | |
% Constructor. All arguments are forwarded unchanged to the
% MTTAudioFeature superclass constructor; this class adds no
% construction logic of its own.
57 | |
58 feature = feature@MTTAudioFeature(varargin{:}); | |
59 | |
60 end | |
61 % --- | |
62 % extract feature data from raw audio features | |
63 % --- | |
function data = extract(feature, clip)
    % DATA = EXTRACT(FEATURE, CLIP)
    %
    % Builds the basic summary feature struct for one clip from its raw
    % audio features. This includes possible local normalisations.
    %
    % Inputs:
    %   feature - feature object; feature.my_params and
    %             feature.my_revision are read here
    %   clip    - CASIMIRClip (its child clip is used), MTTClip or MSDClip
    %
    % Output:
    %   data - struct with the fields
    %     .chroma(i) means, means_weight, vars, shift of chroma centre i
    %     .timbre(i) means, means_weight, vars of timbre centre i
    %     .rhythm    autocorrelation / energy data (only if nrhythms >= 1)
    %     .final     empty placeholders (vector, vector_info, dim)
    %     .info      type, owner, owner_id, creatorrev, params
    %
    % Errors:
    %   - the (child) clip is neither an MTTClip nor an MSDClip
    %   - weighted_kmeans does not return the requested number of
    %     centres within 20 attempts

    % declared in the original code, not referenced in this method
    global globalvars;

    % ---
    % get casimir child clip if available
    % ---
    if isa(clip, 'CASIMIRClip')
        baseclip = clip.child_clip();
    else
        baseclip = clip;
    end

    if isa(baseclip, 'MTTClip')
        rawf = baseclip.audio_features_raw();
    elseif isa(baseclip, 'MSDClip')
        rawf = baseclip.features('MSDAudioFeatureRAW');
    else
        % without this branch rawf stays undefined and the method fails
        % later with an unrelated "undefined variable" error
        error('MTTAudioFeatureBasicSm:extract:unsupportedClip', ...
            'cannot get raw audio features for a clip of class %s', ...
            class(baseclip));
    end

    % ---
    % now extract the features
    % first step: chroma clustering
    % ---

    % segment durations, normalised by the clip duration
    weights = [rawf.data.segments_duration];
    weights = weights / rawf.data.duration;

    % one row per segment
    chroma = [rawf.data.segments_pitches]';

    % options vector for the clustering: no error display, limited
    % number of iterations
    op = foptions();
    op(1) = 0;
    op(14) = feature.my_params.max_iter;

    % ---
    % get most present chroma vectors.
    % the weighted k-means should return the most prominent
    % chroma vectors and their weight
    % ---
    nchromas = feature.my_params.nchromas;
    if nchromas == 0

        % trivial case: no chroma requested
        chromas = [];
        cwght = [];

    elseif nchromas == 1

        % single centre: plain mean and variance over all segments
        chromas = mean(chroma, 1);
        chroma_var = var(chroma, 0, 1);
        cwght = 1;

    elseif numel(weights) > nchromas

        % ---
        % there may be few chromas, try kmeans several (20) times
        % ---
        cont = 0;
        cwght = [];
        while (numel(cwght) ~= nchromas) && (cont < 20)

            [chromas, cwght, post] = ...
                weighted_kmeans(nchromas, chroma, weights, op);

            cont = cont + 1;
        end

        if numel(cwght) ~= nchromas
            error('cannot find enough chroma centres');
        end

        % ---
        % Calculate the weighted variance of the chroma clusters
        % ---
        if feature.my_params.chroma_var >= 1

            chroma_var = zeros(size(chromas));
            for i = 1:size(chroma_var,1)

                % squared distance of the cluster members from the centroid
                tmp_var = (chroma(post(:,i),:) - ...
                    repmat(chromas(i,:), sum(post(:,i)), 1)).^2;

                % weighted sum of the differences, normalised by the
                % sum of the weights
                chroma_var(i,:) = (weights(post(:,i)) * tmp_var) ./ ...
                    (sum(weights(post(:,i))));
            end
        end
    else
        % ---
        % odd case: less than nchroma data points.
        % we repeat the mean vector at the end
        % ---
        chromas = [chroma; repmat(mean(chroma, 1), ...
            nchromas - numel(weights), 1)];

        cwght = weights;
        cwght(end + 1:nchromas) = 0;

        % every vector is its own cluster => zero variance (set below)
    end

    % no variance computed above: use zeros
    if ~exist('chroma_var', 'var')
        chroma_var = zeros(size(chromas));
    end

    % sort by associated time
    [cwght, idx] = sort(cwght, 'descend');
    chromas = chromas(idx,:);
    chroma_var = chroma_var(idx,:);

    % ---
    % shift according to detected key, but only if
    % the confidence is high enough
    % ---
    shift = 0;
    if rawf.data.keyConfidence > feature.my_params.min_kshift_chromas

        shift = -rawf.data.key;
        chromas = circshift(chromas, [0 shift]);
        chroma_var = circshift(chroma_var, [0 shift]);
    end

    % ---
    % get mfcc centres:
    % the same for mfccs
    % ---
    ntimbres = feature.my_params.ntimbres;
    mfcc = [rawf.data.segments_timbre]';
    if ntimbres == 0

        mfccs = [];
        mwght = [];

    elseif ntimbres == 1

        mfccs = mean(mfcc, 1);
        % FIX: variance over the segment data. The original took the
        % variance of the single mean row, which is always zero.
        timbre_var = var(mfcc, 0, 1);
        mwght = 1;

    elseif numel(weights) > ntimbres

        % ---
        % there may be few mfccs, try kmeans several times
        % ---
        cont = 0;
        mwght = [];
        while (numel(mwght) ~= ntimbres) && (cont < 20)

            [mfccs, mwght, post] = ...
                weighted_kmeans(ntimbres, mfcc, weights, op);
            cont = cont + 1;
        end

        if numel(mwght) ~= ntimbres
            error('cannot find enough mfcc centres');
        end

        % ---
        % Calculate the weighted variance of the timbre clusters
        % ---
        if feature.my_params.timbre_var >= 1

            timbre_var = zeros(size(mfccs));
            for i = 1:size(timbre_var,1)

                % squared distance of the cluster members from the centroid
                tmp_var = (mfcc(post(:,i),:) - ...
                    repmat(mfccs(i,:), sum(post(:,i)), 1)).^2;

                % weighted sum of the differences, normalised by the
                % sum of the weights
                timbre_var(i,:) = (weights(post(:,i)) * tmp_var) ./ ...
                    (sum(weights(post(:,i))));
            end
        end

    else
        % ---
        % odd case: less than ntimbres data points.
        % we repeat the mean vector at the end
        % ---
        mfccs = [mfcc; repmat(mean(mfcc, 1), ...
            ntimbres - numel(weights), 1)];
        mwght = weights;
        mwght(end + 1:ntimbres) = 0;
    end

    % no variance computed above: use zeros
    if ~exist('timbre_var', 'var')
        timbre_var = zeros(size(mfccs));
    end

    % sort by associated time
    [mwght, idx] = sort(mwght, 'descend');
    mfccs = mfccs(idx,:);
    timbre_var = timbre_var(idx,:);

    % ---
    % get beat features:
    % the autocorrelation curve over n quarters of length
    %
    % alternative: how about using the n=8 quarters relative
    % volumes from the start of a sure measure?
    % ---
    if feature.my_params.nrhythms >= 1

        beats = rawf.data.beats;
        tatums = rawf.data.tatums;

        % ---
        % NOTE: the beat and tatum markers seem to have an offset :(
        % ---
        offset = 0.118; % seconds

        [envelope, time] = energy_envelope(feature, clip);

        % we offset the energy curve
        time = time + offset;

        % length of an eighth note in seconds
        if rawf.data.tempo > 0
            eightl = 30 / rawf.data.tempo;
        else
            % odd case: no rhythm data. assume 100 bpm
            eightl = 0.3;
        end

        if isempty(beats)
            if ~isempty(tatums)
                % odd case: no beats detected. -> use best tatum
                beats = tatums;
            else
                % ok, just take the beginning
                beats = [0; 1];
            end
        end

        % ---
        % we try to start at the best beat confidence that still
        % leaves nints eighths before the end of the clip
        % (row 1 of beats is used as time, row 2 as confidence)
        % ---
        last_valid = find(beats(1,:) < ...
            (rawf.data.duration - feature.my_params.nints * eightl), 1, 'last');
        if isempty(last_valid)
            % FIX: no beat is early enough (short clip). Consider all
            % beats instead of passing an empty start time on.
            last_valid = size(beats, 2);
        end

        % find the best valid beat position
        [~, max_measure] = max(beats(2, 1:last_valid));
        max_mtime = beats(1, max_measure);

        % ---
        % the correlation is calculated for the estimated eighths length
        % and for the 16th intervals, respectively.
        % ---
        [acorr8, eight_en, eightt] = ...
            beat_histogram(feature, max_mtime, eightl, envelope, time);

        [acorr16, six_en, sixt] = ...
            beat_histogram(feature, max_mtime, eightl / 2, envelope, time);

        % ---
        % save rhythm feature data
        % ---
        data.rhythm.acorr8 = acorr8;
        data.rhythm.acorr8_lag = eightt(1:end/2) - eightt(1);

        data.rhythm.energy8 = eight_en(1:end/2);
        data.rhythm.energy8_time = eightt(1:end/2);

        % NOTE: the original code stored this same value for both
        % settings of my_params.norm_interval
        data.rhythm.interval8 = eightl / 2;

        if feature.my_params.nrhythms >= 2

            data.rhythm.acorr16 = acorr16;
            data.rhythm.acorr16_lag = data.rhythm.acorr8_lag / 2;

            data.rhythm.energy16 = six_en(1:end/2);
            data.rhythm.energy16_time = sixt(1:end/2);

            % save beat interval / tempo (again independent of
            % my_params.norm_interval in the original code)
            data.rhythm.interval16 = eightl / 2;
        end
    end

    % chroma feature data
    for i = 1:size(chromas,1)
        data.chroma(i).means = chromas(i,:)';
        data.chroma(i).means_weight = cwght(i);
        data.chroma(i).vars = chroma_var(i,:)';
        data.chroma(i).shift = shift;
    end

    % mfcc feature data
    for i = 1:size(mfccs,1)
        data.timbre(i).means = mfccs(i,:)';
        data.timbre(i).means_weight = mwght(i);
        data.timbre(i).vars = timbre_var(i,:)';
    end

    % prepare field for final features
    data.final.vector = [];
    data.final.vector_info = struct();
    data.final.dim = 0;

    % save info data
    data.info.type = 'MTTAudioFeatureBasicSm';
    data.info.owner = clip;
    data.info.owner_id = clip.id;
    data.info.creatorrev = feature.my_revision;

    % save parameters
    data.info.params = feature.my_params;
end
419 | |
function define_global_transform(features)
    % DEFINE_GLOBAL_TRANSFORM(FEATURES)
    %
    % Calculates normalisation factors from the group of input features
    % and hands them to the database of the first feature via
    % features(1).my_db.set_common(...).
    %
    % Input:
    %   features - array of feature objects with extracted data; the
    %              parameters are taken from features(1).my_params
    %
    % The struct passed to set_common may contain
    %   post_normf.chroma_var - mapminmax settings (if chroma_var >= 1)
    %   post_normf.timbre_var - mapminmax settings (if timbre_var >= 1)
    %   post_normf.timbre     - one scaling factor per timbre bin
    %                           (if ntimbres > 0)
    % If none of these apply, set_common([1]) is called instead.
    %
    % Errors if only a single feature is given.

    if numel(features) == 1
        error ('Insert feature array for this method');
    end

    nfeat = numel(features);
    params = features(1).my_params;

    % collects every normalisation that gets defined below
    common = struct();

    % ---
    % here, we only need to define the post-normalisation
    % ---

    % ---
    % get chroma variance data NORMALISATION Factors
    % ---
    if params.chroma_var >= 1

        parts = cell(1, nfeat);
        for i = 1:nfeat
            parts{i} = cat(2, features(i).data.chroma(:).vars);
        end
        [~, common.post_normf.chroma_var] = ...
            mapminmax(abs(cat(2, parts{:})), 0, 1);
    end

    % ---
    % get timbre variance data NORMALISATION Factors
    % ---
    if params.timbre_var >= 1

        parts = cell(1, nfeat);
        for i = 1:nfeat
            parts{i} = cat(2, features(i).data.timbre(:).vars);
        end
        [~, common.post_normf.timbre_var] = ...
            mapminmax(abs(cat(2, parts{:})), 0, 1);
    end

    % ---
    % derive normalisation for timbre features:
    % MFCC's are actually special filter outputs
    % (see developer.echonest.com/docs/v4/_static/AnalyzeDocumentation_2.2.pdf
    % they are unbounded, so just the relative information will be
    % used here.
    % We normalise each bin independently
    % ---
    if params.ntimbres > 0

        % one column per timbre centre of every feature, magnitudes only
        parts = cell(1, nfeat);
        for i = 1:nfeat
            parts{i} = cat(2, features(i).data.timbre(:).means);
        end
        allfeat = abs(cat(2, parts{:}));

        % ---
        % get normalisation factors
        % NOTE: the values will later be clipped to [0,1] anyways
        % ---
        clipq = params.clip_timbres;
        if (clipq ~= 0) && (clipq ~= 1)
            % FIX: the original joined both tests with ||, which is
            % always true and made the maximum branch unreachable
            common.post_normf.timbre = 1 ./ prctile(allfeat, clipq * 100, 2);
        else
            % just use the maximum of each bin
            % FIX: max(allfeat, 2) compares every element with the
            % scalar 2; the per-row maximum needs max(allfeat, [], 2),
            % and the reciprocal has to be elementwise
            common.post_normf.timbre = 1 ./ max(allfeat, [], 2);
        end
    end

    % ---
    % set common feature values.
    % FIX: the original dropped the variance normalisation whenever
    % ntimbres == 0 and stored the placeholder [1] instead.
    % ---
    if isempty(fieldnames(common))
        features(1).my_db.set_common([1]);
    else
        features(1).my_db.set_common(common);
    end
end
497 | |
498 | |
function finalise(feature)
    % FINALISE(FEATURE)
    %
    % Applies the final transformation and collects the information of
    % each feature within a single column vector.
    %
    % Input:
    %   feature - feature object or array of feature objects with
    %             extracted data
    %
    % For every feature(i) this writes
    %   data.final.vector             - concatenated feature vector
    %   data.final.dim                - number of elements in the vector
    %   data.final.vector_info.labels - cell array with one entry per
    %                                   dimension; a label is set at the
    %                                   first dimension of each part
    %
    % Errors if feature(i).my_db.commondb is empty, i.e. the global
    % transformation has not been defined yet.

    for i = 1:numel(feature)

        params = feature(i).my_params;

        % check for necessary parameters
        % (the original had an unreachable return after this error)
        common = feature(i).my_db.commondb;
        if isempty(common)
            error('Define the global transformation first')
        end

        % ---
        % normalise timbre features if necessary
        % FIX: the parameters of feature(i) are checked here; the
        % original looked at feature(1) for every i
        % ---
        timbren = [];
        if params.ntimbres > 0
            if params.norm_timbres
                for j = 1:numel(feature(i).data.timbre)

                    timbren = cat(1, timbren, ...
                        MTTAudioFeatureBasicSm.norm_timbre( ...
                        feature(i).data.timbre(j).means, ...
                        common.post_normf.timbre));
                end
            else
                timbren = cat(1, timbren, feature(i).data.timbre(:).means);
            end
        end

        % ---
        % construct resulting feature vector out of features
        % ---
        vec = [];
        info = {};
        if params.nchromas > 0

            info{numel(vec) + 1} = 'chroma';
            vec = cat(1, vec, feature(i).data.chroma(:).means);

            info{numel(vec) + 1} = 'chroma weights';
            vec = cat(1, vec, [feature(i).data.chroma(:).means_weight]');

            % ---
            % NORMALISE Chroma variance
            % ---
            if params.chroma_var >= 1

                info{numel(vec) + 1} = 'chroma variance';

                % normalise this pack of variance vectors.
                % FIX: read the settings from my_db.commondb like the
                % timbre factors; the original used feature(i).common.
                % NOTE(review): assumes set_common stores its argument
                % in my_db.commondb - confirm.
                tmp_var = mapminmax('apply', ...
                    [feature(i).data.chroma(:).vars], ...
                    common.post_normf.chroma_var);

                % append the normalised data column by column
                vec = cat(1, vec, tmp_var(:));
            end
        end

        if params.ntimbres > 0

            info{numel(vec) + 1} = 'timbre';
            vec = cat(1, vec, timbren);

            info{numel(vec) + 1} = 'timbre weights';
            vec = cat(1, vec, [feature(i).data.timbre(:).means_weight]');

            % ---
            % NORMALISE timbre variance
            % ---
            if params.timbre_var >= 1

                info{numel(vec) + 1} = 'timbre variance';

                % normalise this pack of variance vectors
                % (same source for the settings as above)
                tmp_var = mapminmax('apply', ...
                    [feature(i).data.timbre(:).vars], ...
                    common.post_normf.timbre_var);

                % append the normalised data column by column
                vec = cat(1, vec, tmp_var(:));
            end
        end

        if params.nrhythms > 0

            info{numel(vec) + 1} = 'rhythm 8';
            vec = cat(1, vec, feature(i).data.rhythm.acorr8);

            info{numel(vec) + 1} = 'int 8';
            vec = cat(1, vec, feature(i).data.rhythm.interval8);

            if params.nrhythms >= 2

                info{numel(vec) + 1} = 'rhythm 16';
                vec = cat(1, vec, feature(i).data.rhythm.acorr16);

                info{numel(vec) + 1} = 'int 16';
                vec = cat(1, vec, feature(i).data.rhythm.interval16);
            end
        end

        feature(i).data.final.vector = vec;
        feature(i).data.final.dim = numel(feature(i).data.final.vector);

        % fill up the label cell to one entry per dimension and
        % append it to the feature
        info(end+1: feature(i).data.final.dim) = ...
            cell(feature(i).data.final.dim - numel(info), 1);

        feature(i).data.final.vector_info.labels = info;
    end

    % ---
    % TODO: Maybe delete more basic features again at this point?
    % ---
end
625 | |
626 % --- | |
627 % destructor: do we really want to remove this | |
628 % from the database? No, but | |
629 % TODO: create marker for unused objects in db, and a cleanup | |
630 % function | |
631 % --- | |
632 function delete(feature) | |
% Destructor, intentionally left empty: nothing is removed or cleaned
% up when a feature object is deleted.
633 | |
634 end | |
635 | |
636 | |
function visualise(feature)
    % VISUALISE(FEATURE)
    %
    % Plots the different data types collected in each feature on top
    % of the raw feature visualisation of the owning clip.
    %
    % Input:
    %   feature - feature object or array of feature objects with
    %             extracted data (data.info.owner is the clip)
    %
    % The three axes returned by rawf.visualise() are used for the
    % chroma (a1), timbre (a2) and rhythm (a3) data, respectively.

    for i = 1:numel(feature)

        clip = feature(i).data.info.owner;
        params = feature(i).my_params;

        % ---
        % display raw features
        % ---
        if isa(clip, 'CASIMIRClip')
            baseclip = clip.child_clip();
        else
            baseclip = clip;
        end

        if isa(baseclip, 'MTTClip')
            rawf = baseclip.audio_features_raw();
        elseif isa(baseclip, 'MSDClip')
            rawf = baseclip.features('MSDAudioFeatureRAW');
        else
            % rawf would be undefined below otherwise
            error('MTTAudioFeatureBasicSm:visualise:unsupportedClip', ...
                'cannot get raw audio features for a clip of class %s', ...
                class(baseclip));
        end

        % ---
        % @todo: implement MSD feature visualisation
        % ---
        [a1, a2, a3] = rawf.visualise();

        % ---
        % Display chroma features
        % ---
        if isfield(feature(i).data, 'chroma')

            chroma_labels = {'c', 'c#', 'd','d#', 'e', 'f','f#', 'g','g#', 'a', 'a#', 'h'};
            mode_labels = {'minor', 'major'};

            % change labels to reflect detected mode
            chroma_labels{rawf.data.key + 1} = ...
                sprintf('(%s) %s', mode_labels{rawf.data.mode + 1}, ...
                chroma_labels{rawf.data.key + 1});

            % transpose labels and data by the stored key shift
            kshift = feature(i).data.chroma(1).shift;
            chroma_labels = circshift(chroma_labels, [0, kshift]);
            chromar = circshift([rawf.data.segments_pitches], [kshift, 0]);

            % image transposed chromas again
            segments = [rawf.data.segments_start];
            segments(end) = rawf.data.duration;

            % FIX: switch hold on explicitly; hold(ax) only toggles the
            % current state
            hold(a1, 'on');
            uimagesc(segments, 0:11, chromar, 'Parent', a1);
            set(a1, 'YTick', [0:11], 'YTickLabel', chroma_labels);

            % enlarge plot and plot new data after the old ones
            ax = axis(a1);
            ax(2) = ax(2) + 2*params.nchromas + 0.5;
            axis(a1, 'xy');
            axis(a1, ax);

            % weights (first row) on top of the centre vectors
            imagesc(rawf.data.duration + (1:params.nchromas), (-1:11), ...
                [ feature(i).data.chroma(:).means_weight; feature(i).data.chroma(:).means], ...
                'Parent', a1);

            % variance calculated?
            if isfield(feature(i).data.chroma, 'vars')

                imagesc(rawf.data.duration + params.nchromas + (1:params.nchromas), (-1:11), ...
                    [feature(i).data.chroma(:).vars], ...
                    'Parent', a1);
            end
        end

        % ---
        % Display timbre features
        % ---
        if isfield(feature(i).data, 'timbre')

            % enlarge plot and plot new data after the old ones
            hold(a2, 'on');
            ax = axis(a2);
            ax(2) = ax(2) + 2*params.ntimbres + 0.5;

            axis(a2, ax);
            imagesc(rawf.data.duration + (1:params.ntimbres), (-1:11), ...
                [ feature(i).data.timbre(:).means_weight; feature(i).data.timbre(:).means], ...
                'Parent', a2);

            if isfield(feature(i).data.timbre, 'vars')

                % FIX: the timbre variance belongs into the timbre axes
                % a2; the original drew it into the chroma axes a1
                imagesc(rawf.data.duration + params.ntimbres + (1:params.ntimbres), (-1:11), ...
                    [feature(i).data.timbre(:).vars], ...
                    'Parent', a2);
            end
        end

        % ---
        % Display rhythm features
        % ---
        if isfield(feature(i).data, 'rhythm')

            rhythm = feature(i).data.rhythm;

            % get timecode
            eightt = rhythm.energy8_time;

            % FIX: all rhythm plots are drawn into a3 explicitly rather
            % than into whatever axes happen to be current
            hold(a3, 'on');

            % plot sixteenths acorr and energy.
            % FIX: these fields are only written by extract when
            % nrhythms >= 2, so they are checked for first
            if isfield(rhythm, 'energy16_time')

                sixt = rhythm.energy16_time;
                plot(a3, sixt, rhythm.energy16, 'bx');
                plot(a3, sixt, rhythm.acorr16, 'b');
            end

            % plot eighths acorr and energy
            plot(a3, eightt, rhythm.energy8, 'rx');
            plot(a3, eightt, rhythm.acorr8, 'r');

            % broaden view by fixed 4 seconds
            ax = axis(a3);
            axis(a3, [max(0, eightt(1)-( eightt(end) - eightt(1) + 4 )) ...
                min(rawf.data.duration, eightt(end) +4) ...
                ax(3:4)]);
        end
    end
end
753 end | |
754 | |
755 | |
756 methods (Hidden = true) | |
757 | |
function [env, time] = energy_envelope(feature, clip)
    % [ENV, TIME] = ENERGY_ENVELOPE(FEATURE, CLIP)
    %
    % Extracts the envelope of energy for the given clip.
    %
    %   env  - envelope data as returned by mirgetdata
    %   time - time axis, taken from the 'Time' property of the envelope
    %
    % ---
    % TODO: externalise envelope etc in external audio features
    % ---

    % Both calls run through evalc, whose first output (the captured
    % console text) is discarded. The command strings refer to the
    % local variables clip, src and feature by name, so these names
    % must not be changed.
    [~, src] = evalc('miraudio(clip.mp3file_full())');
    [~, envelope_obj] = evalc('mirenvelope(src, ''Sampling'', feature.my_params.energy_sr)');

    % the time axis sits two cell levels deep
    time_cells = get(envelope_obj, 'Time');
    time = time_cells{1}{1};

    env = mirgetdata(envelope_obj);
end
772 | |
function [acorr, base_sig, base_t] = beat_histogram(feature, startt, interval, signal, signal_t)
    % [ACORR, BASE_SIG, BASE_T] = BEAT_HISTOGRAM(FEATURE, STARTT, INTERVAL, SIGNAL, SIGNAL_T)
    %
    % Computes the correlation for beats of the specified length in an
    % energy curve.
    %
    % Inputs:
    %   feature  - feature object; my_params.nints and
    %              my_params.norm_acorr are read
    %   startt   - time of the first sampling point
    %   interval - distance between two sampling points
    %   signal   - energy curve
    %   signal_t - time axis of signal; the spacing is taken from its
    %              first two entries
    %
    % Outputs:
    %   acorr    - first nints values of the squared, scaled cyclic
    %              autocorrelation; shifted and scaled to [0,1] if
    %              my_params.norm_acorr is set
    %   base_sig - signal values at the 2*nints sampling points
    %   base_t   - the 2*nints sampling times
    %
    % A silent excerpt (all sampled values zero) or a flat
    % autocorrelation now yields zeros; the original divided by zero
    % there and returned NaN.

    nints = feature.my_params.nints;

    % get corresponding energy values; the sample indices are clamped
    % to the valid range of the signal
    dt = signal_t(2) - signal_t(1);
    base_t = startt:interval:(startt + (nints*2-1) * interval);
    base_sig = signal( min( numel(signal), max(1,round((base_t - signal_t(1))/dt))));

    % normalise energy (skipped for an all-zero excerpt)
    peak = max(base_sig);
    if peak ~= 0
        acbase_sig = base_sig ./ peak;
    else
        acbase_sig = base_sig;
    end

    % calculate their cyclic autocorrelation
    acorr = circshift(xcorr(acbase_sig, acbase_sig(1:end/2)), ...
        [numel(acbase_sig) 0]);

    % cut acorr to relevant points, normalise and square
    acorr = (acorr(1:nints) ./ nints).^2;

    % ---
    % NOTE: we normalise the autocorrelation locally, to compare the
    % (rhythmic) shape
    % ---
    if feature.my_params.norm_acorr

        acorr = acorr - min(acorr);

        % a flat curve has no shape to scale
        top = max(acorr);
        if top > 0
            acorr = acorr / top;
        end
    end
end
803 end | |
804 | |
805 methods(Static) | |
806 | |
function timbre = norm_timbre(in, normfs)
    % TIMBRE = NORM_TIMBRE(IN, NORMFS)
    %
    % Returns normed timbre data: every column of IN is multiplied
    % elementwise with the factors NORMFS, the result is mapped by
    % (1 + x) / 2 and finally limited to the range [0,1].

    % ---
    % scale each column with the per-dimension factors
    % ---
    scaled = zeros(size(in));
    ncols = size(in, 2);
    for col = 1:ncols
        scaled(:, col) = normfs .* in(:, col);
    end

    % shift to positive values, then clip to [0,1]
    shifted = (1 + scaled) / 2;
    timbre = min(1, max(shifted, 0));
end
826 | |
827 % --- | |
828 % returns parameter md5 hash for comparison | |
829 % --- | |
830 end | |
831 | |
832 end |