Mercurial > hg > camir-aes2014
comparison core/magnatagatune/MTTAudioFeatureHMM.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e9a9cd732c1e |
---|---|
classdef MTTAudioFeatureHMM < MTTAudioFeature & handle
    % ---
    % The MTTAudioFeatureHMM class is meant to describe a clip by a
    % hidden Markov model (HMM) with a predefined number of states
    % (see my_params.nstates), trained on the clip's chroma sequence.
    %
    % NOTE(review): the HMM training in extract() is still a TODO, so
    % only empty model placeholders are stored at the moment.
    % finalise(), visualise() and the hidden / static helpers look like
    % they were carried over from MTTAudioFeatureBasicSm (basic summary
    % of chroma, timbre and rhythm features). They read data fields
    % (data.chroma, data.timbre, data.rhythm) and parameters (nchromas,
    % ntimbres, nrhythms, nints, energy_sr, ...) which neither extract()
    % nor the default my_params of this class provide -- confirm before
    % relying on them.
    %
    % The usual workflow for these features consists of three steps
    % 1. extract: extracts the basic single-file dependent features
    % 2. define_global_transform: calculates the global feature
    %       transformation parameters
    % 3. finalise: applies the common transformations to a specific feature
    % ---

    properties(Constant = true)

        % svn hook: numeric revision parsed from the svn keyword string
        my_revision = str2double(substr('$Rev: 2332 $', 5, -1));
    end

    properties
        % ---
        % Set default parameters
        % ---
        my_params = struct(...
            'nstates', 4 ... % predefined number of states
            );
    end

    % ---
    % member functions
    % ---
    methods

        % ---
        % constructor: pointer to feature in database
        % ---
        function feature = MTTAudioFeatureHMM(varargin)

            % all arguments are handed to the superclass constructor
            feature = feature@MTTAudioFeature(varargin{:});

        end

        % ---
        % extract feature data from raw audio features
        % ---
        function data = extract(feature, clip)
            % data = extract(feature, clip)
            %
            % Collects the raw chroma data of the clip and prepares the
            % data struct for this feature.
            %
            % clip: CASIMIRClip (its child clip is used), MTTClip or MSDClip
            % data: struct with fields mu, transmat1, final and info
            %
            % Raises an error for any other clip class.

            % ---
            % get casimir child clip if available
            % ---
            if isa(clip, 'CASIMIRClip')
                baseclip = clip.child_clip();
            else
                baseclip = clip;
            end

            if isa(baseclip, 'MTTClip')
                rawf = baseclip.audio_features_raw();
            elseif isa(baseclip, 'MSDClip')
                rawf = baseclip.features('MSDAudioFeatureRAW');
            else
                % fail with a clear message instead of an undefined
                % variable error further below
                error('MTTAudioFeatureHMM:unsupportedClip', ...
                    'Cannot get raw audio features for clip of class %s', ...
                    class(baseclip));
            end

            % ---
            % now extract the features
            % first step: chroma clustering
            % ---
            weights = [rawf.data.segments_duration];

            % normalise weights by the clip duration
            weights = weights / rawf.data.duration;

            % get the chroma features, transposed
            chroma = [rawf.data.segments_pitches]';

            % ---
            % TODO: train hmm using chroma (and weights).
            % Until this is implemented, empty placeholders are stored
            % and a warning is issued.
            % ---
            mu1 = [];
            transmat1 = [];
            warning('MTTAudioFeatureHMM:notTrained', ...
                'HMM training is not implemented yet, storing an empty model');

            % save hmm into data variable
            data.mu = mu1;
            data.transmat1 = transmat1;

            % prepare field for final features
            data.final.vector = [];
            data.final.vector_info = struct();
            data.final.dim = 0;

            % save info data
            data.info.type = 'MTTAudioFeatureHMM';
            data.info.owner = clip;
            data.info.owner_id = clip.id;
            data.info.creatorrev = feature.my_revision;

            % save parameters
            data.info.params = feature.my_params;
        end

        function define_global_transform(features)
            % calculate and set normalization factors from the group of
            % input features. These features will be set for the full database
            %
            % NOTE: intentionally empty, no global transform is computed
            % for this feature type yet.

        end


        function finalise(feature)
            % applies a final transformation and
            % collects the information of this feature within a single vector
            % see info for types in specific dimensions
            %
            % feature: scalar or array of features; each element gets
            %   data.final.vector, data.final.dim and
            %   data.final.vector_info.labels written.
            %
            % Raises an error if the common database entry is empty.

            for i = 1:numel(feature)

                % check for necessary parameters
                if isempty(feature(i).my_db.commondb)

                    error('Define the global transformation first')
                end

                % use the parameters of the feature at hand (index i), the
                % same ones that guard the use of timbren further below
                if feature(i).my_params.ntimbres > 0
                    % ---
                    % normalise features
                    % ---
                    % norm timbre features if necessary
                    timbren = [];
                    if feature(i).my_params.norm_timbres
                        for j = 1:numel(feature(i).data.timbre)

                            % NOTE(review): this calls the BasicSm class,
                            % not the norm_timbre of this class -- confirm
                            timbren = cat(1, timbren, ...
                                MTTAudioFeatureBasicSm.norm_timbre( ...
                                feature(i).data.timbre(j).means, ...
                                feature(i).my_db.commondb.post_normf.timbre));
                        end
                    else

                        timbren = cat(1, timbren, feature(i).data.timbre(:).means);
                    end
                end

                % ---
                % construct resulting feature vector out of features
                % ---
                vec = [];
                info = {};
                if feature(i).my_params.nchromas > 0

                    % the label is stored at the index of the first
                    % element of the respective section
                    info{numel(vec)+ 1} = 'chroma';
                    vec = cat(1, vec, feature(i).data.chroma(:).means);

                    info{numel(vec)+ 1} = 'chroma weights';
                    vec = cat(1, vec, [feature(i).data.chroma(:).means_weight]');

                    % ---
                    % NORMALISE Chroma variance
                    % ---
                    if feature(i).my_params.chroma_var >= 1

                        info{numel(vec)+ 1} = 'chroma variance';

                        % normalise this pack of variance vectors
                        % NOTE(review): reads feature(i).common, whereas the
                        % timbre means use feature(i).my_db.commondb -- confirm
                        tmp_var = mapminmax('apply', [feature(i).data.chroma(:).vars],...
                            feature(i).common.post_normf.chroma_var);

                        % concatenate normalised data to vector
                        for vari = 1:size(tmp_var,2)

                            vec = cat(1, vec, tmp_var(:, vari));
                        end
                    end
                end


                if feature(i).my_params.ntimbres > 0

                    info{numel(vec)+ 1} = 'timbre';
                    vec = cat(1, vec, timbren);

                    info{numel(vec)+ 1} = 'timbre weights';
                    vec = cat(1, vec, [feature(i).data.timbre(:).means_weight]');

                    % ---
                    % NORMALISE timbre variance
                    % ---
                    if feature(i).my_params.timbre_var >= 1

                        info{numel(vec)+ 1} = 'timbre variance';

                        % normalise this pack of variance vectors
                        tmp_var = mapminmax('apply', [feature(i).data.timbre(:).vars],...
                            feature(i).common.post_normf.timbre_var);

                        % concatenate normalised data to vector
                        for vari = 1:size(tmp_var,2)

                            vec = cat(1, vec, tmp_var(:, vari));
                        end
                    end
                end

                if feature(i).my_params.nrhythms > 0

                    info{numel(vec)+ 1} = 'rhythm 8';
                    vec = cat(1, vec, feature(i).data.rhythm.acorr8);

                    info{numel(vec)+ 1} = 'int 8';
                    vec = cat(1, vec, feature(i).data.rhythm.interval8);

                    if feature(i).my_params.nrhythms >= 2

                        info{numel(vec)+ 1} = 'rhythm 16';
                        vec = cat(1, vec, feature(i).data.rhythm.acorr16);

                        info{numel(vec)+ 1} = 'int 16';
                        vec = cat(1, vec, feature(i).data.rhythm.interval16);
                    end
                end

                feature(i).data.final.vector = vec;
                feature(i).data.final.dim = numel(feature(i).data.final.vector);

                % fill up info struct and append to feature:
                % pad the label list with empty cells up to the vector length
                info(end+1: feature(i).data.final.dim) = ...
                    cell(feature(i).data.final.dim - numel(info),1);

                feature(i).data.final.vector_info.labels = info;
            end

            % ---
            % TODO: Maybe delete more basic features again at this point?
            % ---
        end

        % ---
        % destructor: do we really want to remove this
        % from the database? No, but
        % TODO: create marker for unused objects in db, and a cleanup
        %  function
        % ---
        function delete(feature)

        end


        function visualise(feature)
            % ---
            % plots the different data types collected in this feature
            % on top of the raw feature plots, for every given feature
            % ---
            for i = 1:numel(feature)
                clip = feature(i).data.info.owner;

                % display raw features
                if isa(clip, 'CASIMIRClip')
                    baseclip = clip.child_clip();
                else
                    baseclip = clip;
                end

                if isa(baseclip, 'MTTClip')
                    rawf = baseclip.audio_features_raw();
                elseif isa(baseclip, 'MSDClip')
                    rawf = baseclip.features('MSDAudioFeatureRAW');
                else
                    % fail with a clear message instead of an undefined
                    % variable error further below
                    error('MTTAudioFeatureHMM:unsupportedClip', ...
                        'Cannot get raw audio features for clip of class %s', ...
                        class(baseclip));
                end

                % ---
                % @todo: implement MSD feature visualisation
                % ---
                % the three returned axes handles are used for the
                % chroma (a1), timbre (a2) and rhythm (a3) overlays below
                [a1, a2, a3] = rawf.visualise();

                % ---
                % Display chroma features
                % ---
                if isfield(feature(i).data, 'chroma')

                    chroma_labels = {'c', 'c#', 'd','d#', 'e', 'f','f#', 'g','g#', 'a', 'a#', 'h'};
                    mode_labels = {'minor', 'major'};

                    % change labels to reflect detected mode
                    chroma_labels{rawf.data.key + 1} = ...
                        sprintf('(%s) %s',mode_labels{rawf.data.mode + 1}, chroma_labels{rawf.data.key + 1});

                    % transpose labels and data
                    chroma_labels = circshift(chroma_labels, [0, feature(i).data.chroma(1).shift]);
                    chromar = circshift([rawf.data.segments_pitches], [feature(i).data.chroma(1).shift, 0]);

                    % image transposed chromas again
                    segments = [rawf.data.segments_start];
                    segments(end) = rawf.data.duration;

                    % explicitly switch hold on; hold(ax) alone only
                    % toggles the current state
                    hold(a1, 'on');
                    uimagesc(segments, 0:11, chromar, 'Parent', a1);
                    set(a1,'YTick',[0:11], 'YTickLabel', chroma_labels);

                    % enlarge plot and plot new data after the old ones
                    ax = axis(a1);
                    ax(2) = ax(2) + 2*feature(i).my_params.nchromas + 0.5;
                    axis(a1, 'xy');
                    axis(a1, ax);

                    imagesc(rawf.data.duration + (1:feature(i).my_params.nchromas), (-1:11), ...
                        [ feature(i).data.chroma(:).means_weight; feature(i).data.chroma(:).means],...
                        'Parent', a1);
                    % variance calculated?
                    if isfield(feature(i).data.chroma, 'vars')

                        imagesc(rawf.data.duration + feature(i).my_params.nchromas + (1:feature(i).my_params.nchromas), (-1:11), ...
                            [feature(i).data.chroma(:).vars],...
                            'Parent', a1);
                    end
                end

                % ---
                % Display timbre features
                % ---
                if isfield(feature(i).data, 'timbre')

                    % enlarge plot and plot new data after the old ones
                    hold(a2, 'on');
                    ax = axis(a2);
                    ax(2) = ax(2) + 2*feature(i).my_params.ntimbres + 0.5;

                    axis(a2, ax);
                    imagesc(rawf.data.duration + (1:feature(i).my_params.ntimbres), (-1:11), ...
                        [ feature(i).data.timbre(:).means_weight; feature(i).data.timbre(:).means],...
                        'Parent', a2);
                    if isfield(feature(i).data.timbre, 'vars')

                        % timbre variance belongs into the timbre axes (a2)
                        imagesc(rawf.data.duration + feature(i).my_params.ntimbres + (1:feature(i).my_params.ntimbres), (-1:11), ...
                            [feature(i).data.timbre(:).vars],...
                            'Parent', a2);
                    end
                end

                % ---
                % Display rhythm features
                % ---
                if isfield(feature(i).data, 'rhythm')
                    % data.rhythm.interval
                    % get timecode
                    eightt = feature(i).data.rhythm.energy8_time;
                    sixt = feature(i).data.rhythm.energy16_time;

                    hold(a3, 'on');

                    % all plots explicitly target a3 instead of
                    % whatever the current axes happen to be

                    % plot sixteens acorr and energy
                    plot(a3, sixt, feature(i).data.rhythm.energy16, 'bx')

                    plot(a3, sixt, feature(i).data.rhythm.acorr16, 'b')

                    % plot eights acorr and energy
                    plot(a3, eightt, feature(i).data.rhythm.energy8, 'rx')

                    plot(a3, eightt, feature(i).data.rhythm.acorr8, 'r')

                    % broaden view by fixed 4 seconds
                    ax = axis(a3);
                    axis(a3, [max(0, eightt(1)-( eightt(end) - eightt(1) + 4 )) ...
                        min(rawf.data.duration, eightt(end) +4) ...
                        ax(3:4)]);
                end
            end
        end
    end


    methods (Hidden = true)

        function [env, time] = energy_envelope(feature, clip)
            % [env, time] = energy_envelope(feature, clip)
            %
            % extracts the envelope of energy for the given clip
            %
            % env:  envelope data as returned by mirgetdata
            % time: time axis taken from the envelope object

            % ---
            % TODO: externalise envelope etc in external audio features
            % ---

            % evalc captures the console output of the called functions;
            % the evaluated strings refer to the local variables clip,
            % src and feature by name, so these must not be renamed
            [captured, src] = evalc('miraudio(clip.mp3file_full())');
            [captured, env] = evalc('mirenvelope(src, ''Sampling'', feature.my_params.energy_sr)');

            time = get(env,'Time');
            time = time{1}{1};
            env = mirgetdata(env);
        end

        function [acorr, base_sig, base_t] = beat_histogram(feature, startt, interval, signal, signal_t)
            % acorr = beat_histogram(feature, startt, interval, signal, time)
            %
            % compute correlation for beats of specified length in energy curve
            %
            % startt:   time of the first sample point
            % interval: distance between two sample points
            % signal:   energy curve
            % signal_t: time axis of signal; the step between its first
            %           two entries is used for all index calculations
            %
            % acorr:    squared autocorrelation values (nints entries)
            % base_sig: the energy values sampled at base_t
            % base_t:   the 2*nints sample times

            % get corresponding energy values
            dt = signal_t(2) - signal_t(1);
            base_t = startt:interval:(startt + (feature.my_params.nints*2-1) * interval);

            % indices are clamped to the valid range of signal
            base_sig = signal( min( numel(signal), max(1,round((base_t - signal_t(1))/dt))));

            % normalise energy
            % NOTE(review): yields NaN / Inf if max(base_sig) is 0
            acbase_sig = base_sig./max(base_sig);

            % calculate their cyclic autocorrelation
            acorr = circshift(xcorr(acbase_sig,acbase_sig(1:end/2)),...
                [numel(acbase_sig) 0]);

            % cut acorr to relevant points, normalise and square
            acorr = (acorr(1:feature.my_params.nints)./feature.my_params.nints).^2;

            % ---
            % NOTE: we normalise the autocorrelation locally, to compare the
            % (rhythmic) shape
            % ---
            if feature.my_params.norm_acorr

                acorr = acorr - min(acorr);
                acorr = acorr/max(acorr);
            end
        end
    end

    methods(Static)

        function timbre = norm_timbre(in, normfs)
            % timbre = norm_timbre(in, normfs)
            %
            % returns normed timbre data: every column of in is scaled
            % element-wise by normfs, mapped via (1 + x) / 2 and clipped
            % to [0,1]

            % ---
            % individually scale the data using
            % the dimensions factors
            % ---
            timbre = zeros(size(in));
            for i = 1:size(in,2)

                timbre(:,i) = normfs .* in(:,i);
            end

            % shift to positive values
            timbre = (1 + timbre) /2;

            % clip features to [0,1]
            timbre = min(1, max(timbre, 0));
        end

        % ---
        % returns parameter md5 hash for comparison
        % NOTE: no implementation follows this comment in this class
        % ---
    end

end