comparison core/magnatagatune/MTTAudioFeatureHMM.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e9a9cd732c1e
1 classdef MTTAudioFeatureHMM < MTTAudioFeature & handle
2 % ---
3 % the MTTAudioFeatureHMM class is intended to describe a clip by
4 % a hidden Markov model (HMM) with a predefined number of states.
5 % NOTE: large parts of this file still follow MTTAudioFeatureBasicSm
6 % (a basic summary of chroma, mfcc and tempo features, some clip-wide
7 % variance and a metric / rhythm fingerprint); HMM training is a TODO
8 %
9 % The usual workflow for these features consists of three steps
10 % 1. extract: extracts the basic single-file dependent features
11 % 2. define_global_transform: calculates the global feature
12 % transformation parameters
13 % 3. finalise: applies the common transformations to a specific feature
14 % ---
15
properties (Constant)
    % revision number of this class, parsed out of the svn keyword
    % string below (substr is a helper defined outside this file)
    my_revision = str2double(substr('$Rev: 2332 $', 5, -1));
end
21
properties
    % ---
    % default parameter set of this feature type
    %   nstates : predefined number of states
    % ---
    my_params = struct('nstates', 4);
end
30
31 % ---
32 % member functions
33 % ---
34 methods
35
36 % ---
37 % constructor: pointer to feature in database
38 % ---
function obj = MTTAudioFeatureHMM(varargin)
    % obj = MTTAudioFeatureHMM(varargin)
    %
    % Constructor. Every argument is handed on unchanged to the
    % MTTAudioFeature superclass constructor; nothing else is done here.

    obj = obj@MTTAudioFeature(varargin{:});
end
44 % ---
45 % extract feature data from raw audio features
46 % ---
function data = extract(feature, clip)
    % data = extract(feature, clip)
    %
    % Reads the raw audio features of a clip and builds the data struct
    % of this feature type.
    %
    % feature : feature object; my_revision and my_params are copied
    %           into data.info
    % clip    : clip object. A CASIMIRClip is replaced by its child
    %           clip; the resulting clip has to be an MTTClip or an
    %           MSDClip
    %
    % data    : struct with the fields
    %   mu, transmat1 : HMM parameters. Training is not implemented
    %                   yet, so both are empty placeholders and a
    %                   warning is issued
    %   final         : empty vector / vector_info / dim, to be filled
    %                   by finalise
    %   info          : type, owner, owner_id, creatorrev and params
    %
    % Raises MTTAudioFeatureHMM:unsupportedClip when no raw features
    % can be obtained for the class of the given clip.

    % ---
    % get casimir child clip if available
    % ---
    if isa(clip, 'CASIMIRClip')
        baseclip = clip.child_clip();
    else
        baseclip = clip;
    end

    if isa(baseclip, 'MTTClip')
        rawf = baseclip.audio_features_raw();
    elseif isa(baseclip, 'MSDClip')
        rawf = baseclip.features('MSDAudioFeatureRAW');
    else
        % without this branch rawf would be undefined further down
        error('MTTAudioFeatureHMM:unsupportedClip', ...
            'Cannot get raw audio features for a clip of class %s', ...
            class(baseclip));
    end

    % ---
    % prepare the training input:
    % segment durations relative to the clip duration, and the
    % transposed segment chroma (pitch) vectors
    % ---
    weights = [rawf.data.segments_duration];
    weights = weights / rawf.data.duration; %#ok<NASGU>

    chroma = [rawf.data.segments_pitches]'; %#ok<NASGU>

    % ---
    % TODO: train hmm with feature.my_params.nstates states on the
    % chroma data. Until then the model parameters stay empty; the
    % original code referenced an undefined variable here and
    % therefore always failed.
    % ---
    mu1 = [];
    transmat1 = [];
    warning('MTTAudioFeatureHMM:notTrained', ...
        'HMM training is not implemented; storing empty model parameters');

    % save hmm into data variable
    data.mu = mu1;
    data.transmat1 = transmat1;

    % prepare field for final features
    data.final.vector = [];
    data.final.vector_info = struct();
    data.final.dim = 0;

    % save info data
    % NOTE(review): the type label was 'MTTAudioFeatureBasicSm', which
    % looks like a copy-paste leftover - confirm that nothing depends
    % on the old label
    data.info.type = 'MTTAudioFeatureHMM';
    data.info.owner = clip;
    data.info.owner_id = clip.id;
    data.info.creatorrev = feature.my_revision;

    % save parameters
    data.info.params = feature.my_params;
end
109
function define_global_transform(features)
    % define_global_transform(features)
    %
    % Placeholder for the global transformation step: it is meant to
    % derive normalisation factors from the given group of features
    % and to set them for the full database.
    %
    % The body currently contains no statements.
end
117
118
function finalise(feature)
    % finalise(feature)
    %
    % Applies a final transformation and collects the information of
    % each feature within a single column vector, which is stored in
    % data.final.vector together with its length (data.final.dim) and
    % a label cell array (data.final.vector_info.labels). A label is
    % written at the index where each section of the vector starts;
    % all other label cells stay empty.
    %
    % feature : one feature object or an array of them; every element
    %           is processed independently
    %
    % Raises an error if my_db.commondb is empty for an element, i.e.
    % the global transformation has not been defined.
    %
    % NOTE(review): this method reads my_params.ntimbres, nchromas,
    % nrhythms, norm_timbres, chroma_var and timbre_var, none of which
    % are part of the default my_params declared in this class -
    % confirm where they are supposed to come from.
    % NOTE(review): the normalisation factors are read through
    % feature(i).my_db.commondb for timbre means but through
    % feature(i).common for the variances - confirm both are valid.

    for i = 1:numel(feature)

        % check for necessary parameters
        if isempty(feature(i).my_db.commondb)
            error('Define the global transformation first')
        end

        params = feature(i).my_params;

        % ---
        % normalise the timbre means if necessary.
        % The guard uses the parameters of feature(i), the same
        % element whose timbre data is appended further down
        % ---
        timbren = [];
        if params.ntimbres > 0
            if params.norm_timbres
                for j = 1:numel(feature(i).data.timbre)
                    timbren = cat(1, timbren, ...
                        MTTAudioFeatureBasicSm.norm_timbre( ...
                        feature(i).data.timbre(j).means, ...
                        feature(i).my_db.commondb.post_normf.timbre));
                end
            else
                timbren = cat(1, timbren, feature(i).data.timbre(:).means);
            end
        end

        % ---
        % construct resulting feature vector out of features
        % ---
        vec = [];
        info = {};

        if params.nchromas > 0

            info{numel(vec) + 1} = 'chroma';
            vec = cat(1, vec, feature(i).data.chroma(:).means);

            info{numel(vec) + 1} = 'chroma weights';
            vec = cat(1, vec, [feature(i).data.chroma(:).means_weight]');

            % ---
            % NORMALISE Chroma variance
            % ---
            if params.chroma_var >= 1

                info{numel(vec) + 1} = 'chroma variance';

                % normalise this pack of variance vectors
                tmp_var = mapminmax('apply', [feature(i).data.chroma(:).vars], ...
                    feature(i).common.post_normf.chroma_var);

                % append the normalised columns one after another
                vec = cat(1, vec, tmp_var(:));
            end
        end

        if params.ntimbres > 0

            info{numel(vec) + 1} = 'timbre';
            vec = cat(1, vec, timbren);

            info{numel(vec) + 1} = 'timbre weights';
            vec = cat(1, vec, [feature(i).data.timbre(:).means_weight]');

            % ---
            % NORMALISE timbre variance
            % ---
            if params.timbre_var >= 1

                info{numel(vec) + 1} = 'timbre variance';

                % normalise this pack of variance vectors
                tmp_var = mapminmax('apply', [feature(i).data.timbre(:).vars], ...
                    feature(i).common.post_normf.timbre_var);

                % append the normalised columns one after another
                vec = cat(1, vec, tmp_var(:));
            end
        end

        if params.nrhythms > 0

            info{numel(vec) + 1} = 'rhythm 8';
            vec = cat(1, vec, feature(i).data.rhythm.acorr8);

            info{numel(vec) + 1} = 'int 8';
            vec = cat(1, vec, feature(i).data.rhythm.interval8);

            if params.nrhythms >= 2

                info{numel(vec) + 1} = 'rhythm 16';
                vec = cat(1, vec, feature(i).data.rhythm.acorr16);

                info{numel(vec) + 1} = 'int 16';
                vec = cat(1, vec, feature(i).data.rhythm.interval16);
            end
        end

        feature(i).data.final.vector = vec;
        feature(i).data.final.dim = numel(feature(i).data.final.vector);

        % fill up the label cell array to the vector length and
        % append it to the feature
        info(end+1:feature(i).data.final.dim) = ...
            cell(feature(i).data.final.dim - numel(info), 1);

        feature(i).data.final.vector_info.labels = info;
    end

    % ---
    % TODO: Maybe delete more basic features again at this point?
    % ---
end
245
246 % ---
247 % destructor: do we really want to remove this
248 % from the database? No, but
249 % TODO: create marker for unused objects in db, and a cleanup
250 % function
251 % ---
function delete(feature)
    % delete(feature)
    %
    % Destructor hook. It deliberately contains no statements, so
    % destroying a feature object triggers no action in this class.
end
255
256
function visualise(feature)
    % visualise(feature)
    %
    % Plots the different data types collected in this feature on top
    % of the raw feature plot of the owning clip.
    %
    % feature : one feature object or an array of them. For each
    %           element the raw features of data.info.owner are
    %           visualised first; chroma, timbre and rhythm data are
    %           added to the three returned axes if the corresponding
    %           field exists in feature(i).data
    %
    % Raises MTTAudioFeatureHMM:unsupportedClip when no raw features
    % can be obtained for the class of the owning clip.

    for i = 1:numel(feature)
        clip = feature(i).data.info.owner;

        % ---
        % get the raw features (of the casimir child clip if available)
        % ---
        if isa(clip, 'CASIMIRClip')
            baseclip = clip.child_clip();
        else
            baseclip = clip;
        end

        if isa(baseclip, 'MTTClip')
            rawf = baseclip.audio_features_raw();
        elseif isa(baseclip, 'MSDClip')
            rawf = baseclip.features('MSDAudioFeatureRAW');
        else
            % without this branch rawf would be undefined below
            error('MTTAudioFeatureHMM:unsupportedClip', ...
                'Cannot get raw audio features for a clip of class %s', ...
                class(baseclip));
        end

        % ---
        % @todo: implement MSD feature visualisation
        % ---
        [a1, a2, a3] = rawf.visualise();

        % ---
        % Display chroma features
        % ---
        if isfield(feature(i).data, 'chroma')

            chroma_labels = {'c', 'c#', 'd','d#', 'e', 'f','f#', 'g','g#', 'a', 'a#', 'h'};
            mode_labels = {'minor', 'major'};

            % change labels to reflect detected mode
            chroma_labels{rawf.data.key + 1} = ...
                sprintf('(%s) %s', mode_labels{rawf.data.mode + 1}, ...
                chroma_labels{rawf.data.key + 1});

            % transpose labels and data
            chroma_labels = circshift(chroma_labels, [0, feature(i).data.chroma(1).shift]);
            chromar = circshift([rawf.data.segments_pitches], [feature(i).data.chroma(1).shift, 0]);

            % image transposed chromas again
            segments = [rawf.data.segments_start];
            segments(end) = rawf.data.duration;

            % switch hold on explicitly: hold(ax) alone only toggles
            % the current state
            hold(a1, 'on');
            uimagesc(segments, 0:11, chromar, 'Parent', a1);
            set(a1, 'YTick', 0:11, 'YTickLabel', chroma_labels);

            % enlarge plot and plot new data after the old ones
            ax = axis(a1);
            ax(2) = ax(2) + 2*feature(i).my_params.nchromas + 0.5;
            axis(a1, 'xy');
            axis(a1, ax);

            imagesc(rawf.data.duration + (1:feature(i).my_params.nchromas), (-1:11), ...
                [feature(i).data.chroma(:).means_weight; feature(i).data.chroma(:).means], ...
                'Parent', a1);

            % variance calculated?
            if isfield(feature(i).data.chroma, 'vars')

                imagesc(rawf.data.duration + feature(i).my_params.nchromas + (1:feature(i).my_params.nchromas), (-1:11), ...
                    [feature(i).data.chroma(:).vars], ...
                    'Parent', a1);
            end
        end

        % ---
        % Display timbre features
        % ---
        if isfield(feature(i).data, 'timbre')

            % enlarge plot and plot new data after the old ones
            hold(a2, 'on');
            ax = axis(a2);
            ax(2) = ax(2) + 2*feature(i).my_params.ntimbres + 0.5;

            axis(a2, ax);
            imagesc(rawf.data.duration + (1:feature(i).my_params.ntimbres), (-1:11), ...
                [feature(i).data.timbre(:).means_weight; feature(i).data.timbre(:).means], ...
                'Parent', a2);

            if isfield(feature(i).data.timbre, 'vars')

                % the timbre variances belong next to the timbre
                % means in a2 (they were drawn into the chroma axes)
                imagesc(rawf.data.duration + feature(i).my_params.ntimbres + (1:feature(i).my_params.ntimbres), (-1:11), ...
                    [feature(i).data.timbre(:).vars], ...
                    'Parent', a2);
            end
        end

        % ---
        % Display rhythm features
        % ---
        if isfield(feature(i).data, 'rhythm')

            % get timecode
            eightt = feature(i).data.rhythm.energy8_time;
            sixt = feature(i).data.rhythm.energy16_time;

            hold(a3, 'on');

            % plot sixteens acorr and energy
            plot(sixt, feature(i).data.rhythm.energy16, 'bx')
            plot(sixt, feature(i).data.rhythm.acorr16, 'b')

            % plot eights acorr and energy
            plot(eightt, feature(i).data.rhythm.energy8, 'rx')
            plot(eightt, feature(i).data.rhythm.acorr8, 'r')

            % broaden view by fixed 4 seconds
            ax = axis(a3);
            axis(a3, [max(0, eightt(1) - (eightt(end) - eightt(1) + 4)) ...
                min(rawf.data.duration, eightt(end) + 4) ...
                ax(3:4)]);
        end
    end
end
373 end
374
375
376 methods (Hidden = true)
377
function [env, time] = energy_envelope(feature, clip)
    % [env, time] = energy_envelope(feature, clip)
    %
    % Extracts the envelope of energy for the given clip.
    %
    % feature : feature object; my_params.energy_sr is passed to
    %           mirenvelope as the 'Sampling' option
    % clip    : clip object providing mp3file_full()
    %
    % env     : envelope data as returned by mirgetdata
    % time    : first cell of the first cell of the envelope's 'Time'
    %           property
    %
    % Both toolbox calls are wrapped in evalc, which captures their
    % command-window output; the captured text is discarded. The
    % evaluated strings refer to the local variables clip, src and
    % feature by name, so these must not be renamed.

    % ---
    % TODO: externalise envelope etc in external audio features
    % ---
    [captured, src] = evalc('miraudio(clip.mp3file_full())'); %#ok<ASGLU>
    [captured, envelope] = evalc('mirenvelope(src, ''Sampling'', feature.my_params.energy_sr)'); %#ok<ASGLU>

    timecells = get(envelope, 'Time');
    time = timecells{1}{1};
    env = mirgetdata(envelope);
end
392
function [acorr, base_sig, base_t] = beat_histogram(feature, startt, interval, signal, signal_t)
    % [acorr, base_sig, base_t] = beat_histogram(feature, startt, interval, signal, signal_t)
    %
    % Computes the correlation for beats of the specified length in an
    % energy curve.
    %
    % feature  : feature object; uses my_params.nints and
    %            my_params.norm_acorr
    % startt   : time of the first beat position
    % interval : distance between two beat positions
    % signal   : energy values
    % signal_t : time stamps of signal; the sample spacing is taken
    %            from its first two entries
    %
    % acorr    : first nints values of the shifted cross-correlation,
    %            divided by nints and squared; additionally rescaled
    %            to start at 0 with maximum 1 if norm_acorr is set
    % base_sig : energy values picked at the beat positions
    % base_t   : the 2*nints beat positions

    nints = feature.my_params.nints;

    % sample spacing of the energy curve
    dt = signal_t(2) - signal_t(1);

    % beat positions and the nearest sample index for each of them,
    % clamped to the valid index range of signal
    base_t = startt:interval:(startt + (nints*2-1) * interval);
    sample_idx = round((base_t - signal_t(1))/dt);
    sample_idx = min(numel(signal), max(1, sample_idx));
    base_sig = signal(sample_idx);

    % normalise energy
    normed_sig = base_sig./max(base_sig);

    % correlate the sequence with its first half and rotate the result
    % by the sequence length
    raw_corr = xcorr(normed_sig, normed_sig(1:end/2));
    shifted_corr = circshift(raw_corr, [numel(normed_sig) 0]);

    % cut to relevant points, normalise and square
    acorr = (shifted_corr(1:nints)./nints).^2;

    % ---
    % NOTE: the autocorrelation is normalised locally, to compare the
    % (rhythmic) shape
    % ---
    if feature.my_params.norm_acorr
        acorr = acorr - min(acorr);
        acorr = acorr/max(acorr);
    end
end
423 end
424
425 methods(Static)
426
function timbre = norm_timbre(in, normfs)
    % timbre = norm_timbre(in, normfs)
    %
    % Returns normed timbre data.
    %
    % in     : matrix with one timbre vector per column
    % normfs : scaling factors, multiplied element-wise onto every
    %          column of in
    %
    % timbre : matrix of the size of in holding (1 + scaled) / 2,
    %          limited to the range [0, 1]

    % scale each column with the per-dimension factors
    scaled = zeros(size(in));
    ncols = size(in, 2);
    for col = 1:ncols
        scaled(:, col) = normfs .* in(:, col);
    end

    % shift to positive values, then clip to [0,1]
    shifted = (1 + scaled) / 2;
    timbre = min(1, max(shifted, 0));
end
446
447 % ---
448 % returns parameter md5 hash for comparison
449 % ---
450 end
451
452 end