classdef MTTAudioFeatureBasicSm < MTTAudioFeature & handle
    % ---
    % The MTTAudioFeatureBasicSm class contains a basic summary of
    % chroma, mfcc and tempo features:
    % a few common chroma and mfcc vectors are concatenated along with
    % some clip-wide variance, and a metric / rhythm fingerprint is added.
    %
    % The usual workflow for these features consists of three steps
    % 1. extract: extracts the basic single-file dependent features
    % 2. define_global_transform: calculates the global feature
    %       transformation parameters
    % 3. finalise: applies the common transformations to a specific feature
    % ---

    properties(Constant = true)

        % svn hook
        my_revision = str2double(substr('$Rev$', 5, -1));
    end

    properties
        % ---
        % Set default parameters
        % ---
        my_params = struct(...
            'nchromas', 4, ... % 4 chroma vectors
            'chroma_var', 0, ... % chroma variance
            'norm_chromas', 0, ... % not implemented, chromas already rel.
            'min_kshift_chromas', 0.1, ... % threshold for key shift. set to 1 for no shift (0-1)
            ...
            'ntimbres', 4, ...
            'timbre_var', 0, ... % timbre variance
            'norm_timbres', 1, ...
            'clip_timbres', 0.85, ... % percentile of data which has to be inside 0-1 bounds
            ...
            'norm_weights',0, ... % globally norm weights for chroma times?
            'norm_interval',1, ...
            'max_iter',100, ... % max iterations for chroma and timbre knn
            ...
            'nrhythms', 0, ...
            'nints', 11, ...
            'energy_sr', 1000, ... % sample rate for energy curve
            'norm_acorr', 1 ... % normalise acorr locally -> shape imp... energy is normalised anyways
            );
    end

    % ---
    % member functions
    % ---
    methods

        % ---
        % constructor: pointer to feature in database
        % ---
        function feature = MTTAudioFeatureBasicSm(varargin)
            % All arguments are forwarded unchanged to the
            % MTTAudioFeature superclass constructor.

            feature = feature@MTTAudioFeature(varargin{:});
        end

        % ---
        % extract feature data from raw audio features
        % ---
        function data = extract(feature, clip)
            % data = extract(feature, clip)
            %
            % Gets the Basic Summary audio features for one clip. This
            % includes possible local normalisations.
            %
            % Returns a struct with the fields
            %   rhythm  (only if my_params.nrhythms >= 1)
            %   chroma  (struct array: means, means_weight, vars, shift)
            %   timbre  (struct array: means, means_weight, vars)
            %   final   (empty placeholders, filled by finalise)
            %   info    (type, owner, owner_id, creatorrev, params)

            % declared by the original implementation; not read in here
            global globalvars;

            % raw feature object of the clip (or of its casimir child clip)
            rawf = raw_features_for(clip);

            % ---
            % segment durations, normalised by the clip duration, serve
            % as weights for both clusterings
            % ---
            weights = [rawf.data.segments_duration];
            weights = weights / rawf.data.duration;

            chroma = [rawf.data.segments_pitches]';

            % clustering options: op(1) = 0 (originally annotated
            % "display error values"), op(14) = max number of iterations
            op = foptions();
            op(1) = 0;
            op(14) = feature.my_params.max_iter;

            % ---
            % first step: chroma clustering.
            % the weighted k-means should return the most prominent
            % chroma vectors and their weight, sorted by weight
            % ---
            [chromas, cwght, chroma_var] = summarise_clusters(chroma, weights, ...
                feature.my_params.nchromas, feature.my_params.chroma_var, ...
                op, 'chroma');

            % ---
            % shift according to detected key, but only if
            % the confidence is high enough
            % ---
            shift = 0;
            if rawf.data.keyConfidence > feature.my_params.min_kshift_chromas

                shift = -rawf.data.key;
                chromas = circshift(chromas, [0 shift]);
                chroma_var = circshift(chroma_var, [0 shift]);
            end

            % ---
            % get mfcc centres: the same procedure for the mfccs.
            % FIX: for ntimbres == 1 the variance is now taken over the
            % raw segment data (as for the chromas); it used to be taken
            % over the single mean vector, which is always zero.
            % ---
            mfcc = [rawf.data.segments_timbre]';
            [mfccs, mwght, timbre_var] = summarise_clusters(mfcc, weights, ...
                feature.my_params.ntimbres, feature.my_params.timbre_var, ...
                op, 'mfcc');

            % ---
            % get beat features:
            % the autocorrelation curve over n quarters of length
            %
            % alternative: how about using the n=8 quarters relative
            % volumes from the start of a sure measure?
            % ---
            if feature.my_params.nrhythms >= 1
                beats = rawf.data.beats;
                tatums = rawf.data.tatums;

                % ---
                % NOTE: the beat and tatum markers seem to have an offset :(
                % ---
                offset = 0.118; %seconds

                [envelope, time] = energy_envelope(feature, clip);

                % we offset the energy curve
                time = time + offset;

                % length of an eighth note in seconds
                if rawf.data.tempo > 0

                    eightl = 30 / rawf.data.tempo;
                else
                    % odd case: no rhythm data. assume 100 bpm
                    eightl = 0.3;
                end

                if isempty(beats)
                    % odd case: no beats detected. -> use best tatum
                    if ~isempty(tatums)

                        beats = tatums;
                    else
                        % ok, just take the beginning
                        beats = [0; 1];
                    end
                end

                % ---
                % we try to start at the best beat confidence which still
                % leaves nints eighths until the end of the clip.
                % row 1 of beats is read as time, row 2 as confidence.
                % ---
                last_valid = find(beats(1,:) < ...
                    (rawf.data.duration - feature.my_params.nints * eightl), 1, 'last');
                if isempty(last_valid)
                    % no beat leaves enough room: take the first one
                    % instead of passing an empty start time downstream
                    last_valid = 1;
                end

                % find the best valid beat position
                [~, max_measure] = max( beats(2, 1:last_valid));
                max_mtime = beats(1,max_measure);

                % calculate the EIGHTS correlation for the following segment
                [acorr8, eight_en, eightt] = ...
                    beat_histogram(feature, max_mtime, eightl, envelope, time);

                % save rhythm feature data
                data.rhythm.acorr8 = acorr8;
                data.rhythm.acorr8_lag = eightt(1:end/2)-eightt(1);

                data.rhythm.energy8 = eight_en(1:end/2);
                data.rhythm.energy8_time = eightt(1:end/2);

                % NOTE(review): the original code assigned eightl / 2 in
                % both branches of my_params.norm_interval, so that
                % parameter currently has no effect - confirm the
                % intended normalisation.
                data.rhythm.interval8 = eightl / 2;

                if feature.my_params.nrhythms >= 2

                    % calculate the SIXTEENTHS correlation for the segment
                    [acorr16, six_en, sixt] = ...
                        beat_histogram(feature, max_mtime, eightl / 2, envelope, time);

                    data.rhythm.acorr16 = acorr16;
                    data.rhythm.acorr16_lag = data.rhythm.acorr8_lag / 2;

                    data.rhythm.energy16 = six_en(1:end/2);
                    data.rhythm.energy16_time = sixt(1:end/2);

                    % save beat interval / tempo (same value as
                    % interval8, see the review note above)
                    data.rhythm.interval16 = eightl / 2;
                end
            end

            % chroma feature data
            for i = 1:size(chromas,1)
                data.chroma(i).means = chromas(i,:)';
                data.chroma(i).means_weight = cwght(i);
                data.chroma(i).vars = chroma_var(i,:)';
                data.chroma(i).shift = shift;
            end

            % mfcc feature data
            for i = 1:size(mfccs,1)
                data.timbre(i).means = mfccs(i,:)';
                data.timbre(i).means_weight = mwght(i);
                data.timbre(i).vars = timbre_var(i,:)';
            end

            % prepare field for final features
            data.final.vector = [];
            data.final.vector_info = struct();
            data.final.dim = 0;

            % save info data
            data.info.type = 'MTTAudioFeatureBasicSm';
            data.info.owner = clip;
            data.info.owner_id = clip.id;
            data.info.creatorrev = feature.my_revision;

            % save parameters
            data.info.params = feature.my_params;
        end

        function define_global_transform(features)
            % define_global_transform(features)
            %
            % Calculates normalisation factors from the group of input
            % features and stores them via features(1).my_db.set_common.
            % Only the post-normalisation is defined here.
            %
            % Raises an error if only a single feature is passed.

            if numel(features) == 1
                error ('Insert feature array for this method');
            end

            params = features(1).my_params;

            % ---
            % get chroma variance data NORMALISATION Factors
            % TODO: transport chroma variance to finalise step
            % ---
            if params.chroma_var >= 1
                allfeat = collect_abs(features, 'chroma', 'vars');
                [~, common.post_normf.chroma_var] = mapminmax(allfeat,0,1);
            end

            % ---
            % get timbre variance data NORMALISATION Factors
            % TODO: transport timbre variance to finalise step
            % ---
            if params.timbre_var >= 1
                allfeat = collect_abs(features, 'timbre', 'vars');
                [~, common.post_normf.timbre_var] = mapminmax(allfeat,0,1);
            end

            % ---
            % derive normalisation for timbre features:
            % MFCC's are actually special filter outputs
            % (see developer.echonest.com/docs/v4/_static/AnalyzeDocumentation_2.2.pdf)
            % they are unbounded, so just the relative information will be
            % used here.
            % We normalise each bin independently
            % ---
            if params.ntimbres > 0

                allfeat = collect_abs(features, 'timbre', 'means');

                % ---
                % get normalisation factors
                % NOTE: the values will later be clipped to [0,1] anyways
                %
                % FIX: the two tests were joined by ||, which is always
                % true, so the maximum branch below was never reached.
                % ---
                if (params.clip_timbres ~= 0) && (params.clip_timbres ~= 1)

                    common.post_normf.timbre = ...
                        1 ./ prctile(allfeat, params.clip_timbres * 100, 2);
                else
                    % just use the maximum of each bin.
                    % FIX: was 1/max(allfeat, 2), i.e. an element-wise
                    % maximum against the constant 2
                    common.post_normf.timbre = 1 ./ max(allfeat, [], 2);
                end

                % set common feature values
                features(1).my_db.set_common(common);
            else
                % NOTE(review): variance factors computed above are not
                % stored when ntimbres == 0 (original behaviour kept) -
                % confirm whether they should be passed on here.
                features(1).my_db.set_common([1]);
            end
        end


        function finalise(feature)
            % finalise(feature)
            %
            % Applies a final transformation and collects the information
            % of each feature within a single vector, written to
            % data.final.vector / data.final.dim. The labels in
            % data.final.vector_info.labels mark the first dimension of
            % each section; all other entries are empty.
            %
            % Raises an error if no global transformation is defined.

            for i = 1:numel(feature)

                % check for necessary parameters
                if isempty(feature(i).my_db.commondb)

                    error('Define the global transformation first')
                end

                params = feature(i).my_params;

                % ---
                % construct resulting feature vector out of features
                % ---
                vec = [];
                info = {};
                if params.nchromas > 0

                    info{numel(vec)+ 1} = 'chroma';
                    vec = cat(1, vec, feature(i).data.chroma(:).means);

                    info{numel(vec)+ 1} = 'chroma weights';
                    vec = cat(1, vec, [feature(i).data.chroma(:).means_weight]');

                    % ---
                    % NORMALISE Chroma variance
                    % ---
                    if params.chroma_var >= 1

                        info{numel(vec)+ 1} = 'chroma variance';

                        % normalise this pack of variance vectors
                        % NOTE(review): read through feature(i).common,
                        % while the timbre factors below come from
                        % feature(i).my_db.commondb - confirm both paths
                        % address the same data.
                        tmp_var = mapminmax('apply', [feature(i).data.chroma(:).vars],...
                            feature(i).common.post_normf.chroma_var);

                        % concatenate normalised data column by column
                        vec = cat(1, vec, tmp_var(:));
                    end
                end

                % FIX: this test used feature(1) instead of feature(i)
                if params.ntimbres > 0

                    % ---
                    % norm timbre features if necessary
                    % ---
                    timbren = [];
                    if params.norm_timbres
                        for j = 1:numel(feature(i).data.timbre)

                            timbren = cat(1, timbren, ...
                                MTTAudioFeatureBasicSm.norm_timbre(...
                                feature(i).data.timbre(j).means, ...
                                feature(i).my_db.commondb.post_normf.timbre));
                        end
                    else

                        timbren = cat(1, timbren, feature(i).data.timbre(:).means);
                    end

                    info{numel(vec)+ 1} = 'timbre';
                    vec = cat(1, vec, timbren);

                    info{numel(vec)+ 1} = 'timbre weights';
                    vec = cat(1, vec, [feature(i).data.timbre(:).means_weight]');

                    % ---
                    % NORMALISE timbre variance
                    % ---
                    if params.timbre_var >= 1

                        info{numel(vec)+ 1} = 'timbre variance';

                        % normalise this pack of variance vectors
                        tmp_var = mapminmax('apply', [feature(i).data.timbre(:).vars],...
                            feature(i).common.post_normf.timbre_var);

                        % concatenate normalised data column by column
                        vec = cat(1, vec, tmp_var(:));
                    end
                end

                if params.nrhythms > 0

                    info{numel(vec)+ 1} = 'rhythm 8';
                    vec = cat(1, vec, feature(i).data.rhythm.acorr8);

                    info{numel(vec)+ 1} = 'int 8';
                    vec = cat(1, vec, feature(i).data.rhythm.interval8);

                    if params.nrhythms >= 2

                        info{numel(vec)+ 1} = 'rhythm 16';
                        vec = cat(1, vec, feature(i).data.rhythm.acorr16);

                        info{numel(vec)+ 1} = 'int 16';
                        vec = cat(1, vec, feature(i).data.rhythm.interval16);
                    end
                end

                feature(i).data.final.vector = vec;
                feature(i).data.final.dim = numel(feature(i).data.final.vector);

                % fill up info struct and append to feature
                info(end+1: feature(i).data.final.dim) = ...
                    cell(feature(i).data.final.dim - numel(info),1);

                feature(i).data.final.vector_info.labels = info;
            end

            % ---
            % TODO: Maybe delete more basic features again at this point?
            % ---
        end

        % ---
        % destructor: do we really want to remove this
        % from the database? No, but
        % TODO: create marker for unused objects in db, and a cleanup
        %  function
        % ---
        function delete(feature)
            % intentionally empty (see the TODO above)
        end


        function visualise(feature)
            % ---
            % plots the different data types collected in this feature
            % on top of the raw feature plots returned by rawf.visualise()
            % ---
            for i = 1:numel(feature)
                clip = feature(i).data.info.owner;

                % display raw features
                rawf = raw_features_for(clip);

                % ---
                % @todo: implement MSD feature visualisation
                % ---
                [a1, a2, a3] = rawf.visualise();

                % ---
                % Display chroma features
                % ---
                if isfield(feature(i).data, 'chroma')

                    nchromas = feature(i).my_params.nchromas;
                    kshift = feature(i).data.chroma(1).shift;

                    chroma_labels = {'c', 'c#', 'd','d#', 'e', 'f','f#', 'g','g#', 'a', 'a#', 'h'};
                    mode_labels = {'minor', 'major'};

                    % change labels to reflect detected mode
                    chroma_labels{rawf.data.key + 1} = ...
                        sprintf('(%s) %s',mode_labels{rawf.data.mode + 1}, chroma_labels{rawf.data.key + 1});

                    % transpose labels and data
                    chroma_labels = circshift(chroma_labels, [0, kshift]);
                    chromar = circshift([rawf.data.segments_pitches], [kshift, 0]);

                    % image transposed chromas again
                    segments = [rawf.data.segments_start];
                    segments(end) = rawf.data.duration;

                    hold(a1);
                    uimagesc(segments, 0:11, chromar, 'Parent', a1);
                    set(a1,'YTick',[0:11], 'YTickLabel', chroma_labels);

                    % enlarge plot and plot new data after the old ones
                    ax = axis(a1);
                    ax(2) = ax(2) + 2*nchromas + 0.5;
                    axis(a1, 'xy');
                    axis(a1, ax);

                    imagesc(rawf.data.duration + (1:nchromas), (-1:11), ...
                        [ feature(i).data.chroma(:).means_weight; feature(i).data.chroma(:).means],...
                        'Parent', a1);

                    % variance calculated?
                    if isfield(feature(i).data.chroma, 'vars')

                        imagesc(rawf.data.duration + nchromas + (1:nchromas), (-1:11), ...
                            [feature(i).data.chroma(:).vars],...
                            'Parent', a1);
                    end
                end

                % ---
                % Display timbre features
                % ---
                if isfield(feature(i).data, 'timbre')

                    ntimbres = feature(i).my_params.ntimbres;

                    % enlarge plot and plot new data after the old ones
                    hold(a2);
                    ax = axis(a2);
                    ax(2) = ax(2) + 2*ntimbres + 0.5;

                    axis(a2, ax);
                    imagesc(rawf.data.duration + (1:ntimbres), (-1:11), ...
                        [ feature(i).data.timbre(:).means_weight; feature(i).data.timbre(:).means],...
                        'Parent', a2);

                    if isfield(feature(i).data.timbre, 'vars')

                        % FIX: the variances were drawn into the chroma
                        % axes (a1) instead of the timbre axes
                        imagesc(rawf.data.duration + ntimbres + (1:ntimbres), (-1:11), ...
                            [feature(i).data.timbre(:).vars],...
                            'Parent', a2);
                    end
                end

                % ---
                % Display rhythm features
                % ---
                if isfield(feature(i).data, 'rhythm')

                    rhythm = feature(i).data.rhythm;

                    % get timecode
                    eightt = rhythm.energy8_time;

                    hold(a3);

                    % plot sixteens acorr and energy.
                    % FIX: these fields are only stored by extract for
                    % nrhythms >= 2; reading them unconditionally failed
                    % for nrhythms == 1
                    if isfield(rhythm, 'energy16_time')

                        sixt = rhythm.energy16_time;
                        plot(sixt, rhythm.energy16, 'bx')
                        plot(sixt, rhythm.acorr16, 'b')
                    end

                    % plot eights acorr and energy
                    plot(eightt, rhythm.energy8, 'rx')
                    plot(eightt, rhythm.acorr8, 'r')

                    % broaden view by fixed 4 seconds
                    ax = axis(a3);
                    axis(a3, [max(0, eightt(1)-( eightt(end) - eightt(1) + 4 )) ...
                        min(rawf.data.duration, eightt(end) +4) ...
                        ax(3:4)]);
                end
            end
        end
    end


    methods (Hidden = true)

        function [env, time] = energy_envelope(feature, clip)
            % [env, time] = energy_envelope(feature, clip)
            %
            % extracts the envelope of energy for the given clip, sampled
            % at my_params.energy_sr, together with its time axis

            % ---
            % TODO: externalise envelope etc in external audio features
            % ---

            % evalc captures the console output of the calls.
            % The command strings refer to the local variables clip, src
            % and feature by name - do not rename those.
            [~, src] = evalc('miraudio(clip.mp3file_full())');
            [~, env] = evalc('mirenvelope(src, ''Sampling'', feature.my_params.energy_sr)');

            time = get(env,'Time');
            time = time{1}{1};
            env = mirgetdata(env);
        end

        function [acorr, base_sig, base_t] = beat_histogram(feature, startt, interval, signal, signal_t)
            % [acorr, base_sig, base_t] = ...
            %     beat_histogram(feature, startt, interval, signal, signal_t)
            %
            % compute correlation for beats of specified length in the
            % energy curve
            %
            % startt    time of the first sampling point
            % interval  distance between two sampling points
            % signal    energy curve, signal_t its time axis
            %
            % acorr     first my_params.nints correlation values
            % base_sig  energy at the 2 * nints sampling points
            % base_t    times of these sampling points

            % get corresponding energy values; indices are clamped to
            % the valid range of the signal
            dt = signal_t(2) - signal_t(1);
            base_t = startt:interval:(startt + (feature.my_params.nints*2-1) * interval);
            base_sig = signal( min( numel(signal), max(1,round((base_t - signal_t(1))/dt))));

            % normalise energy; a zero maximum (silent excerpt) is left
            % as is instead of producing NaNs
            peak = max(base_sig);
            if peak ~= 0
                acbase_sig = base_sig./peak;
            else
                acbase_sig = base_sig;
            end

            % calculate their cyclic autocorrelation
            acorr = circshift(xcorr(acbase_sig,acbase_sig(1:end/2)),...
                [numel(acbase_sig) 0]);

            % cut acorr to relevant points, normalise and square
            acorr = (acorr(1:feature.my_params.nints)./feature.my_params.nints).^2;

            % ---
            % NOTE: we normalise the autocorrelation locally, to compare the
            % (rhythmic) shape
            % ---
            if feature.my_params.norm_acorr

                acorr = acorr - min(acorr);

                % a completely flat curve stays zero instead of NaN
                top = max(acorr);
                if top ~= 0
                    acorr = acorr/top;
                end
            end
        end
    end

    methods(Static)

        function timbre = norm_timbre(in, normfs)
            % timbre = norm_timbre(in, normfs)
            %
            % returns normed timbre data: every column of in is scaled
            % element-wise by normfs, mapped by (1 + x) / 2 and then
            % clipped to [0,1]

            % ---
            % individually scale the data using
            % the dimensions factors
            % ---
            timbre = zeros(size(in));
            for i = 1:size(in,2)

                timbre(:,i) = normfs .* in(:,i);
            end

            % shift to positive values
            timbre = (1 + timbre) /2;

            % clip features to [0,1]
            timbre = min(1, max(timbre, 0));
        end
    end

end

% ---
% local helper functions
% ---

function rawf = raw_features_for(clip)
% returns the raw audio feature object for a clip; for a CASIMIRClip
% the child clip is used. Raises an error for unsupported clip classes.

    if isa(clip, 'CASIMIRClip')
        baseclip = clip.child_clip();
    else
        baseclip = clip;
    end

    if isa(baseclip, 'MTTClip')
        rawf = baseclip.audio_features_raw();
    elseif isa(baseclip, 'MSDClip')
        rawf = baseclip.features('MSDAudioFeatureRAW');
    else
        error('MTTAudioFeatureBasicSm:unsupportedClip', ...
            'no raw audio features available for clips of class %s', ...
            class(baseclip));
    end
end

function [centres, cwght, cvar] = summarise_clusters(samples, weights, ncentres, want_var, op, label)
% summarises the rows of samples by ncentres weighted cluster centres
%
% samples   one data vector per row
% weights   one weight per row of samples
% ncentres  number of centres to return
% want_var  cluster variances are calculated if >= 1
% op        options vector handed to weighted_kmeans
% label     name used in the error message ('chroma' or 'mfcc')
%
% centres   one centre per row, sorted by descending weight
% cwght     weight of each centre
% cvar      variance per centre and dimension (zeros if not calculated)

    have_var = false;

    if ncentres == 0
        % trivial case: nothing requested
        centres = [];
        cwght = [];

    elseif ncentres == 1
        % single centre: plain mean and variance of all data
        centres = mean(samples, 1);
        cvar = var(samples, 0, 1);
        have_var = true;
        cwght = 1;

    elseif numel(weights) > ncentres
        % ---
        % there may be few distinct vectors, try kmeans several
        % (20) times
        % ---
        tries = 0;
        cwght = [];
        while (numel(cwght) ~= ncentres) && (tries < 20)

            [centres, cwght, post] = ...
                weighted_kmeans(ncentres, samples, weights, op);
            tries = tries + 1;
        end

        if numel(cwght) ~= ncentres

            error(['cannot find enough ' label ' centres']);
        end

        % ---
        % Calculate the weighted variance of the clusters
        % ---
        if want_var >= 1

            cvar = zeros(size(centres));
            have_var = true;
            for i = 1:size(cvar,1)

                % column i of post is used as a logical index of the
                % samples assigned to cluster i
                members = post(:,i);

                % get squared distance from cluster centroid
                sqdev = (samples(members,:) - ...
                    repmat(centres(i,:), sum(members), 1)).^2;

                % add up the weighted differences and normalise by sum
                % of weights
                cvar(i,:) = (weights(members) * sqdev) ./ ...
                    (sum(weights(members)));
            end
        end

    else
        % ---
        % odd case: not more than ncentres data points.
        % we repeat the mean vector at the end, with zero weight.
        % every vector is its own cluster => zero variance
        % ---
        centres = [samples; repmat(mean(samples, 1), ...
            ncentres - numel(weights), 1)];

        cwght = weights;
        cwght( end + 1:ncentres ) = 0;
    end

    % trivial case: no variance requested / available
    if ~have_var
        cvar = zeros(size(centres));
    end

    % sort by associated time
    [cwght, idx] = sort(cwght, 'descend');
    centres = centres(idx,:);
    cvar = cvar(idx,:);
end

function allfeat = collect_abs(features, group, field)
% concatenates abs(features(i).data.(group)(:).(field)) of all
% features column-wise into a single matrix

    parts = cell(1, numel(features));
    for i = 1:numel(features)

        d = features(i).data;
        entries = d.(group);
        parts{i} = abs(cat(2, entries.(field)));
    end
    allfeat = cat(2, parts{:});
end