comparison core/magnatagatune/MTTAudioFeatureSlaney08.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
classdef MTTAudioFeatureSlaney08 < MTTAudioFeature & handle
    % ---
    % This class contains a basic summary of MTT features,
    % complementary to those in MTTAudioFeatureBasicSm. The features
    % are extracted as described in
    % Slaney 08 - LEARNING A METRIC FOR MUSIC SIMILARITY.
    %
    % The usual workflow for these features consists of three steps
    % (see the illustrative sketch below):
    % 1. extract: extracts the basic single-file dependent features
    % 2. define_global_transform: calculates the global feature
    %    transformation parameters
    % 3. finalise: applies the common transformations to a specific feature
    % ---
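    % ---
    % Illustrative usage sketch (an assumption, not taken from this file):
    % with MTTClip objects `clips` and a feature array `feat`, the three
    % steps might be driven roughly as follows. The constructor arguments
    % are defined by the MTTAudioFeature parent class, so the call below
    % is only a guess; storing the returned data struct is handled by
    % that framework.
    %
    %   for k = 1:numel(clips)
    %       feat(k) = MTTAudioFeatureSlaney08(db, clips(k)); % args assumed
    %       feat(k).extract(clips(k));                       % step 1
    %   end
    %   feat.define_global_transform();  % steps 2 and 3 (calls finalise)
    % ---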
    
    properties(Constant = true)
        
        % svn hook
        my_revision = str2double(substr('$Rev$', 5, -1));
    end
    
    properties
        % ---
        % Set default parameters
        % ---
        my_params = struct(...
            'norm_mttstats', 1, ... %
            'whiten_mttstats', 0, ... % NOTE: whitening as in slaney??
            'select_mttstats', 1 ... % TODO: way to select certain features
            );
    end
    
    % ---
    % member functions
    % ---
    methods
        
        % ---
        % constructor: pointer to feature in database
        % ---
        function feature = MTTAudioFeatureSlaney08(varargin)
            
            feature = feature@MTTAudioFeature(varargin{:});
            
        end
        % ---
        % extract feature data from raw audio features
        % ---
        function data = extract(feature, clip)
            % ---
            % Get features. This includes possible
            % local normalisations.
            % ---
            
            global globalvars;
            
            % ---
            % get casimir child clip if available
            % ---
            if isa(clip, 'CASIMIRClip')
                baseclip = clip.child_clip();
            else
                baseclip = clip;
            end
            if isa(baseclip, 'MTTClip')
                rawf = baseclip.audio_features_raw();
            elseif isa(baseclip, 'MSDClip')
                rawf = baseclip.features('MSDAudioFeatureRAW');
            end
            
            
            % ---
            % TODO: implement a time-weighted version of the statistical
            % evaluations below
            % ---
            
            % segmentDurationMean: mean segment duration (sec.)
            data.mttstats.segmentDurationMean = mean(rawf.data.segments_duration);
            
            % segmentDurationVariance: variance of the segment duration
            data.mttstats.segmentDurationVariance = var(rawf.data.segments_duration);
            
            % timeLoudnessMaxMean: mean time to the segment maximum, i.e. attack duration (sec.)
            data.mttstats.timeLoudnessMaxMean = mean(rawf.data.segments_loudness_max_time);
            
            % loudnessMaxMean: mean of the segments' maximum loudness (dB)
            data.mttstats.loudnessMaxMean = mean(rawf.data.segments_loudness_max);
            
            % loudnessMaxVariance: variance of the segments' maximum loudness (dB)
            data.mttstats.loudnessMaxVariance = var(rawf.data.segments_loudness_max);
            
            % loudnessBeginMean: average loudness at the start of segments (dB)
            data.mttstats.loudnessBeginMean = mean(rawf.data.segments_loudness);
            
            % loudnessBeginVariance: variance of the loudness at the start of
            % segments (dB^2). Correlated with loudnessMaxVariance
            data.mttstats.loudnessBeginVariance = var(rawf.data.segments_loudness);
            
            % loudnessDynamicsMean: average of the overall dynamic range in the segments (dB)
            % loudnessDynamicsVariance: segment dynamic range variance
            % (dB). Higher variances suggest more dynamics in each segment.
            % ---
            % NOTE: the above information cannot be extracted from the MTT
            % features; maybe more recent echonest features allow for this
            % ---
            
            % loudness: overall loudness estimate of the track (dB)
            data.mttstats.loudness = rawf.data.loudness;
            
            % ---
            % TODO: get these from the beat loudnesses?
            % ---
            
            % tempo: overall track tempo estimate (in beats per minute, BPM).
            % Doubling and halving errors are possible.
            data.mttstats.tempo = rawf.data.tempo;
            
            % tempoConfidence: a measure of the confidence of the tempo estimate (between 0 and 1)
            %data.mttstats.tempoConfidence = rawf.data.tempoConfidence;
            
            beats = rawf.data.beats;
            tatums = rawf.data.tatums;
            
            % beatVariance: a measure of the regularity of the beat (secs)
            if numel(beats) > 0
                bdiff = diff(beats(1,:));
                data.mttstats.beatVariance = var(bdiff);
            else
                
                % ---
                % This is a fake replacement variance
                % ---
                bdiff = [];
                data.mttstats.beatVariance = 0;
            end
            
            
            % tatum: estimated overall tatum duration (in seconds).
            % Tatums are subdivisions of the beat.
            % ---
            % note: the tatum length could also be
            % estimated by comparison with the global bpm estimate
            % ---
            if numel(tatums) > 0
                tdiff = diff(tatums(1,:));
                data.mttstats.tatum = median(tdiff);
                
                % tatumConfidence: a measure of the confidence of the tatum estimate (between 0 and 1)
                data.mttstats.tatumConfidence = mean(tatums(2,:));
                
                % numTatumsPerBeat: number of tatums per beat
                if ~isempty(bdiff)
                    data.mttstats.numTatumsPerBeat = median(bdiff) / data.mttstats.tatum;
                else
                    % no beat information available: fall back to the
                    % default of two tatums per beat
                    data.mttstats.numTatumsPerBeat = 2;
                end
            else
                % ---
                % This is a fake replacement tatum
                % TODO: maybe set confidence to -1?
                % ---
                
                data.mttstats.tatum = 0;
                
                % tatumConfidence: a measure of the confidence of the tatum estimate (between 0 and 1)
                
                data.mttstats.tatumConfidence = 0;
                
                % numTatumsPerBeat: number of tatums per beat
                data.mttstats.numTatumsPerBeat = 2;
            end
            
            
            % ---
            % TODO: beat analysis
            % ---
            
            % timeSignature: estimated time signature (number of beats per measure). (0-7 / 7)
            data.mttstats.timeSignature = rawf.data.timeSignature;
            
            % timeSignatureStability: a rough estimate of the stability of the time signature throughout the track
            data.mttstats.timeSignatureStability = rawf.data.timeSignatureConfidence;
            
            % ---
            % prepare field for final features
            % ---
            data.final.vector = [];
            data.final.vector_info = struct();
            data.final.dim = 0;
            
            % save info data
            data.info.type = 'MTTAudioFeatureSlaney08';
            data.info.owner_id = clip.id;
            data.info.creatorrev = feature.my_revision;
            
            % save parameters
            data.info.params = feature.my_params;
        end
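        
        % ---
        % For reference, the struct returned by extract() above has the
        % following shape (a summary of the assignments, nothing new):
        %
        %   data.mttstats.*  - scalar summary statistics listed above
        %   data.final.*     - vector / vector_info / dim, filled by finalise()
        %   data.info.*      - type, owner_id, creatorrev, params
        % ---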
        
        function define_global_transform(features)
            % Calculate and set normalisation factors from the group of
            % input features. These factors will be set for the full database.
            
            for i = 1:numel(features)
                data = features(i).data.mttstats;
                
                final(:,i) = [data.segmentDurationMean; ...
                    data.segmentDurationVariance; ...
                    data.timeLoudnessMaxMean; ...
                    data.loudnessMaxMean; ...
                    data.loudnessMaxVariance; ...
                    data.loudnessBeginMean; ...
                    data.loudnessBeginVariance; ...
                    data.loudness; ...
                    data.tempo; ...
                    ... % data.tempoConfidence; ...
                    data.beatVariance; ...
                    data.tatum; ...
                    data.tatumConfidence; ...
                    data.numTatumsPerBeat; ...
                    data.timeSignature; ...
                    data.timeSignatureStability];
            end
            
            if features(1).my_params.norm_mttstats
                if numel(features) == 1
                    error('Pass a feature array to this method, or set normalisation to 0');
                end
                
                % ---
                % here, we only need to define the post-normalisation:
                % mapminmax scales each dimension to [0,1] and returns the
                % process settings, which are stored as pre_norm so that
                % features added later can be normalised consistently
                % ---
                [final, pstd] = mapminmax(final, 0, 1);
                common.mttstats.pre_norm = pstd;
                
                % ---
                % NOTE: whitening as in slaney??
                % Would make reading the
                % mahal matrices really hard
                % ---
                
                features(1).my_db.set_common(common);
                
            else
                
                features(1).my_db.set_common([1]);
            end
            
            % save the normalised features straight away!
            features.finalise(final);
        end
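        
        % ---
        % pre_norm above is the process-settings struct returned by
        % mapminmax; finalise() below reapplies it via
        % mapminmax('apply', x, pre_norm) so that features extracted
        % after the global transform was defined are scaled into the
        % same [0,1] range as the original normalisation group.
        % ---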
        
        
        function finalise(features, final)
            % Applies a final transformation and collects the information
            % of this feature within a single vector.
            % See the info struct for the types in specific dimensions.
            % check if features have been finalised already
            
            % ---
            % check for dummy feature
            % ---
            if isfield(features(1).my_params, 'select_mttstats') && ...
                    isnumeric(features(1).my_params.select_mttstats) && ...
                    features(1).my_params.select_mttstats == 0
                
                % if no information is needed, just leave everything empty
                for i = 1:numel(features)
                    features(i).data.final.vector = [];
                    features(i).data.final.dim = 0;
                    
                    % fill up info struct and append to feature
                    features(i).data.final.vector_info.labels = {};
                end
                
                return;
            end
            
            % ---
            % set feature labelling
            % ---
            info = {'segmentDurationMean', ...
                'segmentDurationVariance', ...
                'timeLoudnessMaxMean', ...
                'loudnessMaxMean', ...
                'loudnessMaxVariance', ...
                'loudnessBeginMean', ...
                'loudnessBeginVariance', ...
                'loudness', ...
                'tempo', ...
                ... % 'tempoConfidence', ...
                'beatVariance', ...
                'tatum', ...
                'tatumConfidence', ...
                'numTatumsPerBeat', ...
                'timeSignature', ...
                'timeSignatureStability'};
            
            % ---
            % construct the resulting feature vector out of the features
            % ---
            if nargin == 2 && isempty(final)
                
                % the final vector etc. are already set to zero
                return;
                
            elseif nargin == 2 && (numel(features) == size(final, 2))
                for i = 1:numel(features)
                    
                    % check for necessary parameters
                    if isempty(features(i).my_db.commondb)
                        
                        error('Define the global transformation first');
                    end
                    
                    features(i).data.final.vector = final(:,i);
                    features(i).data.final.dim = size(final,1);
                    
                    % fill up info struct and append to feature
                    features(i).data.final.vector_info.labels = info;
                end
            else
                % ---
                % if features have been added after getting the
                % normalisation parameters, there should still be an
                % option to include them
                % ---
                
                for i = 1:numel(features)
                    
                    % check for necessary parameters
                    if isempty(features(i).my_db.commondb)
                        
                        error('Define the global transformation first');
                    end
                    
                    data = features(i).data.mttstats;
                    final = [data.segmentDurationMean; ...
                        data.segmentDurationVariance; ...
                        data.timeLoudnessMaxMean; ...
                        data.loudnessMaxMean; ...
                        data.loudnessMaxVariance; ...
                        data.loudnessBeginMean; ...
                        data.loudnessBeginVariance; ...
                        data.loudness; ...
                        data.tempo; ...
                        ... % data.tempoConfidence; ...
                        data.beatVariance; ...
                        data.tatum; ...
                        data.tatumConfidence; ...
                        data.numTatumsPerBeat; ...
                        data.timeSignature; ...
                        data.timeSignatureStability];
                    
                    if features(1).my_params.norm_mttstats == 1
                        
                        % apply the stored mapminmax settings for consistent scaling
                        final = mapminmax('apply', final, features(1).common.mttstats.pre_norm);
                    end
                    
                    features(i).data.final.vector = final;
                    features(i).data.final.dim = size(final,1);
                    
                    % fill up info struct and append to feature
                    features(i).data.final.vector_info.labels = info;
                end
                
            end
            
            % ---
            % TODO: Maybe delete more basic features again at this point?
            % ---
        end
        
        % ---
        % destructor: do we really want to remove this
        % from the database? No, but
        % TODO: create marker for unused objects in db, and a cleanup
        % function
        % ---
        function delete(feature)
            
        end
    end
end