classdef MTTAudioFeatureSlaney08 < MTTAudioFeature & handle
    % ---
    % This class contains a basic summary of MTT features complementary to
    % those in MTTAudioFeatureBasicSm. Features are extracted as described
    % in Slaney 08 - LEARNING A METRIC FOR MUSIC SIMILARITY
    %
    % The usual workflow for these features consists of three steps
    % 1. extract: extracts the basic single-file dependent features
    % 2. define_global_transform: calculates the global feature
    %       transformation parameters
    % 3. finalise: applies the common transformations to a specific feature
    % ---

    properties(Constant = true)

        % svn hook
        my_revision = str2double(substr('$Rev$', 5, -1));
    end

    properties
        % ---
        % Set default parameters
        % ---
        my_params = struct(...
            'norm_mttstats', 1, ... %
            'whiten_mttstats', 0, ... % NOTE: whitening as in slaney??
            'select_mttstats', 1 ...% TODO: way to select certain features
            );
    end

    % ---
    % member functions
    % ---
    methods

        % ---
        % constructor: pointer to feature in database
        % ---
        function feature = MTTAudioFeatureSlaney08(varargin)

            feature = feature@MTTAudioFeature(varargin{:});

        end

        % ---
        % extract feature data from raw audio features
        % ---
        function data = extract(feature, clip)
            % Computes the per-clip summary statistics from the raw
            % audio features of the given clip.
            %
            % feature: the feature object; supplies my_revision and my_params
            % clip:    an MTTClip or MSDClip, or a CASIMIRClip wrapping
            %          one of those (its child_clip() is used)
            %
            % data:    struct with the fields
            %            mttstats - the scalar statistics
            %            final    - empty placeholders filled in by finalise
            %            info     - type, owner id, revision and parameters
            %
            % An error is raised for clip types without a known source of
            % raw features.

            % ---
            % get casimir child clip if available
            % ---
            if isa(clip, 'CASIMIRClip')
                baseclip = clip.child_clip();
            else
                baseclip = clip;
            end

            if isa(baseclip, 'MTTClip')
                rawf = baseclip.audio_features_raw();
            elseif isa(baseclip, 'MSDClip')
                rawf = baseclip.features('MSDAudioFeatureRAW');
            else
                % fail with a clear message instead of an undefined 'rawf'
                error('MTTAudioFeatureSlaney08:extract', ...
                    'Cannot get raw features for clip of class %s', ...
                    class(baseclip));
            end

            % ---
            % TODO: implement time_weighted version of the statistical
            %       evaluations below
            % ---

            % segmentDurationMean: mean segment duration (sec.).
            data.mttstats.segmentDurationMean = mean(rawf.data.segments_duration);

            % segmentDurationVariance: variance of the segment duration
            data.mttstats.segmentDurationVariance = var(rawf.data.segments_duration);

            % timeLoudnessMaxMean: mean time to the segment maximum, or attack duration (sec.).
            data.mttstats.timeLoudnessMaxMean = mean(rawf.data.segments_loudness_max_time);

            % loudnessMaxMean: mean of the segments' maximum loudness (dB).
            data.mttstats.loudnessMaxMean = mean(rawf.data.segments_loudness_max);

            % loudnessMaxVariance: variance of the segments' maximum loudness (dB).
            data.mttstats.loudnessMaxVariance = var(rawf.data.segments_loudness_max);

            % loudnessBeginMean: average loudness at the start of segments (dB)
            data.mttstats.loudnessBeginMean = mean(rawf.data.segments_loudness);

            % loudnessBeginVariance: variance of the loudness at the start
            % of segments (dB2). Correlated with loudnessMaxVariance
            data.mttstats.loudnessBeginVariance = var(rawf.data.segments_loudness);

            % loudnessDynamicsMean: average of overall dynamic range in the segments (dB).
            % loudnessDynamicsVariance: segment dynamic range variance
            % (dB). Higher variances suggest more dynamics in each segment.
            % ---
            % NOTE: the above information cannot be extracted from the MTT
            % Features, maybe more recent echonest features allow for this
            % ---

            % loudness: overall loudness estimate of the track (dB).
            data.mttstats.loudness = rawf.data.loudness;

            % ---
            % TODO: get these from the beat loudnesses?
            % ---

            % tempo: overall track tempo estimate (in beats per minute, BPM).
            % Doubling and halving errors are possible.
            data.mttstats.tempo = rawf.data.tempo;

            % tempoConfidence: a measure of the confidence of the tempo estimate (between 0 and 1).
            %data.mttstats.tempoConfidence = rawf.data.tempoConfidence;

            beats = rawf.data.beats;
            tatums = rawf.data.tatums;

            % ---
            % inter-beat intervals, taken from the first row of beats.
            % At least two entries are needed; with fewer, diff() is empty
            % and var / median would return NaN. bdiff is always defined
            % here because the tatum branch below reads it as well.
            % ---
            if size(beats, 1) >= 1 && size(beats, 2) > 1
                bdiff = diff(beats(1,:));
            else
                bdiff = [];
            end

            % beatVariance: a measure of the regularity of the beat (secs).
            if ~isempty(bdiff)
                data.mttstats.beatVariance = var(bdiff);
            else

                % ---
                % This is a fake replacement variance
                % ---
                data.mttstats.beatVariance = 0;
            end

            % tatum: estimated overall tatum duration (in seconds). Tatums are subdivisions of the beat.
            % ---
            % note: the tatum length could be also
            %       accessed by comparison with the global bpm estimate
            % ---
            if size(tatums, 1) >= 1 && size(tatums, 2) > 1
                tdiff = diff(tatums(1,:));
                data.mttstats.tatum = median(tdiff);

                % tatumConfidence: a measure of the confidence of the tatum estimate (between 0 and 1).
                data.mttstats.tatumConfidence = mean(tatums(2,:));

                % numTatumsPerBeat: number of tatums per beat
                if ~isempty(bdiff)
                    data.mttstats.numTatumsPerBeat = median(bdiff) / data.mttstats.tatum;
                else
                    % no usable beat intervals: same placeholder as in
                    % the no-tatum case below
                    data.mttstats.numTatumsPerBeat = 2;
                end
            else
                % ---
                % This is a fake replacement tatum
                % TODO: maybe set confidence to -1?
                % ---

                data.mttstats.tatum = 0;

                % tatumConfidence: a measure of the confidence of the tatum estimate (between 0 and 1).
                data.mttstats.tatumConfidence = 0;

                % numTatumsPerBeat: number of tatums per beat
                data.mttstats.numTatumsPerBeat = 2;
            end

            % ---
            % TODO: beat analysis
            % ---

            % timeSignature: estimated time signature (number of beats per measure). (0-7 / 7)
            data.mttstats.timeSignature = rawf.data.timeSignature;

            % timeSignatureStability: a rough estimate of the stability of
            % the time signature throughout the track
            data.mttstats.timeSignatureStability = rawf.data.timeSignatureConfidence;

            % ---
            % prepare field for final features
            % ---
            data.final.vector = [];
            data.final.vector_info = struct();
            data.final.dim = 0;

            % save info data
            data.info.type = 'MTTAudioFeatureSlaney08';
            data.info.owner_id = clip.id;
            data.info.creatorrev = feature.my_revision;

            % save parameters
            data.info.params = feature.my_params;
        end

        function define_global_transform(features)
            % calculate and set normalization factors from the group of
            % input features. These features will be set for the full database
            %
            % features: array of feature objects with extracted data.
            %           With norm_mttstats enabled, more than one feature
            %           is required, otherwise an error is raised.
            %
            % The resulting settings are stored via my_db.set_common and
            % the (possibly normalised) vectors are written to the
            % features right away through finalise.

            % one column per feature; collected in a cell array and
            % concatenated once instead of growing the matrix in the loop
            columns = cell(1, numel(features));
            for i = 1:numel(features)
                columns{i} = MTTAudioFeatureSlaney08.stats_vector( ...
                    features(i).data.mttstats);
            end
            final = [columns{:}];

            if features(1).my_params.norm_mttstats
                if numel(features) == 1
                    error ('Insert feature array for this method, or set normalisation to 0');
                end

                % ---
                % here, we only need to define the post-normalisation
                % ---
                [final, pstd] = mapminmax(final,0,1);
                common.mttstats.pre_norm = pstd;

                % ---
                % NOTE: whitening as in slaney??
                % Would make reading the
                % mahal matrices really hard
                % ---

                features(1).my_db.set_common(common);

            else

                features(1).my_db.set_common([1]);
            end

            % save the normalised features straight away!
            features.finalise(final);
        end


        function finalise(features, final)
            % applies a final transformation and
            % collects the information of this feature within a single vector
            % see info for types in specific dimensions
            %
            % features: array of feature objects
            % final:    optional matrix with one precomputed column per
            %           feature. If it is omitted or its column count does
            %           not match numel(features), each vector is rebuilt
            %           from the stored statistics and, if norm_mttstats
            %           is 1, mapped with the stored normalisation.
            %           An empty final leaves the features untouched.

            % ---
            % check for dummy feature
            % ---
            if isfield(features(1).my_params,'select_mttstats') && ...
                    isnumeric(features(1).my_params.select_mttstats) && ...
                    features(1).my_params.select_mttstats == 0

                % if no information needed just fill everything 0
                for i = 1:numel(features)
                    features(i).data.final.vector = [];
                    features(i).data.final.dim = 0;

                    % fill up info struct and append to feature
                    features(i).data.final.vector_info.labels = {};
                end

                return;
            end

            % ---
            % set feature labelling
            % ---
            info = MTTAudioFeatureSlaney08.stats_labels();

            % ---
            % construct resulting feature vector out of features
            % ---
            if nargin == 2 && isempty(final)

                % the final vector etc already are set to zero;
                return;

            elseif nargin == 2 && (numel(features) == size(final, 2))
                for i = 1:numel(features)

                    % check for necessary parameters
                    if isempty(features(i).my_db.commondb)

                        error('Define the global transformation first')
                    end

                    features(i).data.final.vector = final(:,i);
                    features(i).data.final.dim = size(final,1);

                    % fill up info struct and append to feature
                    features(i).data.final.vector_info.labels = info;
                end
            else
                % ---
                % if features have been added after getting normalisation
                % parameters, there should be still an option to include
                % them
                % ---

                for i = 1:numel(features)

                    % check for necessary parameters
                    if isempty(features(i).my_db.commondb)

                        error('Define the global transformation first')
                    end

                    vec = MTTAudioFeatureSlaney08.stats_vector( ...
                        features(i).data.mttstats);

                    if features(1).my_params.norm_mttstats == 1

                        vec = mapminmax('apply', vec, features(1).common.mttstats.pre_norm);
                    end

                    features(i).data.final.vector = vec;
                    features(i).data.final.dim = size(vec,1);

                    % fill up info struct and append to feature
                    features(i).data.final.vector_info.labels = info;
                end

            end

            % ---
            % TODO: Maybe delete more basic features again at this point?
            % ---
        end

        % ---
        % destructor: do we really want to remove this
        % from the database? No, but
        % TODO: create marker for unused objects in db, and a cleanup
        %       function
        % ---
        function delete(feature)

        end
    end

    % ---
    % internal helpers shared by define_global_transform and finalise
    % ---
    methods (Static, Access = private)

        function vec = stats_vector(stats)
            % stacks the statistics of one feature into a column vector.
            % The order has to match stats_labels.
            vec = [stats.segmentDurationMean; ...
                stats.segmentDurationVariance; ...
                stats.timeLoudnessMaxMean; ...
                stats.loudnessMaxMean; ...
                stats.loudnessMaxVariance; ...
                stats.loudnessBeginMean; ...
                stats.loudnessBeginVariance; ...
                stats.loudness; ...
                stats.tempo; ...
                ... % stats.tempoConfidence; ...
                stats.beatVariance; ...
                stats.tatum; ...
                stats.tatumConfidence; ...
                stats.numTatumsPerBeat; ...
                stats.timeSignature; ...
                stats.timeSignatureStability];
        end

        function labels = stats_labels()
            % names of the dimensions produced by stats_vector, in order
            labels = {'segmentDurationMean', ...
                'segmentDurationVariance', ...
                'timeLoudnessMaxMean', ...
                'loudnessMaxMean', ...
                'loudnessMaxVariance', ...
                'loudnessBeginMean', ...
                'loudnessBeginVariance', ...
                'loudness', ...
                'tempo', ...
                ...% 'tempoConfidence', ...
                'beatVariance', ...
                'tatum', ...
                'tatumConfidence', ...
                'numTatumsPerBeat', ...
                'timeSignature', ...
                'timeSignatureStability'};
        end
    end
end