Mercurial > hg > camir-aes2014
comparison core/magnatagatune/MTTAudioFeatureSlaney08.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e9a9cd732c1e |
---|---|
classdef MTTAudioFeatureSlaney08 < MTTAudioFeature & handle
    % ---
    % MTTAudioFeatureSlaney08
    %
    % Basic summary statistics of MTT features, complementary to those in
    % MTTAudioFeatureBasicSm. The statistics are extracted as described in
    % Slaney 08 - LEARNING A METRIC FOR MUSIC SIMILARITY.
    %
    % The usual workflow for these features consists of three steps
    % 1. extract: extracts the basic single-file dependent features
    % 2. define_global_transform: calculates the global feature
    %       transformation parameters
    % 3. finalise: applies the common transformations to a specific feature
    % ---

    properties(Constant = true)

        % svn hook
        my_revision = str2double(substr('$Rev$', 5, -1));
    end

    properties
        % ---
        % Default parameters
        %
        % norm_mttstats:   if true, define_global_transform rescales the
        %                  statistics with mapminmax to the range [0, 1]
        % whiten_mttstats: not read by any method of this class
        % select_mttstats: if 0, finalise writes an empty final vector
        % ---
        my_params = struct(...
            'norm_mttstats', 1, ... %
            'whiten_mttstats', 0, ... % NOTE: whitening as in slaney??
            'select_mttstats', 1 ... % TODO: way to select certain features
            );
    end

    % ---
    % member functions
    % ---
    methods

        % ---
        % constructor: pointer to feature in database
        % All arguments are forwarded to the MTTAudioFeature constructor.
        % ---
        function feature = MTTAudioFeatureSlaney08(varargin)

            feature = feature@MTTAudioFeature(varargin{:});

        end

        % ---
        % extract feature data from raw audio features
        %
        % feature: this feature object (supplies my_revision, my_params)
        % clip:    CASIMIRClip, MTTClip or MSDClip. For a CASIMIRClip the
        %          child clip is used as the source of the raw features.
        %
        % data:    struct with the fields
        %          .mttstats  single-clip statistics
        %          .final     empty placeholders, filled by finalise
        %          .info      type, owner id, creator revision, parameters
        %
        % Raises an error if the (child) clip is neither an MTTClip nor
        % an MSDClip.
        % ---
        function data = extract(feature, clip)

            % not read in this method; declaration kept so that the
            % global workspace is touched exactly as before
            global globalvars;

            % ---
            % get casimir child clip if available
            % ---
            if isa(clip, 'CASIMIRClip')
                baseclip = clip.child_clip();
            else
                baseclip = clip;
            end

            if isa(baseclip, 'MTTClip')
                rawf = baseclip.audio_features_raw();
            elseif isa(baseclip, 'MSDClip')
                rawf = baseclip.features('MSDAudioFeatureRAW');
            else
                % fail here instead of with an "undefined variable rawf"
                % error further below
                error('MTTAudioFeatureSlaney08:unsupportedClip', ...
                    'Cannot extract raw features from a clip of class %s', ...
                    class(baseclip));
            end

            % ---
            % TODO: implement time_weighted version of the statistical
            % evaluations below
            % ---

            % segmentDurationMean: mean segment duration (sec.).
            data.mttstats.segmentDurationMean = mean(rawf.data.segments_duration);

            % segmentDurationVariance: variance of the segment duration
            data.mttstats.segmentDurationVariance = var(rawf.data.segments_duration);

            % timeLoudnessMaxMean: mean time to the segment maximum, or attack duration (sec.).
            data.mttstats.timeLoudnessMaxMean = mean(rawf.data.segments_loudness_max_time);

            % loudnessMaxMean: mean of segments' maximum loudness (dB).
            data.mttstats.loudnessMaxMean = mean(rawf.data.segments_loudness_max);

            % loudnessMaxVariance: variance of the segments' maximum loudness (dB).
            data.mttstats.loudnessMaxVariance = var(rawf.data.segments_loudness_max);

            % loudnessBeginMean: average loudness at the start of segments (dB)
            data.mttstats.loudnessBeginMean = mean(rawf.data.segments_loudness);

            % loudnessBeginVariance: variance of the loudness at the start
            % of segments (dB2). Correlated with loudnessMaxVariance
            data.mttstats.loudnessBeginVariance = var(rawf.data.segments_loudness);

            % loudnessDynamicsMean: average of overall dynamic range in the segments (dB).
            % loudnessDynamicsVariance: segment dynamic range variance
            % (dB). Higher variances suggest more dynamics in each segment.
            % ---
            % NOTE: the above information cannot be extracted from the MTT
            % Features, maybe more recent echonest features allow for this
            % ---

            % loudness: overall loudness estimate of the track (dB).
            data.mttstats.loudness = rawf.data.loudness;

            % ---
            % TODO: get these from the beat loudnesses?
            % ---

            % tempo: overall track tempo estimate (in beats per minute, BPM).
            % Doubling and halving errors are possible.
            data.mttstats.tempo = rawf.data.tempo;

            % tempoConfidence: a measure of the confidence of the tempo estimate (between 0 and 1).
            %data.mttstats.tempoConfidence = rawf.data.tempoConfidence;

            beats = rawf.data.beats;
            tatums = rawf.data.tatums;

            % ---
            % Row 1 of beats / tatums is differenced to get intervals,
            % row 2 of tatums is averaged as the confidence.
            % The interval vectors are empty when there is no or only a
            % single event; the replacement values below are used then,
            % so that no NaN enters the statistics.
            % ---
            if isempty(beats)
                bdiff = [];
            else
                bdiff = diff(beats(1,:));
            end

            if isempty(tatums)
                tdiff = [];
            else
                tdiff = diff(tatums(1,:));
            end

            % beatVariance: a measure of the regularity of the beat (secs).
            if ~isempty(bdiff)
                data.mttstats.beatVariance = var(bdiff);
            else
                % ---
                % This is a fake replacement variance
                % ---
                data.mttstats.beatVariance = 0;
            end

            % tatum: estimated overall tatum duration (in seconds).
            % Tatums are subdivisions of the beat.
            % ---
            % note: the tatum length could be also
            % accessed by comparison with the global bpm estimate
            % ---
            if ~isempty(tdiff)
                data.mttstats.tatum = median(tdiff);
            else
                % ---
                % This is a fake replacement tatum
                % ---
                data.mttstats.tatum = 0;
            end

            % tatumConfidence: a measure of the confidence of the tatum estimate (between 0 and 1).
            if ~isempty(tatums)
                data.mttstats.tatumConfidence = mean(tatums(2,:));
            else
                % TODO: maybe set confidence to -1?
                data.mttstats.tatumConfidence = 0;
            end

            % numTatumsPerBeat: number of tatums per beat.
            % Needs both a beat and a tatum interval; otherwise the
            % replacement value 2 is used.
            if ~isempty(bdiff) && ~isempty(tdiff)
                data.mttstats.numTatumsPerBeat = median(bdiff) / data.mttstats.tatum;
            else
                data.mttstats.numTatumsPerBeat = 2;
            end

            % ---
            % TODO: beat analysis
            % ---

            % timeSignature: estimated time signature (number of beats per measure). (0-7 / 7)
            data.mttstats.timeSignature = rawf.data.timeSignature;

            % timeSignatureStability: a rough estimate of the stability of
            % the time signature throughout the track
            data.mttstats.timeSignatureStability = rawf.data.timeSignatureConfidence;

            % ---
            % prepare field for final features
            % ---
            data.final.vector = [];
            data.final.vector_info = struct();
            data.final.dim = 0;

            % save info data
            data.info.type = 'MTTAudioFeatureSlaney08';
            data.info.owner_id = clip.id;
            data.info.creatorrev = feature.my_revision;

            % save parameters
            data.info.params = feature.my_params;
        end

        % ---
        % calculate and set normalisation factors from the group of
        % input features. These are stored through
        % features(1).my_db.set_common, and the (normalised) vectors are
        % written to the features straight away via finalise.
        %
        % features: array of features whose data.mttstats is available.
        %           With norm_mttstats enabled, more than one feature is
        %           required; an error is raised otherwise.
        % ---
        function define_global_transform(features)

            % one column of statistics per feature; collected in a cell
            % and concatenated once instead of growing a matrix
            n = numel(features);
            columns = cell(1, n);
            for i = 1:n
                columns{i} = MTTAudioFeatureSlaney08.stats_vector( ...
                    features(i).data.mttstats);
            end
            final = [columns{:}];

            if features(1).my_params.norm_mttstats
                if numel(features) == 1
                    error('Insert feature array for this method, or set normalisation to 0');
                end

                % ---
                % here, we only need to define the post-normalisation.
                % The mapminmax settings are kept so that features added
                % later can be mapped in the same way (see finalise).
                % ---
                [final, pstd] = mapminmax(final, 0, 1);
                common.mttstats.pre_norm = pstd;

                % ---
                % NOTE: whitening as in slaney??
                % Would make reading the
                % mahal matrices really hard
                % ---

                features(1).my_db.set_common(common);

            else

                % no transformation parameters; store a placeholder
                features(1).my_db.set_common([1]);
            end

            % save the normalised features straight away!
            features.finalise(final);
        end

        % ---
        % applies a final transformation and collects the information of
        % this feature within a single vector; the labels of the vector
        % dimensions are stored in data.final.vector_info.labels
        %
        % features: array of features to finalise
        % final:    (optional) matrix with one ready-made column per
        %           feature, as passed by define_global_transform.
        %           - empty: nothing is written
        %           - column count equals numel(features): columns are
        %             stored as they are
        %           - omitted or any other size: vectors are rebuilt from
        %             data.mttstats, and normalised with the stored
        %             settings if norm_mttstats == 1
        %
        % Raises an error if my_db.commondb is empty for a feature that
        % is to be written.
        % ---
        function finalise(features, final)

            % ---
            % check for dummy feature
            % ---
            if isfield(features(1).my_params,'select_mttstats') && ...
                    isnumeric(features(1).my_params.select_mttstats) && ...
                    features(1).my_params.select_mttstats == 0

                % if no information needed just fill everything 0
                for i = 1:numel(features)
                    features(i).data.final.vector = [];
                    features(i).data.final.dim = 0;

                    % fill up info struct and append to feature
                    features(i).data.final.vector_info.labels = {};
                end

                return;
            end

            % ---
            % set feature labelling; the order has to match the order of
            % the rows produced by stats_vector
            % ---
            info = {'segmentDurationMean', ...
                'segmentDurationVariance', ...
                'timeLoudnessMaxMean', ...
                'loudnessMaxMean', ...
                'loudnessMaxVariance', ...
                'loudnessBeginMean', ...
                'loudnessBeginVariance', ...
                'loudness', ...
                'tempo', ...
                ...% 'tempoConfidence', ...
                'beatVariance', ...
                'tatum', ...
                'tatumConfidence', ...
                'numTatumsPerBeat', ...
                'timeSignature', ...
                'timeSignatureStability'};

            % ---
            % construct resulting feature vector out of features
            % ---
            if nargin == 2 && isempty(final)

                % the final vector etc already are set to zero;
                return;

            elseif nargin == 2 && (numel(features) == size(final, 2))
                for i = 1:numel(features)

                    % check for necessary parameters
                    if isempty(features(i).my_db.commondb)
                        error('Define the global transformation first')
                    end

                    features(i).data.final.vector = final(:,i);
                    features(i).data.final.dim = size(final,1);

                    % fill up info struct and append to feature
                    features(i).data.final.vector_info.labels = info;
                end
            else
                % ---
                % if features have been added after getting normalisation
                % parameters, there should still be an option to include
                % them
                % ---

                for i = 1:numel(features)

                    % check for necessary parameters
                    if isempty(features(i).my_db.commondb)
                        error('Define the global transformation first')
                    end

                    vec = MTTAudioFeatureSlaney08.stats_vector( ...
                        features(i).data.mttstats);

                    if features(1).my_params.norm_mttstats == 1

                        % reuse the settings saved by define_global_transform
                        vec = mapminmax('apply', vec, features(1).common.mttstats.pre_norm);
                    end

                    features(i).data.final.vector = vec;
                    features(i).data.final.dim = size(vec,1);

                    % fill up info struct and append to feature
                    features(i).data.final.vector_info.labels = info;
                end

            end

            % ---
            % TODO: Maybe delete more basic features again at this point?
            % ---
        end

        % ---
        % destructor: do we really want to remove this
        % from the database? No, but
        % TODO: create marker for unused objects in db, and a cleanup
        % function
        % ---
        function delete(feature)

        end
    end

    methods(Static = true, Access = private)

        % ---
        % stacks the statistics of one clip into a single column.
        %
        % stats: the mttstats struct as written by extract
        % vec:   vertical concatenation of the statistics, in the order
        %        of the labels used in finalise
        % ---
        function vec = stats_vector(stats)

            vec = [stats.segmentDurationMean; ...
                stats.segmentDurationVariance; ...
                stats.timeLoudnessMaxMean; ...
                stats.loudnessMaxMean; ...
                stats.loudnessMaxVariance; ...
                stats.loudnessBeginMean; ...
                stats.loudnessBeginVariance; ...
                stats.loudness; ...
                stats.tempo; ...
                ... % stats.tempoConfidence; ...
                stats.beatVariance; ...
                stats.tatum; ...
                stats.tatumConfidence; ...
                stats.numTatumsPerBeat; ...
                stats.timeSignature; ...
                stats.timeSignatureStability];
        end
    end
end