annotate core/magnatagatune/MTTAudioFeatureSlaney08.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
rev   line source
classdef MTTAudioFeatureSlaney08 < MTTAudioFeature & handle
    % ---
    % This class contains a basic summary of MTT features complementary
    % to those in MTTAudioFeatureBasicSm. Features are extracted as
    % described in Slaney 08 - LEARNING A METRIC FOR MUSIC SIMILARITY
    %
    % The usual workflow for these features consists of three steps
    % 1. extract: extracts the basic single-file dependent features
    % 2. define_global_transform: calculates the global feature
    %       transformation parameters
    % 3. finalise: applies the common transformations to a specific feature
    % ---

    properties(Constant = true)

        % svn hook
        my_revision = str2double(substr('$Rev$', 5, -1));
    end

    properties
        % ---
        % Set default parameters
        % ---
        my_params = struct(...
            'norm_mttstats', 1, ... % min-max normalise the statistics
            'whiten_mttstats', 0, ... % NOTE: whitening as in slaney??
            'select_mttstats', 1 ...% TODO: way to select certain features
            );
    end

    % ---
    % member functions
    % ---
    methods

        % ---
        % constructor: pointer to feature in database
        % ---
        function feature = MTTAudioFeatureSlaney08(varargin)

            feature = feature@MTTAudioFeature(varargin{:});

        end

        % ---
        % extract feature data from raw audio features
        % ---
        function data = extract(feature, clip)
            % ---
            % get features. this includes possible
            % local normalisations
            %
            % clip: a CASIMIRClip (its child clip is used), an MTTClip
            %       or an MSDClip. Any other class raises an error.
            % data: struct with the fields
            %       mttstats - the per-clip summary statistics
            %       final    - empty placeholders, filled by finalise
            %       info     - type, owner id, revision and parameters
            % ---

            % not used in this method; the declaration is kept unchanged
            global globalvars;

            % ---
            % get casimir child clip if available
            % ---
            if isa(clip, 'CASIMIRClip')
                baseclip = clip.child_clip();
            else
                baseclip = clip;
            end

            if isa(baseclip, 'MTTClip')
                rawf = baseclip.audio_features_raw();
            elseif isa(baseclip, 'MSDClip')
                rawf = baseclip.features('MSDAudioFeatureRAW');
            else
                % without this branch rawf stays unassigned and the
                % method fails later with an undefined-variable error
                error('MTTAudioFeatureSlaney08:unsupportedClip', ...
                    'Cannot extract raw features from a clip of class %s', ...
                    class(baseclip));
            end

            % ---
            % TODO: implement time_weighted version of the statistical
            % evaluations below
            % ---

            % segmentDurationMean: mean segment duration (sec.).
            data.mttstats.segmentDurationMean = mean(rawf.data.segments_duration);

            % segmentDurationVariance: variance of the segment duration
            data.mttstats.segmentDurationVariance = var(rawf.data.segments_duration);

            % timeLoudnessMaxMean: mean time to the segment maximum, or attack duration (sec.).
            data.mttstats.timeLoudnessMaxMean = mean(rawf.data.segments_loudness_max_time);

            % loudnessMaxMean: mean of segments' maximum loudness (dB).
            data.mttstats.loudnessMaxMean = mean(rawf.data.segments_loudness_max);

            % loudnessMaxVariance: variance of the segments' maximum loudness (dB).
            data.mttstats.loudnessMaxVariance = var(rawf.data.segments_loudness_max);

            % loudnessBeginMean: average loudness at the start of segments (dB)
            data.mttstats.loudnessBeginMean = mean(rawf.data.segments_loudness);

            % loudnessBeginVariance: variance of the loudness at the start
            % of segments (dB2). Correlated with loudnessMaxVariance
            data.mttstats.loudnessBeginVariance = var(rawf.data.segments_loudness);

            % loudnessDynamicsMean: average of overall dynamic range in the segments (dB).
            % loudnessDynamicsVariance: segment dynamic range variance
            % (dB). Higher variances suggest more dynamics in each segment.
            % ---
            % NOTE: the above information cannot be extracted from the MTT
            % Features, maybe more recent echonest features allow for this
            % ---

            % loudness: overall loudness estimate of the track (dB).
            data.mttstats.loudness = rawf.data.loudness;

            % ---
            % TODO: get these from the beat loudnesses?
            % ---

            % tempo: overall track tempo estimate (in beat per minute, BPM).
            % Doubling and halving errors are possible.
            data.mttstats.tempo = rawf.data.tempo;

            % tempoConfidence: a measure of the confidence of the tempo estimate (between 0 and 1).
            %data.mttstats.tempoConfidence = rawf.data.tempoConfidence;

            beats = rawf.data.beats;
            tatums = rawf.data.tatums;

            % beatVariance: a measure of the regularity of the beat (secs).
            have_beats = numel(beats) > 0;
            if have_beats
                bdiff = diff(beats(1,:));
                data.mttstats.beatVariance = var(bdiff);
            else

                % ---
                % This is a fake replacement variance
                % ---
                data.mttstats.beatVariance = 0;
            end

            % tatum: estimated overall tatum duration (in seconds). Tatums are subdivisions of the beat.
            % ---
            % note: the tatum length could be also
            % accessed by comparison with the global bpm estimate
            % ---
            if numel(tatums) > 0
                tdiff = diff(tatums(1,:));
                data.mttstats.tatum = median(tdiff);

                % tatumConfidence: a measure of the confidence of the tatum estimate (between 0 and 1).
                data.mttstats.tatumConfidence = mean(tatums(2,:));

                % numTatumsPerBeat: number of tatums per beat
                if have_beats
                    data.mttstats.numTatumsPerBeat = median(bdiff) / data.mttstats.tatum;
                else
                    % bdiff does not exist without beats; use the same
                    % default as in the no-tatum case below
                    data.mttstats.numTatumsPerBeat = 2;
                end
            else
                % ---
                % This is a fake replacement tatum
                % TODO: maybe set confidence to -1?
                % ---
                data.mttstats.tatum = 0;

                % tatumConfidence: a measure of the confidence of the tatum estimate (between 0 and 1).
                data.mttstats.tatumConfidence = 0;

                % numTatumsPerBeat: number of tatums per beat
                data.mttstats.numTatumsPerBeat = 2;
            end

            % ---
            % TODO: beat analysis
            % ---

            % timeSignature: estimated time signature (number of beats per measure). (0-7 / 7)
            data.mttstats.timeSignature = rawf.data.timeSignature;

            % timeSignatureStability: a rough estimate of the stability of
            % the time signature throughout the track
            data.mttstats.timeSignatureStability = rawf.data.timeSignatureConfidence;

            % ---
            % prepare field for final features
            % ---
            data.final.vector = [];
            data.final.vector_info = struct();
            data.final.dim = 0;

            % save info data
            data.info.type = 'MTTAudioFeatureSlaney08';
            data.info.owner_id = clip.id;
            data.info.creatorrev = feature.my_revision;

            % save parameters
            data.info.params = feature.my_params;
        end

        function define_global_transform(features)
            % calculate and set normalization factors from the group of
            % input features. These features will be set for the full database
            %
            % features: array of MTTAudioFeatureSlaney08 objects whose
            %           data.mttstats has been filled by extract. With
            %           norm_mttstats enabled, more than one feature is
            %           required.

            labels = MTTAudioFeatureSlaney08.stats_labels();

            % one column of statistics per feature
            final = zeros(numel(labels), numel(features));
            for i = 1:numel(features)
                final(:,i) = MTTAudioFeatureSlaney08.stats_vector( ...
                    features(i).data.mttstats);
            end

            if features(1).my_params.norm_mttstats
                if numel(features) == 1
                    error ('Insert feature array for this method, or set normalisation to 0');
                end

                % ---
                % here, we only need to define the post-normalisation
                % ---
                [final, pstd] = mapminmax(final,0,1);
                common.mttstats.pre_norm = pstd;

                % ---
                % NOTE: whitening as in slaney??
                % Would make reading the
                % mahal matrices really hard
                % ---

                features(1).my_db.set_common(common);

            else

                % placeholder value stored when no normalisation is done
                features(1).my_db.set_common([1]);
            end

            % save the normalised features straight away!
            features.finalise(final);
        end


        function finalise(features, final)
            % applies a final transformation and
            % collects the information of this feature within a single vector
            % see info for types in specific dimensions
            %
            % features: array of MTTAudioFeatureSlaney08 objects
            % final:    optional matrix with one already transformed
            %           column per feature. If it is omitted, or its
            %           column count differs from numel(features), the
            %           vectors are rebuilt from data.mttstats and the
            %           stored normalisation is applied. An empty final
            %           leaves the features untouched.

            % ---
            % check for dummy feature
            % ---
            if isfield(features(1).my_params,'select_mttstats') && ...
                    isnumeric(features(1).my_params.select_mttstats) && ...
                    features(1).my_params.select_mttstats == 0

                % if no information needed just fill everything 0
                for i = 1:numel(features)
                    features(i).data.final.vector = [];
                    features(i).data.final.dim = 0;

                    % fill up info struct and append to feature
                    features(i).data.final.vector_info.labels = {};
                end

                return;
            end

            % ---
            % set feature labelling
            % ---
            info = MTTAudioFeatureSlaney08.stats_labels();

            % ---
            % construct resulting feature vector out of features
            % ---
            if nargin == 2 && isempty(final)

                % the final vector etc already are set to zero;
                return;

            elseif nargin == 2 && (numel(features) == size(final, 2))
                for i = 1:numel(features)

                    % check for necessary parameters
                    if isempty(features(i).my_db.commondb)

                        error('Define the global transformation first')
                    end

                    features(i).data.final.vector = final(:,i);
                    features(i).data.final.dim = size(final,1);

                    % fill up info struct and append to feature
                    features(i).data.final.vector_info.labels = info;
                end
            else
                % ---
                % if features have been added after getting normalisation
                % parameters, there should be still an option to include
                % them
                % ---

                for i = 1:numel(features)

                    % check for necessary parameters
                    if isempty(features(i).my_db.commondb)

                        error('Define the global transformation first')
                    end

                    vec = MTTAudioFeatureSlaney08.stats_vector( ...
                        features(i).data.mttstats);

                    if features(1).my_params.norm_mttstats == 1

                        % NOTE(review): "common" is not defined in this
                        % class; presumably the MTTAudioFeature base class
                        % exposes the stored common data - confirm
                        vec = mapminmax('apply', vec, features(1).common.mttstats.pre_norm);
                    end

                    features(i).data.final.vector = vec;
                    features(i).data.final.dim = size(vec,1);

                    % fill up info struct and append to feature
                    features(i).data.final.vector_info.labels = info;
                end

            end

            % ---
            % TODO: Maybe delete more basic features again at this point?
            % ---
        end

        % ---
        % destructor: do we really want to remove this
        % from the database? No, but
        % TODO: create marker for unused objects in db, and a cleanup
        % function
        % ---
        function delete(feature)

        end
    end

    % ---
    % private helpers shared by define_global_transform and finalise
    % ---
    methods (Static, Access = private)

        function labels = stats_labels()
            % names of the mttstats fields, in the order in which they
            % appear in the final feature vector
            labels = {'segmentDurationMean', ...
                'segmentDurationVariance', ...
                'timeLoudnessMaxMean', ...
                'loudnessMaxMean', ...
                'loudnessMaxVariance', ...
                'loudnessBeginMean', ...
                'loudnessBeginVariance', ...
                'loudness', ...
                'tempo', ...
                ...% 'tempoConfidence', ...
                'beatVariance', ...
                'tatum', ...
                'tatumConfidence', ...
                'numTatumsPerBeat', ...
                'timeSignature', ...
                'timeSignatureStability'};
        end

        function vec = stats_vector(stats)
            % stacks the fields of an mttstats struct vertically, in the
            % order given by stats_labels
            labels = MTTAudioFeatureSlaney08.stats_labels();
            parts = cellfun(@(name) stats.(name), labels, ...
                'UniformOutput', false);
            vec = vertcat(parts{:});
        end
    end
end