camir-aes2014: core/magnatagatune/MTTAudioFeatureSlaney08.m annotate

annotate core/magnatagatune/MTTAudioFeatureSlaney08.m @ 0:e9a9cd732c1e tip

first hg version after svn

author	wolffd
date	Tue, 10 Feb 2015 15:05:51 +0000
parents
children

rev	line source
wolffd@0	1 classdef MTTAudioFeatureSlaney08 < MTTAudioFeature & handle
wolffd@0	2 % ---
wolffd@0	3 % This Class contains
wolffd@0	4 % a basic summary of MTT features complementary to those in
wolffd@0	5 % MTTAudioFeatureBasicSm, features are extracted
wolffd@0	6 % as described in Slaney 08 - LEARNING A METRIC FOR MUSIC SIMILARITY
wolffd@0	7 %
wolffd@0	8 % The usual workflow for these features consists of three steps
wolffd@0	9 % 1. extract: extracts the basic single-file dependent features
wolffd@0	10 % 2. define_global_transform: calculates the global feature
wolffd@0	11 % transformation parameters
wolffd@0	12 % 3. finalise: applies the common transformations to a specific feature
wolffd@0	13 % ---
wolffd@0	14
wolffd@0	15 properties(Constant = true)
wolffd@0	16 % revision number of this class; extract() stores it as data.info.creatorrev
wolffd@0	17 % svn hook: '$Rev$' is an svn keyword string. substr() is defined outside this file (presumably it cuts the number out of the expanded keyword -- TODO confirm)
wolffd@0	18 my_revision = str2double(substr('$Rev$', 5, -1));
wolffd@0	19 end
wolffd@0	20
wolffd@0	21 properties
wolffd@0	22 % ---
wolffd@0	23 % Set default parameters (a copy is stored with extracted data as data.info.params)
wolffd@0	24 % ---
wolffd@0	25 my_params = struct(...
wolffd@0	26 'norm_mttstats', 1, ... % if set, define_global_transform maps the statistics to [0,1] with mapminmax
wolffd@0	27 'whiten_mttstats', 0, ... % NOTE: whitening as in slaney?? (not evaluated by the methods of this class)
wolffd@0	28 'select_mttstats', 1 ...% 0 makes finalise leave the final vector empty; TODO: way to select certain features
wolffd@0	29 );
wolffd@0	30 end
wolffd@0	31
wolffd@0	32 % ---
wolffd@0	33 % member functions
wolffd@0	34 % ---
wolffd@0	35 methods
wolffd@0	36
wolffd@0	37 % ---
wolffd@0	38 % constructor: pointer to feature in database
wolffd@0	39 % ---
wolffd@0	40 function feature = MTTAudioFeatureSlaney08(varargin)
wolffd@0	41 % all arguments are forwarded unchanged to the MTTAudioFeature superclass constructor
wolffd@0	42 feature = feature@MTTAudioFeature(varargin{:});
wolffd@0	43
wolffd@0	44 end
wolffd@0	45 % ---
wolffd@0	46 % extract feature data from raw audio features
wolffd@0	47 % ---
wolffd@0	48 function data = extract(feature, clip)
wolffd@0	49 % ---
wolffd@0	50 % extract the per-clip summary statistics (data.mttstats) from the raw
wolffd@0	51 % audio features of clip. this includes possible local normalisations
wolffd@0	52 % ---
wolffd@0	53
wolffd@0	54     global globalvars;
wolffd@0	55
wolffd@0	56     % ---
wolffd@0	57     % get casimir child clip if available
wolffd@0	58     % ---
wolffd@0	59     if isa(clip, 'CASIMIRClip')
wolffd@0	60         baseclip = clip.child_clip();
wolffd@0	61     else
wolffd@0	62         baseclip = clip;
wolffd@0	63     end
wolffd@0	64     if isa(baseclip, 'MTTClip')
wolffd@0	65         rawf = baseclip.audio_features_raw();
wolffd@0	66     elseif isa(baseclip, 'MSDClip')
wolffd@0	67         rawf = baseclip.features('MSDAudioFeatureRAW');
wolffd@0	68     else
wolffd@0	69         error('MTTAudioFeatureSlaney08:unsupportedClip', 'No raw audio features available for clips of class %s', class(baseclip));
wolffd@0	70     end
wolffd@0	71     % ---
wolffd@0	72     % TODO: implement time_weighted version of the statistical
wolffd@0	73     % evaluations below
wolffd@0	74     % ---
wolffd@0	75
wolffd@0	76     % segmentDurationMean: mean segment duration (sec.).
wolffd@0	77     data.mttstats.segmentDurationMean = mean(rawf.data.segments_duration);
wolffd@0	78
wolffd@0	79     % segmentDurationVariance: variance of the segment duration
wolffd@0	80     data.mttstats.segmentDurationVariance = var(rawf.data.segments_duration);
wolffd@0	81
wolffd@0	82     % timeLoudnessMaxMean: mean time to the segment maximum, or attack duration (sec.).
wolffd@0	83     data.mttstats.timeLoudnessMaxMean = mean(rawf.data.segments_loudness_max_time);
wolffd@0	84
wolffd@0	85     % loudnessMaxMean: mean of segments' maximum loudness (dB).
wolffd@0	86     data.mttstats.loudnessMaxMean = mean(rawf.data.segments_loudness_max);
wolffd@0	87
wolffd@0	88     % loudnessMaxVariance: variance of the segments' maximum loudness (dB).
wolffd@0	89     data.mttstats.loudnessMaxVariance = var(rawf.data.segments_loudness_max);
wolffd@0	90
wolffd@0	91     % loudnessBeginMean: average loudness at the start of segments (dB)
wolffd@0	92     data.mttstats.loudnessBeginMean = mean(rawf.data.segments_loudness);
wolffd@0	93
wolffd@0	94     % loudnessBeginVariance: variance of the loudness at the start of segments (dB2). Correlated with loudnessMaxVariance
wolffd@0	95     data.mttstats.loudnessBeginVariance = var(rawf.data.segments_loudness);
wolffd@0	96
wolffd@0	97     % loudnessDynamicsMean: average of overall dynamic range in the segments (dB).
wolffd@0	98     % loudnessDynamicsVariance: segment dynamic range variance
wolffd@0	99     % (dB). Higher variances suggest more dynamics in each segment.
wolffd@0	100     % ---
wolffd@0	101     % NOTE: the above information cannot be extracted from the MTT
wolffd@0	102     % Features, maybe more recent echonest features allow for this
wolffd@0	103     % ---
wolffd@0	104
wolffd@0	105     % loudness: overall loudness estimate of the track (dB).
wolffd@0	106     data.mttstats.loudness = rawf.data.loudness;
wolffd@0	107
wolffd@0	108     % ---
wolffd@0	109     % TODO: get these from the beat loudnesses?
wolffd@0	110     % ---
wolffd@0	111
wolffd@0	112     % tempo: overall track tempo estimate (in beat per minute, BPM). Doubling and halving errors are possible.
wolffd@0	113     data.mttstats.tempo = rawf.data.tempo;
wolffd@0	114
wolffd@0	115     % tempoConfidence: a measure of the confidence of the tempo estimate (between 0 and 1).
wolffd@0	116     %data.mttstats.tempoConfidence = rawf.data.tempoConfidence;
wolffd@0	117
wolffd@0	118     beats = rawf.data.beats;
wolffd@0	119     tatums = rawf.data.tatums;
wolffd@0	120
wolffd@0	121     % beatVariance: a measure of the regularity of the beat (secs).
wolffd@0	122     if numel(beats) > 0
wolffd@0	123         bdiff = diff(beats(1,:));
wolffd@0	124         data.mttstats.beatVariance = var(bdiff);
wolffd@0	125     else
wolffd@0	126         % no beats: keep the interval list defined (empty) for the tatum section
wolffd@0	127         bdiff = [];
wolffd@0	128
wolffd@0	129         % This is a fake replacement variance
wolffd@0	130         data.mttstats.beatVariance = 0;
wolffd@0	131     end
wolffd@0	132
wolffd@0	133     % tatum: estimated overall tatum duration (in seconds). Tatums are subdivisions of the beat.
wolffd@0	134     % ---
wolffd@0	135     % note: the tatum length could be also
wolffd@0	136     % accessed by comparison with the global bpm estimate
wolffd@0	137     % ---
wolffd@0	138     if numel(tatums) > 0
wolffd@0	139         tdiff = diff(tatums(1,:));
wolffd@0	140         data.mttstats.tatum = median(tdiff);
wolffd@0	141
wolffd@0	142         % tatumConfidence: a measure of the confidence of the tatum estimate (between 0 and 1).
wolffd@0	143         data.mttstats.tatumConfidence = mean(tatums(2,:));
wolffd@0	144
wolffd@0	145         % numTatumsPerBeat: number of tatums per beat. Without any beat
wolffd@0	146         % interval the same default as in the no-tatum branch is used
wolffd@0	147         if isempty(bdiff)
wolffd@0	148             data.mttstats.numTatumsPerBeat = 2;
wolffd@0	149         else
wolffd@0	150             data.mttstats.numTatumsPerBeat = median(bdiff) / data.mttstats.tatum;
wolffd@0	151         end
wolffd@0	152     else
wolffd@0	153         % This is a fake replacement tatum
wolffd@0	154         % TODO: maybe set confidence to -1?
wolffd@0	155         data.mttstats.tatum = 0;
wolffd@0	156
wolffd@0	157         % tatumConfidence: a measure of the confidence of the tatum estimate (between 0 and 1).
wolffd@0	158         data.mttstats.tatumConfidence = 0;
wolffd@0	159
wolffd@0	160         % numTatumsPerBeat: number of tatums per beat
wolffd@0	161         data.mttstats.numTatumsPerBeat = 2;
wolffd@0	162     end
wolffd@0	163
wolffd@0	164
wolffd@0	165     % ---
wolffd@0	166     % TODO: beat analysis
wolffd@0	167     % ---
wolffd@0	168
wolffd@0	169     % timeSignature: estimated time signature (number of beats per measure). (0-7 / 7)
wolffd@0	170     data.mttstats.timeSignature = rawf.data.timeSignature;
wolffd@0	171
wolffd@0	172     % timeSignatureStability: a rough estimate of the stability of the time signature throughout the track
wolffd@0	173     data.mttstats.timeSignatureStability = rawf.data.timeSignatureConfidence;
wolffd@0	174
wolffd@0	175     % ---
wolffd@0	176     % prepare field for final features
wolffd@0	177     % ---
wolffd@0	178     data.final.vector = [];
wolffd@0	179     data.final.vector_info = struct();
wolffd@0	180     data.final.dim = 0;
wolffd@0	181
wolffd@0	182     % save info data
wolffd@0	183     data.info.type = 'MTTAudioFeatureSlaney08';
wolffd@0	184     data.info.owner_id = clip.id;
wolffd@0	185     data.info.creatorrev = feature.my_revision;
wolffd@0	186
wolffd@0	187     % save parameters
wolffd@0	188     data.info.params = feature.my_params;
wolffd@0	189 end
wolffd@0	190
wolffd@0	191 function define_global_transform(features)
wolffd@0	192 % calculate and set normalization factors from the group of
wolffd@0	193 % input features. These features will be set for the full database.
wolffd@0	194 % features: array of feature objects whose data.mttstats is filled;
wolffd@0	195 % the parameters of features(1) decide whether to normalise.
wolffd@0	196 % The resulting columns are passed on to finalise() at the end.
wolffd@0	197
wolffd@0	198     do_norm = features(1).my_params.norm_mttstats;
wolffd@0	199     if do_norm
wolffd@0	200         % fail before any work is done: this method needs a feature array
wolffd@0	201         if numel(features) == 1
wolffd@0	202             error ('Insert feature array for this method, or set normalisation to 0');
wolffd@0	203         end
wolffd@0	204     end
wolffd@0	205
wolffd@0	206     % running backwards allocates the full matrix in the first pass
wolffd@0	207     for i = numel(features):-1:1
wolffd@0	208         data = features(i).data.mttstats;
wolffd@0	209         final(:,i) = [data.segmentDurationMean; ...
wolffd@0	210             data.segmentDurationVariance; ...
wolffd@0	211             data.timeLoudnessMaxMean; ...
wolffd@0	212             data.loudnessMaxMean; ...
wolffd@0	213             data.loudnessMaxVariance; ...
wolffd@0	214             data.loudnessBeginMean; ...
wolffd@0	215             data.loudnessBeginVariance; ...
wolffd@0	216             data.loudness; ...
wolffd@0	217             data.tempo; ...
wolffd@0	218             ... % data.tempoConfidence; ...
wolffd@0	219             data.beatVariance; ...
wolffd@0	220             data.tatum; ...
wolffd@0	221             data.tatumConfidence; ...
wolffd@0	222             data.numTatumsPerBeat; ...
wolffd@0	223             data.timeSignature; ...
wolffd@0	224             data.timeSignatureStability];
wolffd@0	225     end
wolffd@0	226
wolffd@0	227     if do_norm
wolffd@0	228         % here, we only need to define the post-normalisation:
wolffd@0	229         % mapminmax scales every row (one statistic) to the range [0,1]
wolffd@0	230         [final, pstd] = mapminmax(final,0,1);
wolffd@0	231         common.mttstats.pre_norm = pstd;
wolffd@0	232
wolffd@0	233         % NOTE: whitening as in slaney??
wolffd@0	234         % Would make reading the mahal matrices really hard
wolffd@0	235         features(1).my_db.set_common(common);
wolffd@0	236     else
wolffd@0	237         features(1).my_db.set_common([1]);
wolffd@0	238     end
wolffd@0	239
wolffd@0	240     % save the normalised features straight away!
wolffd@0	241     features.finalise(final);
wolffd@0	242 end
wolffd@0	243
wolffd@0	244
wolffd@0	245 function finalise(features, final)
wolffd@0	246 % collects the statistics of each feature within a single vector
wolffd@0	247 % (data.final.vector, labelled by data.final.vector_info.labels).
wolffd@0	248 % final (optional): matrix with one ready-made column per feature;
wolffd@0	249 % if it is missing or its column count does not fit, the vector is
wolffd@0	250 % rebuilt from data.mttstats and the stored normalisation is applied
wolffd@0	251     % ---
wolffd@0	252     % check for dummy feature
wolffd@0	253     % ---
wolffd@0	254     if isfield(features(1).my_params,'select_mttstats') && ...
wolffd@0	255             isnumeric(features(1).my_params.select_mttstats) && ...
wolffd@0	256             features(1).my_params.select_mttstats == 0
wolffd@0	257
wolffd@0	258         % if no information needed just fill everything 0
wolffd@0	259         for i = 1:numel(features)
wolffd@0	260             features(i).data.final.vector = [];
wolffd@0	261             features(i).data.final.dim = 0;
wolffd@0	262
wolffd@0	263             % fill up info struct and append to feature
wolffd@0	264             features(i).data.final.vector_info.labels = {};
wolffd@0	265         end
wolffd@0	266
wolffd@0	267         return;
wolffd@0	268     end
wolffd@0	269
wolffd@0	270     % ---
wolffd@0	271     % set feature labelling
wolffd@0	272     % ---
wolffd@0	273     info = {'segmentDurationMean', ...
wolffd@0	274         'segmentDurationVariance', ...
wolffd@0	275         'timeLoudnessMaxMean', ...
wolffd@0	276         'loudnessMaxMean', ...
wolffd@0	277         'loudnessMaxVariance', ...
wolffd@0	278         'loudnessBeginMean', ...
wolffd@0	279         'loudnessBeginVariance', ...
wolffd@0	280         'loudness', ...
wolffd@0	281         'tempo', ...
wolffd@0	282         ...% 'tempoConfidence', ...
wolffd@0	283         'beatVariance', ...
wolffd@0	284         'tatum', ...
wolffd@0	285         'tatumConfidence', ...
wolffd@0	286         'numTatumsPerBeat', ...
wolffd@0	287         'timeSignature', ...
wolffd@0	288         'timeSignatureStability'};
wolffd@0	289
wolffd@0	290     % ---
wolffd@0	291     % construct resulting feature vector out of features
wolffd@0	292     % ---
wolffd@0	293     if nargin == 2 && isempty(final)
wolffd@0	294
wolffd@0	295         % the final vector etc already are set to zero;
wolffd@0	296         return;
wolffd@0	297
wolffd@0	298     elseif nargin == 2 && (numel(features) == size(final, 2))
wolffd@0	299         for i = 1:numel(features)
wolffd@0	300
wolffd@0	301             % check for necessary parameters; error() leaves the
wolffd@0	302             % function, features handled before this one stay written
wolffd@0	303             if isempty(features(i).my_db.commondb)
wolffd@0	304                 error('Define the global transformation first')
wolffd@0	305             end
wolffd@0	306
wolffd@0	307             % column i of the given matrix belongs to feature i
wolffd@0	308             features(i).data.final.vector = final(:,i);
wolffd@0	309             features(i).data.final.dim = size(final,1);
wolffd@0	310
wolffd@0	311             % fill up info struct and append to feature
wolffd@0	312             features(i).data.final.vector_info.labels = info;
wolffd@0	313         end
wolffd@0	314     else
wolffd@0	315         % ---
wolffd@0	316         % if features have been added after getting normalisation
wolffd@0	317         % parameters, there should be still an option to include
wolffd@0	318         % them
wolffd@0	319         % ---
wolffd@0	320
wolffd@0	321         for i = 1:numel(features)
wolffd@0	322
wolffd@0	323             % check for necessary parameters; error() leaves the
wolffd@0	324             % function, features handled before this one stay written
wolffd@0	325             if isempty(features(i).my_db.commondb)
wolffd@0	326                 error('Define the global transformation first')
wolffd@0	327             end
wolffd@0	328
wolffd@0	329             % same row order as the labels in info
wolffd@0	330             data = features(i).data.mttstats;
wolffd@0	331             final = [data.segmentDurationMean; ...
wolffd@0	332                 data.segmentDurationVariance; ...
wolffd@0	333                 data.timeLoudnessMaxMean; ...
wolffd@0	334                 data.loudnessMaxMean; ...
wolffd@0	335                 data.loudnessMaxVariance; ...
wolffd@0	336                 data.loudnessBeginMean; ...
wolffd@0	337                 data.loudnessBeginVariance; ...
wolffd@0	338                 data.loudness; ...
wolffd@0	339                 data.tempo; ...
wolffd@0	340                 ... % data.tempoConfidence; ...
wolffd@0	341                 data.beatVariance; ...
wolffd@0	342                 data.tatum; ...
wolffd@0	343                 data.tatumConfidence; ...
wolffd@0	344                 data.numTatumsPerBeat; ...
wolffd@0	345                 data.timeSignature; ...
wolffd@0	346                 data.timeSignatureStability];
wolffd@0	347
wolffd@0	348             if features(1).my_params.norm_mttstats == 1
wolffd@0	349                 % re-use the mapminmax settings stored as pre_norm
wolffd@0	350                 [final] = mapminmax('apply', final, features(1).common.mttstats.pre_norm);
wolffd@0	351             end
wolffd@0	352
wolffd@0	353             features(i).data.final.vector = final;
wolffd@0	354             features(i).data.final.dim = size(final,1);
wolffd@0	355
wolffd@0	356             % fill up info struct and append to feature
wolffd@0	357             features(i).data.final.vector_info.labels = info;
wolffd@0	358         end
wolffd@0	359
wolffd@0	360     end
wolffd@0	361
wolffd@0	362     % ---
wolffd@0	363     % TODO: Maybe delete more basic features again at this point?
wolffd@0	364     % ---
wolffd@0	365 end
wolffd@0	366
wolffd@0	367 % ---
wolffd@0	368 % destructor: do we really want to remove this
wolffd@0	369 % from the database? No, but
wolffd@0	370 % TODO: create marker for unused objects in db, and a cleanup
wolffd@0	371 % function
wolffd@0	372 % ---
wolffd@0	373 function delete(feature)
wolffd@0	374 % intentionally empty: this destructor performs no clean-up and removes nothing from the database
wolffd@0	375 end
wolffd@0	376 end
wolffd@0	377 end

Mercurial > hg > camir-aes2014

annotate core/magnatagatune/MTTAudioFeatureSlaney08.m @ 0:e9a9cd732c1e tip