Mercurial > hg > camir-aes2014
diff core/magnatagatune/MTTAudioFeatureSlaney08.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
line wrap: on
line diff
classdef MTTAudioFeatureSlaney08 < MTTAudioFeature & handle
    % ---
    % This class contains a basic summary of MTT features complementary to
    % those in MTTAudioFeatureBasicSm. Features are extracted as described
    % in Slaney 08 - LEARNING A METRIC FOR MUSIC SIMILARITY
    %
    % The usual workflow for these features consists of three steps
    % 1. extract: extracts the basic single-file dependent features
    % 2. define_global_transform: calculates the global feature
    %    transformation parameters
    % 3. finalise: applies the common transformations to a specific feature
    % ---

    properties(Constant = true)

        % svn hook
        my_revision = str2double(substr('$Rev$', 5, -1));
    end

    properties
        % ---
        % Set default parameters
        % ---
        my_params = struct(...
            'norm_mttstats', 1, ... % non-zero: min-max normalise the statistics
            'whiten_mttstats', 0, ... % NOTE: whitening as in slaney??
            'select_mttstats', 1 ...% TODO: way to select certain features
            );
    end

    % ---
    % member functions
    % ---
    methods

        % ---
        % constructor: pointer to feature in database
        % All arguments are forwarded to the MTTAudioFeature constructor.
        % ---
        function feature = MTTAudioFeatureSlaney08(varargin)

            feature = feature@MTTAudioFeature(varargin{:});

        end

        % ---
        % extract feature data from raw audio features
        % ---
        function data = extract(feature, clip)
            % data = extract(feature, clip)
            %
            % Computes the per-clip summary statistics from the raw audio
            % features of clip and returns them in data.mttstats, together
            % with an empty data.final placeholder and bookkeeping in
            % data.info.
            %
            % clip: a CASIMIRClip (its child clip is used), an MTTClip or
            %       an MSDClip. Any other class raises an error.

            % ---
            % get casimir child clip if available
            % ---
            if isa(clip, 'CASIMIRClip')
                baseclip = clip.child_clip();
            else
                baseclip = clip;
            end

            if isa(baseclip, 'MTTClip')
                rawf = baseclip.audio_features_raw();
            elseif isa(baseclip, 'MSDClip')
                rawf = baseclip.features('MSDAudioFeatureRAW');
            else
                % without this branch rawf would stay undefined and the
                % first statistic below would fail with an unrelated message
                error('MTTAudioFeatureSlaney08:extract:unsupportedClip', ...
                    'Cannot extract raw features from a clip of class %s', ...
                    class(baseclip));
            end

            % ---
            % TODO: implement time_weighted version of the statistical
            % evaluations below
            % ---

            % segmentDurationMean: mean segment duration (sec.).
            data.mttstats.segmentDurationMean = mean(rawf.data.segments_duration);

            % segmentDurationVariance: variance of the segment duration
            data.mttstats.segmentDurationVariance = var(rawf.data.segments_duration);

            % timeLoudnessMaxMean: mean time to the segment maximum, or
            % attack duration (sec.).
            data.mttstats.timeLoudnessMaxMean = mean(rawf.data.segments_loudness_max_time);

            % loudnessMaxMean: mean of segments' maximum loudness (dB).
            data.mttstats.loudnessMaxMean = mean(rawf.data.segments_loudness_max);

            % loudnessMaxVariance: variance of the segments' maximum
            % loudness (dB).
            data.mttstats.loudnessMaxVariance = var(rawf.data.segments_loudness_max);

            % loudnessBeginMean: average loudness at the start of segments (dB)
            data.mttstats.loudnessBeginMean = mean(rawf.data.segments_loudness);

            % loudnessBeginVariance: variance of the loudness at the start
            % of segments (dB2). Correlated with loudnessMaxVariance
            data.mttstats.loudnessBeginVariance = var(rawf.data.segments_loudness);

            % loudnessDynamicsMean: average of overall dynamic range in the
            % segments (dB).
            % loudnessDynamicsVariance: segment dynamic range variance
            % (dB). Higher variances suggest more dynamics in each segment.
            % ---
            % NOTE: the above information cannot be extracted from the MTT
            % Features, maybe more recent echonest features allow for this
            % ---

            % loudness: overall loudness estimate of the track (dB).
            data.mttstats.loudness = rawf.data.loudness;

            % ---
            % TODO: get these from the beat loudnesses?
            % ---

            % tempo: overall track tempo estimate (in beats per minute,
            % BPM). Doubling and halving errors are possible.
            data.mttstats.tempo = rawf.data.tempo;

            % tempoConfidence: a measure of the confidence of the tempo
            % estimate (between 0 and 1).
            %data.mttstats.tempoConfidence = rawf.data.tempoConfidence;

            beats = rawf.data.beats;
            tatums = rawf.data.tatums;

            % beatVariance: a measure of the regularity of the beat (secs).
            % Row 1 of beats is differenced to get inter-beat intervals.
            have_beats = numel(beats) > 0;
            if have_beats
                bdiff = diff(beats(1,:));
                data.mttstats.beatVariance = var(bdiff);
            else

                % ---
                % This is a fake replacement variance
                % ---
                data.mttstats.beatVariance = 0;
            end

            % tatum: estimated overall tatum duration (in seconds). Tatums
            % are subdivisions of the beat.
            % ---
            % note: the tatum length could be also
            % accessed by comparison with the global bpm estimate
            % ---
            if numel(tatums) > 0
                tdiff = diff(tatums(1,:));
                data.mttstats.tatum = median(tdiff);

                % tatumConfidence: a measure of the confidence of the tatum
                % estimate (between 0 and 1). Taken from row 2 of tatums.
                data.mttstats.tatumConfidence = mean(tatums(2,:));

                % numTatumsPerBeat: number of tatums per beat
                if have_beats
                    data.mttstats.numTatumsPerBeat = median(bdiff) / data.mttstats.tatum;
                else
                    % bdiff does not exist without beats; use the same
                    % replacement value as the no-tatum case below
                    data.mttstats.numTatumsPerBeat = 2;
                end
            else
                % ---
                % This is a fake replacement tatum
                % TODO: maybe set confidence to -1?
                % ---
                data.mttstats.tatum = 0;

                % tatumConfidence: a measure of the confidence of the tatum
                % estimate (between 0 and 1).
                data.mttstats.tatumConfidence = 0;

                % numTatumsPerBeat: number of tatums per beat
                data.mttstats.numTatumsPerBeat = 2;
            end

            % ---
            % TODO: beat analysis
            % ---

            % timeSignature: estimated time signature (number of beats per
            % measure). (0-7 / 7)
            data.mttstats.timeSignature = rawf.data.timeSignature;

            % timeSignatureStability: a rough estimate of the stability of
            % the time signature throughout the track
            data.mttstats.timeSignatureStability = rawf.data.timeSignatureConfidence;

            % ---
            % prepare field for final features
            % ---
            data.final.vector = [];
            data.final.vector_info = struct();
            data.final.dim = 0;

            % save info data
            data.info.type = 'MTTAudioFeatureSlaney08';
            data.info.owner_id = clip.id;
            data.info.creatorrev = feature.my_revision;

            % save parameters
            data.info.params = feature.my_params;
        end

        function define_global_transform(features)
            % define_global_transform(features)
            %
            % Calculates and sets normalisation factors from the group of
            % input features. The result is stored through
            % features(1).my_db.set_common and the (normalised) vectors are
            % written back to the features via finalise.
            %
            % features: array of feature objects with extracted mttstats.
            %           More than one element is required when
            %           my_params.norm_mttstats is non-zero.

            % iterate in reverse so the first assignment allocates the
            % complete matrix instead of growing it column by column
            for i = numel(features):-1:1
                final(:,i) = MTTAudioFeatureSlaney08.mttstats_vector( ...
                    features(i).data.mttstats);
            end

            if features(1).my_params.norm_mttstats
                if numel(features) == 1
                    error ('Insert feature array for this method, or set normalisation to 0');
                end

                % ---
                % here, we only need to define the post-normalisation:
                % mapminmax rescales every row (statistic) to [0, 1] and
                % returns the settings needed to re-apply the mapping
                % ---
                [final, pstd] = mapminmax(final,0,1);
                common.mttstats.pre_norm = pstd;

                % ---
                % NOTE: whitening as in slaney??
                % Would make reading the
                % mahal matrices really hard
                % ---

                features(1).my_db.set_common(common);

            else

                % no transformation parameters; store a non-empty marker
                features(1).my_db.set_common([1]);
            end

            % save the normalised features straight away!
            features.finalise(final);
        end


        function finalise(features, final)
            % finalise(features)
            % finalise(features, final)
            %
            % Applies the final transformation and collects the information
            % of this feature within a single vector; see
            % data.final.vector_info.labels for the meaning of each
            % dimension.
            %
            % features: array of feature objects
            % final:    optional matrix of ready-made vectors, one column
            %           per feature. An empty matrix leaves the features
            %           untouched. If it is omitted, or its column count
            %           does not match numel(features), every vector is
            %           rebuilt from data.mttstats and, when
            %           my_params.norm_mttstats is non-zero, normalised
            %           with the stored common parameters.
            %
            % Raises an error if the global transformation has not been
            % defined yet (my_db.commondb is empty).

            % ---
            % check for dummy feature
            % ---
            if isfield(features(1).my_params,'select_mttstats') && ...
                    isnumeric(features(1).my_params.select_mttstats) && ...
                    features(1).my_params.select_mttstats == 0

                % if no information needed just fill everything 0
                for i = 1:numel(features)
                    features(i).data.final.vector = [];
                    features(i).data.final.dim = 0;

                    % fill up info struct and append to feature
                    features(i).data.final.vector_info.labels = {};
                end

                return;
            end

            % ---
            % set feature labelling
            % ---
            info = MTTAudioFeatureSlaney08.mttstats_labels();

            % ---
            % construct resulting feature vector out of features
            % ---
            if nargin == 2 && isempty(final)

                % the final vector etc already are set to zero;
                return;

            elseif nargin == 2 && (numel(features) == size(final, 2))
                for i = 1:numel(features)

                    % check for necessary parameters
                    if isempty(features(i).my_db.commondb)

                        error('Define the global transformation first')
                    end

                    features(i).data.final.vector = final(:,i);
                    features(i).data.final.dim = size(final,1);

                    % fill up info struct and append to feature
                    features(i).data.final.vector_info.labels = info;
                end
            else
                % ---
                % if features have been added after getting the
                % normalisation parameters, there should still be an
                % option to include them
                % ---

                for i = 1:numel(features)

                    % check for necessary parameters
                    if isempty(features(i).my_db.commondb)

                        error('Define the global transformation first')
                    end

                    vec = MTTAudioFeatureSlaney08.mttstats_vector( ...
                        features(i).data.mttstats);

                    % same test as in define_global_transform, so that
                    % late features are treated like the initial batch
                    if features(1).my_params.norm_mttstats

                        vec = mapminmax('apply', vec, features(1).common.mttstats.pre_norm);
                    end

                    features(i).data.final.vector = vec;
                    features(i).data.final.dim = size(vec,1);

                    % fill up info struct and append to feature
                    features(i).data.final.vector_info.labels = info;
                end

            end

            % ---
            % TODO: Maybe delete more basic features again at this point?
            % ---
        end

        % ---
        % destructor: do we really want to remove this
        % from the database? No, but
        % TODO: create marker for unused objects in db, and a cleanup
        % function
        % ---
        function delete(feature)

        end
    end

    % ---
    % private helpers: single definition of the feature vector layout
    % ---
    methods (Static, Access = private)

        function labels = mttstats_labels()
            % returns the names of the vector dimensions, in the same
            % order as the entries produced by mttstats_vector
            % (tempoConfidence is deliberately not part of the vector)
            labels = {'segmentDurationMean', ...
                'segmentDurationVariance', ...
                'timeLoudnessMaxMean', ...
                'loudnessMaxMean', ...
                'loudnessMaxVariance', ...
                'loudnessBeginMean', ...
                'loudnessBeginVariance', ...
                'loudness', ...
                'tempo', ...
                'beatVariance', ...
                'tatum', ...
                'tatumConfidence', ...
                'numTatumsPerBeat', ...
                'timeSignature', ...
                'timeSignatureStability'};
        end

        function vec = mttstats_vector(stats)
            % stacks the statistics of one mttstats struct (as created by
            % extract) into a column vector, ordered like mttstats_labels
            vec = [stats.segmentDurationMean; ...
                stats.segmentDurationVariance; ...
                stats.timeLoudnessMaxMean; ...
                stats.loudnessMaxMean; ...
                stats.loudnessMaxVariance; ...
                stats.loudnessBeginMean; ...
                stats.loudnessBeginVariance; ...
                stats.loudness; ...
                stats.tempo; ...
                stats.beatVariance; ...
                stats.tatum; ...
                stats.tatumConfidence; ...
                stats.numTatumsPerBeat; ...
                stats.timeSignature; ...
                stats.timeSignatureStability];
        end
    end
end