Mercurial > hg > camir-aes2014
view core/magnatagatune/MTTAudioFeatureSlaney08.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
line wrap: on
line source
classdef MTTAudioFeatureSlaney08 < MTTAudioFeature & handle
    % ---
    % This class contains a basic summary of MTT features complementary
    % to those in MTTAudioFeatureBasicSm. Features are extracted as
    % described in Slaney 08 - LEARNING A METRIC FOR MUSIC SIMILARITY.
    %
    % The usual workflow for these features consists of three steps:
    % 1. extract: extracts the basic single-file dependent features
    % 2. define_global_transform: calculates the global feature
    %    transformation parameters
    % 3. finalise: applies the common transformations to a specific feature
    % ---

    properties(Constant = true)

        % svn hook
        my_revision = str2double(substr('$Rev$', 5, -1));
    end

    properties

        % ---
        % Set default parameters
        % ---
        my_params = struct(...
            'norm_mttstats', 1, ... %
            'whiten_mttstats', 0, ... % NOTE: whitening as in slaney??
            'select_mttstats', 1 ...% TODO: way to select certain features
            );
    end

    % ---
    % member functions
    % ---
    methods

        % ---
        % constructor: pointer to feature in database
        % ---
        function feature = MTTAudioFeatureSlaney08(varargin)

            feature = feature@MTTAudioFeature(varargin{:});
        end

        % ---
        % extract feature data from raw audio features
        % ---
        function data = extract(feature, clip)
            % ---
            % get features. this includes possible
            % local normalisations
            % ---

            global globalvars;

            % ---
            % get casimir child clip if available
            % ---
            if isa(clip, 'CASIMIRClip')
                baseclip = clip.child_clip();
            else
                baseclip = clip;
            end
            if isa(baseclip, 'MTTClip')
                rawf = baseclip.audio_features_raw();
            elseif isa(baseclip, 'MSDClip')
                rawf = baseclip.features('MSDAudioFeatureRAW');
            end

            % ---
            % TODO: implement time_weighted version of the statistical
            % evaluations below
            % ---

            % segmentDurationMean: mean segment duration (sec.).
            data.mttstats.segmentDurationMean = mean(rawf.data.segments_duration);

            % segmentDurationVariance: variance of the segment duration
            data.mttstats.segmentDurationVariance = var(rawf.data.segments_duration);

            % timeLoudnessMaxMean: mean time to the segment maximum, or attack duration (sec.).
            data.mttstats.timeLoudnessMaxMean = mean(rawf.data.segments_loudness_max_time);

            % loudnessMaxMean: mean of segments' maximum loudness (dB).
            data.mttstats.loudnessMaxMean = mean(rawf.data.segments_loudness_max);

            % loudnessMaxVariance: variance of the segments' maximum loudness (dB).
            data.mttstats.loudnessMaxVariance = var(rawf.data.segments_loudness_max);

            % loudnessBeginMean: average loudness at the start of segments (dB)
            data.mttstats.loudnessBeginMean = mean(rawf.data.segments_loudness);

            % loudnessBeginVariance: variance of the loudness at the start of
            % segments (dB2). Correlated with loudnessMaxVariance
            data.mttstats.loudnessBeginVariance = var(rawf.data.segments_loudness);

            % loudnessDynamicsMean: average of overall dynamic range in the segments (dB).
            % loudnessDynamicsVariance: segment dynamic range variance
            % (dB). Higher variances suggest more dynamics in each segment.
            % ---
            % NOTE: the above information cannot be extracted from the MTT
            % Features, maybe more recent echonest features allow for this
            % ---

            % loudness: overall loudness estimate of the track (dB).
            data.mttstats.loudness = rawf.data.loudness;

            % ---
            % TODO: get these from the beat loudnesses?
            % ---

            % tempo: overall track tempo estimate (in beats per minute, BPM).
            % Doubling and halving errors are possible.
            data.mttstats.tempo = rawf.data.tempo;

            % tempoConfidence: a measure of the confidence of the tempo
            % estimate (between 0 and 1).
            %data.mttstats.tempoConfidence = rawf.data.tempoConfidence;

            beats = rawf.data.beats;
            tatums = rawf.data.tatums;

            % beatVariance: a measure of the regularity of the beat (secs).
            if numel(beats) > 0
                bdiff = diff(beats(1,:));
                data.mttstats.beatVariance = var(bdiff);
            else
                % ---
                % This is a fake replacement variance
                % ---
                data.mttstats.beatVariance = 0;
            end

            % tatum: estimated overall tatum duration (in seconds).
            % Tatums are subdivisions of the beat.
            % ---
            % note: the tatum length could be also
            % accessed by comparison with the global bpm estimate
            % ---
            if numel(tatums) > 0
                tdiff = diff(tatums(1,:));
                data.mttstats.tatum = median(tdiff);

                % tatumConfidence: a measure of the confidence of the tatum
                % estimate (between 0 and 1).
                data.mttstats.tatumConfidence = mean(tatums(2,:));

                % numTatumsPerBeat: number of tatums per beat.
                % NOTE(fix): bdiff is only defined when the clip has beats;
                % guard against missing beats (and a zero tatum length) to
                % avoid an undefined-variable error / division by zero.
                if numel(beats) > 0 && data.mttstats.tatum > 0
                    data.mttstats.numTatumsPerBeat = median(bdiff) / data.mttstats.tatum;
                else
                    % same fallback value as the no-tatum branch below
                    data.mttstats.numTatumsPerBeat = 2;
                end
            else
                % ---
                % This is a fake replacement tatum
                % TODO: maybe set confidence to -1?
                % ---
                data.mttstats.tatum = 0;

                % tatumConfidence: a measure of the confidence of the tatum
                % estimate (between 0 and 1).
                data.mttstats.tatumConfidence = 0;

                % numTatumsPerBeat: number of tatums per beat
                data.mttstats.numTatumsPerBeat = 2;
            end

            % ---
            % TODO: beat analysis
            % ---

            % timeSignature: estimated time signature (number of beats per
            % measure). (0-7 / 7)
            data.mttstats.timeSignature = rawf.data.timeSignature;

            % timeSignatureStability: a rough estimate of the stability of
            % the time signature throughout the track
            data.mttstats.timeSignatureStability = rawf.data.timeSignatureConfidence;

            % ---
            % prepare field for final features
            % ---
            data.final.vector = [];
            data.final.vector_info = struct();
            data.final.dim = 0;

            % save info data
            data.info.type = 'MTTAudioFeatureSlaney08';
            data.info.owner_id = clip.id;
            data.info.creatorrev = feature.my_revision;

            % save parameters
            data.info.params = feature.my_params;
        end

        function define_global_transform(features)
            % calculate and set normalisation factors from the group of
            % input features. These features will be set for the full database

            % collect the 15 raw statistics of every feature into columns
            final = zeros(15, numel(features));
            for i = 1:numel(features)
                data = features(i).data.mttstats;
                final(:,i) = [data.segmentDurationMean; ...
                    data.segmentDurationVariance; ...
                    data.timeLoudnessMaxMean; ...
                    data.loudnessMaxMean; ...
                    data.loudnessMaxVariance; ...
                    data.loudnessBeginMean; ...
                    data.loudnessBeginVariance; ...
                    data.loudness; ...
                    data.tempo; ...
                    ... % data.tempoConfidence; ...
                    data.beatVariance; ...
                    data.tatum; ...
                    data.tatumConfidence; ...
                    data.numTatumsPerBeat; ...
                    data.timeSignature; ...
                    data.timeSignatureStability];
            end

            if features(1).my_params.norm_mttstats
                if numel(features) == 1
                    error ('Insert feature array for this method, or set normalisation to 0');
                end

                % ---
                % here, we only need to define the post-normalisation
                % ---
                [final, pstd] = mapminmax(final,0,1);
                common.mttstats.pre_norm = pstd;

                % ---
                % NOTE: whitening as in slaney??
                % Would make reading the
                % mahal matrices really hard
                % ---

                features(1).my_db.set_common(common);
            else
                features(1).my_db.set_common([1]);
            end

            % save the normalised features straight away!
            features.finalise(final);
        end

        function finalise(features, final)
            % applies a final transformation and
            % collects the information of this feature within a single vector
            % see info for types in specific dimensions

            % ---
            % check for dummy feature
            % ---
            if isfield(features(1).my_params,'select_mttstats') && ...
                    isnumeric(features(1).my_params.select_mttstats) && ...
                    features(1).my_params.select_mttstats == 0

                % if no information needed just fill everything with 0
                for i = 1:numel(features)
                    features(i).data.final.vector = [];
                    features(i).data.final.dim = 0;

                    % fill up info struct and append to feature
                    features(i).data.final.vector_info.labels = {};
                end
                return;
            end

            % ---
            % set feature labelling
            % ---
            info = {'segmentDurationMean', ...
                'segmentDurationVariance', ...
                'timeLoudnessMaxMean', ...
                'loudnessMaxMean', ...
                'loudnessMaxVariance', ...
                'loudnessBeginMean', ...
                'loudnessBeginVariance', ...
                'loudness', ...
                'tempo', ...
                ...% 'tempoConfidence', ...
                'beatVariance', ...
                'tatum', ...
                'tatumConfidence', ...
                'numTatumsPerBeat', ...
                'timeSignature', ...
                'timeSignatureStability'};

            % ---
            % construct resulting feature vector out of features
            % ---
            if nargin == 2 && isempty(final)

                % the final vector etc already are set to zero;
                return;
            elseif nargin == 2 && (numel(features) == size(final, 2))

                for i = 1:numel(features)

                    % check for necessary parameters
                    if isempty(features(i).my_db.commondb)
                        error('Define the global transformation first');
                    end

                    features(i).data.final.vector = final(:,i);
                    features(i).data.final.dim = size(final,1);

                    % fill up info struct and append to feature
                    features(i).data.final.vector_info.labels = info;
                end
            else
                % ---
                % if features have been added after getting normalisation
                % parameters, there should be still an option to include
                % them
                % ---
                for i = 1:numel(features)

                    % check for necessary parameters
                    if isempty(features(i).my_db.commondb)
                        error('Define the global transformation first');
                    end

                    data = features(i).data.mttstats;
                    final = [data.segmentDurationMean; ...
                        data.segmentDurationVariance; ...
                        data.timeLoudnessMaxMean; ...
                        data.loudnessMaxMean; ...
                        data.loudnessMaxVariance; ...
                        data.loudnessBeginMean; ...
                        data.loudnessBeginVariance; ...
                        data.loudness; ...
                        data.tempo; ...
                        ... % data.tempoConfidence; ...
                        data.beatVariance; ...
                        data.tatum; ...
                        data.tatumConfidence; ...
                        data.numTatumsPerBeat; ...
                        data.timeSignature; ...
                        data.timeSignatureStability];

                    if features(1).my_params.norm_mttstats == 1
                        [final] = mapminmax('apply', final, features(1).common.mttstats.pre_norm);
                    end

                    features(i).data.final.vector = final;
                    features(i).data.final.dim = size(final,1);

                    % fill up info struct and append to feature
                    features(i).data.final.vector_info.labels = info;
                end
            end

            % ---
            % TODO: Maybe delete more basic features again at this point?
            % ---
        end

        % ---
        % destructor: do we really want to remove this
        % from the database? No, but
        % TODO: create marker for unused objects in db, and a cleanup
        % function
        % ---
        function delete(feature)

        end
    end
end