classdef MTTAudioFeatureSlaney08 < MTTAudioFeature & handle
    % ---
    % This class contains a basic summary of MTT features complementary to
    % those in MTTAudioFeatureBasicSm. Features are extracted as described
    % in Slaney 08 - LEARNING A METRIC FOR MUSIC SIMILARITY
    %
    % The usual workflow for these features consists of three steps
    % 1. extract: extracts the basic single-file dependent features
    % 2. define_global_transform: calculates the global feature
    %       transformation parameters
    % 3. finalise: applies the common transformations to a specific feature
    % ---

    properties(Constant = true)

        % svn hook
        my_revision = str2double(substr('$Rev$', 5, -1));
    end

    properties
        % ---
        % Set default parameters
        % ---
        my_params = struct(...
            'norm_mttstats', 1, ... % min-max normalise the statistics
            'whiten_mttstats', 0, ... % NOTE: whitening as in slaney??
            'select_mttstats', 1 ...% TODO: way to select certain features
            );
    end

    % ---
    % member functions
    % ---
    methods

        % ---
        % constructor: pointer to feature in database
        % ---
        function feature = MTTAudioFeatureSlaney08(varargin)
            % All arguments are forwarded unchanged to MTTAudioFeature.

            feature = feature@MTTAudioFeature(varargin{:});
        end

        % ---
        % extract feature data from raw audio features
        % ---
        function data = extract(feature, clip)
            % data = extract(feature, clip)
            %
            % Computes the per-clip summary statistics from the raw audio
            % features of the clip. This includes possible local
            % normalisations.
            %
            % clip: MTTClip or MSDClip, or a CASIMIRClip whose child clip
            %       is used instead.
            % data: struct with the fields
            %       .mttstats  the raw statistics
            %       .final     empty placeholders, filled by finalise
            %       .info      type, owner id, revision and parameters
            %
            % Raises an error if the (child) clip is neither an MTTClip
            % nor an MSDClip.

            % ---
            % get casimir child clip if available
            % ---
            if isa(clip, 'CASIMIRClip')
                baseclip = clip.child_clip();
            else
                baseclip = clip;
            end

            if isa(baseclip, 'MTTClip')
                rawf = baseclip.audio_features_raw();
            elseif isa(baseclip, 'MSDClip')
                rawf = baseclip.features('MSDAudioFeatureRAW');
            else
                % fail here instead of with an "undefined variable rawf"
                % error further down
                error('MTTAudioFeatureSlaney08:extract:unsupportedClip', ...
                    'Cannot get raw audio features from a clip of class %s', ...
                    class(baseclip));
            end

            % ---
            % TODO: implement time_weighted version of the statistical
            %   evaluations below
            % ---

            % segmentDurationMean: mean segment duration (sec.).
            data.mttstats.segmentDurationMean = mean(rawf.data.segments_duration);

            % segmentDurationVariance: variance of the segment duration
            data.mttstats.segmentDurationVariance = var(rawf.data.segments_duration);

            % timeLoudnessMaxMean: mean time to the segment maximum, or attack duration (sec.).
            data.mttstats.timeLoudnessMaxMean = mean(rawf.data.segments_loudness_max_time);

            % loudnessMaxMean: mean of segments' maximum loudness (dB).
            data.mttstats.loudnessMaxMean = mean(rawf.data.segments_loudness_max);

            % loudnessMaxVariance: variance of the segments' maximum loudness (dB).
            data.mttstats.loudnessMaxVariance = var(rawf.data.segments_loudness_max);

            % loudnessBeginMean: average loudness at the start of segments (dB)
            data.mttstats.loudnessBeginMean = mean(rawf.data.segments_loudness);

            % loudnessBeginVariance: variance of the loudness at the start
            % of segments (dB2). Correlated with loudnessMaxVariance
            data.mttstats.loudnessBeginVariance = var(rawf.data.segments_loudness);

            % loudnessDynamicsMean: average of overall dynamic range in the segments (dB).
            % loudnessDynamicsVariance: segment dynamic range variance
            % (dB). Higher variances suggest more dynamics in each segment.
            % ---
            % NOTE: the above information cannot be extracted from the MTT
            % Features, maybe more recent echonest features allow for this
            % ---

            % loudness: overall loudness estimate of the track (dB).
            data.mttstats.loudness = rawf.data.loudness;

            % ---
            % TODO: get these from the beat loudnesses?
            % ---

            % tempo: overall track tempo estimate (in beats per minute, BPM).
            % Doubling and halving errors are possible.
            data.mttstats.tempo = rawf.data.tempo;

            % tempoConfidence: a measure of the confidence of the tempo estimate (between 0 and 1).
            %data.mttstats.tempoConfidence = rawf.data.tempoConfidence;

            beats = rawf.data.beats;
            tatums = rawf.data.tatums;

            % Intervals between successive entries of the first row.
            % Both stay empty when there are fewer than two entries, so
            % that no NaN (var / median of an empty array) ends up in the
            % statistics.
            bdiff = [];
            if numel(beats) > 0
                bdiff = diff(beats(1,:));
            end
            tdiff = [];
            if numel(tatums) > 0
                tdiff = diff(tatums(1,:));
            end

            % beatVariance: a measure of the regularity of the beat (secs).
            if ~isempty(bdiff)
                data.mttstats.beatVariance = var(bdiff);
            else
                % ---
                % This is a fake replacement variance
                % ---
                data.mttstats.beatVariance = 0;
            end

            % tatum: estimated overall tatum duration (in seconds).
            % Tatums are subdivisions of the beat.
            % ---
            % note: the tatum length could be also
            %   accessed by comparison with the global bpm estimate
            % ---
            if ~isempty(tdiff)
                data.mttstats.tatum = median(tdiff);

                % tatumConfidence: a measure of the confidence of the tatum estimate (between 0 and 1).
                data.mttstats.tatumConfidence = mean(tatums(2,:));

                % numTatumsPerBeat: number of tatums per beat.
                % Needs beat intervals and a non-zero tatum duration,
                % otherwise the same placeholder as for missing tatums
                % is used.
                if ~isempty(bdiff) && data.mttstats.tatum > 0
                    data.mttstats.numTatumsPerBeat = median(bdiff) / data.mttstats.tatum;
                else
                    data.mttstats.numTatumsPerBeat = 2;
                end
            else
                % ---
                % This is a fake replacement tatum
                % TODO: maybe set confidence to -1?
                % ---
                data.mttstats.tatum = 0;

                % tatumConfidence: a measure of the confidence of the tatum estimate (between 0 and 1).
                data.mttstats.tatumConfidence = 0;

                % numTatumsPerBeat: number of tatums per beat
                data.mttstats.numTatumsPerBeat = 2;
            end

            % ---
            % TODO: beat analysis
            % ---

            % timeSignature: estimated time signature (number of beats per measure). (0-7 / 7)
            data.mttstats.timeSignature = rawf.data.timeSignature;

            % timeSignatureStability: a rough estimate of the stability of
            % the time signature throughout the track
            data.mttstats.timeSignatureStability = rawf.data.timeSignatureConfidence;

            % ---
            % prepare field for final features
            % ---
            data.final.vector = [];
            data.final.vector_info = struct();
            data.final.dim = 0;

            % save info data
            data.info.type = 'MTTAudioFeatureSlaney08';
            data.info.owner_id = clip.id;
            data.info.creatorrev = feature.my_revision;

            % save parameters
            data.info.params = feature.my_params;
        end

        function define_global_transform(features)
            % define_global_transform(features)
            %
            % Calculates and sets the normalisation factors from the group
            % of input features. These factors are stored in the database
            % of the first feature via set_common, and the (normalised)
            % vectors are written to the features straight away via
            % finalise.
            %
            % features: array of features with extracted data. If
            %   my_params.norm_mttstats of the first feature is set, more
            %   than one feature is required, otherwise an error is raised.

            num_features = numel(features);

            % one column per feature
            for i = 1:num_features
                vec = mttstats_to_vector(features(i).data.mttstats);

                if i == 1
                    % allocate once instead of growing the matrix
                    final = zeros(numel(vec), num_features);
                end
                final(:,i) = vec;
            end

            if features(1).my_params.norm_mttstats
                if num_features == 1
                    error('Insert feature array for this method, or set normalisation to 0');
                end

                % ---
                % here, we only need to define the post-normalisation
                % ---
                [final, pstd] = mapminmax(final,0,1);
                common.mttstats.pre_norm = pstd;

                % ---
                % NOTE: whitening as in slaney??
                % Would make reading the
                % mahal matrices really hard
                % ---

                features(1).my_db.set_common(common);
            else

                % placeholder, so that the common data is not empty
                features(1).my_db.set_common([1]);
            end

            % save the normalised features straight away!
            features.finalise(final);
        end


        function finalise(features, final)
            % finalise(features)
            % finalise(features, final)
            %
            % Applies the final transformation and collects the
            % information of each feature within a single vector; see
            % data.final.vector_info.labels for the meaning of the
            % specific dimensions.
            %
            % features: array of features with extracted data
            % final:    optional matrix of already transformed vectors,
            %           one column per feature. If it is omitted, or its
            %           number of columns does not match the number of
            %           features, the vectors are rebuilt from the
            %           extracted statistics and the stored normalisation
            %           is applied.
            %
            % Raises an error if the common data of a feature's database
            % is empty.

            % ---
            % check for dummy feature
            % ---
            if isfield(features(1).my_params,'select_mttstats') && ...
                    isnumeric(features(1).my_params.select_mttstats) && ...
                    features(1).my_params.select_mttstats == 0

                % if no information needed just fill everything 0
                for i = 1:numel(features)
                    features(i).data.final.vector = [];
                    features(i).data.final.dim = 0;

                    % fill up info struct and append to feature
                    features(i).data.final.vector_info.labels = {};
                end

                return;
            end

            % ---
            % set feature labelling
            % ---
            info = mttstats_labels();

            % ---
            % construct resulting feature vector out of features
            % ---
            if nargin == 2 && isempty(final)

                % the final vector etc already are set to zero;
                return;

            elseif nargin == 2 && (numel(features) == size(final, 2))
                for i = 1:numel(features)

                    % check for necessary parameters
                    if isempty(features(i).my_db.commondb)

                        error('Define the global transformation first')
                    end

                    features(i).data.final.vector = final(:,i);
                    features(i).data.final.dim = size(final,1);

                    % fill up info struct and append to feature
                    features(i).data.final.vector_info.labels = info;
                end
            else
                % ---
                % if features have been added after getting the
                % normalisation parameters, there should still be an
                % option to include them
                % ---
                normalise = features(1).my_params.norm_mttstats == 1;

                for i = 1:numel(features)

                    % check for necessary parameters
                    if isempty(features(i).my_db.commondb)

                        error('Define the global transformation first')
                    end

                    vec = mttstats_to_vector(features(i).data.mttstats);

                    if normalise
                        vec = mapminmax('apply', vec, features(1).common.mttstats.pre_norm);
                    end

                    features(i).data.final.vector = vec;
                    features(i).data.final.dim = size(vec,1);

                    % fill up info struct and append to feature
                    features(i).data.final.vector_info.labels = info;
                end
            end

            % ---
            % TODO: Maybe delete more basic features again at this point?
            % ---
        end

        % ---
        % destructor: do we really want to remove this
        % from the database? No, but
        % TODO: create marker for unused objects in db, and a cleanup
        %  function
        % ---
        function delete(feature)

        end
    end
end

% ---
% local helper functions
%
% NOTE: mttstats_to_vector and mttstats_labels have to list the
% statistics in the same order
% ---

function vec = mttstats_to_vector(stats)
% stacks the statistics of an mttstats struct into a column vector

vec = [stats.segmentDurationMean; ...
    stats.segmentDurationVariance; ...
    stats.timeLoudnessMaxMean; ...
    stats.loudnessMaxMean; ...
    stats.loudnessMaxVariance; ...
    stats.loudnessBeginMean; ...
    stats.loudnessBeginVariance; ...
    stats.loudness; ...
    stats.tempo; ...
    ... % stats.tempoConfidence; ...
    stats.beatVariance; ...
    stats.tatum; ...
    stats.tatumConfidence; ...
    stats.numTatumsPerBeat; ...
    stats.timeSignature; ...
    stats.timeSignatureStability];
end

function labels = mttstats_labels()
% returns the labels for the entries of the vector built by
% mttstats_to_vector

labels = {'segmentDurationMean', ...
    'segmentDurationVariance', ...
    'timeLoudnessMaxMean', ...
    'loudnessMaxMean', ...
    'loudnessMaxVariance', ...
    'loudnessBeginMean', ...
    'loudnessBeginVariance', ...
    'loudness', ...
    'tempo', ...
    ...% 'tempoConfidence', ...
    'beatVariance', ...
    'tatum', ...
    'tatumConfidence', ...
    'numTatumsPerBeat', ...
    'timeSignature', ...
    'timeSignatureStability'};
end