classdef MTTAudioFeatureSlaney08 < MTTAudioFeature & handle
    % ---
    % This class contains a basic summary of MTT features complementary
    % to those in MTTAudioFeatureBasicSm. The features are extracted
    % as described in Slaney 08 - LEARNING A METRIC FOR MUSIC SIMILARITY
    %
    % The usual workflow for these features consists of three steps
    % 1. extract: extracts the basic single-file dependent features
    % 2. define_global_transform: calculates the global feature
    %       transformation parameters
    % 3. finalise: applies the common transformations to a specific feature
    %
    % The order of the entries in the final feature vector is defined in
    % one place only, the local function slaney08_stat_labels at the end
    % of this file.
    % ---

    properties(Constant = true)

        % svn hook: revision number parsed from the svn keyword string
        my_revision = str2double(substr('$Rev$',  5, -1));
    end

    properties
        % ---
        % Set default parameters
        % ---
        my_params = struct(...
            'norm_mttstats', 1, ... % min-max normalise the statistics over the feature set
            'whiten_mttstats', 0, ... % NOTE: whitening as in slaney?? (not read in this class)
            'select_mttstats', 1 ...% TODO: way to select certain features; 0 yields an empty final vector
            );
    end

    % ---
    % member functions
    % ---
    methods

        % ---
        % constructor: pointer to feature in database
        % All arguments are forwarded unchanged to MTTAudioFeature.
        % ---
        function feature = MTTAudioFeatureSlaney08(varargin)

            feature = feature@MTTAudioFeature(varargin{:});

        end

        % ---
        % extract feature data from raw audio features
        % ---
        function data = extract(feature, clip)
            % data = extract(feature, clip)
            %
            % Computes the per-clip summary statistics from the raw
            % audio features of the clip.
            %
            % clip: MTTClip or MSDClip, or a CASIMIRClip whose child clip
            %       is one of those.
            %
            % data: struct with the fields
            %   .mttstats  the summary statistics (one field per statistic)
            %   .final     empty placeholders, filled in by finalise
            %   .info      type, owner id, creator revision and parameters
            %
            % An error is raised if the (child) clip is of another class.

            % ---
            % get casimir child clip if available
            % ---
            if isa(clip, 'CASIMIRClip')
                baseclip = clip.child_clip();
            else
                baseclip = clip;
            end

            % ---
            % get the raw features. Fail with a clear message for
            % unsupported clip types instead of running into an
            % undefined variable further below
            % ---
            if isa(baseclip, 'MTTClip')
                rawf = baseclip.audio_features_raw();
            elseif isa(baseclip, 'MSDClip')
                rawf = baseclip.features('MSDAudioFeatureRAW');
            else
                error('MTTAudioFeatureSlaney08:unsupportedClip', ...
                    'Cannot extract raw audio features from a clip of class %s', ...
                    class(baseclip));
            end
            raw = rawf.data;

            % ---
            % TODO: implement time_weighted version of the statistical
            % evaluations below
            % ---

            % segmentDurationMean: mean segment duration (sec.).
            data.mttstats.segmentDurationMean = mean(raw.segments_duration);

            % segmentDurationVariance: variance of the segment duration
            data.mttstats.segmentDurationVariance = var(raw.segments_duration);

            % timeLoudnessMaxMean: mean time to the segment maximum,
            % or attack duration (sec.).
            data.mttstats.timeLoudnessMaxMean = mean(raw.segments_loudness_max_time);

            % loudnessMaxMean: mean of segments' maximum loudness (dB).
            data.mttstats.loudnessMaxMean = mean(raw.segments_loudness_max);

            % loudnessMaxVariance: variance of the segments' maximum
            % loudness (dB).
            data.mttstats.loudnessMaxVariance = var(raw.segments_loudness_max);

            % loudnessBeginMean: average loudness at the start of
            % segments (dB)
            data.mttstats.loudnessBeginMean = mean(raw.segments_loudness);

            % loudnessBeginVariance: variance of the loudness at the start
            % of segments (dB2). Correlated with loudnessMaxVariance
            data.mttstats.loudnessBeginVariance = var(raw.segments_loudness);

            % loudnessDynamicsMean: average of overall dynamic range in
            % the segments (dB).
            % loudnessDynamicsVariance: segment dynamic range variance
            % (dB). Higher variances suggest more dynamics in each segment.
            % ---
            % NOTE: the above information cannot be extracted from the MTT
            % Features, maybe more recent echonest features allow for this
            % ---

            % loudness: overall loudness estimate of the track (dB).
            data.mttstats.loudness = raw.loudness;

            % ---
            % TODO: get these from the beat loudnesses?
            % ---

            % tempo: overall track tempo estimate (in beats per minute,
            % BPM). Doubling and halving errors are possible.
            data.mttstats.tempo = raw.tempo;

            % tempoConfidence: a measure of the confidence of the tempo
            % estimate (between 0 and 1).
            %data.mttstats.tempoConfidence = raw.tempoConfidence;

            % ---
            % Intervals between successive beats / tatums. Row 1 of both
            % matrices is used as the time axis; row 2 of the tatums is
            % averaged as their confidence below.
            % The interval vectors are empty if fewer than two events
            % are available.
            % ---
            beats = raw.beats;
            tatums = raw.tatums;

            if numel(beats) > 0
                bdiff = diff(beats(1,:));
            else
                bdiff = [];
            end

            if numel(tatums) > 0
                tdiff = diff(tatums(1,:));
            else
                tdiff = [];
            end

            % beatVariance: a measure of the regularity of the beat (secs).
            if ~isempty(bdiff)
                data.mttstats.beatVariance = var(bdiff);
            else

                % ---
                % This is a fake replacement variance, used when there
                % is no beat interval to evaluate (var([]) would be NaN)
                % ---
                data.mttstats.beatVariance = 0;
            end

            % tatum: estimated overall tatum duration (in seconds).
            % Tatums are subdivisions of the beat.
            % ---
            % note: the tatum length could be also
            % accessed by comparison with the global bpm estimate
            % ---
            if ~isempty(tdiff)
                data.mttstats.tatum = median(tdiff);

                % tatumConfidence: a measure of the confidence of the
                % tatum estimate (between 0 and 1).
                data.mttstats.tatumConfidence = mean(tatums(2,:));

                % numTatumsPerBeat: number of tatums per beat
                if ~isempty(bdiff)
                    data.mttstats.numTatumsPerBeat = ...
                        median(bdiff) / data.mttstats.tatum;
                else
                    % no beat interval available: use the same
                    % replacement value as for missing tatums
                    data.mttstats.numTatumsPerBeat = 2;
                end
            else
                % ---
                % This is a fake replacement tatum
                % TODO: maybe set confidence to -1?
                % ---
                data.mttstats.tatum = 0;
                data.mttstats.tatumConfidence = 0;
                data.mttstats.numTatumsPerBeat = 2;
            end

            % ---
            % TODO: beat analysis
            % ---

            % timeSignature: estimated time signature (number of beats
            % per measure).  (0-7 / 7)
            data.mttstats.timeSignature = raw.timeSignature;

            % timeSignatureStability: a rough estimate of the stability of
            % the time signature throughout the track
            data.mttstats.timeSignatureStability = raw.timeSignatureConfidence;

            % ---
            % prepare field for final features
            % ---
            data.final.vector = [];
            data.final.vector_info = struct();
            data.final.dim = 0;

            % save info data
            data.info.type = 'MTTAudioFeatureSlaney08';
            data.info.owner_id = clip.id;
            data.info.creatorrev = feature.my_revision;

            % save parameters
            data.info.params = feature.my_params;
        end

        function define_global_transform(features)
            % define_global_transform(features)
            %
            % Calculates the normalisation parameters from the group of
            % input features, stores them via the set_common method of
            % the database of the first feature, and finalises all
            % input features straight away.
            %
            % features: array of MTTAudioFeatureSlaney08 objects. The
            %           parameters of features(1) decide whether the
            %           statistics are normalised. With normalisation
            %           switched on, more than one feature is required.

            % one column of summary statistics per feature
            columns = cell(1, numel(features));
            for i = 1:numel(features)
                columns{i} = slaney08_stats_vector(features(i).data.mttstats);
            end
            final = [columns{:}];

            if features(1).my_params.norm_mttstats
                if numel(features) == 1
                    error ('Insert feature array for this method, or set normalisation to 0');
                end

                % ---
                % map every statistic (row) to the range [0, 1] and keep
                % the mapping settings, so that features added later
                % can be transformed in the same way (see finalise)
                % ---
                [final, norm_settings] = mapminmax(final,0,1);
                common.mttstats.pre_norm = norm_settings;

                % ---
                % NOTE: whitening as in slaney??
                % Would make reading the
                % mahal matrices really hard
                % ---

                features(1).my_db.set_common(common);

            else

                % NOTE(review): placeholder value, presumably so that the
                % check for a defined global transformation in finalise
                % passes - confirm against set_common
                features(1).my_db.set_common([1]);
            end

            % save the normalised features straight away!
            features.finalise(final);
        end


        function finalise(features, final)
            % finalise(features)
            % finalise(features, final)
            %
            % Applies the final transformation and collects the
            % information of each feature within a single vector,
            % written to data.final of the feature. The meaning of the
            % specific dimensions is stored in
            % data.final.vector_info.labels.
            %
            % features: array of MTTAudioFeatureSlaney08 objects
            % final:    (optional) matrix of already transformed
            %           statistics, one column per feature. If it is
            %           omitted or its column count does not match the
            %           number of features, the vectors are rebuilt from
            %           data.mttstats and, if normalisation is switched
            %           on, mapped with the stored global parameters.
            %           If it is empty, the features are left untouched.
            %
            % An error is raised if the global transformation has not
            % been defined yet (see define_global_transform).

            % ---
            % check for dummy feature
            % ---
            if isfield(features(1).my_params,'select_mttstats') && ...
                    isnumeric(features(1).my_params.select_mttstats) && ...
                    features(1).my_params.select_mttstats == 0

                % if no information needed just fill everything 0
                for i = 1:numel(features)
                    features(i).data.final.vector = [];
                    features(i).data.final.dim = 0;

                    % fill up info struct and append to feature
                    features(i).data.final.vector_info.labels = {};
                end

                return;
            end

            % ---
            % set feature labelling
            % ---
            info = slaney08_stat_labels();

            % ---
            % construct resulting feature vector out of features
            % ---
            if nargin == 2 && isempty(final)

                % the final vector etc already are set to zero;
                return;

            elseif nargin == 2 && (numel(features) == size(final, 2))
                for i = 1:numel(features)

                    % check for necessary parameters
                    if isempty(features(i).my_db.commondb)

                        error('Define the global transformation first')
                    end

                    features(i).data.final.vector = final(:,i);
                    features(i).data.final.dim = size(final,1);

                    % fill up info struct and append to feature
                    features(i).data.final.vector_info.labels = info;
                end
            else
                % ---
                % if features have been added after getting the
                % normalisation parameters, there should be still an
                % option to include them
                % ---

                for i = 1:numel(features)

                    % check for necessary parameters
                    if isempty(features(i).my_db.commondb)

                        error('Define the global transformation first')
                    end

                    vec = slaney08_stats_vector(features(i).data.mttstats);

                    % same switch as in define_global_transform
                    if features(1).my_params.norm_mttstats

                        vec = mapminmax('apply', vec, features(1).common.mttstats.pre_norm);
                    end

                    features(i).data.final.vector = vec;
                    features(i).data.final.dim = size(vec,1);

                    % fill up info struct and append to feature
                    features(i).data.final.vector_info.labels = info;
                end

            end

            % ---
            % TODO: Maybe delete more basic features again at this point?
            % ---
        end

        % ---
        % destructor: do we really want to remove this
        % from the database? No, but
        % TODO: create marker for unused objects in db, and a cleanup
        %  function
        % ---
        function delete(feature)

        end
    end
end

% ---
% local helper functions (only visible within this file)
% ---

function labels = slaney08_stat_labels()
% Names of the summary statistics, in the order in which they appear
% in the final feature vector. Returned as a row cell array of strings.

    labels = {'segmentDurationMean', ...
        'segmentDurationVariance', ...
        'timeLoudnessMaxMean', ...
        'loudnessMaxMean', ...
        'loudnessMaxVariance', ...
        'loudnessBeginMean', ...
        'loudnessBeginVariance', ...
        'loudness', ...
        'tempo', ...
        ...% 'tempoConfidence', ...
        'beatVariance', ...
        'tatum', ...
        'tatumConfidence', ...
        'numTatumsPerBeat', ...
        'timeSignature', ...
        'timeSignatureStability'};
end

function vec = slaney08_stats_vector(stats)
% Stacks the summary statistics of a single feature vertically, in the
% order given by slaney08_stat_labels.
%
% stats: the data.mttstats struct of a feature, as created by extract

    labels = slaney08_stat_labels();
    parts = cellfun(@(name) stats.(name), labels, 'UniformOutput', false);
    vec = vertcat(parts{:});
end