classdef MTTAudioFeatureHMM < MTTAudioFeature & handle
    % ---
    % The MTTAudioFeatureHMM class is a work-in-progress audio feature
    % that is meant to describe a clip by a hidden Markov model (HMM)
    % with a predefined number of states (my_params.nstates).
    %
    % NOTE: the HMM training in extract() is not implemented yet.
    % finalise(), visualise() and the hidden helper methods still work on
    % chroma / timbre / rhythm summary data and read parameters
    % (ntimbres, nchromas, nrhythms, chroma_var, timbre_var, norm_timbres,
    % energy_sr, nints, norm_acorr) that are NOT part of the default
    % my_params defined in this class.
    %
    % The usual workflow for these features consists of three steps
    % 1. extract: extracts the basic single-file dependent features
    % 2. define_global_transform: calculates the global feature
    %       transformation parameters
    % 3. finalise: applies the common transformations to a specific feature
    % ---

    properties(Constant = true)

        % svn hook: revision number parsed from the svn keyword string
        my_revision = str2double(substr('$Rev: 2332 $', 5, -1));
    end

    properties
        % ---
        % Set default parameters
        % ---
        my_params = struct(...
            'nstates', 4 ... % predefined number of states
            );
    end

    % ---
    % member functions
    % ---
    methods

        % ---
        % constructor: pointer to feature in database
        % ---
        function feature = MTTAudioFeatureHMM(varargin)
            % all arguments are forwarded to the MTTAudioFeature constructor

            feature = feature@MTTAudioFeature(varargin{:});

        end

        % ---
        % extract feature data from raw audio features
        % ---
        function data = extract(feature, clip)
            % data = extract(feature, clip)
            %
            % Collects the raw audio features of the given clip and is
            % meant to summarise them with an HMM.
            %
            % feature: the feature object (provides my_params / my_revision)
            % clip:    MTTClip, MSDClip or a CASIMIRClip wrapping one of them
            % data:    struct with the fields mu, transmat1, final and info
            %
            % Raises MTTAudioFeatureHMM:unsupportedClip for other clip
            % classes and MTTAudioFeatureHMM:notImplemented as long as the
            % HMM training is missing.

            global globalvars;

            % ---
            % get casimir child clip if available
            % ---
            if isa(clip, 'CASIMIRClip')
                baseclip = clip.child_clip();
            else
                baseclip = clip;
            end
            if isa(baseclip, 'MTTClip')
                rawf = baseclip.audio_features_raw();
            elseif isa(baseclip, 'MSDClip')
                rawf = baseclip.features('MSDAudioFeatureRAW');
            else
                % without this branch rawf would be undefined below
                error('MTTAudioFeatureHMM:unsupportedClip', ...
                    'Cannot get raw audio features for a clip of class %s', ...
                    class(baseclip));
            end

            % ---
            % now extract the features
            % first step: chroma clustering
            % ---
            weights = [rawf.data.segments_duration];

            % normalise weights by the clip duration
            weights = weights / rawf.data.duration;

            % get the chroma features, transposed
            chroma = [rawf.data.segments_pitches]';

            % ---
            % TODO: train hmm on the chroma data (and weights) prepared
            % above and set mu1 and transmat1 from the trained model
            % ---
            mu1 = [];
            transmat1 = [];

            % fail early with a clear message instead of storing an
            % empty model in the feature data
            if isempty(mu1) || isempty(transmat1)
                error('MTTAudioFeatureHMM:notImplemented', ...
                    'HMM training is not implemented yet');
            end

            % save hmm into data variable
            data.mu = mu1;
            data.transmat1 = transmat1;

            % prepare field for final features
            data.final.vector = [];
            data.final.vector_info = struct();
            data.final.dim = 0;

            % save info data
            data.info.type = 'MTTAudioFeatureHMM';
            data.info.owner = clip;
            data.info.owner_id = clip.id;
            data.info.creatorrev = feature.my_revision;

            % save parameters
            data.info.params = feature.my_params;
        end

        function define_global_transform(features)
            % calculate and set normalization factors from the group of
            % input features. These features will be set for the full database
            %
            % NOTE: not implemented for this feature type, nothing is set

        end


        function finalise(feature)
            % applies a final transformation and
            % collects the information of this feature within a single vector
            % see info for types in specific dimensions
            %
            % feature: array of feature objects; for each element the
            %          fields data.final.vector, data.final.dim and
            %          data.final.vector_info.labels are written
            %
            % NOTE(review): the parameters read here (ntimbres, nchromas,
            % nrhythms, ...) are not part of the default my_params of
            % this class - confirm where they are supposed to come from.

            for i = 1:numel(feature)

                % check for necessary parameters
                if isempty(feature(i).my_db.commondb)

                    error('Define the global transformation first')
                end

                % the check uses feature(i), matching the later use of
                % timbren which is also guarded by feature(i)
                if feature(i).my_params.ntimbres > 0
                    % ---
                    % normalise features
                    % ---
                    % norm timbre features if necessary
                    timbren = [];
                    if feature(i).my_params.norm_timbres
                        for j = 1:numel(feature(i).data.timbre)

                            % NOTE(review): this calls the static method
                            % of MTTAudioFeatureBasicSm, not the
                            % norm_timbre defined in this class - confirm
                            timbren = cat(1, timbren, ...
                                MTTAudioFeatureBasicSm.norm_timbre...
                                (feature(i).data.timbre(j).means, feature(i).my_db.commondb.post_normf.timbre));
                        end
                    else

                        timbren = cat(1, timbren, feature(i).data.timbre(:).means);
                    end
                end

                % ---
                % construct resulting feature vector out of features
                % info holds a label at the first index of each section
                % ---
                vec = [];
                info = {};
                if feature(i).my_params.nchromas > 0

                    info{numel(vec)+ 1} = 'chroma';
                    vec = cat(1, vec, feature(i).data.chroma(:).means);

                    info{numel(vec)+ 1} = 'chroma weights';
                    vec = cat(1, vec, [feature(i).data.chroma(:).means_weight]');

                    % ---
                    % NORMALISE Chroma variance
                    % ---
                    if feature(i).my_params.chroma_var >= 1

                        info{numel(vec)+ 1} = 'chroma variance';

                        % normalise this pack of variance vectors
                        % NOTE(review): reads feature(i).common here but
                        % feature(i).my_db.commondb above - confirm both
                        % refer to the same settings
                        tmp_var = mapminmax('apply', [feature(i).data.chroma(:).vars],...
                            feature(i).common.post_normf.chroma_var);

                        % concatenate normalised data to vector
                        for vari = 1:size(tmp_var,2)

                            vec = cat(1, vec, tmp_var(:, vari));
                        end
                    end
                end


                if feature(i).my_params.ntimbres > 0

                    info{numel(vec)+ 1} = 'timbre';
                    vec = cat(1, vec, timbren);

                    info{numel(vec)+ 1} = 'timbre weights';
                    vec = cat(1, vec, [feature(i).data.timbre(:).means_weight]');

                    % ---
                    % NORMALISE timbre variance
                    % ---
                    if feature(i).my_params.timbre_var >= 1

                        info{numel(vec)+ 1} = 'timbre variance';

                        % normalise this pack of variance vectors
                        tmp_var = mapminmax('apply', [feature(i).data.timbre(:).vars],...
                            feature(i).common.post_normf.timbre_var);

                        % concatenate normalised data to vector
                        for vari = 1:size(tmp_var,2)

                            vec = cat(1, vec, tmp_var(:, vari));
                        end
                    end
                end

                if feature(i).my_params.nrhythms > 0

                    info{numel(vec)+ 1} = 'rhythm 8';
                    vec = cat(1, vec, feature(i).data.rhythm.acorr8);

                    info{numel(vec)+ 1} = 'int 8';
                    vec = cat(1, vec, feature(i).data.rhythm.interval8);

                    if feature(i).my_params.nrhythms >= 2

                        info{numel(vec)+ 1} = 'rhythm 16';
                        vec = cat(1, vec, feature(i).data.rhythm.acorr16);

                        info{numel(vec)+ 1} = 'int 16';
                        vec = cat(1, vec, feature(i).data.rhythm.interval16);
                    end
                end

                feature(i).data.final.vector = vec;
                feature(i).data.final.dim = numel(feature(i).data.final.vector);

                % fill up info struct and append to feature:
                % pad the label list with empty cells up to the vector size
                info(end+1: feature(i).data.final.dim) = ...
                    cell(feature(i).data.final.dim - numel(info),1);

                feature(i).data.final.vector_info.labels = info;
            end

            % ---
            % TODO: Maybe delete more basic features again at this point?
            % ---
        end

        % ---
        % destructor: do we really want to remove this
        % from the database? No, but
        % TODO: create marker for unused objects in db, and a cleanup
        %  function
        % ---
        function delete(feature)
            % intentionally empty, see the TODO above

        end


        function visualise(feature)
            % ---
            % plots the different data types collected in this feature
            %
            % The raw features are plotted first via rawf.visualise(),
            % which returns three axes handles (a1, a2, a3). The chroma,
            % timbre and rhythm data of the feature are then drawn into
            % a1, a2 and a3 respectively, if the fields exist.
            % ---
            for i = 1:numel(feature)
                clip = feature(i).data.info.owner;

                % display raw features
                if isa(clip, 'CASIMIRClip')
                    baseclip = clip.child_clip();
                else
                    baseclip = clip;
                end
                if isa(baseclip, 'MTTClip')
                    rawf = baseclip.audio_features_raw();
                elseif isa(baseclip, 'MSDClip')
                    rawf = baseclip.features('MSDAudioFeatureRAW');
                else
                    % without this branch rawf would be undefined below
                    error('MTTAudioFeatureHMM:unsupportedClip', ...
                        'Cannot get raw audio features for a clip of class %s', ...
                        class(baseclip));
                end

                % ---
                % @todo: implement MSD feature visualisation
                % ---
                [a1, a2, a3] = rawf.visualise();

                % ---
                % Display chroma features
                % ---
                if isfield(feature(i).data, 'chroma')

                    chroma_labels = {'c', 'c#', 'd','d#', 'e', 'f','f#', 'g','g#', 'a', 'a#', 'h'};
                    mode_labels = {'minor', 'major'};

                    % change labels to reflect detected mode
                    chroma_labels{rawf.data.key + 1} = ...
                        sprintf('(%s) %s',mode_labels{rawf.data.mode + 1}, chroma_labels{rawf.data.key + 1});

                    % transpose labels and data
                    chroma_labels = circshift(chroma_labels, [0, feature(i).data.chroma(1).shift]);
                    chromar = circshift([rawf.data.segments_pitches], [feature(i).data.chroma(1).shift, 0]);

                    % image transposed chromas again
                    % NOTE(review): this overwrites the last segment start
                    % with the clip duration - confirm this is intended
                    segments = [rawf.data.segments_start];
                    segments(end) = rawf.data.duration;

                    hold(a1);
                    uimagesc(segments, 0:11, chromar, 'Parent', a1);
                    set(a1,'YTick',[0:11], 'YTickLabel', chroma_labels);

                    % enlarge plot and plot new data after the old ones
                    ax = axis(a1);
                    ax(2) = ax(2) + 2*feature(i).my_params.nchromas + 0.5;
                    axis(a1, 'xy');
                    axis(a1, ax);

                    imagesc(rawf.data.duration + (1:feature(i).my_params.nchromas), (-1:11), ...
                        [ feature(i).data.chroma(:).means_weight; feature(i).data.chroma(:).means],...
                        'Parent', a1);
                    % variance calculated?
                    if isfield(feature(i).data.chroma, 'vars')

                        imagesc(rawf.data.duration + feature(i).my_params.nchromas + (1:feature(i).my_params.nchromas), (-1:11), ...
                            [feature(i).data.chroma(:).vars],...
                            'Parent', a1);
                    end
                end

                % ---
                % Display timbre features
                % ---
                if isfield(feature(i).data, 'timbre')

                    % enlarge plot and plot new data after the old ones
                    hold(a2);
                    ax = axis(a2);
                    ax(2) = ax(2) + 2*feature(i).my_params.ntimbres + 0.5;

                    axis(a2, ax);
                    imagesc(rawf.data.duration + (1:feature(i).my_params.ntimbres), (-1:11), ...
                        [ feature(i).data.timbre(:).means_weight; feature(i).data.timbre(:).means],...
                        'Parent', a2);
                    if isfield(feature(i).data.timbre, 'vars')

                        % the timbre variance belongs into the timbre
                        % axes a2, which were enlarged for it above
                        imagesc(rawf.data.duration + feature(i).my_params.ntimbres + (1:feature(i).my_params.ntimbres), (-1:11), ...
                            [feature(i).data.timbre(:).vars],...
                            'Parent', a2);
                    end
                end

                % ---
                % Display rhythm features
                % ---
                if isfield(feature(i).data, 'rhythm')
                    % data.rhythm.interval
                    % get timecode
                    eightt = feature(i).data.rhythm.energy8_time;
                    sixt = feature(i).data.rhythm.energy16_time;

                    hold(a3);
                    % plot sixteenths acorr and energy
                    plot(sixt, feature(i).data.rhythm.energy16, 'bx')

                    plot(sixt, feature(i).data.rhythm.acorr16, 'b')

                    % plot eighths acorr and energy
                    plot(eightt, feature(i).data.rhythm.energy8, 'rx')

                    plot(eightt, feature(i).data.rhythm.acorr8, 'r')

                    % broaden view by fixed 4 seconds
                    ax = axis(a3);
                    axis(a3, [max(0, eightt(1)-( eightt(end) - eightt(1) + 4 )) ...
                        min(rawf.data.duration, eightt(end) +4) ...
                        ax(3:4)]);
                end
            end
        end
    end


    methods (Hidden = true)

        function [env, time] = energy_envelope(feature, clip)
            % extracts the envelope of energy for the given clip
            %
            % clip: clip object providing mp3file_full()
            % env:  envelope data as returned by mirgetdata
            % time: first entry of the envelope's 'Time' property
            %
            % NOTE(review): reads my_params.energy_sr, which is not part
            % of the default my_params of this class.

            % ---
            % TODO: externalise envelope etc in external audio features
            % ---

            % evalc returns the console output of the calls as first
            % output, which is discarded here
            [null, src] = evalc('miraudio(clip.mp3file_full())');
            [null, env] = evalc('mirenvelope(src, ''Sampling'', feature.my_params.energy_sr)');

            time = get(env,'Time');
            time = time{1}{1};
            env = mirgetdata(env);
        end

        function [acorr, base_sig, base_t] = beat_histogram(feature, startt, interval, signal, signal_t)
            % acorr = beat_histogram(feature, startt, interval, signal, time)
            %
            % compute correlation for beats of specified length in energy curve
            %
            % startt:   time of the first sampling point
            % interval: distance between two sampling points
            % signal:   energy curve, sampled at the times in signal_t
            % signal_t: time axis of signal; the spacing is taken from
            %           its first two entries
            %
            % acorr:    squared autocorrelation values (nints entries)
            % base_sig: signal values picked at the times base_t
            % base_t:   the 2*nints sampling times
            %
            % NOTE(review): reads my_params.nints and my_params.norm_acorr,
            % which are not part of the default my_params of this class.

            % get corresponding energy values; indices are limited to
            % the valid range 1..numel(signal)
            dt = signal_t(2) - signal_t(1);
            base_t = startt:interval:(startt + (feature.my_params.nints*2-1) * interval);
            base_sig = signal( min( numel(signal), max(1,round((base_t - signal_t(1))/dt))));

            % normalise energy
            acbase_sig = base_sig./max(base_sig);

            % calculate their cyclic autocorrelation
            acorr = circshift(xcorr(acbase_sig,acbase_sig(1:end/2)),...
                [numel(acbase_sig) 0]);

            % cut acorr to relevant points, normalise and square
            acorr = (acorr(1:feature.my_params.nints)./feature.my_params.nints).^2;

            % ---
            % NOTE: we normalise the autocorrelation locally, to compare the
            % (rhythmic) shape
            % ---
            if feature.my_params.norm_acorr

                acorr = acorr - min(acorr);
                acorr = acorr/max(acorr);
            end
        end
    end

    methods(Static)

        function timbre = norm_timbre(in, normfs)
            % returns normed timbre data
            %
            % in:     timbre data, one vector per column
            % normfs: scaling factors, multiplied element-wise with
            %         each column of in
            % timbre: scaled data, mapped by (1 + x) / 2 and clipped
            %         to [0,1]; same size as in

            % ---
            % individually scale the data using
            % the dimensions factors
            % ---
            timbre = zeros(size(in));
            for i = 1:size(in,2)

                timbre(:,i) = normfs .* in(:,i);
            end

            % shift to positive values
            timbre = (1 + timbre) /2;

            % clip features to [0,1]
            timbre = min(1, max(timbre, 0));
        end

        % ---
        % returns parameter md5 hash for comparison
        % NOTE: no implementation of such a function follows in this file
        % ---
    end

end