camir-aes2014: core/magnatagatune/MTTAudioFeatureBasicSm.m comparison

comparison core/magnatagatune/MTTAudioFeatureBasicSm.m @ 0:e9a9cd732c1e tip

first hg version after svn

author	wolffd
date	Tue, 10 Feb 2015 15:05:51 +0000
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:e9a9cd732c1e
+classdef MTTAudioFeatureBasicSm < MTTAudioFeature & handle
+% ---
+% the MTTAudioFeatureBasicSm Class contains
+% a basic summary of chroma, mfcc and tempo features
+% a few common chroma and mfcc vectors are concatenated
+% along with some clip-wide variance
+% a metric / rhythm fingerprint is added
+%
+% The usual workflow for these features consists of three steps
+% 1. extract: extracts the basic single-file dependent features
+% 2. define_global_transform: calculates the global feature
+%       transformation parameters
+% 3. finalise: applies the common transformations to a specific feature
+% ---
+properties(Constant = true)
+% svn hook
+my_revision = str2double(substr('$Rev$',  5, -1));
+end
+properties
+% ---
+% Set default parameters
+% ---
+my_params = struct(...
+'nchromas', 4, ... % 4 chroma vectors
+'chroma_var', 0, ... % chroma variance
+'norm_chromas', 0, ... % not implemented, chromas already rel.
+'min_kshift_chromas', 0.1, ... % treshold for key shift. set to 1 for no shift (0-1)
+...
+'ntimbres', 4, ...
+'timbre_var', 0, ... % timbre variance
+'norm_timbres', 1, ...
+'clip_timbres', 0.85, ... % percentile of data which has to be inside 0-1 bounds
+...
+'norm_weights',0, ... % globally norm weights for chroma times?
+'norm_interval',1, ...
+'max_iter',100, ...  % max iterations for chroma and timbre knn
+...
+'nrhythms', 0, ...
+'nints', 11, ...
+'energy_sr', 1000, ... % sample rate for energy curve
+'norm_acorr', 1 ... % normalise arcorr locally-> shape imp... energy is normalised anyways
+);
+end
+% ---
+% member functions
+% ---
+methods
+% ---
+% constructor: pointer to feature in database
+% ---
+function feature = MTTAudioFeatureBasicSm(varargin)
+feature = feature@MTTAudioFeature(varargin{:});
+end
+% ---
+% extract feature data from raw audio features
+% ---
+function data = extract(feature, clip)
+% ---
+% get Basic Summary audio features. this includes possible
+% local normalisations
+% ---
+global globalvars;
+% ---
+% get casimir child clip if available
+% ---
+if isa(clip, 'CASIMIRClip')
+baseclip = clip.child_clip();
+else
+baseclip = clip;
+end
+if isa(baseclip, 'MTTClip')
+rawf = baseclip.audio_features_raw();
+elseif isa(baseclip, 'MSDClip')
+rawf = baseclip.features('MSDAudioFeatureRAW');
+end
+% ---
+% now extract the features
+% first step: chroma clustering
+% ---
+weights = [rawf.data.segments_duration];
+% normalise weights
+weights = weights / rawf.data.duration;
+chroma = [rawf.data.segments_pitches]';
+% ---
+% get most present chroma vectors.
+% the weighted k-means should return the four most prominent
+% chroma vectors and their weight
+% ---
+% display error values
+op = foptions();
+op(1) = 0;
+op(14) = feature.my_params.max_iter;
+% check for trivial case
+if feature.my_params.nchromas == 0
+chromas = [];
+cwght = [];
+elseif feature.my_params.nchromas == 1
+chromas = mean(chroma, 1);
+chroma_var = var(chroma, 0, 1);
+cwght = 1;
+elseif numel(weights) > feature.my_params.nchromas
+% ---
+% there may be few chromas, try kmeans several (20) times
+% ---
+cont = 0;
+cwght = [];
+while (numel(cwght) ~= feature.my_params.nchromas) && (cont < 20);
+[chromas, cwght, post] = ...
+weighted_kmeans(feature.my_params.nchromas, chroma, weights, op);
+cont = cont + 1;
+end
+if (numel(cwght) ~= feature.my_params.nchromas)
+error('cannot find enough chroma centres');
+end
+% ---
+% Calculate the weighted variance of the chroma clusters
+% ---
+if feature.my_params.chroma_var >= 1
+chroma_var = zeros(size(chromas));
+for i = 1:size(chroma_var,1)
+% get distance from cluster centroid
+tmp_var = (chroma(post(:,i),:) - repmat(chromas(i,:), sum(post(:,i)),1)).^2;
+% add up the weighted differences and normalise by sum
+% of weights
+chroma_var(i,:) = (weights(post(:,i)) * tmp_var) ./...
+(sum(weights(post(:,i))));
+end
+end
+else
+% ---
+% odd case: less than nchroma data points.
+% we repeat the mean vector at the end
+% ---
+chromas = [chroma; repmat(mean(chroma, 1),...
+feature.my_params.nchromas - numel(weights), 1 )];
+cwght = weights;
+cwght( end + 1:feature.my_params.nchromas ) = 0;
+% ---
+% TODO: get a variance for odd case :
+% replicate the complete data variance?
+% NO: every vector is a clsuter => zero variance
+% ---
+end
+% trivial case: no variance requested
+if ~exist('chroma_var','var')
+chroma_var = zeros(size(chromas));
+end
+% sort by associated time
+[cwght, idx] = sort(cwght, 'descend');
+chromas = chromas(idx,:);
+chroma_var = chroma_var(idx,:);
+% ---
+% shift according to detected key, but only if
+% the confidencee is high enough
+% ---
+shift = 0;
+if  rawf.data.keyConfidence > feature.my_params.min_kshift_chromas;
+shift = -rawf.data.key;
+chromas = circshift(chromas, [0 shift]);
+chroma_var = circshift(chroma_var, [0 shift]);
+end
+% ---
+% get mfcc centres:
+% the same for mfccs
+% ---
+mfcc = [rawf.data.segments_timbre]';
+if feature.my_params.ntimbres == 0
+mfccs = [];
+mwght = [];
+elseif feature.my_params.ntimbres == 1
+mfccs = mean(mfcc, 1);
+timbre_var = var(mfccs, 0, 1);
+mwght = 1;
+elseif numel(weights) > feature.my_params.ntimbres
+% ---
+% there may be few mfccs, try kmeans several times
+% ---
+cont = 0;
+mwght = [];
+while (numel(mwght) ~= feature.my_params.ntimbres) && (cont < 20);
+[mfccs, mwght, post] = ...
+weighted_kmeans(feature.my_params.ntimbres, mfcc, weights, op);
+cont = cont + 1;
+end
+if (numel(mwght) ~= feature.my_params.ntimbres)
+error('cannot find enough mfcc centres');
+end
+% ---
+% Calculate the weighted variance of the chroma clusters
+% ---
+if feature.my_params.timbre_var >= 1
+timbre_var = zeros(size(mfccs));
+for i = 1:size(timbre_var,1)
+% get distance from cluster centroid
+tmp_var = (mfcc(post(:,i),:) - repmat(mfccs(i,:), sum(post(:,i)),1)).^2;
+% add up the weighted differences and normalise by sum
+% of weights
+timbre_var(i,:) = (weights(post(:,i)) * tmp_var) ./...
+(sum(weights(post(:,i))));
+end
+end
+else
+% ---
+% odd case: less than nchroma data points.
+% we repeat the mean vector at the end
+% ---
+mfccs = [mfcc; repmat(mean(mfcc, 1),...
+feature.my_params.ntimbres - numel(weights), 1)];
+mwght = weights;
+mwght( end + 1:feature.my_params.ntimbres) = 0;
+end
+% trivial case: no variance requested
+if ~exist('timbre_var','var')
+timbre_var = zeros(size(mfccs));
+end
+% sort by associated time
+[mwght, idx] = sort(mwght, 'descend');
+mfccs = mfccs(idx,:);
+timbre_var = timbre_var(idx,:);
+% ---
+% get beat features:
+% the autocorrelation curve over n quarters of length
+%
+%  alternative: how about using the n=8 quarters relative
+%  volumes from the start of a sure measure?
+% ---
+if feature.my_params.nrhythms >= 1
+bars = rawf.data.bars;
+beats = rawf.data.beats;
+tatums = rawf.data.tatums;
+% ---
+% NOTE: the beat and tatum markers seem to have an offset :(
+% ---
+offset = 0.118; %seconds
+[envelope, time] = energy_envelope(feature, clip);
+% we offset the energy curve
+time = time + offset;
+% ---
+% we try to start at the best beat confidence more
+% than sixteen eights from the end
+% ---
+if rawf.data.tempo > 0
+eightl = 30 / rawf.data.tempo;
+else
+% ---
+% odd case: no rhythm data. assume 100 bpm
+% ---
+eightl = 0.3;
+end
+if isempty(beats)
+% ---
+% odd case: no beats detected. -> use best tatum
+% ---
+if ~isempty(tatums)
+beats = tatums;
+else
+% ok, just take the beginning
+beats = [0; 1];
+end
+end
+last_valid = find(beats(1,:) < ...
+(rawf.data.duration - feature.my_params.nints * eightl),1, 'last');
+% find the best valid beat postition
+[null, max_measure] = max( beats(2, 1:last_valid));
+max_mtime = beats(1,max_measure);
+% ---
+% the correlation is calculated for the estimated eights lenght
+% and for the 16th intervals, respectively.
+% ---
+% calculate the EIGHTS correlation for the following segment
+[acorr8, eight_en, eightt] = ...
+beat_histogram(feature, max_mtime, eightl, envelope, time);
+% calculate the SIXTEENTHS correlation for the following segment
+[acorr16, six_en, sixt] = ...
+beat_histogram(feature, max_mtime, eightl / 2, envelope, time);
+% ---
+% save the various features
+% ---
+% save rythm feature data
+data.rhythm.acorr8 = acorr8;
+data.rhythm.acorr8_lag = eightt(1:end/2)-eightt(1);
+data.rhythm.energy8 = eight_en(1:end/2);
+data.rhythm.energy8_time = eightt(1:end/2);
+% --
+% the interval is normed locally up to a max value
+% associated to 30bpm
+% ---
+if feature.my_params.norm_interval
+% 1 second max value
+data.rhythm.interval8 = eightl / 2;
+else
+data.rhythm.interval8 = eightl / 2;
+end
+if feature.my_params.nrhythms >= 2
+data.rhythm.acorr16 = acorr16;
+data.rhythm.acorr16_lag = data.rhythm.acorr8_lag / 2;
+data.rhythm.energy16 = six_en(1:end/2);
+data.rhythm.energy16_time = sixt(1:end/2);
+% save beat interval / tempo
+if feature.my_params.norm_interval
+% 1 second max value
+data.rhythm.interval16 = eightl / 2;
+else
+data.rhythm.interval16 = eightl / 2;
+end
+end
+else
+%                  % save empty rythm struct
+%                  data.rhythm = struct([]);
+end
+% chroma feature data
+for i = 1:size(chromas,1)
+data.chroma(i).means = chromas(i,:)';
+data.chroma(i).means_weight = cwght(i);
+data.chroma(i).vars = chroma_var(i,:)';
+data.chroma(i).shift = shift;
+end
+% mfcc feature data
+for i = 1:size(mfccs,1)
+data.timbre(i).means = mfccs(i,:)';
+data.timbre(i).means_weight = mwght(i);
+data.timbre(i).vars = timbre_var(i,:)';
+end
+% prepare field for final features
+data.final.vector = [];
+data.final.vector_info = struct();
+data.final.dim = 0;
+% save info data
+data.info.type = 'MTTAudioFeatureBasicSm';
+data.info.owner = clip;
+data.info.owner_id = clip.id;
+data.info.creatorrev = feature.my_revision;
+% save parameters
+data.info.params = feature.my_params;
+end
+function define_global_transform(features)
+% calculate and set normalization factors from the group of
+% input features. These features will be set for the full database
+if numel(features) == 1
+error ('Insert feature array for this method');
+end
+% ---
+% here, we only need to define the post-normalisation
+% ---
+% ---
+% get chroma variance data NORMALISATION Factors
+% TODO: transport chroma variance to finalise step
+% ---
+if features(1).my_params.chroma_var >= 1
+allfeat = abs(cat(2, features(1).data.chroma(:).vars));
+for i = 2:numel(features)
+allfeat = cat(2 , allfeat, abs(abs(cat(2, features(i).data.chroma(:).vars))));
+end
+[~, common.post_normf.chroma_var] = mapminmax(allfeat,0,1);
+end
+% ---
+% get timbre variance data NORMALISATION Factors
+% TODO: transport chroma variance to finalise step
+% ---
+if features(1).my_params.timbre_var >= 1
+allfeat = abs(cat(2, features(1).data.timbre(:).vars));
+for i = 2:numel(features)
+allfeat = cat(2 , allfeat, abs(abs(cat(2, features(i).data.timbre(:).vars))));
+end
+[~, common.post_normf.timbre_var] = mapminmax(allfeat,0,1);
+end
+% ---
+% derive normalisation for timbre features:
+% MFCC's are actually special filter outputs
+% (see developer.echonest.com/docs/v4/_static/AnalyzeDocumentation_2.2.pdf
+% they are unbounded, so just the relative information will be
+% used here.
+% We normalise each bin independently
+% ---
+if features(1).my_params.ntimbres > 0
+allfeat = abs(cat(2, features(1).data.timbre(:).means));
+for i = 2:numel(features)
+allfeat = cat(2 , allfeat, abs(cat(2, features(i).data.timbre(:).means)));
+end
+% ---
+% get normalisation factors
+% NOTE: the values will later be clipped to [0,1]
+% anyways
+% ---
+if (features(1).my_params.clip_timbres ~= 0 ) || ...
+(features(1).my_params.clip_timbres ~= 1 )
+common.post_normf.timbre = 1 ./ prctile(allfeat, features(1).my_params.clip_timbres * 100, 2);
+else
+% just use the maximum
+common.post_normf.timbre = 1/max(allfeat, 2);
+end
+% set common feature values
+features(1).my_db.set_common(common);
+else
+features(1).my_db.set_common([1]);
+end
+end
+function finalise(feature)
+% applies a final transformation and
+% collects the information of this feature within a single vector
+% see info for types in specific dimensions
+for i = 1:numel(feature)
+% check for neccesary parameters
+if isempty(feature(i).my_db.commondb)
+error('Define the global transformation first')
+return;
+end
+if feature(1).my_params.ntimbres > 0
+% ---
+% normalise features
+% ---
+% norm timbre features if neccesary
+timbren = [];
+if feature(i).my_params.norm_timbres
+for j = 1:numel(feature(i).data.timbre)
+timbren = cat(1, timbren, ...
+MTTAudioFeatureBasicSm.norm_timbre...
+(feature(i).data.timbre(j).means, feature(i).my_db.commondb.post_normf.timbre));
+end
+else
+timbren = cat(1, timbren, feature(i).data.timbre(:).means);
+end
+end
+% ---
+% construct resulting feature vector out of features
+% ---
+vec = [];
+info = {};
+if  feature(i).my_params.nchromas > 0
+info{numel(vec)+ 1} = 'chroma';
+vec = cat(1, vec, feature(i).data.chroma(:).means);
+info{numel(vec)+ 1} = 'chroma weights';
+vec = cat(1, vec, [feature(i).data.chroma(:).means_weight]');
+% ---
+% NORMALISE Chroma variance
+% ---
+if feature(i).my_params.chroma_var >= 1
+info{numel(vec)+ 1} = 'chroma variance';
+% normalise this pack of variance vectors
+tmp_var =  mapminmax('apply', [feature(i).data.chroma(:).vars],...
+feature(i).common.post_normf.chroma_var);
+% concatenate normalised data to vector
+for vari = 1:size(tmp_var,2)
+vec = cat(1, vec, tmp_var(:, vari));
+end
+end
+end
+if feature(i).my_params.ntimbres > 0
+info{numel(vec)+ 1} = 'timbre';
+vec = cat(1, vec, timbren);
+info{numel(vec)+ 1} = 'timbre weights';
+vec = cat(1, vec, [feature(i).data.timbre(:).means_weight]');
+% ---
+% NORMALISE timbre variance
+% ---
+if feature(i).my_params.timbre_var >= 1
+info{numel(vec)+ 1} = 'timbre variance';
+% normalise this pack of variance vectors
+tmp_var =  mapminmax('apply', [feature(i).data.timbre(:).vars],...
+feature(i).common.post_normf.timbre_var);
+% concatenate normalised data to vector
+for vari = 1:size(tmp_var,2)
+vec = cat(1, vec, tmp_var(:, vari));
+end
+end
+end
+if  feature(i).my_params.nrhythms > 0
+info{numel(vec)+ 1} = 'rhythm 8';
+vec = cat(1, vec, feature(i).data.rhythm.acorr8);
+info{numel(vec)+ 1} = 'int 8';
+vec = cat(1, vec, feature(i).data.rhythm.interval8);
+if  feature(i).my_params.nrhythms >= 2
+info{numel(vec)+ 1} = 'rhythm 16';
+vec = cat(1, vec, feature(i).data.rhythm.acorr16);
+info{numel(vec)+ 1} = 'int 16';
+vec = cat(1, vec, feature(i).data.rhythm.interval16);
+end
+end
+feature(i).data.final.vector = vec;
+feature(i).data.final.dim = numel(feature(i).data.final.vector);
+% fill up info struct and append to feature
+info(end+1: feature(i).data.final.dim) = ...
+cell(feature(i).data.final.dim - numel(info),1);
+feature(i).data.final.vector_info.labels = info;
+end
+% ---
+% TODO: Maybe delete more basic features again at this point?
+% ---
+end
+% ---
+% destructor: do we really want to remove this
+% from the database? No, but
+% TODO: create marker for unused objects in db, and a cleanup
+%  function
+% ---
+function delete(feature)
+end
+function visualise(feature)
+% ---
+% plots the different data types collected in this feature
+% ---
+for i = 1:numel(feature)
+clip = feature(i).data.info.owner;
+% display raw features
+if isa(clip, 'CASIMIRClip')
+baseclip = clip.child_clip();
+else
+baseclip = clip;
+end
+if isa(baseclip, 'MTTClip')
+rawf = baseclip.audio_features_raw();
+elseif isa(baseclip, 'MSDClip')
+rawf = baseclip.features('MSDAudioFeatureRAW');
+end
+% ---
+% @todo: implement MSD feature visualisation
+% ---
+[a1, a2, a3] = rawf.visualise();
+% ---
+% Display chroma features
+% ---
+if isfield(feature(i).data, 'chroma')
+chroma_labels = {'c', 'c#', 'd','d#', 'e', 'f','f#', 'g','g#', 'a', 'a#', 'h'};
+mode_labels = {'minor', 'major'};
+% change labels to reflect detected mode
+chroma_labels{rawf.data.key + 1} = ...
+sprintf('(%s) %s',mode_labels{rawf.data.mode + 1}, chroma_labels{rawf.data.key + 1});
+% transpose labels and data
+chroma_labels = circshift(chroma_labels, [0, feature(i).data.chroma(1).shift]);
+chromar = circshift([rawf.data.segments_pitches], [feature(i).data.chroma(1).shift, 0]);
+% image transposed chromas again
+segments = [rawf.data.segments_start];
+segments(end) = rawf.data.duration;
+hold(a1);
+uimagesc(segments, 0:11, chromar, 'Parent', a1);
+set(a1,'YTick',[0:11], 'YTickLabel', chroma_labels);
+% enlarge plot and plot new data after the old ones
+ax = axis(a1);
+ax(2) = ax(2) + 2*feature(i).my_params.nchromas + 0.5;
+axis(a1, 'xy');
+axis(a1, ax);
+imagesc(rawf.data.duration + (1:feature(i).my_params.nchromas), (-1:11), ...
+[ feature(i).data.chroma(:).means_weight; feature(i).data.chroma(:).means],...
+'Parent', a1);
+% variance calculated?
+if isfield(feature(i).data.chroma, 'vars')
+imagesc(rawf.data.duration + feature(i).my_params.nchromas + (1:feature(i).my_params.nchromas), (-1:11), ...
+[feature(i).data.chroma(:).vars],...
+'Parent', a1);
+end
+end
+% ---
+% Display timbre features
+% ---
+if  isfield(feature(i).data, 'timbre')
+% enlarge plot and plot new data after the old ones
+hold(a2);
+ax = axis(a2);
+ax(2) = ax(2) + 2*feature(i).my_params.ntimbres + 0.5;
+axis(a2, ax);
+imagesc(rawf.data.duration + (1:feature(i).my_params.ntimbres), (-1:11), ...
+[ feature(i).data.timbre(:).means_weight; feature(i).data.timbre(:).means],...
+'Parent', a2);
+if isfield(feature(i).data.timbre, 'vars')
+imagesc(rawf.data.duration + feature(i).my_params.ntimbres + (1:feature(i).my_params.ntimbres), (-1:11), ...
+[feature(i).data.timbre(:).vars],...
+'Parent', a1);
+end
+end
+% ---
+% Display rhythm features
+% ---
+if  isfield(feature(i).data, 'rhythm')
+% data.rhythm.interval
+% get timecode
+eightt = feature(i).data.rhythm.energy8_time;
+sixt = feature(i).data.rhythm.energy16_time;
+hold(a3);
+% plot sixteens acorr and energy
+plot(sixt, feature(i).data.rhythm.energy16, 'bx')
+plot(sixt, feature(i).data.rhythm.acorr16, 'b')
+% plot eights acorr and energy
+plot(eightt, feature(i).data.rhythm.energy8, 'rx')
+plot(eightt, feature(i).data.rhythm.acorr8, 'r')
+% broaden view by fixed 4 seconds
+ax = axis(a3);
+axis(a3, [max(0, eightt(1)-( eightt(end) - eightt(1) + 4 )) ...
+min(rawf.data.duration, eightt(end) +4) ...
+ax(3:4)]);
+end
+end
+end
+end
+methods (Hidden = true)
+function [env, time] = energy_envelope(feature, clip)
+% extracts the envelope of energy for the given clip
+% ---
+% TODO: externalise envelope etc in external audio features
+% ---
+[null, src]  = evalc('miraudio(clip.mp3file_full())');
+[null, env] = evalc('mirenvelope(src, ''Sampling'', feature.my_params.energy_sr)');
+time = get(env,'Time');
+time = time{1}{1};
+env = mirgetdata(env);
+end
+function [acorr, base_sig, base_t] = beat_histogram(feature, startt, interval, signal, signal_t)
+% acorr = beat_histogram(feature, startt, interval, signal, time)
+%
+% compute correlation for beats of specified length in energy curve
+% get corresponding energy values
+dt = signal_t(2) - signal_t(1);
+base_t = startt:interval:(startt + (feature.my_params.nints*2-1) * interval);
+base_sig = signal( min( numel(signal), max(1,round((base_t - signal_t(1))/dt))));
+% normalise energy
+acbase_sig = base_sig./max(base_sig);
+% calculate their cyclic autocorrelation
+acorr = circshift(xcorr(acbase_sig,acbase_sig(1:end/2)),...
+[numel(acbase_sig) 0]);
+% cut acorr to relevant points, normalise and square
+acorr = (acorr(1:feature.my_params.nints)./feature.my_params.nints).^2;
+% ---
+% NOTE: we normalise the autocorrelation locally, to compare the
+% (rhythmic) shape
+% ---
+if feature.my_params.norm_acorr;
+acorr = acorr - min(acorr);
+acorr = acorr/max(acorr);
+end
+end
+end
+methods(Static)
+function timbre = norm_timbre(in, normfs)
+% returns normed timbre data
+% ---
+% individually scale the data using
+% the dimensions factors
+% ---
+timbre = zeros(size(in));
+for i = 1:size(in,2)
+timbre(:,i) = normfs .* in(:,i);
+end
+% shift to positive values
+timbre = (1 + timbre) /2;
+% clip features to [0,1]
+timbre = min(1, max(timbre, 0));
+end
+% ---
+% returns parameter md5 hash for comparison
+% ---
+end
+end

Mercurial > hg > camir-aes2014

comparison core/magnatagatune/MTTAudioFeatureBasicSm.m @ 0:e9a9cd732c1e tip