view core/magnatagatune/MTTAudioFeatureHMM.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
line wrap: on
line source
classdef MTTAudioFeatureHMM < MTTAudioFeature & handle
    % ---
    % the MTTAudioFeatureHMM Class is intended to describe a clip by a
    % hidden markov model (HMM) trained on its chroma features.
    %
    % NOTE: the HMM training itself is not implemented yet (see extract).
    % finalise, visualise and the hidden helper methods still operate on
    % chroma / timbre / rhythm summary fields and parameters which are not
    % produced or defined by this class.
    %
    % The usual workflow for these features consists of three steps
    % 1. extract: extracts the basic single-file dependent features
    % 2. define_global_transform: calculates the global feature
    %       transformation parameters
    % 3. finalise: applies the common transformations to a specific feature
    % ---
    
    properties(Constant = true)
        
        % svn hook: revision number parsed from the svn keyword string
        my_revision = str2double(substr('$Rev: 2332 $',  5, -1));
    end

    properties
        % ---
        % Set default parameters
        % ---
        my_params = struct(...
            'nstates', 4 ... % predefined number of states
            );
    end
    
    % ---
    % member functions
    % ---
    methods
        
        % ---
        % constructor: pointer to feature in database
        % all arguments are forwarded to the MTTAudioFeature constructor
        % ---
        function feature = MTTAudioFeatureHMM(varargin)

            feature = feature@MTTAudioFeature(varargin{:});

        end
        
        % ---
        % extract feature data from raw audio features
        % ---
        function data = extract(feature, clip)
            % data = extract(feature, clip)
            %
            % collects the raw segment data of the clip and returns the
            % feature data struct with the fields
            %   mu, transmat1 : HMM parameters (empty until training is
            %                   implemented)
            %   final         : empty container for the final vector
            %   info          : type, owner, creator revision and parameters
            
            % --- 
            % get casimir child clip if available
            % ---
            if isa(clip, 'CASIMIRClip')
                baseclip = clip.child_clip();
            else
                baseclip = clip;
            end
            if isa(baseclip, 'MTTClip') 
                rawf = baseclip.audio_features_raw();
            elseif isa(baseclip, 'MSDClip')
                rawf = baseclip.features('MSDAudioFeatureRAW');
            else
                % fail with a clear message instead of an undefined
                % variable error further below
                error('MTTAudioFeatureHMM:unsupportedClip', ...
                    'Cannot get raw audio features for clip of class %s', ...
                    class(baseclip));
            end
            
            % ---
            % now extract the features
            % first step: chroma clustering
            % ---
            weights = [rawf.data.segments_duration];
            
            % normalise weights by the clip duration
            weights = weights / rawf.data.duration;
            
            % get the chroma features (transposed segment pitches)
            chroma = [rawf.data.segments_pitches]';

            % ---
            % TODO: train hmm using chroma and weights.
            % Until then, empty placeholders are stored so that the
            % function returns a well-formed data struct.
            % ---
            warning('MTTAudioFeatureHMM:notImplemented', ...
                'HMM training is not implemented yet; storing empty model parameters.');
            mu1 = [];
            transmat1 = [];
            
            % save hmm into data variable
            data.mu = mu1;
            data.transmat1 = transmat1;
            
            % prepare field for final features
            data.final.vector = [];
            data.final.vector_info = struct(); 
            data.final.dim = 0;
           
            % save info data
            data.info.type = 'MTTAudioFeatureHMM';
            data.info.owner = clip;
            data.info.owner_id = clip.id;
            data.info.creatorrev = feature.my_revision;
            
            % save parameters
            data.info.params = feature.my_params;
        end
        
        function define_global_transform(features)
        % calculate and set normalization factors from the group of 
        % input features. These features will be set for the full database
        %
        % NOTE: currently an empty stub, nothing is calculated or stored
            
           
        end
        
        
        function finalise(feature)
        % applies a final transformation and
        % collects the information of this feature within a single vector
        % see info for types in specific dimensions
        %
        % works on every element of the feature array and writes
        % data.final.vector, data.final.dim and 
        % data.final.vector_info.labels of each element
        %
        % NOTE(review): the default my_params of this class only define 
        % 'nstates'; the parameters read below (ntimbres, nchromas, 
        % nrhythms, norm_timbres, chroma_var, timbre_var) have to be 
        % provided elsewhere - confirm

            for i = 1:numel(feature)
                
                % check for neccesary parameters
                if isempty(feature(i).my_db.commondb)

                    error('Define the global transformation first')
                end

                % use the parameters of the current element, consistent
                % with the check before timbren is used further below
                if feature(i).my_params.ntimbres > 0
                    % ---
                    % normalise features
                    % ---
                    % norm timbre features if neccesary 
                    timbren = [];
                    if feature(i).my_params.norm_timbres
                        for j = 1:numel(feature(i).data.timbre)

                            timbren = cat(1, timbren, ...
                                MTTAudioFeatureHMM.norm_timbre...
                                (feature(i).data.timbre(j).means, feature(i).my_db.commondb.post_normf.timbre)); 
                        end
                    else

                        timbren = cat(1, timbren, feature(i).data.timbre(:).means); 
                    end
                end

                % ---
                % construct resulting feature vector out of features
                % info holds a label at the first position of each 
                % section within vec
                % ---
                vec = [];
                info = {};
                if  feature(i).my_params.nchromas > 0
                    
                    info{numel(vec)+ 1} = 'chroma';
                    vec = cat(1, vec, feature(i).data.chroma(:).means);

                    info{numel(vec)+ 1} = 'chroma weights';
                    vec = cat(1, vec, [feature(i).data.chroma(:).means_weight]');
                                
                    % ---
                    % NORMALISE Chroma variance
                    % ---
                    if feature(i).my_params.chroma_var >= 1
                        
                        info{numel(vec)+ 1} = 'chroma variance';
                        
                        % normalise this pack of variance vectors
                        tmp_var =  mapminmax('apply', [feature(i).data.chroma(:).vars],...
                            feature(i).common.post_normf.chroma_var);
                        
                        % concatenate normalised data to vector
                        for vari = 1:size(tmp_var,2)
                            
                            vec = cat(1, vec, tmp_var(:, vari));
                        end
                    end
                end

                
                if feature(i).my_params.ntimbres > 0
                    
                    info{numel(vec)+ 1} = 'timbre';
                    vec = cat(1, vec, timbren);
                    
                    info{numel(vec)+ 1} = 'timbre weights';
                    vec = cat(1, vec, [feature(i).data.timbre(:).means_weight]');
                    
                    % ---
                    % NORMALISE timbre variance
                    % ---
                    if feature(i).my_params.timbre_var >= 1
                        
                        info{numel(vec)+ 1} = 'timbre variance';
                        
                        % normalise this pack of variance vectors
                        tmp_var =  mapminmax('apply', [feature(i).data.timbre(:).vars],...
                            feature(i).common.post_normf.timbre_var);
                        
                        % concatenate normalised data to vector
                        for vari = 1:size(tmp_var,2)
                            
                            vec = cat(1, vec, tmp_var(:, vari));
                        end
                    end
                end
                
                if  feature(i).my_params.nrhythms > 0
                    
                    info{numel(vec)+ 1} = 'rhythm 8';
                    vec = cat(1, vec, feature(i).data.rhythm.acorr8);
                    
                    info{numel(vec)+ 1} = 'int 8';
                    vec = cat(1, vec, feature(i).data.rhythm.interval8);
                    
                    if  feature(i).my_params.nrhythms >= 2
                        
                        info{numel(vec)+ 1} = 'rhythm 16';
                        vec = cat(1, vec, feature(i).data.rhythm.acorr16);
                        
                        info{numel(vec)+ 1} = 'int 16';
                        vec = cat(1, vec, feature(i).data.rhythm.interval16);
                    end
                end

                feature(i).data.final.vector = vec;
                feature(i).data.final.dim = numel(feature(i).data.final.vector);
                
                % fill up info struct and append to feature:
                % pad the label list with empty cells up to the vector
                % dimension
                
                info(end+1: feature(i).data.final.dim) = ...
                    cell(feature(i).data.final.dim - numel(info),1);
                
                feature(i).data.final.vector_info.labels = info;
            end
            
            % ---
            % TODO: Maybe delete more basic features again at this point?
            % ---
        end

        % ---
        % destructor: do we really want to remove this 
        % from the database? No, but 
        % TODO: create marker for unused objects in db, and a cleanup
        %  function
        % ---
        function delete(feature)
            
        end
        
        
        function visualise(feature)
        % ---
        % plots the different data types collected in this feature
        %
        % for every element of the feature array, the raw features of the
        % owner clip are displayed first; chroma, timbre and rhythm data 
        % are then added to the returned axes if the corresponding fields
        % exist in the feature data
        % ---
            for i = 1:numel(feature)
                clip = feature(i).data.info.owner;
               
                % display raw features
                if isa(clip, 'CASIMIRClip')
                    baseclip = clip.child_clip();
                else
                    baseclip = clip;
                end
                if isa(baseclip, 'MTTClip') 
                    rawf = baseclip.audio_features_raw();
                elseif isa(baseclip, 'MSDClip')
                    rawf = baseclip.features('MSDAudioFeatureRAW');
                else
                    % fail with a clear message instead of an undefined
                    % variable error further below
                    error('MTTAudioFeatureHMM:unsupportedClip', ...
                        'Cannot get raw audio features for clip of class %s', ...
                        class(baseclip));
                end
                
                % ---
                % @todo: implement MSD feature visualisation
                % ---
                % a1, a2, a3 are used below as the axes for chroma, 
                % timbre and rhythm data respectively
                [a1, a2, a3] = rawf.visualise();
                
                % ---
                % Display chroma features
                % ---
                if isfield(feature(i).data, 'chroma')

                    chroma_labels = {'c', 'c#', 'd','d#', 'e', 'f','f#', 'g','g#', 'a', 'a#', 'h'};
                    mode_labels = {'minor', 'major'};

                    % change labels to reflect detected mode
                    chroma_labels{rawf.data.key + 1} = ...
                        sprintf('(%s) %s',mode_labels{rawf.data.mode + 1}, chroma_labels{rawf.data.key + 1});

                    % transpose labels and data
                    chroma_labels = circshift(chroma_labels, [0, feature(i).data.chroma(1).shift]);
                    chromar = circshift([rawf.data.segments_pitches], [feature(i).data.chroma(1).shift, 0]);

                    % image transposed chromas again
                    segments = [rawf.data.segments_start];
                    segments(end) = rawf.data.duration;

                    % explicitly switch hold on: hold without a state 
                    % only toggles and could switch it off
                    hold(a1, 'on');
                    uimagesc(segments, 0:11, chromar, 'Parent', a1);
                    set(a1,'YTick',[0:11], 'YTickLabel', chroma_labels);

                    % enlarge plot and plot new data after the old ones
                    ax = axis(a1);
                    ax(2) = ax(2) + 2*feature(i).my_params.nchromas + 0.5;     
                    axis(a1, 'xy');
                    axis(a1, ax);

                    % row -1 shows the weights, rows 0:11 the means
                    imagesc(rawf.data.duration + (1:feature(i).my_params.nchromas), (-1:11), ...
                            [ feature(i).data.chroma(:).means_weight; feature(i).data.chroma(:).means],...
                            'Parent', a1);
                    % variance calculated?
                    if isfield(feature(i).data.chroma, 'vars')
                
                        imagesc(rawf.data.duration + feature(i).my_params.nchromas + (1:feature(i).my_params.nchromas), (-1:11), ...
                            [feature(i).data.chroma(:).vars],...
                        'Parent', a1);
                    end
                end

                % ---
                % Display timbre features
                % ---
                if  isfield(feature(i).data, 'timbre')
                    
                    % enlarge plot and plot new data after the old ones
                    hold(a2, 'on');
                    ax = axis(a2);
                    ax(2) = ax(2) + 2*feature(i).my_params.ntimbres + 0.5;

                    axis(a2, ax);
                    imagesc(rawf.data.duration + (1:feature(i).my_params.ntimbres), (-1:11), ...
                        [ feature(i).data.timbre(:).means_weight; feature(i).data.timbre(:).means],...
                        'Parent', a2);
                    if isfield(feature(i).data.timbre, 'vars')
                
                        % timbre variance belongs to the timbre axes
                        imagesc(rawf.data.duration + feature(i).my_params.ntimbres + (1:feature(i).my_params.ntimbres), (-1:11), ...
                            [feature(i).data.timbre(:).vars],...
                        'Parent', a2);
                    end
                end
                
                % ---
                % Display rhythm features
                % ---
                if  isfield(feature(i).data, 'rhythm')
                    % data.rhythm.interval
                    % get timecode
                    eightt = feature(i).data.rhythm.energy8_time;
                    sixt = feature(i).data.rhythm.energy16_time;

                    hold(a3, 'on');
                    % plot sixteens acorr and energy
                    plot(sixt, feature(i).data.rhythm.energy16, 'bx')

                    plot(sixt, feature(i).data.rhythm.acorr16, 'b')

                    % plot eights acorr and energy
                    plot(eightt, feature(i).data.rhythm.energy8, 'rx')

                    plot(eightt, feature(i).data.rhythm.acorr8, 'r')

                    % broaden view by fixed 4 seconds
                    ax = axis(a3);
                    axis(a3, [max(0, eightt(1)-( eightt(end) - eightt(1) + 4 )) ...
                    min(rawf.data.duration, eightt(end) +4) ...
                    ax(3:4)]);
                end
            end
        end
    end
    
    
    methods (Hidden = true)
        
        function [env, time] = energy_envelope(feature, clip)
            % [env, time] = energy_envelope(feature, clip)
            %
            % extracts the envelope of energy for the given clip
            %
            % env:  envelope data as returned by mirgetdata
            % time: time axis taken from the 'Time' field of the envelope
            
            % ---
            % TODO: externalise envelope etc in external audio features
            % ---
            
            % evalc captures the console output of the called functions 
            % in the first return value, which is discarded here.
            % The evaluated strings refer to the local variables clip, 
            % src and feature by name, so these must not be renamed.
            [null, src]  = evalc('miraudio(clip.mp3file_full())');
            [null, env] = evalc('mirenvelope(src, ''Sampling'', feature.my_params.energy_sr)');
            
            time = get(env,'Time');
            time = time{1}{1};
            env = mirgetdata(env);
        end
        
        function [acorr, base_sig, base_t] = beat_histogram(feature, startt, interval, signal, signal_t)
        % acorr = beat_histogram(feature, startt, interval, signal, time)
        %
        % compute correlation for beats of specified length in energy curve
        %
        % startt:   time of the first sampling point
        % interval: distance between the sampling points
        % signal:   energy curve, sampled at the times in signal_t
        %
        % acorr:    squared cyclic autocorrelation of the sampled energy 
        %           for the first my_params.nints lags
        % base_sig: energy values at the sampling points
        % base_t:   times of the 2 * my_params.nints sampling points
        
            % get corresponding energy values
            % the sample period is taken from the first two time stamps
            dt = signal_t(2) - signal_t(1); 
            base_t = startt:interval:(startt + (feature.my_params.nints*2-1) * interval);
            
            % indices are clamped to the valid range of the signal
            base_sig = signal( min( numel(signal), max(1,round((base_t - signal_t(1))/dt))));
            
            % normalise energy
            acbase_sig = base_sig./max(base_sig);
            
            % calculate their cyclic autocorrelation
            acorr = circshift(xcorr(acbase_sig,acbase_sig(1:end/2)),...
                [numel(acbase_sig) 0]);
            
            % cut acorr to relevant points, normalise and square
            acorr = (acorr(1:feature.my_params.nints)./feature.my_params.nints).^2;
            
            % ---
            % NOTE: we normalise the autocorrelation locally, to compare the
            % (rhythmic) shape
            % ---
            if feature.my_params.norm_acorr
                
                acorr = acorr - min(acorr);
                acorr = acorr/max(acorr);
            end
        end
    end
    
    methods(Static)
        
       function timbre = norm_timbre(in, normfs)
        % timbre = norm_timbre(in, normfs)
        %
        % returns normed timbre data
        %
        % in:     timbre data, one vector per column
        % normfs: scaling factors, multiplied elementwise with each column
        %
        % timbre: scaled data, mapped by (1 + x) / 2 and clipped to [0,1]
            
            % ---
            % individually scale the data using
            % the dimensions factors
            % ---
            timbre = zeros(size(in));
            for i = 1:size(in,2)
        
                timbre(:,i) = normfs .* in(:,i); 
            end
            
            % shift to positive values
            timbre = (1 + timbre) /2;

            % clip features to [0,1]
            timbre = min(1, max(timbre, 0));
       end 
        
       % ---
       % TODO: a function returning a parameter md5 hash for comparison
       % was announced here but is not implemented in this class
       % ---
    end
    
end