camir-aes2014: toolboxes/MIRtoolbox1.3.2/MIRToolbox/mirsegment.m comparison

comparison toolboxes/MIRtoolbox1.3.2/MIRToolbox/mirsegment.m @ 0:e9a9cd732c1e tip

first hg version after svn

author	wolffd
date	Tue, 10 Feb 2015 15:05:51 +0000
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:e9a9cd732c1e
+function [f,p,m,fe] = mirsegment(x,varargin)
+%   f = mirsegment(a) segments an audio signal. It can also be the name of an
+%       audio file or 'Folder', for the analysis of the audio files in the
+%       current folder. The segmentation of audio signal already decomposed
+%       into frames is not available for the moment.
+%   f = mirsegment(...,'Novelty') segments using a self-similarity matrix
+%           (Foote & Cooper, 2003)     (by default)
+%       f = mirsegment(...,feature) bases the segmentation strategy on a
+%           specific feature.
+%           'Spectrum': from FFT spectrum (by default)
+%           'MFCC': from MFCCs
+%           'Keystrength': from the key strength profile
+%           'AutocorPitch': from the autocorrelation function computed as
+%               for pitch extraction.
+%           The option related to this feature extraction can be specified.
+%           Example: mirsegment(...,'Spectrum','Window','bartlett')
+%                    mirsegment(...,'MFCC','Rank',1:10)
+%                    mirsegment(...,'Keystrength','Weight',.5)
+%       These feature need to be frame-based, in order to appreciate their
+%           temporal evolution. Therefore, the audio signal x is first
+%           decomposed into frames. This decomposition can be controled
+%           using the 'Frame' keyword.
+%       The options available for the chosen strategies can be specified
+%           directly as options of the segment function.
+%           Example: mirsegment(a,'Novelty','KernelSize',10)
+%   f = mirsegment(...,'HCDF') segments using the Harmonic Change Detection
+%           Function (Harte & Sandler, 2006)
+%   f = mirsegment(...,'RMS') segments at positions of long silences. A
+%       frame decomposed RMS is computed using mirrms (with default
+%       options), and segments are selected from temporal positions
+%       where the RMS rises to a given 'On' threshold, until temporal
+%       positions where the RMS drops back to a given 'Off' threshold.
+%       f = mirsegment(...,'Off',t1) specifies the RMS 'Off' threshold.
+%           Default value: t1 = .01
+%       f = mirsegment(...,'On',t2) specifies the RMS 'On' threshold.
+%           Default value: t2 = .02
+%
+%   f = mirsegment(a,s) segments a using the results of a segmentation
+%       analysis s. s can be the peaks detected on an analysis of the
+%       audio for instance.
+%
+%   f = mirsegment(a,v) where v is an array of numbers, segments a using
+%       the temporal positions specified in v (in s.)
+%
+%   Foote, J. & Cooper, M. (2003). Media Segmentation using Self-Similarity
+%       Decomposition,. In Proc. SPIE Storage and Retrieval for Multimedia
+%       Databases, Vol. 5021, pp. 167-75.
+%   Harte, C. A. & Sandler, M. B. (2006). Detecting harmonic change in
+%       musical audio, in Proceedings of Audio and Music Computing for
+%       Multimedia Workshop, Santa Barbara, CA.
+%   [f,p] = mirsegment(...) also displays the analysis produced by the chosen
+%       strategy.
+%           For 'Novelty', p is the novelty curve.
+%           For 'HCDF', p is the Harmonic Change Detection Function.
+%   [f,p,m] = mirsegment(...) also displays the preliminary analysis
+%       undertaken in the chosen strategy.
+%           For 'Novelty', m is the similarity matrix.
+%           For 'HCDF', m is the tonal centroid.
+%   [f,p,m,fe] = mirsegment(...) also displays the temporal evolution of the
+%       feature used for the analysis.
+%   f = mirsegment(...,'Novelty')
+mfc.key = {'Rank','MFCC'};
+mfc.type = 'Integers';
+mfc.default = 0;
+mfc.keydefault = 1:13;
+option.mfc = mfc;
+K.key = 'KernelSize';
+K.type = 'Integer';
+K.default = 128;
+option.K = K;
+distance.key = 'Distance';
+distance.type = 'String';
+distance.default = 'cosine';
+option.distance = distance;
+measure.key = {'Measure','Similarity'};
+measure.type = 'String';
+measure.default = 'exponential';
+option.measure = measure;
+tot.key = 'Total';
+tot.type = 'Integer';
+tot.default = Inf;
+option.tot = tot;
+cthr.key = 'Contrast';
+cthr.type = 'Integer';
+cthr.default = .1;
+option.cthr = cthr;
+frame.key = 'Frame';
+frame.type = 'Integer';
+frame.number = 2;
+frame.default = [0 0];
+frame.keydefault = [3 .1];
+option.frame = frame;
+ana.type = 'String';
+ana.choice = {'Spectrum','Keystrength','AutocorPitch','Pitch'};
+ana.default = 0;
+option.ana = ana;
+%       f = mirsegment(...,'Spectrum')
+band.choice = {'Mel','Bark','Freq'};
+band.type = 'String';
+band.default = 'Freq';
+option.band = band;
+mi.key = 'Min';
+mi.type = 'Integer';
+mi.default = 0;
+option.mi = mi;
+ma.key = 'Max';
+ma.type = 'Integer';
+ma.default = 0;
+option.ma = ma;
+norm.key = 'Normal';
+norm.type = 'Boolean';
+norm.default = 0;
+option.norm = norm;
+win.key = 'Window';
+win.type = 'String';
+win.default = 'hamming';
+option.win = win;
+%       f = mirsegment(...,'Silence')
+throff.key = 'Off';
+throff.type = 'Integer';
+throff.default = .01;
+option.throff = throff;
+thron.key = 'On';
+thron.type = 'Integer';
+thron.default = .02;
+option.thron = thron;
+strat.choice = {'Novelty','HCDF','RMS'}; % should remain as last field
+strat.default = 'Novelty';
+strat.position = 2;
+option.strat = strat;
+specif.option = option;
+p = {};
+m = {};
+fe = {};
+if isa(x,'mirdesign')
+if not(get(x,'Eval'))
+% During bottom-up construction of the general design
+[unused option] = miroptions(@mirframe,x,specif,varargin);
+type = get(x,'Type');
+f = mirdesign(@mirsegment,x,option,{},struct,type);
+sg = get(x,'Segment');
+if not(isempty(sg))
+f = set(f,'Segment',sg);
+else
+f = set(f,'Segment',option.strat);
+end
+else
+% During top-down evaluation initiation
+f = evaleach(x);
+if iscell(f)
+f = f{1};
+end
+p = x;
+end
+elseif isa(x,'mirdata')
+[unused option] = miroptions(@mirframe,x,specif,varargin);
+if ischar(option.strat)
+dx = get(x,'Data');
+if size(dx{1},2) > 1
+error('ERROR IN MIRSEGMENT: The segmentation of audio signal already decomposed into frames is not available for the moment.');
+end
+if strcmpi(option.strat,'Novelty')
+if not(option.frame.length.val)
+if strcmpi(option.ana,'Keystrength')
+option.frame.length.val = .5;
+option.frame.hop.val = .2;
+elseif strcmpi(option.ana,'AutocorPitch') ...
+|| strcmpi(option.ana,'Pitch')
+option.frame.length.val = .05;
+option.frame.hop.val = .01;
+else
+option.frame.length.val = .05;
+option.frame.hop.val = 1;
+end
+end
+fr = mirframenow(x,option);
+if not(isequal(option.mfc,0))
+fe = mirmfcc(fr,'Rank',option.mfc);
+elseif strcmpi(option.ana,'Spectrum')
+fe = mirspectrum(fr,'Min',option.mi,'Max',option.ma,...
+'Normal',option.norm,option.band,...
+'Window',option.win);
+elseif strcmpi(option.ana,'Keystrength')
+fe = mirkeystrength(fr);
+elseif strcmpi(option.ana,'AutocorPitch') ...
+|| strcmpi(option.ana,'Pitch')
+[unused,fe] = mirpitch(x,'Frame');
+else
+fe = fr;
+end
+[n m] = mirnovelty(fe,'Distance',option.distance,...
+'Measure',option.measure,...
+'KernelSize',option.K);
+p = mirpeaks(n,'Total',option.tot,...
+'Contrast',option.cthr,...
+'Chrono','NoBegin','NoEnd');
+elseif strcmpi(option.strat,'HCDF')
+if not(option.frame.length.val)
+option.frame.length.val = .743;
+option.frame.hop.val = 1/8;
+end
+fr = mirframenow(x,option);
+%[df m fe] = mirhcdf(fr);
+df = mirhcdf(fr);
+p = mirpeaks(df);
+elseif strcmpi(option.strat,'RMS')
+if not(option.frame.length.val)
+option.frame.length.val = .05;
+option.frame.hop.val = .5;
+end
+fr = mirframenow(x,option);
+%[df m fe] = mirhcdf(fr);
+df = mirrms(fr);
+fp = get(df,'FramePos');
+p = mircompute(@findsilence,df,fp,option.throff,option.thron);
+end
+f = mirsegment(x,p);
+else
+dx = get(x,'Data');
+dt = get(x,'Time');
+if isa(option.strat,'mirscalar')
+ds = get(option.strat,'PeakPos');
+fp = get(option.strat,'FramePos');
+elseif isa(option.strat,'mirdata')
+ds = get(option.strat,'AttackPos');
+if isempty(ds) || isempty(ds{1})
+ds = get(option.strat,'PeakPos');
+end
+xx = get(option.strat,'Pos');
+else
+ds = option.strat;
+fp = cell(1,length(dx));
+end
+st = cell(1,length(dx));
+sx = cell(1,length(dx));
+cl = cell(1,length(dx));
+for k = 1:length(dx)
+dxk = dx{k}{1}; % values in kth audio file
+dtk = dt{k}{1}; % time positions in kth audio file
+if isa(option.strat,'mirdata')
+dsk = ds{k}{1}; % segmentation times in kth audio file
+else
+dsk = {ds};
+end
+fsk = [];   % the structured array of segmentation times
+% needs to be flatten
+for j = 1:length(dsk)
+if isa(option.strat,'mirdata')
+dsj = dsk{j}; % segmentation times in jth segment
+else
+dsj = ds;
+end
+if not(iscell(dsj))
+dsj = {dsj};
+end
+for m = 1:length(dsj)
+% segmentation times in mth bank channel
+if isa(option.strat,'mirscalar')
+dsm = fp{k}{m}(1,dsj{m});
+elseif isa(option.strat,'mirdata')
+dsm = xx{k}{m}(dsj{m});
+else
+dsm = dsj{m};
+end
+if iscell(dsm)
+dsm = dsm{1};
+end
+dsm(:,find(dsm(1,:) < dtk(1))) = [];
+dsm(:,find(dsm(end,:) > dtk(end))) = [];
+% It is presupposed here that the segmentations times
+% for a given channel are not decomposed per frames,
+% because the segmentation of the frame decomposition
+% is something that does not seem very clear.
+% Practically, the peak picking for instance is based
+% therefore on a frame analysis (such as novelty), and
+% segmentation are inferred between these frames...
+if size(dsm,2) == 1
+dsm = dsm';
+end
+fsk = [fsk dsm];
+end
+end
+fsk = sort(fsk); % Here is the chronological ordering
+if isempty(fsk)
+ffsk = {[0;dtk(end)]};
+sxk = {dxk};
+stk = {dtk};
+n = 1;
+elseif size(fsk,1) == 1
+ffsk = cell(1,length(fsk)+1);
+ffsk{1} = [dtk(1);fsk(1)];
+for h = 1:length(fsk)-1
+ffsk{h+1} = [fsk(h);fsk(h+1)];
+end
+ffsk{end} = [fsk(end);dtk(end)];
+n = length(ffsk);
+crd = zeros(1,n+1); % the sample positions of the
+% segmentations in the channel
+crd0 = 0;
+for i = 1:n
+crd0 = crd0 + find(dtk(crd0+1:end)>=ffsk{i}(1),1);
+crd(i) = crd0;
+end
+crd(n+1) = size(dxk,1)+1;
+sxk = cell(1,n); % each cell contains a segment
+stk = cell(1,n); % each cell contains
+% the corresponding time positions
+for i = 1:n
+sxk{i} = dxk(crd(i):crd(i+1)-1,1,:);
+stk{i} = dtk(crd(i):crd(i+1)-1);
+end
+elseif size(fsk,1) == 2
+ffsk = cell(1,size(fsk,2));
+for h = 1:length(fsk)
+ffsk{h} = [fsk(1,h);fsk(2,h)];
+end
+n = length(ffsk);
+crd = zeros(2,n); % the sample positions of the
+% segmentations in the channel
+crd0 = 0;
+for i = 1:n
+crd0 = crd0 + find(dtk(crd0+1:end)>=ffsk{i}(1),1);
+crd(i,1) = crd0;
+crd0 = crd0 + find(dtk(crd0+1:end)>=ffsk{i}(2),1);
+crd(i,2) = crd0;
+end
+sxk = cell(1,n); % each cell contains a segment
+stk = cell(1,n); % each cell contains
+% the corresponding time positions
+for i = 1:n
+sxk{i} = dxk(crd(i,1):crd(i,2),1,:);
+stk{i} = dtk(crd(i,1):crd(i,2));
+end
+end
+sx{k} = sxk;
+st{k} = stk;
+fp{k} = ffsk;
+cl{k} = 1:n;
+end
+f = set(x,'Data',sx,'Time',st,'FramePos',fp,'Clusters',cl);
+p = strat;
+m = {};
+fe = {};
+end
+else
+[f p] = mirsegment(miraudio(x),varargin{:});
+end
+function p = findsilence(d,fp,throff,thron)
+d = [0 d 0];
+begseg = find(d(1:end-1)<thron & d(2:end)>=thron);
+nseg = length(begseg);
+endseg = zeros(1,nseg);
+removed = [];
+for i = 1:nseg
+endseg(i) = begseg(i) + find(d(begseg(i)+1:end)<=throff, 1)-1;
+if i>1 && endseg(i) == endseg(i-1)
+removed = [removed i];
+end
+end
+begseg(removed) = [];
+%endseg(removed) = [];
+%endseg(end) = min(endseg(end),length(d)+1);
+p = fp(1,begseg); %; fp(2,endseg-1)];

Mercurial > hg > camir-aes2014

comparison toolboxes/MIRtoolbox1.3.2/MIRToolbox/mirsegment.m @ 0:e9a9cd732c1e tip