wolffd@0: function [f,p,m,fe] = mirsegment(x,varargin)
wolffd@0: %   f = mirsegment(a) segments an audio signal. It can also be the name of an
wolffd@0: %       audio file or 'Folder', for the analysis of the audio files in the
wolffd@0: %       current folder. The segmentation of audio signal already decomposed
wolffd@0: %       into frames is not available for the moment.
wolffd@0: %   f = mirsegment(...,'Novelty') segments using a self-similarity matrix
wolffd@0: %           (Foote & Cooper, 2003)     (by default)
wolffd@0: %       f = mirsegment(...,feature) bases the segmentation strategy on a
wolffd@0: %           specific feature.
wolffd@0: %           'Spectrum': from FFT spectrum (by default)
wolffd@0: %           'MFCC': from MFCCs
wolffd@0: %           'Keystrength': from the key strength profile
wolffd@0: %           'AutocorPitch': from the autocorrelation function computed as
wolffd@0: %               for pitch extraction.
wolffd@0: %           The option related to this feature extraction can be specified.
wolffd@0: %           Example: mirsegment(...,'Spectrum','Window','bartlett')
wolffd@0: %                    mirsegment(...,'MFCC','Rank',1:10)
wolffd@0: %                    mirsegment(...,'Keystrength','Weight',.5)
wolffd@0: %       These features need to be frame-based, in order to appreciate their
wolffd@0: %           temporal evolution. Therefore, the audio signal x is first
wolffd@0: %           decomposed into frames. This decomposition can be controlled
wolffd@0: %           using the 'Frame' keyword.  
wolffd@0: %       The options available for the chosen strategies can be specified
wolffd@0: %           directly as options of the segment function.
wolffd@0: %           Example: mirsegment(a,'Novelty','KernelSize',10)
wolffd@0: %   f = mirsegment(...,'HCDF') segments using the Harmonic Change Detection  
wolffd@0: %           Function (Harte & Sandler, 2006)
wolffd@0: %   f = mirsegment(...,'RMS') segments at positions of long silences. A
wolffd@0: %       frame decomposed RMS is computed using mirrms (with default
wolffd@0: %       options), and segments are selected from temporal positions
wolffd@0: %       where the RMS rises to a given 'On' threshold, until temporal
wolffd@0: %       positions where the RMS drops back to a given 'Off' threshold.
wolffd@0: %       f = mirsegment(...,'Off',t1) specifies the RMS 'Off' threshold.
wolffd@0: %           Default value: t1 = .01
wolffd@0: %       f = mirsegment(...,'On',t2) specifies the RMS 'On' threshold.
wolffd@0: %           Default value: t2 = .02
wolffd@0: %
wolffd@0: %   f = mirsegment(a,s) segments a using the results of a segmentation
wolffd@0: %       analysis s. s can be the peaks detected on an analysis of the
wolffd@0: %       audio for instance.
wolffd@0: %
wolffd@0: %   f = mirsegment(a,v) where v is an array of numbers, segments a using
wolffd@0: %       the temporal positions specified in v (in s.)
wolffd@0: %
wolffd@0: %   Foote, J. & Cooper, M. (2003). Media Segmentation using Self-Similarity
wolffd@0: %       Decomposition,. In Proc. SPIE Storage and Retrieval for Multimedia
wolffd@0: %       Databases, Vol. 5021, pp. 167-75.
wolffd@0: %   Harte, C. A. & Sandler, M. B. (2006). Detecting harmonic change in
wolffd@0: %       musical audio, in Proceedings of Audio and Music Computing for 
wolffd@0: %       Multimedia Workshop, Santa Barbara, CA.
wolffd@0: 
wolffd@0: 
wolffd@0: %   [f,p] = mirsegment(...) also displays the analysis produced by the chosen
wolffd@0: %       strategy.
wolffd@0: %           For 'Novelty', p is the novelty curve.
wolffd@0: %           For 'HCDF', p is the Harmonic Change Detection Function.
wolffd@0: %           For mirsegment(a,s) and mirsegment(a,v), p is s (resp. v).
wolffd@0: %   [f,p,m] = mirsegment(...) also displays the preliminary analysis
wolffd@0: %       undertaken in the chosen strategy.
wolffd@0: %           For 'Novelty', m is the similarity matrix.
wolffd@0: %           For 'HCDF', m is the tonal centroid.
wolffd@0: %   [f,p,m,fe] = mirsegment(...) also displays the temporal evolution of the
wolffd@0: %       feature used for the analysis.
wolffd@0:  
wolffd@0: %   f = mirsegment(...,'Novelty')
wolffd@0: 
wolffd@0:         mfc.key = {'Rank','MFCC'};
wolffd@0:         mfc.type = 'Integers';
wolffd@0:         mfc.default = 0;
wolffd@0:         mfc.keydefault = 1:13;
wolffd@0:     option.mfc = mfc;
wolffd@0: 
wolffd@0:         K.key = 'KernelSize';
wolffd@0:         K.type = 'Integer';
wolffd@0:         K.default = 128;
wolffd@0:     option.K = K;
wolffd@0:     
wolffd@0:         distance.key = 'Distance';
wolffd@0:         distance.type = 'String';
wolffd@0:         distance.default = 'cosine';
wolffd@0:     option.distance = distance;
wolffd@0: 
wolffd@0:         measure.key = {'Measure','Similarity'};
wolffd@0:         measure.type = 'String';
wolffd@0:         measure.default = 'exponential';
wolffd@0:     option.measure = measure;
wolffd@0: 
wolffd@0:         tot.key = 'Total';
wolffd@0:         tot.type = 'Integer';
wolffd@0:         tot.default = Inf;
wolffd@0:     option.tot = tot;
wolffd@0: 
wolffd@0:         cthr.key = 'Contrast';
wolffd@0:         cthr.type = 'Integer';
wolffd@0:         cthr.default = .1;
wolffd@0:     option.cthr = cthr;
wolffd@0: 
wolffd@0:         % A zero frame length means "not specified by the caller"; each
wolffd@0:         % strategy below then substitutes its own default frame settings.
wolffd@0:         frame.key = 'Frame';
wolffd@0:         frame.type = 'Integer';
wolffd@0:         frame.number = 2;
wolffd@0:         frame.default = [0 0];
wolffd@0:         frame.keydefault = [3 .1];
wolffd@0:     option.frame = frame;
wolffd@0: 
wolffd@0:         ana.type = 'String';
wolffd@0:         ana.choice = {'Spectrum','Keystrength','AutocorPitch','Pitch'};
wolffd@0:         ana.default = 0;
wolffd@0:     option.ana = ana;
wolffd@0:     
wolffd@0: %       f = mirsegment(...,'Spectrum')    
wolffd@0:     
wolffd@0:             band.choice = {'Mel','Bark','Freq'};
wolffd@0:             band.type = 'String';
wolffd@0:             band.default = 'Freq';
wolffd@0:         option.band = band;
wolffd@0: 
wolffd@0:             mi.key = 'Min';
wolffd@0:             mi.type = 'Integer';
wolffd@0:             mi.default = 0;
wolffd@0:         option.mi = mi;
wolffd@0: 
wolffd@0:             ma.key = 'Max';
wolffd@0:             ma.type = 'Integer';
wolffd@0:             ma.default = 0;
wolffd@0:         option.ma = ma;
wolffd@0: 
wolffd@0:             norm.key = 'Normal';
wolffd@0:             norm.type = 'Boolean';
wolffd@0:             norm.default = 0;
wolffd@0:         option.norm = norm;
wolffd@0: 
wolffd@0:             win.key = 'Window';
wolffd@0:             win.type = 'String';
wolffd@0:             win.default = 'hamming';
wolffd@0:         option.win = win;
wolffd@0:     
wolffd@0: %       f = mirsegment(...,'RMS')    
wolffd@0:     
wolffd@0:             throff.key = 'Off';
wolffd@0:             throff.type = 'Integer';
wolffd@0:             throff.default = .01;
wolffd@0:         option.throff = throff;
wolffd@0: 
wolffd@0:             thron.key = 'On';
wolffd@0:             thron.type = 'Integer';
wolffd@0:             thron.default = .02;
wolffd@0:         option.thron = thron;
wolffd@0: 
wolffd@0:         strat.choice = {'Novelty','HCDF','RMS'}; % should remain as last field
wolffd@0:         strat.default = 'Novelty';
wolffd@0:         strat.position = 2;
wolffd@0:     option.strat = strat;
wolffd@0:    
wolffd@0: specif.option = option;
wolffd@0: 
wolffd@0: 
wolffd@0: % Secondary outputs default to empty; each branch fills in what it computes.
wolffd@0: p = {};
wolffd@0: m = {};
wolffd@0: fe = {};
wolffd@0: 
wolffd@0: if isa(x,'mirdesign')
wolffd@0:     if not(get(x,'Eval'))
wolffd@0:         % During bottom-up construction of the general design
wolffd@0: 
wolffd@0:         [unused option] = miroptions(@mirframe,x,specif,varargin);
wolffd@0:         type = get(x,'Type');
wolffd@0:         f = mirdesign(@mirsegment,x,option,{},struct,type);
wolffd@0:         
wolffd@0:         % Keep a segmentation already attached to the input design,
wolffd@0:         % otherwise record the strategy requested here.
wolffd@0:         sg = get(x,'Segment');
wolffd@0:         if not(isempty(sg))
wolffd@0:             f = set(f,'Segment',sg);
wolffd@0:         else
wolffd@0:             f = set(f,'Segment',option.strat);
wolffd@0:         end
wolffd@0:         
wolffd@0:     else
wolffd@0:         % During top-down evaluation initiation
wolffd@0:         
wolffd@0:         f = evaleach(x);
wolffd@0:         if iscell(f)
wolffd@0:             f = f{1};
wolffd@0:         end
wolffd@0:         p = x;
wolffd@0:     end
wolffd@0: elseif isa(x,'mirdata')
wolffd@0:     [unused option] = miroptions(@mirframe,x,specif,varargin);
wolffd@0:     if ischar(option.strat)
wolffd@0:         % A strategy name was given: compute the segmentation positions p
wolffd@0:         % first, then call mirsegment again with p to do the actual cut.
wolffd@0:         dx = get(x,'Data');
wolffd@0:         if size(dx{1},2) > 1
wolffd@0:             error('ERROR IN MIRSEGMENT: The segmentation of audio signal already decomposed into frames is not available for the moment.');
wolffd@0:         end
wolffd@0:         if strcmpi(option.strat,'Novelty')
wolffd@0:             if not(option.frame.length.val)
wolffd@0:                 % No frame size requested: use defaults depending on the
wolffd@0:                 % feature the novelty is computed from.
wolffd@0:                 if strcmpi(option.ana,'Keystrength')
wolffd@0:                     option.frame.length.val = .5;
wolffd@0:                     option.frame.hop.val = .2;
wolffd@0:                 elseif strcmpi(option.ana,'AutocorPitch') ...
wolffd@0:                         || strcmpi(option.ana,'Pitch')
wolffd@0:                     option.frame.length.val = .05;
wolffd@0:                     option.frame.hop.val = .01;
wolffd@0:                 else
wolffd@0:                     option.frame.length.val = .05;
wolffd@0:                     option.frame.hop.val = 1;
wolffd@0:                 end
wolffd@0:             end
wolffd@0:             fr = mirframenow(x,option);
wolffd@0:             % Frame-based feature extraction; an explicit MFCC rank takes
wolffd@0:             % precedence over the feature named in option.ana.
wolffd@0:             if not(isequal(option.mfc,0))
wolffd@0:                 fe = mirmfcc(fr,'Rank',option.mfc);
wolffd@0:             elseif strcmpi(option.ana,'Spectrum')
wolffd@0:                 fe = mirspectrum(fr,'Min',option.mi,'Max',option.ma,...
wolffd@0:                                     'Normal',option.norm,option.band,...
wolffd@0:                                     'Window',option.win);
wolffd@0:             elseif strcmpi(option.ana,'Keystrength')
wolffd@0:                     fe = mirkeystrength(fr);
wolffd@0:             elseif strcmpi(option.ana,'AutocorPitch') ...
wolffd@0:                     || strcmpi(option.ana,'Pitch')
wolffd@0:                 [unused,fe] = mirpitch(x,'Frame');
wolffd@0:             else
wolffd@0:                 fe = fr;
wolffd@0:             end
wolffd@0:             % n is the novelty curve, m the similarity matrix.
wolffd@0:             [n m] = mirnovelty(fe,'Distance',option.distance,...
wolffd@0:                                   'Measure',option.measure,...
wolffd@0:                                   'KernelSize',option.K);
wolffd@0:             p = mirpeaks(n,'Total',option.tot,...
wolffd@0:                            'Contrast',option.cthr,...
wolffd@0:                            'Chrono','NoBegin','NoEnd');
wolffd@0:         elseif strcmpi(option.strat,'HCDF')
wolffd@0:             if not(option.frame.length.val)
wolffd@0:                 option.frame.length.val = .743;
wolffd@0:                 option.frame.hop.val = 1/8;
wolffd@0:             end
wolffd@0:             fr = mirframenow(x,option);
wolffd@0:             %[df m fe] = mirhcdf(fr);
wolffd@0:             df = mirhcdf(fr);
wolffd@0:             p = mirpeaks(df);
wolffd@0:         elseif strcmpi(option.strat,'RMS')
wolffd@0:             if not(option.frame.length.val)
wolffd@0:                 option.frame.length.val = .05;
wolffd@0:                 option.frame.hop.val = .5;
wolffd@0:             end
wolffd@0:             fr = mirframenow(x,option);
wolffd@0:             %[df m fe] = mirhcdf(fr);
wolffd@0:             df = mirrms(fr);
wolffd@0:             fp = get(df,'FramePos');
wolffd@0:             p = mircompute(@findsilence,df,fp,option.throff,option.thron);
wolffd@0:         end
wolffd@0:         f = mirsegment(x,p);
wolffd@0:     else
wolffd@0:         % option.strat holds the segmentation itself: a mirscalar or other
wolffd@0:         % mirdata object with detected peaks/attacks, or a numeric array
wolffd@0:         % of temporal positions.
wolffd@0:         dx = get(x,'Data');
wolffd@0:         dt = get(x,'Time');
wolffd@0: 
wolffd@0:         if isa(option.strat,'mirscalar')
wolffd@0:             ds = get(option.strat,'PeakPos');
wolffd@0:             fp = get(option.strat,'FramePos');
wolffd@0:         elseif isa(option.strat,'mirdata')
wolffd@0:             % Attack positions are preferred; fall back to peak positions.
wolffd@0:             ds = get(option.strat,'AttackPos');
wolffd@0:             if isempty(ds) || isempty(ds{1})
wolffd@0:                 ds = get(option.strat,'PeakPos');
wolffd@0:             end
wolffd@0:             xx = get(option.strat,'Pos');
wolffd@0:         else
wolffd@0:             ds = option.strat;
wolffd@0:             fp = cell(1,length(dx));
wolffd@0:         end
wolffd@0:         st = cell(1,length(dx));
wolffd@0:         sx = cell(1,length(dx));
wolffd@0:         cl = cell(1,length(dx));
wolffd@0:         for k = 1:length(dx)
wolffd@0:             dxk = dx{k}{1}; % values in kth audio file
wolffd@0:             dtk = dt{k}{1}; % time positions in kth audio file
wolffd@0:             if isa(option.strat,'mirdata')
wolffd@0:                 dsk = ds{k}{1}; % segmentation times in kth audio file
wolffd@0:             else
wolffd@0:                 dsk = {ds};
wolffd@0:             end
wolffd@0:             fsk = [];   % the structured array of segmentation times 
wolffd@0:                          % needs to be flattened
wolffd@0:             for j = 1:length(dsk)
wolffd@0:                 if isa(option.strat,'mirdata')
wolffd@0:                     dsj = dsk{j}; % segmentation times in jth segment
wolffd@0:                 else
wolffd@0:                     dsj = ds;
wolffd@0:                 end
wolffd@0:                 if not(iscell(dsj))
wolffd@0:                     dsj = {dsj};
wolffd@0:                 end
wolffd@0:                 % The loop index is named c (not m) so that it does not
wolffd@0:                 % shadow the output argument m.
wolffd@0:                 for c = 1:length(dsj)
wolffd@0:                     % segmentation times in cth bank channel
wolffd@0:                     if isa(option.strat,'mirscalar')
wolffd@0:                         dsm = fp{k}{c}(1,dsj{c});
wolffd@0:                     elseif isa(option.strat,'mirdata')
wolffd@0:                         dsm = xx{k}{c}(dsj{c});
wolffd@0:                     else
wolffd@0:                         dsm = dsj{c};
wolffd@0:                     end
wolffd@0:                     if iscell(dsm)
wolffd@0:                         dsm = dsm{1};
wolffd@0:                     end
wolffd@0:                     % Discard segmentation times lying outside the
wolffd@0:                     % temporal extent of the signal.
wolffd@0:                     dsm(:,dsm(1,:) < dtk(1)) = [];
wolffd@0:                     dsm(:,dsm(end,:) > dtk(end)) = [];
wolffd@0:                     % It is presupposed here that the segmentations times
wolffd@0:                     % for a given channel are not decomposed per frames,
wolffd@0:                     % because the segmentation of the frame decomposition
wolffd@0:                     % is something that does not seem very clear.
wolffd@0:                     % Practically, the peak picking for instance is based 
wolffd@0:                     % therefore on a frame analysis (such as novelty), and
wolffd@0:                     % segmentation are inferred between these frames...
wolffd@0:                     if size(dsm,2) == 1
wolffd@0:                         dsm = dsm';
wolffd@0:                     end
wolffd@0:                     fsk = [fsk dsm];
wolffd@0:                 end
wolffd@0:             end
wolffd@0: 
wolffd@0:             fsk = sort(fsk); % Here is the chronological ordering
wolffd@0:             
wolffd@0:             if isempty(fsk)
wolffd@0:                 % No segmentation point: the whole signal is one segment.
wolffd@0:                 ffsk = {[0;dtk(end)]};
wolffd@0:                 sxk = {dxk};
wolffd@0:                 stk = {dtk};
wolffd@0:                 n = 1;
wolffd@0:             elseif size(fsk,1) == 1
wolffd@0:                 % One row: each time is a boundary between two contiguous
wolffd@0:                 % segments, giving length(fsk)+1 segments in total.
wolffd@0:                 ffsk = cell(1,length(fsk)+1);
wolffd@0:                 ffsk{1} = [dtk(1);fsk(1)];
wolffd@0:                 for h = 1:length(fsk)-1
wolffd@0:                     ffsk{h+1} = [fsk(h);fsk(h+1)];
wolffd@0:                 end
wolffd@0:                 ffsk{end} = [fsk(end);dtk(end)];
wolffd@0:                 
wolffd@0:                 n = length(ffsk);
wolffd@0: 
wolffd@0:                 crd = zeros(1,n+1); % the sample positions of the
wolffd@0:                                     % segmentations in the channel
wolffd@0:                 crd0 = 0;
wolffd@0:                 for i = 1:n
wolffd@0:                     % First sample, after the previous boundary, whose
wolffd@0:                     % time reaches the start of the ith segment.
wolffd@0:                     crd0 = crd0 + find(dtk(crd0+1:end)>=ffsk{i}(1),1);
wolffd@0:                     crd(i) = crd0;
wolffd@0:                 end
wolffd@0:                 crd(n+1) = size(dxk,1)+1;
wolffd@0: 
wolffd@0:                 sxk = cell(1,n); % each cell contains a segment
wolffd@0:                 stk = cell(1,n); % each cell contains
wolffd@0:                                  % the corresponding time positions
wolffd@0: 
wolffd@0:                 for i = 1:n
wolffd@0:                     sxk{i} = dxk(crd(i):crd(i+1)-1,1,:);
wolffd@0:                     stk{i} = dtk(crd(i):crd(i+1)-1);
wolffd@0:                 end
wolffd@0: 
wolffd@0:             elseif size(fsk,1) == 2
wolffd@0:                 % Two rows: each column gives the start (row 1) and end
wolffd@0:                 % (row 2) times of one segment.
wolffd@0:                 ffsk = cell(1,size(fsk,2));
wolffd@0:                 for h = 1:size(fsk,2)
wolffd@0:                     ffsk{h} = [fsk(1,h);fsk(2,h)];
wolffd@0:                 end
wolffd@0:                 n = length(ffsk);
wolffd@0:                 crd = zeros(n,2); % the sample positions of the
wolffd@0:                                   % segmentations in the channel
wolffd@0:                 crd0 = 0;
wolffd@0:                 for i = 1:n
wolffd@0:                     crd0 = crd0 + find(dtk(crd0+1:end)>=ffsk{i}(1),1);
wolffd@0:                     crd(i,1) = crd0;
wolffd@0:                     crd0 = crd0 + find(dtk(crd0+1:end)>=ffsk{i}(2),1);
wolffd@0:                     crd(i,2) = crd0;                    
wolffd@0:                 end
wolffd@0:                 sxk = cell(1,n); % each cell contains a segment
wolffd@0:                 stk = cell(1,n); % each cell contains
wolffd@0:                                  % the corresponding time positions
wolffd@0:                 for i = 1:n
wolffd@0:                     sxk{i} = dxk(crd(i,1):crd(i,2),1,:);
wolffd@0:                     stk{i} = dtk(crd(i,1):crd(i,2));
wolffd@0:                 end
wolffd@0:             end
wolffd@0:             sx{k} = sxk;
wolffd@0:             st{k} = stk;
wolffd@0:             fp{k} = ffsk;
wolffd@0:             cl{k} = 1:n;
wolffd@0:         end
wolffd@0:         f = set(x,'Data',sx,'Time',st,'FramePos',fp,'Clusters',cl);
wolffd@0:         % Return the segmentation data that was used. The bare name strat
wolffd@0:         % would designate the option specification struct defined above.
wolffd@0:         p = option.strat;
wolffd@0:         m = {};
wolffd@0:         fe = {};
wolffd@0:     end
wolffd@0: else
wolffd@0:     % Any other input (e.g. a file name) is first turned into a miraudio
wolffd@0:     % object; all four outputs of the recursive call are forwarded.
wolffd@0:     [f p m fe] = mirsegment(miraudio(x),varargin{:});
wolffd@0: end 
wolffd@0: 
wolffd@0: 
wolffd@0: function p = findsilence(d,fp,throff,thron)
wolffd@0: % Returns the start positions (first row of fp) of the frames where the
wolffd@0: % curve d rises from below thron to thron or above. When several such
wolffd@0: % onsets are followed by the same first drop to throff or below, only the
wolffd@0: % earliest onset is kept.
wolffd@0: 
wolffd@0: % Zero padding on both sides lets an onset be detected on the very first
wolffd@0: % frame; padded index j+1 corresponds to frame j of d.
wolffd@0: padded = [0 d 0];
wolffd@0: rising = padded(1:end-1) < thron & padded(2:end) >= thron;
wolffd@0: onset = find(rising);
wolffd@0: count = length(onset);
wolffd@0: offset = zeros(1,count);
wolffd@0: duplicate = false(1,count);
wolffd@0: for k = 1:count
wolffd@0:     % Last padded index before the curve drops to throff or below.
wolffd@0:     tail = padded(onset(k)+1:end);
wolffd@0:     offset(k) = onset(k) + find(tail <= throff, 1) - 1;
wolffd@0:     % Same offset as the previous onset: no drop occurred in between.
wolffd@0:     duplicate(k) = k > 1 && offset(k) == offset(k-1);
wolffd@0: end
wolffd@0: onset(duplicate) = [];
wolffd@0: p = fp(1,onset);