function [Limits] = calculate_Silence( x, fs, frameLength, threshold ) %#ok<INUSL>
% CALCULATE_SILENCE  Find the non-silent segments of a signal by thresholding
% its smoothed, normalised short-time energy.
%
%   Limits = calculate_Silence(x, fs, frameLength)
%   Limits = calculate_Silence(x, fs, frameLength, threshold)
%
% Inputs:
%   x           - signal samples. A two-column input is averaged across the
%                 columns (stereo -> mono); the result is handled as a row
%                 vector.
%   fs          - sampling frequency. Kept for interface compatibility only:
%                 the window and the step are both exactly frameLength
%                 samples, so fs cancels out of every computation.
%   frameLength - analysis window length AND hop size, in samples
%                 (frames do not overlap).
%   threshold   - (optional) threshold applied to the energy sequence after
%                 it has been normalised by its maximum. Default: 0.0005
%                 (determined empirically).
%
% Output:
%   Limits      - K-by-2 matrix with one row per detected non-silent segment,
%                 expressed in frame indices: [startFrame endFrame].
%                 startFrame is the first frame of a run of above-threshold
%                 frames; endFrame is the frame index at which the run
%                 stopped (the first below-threshold frame, or the last
%                 frame index if the run reaches the end), limited by
%                 floor(length(x)/frameLength). Empty ([]) when nothing
%                 exceeds the threshold.
%
% Depends on ShortTimeEnergy (defined elsewhere in the project) and medfilt1.

    if (nargin < 4 || isempty(threshold))
        threshold = 0.0005;     % determined empirically
    end

    Limits = [];

    % Nothing to analyse: report "no segments" instead of handing an empty
    % signal to the helper functions.
    if (isempty(x))
        return;
    end

    % Convert stereo to mono by averaging the two channels
    if (size(x, 2) == 2)
        x = mean(x, 2);
    end

    % Work on a row vector
    [m, n] = size(x);
    if (m > n)
        x = x';
    end

    %%%%%%%%%%%%%%%%%%%%%%%%%%%
    %  ENERGY AND THRESHOLD   %
    %%%%%%%%%%%%%%%%%%%%%%%%%%%

    % Short-time energy with window == step == frameLength samples.
    % frameLength is passed directly: computing frameLength/fs and then
    % multiplying by fs again can produce a non-integer window length.
    Eor = ShortTimeEnergy(x, frameLength, frameLength);
    if (isempty(Eor))
        return;
    end

    % Smooth the energy sequence with a 5-point median filter, applied twice
    E = medfilt1(Eor, 5);
    E = medfilt1(E, 5);

    % Normalise by the peak energy. If there is no positive peak (e.g. an
    % all-zero signal) the division would only produce NaN/Inf values, none
    % of which can reach the threshold, so there are no segments to report.
    peakE = max(E(:));
    if (~(peakE > 0))
        return;
    end
    E = E * (1 / peakE);

    % Frames whose normalised energy reaches the threshold are non-silent
    flags = (E >= threshold);

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %  SPEECH SEGMENTS DETECTION %
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    nFrames = length(flags);
    count = 1;
    while (count < nFrames)     % the last frame never starts a segment
        if (flags(count))
            % First frame of a new segment: start limit in samples
            Limit1 = round(count * frameLength) + 1;
            if (Limit1 < 1)
                Limit1 = 1;
            end

            % Advance to the end of the run of flagged frames (or to the
            % last frame, whichever comes first)
            while (flags(count) && (count < nFrames))
                count = count + 1;
            end

            % End limit in samples, clamped to the signal length
            Limit2 = min(round(count * frameLength), length(x));

            Limits(end+1, :) = [Limit1, Limit2]; %#ok<AGROW>
        end
        count = count + 1;
    end

    %%%%%%%%%%%%%%%%%%%%%%%
    %   POST - PROCESS    %
    %%%%%%%%%%%%%%%%%%%%%%%

    % Convert the sample limits to frame indices of the start and end frames
    % of the non-silent segments; frame indices are 1-based, so lift any 0.
    Limits = floor(Limits / frameLength);
    Limits(Limits == 0) = 1;

    % Merge overlapping segments. Rows are in ascending order, so after a
    % merge only the current row can overlap its new neighbour; a single
    % pass therefore gives the same result as restarting from the top.
    i = 1;
    while (i < size(Limits, 1))
        if (Limits(i, 2) >= Limits(i+1, 1))
            Limits(i, 2) = Limits(i+1, 2);
            Limits(i+1, :) = [];
        else
            i = i + 1;
        end
    end
end