function [Limits] = calculate_Silence( x, fs, frameLength )
% CALCULATE_SILENCE  Locate the non-silent segments of an audio signal.
%
%   Limits = calculate_Silence(x, fs, frameLength)
%
%   The signal is cut into frames of frameLength samples, the short-time
%   energy of each frame is median-smoothed and normalised by its maximum,
%   and frames whose normalised energy reaches a fixed threshold are
%   grouped into segments.
%
%   Inputs:
%     x           - audio samples. An N-by-2 input is averaged across its
%                   two columns; a column vector is transposed to a row.
%     fs          - sampling rate. Kept for interface compatibility only:
%                   the frame size is already expressed in samples, so the
%                   computation does not need it.
%     frameLength - frame length and hop size, in samples.
%
%   Output:
%     Limits      - K-by-2 matrix with one row per detected segment,
%                   [startFrame endFrame], expressed as frame indices
%                   (an index of 0 is raised to 1). Empty when no frame
%                   reaches the threshold.
%
%   Requires ShortTimeEnergy (project helper) and medfilt1.

    % Convert stereo to mono by averaging the two channels.
    if size(x, 2) == 2
        x = mean(x, 2);
    end

    % Work on a row vector.
    [m, n] = size(x);
    if m > n
        x = x';
    end

    %%%%%%%%%%%%%%%%%%%%%%%%%%%
    % THRESHOLD ESTIMATION
    %%%%%%%%%%%%%%%%%%%%%%%%%%%

    % Short-time energy of the signal. The window and the step are both one
    % frame. frameLength is passed as-is: converting it to seconds and back
    % (frameLength/fs*fs) is not guaranteed to return the exact integer.
    % NOTE(review): assumes ShortTimeEnergy(signal, windowLength, step)
    % returns one energy value per frame -- confirm against the helper.
    Eor = ShortTimeEnergy(x, frameLength, frameLength);

    % Apply median filtering to the energy sequence (twice), over 5 frames.
    E = medfilt1(Eor, 5);
    E = medfilt1(E, 5);

    % Normalise to a peak of 1. Skipped when the peak is zero (all-silent
    % input) so the sequence stays at zero instead of turning into NaN;
    % either way no frame reaches the threshold below.
    peak = max(E);
    if isscalar(peak) && peak ~= 0
        E = E * (1 / peak);
    end

    % Energy threshold on the normalised sequence (determined empirically).
    T_E = 0.0005;

    % Thresholding: true for frames considered non-silent.
    flags = (E >= T_E);

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % SPEECH SEGMENTS DETECTION
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

    WIN = 0;                        % extra frames added around each segment
    numFrames = length(flags);
    Limits = [];
    count = 1;

    % NOTE(review): the boundary behaviour below is kept exactly as it was
    % because callers may rely on it: the final frame is never tested as the
    % start of a segment, and a segment's end index is the frame at which
    % the scan stopped (the first silent frame, or the final frame).
    while count < numFrames         % while there are frames to be processed
        countTemp = 1;              % frames seen in the current segment, plus one

        % Walk across a run of consecutive active frames.
        while (count < numFrames) && (flags(count) == 1)
            if countTemp == 1
                % First frame of the run: record the start limit in samples,
                % never below the first sample.
                Limit1 = max(round((count - WIN) * frameLength) + 1, 1);
            end
            count = count + 1;
            countTemp = countTemp + 1;
        end

        if countTemp > 1
            % At least one active frame was found: record the end limit in
            % samples, never beyond the end of the signal.
            Limit2 = min(round((count + WIN) * frameLength), length(x));
            Limits(end+1, :) = [Limit1, Limit2];
        end

        count = count + 1;
    end

    %%%%%%%%%%%%%%%%%%%%%%%
    %    POST - PROCESS   %
    %%%%%%%%%%%%%%%%%%%%%%%

    % Convert the sample limits to frame indices; an index of 0 becomes 1.
    Limits = floor(Limits / frameLength);
    Limits(Limits == 0) = 1;

    % Merge overlapping segments: whenever a segment ends at or after the
    % start of the next one, it takes over the next segment's end and the
    % next row is removed. The same row is then compared again with its new
    % neighbour, so a single forward pass is sufficient.
    i = 1;
    while i < size(Limits, 1)
        if Limits(i, 2) >= Limits(i+1, 1)
            Limits(i, 2) = Limits(i+1, 2);
            Limits(i+1, :) = [];
        else
            i = i + 1;
        end
    end
end