amine@2
|
1 """
|
amine@2
|
2 @author: Amine SEHILI <amine.sehili@gmail.com>
|
amine@2
|
3 September, 2015
|
amine@2
|
4 """
|
amine@2
|
5
|
amine@10
|
6 # Trim leading and trailing silence from a record
|
amine@2
|
7
|
amine@2
|
8 from auditok import ADSFactory, AudioEnergyValidator, StreamTokenizer, player_for, dataset
|
amine@2
|
9 import pyaudio
|
amine@10
|
10 import sys
|
amine@2
|
11
|
amine@2
|
12 """
|
amine@2
|
13 The tokenizer in the following example is set up to remove the silence
|
amine@2
|
14 that precedes the first acoustic activity or follows the last activity
|
amine@2
|
15 in a record. It preserves whatever it finds between the two activities.
|
amine@10
|
16 In other words, it removes the leading and trailing silence.
|
amine@2
|
17
|
amine@2
|
18 Sampling rate is 44100 samples per second; we'll use an analysis window of 100 ms
|
amine@2
|
19 (i.e. block_size == 4410)
|
amine@2
|
20
|
amine@2
|
21 Energy threshold is 50.
|
amine@2
|
22
|
amine@2
|
23 The tokenizer will start accumulating windows up from the moment it encounters
|
amine@2
|
24 the first analysis window of an energy >= 50. ALL the following windows will be
|
amine@10
|
25 kept regardless of their energy. At the end of the analysis, it will drop trailing
|
amine@2
|
26 windows with an energy below 50.
|
amine@2
|
27
|
amine@2
|
28 This is an interesting example because the audio file we're analyzing contains a very
|
amine@2
|
29 brief noise that occurs within the leading silence. We certainly do not want our tokenizer
|
amine@2
|
30 to stop at this point and consider whatever comes after it as a useful signal.
|
amine@2
|
31 To force the tokenizer to ignore that brief event we use two other parameters `init_min`
|
amine@2
|
32 and `init_max_silence`. By `init_min`=3 and `init_max_silence`=1 we tell the tokenizer
|
amine@2
|
33 that a valid event must start with at least 3 noisy windows, between which there
|
amine@2
|
34 is at most 1 silent window.
|
amine@2
|
35
|
amine@2
|
36 Still with this configuration we can get the tokenizer detect that noise as a valid event
|
amine@2
|
37 (if it actually contains 3 consecutive noisy frames). To circumvent this we use a sufficiently
|
amine@2
|
38 large analysis window (here of 100 ms) to ensure that the brief noise is surrounded by a much
|
amine@2
|
39 longer silence and hence the energy of the overall analysis window will be below 50.
|
amine@2
|
40
|
amine@2
|
41 When using a shorter analysis window (of 10ms for instance, block_size == 441), the brief
|
amine@2
|
42 noise contributes more to energy calculation which yields an energy of over 50 for the window.
|
amine@2
|
43 Again we can deal with this situation by using a higher energy threshold (55 for example)
|
amine@2
|
44
|
amine@2
|
45 """
|
amine@2
|
46
|
amine@10
|
# Script body: read a recording, trim its leading and trailing silence with a
# StreamTokenizer, then play the original and the trimmed signal in turn.
#
# Both handles are pre-bound to None so that the cleanup in ``finally`` is
# safe even when an error or Ctrl-C occurs before they have been created.
asource = None
player = None

try:
    # record=True so that we'll be able to rewind the source.
    asource = ADSFactory.ads(
        filename=dataset.was_der_mensch_saet_mono_44100_lead_tail_silence,
        record=True, block_size=4410)
    asource.open()

    # Read the whole signal: keep calling read() until it returns None.
    original_signal = b''.join(iter(asource.read, None))

    # Rewind the source so that it can be read again by the tokenizer.
    asource.rewind()

    # Create a validator with an energy threshold of 50
    validator = AudioEnergyValidator(sample_width=asource.get_sample_width(),
                                     energy_threshold=50)

    # Create a tokenizer with an unlimited token length and continuous
    # silence within a token.
    # Note the DROP_TRAILING_SILENCE mode that will ensure removing trailing
    # silence.
    trimmer = StreamTokenizer(validator, min_length=20, max_length=99999999,
                              max_continuous_silence=9999999,
                              mode=StreamTokenizer.DROP_TRAILING_SILENCE,
                              init_min=3, init_max_silence=1)

    tokens = trimmer.tokenize(asource)

    # Make sure we only have one token. An explicit raise is used instead of
    # ``assert`` so the check still runs under ``python -O``.
    if len(tokens) != 1:
        raise RuntimeError("Should have detected one single token")

    # The first element of a token holds its list of audio windows.
    trimmed_signal = b''.join(tokens[0][0])

    player = player_for(asource)

    print("\n ** Playing original signal (with leading and trailing silence)...")
    player.play(original_signal)
    print("\n ** Playing trimmed signal...")
    player.play(trimmed_signal)

except KeyboardInterrupt:
    # User aborted: exit quietly, resources are released in ``finally``.
    sys.exit(0)

except Exception as e:
    sys.stderr.write(str(e) + "\n")
    sys.exit(1)

finally:
    # Runs on success, on error and on sys.exit(); only release what was
    # actually created.
    if player is not None:
        player.stop()
    if asource is not None:
        asource.close()
|