Mercurial > hg > auditok
comparison demos/audio_trim_demo.py @ 10:9be2d0ca4c00
Python 3.x support
author | Amine Sehili <amine.sehili@gmail.com> |
---|---|
date | Tue, 24 Nov 2015 01:57:53 +0100 |
parents | 31c97510b16b |
children | 9741b52f194a |
comparison
equal
deleted
inserted
replaced
9:c2ddae4d2c36 | 10:9be2d0ca4c00 |
---|---|
1 """ | 1 """ |
2 @author: Amine SEHILI <amine.sehili@gmail.com> | 2 @author: Amine SEHILI <amine.sehili@gmail.com> |
3 September, 2015 | 3 September, 2015 |
4 """ | 4 """ |
5 | 5 |
6 # Trim leading and tailing silence from a record | 6 # Trim leading and trailing silence from a record |
7 | 7 |
8 from auditok import ADSFactory, AudioEnergyValidator, StreamTokenizer, player_for, dataset | 8 from auditok import ADSFactory, AudioEnergyValidator, StreamTokenizer, player_for, dataset |
9 import pyaudio | 9 import pyaudio |
10 import sys | |
10 | 11 |
11 """ | 12 """ |
12 The tokenizer in the following example is set up to remove the silence | 13 The tokenizer in the following example is set up to remove the silence |
13 that precedes the first acoustic activity or follows the last activity | 14 that precedes the first acoustic activity or follows the last activity |
14 in a record. It preserves whatever it finds between the two activities. | 15 in a record. It preserves whatever it finds between the two activities. |
15 In other words, it removes the leading and tailing silence. | 16 In other words, it removes the leading and trailing silence. |
16 | 17 |
17 Sampling rate is 44100 samples per second, we'll use an analysis window of 100 ms | 18 Sampling rate is 44100 samples per second, we'll use an analysis window of 100 ms |
18 (i.e. block_size == 4410) | 19 (i.e. block_size == 4410) |
19 | 20 |
20 Energy threshold is 50. | 21 Energy threshold is 50. |
21 | 22 |
22 The tokenizer will start accumulating windows up from the moment it encounters | 23 The tokenizer will start accumulating windows up from the moment it encounters |
23 the first analysis window of an energy >= 50. ALL the following windows will be | 24 the first analysis window of an energy >= 50. ALL the following windows will be |
24 kept regardless of their energy. At the end of the analysis, it will drop tailing | 25 kept regardless of their energy. At the end of the analysis, it will drop trailing |
25 windows with an energy below 50. | 26 windows with an energy below 50. |
26 | 27 |
27 This is an interesting example because the audio file we're analyzing contains a very | 28 This is an interesting example because the audio file we're analyzing contains a very |
28 brief noise that occurs within the leading silence. We certainly do not want our tokenizer | 29 brief noise that occurs within the leading silence. We certainly do not want our tokenizer |
29 to stop at this point and consider whatever comes after it as a useful signal. | 30 to stop at this point and consider whatever comes after it as a useful signal. |
41 noise contributes more to energy calculation which yields an energy of over 50 for the window. | 42 noise contributes more to energy calculation which yields an energy of over 50 for the window. |
42 Again we can deal with this situation by using a higher energy threshold (55 for example) | 43 Again we can deal with this situation by using a higher energy threshold (55 for example) |
43 | 44 |
44 """ | 45 """ |
45 | 46 |
47 try: | |
48 # record = True so that we'll be able to rewind the source. | |
49 asource = ADSFactory.ads(filename=dataset.was_der_mensch_saet_mono_44100_lead_tail_silence, | |
50 record=True, block_size=4410) | |
51 asource.open() | |
46 | 52 |
47 # record = True so that we'll be able to rewind the source. | 53 original_signal = [] |
48 asource = ADSFactory.ads(filename=dataset.was_der_mensch_saet_mono_44100_lead_tail_silence, | 54 # Read the whole signal |
49 record=True, block_size=4410) | 55 while True: |
50 asource.open() | 56 w = asource.read() |
57 if w is None: | |
58 break | |
59 original_signal.append(w) | |
51 | 60 |
52 original_signal = [] | 61 original_signal = b''.join(original_signal) |
53 # Read the whole signal | |
54 while True: | |
55 w = asource.read() | |
56 if w is None: | |
57 break | |
58 original_signal.append(w) | |
59 | |
60 original_signal = ''.join(original_signal) | |
61 | 62 |
62 | 63 |
63 # rewind source | 64 # rewind source |
64 asource.rewind() | 65 asource.rewind() |
65 | 66 |
66 # Create a validator with an energy threshold of 50 | 67 # Create a validator with an energy threshold of 50 |
67 validator = AudioEnergyValidator(sample_width=asource.get_sample_width(), energy_threshold=50) | 68 validator = AudioEnergyValidator(sample_width=asource.get_sample_width(), energy_threshold=50) |
68 | 69 |
69 # Create a tokenizer with an unlimited token length and continuous silence within a token | 70 # Create a tokenizer with an unlimited token length and continuous silence within a token |
70 # Note the DROP_TAILING_SILENCE mode that will ensure removing tailing silence | 71 # Note the DROP_TRAILING_SILENCE mode that will ensure removing trailing silence |
71 trimmer = StreamTokenizer(validator, min_length = 20, max_length=99999999, | 72 trimmer = StreamTokenizer(validator, min_length = 20, max_length=99999999, |
72 max_continuous_silence=9999999, mode=StreamTokenizer.DROP_TAILING_SILENCE, init_min=3, init_max_silence=1) | 73 max_continuous_silence=9999999, mode=StreamTokenizer.DROP_TRAILING_SILENCE, init_min=3, init_max_silence=1) |
73 | 74 |
74 | 75 |
75 tokens = trimmer.tokenize(asource) | 76 tokens = trimmer.tokenize(asource) |
76 | 77 |
77 # Make sure we only have one token | 78 # Make sure we only have one token |
78 assert len(tokens) == 1, "Should have detected one single token" | 79 assert len(tokens) == 1, "Should have detected one single token" |
79 | 80 |
80 trimmed_signal = ''.join(tokens[0][0]) | 81 trimmed_signal = b''.join(tokens[0][0]) |
81 | 82 |
82 player = player_for(asource) | 83 player = player_for(asource) |
83 | 84 |
84 print("\n ** Playing original signal (with leading and tailing silence)...") | 85 print("\n ** Playing original signal (with leading and trailing silence)...") |
85 player.play(original_signal) | 86 player.play(original_signal) |
86 print("\n ** Playing trimmed signal...") | 87 print("\n ** Playing trimmed signal...") |
87 player.play(trimmed_signal) | 88 player.play(trimmed_signal) |
88 | 89 |
89 player.stop() | 90 player.stop() |
90 asource.close() | 91 asource.close() |
92 | |
93 except KeyboardInterrupt: | |
94 | |
95 player.stop() | |
96 asource.close() | |
97 sys.exit(0) | |
98 | |
99 except Exception as e: | |
100 | |
101 sys.stderr.write(str(e) + "\n") | |
102 sys.exit(1) |