Mercurial > hg > auditok
changeset 21:8c164d41bbbf
update README.md
author | Amine Sehili <amine.sehili@gmail.com> |
---|---|
date | Wed, 25 Nov 2015 02:36:06 +0100 |
parents | f26d19ad119d |
children | aceb9bc3d74e |
files | README.md auditok/cmdline.py |
diffstat | 2 files changed, 69 insertions(+), 11 deletions(-) [+] |
line wrap: on
line diff
--- a/README.md Wed Nov 25 00:57:51 2015 +0100 +++ b/README.md Wed Nov 25 02:36:06 2015 +0100 @@ -19,8 +19,8 @@ However if you want more features, the following packages are needed: - [pydub](https://github.com/jiaaro/pydub): read audio files of popular audio formats (ogg, mp3, etc.) or extract audio from a video file - [PyAudio](http://people.csail.mit.edu/hubert/pyaudio/): read audio data from the microphone and play back detections -- matplotlib: plot audio signal and detections (see figures above) -- numpy: required by matplotlib. Also used for math operations instead of standard python if available +- `matplotlib`: plot audio signal and detections (see figures above) +- `numpy`: required by matplotlib. Also used for math operations instead of standard python if available - Optionally, you can use `sox` or `parecord` for data acquisition and feed `auditok` using a pipe. @@ -28,6 +28,66 @@ ------------ python setup.py install +Command line usage: +------------------ + +The first thing you want to check is perhaps how `auditok` detects your voice. If you have installed `PyAudio` just run (`Ctrl-C` to stop): + + auditok -D -E + +Option `-D` means debug, whereas `-E` stands for echo, so `auditok` plays back whatever it detects. + +If there are too many detections, use a higher value for energy threshold (the current version only implements a `validator` based on energy threshold. The use of spectral information is also desirable and might be part of future releases). To change the energy threshold (default: 45), use option `-e`: + + auditok -D -E -e 55 + +If you don't have `PyAudio`, you can use `sox` for data acquisition (`sudo apt-get install sox`): + + rec -q -t raw -r 16000 -c 1 -b 16 -e signed - | auditok -r 16000 -i - + +With `-i -`, `auditok` reads data from standard input. + +`rec` and `play` are just aliases for `sox`. Doing so, you won't be able to play audio detections (`-E` requires `PyAudio`). 
Fortunately, `auditok` gives the possibility to call any command every time it detects an activity, passing the activity as a file to the user-supplied command: + + rec -q -t raw -r 16000 -c 1 -b 16 -e signed - | auditok -i - -r 16000 -C "play -q -t raw -r 16000 -c 1 -b 16 -e signed $" + +The `-C` option tells `auditok` to interpret its content as a command that is run whenever `auditok` detects an audio activity, replacing the `$` with the name of a temporary file into which the activity is saved as raw audio. Here we use `play` to play the activity, giving the necessary `play` arguments for raw data. + +The `-C` option can be useful in many cases. Imagine a command that sends audio data over a network only if there is an audio activity and saves bandwidth during silence. + +### Plot signal and detections: + +Use option `-p`. Requires `matplotlib` and `numpy` + +### Read data from file + + auditok -i input.wav ... + +Install `pydub` for other audio formats. + +### Limit the length of acquired data + + auditok -M 12 ... + +Time is in seconds. + +### Save the whole acquired audio signal + + auditok -O output.wav ... + +Install `pydub` for other audio formats. + + +### Save each detection into a separate audio file + + auditok -o det_{N}_{start}_{end}.wav ... + +You can use free text and place `{N}`, `{start}` and `{end}` wherever you want; they will be replaced by the detection number, start time and end time respectively. Another example: + + auditok -o {start}-{end}.wav ... + +Install `pydub` for more audio formats. + Demos ----- This code reads data from the microphone and plays back whatever it detects. @@ -65,3 +125,4 @@ Author ------ Amine Sehili (<amine.sehili@gmail.com>) +
--- a/auditok/cmdline.py Wed Nov 25 00:57:51 2015 +0100 +++ b/auditok/cmdline.py Wed Nov 25 02:36:06 2015 +0100 @@ -119,9 +119,9 @@ "audio_data" : audio_data, "start" : start, "end" : end, - "start_time" : "{:5.2f}".format(start * self.analysis_window), - "end_time" : "{:5.2f}".format((end+1) * self.analysis_window), - "duration" : "{:5.2f}".format((end - start + 1) * self.analysis_window)} + "start_time" : start * self.analysis_window, + "end_time" : (end+1) * self.analysis_window, + "duration" : (end - start + 1) * self.analysis_window} ) self.ads.open() @@ -168,7 +168,7 @@ self.player.play(audio_data) if self.debug: print("[PLAY]: playing detection {id} (start:{start}, end:{end}, dur:{dur})".format(id=_id, - start=start_time, end=end_time, dur=dur)) + start="{:5.2f}".format(start_time), end="{:5.2f}".format(end_time), dur="{:5.2f}".format(dur))) def notify(self, message): self.send(message) @@ -228,7 +228,7 @@ end_time = message.pop("end_time", None) _id = message.pop("id", None) if audio_data is not None and len(audio_data) > 0: - fname = self.name_format.format(N=_id, start = start_time, end = end_time) + fname = self.name_format.format(N=_id, start = "{:.2f}".format(start_time), end = "{:.2f}".format(end_time)) try: save_audio_data(audio_data, fname, filetype=self.filetype, **self.kwargs) if self.debug: @@ -481,7 +481,7 @@ group.add_option("-n", "--min-duration", dest="min_duration", help="Min duration of a valid audio event in seconds [default: %default]", type=float, default=0.2, metavar="FLOAT") group.add_option("-m", "--max-duration", dest="max_duration", help="Max duration of a valid audio event in seconds [default: %default]", type=float, default=5, metavar="FLOAT") group.add_option("-s", "--max-silence", dest="max_silence", help="Max duration of a consecutive silence within a valid audio event in seconds [default: %default]", type=float, default=0.3, metavar="FLOAT") - parser.add_option("-d", "--drop-trailing-silence", dest="drop_trailing_silence", 
help="Drop trailing silence from a detection [default: do not play]", action="store_true", default=False) + group.add_option("-d", "--drop-trailing-silence", dest="drop_trailing_silence", help="Drop trailing silence from a detection [default: do not play]", action="store_true", default=False) group.add_option("-e", "--energy-threshold", dest="energy_threshold", help="Log energy threshold for detection [default: %default]", type=float, default=45, metavar="FLOAT") parser.add_option_group(group) @@ -501,14 +501,11 @@ # process options (opts, args) = parser.parse_args(argv) - if (opts.output_tokens, opts.command, opts.echo, opts.plot, opts.debug) == (None, None, False, False, False): # nothing to do with audio data sys.stderr.write("Nothing to do!\nType -h for more information\n") sys.exit(1) - print(opts.debug) - if opts.input == "-": asource = StdinAudioSource(sampling_rate = opts.sampling_rate,