Mercurial > hg > auditok

--- a/README.md	Wed Nov 25 00:57:51 2015 +0100
+++ b/README.md	Wed Nov 25 02:36:06 2015 +0100
@@ -19,8 +19,8 @@
 However if you want more features, the following packages are needed:
 - [pydub](https://github.com/jiaaro/pydub): read audio files of popular audio formats (ogg, mp3, etc.) or extract audio from a video file
 - [PyAudio](http://people.csail.mit.edu/hubert/pyaudio/): read audio data from the microphone and play back detections
-- matplotlib: plot audio signal and detections (see figures above)
-- numpy: required by matplotlib. Also used for math operations instead of standard python if available
+- `matplotlib`: plot audio signal and detections (see figures above)
+- `numpy`: required by matplotlib. Also used for math operations instead of standard python if available
 - Optionally, you can use `sox` or `parecord` for data acquisition and feed `auditok` using a pipe.


@@ -28,6 +28,66 @@
 ------------
     python setup.py install

+Command line usage:
+------------------
+
+The first thing you want to check is perhaps how `auditok` detects your voice. If you have installed `PyAudio` just run (`Ctrl-C` to stop):
+
+    auditok -D -E
+
+Option `-D` means debug, whereas `-E` stands for echo, so `auditok` plays back whatever it detects.
+
+If there are too many detections, use a higher value for energy threshold (the current version only implements a `validator` based on energy threshold. The use of spectral information is also desirable and might be part of future releases). To change the energy threshold (default: 45), use option `-e`:
+
+    auditok -D -E -e 55
+
+If you don't have `PyAudio`, you can use `sox` for data acquisition (`sudo apt-get install sox`):
+
+    rec -q -t raw -r 16000 -c 1 -b 16 -e signed - | auditok -r 16000 -i -
+
+With `-i -`,  `auditok` reads data from standard input.
+
+`rec` and `play` are just aliases for `sox`. With this setup you won't be able to play back audio detections (`-E` requires `PyAudio`). Fortunately, `auditok` makes it possible to call any command every time it detects an activity, passing the activity as a file to the user-supplied command:
+
+    rec -q -t raw -r 16000 -c 1 -b 16 -e signed - | auditok -i - -r 16000 -C "play -q -t raw -r 16000 -c 1 -b 16 -e signed $"
+
+The `-C` option tells `auditok` to interpret its content as a command that is run whenever `auditok` detects an audio activity, replacing the `$` with the name of a temporary file into which the activity is saved as raw audio. Here we use `play` to play the activity, giving the necessary `play` arguments for raw data.
+
+The `-C` option can be useful in many cases. Imagine a command that sends audio data over a network only if there is an audio activity and saves bandwidth during silence.
+
+### Plot signal and detections:
+
+Use option `-p`. Requires `matplotlib` and `numpy`.
+
+### read data from file
+
+    auditok -i input.wav ...
+
+Install `pydub` for other audio formats.
+
+### Limit the length of acquired data
+
+    auditok -M 12 ...
+
+Time is in seconds.
+
+### Save the whole acquired audio signal
+
+    auditok -O output.wav ...
+
+Install `pydub` for other audio formats.
+
+
+### Save each detection into a separate audio file
+
+    auditok -o det_{N}_{start}_{end}.wav ...
+
+You can use free text and place `{N}`, `{start}` and `{end}` wherever you want; they will be replaced by the detection number, start time and end time respectively. Another example:
+
+    auditok -o {start}-{end}.wav ...
+
+Install `pydub` for more audio formats.
+
 Demos
 -----
 This code reads data from the microphone and plays back whatever it detects.
@@ -65,3 +125,4 @@
 Author
 ------
 Amine Sehili (<amine.sehili@gmail.com>)
+
--- a/auditok/cmdline.py	Wed Nov 25 00:57:51 2015 +0100
+++ b/auditok/cmdline.py	Wed Nov 25 02:36:06 2015 +0100
@@ -119,9 +119,9 @@
                                  "audio_data" : audio_data,
                                  "start" : start,
                                  "end" : end,
-                                 "start_time" : "{:5.2f}".format(start * self.analysis_window),
-                                 "end_time" : "{:5.2f}".format((end+1) * self.analysis_window),
-                                 "duration" : "{:5.2f}".format((end - start + 1) * self.analysis_window)}
+                                 "start_time" : start * self.analysis_window,
+                                 "end_time" : (end+1) * self.analysis_window,
+                                 "duration" : (end - start + 1) * self.analysis_window}
                                 )

         self.ads.open()
@@ -168,7 +168,7 @@
                     self.player.play(audio_data)
                     if self.debug:
                         print("[PLAY]: playing detection {id} (start:{start}, end:{end}, dur:{dur})".format(id=_id,
-                        start=start_time, end=end_time, dur=dur))
+                        start="{:5.2f}".format(start_time), end="{:5.2f}".format(end_time), dur="{:5.2f}".format(dur)))

     def notify(self, message):
         self.send(message)
@@ -228,7 +228,7 @@
                 end_time = message.pop("end_time", None)
                 _id = message.pop("id", None)
                 if audio_data is not None and len(audio_data) > 0:
-                    fname = self.name_format.format(N=_id, start = start_time, end = end_time)
+                    fname = self.name_format.format(N=_id, start = "{:.2f}".format(start_time), end = "{:.2f}".format(end_time))
                     try:
                         save_audio_data(audio_data, fname, filetype=self.filetype, **self.kwargs)
                         if self.debug:
@@ -481,7 +481,7 @@
         group.add_option("-n", "--min-duration", dest="min_duration", help="Min duration of a valid audio event in seconds [default: %default]", type=float, default=0.2, metavar="FLOAT")
         group.add_option("-m", "--max-duration", dest="max_duration", help="Max duration of a valid audio event in seconds [default: %default]", type=float, default=5, metavar="FLOAT")
         group.add_option("-s", "--max-silence", dest="max_silence", help="Max duration of a consecutive silence within a valid audio event in seconds [default: %default]", type=float, default=0.3, metavar="FLOAT")
-        parser.add_option("-d", "--drop-trailing-silence", dest="drop_trailing_silence", help="Drop trailing silence from a detection [default: do not play]",  action="store_true", default=False)
+        group.add_option("-d", "--drop-trailing-silence", dest="drop_trailing_silence", help="Drop trailing silence from a detection [default: do not play]",  action="store_true", default=False)
         group.add_option("-e", "--energy-threshold", dest="energy_threshold", help="Log energy threshold for detection [default: %default]", type=float, default=45, metavar="FLOAT")
         parser.add_option_group(group)

@@ -501,14 +501,11 @@
         # process options
         (opts, args) = parser.parse_args(argv)

-
         if (opts.output_tokens, opts.command, opts.echo, opts.plot, opts.debug) == (None, None, False, False, False):
             # nothing to do with audio data
             sys.stderr.write("Nothing to do!\nType -h for more information\n")
             sys.exit(1)

-        print(opts.debug)
-

         if opts.input == "-":
             asource = StdinAudioSource(sampling_rate = opts.sampling_rate,