changeset 2:edee860b9f61

First release on Github
author Amine Sehili <amine.sehili@gmail.com>
date Thu, 17 Sep 2015 22:01:30 +0200
parents 78ba0ead5f9f
children 364eeb8e8bd2
files CHANGELOG INSTALL LICENSE LICENSE.md README.md auditok/__init__.py auditok/core.py auditok/data/1to6arabic_16000_mono_bc_noise.wav auditok/data/was_der_mensch_saet_das_wir_er_veilfach_enrten_44100Hz_mono_lead_trail_silence.wav auditok/dataset.py auditok/io.py auditok/util.py demos/audio_tokenize_demo.py demos/audio_trim_demo.py demos/echo.py quickstart.rst setup.py tests/test_AudioDataSourceFactory.py tests/test_StreamTokenizer.py tests/test_audio_source.py
diffstat 20 files changed, 5052 insertions(+), 674 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/CHANGELOG	Thu Sep 17 22:01:30 2015 +0200
@@ -0,0 +1,8 @@
+auditok Changelog
+=================
+
+
+Version 0.1.2
+--------------
+
+First public release on Github.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/INSTALL	Thu Sep 17 22:01:30 2015 +0200
@@ -0,0 +1,3 @@
+
+
+pip install auditok
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/LICENSE	Thu Sep 17 22:01:30 2015 +0200
@@ -0,0 +1,674 @@
+                    GNU GENERAL PUBLIC LICENSE
+                       Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+  The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works.  By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.  We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors.  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+  To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights.  Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received.  You must make sure that they, too, receive
+or can get the source code.  And you must show them these terms so they
+know their rights.
+
+  Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+  For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software.  For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+  Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so.  This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software.  The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable.  Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products.  If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+  Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary.  To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                       TERMS AND CONDITIONS
+
+  0. Definitions.
+
+  "This License" refers to version 3 of the GNU General Public License.
+
+  "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+  "The Program" refers to any copyrightable work licensed under this
+License.  Each licensee is addressed as "you".  "Licensees" and
+"recipients" may be individuals or organizations.
+
+  To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy.  The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+  A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+  To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy.  Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+  To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies.  Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+  An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License.  If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+  1. Source Code.
+
+  The "source code" for a work means the preferred form of the work
+for making modifications to it.  "Object code" means any non-source
+form of a work.
+
+  A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+  The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form.  A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+  The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities.  However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work.  For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+  The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+  The Corresponding Source for a work in source code form is that
+same work.
+
+  2. Basic Permissions.
+
+  All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met.  This License explicitly affirms your unlimited
+permission to run the unmodified Program.  The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work.  This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+  You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force.  You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright.  Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+  Conveying under any other circumstances is permitted solely under
+the conditions stated below.  Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+  No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+  When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+  4. Conveying Verbatim Copies.
+
+  You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+  You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+  5. Conveying Modified Source Versions.
+
+  You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+    a) The work must carry prominent notices stating that you modified
+    it, and giving a relevant date.
+
+    b) The work must carry prominent notices stating that it is
+    released under this License and any conditions added under section
+    7.  This requirement modifies the requirement in section 4 to
+    "keep intact all notices".
+
+    c) You must license the entire work, as a whole, under this
+    License to anyone who comes into possession of a copy.  This
+    License will therefore apply, along with any applicable section 7
+    additional terms, to the whole of the work, and all its parts,
+    regardless of how they are packaged.  This License gives no
+    permission to license the work in any other way, but it does not
+    invalidate such permission if you have separately received it.
+
+    d) If the work has interactive user interfaces, each must display
+    Appropriate Legal Notices; however, if the Program has interactive
+    interfaces that do not display Appropriate Legal Notices, your
+    work need not make them do so.
+
+  A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit.  Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+  6. Conveying Non-Source Forms.
+
+  You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+    a) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by the
+    Corresponding Source fixed on a durable physical medium
+    customarily used for software interchange.
+
+    b) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by a
+    written offer, valid for at least three years and valid for as
+    long as you offer spare parts or customer support for that product
+    model, to give anyone who possesses the object code either (1) a
+    copy of the Corresponding Source for all the software in the
+    product that is covered by this License, on a durable physical
+    medium customarily used for software interchange, for a price no
+    more than your reasonable cost of physically performing this
+    conveying of source, or (2) access to copy the
+    Corresponding Source from a network server at no charge.
+
+    c) Convey individual copies of the object code with a copy of the
+    written offer to provide the Corresponding Source.  This
+    alternative is allowed only occasionally and noncommercially, and
+    only if you received the object code with such an offer, in accord
+    with subsection 6b.
+
+    d) Convey the object code by offering access from a designated
+    place (gratis or for a charge), and offer equivalent access to the
+    Corresponding Source in the same way through the same place at no
+    further charge.  You need not require recipients to copy the
+    Corresponding Source along with the object code.  If the place to
+    copy the object code is a network server, the Corresponding Source
+    may be on a different server (operated by you or a third party)
+    that supports equivalent copying facilities, provided you maintain
+    clear directions next to the object code saying where to find the
+    Corresponding Source.  Regardless of what server hosts the
+    Corresponding Source, you remain obligated to ensure that it is
+    available for as long as needed to satisfy these requirements.
+
+    e) Convey the object code using peer-to-peer transmission, provided
+    you inform other peers where the object code and Corresponding
+    Source of the work are being offered to the general public at no
+    charge under subsection 6d.
+
+  A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+  A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling.  In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage.  For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product.  A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+  "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source.  The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+  If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information.  But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+  The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed.  Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+  Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+  7. Additional Terms.
+
+  "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law.  If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+  When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it.  (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.)  You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+  Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+    a) Disclaiming warranty or limiting liability differently from the
+    terms of sections 15 and 16 of this License; or
+
+    b) Requiring preservation of specified reasonable legal notices or
+    author attributions in that material or in the Appropriate Legal
+    Notices displayed by works containing it; or
+
+    c) Prohibiting misrepresentation of the origin of that material, or
+    requiring that modified versions of such material be marked in
+    reasonable ways as different from the original version; or
+
+    d) Limiting the use for publicity purposes of names of licensors or
+    authors of the material; or
+
+    e) Declining to grant rights under trademark law for use of some
+    trade names, trademarks, or service marks; or
+
+    f) Requiring indemnification of licensors and authors of that
+    material by anyone who conveys the material (or modified versions of
+    it) with contractual assumptions of liability to the recipient, for
+    any liability that these contractual assumptions directly impose on
+    those licensors and authors.
+
+  All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10.  If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term.  If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+  If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+  Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+  8. Termination.
+
+  You may not propagate or modify a covered work except as expressly
+provided under this License.  Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+  However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+  Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+  Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License.  If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+  9. Acceptance Not Required for Having Copies.
+
+  You are not required to accept this License in order to receive or
+run a copy of the Program.  Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance.  However,
+nothing other than this License grants you permission to propagate or
+modify any covered work.  These actions infringe copyright if you do
+not accept this License.  Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+  10. Automatic Licensing of Downstream Recipients.
+
+  Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License.  You are not responsible
+for enforcing compliance by third parties with this License.
+
+  An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations.  If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+  You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License.  For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+  11. Patents.
+
+  A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based.  The
+work thus licensed is called the contributor's "contributor version".
+
+  A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version.  For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+  Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+  In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement).  To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+  If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients.  "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+  If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+  A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License.  You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+  Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+  12. No Surrender of Others' Freedom.
+
+  If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all.  For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+  13. Use with the GNU Affero General Public License.
+
+  Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work.  The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+  14. Revised Versions of this License.
+
+  The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+  Each version is given a distinguishing version number.  If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation.  If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+  If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+  Later license versions may give you additional or different
+permissions.  However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+  15. Disclaimer of Warranty.
+
+  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. Limitation of Liability.
+
+  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+  17. Interpretation of Sections 15 and 16.
+
+  If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+  If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+    <program>  Copyright (C) <year>  <name of author>
+    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<http://www.gnu.org/licenses/>.
+
+  The GNU General Public License does not permit incorporating your program
+into proprietary programs.  If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.  But first, please read
+<http://www.gnu.org/philosophy/why-not-lgpl.html>.
--- a/LICENSE.md	Thu Sep 17 18:47:36 2015 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,674 +0,0 @@
-                    GNU GENERAL PUBLIC LICENSE
-                       Version 3, 29 June 2007
-
- Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
-                            Preamble
-
-  The GNU General Public License is a free, copyleft license for
-software and other kinds of works.
-
-  The licenses for most software and other practical works are designed
-to take away your freedom to share and change the works.  By contrast,
-the GNU General Public License is intended to guarantee your freedom to
-share and change all versions of a program--to make sure it remains free
-software for all its users.  We, the Free Software Foundation, use the
-GNU General Public License for most of our software; it applies also to
-any other work released this way by its authors.  You can apply it to
-your programs, too.
-
-  When we speak of free software, we are referring to freedom, not
-price.  Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-them if you wish), that you receive source code or can get it if you
-want it, that you can change the software or use pieces of it in new
-free programs, and that you know you can do these things.
-
-  To protect your rights, we need to prevent others from denying you
-these rights or asking you to surrender the rights.  Therefore, you have
-certain responsibilities if you distribute copies of the software, or if
-you modify it: responsibilities to respect the freedom of others.
-
-  For example, if you distribute copies of such a program, whether
-gratis or for a fee, you must pass on to the recipients the same
-freedoms that you received.  You must make sure that they, too, receive
-or can get the source code.  And you must show them these terms so they
-know their rights.
-
-  Developers that use the GNU GPL protect your rights with two steps:
-(1) assert copyright on the software, and (2) offer you this License
-giving you legal permission to copy, distribute and/or modify it.
-
-  For the developers' and authors' protection, the GPL clearly explains
-that there is no warranty for this free software.  For both users' and
-authors' sake, the GPL requires that modified versions be marked as
-changed, so that their problems will not be attributed erroneously to
-authors of previous versions.
-
-  Some devices are designed to deny users access to install or run
-modified versions of the software inside them, although the manufacturer
-can do so.  This is fundamentally incompatible with the aim of
-protecting users' freedom to change the software.  The systematic
-pattern of such abuse occurs in the area of products for individuals to
-use, which is precisely where it is most unacceptable.  Therefore, we
-have designed this version of the GPL to prohibit the practice for those
-products.  If such problems arise substantially in other domains, we
-stand ready to extend this provision to those domains in future versions
-of the GPL, as needed to protect the freedom of users.
-
-  Finally, every program is threatened constantly by software patents.
-States should not allow patents to restrict development and use of
-software on general-purpose computers, but in those that do, we wish to
-avoid the special danger that patents applied to a free program could
-make it effectively proprietary.  To prevent this, the GPL assures that
-patents cannot be used to render the program non-free.
-
-  The precise terms and conditions for copying, distribution and
-modification follow.
-
-                       TERMS AND CONDITIONS
-
-  0. Definitions.
-
-  "This License" refers to version 3 of the GNU General Public License.
-
-  "Copyright" also means copyright-like laws that apply to other kinds of
-works, such as semiconductor masks.
-
-  "The Program" refers to any copyrightable work licensed under this
-License.  Each licensee is addressed as "you".  "Licensees" and
-"recipients" may be individuals or organizations.
-
-  To "modify" a work means to copy from or adapt all or part of the work
-in a fashion requiring copyright permission, other than the making of an
-exact copy.  The resulting work is called a "modified version" of the
-earlier work or a work "based on" the earlier work.
-
-  A "covered work" means either the unmodified Program or a work based
-on the Program.
-
-  To "propagate" a work means to do anything with it that, without
-permission, would make you directly or secondarily liable for
-infringement under applicable copyright law, except executing it on a
-computer or modifying a private copy.  Propagation includes copying,
-distribution (with or without modification), making available to the
-public, and in some countries other activities as well.
-
-  To "convey" a work means any kind of propagation that enables other
-parties to make or receive copies.  Mere interaction with a user through
-a computer network, with no transfer of a copy, is not conveying.
-
-  An interactive user interface displays "Appropriate Legal Notices"
-to the extent that it includes a convenient and prominently visible
-feature that (1) displays an appropriate copyright notice, and (2)
-tells the user that there is no warranty for the work (except to the
-extent that warranties are provided), that licensees may convey the
-work under this License, and how to view a copy of this License.  If
-the interface presents a list of user commands or options, such as a
-menu, a prominent item in the list meets this criterion.
-
-  1. Source Code.
-
-  The "source code" for a work means the preferred form of the work
-for making modifications to it.  "Object code" means any non-source
-form of a work.
-
-  A "Standard Interface" means an interface that either is an official
-standard defined by a recognized standards body, or, in the case of
-interfaces specified for a particular programming language, one that
-is widely used among developers working in that language.
-
-  The "System Libraries" of an executable work include anything, other
-than the work as a whole, that (a) is included in the normal form of
-packaging a Major Component, but which is not part of that Major
-Component, and (b) serves only to enable use of the work with that
-Major Component, or to implement a Standard Interface for which an
-implementation is available to the public in source code form.  A
-"Major Component", in this context, means a major essential component
-(kernel, window system, and so on) of the specific operating system
-(if any) on which the executable work runs, or a compiler used to
-produce the work, or an object code interpreter used to run it.
-
-  The "Corresponding Source" for a work in object code form means all
-the source code needed to generate, install, and (for an executable
-work) run the object code and to modify the work, including scripts to
-control those activities.  However, it does not include the work's
-System Libraries, or general-purpose tools or generally available free
-programs which are used unmodified in performing those activities but
-which are not part of the work.  For example, Corresponding Source
-includes interface definition files associated with source files for
-the work, and the source code for shared libraries and dynamically
-linked subprograms that the work is specifically designed to require,
-such as by intimate data communication or control flow between those
-subprograms and other parts of the work.
-
-  The Corresponding Source need not include anything that users
-can regenerate automatically from other parts of the Corresponding
-Source.
-
-  The Corresponding Source for a work in source code form is that
-same work.
-
-  2. Basic Permissions.
-
-  All rights granted under this License are granted for the term of
-copyright on the Program, and are irrevocable provided the stated
-conditions are met.  This License explicitly affirms your unlimited
-permission to run the unmodified Program.  The output from running a
-covered work is covered by this License only if the output, given its
-content, constitutes a covered work.  This License acknowledges your
-rights of fair use or other equivalent, as provided by copyright law.
-
-  You may make, run and propagate covered works that you do not
-convey, without conditions so long as your license otherwise remains
-in force.  You may convey covered works to others for the sole purpose
-of having them make modifications exclusively for you, or provide you
-with facilities for running those works, provided that you comply with
-the terms of this License in conveying all material for which you do
-not control copyright.  Those thus making or running the covered works
-for you must do so exclusively on your behalf, under your direction
-and control, on terms that prohibit them from making any copies of
-your copyrighted material outside their relationship with you.
-
-  Conveying under any other circumstances is permitted solely under
-the conditions stated below.  Sublicensing is not allowed; section 10
-makes it unnecessary.
-
-  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
-
-  No covered work shall be deemed part of an effective technological
-measure under any applicable law fulfilling obligations under article
-11 of the WIPO copyright treaty adopted on 20 December 1996, or
-similar laws prohibiting or restricting circumvention of such
-measures.
-
-  When you convey a covered work, you waive any legal power to forbid
-circumvention of technological measures to the extent such circumvention
-is effected by exercising rights under this License with respect to
-the covered work, and you disclaim any intention to limit operation or
-modification of the work as a means of enforcing, against the work's
-users, your or third parties' legal rights to forbid circumvention of
-technological measures.
-
-  4. Conveying Verbatim Copies.
-
-  You may convey verbatim copies of the Program's source code as you
-receive it, in any medium, provided that you conspicuously and
-appropriately publish on each copy an appropriate copyright notice;
-keep intact all notices stating that this License and any
-non-permissive terms added in accord with section 7 apply to the code;
-keep intact all notices of the absence of any warranty; and give all
-recipients a copy of this License along with the Program.
-
-  You may charge any price or no price for each copy that you convey,
-and you may offer support or warranty protection for a fee.
-
-  5. Conveying Modified Source Versions.
-
-  You may convey a work based on the Program, or the modifications to
-produce it from the Program, in the form of source code under the
-terms of section 4, provided that you also meet all of these conditions:
-
-    a) The work must carry prominent notices stating that you modified
-    it, and giving a relevant date.
-
-    b) The work must carry prominent notices stating that it is
-    released under this License and any conditions added under section
-    7.  This requirement modifies the requirement in section 4 to
-    "keep intact all notices".
-
-    c) You must license the entire work, as a whole, under this
-    License to anyone who comes into possession of a copy.  This
-    License will therefore apply, along with any applicable section 7
-    additional terms, to the whole of the work, and all its parts,
-    regardless of how they are packaged.  This License gives no
-    permission to license the work in any other way, but it does not
-    invalidate such permission if you have separately received it.
-
-    d) If the work has interactive user interfaces, each must display
-    Appropriate Legal Notices; however, if the Program has interactive
-    interfaces that do not display Appropriate Legal Notices, your
-    work need not make them do so.
-
-  A compilation of a covered work with other separate and independent
-works, which are not by their nature extensions of the covered work,
-and which are not combined with it such as to form a larger program,
-in or on a volume of a storage or distribution medium, is called an
-"aggregate" if the compilation and its resulting copyright are not
-used to limit the access or legal rights of the compilation's users
-beyond what the individual works permit.  Inclusion of a covered work
-in an aggregate does not cause this License to apply to the other
-parts of the aggregate.
-
-  6. Conveying Non-Source Forms.
-
-  You may convey a covered work in object code form under the terms
-of sections 4 and 5, provided that you also convey the
-machine-readable Corresponding Source under the terms of this License,
-in one of these ways:
-
-    a) Convey the object code in, or embodied in, a physical product
-    (including a physical distribution medium), accompanied by the
-    Corresponding Source fixed on a durable physical medium
-    customarily used for software interchange.
-
-    b) Convey the object code in, or embodied in, a physical product
-    (including a physical distribution medium), accompanied by a
-    written offer, valid for at least three years and valid for as
-    long as you offer spare parts or customer support for that product
-    model, to give anyone who possesses the object code either (1) a
-    copy of the Corresponding Source for all the software in the
-    product that is covered by this License, on a durable physical
-    medium customarily used for software interchange, for a price no
-    more than your reasonable cost of physically performing this
-    conveying of source, or (2) access to copy the
-    Corresponding Source from a network server at no charge.
-
-    c) Convey individual copies of the object code with a copy of the
-    written offer to provide the Corresponding Source.  This
-    alternative is allowed only occasionally and noncommercially, and
-    only if you received the object code with such an offer, in accord
-    with subsection 6b.
-
-    d) Convey the object code by offering access from a designated
-    place (gratis or for a charge), and offer equivalent access to the
-    Corresponding Source in the same way through the same place at no
-    further charge.  You need not require recipients to copy the
-    Corresponding Source along with the object code.  If the place to
-    copy the object code is a network server, the Corresponding Source
-    may be on a different server (operated by you or a third party)
-    that supports equivalent copying facilities, provided you maintain
-    clear directions next to the object code saying where to find the
-    Corresponding Source.  Regardless of what server hosts the
-    Corresponding Source, you remain obligated to ensure that it is
-    available for as long as needed to satisfy these requirements.
-
-    e) Convey the object code using peer-to-peer transmission, provided
-    you inform other peers where the object code and Corresponding
-    Source of the work are being offered to the general public at no
-    charge under subsection 6d.
-
-  A separable portion of the object code, whose source code is excluded
-from the Corresponding Source as a System Library, need not be
-included in conveying the object code work.
-
-  A "User Product" is either (1) a "consumer product", which means any
-tangible personal property which is normally used for personal, family,
-or household purposes, or (2) anything designed or sold for incorporation
-into a dwelling.  In determining whether a product is a consumer product,
-doubtful cases shall be resolved in favor of coverage.  For a particular
-product received by a particular user, "normally used" refers to a
-typical or common use of that class of product, regardless of the status
-of the particular user or of the way in which the particular user
-actually uses, or expects or is expected to use, the product.  A product
-is a consumer product regardless of whether the product has substantial
-commercial, industrial or non-consumer uses, unless such uses represent
-the only significant mode of use of the product.
-
-  "Installation Information" for a User Product means any methods,
-procedures, authorization keys, or other information required to install
-and execute modified versions of a covered work in that User Product from
-a modified version of its Corresponding Source.  The information must
-suffice to ensure that the continued functioning of the modified object
-code is in no case prevented or interfered with solely because
-modification has been made.
-
-  If you convey an object code work under this section in, or with, or
-specifically for use in, a User Product, and the conveying occurs as
-part of a transaction in which the right of possession and use of the
-User Product is transferred to the recipient in perpetuity or for a
-fixed term (regardless of how the transaction is characterized), the
-Corresponding Source conveyed under this section must be accompanied
-by the Installation Information.  But this requirement does not apply
-if neither you nor any third party retains the ability to install
-modified object code on the User Product (for example, the work has
-been installed in ROM).
-
-  The requirement to provide Installation Information does not include a
-requirement to continue to provide support service, warranty, or updates
-for a work that has been modified or installed by the recipient, or for
-the User Product in which it has been modified or installed.  Access to a
-network may be denied when the modification itself materially and
-adversely affects the operation of the network or violates the rules and
-protocols for communication across the network.
-
-  Corresponding Source conveyed, and Installation Information provided,
-in accord with this section must be in a format that is publicly
-documented (and with an implementation available to the public in
-source code form), and must require no special password or key for
-unpacking, reading or copying.
-
-  7. Additional Terms.
-
-  "Additional permissions" are terms that supplement the terms of this
-License by making exceptions from one or more of its conditions.
-Additional permissions that are applicable to the entire Program shall
-be treated as though they were included in this License, to the extent
-that they are valid under applicable law.  If additional permissions
-apply only to part of the Program, that part may be used separately
-under those permissions, but the entire Program remains governed by
-this License without regard to the additional permissions.
-
-  When you convey a copy of a covered work, you may at your option
-remove any additional permissions from that copy, or from any part of
-it.  (Additional permissions may be written to require their own
-removal in certain cases when you modify the work.)  You may place
-additional permissions on material, added by you to a covered work,
-for which you have or can give appropriate copyright permission.
-
-  Notwithstanding any other provision of this License, for material you
-add to a covered work, you may (if authorized by the copyright holders of
-that material) supplement the terms of this License with terms:
-
-    a) Disclaiming warranty or limiting liability differently from the
-    terms of sections 15 and 16 of this License; or
-
-    b) Requiring preservation of specified reasonable legal notices or
-    author attributions in that material or in the Appropriate Legal
-    Notices displayed by works containing it; or
-
-    c) Prohibiting misrepresentation of the origin of that material, or
-    requiring that modified versions of such material be marked in
-    reasonable ways as different from the original version; or
-
-    d) Limiting the use for publicity purposes of names of licensors or
-    authors of the material; or
-
-    e) Declining to grant rights under trademark law for use of some
-    trade names, trademarks, or service marks; or
-
-    f) Requiring indemnification of licensors and authors of that
-    material by anyone who conveys the material (or modified versions of
-    it) with contractual assumptions of liability to the recipient, for
-    any liability that these contractual assumptions directly impose on
-    those licensors and authors.
-
-  All other non-permissive additional terms are considered "further
-restrictions" within the meaning of section 10.  If the Program as you
-received it, or any part of it, contains a notice stating that it is
-governed by this License along with a term that is a further
-restriction, you may remove that term.  If a license document contains
-a further restriction but permits relicensing or conveying under this
-License, you may add to a covered work material governed by the terms
-of that license document, provided that the further restriction does
-not survive such relicensing or conveying.
-
-  If you add terms to a covered work in accord with this section, you
-must place, in the relevant source files, a statement of the
-additional terms that apply to those files, or a notice indicating
-where to find the applicable terms.
-
-  Additional terms, permissive or non-permissive, may be stated in the
-form of a separately written license, or stated as exceptions;
-the above requirements apply either way.
-
-  8. Termination.
-
-  You may not propagate or modify a covered work except as expressly
-provided under this License.  Any attempt otherwise to propagate or
-modify it is void, and will automatically terminate your rights under
-this License (including any patent licenses granted under the third
-paragraph of section 11).
-
-  However, if you cease all violation of this License, then your
-license from a particular copyright holder is reinstated (a)
-provisionally, unless and until the copyright holder explicitly and
-finally terminates your license, and (b) permanently, if the copyright
-holder fails to notify you of the violation by some reasonable means
-prior to 60 days after the cessation.
-
-  Moreover, your license from a particular copyright holder is
-reinstated permanently if the copyright holder notifies you of the
-violation by some reasonable means, this is the first time you have
-received notice of violation of this License (for any work) from that
-copyright holder, and you cure the violation prior to 30 days after
-your receipt of the notice.
-
-  Termination of your rights under this section does not terminate the
-licenses of parties who have received copies or rights from you under
-this License.  If your rights have been terminated and not permanently
-reinstated, you do not qualify to receive new licenses for the same
-material under section 10.
-
-  9. Acceptance Not Required for Having Copies.
-
-  You are not required to accept this License in order to receive or
-run a copy of the Program.  Ancillary propagation of a covered work
-occurring solely as a consequence of using peer-to-peer transmission
-to receive a copy likewise does not require acceptance.  However,
-nothing other than this License grants you permission to propagate or
-modify any covered work.  These actions infringe copyright if you do
-not accept this License.  Therefore, by modifying or propagating a
-covered work, you indicate your acceptance of this License to do so.
-
-  10. Automatic Licensing of Downstream Recipients.
-
-  Each time you convey a covered work, the recipient automatically
-receives a license from the original licensors, to run, modify and
-propagate that work, subject to this License.  You are not responsible
-for enforcing compliance by third parties with this License.
-
-  An "entity transaction" is a transaction transferring control of an
-organization, or substantially all assets of one, or subdividing an
-organization, or merging organizations.  If propagation of a covered
-work results from an entity transaction, each party to that
-transaction who receives a copy of the work also receives whatever
-licenses to the work the party's predecessor in interest had or could
-give under the previous paragraph, plus a right to possession of the
-Corresponding Source of the work from the predecessor in interest, if
-the predecessor has it or can get it with reasonable efforts.
-
-  You may not impose any further restrictions on the exercise of the
-rights granted or affirmed under this License.  For example, you may
-not impose a license fee, royalty, or other charge for exercise of
-rights granted under this License, and you may not initiate litigation
-(including a cross-claim or counterclaim in a lawsuit) alleging that
-any patent claim is infringed by making, using, selling, offering for
-sale, or importing the Program or any portion of it.
-
-  11. Patents.
-
-  A "contributor" is a copyright holder who authorizes use under this
-License of the Program or a work on which the Program is based.  The
-work thus licensed is called the contributor's "contributor version".
-
-  A contributor's "essential patent claims" are all patent claims
-owned or controlled by the contributor, whether already acquired or
-hereafter acquired, that would be infringed by some manner, permitted
-by this License, of making, using, or selling its contributor version,
-but do not include claims that would be infringed only as a
-consequence of further modification of the contributor version.  For
-purposes of this definition, "control" includes the right to grant
-patent sublicenses in a manner consistent with the requirements of
-this License.
-
-  Each contributor grants you a non-exclusive, worldwide, royalty-free
-patent license under the contributor's essential patent claims, to
-make, use, sell, offer for sale, import and otherwise run, modify and
-propagate the contents of its contributor version.
-
-  In the following three paragraphs, a "patent license" is any express
-agreement or commitment, however denominated, not to enforce a patent
-(such as an express permission to practice a patent or covenant not to
-sue for patent infringement).  To "grant" such a patent license to a
-party means to make such an agreement or commitment not to enforce a
-patent against the party.
-
-  If you convey a covered work, knowingly relying on a patent license,
-and the Corresponding Source of the work is not available for anyone
-to copy, free of charge and under the terms of this License, through a
-publicly available network server or other readily accessible means,
-then you must either (1) cause the Corresponding Source to be so
-available, or (2) arrange to deprive yourself of the benefit of the
-patent license for this particular work, or (3) arrange, in a manner
-consistent with the requirements of this License, to extend the patent
-license to downstream recipients.  "Knowingly relying" means you have
-actual knowledge that, but for the patent license, your conveying the
-covered work in a country, or your recipient's use of the covered work
-in a country, would infringe one or more identifiable patents in that
-country that you have reason to believe are valid.
-
-  If, pursuant to or in connection with a single transaction or
-arrangement, you convey, or propagate by procuring conveyance of, a
-covered work, and grant a patent license to some of the parties
-receiving the covered work authorizing them to use, propagate, modify
-or convey a specific copy of the covered work, then the patent license
-you grant is automatically extended to all recipients of the covered
-work and works based on it.
-
-  A patent license is "discriminatory" if it does not include within
-the scope of its coverage, prohibits the exercise of, or is
-conditioned on the non-exercise of one or more of the rights that are
-specifically granted under this License.  You may not convey a covered
-work if you are a party to an arrangement with a third party that is
-in the business of distributing software, under which you make payment
-to the third party based on the extent of your activity of conveying
-the work, and under which the third party grants, to any of the
-parties who would receive the covered work from you, a discriminatory
-patent license (a) in connection with copies of the covered work
-conveyed by you (or copies made from those copies), or (b) primarily
-for and in connection with specific products or compilations that
-contain the covered work, unless you entered into that arrangement,
-or that patent license was granted, prior to 28 March 2007.
-
-  Nothing in this License shall be construed as excluding or limiting
-any implied license or other defenses to infringement that may
-otherwise be available to you under applicable patent law.
-
-  12. No Surrender of Others' Freedom.
-
-  If conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License.  If you cannot convey a
-covered work so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you may
-not convey it at all.  For example, if you agree to terms that obligate you
-to collect a royalty for further conveying from those to whom you convey
-the Program, the only way you could satisfy both those terms and this
-License would be to refrain entirely from conveying the Program.
-
-  13. Use with the GNU Affero General Public License.
-
-  Notwithstanding any other provision of this License, you have
-permission to link or combine any covered work with a work licensed
-under version 3 of the GNU Affero General Public License into a single
-combined work, and to convey the resulting work.  The terms of this
-License will continue to apply to the part which is the covered work,
-but the special requirements of the GNU Affero General Public License,
-section 13, concerning interaction through a network will apply to the
-combination as such.
-
-  14. Revised Versions of this License.
-
-  The Free Software Foundation may publish revised and/or new versions of
-the GNU General Public License from time to time.  Such new versions will
-be similar in spirit to the present version, but may differ in detail to
-address new problems or concerns.
-
-  Each version is given a distinguishing version number.  If the
-Program specifies that a certain numbered version of the GNU General
-Public License "or any later version" applies to it, you have the
-option of following the terms and conditions either of that numbered
-version or of any later version published by the Free Software
-Foundation.  If the Program does not specify a version number of the
-GNU General Public License, you may choose any version ever published
-by the Free Software Foundation.
-
-  If the Program specifies that a proxy can decide which future
-versions of the GNU General Public License can be used, that proxy's
-public statement of acceptance of a version permanently authorizes you
-to choose that version for the Program.
-
-  Later license versions may give you additional or different
-permissions.  However, no additional obligations are imposed on any
-author or copyright holder as a result of your choosing to follow a
-later version.
-
-  15. Disclaimer of Warranty.
-
-  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
-APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
-HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
-OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
-THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
-IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
-ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
-
-  16. Limitation of Liability.
-
-  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
-THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
-GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
-USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
-DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
-PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
-EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGES.
-
-  17. Interpretation of Sections 15 and 16.
-
-  If the disclaimer of warranty and limitation of liability provided
-above cannot be given local legal effect according to their terms,
-reviewing courts shall apply local law that most closely approximates
-an absolute waiver of all civil liability in connection with the
-Program, unless a warranty or assumption of liability accompanies a
-copy of the Program in return for a fee.
-
-                     END OF TERMS AND CONDITIONS
-
-            How to Apply These Terms to Your New Programs
-
-  If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these terms.
-
-  To do so, attach the following notices to the program.  It is safest
-to attach them to the start of each source file to most effectively
-state the exclusion of warranty; and each file should have at least
-the "copyright" line and a pointer to where the full notice is found.
-
-    <one line to give the program's name and a brief idea of what it does.>
-    Copyright (C) <year>  <name of author>
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-
-Also add information on how to contact you by electronic and paper mail.
-
-  If the program does terminal interaction, make it output a short
-notice like this when it starts in an interactive mode:
-
-    <program>  Copyright (C) <year>  <name of author>
-    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
-    This is free software, and you are welcome to redistribute it
-    under certain conditions; type `show c' for details.
-
-The hypothetical commands `show w' and `show c' should show the appropriate
-parts of the General Public License.  Of course, your program's commands
-might be different; for a GUI interface, you would use an "about box".
-
-  You should also get your employer (if you work as a programmer) or school,
-if any, to sign a "copyright disclaimer" for the program, if necessary.
-For more information on this, and how to apply and follow the GNU GPL, see
-<http://www.gnu.org/licenses/>.
-
-  The GNU General Public License does not permit incorporating your program
-into proprietary programs.  If your program is a subroutine library, you
-may consider it more useful to permit linking proprietary applications with
-the library.  If this is what you want to do, use the GNU Lesser General
-Public License instead of this License.  But first, please read
-<http://www.gnu.org/philosophy/why-not-lgpl.html>.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md	Thu Sep 17 22:01:30 2015 +0200
@@ -0,0 +1,42 @@
+AUDIo TOKenizer
+===============
+
+`auditok` is an Audio Activity Detection library that can be used with online data (i.e. microphone) or with audio files.
+
+Requirements
+------------
+`auditok` uses [PyAudio](http://people.csail.mit.edu/hubert/pyaudio/) for audio acquisition and playback.
+If installed, numpy  will be privileged for math operations on vectors.
+
+Installation
+------------
+    pip install auditok
+
+Demos
+-----
+This code reads data from the microphone and plays back whatever it detects.
+    python demos/echo.py
+
+`echo.py` accepts two arguments: energy threshold (default=45) and duration in seconds (default=10):
+
+    python demos/echo.py 50 15
+
+   If only one argument is given it will be used for energy. Other demos are in /demos.
+
+Documentation
+-------------
+
+Check out  a quick start and the API documentation [here](http://amsehili.github.io/auditok/pdoc/)
+
+Contribution
+------------
+Contributions are very appreciated !
+
+License
+-------
+`auditok` is published under the GNU General Public License Version 3.
+
+Author
+------
+Amine Sehili (<amine.sehili@gmail.com>)
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/auditok/__init__.py	Thu Sep 17 22:01:30 2015 +0200
@@ -0,0 +1,497 @@
+"""
+
+`auditok` is a module that can be used as a generic tool for data
+tokenization. Although its core motivation is **Acoustic Activity 
+Detection** (AAD) and extraction from audio streams (i.e. detect
+where a noise/an acoustic activity occurs within an audio stream and
+extract the corresponding portion of signal), it can easily be
+adapted to other tasks.
+
+Globally speaking, it can be used to extract, from a sequence of
+observations, all sub-sequences that meet a certain number of
+criteria in terms of:
+
+1. Minimum length of a **valid** token (i.e. sub-sequence)
+2. Maximum length of a valid token
+3. Maximum tolerated consecutive **non-valid** observations within
+   a valid token
+
+Examples of a non-valid observation are: a non-numeric ascii symbol
+if you are interested in sub-sequences of numeric symbols, or a silent
+audio window (of 10, 20 or 100 milliseconds for instance) if what
+interests you are audio regions made up of a sequence of ``noisy''
+windows (whatever kind of noise: speech, baby cry, laughter, etc.).
+
+The most important component of `auditok` is the `StreamTokenizer` class.
+An instance of this class encapsulates a `DataValidator` and can be 
+configured to detect the desired regions from a stream.
+The `auditok.core.StreamTokenizer.tokenize` method accepts a `DataSource`
+object that has a `read` method. Read data can be of any type accepted
+by the `validator`.
+
+
+As the main aim of this module is **Audio Activity Detection**,
+it provides the `auditok.util.ADSFactory` factory class that makes
+it very easy to create an `AudioDataSource` (a class that implements `DataSource`)
+ object, be that from:
+
+ - A file on the disk
+ - A buffer of data
+ - The built-in microphone (requires PyAudio)
+ 
+
+The `AudioDataSource` class inherits from `DataSource` and supplies
+a higher abstraction level than `AudioSource` thanks to a bunch of
+handy features:
+
+ - Define a fixed-length of block_size (i.e. analysis window)
+ - Allow overlap between two consecutive analysis windows (hop_size < block_size).
+   This can be very important if your validator use the **spectral** 
+   information of audio data instead of raw audio samples.
+ - Limit the amount (i.e. duration) of read data (very useful when reading
+   data from the microphone)
+ - Record and rewind data (also useful if you read data from the microphone
+   and you want to process it many times offline and/or save it)  
+
+
+Last but not least, the current version has only one audio window validator based on
+signal energy.
+
+Requirements:
+------------
+`auditok` requires [Pyaudio](http://people.csail.mit.edu/hubert/pyaudio/) for audio acquisition and playback.
+
+
+
+Illustrative examples with strings
+----------------------------------
+Let us look at some examples using the `auditok.util.StringDataSource` class
+created for test and illustration purposes. Imagine that each character of 
+`auditok.util.StringDataSource` data represent an audio slice of 100 ms for
+example. In the following examples we will use upper case letters to represent
+noisy audio slices (i.e. analysis windows or frames) and lower case letter for
+silent frames.
+
+## Extract sub-sequences of consecutive upper case letters
+We want to extract sub-sequences of characters that have:
+    
+  - A minimum length of 1 (`min_length` = 1)
+  - A maximum length of 9999 (`max_length` = 9999)
+  - Zero consecutive lower case characters within them (`max_continuous_silence` = 0)
+
+We also create the `UpperCaseChecker` whose `read` method returns `True` if the 
+checked character is in upper case and `False` otherwise. 
+ 
+    #!python
+    from auditok import StreamTokenizer, StringDataSource, DataValidator
+    
+    class UpperCaseChecker(DataValidator):
+       def is_valid(self, frame):
+          return frame.isupper()
+    
+    dsource = StringDataSource("aaaABCDEFbbGHIJKccc")
+    tokenizer = StreamTokenizer(validator=UpperCaseChecker(), 
+                 min_length=1, max_length=9999, max_continuous_silence=0)
+                 
+    tokenizer.tokenize(dsource)
+
+The output is a list of two tuples, each contains the extracted sub-sequence and its
+start and end position in the original sequence respectively:
+
+    #!python
+    [(['A', 'B', 'C', 'D', 'E', 'F'], 3, 8), (['G', 'H', 'I', 'J', 'K'], 11, 15)]
+    
+## Tolerate up to 2 non-valid (lower case) letters within an extracted sequence
+
+To do so, we set `max_continuous_silence`=2:
+
+    #!python
+    from auditok import StreamTokenizer, StringDataSource, DataValidator
+    
+    class UpperCaseChecker(DataValidator):
+       def is_valid(self, frame):
+          return frame.isupper()
+    
+    dsource = StringDataSource("aaaABCDbbEFcGHIdddJKee")
+    tokenizer = StreamTokenizer(validator=UpperCaseChecker(), 
+                 min_length=1, max_length=9999, max_continuous_silence=2)
+                 
+    tokenizer.tokenize(dsource)
+
+output:
+
+    #!python
+    [(['A', 'B', 'C', 'D', 'b', 'b', 'E', 'F', 'c', 'G', 'H', 'I', 'd', 'd'], 3, 16), (['J', 'K', 'e', 'e'], 18, 21)]
+    
+Notice the trailing lower case letters "dd" and "ee" at the end of the two
+tokens. The default behavior of `StreamTokenizer` is to keep the *trailing
+silence* if it doesn't exceed `max_continuous_silence`. This can be changed
+using the `DROP_TRAILING_SILENCE` mode (see next example).
+
+## Remove trailing silence
+
+Trailing silence can be useful for many sound recognition applications, including
+speech recognition. Moreover, from the human auditory system point of view, trailing
+low energy signal helps removing abrupt signal cuts.
+
+If you want to remove it anyway, you can do it by setting `mode` to `StreamTokenizer.DROP_TRAILING_SILENCE`:
+
+
+    #!python
+    from auditok import StreamTokenizer, StringDataSource, DataValidator
+    
+    class UpperCaseChecker(DataValidator):
+       def is_valid(self, frame):
+          return frame.isupper()
+    
+    dsource = StringDataSource("aaaABCDbbEFcGHIdddJKee")
+    tokenizer = StreamTokenizer(validator=UpperCaseChecker(), 
+                 min_length=1, max_length=9999, max_continuous_silence=2,
+                 mode=StreamTokenizer.DROP_TRAILING_SILENCE)
+                 
+    tokenizer.tokenize(dsource)
+
+output:
+
+    #!python
+    [(['A', 'B', 'C', 'D', 'b', 'b', 'E', 'F', 'c', 'G', 'H', 'I'], 3, 14), (['J', 'K'], 18, 19)]
+
+
+## Limit the length of detected tokens
+
+Imagine that you just want to detect and recognize a small part of a long
+acoustic event (e.g. engine noise, water flow, etc.) and avoid that that 
+event hogs the tokenizer and prevent it from feeding the event to the next
+processing step (i.e. a sound recognizer). You can do this by:
+
+ - limiting the length of a detected token.
+ 
+ and
+ 
+ - using a callback function as an argument to `StreamTokenizer.tokenize` 
+   so that the tokenizer delivers a token as soon as it is detected.
+
+The following code limits the length of a token to 5:
+
+    #!python
+    from auditok import StreamTokenizer, StringDataSource, DataValidator
+    
+    class UpperCaseChecker(DataValidator):
+       def is_valid(self, frame):
+          return frame.isupper()
+    
+    dsource = StringDataSource("aaaABCDEFGHIJKbbb")
+    tokenizer = StreamTokenizer(validator=UpperCaseChecker(),
+                 min_length=1, max_length=5, max_continuous_silence=0)
+                 
+    def print_token(data, start, end):
+        print("token = '{0}', starts at {1}, ends at {2}".format(''.join(data), start, end))
+                 
+    tokenizer.tokenize(dsource, callback=print_token)
+    
+
+output:
+
+    "token = 'ABCDE', starts at 3, ends at 7"
+    "token = 'FGHIJ', starts at 8, ends at 12"
+    "token = 'K', starts at 13, ends at 13"
+
+
+Using real audio data
+-------------------------------
+In this section we will use `ADSFactory`, `AudioEnergyValidator` and `StreamTokenizer`
+for an AAD demonstration using audio data. Before we get any, further it is worth
+explaining a certain number of points.
+
+`ADSFactory.ads` method is called to create an `AudioDataSource` object that can be
+passed to  `StreamTokenizer.tokenize`. `ADSFactory.ads` accepts a number of keyword
+arguments, of which none is mandatory. The returned `AudioDataSource` object can 
+however greatly differ depending on the passed arguments. Further details can be found
+in the respective method documentation. Note however the following two calls that will
+create an `AudioDataSource` that read data from an audio file and from the built-in
+microphone respectively.
+
+    #!python
+    from auditok import ADSFactory
+    
+    # Get an AudioDataSource from a file
+    file_ads = ADSFactory.ads(filename = "path/to/file/")
+    
+    # Get an AudioDataSource from the built-in microphone
+    # The returned object has the default values for sampling
+    # rate, sample width an number of channels. see method's
+    # documentation for customized values 
+    mic_ads = ADSFactory.ads()
+    
+For `StreamTokenizer`, parameters `min_length`, `max_length` and `max_continuous_silence`
+are expressed in terms of number of frames. If you want a `max_length` of *2 seconds* for
+your detected sound events and your *analysis window* is *10 ms* long, you have to specify
+a `max_length` of 200 (`int(2. / (10. / 1000)) == 200`). For a `max_continuous_silence` of *300 ms*
+for instance, the value to pass to StreamTokenizer is 30 (`int(0.3 / (10. / 1000)) == 30`).
+
+
+Where do you get the size of the **analysis window** from?
+
+
+Well this is a parameter you pass to `ADSFactory.ads`. By default `ADSFactory.ads` uses
+an analysis window of 10 ms. the number of samples that 10 ms of signal contain will
+vary depending on the sampling rate of your audio source (file, microphone, etc.).
+For a sampling rate of 16KHz (16000 samples per second), we have 160 samples for 10 ms.
+Therefore you can use block sizes of 160, 320, 1600 for analysis windows of 10, 20 and 100 
+ms respectively.
+
+    #!python
+    from auditok import ADSFactory
+    
+    file_ads = ADSFactory.ads(filename = "path/to/file/", block_size = 160)
+    
+    file_ads = ADSFactory.ads(filename = "path/to/file/", block_size = 320)
+    
+    # If no sampling rate is specified, ADSFactory use 16KHz as the default
+    # rate for the microphone. If you want to use a window of 100 ms, use 
+    # a block size of 1600 
+    mic_ads = ADSFactory.ads(block_size = 1600)
+    
+So if you're not sure what your analysis window in seconds is, use the following:
+
+    #!python
+    my_ads = ADSFactory.ads(...)
+    analysis_win_seconds = float(my_ads.get_block_size()) / my_ads.get_sampling_rate()
+    analysis_window_ms = analysis_win_seconds * 1000
+    
+    # For a `max_continuous_silence` of 300 ms use:
+    max_continuous_silence = int(300. / analysis_window_ms)
+    
+    # Which is the same as
+    max_continuous_silence = int(0.3 / (analysis_window_ms / 1000))
+    
+    
+Examples
+--------
+
+## Extract isolated phrases from an utterance
+We will build an `AudioDataSource` using a wave file from  the database.
+The file contains isolated pronunciations of digits from 1 to 6
+in Arabic as well as breath-in/out between 2 and 3. The code will play the
+ original file then the detected sounds separately. Note that we use an 
+`energy_threshold` of 65, this parameter should be carefully chosen. It depends
+on microphone quality, background noise and the amplitude of events you want to 
+detect.
+
+    #!python
+    from auditok import ADSFactory, AudioEnergyValidator, StreamTokenizer, player_for, dataset
+     
+    # We set the `record` argument to True so that we can rewind the source
+    asource = ADSFactory.ads(filename=dataset.one_to_six_arabic_16000_mono_bc_noise, record=True)
+     
+    validator = AudioEnergyValidator(sample_width=asource.get_sample_width(), energy_threshold=65)
+    
+    # Default analysis window is 10 ms (float(asource.get_block_size()) / asource.get_sampling_rate())
+    # min_length=20 : minimum length of a valid audio activity is 20 * 10 == 200 ms
+    # max_length=400 : maximum length of a valid audio activity is 400 * 10 == 4000 ms == 4 seconds
+    # max_continuous_silence=30 : maximum length of a tolerated silence within a valid audio activity is 30 * 10 == 300 ms 
+    tokenizer = StreamTokenizer(validator=validator, min_length=20, max_length=400, max_continuous_silence=30)
+    
+    asource.open()
+    tokens = tokenizer.tokenize(asource)
+    
+    # Play detected regions back
+    
+    player = player_for(asource)
+    
+    # Rewind and read the whole signal
+    asource.rewind()
+    original_signal = []
+
+    while True:
+       w = asource.read()
+       if w is None:
+          break
+       original_signal.append(w)
+       
+    original_signal = ''.join(original_signal)
+    
+    print("Playing the original file...")
+    player.play(original_signal)
+    
+    print("playing detected regions...")
+    for t in tokens:
+        print("Token starts at {0} and ends at {1}".format(t[1], t[2]))
+        data = ''.join(t[0])
+        player.play(data)
+        
+    assert len(tokens) == 8
+    
+
+The tokenizer extracts 8 audio regions from the signal, including all isolated digits
+(from 1 to 6) as well as the 2-phase respiration of the subject. You might have noticed
+that, in the original file, the last three digit are closer to each other than the 
+previous ones. If you want them to be extracted as one single phrase, you can do so
+by tolerating a larger continuous silence within a detection:
+ 
+    #!python
+    tokenizer.max_continuous_silence = 50
+    asource.rewind()
+    tokens = tokenizer.tokenize(asource)
+    
+    for t in tokens:
+       print("Token starts at {0} and ends at {1}".format(t[1], t[2]))
+       data = ''.join(t[0])
+       player.play(data)
+    
+    assert len(tokens) == 6
+        
+         
+## Trim leading and trailing silence
+ 
+The  tokenizer in the following example is set up to remove the silence
+that precedes the first acoustic activity or follows the last activity 
+in a record. It preserves whatever it finds between the two activities.
+In other words, it removes the leading and trailing silence.
+
+Sampling rate is 44100 sample per second, we'll use an analysis window of 100 ms
+(i.e. block_size == 4410)
+
+Energy threshold is 50.
+
+The tokenizer will start accumulating windows up from the moment it encounters
+the first analysis window of an energy >= 50. ALL the following windows will be 
+kept regardless of their energy. At the end of the analysis, it will drop trailing
+ windows with an energy below 50.
+
+This is an interesting example because the audio file we're analyzing contains a very
+brief noise that occurs within the leading silence. We certainly do want our tokenizer 
+to stop at this point and consider whatever comes after as a useful signal.
+To force the tokenizer to ignore that brief event we use two other parameters `init_min`
+and `init_max_silence`. By `init_min`=3 and `init_max_silence`=1 we tell the tokenizer
+that a valid event must start with at least 3 noisy windows, between which there
+is at most 1 silent window.
+
+Still with this configuration we can get the tokenizer detect that noise as a valid event
+(if it actually contains 3 consecutive noisy frames). To circumvent this we use a large
+enough analysis window (here of 100 ms) to ensure that the brief noise is surrounded by a much
+longer silence and hence the energy of the overall analysis window will be below 50.
+
+When using a shorter analysis window (of 10ms for instance, block_size == 441), the brief
+noise contributes more to energy calculation which yields an energy of over 50 for the window.
+Again we can deal with this situation by using a higher energy threshold (55 for example)
+ 
+ 
+    #!python
+    from auditok import ADSFactory, AudioEnergyValidator, StreamTokenizer, player_for, dataset
+    import pyaudio
+
+    # record = True so that we'll be able to rewind the source.
+    asource = ADSFactory.ads(filename=dataset.was_der_mensch_saet_mono_44100_lead_trail_silence,
+             record=True, block_size=4410)
+    asource.open()
+
+    original_signal = []
+    # Read the whole signal
+    while True:
+       w = asource.read()
+       if w is None:
+          break
+       original_signal.append(w)
+    
+    original_signal = ''.join(original_signal)
+    
+    # rewind source
+    asource.rewind()
+    
+    # Create a validator with an energy threshold of 50
+    validator = AudioEnergyValidator(sample_width=asource.get_sample_width(), energy_threshold=50)
+    
+    # Create a tokenizer with an unlimited token length and continuous silence within a token
+    # Note the DROP_TRAILING_SILENCE mode that will ensure removing trailing silence
+    trimmer = StreamTokenizer(validator, min_length = 20, max_length=99999999, init_min=3, init_max_silence=1,
+                             max_continuous_silence=9999999, mode=StreamTokenizer.DROP_TRAILING_SILENCE)
+    
+    
+    tokens = trimmer.tokenize(asource)
+    
+    # Make sure we only have one token
+    assert len(tokens) == 1, "Should have detected one single token"
+    
+    trimmed_signal = ''.join(tokens[0][0])
+    
+    player = player_for(asource)
+    
+    print("Playing original signal (with leading and trailing silence)...")
+    player.play(original_signal)
+    print("Playing trimmed signal...")
+    player.play(trimmed_signal)
+    
+
+
+## Online audio signal processing
+
+In the next example, audio data is directly acquired from the built-in microphone.
+The `tokenize` method is passed a callback function so that audio activities
+are delivered as soon as they are detected. Each detected activity is played
+back using the build-in audio output device.
+
+As mentioned before, signal energy is strongly related to many factors such as
+microphone sensitivity, background noise (including noise inherent to the hardware), 
+distance and your operating system sound settings. Try a lower `energy_threshold`
+if your noise does not seem to be detected and a higher threshold if you notice
+an over detection (echo method prints a detection where you have made no noise).
+
+    #!python
+    from auditok import ADSFactory, AudioEnergyValidator, StreamTokenizer, player_for
+    import pyaudio
+     
+    # record = True so that we'll be able to rewind the source.
+    # max_time = 10: read 10 seconds from the microphone
+    asource = ADSFactory.ads(record=True, max_time=10)
+    
+    validator = AudioEnergyValidator(sample_width=asource.get_sample_width(), energy_threshold=50)
+    tokenizer = StreamTokenizer(validator=validator, min_length=20, max_length=250, max_continuous_silence=30)
+    
+    player = player_for(asource)
+    
+    def echo(data, start, end):
+       print("Acoustic activity at: {0}--{1}".format(start, end))
+       player.play(''.join(data))
+       
+    asource.open()
+    
+    tokenizer.tokenize(asource, callback=echo)
+
+If you want to re-run the tokenizer after changing of one or many parameters, use the following code:
+
+    #!python
+    asource.rewind()
+    # change energy threshold for example
+    tokenizer.validator.set_energy_threshold(55)
+    tokenizer.tokenize(asource, callback=echo)
+
+In case you want to play the whole recorded signal back use:
+
+    #!python
+    player.play(asource.get_audio_source().get_data_buffer())
+    
+
+Contributing
+------------
+`auditok` is on [GitHub](https://github.com/amsehili). You're welcome to fork
+it and contribute.
+
+
+@author: Amine SEHILI <amine.sehili@gmail.com>
+September, 2015
+
+License
+-------
+
+This package is published under GNU GPL Version 3.
+
+"""
+from core import *
+from io import *
+from util import *
+import dataset
+
+__version__ = "0.1.2"
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/auditok/core.py	Thu Sep 17 22:01:30 2015 +0200
@@ -0,0 +1,454 @@
+'''
+September 2015
+@author: Amine SEHILI <amine.sehili@gmail.com>
+
+'''
+
+from auditok.util import DataValidator
+
+
+
+__all__ = ["StreamTokenizer"]
+
+
+class StreamTokenizer():
+    
+    """
+    Class for stream tokenizers. It implements a 4-state automata scheme
+    for interesting sub-sequences extraction.
+    """
+    
+    SILENCE = 0
+    POSSIBLE_SILENCE = 1
+    POSSIBLE_NOISE = 2 
+    NOISE = 3
+    
+    STRICT_MIN_LENGTH = 2
+    DROP_TRAILING_SILENCE = 4
+    
+    def __init__(self, validator, 
+                 min_length, max_length, max_continuous_silence,
+                 init_min=0, init_max_silence=0,
+                 mode=0):
+        
+        """
+        
+        Parameters
+        -----------
+        
+
+        `validator` :
+            instance of `DataValidator` that implements `is_valid` method.
+        
+        `min_length` : *(int)*
+            Minimum number of frames of a valid token. This includes all \
+            tolerated non valid frames within the token.
+            
+        `max_length` : *(int)*
+            Maximum number of frames of a valid token. This includes all \
+            tolerated non valid frames within the token.
+        
+        `max_continuous_silence` : *(int)*
+            Maximum number of consecutive non-valid frames within a token.
+            Note that, within a valid token, there may be many tolerated \
+            *silent* regions that contain each a number of non valid frames up to \
+            `max_continuous_silence`
+        
+        `init_min` : *(int, default=0)*
+            Minimum number of consecutive valid frames that must be **initially** \
+            gathered before any sequence of non valid frames can be tolerated. This
+            option is not always needed, it can be used to drop non-valid tokens as
+            early as possible. **Default = 0** means that the option is by default 
+            ineffective. 
+                
+        `init_max_silence` : *(int, default=0)*
+            Maximum number of tolerated consecutive non-valid frames if the \
+            number of already gathered valid frames has not yet reached 'init_min'.
+            This argument is normally used if `init_min` is used. **Default = 0**,
+            by default this argument is not taken into consideration.
+            
+        
+        keep_trailing_silence : boolean, default=False
+            Whether to keep the trailing non valid frames of a valid token
+            This seems to be particularly useful to avoid an abrupt cut-off
+            when tokenizing some kinds of signals (e.g. audio signal)
+        
+        `mode` : *(int, default=0)*
+            `mode` can be:
+            
+           1. `StreamTokenizer.STRICT_MIN_LENGTH`: if token *i* is delivered because `max_length`
+               is reached, and token *i+1* is immediately adjacent to
+               token *i* (i.e. token *i* ends at frame *k* and token *i+1* starts
+               at frame *k+1*) then accept token *i+1* only if it has a size of at
+               least `min_length`. The default behavior is to accept token *i+1*
+               even if it is shorter than `min_length` (given that the above conditions
+               are fulfilled of course).
+               
+              Example
+              -------
+               
+               In the following code, without `STRICT_MIN_LENGTH`, the 'BB' token is
+               accepted although it is shorter than `min_length` (3), because it immediately
+               follows the latest delivered token:
+               
+                #!python
+                from auditok import StreamTokenizer, StringDataSource, DataValidator
+    
+                class UpperCaseChecker(DataValidator):
+                    def is_valid(self, frame):
+                        return frame.isupper()
+    
+                dsource = StringDataSource("aaaAAAABBbbb")
+                tokenizer = StreamTokenizer(validator=UpperCaseChecker(), 
+                            min_length=3, max_length=4, max_continuous_silence=0)
+                 
+                tokenizer.tokenize(dsource)
+                
+                
+            output:
+                
+                #!python
+                [(['A', 'A', 'A', 'A'], 3, 6), (['B', 'B'], 7, 8)]
+            
+            The following tokenizer will however reject the 'BB' token
+            
+                dsource = StringDataSource("aaaAAAABBbbb")
+                tokenizer = StreamTokenizer(validator=UpperCaseChecker(), 
+                            min_length=3, max_length=4, max_continuous_silence=0,
+                            mode=StreamTokenizer.STRICT_MIN_LENGTH)
+                tokenizer.tokenize(dsource)
+            
+            output:
+            
+                #!python
+                [(['A', 'A', 'A', 'A'], 3, 6)]
+                
+               
+           2. `StreamTokenizer.DROP_TRAILING_SILENCE`: drop all trailing non-valid frames
+               from a token to be delivered if and only if it is not **truncated**.
+               This can be a bit tricky. A token is actually delivered if:
+               
+               a. `max_continuous_silence` is reached
+               
+               OR
+               
+               b. Its length reaches `max_length`. This is called a **truncated** token
+               
+            In the current implementation, a `StreamTokenizer`'s decision is only based on seen
+            data and on incoming data. Thus, if a token is truncated at a non-valid but tolerated
+            frame (`max_length` is reached but `max_continuous_silence` not yet) any trailing
+            silence will be kept because it can potentially be part of a valid token (if `max_length`
+            was bigger). But if `max_continuous_silence` is reached before `max_length`, the delivered
+            token will not be considered as truncated but a result of *normal* end of detection
+            (i.e. no more valid data). In that case the trailing silence can be removed if you use
+            the `StreamTokenizer.DROP_TRAILING_SILENCE` mode.
+               
+            Take the following example:
+                
+                #!python
+                tokenizer = StreamTokenizer(validator=UpperCaseChecker(), min_length=3,
+                max_length=6, max_continuous_silence=3,
+                mode=StreamTokenizer.DROP_TRAILING_SILENCE)
+                
+                dsource = StringDataSource("aaaAAAaaaBBbbbb")
+                tokenizer.tokenize(dsource)
+                
+            output:
+            
+                #!python
+                [(['A', 'A', 'A', 'a', 'a', 'a'], 3, 8), (['B', 'B'], 9, 10)]
+                
+            The first token is delivered with its trailing silence because it is truncated
+            while the second one has its trailing frames removed.
+            
+            Without `StreamTokenizer.DROP_TRAILING_SILENCE` the output would be:
+                
+                #!python
+                [(['A', 'A', 'A', 'a', 'a', 'a'], 3, 8), (['B', 'B', 'b', 'b', 'b'], 9, 13)]
+
+            
+            
+           3. `StreamTokenizer.STRICT_MIN_LENGTH | StreamTokenizer.DROP_TRAILING_SILENCE`:
+               use both options. That means: first remove trailing silence, then check if the
+               token still has at least a length of `min_length`.
+        
+        """
+        
+        if not isinstance(validator, DataValidator):
+            raise TypeError("'validator' must be an instance of 'DataValidator'")
+        
+        if max_length <= 0:
+            raise ValueError("'max_length' must be > 0 (value={0})".format(max_length))
+        
+        if min_length <= 0 or min_length > max_length:
+            raise ValueError("'min_length' must be > 0 and <= 'max_length' \
+            (value={0})".format(min_length))
+        
+        if max_continuous_silence >= max_length:
+            raise ValueError("'max_continuous_silence' must be < \
+            'max_length' (value={0})".format(max_continuous_silence))
+            
+        # init_min must be shorter than max_length
+        
+        self.validator = validator
+        self.min_length = min_length
+        self.max_length = max_length
+        self.max_continuous_silence = max_continuous_silence
+        self.init_min = init_min
+        self.init_max_silent = init_max_silence
+        
+        self._mode = None
+        self.set_mode(mode)
+        self._strict_min_length = (mode & self.STRICT_MIN_LENGTH) != 0
+        self._drop_trailing_silence  = (mode & self.DROP_TRAILING_SILENCE) != 0
+        
+        self._deliver = None
+        self._tokens = None
+        self._state = None
+        self._data = None
+        self._contiguous_token = False
+        
+        self._init_count = 0
+        self._silence_length = 0
+        self._start_frame = 0
+        self._current_frame = 0
+    
+    def set_mode(self, mode):
+        """
+        Set this tokenizer's mode.
+        
+        Parameters
+        ----------
+        
+        `mode` : *(int)*
+           New mode, must be one of:
+           
+           a. `StreamTokenizer.STRICT_MIN_LENGTH`
+           
+           b. `StreamTokenizer.DROP_TRAILING_SILENCE`
+           
+           c. `StreamTokenizer.STRICT_MIN_LENGTH | StreamTokenizer.DROP_TRAILING_SILENCE`
+           
+           d. 0
+           
+           
+           See `StreamTokenizer.__init__` for more information about the mode.
+        """
+        
+        if not mode in [self.STRICT_MIN_LENGTH, self.DROP_TRAILING_SILENCE,
+           self.STRICT_MIN_LENGTH | self.DROP_TRAILING_SILENCE, 0]:
+            
+            raise ValueError("Wrong value for mode")
+        
+        self._mode = mode
+        self._strict_min_length = (mode & self.STRICT_MIN_LENGTH) != 0
+        self._drop_trailing_silence  = (mode & self.DROP_TRAILING_SILENCE) != 0
+        
+    
+    def get_mode(self):
+        """
+        Return the current mode. To check whether a specific mode is activated use
+        the bitwise 'and' operator `&`. Example:
+           
+            #!python
+            if mode & self.STRICT_MIN_LENGTH != 0:
+                ...
+                
+        """
+        return self._mode
+        
+    def _reinitialize(self):
+        self._contiguous_token = False
+        self._data = []
+        self._tokens = []
+        self._state = self.SILENCE
+        self._current_frame = -1
+        self._deliver = self._append_token
+    
+    
+    def tokenize(self, data_source, callback=None):
+        """
+        Read data from `data_source`, one frame at a time, and process the read frames in
+        order to detect sequences of frames that make up valid tokens.
+        
+        Parameters
+        ----------
+        
+        `data_source` : instance of the `DataSource` class that implements a 'read' method.
+        'read' should return a slice of signal, i.e. frame (of whatever \
+        type as long as it can be processed by validator) and None if \
+        there is no more signal.
+        
+        `callback` : an optional 3-argument function.
+           If a `callback` function is given, it will be called each time a valid token
+           is found.
+           
+           
+        Returns
+        -------
+        
+        A list of tokens if `callback` is None. Each token is a tuple with the following elements:
+        
+            #!python
+            (data, start, end)
+            
+        where `data` is a list of read frames, `start`: index of the first frame in the
+        original data and `end` : index of the last frame. 
+        
+            
+        """
+        
+        self._reinitialize()
+        
+        if callback is not None:
+            self._deliver = callback
+        
+        while True:
+            frame =  data_source.read()
+            if frame == None:
+                break
+            self._current_frame += 1
+            self._process(frame)
+            
+        self._post_process()
+        
+        if callback is None:
+            _ret = self._tokens
+            self._tokens = None
+            return _ret
+        
+        
+    def _process(self, frame):
+        
+        frame_is_valid = self.validator.is_valid(frame)
+        
+        if self._state == self.SILENCE:
+            
+            if frame_is_valid:
+                # seems we got a valid frame after a silence
+                self._init_count = 1
+                self._silence_length = 0
+                self._start_frame = self._current_frame
+                self._data.append(frame)
+                
+                if self._init_count  >= self.init_min:
+                    self._state = self.NOISE
+                    if len(self._data) >= self.max_length:
+                        self._process_end_of_detection(True)
+                else:
+                    self._state = self.POSSIBLE_NOISE
+        
+        elif self._state == self.POSSIBLE_NOISE:
+            
+            if frame_is_valid:
+                self._silence_length = 0
+                self._init_count += 1
+                self._data.append(frame)
+                if self._init_count  >= self.init_min:
+                    self._state = self.NOISE
+                    if len(self._data) >= self.max_length:
+                        self._process_end_of_detection(True)
+            
+            else:                
+                self._silence_length += 1
+                if self._silence_length > self.init_max_silent or \
+                len(self._data) + 1 >= self.max_length:
+                    # either init_max_silent or max_length is reached
+                    # before _init_count, back to silence
+                    self._data = []
+                    self._state = self.SILENCE
+                else:
+                    self._data.append(frame)
+                    
+                
+        elif self._state == self.NOISE:
+            
+            if frame_is_valid:
+                self._data.append(frame)
+                if len(self._data) >= self.max_length:
+                    self._process_end_of_detection(True)
+            
+            elif self.max_continuous_silence <= 0 :
+                # max token reached at this frame will _deliver if _contiguous_token
+                # and not _strict_min_length
+                self._process_end_of_detection()
+                self._state = self.SILENCE
+                
+            else:
+                # this is the first silent frame following a valid one
+                # and it is tolerated
+                self._silence_length = 1
+                self._data.append(frame)
+                self._state = self.POSSIBLE_SILENCE
+                if len(self._data) == self.max_length:
+                    self._process_end_of_detection(True)
+                    # don't reset _silence_length because we still 
+                    # need to know the total number of silent frames
+                                   
+                                
+    
+        elif self._state == self.POSSIBLE_SILENCE:
+            
+            if frame_is_valid:
+                self._data.append(frame)
+                self._silence_length = 0
+                self._state = self.NOISE
+                if len(self._data) >= self.max_length:
+                    self._process_end_of_detection(True)
+                
+            else:
+                if self._silence_length >= self.max_continuous_silence:
+                    if self._silence_length < len(self._data):
+                        # _deliver only if gathered frames aren't all silent
+                        self._process_end_of_detection()
+                    else:
+                        self._data = []
+                    self._state = self.SILENCE
+                    self._silence_length = 0
+                else:
+                    self._data.append(frame)
+                    self._silence_length += 1
+                    if len(self._data) >= self.max_length:
+                        self._process_end_of_detection(True)
+                        # don't reset _silence_length because we still 
+                        # need to know the total number of silent frames
+                        
+    
+    def _post_process(self):
+        if self._state == self.NOISE or self._state == self.POSSIBLE_SILENCE:
+            if len(self._data) > 0 and len(self._data) > self._silence_length:
+                self._process_end_of_detection()
+    
+    
+    def _process_end_of_detection(self, truncated=False):
+        
+        if not truncated and self._drop_trailing_silence and self._silence_length > 0:
+            # happens if max_continuous_silence is reached
+            # or max_length is reached at a silent frame
+            self._data = self._data[0: - self._silence_length]
+        
+        if (len(self._data) >= self.min_length) or \
+           (len(self._data) > 0 and \
+            not self._strict_min_length and self._contiguous_token):
+            
+            
+            
+            _end_frame = self._start_frame + len(self._data) - 1
+            self._deliver(self._data, self._start_frame, _end_frame)
+            
+            if truncated:
+                # next token (if any) will start at _current_frame + 1
+                self._start_frame = self._current_frame + 1
+                # remember that it is contiguous with the just delivered one
+                self._contiguous_token = True
+            else:
+                self._contiguous_token = False
+        else:
+            self._contiguous_token = False       
+        
+        self._data = []
+            
+    
+    
+    def _append_token(self, data, start, end):
+        self._tokens.append((data, start, end))
Binary file auditok/data/1to6arabic_16000_mono_bc_noise.wav has changed
Binary file auditok/data/was_der_mensch_saet_das_wir_er_veilfach_enrten_44100Hz_mono_lead_trail_silence.wav has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/auditok/dataset.py	Thu Sep 17 22:01:30 2015 +0200
@@ -0,0 +1,20 @@
+"""
+This module contains links to audio files you can use for test purposes.
+
+September 2015
+@author: Amine SEHILI <amine.sehili@gmail.com>
+"""
+
+import os
+
+__all__ = ["one_to_six_arabic_16000_mono_bc_noise", "was_der_mensch_saet_mono_44100_lead_trail_silence"]
+
+_current_dir = os.path.dirname(os.path.realpath(__file__))
+
+one_to_six_arabic_16000_mono_bc_noise = "{cd}{sep}data{sep}1to6arabic_\
+16000_mono_bc_noise.wav".format(cd=_current_dir, sep=os.path.sep)
+
+
+was_der_mensch_saet_mono_44100_lead_trail_silence = "{cd}{sep}data{sep}was_\
+der_mensch_saet_das_wir_er_veilfach_enrten_44100Hz_mono_lead_trail_\
+silence.wav".format(cd=_current_dir, sep=os.path.sep)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/auditok/io.py	Thu Sep 17 22:01:30 2015 +0200
@@ -0,0 +1,453 @@
+"""
+Module for low-level audio input-output operations. 
+
+September 2015
+@author: Amine SEHILI <amine.sehili@gmail.com>
+"""
+
+from abc import ABCMeta, abstractmethod
+import wave
+
+__all__ = ["AudioSource", "Rewindable", "BufferAudioSource", "WaveAudioSource",
+           "PyAudioSource", "PyAudioPlayer", "from_file", "player_for"]
+
+DEFAULT_SAMPLE_RATE = 16000
+DEFAULT_SAMPLE_WIDTH = 2
+DEFAULT_NB_CHANNELS = 1
+
+
+class AudioSource():
+    __metaclass__ = ABCMeta
+    
+    """ 
+    Base class for audio source.
+        
+    Subclasses should implement methods to open/close an audio stream 
+    and read the desired amount of audio samples.
+         
+    """
+
+    def __init__(self, sampling_rate = DEFAULT_SAMPLE_RATE,
+                 sample_width = DEFAULT_SAMPLE_WIDTH,
+                 channels = DEFAULT_NB_CHANNELS):
+        
+        """
+        
+        Parameters
+        ----------
+        
+        `sampling_rate` *(int)* :
+            Number of samples per second of audio stream. Default = 16000.
+        
+        `sample_width` *(int)* :
+            Size in bytes of one audio sample. Possible values : 1, 2, 4.
+            Default = 2.
+            
+        `channels` *(int)* :
+            Number of channels of audio stream. The current version supports
+            only mono audio streams (i.e. one channel).
+        
+        """
+        
+        if not sample_width in (1, 2, 4):
+            raise ValueError("Sample width must be one of: 1, 2 or 4 (bytes)")
+        
+        if channels != 1:
+            raise ValueError("Only mono audio is currently handled")
+            
+        self.sampling_rate = sampling_rate
+        self.sample_width = sample_width
+        self.channels = channels
+      
+    @abstractmethod
+    def is_open(self):
+        """ Return True if audio source is open, False otherwise """
+    
+    @abstractmethod
+    def open(self):
+        """ Open audio source """
+    
+    @abstractmethod
+    def close(self):
+        """ Close audio source """
+    
+    @abstractmethod
+    def read(self, size):
+        """
+        Read and return `size` audio samples at most.
+        
+        Parameters
+        ----------
+        `size` : *(int)* :
+            the number of samples to read.
+        
+        Returns
+        --------
+        Audio data as a string of length 'N' * 'sample_width' * 'channels', where 'N' is:
+        
+        `size` if `size` < 'left_samples'
+        
+        'left_samples' if `size` > 'left_samples' 
+        
+        """ 
+    
+    def get_sampling_rate(self):
+        """ Return the number of samples per second of audio stream """
+        return self.sampling_rate
+    
+    def get_sample_width(self):
+        """ Return the number of bytes used to represent one audio sample """
+        return self.sample_width
+    
+    def get_channels(self):
+        """ Return the number of channels of this audio source """
+        return self.channels
+    
+
+
+class Rewindable():
+    __metaclass__ = ABCMeta
+    
+    """
+    Base class for rewindable audio streams.
+    Subclasses should implement methods to return to the beginning of an
+    audio stream as well as method to move to an absolute audio position
+    expressed in time or in number of samples. 
+    
+    """
+    
+    @abstractmethod
+    def rewind(self):
+        """ Go back to the beginning of audio stream """
+        pass
+    
+    @abstractmethod
+    def get_position(self):
+        """ Return the total number of already read samples """
+    
+    @abstractmethod
+    def get_time_position(self):
+        """ Return the total duration in seconds of already read data """
+    
+    @abstractmethod
+    def set_position(self, position):
+        """ Move to an absolute position 
+        
+        Parameters
+        ----------
+        `position` : *(int)*
+            number of samples to skip from the start of the stream
+        """
+    
+    @abstractmethod
+    def set_time_position(self, time_position):
+        """ Move to an absolute position expressed in seconds
+        
+        Parameters
+        ----------
+        `time_position` : *(float)*
+            seconds to skip from the start of the stream
+        """
+        pass
+    
+    
+
+class BufferAudioSource(AudioSource, Rewindable):
+    
+    """
+    A class that represents audio data as a memory buffer. It implements
+    methods from `io.Rewindable` and is therefore a navigable `io.AudioSource`.
+    """
+    
+    def __init__(self, data_buffer,
+                 sampling_rate = DEFAULT_SAMPLE_RATE,
+                 sample_width = DEFAULT_SAMPLE_WIDTH,
+                 channels = DEFAULT_NB_CHANNELS):
+        
+        if len(data_buffer) % (sample_width * channels) !=0:
+            raise ValueError("length of data_buffer must be a multiple of (sample_width * channels)")
+        
+        AudioSource.__init__(self, sampling_rate, sample_width, channels)
+        self._buffer = data_buffer
+        self._index = 0
+        self._left = 0 if self._buffer is None else len(self._buffer)
+        self._is_open = False
+    
+    def is_open(self):
+        return self._is_open
+        
+    def open(self):
+        self._is_open = True
+    
+    def close(self):
+        self._is_open = False
+        self.rewind()
+    
+    def read(self, size=None):
+        
+        if not self._is_open:
+            raise IOError("Stream is not open")
+        
+        if self._left > 0:
+            
+            to_read = size * self.sample_width * self.channels       
+            if to_read > self._left:
+                to_read = self._left 
+                            
+            data = self._buffer[self._index: self._index + to_read]
+            self._index += to_read
+            self._left -= to_read
+            
+            return data
+        
+        return None
+    
+    def get_data_buffer(self):
+        """ Return all audio data as one string buffer. """
+        return self._buffer
+    
+    def set_data(self, data_buffer):
+        """ Set new data for this audio stream. 
+        
+        Parameters
+        ----------
+        `data_buffer` :
+           a string buffer with a length multiple of (sample_width * channels)
+        """
+        if len(data_buffer) % (self.sample_width * self.channels) !=0:
+            raise ValueError("length of data_buffer must be a multiple of (sample_width * channels)")
+        self._buffer = data_buffer
+        self._index = 0
+        self._left = 0 if self._buffer is None else len(self._buffer)
+    
+    def append_data(self, data_buffer):
+        """ Append data to this audio stream
+        
+        Parameters
+        ----------
+        `data_buffer` :
+           a string buffer with a length multiple of (sample_width * channels)
+        
+        """
+        
+        if len(data_buffer) % (self.sample_width * self.channels) !=0:
+            raise ValueError("length of data_buffer must be a multiple of (sample_width * channels)")
+        
+        self._buffer += data_buffer
+        self._left += len(data_buffer)
+
+    
+    def rewind(self):
+        self.set_position(0)
+    
+    def get_position(self):
+        return self._index / self.sample_width
+    
+    def get_time_position(self):
+        return float(self._index) / (self.sample_width * self.sampling_rate) 
+    
+    def set_position(self, position):
+        if position < 0:
+            raise ValueError("position must be >= 0")
+        
+        if self._buffer is None:
+            self._index = 0
+            self._left = 0
+            return
+         
+        position *= self.sample_width 
+        self._index = position if position < len(self._buffer) else len(self._buffer)
+        self._left = len(self._buffer) - self._index
+
+
+    def set_time_position(self, time_position): # time in seconds
+        
+        position = int(self.sampling_rate * time_position)
+        self.set_position(position)
+        
+
+        
+
+class WaveAudioSource(AudioSource):
+    
+    """ A class for an `AudioSource` that reads data from a wave file. """
+    
+    def __init__(self, filename):
+        
+        """
+        Parameters
+        ----------
+        `filename` :
+            path to a valid wave file
+        
+        """
+                
+        self._filename = filename
+        self._audio_stream = None
+        
+        stream = wave.open(self._filename)
+        AudioSource.__init__(self, stream.getframerate(),
+                                   stream.getsampwidth(),
+                                   stream.getnchannels())
+        stream.close()
+    
+    
+    def is_open(self):
+        return self._audio_stream is not None
+ 
+    def open(self):
+        if(self._audio_stream is None):
+            self._audio_stream = wave.open(self._filename)
+      
+        
+    def close(self):
+        if self._audio_stream is not None:
+            self._audio_stream.close()
+            self._audio_stream = None
+        
+    
+    def read(self, size):
+        
+        if self._audio_stream is None:
+            raise IOError("Stream is not open")
+        else:
+            data = self._audio_stream.readframes(size)
+            if data is None or len(data) < 1:
+                return None
+            return data
+
+
+class PyAudioSource(AudioSource):
+    
+    """ A class for an `AudioSource` that reads data from the built-in microphone. """
+    
+    def __init__(self, sampling_rate = DEFAULT_SAMPLE_RATE,
+                 sample_width = DEFAULT_SAMPLE_WIDTH,
+                 channels = DEFAULT_NB_CHANNELS,
+                 frames_per_buffer = 1024):
+        
+        
+        AudioSource.__init__(self, sampling_rate, sample_width, channels)
+        self._chunk_size = frames_per_buffer
+        
+        import pyaudio
+        self._pyaudio_object = pyaudio.PyAudio()
+        self._pyaudio_format = self._pyaudio_object.get_format_from_width(self.sample_width) 
+        self._audio_stream = None
+
+    
+    def is_open(self):
+        return self._audio_stream is not None
+    
+    def open(self):
+        self._audio_stream = self._pyaudio_object.open(format = self._pyaudio_format,
+                                                   channels = self.channels,
+                                                   rate = self.sampling_rate,
+                                                   input = True,
+                                                   output = False,
+                                                   frames_per_buffer = self._chunk_size)
+        
+        
+    def close(self):
+        if self._audio_stream is not None:
+            self._audio_stream.stop_stream()
+            self._audio_stream.close()
+            self._audio_stream = None
+            
+    
+    def read(self, size):
+        
+        if self._audio_stream is None:
+            raise IOError("Stream is not open")
+        
+        if self._audio_stream.is_active():
+            data = self._audio_stream.read(size)
+            if data is None or len(data) < 1:
+                return None
+            return data
+        
+        return None
+    
+
+
+class PyAudioPlayer():
+    """ A class for audio playback """
+    
+    def __init__(self, sampling_rate = DEFAULT_SAMPLE_RATE,
+                 sample_width = DEFAULT_SAMPLE_WIDTH,
+                 channels = DEFAULT_NB_CHANNELS):
+        
+    
+        if not sample_width in (1, 2, 4):
+            raise ValueError("Sample width must be one of: 1, 2 or 4 (bytes)")
+        
+        self.sampling_rate = sampling_rate
+        self.sample_width = sample_width
+        self.channels = channels
+        
+        import pyaudio
+        self._p = pyaudio.PyAudio()
+        self.stream = self._p.open(format = self._p.get_format_from_width(self.sample_width),
+         channels = self.channels, rate = self.sampling_rate,
+         input = False, output = True)
+        
+    def play(self, data):
+        if self.stream.is_stopped():
+            self.stream.start_stream()
+        self.stream.write(data)
+        self.stream.stop_stream()
+    
+        
+    def  stop(self):
+        if not self.stream.is_stopped():
+            self.stream.stop_stream()
+        self.stream.close()
+        self._p.terminate()
+        
+    
+        
+
+def from_file(filename):
+    
+    """
+    Create an `AudioSource` object using the audio file specified by `filename`.
+    The appropriate `AudioSource` class is guessed from file's extension.
+    
+    Parameters
+    ----------
+    `filename` :
+        path to an audio file
+        
+    Returns
+    -------
+    an `AudioSource` object that reads data from the given file.
+    
+    """
+    
+    if filename.lower().endswith(".wav"):
+        return WaveAudioSource(filename)
+    
+    raise Exception("Can not create an AudioSource object from '%s'" %(filename))
+
+
+def player_for(audio_source):
+    """
+    Return a `PyAudioPlayer` that can play data from `audio_source`.
+    
+    Parameters
+    ----------
+    `audio_source` : 
+        an `AudioSource` object.
+    
+    Returns
+    -------
+    `PyAudioPlayer` that has the same sampling rate, sample width and number of channels
+    as `audio_source`.
+    """
+    
+    return PyAudioPlayer(audio_source.get_sampling_rate(),
+            audio_source.get_sample_width(),
+            audio_source.get_channels())
+    
+    
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/auditok/util.py	Thu Sep 17 22:01:30 2015 +0200
@@ -0,0 +1,578 @@
+"""
+September 2015
+@author: Amine SEHILI <amine.sehili@gmail.com>
+"""
+
+from abc import ABCMeta, abstractmethod
+import math
+from array import array
+from io import Rewindable, from_file, BufferAudioSource, PyAudioSource
+
+
+try:
+    import numpy
+    _WITH_NUMPY = True
+except ImportError as e:
+    _WITH_NUMPY = False
+    
+
+__all__ = ["DataSource", "DataValidator", "StringDataSource", "ADSFactory", "AudioEnergyValidator"]
+    
+
+class DataSource():
+    __metaclass__ = ABCMeta
+    """
+    Base class for objects passed to `StreamTokenizer.tokenize`.
+    Subclasses should implement a `read` method.
+    
+    """
+    
+    @abstractmethod
+    def read(self):
+        """ Read a piece of data from this source.
+            If no more data is available, return None.
+        """
+    
+    
+class DataValidator():
+    __metaclass__ = ABCMeta
+    """
+    Base class for a validator object used by `StreamTokenizer` to check
+    if read data is valid.
+    Subclasses should implement `is_valid` method.
+    """
+    
+    @abstractmethod
+    def is_valid(self, data):
+        """
+        Check whether `data` is valid
+        """
+
+class StringDataSource(DataSource):
+    """
+    A class that represents a `DataSource` as a string buffer.
+    Each call to `read` returns one character and moves one step forward.
+    If the end of the buffer is reached, `read` returns None. 
+    """
+     
+    def __init__(self, data):
+        """
+        Parameters
+        ----------
+        `data` : 
+            a basestring object.
+        """
+        
+        self._data = None
+        self._current = 0
+        self.set_data(data)
+        
+    
+    def read(self):
+        if self._current >= len(self._data):
+            return None
+        self._current += 1
+        return self._data[self._current - 1]
+    
+    def set_data(self, data):
+        """
+        Set a new data buffer.
+        
+        Parameters
+        ----------
+        `data` : 
+            a basestring object.
+        """
+        
+        if not isinstance(data, basestring):
+            raise ValueError("data must an instance of basestring")
+        self._data = data
+        self._current = 0
+        
+
+
+class ADSFactory:
+    """
+    Factory class that makes it easy to create an `AudioDataSource` object that implements
+    `DataSource` and can therefore be passed to `StreamTokenizer.tokenize`.
+    
+    Whether you read audio data from a file, the microphone or a memory buffer, this factory
+    instantiates and returns the right `AudioDataSource` object.
+    
+    There are many other features you want your `AudioDataSource` object to have, such as: 
+    memorize all read audio data so that you can rewind and reuse it (especially useful when 
+    reading data from the microphone), read a fixed amount of data (also useful when reading 
+    from the microphone), read overlapping audio frames (often needed when doing a spectral
+    analysis of data).
+    
+    `ADSFactory.ads` automatically creates and returns an object with the desired behavior according
+    to the supplied keyword arguments. 
+     
+    
+    """
+    
+    @staticmethod
+    def ads(**kwargs):
+        
+        """
+        Create and return an `AudioDataSource`. The type and behavior of the object are the result
+        of the supplied parameters.
+        
+        Parameters
+        ----------
+        
+        *No parameters* :  
+           read audio data from the available built-in microphone with the default parameters.
+           The returned `AudioDataSource` encapsulates an `io.PyAudioSource` object, and hence 
+           it accepts the next four parameters, which are used instead of their default values.
+        
+        `sampling_rate` : *(int)*
+            number of samples per second. Default = 16000.
+        
+        `sample_width` : *(int)*
+            number of bytes per sample (must be in (1, 2, 4)). Default = 2
+        
+        `channels` : *(int)*
+            number of audio channels. Default = 1 (only this value is currently accepted)  
+            
+        `frames_per_buffer` *(int)*:
+            number of samples of PyAudio buffer. Default = 1024.
+        
+        `audio_source` : an `io.AudioSource` object
+            read data from this audio source
+            
+        `filename` : *(string)*
+            build an `io.AudioSource` object using this file (currently only wave format is supported)
+            
+        `data_buffer` : *(string)*
+            build an `io.BufferAudioSource` using data in `data_buffer`. If this keyword is used,
+            `sampling_rate`, `sample_width` and `channels` are passed to `io.BufferAudioSource`
+            constructor and used instead of default values.
+            
+        `max_time` : *(float)*
+             maximum time (in seconds) to read. Default behavior: read until there is no more data
+             available. 
+        
+         
+        `record` : *(bool)*
+            save all read data in cache. Provide a navigable object which boasts a `rewind` method.
+            Default = False.
+            
+          
+         `block_size` : *(int)*
+             number of samples to read each time the `read` method is called. Default : a block size
+             that represent a window of 10ms, so for a sampling rate of 16000, the default `block_size`
+             is 160, for a rate of 44100, `block_size` = 441, etc.
+        
+        `hop_size` : *(int)*
+            determines the number of overlapping samples between two consecutive read windows. For a
+            `hop_size` of value *N*, the overlap is `block_size` - *N*. Default : `hop_size` = `block_size`,
+            means that there is no overlap.       
+        
+        """
+        
+        for k in kwargs.iterkeys():
+            if not k in ["block_size", "hop_size", "max_time", "record", "audio_source",
+                         "filename", "frames_per_buffer", "data_buffer", "filename", "sampling_rate",
+                         "sample_width", "channels"]:
+                raise ValueError("Invalid argument: {0}".format(k))
+        
+        
+        if kwargs.has_key("block_size"):
+            block_size = kwargs.pop("block_size")
+        else:
+            block_size = None
+        
+        if kwargs.has_key("hop_size"):
+            hop_size = kwargs.pop("hop_size")
+        else:
+            hop_size = None
+        
+        if kwargs.has_key("max_time"):
+            max_time = float(kwargs.pop("max_time"))
+        else:
+            max_time = None
+        
+        if kwargs.has_key("record"):
+            record = kwargs.pop("record")
+        else:
+            record = False
+        
+        # Case 1: an audio source is supplied
+        if kwargs.has_key("audio_source"):
+            if kwargs.has_key("filename") or kwargs.has_key("data_buffer"):
+                raise Warning("You should provide one of 'audio_source', 'filename' or 'data_buffer'\
+                 keyword parameters. 'audio_source' will be used")
+            audio_source = kwargs.pop("audio_source")
+            
+            
+        # Case 2: a file name is supplied
+        elif kwargs.has_key("filename"):
+            if kwargs.has_key("data_buffer"):
+                raise Warning("You should provide one of 'filename' or 'data_buffer'\
+                 keyword parameters. 'filename' will be used")
+            audio_source = from_file(kwargs.pop("filename"))
+            
+            
+        # Case 3: a data_buffer is supplied 
+        elif kwargs.has_key("data_buffer"):
+            audio_source = BufferAudioSource(**kwargs)
+            
+        # Case 4: try to access native audio input
+        else:
+            audio_source = PyAudioSource(**kwargs)
+             
+        # Set default block_size to 10 ms
+        if block_size is None:
+            block_size = audio_source.get_sampling_rate() / 100
+        
+        # Instantiate base AudioDataSource  
+        ads = ADSFactory.AudioDataSource(audio_source=audio_source, block_size=block_size)
+        
+        # Limit data to be read
+        if max_time is not None:
+            ads = ADSFactory.LimiterADS(ads=ads, max_time=max_time)
+        
+        # Record, rewind and reuse data
+        if record:
+            ads = ADSFactory.RecorderADS(ads=ads)
+            
+        # Read overlapping blocks of data
+        if hop_size is not None:
+            if hop_size <= 0 or  hop_size > block_size:
+                raise ValueError("hop_size must be > 0 and <= block_size")
+            if hop_size < block_size:
+                ads = ADSFactory.OverlapADS(ads=ads, hop_size=hop_size)
+        
+        return ads
+        
+        
+    class AudioDataSource(DataSource):
+        
+        def __init__(self, audio_source, block_size):
+            
+            self.audio_source = audio_source
+            self.block_size = block_size
+                
+        def get_block_size(self):
+            return self.block_size
+        
+        def set_block_size(self, size):
+            self.block_size = size
+
+        def get_audio_source(self):
+            return self.audio_source
+        
+        def set_audio_source(self, audio_source):
+            self.audio_source = audio_source
+            
+        def open(self):
+            self.audio_source.open()
+        
+        def close(self):
+            self.audio_source.close()
+            
+        def is_open(self):
+            return self.audio_source.is_open()
+        
+        def get_sampling_rate(self):
+            return self.audio_source.get_sampling_rate()
+        
+        def get_sample_width(self):
+            return self.audio_source.get_sample_width()
+        
+        def get_channels(self):
+            return self.audio_source.get_channels()
+        
+        
+        def rewind(self):
+            if isinstance(self.audio_source, Rewindable):
+                self.audio_source.rewind()
+            else:
+                raise Exception("Audio source is not rewindable")
+            
+            
+        
+        def is_rewindable(self):
+            return isinstance(self.audio_source, Rewindable)
+        
+            
+        def read(self):
+            return self.audio_source.read(self.block_size)
+        
+        
+    
+    
+    class ADSDecorator(AudioDataSource):
+        __metaclass__ = ABCMeta
+        
+        def __init__(self, ads):
+            self.ads = ads
+            
+            self.get_block_size = self.ads.get_block_size
+            self.set_block_size = self.ads.set_block_size
+            self.get_audio_source = self.ads.get_audio_source
+            self.open = self.ads.open
+            self.close = self.ads.close
+            self.is_open = self.ads.is_open
+            self.get_sampling_rate = self.ads.get_sampling_rate
+            self.get_sample_width = self.ads.get_sample_width
+            self.get_channels = self.ads.get_channels
+        
+        
+        def is_rewindable(self):
+            return self.ads.is_rewindable
+            
+        def rewind(self):
+            self.ads.rewind()
+            self._reinit()
+            
+        def set_audio_source(self, audio_source):
+            self.ads.set_audio_source(audio_source)
+            self._reinit()
+        
+        def open(self):
+            if not self.ads.is_open():
+                self.ads.open()
+                self._reinit()
+            
+        
+        @abstractmethod
+        def _reinit(self):
+            pass            
+        
+        
+    class OverlapADS(ADSDecorator):
+        
+        """
+        Read overlapping audio frames
+        """
+        
+        def __init__(self, ads, hop_size):
+            ADSFactory.ADSDecorator.__init__(self, ads)
+            
+            if hop_size <= 0 or hop_size > self.get_block_size():
+                raise ValueError("hop_size must be either 'None' or \
+                 between 1 and block_size (both inclusive)")
+            self.hop_size = hop_size
+            self._actual_block_size = self.get_block_size()
+            self._reinit()
+            
+            
+            def _get_block_size():
+                return self._actual_block_size
+            
+            #self.get_block_size = _get_block_size
+            
+            
+            
+        def _read_first_block(self):
+            # For the first call, we need an entire block of size 'block_size'
+            block = self.ads.read()
+            if block is None:
+                return None
+            
+            # Keep a slice of data in cache and append it in the next call
+            if len(block) > self._hop_size_bytes:
+                self._cache = block[self._hop_size_bytes:]
+            
+            # Up from the next call, we will use '_read_next_blocks'
+            # and we only read 'hop_size'
+            self.ads.set_block_size(self.hop_size)
+            self.read = self._read_next_blocks
+            
+            return block
+                
+        def _read_next_blocks(self):
+            block = self.ads.read()
+            if block is None:
+                return None
+            
+            # Append block to cache data to ensure overlap
+            block = self._cache + block
+            # Keep a slice of data in cache only if we have a full length block
+            # if we don't that means that this is the last block
+            if len(block) == self._block_size_bytes:
+                self._cache = block[self._hop_size_bytes:]
+            else:
+                self._cache = None
+                
+            return block
+                
+                    
+        def read(self):
+            pass
+        
+        def _reinit(self):
+            self._cache = None
+            self.ads.set_block_size(self._actual_block_size)
+            self._hop_size_bytes = self.hop_size * \
+                               self.get_sample_width() * \
+                               self.get_channels()
+            self._block_size_bytes = self.get_block_size() * \
+                               self.get_sample_width() * \
+                               self.get_channels()
+            self.read = self._read_first_block
+     
+    
+    
+    class LimiterADS(ADSDecorator):
+        
+        def __init__(self, ads, max_time):
+            ADSFactory.ADSDecorator.__init__(self, ads)
+            
+            self.max_time = max_time
+            self._reinit()
+            
+        def read(self):
+            if self._total_read_bytes >=  self._max_read_bytes:
+                return None
+            block = self.ads.read()
+            if block is None:
+                return None
+            self._total_read_bytes += len(block)
+            
+            if self._total_read_bytes >=  self._max_read_bytes:
+                self.close()
+            
+            return block
+                
+                
+        def _reinit(self):
+            self._max_read_bytes = int(self.max_time  * self.get_sampling_rate()) * \
+                                  self.get_sample_width() * \
+                                  self.get_channels()
+            self._total_read_bytes = 0
+            
+            
+      
+    
+    class RecorderADS(ADSDecorator):
+        
+        def __init__(self, ads):
+            ADSFactory.ADSDecorator.__init__(self, ads)
+            
+            self._reinit()
+            
+            
+        def read(self):
+            pass
+        
+        
+        def _read_and_rec(self):
+            # Read and save read data
+            block = self.ads.read()
+            if block is not None:
+                self._cache.append(block)
+            
+            return block
+            
+            
+        def _read_simple(self):
+            # Read without recording
+            return self.ads.read()
+            
+        
+        def rewind(self):
+            if self._record:
+                # If has been recording, create a new BufferAudioSource
+                # from recorded data
+                dbuffer = ''.join(self._cache)
+                asource = BufferAudioSource(dbuffer, self.get_sampling_rate(),
+                                             self.get_sample_width(),
+                                             self.get_channels())
+                
+                
+                self.set_audio_source(asource)
+                self.open()
+                self._cache = []
+                self._record = False
+                self.read = self._read_simple
+            
+            else:
+                self.ads.rewind()
+                if not self.is_open():
+                    self.open()
+                    
+        
+        def is_rewindable(self):
+            return True
+        
+        def _reinit(self):
+            # when audio_source is replaced, start recording again
+            self._record = True
+            self._cache = []
+            self.read = self._read_and_rec
+
+
+                
+            
+
+class AudioEnergyValidator(DataValidator):
+    
+    
+    if _WITH_NUMPY:
+        
+        _formats = {1: numpy.int8 , 2: numpy.int16, 4: numpy.int32}
+
+        @staticmethod
+        def _convert(signal, sample_width):
+            return numpy.array(numpy.frombuffer(signal, 
+                               dtype=AudioEnergyValidator._formats[sample_width]),
+                               dtype=numpy.float64)
+                               
+            
+        @staticmethod
+        def _siganl_energy(signal):
+                return float(numpy.dot(signal, signal)) / len(signal)
+        
+        @staticmethod    
+        def _signal_log_energy(signal):
+            energy = AudioEnergyValidator._siganl_energy(signal)
+            if energy <= 0:
+                return -200
+            return 10. * numpy.log10(energy)
+        
+    else:
+        
+        
+        _formats = {1: 'B' , 2: 'H', 4: 'I'}
+        
+        @staticmethod
+        def _convert(signal, sample_width):
+            array("d", array(AudioEnergyValidator._formats[sample_width], signal))
+        
+        @staticmethod
+        def _siganl_energy(signal):
+                energy = 0.
+                for a in signal:
+                    energy += a * a
+                return energy / len(signal)
+        
+        @staticmethod    
+        def _signal_log_energy(signal):
+            energy = AudioEnergyValidator._siganl_energy(signal)
+            if energy <= 0:
+                return -200
+            return 10. * math.log10(energy)
+            
+    
+    def __init__(self, sample_width, energy_threshold=45):
+        
+        self.sample_width = sample_width
+        self._energy_threshold = energy_threshold
+        
+            
+    def is_valid(self, data):
+        signal = AudioEnergyValidator._convert(data, self.sample_width)
+        return AudioEnergyValidator._signal_log_energy(signal) >= self._energy_threshold
+    
+    def get_energy_threshold(self):
+        return self._energy_threshold
+    
+    def set_energy_threshold(self, threshold):
+        self._energy_threshold = threshold
+        
+    
+    
+        
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/demos/audio_tokenize_demo.py	Thu Sep 17 22:01:30 2015 +0200
@@ -0,0 +1,50 @@
+"""
+@author: Amine SEHILI <amine.sehili@gmail.com>
+September, 2015
+"""
+
+from auditok import ADSFactory, AudioEnergyValidator, StreamTokenizer, player_for, dataset
+
+# We set the `record` argument to True so that we can rewind the source
+asource = ADSFactory.ads(filename=dataset.one_to_six_arabic_16000_mono_bc_noise, record=True)
+
+validator = AudioEnergyValidator(sample_width=asource.get_sample_width(), energy_threshold=65)
+
+# Default analysis window is 10 ms (float(asource.get_block_size()) / asource.get_sampling_rate())
+# min_length=20 : minimum length of a valid audio activity is 20 * 10 == 200 ms
+# max_length=400 : maximum length of a valid audio activity is 400 * 10 == 4000 ms == 4 seconds
+# max_continuous_silence=30 : maximum length of a tolerated silence within a valid audio activity is 30 * 10 == 300 ms 
+tokenizer = StreamTokenizer(validator=validator, min_length=20, max_length=400, max_continuous_silence=30)
+
+asource.open()
+tokens = tokenizer.tokenize(asource)
+
+# Play detected regions back
+
+player = player_for(asource)
+
+# Rewind and read the whole signal
+asource.rewind()
+original_signal = []
+
+while True:
+   w = asource.read()
+   if w is None:
+      break
+   original_signal.append(w)
+
+original_signal = ''.join(original_signal)
+
+print("\n ** Playing original file...")
+player.play(original_signal)
+
+print("\n ** playing detected regions...\n")
+for i,t in enumerate(tokens):
+    print("Token [{0}] starts at {1} and ends at {2}".format(i+1, t[1], t[2]))
+    data = ''.join(t[0])
+    player.play(data)
+
+assert len(tokens) == 8
+
+asource.close()
+player.stop()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/demos/audio_trim_demo.py	Thu Sep 17 22:01:30 2015 +0200
@@ -0,0 +1,90 @@
+"""
+@author: Amine SEHILI <amine.sehili@gmail.com>
+September, 2015
+"""
+
+# Trim leading and trailing silence from a record
+
+from auditok import ADSFactory, AudioEnergyValidator, StreamTokenizer, player_for, dataset
+import pyaudio
+
+"""
+The tokenizer in the following example is set up to remove the silence
+that precedes the first acoustic activity or follows the last activity 
+in a record. It preserves whatever it finds between the two activities.
+In other words, it removes the leading and trailing silence.
+
+Sampling rate is 44100 samples per second; we'll use an analysis window of 100 ms
+(i.e. block_size == 4410)
+
+Energy threshold is 50.
+
+The tokenizer will start accumulating windows up from the moment it encounters
+the first analysis window of an energy >= 50. ALL the following windows will be 
+kept regardless of their energy. At the end of the analysis, it will drop trailing
+ windows with an energy below 50.
+
+This is an interesting example because the audio file we're analyzing contains a very
+brief noise that occurs within the leading silence. We certainly don't want our tokenizer 
+to stop at this point and consider whatever comes after it as a useful signal.
+To force the tokenizer to ignore that brief event we use two other parameters, `init_min`
+and `init_max_silence`. By `init_min`=3 and `init_max_silence`=1 we tell the tokenizer
+that a valid event must start with at least 3 noisy windows, between which there
+is at most 1 silent window.
+
+Still, with this configuration we can get the tokenizer to detect that noise as a valid event
+(if it actually contains 3 consecutive noisy frames). To circumvent this we use a sufficiently
+large analysis window (here of 100 ms) to ensure that the brief noise is surrounded by a much
+longer silence and hence the energy of the overall analysis window will be below 50.
+
+When using a shorter analysis window (of 10ms for instance, block_size == 441), the brief
+noise contributes more to energy calculation which yields an energy of over 50 for the window.
+Again we can deal with this situation by using a higher energy threshold (55 for example)
+ 
+"""
+
+
+# record = True so that we'll be able to rewind the source.
+asource = ADSFactory.ads(filename=dataset.was_der_mensch_saet_mono_44100_lead_trail_silence,
+          record=True, block_size=4410)
+asource.open()
+
+original_signal = []
+# Read the whole signal
+while True:
+   w = asource.read()
+   if w is None:
+      break
+   original_signal.append(w)
+
+original_signal = ''.join(original_signal)
+
+
+# rewind source
+asource.rewind()
+
+# Create a validator with an energy threshold of 50
+validator = AudioEnergyValidator(sample_width=asource.get_sample_width(), energy_threshold=50)
+
+# Create a tokenizer with an unlimited token length and continuous silence within a token
+# Note the DROP_TRAILING_SILENCE mode that will ensure removing trailing silence
+trimmer = StreamTokenizer(validator, min_length = 20, max_length=99999999,
+                          max_continuous_silence=9999999, mode=StreamTokenizer.DROP_TRAILING_SILENCE, init_min=3, init_max_silence=1)
+
+
+tokens = trimmer.tokenize(asource)
+
+# Make sure we only have one token
+assert len(tokens) == 1, "Should have detected one single token"
+
+trimmed_signal = ''.join(tokens[0][0])
+
+player = player_for(asource)
+
+print("\n ** Playing original signal (with leading and trailing silence)...")
+player.play(original_signal)
+print("\n ** Playing trimmed signal...")
+player.play(trimmed_signal)
+
+player.stop()
+asource.close()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/demos/echo.py	Thu Sep 17 22:01:30 2015 +0200
@@ -0,0 +1,36 @@
+
+from auditok import ADSFactory, AudioEnergyValidator, StreamTokenizer, player_for
+import pyaudio
+import sys
+
+energy_threshold = 45
+duration = 10 # seconds
+
+
+if len(sys.argv) > 1:
+  energy_threshold = float(sys.argv[1])
+
+if len(sys.argv) > 2:
+  duration = float(sys.argv[2])
+
+# record = True so that we'll be able to rewind the source.
+# max_time = 10: read 10 seconds from the microphone
+asource = ADSFactory.ads(record=True, max_time = duration)
+
+validator = AudioEnergyValidator(sample_width=asource.get_sample_width(), energy_threshold = energy_threshold)
+tokenizer = StreamTokenizer(validator=validator, min_length=20, max_length=250, max_continuous_silence=30)
+
+player = player_for(asource)
+
+def echo(data, start, end):
+   print("Acoustic activity at: {0}--{1}".format(start, end))
+   player.play(''.join(data))
+
+asource.open()
+
+print("\n  ** Make some noise (dur:{}, energy:{})...".format(duration, energy_threshold))
+
+tokenizer.tokenize(asource, callback=echo)
+
+asource.close()
+player.stop()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/quickstart.rst	Thu Sep 17 22:01:30 2015 +0200
@@ -0,0 +1,516 @@
+.. auditok documentation.
+
+auditok, an AUDIo TOKenization module
+=====================================
+
+
+**auditok**  is a module that can be used as a generic tool for data
+tokenization. Although its core motivation is **Acoustic Activity 
+Detection** (AAD) and extraction from audio streams (i.e. detect
+where a noise/an acoustic activity occurs within an audio stream and
+extract the corresponding portion of signal), it can easily be
+adapted to other tasks.
+
+Globally speaking, it can be used to extract, from a sequence of
+observations, all sub-sequences that meet a certain number of
+criteria in terms of:
+
+1. Minimum length of a **valid** token (i.e. sub-sequence)
+2. Maximum length of a valid token
+3. Maximum tolerated consecutive **non-valid** observations within
+   a valid token
+
+Examples of a non-valid observation are: a non-numeric ascii symbol
+if you are interested in sub-sequences of numeric symbols, or a silent
+audio window (of 10, 20 or 100 milliseconds for instance) if what
+interests you are audio regions made up of a sequence of "noisy"
+windows (whatever kind of noise: speech, baby cry, laughter, etc.).
+
+The most important component of `auditok` is the `StreamTokenizer` class.
+An instance of this class encapsulates a `DataValidator` and can be 
+configured to detect the desired regions from a stream.
+The `auditok.core.StreamTokenizer.tokenize` method accepts a `DataSource`
+object that has a `read` method. Read data can be of any type accepted
+by the `validator`.
+
+
+As the main aim of this module is **Audio Activity Detection**,
+it provides the `auditok.util.ADSFactory` factory class that makes
+it very easy to create an `AudioDataSource` (a class that implements `DataSource`)
+object, be that from:
+
+- A file on the disk
+- A buffer of data
+- The built-in microphone (requires PyAudio)
+ 
+
+The `AudioDataSource` class inherits from `DataSource` and supplies
+a higher abstraction level than `AudioSource` thanks to a bunch of
+handy features:
+
+- Define a fixed-length of block_size (i.e. analysis window)
+- Allow overlap between two consecutive analysis windows (hop_size < block_size).
+   This can be very important if your validator uses the **spectral** 
+   information of audio data instead of raw audio samples.
+- Limit the amount (i.e. duration) of read data (very useful when reading
+   data from the microphone)
+- Record and rewind data (also useful if you read data from the microphone
+   and you want to process it many times offline and/or save it)  
+
+
+Last but not least, the current version has only one audio window validator based on
+signal energy.
+
+Requirements
+============
+
+`auditok` requires `Pyaudio <http://people.csail.mit.edu/hubert/pyaudio/>`_
+for audio acquisition and playback.
+
+
+Illustrative examples with strings
+==================================
+
+Let us look at some examples using the `auditok.util.StringDataSource` class
+created for test and illustration purposes. Imagine that each character of 
+`auditok.util.StringDataSource` data represent an audio slice of 100 ms for
+example. In the following examples we will use upper case letters to represent
+noisy audio slices (i.e. analysis windows or frames) and lower case letter for
+silent frames.
+
+
+Extract sub-sequences of consecutive upper case letters
+-------------------------------------------------------
+
+We want to extract sub-sequences of characters that have:
+    
+- A minimum length of 1 (`min_length` = 1)
+- A maximum length of 9999 (`max_length` = 9999)
+- Zero consecutive lower case characters within them (`max_continuous_silence` = 0)
+
+We also create the `UpperCaseChecker` whose `read` method returns `True` if the 
+checked character is in upper case and `False` otherwise. 
+
+.. code:: python
+      
+    from auditok import StreamTokenizer, StringDataSource, DataValidator
+    
+    class UpperCaseChecker(DataValidator):
+       def is_valid(self, frame):
+          return frame.isupper()
+    
+    dsource = StringDataSource("aaaABCDEFbbGHIJKccc")
+    tokenizer = StreamTokenizer(validator=UpperCaseChecker(), 
+                 min_length=1, max_length=9999, max_continuous_silence=0)
+                 
+    tokenizer.tokenize(dsource)
+
+The output is a list of two tuples, each contains the extracted sub-sequence and its
+start and end position in the original sequence respectively:
+
+    
+    [(['A', 'B', 'C', 'D', 'E', 'F'], 3, 8), (['G', 'H', 'I', 'J', 'K'], 11, 15)]
+    
+Tolerate up to two non-valid (lower case) letter within an extracted sequence
+-----------------------------------------------------------------------------
+
+To do so, we set `max_continuous_silence` =2:
+
+.. code:: python
+
+
+    from auditok import StreamTokenizer, StringDataSource, DataValidator
+    
+    class UpperCaseChecker(DataValidator):
+       def is_valid(self, frame):
+          return frame.isupper()
+    
+    dsource = StringDataSource("aaaABCDbbEFcGHIdddJKee")
+    tokenizer = StreamTokenizer(validator=UpperCaseChecker(), 
+                 min_length=1, max_length=9999, max_continuous_silence=2)
+                 
+    tokenizer.tokenize(dsource)
+
+
+output:
+
+.. code:: python
+  
+    [(['A', 'B', 'C', 'D', 'b', 'b', 'E', 'F', 'c', 'G', 'H', 'I', 'd', 'd'], 3, 16), (['J', 'K', 'e', 'e'], 18, 21)]
+    
+Notice the trailing lower case letters "dd" and "ee" at the end of the two
+tokens. The default behavior of `StreamTokenizer` is to keep the *trailing
+silence* if it doesn't exceed `max_continuous_silence`. This can be changed
+using the `DROP_TRAILING_SILENCE` mode (see next example).
+
+Remove trailing silence
+-----------------------
+
+Trailing silence can be useful for many sound recognition applications, including
+speech recognition. Moreover, from the human auditory system point of view, trailing
+low energy signal helps avoid abrupt signal cuts.
+
+If you want to remove it anyway, you can do it by setting `mode` to `StreamTokenizer.DROP_TRAILING_SILENCE`:
+
+.. code:: python
+
+    from auditok import StreamTokenizer, StringDataSource, DataValidator
+    
+    class UpperCaseChecker(DataValidator):
+       def is_valid(self, frame):
+          return frame.isupper()
+    
+    dsource = StringDataSource("aaaABCDbbEFcGHIdddJKee")
+    tokenizer = StreamTokenizer(validator=UpperCaseChecker(), 
+                 min_length=1, max_length=9999, max_continuous_silence=2,
+                 mode=StreamTokenizer.DROP_TRAILING_SILENCE)
+                 
+    tokenizer.tokenize(dsource)
+
+output:
+
+.. code:: python
+
+    [(['A', 'B', 'C', 'D', 'b', 'b', 'E', 'F', 'c', 'G', 'H', 'I'], 3, 14), (['J', 'K'], 18, 19)]
+
+
+Limit the length of detected tokens
+-----------------------------------
+
+Imagine that you just want to detect and recognize a small part of a long
+acoustic event (e.g. engine noise, water flow, etc.) and avoid having that
+event hog the tokenizer and prevent it from feeding the event to the next
+processing step (i.e. a sound recognizer). You can do this by:
+
+ - limiting the length of a detected token.
+ 
+ and
+ 
+ - using a callback function as an argument to `StreamTokenizer.tokenize`
+   so that the tokenizer delivers a token as soon as it is detected.
+
+The following code limits the length of a token to 5:
+
+.. code:: python
+    
+    from auditok import StreamTokenizer, StringDataSource, DataValidator
+    
+    class UpperCaseChecker(DataValidator):
+       def is_valid(self, frame):
+          return frame.isupper()
+    
+    dsource = StringDataSource("aaaABCDEFGHIJKbbb")
+    tokenizer = StreamTokenizer(validator=UpperCaseChecker(),
+                 min_length=1, max_length=5, max_continuous_silence=0)
+                 
+    def print_token(data, start, end):
+        print("token = '{0}', starts at {1}, ends at {2}".format(''.join(data), start, end))
+                 
+    tokenizer.tokenize(dsource, callback=print_token)
+    
+
+output:
+
+    "token = 'ABCDE', starts at 3, ends at 7"
+    "token = 'FGHIJ', starts at 8, ends at 12"
+    "token = 'K', starts at 13, ends at 13"
+
+
+Using real audio data
+=====================
+
+In this section we will use `ADSFactory`, `AudioEnergyValidator` and `StreamTokenizer`
+for an AAD demonstration using audio data. Before we go any further, it is worth
+explaining a certain number of points.
+
+`ADSFactory.ads` method is called to create an `AudioDataSource` object that can be
+passed to  `StreamTokenizer.tokenize`. `ADSFactory.ads` accepts a number of keyword
+arguments, of which none is mandatory. The returned `AudioDataSource` object can 
+however greatly differ depending on the passed arguments. Further details can be found
+in the respective method documentation. Note however the following two calls that will
+create an `AudioDataSource` that read data from an audio file and from the built-in
+microphone respectively.
+
+.. code:: python
+    
+    from auditok import ADSFactory
+    
+    # Get an AudioDataSource from a file
+    file_ads = ADSFactory.ads(filename = "path/to/file/")
+    
+    # Get an AudioDataSource from the built-in microphone
+    # The returned object has the default values for sampling
+    # rate, sample width and number of channels. See method's
+    # documentation for customized values 
+    mic_ads = ADSFactory.ads()
+    
+For `StreamTokenizer`, parameters `min_length`, `max_length` and `max_continuous_silence`
+are expressed in term of number of frames. If you want a `max_length` of *2 seconds* for
+your detected sound events and your *analysis window* is *10 ms* long, you have to specify
+a `max_length` of 200 (`int(2. / (10. / 1000)) == 200`). For a `max_continuous_silence` of *300 ms*
+for instance, the value to pass to StreamTokenizer is 30 (`int(0.3 / (10. / 1000)) == 30`).
+
+
+Where do you get the size of the **analysis window** from?
+
+
+Well this is a parameter you pass to `ADSFactory.ads`. By default `ADSFactory.ads` uses
+an analysis window of 10 ms. The number of samples that 10 ms of signal contains will
+vary depending on the sampling rate of your audio source (file, microphone, etc.).
+For a sampling rate of 16KHz (16000 samples per second), we have 160 samples for 10 ms.
+Therefore you can use block sizes of 160, 320, 1600 for analysis windows of 10, 20 and 100 
+ms respectively.
+
+.. code:: python
+    
+    from auditok import ADSFactory
+    
+    file_ads = ADSFactory.ads(filename = "path/to/file/", block_size = 160)
+    
+    file_ads = ADSFactory.ads(filename = "path/to/file/", block_size = 320)
+    
+    # If no sampling rate is specified, ADSFactory use 16KHz as the default
+    # rate for the microphone. If you want to use a window of 100 ms, use 
+    # a block size of 1600 
+    mic_ads = ADSFactory.ads(block_size = 1600)
+    
+So if you're not sure what your analysis window in seconds is, use the following:
+
+.. code:: python
+    
+    my_ads = ADSFactory.ads(...)
+    analysis_win_seconds = float(my_ads.get_block_size()) / my_ads.get_sampling_rate()
+    analysis_window_ms = analysis_win_seconds * 1000
+    
+    # For a `max_continuous_silence` of 300 ms use:
+    max_continuous_silence = int(300. / analysis_window_ms)
+    
+    # Which is the same as
+    max_continuous_silence = int(0.3 / (analysis_window_ms / 1000))
+    
+    
+Examples
+--------
+
+Extract isolated phrases from an utterance
+------------------------------------------
+
+We will build an `AudioDataSource` using a wave file from  the database.
+The file contains isolated pronunciations of digits from 1 to 6
+in Arabic as well as breath-in/out between 2 and 3. The code will play the
+original file then the detected sounds separately. Note that we use an 
+`energy_threshold` of 65, this parameter should be carefully chosen. It depends
+on microphone quality, background noise and the amplitude of events you want to 
+detect.
+
+.. code:: python
+
+    from auditok import ADSFactory, AudioEnergyValidator, StreamTokenizer, player_for, dataset
+     
+    # We set the `record` argument to True so that we can rewind the source
+    asource = ADSFactory.ads(filename=dataset.one_to_six_arabic_16000_mono_bc_noise, record=True)
+     
+    validator = AudioEnergyValidator(sample_width=asource.get_sample_width(), energy_threshold=65)
+    
+    # Default analysis window is 10 ms (float(asource.get_block_size()) / asource.get_sampling_rate())
+    # min_length=20 : minimum length of a valid audio activity is 20 * 10 == 200 ms
+    # max_length=400 : maximum length of a valid audio activity is 400 * 10 == 4000 ms == 4 seconds
+    # max_continuous_silence=30 : maximum length of a tolerated silence within a valid audio activity is 30 * 10 == 300 ms 
+    tokenizer = StreamTokenizer(validator=validator, min_length=20, max_length=400, max_continuous_silence=30)
+    
+    asource.open()
+    tokens = tokenizer.tokenize(asource)
+    
+    # Play detected regions back
+    
+    player = player_for(asource)
+    
+    # Rewind and read the whole signal
+    asource.rewind()
+    original_signal = []
+
+    while True:
+       w = asource.read()
+       if w is None:
+          break
+       original_signal.append(w)
+       
+    original_signal = ''.join(original_signal)
+    
+    print("Playing the original file...")
+    player.play(original_signal)
+    
+    print("playing detected regions...")
+    for t in tokens:
+        print("Token starts at {0} and ends at {1}".format(t[1], t[2]))
+        data = ''.join(t[0])
+        player.play(data)
+        
+    assert len(tokens) == 8
+    
+
+The tokenizer extracts 8 audio regions from the signal, including all isolated digits
+(from 1 to 6) as well as the 2-phase respiration of the subject. You might have noticed
+that, in the original file, the last three digit are closer to each other than the 
+previous ones. If you want them to be extracted as one single phrase, you can do so
+by tolerating a larger continuous silence within a detection:
+ 
+.. code:: python
+    
+    tokenizer.max_continuous_silence = 50
+    asource.rewind()
+    tokens = tokenizer.tokenize(asource)
+    
+    for t in tokens:
+       print("Token starts at {0} and ends at {1}".format(t[1], t[2]))
+       data = ''.join(t[0])
+       player.play(data)
+    
+    assert len(tokens) == 6
+        
+         
+Trim leading and trailing silence
+---------------------------------
+ 
+The  tokenizer in the following example is set up to remove the silence
+that precedes the first acoustic activity or follows the last activity 
+in a record. It preserves whatever it finds between the two activities.
+In other words, it removes the leading and trailing silence.
+
+Sampling rate is 44100 sample per second, we'll use an analysis window of 100 ms
+(i.e. block_size == 4410)
+
+Energy threshold is 50.
+
+The tokenizer will start accumulating windows up from the moment it encounters
+the first analysis window of an energy >= 50. ALL the following windows will be 
+kept regardless of their energy. At the end of the analysis, it will drop trailing
+windows with an energy below 50.
+
+This is an interesting example because the audio file we're analyzing contains a very
+brief noise that occurs within the leading silence. We certainly do not want our tokenizer
+to stop at this point and consider whatever comes after as a useful signal.
+To force the tokenizer to ignore that brief event we use two other parameters `init_min`
+and `init_max_silence`. By `init_min` = 3 and `init_max_silence` = 1 we tell the tokenizer
+that a valid event must start with at least 3 noisy windows, between which there
+is at most 1 silent window.
+
+Still, with this configuration the tokenizer can detect that noise as a valid event
+(if it actually contains 3 consecutive noisy frames). To circumvent this we use a large
+enough analysis window (here of 100 ms) to ensure that the brief noise is surrounded by a much
+longer silence and hence the energy of the overall analysis window will be below 50.
+
+When using a shorter analysis window (of 10ms for instance, block_size == 441), the brief
+noise contributes more to energy calculation which yields an energy of over 50 for the window.
+Again we can deal with this situation by using a higher energy threshold (55 for example).
+
+.. code:: python
+
+    from auditok import ADSFactory, AudioEnergyValidator, StreamTokenizer, player_for, dataset
+    import pyaudio
+
+    # record = True so that we'll be able to rewind the source.
+    asource = ADSFactory.ads(filename=dataset.was_der_mensch_saet_mono_44100_lead_trail_silence,
+             record=True, block_size=4410)
+    asource.open()
+
+    original_signal = []
+    # Read the whole signal
+    while True:
+       w = asource.read()
+       if w is None:
+          break
+       original_signal.append(w)
+    
+    original_signal = ''.join(original_signal)
+    
+    # rewind source
+    asource.rewind()
+    
+    # Create a validator with an energy threshold of 50
+    validator = AudioEnergyValidator(sample_width=asource.get_sample_width(), energy_threshold=50)
+    
+    # Create a tokenizer with an unlimited token length and continuous silence within a token
+    # Note the DROP_TRAILING_SILENCE mode that will ensure removing trailing silence
+    trimmer = StreamTokenizer(validator, min_length = 20, max_length=99999999, init_min=3, init_max_silence=1, max_continuous_silence=9999999, mode=StreamTokenizer.DROP_TRAILING_SILENCE)
+    
+    
+    tokens = trimmer.tokenize(asource)
+    
+    # Make sure we only have one token
+    assert len(tokens) == 1, "Should have detected one single token"
+    
+    trimmed_signal = ''.join(tokens[0][0])
+    
+    player = player_for(asource)
+    
+    print("Playing original signal (with leading and trailing silence)...")
+    player.play(original_signal)
+    print("Playing trimmed signal...")
+    player.play(trimmed_signal)
+    
+
+Online audio signal processing
+------------------------------
+
+In the next example, audio data is directly acquired from the built-in microphone.
+The `tokenize` method is passed a callback function so that audio activities
+are delivered as soon as they are detected. Each detected activity is played
+back using the build-in audio output device.
+
+As mentioned before, signal energy is strongly related to many factors such as
+microphone sensitivity, background noise (including noise inherent to the hardware), 
+distance and your operating system sound settings. Try a lower `energy_threshold`
+if your noise does not seem to be detected and a higher threshold if you notice
+an over detection (echo method prints a detection where you have made no noise).
+
+.. code:: python
+
+    from auditok import ADSFactory, AudioEnergyValidator, StreamTokenizer, player_for
+    import pyaudio
+     
+    # record = True so that we'll be able to rewind the source.
+    # max_time = 10: read 10 seconds from the microphone
+    asource = ADSFactory.ads(record=True, max_time=10)
+    
+    validator = AudioEnergyValidator(sample_width=asource.get_sample_width(), energy_threshold=50)
+    tokenizer = StreamTokenizer(validator=validator, min_length=20, max_length=250, max_continuous_silence=30)
+    
+    player = player_for(asource)
+    
+    def echo(data, start, end):
+       print("Acoustic activity at: {0}--{1}".format(start, end))
+       player.play(''.join(data))
+       
+    asource.open()
+    
+    tokenizer.tokenize(asource, callback=echo)
+
+If you want to re-run the tokenizer after changing of one or many parameters, use the following code:
+
+.. code:: python
+
+    asource.rewind()
+    # change energy threshold for example
+    tokenizer.validator.set_energy_threshold(55)
+    tokenizer.tokenize(asource, callback=echo)
+
+In case you want to play the whole recorded signal back use:
+
+.. code:: python
+
+    player.play(asource.get_audio_source().get_data_buffer())
+    
+
+Contributing
+============
+**auditok** is on `GitHub <https://github.com/amsehili/auditok>`_. You're welcome to fork it and contribute.
+
+
+Amine SEHILI <amine.sehili[_at_]gmail.com>
+September 2015
+
+License
+=======
+
+This package is published under GNU GPL Version 3.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/setup.py	Thu Sep 17 22:01:30 2015 +0200
@@ -0,0 +1,49 @@
+import re
+import ast
+from setuptools import setup
+
+
+_version_re = re.compile(r'__version__\s+=\s+(.*)')
+
+with open('auditok/__init__.py', 'rb') as f:
+    version = str(ast.literal_eval(_version_re.search(
+        f.read().decode('utf-8')).group(1)))
+
+
+setup(
+    name='auditok',
+    version=version,
+    url='http://github.com/amsehili/auditok/',
+    license='GNU General Public License v3 (GPLv3)',
+    author='Amine Sehili',
+    author_email='amine.sehili@gmail.com',
+    description='A module for Audio/Acoustic Activity Detection',
+    long_description= open('quickstart.rst').read().decode('utf-8'),
+    packages=['auditok'],
+    include_package_data=True,
+    package_data={'auditok': ['data/*']},
+
+    #data_files=[(['README.md', 'quickstart.rst', 'LICENSE', 'INSTALL', 'CHANGELOG']),
+    #            ('share/doc/pdoc', ['doc/pdoc/index.html']),
+    #           ],
+
+    zip_safe=False,
+    platforms='ANY',
+    provides=['auditok'],
+    requires=['PyAudio'],
+    classifiers=[
+        'Development Status :: 3 - Alpha',
+        'Environment :: Console',
+        'Intended Audience :: Science/Research',
+        'Intended Audience :: Developers',
+        'Intended Audience :: Information Technology',
+        'Intended Audience :: Telecommunications Industry',
+        'License :: OSI Approved :: GNU General Public License v3 (GPLv3)',
+        'Operating System :: OS Independent',
+        'Programming Language :: Python',
+        'Programming Language :: Python :: 2.7',
+        'Topic :: Multimedia :: Sound/Audio :: Analysis',
+        'Topic :: Scientific/Engineering :: Information Analysis'
+    ],
+
+)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test_AudioDataSourceFactory.py	Thu Sep 17 22:01:30 2015 +0200
@@ -0,0 +1,571 @@
+'''
+@author: Amine Sehili <amine.sehili@gmail.com>
+September 2015
+
+'''
+
+import unittest
+from auditok import dataset, ADSFactory, BufferAudioSource, WaveAudioSource
+import wave
+from Crypto.Cipher.AES import block_size
+
+
+class TestADSFactoryFileAudioSource(unittest.TestCase):
+    
+    def setUp(self):
+        self.audio_source = WaveAudioSource(filename=dataset.one_to_six_arabic_16000_mono_bc_noise)
+    
+    
+    def test_ADS_type(self):
+        
+        ads = ADSFactory.ads(audio_source=self.audio_source)
+        
+        self.assertIsInstance(ads, ADSFactory.AudioDataSource,
+                              msg="wrong type for ads object, expected: 'ADSFactory.AudioDataSource', found: {0}".format(type(ads)))
+        
+        
+    def test_default_block_size(self):
+        ads = ADSFactory.ads(audio_source=self.audio_source)
+        
+        size = ads.get_block_size()
+        self.assertEqual(size, 160, "Wrong default block_size, expected: 160, found: {0}".format(size))
+        
+        
+    def test_block_size(self):
+        ads = ADSFactory.ads(audio_source=self.audio_source, block_size=512)
+        
+        size = ads.get_block_size()
+        self.assertEqual(size, 512, "Wrong block_size, expected: 512, found: {0}".format(size))
+    
+    def test_sampling_rate(self):
+        ads = ADSFactory.ads(audio_source=self.audio_source)
+        
+        srate = ads.get_sampling_rate()
+        self.assertEqual(srate, 16000, "Wrong sampling rate, expected: 16000, found: {0}".format(srate))
+        
+    def test_sample_width(self):
+        ads = ADSFactory.ads(audio_source=self.audio_source)
+        
+        swidth = ads.get_sample_width()
+        self.assertEqual(swidth, 2, "Wrong sample width, expected: 2, found: {0}".format(swidth))
+    
+    def test_channels(self):
+        ads = ADSFactory.ads(audio_source=self.audio_source)
+        
+        channels = ads.get_channels()
+        self.assertEqual(channels, 1, "Wrong number of channels, expected: 1, found: {0}".format(channels))
+        
+    def test_read(self):
+        ads = ADSFactory.ads(audio_source=self.audio_source, block_size = 256)
+        
+        ads.open()
+        ads_data = ads.read()
+        ads.close()
+        
+        audio_source = WaveAudioSource(filename=dataset.one_to_six_arabic_16000_mono_bc_noise)
+        audio_source.open()
+        audio_source_data = audio_source.read(256)
+        audio_source.close()
+        
+        self.assertEqual(ads_data, audio_source_data, "Unexpected data read from ads")
+    
+    def test_Limiter_Deco_type(self):
+        ads = ADSFactory.ads(audio_source=self.audio_source, max_time=1)
+        
+        self.assertIsInstance(ads, ADSFactory.LimiterADS,
+                              msg="wrong type for ads object, expected: 'ADSFactory.LimiterADS', found: {0}".format(type(ads)))
+         
+    
+    def test_Limiter_Deco_read(self):
+        # read a maximum of 0.75 seconds from audio source
+        ads = ADSFactory.ads(audio_source=self.audio_source, max_time=0.75)
+        
+        ads_data = []
+        ads.open()
+        while True:
+            block = ads.read()
+            if block is None:
+                break
+            ads_data.append(block)
+        ads.close()
+        ads_data = ''.join(ads_data)    
+                    
+        audio_source = WaveAudioSource(filename=dataset.one_to_six_arabic_16000_mono_bc_noise)
+        audio_source.open()
+        audio_source_data = audio_source.read(int(16000 * 0.75))
+        audio_source.close()
+        
+        self.assertEqual(ads_data, audio_source_data, "Unexpected data read from LimiterADS")
+        
+        
+    def test_Limiter_Deco_read_limit(self):
+        # read a maximum of 1.191 seconds from audio source
+        ads = ADSFactory.ads(audio_source=self.audio_source, max_time=1.191)
+        
+        # desired duration into bytes is obtained by:
+        # max_time * sampling_rate * sample_width * nb_channels
+        # Limiter deco tries to read a total quantity of data as close as
+        # possible to the desired duration in bytes.
+        # It reads N block of size block_size where:
+        # (N - 1) * block_size < desired duration, AND
+        # N * block_size >= desired duration
+        
+        # theoretical size to reach          
+        expected_size = int(ads.get_sampling_rate() * 1.191) * \
+                       ads.get_sample_width() * ads.get_channels()
+        
+        
+        # how much data are required to get N blocks of size block_size
+        block_size_bytes = ads.get_block_size() * ads.get_sample_width() * ads.get_channels()
+        r = expected_size % block_size_bytes
+        if r > 0:
+            expected_size += block_size_bytes - r
+        
+        total_read = 0
+        ads.open()
+        i = 0
+        while True:
+            block = ads.read()
+            if block is None:
+                break
+            i += 1
+            total_read += len(block)
+        
+        ads.close()
+            
+        self.assertEqual(total_read, expected_size, "Wrong data length read from LimiterADS, expected: {0}, found: {1}".format(expected_size, total_read))
+        
+        
+        
+    def test_Recorder_Deco_type(self):
+        ads = ADSFactory.ads(audio_source=self.audio_source, record=True)
+        
+        self.assertIsInstance(ads, ADSFactory.RecorderADS,
+                              msg="wrong type for ads object, expected: 'ADSFactory.RecorderADS', found: {0}".format(type(ads)))
+         
+        
+    def test_Recorder_Deco_read(self):
+        ads = ADSFactory.ads(audio_source=self.audio_source, record=True, block_size=500)
+        
+        ads_data = []
+        ads.open()
+        for i in xrange(10):
+            block = ads.read()
+            if block is None:
+                break
+            ads_data.append(block)
+        ads.close()
+        ads_data = ''.join(ads_data)    
+                    
+        audio_source = WaveAudioSource(filename=dataset.one_to_six_arabic_16000_mono_bc_noise)
+        audio_source.open()
+        audio_source_data = audio_source.read(500 * 10)
+        audio_source.close()
+        
+        self.assertEqual(ads_data, audio_source_data, "Unexpected data read from RecorderADS")
+        
+    def test_Recorder_Deco_is_rewindable(self):
+        ads = ADSFactory.ads(audio_source=self.audio_source, record=True)
+        
+        self.assertTrue(ads.is_rewindable(), "RecorderADS.is_rewindable should return True")
+        
+    
+    def test_Recorder_Deco_rewind(self):
+        ads = ADSFactory.ads(audio_source=self.audio_source, record=True, block_size = 320)
+        
+        ads.open()
+        ads.read()
+        ads.rewind()
+        
+        
+        self.assertIsInstance(ads.get_audio_source(), 
+                              BufferAudioSource, "After rewind RecorderADS.get_audio_source should \
+                              be an instance of BufferAudioSource")
+        ads.close()
+        
+        
+    def test_Recorder_Deco_rewind_and_read(self):
+        ads = ADSFactory.ads(audio_source=self.audio_source, record=True, block_size = 320)
+        
+        ads.open()
+        for i in xrange(10):
+            ads.read()
+            
+        ads.rewind()
+        
+        # read all available data after rewind
+        ads_data = []
+        while True:
+            block = ads.read()
+            if block is None:
+                break
+            ads_data.append(block)
+        ads.close()
+        ads_data = ''.join(ads_data)    
+                    
+        audio_source = WaveAudioSource(filename=dataset.one_to_six_arabic_16000_mono_bc_noise)
+        audio_source.open()
+        audio_source_data = audio_source.read(320 * 10)
+        audio_source.close()
+        
+        self.assertEqual(ads_data, audio_source_data, "Unexpected data read from RecorderADS")
+    
+    def test_Overlap_Deco_type(self):
+        # an OverlapADS is obtained if a valid hop_size is given
+        ads = ADSFactory.ads(audio_source=self.audio_source, block_size = 256, hop_size = 128)
+        
+        self.assertIsInstance(ads, ADSFactory.OverlapADS,
+                              msg="wrong type for ads object, expected: 'ADSFactory.OverlapADS', found: {0}".format(type(ads)))
+         
+        
+        
+    
+    def test_Overlap_Deco_read(self):
+        
+        # Use arbitrary valid block_size and hop_size
+        block_size = 1714
+        hop_size = 313
+        
+        ads = ADSFactory.ads(audio_source=self.audio_source, block_size=block_size, hop_size=hop_size)
+        
+        # Read all available data overlapping blocks
+        ads.open()
+        ads_data = []
+        while True:
+            block = ads.read()
+            if block is None:
+                break
+            ads_data.append(block)
+        ads.close()
+        
+        # Read all data from file and build a BufferAudioSource
+        fp = wave.open(dataset.one_to_six_arabic_16000_mono_bc_noise, "r")
+        wave_data = fp.readframes(fp.getnframes())
+        fp.close()
+        audio_source = BufferAudioSource(wave_data, ads.get_sampling_rate(),
+                                         ads.get_sample_width(), ads.get_channels())
+        audio_source.open()
+        
+        # Compare all blocks read from OverlapADS to those read
+        # from an audio source with a manual set_position
+        for i,block in enumerate(ads_data):
+            
+            # block i is expected to start at frame i * hop_size
+            # and span block_size frames
+            tmp = audio_source.read(block_size)
+            
+            self.assertEqual(block, tmp, "Unexpected block (N={0}) read from OverlapADS".format(i))
+            
+            # jump to the start position of the next overlapping block
+            audio_source.set_position((i+1) * hop_size)
+        
+        audio_source.close()
+    
+            
+            
+    def test_Limiter_Overlap_Deco_type(self):
+        ads = ADSFactory.ads(audio_source=self.audio_source, max_time=1, block_size = 256, hop_size = 128)
+        
+        self.assertIsInstance(ads, ADSFactory.OverlapADS,
+                            msg="wrong type for ads object, expected: 'ADSFactory.OverlapADS', found: {0}".format(type(ads)))
+         
+        
+        self.assertIsInstance(ads.ads, ADSFactory.LimiterADS,
+                              msg="wrong type for ads object, expected: 'ADSFactory.LimiterADS', found: {0}".format(type(ads)))
+           
+        
+        
+    def test_Limiter_Overlap_Deco_read(self):    
+        
+        # Use arbitrary valid block_size and hop_size
+        block_size = 256
+        hop_size = 200
+        
+        # Limit reading to the first half second of the stream
+        ads = ADSFactory.ads(audio_source=self.audio_source, max_time=0.50, block_size=block_size, hop_size=hop_size)
+        
+        # Read all available data overlapping blocks
+        ads.open()
+        ads_data = []
+        while True:
+            block = ads.read()
+            if block is None:
+                break
+            ads_data.append(block)
+        ads.close()
+        
+        # Read all data from file and build a BufferAudioSource
+        fp = wave.open(dataset.one_to_six_arabic_16000_mono_bc_noise, "r")
+        wave_data = fp.readframes(fp.getnframes())
+        fp.close()
+        audio_source = BufferAudioSource(wave_data, ads.get_sampling_rate(),
+                                         ads.get_sample_width(), ads.get_channels())
+        audio_source.open()
+        
+        # Compare all blocks read from OverlapADS to those read
+        # from an audio source with a manual set_position
+        for i,block in enumerate(ads_data):            
+            tmp = audio_source.read(block_size)
+            
+            self.assertEqual(block, tmp, "Unexpected block (N={0}) read from OverlapADS".format(i))
+            
+            # block i+1 starts at frame (i+1) * hop_size
+            audio_source.set_position((i+1) * hop_size)
+        
+        audio_source.close()
+        
+        
+    def test_Limiter_Overlap_Deco_read_limit(self):
+        
+        block_size = 313
+        hop_size = 207
+        ads = ADSFactory.ads(audio_source=self.audio_source,
+                             max_time=1.932, block_size=block_size,
+                             hop_size=hop_size)
+        
+        # Limiter + Overlap decos => read N block of actual data
+        # one block of size block_size
+        # N - 1 blocks of size hop_size
+        # the total size of read data might be a slightly greater
+        # than the required size calculated from max_time
+        
+        # theoretical size to reach          
+        expected_size = int(ads.get_sampling_rate() * 1.932) * \
+                       ads.get_sample_width() * ads.get_channels()
+        
+        # minus block_size
+        expected_size -= (block_size * ads.get_sample_width() * ads.get_channels())
+        
+        # how much data are required to get N - 1 blocks of size hop_size
+        hop_size_bytes = hop_size * ads.get_sample_width() * ads.get_channels()
+        r = expected_size % hop_size_bytes
+        if r > 0:
+            # round up to a whole number of hop_size blocks
+            expected_size += hop_size_bytes - r
+        
+        expected_size += block_size * ads.get_sample_width() * ads.get_channels()
+        
+        # Each overlapping block repeats (block_size - hop_size) frames of
+        # the previous one; start the counter at cache_size so the first
+        # block (which has no overlap) is accounted for in full below.
+        cache_size = (block_size - hop_size) * ads.get_sample_width() * ads.get_channels()
+        total_read = cache_size
+        
+        ads.open()
+        i = 0
+        while True:
+            block = ads.read()
+            if block is None:
+                break
+            i += 1
+            # count only the non-overlapping (fresh) part of each block
+            total_read += len(block) - cache_size
+        
+        ads.close()
+        self.assertEqual(total_read, expected_size, "Wrong data length read from LimiterADS, expected: {0}, found: {1}".format(expected_size, total_read))
+        
+        
+    def test_Recorder_Overlap_Deco_type(self):
+        ads = ADSFactory.ads(audio_source=self.audio_source, block_size=256, hop_size=128, record=True)
+        
+        self.assertIsInstance(ads, ADSFactory.OverlapADS,
+                            msg="wrong type for ads object, expected: 'ADSFactory.OverlapADS', found: {0}".format(type(ads)))
+         
+        
+        self.assertIsInstance(ads.ads, ADSFactory.RecorderADS,
+                              msg="wrong type for ads object, expected: 'ADSFactory.RecorderADS', found: {0}".format(type(ads)))
+               
+    
+        
+    def test_Recorder_Overlap_Deco_is_rewindable(self):
+        # A recording ads must report itself as rewindable even when
+        # it is wrapped in an OverlapADS decorator.
+        ads = ADSFactory.ads(audio_source=self.audio_source, block_size=320, hop_size=160, record=True)
+        self.assertTrue(ads.is_rewindable(), "RecorderADS.is_rewindable should return True")
+        
+
+    def test_Recorder_Overlap_Deco_rewind_and_read(self):
+        
+        # Use arbitrary valid block_size and hop_size
+        block_size = 1600
+        hop_size = 400
+        
+        ads = ADSFactory.ads(audio_source=self.audio_source, block_size=block_size, hop_size=hop_size, record=True)
+        
+        # Read all available data overlapping blocks
+        ads.open()
+        i = 0
+        while True:
+            block = ads.read()
+            if block is None:
+                break
+            i += 1
+        
+        ads.rewind()
+        
+        # Read all data from file and build a BufferAudioSource
+        fp = wave.open(dataset.one_to_six_arabic_16000_mono_bc_noise, "r")
+        wave_data = fp.readframes(fp.getnframes())
+        fp.close()
+        audio_source = BufferAudioSource(wave_data, ads.get_sampling_rate(),
+                                         ads.get_sample_width(), ads.get_channels())
+        audio_source.open()
+        
+        # Compare all blocks read from OverlapADS to those read
+        # from an audio source with a manual set_position
+        for j in xrange(i):
+            
+            tmp = audio_source.read(block_size)
+            
+            self.assertEqual(ads.read(), tmp, "Unexpected block (N={0}) read from OverlapADS".format(i))
+            audio_source.set_position((j+1) * hop_size)
+        
+        ads.close()
+        audio_source.close()
+    
+    
+    def test_Limiter_Recorder_Overlap_Deco_rewind_and_read(self):
+        
+        # Use arbitrary valid block_size and hop_size
+        block_size = 1600
+        hop_size = 400
+        
+        ads = ADSFactory.ads(audio_source=self.audio_source, max_time = 1.50, block_size=block_size, hop_size=hop_size, record=True)
+        
+        # Read all available data overlapping blocks
+        ads.open()
+        i = 0
+        while True:
+            block = ads.read()
+            if block is None:
+                break
+            i += 1
+        
+        ads.rewind()
+        
+        # Read all data from file and build a BufferAudioSource
+        fp = wave.open(dataset.one_to_six_arabic_16000_mono_bc_noise, "r")
+        wave_data = fp.readframes(fp.getnframes())
+        fp.close()
+        audio_source = BufferAudioSource(wave_data, ads.get_sampling_rate(),
+                                         ads.get_sample_width(), ads.get_channels())
+        audio_source.open()
+        
+        # Compare all blocks read from OverlapADS to those read
+        # from an audio source with a manual set_position
+        for j in xrange(i):
+            
+            tmp = audio_source.read(block_size)
+            
+            self.assertEqual(ads.read(), tmp, "Unexpected block (N={0}) read from OverlapADS".format(i))
+            audio_source.set_position((j+1) * hop_size)
+        
+        ads.close()
+        audio_source.close()
+    
+    
+    def test_Limiter_Recorder_Overlap_Deco_rewind_and_read_limit(self):
+        
+        # Use arbitrary valid block_size and hop_size
+        block_size = 1000
+        hop_size = 200
+        
+        ads = ADSFactory.ads(audio_source=self.audio_source, max_time = 1.317, block_size=block_size, hop_size=hop_size, record=True)
+        
+        # Limiter + Overlap decos => read N block of actual data
+        # one block of size block_size
+        # N - 1 blocks of size hop_size
+        # the total size of read data might be a slightly greater
+        # than the required size calculated from max_time
+        
+        # theoretical size to reach          
+        expected_size = int(ads.get_sampling_rate() * 1.317) * \
+                       ads.get_sample_width() * ads.get_channels()
+        
+        # minus block_size
+        expected_size -= (block_size * ads.get_sample_width() * ads.get_channels())
+        
+        # how much data are required to get N - 1 blocks of size hop_size
+        hop_size_bytes = hop_size * ads.get_sample_width() * ads.get_channels()
+        r = expected_size % hop_size_bytes
+        if r > 0:
+            # round up to a whole number of hop_size blocks
+            expected_size += hop_size_bytes - r
+        
+        expected_size += block_size * ads.get_sample_width() * ads.get_channels()
+        
+        # Each overlapping block repeats (block_size - hop_size) frames of
+        # the previous one; seed the counter with cache_size so the first
+        # block (no overlap) is counted in full by the loop below.
+        cache_size = (block_size - hop_size) * ads.get_sample_width() * ads.get_channels()
+        total_read = cache_size
+        
+        ads.open()
+        i = 0
+        while True:
+            block = ads.read()
+            if block is None:
+                break
+            i += 1
+            # count only the non-overlapping (fresh) part of each block
+            total_read += len(block) - cache_size
+        
+        ads.close()
+        self.assertEqual(total_read, expected_size, "Wrong data length read from LimiterADS, expected: {0}, found: {1}".format(expected_size, total_read))
+        
+class TestADSFactoryBufferAudioSource(unittest.TestCase):
+    
+    def setUp(self):
+        self.signal = "ABCDEFGHIJKLMNOPQRSTUVWXYZ012345"
+        self.ads = ADSFactory.ads(data_buffer=self.signal, sampling_rate=16,
+                             sample_width=2, channels=1)
+        
+    def test_ADS_BAS_type(self):
+        self.assertIsInstance(self.ads.get_audio_source(), 
+                              BufferAudioSource, "ads should \
+                              be an instance of BufferAudioSource")
+    
+    def test_ADS_BAS_sampling_rate(self):
+        srate = self.ads.get_sampling_rate()
+        self.assertEqual(srate, 16, "Wrong sampling rate, expected: 16000, found: {0}".format(srate))
+      
+        
+    def test_ADS_BAS_get_sample_width(self):
+        swidth = self.ads.get_sample_width()
+        self.assertEqual(swidth, 2, "Wrong sample width, expected: 2, found: {0}".format(swidth))
+    
+    def test_ADS_BAS_get_channels(self):
+        channels = self.ads.get_channels()
+        self.assertEqual(channels, 1, "Wrong number of channels, expected: 1, found: {0}".format(channels))
+        
+    
+    def test_Limiter_Recorder_Overlap_Deco_rewind_and_read(self):
+        
+        # Use arbitrary valid block_size and hop_size
+        block_size = 5
+        hop_size = 4
+        
+        ads = ADSFactory.ads(data_buffer=self.signal, sampling_rate=16,
+                             sample_width=2, channels=1, max_time = 0.80,
+                             block_size=block_size, hop_size=hop_size,
+                             record=True)
+        
+        # Read all available data overlapping blocks
+        ads.open()
+        i = 0
+        while True:
+            block = ads.read()
+            if block is None:
+                break
+            i += 1
+        
+        ads.rewind()
+        
+        # Build a BufferAudioSource
+        audio_source = BufferAudioSource(self.signal, ads.get_sampling_rate(),
+                        ads.get_sample_width(), ads.get_channels())
+        audio_source.open()
+        
+        # Compare all blocks read from OverlapADS to those read
+        # from an audio source with a manual set_position
+        for j in xrange(i):
+            
+            tmp = audio_source.read(block_size)
+            
+            block = ads.read()
+            
+            self.assertEqual(block, tmp, "Unexpected block (N={0}) read from OverlapADS".format(i))
+            audio_source.set_position((j+1) * hop_size)
+        
+        ads.close()
+        audio_source.close()
+    
+
+if __name__ == "__main__":
+    # Run all test cases in this module when executed directly.
+    unittest.main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test_StreamTokenizer.py	Thu Sep 17 22:01:30 2015 +0200
@@ -0,0 +1,503 @@
+'''
+@author: Amine Sehili <amine.sehili@gmail.com>
+September 2015
+
+'''
+
+import unittest
+from auditok import StreamTokenizer, StringDataSource, DataValidator
+
+
+class AValidator(DataValidator):
+    """Toy validator used by the tokenizer tests: a frame is considered
+    valid if and only if it is the single character "A"."""
+    
+    def is_valid(self, frame):
+        return frame == "A"
+
+
+class TestStreamTokenizerInitParams(unittest.TestCase):
+    
+    
+    def setUp(self):
+        self.A_validator = AValidator()
+        
+    # Completely deactivate init_min and init_max_silence
+    # The tokenizer will only rely on the other parameters
+    # Note that if init_min = 0, the value of init_max_silence
+    # will have no effect
+    def test_init_min_0_init_max_silence_0(self):
+        
+        tokenizer = StreamTokenizer(self.A_validator, min_length = 5, max_length=20,
+                                     max_continuous_silence=4, init_min = 0,
+                                     init_max_silence = 0, mode=0)
+        
+        
+        data_source = StringDataSource("aAaaaAaAaaAaAaaaaaaaAAAAAAAA")
+        #                            ^              ^   ^      ^
+        #                            2              16  20     27
+        tokens = tokenizer.tokenize(data_source)
+                
+        self.assertEqual(len(tokens), 2, msg="wrong number of tokens, expected: 2, found: {0} ".format(len(tokens)))
+        tok1, tok2 = tokens[0], tokens[1]
+        
+        # tok1[0]: data
+        # tok1[1]: start frame (included)
+        # tok1[2]: end frame (included)
+        
+        data = ''.join(tok1[0])
+        start = tok1[1]
+        end = tok1[2]
+        self.assertEqual(data, "AaaaAaAaaAaAaaaa",
+                        msg="wrong data for token 1, expected: 'AaaaAaAaaAaAaaaa', found: {0} ".format(data))
+        self.assertEqual(start, 1, msg="wrong start frame for token 1, expected: 1, found: {0} ".format(start))
+        self.assertEqual(end, 16, msg="wrong end frame for token 1, expected: 16, found: {0} ".format(end))
+        
+        
+        data = ''.join(tok2[0])
+        start = tok2[1]
+        end = tok2[2]
+        self.assertEqual(data, "AAAAAAAA",
+                        msg="wrong data for token 1, expected: 'AAAAAAAA', found: {0} ".format(data))
+        self.assertEqual(start, 20, msg="wrong start frame for token 2, expected: 20, found: {0} ".format(start))
+        self.assertEqual(end, 27, msg="wrong end frame for token 2, expected: 27, found: {0} ".format(end))
+    
+    
+        
+    # A valid token is considered iff the tokenizer encounters
+    # at least valid frames (init_min = 3) between witch there
+    # are at most 0 consecutive non valid frames (init_max_silence = 0)
+    # The tokenizer will only rely on the other parameters
+    # In other words, a valid token must start with 3 valid frames
+    def test_init_min_3_init_max_silence_0(self):
+        
+        tokenizer = StreamTokenizer(self.A_validator, min_length = 5, max_length=20,
+                                     max_continuous_silence=4, init_min = 3,
+                                     init_max_silence = 0, mode=0)
+        
+        
+        #data_source = StringDataSource("aAaaaAaAaaAaAaaaaaAAAAAAAAAaaaaaAAAAA")
+        #                                             ^       ^     ^   ^
+        #                                             18      26    32  36
+        
+        data_source = StringDataSource("aAaaaAaAaaAaAaaaaaAAAAAAAAAaaaaaaAAAAA")
+        #                                             ^           ^  ^   ^
+        #                                             18          30 33  37
+        
+        tokens = tokenizer.tokenize(data_source)
+                
+        self.assertEqual(len(tokens), 2, msg="wrong number of tokens, expected: 2, found: {0} ".format(len(tokens)))
+        tok1, tok2 = tokens[0], tokens[1]
+        
+        data = ''.join(tok1[0])
+        start = tok1[1]
+        end = tok1[2]
+        self.assertEqual(data, "AAAAAAAAAaaaa",
+                        msg="wrong data for token 1, expected: 'AAAAAAAAAaaaa', found: '{0}' ".format(data))
+        self.assertEqual(start, 18, msg="wrong start frame for token 1, expected: 18, found: {0} ".format(start))
+        self.assertEqual(end, 30, msg="wrong end frame for token 1, expected: 30, found: {0} ".format(end))
+        
+        
+        data = ''.join(tok2[0])
+        start = tok2[1]
+        end = tok2[2]
+        self.assertEqual(data, "AAAAA",
+                        msg="wrong data for token 1, expected: 'AAAAA', found: '{0}' ".format(data))
+        self.assertEqual(start, 33, msg="wrong start frame for token 2, expected: 33, found: {0} ".format(start))
+        self.assertEqual(end, 37, msg="wrong end frame for token 2, expected: 37, found: {0} ".format(end))
+        
+    
+    # A valid token is considered iff the tokenizer encounters
+    # at least valid frames (init_min = 3) between witch there
+    # are at most 2 consecutive non valid frames (init_max_silence = 2)
+    def test_init_min_3_init_max_silence_2(self):
+        
+        tokenizer = StreamTokenizer(self.A_validator, min_length = 5, max_length=20,
+                                     max_continuous_silence=4, init_min = 3,
+                                     init_max_silence = 2, mode=0)
+        
+        
+        data_source = StringDataSource("aAaaaAaAaaAaAaaaaaaAAAAAAAAAaaaaaaaAAAAA")
+        #                                ^          ^  ^           ^   ^   ^
+        #                                5          16 19          31  35  39
+        tokens = tokenizer.tokenize(data_source)
+                
+        self.assertEqual(len(tokens), 3, msg="wrong number of tokens, expected: 3, found: {0} ".format(len(tokens)))
+        tok1, tok2, tok3 = tokens[0], tokens[1], tokens[2]
+        
+        
+        data = ''.join(tok1[0])
+        start = tok1[1]
+        end = tok1[2]
+        self.assertEqual(data, "AaAaaAaAaaaa",
+                        msg="wrong data for token 1, expected: 'AaAaaAaA', found: '{0}' ".format(data))
+        self.assertEqual(start, 5, msg="wrong start frame for token 1, expected: 5, found: {0} ".format(start))
+        self.assertEqual(end, 16, msg="wrong end frame for token 1, expected: 16, found: {0} ".format(end))
+        
+        
+        data = ''.join(tok2[0])
+        start = tok2[1]
+        end = tok2[2]
+        self.assertEqual(data, "AAAAAAAAAaaaa",
+                        msg="wrong data for token 2, expected: 'AAAAAAAAAaaaa', found: '{0}' ".format(data))
+        self.assertEqual(start, 19, msg="wrong start frame for token 2, expected: 19, found: {0} ".format(start))
+        self.assertEqual(end, 31, msg="wrong end frame for token 2, expected: 31, found: {0} ".format(end))
+        
+        
+        data = ''.join(tok3[0])
+        start = tok3[1]
+        end = tok3[2]
+        self.assertEqual(data, "AAAAA",
+                        msg="wrong data for token 3, expected: 'AAAAA', found: '{0}' ".format(data))
+        self.assertEqual(start, 35, msg="wrong start frame for token 2, expected: 35, found: {0} ".format(start))
+        self.assertEqual(end, 39, msg="wrong end frame for token 2, expected: 39, found: {0} ".format(end))    
+               
+        
+    
+class TestStreamTokenizerMinMaxLength(unittest.TestCase):
+  
+    def setUp(self):
+        self.A_validator = AValidator()
+    
+    
+    def test_min_length_6_init_max_length_20(self):
+    
+        tokenizer = StreamTokenizer(self.A_validator, min_length = 6, max_length=20,
+                                     max_continuous_silence=2, init_min = 3,
+                                     init_max_silence = 3, mode=0)
+        
+        
+        data_source = StringDataSource("aAaaaAaAaaAaAaaaaaAAAAAAAAAaaaaaAAAAA")
+        #                            ^            ^   ^         ^
+        #                            1            14  18        28
+        
+        tokens = tokenizer.tokenize(data_source)
+                
+        self.assertEqual(len(tokens), 2, msg="wrong number of tokens, expected: 2, found: {0} ".format(len(tokens)))
+        tok1, tok2 = tokens[0], tokens[1]
+        
+        
+        data = ''.join(tok1[0])
+        start = tok1[1]
+        end = tok1[2]
+        self.assertEqual(data, "AaaaAaAaaAaAaa",
+                        msg="wrong data for token 1, expected: 'AaaaAaAaaAaAaa', found: '{0}' ".format(data))
+        self.assertEqual(start, 1, msg="wrong start frame for token 1, expected: 1, found: {0} ".format(start))
+        self.assertEqual(end, 14, msg="wrong end frame for token 1, expected: 14, found: {0} ".format(end))
+        
+        
+        data = ''.join(tok2[0])
+        start = tok2[1]
+        end = tok2[2]
+        self.assertEqual(data, "AAAAAAAAAaa",
+                        msg="wrong data for token 2, expected: 'AAAAAAAAAaa', found: '{0}' ".format(data))
+        self.assertEqual(start, 18, msg="wrong start frame for token 2, expected: 18, found: {0} ".format(start))
+        self.assertEqual(end, 28, msg="wrong end frame for token 2, expected: 28, found: {0} ".format(end))
+    
+    
+    def test_min_length_1_init_max_length_1(self):
+    
+        tokenizer = StreamTokenizer(self.A_validator, min_length = 1, max_length=1,
+                                     max_continuous_silence=0, init_min = 0,
+                                     init_max_silence = 0, mode=0)
+        
+        
+        data_source = StringDataSource("AAaaaAaaaAaAaaAaAaaaaaAAAAAAAAAaaaaaAAAAA")
+        
+        tokens = tokenizer.tokenize(data_source)
+                        
+        self.assertEqual(len(tokens), 21, msg="wrong number of tokens, expected: 21, found: {0} ".format(len(tokens)))
+        
+        
+    def test_min_length_10_init_max_length_20(self):
+    
+        tokenizer = StreamTokenizer(self.A_validator, min_length = 10, max_length=20,
+                                     max_continuous_silence=4, init_min = 3,
+                                     init_max_silence = 3, mode=0)
+        
+        
+        data_source = StringDataSource("aAaaaAaAaaAaAaaaaaaAAAAAaaaaaaAAAAAaaAAaaAAA")
+        #                            ^              ^             ^            ^
+        #                            1              16            30           45
+        
+        tokens = tokenizer.tokenize(data_source)
+                
+        self.assertEqual(len(tokens), 2, msg="wrong number of tokens, expected: 2, found: {0} ".format(len(tokens)))
+        tok1, tok2 = tokens[0], tokens[1]
+        
+        
+        data = ''.join(tok1[0])
+        start = tok1[1]
+        end = tok1[2]
+        self.assertEqual(data, "AaaaAaAaaAaAaaaa",
+                        msg="wrong data for token 1, expected: 'AaaaAaAaaAaAaaaa', found: '{0}' ".format(data))
+        self.assertEqual(start, 1, msg="wrong start frame for token 1, expected: 1, found: {0} ".format(start))
+        self.assertEqual(end, 16, msg="wrong end frame for token 1, expected: 16, found: {0} ".format(end))
+        
+        
+        data = ''.join(tok2[0])
+        start = tok2[1]
+        end = tok2[2]
+        self.assertEqual(data, "AAAAAaaAAaaAAA",
+                        msg="wrong data for token 2, expected: 'AAAAAaaAAaaAAA', found: '{0}' ".format(data))
+        self.assertEqual(start, 30, msg="wrong start frame for token 2, expected: 30, found: {0} ".format(start))
+        self.assertEqual(end, 43, msg="wrong end frame for token 2, expected: 43, found: {0} ".format(end))
+    
+    
+        
+    def test_min_length_4_init_max_length_5(self):
+    
+        tokenizer = StreamTokenizer(self.A_validator, min_length = 4, max_length=5,
+                                     max_continuous_silence=4, init_min = 3,
+                                     init_max_silence = 3, mode=0)
+        
+        
+        data_source = StringDataSource("aAaaaAaAaaAaAaaaaaAAAAAAAAaaaaaaAAAAAaaaaaAAaaAaa")
+        #                                             ^   ^^   ^    ^   ^     ^   ^
+        #                                             18 2223  27   32  36    42  46
+        
+        tokens = tokenizer.tokenize(data_source)
+               
+        self.assertEqual(len(tokens), 4, msg="wrong number of tokens, expected: 4, found: {0} ".format(len(tokens)))
+        tok1, tok2, tok3, tok4 = tokens[0], tokens[1], tokens[2], tokens[3]
+        
+        
+        data = ''.join(tok1[0])
+        start = tok1[1]
+        end = tok1[2]
+        self.assertEqual(data, "AAAAA",
+                        msg="wrong data for token 1, expected: 'AAAAA', found: '{0}' ".format(data))
+        self.assertEqual(start, 18, msg="wrong start frame for token 1, expected: 18, found: {0} ".format(start))
+        self.assertEqual(end, 22, msg="wrong end frame for token 1, expected: 22, found: {0} ".format(end))
+        
+        
+        data = ''.join(tok2[0])
+        start = tok2[1]
+        end = tok2[2]
+        self.assertEqual(data, "AAAaa",
+                        msg="wrong data for token 1, expected: 'AAAaa', found: '{0}' ".format(data))
+        self.assertEqual(start, 23, msg="wrong start frame for token 1, expected: 23, found: {0} ".format(start))
+        self.assertEqual(end, 27, msg="wrong end frame for token 1, expected: 27, found: {0} ".format(end))
+        
+        
+        data = ''.join(tok3[0])
+        start = tok3[1]
+        end = tok3[2]
+        self.assertEqual(data, "AAAAA",
+                        msg="wrong data for token 1, expected: 'AAAAA', found: '{0}' ".format(data))
+        self.assertEqual(start, 32, msg="wrong start frame for token 1, expected: 1, found: {0} ".format(start))
+        self.assertEqual(end, 36, msg="wrong end frame for token 1, expected: 7, found: {0} ".format(end))
+        
+        
+        data = ''.join(tok4[0])
+        start = tok4[1]
+        end = tok4[2]
+        self.assertEqual(data, "AAaaA",
+                        msg="wrong data for token 2, expected: 'AAaaA', found: '{0}' ".format(data))
+        self.assertEqual(start, 42, msg="wrong start frame for token 2, expected: 17, found: {0} ".format(start))
+        self.assertEqual(end, 46, msg="wrong end frame for token 2, expected: 22, found: {0} ".format(end))
+        
+        
+class TestStreamTokenizerMaxContinuousSilence(unittest.TestCase):
+    
+    def setUp(self):
+        self.A_validator = AValidator()
+    
+    
+    def test_min_5_max_10_max_continuous_silence_0(self):
+
+        tokenizer = StreamTokenizer(self.A_validator, min_length = 5, max_length=10,
+                                    max_continuous_silence=0, init_min = 3,
+                                    init_max_silence = 3, mode=0)
+        
+        data_source = StringDataSource("aaaAAAAAaAAAAAAaaAAAAAAAAAa")
+        #                              ^   ^ ^    ^  ^       ^
+        #                              3   7 9   14 17      25
+        
+        tokens = tokenizer.tokenize(data_source)
+                
+        self.assertEqual(len(tokens), 3, msg="wrong number of tokens, expected: 3, found: {0} ".format(len(tokens)))
+        tok1, tok2, tok3 = tokens[0], tokens[1], tokens[2]
+        
+        
+        data = ''.join(tok1[0])
+        start = tok1[1]
+        end = tok1[2]
+        self.assertEqual(data, "AAAAA",
+                        msg="wrong data for token 1, expected: 'AAAAA', found: '{0}' ".format(data))
+        self.assertEqual(start, 3, msg="wrong start frame for token 1, expected: 3, found: {0} ".format(start))
+        self.assertEqual(end, 7, msg="wrong end frame for token 1, expected: 7, found: {0} ".format(end))
+        
+        
+        data = ''.join(tok2[0])
+        start = tok2[1]
+        end = tok2[2]
+        self.assertEqual(data, "AAAAAA",
+                        msg="wrong data for token 1, expected: 'AAAAAA', found: '{0}' ".format(data))
+        self.assertEqual(start, 9, msg="wrong start frame for token 1, expected: 9, found: {0} ".format(start))
+        self.assertEqual(end, 14, msg="wrong end frame for token 1, expected: 14, found: {0} ".format(end))
+        
+        
+        data = ''.join(tok3[0])
+        start = tok3[1]
+        end = tok3[2]
+        self.assertEqual(data, "AAAAAAAAA",
+                        msg="wrong data for token 1, expected: 'AAAAAAAAA', found: '{0}' ".format(data))
+        self.assertEqual(start, 17, msg="wrong start frame for token 1, expected: 17, found: {0} ".format(start))
+        self.assertEqual(end, 25, msg="wrong end frame for token 1, expected: 25, found: {0} ".format(end))
+        
+        
+        
+        
+    def test_min_5_max_10_max_continuous_silence_1(self):
+
+        tokenizer = StreamTokenizer(self.A_validator, min_length = 5, max_length=10,
+                                    max_continuous_silence=1, init_min = 3,
+                                    init_max_silence = 3, mode=0)
+        
+        data_source = StringDataSource("aaaAAAAAaAAAAAAaaAAAAAAAAAa")
+        #                              ^        ^^ ^ ^        ^
+        #                              3       12131517      26
+        #                                     (12 13 15 17)
+        
+        tokens = tokenizer.tokenize(data_source)
+                
+        self.assertEqual(len(tokens), 3, msg="wrong number of tokens, expected: 3, found: {0} ".format(len(tokens)))
+        tok1, tok2, tok3 = tokens[0], tokens[1], tokens[2]
+        
+        
+        data = ''.join(tok1[0])
+        start = tok1[1]
+        end = tok1[2]
+        self.assertEqual(data, "AAAAAaAAAA",
+                        msg="wrong data for token 1, expected: 'AAAAAaAAAA', found: '{0}' ".format(data))
+        self.assertEqual(start, 3, msg="wrong start frame for token 1, expected: 3, found: {0} ".format(start))
+        self.assertEqual(end, 12, msg="wrong end frame for token 1, expected: 10, found: {0} ".format(end))
+        
+        
+        data = ''.join(tok2[0])
+        start = tok2[1]
+        end = tok2[2]
+        self.assertEqual(data, "AAa",
+                        msg="wrong data for token 1, expected: 'AAa', found: '{0}' ".format(data))
+        self.assertEqual(start, 13, msg="wrong start frame for token 1, expected: 9, found: {0} ".format(start))
+        self.assertEqual(end, 15, msg="wrong end frame for token 1, expected: 14, found: {0} ".format(end))
+        
+        
+        data = ''.join(tok3[0])
+        start = tok3[1]
+        end = tok3[2]
+        self.assertEqual(data, "AAAAAAAAAa",
+                        msg="wrong data for token 1, expected: 'AAAAAAAAAa', found: '{0}' ".format(data))
+        self.assertEqual(start, 17, msg="wrong start frame for token 1, expected: 17, found: {0} ".format(start))
+        self.assertEqual(end, 26, msg="wrong end frame for token 1, expected: 26, found: {0} ".format(end))
+        
+        
+class TestStreamTokenizerModes(unittest.TestCase):
+    
+    def setUp(self):
+        # Fresh validator for each test: a frame is valid iff it equals "A"
+        self.A_validator = AValidator()
+    
+    def test_STRICT_MIN_LENGTH(self):
+        
+        # With STRICT_MIN_LENGTH, a token that remains after a max_length
+        # split is dropped if it is shorter than min_length; hence only
+        # one token of exactly max_length frames is expected here.
+        tokenizer = StreamTokenizer(self.A_validator, min_length = 5, max_length=8,
+                                    max_continuous_silence=3, init_min = 3,
+                                    init_max_silence = 3, mode=StreamTokenizer.STRICT_MIN_LENGTH)
+        
+        data_source = StringDataSource("aaAAAAAAAAAAAA")
+        #                             ^      ^
+        #                             2      9
+        
+        tokens = tokenizer.tokenize(data_source)
+                
+        self.assertEqual(len(tokens), 1, msg="wrong number of tokens, expected: 1, found: {0} ".format(len(tokens)))
+        tok1 = tokens[0]
+        
+        # tok1[0]: data, tok1[1]: start frame (included), tok1[2]: end frame (included)
+        data = ''.join(tok1[0])
+        start = tok1[1]
+        end = tok1[2]
+        self.assertEqual(data, "AAAAAAAA",
+                        msg="wrong data for token 1, expected: 'AAAAAAAA', found: '{0}' ".format(data))
+        self.assertEqual(start, 2, msg="wrong start frame for token 1, expected: 2, found: {0} ".format(start))
+        self.assertEqual(end, 9, msg="wrong end frame for token 1, expected: 9, found: {0} ".format(end))
+    
+    
+    def test_DROP_TRAILING_SILENCE(self):
+        
+        tokenizer = StreamTokenizer(self.A_validator, min_length = 5, max_length=10,
+                                    max_continuous_silence=2, init_min = 3,
+                                    init_max_silence = 3, mode=StreamTokenizer.DROP_TRAILING_SILENCE)
+        
+        data_source = StringDataSource("aaAAAAAaaaaa")
+        #                             ^   ^
+        #                             2   6
+        
+        tokens = tokenizer.tokenize(data_source)
+                
+        self.assertEqual(len(tokens), 1, msg="wrong number of tokens, expected: 1, found: {0} ".format(len(tokens)))
+        tok1 = tokens[0]
+        
+        
+        data = ''.join(tok1[0])
+        start = tok1[1]
+        end = tok1[2]
+        self.assertEqual(data, "AAAAA",
+                        msg="wrong data for token 1, expected: 'AAAAA', found: '{0}' ".format(data))
+        self.assertEqual(start, 2, msg="wrong start frame for token 1, expected: 2, found: {0} ".format(start))
+        self.assertEqual(end, 6, msg="wrong end frame for token 1, expected: 6, found: {0} ".format(end))
+        
+        
+    def test_STRICT_MIN_LENGTH_and_DROP_TRAILING_SILENCE(self):
+        
+        tokenizer = StreamTokenizer(self.A_validator, min_length = 5, max_length=8,
+                                    max_continuous_silence=3, init_min = 3,
+                                    init_max_silence = 3, mode=StreamTokenizer.STRICT_MIN_LENGTH | StreamTokenizer.DROP_TRAILING_SILENCE)
+        
+        data_source = StringDataSource("aaAAAAAAAAAAAAaa")
+        #                             ^      ^
+        #                             2      8
+        
+        tokens = tokenizer.tokenize(data_source)
+                
+        self.assertEqual(len(tokens), 1, msg="wrong number of tokens, expected: 1, found: {0} ".format(len(tokens)))
+        tok1 = tokens[0]
+        
+        
+        data = ''.join(tok1[0])
+        start = tok1[1]
+        end = tok1[2]
+        self.assertEqual(data, "AAAAAAAA",
+                        msg="wrong data for token 1, expected: 'AAAAAAAA', found: '{0}' ".format(data))
+        self.assertEqual(start, 2, msg="wrong start frame for token 1, expected: 2, found: {0} ".format(start))
+        self.assertEqual(end, 9, msg="wrong end frame for token 1, expected: 9, found: {0} ".format(end))
+        
+    
+class TestStreamTokenizerCallback(unittest.TestCase):
+    
+    def setUp(self):
+        self.A_validator = AValidator()
+    
+    def test_callback(self):
+        
+        tokens = []
+        
+        def callback(data, start, end):
+            tokens.append((data, start, end))
+            
+        
+        tokenizer = StreamTokenizer(self.A_validator, min_length = 5, max_length=8,
+                                    max_continuous_silence=3, init_min = 3,
+                                    init_max_silence = 3, mode=0)
+        
+        data_source = StringDataSource("aaAAAAAAAAAAAAa")
+        #                             ^      ^^   ^
+        #                             2      910  14
+        
+        tokenizer.tokenize(data_source, callback=callback)
+        
+        self.assertEqual(len(tokens), 2, msg="wrong number of tokens, expected: 1, found: {0} ".format(len(tokens)))
+        
+
+
+if __name__ == "__main__":
+    #import sys;sys.argv = ['', 'Test.testName']
+    unittest.main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test_audio_source.py	Thu Sep 17 22:01:30 2015 +0200
@@ -0,0 +1,508 @@
+'''
+@author: Amine Sehili <amine.sehili@gmail.com>
+September 2015
+
+'''
+import unittest
+
+from auditok import BufferAudioSource
+
+
+class TestBufferAudioSource_SR10_SW1_CH1(unittest.TestCase):
+    
+    
+    def setUp(self):
+        self.signal = "ABCDEFGHIJKLMNOPQRSTUVWXYZ012345"
+        self.audio_source = BufferAudioSource(data_buffer=self.signal,
+                                         sampling_rate=10, sample_width=1, channels=1)
+        self.audio_source.open()
+        
+    def tearDown(self):
+        self.audio_source.close()
+    
+
+
+    def test_sr10_sw1_ch1_read_1(self):
+        
+        block = self.audio_source.read(1)
+        self.assertEqual(block, "A", msg="wrong block, expected: 'A', found: {0} ".format(block))
+    
+    
+    def test_sr10_sw1_ch1_read_6(self):
+        
+        block = self.audio_source.read(6)
+        self.assertEqual(block, "ABCDEF", msg="wrong block, expected: 'ABCDEF', found: {0} ".format(block))
+        
+    
+    def test_sr10_sw1_ch1_read_multiple(self):
+        
+        block = self.audio_source.read(1)
+        self.assertEqual(block, "A", msg="wrong block, expected: 'A', found: {0} ".format(block))
+                         
+        block = self.audio_source.read(6)
+        self.assertEqual(block, "BCDEFG", msg="wrong block, expected: 'BCDEFG', found: {0} ".format(block))
+        
+        block = self.audio_source.read(13)
+        self.assertEqual(block, "HIJKLMNOPQRST", msg="wrong block, expected: 'HIJKLMNOPQRST', found: {0} ".format(block))
+        
+        block = self.audio_source.read(9999)
+        self.assertEqual(block, "UVWXYZ012345", msg="wrong block, expected: 'UVWXYZ012345', found: {0} ".format(block))
+        
+    
+    def test_sr10_sw1_ch1_read_all(self):
+        block = self.audio_source.read(9999)
+        self.assertEqual(block, self.signal, msg="wrong block, expected: {0}, found: {1} ".format(self.signal, block))
+        
+        block = self.audio_source.read(1)
+        self.assertEqual(block, None, msg="wrong block, expected: {0}, found: {1} ".format(None, block))
+        
+    
+    def test_sr10_sw1_ch1_get_sampling_rate(self):
+        
+        srate = self.audio_source.get_sampling_rate()
+        self.assertEqual(srate, 10, msg="wrong sampling rate, expected: 10, found: {0} ".format(srate))
+    
+    
+    def test_sr10_sw1_ch1_get_sample_width(self):
+        
+        swidth = self.audio_source.get_sample_width()
+        self.assertEqual(swidth, 1, msg="wrong sample width, expected: 1, found: {0} ".format(swidth))
+        
+    
+    def test_sr10_sw1_ch1_get_channels(self):
+        
+        channels = self.audio_source.get_channels()
+        self.assertEqual(channels, 1, msg="wrong number of channels, expected: 1, found: {0} ".format(channels))
+    
+    
+    def test_sr10_sw1_ch1_get_position_0(self):
+        
+        pos = self.audio_source.get_position()
+        self.assertEqual(pos, 0, msg="wrong position, expected: 0, found: {0} ".format(pos))
+    
+    def test_sr10_sw1_ch1_get_position_5(self):
+        
+        self.audio_source.read(5)
+        pos = self.audio_source.get_position()
+        self.assertEqual(pos, 5, msg="wrong position, expected: 5, found: {0} ".format(pos))
+    
+    def test_sr10_sw1_ch1_get_position_25(self):
+                
+        self.audio_source.read(5)
+        self.audio_source.read(20)
+        
+        pos = self.audio_source.get_position()
+        self.assertEqual(pos, 25, msg="wrong position, expected: 5, found: {0} ".format(pos))
+        
+    
+    def test_sr10_sw1_ch1_set_position_0(self):
+        
+        self.audio_source.read(10)
+        self.audio_source.set_position(0)
+        pos = self.audio_source.get_position()
+        self.assertEqual(pos, 0, msg="wrong position, expected: 0, found: {0} ".format(pos))
+    
+    
+    def test_sr10_sw1_ch1_set_position_10(self):
+        
+        self.audio_source.set_position(10)
+        pos = self.audio_source.get_position()
+        self.assertEqual(pos, 10, msg="wrong position, expected: 10, found: {0} ".format(pos))
+    
+        
+    def test_sr10_sw1_ch1_get_time_position_0(self):
+        
+        tp = self.audio_source.get_time_position()
+        self.assertEqual(tp, 0.0, msg="wrong time position, expected: 0.0, found: {0} ".format(tp))
+    
+    def test_sr10_sw1_ch1_get_time_position_1(self):
+        
+        srate = self.audio_source.get_sampling_rate()
+        # read one second
+        self.audio_source.read(srate)
+        tp = self.audio_source.get_time_position()
+        self.assertEqual(tp, 1.0, msg="wrong time position, expected: 1.0, found: {0} ".format(tp))
+    
+    
+    def test_sr10_sw1_ch1_get_time_position_2_5(self):
+        
+        # read 2.5 seconds
+        self.audio_source.read(25)
+        tp = self.audio_source.get_time_position()
+        self.assertEqual(tp, 2.5, msg="wrong time position, expected: 2.5, found: {0} ".format(tp))
+        
+    
+    def test_sr10_sw1_ch1_set_time_position_0(self):
+        
+        self.audio_source.read(10)
+        self.audio_source.set_time_position(0)
+        tp = self.audio_source.get_time_position()
+        self.assertEqual(tp, 0.0, msg="wrong time position, expected: 0.0, found: {0} ".format(tp))
+        
+    
+    def test_sr10_sw1_ch1_set_time_position_1(self):
+
+        self.audio_source.set_time_position(1)
+        tp = self.audio_source.get_time_position()
+        self.assertEqual(tp, 1.0, msg="wrong time position, expected: 1.0, found: {0} ".format(tp))
+    
+    def test_sr10_sw1_ch1_set_time_position_end(self):
+
+        self.audio_source.set_time_position(100)
+        tp = self.audio_source.get_time_position()
+        self.assertEqual(tp, 3.2, msg="wrong time position, expected: 3.2, found: {0} ".format(tp))
+    
+    def test_sr10_sw1_ch1_rewind(self):
+        
+        self.audio_source.read(10)
+        self.audio_source.rewind()
+        tp = self.audio_source.get_position()
+        self.assertEqual(tp, 0, msg="wrong position, expected: 0.0, found: {0} ".format(tp))
+        
+    def test_sr10_sw1_ch1_set_data(self):
+        self.audio_source.set_data("12345")
+        block = self.audio_source.read(9999)
+        self.assertEqual(block, "12345", msg="wrong block, expected: '12345', found: {0} ".format(block))
+        
+    
+    def test_sr10_sw1_ch1_read_closed(self):
+        self.audio_source.close()
+        with self.assertRaises(Exception):
+            self.audio_source.read(1)
+            
+    
+
+class TestBufferAudioSource_SR16_SW2_CH1(unittest.TestCase):
+    
+    
+    def setUp(self):
+        self.signal = "ABCDEFGHIJKLMNOPQRSTUVWXYZ012345"
+        self.audio_source = BufferAudioSource(data_buffer=self.signal,
+                                         sampling_rate=16, sample_width=2, channels=1)
+        self.audio_source.open()
+        
+    def tearDown(self):
+        self.audio_source.close()
+    
+
+
+    def test_sr16_sw2_ch1_read_1(self):
+        
+        block = self.audio_source.read(1)
+        self.assertEqual(block, "AB", msg="wrong block, expected: 'AB', found: {0} ".format(block))
+    
+    
+    def test_sr16_sw2_ch1_read_6(self):
+        
+        block = self.audio_source.read(6)
+        self.assertEqual(block, "ABCDEFGHIJKL", msg="wrong block, expected: 'ABCDEFGHIJKL', found: {0} ".format(block))
+        
+    
+    def test_sr16_sw2_ch1_read_multiple(self):
+        
+        block = self.audio_source.read(1)
+        self.assertEqual(block, "AB", msg="wrong block, expected: 'AB', found: {0} ".format(block))
+                         
+        block = self.audio_source.read(6)
+        self.assertEqual(block, "CDEFGHIJKLMN", msg="wrong block, expected: 'CDEFGHIJKLMN', found: {0} ".format(block))
+        
+        block = self.audio_source.read(5)
+        self.assertEqual(block, "OPQRSTUVWX", msg="wrong block, expected: 'OPQRSTUVWX', found: {0} ".format(block))
+        
+        block = self.audio_source.read(9999)
+        self.assertEqual(block, "YZ012345", msg="wrong block, expected: 'YZ012345', found: {0} ".format(block))
+        
+    
+    def test_sr16_sw2_ch1_read_all(self):
+        block = self.audio_source.read(9999)
+        self.assertEqual(block, self.signal, msg="wrong block, expected: {0}, found: {1} ".format(self.signal, block))
+        
+        block = self.audio_source.read(1)
+        self.assertEqual(block, None, msg="wrong block, expected: {0}, found: {1} ".format(None, block))
+        
+    
+    def test_sr16_sw2_ch1_get_sampling_rate(self):
+        
+        srate = self.audio_source.get_sampling_rate()
+        self.assertEqual(srate, 16, msg="wrong sampling rate, expected: 10, found: {0} ".format(srate))
+    
+    
+    def test_sr16_sw2_ch1_get_sample_width(self):
+        
+        swidth = self.audio_source.get_sample_width()
+        self.assertEqual(swidth, 2, msg="wrong sample width, expected: 1, found: {0} ".format(swidth))
+        
+    
+    def test_sr16_sw2_ch1_get_channels(self):
+        
+        channels = self.audio_source.get_channels()
+        self.assertEqual(channels, 1, msg="wrong number of channels, expected: 1, found: {0} ".format(channels))
+    
+    
+    def test_sr16_sw2_ch1_get_position_0(self):
+        
+        pos = self.audio_source.get_position()
+        self.assertEqual(pos, 0, msg="wrong position, expected: 0, found: {0} ".format(pos))
+    
+    def test_sr16_sw2_ch1_get_position_5(self):
+        
+        self.audio_source.read(5)
+        pos = self.audio_source.get_position()
+        self.assertEqual(pos, 5, msg="wrong position, expected: 5, found: {0} ".format(pos))
+    
+    def test_sr16_sw2_ch1_get_position_15(self):
+                
+        self.audio_source.read(5)
+        self.audio_source.read(10)
+        
+        pos = self.audio_source.get_position()
+        self.assertEqual(pos, 15, msg="wrong position, expected: 5, found: {0} ".format(pos))
+        
+    
+    def test_sr16_sw2_ch1_set_position_0(self):
+        
+        self.audio_source.read(10)
+        self.audio_source.set_position(0)
+        pos = self.audio_source.get_position()
+        self.assertEqual(pos, 0, msg="wrong position, expected: 0, found: {0} ".format(pos))
+    
+    
+    def test_sr16_sw2_ch1_set_position_10(self):
+        
+        self.audio_source.set_position(10)
+        pos = self.audio_source.get_position()
+        self.assertEqual(pos, 10, msg="wrong position, expected: 10, found: {0} ".format(pos))
+    
+        
+    def test_sr16_sw2_ch1_get_time_position_0(self):
+        
+        tp = self.audio_source.get_time_position()
+        self.assertEqual(tp, 0.0, msg="wrong time position, expected: 0.0, found: {0} ".format(tp))
+    
+    def test_sr16_sw2_ch1_get_time_position_1(self):
+        
+        srate = self.audio_source.get_sampling_rate()
+        # read one second
+        self.audio_source.read(srate)
+        tp = self.audio_source.get_time_position()
+        self.assertEqual(tp, 1.0, msg="wrong time position, expected: 1.0, found: {0} ".format(tp))
+    
+    
+    def test_sr16_sw2_ch1_get_time_position_0_75(self):
+        
+        # read 2.5 seconds
+        self.audio_source.read(12)
+        tp = self.audio_source.get_time_position()
+        self.assertEqual(tp, 0.75, msg="wrong time position, expected: 0.75, found: {0} ".format(tp))
+        
+    
+    def test_sr16_sw2_ch1_set_time_position_0(self):
+        
+        self.audio_source.read(10)
+        self.audio_source.set_time_position(0)
+        tp = self.audio_source.get_time_position()
+        self.assertEqual(tp, 0.0, msg="wrong time position, expected: 0.0, found: {0} ".format(tp))
+        
+    
+    def test_sr16_sw2_ch1_set_time_position_1(self):
+
+        self.audio_source.set_time_position(1)
+        tp = self.audio_source.get_time_position()
+        self.assertEqual(tp, 1.0, msg="wrong time position, expected: 1.0, found: {0} ".format(tp))
+    
+    def test_sr16_sw2_ch1_set_time_position_end(self):
+
+        self.audio_source.set_time_position(100)
+        tp = self.audio_source.get_time_position()
+        self.assertEqual(tp, 1.0, msg="wrong time position, expected: 1.0, found: {0} ".format(tp))
+    
+    def test_sr16_sw2_ch1_rewind(self):
+        
+        self.audio_source.read(10)
+        self.audio_source.rewind()
+        tp = self.audio_source.get_position()
+        self.assertEqual(tp, 0, msg="wrong position, expected: 0.0, found: {0} ".format(tp))
+        
+    def test_sr16_sw2_ch1_set_data(self):
+        
+        self.audio_source.set_data("abcdef")
+        block = self.audio_source.read(9999)
+        self.assertEqual(block, "abcdef", msg="wrong block, expected: 'abcdef', found: {0} ".format(block))
+    
+    def test_sr16_sw2_ch1_set_data_exception(self):
+        
+        with self.assertRaises(Exception):
+            self.assertRaises(ValueError, self.audio_source.set_data("abcde"))
+        
+        
+
+class TestBufferAudioSource_SR11_SW4_CH1(unittest.TestCase):
+    
+    
+    def setUp(self):
+        self.signal = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefgh"
+        self.audio_source = BufferAudioSource(data_buffer=self.signal,
+                                         sampling_rate=11, sample_width=4, channels=1)
+        self.audio_source.open()
+        
+    def tearDown(self):
+        self.audio_source.close()
+    
+
+
+    def test_sr11_sw4_ch1_read_1(self):
+        
+        block = self.audio_source.read(1)
+        self.assertEqual(block, "ABCD", msg="wrong block, expected: 'ABCD', found: {0} ".format(block))
+    
+    
+    def test_sr11_sw4_ch1_read_6(self):
+        
+        block = self.audio_source.read(6)
+        self.assertEqual(block, "ABCDEFGHIJKLMNOPQRSTUVWX", msg="wrong block, expected: 'ABCDEFGHIJKLMNOPQRSTUVWX', found: {0} ".format(block))
+        
+    
+    def test_sr11_sw4_ch1_read_multiple(self):
+        
+        block = self.audio_source.read(1)
+        self.assertEqual(block, "ABCD", msg="wrong block, expected: 'AB', found: {0} ".format(block))
+                         
+        block = self.audio_source.read(6)
+        self.assertEqual(block, "EFGHIJKLMNOPQRSTUVWXYZ01", msg="wrong block, expected: 'EFGHIJKLMNOPQRSTUVWXYZ01', found: {0} ".format(block))
+        
+        block = self.audio_source.read(3)
+        self.assertEqual(block, "23456789abcd", msg="wrong block, expected: '23456789abcd', found: {0} ".format(block))
+        
+        block = self.audio_source.read(9999)
+        self.assertEqual(block, "efgh", msg="wrong block, expected: 'efgh', found: {0} ".format(block))
+        
+    
+    def test_sr11_sw4_ch1_read_all(self):
+        block = self.audio_source.read(9999)
+        self.assertEqual(block, self.signal, msg="wrong block, expected: {0}, found: {1} ".format(self.signal, block))
+        
+        block = self.audio_source.read(1)
+        self.assertEqual(block, None, msg="wrong block, expected: {0}, found: {1} ".format(None, block))
+        
+    
+    def test_sr11_sw4_ch1_get_sampling_rate(self):
+        
+        srate = self.audio_source.get_sampling_rate()
+        self.assertEqual(srate, 11, msg="wrong sampling rate, expected: 10, found: {0} ".format(srate))
+    
+    
+    def test_sr11_sw4_ch1_get_sample_width(self):
+        
+        swidth = self.audio_source.get_sample_width()
+        self.assertEqual(swidth, 4, msg="wrong sample width, expected: 1, found: {0} ".format(swidth))
+        
+    
+    def test_sr11_sw4_ch1_get_channels(self):
+        
+        channels = self.audio_source.get_channels()
+        self.assertEqual(channels, 1, msg="wrong number of channels, expected: 1, found: {0} ".format(channels))
+    
+    
+    def test_sr11_sw4_ch1_get_position_0(self):
+        
+        pos = self.audio_source.get_position()
+        self.assertEqual(pos, 0, msg="wrong position, expected: 0, found: {0} ".format(pos))
+    
+    def test_sr11_sw4_ch1_get_position_5(self):
+        
+        self.audio_source.read(5)
+        pos = self.audio_source.get_position()
+        self.assertEqual(pos, 5, msg="wrong position, expected: 5, found: {0} ".format(pos))
+    
+    def test_sr11_sw4_ch1_get_position_9(self):
+                
+        self.audio_source.read(5)
+        self.audio_source.read(4)
+        
+        pos = self.audio_source.get_position()
+        self.assertEqual(pos, 9, msg="wrong position, expected: 5, found: {0} ".format(pos))
+        
+    
+    def test_sr11_sw4_ch1_set_position_0(self):
+        
+        self.audio_source.read(10)
+        self.audio_source.set_position(0)
+        pos = self.audio_source.get_position()
+        self.assertEqual(pos, 0, msg="wrong position, expected: 0, found: {0} ".format(pos))
+    
+    
+    def test_sr11_sw4_ch1_set_position_10(self):
+        
+        self.audio_source.set_position(10)
+        pos = self.audio_source.get_position()
+        self.assertEqual(pos, 10, msg="wrong position, expected: 10, found: {0} ".format(pos))
+    
+        
+    def test_sr11_sw4_ch1_get_time_position_0(self):
+        
+        tp = self.audio_source.get_time_position()
+        self.assertEqual(tp, 0.0, msg="wrong time position, expected: 0.0, found: {0} ".format(tp))
+    
+    def test_sr11_sw4_ch1_get_time_position_1(self):
+        
+        srate = self.audio_source.get_sampling_rate()
+        # read one second
+        self.audio_source.read(srate)
+        tp = self.audio_source.get_time_position()
+        self.assertEqual(tp, 1.0, msg="wrong time position, expected: 1.0, found: {0} ".format(tp))
+    
+    
+    def test_sr11_sw4_ch1_get_time_position_0_63(self):
+        
+        # read 2.5 seconds
+        self.audio_source.read(7)
+        tp = self.audio_source.get_time_position()
+        self.assertAlmostEqual(tp, 0.636363636364, msg="wrong time position, expected: 0.636363636364, found: {0} ".format(tp))
+        
+    
+    def test_sr11_sw4_ch1_set_time_position_0(self):
+        
+        self.audio_source.read(10)
+        self.audio_source.set_time_position(0)
+        tp = self.audio_source.get_time_position()
+        self.assertEqual(tp, 0.0, msg="wrong time position, expected: 0.0, found: {0} ".format(tp))
+        
+    
+    def test_sr11_sw4_ch1_set_time_position_1(self):
+
+        self.audio_source.set_time_position(1)
+        tp = self.audio_source.get_time_position()
+        self.assertEqual(tp, 1.0, msg="wrong time position, expected: 1.0, found: {0} ".format(tp))
+    
+    def test_sr11_sw4_ch1_set_time_position_end(self):
+
+        self.audio_source.set_time_position(100)
+        tp = self.audio_source.get_time_position()
+        self.assertEqual(tp, 1.0, msg="wrong time position, expected: 1.0, found: {0} ".format(tp))
+    
+    def test_sr11_sw4_ch1_rewind(self):
+        
+        self.audio_source.read(10)
+        self.audio_source.rewind()
+        tp = self.audio_source.get_position()
+        self.assertEqual(tp, 0, msg="wrong position, expected: 0.0, found: {0} ".format(tp))
+        
+    def test_sr11_sw4_ch1_set_data(self):
+        
+        self.audio_source.set_data("abcdefgh")
+        block = self.audio_source.read(9999)
+        self.assertEqual(block, "abcdefgh", msg="wrong block, expected: 'abcdef', found: {0} ".format(block))
+    
+    def test_sr11_sw4_ch1_set_data_exception(self):
+        
+        with self.assertRaises(Exception):
+            self.assertRaises(ValueError, self.audio_source.set_data("abcdef"))
+        
+    
+        
+    
+
+
+if __name__ == "__main__":
+    #import sys;sys.argv = ['', 'Test.testName']
+    unittest.main()