# HG changeset patch
# User luisf
# Date 1378480643 -3600
# Node ID 473d83d0865ca208921fc875e168ef03c177339a
# Parent  e1b69a7360a54b60218c9a25e16862fe9d0d8ade
added more references; started tidying up

diff -r e1b69a7360a5 -r 473d83d0865c vamp-plugins_abstract/qmvamp-mirex2013.bib
--- a/vamp-plugins_abstract/qmvamp-mirex2013.bib	Fri Sep 06 11:32:30 2013 +0100
+++ b/vamp-plugins_abstract/qmvamp-mirex2013.bib	Fri Sep 06 16:17:23 2013 +0100
@@ -39,3 +39,35 @@
   volume = {16},
   year = {2008}
 }
+
+@conference{noland2007signal,
+  title = {Signal Processing Parameters for Tonality Estimation},
+  author = {Noland, Katy and Sandler, Mark},
+  booktitle = {Audio Engineering Society Convention 122},
+  month = {May},
+  year = {2007}
+}
+
+@inproceedings{sonicvisualise2010,
+  author = {Chris Cannam and Christian Landone and Mark Sandler},
+  title = {Sonic Visualiser: An Open Source Application for Viewing, Analysing, and Annotating Music Audio Files},
+  booktitle = {Proceedings of the ACM Multimedia 2010 International Conference},
+  year = {2010}
+}
+
+@book{krumhansl1990,
+  author = {C. L. Krumhansl},
+  title = {Cognitive Foundations of Musical Pitch},
+  publisher = {Oxford University Press},
+  year = {1990}
+}
+
+@article{gomez2006,
+  author = {Emilia G{\'o}mez},
+  title = {Tonal description of polyphonic audio for music content processing},
+  journal = {INFORMS Journal on Computing, Special Cluster on Computation in Music},
+  volume = {18},
+  year = {2006}
+}
+
+@inproceedings{fujishima1999,
+  author = {Takuya Fujishima},
+  title = {Realtime Chord Recognition of Musical Sound: A System Using Common Lisp Music},
+  booktitle = {Proceedings of the International Computer Music Conference (ICMC)},
+  pages = {464--467},
+  address = {Beijing},
+  year = {1999}
+}

diff -r e1b69a7360a5 -r 473d83d0865c vamp-plugins_abstract/qmvamp-mirex2013.tex
--- a/vamp-plugins_abstract/qmvamp-mirex2013.tex	Fri Sep 06 11:32:30 2013 +0100
+++ b/vamp-plugins_abstract/qmvamp-mirex2013.tex	Fri Sep 06 16:17:23 2013 +0100
@@ -9,7 +9,7 @@
 
 % Title.
 % ------
-\title{MIREX 2013 Entry: VAMP Plugins}
+\title{MIREX 2013 Entry: QM Vamp Plugins}
 
 % Single address
 % To use with only one author or several with the same address
@@ -21,8 +21,8 @@
 % Two addresses
 % --------------
 \twoauthors
-{Chris Cannam} {Affiliation1 \\ {\tt author1@music-ir.org}}
-{Lu\'{i}s A. Figueira} {Affiliation2 \\ {\tt author2@music-ir.org}}
+{Chris Cannam} {Queen Mary, University of London \\ {\tt chris.cannam@eecs.qmul.ac.uk}}
+{Lu\'{i}s A. Figueira} {Queen Mary, University of London \\ {\tt luis.figueira@eecs.qmul.ac.uk}}
 
 % Three addresses
 % --------------
@@ -36,22 +36,23 @@
 \maketitle
 %
 \begin{abstract}
-This abstract relates to submissions to several different categories. All submissions are of previously published methods. All submissions are in the form of VAMP Plugins.
-All plugins are fully open-source, and can be downloaded from the Isophonics\footnote{http://isophonics.net/QMVampPlugins} and SoundSoftware websites\footnote{http://code.soundsoftware.ac.uk/projects/qm-vamp-plugins}.
-
-For a complete (except binary files) overview of this submission - with detailed README files, scripts, etc. - please see the SoundSoftware site\footnote{http://code.soundsoftware.ac.uk/projects/mirex2013}.
-
+In this submission we test several Vamp plugins that implement algorithms for beat tracking, tempo estimation, key detection, onset detection, and structural segmentation. Most of these plugins were developed a few years ago and are no longer state of the art. All of the methods implemented by this set of plugins are described in the literature, and are referenced throughout this paper.
 \end{abstract}
 %
 \section{Introduction}\label{sec:introduction}
-describe vamp\ldots
-describe rationale supporting submission\ldots
+The Vamp plugin format\footnote{http://vamp-plugins.org/} was developed at the Centre for Digital Music (C4DM) at Queen Mary, University of London, during 2005--2006 and published as an open specification, alongside the Sonic Visualiser~\cite{sonicvisualise2010} audio analysis application, in response to a desire to publish algorithms developed at the Centre in a form in which they could be immediately useful to people outside this research field.
+
+In subsequent years the Vamp plugin format has become a moderately popular means of distributing methods from the Centre and from other research groups. Several dozen Vamp plugins are now available from groups such as the MTG at UPF in Barcelona, the SMC group at INESC in Porto, and the BBC, as well as from the Centre for Digital Music.
+
+The plugins in this submission are provided as a single library file, made available in binary form for Windows, OS X, and Linux from the Centre for Digital Music's download page\footnote{http://vamp-plugins.org/plugin-doc/qm-vamp-plugins.html}. All plugins are fully open source; the source code can be found on the SoundSoftware website\footnote{http://code.soundsoftware.ac.uk/projects/qm-vamp-plugins}.
+
+For a complete overview of this submission, including detailed README files, scripts, etc., please see the SoundSoftware site\footnote{http://code.soundsoftware.ac.uk/projects/mirex2013}.
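+
+To give a flavour of the format, the following sketch (an illustration only, not the implementation of any of these plugins; the plugin key used here is an assumption) shows how a host built with the Vamp host SDK loads a plugin, initialises it, and feeds it audio:
+
+\begin{verbatim}
+// Minimal illustrative Vamp host: load a plugin, initialise it,
+// process one block of audio, and collect remaining features.
+#include <vamp-hostsdk/PluginLoader.h>
+#include <vector>
+
+int main() {
+    using Vamp::Plugin;
+    using Vamp::HostExt::PluginLoader;
+
+    PluginLoader *loader = PluginLoader::getInstance();
+    PluginLoader::PluginKey key =
+        loader->composePluginKey("qm-vamp-plugins", "qm-keydetector");
+    Plugin *plugin =
+        loader->loadPlugin(key, 44100, PluginLoader::ADAPT_ALL_SAFE);
+    if (!plugin) return 1;
+
+    size_t block = plugin->getPreferredBlockSize();
+    if (block == 0) block = 1024;   // plugin has no preference
+    size_t step = plugin->getPreferredStepSize();
+    if (step == 0) step = block;
+    if (!plugin->initialise(1, step, block)) return 1;
+
+    // One silent block stands in for real audio input here.
+    std::vector<float> buffer(block, 0.f);
+    const float *channels[1] = { &buffer[0] };
+    plugin->process(channels,
+                    Vamp::RealTime::frame2RealTime(0, 44100));
+
+    // Features that need the whole input arrive at the end.
+    Plugin::FeatureSet features = plugin->getRemainingFeatures();
+
+    delete plugin;
+    return 0;
+}
+\end{verbatim}
+
+Each call to \texttt{process()} may return features immediately; features that depend on the complete input are returned by \texttt{getRemainingFeatures()}.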
 
 \section{Audio Beat Tracking and Audio Tempo Estimation}
 
-The Tempo and Beat Tracker\cite{matthew2007a} VAMP plugin analyses a single channel of audio and estimates the positions of metrical beats within the music (the equivalent of a human listener tapping their foot to the beat).
+The Tempo and Beat Tracker~\cite{matthew2007a} Vamp plugin analyses a single channel of audio and estimates the positions of metrical beats within the music (the equivalent of a human listener tapping their foot to the beat).
 
 The Tempo and Beat Tracker Vamp plugin was written by Matthew Davies and Christian Landone.
 
@@ -59,7 +60,16 @@
 
 [Need reference]
 
-The Key Detector VAMP pluginan alyses a single channel of audio and continuously estimates the key of the music by comparing the degree to which a block-by-block chromagram correlates to the stored key profiles for each major and minor key.
+The Key Detector Vamp plugin analyses a single channel of audio and continuously estimates the key of the music by comparing the degree to which a block-by-block chromagram correlates with the stored key profiles for each major and minor key.
+
+This correlation method is a standard technique. Krumhansl applied it very early on to symbolic data~\cite[p.~37]{krumhansl1990}, and G{\'o}mez was among the first to apply it to audio using chromagrams~\cite{gomez2006}, although with different key profiles from those used here. Chromagrams for profile-based chord estimation (rather than key estimation, and without the correlation step) came earlier still, and are generally attributed to Fujishima~\cite{fujishima1999}.
+
+The plugin uses Krumhansl's method adapted for audio, in a similar fashion to G{\'o}mez, but with tone profiles derived from recordings of Bach preludes and fugues.
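+
+As a minimal sketch of the correlation step (an illustration, not the plugin's source code), each 12-bin chromagram block can be correlated against every rotation of a major and a minor profile, and the best-scoring rotation taken as the key estimate:
+
+\begin{verbatim}
+// Illustrative profile-correlation key estimation for one chroma block.
+#include <cmath>
+#include <vector>
+
+// Pearson correlation between two 12-element vectors.
+double correlate(const std::vector<double> &x,
+                 const std::vector<double> &y) {
+    double mx = 0.0, my = 0.0;
+    for (int i = 0; i < 12; ++i) { mx += x[i]; my += y[i]; }
+    mx /= 12.0; my /= 12.0;
+    double num = 0.0, dx = 0.0, dy = 0.0;
+    for (int i = 0; i < 12; ++i) {
+        num += (x[i] - mx) * (y[i] - my);
+        dx  += (x[i] - mx) * (x[i] - mx);
+        dy  += (y[i] - my) * (y[i] - my);
+    }
+    return num / std::sqrt(dx * dy);
+}
+
+// Best-correlated key for one block: 0..11 major, 12..23 minor.
+int estimateKey(const std::vector<double> &chroma,
+                const std::vector<double> &majorProfile,
+                const std::vector<double> &minorProfile) {
+    int bestKey = 0;
+    double bestScore = -2.0;  // below any possible correlation
+    for (int mode = 0; mode < 2; ++mode) {
+        const std::vector<double> &p =
+            (mode == 0 ? majorProfile : minorProfile);
+        for (int tonic = 0; tonic < 12; ++tonic) {
+            std::vector<double> rotated(12);
+            for (int i = 0; i < 12; ++i)  // rotate profile to this tonic
+                rotated[i] = p[((i - tonic) + 12) % 12];
+            double score = correlate(chroma, rotated);
+            if (score > bestScore) {
+                bestScore = score;
+                bestKey = 12 * mode + tonic;
+            }
+        }
+    }
+    return bestKey;
+}
+\end{verbatim}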
 
 The key profiles are drawn from analysis of Book I of the Well Tempered Klavier by J S Bach, recorded at A=440 equal temperament.
 
@@ -79,12 +89,6 @@
 
 For music with clearly tonally distinguishable sections such as verse, chorus, etc., segments with the same type may be expected to be similar to one another in some structural sense. For example, repetitions of the chorus are likely to share a segment type.
 
-The plugin only attempts to identify similar segments; it does not attempt to label them. For example, it makes no attempt to tell you which segment is the chorus.
-
-Note that this plugin does a substantial amount of processing after receiving all of the input audio data, before it produces any results.
-
-\subsection{Method}
-
 The method, described in~\cite{mark2008a}, relies upon structural/timbral similarity to obtain the high-level song structure. This is based on the assumption that the distributions of timbre features are similar over corresponding structural elements of the music.
 
 The algorithm works by obtaining a frequency-domain representation of the audio signal using a Constant-Q transform, a Chromagram or Mel-Frequency Cepstral Coefficients (MFCC) as underlying features (the particular feature is selectable as a parameter). The extracted features are normalised in accordance with the MPEG-7 standard (NASE descriptor), which means the spectrum is converted to decibel scale and each spectral vector is normalised by the RMS energy envelope. The value of this envelope is stored for each processing block of audio. This is followed by the extraction of 20 principal components per block using PCA, yielding a sequence of 21-dimensional feature vectors where the last element in each vector corresponds to the energy envelope.
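+
+The normalisation step can be sketched as follows (an illustration, not the plugin's implementation; the decibel floor guarding against a logarithm of zero is an assumption):
+
+\begin{verbatim}
+// Illustrative MPEG-7 NASE-style normalisation of one spectral block:
+// convert to decibels, normalise by RMS energy, keep the energy value.
+#include <algorithm>
+#include <cmath>
+#include <vector>
+
+std::vector<double> naseNormalise(const std::vector<double> &mags) {
+    const double floorVal = 1e-10;  // assumed guard against log(0)
+    const size_t n = mags.size();
+
+    std::vector<double> db(n);
+    for (size_t i = 0; i < n; ++i)  // spectrum to decibel scale
+        db[i] = 10.0 * std::log10(std::max(mags[i], floorVal));
+
+    double rms = 0.0;               // RMS energy envelope of this block
+    for (size_t i = 0; i < n; ++i) rms += db[i] * db[i];
+    rms = std::sqrt(rms / n);
+
+    std::vector<double> out(n + 1);
+    for (size_t i = 0; i < n; ++i)  // normalise by the RMS energy
+        out[i] = (rms > 0.0) ? db[i] / rms : 0.0;
+    out[n] = rms;                   // envelope value stored per block
+    return out;
+}
+\end{verbatim}
+
+In the full method, PCA then reduces each normalised vector to 20 principal components, and the stored envelope value is appended to give the 21-dimensional feature vectors described above.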