changeset 33:929391dcf778 abstract

Merge from default branch
author Chris Cannam
date Fri, 06 Sep 2013 16:39:23 +0100
parents 102bfb16f17f (diff) 46151fd36a0a (current diff)
children b2d9512e2e51
files vamp-plugins_abstract/qmvamp-mirex2013.bib
diffstat 3 files changed, 98 insertions(+), 25 deletions(-) [+]
line wrap: on
line diff
--- a/vamp-plugins_abstract/qmvamp-mirex2013.bib	Fri Sep 06 16:14:30 2013 +0100
+++ b/vamp-plugins_abstract/qmvamp-mirex2013.bib	Fri Sep 06 16:39:23 2013 +0100
@@ -8,6 +8,69 @@
   year = {2007}
 }
 
+@inproceedings{dan2007a,
+  author = {Dan Stowell and Mark D. Plumbley},
+  title = {Adaptive whitening for improved real-time audio onset detection},
+  booktitle = {Proceedings of the International Computer Music Conference (ICMC'07)},
+  year = {2007}
+}
+
+@inproceedings{chris2003a,
+  author = {Chris Duxbury and Juan Pablo Bello and Mike Davies and Mark Sandler},
+  title = {Complex Domain Onset Detection for Musical Signals},
+  booktitle = {Proceedings of the 6th Int. Conference on Digital Audio Effects (DAFx-03) },
+  year = {2003}
+}
+
+@inproceedings{dan2005a,
+  author = {Dan Barry and Derry Fitzgerald and Eugene Coyle and Bob Lawlor},
+  title = {Drum Source Separation using Percussive Feature Detection and Spectral Modulation},
+  booktitle = {ISSC 2005},
+  year = {2005}
+}
+
+@article{mark2008a,
+  author = {Mark Levy and Mark Sandler},
+  title = {Structural Segmentation of Musical Audio by Constrained Clustering},
+  journal = {IEEE Transactions on Audio, Speech, and Language Processing},
+  month = {February},
+  number = {2},
+  pages = {318-326},
+  volume = {16},
+  year = {2008}
+}
+
+@conference{noland2007signal,
+        title={Signal Processing Parameters for Tonality Estimation},
+        author={Noland, Katy and Sandler, Mark},
+        booktitle={Audio Engineering Society Convention 122},
+        month={May},
+        year={2007}
+}
+
+@inproceedings{sonicvisualise2010,
+  author = {Chris Cannam and Christian Landone and Mark Sandler},
+  title = {Sonic Visualiser: An Open Source Application for Viewing, Analysing, and Annotating Music Audio Files},
+  booktitle = {Proceedings of the ACM Multimedia 2010 International Conference},
+  year = {2010}
+}
+
+@BOOK{krumhansl1990,
+    AUTHOR    = {C. L. Krumhansl},
+    TITLE     = {Cognitive Foundations of Musical Pitch},
+    PUBLISHER = {Oxford University Press},
+    YEAR      = {1990}
+}
+
+@article {gomez2006,
+    title = {Tonal description of polyphonic audio for music content processing},
+    journal = {INFORMS Journal on Computing, Special Cluster on Computation in Music},
+    volume = {18},
+    year = {2006},
+    author = {Emilia G{\'o}mez}
+}
+
+
 @incollection{mauch:md1:2010,
 	Author = {Matthias Mauch and Simon Dixon},
 	Booktitle = {Submissions to MIREX 2010},
@@ -15,3 +78,4 @@
 	Title = {MIREX 2010: Chord Detection Using a Dynamic Bayesian Network},
 	Year = {2010}}
 
+
Binary file vamp-plugins_abstract/qmvamp-mirex2013.pdf has changed
--- a/vamp-plugins_abstract/qmvamp-mirex2013.tex	Fri Sep 06 16:14:30 2013 +0100
+++ b/vamp-plugins_abstract/qmvamp-mirex2013.tex	Fri Sep 06 16:39:23 2013 +0100
@@ -9,7 +9,7 @@
 
 % Title.
 % ------
-\title{MIREX 2013 Entry: VAMP Plugins}
+\title{MIREX 2013 Entry: QM Vamp Plugins}
 
 % Single address
 % To use with only one author or several with the same address
@@ -21,8 +21,8 @@
 % Two addresses
 % --------------
 \twoauthors
-{Chris Cannam} {Affiliation1 \\ {\tt author1@music-ir.org}}
-{Lu\'{i}s A. Figueira} {Affiliation2 \\ {\tt author2@music-ir.org}}
+{Chris Cannam} {Queen Mary, University of London \\ {\tt chris.cannam@eecs.qmul.ac.uk}}
+{Lu\'{i}s A. Figueira} {Queen Mary, University of London \\ {\tt luis.figueira@eecs.qmul.ac.uk}}
 
 % Three addresses
 % --------------
@@ -36,45 +36,54 @@
 \maketitle
 %
 \begin{abstract}
-This abstract relates to submissions to several different categories. All submissions are of previously published methods. All submissions are in the form of VAMP Plugins.
 
-All plugins are fully open-source, and can be downloaded from the Isophonics\footnote{http://isophonics.net/QMVampPlugins} and SoundSoftware websites\footnote{http://code.soundsoftware.ac.uk/projects/qm-vamp-plugins}.
-
-For a complete (except binary files) overview of this submission - with detailed README files, scripts, etc. - please see the SoundSoftware site\footnote{http://code.soundsoftware.ac.uk/projects/mirex2013}.
-
+In this submission we intend to test several Vamp plugins that implement different algorithms for beat tracking, tempo estimation, key detection, onset detection and structural segmentation. Most of these plugins are no longer state-of-the-art, and were developed a few years ago. All the methods/algorithms implemented by this set of plugins are described in the literature (and referenced throughout this paper).
 \end{abstract}
 %
 \section{Introduction}\label{sec:introduction}
 
-describe vamp\ldots
-describe rationale supporting submission\ldots
+The Vamp plugin format\footnote{http://vamp-plugins.org/} was developed at the Centre for Digital Music (C4DM) at Queen Mary, University of London, during 2005-2006 and published as an open specification, alongside the Sonic Visualiser~\cite{sonicvisualise2010} audio analysis application, in response to a desire to publish algorithms developed at the Centre in a form in which they could be immediately useful to people outside this research field.
 
-\section{Audio Beat Tracking}
+In subsequent years the Vamp plugin format has become a moderately popular means of distributing methods from the Centre and other research groups. Some dozens of Vamp plugins are now available from groups such as the MTG at UPF in Barcelona, the SMC at INESC in Porto, the BBC, and others as well as from the Centre for Digital Music.
 
-\subsection{Tempo and Beat Tracker}
+ These plugins are provided as a single library file, made available in binary form for Windows, OS/X, and Linux from the Centre for Digital Music's download page\footnote{http://vamp-plugins.org/plugin-doc/qm-vamp-plugins.html}. All plugins are fully open-source --- you can find the source code in the SoundSoftware website\footnote{http://code.soundsoftware.ac.uk/projects/qm-vamp-plugins}.
 
-The Tempo and Beat Tracker\cite{matthew2007a} VAMP plugin analyses a single channel of audio and estimates the positions of metrical beats within the music (the equivalent of a human listener tapping their foot to the beat).
+For a complete overview of this submission - with detailed README files, scripts, etc. - please see the SoundSoftware site\footnote{http://code.soundsoftware.ac.uk/projects/mirex2013}.
+
+\section{Audio Beat Tracking and Audio Tempo Estimation}
+
+The Tempo and Beat Tracker\cite{matthew2007a} Vamp plugin analyses a single channel of audio and estimates the positions of metrical beats within the music (the equivalent of a human listener tapping their foot to the beat).
 
 The Tempo and Beat Tracker Vamp plugin was written by Matthew Davies and Christian Landone.
 
-\section{Audio Chord Estimation}
+\section{Audio Key Detection}
+The Key Detector Vamp plugin analyses a single channel of audio and continuously estimates the key of the music by comparing the degree to which a block-by-block chromagram correlates to the stored key profiles for each major and minor key.
 
-\section{Audio Key Detection}
-
-\subsection{Key Detector}
-
-[Need reference]
-
-The Key Detector VAMP pluginanalyses a single channel of audio and continuously estimates the key of the music by comparing the degree to which a block-by-block chromagram correlates to the stored key profiles for each major and minor key.
-
-The key profiles are drawn from analysis of Book I of the Well Tempered Klavier by J S Bach, recorded at A=440 equal temperament.
+This plugin uses the correlation method described in \cite{krumhansl1990} and \cite{gomez2006}, but using different tone profiles. The key profiles used in this implementation are drawn from analysis of Book I of the Well Tempered Klavier by J S Bach, recorded at A=440 equal temperament, as described in \cite{noland2007signal}.
 
 The Key Detector Vamp plugin was written by Katy Noland and Christian Landone.
 
-\section{Audio Melody Extraction}
 \section{Audio Onset Detection}
+
+The Note Onset Detector Vamp plugin analyses a single channel of audio and estimates the onset times of notes within the music -- that is, the times at which notes and other audible events begin.
+
+It calculates an onset likelihood function for each spectral frame, and picks peaks in a smoothed version of this function. The plugin is non-causal, returning all results at the end of processing.
+
+Please refer to the following publication for the basic detection methods~\cite{chris2003a}. The Adaptive Whitening technique is described in~\cite{dan2007a}. The Percussion Onset detector is described in~\cite{dan2005a}.
+
 \section{Audio Structural Segmentation}
-\section{Audio Tempo Estimation}
+
+The Segmenter Vamp plugin divides a single channel of music up into structurally consistent segments. It returns a numeric value (the segment type) for each moment at which a new segment starts.
+
+For music with clearly tonally distinguishable sections such as verse, chorus, etc., segments with the same type may be expected to be similar to one another in some structural sense. For example, repetitions of the chorus are likely to share a segment type.
+
+The method, described in~\cite{mark2008a}, relies upon structural/timbral similarity to obtain the high-level song structure. This is based on the assumption that the distributions of timbre features are similar over corresponding structural elements of the music.
+
+The algorithm works by obtaining a frequency-domain representation of the audio signal using a Constant-Q transform, a Chromagram or Mel-Frequency Cepstral Coefficients (MFCC) as underlying features (the particular feature is selectable as a parameter). The extracted features are normalised in accordance with the MPEG-7 standard (NASE descriptor), which means the spectrum is converted to decibel scale and each spectral vector is normalised by the RMS energy envelope. The value of this envelope is stored for each processing block of audio. This is followed by the extraction of 20 principal components per block using PCA, yielding a sequence of 21 dimensional feature vectors where the last element in each vector corresponds to the energy envelope.
+
+A 40-state Hidden Markov Model is then trained on the whole sequence of features, with each state of the HMM corresponding to a specific timbre type. This process partitions the timbre-space of a given track into 40 possible types. The important assumption of the model is that the distribution of these features remain consistent over a structural segment. After training and decoding the HMM, the song is assigned a sequence of timbre-features according to specific timbre-type distributions for each possible structural segment.
+
+The segmentation itself is computed by clustering timbre-type histograms. A series of histograms are created over a sliding window which are grouped into M clusters by an adapted soft k-means algorithm. Each of these clusters will correspond to a specific segment-type of the analysed song. Reference histograms, iteratively updated during clustering, describe the timbre distribution for each segment. The segmentation arises from the final cluster assignments.
 
 \bibliography{qmvamp-mirex2013}