changeset 48:17f1ca2b9f86 abstract
Various edits
author    Chris Cannam
date      Fri, 06 Sep 2013 22:11:44 +0100
parents   a8767b4d3be8
children  4fe004e09681
files     vamp-plugins_abstract/qmvamp-mirex2013.bib
          vamp-plugins_abstract/qmvamp-mirex2013.tex
diffstat  2 files changed, 204 insertions(+), 170 deletions(-)
--- a/vamp-plugins_abstract/qmvamp-mirex2013.bib	Fri Sep 06 21:13:34 2013 +0100
+++ b/vamp-plugins_abstract/qmvamp-mirex2013.bib	Fri Sep 06 22:11:44 2013 +0100
@@ -8,6 +8,16 @@
   year = {2007}
 }
 
+@article{ellis2007,
+  author = {D. P. W. Ellis},
+  title = {Beat Tracking by Dynamic Programming},
+  journal = {Journal of New Music Research},
+  volume = {36},
+  number = {1},
+  pages = {51-60},
+  year = {2007}
+}
+
 @inproceedings{dan2007a,
   author = {Dan Stowell and Mark D. Plumbley},
   title = {Adaptive whitening for improved real-time audio onset detection},
@@ -72,8 +82,6 @@
 @incollection{mauch:md1:2010,
   Author = {Matthias Mauch and Simon Dixon},
-  Booktitle = {Submissions to MIREX 2010},
-  Publisher = {Graduate School of Library Information Science University of Illinois at Urbana-Champaign},
   Title = {MIREX 2010: Chord Detection Using a Dynamic Bayesian Network},
   Year = {2010}
 }
@@ -89,7 +97,6 @@
   author = {Matthias Mauch and Katy C. Noland and Dixon, Simon},
   title = {Using Musical Structure to Enhance Automatic Chord Transcription},
   booktitle = {Proceedings of the 10th International Conference on Music Information Retrieval (ISMIR 2009)},
-  note = {First description of the algorithm. However, the description is not complete. Better description in "Automatic chord transcription from audio using computational models of musical context" (Mauch's PhD Thesis), Chapter 6.},
   pages = {231-236},
   year = {2009}
 }
@@ -97,7 +104,6 @@
 @phdthesis{matthiasphd,
   author = {Matthias Mauch},
   title = {Automatic Chord Transcription from Audio Using Computational Models of Musical Context},
-  note = {Features a more thorough description of the segmentation algorithm in Chapter 6.},
   school = {Queen Mary, University of London},
   year = {2010}
 }
@@ -106,7 +112,20 @@
   author = {Chris Cannam},
   title = {Unit testing: An audio research example},
   howpublished = {Handout},
-  note = {One of the single-page handouts made available at DAFx and ISMIR 2012 tutorials etc. See http://www.soundsoftware.ac.uk/handouts-guides for more information.},
+  note = {One of the single-page handouts made available at DAFx and ISMIR 2012 tutorials. See http://www.soundsoftware.ac.uk/handouts-guides for more information.},
   year = {2012}
 }
+
+@inproceedings{simon2006a,
+  author = {Simon Dixon},
+  title = {MIREX 2006 Audio Beat Tracking Evaluation: BeatRoot},
+  year = {2006}
+}
+
+@inproceedings{simon2001a,
+  author = {Simon Dixon},
+  title = {An Interactive Beat Tracking and Visualisation System},
+  booktitle = {Proceedings of the 2001 International Computer Music Conference (ICMC'2001)},
+  year = {2001}
+}
+
--- a/vamp-plugins_abstract/qmvamp-mirex2013.tex	Fri Sep 06 21:13:34 2013 +0100
+++ b/vamp-plugins_abstract/qmvamp-mirex2013.tex	Fri Sep 06 22:11:44 2013 +0100
@@ -9,7 +9,7 @@
 % Title.
 % ------
-\title{MIREX 2013 Entry: QM Vamp Plugins}
+\title{MIREX 2013 Entry: Vamp Plugins from the Centre for Digital Music}
 
 % Single address
 % To use with only one author or several with the same address
@@ -19,30 +19,18 @@
 {Katy Noland, Mark Levy, Massimiliano Zanoni, Dan Stowell and Lu\'{i}s A. Figueira}
 {Queen Mary, University of London \\ {\em chris.cannam@eecs.qmul.ac.uk}}
 
-% Two addresses
-% --------------
-%\twoauthors
-%{Chris Cannam and Matthias Mauch and Katy Noland} {Queen Mary, University of London \\ {\tt chris.cannam@eecs.qmul.ac.uk}}
-%{Lu\'{i}s A. Figueira} {Queen Mary, University of London \\ {\tt luis.figueira@eecs.qmul.ac.uk}}
-
-% Three addresses
-% --------------
-% \threeauthors
-% {Chris Cannam} {Affiliation1 \\ {\tt author1@music-ir.org}}
-% {Lu\'{i}s A. Figueira} {Affiliation2 \\ {\tt author2@music-ir.org}}
-% {Matthias Mauch} {Affiliation3 \\ {\tt author3@music-ir.org}}
-
 \begin{document}
 %
 \maketitle
 %
 \begin{abstract}
-In this submission we intend to test several Vamp plugins for various
-tasks. Most of these plugins are no longer state-of-the-art, and were
-developed a few years ago. All the methods/algorithms implemented on
-this set of plugins are described in the literature (and referenced
-throughout this paper).
+In this submission we offer for evaluation several audio feature
+extraction plugins in Vamp format. Some of these plugins are
+efficient implementations of relatively recent work, while others
+are older and no longer state-of-the-art. The methods implemented in
+this set of plugins are described in the literature and are
+referenced throughout this paper.
 \end{abstract}
 %
@@ -50,79 +38,116 @@
 
 The Vamp plugin format\footnote{http://vamp-plugins.org/} was
 developed at the Centre for Digital Music (C4DM) at Queen Mary,
-University of London, during 2005-2006 and published as an open
-specification, alongside the Sonic
-Visualiser~\cite{sonicvisualise2010} audio analysis application, in
-response to a desire to publish algorithms developed at the Centre in
-a form in which they could be immediately useful to people outside
-this research field.
+University of London, during 2005-2006 in response to a desire to
+publish work in a form that would be immediately useful to people
+outside this research field. The Vamp plugin format was published with
+an open source SDK, alongside the Sonic
+Visualiser~\cite{sonicvisualise2010} audio analysis application, which
+provided a useful host for Vamp plugins.
 
-In subsequent years the Vamp plugin format has become a moderately
-popular means of distributing methods from the Centre and other
-research groups. Some dozens of Vamp plugins are now available from
-groups such as the MTG at UPF in Barcelona, the SMC at INESC in Porto,
-the BBC, and others as well as from the Centre for Digital Music.
+In subsequent years the Vamp format has become a moderately popular
+means of distributing methods from the Centre and other research
+groups. Some dozens of Vamp plugins are now available from groups such
+as the Music Technology Group at UPF in Barcelona, the Sound and Music
+Computing group at INESC in Porto, the BBC, and others, as well as
+from the Centre for Digital Music.
- These plugins are provided as a single library file, made available
- in binary form for Windows, OS/X, and Linux from the Centre for
- Digital Music's download
- page\footnote{http://vamp-plugins.org/plugin-doc/qm-vamp-plugins.html}. All
- plugins are fully open-source --- you can find the source code in the
- SoundSoftware
- website\footnote{http://code.soundsoftware.ac.uk/projects/qm-vamp-plugins}.
+The plugins submitted for this evaluation are provided as a set of
+library files. Those with names starting ``QM'' are all provided in a
+single library file, the QM Vamp Plugins set, made available in binary
+form for Windows, OS/X, and Linux from the Centre for Digital Music's
+download
+page\footnote{http://vamp-plugins.org/plugin-doc/qm-vamp-plugins.html}. All
+of these plugins are open-source, and source is available through the
+SoundSoftware code
+site\footnote{http://code.soundsoftware.ac.uk/projects/qm-vamp-plugins}. These
+plugins come from a number of authors, who are credited in this
+abstract and in the plugins' accompanying documentation.
+
+In addition to the QM Vamp Plugins set, this submission contains a
+number of individual plugins: the Chordino and Segmentino plugins from
+Matthias Mauch; the BeatRoot Vamp Plugin from Simon Dixon; OnsetsDS
+from Dan Stowell; and a Cepstral Pitch Tracker plugin from Chris
+Cannam.
 
 (For a complete overview of this submission across all of the tasks
 and plugins it covers, please see the relevant repository at the
 SoundSoftware
-site\footnote{http://code.soundsoftware.ac.uk/projects/mirex2013}.)
+site.\footnote{http://code.soundsoftware.ac.uk/projects/mirex2013})
 
-\section{Audio Beat Tracking}
+\section{Submissions by MIREX Task}
 
-\subsection{Tempo and Beat Tracker Plugin}
+\subsection{Audio Beat Tracking}
+
+\subsubsection{QM Tempo and Beat Tracker}
 \label{tempo_and_beat_tracker}
 
-The Tempo and Beat Tracker\cite{matthew2007a} Vamp plugin analyses a
-single channel of audio and estimates the positions of metrical beats
-within the music (the equivalent of a human listener tapping their
-foot to the beat).
+The QM Tempo and Beat Tracker\cite{matthew2007a} Vamp plugin analyses
+a single channel of audio and estimates the positions of metrical
+beats within the music.
 
-The Tempo and Beat Tracker Vamp plugin was written by Matthew Davies
-and Christian Landone.
+This plugin uses the complex-domain onset detection method
+from~\cite{chris2003a} with a hybrid of the two-state beat tracking
+model proposed in~\cite{matthew2007a} and a dynamic programming method
+based on~\cite{ellis2007}.
 
-\subsection{BeatRoot Plugin}
+To identify the tempo, the onset detection function is partitioned
+into 6-second frames with a 1.5-second increment. The autocorrelation
+function of each 6-second frame is computed and passed through a
+perceptually weighted comb filterbank\cite{matthew2007a}. The
+successive comb filterbank output signals are grouped together into a
+matrix of observations of periodicity through time. The best path of
+periodicity through these observations is found using the Viterbi
+algorithm, where the transition matrix is defined as a diagonal
+Gaussian.
 
-The BeatRoot Vamp Plugin is an open source Vamp plugin library that
+Given the estimates of periodicity, the beat locations are recovered
+by applying the dynamic programming algorithm of~\cite{ellis2007}. This
+process involves the calculation of a recursive cumulative score
+function and backtrace signal.
+The cumulative score indicates the likelihood of a beat existing at
+each sample of the onset detection function input, and the backtrace
+gives the location of the best previous beat given this point in
+time. Once the cumulative score and backtrace have been calculated for
+the whole input signal, the best path through beat locations is found
+by recursively sampling the backtrace signal from the end of the input
+signal back to the beginning.
+
+The QM Tempo and Beat Tracker plugin was written by Matthew Davies and
+Christian Landone.
+
+\subsubsection{BeatRoot}
+
 The BeatRoot Vamp plugin is an open source Vamp plugin library that
 implements the BeatRoot beat-tracking method of Simon
-Dixon\cite{!!!!}.
+Dixon\cite{simon2001a}. The BeatRoot algorithm has been submitted to
+MIREX evaluation in earlier years\cite{simon2006a}; this plugin
+consists of the most recent BeatRoot code release, converted from Java
+to C++ and adapted to the Vamp plugin format.
 
-This plugin library is available online as a free, open source
-download from the Centre for Digital Music at Queen Mary, University
-of London. The BeatRoot algorithm has been submitted to MIREX
-evaluation in earlier years\cite{!!!}; we are preparing and submitting
-this plugin version of the work as part of a programme of evaluation
-of Vamp plugin implementations of published or publicly available
-algorithms being carried out at the Centre for Digital Music.
+The BeatRoot plugin was written by Simon Dixon and Chris Cannam.
 
-\section{Audio Key Detection}
+\subsection{Audio Key Detection}
 
-The Key Detector Vamp plugin anlyses a single channel of audio and
-continuously estimates the key of the music by comparing the degree to
-which a block-by-block chromagram correlates to the stored key
-profiles for each major and minor key.
+\subsubsection{QM Key Detector}
 
-This plugin uses the correlation method described in
-\cite{krumhansl1990} and \cite{gomez2006}, but using different tone
+The QM Key Detector Vamp plugin continuously estimates the key of the
+music by comparing the degree to which a block-by-block chromagram
+correlates to stored key profiles for each major and minor key.
+
+This plugin uses the correlation method described
+in~\cite{krumhansl1990} and~\cite{gomez2006}, but using different tone
 profiles. The key profiles used in this implementation are drawn from
 analysis of Book I of the Well Tempered Klavier by J S Bach, recorded
-at A=440 equal temperament, as described in \cite{noland2007signal}.
+at A=440 equal temperament, as described in~\cite{noland2007signal}.
 
-The Key Detector Vamp plugin was written by Katy Noland and Christian Landone.
+The QM Key Detector plugin was written by Katy Noland and
+Christian Landone.
 
+\subsection{Audio Chord Extraction}
 
-\section{Audio Chord Extraction}
-\label{chordino}
+\subsubsection{Chordino}
+
 The Chordino plugin was developed following Mauch's 2010 work on chord
-extraction as submitted to MIREX in that
+extraction, submitted to MIREX in that
 year\cite{mauch:md1:2010}. While that submission used a C++ chroma
 implementation with a MATLAB dynamic Bayesian network as a chord
 extraction front-end\cite{matthias2010a}, Chordino is an entirely C++
@@ -131,17 +156,15 @@
 
 The method for the Chordino plugin has two parts:
 
-\subsection{NNLS Chroma}
-
-NNLS Chroma analyses a single channel of audio using frame-wise
-spectral input from the Vamp host. The spectrum is transformed to a
-log-frequency spectrum (constant-Q) with three bins per semitone.
-On this representation, two processing steps are performed: tuning,
-after which each centre bin (i.e. bin 2, 5, 8, …) corresponds to a
-semitone, even if the tuning of the piece deviates from 440 Hz
-standard pitch; and running standardisation: subtraction of the
-running mean, division by the running standard deviation. This has a
-spectral whitening effect.
+{\bf NNLS Chroma} --- NNLS Chroma analyses a single channel of audio
+using frame-wise spectral input from the Vamp host. The spectrum is
+transformed to a log-frequency spectrum (constant-Q) with three bins
+per semitone. On this representation, two processing steps are
+performed: tuning, after which each centre bin (i.e. bin 2, 5, 8, …)
+corresponds to a semitone, even if the tuning of the piece deviates
+from 440 Hz standard pitch; and running standardisation: subtraction
+of the running mean, division by the running standard deviation. This
+has a spectral whitening effect.
 
 The processed log-frequency spectrum is then used as an input for NNLS
 approximate transcription using a dictionary of harmonic notes with
@@ -150,119 +173,108 @@
 semitone spectrum is multiplied (element-wise) with the desired
 profile (chroma or bass chroma) and then mapped to 12 bins.
 
-\subsection{Chord transcription}
+{\bf Chord transcription} --- A fixed dictionary of chord profiles is
+used to calculate frame-wise chord similarities. A standard
+HMM/Viterbi approach is used to smooth these to provide a chord
+transcription.
 
-A fixed dictionary of chord profiles is used to calculate frame-wise
-chord similarities. A standard HMM/Viterbi approach is used to smooth
-these to provide a chord transcription.
+Chordino was written by Matthias Mauch.
 
-\section{Audio Melody Extraction}
+\subsection{Audio Melody Extraction}
 
-The Cepstral Pitch Tracker Vamp Plugin is a freely-available, open
-source Vamp plugin implementation of a monophonic pitch tracking and
-note segmentation method.
+\subsubsection{Cepstral Pitch Tracker}
+
+The Cepstral Pitch Tracker Vamp plugin is an open source Vamp plugin
+implementation of a monophonic pitch tracking and note segmentation
+method.
 
 The method is that described in the one-page handout ``Unit Testing:
 An audio research example'' accompanying our tutorial at DAFx
-2012\cite{chris2012a}. It is an agent system consisting of five
-components:
+2012\cite{chris2012a}. It is an agent system, in which agents are used
+to evaluate successive pitch peaks to test whether they can be
+combined to form a plausible note. The peaks are obtained from an
+interpolating peak finder applied to the cepstral transform (inverse
+FFT of the log magnitude spectrum) of the short-time Fourier transform
+of each input frame. An agent-management system supplies the pitches
+to agents, creates a new agent when a novel pitch is found, reaps any
+agents that expire without finding a plausible note, and accumulates
+the resulting valid notes.
 
-\begin{enumerate}
-\item Short-time Fourier transform;
-\item Transform to cepstral domain, as the inverse FFT of the log
-  magnitude spectrum;
-\item Peak finder and interpolator;
-\item Agent that takes a series of pitch peaks and tests to see if
-  they form a plausible note;
-\item Agent-management system that supplies the pitches to agents;
-  creates a new agent when a novel pitch is found and reaps any agents
-  that expire without finding a plausible note; accumulates a list of
-  valid notes; and discards failures.
-\end{enumerate}
+The Cepstral Pitch Tracker plugin was written by Chris Cannam.
-\section{Audio Onset Detection}
+\subsection{Audio Onset Detection}
 
-\subsection{Note Onset Detector Plugin}
+\subsubsection{QM Note Onset Detector}
 
-The Note Onset Detector Vamp plugin analyses a single channel of audio
-and estimates the onset times of notes within the music -- that is,
-the times at which notes and other audible events begin.
+The QM Note Onset Detector Vamp plugin estimates the onset times of
+notes within the music. It calculates an onset likelihood function for
+each spectral frame, and picks peaks in a smoothed version of this
+function.
 
-It calculates an onset likelihood function for each spectral frame,
-and picks peaks in a smoothed version of this function. The plugin is
-non-causal, returning all results at the end of processing.
+Several onset detection functions are available in this plugin; this
+submission uses the complex-domain method described
+in~\cite{chris2003a}.
 
-Please read refer to the following publication for the basic detection
-methods~\cite{chris2003a}. The Adaptative Whitening technique is
-described in~\cite{dan2007a}. The Percussion Onset detector is
-described in~\cite{dan2005a}.
+The QM Note Onset Detector plugin was written by Chris Duxbury, Juan
+Pablo Bello and Christian Landone.
 
-\subsection{OnsetDS Plugin}
+\subsubsection{OnsetsDS}
 
-OnsetDS is an onset detector that uses Dan Stowell's OnsetsDS
+OnsetsDS is an onset detector plugin wrapping Dan Stowell's OnsetsDS
 library\footnote{http://onsetsds.sourceforge.net/}, described
 in~\cite{dan2007a}.
 
-The purpose of OnsetsDS is to provide capabilities for FFT-based onset
-detection that works very efficiently in real-time, and can detect
-onsets pretty well in a broad variety of musical signals, with a fast
-reaction time.
+OnsetsDS was designed to provide FFT-based onset detection that works
+very efficiently in real time, with a fast reaction time. It is not
+tailored for non-real-time use or for any particular type of signal.
 
-It is not specialised for any particular type of signal. Nor is it
-particularly tailored towards non-real-time use (if we were working in
-non-real-time there are extra things we could do to improve the
-precision). Its efficiency and fast reaction are designed with general
-real-time musical applications in mind.
+The OnsetsDS plugin was written by Dan Stowell and Chris Cannam.
 
-\section{Audio Structural Segmentation}
+\subsection{Audio Structural Segmentation}
 
-\subsection{QM Segmenter Plugin}
+\subsubsection{QM Segmenter}
 
-The Segmenter Vamp plugin divides a single channel of music up into
-structurally consistent segments. It returns a numeric value (the
-segment type) for each moment at which a new segment starts.
+The QM Segmenter Vamp plugin divides a single channel of music up into
+structurally consistent segments.
 
-For music with clearly tonally distinguishable sections such as verse,
-chorus, etc., segments with the same type may be expected to be
-similar to one another in some structural sense. For example,
-repetitions of the chorus are likely to share a segment type.
+The method, described in~\cite{mark2008a}, relies upon timbral or
+pitch similarity to obtain the high-level song structure. This is
+based on the assumption that the distributions of timbre features are
+similar over corresponding structural elements of the music.
 
-The method, described in~\cite{mark2008a}, relies upon
-structural/timbral similarity to obtain the high-level song
-structure.
-This is based on the assumption that the distributions of timbre
-features are similar over corresponding structural elements of the
-music.
 
-The algorithm works by obtaining a frequency-domain representation of
-the audio signal using a Constant-Q transform, a Chromagram or
-Mel-Frequency Cepstral Coefficients (MFCC) as underlying features (the
-particular feature is selectable as a parameter). The extracted
-features are normalised in accordance with the MPEG-7 standard (NASE
-descriptor), which means the spectrum is converted to decibel scale
-and each spectral vector is normalised by the RMS energy envelope. The
-value of this envelope is stored for each processing block of
-audio. This is followed by the extraction of 20 principal components
-per block using PCA, yielding a sequence of 21 dimensional feature
-vectors where the last element in each vector corresponds to the
-energy envelope.
+The input feature is a frequency-domain representation of the audio
+signal, in this case using a Constant-Q transform for the underlying
+features (though the plugin supports other timbral and pitch
+features). The extracted features are normalised in accordance with
+the MPEG-7 standard (NASE descriptor): the spectrum is converted to
+decibel scale and each spectral vector is normalised by the RMS energy
+envelope, whose value is stored for each processing block of
+audio. This is followed by the extraction of 20 principal components
+per block using PCA, yielding a sequence of 21-dimensional feature
+vectors in which the last element corresponds to the energy envelope.
 
 A 40-state Hidden Markov Model is then trained on the whole sequence
-of features, with each state of the HMM corresponding to a specific
-timbre type. This process partitions the timbre-space of a given track
-into 40 possible types. The important assumption of the model is that
-the distribution of these features remain consistent over a structural
-segment. After training and decoding the HMM, the song is assigned a
-sequence of timbre-features according to specific timbre-type
-distributions for each possible structural segment.
+of features, with each state corresponding to a specific timbre
+type. This partitions the timbre-space of a given track into 40
+possible types. After training and decoding the HMM, the song is
+assigned a sequence of timbre-features according to specific
+timbre-type distributions for each possible structural segment.
 
 The segmentation itself is computed by clustering timbre-type
 histograms. A series of histograms are created over a sliding window
 which are grouped into M clusters by an adapted soft k-means
-algorithm. Each of these clusters will correspond to a specific
-segment-type of the analyzed song. Reference histograms, iteratively
-updated during clustering, describe the timbre distribution for each
-segment. The segmentation arises from the final cluster assignments.
+algorithm. Reference histograms, iteratively updated during
+clustering, describe the timbre distribution for each segment. The
+segmentation arises from the final cluster assignments.
 
-\subsection{Segmentino}
+The QM Segmenter plugin was written by Mark Levy.
+
+\subsubsection{Segmentino}
+
+The Segmentino plugin is a new C++ implementation of a segmentation
+method first described in Matthias Mauch's paper on using musical
+structure to enhance chord transcription\cite{matthias2009a} and
+expanded on in Mauch's PhD thesis\cite{matthiasphd}.
 
 A beat-quantised chroma representation is used to calculate pair-wise
 similarities between beats (really: beat ``shingles'', i.e.\ multi-beat
@@ -283,10 +295,13 @@
 certain length; corresponding segments have the same length in beats.
 
-\section{Audio Tempo Estimation}
+The Segmentino plugin was written by Matthias Mauch and Massimiliano
+Zanoni. It is currently in preparation for a public release.
 
-For this task we used the same plugin as describer in
-Sec.~\ref{tempo_and_beat_tracker}.
+\subsection{Audio Tempo Estimation}
+
+For this task we submit the same plugin as that used in the Audio Beat
+Tracking task in section~\ref{tempo_and_beat_tracker}.
 
 \bibliography{qmvamp-mirex2013}
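
The dynamic programming stage described above for the QM Tempo and Beat Tracker lends itself to a compact illustration. Below is a minimal C++ sketch of the cumulative-score-and-backtrace recursion after Ellis (2007); it is not the plugin's own code. The onset detection function is taken as given, and the beat period (assumed to come from the tempo identification stage) and the tightness weight are illustrative assumptions.

    // Minimal sketch of beat tracking by dynamic programming, after Ellis
    // (2007). Not the plugin's actual code: the onset detection function
    // (odf), the beat period and the "tightness" penalty are assumptions.
    #include <algorithm>
    #include <cmath>
    #include <vector>

    std::vector<int> trackBeats(const std::vector<double> &odf,
                                int period,        // beat period, in ODF samples
                                double tightness)  // weight on tempo deviation
    {
        const int n = int(odf.size());
        std::vector<int> beats;
        if (n == 0 || period <= 0) return beats;

        std::vector<double> score(odf);    // cumulative score, seeded with the ODF
        std::vector<int> backtrace(n, -1);

        for (int t = 0; t < n; ++t) {
            // Consider previous beats roughly half to two periods back
            int lo = std::max(0, t - 2 * period);
            int hi = std::max(0, t - period / 2);
            double best = -1e18;
            int bestIdx = -1;
            for (int p = lo; p < hi; ++p) {
                // Log-Gaussian penalty for deviating from the expected period
                double dev = std::log(double(t - p) / period);
                double cand = score[p] - tightness * dev * dev;
                if (cand > best) { best = cand; bestIdx = p; }
            }
            if (bestIdx >= 0) {
                score[t] += best;          // recursive cumulative score
                backtrace[t] = bestIdx;    // best previous beat for this sample
            }
        }

        // Start from the best-scoring sample in the final period, then
        // recursively sample the backtrace back to the beginning
        int t = int(std::max_element(score.begin() + std::max(0, n - period),
                                     score.end()) - score.begin());
        for (; t >= 0; t = backtrace[t]) beats.push_back(t);
        std::reverse(beats.begin(), beats.end());
        return beats;
    }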
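The correlation method described for the QM Key Detector can be sketched in the same spirit. The plugin's own profiles are drawn from Bach recordings, as the abstract notes; this self-contained sketch substitutes the widely published Krumhansl-Kessler profiles, and the input chroma frame is invented for illustration.

    // Sketch of key estimation by correlating a 12-bin chromagram with
    // rotated major/minor key profiles. The plugin derives its profiles
    // from Bach recordings; the Krumhansl-Kessler profiles stand in here,
    // and the input chroma frame is invented.
    #include <cmath>
    #include <cstdio>

    static double correlate(const double *x, const double *prof, int rot)
    {
        double mx = 0, mp = 0;
        for (int i = 0; i < 12; ++i) { mx += x[i]; mp += prof[i]; }
        mx /= 12; mp /= 12;
        double num = 0, dx = 0, dp = 0;
        for (int i = 0; i < 12; ++i) {
            double a = x[i] - mx;
            double b = prof[(i + 12 - rot) % 12] - mp; // profile rotated to tonic
            num += a * b; dx += a * a; dp += b * b;
        }
        return num / std::sqrt(dx * dp);
    }

    int main()
    {
        const double major[12] = {6.35,2.23,3.48,2.33,4.38,4.09,
                                  2.52,5.19,2.39,3.66,2.29,2.88};
        const double minor[12] = {6.33,2.68,3.52,5.38,2.60,3.53,
                                  2.54,4.75,3.98,2.69,3.34,3.17};
        // Invented chroma frame, loosely C-majorish
        const double chroma[12] = {0.9,0.1,0.4,0.1,0.6,0.4,
                                   0.1,0.8,0.1,0.3,0.1,0.2};
        const char *names[12] = {"C","C#","D","D#","E","F",
                                 "F#","G","G#","A","A#","B"};
        double bestR = -2; int bestKey = 0; bool isMajor = true;
        for (int k = 0; k < 12; ++k) {
            double rM = correlate(chroma, major, k);
            double rm = correlate(chroma, minor, k);
            if (rM > bestR) { bestR = rM; bestKey = k; isMajor = true; }
            if (rm > bestR) { bestR = rm; bestKey = k; isMajor = false; }
        }
        std::printf("estimated key: %s %s (r = %.3f)\n",
                    names[bestKey], isMajor ? "major" : "minor", bestR);
        return 0;
    }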
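Chordino's chord transcription step, smoothing frame-wise chord similarities with a standard HMM/Viterbi pass, can be sketched as follows. The similarity values, the dictionary size and the uniform self-transition probability are all assumptions; the plugin's actual transition model is not reproduced here.

    // Sketch of HMM/Viterbi smoothing over frame-wise chord similarities,
    // as in Chordino's chord transcription step. The similarity values,
    // dictionary size and self-transition probability are assumptions.
    #include <cmath>
    #include <vector>

    // sim[t][c]: similarity of frame t to chord c, treated as an emission
    // likelihood. Returns the smoothed chord index path. Assumes at least
    // two chords in the dictionary and selfTransProb > 0.5.
    std::vector<int> viterbiSmooth(const std::vector<std::vector<double> > &sim,
                                   double selfTransProb = 0.9)
    {
        const int T = int(sim.size());
        if (T == 0) return std::vector<int>();
        const int C = int(sim[0].size());
        const double stay = std::log(selfTransProb);
        const double move = std::log((1.0 - selfTransProb) / (C - 1));

        std::vector<std::vector<double> > delta(T, std::vector<double>(C));
        std::vector<std::vector<int> > psi(T, std::vector<int>(C, 0));

        for (int c = 0; c < C; ++c)
            delta[0][c] = std::log(sim[0][c] + 1e-12);

        for (int t = 1; t < T; ++t) {
            // Best chord at t-1: with a flat change penalty, any chord
            // change is best made from this one
            int argBest = 0;
            for (int c = 1; c < C; ++c)
                if (delta[t-1][c] > delta[t-1][argBest]) argBest = c;
            for (int c = 0; c < C; ++c) {
                double viaStay = delta[t-1][c] + stay;
                double viaChange = delta[t-1][argBest] + move;
                // When argBest == c, staying still wins because stay > move
                if (viaStay >= viaChange) {
                    delta[t][c] = viaStay; psi[t][c] = c;
                } else {
                    delta[t][c] = viaChange; psi[t][c] = argBest;
                }
                delta[t][c] += std::log(sim[t][c] + 1e-12);
            }
        }

        // Backtrace from the best final chord
        std::vector<int> path(T);
        int c = 0;
        for (int k = 1; k < C; ++k)
            if (delta[T-1][k] > delta[T-1][c]) c = k;
        for (int t = T - 1; t >= 0; --t) { path[t] = c; c = psi[t][c]; }
        return path;
    }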
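The core of the Cepstral Pitch Tracker's estimate, the cepstral transform as the inverse FFT of the log magnitude spectrum followed by a peak search, can be sketched as below. A naive O(n^2) DFT stands in for a real FFT library to keep the sketch self-contained; the frame size, window and pitch range are assumptions, and the plugin's peak interpolation, agents and note segmentation are not reproduced.

    // Sketch of cepstral pitch estimation: the cepstrum is the inverse
    // FFT of the log magnitude spectrum, and a peak within a plausible
    // quefrency range maps to a pitch estimate. A naive O(n^2) DFT is
    // used for brevity; a real implementation would use an FFT.
    #include <cmath>
    #include <complex>
    #include <cstdio>
    #include <vector>

    static const double PI = 3.14159265358979323846;

    static std::vector<std::complex<double> >
    dft(const std::vector<std::complex<double> > &in, int sign)
    {
        const int n = int(in.size());
        std::vector<std::complex<double> > out(n);
        for (int k = 0; k < n; ++k) {
            std::complex<double> sum(0, 0);
            for (int t = 0; t < n; ++t)
                sum += in[t] * std::polar(1.0, sign * 2.0 * PI * k * t / n);
            out[k] = sum;
        }
        return out;
    }

    double cepstralPitch(const std::vector<double> &frame, double sampleRate)
    {
        const int n = int(frame.size());
        std::vector<std::complex<double> > x(n);
        for (int i = 0; i < n; ++i)  // Hann window
            x[i] = frame[i] * (0.5 - 0.5 * std::cos(2.0 * PI * i / n));
        std::vector<std::complex<double> > spec = dft(x, -1);
        for (int i = 0; i < n; ++i)  // log magnitude spectrum
            spec[i] = std::log(std::abs(spec[i]) + 1e-10);
        std::vector<std::complex<double> > cep = dft(spec, +1);

        // Search quefrencies corresponding to roughly 50..1000 Hz
        int lo = int(sampleRate / 1000.0), hi = int(sampleRate / 50.0);
        if (hi > n / 2) hi = n / 2;
        int peak = lo;
        for (int q = lo; q <= hi; ++q)
            if (cep[q].real() > cep[peak].real()) peak = q;
        return sampleRate / peak;
    }

    int main()
    {
        const double sr = 44100.0, f0 = 220.0;
        std::vector<double> frame(2048);
        for (size_t i = 0; i < frame.size(); ++i)  // toy harmonic signal
            frame[i] = std::sin(2*PI*f0*i/sr) + 0.5*std::sin(2*PI*2*f0*i/sr);
        std::printf("estimated pitch: %.1f Hz\n", cepstralPitch(frame, sr));
        return 0;
    }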
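The complex-domain onset detection function cited for the QM Note Onset Detector predicts each spectral bin from the previous two frames, assuming steady magnitude and phase velocity, and sums the magnitudes of the prediction errors. A sketch, assuming STFT frames are supplied by the host as they would be for a Vamp plugin:

    // Sketch of the complex-domain onset detection function: each bin is
    // predicted from the previous two frames (same magnitude, linearly
    // extrapolated phase), and the detection value is the summed
    // magnitude of the prediction error.
    #include <cmath>
    #include <complex>
    #include <vector>

    double complexDomainODF(const std::vector<std::complex<double> > &prev2,
                            const std::vector<std::complex<double> > &prev1,
                            const std::vector<std::complex<double> > &cur)
    {
        double sum = 0.0;
        for (size_t k = 0; k < cur.size(); ++k) {
            double mag = std::abs(prev1[k]);  // expect steady magnitude...
            double phi = 2.0 * std::arg(prev1[k]) - std::arg(prev2[k]);
            std::complex<double> predicted = std::polar(mag, phi);
            sum += std::abs(cur[k] - predicted);  // ...penalise deviation
        }
        return sum;
    }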
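The adaptive whitening behind OnsetsDS can be sketched as a per-bin decaying peak tracker: each magnitude is divided by a slowly decaying running estimate of that bin's peak, flattening the spectral envelope before onset detection. The decay coefficient and floor below are assumptions, not the library's defaults.

    // Sketch of adaptive whitening: each spectral bin is divided by a
    // slowly decaying running estimate of its own peak magnitude. The
    // decay coefficient and floor are illustrative assumptions.
    #include <algorithm>
    #include <vector>

    class AdaptiveWhitener {
    public:
        AdaptiveWhitener(size_t bins, double decay = 0.997,
                         double floorVal = 1e-4)
            : m_peaks(bins, floorVal), m_decay(decay), m_floor(floorVal) {}

        // Whiten one frame of spectral magnitudes in place
        void process(std::vector<double> &mags) {
            for (size_t k = 0; k < mags.size() && k < m_peaks.size(); ++k) {
                // Let the stored peak decay, then track any new maximum
                m_peaks[k] = std::max(std::max(mags[k], m_floor),
                                      m_peaks[k] * m_decay);
                mags[k] /= m_peaks[k];
            }
        }

    private:
        std::vector<double> m_peaks;
        double m_decay, m_floor;
    };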
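The NASE-style normalisation described for the QM Segmenter's feature extraction might look roughly like this. The exact MPEG-7 scaling details and the input representation are assumptions, and the subsequent PCA step is not shown.

    // Rough sketch of NASE-style normalisation: each spectral vector is
    // converted to decibels and normalised by its RMS value, which is
    // kept as an extra element for the later PCA stage. Assumes a
    // magnitude spectrum as input.
    #include <cmath>
    #include <vector>

    // Returns the dB-scaled, RMS-normalised vector with the envelope
    // value appended as the final element.
    std::vector<double> naseNormalise(const std::vector<double> &magnitudes)
    {
        const size_t n = magnitudes.size();
        std::vector<double> out(n + 1);
        double sumsq = 0.0;
        for (size_t i = 0; i < n; ++i) {
            out[i] = 20.0 * std::log10(magnitudes[i] + 1e-12); // to decibels
            sumsq += out[i] * out[i];
        }
        const double rms = std::sqrt(sumsq / (n > 0 ? n : 1));
        for (size_t i = 0; i < n; ++i)
            out[i] /= (rms + 1e-12);  // normalise by the energy envelope
        out[n] = rms;                 // store the envelope value itself
        return out;
    }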
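Finally, the pair-wise beat ``shingle'' similarity underlying Segmentino can be sketched as a cosine similarity between concatenated runs of beat-quantised chroma vectors. The shingle length and the chroma source are assumptions for illustration; the plugin's actual similarity measure and segment-finding logic are not reproduced.

    // Sketch of pair-wise beat "shingle" similarity: beat-quantised
    // chroma vectors are concatenated over a short run of beats and
    // compared by cosine similarity. Shingle length is an assumption.
    #include <cmath>
    #include <vector>

    typedef std::vector<double> Chroma;  // one 12-bin vector per beat

    // Caller must ensure i + shingleLen and j + shingleLen stay within
    // the beats sequence.
    double shingleSimilarity(const std::vector<Chroma> &beats,
                             size_t i, size_t j, size_t shingleLen)
    {
        double dot = 0, ni = 0, nj = 0;
        for (size_t b = 0; b < shingleLen; ++b) {
            const Chroma &x = beats[i + b], &y = beats[j + b];
            for (size_t k = 0; k < x.size(); ++k) {
                dot += x[k] * y[k];
                ni  += x[k] * x[k];
                nj  += y[k] * y[k];
            }
        }
        return dot / (std::sqrt(ni * nj) + 1e-12);
    }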