Mercurial > hg > vamp-plugin-sdk

--- a/examples/FixedTempoEstimator.cpp	Wed Nov 12 10:39:05 2008 +0000
+++ b/examples/FixedTempoEstimator.cpp	Wed Nov 12 14:11:01 2008 +0000
@@ -47,6 +47,7 @@


 class FixedTempoEstimator::D
+// this class just avoids us having to declare any data members in the header
 {
 public:
     D(float inputSampleRate);
@@ -314,16 +315,22 @@
     m_lasttime = ts;

     if (m_n == m_dfsize) {
+        // If we have seen enough input, do the estimation and return
         calculate();
         fs = assembleFeatures();
         ++m_n;
         return fs;
     }

+    // If we have seen more than enough, just discard and return!
     if (m_n > m_dfsize) return FeatureSet();

     float value = 0.f;

+    // m_df will contain an onset detection function based on the rise
+    // in overall power from one spectral frame to the next --
+    // simplistic but reasonably effective for our purposes.
+
     for (size_t i = 1; i < m_blockSize/2; ++i) {

         float real = inputBuffers[0][i*2];
@@ -378,18 +385,25 @@
         return;
     }

-    int n = m_n;
+    // This function takes m_df (the detection function array filled
+    // out in process()) and calculates m_r (the raw autocorrelation)
+    // and m_fr (the filtered autocorrelation from whose peaks tempo
+    // estimates will be taken).

-    m_r = new float[n/2];
-    m_fr = new float[n/2];
-    m_t = new float[n/2];
+    int n = m_n; // length of actual df array (m_dfsize is the theoretical max)
+
+    m_r  = new float[n/2]; // raw autocorrelation
+    m_fr = new float[n/2]; // filtered autocorrelation
+    m_t  = new float[n/2]; // averaged tempo estimate for each lag value

     for (int i = 0; i < n/2; ++i) {
-        m_r[i] = 0.f;
+        m_r[i]  = 0.f;
         m_fr[i] = 0.f;
-        m_t[i] = lag2tempo(i);
+        m_t[i]  = lag2tempo(i);
     }

+    // Calculate the raw autocorrelation of the detection function
+
     for (int i = 0; i < n/2; ++i) {

         for (int j = i; j < n-1; ++j) {
@@ -399,20 +413,20 @@
         m_r[i] /= n - i - 1;
     }

+    // Filter the autocorrelation and average out the tempo estimates
+
     float related[] = { 0.5, 2, 4, 8 };

     for (int i = 1; i < n/2-1; ++i) {

-        float weight = 1.f - fabsf(128.f - lag2tempo(i)) * 0.005;
-        if (weight < 0.f) weight = 0.f;
-        weight = weight * weight * weight;
-
         m_fr[i] = m_r[i];

         int div = 1;

         for (int j = 0; j < int(sizeof(related)/sizeof(related[0])); ++j) {

+            // Check for an obvious peak at each metrically related lag
+
             int k0 = int(i * related[j] + 0.5);

             if (k0 >= 0 && k0 < int(n/2)) {
@@ -431,11 +445,18 @@
                     have = true;
                 }

+                // Boost the original lag according to the strongest
+                // value found close to this related lag
+
                 m_fr[i] += m_r[kmax] / 5;

                 if ((kmax == 0 || m_r[kmax] > m_r[kmax-1]) &&
                     (kmax == n/2-1 || m_r[kmax] > m_r[kmax+1]) &&
                     kvmax > kvmin * 1.05) {
+
+                    // The strongest value close to the related lag is
+                    // also a pretty good looking peak, so use it to
+                    // improve our tempo estimate for the original lag

                     m_t[i] = m_t[i] + lag2tempo(kmax) * related[j];
                     ++div;
@@ -445,6 +466,13 @@

         m_t[i] /= div;

+        // Finally apply a primitive perceptual weighting (to prefer
+        // tempi of around 120-130)
+
+        float weight = 1.f - fabsf(128.f - lag2tempo(i)) * 0.005;
+        if (weight < 0.f) weight = 0.f;
+        weight = weight * weight * weight;
+
         m_fr[i] += m_fr[i] * (weight / 3);
     }
 }
@@ -453,7 +481,7 @@
 FixedTempoEstimator::D::assembleFeatures()
 {
     FeatureSet fs;
-    if (!m_r) return fs; // No results
+    if (!m_r) return fs; // No autocorrelation: no results

     Feature feature;
     feature.hasTimestamp = true;
@@ -467,6 +495,9 @@
     int n = m_n;

     for (int i = 0; i < n; ++i) {
+
+        // Return the detection function in the DF output
+
         feature.timestamp = m_start +
             RealTime::frame2RealTime(i * m_stepSize, m_inputSampleRate);
         feature.values[0] = m_df[i];
@@ -475,6 +506,10 @@
     }

     for (int i = 1; i < n/2; ++i) {
+
+        // Return the raw autocorrelation in the ACF output, each
+        // value labelled according to its corresponding tempo
+
         feature.timestamp = m_start +
             RealTime::frame2RealTime(i * m_stepSize, m_inputSampleRate);
         feature.values[0] = m_r[i];
@@ -496,9 +531,16 @@

         if (m_fr[i] > m_fr[i-1] &&
             m_fr[i] > m_fr[i+1]) {
+
+            // This is a peak in the filtered autocorrelation: stick
+            // it into the map from filtered autocorrelation to lag
+            // index -- this sorts our peaks by filtered acf value
+
             candidates[m_fr[i]] = i;
         }

+        // Also return the filtered autocorrelation in its own output
+
         feature.timestamp = m_start +
             RealTime::frame2RealTime(i * m_stepSize, m_inputSampleRate);
         feature.values[0] = m_fr[i];
@@ -519,15 +561,25 @@
     feature.hasDuration = true;
     feature.duration = m_lasttime - m_start;

+    // The map contains only peaks and is sorted by filtered acf
+    // value, so the final element in it is our "best" tempo guess
+
     std::map<float, int>::const_iterator ci = candidates.end();
     --ci;
     int maxpi = ci->second;

     if (m_t[maxpi] > 0) {
-        cerr << "*** Using adjusted tempo " << m_t[maxpi] << " instead of lag tempo " << lag2tempo(maxpi) << endl;
+
+        // This lag has an adjusted tempo from the averaging process:
+        // use it
+
         feature.values[0] = m_t[maxpi];
+
     } else {
-        // shouldn't happen -- it would imply that this high value was not a peak!
+
+        // shouldn't happen -- it would imply that this high value was
+        // not a peak!
+
         feature.values[0] = lag2tempo(maxpi);
         cerr << "WARNING: No stored tempo for index " << maxpi << endl;
     }
@@ -535,12 +587,17 @@
     sprintf(buffer, "%.1f bpm", feature.values[0]);
     feature.label = buffer;

+    // Return the best tempo in the main output
+
     fs[TempoOutput].push_back(feature);

+    // And return the other estimates (up to the arbitrarily chosen
+    // number of 10 of them) in the candidates output
+
     feature.values.clear();
     feature.label = "";

-    while (feature.values.size() < 8) {
+    while (feature.values.size() < 10) {
         if (m_t[ci->second] > 0) {
             feature.values.push_back(m_t[ci->second]);
         } else {
--- a/examples/PowerSpectrum.cpp	Wed Nov 12 10:39:05 2008 +0000
+++ b/examples/PowerSpectrum.cpp	Wed Nov 12 14:11:01 2008 +0000
@@ -115,7 +115,15 @@
     d.description = "Power values of the frequency spectrum bins calculated from the input signal";
     d.unit = "";
     d.hasFixedBinCount = true;
-    d.binCount = m_blockSize / 2 + 1;
+    if (m_blockSize == 0) {
+        // Just so as not to return "1".  This is the bin count that
+        // would result from a block size of 1024, which is a likely
+        // default -- but the host should always set the block size
+        // before querying the bin count for certain.
+        d.binCount = 513;
+    } else {
+        d.binCount = m_blockSize / 2 + 1;
+    }
     d.hasKnownExtents = false;
     d.isQuantized = false;
     d.sampleType = OutputDescriptor::OneSamplePerStep;
--- a/examples/SpectralCentroid.cpp	Wed Nov 12 10:39:05 2008 +0000
+++ b/examples/SpectralCentroid.cpp	Wed Nov 12 14:11:01 2008 +0000
@@ -137,8 +137,6 @@
     return list;
 }

-//static int scount = 0;
-
 SpectralCentroid::FeatureSet
 SpectralCentroid::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
 {
@@ -149,8 +147,6 @@
 	return FeatureSet();
     }

-//    std::cerr << "SpectralCentroid::process: count = " << scount++ << ", timestamp = " << timestamp << ", total power = ";
-
     double numLin = 0.0, numLog = 0.0, denom = 0.0;

     for (size_t i = 1; i <= m_blockSize/2; ++i) {
@@ -163,8 +159,6 @@
 	denom += scalemag;
     }

-//    std::cerr << denom << std::endl;
-
     FeatureSet returnFeatures;

     if (denom != 0.0) {
--- a/examples/ZeroCrossing.cpp	Wed Nov 12 10:39:05 2008 +0000
+++ b/examples/ZeroCrossing.cpp	Wed Nov 12 14:11:01 2008 +0000
@@ -138,8 +138,6 @@
     return list;
 }

-//static int scount = 0;
-
 ZeroCrossing::FeatureSet
 ZeroCrossing::process(const float *const *inputBuffers,
                       Vamp::RealTime timestamp)
@@ -151,15 +149,11 @@
 	return FeatureSet();
     }

-//    std::cerr << "ZeroCrossing::process: count = " << scount++ << ", timestamp = " << timestamp << ", rms = ";
-
     float prev = m_previousSample;
     size_t count = 0;

     FeatureSet returnFeatures;

-//    double acc = 0.0;
-
     for (size_t i = 0; i < m_stepSize; ++i) {

 	float sample = inputBuffers[0][i];
@@ -180,14 +174,9 @@
 	    returnFeatures[1].push_back(feature);
 	}

-//        acc += sample * sample;
-
 	prev = sample;
     }

-//    acc /= m_stepSize;
-//    std::cerr << sqrt(acc) << std::endl;
-
     m_previousSample = prev;

     Feature feature;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/examples/vamp-example-plugins.txt	Wed Nov 12 14:11:01 2008 +0000
@@ -0,0 +1,283 @@
+
+Vamp Example Plugins
+====================
+
+The vamp-example-plugins library contains a number of Vamp audio
+analysis plugins provided as part of the Vamp plugin SDK.
+
+These are simple, but sometimes useful, plugins whose source code you
+are free to study and reuse in any proprietary or non-proprietary
+plugins of your own without any licensing obligation.
+
+User documentation for the individual plugins in this library follows.
+
+
+Amplitude Follower
+==================
+
+System identifier: vamp-example-plugins:amplitudefollower
+RDF URI: http://vamp-plugins.org/rdf/plugins/vamp-example-plugins#amplitudefollower
+
+Amplitude Follower tracks and returns the amplitude of the audio
+signal, block by block.  It uses a method from the SuperCollider audio
+processing language, implemented as a Vamp plugin by Dan Stowell.
+
+Parameters
+----------
+
+Attack time (seconds)
+Release time (seconds)
+
+Outputs
+-------
+
+Amplitude
+~~~~~~~~~
+
+The estimated peak amplitude (in volts) for the current processing block.
+
+
+Simple Fixed Tempo Estimator
+============================
+
+System identifier: vamp-example-plugins:fixedtempo
+RDF URI: http://vamp-plugins.org/rdf/plugins/vamp-example-plugins#fixedtempo
+
+Simple Fixed Tempo Estimator analyses a fragment of audio and
+estimates its tempo.  It assumes that its input is of fixed tempo, and
+it analyses only the first few seconds (the exact duration is
+configurable) before returning a result, discarding all subsequent input.
+
+The plugin calculates an overall energy rise function across a series
+of short frequency-domain input frames, takes the autocorrelation of
+this function, filters it to stress possible metrical patterns,
+locates peaks, and converts from autocorrelation lag to the
+corresponding tempo.
+
+The filtering process involves searching for peaks at simple
+metrically related intervals (at a given autocorrelation lag as well
+as at 0.5, 2, 4, and 8 times that lag), boosting each peak that shows
+strong related peaks.  A simplistic perceptual curve is also applied
+in order to increase the probability of detecting a "likely" tempo.
+For improved tempo precision, each tempo with strong related peaks is
+averaged with the tempi calculated from those peaks.
+
+The method is mainly tuned for 4/4 pop and dance rhythms.
+
+This plugin returns many of its intermediate calculations as
+additional outputs, as well as the most favoured tempo.  Although as a
+tempo estimator it's still fairly primitive, it is intended to provide
+a useful example of a slightly more complex feature extraction plugin
+than the other examples, as well as one that returns several different
+types of output at a time.
+
+Parameters
+----------
+
+Minimum estimated tempo, Maximum estimated tempo (bpm) - These
+parameters control the range of values within which the tempo
+estimator will return its estimate.
+
+Input duration to study (seconds) - The tempo estimator uses only the
+first part of its input, discarding any that follows.  This parameter
+controls how much input it will use.  There is no value in increasing
+this beyond 8x the duration of the slowest returned beat.  The default
+of 10 seconds is likely to be appropriate for most purposes.
+
+Outputs
+-------
+
+Tempo
+~~~~~
+
+The tempo estimator's best guess at the tempo of its input, in beats
+per minute.
+
+This is returned as a feature whose timestamp and duration cover the
+range of the input which was used in estimating the tempo, with a
+single value containing the tempo.
+
+Tempo candidates
+~~~~~~~~~~~~~~~~
+
+Several guesses at the possible tempo.  This output is returned as a
+single feature whose timestamp and duration cover the range of the
+input which was used in estimating the tempo, with up to 10 bins
+containing one tempo value in each bin, with the "best guess" tempo in
+bin 0.
+
+Detection function
+~~~~~~~~~~~~~~~~~~
+
+The basic onset detection function used in tempo estimation.
+
+Autocorrelation function
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+The autocorrelation of the onset detection function.
+
+Filtered Autocorrelation
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+The autocorrelation after filtering to boost values with possible
+metrically related peaks and to apply perceptual weighting.  The lag
+at which this function has its highest peak is the one whose tempo
+will be reported as the "best guess".
+
+
+Simple Percussion Onset Detector
+================================
+
+System identifier: vamp-example-plugins:percussiononsets
+RDF URI: http://vamp-plugins.org/rdf/plugins/vamp-example-plugins#percussiononsets
+
+Simple Percussion Onset Detector estimates the locations of percussive
+onsets in the audio signal.  It uses a method described in "Drum
+Source Separation using Percussive Feature Detection and Spectral
+Modulation" by Dan Barry, Derry Fitzgerald, Eugene Coyle and Bob
+Lawlor, ISSC 2005.
+
+The principle is to exploit the broadband nature of noisy percussive
+onsets by identifying only those frames in which the energy rise shows
+a broadband profile.
+
+The plugin takes a series of frequency domain frames, and examines
+each frame to count the number of bins whose energy content has
+increased by more than a certain threshold since the prior frame.
+Frames in which this number is at a peak relative to prior and
+following frames and also exceeds another threshold value are
+classified as percussive onsets.
+
+Parameters
+----------
+
+Energy rise threshold (dB) - The rise in energy within a bin from one
+frame to the next that is required for a bin to be counted toward the
+detection function's bin count.  This roughly corresponds to how
+"loud" a percussive sound must be in order to be detected.
+
+Sensitivity (%) - The proportion of bins that must exceed the energy
+rise threshold in order for an onset to be detected (at frames in
+which the detection function peaks).  This roughly corresponds to how
+"noisy" a percussive sound must be in order to be detected.
+
+Outputs
+-------
+
+Onsets
+~~~~~~
+
+The estimated onset locations.
+
+Detection Function
+~~~~~~~~~~~~~~~~~~
+
+The energy rise detection function whose peaks were used to estimate
+onset locations.
+
+
+Simple Power Spectrum
+=====================
+
+System identifier: vamp-example-plugins:powerspectrum
+RDF URI: http://vamp-plugins.org/rdf/plugins/vamp-example-plugins#powerspectrum
+
+Simple Power Spectrum returns a power spectrum calculated from
+windowed short-time Fourier transforms of the input audio.  (The power
+spectrum for a frame consists of a sequence of the squares of the
+magnitudes of the complex values for each frequency bin in the result
+of the Fourier transform.)
+
+This very simple plugin is an illustration of the fact that if a
+plugin requests frequency-domain input, its input will already be in
+the form needed for a spectrum such as this.  The plugin has no work
+left to do except to calculate the squared magnitude from the
+cartesian complex representation.
+
+This plugin also illustrates how to return "grid-type" visualisation
+data from a Vamp plugin.
+
+Parameters
+----------
+
+None.
+
+Outputs
+-------
+
+Power Spectrum
+~~~~~~~~~~~~~~
+
+The power spectrum calculated from the input frame.  This output
+returns a single feature per processing block, containing
+blocksize/2+1 power values corresponding to the FFT bins from DC to
+Nyquist inclusive.  The DC bin is always returned.
+
+
+Spectral Centroid
+=================
+
+System identifier: vamp-example-plugins:spectralcentroid
+RDF URI: http://vamp-plugins.org/rdf/plugins/vamp-example-plugins#spectralcentroid
+
+Spectral Centroid calculates the "centre of gravity" of the frequency
+spectrum for each input frame.
+
+Parameters
+----------
+
+None.
+
+Outputs
+-------
+
+Log Frequency Centroid
+~~~~~~~~~~~~~~~~~~~~~~
+
+The centroid of the log-weighted frequency spectrum.  That is, the sum
+across Fourier transform output bins of the logarithm of the bin
+frequency multiplied by the bin magnitude, divided by the sum of the
+bin magnitudes, with the inverse logarithm then taken so as to give
+the result as a frequency in Hz.
+
+Linear Frequency Centroid
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The centroid of the linear-weighted frequency spectrum.  That is, the
+sum across Fourier transform output bins of the bin frequency
+multiplied by the bin magnitude, divided by the sum of the bin
+magnitudes.  The result is a frequency in Hz.
+
+
+Zero Crossings
+==============
+
+System identifier: vamp-example-plugins:zerocrossing
+RDF URI: http://vamp-plugins.org/rdf/plugins/vamp-example-plugins#zerocrossing
+
+Zero Crossings calculates the positions and density of "zero-crossing"
+points in an audio waveform.  For the purposes of this plugin, that
+means those positions at which the sampled value switches from
+zero-or-less to greater-than-zero, or vice versa.
+
+Parameters
+----------
+
+None.
+
+Outputs
+-------
+
+Zero Crossing Counts
+~~~~~~~~~~~~~~~~~~~~
+
+The number of zero-crossing points found in the current block of
+samples, as a single-valued feature returned per processing block.
+
+Zero Crossings
+~~~~~~~~~~~~~~
+
+The locations of zero-crossing points.  One feature is returned for
+each crossing point, timestamped to the crossing location and
+carrying no values.
+