Mercurial > hg > vamp-plugin-sdk
changeset 255:88ef5ffdbe8d
* docs
author | cannam |
---|---|
date | Wed, 12 Nov 2008 14:11:01 +0000 |
parents | e02c93c4de8f |
children | 3d98dd2ba0d6 |
files | examples/FixedTempoEstimator.cpp examples/PowerSpectrum.cpp examples/SpectralCentroid.cpp examples/ZeroCrossing.cpp examples/vamp-example-plugins.txt |
diffstat | 5 files changed, 363 insertions(+), 32 deletions(-) [+] |
line wrap: on
line diff
--- a/examples/FixedTempoEstimator.cpp Wed Nov 12 10:39:05 2008 +0000 +++ b/examples/FixedTempoEstimator.cpp Wed Nov 12 14:11:01 2008 +0000 @@ -47,6 +47,7 @@ class FixedTempoEstimator::D +// this class just avoids us having to declare any data members in the header { public: D(float inputSampleRate); @@ -314,16 +315,22 @@ m_lasttime = ts; if (m_n == m_dfsize) { + // If we have seen enough input, do the estimation and return calculate(); fs = assembleFeatures(); ++m_n; return fs; } + // If we have seen more than enough, just discard and return! if (m_n > m_dfsize) return FeatureSet(); float value = 0.f; + // m_df will contain an onset detection function based on the rise + // in overall power from one spectral frame to the next -- + // simplistic but reasonably effective for our purposes. + for (size_t i = 1; i < m_blockSize/2; ++i) { float real = inputBuffers[0][i*2]; @@ -378,18 +385,25 @@ return; } - int n = m_n; + // This function takes m_df (the detection function array filled + // out in process()) and calculates m_r (the raw autocorrelation) + // and m_fr (the filtered autocorrelation from whose peaks tempo + // estimates will be taken). 
- m_r = new float[n/2]; - m_fr = new float[n/2]; - m_t = new float[n/2]; + int n = m_n; // length of actual df array (m_dfsize is the theoretical max) + + m_r = new float[n/2]; // raw autocorrelation + m_fr = new float[n/2]; // filtered autocorrelation + m_t = new float[n/2]; // averaged tempo estimate for each lag value for (int i = 0; i < n/2; ++i) { - m_r[i] = 0.f; + m_r[i] = 0.f; m_fr[i] = 0.f; - m_t[i] = lag2tempo(i); + m_t[i] = lag2tempo(i); } + // Calculate the raw autocorrelation of the detection function + for (int i = 0; i < n/2; ++i) { for (int j = i; j < n-1; ++j) { @@ -399,20 +413,20 @@ m_r[i] /= n - i - 1; } + // Filter the autocorrelation and average out the tempo estimates + float related[] = { 0.5, 2, 4, 8 }; for (int i = 1; i < n/2-1; ++i) { - float weight = 1.f - fabsf(128.f - lag2tempo(i)) * 0.005; - if (weight < 0.f) weight = 0.f; - weight = weight * weight * weight; - m_fr[i] = m_r[i]; int div = 1; for (int j = 0; j < int(sizeof(related)/sizeof(related[0])); ++j) { + // Check for an obvious peak at each metrically related lag + int k0 = int(i * related[j] + 0.5); if (k0 >= 0 && k0 < int(n/2)) { @@ -431,11 +445,18 @@ have = true; } + // Boost the original lag according to the strongest + // value found close to this related lag + m_fr[i] += m_r[kmax] / 5; if ((kmax == 0 || m_r[kmax] > m_r[kmax-1]) && (kmax == n/2-1 || m_r[kmax] > m_r[kmax+1]) && kvmax > kvmin * 1.05) { + + // The strongest value close to the related lag is + // also a pretty good looking peak, so use it to + // improve our tempo estimate for the original lag m_t[i] = m_t[i] + lag2tempo(kmax) * related[j]; ++div; @@ -445,6 +466,13 @@ m_t[i] /= div; + // Finally apply a primitive perceptual weighting (to prefer + // tempi of around 120-130) + + float weight = 1.f - fabsf(128.f - lag2tempo(i)) * 0.005; + if (weight < 0.f) weight = 0.f; + weight = weight * weight * weight; + m_fr[i] += m_fr[i] * (weight / 3); } } @@ -453,7 +481,7 @@ FixedTempoEstimator::D::assembleFeatures() { 
FeatureSet fs; - if (!m_r) return fs; // No results + if (!m_r) return fs; // No autocorrelation: no results Feature feature; feature.hasTimestamp = true; @@ -467,6 +495,9 @@ int n = m_n; for (int i = 0; i < n; ++i) { + + // Return the detection function in the DF output + feature.timestamp = m_start + RealTime::frame2RealTime(i * m_stepSize, m_inputSampleRate); feature.values[0] = m_df[i]; @@ -475,6 +506,10 @@ } for (int i = 1; i < n/2; ++i) { + + // Return the raw autocorrelation in the ACF output, each + // value labelled according to its corresponding tempo + feature.timestamp = m_start + RealTime::frame2RealTime(i * m_stepSize, m_inputSampleRate); feature.values[0] = m_r[i]; @@ -496,9 +531,16 @@ if (m_fr[i] > m_fr[i-1] && m_fr[i] > m_fr[i+1]) { + + // This is a peak in the filtered autocorrelation: stick + // it into the map from filtered autocorrelation to lag + // index -- this sorts our peaks by filtered acf value + candidates[m_fr[i]] = i; } + // Also return the filtered autocorrelation in its own output + feature.timestamp = m_start + RealTime::frame2RealTime(i * m_stepSize, m_inputSampleRate); feature.values[0] = m_fr[i]; @@ -519,15 +561,25 @@ feature.hasDuration = true; feature.duration = m_lasttime - m_start; + // The map contains only peaks and is sorted by filtered acf + // value, so the final element in it is our "best" tempo guess + std::map<float, int>::const_iterator ci = candidates.end(); --ci; int maxpi = ci->second; if (m_t[maxpi] > 0) { - cerr << "*** Using adjusted tempo " << m_t[maxpi] << " instead of lag tempo " << lag2tempo(maxpi) << endl; + + // This lag has an adjusted tempo from the averaging process: + // use it + feature.values[0] = m_t[maxpi]; + } else { - // shouldn't happen -- it would imply that this high value was not a peak! + + // shouldn't happen -- it would imply that this high value was + // not a peak! 
+ feature.values[0] = lag2tempo(maxpi); cerr << "WARNING: No stored tempo for index " << maxpi << endl; } @@ -535,12 +587,17 @@ sprintf(buffer, "%.1f bpm", feature.values[0]); feature.label = buffer; + // Return the best tempo in the main output + fs[TempoOutput].push_back(feature); + // And return the other estimates (up to the arbitrarily chosen + // number of 10 of them) in the candidates output + feature.values.clear(); feature.label = ""; - while (feature.values.size() < 8) { + while (feature.values.size() < 10) { if (m_t[ci->second] > 0) { feature.values.push_back(m_t[ci->second]); } else {
--- a/examples/PowerSpectrum.cpp Wed Nov 12 10:39:05 2008 +0000 +++ b/examples/PowerSpectrum.cpp Wed Nov 12 14:11:01 2008 +0000 @@ -115,7 +115,15 @@ d.description = "Power values of the frequency spectrum bins calculated from the input signal"; d.unit = ""; d.hasFixedBinCount = true; - d.binCount = m_blockSize / 2 + 1; + if (m_blockSize == 0) { + // Just so as not to return "1". This is the bin count that + // would result from a block size of 1024, which is a likely + // default -- but the host should always set the block size + // before querying the bin count for certain. + d.binCount = 513; + } else { + d.binCount = m_blockSize / 2 + 1; + } d.hasKnownExtents = false; d.isQuantized = false; d.sampleType = OutputDescriptor::OneSamplePerStep;
--- a/examples/SpectralCentroid.cpp Wed Nov 12 10:39:05 2008 +0000 +++ b/examples/SpectralCentroid.cpp Wed Nov 12 14:11:01 2008 +0000 @@ -137,8 +137,6 @@ return list; } -//static int scount = 0; - SpectralCentroid::FeatureSet SpectralCentroid::process(const float *const *inputBuffers, Vamp::RealTime timestamp) { @@ -149,8 +147,6 @@ return FeatureSet(); } -// std::cerr << "SpectralCentroid::process: count = " << scount++ << ", timestamp = " << timestamp << ", total power = "; - double numLin = 0.0, numLog = 0.0, denom = 0.0; for (size_t i = 1; i <= m_blockSize/2; ++i) { @@ -163,8 +159,6 @@ denom += scalemag; } -// std::cerr << denom << std::endl; - FeatureSet returnFeatures; if (denom != 0.0) {
--- a/examples/ZeroCrossing.cpp Wed Nov 12 10:39:05 2008 +0000 +++ b/examples/ZeroCrossing.cpp Wed Nov 12 14:11:01 2008 +0000 @@ -138,8 +138,6 @@ return list; } -//static int scount = 0; - ZeroCrossing::FeatureSet ZeroCrossing::process(const float *const *inputBuffers, Vamp::RealTime timestamp) @@ -151,15 +149,11 @@ return FeatureSet(); } -// std::cerr << "ZeroCrossing::process: count = " << scount++ << ", timestamp = " << timestamp << ", rms = "; - float prev = m_previousSample; size_t count = 0; FeatureSet returnFeatures; -// double acc = 0.0; - for (size_t i = 0; i < m_stepSize; ++i) { float sample = inputBuffers[0][i]; @@ -180,14 +174,9 @@ returnFeatures[1].push_back(feature); } -// acc += sample * sample; - prev = sample; } -// acc /= m_stepSize; -// std::cerr << sqrt(acc) << std::endl; - m_previousSample = prev; Feature feature;
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/examples/vamp-example-plugins.txt Wed Nov 12 14:11:01 2008 +0000 @@ -0,0 +1,283 @@ + +Vamp Example Plugins +==================== + +The vamp-example-plugins library contains a number of Vamp audio +analysis plugins provided as part of the Vamp plugin SDK. + +These are simple, but sometimes useful, plugins whose source code you +are free to study and reuse in any proprietary or non-proprietary +plugins of your own without any licensing obligation. + +User documentation for the individual plugins in this library follows. + + +Amplitude Follower +================== + +System identifier: vamp-example-plugins:amplitudefollower +RDF URI: http://vamp-plugins.org/rdf/plugins/vamp-example-plugins#amplitudefollower + +Amplitude Follower tracks and returns the amplitude of the audio +signal, block by block. It uses a method from the SuperCollider audio +processing language, implemented as a Vamp plugin by Dan Stowell. + +Parameters +---------- + +Attack time (seconds) +Release time (seconds) + +Outputs +------- + +Amplitude +~~~~~~~~~ + +The estimated peak amplitude (in volts) for the current processing block. + + +Simple Fixed Tempo Estimator +============================ + +System identifier: vamp-example-plugins:fixedtempo +RDF URI: http://vamp-plugins.org/rdf/plugins/vamp-example-plugins#fixedtempo + +Simple Fixed Tempo Estimator analyses a fragment of audio and +estimates its tempo. It assumes that its input is of fixed tempo, and +it analyses only the first (small but configurable number of) seconds +before returning a result, discarding all subsequent input. + +The plugin calculates an overall energy rise function across a series +of short frequency-domain input frames, takes the autocorrelation of +this function, filters it to stress possible metrical patterns, +locates peaks, and converts from autocorrelation lag to the +corresponding tempo. 
+ +The filtering process involves searching for peaks at simple +metrically related intervals (at a given autocorrelation lag as well +as at 0.5, 2, 4, and 8 times that lag), boosting each peak that shows +strong related peaks. A simplistic perceptual curve is also applied +in order to increase the probability of detecting a "likely" tempo. +For improved tempo precision, each tempo with strong related peaks is +averaged with the tempi calculated from those peaks. + +The method is mainly tuned for 4/4 pop and dance rhythms. + +This plugin returns many of its intermediate calculations as +additional outputs, as well as the most favoured tempo. Although as a +tempo estimator it's still fairly primitive, it is intended to provide +a useful example of a slightly more complex feature extraction plugin +than the other examples, as well as one that returns several different +types of output at a time. + +Parameters +---------- + +Minimum estimated tempo, Maximum estimated tempo (bpm) - These +parameters control the range of values within which the tempo +estimator will return its estimate. + +Input duration to study (seconds) - The tempo estimator uses only the +first part of its input, discarding any that follows. This parameter +controls how much input it will use. There is no value in increasing +this beyond 8x the duration of the slowest returned beat. The default +of 10 seconds is likely to be appropriate for most purposes. + +Outputs +------- + +Tempo +~~~~~ + +The tempo estimator's best guess at the tempo of its input, in beats +per minute. + +This is returned as a feature whose timestamp and duration cover the +range of the input which was used in estimating the tempo, with a +single value containing the tempo. + +Tempo candidates +~~~~~~~~~~~~~~~~ + +Several guesses at the possible tempo. 
This output is returned as a +single feature whose timestamp and duration cover the range of the +input which was used in estimating the tempo, with up to 10 bins +containing one tempo value in each bin, with the "best guess" tempo in +bin 0. + +Detection function +~~~~~~~~~~~~~~~~~~ + +The basic onset detection function used in tempo estimation. + +Autocorrelation function +~~~~~~~~~~~~~~~~~~~~~~~~ + +The autocorrelation of the onset detection function. + +Filtered Autocorrelation +~~~~~~~~~~~~~~~~~~~~~~~~ + +The autocorrelation after filtering to boost values with possible +metrically related peaks and to apply perceptual weighting. The peak +value of this function is the one that will be used as the "best +guess". + + +Simple Percussion Onset Detector +================================ + +System identifier: vamp-example-plugins:percussiononsets +RDF URI: http://vamp-plugins.org/rdf/plugins/vamp-example-plugins#percussiononsets + +Simple Percussion Onset Detector estimates the locations of percussive +onsets in the audio signal. It uses a method described in "Drum +Source Separation using Percussive Feature Detection and Spectral +Modulation" by Dan Barry, Derry Fitzgerald, Eugene Coyle and Bob +Lawlor, ISSC 2005. + +The principle is to exploit the broadband nature of noisy percussive +onsets by identifying only those frames in which the energy rise shows +a broadband profile. + +The plugin takes a series of frequency domain frames, and examines +each frame to count the number of bins whose energy content has +increased by more than a certain threshold since the prior frame. +Frames in which this number is at a peak relative to prior and +following frames and also exceeds another threshold value are +classified as percussive onsets. + +Parameters +---------- + +Energy rise threshold (dB) - The rise in energy within a bin from one +frame to the next that is required for a bin to be counted toward the +detection function's bin count. 
This roughly corresponds to how +"loud" a percussive sound must be in order to be detected. + +Sensitivity (%) - The proportion of bins that must exceed the energy +rise threshold in order for an onset to be detected (at frames in +which the detection function peaks). This roughly corresponds to how +"noisy" a percussive sound must be in order to be detected. + +Outputs +------- + +Onsets +~~~~~~ + +The estimated onset locations. + +Detection Function +~~~~~~~~~~~~~~~~~~ + +The energy rise detection function whose peaks were used to estimate +onset locations. + + +Simple Power Spectrum +===================== + +System identifier: vamp-example-plugins:powerspectrum +RDF URI: http://vamp-plugins.org/rdf/plugins/vamp-example-plugins#powerspectrum + +Simple Power Spectrum returns a power spectrum calculated from +windowed short-time Fourier transforms of the input audio. (The power +spectrum for a frame consists of a sequence of the squares of the +magnitudes of the complex values for each frequency bin in the result +of the Fourier transform.) + +This very simple plugin is an illustration of the fact that if a +plugin requests frequency-domain input, its input will already be in +the form needed for a spectrum such as this. The plugin has no work +left to do except to calculate the squared magnitude from the +cartesian complex representation. + +This plugin also illustrates how to return "grid-type" visualisation +data from a Vamp plugin. + +Parameters +---------- + +None. + +Outputs +------- + +Power Spectrum +~~~~~~~~~~~~~~ + +The power spectrum calculated from the input frame. This output +returns a single feature per processing block, containing +blocksize/2+1 power values corresponding to the FFT bins from DC to +Nyquist inclusive. The DC bin is always returned. 
+ + +Spectral Centroid +================= + +System identifier: vamp-example-plugins:spectralcentroid +RDF URI: http://vamp-plugins.org/rdf/plugins/vamp-example-plugins#spectralcentroid + +Spectral Centroid calculates the "centre of gravity" of the frequency +spectrum for each input frame. + +Parameters +---------- + +None. + +Outputs +------- + +Log Frequency Centroid +~~~~~~~~~~~~~~~~~~~~~~ + +The centroid of the log-weighted frequency spectrum. That is, the sum +across Fourier transform output bins of the logarithm of the bin +frequency multiplied by the bin magnitude, divided by the sum of the +bin magnitudes, and the inverse logarithm taken so as to give the +result as a frequency in Hz. + +Linear Frequency Centroid +~~~~~~~~~~~~~~~~~~~~~~~~~ + +The centroid of the linear-weighted frequency spectrum. That is, the +sum across Fourier transform output bins of the bin frequency +multiplied by the bin magnitude, divided by the sum of the bin +magnitudes. The result is a frequency in Hz. + + +Zero Crossings +============== + +System identifier: vamp-example-plugins:zerocrossing +RDF URI: http://vamp-plugins.org/rdf/plugins/vamp-example-plugins#zerocrossing + +Zero Crossings calculates the positions and density of "zero-crossing" +points in an audio waveform. For the purposes of this plugin, that +means those positions at which the sampled value switches from +zero-or-less to greater-than-zero, or vice versa. + +Parameters +---------- + +None. + +Outputs +------- + +Zero Crossing Counts +~~~~~~~~~~~~~~~~~~~~ + +The number of zero-crossing points found in the current block of +samples, as a single-valued feature returned per processing block. + +Zero Crossings +~~~~~~~~~~~~~~ + +The locations of zero-crossing points, returning one feature +timestamped to the zero-crossing location, without values, for each +crossing point. +