match-vamp: src/FeatureExtractor.h comparison

comparison src/FeatureExtractor.h @ 103:593054bf6476 feature_conditioner

Pull out normalisation and specdiff stuff into FeatureConditioner

author	Chris Cannam
date	Thu, 04 Dec 2014 13:05:16 +0000
parents	b9aa663a607b
children	3792bcd34470

comparison

equal deleted inserted replaced

-:6b91e40b2c04
+:593054bf6476
 * Convert frequency-domain audio frames into features suitable for
 * MATCH alignment calculation. The default feature is a warping of
 * the frequency data to map higher frequencies into a linear scale. A
 * chroma mapping is also available.
 *
-* Note that FeatureExtractor maintains internal frame-to-frame state:
+* Note that FeatureExtractor may maintain internal frame-to-frame
-* use one FeatureExtractor per audio source, and construct a new one
+* state: use one FeatureExtractor per audio source, and construct a
-* for each new source.
+* new one for each new source.
 */
 class FeatureExtractor
 {
 public:
-enum FrameNormalisation {
-/** Do not normalise frames */
-NoFrameNormalisation,
-/** Normalise each frame to have a sum of 1 */
-NormaliseFrameToSum1,
-/** Normalise each frame by the long-term average of the
-*  summed energy */
-NormaliseFrameToLTAverage,
-};
 struct Parameters {
 Parameters(float rate_, int fftSize_) :
 sampleRate(rate_),
-frameNorm(NormaliseFrameToSum1),
-useSpectralDifference(true),
 useChromaFrequencyMap(false),
-fftSize(fftSize_),
+fftSize(fftSize_)
-silenceThreshold(0.01),
-decay(0.99)
 {}
 /** Sample rate of audio */
 float sampleRate;
-/** Type of audio frame normalisation */
-FrameNormalisation frameNorm;
-/** Flag indicating whether or not the half-wave rectified
-*  spectral difference should be used in calculating the
-*  distance metric for pairs of audio frames, instead of the
-*  straight spectrum values. */
-bool useSpectralDifference;
 /** Flag indicating whether to use a chroma frequency map (12
 *  bins) instead of the default warped spectrogram */
 bool useChromaFrequencyMap;
 /** Size of an FFT frame in samples. Note that the data passed
 *  in is already in the frequency domain, so this expresses
 *  the size of the frame that the caller will be providing. */
 int fftSize;
-/** RMS level below which frame is considered silent */
-double silenceThreshold;
-/** Frame-to-frame decay factor in calculating long-term average */
-double decay;
 };
 /**
 * Construct a FeatureExtractor with the given parameters.
 *
 * imaginary components from the FFT output). Return a feature
 * vector of size given by getFeatureSize(). Input vectors must
 * have at least params.fftSize/2+1 elements each.
 *
 * Operates by mapping the frequency bins into a part-linear
-* part-logarithmic array, then (optionally) computing the
+* part-logarithmic array, unless useChromaFrequencyMap is true in
-* half-wave rectified spectral difference from the previous
+* which case they are mapped into chroma bins.
-* frame, then (optionally) normalising to a sum of 1.
-*
-* Return value is the frame (post-processed, with warping,
-* rectification, and normalisation as appropriate).
 */
 std::vector<double> process(const std::vector<double> &real,
 const std::vector<double> &imag);
 /**
 * Process one frequency-domain audio frame, provided as a single
 * array of alternating real and imaginary components. Input array
 * must have at least 2 * (params.fftSize/2 + 1) elements.
 *
 * Operates by mapping the frequency bins into a part-linear
-* part-logarithmic array, then (optionally) computing the
+* part-logarithmic array, unless useChromaFrequencyMap is true in
-* half-wave rectified spectral difference from the previous
+* which case they are mapped into chroma bins.
-* frame, then (optionally) normalising to a sum of 1.
-*
-* Return value is the frame (post-processed, with warping,
-* rectification, and normalisation as appropriate).
 */
 std::vector<double> process(const float *carray);
 protected:
 /** Make either standard or chroma map, depending on m_params */
 void makeStandardFrequencyMap();
 /** Creates a map of FFT frequency bins to semitone chroma bins. */
 void makeChromaFrequencyMap();
-std::vector<double> postProcess(const std::vector<double> &, double rms);
 /** Configuration parameters */
 Parameters m_params;
-/** Long term average frame energy (in frequency domain
-*  representation). */
-double m_ltAverage;
 /** A mapping function for mapping FFT bins to final frequency
 *  bins.  The mapping is linear (1-1) until the resolution
 *  reaches 2 points per semitone, then logarithmic with a
 *  semitone resolution.  e.g. for 44.1kHz sampling rate and
 *  bin. */
 std::vector<int> m_freqMap;
 /** The size of a returned feature. */
 int m_featureSize;
-/** The most recent frame; used for calculating the frame to frame
-*  spectral difference. This is therefore frequency warped but
-*  not yet normalised. */
-std::vector<double> m_prevFrame;
 };
 #endif

Mercurial > hg > match-vamp

comparison src/FeatureExtractor.h @ 103:593054bf6476 feature_conditioner