diff src/FeatureExtractor.h @ 103:593054bf6476 feature_conditioner

Pull out normalisation and specdiff stuff into FeatureConditioner
author Chris Cannam
date Thu, 04 Dec 2014 13:05:16 +0000
parents b9aa663a607b
children 3792bcd34470
line wrap: on
line diff
--- a/src/FeatureExtractor.h	Thu Nov 27 16:50:14 2014 +0000
+++ b/src/FeatureExtractor.h	Thu Dec 04 13:05:16 2014 +0000
@@ -25,50 +25,24 @@
  * the frequency data to map higher frequencies into a linear scale. A
  * chroma mapping is also available.
  *
- * Note that FeatureExtractor maintains internal frame-to-frame state:
- * use one FeatureExtractor per audio source, and construct a new one
- * for each new source.
+ * Note that FeatureExtractor may maintain internal frame-to-frame
+ * state: use one FeatureExtractor per audio source, and construct a
+ * new one for each new source.
  */
 class FeatureExtractor
 {
 public:
-    enum FrameNormalisation {
-
-        /** Do not normalise frames */
-        NoFrameNormalisation,
-        
-        /** Normalise each frame to have a sum of 1 */
-        NormaliseFrameToSum1,
-        
-        /** Normalise each frame by the long-term average of the
-         *  summed energy */
-        NormaliseFrameToLTAverage,
-    };
-
     struct Parameters {
 
         Parameters(float rate_, int fftSize_) :
             sampleRate(rate_),
-            frameNorm(NormaliseFrameToSum1),
-            useSpectralDifference(true),
             useChromaFrequencyMap(false),
-            fftSize(fftSize_),
-            silenceThreshold(0.01),
-            decay(0.99)
+            fftSize(fftSize_)
         {}
 
         /** Sample rate of audio */
         float sampleRate;
 
-        /** Type of audio frame normalisation */
-        FrameNormalisation frameNorm;
-
-        /** Flag indicating whether or not the half-wave rectified
-         *  spectral difference should be used in calculating the
-         *  distance metric for pairs of audio frames, instead of the
-         *  straight spectrum values. */
-        bool useSpectralDifference;
-
         /** Flag indicating whether to use a chroma frequency map (12
          *  bins) instead of the default warped spectrogram */
         bool useChromaFrequencyMap;
@@ -82,12 +56,6 @@
          *  in is already in the frequency domain, so this expresses
          *  the size of the frame that the caller will be providing. */
         int fftSize;
-
-        /** RMS level below which frame is considered silent */
-        double silenceThreshold;
-
-        /** Frame-to-frame decay factor in calculating long-term average */
-        double decay;
     };
 
     /**
@@ -117,12 +85,8 @@
      * have at least params.fftSize/2+1 elements each.
      *
      * Operates by mapping the frequency bins into a part-linear
-     * part-logarithmic array, then (optionally) computing the
-     * half-wave rectified spectral difference from the previous
-     * frame, then (optionally) normalising to a sum of 1.
-     *
-     * Return value is the frame (post-processed, with warping,
-     * rectification, and normalisation as appropriate).
+     * part-logarithmic array, unless useChromaFrequencyMap is true, in
+     * which case they are mapped into chroma bins instead.
      */
     std::vector<double> process(const std::vector<double> &real,
                                 const std::vector<double> &imag);
@@ -133,12 +97,8 @@
      * must have at least 2 * (params.fftSize/2 + 1) elements.
      *
      * Operates by mapping the frequency bins into a part-linear
-     * part-logarithmic array, then (optionally) computing the
-     * half-wave rectified spectral difference from the previous
-     * frame, then (optionally) normalising to a sum of 1.
-     *
-     * Return value is the frame (post-processed, with warping,
-     * rectification, and normalisation as appropriate).
+     * part-logarithmic array, unless useChromaFrequencyMap is true, in
+     * which case they are mapped into chroma bins instead.
      */
     std::vector<double> process(const float *carray);
     
@@ -157,14 +117,8 @@
     /** Creates a map of FFT frequency bins to semitone chroma bins. */
     void makeChromaFrequencyMap();
 
-    std::vector<double> postProcess(const std::vector<double> &, double rms);
-    
     /** Configuration parameters */
     Parameters m_params;
-    
-    /** Long term average frame energy (in frequency domain
-     *  representation). */
-    double m_ltAverage;
 
     /** A mapping function for mapping FFT bins to final frequency
      *  bins.  The mapping is linear (1-1) until the resolution
@@ -179,11 +133,6 @@
 
     /** The size of a returned feature. */
     int m_featureSize;
-
-    /** The most recent frame; used for calculating the frame to frame
-     *  spectral difference. This is therefore frequency warped but
-     *  not yet normalised. */
-    std::vector<double> m_prevFrame;
 };
 
 #endif