diff src/Matcher.h @ 38:8cce4e13ede3 refactors

Make use of FeatureExtractor in Matcher
author Chris Cannam
date Thu, 13 Nov 2014 12:50:54 +0000
parents 16870e8770ae
children 9aec2304b9f6
line wrap: on
line diff
--- a/src/Matcher.h	Thu Nov 13 12:03:52 2014 +0000
+++ b/src/Matcher.h	Thu Nov 13 12:50:54 2014 +0000
@@ -28,6 +28,7 @@
 #define MASK 0xfc
 
 #include "DistanceMetric.h"
+#include "FeatureExtractor.h"
 
 using std::vector;
 using std::string;
@@ -43,42 +44,21 @@
 class Matcher
 {
 public:
-    enum FrameNormalisation {
-
-        /** Do not normalise audio frames */
-        NoFrameNormalisation,
-        
-        /** Normalise each frame of audio to have a sum of 1 */
-        NormaliseFrameToSum1,
-        
-        /** Normalise each frame of audio by the long-term average
-         *  of the summed energy */
-        NormaliseFrameToLTAverage,
-    };
-
     struct Parameters {
 
         Parameters(float rate_, double hopTime_, int fftSize_) :
             sampleRate(rate_),
-            frameNorm(NormaliseFrameToSum1),
             distanceNorm(DistanceMetric::NormaliseDistanceToLogSum),
             distanceScale(90.0),
-            useSpectralDifference(true),
-            useChromaFrequencyMap(false),
             hopTime(hopTime_),
             fftSize(fftSize_),
             blockTime(10.0),
-            silenceThreshold(0.01),
-            decay(0.99),
             maxRunCount(3)
         {}
 
         /** Sample rate of audio */
         float sampleRate;
 
-        /** Type of audio frame normalisation */
-        FrameNormalisation frameNorm;
-
         /** Type of distance metric normalisation */
         DistanceMetric::DistanceNormalisation distanceNorm;
 
@@ -88,40 +68,23 @@
          */
         double distanceScale;
 
-        /** Flag indicating whether or not the half-wave rectified
-         *  spectral difference should be used in calculating the
-         *  distance metric for pairs of audio frames, instead of the
-         *  straight spectrum values. */
-        bool useSpectralDifference;
-
-        /** Flag indicating whether to use a chroma frequency map (12
-         *  bins) instead of the default warped spectrogram */
-        bool useChromaFrequencyMap;
-
         /** Spacing of audio frames (determines the amount of overlap or
          *  skip between frames). This value is expressed in
          *  seconds. */
         double hopTime;
-
+        
         /** Size of an FFT frame in samples. Note that the data passed
          *  in to Matcher is already in the frequency domain, so this
          *  expresses the size of the frame that the caller will be
-         *  providing.
-         */
+         *  providing. */
         int fftSize;
-
+        
         /** The width of the search band (error margin) around the current
          *  match position, measured in seconds. Strictly speaking the
          *  width is measured backwards from the current point, since the
          *  algorithm has to work causally.
          */
         double blockTime;
-        
-        /** RMS level below which frame is considered silent */
-        double silenceThreshold;
-
-        /** Frame-to-frame decay factor in calculating long-term average */
-        double decay;
 
         /** Maximum number of frames sequentially processed by this
          *  matcher, without a frame of the other matcher being
@@ -154,49 +117,14 @@
     /** The number of frames of audio data which have been read. */
     int frameCount;
 
-    /** Long term average frame energy (in frequency domain
-     *  representation). */
-    double ltAverage;
-
     /** The number of frames sequentially processed by this matcher,
      *  without a frame of the other matcher being processed.
      */
     int runCount;
 
-    /** A mapping function for mapping FFT bins to final frequency
-     *  bins.  The mapping is linear (1-1) until the resolution
-     *  reaches 2 points per semitone, then logarithmic with a
-     *  semitone resolution.  e.g. for 44.1kHz sampling rate and
-     *  fftSize of 2048 (46ms), bin spacing is 21.5Hz, which is mapped
-     *  linearly for bins 0-34 (0 to 732Hz), and logarithmically for
-     *  the remaining bins (midi notes 79 to 127, bins 35 to 83),
-     *  where all energy above note 127 is mapped into the final
-     *  bin. */
-    vector<int> freqMap;
-
-    /** The number of entries in <code>freqMap</code>. */
-    int freqMapSize;
-
-    /** The number of values in an externally-supplied feature vector,
-     *  used in preference to freqMap/freqMapSize if constructed with
-     *  the external feature version of the Matcher constructor. If
-     *  this is zero, the internal feature extractor will be used as
-     *  normal.
-     */
-    int externalFeatureSize;
-
-    /** The number of values in the feature vectors actually in
-     *  use. This will be externalFeatureSize if greater than zero, or
-     *  freqMapSize otherwise.
-     */
+    /** The number of values in a feature vector. */
     int featureSize;
 
-    /** The most recent frame; used for calculating the frame to frame
-     *  spectral difference. These are therefore frequency warped but
-     *  not yet normalised. */
-    vector<double> prevFrame;
-    vector<double> newFrame;
-
     /** A block of previously seen frames are stored in this structure
      *  for calculation of the distance matrix as the new frames are
      *  read in.  One can think of the structure of the array as a
@@ -206,21 +134,18 @@
      *  energy of frames[i] is stored in totalEnergies[i]. */
     vector<vector<double> > frames;
 
-    /** The total energy of each frame in the frames block. */ 
-    vector<double> totalEnergies;
-
     /** The best path cost matrix. */
-    int **bestPathCost;
+    vector<vector<int> > bestPathCost;
 
     /** The distance matrix. */
-    unsigned char **distance;
+    vector<vector<unsigned char> > distance;
 
     /** The bounds of each row of data in the distance and path cost matrices.*/
-    int *first;
-    int *last;
+    vector<int> first;
+    vector<int> last;
 
     /** Height of each column in distance and bestPathCost matrices */
-    int *distYSizes;
+    vector<int> distYSizes;
 
     /** Width of distance and bestPathCost matrices and first and last vectors */
     int  distXSize;
@@ -238,7 +163,9 @@
      *  between the two matchers (currently one possesses the distance
      *  matrix and optimal path matrix).
      */
-    Matcher(Parameters parameters, Matcher *p);
+    Matcher(Parameters parameters,
+            FeatureExtractor::Parameters featureParams,
+            Matcher *p);
 
     /** Constructor for Matcher using externally supplied features.
      *  A Matcher made using this constructor will not carry out its
@@ -275,58 +202,18 @@
         return frameCount;
     }
 
-    /**
-     * Return the feature vector size that will be used for the given
-     * parameters.
-     */
-    static int getFeatureSizeFor(Parameters params);
-
 protected:
-    template <typename T>
-    void initVector(vector<T> &vec, int sz, T dflt = 0) {
-        vec.clear();
-        while ((int)vec.size() < sz) vec.push_back(dflt);
-    }
-
-    template <typename T>
-    void initMatrix(vector<vector<T> > &mat, int hsz, int vsz,
-                    T dflt = 0, int fillTo = -1) {
-        mat.clear();
-        if (fillTo < 0) fillTo = hsz;
-        for (int i = 0; i < hsz; ++i) {
-            mat.push_back(vector<T>());
-            if (i < fillTo) {
-                while ((int)mat[i].size() < vsz) {
-                    mat[i].push_back(dflt);
-                }
-            }
-        }
-    }
-
+    /** Create internal structures and reset. */
     void init();
 
-    void makeFreqMap();
+    /** The distXSize value has changed: resize internal buffers. */
+    void expand();
 
-    /** Creates a map of FFT frequency bins to comparison bins.  Where
-     *  the spacing of FFT bins is less than 0.5 semitones, the
-     *  mapping is one to one. Where the spacing is greater than 0.5
-     *  semitones, the FFT energy is mapped into semitone-wide
-     *  bins. No scaling is performed; that is the energy is summed
-     *  into the comparison bins. See also consumeFrame()
-     */
-    void makeStandardFrequencyMap();
-
-    void makeChromaFrequencyMap();
-
-    /** Processes a frame of audio data by first computing the STFT
-     *  with a Hamming window, then mapping the frequency bins into a
-     *  part-linear part-logarithmic array, then (optionally)
-     *  computing the half-wave rectified spectral difference from the
-     *  previous frame, then (optionally) normalising to a sum of 1,
-     *  then calculating the distance to all frames stored in the
-     *  otherMatcher and storing them in the distance matrix, and
-     *  finally updating the optimal path matrix using the dynamic
-     *  time warping algorithm.
+    /** Process a frequency-domain frame of audio data using the
+     *  built-in FeatureExtractor, then calculating the distance to
+     *  all frames stored in the otherMatcher and storing them in the
+     *  distance matrix, and finally updating the optimal path matrix
+     *  using the dynamic time warping algorithm.
      *
      *  Return value is the frame (post-processed, with warping,
      *  rectification, and normalisation as appropriate).
@@ -369,9 +256,9 @@
      */
     void setValue(int i, int j, int dir, int value, int dMN);
 
-    vector<double> processFrameFromFreqData(double *, double *);
     void calcAdvance();
 
+    FeatureExtractor featureExtractor;
     DistanceMetric metric;
     
     friend class MatchFeeder;