diff Matcher.h @ 15:a82276091bbd

Pull out Matcher parameters into an object
author Chris Cannam
date Fri, 10 Oct 2014 12:55:05 +0100
parents cdead4a52755
children 4c8526c5bf58
line wrap: on
line diff
--- a/Matcher.h	Fri Oct 10 12:04:54 2014 +0100
+++ b/Matcher.h	Fri Oct 10 12:55:05 2014 +0100
@@ -39,9 +39,103 @@
  *  on the FFT data with the higher frequencies mapped onto a linear
  *  scale.
  */
-
 class Matcher
 {
+public:
+    enum FrameNormalisation {
+        // Selects how each frame of audio is normalised (see Parameters::frameNorm).
+        /** Do not normalise audio frames */
+        NoFrameNormalisation,
+        
+        /** Normalise each frame of audio to have a sum of 1 (Parameters default) */
+        NormaliseFrameToSum1,
+        
+        /** Normalise each frame of audio by the long-term average
+         *  of the summed energy */
+        NormaliseFrameToLTAverage,
+    };
+
+    enum DistanceNormalisation {
+        // Selects how the frame-pair distance metric is normalised (see Parameters::distanceNorm).
+        /** Do not normalise distance metrics */
+        NoDistanceNormalisation,
+
+        /** Normalise distance metric for pairs of audio frames by
+         *  the sum of the two frames. */
+        NormaliseDistanceToSum,
+
+        /** Normalise distance metric for pairs of audio frames by
+         *  the log of the sum of the frames. (Parameters default) */
+        NormaliseDistanceToLogSum,
+    };
+
+    struct Parameters {
+        /** Set rate, hop time and FFT size; other fields get fixed defaults. */
+        Parameters(float rate_, double hopTime_, int fftSize_) :
+            sampleRate(rate_),
+            frameNorm(NormaliseFrameToSum1),
+            distanceNorm(NormaliseDistanceToLogSum),
+            useSpectralDifference(true),
+            useChromaFrequencyMap(false),
+            hopTime(hopTime_),
+            fftSize(fftSize_),
+            blockTime(10.0),
+            silenceThreshold(0.01),
+            decay(0.99),
+            maxRunCount(3)
+        {}
+
+        /** Sample rate of audio (constructor argument rate_) */
+        float sampleRate;
+
+        /** Type of audio frame normalisation (Default = NormaliseFrameToSum1) */
+        FrameNormalisation frameNorm;
+
+        /** Type of distance metric normalisation (Default = NormaliseDistanceToLogSum) */
+        DistanceNormalisation distanceNorm;
+
+        /** Flag indicating whether or not the half-wave rectified
+         *  spectral difference should be used in calculating the
+         *  distance metric for pairs of audio frames, instead of the
+         *  straight spectrum values. (Default = true). */
+        bool useSpectralDifference;
+
+        /** Flag indicating whether to use a chroma frequency map (12
+         *  bins) instead of the default warped spectrogram. (Default = false). */
+        bool useChromaFrequencyMap;
+
+        /** Spacing of audio frames (determines the amount of overlap or
+         *  skip between frames). This value is expressed in
+         *  seconds. (Constructor argument hopTime_). */
+        double hopTime;
+
+        /** Size of an FFT frame in samples. Note that the data passed
+         *  in to Matcher is already in the frequency domain, so this
+         *  expresses the size of the frame that the caller will be
+         *  providing. (Constructor argument fftSize_).
+         */
+        int fftSize;
+
+        /** The width of the search band (error margin) around the current
+         *  match position, measured in seconds. Strictly speaking the
+         *  width is measured backwards from the current point, since the
+         *  algorithm has to work causally. (Default = 10.0).
+         */
+        double blockTime;
+        
+        /** RMS level below which a frame is considered silent (Default = 0.01) */
+        double silenceThreshold;
+
+        /** Frame-to-frame decay factor in calculating long-term average (Default = 0.99) */
+        double decay;
+
+        /** Maximum number of frames sequentially processed by this
+         *  matcher, without a frame of the other matcher being
+         *  processed. (Default = 3).
+         */
+        int maxRunCount;
+    };
+
 protected:
     /** Points to the other performance with which this one is being
      *  compared.  The data for the distance metric and the dynamic
@@ -57,80 +151,21 @@
      *  DTW steps. */
     bool firstPM;
 
-    /** Sample rate of audio */
-    float sampleRate;
-
-    /** Onset time of the first note in the audio file, in order to
-     *  establish synchronisation between the match file and the audio
-     *  data. */
-    double matchFileOffset;
-
-    /** Flag indicating whether or not each frame of audio should be
-     *  normalised to have a sum of 1.  (Default = false). */
-    bool normalise1;
-	
-    /** Flag indicating whether or not the distance metric for pairs
-     *  of audio frames should be normalised by the sum of the two
-     *  frames.  (Default = false). */
-    bool normalise2;
-
-    /** Flag indicating whether or not each frame of audio should be
-     *  normalised by the long term average of the summed energy.
-     *  (Default = false; assumes normalise1 == false). */
-    bool normalise3;
-	
-    /** Flag indicating whether or not the distance metric for pairs
-     *  of audio frames should be normalised by the log of the sum of
-     *  the frames.  (Default = false; assumes normalise2 ==
-     *  false). */
-    bool normalise4;
-
-    /** Flag indicating whether or not the half-wave rectified
-     *  spectral difference should be used in calculating the distance
-     *  metric for pairs of audio frames, instead of the straight
-     *  spectrum values. (Default = true). */
-    bool useSpectralDifference;
-
-    bool useChromaFrequencyMap;
+    /** Configuration parameters */
+    Parameters params;
 
     /** Scaling factor for distance metric; must guarantee that the
      *  final value fits in the data type used, that is, unsigned
-     *  char. (Default = 16).
+     *  char.
      */
     double scale;
 
-    /** Spacing of audio frames (determines the amount of overlap or
-     *  skip between frames). This value is expressed in
-     *  seconds. (Default = 0.020s) */
-    double hopTime;
-
-    /** The size of an FFT frame in seconds. (Default = 0.04644s).
-     *  Note that the value is not taken to be precise; it is adjusted
-     *  so that <code>fftSize</code> is always power of 2. */
-    double fftTime;
-
-    /** The width of the search band (error margin) around the current
-     *  match position, measured in seconds. Strictly speaking the
-     *  width is measured backwards from the current point, since the
-     *  algorithm has to work causally.
-     */
-    double blockTime;
-
-    /** Spacing of audio frames in samples (see <code>hopTime</code>) */
-    int hopSize;
-
-    /** The size of an FFT frame in samples (see <code>fftTime</code>) */
-    int fftSize;
-
     /** Width of the search band in FFT frames (see <code>blockTime</code>) */
     int blockSize;
 
     /** The number of frames of audio data which have been read. */
     int frameCount;
 
-    /** RMS amplitude of the current frame. */
-//    double frameRMS;
-
     /** Long term average frame energy (in frequency domain
      *  representation). */
     double ltAverage;
@@ -140,14 +175,6 @@
      */
     int runCount;
 
-    /** Interactive control of the matching process allows pausing
-     *  computation of the cost matrices in one direction.
-     */
-    bool paused;
-
-    /** The total number of frames of audio data to be read. */
-    int maxFrames;
-
     /** A mapping function for mapping FFT bins to final frequency
      *  bins.  The mapping is linear (1-1) until the resolution
      *  reaches 2 points per semitone, then logarithmic with a
@@ -198,22 +225,11 @@
     /** Width of distance and bestPathCost matrices and first and last vectors */
     int  distXSize;
 
-    /** Total number of audio frames, or -1 for live or compressed input. */
-    long fileLength;
-
     bool initialised;
 
-//!!!    bool atEnd; //!!!
-
     /** Disable or enable debugging output */
     static bool silent;
 
-    static const double decay;
-    static const double silenceThreshold;
-    static const int MAX_RUN_COUNT;
-
-    friend class Finder; //!!!
-
 public:
     /** Constructor for Matcher.
      *
@@ -222,7 +238,7 @@
      *  between the two matchers (currently one possesses the distance
      *  matrix and optimal path matrix).
      */
-    Matcher(float rate, Matcher *p);
+    Matcher(Parameters parameters, Matcher *p);
 
     ~Matcher();
 
@@ -243,20 +259,10 @@
         otherMatcher = p;
     } // setOtherMatcher()
 
-    int getFFTSize() {
-        return fftSize;
-    }
-    
-    int getHopSize() {
-        return hopSize;
-    }
-
     int getFrameCount() { 
         return frameCount;
     }
 
-    void setHopSize(int);
-
 protected:
     template <typename T>
     void initVector(vector<T> &vec, int sz, T dflt = 0) {
@@ -281,7 +287,7 @@
 
     void init();
 
-    void makeFreqMap(int fftSize, float sampleRate);
+    void makeFreqMap();
 
     /** Creates a map of FFT frequency bins to comparison bins.  Where
      *  the spacing of FFT bins is less than 0.5 semitones, the
@@ -290,9 +296,9 @@
      *  bins. No scaling is performed; that is the energy is summed
      *  into the comparison bins. See also processFrame()
      */
-    void makeStandardFrequencyMap(int fftSize, float sampleRate);
+    void makeStandardFrequencyMap();
 
-    void makeChromaFrequencyMap(int fftSize, float sampleRate);
+    void makeChromaFrequencyMap();
 
     /** Processes a frame of audio data by first computing the STFT
      *  with a Hamming window, then mapping the frequency bins into a
@@ -341,6 +347,7 @@
     void setValue(int i, int j, int dir, int value, int dMN);
 
     friend class MatchFeeder;
+    friend class Finder;
 
 }; // class Matcher