cannam@0: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ cannam@0: cannam@0: /* cannam@0: Vamp feature extraction plugin using the MATCH audio alignment cannam@0: algorithm. cannam@0: cannam@0: Centre for Digital Music, Queen Mary, University of London. cannam@0: This file copyright 2007 Simon Dixon, Chris Cannam and QMUL. cannam@0: cannam@0: This program is free software; you can redistribute it and/or cannam@0: modify it under the terms of the GNU General Public License as cannam@0: published by the Free Software Foundation; either version 2 of the cannam@0: License, or (at your option) any later version. See the file cannam@0: COPYING included with this distribution for more information. cannam@0: */ cannam@0: cannam@0: #ifndef _MATCHER_H_ cannam@0: #define _MATCHER_H_ cannam@0: cannam@0: #include cannam@0: #include cannam@0: #include cannam@0: #include cannam@0: cannam@0: #define ADVANCE_THIS 1 cannam@0: #define ADVANCE_OTHER 2 cannam@0: #define ADVANCE_BOTH 3 cannam@0: #define MASK 0xfc cannam@0: Chris@26: #include "DistanceMetric.h" Chris@38: #include "FeatureExtractor.h" cannam@0: cannam@0: using std::vector; cannam@0: using std::string; cannam@0: using std::cerr; cannam@0: using std::endl; cannam@0: cannam@0: /** Represents an audio stream that can be matched to another audio cannam@0: * stream of the same piece of music. The matching algorithm uses cannam@0: * dynamic time warping. The distance metric is a Euclidean metric cannam@0: * on the FFT data with the higher frequencies mapped onto a linear cannam@0: * scale. cannam@0: */ cannam@0: class Matcher cannam@0: { Chris@15: public: Chris@15: struct Parameters { Chris@15: Chris@15: Parameters(float rate_, double hopTime_, int fftSize_) : Chris@15: sampleRate(rate_), Chris@26: distanceNorm(DistanceMetric::NormaliseDistanceToLogSum), Chris@29: distanceScale(90.0), Chris@15: hopTime(hopTime_), Chris@15: fftSize(fftSize_), Chris@15: blockTime(10.0), Chris@15: maxRunCount(3) Chris@15: {} Chris@15: Chris@15: /** Sample rate of audio */ Chris@15: float sampleRate; Chris@15: Chris@15: /** Type of distance metric normalisation */ Chris@26: DistanceMetric::DistanceNormalisation distanceNorm; Chris@15: Chris@29: /** Scaling factor for distance metric; must guarantee that the Chris@29: * final value fits in the data type used, that is, unsigned Chris@29: * char. Chris@29: */ Chris@29: double distanceScale; Chris@29: Chris@15: /** Spacing of audio frames (determines the amount of overlap or Chris@15: * skip between frames). This value is expressed in Chris@15: * seconds. */ Chris@15: double hopTime; Chris@38: Chris@15: /** Size of an FFT frame in samples. Note that the data passed Chris@15: * in to Matcher is already in the frequency domain, so this Chris@15: * expresses the size of the frame that the caller will be Chris@38: * providing. */ Chris@15: int fftSize; Chris@38: Chris@15: /** The width of the search band (error margin) around the current Chris@15: * match position, measured in seconds. Strictly speaking the Chris@15: * width is measured backwards from the current point, since the Chris@15: * algorithm has to work causally. Chris@15: */ Chris@15: double blockTime; Chris@15: Chris@15: /** Maximum number of frames sequentially processed by this Chris@15: * matcher, without a frame of the other matcher being Chris@15: * processed. Chris@15: */ Chris@15: int maxRunCount; Chris@15: }; Chris@15: cannam@0: /** Constructor for Matcher. cannam@0: * cannam@0: * @param p The Matcher representing the performance with which cannam@0: * this one is going to be matched. Some information is shared cannam@0: * between the two matchers (currently one possesses the distance cannam@0: * matrix and optimal path matrix). cannam@0: */ Chris@38: Matcher(Parameters parameters, Chris@38: FeatureExtractor::Parameters featureParams, Chris@38: Matcher *p); cannam@0: Chris@23: /** Constructor for Matcher using externally supplied features. Chris@23: * A Matcher made using this constructor will not carry out its Chris@23: * own feature extraction from frequency-domain audio data, but Chris@23: * instead will accept arbitrary feature frames calculated by Chris@23: * some external code. Chris@23: * Chris@23: * @param p The Matcher representing the performance with which Chris@23: * this one is going to be matched. Some information is shared Chris@23: * between the two matchers (currently one possesses the distance Chris@23: * matrix and optimal path matrix). Chris@23: * Chris@23: * @param featureSize Number of values in each feature vector. Chris@23: */ Chris@23: Matcher(Parameters parameters, Matcher *p, int featureSize); Chris@23: cannam@0: ~Matcher(); cannam@0: cannam@0: /** Adds a link to the Matcher object representing the performance cannam@0: * which is going to be matched to this one. cannam@0: * cannam@0: * @param p the Matcher representing the other performance cannam@0: */ cannam@0: void setOtherMatcher(Matcher *p) { Chris@43: m_otherMatcher = p; cannam@0: } // setOtherMatcher() cannam@0: cannam@0: int getFrameCount() { Chris@43: return m_frameCount; cannam@0: } cannam@0: cannam@0: protected: Chris@38: /** Create internal structures and reset. */ cannam@0: void init(); cannam@0: Chris@38: /** The distXSize value has changed: resize internal buffers. */ Chris@41: void size(); cannam@0: Chris@38: /** Process a frequency-domain frame of audio data using the Chris@38: * built-in FeatureExtractor, then calculating the distance to Chris@38: * all frames stored in the otherMatcher and storing them in the Chris@38: * distance matrix, and finally updating the optimal path matrix Chris@38: * using the dynamic time warping algorithm. Chris@14: * Chris@14: * Return value is the frame (post-processed, with warping, Chris@14: * rectification, and normalisation as appropriate). Chris@23: * Chris@23: * The Matcher must have been constructed using the constructor Chris@23: * without an external featureSize parameter in order to use this Chris@23: * function. (Otherwise it will be expecting you to call Chris@23: * consumeFeatureVector.) cannam@0: */ Chris@21: std::vector consumeFrame(double *reBuffer, double *imBuffer); cannam@0: Chris@23: /** Processes a feature vector frame (presumably calculated from Chris@23: * audio data by some external code). As consumeFrame, except Chris@23: * that it does not calculate a feature from audio data but Chris@23: * instead uses the supplied feature directly. Chris@23: * Chris@23: * The Matcher must have been constructed using the constructor Chris@23: * that accepts an external featureSize parameter in order to Chris@23: * use this function. The supplied feature must be of the size Chris@23: * that was passed to the constructor. Chris@23: */ Chris@23: void consumeFeatureVector(std::vector feature); Chris@23: cannam@0: /** Retrieves values from the minimum cost matrix. cannam@0: * cannam@0: * @param i the frame number of this Matcher cannam@0: * @param j the frame number of the other Matcher cannam@0: * @return the cost of the minimum cost path to this location cannam@0: */ cannam@0: int getValue(int i, int j, bool firstAttempt); cannam@0: cannam@0: /** Stores entries in the distance matrix and the optimal path matrix. cannam@0: * cannam@0: * @param i the frame number of this Matcher cannam@0: * @param j the frame number of the other Matcher cannam@0: * @param dir the direction from which this position is reached with cannam@0: * minimum cost cannam@0: * @param value the cost of the minimum path except the current step cannam@0: * @param dMN the distance cost between the two frames cannam@0: */ cannam@0: void setValue(int i, int j, int dir, int value, int dMN); cannam@0: Chris@21: void calcAdvance(); Chris@21: Chris@42: /** Points to the other performance with which this one is being Chris@42: * compared. The data for the distance metric and the dynamic Chris@42: * time warping is shared between the two matchers. In the Chris@42: * original version, only one of the two performance matchers Chris@42: * contained the distance metric. (See first) Chris@42: */ Chris@43: Matcher *m_otherMatcher; Chris@42: Chris@42: /** Indicates which performance is considered primary (the Chris@42: * score). This is the performance shown on the vertical axis, Chris@42: * and referred to as "this" in the codes for the direction of Chris@42: * DTW steps. */ Chris@43: bool m_firstPM; Chris@42: Chris@42: /** Configuration parameters */ Chris@43: Parameters m_params; Chris@42: Chris@42: /** Width of the search band in FFT frames (see blockTime) */ Chris@43: int m_blockSize; Chris@42: Chris@42: /** The number of frames of audio data which have been read. */ Chris@43: int m_frameCount; Chris@42: Chris@42: /** The number of frames sequentially processed by this matcher, Chris@42: * without a frame of the other matcher being processed. Chris@42: */ Chris@43: int m_runCount; Chris@42: Chris@42: /** The number of values in a feature vector. */ Chris@43: int m_featureSize; Chris@42: Chris@50: /** A block of previously seen feature frames is stored in this Chris@50: * structure for calculation of the distance matrix as the new Chris@50: * frames are received. One can think of the structure of the Chris@50: * array as a circular buffer of vectors. */ Chris@43: vector > m_frames; Chris@42: Chris@42: /** The best path cost matrix. */ Chris@43: vector > m_bestPathCost; Chris@42: Chris@42: /** The distance matrix. */ Chris@43: vector > m_distance; Chris@42: Chris@42: /** The bounds of each row of data in the distance and path cost matrices.*/ Chris@43: vector m_first; Chris@43: vector m_last; Chris@42: Chris@42: /** Height of each column in distance and bestPathCost matrices */ Chris@43: vector m_distYSizes; Chris@42: Chris@42: /** Width of distance and bestPathCost matrices and first and last vectors */ Chris@43: int m_distXSize; Chris@42: Chris@43: bool m_initialised; Chris@42: Chris@43: FeatureExtractor m_featureExtractor; Chris@43: DistanceMetric m_metric; Chris@26: cannam@0: friend class MatchFeeder; Chris@24: friend class MatchFeatureFeeder; Chris@15: friend class Finder; cannam@0: cannam@0: }; // class Matcher cannam@0: cannam@0: #endif