cannam@0: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ cannam@0: cannam@0: /* cannam@0: Vamp feature extraction plugin using the MATCH audio alignment cannam@0: algorithm. cannam@0: cannam@0: Centre for Digital Music, Queen Mary, University of London. cannam@0: This file copyright 2007 Simon Dixon, Chris Cannam and QMUL. cannam@0: cannam@0: This program is free software; you can redistribute it and/or cannam@0: modify it under the terms of the GNU General Public License as cannam@0: published by the Free Software Foundation; either version 2 of the cannam@0: License, or (at your option) any later version. See the file cannam@0: COPYING included with this distribution for more information. cannam@0: */ cannam@0: cannam@0: #ifndef _MATCHER_H_ cannam@0: #define _MATCHER_H_ cannam@0: cannam@0: #include cannam@0: #include cannam@0: #include cannam@0: #include cannam@0: Chris@26: #include "DistanceMetric.h" cannam@0: cannam@0: using std::vector; cannam@0: using std::string; cannam@0: using std::cerr; cannam@0: using std::endl; cannam@0: Chris@74: /** Represents an audio feature stream that can be matched to another Chris@74: * audio stream of the same piece of music. The matching algorithm Chris@74: * uses dynamic time warping. cannam@0: */ cannam@0: class Matcher cannam@0: { Chris@15: public: Chris@45: enum Advance { Chris@45: AdvanceNone, Chris@45: AdvanceBoth, Chris@45: AdvanceThis, Chris@45: AdvanceOther Chris@45: }; Chris@83: static string advanceToString(Advance a) { Chris@83: switch (a) { Chris@83: case AdvanceNone: return "AdvanceNone"; Chris@83: case AdvanceBoth: return "AdvanceBoth"; Chris@83: case AdvanceThis: return "AdvanceThis"; Chris@83: case AdvanceOther: return "AdvanceOther"; Chris@83: } Chris@83: return "(unknown)"; Chris@83: } Chris@45: Chris@15: struct Parameters { Chris@15: Chris@113: Parameters(double hopTime_) : Chris@15: hopTime(hopTime_), Chris@15: blockTime(10.0), Chris@83: maxRunCount(3), Chris@83: diagonalWeight(2.0) Chris@15: {} Chris@15: Chris@15: /** Spacing of audio frames (determines the amount of overlap or Chris@15: * skip between frames). This value is expressed in Chris@83: * seconds. Chris@83: */ Chris@15: double hopTime; Chris@38: Chris@15: /** The width of the search band (error margin) around the current Chris@15: * match position, measured in seconds. Strictly speaking the Chris@15: * width is measured backwards from the current point, since the Chris@15: * algorithm has to work causally. Chris@15: */ Chris@15: double blockTime; Chris@15: Chris@15: /** Maximum number of frames sequentially processed by this Chris@15: * matcher, without a frame of the other matcher being Chris@15: * processed. Chris@15: */ Chris@15: int maxRunCount; Chris@83: Chris@83: /** Weight applied to cost of diagonal step relative to Chris@83: * horizontal or vertical step. The default of 2.0 means that Chris@83: * a diagonal is not favoured over horizontal+vertical Chris@83: * combined, which is good when maintaining gross tracking of Chris@83: * performances that may have wildly differing speeds but Chris@83: * which also leads to quite jaggy paths. A more typical Chris@83: * normal DTW approach for performances with similar speeds Chris@83: * might use 1.0 or something close to it. Chris@83: */ Chris@83: float diagonalWeight; Chris@15: }; Chris@15: cannam@0: /** Constructor for Matcher. Chris@74: * Chris@74: * A Matcher expects to be provided with feature vectors Chris@74: * calculated by some external code (for example, a Chris@74: * FeatureExtractor). Call consumeFeatureVector to provide each Chris@74: * feature frame. Chris@23: * Chris@23: * @param p The Matcher representing the performance with which Chris@23: * this one is going to be matched. Some information is shared Chris@23: * between the two matchers (currently one possesses the distance Chris@23: * matrix and optimal path matrix). Chris@23: */ Chris@143: Matcher(Parameters params, DistanceMetric::Parameters dparams, Matcher *p); Chris@23: Chris@78: /** Destructor for Matcher. Chris@78: */ cannam@0: ~Matcher(); cannam@0: cannam@0: /** Adds a link to the Matcher object representing the performance cannam@0: * which is going to be matched to this one. cannam@0: * cannam@0: * @param p the Matcher representing the other performance cannam@0: */ cannam@0: void setOtherMatcher(Matcher *p) { Chris@43: m_otherMatcher = p; Chris@74: } cannam@0: Chris@78: int getBlockSize() { Chris@78: return m_blockSize; Chris@78: } Chris@78: Chris@171: bool isFillingInitialBlock() { Chris@171: return m_frameCount < m_blockSize; Chris@171: } Chris@171: Chris@78: bool isOverrunning() { Chris@78: return m_runCount >= m_params.maxRunCount; Chris@78: } Chris@78: cannam@0: int getFrameCount() { Chris@43: return m_frameCount; cannam@0: } cannam@0: Chris@72: int getOtherFrameCount() { Chris@72: return m_otherMatcher->getFrameCount(); Chris@72: } Chris@74: Chris@83: float getDiagonalWeight() { Chris@83: return m_params.diagonalWeight; Chris@83: } Chris@83: Chris@74: /** Processes a feature vector frame, presumably calculated from Chris@74: * audio data by some external code such as a FeatureExtractor. Chris@74: * Calculates the distance to all frames stored in the Chris@74: * otherMatcher and stores in the distance matrix, before Chris@74: * updating the optimal path matrix using the dynamic time Chris@74: * warping algorithm. Chris@74: * Chris@104: * The supplied features must always be of the same size (within Chris@104: * any pair of Matcher objects). Chris@74: */ Chris@74: void consumeFeatureVector(std::vector feature); Chris@72: Chris@72: /** Tests whether a location is in range in the minimum cost matrix. Chris@72: * Chris@72: * @param i the frame number of this Matcher Chris@72: * @param j the frame number of the other Matcher Chris@72: * @return true if the location is in range Chris@72: */ Chris@72: bool isInRange(int i, int j); Chris@154: Chris@154: /** Tests whether any locations in the given row are available. Chris@154: */ Chris@154: bool isRowAvailable(int i); Chris@154: Chris@154: /** Tests whether any locations in the given column are available. Chris@154: */ Chris@154: bool isColAvailable(int i); Chris@72: Chris@72: /** Tests whether a location is available in the minimum cost matrix. Chris@72: * Chris@72: * @param i the frame number of this Matcher Chris@72: * @param j the frame number of the other Matcher Chris@72: * @return true if the location is in range and contains a valid cost Chris@72: */ Chris@72: bool isAvailable(int i, int j); Chris@72: Chris@154: /** Returns the valid range of columns for the given row, that is, Chris@154: * the range of frames in the other Matcher for the given frame Chris@154: * in this Matcher's minimum cost matrix. Chris@72: * Chris@72: * @param i the frame number of this Matcher Chris@72: * @return the first, last pair of frame numbers for the other Chris@72: * Matcher. Note that the last frame is exclusive (last valid Chris@72: * frame + 1). Chris@72: */ Chris@72: std::pair getColRange(int i); Chris@72: Chris@154: /** Returns the valid range of rows for the given column, that is, Chris@154: * the range of frames in this Matcher for the given frame in the Chris@154: * other Matcher's minimum cost matrix. Chris@72: * Chris@72: * @param i the frame number of the other Matcher Chris@72: * @return the first, last pair of frame numbers for this Chris@72: * Matcher. Note that the last frame is exclusive (last valid Chris@72: * frame + 1). Chris@72: */ Chris@72: std::pair getRowRange(int i); Chris@72: Chris@72: /** Retrieves a value from the distance matrix. Chris@72: * Chris@72: * @param i the frame number of this Matcher Chris@72: * @param j the frame number of the other Matcher Chris@72: * @return the distance metric at this location Chris@72: */ Chris@72: float getDistance(int i, int j); Chris@72: Chris@72: /** Sets a value to the distance matrix. Chris@72: * Chris@72: * @param i the frame number of this Matcher Chris@72: * @param j the frame number of the other Matcher Chris@72: * @param value the distance metric to set for this location Chris@72: */ Chris@96: void setDistance(int i, int j, float distance); Chris@72: Chris@72: /** Retrieves a value from the minimum cost matrix. Chris@72: * Chris@72: * @param i the frame number of this Matcher Chris@72: * @param j the frame number of the other Matcher Chris@72: * @return the cost of the minimum cost path to this location Chris@72: */ Chris@72: double getPathCost(int i, int j); Chris@72: Chris@72: /** Sets a value and an advance direction to the minimum cost matrix. Chris@72: * Chris@72: * @param i the frame number of this Matcher Chris@72: * @param j the frame number of the other Matcher Chris@72: * @param dir the direction from which this position is reached with Chris@72: * minimum cost Chris@72: * @param value the cost of the minimum cost path to set for this location Chris@72: */ Chris@72: void setPathCost(int i, int j, Advance dir, double value); Chris@135: Chris@135: /** Retrieves a value from the minimum cost matrix, normalised for Chris@135: * path length. Chris@135: * Chris@135: * @param i the frame number of this Matcher Chris@135: * @param j the frame number of the other Matcher Chris@135: * @return the cost of the minimum cost path to this location, Chris@135: * normalised by the Manhattan distance from 0,0 to i,j Chris@135: */ Chris@135: double getNormalisedPathCost(int i, int j); Chris@72: Chris@72: /** Retrieves an advance direction from the matrix. Chris@72: * Chris@72: * @param i the frame number of this Matcher Chris@72: * @param j the frame number of the other Matcher Chris@72: * @return the direction from which this position is reached with Chris@72: * minimum cost Chris@72: */ Chris@72: Advance getAdvance(int i, int j); Chris@72: cannam@0: protected: Chris@38: /** Create internal structures and reset. */ cannam@0: void init(); cannam@0: Chris@38: /** The distXSize value has changed: resize internal buffers. */ Chris@41: void size(); cannam@0: Chris@71: /** Updates an entry in the distance matrix and the optimal path matrix. cannam@0: * cannam@0: * @param i the frame number of this Matcher cannam@0: * @param j the frame number of the other Matcher cannam@0: * @param dir the direction from which this position is reached with cannam@0: * minimum cost cannam@0: * @param value the cost of the minimum path except the current step cannam@0: * @param dMN the distance cost between the two frames cannam@0: */ Chris@71: void updateValue(int i, int j, Advance dir, double value, float dMN); cannam@0: Chris@21: void calcAdvance(); Chris@21: Chris@42: /** Points to the other performance with which this one is being Chris@42: * compared. The data for the distance metric and the dynamic Chris@42: * time warping is shared between the two matchers. In the Chris@42: * original version, only one of the two performance matchers Chris@42: * contained the distance metric. (See first) Chris@42: */ Chris@43: Matcher *m_otherMatcher; Chris@42: Chris@42: /** Indicates which performance is considered primary (the Chris@42: * score). This is the performance shown on the vertical axis, Chris@42: * and referred to as "this" in the codes for the direction of Chris@42: * DTW steps. */ Chris@43: bool m_firstPM; Chris@42: Chris@42: /** Configuration parameters */ Chris@43: Parameters m_params; Chris@42: Chris@42: /** Width of the search band in FFT frames (see blockTime) */ Chris@43: int m_blockSize; Chris@42: Chris@42: /** The number of frames of audio data which have been read. */ Chris@43: int m_frameCount; Chris@42: Chris@42: /** The number of frames sequentially processed by this matcher, Chris@42: * without a frame of the other matcher being processed. Chris@42: */ Chris@43: int m_runCount; Chris@42: Chris@50: /** A block of previously seen feature frames is stored in this Chris@50: * structure for calculation of the distance matrix as the new Chris@50: * frames are received. One can think of the structure of the Chris@50: * array as a circular buffer of vectors. */ Chris@43: vector > m_frames; Chris@42: Chris@42: /** The best path cost matrix. */ Chris@53: vector > m_bestPathCost; Chris@42: Chris@42: /** The distance matrix. */ Chris@45: vector > m_distance; Chris@42: Chris@45: /** The advance direction matrix. */ Chris@45: vector > m_advance; Chris@45: Chris@45: /** The bounds of each row of data in the distance, path cost, and Chris@45: * advance direction matrices.*/ Chris@43: vector m_first; Chris@43: vector m_last; Chris@42: Chris@45: /** Width of distance, path cost, and advance direction matrices Chris@45: * and first and last vectors */ Chris@43: int m_distXSize; Chris@42: Chris@43: bool m_initialised; Chris@42: Chris@43: DistanceMetric m_metric; Chris@78: }; cannam@0: cannam@0: #endif