cannam@0: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ cannam@0: cannam@0: /* cannam@0: Vamp feature extraction plugin using the MATCH audio alignment cannam@0: algorithm. cannam@0: cannam@0: Centre for Digital Music, Queen Mary, University of London. Chris@236: Copyright (c) 2007-2020 Simon Dixon, Chris Cannam, and Queen Mary Chris@230: University of London, Copyright (c) 2014-2015 Tido GmbH. cannam@0: cannam@0: This program is free software; you can redistribute it and/or cannam@0: modify it under the terms of the GNU General Public License as cannam@0: published by the Free Software Foundation; either version 2 of the cannam@0: License, or (at your option) any later version. See the file cannam@0: COPYING included with this distribution for more information. cannam@0: */ cannam@0: Chris@181: #ifndef MATCHER_H Chris@181: #define MATCHER_H cannam@0: cannam@0: #include cannam@0: #include cannam@0: #include cannam@0: #include cannam@0: Chris@26: #include "DistanceMetric.h" Chris@187: #include "MatchTypes.h" cannam@0: Chris@74: /** Represents an audio feature stream that can be matched to another Chris@74: * audio stream of the same piece of music. The matching algorithm Chris@74: * uses dynamic time warping. cannam@0: */ cannam@0: class Matcher cannam@0: { Chris@15: public: Chris@235: static std::string advanceToString(advance_t a) { Chris@83: switch (a) { Chris@83: case AdvanceNone: return "AdvanceNone"; Chris@83: case AdvanceBoth: return "AdvanceBoth"; Chris@83: case AdvanceThis: return "AdvanceThis"; Chris@83: case AdvanceOther: return "AdvanceOther"; Chris@83: } Chris@83: return "(unknown)"; Chris@83: } Chris@45: Chris@15: struct Parameters { Chris@15: Chris@113: Parameters(double hopTime_) : Chris@15: hopTime(hopTime_), Chris@15: blockTime(10.0), Chris@83: maxRunCount(3), Chris@83: diagonalWeight(2.0) Chris@15: {} Chris@15: Chris@235: /** Spacing of audio frames (determines the amount of overlap Chris@235: * or skip between frames). This value is expressed in Chris@83: * seconds. Chris@83: */ Chris@15: double hopTime; Chris@38: Chris@15: /** The width of the search band (error margin) around the current Chris@15: * match position, measured in seconds. Strictly speaking the Chris@15: * width is measured backwards from the current point, since the Chris@15: * algorithm has to work causally. Chris@15: */ Chris@15: double blockTime; Chris@15: Chris@15: /** Maximum number of frames sequentially processed by this Chris@15: * matcher, without a frame of the other matcher being Chris@15: * processed. Chris@15: */ Chris@15: int maxRunCount; Chris@83: Chris@83: /** Weight applied to cost of diagonal step relative to Chris@83: * horizontal or vertical step. The default of 2.0 means that Chris@83: * a diagonal is not favoured over horizontal+vertical Chris@83: * combined, which is good when maintaining gross tracking of Chris@83: * performances that may have wildly differing speeds but Chris@83: * which also leads to quite jaggy paths. A more typical Chris@83: * normal DTW approach for performances with similar speeds Chris@83: * might use 1.0 or something close to it. Chris@83: */ Chris@188: double diagonalWeight; Chris@15: }; Chris@15: cannam@0: /** Constructor for Matcher. Chris@74: * Chris@74: * A Matcher expects to be provided with feature vectors Chris@74: * calculated by some external code (for example, a Chris@74: * FeatureExtractor). Call consumeFeatureVector to provide each Chris@74: * feature frame. Chris@23: * Chris@23: * @param p The Matcher representing the performance with which Chris@23: * this one is going to be matched. Some information is shared Chris@23: * between the two matchers (currently one possesses the distance Chris@23: * matrix and optimal path matrix). Chris@23: */ Chris@143: Matcher(Parameters params, DistanceMetric::Parameters dparams, Matcher *p); Chris@23: Chris@78: /** Destructor for Matcher. Chris@78: */ cannam@0: ~Matcher(); cannam@0: cannam@0: /** Adds a link to the Matcher object representing the performance cannam@0: * which is going to be matched to this one. cannam@0: * cannam@0: * @param p the Matcher representing the other performance cannam@0: */ cannam@0: void setOtherMatcher(Matcher *p) { Chris@43: m_otherMatcher = p; Chris@74: } cannam@0: Chris@78: int getBlockSize() { Chris@78: return m_blockSize; Chris@78: } Chris@78: Chris@171: bool isFillingInitialBlock() { Chris@171: return m_frameCount < m_blockSize; Chris@171: } Chris@171: Chris@78: bool isOverrunning() { Chris@78: return m_runCount >= m_params.maxRunCount; Chris@78: } Chris@78: cannam@0: int getFrameCount() { Chris@43: return m_frameCount; cannam@0: } cannam@0: Chris@72: int getOtherFrameCount() { Chris@72: return m_otherMatcher->getFrameCount(); Chris@72: } Chris@74: Chris@188: double getDiagonalWeight() { Chris@83: return m_params.diagonalWeight; Chris@83: } Chris@83: Chris@74: /** Processes a feature vector frame, presumably calculated from Chris@74: * audio data by some external code such as a FeatureExtractor. Chris@74: * Calculates the distance to all frames stored in the Chris@74: * otherMatcher and stores in the distance matrix, before Chris@74: * updating the optimal path matrix using the dynamic time Chris@74: * warping algorithm. Chris@74: * Chris@104: * The supplied features must always be of the same size (within Chris@104: * any pair of Matcher objects). Chris@74: */ Chris@182: void consumeFeatureVector(const feature_t &feature); Chris@72: Chris@72: /** Tests whether a location is in range in the minimum cost matrix. Chris@72: * Chris@72: * @param i the frame number of this Matcher Chris@72: * @param j the frame number of the other Matcher Chris@72: * @return true if the location is in range Chris@72: */ Chris@72: bool isInRange(int i, int j); Chris@203: Chris@203: /** Tests whether a location is available in the minimum cost Chris@203: * matrix, that is, whether it is in range and contains a valid Chris@203: * cost value. Note this and its associated isRowAvailable, Chris@203: * isColAvailable checks are more expensive than isInRange and Chris@203: * are really intended for error checking. (If a row is in range, Chris@203: * it should always be available.) Chris@203: * Chris@203: * @param i the frame number of this Matcher Chris@203: * @param j the frame number of the other Matcher Chris@203: * @return true if the location is in range and contains a valid cost Chris@203: */ Chris@203: bool isAvailable(int i, int j); Chris@154: Chris@154: /** Tests whether any locations in the given row are available. Chris@154: */ Chris@154: bool isRowAvailable(int i); Chris@154: Chris@154: /** Tests whether any locations in the given column are available. Chris@154: */ Chris@154: bool isColAvailable(int i); Chris@72: Chris@154: /** Returns the valid range of columns for the given row, that is, Chris@154: * the range of frames in the other Matcher for the given frame Chris@154: * in this Matcher's minimum cost matrix. Chris@72: * Chris@72: * @param i the frame number of this Matcher Chris@72: * @return the first, last pair of frame numbers for the other Chris@72: * Matcher. Note that the last frame is exclusive (last valid Chris@72: * frame + 1). Chris@72: */ Chris@205: std::pair getColRangeForRow(int i); Chris@72: Chris@154: /** Returns the valid range of rows for the given column, that is, Chris@154: * the range of frames in this Matcher for the given frame in the Chris@154: * other Matcher's minimum cost matrix. Chris@72: * Chris@72: * @param i the frame number of the other Matcher Chris@72: * @return the first, last pair of frame numbers for this Chris@72: * Matcher. Note that the last frame is exclusive (last valid Chris@72: * frame + 1). Chris@72: */ Chris@205: std::pair getRowRangeForCol(int i); Chris@72: Chris@72: /** Retrieves a value from the distance matrix. Chris@72: * Chris@72: * @param i the frame number of this Matcher Chris@72: * @param j the frame number of the other Matcher Chris@72: * @return the distance metric at this location Chris@72: */ Chris@182: distance_t getDistance(int i, int j); Chris@72: Chris@72: /** Sets a value to the distance matrix. Chris@72: * Chris@72: * @param i the frame number of this Matcher Chris@72: * @param j the frame number of the other Matcher Chris@72: * @param value the distance metric to set for this location Chris@72: */ Chris@182: void setDistance(int i, int j, distance_t distance); Chris@72: Chris@72: /** Retrieves a value from the minimum cost matrix. Chris@72: * Chris@72: * @param i the frame number of this Matcher Chris@72: * @param j the frame number of the other Matcher Chris@72: * @return the cost of the minimum cost path to this location Chris@72: */ Chris@182: pathcost_t getPathCost(int i, int j); Chris@72: Chris@72: /** Sets a value and an advance direction to the minimum cost matrix. Chris@72: * Chris@72: * @param i the frame number of this Matcher Chris@72: * @param j the frame number of the other Matcher Chris@72: * @param dir the direction from which this position is reached with Chris@72: * minimum cost Chris@72: * @param value the cost of the minimum cost path to set for this location Chris@72: */ Chris@182: void setPathCost(int i, int j, advance_t dir, pathcost_t value); Chris@135: Chris@135: /** Retrieves a value from the minimum cost matrix, normalised for Chris@135: * path length. Chris@135: * Chris@135: * @param i the frame number of this Matcher Chris@135: * @param j the frame number of the other Matcher Chris@135: * @return the cost of the minimum cost path to this location, Chris@135: * normalised by the Manhattan distance from 0,0 to i,j Chris@135: */ Chris@191: normpathcost_t getNormalisedPathCost(int i, int j); Chris@72: Chris@72: /** Retrieves an advance direction from the matrix. Chris@72: * Chris@72: * @param i the frame number of this Matcher Chris@72: * @param j the frame number of the other Matcher Chris@72: * @return the direction from which this position is reached with Chris@72: * minimum cost Chris@72: */ Chris@181: advance_t getAdvance(int i, int j); Chris@202: Chris@232: struct MemoryStats { Chris@232: double features_k; Chris@232: double pathcosts_k; Chris@232: double distances_k; Chris@232: double advances_k; Chris@232: double total_k() const { Chris@232: return features_k + pathcosts_k + distances_k + advances_k; Chris@232: } Chris@232: }; Chris@232: Chris@232: /** Obtain some stats about memory consumption. Chris@232: */ Chris@232: MemoryStats getMemoryStats() const; Chris@232: Chris@202: /** Print some stats about memory consumption etc to stderr. Chris@202: */ Chris@202: void printStats(); Chris@72: cannam@0: protected: Chris@38: /** Create internal structures and reset. */ cannam@0: void init(); cannam@0: Chris@38: /** The distXSize value has changed: resize internal buffers. */ Chris@41: void size(); cannam@0: Chris@71: /** Updates an entry in the distance matrix and the optimal path matrix. cannam@0: * cannam@0: * @param i the frame number of this Matcher cannam@0: * @param j the frame number of the other Matcher cannam@0: * @param dir the direction from which this position is reached with cannam@0: * minimum cost cannam@0: * @param value the cost of the minimum path except the current step cannam@0: * @param dMN the distance cost between the two frames cannam@0: */ Chris@182: void updateValue(int i, int j, advance_t dir, pathcost_t value, distance_t dMN); cannam@0: Chris@21: void calcAdvance(); Chris@21: Chris@211: /** Chris@211: * Add the given distance increment to the given path cost, and Chris@211: * return the result clipped (if necessary) at MaxPathCost. Chris@211: */ Chris@211: pathcost_t addToCost(pathcost_t cost, pathcost_t increment); Chris@211: Chris@42: /** Points to the other performance with which this one is being Chris@211: * compared. (See m_firstPM) Chris@42: */ Chris@43: Matcher *m_otherMatcher; Chris@42: Chris@42: /** Indicates which performance is considered primary (the Chris@42: * score). This is the performance shown on the vertical axis, Chris@42: * and referred to as "this" in the codes for the direction of Chris@42: * DTW steps. */ Chris@43: bool m_firstPM; Chris@42: Chris@42: /** Configuration parameters */ Chris@43: Parameters m_params; Chris@42: Chris@42: /** Width of the search band in FFT frames (see blockTime) */ Chris@43: int m_blockSize; Chris@42: Chris@42: /** The number of frames of audio data which have been read. */ Chris@43: int m_frameCount; Chris@42: Chris@42: /** The number of frames sequentially processed by this matcher, Chris@42: * without a frame of the other matcher being processed. Chris@42: */ Chris@43: int m_runCount; Chris@42: Chris@50: /** A block of previously seen feature frames is stored in this Chris@50: * structure for calculation of the distance matrix as the new Chris@50: * frames are received. One can think of the structure of the Chris@50: * array as a circular buffer of vectors. */ Chris@182: featureseq_t m_features; Chris@42: Chris@42: /** The best path cost matrix. */ Chris@182: pathcostmat_t m_bestPathCost; Chris@42: Chris@42: /** The distance matrix. */ Chris@182: distancemat_t m_distance; Chris@42: Chris@45: /** The advance direction matrix. */ Chris@182: advancemat_t m_advance; Chris@45: Chris@45: /** The bounds of each row of data in the distance, path cost, and Chris@45: * advance direction matrices.*/ Chris@235: std::vector m_first; Chris@235: std::vector m_last; Chris@235: Chris@45: /** Width of distance, path cost, and advance direction matrices Chris@45: * and first and last vectors */ Chris@43: int m_distXSize; Chris@42: Chris@43: bool m_initialised; Chris@42: Chris@43: DistanceMetric m_metric; Chris@78: }; cannam@0: Chris@232: inline Matcher::MemoryStats operator+(const Matcher::MemoryStats &a, Chris@232: const Matcher::MemoryStats &b) Chris@232: { Chris@232: Matcher::MemoryStats m = a; Chris@232: m.features_k += b.features_k; Chris@232: m.pathcosts_k += b.pathcosts_k; Chris@232: m.distances_k += b.distances_k; Chris@232: m.advances_k += b.advances_k; Chris@232: return m; Chris@232: } Chris@232: cannam@0: #endif