match-vamp: src/Matcher.h annotate

annotate src/Matcher.h @ 96:6b91e40b2c04 refactors

Tidy

author	Chris Cannam
date	Thu, 27 Nov 2014 16:50:14 +0000
parents	10e76188c846
children	6636aca831c0

rev	line source
cannam@0	1 /* -- c-basic-offset: 4 indent-tabs-mode: nil -- vi:set ts=8 sts=4 sw=4: */
cannam@0	2
cannam@0	3 /*
cannam@0	4 Vamp feature extraction plugin using the MATCH audio alignment
cannam@0	5 algorithm.
cannam@0	6
cannam@0	7 Centre for Digital Music, Queen Mary, University of London.
cannam@0	8 This file copyright 2007 Simon Dixon, Chris Cannam and QMUL.
cannam@0	9
cannam@0	10 This program is free software; you can redistribute it and/or
cannam@0	11 modify it under the terms of the GNU General Public License as
cannam@0	12 published by the Free Software Foundation; either version 2 of the
cannam@0	13 License, or (at your option) any later version. See the file
cannam@0	14 COPYING included with this distribution for more information.
cannam@0	15 */
cannam@0	16
cannam@0	17 #ifndef _MATCHER_H_
cannam@0	18 #define _MATCHER_H_
cannam@0	19
cannam@0	20 #include <vector>
cannam@0	21 #include <iostream>
cannam@0	22 #include <sstream>
cannam@0	23 #include <cmath>
cannam@0	24
Chris@26	25 #include "DistanceMetric.h"
cannam@0	26
cannam@0	27 using std::vector;
cannam@0	28 using std::string;
cannam@0	29 using std::cerr;
cannam@0	30 using std::endl;
cannam@0	31
Chris@74	32 /** Represents an audio feature stream that can be matched to another
Chris@74	33 * audio stream of the same piece of music. The matching algorithm
Chris@74	34 * uses dynamic time warping.
cannam@0	35 */
cannam@0	36 class Matcher
cannam@0	37 {
Chris@15	38 public:
Chris@45	39 enum Advance {
Chris@45	40 AdvanceNone,
Chris@45	41 AdvanceBoth,
Chris@45	42 AdvanceThis,
Chris@45	43 AdvanceOther
Chris@45	44 };
Chris@83	45 static string advanceToString(Advance a) {
Chris@83	46 switch (a) {
Chris@83	47 case AdvanceNone: return "AdvanceNone";
Chris@83	48 case AdvanceBoth: return "AdvanceBoth";
Chris@83	49 case AdvanceThis: return "AdvanceThis";
Chris@83	50 case AdvanceOther: return "AdvanceOther";
Chris@83	51 }
Chris@83	52 return "(unknown)";
Chris@83	53 }
Chris@45	54
Chris@15	55 struct Parameters {
Chris@15	56
Chris@15	57 Parameters(float rate_, double hopTime_, int fftSize_) :
Chris@15	58 sampleRate(rate_),
Chris@26	59 distanceNorm(DistanceMetric::NormaliseDistanceToLogSum),
Chris@15	60 hopTime(hopTime_),
Chris@15	61 fftSize(fftSize_),
Chris@15	62 blockTime(10.0),
Chris@83	63 maxRunCount(3),
Chris@83	64 diagonalWeight(2.0)
Chris@15	65 {}
Chris@15	66
Chris@15	67 /** Sample rate of audio */
Chris@15	68 float sampleRate;
Chris@15	69
Chris@15	70 /** Type of distance metric normalisation */
Chris@26	71 DistanceMetric::DistanceNormalisation distanceNorm;
Chris@15	72
Chris@15	73 /** Spacing of audio frames (determines the amount of overlap or
Chris@15	74 * skip between frames). This value is expressed in
Chris@83	75 * seconds.
Chris@83	76 */
Chris@15	77 double hopTime;
Chris@38	78
Chris@15	79 /** Size of an FFT frame in samples. Note that the data passed
Chris@15	80 * in to Matcher is already in the frequency domain, so this
Chris@15	81 * expresses the size of the frame that the caller will be
Chris@83	82 * providing.
Chris@83	83 */
Chris@15	84 int fftSize;
Chris@38	85
Chris@15	86 /** The width of the search band (error margin) around the current
Chris@15	87 * match position, measured in seconds. Strictly speaking the
Chris@15	88 * width is measured backwards from the current point, since the
Chris@15	89 * algorithm has to work causally.
Chris@15	90 */
Chris@15	91 double blockTime;
Chris@15	92
Chris@15	93 /** Maximum number of frames sequentially processed by this
Chris@15	94 * matcher, without a frame of the other matcher being
Chris@15	95 * processed.
Chris@15	96 */
Chris@15	97 int maxRunCount;
Chris@83	98
Chris@83	99 /** Weight applied to cost of diagonal step relative to
Chris@83	100 * horizontal or vertical step. The default of 2.0 means that
Chris@83	101 * a diagonal is not favoured over horizontal+vertical
Chris@83	102 * combined, which is good when maintaining gross tracking of
Chris@83	103 * performances that may have wildly differing speeds but
Chris@83	104 * which also leads to quite jaggy paths. A more typical
Chris@83	105 * normal DTW approach for performances with similar speeds
Chris@83	106 * might use 1.0 or something close to it.
Chris@83	107 */
Chris@83	108 float diagonalWeight;
Chris@15	109 };
Chris@15	110
cannam@0	111 /** Constructor for Matcher.
Chris@74	112 *
Chris@74	113 * A Matcher expects to be provided with feature vectors
Chris@74	114 * calculated by some external code (for example, a
Chris@74	115 * FeatureExtractor). Call consumeFeatureVector to provide each
Chris@74	116 * feature frame.
Chris@23	117 *
Chris@23	118 * @param p The Matcher representing the performance with which
Chris@23	119 * this one is going to be matched. Some information is shared
Chris@23	120 * between the two matchers (currently one possesses the distance
Chris@23	121 * matrix and optimal path matrix).
Chris@23	122 *
Chris@74	123 * @param featureSize Number of values in each of the feature
Chris@74	124 * vectors that will be provided.
Chris@23	125 */
Chris@23	126 Matcher(Parameters parameters, Matcher *p, int featureSize);
Chris@23	127
Chris@78	128 /** Destructor for Matcher.
Chris@78	129 */
cannam@0	130 ~Matcher();
cannam@0	131
cannam@0	132 /** Adds a link to the Matcher object representing the performance
cannam@0	133 * which is going to be matched to this one.
cannam@0	134 *
cannam@0	135 * @param p the Matcher representing the other performance
cannam@0	136 */
cannam@0	137 void setOtherMatcher(Matcher *p) {
Chris@43	138 m_otherMatcher = p;
Chris@74	139 }
cannam@0	140
Chris@78	141 int getBlockSize() {
Chris@78	142 return m_blockSize;
Chris@78	143 }
Chris@78	144
Chris@78	145 bool isOverrunning() {
Chris@78	146 return m_runCount >= m_params.maxRunCount;
Chris@78	147 }
Chris@78	148
cannam@0	149 int getFrameCount() {
Chris@43	150 return m_frameCount;
cannam@0	151 }
cannam@0	152
Chris@72	153 int getOtherFrameCount() {
Chris@72	154 return m_otherMatcher->getFrameCount();
Chris@72	155 }
Chris@74	156
Chris@83	157 float getDiagonalWeight() {
Chris@83	158 return m_params.diagonalWeight;
Chris@83	159 }
Chris@83	160
Chris@74	161 /** Processes a feature vector frame, presumably calculated from
Chris@74	162 * audio data by some external code such as a FeatureExtractor.
Chris@74	163 * Calculates the distance to all frames stored in the
Chris@74	164 * otherMatcher and stores in the distance matrix, before
Chris@74	165 * updating the optimal path matrix using the dynamic time
Chris@74	166 * warping algorithm.
Chris@74	167 *
Chris@74	168 * The supplied feature must be of the size that was passed as
Chris@74	169 * featureSize to the constructor.
Chris@74	170 */
Chris@74	171 void consumeFeatureVector(std::vector<double> feature);
Chris@72	172
Chris@72	173 /** Tests whether a location is in range in the minimum cost matrix.
Chris@72	174 *
Chris@72	175 * @param i the frame number of this Matcher
Chris@72	176 * @param j the frame number of the other Matcher
Chris@72	177 * @return true if the location is in range
Chris@72	178 */
Chris@72	179 bool isInRange(int i, int j);
Chris@72	180
Chris@72	181 /** Tests whether a location is available in the minimum cost matrix.
Chris@72	182 *
Chris@72	183 * @param i the frame number of this Matcher
Chris@72	184 * @param j the frame number of the other Matcher
Chris@72	185 * @return true if the location is in range and contains a valid cost
Chris@72	186 */
Chris@72	187 bool isAvailable(int i, int j);
Chris@72	188
Chris@72	189 /** Returns the valid range of frames in the other Matcher for the
Chris@72	190 * given frame in this Matcher's minimum cost matrix.
Chris@72	191 *
Chris@72	192 * @param i the frame number of this Matcher
Chris@72	193 * @return the first, last pair of frame numbers for the other
Chris@72	194 * Matcher. Note that the last frame is exclusive (last valid
Chris@72	195 * frame + 1).
Chris@72	196 */
Chris@72	197 std::pair<int, int> getColRange(int i);
Chris@72	198
Chris@72	199 /** Returns the valid range of frames in this Matcher for the
Chris@72	200 * given frame in the other Matcher's minimum cost matrix.
Chris@72	201 *
Chris@72	202 * @param i the frame number of the other Matcher
Chris@72	203 * @return the first, last pair of frame numbers for this
Chris@72	204 * Matcher. Note that the last frame is exclusive (last valid
Chris@72	205 * frame + 1).
Chris@72	206 */
Chris@72	207 std::pair<int, int> getRowRange(int i);
Chris@72	208
Chris@72	209 /** Retrieves a value from the distance matrix.
Chris@72	210 *
Chris@72	211 * @param i the frame number of this Matcher
Chris@72	212 * @param j the frame number of the other Matcher
Chris@72	213 * @return the distance metric at this location
Chris@72	214 */
Chris@72	215 float getDistance(int i, int j);
Chris@72	216
Chris@72	217 /** Sets a value to the distance matrix.
Chris@72	218 *
Chris@72	219 * @param i the frame number of this Matcher
Chris@72	220 * @param j the frame number of the other Matcher
Chris@72	221 * @param value the distance metric to set for this location
Chris@72	222 */
Chris@96	223 void setDistance(int i, int j, float distance);
Chris@72	224
Chris@72	225 /** Retrieves a value from the minimum cost matrix.
Chris@72	226 *
Chris@72	227 * @param i the frame number of this Matcher
Chris@72	228 * @param j the frame number of the other Matcher
Chris@72	229 * @return the cost of the minimum cost path to this location
Chris@72	230 */
Chris@72	231 double getPathCost(int i, int j);
Chris@72	232
Chris@72	233 /** Sets a value and an advance direction to the minimum cost matrix.
Chris@72	234 *
Chris@72	235 * @param i the frame number of this Matcher
Chris@72	236 * @param j the frame number of the other Matcher
Chris@72	237 * @param dir the direction from which this position is reached with
Chris@72	238 * minimum cost
Chris@72	239 * @param value the cost of the minimum cost path to set for this location
Chris@72	240 */
Chris@72	241 void setPathCost(int i, int j, Advance dir, double value);
Chris@72	242
Chris@72	243 /** Retrieves an advance direction from the matrix.
Chris@72	244 *
Chris@72	245 * @param i the frame number of this Matcher
Chris@72	246 * @param j the frame number of the other Matcher
Chris@72	247 * @return the direction from which this position is reached with
Chris@72	248 * minimum cost
Chris@72	249 */
Chris@72	250 Advance getAdvance(int i, int j);
Chris@72	251
cannam@0	252 protected:
Chris@38	253 /** Create internal structures and reset. */
cannam@0	254 void init();
cannam@0	255
Chris@38	256 /** The distXSize value has changed: resize internal buffers. */
Chris@41	257 void size();
cannam@0	258
Chris@71	259 /** Updates an entry in the distance matrix and the optimal path matrix.
cannam@0	260 *
cannam@0	261 * @param i the frame number of this Matcher
cannam@0	262 * @param j the frame number of the other Matcher
cannam@0	263 * @param dir the direction from which this position is reached with
cannam@0	264 * minimum cost
cannam@0	265 * @param value the cost of the minimum path except the current step
cannam@0	266 * @param dMN the distance cost between the two frames
cannam@0	267 */
Chris@71	268 void updateValue(int i, int j, Advance dir, double value, float dMN);
cannam@0	269
Chris@21	270 void calcAdvance();
Chris@21	271
Chris@42	272 /** Points to the other performance with which this one is being
Chris@42	273 * compared. The data for the distance metric and the dynamic
Chris@42	274 * time warping is shared between the two matchers. In the
Chris@42	275 * original version, only one of the two performance matchers
Chris@42	276 * contained the distance metric. (See <code>first</code>)
Chris@42	277 */
Chris@43	278 Matcher *m_otherMatcher;
Chris@42	279
Chris@42	280 /** Indicates which performance is considered primary (the
Chris@42	281 * score). This is the performance shown on the vertical axis,
Chris@42	282 * and referred to as "this" in the codes for the direction of
Chris@42	283 * DTW steps. */
Chris@43	284 bool m_firstPM;
Chris@42	285
Chris@42	286 /** Configuration parameters */
Chris@43	287 Parameters m_params;
Chris@42	288
Chris@42	289 /** Width of the search band in FFT frames (see <code>blockTime</code>) */
Chris@43	290 int m_blockSize;
Chris@42	291
Chris@42	292 /** The number of frames of audio data which have been read. */
Chris@43	293 int m_frameCount;
Chris@42	294
Chris@42	295 /** The number of frames sequentially processed by this matcher,
Chris@42	296 * without a frame of the other matcher being processed.
Chris@42	297 */
Chris@43	298 int m_runCount;
Chris@42	299
Chris@42	300 /** The number of values in a feature vector. */
Chris@43	301 int m_featureSize;
Chris@42	302
Chris@50	303 /** A block of previously seen feature frames is stored in this
Chris@50	304 * structure for calculation of the distance matrix as the new
Chris@50	305 * frames are received. One can think of the structure of the
Chris@50	306 * array as a circular buffer of vectors. */
Chris@43	307 vector<vector<double> > m_frames;
Chris@42	308
Chris@42	309 /** The best path cost matrix. */
Chris@53	310 vector<vector<double> > m_bestPathCost;
Chris@42	311
Chris@42	312 /** The distance matrix. */
Chris@45	313 vector<vector<float> > m_distance;
Chris@42	314
Chris@45	315 /** The advance direction matrix. */
Chris@45	316 vector<vector<Advance> > m_advance;
Chris@45	317
Chris@45	318 /** The bounds of each row of data in the distance, path cost, and
Chris@45	319 * advance direction matrices.*/
Chris@43	320 vector<int> m_first;
Chris@43	321 vector<int> m_last;
Chris@42	322
Chris@45	323 /** Width of distance, path cost, and advance direction matrices
Chris@45	324 * and first and last vectors */
Chris@43	325 int m_distXSize;
Chris@42	326
Chris@43	327 bool m_initialised;
Chris@42	328
Chris@43	329 DistanceMetric m_metric;
Chris@78	330 };
cannam@0	331
cannam@0	332 #endif

Mercurial > hg > match-vamp

annotate src/Matcher.h @ 96:6b91e40b2c04 refactors