annotate src/Matcher.h @ 205:010b91e44098 memory

Rename getRowRange/getColRange to make nature of arg clearer
author Chris Cannam
date Fri, 27 Feb 2015 12:21:58 +0000
parents 3662865740da
children 6373d14deace
rev   line source
cannam@0 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
cannam@0 2
cannam@0 3 /*
cannam@0 4 Vamp feature extraction plugin using the MATCH audio alignment
cannam@0 5 algorithm.
cannam@0 6
cannam@0 7 Centre for Digital Music, Queen Mary, University of London.
cannam@0 8 This file copyright 2007 Simon Dixon, Chris Cannam and QMUL.
cannam@0 9
cannam@0 10 This program is free software; you can redistribute it and/or
cannam@0 11 modify it under the terms of the GNU General Public License as
cannam@0 12 published by the Free Software Foundation; either version 2 of the
cannam@0 13 License, or (at your option) any later version. See the file
cannam@0 14 COPYING included with this distribution for more information.
cannam@0 15 */
cannam@0 16
Chris@181 17 #ifndef MATCHER_H
Chris@181 18 #define MATCHER_H
cannam@0 19
cannam@0 20 #include <vector>
cannam@0 21 #include <iostream>
cannam@0 22 #include <sstream>
cannam@0 23 #include <cmath>
cannam@0 24
Chris@26 25 #include "DistanceMetric.h"
Chris@187 26 #include "MatchTypes.h"
cannam@0 27
cannam@0 28 using std::vector;
cannam@0 29 using std::string;
cannam@0 30 using std::cerr;
cannam@0 31 using std::endl;
cannam@0 32
Chris@74 33 /** Represents an audio feature stream that can be matched to another
Chris@74 34 * audio stream of the same piece of music. The matching algorithm
Chris@74 35 * uses dynamic time warping.
cannam@0 36 */
cannam@0 37 class Matcher
cannam@0 38 {
Chris@15 39 public:
Chris@181 40 static string advanceToString(advance_t a) {
Chris@83 41 switch (a) {
Chris@83 42 case AdvanceNone: return "AdvanceNone";
Chris@83 43 case AdvanceBoth: return "AdvanceBoth";
Chris@83 44 case AdvanceThis: return "AdvanceThis";
Chris@83 45 case AdvanceOther: return "AdvanceOther";
Chris@83 46 }
Chris@83 47 return "(unknown)";
Chris@83 48 }
Chris@45 49
Chris@15 50 struct Parameters {
Chris@15 51
Chris@113 52 Parameters(double hopTime_) :
Chris@15 53 hopTime(hopTime_),
Chris@15 54 blockTime(10.0),
Chris@83 55 maxRunCount(3),
Chris@83 56 diagonalWeight(2.0)
Chris@15 57 {}
Chris@15 58
Chris@15 59 /** Spacing of audio frames (determines the amount of overlap or
Chris@15 60 * skip between frames). This value is expressed in
Chris@83 61 * seconds.
Chris@83 62 */
Chris@15 63 double hopTime;
Chris@38 64
Chris@15 65 /** The width of the search band (error margin) around the current
Chris@15 66 * match position, measured in seconds. Strictly speaking the
Chris@15 67 * width is measured backwards from the current point, since the
Chris@15 68 * algorithm has to work causally.
Chris@15 69 */
Chris@15 70 double blockTime;
Chris@15 71
Chris@15 72 /** Maximum number of frames sequentially processed by this
Chris@15 73 * matcher, without a frame of the other matcher being
Chris@15 74 * processed.
Chris@15 75 */
Chris@15 76 int maxRunCount;
Chris@83 77
Chris@83 78 /** Weight applied to cost of diagonal step relative to
Chris@83 79 * horizontal or vertical step. The default of 2.0 means that
Chris@83 80 * a diagonal is not favoured over horizontal+vertical
Chris@83 81 * combined, which is good when maintaining gross tracking of
Chris@83 82 * performances that may have wildly differing speeds but
Chris@83 83 * which also leads to quite jaggy paths. A more typical
Chris@83 84 * normal DTW approach for performances with similar speeds
Chris@83 85 * might use 1.0 or something close to it.
Chris@83 86 */
Chris@188 87 double diagonalWeight;
Chris@15 88 };
Chris@15 89
cannam@0 90 /** Constructor for Matcher.
Chris@74 91 *
Chris@74 92 * A Matcher expects to be provided with feature vectors
Chris@74 93 * calculated by some external code (for example, a
Chris@74 94 * FeatureExtractor). Call consumeFeatureVector to provide each
Chris@74 95 * feature frame.
Chris@23 96 *
Chris@23 97 * @param p The Matcher representing the performance with which
Chris@23 98 * this one is going to be matched. Some information is shared
Chris@23 99 * between the two matchers (currently one possesses the distance
Chris@23 100 * matrix and optimal path matrix).
Chris@23 101 */
Chris@143 102 Matcher(Parameters params, DistanceMetric::Parameters dparams, Matcher *p);
Chris@23 103
Chris@78 104 /** Destructor for Matcher.
Chris@78 105 */
cannam@0 106 ~Matcher();
cannam@0 107
cannam@0 108 /** Adds a link to the Matcher object representing the performance
cannam@0 109 * which is going to be matched to this one.
cannam@0 110 *
cannam@0 111 * @param p the Matcher representing the other performance
cannam@0 112 */
cannam@0 113 void setOtherMatcher(Matcher *p) {
Chris@43 114 m_otherMatcher = p;
Chris@74 115 }
cannam@0 116
Chris@78 117 int getBlockSize() {
Chris@78 118 return m_blockSize;
Chris@78 119 }
Chris@78 120
Chris@171 121 bool isFillingInitialBlock() {
Chris@171 122 return m_frameCount < m_blockSize;
Chris@171 123 }
Chris@171 124
Chris@78 125 bool isOverrunning() {
Chris@78 126 return m_runCount >= m_params.maxRunCount;
Chris@78 127 }
Chris@78 128
cannam@0 129 int getFrameCount() {
Chris@43 130 return m_frameCount;
cannam@0 131 }
cannam@0 132
Chris@72 133 int getOtherFrameCount() {
Chris@72 134 return m_otherMatcher->getFrameCount();
Chris@72 135 }
Chris@74 136
Chris@188 137 double getDiagonalWeight() {
Chris@83 138 return m_params.diagonalWeight;
Chris@83 139 }
Chris@83 140
Chris@74 141 /** Processes a feature vector frame, presumably calculated from
Chris@74 142 * audio data by some external code such as a FeatureExtractor.
Chris@74 143 * Calculates the distance to all frames stored in the
Chris@74 144 * otherMatcher and stores in the distance matrix, before
Chris@74 145 * updating the optimal path matrix using the dynamic time
Chris@74 146 * warping algorithm.
Chris@74 147 *
Chris@104 148 * The supplied features must always be of the same size (within
Chris@104 149 * any pair of Matcher objects).
Chris@74 150 */
Chris@182 151 void consumeFeatureVector(const feature_t &feature);
Chris@72 152
Chris@72 153 /** Tests whether a location is in range in the minimum cost matrix.
Chris@72 154 *
Chris@72 155 * @param i the frame number of this Matcher
Chris@72 156 * @param j the frame number of the other Matcher
Chris@72 157 * @return true if the location is in range
Chris@72 158 */
Chris@72 159 bool isInRange(int i, int j);
Chris@203 160
Chris@203 161 /** Tests whether a location is available in the minimum cost
Chris@203 162 * matrix, that is, whether it is in range and contains a valid
Chris@203 163 * cost value. Note this and its associated isRowAvailable,
Chris@203 164 * isColAvailable checks are more expensive than isInRange and
Chris@203 165 * are really intended for error checking. (If a row is in range,
Chris@203 166 * it should always be available.)
Chris@203 167 *
Chris@203 168 * @param i the frame number of this Matcher
Chris@203 169 * @param j the frame number of the other Matcher
Chris@203 170 * @return true if the location is in range and contains a valid cost
Chris@203 171 */
Chris@203 172 bool isAvailable(int i, int j);
Chris@154 173
Chris@154 174 /** Tests whether any locations in the given row are available.
Chris@154 175 */
Chris@154 176 bool isRowAvailable(int i);
Chris@154 177
Chris@154 178 /** Tests whether any locations in the given column are available.
Chris@154 179 */
Chris@154 180 bool isColAvailable(int i);
Chris@72 181
Chris@154 182 /** Returns the valid range of columns for the given row, that is,
Chris@154 183 * the range of frames in the other Matcher for the given frame
Chris@154 184 * in this Matcher's minimum cost matrix.
Chris@72 185 *
Chris@72 186 * @param i the frame number of this Matcher
Chris@72 187 * @return the first, last pair of frame numbers for the other
Chris@72 188 * Matcher. Note that the last frame is exclusive (last valid
Chris@72 189 * frame + 1).
Chris@72 190 */
Chris@205 191 std::pair<int, int> getColRangeForRow(int i);
Chris@72 192
Chris@154 193 /** Returns the valid range of rows for the given column, that is,
Chris@154 194 * the range of frames in this Matcher for the given frame in the
Chris@154 195 * other Matcher's minimum cost matrix.
Chris@72 196 *
Chris@72 197 * @param i the frame number of the other Matcher
Chris@72 198 * @return the first, last pair of frame numbers for this
Chris@72 199 * Matcher. Note that the last frame is exclusive (last valid
Chris@72 200 * frame + 1).
Chris@72 201 */
Chris@205 202 std::pair<int, int> getRowRangeForCol(int i);
Chris@72 203
Chris@72 204 /** Retrieves a value from the distance matrix.
Chris@72 205 *
Chris@72 206 * @param i the frame number of this Matcher
Chris@72 207 * @param j the frame number of the other Matcher
Chris@72 208 * @return the distance metric at this location
Chris@72 209 */
Chris@182 210 distance_t getDistance(int i, int j);
Chris@72 211
Chris@72 212 /** Sets a value to the distance matrix.
Chris@72 213 *
Chris@72 214 * @param i the frame number of this Matcher
Chris@72 215 * @param j the frame number of the other Matcher
Chris@72 216 * @param value the distance metric to set for this location
Chris@72 217 */
Chris@182 218 void setDistance(int i, int j, distance_t distance);
Chris@72 219
Chris@72 220 /** Retrieves a value from the minimum cost matrix.
Chris@72 221 *
Chris@72 222 * @param i the frame number of this Matcher
Chris@72 223 * @param j the frame number of the other Matcher
Chris@72 224 * @return the cost of the minimum cost path to this location
Chris@72 225 */
Chris@182 226 pathcost_t getPathCost(int i, int j);
Chris@72 227
Chris@72 228 /** Sets a value and an advance direction to the minimum cost matrix.
Chris@72 229 *
Chris@72 230 * @param i the frame number of this Matcher
Chris@72 231 * @param j the frame number of the other Matcher
Chris@72 232 * @param dir the direction from which this position is reached with
Chris@72 233 * minimum cost
Chris@72 234 * @param value the cost of the minimum cost path to set for this location
Chris@72 235 */
Chris@182 236 void setPathCost(int i, int j, advance_t dir, pathcost_t value);
Chris@135 237
Chris@135 238 /** Retrieves a value from the minimum cost matrix, normalised for
Chris@135 239 * path length.
Chris@135 240 *
Chris@135 241 * @param i the frame number of this Matcher
Chris@135 242 * @param j the frame number of the other Matcher
Chris@135 243 * @return the cost of the minimum cost path to this location,
Chris@135 244 * normalised by the Manhattan distance from 0,0 to i,j
Chris@135 245 */
Chris@191 246 normpathcost_t getNormalisedPathCost(int i, int j);
Chris@72 247
Chris@72 248 /** Retrieves an advance direction from the matrix.
Chris@72 249 *
Chris@72 250 * @param i the frame number of this Matcher
Chris@72 251 * @param j the frame number of the other Matcher
Chris@72 252 * @return the direction from which this position is reached with
Chris@72 253 * minimum cost
Chris@72 254 */
Chris@181 255 advance_t getAdvance(int i, int j);
Chris@202 256
Chris@202 257 /** Print some stats about memory consumption etc to stderr.
Chris@202 258 */
Chris@202 259 void printStats();
Chris@72 260
cannam@0 261 protected:
Chris@38 262 /** Create internal structures and reset. */
cannam@0 263 void init();
cannam@0 264
Chris@38 265 /** The distXSize value has changed: resize internal buffers. */
Chris@41 266 void size();
cannam@0 267
Chris@71 268 /** Updates an entry in the distance matrix and the optimal path matrix.
cannam@0 269 *
cannam@0 270 * @param i the frame number of this Matcher
cannam@0 271 * @param j the frame number of the other Matcher
cannam@0 272 * @param dir the direction from which this position is reached with
cannam@0 273 * minimum cost
cannam@0 274 * @param value the cost of the minimum path except the current step
cannam@0 275 * @param dMN the distance cost between the two frames
cannam@0 276 */
Chris@182 277 void updateValue(int i, int j, advance_t dir, pathcost_t value, distance_t dMN);
cannam@0 278
Chris@21 279 void calcAdvance();
Chris@21 280
Chris@42 281 /** Points to the other performance with which this one is being
Chris@42 282 * compared. The data for the distance metric and the dynamic
Chris@42 283 * time warping is shared between the two matchers. In the
Chris@42 284 * original version, only one of the two performance matchers
Chris@42 285 * contained the distance metric. (See <code>first</code>)
Chris@42 286 */
Chris@43 287 Matcher *m_otherMatcher;
Chris@42 288
Chris@42 289 /** Indicates which performance is considered primary (the
Chris@42 290 * score). This is the performance shown on the vertical axis,
Chris@42 291 * and referred to as "this" in the codes for the direction of
Chris@42 292 * DTW steps. */
Chris@43 293 bool m_firstPM;
Chris@42 294
Chris@42 295 /** Configuration parameters */
Chris@43 296 Parameters m_params;
Chris@42 297
Chris@42 298 /** Width of the search band in FFT frames (see <code>blockTime</code>) */
Chris@43 299 int m_blockSize;
Chris@42 300
Chris@42 301 /** The number of frames of audio data which have been read. */
Chris@43 302 int m_frameCount;
Chris@42 303
Chris@42 304 /** The number of frames sequentially processed by this matcher,
Chris@42 305 * without a frame of the other matcher being processed.
Chris@42 306 */
Chris@43 307 int m_runCount;
Chris@42 308
Chris@50 309 /** A block of previously seen feature frames is stored in this
Chris@50 310 * structure for calculation of the distance matrix as the new
Chris@50 311 * frames are received. One can think of the structure of the
Chris@50 312 * array as a circular buffer of vectors. */
Chris@182 313 featureseq_t m_features;
Chris@42 314
Chris@42 315 /** The best path cost matrix. */
Chris@182 316 pathcostmat_t m_bestPathCost;
Chris@42 317
Chris@42 318 /** The distance matrix. */
Chris@182 319 distancemat_t m_distance;
Chris@42 320
Chris@45 321 /** The advance direction matrix. */
Chris@182 322 advancemat_t m_advance;
Chris@45 323
Chris@45 324 /** The bounds of each row of data in the distance, path cost, and
Chris@45 325 * advance direction matrices.*/
Chris@43 326 vector<int> m_first;
Chris@43 327 vector<int> m_last;
Chris@42 328
Chris@45 329 /** Width of distance, path cost, and advance direction matrices
Chris@45 330 * and first and last vectors */
Chris@43 331 int m_distXSize;
Chris@42 332
Chris@43 333 bool m_initialised;
Chris@42 334
Chris@43 335 DistanceMetric m_metric;
Chris@78 336 };
cannam@0 337
cannam@0 338 #endif