annotate src/Matcher.h @ 246:aac9ad4064ea subsequence tip

Fix incorrect handling of silent tail in the non-subsequence MATCH phase; some debug output changes
author Chris Cannam
date Fri, 24 Jul 2020 14:29:55 +0100
parents 39fe8728e1ca
children
rev   line source
cannam@0 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
cannam@0 2
cannam@0 3 /*
cannam@0 4 Vamp feature extraction plugin using the MATCH audio alignment
cannam@0 5 algorithm.
cannam@0 6
cannam@0 7 Centre for Digital Music, Queen Mary, University of London.
Chris@236 8 Copyright (c) 2007-2020 Simon Dixon, Chris Cannam, and Queen Mary
Chris@230 9 University of London, Copyright (c) 2014-2015 Tido GmbH.
cannam@0 10
cannam@0 11 This program is free software; you can redistribute it and/or
cannam@0 12 modify it under the terms of the GNU General Public License as
cannam@0 13 published by the Free Software Foundation; either version 2 of the
cannam@0 14 License, or (at your option) any later version. See the file
cannam@0 15 COPYING included with this distribution for more information.
cannam@0 16 */
cannam@0 17
Chris@181 18 #ifndef MATCHER_H
Chris@181 19 #define MATCHER_H
cannam@0 20
cannam@0 21 #include <vector>
cannam@0 22 #include <iostream>
cannam@0 23 #include <sstream>
cannam@0 24 #include <cmath>
cannam@0 25
Chris@26 26 #include "DistanceMetric.h"
Chris@187 27 #include "MatchTypes.h"
cannam@0 28
Chris@74 29 /** Represents an audio feature stream that can be matched to another
Chris@74 30 * audio stream of the same piece of music. The matching algorithm
Chris@74 31 * uses dynamic time warping.
cannam@0 32 */
cannam@0 33 class Matcher
cannam@0 34 {
Chris@15 35 public:
Chris@235 36 static std::string advanceToString(advance_t a) {
Chris@83 37 switch (a) {
Chris@83 38 case AdvanceNone: return "AdvanceNone";
Chris@83 39 case AdvanceBoth: return "AdvanceBoth";
Chris@83 40 case AdvanceThis: return "AdvanceThis";
Chris@83 41 case AdvanceOther: return "AdvanceOther";
Chris@83 42 }
Chris@83 43 return "(unknown)";
Chris@83 44 }
Chris@45 45
Chris@15 46 struct Parameters {
Chris@15 47
Chris@113 48 Parameters(double hopTime_) :
Chris@15 49 hopTime(hopTime_),
Chris@15 50 blockTime(10.0),
Chris@83 51 maxRunCount(3),
Chris@83 52 diagonalWeight(2.0)
Chris@15 53 {}
Chris@15 54
Chris@235 55 /** Spacing of audio frames (determines the amount of overlap
Chris@235 56 * or skip between frames). This value is expressed in
Chris@83 57 * seconds.
Chris@83 58 */
Chris@15 59 double hopTime;
Chris@38 60
Chris@15 61 /** The width of the search band (error margin) around the current
Chris@15 62 * match position, measured in seconds. Strictly speaking the
Chris@15 63 * width is measured backwards from the current point, since the
Chris@15 64 * algorithm has to work causally.
Chris@15 65 */
Chris@15 66 double blockTime;
Chris@15 67
Chris@15 68 /** Maximum number of frames sequentially processed by this
Chris@15 69 * matcher, without a frame of the other matcher being
Chris@15 70 * processed.
Chris@15 71 */
Chris@15 72 int maxRunCount;
Chris@83 73
Chris@83 74 /** Weight applied to cost of diagonal step relative to
Chris@83 75 * horizontal or vertical step. The default of 2.0 means that
Chris@83 76 * a diagonal is not favoured over horizontal+vertical
Chris@83 77 * combined, which is good when maintaining gross tracking of
Chris@83 78 * performances that may have wildly differing speeds but
Chris@83 79 * which also leads to quite jaggy paths. A more typical
Chris@83 80 * normal DTW approach for performances with similar speeds
Chris@83 81 * might use 1.0 or something close to it.
Chris@83 82 */
Chris@188 83 double diagonalWeight;
Chris@15 84 };
Chris@15 85
cannam@0 86 /** Constructor for Matcher.
Chris@74 87 *
Chris@74 88 * A Matcher expects to be provided with feature vectors
Chris@74 89 * calculated by some external code (for example, a
Chris@74 90 * FeatureExtractor). Call consumeFeatureVector to provide each
Chris@74 91 * feature frame.
Chris@23 92 *
Chris@23 93 * @param p The Matcher representing the performance with which
Chris@23 94 * this one is going to be matched. Some information is shared
Chris@23 95 * between the two matchers (currently one possesses the distance
Chris@23 96 * matrix and optimal path matrix).
Chris@23 97 */
Chris@143 98 Matcher(Parameters params, DistanceMetric::Parameters dparams, Matcher *p);
Chris@23 99
Chris@78 100 /** Destructor for Matcher.
Chris@78 101 */
cannam@0 102 ~Matcher();
cannam@0 103
cannam@0 104 /** Adds a link to the Matcher object representing the performance
cannam@0 105 * which is going to be matched to this one.
cannam@0 106 *
cannam@0 107 * @param p the Matcher representing the other performance
cannam@0 108 */
cannam@0 109 void setOtherMatcher(Matcher *p) {
Chris@43 110 m_otherMatcher = p;
Chris@74 111 }
cannam@0 112
Chris@78 113 int getBlockSize() {
Chris@78 114 return m_blockSize;
Chris@78 115 }
Chris@78 116
Chris@171 117 bool isFillingInitialBlock() {
Chris@171 118 return m_frameCount < m_blockSize;
Chris@171 119 }
Chris@171 120
Chris@78 121 bool isOverrunning() {
Chris@78 122 return m_runCount >= m_params.maxRunCount;
Chris@78 123 }
Chris@78 124
cannam@0 125 int getFrameCount() {
Chris@43 126 return m_frameCount;
cannam@0 127 }
cannam@0 128
Chris@72 129 int getOtherFrameCount() {
Chris@72 130 return m_otherMatcher->getFrameCount();
Chris@72 131 }
Chris@74 132
Chris@188 133 double getDiagonalWeight() {
Chris@83 134 return m_params.diagonalWeight;
Chris@83 135 }
Chris@83 136
Chris@74 137 /** Processes a feature vector frame, presumably calculated from
Chris@74 138 * audio data by some external code such as a FeatureExtractor.
Chris@74 139 * Calculates the distance to all frames stored in the
Chris@74 140 * otherMatcher and stores in the distance matrix, before
Chris@74 141 * updating the optimal path matrix using the dynamic time
Chris@74 142 * warping algorithm.
Chris@74 143 *
Chris@104 144 * The supplied features must always be of the same size (within
Chris@104 145 * any pair of Matcher objects).
Chris@74 146 */
Chris@182 147 void consumeFeatureVector(const feature_t &feature);
Chris@72 148
Chris@72 149 /** Tests whether a location is in range in the minimum cost matrix.
Chris@72 150 *
Chris@72 151 * @param i the frame number of this Matcher
Chris@72 152 * @param j the frame number of the other Matcher
Chris@72 153 * @return true if the location is in range
Chris@72 154 */
Chris@72 155 bool isInRange(int i, int j);
Chris@203 156
Chris@203 157 /** Tests whether a location is available in the minimum cost
Chris@203 158 * matrix, that is, whether it is in range and contains a valid
Chris@203 159 * cost value. Note this and its associated isRowAvailable,
Chris@203 160 * isColAvailable checks are more expensive than isInRange and
Chris@203 161 * are really intended for error checking. (If a row is in range,
Chris@203 162 * it should always be available.)
Chris@203 163 *
Chris@203 164 * @param i the frame number of this Matcher
Chris@203 165 * @param j the frame number of the other Matcher
Chris@203 166 * @return true if the location is in range and contains a valid cost
Chris@203 167 */
Chris@203 168 bool isAvailable(int i, int j);
Chris@154 169
Chris@154 170 /** Tests whether any locations in the given row are available.
Chris@154 171 */
Chris@154 172 bool isRowAvailable(int i);
Chris@154 173
Chris@154 174 /** Tests whether any locations in the given column are available.
Chris@154 175 */
Chris@154 176 bool isColAvailable(int i);
Chris@72 177
Chris@154 178 /** Returns the valid range of columns for the given row, that is,
Chris@154 179 * the range of frames in the other Matcher for the given frame
Chris@154 180 * in this Matcher's minimum cost matrix.
Chris@72 181 *
Chris@72 182 * @param i the frame number of this Matcher
Chris@72 183 * @return the first, last pair of frame numbers for the other
Chris@72 184 * Matcher. Note that the last frame is exclusive (last valid
Chris@72 185 * frame + 1).
Chris@72 186 */
Chris@205 187 std::pair<int, int> getColRangeForRow(int i);
Chris@72 188
Chris@154 189 /** Returns the valid range of rows for the given column, that is,
Chris@154 190 * the range of frames in this Matcher for the given frame in the
Chris@154 191 * other Matcher's minimum cost matrix.
Chris@72 192 *
Chris@72 193 * @param i the frame number of the other Matcher
Chris@72 194 * @return the first, last pair of frame numbers for this
Chris@72 195 * Matcher. Note that the last frame is exclusive (last valid
Chris@72 196 * frame + 1).
Chris@72 197 */
Chris@205 198 std::pair<int, int> getRowRangeForCol(int i);
Chris@72 199
Chris@72 200 /** Retrieves a value from the distance matrix.
Chris@72 201 *
Chris@72 202 * @param i the frame number of this Matcher
Chris@72 203 * @param j the frame number of the other Matcher
Chris@72 204 * @return the distance metric at this location
Chris@72 205 */
Chris@182 206 distance_t getDistance(int i, int j);
Chris@72 207
Chris@72 208 /** Sets a value to the distance matrix.
Chris@72 209 *
Chris@72 210 * @param i the frame number of this Matcher
Chris@72 211 * @param j the frame number of the other Matcher
Chris@72 212 * @param value the distance metric to set for this location
Chris@72 213 */
Chris@182 214 void setDistance(int i, int j, distance_t distance);
Chris@72 215
Chris@72 216 /** Retrieves a value from the minimum cost matrix.
Chris@72 217 *
Chris@72 218 * @param i the frame number of this Matcher
Chris@72 219 * @param j the frame number of the other Matcher
Chris@72 220 * @return the cost of the minimum cost path to this location
Chris@72 221 */
Chris@182 222 pathcost_t getPathCost(int i, int j);
Chris@72 223
Chris@72 224 /** Sets a value and an advance direction to the minimum cost matrix.
Chris@72 225 *
Chris@72 226 * @param i the frame number of this Matcher
Chris@72 227 * @param j the frame number of the other Matcher
Chris@72 228 * @param dir the direction from which this position is reached with
Chris@72 229 * minimum cost
Chris@72 230 * @param value the cost of the minimum cost path to set for this location
Chris@72 231 */
Chris@182 232 void setPathCost(int i, int j, advance_t dir, pathcost_t value);
Chris@135 233
Chris@135 234 /** Retrieves a value from the minimum cost matrix, normalised for
Chris@135 235 * path length.
Chris@135 236 *
Chris@135 237 * @param i the frame number of this Matcher
Chris@135 238 * @param j the frame number of the other Matcher
Chris@135 239 * @return the cost of the minimum cost path to this location,
Chris@135 240 * normalised by the Manhattan distance from 0,0 to i,j
Chris@135 241 */
Chris@191 242 normpathcost_t getNormalisedPathCost(int i, int j);
Chris@72 243
Chris@72 244 /** Retrieves an advance direction from the matrix.
Chris@72 245 *
Chris@72 246 * @param i the frame number of this Matcher
Chris@72 247 * @param j the frame number of the other Matcher
Chris@72 248 * @return the direction from which this position is reached with
Chris@72 249 * minimum cost
Chris@72 250 */
Chris@181 251 advance_t getAdvance(int i, int j);
Chris@202 252
Chris@232 253 struct MemoryStats {
Chris@232 254 double features_k;
Chris@232 255 double pathcosts_k;
Chris@232 256 double distances_k;
Chris@232 257 double advances_k;
Chris@232 258 double total_k() const {
Chris@232 259 return features_k + pathcosts_k + distances_k + advances_k;
Chris@232 260 }
Chris@232 261 };
Chris@232 262
Chris@232 263 /** Obtain some stats about memory consumption.
Chris@232 264 */
Chris@232 265 MemoryStats getMemoryStats() const;
Chris@232 266
Chris@202 267 /** Print some stats about memory consumption etc to stderr.
Chris@202 268 */
Chris@202 269 void printStats();
Chris@72 270
cannam@0 271 protected:
Chris@38 272 /** Create internal structures and reset. */
cannam@0 273 void init();
cannam@0 274
Chris@38 275 /** The distXSize value has changed: resize internal buffers. */
Chris@41 276 void size();
cannam@0 277
Chris@71 278 /** Updates an entry in the distance matrix and the optimal path matrix.
cannam@0 279 *
cannam@0 280 * @param i the frame number of this Matcher
cannam@0 281 * @param j the frame number of the other Matcher
cannam@0 282 * @param dir the direction from which this position is reached with
cannam@0 283 * minimum cost
cannam@0 284 * @param value the cost of the minimum path except the current step
cannam@0 285 * @param dMN the distance cost between the two frames
cannam@0 286 */
Chris@182 287 void updateValue(int i, int j, advance_t dir, pathcost_t value, distance_t dMN);
cannam@0 288
Chris@21 289 void calcAdvance();
Chris@21 290
Chris@211 291 /**
Chris@211 292 * Add the given distance increment to the given path cost, and
Chris@211 293 * return the result clipped (if necessary) at MaxPathCost.
Chris@211 294 */
Chris@211 295 pathcost_t addToCost(pathcost_t cost, pathcost_t increment);
Chris@211 296
Chris@42 297 /** Points to the other performance with which this one is being
Chris@211 298 * compared. (See <code>m_firstPM</code>)
Chris@42 299 */
Chris@43 300 Matcher *m_otherMatcher;
Chris@42 301
Chris@42 302 /** Indicates which performance is considered primary (the
Chris@42 303 * score). This is the performance shown on the vertical axis,
Chris@42 304 * and referred to as "this" in the codes for the direction of
Chris@42 305 * DTW steps. */
Chris@43 306 bool m_firstPM;
Chris@42 307
Chris@42 308 /** Configuration parameters */
Chris@43 309 Parameters m_params;
Chris@42 310
Chris@42 311 /** Width of the search band in FFT frames (see <code>blockTime</code>) */
Chris@43 312 int m_blockSize;
Chris@42 313
Chris@42 314 /** The number of frames of audio data which have been read. */
Chris@43 315 int m_frameCount;
Chris@42 316
Chris@42 317 /** The number of frames sequentially processed by this matcher,
Chris@42 318 * without a frame of the other matcher being processed.
Chris@42 319 */
Chris@43 320 int m_runCount;
Chris@42 321
Chris@50 322 /** A block of previously seen feature frames is stored in this
Chris@50 323 * structure for calculation of the distance matrix as the new
Chris@50 324 * frames are received. One can think of the structure of the
Chris@50 325 * array as a circular buffer of vectors. */
Chris@182 326 featureseq_t m_features;
Chris@42 327
Chris@42 328 /** The best path cost matrix. */
Chris@182 329 pathcostmat_t m_bestPathCost;
Chris@42 330
Chris@42 331 /** The distance matrix. */
Chris@182 332 distancemat_t m_distance;
Chris@42 333
Chris@45 334 /** The advance direction matrix. */
Chris@182 335 advancemat_t m_advance;
Chris@45 336
Chris@45 337 /** The bounds of each row of data in the distance, path cost, and
Chris@45 338 * advance direction matrices.*/
Chris@235 339 std::vector<int> m_first;
Chris@235 340 std::vector<int> m_last;
Chris@235 341
Chris@45 342 /** Width of distance, path cost, and advance direction matrices
Chris@45 343 * and first and last vectors */
Chris@43 344 int m_distXSize;
Chris@42 345
Chris@43 346 bool m_initialised;
Chris@42 347
Chris@43 348 DistanceMetric m_metric;
Chris@78 349 };
cannam@0 350
Chris@232 351 inline Matcher::MemoryStats operator+(const Matcher::MemoryStats &a,
Chris@232 352 const Matcher::MemoryStats &b)
Chris@232 353 {
Chris@232 354 Matcher::MemoryStats m = a;
Chris@232 355 m.features_k += b.features_k;
Chris@232 356 m.pathcosts_k += b.pathcosts_k;
Chris@232 357 m.distances_k += b.distances_k;
Chris@232 358 m.advances_k += b.advances_k;
Chris@232 359 return m;
Chris@232 360 }
Chris@232 361
cannam@0 362 #endif