annotate src/Matcher.h @ 74:b9aa663a607b refactors

Pull out feature extractor calls from Matcher, remove MatchFeeder, have only the feeder-from-features and use that in MatchVampPlugin
author Chris Cannam
date Wed, 19 Nov 2014 11:59:03 +0000
parents c3c50d5e05b7
children 0042b4d42167
rev   line source
cannam@0 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
cannam@0 2
cannam@0 3 /*
cannam@0 4 Vamp feature extraction plugin using the MATCH audio alignment
cannam@0 5 algorithm.
cannam@0 6
cannam@0 7 Centre for Digital Music, Queen Mary, University of London.
cannam@0 8 This file copyright 2007 Simon Dixon, Chris Cannam and QMUL.
cannam@0 9
cannam@0 10 This program is free software; you can redistribute it and/or
cannam@0 11 modify it under the terms of the GNU General Public License as
cannam@0 12 published by the Free Software Foundation; either version 2 of the
cannam@0 13 License, or (at your option) any later version. See the file
cannam@0 14 COPYING included with this distribution for more information.
cannam@0 15 */
cannam@0 16
cannam@0 17 #ifndef _MATCHER_H_
cannam@0 18 #define _MATCHER_H_
cannam@0 19
cannam@0 20 #include <vector>
cannam@0 21 #include <iostream>
cannam@0 22 #include <sstream>
cannam@0 23 #include <cmath>
cannam@0 24
Chris@26 25 #include "DistanceMetric.h"
cannam@0 26
cannam@0 27 using std::vector;
cannam@0 28 using std::string;
cannam@0 29 using std::cerr;
cannam@0 30 using std::endl;
cannam@0 31
Chris@74 32 /** Represents an audio feature stream that can be matched to another
Chris@74 33 * audio stream of the same piece of music. The matching algorithm
Chris@74 34 * uses dynamic time warping.
cannam@0 35 */
cannam@0 36 class Matcher
cannam@0 37 {
Chris@15 38 public:
Chris@45 39 enum Advance {
Chris@45 40 AdvanceNone,
Chris@45 41 AdvanceBoth,
Chris@45 42 AdvanceThis,
Chris@45 43 AdvanceOther
Chris@45 44 };
Chris@45 45
Chris@15 46 struct Parameters {
Chris@15 47
Chris@15 48 Parameters(float rate_, double hopTime_, int fftSize_) :
Chris@15 49 sampleRate(rate_),
Chris@26 50 distanceNorm(DistanceMetric::NormaliseDistanceToLogSum),
Chris@15 51 hopTime(hopTime_),
Chris@15 52 fftSize(fftSize_),
Chris@15 53 blockTime(10.0),
Chris@15 54 maxRunCount(3)
Chris@15 55 {}
Chris@15 56
Chris@15 57 /** Sample rate of audio */
Chris@15 58 float sampleRate;
Chris@15 59
Chris@15 60 /** Type of distance metric normalisation */
Chris@26 61 DistanceMetric::DistanceNormalisation distanceNorm;
Chris@15 62
Chris@15 63 /** Spacing of audio frames (determines the amount of overlap or
Chris@15 64 * skip between frames). This value is expressed in
Chris@15 65 * seconds. */
Chris@15 66 double hopTime;
Chris@38 67
Chris@15 68 /** Size of an FFT frame in samples. Note that the data passed
Chris@15 69 * in to Matcher is already in the frequency domain, so this
Chris@15 70 * expresses the size of the frame that the caller will be
Chris@38 71 * providing. */
Chris@15 72 int fftSize;
Chris@38 73
Chris@15 74 /** The width of the search band (error margin) around the current
Chris@15 75 * match position, measured in seconds. Strictly speaking the
Chris@15 76 * width is measured backwards from the current point, since the
Chris@15 77 * algorithm has to work causally.
Chris@15 78 */
Chris@15 79 double blockTime;
Chris@15 80
Chris@15 81 /** Maximum number of frames sequentially processed by this
Chris@15 82 * matcher, without a frame of the other matcher being
Chris@15 83 * processed.
Chris@15 84 */
Chris@15 85 int maxRunCount;
Chris@15 86 };
Chris@15 87
cannam@0 88 /** Constructor for Matcher.
Chris@74 89 *
Chris@74 90 * A Matcher expects to be provided with feature vectors
Chris@74 91 * calculated by some external code (for example, a
Chris@74 92 * FeatureExtractor). Call consumeFeatureVector to provide each
Chris@74 93 * feature frame.
Chris@23 94 *
Chris@23 95 * @param p The Matcher representing the performance with which
Chris@23 96 * this one is going to be matched. Some information is shared
Chris@23 97 * between the two matchers (currently one possesses the distance
Chris@23 98 * matrix and optimal path matrix).
Chris@23 99 *
Chris@74 100 * @param featureSize Number of values in each of the feature
Chris@74 101 * vectors that will be provided.
Chris@23 102 */
Chris@23 103 Matcher(Parameters parameters, Matcher *p, int featureSize);
Chris@23 104
cannam@0 105 ~Matcher();
cannam@0 106
cannam@0 107 /** Adds a link to the Matcher object representing the performance
cannam@0 108 * which is going to be matched to this one.
cannam@0 109 *
cannam@0 110 * @param p the Matcher representing the other performance
cannam@0 111 */
cannam@0 112 void setOtherMatcher(Matcher *p) {
Chris@43 113 m_otherMatcher = p;
Chris@74 114 }
cannam@0 115
cannam@0 116 int getFrameCount() {
Chris@43 117 return m_frameCount;
cannam@0 118 }
cannam@0 119
Chris@72 120 int getOtherFrameCount() {
Chris@72 121 return m_otherMatcher->getFrameCount();
Chris@72 122 }
Chris@74 123
Chris@74 124 /** Processes a feature vector frame, presumably calculated from
Chris@74 125 * audio data by some external code such as a FeatureExtractor.
Chris@74 126 * Calculates the distance to all frames stored in the
Chris@74 127 * otherMatcher and stores in the distance matrix, before
Chris@74 128 * updating the optimal path matrix using the dynamic time
Chris@74 129 * warping algorithm.
Chris@74 130 *
Chris@74 131 * The supplied feature must be of the size that was passed as
Chris@74 132 * featureSize to the constructor.
Chris@74 133 */
Chris@74 134 void consumeFeatureVector(std::vector<double> feature);
Chris@72 135
Chris@72 136 /** Tests whether a location is in range in the minimum cost matrix.
Chris@72 137 *
Chris@72 138 * @param i the frame number of this Matcher
Chris@72 139 * @param j the frame number of the other Matcher
Chris@72 140 * @return true if the location is in range
Chris@72 141 */
Chris@72 142 bool isInRange(int i, int j);
Chris@72 143
Chris@72 144 /** Tests whether a location is available in the minimum cost matrix.
Chris@72 145 *
Chris@72 146 * @param i the frame number of this Matcher
Chris@72 147 * @param j the frame number of the other Matcher
Chris@72 148 * @return true if the location is in range and contains a valid cost
Chris@72 149 */
Chris@72 150 bool isAvailable(int i, int j);
Chris@72 151
Chris@72 152 /** Returns the valid range of frames in the other Matcher for the
Chris@72 153 * given frame in this Matcher's minimum cost matrix.
Chris@72 154 *
Chris@72 155 * @param i the frame number of this Matcher
Chris@72 156 * @return the first, last pair of frame numbers for the other
Chris@72 157 * Matcher. Note that the last frame is exclusive (last valid
Chris@72 158 * frame + 1).
Chris@72 159 */
Chris@72 160 std::pair<int, int> getColRange(int i);
Chris@72 161
Chris@72 162 /** Returns the valid range of frames in this Matcher for the
Chris@72 163 * given frame in the other Matcher's minimum cost matrix.
Chris@72 164 *
Chris@72 165 * @param i the frame number of the other Matcher
Chris@72 166 * @return the first, last pair of frame numbers for this
Chris@72 167 * Matcher. Note that the last frame is exclusive (last valid
Chris@72 168 * frame + 1).
Chris@72 169 */
Chris@72 170 std::pair<int, int> getRowRange(int i);
Chris@72 171
Chris@72 172 /** Retrieves a value from the distance matrix.
Chris@72 173 *
Chris@72 174 * @param i the frame number of this Matcher
Chris@72 175 * @param j the frame number of the other Matcher
Chris@72 176 * @return the distance metric at this location
Chris@72 177 */
Chris@72 178 float getDistance(int i, int j);
Chris@72 179
Chris@72 180 /** Sets a value to the distance matrix.
Chris@72 181 *
Chris@72 182 * @param i the frame number of this Matcher
Chris@72 183 * @param j the frame number of the other Matcher
Chris@72 184 * @param value the distance metric to set for this location
Chris@72 185 */
Chris@72 186 void setDistance(int i, int j, float value);
Chris@72 187
Chris@72 188 /** Retrieves a value from the minimum cost matrix.
Chris@72 189 *
Chris@72 190 * @param i the frame number of this Matcher
Chris@72 191 * @param j the frame number of the other Matcher
Chris@72 192 * @return the cost of the minimum cost path to this location
Chris@72 193 */
Chris@72 194 double getPathCost(int i, int j);
Chris@72 195
Chris@72 196 /** Sets a value and an advance direction to the minimum cost matrix.
Chris@72 197 *
Chris@72 198 * @param i the frame number of this Matcher
Chris@72 199 * @param j the frame number of the other Matcher
Chris@72 200 * @param dir the direction from which this position is reached with
Chris@72 201 * minimum cost
Chris@72 202 * @param value the cost of the minimum cost path to set for this location
Chris@72 203 */
Chris@72 204 void setPathCost(int i, int j, Advance dir, double value);
Chris@72 205
Chris@72 206 /** Retrieves an advance direction from the matrix.
Chris@72 207 *
Chris@72 208 * @param i the frame number of this Matcher
Chris@72 209 * @param j the frame number of the other Matcher
Chris@72 210 * @return the direction from which this position is reached with
Chris@72 211 * minimum cost
Chris@72 212 */
Chris@72 213 Advance getAdvance(int i, int j);
Chris@72 214
cannam@0 215 protected:
Chris@38 216 /** Create internal structures and reset. */
cannam@0 217 void init();
cannam@0 218
Chris@38 219 /** The distXSize value has changed: resize internal buffers. */
Chris@41 220 void size();
cannam@0 221
Chris@71 222 /** Updates an entry in the distance matrix and the optimal path matrix.
cannam@0 223 *
cannam@0 224 * @param i the frame number of this Matcher
cannam@0 225 * @param j the frame number of the other Matcher
cannam@0 226 * @param dir the direction from which this position is reached with
cannam@0 227 * minimum cost
cannam@0 228 * @param value the cost of the minimum path except the current step
cannam@0 229 * @param dMN the distance cost between the two frames
cannam@0 230 */
Chris@71 231 void updateValue(int i, int j, Advance dir, double value, float dMN);
cannam@0 232
Chris@21 233 void calcAdvance();
Chris@21 234
Chris@42 235 /** Points to the other performance with which this one is being
Chris@42 236 * compared. The data for the distance metric and the dynamic
Chris@42 237 * time warping is shared between the two matchers. In the
Chris@42 238 * original version, only one of the two performance matchers
Chris@42 239 * contained the distance metric. (See <code>first</code>)
Chris@42 240 */
Chris@43 241 Matcher *m_otherMatcher;
Chris@42 242
Chris@42 243 /** Indicates which performance is considered primary (the
Chris@42 244 * score). This is the performance shown on the vertical axis,
Chris@42 245 * and referred to as "this" in the codes for the direction of
Chris@42 246 * DTW steps. */
Chris@43 247 bool m_firstPM;
Chris@42 248
Chris@42 249 /** Configuration parameters */
Chris@43 250 Parameters m_params;
Chris@42 251
Chris@42 252 /** Width of the search band in FFT frames (see <code>blockTime</code>) */
Chris@43 253 int m_blockSize;
Chris@42 254
Chris@42 255 /** The number of frames of audio data which have been read. */
Chris@43 256 int m_frameCount;
Chris@42 257
Chris@42 258 /** The number of frames sequentially processed by this matcher,
Chris@42 259 * without a frame of the other matcher being processed.
Chris@42 260 */
Chris@43 261 int m_runCount;
Chris@42 262
Chris@42 263 /** The number of values in a feature vector. */
Chris@43 264 int m_featureSize;
Chris@42 265
Chris@50 266 /** A block of previously seen feature frames is stored in this
Chris@50 267 * structure for calculation of the distance matrix as the new
Chris@50 268 * frames are received. One can think of the structure of the
Chris@50 269 * array as a circular buffer of vectors. */
Chris@43 270 vector<vector<double> > m_frames;
Chris@42 271
Chris@42 272 /** The best path cost matrix. */
Chris@53 273 vector<vector<double> > m_bestPathCost;
Chris@42 274
Chris@42 275 /** The distance matrix. */
Chris@45 276 vector<vector<float> > m_distance;
Chris@42 277
Chris@45 278 /** The advance direction matrix. */
Chris@45 279 vector<vector<Advance> > m_advance;
Chris@45 280
Chris@45 281 /** The bounds of each row of data in the distance, path cost, and
Chris@45 282 * advance direction matrices.*/
Chris@43 283 vector<int> m_first;
Chris@43 284 vector<int> m_last;
Chris@42 285
Chris@45 286 /** Width of distance, path cost, and advance direction matrices
Chris@45 287 * and first and last vectors */
Chris@43 288 int m_distXSize;
Chris@42 289
Chris@43 290 bool m_initialised;
Chris@42 291
Chris@43 292 DistanceMetric m_metric;
Chris@26 293
cannam@0 294 friend class MatchFeeder;
Chris@24 295 friend class MatchFeatureFeeder;
Chris@72 296
cannam@0 297 }; // class Matcher
cannam@0 298
cannam@0 299 #endif