annotate src/Matcher.h @ 72:c3c50d5e05b7 refactors

Pull up Matcher set/get to public API, use only public API in Finder
author Chris Cannam
date Wed, 19 Nov 2014 10:18:19 +0000
parents cba231851957
children b9aa663a607b
rev   line source
cannam@0 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
cannam@0 2
cannam@0 3 /*
cannam@0 4 Vamp feature extraction plugin using the MATCH audio alignment
cannam@0 5 algorithm.
cannam@0 6
cannam@0 7 Centre for Digital Music, Queen Mary, University of London.
cannam@0 8 This file copyright 2007 Simon Dixon, Chris Cannam and QMUL.
cannam@0 9
cannam@0 10 This program is free software; you can redistribute it and/or
cannam@0 11 modify it under the terms of the GNU General Public License as
cannam@0 12 published by the Free Software Foundation; either version 2 of the
cannam@0 13 License, or (at your option) any later version. See the file
cannam@0 14 COPYING included with this distribution for more information.
cannam@0 15 */
cannam@0 16
cannam@0 17 #ifndef _MATCHER_H_
cannam@0 18 #define _MATCHER_H_
cannam@0 19
cannam@0 20 #include <vector>
cannam@0 21 #include <iostream>
cannam@0 22 #include <sstream>
cannam@0 23 #include <cmath>
cannam@0 24
Chris@26 25 #include "DistanceMetric.h"
Chris@38 26 #include "FeatureExtractor.h"
cannam@0 27
cannam@0 28 using std::vector;
cannam@0 29 using std::string;
cannam@0 30 using std::cerr;
cannam@0 31 using std::endl;
cannam@0 32
cannam@0 33 /** Represents an audio stream that can be matched to another audio
cannam@0 34 * stream of the same piece of music. The matching algorithm uses
Chris@45 35 * dynamic time warping.
cannam@0 36 */
cannam@0 37 class Matcher
cannam@0 38 {
Chris@15 39 public:
Chris@45 40 enum Advance {
Chris@45 41 AdvanceNone,
Chris@45 42 AdvanceBoth,
Chris@45 43 AdvanceThis,
Chris@45 44 AdvanceOther
Chris@45 45 };
Chris@45 46
Chris@15 47 struct Parameters {
Chris@15 48
Chris@15 49 Parameters(float rate_, double hopTime_, int fftSize_) :
Chris@15 50 sampleRate(rate_),
Chris@26 51 distanceNorm(DistanceMetric::NormaliseDistanceToLogSum),
Chris@15 52 hopTime(hopTime_),
Chris@15 53 fftSize(fftSize_),
Chris@15 54 blockTime(10.0),
Chris@15 55 maxRunCount(3)
Chris@15 56 {}
Chris@15 57
Chris@15 58 /** Sample rate of audio */
Chris@15 59 float sampleRate;
Chris@15 60
Chris@15 61 /** Type of distance metric normalisation */
Chris@26 62 DistanceMetric::DistanceNormalisation distanceNorm;
Chris@15 63
Chris@15 64 /** Spacing of audio frames (determines the amount of overlap or
Chris@15 65 * skip between frames). This value is expressed in
Chris@15 66 * seconds. */
Chris@15 67 double hopTime;
Chris@38 68
Chris@15 69 /** Size of an FFT frame in samples. Note that the data passed
Chris@15 70 * in to Matcher is already in the frequency domain, so this
Chris@15 71 * expresses the size of the frame that the caller will be
Chris@38 72 * providing. */
Chris@15 73 int fftSize;
Chris@38 74
Chris@15 75 /** The width of the search band (error margin) around the current
Chris@15 76 * match position, measured in seconds. Strictly speaking the
Chris@15 77 * width is measured backwards from the current point, since the
Chris@15 78 * algorithm has to work causally.
Chris@15 79 */
Chris@15 80 double blockTime;
Chris@15 81
Chris@15 82 /** Maximum number of frames sequentially processed by this
Chris@15 83 * matcher, without a frame of the other matcher being
Chris@15 84 * processed.
Chris@15 85 */
Chris@15 86 int maxRunCount;
Chris@15 87 };
Chris@15 88
cannam@0 89 /** Constructor for Matcher.
cannam@0 90 *
cannam@0 91 * @param p The Matcher representing the performance with which
cannam@0 92 * this one is going to be matched. Some information is shared
cannam@0 93 * between the two matchers (currently one possesses the distance
cannam@0 94 * matrix and optimal path matrix).
cannam@0 95 */
Chris@38 96 Matcher(Parameters parameters,
Chris@38 97 FeatureExtractor::Parameters featureParams,
Chris@38 98 Matcher *p);
cannam@0 99
Chris@23 100 /** Constructor for Matcher using externally supplied features.
Chris@23 101 * A Matcher made using this constructor will not carry out its
Chris@23 102 * own feature extraction from frequency-domain audio data, but
Chris@23 103 * instead will accept arbitrary feature frames calculated by
Chris@23 104 * some external code.
Chris@23 105 *
Chris@23 106 * @param p The Matcher representing the performance with which
Chris@23 107 * this one is going to be matched. Some information is shared
Chris@23 108 * between the two matchers (currently one possesses the distance
Chris@23 109 * matrix and optimal path matrix).
Chris@23 110 *
Chris@23 111 * @param featureSize Number of values in each feature vector.
Chris@23 112 */
Chris@23 113 Matcher(Parameters parameters, Matcher *p, int featureSize);
Chris@23 114
cannam@0 115 ~Matcher();
cannam@0 116
cannam@0 117 /** Adds a link to the Matcher object representing the performance
cannam@0 118 * which is going to be matched to this one.
cannam@0 119 *
cannam@0 120 * @param p the Matcher representing the other performance
cannam@0 121 */
cannam@0 122 void setOtherMatcher(Matcher *p) {
Chris@43 123 m_otherMatcher = p;
cannam@0 124 } // setOtherMatcher()
cannam@0 125
cannam@0 126 int getFrameCount() {
Chris@43 127 return m_frameCount;
cannam@0 128 }
cannam@0 129
Chris@72 130 int getOtherFrameCount() {
Chris@72 131 return m_otherMatcher->getFrameCount();
Chris@72 132 }
Chris@72 133
Chris@72 134 /** Tests whether a location is in range in the minimum cost matrix.
Chris@72 135 *
Chris@72 136 * @param i the frame number of this Matcher
Chris@72 137 * @param j the frame number of the other Matcher
Chris@72 138 * @return true if the location is in range
Chris@72 139 */
Chris@72 140 bool isInRange(int i, int j);
Chris@72 141
Chris@72 142 /** Tests whether a location is available in the minimum cost matrix.
Chris@72 143 *
Chris@72 144 * @param i the frame number of this Matcher
Chris@72 145 * @param j the frame number of the other Matcher
Chris@72 146 * @return true if the location is in range and contains a valid cost
Chris@72 147 */
Chris@72 148 bool isAvailable(int i, int j);
Chris@72 149
Chris@72 150 /** Returns the valid range of frames in the other Matcher for the
Chris@72 151 * given frame in this Matcher's minimum cost matrix.
Chris@72 152 *
Chris@72 153 * @param i the frame number of this Matcher
Chris@72 154 * @return the first, last pair of frame numbers for the other
Chris@72 155 * Matcher. Note that the last frame is exclusive (last valid
Chris@72 156 * frame + 1).
Chris@72 157 */
Chris@72 158 std::pair<int, int> getColRange(int i);
Chris@72 159
Chris@72 160 /** Returns the valid range of frames in this Matcher for the
Chris@72 161 * given frame in the other Matcher's minimum cost matrix.
Chris@72 162 *
Chris@72 163 * @param i the frame number of the other Matcher
Chris@72 164 * @return the first, last pair of frame numbers for this
Chris@72 165 * Matcher. Note that the last frame is exclusive (last valid
Chris@72 166 * frame + 1).
Chris@72 167 */
Chris@72 168 std::pair<int, int> getRowRange(int i);
Chris@72 169
Chris@72 170 /** Retrieves a value from the distance matrix.
Chris@72 171 *
Chris@72 172 * @param i the frame number of this Matcher
Chris@72 173 * @param j the frame number of the other Matcher
Chris@72 174 * @return the distance metric at this location
Chris@72 175 */
Chris@72 176 float getDistance(int i, int j);
Chris@72 177
Chris@72 178 /** Sets a value to the distance matrix.
Chris@72 179 *
Chris@72 180 * @param i the frame number of this Matcher
Chris@72 181 * @param j the frame number of the other Matcher
Chris@72 182 * @param value the distance metric to set for this location
Chris@72 183 */
Chris@72 184 void setDistance(int i, int j, float value);
Chris@72 185
Chris@72 186 /** Retrieves a value from the minimum cost matrix.
Chris@72 187 *
Chris@72 188 * @param i the frame number of this Matcher
Chris@72 189 * @param j the frame number of the other Matcher
Chris@72 190 * @return the cost of the minimum cost path to this location
Chris@72 191 */
Chris@72 192 double getPathCost(int i, int j);
Chris@72 193
Chris@72 194 /** Sets a value and an advance direction to the minimum cost matrix.
Chris@72 195 *
Chris@72 196 * @param i the frame number of this Matcher
Chris@72 197 * @param j the frame number of the other Matcher
Chris@72 198 * @param dir the direction from which this position is reached with
Chris@72 199 * minimum cost
Chris@72 200 * @param value the cost of the minimum cost path to set for this location
Chris@72 201 */
Chris@72 202 void setPathCost(int i, int j, Advance dir, double value);
Chris@72 203
Chris@72 204 /** Retrieves an advance direction from the matrix.
Chris@72 205 *
Chris@72 206 * @param i the frame number of this Matcher
Chris@72 207 * @param j the frame number of the other Matcher
Chris@72 208 * @return the direction from which this position is reached with
Chris@72 209 * minimum cost
Chris@72 210 */
Chris@72 211 Advance getAdvance(int i, int j);
Chris@72 212
cannam@0 213 protected:
Chris@38 214 /** Create internal structures and reset. */
cannam@0 215 void init();
cannam@0 216
Chris@38 217 /** The distXSize value has changed: resize internal buffers. */
Chris@41 218 void size();
cannam@0 219
Chris@38 220 /** Process a frequency-domain frame of audio data using the
Chris@38 221 * built-in FeatureExtractor, then calculating the distance to
Chris@38 222 * all frames stored in the otherMatcher and storing them in the
Chris@38 223 * distance matrix, and finally updating the optimal path matrix
Chris@38 224 * using the dynamic time warping algorithm.
Chris@14 225 *
Chris@14 226 * Return value is the frame (post-processed, with warping,
Chris@14 227 * rectification, and normalisation as appropriate).
Chris@23 228 *
Chris@23 229 * The Matcher must have been constructed using the constructor
Chris@23 230 * without an external featureSize parameter in order to use this
Chris@23 231 * function. (Otherwise it will be expecting you to call
Chris@23 232 * consumeFeatureVector.)
cannam@0 233 */
Chris@21 234 std::vector<double> consumeFrame(double *reBuffer, double *imBuffer);
cannam@0 235
Chris@23 236 /** Processes a feature vector frame (presumably calculated from
Chris@23 237 * audio data by some external code). As consumeFrame, except
Chris@23 238 * that it does not calculate a feature from audio data but
Chris@23 239 * instead uses the supplied feature directly.
Chris@23 240 *
Chris@23 241 * The Matcher must have been constructed using the constructor
Chris@23 242 * that accepts an external featureSize parameter in order to
Chris@23 243 * use this function. The supplied feature must be of the size
Chris@23 244 * that was passed to the constructor.
Chris@23 245 */
Chris@23 246 void consumeFeatureVector(std::vector<double> feature);
Chris@23 247
Chris@71 248 /** Updates an entry in the distance matrix and the optimal path matrix.
cannam@0 249 *
cannam@0 250 * @param i the frame number of this Matcher
cannam@0 251 * @param j the frame number of the other Matcher
cannam@0 252 * @param dir the direction from which this position is reached with
cannam@0 253 * minimum cost
cannam@0 254 * @param value the cost of the minimum path except the current step
cannam@0 255 * @param dMN the distance cost between the two frames
cannam@0 256 */
Chris@71 257 void updateValue(int i, int j, Advance dir, double value, float dMN);
cannam@0 258
Chris@21 259 void calcAdvance();
Chris@21 260
Chris@42 261 /** Points to the other performance with which this one is being
Chris@42 262 * compared. The data for the distance metric and the dynamic
Chris@42 263 * time warping is shared between the two matchers. In the
Chris@42 264 * original version, only one of the two performance matchers
Chris@42 265 * contained the distance metric. (See <code>first</code>)
Chris@42 266 */
Chris@43 267 Matcher *m_otherMatcher;
Chris@42 268
Chris@42 269 /** Indicates which performance is considered primary (the
Chris@42 270 * score). This is the performance shown on the vertical axis,
Chris@42 271 * and referred to as "this" in the codes for the direction of
Chris@42 272 * DTW steps. */
Chris@43 273 bool m_firstPM;
Chris@42 274
Chris@42 275 /** Configuration parameters */
Chris@43 276 Parameters m_params;
Chris@42 277
Chris@42 278 /** Width of the search band in FFT frames (see <code>blockTime</code>) */
Chris@43 279 int m_blockSize;
Chris@42 280
Chris@42 281 /** The number of frames of audio data which have been read. */
Chris@43 282 int m_frameCount;
Chris@42 283
Chris@42 284 /** The number of frames sequentially processed by this matcher,
Chris@42 285 * without a frame of the other matcher being processed.
Chris@42 286 */
Chris@43 287 int m_runCount;
Chris@42 288
Chris@42 289 /** The number of values in a feature vector. */
Chris@43 290 int m_featureSize;
Chris@42 291
Chris@50 292 /** A block of previously seen feature frames is stored in this
Chris@50 293 * structure for calculation of the distance matrix as the new
Chris@50 294 * frames are received. One can think of the structure of the
Chris@50 295 * array as a circular buffer of vectors. */
Chris@43 296 vector<vector<double> > m_frames;
Chris@42 297
Chris@42 298 /** The best path cost matrix. */
Chris@53 299 vector<vector<double> > m_bestPathCost;
Chris@42 300
Chris@42 301 /** The distance matrix. */
Chris@45 302 vector<vector<float> > m_distance;
Chris@42 303
Chris@45 304 /** The advance direction matrix. */
Chris@45 305 vector<vector<Advance> > m_advance;
Chris@45 306
Chris@45 307 /** The bounds of each row of data in the distance, path cost, and
Chris@45 308 * advance direction matrices.*/
Chris@43 309 vector<int> m_first;
Chris@43 310 vector<int> m_last;
Chris@42 311
Chris@45 312 /** Width of distance, path cost, and advance direction matrices
Chris@45 313 * and first and last vectors */
Chris@43 314 int m_distXSize;
Chris@42 315
Chris@43 316 bool m_initialised;
Chris@42 317
Chris@43 318 FeatureExtractor m_featureExtractor;
Chris@43 319 DistanceMetric m_metric;
Chris@26 320
cannam@0 321 friend class MatchFeeder;
Chris@24 322 friend class MatchFeatureFeeder;
Chris@72 323
cannam@0 324 }; // class Matcher
cannam@0 325
cannam@0 326 #endif