annotate src/Matcher.h @ 41:9aec2304b9f6 refactors

Fix to resizing logic. Regression test passes.
author Chris Cannam
date Thu, 13 Nov 2014 13:19:53 +0000
parents 8cce4e13ede3
children 8dd16749c3c3
rev   line source
cannam@0 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
cannam@0 2
cannam@0 3 /*
cannam@0 4 Vamp feature extraction plugin using the MATCH audio alignment
cannam@0 5 algorithm.
cannam@0 6
cannam@0 7 Centre for Digital Music, Queen Mary, University of London.
cannam@0 8 This file copyright 2007 Simon Dixon, Chris Cannam and QMUL.
cannam@0 9
cannam@0 10 This program is free software; you can redistribute it and/or
cannam@0 11 modify it under the terms of the GNU General Public License as
cannam@0 12 published by the Free Software Foundation; either version 2 of the
cannam@0 13 License, or (at your option) any later version. See the file
cannam@0 14 COPYING included with this distribution for more information.
cannam@0 15 */
cannam@0 16
cannam@0 17 #ifndef _MATCHER_H_
cannam@0 18 #define _MATCHER_H_
cannam@0 19
cannam@0 20 #include <vector>
cannam@0 21 #include <iostream>
cannam@0 22 #include <sstream>
cannam@0 23 #include <cmath>
cannam@0 24
cannam@0 25 #define ADVANCE_THIS 1
cannam@0 26 #define ADVANCE_OTHER 2
cannam@0 27 #define ADVANCE_BOTH 3
cannam@0 28 #define MASK 0xfc
cannam@0 29
Chris@26 30 #include "DistanceMetric.h"
Chris@38 31 #include "FeatureExtractor.h"
cannam@0 32
cannam@0 33 using std::vector;
cannam@0 34 using std::string;
cannam@0 35 using std::cerr;
cannam@0 36 using std::endl;
cannam@0 37
cannam@0 38 /** Represents an audio stream that can be matched to another audio
cannam@0 39 * stream of the same piece of music. The matching algorithm uses
cannam@0 40 * dynamic time warping. The distance metric is a Euclidean metric
cannam@0 41 * on the FFT data with the higher frequencies mapped onto a linear
cannam@0 42 * scale.
cannam@0 43 */
cannam@0 44 class Matcher
cannam@0 45 {
Chris@15 46 public:
Chris@15 47 struct Parameters {
Chris@15 48
Chris@15 49 Parameters(float rate_, double hopTime_, int fftSize_) :
Chris@15 50 sampleRate(rate_),
Chris@26 51 distanceNorm(DistanceMetric::NormaliseDistanceToLogSum),
Chris@29 52 distanceScale(90.0),
Chris@15 53 hopTime(hopTime_),
Chris@15 54 fftSize(fftSize_),
Chris@15 55 blockTime(10.0),
Chris@15 56 maxRunCount(3)
Chris@15 57 {}
Chris@15 58
Chris@15 59 /** Sample rate of audio */
Chris@15 60 float sampleRate;
Chris@15 61
Chris@15 62 /** Type of distance metric normalisation */
Chris@26 63 DistanceMetric::DistanceNormalisation distanceNorm;
Chris@15 64
Chris@29 65 /** Scaling factor for distance metric; must guarantee that the
Chris@29 66 * final value fits in the data type used, that is, unsigned
Chris@29 67 * char.
Chris@29 68 */
Chris@29 69 double distanceScale;
Chris@29 70
Chris@15 71 /** Spacing of audio frames (determines the amount of overlap or
Chris@15 72 * skip between frames). This value is expressed in
Chris@15 73 * seconds. */
Chris@15 74 double hopTime;
Chris@38 75
Chris@15 76 /** Size of an FFT frame in samples. Note that the data passed
Chris@15 77 * in to Matcher is already in the frequency domain, so this
Chris@15 78 * expresses the size of the frame that the caller will be
Chris@38 79 * providing. */
Chris@15 80 int fftSize;
Chris@38 81
Chris@15 82 /** The width of the search band (error margin) around the current
Chris@15 83 * match position, measured in seconds. Strictly speaking the
Chris@15 84 * width is measured backwards from the current point, since the
Chris@15 85 * algorithm has to work causally.
Chris@15 86 */
Chris@15 87 double blockTime;
Chris@15 88
Chris@15 89 /** Maximum number of frames sequentially processed by this
Chris@15 90 * matcher, without a frame of the other matcher being
Chris@15 91 * processed.
Chris@15 92 */
Chris@15 93 int maxRunCount;
Chris@15 94 };
Chris@15 95
cannam@0 96 protected:
cannam@0 97 /** Points to the other performance with which this one is being
cannam@0 98 * compared. The data for the distance metric and the dynamic
cannam@0 99 * time warping is shared between the two matchers. In the
cannam@0 100 * original version, only one of the two performance matchers
cannam@0 101 * contained the distance metric. (See <code>first</code>)
cannam@0 102 */
cannam@0 103 Matcher *otherMatcher;
cannam@0 104
cannam@0 105 /** Indicates which performance is considered primary (the
cannam@0 106 * score). This is the performance shown on the vertical axis,
cannam@0 107 * and referred to as "this" in the codes for the direction of
cannam@0 108 * DTW steps. */
cannam@0 109 bool firstPM;
cannam@0 110
Chris@15 111 /** Configuration parameters */
Chris@15 112 Parameters params;
cannam@0 113
cannam@0 114 /** Width of the search band in FFT frames (see <code>blockTime</code>) */
cannam@0 115 int blockSize;
cannam@0 116
cannam@0 117 /** The number of frames of audio data which have been read. */
cannam@0 118 int frameCount;
cannam@0 119
cannam@0 120 /** The number of frames sequentially processed by this matcher,
cannam@0 121 * without a frame of the other matcher being processed.
cannam@0 122 */
cannam@0 123 int runCount;
cannam@0 124
Chris@38 125 /** The number of values in a feature vector. */
Chris@23 126 int featureSize;
Chris@23 127
cannam@0 128 /** A block of previously seen frames are stored in this structure
cannam@0 129 * for calculation of the distance matrix as the new frames are
cannam@0 130 * read in. One can think of the structure of the array as a
Chris@13 131 * circular buffer of vectors. These are the frames with all
Chris@13 132 * applicable processing applied (e.g. spectral difference,
Chris@13 133 * normalisation), unlike prevFrame and newFrame. The total
Chris@13 134 * energy of frames[i] is stored in totalEnergies[i]. */
cannam@0 135 vector<vector<double> > frames;
cannam@0 136
cannam@0 137 /** The best path cost matrix. */
Chris@38 138 vector<vector<int> > bestPathCost;
cannam@0 139
cannam@0 140 /** The distance matrix. */
Chris@38 141 vector<vector<unsigned char> > distance;
cannam@0 142
cannam@0 143 /** The bounds of each row of data in the distance and path cost matrices.*/
Chris@38 144 vector<int> first;
Chris@38 145 vector<int> last;
cannam@0 146
cannam@0 147 /** Height of each column in distance and bestPathCost matrices */
Chris@38 148 vector<int> distYSizes;
cannam@0 149
cannam@0 150 /** Width of distance and bestPathCost matrices and first and last vectors */
cannam@0 151 int distXSize;
cannam@0 152
cannam@0 153 bool initialised;
cannam@0 154
cannam@0 155 /** Disable or enable debugging output */
cannam@0 156 static bool silent;
cannam@0 157
cannam@0 158 public:
cannam@0 159 /** Constructor for Matcher.
cannam@0 160 *
cannam@0 161 * @param p The Matcher representing the performance with which
cannam@0 162 * this one is going to be matched. Some information is shared
cannam@0 163 * between the two matchers (currently one possesses the distance
cannam@0 164 * matrix and optimal path matrix).
cannam@0 165 */
Chris@38 166 Matcher(Parameters parameters,
Chris@38 167 FeatureExtractor::Parameters featureParams,
Chris@38 168 Matcher *p);
cannam@0 169
Chris@23 170 /** Constructor for Matcher using externally supplied features.
Chris@23 171 * A Matcher made using this constructor will not carry out its
Chris@23 172 * own feature extraction from frequency-domain audio data, but
Chris@23 173 * instead will accept arbitrary feature frames calculated by
Chris@23 174 * some external code.
Chris@23 175 *
Chris@23 176 * @param p The Matcher representing the performance with which
Chris@23 177 * this one is going to be matched. Some information is shared
Chris@23 178 * between the two matchers (currently one possesses the distance
Chris@23 179 * matrix and optimal path matrix).
Chris@23 180 *
Chris@23 181 * @param featureSize Number of values in each feature vector.
Chris@23 182 */
Chris@23 183 Matcher(Parameters parameters, Matcher *p, int featureSize);
Chris@23 184
cannam@0 185 ~Matcher();
cannam@0 186
cannam@0 187 /** For debugging, outputs information about the Matcher to
cannam@0 188 * standard error.
cannam@0 189 */
cannam@0 190 void print();
cannam@0 191
cannam@0 192 /** Adds a link to the Matcher object representing the performance
cannam@0 193 * which is going to be matched to this one.
cannam@0 194 *
cannam@0 195 * @param p the Matcher representing the other performance
cannam@0 196 */
cannam@0 197 void setOtherMatcher(Matcher *p) {
cannam@0 198 otherMatcher = p;
cannam@0 199 } // setOtherMatcher()
cannam@0 200
cannam@0 201 int getFrameCount() {
cannam@0 202 return frameCount;
cannam@0 203 }
cannam@0 204
cannam@0 205 protected:
Chris@38 206 /** Create internal structures and reset. */
cannam@0 207 void init();
cannam@0 208
Chris@38 209 /** The distXSize value has changed: resize internal buffers. */
Chris@41 210 void size();
cannam@0 211
Chris@38 212 /** Process a frequency-domain frame of audio data using the
Chris@38 213 * built-in FeatureExtractor, then calculating the distance to
Chris@38 214 * all frames stored in the otherMatcher and storing them in the
Chris@38 215 * distance matrix, and finally updating the optimal path matrix
Chris@38 216 * using the dynamic time warping algorithm.
Chris@14 217 *
Chris@14 218 * Return value is the frame (post-processed, with warping,
Chris@14 219 * rectification, and normalisation as appropriate).
Chris@23 220 *
Chris@23 221 * The Matcher must have been constructed using the constructor
Chris@23 222 * without an external featureSize parameter in order to use this
Chris@23 223 * function. (Otherwise it will be expecting you to call
Chris@23 224 * consumeFeatureVector.)
cannam@0 225 */
Chris@21 226 std::vector<double> consumeFrame(double *reBuffer, double *imBuffer);
cannam@0 227
Chris@23 228 /** Processes a feature vector frame (presumably calculated from
Chris@23 229 * audio data by some external code). As consumeFrame, except
Chris@23 230 * that it does not calculate a feature from audio data but
Chris@23 231 * instead uses the supplied feature directly.
Chris@23 232 *
Chris@23 233 * The Matcher must have been constructed using the constructor
Chris@23 234 * that accepts an external featureSize parameter in order to
Chris@23 235 * use this function. The supplied feature must be of the size
Chris@23 236 * that was passed to the constructor.
Chris@23 237 */
Chris@23 238 void consumeFeatureVector(std::vector<double> feature);
Chris@23 239
cannam@0 240 /** Retrieves values from the minimum cost matrix.
cannam@0 241 *
cannam@0 242 * @param i the frame number of this Matcher
cannam@0 243 * @param j the frame number of the other Matcher
cannam@0 244 * @return the cost of the minimum cost path to this location
cannam@0 245 */
cannam@0 246 int getValue(int i, int j, bool firstAttempt);
cannam@0 247
cannam@0 248 /** Stores entries in the distance matrix and the optimal path matrix.
cannam@0 249 *
cannam@0 250 * @param i the frame number of this Matcher
cannam@0 251 * @param j the frame number of the other Matcher
cannam@0 252 * @param dir the direction from which this position is reached with
cannam@0 253 * minimum cost
cannam@0 254 * @param value the cost of the minimum path except the current step
cannam@0 255 * @param dMN the distance cost between the two frames
cannam@0 256 */
cannam@0 257 void setValue(int i, int j, int dir, int value, int dMN);
cannam@0 258
Chris@21 259 void calcAdvance();
Chris@21 260
Chris@38 261 FeatureExtractor featureExtractor;
Chris@26 262 DistanceMetric metric;
Chris@26 263
cannam@0 264 friend class MatchFeeder;
Chris@24 265 friend class MatchFeatureFeeder;
Chris@15 266 friend class Finder;
cannam@0 267
cannam@0 268 }; // class Matcher
cannam@0 269
cannam@0 270 #endif