cannam@0
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
cannam@0
|
2
|
cannam@0
|
3 /*
|
cannam@0
|
4 Vamp feature extraction plugin using the MATCH audio alignment
|
cannam@0
|
5 algorithm.
|
cannam@0
|
6
|
cannam@0
|
7 Centre for Digital Music, Queen Mary, University of London.
|
Chris@236
|
8 Copyright (c) 2007-2020 Simon Dixon, Chris Cannam, and Queen Mary
|
Chris@230
|
9 University of London, Copyright (c) 2014-2015 Tido GmbH.
|
cannam@0
|
10
|
cannam@0
|
11 This program is free software; you can redistribute it and/or
|
cannam@0
|
12 modify it under the terms of the GNU General Public License as
|
cannam@0
|
13 published by the Free Software Foundation; either version 2 of the
|
cannam@0
|
14 License, or (at your option) any later version. See the file
|
cannam@0
|
15 COPYING included with this distribution for more information.
|
cannam@0
|
16 */
|
cannam@0
|
17
|
Chris@181
|
18 #ifndef MATCHER_H
|
Chris@181
|
19 #define MATCHER_H
|
cannam@0
|
20
|
cannam@0
|
21 #include <vector>
|
cannam@0
|
22 #include <iostream>
|
cannam@0
|
23 #include <sstream>
|
cannam@0
|
24 #include <cmath>
|
cannam@0
|
25
|
Chris@26
|
26 #include "DistanceMetric.h"
|
Chris@187
|
27 #include "MatchTypes.h"
|
cannam@0
|
28
|
Chris@74
|
29 /** Represents an audio feature stream that can be matched to another
|
Chris@74
|
30 * audio stream of the same piece of music. The matching algorithm
|
Chris@74
|
31 * uses dynamic time warping.
|
cannam@0
|
32 */
|
cannam@0
|
33 class Matcher
|
cannam@0
|
34 {
|
Chris@15
|
35 public:
|
Chris@235
|
36 static std::string advanceToString(advance_t a) {
|
Chris@83
|
37 switch (a) {
|
Chris@83
|
38 case AdvanceNone: return "AdvanceNone";
|
Chris@83
|
39 case AdvanceBoth: return "AdvanceBoth";
|
Chris@83
|
40 case AdvanceThis: return "AdvanceThis";
|
Chris@83
|
41 case AdvanceOther: return "AdvanceOther";
|
Chris@83
|
42 }
|
Chris@83
|
43 return "(unknown)";
|
Chris@83
|
44 }
|
Chris@45
|
45
|
Chris@15
|
46 struct Parameters {
|
Chris@15
|
47
|
Chris@113
|
48 Parameters(double hopTime_) :
|
Chris@15
|
49 hopTime(hopTime_),
|
Chris@15
|
50 blockTime(10.0),
|
Chris@83
|
51 maxRunCount(3),
|
Chris@83
|
52 diagonalWeight(2.0)
|
Chris@15
|
53 {}
|
Chris@15
|
54
|
Chris@235
|
55 /** Spacing of audio frames (determines the amount of overlap
|
Chris@235
|
56 * or skip between frames). This value is expressed in
|
Chris@83
|
57 * seconds.
|
Chris@83
|
58 */
|
Chris@15
|
59 double hopTime;
|
Chris@38
|
60
|
Chris@15
|
61 /** The width of the search band (error margin) around the current
|
Chris@15
|
62 * match position, measured in seconds. Strictly speaking the
|
Chris@15
|
63 * width is measured backwards from the current point, since the
|
Chris@15
|
64 * algorithm has to work causally.
|
Chris@15
|
65 */
|
Chris@15
|
66 double blockTime;
|
Chris@15
|
67
|
Chris@15
|
68 /** Maximum number of frames sequentially processed by this
|
Chris@15
|
69 * matcher, without a frame of the other matcher being
|
Chris@15
|
70 * processed.
|
Chris@15
|
71 */
|
Chris@15
|
72 int maxRunCount;
|
Chris@83
|
73
|
Chris@83
|
74 /** Weight applied to cost of diagonal step relative to
|
Chris@83
|
75 * horizontal or vertical step. The default of 2.0 means that
|
Chris@83
|
76 * a diagonal is not favoured over horizontal+vertical
|
Chris@83
|
77 * combined, which is good when maintaining gross tracking of
|
Chris@83
|
78 * performances that may have wildly differing speeds but
|
Chris@83
|
79 * which also leads to quite jaggy paths. A more typical
|
Chris@83
|
80 * normal DTW approach for performances with similar speeds
|
Chris@83
|
81 * might use 1.0 or something close to it.
|
Chris@83
|
82 */
|
Chris@188
|
83 double diagonalWeight;
|
Chris@15
|
84 };
|
Chris@15
|
85
|
cannam@0
|
86 /** Constructor for Matcher.
|
Chris@74
|
87 *
|
Chris@74
|
88 * A Matcher expects to be provided with feature vectors
|
Chris@74
|
89 * calculated by some external code (for example, a
|
Chris@74
|
90 * FeatureExtractor). Call consumeFeatureVector to provide each
|
Chris@74
|
91 * feature frame.
|
Chris@23
|
92 *
|
Chris@23
|
93 * @param p The Matcher representing the performance with which
|
Chris@23
|
94 * this one is going to be matched. Some information is shared
|
Chris@23
|
95 * between the two matchers (currently one possesses the distance
|
Chris@23
|
96 * matrix and optimal path matrix).
|
Chris@23
|
97 */
|
Chris@143
|
98 Matcher(Parameters params, DistanceMetric::Parameters dparams, Matcher *p);
|
Chris@23
|
99
|
Chris@78
|
100 /** Destructor for Matcher.
|
Chris@78
|
101 */
|
cannam@0
|
102 ~Matcher();
|
cannam@0
|
103
|
cannam@0
|
104 /** Adds a link to the Matcher object representing the performance
|
cannam@0
|
105 * which is going to be matched to this one.
|
cannam@0
|
106 *
|
cannam@0
|
107 * @param p the Matcher representing the other performance
|
cannam@0
|
108 */
|
cannam@0
|
109 void setOtherMatcher(Matcher *p) {
|
Chris@43
|
110 m_otherMatcher = p;
|
Chris@74
|
111 }
|
cannam@0
|
112
|
Chris@78
|
113 int getBlockSize() {
|
Chris@78
|
114 return m_blockSize;
|
Chris@78
|
115 }
|
Chris@78
|
116
|
Chris@171
|
117 bool isFillingInitialBlock() {
|
Chris@171
|
118 return m_frameCount < m_blockSize;
|
Chris@171
|
119 }
|
Chris@171
|
120
|
Chris@78
|
121 bool isOverrunning() {
|
Chris@78
|
122 return m_runCount >= m_params.maxRunCount;
|
Chris@78
|
123 }
|
Chris@78
|
124
|
cannam@0
|
125 int getFrameCount() {
|
Chris@43
|
126 return m_frameCount;
|
cannam@0
|
127 }
|
cannam@0
|
128
|
Chris@72
|
129 int getOtherFrameCount() {
|
Chris@72
|
130 return m_otherMatcher->getFrameCount();
|
Chris@72
|
131 }
|
Chris@74
|
132
|
Chris@188
|
133 double getDiagonalWeight() {
|
Chris@83
|
134 return m_params.diagonalWeight;
|
Chris@83
|
135 }
|
Chris@83
|
136
|
Chris@74
|
137 /** Processes a feature vector frame, presumably calculated from
|
Chris@74
|
138 * audio data by some external code such as a FeatureExtractor.
|
Chris@74
|
139 * Calculates the distance to all frames stored in the
|
Chris@74
|
140 * otherMatcher and stores in the distance matrix, before
|
Chris@74
|
141 * updating the optimal path matrix using the dynamic time
|
Chris@74
|
142 * warping algorithm.
|
Chris@74
|
143 *
|
Chris@104
|
144 * The supplied features must always be of the same size (within
|
Chris@104
|
145 * any pair of Matcher objects).
|
Chris@74
|
146 */
|
Chris@182
|
147 void consumeFeatureVector(const feature_t &feature);
|
Chris@72
|
148
|
Chris@72
|
149 /** Tests whether a location is in range in the minimum cost matrix.
|
Chris@72
|
150 *
|
Chris@72
|
151 * @param i the frame number of this Matcher
|
Chris@72
|
152 * @param j the frame number of the other Matcher
|
Chris@72
|
153 * @return true if the location is in range
|
Chris@72
|
154 */
|
Chris@72
|
155 bool isInRange(int i, int j);
|
Chris@203
|
156
|
Chris@203
|
157 /** Tests whether a location is available in the minimum cost
|
Chris@203
|
158 * matrix, that is, whether it is in range and contains a valid
|
Chris@203
|
159 * cost value. Note this and its associated isRowAvailable,
|
Chris@203
|
160 * isColAvailable checks are more expensive than isInRange and
|
Chris@203
|
161 * are really intended for error checking. (If a row is in range,
|
Chris@203
|
162 * it should always be available.)
|
Chris@203
|
163 *
|
Chris@203
|
164 * @param i the frame number of this Matcher
|
Chris@203
|
165 * @param j the frame number of the other Matcher
|
Chris@203
|
166 * @return true if the location is in range and contains a valid cost
|
Chris@203
|
167 */
|
Chris@203
|
168 bool isAvailable(int i, int j);
|
Chris@154
|
169
|
Chris@154
|
170 /** Tests whether any locations in the given row are available.
|
Chris@154
|
171 */
|
Chris@154
|
172 bool isRowAvailable(int i);
|
Chris@154
|
173
|
Chris@154
|
174 /** Tests whether any locations in the given column are available.
|
Chris@154
|
175 */
|
Chris@154
|
176 bool isColAvailable(int i);
|
Chris@72
|
177
|
Chris@154
|
178 /** Returns the valid range of columns for the given row, that is,
|
Chris@154
|
179 * the range of frames in the other Matcher for the given frame
|
Chris@154
|
180 * in this Matcher's minimum cost matrix.
|
Chris@72
|
181 *
|
Chris@72
|
182 * @param i the frame number of this Matcher
|
Chris@72
|
183 * @return the first, last pair of frame numbers for the other
|
Chris@72
|
184 * Matcher. Note that the last frame is exclusive (last valid
|
Chris@72
|
185 * frame + 1).
|
Chris@72
|
186 */
|
Chris@205
|
187 std::pair<int, int> getColRangeForRow(int i);
|
Chris@72
|
188
|
Chris@154
|
189 /** Returns the valid range of rows for the given column, that is,
|
Chris@154
|
190 * the range of frames in this Matcher for the given frame in the
|
Chris@154
|
191 * other Matcher's minimum cost matrix.
|
Chris@72
|
192 *
|
Chris@72
|
193 * @param i the frame number of the other Matcher
|
Chris@72
|
194 * @return the first, last pair of frame numbers for this
|
Chris@72
|
195 * Matcher. Note that the last frame is exclusive (last valid
|
Chris@72
|
196 * frame + 1).
|
Chris@72
|
197 */
|
Chris@205
|
198 std::pair<int, int> getRowRangeForCol(int i);
|
Chris@72
|
199
|
Chris@72
|
200 /** Retrieves a value from the distance matrix.
|
Chris@72
|
201 *
|
Chris@72
|
202 * @param i the frame number of this Matcher
|
Chris@72
|
203 * @param j the frame number of the other Matcher
|
Chris@72
|
204 * @return the distance metric at this location
|
Chris@72
|
205 */
|
Chris@182
|
206 distance_t getDistance(int i, int j);
|
Chris@72
|
207
|
Chris@72
|
208 /** Sets a value to the distance matrix.
|
Chris@72
|
209 *
|
Chris@72
|
210 * @param i the frame number of this Matcher
|
Chris@72
|
211 * @param j the frame number of the other Matcher
|
Chris@72
|
212 * @param value the distance metric to set for this location
|
Chris@72
|
213 */
|
Chris@182
|
214 void setDistance(int i, int j, distance_t distance);
|
Chris@72
|
215
|
Chris@72
|
216 /** Retrieves a value from the minimum cost matrix.
|
Chris@72
|
217 *
|
Chris@72
|
218 * @param i the frame number of this Matcher
|
Chris@72
|
219 * @param j the frame number of the other Matcher
|
Chris@72
|
220 * @return the cost of the minimum cost path to this location
|
Chris@72
|
221 */
|
Chris@182
|
222 pathcost_t getPathCost(int i, int j);
|
Chris@72
|
223
|
Chris@72
|
224 /** Sets a value and an advance direction to the minimum cost matrix.
|
Chris@72
|
225 *
|
Chris@72
|
226 * @param i the frame number of this Matcher
|
Chris@72
|
227 * @param j the frame number of the other Matcher
|
Chris@72
|
228 * @param dir the direction from which this position is reached with
|
Chris@72
|
229 * minimum cost
|
Chris@72
|
230 * @param value the cost of the minimum cost path to set for this location
|
Chris@72
|
231 */
|
Chris@182
|
232 void setPathCost(int i, int j, advance_t dir, pathcost_t value);
|
Chris@135
|
233
|
Chris@135
|
234 /** Retrieves a value from the minimum cost matrix, normalised for
|
Chris@135
|
235 * path length.
|
Chris@135
|
236 *
|
Chris@135
|
237 * @param i the frame number of this Matcher
|
Chris@135
|
238 * @param j the frame number of the other Matcher
|
Chris@135
|
239 * @return the cost of the minimum cost path to this location,
|
Chris@135
|
240 * normalised by the Manhattan distance from 0,0 to i,j
|
Chris@135
|
241 */
|
Chris@191
|
242 normpathcost_t getNormalisedPathCost(int i, int j);
|
Chris@72
|
243
|
Chris@72
|
244 /** Retrieves an advance direction from the matrix.
|
Chris@72
|
245 *
|
Chris@72
|
246 * @param i the frame number of this Matcher
|
Chris@72
|
247 * @param j the frame number of the other Matcher
|
Chris@72
|
248 * @return the direction from which this position is reached with
|
Chris@72
|
249 * minimum cost
|
Chris@72
|
250 */
|
Chris@181
|
251 advance_t getAdvance(int i, int j);
|
Chris@202
|
252
|
Chris@232
|
253 struct MemoryStats {
|
Chris@232
|
254 double features_k;
|
Chris@232
|
255 double pathcosts_k;
|
Chris@232
|
256 double distances_k;
|
Chris@232
|
257 double advances_k;
|
Chris@232
|
258 double total_k() const {
|
Chris@232
|
259 return features_k + pathcosts_k + distances_k + advances_k;
|
Chris@232
|
260 }
|
Chris@232
|
261 };
|
Chris@232
|
262
|
Chris@232
|
263 /** Obtain some stats about memory consumption.
|
Chris@232
|
264 */
|
Chris@232
|
265 MemoryStats getMemoryStats() const;
|
Chris@232
|
266
|
Chris@202
|
267 /** Print some stats about memory consumption etc to stderr.
|
Chris@202
|
268 */
|
Chris@202
|
269 void printStats();
|
Chris@72
|
270
|
cannam@0
|
271 protected:
|
Chris@38
|
272 /** Create internal structures and reset. */
|
cannam@0
|
273 void init();
|
cannam@0
|
274
|
Chris@38
|
275 /** The distXSize value has changed: resize internal buffers. */
|
Chris@41
|
276 void size();
|
cannam@0
|
277
|
Chris@71
|
278 /** Updates an entry in the distance matrix and the optimal path matrix.
|
cannam@0
|
279 *
|
cannam@0
|
280 * @param i the frame number of this Matcher
|
cannam@0
|
281 * @param j the frame number of the other Matcher
|
cannam@0
|
282 * @param dir the direction from which this position is reached with
|
cannam@0
|
283 * minimum cost
|
cannam@0
|
284 * @param value the cost of the minimum path except the current step
|
cannam@0
|
285 * @param dMN the distance cost between the two frames
|
cannam@0
|
286 */
|
Chris@182
|
287 void updateValue(int i, int j, advance_t dir, pathcost_t value, distance_t dMN);
|
cannam@0
|
288
|
Chris@21
|
289 void calcAdvance();
|
Chris@21
|
290
|
Chris@211
|
291 /**
|
Chris@211
|
292 * Add the given distance increment to the given path cost, and
|
Chris@211
|
293 * return the result clipped (if necessary) at MaxPathCost.
|
Chris@211
|
294 */
|
Chris@211
|
295 pathcost_t addToCost(pathcost_t cost, pathcost_t increment);
|
Chris@211
|
296
|
Chris@42
|
297 /** Points to the other performance with which this one is being
|
Chris@211
|
298 * compared. (See <code>m_firstPM</code>)
|
Chris@42
|
299 */
|
Chris@43
|
300 Matcher *m_otherMatcher;
|
Chris@42
|
301
|
Chris@42
|
302 /** Indicates which performance is considered primary (the
|
Chris@42
|
303 * score). This is the performance shown on the vertical axis,
|
Chris@42
|
304 * and referred to as "this" in the codes for the direction of
|
Chris@42
|
305 * DTW steps. */
|
Chris@43
|
306 bool m_firstPM;
|
Chris@42
|
307
|
Chris@42
|
308 /** Configuration parameters */
|
Chris@43
|
309 Parameters m_params;
|
Chris@42
|
310
|
Chris@42
|
311 /** Width of the search band in FFT frames (see <code>blockTime</code>) */
|
Chris@43
|
312 int m_blockSize;
|
Chris@42
|
313
|
Chris@42
|
314 /** The number of frames of audio data which have been read. */
|
Chris@43
|
315 int m_frameCount;
|
Chris@42
|
316
|
Chris@42
|
317 /** The number of frames sequentially processed by this matcher,
|
Chris@42
|
318 * without a frame of the other matcher being processed.
|
Chris@42
|
319 */
|
Chris@43
|
320 int m_runCount;
|
Chris@42
|
321
|
Chris@50
|
322 /** A block of previously seen feature frames is stored in this
|
Chris@50
|
323 * structure for calculation of the distance matrix as the new
|
Chris@50
|
324 * frames are received. One can think of the structure of the
|
Chris@50
|
325 * array as a circular buffer of vectors. */
|
Chris@182
|
326 featureseq_t m_features;
|
Chris@42
|
327
|
Chris@42
|
328 /** The best path cost matrix. */
|
Chris@182
|
329 pathcostmat_t m_bestPathCost;
|
Chris@42
|
330
|
Chris@42
|
331 /** The distance matrix. */
|
Chris@182
|
332 distancemat_t m_distance;
|
Chris@42
|
333
|
Chris@45
|
334 /** The advance direction matrix. */
|
Chris@182
|
335 advancemat_t m_advance;
|
Chris@45
|
336
|
Chris@45
|
337 /** The bounds of each row of data in the distance, path cost, and
|
Chris@45
|
338 * advance direction matrices.*/
|
Chris@235
|
339 std::vector<int> m_first;
|
Chris@235
|
340 std::vector<int> m_last;
|
Chris@235
|
341
|
Chris@45
|
342 /** Width of distance, path cost, and advance direction matrices
|
Chris@45
|
343 * and first and last vectors */
|
Chris@43
|
344 int m_distXSize;
|
Chris@42
|
345
|
Chris@43
|
346 bool m_initialised;
|
Chris@42
|
347
|
Chris@43
|
348 DistanceMetric m_metric;
|
Chris@78
|
349 };
|
cannam@0
|
350
|
Chris@232
|
351 inline Matcher::MemoryStats operator+(const Matcher::MemoryStats &a,
|
Chris@232
|
352 const Matcher::MemoryStats &b)
|
Chris@232
|
353 {
|
Chris@232
|
354 Matcher::MemoryStats m = a;
|
Chris@232
|
355 m.features_k += b.features_k;
|
Chris@232
|
356 m.pathcosts_k += b.pathcosts_k;
|
Chris@232
|
357 m.distances_k += b.distances_k;
|
Chris@232
|
358 m.advances_k += b.advances_k;
|
Chris@232
|
359 return m;
|
Chris@232
|
360 }
|
Chris@232
|
361
|
cannam@0
|
362 #endif
|