cannam@0
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
cannam@0
|
2
|
cannam@0
|
3 /*
|
cannam@0
|
4 Vamp feature extraction plugin using the MATCH audio alignment
|
cannam@0
|
5 algorithm.
|
cannam@0
|
6
|
cannam@0
|
7 Centre for Digital Music, Queen Mary, University of London.
|
cannam@0
|
8 This file copyright 2007 Simon Dixon, Chris Cannam and QMUL.
|
cannam@0
|
9
|
cannam@0
|
10 This program is free software; you can redistribute it and/or
|
cannam@0
|
11 modify it under the terms of the GNU General Public License as
|
cannam@0
|
12 published by the Free Software Foundation; either version 2 of the
|
cannam@0
|
13 License, or (at your option) any later version. See the file
|
cannam@0
|
14 COPYING included with this distribution for more information.
|
cannam@0
|
15 */
|
cannam@0
|
16
|
cannam@0
|
17 #ifndef _MATCHER_H_
|
cannam@0
|
18 #define _MATCHER_H_
|
cannam@0
|
19
|
cannam@0
|
20 #include <vector>
|
cannam@0
|
21 #include <iostream>
|
cannam@0
|
22 #include <sstream>
|
cannam@0
|
23 #include <cmath>
|
cannam@0
|
24
|
Chris@26
|
25 #include "DistanceMetric.h"
|
cannam@0
|
26
|
cannam@0
|
27 using std::vector;
|
cannam@0
|
28 using std::string;
|
cannam@0
|
29 using std::cerr;
|
cannam@0
|
30 using std::endl;
|
cannam@0
|
31
|
Chris@74
|
32 /** Represents an audio feature stream that can be matched to another
|
Chris@74
|
33 * audio stream of the same piece of music. The matching algorithm
|
Chris@74
|
34 * uses dynamic time warping.
|
cannam@0
|
35 */
|
cannam@0
|
36 class Matcher
|
cannam@0
|
37 {
|
Chris@15
|
38 public:
|
Chris@45
|
39 enum Advance {
|
Chris@45
|
40 AdvanceNone,
|
Chris@45
|
41 AdvanceBoth,
|
Chris@45
|
42 AdvanceThis,
|
Chris@45
|
43 AdvanceOther
|
Chris@45
|
44 };
|
Chris@83
|
45 static string advanceToString(Advance a) {
|
Chris@83
|
46 switch (a) {
|
Chris@83
|
47 case AdvanceNone: return "AdvanceNone";
|
Chris@83
|
48 case AdvanceBoth: return "AdvanceBoth";
|
Chris@83
|
49 case AdvanceThis: return "AdvanceThis";
|
Chris@83
|
50 case AdvanceOther: return "AdvanceOther";
|
Chris@83
|
51 }
|
Chris@83
|
52 return "(unknown)";
|
Chris@83
|
53 }
|
Chris@45
|
54
|
Chris@15
|
55 struct Parameters {
|
Chris@15
|
56
|
Chris@15
|
57 Parameters(float rate_, double hopTime_, int fftSize_) :
|
Chris@15
|
58 sampleRate(rate_),
|
Chris@26
|
59 distanceNorm(DistanceMetric::NormaliseDistanceToLogSum),
|
Chris@15
|
60 hopTime(hopTime_),
|
Chris@15
|
61 fftSize(fftSize_),
|
Chris@15
|
62 blockTime(10.0),
|
Chris@83
|
63 maxRunCount(3),
|
Chris@83
|
64 diagonalWeight(2.0)
|
Chris@15
|
65 {}
|
Chris@15
|
66
|
Chris@15
|
67 /** Sample rate of audio */
|
Chris@15
|
68 float sampleRate;
|
Chris@15
|
69
|
Chris@15
|
70 /** Type of distance metric normalisation */
|
Chris@26
|
71 DistanceMetric::DistanceNormalisation distanceNorm;
|
Chris@15
|
72
|
Chris@15
|
73 /** Spacing of audio frames (determines the amount of overlap or
|
Chris@15
|
74 * skip between frames). This value is expressed in
|
Chris@83
|
75 * seconds.
|
Chris@83
|
76 */
|
Chris@15
|
77 double hopTime;
|
Chris@38
|
78
|
Chris@15
|
79 /** Size of an FFT frame in samples. Note that the data passed
|
Chris@15
|
80 * in to Matcher is already in the frequency domain, so this
|
Chris@15
|
81 * expresses the size of the frame that the caller will be
|
Chris@83
|
82 * providing.
|
Chris@83
|
83 */
|
Chris@15
|
84 int fftSize;
|
Chris@38
|
85
|
Chris@15
|
86 /** The width of the search band (error margin) around the current
|
Chris@15
|
87 * match position, measured in seconds. Strictly speaking the
|
Chris@15
|
88 * width is measured backwards from the current point, since the
|
Chris@15
|
89 * algorithm has to work causally.
|
Chris@15
|
90 */
|
Chris@15
|
91 double blockTime;
|
Chris@15
|
92
|
Chris@15
|
93 /** Maximum number of frames sequentially processed by this
|
Chris@15
|
94 * matcher, without a frame of the other matcher being
|
Chris@15
|
95 * processed.
|
Chris@15
|
96 */
|
Chris@15
|
97 int maxRunCount;
|
Chris@83
|
98
|
Chris@83
|
99 /** Weight applied to cost of diagonal step relative to
|
Chris@83
|
100 * horizontal or vertical step. The default of 2.0 means that
|
Chris@83
|
101 * a diagonal is not favoured over horizontal+vertical
|
Chris@83
|
102 * combined, which is good when maintaining gross tracking of
|
Chris@83
|
103 * performances that may have wildly differing speeds but
|
Chris@83
|
104 * which also leads to quite jaggy paths. A more typical
|
Chris@83
|
105 * normal DTW approach for performances with similar speeds
|
Chris@83
|
106 * might use 1.0 or something close to it.
|
Chris@83
|
107 */
|
Chris@83
|
108 float diagonalWeight;
|
Chris@15
|
109 };
|
Chris@15
|
110
|
cannam@0
|
111 /** Constructor for Matcher.
|
Chris@74
|
112 *
|
Chris@74
|
113 * A Matcher expects to be provided with feature vectors
|
Chris@74
|
114 * calculated by some external code (for example, a
|
Chris@74
|
115 * FeatureExtractor). Call consumeFeatureVector to provide each
|
Chris@74
|
116 * feature frame.
|
Chris@23
|
117 *
|
Chris@23
|
118 * @param p The Matcher representing the performance with which
|
Chris@23
|
119 * this one is going to be matched. Some information is shared
|
Chris@23
|
120 * between the two matchers (currently one possesses the distance
|
Chris@23
|
121 * matrix and optimal path matrix).
|
Chris@23
|
122 *
|
Chris@74
|
123 * @param featureSize Number of values in each of the feature
|
Chris@74
|
124 * vectors that will be provided.
|
Chris@23
|
125 */
|
Chris@23
|
126 Matcher(Parameters parameters, Matcher *p, int featureSize);
|
Chris@23
|
127
|
Chris@78
|
128 /** Destructor for Matcher.
|
Chris@78
|
129 */
|
cannam@0
|
130 ~Matcher();
|
cannam@0
|
131
|
cannam@0
|
132 /** Adds a link to the Matcher object representing the performance
|
cannam@0
|
133 * which is going to be matched to this one.
|
cannam@0
|
134 *
|
cannam@0
|
135 * @param p the Matcher representing the other performance
|
cannam@0
|
136 */
|
cannam@0
|
137 void setOtherMatcher(Matcher *p) {
|
Chris@43
|
138 m_otherMatcher = p;
|
Chris@74
|
139 }
|
cannam@0
|
140
|
Chris@78
|
141 int getBlockSize() {
|
Chris@78
|
142 return m_blockSize;
|
Chris@78
|
143 }
|
Chris@78
|
144
|
Chris@78
|
145 bool isOverrunning() {
|
Chris@78
|
146 return m_runCount >= m_params.maxRunCount;
|
Chris@78
|
147 }
|
Chris@78
|
148
|
cannam@0
|
149 int getFrameCount() {
|
Chris@43
|
150 return m_frameCount;
|
cannam@0
|
151 }
|
cannam@0
|
152
|
Chris@72
|
153 int getOtherFrameCount() {
|
Chris@72
|
154 return m_otherMatcher->getFrameCount();
|
Chris@72
|
155 }
|
Chris@74
|
156
|
Chris@83
|
157 float getDiagonalWeight() {
|
Chris@83
|
158 return m_params.diagonalWeight;
|
Chris@83
|
159 }
|
Chris@83
|
160
|
Chris@74
|
161 /** Processes a feature vector frame, presumably calculated from
|
Chris@74
|
162 * audio data by some external code such as a FeatureExtractor.
|
Chris@74
|
163 * Calculates the distance to all frames stored in the
|
Chris@74
|
164 * otherMatcher and stores in the distance matrix, before
|
Chris@74
|
165 * updating the optimal path matrix using the dynamic time
|
Chris@74
|
166 * warping algorithm.
|
Chris@74
|
167 *
|
Chris@74
|
168 * The supplied feature must be of the size that was passed as
|
Chris@74
|
169 * featureSize to the constructor.
|
Chris@74
|
170 */
|
Chris@74
|
171 void consumeFeatureVector(std::vector<double> feature);
|
Chris@72
|
172
|
Chris@72
|
173 /** Tests whether a location is in range in the minimum cost matrix.
|
Chris@72
|
174 *
|
Chris@72
|
175 * @param i the frame number of this Matcher
|
Chris@72
|
176 * @param j the frame number of the other Matcher
|
Chris@72
|
177 * @return true if the location is in range
|
Chris@72
|
178 */
|
Chris@72
|
179 bool isInRange(int i, int j);
|
Chris@72
|
180
|
Chris@72
|
181 /** Tests whether a location is available in the minimum cost matrix.
|
Chris@72
|
182 *
|
Chris@72
|
183 * @param i the frame number of this Matcher
|
Chris@72
|
184 * @param j the frame number of the other Matcher
|
Chris@72
|
185 * @return true if the location is in range and contains a valid cost
|
Chris@72
|
186 */
|
Chris@72
|
187 bool isAvailable(int i, int j);
|
Chris@72
|
188
|
Chris@72
|
189 /** Returns the valid range of frames in the other Matcher for the
|
Chris@72
|
190 * given frame in this Matcher's minimum cost matrix.
|
Chris@72
|
191 *
|
Chris@72
|
192 * @param i the frame number of this Matcher
|
Chris@72
|
193 * @return the first, last pair of frame numbers for the other
|
Chris@72
|
194 * Matcher. Note that the last frame is exclusive (last valid
|
Chris@72
|
195 * frame + 1).
|
Chris@72
|
196 */
|
Chris@72
|
197 std::pair<int, int> getColRange(int i);
|
Chris@72
|
198
|
Chris@72
|
199 /** Returns the valid range of frames in this Matcher for the
|
Chris@72
|
200 * given frame in the other Matcher's minimum cost matrix.
|
Chris@72
|
201 *
|
Chris@72
|
202 * @param i the frame number of the other Matcher
|
Chris@72
|
203 * @return the first, last pair of frame numbers for this
|
Chris@72
|
204 * Matcher. Note that the last frame is exclusive (last valid
|
Chris@72
|
205 * frame + 1).
|
Chris@72
|
206 */
|
Chris@72
|
207 std::pair<int, int> getRowRange(int i);
|
Chris@72
|
208
|
Chris@72
|
209 /** Retrieves a value from the distance matrix.
|
Chris@72
|
210 *
|
Chris@72
|
211 * @param i the frame number of this Matcher
|
Chris@72
|
212 * @param j the frame number of the other Matcher
|
Chris@72
|
213 * @return the distance metric at this location
|
Chris@72
|
214 */
|
Chris@72
|
215 float getDistance(int i, int j);
|
Chris@72
|
216
|
Chris@72
|
217 /** Sets a value to the distance matrix.
|
Chris@72
|
218 *
|
Chris@72
|
219 * @param i the frame number of this Matcher
|
Chris@72
|
220 * @param j the frame number of the other Matcher
|
Chris@72
|
221 * @param value the distance metric to set for this location
|
Chris@72
|
222 */
|
Chris@96
|
223 void setDistance(int i, int j, float distance);
|
Chris@72
|
224
|
Chris@72
|
225 /** Retrieves a value from the minimum cost matrix.
|
Chris@72
|
226 *
|
Chris@72
|
227 * @param i the frame number of this Matcher
|
Chris@72
|
228 * @param j the frame number of the other Matcher
|
Chris@72
|
229 * @return the cost of the minimum cost path to this location
|
Chris@72
|
230 */
|
Chris@72
|
231 double getPathCost(int i, int j);
|
Chris@72
|
232
|
Chris@72
|
233 /** Sets a value and an advance direction to the minimum cost matrix.
|
Chris@72
|
234 *
|
Chris@72
|
235 * @param i the frame number of this Matcher
|
Chris@72
|
236 * @param j the frame number of the other Matcher
|
Chris@72
|
237 * @param dir the direction from which this position is reached with
|
Chris@72
|
238 * minimum cost
|
Chris@72
|
239 * @param value the cost of the minimum cost path to set for this location
|
Chris@72
|
240 */
|
Chris@72
|
241 void setPathCost(int i, int j, Advance dir, double value);
|
Chris@72
|
242
|
Chris@72
|
243 /** Retrieves an advance direction from the matrix.
|
Chris@72
|
244 *
|
Chris@72
|
245 * @param i the frame number of this Matcher
|
Chris@72
|
246 * @param j the frame number of the other Matcher
|
Chris@72
|
247 * @return the direction from which this position is reached with
|
Chris@72
|
248 * minimum cost
|
Chris@72
|
249 */
|
Chris@72
|
250 Advance getAdvance(int i, int j);
|
Chris@72
|
251
|
cannam@0
|
252 protected:
|
Chris@38
|
253 /** Create internal structures and reset. */
|
cannam@0
|
254 void init();
|
cannam@0
|
255
|
Chris@38
|
256 /** The distXSize value has changed: resize internal buffers. */
|
Chris@41
|
257 void size();
|
cannam@0
|
258
|
Chris@71
|
259 /** Updates an entry in the distance matrix and the optimal path matrix.
|
cannam@0
|
260 *
|
cannam@0
|
261 * @param i the frame number of this Matcher
|
cannam@0
|
262 * @param j the frame number of the other Matcher
|
cannam@0
|
263 * @param dir the direction from which this position is reached with
|
cannam@0
|
264 * minimum cost
|
cannam@0
|
265 * @param value the cost of the minimum path except the current step
|
cannam@0
|
266 * @param dMN the distance cost between the two frames
|
cannam@0
|
267 */
|
Chris@71
|
268 void updateValue(int i, int j, Advance dir, double value, float dMN);
|
cannam@0
|
269
|
Chris@21
|
270 void calcAdvance();
|
Chris@21
|
271
|
Chris@42
|
272 /** Points to the other performance with which this one is being
|
Chris@42
|
273 * compared. The data for the distance metric and the dynamic
|
Chris@42
|
274 * time warping is shared between the two matchers. In the
|
Chris@42
|
275 * original version, only one of the two performance matchers
|
Chris@42
|
276 * contained the distance metric. (See <code>first</code>)
|
Chris@42
|
277 */
|
Chris@43
|
278 Matcher *m_otherMatcher;
|
Chris@42
|
279
|
Chris@42
|
280 /** Indicates which performance is considered primary (the
|
Chris@42
|
281 * score). This is the performance shown on the vertical axis,
|
Chris@42
|
282 * and referred to as "this" in the codes for the direction of
|
Chris@42
|
283 * DTW steps. */
|
Chris@43
|
284 bool m_firstPM;
|
Chris@42
|
285
|
Chris@42
|
286 /** Configuration parameters */
|
Chris@43
|
287 Parameters m_params;
|
Chris@42
|
288
|
Chris@42
|
289 /** Width of the search band in FFT frames (see <code>blockTime</code>) */
|
Chris@43
|
290 int m_blockSize;
|
Chris@42
|
291
|
Chris@42
|
292 /** The number of frames of audio data which have been read. */
|
Chris@43
|
293 int m_frameCount;
|
Chris@42
|
294
|
Chris@42
|
295 /** The number of frames sequentially processed by this matcher,
|
Chris@42
|
296 * without a frame of the other matcher being processed.
|
Chris@42
|
297 */
|
Chris@43
|
298 int m_runCount;
|
Chris@42
|
299
|
Chris@42
|
300 /** The number of values in a feature vector. */
|
Chris@43
|
301 int m_featureSize;
|
Chris@42
|
302
|
Chris@50
|
303 /** A block of previously seen feature frames is stored in this
|
Chris@50
|
304 * structure for calculation of the distance matrix as the new
|
Chris@50
|
305 * frames are received. One can think of the structure of the
|
Chris@50
|
306 * array as a circular buffer of vectors. */
|
Chris@43
|
307 vector<vector<double> > m_frames;
|
Chris@42
|
308
|
Chris@42
|
309 /** The best path cost matrix. */
|
Chris@53
|
310 vector<vector<double> > m_bestPathCost;
|
Chris@42
|
311
|
Chris@42
|
312 /** The distance matrix. */
|
Chris@45
|
313 vector<vector<float> > m_distance;
|
Chris@42
|
314
|
Chris@45
|
315 /** The advance direction matrix. */
|
Chris@45
|
316 vector<vector<Advance> > m_advance;
|
Chris@45
|
317
|
Chris@45
|
318 /** The bounds of each row of data in the distance, path cost, and
|
Chris@45
|
319 * advance direction matrices.*/
|
Chris@43
|
320 vector<int> m_first;
|
Chris@43
|
321 vector<int> m_last;
|
Chris@42
|
322
|
Chris@45
|
323 /** Width of distance, path cost, and advance direction matrices
|
Chris@45
|
324 * and first and last vectors */
|
Chris@43
|
325 int m_distXSize;
|
Chris@42
|
326
|
Chris@43
|
327 bool m_initialised;
|
Chris@42
|
328
|
Chris@43
|
329 DistanceMetric m_metric;
|
Chris@78
|
330 };
|
cannam@0
|
331
|
cannam@0
|
332 #endif
|