cannam@0
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
cannam@0
|
2
|
cannam@0
|
3 /*
|
cannam@0
|
4 Vamp feature extraction plugin using the MATCH audio alignment
|
cannam@0
|
5 algorithm.
|
cannam@0
|
6
|
cannam@0
|
7 Centre for Digital Music, Queen Mary, University of London.
|
cannam@0
|
8 This file copyright 2007 Simon Dixon, Chris Cannam and QMUL.
|
cannam@0
|
9
|
cannam@0
|
10 This program is free software; you can redistribute it and/or
|
cannam@0
|
11 modify it under the terms of the GNU General Public License as
|
cannam@0
|
12 published by the Free Software Foundation; either version 2 of the
|
cannam@0
|
13 License, or (at your option) any later version. See the file
|
cannam@0
|
14 COPYING included with this distribution for more information.
|
cannam@0
|
15 */
|
cannam@0
|
16
|
cannam@0
|
17 #ifndef _MATCHER_H_
|
cannam@0
|
18 #define _MATCHER_H_
|
cannam@0
|
19
|
cannam@0
|
20 #include <vector>
|
cannam@0
|
21 #include <iostream>
|
cannam@0
|
22 #include <sstream>
|
cannam@0
|
23 #include <cmath>
|
cannam@0
|
24
|
Chris@26
|
25 #include "DistanceMetric.h"
|
Chris@38
|
26 #include "FeatureExtractor.h"
|
cannam@0
|
27
|
cannam@0
|
28 using std::vector;
|
cannam@0
|
29 using std::string;
|
cannam@0
|
30 using std::cerr;
|
cannam@0
|
31 using std::endl;
|
cannam@0
|
32
|
cannam@0
|
33 /** Represents an audio stream that can be matched to another audio
|
cannam@0
|
34 * stream of the same piece of music. The matching algorithm uses
|
Chris@45
|
35 * dynamic time warping.
|
cannam@0
|
36 */
|
cannam@0
|
37 class Matcher
|
cannam@0
|
38 {
|
Chris@15
|
39 public:
|
Chris@45
|
40 enum Advance {
|
Chris@45
|
41 AdvanceNone,
|
Chris@45
|
42 AdvanceBoth,
|
Chris@45
|
43 AdvanceThis,
|
Chris@45
|
44 AdvanceOther
|
Chris@45
|
45 };
|
Chris@45
|
46
|
Chris@15
|
47 struct Parameters {
|
Chris@15
|
48
|
Chris@15
|
49 Parameters(float rate_, double hopTime_, int fftSize_) :
|
Chris@15
|
50 sampleRate(rate_),
|
Chris@26
|
51 distanceNorm(DistanceMetric::NormaliseDistanceToLogSum),
|
Chris@15
|
52 hopTime(hopTime_),
|
Chris@15
|
53 fftSize(fftSize_),
|
Chris@15
|
54 blockTime(10.0),
|
Chris@15
|
55 maxRunCount(3)
|
Chris@15
|
56 {}
|
Chris@15
|
57
|
Chris@15
|
58 /** Sample rate of audio */
|
Chris@15
|
59 float sampleRate;
|
Chris@15
|
60
|
Chris@15
|
61 /** Type of distance metric normalisation */
|
Chris@26
|
62 DistanceMetric::DistanceNormalisation distanceNorm;
|
Chris@15
|
63
|
Chris@15
|
64 /** Spacing of audio frames (determines the amount of overlap or
|
Chris@15
|
65 * skip between frames). This value is expressed in
|
Chris@15
|
66 * seconds. */
|
Chris@15
|
67 double hopTime;
|
Chris@38
|
68
|
Chris@15
|
69 /** Size of an FFT frame in samples. Note that the data passed
|
Chris@15
|
70 * in to Matcher is already in the frequency domain, so this
|
Chris@15
|
71 * expresses the size of the frame that the caller will be
|
Chris@38
|
72 * providing. */
|
Chris@15
|
73 int fftSize;
|
Chris@38
|
74
|
Chris@15
|
75 /** The width of the search band (error margin) around the current
|
Chris@15
|
76 * match position, measured in seconds. Strictly speaking the
|
Chris@15
|
77 * width is measured backwards from the current point, since the
|
Chris@15
|
78 * algorithm has to work causally.
|
Chris@15
|
79 */
|
Chris@15
|
80 double blockTime;
|
Chris@15
|
81
|
Chris@15
|
82 /** Maximum number of frames sequentially processed by this
|
Chris@15
|
83 * matcher, without a frame of the other matcher being
|
Chris@15
|
84 * processed.
|
Chris@15
|
85 */
|
Chris@15
|
86 int maxRunCount;
|
Chris@15
|
87 };
|
Chris@15
|
88
|
cannam@0
|
89 /** Constructor for Matcher.
|
cannam@0
|
90 *
|
cannam@0
|
91 * @param p The Matcher representing the performance with which
|
cannam@0
|
92 * this one is going to be matched. Some information is shared
|
cannam@0
|
93 * between the two matchers (currently one possesses the distance
|
cannam@0
|
94 * matrix and optimal path matrix).
|
cannam@0
|
95 */
|
Chris@38
|
96 Matcher(Parameters parameters,
|
Chris@38
|
97 FeatureExtractor::Parameters featureParams,
|
Chris@38
|
98 Matcher *p);
|
cannam@0
|
99
|
Chris@23
|
100 /** Constructor for Matcher using externally supplied features.
|
Chris@23
|
101 * A Matcher made using this constructor will not carry out its
|
Chris@23
|
102 * own feature extraction from frequency-domain audio data, but
|
Chris@23
|
103 * instead will accept arbitrary feature frames calculated by
|
Chris@23
|
104 * some external code.
|
Chris@23
|
105 *
|
Chris@23
|
106 * @param p The Matcher representing the performance with which
|
Chris@23
|
107 * this one is going to be matched. Some information is shared
|
Chris@23
|
108 * between the two matchers (currently one possesses the distance
|
Chris@23
|
109 * matrix and optimal path matrix).
|
Chris@23
|
110 *
|
Chris@23
|
111 * @param featureSize Number of values in each feature vector.
|
Chris@23
|
112 */
|
Chris@23
|
113 Matcher(Parameters parameters, Matcher *p, int featureSize);
|
Chris@23
|
114
|
cannam@0
|
115 ~Matcher();
|
cannam@0
|
116
|
cannam@0
|
117 /** Adds a link to the Matcher object representing the performance
|
cannam@0
|
118 * which is going to be matched to this one.
|
cannam@0
|
119 *
|
cannam@0
|
120 * @param p the Matcher representing the other performance
|
cannam@0
|
121 */
|
cannam@0
|
122 void setOtherMatcher(Matcher *p) {
|
Chris@43
|
123 m_otherMatcher = p;
|
cannam@0
|
124 } // setOtherMatcher()
|
cannam@0
|
125
|
cannam@0
|
126 int getFrameCount() {
|
Chris@43
|
127 return m_frameCount;
|
cannam@0
|
128 }
|
cannam@0
|
129
|
Chris@72
|
130 int getOtherFrameCount() {
|
Chris@72
|
131 return m_otherMatcher->getFrameCount();
|
Chris@72
|
132 }
|
Chris@72
|
133
|
Chris@72
|
134 /** Tests whether a location is in range in the minimum cost matrix.
|
Chris@72
|
135 *
|
Chris@72
|
136 * @param i the frame number of this Matcher
|
Chris@72
|
137 * @param j the frame number of the other Matcher
|
Chris@72
|
138 * @return true if the location is in range
|
Chris@72
|
139 */
|
Chris@72
|
140 bool isInRange(int i, int j);
|
Chris@72
|
141
|
Chris@72
|
142 /** Tests whether a location is available in the minimum cost matrix.
|
Chris@72
|
143 *
|
Chris@72
|
144 * @param i the frame number of this Matcher
|
Chris@72
|
145 * @param j the frame number of the other Matcher
|
Chris@72
|
146 * @return true if the location is in range and contains a valid cost
|
Chris@72
|
147 */
|
Chris@72
|
148 bool isAvailable(int i, int j);
|
Chris@72
|
149
|
Chris@72
|
150 /** Returns the valid range of frames in the other Matcher for the
|
Chris@72
|
151 * given frame in this Matcher's minimum cost matrix.
|
Chris@72
|
152 *
|
Chris@72
|
153 * @param i the frame number of this Matcher
|
Chris@72
|
154 * @return the first, last pair of frame numbers for the other
|
Chris@72
|
155 * Matcher. Note that the last frame is exclusive (last valid
|
Chris@72
|
156 * frame + 1).
|
Chris@72
|
157 */
|
Chris@72
|
158 std::pair<int, int> getColRange(int i);
|
Chris@72
|
159
|
Chris@72
|
160 /** Returns the valid range of frames in this Matcher for the
|
Chris@72
|
161 * given frame in the other Matcher's minimum cost matrix.
|
Chris@72
|
162 *
|
Chris@72
|
163 * @param i the frame number of the other Matcher
|
Chris@72
|
164 * @return the first, last pair of frame numbers for this
|
Chris@72
|
165 * Matcher. Note that the last frame is exclusive (last valid
|
Chris@72
|
166 * frame + 1).
|
Chris@72
|
167 */
|
Chris@72
|
168 std::pair<int, int> getRowRange(int i);
|
Chris@72
|
169
|
Chris@72
|
170 /** Retrieves a value from the distance matrix.
|
Chris@72
|
171 *
|
Chris@72
|
172 * @param i the frame number of this Matcher
|
Chris@72
|
173 * @param j the frame number of the other Matcher
|
Chris@72
|
174 * @return the distance metric at this location
|
Chris@72
|
175 */
|
Chris@72
|
176 float getDistance(int i, int j);
|
Chris@72
|
177
|
Chris@72
|
178 /** Sets a value to the distance matrix.
|
Chris@72
|
179 *
|
Chris@72
|
180 * @param i the frame number of this Matcher
|
Chris@72
|
181 * @param j the frame number of the other Matcher
|
Chris@72
|
182 * @param value the distance metric to set for this location
|
Chris@72
|
183 */
|
Chris@72
|
184 void setDistance(int i, int j, float value);
|
Chris@72
|
185
|
Chris@72
|
186 /** Retrieves a value from the minimum cost matrix.
|
Chris@72
|
187 *
|
Chris@72
|
188 * @param i the frame number of this Matcher
|
Chris@72
|
189 * @param j the frame number of the other Matcher
|
Chris@72
|
190 * @return the cost of the minimum cost path to this location
|
Chris@72
|
191 */
|
Chris@72
|
192 double getPathCost(int i, int j);
|
Chris@72
|
193
|
Chris@72
|
194 /** Sets a value and an advance direction to the minimum cost matrix.
|
Chris@72
|
195 *
|
Chris@72
|
196 * @param i the frame number of this Matcher
|
Chris@72
|
197 * @param j the frame number of the other Matcher
|
Chris@72
|
198 * @param dir the direction from which this position is reached with
|
Chris@72
|
199 * minimum cost
|
Chris@72
|
200 * @param value the cost of the minimum cost path to set for this location
|
Chris@72
|
201 */
|
Chris@72
|
202 void setPathCost(int i, int j, Advance dir, double value);
|
Chris@72
|
203
|
Chris@72
|
204 /** Retrieves an advance direction from the matrix.
|
Chris@72
|
205 *
|
Chris@72
|
206 * @param i the frame number of this Matcher
|
Chris@72
|
207 * @param j the frame number of the other Matcher
|
Chris@72
|
208 * @return the direction from which this position is reached with
|
Chris@72
|
209 * minimum cost
|
Chris@72
|
210 */
|
Chris@72
|
211 Advance getAdvance(int i, int j);
|
Chris@72
|
212
|
cannam@0
|
213 protected:
|
Chris@38
|
214 /** Create internal structures and reset. */
|
cannam@0
|
215 void init();
|
cannam@0
|
216
|
Chris@38
|
217 /** The distXSize value has changed: resize internal buffers. */
|
Chris@41
|
218 void size();
|
cannam@0
|
219
|
Chris@38
|
220 /** Process a frequency-domain frame of audio data using the
|
Chris@38
|
221 * built-in FeatureExtractor, then calculating the distance to
|
Chris@38
|
222 * all frames stored in the otherMatcher and storing them in the
|
Chris@38
|
223 * distance matrix, and finally updating the optimal path matrix
|
Chris@38
|
224 * using the dynamic time warping algorithm.
|
Chris@14
|
225 *
|
Chris@14
|
226 * Return value is the frame (post-processed, with warping,
|
Chris@14
|
227 * rectification, and normalisation as appropriate).
|
Chris@23
|
228 *
|
Chris@23
|
229 * The Matcher must have been constructed using the constructor
|
Chris@23
|
230 * without an external featureSize parameter in order to use this
|
Chris@23
|
231 * function. (Otherwise it will be expecting you to call
|
Chris@23
|
232 * consumeFeatureVector.)
|
cannam@0
|
233 */
|
Chris@21
|
234 std::vector<double> consumeFrame(double *reBuffer, double *imBuffer);
|
cannam@0
|
235
|
Chris@23
|
236 /** Processes a feature vector frame (presumably calculated from
|
Chris@23
|
237 * audio data by some external code). As consumeFrame, except
|
Chris@23
|
238 * that it does not calculate a feature from audio data but
|
Chris@23
|
239 * instead uses the supplied feature directly.
|
Chris@23
|
240 *
|
Chris@23
|
241 * The Matcher must have been constructed using the constructor
|
Chris@23
|
242 * that accepts an external featureSize parameter in order to
|
Chris@23
|
243 * use this function. The supplied feature must be of the size
|
Chris@23
|
244 * that was passed to the constructor.
|
Chris@23
|
245 */
|
Chris@23
|
246 void consumeFeatureVector(std::vector<double> feature);
|
Chris@23
|
247
|
Chris@71
|
248 /** Updates an entry in the distance matrix and the optimal path matrix.
|
cannam@0
|
249 *
|
cannam@0
|
250 * @param i the frame number of this Matcher
|
cannam@0
|
251 * @param j the frame number of the other Matcher
|
cannam@0
|
252 * @param dir the direction from which this position is reached with
|
cannam@0
|
253 * minimum cost
|
cannam@0
|
254 * @param value the cost of the minimum path except the current step
|
cannam@0
|
255 * @param dMN the distance cost between the two frames
|
cannam@0
|
256 */
|
Chris@71
|
257 void updateValue(int i, int j, Advance dir, double value, float dMN);
|
cannam@0
|
258
|
Chris@21
|
259 void calcAdvance();
|
Chris@21
|
260
|
Chris@42
|
261 /** Points to the other performance with which this one is being
|
Chris@42
|
262 * compared. The data for the distance metric and the dynamic
|
Chris@42
|
263 * time warping is shared between the two matchers. In the
|
Chris@42
|
264 * original version, only one of the two performance matchers
|
Chris@42
|
265 * contained the distance metric. (See <code>first</code>)
|
Chris@42
|
266 */
|
Chris@43
|
267 Matcher *m_otherMatcher;
|
Chris@42
|
268
|
Chris@42
|
269 /** Indicates which performance is considered primary (the
|
Chris@42
|
270 * score). This is the performance shown on the vertical axis,
|
Chris@42
|
271 * and referred to as "this" in the codes for the direction of
|
Chris@42
|
272 * DTW steps. */
|
Chris@43
|
273 bool m_firstPM;
|
Chris@42
|
274
|
Chris@42
|
275 /** Configuration parameters */
|
Chris@43
|
276 Parameters m_params;
|
Chris@42
|
277
|
Chris@42
|
278 /** Width of the search band in FFT frames (see <code>blockTime</code>) */
|
Chris@43
|
279 int m_blockSize;
|
Chris@42
|
280
|
Chris@42
|
281 /** The number of frames of audio data which have been read. */
|
Chris@43
|
282 int m_frameCount;
|
Chris@42
|
283
|
Chris@42
|
284 /** The number of frames sequentially processed by this matcher,
|
Chris@42
|
285 * without a frame of the other matcher being processed.
|
Chris@42
|
286 */
|
Chris@43
|
287 int m_runCount;
|
Chris@42
|
288
|
Chris@42
|
289 /** The number of values in a feature vector. */
|
Chris@43
|
290 int m_featureSize;
|
Chris@42
|
291
|
Chris@50
|
292 /** A block of previously seen feature frames is stored in this
|
Chris@50
|
293 * structure for calculation of the distance matrix as the new
|
Chris@50
|
294 * frames are received. One can think of the structure of the
|
Chris@50
|
295 * array as a circular buffer of vectors. */
|
Chris@43
|
296 vector<vector<double> > m_frames;
|
Chris@42
|
297
|
Chris@42
|
298 /** The best path cost matrix. */
|
Chris@53
|
299 vector<vector<double> > m_bestPathCost;
|
Chris@42
|
300
|
Chris@42
|
301 /** The distance matrix. */
|
Chris@45
|
302 vector<vector<float> > m_distance;
|
Chris@42
|
303
|
Chris@45
|
304 /** The advance direction matrix. */
|
Chris@45
|
305 vector<vector<Advance> > m_advance;
|
Chris@45
|
306
|
Chris@45
|
307 /** The bounds of each row of data in the distance, path cost, and
|
Chris@45
|
308 * advance direction matrices.*/
|
Chris@43
|
309 vector<int> m_first;
|
Chris@43
|
310 vector<int> m_last;
|
Chris@42
|
311
|
Chris@45
|
312 /** Width of distance, path cost, and advance direction matrices
|
Chris@45
|
313 * and first and last vectors */
|
Chris@43
|
314 int m_distXSize;
|
Chris@42
|
315
|
Chris@43
|
316 bool m_initialised;
|
Chris@42
|
317
|
Chris@43
|
318 FeatureExtractor m_featureExtractor;
|
Chris@43
|
319 DistanceMetric m_metric;
|
Chris@26
|
320
|
cannam@0
|
321 friend class MatchFeeder;
|
Chris@24
|
322 friend class MatchFeatureFeeder;
|
Chris@72
|
323
|
cannam@0
|
324 }; // class Matcher
|
cannam@0
|
325
|
cannam@0
|
326 #endif
|