cannam@0
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
cannam@0
|
2
|
cannam@0
|
3 /*
|
cannam@0
|
4 Vamp feature extraction plugin using the MATCH audio alignment
|
cannam@0
|
5 algorithm.
|
cannam@0
|
6
|
cannam@0
|
7 Centre for Digital Music, Queen Mary, University of London.
|
cannam@0
|
8 This file copyright 2007 Simon Dixon, Chris Cannam and QMUL.
|
cannam@0
|
9
|
cannam@0
|
10 This program is free software; you can redistribute it and/or
|
cannam@0
|
11 modify it under the terms of the GNU General Public License as
|
cannam@0
|
12 published by the Free Software Foundation; either version 2 of the
|
cannam@0
|
13 License, or (at your option) any later version. See the file
|
cannam@0
|
14 COPYING included with this distribution for more information.
|
cannam@0
|
15 */
|
cannam@0
|
16
|
cannam@0
|
17 #ifndef _MATCHER_H_
|
cannam@0
|
18 #define _MATCHER_H_
|
cannam@0
|
19
|
cannam@0
|
20 #include <vector>
|
cannam@0
|
21 #include <iostream>
|
cannam@0
|
22 #include <sstream>
|
cannam@0
|
23 #include <cmath>
|
cannam@0
|
24
|
cannam@0
|
25 #define ADVANCE_THIS 1
|
cannam@0
|
26 #define ADVANCE_OTHER 2
|
cannam@0
|
27 #define ADVANCE_BOTH 3
|
cannam@0
|
28 #define MASK 0xfc
|
cannam@0
|
29
|
Chris@26
|
30 #include "DistanceMetric.h"
|
Chris@38
|
31 #include "FeatureExtractor.h"
|
cannam@0
|
32
|
cannam@0
|
33 using std::vector;
|
cannam@0
|
34 using std::string;
|
cannam@0
|
35 using std::cerr;
|
cannam@0
|
36 using std::endl;
|
cannam@0
|
37
|
cannam@0
|
38 /** Represents an audio stream that can be matched to another audio
|
cannam@0
|
39 * stream of the same piece of music. The matching algorithm uses
|
cannam@0
|
40 * dynamic time warping. The distance metric is a Euclidean metric
|
cannam@0
|
41 * on the FFT data with the higher frequencies mapped onto a linear
|
cannam@0
|
42 * scale.
|
cannam@0
|
43 */
|
cannam@0
|
44 class Matcher
|
cannam@0
|
45 {
|
Chris@15
|
46 public:
|
Chris@15
|
47 struct Parameters {
|
Chris@15
|
48
|
Chris@15
|
49 Parameters(float rate_, double hopTime_, int fftSize_) :
|
Chris@15
|
50 sampleRate(rate_),
|
Chris@26
|
51 distanceNorm(DistanceMetric::NormaliseDistanceToLogSum),
|
Chris@29
|
52 distanceScale(90.0),
|
Chris@15
|
53 hopTime(hopTime_),
|
Chris@15
|
54 fftSize(fftSize_),
|
Chris@15
|
55 blockTime(10.0),
|
Chris@15
|
56 maxRunCount(3)
|
Chris@15
|
57 {}
|
Chris@15
|
58
|
Chris@15
|
59 /** Sample rate of audio */
|
Chris@15
|
60 float sampleRate;
|
Chris@15
|
61
|
Chris@15
|
62 /** Type of distance metric normalisation */
|
Chris@26
|
63 DistanceMetric::DistanceNormalisation distanceNorm;
|
Chris@15
|
64
|
Chris@29
|
65 /** Scaling factor for distance metric; must guarantee that the
|
Chris@29
|
66 * final value fits in the data type used, that is, unsigned
|
Chris@29
|
67 * char.
|
Chris@29
|
68 */
|
Chris@29
|
69 double distanceScale;
|
Chris@29
|
70
|
Chris@15
|
71 /** Spacing of audio frames (determines the amount of overlap or
|
Chris@15
|
72 * skip between frames). This value is expressed in
|
Chris@15
|
73 * seconds. */
|
Chris@15
|
74 double hopTime;
|
Chris@38
|
75
|
Chris@15
|
76 /** Size of an FFT frame in samples. Note that the data passed
|
Chris@15
|
77 * in to Matcher is already in the frequency domain, so this
|
Chris@15
|
78 * expresses the size of the frame that the caller will be
|
Chris@38
|
79 * providing. */
|
Chris@15
|
80 int fftSize;
|
Chris@38
|
81
|
Chris@15
|
82 /** The width of the search band (error margin) around the current
|
Chris@15
|
83 * match position, measured in seconds. Strictly speaking the
|
Chris@15
|
84 * width is measured backwards from the current point, since the
|
Chris@15
|
85 * algorithm has to work causally.
|
Chris@15
|
86 */
|
Chris@15
|
87 double blockTime;
|
Chris@15
|
88
|
Chris@15
|
89 /** Maximum number of frames sequentially processed by this
|
Chris@15
|
90 * matcher, without a frame of the other matcher being
|
Chris@15
|
91 * processed.
|
Chris@15
|
92 */
|
Chris@15
|
93 int maxRunCount;
|
Chris@15
|
94 };
|
Chris@15
|
95
|
cannam@0
|
96 protected:
|
cannam@0
|
97 /** Points to the other performance with which this one is being
|
cannam@0
|
98 * compared. The data for the distance metric and the dynamic
|
cannam@0
|
99 * time warping is shared between the two matchers. In the
|
cannam@0
|
100 * original version, only one of the two performance matchers
|
cannam@0
|
101 * contained the distance metric. (See <code>first</code>)
|
cannam@0
|
102 */
|
cannam@0
|
103 Matcher *otherMatcher;
|
cannam@0
|
104
|
cannam@0
|
105 /** Indicates which performance is considered primary (the
|
cannam@0
|
106 * score). This is the performance shown on the vertical axis,
|
cannam@0
|
107 * and referred to as "this" in the codes for the direction of
|
cannam@0
|
108 * DTW steps. */
|
cannam@0
|
109 bool firstPM;
|
cannam@0
|
110
|
Chris@15
|
111 /** Configuration parameters */
|
Chris@15
|
112 Parameters params;
|
cannam@0
|
113
|
cannam@0
|
114 /** Width of the search band in FFT frames (see <code>blockTime</code>) */
|
cannam@0
|
115 int blockSize;
|
cannam@0
|
116
|
cannam@0
|
117 /** The number of frames of audio data which have been read. */
|
cannam@0
|
118 int frameCount;
|
cannam@0
|
119
|
cannam@0
|
120 /** The number of frames sequentially processed by this matcher,
|
cannam@0
|
121 * without a frame of the other matcher being processed.
|
cannam@0
|
122 */
|
cannam@0
|
123 int runCount;
|
cannam@0
|
124
|
Chris@38
|
125 /** The number of values in a feature vector. */
|
Chris@23
|
126 int featureSize;
|
Chris@23
|
127
|
cannam@0
|
128 /** A block of previously seen frames are stored in this structure
|
cannam@0
|
129 * for calculation of the distance matrix as the new frames are
|
cannam@0
|
130 * read in. One can think of the structure of the array as a
|
Chris@13
|
131 * circular buffer of vectors. These are the frames with all
|
Chris@13
|
132 * applicable processing applied (e.g. spectral difference,
|
Chris@13
|
133 * normalisation), unlike prevFrame and newFrame. The total
|
Chris@13
|
134 * energy of frames[i] is stored in totalEnergies[i]. */
|
cannam@0
|
135 vector<vector<double> > frames;
|
cannam@0
|
136
|
cannam@0
|
137 /** The best path cost matrix. */
|
Chris@38
|
138 vector<vector<int> > bestPathCost;
|
cannam@0
|
139
|
cannam@0
|
140 /** The distance matrix. */
|
Chris@38
|
141 vector<vector<unsigned char> > distance;
|
cannam@0
|
142
|
cannam@0
|
143 /** The bounds of each row of data in the distance and path cost matrices.*/
|
Chris@38
|
144 vector<int> first;
|
Chris@38
|
145 vector<int> last;
|
cannam@0
|
146
|
cannam@0
|
147 /** Height of each column in distance and bestPathCost matrices */
|
Chris@38
|
148 vector<int> distYSizes;
|
cannam@0
|
149
|
cannam@0
|
150 /** Width of distance and bestPathCost matrices and first and last vectors */
|
cannam@0
|
151 int distXSize;
|
cannam@0
|
152
|
cannam@0
|
153 bool initialised;
|
cannam@0
|
154
|
cannam@0
|
155 /** Disable or enable debugging output */
|
cannam@0
|
156 static bool silent;
|
cannam@0
|
157
|
cannam@0
|
158 public:
|
cannam@0
|
159 /** Constructor for Matcher.
|
cannam@0
|
160 *
|
cannam@0
|
161 * @param p The Matcher representing the performance with which
|
cannam@0
|
162 * this one is going to be matched. Some information is shared
|
cannam@0
|
163 * between the two matchers (currently one possesses the distance
|
cannam@0
|
164 * matrix and optimal path matrix).
|
cannam@0
|
165 */
|
Chris@38
|
166 Matcher(Parameters parameters,
|
Chris@38
|
167 FeatureExtractor::Parameters featureParams,
|
Chris@38
|
168 Matcher *p);
|
cannam@0
|
169
|
Chris@23
|
170 /** Constructor for Matcher using externally supplied features.
|
Chris@23
|
171 * A Matcher made using this constructor will not carry out its
|
Chris@23
|
172 * own feature extraction from frequency-domain audio data, but
|
Chris@23
|
173 * instead will accept arbitrary feature frames calculated by
|
Chris@23
|
174 * some external code.
|
Chris@23
|
175 *
|
Chris@23
|
176 * @param p The Matcher representing the performance with which
|
Chris@23
|
177 * this one is going to be matched. Some information is shared
|
Chris@23
|
178 * between the two matchers (currently one possesses the distance
|
Chris@23
|
179 * matrix and optimal path matrix).
|
Chris@23
|
180 *
|
Chris@23
|
181 * @param featureSize Number of values in each feature vector.
|
Chris@23
|
182 */
|
Chris@23
|
183 Matcher(Parameters parameters, Matcher *p, int featureSize);
|
Chris@23
|
184
|
cannam@0
|
185 ~Matcher();
|
cannam@0
|
186
|
cannam@0
|
187 /** For debugging, outputs information about the Matcher to
|
cannam@0
|
188 * standard error.
|
cannam@0
|
189 */
|
cannam@0
|
190 void print();
|
cannam@0
|
191
|
cannam@0
|
192 /** Adds a link to the Matcher object representing the performance
|
cannam@0
|
193 * which is going to be matched to this one.
|
cannam@0
|
194 *
|
cannam@0
|
195 * @param p the Matcher representing the other performance
|
cannam@0
|
196 */
|
cannam@0
|
197 void setOtherMatcher(Matcher *p) {
|
cannam@0
|
198 otherMatcher = p;
|
cannam@0
|
199 } // setOtherMatcher()
|
cannam@0
|
200
|
cannam@0
|
201 int getFrameCount() {
|
cannam@0
|
202 return frameCount;
|
cannam@0
|
203 }
|
cannam@0
|
204
|
cannam@0
|
205 protected:
|
Chris@38
|
206 /** Create internal structures and reset. */
|
cannam@0
|
207 void init();
|
cannam@0
|
208
|
Chris@38
|
209 /** The distXSize value has changed: resize internal buffers. */
|
Chris@41
|
210 void size();
|
cannam@0
|
211
|
Chris@38
|
212 /** Process a frequency-domain frame of audio data using the
|
Chris@38
|
213 * built-in FeatureExtractor, then calculating the distance to
|
Chris@38
|
214 * all frames stored in the otherMatcher and storing them in the
|
Chris@38
|
215 * distance matrix, and finally updating the optimal path matrix
|
Chris@38
|
216 * using the dynamic time warping algorithm.
|
Chris@14
|
217 *
|
Chris@14
|
218 * Return value is the frame (post-processed, with warping,
|
Chris@14
|
219 * rectification, and normalisation as appropriate).
|
Chris@23
|
220 *
|
Chris@23
|
221 * The Matcher must have been constructed using the constructor
|
Chris@23
|
222 * without an external featureSize parameter in order to use this
|
Chris@23
|
223 * function. (Otherwise it will be expecting you to call
|
Chris@23
|
224 * consumeFeatureVector.)
|
cannam@0
|
225 */
|
Chris@21
|
226 std::vector<double> consumeFrame(double *reBuffer, double *imBuffer);
|
cannam@0
|
227
|
Chris@23
|
228 /** Processes a feature vector frame (presumably calculated from
|
Chris@23
|
229 * audio data by some external code). As consumeFrame, except
|
Chris@23
|
230 * that it does not calculate a feature from audio data but
|
Chris@23
|
231 * instead uses the supplied feature directly.
|
Chris@23
|
232 *
|
Chris@23
|
233 * The Matcher must have been constructed using the constructor
|
Chris@23
|
234 * that accepts an external featureSize parameter in order to
|
Chris@23
|
235 * use this function. The supplied feature must be of the size
|
Chris@23
|
236 * that was passed to the constructor.
|
Chris@23
|
237 */
|
Chris@23
|
238 void consumeFeatureVector(std::vector<double> feature);
|
Chris@23
|
239
|
cannam@0
|
240 /** Retrieves values from the minimum cost matrix.
|
cannam@0
|
241 *
|
cannam@0
|
242 * @param i the frame number of this Matcher
|
cannam@0
|
243 * @param j the frame number of the other Matcher
|
cannam@0
|
244 * @return the cost of the minimum cost path to this location
|
cannam@0
|
245 */
|
cannam@0
|
246 int getValue(int i, int j, bool firstAttempt);
|
cannam@0
|
247
|
cannam@0
|
248 /** Stores entries in the distance matrix and the optimal path matrix.
|
cannam@0
|
249 *
|
cannam@0
|
250 * @param i the frame number of this Matcher
|
cannam@0
|
251 * @param j the frame number of the other Matcher
|
cannam@0
|
252 * @param dir the direction from which this position is reached with
|
cannam@0
|
253 * minimum cost
|
cannam@0
|
254 * @param value the cost of the minimum path except the current step
|
cannam@0
|
255 * @param dMN the distance cost between the two frames
|
cannam@0
|
256 */
|
cannam@0
|
257 void setValue(int i, int j, int dir, int value, int dMN);
|
cannam@0
|
258
|
Chris@21
|
259 void calcAdvance();
|
Chris@21
|
260
|
Chris@38
|
261 FeatureExtractor featureExtractor;
|
Chris@26
|
262 DistanceMetric metric;
|
Chris@26
|
263
|
cannam@0
|
264 friend class MatchFeeder;
|
Chris@24
|
265 friend class MatchFeatureFeeder;
|
Chris@15
|
266 friend class Finder;
|
cannam@0
|
267
|
cannam@0
|
268 }; // class Matcher
|
cannam@0
|
269
|
cannam@0
|
270 #endif
|