cannam@0
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
cannam@0
|
2
|
cannam@0
|
3 /*
|
cannam@0
|
4 Vamp feature extraction plugin using the MATCH audio alignment
|
cannam@0
|
5 algorithm.
|
cannam@0
|
6
|
cannam@0
|
7 Centre for Digital Music, Queen Mary, University of London.
|
Chris@236
|
8 Copyright (c) 2007-2020 Simon Dixon, Chris Cannam, and Queen Mary
|
Chris@230
|
9 University of London, Copyright (c) 2014-2015 Tido GmbH.
|
cannam@0
|
10
|
cannam@0
|
11 This program is free software; you can redistribute it and/or
|
cannam@0
|
12 modify it under the terms of the GNU General Public License as
|
cannam@0
|
13 published by the Free Software Foundation; either version 2 of the
|
cannam@0
|
14 License, or (at your option) any later version. See the file
|
cannam@0
|
15 COPYING included with this distribution for more information.
|
cannam@0
|
16 */
|
cannam@0
|
17
|
cannam@0
|
18 #include "MatchVampPlugin.h"
|
cannam@0
|
19
|
Chris@74
|
20 #include "FeatureExtractor.h"
|
cannam@0
|
21
|
cannam@0
|
22 #include <vamp/vamp.h>
|
cannam@0
|
23 #include <vamp-sdk/RealTime.h>
|
cannam@0
|
24
|
cannam@0
|
25 #include <vector>
|
cannam@0
|
26 #include <algorithm>
|
cannam@0
|
27
|
Chris@235
|
28 using std::string;
|
Chris@235
|
29
|
cannam@0
|
30 //static int extant = 0;
|
cannam@0
|
31
|
cannam@0
|
32 #ifdef _WIN32
|
cannam@0
|
33 HANDLE
|
cannam@0
|
34 MatchVampPlugin::m_serialisingMutex;
|
cannam@0
|
35 #else
|
cannam@0
|
36 pthread_mutex_t
|
cannam@0
|
37 MatchVampPlugin::m_serialisingMutex;
|
cannam@0
|
38 #endif
|
cannam@0
|
39
|
cannam@0
|
40 bool
|
cannam@0
|
41 MatchVampPlugin::m_serialisingMutexInitialised = false;
|
cannam@0
|
42
|
Chris@10
|
43 // We want to ensure our freq map / crossover bin in Matcher.cpp are
|
Chris@10
|
44 // always valid with a fixed FFT length in seconds, so must reject low
|
Chris@10
|
45 // sample rates
|
Chris@10
|
46 static float sampleRateMin = 5000.f;
|
Chris@10
|
47
|
Chris@52
|
48 static float defaultStepTime = 0.020f;
|
Chris@15
|
49
|
cannam@0
|
50 MatchVampPlugin::MatchVampPlugin(float inputSampleRate) :
|
cannam@0
|
51 Plugin(inputSampleRate),
|
Chris@52
|
52 m_stepSize(int(inputSampleRate * defaultStepTime + 0.001)),
|
Chris@15
|
53 m_stepTime(defaultStepTime),
|
Chris@16
|
54 m_blockSize(2048),
|
cannam@0
|
55 m_serialise(false),
|
cannam@0
|
56 m_begin(true),
|
Chris@17
|
57 m_locked(false),
|
Chris@138
|
58 m_smooth(false),
|
Chris@74
|
59 m_frameNo(0),
|
Chris@113
|
60 m_params(defaultStepTime),
|
Chris@113
|
61 m_defaultParams(defaultStepTime),
|
Chris@216
|
62 m_feParams(inputSampleRate),
|
Chris@223
|
63 m_defaultFeParams(44100), // parameter descriptors can't depend on samplerate
|
Chris@243
|
64 m_secondReferenceFrequency(m_defaultFeParams.referenceFrequency), // must be declared/initialised after m_defaultFeParams
|
Chris@103
|
65 m_fcParams(),
|
Chris@143
|
66 m_defaultFcParams(),
|
Chris@143
|
67 m_dParams(),
|
Chris@143
|
68 m_defaultDParams()
|
cannam@0
|
69 {
|
Chris@10
|
70 if (inputSampleRate < sampleRateMin) {
|
Chris@10
|
71 std::cerr << "MatchVampPlugin::MatchVampPlugin: input sample rate "
|
Chris@10
|
72 << inputSampleRate << " < min supported rate "
|
Chris@10
|
73 << sampleRateMin << ", plugin will refuse to initialise"
|
Chris@10
|
74 << std::endl;
|
Chris@10
|
75 }
|
Chris@10
|
76
|
cannam@0
|
77 if (!m_serialisingMutexInitialised) {
|
cannam@0
|
78 m_serialisingMutexInitialised = true;
|
cannam@0
|
79 #ifdef _WIN32
|
cannam@0
|
80 m_serialisingMutex = CreateMutex(NULL, FALSE, NULL);
|
cannam@0
|
81 #else
|
cannam@0
|
82 pthread_mutex_init(&m_serialisingMutex, 0);
|
cannam@0
|
83 #endif
|
cannam@0
|
84 }
|
cannam@0
|
85
|
Chris@107
|
86 m_pipeline = 0;
|
cannam@0
|
87 // std::cerr << "MatchVampPlugin::MatchVampPlugin(" << this << "): extant = " << ++extant << std::endl;
|
cannam@0
|
88 }
|
cannam@0
|
89
|
cannam@0
|
90 MatchVampPlugin::~MatchVampPlugin()
|
cannam@0
|
91 {
|
cannam@0
|
92 // std::cerr << "MatchVampPlugin::~MatchVampPlugin(" << this << "): extant = " << --extant << std::endl;
|
cannam@0
|
93
|
Chris@107
|
94 delete m_pipeline;
|
cannam@0
|
95
|
cannam@0
|
96 if (m_locked) {
|
cannam@0
|
97 #ifdef _WIN32
|
cannam@0
|
98 ReleaseMutex(m_serialisingMutex);
|
cannam@0
|
99 #else
|
cannam@0
|
100 pthread_mutex_unlock(&m_serialisingMutex);
|
cannam@0
|
101 #endif
|
cannam@0
|
102 m_locked = false;
|
cannam@0
|
103 }
|
cannam@0
|
104 }
|
cannam@0
|
105
|
cannam@0
|
106 string
|
cannam@0
|
107 MatchVampPlugin::getIdentifier() const
|
cannam@0
|
108 {
|
cannam@0
|
109 return "match";
|
cannam@0
|
110 }
|
cannam@0
|
111
|
cannam@0
|
112 string
|
cannam@0
|
113 MatchVampPlugin::getName() const
|
cannam@0
|
114 {
|
cannam@0
|
115 return "Match Performance Aligner";
|
cannam@0
|
116 }
|
cannam@0
|
117
|
cannam@0
|
118 string
|
cannam@0
|
119 MatchVampPlugin::getDescription() const
|
cannam@0
|
120 {
|
cannam@0
|
121 return "Calculate alignment between two performances in separate channel inputs";
|
cannam@0
|
122 }
|
cannam@0
|
123
|
cannam@0
|
124 string
|
cannam@0
|
125 MatchVampPlugin::getMaker() const
|
cannam@0
|
126 {
|
cannam@0
|
127 return "Simon Dixon (plugin by Chris Cannam)";
|
cannam@0
|
128 }
|
cannam@0
|
129
|
cannam@0
|
130 int
|
cannam@0
|
131 MatchVampPlugin::getPluginVersion() const
|
cannam@0
|
132 {
|
Chris@137
|
133 return 3;
|
cannam@0
|
134 }
|
cannam@0
|
135
|
cannam@0
|
136 string
|
cannam@0
|
137 MatchVampPlugin::getCopyright() const
|
cannam@0
|
138 {
|
cannam@0
|
139 return "GPL";
|
cannam@0
|
140 }
|
cannam@0
|
141
|
cannam@0
|
142 MatchVampPlugin::ParameterList
|
cannam@0
|
143 MatchVampPlugin::getParameterDescriptors() const
|
cannam@0
|
144 {
|
cannam@0
|
145 ParameterList list;
|
cannam@0
|
146
|
cannam@0
|
147 ParameterDescriptor desc;
|
Chris@18
|
148
|
Chris@162
|
149 desc.identifier = "freq1";
|
Chris@162
|
150 desc.name = "Tuning frequency of first input";
|
Chris@241
|
151 desc.description = "Tuning frequency (concert A) for the reference audio";
|
Chris@162
|
152 desc.minValue = 220.0;
|
Chris@162
|
153 desc.maxValue = 880.0;
|
Chris@188
|
154 desc.defaultValue = float(m_defaultFeParams.referenceFrequency);
|
Chris@162
|
155 desc.isQuantized = false;
|
Chris@162
|
156 desc.unit = "Hz";
|
Chris@162
|
157 list.push_back(desc);
|
Chris@162
|
158
|
Chris@162
|
159 desc.identifier = "freq2";
|
Chris@162
|
160 desc.name = "Tuning frequency of second input";
|
Chris@241
|
161 desc.description = "Tuning frequency (concert A) for the other audio";
|
Chris@162
|
162 desc.minValue = 220.0;
|
Chris@162
|
163 desc.maxValue = 880.0;
|
Chris@188
|
164 desc.defaultValue = float(m_defaultFeParams.referenceFrequency);
|
Chris@162
|
165 desc.isQuantized = false;
|
Chris@162
|
166 desc.unit = "Hz";
|
Chris@162
|
167 list.push_back(desc);
|
Chris@162
|
168
|
Chris@177
|
169 desc.identifier = "minfreq";
|
Chris@177
|
170 desc.name = "Minimum frequency";
|
Chris@241
|
171 desc.description = "Minimum frequency to include in features";
|
Chris@177
|
172 desc.minValue = 0.0;
|
Chris@188
|
173 desc.maxValue = float(m_inputSampleRate / 4.f);
|
Chris@188
|
174 desc.defaultValue = float(m_defaultFeParams.minFrequency);
|
Chris@177
|
175 desc.isQuantized = false;
|
Chris@177
|
176 desc.unit = "Hz";
|
Chris@177
|
177 list.push_back(desc);
|
Chris@177
|
178
|
Chris@177
|
179 desc.identifier = "maxfreq";
|
Chris@177
|
180 desc.name = "Maximum frequency";
|
Chris@241
|
181 desc.description = "Maximum frequency to include in features";
|
Chris@177
|
182 desc.minValue = 1000.0;
|
Chris@188
|
183 desc.maxValue = float(m_inputSampleRate / 2.f);
|
Chris@188
|
184 desc.defaultValue = float(m_defaultFeParams.maxFrequency);
|
Chris@177
|
185 desc.isQuantized = false;
|
Chris@177
|
186 desc.unit = "Hz";
|
Chris@177
|
187 list.push_back(desc);
|
Chris@177
|
188
|
Chris@162
|
189 desc.unit = "";
|
Chris@162
|
190
|
Chris@162
|
191 desc.identifier = "usechroma";
|
Chris@162
|
192 desc.name = "Feature type";
|
Chris@162
|
193 desc.description = "Whether to use warped spectrogram or chroma frequency map";
|
cannam@0
|
194 desc.minValue = 0;
|
cannam@0
|
195 desc.maxValue = 1;
|
Chris@162
|
196 desc.defaultValue = m_defaultFeParams.useChromaFrequencyMap ? 1 : 0;
|
Chris@162
|
197 desc.isQuantized = true;
|
Chris@162
|
198 desc.quantizeStep = 1;
|
Chris@162
|
199 desc.valueNames.clear();
|
Chris@162
|
200 desc.valueNames.push_back("Spectral");
|
Chris@162
|
201 desc.valueNames.push_back("Chroma");
|
Chris@162
|
202 list.push_back(desc);
|
Chris@162
|
203
|
Chris@162
|
204 desc.valueNames.clear();
|
Chris@162
|
205
|
Chris@162
|
206 desc.identifier = "usespecdiff";
|
Chris@162
|
207 desc.name = "Use feature difference";
|
Chris@162
|
208 desc.description = "Whether to use half-wave rectified feature-to-feature difference instead of straight spectral or chroma feature";
|
Chris@162
|
209 desc.minValue = 0;
|
Chris@162
|
210 desc.maxValue = 1;
|
Chris@188
|
211 desc.defaultValue = float(m_defaultFcParams.order);
|
cannam@0
|
212 desc.isQuantized = true;
|
cannam@0
|
213 desc.quantizeStep = 1;
|
cannam@0
|
214 list.push_back(desc);
|
cannam@0
|
215
|
Chris@18
|
216 desc.identifier = "framenorm";
|
Chris@162
|
217 desc.name = "Frame normalisation";
|
Chris@162
|
218 desc.description = "Type of normalisation to use for features";
|
Chris@18
|
219 desc.minValue = 0;
|
Chris@18
|
220 desc.maxValue = 2;
|
Chris@188
|
221 desc.defaultValue = float(m_defaultFcParams.norm);
|
Chris@18
|
222 desc.isQuantized = true;
|
Chris@18
|
223 desc.quantizeStep = 1;
|
Chris@18
|
224 desc.valueNames.clear();
|
Chris@18
|
225 desc.valueNames.push_back("None");
|
Chris@162
|
226 desc.valueNames.push_back("Sum to 1");
|
Chris@162
|
227 desc.valueNames.push_back("Long-term average");
|
Chris@18
|
228 list.push_back(desc);
|
Chris@18
|
229 desc.valueNames.clear();
|
Chris@188
|
230 desc.defaultValue = float(m_defaultFcParams.silenceThreshold);
|
Chris@151
|
231
|
Chris@156
|
232 desc.identifier = "metric";
|
Chris@156
|
233 desc.name = "Distance metric";
|
Chris@241
|
234 desc.description = "Metric for distance calculations";
|
Chris@156
|
235 desc.minValue = 0;
|
Chris@157
|
236 desc.maxValue = 2;
|
Chris@188
|
237 desc.defaultValue = float(m_defaultDParams.metric);
|
Chris@156
|
238 desc.isQuantized = true;
|
Chris@156
|
239 desc.quantizeStep = 1;
|
Chris@156
|
240 desc.valueNames.clear();
|
Chris@157
|
241 desc.valueNames.push_back("Manhattan");
|
Chris@156
|
242 desc.valueNames.push_back("Euclidean");
|
Chris@156
|
243 desc.valueNames.push_back("Cosine");
|
Chris@156
|
244 list.push_back(desc);
|
Chris@156
|
245 desc.valueNames.clear();
|
Chris@156
|
246
|
Chris@162
|
247 desc.identifier = "distnorm";
|
Chris@162
|
248 desc.name = "Distance normalisation";
|
Chris@162
|
249 desc.description = "Type of normalisation to use for distance metric";
|
Chris@162
|
250 desc.minValue = 0;
|
Chris@162
|
251 desc.maxValue = 2;
|
Chris@188
|
252 desc.defaultValue = float(m_defaultDParams.norm);
|
Chris@162
|
253 desc.isQuantized = true;
|
Chris@162
|
254 desc.quantizeStep = 1;
|
Chris@162
|
255 desc.valueNames.clear();
|
Chris@162
|
256 desc.valueNames.push_back("None");
|
Chris@162
|
257 desc.valueNames.push_back("Sum of frames");
|
Chris@162
|
258 desc.valueNames.push_back("Log sum of frames");
|
Chris@162
|
259 list.push_back(desc);
|
Chris@162
|
260 desc.valueNames.clear();
|
Chris@162
|
261
|
Chris@198
|
262 #ifdef USE_COMPACT_TYPES
|
Chris@198
|
263 desc.identifier = "scale";
|
Chris@198
|
264 desc.name = "Distance scale";
|
Chris@198
|
265 desc.description = "Scale factor to use when mapping distance metric into byte range for storage";
|
Chris@198
|
266 desc.minValue = 1;
|
Chris@198
|
267 desc.maxValue = 1000;
|
Chris@198
|
268 desc.defaultValue = float(m_defaultDParams.scale);
|
Chris@198
|
269 desc.isQuantized = false;
|
Chris@198
|
270 list.push_back(desc);
|
Chris@198
|
271 #endif
|
Chris@198
|
272
|
Chris@162
|
273 desc.identifier = "silencethreshold";
|
Chris@162
|
274 desc.name = "Silence threshold";
|
Chris@162
|
275 desc.description = "Total frame energy threshold below which a feature will be regarded as silent";
|
Chris@162
|
276 desc.minValue = 0;
|
Chris@165
|
277 desc.maxValue = 0.1f;
|
Chris@188
|
278 desc.defaultValue = float(m_defaultFcParams.silenceThreshold);
|
Chris@162
|
279 desc.isQuantized = false;
|
Chris@162
|
280 list.push_back(desc);
|
Chris@162
|
281
|
Chris@151
|
282 desc.identifier = "noise";
|
Chris@162
|
283 desc.name = "Add noise";
|
Chris@241
|
284 desc.description = "Whether to mix in a small constant white noise term when calculating feature distance. This can improve alignment against sources containing cleanly synthesised audio";
|
Chris@151
|
285 desc.minValue = 0;
|
Chris@151
|
286 desc.maxValue = 1;
|
Chris@188
|
287 desc.defaultValue = float(m_defaultDParams.noise);
|
Chris@151
|
288 desc.isQuantized = true;
|
Chris@151
|
289 desc.quantizeStep = 1;
|
Chris@151
|
290 list.push_back(desc);
|
Chris@136
|
291
|
Chris@25
|
292 desc.identifier = "gradientlimit";
|
Chris@162
|
293 desc.name = "Gradient limit";
|
Chris@18
|
294 desc.description = "Limit of number of frames that will be accepted from one source without a frame from the other source being accepted";
|
Chris@18
|
295 desc.minValue = 1;
|
Chris@18
|
296 desc.maxValue = 10;
|
Chris@188
|
297 desc.defaultValue = float(m_defaultParams.maxRunCount);
|
Chris@18
|
298 desc.isQuantized = true;
|
Chris@18
|
299 desc.quantizeStep = 1;
|
Chris@18
|
300 list.push_back(desc);
|
Chris@18
|
301
|
Chris@25
|
302 desc.identifier = "zonewidth";
|
Chris@162
|
303 desc.name = "Search zone width";
|
Chris@25
|
304 desc.description = "Width of the search zone (error margin) either side of the ongoing match position, in seconds";
|
Chris@25
|
305 desc.minValue = 1;
|
Chris@25
|
306 desc.maxValue = 60;
|
Chris@188
|
307 desc.defaultValue = float(m_defaultParams.blockTime);
|
Chris@25
|
308 desc.isQuantized = true;
|
Chris@25
|
309 desc.quantizeStep = 1;
|
Chris@25
|
310 desc.unit = "s";
|
Chris@25
|
311 list.push_back(desc);
|
Chris@25
|
312
|
Chris@83
|
313 desc.identifier = "diagonalweight";
|
Chris@162
|
314 desc.name = "Diagonal weight";
|
Chris@83
|
315 desc.description = "Weight applied to cost of diagonal step relative to horizontal or vertical step. The default of 2.0 is good for gross tracking of quite different performances; closer to 1.0 produces a smoother path for performances more similar in tempo";
|
Chris@83
|
316 desc.minValue = 1.0;
|
Chris@86
|
317 desc.maxValue = 2.0;
|
Chris@188
|
318 desc.defaultValue = float(m_defaultParams.diagonalWeight);
|
Chris@83
|
319 desc.isQuantized = false;
|
Chris@83
|
320 desc.unit = "";
|
Chris@83
|
321 list.push_back(desc);
|
Chris@83
|
322
|
Chris@32
|
323 desc.identifier = "smooth";
|
Chris@162
|
324 desc.name = "Use path smoothing";
|
Chris@138
|
325 desc.description = "Smooth the path by replacing steps with diagonals. (This was enabled by default in earlier versions of the MATCH plugin, but the default now is to produce an un-smoothed path.)";
|
Chris@32
|
326 desc.minValue = 0;
|
Chris@32
|
327 desc.maxValue = 1;
|
Chris@138
|
328 desc.defaultValue = 0;
|
Chris@32
|
329 desc.isQuantized = true;
|
Chris@32
|
330 desc.quantizeStep = 1;
|
Chris@32
|
331 desc.unit = "";
|
Chris@32
|
332 list.push_back(desc);
|
Chris@32
|
333
|
Chris@162
|
334 desc.identifier = "serialise";
|
Chris@162
|
335 desc.name = "Serialise plugin invocations";
|
Chris@162
|
336 desc.description = "Reduce potential memory load at the expense of multiprocessor performance by serialising multi-threaded plugin runs";
|
Chris@162
|
337 desc.minValue = 0;
|
Chris@162
|
338 desc.maxValue = 1;
|
Chris@162
|
339 desc.defaultValue = 0;
|
Chris@162
|
340 desc.isQuantized = true;
|
Chris@162
|
341 desc.quantizeStep = 1;
|
Chris@161
|
342 list.push_back(desc);
|
Chris@236
|
343
|
cannam@0
|
344 return list;
|
cannam@0
|
345 }
|
cannam@0
|
346
|
cannam@0
|
347 float
|
cannam@0
|
348 MatchVampPlugin::getParameter(std::string name) const
|
cannam@0
|
349 {
|
cannam@0
|
350 if (name == "serialise") {
|
cannam@0
|
351 return m_serialise ? 1.0 : 0.0;
|
Chris@18
|
352 } else if (name == "framenorm") {
|
Chris@188
|
353 return float(m_fcParams.norm);
|
Chris@18
|
354 } else if (name == "distnorm") {
|
Chris@188
|
355 return float(m_dParams.norm);
|
Chris@18
|
356 } else if (name == "usespecdiff") {
|
Chris@188
|
357 return float(m_fcParams.order);
|
Chris@18
|
358 } else if (name == "usechroma") {
|
Chris@38
|
359 return m_feParams.useChromaFrequencyMap ? 1.0 : 0.0;
|
Chris@25
|
360 } else if (name == "gradientlimit") {
|
Chris@188
|
361 return float(m_params.maxRunCount);
|
Chris@83
|
362 } else if (name == "diagonalweight") {
|
Chris@188
|
363 return float(m_params.diagonalWeight);
|
Chris@25
|
364 } else if (name == "zonewidth") {
|
Chris@188
|
365 return float(m_params.blockTime);
|
Chris@32
|
366 } else if (name == "smooth") {
|
Chris@32
|
367 return m_smooth ? 1.0 : 0.0;
|
Chris@136
|
368 } else if (name == "silencethreshold") {
|
Chris@188
|
369 return float(m_fcParams.silenceThreshold);
|
Chris@156
|
370 } else if (name == "metric") {
|
Chris@188
|
371 return float(m_dParams.metric);
|
Chris@151
|
372 } else if (name == "noise") {
|
Chris@151
|
373 return m_dParams.noise;
|
Chris@198
|
374 } else if (name == "scale") {
|
Chris@198
|
375 return float(m_dParams.scale);
|
Chris@161
|
376 } else if (name == "freq1") {
|
Chris@188
|
377 return float(m_feParams.referenceFrequency);
|
Chris@161
|
378 } else if (name == "freq2") {
|
Chris@188
|
379 return float(m_secondReferenceFrequency);
|
Chris@177
|
380 } else if (name == "minfreq") {
|
Chris@188
|
381 return float(m_feParams.minFrequency);
|
Chris@177
|
382 } else if (name == "maxfreq") {
|
Chris@188
|
383 return float(m_feParams.maxFrequency);
|
cannam@0
|
384 }
|
Chris@18
|
385
|
cannam@0
|
386 return 0.0;
|
cannam@0
|
387 }
|
cannam@0
|
388
|
cannam@0
|
389 void
|
cannam@0
|
390 MatchVampPlugin::setParameter(std::string name, float value)
|
cannam@0
|
391 {
|
cannam@0
|
392 if (name == "serialise") {
|
cannam@0
|
393 m_serialise = (value > 0.5);
|
Chris@18
|
394 } else if (name == "framenorm") {
|
Chris@188
|
395 m_fcParams.norm = FeatureConditioner::Normalisation(int(value + 0.1));
|
Chris@18
|
396 } else if (name == "distnorm") {
|
Chris@188
|
397 m_dParams.norm = DistanceMetric::DistanceNormalisation(int(value + 0.1));
|
Chris@18
|
398 } else if (name == "usespecdiff") {
|
Chris@188
|
399 m_fcParams.order = FeatureConditioner::OutputOrder(int(value + 0.1));
|
Chris@18
|
400 } else if (name == "usechroma") {
|
Chris@38
|
401 m_feParams.useChromaFrequencyMap = (value > 0.5);
|
Chris@25
|
402 } else if (name == "gradientlimit") {
|
Chris@18
|
403 m_params.maxRunCount = int(value + 0.1);
|
Chris@83
|
404 } else if (name == "diagonalweight") {
|
Chris@83
|
405 m_params.diagonalWeight = value;
|
Chris@25
|
406 } else if (name == "zonewidth") {
|
Chris@25
|
407 m_params.blockTime = value;
|
Chris@32
|
408 } else if (name == "smooth") {
|
Chris@32
|
409 m_smooth = (value > 0.5);
|
Chris@136
|
410 } else if (name == "silencethreshold") {
|
Chris@136
|
411 m_fcParams.silenceThreshold = value;
|
Chris@156
|
412 } else if (name == "metric") {
|
Chris@188
|
413 m_dParams.metric = DistanceMetric::Metric(int(value + 0.1));
|
Chris@151
|
414 } else if (name == "noise") {
|
Chris@188
|
415 m_dParams.noise = DistanceMetric::NoiseAddition(int(value + 0.1));
|
Chris@198
|
416 } else if (name == "scale") {
|
Chris@198
|
417 m_dParams.scale = value;
|
Chris@161
|
418 } else if (name == "freq1") {
|
Chris@161
|
419 m_feParams.referenceFrequency = value;
|
Chris@161
|
420 } else if (name == "freq2") {
|
Chris@161
|
421 m_secondReferenceFrequency = value;
|
Chris@177
|
422 } else if (name == "minfreq") {
|
Chris@177
|
423 m_feParams.minFrequency = value;
|
Chris@177
|
424 } else if (name == "maxfreq") {
|
Chris@177
|
425 m_feParams.maxFrequency = value;
|
cannam@0
|
426 }
|
cannam@0
|
427 }
|
cannam@0
|
428
|
cannam@0
|
429 size_t
|
cannam@0
|
430 MatchVampPlugin::getPreferredStepSize() const
|
cannam@0
|
431 {
|
Chris@52
|
432 return int(m_inputSampleRate * defaultStepTime + 0.001);
|
cannam@0
|
433 }
|
cannam@0
|
434
|
cannam@0
|
435 size_t
|
cannam@0
|
436 MatchVampPlugin::getPreferredBlockSize() const
|
cannam@0
|
437 {
|
Chris@216
|
438 return m_defaultFeParams.fftSize;
|
cannam@0
|
439 }
|
cannam@0
|
440
|
cannam@0
|
441 void
|
Chris@17
|
442 MatchVampPlugin::createMatchers()
|
cannam@0
|
443 {
|
Chris@17
|
444 m_params.hopTime = m_stepTime;
|
Chris@38
|
445 m_feParams.fftSize = m_blockSize;
|
Chris@107
|
446
|
Chris@161
|
447 m_pipeline = new MatchPipeline(m_feParams, m_fcParams, m_dParams, m_params,
|
Chris@161
|
448 m_secondReferenceFrequency);
|
cannam@0
|
449 }
|
cannam@0
|
450
|
cannam@0
|
451 bool
|
cannam@0
|
452 MatchVampPlugin::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
cannam@0
|
453 {
|
Chris@10
|
454 if (m_inputSampleRate < sampleRateMin) {
|
Chris@10
|
455 std::cerr << "MatchVampPlugin::MatchVampPlugin: input sample rate "
|
Chris@10
|
456 << m_inputSampleRate << " < min supported rate "
|
Chris@10
|
457 << sampleRateMin << std::endl;
|
Chris@10
|
458 return false;
|
Chris@10
|
459 }
|
cannam@0
|
460 if (channels < getMinChannelCount() ||
|
cannam@0
|
461 channels > getMaxChannelCount()) return false;
|
cannam@1
|
462 if (stepSize > blockSize/2 ||
|
cannam@0
|
463 blockSize != getPreferredBlockSize()) return false;
|
Chris@15
|
464
|
Chris@188
|
465 m_stepSize = int(stepSize);
|
Chris@15
|
466 m_stepTime = float(stepSize) / m_inputSampleRate;
|
Chris@188
|
467 m_blockSize = int(blockSize);
|
Chris@15
|
468
|
Chris@15
|
469 createMatchers();
|
cannam@0
|
470 m_begin = true;
|
cannam@0
|
471 m_locked = false;
|
Chris@15
|
472
|
cannam@0
|
473 return true;
|
cannam@0
|
474 }
|
cannam@0
|
475
|
cannam@0
|
476 void
|
cannam@0
|
477 MatchVampPlugin::reset()
|
cannam@0
|
478 {
|
Chris@107
|
479 delete m_pipeline;
|
Chris@107
|
480 m_pipeline = 0;
|
Chris@74
|
481 m_frameNo = 0;
|
cannam@6
|
482 createMatchers();
|
cannam@6
|
483 m_begin = true;
|
cannam@6
|
484 m_locked = false;
|
cannam@0
|
485 }
|
cannam@0
|
486
|
cannam@0
|
487 MatchVampPlugin::OutputList
|
cannam@0
|
488 MatchVampPlugin::getOutputDescriptors() const
|
cannam@0
|
489 {
|
cannam@0
|
490 OutputList list;
|
cannam@0
|
491
|
Chris@52
|
492 float outRate = 1.0f / m_stepTime;
|
cannam@0
|
493
|
cannam@0
|
494 OutputDescriptor desc;
|
cannam@0
|
495 desc.identifier = "path";
|
cannam@0
|
496 desc.name = "Path";
|
cannam@0
|
497 desc.description = "Alignment path";
|
cannam@0
|
498 desc.unit = "";
|
cannam@0
|
499 desc.hasFixedBinCount = true;
|
cannam@0
|
500 desc.binCount = 1;
|
cannam@0
|
501 desc.hasKnownExtents = false;
|
cannam@0
|
502 desc.isQuantized = true;
|
cannam@0
|
503 desc.quantizeStep = 1;
|
cannam@0
|
504 desc.sampleType = OutputDescriptor::VariableSampleRate;
|
cannam@0
|
505 desc.sampleRate = outRate;
|
Chris@180
|
506 m_pathOutNo = int(list.size());
|
cannam@0
|
507 list.push_back(desc);
|
cannam@0
|
508
|
cannam@0
|
509 desc.identifier = "a_b";
|
cannam@0
|
510 desc.name = "A-B Timeline";
|
cannam@0
|
511 desc.description = "Timing in performance B corresponding to moments in performance A";
|
cannam@0
|
512 desc.unit = "sec";
|
cannam@0
|
513 desc.hasFixedBinCount = true;
|
cannam@0
|
514 desc.binCount = 1;
|
cannam@0
|
515 desc.hasKnownExtents = false;
|
cannam@0
|
516 desc.isQuantized = false;
|
cannam@0
|
517 desc.sampleType = OutputDescriptor::VariableSampleRate;
|
cannam@0
|
518 desc.sampleRate = outRate;
|
Chris@180
|
519 m_abOutNo = int(list.size());
|
cannam@0
|
520 list.push_back(desc);
|
cannam@0
|
521
|
cannam@0
|
522 desc.identifier = "b_a";
|
cannam@0
|
523 desc.name = "B-A Timeline";
|
cannam@0
|
524 desc.description = "Timing in performance A corresponding to moments in performance B";
|
cannam@0
|
525 desc.unit = "sec";
|
cannam@0
|
526 desc.hasFixedBinCount = true;
|
cannam@0
|
527 desc.binCount = 1;
|
cannam@0
|
528 desc.hasKnownExtents = false;
|
cannam@0
|
529 desc.isQuantized = false;
|
cannam@0
|
530 desc.sampleType = OutputDescriptor::VariableSampleRate;
|
cannam@0
|
531 desc.sampleRate = outRate;
|
Chris@180
|
532 m_baOutNo = int(list.size());
|
cannam@0
|
533 list.push_back(desc);
|
cannam@0
|
534
|
cannam@0
|
535 desc.identifier = "a_b_divergence";
|
cannam@0
|
536 desc.name = "A-B Divergence";
|
cannam@0
|
537 desc.description = "Difference between timings in performances A and B";
|
cannam@0
|
538 desc.unit = "sec";
|
cannam@0
|
539 desc.hasFixedBinCount = true;
|
cannam@0
|
540 desc.binCount = 1;
|
cannam@0
|
541 desc.hasKnownExtents = false;
|
cannam@0
|
542 desc.isQuantized = false;
|
cannam@0
|
543 desc.sampleType = OutputDescriptor::VariableSampleRate;
|
cannam@0
|
544 desc.sampleRate = outRate;
|
Chris@180
|
545 m_abDivOutNo = int(list.size());
|
cannam@0
|
546 list.push_back(desc);
|
cannam@0
|
547
|
cannam@0
|
548 desc.identifier = "a_b_temporatio";
|
cannam@0
|
549 desc.name = "A-B Tempo Ratio";
|
cannam@0
|
550 desc.description = "Ratio of tempi between performances A and B";
|
cannam@0
|
551 desc.unit = "";
|
cannam@0
|
552 desc.hasFixedBinCount = true;
|
cannam@0
|
553 desc.binCount = 1;
|
cannam@0
|
554 desc.hasKnownExtents = false;
|
cannam@0
|
555 desc.isQuantized = false;
|
cannam@0
|
556 desc.sampleType = OutputDescriptor::VariableSampleRate;
|
cannam@0
|
557 desc.sampleRate = outRate;
|
Chris@180
|
558 m_abRatioOutNo = int(list.size());
|
cannam@0
|
559 list.push_back(desc);
|
cannam@0
|
560
|
Chris@38
|
561 int featureSize = FeatureExtractor(m_feParams).getFeatureSize();
|
Chris@38
|
562
|
Chris@15
|
563 desc.identifier = "a_features";
|
Chris@140
|
564 desc.name = "Raw A Features";
|
Chris@15
|
565 desc.description = "Spectral features extracted from performance A";
|
Chris@15
|
566 desc.unit = "";
|
Chris@15
|
567 desc.hasFixedBinCount = true;
|
Chris@38
|
568 desc.binCount = featureSize;
|
Chris@15
|
569 desc.hasKnownExtents = false;
|
Chris@15
|
570 desc.isQuantized = false;
|
Chris@16
|
571 desc.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@15
|
572 desc.sampleRate = outRate;
|
Chris@180
|
573 m_aFeaturesOutNo = int(list.size());
|
Chris@16
|
574 list.push_back(desc);
|
Chris@16
|
575
|
Chris@16
|
576 desc.identifier = "b_features";
|
Chris@140
|
577 desc.name = "Raw B Features";
|
Chris@16
|
578 desc.description = "Spectral features extracted from performance B";
|
Chris@16
|
579 desc.unit = "";
|
Chris@16
|
580 desc.hasFixedBinCount = true;
|
Chris@38
|
581 desc.binCount = featureSize;
|
Chris@16
|
582 desc.hasKnownExtents = false;
|
Chris@16
|
583 desc.isQuantized = false;
|
Chris@16
|
584 desc.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@16
|
585 desc.sampleRate = outRate;
|
Chris@180
|
586 m_bFeaturesOutNo = int(list.size());
|
Chris@15
|
587 list.push_back(desc);
|
Chris@15
|
588
|
Chris@140
|
589 desc.identifier = "a_cfeatures";
|
Chris@140
|
590 desc.name = "Conditioned A Features";
|
Chris@140
|
591 desc.description = "Spectral features extracted from performance A, after normalisation and conditioning";
|
Chris@140
|
592 desc.unit = "";
|
Chris@140
|
593 desc.hasFixedBinCount = true;
|
Chris@140
|
594 desc.binCount = featureSize;
|
Chris@140
|
595 desc.hasKnownExtents = false;
|
Chris@140
|
596 desc.isQuantized = false;
|
Chris@140
|
597 desc.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@140
|
598 desc.sampleRate = outRate;
|
Chris@180
|
599 m_caFeaturesOutNo = int(list.size());
|
Chris@140
|
600 list.push_back(desc);
|
Chris@140
|
601
|
Chris@140
|
602 desc.identifier = "b_cfeatures";
|
Chris@140
|
603 desc.name = "Conditioned B Features";
|
Chris@140
|
604 desc.description = "Spectral features extracted from performance B, after norrmalisation and conditioning";
|
Chris@140
|
605 desc.unit = "";
|
Chris@140
|
606 desc.hasFixedBinCount = true;
|
Chris@140
|
607 desc.binCount = featureSize;
|
Chris@140
|
608 desc.hasKnownExtents = false;
|
Chris@140
|
609 desc.isQuantized = false;
|
Chris@140
|
610 desc.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@140
|
611 desc.sampleRate = outRate;
|
Chris@180
|
612 m_cbFeaturesOutNo = int(list.size());
|
Chris@140
|
613 list.push_back(desc);
|
Chris@140
|
614
|
Chris@163
|
615 desc.identifier = "overall_cost";
|
Chris@163
|
616 desc.name = "Overall Cost";
|
Chris@163
|
617 desc.description = "Normalised overall path cost for the cheapest path";
|
Chris@163
|
618 desc.unit = "";
|
Chris@163
|
619 desc.hasFixedBinCount = true;
|
Chris@163
|
620 desc.binCount = 1;
|
Chris@163
|
621 desc.hasKnownExtents = false;
|
Chris@163
|
622 desc.isQuantized = false;
|
Chris@163
|
623 desc.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@163
|
624 desc.sampleRate = 1;
|
Chris@180
|
625 m_overallCostOutNo = int(list.size());
|
Chris@163
|
626 list.push_back(desc);
|
Chris@163
|
627
|
cannam@0
|
628 return list;
|
cannam@0
|
629 }
|
cannam@0
|
630
|
cannam@0
|
631 MatchVampPlugin::FeatureSet
|
cannam@0
|
632 MatchVampPlugin::process(const float *const *inputBuffers,
|
cannam@0
|
633 Vamp::RealTime timestamp)
|
cannam@0
|
634 {
|
cannam@0
|
635 if (m_begin) {
|
cannam@0
|
636 if (!m_locked && m_serialise) {
|
cannam@0
|
637 m_locked = true;
|
cannam@0
|
638 #ifdef _WIN32
|
cannam@0
|
639 WaitForSingleObject(m_serialisingMutex, INFINITE);
|
cannam@0
|
640 #else
|
cannam@0
|
641 pthread_mutex_lock(&m_serialisingMutex);
|
cannam@0
|
642 #endif
|
cannam@0
|
643 }
|
Chris@10
|
644 m_startTime = timestamp;
|
cannam@0
|
645 m_begin = false;
|
cannam@0
|
646 }
|
cannam@0
|
647
|
cannam@0
|
648 // std::cerr << timestamp.toString();
|
cannam@0
|
649
|
Chris@107
|
650 m_pipeline->feedFrequencyDomainAudio(inputBuffers[0], inputBuffers[1]);
|
Chris@74
|
651
|
Chris@140
|
652 FeatureSet returnFeatures;
|
Chris@140
|
653
|
Chris@185
|
654 feature_t f1, f2;
|
Chris@140
|
655 m_pipeline->extractFeatures(f1, f2);
|
Chris@16
|
656
|
Chris@185
|
657 feature_t cf1, cf2;
|
Chris@140
|
658 m_pipeline->extractConditionedFeatures(cf1, cf2);
|
Chris@16
|
659
|
Chris@16
|
660 Feature f;
|
Chris@16
|
661 f.hasTimestamp = false;
|
Chris@16
|
662
|
Chris@74
|
663 f.values.clear();
|
Chris@188
|
664 for (auto v: f1) f.values.push_back(float(v));
|
Chris@74
|
665 returnFeatures[m_aFeaturesOutNo].push_back(f);
|
Chris@16
|
666
|
Chris@74
|
667 f.values.clear();
|
Chris@188
|
668 for (auto v: f2) f.values.push_back(float(v));
|
Chris@74
|
669 returnFeatures[m_bFeaturesOutNo].push_back(f);
|
cannam@0
|
670
|
Chris@140
|
671 f.values.clear();
|
Chris@188
|
672 for (auto v: cf1) f.values.push_back(float(v));
|
Chris@140
|
673 returnFeatures[m_caFeaturesOutNo].push_back(f);
|
Chris@140
|
674
|
Chris@140
|
675 f.values.clear();
|
Chris@188
|
676 for (auto v: cf2) f.values.push_back(float(v));
|
Chris@140
|
677 returnFeatures[m_cbFeaturesOutNo].push_back(f);
|
Chris@140
|
678
|
cannam@0
|
679 // std::cerr << ".";
|
cannam@0
|
680 // std::cerr << std::endl;
|
cannam@0
|
681
|
Chris@74
|
682 ++m_frameNo;
|
Chris@74
|
683
|
Chris@16
|
684 return returnFeatures;
|
cannam@0
|
685 }
|
cannam@0
|
686
|
cannam@0
|
687 MatchVampPlugin::FeatureSet
|
cannam@0
|
688 MatchVampPlugin::getRemainingFeatures()
|
cannam@0
|
689 {
|
Chris@107
|
690 m_pipeline->finish();
|
Chris@74
|
691
|
Chris@63
|
692 FeatureSet returnFeatures;
|
Chris@63
|
693
|
cannam@0
|
694 std::vector<int> pathx;
|
cannam@0
|
695 std::vector<int> pathy;
|
Chris@155
|
696 int len = m_pipeline->retrievePath(m_smooth, pathx, pathy);
|
cannam@0
|
697
|
Chris@173
|
698 double cost = m_pipeline->getOverallCost();
|
Chris@163
|
699 Feature costFeature;
|
Chris@163
|
700 costFeature.hasTimestamp = false;
|
Chris@188
|
701 costFeature.values.push_back(float(cost));
|
Chris@163
|
702 returnFeatures[m_overallCostOutNo].push_back(costFeature);
|
Chris@163
|
703
|
cannam@0
|
704 int prevx = 0;
|
cannam@0
|
705 int prevy = 0;
|
cannam@0
|
706
|
Chris@30
|
707 for (int i = 0; i < len; ++i) {
|
cannam@0
|
708
|
cannam@0
|
709 int x = pathx[i];
|
cannam@0
|
710 int y = pathy[i];
|
cannam@0
|
711
|
cannam@0
|
712 Vamp::RealTime xt = Vamp::RealTime::frame2RealTime
|
Chris@180
|
713 (x * m_stepSize, int(m_inputSampleRate + 0.5));
|
cannam@0
|
714 Vamp::RealTime yt = Vamp::RealTime::frame2RealTime
|
Chris@180
|
715 (y * m_stepSize, int(m_inputSampleRate + 0.5));
|
cannam@0
|
716
|
cannam@0
|
717 Feature feature;
|
cannam@0
|
718 feature.hasTimestamp = true;
|
Chris@10
|
719 feature.timestamp = m_startTime + xt;
|
cannam@0
|
720 feature.values.clear();
|
Chris@52
|
721 feature.values.push_back(float(yt.sec + double(yt.nsec)/1.0e9));
|
Chris@16
|
722 returnFeatures[m_pathOutNo].push_back(feature);
|
cannam@0
|
723
|
cannam@0
|
724 if (x != prevx) {
|
cannam@0
|
725
|
cannam@0
|
726 feature.hasTimestamp = true;
|
Chris@10
|
727 feature.timestamp = m_startTime + xt;
|
cannam@0
|
728 feature.values.clear();
|
Chris@52
|
729 feature.values.push_back(float(yt.sec + yt.msec()/1000.0));
|
Chris@16
|
730 returnFeatures[m_abOutNo].push_back(feature);
|
cannam@0
|
731
|
cannam@0
|
732 Vamp::RealTime diff = yt - xt;
|
cannam@0
|
733 feature.values.clear();
|
Chris@52
|
734 feature.values.push_back(float(diff.sec + diff.msec()/1000.0));
|
Chris@16
|
735 returnFeatures[m_abDivOutNo].push_back(feature);
|
cannam@0
|
736
|
cannam@0
|
737 if (i > 0) {
|
cannam@0
|
738 int lookback = 100; //!!! arbitrary
|
cannam@0
|
739 if (lookback > i) lookback = i;
|
cannam@0
|
740 int xdiff = x - pathx[i-lookback];
|
cannam@0
|
741 int ydiff = y - pathy[i-lookback];
|
cannam@0
|
742 if (xdiff != 0 && ydiff != 0) {
|
cannam@0
|
743 float ratio = float(ydiff)/float(xdiff);
|
cannam@0
|
744 if (ratio < 8 && ratio > (1.0/8)) { //!!! just for now, since we aren't dealing properly with silence yet
|
cannam@0
|
745 feature.values.clear();
|
cannam@0
|
746 feature.values.push_back(ratio);
|
Chris@16
|
747 returnFeatures[m_abRatioOutNo].push_back(feature);
|
cannam@0
|
748 }
|
cannam@0
|
749 }
|
cannam@0
|
750 }
|
cannam@0
|
751 }
|
cannam@0
|
752
|
cannam@0
|
753 if (y != prevy) {
|
cannam@0
|
754 feature.hasTimestamp = true;
|
Chris@10
|
755 feature.timestamp = m_startTime + yt;
|
cannam@0
|
756 feature.values.clear();
|
Chris@52
|
757 feature.values.push_back(float(xt.sec + xt.msec()/1000.0));
|
Chris@16
|
758 returnFeatures[m_baOutNo].push_back(feature);
|
cannam@0
|
759 }
|
cannam@0
|
760
|
cannam@0
|
761 prevx = x;
|
cannam@0
|
762 prevy = y;
|
cannam@0
|
763 }
|
cannam@0
|
764
|
Chris@107
|
765 delete m_pipeline;
|
Chris@107
|
766 m_pipeline = 0;
|
cannam@0
|
767
|
cannam@0
|
768 if (m_locked) {
|
cannam@0
|
769 #ifdef _WIN32
|
cannam@0
|
770 ReleaseMutex(m_serialisingMutex);
|
cannam@0
|
771 #else
|
cannam@0
|
772 pthread_mutex_unlock(&m_serialisingMutex);
|
cannam@0
|
773 #endif
|
cannam@0
|
774 m_locked = false;
|
cannam@0
|
775 }
|
cannam@0
|
776
|
cannam@0
|
777 return returnFeatures;
|
cannam@0
|
778
|
cannam@0
|
779
|
cannam@0
|
780 /*
|
Chris@30
|
781 for (int i = 0; i < len; ++i) {
|
cannam@0
|
782 std::cerr << i << ": [" << pathx[i] << "," << pathy[i] << "]" << std::endl;
|
cannam@0
|
783 }
|
cannam@0
|
784
|
cannam@0
|
785 std::cerr << std::endl;
|
cannam@0
|
786 std::cerr << "File: A" << std::endl;
|
cannam@0
|
787 std::cerr << "Marks: -1" << std::endl;
|
cannam@0
|
788 std::cerr << "FixedPoints: true 0" << std::endl;
|
cannam@0
|
789 std::cerr << "0" << std::endl;
|
cannam@0
|
790 std::cerr << "0" << std::endl;
|
cannam@0
|
791 std::cerr << "0" << std::endl;
|
cannam@0
|
792 std::cerr << "0" << std::endl;
|
cannam@0
|
793 std::cerr << "File: B" << std::endl;
|
cannam@0
|
794 std::cerr << "Marks: 0" << std::endl;
|
cannam@0
|
795 std::cerr << "FixedPoints: true 0" << std::endl;
|
cannam@0
|
796 std::cerr << "0.02" << std::endl;
|
cannam@0
|
797 std::cerr << "0.02" << std::endl;
|
cannam@0
|
798
|
Chris@30
|
799 std::cerr << len << std::endl;
|
Chris@30
|
800 for (int i = 0; i < len; ++i) {
|
cannam@0
|
801 std::cerr << pathx[i] << std::endl;
|
cannam@0
|
802 }
|
cannam@0
|
803
|
Chris@30
|
804 std::cerr << len << std::endl;
|
Chris@30
|
805 for (int i = 0; i < len; ++i) {
|
cannam@0
|
806 std::cerr << pathy[i] << std::endl;
|
cannam@0
|
807 }
|
cannam@0
|
808 */
|
cannam@0
|
809 }
|