annotate src/MatchVampPlugin.cpp @ 246:aac9ad4064ea subsequence tip

Fix incorrect handling of silent tail in the non-subsequence MATCH phase; some debug output changes
author Chris Cannam
date Fri, 24 Jul 2020 14:29:55 +0100
parents f68277668ad4
children
rev   line source
cannam@0 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
cannam@0 2
cannam@0 3 /*
cannam@0 4 Vamp feature extraction plugin using the MATCH audio alignment
cannam@0 5 algorithm.
cannam@0 6
cannam@0 7 Centre for Digital Music, Queen Mary, University of London.
Chris@236 8 Copyright (c) 2007-2020 Simon Dixon, Chris Cannam, and Queen Mary
Chris@230 9 University of London, Copyright (c) 2014-2015 Tido GmbH.
cannam@0 10
cannam@0 11 This program is free software; you can redistribute it and/or
cannam@0 12 modify it under the terms of the GNU General Public License as
cannam@0 13 published by the Free Software Foundation; either version 2 of the
cannam@0 14 License, or (at your option) any later version. See the file
cannam@0 15 COPYING included with this distribution for more information.
cannam@0 16 */
cannam@0 17
cannam@0 18 #include "MatchVampPlugin.h"
cannam@0 19
Chris@74 20 #include "FeatureExtractor.h"
cannam@0 21
cannam@0 22 #include <vamp/vamp.h>
cannam@0 23 #include <vamp-sdk/RealTime.h>
cannam@0 24
cannam@0 25 #include <vector>
cannam@0 26 #include <algorithm>
cannam@0 27
Chris@235 28 using std::string;
Chris@235 29
cannam@0 30 //static int extant = 0;
cannam@0 31
cannam@0 32 #ifdef _WIN32
cannam@0 33 HANDLE
cannam@0 34 MatchVampPlugin::m_serialisingMutex;
cannam@0 35 #else
cannam@0 36 pthread_mutex_t
cannam@0 37 MatchVampPlugin::m_serialisingMutex;
cannam@0 38 #endif
cannam@0 39
cannam@0 40 bool
cannam@0 41 MatchVampPlugin::m_serialisingMutexInitialised = false;
cannam@0 42
// We want to ensure our freq map / crossover bin in Matcher.cpp are
// always valid with a fixed FFT length in seconds, so must reject low
// sample rates. (Read-only: nothing in this file assigns to it.)
static const float sampleRateMin = 5000.f;

// Default hop between successive analysis frames, in seconds. It is
// multiplied by the input sample rate to obtain the step size in
// sample frames. (Read-only: nothing in this file assigns to it.)
static const float defaultStepTime = 0.020f;
Chris@15 49
cannam@0 50 MatchVampPlugin::MatchVampPlugin(float inputSampleRate) :
cannam@0 51 Plugin(inputSampleRate),
Chris@52 52 m_stepSize(int(inputSampleRate * defaultStepTime + 0.001)),
Chris@15 53 m_stepTime(defaultStepTime),
Chris@16 54 m_blockSize(2048),
cannam@0 55 m_serialise(false),
cannam@0 56 m_begin(true),
Chris@17 57 m_locked(false),
Chris@138 58 m_smooth(false),
Chris@74 59 m_frameNo(0),
Chris@113 60 m_params(defaultStepTime),
Chris@113 61 m_defaultParams(defaultStepTime),
Chris@216 62 m_feParams(inputSampleRate),
Chris@223 63 m_defaultFeParams(44100), // parameter descriptors can't depend on samplerate
Chris@243 64 m_secondReferenceFrequency(m_defaultFeParams.referenceFrequency), // must be declared/initialised after m_defaultFeParams
Chris@103 65 m_fcParams(),
Chris@143 66 m_defaultFcParams(),
Chris@143 67 m_dParams(),
Chris@143 68 m_defaultDParams()
cannam@0 69 {
Chris@10 70 if (inputSampleRate < sampleRateMin) {
Chris@10 71 std::cerr << "MatchVampPlugin::MatchVampPlugin: input sample rate "
Chris@10 72 << inputSampleRate << " < min supported rate "
Chris@10 73 << sampleRateMin << ", plugin will refuse to initialise"
Chris@10 74 << std::endl;
Chris@10 75 }
Chris@10 76
cannam@0 77 if (!m_serialisingMutexInitialised) {
cannam@0 78 m_serialisingMutexInitialised = true;
cannam@0 79 #ifdef _WIN32
cannam@0 80 m_serialisingMutex = CreateMutex(NULL, FALSE, NULL);
cannam@0 81 #else
cannam@0 82 pthread_mutex_init(&m_serialisingMutex, 0);
cannam@0 83 #endif
cannam@0 84 }
cannam@0 85
Chris@107 86 m_pipeline = 0;
cannam@0 87 // std::cerr << "MatchVampPlugin::MatchVampPlugin(" << this << "): extant = " << ++extant << std::endl;
cannam@0 88 }
cannam@0 89
cannam@0 90 MatchVampPlugin::~MatchVampPlugin()
cannam@0 91 {
cannam@0 92 // std::cerr << "MatchVampPlugin::~MatchVampPlugin(" << this << "): extant = " << --extant << std::endl;
cannam@0 93
Chris@107 94 delete m_pipeline;
cannam@0 95
cannam@0 96 if (m_locked) {
cannam@0 97 #ifdef _WIN32
cannam@0 98 ReleaseMutex(m_serialisingMutex);
cannam@0 99 #else
cannam@0 100 pthread_mutex_unlock(&m_serialisingMutex);
cannam@0 101 #endif
cannam@0 102 m_locked = false;
cannam@0 103 }
cannam@0 104 }
cannam@0 105
cannam@0 106 string
cannam@0 107 MatchVampPlugin::getIdentifier() const
cannam@0 108 {
cannam@0 109 return "match";
cannam@0 110 }
cannam@0 111
cannam@0 112 string
cannam@0 113 MatchVampPlugin::getName() const
cannam@0 114 {
cannam@0 115 return "Match Performance Aligner";
cannam@0 116 }
cannam@0 117
cannam@0 118 string
cannam@0 119 MatchVampPlugin::getDescription() const
cannam@0 120 {
cannam@0 121 return "Calculate alignment between two performances in separate channel inputs";
cannam@0 122 }
cannam@0 123
cannam@0 124 string
cannam@0 125 MatchVampPlugin::getMaker() const
cannam@0 126 {
cannam@0 127 return "Simon Dixon (plugin by Chris Cannam)";
cannam@0 128 }
cannam@0 129
cannam@0 130 int
cannam@0 131 MatchVampPlugin::getPluginVersion() const
cannam@0 132 {
Chris@137 133 return 3;
cannam@0 134 }
cannam@0 135
cannam@0 136 string
cannam@0 137 MatchVampPlugin::getCopyright() const
cannam@0 138 {
cannam@0 139 return "GPL";
cannam@0 140 }
cannam@0 141
cannam@0 142 MatchVampPlugin::ParameterList
cannam@0 143 MatchVampPlugin::getParameterDescriptors() const
cannam@0 144 {
cannam@0 145 ParameterList list;
cannam@0 146
cannam@0 147 ParameterDescriptor desc;
Chris@18 148
Chris@162 149 desc.identifier = "freq1";
Chris@162 150 desc.name = "Tuning frequency of first input";
Chris@241 151 desc.description = "Tuning frequency (concert A) for the reference audio";
Chris@162 152 desc.minValue = 220.0;
Chris@162 153 desc.maxValue = 880.0;
Chris@188 154 desc.defaultValue = float(m_defaultFeParams.referenceFrequency);
Chris@162 155 desc.isQuantized = false;
Chris@162 156 desc.unit = "Hz";
Chris@162 157 list.push_back(desc);
Chris@162 158
Chris@162 159 desc.identifier = "freq2";
Chris@162 160 desc.name = "Tuning frequency of second input";
Chris@241 161 desc.description = "Tuning frequency (concert A) for the other audio";
Chris@162 162 desc.minValue = 220.0;
Chris@162 163 desc.maxValue = 880.0;
Chris@188 164 desc.defaultValue = float(m_defaultFeParams.referenceFrequency);
Chris@162 165 desc.isQuantized = false;
Chris@162 166 desc.unit = "Hz";
Chris@162 167 list.push_back(desc);
Chris@162 168
Chris@177 169 desc.identifier = "minfreq";
Chris@177 170 desc.name = "Minimum frequency";
Chris@241 171 desc.description = "Minimum frequency to include in features";
Chris@177 172 desc.minValue = 0.0;
Chris@188 173 desc.maxValue = float(m_inputSampleRate / 4.f);
Chris@188 174 desc.defaultValue = float(m_defaultFeParams.minFrequency);
Chris@177 175 desc.isQuantized = false;
Chris@177 176 desc.unit = "Hz";
Chris@177 177 list.push_back(desc);
Chris@177 178
Chris@177 179 desc.identifier = "maxfreq";
Chris@177 180 desc.name = "Maximum frequency";
Chris@241 181 desc.description = "Maximum frequency to include in features";
Chris@177 182 desc.minValue = 1000.0;
Chris@188 183 desc.maxValue = float(m_inputSampleRate / 2.f);
Chris@188 184 desc.defaultValue = float(m_defaultFeParams.maxFrequency);
Chris@177 185 desc.isQuantized = false;
Chris@177 186 desc.unit = "Hz";
Chris@177 187 list.push_back(desc);
Chris@177 188
Chris@162 189 desc.unit = "";
Chris@162 190
Chris@162 191 desc.identifier = "usechroma";
Chris@162 192 desc.name = "Feature type";
Chris@162 193 desc.description = "Whether to use warped spectrogram or chroma frequency map";
cannam@0 194 desc.minValue = 0;
cannam@0 195 desc.maxValue = 1;
Chris@162 196 desc.defaultValue = m_defaultFeParams.useChromaFrequencyMap ? 1 : 0;
Chris@162 197 desc.isQuantized = true;
Chris@162 198 desc.quantizeStep = 1;
Chris@162 199 desc.valueNames.clear();
Chris@162 200 desc.valueNames.push_back("Spectral");
Chris@162 201 desc.valueNames.push_back("Chroma");
Chris@162 202 list.push_back(desc);
Chris@162 203
Chris@162 204 desc.valueNames.clear();
Chris@162 205
Chris@162 206 desc.identifier = "usespecdiff";
Chris@162 207 desc.name = "Use feature difference";
Chris@162 208 desc.description = "Whether to use half-wave rectified feature-to-feature difference instead of straight spectral or chroma feature";
Chris@162 209 desc.minValue = 0;
Chris@162 210 desc.maxValue = 1;
Chris@188 211 desc.defaultValue = float(m_defaultFcParams.order);
cannam@0 212 desc.isQuantized = true;
cannam@0 213 desc.quantizeStep = 1;
cannam@0 214 list.push_back(desc);
cannam@0 215
Chris@18 216 desc.identifier = "framenorm";
Chris@162 217 desc.name = "Frame normalisation";
Chris@162 218 desc.description = "Type of normalisation to use for features";
Chris@18 219 desc.minValue = 0;
Chris@18 220 desc.maxValue = 2;
Chris@188 221 desc.defaultValue = float(m_defaultFcParams.norm);
Chris@18 222 desc.isQuantized = true;
Chris@18 223 desc.quantizeStep = 1;
Chris@18 224 desc.valueNames.clear();
Chris@18 225 desc.valueNames.push_back("None");
Chris@162 226 desc.valueNames.push_back("Sum to 1");
Chris@162 227 desc.valueNames.push_back("Long-term average");
Chris@18 228 list.push_back(desc);
Chris@18 229 desc.valueNames.clear();
Chris@188 230 desc.defaultValue = float(m_defaultFcParams.silenceThreshold);
Chris@151 231
Chris@156 232 desc.identifier = "metric";
Chris@156 233 desc.name = "Distance metric";
Chris@241 234 desc.description = "Metric for distance calculations";
Chris@156 235 desc.minValue = 0;
Chris@157 236 desc.maxValue = 2;
Chris@188 237 desc.defaultValue = float(m_defaultDParams.metric);
Chris@156 238 desc.isQuantized = true;
Chris@156 239 desc.quantizeStep = 1;
Chris@156 240 desc.valueNames.clear();
Chris@157 241 desc.valueNames.push_back("Manhattan");
Chris@156 242 desc.valueNames.push_back("Euclidean");
Chris@156 243 desc.valueNames.push_back("Cosine");
Chris@156 244 list.push_back(desc);
Chris@156 245 desc.valueNames.clear();
Chris@156 246
Chris@162 247 desc.identifier = "distnorm";
Chris@162 248 desc.name = "Distance normalisation";
Chris@162 249 desc.description = "Type of normalisation to use for distance metric";
Chris@162 250 desc.minValue = 0;
Chris@162 251 desc.maxValue = 2;
Chris@188 252 desc.defaultValue = float(m_defaultDParams.norm);
Chris@162 253 desc.isQuantized = true;
Chris@162 254 desc.quantizeStep = 1;
Chris@162 255 desc.valueNames.clear();
Chris@162 256 desc.valueNames.push_back("None");
Chris@162 257 desc.valueNames.push_back("Sum of frames");
Chris@162 258 desc.valueNames.push_back("Log sum of frames");
Chris@162 259 list.push_back(desc);
Chris@162 260 desc.valueNames.clear();
Chris@162 261
Chris@198 262 #ifdef USE_COMPACT_TYPES
Chris@198 263 desc.identifier = "scale";
Chris@198 264 desc.name = "Distance scale";
Chris@198 265 desc.description = "Scale factor to use when mapping distance metric into byte range for storage";
Chris@198 266 desc.minValue = 1;
Chris@198 267 desc.maxValue = 1000;
Chris@198 268 desc.defaultValue = float(m_defaultDParams.scale);
Chris@198 269 desc.isQuantized = false;
Chris@198 270 list.push_back(desc);
Chris@198 271 #endif
Chris@198 272
Chris@162 273 desc.identifier = "silencethreshold";
Chris@162 274 desc.name = "Silence threshold";
Chris@162 275 desc.description = "Total frame energy threshold below which a feature will be regarded as silent";
Chris@162 276 desc.minValue = 0;
Chris@165 277 desc.maxValue = 0.1f;
Chris@188 278 desc.defaultValue = float(m_defaultFcParams.silenceThreshold);
Chris@162 279 desc.isQuantized = false;
Chris@162 280 list.push_back(desc);
Chris@162 281
Chris@151 282 desc.identifier = "noise";
Chris@162 283 desc.name = "Add noise";
Chris@241 284 desc.description = "Whether to mix in a small constant white noise term when calculating feature distance. This can improve alignment against sources containing cleanly synthesised audio";
Chris@151 285 desc.minValue = 0;
Chris@151 286 desc.maxValue = 1;
Chris@188 287 desc.defaultValue = float(m_defaultDParams.noise);
Chris@151 288 desc.isQuantized = true;
Chris@151 289 desc.quantizeStep = 1;
Chris@151 290 list.push_back(desc);
Chris@136 291
Chris@25 292 desc.identifier = "gradientlimit";
Chris@162 293 desc.name = "Gradient limit";
Chris@18 294 desc.description = "Limit of number of frames that will be accepted from one source without a frame from the other source being accepted";
Chris@18 295 desc.minValue = 1;
Chris@18 296 desc.maxValue = 10;
Chris@188 297 desc.defaultValue = float(m_defaultParams.maxRunCount);
Chris@18 298 desc.isQuantized = true;
Chris@18 299 desc.quantizeStep = 1;
Chris@18 300 list.push_back(desc);
Chris@18 301
Chris@25 302 desc.identifier = "zonewidth";
Chris@162 303 desc.name = "Search zone width";
Chris@25 304 desc.description = "Width of the search zone (error margin) either side of the ongoing match position, in seconds";
Chris@25 305 desc.minValue = 1;
Chris@25 306 desc.maxValue = 60;
Chris@188 307 desc.defaultValue = float(m_defaultParams.blockTime);
Chris@25 308 desc.isQuantized = true;
Chris@25 309 desc.quantizeStep = 1;
Chris@25 310 desc.unit = "s";
Chris@25 311 list.push_back(desc);
Chris@25 312
Chris@83 313 desc.identifier = "diagonalweight";
Chris@162 314 desc.name = "Diagonal weight";
Chris@83 315 desc.description = "Weight applied to cost of diagonal step relative to horizontal or vertical step. The default of 2.0 is good for gross tracking of quite different performances; closer to 1.0 produces a smoother path for performances more similar in tempo";
Chris@83 316 desc.minValue = 1.0;
Chris@86 317 desc.maxValue = 2.0;
Chris@188 318 desc.defaultValue = float(m_defaultParams.diagonalWeight);
Chris@83 319 desc.isQuantized = false;
Chris@83 320 desc.unit = "";
Chris@83 321 list.push_back(desc);
Chris@83 322
Chris@32 323 desc.identifier = "smooth";
Chris@162 324 desc.name = "Use path smoothing";
Chris@138 325 desc.description = "Smooth the path by replacing steps with diagonals. (This was enabled by default in earlier versions of the MATCH plugin, but the default now is to produce an un-smoothed path.)";
Chris@32 326 desc.minValue = 0;
Chris@32 327 desc.maxValue = 1;
Chris@138 328 desc.defaultValue = 0;
Chris@32 329 desc.isQuantized = true;
Chris@32 330 desc.quantizeStep = 1;
Chris@32 331 desc.unit = "";
Chris@32 332 list.push_back(desc);
Chris@32 333
Chris@162 334 desc.identifier = "serialise";
Chris@162 335 desc.name = "Serialise plugin invocations";
Chris@162 336 desc.description = "Reduce potential memory load at the expense of multiprocessor performance by serialising multi-threaded plugin runs";
Chris@162 337 desc.minValue = 0;
Chris@162 338 desc.maxValue = 1;
Chris@162 339 desc.defaultValue = 0;
Chris@162 340 desc.isQuantized = true;
Chris@162 341 desc.quantizeStep = 1;
Chris@161 342 list.push_back(desc);
Chris@236 343
cannam@0 344 return list;
cannam@0 345 }
cannam@0 346
cannam@0 347 float
cannam@0 348 MatchVampPlugin::getParameter(std::string name) const
cannam@0 349 {
cannam@0 350 if (name == "serialise") {
cannam@0 351 return m_serialise ? 1.0 : 0.0;
Chris@18 352 } else if (name == "framenorm") {
Chris@188 353 return float(m_fcParams.norm);
Chris@18 354 } else if (name == "distnorm") {
Chris@188 355 return float(m_dParams.norm);
Chris@18 356 } else if (name == "usespecdiff") {
Chris@188 357 return float(m_fcParams.order);
Chris@18 358 } else if (name == "usechroma") {
Chris@38 359 return m_feParams.useChromaFrequencyMap ? 1.0 : 0.0;
Chris@25 360 } else if (name == "gradientlimit") {
Chris@188 361 return float(m_params.maxRunCount);
Chris@83 362 } else if (name == "diagonalweight") {
Chris@188 363 return float(m_params.diagonalWeight);
Chris@25 364 } else if (name == "zonewidth") {
Chris@188 365 return float(m_params.blockTime);
Chris@32 366 } else if (name == "smooth") {
Chris@32 367 return m_smooth ? 1.0 : 0.0;
Chris@136 368 } else if (name == "silencethreshold") {
Chris@188 369 return float(m_fcParams.silenceThreshold);
Chris@156 370 } else if (name == "metric") {
Chris@188 371 return float(m_dParams.metric);
Chris@151 372 } else if (name == "noise") {
Chris@151 373 return m_dParams.noise;
Chris@198 374 } else if (name == "scale") {
Chris@198 375 return float(m_dParams.scale);
Chris@161 376 } else if (name == "freq1") {
Chris@188 377 return float(m_feParams.referenceFrequency);
Chris@161 378 } else if (name == "freq2") {
Chris@188 379 return float(m_secondReferenceFrequency);
Chris@177 380 } else if (name == "minfreq") {
Chris@188 381 return float(m_feParams.minFrequency);
Chris@177 382 } else if (name == "maxfreq") {
Chris@188 383 return float(m_feParams.maxFrequency);
cannam@0 384 }
Chris@18 385
cannam@0 386 return 0.0;
cannam@0 387 }
cannam@0 388
cannam@0 389 void
cannam@0 390 MatchVampPlugin::setParameter(std::string name, float value)
cannam@0 391 {
cannam@0 392 if (name == "serialise") {
cannam@0 393 m_serialise = (value > 0.5);
Chris@18 394 } else if (name == "framenorm") {
Chris@188 395 m_fcParams.norm = FeatureConditioner::Normalisation(int(value + 0.1));
Chris@18 396 } else if (name == "distnorm") {
Chris@188 397 m_dParams.norm = DistanceMetric::DistanceNormalisation(int(value + 0.1));
Chris@18 398 } else if (name == "usespecdiff") {
Chris@188 399 m_fcParams.order = FeatureConditioner::OutputOrder(int(value + 0.1));
Chris@18 400 } else if (name == "usechroma") {
Chris@38 401 m_feParams.useChromaFrequencyMap = (value > 0.5);
Chris@25 402 } else if (name == "gradientlimit") {
Chris@18 403 m_params.maxRunCount = int(value + 0.1);
Chris@83 404 } else if (name == "diagonalweight") {
Chris@83 405 m_params.diagonalWeight = value;
Chris@25 406 } else if (name == "zonewidth") {
Chris@25 407 m_params.blockTime = value;
Chris@32 408 } else if (name == "smooth") {
Chris@32 409 m_smooth = (value > 0.5);
Chris@136 410 } else if (name == "silencethreshold") {
Chris@136 411 m_fcParams.silenceThreshold = value;
Chris@156 412 } else if (name == "metric") {
Chris@188 413 m_dParams.metric = DistanceMetric::Metric(int(value + 0.1));
Chris@151 414 } else if (name == "noise") {
Chris@188 415 m_dParams.noise = DistanceMetric::NoiseAddition(int(value + 0.1));
Chris@198 416 } else if (name == "scale") {
Chris@198 417 m_dParams.scale = value;
Chris@161 418 } else if (name == "freq1") {
Chris@161 419 m_feParams.referenceFrequency = value;
Chris@161 420 } else if (name == "freq2") {
Chris@161 421 m_secondReferenceFrequency = value;
Chris@177 422 } else if (name == "minfreq") {
Chris@177 423 m_feParams.minFrequency = value;
Chris@177 424 } else if (name == "maxfreq") {
Chris@177 425 m_feParams.maxFrequency = value;
cannam@0 426 }
cannam@0 427 }
cannam@0 428
cannam@0 429 size_t
cannam@0 430 MatchVampPlugin::getPreferredStepSize() const
cannam@0 431 {
Chris@52 432 return int(m_inputSampleRate * defaultStepTime + 0.001);
cannam@0 433 }
cannam@0 434
cannam@0 435 size_t
cannam@0 436 MatchVampPlugin::getPreferredBlockSize() const
cannam@0 437 {
Chris@216 438 return m_defaultFeParams.fftSize;
cannam@0 439 }
cannam@0 440
cannam@0 441 void
Chris@17 442 MatchVampPlugin::createMatchers()
cannam@0 443 {
Chris@17 444 m_params.hopTime = m_stepTime;
Chris@38 445 m_feParams.fftSize = m_blockSize;
Chris@107 446
Chris@161 447 m_pipeline = new MatchPipeline(m_feParams, m_fcParams, m_dParams, m_params,
Chris@161 448 m_secondReferenceFrequency);
cannam@0 449 }
cannam@0 450
cannam@0 451 bool
cannam@0 452 MatchVampPlugin::initialise(size_t channels, size_t stepSize, size_t blockSize)
cannam@0 453 {
Chris@10 454 if (m_inputSampleRate < sampleRateMin) {
Chris@10 455 std::cerr << "MatchVampPlugin::MatchVampPlugin: input sample rate "
Chris@10 456 << m_inputSampleRate << " < min supported rate "
Chris@10 457 << sampleRateMin << std::endl;
Chris@10 458 return false;
Chris@10 459 }
cannam@0 460 if (channels < getMinChannelCount() ||
cannam@0 461 channels > getMaxChannelCount()) return false;
cannam@1 462 if (stepSize > blockSize/2 ||
cannam@0 463 blockSize != getPreferredBlockSize()) return false;
Chris@15 464
Chris@188 465 m_stepSize = int(stepSize);
Chris@15 466 m_stepTime = float(stepSize) / m_inputSampleRate;
Chris@188 467 m_blockSize = int(blockSize);
Chris@15 468
Chris@15 469 createMatchers();
cannam@0 470 m_begin = true;
cannam@0 471 m_locked = false;
Chris@15 472
cannam@0 473 return true;
cannam@0 474 }
cannam@0 475
cannam@0 476 void
cannam@0 477 MatchVampPlugin::reset()
cannam@0 478 {
Chris@107 479 delete m_pipeline;
Chris@107 480 m_pipeline = 0;
Chris@74 481 m_frameNo = 0;
cannam@6 482 createMatchers();
cannam@6 483 m_begin = true;
cannam@6 484 m_locked = false;
cannam@0 485 }
cannam@0 486
cannam@0 487 MatchVampPlugin::OutputList
cannam@0 488 MatchVampPlugin::getOutputDescriptors() const
cannam@0 489 {
cannam@0 490 OutputList list;
cannam@0 491
Chris@52 492 float outRate = 1.0f / m_stepTime;
cannam@0 493
cannam@0 494 OutputDescriptor desc;
cannam@0 495 desc.identifier = "path";
cannam@0 496 desc.name = "Path";
cannam@0 497 desc.description = "Alignment path";
cannam@0 498 desc.unit = "";
cannam@0 499 desc.hasFixedBinCount = true;
cannam@0 500 desc.binCount = 1;
cannam@0 501 desc.hasKnownExtents = false;
cannam@0 502 desc.isQuantized = true;
cannam@0 503 desc.quantizeStep = 1;
cannam@0 504 desc.sampleType = OutputDescriptor::VariableSampleRate;
cannam@0 505 desc.sampleRate = outRate;
Chris@180 506 m_pathOutNo = int(list.size());
cannam@0 507 list.push_back(desc);
cannam@0 508
cannam@0 509 desc.identifier = "a_b";
cannam@0 510 desc.name = "A-B Timeline";
cannam@0 511 desc.description = "Timing in performance B corresponding to moments in performance A";
cannam@0 512 desc.unit = "sec";
cannam@0 513 desc.hasFixedBinCount = true;
cannam@0 514 desc.binCount = 1;
cannam@0 515 desc.hasKnownExtents = false;
cannam@0 516 desc.isQuantized = false;
cannam@0 517 desc.sampleType = OutputDescriptor::VariableSampleRate;
cannam@0 518 desc.sampleRate = outRate;
Chris@180 519 m_abOutNo = int(list.size());
cannam@0 520 list.push_back(desc);
cannam@0 521
cannam@0 522 desc.identifier = "b_a";
cannam@0 523 desc.name = "B-A Timeline";
cannam@0 524 desc.description = "Timing in performance A corresponding to moments in performance B";
cannam@0 525 desc.unit = "sec";
cannam@0 526 desc.hasFixedBinCount = true;
cannam@0 527 desc.binCount = 1;
cannam@0 528 desc.hasKnownExtents = false;
cannam@0 529 desc.isQuantized = false;
cannam@0 530 desc.sampleType = OutputDescriptor::VariableSampleRate;
cannam@0 531 desc.sampleRate = outRate;
Chris@180 532 m_baOutNo = int(list.size());
cannam@0 533 list.push_back(desc);
cannam@0 534
cannam@0 535 desc.identifier = "a_b_divergence";
cannam@0 536 desc.name = "A-B Divergence";
cannam@0 537 desc.description = "Difference between timings in performances A and B";
cannam@0 538 desc.unit = "sec";
cannam@0 539 desc.hasFixedBinCount = true;
cannam@0 540 desc.binCount = 1;
cannam@0 541 desc.hasKnownExtents = false;
cannam@0 542 desc.isQuantized = false;
cannam@0 543 desc.sampleType = OutputDescriptor::VariableSampleRate;
cannam@0 544 desc.sampleRate = outRate;
Chris@180 545 m_abDivOutNo = int(list.size());
cannam@0 546 list.push_back(desc);
cannam@0 547
cannam@0 548 desc.identifier = "a_b_temporatio";
cannam@0 549 desc.name = "A-B Tempo Ratio";
cannam@0 550 desc.description = "Ratio of tempi between performances A and B";
cannam@0 551 desc.unit = "";
cannam@0 552 desc.hasFixedBinCount = true;
cannam@0 553 desc.binCount = 1;
cannam@0 554 desc.hasKnownExtents = false;
cannam@0 555 desc.isQuantized = false;
cannam@0 556 desc.sampleType = OutputDescriptor::VariableSampleRate;
cannam@0 557 desc.sampleRate = outRate;
Chris@180 558 m_abRatioOutNo = int(list.size());
cannam@0 559 list.push_back(desc);
cannam@0 560
Chris@38 561 int featureSize = FeatureExtractor(m_feParams).getFeatureSize();
Chris@38 562
Chris@15 563 desc.identifier = "a_features";
Chris@140 564 desc.name = "Raw A Features";
Chris@15 565 desc.description = "Spectral features extracted from performance A";
Chris@15 566 desc.unit = "";
Chris@15 567 desc.hasFixedBinCount = true;
Chris@38 568 desc.binCount = featureSize;
Chris@15 569 desc.hasKnownExtents = false;
Chris@15 570 desc.isQuantized = false;
Chris@16 571 desc.sampleType = OutputDescriptor::FixedSampleRate;
Chris@15 572 desc.sampleRate = outRate;
Chris@180 573 m_aFeaturesOutNo = int(list.size());
Chris@16 574 list.push_back(desc);
Chris@16 575
Chris@16 576 desc.identifier = "b_features";
Chris@140 577 desc.name = "Raw B Features";
Chris@16 578 desc.description = "Spectral features extracted from performance B";
Chris@16 579 desc.unit = "";
Chris@16 580 desc.hasFixedBinCount = true;
Chris@38 581 desc.binCount = featureSize;
Chris@16 582 desc.hasKnownExtents = false;
Chris@16 583 desc.isQuantized = false;
Chris@16 584 desc.sampleType = OutputDescriptor::FixedSampleRate;
Chris@16 585 desc.sampleRate = outRate;
Chris@180 586 m_bFeaturesOutNo = int(list.size());
Chris@15 587 list.push_back(desc);
Chris@15 588
Chris@140 589 desc.identifier = "a_cfeatures";
Chris@140 590 desc.name = "Conditioned A Features";
Chris@140 591 desc.description = "Spectral features extracted from performance A, after normalisation and conditioning";
Chris@140 592 desc.unit = "";
Chris@140 593 desc.hasFixedBinCount = true;
Chris@140 594 desc.binCount = featureSize;
Chris@140 595 desc.hasKnownExtents = false;
Chris@140 596 desc.isQuantized = false;
Chris@140 597 desc.sampleType = OutputDescriptor::FixedSampleRate;
Chris@140 598 desc.sampleRate = outRate;
Chris@180 599 m_caFeaturesOutNo = int(list.size());
Chris@140 600 list.push_back(desc);
Chris@140 601
Chris@140 602 desc.identifier = "b_cfeatures";
Chris@140 603 desc.name = "Conditioned B Features";
Chris@140 604 desc.description = "Spectral features extracted from performance B, after norrmalisation and conditioning";
Chris@140 605 desc.unit = "";
Chris@140 606 desc.hasFixedBinCount = true;
Chris@140 607 desc.binCount = featureSize;
Chris@140 608 desc.hasKnownExtents = false;
Chris@140 609 desc.isQuantized = false;
Chris@140 610 desc.sampleType = OutputDescriptor::FixedSampleRate;
Chris@140 611 desc.sampleRate = outRate;
Chris@180 612 m_cbFeaturesOutNo = int(list.size());
Chris@140 613 list.push_back(desc);
Chris@140 614
Chris@163 615 desc.identifier = "overall_cost";
Chris@163 616 desc.name = "Overall Cost";
Chris@163 617 desc.description = "Normalised overall path cost for the cheapest path";
Chris@163 618 desc.unit = "";
Chris@163 619 desc.hasFixedBinCount = true;
Chris@163 620 desc.binCount = 1;
Chris@163 621 desc.hasKnownExtents = false;
Chris@163 622 desc.isQuantized = false;
Chris@163 623 desc.sampleType = OutputDescriptor::FixedSampleRate;
Chris@163 624 desc.sampleRate = 1;
Chris@180 625 m_overallCostOutNo = int(list.size());
Chris@163 626 list.push_back(desc);
Chris@163 627
cannam@0 628 return list;
cannam@0 629 }
cannam@0 630
cannam@0 631 MatchVampPlugin::FeatureSet
cannam@0 632 MatchVampPlugin::process(const float *const *inputBuffers,
cannam@0 633 Vamp::RealTime timestamp)
cannam@0 634 {
cannam@0 635 if (m_begin) {
cannam@0 636 if (!m_locked && m_serialise) {
cannam@0 637 m_locked = true;
cannam@0 638 #ifdef _WIN32
cannam@0 639 WaitForSingleObject(m_serialisingMutex, INFINITE);
cannam@0 640 #else
cannam@0 641 pthread_mutex_lock(&m_serialisingMutex);
cannam@0 642 #endif
cannam@0 643 }
Chris@10 644 m_startTime = timestamp;
cannam@0 645 m_begin = false;
cannam@0 646 }
cannam@0 647
cannam@0 648 // std::cerr << timestamp.toString();
cannam@0 649
Chris@107 650 m_pipeline->feedFrequencyDomainAudio(inputBuffers[0], inputBuffers[1]);
Chris@74 651
Chris@140 652 FeatureSet returnFeatures;
Chris@140 653
Chris@185 654 feature_t f1, f2;
Chris@140 655 m_pipeline->extractFeatures(f1, f2);
Chris@16 656
Chris@185 657 feature_t cf1, cf2;
Chris@140 658 m_pipeline->extractConditionedFeatures(cf1, cf2);
Chris@16 659
Chris@16 660 Feature f;
Chris@16 661 f.hasTimestamp = false;
Chris@16 662
Chris@74 663 f.values.clear();
Chris@188 664 for (auto v: f1) f.values.push_back(float(v));
Chris@74 665 returnFeatures[m_aFeaturesOutNo].push_back(f);
Chris@16 666
Chris@74 667 f.values.clear();
Chris@188 668 for (auto v: f2) f.values.push_back(float(v));
Chris@74 669 returnFeatures[m_bFeaturesOutNo].push_back(f);
cannam@0 670
Chris@140 671 f.values.clear();
Chris@188 672 for (auto v: cf1) f.values.push_back(float(v));
Chris@140 673 returnFeatures[m_caFeaturesOutNo].push_back(f);
Chris@140 674
Chris@140 675 f.values.clear();
Chris@188 676 for (auto v: cf2) f.values.push_back(float(v));
Chris@140 677 returnFeatures[m_cbFeaturesOutNo].push_back(f);
Chris@140 678
cannam@0 679 // std::cerr << ".";
cannam@0 680 // std::cerr << std::endl;
cannam@0 681
Chris@74 682 ++m_frameNo;
Chris@74 683
Chris@16 684 return returnFeatures;
cannam@0 685 }
cannam@0 686
cannam@0 687 MatchVampPlugin::FeatureSet
cannam@0 688 MatchVampPlugin::getRemainingFeatures()
cannam@0 689 {
Chris@107 690 m_pipeline->finish();
Chris@74 691
Chris@63 692 FeatureSet returnFeatures;
Chris@63 693
cannam@0 694 std::vector<int> pathx;
cannam@0 695 std::vector<int> pathy;
Chris@155 696 int len = m_pipeline->retrievePath(m_smooth, pathx, pathy);
cannam@0 697
Chris@173 698 double cost = m_pipeline->getOverallCost();
Chris@163 699 Feature costFeature;
Chris@163 700 costFeature.hasTimestamp = false;
Chris@188 701 costFeature.values.push_back(float(cost));
Chris@163 702 returnFeatures[m_overallCostOutNo].push_back(costFeature);
Chris@163 703
cannam@0 704 int prevx = 0;
cannam@0 705 int prevy = 0;
cannam@0 706
Chris@30 707 for (int i = 0; i < len; ++i) {
cannam@0 708
cannam@0 709 int x = pathx[i];
cannam@0 710 int y = pathy[i];
cannam@0 711
cannam@0 712 Vamp::RealTime xt = Vamp::RealTime::frame2RealTime
Chris@180 713 (x * m_stepSize, int(m_inputSampleRate + 0.5));
cannam@0 714 Vamp::RealTime yt = Vamp::RealTime::frame2RealTime
Chris@180 715 (y * m_stepSize, int(m_inputSampleRate + 0.5));
cannam@0 716
cannam@0 717 Feature feature;
cannam@0 718 feature.hasTimestamp = true;
Chris@10 719 feature.timestamp = m_startTime + xt;
cannam@0 720 feature.values.clear();
Chris@52 721 feature.values.push_back(float(yt.sec + double(yt.nsec)/1.0e9));
Chris@16 722 returnFeatures[m_pathOutNo].push_back(feature);
cannam@0 723
cannam@0 724 if (x != prevx) {
cannam@0 725
cannam@0 726 feature.hasTimestamp = true;
Chris@10 727 feature.timestamp = m_startTime + xt;
cannam@0 728 feature.values.clear();
Chris@52 729 feature.values.push_back(float(yt.sec + yt.msec()/1000.0));
Chris@16 730 returnFeatures[m_abOutNo].push_back(feature);
cannam@0 731
cannam@0 732 Vamp::RealTime diff = yt - xt;
cannam@0 733 feature.values.clear();
Chris@52 734 feature.values.push_back(float(diff.sec + diff.msec()/1000.0));
Chris@16 735 returnFeatures[m_abDivOutNo].push_back(feature);
cannam@0 736
cannam@0 737 if (i > 0) {
cannam@0 738 int lookback = 100; //!!! arbitrary
cannam@0 739 if (lookback > i) lookback = i;
cannam@0 740 int xdiff = x - pathx[i-lookback];
cannam@0 741 int ydiff = y - pathy[i-lookback];
cannam@0 742 if (xdiff != 0 && ydiff != 0) {
cannam@0 743 float ratio = float(ydiff)/float(xdiff);
cannam@0 744 if (ratio < 8 && ratio > (1.0/8)) { //!!! just for now, since we aren't dealing properly with silence yet
cannam@0 745 feature.values.clear();
cannam@0 746 feature.values.push_back(ratio);
Chris@16 747 returnFeatures[m_abRatioOutNo].push_back(feature);
cannam@0 748 }
cannam@0 749 }
cannam@0 750 }
cannam@0 751 }
cannam@0 752
cannam@0 753 if (y != prevy) {
cannam@0 754 feature.hasTimestamp = true;
Chris@10 755 feature.timestamp = m_startTime + yt;
cannam@0 756 feature.values.clear();
Chris@52 757 feature.values.push_back(float(xt.sec + xt.msec()/1000.0));
Chris@16 758 returnFeatures[m_baOutNo].push_back(feature);
cannam@0 759 }
cannam@0 760
cannam@0 761 prevx = x;
cannam@0 762 prevy = y;
cannam@0 763 }
cannam@0 764
Chris@107 765 delete m_pipeline;
Chris@107 766 m_pipeline = 0;
cannam@0 767
cannam@0 768 if (m_locked) {
cannam@0 769 #ifdef _WIN32
cannam@0 770 ReleaseMutex(m_serialisingMutex);
cannam@0 771 #else
cannam@0 772 pthread_mutex_unlock(&m_serialisingMutex);
cannam@0 773 #endif
cannam@0 774 m_locked = false;
cannam@0 775 }
cannam@0 776
cannam@0 777 return returnFeatures;
cannam@0 778
cannam@0 779
cannam@0 780 /*
Chris@30 781 for (int i = 0; i < len; ++i) {
cannam@0 782 std::cerr << i << ": [" << pathx[i] << "," << pathy[i] << "]" << std::endl;
cannam@0 783 }
cannam@0 784
cannam@0 785 std::cerr << std::endl;
cannam@0 786 std::cerr << "File: A" << std::endl;
cannam@0 787 std::cerr << "Marks: -1" << std::endl;
cannam@0 788 std::cerr << "FixedPoints: true 0" << std::endl;
cannam@0 789 std::cerr << "0" << std::endl;
cannam@0 790 std::cerr << "0" << std::endl;
cannam@0 791 std::cerr << "0" << std::endl;
cannam@0 792 std::cerr << "0" << std::endl;
cannam@0 793 std::cerr << "File: B" << std::endl;
cannam@0 794 std::cerr << "Marks: 0" << std::endl;
cannam@0 795 std::cerr << "FixedPoints: true 0" << std::endl;
cannam@0 796 std::cerr << "0.02" << std::endl;
cannam@0 797 std::cerr << "0.02" << std::endl;
cannam@0 798
Chris@30 799 std::cerr << len << std::endl;
Chris@30 800 for (int i = 0; i < len; ++i) {
cannam@0 801 std::cerr << pathx[i] << std::endl;
cannam@0 802 }
cannam@0 803
Chris@30 804 std::cerr << len << std::endl;
Chris@30 805 for (int i = 0; i < len; ++i) {
cannam@0 806 std::cerr << pathy[i] << std::endl;
cannam@0 807 }
cannam@0 808 */
cannam@0 809 }