annotate src/SubsequenceMatchVampPlugin.cpp @ 246:aac9ad4064ea subsequence tip

Fix incorrect handling of silent tail in the non-subsequence MATCH phase; some debug output changes
author Chris Cannam
date Fri, 24 Jul 2020 14:29:55 +0100
parents f68277668ad4
children
rev   line source
Chris@236 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@236 2
Chris@236 3 /*
Chris@236 4 Vamp feature extraction plugin using the MATCH audio alignment
Chris@236 5 algorithm.
Chris@236 6
Chris@236 7 Centre for Digital Music, Queen Mary, University of London.
Chris@236 8 Copyright (c) 2007-2020 Simon Dixon, Chris Cannam, and Queen Mary
Chris@236 9 University of London, Copyright (c) 2014-2015 Tido GmbH.
Chris@236 10
Chris@236 11 This program is free software; you can redistribute it and/or
Chris@236 12 modify it under the terms of the GNU General Public License as
Chris@236 13 published by the Free Software Foundation; either version 2 of the
Chris@236 14 License, or (at your option) any later version. See the file
Chris@236 15 COPYING included with this distribution for more information.
Chris@236 16 */
Chris@236 17
#include "SubsequenceMatchVampPlugin.h"
#include "FullDTW.h"
#include "MatchPipeline.h"

#include <vamp/vamp.h>
#include <vamp-sdk/RealTime.h>

#include <vector>
#include <algorithm>
#include <mutex>
Chris@236 27
Chris@236 28 using std::string;
Chris@237 29 using std::vector;
Chris@237 30 using std::cerr;
Chris@237 31 using std::cout;
Chris@237 32 using std::endl;
Chris@236 33
Chris@246 34 //#define DEBUG_SUBSEQUENCE_MATCH 1
Chris@246 35
// File-local defaults shared by the constructor, the parameter
// descriptors and the step-size calculations in this file.
namespace {

// The frequency map and crossover bin must stay valid for an FFT
// whose length is fixed in seconds, so sample rates below this are
// rejected.
float sampleRateMin = 5000.f;

// Default hop between analysis frames, in seconds.
float defaultStepTime = 0.020f;

// Default downsample factor for the coarse subsequence-alignment step.
int defaultCoarseDownsample = 50;

// Default diagonal weight for the anchored (non-subsequence) MATCH phase.
double defaultAnchoredDiagonalWeight = 2.0;

// Default diagonal weight for the coarse subsequence alignment phase.
double defaultSubsequenceDiagonalWeight = 0.75;

}
Chris@241 44
Chris@236 45 SubsequenceMatchVampPlugin::SubsequenceMatchVampPlugin(float inputSampleRate) :
Chris@236 46 Plugin(inputSampleRate),
Chris@236 47 m_stepSize(int(inputSampleRate * defaultStepTime + 0.001)),
Chris@236 48 m_stepTime(defaultStepTime),
Chris@236 49 m_blockSize(2048),
Chris@241 50 m_coarseDownsample(defaultCoarseDownsample),
Chris@246 51 m_downsamplePeaks(false),
Chris@236 52 m_serialise(false),
Chris@236 53 m_smooth(false),
Chris@237 54 m_channelCount(0),
Chris@236 55 m_params(defaultStepTime),
Chris@236 56 m_defaultParams(defaultStepTime),
Chris@236 57 m_feParams(inputSampleRate),
Chris@236 58 m_defaultFeParams(44100), // parameter descriptors can't depend on samplerate
Chris@243 59 m_secondReferenceFrequency(m_defaultFeParams.referenceFrequency), // must be declared/initialised after m_defaultFeParams
Chris@236 60 m_fcParams(),
Chris@236 61 m_defaultFcParams(),
Chris@236 62 m_dParams(),
Chris@243 63 m_defaultDParams(),
Chris@243 64 m_fdParams(defaultStepTime),
Chris@243 65 m_defaultFdParams(defaultStepTime)
Chris@236 66 {
Chris@243 67 // for the coarse subsequence span aligner:
Chris@243 68 m_fdParams.diagonalWeight = m_defaultFdParams.diagonalWeight =
Chris@243 69 defaultSubsequenceDiagonalWeight;
Chris@243 70
Chris@243 71 // for the MATCH phase following subsequence span identification:
Chris@243 72 m_params.diagonalWeight = m_defaultParams.diagonalWeight =
Chris@243 73 defaultAnchoredDiagonalWeight;
Chris@243 74
Chris@243 75 // and of course
Chris@243 76 m_fdParams.subsequence = m_defaultFdParams.subsequence = true;
Chris@243 77
Chris@236 78 if (inputSampleRate < sampleRateMin) {
Chris@237 79 cerr << "SubsequenceMatchVampPlugin::SubsequenceMatchVampPlugin: input sample rate "
Chris@237 80 << inputSampleRate << " < min supported rate "
Chris@237 81 << sampleRateMin << ", plugin will refuse to initialise" << endl;
Chris@236 82 }
Chris@236 83 }
Chris@236 84
Chris@236 85 SubsequenceMatchVampPlugin::~SubsequenceMatchVampPlugin()
Chris@236 86 {
Chris@236 87 }
Chris@236 88
Chris@236 89 string
Chris@236 90 SubsequenceMatchVampPlugin::getIdentifier() const
Chris@236 91 {
Chris@237 92 return "match-subsequence";
Chris@236 93 }
Chris@236 94
Chris@236 95 string
Chris@236 96 SubsequenceMatchVampPlugin::getName() const
Chris@236 97 {
Chris@236 98 return "Match Subsequence Aligner";
Chris@236 99 }
Chris@236 100
Chris@236 101 string
Chris@236 102 SubsequenceMatchVampPlugin::getDescription() const
Chris@236 103 {
Chris@236 104 return "Calculate alignment between a reference performance and a performance known to represent only part of the same material";
Chris@236 105 }
Chris@236 106
Chris@236 107 string
Chris@236 108 SubsequenceMatchVampPlugin::getMaker() const
Chris@236 109 {
Chris@236 110 return "Simon Dixon (plugin by Chris Cannam)";
Chris@236 111 }
Chris@236 112
Chris@236 113 int
Chris@236 114 SubsequenceMatchVampPlugin::getPluginVersion() const
Chris@236 115 {
Chris@236 116 return 3;
Chris@236 117 }
Chris@236 118
Chris@236 119 string
Chris@236 120 SubsequenceMatchVampPlugin::getCopyright() const
Chris@236 121 {
Chris@236 122 return "GPL";
Chris@236 123 }
Chris@236 124
Chris@236 125 SubsequenceMatchVampPlugin::ParameterList
Chris@236 126 SubsequenceMatchVampPlugin::getParameterDescriptors() const
Chris@236 127 {
Chris@236 128 ParameterList list;
Chris@236 129
Chris@236 130 ParameterDescriptor desc;
Chris@236 131
Chris@236 132 desc.identifier = "freq1";
Chris@236 133 desc.name = "Tuning frequency of first input";
Chris@241 134 desc.description = "Tuning frequency (concert A) for the reference audio";
Chris@236 135 desc.minValue = 220.0;
Chris@236 136 desc.maxValue = 880.0;
Chris@236 137 desc.defaultValue = float(m_defaultFeParams.referenceFrequency);
Chris@236 138 desc.isQuantized = false;
Chris@236 139 desc.unit = "Hz";
Chris@236 140 list.push_back(desc);
Chris@236 141
Chris@236 142 desc.identifier = "freq2";
Chris@236 143 desc.name = "Tuning frequency of second input";
Chris@246 144 desc.description = "Tuning frequency (concert A) for the other audio.";
Chris@236 145 desc.minValue = 220.0;
Chris@236 146 desc.maxValue = 880.0;
Chris@236 147 desc.defaultValue = float(m_defaultFeParams.referenceFrequency);
Chris@236 148 desc.isQuantized = false;
Chris@236 149 desc.unit = "Hz";
Chris@236 150 list.push_back(desc);
Chris@236 151
Chris@236 152 desc.identifier = "minfreq";
Chris@236 153 desc.name = "Minimum frequency";
Chris@241 154 desc.description = "Minimum frequency to include in features";
Chris@236 155 desc.minValue = 0.0;
Chris@236 156 desc.maxValue = float(m_inputSampleRate / 4.f);
Chris@236 157 desc.defaultValue = float(m_defaultFeParams.minFrequency);
Chris@236 158 desc.isQuantized = false;
Chris@236 159 desc.unit = "Hz";
Chris@236 160 list.push_back(desc);
Chris@236 161
Chris@236 162 desc.identifier = "maxfreq";
Chris@236 163 desc.name = "Maximum frequency";
Chris@241 164 desc.description = "Maximum frequency to include in features";
Chris@236 165 desc.minValue = 1000.0;
Chris@236 166 desc.maxValue = float(m_inputSampleRate / 2.f);
Chris@236 167 desc.defaultValue = float(m_defaultFeParams.maxFrequency);
Chris@236 168 desc.isQuantized = false;
Chris@236 169 desc.unit = "Hz";
Chris@236 170 list.push_back(desc);
Chris@236 171
Chris@236 172 desc.unit = "";
Chris@241 173
Chris@241 174 desc.identifier = "coarsedownsample";
Chris@241 175 desc.name = "Coarse alignment downsample factor";
Chris@241 176 desc.description = "Downsample factor for features used in first coarse subsequence-alignment step";
Chris@241 177 desc.minValue = 1;
Chris@241 178 desc.maxValue = 200;
Chris@241 179 desc.defaultValue = float(defaultCoarseDownsample);
Chris@241 180 desc.isQuantized = true;
Chris@241 181 desc.quantizeStep = 1;
Chris@241 182 list.push_back(desc);
Chris@236 183
Chris@246 184 desc.identifier = "downsamplemethod";
Chris@246 185 desc.name = "Coarse alignment downsample method";
Chris@246 186 desc.description = "Downsample method for features used in first coarse subsequence-alignment step";
Chris@246 187 desc.minValue = 0;
Chris@246 188 desc.maxValue = 1;
Chris@246 189 desc.defaultValue = 0;
Chris@246 190 desc.isQuantized = true;
Chris@246 191 desc.quantizeStep = 1;
Chris@246 192 desc.valueNames.clear();
Chris@246 193 desc.valueNames.push_back("Average");
Chris@246 194 desc.valueNames.push_back("Peak");
Chris@246 195 list.push_back(desc);
Chris@246 196
Chris@236 197 desc.identifier = "usechroma";
Chris@236 198 desc.name = "Feature type";
Chris@236 199 desc.description = "Whether to use warped spectrogram or chroma frequency map";
Chris@236 200 desc.minValue = 0;
Chris@236 201 desc.maxValue = 1;
Chris@236 202 desc.defaultValue = m_defaultFeParams.useChromaFrequencyMap ? 1 : 0;
Chris@236 203 desc.isQuantized = true;
Chris@236 204 desc.quantizeStep = 1;
Chris@236 205 desc.valueNames.clear();
Chris@236 206 desc.valueNames.push_back("Spectral");
Chris@236 207 desc.valueNames.push_back("Chroma");
Chris@236 208 list.push_back(desc);
Chris@236 209
Chris@236 210 desc.valueNames.clear();
Chris@236 211
Chris@236 212 desc.identifier = "usespecdiff";
Chris@236 213 desc.name = "Use feature difference";
Chris@246 214 desc.description = "Whether to use half-wave rectified feature-to-feature difference instead of straight spectral or chroma feature (does not apply to downsampled features)";
Chris@236 215 desc.minValue = 0;
Chris@236 216 desc.maxValue = 1;
Chris@236 217 desc.defaultValue = float(m_defaultFcParams.order);
Chris@236 218 desc.isQuantized = true;
Chris@236 219 desc.quantizeStep = 1;
Chris@236 220 list.push_back(desc);
Chris@236 221
Chris@236 222 desc.identifier = "framenorm";
Chris@236 223 desc.name = "Frame normalisation";
Chris@236 224 desc.description = "Type of normalisation to use for features";
Chris@236 225 desc.minValue = 0;
Chris@236 226 desc.maxValue = 2;
Chris@236 227 desc.defaultValue = float(m_defaultFcParams.norm);
Chris@236 228 desc.isQuantized = true;
Chris@236 229 desc.quantizeStep = 1;
Chris@236 230 desc.valueNames.clear();
Chris@236 231 desc.valueNames.push_back("None");
Chris@236 232 desc.valueNames.push_back("Sum to 1");
Chris@236 233 desc.valueNames.push_back("Long-term average");
Chris@236 234 list.push_back(desc);
Chris@236 235 desc.valueNames.clear();
Chris@236 236 desc.defaultValue = float(m_defaultFcParams.silenceThreshold);
Chris@236 237
Chris@236 238 desc.identifier = "metric";
Chris@236 239 desc.name = "Distance metric";
Chris@241 240 desc.description = "Metric for distance calculations";
Chris@236 241 desc.minValue = 0;
Chris@236 242 desc.maxValue = 2;
Chris@236 243 desc.defaultValue = float(m_defaultDParams.metric);
Chris@236 244 desc.isQuantized = true;
Chris@236 245 desc.quantizeStep = 1;
Chris@236 246 desc.valueNames.clear();
Chris@236 247 desc.valueNames.push_back("Manhattan");
Chris@236 248 desc.valueNames.push_back("Euclidean");
Chris@236 249 desc.valueNames.push_back("Cosine");
Chris@236 250 list.push_back(desc);
Chris@236 251 desc.valueNames.clear();
Chris@236 252
Chris@236 253 desc.identifier = "distnorm";
Chris@236 254 desc.name = "Distance normalisation";
Chris@236 255 desc.description = "Type of normalisation to use for distance metric";
Chris@236 256 desc.minValue = 0;
Chris@236 257 desc.maxValue = 2;
Chris@236 258 desc.defaultValue = float(m_defaultDParams.norm);
Chris@236 259 desc.isQuantized = true;
Chris@236 260 desc.quantizeStep = 1;
Chris@236 261 desc.valueNames.clear();
Chris@236 262 desc.valueNames.push_back("None");
Chris@236 263 desc.valueNames.push_back("Sum of frames");
Chris@236 264 desc.valueNames.push_back("Log sum of frames");
Chris@236 265 list.push_back(desc);
Chris@236 266 desc.valueNames.clear();
Chris@236 267
Chris@236 268 #ifdef USE_COMPACT_TYPES
Chris@236 269 desc.identifier = "scale";
Chris@236 270 desc.name = "Distance scale";
Chris@236 271 desc.description = "Scale factor to use when mapping distance metric into byte range for storage";
Chris@236 272 desc.minValue = 1;
Chris@236 273 desc.maxValue = 1000;
Chris@236 274 desc.defaultValue = float(m_defaultDParams.scale);
Chris@236 275 desc.isQuantized = false;
Chris@236 276 list.push_back(desc);
Chris@236 277 #endif
Chris@236 278
Chris@236 279 desc.identifier = "silencethreshold";
Chris@236 280 desc.name = "Silence threshold";
Chris@236 281 desc.description = "Total frame energy threshold below which a feature will be regarded as silent";
Chris@236 282 desc.minValue = 0;
Chris@236 283 desc.maxValue = 0.1f;
Chris@236 284 desc.defaultValue = float(m_defaultFcParams.silenceThreshold);
Chris@236 285 desc.isQuantized = false;
Chris@236 286 list.push_back(desc);
Chris@236 287
Chris@236 288 desc.identifier = "noise";
Chris@236 289 desc.name = "Add noise";
Chris@241 290 desc.description = "Whether to mix in a small constant white noise term when calculating feature distance. This can improve alignment against sources containing cleanly synthesised audio";
Chris@236 291 desc.minValue = 0;
Chris@236 292 desc.maxValue = 1;
Chris@236 293 desc.defaultValue = float(m_defaultDParams.noise);
Chris@236 294 desc.isQuantized = true;
Chris@236 295 desc.quantizeStep = 1;
Chris@236 296 list.push_back(desc);
Chris@236 297
Chris@236 298 desc.identifier = "gradientlimit";
Chris@236 299 desc.name = "Gradient limit";
Chris@236 300 desc.description = "Limit of number of frames that will be accepted from one source without a frame from the other source being accepted";
Chris@236 301 desc.minValue = 1;
Chris@236 302 desc.maxValue = 10;
Chris@236 303 desc.defaultValue = float(m_defaultParams.maxRunCount);
Chris@236 304 desc.isQuantized = true;
Chris@236 305 desc.quantizeStep = 1;
Chris@236 306 list.push_back(desc);
Chris@236 307
Chris@236 308 desc.identifier = "zonewidth";
Chris@236 309 desc.name = "Search zone width";
Chris@236 310 desc.description = "Width of the search zone (error margin) either side of the ongoing match position, in seconds";
Chris@236 311 desc.minValue = 1;
Chris@236 312 desc.maxValue = 60;
Chris@236 313 desc.defaultValue = float(m_defaultParams.blockTime);
Chris@236 314 desc.isQuantized = true;
Chris@236 315 desc.quantizeStep = 1;
Chris@236 316 desc.unit = "s";
Chris@236 317 list.push_back(desc);
Chris@236 318
Chris@236 319 desc.identifier = "diagonalweight";
Chris@243 320 desc.name = "Diagonal weight, anchored";
Chris@243 321 desc.description = "Weight applied to cost of diagonal step relative to horizontal or vertical step, during the anchored (non-subsequence) alignment step";
Chris@243 322 desc.minValue = 0.5;
Chris@236 323 desc.maxValue = 2.0;
Chris@236 324 desc.defaultValue = float(m_defaultParams.diagonalWeight);
Chris@236 325 desc.isQuantized = false;
Chris@236 326 desc.unit = "";
Chris@236 327 list.push_back(desc);
Chris@243 328
Chris@243 329 desc.identifier = "diagonalweightsubsequence";
Chris@243 330 desc.name = "Diagonal weight, subsequence";
Chris@243 331 desc.description = "Weight applied to cost of diagonal step relative to horizontal or vertical step, during the coarse subsequence alignment step";
Chris@243 332 desc.minValue = 0.5;
Chris@243 333 desc.maxValue = 2.0;
Chris@243 334 desc.defaultValue = float(m_defaultFdParams.diagonalWeight);
Chris@243 335 desc.isQuantized = false;
Chris@243 336 desc.unit = "";
Chris@243 337 list.push_back(desc);
Chris@236 338
Chris@236 339 desc.identifier = "smooth";
Chris@236 340 desc.name = "Use path smoothing";
Chris@236 341 desc.description = "Smooth the path by replacing steps with diagonals. (This was enabled by default in earlier versions of the MATCH plugin, but the default now is to produce an un-smoothed path.)";
Chris@236 342 desc.minValue = 0;
Chris@236 343 desc.maxValue = 1;
Chris@236 344 desc.defaultValue = 0;
Chris@236 345 desc.isQuantized = true;
Chris@236 346 desc.quantizeStep = 1;
Chris@236 347 desc.unit = "";
Chris@236 348 list.push_back(desc);
Chris@236 349
Chris@236 350 desc.identifier = "serialise";
Chris@236 351 desc.name = "Serialise plugin invocations";
Chris@236 352 desc.description = "Reduce potential memory load at the expense of multiprocessor performance by serialising multi-threaded plugin runs";
Chris@236 353 desc.minValue = 0;
Chris@236 354 desc.maxValue = 1;
Chris@236 355 desc.defaultValue = 0;
Chris@236 356 desc.isQuantized = true;
Chris@236 357 desc.quantizeStep = 1;
Chris@236 358 list.push_back(desc);
Chris@236 359
Chris@236 360 return list;
Chris@236 361 }
Chris@236 362
Chris@236 363 float
Chris@236 364 SubsequenceMatchVampPlugin::getParameter(std::string name) const
Chris@236 365 {
Chris@236 366 if (name == "serialise") {
Chris@236 367 return m_serialise ? 1.0 : 0.0;
Chris@236 368 } else if (name == "framenorm") {
Chris@236 369 return float(m_fcParams.norm);
Chris@236 370 } else if (name == "distnorm") {
Chris@236 371 return float(m_dParams.norm);
Chris@236 372 } else if (name == "usespecdiff") {
Chris@236 373 return float(m_fcParams.order);
Chris@236 374 } else if (name == "usechroma") {
Chris@236 375 return m_feParams.useChromaFrequencyMap ? 1.0 : 0.0;
Chris@236 376 } else if (name == "gradientlimit") {
Chris@236 377 return float(m_params.maxRunCount);
Chris@236 378 } else if (name == "diagonalweight") {
Chris@236 379 return float(m_params.diagonalWeight);
Chris@243 380 } else if (name == "diagonalweightsubsequence") {
Chris@243 381 return float(m_fdParams.diagonalWeight);
Chris@236 382 } else if (name == "zonewidth") {
Chris@236 383 return float(m_params.blockTime);
Chris@236 384 } else if (name == "smooth") {
Chris@236 385 return m_smooth ? 1.0 : 0.0;
Chris@236 386 } else if (name == "silencethreshold") {
Chris@236 387 return float(m_fcParams.silenceThreshold);
Chris@236 388 } else if (name == "metric") {
Chris@236 389 return float(m_dParams.metric);
Chris@236 390 } else if (name == "noise") {
Chris@236 391 return m_dParams.noise;
Chris@236 392 } else if (name == "scale") {
Chris@236 393 return float(m_dParams.scale);
Chris@236 394 } else if (name == "freq1") {
Chris@236 395 return float(m_feParams.referenceFrequency);
Chris@236 396 } else if (name == "freq2") {
Chris@236 397 return float(m_secondReferenceFrequency);
Chris@236 398 } else if (name == "minfreq") {
Chris@236 399 return float(m_feParams.minFrequency);
Chris@236 400 } else if (name == "maxfreq") {
Chris@236 401 return float(m_feParams.maxFrequency);
Chris@241 402 } else if (name == "coarsedownsample") {
Chris@241 403 return float(m_coarseDownsample);
Chris@246 404 } else if (name == "downsamplemethod") {
Chris@246 405 return m_downsamplePeaks ? 1.0 : 0.0;
Chris@236 406 }
Chris@236 407
Chris@236 408 return 0.0;
Chris@236 409 }
Chris@236 410
Chris@236 411 void
Chris@236 412 SubsequenceMatchVampPlugin::setParameter(std::string name, float value)
Chris@236 413 {
Chris@236 414 if (name == "serialise") {
Chris@236 415 m_serialise = (value > 0.5);
Chris@236 416 } else if (name == "framenorm") {
Chris@236 417 m_fcParams.norm = FeatureConditioner::Normalisation(int(value + 0.1));
Chris@236 418 } else if (name == "distnorm") {
Chris@236 419 m_dParams.norm = DistanceMetric::DistanceNormalisation(int(value + 0.1));
Chris@236 420 } else if (name == "usespecdiff") {
Chris@236 421 m_fcParams.order = FeatureConditioner::OutputOrder(int(value + 0.1));
Chris@236 422 } else if (name == "usechroma") {
Chris@236 423 m_feParams.useChromaFrequencyMap = (value > 0.5);
Chris@236 424 } else if (name == "gradientlimit") {
Chris@236 425 m_params.maxRunCount = int(value + 0.1);
Chris@236 426 } else if (name == "diagonalweight") {
Chris@236 427 m_params.diagonalWeight = value;
Chris@243 428 } else if (name == "diagonalweightsubsequence") {
Chris@243 429 m_fdParams.diagonalWeight = value;
Chris@236 430 } else if (name == "zonewidth") {
Chris@236 431 m_params.blockTime = value;
Chris@236 432 } else if (name == "smooth") {
Chris@236 433 m_smooth = (value > 0.5);
Chris@236 434 } else if (name == "silencethreshold") {
Chris@236 435 m_fcParams.silenceThreshold = value;
Chris@236 436 } else if (name == "metric") {
Chris@236 437 m_dParams.metric = DistanceMetric::Metric(int(value + 0.1));
Chris@236 438 } else if (name == "noise") {
Chris@236 439 m_dParams.noise = DistanceMetric::NoiseAddition(int(value + 0.1));
Chris@236 440 } else if (name == "scale") {
Chris@236 441 m_dParams.scale = value;
Chris@236 442 } else if (name == "freq1") {
Chris@236 443 m_feParams.referenceFrequency = value;
Chris@236 444 } else if (name == "freq2") {
Chris@236 445 m_secondReferenceFrequency = value;
Chris@236 446 } else if (name == "minfreq") {
Chris@236 447 m_feParams.minFrequency = value;
Chris@236 448 } else if (name == "maxfreq") {
Chris@236 449 m_feParams.maxFrequency = value;
Chris@241 450 } else if (name == "coarsedownsample") {
Chris@241 451 m_coarseDownsample = int(value + 0.1);
Chris@246 452 } else if (name == "downsamplemethod") {
Chris@246 453 m_downsamplePeaks = (value > 0.5);
Chris@236 454 }
Chris@236 455 }
Chris@236 456
Chris@246 457 SubsequenceMatchVampPlugin::InputDomain
Chris@246 458 SubsequenceMatchVampPlugin::getInputDomain() const
Chris@246 459 {
Chris@246 460 return FrequencyDomain;
Chris@246 461 }
Chris@246 462
Chris@236 463 size_t
Chris@236 464 SubsequenceMatchVampPlugin::getPreferredStepSize() const
Chris@236 465 {
Chris@236 466 return int(m_inputSampleRate * defaultStepTime + 0.001);
Chris@236 467 }
Chris@236 468
Chris@236 469 size_t
Chris@236 470 SubsequenceMatchVampPlugin::getPreferredBlockSize() const
Chris@236 471 {
Chris@236 472 return m_defaultFeParams.fftSize;
Chris@236 473 }
Chris@236 474
Chris@246 475 size_t
Chris@246 476 SubsequenceMatchVampPlugin::getMinChannelCount() const
Chris@246 477 {
Chris@246 478 return 2;
Chris@246 479 }
Chris@246 480
Chris@246 481 size_t
Chris@246 482 SubsequenceMatchVampPlugin::getMaxChannelCount() const
Chris@246 483 {
Chris@246 484 return 2;
Chris@246 485 }
Chris@246 486
Chris@236 487 bool
Chris@236 488 SubsequenceMatchVampPlugin::initialise(size_t channels, size_t stepSize, size_t blockSize)
Chris@236 489 {
Chris@236 490 if (m_inputSampleRate < sampleRateMin) {
Chris@237 491 cerr << "SubsequenceMatchVampPlugin::SubsequenceMatchVampPlugin: input sample rate "
Chris@237 492 << m_inputSampleRate << " < min supported rate "
Chris@237 493 << sampleRateMin << endl;
Chris@236 494 return false;
Chris@236 495 }
Chris@236 496 if (channels < getMinChannelCount() ||
Chris@237 497 channels > getMaxChannelCount()) {
Chris@237 498 return false;
Chris@237 499 }
Chris@236 500 if (stepSize > blockSize/2 ||
Chris@237 501 blockSize != getPreferredBlockSize()) {
Chris@237 502 return false;
Chris@237 503 }
Chris@236 504
Chris@236 505 m_stepSize = int(stepSize);
Chris@236 506 m_stepTime = float(stepSize) / m_inputSampleRate;
Chris@236 507 m_blockSize = int(blockSize);
Chris@236 508
Chris@236 509 m_params.hopTime = m_stepTime;
Chris@236 510 m_feParams.fftSize = m_blockSize;
Chris@236 511
Chris@237 512 m_channelCount = channels;
Chris@237 513
Chris@237 514 reset();
Chris@237 515
Chris@236 516 return true;
Chris@236 517 }
Chris@236 518
Chris@236 519 void
Chris@236 520 SubsequenceMatchVampPlugin::reset()
Chris@236 521 {
Chris@237 522 m_featureExtractors.clear();
Chris@237 523 m_features.clear();
Chris@237 524 m_startTime = Vamp::RealTime::zeroTime;
Chris@237 525
Chris@237 526 FeatureExtractor::Parameters feParams(m_feParams);
Chris@237 527
Chris@237 528 for (size_t c = 0; c < m_channelCount; ++c) {
Chris@239 529 if (c > 0 && m_secondReferenceFrequency != 0.0) {
Chris@237 530 feParams.referenceFrequency = m_secondReferenceFrequency;
Chris@237 531 }
Chris@237 532 m_featureExtractors.push_back(FeatureExtractor(feParams));
Chris@237 533 m_features.push_back(featureseq_t());
Chris@237 534 }
Chris@236 535 }
Chris@236 536
Chris@236 537 SubsequenceMatchVampPlugin::OutputList
Chris@236 538 SubsequenceMatchVampPlugin::getOutputDescriptors() const
Chris@236 539 {
Chris@236 540 OutputList list;
Chris@236 541
Chris@236 542 float outRate = 1.0f / m_stepTime;
Chris@236 543
Chris@236 544 OutputDescriptor desc;
Chris@236 545 desc.identifier = "path";
Chris@236 546 desc.name = "Path";
Chris@236 547 desc.description = "Alignment path";
Chris@236 548 desc.unit = "";
Chris@236 549 desc.hasFixedBinCount = true;
Chris@236 550 desc.binCount = 1;
Chris@236 551 desc.hasKnownExtents = false;
Chris@236 552 desc.isQuantized = true;
Chris@236 553 desc.quantizeStep = 1;
Chris@236 554 desc.sampleType = OutputDescriptor::VariableSampleRate;
Chris@236 555 desc.sampleRate = outRate;
Chris@236 556 m_pathOutNo = int(list.size());
Chris@236 557 list.push_back(desc);
Chris@236 558
Chris@236 559 desc.identifier = "b_a";
Chris@236 560 desc.name = "B-A Timeline";
Chris@236 561 desc.description = "Timing in performance A corresponding to moments in performance B";
Chris@236 562 desc.unit = "sec";
Chris@236 563 desc.hasFixedBinCount = true;
Chris@236 564 desc.binCount = 1;
Chris@236 565 desc.hasKnownExtents = false;
Chris@236 566 desc.isQuantized = false;
Chris@236 567 desc.sampleType = OutputDescriptor::VariableSampleRate;
Chris@236 568 desc.sampleRate = outRate;
Chris@236 569 m_baOutNo = int(list.size());
Chris@236 570 list.push_back(desc);
Chris@237 571
Chris@237 572 desc.identifier = "span";
Chris@237 573 desc.name = "Subsequence Span";
Chris@237 574 desc.description = "Region in performance A corresponding to the whole of performance B";
Chris@237 575 desc.unit = "";
Chris@237 576 desc.hasFixedBinCount = true;
Chris@237 577 desc.binCount = 0;
Chris@237 578 desc.hasKnownExtents = false;
Chris@237 579 desc.isQuantized = false;
Chris@237 580 desc.sampleType = OutputDescriptor::VariableSampleRate;
Chris@237 581 desc.sampleRate = outRate;
Chris@237 582 desc.hasDuration = true;
Chris@237 583 m_spanOutNo = int(list.size());
Chris@237 584 list.push_back(desc);
Chris@236 585
Chris@236 586 return list;
Chris@236 587 }
Chris@236 588
Chris@236 589 SubsequenceMatchVampPlugin::FeatureSet
Chris@236 590 SubsequenceMatchVampPlugin::process(const float *const *inputBuffers,
Chris@236 591 Vamp::RealTime timestamp)
Chris@236 592 {
Chris@237 593 if (m_featureExtractors.empty()) {
Chris@237 594 cerr << "SubsequenceMatchVampPlugin::process: Plugin has not been (properly?) initialised" << endl;
Chris@237 595 return {};
Chris@237 596 }
Chris@237 597
Chris@237 598 if (m_features[0].empty()) {
Chris@237 599 m_startTime = timestamp;
Chris@237 600 }
Chris@236 601
Chris@237 602 for (size_t c = 0; c < m_featureExtractors.size(); ++c) {
Chris@237 603 m_features[c].push_back(m_featureExtractors[c].process
Chris@237 604 (inputBuffers[c]));
Chris@237 605 }
Chris@237 606
Chris@237 607 return {};
Chris@237 608 }
Chris@237 609
Chris@246 610 size_t
Chris@246 611 SubsequenceMatchVampPlugin::findNonEmptyLength(const featureseq_t &ff)
Chris@237 612 {
Chris@246 613 bool haveNonEmpty = false;
Chris@237 614 size_t lastNonEmpty = 0;
Chris@237 615 for (size_t i = ff.size(); i > 0; ) {
Chris@237 616 --i;
Chris@237 617 if (MatchPipeline::isAboveEndingThreshold(ff[i])) {
Chris@246 618 haveNonEmpty = true;
Chris@237 619 lastNonEmpty = i;
Chris@237 620 break;
Chris@237 621 }
Chris@237 622 }
Chris@246 623 if (haveNonEmpty) {
Chris@246 624 return lastNonEmpty + 1;
Chris@246 625 } else {
Chris@246 626 return 0;
Chris@246 627 }
Chris@246 628 }
Chris@246 629
Chris@246 630 featureseq_t
Chris@246 631 SubsequenceMatchVampPlugin::downsample(const featureseq_t &ff,
Chris@246 632 size_t inLength)
Chris@246 633 {
Chris@246 634 if (ff.empty()) {
Chris@246 635 return ff;
Chris@246 636 }
Chris@237 637
Chris@237 638 FeatureConditioner::Parameters fcParams(m_fcParams);
Chris@237 639 fcParams.order = FeatureConditioner::OutputFeatures; // not the difference
Chris@237 640 FeatureConditioner fc(fcParams);
Chris@237 641
Chris@237 642 int featureSize = m_featureExtractors[0].getFeatureSize();
Chris@237 643
Chris@237 644 featureseq_t d;
Chris@237 645
Chris@237 646 size_t i = 0;
Chris@246 647 while (i < inLength) {
Chris@237 648 feature_t acc(featureSize, 0);
Chris@237 649 int j = 0;
Chris@237 650 while (j < m_coarseDownsample) {
Chris@237 651 if (i >= ff.size()) break;
Chris@237 652 feature_t feature = fc.process(ff[i]);
Chris@246 653 if (m_downsamplePeaks) {
Chris@246 654 for (int k = 0; k < featureSize; ++k) {
Chris@246 655 if (feature[k] > acc[k]) {
Chris@246 656 acc[k] = feature[k];
Chris@246 657 }
Chris@246 658 }
Chris@246 659 } else {
Chris@246 660 for (int k = 0; k < featureSize; ++k) {
Chris@246 661 acc[k] += feature[k];
Chris@246 662 }
Chris@237 663 }
Chris@237 664 ++i;
Chris@237 665 ++j;
Chris@237 666 }
Chris@246 667 if (!m_downsamplePeaks && j > 0) {
Chris@237 668 for (int k = 0; k < featureSize; ++k) {
Chris@237 669 acc[k] /= float(j);
Chris@237 670 }
Chris@237 671 }
Chris@237 672 d.push_back(acc);
Chris@237 673 }
Chris@237 674
Chris@237 675 return d;
Chris@236 676 }
Chris@236 677
Chris@236 678 SubsequenceMatchVampPlugin::FeatureSet
Chris@236 679 SubsequenceMatchVampPlugin::getRemainingFeatures()
Chris@236 680 {
Chris@237 681 if (m_featureExtractors.empty()) {
Chris@237 682 cerr << "SubsequenceMatchVampPlugin::getRemainingFeatures: Plugin has not been (properly?) initialised" << endl;
Chris@237 683 return {};
Chris@237 684 }
Chris@237 685
Chris@236 686 #ifdef _WIN32
Chris@237 687 static HANDLE mutex;
Chris@236 688 #else
Chris@237 689 static pthread_mutex_t mutex;
Chris@236 690 #endif
Chris@237 691 static bool mutexInitialised = false;
Chris@236 692
Chris@236 693 if (m_serialise) {
Chris@237 694 if (!mutexInitialised) {
Chris@237 695 #ifdef _WIN32
Chris@237 696 mutex = CreateMutex(NULL, FALSE, NULL);
Chris@237 697 #else
Chris@237 698 pthread_mutex_init(&mutex, 0);
Chris@237 699 #endif
Chris@237 700 mutexInitialised = true;
Chris@237 701 }
Chris@236 702 #ifdef _WIN32
Chris@236 703 WaitForSingleObject(mutex, INFINITE);
Chris@236 704 #else
Chris@236 705 pthread_mutex_lock(&mutex);
Chris@236 706 #endif
Chris@236 707 }
Chris@236 708
Chris@240 709 FeatureSet returnFeatures = performAlignment();
Chris@240 710
Chris@240 711 if (m_serialise) {
Chris@240 712 #ifdef _WIN32
Chris@240 713 ReleaseMutex(mutex);
Chris@240 714 #else
Chris@240 715 pthread_mutex_unlock(&mutex);
Chris@240 716 #endif
Chris@240 717 }
Chris@240 718
Chris@240 719 return returnFeatures;
Chris@240 720 }
Chris@240 721
Chris@240 722 SubsequenceMatchVampPlugin::FeatureSet
Chris@240 723 SubsequenceMatchVampPlugin::performAlignment()
Chris@240 724 {
Chris@246 725 size_t refLength = findNonEmptyLength(m_features[0]);
Chris@246 726 featureseq_t downsampledRef = downsample(m_features[0], refLength);
Chris@237 727
Chris@246 728 #ifdef DEBUG_SUBSEQUENCE_MATCH
Chris@246 729 cerr << "SubsequenceMatchVampPlugin: reference downsampled sequence length = " << downsampledRef.size() << " (from " << refLength << " non-empty of " << m_features[0].size() << " total)" << endl;
Chris@246 730 #endif
Chris@237 731
Chris@243 732 FullDTW dtw(m_fdParams, m_dParams);
Chris@237 733
Chris@236 734 FeatureSet returnFeatures;
Chris@237 735 int featureSize = m_featureExtractors[0].getFeatureSize();
Chris@236 736
Chris@237 737 int rate = int(m_inputSampleRate + 0.5);
Chris@237 738
Chris@237 739 for (size_t c = 1; c < m_channelCount; ++c) {
Chris@237 740
Chris@246 741 size_t otherLength = findNonEmptyLength(m_features[c]);
Chris@246 742 featureseq_t downsampledOther = downsample(m_features[c], otherLength);
Chris@237 743
Chris@246 744 #ifdef DEBUG_SUBSEQUENCE_MATCH
Chris@246 745 cerr << "SubsequenceMatchVampPlugin: other downsampled sequence length = " << downsampledOther.size() << " (from " << otherLength << " non-empty of " << m_features[c].size() << " total)" << endl;
Chris@246 746 #endif
Chris@237 747
Chris@237 748 vector<size_t> subsequenceAlignment = dtw.align(downsampledRef,
Chris@237 749 downsampledOther);
Chris@237 750
Chris@237 751 if (subsequenceAlignment.empty()) {
Chris@237 752 cerr << "No subsequenceAlignment??" << endl;
Chris@237 753 continue;
Chris@237 754 }
Chris@237 755
Chris@237 756 int64_t first = subsequenceAlignment[0];
Chris@237 757 int64_t last = subsequenceAlignment[subsequenceAlignment.size()-1];
Chris@237 758
Chris@246 759 #ifdef DEBUG_SUBSEQUENCE_MATCH
Chris@246 760 cerr << "Subsequence alignment maps 0 -> " << subsequenceAlignment.size()-1 << " to " << first << " -> " << last << endl;
Chris@246 761 #endif
Chris@237 762
Chris@237 763 if (last <= first) {
Chris@241 764 cerr << "NOTE: Invalid span (" << first << " to " << last
Chris@241 765 << "), reverting to aligning against whole of reference"
Chris@241 766 << endl;
Chris@241 767 first = 0;
Chris@241 768 last = downsampledRef.size() - 1;
Chris@241 769 } else if (first < 0 || last >= long(downsampledRef.size())) {
Chris@241 770 cerr << "NOTE: Span end points (" << first << " to "
Chris@241 771 << last << ") out of range (0 to " << downsampledRef.size()-1
Chris@241 772 << "), reverting to aligning against whole of reference"
Chris@241 773 << endl;
Chris@241 774 first = 0;
Chris@241 775 last = downsampledRef.size() - 1;
Chris@237 776 }
Chris@237 777
Chris@237 778 Feature span;
Chris@237 779 span.hasTimestamp = true;
Chris@237 780 span.timestamp = Vamp::RealTime::frame2RealTime
Chris@237 781 (first * m_coarseDownsample * m_stepSize, rate);
Chris@237 782 span.hasDuration = true;
Chris@237 783 span.duration = Vamp::RealTime::frame2RealTime
Chris@237 784 ((last - first) * m_coarseDownsample * m_stepSize, rate);
Chris@237 785 returnFeatures[m_spanOutNo].push_back(span);
Chris@237 786
Chris@241 787 size_t firstAtOriginalRate = first * m_coarseDownsample;
Chris@241 788 size_t lastAtOriginalRate = (last + 1) * m_coarseDownsample;
Chris@241 789
Chris@241 790 if (lastAtOriginalRate >= m_features[0].size()) {
Chris@241 791 lastAtOriginalRate = m_features[0].size() - 1;
Chris@241 792 }
Chris@241 793
Chris@237 794 featureseq_t referenceSubsequence
Chris@241 795 (m_features[0].begin() + firstAtOriginalRate,
Chris@241 796 m_features[0].begin() + lastAtOriginalRate);
Chris@237 797
Chris@246 798 #ifdef DEBUG_SUBSEQUENCE_MATCH
Chris@246 799 cerr << "Reference subsequence length = " << referenceSubsequence.size()
Chris@246 800 << endl;
Chris@246 801 cerr << "Other sequence length = " << otherLength << endl;
Chris@246 802 #endif
Chris@246 803
Chris@237 804 MatchPipeline pipeline(m_feParams,
Chris@237 805 m_fcParams,
Chris@237 806 m_dParams,
Chris@237 807 m_params,
Chris@237 808 m_secondReferenceFrequency);
Chris@237 809
Chris@246 810 size_t sequenceLength = std::max(referenceSubsequence.size(),
Chris@246 811 otherLength);
Chris@246 812
Chris@246 813 #ifdef DEBUG_SUBSEQUENCE_MATCH
Chris@246 814 cerr << "MATCH input sequences have length " << sequenceLength << endl;
Chris@246 815 #endif
Chris@246 816
Chris@246 817 for (size_t i = 0; i < sequenceLength; ++i) {
Chris@237 818 feature_t f1(featureSize, 0);
Chris@237 819 feature_t f2(featureSize, 0);
Chris@237 820 if (i < referenceSubsequence.size()) {
Chris@237 821 f1 = referenceSubsequence[i];
Chris@237 822 }
Chris@246 823 if (i < otherLength) {
Chris@237 824 f2 = m_features[c][i];
Chris@237 825 }
Chris@237 826 pipeline.feedFeatures(f1, f2);
Chris@237 827 }
Chris@237 828
Chris@237 829 pipeline.finish();
Chris@237 830
Chris@237 831 vector<int> pathx;
Chris@237 832 vector<int> pathy;
Chris@237 833 int len = pipeline.retrievePath(m_smooth, pathx, pathy);
Chris@237 834
Chris@237 835 int prevy = 0;
Chris@246 836
Chris@246 837 #ifdef DEBUG_SUBSEQUENCE_MATCH
Chris@246 838 cerr << "MATCH path has length " << len;
Chris@246 839 if (len > 0) {
Chris@246 840 cerr << " and goes from ("
Chris@246 841 << pathx[0] << ", " << pathy[0] << ") to ("
Chris@246 842 << pathx[len-1] << ", " << pathy[len-1] << ")";
Chris@246 843 if (len > 2) {
Chris@246 844 cerr << " with penultimate point at ("
Chris@246 845 << pathx[len-2] << ", " << pathy[len-2] << ")";
Chris@246 846 }
Chris@246 847 cerr << endl;
Chris@246 848 } else {
Chris@246 849 cerr << endl;
Chris@246 850 }
Chris@246 851 #endif
Chris@237 852
Chris@237 853 for (int i = 0; i < len; ++i) {
Chris@237 854
Chris@237 855 int x = pathx[i];
Chris@237 856 int y = pathy[i] + int(first * m_coarseDownsample);
Chris@246 857
Chris@237 858 Vamp::RealTime xt = Vamp::RealTime::frame2RealTime
Chris@237 859 (x * m_stepSize, rate) + m_startTime;
Chris@237 860 Vamp::RealTime yt = Vamp::RealTime::frame2RealTime
Chris@237 861 (y * m_stepSize, rate) + m_startTime;
Chris@237 862
Chris@237 863 Feature feature;
Chris@237 864 feature.hasTimestamp = true;
Chris@237 865 feature.timestamp = xt;
Chris@237 866 feature.values.clear();
Chris@237 867 feature.values.push_back(float(yt.sec + double(yt.nsec)/1.0e9));
Chris@237 868 returnFeatures[m_pathOutNo].push_back(feature);
Chris@237 869
Chris@237 870 if (y != prevy) {
Chris@237 871 feature.hasTimestamp = true;
Chris@237 872 feature.timestamp = yt;
Chris@237 873 feature.values.clear();
Chris@237 874 feature.values.push_back(float(xt.sec + xt.msec()/1000.0));
Chris@237 875 returnFeatures[m_baOutNo].push_back(feature);
Chris@237 876 }
Chris@237 877
Chris@237 878 prevy = y;
Chris@237 879 }
Chris@237 880 }
Chris@236 881
Chris@246 882 #ifdef DEBUG_SUBSEQUENCE_MATCH
Chris@246 883 cerr << endl;
Chris@246 884 #endif
Chris@246 885
Chris@236 886 return returnFeatures;
Chris@236 887 }