Chris@236: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ Chris@236: Chris@236: /* Chris@236: Vamp feature extraction plugin using the MATCH audio alignment Chris@236: algorithm. Chris@236: Chris@236: Centre for Digital Music, Queen Mary, University of London. Chris@236: Copyright (c) 2007-2020 Simon Dixon, Chris Cannam, and Queen Mary Chris@236: University of London, Copyright (c) 2014-2015 Tido GmbH. Chris@236: Chris@236: This program is free software; you can redistribute it and/or Chris@236: modify it under the terms of the GNU General Public License as Chris@236: published by the Free Software Foundation; either version 2 of the Chris@236: License, or (at your option) any later version. See the file Chris@236: COPYING included with this distribution for more information. Chris@236: */ Chris@236: Chris@236: #include "SubsequenceMatchVampPlugin.h" Chris@237: #include "FullDTW.h" Chris@237: #include "MatchPipeline.h" Chris@236: Chris@236: #include Chris@236: #include Chris@236: Chris@236: #include Chris@236: #include Chris@236: Chris@236: using std::string; Chris@237: using std::vector; Chris@237: using std::cerr; Chris@237: using std::cout; Chris@237: using std::endl; Chris@236: Chris@246: //#define DEBUG_SUBSEQUENCE_MATCH 1 Chris@246: Chris@236: // We want to ensure our freq map / crossover bin are always valid Chris@236: // with a fixed FFT length in seconds, so must reject low sample rates Chris@236: static float sampleRateMin = 5000.f; Chris@236: Chris@236: static float defaultStepTime = 0.020f; Chris@241: static int defaultCoarseDownsample = 50; Chris@243: static double defaultAnchoredDiagonalWeight = 2.0; Chris@243: static double defaultSubsequenceDiagonalWeight = 0.75; Chris@241: Chris@236: SubsequenceMatchVampPlugin::SubsequenceMatchVampPlugin(float inputSampleRate) : Chris@236: Plugin(inputSampleRate), Chris@236: m_stepSize(int(inputSampleRate * defaultStepTime + 0.001)), Chris@236: m_stepTime(defaultStepTime), Chris@236: m_blockSize(2048), Chris@241: m_coarseDownsample(defaultCoarseDownsample), Chris@246: m_downsamplePeaks(false), Chris@236: m_serialise(false), Chris@236: m_smooth(false), Chris@237: m_channelCount(0), Chris@236: m_params(defaultStepTime), Chris@236: m_defaultParams(defaultStepTime), Chris@236: m_feParams(inputSampleRate), Chris@236: m_defaultFeParams(44100), // parameter descriptors can't depend on samplerate Chris@243: m_secondReferenceFrequency(m_defaultFeParams.referenceFrequency), // must be declared/initialised after m_defaultFeParams Chris@236: m_fcParams(), Chris@236: m_defaultFcParams(), Chris@236: m_dParams(), Chris@243: m_defaultDParams(), Chris@243: m_fdParams(defaultStepTime), Chris@243: m_defaultFdParams(defaultStepTime) Chris@236: { Chris@243: // for the coarse subsequence span aligner: Chris@243: m_fdParams.diagonalWeight = m_defaultFdParams.diagonalWeight = Chris@243: defaultSubsequenceDiagonalWeight; Chris@243: Chris@243: // for the MATCH phase following subsequence span identification: Chris@243: m_params.diagonalWeight = m_defaultParams.diagonalWeight = Chris@243: defaultAnchoredDiagonalWeight; Chris@243: Chris@243: // and of course Chris@243: m_fdParams.subsequence = m_defaultFdParams.subsequence = true; Chris@243: Chris@236: if (inputSampleRate < sampleRateMin) { Chris@237: cerr << "SubsequenceMatchVampPlugin::SubsequenceMatchVampPlugin: input sample rate " Chris@237: << inputSampleRate << " < min supported rate " Chris@237: << sampleRateMin << ", plugin will refuse to initialise" << endl; Chris@236: } Chris@236: } Chris@236: Chris@236: SubsequenceMatchVampPlugin::~SubsequenceMatchVampPlugin() Chris@236: { Chris@236: } Chris@236: Chris@236: string Chris@236: SubsequenceMatchVampPlugin::getIdentifier() const Chris@236: { Chris@237: return "match-subsequence"; Chris@236: } Chris@236: Chris@236: string Chris@236: SubsequenceMatchVampPlugin::getName() const Chris@236: { Chris@236: return "Match Subsequence Aligner"; Chris@236: } Chris@236: Chris@236: string Chris@236: SubsequenceMatchVampPlugin::getDescription() const Chris@236: { Chris@236: return "Calculate alignment between a reference performance and a performance known to represent only part of the same material"; Chris@236: } Chris@236: Chris@236: string Chris@236: SubsequenceMatchVampPlugin::getMaker() const Chris@236: { Chris@236: return "Simon Dixon (plugin by Chris Cannam)"; Chris@236: } Chris@236: Chris@236: int Chris@236: SubsequenceMatchVampPlugin::getPluginVersion() const Chris@236: { Chris@236: return 3; Chris@236: } Chris@236: Chris@236: string Chris@236: SubsequenceMatchVampPlugin::getCopyright() const Chris@236: { Chris@236: return "GPL"; Chris@236: } Chris@236: Chris@236: SubsequenceMatchVampPlugin::ParameterList Chris@236: SubsequenceMatchVampPlugin::getParameterDescriptors() const Chris@236: { Chris@236: ParameterList list; Chris@236: Chris@236: ParameterDescriptor desc; Chris@236: Chris@236: desc.identifier = "freq1"; Chris@236: desc.name = "Tuning frequency of first input"; Chris@241: desc.description = "Tuning frequency (concert A) for the reference audio"; Chris@236: desc.minValue = 220.0; Chris@236: desc.maxValue = 880.0; Chris@236: desc.defaultValue = float(m_defaultFeParams.referenceFrequency); Chris@236: desc.isQuantized = false; Chris@236: desc.unit = "Hz"; Chris@236: list.push_back(desc); Chris@236: Chris@236: desc.identifier = "freq2"; Chris@236: desc.name = "Tuning frequency of second input"; Chris@246: desc.description = "Tuning frequency (concert A) for the other audio."; Chris@236: desc.minValue = 220.0; Chris@236: desc.maxValue = 880.0; Chris@236: desc.defaultValue = float(m_defaultFeParams.referenceFrequency); Chris@236: desc.isQuantized = false; Chris@236: desc.unit = "Hz"; Chris@236: list.push_back(desc); Chris@236: Chris@236: desc.identifier = "minfreq"; Chris@236: desc.name = "Minimum frequency"; Chris@241: desc.description = "Minimum frequency to include in features"; Chris@236: desc.minValue = 0.0; Chris@236: desc.maxValue = float(m_inputSampleRate / 4.f); Chris@236: desc.defaultValue = float(m_defaultFeParams.minFrequency); Chris@236: desc.isQuantized = false; Chris@236: desc.unit = "Hz"; Chris@236: list.push_back(desc); Chris@236: Chris@236: desc.identifier = "maxfreq"; Chris@236: desc.name = "Maximum frequency"; Chris@241: desc.description = "Maximum frequency to include in features"; Chris@236: desc.minValue = 1000.0; Chris@236: desc.maxValue = float(m_inputSampleRate / 2.f); Chris@236: desc.defaultValue = float(m_defaultFeParams.maxFrequency); Chris@236: desc.isQuantized = false; Chris@236: desc.unit = "Hz"; Chris@236: list.push_back(desc); Chris@236: Chris@236: desc.unit = ""; Chris@241: Chris@241: desc.identifier = "coarsedownsample"; Chris@241: desc.name = "Coarse alignment downsample factor"; Chris@241: desc.description = "Downsample factor for features used in first coarse subsequence-alignment step"; Chris@241: desc.minValue = 1; Chris@241: desc.maxValue = 200; Chris@241: desc.defaultValue = float(defaultCoarseDownsample); Chris@241: desc.isQuantized = true; Chris@241: desc.quantizeStep = 1; Chris@241: list.push_back(desc); Chris@236: Chris@246: desc.identifier = "downsamplemethod"; Chris@246: desc.name = "Coarse alignment downsample method"; Chris@246: desc.description = "Downsample method for features used in first coarse subsequence-alignment step"; Chris@246: desc.minValue = 0; Chris@246: desc.maxValue = 1; Chris@246: desc.defaultValue = 0; Chris@246: desc.isQuantized = true; Chris@246: desc.quantizeStep = 1; Chris@246: desc.valueNames.clear(); Chris@246: desc.valueNames.push_back("Average"); Chris@246: desc.valueNames.push_back("Peak"); Chris@246: list.push_back(desc); Chris@246: Chris@236: desc.identifier = "usechroma"; Chris@236: desc.name = "Feature type"; Chris@236: desc.description = "Whether to use warped spectrogram or chroma frequency map"; Chris@236: desc.minValue = 0; Chris@236: desc.maxValue = 1; Chris@236: desc.defaultValue = m_defaultFeParams.useChromaFrequencyMap ? 1 : 0; Chris@236: desc.isQuantized = true; Chris@236: desc.quantizeStep = 1; Chris@236: desc.valueNames.clear(); Chris@236: desc.valueNames.push_back("Spectral"); Chris@236: desc.valueNames.push_back("Chroma"); Chris@236: list.push_back(desc); Chris@236: Chris@236: desc.valueNames.clear(); Chris@236: Chris@236: desc.identifier = "usespecdiff"; Chris@236: desc.name = "Use feature difference"; Chris@246: desc.description = "Whether to use half-wave rectified feature-to-feature difference instead of straight spectral or chroma feature (does not apply to downsampled features)"; Chris@236: desc.minValue = 0; Chris@236: desc.maxValue = 1; Chris@236: desc.defaultValue = float(m_defaultFcParams.order); Chris@236: desc.isQuantized = true; Chris@236: desc.quantizeStep = 1; Chris@236: list.push_back(desc); Chris@236: Chris@236: desc.identifier = "framenorm"; Chris@236: desc.name = "Frame normalisation"; Chris@236: desc.description = "Type of normalisation to use for features"; Chris@236: desc.minValue = 0; Chris@236: desc.maxValue = 2; Chris@236: desc.defaultValue = float(m_defaultFcParams.norm); Chris@236: desc.isQuantized = true; Chris@236: desc.quantizeStep = 1; Chris@236: desc.valueNames.clear(); Chris@236: desc.valueNames.push_back("None"); Chris@236: desc.valueNames.push_back("Sum to 1"); Chris@236: desc.valueNames.push_back("Long-term average"); Chris@236: list.push_back(desc); Chris@236: desc.valueNames.clear(); Chris@236: desc.defaultValue = float(m_defaultFcParams.silenceThreshold); Chris@236: Chris@236: desc.identifier = "metric"; Chris@236: desc.name = "Distance metric"; Chris@241: desc.description = "Metric for distance calculations"; Chris@236: desc.minValue = 0; Chris@236: desc.maxValue = 2; Chris@236: desc.defaultValue = float(m_defaultDParams.metric); Chris@236: desc.isQuantized = true; Chris@236: desc.quantizeStep = 1; Chris@236: desc.valueNames.clear(); Chris@236: desc.valueNames.push_back("Manhattan"); Chris@236: desc.valueNames.push_back("Euclidean"); Chris@236: desc.valueNames.push_back("Cosine"); Chris@236: list.push_back(desc); Chris@236: desc.valueNames.clear(); Chris@236: Chris@236: desc.identifier = "distnorm"; Chris@236: desc.name = "Distance normalisation"; Chris@236: desc.description = "Type of normalisation to use for distance metric"; Chris@236: desc.minValue = 0; Chris@236: desc.maxValue = 2; Chris@236: desc.defaultValue = float(m_defaultDParams.norm); Chris@236: desc.isQuantized = true; Chris@236: desc.quantizeStep = 1; Chris@236: desc.valueNames.clear(); Chris@236: desc.valueNames.push_back("None"); Chris@236: desc.valueNames.push_back("Sum of frames"); Chris@236: desc.valueNames.push_back("Log sum of frames"); Chris@236: list.push_back(desc); Chris@236: desc.valueNames.clear(); Chris@236: Chris@236: #ifdef USE_COMPACT_TYPES Chris@236: desc.identifier = "scale"; Chris@236: desc.name = "Distance scale"; Chris@236: desc.description = "Scale factor to use when mapping distance metric into byte range for storage"; Chris@236: desc.minValue = 1; Chris@236: desc.maxValue = 1000; Chris@236: desc.defaultValue = float(m_defaultDParams.scale); Chris@236: desc.isQuantized = false; Chris@236: list.push_back(desc); Chris@236: #endif Chris@236: Chris@236: desc.identifier = "silencethreshold"; Chris@236: desc.name = "Silence threshold"; Chris@236: desc.description = "Total frame energy threshold below which a feature will be regarded as silent"; Chris@236: desc.minValue = 0; Chris@236: desc.maxValue = 0.1f; Chris@236: desc.defaultValue = float(m_defaultFcParams.silenceThreshold); Chris@236: desc.isQuantized = false; Chris@236: list.push_back(desc); Chris@236: Chris@236: desc.identifier = "noise"; Chris@236: desc.name = "Add noise"; Chris@241: desc.description = "Whether to mix in a small constant white noise term when calculating feature distance. This can improve alignment against sources containing cleanly synthesised audio"; Chris@236: desc.minValue = 0; Chris@236: desc.maxValue = 1; Chris@236: desc.defaultValue = float(m_defaultDParams.noise); Chris@236: desc.isQuantized = true; Chris@236: desc.quantizeStep = 1; Chris@236: list.push_back(desc); Chris@236: Chris@236: desc.identifier = "gradientlimit"; Chris@236: desc.name = "Gradient limit"; Chris@236: desc.description = "Limit of number of frames that will be accepted from one source without a frame from the other source being accepted"; Chris@236: desc.minValue = 1; Chris@236: desc.maxValue = 10; Chris@236: desc.defaultValue = float(m_defaultParams.maxRunCount); Chris@236: desc.isQuantized = true; Chris@236: desc.quantizeStep = 1; Chris@236: list.push_back(desc); Chris@236: Chris@236: desc.identifier = "zonewidth"; Chris@236: desc.name = "Search zone width"; Chris@236: desc.description = "Width of the search zone (error margin) either side of the ongoing match position, in seconds"; Chris@236: desc.minValue = 1; Chris@236: desc.maxValue = 60; Chris@236: desc.defaultValue = float(m_defaultParams.blockTime); Chris@236: desc.isQuantized = true; Chris@236: desc.quantizeStep = 1; Chris@236: desc.unit = "s"; Chris@236: list.push_back(desc); Chris@236: Chris@236: desc.identifier = "diagonalweight"; Chris@243: desc.name = "Diagonal weight, anchored"; Chris@243: desc.description = "Weight applied to cost of diagonal step relative to horizontal or vertical step, during the anchored (non-subsequence) alignment step"; Chris@243: desc.minValue = 0.5; Chris@236: desc.maxValue = 2.0; Chris@236: desc.defaultValue = float(m_defaultParams.diagonalWeight); Chris@236: desc.isQuantized = false; Chris@236: desc.unit = ""; Chris@236: list.push_back(desc); Chris@243: Chris@243: desc.identifier = "diagonalweightsubsequence"; Chris@243: desc.name = "Diagonal weight, subsequence"; Chris@243: desc.description = "Weight applied to cost of diagonal step relative to horizontal or vertical step, during the coarse subsequence alignment step"; Chris@243: desc.minValue = 0.5; Chris@243: desc.maxValue = 2.0; Chris@243: desc.defaultValue = float(m_defaultFdParams.diagonalWeight); Chris@243: desc.isQuantized = false; Chris@243: desc.unit = ""; Chris@243: list.push_back(desc); Chris@236: Chris@236: desc.identifier = "smooth"; Chris@236: desc.name = "Use path smoothing"; Chris@236: desc.description = "Smooth the path by replacing steps with diagonals. (This was enabled by default in earlier versions of the MATCH plugin, but the default now is to produce an un-smoothed path.)"; Chris@236: desc.minValue = 0; Chris@236: desc.maxValue = 1; Chris@236: desc.defaultValue = 0; Chris@236: desc.isQuantized = true; Chris@236: desc.quantizeStep = 1; Chris@236: desc.unit = ""; Chris@236: list.push_back(desc); Chris@236: Chris@236: desc.identifier = "serialise"; Chris@236: desc.name = "Serialise plugin invocations"; Chris@236: desc.description = "Reduce potential memory load at the expense of multiprocessor performance by serialising multi-threaded plugin runs"; Chris@236: desc.minValue = 0; Chris@236: desc.maxValue = 1; Chris@236: desc.defaultValue = 0; Chris@236: desc.isQuantized = true; Chris@236: desc.quantizeStep = 1; Chris@236: list.push_back(desc); Chris@236: Chris@236: return list; Chris@236: } Chris@236: Chris@236: float Chris@236: SubsequenceMatchVampPlugin::getParameter(std::string name) const Chris@236: { Chris@236: if (name == "serialise") { Chris@236: return m_serialise ? 1.0 : 0.0; Chris@236: } else if (name == "framenorm") { Chris@236: return float(m_fcParams.norm); Chris@236: } else if (name == "distnorm") { Chris@236: return float(m_dParams.norm); Chris@236: } else if (name == "usespecdiff") { Chris@236: return float(m_fcParams.order); Chris@236: } else if (name == "usechroma") { Chris@236: return m_feParams.useChromaFrequencyMap ? 1.0 : 0.0; Chris@236: } else if (name == "gradientlimit") { Chris@236: return float(m_params.maxRunCount); Chris@236: } else if (name == "diagonalweight") { Chris@236: return float(m_params.diagonalWeight); Chris@243: } else if (name == "diagonalweightsubsequence") { Chris@243: return float(m_fdParams.diagonalWeight); Chris@236: } else if (name == "zonewidth") { Chris@236: return float(m_params.blockTime); Chris@236: } else if (name == "smooth") { Chris@236: return m_smooth ? 1.0 : 0.0; Chris@236: } else if (name == "silencethreshold") { Chris@236: return float(m_fcParams.silenceThreshold); Chris@236: } else if (name == "metric") { Chris@236: return float(m_dParams.metric); Chris@236: } else if (name == "noise") { Chris@236: return m_dParams.noise; Chris@236: } else if (name == "scale") { Chris@236: return float(m_dParams.scale); Chris@236: } else if (name == "freq1") { Chris@236: return float(m_feParams.referenceFrequency); Chris@236: } else if (name == "freq2") { Chris@236: return float(m_secondReferenceFrequency); Chris@236: } else if (name == "minfreq") { Chris@236: return float(m_feParams.minFrequency); Chris@236: } else if (name == "maxfreq") { Chris@236: return float(m_feParams.maxFrequency); Chris@241: } else if (name == "coarsedownsample") { Chris@241: return float(m_coarseDownsample); Chris@246: } else if (name == "downsamplemethod") { Chris@246: return m_downsamplePeaks ? 1.0 : 0.0; Chris@236: } Chris@236: Chris@236: return 0.0; Chris@236: } Chris@236: Chris@236: void Chris@236: SubsequenceMatchVampPlugin::setParameter(std::string name, float value) Chris@236: { Chris@236: if (name == "serialise") { Chris@236: m_serialise = (value > 0.5); Chris@236: } else if (name == "framenorm") { Chris@236: m_fcParams.norm = FeatureConditioner::Normalisation(int(value + 0.1)); Chris@236: } else if (name == "distnorm") { Chris@236: m_dParams.norm = DistanceMetric::DistanceNormalisation(int(value + 0.1)); Chris@236: } else if (name == "usespecdiff") { Chris@236: m_fcParams.order = FeatureConditioner::OutputOrder(int(value + 0.1)); Chris@236: } else if (name == "usechroma") { Chris@236: m_feParams.useChromaFrequencyMap = (value > 0.5); Chris@236: } else if (name == "gradientlimit") { Chris@236: m_params.maxRunCount = int(value + 0.1); Chris@236: } else if (name == "diagonalweight") { Chris@236: m_params.diagonalWeight = value; Chris@243: } else if (name == "diagonalweightsubsequence") { Chris@243: m_fdParams.diagonalWeight = value; Chris@236: } else if (name == "zonewidth") { Chris@236: m_params.blockTime = value; Chris@236: } else if (name == "smooth") { Chris@236: m_smooth = (value > 0.5); Chris@236: } else if (name == "silencethreshold") { Chris@236: m_fcParams.silenceThreshold = value; Chris@236: } else if (name == "metric") { Chris@236: m_dParams.metric = DistanceMetric::Metric(int(value + 0.1)); Chris@236: } else if (name == "noise") { Chris@236: m_dParams.noise = DistanceMetric::NoiseAddition(int(value + 0.1)); Chris@236: } else if (name == "scale") { Chris@236: m_dParams.scale = value; Chris@236: } else if (name == "freq1") { Chris@236: m_feParams.referenceFrequency = value; Chris@236: } else if (name == "freq2") { Chris@236: m_secondReferenceFrequency = value; Chris@236: } else if (name == "minfreq") { Chris@236: m_feParams.minFrequency = value; Chris@236: } else if (name == "maxfreq") { Chris@236: m_feParams.maxFrequency = value; Chris@241: } else if (name == "coarsedownsample") { Chris@241: m_coarseDownsample = int(value + 0.1); Chris@246: } else if (name == "downsamplemethod") { Chris@246: m_downsamplePeaks = (value > 0.5); Chris@236: } Chris@236: } Chris@236: Chris@246: SubsequenceMatchVampPlugin::InputDomain Chris@246: SubsequenceMatchVampPlugin::getInputDomain() const Chris@246: { Chris@246: return FrequencyDomain; Chris@246: } Chris@246: Chris@236: size_t Chris@236: SubsequenceMatchVampPlugin::getPreferredStepSize() const Chris@236: { Chris@236: return int(m_inputSampleRate * defaultStepTime + 0.001); Chris@236: } Chris@236: Chris@236: size_t Chris@236: SubsequenceMatchVampPlugin::getPreferredBlockSize() const Chris@236: { Chris@236: return m_defaultFeParams.fftSize; Chris@236: } Chris@236: Chris@246: size_t Chris@246: SubsequenceMatchVampPlugin::getMinChannelCount() const Chris@246: { Chris@246: return 2; Chris@246: } Chris@246: Chris@246: size_t Chris@246: SubsequenceMatchVampPlugin::getMaxChannelCount() const Chris@246: { Chris@246: return 2; Chris@246: } Chris@246: Chris@236: bool Chris@236: SubsequenceMatchVampPlugin::initialise(size_t channels, size_t stepSize, size_t blockSize) Chris@236: { Chris@236: if (m_inputSampleRate < sampleRateMin) { Chris@237: cerr << "SubsequenceMatchVampPlugin::SubsequenceMatchVampPlugin: input sample rate " Chris@237: << m_inputSampleRate << " < min supported rate " Chris@237: << sampleRateMin << endl; Chris@236: return false; Chris@236: } Chris@236: if (channels < getMinChannelCount() || Chris@237: channels > getMaxChannelCount()) { Chris@237: return false; Chris@237: } Chris@236: if (stepSize > blockSize/2 || Chris@237: blockSize != getPreferredBlockSize()) { Chris@237: return false; Chris@237: } Chris@236: Chris@236: m_stepSize = int(stepSize); Chris@236: m_stepTime = float(stepSize) / m_inputSampleRate; Chris@236: m_blockSize = int(blockSize); Chris@236: Chris@236: m_params.hopTime = m_stepTime; Chris@236: m_feParams.fftSize = m_blockSize; Chris@236: Chris@237: m_channelCount = channels; Chris@237: Chris@237: reset(); Chris@237: Chris@236: return true; Chris@236: } Chris@236: Chris@236: void Chris@236: SubsequenceMatchVampPlugin::reset() Chris@236: { Chris@237: m_featureExtractors.clear(); Chris@237: m_features.clear(); Chris@237: m_startTime = Vamp::RealTime::zeroTime; Chris@237: Chris@237: FeatureExtractor::Parameters feParams(m_feParams); Chris@237: Chris@237: for (size_t c = 0; c < m_channelCount; ++c) { Chris@239: if (c > 0 && m_secondReferenceFrequency != 0.0) { Chris@237: feParams.referenceFrequency = m_secondReferenceFrequency; Chris@237: } Chris@237: m_featureExtractors.push_back(FeatureExtractor(feParams)); Chris@237: m_features.push_back(featureseq_t()); Chris@237: } Chris@236: } Chris@236: Chris@236: SubsequenceMatchVampPlugin::OutputList Chris@236: SubsequenceMatchVampPlugin::getOutputDescriptors() const Chris@236: { Chris@236: OutputList list; Chris@236: Chris@236: float outRate = 1.0f / m_stepTime; Chris@236: Chris@236: OutputDescriptor desc; Chris@236: desc.identifier = "path"; Chris@236: desc.name = "Path"; Chris@236: desc.description = "Alignment path"; Chris@236: desc.unit = ""; Chris@236: desc.hasFixedBinCount = true; Chris@236: desc.binCount = 1; Chris@236: desc.hasKnownExtents = false; Chris@236: desc.isQuantized = true; Chris@236: desc.quantizeStep = 1; Chris@236: desc.sampleType = OutputDescriptor::VariableSampleRate; Chris@236: desc.sampleRate = outRate; Chris@236: m_pathOutNo = int(list.size()); Chris@236: list.push_back(desc); Chris@236: Chris@236: desc.identifier = "b_a"; Chris@236: desc.name = "B-A Timeline"; Chris@236: desc.description = "Timing in performance A corresponding to moments in performance B"; Chris@236: desc.unit = "sec"; Chris@236: desc.hasFixedBinCount = true; Chris@236: desc.binCount = 1; Chris@236: desc.hasKnownExtents = false; Chris@236: desc.isQuantized = false; Chris@236: desc.sampleType = OutputDescriptor::VariableSampleRate; Chris@236: desc.sampleRate = outRate; Chris@236: m_baOutNo = int(list.size()); Chris@236: list.push_back(desc); Chris@237: Chris@237: desc.identifier = "span"; Chris@237: desc.name = "Subsequence Span"; Chris@237: desc.description = "Region in performance A corresponding to the whole of performance B"; Chris@237: desc.unit = ""; Chris@237: desc.hasFixedBinCount = true; Chris@237: desc.binCount = 0; Chris@237: desc.hasKnownExtents = false; Chris@237: desc.isQuantized = false; Chris@237: desc.sampleType = OutputDescriptor::VariableSampleRate; Chris@237: desc.sampleRate = outRate; Chris@237: desc.hasDuration = true; Chris@237: m_spanOutNo = int(list.size()); Chris@237: list.push_back(desc); Chris@236: Chris@236: return list; Chris@236: } Chris@236: Chris@236: SubsequenceMatchVampPlugin::FeatureSet Chris@236: SubsequenceMatchVampPlugin::process(const float *const *inputBuffers, Chris@236: Vamp::RealTime timestamp) Chris@236: { Chris@237: if (m_featureExtractors.empty()) { Chris@237: cerr << "SubsequenceMatchVampPlugin::process: Plugin has not been (properly?) initialised" << endl; Chris@237: return {}; Chris@237: } Chris@237: Chris@237: if (m_features[0].empty()) { Chris@237: m_startTime = timestamp; Chris@237: } Chris@236: Chris@237: for (size_t c = 0; c < m_featureExtractors.size(); ++c) { Chris@237: m_features[c].push_back(m_featureExtractors[c].process Chris@237: (inputBuffers[c])); Chris@237: } Chris@237: Chris@237: return {}; Chris@237: } Chris@237: Chris@246: size_t Chris@246: SubsequenceMatchVampPlugin::findNonEmptyLength(const featureseq_t &ff) Chris@237: { Chris@246: bool haveNonEmpty = false; Chris@237: size_t lastNonEmpty = 0; Chris@237: for (size_t i = ff.size(); i > 0; ) { Chris@237: --i; Chris@237: if (MatchPipeline::isAboveEndingThreshold(ff[i])) { Chris@246: haveNonEmpty = true; Chris@237: lastNonEmpty = i; Chris@237: break; Chris@237: } Chris@237: } Chris@246: if (haveNonEmpty) { Chris@246: return lastNonEmpty + 1; Chris@246: } else { Chris@246: return 0; Chris@246: } Chris@246: } Chris@246: Chris@246: featureseq_t Chris@246: SubsequenceMatchVampPlugin::downsample(const featureseq_t &ff, Chris@246: size_t inLength) Chris@246: { Chris@246: if (ff.empty()) { Chris@246: return ff; Chris@246: } Chris@237: Chris@237: FeatureConditioner::Parameters fcParams(m_fcParams); Chris@237: fcParams.order = FeatureConditioner::OutputFeatures; // not the difference Chris@237: FeatureConditioner fc(fcParams); Chris@237: Chris@237: int featureSize = m_featureExtractors[0].getFeatureSize(); Chris@237: Chris@237: featureseq_t d; Chris@237: Chris@237: size_t i = 0; Chris@246: while (i < inLength) { Chris@237: feature_t acc(featureSize, 0); Chris@237: int j = 0; Chris@237: while (j < m_coarseDownsample) { Chris@237: if (i >= ff.size()) break; Chris@237: feature_t feature = fc.process(ff[i]); Chris@246: if (m_downsamplePeaks) { Chris@246: for (int k = 0; k < featureSize; ++k) { Chris@246: if (feature[k] > acc[k]) { Chris@246: acc[k] = feature[k]; Chris@246: } Chris@246: } Chris@246: } else { Chris@246: for (int k = 0; k < featureSize; ++k) { Chris@246: acc[k] += feature[k]; Chris@246: } Chris@237: } Chris@237: ++i; Chris@237: ++j; Chris@237: } Chris@246: if (!m_downsamplePeaks && j > 0) { Chris@237: for (int k = 0; k < featureSize; ++k) { Chris@237: acc[k] /= float(j); Chris@237: } Chris@237: } Chris@237: d.push_back(acc); Chris@237: } Chris@237: Chris@237: return d; Chris@236: } Chris@236: Chris@236: SubsequenceMatchVampPlugin::FeatureSet Chris@236: SubsequenceMatchVampPlugin::getRemainingFeatures() Chris@236: { Chris@237: if (m_featureExtractors.empty()) { Chris@237: cerr << "SubsequenceMatchVampPlugin::getRemainingFeatures: Plugin has not been (properly?) initialised" << endl; Chris@237: return {}; Chris@237: } Chris@237: Chris@236: #ifdef _WIN32 Chris@237: static HANDLE mutex; Chris@236: #else Chris@237: static pthread_mutex_t mutex; Chris@236: #endif Chris@237: static bool mutexInitialised = false; Chris@236: Chris@236: if (m_serialise) { Chris@237: if (!mutexInitialised) { Chris@237: #ifdef _WIN32 Chris@237: mutex = CreateMutex(NULL, FALSE, NULL); Chris@237: #else Chris@237: pthread_mutex_init(&mutex, 0); Chris@237: #endif Chris@237: mutexInitialised = true; Chris@237: } Chris@236: #ifdef _WIN32 Chris@236: WaitForSingleObject(mutex, INFINITE); Chris@236: #else Chris@236: pthread_mutex_lock(&mutex); Chris@236: #endif Chris@236: } Chris@236: Chris@240: FeatureSet returnFeatures = performAlignment(); Chris@240: Chris@240: if (m_serialise) { Chris@240: #ifdef _WIN32 Chris@240: ReleaseMutex(mutex); Chris@240: #else Chris@240: pthread_mutex_unlock(&mutex); Chris@240: #endif Chris@240: } Chris@240: Chris@240: return returnFeatures; Chris@240: } Chris@240: Chris@240: SubsequenceMatchVampPlugin::FeatureSet Chris@240: SubsequenceMatchVampPlugin::performAlignment() Chris@240: { Chris@246: size_t refLength = findNonEmptyLength(m_features[0]); Chris@246: featureseq_t downsampledRef = downsample(m_features[0], refLength); Chris@237: Chris@246: #ifdef DEBUG_SUBSEQUENCE_MATCH Chris@246: cerr << "SubsequenceMatchVampPlugin: reference downsampled sequence length = " << downsampledRef.size() << " (from " << refLength << " non-empty of " << m_features[0].size() << " total)" << endl; Chris@246: #endif Chris@237: Chris@243: FullDTW dtw(m_fdParams, m_dParams); Chris@237: Chris@236: FeatureSet returnFeatures; Chris@237: int featureSize = m_featureExtractors[0].getFeatureSize(); Chris@236: Chris@237: int rate = int(m_inputSampleRate + 0.5); Chris@237: Chris@237: for (size_t c = 1; c < m_channelCount; ++c) { Chris@237: Chris@246: size_t otherLength = findNonEmptyLength(m_features[c]); Chris@246: featureseq_t downsampledOther = downsample(m_features[c], otherLength); Chris@237: Chris@246: #ifdef DEBUG_SUBSEQUENCE_MATCH Chris@246: cerr << "SubsequenceMatchVampPlugin: other downsampled sequence length = " << downsampledOther.size() << " (from " << otherLength << " non-empty of " << m_features[c].size() << " total)" << endl; Chris@246: #endif Chris@237: Chris@237: vector subsequenceAlignment = dtw.align(downsampledRef, Chris@237: downsampledOther); Chris@237: Chris@237: if (subsequenceAlignment.empty()) { Chris@237: cerr << "No subsequenceAlignment??" << endl; Chris@237: continue; Chris@237: } Chris@237: Chris@237: int64_t first = subsequenceAlignment[0]; Chris@237: int64_t last = subsequenceAlignment[subsequenceAlignment.size()-1]; Chris@237: Chris@246: #ifdef DEBUG_SUBSEQUENCE_MATCH Chris@246: cerr << "Subsequence alignment maps 0 -> " << subsequenceAlignment.size()-1 << " to " << first << " -> " << last << endl; Chris@246: #endif Chris@237: Chris@237: if (last <= first) { Chris@241: cerr << "NOTE: Invalid span (" << first << " to " << last Chris@241: << "), reverting to aligning against whole of reference" Chris@241: << endl; Chris@241: first = 0; Chris@241: last = downsampledRef.size() - 1; Chris@241: } else if (first < 0 || last >= long(downsampledRef.size())) { Chris@241: cerr << "NOTE: Span end points (" << first << " to " Chris@241: << last << ") out of range (0 to " << downsampledRef.size()-1 Chris@241: << "), reverting to aligning against whole of reference" Chris@241: << endl; Chris@241: first = 0; Chris@241: last = downsampledRef.size() - 1; Chris@237: } Chris@237: Chris@237: Feature span; Chris@237: span.hasTimestamp = true; Chris@237: span.timestamp = Vamp::RealTime::frame2RealTime Chris@237: (first * m_coarseDownsample * m_stepSize, rate); Chris@237: span.hasDuration = true; Chris@237: span.duration = Vamp::RealTime::frame2RealTime Chris@237: ((last - first) * m_coarseDownsample * m_stepSize, rate); Chris@237: returnFeatures[m_spanOutNo].push_back(span); Chris@237: Chris@241: size_t firstAtOriginalRate = first * m_coarseDownsample; Chris@241: size_t lastAtOriginalRate = (last + 1) * m_coarseDownsample; Chris@241: Chris@241: if (lastAtOriginalRate >= m_features[0].size()) { Chris@241: lastAtOriginalRate = m_features[0].size() - 1; Chris@241: } Chris@241: Chris@237: featureseq_t referenceSubsequence Chris@241: (m_features[0].begin() + firstAtOriginalRate, Chris@241: m_features[0].begin() + lastAtOriginalRate); Chris@237: Chris@246: #ifdef DEBUG_SUBSEQUENCE_MATCH Chris@246: cerr << "Reference subsequence length = " << referenceSubsequence.size() Chris@246: << endl; Chris@246: cerr << "Other sequence length = " << otherLength << endl; Chris@246: #endif Chris@246: Chris@237: MatchPipeline pipeline(m_feParams, Chris@237: m_fcParams, Chris@237: m_dParams, Chris@237: m_params, Chris@237: m_secondReferenceFrequency); Chris@237: Chris@246: size_t sequenceLength = std::max(referenceSubsequence.size(), Chris@246: otherLength); Chris@246: Chris@246: #ifdef DEBUG_SUBSEQUENCE_MATCH Chris@246: cerr << "MATCH input sequences have length " << sequenceLength << endl; Chris@246: #endif Chris@246: Chris@246: for (size_t i = 0; i < sequenceLength; ++i) { Chris@237: feature_t f1(featureSize, 0); Chris@237: feature_t f2(featureSize, 0); Chris@237: if (i < referenceSubsequence.size()) { Chris@237: f1 = referenceSubsequence[i]; Chris@237: } Chris@246: if (i < otherLength) { Chris@237: f2 = m_features[c][i]; Chris@237: } Chris@237: pipeline.feedFeatures(f1, f2); Chris@237: } Chris@237: Chris@237: pipeline.finish(); Chris@237: Chris@237: vector pathx; Chris@237: vector pathy; Chris@237: int len = pipeline.retrievePath(m_smooth, pathx, pathy); Chris@237: Chris@237: int prevy = 0; Chris@246: Chris@246: #ifdef DEBUG_SUBSEQUENCE_MATCH Chris@246: cerr << "MATCH path has length " << len; Chris@246: if (len > 0) { Chris@246: cerr << " and goes from (" Chris@246: << pathx[0] << ", " << pathy[0] << ") to (" Chris@246: << pathx[len-1] << ", " << pathy[len-1] << ")"; Chris@246: if (len > 2) { Chris@246: cerr << " with penultimate point at (" Chris@246: << pathx[len-2] << ", " << pathy[len-2] << ")"; Chris@246: } Chris@246: cerr << endl; Chris@246: } else { Chris@246: cerr << endl; Chris@246: } Chris@246: #endif Chris@237: Chris@237: for (int i = 0; i < len; ++i) { Chris@237: Chris@237: int x = pathx[i]; Chris@237: int y = pathy[i] + int(first * m_coarseDownsample); Chris@246: Chris@237: Vamp::RealTime xt = Vamp::RealTime::frame2RealTime Chris@237: (x * m_stepSize, rate) + m_startTime; Chris@237: Vamp::RealTime yt = Vamp::RealTime::frame2RealTime Chris@237: (y * m_stepSize, rate) + m_startTime; Chris@237: Chris@237: Feature feature; Chris@237: feature.hasTimestamp = true; Chris@237: feature.timestamp = xt; Chris@237: feature.values.clear(); Chris@237: feature.values.push_back(float(yt.sec + double(yt.nsec)/1.0e9)); Chris@237: returnFeatures[m_pathOutNo].push_back(feature); Chris@237: Chris@237: if (y != prevy) { Chris@237: feature.hasTimestamp = true; Chris@237: feature.timestamp = yt; Chris@237: feature.values.clear(); Chris@237: feature.values.push_back(float(xt.sec + xt.msec()/1000.0)); Chris@237: returnFeatures[m_baOutNo].push_back(feature); Chris@237: } Chris@237: Chris@237: prevy = y; Chris@237: } Chris@237: } Chris@236: Chris@246: #ifdef DEBUG_SUBSEQUENCE_MATCH Chris@246: cerr << endl; Chris@246: #endif Chris@246: Chris@236: return returnFeatures; Chris@236: }