Mercurial > hg > svapp
changeset 771:1d6cca5a5621 pitch-align
Allow use of proper sparse models (i.e. retaining event time info) in alignment; use this to switch to note alignment, which is what we have most recently been doing in the external program. Not currently producing correct results, though
author | Chris Cannam |
---|---|
date | Fri, 29 May 2020 17:39:02 +0100 |
parents | 486add472c3f |
children | 8280f7a363d1 |
files | align/Align.cpp align/Align.h align/DTW.h align/TransformDTWAligner.cpp align/TransformDTWAligner.h framework/Document.cpp |
diffstat | 6 files changed, 258 insertions(+), 113 deletions(-) [+] |
line wrap: on
line diff
--- a/align/Align.cpp Thu May 28 17:52:19 2020 +0100 +++ b/align/Align.cpp Fri May 29 17:39:02 2020 +0100 @@ -46,8 +46,8 @@ return "match-alignment"; case MATCHAlignmentWithPitchCompare: return "match-alignment-with-pitch"; - case SungPitchContourAlignment: - return "sung-pitch-alignment"; + case SungNoteContourAlignment: + return "sung-note-alignment"; case TransformDrivenDTWAlignment: return "transform-driven-alignment"; case ExternalProgramAlignment: @@ -137,29 +137,36 @@ break; } - case SungPitchContourAlignment: + case SungNoteContourAlignment: { auto refModel = ModelById::get(reference); if (!refModel) return false; - + Transform transform = TransformFactory::getInstance()-> - getDefaultTransformFor("vamp:pyin:pyin:smoothedpitchtrack", + getDefaultTransformFor("vamp:pyin:pyin:notes", refModel->getSampleRate()); - transform.setParameter("outputunvoiced", 2.f); - aligner = make_shared<TransformDTWAligner> (doc, reference, toAlign, transform, - TransformDTWAligner::RiseFall, - [](double freq) { - if (freq < 0.0) { - return 0.0; + [](double prev, double curr) { + RiseFallDTW::Value v; + if (curr <= 0.0) { + v = { RiseFallDTW::Direction::None, 0.0 }; + } else if (prev <= 0.0) { + v = { RiseFallDTW::Direction::Up, 0.0 }; } else { - return double(Pitch::getPitchForFrequency(freq)); + double prevP = Pitch::getPitchForFrequency(prev); + double currP = Pitch::getPitchForFrequency(curr); + if (currP >= prevP) { + v = { RiseFallDTW::Direction::Up, currP - prevP }; + } else { + v = { RiseFallDTW::Direction::Down, prevP - currP }; + } } + return v; }); break; }
--- a/align/Align.h Thu May 28 17:52:19 2020 +0100 +++ b/align/Align.h Fri May 29 17:39:02 2020 +0100 @@ -39,7 +39,7 @@ TrimmedLinearAlignment, MATCHAlignment, MATCHAlignmentWithPitchCompare, - SungPitchContourAlignment, + SungNoteContourAlignment, TransformDrivenDTWAlignment, ExternalProgramAlignment,
--- a/align/DTW.h Thu May 28 17:52:19 2020 +0100 +++ b/align/DTW.h Fri May 29 17:39:02 2020 +0100 @@ -189,4 +189,11 @@ } }; +inline std::ostream &operator<<(std::ostream &s, const RiseFallDTW::Value v) { + return (s << + (v.direction == RiseFallDTW::Direction::None ? "=" : + v.direction == RiseFallDTW::Direction::Up ? "+" : "-") + << v.distance); +} + #endif
--- a/align/TransformDTWAligner.cpp Thu May 28 17:52:19 2020 +0100 +++ b/align/TransformDTWAligner.cpp Fri May 29 17:39:02 2020 +0100 @@ -16,6 +16,7 @@ #include "DTW.h" #include "data/model/SparseTimeValueModel.h" +#include "data/model/NoteModel.h" #include "data/model/RangeSummarisableTimeValueModel.h" #include "data/model/AlignmentModel.h" #include "data/model/AggregateWaveModel.h" @@ -31,6 +32,27 @@ using std::vector; +static +TransformDTWAligner::MagnitudePreprocessor identityMagnitudePreprocessor = + [](double x) { + return x; + }; + +static +TransformDTWAligner::RiseFallPreprocessor identityRiseFallPreprocessor = + [](double prev, double curr) { + if (prev == curr) { + return RiseFallDTW::Value({ RiseFallDTW::Direction::None, 0.0 }); + } else if (curr > prev) { + return RiseFallDTW::Value({ RiseFallDTW::Direction::Up, curr - prev }); + } else { + return RiseFallDTW::Value({ RiseFallDTW::Direction::Down, prev - curr }); + } + }; + +QMutex +TransformDTWAligner::m_dtwMutex; + TransformDTWAligner::TransformDTWAligner(Document *doc, ModelId reference, ModelId toAlign, @@ -42,7 +64,8 @@ m_transform(transform), m_dtwType(dtwType), m_incomplete(true), - m_outputPreprocessor([](double x) { return x; }) + m_magnitudePreprocessor(identityMagnitudePreprocessor), + m_riseFallPreprocessor(identityRiseFallPreprocessor) { } @@ -50,16 +73,31 @@ ModelId reference, ModelId toAlign, Transform transform, - DTWType dtwType, - std::function<double(double)> - outputPreprocessor) : + MagnitudePreprocessor outputPreprocessor) : m_document(doc), m_reference(reference), m_toAlign(toAlign), m_transform(transform), - m_dtwType(dtwType), + m_dtwType(Magnitude), m_incomplete(true), - m_outputPreprocessor(outputPreprocessor) + m_magnitudePreprocessor(outputPreprocessor), + m_riseFallPreprocessor(identityRiseFallPreprocessor) +{ +} + +TransformDTWAligner::TransformDTWAligner(Document *doc, + ModelId reference, + ModelId toAlign, + Transform transform, + RiseFallPreprocessor outputPreprocessor) : + m_document(doc), + m_reference(reference), + m_toAlign(toAlign), + m_transform(transform), + m_dtwType(RiseFall), + m_incomplete(true), + m_magnitudePreprocessor(identityMagnitudePreprocessor), + m_riseFallPreprocessor(outputPreprocessor) { } @@ -157,10 +195,10 @@ if (!m_incomplete) { return; } - +/* SVCERR << "TransformDTWAligner[" << this << "]: completionChanged: " << "model " << id << endl; - +*/ auto referenceOutputModel = ModelById::get(m_referenceOutputModel); auto toAlignOutputModel = ModelById::get(m_toAlignOutputModel); auto alignmentModel = ModelById::getAs<AlignmentModel>(m_alignmentModel); @@ -176,7 +214,7 @@ if (referenceReady && toAlignReady) { SVCERR << "TransformDTWAligner[" << this << "]: completionChanged: " - << "ready, calling performAlignment" << endl; + << "both models ready, calling performAlignment" << endl; alignmentModel->setCompletion(95); @@ -187,11 +225,11 @@ } } else { - +/* SVCERR << "TransformDTWAligner[" << this << "]: completionChanged: " << "not ready yet: reference completion " << referenceCompletion << ", toAlign completion " << toAlignCompletion << endl; - +*/ int completion = std::min(referenceCompletion, toAlignCompletion); completion = (completion * 94) / 100; @@ -210,76 +248,126 @@ } bool -TransformDTWAligner::performAlignmentMagnitude() +TransformDTWAligner::getValuesFrom(ModelId modelId, + vector<sv_frame_t> &frames, + vector<double> &values, + sv_frame_t &resolution) { - auto referenceOutputSTVM = ModelById::getAs<SparseTimeValueModel> - (m_referenceOutputModel); - auto toAlignOutputSTVM = ModelById::getAs<SparseTimeValueModel> - (m_toAlignOutputModel); - auto alignmentModel = ModelById::getAs<AlignmentModel> - (m_alignmentModel); + EventVector events; - if (!referenceOutputSTVM || !toAlignOutputSTVM) { - //!!! what? + if (auto model = ModelById::getAs<SparseTimeValueModel>(modelId)) { + resolution = model->getResolution(); + events = model->getAllEvents(); + } else if (auto model = ModelById::getAs<NoteModel>(modelId)) { + resolution = model->getResolution(); + events = model->getAllEvents(); + } else { + SVCERR << "TransformDTWAligner::getValuesFrom: Type of model " + << modelId << " is not supported" << endl; return false; } + frames.clear(); + values.clear(); + + for (auto e: events) { + frames.push_back(e.getFrame()); + values.push_back(e.getValue()); + } + + return true; +} + +Path +TransformDTWAligner::makePath(const vector<size_t> &alignment, + const vector<sv_frame_t> &refFrames, + const vector<sv_frame_t> &otherFrames, + sv_samplerate_t sampleRate, + sv_frame_t resolution) +{ + Path path(sampleRate, resolution); + + for (int i = 0; in_range_for(alignment, i); ++i) { + + // DTW returns "the index into s2 for each element in s1" + sv_frame_t refFrame = refFrames[i]; + + if (!in_range_for(otherFrames, alignment[i])) { + SVCERR << "TransformDTWAligner::makePath: Internal error: " + << "DTW maps index " << i << " in reference frame vector " + << "(size " << refFrames.size() << ") onto index " + << alignment[i] << " in other frame vector " + << "(only size " << otherFrames.size() << ")" << endl; + continue; + } + + sv_frame_t alignedFrame = otherFrames[alignment[i]]; + path.add(PathPoint(alignedFrame, refFrame)); + } + + return path; +} + +bool +TransformDTWAligner::performAlignmentMagnitude() +{ + auto alignmentModel = ModelById::getAs<AlignmentModel>(m_alignmentModel); if (!alignmentModel) { return false; } + + vector<sv_frame_t> refFrames, otherFrames; + vector<double> refValues, otherValues; + sv_frame_t resolution = 0; + + if (!getValuesFrom(m_referenceOutputModel, + refFrames, refValues, resolution)) { + return false; + } + + if (!getValuesFrom(m_toAlignOutputModel, + otherFrames, otherValues, resolution)) { + return false; + } vector<double> s1, s2; - - { - auto events = referenceOutputSTVM->getAllEvents(); - for (auto e: events) { - s1.push_back(m_outputPreprocessor(e.getValue())); - } - events = toAlignOutputSTVM->getAllEvents(); - for (auto e: events) { - s2.push_back(m_outputPreprocessor(e.getValue())); - } + for (double v: refValues) { + s1.push_back(m_magnitudePreprocessor(v)); + } + for (double v: otherValues) { + s2.push_back(m_magnitudePreprocessor(v)); } SVCERR << "TransformDTWAligner[" << this << "]: performAlignmentMagnitude: " << "Have " << s1.size() << " events from reference, " << s2.size() << " from toAlign" << endl; - + MagnitudeDTW dtw; vector<size_t> alignment; { SVCERR << "TransformDTWAligner[" << this << "]: serialising DTW to avoid over-allocation" << endl; - static QMutex mutex; - QMutexLocker locker(&mutex); - + QMutexLocker locker(&m_dtwMutex); alignment = dtw.alignSeries(s1, s2); } SVCERR << "TransformDTWAligner[" << this << "]: performAlignmentMagnitude: " << "DTW produced " << alignment.size() << " points:" << endl; - for (int i = 0; i < alignment.size() && i < 100; ++i) { + for (int i = 0; in_range_for(alignment, i) && i < 100; ++i) { SVCERR << alignment[i] << " "; } SVCERR << endl; + alignmentModel->setPath(makePath(alignment, + refFrames, + otherFrames, + alignmentModel->getSampleRate(), + resolution)); alignmentModel->setCompletion(100); - sv_frame_t resolution = referenceOutputSTVM->getResolution(); - sv_frame_t sourceFrame = 0; - - Path path(referenceOutputSTVM->getSampleRate(), resolution); - - for (size_t m: alignment) { - path.add(PathPoint(sourceFrame, sv_frame_t(m) * resolution)); - sourceFrame += resolution; - } - - alignmentModel->setPath(path); - - SVCERR << "TransformDTWAligner[" << this << "]: performAlignmentMagnitude: Done" - << endl; + SVCERR << "TransformDTWAligner[" << this + << "]: performAlignmentMagnitude: Done" << endl; m_incomplete = false; return true; @@ -288,59 +376,62 @@ bool TransformDTWAligner::performAlignmentRiseFall() { - auto referenceOutputSTVM = ModelById::getAs<SparseTimeValueModel> - (m_referenceOutputModel); - auto toAlignOutputSTVM = ModelById::getAs<SparseTimeValueModel> - (m_toAlignOutputModel); - auto alignmentModel = ModelById::getAs<AlignmentModel> - (m_alignmentModel); - - if (!referenceOutputSTVM || !toAlignOutputSTVM) { - //!!! what? - return false; - } - + auto alignmentModel = ModelById::getAs<AlignmentModel>(m_alignmentModel); if (!alignmentModel) { return false; } - auto convertEvents = - [this](const EventVector &ee) { + vector<sv_frame_t> refFrames, otherFrames; + vector<double> refValues, otherValues; + sv_frame_t resolution = 0; + + if (!getValuesFrom(m_referenceOutputModel, + refFrames, refValues, resolution)) { + return false; + } + + if (!getValuesFrom(m_toAlignOutputModel, + otherFrames, otherValues, resolution)) { + return false; + } + + auto preprocess = + [this](const std::vector<double> &vv) { vector<RiseFallDTW::Value> s; double prev = 0.0; - for (auto e: ee) { - double v = m_outputPreprocessor(e.getValue()); - if (v == prev || s.empty()) { - s.push_back({ RiseFallDTW::Direction::None, 0.0 }); - } else if (v > prev) { - s.push_back({ RiseFallDTW::Direction::Up, v - prev }); - } else { - s.push_back({ RiseFallDTW::Direction::Down, prev - v }); - } + for (auto curr: vv) { + s.push_back(m_riseFallPreprocessor(prev, curr)); + prev = curr; } return s; - }; + }; - vector<RiseFallDTW::Value> s1 = - convertEvents(referenceOutputSTVM->getAllEvents()); - - vector<RiseFallDTW::Value> s2 = - convertEvents(toAlignOutputSTVM->getAllEvents()); + vector<RiseFallDTW::Value> s1 = preprocess(refValues); + vector<RiseFallDTW::Value> s2 = preprocess(otherValues); SVCERR << "TransformDTWAligner[" << this << "]: performAlignmentRiseFall: " << "Have " << s1.size() << " events from reference, " << s2.size() << " from toAlign" << endl; + SVCERR << "Reference:" << endl; + for (int i = 0; in_range_for(s1, i) && i < 100; ++i) { + SVCERR << s1[i] << " "; + } + SVCERR << endl; + + SVCERR << "toAlign:" << endl; + for (int i = 0; in_range_for(s2, i) && i < 100; ++i) { + SVCERR << s2[i] << " "; + } + SVCERR << endl; + RiseFallDTW dtw; - vector<size_t> alignment; { SVCERR << "TransformDTWAligner[" << this << "]: serialising DTW to avoid over-allocation" << endl; - static QMutex mutex; - QMutexLocker locker(&mutex); - + QMutexLocker locker(&m_dtwMutex); alignment = dtw.alignSeries(s1, s2); } @@ -351,20 +442,14 @@ } SVCERR << endl; + alignmentModel->setPath(makePath(alignment, + refFrames, + otherFrames, + alignmentModel->getSampleRate(), + resolution)); + alignmentModel->setCompletion(100); - sv_frame_t resolution = referenceOutputSTVM->getResolution(); - sv_frame_t sourceFrame = 0; - - Path path(referenceOutputSTVM->getSampleRate(), resolution); - - for (size_t m: alignment) { - path.add(PathPoint(sourceFrame, sv_frame_t(m) * resolution)); - sourceFrame += resolution; - } - - alignmentModel->setPath(path); - SVCERR << "TransformDTWAligner[" << this << "]: performAlignmentRiseFall: Done" << endl;
--- a/align/TransformDTWAligner.h Thu May 28 17:52:19 2020 +0100 +++ b/align/TransformDTWAligner.h Fri May 29 17:39:02 2020 +0100 @@ -16,11 +16,15 @@ #define SV_TRANSFORM_DTW_ALIGNER_H #include "Aligner.h" +#include "DTW.h" #include "transform/Transform.h" +#include "svcore/data/model/Path.h" #include <functional> +#include <QMutex> + class AlignmentModel; class Document; @@ -33,19 +37,46 @@ Magnitude, RiseFall }; - + + /** + * Create a TransformDTWAligner that runs the given transform on + * both models and feeds the resulting values into the given DTW + * type. If DTWType is Magnitude, the transform output values are + * used unmodified; if RiseFall, the deltas between consecutive + * values are used. + */ TransformDTWAligner(Document *doc, ModelId reference, ModelId toAlign, Transform transform, DTWType dtwType); - + + typedef std::function<double(double)> MagnitudePreprocessor; + + /** + * Create a TransformDTWAligner that runs the given transform on + * both models, applies the supplied output preprocessor, and + * feeds the resulting values into a Magnitude DTW type. + */ TransformDTWAligner(Document *doc, ModelId reference, ModelId toAlign, Transform transform, - DTWType dtwType, - std::function<double(double)> outputPreprocessor); + MagnitudePreprocessor outputPreprocessor); + + typedef std::function<RiseFallDTW::Value(double prev, double curr)> + RiseFallPreprocessor; + + /** + * Create a TransformDTWAligner that runs the given transform on + * both models, applies the supplied output preprocessor, and + * feeds the resulting values into a RiseFall DTW type. + */ + TransformDTWAligner(Document *doc, + ModelId reference, + ModelId toAlign, + Transform transform, + RiseFallPreprocessor outputPreprocessor); // Destroy the aligner, cleanly cancelling any ongoing alignment ~TransformDTWAligner(); @@ -61,6 +92,17 @@ bool performAlignment(); bool performAlignmentMagnitude(); bool performAlignmentRiseFall(); + + bool getValuesFrom(ModelId modelId, + std::vector<sv_frame_t> &frames, + std::vector<double> &values, + sv_frame_t &resolution); + + Path makePath(const std::vector<size_t> &alignment, + const std::vector<sv_frame_t> &refFrames, + const std::vector<sv_frame_t> &otherFrames, + sv_samplerate_t sampleRate, + sv_frame_t resolution); Document *m_document; ModelId m_reference; @@ -71,7 +113,10 @@ Transform m_transform; DTWType m_dtwType; bool m_incomplete; - std::function<double(double)> m_outputPreprocessor; + MagnitudePreprocessor m_magnitudePreprocessor; + RiseFallPreprocessor m_riseFallPreprocessor; + + static QMutex m_dtwMutex; }; #endif
--- a/framework/Document.cpp Thu May 28 17:52:19 2020 +0100 +++ b/framework/Document.cpp Fri May 29 17:39:02 2020 +0100 @@ -1143,7 +1143,8 @@ SVDEBUG << "Document::alignModel: aligning..." << endl; if (!rm->getAlignmentReference().isNone()) { - SVDEBUG << "(Note: model " << rm << " is currently aligned to model " + SVDEBUG << "(Note: model " << modelId + << " is currently aligned to model " << rm->getAlignmentReference() << "; this will replace that)" << endl; }