Mercurial > hg > tuning-difference
view src/TuningDifference.cpp @ 29:409ff482cb30
Add max-duration parameter. The results are often going to be quite wrong if you use this, simply because the two performances may be at different speeds and so the same duration of each may represent a different (sub-)performance. But it's sometimes necessary just to avoid blowing the thing up.
author | Chris Cannam |
---|---|
date | Fri, 27 Mar 2015 17:20:40 +0000 |
parents | c21ce05afbe4 |
children | 86695c191896 |
line wrap: on
line source
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ /* Centre for Digital Music, Queen Mary University of London. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. See the file COPYING included with this distribution for more information. */ #include "TuningDifference.h" #include <iostream> #include <cmath> #include <cstdio> #include <algorithm> using namespace std; static double pitchToFrequency(int pitch, double centsOffset = 0., double concertA = 440.) { double p = double(pitch) + (centsOffset / 100.); return concertA * pow(2.0, (p - 69.0) / 12.0); } static double frequencyForCentsAbove440(double cents) { return pitchToFrequency(69, cents, 440.); } static float defaultMaxDuration = 0; TuningDifference::TuningDifference(float inputSampleRate) : Plugin(inputSampleRate), m_bpo(60), m_refChroma(new Chromagram(paramsForTuningFrequency(440.))), m_blockSize(0), m_frameCount(0), m_maxDuration(defaultMaxDuration) { } TuningDifference::~TuningDifference() { } string TuningDifference::getIdentifier() const { return "tuning-difference"; } string TuningDifference::getName() const { return "Tuning Difference"; } string TuningDifference::getDescription() const { // Return something helpful here! return ""; } string TuningDifference::getMaker() const { // Your name here return ""; } int TuningDifference::getPluginVersion() const { // Increment this each time you release a version that behaves // differently from the previous one return 1; } string TuningDifference::getCopyright() const { // This function is not ideally named. It does not necessarily // need to say who made the plugin -- getMaker does that -- but it // should indicate the terms under which it is distributed. For // example, "Copyright (year). All Rights Reserved", or "GPL" return ""; } TuningDifference::InputDomain TuningDifference::getInputDomain() const { return TimeDomain; } size_t TuningDifference::getPreferredBlockSize() const { return 0; } size_t TuningDifference::getPreferredStepSize() const { return 0; } size_t TuningDifference::getMinChannelCount() const { return 2; } size_t TuningDifference::getMaxChannelCount() const { return 2; } TuningDifference::ParameterList TuningDifference::getParameterDescriptors() const { ParameterList list; ParameterDescriptor desc; desc.identifier = "maxduration"; desc.name = "Maximum duration to analyse"; desc.description = "The maximum duration (in seconds) to consider from either input file. Zero means there is no limit."; desc.minValue = 0; desc.maxValue = 3600; desc.defaultValue = defaultMaxDuration; desc.isQuantized = false; desc.unit = "s"; list.push_back(desc); //!!! parameter: max search range //!!! parameter: fine search precision return list; } float TuningDifference::getParameter(string id) const { if (id == "maxduration") { return m_maxDuration; } return 0; } void TuningDifference::setParameter(string id, float value) { if (id == "maxduration") { m_maxDuration = value; } } TuningDifference::ProgramList TuningDifference::getPrograms() const { ProgramList list; return list; } string TuningDifference::getCurrentProgram() const { return ""; // no programs } void TuningDifference::selectProgram(string) { } TuningDifference::OutputList TuningDifference::getOutputDescriptors() const { OutputList list; OutputDescriptor d; d.identifier = "cents"; d.name = "Tuning Difference"; d.description = "Difference in averaged frequency profile between channels 1 and 2, in cents. A positive value means channel 2 is higher."; d.unit = "cents"; d.hasFixedBinCount = true; d.binCount = 1; d.hasKnownExtents = false; d.isQuantized = false; d.sampleType = OutputDescriptor::VariableSampleRate; d.hasDuration = false; m_outputs[d.identifier] = list.size(); list.push_back(d); d.identifier = "tuningfreq"; d.name = "Relative Tuning Frequency"; d.description = "Tuning frequency of channel 2, if channel 1 is assumed to contain the same music as it at a tuning frequency of A=440Hz."; d.unit = "hz"; d.hasFixedBinCount = true; d.binCount = 1; d.hasKnownExtents = false; d.isQuantized = false; d.sampleType = OutputDescriptor::VariableSampleRate; d.hasDuration = false; m_outputs[d.identifier] = list.size(); list.push_back(d); d.identifier = "reffeature"; d.name = "Reference Feature"; d.description = "Chroma feature from reference audio."; d.unit = ""; d.hasFixedBinCount = true; d.binCount = m_bpo; d.hasKnownExtents = false; d.isQuantized = false; d.sampleType = OutputDescriptor::FixedSampleRate; d.sampleRate = 1; d.hasDuration = false; m_outputs[d.identifier] = list.size(); list.push_back(d); d.identifier = "otherfeature"; d.name = "Other Feature"; d.description = "Chroma feature from other audio, before rotation."; d.unit = ""; d.hasFixedBinCount = true; d.binCount = m_bpo; d.hasKnownExtents = false; d.isQuantized = false; d.sampleType = OutputDescriptor::FixedSampleRate; d.sampleRate = 1; d.hasDuration = false; m_outputs[d.identifier] = list.size(); list.push_back(d); d.identifier = "rotfeature"; d.name = "Other Feature at Rotated Frequency"; d.description = "Chroma feature from reference audio calculated with the tuning frequency obtained from rotation matching."; d.unit = ""; d.hasFixedBinCount = true; d.binCount = m_bpo; d.hasKnownExtents = false; d.isQuantized = false; d.sampleType = OutputDescriptor::FixedSampleRate; d.sampleRate = 1; d.hasDuration = false; m_outputs[d.identifier] = list.size(); list.push_back(d); return list; } bool TuningDifference::initialise(size_t channels, size_t stepSize, size_t blockSize) { if (channels < getMinChannelCount() || channels > getMaxChannelCount()) return false; if (stepSize != blockSize) return false; m_blockSize = blockSize; reset(); return true; } void TuningDifference::reset() { if (m_frameCount > 0) { m_refChroma.reset(new Chromagram(paramsForTuningFrequency(440.))); m_frameCount = 0; } m_refTotals = TFeature(m_bpo, 0.0); m_other.clear(); } template<typename T> void addTo(vector<T> &a, const vector<T> &b) { transform(a.begin(), a.end(), b.begin(), a.begin(), plus<T>()); } template<typename T> T distance(const vector<T> &a, const vector<T> &b) { return inner_product(a.begin(), a.end(), b.begin(), T(), plus<T>(), [](T x, T y) { return fabs(x - y); }); } TuningDifference::TFeature TuningDifference::computeFeatureFromTotals(const TFeature &totals) const { if (m_frameCount == 0) return totals; TFeature feature(m_bpo); double sum = 0.0; for (int i = 0; i < m_bpo; ++i) { double value = totals[i] / m_frameCount; feature[i] += value; sum += value; } for (int i = 0; i < m_bpo; ++i) { feature[i] /= sum; } // cerr << "computeFeatureFromTotals: feature values:" << endl; // for (auto v: feature) cerr << v << " "; // cerr << endl; return feature; } Chromagram::Parameters TuningDifference::paramsForTuningFrequency(double hz) const { Chromagram::Parameters params(m_inputSampleRate); params.lowestOctave = 2; params.octaveCount = 4; params.binsPerOctave = m_bpo; params.tuningFrequency = hz; params.atomHopFactor = 0.5; params.window = CQParameters::Hann; return params; } TuningDifference::TFeature TuningDifference::computeFeatureFromSignal(const Signal &signal, double hz) const { Chromagram chromagram(paramsForTuningFrequency(hz)); TFeature totals(m_bpo, 0.0); cerr << "computeFeatureFromSignal: hz = " << hz << ", frame count = " << m_frameCount << endl; for (int i = 0; i < m_frameCount; ++i) { Signal::const_iterator first = signal.begin() + i * m_blockSize; Signal::const_iterator last = first + m_blockSize; if (last > signal.end()) last = signal.end(); CQBase::RealSequence input(first, last); input.resize(m_blockSize); CQBase::RealBlock block = chromagram.process(input); for (const auto &v: block) addTo(totals, v); } return computeFeatureFromTotals(totals); } TuningDifference::FeatureSet TuningDifference::process(const float *const *inputBuffers, Vamp::RealTime) { if (m_maxDuration > 0) { int maxFrames = (m_maxDuration * m_inputSampleRate) / m_blockSize; if (m_frameCount > maxFrames) return FeatureSet(); } CQBase::RealBlock block; CQBase::RealSequence input; input = CQBase::RealSequence (inputBuffers[0], inputBuffers[0] + m_blockSize); block = m_refChroma->process(input); for (const auto &v: block) addTo(m_refTotals, v); m_other.insert(m_other.end(), inputBuffers[1], inputBuffers[1] + m_blockSize); ++m_frameCount; return FeatureSet(); } double TuningDifference::featureDistance(const TFeature &other, int rotation) const { if (rotation == 0) { return distance(m_refFeature, other); } else { // A positive rotation pushes the tuning frequency up for this // chroma, negative one pulls it down. If a positive rotation // makes this chroma match an un-rotated reference, then this // chroma must have initially been lower than the reference. TFeature r(other); if (rotation < 0) { rotate(r.begin(), r.begin() - rotation, r.end()); } else { rotate(r.begin(), r.end() - rotation, r.end()); } return distance(m_refFeature, r); } } int TuningDifference::findBestRotation(const TFeature &other) const { map<double, int> dists; int maxSemis = 6; int maxRotation = (m_bpo * maxSemis) / 12; for (int r = -maxRotation; r <= maxRotation; ++r) { double dist = featureDistance(other, r); dists[dist] = r; // cerr << "rotation " << r << ": score " << dist << endl; } int best = dists.begin()->second; // cerr << "best is " << best << endl; return best; } pair<int, double> TuningDifference::findFineFrequency(int coarseCents, double coarseScore) { int coarseResolution = 1200 / m_bpo; int searchDistance = coarseResolution/2 - 1; double bestScore = coarseScore; int bestCents = coarseCents; double bestHz = frequencyForCentsAbove440(coarseCents); cerr << "corresponding coarse Hz " << bestHz << " scores " << coarseScore << endl; cerr << "searchDistance = " << searchDistance << endl; for (int sign = -1; sign <= 1; sign += 2) { for (int offset = 1; offset <= searchDistance; ++offset) { int fineCents = coarseCents + sign * offset; cerr << "trying with fineCents = " << fineCents << "..." << endl; double fineHz = frequencyForCentsAbove440(fineCents); TFeature fineFeature = computeFeatureFromSignal(m_other, fineHz); double fineScore = featureDistance(fineFeature); cerr << "fine offset = " << offset << ", cents = " << fineCents << ", Hz = " << fineHz << ", score " << fineScore << " (best score so far " << bestScore << ")" << endl; if (fineScore < bestScore) { cerr << "is good!" << endl; bestScore = fineScore; bestCents = fineCents; bestHz = fineHz; } else { break; } } } //!!! could keep a vector of scores & then interpolate... return pair<int, double>(bestCents, bestHz); } TuningDifference::FeatureSet TuningDifference::getRemainingFeatures() { FeatureSet fs; if (m_frameCount == 0) return fs; m_refFeature = computeFeatureFromTotals(m_refTotals); TFeature otherFeature = computeFeatureFromSignal(m_other, 440.); Feature f; f.values.clear(); for (auto v: m_refFeature) f.values.push_back(v); fs[m_outputs["reffeature"]].push_back(f); f.values.clear(); for (auto v: otherFeature) f.values.push_back(v); fs[m_outputs["otherfeature"]].push_back(f); int rotation = findBestRotation(otherFeature); int coarseCents = -(rotation * 1200) / m_bpo; cerr << "rotation " << rotation << " -> cents " << coarseCents << endl; double coarseHz = frequencyForCentsAbove440(coarseCents); TFeature coarseFeature; if (rotation == 0) { coarseFeature = otherFeature; } else { coarseFeature = computeFeatureFromSignal(m_other, coarseHz); } double coarseScore = featureDistance(coarseFeature); cerr << "corresponding Hz " << coarseHz << " scores " << coarseScore << endl; //!!! This should be returning the fine chroma, not the coarse f.values.clear(); for (auto v: coarseFeature) f.values.push_back(v); fs[m_outputs["rotfeature"]].push_back(f); pair<int, double> fine = findFineFrequency(coarseCents, coarseScore); int fineCents = fine.first; double fineHz = fine.second; f.values.clear(); f.values.push_back(fineHz); fs[m_outputs["tuningfreq"]].push_back(f); f.values.clear(); f.values.push_back(fineCents); fs[m_outputs["cents"]].push_back(f); cerr << "overall best Hz = " << fineHz << endl; return fs; }