Mercurial > hg > match-vamp
changeset 15:a82276091bbd
Pull out Matcher parameters into an object
author | Chris Cannam |
---|---|
date | Fri, 10 Oct 2014 12:55:05 +0100 |
parents | cdead4a52755 |
children | 4c8526c5bf58 |
files | Makefile.inc Makefile.linux MatchFeeder.cpp MatchVampPlugin.cpp MatchVampPlugin.h Matcher.cpp Matcher.h test/regressiontest.sh |
diffstat | 8 files changed, 190 insertions(+), 208 deletions(-) [+] |
line wrap: on
line diff
--- a/Makefile.inc Fri Oct 10 12:04:54 2014 +0100 +++ b/Makefile.inc Fri Oct 10 12:55:05 2014 +0100 @@ -18,10 +18,15 @@ distclean: clean rm -f $(PLUGIN) +depend: + makedepend -Y -fMakefile.inc $(SOURCES) $(HEADERS) + # DO NOT DELETE Finder.o: Finder.h Matcher.h -Matcher.o: Matcher.h Finder.h +Matcher.o: Matcher.h MatchFeeder.o: MatchFeeder.h Matcher.h Finder.h MatchVampPlugin.o: MatchVampPlugin.h Matcher.h MatchFeeder.h Finder.h Path.h Path.o: Path.h +Finder.o: Matcher.h +MatchFeeder.o: Matcher.h Finder.h
--- a/Makefile.linux Fri Oct 10 12:04:54 2014 +0100 +++ b/Makefile.linux Fri Oct 10 12:55:05 2014 +0100 @@ -1,6 +1,6 @@ CXXFLAGS += -fPIC -ffast-math -O3 -Wall -Werror -LDFLAGS += -shared -Wl,-Bstatic -lvamp-sdk -Wl,-Bdynamic -lpthread -Wl,--version-script=vamp-plugin.map +LDFLAGS += -shared -Wl,-Bstatic -lvamp-sdk -Wl,-Bdynamic -Wl,-Bsymbolic -Wl,-z,defs -lpthread -Wl,--version-script=vamp-plugin.map include Makefile.inc
--- a/MatchFeeder.cpp Fri Oct 10 12:04:54 2014 +0100 +++ b/MatchFeeder.cpp Fri Oct 10 12:55:05 2014 +0100 @@ -21,7 +21,7 @@ MatchFeeder::MatchFeeder(Matcher *m1, Matcher *m2) : pm1(m1), pm2(m2) { - fftSize = m1->fftSize; + fftSize = m1->params.fftSize; finder = new Finder(m1, m2); reBuffer = new double[fftSize/2+1]; imBuffer = new double[fftSize/2+1]; @@ -106,10 +106,10 @@ // feed2(); //!!! } else if (pm2->atEnd) // feed1(); - else if (pm1->runCount >= Matcher::MAX_RUN_COUNT) { // slope constraints + else if (pm1->runCount >= pm1->params.maxRunCount) { // slope constraints // std::cerr << "pm1 too slopey" << std::endl; f2 = feed2(); - } else if (pm2->runCount >= Matcher::MAX_RUN_COUNT) { + } else if (pm2->runCount >= pm2->params.maxRunCount) { // std::cerr << "pm2 too slopey" << std::endl; f1 = feed1(); } else {
--- a/MatchVampPlugin.cpp Fri Oct 10 12:04:54 2014 +0100 +++ b/MatchVampPlugin.cpp Fri Oct 10 12:55:05 2014 +0100 @@ -45,9 +45,13 @@ // sample rates static float sampleRateMin = 5000.f; +static float defaultStepTime = 0.020; + MatchVampPlugin::MatchVampPlugin(float inputSampleRate) : Plugin(inputSampleRate), m_stepSize(0), + m_stepTime(defaultStepTime), + m_blockSize(0), m_serialise(false), m_begin(true), m_locked(false) @@ -168,22 +172,21 @@ size_t MatchVampPlugin::getPreferredStepSize() const { - if (!pm1) createMatchers(); - return pm1->getHopSize(); + return m_inputSampleRate * defaultStepTime; } size_t MatchVampPlugin::getPreferredBlockSize() const { - if (!pm1) createMatchers(); - return pm1->getFFTSize(); + return 2048; } void MatchVampPlugin::createMatchers() const { - pm1 = new Matcher(m_inputSampleRate, 0); - pm2 = new Matcher(m_inputSampleRate, pm1); + Matcher::Parameters params(m_inputSampleRate, m_stepTime, m_blockSize); + pm1 = new Matcher(params, 0); + pm2 = new Matcher(params, pm1); pm1->setOtherMatcher(pm2); feeder = new MatchFeeder(pm1, pm2); } @@ -197,16 +200,21 @@ << sampleRateMin << std::endl; return false; } - if (!pm1) createMatchers(); if (channels < getMinChannelCount() || channels > getMaxChannelCount()) return false; if (stepSize > blockSize/2 || blockSize != getPreferredBlockSize()) return false; + m_stepSize = stepSize; - pm1->setHopSize(stepSize); - pm2->setHopSize(stepSize); + m_stepTime = float(stepSize) / m_inputSampleRate; + m_blockSize = blockSize; + + cerr << "step size = " << m_stepSize << ", time = " << m_stepTime << endl; + + createMatchers(); m_begin = true; m_locked = false; + return true; } @@ -221,8 +229,6 @@ pm2 = 0; createMatchers(); - pm1->setHopSize(m_stepSize); - pm2->setHopSize(m_stepSize); m_begin = true; m_locked = false; } @@ -232,7 +238,7 @@ { OutputList list; - float outRate = 1.0 / 0.020; //!!! this is the default value of hopTime in Matcher + float outRate = 1.0 / m_stepTime; OutputDescriptor desc; desc.identifier = "path"; @@ -296,6 +302,18 @@ desc.sampleRate = outRate; list.push_back(desc); + desc.identifier = "a_features"; + desc.name = "A Features"; + desc.description = "Spectral features extracted from performance A"; + desc.unit = ""; + desc.hasFixedBinCount = true; + desc.binCount = 1; + desc.hasKnownExtents = false; + desc.isQuantized = false; + desc.sampleType = OutputDescriptor::VariableSampleRate; + desc.sampleRate = outRate; + list.push_back(desc); + return list; } @@ -372,9 +390,9 @@ int y = pathy[i]; Vamp::RealTime xt = Vamp::RealTime::frame2RealTime - (x * pm1->getHopSize(), lrintf(m_inputSampleRate)); + (x * m_stepSize, lrintf(m_inputSampleRate)); Vamp::RealTime yt = Vamp::RealTime::frame2RealTime - (y * pm2->getHopSize(), lrintf(m_inputSampleRate)); + (y * m_stepSize, lrintf(m_inputSampleRate)); Feature feature; feature.hasTimestamp = true;
--- a/MatchVampPlugin.h Fri Oct 10 12:04:54 2014 +0100 +++ b/MatchVampPlugin.h Fri Oct 10 12:55:05 2014 +0100 @@ -69,7 +69,9 @@ mutable Matcher *pm2; mutable MatchFeeder *feeder; Vamp::RealTime m_startTime; - size_t m_stepSize; + int m_stepSize; + float m_stepTime; + int m_blockSize; bool m_serialise; bool m_begin; bool m_locked;
--- a/Matcher.cpp Fri Oct 10 12:04:54 2014 +0100 +++ b/Matcher.cpp Fri Oct 10 12:55:05 2014 +0100 @@ -15,7 +15,6 @@ */ #include "Matcher.h" -#include "Finder.h" #include <iostream> @@ -23,44 +22,27 @@ bool Matcher::silent = true; -const double Matcher::decay = 0.99; -const double Matcher::silenceThreshold = 0.0004; -const int Matcher::MAX_RUN_COUNT = 3; - //#define DEBUG_MATCHER 1 -Matcher::Matcher(float rate, Matcher *p) +Matcher::Matcher(Parameters parameters, Matcher *p) : + params(parameters) { #ifdef DEBUG_MATCHER - std::cerr << "Matcher::Matcher(" << rate << ", " << p << ")" << std::endl; + cerr << "Matcher::Matcher(" << params.sampleRate << ", " << p << ")" << endl; #endif - sampleRate = rate; otherMatcher = p; // the first matcher will need this to be set later firstPM = (!p); - matchFileOffset = 0; ltAverage = 0; frameCount = 0; runCount = 0; - paused = false; - hopSize = 0; - fftSize = 0; blockSize = 0; - hopTime = 0.020; // DEFAULT, overridden with -h //!!! - fftTime = 0.04644; // DEFAULT, overridden with -f - blockTime = 10.0; // DEFAULT, overridden with -c - normalise1 = true; - normalise2 = false; - normalise3 = false; - normalise4 = true; - useSpectralDifference = true; - useChromaFrequencyMap = false; scale = 90; - maxFrames = 0; // stop at EOF - hopSize = lrint(sampleRate * hopTime); - fftSize = lrint(pow(2.0, (int)lrint(log(fftTime * sampleRate) / log(2.0)))); - blockSize = lrint(blockTime / hopTime); + blockSize = lrint(params.blockTime / params.hopTime); +#ifdef DEBUG_MATCHER + cerr << "Matcher: blockSize = " << blockSize << endl; +#endif distance = 0; bestPathCost = 0; @@ -71,23 +53,10 @@ } // default constructor -void -Matcher::setHopSize(int sz) -{ - if (initialised) { - std::cerr << "Matcher::setHopSize: Can't set after use" << std::endl; - return; - } - - hopSize = sz; - hopTime = float(hopSize) / sampleRate; - blockTime = blockSize * hopTime; -} - Matcher::~Matcher() { #ifdef DEBUG_MATCHER - std::cerr << "Matcher(" << this << ")::~Matcher()" << std::endl; + cerr << "Matcher(" << this << ")::~Matcher()" << endl; #endif if (initialised) { @@ -109,42 +78,24 @@ } void -Matcher::print() -{ - cerr << toString() << endl; -} // print() - -string -Matcher::toString() -{ - std::stringstream os; - os << "Matcher " << this << ": (" << sampleRate - << "kHz)" - << "\n\tHop size: " << hopSize - << "\n\tFFT size: " << fftSize - << "\n\tBlock size: " << blockSize; - return os.str(); -} // toString() - -void Matcher::init() { if (initialised) return; initialised = true; - makeFreqMap(fftSize, sampleRate); + makeFreqMap(); initVector<double>(prevFrame, freqMapSize); initVector<double>(newFrame, freqMapSize); initMatrix<double>(frames, blockSize, freqMapSize); initVector<double>(totalEnergies, blockSize); - int distSize = (MAX_RUN_COUNT + 1) * blockSize; + int distSize = (params.maxRunCount + 1) * blockSize; distXSize = blockSize * 2; -// std::cerr << "Matcher::init: distXSize = " << distXSize << std::endl; + std::cerr << "Matcher::init: distXSize = " << distXSize << std::endl; distance = (unsigned char **)malloc(distXSize * sizeof(unsigned char *)); bestPathCost = (int **)malloc(distXSize * sizeof(int *)); @@ -164,26 +115,31 @@ frameCount = 0; runCount = 0; -// frameRMS = 0; ltAverage = 0; - if (!silent) print(); } // init void -Matcher::makeFreqMap(int fftSize, float sampleRate) +Matcher::makeFreqMap() { - initVector<int>(freqMap, fftSize/2 + 1); - if (useChromaFrequencyMap) - makeChromaFrequencyMap(fftSize, sampleRate); - else - makeStandardFrequencyMap(fftSize, sampleRate); + initVector<int>(freqMap, params.fftSize/2 + 1); + if (params.useChromaFrequencyMap) { +#ifdef DEBUG_MATCHER + cerr << "makeFreqMap: calling makeChromaFrequencyMap" << endl; +#endif + makeChromaFrequencyMap(); + } else { +#ifdef DEBUG_MATCHER + cerr << "makeFreqMap: calling makeStandardFrequencyMap" << endl; +#endif + makeStandardFrequencyMap(); + } } // makeFreqMap() void -Matcher::makeStandardFrequencyMap(int fftSize, float sampleRate) +Matcher::makeStandardFrequencyMap() { - double binWidth = sampleRate / fftSize; + double binWidth = params.sampleRate / params.fftSize; int crossoverBin = (int)(2 / (pow(2, 1/12.0) - 1)); int crossoverMidi = lrint(log(crossoverBin*binWidth/440.0)/ log(2.0) * 12 + 69); @@ -193,7 +149,7 @@ freqMap[i] = i; ++i; } - while (i <= fftSize/2) { + while (i <= params.fftSize/2) { double midi = log(i*binWidth/440.0) / log(2.0) * 12 + 69; if (midi > 127) midi = 127; @@ -203,21 +159,21 @@ if (!silent) { cerr << "Standard map size: " << freqMapSize << "; Crossover at: " << crossoverBin << endl; -//!!! for (i = 0; i < fftSize / 2; i++) -// cerr << "freqMap[" << i << "] = " << freqMap[i] << endl; + for (i = 0; i < params.fftSize / 2; i++) + cerr << "freqMap[" << i << "] = " << freqMap[i] << endl; } } // makeStandardFrequencyMap() void -Matcher::makeChromaFrequencyMap(int fftSize, float sampleRate) +Matcher::makeChromaFrequencyMap() { - double binWidth = sampleRate / fftSize; + double binWidth = params.sampleRate / params.fftSize; int crossoverBin = (int)(1 / (pow(2, 1/12.0) - 1)); // freq = 440 * Math.pow(2, (midi-69)/12.0) / binWidth; int i = 0; while (i <= crossoverBin) freqMap[i++] = 0; - while (i <= fftSize/2) { + while (i <= params.fftSize/2) { double midi = log(i*binWidth/440.0) / log(2.0) * 12 + 69; freqMap[i++] = (lrint(midi)) % 12 + 1; } @@ -225,7 +181,7 @@ if (!silent) { cerr << "Chroma map size: " << freqMapSize << "; Crossover at: " << crossoverBin << endl; - for (i = 0; i < fftSize / 2; i++) + for (i = 0; i < params.fftSize / 2; i++) cerr << "freqMap[" << i << "] = " << freqMap[i] << endl; } } // makeChromaFrequencyMap() @@ -239,13 +195,13 @@ newFrame[i] = 0; } double rms = 0; - for (int i = 0; i <= fftSize/2; i++) { + for (int i = 0; i <= params.fftSize/2; i++) { double mag = reBuffer[i] * reBuffer[i] + imBuffer[i] * imBuffer[i]; rms += mag; newFrame[freqMap[i]] += mag; } - rms = sqrt(rms / (fftSize/2)); + rms = sqrt(rms / (params.fftSize/2)); int frameIndex = frameCount % blockSize; @@ -300,7 +256,7 @@ } double totalEnergy = 0; - if (useSpectralDifference) { + if (params.useSpectralDifference) { for (int i = 0; i < freqMapSize; i++) { totalEnergy += newFrame[i]; if (newFrame[i] > prevFrame[i]) { @@ -325,19 +281,13 @@ else ltAverage = ltAverage * decay + totalEnergy * (1.0 - decay); -// System.err.println(Format.d(ltAverage,4) + " " + -// Format.d(totalEnergy) + " " + -// Format.d(frameRMS)); - -// std::cerr << "ltAverage: " << ltAverage << ", totalEnergy: " << totalEnergy << ", frameRMS: " << rms << std::endl; - - if (rms <= 0.01) //!!! silenceThreshold) + if (rms <= params.silenceThreshold) for (int i = 0; i < freqMapSize; i++) frames[frameIndex][i] = 0; - else if (normalise1) + else if (params.frameNorm == NormaliseFrameToSum1) for (int i = 0; i < freqMapSize; i++) frames[frameIndex][i] /= totalEnergy; - else if (normalise3) + else if (params.frameNorm == NormaliseFrameToLTAverage) for (int i = 0; i < freqMapSize; i++) frames[frameIndex][i] /= ltAverage; @@ -446,9 +396,9 @@ // System.err.print(" " + Format.d(d,3)); if (sum == 0) return 0; - if (normalise2) + if (params.distanceNorm == NormaliseDistanceToSum) return (int)(scale * d / sum); // 0 <= d/sum <= 2 - if (!normalise4) + if (params.distanceNorm != NormaliseDistanceToLogSum) return (int)(scale * d); // note if this were to be restored, it would have to use
--- a/Matcher.h Fri Oct 10 12:04:54 2014 +0100 +++ b/Matcher.h Fri Oct 10 12:55:05 2014 +0100 @@ -39,9 +39,103 @@ * on the FFT data with the higher frequencies mapped onto a linear * scale. */ - class Matcher { +public: + enum FrameNormalisation { + + /** Do not normalise audio frames */ + NoFrameNormalisation, + + /** Normalise each frame of audio to have a sum of 1 */ + NormaliseFrameToSum1, + + /** Normalise each frame of audio by the long-term average + * of the summed energy */ + NormaliseFrameToLTAverage, + }; + + enum DistanceNormalisation { + + /** Do not normalise distance metrics */ + NoDistanceNormalisation, + + /** Normalise distance metric for pairs of audio frames by + * the sum of the two frames. */ + NormaliseDistanceToSum, + + /** Normalise distance metric for pairs of audio frames by + * the log of the sum of the frames. */ + NormaliseDistanceToLogSum, + }; + + struct Parameters { + + Parameters(float rate_, double hopTime_, int fftSize_) : + sampleRate(rate_), + frameNorm(NormaliseFrameToSum1), + distanceNorm(NormaliseDistanceToLogSum), + useSpectralDifference(true), + useChromaFrequencyMap(false), + hopTime(hopTime_), + fftSize(fftSize_), + blockTime(10.0), + silenceThreshold(0.01), + decay(0.99), + maxRunCount(3) + {} + + /** Sample rate of audio */ + float sampleRate; + + /** Type of audio frame normalisation */ + FrameNormalisation frameNorm; + + /** Type of distance metric normalisation */ + DistanceNormalisation distanceNorm; + + /** Flag indicating whether or not the half-wave rectified + * spectral difference should be used in calculating the + * distance metric for pairs of audio frames, instead of the + * straight spectrum values. */ + bool useSpectralDifference; + + /** Flag indicating whether to use a chroma frequency map (12 + * bins) instead of the default warped spectrogram */ + bool useChromaFrequencyMap; + + /** Spacing of audio frames (determines the amount of overlap or + * skip between frames). This value is expressed in + * seconds. */ + double hopTime; + + /** Size of an FFT frame in samples. Note that the data passed + * in to Matcher is already in the frequency domain, so this + * expresses the size of the frame that the caller will be + * providing. + */ + int fftSize; + + /** The width of the search band (error margin) around the current + * match position, measured in seconds. Strictly speaking the + * width is measured backwards from the current point, since the + * algorithm has to work causally. + */ + double blockTime; + + /** RMS level below which frame is considered silent */ + double silenceThreshold; + + /** Frame-to-frame decay factor in calculating long-term average */ + double decay; + + /** Maximum number of frames sequentially processed by this + * matcher, without a frame of the other matcher being + * processed. + */ + int maxRunCount; + }; + protected: /** Points to the other performance with which this one is being * compared. The data for the distance metric and the dynamic @@ -57,80 +151,21 @@ * DTW steps. */ bool firstPM; - /** Sample rate of audio */ - float sampleRate; - - /** Onset time of the first note in the audio file, in order to - * establish synchronisation between the match file and the audio - * data. */ - double matchFileOffset; - - /** Flag indicating whether or not each frame of audio should be - * normalised to have a sum of 1. (Default = false). */ - bool normalise1; - - /** Flag indicating whether or not the distance metric for pairs - * of audio frames should be normalised by the sum of the two - * frames. (Default = false). */ - bool normalise2; - - /** Flag indicating whether or not each frame of audio should be - * normalised by the long term average of the summed energy. - * (Default = false; assumes normalise1 == false). */ - bool normalise3; - - /** Flag indicating whether or not the distance metric for pairs - * of audio frames should be normalised by the log of the sum of - * the frames. (Default = false; assumes normalise2 == - * false). */ - bool normalise4; - - /** Flag indicating whether or not the half-wave rectified - * spectral difference should be used in calculating the distance - * metric for pairs of audio frames, instead of the straight - * spectrum values. (Default = true). */ - bool useSpectralDifference; - - bool useChromaFrequencyMap; + /** Configuration parameters */ + Parameters params; /** Scaling factor for distance metric; must guarantee that the * final value fits in the data type used, that is, unsigned - * char. (Default = 16). + * char. */ double scale; - /** Spacing of audio frames (determines the amount of overlap or - * skip between frames). This value is expressed in - * seconds. (Default = 0.020s) */ - double hopTime; - - /** The size of an FFT frame in seconds. (Default = 0.04644s). - * Note that the value is not taken to be precise; it is adjusted - * so that <code>fftSize</code> is always power of 2. */ - double fftTime; - - /** The width of the search band (error margin) around the current - * match position, measured in seconds. Strictly speaking the - * width is measured backwards from the current point, since the - * algorithm has to work causally. - */ - double blockTime; - - /** Spacing of audio frames in samples (see <code>hopTime</code>) */ - int hopSize; - - /** The size of an FFT frame in samples (see <code>fftTime</code>) */ - int fftSize; - /** Width of the search band in FFT frames (see <code>blockTime</code>) */ int blockSize; /** The number of frames of audio data which have been read. */ int frameCount; - /** RMS amplitude of the current frame. */ -// double frameRMS; - /** Long term average frame energy (in frequency domain * representation). */ double ltAverage; @@ -140,14 +175,6 @@ */ int runCount; - /** Interactive control of the matching process allows pausing - * computation of the cost matrices in one direction. - */ - bool paused; - - /** The total number of frames of audio data to be read. */ - int maxFrames; - /** A mapping function for mapping FFT bins to final frequency * bins. The mapping is linear (1-1) until the resolution * reaches 2 points per semitone, then logarithmic with a @@ -198,22 +225,11 @@ /** Width of distance and bestPathCost matrices and first and last vectors */ int distXSize; - /** Total number of audio frames, or -1 for live or compressed input. */ - long fileLength; - bool initialised; -//!!! bool atEnd; //!!! - /** Disable or enable debugging output */ static bool silent; - static const double decay; - static const double silenceThreshold; - static const int MAX_RUN_COUNT; - - friend class Finder; //!!! - public: /** Constructor for Matcher. * @@ -222,7 +238,7 @@ * between the two matchers (currently one possesses the distance * matrix and optimal path matrix). */ - Matcher(float rate, Matcher *p); + Matcher(Parameters parameters, Matcher *p); ~Matcher(); @@ -243,20 +259,10 @@ otherMatcher = p; } // setOtherMatcher() - int getFFTSize() { - return fftSize; - } - - int getHopSize() { - return hopSize; - } - int getFrameCount() { return frameCount; } - void setHopSize(int); - protected: template <typename T> void initVector(vector<T> &vec, int sz, T dflt = 0) { @@ -281,7 +287,7 @@ void init(); - void makeFreqMap(int fftSize, float sampleRate); + void makeFreqMap(); /** Creates a map of FFT frequency bins to comparison bins. Where * the spacing of FFT bins is less than 0.5 semitones, the @@ -290,9 +296,9 @@ * bins. No scaling is performed; that is the energy is summed * into the comparison bins. See also processFrame() */ - void makeStandardFrequencyMap(int fftSize, float sampleRate); + void makeStandardFrequencyMap(); - void makeChromaFrequencyMap(int fftSize, float sampleRate); + void makeChromaFrequencyMap(); /** Processes a frame of audio data by first computing the STFT * with a Hamming window, then mapping the frequency bins into a @@ -341,6 +347,7 @@ void setValue(int i, int j, int dir, int value, int dMN); friend class MatchFeeder; + friend class Finder; }; // class Matcher
--- a/test/regressiontest.sh Fri Oct 10 12:04:54 2014 +0100 +++ b/test/regressiontest.sh Fri Oct 10 12:55:05 2014 +0100 @@ -10,6 +10,6 @@ ~/Music/cc-kids-abrsm-dataset/Kids/Allegro\ in\ G.mp3 \ -w csv --csv-stdout 2>/dev/null | sed 's/^[^,]*,//' > /tmp/$$ || exit 1 -diff /tmp/$$ `dirname $0`/expected.csv && echo Passed +diff -u /tmp/$$ `dirname $0`/expected.csv && echo Passed rm /tmp/$$