Mercurial > hg > match-vamp
changeset 21:b15106b0abcd
Separate out feature extraction from distance calculations
author | Chris Cannam |
---|---|
date | Fri, 10 Oct 2014 15:58:19 +0100 |
parents | 47f98349aa17 |
children | ac39717fc88d |
files | MatchFeeder.cpp Matcher.cpp Matcher.h |
diffstat | 3 files changed, 79 insertions(+), 57 deletions(-) [+] |
line wrap: on
line diff
--- a/MatchFeeder.cpp Fri Oct 10 14:45:15 2014 +0100 +++ b/MatchFeeder.cpp Fri Oct 10 15:58:19 2014 +0100 @@ -149,7 +149,7 @@ imBuffer[i] = block[i*2+1]; } delete[] block; - return pm1->processFrame(reBuffer, imBuffer); + return pm1->consumeFrame(reBuffer, imBuffer); } vector<double> @@ -165,6 +165,6 @@ imBuffer[i] = block[i*2+1]; } delete[] block; - return pm2->processFrame(reBuffer, imBuffer); + return pm2->consumeFrame(reBuffer, imBuffer); }
--- a/Matcher.cpp Fri Oct 10 14:45:15 2014 +0100 +++ b/Matcher.cpp Fri Oct 10 15:58:19 2014 +0100 @@ -196,10 +196,27 @@ } // makeChromaFrequencyMap() vector<double> -Matcher::processFrame(double *reBuffer, double *imBuffer) +Matcher::consumeFrame(double *reBuffer, double *imBuffer) { if (!initialised) init(); + vector<double> processedFrame = + processFrameFromFreqData(reBuffer, imBuffer); + + calcAdvance(); + + if ((frameCount % 100) == 0) { + if (!silent) { + cerr << "Progress:" << frameCount << " " << ltAverage << endl; + } + } + + return processedFrame; +} + +vector<double> +Matcher::processFrameFromFreqData(double *reBuffer, double *imBuffer) +{ for (int i = 0; i < (int)newFrame.size(); ++i) { newFrame[i] = 0; } @@ -214,6 +231,58 @@ int frameIndex = frameCount % blockSize; + vector<double> processedFrame(freqMapSize, 0.0); + + double totalEnergy = 0; + if (params.useSpectralDifference) { + for (int i = 0; i < freqMapSize; i++) { + totalEnergy += newFrame[i]; + if (newFrame[i] > prevFrame[i]) { + processedFrame[i] = newFrame[i] - prevFrame[i]; + } else { + processedFrame[i] = 0; + } + } + } else { + for (int i = 0; i < freqMapSize; i++) { + processedFrame[i] = newFrame[i]; + totalEnergy += processedFrame[i]; + } + } + totalEnergies[frameIndex] = totalEnergy; + + double decay = frameCount >= 200 ? 0.99: + (frameCount < 100? 0: (frameCount - 100) / 100.0); + + if (ltAverage == 0) + ltAverage = totalEnergy; + else + ltAverage = ltAverage * decay + totalEnergy * (1.0 - decay); + + if (rms <= params.silenceThreshold) + for (int i = 0; i < freqMapSize; i++) + processedFrame[i] = 0; + else if (params.frameNorm == NormaliseFrameToSum1) + for (int i = 0; i < freqMapSize; i++) + processedFrame[i] /= totalEnergy; + else if (params.frameNorm == NormaliseFrameToLTAverage) + for (int i = 0; i < freqMapSize; i++) + processedFrame[i] /= ltAverage; + + vector<double> tmp = prevFrame; + prevFrame = newFrame; + newFrame = tmp; + + frames[frameIndex] = processedFrame; + + return processedFrame; +} + +void +Matcher::calcAdvance() +{ + int frameIndex = frameCount % blockSize; + if (frameCount >= distXSize) { // std::cerr << "Resizing " << distXSize << " -> " << distXSize * 2 << std::endl; distXSize *= 2; @@ -264,44 +333,6 @@ distYSizes[frameCount - blockSize] = len; } - double totalEnergy = 0; - if (params.useSpectralDifference) { - for (int i = 0; i < freqMapSize; i++) { - totalEnergy += newFrame[i]; - if (newFrame[i] > prevFrame[i]) { - frames[frameIndex][i] = newFrame[i] - prevFrame[i]; - } else { - frames[frameIndex][i] = 0; - } - } - } else { - for (int i = 0; i < freqMapSize; i++) { - frames[frameIndex][i] = newFrame[i]; - totalEnergy += frames[frameIndex][i]; - } - } - totalEnergies[frameIndex] = totalEnergy; - - double decay = frameCount >= 200 ? 0.99: - (frameCount < 100? 0: (frameCount - 100) / 100.0); - - if (ltAverage == 0) - ltAverage = totalEnergy; - else - ltAverage = ltAverage * decay + totalEnergy * (1.0 - decay); - - if (rms <= params.silenceThreshold) - for (int i = 0; i < freqMapSize; i++) - frames[frameIndex][i] = 0; - else if (params.frameNorm == NormaliseFrameToSum1) - for (int i = 0; i < freqMapSize; i++) - frames[frameIndex][i] /= totalEnergy; - else if (params.frameNorm == NormaliseFrameToLTAverage) - for (int i = 0; i < freqMapSize; i++) - frames[frameIndex][i] /= ltAverage; - - vector<double> processedFrame = frames[frameIndex]; - int stop = otherMatcher->frameCount; int index = stop - blockSize; if (index < 0) @@ -368,10 +399,6 @@ otherMatcher->last[index]++; } // loop for row (resp. column) - vector<double> tmp = prevFrame; - prevFrame = newFrame; - newFrame = tmp; - frameCount++; runCount++; @@ -380,18 +407,10 @@ if (overflow && !silent) cerr << "WARNING: overflow in distance metric: " << "frame " << frameCount << ", val = " << mx << endl; - + if (!silent) std::cerr << "Frame " << frameCount << ", d = " << (mx-mn) << std::endl; - - if ((frameCount % 100) == 0) { - if (!silent) { - cerr << "Progress:" << frameCount << " " << ltAverage << endl; - } - } - - return processedFrame; -} // processFrame() +} int Matcher::calcDistance(const vector<double> &f1, const vector<double> &f2)
--- a/Matcher.h Fri Oct 10 14:45:15 2014 +0100 +++ b/Matcher.h Fri Oct 10 15:58:19 2014 +0100 @@ -297,7 +297,7 @@ * mapping is one to one. Where the spacing is greater than 0.5 * semitones, the FFT energy is mapped into semitone-wide * bins. No scaling is performed; that is the energy is summed - * into the comparison bins. See also processFrame() + * into the comparison bins. See also consumeFrame() */ void makeStandardFrequencyMap(); @@ -316,7 +316,7 @@ * Return value is the frame (post-processed, with warping, * rectification, and normalisation as appropriate). */ - std::vector<double> processFrame(double *reBuffer, double *imBuffer); + std::vector<double> consumeFrame(double *reBuffer, double *imBuffer); /** Calculates the Manhattan distance between two vectors, with an * optional normalisation by the combined values in the @@ -349,6 +349,9 @@ */ void setValue(int i, int j, int dir, int value, int dMN); + vector<double> processFrameFromFreqData(double *, double *); + void calcAdvance(); + friend class MatchFeeder; friend class Finder;