Mercurial > hg > match-vamp
changeset 178:1440773da492 tuning-rescale
Merge from refactors branch
author | Chris Cannam |
---|---|
date | Mon, 16 Feb 2015 14:47:43 +0000 |
parents | 001db4c32eb0 (current diff) 937951e66c5b (diff) |
children | 9ab52cb6baa3 |
files | src/FeatureExtractor.cpp src/FeatureExtractor.h |
diffstat | 11 files changed, 320 insertions(+), 56 deletions(-) [+] |
line wrap: on
line diff
--- a/src/FeatureExtractor.cpp Thu Feb 05 16:26:41 2015 +0000 +++ b/src/FeatureExtractor.cpp Mon Feb 16 14:47:43 2015 +0000 @@ -24,7 +24,7 @@ using namespace std; -#define DEBUG_FEATURE_EXTRACTOR 1 +//#define DEBUG_FEATURE_EXTRACTOR 1 FeatureExtractor::FeatureExtractor(Parameters parameters) : m_params(parameters) @@ -95,21 +95,36 @@ int i = 0; while (i <= crossoverBin) { - m_freqMap[i] = i; - ++i; + double freq = i * binWidth; + if (freq < m_params.minFrequency || freq > m_params.maxFrequency) { + m_freqMap[i++] = -1; + } else { + m_freqMap[i] = i; + i++; + } } while (i <= m_params.fftSize/2) { - double midi = log(i * binWidth / refFreq) / log(2.0) * 12 + 69; - if (midi > 127) midi = 127; - int target = crossoverBin + lrint(midi) - crossoverMidi; - if (target >= m_featureSize) target = m_featureSize - 1; - m_freqMap[i++] = target; + double freq = i * binWidth; + if (freq < m_params.minFrequency || freq > m_params.maxFrequency) { + m_freqMap[i++] = -1; + } else { + double midi = log(freq / refFreq) / log(2.0) * 12 + 69; + if (midi > 127) midi = 127; + int target = crossoverBin + lrint(midi) - crossoverMidi; + if (target >= m_featureSize) target = m_featureSize - 1; + m_freqMap[i++] = target; + } } #ifdef DEBUG_FEATURE_EXTRACTOR cerr << "FeatureExtractor: crossover bin is " << crossoverBin << " for midi " << crossoverMidi << endl; + cerr << "FeatureExtractor: map is:" << endl; + for (i = 0; i <= m_params.fftSize/2; ++i) { + cerr << i << ": " << m_freqMap[i] << ", "; + } + cerr << endl; #endif } @@ -121,11 +136,21 @@ int crossoverBin = (int)(1 / (pow(2, 1/12.0) - 1)); int i = 0; while (i <= crossoverBin) { - m_freqMap[i++] = 0; + double freq = i * binWidth; + if (freq < m_params.minFrequency || freq > m_params.maxFrequency) { + m_freqMap[i++] = -1; + } else { + m_freqMap[i++] = 0; + } } while (i <= m_params.fftSize/2) { - double midi = log(i * binWidth / refFreq) / log(2.0) * 12 + 69; - m_freqMap[i++] = (lrint(midi)) % 12 + 1; + double freq = i * binWidth; + if (freq < m_params.minFrequency || freq > m_params.maxFrequency) { + m_freqMap[i++] = -1; + } else { + double midi = log(freq / refFreq) / log(2.0) * 12 + 69; + m_freqMap[i++] = (lrint(midi)) % 12 + 1; + } } } @@ -165,12 +190,18 @@ vector<double> scaled = scaleMags(mags); for (int i = 0; i <= m_params.fftSize/2; i++) { - frame[m_freqMap[i]] += scaled[i]; + int index = m_freqMap[i]; + if (index >= 0) { + frame[index] += scaled[i]; + } } } else { for (int i = 0; i <= m_params.fftSize/2; i++) { - frame[m_freqMap[i]] += mags[i]; + int index = m_freqMap[i]; + if (index >= 0) { + frame[index] += mags[i]; + } } }
--- a/src/FeatureExtractor.h Thu Feb 05 16:26:41 2015 +0000 +++ b/src/FeatureExtractor.h Mon Feb 16 14:47:43 2015 +0000 @@ -49,7 +49,9 @@ sampleRate(rate_), useChromaFrequencyMap(false), fftSize(fftSize_), - referenceFrequency(440.0) + referenceFrequency(440.0), + minFrequency(0.), + maxFrequency(rate_/2.) {} /** Sample rate of audio */ @@ -66,6 +68,12 @@ /** Frequency of concert A */ double referenceFrequency; + + /** Minimum frequency cutoff to include in feature */ + double minFrequency; + + /** Maximum frequency cutoff to include in feature */ + double maxFrequency; }; /** @@ -138,7 +146,12 @@ * linearly for bins 0-34 (0 to 732Hz), and logarithmically for * the remaining bins (midi notes 79 to 127, bins 35 to 83), * where all energy above note 127 is mapped into the final - * bin. */ + * bin. + * + * If a bin's frequency is outside the minFrequency->maxFrequency + * range, it will be mapped to a target bin of -1 and should be + * discarded. + */ std::vector<int> m_freqMap; std::vector<double> processMags(const std::vector<double> &mags);
--- a/src/Finder.cpp Thu Feb 05 16:26:41 2015 +0000 +++ b/src/Finder.cpp Mon Feb 16 14:47:43 2015 +0000 @@ -38,6 +38,13 @@ } void +Finder::setMatcher(Matcher *pm) +{ + cerr << "Finder::setMatcher: finder " << this << ", matcher " << pm << endl; + m_m = pm; +} + +void Finder::setDurations(int d1, int d2) { #ifdef DEBUG_FINDER @@ -47,13 +54,47 @@ m_duration2 = d2; } -Matcher::Advance -Finder::getExpandDirection(int row, int col) +bool +Finder::getBestRowCost(int row, int &bestCol, double &min) { - double min = m_m->getPathCost(row, col); + if (!m_m->isRowAvailable(row)) return false; + pair<int, int> colRange = m_m->getColRange(row); + if (colRange.first >= colRange.second) return false; + for (int index = colRange.first; index < colRange.second; index++) { + double tmp = m_m->getNormalisedPathCost(row, index); + if (index == colRange.first || tmp < min) { + min = tmp; + bestCol = index; + } + } + return true; +} + +bool +Finder::getBestColCost(int col, int &bestRow, double &min) +{ + if (!m_m->isColAvailable(col)) return false; + pair<int, int> rowRange = m_m->getRowRange(col); + if (rowRange.first >= rowRange.second) return false; + for (int index = rowRange.first; index < rowRange.second; index++) { + double tmp = m_m->getNormalisedPathCost(index, col); + if (index == rowRange.first || tmp < min) { + min = tmp; + bestRow = index; + } + } + return true; +} + +void +Finder::getBestEdgeCost(int row, int col, + int &bestRow, int &bestCol, + double &min) +{ + min = m_m->getPathCost(row, col); - int bestRow = row; - int bestCol = col; + bestRow = row; + bestCol = col; pair<int, int> rowRange = m_m->getRowRange(col); if (rowRange.second > row+1) { @@ -79,8 +120,39 @@ bestRow = row; } } +} -// cerr << "at [" << row << "," << col << "] (cost " << m_m->getPathCost(row, col) << ") blocksize = " << m_m->getBlockSize() << " best is [" << bestRow << "," << bestCol << "] (cost " << min << ")" << endl; +Matcher::Advance +Finder::getExpandDirection() +{ + return getExpandDirection(m_m->getFrameCount() - 1, + m_m->getOtherFrameCount() - 1); +} + +Matcher::Advance +Finder::getExpandDirection(int row, int col) +{ + // To determine which direction to expand the search area in, we + // look at the path costs along the leading edges of the search + // area (the final row and column within the area). We find the + // lowest path cost within the final row, and the lowest within + // the final column, and we compare them. If the row is cheaper + // then we expand by adding another row next to it; if the column + // is cheaper then we expand by adding another column next to + // it. (The overall lowest path cost across the row and column + // represents the best alignment we have within the entire search + // area given the data available and the assumption that the piece + // is not ending yet.) + + int bestRow = row; + int bestCol = col; + double bestCost = -1; + +// cerr << "Finder " << this << "::getExpandDirection: "; + + getBestEdgeCost(row, col, bestRow, bestCol, bestCost); + +// cerr << "at [" << row << "," << col << "] (cost " << m_m->getPathCost(row, col) << ") blocksize = " << m_m->getBlockSize() << " best is [" << bestRow << "," << bestCol << "] (cost " << bestCost << ")" << endl; if (bestRow == row) { if (bestCol == col) {
--- a/src/Finder.h Thu Feb 05 16:26:41 2015 +0000 +++ b/src/Finder.h Mon Feb 16 14:47:43 2015 +0000 @@ -27,8 +27,12 @@ public: Finder(Matcher *pm); + // default copy ctor and operator= are fine + ~Finder(); + void setMatcher(Matcher *pm); + /** * Tell the finder that one or both files ends sooner than it * thought, i.e. that some of the trailing features are silence or @@ -37,7 +41,44 @@ * duration of each input will be considered. */ void setDurations(int d1, int d2); + + /** + * Find the location and cost of the column with the cheapest path + * cost within the given row. If the row is out of range, return + * false and leave the bestCol and bestCost variables unchanged. + */ + bool getBestRowCost(int row, int &bestCol, double &bestCost); + + /** + * Find the location and cost of the row with the cheapest path + * cost within the given column. If the column is out of range, + * return false and leave the bestRow and bestCost variables + * unchanged. + */ + bool getBestColCost(int col, int &bestRow, double &bestCost); + /** + * Find the location and cost of the cheapest path cost within the + * final row and column of the search area, given that the area + * extends as far as the point at (row, col). This is used by + * getExpandDirection and can also be used, for example, to + * determine the current best estimate alignment for a frame we + * have just reached. + */ + void getBestEdgeCost(int row, int col, + int &bestRow, int &bestCol, + double &bestCost); + + /** + * Calculate which direction to expand the search area in, given + * its current extents. + */ + Matcher::Advance getExpandDirection(); + + /** + * Calculate which direction to expand the search area in, given + * that so far it extends as far as the point at (row, col). + */ Matcher::Advance getExpandDirection(int row, int col); /** Calculates a rectangle of the path cost matrix so that the @@ -89,10 +130,11 @@ ErrorPosition checkPathCostMatrix(); void checkAndReport(); #endif + + Matcher *m_m; // I do not own this - Matcher *m_m; int m_duration1; int m_duration2; -}; // class Finder +}; #endif
--- a/src/MatchFeatureFeeder.cpp Thu Feb 05 16:26:41 2015 +0000 +++ b/src/MatchFeatureFeeder.cpp Mon Feb 16 14:47:43 2015 +0000 @@ -19,14 +19,14 @@ using std::vector; MatchFeatureFeeder::MatchFeatureFeeder(Matcher *m1, Matcher *m2) : - m_pm1(m1), m_pm2(m2) + m_pm1(m1), + m_pm2(m2), + m_finder(m_pm1) { - m_finder = new Finder(m1); } MatchFeatureFeeder::~MatchFeatureFeeder() { - delete m_finder; } void @@ -51,6 +51,21 @@ } } +int +MatchFeatureFeeder::getEstimatedReferenceFrame() +{ + if (m_pm1->getFrameCount() == 0 || m_pm2->getFrameCount() == 0) { + return 0; + } + int bestRow = 0; + double bestCost = 0; + if (!m_finder.getBestColCost(m_pm2->getFrameCount()-1, bestRow, bestCost)) { + return -1; + } else { + return bestRow; + } +} + void MatchFeatureFeeder::finish() { @@ -66,7 +81,7 @@ feed2(); } else if (m_q2.empty()) { // ended feed1(); - } else if (m_pm1->getFrameCount() < m_pm1->getBlockSize()) { // fill initial block + } else if (m_pm1->isFillingInitialBlock()) { feed1(); feed2(); } else if (m_pm1->isOverrunning()) { // slope constraints @@ -74,8 +89,7 @@ } else if (m_pm2->isOverrunning()) { feed1(); } else { - switch (m_finder->getExpandDirection - (m_pm1->getFrameCount()-1, m_pm2->getFrameCount()-1)) { + switch (m_finder.getExpandDirection()) { case Matcher::AdvanceThis: feed1(); break;
--- a/src/MatchFeatureFeeder.h Thu Feb 05 16:26:41 2015 +0000 +++ b/src/MatchFeatureFeeder.h Mon Feb 16 14:47:43 2015 +0000 @@ -44,6 +44,12 @@ void feed(std::vector<double> f1, std::vector<double> f2); /** + * Get the best estimate for the frame in the reference (f1) + * corresponding to the latest frame in the other input (f2). + */ + int getEstimatedReferenceFrame(); + + /** * Indicate that both inputs have come to an end. */ void finish(); @@ -60,22 +66,27 @@ pathy = m_fpy; } - Finder *getFinder() { return m_finder; } + Finder *getFinder() { return &m_finder; } protected: void feedBlock(); void feed1(); void feed2(); - Finder *m_finder; - Matcher *m_pm1; - Matcher *m_pm2; + Matcher *m_pm1; // I do not own this + Matcher *m_pm2; // I do not own this + Finder m_finder; // I own this, and it refers to m_pm1 + std::queue<std::vector<double> > m_q1; std::queue<std::vector<double> > m_q2; vector<int> m_fpx; vector<int> m_fpy; + + // not provided: + MatchFeatureFeeder(const MatchFeatureFeeder &other); + MatchFeatureFeeder &operator=(const MatchFeatureFeeder &other); }; #endif
--- a/src/MatchPipeline.cpp Thu Feb 05 16:26:41 2015 +0000 +++ b/src/MatchPipeline.cpp Mon Feb 16 14:47:43 2015 +0000 @@ -140,21 +140,23 @@ MatchPipeline::finish() { m_feeder.finish(); - getFinder()->setDurations(m_lastFrameIn1, m_lastFrameIn2); + m_feeder.getFinder()->setDurations(m_lastFrameIn1, m_lastFrameIn2); } -MatchFeatureFeeder * -MatchPipeline::getFeeder() +int +MatchPipeline::retrievePath(bool smooth, std::vector<int> &pathx, std::vector<int> &pathy) { - return &m_feeder; + return m_feeder.getFinder()->retrievePath(smooth, pathx, pathy); } -Finder * -MatchPipeline::getFinder() -{ - return m_feeder.getFinder(); +void +MatchPipeline::retrieveForwardPath(std::vector<int> &pathx, std::vector<int> &pathy) { + return m_feeder.retrieveForwardPath(pathx, pathy); } +double +MatchPipeline::getOverallCost() +{ + return m_feeder.getFinder()->getOverallCost(); +} - -
--- a/src/MatchPipeline.h Thu Feb 05 16:26:41 2015 +0000 +++ b/src/MatchPipeline.h Mon Feb 16 14:47:43 2015 +0000 @@ -96,9 +96,29 @@ */ void finish(); - MatchFeatureFeeder *getFeeder(); - Finder *getFinder(); - + /** + * Retrieve the final path. Only valid once all the features have + * been supplied and finish() has been called. + * + * See Finder::retrievePath for more details. + */ + int retrievePath(bool smooth, std::vector<int> &pathx, std::vector<int> &pathy); + + /** + * Retrieve the forward path resulting from the online search. + * + * See MatchFeatureFeeder::retrieveForwardPath for more details. + */ + void retrieveForwardPath(std::vector<int> &pathx, std::vector<int> &pathy); + + /** + * Get the path cost for the overall path to the end of both + * sources. + * + * See Finder::getOverallCost for more details. + */ + double getOverallCost(); + private: FeatureExtractor m_fe1; FeatureExtractor m_fe2;
--- a/src/MatchVampPlugin.cpp Thu Feb 05 16:26:41 2015 +0000 +++ b/src/MatchVampPlugin.cpp Mon Feb 16 14:47:43 2015 +0000 @@ -167,6 +167,26 @@ desc.unit = "Hz"; list.push_back(desc); + desc.identifier = "minfreq"; + desc.name = "Minimum frequency"; + desc.description = "Minimum frequency to include in features."; + desc.minValue = 0.0; + desc.maxValue = (float)m_inputSampleRate / 4.f; + desc.defaultValue = (float)m_defaultFeParams.minFrequency; + desc.isQuantized = false; + desc.unit = "Hz"; + list.push_back(desc); + + desc.identifier = "maxfreq"; + desc.name = "Maximum frequency"; + desc.description = "Maximum frequency to include in features."; + desc.minValue = 1000.0; + desc.maxValue = (float)m_inputSampleRate / 2.f; + desc.defaultValue = (float)m_defaultFeParams.maxFrequency; + desc.isQuantized = false; + desc.unit = "Hz"; + list.push_back(desc); + desc.unit = ""; desc.identifier = "usechroma"; @@ -208,6 +228,7 @@ desc.valueNames.push_back("Long-term average"); list.push_back(desc); desc.valueNames.clear(); + desc.defaultValue = (float)m_defaultFcParams.silenceThreshold; desc.identifier = "metric"; desc.name = "Distance metric"; @@ -344,6 +365,10 @@ return (float)m_feParams.referenceFrequency; } else if (name == "freq2") { return (float)m_secondReferenceFrequency; + } else if (name == "minfreq") { + return (float)m_feParams.minFrequency; + } else if (name == "maxfreq") { + return (float)m_feParams.maxFrequency; } return 0.0; @@ -380,6 +405,10 @@ m_feParams.referenceFrequency = value; } else if (name == "freq2") { m_secondReferenceFrequency = value; + } else if (name == "minfreq") { + m_feParams.minFrequency = value; + } else if (name == "maxfreq") { + m_feParams.maxFrequency = value; } } @@ -401,11 +430,8 @@ m_params.hopTime = m_stepTime; m_feParams.fftSize = m_blockSize; - cerr << "creating pipeline with m_secondReferenceFrequency = " - << m_secondReferenceFrequency << endl; m_pipeline = new MatchPipeline(m_feParams, m_fcParams, m_dParams, m_params, m_secondReferenceFrequency); - cerr << "done" << endl; } bool @@ -659,12 +685,11 @@ FeatureSet returnFeatures; - Finder *finder = m_pipeline->getFinder(); std::vector<int> pathx; std::vector<int> pathy; - int len = finder->retrievePath(m_smooth, pathx, pathy); + int len = m_pipeline->retrievePath(m_smooth, pathx, pathy); - double cost = finder->getOverallCost(); + double cost = m_pipeline->getOverallCost(); Feature costFeature; costFeature.hasTimestamp = false; costFeature.values.push_back((float)cost);
--- a/src/Matcher.cpp Thu Feb 05 16:26:41 2015 +0000 +++ b/src/Matcher.cpp Mon Feb 16 14:47:43 2015 +0000 @@ -76,6 +76,26 @@ } bool +Matcher::isRowAvailable(int i) +{ + if (i < 0 || i >= int(m_first.size())) return false; + + for (int j = m_first[i]; j < int(m_first[i] + m_bestPathCost[i].size()); ++j) { + if (isAvailable(i, j)) { + return true; + } + } + + return false; +} + +bool +Matcher::isColAvailable(int i) +{ + return m_otherMatcher->isRowAvailable(i); +} + +bool Matcher::isInRange(int i, int j) { if (m_firstPM) {
--- a/src/Matcher.h Thu Feb 05 16:26:41 2015 +0000 +++ b/src/Matcher.h Mon Feb 16 14:47:43 2015 +0000 @@ -123,6 +123,10 @@ return m_blockSize; } + bool isFillingInitialBlock() { + return m_frameCount < m_blockSize; + } + bool isOverrunning() { return m_runCount >= m_params.maxRunCount; } @@ -158,6 +162,14 @@ * @return true if the location is in range */ bool isInRange(int i, int j); + + /** Tests whether any locations in the given row are available. + */ + bool isRowAvailable(int i); + + /** Tests whether any locations in the given column are available. + */ + bool isColAvailable(int i); /** Tests whether a location is available in the minimum cost matrix. * @@ -167,8 +179,9 @@ */ bool isAvailable(int i, int j); - /** Returns the valid range of frames in the other Matcher for the - * given frame in this Matcher's minimum cost matrix. + /** Returns the valid range of columns for the given row, that is, + * the range of frames in the other Matcher for the given frame + * in this Matcher's minimum cost matrix. * * @param i the frame number of this Matcher * @return the first, last pair of frame numbers for the other @@ -177,8 +190,9 @@ */ std::pair<int, int> getColRange(int i); - /** Returns the valid range of frames in this Matcher for the - * given frame in the other Matcher's minimum cost matrix. + /** Returns the valid range of rows for the given column, that is, + * the range of frames in this Matcher for the given frame in the + * other Matcher's minimum cost matrix. * * @param i the frame number of the other Matcher * @return the first, last pair of frame numbers for this