Mercurial > hg > match-vamp
changeset 35:16870e8770ae refactors
Move source to src
author | Chris Cannam |
---|---|
date | Thu, 13 Nov 2014 09:50:14 +0000 |
parents | 930cf86113b7 |
children | a43b53d32735 91410483228b |
files | DistanceMetric.cpp DistanceMetric.h Finder.cpp Finder.h Makefile.inc MatchFeatureFeeder.cpp MatchFeatureFeeder.h MatchFeeder.cpp MatchFeeder.h MatchVampPlugin.cpp MatchVampPlugin.h Matcher.cpp Matcher.h Path.cpp Path.h src/DistanceMetric.cpp src/DistanceMetric.h src/Finder.cpp src/Finder.h src/MatchFeatureFeeder.cpp src/MatchFeatureFeeder.h src/MatchFeeder.cpp src/MatchFeeder.h src/MatchVampPlugin.cpp src/MatchVampPlugin.h src/Matcher.cpp src/Matcher.h src/Path.cpp src/Path.h |
diffstat | 29 files changed, 2690 insertions(+), 2689 deletions(-) [+] |
line wrap: on
line diff
--- a/DistanceMetric.cpp Thu Nov 13 09:50:09 2014 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,67 +0,0 @@ -/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ - -/* - Vamp feature extraction plugin using the MATCH audio alignment - algorithm. - - Centre for Digital Music, Queen Mary, University of London. - This file copyright 2007 Simon Dixon, Chris Cannam and QMUL. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. See the file - COPYING included with this distribution for more information. -*/ - -#include "DistanceMetric.h" - -#include <cassert> -#include <cmath> - -using std::vector; - -double -DistanceMetric::calcDistance(const vector<double> &f1, - const vector<double> &f2) -{ - double d = 0; - double sum = 0; - - int featureSize = f1.size(); - assert(int(f2.size()) == featureSize); - - for (int i = 0; i < featureSize; i++) { - d += fabs(f1[i] - f2[i]); - sum += f1[i] + f2[i]; - } - - if (sum == 0) - return 0; - if (m_norm == NormaliseDistanceToSum) - return d / sum; // 0 <= d/sum <= 2 - if (m_norm != NormaliseDistanceToLogSum) - return d; - - // note if this were to be restored, it would have to use - // totalEnergies vector instead of f1[freqMapSize] which used to - // store the total energy: - // double weight = (5 + Math.log(f1[freqMapSize] + f2[freqMapSize]))/10.0; - - double weight = (8 + log(sum)) / 10.0; - - if (weight < 0) weight = 0; - else if (weight > 1) weight = 1; - - return d / sum * weight; -} - -int -DistanceMetric::calcDistanceScaled(const vector<double> &f1, - const vector<double> &f2, - double scale) -{ - double distance = calcDistance(f1, f2); - return int(distance * scale); -} -
--- a/DistanceMetric.h Thu Nov 13 09:50:09 2014 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,73 +0,0 @@ -/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ - -/* - Vamp feature extraction plugin using the MATCH audio alignment - algorithm. - - Centre for Digital Music, Queen Mary, University of London. - This file copyright 2007 Simon Dixon, Chris Cannam and QMUL. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. See the file - COPYING included with this distribution for more information. -*/ - -#ifndef DISTANCE_METRIC_H -#define DISTANCE_METRIC_H - -#include <vector> - -class DistanceMetric -{ -public: - enum DistanceNormalisation { - - /** Do not normalise distance metrics */ - NoDistanceNormalisation, - - /** Normalise distance metric for pairs of frames by the sum - * of the two frames. */ - NormaliseDistanceToSum, - - /** Normalise distance metric for pairs of frames by the log - * of the sum of the frames. */ - NormaliseDistanceToLogSum, - }; - - DistanceMetric(DistanceNormalisation norm) : m_norm(norm) { } - - /** Calculates the Manhattan distance between two vectors, with an - * optional normalisation by the combined values in the - * vectors. Since the vectors contain energy, this could be - * considered as a squared Euclidean distance metric. Note that - * normalisation assumes the values are all non-negative. - * - * @param f1 one of the vectors involved in the distance calculation - * @param f2 one of the vectors involved in the distance calculation - * @return the distance - */ - double calcDistance(const std::vector<double> &f1, - const std::vector<double> &f2); - - /** Calculates the Manhattan distance between two vectors, with an - * optional normalisation by the combined values in the - * vectors. Since the vectors contain energy, this could be - * considered as a squared Euclidean distance metric. Note that - * normalisation assumes the values are all non-negative. - * - * @param f1 one of the vectors involved in the distance calculation - * @param f2 one of the vectors involved in the distance calculation - * @param scale the scaling factor to place the result in integer range - * @return the distance, scaled by scale and truncated to an integer - */ - int calcDistanceScaled(const std::vector<double> &f1, - const std::vector<double> &f2, - double scale); - -private: - DistanceNormalisation m_norm; -}; - -#endif
--- a/Finder.cpp Thu Nov 13 09:50:09 2014 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,301 +0,0 @@ -/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ - -/* - Vamp feature extraction plugin using the MATCH audio alignment - algorithm. - - Centre for Digital Music, Queen Mary, University of London. - This file copyright 2007 Simon Dixon, Chris Cannam and QMUL. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. See the file - COPYING included with this distribution for more information. -*/ - -#include "Finder.h" - -#include "Path.h" - -#include <algorithm> - - -Finder::Finder(Matcher *p1, Matcher *p2) -{ - if (!p1->firstPM) - std::cerr << "Warning: wrong args in Finder()" << std::endl; - pm1 = p1; - pm2 = p2; - index1 = 0; - index2 = 0; - rowRange = new int[2]; - colRange = new int[2]; -} // constructor - -Finder::~Finder() -{ - delete[] rowRange; - delete[] colRange; -} - -bool -Finder::find(int i1, int i2) -{ - if (i1 >= 0) { - index1 = i1; - index2 = i2 - pm1->first[i1]; - } - return (i1 >= 0) && (i2 >= pm1->first[i1]) && (i2 < pm1->last[i1]); -} // find() - -void -Finder::getColRange(int row, int *range) -{ - range[0] = pm1->first[row]; - range[1] = pm1->last[row]; -} // getColRange() - -void -Finder::getRowRange(int col, int *range) -{ - range[0] = pm2->first[col]; - range[1] = pm2->last[col]; -} // getRowRange() - -int -Finder::getExpandDirection(int row, int col) -{ - return getExpandDirection(row, col, false); -} // getExpandDirection() - -int -Finder::getExpandDirection(int row, int col, bool check) -{ - int min = getPathCost(row, col); - bestRow = row; - bestCol = col; - getRowRange(col, rowRange); - if (rowRange[1] > row+1) - rowRange[1] = row+1; // don't cheat by looking at future :) - for (int index = rowRange[0]; index < rowRange[1]; index++) { - int tmp = getPathCost(index, col); - if (tmp < min) { - min = tmp; - bestRow = index; - } - } - getColRange(row, colRange); - if (colRange[1] > col+1) - colRange[1] = col+1; // don't cheat by looking at future :) - for (int index = colRange[0]; index < colRange[1]; index++) { - int tmp = getPathCost(row, index); - if (tmp < min) { - min = tmp; - bestCol = index; - bestRow = row; - } - } - // System.err.print(" BEST: " + bestRow + " " + bestCol + " " + check); - // System.err.println(" " + pm1->frameCount + " " + pm2->frameCount); - if (check) { - // System.err.println(find(row+1, col) + " " + find(row, col+1)); - if (!find(row, col+1)) - return ADVANCE_THIS; - if (!find(row+1, col)) - return ADVANCE_OTHER; - } - return ((bestRow==row)? ADVANCE_THIS: 0) | - ((bestCol==col)? ADVANCE_OTHER: 0); - -} // getExpandDirection() - -unsigned char -Finder::getDistance(int row, int col) -{ - if (find(row, col)) { - return pm1->distance[row][col - pm1->first[row]]; - } - std::cerr << "getDistance(" << row << "," << col << "): out of bounds" << std::endl; - throw "getDistance index out of bounds"; -} // getDistance()/2 - -void -Finder::setDistance(int row, int col, unsigned char b) -{ - if (find(row, col)) { - pm1->distance[row][col - pm1->first[row]] = b; - return; - } - std::cerr << "setDistance(" << row << "," << col << "," << b << "): out of bounds" << std::endl; - throw "setDistance index out of bounds"; -} // setDistance() - -int -Finder::getPathCost(int row, int col) -{ - if (find(row, col)) // "1" avoids div by 0 below - return pm1->bestPathCost[row][col - pm1->first[row]]*100/ (1+row+col); - std::cerr << "getPathCost(" << row << "," << col << "): out of bounds" << std::endl; - throw "getPathCost index out of bounds"; -} // getPathCost() - -int -Finder::getRawPathCost(int row, int col) -{ - if (find(row, col)) - return pm1->bestPathCost[row][col - pm1->first[row]]; - std::cerr << "getRawPathCost(" << row << "," << col << "): out of bounds" << std::endl; - throw "getRawPathCost index out of bounds"; -} // getRawPathCost() - -void -Finder::setPathCost(int row, int col, int i) -{ - if (find(row, col)) { - pm1->bestPathCost[row][col - pm1->first[row]] = i; - return; - } - std::cerr << "setPathCost(" << row << "," << col << "," << i << "): out of bounds" << std::endl; - throw "setPathCost index out of bounds"; -} // setPathCost() - -unsigned char -Finder::getDistance() -{ - return pm1->distance[index1][index2]; -} // getDistance()/0 - -void -Finder::setDistance(int b) -{ - pm1->distance[index1][index2] = (unsigned char)b; -} // setDistance() - -int -Finder::getPathCost() -{ - return pm1->bestPathCost[index1][index2]; -} // getPathCost() - -void -Finder::setPathCost(int i) -{ - pm1->bestPathCost[index1][index2] = i; -} // setPathCost() - -void -Finder::recalculatePathCostMatrix(int r1, int c1, int r2, int c2) -{ - if (!find(r1,c1)) { - std::cerr << "recalculatePathCostMatrix(" << r1 << "," << c1 << "): out of bounds" << std::endl; - throw "recalculatePathCostMatrix index out of bounds"; - } - int thisRowStart, c; - int prevRowStart = 0, prevRowStop = 0; - for (int r = r1; r <= r2; r++) { - thisRowStart = pm1->first[r]; - if (thisRowStart < c1) - thisRowStart = c1; - for (c = thisRowStart; c <= c2; c++) { - if (find(r,c)) { - int i2 = index2; - int newCost = pm1->distance[r][i2]; - int dir = 0; - if (r > r1) { // not first row - int min = -1; - if ((c > prevRowStart) && (c <= prevRowStop)) { - // diagonal from (r-1,c-1) - min = pm1->bestPathCost[r-1][c-pm1->first[r-1]-1] + - newCost * 2; - dir = ADVANCE_BOTH; - } - if ((c >= prevRowStart) && (c < prevRowStop)) { - // vertical from (r-1,c) - int cost = pm1->bestPathCost[r-1][c-pm1->first[r-1]] + - newCost; - if ((min == -1) || (cost < min)) { - min = cost; - dir = ADVANCE_THIS; - } - } - if (c > thisRowStart) { - // horizontal from (r,c-1) - int cost =pm1->bestPathCost[r][i2-1]+newCost; - if ((min == -1) || (cost < min)) { - min = cost; - dir = ADVANCE_OTHER; - } - } - pm1->bestPathCost[r][i2] = min; - } else if (c > thisRowStart) { // first row - // horizontal from (r,c-1) - pm1->bestPathCost[r][i2] = pm1->bestPathCost[r][i2-1] + - newCost; - dir = ADVANCE_OTHER; - } - if ((r != r1) || (c != c1)) { - pm1->distance[r][i2] = (unsigned char) - ((pm1->distance[r][i2] & MASK) | dir); - } - } else - break; // end of row - } - prevRowStart = thisRowStart; - prevRowStop = c; - } -} // recalculatePathCostMatrix() - -int -Finder::retrievePath(bool smooth, vector<int> &pathx, vector<int> &pathy) -{ - int x = pm2->getFrameCount() - 1; - int y = pm1->getFrameCount() - 1; - - pathx.clear(); - pathy.clear(); - - while (find(y, x) && ((x > 0) || (y > 0))) { - -// cerr << "x = " << x << ", y = " << y; - - pathx.push_back(x); - pathy.push_back(y); - - switch (getDistance() & ADVANCE_BOTH) { - case ADVANCE_THIS: -// cerr << ", going down (dist = " << (int)getDistance() << ")" << endl; - y--; - break; - case ADVANCE_OTHER: -// cerr << ", going left (dist = " << (int)getDistance() << ")" << endl; - x--; - break; - case ADVANCE_BOTH: -// cerr << ", going diag (dist = " << (int)getDistance() << ")" << endl; - x--; - y--; - break; - default: // this would indicate a bug, but we wouldn't want to hang -// cerr << "WARNING: Neither matcher advanced in path backtrack at (" << x << "," << y << ")" << endl; - if (x > y) { - x--; - } else { - y--; - } - break; - } - } - - std::reverse(pathx.begin(), pathx.end()); - std::reverse(pathy.begin(), pathy.end()); - - if (smooth) { - int smoothedLen = Path().smooth(pathx, pathy, pathx.size()); - return smoothedLen; - } else { - return pathx.size(); - } -} - -
--- a/Finder.h Thu Nov 13 09:50:09 2014 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,103 +0,0 @@ -/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ - -/* - Vamp feature extraction plugin using the MATCH audio alignment - algorithm. - - Centre for Digital Music, Queen Mary, University of London. - This file copyright 2007 Simon Dixon, Chris Cannam and QMUL. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. See the file - COPYING included with this distribution for more information. -*/ - -#ifndef _FINDER_H_ -#define _FINDER_H_ - -#include <vector> -#include <iostream> - -#include "Matcher.h" - -/** Maps cost matrix coordinates into an efficient - * (linear instead of quadratic space) representation. - * Stores result of most recent mapping for fast - * sequential access. - */ -class Finder { - -protected: - Matcher *pm1, *pm2; - int index1, index2, bestRow, bestCol; - int *rowRange; - int *colRange; - -public: - Finder(Matcher *p1, Matcher *p2); - - ~Finder(); - - /** Sets up the instance variables to point to the given - * coordinate in the distance matrix. - * - * @param i1 frameNumber in the first Matcher - * @param i2 frameNumber in the second Matcher - * @return true iff the point (i2,i1) is represented in the distance matrix - */ - bool find(int i1, int i2); - - /** Returns the range [lo,hi) of legal column indices for the - * given row. */ - void getColRange(int row, int *range); - - /** Returns the range [lo,hi) of legal row indices for the given - * column. */ - void getRowRange(int col, int *range); - - int getExpandDirection(int row, int col); - int getExpandDirection(int row, int col, bool check); - - unsigned char getDistance(int row, int col); - void setDistance(int row, int col, unsigned char b); - - int getPathCost(int row, int col); - int getRawPathCost(int row, int col); - void setPathCost(int row, int col, int i); - - unsigned char getDistance(); - void setDistance(int b); - - int getPathCost(); - void setPathCost(int i); - - /** Calculates a rectangle of the path cost matrix so that the - * minimum cost path between the bottom left and top right - * corners can be computed. Caches previous values to avoid - * calling find() multiple times, and is several times faster as - * a result. - * - * @param r1 the bottom of the rectangle to be calculated - * @param c1 the left side of the rectangle to be calculated - * @param r2 the top of the rectangle to be calculated - * @param c2 the right side of the rectangle to be calculated - */ - void recalculatePathCostMatrix(int r1, int c1, int r2, int c2); - - /** - * Track back after all of the matchers have been fed in order to - * obtain the lowest cost path available. Path x and y coordinate - * pairs are returned in corresponding elements of pathx and - * pathy. Return value is the length of the returned path: only - * this many elements from pathx and pathy are valid (any - * subsequent ones may be spurious). - * - * @param smooth whether to smooth the path before returning it - */ - int retrievePath(bool smooth, std::vector<int> &pathx, std::vector<int> &pathy); - -}; // class Finder - -#endif
--- a/Makefile.inc Thu Nov 13 09:50:09 2014 +0000 +++ b/Makefile.inc Thu Nov 13 09:50:14 2014 +0000 @@ -4,8 +4,8 @@ CXX ?= g++ CC ?= gcc -HEADERS := DistanceMetric.h Finder.h Matcher.h MatchFeeder.h MatchFeatureFeeder.h MatchVampPlugin.h Path.h -SOURCES := DistanceMetric.cpp Finder.cpp Matcher.cpp MatchFeeder.cpp MatchFeatureFeeder.cpp MatchVampPlugin.cpp Path.cpp +HEADERS := $(wildcard src/*.h) +SOURCES := $(wildcard src/*.cpp) OBJECTS := $(SOURCES:.cpp=.o) @@ -20,20 +20,21 @@ depend: makedepend -Y -fMakefile.inc $(SOURCES) $(HEADERS) - # DO NOT DELETE -DistanceMetric.o: DistanceMetric.h -Finder.o: Finder.h Matcher.h DistanceMetric.h -Matcher.o: Matcher.h DistanceMetric.h -MatchFeeder.o: MatchFeeder.h Matcher.h DistanceMetric.h Finder.h -MatchFeatureFeeder.o: MatchFeatureFeeder.h Matcher.h DistanceMetric.h -MatchFeatureFeeder.o: Finder.h -MatchVampPlugin.o: MatchVampPlugin.h Matcher.h DistanceMetric.h MatchFeeder.h -MatchVampPlugin.o: Finder.h Path.h -Path.o: Path.h -Finder.o: Matcher.h DistanceMetric.h -Matcher.o: DistanceMetric.h -MatchFeeder.o: Matcher.h DistanceMetric.h Finder.h -MatchFeatureFeeder.o: Matcher.h DistanceMetric.h Finder.h -MatchVampPlugin.o: Matcher.h DistanceMetric.h +src/DistanceMetric.o: src/DistanceMetric.h +src/MatchFeeder.o: src/MatchFeeder.h src/Matcher.h src/DistanceMetric.h +src/MatchFeeder.o: src/Finder.h +src/Path.o: src/Path.h +src/MatchFeatureFeeder.o: src/MatchFeatureFeeder.h src/Matcher.h +src/MatchFeatureFeeder.o: src/DistanceMetric.h src/Finder.h +src/Finder.o: src/Finder.h src/Matcher.h src/DistanceMetric.h src/Path.h +src/Matcher.o: src/Matcher.h src/DistanceMetric.h +src/MatchVampPlugin.o: src/MatchVampPlugin.h src/Matcher.h +src/MatchVampPlugin.o: src/DistanceMetric.h src/MatchFeeder.h src/Finder.h +src/MatchVampPlugin.o: src/Path.h +src/MatchFeeder.o: src/Matcher.h src/DistanceMetric.h src/Finder.h +src/MatchFeatureFeeder.o: src/Matcher.h src/DistanceMetric.h src/Finder.h +src/Finder.o: src/Matcher.h src/DistanceMetric.h +src/Matcher.o: src/DistanceMetric.h +src/MatchVampPlugin.o: src/Matcher.h src/DistanceMetric.h
--- a/MatchFeatureFeeder.cpp Thu Nov 13 09:50:09 2014 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,84 +0,0 @@ -/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ - -/* - Vamp feature extraction plugin using the MATCH audio alignment - algorithm. - - Centre for Digital Music, Queen Mary, University of London. - This file copyright 2007 Simon Dixon, Chris Cannam and QMUL. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. See the file - COPYING included with this distribution for more information. -*/ - -#include "MatchFeatureFeeder.h" - -using std::vector; - -MatchFeatureFeeder::MatchFeatureFeeder(Matcher *m1, Matcher *m2) : - pm1(m1), pm2(m2) -{ - finder = new Finder(m1, m2); -} - -MatchFeatureFeeder::~MatchFeatureFeeder() -{ - delete finder; -} - -void -MatchFeatureFeeder::feed(vector<double> f1, vector<double> f2) -{ - q1.push(f1); - q2.push(f2); - - while (!q1.empty() && !q2.empty()) { - feedBlock(); - } -} - -void -MatchFeatureFeeder::feedBlock() -{ - if (pm1->frameCount < pm1->blockSize) { // fill initial block - feed1(); - feed2(); - } - else if (pm1->runCount >= pm1->params.maxRunCount) { // slope constraints - feed2(); - } else if (pm2->runCount >= pm2->params.maxRunCount) { - feed1(); - } else { - switch (finder->getExpandDirection - (pm1->frameCount-1, pm2->frameCount-1)) { - case ADVANCE_THIS: - feed1(); - break; - case ADVANCE_OTHER: - feed2(); - break; - case ADVANCE_BOTH: - feed1(); - feed2(); - break; - } - } -} - -void -MatchFeatureFeeder::feed1() -{ - pm1->consumeFeatureVector(q1.front()); - q1.pop(); -} - -void -MatchFeatureFeeder::feed2() -{ - pm2->consumeFeatureVector(q2.front()); - q2.pop(); -} -
--- a/MatchFeatureFeeder.h Thu Nov 13 09:50:09 2014 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,54 +0,0 @@ -/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ -/* - Vamp feature extraction plugin using the MATCH audio alignment - algorithm. - - Centre for Digital Music, Queen Mary, University of London. - This file copyright 2007 Simon Dixon, Chris Cannam and QMUL. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. See the file - COPYING included with this distribution for more information. -*/ - -#ifndef _MATCH_FEATURE_FEEDER_H_ -#define _MATCH_FEATURE_FEEDER_H_ - -#include "Matcher.h" -#include "Finder.h" - -#include <queue> -#include <vector> - -class MatchFeatureFeeder -{ -public: - MatchFeatureFeeder(Matcher *m1, Matcher *m2); - ~MatchFeatureFeeder(); - - /** - * Feed the two supplied feature vectors to feeders 1 and 2 - * respectively (depending on their advance status). Matchers must - * have been constructed using the external featureSize - * constructor. - */ - void feed(std::vector<double> f1, std::vector<double> f2); - - Finder *getFinder() { return finder; } - -protected: - void feedBlock(); - void feed1(); - void feed2(); - - Finder *finder; - Matcher *pm1; - Matcher *pm2; - - std::queue<std::vector<double> > q1; - std::queue<std::vector<double> > q2; -}; - -#endif
--- a/MatchFeeder.cpp Thu Nov 13 09:50:09 2014 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,170 +0,0 @@ -/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ - -/* - Vamp feature extraction plugin using the MATCH audio alignment - algorithm. - - Centre for Digital Music, Queen Mary, University of London. - This file copyright 2007 Simon Dixon, Chris Cannam and QMUL. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. See the file - COPYING included with this distribution for more information. -*/ - -#include "MatchFeeder.h" - -using std::vector; - -MatchFeeder::MatchFeeder(Matcher *m1, Matcher *m2) : - pm1(m1), pm2(m2) -{ - fftSize = m1->params.fftSize; - finder = new Finder(m1, m2); - reBuffer = new double[fftSize/2+1]; - imBuffer = new double[fftSize/2+1]; -} - -MatchFeeder::~MatchFeeder() -{ - delete[] imBuffer; - delete[] reBuffer; - while (!q1.empty()) { - delete[] q1.front(); - q1.pop(); - } - while (!q2.empty()) { - delete[] q2.front(); - q2.pop(); - } - delete finder; -} - -void -MatchFeeder::feed(const float *const *input) -{ - // We maintain two FIFO queues of audio data frame block pointers, - // one per input stream. When the match-feeder function is - // entered, it knows that it has at least one block in each queue. - // It loops, processing up to one block per matcher, until a queue - // is empty. Then it returns, to be called again with more data. - - prepare(input); - - while (!q1.empty() && !q2.empty()) { -// std::cerr << "MatchFeeder::feed: q1 " << q1.size() << " q2 " << q2.size() << std::endl; - (void)feedBlock(); - } -} - -MatchFeeder::Features -MatchFeeder::feedAndGetFeatures(const float *const *input) -{ - prepare(input); - - Features all; - - while (!q1.empty() && !q2.empty()) { - Features ff = feedBlock(); - all.f1.insert(all.f1.end(), ff.f1.begin(), ff.f1.end()); - all.f2.insert(all.f2.end(), ff.f2.begin(), ff.f2.end()); - } - - return all; -} - -void -MatchFeeder::prepare(const float *const *input) -{ - float *block = new float[fftSize+2]; - for (size_t i = 0; i < fftSize+2; ++i) { - block[i] = input[0][i]; - } - q1.push(block); - - block = new float[fftSize+2]; - for (size_t i = 0; i < fftSize+2; ++i) { - block[i] = input[1][i]; - } - q2.push(block); -} - -MatchFeeder::Features -MatchFeeder::feedBlock() -{ - Features ff; - vector<double> f1, f2; - - if (pm1->frameCount < pm1->blockSize) { // fill initial block -// std::cerr << "feeding initial block" << std::endl; - f1 = feed1(); - f2 = feed2(); - } -//!!! } else if (pm1->atEnd) { -// feed2(); -//!!! } else if (pm2->atEnd) -// feed1(); - else if (pm1->runCount >= pm1->params.maxRunCount) { // slope constraints -// std::cerr << "pm1 too slopey" << std::endl; - f2 = feed2(); - } else if (pm2->runCount >= pm2->params.maxRunCount) { -// std::cerr << "pm2 too slopey" << std::endl; - f1 = feed1(); - } else { - switch (finder->getExpandDirection - (pm1->frameCount-1, pm2->frameCount-1)) { - case ADVANCE_THIS: -// std::cerr << "finder says ADVANCE_THIS" << std::endl; - f1 = feed1(); - break; - case ADVANCE_OTHER: -// std::cerr << "finder says ADVANCE_OTHER" << std::endl; - f2 = feed2(); - break; - case ADVANCE_BOTH: -// std::cerr << "finder says ADVANCE_BOTH" << std::endl; - f1 = feed1(); - f2 = feed2(); - break; - } - } - - if (!f1.empty()) ff.f1.push_back(f1); - if (!f2.empty()) ff.f2.push_back(f2); - return ff; -} - -vector<double> -MatchFeeder::feed1() -{ -// std::cerr << "feed1" << std::endl; - float *block = q1.front(); - q1.pop(); - for (size_t i = 0; i <= fftSize/2; ++i) { - reBuffer[i] = block[i*2]; - } - for (size_t i = 0; i <= fftSize/2; ++i) { - imBuffer[i] = block[i*2+1]; - } - delete[] block; - return pm1->consumeFrame(reBuffer, imBuffer); -} - -vector<double> -MatchFeeder::feed2() -{ -// std::cerr << "feed2" << std::endl; - float *block = q2.front(); - q2.pop(); - for (size_t i = 0; i <= fftSize/2; ++i) { - reBuffer[i] = block[i*2]; - } - for (size_t i = 0; i <= fftSize/2; ++i) { - imBuffer[i] = block[i*2+1]; - } - delete[] block; - return pm2->consumeFrame(reBuffer, imBuffer); -} -
--- a/MatchFeeder.h Thu Nov 13 09:50:09 2014 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,71 +0,0 @@ -/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ -/* - Vamp feature extraction plugin using the MATCH audio alignment - algorithm. - - Centre for Digital Music, Queen Mary, University of London. - This file copyright 2007 Simon Dixon, Chris Cannam and QMUL. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. See the file - COPYING included with this distribution for more information. -*/ - -#ifndef _MATCH_FEEDER_H_ -#define _MATCH_FEEDER_H_ - -#include "Matcher.h" -#include "Finder.h" - -#include <queue> -#include <vector> - -class MatchFeeder -{ -public: - MatchFeeder(Matcher *m1, Matcher *m2); - ~MatchFeeder(); - - /** - * Feed the two supplied channels of frequency-domain input data - * to feeders 1 and 2 respectively, as appropriate (depending on - * their advance status). - */ - void feed(const float *const *input); - - struct Features { - std::vector<std::vector<double> > f1; - std::vector<std::vector<double> > f2; - }; - - /** - * Feed the two supplied channels of frequency-domain input data - * to matchers 1 and 2 respectively, as appropriate (depending on - * their advance status) and return any new feature vectors - * calculated by the two feeders. - */ - Features feedAndGetFeatures(const float *const *input); - - Finder *getFinder() { return finder; } - -protected: - void prepare(const float *const *input); - Features feedBlock(); - std::vector<double> feed1(); - std::vector<double> feed2(); - - Finder *finder; - Matcher *pm1; - Matcher *pm2; - - size_t fftSize; - double *reBuffer; - double *imBuffer; - - std::queue<float *> q1; - std::queue<float *> q2; -}; - -#endif
--- a/MatchVampPlugin.cpp Thu Nov 13 09:50:09 2014 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,632 +0,0 @@ -/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ - -/* - Vamp feature extraction plugin using the MATCH audio alignment - algorithm. - - Centre for Digital Music, Queen Mary, University of London. - This file copyright 2007 Simon Dixon, Chris Cannam and QMUL. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. See the file - COPYING included with this distribution for more information. -*/ - -#include "MatchVampPlugin.h" - -#include "Matcher.h" -#include "MatchFeeder.h" -#include "Path.h" - -#include <vamp/vamp.h> -#include <vamp-sdk/PluginAdapter.h> -#include <vamp-sdk/RealTime.h> - -#include <vector> -#include <algorithm> - -//static int extant = 0; - -#ifdef _WIN32 -HANDLE -MatchVampPlugin::m_serialisingMutex; -#else -pthread_mutex_t -MatchVampPlugin::m_serialisingMutex; -#endif - -bool -MatchVampPlugin::m_serialisingMutexInitialised = false; - -// We want to ensure our freq map / crossover bin in Matcher.cpp are -// always valid with a fixed FFT length in seconds, so must reject low -// sample rates -static float sampleRateMin = 5000.f; - -static float defaultStepTime = 0.020; - -MatchVampPlugin::MatchVampPlugin(float inputSampleRate) : - Plugin(inputSampleRate), - m_stepSize(inputSampleRate * defaultStepTime + 0.001), - m_stepTime(defaultStepTime), - m_blockSize(2048), - m_serialise(false), - m_begin(true), - m_locked(false), - m_smooth(true), - m_params(inputSampleRate, defaultStepTime, m_blockSize), - m_defaultParams(inputSampleRate, defaultStepTime, m_blockSize) -{ - if (inputSampleRate < sampleRateMin) { - std::cerr << "MatchVampPlugin::MatchVampPlugin: input sample rate " - << inputSampleRate << " < min supported rate " - << sampleRateMin << ", plugin will refuse to initialise" - << std::endl; - } - - if (!m_serialisingMutexInitialised) { - m_serialisingMutexInitialised = true; -#ifdef _WIN32 - m_serialisingMutex = CreateMutex(NULL, FALSE, NULL); -#else - pthread_mutex_init(&m_serialisingMutex, 0); -#endif - } - - pm1 = 0; - pm2 = 0; - feeder = 0; -// std::cerr << "MatchVampPlugin::MatchVampPlugin(" << this << "): extant = " << ++extant << std::endl; -} - -MatchVampPlugin::~MatchVampPlugin() -{ -// std::cerr << "MatchVampPlugin::~MatchVampPlugin(" << this << "): extant = " << --extant << std::endl; - - delete feeder; - delete pm1; - delete pm2; - - if (m_locked) { -#ifdef _WIN32 - ReleaseMutex(m_serialisingMutex); -#else - pthread_mutex_unlock(&m_serialisingMutex); -#endif - m_locked = false; - } -} - -string -MatchVampPlugin::getIdentifier() const -{ - return "match"; -} - -string -MatchVampPlugin::getName() const -{ - return "Match Performance Aligner"; -} - -string -MatchVampPlugin::getDescription() const -{ - return "Calculate alignment between two performances in separate channel inputs"; -} - -string -MatchVampPlugin::getMaker() const -{ - return "Simon Dixon (plugin by Chris Cannam)"; -} - -int -MatchVampPlugin::getPluginVersion() const -{ - return 2; -} - -string -MatchVampPlugin::getCopyright() const -{ - return "GPL"; -} - -MatchVampPlugin::ParameterList -MatchVampPlugin::getParameterDescriptors() const -{ - ParameterList list; - - ParameterDescriptor desc; - - desc.identifier = "serialise"; - desc.name = "Serialise Plugin Invocations"; - desc.description = "Reduce potential memory load at the expense of multiprocessor performance by serialising multi-threaded plugin runs"; - desc.minValue = 0; - desc.maxValue = 1; - desc.defaultValue = 0; - desc.isQuantized = true; - desc.quantizeStep = 1; - list.push_back(desc); - - desc.identifier = "framenorm"; - desc.name = "Frame Normalisation"; - desc.description = "Type of normalisation to use for frequency-domain audio features"; - desc.minValue = 0; - desc.maxValue = 2; - desc.defaultValue = (int)m_defaultParams.frameNorm; - desc.isQuantized = true; - desc.quantizeStep = 1; - desc.valueNames.clear(); - desc.valueNames.push_back("None"); - desc.valueNames.push_back("Sum To 1"); - desc.valueNames.push_back("Long-Term Average"); - list.push_back(desc); - desc.valueNames.clear(); - - desc.identifier = "distnorm"; - desc.name = "Distance Normalisation"; - desc.description = "Type of normalisation to use for distance metric"; - desc.minValue = 0; - desc.maxValue = 2; - desc.defaultValue = (int)m_defaultParams.distanceNorm; - desc.isQuantized = true; - desc.quantizeStep = 1; - desc.valueNames.clear(); - desc.valueNames.push_back("None"); - desc.valueNames.push_back("Sum of Frames"); - desc.valueNames.push_back("Log Sum of Frames"); - list.push_back(desc); - desc.valueNames.clear(); - - desc.identifier = "usespecdiff"; - desc.name = "Use Spectral Difference"; - desc.description = "Whether to use half-wave rectified spectral difference instead of straight spectrum"; - desc.minValue = 0; - desc.maxValue = 1; - desc.defaultValue = m_defaultParams.useSpectralDifference ? 1 : 0; - desc.isQuantized = true; - desc.quantizeStep = 1; - list.push_back(desc); - - desc.identifier = "usechroma"; - desc.name = "Use Chroma Frequency Map"; - desc.description = "Whether to use a chroma frequency map instead of the default warped spectrogram"; - desc.minValue = 0; - desc.maxValue = 1; - desc.defaultValue = m_defaultParams.useChromaFrequencyMap ? 1 : 0; - desc.isQuantized = true; - desc.quantizeStep = 1; - list.push_back(desc); - - desc.identifier = "gradientlimit"; - desc.name = "Gradient Limit"; - desc.description = "Limit of number of frames that will be accepted from one source without a frame from the other source being accepted"; - desc.minValue = 1; - desc.maxValue = 10; - desc.defaultValue = m_defaultParams.maxRunCount; - desc.isQuantized = true; - desc.quantizeStep = 1; - list.push_back(desc); - - desc.identifier = "zonewidth"; - desc.name = "Search Zone Width"; - desc.description = "Width of the search zone (error margin) either side of the ongoing match position, in seconds"; - desc.minValue = 1; - desc.maxValue = 60; - desc.defaultValue = m_defaultParams.blockTime; - desc.isQuantized = true; - desc.quantizeStep = 1; - desc.unit = "s"; - list.push_back(desc); - - desc.identifier = "smooth"; - desc.name = "Smooth Path"; - desc.description = "Smooth the path by replacing steps with diagonals"; - desc.minValue = 0; - desc.maxValue = 1; - desc.defaultValue = 1; - desc.isQuantized = true; - desc.quantizeStep = 1; - desc.unit = ""; - list.push_back(desc); - - return list; -} - -float -MatchVampPlugin::getParameter(std::string name) const -{ - if (name == "serialise") { - return m_serialise ? 1.0 : 0.0; - } else if (name == "framenorm") { - return (int)m_params.frameNorm; - } else if (name == "distnorm") { - return (int)m_params.distanceNorm; - } else if (name == "usespecdiff") { - return m_params.useSpectralDifference ? 1.0 : 0.0; - } else if (name == "usechroma") { - return m_params.useChromaFrequencyMap ? 1.0 : 0.0; - } else if (name == "gradientlimit") { - return m_params.maxRunCount; - } else if (name == "zonewidth") { - return m_params.blockTime; - } else if (name == "smooth") { - return m_smooth ? 1.0 : 0.0; - } - - return 0.0; -} - -void -MatchVampPlugin::setParameter(std::string name, float value) -{ - if (name == "serialise") { - m_serialise = (value > 0.5); - } else if (name == "framenorm") { - m_params.frameNorm = (Matcher::FrameNormalisation)(int(value + 0.1)); - } else if (name == "distnorm") { - m_params.distanceNorm = (DistanceMetric::DistanceNormalisation)(int(value + 0.1)); - } else if (name == "usespecdiff") { - m_params.useSpectralDifference = (value > 0.5); - } else if (name == "usechroma") { - m_params.useChromaFrequencyMap = (value > 0.5); - } else if (name == "gradientlimit") { - m_params.maxRunCount = int(value + 0.1); - } else if (name == "zonewidth") { - m_params.blockTime = value; - } else if (name == "smooth") { - m_smooth = (value > 0.5); - } -} - -size_t -MatchVampPlugin::getPreferredStepSize() const -{ - return m_inputSampleRate * defaultStepTime; -} - -size_t -MatchVampPlugin::getPreferredBlockSize() const -{ - return 2048; -} - -void -MatchVampPlugin::createMatchers() -{ - m_params.hopTime = m_stepTime; - m_params.fftSize = m_blockSize; - pm1 = new Matcher(m_params, 0); - pm2 = new Matcher(m_params, pm1); - pm1->setOtherMatcher(pm2); - feeder = new MatchFeeder(pm1, pm2); -} - -bool -MatchVampPlugin::initialise(size_t channels, size_t stepSize, size_t blockSize) -{ - if (m_inputSampleRate < sampleRateMin) { - std::cerr << "MatchVampPlugin::MatchVampPlugin: input sample rate " - << m_inputSampleRate << " < min supported rate " - << sampleRateMin << std::endl; - return false; - } - if (channels < getMinChannelCount() || - channels > getMaxChannelCount()) return false; - if (stepSize > blockSize/2 || - blockSize != getPreferredBlockSize()) return false; - - m_stepSize = stepSize; - m_stepTime = float(stepSize) / m_inputSampleRate; - m_blockSize = blockSize; - - createMatchers(); - m_begin = true; - m_locked = false; - - return true; -} - -void -MatchVampPlugin::reset() -{ - delete feeder; - delete pm1; - delete pm2; - feeder = 0; - pm1 = 0; - pm2 = 0; - - createMatchers(); - m_begin = true; - m_locked = false; -} - -MatchVampPlugin::OutputList -MatchVampPlugin::getOutputDescriptors() const -{ - OutputList list; - - float outRate = 1.0 / m_stepTime; - - OutputDescriptor desc; - desc.identifier = "path"; - desc.name = "Path"; - desc.description = "Alignment path"; - desc.unit = ""; - desc.hasFixedBinCount = true; - desc.binCount = 1; - desc.hasKnownExtents = false; - desc.isQuantized = true; - desc.quantizeStep = 1; - desc.sampleType = OutputDescriptor::VariableSampleRate; - desc.sampleRate = outRate; - m_pathOutNo = list.size(); - list.push_back(desc); - - desc.identifier = "a_b"; - desc.name = "A-B Timeline"; - desc.description = "Timing in performance B corresponding to moments in performance A"; - desc.unit = "sec"; - desc.hasFixedBinCount = true; - desc.binCount = 1; - desc.hasKnownExtents = false; - desc.isQuantized = false; - desc.sampleType = OutputDescriptor::VariableSampleRate; - desc.sampleRate = outRate; - m_abOutNo = list.size(); - list.push_back(desc); - - desc.identifier = "b_a"; - desc.name = "B-A Timeline"; - desc.description = "Timing in performance A corresponding to moments in performance B"; - desc.unit = "sec"; - desc.hasFixedBinCount = true; - desc.binCount = 1; - desc.hasKnownExtents = false; - desc.isQuantized = false; - desc.sampleType = OutputDescriptor::VariableSampleRate; - desc.sampleRate = outRate; - m_baOutNo = list.size(); - list.push_back(desc); - - desc.identifier = "a_b_divergence"; - desc.name = "A-B Divergence"; - desc.description = "Difference between timings in performances A and B"; - desc.unit = "sec"; - desc.hasFixedBinCount = true; - desc.binCount = 1; - desc.hasKnownExtents = false; - desc.isQuantized = false; - desc.sampleType = OutputDescriptor::VariableSampleRate; - desc.sampleRate = outRate; - m_abDivOutNo = list.size(); - list.push_back(desc); - - desc.identifier = "a_b_temporatio"; - desc.name = "A-B Tempo Ratio"; - desc.description = "Ratio of tempi between performances A and B"; - desc.unit = ""; - desc.hasFixedBinCount = true; - desc.binCount = 1; - desc.hasKnownExtents = false; - desc.isQuantized = false; - desc.sampleType = OutputDescriptor::VariableSampleRate; - desc.sampleRate = outRate; - m_abRatioOutNo = list.size(); - list.push_back(desc); - - desc.identifier = "a_features"; - desc.name = "A Features"; - desc.description = "Spectral features extracted from performance A"; - desc.unit = ""; - desc.hasFixedBinCount = true; - desc.binCount = Matcher::getFeatureSizeFor(m_params); - desc.hasKnownExtents = false; - desc.isQuantized = false; - desc.sampleType = OutputDescriptor::FixedSampleRate; - desc.sampleRate = outRate; - m_aFeaturesOutNo = list.size(); - list.push_back(desc); - - desc.identifier = "b_features"; - desc.name = "B Features"; - desc.description = "Spectral features extracted from performance B"; - desc.unit = ""; - desc.hasFixedBinCount = true; - desc.binCount = Matcher::getFeatureSizeFor(m_params); - desc.hasKnownExtents = false; - desc.isQuantized = false; - desc.sampleType = OutputDescriptor::FixedSampleRate; - desc.sampleRate = outRate; - m_bFeaturesOutNo = list.size(); - list.push_back(desc); - - return list; -} - -MatchVampPlugin::FeatureSet -MatchVampPlugin::process(const float *const *inputBuffers, - Vamp::RealTime timestamp) -{ - if (m_begin) { - if (!m_locked && m_serialise) { - m_locked = true; -#ifdef _WIN32 - WaitForSingleObject(m_serialisingMutex, INFINITE); -#else - pthread_mutex_lock(&m_serialisingMutex); -#endif - } - m_startTime = timestamp; - m_begin = false; - } - -// std::cerr << timestamp.toString(); - - MatchFeeder::Features ff = feeder->feedAndGetFeatures(inputBuffers); - - FeatureSet returnFeatures; - - Feature f; - f.hasTimestamp = false; - - for (int i = 0; i < (int)ff.f1.size(); ++i) { - f.values.clear(); - for (int j = 0; j < (int)ff.f1[i].size(); ++j) { - f.values.push_back(ff.f1[i][j]); - } - returnFeatures[m_aFeaturesOutNo].push_back(f); - } - - for (int i = 0; i < (int)ff.f2.size(); ++i) { - f.values.clear(); - for (int j = 0; j < (int)ff.f2[i].size(); ++j) { - f.values.push_back(ff.f2[i][j]); - } - returnFeatures[m_bFeaturesOutNo].push_back(f); - } - -// std::cerr << "."; -// std::cerr << std::endl; - - return returnFeatures; -} - -MatchVampPlugin::FeatureSet -MatchVampPlugin::getRemainingFeatures() -{ - Finder *finder = feeder->getFinder(); - std::vector<int> pathx; - std::vector<int> pathy; - int len = finder->retrievePath(m_smooth, pathx, pathy); - - FeatureSet returnFeatures; - - int prevx = 0; - int prevy = 0; - - for (int i = 0; i < len; ++i) { - - int x = pathx[i]; - int y = pathy[i]; - - Vamp::RealTime xt = Vamp::RealTime::frame2RealTime - (x * m_stepSize, lrintf(m_inputSampleRate)); - Vamp::RealTime yt = Vamp::RealTime::frame2RealTime - (y * m_stepSize, lrintf(m_inputSampleRate)); - - Feature feature; - feature.hasTimestamp = true; - feature.timestamp = m_startTime + xt; - feature.values.clear(); - feature.values.push_back(yt.sec + double(yt.nsec)/1.0e9); - returnFeatures[m_pathOutNo].push_back(feature); - - if (x != prevx) { - - feature.hasTimestamp = true; - feature.timestamp = m_startTime + xt; - feature.values.clear(); - feature.values.push_back(yt.sec + yt.msec()/1000.0); - returnFeatures[m_abOutNo].push_back(feature); - - Vamp::RealTime diff = yt - xt; - feature.values.clear(); - feature.values.push_back(diff.sec + diff.msec()/1000.0); - returnFeatures[m_abDivOutNo].push_back(feature); - - if (i > 0) { - int lookback = 100; //!!! arbitrary - if (lookback > i) lookback = i; - int xdiff = x - pathx[i-lookback]; - int ydiff = y - pathy[i-lookback]; - if (xdiff != 0 && ydiff != 0) { - float ratio = float(ydiff)/float(xdiff); - if (ratio < 8 && ratio > (1.0/8)) { //!!! just for now, since we aren't dealing properly with silence yet - feature.values.clear(); - feature.values.push_back(ratio); - returnFeatures[m_abRatioOutNo].push_back(feature); - } - } - } - } - - if (y != prevy) { - feature.hasTimestamp = true; - feature.timestamp = m_startTime + yt; - feature.values.clear(); - feature.values.push_back(xt.sec + xt.msec()/1000.0); - returnFeatures[m_baOutNo].push_back(feature); - } - - prevx = x; - prevy = y; - } - - delete feeder; - delete pm1; - delete pm2; - feeder = 0; - pm1 = 0; - pm2 = 0; - - if (m_locked) { -#ifdef _WIN32 - ReleaseMutex(m_serialisingMutex); -#else - pthread_mutex_unlock(&m_serialisingMutex); -#endif - m_locked = false; - } - - return returnFeatures; - - -/* - for (int i = 0; i < len; ++i) { - std::cerr << i << ": [" << pathx[i] << "," << pathy[i] << "]" << std::endl; - } - - std::cerr << std::endl; - std::cerr << "File: A" << std::endl; - std::cerr << "Marks: -1" << std::endl; - std::cerr << "FixedPoints: true 0" << std::endl; - std::cerr << "0" << std::endl; - std::cerr << "0" << std::endl; - std::cerr << "0" << std::endl; - std::cerr << "0" << std::endl; - std::cerr << "File: B" << std::endl; - std::cerr << "Marks: 0" << std::endl; - std::cerr << "FixedPoints: true 0" << std::endl; - std::cerr << "0.02" << std::endl; - std::cerr << "0.02" << std::endl; - - std::cerr << len << std::endl; - for (int i = 0; i < len; ++i) { - std::cerr << pathx[i] << std::endl; - } - - std::cerr << len << std::endl; - for (int i = 0; i < len; ++i) { - std::cerr << pathy[i] << std::endl; - } -*/ -} - -static Vamp::PluginAdapter<MatchVampPlugin> mvpAdapter; - -const VampPluginDescriptor *vampGetPluginDescriptor(unsigned int version, - unsigned int index) -{ - if (version < 1) return 0; - - switch (index) { - case 0: return mvpAdapter.getDescriptor(); - default: return 0; - } -}
--- a/MatchVampPlugin.h Thu Nov 13 09:50:09 2014 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,104 +0,0 @@ -/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ - -/* - Vamp feature extraction plugin using the MATCH audio alignment - algorithm. - - Centre for Digital Music, Queen Mary, University of London. - This file copyright 2007 Simon Dixon, Chris Cannam and QMUL. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. See the file - COPYING included with this distribution for more information. -*/ - -#ifndef _MATCH_VAMP_PLUGIN_H_ -#define _MATCH_VAMP_PLUGIN_H_ - -#include <vamp-sdk/Plugin.h> - -#ifdef _WIN32 -#include <windows.h> -#else -#include <pthread.h> -#endif - -#include "Matcher.h" - -class MatchFeeder; - -class MatchVampPlugin : public Vamp::Plugin -{ -public: - MatchVampPlugin(float inputSampleRate); - virtual ~MatchVampPlugin(); - - bool initialise(size_t channels, size_t stepSize, size_t blockSize); - void reset(); - - InputDomain getInputDomain() const { return FrequencyDomain; } - - size_t getPreferredStepSize() const; - size_t getPreferredBlockSize() const; - - size_t getMinChannelCount() const { return 2; } - size_t getMaxChannelCount() const { return 2; } - - std::string getIdentifier() const; - std::string getName() const; - std::string getDescription() const; - std::string getMaker() const; - int getPluginVersion() const; - std::string getCopyright() const; - - ParameterList getParameterDescriptors() const; - float getParameter(std::string) const; - void setParameter(std::string, float); - - OutputList getOutputDescriptors() const; - - FeatureSet process(const float *const *inputBuffers, - Vamp::RealTime timestamp); - - FeatureSet getRemainingFeatures(); - -protected: - void createMatchers(); - - Matcher *pm1; - Matcher *pm2; - MatchFeeder *feeder; - - Vamp::RealTime m_startTime; - int m_stepSize; - float m_stepTime; - int m_blockSize; - bool m_serialise; - bool m_begin; - bool m_locked; - bool m_smooth; - - Matcher::Parameters m_params; - Matcher::Parameters m_defaultParams; - - mutable int m_pathOutNo; - mutable int m_abOutNo; - mutable int m_baOutNo; - mutable int m_abDivOutNo; - mutable int m_abRatioOutNo; - mutable int m_aFeaturesOutNo; - mutable int m_bFeaturesOutNo; - -#ifdef _WIN32 - static HANDLE m_serialisingMutex; -#else - static pthread_mutex_t m_serialisingMutex; -#endif - - static bool m_serialisingMutexInitialised; -}; - - -#endif
--- a/Matcher.cpp Thu Nov 13 09:50:09 2014 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,508 +0,0 @@ -/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ - -/* - Vamp feature extraction plugin using the MATCH audio alignment - algorithm. - - Centre for Digital Music, Queen Mary, University of London. - This file copyright 2007 Simon Dixon, Chris Cannam and QMUL. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. See the file - COPYING included with this distribution for more information. -*/ - -#include "Matcher.h" - -#include <iostream> - -#include <cstdlib> -#include <cassert> - -bool Matcher::silent = true; - -//#define DEBUG_MATCHER 1 - -Matcher::Matcher(Parameters parameters, Matcher *p) : - params(parameters), - metric(parameters.distanceNorm) -{ -#ifdef DEBUG_MATCHER - cerr << "Matcher::Matcher(" << params.sampleRate << ", " << p << ")" << endl; -#endif - - otherMatcher = p; // the first matcher will need this to be set later - firstPM = (!p); - ltAverage = 0; - frameCount = 0; - runCount = 0; - freqMapSize = 0; - externalFeatureSize = 0; - featureSize = 0; - blockSize = 0; - - blockSize = lrint(params.blockTime / params.hopTime); -#ifdef DEBUG_MATCHER - cerr << "Matcher: blockSize = " << blockSize << endl; -#endif - - distance = 0; - bestPathCost = 0; - distYSizes = 0; - distXSize = 0; - - initialised = false; -} - -Matcher::Matcher(Parameters parameters, Matcher *p, int featureSize) : - params(parameters), - externalFeatureSize(featureSize), - metric(parameters.distanceNorm) -{ -#ifdef DEBUG_MATCHER - cerr << "Matcher::Matcher(" << params.sampleRate << ", " << p << ", " << featureSize << ")" << endl; -#endif - - otherMatcher = p; // the first matcher will need this to be set later - firstPM = (!p); - ltAverage = 0; - frameCount = 0; - runCount = 0; - freqMapSize = 0; - featureSize = 0; - blockSize = 0; - - blockSize = lrint(params.blockTime / params.hopTime); -#ifdef DEBUG_MATCHER - cerr << "Matcher: blockSize = " << blockSize << endl; -#endif - - distance = 0; - bestPathCost = 0; - distYSizes = 0; - distXSize = 0; - - initialised = false; - -} - -Matcher::~Matcher() -{ -#ifdef DEBUG_MATCHER - cerr << "Matcher(" << this << ")::~Matcher()" << endl; -#endif - - if (initialised) { - - for (int i = 0; i < distXSize; ++i) { - if (distance[i]) { - free(distance[i]); - free(bestPathCost[i]); - } - } - free(distance); - free(bestPathCost); - - free(first); - free(last); - - free(distYSizes); - } -} - -void -Matcher::init() -{ - if (initialised) return; - - initialised = true; - - if (externalFeatureSize == 0) { - freqMapSize = getFeatureSizeFor(params); - featureSize = freqMapSize; - makeFreqMap(); - } else { - featureSize = externalFeatureSize; - } - - initVector<double>(prevFrame, featureSize); - initVector<double>(newFrame, featureSize); - initMatrix<double>(frames, blockSize, featureSize); - initVector<double>(totalEnergies, blockSize); - - int distSize = (params.maxRunCount + 1) * blockSize; - - distXSize = blockSize * 2; - - distance = (unsigned char **)malloc(distXSize * sizeof(unsigned char *)); - bestPathCost = (int **)malloc(distXSize * sizeof(int *)); - distYSizes = (int *)malloc(distXSize * sizeof(int)); - - for (int i = 0; i < blockSize; ++i) { - distance[i] = (unsigned char *)malloc(distSize * sizeof(unsigned char)); - bestPathCost[i] = (int *)malloc(distSize * sizeof(int)); - distYSizes[i] = distSize; - } - for (int i = blockSize; i < distXSize; ++i) { - distance[i] = 0; - } - - first = (int *)malloc(distXSize * sizeof(int)); - last = (int *)malloc(distXSize * sizeof(int)); - - frameCount = 0; - runCount = 0; - ltAverage = 0; - -} // init - -void -Matcher::makeFreqMap() -{ - initVector<int>(freqMap, params.fftSize/2 + 1); - - if (params.useChromaFrequencyMap) { -#ifdef DEBUG_MATCHER - cerr << "makeFreqMap: calling makeChromaFrequencyMap" << endl; -#endif - makeChromaFrequencyMap(); - } else { -#ifdef DEBUG_MATCHER - cerr << "makeFreqMap: calling makeStandardFrequencyMap" << endl; -#endif - makeStandardFrequencyMap(); - } -} // makeFreqMap() - -int -Matcher::getFeatureSizeFor(Parameters params) -{ - if (params.useChromaFrequencyMap) { - return 13; - } else { - return 84; - } -} - -void -Matcher::makeStandardFrequencyMap() -{ - double binWidth = params.sampleRate / params.fftSize; - int crossoverBin = (int)(2 / (pow(2, 1/12.0) - 1)); - int crossoverMidi = lrint(log(crossoverBin*binWidth/440.0)/ - log(2.0) * 12 + 69); - // freq = 440 * Math.pow(2, (midi-69)/12.0) / binWidth; - int i = 0; - while (i <= crossoverBin) { - freqMap[i] = i; - ++i; - } - while (i <= params.fftSize/2) { - double midi = log(i*binWidth/440.0) / log(2.0) * 12 + 69; - if (midi > 127) midi = 127; - freqMap[i++] = crossoverBin + lrint(midi) - crossoverMidi; - } - assert(freqMapSize == freqMap[i-1] + 1); - if (!silent) { - cerr << "Standard map size: " << freqMapSize - << "; Crossover at: " << crossoverBin << endl; - for (i = 0; i < params.fftSize / 2; i++) - cerr << "freqMap[" << i << "] = " << freqMap[i] << endl; - } -} // makeStandardFrequencyMap() - -void -Matcher::makeChromaFrequencyMap() -{ - double binWidth = params.sampleRate / params.fftSize; - int crossoverBin = (int)(1 / (pow(2, 1/12.0) - 1)); - // freq = 440 * Math.pow(2, (midi-69)/12.0) / binWidth; - int i = 0; - while (i <= crossoverBin) - freqMap[i++] = 0; - while (i <= params.fftSize/2) { - double midi = log(i*binWidth/440.0) / log(2.0) * 12 + 69; - freqMap[i++] = (lrint(midi)) % 12 + 1; - } - if (!silent) { - cerr << "Chroma map size: " << freqMapSize - << "; Crossover at: " << crossoverBin << endl; - for (i = 0; i < params.fftSize / 2; i++) - cerr << "freqMap[" << i << "] = " << freqMap[i] << endl; - } -} // makeChromaFrequencyMap() - -vector<double> -Matcher::consumeFrame(double *reBuffer, double *imBuffer) -{ - if (!initialised) init(); - - vector<double> processedFrame = - processFrameFromFreqData(reBuffer, imBuffer); - - calcAdvance(); - - return processedFrame; -} - -void -Matcher::consumeFeatureVector(std::vector<double> feature) -{ - if (!initialised) init(); - int frameIndex = frameCount % blockSize; - frames[frameIndex] = feature; - calcAdvance(); -} - -vector<double> -Matcher::processFrameFromFreqData(double *reBuffer, double *imBuffer) -{ - for (int i = 0; i < (int)newFrame.size(); ++i) { - newFrame[i] = 0; - } - double rms = 0; - for (int i = 0; i <= params.fftSize/2; i++) { - double mag = reBuffer[i] * reBuffer[i] + - imBuffer[i] * imBuffer[i]; - rms += mag; - newFrame[freqMap[i]] += mag; - } - rms = sqrt(rms / (params.fftSize/2)); - - int frameIndex = frameCount % blockSize; - - vector<double> processedFrame(freqMapSize, 0.0); - - double totalEnergy = 0; - if (params.useSpectralDifference) { - for (int i = 0; i < freqMapSize; i++) { - totalEnergy += newFrame[i]; - if (newFrame[i] > prevFrame[i]) { - processedFrame[i] = newFrame[i] - prevFrame[i]; - } else { - processedFrame[i] = 0; - } - } - } else { - for (int i = 0; i < freqMapSize; i++) { - processedFrame[i] = newFrame[i]; - totalEnergy += processedFrame[i]; - } - } - totalEnergies[frameIndex] = totalEnergy; - - double decay = frameCount >= 200 ? 0.99: - (frameCount < 100? 0: (frameCount - 100) / 100.0); - - if (ltAverage == 0) - ltAverage = totalEnergy; - else - ltAverage = ltAverage * decay + totalEnergy * (1.0 - decay); - - if (rms <= params.silenceThreshold) - for (int i = 0; i < freqMapSize; i++) - processedFrame[i] = 0; - else if (params.frameNorm == NormaliseFrameToSum1) - for (int i = 0; i < freqMapSize; i++) - processedFrame[i] /= totalEnergy; - else if (params.frameNorm == NormaliseFrameToLTAverage) - for (int i = 0; i < freqMapSize; i++) - processedFrame[i] /= ltAverage; - - vector<double> tmp = prevFrame; - prevFrame = newFrame; - newFrame = tmp; - - frames[frameIndex] = processedFrame; - - if ((frameCount % 100) == 0) { - if (!silent) { - cerr << "Progress:" << frameCount << " " << ltAverage << endl; - } - } - - return processedFrame; -} - -void -Matcher::calcAdvance() -{ - int frameIndex = frameCount % blockSize; - - if (frameCount >= distXSize) { -// std::cerr << "Resizing " << distXSize << " -> " << distXSize * 2 << std::endl; - distXSize *= 2; - distance = (unsigned char **)realloc(distance, distXSize * sizeof(unsigned char *)); - bestPathCost = (int **)realloc(bestPathCost, distXSize * sizeof(int *)); - distYSizes = (int *)realloc(distYSizes, distXSize * sizeof(int)); - first = (int *)realloc(first, distXSize * sizeof(int)); - last = (int *)realloc(last, distXSize * sizeof(int)); - - for (int i = distXSize/2; i < distXSize; ++i) { - distance[i] = 0; - } - } - - if (firstPM && (frameCount >= blockSize)) { - - int len = last[frameCount - blockSize] - - first[frameCount - blockSize]; - - // We need to copy distance[frameCount-blockSize] to - // distance[frameCount], and then truncate - // distance[frameCount-blockSize] to its first len elements. - // Same for bestPathCost. -/* - std::cerr << "Matcher(" << this << "): moving " << distYSizes[frameCount - blockSize] << " from " << frameCount - blockSize << " to " - << frameCount << ", allocating " << len << " for " - << frameCount - blockSize << std::endl; -*/ - distance[frameCount] = distance[frameCount - blockSize]; - - distance[frameCount - blockSize] = (unsigned char *) - malloc(len * sizeof(unsigned char)); - for (int i = 0; i < len; ++i) { - distance[frameCount - blockSize][i] = - distance[frameCount][i]; - } - - bestPathCost[frameCount] = bestPathCost[frameCount - blockSize]; - - bestPathCost[frameCount - blockSize] = (int *) - malloc(len * sizeof(int)); - for (int i = 0; i < len; ++i) { - bestPathCost[frameCount - blockSize][i] = - bestPathCost[frameCount][i]; - } - - distYSizes[frameCount] = distYSizes[frameCount - blockSize]; - distYSizes[frameCount - blockSize] = len; - } - - int stop = otherMatcher->frameCount; - int index = stop - blockSize; - if (index < 0) - index = 0; - first[frameCount] = index; - last[frameCount] = stop; - - bool overflow = false; - int mn= -1; - int mx= -1; - for ( ; index < stop; index++) { - - int dMN = metric.calcDistanceScaled - (frames[frameIndex], - otherMatcher->frames[index % blockSize], - params.distanceScale); - - if (mx<0) - mx = mn = dMN; - else if (dMN > mx) - mx = dMN; - else if (dMN < mn) - mn = dMN; - if (dMN >= 255) { - overflow = true; - dMN = 255; - } - - if ((frameCount == 0) && (index == 0)) // first element - setValue(0, 0, 0, 0, dMN); - else if (frameCount == 0) // first row - setValue(0, index, ADVANCE_OTHER, - getValue(0, index-1, true), dMN); - else if (index == 0) // first column - setValue(frameCount, index, ADVANCE_THIS, - getValue(frameCount - 1, 0, true), dMN); - else if (index == otherMatcher->frameCount - blockSize) { - // missing value(s) due to cutoff - // - no previous value in current row (resp. column) - // - no diagonal value if prev. dir. == curr. dirn - int min2 = getValue(frameCount - 1, index, true); - // if ((firstPM && (first[frameCount - 1] == index)) || - // (!firstPM && (last[index-1] < frameCount))) - if (first[frameCount - 1] == index) - setValue(frameCount, index, ADVANCE_THIS, min2, dMN); - else { - int min1 = getValue(frameCount - 1, index - 1, true); - if (min1 + dMN <= min2) - setValue(frameCount, index, ADVANCE_BOTH, min1,dMN); - else - setValue(frameCount, index, ADVANCE_THIS, min2,dMN); - } - } else { - int min1 = getValue(frameCount, index-1, true); - int min2 = getValue(frameCount - 1, index, true); - int min3 = getValue(frameCount - 1, index-1, true); - if (min1 <= min2) { - if (min3 + dMN <= min1) - setValue(frameCount, index, ADVANCE_BOTH, min3,dMN); - else - setValue(frameCount, index, ADVANCE_OTHER,min1,dMN); - } else { - if (min3 + dMN <= min2) - setValue(frameCount, index, ADVANCE_BOTH, min3,dMN); - else - setValue(frameCount, index, ADVANCE_THIS, min2,dMN); - } - } - otherMatcher->last[index]++; - } // loop for row (resp. column) - - frameCount++; - runCount++; - - otherMatcher->runCount = 0; - - if (overflow && !silent) - cerr << "WARNING: overflow in distance metric: " - << "frame " << frameCount << ", val = " << mx << endl; - - if (!silent) - std::cerr << "Frame " << frameCount << ", d = " << (mx-mn) << std::endl; -} - -int -Matcher::getValue(int i, int j, bool firstAttempt) -{ - if (firstPM) - return bestPathCost[i][j - first[i]]; - else - return otherMatcher->bestPathCost[j][i - otherMatcher->first[j]]; -} // getValue() - -void -Matcher::setValue(int i, int j, int dir, int value, int dMN) -{ - if (firstPM) { - distance[i][j - first[i]] = (unsigned char)((dMN & MASK) | dir); - bestPathCost[i][j - first[i]] = - (value + (dir==ADVANCE_BOTH? dMN*2: dMN)); - } else { - if (dir == ADVANCE_THIS) - dir = ADVANCE_OTHER; - else if (dir == ADVANCE_OTHER) - dir = ADVANCE_THIS; - int idx = i - otherMatcher->first[j]; - if (idx == (int)otherMatcher->distYSizes[j]) { - // This should never happen, but if we allow arbitrary - // pauses in either direction, and arbitrary lengths at - // end, it is better than a segmentation fault. - std::cerr << "Emergency resize: " << idx << " -> " << idx * 2 << std::endl; - otherMatcher->distYSizes[j] = idx * 2; - otherMatcher->bestPathCost[j] = - (int *)realloc(otherMatcher->bestPathCost[j], - idx * 2 * sizeof(int)); - otherMatcher->distance[j] = - (unsigned char *)realloc(otherMatcher->distance[j], - idx * 2 * sizeof(unsigned char)); - } - otherMatcher->distance[j][idx] = (unsigned char)((dMN & MASK) | dir); - otherMatcher->bestPathCost[j][idx] = - (value + (dir==ADVANCE_BOTH? dMN*2: dMN)); - } -} // setValue() -
--- a/Matcher.h Thu Nov 13 09:50:09 2014 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,383 +0,0 @@ -/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ - -/* - Vamp feature extraction plugin using the MATCH audio alignment - algorithm. - - Centre for Digital Music, Queen Mary, University of London. - This file copyright 2007 Simon Dixon, Chris Cannam and QMUL. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. See the file - COPYING included with this distribution for more information. -*/ - -#ifndef _MATCHER_H_ -#define _MATCHER_H_ - -#include <vector> -#include <iostream> -#include <sstream> -#include <cmath> - -#define ADVANCE_THIS 1 -#define ADVANCE_OTHER 2 -#define ADVANCE_BOTH 3 -#define MASK 0xfc - -#include "DistanceMetric.h" - -using std::vector; -using std::string; -using std::cerr; -using std::endl; - -/** Represents an audio stream that can be matched to another audio - * stream of the same piece of music. The matching algorithm uses - * dynamic time warping. The distance metric is a Euclidean metric - * on the FFT data with the higher frequencies mapped onto a linear - * scale. - */ -class Matcher -{ -public: - enum FrameNormalisation { - - /** Do not normalise audio frames */ - NoFrameNormalisation, - - /** Normalise each frame of audio to have a sum of 1 */ - NormaliseFrameToSum1, - - /** Normalise each frame of audio by the long-term average - * of the summed energy */ - NormaliseFrameToLTAverage, - }; - - struct Parameters { - - Parameters(float rate_, double hopTime_, int fftSize_) : - sampleRate(rate_), - frameNorm(NormaliseFrameToSum1), - distanceNorm(DistanceMetric::NormaliseDistanceToLogSum), - distanceScale(90.0), - useSpectralDifference(true), - useChromaFrequencyMap(false), - hopTime(hopTime_), - fftSize(fftSize_), - blockTime(10.0), - silenceThreshold(0.01), - decay(0.99), - maxRunCount(3) - {} - - /** Sample rate of audio */ - float sampleRate; - - /** Type of audio frame normalisation */ - FrameNormalisation frameNorm; - - /** Type of distance metric normalisation */ - DistanceMetric::DistanceNormalisation distanceNorm; - - /** Scaling factor for distance metric; must guarantee that the - * final value fits in the data type used, that is, unsigned - * char. - */ - double distanceScale; - - /** Flag indicating whether or not the half-wave rectified - * spectral difference should be used in calculating the - * distance metric for pairs of audio frames, instead of the - * straight spectrum values. */ - bool useSpectralDifference; - - /** Flag indicating whether to use a chroma frequency map (12 - * bins) instead of the default warped spectrogram */ - bool useChromaFrequencyMap; - - /** Spacing of audio frames (determines the amount of overlap or - * skip between frames). This value is expressed in - * seconds. */ - double hopTime; - - /** Size of an FFT frame in samples. Note that the data passed - * in to Matcher is already in the frequency domain, so this - * expresses the size of the frame that the caller will be - * providing. - */ - int fftSize; - - /** The width of the search band (error margin) around the current - * match position, measured in seconds. Strictly speaking the - * width is measured backwards from the current point, since the - * algorithm has to work causally. - */ - double blockTime; - - /** RMS level below which frame is considered silent */ - double silenceThreshold; - - /** Frame-to-frame decay factor in calculating long-term average */ - double decay; - - /** Maximum number of frames sequentially processed by this - * matcher, without a frame of the other matcher being - * processed. - */ - int maxRunCount; - }; - -protected: - /** Points to the other performance with which this one is being - * compared. The data for the distance metric and the dynamic - * time warping is shared between the two matchers. In the - * original version, only one of the two performance matchers - * contained the distance metric. (See <code>first</code>) - */ - Matcher *otherMatcher; - - /** Indicates which performance is considered primary (the - * score). This is the performance shown on the vertical axis, - * and referred to as "this" in the codes for the direction of - * DTW steps. */ - bool firstPM; - - /** Configuration parameters */ - Parameters params; - - /** Width of the search band in FFT frames (see <code>blockTime</code>) */ - int blockSize; - - /** The number of frames of audio data which have been read. */ - int frameCount; - - /** Long term average frame energy (in frequency domain - * representation). */ - double ltAverage; - - /** The number of frames sequentially processed by this matcher, - * without a frame of the other matcher being processed. - */ - int runCount; - - /** A mapping function for mapping FFT bins to final frequency - * bins. The mapping is linear (1-1) until the resolution - * reaches 2 points per semitone, then logarithmic with a - * semitone resolution. e.g. for 44.1kHz sampling rate and - * fftSize of 2048 (46ms), bin spacing is 21.5Hz, which is mapped - * linearly for bins 0-34 (0 to 732Hz), and logarithmically for - * the remaining bins (midi notes 79 to 127, bins 35 to 83), - * where all energy above note 127 is mapped into the final - * bin. */ - vector<int> freqMap; - - /** The number of entries in <code>freqMap</code>. */ - int freqMapSize; - - /** The number of values in an externally-supplied feature vector, - * used in preference to freqMap/freqMapSize if constructed with - * the external feature version of the Matcher constructor. If - * this is zero, the internal feature extractor will be used as - * normal. - */ - int externalFeatureSize; - - /** The number of values in the feature vectors actually in - * use. This will be externalFeatureSize if greater than zero, or - * freqMapSize otherwise. - */ - int featureSize; - - /** The most recent frame; used for calculating the frame to frame - * spectral difference. These are therefore frequency warped but - * not yet normalised. */ - vector<double> prevFrame; - vector<double> newFrame; - - /** A block of previously seen frames are stored in this structure - * for calculation of the distance matrix as the new frames are - * read in. One can think of the structure of the array as a - * circular buffer of vectors. These are the frames with all - * applicable processing applied (e.g. spectral difference, - * normalisation), unlike prevFrame and newFrame. The total - * energy of frames[i] is stored in totalEnergies[i]. */ - vector<vector<double> > frames; - - /** The total energy of each frame in the frames block. */ - vector<double> totalEnergies; - - /** The best path cost matrix. */ - int **bestPathCost; - - /** The distance matrix. */ - unsigned char **distance; - - /** The bounds of each row of data in the distance and path cost matrices.*/ - int *first; - int *last; - - /** Height of each column in distance and bestPathCost matrices */ - int *distYSizes; - - /** Width of distance and bestPathCost matrices and first and last vectors */ - int distXSize; - - bool initialised; - - /** Disable or enable debugging output */ - static bool silent; - -public: - /** Constructor for Matcher. - * - * @param p The Matcher representing the performance with which - * this one is going to be matched. Some information is shared - * between the two matchers (currently one possesses the distance - * matrix and optimal path matrix). - */ - Matcher(Parameters parameters, Matcher *p); - - /** Constructor for Matcher using externally supplied features. - * A Matcher made using this constructor will not carry out its - * own feature extraction from frequency-domain audio data, but - * instead will accept arbitrary feature frames calculated by - * some external code. - * - * @param p The Matcher representing the performance with which - * this one is going to be matched. Some information is shared - * between the two matchers (currently one possesses the distance - * matrix and optimal path matrix). - * - * @param featureSize Number of values in each feature vector. - */ - Matcher(Parameters parameters, Matcher *p, int featureSize); - - ~Matcher(); - - /** For debugging, outputs information about the Matcher to - * standard error. - */ - void print(); - - /** Adds a link to the Matcher object representing the performance - * which is going to be matched to this one. - * - * @param p the Matcher representing the other performance - */ - void setOtherMatcher(Matcher *p) { - otherMatcher = p; - } // setOtherMatcher() - - int getFrameCount() { - return frameCount; - } - - /** - * Return the feature vector size that will be used for the given - * parameters. - */ - static int getFeatureSizeFor(Parameters params); - -protected: - template <typename T> - void initVector(vector<T> &vec, int sz, T dflt = 0) { - vec.clear(); - while ((int)vec.size() < sz) vec.push_back(dflt); - } - - template <typename T> - void initMatrix(vector<vector<T> > &mat, int hsz, int vsz, - T dflt = 0, int fillTo = -1) { - mat.clear(); - if (fillTo < 0) fillTo = hsz; - for (int i = 0; i < hsz; ++i) { - mat.push_back(vector<T>()); - if (i < fillTo) { - while ((int)mat[i].size() < vsz) { - mat[i].push_back(dflt); - } - } - } - } - - void init(); - - void makeFreqMap(); - - /** Creates a map of FFT frequency bins to comparison bins. Where - * the spacing of FFT bins is less than 0.5 semitones, the - * mapping is one to one. Where the spacing is greater than 0.5 - * semitones, the FFT energy is mapped into semitone-wide - * bins. No scaling is performed; that is the energy is summed - * into the comparison bins. See also consumeFrame() - */ - void makeStandardFrequencyMap(); - - void makeChromaFrequencyMap(); - - /** Processes a frame of audio data by first computing the STFT - * with a Hamming window, then mapping the frequency bins into a - * part-linear part-logarithmic array, then (optionally) - * computing the half-wave rectified spectral difference from the - * previous frame, then (optionally) normalising to a sum of 1, - * then calculating the distance to all frames stored in the - * otherMatcher and storing them in the distance matrix, and - * finally updating the optimal path matrix using the dynamic - * time warping algorithm. - * - * Return value is the frame (post-processed, with warping, - * rectification, and normalisation as appropriate). - * - * The Matcher must have been constructed using the constructor - * without an external featureSize parameter in order to use this - * function. (Otherwise it will be expecting you to call - * consumeFeatureVector.) - */ - std::vector<double> consumeFrame(double *reBuffer, double *imBuffer); - - /** Processes a feature vector frame (presumably calculated from - * audio data by some external code). As consumeFrame, except - * that it does not calculate a feature from audio data but - * instead uses the supplied feature directly. - * - * The Matcher must have been constructed using the constructor - * that accepts an external featureSize parameter in order to - * use this function. The supplied feature must be of the size - * that was passed to the constructor. - */ - void consumeFeatureVector(std::vector<double> feature); - - /** Retrieves values from the minimum cost matrix. - * - * @param i the frame number of this Matcher - * @param j the frame number of the other Matcher - * @return the cost of the minimum cost path to this location - */ - int getValue(int i, int j, bool firstAttempt); - - /** Stores entries in the distance matrix and the optimal path matrix. - * - * @param i the frame number of this Matcher - * @param j the frame number of the other Matcher - * @param dir the direction from which this position is reached with - * minimum cost - * @param value the cost of the minimum path except the current step - * @param dMN the distance cost between the two frames - */ - void setValue(int i, int j, int dir, int value, int dMN); - - vector<double> processFrameFromFreqData(double *, double *); - void calcAdvance(); - - DistanceMetric metric; - - friend class MatchFeeder; - friend class MatchFeatureFeeder; - friend class Finder; - -}; // class Matcher - -#endif
--- a/Path.cpp Thu Nov 13 09:50:09 2014 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,76 +0,0 @@ -/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ - -/* - Vamp feature extraction plugin using the MATCH audio alignment - algorithm. - - Centre for Digital Music, Queen Mary, University of London. - This file copyright 2007 Simon Dixon, Chris Cannam and QMUL. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. See the file - COPYING included with this distribution for more information. -*/ - -#include "Path.h" - -int -Path::smooth(std::vector<int> &x, std::vector<int> &y, int length) -{ - if (length == 0) - return 0; - while ((int)val.size() < length) { - val.push_back(0); - len.push_back(0); - } - int p = 0; - val[0] = len[0] = 0; - for (int i = 1; i < length; i++) { // H = 1; V = 2; D = 3 - int current = x[i] - x[i-1] + 2 * (y[i] - y[i-1]); - if (current == val[p]) { - len[p]++; - } else if ((current == 3) || (val[p] == 0)) { - val[++p] = current; - len[p] = 1; - } else if (val[p] + current == 3) { // 1 + 2 - if (--len[p] == 0) - p--; - if (val[p] == 3) - len[p]++; - else { - val[++p] = 3; - len[p] = 1; - } - } else { // val[p] == 3 && current != 3 - if ((val[p-1] == current) || - (val[p-1] == 0) || - (len[p] > MAX_RUN_LENGTH)) { - val[++p] = current; - len[p] = 1; - } else { - if (--len[p-1] == 0) { - val[p-1] = val[p]; - len[p-1] = len[p]; - p--; - if (val[p-1] == 3) { - len[p-1] += len[p]; - p--; - } - } - len[p]++; - } - } - } - int i = 1; - for (int pp = 1; pp <= p; pp++) { - int dx = val[pp] & 1; - int dy = val[pp] >> 1; - for (int j = len[pp]; j > 0; j--, i++) { - x[i] = x[i-1] + dx; - y[i] = y[i-1] + dy; - } - } - return i; -}
--- a/Path.h Thu Nov 13 09:50:09 2014 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,46 +0,0 @@ -/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ - -/* - Vamp feature extraction plugin using the MATCH audio alignment - algorithm. - - Centre for Digital Music, Queen Mary, University of London. - This file copyright 2007 Simon Dixon, Chris Cannam and QMUL. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. See the file - COPYING included with this distribution for more information. -*/ - -#ifndef _PATH_H_ -#define _PATH_H_ - -#include <vector> - -class Path -{ -public: - Path() { } - - /** Smooths an alignment path.<BR> - * Consider the path as a sequence of horizontal (H), vertical (V) and - * diagonal (D) steps. The smoothing consists of 2 rewrite rules:<BR> - * HnDmVn / Dm+n (where m is less than MAX_RUN_LENGTH)<BR> - * VnDmHn / Dm+n (where m is less than MAX_RUN_LENGTH)<BR> - * The new path is written over the old path. Note that the end points of - * each application of a rewrite rule do not change. - * @return the length of the new path - */ - int smooth(std::vector<int> &x, std::vector<int> &y, int length); - -protected: - static const int MAX_RUN_LENGTH = 50; - - std::vector<int> val; - std::vector<int> len; -}; - -#endif -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/DistanceMetric.cpp Thu Nov 13 09:50:14 2014 +0000 @@ -0,0 +1,67 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + Vamp feature extraction plugin using the MATCH audio alignment + algorithm. + + Centre for Digital Music, Queen Mary, University of London. + This file copyright 2007 Simon Dixon, Chris Cannam and QMUL. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#include "DistanceMetric.h" + +#include <cassert> +#include <cmath> + +using std::vector; + +double +DistanceMetric::calcDistance(const vector<double> &f1, + const vector<double> &f2) +{ + double d = 0; + double sum = 0; + + int featureSize = f1.size(); + assert(int(f2.size()) == featureSize); + + for (int i = 0; i < featureSize; i++) { + d += fabs(f1[i] - f2[i]); + sum += f1[i] + f2[i]; + } + + if (sum == 0) + return 0; + if (m_norm == NormaliseDistanceToSum) + return d / sum; // 0 <= d/sum <= 2 + if (m_norm != NormaliseDistanceToLogSum) + return d; + + // note if this were to be restored, it would have to use + // totalEnergies vector instead of f1[freqMapSize] which used to + // store the total energy: + // double weight = (5 + Math.log(f1[freqMapSize] + f2[freqMapSize]))/10.0; + + double weight = (8 + log(sum)) / 10.0; + + if (weight < 0) weight = 0; + else if (weight > 1) weight = 1; + + return d / sum * weight; +} + +int +DistanceMetric::calcDistanceScaled(const vector<double> &f1, + const vector<double> &f2, + double scale) +{ + double distance = calcDistance(f1, f2); + return int(distance * scale); +} +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/DistanceMetric.h Thu Nov 13 09:50:14 2014 +0000 @@ -0,0 +1,73 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + Vamp feature extraction plugin using the MATCH audio alignment + algorithm. + + Centre for Digital Music, Queen Mary, University of London. + This file copyright 2007 Simon Dixon, Chris Cannam and QMUL. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#ifndef DISTANCE_METRIC_H +#define DISTANCE_METRIC_H + +#include <vector> + +class DistanceMetric +{ +public: + enum DistanceNormalisation { + + /** Do not normalise distance metrics */ + NoDistanceNormalisation, + + /** Normalise distance metric for pairs of frames by the sum + * of the two frames. */ + NormaliseDistanceToSum, + + /** Normalise distance metric for pairs of frames by the log + * of the sum of the frames. */ + NormaliseDistanceToLogSum, + }; + + DistanceMetric(DistanceNormalisation norm) : m_norm(norm) { } + + /** Calculates the Manhattan distance between two vectors, with an + * optional normalisation by the combined values in the + * vectors. Since the vectors contain energy, this could be + * considered as a squared Euclidean distance metric. Note that + * normalisation assumes the values are all non-negative. + * + * @param f1 one of the vectors involved in the distance calculation + * @param f2 one of the vectors involved in the distance calculation + * @return the distance + */ + double calcDistance(const std::vector<double> &f1, + const std::vector<double> &f2); + + /** Calculates the Manhattan distance between two vectors, with an + * optional normalisation by the combined values in the + * vectors. Since the vectors contain energy, this could be + * considered as a squared Euclidean distance metric. Note that + * normalisation assumes the values are all non-negative. + * + * @param f1 one of the vectors involved in the distance calculation + * @param f2 one of the vectors involved in the distance calculation + * @param scale the scaling factor to place the result in integer range + * @return the distance, scaled by scale and truncated to an integer + */ + int calcDistanceScaled(const std::vector<double> &f1, + const std::vector<double> &f2, + double scale); + +private: + DistanceNormalisation m_norm; +}; + +#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/Finder.cpp Thu Nov 13 09:50:14 2014 +0000 @@ -0,0 +1,301 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + Vamp feature extraction plugin using the MATCH audio alignment + algorithm. + + Centre for Digital Music, Queen Mary, University of London. + This file copyright 2007 Simon Dixon, Chris Cannam and QMUL. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#include "Finder.h" + +#include "Path.h" + +#include <algorithm> + + +Finder::Finder(Matcher *p1, Matcher *p2) +{ + if (!p1->firstPM) + std::cerr << "Warning: wrong args in Finder()" << std::endl; + pm1 = p1; + pm2 = p2; + index1 = 0; + index2 = 0; + rowRange = new int[2]; + colRange = new int[2]; +} // constructor + +Finder::~Finder() +{ + delete[] rowRange; + delete[] colRange; +} + +bool +Finder::find(int i1, int i2) +{ + if (i1 >= 0) { + index1 = i1; + index2 = i2 - pm1->first[i1]; + } + return (i1 >= 0) && (i2 >= pm1->first[i1]) && (i2 < pm1->last[i1]); +} // find() + +void +Finder::getColRange(int row, int *range) +{ + range[0] = pm1->first[row]; + range[1] = pm1->last[row]; +} // getColRange() + +void +Finder::getRowRange(int col, int *range) +{ + range[0] = pm2->first[col]; + range[1] = pm2->last[col]; +} // getRowRange() + +int +Finder::getExpandDirection(int row, int col) +{ + return getExpandDirection(row, col, false); +} // getExpandDirection() + +int +Finder::getExpandDirection(int row, int col, bool check) +{ + int min = getPathCost(row, col); + bestRow = row; + bestCol = col; + getRowRange(col, rowRange); + if (rowRange[1] > row+1) + rowRange[1] = row+1; // don't cheat by looking at future :) + for (int index = rowRange[0]; index < rowRange[1]; index++) { + int tmp = getPathCost(index, col); + if (tmp < min) { + min = tmp; + bestRow = index; + } + } + getColRange(row, colRange); + if (colRange[1] > col+1) + colRange[1] = col+1; // don't cheat by looking at future :) + for (int index = colRange[0]; index < colRange[1]; index++) { + int tmp = getPathCost(row, index); + if (tmp < min) { + min = tmp; + bestCol = index; + bestRow = row; + } + } + // System.err.print(" BEST: " + bestRow + " " + bestCol + " " + check); + // System.err.println(" " + pm1->frameCount + " " + pm2->frameCount); + if (check) { + // System.err.println(find(row+1, col) + " " + find(row, col+1)); + if (!find(row, col+1)) + return ADVANCE_THIS; + if (!find(row+1, col)) + return ADVANCE_OTHER; + } + return ((bestRow==row)? ADVANCE_THIS: 0) | + ((bestCol==col)? ADVANCE_OTHER: 0); + +} // getExpandDirection() + +unsigned char +Finder::getDistance(int row, int col) +{ + if (find(row, col)) { + return pm1->distance[row][col - pm1->first[row]]; + } + std::cerr << "getDistance(" << row << "," << col << "): out of bounds" << std::endl; + throw "getDistance index out of bounds"; +} // getDistance()/2 + +void +Finder::setDistance(int row, int col, unsigned char b) +{ + if (find(row, col)) { + pm1->distance[row][col - pm1->first[row]] = b; + return; + } + std::cerr << "setDistance(" << row << "," << col << "," << b << "): out of bounds" << std::endl; + throw "setDistance index out of bounds"; +} // setDistance() + +int +Finder::getPathCost(int row, int col) +{ + if (find(row, col)) // "1" avoids div by 0 below + return pm1->bestPathCost[row][col - pm1->first[row]]*100/ (1+row+col); + std::cerr << "getPathCost(" << row << "," << col << "): out of bounds" << std::endl; + throw "getPathCost index out of bounds"; +} // getPathCost() + +int +Finder::getRawPathCost(int row, int col) +{ + if (find(row, col)) + return pm1->bestPathCost[row][col - pm1->first[row]]; + std::cerr << "getRawPathCost(" << row << "," << col << "): out of bounds" << std::endl; + throw "getRawPathCost index out of bounds"; +} // getRawPathCost() + +void +Finder::setPathCost(int row, int col, int i) +{ + if (find(row, col)) { + pm1->bestPathCost[row][col - pm1->first[row]] = i; + return; + } + std::cerr << "setPathCost(" << row << "," << col << "," << i << "): out of bounds" << std::endl; + throw "setPathCost index out of bounds"; +} // setPathCost() + +unsigned char +Finder::getDistance() +{ + return pm1->distance[index1][index2]; +} // getDistance()/0 + +void +Finder::setDistance(int b) +{ + pm1->distance[index1][index2] = (unsigned char)b; +} // setDistance() + +int +Finder::getPathCost() +{ + return pm1->bestPathCost[index1][index2]; +} // getPathCost() + +void +Finder::setPathCost(int i) +{ + pm1->bestPathCost[index1][index2] = i; +} // setPathCost() + +void +Finder::recalculatePathCostMatrix(int r1, int c1, int r2, int c2) +{ + if (!find(r1,c1)) { + std::cerr << "recalculatePathCostMatrix(" << r1 << "," << c1 << "): out of bounds" << std::endl; + throw "recalculatePathCostMatrix index out of bounds"; + } + int thisRowStart, c; + int prevRowStart = 0, prevRowStop = 0; + for (int r = r1; r <= r2; r++) { + thisRowStart = pm1->first[r]; + if (thisRowStart < c1) + thisRowStart = c1; + for (c = thisRowStart; c <= c2; c++) { + if (find(r,c)) { + int i2 = index2; + int newCost = pm1->distance[r][i2]; + int dir = 0; + if (r > r1) { // not first row + int min = -1; + if ((c > prevRowStart) && (c <= prevRowStop)) { + // diagonal from (r-1,c-1) + min = pm1->bestPathCost[r-1][c-pm1->first[r-1]-1] + + newCost * 2; + dir = ADVANCE_BOTH; + } + if ((c >= prevRowStart) && (c < prevRowStop)) { + // vertical from (r-1,c) + int cost = pm1->bestPathCost[r-1][c-pm1->first[r-1]] + + newCost; + if ((min == -1) || (cost < min)) { + min = cost; + dir = ADVANCE_THIS; + } + } + if (c > thisRowStart) { + // horizontal from (r,c-1) + int cost =pm1->bestPathCost[r][i2-1]+newCost; + if ((min == -1) || (cost < min)) { + min = cost; + dir = ADVANCE_OTHER; + } + } + pm1->bestPathCost[r][i2] = min; + } else if (c > thisRowStart) { // first row + // horizontal from (r,c-1) + pm1->bestPathCost[r][i2] = pm1->bestPathCost[r][i2-1] + + newCost; + dir = ADVANCE_OTHER; + } + if ((r != r1) || (c != c1)) { + pm1->distance[r][i2] = (unsigned char) + ((pm1->distance[r][i2] & MASK) | dir); + } + } else + break; // end of row + } + prevRowStart = thisRowStart; + prevRowStop = c; + } +} // recalculatePathCostMatrix() + +int +Finder::retrievePath(bool smooth, vector<int> &pathx, vector<int> &pathy) +{ + int x = pm2->getFrameCount() - 1; + int y = pm1->getFrameCount() - 1; + + pathx.clear(); + pathy.clear(); + + while (find(y, x) && ((x > 0) || (y > 0))) { + +// cerr << "x = " << x << ", y = " << y; + + pathx.push_back(x); + pathy.push_back(y); + + switch (getDistance() & ADVANCE_BOTH) { + case ADVANCE_THIS: +// cerr << ", going down (dist = " << (int)getDistance() << ")" << endl; + y--; + break; + case ADVANCE_OTHER: +// cerr << ", going left (dist = " << (int)getDistance() << ")" << endl; + x--; + break; + case ADVANCE_BOTH: +// cerr << ", going diag (dist = " << (int)getDistance() << ")" << endl; + x--; + y--; + break; + default: // this would indicate a bug, but we wouldn't want to hang +// cerr << "WARNING: Neither matcher advanced in path backtrack at (" << x << "," << y << ")" << endl; + if (x > y) { + x--; + } else { + y--; + } + break; + } + } + + std::reverse(pathx.begin(), pathx.end()); + std::reverse(pathy.begin(), pathy.end()); + + if (smooth) { + int smoothedLen = Path().smooth(pathx, pathy, pathx.size()); + return smoothedLen; + } else { + return pathx.size(); + } +} + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/Finder.h Thu Nov 13 09:50:14 2014 +0000 @@ -0,0 +1,103 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + Vamp feature extraction plugin using the MATCH audio alignment + algorithm. + + Centre for Digital Music, Queen Mary, University of London. + This file copyright 2007 Simon Dixon, Chris Cannam and QMUL. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#ifndef _FINDER_H_ +#define _FINDER_H_ + +#include <vector> +#include <iostream> + +#include "Matcher.h" + +/** Maps cost matrix coordinates into an efficient + * (linear instead of quadratic space) representation. + * Stores result of most recent mapping for fast + * sequential access. + */ +class Finder { + +protected: + Matcher *pm1, *pm2; + int index1, index2, bestRow, bestCol; + int *rowRange; + int *colRange; + +public: + Finder(Matcher *p1, Matcher *p2); + + ~Finder(); + + /** Sets up the instance variables to point to the given + * coordinate in the distance matrix. + * + * @param i1 frameNumber in the first Matcher + * @param i2 frameNumber in the second Matcher + * @return true iff the point (i2,i1) is represented in the distance matrix + */ + bool find(int i1, int i2); + + /** Returns the range [lo,hi) of legal column indices for the + * given row. */ + void getColRange(int row, int *range); + + /** Returns the range [lo,hi) of legal row indices for the given + * column. */ + void getRowRange(int col, int *range); + + int getExpandDirection(int row, int col); + int getExpandDirection(int row, int col, bool check); + + unsigned char getDistance(int row, int col); + void setDistance(int row, int col, unsigned char b); + + int getPathCost(int row, int col); + int getRawPathCost(int row, int col); + void setPathCost(int row, int col, int i); + + unsigned char getDistance(); + void setDistance(int b); + + int getPathCost(); + void setPathCost(int i); + + /** Calculates a rectangle of the path cost matrix so that the + * minimum cost path between the bottom left and top right + * corners can be computed. Caches previous values to avoid + * calling find() multiple times, and is several times faster as + * a result. + * + * @param r1 the bottom of the rectangle to be calculated + * @param c1 the left side of the rectangle to be calculated + * @param r2 the top of the rectangle to be calculated + * @param c2 the right side of the rectangle to be calculated + */ + void recalculatePathCostMatrix(int r1, int c1, int r2, int c2); + + /** + * Track back after all of the matchers have been fed in order to + * obtain the lowest cost path available. Path x and y coordinate + * pairs are returned in corresponding elements of pathx and + * pathy. Return value is the length of the returned path: only + * this many elements from pathx and pathy are valid (any + * subsequent ones may be spurious). + * + * @param smooth whether to smooth the path before returning it + */ + int retrievePath(bool smooth, std::vector<int> &pathx, std::vector<int> &pathy); + +}; // class Finder + +#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/MatchFeatureFeeder.cpp Thu Nov 13 09:50:14 2014 +0000 @@ -0,0 +1,84 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + Vamp feature extraction plugin using the MATCH audio alignment + algorithm. + + Centre for Digital Music, Queen Mary, University of London. + This file copyright 2007 Simon Dixon, Chris Cannam and QMUL. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#include "MatchFeatureFeeder.h" + +using std::vector; + +MatchFeatureFeeder::MatchFeatureFeeder(Matcher *m1, Matcher *m2) : + pm1(m1), pm2(m2) +{ + finder = new Finder(m1, m2); +} + +MatchFeatureFeeder::~MatchFeatureFeeder() +{ + delete finder; +} + +void +MatchFeatureFeeder::feed(vector<double> f1, vector<double> f2) +{ + q1.push(f1); + q2.push(f2); + + while (!q1.empty() && !q2.empty()) { + feedBlock(); + } +} + +void +MatchFeatureFeeder::feedBlock() +{ + if (pm1->frameCount < pm1->blockSize) { // fill initial block + feed1(); + feed2(); + } + else if (pm1->runCount >= pm1->params.maxRunCount) { // slope constraints + feed2(); + } else if (pm2->runCount >= pm2->params.maxRunCount) { + feed1(); + } else { + switch (finder->getExpandDirection + (pm1->frameCount-1, pm2->frameCount-1)) { + case ADVANCE_THIS: + feed1(); + break; + case ADVANCE_OTHER: + feed2(); + break; + case ADVANCE_BOTH: + feed1(); + feed2(); + break; + } + } +} + +void +MatchFeatureFeeder::feed1() +{ + pm1->consumeFeatureVector(q1.front()); + q1.pop(); +} + +void +MatchFeatureFeeder::feed2() +{ + pm2->consumeFeatureVector(q2.front()); + q2.pop(); +} +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/MatchFeatureFeeder.h Thu Nov 13 09:50:14 2014 +0000 @@ -0,0 +1,54 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ +/* + Vamp feature extraction plugin using the MATCH audio alignment + algorithm. + + Centre for Digital Music, Queen Mary, University of London. + This file copyright 2007 Simon Dixon, Chris Cannam and QMUL. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#ifndef _MATCH_FEATURE_FEEDER_H_ +#define _MATCH_FEATURE_FEEDER_H_ + +#include "Matcher.h" +#include "Finder.h" + +#include <queue> +#include <vector> + +class MatchFeatureFeeder +{ +public: + MatchFeatureFeeder(Matcher *m1, Matcher *m2); + ~MatchFeatureFeeder(); + + /** + * Feed the two supplied feature vectors to feeders 1 and 2 + * respectively (depending on their advance status). Matchers must + * have been constructed using the external featureSize + * constructor. + */ + void feed(std::vector<double> f1, std::vector<double> f2); + + Finder *getFinder() { return finder; } + +protected: + void feedBlock(); + void feed1(); + void feed2(); + + Finder *finder; + Matcher *pm1; + Matcher *pm2; + + std::queue<std::vector<double> > q1; + std::queue<std::vector<double> > q2; +}; + +#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/MatchFeeder.cpp Thu Nov 13 09:50:14 2014 +0000 @@ -0,0 +1,170 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + Vamp feature extraction plugin using the MATCH audio alignment + algorithm. + + Centre for Digital Music, Queen Mary, University of London. + This file copyright 2007 Simon Dixon, Chris Cannam and QMUL. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#include "MatchFeeder.h" + +using std::vector; + +MatchFeeder::MatchFeeder(Matcher *m1, Matcher *m2) : + pm1(m1), pm2(m2) +{ + fftSize = m1->params.fftSize; + finder = new Finder(m1, m2); + reBuffer = new double[fftSize/2+1]; + imBuffer = new double[fftSize/2+1]; +} + +MatchFeeder::~MatchFeeder() +{ + delete[] imBuffer; + delete[] reBuffer; + while (!q1.empty()) { + delete[] q1.front(); + q1.pop(); + } + while (!q2.empty()) { + delete[] q2.front(); + q2.pop(); + } + delete finder; +} + +void +MatchFeeder::feed(const float *const *input) +{ + // We maintain two FIFO queues of audio data frame block pointers, + // one per input stream. When the match-feeder function is + // entered, it knows that it has at least one block in each queue. + // It loops, processing up to one block per matcher, until a queue + // is empty. Then it returns, to be called again with more data. + + prepare(input); + + while (!q1.empty() && !q2.empty()) { +// std::cerr << "MatchFeeder::feed: q1 " << q1.size() << " q2 " << q2.size() << std::endl; + (void)feedBlock(); + } +} + +MatchFeeder::Features +MatchFeeder::feedAndGetFeatures(const float *const *input) +{ + prepare(input); + + Features all; + + while (!q1.empty() && !q2.empty()) { + Features ff = feedBlock(); + all.f1.insert(all.f1.end(), ff.f1.begin(), ff.f1.end()); + all.f2.insert(all.f2.end(), ff.f2.begin(), ff.f2.end()); + } + + return all; +} + +void +MatchFeeder::prepare(const float *const *input) +{ + float *block = new float[fftSize+2]; + for (size_t i = 0; i < fftSize+2; ++i) { + block[i] = input[0][i]; + } + q1.push(block); + + block = new float[fftSize+2]; + for (size_t i = 0; i < fftSize+2; ++i) { + block[i] = input[1][i]; + } + q2.push(block); +} + +MatchFeeder::Features +MatchFeeder::feedBlock() +{ + Features ff; + vector<double> f1, f2; + + if (pm1->frameCount < pm1->blockSize) { // fill initial block +// std::cerr << "feeding initial block" << std::endl; + f1 = feed1(); + f2 = feed2(); + } +//!!! } else if (pm1->atEnd) { +// feed2(); +//!!! } else if (pm2->atEnd) +// feed1(); + else if (pm1->runCount >= pm1->params.maxRunCount) { // slope constraints +// std::cerr << "pm1 too slopey" << std::endl; + f2 = feed2(); + } else if (pm2->runCount >= pm2->params.maxRunCount) { +// std::cerr << "pm2 too slopey" << std::endl; + f1 = feed1(); + } else { + switch (finder->getExpandDirection + (pm1->frameCount-1, pm2->frameCount-1)) { + case ADVANCE_THIS: +// std::cerr << "finder says ADVANCE_THIS" << std::endl; + f1 = feed1(); + break; + case ADVANCE_OTHER: +// std::cerr << "finder says ADVANCE_OTHER" << std::endl; + f2 = feed2(); + break; + case ADVANCE_BOTH: +// std::cerr << "finder says ADVANCE_BOTH" << std::endl; + f1 = feed1(); + f2 = feed2(); + break; + } + } + + if (!f1.empty()) ff.f1.push_back(f1); + if (!f2.empty()) ff.f2.push_back(f2); + return ff; +} + +vector<double> +MatchFeeder::feed1() +{ +// std::cerr << "feed1" << std::endl; + float *block = q1.front(); + q1.pop(); + for (size_t i = 0; i <= fftSize/2; ++i) { + reBuffer[i] = block[i*2]; + } + for (size_t i = 0; i <= fftSize/2; ++i) { + imBuffer[i] = block[i*2+1]; + } + delete[] block; + return pm1->consumeFrame(reBuffer, imBuffer); +} + +vector<double> +MatchFeeder::feed2() +{ +// std::cerr << "feed2" << std::endl; + float *block = q2.front(); + q2.pop(); + for (size_t i = 0; i <= fftSize/2; ++i) { + reBuffer[i] = block[i*2]; + } + for (size_t i = 0; i <= fftSize/2; ++i) { + imBuffer[i] = block[i*2+1]; + } + delete[] block; + return pm2->consumeFrame(reBuffer, imBuffer); +} +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/MatchFeeder.h Thu Nov 13 09:50:14 2014 +0000 @@ -0,0 +1,71 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ +/* + Vamp feature extraction plugin using the MATCH audio alignment + algorithm. + + Centre for Digital Music, Queen Mary, University of London. + This file copyright 2007 Simon Dixon, Chris Cannam and QMUL. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#ifndef _MATCH_FEEDER_H_ +#define _MATCH_FEEDER_H_ + +#include "Matcher.h" +#include "Finder.h" + +#include <queue> +#include <vector> + +class MatchFeeder +{ +public: + MatchFeeder(Matcher *m1, Matcher *m2); + ~MatchFeeder(); + + /** + * Feed the two supplied channels of frequency-domain input data + * to feeders 1 and 2 respectively, as appropriate (depending on + * their advance status). + */ + void feed(const float *const *input); + + struct Features { + std::vector<std::vector<double> > f1; + std::vector<std::vector<double> > f2; + }; + + /** + * Feed the two supplied channels of frequency-domain input data + * to matchers 1 and 2 respectively, as appropriate (depending on + * their advance status) and return any new feature vectors + * calculated by the two feeders. + */ + Features feedAndGetFeatures(const float *const *input); + + Finder *getFinder() { return finder; } + +protected: + void prepare(const float *const *input); + Features feedBlock(); + std::vector<double> feed1(); + std::vector<double> feed2(); + + Finder *finder; + Matcher *pm1; + Matcher *pm2; + + size_t fftSize; + double *reBuffer; + double *imBuffer; + + std::queue<float *> q1; + std::queue<float *> q2; +}; + +#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/MatchVampPlugin.cpp Thu Nov 13 09:50:14 2014 +0000 @@ -0,0 +1,632 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + Vamp feature extraction plugin using the MATCH audio alignment + algorithm. + + Centre for Digital Music, Queen Mary, University of London. + This file copyright 2007 Simon Dixon, Chris Cannam and QMUL. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#include "MatchVampPlugin.h" + +#include "Matcher.h" +#include "MatchFeeder.h" +#include "Path.h" + +#include <vamp/vamp.h> +#include <vamp-sdk/PluginAdapter.h> +#include <vamp-sdk/RealTime.h> + +#include <vector> +#include <algorithm> + +//static int extant = 0; + +#ifdef _WIN32 +HANDLE +MatchVampPlugin::m_serialisingMutex; +#else +pthread_mutex_t +MatchVampPlugin::m_serialisingMutex; +#endif + +bool +MatchVampPlugin::m_serialisingMutexInitialised = false; + +// We want to ensure our freq map / crossover bin in Matcher.cpp are +// always valid with a fixed FFT length in seconds, so must reject low +// sample rates +static float sampleRateMin = 5000.f; + +static float defaultStepTime = 0.020; + +MatchVampPlugin::MatchVampPlugin(float inputSampleRate) : + Plugin(inputSampleRate), + m_stepSize(inputSampleRate * defaultStepTime + 0.001), + m_stepTime(defaultStepTime), + m_blockSize(2048), + m_serialise(false), + m_begin(true), + m_locked(false), + m_smooth(true), + m_params(inputSampleRate, defaultStepTime, m_blockSize), + m_defaultParams(inputSampleRate, defaultStepTime, m_blockSize) +{ + if (inputSampleRate < sampleRateMin) { + std::cerr << "MatchVampPlugin::MatchVampPlugin: input sample rate " + << inputSampleRate << " < min supported rate " + << sampleRateMin << ", plugin will refuse to initialise" + << std::endl; + } + + if (!m_serialisingMutexInitialised) { + m_serialisingMutexInitialised = true; +#ifdef _WIN32 + m_serialisingMutex = CreateMutex(NULL, FALSE, NULL); +#else + pthread_mutex_init(&m_serialisingMutex, 0); +#endif + } + + pm1 = 0; + pm2 = 0; + feeder = 0; +// std::cerr << "MatchVampPlugin::MatchVampPlugin(" << this << "): extant = " << ++extant << std::endl; +} + +MatchVampPlugin::~MatchVampPlugin() +{ +// std::cerr << "MatchVampPlugin::~MatchVampPlugin(" << this << "): extant = " << --extant << std::endl; + + delete feeder; + delete pm1; + delete pm2; + + if (m_locked) { +#ifdef _WIN32 + ReleaseMutex(m_serialisingMutex); +#else + pthread_mutex_unlock(&m_serialisingMutex); +#endif + m_locked = false; + } +} + +string +MatchVampPlugin::getIdentifier() const +{ + return "match"; +} + +string +MatchVampPlugin::getName() const +{ + return "Match Performance Aligner"; +} + +string +MatchVampPlugin::getDescription() const +{ + return "Calculate alignment between two performances in separate channel inputs"; +} + +string +MatchVampPlugin::getMaker() const +{ + return "Simon Dixon (plugin by Chris Cannam)"; +} + +int +MatchVampPlugin::getPluginVersion() const +{ + return 2; +} + +string +MatchVampPlugin::getCopyright() const +{ + return "GPL"; +} + +MatchVampPlugin::ParameterList +MatchVampPlugin::getParameterDescriptors() const +{ + ParameterList list; + + ParameterDescriptor desc; + + desc.identifier = "serialise"; + desc.name = "Serialise Plugin Invocations"; + desc.description = "Reduce potential memory load at the expense of multiprocessor performance by serialising multi-threaded plugin runs"; + desc.minValue = 0; + desc.maxValue = 1; + desc.defaultValue = 0; + desc.isQuantized = true; + desc.quantizeStep = 1; + list.push_back(desc); + + desc.identifier = "framenorm"; + desc.name = "Frame Normalisation"; + desc.description = "Type of normalisation to use for frequency-domain audio features"; + desc.minValue = 0; + desc.maxValue = 2; + desc.defaultValue = (int)m_defaultParams.frameNorm; + desc.isQuantized = true; + desc.quantizeStep = 1; + desc.valueNames.clear(); + desc.valueNames.push_back("None"); + desc.valueNames.push_back("Sum To 1"); + desc.valueNames.push_back("Long-Term Average"); + list.push_back(desc); + desc.valueNames.clear(); + + desc.identifier = "distnorm"; + desc.name = "Distance Normalisation"; + desc.description = "Type of normalisation to use for distance metric"; + desc.minValue = 0; + desc.maxValue = 2; + desc.defaultValue = (int)m_defaultParams.distanceNorm; + desc.isQuantized = true; + desc.quantizeStep = 1; + desc.valueNames.clear(); + desc.valueNames.push_back("None"); + desc.valueNames.push_back("Sum of Frames"); + desc.valueNames.push_back("Log Sum of Frames"); + list.push_back(desc); + desc.valueNames.clear(); + + desc.identifier = "usespecdiff"; + desc.name = "Use Spectral Difference"; + desc.description = "Whether to use half-wave rectified spectral difference instead of straight spectrum"; + desc.minValue = 0; + desc.maxValue = 1; + desc.defaultValue = m_defaultParams.useSpectralDifference ? 1 : 0; + desc.isQuantized = true; + desc.quantizeStep = 1; + list.push_back(desc); + + desc.identifier = "usechroma"; + desc.name = "Use Chroma Frequency Map"; + desc.description = "Whether to use a chroma frequency map instead of the default warped spectrogram"; + desc.minValue = 0; + desc.maxValue = 1; + desc.defaultValue = m_defaultParams.useChromaFrequencyMap ? 1 : 0; + desc.isQuantized = true; + desc.quantizeStep = 1; + list.push_back(desc); + + desc.identifier = "gradientlimit"; + desc.name = "Gradient Limit"; + desc.description = "Limit of number of frames that will be accepted from one source without a frame from the other source being accepted"; + desc.minValue = 1; + desc.maxValue = 10; + desc.defaultValue = m_defaultParams.maxRunCount; + desc.isQuantized = true; + desc.quantizeStep = 1; + list.push_back(desc); + + desc.identifier = "zonewidth"; + desc.name = "Search Zone Width"; + desc.description = "Width of the search zone (error margin) either side of the ongoing match position, in seconds"; + desc.minValue = 1; + desc.maxValue = 60; + desc.defaultValue = m_defaultParams.blockTime; + desc.isQuantized = true; + desc.quantizeStep = 1; + desc.unit = "s"; + list.push_back(desc); + + desc.identifier = "smooth"; + desc.name = "Smooth Path"; + desc.description = "Smooth the path by replacing steps with diagonals"; + desc.minValue = 0; + desc.maxValue = 1; + desc.defaultValue = 1; + desc.isQuantized = true; + desc.quantizeStep = 1; + desc.unit = ""; + list.push_back(desc); + + return list; +} + +float +MatchVampPlugin::getParameter(std::string name) const +{ + if (name == "serialise") { + return m_serialise ? 1.0 : 0.0; + } else if (name == "framenorm") { + return (int)m_params.frameNorm; + } else if (name == "distnorm") { + return (int)m_params.distanceNorm; + } else if (name == "usespecdiff") { + return m_params.useSpectralDifference ? 1.0 : 0.0; + } else if (name == "usechroma") { + return m_params.useChromaFrequencyMap ? 1.0 : 0.0; + } else if (name == "gradientlimit") { + return m_params.maxRunCount; + } else if (name == "zonewidth") { + return m_params.blockTime; + } else if (name == "smooth") { + return m_smooth ? 1.0 : 0.0; + } + + return 0.0; +} + +void +MatchVampPlugin::setParameter(std::string name, float value) +{ + if (name == "serialise") { + m_serialise = (value > 0.5); + } else if (name == "framenorm") { + m_params.frameNorm = (Matcher::FrameNormalisation)(int(value + 0.1)); + } else if (name == "distnorm") { + m_params.distanceNorm = (DistanceMetric::DistanceNormalisation)(int(value + 0.1)); + } else if (name == "usespecdiff") { + m_params.useSpectralDifference = (value > 0.5); + } else if (name == "usechroma") { + m_params.useChromaFrequencyMap = (value > 0.5); + } else if (name == "gradientlimit") { + m_params.maxRunCount = int(value + 0.1); + } else if (name == "zonewidth") { + m_params.blockTime = value; + } else if (name == "smooth") { + m_smooth = (value > 0.5); + } +} + +size_t +MatchVampPlugin::getPreferredStepSize() const +{ + return m_inputSampleRate * defaultStepTime; +} + +size_t +MatchVampPlugin::getPreferredBlockSize() const +{ + return 2048; +} + +void +MatchVampPlugin::createMatchers() +{ + m_params.hopTime = m_stepTime; + m_params.fftSize = m_blockSize; + pm1 = new Matcher(m_params, 0); + pm2 = new Matcher(m_params, pm1); + pm1->setOtherMatcher(pm2); + feeder = new MatchFeeder(pm1, pm2); +} + +bool +MatchVampPlugin::initialise(size_t channels, size_t stepSize, size_t blockSize) +{ + if (m_inputSampleRate < sampleRateMin) { + std::cerr << "MatchVampPlugin::MatchVampPlugin: input sample rate " + << m_inputSampleRate << " < min supported rate " + << sampleRateMin << std::endl; + return false; + } + if (channels < getMinChannelCount() || + channels > getMaxChannelCount()) return false; + if (stepSize > blockSize/2 || + blockSize != getPreferredBlockSize()) return false; + + m_stepSize = stepSize; + m_stepTime = float(stepSize) / m_inputSampleRate; + m_blockSize = blockSize; + + createMatchers(); + m_begin = true; + m_locked = false; + + return true; +} + +void +MatchVampPlugin::reset() +{ + delete feeder; + delete pm1; + delete pm2; + feeder = 0; + pm1 = 0; + pm2 = 0; + + createMatchers(); + m_begin = true; + m_locked = false; +} + +MatchVampPlugin::OutputList +MatchVampPlugin::getOutputDescriptors() const +{ + OutputList list; + + float outRate = 1.0 / m_stepTime; + + OutputDescriptor desc; + desc.identifier = "path"; + desc.name = "Path"; + desc.description = "Alignment path"; + desc.unit = ""; + desc.hasFixedBinCount = true; + desc.binCount = 1; + desc.hasKnownExtents = false; + desc.isQuantized = true; + desc.quantizeStep = 1; + desc.sampleType = OutputDescriptor::VariableSampleRate; + desc.sampleRate = outRate; + m_pathOutNo = list.size(); + list.push_back(desc); + + desc.identifier = "a_b"; + desc.name = "A-B Timeline"; + desc.description = "Timing in performance B corresponding to moments in performance A"; + desc.unit = "sec"; + desc.hasFixedBinCount = true; + desc.binCount = 1; + desc.hasKnownExtents = false; + desc.isQuantized = false; + desc.sampleType = OutputDescriptor::VariableSampleRate; + desc.sampleRate = outRate; + m_abOutNo = list.size(); + list.push_back(desc); + + desc.identifier = "b_a"; + desc.name = "B-A Timeline"; + desc.description = "Timing in performance A corresponding to moments in performance B"; + desc.unit = "sec"; + desc.hasFixedBinCount = true; + desc.binCount = 1; + desc.hasKnownExtents = false; + desc.isQuantized = false; + desc.sampleType = OutputDescriptor::VariableSampleRate; + desc.sampleRate = outRate; + m_baOutNo = list.size(); + list.push_back(desc); + + desc.identifier = "a_b_divergence"; + desc.name = "A-B Divergence"; + desc.description = "Difference between timings in performances A and B"; + desc.unit = "sec"; + desc.hasFixedBinCount = true; + desc.binCount = 1; + desc.hasKnownExtents = false; + desc.isQuantized = false; + desc.sampleType = OutputDescriptor::VariableSampleRate; + desc.sampleRate = outRate; + m_abDivOutNo = list.size(); + list.push_back(desc); + + desc.identifier = "a_b_temporatio"; + desc.name = "A-B Tempo Ratio"; + desc.description = "Ratio of tempi between performances A and B"; + desc.unit = ""; + desc.hasFixedBinCount = true; + desc.binCount = 1; + desc.hasKnownExtents = false; + desc.isQuantized = false; + desc.sampleType = OutputDescriptor::VariableSampleRate; + desc.sampleRate = outRate; + m_abRatioOutNo = list.size(); + list.push_back(desc); + + desc.identifier = "a_features"; + desc.name = "A Features"; + desc.description = "Spectral features extracted from performance A"; + desc.unit = ""; + desc.hasFixedBinCount = true; + desc.binCount = Matcher::getFeatureSizeFor(m_params); + desc.hasKnownExtents = false; + desc.isQuantized = false; + desc.sampleType = OutputDescriptor::FixedSampleRate; + desc.sampleRate = outRate; + m_aFeaturesOutNo = list.size(); + list.push_back(desc); + + desc.identifier = "b_features"; + desc.name = "B Features"; + desc.description = "Spectral features extracted from performance B"; + desc.unit = ""; + desc.hasFixedBinCount = true; + desc.binCount = Matcher::getFeatureSizeFor(m_params); + desc.hasKnownExtents = false; + desc.isQuantized = false; + desc.sampleType = OutputDescriptor::FixedSampleRate; + desc.sampleRate = outRate; + m_bFeaturesOutNo = list.size(); + list.push_back(desc); + + return list; +} + +MatchVampPlugin::FeatureSet +MatchVampPlugin::process(const float *const *inputBuffers, + Vamp::RealTime timestamp) +{ + if (m_begin) { + if (!m_locked && m_serialise) { + m_locked = true; +#ifdef _WIN32 + WaitForSingleObject(m_serialisingMutex, INFINITE); +#else + pthread_mutex_lock(&m_serialisingMutex); +#endif + } + m_startTime = timestamp; + m_begin = false; + } + +// std::cerr << timestamp.toString(); + + MatchFeeder::Features ff = feeder->feedAndGetFeatures(inputBuffers); + + FeatureSet returnFeatures; + + Feature f; + f.hasTimestamp = false; + + for (int i = 0; i < (int)ff.f1.size(); ++i) { + f.values.clear(); + for (int j = 0; j < (int)ff.f1[i].size(); ++j) { + f.values.push_back(ff.f1[i][j]); + } + returnFeatures[m_aFeaturesOutNo].push_back(f); + } + + for (int i = 0; i < (int)ff.f2.size(); ++i) { + f.values.clear(); + for (int j = 0; j < (int)ff.f2[i].size(); ++j) { + f.values.push_back(ff.f2[i][j]); + } + returnFeatures[m_bFeaturesOutNo].push_back(f); + } + +// std::cerr << "."; +// std::cerr << std::endl; + + return returnFeatures; +} + +MatchVampPlugin::FeatureSet +MatchVampPlugin::getRemainingFeatures() +{ + Finder *finder = feeder->getFinder(); + std::vector<int> pathx; + std::vector<int> pathy; + int len = finder->retrievePath(m_smooth, pathx, pathy); + + FeatureSet returnFeatures; + + int prevx = 0; + int prevy = 0; + + for (int i = 0; i < len; ++i) { + + int x = pathx[i]; + int y = pathy[i]; + + Vamp::RealTime xt = Vamp::RealTime::frame2RealTime + (x * m_stepSize, lrintf(m_inputSampleRate)); + Vamp::RealTime yt = Vamp::RealTime::frame2RealTime + (y * m_stepSize, lrintf(m_inputSampleRate)); + + Feature feature; + feature.hasTimestamp = true; + feature.timestamp = m_startTime + xt; + feature.values.clear(); + feature.values.push_back(yt.sec + double(yt.nsec)/1.0e9); + returnFeatures[m_pathOutNo].push_back(feature); + + if (x != prevx) { + + feature.hasTimestamp = true; + feature.timestamp = m_startTime + xt; + feature.values.clear(); + feature.values.push_back(yt.sec + yt.msec()/1000.0); + returnFeatures[m_abOutNo].push_back(feature); + + Vamp::RealTime diff = yt - xt; + feature.values.clear(); + feature.values.push_back(diff.sec + diff.msec()/1000.0); + returnFeatures[m_abDivOutNo].push_back(feature); + + if (i > 0) { + int lookback = 100; //!!! arbitrary + if (lookback > i) lookback = i; + int xdiff = x - pathx[i-lookback]; + int ydiff = y - pathy[i-lookback]; + if (xdiff != 0 && ydiff != 0) { + float ratio = float(ydiff)/float(xdiff); + if (ratio < 8 && ratio > (1.0/8)) { //!!! just for now, since we aren't dealing properly with silence yet + feature.values.clear(); + feature.values.push_back(ratio); + returnFeatures[m_abRatioOutNo].push_back(feature); + } + } + } + } + + if (y != prevy) { + feature.hasTimestamp = true; + feature.timestamp = m_startTime + yt; + feature.values.clear(); + feature.values.push_back(xt.sec + xt.msec()/1000.0); + returnFeatures[m_baOutNo].push_back(feature); + } + + prevx = x; + prevy = y; + } + + delete feeder; + delete pm1; + delete pm2; + feeder = 0; + pm1 = 0; + pm2 = 0; + + if (m_locked) { +#ifdef _WIN32 + ReleaseMutex(m_serialisingMutex); +#else + pthread_mutex_unlock(&m_serialisingMutex); +#endif + m_locked = false; + } + + return returnFeatures; + + +/* + for (int i = 0; i < len; ++i) { + std::cerr << i << ": [" << pathx[i] << "," << pathy[i] << "]" << std::endl; + } + + std::cerr << std::endl; + std::cerr << "File: A" << std::endl; + std::cerr << "Marks: -1" << std::endl; + std::cerr << "FixedPoints: true 0" << std::endl; + std::cerr << "0" << std::endl; + std::cerr << "0" << std::endl; + std::cerr << "0" << std::endl; + std::cerr << "0" << std::endl; + std::cerr << "File: B" << std::endl; + std::cerr << "Marks: 0" << std::endl; + std::cerr << "FixedPoints: true 0" << std::endl; + std::cerr << "0.02" << std::endl; + std::cerr << "0.02" << std::endl; + + std::cerr << len << std::endl; + for (int i = 0; i < len; ++i) { + std::cerr << pathx[i] << std::endl; + } + + std::cerr << len << std::endl; + for (int i = 0; i < len; ++i) { + std::cerr << pathy[i] << std::endl; + } +*/ +} + +static Vamp::PluginAdapter<MatchVampPlugin> mvpAdapter; + +const VampPluginDescriptor *vampGetPluginDescriptor(unsigned int version, + unsigned int index) +{ + if (version < 1) return 0; + + switch (index) { + case 0: return mvpAdapter.getDescriptor(); + default: return 0; + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/MatchVampPlugin.h Thu Nov 13 09:50:14 2014 +0000 @@ -0,0 +1,104 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + Vamp feature extraction plugin using the MATCH audio alignment + algorithm. + + Centre for Digital Music, Queen Mary, University of London. + This file copyright 2007 Simon Dixon, Chris Cannam and QMUL. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#ifndef _MATCH_VAMP_PLUGIN_H_ +#define _MATCH_VAMP_PLUGIN_H_ + +#include <vamp-sdk/Plugin.h> + +#ifdef _WIN32 +#include <windows.h> +#else +#include <pthread.h> +#endif + +#include "Matcher.h" + +class MatchFeeder; + +class MatchVampPlugin : public Vamp::Plugin +{ +public: + MatchVampPlugin(float inputSampleRate); + virtual ~MatchVampPlugin(); + + bool initialise(size_t channels, size_t stepSize, size_t blockSize); + void reset(); + + InputDomain getInputDomain() const { return FrequencyDomain; } + + size_t getPreferredStepSize() const; + size_t getPreferredBlockSize() const; + + size_t getMinChannelCount() const { return 2; } + size_t getMaxChannelCount() const { return 2; } + + std::string getIdentifier() const; + std::string getName() const; + std::string getDescription() const; + std::string getMaker() const; + int getPluginVersion() const; + std::string getCopyright() const; + + ParameterList getParameterDescriptors() const; + float getParameter(std::string) const; + void setParameter(std::string, float); + + OutputList getOutputDescriptors() const; + + FeatureSet process(const float *const *inputBuffers, + Vamp::RealTime timestamp); + + FeatureSet getRemainingFeatures(); + +protected: + void createMatchers(); + + Matcher *pm1; + Matcher *pm2; + MatchFeeder *feeder; + + Vamp::RealTime m_startTime; + int m_stepSize; + float m_stepTime; + int m_blockSize; + bool m_serialise; + bool m_begin; + bool m_locked; + bool m_smooth; + + Matcher::Parameters m_params; + Matcher::Parameters m_defaultParams; + + mutable int m_pathOutNo; + mutable int m_abOutNo; + mutable int m_baOutNo; + mutable int m_abDivOutNo; + mutable int m_abRatioOutNo; + mutable int m_aFeaturesOutNo; + mutable int m_bFeaturesOutNo; + +#ifdef _WIN32 + static HANDLE m_serialisingMutex; +#else + static pthread_mutex_t m_serialisingMutex; +#endif + + static bool m_serialisingMutexInitialised; +}; + + +#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/Matcher.cpp Thu Nov 13 09:50:14 2014 +0000 @@ -0,0 +1,508 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + Vamp feature extraction plugin using the MATCH audio alignment + algorithm. + + Centre for Digital Music, Queen Mary, University of London. + This file copyright 2007 Simon Dixon, Chris Cannam and QMUL. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#include "Matcher.h" + +#include <iostream> + +#include <cstdlib> +#include <cassert> + +bool Matcher::silent = true; + +//#define DEBUG_MATCHER 1 + +Matcher::Matcher(Parameters parameters, Matcher *p) : + params(parameters), + metric(parameters.distanceNorm) +{ +#ifdef DEBUG_MATCHER + cerr << "Matcher::Matcher(" << params.sampleRate << ", " << p << ")" << endl; +#endif + + otherMatcher = p; // the first matcher will need this to be set later + firstPM = (!p); + ltAverage = 0; + frameCount = 0; + runCount = 0; + freqMapSize = 0; + externalFeatureSize = 0; + featureSize = 0; + blockSize = 0; + + blockSize = lrint(params.blockTime / params.hopTime); +#ifdef DEBUG_MATCHER + cerr << "Matcher: blockSize = " << blockSize << endl; +#endif + + distance = 0; + bestPathCost = 0; + distYSizes = 0; + distXSize = 0; + + initialised = false; +} + +Matcher::Matcher(Parameters parameters, Matcher *p, int featureSize) : + params(parameters), + externalFeatureSize(featureSize), + metric(parameters.distanceNorm) +{ +#ifdef DEBUG_MATCHER + cerr << "Matcher::Matcher(" << params.sampleRate << ", " << p << ", " << featureSize << ")" << endl; +#endif + + otherMatcher = p; // the first matcher will need this to be set later + firstPM = (!p); + ltAverage = 0; + frameCount = 0; + runCount = 0; + freqMapSize = 0; + featureSize = 0; + blockSize = 0; + + blockSize = lrint(params.blockTime / params.hopTime); +#ifdef DEBUG_MATCHER + cerr << "Matcher: blockSize = " << blockSize << endl; +#endif + + distance = 0; + bestPathCost = 0; + distYSizes = 0; + distXSize = 0; + + initialised = false; + +} + +Matcher::~Matcher() +{ +#ifdef DEBUG_MATCHER + cerr << "Matcher(" << this << ")::~Matcher()" << endl; +#endif + + if (initialised) { + + for (int i = 0; i < distXSize; ++i) { + if (distance[i]) { + free(distance[i]); + free(bestPathCost[i]); + } + } + free(distance); + free(bestPathCost); + + free(first); + free(last); + + free(distYSizes); + } +} + +void +Matcher::init() +{ + if (initialised) return; + + initialised = true; + + if (externalFeatureSize == 0) { + freqMapSize = getFeatureSizeFor(params); + featureSize = freqMapSize; + makeFreqMap(); + } else { + featureSize = externalFeatureSize; + } + + initVector<double>(prevFrame, featureSize); + initVector<double>(newFrame, featureSize); + initMatrix<double>(frames, blockSize, featureSize); + initVector<double>(totalEnergies, blockSize); + + int distSize = (params.maxRunCount + 1) * blockSize; + + distXSize = blockSize * 2; + + distance = (unsigned char **)malloc(distXSize * sizeof(unsigned char *)); + bestPathCost = (int **)malloc(distXSize * sizeof(int *)); + distYSizes = (int *)malloc(distXSize * sizeof(int)); + + for (int i = 0; i < blockSize; ++i) { + distance[i] = (unsigned char *)malloc(distSize * sizeof(unsigned char)); + bestPathCost[i] = (int *)malloc(distSize * sizeof(int)); + distYSizes[i] = distSize; + } + for (int i = blockSize; i < distXSize; ++i) { + distance[i] = 0; + } + + first = (int *)malloc(distXSize * sizeof(int)); + last = (int *)malloc(distXSize * sizeof(int)); + + frameCount = 0; + runCount = 0; + ltAverage = 0; + +} // init + +void +Matcher::makeFreqMap() +{ + initVector<int>(freqMap, params.fftSize/2 + 1); + + if (params.useChromaFrequencyMap) { +#ifdef DEBUG_MATCHER + cerr << "makeFreqMap: calling makeChromaFrequencyMap" << endl; +#endif + makeChromaFrequencyMap(); + } else { +#ifdef DEBUG_MATCHER + cerr << "makeFreqMap: calling makeStandardFrequencyMap" << endl; +#endif + makeStandardFrequencyMap(); + } +} // makeFreqMap() + +int +Matcher::getFeatureSizeFor(Parameters params) +{ + if (params.useChromaFrequencyMap) { + return 13; + } else { + return 84; + } +} + +void +Matcher::makeStandardFrequencyMap() +{ + double binWidth = params.sampleRate / params.fftSize; + int crossoverBin = (int)(2 / (pow(2, 1/12.0) - 1)); + int crossoverMidi = lrint(log(crossoverBin*binWidth/440.0)/ + log(2.0) * 12 + 69); + // freq = 440 * Math.pow(2, (midi-69)/12.0) / binWidth; + int i = 0; + while (i <= crossoverBin) { + freqMap[i] = i; + ++i; + } + while (i <= params.fftSize/2) { + double midi = log(i*binWidth/440.0) / log(2.0) * 12 + 69; + if (midi > 127) midi = 127; + freqMap[i++] = crossoverBin + lrint(midi) - crossoverMidi; + } + assert(freqMapSize == freqMap[i-1] + 1); + if (!silent) { + cerr << "Standard map size: " << freqMapSize + << "; Crossover at: " << crossoverBin << endl; + for (i = 0; i < params.fftSize / 2; i++) + cerr << "freqMap[" << i << "] = " << freqMap[i] << endl; + } +} // makeStandardFrequencyMap() + +void +Matcher::makeChromaFrequencyMap() +{ + double binWidth = params.sampleRate / params.fftSize; + int crossoverBin = (int)(1 / (pow(2, 1/12.0) - 1)); + // freq = 440 * Math.pow(2, (midi-69)/12.0) / binWidth; + int i = 0; + while (i <= crossoverBin) + freqMap[i++] = 0; + while (i <= params.fftSize/2) { + double midi = log(i*binWidth/440.0) / log(2.0) * 12 + 69; + freqMap[i++] = (lrint(midi)) % 12 + 1; + } + if (!silent) { + cerr << "Chroma map size: " << freqMapSize + << "; Crossover at: " << crossoverBin << endl; + for (i = 0; i < params.fftSize / 2; i++) + cerr << "freqMap[" << i << "] = " << freqMap[i] << endl; + } +} // makeChromaFrequencyMap() + +vector<double> +Matcher::consumeFrame(double *reBuffer, double *imBuffer) +{ + if (!initialised) init(); + + vector<double> processedFrame = + processFrameFromFreqData(reBuffer, imBuffer); + + calcAdvance(); + + return processedFrame; +} + +void +Matcher::consumeFeatureVector(std::vector<double> feature) +{ + if (!initialised) init(); + int frameIndex = frameCount % blockSize; + frames[frameIndex] = feature; + calcAdvance(); +} + +vector<double> +Matcher::processFrameFromFreqData(double *reBuffer, double *imBuffer) +{ + for (int i = 0; i < (int)newFrame.size(); ++i) { + newFrame[i] = 0; + } + double rms = 0; + for (int i = 0; i <= params.fftSize/2; i++) { + double mag = reBuffer[i] * reBuffer[i] + + imBuffer[i] * imBuffer[i]; + rms += mag; + newFrame[freqMap[i]] += mag; + } + rms = sqrt(rms / (params.fftSize/2)); + + int frameIndex = frameCount % blockSize; + + vector<double> processedFrame(freqMapSize, 0.0); + + double totalEnergy = 0; + if (params.useSpectralDifference) { + for (int i = 0; i < freqMapSize; i++) { + totalEnergy += newFrame[i]; + if (newFrame[i] > prevFrame[i]) { + processedFrame[i] = newFrame[i] - prevFrame[i]; + } else { + processedFrame[i] = 0; + } + } + } else { + for (int i = 0; i < freqMapSize; i++) { + processedFrame[i] = newFrame[i]; + totalEnergy += processedFrame[i]; + } + } + totalEnergies[frameIndex] = totalEnergy; + + double decay = frameCount >= 200 ? 0.99: + (frameCount < 100? 0: (frameCount - 100) / 100.0); + + if (ltAverage == 0) + ltAverage = totalEnergy; + else + ltAverage = ltAverage * decay + totalEnergy * (1.0 - decay); + + if (rms <= params.silenceThreshold) + for (int i = 0; i < freqMapSize; i++) + processedFrame[i] = 0; + else if (params.frameNorm == NormaliseFrameToSum1) + for (int i = 0; i < freqMapSize; i++) + processedFrame[i] /= totalEnergy; + else if (params.frameNorm == NormaliseFrameToLTAverage) + for (int i = 0; i < freqMapSize; i++) + processedFrame[i] /= ltAverage; + + vector<double> tmp = prevFrame; + prevFrame = newFrame; + newFrame = tmp; + + frames[frameIndex] = processedFrame; + + if ((frameCount % 100) == 0) { + if (!silent) { + cerr << "Progress:" << frameCount << " " << ltAverage << endl; + } + } + + return processedFrame; +} + +void +Matcher::calcAdvance() +{ + int frameIndex = frameCount % blockSize; + + if (frameCount >= distXSize) { +// std::cerr << "Resizing " << distXSize << " -> " << distXSize * 2 << std::endl; + distXSize *= 2; + distance = (unsigned char **)realloc(distance, distXSize * sizeof(unsigned char *)); + bestPathCost = (int **)realloc(bestPathCost, distXSize * sizeof(int *)); + distYSizes = (int *)realloc(distYSizes, distXSize * sizeof(int)); + first = (int *)realloc(first, distXSize * sizeof(int)); + last = (int *)realloc(last, distXSize * sizeof(int)); + + for (int i = distXSize/2; i < distXSize; ++i) { + distance[i] = 0; + } + } + + if (firstPM && (frameCount >= blockSize)) { + + int len = last[frameCount - blockSize] - + first[frameCount - blockSize]; + + // We need to copy distance[frameCount-blockSize] to + // distance[frameCount], and then truncate + // distance[frameCount-blockSize] to its first len elements. + // Same for bestPathCost. +/* + std::cerr << "Matcher(" << this << "): moving " << distYSizes[frameCount - blockSize] << " from " << frameCount - blockSize << " to " + << frameCount << ", allocating " << len << " for " + << frameCount - blockSize << std::endl; +*/ + distance[frameCount] = distance[frameCount - blockSize]; + + distance[frameCount - blockSize] = (unsigned char *) + malloc(len * sizeof(unsigned char)); + for (int i = 0; i < len; ++i) { + distance[frameCount - blockSize][i] = + distance[frameCount][i]; + } + + bestPathCost[frameCount] = bestPathCost[frameCount - blockSize]; + + bestPathCost[frameCount - blockSize] = (int *) + malloc(len * sizeof(int)); + for (int i = 0; i < len; ++i) { + bestPathCost[frameCount - blockSize][i] = + bestPathCost[frameCount][i]; + } + + distYSizes[frameCount] = distYSizes[frameCount - blockSize]; + distYSizes[frameCount - blockSize] = len; + } + + int stop = otherMatcher->frameCount; + int index = stop - blockSize; + if (index < 0) + index = 0; + first[frameCount] = index; + last[frameCount] = stop; + + bool overflow = false; + int mn= -1; + int mx= -1; + for ( ; index < stop; index++) { + + int dMN = metric.calcDistanceScaled + (frames[frameIndex], + otherMatcher->frames[index % blockSize], + params.distanceScale); + + if (mx<0) + mx = mn = dMN; + else if (dMN > mx) + mx = dMN; + else if (dMN < mn) + mn = dMN; + if (dMN >= 255) { + overflow = true; + dMN = 255; + } + + if ((frameCount == 0) && (index == 0)) // first element + setValue(0, 0, 0, 0, dMN); + else if (frameCount == 0) // first row + setValue(0, index, ADVANCE_OTHER, + getValue(0, index-1, true), dMN); + else if (index == 0) // first column + setValue(frameCount, index, ADVANCE_THIS, + getValue(frameCount - 1, 0, true), dMN); + else if (index == otherMatcher->frameCount - blockSize) { + // missing value(s) due to cutoff + // - no previous value in current row (resp. column) + // - no diagonal value if prev. dir. == curr. dirn + int min2 = getValue(frameCount - 1, index, true); + // if ((firstPM && (first[frameCount - 1] == index)) || + // (!firstPM && (last[index-1] < frameCount))) + if (first[frameCount - 1] == index) + setValue(frameCount, index, ADVANCE_THIS, min2, dMN); + else { + int min1 = getValue(frameCount - 1, index - 1, true); + if (min1 + dMN <= min2) + setValue(frameCount, index, ADVANCE_BOTH, min1,dMN); + else + setValue(frameCount, index, ADVANCE_THIS, min2,dMN); + } + } else { + int min1 = getValue(frameCount, index-1, true); + int min2 = getValue(frameCount - 1, index, true); + int min3 = getValue(frameCount - 1, index-1, true); + if (min1 <= min2) { + if (min3 + dMN <= min1) + setValue(frameCount, index, ADVANCE_BOTH, min3,dMN); + else + setValue(frameCount, index, ADVANCE_OTHER,min1,dMN); + } else { + if (min3 + dMN <= min2) + setValue(frameCount, index, ADVANCE_BOTH, min3,dMN); + else + setValue(frameCount, index, ADVANCE_THIS, min2,dMN); + } + } + otherMatcher->last[index]++; + } // loop for row (resp. column) + + frameCount++; + runCount++; + + otherMatcher->runCount = 0; + + if (overflow && !silent) + cerr << "WARNING: overflow in distance metric: " + << "frame " << frameCount << ", val = " << mx << endl; + + if (!silent) + std::cerr << "Frame " << frameCount << ", d = " << (mx-mn) << std::endl; +} + +int +Matcher::getValue(int i, int j, bool firstAttempt) +{ + if (firstPM) + return bestPathCost[i][j - first[i]]; + else + return otherMatcher->bestPathCost[j][i - otherMatcher->first[j]]; +} // getValue() + +void +Matcher::setValue(int i, int j, int dir, int value, int dMN) +{ + if (firstPM) { + distance[i][j - first[i]] = (unsigned char)((dMN & MASK) | dir); + bestPathCost[i][j - first[i]] = + (value + (dir==ADVANCE_BOTH? dMN*2: dMN)); + } else { + if (dir == ADVANCE_THIS) + dir = ADVANCE_OTHER; + else if (dir == ADVANCE_OTHER) + dir = ADVANCE_THIS; + int idx = i - otherMatcher->first[j]; + if (idx == (int)otherMatcher->distYSizes[j]) { + // This should never happen, but if we allow arbitrary + // pauses in either direction, and arbitrary lengths at + // end, it is better than a segmentation fault. + std::cerr << "Emergency resize: " << idx << " -> " << idx * 2 << std::endl; + otherMatcher->distYSizes[j] = idx * 2; + otherMatcher->bestPathCost[j] = + (int *)realloc(otherMatcher->bestPathCost[j], + idx * 2 * sizeof(int)); + otherMatcher->distance[j] = + (unsigned char *)realloc(otherMatcher->distance[j], + idx * 2 * sizeof(unsigned char)); + } + otherMatcher->distance[j][idx] = (unsigned char)((dMN & MASK) | dir); + otherMatcher->bestPathCost[j][idx] = + (value + (dir==ADVANCE_BOTH? dMN*2: dMN)); + } +} // setValue() +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/Matcher.h Thu Nov 13 09:50:14 2014 +0000 @@ -0,0 +1,383 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + Vamp feature extraction plugin using the MATCH audio alignment + algorithm. + + Centre for Digital Music, Queen Mary, University of London. + This file copyright 2007 Simon Dixon, Chris Cannam and QMUL. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#ifndef _MATCHER_H_ +#define _MATCHER_H_ + +#include <vector> +#include <iostream> +#include <sstream> +#include <cmath> + +#define ADVANCE_THIS 1 +#define ADVANCE_OTHER 2 +#define ADVANCE_BOTH 3 +#define MASK 0xfc + +#include "DistanceMetric.h" + +using std::vector; +using std::string; +using std::cerr; +using std::endl; + +/** Represents an audio stream that can be matched to another audio + * stream of the same piece of music. The matching algorithm uses + * dynamic time warping. The distance metric is a Euclidean metric + * on the FFT data with the higher frequencies mapped onto a linear + * scale. + */ +class Matcher +{ +public: + enum FrameNormalisation { + + /** Do not normalise audio frames */ + NoFrameNormalisation, + + /** Normalise each frame of audio to have a sum of 1 */ + NormaliseFrameToSum1, + + /** Normalise each frame of audio by the long-term average + * of the summed energy */ + NormaliseFrameToLTAverage, + }; + + struct Parameters { + + Parameters(float rate_, double hopTime_, int fftSize_) : + sampleRate(rate_), + frameNorm(NormaliseFrameToSum1), + distanceNorm(DistanceMetric::NormaliseDistanceToLogSum), + distanceScale(90.0), + useSpectralDifference(true), + useChromaFrequencyMap(false), + hopTime(hopTime_), + fftSize(fftSize_), + blockTime(10.0), + silenceThreshold(0.01), + decay(0.99), + maxRunCount(3) + {} + + /** Sample rate of audio */ + float sampleRate; + + /** Type of audio frame normalisation */ + FrameNormalisation frameNorm; + + /** Type of distance metric normalisation */ + DistanceMetric::DistanceNormalisation distanceNorm; + + /** Scaling factor for distance metric; must guarantee that the + * final value fits in the data type used, that is, unsigned + * char. + */ + double distanceScale; + + /** Flag indicating whether or not the half-wave rectified + * spectral difference should be used in calculating the + * distance metric for pairs of audio frames, instead of the + * straight spectrum values. */ + bool useSpectralDifference; + + /** Flag indicating whether to use a chroma frequency map (12 + * bins) instead of the default warped spectrogram */ + bool useChromaFrequencyMap; + + /** Spacing of audio frames (determines the amount of overlap or + * skip between frames). This value is expressed in + * seconds. */ + double hopTime; + + /** Size of an FFT frame in samples. Note that the data passed + * in to Matcher is already in the frequency domain, so this + * expresses the size of the frame that the caller will be + * providing. + */ + int fftSize; + + /** The width of the search band (error margin) around the current + * match position, measured in seconds. Strictly speaking the + * width is measured backwards from the current point, since the + * algorithm has to work causally. + */ + double blockTime; + + /** RMS level below which frame is considered silent */ + double silenceThreshold; + + /** Frame-to-frame decay factor in calculating long-term average */ + double decay; + + /** Maximum number of frames sequentially processed by this + * matcher, without a frame of the other matcher being + * processed. + */ + int maxRunCount; + }; + +protected: + /** Points to the other performance with which this one is being + * compared. The data for the distance metric and the dynamic + * time warping is shared between the two matchers. In the + * original version, only one of the two performance matchers + * contained the distance metric. (See <code>first</code>) + */ + Matcher *otherMatcher; + + /** Indicates which performance is considered primary (the + * score). This is the performance shown on the vertical axis, + * and referred to as "this" in the codes for the direction of + * DTW steps. */ + bool firstPM; + + /** Configuration parameters */ + Parameters params; + + /** Width of the search band in FFT frames (see <code>blockTime</code>) */ + int blockSize; + + /** The number of frames of audio data which have been read. */ + int frameCount; + + /** Long term average frame energy (in frequency domain + * representation). */ + double ltAverage; + + /** The number of frames sequentially processed by this matcher, + * without a frame of the other matcher being processed. + */ + int runCount; + + /** A mapping function for mapping FFT bins to final frequency + * bins. The mapping is linear (1-1) until the resolution + * reaches 2 points per semitone, then logarithmic with a + * semitone resolution. e.g. for 44.1kHz sampling rate and + * fftSize of 2048 (46ms), bin spacing is 21.5Hz, which is mapped + * linearly for bins 0-34 (0 to 732Hz), and logarithmically for + * the remaining bins (midi notes 79 to 127, bins 35 to 83), + * where all energy above note 127 is mapped into the final + * bin. */ + vector<int> freqMap; + + /** The number of entries in <code>freqMap</code>. */ + int freqMapSize; + + /** The number of values in an externally-supplied feature vector, + * used in preference to freqMap/freqMapSize if constructed with + * the external feature version of the Matcher constructor. If + * this is zero, the internal feature extractor will be used as + * normal. + */ + int externalFeatureSize; + + /** The number of values in the feature vectors actually in + * use. This will be externalFeatureSize if greater than zero, or + * freqMapSize otherwise. + */ + int featureSize; + + /** The most recent frame; used for calculating the frame to frame + * spectral difference. These are therefore frequency warped but + * not yet normalised. */ + vector<double> prevFrame; + vector<double> newFrame; + + /** A block of previously seen frames are stored in this structure + * for calculation of the distance matrix as the new frames are + * read in. One can think of the structure of the array as a + * circular buffer of vectors. These are the frames with all + * applicable processing applied (e.g. spectral difference, + * normalisation), unlike prevFrame and newFrame. The total + * energy of frames[i] is stored in totalEnergies[i]. */ + vector<vector<double> > frames; + + /** The total energy of each frame in the frames block. */ + vector<double> totalEnergies; + + /** The best path cost matrix. */ + int **bestPathCost; + + /** The distance matrix. */ + unsigned char **distance; + + /** The bounds of each row of data in the distance and path cost matrices.*/ + int *first; + int *last; + + /** Height of each column in distance and bestPathCost matrices */ + int *distYSizes; + + /** Width of distance and bestPathCost matrices and first and last vectors */ + int distXSize; + + bool initialised; + + /** Disable or enable debugging output */ + static bool silent; + +public: + /** Constructor for Matcher. + * + * @param p The Matcher representing the performance with which + * this one is going to be matched. Some information is shared + * between the two matchers (currently one possesses the distance + * matrix and optimal path matrix). + */ + Matcher(Parameters parameters, Matcher *p); + + /** Constructor for Matcher using externally supplied features. + * A Matcher made using this constructor will not carry out its + * own feature extraction from frequency-domain audio data, but + * instead will accept arbitrary feature frames calculated by + * some external code. + * + * @param p The Matcher representing the performance with which + * this one is going to be matched. Some information is shared + * between the two matchers (currently one possesses the distance + * matrix and optimal path matrix). + * + * @param featureSize Number of values in each feature vector. + */ + Matcher(Parameters parameters, Matcher *p, int featureSize); + + ~Matcher(); + + /** For debugging, outputs information about the Matcher to + * standard error. + */ + void print(); + + /** Adds a link to the Matcher object representing the performance + * which is going to be matched to this one. + * + * @param p the Matcher representing the other performance + */ + void setOtherMatcher(Matcher *p) { + otherMatcher = p; + } // setOtherMatcher() + + int getFrameCount() { + return frameCount; + } + + /** + * Return the feature vector size that will be used for the given + * parameters. + */ + static int getFeatureSizeFor(Parameters params); + +protected: + template <typename T> + void initVector(vector<T> &vec, int sz, T dflt = 0) { + vec.clear(); + while ((int)vec.size() < sz) vec.push_back(dflt); + } + + template <typename T> + void initMatrix(vector<vector<T> > &mat, int hsz, int vsz, + T dflt = 0, int fillTo = -1) { + mat.clear(); + if (fillTo < 0) fillTo = hsz; + for (int i = 0; i < hsz; ++i) { + mat.push_back(vector<T>()); + if (i < fillTo) { + while ((int)mat[i].size() < vsz) { + mat[i].push_back(dflt); + } + } + } + } + + void init(); + + void makeFreqMap(); + + /** Creates a map of FFT frequency bins to comparison bins. Where + * the spacing of FFT bins is less than 0.5 semitones, the + * mapping is one to one. Where the spacing is greater than 0.5 + * semitones, the FFT energy is mapped into semitone-wide + * bins. No scaling is performed; that is the energy is summed + * into the comparison bins. See also consumeFrame() + */ + void makeStandardFrequencyMap(); + + void makeChromaFrequencyMap(); + + /** Processes a frame of audio data by first computing the STFT + * with a Hamming window, then mapping the frequency bins into a + * part-linear part-logarithmic array, then (optionally) + * computing the half-wave rectified spectral difference from the + * previous frame, then (optionally) normalising to a sum of 1, + * then calculating the distance to all frames stored in the + * otherMatcher and storing them in the distance matrix, and + * finally updating the optimal path matrix using the dynamic + * time warping algorithm. + * + * Return value is the frame (post-processed, with warping, + * rectification, and normalisation as appropriate). + * + * The Matcher must have been constructed using the constructor + * without an external featureSize parameter in order to use this + * function. (Otherwise it will be expecting you to call + * consumeFeatureVector.) + */ + std::vector<double> consumeFrame(double *reBuffer, double *imBuffer); + + /** Processes a feature vector frame (presumably calculated from + * audio data by some external code). As consumeFrame, except + * that it does not calculate a feature from audio data but + * instead uses the supplied feature directly. + * + * The Matcher must have been constructed using the constructor + * that accepts an external featureSize parameter in order to + * use this function. The supplied feature must be of the size + * that was passed to the constructor. + */ + void consumeFeatureVector(std::vector<double> feature); + + /** Retrieves values from the minimum cost matrix. + * + * @param i the frame number of this Matcher + * @param j the frame number of the other Matcher + * @return the cost of the minimum cost path to this location + */ + int getValue(int i, int j, bool firstAttempt); + + /** Stores entries in the distance matrix and the optimal path matrix. + * + * @param i the frame number of this Matcher + * @param j the frame number of the other Matcher + * @param dir the direction from which this position is reached with + * minimum cost + * @param value the cost of the minimum path except the current step + * @param dMN the distance cost between the two frames + */ + void setValue(int i, int j, int dir, int value, int dMN); + + vector<double> processFrameFromFreqData(double *, double *); + void calcAdvance(); + + DistanceMetric metric; + + friend class MatchFeeder; + friend class MatchFeatureFeeder; + friend class Finder; + +}; // class Matcher + +#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/Path.cpp Thu Nov 13 09:50:14 2014 +0000 @@ -0,0 +1,76 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + Vamp feature extraction plugin using the MATCH audio alignment + algorithm. + + Centre for Digital Music, Queen Mary, University of London. + This file copyright 2007 Simon Dixon, Chris Cannam and QMUL. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#include "Path.h" + +int +Path::smooth(std::vector<int> &x, std::vector<int> &y, int length) +{ + if (length == 0) + return 0; + while ((int)val.size() < length) { + val.push_back(0); + len.push_back(0); + } + int p = 0; + val[0] = len[0] = 0; + for (int i = 1; i < length; i++) { // H = 1; V = 2; D = 3 + int current = x[i] - x[i-1] + 2 * (y[i] - y[i-1]); + if (current == val[p]) { + len[p]++; + } else if ((current == 3) || (val[p] == 0)) { + val[++p] = current; + len[p] = 1; + } else if (val[p] + current == 3) { // 1 + 2 + if (--len[p] == 0) + p--; + if (val[p] == 3) + len[p]++; + else { + val[++p] = 3; + len[p] = 1; + } + } else { // val[p] == 3 && current != 3 + if ((val[p-1] == current) || + (val[p-1] == 0) || + (len[p] > MAX_RUN_LENGTH)) { + val[++p] = current; + len[p] = 1; + } else { + if (--len[p-1] == 0) { + val[p-1] = val[p]; + len[p-1] = len[p]; + p--; + if (val[p-1] == 3) { + len[p-1] += len[p]; + p--; + } + } + len[p]++; + } + } + } + int i = 1; + for (int pp = 1; pp <= p; pp++) { + int dx = val[pp] & 1; + int dy = val[pp] >> 1; + for (int j = len[pp]; j > 0; j--, i++) { + x[i] = x[i-1] + dx; + y[i] = y[i-1] + dy; + } + } + return i; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/Path.h Thu Nov 13 09:50:14 2014 +0000 @@ -0,0 +1,46 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + Vamp feature extraction plugin using the MATCH audio alignment + algorithm. + + Centre for Digital Music, Queen Mary, University of London. + This file copyright 2007 Simon Dixon, Chris Cannam and QMUL. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#ifndef _PATH_H_ +#define _PATH_H_ + +#include <vector> + +class Path +{ +public: + Path() { } + + /** Smooths an alignment path.<BR> + * Consider the path as a sequence of horizontal (H), vertical (V) and + * diagonal (D) steps. The smoothing consists of 2 rewrite rules:<BR> + * HnDmVn / Dm+n (where m is less than MAX_RUN_LENGTH)<BR> + * VnDmHn / Dm+n (where m is less than MAX_RUN_LENGTH)<BR> + * The new path is written over the old path. Note that the end points of + * each application of a rewrite rule do not change. + * @return the length of the new path + */ + int smooth(std::vector<int> &x, std::vector<int> &y, int length); + +protected: + static const int MAX_RUN_LENGTH = 50; + + std::vector<int> val; + std::vector<int> len; +}; + +#endif +