# HG changeset patch # User Chris Cannam # Date 1415960757 0 # Node ID 47f7649ab9d5388531f3c5214157c987dd2f5a3d # Parent 96ffe524b18ea59e4e718d3fb2194cf359b973b2# Parent a68204b9a529352a0042a9343f99b0d2b64c9ffc Merge from the default branch diff -r a68204b9a529 -r 47f7649ab9d5 Makefile.linux --- a/Makefile.linux Fri Nov 14 10:24:13 2014 +0000 +++ b/Makefile.linux Fri Nov 14 10:25:57 2014 +0000 @@ -1,5 +1,7 @@ CXXFLAGS += -fPIC -ffast-math -O3 -Wall -Werror +#CXXFLAGS += -fPIC -g -Wall -Werror + LDFLAGS += -shared -Wl,-Bstatic -lvamp-sdk -Wl,-Bdynamic -Wl,-Bsymbolic -Wl,-z,defs -lpthread -Wl,--version-script=vamp-plugin.map include Makefile.inc diff -r a68204b9a529 -r 47f7649ab9d5 src/FeatureExtractor.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/FeatureExtractor.cpp Fri Nov 14 10:25:57 2014 +0000 @@ -0,0 +1,156 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + Vamp feature extraction plugin using the MATCH audio alignment + algorithm. + + Centre for Digital Music, Queen Mary, University of London. + This file copyright 2007 Simon Dixon, Chris Cannam and QMUL. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#include "FeatureExtractor.h" + +#include + +#include +#include +#include + +using namespace std; + +FeatureExtractor::FeatureExtractor(Parameters parameters) : + m_params(parameters), + m_ltAverage(0) +{ + if (m_params.useChromaFrequencyMap) { + m_featureSize = 13; + } else { + m_featureSize = 84; + } + + m_prevFrame = vector(m_featureSize, 0.0); + + makeFreqMap(); +} + +void +FeatureExtractor::makeFreqMap() +{ + m_freqMap = vector(m_params.fftSize / 2 + 1, 0); + + if (m_params.useChromaFrequencyMap) { +#ifdef DEBUG_MATCHER + cerr << "makeFreqMap: calling makeChromaFrequencyMap" << endl; +#endif + makeChromaFrequencyMap(); + } else { +#ifdef DEBUG_MATCHER + cerr << "makeFreqMap: calling makeStandardFrequencyMap" << endl; +#endif + makeStandardFrequencyMap(); + } +} + +void +FeatureExtractor::makeStandardFrequencyMap() +{ + double binWidth = m_params.sampleRate / m_params.fftSize; + int crossoverBin = (int)(2 / (pow(2, 1/12.0) - 1)); + int crossoverMidi = lrint(log(crossoverBin*binWidth/440.0)/ + log(2.0) * 12 + 69); + + // freq = 440 * Math.pow(2, (midi-69)/12.0) / binWidth; + + int i = 0; + while (i <= crossoverBin) { + m_freqMap[i] = i; + ++i; + } + + while (i <= m_params.fftSize/2) { + double midi = log(i*binWidth/440.0) / log(2.0) * 12 + 69; + if (midi > 127) midi = 127; + int target = crossoverBin + lrint(midi) - crossoverMidi; + if (target >= m_featureSize) target = m_featureSize - 1; + m_freqMap[i++] = target; + } +} + +void +FeatureExtractor::makeChromaFrequencyMap() +{ + double binWidth = m_params.sampleRate / m_params.fftSize; + int crossoverBin = (int)(1 / (pow(2, 1/12.0) - 1)); + int i = 0; + while (i <= crossoverBin) { + m_freqMap[i++] = 0; + } + while (i <= m_params.fftSize/2) { + double midi = log(i*binWidth/440.0) / log(2.0) * 12 + 69; + m_freqMap[i++] = (lrint(midi)) % 12 + 1; + } +} + +vector +FeatureExtractor::process(const vector &real, const vector &imag) +{ + vector frame(m_featureSize, 0.0); + + double rms = 0; + for (int i = 0; i <= m_params.fftSize/2; i++) { + double mag = real[i] * real[i] + imag[i] * imag[i]; + rms += mag; + frame[m_freqMap[i]] += mag; + } + rms = sqrt(rms / (m_params.fftSize/2)); + + vector feature(m_featureSize, 0.0); + + double totalEnergy = 0; + if (m_params.useSpectralDifference) { + for (int i = 0; i < m_featureSize; i++) { + totalEnergy += frame[i]; + if (frame[i] > m_prevFrame[i]) { + feature[i] = frame[i] - m_prevFrame[i]; + } else { + feature[i] = 0; + } + } + } else { + for (int i = 0; i < m_featureSize; i++) { + feature[i] = frame[i]; + totalEnergy += feature[i]; + } + } + + if (m_ltAverage == 0) { + m_ltAverage = totalEnergy; + } else { + double decay = m_params.decay; + m_ltAverage = m_ltAverage * decay + totalEnergy * (1.0 - decay); + } + + if (rms <= m_params.silenceThreshold) { + for (int i = 0; i < m_featureSize; i++) { + feature[i] = 0; + } + } else if (m_params.frameNorm == NormaliseFrameToSum1) { + for (int i = 0; i < m_featureSize; i++) { + feature[i] /= totalEnergy; + } + } else if (m_params.frameNorm == NormaliseFrameToLTAverage) { + for (int i = 0; i < m_featureSize; i++) { + feature[i] /= m_ltAverage; + } + } + + m_prevFrame = frame; + return feature; +} + diff -r a68204b9a529 -r 47f7649ab9d5 src/FeatureExtractor.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/FeatureExtractor.h Fri Nov 14 10:25:57 2014 +0000 @@ -0,0 +1,167 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + Vamp feature extraction plugin using the MATCH audio alignment + algorithm. + + Centre for Digital Music, Queen Mary, University of London. + This file copyright 2007 Simon Dixon, Chris Cannam and QMUL. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#ifndef FEATURE_EXTRACTOR_H +#define FEATURE_EXTRACTOR_H + +#include + +/** + * Convert frequency-domain audio frames into features suitable for + * MATCH alignment calculation. The default feature is a warping of + * the frequency data to map higher frequencies into a linear scale. A + * chroma mapping is also available. + * + * Note that FeatureExtractor maintains internal frame-to-frame state: + * use one FeatureExtractor per audio source, and construct a new one + * for each new source. + */ +class FeatureExtractor +{ +public: + enum FrameNormalisation { + + /** Do not normalise frames */ + NoFrameNormalisation, + + /** Normalise each frame to have a sum of 1 */ + NormaliseFrameToSum1, + + /** Normalise each frame by the long-term average of the + * summed energy */ + NormaliseFrameToLTAverage, + }; + + struct Parameters { + + Parameters(float rate_, int fftSize_) : + sampleRate(rate_), + frameNorm(NormaliseFrameToSum1), + useSpectralDifference(true), + useChromaFrequencyMap(false), + fftSize(fftSize_), + silenceThreshold(0.01), + decay(0.99) + {} + + /** Sample rate of audio */ + float sampleRate; + + /** Type of audio frame normalisation */ + FrameNormalisation frameNorm; + + /** Flag indicating whether or not the half-wave rectified + * spectral difference should be used in calculating the + * distance metric for pairs of audio frames, instead of the + * straight spectrum values. */ + bool useSpectralDifference; + + /** Flag indicating whether to use a chroma frequency map (12 + * bins) instead of the default warped spectrogram */ + bool useChromaFrequencyMap; + + /** Spacing of audio frames (determines the amount of overlap or + * skip between frames). This value is expressed in + * seconds. */ + double hopTime; + + /** Size of an FFT frame in samples. Note that the data passed + * in is already in the frequency domain, so this expresses + * the size of the frame that the caller will be providing. */ + int fftSize; + + /** RMS level below which frame is considered silent */ + double silenceThreshold; + + /** Frame-to-frame decay factor in calculating long-term average */ + double decay; + }; + + /** + * Construct a FeatureExtractor with the given parameters. + * + * Note that FeatureExtractor maintains internal frame-to-frame + * state: use one FeatureExtractor per audio source, and construct + * a new one for each new source. + */ + FeatureExtractor(Parameters params); + + /** + * Return the feature vector size that will be returned from process(). + */ + int getFeatureSize() const { return m_featureSize; } + + /** + * Process one frequency-domain audio frame (provided as real & + * imaginary components from the FFT output). Return a feature + * vector of size given by getFeatureSize(). Input vectors must + * have at least params.fftSize/2+1 elements each. + * + * Operates by mapping the frequency bins into a part-linear + * part-logarithmic array, then (optionally) computing the + * half-wave rectified spectral difference from the previous + * frame, then (optionally) normalising to a sum of 1. + * + * Return value is the frame (post-processed, with warping, + * rectification, and normalisation as appropriate). + */ + std::vector process(const std::vector &real, + const std::vector &imag); + +protected: + /** Make either standard or chroma map, depending on m_params */ + void makeFreqMap(); + + /** Creates a map of FFT frequency bins to comparison bins. Where + * the spacing of FFT bins is less than 0.5 semitones, the + * mapping is one to one. Where the spacing is greater than 0.5 + * semitones, the FFT energy is mapped into semitone-wide + * bins. No scaling is performed; that is the energy is summed + * into the comparison bins. */ + void makeStandardFrequencyMap(); + + /** Creates a map of FFT frequency bins to semitone chroma bins. */ + void makeChromaFrequencyMap(); + + /** Configuration parameters */ + Parameters m_params; + + /** Long term average frame energy (in frequency domain + * representation). */ + double m_ltAverage; + + /** A mapping function for mapping FFT bins to final frequency + * bins. The mapping is linear (1-1) until the resolution + * reaches 2 points per semitone, then logarithmic with a + * semitone resolution. e.g. for 44.1kHz sampling rate and + * fftSize of 2048 (46ms), bin spacing is 21.5Hz, which is mapped + * linearly for bins 0-34 (0 to 732Hz), and logarithmically for + * the remaining bins (midi notes 79 to 127, bins 35 to 83), + * where all energy above note 127 is mapped into the final + * bin. */ + std::vector m_freqMap; + + /** The size of a returned feature. */ + int m_featureSize; + + /** The most recent frame; used for calculating the frame to frame + * spectral difference. This is therefore frequency warped but + * not yet normalised. */ + std::vector m_prevFrame; +}; + +#endif + diff -r a68204b9a529 -r 47f7649ab9d5 src/Finder.cpp --- a/src/Finder.cpp Fri Nov 14 10:24:13 2014 +0000 +++ b/src/Finder.cpp Fri Nov 14 10:25:57 2014 +0000 @@ -23,7 +23,7 @@ Finder::Finder(Matcher *p1, Matcher *p2) { - if (!p1->firstPM) + if (!p1->m_firstPM) std::cerr << "Warning: wrong args in Finder()" << std::endl; pm1 = p1; pm2 = p2; @@ -44,23 +44,23 @@ { if (i1 >= 0) { index1 = i1; - index2 = i2 - pm1->first[i1]; + index2 = i2 - pm1->m_first[i1]; } - return (i1 >= 0) && (i2 >= pm1->first[i1]) && (i2 < pm1->last[i1]); + return (i1 >= 0) && (i2 >= pm1->m_first[i1]) && (i2 < pm1->m_last[i1]); } // find() void Finder::getColRange(int row, int *range) { - range[0] = pm1->first[row]; - range[1] = pm1->last[row]; + range[0] = pm1->m_first[row]; + range[1] = pm1->m_last[row]; } // getColRange() void Finder::getRowRange(int col, int *range) { - range[0] = pm2->first[col]; - range[1] = pm2->last[col]; + range[0] = pm2->m_first[col]; + range[1] = pm2->m_last[col]; } // getRowRange() int @@ -114,7 +114,7 @@ Finder::getDistance(int row, int col) { if (find(row, col)) { - return pm1->distance[row][col - pm1->first[row]]; + return pm1->m_distance[row][col - pm1->m_first[row]]; } std::cerr << "getDistance(" << row << "," << col << "): out of bounds" << std::endl; throw "getDistance index out of bounds"; @@ -124,7 +124,7 @@ Finder::setDistance(int row, int col, unsigned char b) { if (find(row, col)) { - pm1->distance[row][col - pm1->first[row]] = b; + pm1->m_distance[row][col - pm1->m_first[row]] = b; return; } std::cerr << "setDistance(" << row << "," << col << "," << b << "): out of bounds" << std::endl; @@ -135,7 +135,7 @@ Finder::getPathCost(int row, int col) { if (find(row, col)) // "1" avoids div by 0 below - return pm1->bestPathCost[row][col - pm1->first[row]]*100/ (1+row+col); + return pm1->m_bestPathCost[row][col - pm1->m_first[row]]*100/ (1+row+col); std::cerr << "getPathCost(" << row << "," << col << "): out of bounds" << std::endl; throw "getPathCost index out of bounds"; } // getPathCost() @@ -144,7 +144,7 @@ Finder::getRawPathCost(int row, int col) { if (find(row, col)) - return pm1->bestPathCost[row][col - pm1->first[row]]; + return pm1->m_bestPathCost[row][col - pm1->m_first[row]]; std::cerr << "getRawPathCost(" << row << "," << col << "): out of bounds" << std::endl; throw "getRawPathCost index out of bounds"; } // getRawPathCost() @@ -153,7 +153,7 @@ Finder::setPathCost(int row, int col, int i) { if (find(row, col)) { - pm1->bestPathCost[row][col - pm1->first[row]] = i; + pm1->m_bestPathCost[row][col - pm1->m_first[row]] = i; return; } std::cerr << "setPathCost(" << row << "," << col << "," << i << "): out of bounds" << std::endl; @@ -163,25 +163,25 @@ unsigned char Finder::getDistance() { - return pm1->distance[index1][index2]; + return pm1->m_distance[index1][index2]; } // getDistance()/0 void Finder::setDistance(int b) { - pm1->distance[index1][index2] = (unsigned char)b; + pm1->m_distance[index1][index2] = (unsigned char)b; } // setDistance() int Finder::getPathCost() { - return pm1->bestPathCost[index1][index2]; + return pm1->m_bestPathCost[index1][index2]; } // getPathCost() void Finder::setPathCost(int i) { - pm1->bestPathCost[index1][index2] = i; + pm1->m_bestPathCost[index1][index2] = i; } // setPathCost() void @@ -194,25 +194,25 @@ int thisRowStart, c; int prevRowStart = 0, prevRowStop = 0; for (int r = r1; r <= r2; r++) { - thisRowStart = pm1->first[r]; + thisRowStart = pm1->m_first[r]; if (thisRowStart < c1) thisRowStart = c1; for (c = thisRowStart; c <= c2; c++) { if (find(r,c)) { int i2 = index2; - int newCost = pm1->distance[r][i2]; + int newCost = pm1->m_distance[r][i2]; int dir = 0; if (r > r1) { // not first row int min = -1; if ((c > prevRowStart) && (c <= prevRowStop)) { // diagonal from (r-1,c-1) - min = pm1->bestPathCost[r-1][c-pm1->first[r-1]-1] + + min = pm1->m_bestPathCost[r-1][c-pm1->m_first[r-1]-1] + newCost * 2; dir = ADVANCE_BOTH; } if ((c >= prevRowStart) && (c < prevRowStop)) { // vertical from (r-1,c) - int cost = pm1->bestPathCost[r-1][c-pm1->first[r-1]] + + int cost = pm1->m_bestPathCost[r-1][c-pm1->m_first[r-1]] + newCost; if ((min == -1) || (cost < min)) { min = cost; @@ -221,22 +221,22 @@ } if (c > thisRowStart) { // horizontal from (r,c-1) - int cost =pm1->bestPathCost[r][i2-1]+newCost; + int cost =pm1->m_bestPathCost[r][i2-1]+newCost; if ((min == -1) || (cost < min)) { min = cost; dir = ADVANCE_OTHER; } } - pm1->bestPathCost[r][i2] = min; + pm1->m_bestPathCost[r][i2] = min; } else if (c > thisRowStart) { // first row // horizontal from (r,c-1) - pm1->bestPathCost[r][i2] = pm1->bestPathCost[r][i2-1] + + pm1->m_bestPathCost[r][i2] = pm1->m_bestPathCost[r][i2-1] + newCost; dir = ADVANCE_OTHER; } if ((r != r1) || (c != c1)) { - pm1->distance[r][i2] = (unsigned char) - ((pm1->distance[r][i2] & MASK) | dir); + pm1->m_distance[r][i2] = (unsigned char) + ((pm1->m_distance[r][i2] & MASK) | dir); } } else break; // end of row diff -r a68204b9a529 -r 47f7649ab9d5 src/MatchFeatureFeeder.cpp --- a/src/MatchFeatureFeeder.cpp Fri Nov 14 10:24:13 2014 +0000 +++ b/src/MatchFeatureFeeder.cpp Fri Nov 14 10:25:57 2014 +0000 @@ -32,6 +32,12 @@ void MatchFeatureFeeder::feed(vector f1, vector f2) { + // We maintain two FIFO queues of feature vectors, one per input + // stream. When the match-feeder function is entered, it knows + // that it has at least one feature in each queue. It loops, + // processing up to one feature per matcher, until a queue is + // empty. Then it returns, to be called again with more data. + q1.push(f1); q2.push(f2); @@ -43,17 +49,17 @@ void MatchFeatureFeeder::feedBlock() { - if (pm1->frameCount < pm1->blockSize) { // fill initial block + if (pm1->m_frameCount < pm1->m_blockSize) { // fill initial block feed1(); feed2(); } - else if (pm1->runCount >= pm1->params.maxRunCount) { // slope constraints + else if (pm1->m_runCount >= pm1->m_params.maxRunCount) { // slope constraints feed2(); - } else if (pm2->runCount >= pm2->params.maxRunCount) { + } else if (pm2->m_runCount >= pm2->m_params.maxRunCount) { feed1(); } else { switch (finder->getExpandDirection - (pm1->frameCount-1, pm2->frameCount-1)) { + (pm1->m_frameCount-1, pm2->m_frameCount-1)) { case ADVANCE_THIS: feed1(); break; diff -r a68204b9a529 -r 47f7649ab9d5 src/MatchFeeder.cpp --- a/src/MatchFeeder.cpp Fri Nov 14 10:24:13 2014 +0000 +++ b/src/MatchFeeder.cpp Fri Nov 14 10:25:57 2014 +0000 @@ -21,7 +21,7 @@ MatchFeeder::MatchFeeder(Matcher *m1, Matcher *m2) : pm1(m1), pm2(m2) { - fftSize = m1->params.fftSize; + fftSize = m1->m_params.fftSize; finder = new Finder(m1, m2); reBuffer = new double[fftSize/2+1]; imBuffer = new double[fftSize/2+1]; @@ -97,7 +97,7 @@ Features ff; vector f1, f2; - if (pm1->frameCount < pm1->blockSize) { // fill initial block + if (pm1->m_frameCount < pm1->m_blockSize) { // fill initial block // std::cerr << "feeding initial block" << std::endl; f1 = feed1(); f2 = feed2(); @@ -106,15 +106,15 @@ // feed2(); //!!! } else if (pm2->atEnd) // feed1(); - else if (pm1->runCount >= pm1->params.maxRunCount) { // slope constraints + else if (pm1->m_runCount >= pm1->m_params.maxRunCount) { // slope constraints // std::cerr << "pm1 too slopey" << std::endl; f2 = feed2(); - } else if (pm2->runCount >= pm2->params.maxRunCount) { + } else if (pm2->m_runCount >= pm2->m_params.maxRunCount) { // std::cerr << "pm2 too slopey" << std::endl; f1 = feed1(); } else { switch (finder->getExpandDirection - (pm1->frameCount-1, pm2->frameCount-1)) { + (pm1->m_frameCount-1, pm2->m_frameCount-1)) { case ADVANCE_THIS: // std::cerr << "finder says ADVANCE_THIS" << std::endl; f1 = feed1(); diff -r a68204b9a529 -r 47f7649ab9d5 src/MatchVampPlugin.cpp --- a/src/MatchVampPlugin.cpp Fri Nov 14 10:24:13 2014 +0000 +++ b/src/MatchVampPlugin.cpp Fri Nov 14 10:25:57 2014 +0000 @@ -57,7 +57,9 @@ m_locked(false), m_smooth(true), m_params(inputSampleRate, defaultStepTime, m_blockSize), - m_defaultParams(inputSampleRate, defaultStepTime, m_blockSize) + m_defaultParams(inputSampleRate, defaultStepTime, m_blockSize), + m_feParams(inputSampleRate, m_blockSize), + m_defaultFeParams(inputSampleRate, m_blockSize) { if (inputSampleRate < sampleRateMin) { std::cerr << "MatchVampPlugin::MatchVampPlugin: input sample rate " @@ -157,7 +159,7 @@ desc.description = "Type of normalisation to use for frequency-domain audio features"; desc.minValue = 0; desc.maxValue = 2; - desc.defaultValue = (int)m_defaultParams.frameNorm; + desc.defaultValue = (int)m_defaultFeParams.frameNorm; desc.isQuantized = true; desc.quantizeStep = 1; desc.valueNames.clear(); @@ -187,7 +189,7 @@ desc.description = "Whether to use half-wave rectified spectral difference instead of straight spectrum"; desc.minValue = 0; desc.maxValue = 1; - desc.defaultValue = m_defaultParams.useSpectralDifference ? 1 : 0; + desc.defaultValue = m_defaultFeParams.useSpectralDifference ? 1 : 0; desc.isQuantized = true; desc.quantizeStep = 1; list.push_back(desc); @@ -197,7 +199,7 @@ desc.description = "Whether to use a chroma frequency map instead of the default warped spectrogram"; desc.minValue = 0; desc.maxValue = 1; - desc.defaultValue = m_defaultParams.useChromaFrequencyMap ? 1 : 0; + desc.defaultValue = m_defaultFeParams.useChromaFrequencyMap ? 1 : 0; desc.isQuantized = true; desc.quantizeStep = 1; list.push_back(desc); @@ -243,13 +245,13 @@ if (name == "serialise") { return m_serialise ? 1.0 : 0.0; } else if (name == "framenorm") { - return (int)m_params.frameNorm; + return (int)m_feParams.frameNorm; } else if (name == "distnorm") { return (int)m_params.distanceNorm; } else if (name == "usespecdiff") { - return m_params.useSpectralDifference ? 1.0 : 0.0; + return m_feParams.useSpectralDifference ? 1.0 : 0.0; } else if (name == "usechroma") { - return m_params.useChromaFrequencyMap ? 1.0 : 0.0; + return m_feParams.useChromaFrequencyMap ? 1.0 : 0.0; } else if (name == "gradientlimit") { return m_params.maxRunCount; } else if (name == "zonewidth") { @@ -267,13 +269,13 @@ if (name == "serialise") { m_serialise = (value > 0.5); } else if (name == "framenorm") { - m_params.frameNorm = (Matcher::FrameNormalisation)(int(value + 0.1)); + m_feParams.frameNorm = (FeatureExtractor::FrameNormalisation)(int(value + 0.1)); } else if (name == "distnorm") { m_params.distanceNorm = (DistanceMetric::DistanceNormalisation)(int(value + 0.1)); } else if (name == "usespecdiff") { - m_params.useSpectralDifference = (value > 0.5); + m_feParams.useSpectralDifference = (value > 0.5); } else if (name == "usechroma") { - m_params.useChromaFrequencyMap = (value > 0.5); + m_feParams.useChromaFrequencyMap = (value > 0.5); } else if (name == "gradientlimit") { m_params.maxRunCount = int(value + 0.1); } else if (name == "zonewidth") { @@ -300,8 +302,9 @@ { m_params.hopTime = m_stepTime; m_params.fftSize = m_blockSize; - pm1 = new Matcher(m_params, 0); - pm2 = new Matcher(m_params, pm1); + m_feParams.fftSize = m_blockSize; + pm1 = new Matcher(m_params, m_feParams, 0); + pm2 = new Matcher(m_params, m_feParams, pm1); pm1->setOtherMatcher(pm2); feeder = new MatchFeeder(pm1, pm2); } @@ -420,12 +423,14 @@ m_abRatioOutNo = list.size(); list.push_back(desc); + int featureSize = FeatureExtractor(m_feParams).getFeatureSize(); + desc.identifier = "a_features"; desc.name = "A Features"; desc.description = "Spectral features extracted from performance A"; desc.unit = ""; desc.hasFixedBinCount = true; - desc.binCount = Matcher::getFeatureSizeFor(m_params); + desc.binCount = featureSize; desc.hasKnownExtents = false; desc.isQuantized = false; desc.sampleType = OutputDescriptor::FixedSampleRate; @@ -438,7 +443,7 @@ desc.description = "Spectral features extracted from performance B"; desc.unit = ""; desc.hasFixedBinCount = true; - desc.binCount = Matcher::getFeatureSizeFor(m_params); + desc.binCount = featureSize; desc.hasKnownExtents = false; desc.isQuantized = false; desc.sampleType = OutputDescriptor::FixedSampleRate; diff -r a68204b9a529 -r 47f7649ab9d5 src/MatchVampPlugin.h --- a/src/MatchVampPlugin.h Fri Nov 14 10:24:13 2014 +0000 +++ b/src/MatchVampPlugin.h Fri Nov 14 10:25:57 2014 +0000 @@ -26,6 +26,7 @@ #endif #include "Matcher.h" +#include "FeatureExtractor.h" class MatchFeeder; @@ -83,6 +84,9 @@ Matcher::Parameters m_params; Matcher::Parameters m_defaultParams; + FeatureExtractor::Parameters m_feParams; + FeatureExtractor::Parameters m_defaultFeParams; + mutable int m_pathOutNo; mutable int m_abOutNo; mutable int m_baOutNo; diff -r a68204b9a529 -r 47f7649ab9d5 src/Matcher.cpp --- a/src/Matcher.cpp Fri Nov 14 10:24:13 2014 +0000 +++ b/src/Matcher.cpp Fri Nov 14 10:25:57 2014 +0000 @@ -21,71 +21,56 @@ #include #include -bool Matcher::silent = true; - //#define DEBUG_MATCHER 1 -Matcher::Matcher(Parameters parameters, Matcher *p) : - params(parameters), - metric(parameters.distanceNorm) +Matcher::Matcher(Parameters parameters, + FeatureExtractor::Parameters feParams, + Matcher *p) : + m_params(parameters), + m_featureExtractor(feParams), + m_metric(parameters.distanceNorm) { #ifdef DEBUG_MATCHER - cerr << "Matcher::Matcher(" << params.sampleRate << ", " << p << ")" << endl; + cerr << "Matcher::Matcher(" << m_params.sampleRate << ", " << p << ")" << endl; #endif - otherMatcher = p; // the first matcher will need this to be set later - firstPM = (!p); - ltAverage = 0; - frameCount = 0; - runCount = 0; - freqMapSize = 0; - externalFeatureSize = 0; - featureSize = 0; - blockSize = 0; + m_otherMatcher = p; // the first matcher will need this to be set later + m_firstPM = (!p); + m_frameCount = 0; + m_runCount = 0; + m_featureSize = m_featureExtractor.getFeatureSize(); + m_blockSize = 0; - blockSize = lrint(params.blockTime / params.hopTime); + m_blockSize = lrint(m_params.blockTime / m_params.hopTime); #ifdef DEBUG_MATCHER - cerr << "Matcher: blockSize = " << blockSize << endl; + cerr << "Matcher: m_blockSize = " << m_blockSize << endl; #endif - distance = 0; - bestPathCost = 0; - distYSizes = 0; - distXSize = 0; - - initialised = false; + m_initialised = false; } -Matcher::Matcher(Parameters parameters, Matcher *p, int featureSize) : - params(parameters), - externalFeatureSize(featureSize), - metric(parameters.distanceNorm) +Matcher::Matcher(Parameters parameters, Matcher *p, int m_featureSize_) : + m_params(parameters), + m_featureSize(m_featureSize_), + m_featureExtractor(FeatureExtractor::Parameters(m_params.sampleRate, m_params.fftSize)), // unused default config + m_metric(parameters.distanceNorm) { #ifdef DEBUG_MATCHER - cerr << "Matcher::Matcher(" << params.sampleRate << ", " << p << ", " << featureSize << ")" << endl; + cerr << "Matcher::Matcher(" << m_params.sampleRate << ", " << p << ", " << m_featureSize << ")" << endl; #endif - otherMatcher = p; // the first matcher will need this to be set later - firstPM = (!p); - ltAverage = 0; - frameCount = 0; - runCount = 0; - freqMapSize = 0; - featureSize = 0; - blockSize = 0; + m_otherMatcher = p; // the first matcher will need this to be set later + m_firstPM = (!p); + m_frameCount = 0; + m_runCount = 0; + m_blockSize = 0; - blockSize = lrint(params.blockTime / params.hopTime); + m_blockSize = lrint(m_params.blockTime / m_params.hopTime); #ifdef DEBUG_MATCHER - cerr << "Matcher: blockSize = " << blockSize << endl; + cerr << "Matcher: m_blockSize = " << m_blockSize << endl; #endif - distance = 0; - bestPathCost = 0; - distYSizes = 0; - distXSize = 0; - - initialised = false; - + m_initialised = false; } Matcher::~Matcher() @@ -93,312 +78,118 @@ #ifdef DEBUG_MATCHER cerr << "Matcher(" << this << ")::~Matcher()" << endl; #endif - - if (initialised) { - - for (int i = 0; i < distXSize; ++i) { - if (distance[i]) { - free(distance[i]); - free(bestPathCost[i]); - } - } - free(distance); - free(bestPathCost); - - free(first); - free(last); - - free(distYSizes); - } } void Matcher::init() { - if (initialised) return; + if (m_initialised) return; - initialised = true; + m_frames = vector > + (m_blockSize, vector(m_featureSize, 0)); - if (externalFeatureSize == 0) { - freqMapSize = getFeatureSizeFor(params); - featureSize = freqMapSize; - makeFreqMap(); - } else { - featureSize = externalFeatureSize; - } + m_distXSize = m_blockSize * 2; + size(); - initVector(prevFrame, featureSize); - initVector(newFrame, featureSize); - initMatrix(frames, blockSize, featureSize); - initVector(totalEnergies, blockSize); - - int distSize = (params.maxRunCount + 1) * blockSize; - - distXSize = blockSize * 2; - - distance = (unsigned char **)malloc(distXSize * sizeof(unsigned char *)); - bestPathCost = (int **)malloc(distXSize * sizeof(int *)); - distYSizes = (int *)malloc(distXSize * sizeof(int)); - - for (int i = 0; i < blockSize; ++i) { - distance[i] = (unsigned char *)malloc(distSize * sizeof(unsigned char)); - bestPathCost[i] = (int *)malloc(distSize * sizeof(int)); - distYSizes[i] = distSize; - } - for (int i = blockSize; i < distXSize; ++i) { - distance[i] = 0; - } + m_frameCount = 0; + m_runCount = 0; - first = (int *)malloc(distXSize * sizeof(int)); - last = (int *)malloc(distXSize * sizeof(int)); - - frameCount = 0; - runCount = 0; - ltAverage = 0; - -} // init - -void -Matcher::makeFreqMap() -{ - initVector(freqMap, params.fftSize/2 + 1); - - if (params.useChromaFrequencyMap) { -#ifdef DEBUG_MATCHER - cerr << "makeFreqMap: calling makeChromaFrequencyMap" << endl; -#endif - makeChromaFrequencyMap(); - } else { -#ifdef DEBUG_MATCHER - cerr << "makeFreqMap: calling makeStandardFrequencyMap" << endl; -#endif - makeStandardFrequencyMap(); - } -} // makeFreqMap() - -int -Matcher::getFeatureSizeFor(Parameters params) -{ - if (params.useChromaFrequencyMap) { - return 13; - } else { - return 84; - } + m_initialised = true; } void -Matcher::makeStandardFrequencyMap() +Matcher::size() { - double binWidth = params.sampleRate / params.fftSize; - int crossoverBin = (int)(2 / (pow(2, 1/12.0) - 1)); - int crossoverMidi = lrint(log(crossoverBin*binWidth/440.0)/ - log(2.0) * 12 + 69); - // freq = 440 * Math.pow(2, (midi-69)/12.0) / binWidth; - int i = 0; - while (i <= crossoverBin) { - freqMap[i] = i; - ++i; - } - while (i <= params.fftSize/2) { - double midi = log(i*binWidth/440.0) / log(2.0) * 12 + 69; - if (midi > 127) midi = 127; - int target = crossoverBin + lrint(midi) - crossoverMidi; - if (target >= freqMapSize) target = freqMapSize - 1; - freqMap[i++] = target; - } - - if (!silent) { - cerr << "Standard map size: " << freqMapSize - << "; Crossover at: " << crossoverBin << endl; - for (i = 0; i < params.fftSize / 2; i++) - cerr << "freqMap[" << i << "] = " << freqMap[i] << endl; - } -} // makeStandardFrequencyMap() - -void -Matcher::makeChromaFrequencyMap() -{ - double binWidth = params.sampleRate / params.fftSize; - int crossoverBin = (int)(1 / (pow(2, 1/12.0) - 1)); - // freq = 440 * Math.pow(2, (midi-69)/12.0) / binWidth; - int i = 0; - while (i <= crossoverBin) - freqMap[i++] = 0; - while (i <= params.fftSize/2) { - double midi = log(i*binWidth/440.0) / log(2.0) * 12 + 69; - freqMap[i++] = (lrint(midi)) % 12 + 1; - } - if (!silent) { - cerr << "Chroma map size: " << freqMapSize - << "; Crossover at: " << crossoverBin << endl; - for (i = 0; i < params.fftSize / 2; i++) - cerr << "freqMap[" << i << "] = " << freqMap[i] << endl; - } -} // makeChromaFrequencyMap() + int distSize = (m_params.maxRunCount + 1) * m_blockSize; + m_bestPathCost.resize(m_distXSize, vector(distSize, 0)); + m_distance.resize(m_distXSize, vector(distSize, 0)); + m_distYSizes.resize(m_distXSize, distSize); + m_first.resize(m_distXSize, 0); + m_last.resize(m_distXSize, 0); +} vector Matcher::consumeFrame(double *reBuffer, double *imBuffer) { - if (!initialised) init(); + if (!m_initialised) init(); - vector processedFrame = - processFrameFromFreqData(reBuffer, imBuffer); - + vector real(reBuffer, reBuffer + m_params.fftSize/2 + 1); + vector imag(imBuffer, imBuffer + m_params.fftSize/2 + 1); + vector feature = m_featureExtractor.process(real, imag); + int frameIndex = m_frameCount % m_blockSize; + m_frames[frameIndex] = feature; calcAdvance(); - return processedFrame; + return feature; } void Matcher::consumeFeatureVector(std::vector feature) { - if (!initialised) init(); - int frameIndex = frameCount % blockSize; - frames[frameIndex] = feature; + if (!m_initialised) init(); + int frameIndex = m_frameCount % m_blockSize; + m_frames[frameIndex] = feature; calcAdvance(); } -vector -Matcher::processFrameFromFreqData(double *reBuffer, double *imBuffer) -{ - for (int i = 0; i < (int)newFrame.size(); ++i) { - newFrame[i] = 0; - } - double rms = 0; - for (int i = 0; i <= params.fftSize/2; i++) { - double mag = reBuffer[i] * reBuffer[i] + - imBuffer[i] * imBuffer[i]; - rms += mag; - newFrame[freqMap[i]] += mag; - } - rms = sqrt(rms / (params.fftSize/2)); - - int frameIndex = frameCount % blockSize; - - vector processedFrame(freqMapSize, 0.0); - - double totalEnergy = 0; - if (params.useSpectralDifference) { - for (int i = 0; i < freqMapSize; i++) { - totalEnergy += newFrame[i]; - if (newFrame[i] > prevFrame[i]) { - processedFrame[i] = newFrame[i] - prevFrame[i]; - } else { - processedFrame[i] = 0; - } - } - } else { - for (int i = 0; i < freqMapSize; i++) { - processedFrame[i] = newFrame[i]; - totalEnergy += processedFrame[i]; - } - } - totalEnergies[frameIndex] = totalEnergy; - - double decay = frameCount >= 200 ? 0.99: - (frameCount < 100? 0: (frameCount - 100) / 100.0); - - if (ltAverage == 0) - ltAverage = totalEnergy; - else - ltAverage = ltAverage * decay + totalEnergy * (1.0 - decay); - - if (rms <= params.silenceThreshold) - for (int i = 0; i < freqMapSize; i++) - processedFrame[i] = 0; - else if (params.frameNorm == NormaliseFrameToSum1) - for (int i = 0; i < freqMapSize; i++) - processedFrame[i] /= totalEnergy; - else if (params.frameNorm == NormaliseFrameToLTAverage) - for (int i = 0; i < freqMapSize; i++) - processedFrame[i] /= ltAverage; - - vector tmp = prevFrame; - prevFrame = newFrame; - newFrame = tmp; - - frames[frameIndex] = processedFrame; - - if ((frameCount % 100) == 0) { - if (!silent) { - cerr << "Progress:" << frameCount << " " << ltAverage << endl; - } - } - - return processedFrame; -} - void Matcher::calcAdvance() { - int frameIndex = frameCount % blockSize; + int frameIndex = m_frameCount % m_blockSize; - if (frameCount >= distXSize) { -// std::cerr << "Resizing " << distXSize << " -> " << distXSize * 2 << std::endl; - distXSize *= 2; - distance = (unsigned char **)realloc(distance, distXSize * sizeof(unsigned char *)); - bestPathCost = (int **)realloc(bestPathCost, distXSize * sizeof(int *)); - distYSizes = (int *)realloc(distYSizes, distXSize * sizeof(int)); - first = (int *)realloc(first, distXSize * sizeof(int)); - last = (int *)realloc(last, distXSize * sizeof(int)); - - for (int i = distXSize/2; i < distXSize; ++i) { - distance[i] = 0; - } + if (m_frameCount >= m_distXSize) { + m_distXSize *= 2; + size(); } - if (firstPM && (frameCount >= blockSize)) { + if (m_firstPM && (m_frameCount >= m_blockSize)) { - int len = last[frameCount - blockSize] - - first[frameCount - blockSize]; + int len = m_last[m_frameCount - m_blockSize] - + m_first[m_frameCount - m_blockSize]; - // We need to copy distance[frameCount-blockSize] to - // distance[frameCount], and then truncate - // distance[frameCount-blockSize] to its first len elements. + // We need to copy distance[m_frameCount-m_blockSize] to + // distance[m_frameCount], and then truncate + // distance[m_frameCount-m_blockSize] to its first len elements. // Same for bestPathCost. /* - std::cerr << "Matcher(" << this << "): moving " << distYSizes[frameCount - blockSize] << " from " << frameCount - blockSize << " to " - << frameCount << ", allocating " << len << " for " - << frameCount - blockSize << std::endl; + std::cerr << "Matcher(" << this << "): moving " << distYSizes[m_frameCount - m_blockSize] << " from " << m_frameCount - m_blockSize << " to " + << m_frameCount << ", allocating " << len << " for " + << m_frameCount - m_blockSize << std::endl; */ - distance[frameCount] = distance[frameCount - blockSize]; - - distance[frameCount - blockSize] = (unsigned char *) - malloc(len * sizeof(unsigned char)); + m_distance[m_frameCount] = m_distance[m_frameCount - m_blockSize]; + m_distance[m_frameCount - m_blockSize].resize(len, 0); for (int i = 0; i < len; ++i) { - distance[frameCount - blockSize][i] = - distance[frameCount][i]; + m_distance[m_frameCount - m_blockSize][i] = + m_distance[m_frameCount][i]; } - bestPathCost[frameCount] = bestPathCost[frameCount - blockSize]; - - bestPathCost[frameCount - blockSize] = (int *) - malloc(len * sizeof(int)); + m_bestPathCost[m_frameCount] = m_bestPathCost[m_frameCount - m_blockSize]; + m_bestPathCost[m_frameCount - m_blockSize].resize(len, 0); for (int i = 0; i < len; ++i) { - bestPathCost[frameCount - blockSize][i] = - bestPathCost[frameCount][i]; + m_bestPathCost[m_frameCount - m_blockSize][i] = + m_bestPathCost[m_frameCount][i]; } - distYSizes[frameCount] = distYSizes[frameCount - blockSize]; - distYSizes[frameCount - blockSize] = len; + m_distYSizes[m_frameCount] = m_distYSizes[m_frameCount - m_blockSize]; + m_distYSizes[m_frameCount - m_blockSize] = len; } - int stop = otherMatcher->frameCount; - int index = stop - blockSize; + int stop = m_otherMatcher->m_frameCount; + int index = stop - m_blockSize; if (index < 0) index = 0; - first[frameCount] = index; - last[frameCount] = stop; + m_first[m_frameCount] = index; + m_last[m_frameCount] = stop; bool overflow = false; int mn= -1; int mx= -1; for ( ; index < stop; index++) { - int dMN = metric.calcDistanceScaled - (frames[frameIndex], - otherMatcher->frames[index % blockSize], - params.distanceScale); + int dMN = m_metric.calcDistanceScaled + (m_frames[frameIndex], + m_otherMatcher->m_frames[index % m_blockSize], + m_params.distanceScale); if (mx<0) mx = mn = dMN; @@ -411,99 +202,93 @@ dMN = 255; } - if ((frameCount == 0) && (index == 0)) // first element + if ((m_frameCount == 0) && (index == 0)) // first element setValue(0, 0, 0, 0, dMN); - else if (frameCount == 0) // first row + else if (m_frameCount == 0) // first row setValue(0, index, ADVANCE_OTHER, getValue(0, index-1, true), dMN); else if (index == 0) // first column - setValue(frameCount, index, ADVANCE_THIS, - getValue(frameCount - 1, 0, true), dMN); - else if (index == otherMatcher->frameCount - blockSize) { + setValue(m_frameCount, index, ADVANCE_THIS, + getValue(m_frameCount - 1, 0, true), dMN); + else if (index == m_otherMatcher->m_frameCount - m_blockSize) { // missing value(s) due to cutoff // - no previous value in current row (resp. column) // - no diagonal value if prev. dir. == curr. dirn - int min2 = getValue(frameCount - 1, index, true); - // if ((firstPM && (first[frameCount - 1] == index)) || - // (!firstPM && (last[index-1] < frameCount))) - if (first[frameCount - 1] == index) - setValue(frameCount, index, ADVANCE_THIS, min2, dMN); + int min2 = getValue(m_frameCount - 1, index, true); + // if ((m_firstPM && (first[m_frameCount - 1] == index)) || + // (!m_firstPM && (m_last[index-1] < m_frameCount))) + if (m_first[m_frameCount - 1] == index) + setValue(m_frameCount, index, ADVANCE_THIS, min2, dMN); else { - int min1 = getValue(frameCount - 1, index - 1, true); + int min1 = getValue(m_frameCount - 1, index - 1, true); if (min1 + dMN <= min2) - setValue(frameCount, index, ADVANCE_BOTH, min1,dMN); + setValue(m_frameCount, index, ADVANCE_BOTH, min1,dMN); else - setValue(frameCount, index, ADVANCE_THIS, min2,dMN); + setValue(m_frameCount, index, ADVANCE_THIS, min2,dMN); } } else { - int min1 = getValue(frameCount, index-1, true); - int min2 = getValue(frameCount - 1, index, true); - int min3 = getValue(frameCount - 1, index-1, true); + int min1 = getValue(m_frameCount, index-1, true); + int min2 = getValue(m_frameCount - 1, index, true); + int min3 = getValue(m_frameCount - 1, index-1, true); if (min1 <= min2) { if (min3 + dMN <= min1) - setValue(frameCount, index, ADVANCE_BOTH, min3,dMN); + setValue(m_frameCount, index, ADVANCE_BOTH, min3,dMN); else - setValue(frameCount, index, ADVANCE_OTHER,min1,dMN); + setValue(m_frameCount, index, ADVANCE_OTHER,min1,dMN); } else { if (min3 + dMN <= min2) - setValue(frameCount, index, ADVANCE_BOTH, min3,dMN); + setValue(m_frameCount, index, ADVANCE_BOTH, min3,dMN); else - setValue(frameCount, index, ADVANCE_THIS, min2,dMN); + setValue(m_frameCount, index, ADVANCE_THIS, min2,dMN); } } - otherMatcher->last[index]++; + m_otherMatcher->m_last[index]++; } // loop for row (resp. column) - frameCount++; - runCount++; + m_frameCount++; + m_runCount++; - otherMatcher->runCount = 0; + m_otherMatcher->m_runCount = 0; - if (overflow && !silent) + if (overflow) { cerr << "WARNING: overflow in distance metric: " - << "frame " << frameCount << ", val = " << mx << endl; - - if (!silent) - std::cerr << "Frame " << frameCount << ", d = " << (mx-mn) << std::endl; + << "frame " << m_frameCount << ", val = " << mx << endl; + } } int Matcher::getValue(int i, int j, bool firstAttempt) { - if (firstPM) - return bestPathCost[i][j - first[i]]; + if (m_firstPM) + return m_bestPathCost[i][j - m_first[i]]; else - return otherMatcher->bestPathCost[j][i - otherMatcher->first[j]]; + return m_otherMatcher->m_bestPathCost[j][i - m_otherMatcher->m_first[j]]; } // getValue() void Matcher::setValue(int i, int j, int dir, int value, int dMN) { - if (firstPM) { - distance[i][j - first[i]] = (unsigned char)((dMN & MASK) | dir); - bestPathCost[i][j - first[i]] = + if (m_firstPM) { + m_distance[i][j - m_first[i]] = (unsigned char)((dMN & MASK) | dir); + m_bestPathCost[i][j - m_first[i]] = (value + (dir==ADVANCE_BOTH? dMN*2: dMN)); } else { if (dir == ADVANCE_THIS) dir = ADVANCE_OTHER; else if (dir == ADVANCE_OTHER) dir = ADVANCE_THIS; - int idx = i - otherMatcher->first[j]; - if (idx == (int)otherMatcher->distYSizes[j]) { + int idx = i - m_otherMatcher->m_first[j]; + if (idx == (int)m_otherMatcher->m_distYSizes[j]) { // This should never happen, but if we allow arbitrary // pauses in either direction, and arbitrary lengths at // end, it is better than a segmentation fault. std::cerr << "Emergency resize: " << idx << " -> " << idx * 2 << std::endl; - otherMatcher->distYSizes[j] = idx * 2; - otherMatcher->bestPathCost[j] = - (int *)realloc(otherMatcher->bestPathCost[j], - idx * 2 * sizeof(int)); - otherMatcher->distance[j] = - (unsigned char *)realloc(otherMatcher->distance[j], - idx * 2 * sizeof(unsigned char)); + m_otherMatcher->m_distYSizes[j] = idx * 2; + m_otherMatcher->m_bestPathCost[j].resize(idx * 2, 0); + m_otherMatcher->m_distance[j].resize(idx * 2, 0); } - otherMatcher->distance[j][idx] = (unsigned char)((dMN & MASK) | dir); - otherMatcher->bestPathCost[j][idx] = + m_otherMatcher->m_distance[j][idx] = (unsigned char)((dMN & MASK) | dir); + m_otherMatcher->m_bestPathCost[j][idx] = (value + (dir==ADVANCE_BOTH? dMN*2: dMN)); } } // setValue() diff -r a68204b9a529 -r 47f7649ab9d5 src/Matcher.h --- a/src/Matcher.h Fri Nov 14 10:24:13 2014 +0000 +++ b/src/Matcher.h Fri Nov 14 10:25:57 2014 +0000 @@ -28,6 +28,7 @@ #define MASK 0xfc #include "DistanceMetric.h" +#include "FeatureExtractor.h" using std::vector; using std::string; @@ -43,42 +44,21 @@ class Matcher { public: - enum FrameNormalisation { - - /** Do not normalise audio frames */ - NoFrameNormalisation, - - /** Normalise each frame of audio to have a sum of 1 */ - NormaliseFrameToSum1, - - /** Normalise each frame of audio by the long-term average - * of the summed energy */ - NormaliseFrameToLTAverage, - }; - struct Parameters { Parameters(float rate_, double hopTime_, int fftSize_) : sampleRate(rate_), - frameNorm(NormaliseFrameToSum1), distanceNorm(DistanceMetric::NormaliseDistanceToLogSum), distanceScale(90.0), - useSpectralDifference(true), - useChromaFrequencyMap(false), hopTime(hopTime_), fftSize(fftSize_), blockTime(10.0), - silenceThreshold(0.01), - decay(0.99), maxRunCount(3) {} /** Sample rate of audio */ float sampleRate; - /** Type of audio frame normalisation */ - FrameNormalisation frameNorm; - /** Type of distance metric normalisation */ DistanceMetric::DistanceNormalisation distanceNorm; @@ -88,40 +68,23 @@ */ double distanceScale; - /** Flag indicating whether or not the half-wave rectified - * spectral difference should be used in calculating the - * distance metric for pairs of audio frames, instead of the - * straight spectrum values. */ - bool useSpectralDifference; - - /** Flag indicating whether to use a chroma frequency map (12 - * bins) instead of the default warped spectrogram */ - bool useChromaFrequencyMap; - /** Spacing of audio frames (determines the amount of overlap or * skip between frames). This value is expressed in * seconds. */ double hopTime; - + /** Size of an FFT frame in samples. Note that the data passed * in to Matcher is already in the frequency domain, so this * expresses the size of the frame that the caller will be - * providing. - */ + * providing. */ int fftSize; - + /** The width of the search band (error margin) around the current * match position, measured in seconds. Strictly speaking the * width is measured backwards from the current point, since the * algorithm has to work causally. */ double blockTime; - - /** RMS level below which frame is considered silent */ - double silenceThreshold; - - /** Frame-to-frame decay factor in calculating long-term average */ - double decay; /** Maximum number of frames sequentially processed by this * matcher, without a frame of the other matcher being @@ -130,107 +93,6 @@ int maxRunCount; }; -protected: - /** Points to the other performance with which this one is being - * compared. The data for the distance metric and the dynamic - * time warping is shared between the two matchers. In the - * original version, only one of the two performance matchers - * contained the distance metric. (See first) - */ - Matcher *otherMatcher; - - /** Indicates which performance is considered primary (the - * score). This is the performance shown on the vertical axis, - * and referred to as "this" in the codes for the direction of - * DTW steps. */ - bool firstPM; - - /** Configuration parameters */ - Parameters params; - - /** Width of the search band in FFT frames (see blockTime) */ - int blockSize; - - /** The number of frames of audio data which have been read. */ - int frameCount; - - /** Long term average frame energy (in frequency domain - * representation). */ - double ltAverage; - - /** The number of frames sequentially processed by this matcher, - * without a frame of the other matcher being processed. - */ - int runCount; - - /** A mapping function for mapping FFT bins to final frequency - * bins. The mapping is linear (1-1) until the resolution - * reaches 2 points per semitone, then logarithmic with a - * semitone resolution. e.g. for 44.1kHz sampling rate and - * fftSize of 2048 (46ms), bin spacing is 21.5Hz, which is mapped - * linearly for bins 0-34 (0 to 732Hz), and logarithmically for - * the remaining bins (midi notes 79 to 127, bins 35 to 83), - * where all energy above note 127 is mapped into the final - * bin. */ - vector freqMap; - - /** The number of entries in freqMap. */ - int freqMapSize; - - /** The number of values in an externally-supplied feature vector, - * used in preference to freqMap/freqMapSize if constructed with - * the external feature version of the Matcher constructor. If - * this is zero, the internal feature extractor will be used as - * normal. - */ - int externalFeatureSize; - - /** The number of values in the feature vectors actually in - * use. This will be externalFeatureSize if greater than zero, or - * freqMapSize otherwise. - */ - int featureSize; - - /** The most recent frame; used for calculating the frame to frame - * spectral difference. These are therefore frequency warped but - * not yet normalised. */ - vector prevFrame; - vector newFrame; - - /** A block of previously seen frames are stored in this structure - * for calculation of the distance matrix as the new frames are - * read in. One can think of the structure of the array as a - * circular buffer of vectors. These are the frames with all - * applicable processing applied (e.g. spectral difference, - * normalisation), unlike prevFrame and newFrame. The total - * energy of frames[i] is stored in totalEnergies[i]. */ - vector > frames; - - /** The total energy of each frame in the frames block. */ - vector totalEnergies; - - /** The best path cost matrix. */ - int **bestPathCost; - - /** The distance matrix. */ - unsigned char **distance; - - /** The bounds of each row of data in the distance and path cost matrices.*/ - int *first; - int *last; - - /** Height of each column in distance and bestPathCost matrices */ - int *distYSizes; - - /** Width of distance and bestPathCost matrices and first and last vectors */ - int distXSize; - - bool initialised; - - /** Disable or enable debugging output */ - static bool silent; - -public: /** Constructor for Matcher. * * @param p The Matcher representing the performance with which @@ -238,7 +100,9 @@ * between the two matchers (currently one possesses the distance * matrix and optimal path matrix). */ - Matcher(Parameters parameters, Matcher *p); + Matcher(Parameters parameters, + FeatureExtractor::Parameters featureParams, + Matcher *p); /** Constructor for Matcher using externally supplied features. * A Matcher made using this constructor will not carry out its @@ -257,76 +121,31 @@ ~Matcher(); - /** For debugging, outputs information about the Matcher to - * standard error. - */ - void print(); - /** Adds a link to the Matcher object representing the performance * which is going to be matched to this one. * * @param p the Matcher representing the other performance */ void setOtherMatcher(Matcher *p) { - otherMatcher = p; + m_otherMatcher = p; } // setOtherMatcher() int getFrameCount() { - return frameCount; + return m_frameCount; } - /** - * Return the feature vector size that will be used for the given - * parameters. - */ - static int getFeatureSizeFor(Parameters params); - protected: - template - void initVector(vector &vec, int sz, T dflt = 0) { - vec.clear(); - while ((int)vec.size() < sz) vec.push_back(dflt); - } - - template - void initMatrix(vector > &mat, int hsz, int vsz, - T dflt = 0, int fillTo = -1) { - mat.clear(); - if (fillTo < 0) fillTo = hsz; - for (int i = 0; i < hsz; ++i) { - mat.push_back(vector()); - if (i < fillTo) { - while ((int)mat[i].size() < vsz) { - mat[i].push_back(dflt); - } - } - } - } - + /** Create internal structures and reset. */ void init(); - void makeFreqMap(); + /** The distXSize value has changed: resize internal buffers. */ + void size(); - /** Creates a map of FFT frequency bins to comparison bins. Where - * the spacing of FFT bins is less than 0.5 semitones, the - * mapping is one to one. Where the spacing is greater than 0.5 - * semitones, the FFT energy is mapped into semitone-wide - * bins. No scaling is performed; that is the energy is summed - * into the comparison bins. See also consumeFrame() - */ - void makeStandardFrequencyMap(); - - void makeChromaFrequencyMap(); - - /** Processes a frame of audio data by first computing the STFT - * with a Hamming window, then mapping the frequency bins into a - * part-linear part-logarithmic array, then (optionally) - * computing the half-wave rectified spectral difference from the - * previous frame, then (optionally) normalising to a sum of 1, - * then calculating the distance to all frames stored in the - * otherMatcher and storing them in the distance matrix, and - * finally updating the optimal path matrix using the dynamic - * time warping algorithm. + /** Process a frequency-domain frame of audio data using the + * built-in FeatureExtractor, then calculating the distance to + * all frames stored in the otherMatcher and storing them in the + * distance matrix, and finally updating the optimal path matrix + * using the dynamic time warping algorithm. * * Return value is the frame (post-processed, with warping, * rectification, and normalisation as appropriate). @@ -369,10 +188,65 @@ */ void setValue(int i, int j, int dir, int value, int dMN); - vector processFrameFromFreqData(double *, double *); void calcAdvance(); - DistanceMetric metric; + /** Points to the other performance with which this one is being + * compared. The data for the distance metric and the dynamic + * time warping is shared between the two matchers. In the + * original version, only one of the two performance matchers + * contained the distance metric. (See first) + */ + Matcher *m_otherMatcher; + + /** Indicates which performance is considered primary (the + * score). This is the performance shown on the vertical axis, + * and referred to as "this" in the codes for the direction of + * DTW steps. */ + bool m_firstPM; + + /** Configuration parameters */ + Parameters m_params; + + /** Width of the search band in FFT frames (see blockTime) */ + int m_blockSize; + + /** The number of frames of audio data which have been read. */ + int m_frameCount; + + /** The number of frames sequentially processed by this matcher, + * without a frame of the other matcher being processed. + */ + int m_runCount; + + /** The number of values in a feature vector. */ + int m_featureSize; + + /** A block of previously seen feature frames is stored in this + * structure for calculation of the distance matrix as the new + * frames are received. One can think of the structure of the + * array as a circular buffer of vectors. */ + vector > m_frames; + + /** The best path cost matrix. */ + vector > m_bestPathCost; + + /** The distance matrix. */ + vector > m_distance; + + /** The bounds of each row of data in the distance and path cost matrices.*/ + vector m_first; + vector m_last; + + /** Height of each column in distance and bestPathCost matrices */ + vector m_distYSizes; + + /** Width of distance and bestPathCost matrices and first and last vectors */ + int m_distXSize; + + bool m_initialised; + + FeatureExtractor m_featureExtractor; + DistanceMetric m_metric; friend class MatchFeeder; friend class MatchFeatureFeeder;