# HG changeset patch # User Chris Cannam # Date 1415883054 0 # Node ID 8cce4e13ede3260acceb285b4c112b860dd23683 # Parent 91410483228b99e6c1be5c687b86d5278f41aaa4 Make use of FeatureExtractor in Matcher diff -r 91410483228b -r 8cce4e13ede3 src/FeatureExtractor.cpp --- a/src/FeatureExtractor.cpp Thu Nov 13 12:03:52 2014 +0000 +++ b/src/FeatureExtractor.cpp Thu Nov 13 12:50:54 2014 +0000 @@ -79,6 +79,8 @@ m_freqMap[i++] = crossoverBin + lrint(midi) - crossoverMidi; } + cerr << "rate = " << m_params.sampleRate << ", m_featureSize = " << m_featureSize << ", m_freqMap[" << i << "-1] = " << m_freqMap[i-1] << endl; + assert(m_featureSize == m_freqMap[i-1] + 1); } diff -r 91410483228b -r 8cce4e13ede3 src/FeatureExtractor.h --- a/src/FeatureExtractor.h Thu Nov 13 12:03:52 2014 +0000 +++ b/src/FeatureExtractor.h Thu Nov 13 12:50:54 2014 +0000 @@ -107,7 +107,8 @@ /** * Process one frequency-domain audio frame (provided as real & * imaginary components from the FFT output). Return a feature - * vector of size given by getFeatureSize(). + * vector of size given by getFeatureSize(). Input vectors must + * have at least params.fftSize/2+1 elements each. * * Operates by mapping the frequency bins into a part-linear * part-logarithmic array, then (optionally) computing the diff -r 91410483228b -r 8cce4e13ede3 src/MatchVampPlugin.cpp --- a/src/MatchVampPlugin.cpp Thu Nov 13 12:03:52 2014 +0000 +++ b/src/MatchVampPlugin.cpp Thu Nov 13 12:50:54 2014 +0000 @@ -57,7 +57,9 @@ m_locked(false), m_smooth(true), m_params(inputSampleRate, defaultStepTime, m_blockSize), - m_defaultParams(inputSampleRate, defaultStepTime, m_blockSize) + m_defaultParams(inputSampleRate, defaultStepTime, m_blockSize), + m_feParams(inputSampleRate, m_blockSize), + m_defaultFeParams(inputSampleRate, m_blockSize) { if (inputSampleRate < sampleRateMin) { std::cerr << "MatchVampPlugin::MatchVampPlugin: input sample rate " @@ -157,7 +159,7 @@ desc.description = "Type of normalisation to use for frequency-domain audio features"; desc.minValue = 0; desc.maxValue = 2; - desc.defaultValue = (int)m_defaultParams.frameNorm; + desc.defaultValue = (int)m_defaultFeParams.frameNorm; desc.isQuantized = true; desc.quantizeStep = 1; desc.valueNames.clear(); @@ -187,7 +189,7 @@ desc.description = "Whether to use half-wave rectified spectral difference instead of straight spectrum"; desc.minValue = 0; desc.maxValue = 1; - desc.defaultValue = m_defaultParams.useSpectralDifference ? 1 : 0; + desc.defaultValue = m_defaultFeParams.useSpectralDifference ? 1 : 0; desc.isQuantized = true; desc.quantizeStep = 1; list.push_back(desc); @@ -197,7 +199,7 @@ desc.description = "Whether to use a chroma frequency map instead of the default warped spectrogram"; desc.minValue = 0; desc.maxValue = 1; - desc.defaultValue = m_defaultParams.useChromaFrequencyMap ? 1 : 0; + desc.defaultValue = m_defaultFeParams.useChromaFrequencyMap ? 1 : 0; desc.isQuantized = true; desc.quantizeStep = 1; list.push_back(desc); @@ -243,13 +245,13 @@ if (name == "serialise") { return m_serialise ? 1.0 : 0.0; } else if (name == "framenorm") { - return (int)m_params.frameNorm; + return (int)m_feParams.frameNorm; } else if (name == "distnorm") { return (int)m_params.distanceNorm; } else if (name == "usespecdiff") { - return m_params.useSpectralDifference ? 1.0 : 0.0; + return m_feParams.useSpectralDifference ? 1.0 : 0.0; } else if (name == "usechroma") { - return m_params.useChromaFrequencyMap ? 1.0 : 0.0; + return m_feParams.useChromaFrequencyMap ? 1.0 : 0.0; } else if (name == "gradientlimit") { return m_params.maxRunCount; } else if (name == "zonewidth") { @@ -267,13 +269,13 @@ if (name == "serialise") { m_serialise = (value > 0.5); } else if (name == "framenorm") { - m_params.frameNorm = (Matcher::FrameNormalisation)(int(value + 0.1)); + m_feParams.frameNorm = (FeatureExtractor::FrameNormalisation)(int(value + 0.1)); } else if (name == "distnorm") { m_params.distanceNorm = (DistanceMetric::DistanceNormalisation)(int(value + 0.1)); } else if (name == "usespecdiff") { - m_params.useSpectralDifference = (value > 0.5); + m_feParams.useSpectralDifference = (value > 0.5); } else if (name == "usechroma") { - m_params.useChromaFrequencyMap = (value > 0.5); + m_feParams.useChromaFrequencyMap = (value > 0.5); } else if (name == "gradientlimit") { m_params.maxRunCount = int(value + 0.1); } else if (name == "zonewidth") { @@ -300,8 +302,9 @@ { m_params.hopTime = m_stepTime; m_params.fftSize = m_blockSize; - pm1 = new Matcher(m_params, 0); - pm2 = new Matcher(m_params, pm1); + m_feParams.fftSize = m_blockSize; + pm1 = new Matcher(m_params, m_feParams, 0); + pm2 = new Matcher(m_params, m_feParams, pm1); pm1->setOtherMatcher(pm2); feeder = new MatchFeeder(pm1, pm2); } @@ -420,12 +423,14 @@ m_abRatioOutNo = list.size(); list.push_back(desc); + int featureSize = FeatureExtractor(m_feParams).getFeatureSize(); + desc.identifier = "a_features"; desc.name = "A Features"; desc.description = "Spectral features extracted from performance A"; desc.unit = ""; desc.hasFixedBinCount = true; - desc.binCount = Matcher::getFeatureSizeFor(m_params); + desc.binCount = featureSize; desc.hasKnownExtents = false; desc.isQuantized = false; desc.sampleType = OutputDescriptor::FixedSampleRate; @@ -438,7 +443,7 @@ desc.description = "Spectral features extracted from performance B"; desc.unit = ""; desc.hasFixedBinCount = true; - desc.binCount = Matcher::getFeatureSizeFor(m_params); + desc.binCount = featureSize; desc.hasKnownExtents = false; desc.isQuantized = false; desc.sampleType = OutputDescriptor::FixedSampleRate; diff -r 91410483228b -r 8cce4e13ede3 src/MatchVampPlugin.h --- a/src/MatchVampPlugin.h Thu Nov 13 12:03:52 2014 +0000 +++ b/src/MatchVampPlugin.h Thu Nov 13 12:50:54 2014 +0000 @@ -26,6 +26,7 @@ #endif #include "Matcher.h" +#include "FeatureExtractor.h" class MatchFeeder; @@ -83,6 +84,9 @@ Matcher::Parameters m_params; Matcher::Parameters m_defaultParams; + FeatureExtractor::Parameters m_feParams; + FeatureExtractor::Parameters m_defaultFeParams; + mutable int m_pathOutNo; mutable int m_abOutNo; mutable int m_baOutNo; diff -r 91410483228b -r 8cce4e13ede3 src/Matcher.cpp --- a/src/Matcher.cpp Thu Nov 13 12:03:52 2014 +0000 +++ b/src/Matcher.cpp Thu Nov 13 12:50:54 2014 +0000 @@ -25,8 +25,11 @@ //#define DEBUG_MATCHER 1 -Matcher::Matcher(Parameters parameters, Matcher *p) : +Matcher::Matcher(Parameters parameters, + FeatureExtractor::Parameters feParams, + Matcher *p) : params(parameters), + featureExtractor(feParams), metric(parameters.distanceNorm) { #ifdef DEBUG_MATCHER @@ -35,12 +38,9 @@ otherMatcher = p; // the first matcher will need this to be set later firstPM = (!p); - ltAverage = 0; frameCount = 0; runCount = 0; - freqMapSize = 0; - externalFeatureSize = 0; - featureSize = 0; + featureSize = featureExtractor.getFeatureSize(); blockSize = 0; blockSize = lrint(params.blockTime / params.hopTime); @@ -48,17 +48,13 @@ cerr << "Matcher: blockSize = " << blockSize << endl; #endif - distance = 0; - bestPathCost = 0; - distYSizes = 0; - distXSize = 0; - initialised = false; } -Matcher::Matcher(Parameters parameters, Matcher *p, int featureSize) : +Matcher::Matcher(Parameters parameters, Matcher *p, int featureSize_) : params(parameters), - externalFeatureSize(featureSize), + featureSize(featureSize_), + featureExtractor(FeatureExtractor::Parameters(params.sampleRate, params.fftSize)), // unused default config metric(parameters.distanceNorm) { #ifdef DEBUG_MATCHER @@ -67,11 +63,8 @@ otherMatcher = p; // the first matcher will need this to be set later firstPM = (!p); - ltAverage = 0; frameCount = 0; runCount = 0; - freqMapSize = 0; - featureSize = 0; blockSize = 0; blockSize = lrint(params.blockTime / params.hopTime); @@ -79,13 +72,7 @@ cerr << "Matcher: blockSize = " << blockSize << endl; #endif - distance = 0; - bestPathCost = 0; - distYSizes = 0; - distXSize = 0; - initialised = false; - } Matcher::~Matcher() @@ -93,23 +80,6 @@ #ifdef DEBUG_MATCHER cerr << "Matcher(" << this << ")::~Matcher()" << endl; #endif - - if (initialised) { - - for (int i = 0; i < distXSize; ++i) { - if (distance[i]) { - free(distance[i]); - free(bestPathCost[i]); - } - } - free(distance); - free(bestPathCost); - - free(first); - free(last); - - free(distYSizes); - } } void @@ -117,134 +87,43 @@ { if (initialised) return; - initialised = true; - - if (externalFeatureSize == 0) { - freqMapSize = getFeatureSizeFor(params); - featureSize = freqMapSize; - makeFreqMap(); - } else { - featureSize = externalFeatureSize; - } - - initVector(prevFrame, featureSize); - initVector(newFrame, featureSize); - initMatrix(frames, blockSize, featureSize); - initVector(totalEnergies, blockSize); - - int distSize = (params.maxRunCount + 1) * blockSize; + frames = vector > + (blockSize, vector(featureSize, 0)); distXSize = blockSize * 2; - - distance = (unsigned char **)malloc(distXSize * sizeof(unsigned char *)); - bestPathCost = (int **)malloc(distXSize * sizeof(int *)); - distYSizes = (int *)malloc(distXSize * sizeof(int)); - - for (int i = 0; i < blockSize; ++i) { - distance[i] = (unsigned char *)malloc(distSize * sizeof(unsigned char)); - bestPathCost[i] = (int *)malloc(distSize * sizeof(int)); - distYSizes[i] = distSize; - } - for (int i = blockSize; i < distXSize; ++i) { - distance[i] = 0; - } - - first = (int *)malloc(distXSize * sizeof(int)); - last = (int *)malloc(distXSize * sizeof(int)); + expand(); frameCount = 0; runCount = 0; - ltAverage = 0; - -} // init - -void -Matcher::makeFreqMap() -{ - initVector(freqMap, params.fftSize/2 + 1); - - if (params.useChromaFrequencyMap) { -#ifdef DEBUG_MATCHER - cerr << "makeFreqMap: calling makeChromaFrequencyMap" << endl; -#endif - makeChromaFrequencyMap(); - } else { -#ifdef DEBUG_MATCHER - cerr << "makeFreqMap: calling makeStandardFrequencyMap" << endl; -#endif - makeStandardFrequencyMap(); - } -} // makeFreqMap() - -int -Matcher::getFeatureSizeFor(Parameters params) -{ - if (params.useChromaFrequencyMap) { - return 13; - } else { - return 84; - } + + initialised = true; } void -Matcher::makeStandardFrequencyMap() +Matcher::expand() { - double binWidth = params.sampleRate / params.fftSize; - int crossoverBin = (int)(2 / (pow(2, 1/12.0) - 1)); - int crossoverMidi = lrint(log(crossoverBin*binWidth/440.0)/ - log(2.0) * 12 + 69); - // freq = 440 * Math.pow(2, (midi-69)/12.0) / binWidth; - int i = 0; - while (i <= crossoverBin) { - freqMap[i] = i; - ++i; - } - while (i <= params.fftSize/2) { - double midi = log(i*binWidth/440.0) / log(2.0) * 12 + 69; - if (midi > 127) midi = 127; - freqMap[i++] = crossoverBin + lrint(midi) - crossoverMidi; - } - assert(freqMapSize == freqMap[i-1] + 1); - if (!silent) { - cerr << "Standard map size: " << freqMapSize - << "; Crossover at: " << crossoverBin << endl; - for (i = 0; i < params.fftSize / 2; i++) - cerr << "freqMap[" << i << "] = " << freqMap[i] << endl; - } -} // makeStandardFrequencyMap() + int distSize = (params.maxRunCount + 1) * blockSize; -void -Matcher::makeChromaFrequencyMap() -{ - double binWidth = params.sampleRate / params.fftSize; - int crossoverBin = (int)(1 / (pow(2, 1/12.0) - 1)); - // freq = 440 * Math.pow(2, (midi-69)/12.0) / binWidth; - int i = 0; - while (i <= crossoverBin) - freqMap[i++] = 0; - while (i <= params.fftSize/2) { - double midi = log(i*binWidth/440.0) / log(2.0) * 12 + 69; - freqMap[i++] = (lrint(midi)) % 12 + 1; - } - if (!silent) { - cerr << "Chroma map size: " << freqMapSize - << "; Crossover at: " << crossoverBin << endl; - for (i = 0; i < params.fftSize / 2; i++) - cerr << "freqMap[" << i << "] = " << freqMap[i] << endl; - } -} // makeChromaFrequencyMap() + bestPathCost.resize(distXSize, vector(distSize, 0)); + distance.resize(distXSize, vector(distSize, 0)); + distYSizes.resize(blockSize, distSize); + first.resize(distXSize, 0); + last.resize(distXSize, 0); +} vector Matcher::consumeFrame(double *reBuffer, double *imBuffer) { if (!initialised) init(); - vector processedFrame = - processFrameFromFreqData(reBuffer, imBuffer); - + vector real(reBuffer, reBuffer + params.fftSize/2 + 1); + vector imag(imBuffer, imBuffer + params.fftSize/2 + 1); + vector feature = featureExtractor.process(real, imag); + int frameIndex = frameCount % blockSize; + frames[frameIndex] = feature; calcAdvance(); - return processedFrame; + return feature; } void @@ -256,93 +135,13 @@ calcAdvance(); } -vector -Matcher::processFrameFromFreqData(double *reBuffer, double *imBuffer) -{ - for (int i = 0; i < (int)newFrame.size(); ++i) { - newFrame[i] = 0; - } - double rms = 0; - for (int i = 0; i <= params.fftSize/2; i++) { - double mag = reBuffer[i] * reBuffer[i] + - imBuffer[i] * imBuffer[i]; - rms += mag; - newFrame[freqMap[i]] += mag; - } - rms = sqrt(rms / (params.fftSize/2)); - - int frameIndex = frameCount % blockSize; - - vector processedFrame(freqMapSize, 0.0); - - double totalEnergy = 0; - if (params.useSpectralDifference) { - for (int i = 0; i < freqMapSize; i++) { - totalEnergy += newFrame[i]; - if (newFrame[i] > prevFrame[i]) { - processedFrame[i] = newFrame[i] - prevFrame[i]; - } else { - processedFrame[i] = 0; - } - } - } else { - for (int i = 0; i < freqMapSize; i++) { - processedFrame[i] = newFrame[i]; - totalEnergy += processedFrame[i]; - } - } - totalEnergies[frameIndex] = totalEnergy; - - double decay = frameCount >= 200 ? 0.99: - (frameCount < 100? 0: (frameCount - 100) / 100.0); - - if (ltAverage == 0) - ltAverage = totalEnergy; - else - ltAverage = ltAverage * decay + totalEnergy * (1.0 - decay); - - if (rms <= params.silenceThreshold) - for (int i = 0; i < freqMapSize; i++) - processedFrame[i] = 0; - else if (params.frameNorm == NormaliseFrameToSum1) - for (int i = 0; i < freqMapSize; i++) - processedFrame[i] /= totalEnergy; - else if (params.frameNorm == NormaliseFrameToLTAverage) - for (int i = 0; i < freqMapSize; i++) - processedFrame[i] /= ltAverage; - - vector tmp = prevFrame; - prevFrame = newFrame; - newFrame = tmp; - - frames[frameIndex] = processedFrame; - - if ((frameCount % 100) == 0) { - if (!silent) { - cerr << "Progress:" << frameCount << " " << ltAverage << endl; - } - } - - return processedFrame; -} - void Matcher::calcAdvance() { int frameIndex = frameCount % blockSize; if (frameCount >= distXSize) { -// std::cerr << "Resizing " << distXSize << " -> " << distXSize * 2 << std::endl; - distXSize *= 2; - distance = (unsigned char **)realloc(distance, distXSize * sizeof(unsigned char *)); - bestPathCost = (int **)realloc(bestPathCost, distXSize * sizeof(int *)); - distYSizes = (int *)realloc(distYSizes, distXSize * sizeof(int)); - first = (int *)realloc(first, distXSize * sizeof(int)); - last = (int *)realloc(last, distXSize * sizeof(int)); - - for (int i = distXSize/2; i < distXSize; ++i) { - distance[i] = 0; - } + expand(); } if (firstPM && (frameCount >= blockSize)) { @@ -360,18 +159,14 @@ << frameCount - blockSize << std::endl; */ distance[frameCount] = distance[frameCount - blockSize]; - - distance[frameCount - blockSize] = (unsigned char *) - malloc(len * sizeof(unsigned char)); + distance[frameCount - blockSize].resize(len, 0); for (int i = 0; i < len; ++i) { distance[frameCount - blockSize][i] = distance[frameCount][i]; } bestPathCost[frameCount] = bestPathCost[frameCount - blockSize]; - - bestPathCost[frameCount - blockSize] = (int *) - malloc(len * sizeof(int)); + bestPathCost[frameCount - blockSize].resize(len, 0); for (int i = 0; i < len; ++i) { bestPathCost[frameCount - blockSize][i] = bestPathCost[frameCount][i]; @@ -493,12 +288,8 @@ // end, it is better than a segmentation fault. std::cerr << "Emergency resize: " << idx << " -> " << idx * 2 << std::endl; otherMatcher->distYSizes[j] = idx * 2; - otherMatcher->bestPathCost[j] = - (int *)realloc(otherMatcher->bestPathCost[j], - idx * 2 * sizeof(int)); - otherMatcher->distance[j] = - (unsigned char *)realloc(otherMatcher->distance[j], - idx * 2 * sizeof(unsigned char)); + otherMatcher->bestPathCost[j].resize(idx * 2, 0); + otherMatcher->distance[j].resize(idx * 2, 0); } otherMatcher->distance[j][idx] = (unsigned char)((dMN & MASK) | dir); otherMatcher->bestPathCost[j][idx] = diff -r 91410483228b -r 8cce4e13ede3 src/Matcher.h --- a/src/Matcher.h Thu Nov 13 12:03:52 2014 +0000 +++ b/src/Matcher.h Thu Nov 13 12:50:54 2014 +0000 @@ -28,6 +28,7 @@ #define MASK 0xfc #include "DistanceMetric.h" +#include "FeatureExtractor.h" using std::vector; using std::string; @@ -43,42 +44,21 @@ class Matcher { public: - enum FrameNormalisation { - - /** Do not normalise audio frames */ - NoFrameNormalisation, - - /** Normalise each frame of audio to have a sum of 1 */ - NormaliseFrameToSum1, - - /** Normalise each frame of audio by the long-term average - * of the summed energy */ - NormaliseFrameToLTAverage, - }; - struct Parameters { Parameters(float rate_, double hopTime_, int fftSize_) : sampleRate(rate_), - frameNorm(NormaliseFrameToSum1), distanceNorm(DistanceMetric::NormaliseDistanceToLogSum), distanceScale(90.0), - useSpectralDifference(true), - useChromaFrequencyMap(false), hopTime(hopTime_), fftSize(fftSize_), blockTime(10.0), - silenceThreshold(0.01), - decay(0.99), maxRunCount(3) {} /** Sample rate of audio */ float sampleRate; - /** Type of audio frame normalisation */ - FrameNormalisation frameNorm; - /** Type of distance metric normalisation */ DistanceMetric::DistanceNormalisation distanceNorm; @@ -88,40 +68,23 @@ */ double distanceScale; - /** Flag indicating whether or not the half-wave rectified - * spectral difference should be used in calculating the - * distance metric for pairs of audio frames, instead of the - * straight spectrum values. */ - bool useSpectralDifference; - - /** Flag indicating whether to use a chroma frequency map (12 - * bins) instead of the default warped spectrogram */ - bool useChromaFrequencyMap; - /** Spacing of audio frames (determines the amount of overlap or * skip between frames). This value is expressed in * seconds. */ double hopTime; - + /** Size of an FFT frame in samples. Note that the data passed * in to Matcher is already in the frequency domain, so this * expresses the size of the frame that the caller will be - * providing. - */ + * providing. */ int fftSize; - + /** The width of the search band (error margin) around the current * match position, measured in seconds. Strictly speaking the * width is measured backwards from the current point, since the * algorithm has to work causally. */ double blockTime; - - /** RMS level below which frame is considered silent */ - double silenceThreshold; - - /** Frame-to-frame decay factor in calculating long-term average */ - double decay; /** Maximum number of frames sequentially processed by this * matcher, without a frame of the other matcher being @@ -154,49 +117,14 @@ /** The number of frames of audio data which have been read. */ int frameCount; - /** Long term average frame energy (in frequency domain - * representation). */ - double ltAverage; - /** The number of frames sequentially processed by this matcher, * without a frame of the other matcher being processed. */ int runCount; - /** A mapping function for mapping FFT bins to final frequency - * bins. The mapping is linear (1-1) until the resolution - * reaches 2 points per semitone, then logarithmic with a - * semitone resolution. e.g. for 44.1kHz sampling rate and - * fftSize of 2048 (46ms), bin spacing is 21.5Hz, which is mapped - * linearly for bins 0-34 (0 to 732Hz), and logarithmically for - * the remaining bins (midi notes 79 to 127, bins 35 to 83), - * where all energy above note 127 is mapped into the final - * bin. */ - vector freqMap; - - /** The number of entries in freqMap. */ - int freqMapSize; - - /** The number of values in an externally-supplied feature vector, - * used in preference to freqMap/freqMapSize if constructed with - * the external feature version of the Matcher constructor. If - * this is zero, the internal feature extractor will be used as - * normal. - */ - int externalFeatureSize; - - /** The number of values in the feature vectors actually in - * use. This will be externalFeatureSize if greater than zero, or - * freqMapSize otherwise. - */ + /** The number of values in a feature vector. */ int featureSize; - /** The most recent frame; used for calculating the frame to frame - * spectral difference. These are therefore frequency warped but - * not yet normalised. */ - vector prevFrame; - vector newFrame; - /** A block of previously seen frames are stored in this structure * for calculation of the distance matrix as the new frames are * read in. One can think of the structure of the array as a @@ -206,21 +134,18 @@ * energy of frames[i] is stored in totalEnergies[i]. */ vector > frames; - /** The total energy of each frame in the frames block. */ - vector totalEnergies; - /** The best path cost matrix. */ - int **bestPathCost; + vector > bestPathCost; /** The distance matrix. */ - unsigned char **distance; + vector > distance; /** The bounds of each row of data in the distance and path cost matrices.*/ - int *first; - int *last; + vector first; + vector last; /** Height of each column in distance and bestPathCost matrices */ - int *distYSizes; + vector distYSizes; /** Width of distance and bestPathCost matrices and first and last vectors */ int distXSize; @@ -238,7 +163,9 @@ * between the two matchers (currently one possesses the distance * matrix and optimal path matrix). */ - Matcher(Parameters parameters, Matcher *p); + Matcher(Parameters parameters, + FeatureExtractor::Parameters featureParams, + Matcher *p); /** Constructor for Matcher using externally supplied features. * A Matcher made using this constructor will not carry out its @@ -275,58 +202,18 @@ return frameCount; } - /** - * Return the feature vector size that will be used for the given - * parameters. - */ - static int getFeatureSizeFor(Parameters params); - protected: - template - void initVector(vector &vec, int sz, T dflt = 0) { - vec.clear(); - while ((int)vec.size() < sz) vec.push_back(dflt); - } - - template - void initMatrix(vector > &mat, int hsz, int vsz, - T dflt = 0, int fillTo = -1) { - mat.clear(); - if (fillTo < 0) fillTo = hsz; - for (int i = 0; i < hsz; ++i) { - mat.push_back(vector()); - if (i < fillTo) { - while ((int)mat[i].size() < vsz) { - mat[i].push_back(dflt); - } - } - } - } - + /** Create internal structures and reset. */ void init(); - void makeFreqMap(); + /** The distXSize value has changed: resize internal buffers. */ + void expand(); - /** Creates a map of FFT frequency bins to comparison bins. Where - * the spacing of FFT bins is less than 0.5 semitones, the - * mapping is one to one. Where the spacing is greater than 0.5 - * semitones, the FFT energy is mapped into semitone-wide - * bins. No scaling is performed; that is the energy is summed - * into the comparison bins. See also consumeFrame() - */ - void makeStandardFrequencyMap(); - - void makeChromaFrequencyMap(); - - /** Processes a frame of audio data by first computing the STFT - * with a Hamming window, then mapping the frequency bins into a - * part-linear part-logarithmic array, then (optionally) - * computing the half-wave rectified spectral difference from the - * previous frame, then (optionally) normalising to a sum of 1, - * then calculating the distance to all frames stored in the - * otherMatcher and storing them in the distance matrix, and - * finally updating the optimal path matrix using the dynamic - * time warping algorithm. + /** Process a frequency-domain frame of audio data using the + * built-in FeatureExtractor, then calculating the distance to + * all frames stored in the otherMatcher and storing them in the + * distance matrix, and finally updating the optimal path matrix + * using the dynamic time warping algorithm. * * Return value is the frame (post-processed, with warping, * rectification, and normalisation as appropriate). @@ -369,9 +256,9 @@ */ void setValue(int i, int j, int dir, int value, int dMN); - vector processFrameFromFreqData(double *, double *); void calcAdvance(); + FeatureExtractor featureExtractor; DistanceMetric metric; friend class MatchFeeder;