Mercurial > hg > vamp-tempogram
changeset 22:99380ba63be6
* Changed input of NoveltyCurve::spectrogramToNoveltyCurve() from transposed spectrogram to spectrogram
* Collect spectrogram from process(), not transposed spectrogram
* Allowed OctaveDivider parameter to be any value in range, regardless of the number of bins in the first octave
author | Carl Bussey <c.bussey@se10.qmul.ac.uk> |
---|---|
date | Tue, 19 Aug 2014 16:52:19 +0100 |
parents | 12b952286959 |
children | 7d36c742a183 |
files | NoveltyCurveProcessor.cpp NoveltyCurveProcessor.h TempogramPlugin.cpp TempogramPlugin.h |
diffstat | 4 files changed, 121 insertions(+), 120 deletions(-) [+] |
line wrap: on
line diff
--- a/NoveltyCurveProcessor.cpp Mon Aug 18 15:22:44 2014 +0100 +++ b/NoveltyCurveProcessor.cpp Tue Aug 19 16:52:19 2014 +0100 @@ -11,11 +11,10 @@ #include "NoveltyCurveProcessor.h" using namespace std; -NoveltyCurveProcessor::NoveltyCurveProcessor(const float &samplingFrequency, const size_t &fftLength, const size_t &numberOfBlocks, const size_t &compressionConstant) : +NoveltyCurveProcessor::NoveltyCurveProcessor(const float &samplingFrequency, const size_t &fftLength, const size_t &compressionConstant) : m_samplingFrequency(samplingFrequency), m_fftLength(fftLength), m_blockSize(fftLength/2 + 1), - m_numberOfBlocks(numberOfBlocks), m_compressionConstant(compressionConstant), m_numberOfBands(5), m_pBandBoundaries(0), @@ -54,12 +53,15 @@ } //calculate max of spectrogram -float NoveltyCurveProcessor::calculateMax(const vector< vector<float> > &spectrogram) const +float NoveltyCurveProcessor::calculateMax(const Spectrogram &spectrogram) const { float max = 0; - for (unsigned int j = 0; j < m_numberOfBlocks; j++){ - for (unsigned int i = 0; i < m_blockSize; i++){ + int length = spectrogram.size(); + int height = spectrogram[0].size(); + + for (int i = 0; i < length; i++){ + for (int j = 0; j < height; j++){ max = max > fabs(spectrogram[i][j]) ? 
max : fabs(spectrogram[i][j]); } } @@ -71,16 +73,16 @@ //uses m_hannWindow as filter void NoveltyCurveProcessor::subtractLocalAverage(vector<float> &noveltyCurve, const size_t &smoothLength) const { - vector<float> localAverage(m_numberOfBlocks); + int numberOfBlocks = noveltyCurve.size(); + vector<float> localAverage(numberOfBlocks); float * m_hannWindow = new float[smoothLength]; WindowFunction::hanning(m_hannWindow, smoothLength, true); - FIRFilter filter(m_numberOfBlocks, smoothLength); + FIRFilter filter(numberOfBlocks, smoothLength); filter.process(&noveltyCurve[0], m_hannWindow, &localAverage[0], FIRFilter::middle); - assert(noveltyCurve.size() == m_numberOfBlocks); - for (unsigned int i = 0; i < m_numberOfBlocks; i++){ + for (int i = 0; i < numberOfBlocks; i++){ noveltyCurve[i] -= localAverage[i]; noveltyCurve[i] = noveltyCurve[i] >= 0 ? noveltyCurve[i] : 0; } @@ -90,9 +92,11 @@ } //smoothed differentiator filter. Flips upper half of hanning window about y-axis to create coefficients. 
-void NoveltyCurveProcessor::smoothedDifferentiator(vector< vector<float> > &spectrogram, const size_t &smoothLength) const +void NoveltyCurveProcessor::smoothedDifferentiator(SpectrogramTransposed &spectrogramTransposed, const size_t &smoothLength) const { + int numberOfBlocks = spectrogramTransposed[0].size(); + float * diffHannWindow = new float [smoothLength]; WindowFunction::hanning(diffHannWindow, smoothLength, true); @@ -101,56 +105,56 @@ diffHannWindow[i] = -diffHannWindow[i]; } - FIRFilter smoothFilter(m_numberOfBlocks, smoothLength); + FIRFilter smoothFilter(numberOfBlocks, smoothLength); for (int i = 0; i < (int)m_blockSize; i++){ - smoothFilter.process(&spectrogram[i][0], diffHannWindow, &spectrogram[i][0], FIRFilter::middle); + smoothFilter.process(&spectrogramTransposed[i][0], diffHannWindow, &spectrogramTransposed[i][0], FIRFilter::middle); } } //half rectification (set negative to zero) -void NoveltyCurveProcessor::halfWaveRectify(vector< vector<float> > &spectrogram) const +void NoveltyCurveProcessor::halfWaveRectify(SpectrogramTransposed &spectrogramTransposed) const { - for (int block = 0; block < (int)m_numberOfBlocks; block++){ + int numberOfBlocks = spectrogramTransposed[0].size(); + + for (int block = 0; block < (int)numberOfBlocks; block++){ for (int k = 0; k < (int)m_blockSize; k++){ - if (spectrogram[k][block] < 0.0) spectrogram[k][block] = 0.0; + if (spectrogramTransposed[k][block] < 0.0) spectrogramTransposed[k][block] = 0.0; } } } //process method vector<float> -NoveltyCurveProcessor::spectrogramToNoveltyCurve(Spectrogram spectrogram) const +NoveltyCurveProcessor::spectrogramToNoveltyCurve(const Spectrogram &spectrogram) const //make argument const & { - std::vector<float> noveltyCurve(m_numberOfBlocks); - - //cout << spectrogram[0].size() << " : " << m_numberOfBlocks << endl; - assert(spectrogram.size() == m_blockSize); - assert(spectrogram[0].size() == m_numberOfBlocks); + int numberOfBlocks = spectrogram.size(); + std::vector<float> 
noveltyCurve(numberOfBlocks); + SpectrogramTransposed spectrogramTransposed(spectrogram[0].size(), vector<float>(spectrogram.size())); //normalise and log spectrogram float normaliseScale = calculateMax(spectrogram); - for (int block = 0; block < (int)m_numberOfBlocks; block++){ + for (int block = 0; block < (int)numberOfBlocks; block++){ for (int k = 0; k < (int)m_blockSize; k++){ - if(normaliseScale != 0.0) spectrogram[k][block] /= normaliseScale; //normalise - spectrogram[k][block] = log(1+m_compressionConstant*spectrogram[k][block]); + spectrogramTransposed[k][block] = log(1+m_compressionConstant*spectrogram[block][k]); + if(normaliseScale != 0.0) spectrogramTransposed[k][block] /= normaliseScale; //normalise } } //smooted differentiator - smoothedDifferentiator(spectrogram, 5); //make smoothLength a parameter! + smoothedDifferentiator(spectrogramTransposed, 5); //make smoothLength a parameter! //halfwave rectification - halfWaveRectify(spectrogram); + halfWaveRectify(spectrogramTransposed); //bandwise processing - for (int block = 0; block < (int)m_numberOfBlocks; block++){ + for (int block = 0; block < (int)numberOfBlocks; block++){ for (int band = 0; band < (int)m_numberOfBands; band++){ int k = m_pBandBoundaries[band]; int bandEnd = m_pBandBoundaries[band+1]; m_pBandSum[band] = 0; while(k < bandEnd){ - m_pBandSum[band] += spectrogram[k][block]; + m_pBandSum[band] += spectrogramTransposed[k][block]; k++; } }
--- a/NoveltyCurveProcessor.h Mon Aug 18 15:22:44 2014 +0100 +++ b/NoveltyCurveProcessor.h Tue Aug 19 16:52:19 2014 +0100 @@ -24,7 +24,6 @@ float m_samplingFrequency; size_t m_fftLength; size_t m_blockSize; - size_t m_numberOfBlocks; int m_compressionConstant; size_t m_numberOfBands; int * m_pBandBoundaries; @@ -32,16 +31,16 @@ void initialise(); void cleanup(); - float calculateMax(const std::vector< std::vector<float> > &spectrogram) const; + float calculateMax(const Spectrogram &spectrogram) const; void subtractLocalAverage(std::vector<float> &noveltyCurve, const size_t &smoothLength) const; - void smoothedDifferentiator(std::vector< std::vector<float> > &spectrogram, const size_t &smoothLength) const; - void halfWaveRectify(std::vector< std::vector<float> > &spectrogram) const; + void smoothedDifferentiator(SpectrogramTransposed &spectrogram, const size_t &smoothLength) const; + void halfWaveRectify(SpectrogramTransposed &spectrogram) const; public: - NoveltyCurveProcessor(const float &samplingFrequency, const size_t &fftLength, const size_t &numberOfBlocks, const size_t &compressionConstant); + NoveltyCurveProcessor(const float &samplingFrequency, const size_t &fftLength, const size_t &compressionConstant); ~NoveltyCurveProcessor(); - std::vector<float> spectrogramToNoveltyCurve(Spectrogram spectrogram) const; + std::vector<float> spectrogramToNoveltyCurve(const Spectrogram &spectrogram) const; }; #endif /* defined(__Tempogram__NoveltyCurve__) */
--- a/TempogramPlugin.cpp Mon Aug 18 15:22:44 2014 +0100 +++ b/TempogramPlugin.cpp Tue Aug 19 16:52:19 2014 +0100 @@ -312,14 +312,6 @@ { } -string TempogramPlugin::floatToString(float value) const -{ - ostringstream ss; - - if(!(ss << value)) throw runtime_error("TempogramPlugin::floatToString(): invalid conversion from float to string"); - return ss.str(); -} - TempogramPlugin::OutputList TempogramPlugin::getOutputDescriptors() const { @@ -383,34 +375,6 @@ return list; } -bool TempogramPlugin::handleParameterValues(){ - - if (m_tempogramHopSize <= 0) return false; - if (m_tempogramLog2FftLength <= 0) return false; - - if (m_tempogramFftLength < m_tempogramWindowLength){ - m_tempogramFftLength = m_tempogramWindowLength; - } - if (m_tempogramMinBPM > m_tempogramMaxBPM){ - m_tempogramMinBPM = 30; - m_tempogramMaxBPM = 480; - } - - float tempogramInputSampleRate = (float)m_inputSampleRate/m_inputStepSize; - m_tempogramMinBin = (max(floor(((m_tempogramMinBPM/60)/tempogramInputSampleRate)*m_tempogramFftLength), (float)0.0)); - m_tempogramMaxBin = (min(ceil(((m_tempogramMaxBPM/60)/tempogramInputSampleRate)*m_tempogramFftLength), (float)m_tempogramFftLength/2)); - - if (m_tempogramMinBPM > m_cyclicTempogramMinBPM) m_cyclicTempogramMinBPM = m_tempogramMinBPM; - float cyclicTempogramMaxBPM = 480; - if (m_tempogramMaxBPM < cyclicTempogramMaxBPM) cyclicTempogramMaxBPM = m_tempogramMaxBPM; - - m_cyclicTempogramNumberOfOctaves = floor(log2(cyclicTempogramMaxBPM/m_cyclicTempogramMinBPM)); - int numberOfBinsInFirstOctave = bpmToBin(m_cyclicTempogramMinBPM); - if (m_cyclicTempogramOctaveDivider > numberOfBinsInFirstOctave) m_cyclicTempogramOctaveDivider = numberOfBinsInFirstOctave; - - return true; -} - bool TempogramPlugin::initialise(size_t channels, size_t stepSize, size_t blockSize) { @@ -421,7 +385,6 @@ m_inputBlockSize = blockSize; m_inputStepSize = stepSize; - m_spectrogram = SpectrogramTransposed(m_inputBlockSize/2.0f + 1); if (!handleParameterValues()) return false; 
//cout << m_cyclicTempogramOctaveDivider << endl; @@ -433,7 +396,6 @@ { // Clear buffers, reset stored values, etc m_spectrogram.clear(); - m_spectrogram = SpectrogramTransposed(m_inputBlockSize/2.0f + 1); handleParameterValues(); } @@ -450,51 +412,17 @@ const float *in = inputBuffers[0]; //calculate magnitude of FrequencyDomain input + vector<float> fftCoefficients; for (int i = 0; i < (int)n; i++){ float magnitude = sqrt(in[2*i] * in[2*i] + in[2*i + 1] * in[2*i + 1]); magnitude = magnitude > m_noveltyCurveMinDB ? magnitude : m_noveltyCurveMinDB; - m_spectrogram[i].push_back(magnitude); + fftCoefficients.push_back(magnitude); } + m_spectrogram.push_back(fftCoefficients); return featureSet; } -vector<unsigned int> TempogramPlugin::calculateTempogramNearestNeighbourLogBins() const -{ - vector<unsigned int> logBins; - - for (int i = 0; i < (int)ceil(m_cyclicTempogramNumberOfOctaves*m_cyclicTempogramOctaveDivider); i++){ - float bpm = m_cyclicTempogramMinBPM*pow(2.0f, (float)i/m_cyclicTempogramOctaveDivider); - int bin = bpmToBin(bpm); - - logBins.push_back(bin); - //cerr << bin << endl; - } - - //cerr << logBins.size() << endl; - - return logBins; -} - -int TempogramPlugin::bpmToBin(const float &bpm) const -{ - float w = (float)bpm/60; - float sampleRate = m_inputSampleRate/m_inputStepSize; - int bin = floor((float)m_tempogramFftLength*w/sampleRate + 0.5); - - if(bin < 0) bin = 0; - else if(bin > m_tempogramFftLength/2.0f) bin = m_tempogramFftLength; - - return bin; -} - -float TempogramPlugin::binToBPM(const int &bin) const -{ - float sampleRate = m_inputSampleRate/m_inputStepSize; - - return (bin*sampleRate/m_tempogramFftLength)*60; -} - TempogramPlugin::FeatureSet TempogramPlugin::getRemainingFeatures() { @@ -507,9 +435,9 @@ FeatureSet featureSet; //initialise novelty curve processor - size_t numberOfBlocks = m_spectrogram[0].size(); + size_t numberOfBlocks = m_spectrogram.size(); //cerr << numberOfBlocks << endl; - NoveltyCurveProcessor nc(m_inputSampleRate, 
m_inputBlockSize, numberOfBlocks, m_noveltyCurveCompressionConstant); + NoveltyCurveProcessor nc(m_inputSampleRate, m_inputBlockSize, m_noveltyCurveCompressionConstant); vector<float> noveltyCurve = nc.spectrogramToNoveltyCurve(m_spectrogram); //calculate novelty curvefrom magnitude data //if(noveltyCurve.size() > 50) for (int i = 0; i < 50; i++) cerr << noveltyCurve[i] << endl; @@ -548,20 +476,17 @@ } //Calculate cyclic tempogram - vector<unsigned int> logBins = calculateTempogramNearestNeighbourLogBins(); + vector< vector<unsigned int> > logBins = calculateTempogramNearestNeighbourLogBins(); - assert(logBins.back() <= m_tempogramFftLength/2.0f); - assert((int)logBins.size() == m_cyclicTempogramOctaveDivider*m_cyclicTempogramNumberOfOctaves); + //assert((int)logBins.size() == m_cyclicTempogramOctaveDivider*m_cyclicTempogramNumberOfOctaves); for (int block = 0; block < tempogramLength; block++){ Feature cyclicTempogramFeature; for (int i = 0; i < (int)m_cyclicTempogramOctaveDivider; i++){ float sum = 0; - //mcerr << floor(binToBPM(logBins[i]) + 0.5) << " " << floor(binToBPM(logBins[i + m_cyclicTempogramOctaveDivider]) + 0.5) << endl; - for (int j = 0; j < (int)m_cyclicTempogramNumberOfOctaves; j++){ - sum += tempogram[block][logBins[i+j*m_cyclicTempogramOctaveDivider]]; + sum += tempogram[block][logBins[j][i]]; } cyclicTempogramFeature.values.push_back(sum/m_cyclicTempogramNumberOfOctaves); assert(!isnan(cyclicTempogramFeature.values.back())); @@ -573,3 +498,76 @@ return featureSet; } + +vector< vector<unsigned int> > TempogramPlugin::calculateTempogramNearestNeighbourLogBins() const +{ + vector< vector<unsigned int> > logBins; + + for (int octave = 0; octave < (int)m_cyclicTempogramNumberOfOctaves; octave++){ + vector<unsigned int> octaveBins; + + for (int bin = 0; bin < (int)m_cyclicTempogramOctaveDivider; bin++){ + float bpm = m_cyclicTempogramMinBPM*pow(2.0f, octave+(float)bin/m_cyclicTempogramOctaveDivider); + + octaveBins.push_back(bpmToBin(bpm)); + } + 
logBins.push_back(octaveBins); + } + + //cerr << logBins.size() << endl; + + return logBins; +} + +unsigned int TempogramPlugin::bpmToBin(const float &bpm) const +{ + float w = (float)bpm/60; + float sampleRate = m_inputSampleRate/m_inputStepSize; + int bin = floor((float)m_tempogramFftLength*w/sampleRate + 0.5); + + if(bin < 0) bin = 0; + else if(bin > m_tempogramFftLength/2.0f) bin = m_tempogramFftLength; + + return bin; +} + +float TempogramPlugin::binToBPM(const int &bin) const +{ + float sampleRate = m_inputSampleRate/m_inputStepSize; + + return (bin*sampleRate/m_tempogramFftLength)*60; +} + +bool TempogramPlugin::handleParameterValues(){ + + if (m_tempogramHopSize <= 0) return false; + if (m_tempogramLog2FftLength <= 0) return false; + + if (m_tempogramFftLength < m_tempogramWindowLength){ + m_tempogramFftLength = m_tempogramWindowLength; + } + if (m_tempogramMinBPM >= m_tempogramMaxBPM){ + m_tempogramMinBPM = 30; + m_tempogramMaxBPM = 480; + } + + float tempogramInputSampleRate = (float)m_inputSampleRate/m_inputStepSize; + m_tempogramMinBin = (max(floor(((m_tempogramMinBPM/60)/tempogramInputSampleRate)*m_tempogramFftLength), (float)0.0)); + m_tempogramMaxBin = (min(ceil(((m_tempogramMaxBPM/60)/tempogramInputSampleRate)*m_tempogramFftLength), (float)m_tempogramFftLength/2)); + + if (m_tempogramMinBPM > m_cyclicTempogramMinBPM) m_cyclicTempogramMinBPM = m_tempogramMinBPM; + float cyclicTempogramMaxBPM = 480; + if (m_tempogramMaxBPM < cyclicTempogramMaxBPM) cyclicTempogramMaxBPM = m_tempogramMaxBPM; + + m_cyclicTempogramNumberOfOctaves = floor(log2(cyclicTempogramMaxBPM/m_cyclicTempogramMinBPM)); + + return true; +} + +string TempogramPlugin::floatToString(float value) const +{ + ostringstream ss; + + if(!(ss << value)) throw runtime_error("TempogramPlugin::floatToString(): invalid conversion from float to string"); + return ss.str(); +}
--- a/TempogramPlugin.h Mon Aug 18 15:22:44 2014 +0100 +++ b/TempogramPlugin.h Tue Aug 19 16:52:19 2014 +0100 @@ -77,7 +77,7 @@ // plugin-specific data and methods go here size_t m_inputBlockSize; size_t m_inputStepSize; - SpectrogramTransposed m_spectrogram; //spectrogram data + Spectrogram m_spectrogram; //spectrogram data //Novelty Curve specific parameters float m_noveltyCurveMinDB; @@ -102,8 +102,8 @@ int m_cyclicTempogramOctaveDivider; string floatToString(float value) const; - vector<unsigned int> calculateTempogramNearestNeighbourLogBins() const; - int bpmToBin(const float &bpm) const; + vector< vector<unsigned int> > calculateTempogramNearestNeighbourLogBins() const; + unsigned int bpmToBin(const float &bpm) const; float binToBPM (const int &bin) const; bool handleParameterValues(); };