Mercurial > hg > qm-dsp
changeset 471:e3335cb213da
Merge branch 'chroma-key-tuning-review'
author | Chris Cannam <cannam@all-day-breakfast.com> |
---|---|
date | Thu, 30 May 2019 16:18:13 +0100 |
parents | d20dafd127b3 (current diff) dd132354ea02 (diff) |
children | 73fc1de3254a 930b5b0f707d |
files | |
diffstat | 12 files changed, 498 insertions(+), 144 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.gitignore Thu May 30 16:18:13 2019 +0100 @@ -0,0 +1,12 @@ +*.o +*~ +*.orig +*.a +doc/html/ +tests/test-* +tests/test.log +*.rej +ext/uncertain/ +*.obj +build/msvc/Debug/ +build/msvc/x64/Release/
--- a/.travis.yml Fri May 24 11:39:24 2019 +0100 +++ b/.travis.yml Thu May 30 16:18:13 2019 +0100 @@ -19,6 +19,5 @@ - valgrind script: - - make -f build/linux/Makefile.linux64 - - ( cd tests ; make ) + - make -f build/linux/Makefile.linux64 test
--- a/build/general/Makefile.inc Fri May 24 11:39:24 2019 +0100 +++ b/build/general/Makefile.inc Thu May 30 16:18:13 2019 +0100 @@ -144,6 +144,9 @@ $(AR) cr $@ $^ $(RANLIB) $@ +test: $(LIBRARY) + $(MAKE) -C tests + depend: makedepend -fbuild/general/Makefile.inc -Y -- $(CFLAGS) -- $(SOURCES)
--- a/dsp/chromagram/Chromagram.cpp Fri May 24 11:39:24 2019 +0100 +++ b/dsp/chromagram/Chromagram.cpp Thu May 30 16:18:13 2019 +0100 @@ -33,8 +33,9 @@ m_BPO = Config.BPO; // bins per octave m_normalise = Config.normalise; // if frame normalisation is required - // No. of constant Q bins - m_uK = (int) ceil( m_BPO * log(m_FMax/m_FMin)/log(2.0)); + // Extend range to a full octave + double octaves = log(m_FMax / m_FMin) / log(2.0); + m_FMax = m_FMin * pow(2.0, ceil(octaves)); // Create array for chroma result m_chromadata = new double[ m_BPO ]; @@ -44,7 +45,7 @@ // Populate CQ config structure with parameters // inherited from the Chroma config - ConstantQConfig.FS = Config.FS; + ConstantQConfig.FS = Config.FS; ConstantQConfig.min = m_FMin; ConstantQConfig.max = m_FMax; ConstantQConfig.BPO = m_BPO; @@ -53,6 +54,9 @@ // Initialise ConstantQ operator m_ConstantQ = new ConstantQ( ConstantQConfig ); + // No. of constant Q bins + m_uK = m_ConstantQ->getK(); + // Initialise working arrays m_frameSize = m_ConstantQ->getfftlength(); m_hopSize = m_ConstantQ->gethop(); @@ -121,7 +125,7 @@ } -double* Chromagram::process( const double *data ) +double *Chromagram::process(const double *data) { if (!m_skGenerated) { // Generate CQ Kernel @@ -139,12 +143,20 @@ } m_window->cut(m_windowbuf); + // The frequency-domain version expects pre-fftshifted input - so + // we must do the same here + for (int i = 0; i < m_frameSize/2; ++i) { + double tmp = m_windowbuf[i]; + m_windowbuf[i] = m_windowbuf[i + m_frameSize/2]; + m_windowbuf[i + m_frameSize/2] = tmp; + } + m_FFT->forward(m_windowbuf, m_FFTRe, m_FFTIm); return process(m_FFTRe, m_FFTIm); } -double* Chromagram::process( const double *real, const double *imag ) +double *Chromagram::process(const double *real, const double *imag) { if (!m_skGenerated) { // Generate CQ Kernel @@ -159,8 +171,8 @@ m_ConstantQ->process( real, imag, m_CQRe, m_CQIm ); // add each octave of cq data into Chromagram - const int octaves = (int)floor(double( m_uK/m_BPO))-1; - for (int octave = 0; octave <= octaves; octave++) + const int octaves = m_uK / m_BPO; + for (int octave = 0; octave < octaves; octave++) { int firstBin = octave*m_BPO; for (int i = 0; i < m_BPO; i++)
--- a/dsp/chromagram/Chromagram.h Fri May 24 11:39:24 2019 +0100 +++ b/dsp/chromagram/Chromagram.h Thu May 30 16:18:13 2019 +0100 @@ -20,8 +20,8 @@ #include "base/Window.h" #include "ConstantQ.h" -struct ChromaConfig{ - int FS; +struct ChromaConfig { + double FS; double min; double max; int BPO; @@ -35,10 +35,35 @@ public: Chromagram( ChromaConfig Config ); ~Chromagram(); - - double* process( const double *data ); // time domain - double* process( const double *real, const double *imag ); // frequency domain - void unityNormalise( double* src ); + + /** + * Process a time-domain input signal of length equal to + * getFrameSize(). + * + * The returned buffer contains the chromagram values indexed by + * bin, with the number of values corresponding to the BPO field + * in the ChromaConfig supplied at construction. It is owned by + * the Chromagram object and is reused from one process call to + * the next. + */ + double *process(const double *data); + + /** + * Process a frequency-domain input signal generated from a + * time-domain signal of length equal to getFrameSize() that has + * been windowed and "fftshifted" to place the zero index in the + * centre of the frame. The real and imag buffers must each + * contain the full getFrameSize() frequency bins. + * + * The returned buffer contains the chromagram values indexed by + * bin, with the number of values corresponding to the BPO field + * in the ChromaConfig supplied at construction. It is owned by + * the Chromagram object and is reused from one process call to + * the next. + */ + double *process(const double *real, const double *imag); + + void unityNormalise(double* src); // Complex arithmetic double kabs( double real, double imag );
--- a/dsp/chromagram/ConstantQ.cpp Fri May 24 11:39:24 2019 +0100 +++ b/dsp/chromagram/ConstantQ.cpp Thu May 30 16:18:13 2019 +0100 @@ -128,6 +128,9 @@ // Computing a hamming window const unsigned hammingLength = (int) ceil( m_dQ * m_FS / ( m_FMin * pow(2,((double)(k))/(double)m_BPO))); +// cerr << "k = " << k << ", q = " << m_dQ << ", m_FMin = " << m_FMin << ", hammingLength = " << hammingLength << " (rounded up from " << (m_dQ * m_FS / ( m_FMin * pow(2,((double)(k))/(double)m_BPO))) << ")" << endl; + + unsigned origin = m_FFTLength/2 - hammingLength/2; for (unsigned i=0; i<hammingLength; i++) @@ -159,7 +162,7 @@ const double squaredBin = squaredModule( transfHammingWindowRe[ j ], transfHammingWindowIm[ j ]); if (squaredBin <= squareThreshold) continue; - // Insert non-zero position indexes, doubled because they are floats + // Insert non-zero position indexes sk->is.push_back(j); sk->js.push_back(k); @@ -271,6 +274,7 @@ { const unsigned row = cqbin[i]; const unsigned col = fftbin[i]; + if (col == 0) continue; const double & r1 = real[i]; const double & i1 = imag[i]; const double & r2 = fftdata[ (2*m_FFTLength) - 2*col - 2 ]; @@ -300,7 +304,7 @@ // work out length of fft required for this constant Q Filter bank m_FFTLength = (int) pow(2, nextpow2(ceil( m_dQ*m_FS/m_FMin ))); - m_hop = m_FFTLength/8; // <------ hop size is window length divided by 32 + m_hop = m_FFTLength/8; // std::cerr << "ConstantQ::initialise: -> fft length = " << m_FFTLength << ", hop = " << m_hop << std::endl; @@ -340,10 +344,11 @@ { const unsigned row = cqbin[i]; const unsigned col = fftbin[i]; + if (col == 0) continue; const double & r1 = real[i]; const double & i1 = imag[i]; - const double & r2 = FFTRe[ m_FFTLength - col - 1 ]; - const double & i2 = FFTIm[ m_FFTLength - col - 1 ]; + const double & r2 = FFTRe[ m_FFTLength - col ]; + const double & i2 = FFTIm[ m_FFTLength - col ]; // add the multiplication CQRe[ row ] += (r1*r2 - i1*i2); CQIm[ row ] += (r1*i2 + i1*r2);
--- a/dsp/chromagram/ConstantQ.h Fri May 24 11:39:24 2019 +0100 +++ b/dsp/chromagram/ConstantQ.h Thu May 30 16:18:13 2019 +0100 @@ -20,8 +20,8 @@ #include "maths/MathAliases.h" #include "maths/MathUtilities.h" -struct CQConfig{ - unsigned int FS; // samplerate +struct CQConfig { + double FS; // samplerate double min; // minimum frequency double max; // maximum frequency unsigned int BPO; // bins per octave @@ -58,7 +58,7 @@ void deInitialise(); double* m_CQdata; - unsigned int m_FS; + double m_FS; double m_FMin; double m_FMax; double m_dQ;
--- a/dsp/keydetection/GetKeyMode.cpp Fri May 24 11:39:24 2019 +0100 +++ b/dsp/keydetection/GetKeyMode.cpp Thu May 30 16:18:13 2019 +0100 @@ -1,7 +1,12 @@ /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ /* - Copyright (c) 2005 Centre for Digital Music ( C4DM ) - Queen Mary Univesrity of London + QM DSP Library + + Centre for Digital Music, Queen Mary, University of London. + This file 2005-2006 Christian Landone and Katy Noland. + + Fixes to correct chroma offsets and for thread safety contributed + by Daniel Schürmann. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as @@ -9,9 +14,6 @@ License, or (at your option) any later version. See the file COPYING included with this distribution for more information. */ -// GetKeyMode.cpp: implementation of the CGetKeyMode class. -// -////////////////////////////////////////////////////////////////////// #include "GetKeyMode.h" #include "maths/MathUtilities.h" @@ -22,18 +24,20 @@ #include <cstring> #include <cstdlib> +static const int kBinsPerOctave = 36; + // Chords profile -static double MajProfile[36] = -{ 0.0384, 0.0629, 0.0258, 0.0121, 0.0146, 0.0106, 0.0364, 0.0610, 0.0267, - 0.0126, 0.0121, 0.0086, 0.0364, 0.0623, 0.0279, 0.0275, 0.0414, 0.0186, - 0.0173, 0.0248, 0.0145, 0.0364, 0.0631, 0.0262, 0.0129, 0.0150, 0.0098, - 0.0312, 0.0521, 0.0235, 0.0129, 0.0142, 0.0095, 0.0289, 0.0478, 0.0239}; +static double MajProfile[kBinsPerOctave] = { + 0.0384, 0.0629, 0.0258, 0.0121, 0.0146, 0.0106, 0.0364, 0.0610, 0.0267, + 0.0126, 0.0121, 0.0086, 0.0364, 0.0623, 0.0279, 0.0275, 0.0414, 0.0186, + 0.0173, 0.0248, 0.0145, 0.0364, 0.0631, 0.0262, 0.0129, 0.0150, 0.0098, + 0.0312, 0.0521, 0.0235, 0.0129, 0.0142, 0.0095, 0.0289, 0.0478, 0.0239}; -static double MinProfile[36] = -{ 0.0375, 0.0682, 0.0299, 0.0119, 0.0138, 0.0093, 0.0296, 0.0543, 0.0257, - 0.0292, 0.0519, 0.0246, 0.0159, 0.0234, 0.0135, 0.0291, 0.0544, 0.0248, - 0.0137, 0.0176, 0.0104, 0.0352, 0.0670, 0.0302, 0.0222, 0.0349, 0.0164, - 0.0174, 0.0297, 0.0166, 0.0222, 0.0401, 0.0202, 0.0175, 0.0270, 0.0146}; +static double MinProfile[kBinsPerOctave] = { + 0.0375, 0.0682, 0.0299, 0.0119, 0.0138, 0.0093, 0.0296, 0.0543, 0.0257, + 0.0292, 0.0519, 0.0246, 0.0159, 0.0234, 0.0135, 0.0291, 0.0544, 0.0248, + 0.0137, 0.0176, 0.0104, 0.0352, 0.0670, 0.0302, 0.0222, 0.0349, 0.0164, + 0.0174, 0.0297, 0.0166, 0.0222, 0.0401, 0.0202, 0.0175, 0.0270, 0.0146}; // @@ -42,7 +46,7 @@ ////////////////////////////////////////////////////////////////////// GetKeyMode::GetKeyMode( int sampleRate, float tuningFrequency, - double hpcpAverage, double medianAverage ) : + double hpcpAverage, double medianAverage ) : m_hpcpAverage( hpcpAverage ), m_medianAverage( medianAverage ), m_ChrPointer(0), @@ -51,7 +55,6 @@ m_MeanHPCP(0), m_MajCorr(0), m_MinCorr(0), - m_Keys(0), m_MedianFilterBuffer(0), m_SortedBuffer(0), m_keyStrengths(0) @@ -60,17 +63,17 @@ // Chromagram configuration parameters m_ChromaConfig.normalise = MathUtilities::NormaliseUnitMax; - m_ChromaConfig.FS = lrint(sampleRate/(double)m_DecimationFactor); - if (m_ChromaConfig.FS < 1) m_ChromaConfig.FS = 1; + m_ChromaConfig.FS = sampleRate/(double)m_DecimationFactor; + if (m_ChromaConfig.FS < 1) { + m_ChromaConfig.FS = 1; + } - // Set C (= MIDI #12) as our base : + // Set C3 (= MIDI #48) as our base: // This implies that key = 1 => Cmaj, key = 12 => Bmaj, key = 13 => Cmin, etc. - m_ChromaConfig.min = Pitch::getFrequencyForPitch - (48, 0, tuningFrequency); - m_ChromaConfig.max = Pitch::getFrequencyForPitch - (96, 0, tuningFrequency); + m_ChromaConfig.min = Pitch::getFrequencyForPitch( 48, 0, tuningFrequency ); + m_ChromaConfig.max = Pitch::getFrequencyForPitch( 96, 0, tuningFrequency ); - m_ChromaConfig.BPO = 36; + m_ChromaConfig.BPO = kBinsPerOctave; m_ChromaConfig.CQThresh = 0.0054; // Chromagram inst. @@ -80,7 +83,6 @@ m_ChromaFrameSize = m_Chroma->getFrameSize(); // override hopsize for this application m_ChromaHopSize = m_ChromaFrameSize; - m_BPO = m_ChromaConfig.BPO; // std::cerr << "chroma frame size = " << m_ChromaFrameSize << ", decimation factor = " << m_DecimationFactor << " therefore block size = " << getBlockSize() << std::endl; @@ -96,30 +98,38 @@ // Spawn objectc/arrays m_DecimatedBuffer = new double[m_ChromaFrameSize]; - m_ChromaBuffer = new double[m_BPO * m_ChromaBuffersize]; - memset( m_ChromaBuffer, 0, sizeof(double) * m_BPO * m_ChromaBuffersize); + m_ChromaBuffer = new double[kBinsPerOctave * m_ChromaBuffersize]; + memset( m_ChromaBuffer, 0, sizeof(double) * kBinsPerOctave * m_ChromaBuffersize); - m_MeanHPCP = new double[m_BPO]; + m_MeanHPCP = new double[kBinsPerOctave]; - m_MajCorr = new double[m_BPO]; - m_MinCorr = new double[m_BPO]; - m_Keys = new double[2*m_BPO]; + m_MajCorr = new double[kBinsPerOctave]; + m_MinCorr = new double[kBinsPerOctave]; + m_MajProfileNorm = new double[kBinsPerOctave]; + m_MinProfileNorm = new double[kBinsPerOctave]; + + double mMaj = MathUtilities::mean( MajProfile, kBinsPerOctave ); + double mMin = MathUtilities::mean( MinProfile, kBinsPerOctave ); + + for( unsigned int i = 0; i < kBinsPerOctave; i++ ) { + m_MajProfileNorm[i] = MajProfile[i] - mMaj; + m_MinProfileNorm[i] = MinProfile[i] - mMin; + } + m_MedianFilterBuffer = new int[ m_MedianWinsize ]; memset( m_MedianFilterBuffer, 0, sizeof(int)*m_MedianWinsize); m_SortedBuffer = new int[ m_MedianWinsize ]; - memset( m_SortedBuffer, 0, sizeof(int)*m_MedianWinsize); + memset( m_SortedBuffer, 0, sizeof(int)*m_MedianWinsize); - m_Decimator = new Decimator - ( m_ChromaFrameSize*m_DecimationFactor, m_DecimationFactor ); + m_Decimator = new Decimator( m_ChromaFrameSize*m_DecimationFactor, m_DecimationFactor ); m_keyStrengths = new double[24]; } GetKeyMode::~GetKeyMode() { - delete m_Chroma; delete m_Decimator; @@ -128,40 +138,40 @@ delete [] m_MeanHPCP; delete [] m_MajCorr; delete [] m_MinCorr; - delete [] m_Keys; + delete [] m_MajProfileNorm; + delete [] m_MinProfileNorm; delete [] m_MedianFilterBuffer; delete [] m_SortedBuffer; - - delete[] m_keyStrengths; + delete [] m_keyStrengths; } -double GetKeyMode::krumCorr(double *pData1, double *pData2, unsigned int length) +double GetKeyMode::krumCorr( const double *pDataNorm, const double *pProfileNorm, + int shiftProfile, unsigned int length) { double retVal= 0.0; double num = 0; double den = 0; - double mX = MathUtilities::mean( pData1, length ); - double mY = MathUtilities::mean( pData2, length ); - double sum1 = 0; double sum2 = 0; for( unsigned int i = 0; i <length; i++ ) { - num += ( pData1[i] - mX ) * ( pData2[i] - mY ); + int k = (i - shiftProfile + length) % length; - sum1 += ( (pData1[i]-mX) * (pData1[i]-mX) ); - sum2 += ( (pData2[i]-mY) * (pData2[i]-mY) ); + num += pDataNorm[i] * pProfileNorm[k]; + + sum1 += ( pDataNorm[i] * pDataNorm[i] ); + sum2 += ( pProfileNorm[k] * pProfileNorm[k] ); } den = sqrt(sum1 * sum2); - - if( den>0 ) + + if( den>0 ) { retVal = num/den; - else + } else { retVal = 0; - + } return retVal; } @@ -169,118 +179,102 @@ int GetKeyMode::process(double *PCMData) { int key; - unsigned int j,k; ////////////////////////////////////////////// m_Decimator->process( PCMData, m_DecimatedBuffer); - m_ChrPointer = m_Chroma->process( m_DecimatedBuffer ); + m_ChrPointer = m_Chroma->process( m_DecimatedBuffer ); - - // Move bins such that the centre of the base note is in the - // middle of its three bins : - // Added 21.11.07 by Chris Sutton based on debugging with Katy - // Noland + comparison with Matlab equivalent. - MathUtilities::circShift( m_ChrPointer, m_BPO, 1); /* std::cout << "raw chroma: "; - for (int ii = 0; ii < m_BPO; ++ii) { - if (ii % (m_BPO/12) == 0) std::cout << "\n"; + for (int ii = 0; ii < kBinsPerOctave; ++ii) { + if (ii % (kBinsPerOctave/12) == 0) std::cout << "\n"; std::cout << m_ChrPointer[ii] << " "; } std::cout << std::endl; */ // populate hpcp values; int cbidx; - for( j = 0; j < m_BPO; j++ ) - { - cbidx = (m_bufferindex * m_BPO) + j; + for( j = 0; j < kBinsPerOctave; j++ ) { + cbidx = (m_bufferindex * kBinsPerOctave) + j; m_ChromaBuffer[ cbidx ] = m_ChrPointer[j]; } //keep track of input buffers; - if( m_bufferindex++ >= m_ChromaBuffersize - 1) + if( m_bufferindex++ >= m_ChromaBuffersize - 1) { m_bufferindex = 0; + } // track filling of chroma matrix - if( m_ChromaBufferFilling++ >= m_ChromaBuffersize) + if( m_ChromaBufferFilling++ >= m_ChromaBuffersize) { m_ChromaBufferFilling = m_ChromaBuffersize; + } - //calculate mean - for( k = 0; k < m_BPO; k++ ) - { + //calculate mean + for( k = 0; k < kBinsPerOctave; k++ ) { double mnVal = 0.0; - for( j = 0; j < m_ChromaBufferFilling; j++ ) - { - mnVal += m_ChromaBuffer[ k + (j*m_BPO) ]; + for( j = 0; j < m_ChromaBufferFilling; j++ ) { + mnVal += m_ChromaBuffer[ k + (j*kBinsPerOctave) ]; } m_MeanHPCP[k] = mnVal/(double)m_ChromaBufferFilling; } - - for( k = 0; k < m_BPO; k++ ) + // Normalize for zero average + double mHPCP = MathUtilities::mean( m_MeanHPCP, kBinsPerOctave ); + for( k = 0; k < kBinsPerOctave; k++ ) { - m_MajCorr[k] = krumCorr( m_MeanHPCP, MajProfile, m_BPO ); - m_MinCorr[k] = krumCorr( m_MeanHPCP, MinProfile, m_BPO ); - - MathUtilities::circShift( MajProfile, m_BPO, 1 ); - MathUtilities::circShift( MinProfile, m_BPO, 1 ); - } - - for( k = 0; k < m_BPO; k++ ) - { - m_Keys[k] = m_MajCorr[k]; - m_Keys[k+m_BPO] = m_MinCorr[k]; + m_MeanHPCP[k] -= mHPCP; } - for (k = 0; k < 24; ++k) { - m_keyStrengths[k] = 0; - } - for( k = 0; k < m_BPO*2; k++ ) + for( k = 0; k < kBinsPerOctave; k++ ) { - int idx = k / (m_BPO/12); - int rem = k % (m_BPO/12); - if (rem == 0 || m_Keys[k] > m_keyStrengths[idx]) { - m_keyStrengths[idx] = m_Keys[k]; - } - -// m_keyStrengths[k/(m_BPO/12)] += m_Keys[k]; + // The Cromagram has the center of C at bin 0, while the major + // and minor profiles have the center of C at 1. We want to have + // the correlation for C result also at 1. + // To achieve this we have to shift two times: + m_MajCorr[k] = krumCorr( m_MeanHPCP, m_MajProfileNorm, (int)k - 2, kBinsPerOctave ); + m_MinCorr[k] = krumCorr( m_MeanHPCP, m_MinProfileNorm, (int)k - 2, kBinsPerOctave ); } /* std::cout << "raw keys: "; - for (int ii = 0; ii < 2*m_BPO; ++ii) { - if (ii % (m_BPO/12) == 0) std::cout << "\n"; - std::cout << m_Keys[ii] << " "; + for (int ii = 0; ii < kBinsPerOctave; ++ii) { + if (ii % (kBinsPerOctave/12) == 0) std::cout << "\n"; + std::cout << m_MajCorr[ii] << " "; } - std::cout << std::endl; - - std::cout << "key strengths: "; - for (int ii = 0; ii < 24; ++ii) { - if (ii % 6 == 0) std::cout << "\n"; - std::cout << m_keyStrengths[ii] << " "; + for (int ii = 0; ii < kBinsPerOctave; ++ii) { + if (ii % (kBinsPerOctave/12) == 0) std::cout << "\n"; + std::cout << m_MinCorr[ii] << " "; } std::cout << std::endl; */ - double dummy; - // '1 +' because we number keys 1-24, not 0-23. - key = 1 + (int)ceil( (double)MathUtilities::getMax( m_Keys, 2* m_BPO, &dummy )/3 ); + // m_MajCorr[1] is C center 1 / 3 + 1 = 1 + // m_MajCorr[4] is D center 4 / 3 + 1 = 2 + // '+ 1' because we number keys 1-24, not 0-23. + double maxMaj; + int maxMajBin = MathUtilities::getMax( m_MajCorr, kBinsPerOctave, &maxMaj ); + double maxMin; + int maxMinBin = MathUtilities::getMax( m_MinCorr, kBinsPerOctave, &maxMin ); + int maxBin = (maxMaj > maxMin) ? maxMajBin : (maxMinBin + kBinsPerOctave); + key = maxBin / 3 + 1; + +// std::cout << "fractional key pre-sorting: " << (maxBin + 2) / 3.0 << std::endl; // std::cout << "key pre-sorting: " << key << std::endl; //Median filtering // track Median buffer initial filling - if( m_MedianBufferFilling++ >= m_MedianWinsize) + if( m_MedianBufferFilling++ >= m_MedianWinsize) { m_MedianBufferFilling = m_MedianWinsize; - + } + //shift median buffer - for( k = 1; k < m_MedianWinsize; k++ ) - { + for( k = 1; k < m_MedianWinsize; k++ ) { m_MedianFilterBuffer[ k - 1 ] = m_MedianFilterBuffer[ k ]; } @@ -290,8 +284,7 @@ //Copy median into sorting buffer, reversed unsigned int ijx = 0; - for( k = 0; k < m_MedianWinsize; k++ ) - { + for( k = 0; k < m_MedianWinsize; k++ ) { m_SortedBuffer[k] = m_MedianFilterBuffer[m_MedianWinsize-1-ijx]; ijx++; } @@ -310,8 +303,9 @@ // std::cout << "midpoint = " << midpoint << endl; - if( midpoint <= 0 ) + if( midpoint <= 0 ) { midpoint = 1; + } key = m_SortedBuffer[midpoint-1]; @@ -325,3 +319,45 @@ { return (key > 12); } + +unsigned int getChromaSize() +{ + return kBinsPerOctave; +} + +double* GetKeyMode::getKeyStrengths() { + unsigned int k; + + for (k = 0; k < 24; ++k) { + m_keyStrengths[k] = 0; + } + + for( k = 0; k < kBinsPerOctave; k++ ) + { + int idx = k / (kBinsPerOctave/12); + int rem = k % (kBinsPerOctave/12); + if (rem == 0 || m_MajCorr[k] > m_keyStrengths[idx]) { + m_keyStrengths[idx] = m_MajCorr[k]; + } + } + + for( k = 0; k < kBinsPerOctave; k++ ) + { + int idx = (k + kBinsPerOctave) / (kBinsPerOctave/12); + int rem = k % (kBinsPerOctave/12); + if (rem == 0 || m_MinCorr[k] > m_keyStrengths[idx]) { + m_keyStrengths[idx] = m_MinCorr[k]; + } + } + +/* + std::cout << "key strengths: "; + for (int ii = 0; ii < 24; ++ii) { + if (ii % 6 == 0) std::cout << "\n"; + std::cout << m_keyStrengths[ii] << " "; + } + std::cout << std::endl; +*/ + + return m_keyStrengths; +}
--- a/dsp/keydetection/GetKeyMode.h Fri May 24 11:39:24 2019 +0100 +++ b/dsp/keydetection/GetKeyMode.h Thu May 30 16:18:13 2019 +0100 @@ -27,17 +27,18 @@ int process( double* PCMData ); - double krumCorr( double* pData1, double* pData2, unsigned int length ); + double krumCorr( const double *pDataNorm, const double *pProfileNorm, + int shiftProfile, unsigned int length ); unsigned int getBlockSize() { return m_ChromaFrameSize*m_DecimationFactor; } unsigned int getHopSize() { return m_ChromaHopSize*m_DecimationFactor; } double* getChroma() { return m_ChrPointer; } - unsigned int getChromaSize() { return m_BPO; } + unsigned int getChromaSize(); double* getMeanHPCP() { return m_MeanHPCP; } - double *getKeyStrengths() { return m_keyStrengths; } + double* getKeyStrengths(); bool isModeMinor( int key ); @@ -63,8 +64,6 @@ unsigned int m_ChromaFrameSize; //Hop unsigned int m_ChromaHopSize; - //Bins per octave - unsigned int m_BPO; unsigned int m_ChromaBuffersize; @@ -79,9 +78,10 @@ double* m_ChromaBuffer; double* m_MeanHPCP; + double* m_MajProfileNorm; + double* m_MinProfileNorm; double* m_MajCorr; double* m_MinCorr; - double* m_Keys; int* m_MedianFilterBuffer; int* m_SortedBuffer;
--- a/tests/Makefile Fri May 24 11:39:24 2019 +0100 +++ b/tests/Makefile Thu May 30 16:18:13 2019 +0100 @@ -5,12 +5,13 @@ LDFLAGS := $(LDFLAGS) -lboost_unit_test_framework -lpthread LIBS := ../libqm-dsp.a -TESTS := test-mathutilities test-window test-filter test-fft test-dct test-pvoc test-resampler test-medianfilter +TESTS := test-mathutilities test-window test-filter test-fft test-dct test-pvoc test-resampler test-medianfilter test-getkeymode test-chromagram VG := valgrind -q all: $(TESTS) - for t in $(TESTS); do echo "Running $$t"; $(VG) ./"$$t" || exit 1; done +# for t in $(TESTS); do echo "Running $$t"; $(VG) ./"$$t" || exit 1; done + for t in $(TESTS); do echo "Running $$t"; ./"$$t" || exit 1; done test-medianfilter: TestMedianFilter.o $(LIBS) $(CXX) -o $@ $^ $(LDFLAGS) @@ -36,6 +37,12 @@ test-resampler: TestResampler.o $(LIBS) $(CXX) -o $@ $^ $(LDFLAGS) +test-chromagram: TestChromagram.o $(LIBS) + $(CXX) -o $@ $^ $(LDFLAGS) + +test-getkeymode: TestGetKeyMode.o $(LIBS) + $(CXX) -o $@ $^ $(LDFLAGS) + TestMathUtilities.o: $(LIBS) TestMedianFilter.o: $(LIBS) TestWindow.o: $(LIBS) @@ -44,6 +51,8 @@ TestDCT.o: $(LIBS) TestPhaseVocoder.o: $(LIBS) TestResampler.o: $(LIBS) +TestChromagram.o: $(LIBS) +TestGetKeyMode.o: $(LIBS) clean: rm -f *.o $(TESTS)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/TestChromagram.cpp Thu May 30 16:18:13 2019 +0100 @@ -0,0 +1,141 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +#include "dsp/chromagram/Chromagram.h" + +#include <iostream> + +#include <cmath> + +#define BOOST_TEST_DYN_LINK +#define BOOST_TEST_MAIN + +#include <boost/test/unit_test.hpp> + +BOOST_AUTO_TEST_SUITE(TestChromagram) + +using std::cout; +using std::endl; +using std::string; +using std::vector; + +string midiPitchName(int midiPitch) +{ + static string names[] = { + "C", "C#", "D", "D#", + "E", "F", "F#", "G", + "G#", "A", "A#", "B" + }; + + return names[midiPitch % 12]; +} + +vector<double> generateSinusoid(double frequency, + double sampleRate, + int length) +{ + vector<double> buffer; + for (int i = 0; i < length; ++i) { + buffer.push_back(sin((i * M_PI * 2.0 * frequency) / sampleRate)); + } + return buffer; +} + +double frequencyForPitch(int midiPitch, double concertA) +{ + return concertA * pow(2.0, (midiPitch - 69.0) / 12.0); +} + +void test_sinusoid_12tET(double concertA, double sampleRate, int bpo) +{ + int chromaMinPitch = 36; + int chromaMaxPitch = 108; + + int probeMinPitch = 36; + int probeMaxPitch = 108; + + ChromaConfig config { + sampleRate, + frequencyForPitch(chromaMinPitch, concertA), + frequencyForPitch(chromaMaxPitch, concertA), + bpo, + 0.0054, + MathUtilities::NormaliseNone + }; + + Chromagram chroma(config); + + int binsPerSemi = bpo / 12; + + for (int midiPitch = probeMinPitch; + midiPitch < probeMaxPitch; + ++midiPitch) { + + int blockSize = chroma.getFrameSize(); + + double frequency = frequencyForPitch(midiPitch, concertA); + int expectedPeakBin = + ((midiPitch - chromaMinPitch) * binsPerSemi) % bpo; +/* + cout << "midiPitch = " << midiPitch + << ", name = " << midiPitchName(midiPitch) + << ", frequency = " << frequency + << ", expected peak bin = " + << expectedPeakBin << endl; +*/ + vector<double> signal = generateSinusoid(frequency, + sampleRate, + blockSize); + + double *output = chroma.process(signal.data()); + + int peakBin = -1; + double peakValue = 0.0; + + for (int i = 0; i < bpo; ++i) { + if (i == 0 || output[i] > peakValue) { + peakValue = output[i]; + peakBin = i; + } + } +/* + cout << "peak value = " << peakValue << " at bin " << peakBin << endl; + cout << "(neighbouring values are " + << (peakBin > 0 ? output[peakBin-1] : output[bpo-1]) + << " and " + << (peakBin+1 < bpo ? output[peakBin+1] : output[0]) + << ")" << endl; + if (peakBin != expectedPeakBin) { + cout << "NOTE: peak bin " << peakBin << " does not match expected " << expectedPeakBin << endl; + cout << "bin values are: "; + for (int i = 0; i < bpo; ++i) { + cout << i << ": " << output[i] << " "; + } + cout << endl; + } +*/ + + BOOST_CHECK_EQUAL(peakBin, expectedPeakBin); + } +} + +BOOST_AUTO_TEST_CASE(sinusoid_12tET_440_44100_36) +{ + test_sinusoid_12tET(440.0, 44100.0, 36); +} + +BOOST_AUTO_TEST_CASE(sinusoid_12tET_440_44100_60) +{ + test_sinusoid_12tET(440.0, 44100.0, 60); +} + +BOOST_AUTO_TEST_CASE(sinusoid_12tET_397_44100_60) +{ + test_sinusoid_12tET(397.0, 44100.0, 60); +} + +BOOST_AUTO_TEST_CASE(sinusoid_12tET_440_48000_60) +{ + test_sinusoid_12tET(440.0, 48000.0, 60); +} + +BOOST_AUTO_TEST_SUITE_END()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/TestGetKeyMode.cpp Thu May 30 16:18:13 2019 +0100 @@ -0,0 +1,112 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +#include "dsp/keydetection/GetKeyMode.h" + +#include <iostream> + +#include <cmath> + +#define BOOST_TEST_DYN_LINK +#define BOOST_TEST_MAIN + +#include <boost/test/unit_test.hpp> + +BOOST_AUTO_TEST_SUITE(TestGetKeyMode) + +using std::cout; +using std::endl; +using std::string; +using std::vector; + +string keyName(int index, bool minor) +{ + static string namesMajor[] = { + "C", "Db", "D", "Eb", + "E", "F", "F# / Gb", "G", + "Ab", "A", "Bb", "B" + }; + + static string namesMinor[] = { + "C", "C#", "D", "Eb / D#", + "E", "F", "F#", "G", + "G#", "A", "Bb", "B" + }; + + if (index < 1 || index > 12) return ""; + + std::string name; + if (minor) name = namesMinor[index - 1] + " minor"; + else name = namesMajor[index - 1] + " major"; + return name; +} + +string midiPitchName(int midiPitch) +{ + static string names[] = { + "C", "C#", "D", "D#", + "E", "F", "F#", "G", + "G#", "A", "A#", "B" + }; + + return names[midiPitch % 12]; +} + +vector<double> generateSinusoid(double frequency, + int sampleRate, + int length) +{ + vector<double> buffer; + buffer.reserve(length); + for (int i = 0; i < length; ++i) { + buffer.push_back(sin(i * M_PI * 2.0 * frequency / sampleRate)); + } + return buffer; +} + +BOOST_AUTO_TEST_CASE(sinusoid_12tET) +{ + double concertA = 440.0; + int sampleRate = 44100; + + for (int midiPitch = 48; midiPitch < 96; ++midiPitch) { + + GetKeyMode gkm(sampleRate, concertA, 10, 10); + int blockSize = gkm.getBlockSize(); + int hopSize = gkm.getHopSize(); + + double frequency = concertA * pow(2.0, (midiPitch - 69.0) / 12.0); +/* + cout << "midiPitch = " << midiPitch + << ", name = " << midiPitchName(midiPitch) + << ", frequency = " << frequency << endl; +*/ + int blocks = 4; + int totalLength = blockSize * blocks; + vector<double> signal = generateSinusoid(frequency, sampleRate, + totalLength); + + int key; + + for (int offset = 0; offset + blockSize < totalLength; + offset += hopSize) { + int k = gkm.process(signal.data() + offset); + if (offset == 0) { + key = k; + } else { + BOOST_CHECK_EQUAL(key, k); + } + } + + bool minor = (key > 12); + + int tonic = key; + if (minor) tonic -= 12; + + BOOST_CHECK_EQUAL(tonic, 1 + (midiPitch % 12)); + +// string name = keyName(tonic, minor); +// cout << "key value = " << key << ", name = " << name << endl; + } +} + +BOOST_AUTO_TEST_SUITE_END()