Mercurial > hg > qm-vamp-plugins
changeset 60:90fa946fda40
* Add key strength plot to key detector
* Fix vector overrun in similarity plugin if some empty frames have been
encountered
* Fix uninitialised m_count in MFCC plugin
* Doc update
author | Chris Cannam <c.cannam@qmul.ac.uk> |
---|---|
date | Fri, 01 Feb 2008 16:47:39 +0000 |
parents | af3ec1585323 |
children | c78ec2a208e2 |
files | README.txt plugins/KeyDetect.cpp plugins/KeyDetect.h plugins/MFCCPlugin.cpp plugins/SimilarityPlugin.cpp plugins/SimilarityPlugin.h qm-vamp-plugins.pro |
diffstat | 7 files changed, 92 insertions(+), 18 deletions(-) [+] |
line wrap: on
line diff
--- a/README.txt Thu Jan 31 12:04:59 2008 +0000 +++ b/README.txt Fri Feb 01 16:47:39 2008 +0000 @@ -33,8 +33,8 @@ segmentation of music audio, and a Mel-frequency cepstral coefficients calculation plugin. -This release also includes significant fixes to the existing key -detector and chromagram plugins. +This release also includes significant updates to the existing key +detector, tempo tracker, and chromagram plugins. To Install @@ -122,7 +122,8 @@ References: M. E. P. Davies and M. D. Plumbley. Context-dependent beat tracking of musical audio. - Technical Report C4DM-TR-06-02. 5 April 2006. + In IEEE Transactions on Audio, Speech and Language + Processing. Vol. 15, No. 3, pp1009-1020, 2007. M. E. P. Davies and M. D. Plumbley. Beat Tracking With A Two State Model. @@ -153,15 +154,30 @@ The Key Detector plugin analyses a single channel of audio and continuously estimates the key of the music. -It has three outputs: the tonic pitch of the key; a major or minor -mode flag; and key (combining the tonic and major/minor into a single -value). These outputs have the values: +It has four outputs: the tonic pitch of the key; a major or minor mode +flag; the key (combining the tonic and major/minor into a single +value); and a key strength plot which reports the degree to which the +chroma vector extracted from each input block correlates to the stored +key profiles for each major and minor key. The key profiles are drawn +from analysis of Book I of the Well Tempered Klavier by J S Bach, +recorded at A=440 equal temperament. + +The outputs have the values: Tonic pitch: C = 1, C#/Db = 2, ..., B = 12 + Major/minor mode: major = 0, minor = 1 + Key: C major = 1, C#/Db major = 2, ..., B major = 12 C minor = 13, C#/Db minor = 14, ..., B minor = 24 + Key Strength Plot: 25 separate bins per feature, separated into 1-12 + (major from C) and 14-25 (minor from C). Bin 13 is unused, not + for superstitious reasons but simply so as to delimit the major + and minor areas if they are displayed on a single plot by the + plugin host. Higher bin values show increased correlation with + the key profile for that key. + The outputs are also labelled with pitch or key as text.
--- a/plugins/KeyDetect.cpp Thu Jan 31 12:04:59 2008 +0000 +++ b/plugins/KeyDetect.cpp Fri Feb 01 16:47:39 2008 +0000 @@ -17,6 +17,12 @@ #include <cmath> +// Order for circle-of-5ths plotting +static int conversion[24] = +{ 7, 12, 5, 10, 3, 8, 1, 6, 11, 4, 9, 2, + 16, 21, 14, 19, 24, 17, 22, 15, 20, 13, 18, 23 }; + + KeyDetector::KeyDetector(float inputSampleRate) : Plugin(inputSampleRate), m_stepSize(0), @@ -232,6 +238,25 @@ d.sampleType = OutputDescriptor::OneSamplePerStep; list.push_back(d); + d.identifier = "keystrength"; + d.name = "Key Strength Plot"; + d.unit = ""; + d.description = "Correlation of the chroma vector with stored key profile for each major and minor key"; + d.hasFixedBinCount = true; + d.binCount = 25; + d.hasKnownExtents = false; + d.isQuantized = false; + d.sampleType = OutputDescriptor::OneSamplePerStep; + for (int i = 0; i < 24; ++i) { + if (i == 12) d.binNames.push_back(" "); + int idx = conversion[i]; + std::string label = getKeyName(idx > 12 ? idx-12 : idx); + if (i < 12) label += " major"; + else label += " minor"; + d.binNames.push_back(label); + } + list.push_back(d); + return list; } @@ -255,7 +280,6 @@ int tonic = key; if (tonic > 12) tonic -= 12; - int prevTonic = m_prevKey; if (prevTonic > 12) prevTonic -= 12; @@ -293,6 +317,16 @@ m_prevKey = key; + Feature ksf; + ksf.values.reserve(25); + double *keystrengths = m_getKeyMode->getKeyStrengths(); + for (int i = 0; i < 24; ++i) { + if (i == 12) ksf.values.push_back(-1); + ksf.values.push_back(keystrengths[conversion[i]-1]); + } + ksf.hasTimestamp = false; + returnFeatures[3].push_back(ksf); + return returnFeatures; } @@ -328,7 +362,7 @@ } const char * -KeyDetector::getKeyName(int index) +KeyDetector::getKeyName(int index) const { // Keys are numbered with 1 => C, 12 => B // This is based on chromagram base set to a C in qm-dsp's GetKeyMode.cpp
--- a/plugins/KeyDetect.h Thu Jan 31 12:04:59 2008 +0000 +++ b/plugins/KeyDetect.h Fri Feb 01 16:47:39 2008 +0000 @@ -79,7 +79,7 @@ float m_tuningFrequency; int m_length; - const char *getKeyName(int index); + const char *getKeyName(int index) const; GetKeyMode* m_getKeyMode; double* m_inputFrame;
--- a/plugins/MFCCPlugin.cpp Thu Jan 31 12:04:59 2008 +0000 +++ b/plugins/MFCCPlugin.cpp Fri Feb 01 16:47:39 2008 +0000 @@ -25,7 +25,8 @@ m_config(lrintf(inputSampleRate)), m_mfcc(0), m_step(1024), - m_block(2048) + m_block(2048), + m_count(0) { m_bins = 20; m_wantC0 = true; @@ -199,6 +200,7 @@ m_binsums[i] = 0.0; } } + m_count = 0; } size_t
--- a/plugins/SimilarityPlugin.cpp Thu Jan 31 12:04:59 2008 +0000 +++ b/plugins/SimilarityPlugin.cpp Fri Feb 01 16:47:39 2008 +0000 @@ -148,6 +148,10 @@ m_lastNonEmptyFrame = std::vector<int>(m_channels); for (int i = 0; i < m_channels; ++i) m_lastNonEmptyFrame[i] = -1; + + m_emptyFrameCount = std::vector<int>(m_channels); + for (int i = 0; i < m_channels; ++i) m_emptyFrameCount[i] = 0; + m_frameNo = 0; int decimationFactor = getDecimationFactor(); @@ -204,12 +208,12 @@ m_rhythmClipFrames = int(ceil((m_rhythmClipDuration * m_processRate) / m_rhythmClipFrameSize)); - std::cerr << "SimilarityPlugin::initialise: rhythm clip is " - << m_rhythmClipFrames << " frames of size " - << m_rhythmClipFrameSize << " at process rate " - << m_processRate << " ( = " - << (float(m_rhythmClipFrames * m_rhythmClipFrameSize) / m_processRate) << " sec )" - << std::endl; +// std::cerr << "SimilarityPlugin::initialise: rhythm clip requires " +// << m_rhythmClipFrames << " frames of size " +// << m_rhythmClipFrameSize << " at process rate " +// << m_processRate << " ( = " +// << (float(m_rhythmClipFrames * m_rhythmClipFrameSize) / m_processRate) << " sec )" +// << std::endl; MFCCConfig config(m_processRate); config.fftsize = m_rhythmClipFrameSize; @@ -245,6 +249,14 @@ m_rhythmValues[i].clear(); } + for (int i = 0; i < m_lastNonEmptyFrame.size(); ++i) { + m_lastNonEmptyFrame[i] = -1; + } + + for (int i = 0; i < m_emptyFrameCount.size(); ++i) { + m_emptyFrameCount[i] = 0; + } + m_done = false; } @@ -536,6 +548,7 @@ } } } + m_emptyFrameCount[c]++; continue; } @@ -645,8 +658,9 @@ // We want to take values up to, but not including, the // last non-empty frame (which may be partial) - int sz = m_lastNonEmptyFrame[i]; + int sz = m_lastNonEmptyFrame[i] - m_emptyFrameCount[i]; if (sz < 0) sz = 0; + if (sz >= m_values[i].size()) sz = m_values[i].size()-1; count = 0; for (int k = 0; k < sz; ++k) { @@ -752,6 +766,13 @@ { if (!needRhythm()) return FeatureMatrix(); +// std::cerr << "SimilarityPlugin::initialise: rhythm clip for channel 0 contains " +// << m_rhythmValues[0].size() << " frames of size " +// << m_rhythmClipFrameSize << " at process rate " +// << m_processRate << " ( = " +// << (float(m_rhythmValues[0].size() * m_rhythmClipFrameSize) / m_processRate) << " sec )" +// << std::endl; + BeatSpectrum bscalc; CosineDistance cd;
--- a/plugins/SimilarityPlugin.h Thu Jan 31 12:04:59 2008 +0000 +++ b/plugins/SimilarityPlugin.h Fri Feb 01 16:47:39 2008 +0000 @@ -88,6 +88,7 @@ static const float m_allRhythm; std::vector<int> m_lastNonEmptyFrame; // per channel + std::vector<int> m_emptyFrameCount; // per channel mutable int m_distanceMatrixOutput; mutable int m_distanceVectorOutput;
--- a/qm-vamp-plugins.pro Thu Jan 31 12:04:59 2008 +0000 +++ b/qm-vamp-plugins.pro Fri Feb 01 16:47:39 2008 +0000 @@ -4,7 +4,7 @@ CONFIG += plugin warn_on release CONFIG -= qt -linux-g++:QMAKE_CXXFLAGS_RELEASE += -DNDEBUG -O3 -march=pentium3 -msse +linux-g++:QMAKE_CXXFLAGS_RELEASE += -DNDEBUG -O3 -march=pentium3 -mfpmath=sse -msse -ffast-math OBJECTS_DIR = tmp_obj MOC_DIR = tmp_moc