Mercurial > hg > nnls-chroma
changeset 164:3c731acad404
Fix some problems reported by the plugin tester: zero sample rate on outputs with a fixed sample type; mismatched parameter id ("spectralshape" in Chordino, "s" in NNLSBase -- changing this in Chordino won't break anything, as it would never have worked under that name anyway); some NaN values
author | Chris Cannam |
---|---|
date | Fri, 04 Sep 2015 16:45:37 +0100 |
parents | 59b26f52550d |
children | 9e9267d6d78a |
files | Chordino.cpp NNLSBase.cpp NNLSChroma.cpp Tuning.cpp viterbi.cpp |
diffstat | 5 files changed, 41 insertions(+), 17 deletions(-) [+] |
line wrap: on
line diff
--- a/Chordino.cpp Fri Sep 04 12:22:09 2015 +0100 +++ b/Chordino.cpp Fri Sep 04 16:45:37 2015 +0100 @@ -121,7 +121,7 @@ list.push_back(whiteningParam); ParameterDescriptor spectralShapeParam; - spectralShapeParam.identifier = "spectralshape"; + spectralShapeParam.identifier = "s"; spectralShapeParam.name = "spectral shape"; spectralShapeParam.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics."; spectralShapeParam.unit = ""; @@ -167,6 +167,9 @@ int index = 0; + float featureRate = + (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; + OutputDescriptor d7; d7.identifier = "simplechord"; d7.name = "Chord Estimate"; @@ -178,7 +181,7 @@ d7.isQuantized = false; d7.sampleType = OutputDescriptor::VariableSampleRate; d7.hasDuration = false; - d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; + d7.sampleRate = featureRate; list.push_back(d7); m_outputChords = index++; @@ -196,7 +199,7 @@ chordnotes.quantizeStep = 1; chordnotes.sampleType = OutputDescriptor::VariableSampleRate; chordnotes.hasDuration = true; - chordnotes.sampleRate = (m_stepSize == 0) ? 
m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; + chordnotes.sampleRate = featureRate; list.push_back(chordnotes); m_outputChordnotes = index++; @@ -210,6 +213,7 @@ d8.hasKnownExtents = false; d8.isQuantized = false; d8.sampleType = OutputDescriptor::FixedSampleRate; + d8.sampleRate = featureRate; d8.hasDuration = false; list.push_back(d8); m_outputHarmonicChange = index++; @@ -224,6 +228,7 @@ loglikelihood.hasKnownExtents = false; loglikelihood.isQuantized = false; loglikelihood.sampleType = OutputDescriptor::FixedSampleRate; + loglikelihood.sampleRate = featureRate; loglikelihood.hasDuration = false; list.push_back(loglikelihood); m_outputLoglikelihood = index++; @@ -368,7 +373,7 @@ FeatureList chromaList; - + bool clipwarned = false; for (FeatureList::iterator it = tunedSpec.begin(); it != tunedSpec.end(); ++it) { Feature currentTunedSpec = *it; // logfreq spectrum @@ -449,7 +454,7 @@ vector<float> origchroma = chroma; chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas currentChromas.values = chroma; - + if (m_doNormalizeChroma > 0) { vector<float> chromanorm = vector<float>(3,0); switch (int(m_doNormalizeChroma)) { @@ -489,15 +494,22 @@ for (int iChord = 0; iChord < nChord; iChord++) { tempchordvalue = 0; for (int iBin = 0; iBin < 12; iBin++) { - tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin]; + tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin]; } for (int iBin = 12; iBin < 24; iBin++) { tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin]; } if (iChord == nChord-1) tempchordvalue *= .7; if (tempchordvalue < 0) tempchordvalue = 0.0; - tempchordvalue = pow(1.3,tempchordvalue); - sumchordvalue+=tempchordvalue; + if (tempchordvalue > 20.0) { + if (!clipwarned) { + cerr << "WARNING: interim chroma contains extreme chord value " << tempchordvalue << ", clipping this and any others that appear" << endl; + clipwarned = true; + } + tempchordvalue = 10.0; + } + 
tempchordvalue = pow(1.3, tempchordvalue); + sumchordvalue += tempchordvalue; currentChordSalience.push_back(tempchordvalue); } if (sumchordvalue > 0) { @@ -568,7 +580,11 @@ } /* calculating simple chord change prob */ for (int iChord = 0; iChord < nChord; iChord++) { - chordchange[iFrame-1] += delta[(iFrame-1)*nChord + iChord] * log(delta[(iFrame-1)*nChord + iChord]/delta[iFrame*nChord + iChord]); + double num = delta[(iFrame-1) * nChord + iChord]; + double denom = delta[iFrame * nChord + iChord]; + double eps = 1e-7; + if (denom < eps) denom = eps; + chordchange[iFrame-1] += num * log(num / denom + eps); } } @@ -601,7 +617,7 @@ chordchange_feature.hasTimestamp = true; chordchange_feature.timestamp = timestamps[iFrame]; chordchange_feature.values.push_back(chordchange[iFrame]); - // cerr << chordchange[iFrame] << endl; +// cerr << "putting value " << chordchange[iFrame] << " at time " << chordchange_feature.timestamp << endl; fsOut[m_outputHarmonicChange].push_back(chordchange_feature); }
--- a/NNLSBase.cpp Fri Sep 04 12:22:09 2015 +0100 +++ b/NNLSBase.cpp Fri Sep 04 16:45:37 2015 +0100 @@ -267,6 +267,8 @@ void NNLSBase::setParameter(string identifier, float value) { +// cerr << "setParameter (" << identifier << ") -> " << value << endl; + if (debug_on) cerr << "--> setParameter" << endl; if (identifier == "useNNLS") { m_useNNLS = (int) value;
--- a/NNLSChroma.cpp Fri Sep 04 12:22:09 2015 +0100 +++ b/NNLSChroma.cpp Fri Sep 04 16:45:37 2015 +0100 @@ -84,6 +84,9 @@ int index = 0; + float featureRate = + (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; + OutputDescriptor logfreqspecOutput; logfreqspecOutput.identifier = "logfreqspec"; logfreqspecOutput.name = "Log-Frequency Spectrum"; @@ -95,7 +98,7 @@ logfreqspecOutput.isQuantized = false; logfreqspecOutput.sampleType = OutputDescriptor::FixedSampleRate; logfreqspecOutput.hasDuration = false; - logfreqspecOutput.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; + logfreqspecOutput.sampleRate = featureRate; list.push_back(logfreqspecOutput); m_outputLogfreqspec = index++; @@ -110,7 +113,7 @@ tunedlogfreqspecOutput.isQuantized = false; tunedlogfreqspecOutput.sampleType = OutputDescriptor::FixedSampleRate; tunedlogfreqspecOutput.hasDuration = false; - tunedlogfreqspecOutput.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; + tunedlogfreqspecOutput.sampleRate = featureRate; list.push_back(tunedlogfreqspecOutput); m_outputTunedlogfreqspec = index++; @@ -125,7 +128,7 @@ semitonespectrumOutput.isQuantized = false; semitonespectrumOutput.sampleType = OutputDescriptor::FixedSampleRate; semitonespectrumOutput.hasDuration = false; - semitonespectrumOutput.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; + semitonespectrumOutput.sampleRate = featureRate; list.push_back(semitonespectrumOutput); m_outputSemitonespectrum = index++; @@ -141,7 +144,7 @@ chromaOutput.isQuantized = false; chromaOutput.sampleType = OutputDescriptor::FixedSampleRate; chromaOutput.hasDuration = false; - chromaOutput.sampleRate = (m_stepSize == 0) ? 
m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; + chromaOutput.sampleRate = featureRate; list.push_back(chromaOutput); m_outputChroma = index++; @@ -157,7 +160,7 @@ basschromaOutput.isQuantized = false; basschromaOutput.sampleType = OutputDescriptor::FixedSampleRate; basschromaOutput.hasDuration = false; - basschromaOutput.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; + basschromaOutput.sampleRate = featureRate; list.push_back(basschromaOutput); m_outputBasschroma = index++; @@ -173,7 +176,7 @@ bothchromaOutput.isQuantized = false; bothchromaOutput.sampleType = OutputDescriptor::FixedSampleRate; bothchromaOutput.hasDuration = false; - bothchromaOutput.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; + bothchromaOutput.sampleRate = featureRate; list.push_back(bothchromaOutput); m_outputBothchroma = index++; return list;
--- a/Tuning.cpp Fri Sep 04 12:22:09 2015 +0100 +++ b/Tuning.cpp Fri Sep 04 16:45:37 2015 +0100 @@ -126,8 +126,8 @@ d10.maxValue = 452.89; d10.isQuantized = false; d10.sampleType = OutputDescriptor::FixedSampleRate; + d10.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; d10.hasDuration = false; - // d10.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; list.push_back(d10); m_outputLocalTuning = index++;
--- a/viterbi.cpp Fri Sep 04 12:22:09 2015 +0100 +++ b/viterbi.cpp Fri Sep 04 16:45:37 2015 +0100 @@ -20,11 +20,14 @@ /* initialise first frame */ for (int iState = 0; iState < nState; ++iState) { delta[iState] = init[iState] * obs[0][iState]; +// cerr << "init[" << iState << "] = " << init[iState] << ", obs[0][" << iState << "] = " << obs[0][iState] << endl; deltasum += delta[iState]; } for (int iState = 0; iState < nState; ++iState) delta[iState] /= deltasum; // normalise (scale) scale->push_back(1.0/deltasum); psi.push_back(vector<int>(nState,0)); + +// cerr << "nState = " << nState << ", deltasum = " << deltasum << endl; /* rest of the forward step */ for (int iFrame = 1; iFrame < nFrame; ++iFrame) {