Mercurial > hg > nnls-chroma

--- a/Chordino.cpp	Fri Sep 04 12:22:09 2015 +0100
+++ b/Chordino.cpp	Fri Sep 04 16:45:37 2015 +0100
@@ -121,7 +121,7 @@
     list.push_back(whiteningParam);

     ParameterDescriptor spectralShapeParam;
-    spectralShapeParam.identifier = "spectralshape";
+    spectralShapeParam.identifier = "s";
     spectralShapeParam.name = "spectral shape";
     spectralShapeParam.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
     spectralShapeParam.unit = "";
@@ -167,6 +167,9 @@

     int index = 0;

+    float featureRate =
+        (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
+
     OutputDescriptor d7;
     d7.identifier = "simplechord";
     d7.name = "Chord Estimate";
@@ -178,7 +181,7 @@
     d7.isQuantized = false;
     d7.sampleType = OutputDescriptor::VariableSampleRate;
     d7.hasDuration = false;
-    d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
+    d7.sampleRate = featureRate;
     list.push_back(d7);
     m_outputChords = index++;

@@ -196,7 +199,7 @@
     chordnotes.quantizeStep = 1;
     chordnotes.sampleType = OutputDescriptor::VariableSampleRate;
     chordnotes.hasDuration = true;
-    chordnotes.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
+    chordnotes.sampleRate = featureRate;
     list.push_back(chordnotes);
     m_outputChordnotes = index++;

@@ -210,6 +213,7 @@
     d8.hasKnownExtents = false;
     d8.isQuantized = false;
     d8.sampleType = OutputDescriptor::FixedSampleRate;
+    d8.sampleRate = featureRate;
     d8.hasDuration = false;
     list.push_back(d8);
     m_outputHarmonicChange = index++;
@@ -224,6 +228,7 @@
     loglikelihood.hasKnownExtents = false;
     loglikelihood.isQuantized = false;
     loglikelihood.sampleType = OutputDescriptor::FixedSampleRate;
+    loglikelihood.sampleRate = featureRate;
     loglikelihood.hasDuration = false;
     list.push_back(loglikelihood);
     m_outputLoglikelihood = index++;
@@ -368,7 +373,7 @@

     FeatureList chromaList;

-
+    bool clipwarned = false;

     for (FeatureList::iterator it = tunedSpec.begin(); it != tunedSpec.end(); ++it) {
         Feature currentTunedSpec = *it; // logfreq spectrum
@@ -449,7 +454,7 @@
         vector<float> origchroma = chroma;
         chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
         currentChromas.values = chroma;
-
+
         if (m_doNormalizeChroma > 0) {
             vector<float> chromanorm = vector<float>(3,0);
             switch (int(m_doNormalizeChroma)) {
@@ -489,15 +494,22 @@
         for (int iChord = 0; iChord < nChord; iChord++) {
             tempchordvalue = 0;
             for (int iBin = 0; iBin < 12; iBin++) {
-                tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
+                tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
             }
             for (int iBin = 12; iBin < 24; iBin++) {
                 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
             }
             if (iChord == nChord-1) tempchordvalue *= .7;
             if (tempchordvalue < 0) tempchordvalue = 0.0;
-            tempchordvalue = pow(1.3,tempchordvalue);
-            sumchordvalue+=tempchordvalue;
+            if (tempchordvalue > 20.0) { // guard the pow(1.3, x) below against extreme inputs
+                if (!clipwarned) {
+                    cerr << "WARNING: interim chroma contains extreme chord value " << tempchordvalue << ", clipping this and any others that appear" << endl;
+                    clipwarned = true;
+                }
+                tempchordvalue = 20.0; // clamp to the threshold itself; a lower value would rank clipped chords below unclipped ones
+            }
+            tempchordvalue = pow(1.3, tempchordvalue);
+            sumchordvalue += tempchordvalue;
             currentChordSalience.push_back(tempchordvalue);
         }
         if (sumchordvalue > 0) {
@@ -568,7 +580,11 @@
         }
         /* calculating simple chord change prob */
         for (int iChord = 0; iChord < nChord; iChord++) {
-            chordchange[iFrame-1] += delta[(iFrame-1)*nChord + iChord] * log(delta[(iFrame-1)*nChord + iChord]/delta[iFrame*nChord + iChord]);
+            double num = delta[(iFrame-1) * nChord + iChord];
+            double denom = delta[iFrame * nChord + iChord];
+            double eps = 1e-7;
+            if (denom < eps) denom = eps;
+            chordchange[iFrame-1] += num * log(num / denom + eps);
         }
     }

@@ -601,7 +617,7 @@
         chordchange_feature.hasTimestamp = true;
         chordchange_feature.timestamp = timestamps[iFrame];
         chordchange_feature.values.push_back(chordchange[iFrame]);
-        // cerr << chordchange[iFrame] << endl;
+//        cerr << "putting value " << chordchange[iFrame] << " at time " << chordchange_feature.timestamp << endl;
         fsOut[m_outputHarmonicChange].push_back(chordchange_feature);
     }
--- a/NNLSBase.cpp	Fri Sep 04 12:22:09 2015 +0100
+++ b/NNLSBase.cpp	Fri Sep 04 16:45:37 2015 +0100
@@ -267,6 +267,8 @@
 void
 NNLSBase::setParameter(string identifier, float value)
 {
+//    cerr << "setParameter (" << identifier << ") -> " << value << endl;
+
     if (debug_on) cerr << "--> setParameter" << endl;
     if (identifier == "useNNLS") {
         m_useNNLS = (int) value;
--- a/NNLSChroma.cpp	Fri Sep 04 12:22:09 2015 +0100
+++ b/NNLSChroma.cpp	Fri Sep 04 16:45:37 2015 +0100
@@ -84,6 +84,9 @@

     int index = 0;

+    float featureRate =
+        (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
+
     OutputDescriptor logfreqspecOutput;
     logfreqspecOutput.identifier = "logfreqspec";
     logfreqspecOutput.name = "Log-Frequency Spectrum";
@@ -95,7 +98,7 @@
     logfreqspecOutput.isQuantized = false;
     logfreqspecOutput.sampleType = OutputDescriptor::FixedSampleRate;
     logfreqspecOutput.hasDuration = false;
-    logfreqspecOutput.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
+    logfreqspecOutput.sampleRate = featureRate;
     list.push_back(logfreqspecOutput);
     m_outputLogfreqspec = index++;

@@ -110,7 +113,7 @@
     tunedlogfreqspecOutput.isQuantized = false;
     tunedlogfreqspecOutput.sampleType = OutputDescriptor::FixedSampleRate;
     tunedlogfreqspecOutput.hasDuration = false;
-    tunedlogfreqspecOutput.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
+    tunedlogfreqspecOutput.sampleRate = featureRate;
     list.push_back(tunedlogfreqspecOutput);
     m_outputTunedlogfreqspec = index++;

@@ -125,7 +128,7 @@
     semitonespectrumOutput.isQuantized = false;
     semitonespectrumOutput.sampleType = OutputDescriptor::FixedSampleRate;
     semitonespectrumOutput.hasDuration = false;
-    semitonespectrumOutput.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
+    semitonespectrumOutput.sampleRate = featureRate;
     list.push_back(semitonespectrumOutput);
     m_outputSemitonespectrum = index++;

@@ -141,7 +144,7 @@
     chromaOutput.isQuantized = false;
     chromaOutput.sampleType = OutputDescriptor::FixedSampleRate;
     chromaOutput.hasDuration = false;
-    chromaOutput.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
+    chromaOutput.sampleRate = featureRate;
     list.push_back(chromaOutput);
     m_outputChroma = index++;

@@ -157,7 +160,7 @@
     basschromaOutput.isQuantized = false;
     basschromaOutput.sampleType = OutputDescriptor::FixedSampleRate;
     basschromaOutput.hasDuration = false;
-    basschromaOutput.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
+    basschromaOutput.sampleRate = featureRate;
     list.push_back(basschromaOutput);
     m_outputBasschroma = index++;

@@ -173,7 +176,7 @@
     bothchromaOutput.isQuantized = false;
     bothchromaOutput.sampleType = OutputDescriptor::FixedSampleRate;
     bothchromaOutput.hasDuration = false;
-    bothchromaOutput.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
+    bothchromaOutput.sampleRate = featureRate;
     list.push_back(bothchromaOutput);
     m_outputBothchroma = index++;
     return list;
--- a/Tuning.cpp	Fri Sep 04 12:22:09 2015 +0100
+++ b/Tuning.cpp	Fri Sep 04 16:45:37 2015 +0100
@@ -126,8 +126,8 @@
     d10.maxValue = 452.89;
     d10.isQuantized = false;
     d10.sampleType = OutputDescriptor::FixedSampleRate;
+    d10.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
     d10.hasDuration = false;
-    // d10.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
     list.push_back(d10);
     m_outputLocalTuning = index++;
--- a/viterbi.cpp	Fri Sep 04 12:22:09 2015 +0100
+++ b/viterbi.cpp	Fri Sep 04 16:45:37 2015 +0100
@@ -20,11 +20,14 @@
     /* initialise first frame */
     for (int iState = 0; iState < nState; ++iState) {
         delta[iState] = init[iState] * obs[0][iState];
+//	cerr << "init[" << iState << "] = " << init[iState] << ", obs[0][" << iState << "] = " << obs[0][iState] << endl;
         deltasum += delta[iState];
     }
     for (int iState = 0; iState < nState; ++iState) delta[iState] /= deltasum; // normalise (scale)
     scale->push_back(1.0/deltasum);
     psi.push_back(vector<int>(nState,0));
+
+//    cerr << "nState = " << nState << ", deltasum = " << deltasum << endl;

     /* rest of the forward step */
     for (int iFrame = 1; iFrame < nFrame; ++iFrame) {