changeset 325:4cf4313d7e30 livemode

Always use q=0.8 and accept the hit on speed -- the templates are made for that configuration and it does work better. Also make some adjustments to thresholding and peak picking, for live mode in particular.
author Chris Cannam
date Mon, 18 May 2015 13:58:27 +0100
parents 71ffe35578fa
children caaac814c22a
files src/LiveInstruments.cpp src/Silvet.cpp src/Silvet.h testdata/evaluation/run.sh
diffstat 4 files changed, 44 insertions(+), 24 deletions(-) [+]
line wrap: on
line diff
--- a/src/LiveInstruments.cpp	Mon May 18 11:06:14 2015 +0100
+++ b/src/LiveInstruments.cpp	Mon May 18 13:58:27 2015 +0100
@@ -93,7 +93,7 @@
     live.maxPolyphony = original.maxPolyphony;
     live.pitchSparsity = original.pitchSparsity;
     live.sourceSparsity = original.sourceSparsity;
-    live.levelThreshold = original.levelThreshold / 20;
+    live.levelThreshold = original.levelThreshold / 15;
 
     return live;
 }
--- a/src/Silvet.cpp	Mon May 18 11:06:14 2015 +0100
+++ b/src/Silvet.cpp	Mon May 18 13:58:27 2015 +0100
@@ -512,14 +512,8 @@
                         maxFreq,
                         bpo);
 
-    // For params.q, the MIREX code uses 0.8, but it seems that with
-    // atomHopFactor of 0.3, using q == 0.9 or lower drops the FFT
-    // size to 512 from 1024 and alters some other processing
-    // parameters, making everything much, much slower. Could be a
-    // flaw in the CQ parameter calculations, must check. For
-    // atomHopFactor == 1, q == 0.8 is fine
-    params.q = (m_mode == HighQualityMode ? 0.95 : 0.8);
-    params.atomHopFactor = (m_mode == HighQualityMode ? 0.3 : 1.0);
+    params.q = 0.8;
+    params.atomHopFactor = (m_mode == LiveMode ? 1.0 : 0.3);
     params.threshold = 0.0005;
     params.decimator =
         (m_mode == LiveMode ?
@@ -645,16 +639,32 @@
     
     const InstrumentPack &pack(getPack(m_instrument));
 
-    for (int i = 0; i < (int)filtered.size(); ++i) {
+    int width = filtered.size();
+
+    double silenceThreshold = 0.01;
+    
+    for (int i = 0; i < width; ++i) {
+
+        RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1 + i);
+        float inputGain = getInputGainAt(timestamp);
+
         Feature f;
+        double rms = 0.0;
+
         for (int j = 0; j < pack.templateHeight; ++j) {
-            f.values.push_back(float(filtered[i][j]));
+            double v = filtered[i][j];
+            rms += v * v;
+            f.values.push_back(float(v));
         }
+
+        rms = sqrt(rms / pack.templateHeight);
+        if (rms / inputGain < silenceThreshold) {
+            filtered[i].clear();
+        }
+        
         fs[m_fcqOutputNo].push_back(f);
     }
-
-    int width = filtered.size();
-
+    
     Grid localPitches(width);
 
     bool wantShifts = (m_mode == HighQualityMode) && m_fineTuning;
@@ -687,7 +697,8 @@
                 results.push_back
                     (async(std::launch::async,
                            [&](int index) {
-                               return applyEM(pack, filtered.at(index), wantShifts);
+                               return applyEM
+                                   (pack, filtered.at(index), wantShifts);
                            }, i + j));
             }
             for (int j = 0; j < emThreadCount && i + j < width; ++j) {
@@ -763,11 +774,13 @@
     double columnThreshold = 1e-5;
     
     if (m_mode == LiveMode) {
-        columnThreshold /= 20;
+        columnThreshold /= 15;
     }
     
     vector<double> pitches(pack.templateNoteCount, 0.0);
     vector<int> bestShifts;
+
+    if (column.empty()) return { pitches, bestShifts };
     
     double sum = 0.0;
     for (int j = 0; j < pack.templateHeight; ++j) {
@@ -923,14 +936,18 @@
         // In live mode with only a 12-bpo CQ, we are very likely to
         // get clusters of two or three high scores at a time for
         // neighbouring semitones. Eliminate these by picking only the
-        // peaks. This means we can't recognise actual semitone chords
-        // if they ever appear, but it's not as if live mode is good
-        // enough for that to be a big deal anyway.
+        // peaks (except that we never eliminate a note that has
+        // already been established as currently playing). This means
+        // we can't recognise actual semitone chords if they ever
+        // appear, but it's not as if live mode is good enough for
+        // that to be a big deal anyway.
         if (m_mode == LiveMode) {
-            if (j == 0 ||
-                j + 1 == pack.templateNoteCount ||
-                pitches[j] < pitches[j-1] ||
-                pitches[j] < pitches[j+1]) {
+            if (m_current.find(j) == m_current.end() &&
+                (j == 0 ||
+                 j + 1 == pack.templateNoteCount ||
+                 pitches[j] < pitches[j-1] ||
+                 pitches[j] < pitches[j+1])) {
+                // not a peak or a currently-playing note: skip it
                 continue;
             }
         }
@@ -1016,11 +1033,13 @@
         }
 
         if (duration == durationThreshold) {
+            m_current.insert(note);
             emitOnset(start, note, shiftCount, onsetFeatures);
         }            
         
         if (active.find(note) == active.end()) {
             // the note was playing but just ended
+            m_current.erase(note);
             emitNote(start, end, note, shiftCount, noteFeatures);
         }
     }
--- a/src/Silvet.h	Mon May 18 11:06:14 2015 +0100
+++ b/src/Silvet.h	Mon May 18 13:58:27 2015 +0100
@@ -105,6 +105,7 @@
     vector<map<int, double> > m_pianoRoll;
     vector<map<int, int> > m_pianoRollShifts;
     map<Vamp::RealTime, float> m_inputGains;
+    set<int> m_current;
 
     Grid preProcess(const Grid &);
 
--- a/testdata/evaluation/run.sh	Mon May 18 11:06:14 2015 +0100
+++ b/testdata/evaluation/run.sh	Mon May 18 13:58:27 2015 +0100
@@ -98,7 +98,7 @@
 	--csv-one-file "$outfile" \
 	--csv-force \
 	--transform "$transfile" \
-	"$tmpwav"
+	"$tmpwav" 2>/dev/null
 
     cat "$outfile" | \
 	sed 's/^[^,]*,//' | \