changeset 331:e8e37f471650 livemode

Merge
author Chris Cannam
date Wed, 29 Apr 2015 18:58:00 +0100
parents 8f5cfd7dbaa5 (current diff) 6f8fa7fc8fdc (diff)
children 1272070a1011
files src/Silvet.cpp
diffstat 3 files changed, 211 insertions(+), 112 deletions(-) [+]
line wrap: on
line diff
--- a/src/LiveInstruments.cpp	Tue Apr 28 18:56:54 2015 +0100
+++ b/src/LiveInstruments.cpp	Wed Apr 29 18:58:00 2015 +0100
@@ -27,47 +27,57 @@
     vector<InstrumentPack::Templates> templates;
 
 //            cerr << "LiveAdapter: reduced template height is " << SILVET_TEMPLATE_HEIGHT/5 << endl;
-            
-    for (vector<InstrumentPack::Templates>::const_iterator i =
-	     original.templates.begin();
-	 i != original.templates.end(); ++i) {
 
-	InstrumentPack::Templates t;
-	t.lowestNote = i->lowestNote;
-	t.highestNote = i->highestNote;
-	t.data.resize(i->data.size());
+    bool merge = false;
+    // The live template for piano has only one piano in it, so as
+    // to process faster. We make it by averaging the originals
+    if (original.name == "Piano") {
+        merge = true;
+    }
 
-	for (int j = 0; j < int(i->data.size()); ++j) {
+    InstrumentPack::Templates t;
+    bool first = true;
+    
+    for (const auto &origt: original.templates) {
+
+	t.lowestNote = origt.lowestNote;
+	t.highestNote = origt.highestNote;
+        t.data.resize(origt.data.size());
+
+	for (int j = 0; j < int(origt.data.size()); ++j) {
 
 	    t.data[j].resize(SILVET_TEMPLATE_HEIGHT/5);
 
-            float sum = 0.f;
-
 	    for (int k = 0; k < SILVET_TEMPLATE_HEIGHT/5; ++k) {
 
-                t.data[j][k] = 0.f;
+                if (!merge || first) {
+                    t.data[j][k] = 0.f;
+                }
 
                 for (int m = 0; m < 5; ++m) {
-                    t.data[j][k] += i->data[j][k * 5 + m + 2];
+                    t.data[j][k] += origt.data[j][k * 5 + m + 2];
                 }
-                
-                sum += t.data[j][k];
 	    }
-            
-	    // re-normalise
-            if (sum > 0.f) {
-                for (int k = 0; k < (int)t.data[j].size(); ++k) {
-                    t.data[j][k] *= 1.f / sum;
-                }
-            }
 	}
 
-	templates.push_back(t);
+        if (!merge) {
+            templates.push_back(t);
+            t = InstrumentPack::Templates();
+        }
 
-        // The live template for piano has only one piano in it, so as
-        // to process faster
-        if (original.name == "Piano") {
-            break;
+        first = false;
+    }
+
+    if (merge) {
+        templates.push_back(t);
+    }
+
+    // re-normalise
+    for (auto &t: templates) {
+        for (auto &d: t.data) {
+            float sum = 0.f;
+            for (auto v: d) sum += v;
+            for (auto &v: d) v /= sum;
         }
     }
     
--- a/src/Silvet.cpp	Tue Apr 28 18:56:54 2015 +0100
+++ b/src/Silvet.cpp	Wed Apr 29 18:58:00 2015 +0100
@@ -253,6 +253,22 @@
     m_notesOutputNo = list.size();
     list.push_back(d);
 
+    d.identifier = "onsets";
+    d.name = "Note onsets";
+    d.description = "Note onsets, without durations. These can be calculated sooner than complete notes, because it isn't necessary to wait for a note to finish before returning its feature. Each event has time, estimated fundamental frequency in Hz, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture.";
+    d.unit = "Hz";
+    d.hasFixedBinCount = true;
+    d.binCount = 2;
+    d.binNames.push_back("Frequency");
+    d.binNames.push_back("Velocity");
+    d.hasKnownExtents = false;
+    d.isQuantized = false;
+    d.sampleType = OutputDescriptor::VariableSampleRate;
+    d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
+    d.hasDuration = false;
+    m_onsetsOutputNo = list.size();
+    list.push_back(d);
+
     d.identifier = "timefreq";
     d.name = "Time-frequency distribution";
     d.description = "Filtered constant-Q time-frequency distribution as used as input to the expectation-maximisation algorithm.";
@@ -291,7 +307,7 @@
     d.binNames.clear();
     if (m_cq) {
         for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
-            d.binNames.push_back(noteName(i, 0, 1));
+            d.binNames.push_back(getNoteName(i, 0, 1));
         }
     }
     d.hasKnownExtents = false;
@@ -311,7 +327,7 @@
     d.binNames.clear();
     if (m_cq) {
         for (int i = 0; i < 12; ++i) {
-            d.binNames.push_back(chromaName(i));
+            d.binNames.push_back(getChromaName(i));
         }
     }
     d.hasKnownExtents = false;
@@ -355,7 +371,7 @@
 }
 
 std::string
-Silvet::chromaName(int pitch) const
+Silvet::getChromaName(int pitch) const
 {
     static const char *names[] = {
         "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"
@@ -365,9 +381,9 @@
 }
     
 std::string
-Silvet::noteName(int note, int shift, int shiftCount) const
+Silvet::getNoteName(int note, int shift, int shiftCount) const
 {
-    string n = chromaName(note % 12);
+    string n = getChromaName(note % 12);
 
     int oct = (note + 9) / 12; 
     
@@ -375,7 +391,7 @@
 
     float pshift = 0.f;
     if (shiftCount > 1) {
-        // see noteFrequency below
+        // see getNoteFrequency below
         pshift = 
             float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
     }
@@ -392,7 +408,7 @@
 }
 
 float
-Silvet::noteFrequency(int note, int shift, int shiftCount) const
+Silvet::getNoteFrequency(int note, int shift, int shiftCount) const
 {
     // Convert shift number to a pitch shift. The given shift number
     // is an offset into the template array, which starts with some
@@ -694,10 +710,12 @@
         
     for (int i = 0; i < width; ++i) {
 
-        // This returns a filtered column, and pushes the
-        // up-to-max-polyphony activation column to m_pianoRoll
-        vector<double> filtered = postProcess
-            (localPitches[i], localBestShifts[i], wantShifts);
+        vector<double> filtered;
+
+        for (int j = 0; j < pack.templateNoteCount; ++j) {
+            m_postFilter[j]->push(localPitches[i][j]);
+            filtered.push_back(m_postFilter[j]->get());
+        }
 
         RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1);
         float inputGain = getInputGainAt(timestamp);
@@ -716,13 +734,24 @@
             f.values[j % 12] += filtered[j] / inputGain;
         }
         fs[m_chromaOutputNo].push_back(f);
-        
-        FeatureList noteFeatures = noteTrack(shiftCount);
 
+        // This pushes the up-to-max-polyphony activation column to
+        // m_pianoRoll
+        postProcess(filtered, localBestShifts[i], wantShifts);
+
+        auto events = noteTrack(shiftCount);
+
+        FeatureList noteFeatures = events.first;
         for (FeatureList::const_iterator fi = noteFeatures.begin();
              fi != noteFeatures.end(); ++fi) {
             fs[m_notesOutputNo].push_back(*fi);
         }
+
+        FeatureList onsetFeatures = events.second;
+        for (FeatureList::const_iterator fi = onsetFeatures.begin();
+             fi != onsetFeatures.end(); ++fi) {
+            fs[m_onsetsOutputNo].push_back(*fi);
+        }
     }
 }
 
@@ -873,37 +902,13 @@
     return out;
 }
     
-vector<double>
+void
 Silvet::postProcess(const vector<double> &pitches,
                     const vector<int> &bestShifts,
                     bool wantShifts)
 {
     const InstrumentPack &pack(getPack(m_instrument));
 
-    vector<double> filtered;
-
-    for (int j = 0; j < pack.templateNoteCount; ++j) {
-        m_postFilter[j]->push(pitches[j]);
-        filtered.push_back(m_postFilter[j]->get());
-    }
-
-    if (m_mode == LiveMode) {
-        // In live mode with only a 12-bpo CQ, we are very likely to
-        // get clusters of two or three high scores at a time for
-        // neighbouring semitones. Eliminate these by picking only the
-        // peaks. This means we can't recognise actual semitone chords
-        // if they ever appear, but it's not as if live mode is good
-        // enough for that to be a big deal anyway.
-        for (int j = 0; j < pack.templateNoteCount; ++j) {
-            if (j > 0 && j + 1 < pack.templateNoteCount &&
-                filtered[j] >= filtered[j-1] &&
-                filtered[j] >= filtered[j+1]) {
-            } else {
-                filtered[j] = 0.0;
-            }
-        }
-    }
-
     // Threshold for level and reduce number of candidate pitches
 
     typedef std::multimap<double, int> ValueIndexMap;
@@ -911,8 +916,25 @@
     ValueIndexMap strengths;
 
     for (int j = 0; j < pack.templateNoteCount; ++j) {
-        double strength = filtered[j];
+
+        double strength = pitches[j];
         if (strength < pack.levelThreshold) continue;
+        
+        // In live mode with only a 12-bpo CQ, we are very likely to
+        // get clusters of two or three high scores at a time for
+        // neighbouring semitones. Eliminate these by picking only the
+        // peaks. This means we can't recognise actual semitone chords
+        // if they ever appear, but it's not as if live mode is good
+        // enough for that to be a big deal anyway.
+        if (m_mode == LiveMode) {
+            if (j == 0 ||
+                j + 1 == pack.templateNoteCount ||
+                pitches[j] < pitches[j-1] ||
+                pitches[j] < pitches[j+1]) {
+                continue;
+            }
+        }
+
         strengths.insert(ValueIndexMap::value_type(strength, j));
     }
 
@@ -941,10 +963,10 @@
         m_pianoRollShifts.push_back(activeShifts);
     }
 
-    return filtered;
+    return;
 }
 
-Vamp::Plugin::FeatureList
+pair<Vamp::Plugin::FeatureList, Vamp::Plugin::FeatureList>
 Silvet::noteTrack(int shiftCount)
 {        
     // Minimum duration pruning, and conversion to notes. We can only
@@ -961,13 +983,15 @@
     double columnDuration = 1.0 / m_colsPerSec;
 
     // only keep notes >= 100ms or thereabouts
-    int durationThreshold = floor(0.1 / columnDuration); // columns
+    double durationThrSec = 0.1;
+    if (m_mode == LiveMode) durationThrSec = 0.07;
+    int durationThreshold = floor(durationThrSec / columnDuration); // in cols
     if (durationThreshold < 1) durationThreshold = 1;
 
-    FeatureList noteFeatures;
+    FeatureList noteFeatures, onsetFeatures;
 
     if (width < durationThreshold + 1) {
-        return noteFeatures;
+        return { noteFeatures, onsetFeatures };
     }
     
     //!!! try: repeated note detection? (look for change in first derivative of the pitch matrix)
@@ -976,13 +1000,7 @@
          ni != m_pianoRoll[width-1].end(); ++ni) {
 
         int note = ni->first;
-        
-        if (active.find(note) != active.end()) {
-            // the note is still playing
-            continue;
-        }
 
-        // the note was playing but just ended
         int end = width;
         int start = end-1;
 
@@ -991,16 +1009,25 @@
         }
         ++start;
 
-        if ((end - start) < durationThreshold) {
+        int duration = end - start;
+            
+        if (duration < durationThreshold) {
             continue;
         }
 
-        emitNote(start, end, note, shiftCount, noteFeatures);
+        if (duration == durationThreshold) {
+            emitOnset(start, note, shiftCount, onsetFeatures);
+        }            
+        
+        if (active.find(note) == active.end()) {
+            // the note was playing but just ended
+            emitNote(start, end, note, shiftCount, noteFeatures);
+        }
     }
 
 //    cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl;
 
-    return noteFeatures;
+    return { noteFeatures, onsetFeatures };
 }
 
 void
@@ -1009,7 +1036,7 @@
 {
     int partStart = start;
     int partShift = 0;
-    int partVelocity = 0;
+    double partStrength = 0;
 
     int partThreshold = floor(0.05 * m_colsPerSec);
 
@@ -1037,21 +1064,15 @@
                                                        note,
                                                        partShift,
                                                        shiftCount,
-                                                       partVelocity));
+                                                       partStrength));
                 partStart = i;
                 partShift = shift;
-                partVelocity = 0;
+                partStrength = 0;
             }
         }
 
-        int v;
-        if (m_mode == LiveMode) {
-            v = round(strength * 20);
-        } else {
-            v = round(strength * 2);
-        }
-        if (v > partVelocity) {
-            partVelocity = v;
+        if (strength > partStrength) {
+            partStrength = strength;
         }
     }
 
@@ -1061,10 +1082,37 @@
                                                note,
                                                partShift,
                                                shiftCount,
-                                               partVelocity));
+                                               partStrength));
     }
 }
 
+void
+Silvet::emitOnset(int start, int note, int shiftCount,
+                  FeatureList &onsetFeatures)
+{
+    int len = int(m_pianoRoll.size());
+
+    double onsetStrength = 0;
+
+    int shift = 0;
+    if (shiftCount > 1) {
+        shift = m_pianoRollShifts[start][note];
+    }
+    
+    for (int i = start; i < len; ++i) {
+        double strength = m_pianoRoll[i][note];
+        if (strength > onsetStrength) {
+            onsetStrength = strength;
+        }
+    }
+
+    onsetFeatures.push_back(makeOnsetFeature(start,
+                                             note,
+                                             shift,
+                                             shiftCount,
+                                             onsetStrength));
+}
+
 RealTime
 Silvet::getColumnTimestamp(int column)
 {
@@ -1081,7 +1129,7 @@
                         int note,
                         int shift,
                         int shiftCount,
-                        int velocity)
+                        double strength)
 {
     Feature f;
 
@@ -1092,22 +1140,55 @@
     f.duration = getColumnTimestamp(end) - f.timestamp;
 
     f.values.clear();
+    f.values.push_back(getNoteFrequency(note, shift, shiftCount));
+    f.values.push_back(getVelocityFor(strength, start));
 
-    f.values.push_back
-        (noteFrequency(note, shift, shiftCount));
-
-    float inputGain = getInputGainAt(f.timestamp);
-//    cerr << "adjusting velocity from " << velocity << " to " << round(velocity/inputGain) << endl;
-    velocity = round(velocity / inputGain);
-    if (velocity > 127) velocity = 127;
-    if (velocity < 1) velocity = 1;
-    f.values.push_back(velocity);
-
-    f.label = noteName(note, shift, shiftCount);
+    f.label = getNoteName(note, shift, shiftCount);
 
     return f;
 }
 
+Silvet::Feature
+Silvet::makeOnsetFeature(int start,
+                         int note,
+                         int shift,
+                         int shiftCount,
+                         double strength)
+{
+    Feature f;
+
+    f.hasTimestamp = true;
+    f.timestamp = getColumnTimestamp(start);
+
+    f.hasDuration = false;
+
+    f.values.clear();
+    f.values.push_back(getNoteFrequency(note, shift, shiftCount));
+    f.values.push_back(getVelocityFor(strength, start));
+
+    f.label = getNoteName(note, shift, shiftCount);
+
+    return f;
+}
+
+int
+Silvet::getVelocityFor(double strength, int column)
+{
+    RealTime rt = getColumnTimestamp(column + 1);
+
+    float inputGain = getInputGainAt(rt);
+
+    double scale = 2.0;
+    if (m_mode == LiveMode) scale = 20.0;
+        
+    double velocity = round((strength * scale) / inputGain);
+    
+    if (velocity > 127.0) velocity = 127.0;
+    if (velocity < 1.0) velocity = 1.0; // assume surpassed 0 threshold already
+
+    return int(velocity);
+}
+
 float
 Silvet::getInputGainAt(RealTime t)
 {
--- a/src/Silvet.h	Tue Apr 28 18:56:54 2015 +0100
+++ b/src/Silvet.h	Wed Apr 29 18:58:00 2015 +0100
@@ -112,29 +112,36 @@
                                                     const vector<double> &column,
                                                     bool wantShifts);
     
-    vector<double> postProcess(const vector<double> &pitches,
-                               const vector<int> &bestShifts,
-                               bool wantShifts); // -> piano roll column
+    void postProcess(const vector<double> &pitches,
+                     const vector<int> &bestShifts,
+                     bool wantShifts); // -> piano roll column
 
-    FeatureList noteTrack(int shiftCount);
+    std::pair<FeatureList, FeatureList> noteTrack(int shiftCount); // notes, onsets
 
     void emitNote(int start, int end, int note, int shiftCount,
                   FeatureList &noteFeatures);
+
+    void emitOnset(int start, int note, int shiftCount,
+                  FeatureList &noteFeatures);
     
     Vamp::RealTime getColumnTimestamp(int column);
     
     Feature makeNoteFeature(int start, int end, int note, int shift,
-                            int shiftCount, int velocity);
+                            int shiftCount, double strength);
+    Feature makeOnsetFeature(int start, int note, int shift,
+                             int shiftCount, double strength);
 
+    int getVelocityFor(double strength, int column);
+    
     float getInputGainAt(Vamp::RealTime t);
 
     void insertTemplateFeatures(FeatureSet &);
     
     void transcribe(const Grid &, FeatureSet &);
 
-    string chromaName(int n) const;
-    string noteName(int n, int shift, int shiftCount) const;
-    float noteFrequency(int n, int shift, int shiftCount) const;
+    string getChromaName(int n) const;
+    string getNoteName(int n, int shift, int shiftCount) const;
+    float getNoteFrequency(int n, int shift, int shiftCount) const;
 
     int m_blockSize;
     int m_columnCount;
@@ -143,6 +150,7 @@
     bool m_haveStartTime;
 
     mutable int m_notesOutputNo;
+    mutable int m_onsetsOutputNo;
     mutable int m_fcqOutputNo;
     mutable int m_pitchOutputNo;
     mutable int m_templateOutputNo;