diff src/Silvet.cpp @ 314:f98ba4f47e49 livemode

Merge from default branch
author Chris Cannam
date Tue, 28 Apr 2015 11:24:23 +0100
parents 5a181a427ac8 fa2ffbb786df
children f3e10617a60d
line wrap: on
line diff
--- a/src/Silvet.cpp	Mon Jan 19 09:33:35 2015 +0000
+++ b/src/Silvet.cpp	Tue Apr 28 11:24:23 2015 +0100
@@ -24,6 +24,7 @@
 #include "LiveInstruments.h"
 
 #include <vector>
+#include <future>
 
 #include <cstdio>
 
@@ -31,6 +32,9 @@
 using std::cout;
 using std::cerr;
 using std::endl;
+using std::pair;
+using std::future;
+using std::async;
 using Vamp::RealTime;
 
 static int processingSampleRate = 44100;
@@ -51,7 +55,8 @@
     m_mode(HighQualityMode),
     m_fineTuning(false),
     m_instrument(0),
-    m_colsPerSec(50)
+    m_colsPerSec(50),
+    m_haveStartTime(false)
 {
 }
 
@@ -295,6 +300,26 @@
     m_pitchOutputNo = list.size();
     list.push_back(d);
 
+    d.identifier = "chroma";
+    d.name = "Pitch chroma distribution";
+    d.description = "Pitch chroma distribution formed by wrapping the un-thresholded pitch activation distribution into a single octave of semitone bins.";
+    d.unit = "";
+    d.hasFixedBinCount = true;
+    d.binCount = 12;
+    d.binNames.clear();
+    if (m_cq) {
+        for (int i = 0; i < 12; ++i) {
+            d.binNames.push_back(chromaName(i));
+        }
+    }
+    d.hasKnownExtents = false;
+    d.isQuantized = false;
+    d.sampleType = OutputDescriptor::FixedSampleRate;
+    d.sampleRate = m_colsPerSec;
+    d.hasDuration = false;
+    m_chromaOutputNo = list.size();
+    list.push_back(d);
+
     d.identifier = "templates";
     d.name = "Templates";
     d.description = "Constant-Q spectral templates for the selected instrument pack.";
@@ -328,13 +353,19 @@
 }
 
 std::string
-Silvet::noteName(int note, int shift, int shiftCount) const
+Silvet::chromaName(int pitch) const // name of a semitone class; index 0 is "A"
 {
     static const char *names[] = {
         "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"
     };
 
-    const char *n = names[note % 12];
+    return names[((pitch % 12) + 12) % 12]; // wrap: a negative or >11 pitch must not index outside names[]
+}
+    
+std::string
+Silvet::noteName(int note, int shift, int shiftCount) const
+{
+    string n = chromaName(note % 12);
 
     int oct = (note + 9) / 12; 
     
@@ -348,11 +379,11 @@
     }
 
     if (pshift > 0.f) {
-        sprintf(buf, "%s%d+%dc", n, oct, int(round(pshift * 100)));
+        sprintf(buf, "%s%d+%dc", n.c_str(), oct, int(round(pshift * 100)));
     } else if (pshift < 0.f) {
-        sprintf(buf, "%s%d-%dc", n, oct, int(round((-pshift) * 100)));
+        sprintf(buf, "%s%d-%dc", n.c_str(), oct, int(round((-pshift) * 100)));
     } else {
-        sprintf(buf, "%s%d", n, oct);
+        sprintf(buf, "%s%d", n.c_str(), oct);
     }
 
     return buf;
@@ -492,6 +523,7 @@
     m_columnCount = 0;
     m_resampledCount = 0;
     m_startTime = RealTime::zeroTime;
+    m_haveStartTime = false;
 }
 
 Silvet::FeatureSet
@@ -499,8 +531,11 @@
 {
     FeatureSet fs;
     
-    if (m_columnCount == 0) {
+    if (!m_haveStartTime) {
+
         m_startTime = timestamp;
+        m_haveStartTime = true;
+
         insertTemplateFeatures(fs);
     }
 
@@ -597,9 +632,7 @@
 
     int width = filtered.size();
 
-    int iterations = (m_mode == HighQualityMode ? 20 : 10);
-
-    Grid localPitches(width, vector<double>(pack.templateNoteCount, 0.0));
+    Grid localPitches(width);
 
     bool wantShifts = (m_mode == HighQualityMode) && m_fineTuning;
     int shiftCount = 1;
@@ -609,68 +642,63 @@
 
     vector<vector<int> > localBestShifts;
     if (wantShifts) {
-        localBestShifts = 
-            vector<vector<int> >(width, vector<int>(pack.templateNoteCount, 0));
+        localBestShifts = vector<vector<int> >(width);
     }
 
-    double columnThreshold = 1e-5;
+#ifndef MAX_EM_THREADS
+#define MAX_EM_THREADS 8
+#endif
 
-    if (m_mode == LiveMode) {
-        columnThreshold /= 20;
+#if (defined(MAX_EM_THREADS) && (MAX_EM_THREADS > 1))
+    for (int i = 0; i < width; ) {
+        typedef future<pair<vector<double>, vector<int>>> EMFuture;
+        vector<EMFuture> results;
+        for (int j = 0; j < MAX_EM_THREADS && i + j < width; ++j) {
+            results.push_back
+                (async(std::launch::async,
+                       [&](int index) {
+                           return applyEM(pack, filtered.at(index), wantShifts);
+                       }, i + j));
+        }
+        for (int j = 0; j < MAX_EM_THREADS && i + j < width; ++j) {
+            auto out = results[j].get();
+            localPitches[i+j] = out.first;
+            if (wantShifts) localBestShifts[i+j] = out.second;
+        }
+        i += MAX_EM_THREADS;
     }
-    
-#pragma omp parallel for
+#else
     for (int i = 0; i < width; ++i) {
-
-        double sum = 0.0;
-        for (int j = 0; j < pack.templateHeight; ++j) {
-            sum += filtered.at(i).at(j);
-        }
-        if (sum < columnThreshold) continue;
-
-        EM em(&pack, m_mode == HighQualityMode);
-
-        em.setPitchSparsity(pack.pitchSparsity);
-        em.setSourceSparsity(pack.sourceSparsity);
-
-        for (int j = 0; j < iterations; ++j) {
-            em.iterate(filtered.at(i).data());
-        }
-
-        const float *pitchDist = em.getPitchDistribution();
-        const float *const *shiftDist = em.getShifts();
-
-        for (int j = 0; j < pack.templateNoteCount; ++j) {
-
-            localPitches[i][j] = pitchDist[j] * sum;
-
-            int bestShift = 0;
-            float bestShiftValue = 0.0;
-            if (wantShifts) {
-                for (int k = 0; k < shiftCount; ++k) {
-                    float value = shiftDist[k][j];
-                    if (k == 0 || value > bestShiftValue) {
-                        bestShiftValue = value;
-                        bestShift = k;
-                    }
-                }
-                localBestShifts[i][j] = bestShift;
-            }                
-        }
+        auto out = applyEM(pack, filtered.at(i), wantShifts);
+        localPitches[i] = out.first;
+        if (wantShifts) localBestShifts[i] = out.second;
     }
+#endif
         
     for (int i = 0; i < width; ++i) {
 
+        // This returns a filtered column, and pushes the
+        // up-to-max-polyphony activation column to m_pianoRoll
         vector<double> filtered = postProcess
             (localPitches[i], localBestShifts[i], wantShifts);
 
+        RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1);
+        float inputGain = getInputGainAt(timestamp);
+
         Feature f;
         for (int j = 0; j < (int)filtered.size(); ++j) {
-            float v(filtered[j]);
+            float v = filtered[j];
             if (v < pack.levelThreshold) v = 0.f;
-            f.values.push_back(v);
+            f.values.push_back(v / inputGain);
         }
         fs[m_pitchOutputNo].push_back(f);
+
+        f.values.clear();
+        f.values.resize(12);
+        for (int j = 0; j < (int)filtered.size(); ++j) {
+            f.values[j % 12] += filtered[j] / inputGain;
+        }
+        fs[m_chromaOutputNo].push_back(f);
         
         FeatureList noteFeatures = noteTrack(shiftCount);
 
@@ -681,6 +709,66 @@
     }
 }
 
+pair<vector<double>, vector<int> >
+Silvet::applyEM(const InstrumentPack &pack,
+                const vector<double> &column,
+                bool wantShifts)
+{
+    // Run the EM iterations on one column. Returns (pitch activations,
+    // best shift index per note). The first always has templateNoteCount
+    // entries; the second has as many if wantShifts, else it is empty.
+    double columnThreshold = 1e-5;
+    if (m_mode == LiveMode) {
+        columnThreshold /= 20;
+    }
+
+    vector<double> pitches(pack.templateNoteCount, 0.0);
+    // Pre-sized with zeros so that a column skipped below still yields
+    // one shift per note, as the code this function replaced did.
+    vector<int> bestShifts(wantShifts ? pack.templateNoteCount : 0, 0);
+
+    double sum = 0.0;
+    for (int j = 0; j < pack.templateHeight; ++j) {
+        sum += column.at(j);
+    }
+    if (sum < columnThreshold) return { pitches, bestShifts };
+
+    EM em(&pack, m_mode == HighQualityMode);
+    em.setPitchSparsity(pack.pitchSparsity);
+    em.setSourceSparsity(pack.sourceSparsity);
+
+    int iterations = (m_mode == HighQualityMode ? 20 : 10);
+    for (int j = 0; j < iterations; ++j) {
+        em.iterate(column.data());
+    }
+
+    const float *pitchDist = em.getPitchDistribution();
+    const float *const *shiftDist = em.getShifts();
+
+    int shiftCount = 1;
+    if (wantShifts) {
+        shiftCount = pack.templateMaxShift * 2 + 1;
+    }
+
+    for (int j = 0; j < pack.templateNoteCount; ++j) {
+        pitches[j] = pitchDist[j] * sum;
+        if (!wantShifts) continue;
+        // Pick the shift whose distribution value is largest for note j
+        int bestShift = 0;
+        float bestShiftValue = 0.0;
+        for (int k = 0; k < shiftCount; ++k) {
+            float value = shiftDist[k][j];
+            if (k == 0 || value > bestShiftValue) {
+                bestShiftValue = value;
+                bestShift = k;
+            }
+        }
+        bestShifts[j] = bestShift;
+    }
+
+    return { pitches, bestShifts };
+}
+
 Silvet::Grid
 Silvet::preProcess(const Grid &in)
 {
@@ -943,6 +1031,16 @@
     }
 }
 
+RealTime
+Silvet::getColumnTimestamp(int column)
+{
+    // Time of the given column: the start time plus the column's offset,
+    // less half the first post filter's size, plus a fixed 0.02 sec.
+    const int latency = int(m_postFilter[0]->getSize() / 2);
+    const double seconds = (1.0 / m_colsPerSec) * (column - latency) + 0.02;
+    return m_startTime + RealTime::fromSeconds(seconds);
+}
+
 Silvet::Feature
 Silvet::makeNoteFeature(int start,
                         int end,
@@ -951,18 +1049,13 @@
                         int shiftCount,
                         int velocity)
 {
-    double columnDuration = 1.0 / m_colsPerSec;
-    int postFilterLatency = int(m_postFilter[0]->getSize() / 2);
-
     Feature f;
 
     f.hasTimestamp = true;
-    f.timestamp = m_startTime + RealTime::fromSeconds
-        (columnDuration * (start - postFilterLatency) + 0.02);
+    f.timestamp = getColumnTimestamp(start);
 
     f.hasDuration = true;
-    f.duration = RealTime::fromSeconds
-        (columnDuration * (end - start));
+    f.duration = getColumnTimestamp(end) - f.timestamp;
 
     f.values.clear();