changeset 330:8f5cfd7dbaa5 livemode

Merge
author Chris Cannam
date Tue, 28 Apr 2015 18:56:54 +0100
parents 447ccdbfc6c0 (current diff) ba02cdc839db (diff)
children e8e37f471650
files src/Silvet.cpp
diffstat 9 files changed, 249 insertions(+), 85 deletions(-) [+]
line wrap: on
line diff
--- a/.hgsubstate	Tue Apr 28 18:56:34 2015 +0100
+++ b/.hgsubstate	Tue Apr 28 18:56:54 2015 +0100
@@ -1,3 +1,3 @@
-352579430f81c79a550741a97bb1f96fea05da75 bqvec
-4aa3d343f5fcc21c821c0f2f633980da2d8fc8bd constant-q-cpp
+7a48704e9a0fac1486240f9f7b7e31436a588064 bqvec
+56b5bb2893afc9748b1e1ce8b8b2c31b5863b728 constant-q-cpp
 d25a2e91e9d84aaff25e5d746398232d182d127d flattendynamics
--- a/Makefile.inc	Tue Apr 28 18:56:34 2015 +0100
+++ b/Makefile.inc	Tue Apr 28 18:56:54 2015 +0100
@@ -13,7 +13,7 @@
 CC	?= gcc
 
 CFLAGS := $(CFLAGS)
-CXXFLAGS := $(CFLAGS) -I. -I$(VAMPSDK_DIR) -I$(CQ_DIR) -I$(BQVEC_DIR) -I$(FD_DIR) $(CXXFLAGS)
+CXXFLAGS := $(CFLAGS) -I. -I$(VAMPSDK_DIR) -I$(CQ_DIR) -I$(BQVEC_DIR) -I$(BQVEC_DIR) -I$(FD_DIR) $(CXXFLAGS)
 
 LDFLAGS := $(LDFLAGS) 
 PLUGIN_LDFLAGS := $(LDFLAGS) $(PLUGIN_LDFLAGS)
@@ -23,7 +23,7 @@
 PLUGIN_HEADERS := $(SRC_DIR)/Silvet.h $(SRC_DIR)/EM.h $(SRC_DIR)/Instruments.h $(SRC_DIR)/LiveInstruments.h
 PLUGIN_SOURCES := $(SRC_DIR)/Silvet.cpp $(SRC_DIR)/EM.cpp $(SRC_DIR)/Instruments.cpp $(SRC_DIR)/LiveInstruments.cpp $(SRC_DIR)/libmain.cpp
 
-BQVEC_HEADERS	:= $(BQVEC_DIR)/bqvec/Allocators.h $(BQVEC_DIR)/bqvec/Restrict.h $(BQVEC_DIR)/bqvec/VectorOps.h
+BQVEC_HEADERS	:= $(BQVEC_DIR)/Allocators.h $(BQVEC_DIR)/Restrict.h $(BQVEC_DIR)/VectorOps.h
 BQVEC_SOURCES	:= $(BQVEC_DIR)/src/Allocators.cpp
 
 FD_HEADERS	:= $(FD_DIR)/flattendynamics-ladspa.h
@@ -81,5 +81,3 @@
 flattendynamics/flattendynamics-ladspa.o: flattendynamics/flattendynamics-ladspa.h
 src/Silvet.o: src/MedianFilter.h src/Instruments.h
 src/LiveInstruments.o: src/Instruments.h
-bqvec/bqvec/Allocators.o: bqvec/bqvec/VectorOps.h bqvec/bqvec/Restrict.h
-bqvec/bqvec/VectorOps.o: bqvec/bqvec/Restrict.h
--- a/Makefile.linux	Tue Apr 28 18:56:34 2015 +0100
+++ b/Makefile.linux	Tue Apr 28 18:56:54 2015 +0100
@@ -1,12 +1,12 @@
 
-CFLAGS := -Wall -O3 -ffast-math -msse -msse2 -mfpmath=sse -ftree-vectorize -fPIC -I../vamp-plugin-sdk/ -DUSE_PTHREADS
+CFLAGS := -Wall -O3 -ffast-math -msse -msse2 -mfpmath=sse -ftree-vectorize -fPIC -I../vamp-plugin-sdk/ 
 
 #CFLAGS := -g -fPIC -I../vamp-plugin-sdk
 
-CXXFLAGS := $(CFLAGS)
+CXXFLAGS := $(CFLAGS) -std=c++11
 
 VAMPSDK_DIR := ../vamp-plugin-sdk
-PLUGIN_LDFLAGS := -shared -Wl,-Bsymbolic -Wl,-z,defs -Wl,--version-script=vamp-plugin.map -lpthread
+PLUGIN_LDFLAGS := -shared -Wl,-Bsymbolic -Wl,-z,defs -Wl,--version-script=vamp-plugin.map 
 
 PLUGIN_EXT := .so
 
--- a/src/EM.cpp	Tue Apr 28 18:56:34 2015 +0100
+++ b/src/EM.cpp	Tue Apr 28 18:56:54 2015 +0100
@@ -22,7 +22,6 @@
 
 #include "bqvec/VectorOps.h"
 #include "bqvec/Allocators.h"
-
 #include "Instruments.h"
 
 using std::vector;
--- a/src/LiveInstruments.cpp	Tue Apr 28 18:56:34 2015 +0100
+++ b/src/LiveInstruments.cpp	Tue Apr 28 18:56:54 2015 +0100
@@ -63,6 +63,12 @@
 	}
 
 	templates.push_back(t);
+
+        // The live template for piano has only one piano in it, so as
+        // to process faster
+        if (original.name == "Piano") {
+            break;
+        }
     }
     
     InstrumentPack live(original.lowestNote,
--- a/src/Silvet.cpp	Tue Apr 28 18:56:34 2015 +0100
+++ b/src/Silvet.cpp	Tue Apr 28 18:56:54 2015 +0100
@@ -24,6 +24,7 @@
 #include "LiveInstruments.h"
 
 #include <vector>
+#include <future>
 
 #include <cstdio>
 
@@ -31,6 +32,9 @@
 using std::cout;
 using std::cerr;
 using std::endl;
+using std::pair;
+using std::future;
+using std::async;
 using Vamp::RealTime;
 
 static int processingSampleRate = 44100;
@@ -41,6 +45,8 @@
 static int minInputSampleRate = 100;
 static int maxInputSampleRate = 192000;
 
+static const Silvet::ProcessingMode defaultMode = Silvet::HighQualityMode; // single default shared by the constructor and the "mode" parameter descriptor
+
 Silvet::Silvet(float inputSampleRate) :
     Plugin(inputSampleRate),
     m_instruments(InstrumentPack::listInstrumentPacks()),
@@ -48,10 +54,11 @@
     m_resampler(0),
     m_flattener(0),
     m_cq(0),
-    m_mode(HighQualityMode),
+    m_mode(defaultMode),
     m_fineTuning(false),
     m_instrument(0),
-    m_colsPerSec(50)
+    m_colsPerSec(50),
+    m_haveStartTime(false)
 {
 }
 
@@ -143,7 +150,7 @@
     desc.description = "Sets the tradeoff of processing speed against transcription quality. Draft mode is tuned in favour of overall speed; Live mode is tuned in favour of lower latency; while Intensive mode (the default) will almost always produce the best results.";
     desc.minValue = 0;
     desc.maxValue = 2;
-    desc.defaultValue = 1;
+    desc.defaultValue = int(defaultMode);
     desc.isQuantized = true;
     desc.quantizeStep = 1;
     desc.valueNames.push_back("Draft (faster)"); 
@@ -295,6 +302,26 @@
     m_pitchOutputNo = list.size();
     list.push_back(d);
 
+    d.identifier = "chroma";
+    d.name = "Pitch chroma distribution";
+    d.description = "Pitch chroma distribution formed by wrapping the un-thresholded pitch activation distribution into a single octave of semitone bins.";
+    d.unit = "";
+    d.hasFixedBinCount = true;
+    d.binCount = 12;
+    d.binNames.clear();
+    if (m_cq) {
+        for (int i = 0; i < 12; ++i) {
+            d.binNames.push_back(chromaName(i));
+        }
+    }
+    d.hasKnownExtents = false;
+    d.isQuantized = false;
+    d.sampleType = OutputDescriptor::FixedSampleRate;
+    d.sampleRate = m_colsPerSec;
+    d.hasDuration = false;
+    m_chromaOutputNo = list.size();
+    list.push_back(d);
+
     d.identifier = "templates";
     d.name = "Templates";
     d.description = "Constant-Q spectral templates for the selected instrument pack.";
@@ -328,13 +355,19 @@
 }
 
 std::string
-Silvet::noteName(int note, int shift, int shiftCount) const
+Silvet::chromaName(int pitch) const
 {
     static const char *names[] = {
         "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"
     };
 
-    const char *n = names[note % 12];
+    return names[((pitch % 12) + 12) % 12]; // wrap into 0..11 so no int can index outside the 12-entry table
+}
+    
+std::string
+Silvet::noteName(int note, int shift, int shiftCount) const
+{
+    string n = chromaName(note % 12);
 
     int oct = (note + 9) / 12; 
     
@@ -348,11 +381,11 @@
     }
 
     if (pshift > 0.f) {
-        sprintf(buf, "%s%d+%dc", n, oct, int(round(pshift * 100)));
+        sprintf(buf, "%s%d+%dc", n.c_str(), oct, int(round(pshift * 100)));
     } else if (pshift < 0.f) {
-        sprintf(buf, "%s%d-%dc", n, oct, int(round((-pshift) * 100)));
+        sprintf(buf, "%s%d-%dc", n.c_str(), oct, int(round((-pshift) * 100)));
     } else {
-        sprintf(buf, "%s%d", n, oct);
+        sprintf(buf, "%s%d", n.c_str(), oct);
     }
 
     return buf;
@@ -463,13 +496,18 @@
                         maxFreq,
                         bpo);
 
-    params.q = 0.95; // MIREX code uses 0.8, but it seems 0.9 or lower
-                     // drops the FFT size to 512 from 1024 and alters
-                     // some other processing parameters, making
-                     // everything much, much slower. Could be a flaw
-                     // in the CQ parameter calculations, must check
-    params.atomHopFactor = 0.3;
+    // For params.q, the MIREX code uses 0.8, but it seems that with
+    // atomHopFactor of 0.3, using q == 0.9 or lower drops the FFT
+    // size to 512 from 1024 and alters some other processing
+    // parameters, making everything much, much slower. Could be a
+    // flaw in the CQ parameter calculations, must check. For
+    // atomHopFactor == 1, q == 0.8 is fine
+    params.q = (m_mode == HighQualityMode ? 0.95 : 0.8);
+    params.atomHopFactor = (m_mode == HighQualityMode ? 0.3 : 1.0);
     params.threshold = 0.0005;
+    params.decimator =
+        (m_mode == LiveMode ?
+         CQParameters::FasterDecimator : CQParameters::BetterDecimator);
     params.window = CQParameters::Hann;
 
     m_cq = new CQSpectrogram(params, CQSpectrogram::InterpolateLinear);
@@ -492,6 +530,7 @@
     m_columnCount = 0;
     m_resampledCount = 0;
     m_startTime = RealTime::zeroTime;
+    m_haveStartTime = false;
 }
 
 Silvet::FeatureSet
@@ -499,8 +538,11 @@
 {
     FeatureSet fs;
     
-    if (m_columnCount == 0) {
+    if (!m_haveStartTime) {
+
         m_startTime = timestamp;
+        m_haveStartTime = true;
+
         insertTemplateFeatures(fs);
     }
 
@@ -597,9 +639,7 @@
 
     int width = filtered.size();
 
-    int iterations = (m_mode == HighQualityMode ? 20 : 10);
-
-    Grid localPitches(width, vector<double>(pack.templateNoteCount, 0.0));
+    Grid localPitches(width);
 
     bool wantShifts = (m_mode == HighQualityMode) && m_fineTuning;
     int shiftCount = 1;
@@ -609,68 +649,73 @@
 
     vector<vector<int> > localBestShifts;
     if (wantShifts) {
-        localBestShifts = 
-            vector<vector<int> >(width, vector<int>(pack.templateNoteCount, 0));
+        localBestShifts = vector<vector<int> >(width);
     }
 
-    double columnThreshold = 1e-5;
+#ifndef MAX_EM_THREADS
+#define MAX_EM_THREADS 8
+#endif
 
-    if (m_mode == LiveMode) {
-        columnThreshold /= 20;
+    int emThreadCount = MAX_EM_THREADS;
+    if (m_mode == LiveMode && pack.templates.size() == 1) {
+        // The EM step is probably not slow enough to merit it
+        emThreadCount = 1;
     }
-    
-#pragma omp parallel for
-    for (int i = 0; i < width; ++i) {
 
-        double sum = 0.0;
-        for (int j = 0; j < pack.templateHeight; ++j) {
-            sum += filtered.at(i).at(j);
+#if (defined(MAX_EM_THREADS) && (MAX_EM_THREADS > 1))
+    if (emThreadCount > 1) {
+        for (int i = 0; i < width; ) {
+            typedef future<pair<vector<double>, vector<int>>> EMFuture;
+            vector<EMFuture> results;
+            for (int j = 0; j < emThreadCount && i + j < width; ++j) {
+                results.push_back
+                    (async(std::launch::async,
+                           [&](int index) {
+                               return applyEM(pack, filtered.at(index), wantShifts);
+                           }, i + j));
+            }
+            for (int j = 0; j < emThreadCount && i + j < width; ++j) {
+                auto out = results[j].get();
+                localPitches[i+j] = out.first;
+                if (wantShifts) localBestShifts[i+j] = out.second;
+            }
+            i += emThreadCount;
         }
-        if (sum < columnThreshold) continue;
+    }
+#endif
 
-        EM em(&pack, m_mode == HighQualityMode);
-
-        em.setPitchSparsity(pack.pitchSparsity);
-        em.setSourceSparsity(pack.sourceSparsity);
-
-        for (int j = 0; j < iterations; ++j) {
-            em.iterate(filtered.at(i).data());
-        }
-
-        const float *pitchDist = em.getPitchDistribution();
-        const float *const *shiftDist = em.getShifts();
-
-        for (int j = 0; j < pack.templateNoteCount; ++j) {
-
-            localPitches[i][j] = pitchDist[j] * sum;
-
-            int bestShift = 0;
-            float bestShiftValue = 0.0;
-            if (wantShifts) {
-                for (int k = 0; k < shiftCount; ++k) {
-                    float value = shiftDist[k][j];
-                    if (k == 0 || value > bestShiftValue) {
-                        bestShiftValue = value;
-                        bestShift = k;
-                    }
-                }
-                localBestShifts[i][j] = bestShift;
-            }                
+    if (emThreadCount == 1) {
+        for (int i = 0; i < width; ++i) {
+            auto out = applyEM(pack, filtered.at(i), wantShifts);
+            localPitches[i] = out.first;
+            if (wantShifts) localBestShifts[i] = out.second;
         }
     }
         
     for (int i = 0; i < width; ++i) {
 
+        // This returns a filtered column, and pushes the
+        // up-to-max-polyphony activation column to m_pianoRoll
         vector<double> filtered = postProcess
             (localPitches[i], localBestShifts[i], wantShifts);
 
+        RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1);
+        float inputGain = getInputGainAt(timestamp);
+
         Feature f;
         for (int j = 0; j < (int)filtered.size(); ++j) {
-            float v(filtered[j]);
+            float v = filtered[j];
             if (v < pack.levelThreshold) v = 0.f;
-            f.values.push_back(v);
+            f.values.push_back(v / inputGain);
         }
         fs[m_pitchOutputNo].push_back(f);
+
+        f.values.clear();
+        f.values.resize(12);
+        for (int j = 0; j < (int)filtered.size(); ++j) {
+            f.values[j % 12] += filtered[j] / inputGain;
+        }
+        fs[m_chromaOutputNo].push_back(f);
         
         FeatureList noteFeatures = noteTrack(shiftCount);
 
@@ -681,6 +726,66 @@
     }
 }
 
+// Runs EM on one pre-processed CQ column. Returns (pitches, bestShifts):
+// pitches holds pack.templateNoteCount values (the EM pitch distribution
+// scaled by the column sum); bestShifts holds the index of the strongest
+// shift for each note, and stays empty unless wantShifts is true.
+pair<vector<double>, vector<int> >
+Silvet::applyEM(const InstrumentPack &pack,
+                const vector<double> &column,
+                bool wantShifts)
+{
+    // Columns whose sum is below this are skipped; live mode uses 1/20 of it
+    double columnThreshold = 1e-5;
+    if (m_mode == LiveMode) {
+        columnThreshold /= 20;
+    }
+
+    vector<double> pitches(pack.templateNoteCount, 0.0);
+    // Pre-size the shifts (zero-filled) when they are wanted, so a column
+    // skipped as too quiet still returns one entry per note -- as the
+    // caller's per-column shift vectors previously guaranteed. Presumably
+    // postProcess indexes this per note when wantShifts is set (TODO confirm).
+    vector<int> bestShifts;
+    if (wantShifts) {
+        bestShifts.assign(pack.templateNoteCount, 0);
+    }
+
+    double sum = 0.0;
+    for (int j = 0; j < pack.templateHeight; ++j) {
+        sum += column.at(j);
+    }
+    if (sum < columnThreshold) return { pitches, bestShifts };
+
+    EM em(&pack, m_mode == HighQualityMode);
+    em.setPitchSparsity(pack.pitchSparsity);
+    em.setSourceSparsity(pack.sourceSparsity);
+
+    const int iterations = (m_mode == HighQualityMode ? 20 : 10);
+    for (int j = 0; j < iterations; ++j) {
+        em.iterate(column.data());
+    }
+
+    const float *pitchDist = em.getPitchDistribution();
+    const float *const *shiftDist = em.getShifts();
+    const int shiftCount = (wantShifts ? pack.templateMaxShift * 2 + 1 : 1);
+
+    for (int j = 0; j < pack.templateNoteCount; ++j) {
+        pitches[j] = pitchDist[j] * sum;
+        if (!wantShifts) continue;
+
+        // Only a strictly greater value replaces the best so far, so the
+        // lowest shift index wins ties
+        int bestShift = 0;
+        for (int k = 1; k < shiftCount; ++k) {
+            if (shiftDist[k][j] > shiftDist[bestShift][j]) bestShift = k;
+        }
+        bestShifts[j] = bestShift;
+    }
+
+    return { pitches, bestShifts };
+}
+
 Silvet::Grid
 Silvet::preProcess(const Grid &in)
 {
@@ -782,6 +887,23 @@
         filtered.push_back(m_postFilter[j]->get());
     }
 
+    if (m_mode == LiveMode) {
+        // In live mode with only a 12-bpo CQ, we are very likely to
+        // get clusters of two or three high scores at a time for
+        // neighbouring semitones. Eliminate these by picking only the
+        // peaks. This means we can't recognise actual semitone chords
+        // if they ever appear, but it's not as if live mode is good
+        // enough for that to be a big deal anyway. Compare against a
+        // snapshot so that an already-zeroed left neighbour cannot turn
+        // a value on a falling slope into a spurious peak.
+        const vector<double> raw(filtered);
+        for (int j = 0; j < pack.templateNoteCount; ++j) {
+            bool peak = (j > 0 && j + 1 < pack.templateNoteCount &&
+                         raw[j] >= raw[j-1] && raw[j] >= raw[j+1]);
+            if (!peak) filtered[j] = 0.0;
+        }
+    }
+
     // Threshold for level and reduce number of candidate pitches
 
     typedef std::multimap<double, int> ValueIndexMap;
@@ -924,7 +1046,7 @@
 
         int v;
         if (m_mode == LiveMode) {
-            v = round(strength * 30);
+            v = round(strength * 20);
         } else {
             v = round(strength * 2);
         }
@@ -943,6 +1065,16 @@
     }
 }
 
+// Time of piano-roll column `column`: m_startTime plus the column offset,
+// less half the post-filter length, plus a fixed 0.02 s offset.
+RealTime
+Silvet::getColumnTimestamp(int column)
+{
+    const int latencyColumns = int(m_postFilter[0]->getSize() / 2);
+    const double seconds = (1.0 / m_colsPerSec) * (column - latencyColumns) + 0.02;
+    return m_startTime + RealTime::fromSeconds(seconds);
+}
+
 Silvet::Feature
 Silvet::makeNoteFeature(int start,
                         int end,
@@ -951,18 +1083,13 @@
                         int shiftCount,
                         int velocity)
 {
-    double columnDuration = 1.0 / m_colsPerSec;
-    int postFilterLatency = int(m_postFilter[0]->getSize() / 2);
-
     Feature f;
 
     f.hasTimestamp = true;
-    f.timestamp = m_startTime + RealTime::fromSeconds
-        (columnDuration * (start - postFilterLatency) + 0.02);
+    f.timestamp = getColumnTimestamp(start);
 
     f.hasDuration = true;
-    f.duration = RealTime::fromSeconds
-        (columnDuration * (end - start));
+    f.duration = getColumnTimestamp(end) - f.timestamp;
 
     f.values.clear();
 
--- a/src/Silvet.h	Tue Apr 28 18:56:34 2015 +0100
+++ b/src/Silvet.h	Tue Apr 28 18:56:54 2015 +0100
@@ -71,6 +71,12 @@
 
     FeatureSet getRemainingFeatures();
 
+    enum ProcessingMode { // ordered so draft==0 and hq==1 as in prior releases
+        DraftMode = 0,       // listed as "Draft (faster)" in the mode parameter's value names
+        HighQualityMode = 1, // the default (defaultMode in Silvet.cpp); "Intensive" in the parameter description
+        LiveMode = 2,        // described in the parameter text as tuned in favour of lower latency
+    };
+
 protected:
     const std::vector<InstrumentPack> m_instruments;
     const std::vector<InstrumentPack> m_liveInstruments;
@@ -87,11 +93,6 @@
     FlattenDynamics *m_flattener;
     CQSpectrogram *m_cq;
 
-    enum ProcessingMode { // ordered so draft==0 and hq==1 as in prior releases
-        DraftMode = 0,
-        HighQualityMode = 1,
-        LiveMode = 2,
-    };
     ProcessingMode m_mode;
     
     bool m_fineTuning;
@@ -107,6 +108,10 @@
 
     Grid preProcess(const Grid &);
 
+    std::pair<vector<double>, vector<int> > applyEM(const InstrumentPack &pack,
+                                                    const vector<double> &column,
+                                                    bool wantShifts);
+    
     vector<double> postProcess(const vector<double> &pitches,
                                const vector<int> &bestShifts,
                                bool wantShifts); // -> piano roll column
@@ -115,7 +120,9 @@
 
     void emitNote(int start, int end, int note, int shiftCount,
                   FeatureList &noteFeatures);
-
+    
+    Vamp::RealTime getColumnTimestamp(int column);
+    
     Feature makeNoteFeature(int start, int end, int note, int shift,
                             int shiftCount, int velocity);
 
@@ -125,6 +132,7 @@
     
     void transcribe(const Grid &, FeatureSet &);
 
+    string chromaName(int n) const;
     string noteName(int n, int shift, int shiftCount) const;
     float noteFrequency(int n, int shift, int shiftCount) const;
 
@@ -132,11 +140,13 @@
     int m_columnCount;
     int m_resampledCount;
     Vamp::RealTime m_startTime;
+    bool m_haveStartTime;
 
     mutable int m_notesOutputNo;
     mutable int m_fcqOutputNo;
     mutable int m_pitchOutputNo;
     mutable int m_templateOutputNo;
+    mutable int m_chromaOutputNo;
 };
 
 #endif
--- a/testdata/timing/run.sh	Tue Apr 28 18:56:34 2015 +0100
+++ b/testdata/timing/run.sh	Tue Apr 28 18:56:54 2015 +0100
@@ -20,6 +20,8 @@
 VAMP_PATH=../..
 export VAMP_PATH
 
+tfile=transform.ttl
+
 outfile="/tmp/$$"
 
 tmpwav="/tmp/$$norm.wav"
@@ -32,7 +34,7 @@
     --writer csv \
     --csv-one-file "$outfile" \
     --csv-force \
-    --default vamp:silvet:silvet:notes \
+    --transform "$tfile" \
     "$tmpwav"
 
 cat "$outfile" | \
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/testdata/timing/transform.ttl	Tue Apr 28 18:56:54 2015 +0100
@@ -0,0 +1,22 @@
+@prefix xsd:      <http://www.w3.org/2001/XMLSchema#> .
+@prefix vamp:     <http://purl.org/ontology/vamp/> .
+@prefix :         <#> .
+
+:transform a vamp:Transform ;
+    vamp:plugin <http://vamp-plugins.org/rdf/plugins/silvet#silvet> ;
+    vamp:step_size "1024"^^xsd:int ; 
+    vamp:block_size "1024"^^xsd:int ; 
+    vamp:plugin_version """3""" ; 
+    vamp:parameter_binding [
+        vamp:parameter [ vamp:identifier "finetune" ] ;
+        vamp:value "0"^^xsd:float ;
+    ] ;
+    vamp:parameter_binding [
+        vamp:parameter [ vamp:identifier "instrument" ] ;
+        vamp:value "0"^^xsd:float ; # general
+    ] ;
+    vamp:parameter_binding [
+        vamp:parameter [ vamp:identifier "mode" ] ;
+        vamp:value "1"^^xsd:float ; # hq
+    ] ;
+    vamp:output <http://vamp-plugins.org/rdf/plugins/silvet#silvet_output_notes> .