changeset 55:384338fa460d preshift

Support shifts as an additional dimension (as in the original model). Also return velocity as well.
author Chris Cannam
date Tue, 08 Apr 2014 13:30:32 +0100
parents a54df67e607e
children 4fa3ea96eb65 dcdd09d2abec
files Makefile.inc src/EM.cpp src/EM.h src/Silvet.cpp src/Silvet.h
diffstat 5 files changed, 102 insertions(+), 97 deletions(-) [+]
line wrap: on
line diff
--- a/Makefile.inc	Mon Apr 07 17:36:40 2014 +0100
+++ b/Makefile.inc	Tue Apr 08 13:30:32 2014 +0100
@@ -55,8 +55,7 @@
 src/EM.o: data/include/cello.h data/include/clarinet.h data/include/flute.h
 src/EM.o: data/include/guitar.h data/include/horn.h data/include/oboe.h
 src/EM.o: data/include/tenorsax.h data/include/violin.h
-src/EM.o: data/include/piano-maps-SptkBGCl.h data/include/piano1.h
-src/EM.o: data/include/piano2.h data/include/piano3.h
+src/EM.o: data/include/piano-maps-SptkBGCl.h
 src/libmain.o: src/Silvet.h
 constant-q-cpp/cpp-qm-dsp/CQKernel.o: constant-q-cpp/cpp-qm-dsp/CQKernel.h
 constant-q-cpp/cpp-qm-dsp/ConstantQ.o: constant-q-cpp/cpp-qm-dsp/ConstantQ.h
--- a/src/EM.cpp	Mon Apr 07 17:36:40 2014 +0100
+++ b/src/EM.cpp	Tue Apr 08 13:30:32 2014 +0100
@@ -33,28 +33,31 @@
 EM::EM() :
     m_noteCount(SILVET_TEMPLATE_NOTE_COUNT),
     m_shiftCount(SILVET_TEMPLATE_MAX_SHIFT * 2 + 1),
-    m_pitchCount(m_noteCount * m_shiftCount),
     m_binCount(SILVET_TEMPLATE_HEIGHT),
     m_instrumentCount(SILVET_TEMPLATE_COUNT),
     m_pitchSparsity(1.1),
     m_sourceSparsity(1.3)
 {
-    m_lowestPitch = 
-        silvet_templates_lowest_note * m_shiftCount;
-    m_highestPitch =
-        silvet_templates_highest_note * m_shiftCount + m_shiftCount - 1;
+    m_lowestPitch = silvet_templates_lowest_note;
+    m_highestPitch = silvet_templates_highest_note;
 
-    m_pitches = V(m_pitchCount);
+    m_pitches = V(m_noteCount);
+    for (int n = 0; n < m_noteCount; ++n) {
+        m_pitches[n] = drand48();
+    }
 
-    for (int n = 0; n < m_pitchCount; ++n) {
-        m_pitches[n] = drand48();
+    m_shifts = Grid(m_shiftCount);
+    for (int f = 0; f < m_shiftCount; ++f) {
+        m_shifts[f] = V(m_noteCount);
+        for (int n = 0; n < m_noteCount; ++n) {
+            m_shifts[f][n] = drand48();
+        }
     }
     
     m_sources = Grid(m_instrumentCount);
-    
-    for (int i = 0; i < m_instrumentCount; ++i) {
-        m_sources[i] = V(m_pitchCount);
-        for (int n = 0; n < m_pitchCount; ++n) {
+        for (int i = 0; i < m_instrumentCount; ++i) {
+        m_sources[i] = V(m_noteCount);
+        for (int n = 0; n < m_noteCount; ++n) {
             m_sources[i][n] = (inRange(i, n) ? 1.0 : 0.0);
         }
     }
@@ -70,9 +73,8 @@
 void
 EM::rangeFor(int instrument, int &minPitch, int &maxPitch)
 {
-    minPitch = silvet_templates[instrument].lowest * m_shiftCount;
-    maxPitch = silvet_templates[instrument].highest * m_shiftCount
-        + m_shiftCount - 1;
+    minPitch = silvet_templates[instrument].lowest;
+    maxPitch = silvet_templates[instrument].highest;
 }
 
 bool
@@ -84,7 +86,7 @@
 }
 
 void
-EM::normalise(V &column)
+EM::normaliseColumn(V &column)
 {
     double sum = 0.0;
     for (int i = 0; i < (int)column.size(); ++i) {
@@ -96,19 +98,19 @@
 }
 
 void
-EM::normaliseSources(Grid &sources)
+EM::normaliseGrid(Grid &grid)
 {
-    V denominators(sources[0].size());
+    V denominators(grid[0].size());
 
-    for (int i = 0; i < (int)sources.size(); ++i) {
-        for (int j = 0; j < (int)sources[i].size(); ++j) {
-            denominators[j] += sources[i][j];
+    for (int i = 0; i < (int)grid.size(); ++i) {
+        for (int j = 0; j < (int)grid[i].size(); ++j) {
+            denominators[j] += grid[i][j];
         }
     }
 
-    for (int i = 0; i < (int)sources.size(); ++i) {
-        for (int j = 0; j < (int)sources[i].size(); ++j) {
-            sources[i][j] /= denominators[j];
+    for (int i = 0; i < (int)grid.size(); ++i) {
+        for (int j = 0; j < (int)grid[i].size(); ++j) {
+            grid[i][j] /= denominators[j];
         }
     }
 }
@@ -116,16 +118,14 @@
 void
 EM::iterate(V column)
 {
-    normalise(column);
+    normaliseColumn(column);
     expectation(column);
     maximisation(column);
 }
 
 const float *
-EM::templateFor(int instrument, int pitch)
+EM::templateFor(int instrument, int note, int shift)
 {
-    int note = pitch / m_shiftCount;
-    int shift = pitch % m_shiftCount;
     return silvet_templates[instrument].data[note] + shift;
 }
 
@@ -139,12 +139,15 @@
     }
 
     for (int i = 0; i < m_instrumentCount; ++i) {
-        for (int n = 0; n < m_pitchCount; ++n) {
-            const float *w = templateFor(i, n);
-            double pitch = m_pitches[n];
-            double source = m_sources[i][n];
-            for (int j = 0; j < m_binCount; ++j) {
-                m_estimate[j] += w[j] * pitch * source;
+        for (int n = 0; n < m_noteCount; ++n) {
+            for (int f = 0; f < m_shiftCount; ++f) {
+                const float *w = templateFor(i, n, f);
+                double pitch = m_pitches[n];
+                double source = m_sources[i][n];
+                double shift = m_shifts[f][n];
+                for (int j = 0; j < m_binCount; ++j) {
+                    m_estimate[j] += w[j] * pitch * source * shift;
+                }
             }
         }
     }
@@ -159,15 +162,18 @@
 {
     V newPitches = m_pitches;
 
-    for (int n = 0; n < m_pitchCount; ++n) {
+    for (int n = 0; n < m_noteCount; ++n) {
         newPitches[n] = epsilon;
         if (n >= m_lowestPitch && n <= m_highestPitch) {
             for (int i = 0; i < m_instrumentCount; ++i) {
-                const float *w = templateFor(i, n);
-                double pitch = m_pitches[n];
-                double source = m_sources[i][n];
-                for (int j = 0; j < m_binCount; ++j) {
-                    newPitches[n] += w[j] * m_q[j] * pitch * source;
+                for (int f = 0; f < m_shiftCount; ++f) {
+                    const float *w = templateFor(i, n, f);
+                    double pitch = m_pitches[n];
+                    double source = m_sources[i][n];
+                    double shift = m_shifts[f][n];
+                    for (int j = 0; j < m_binCount; ++j) {
+                        newPitches[n] += w[j] * m_q[j] * pitch * source * shift;
+                    }
                 }
             }
         }
@@ -175,19 +181,40 @@
             newPitches[n] = pow(newPitches[n], m_pitchSparsity);
         }
     }
-    normalise(newPitches);
+    normaliseColumn(newPitches);
+
+    Grid newShifts = m_shifts;
+
+    for (int f = 0; f < m_shiftCount; ++f) {
+        for (int n = 0; n < m_noteCount; ++n) {
+            newShifts[f][n] = epsilon;
+            for (int i = 0; i < m_instrumentCount; ++i) {
+                const float *w = templateFor(i, n, f);
+                double pitch = m_pitches[n];
+                double source = m_sources[i][n];
+                double shift = m_shifts[f][n];
+                for (int j = 0; j < m_binCount; ++j) {
+                    newShifts[f][n] += w[j] * m_q[j] * pitch * source * shift;
+                }
+            }
+        }
+    }
+    normaliseGrid(newShifts);
 
     Grid newSources = m_sources;
 
     for (int i = 0; i < m_instrumentCount; ++i) {
-        for (int n = 0; n < m_pitchCount; ++n) {
+        for (int n = 0; n < m_noteCount; ++n) {
             newSources[i][n] = epsilon;
             if (inRange(i, n)) {
-                const float *w = templateFor(i, n);
-                double pitch = m_pitches[n];
-                double source = m_sources[i][n];
-                for (int j = 0; j < m_binCount; ++j) {
-                    newSources[i][n] += w[j] * m_q[j] * pitch * source;
+                for (int f = 0; f < m_shiftCount; ++f) {
+                    const float *w = templateFor(i, n, f);
+                    double pitch = m_pitches[n];
+                    double source = m_sources[i][n];
+                    double shift = m_shifts[f][n];
+                    for (int j = 0; j < m_binCount; ++j) {
+                        newSources[i][n] += w[j] * m_q[j] * pitch * source * shift;
+                    }
                 }
             }
             if (m_sourceSparsity != 1.0) {
@@ -195,34 +222,11 @@
             }
         }
     }
-    normaliseSources(newSources);
+    normaliseGrid(newSources);
 
     m_pitches = newPitches;
+    m_shifts = newShifts;
     m_sources = newSources;
 }
 
-void
-EM::report()
-{
-    vector<int> sounding;
-    for (int n = 0; n < m_pitchCount; ++n) {
-        if (m_pitches[n] > 0.05) {
-            sounding.push_back(n);
-        }
-    }
-    cerr << " sounding: ";
-    for (int i = 0; i < (int)sounding.size(); ++i) {
-        cerr << sounding[i] << " ";
-        int maxj = -1;
-        double maxs = 0.0;
-        for (int j = 0; j < m_instrumentCount; ++j) {
-            if (j == 0 || m_sources[j][sounding[i]] > maxs) {
-                maxj = j;
-                maxs = m_sources[j][sounding[i]];
-            }
-        }
-        cerr << silvet_templates[maxj].name << " ";
-    }
-    cerr << endl;
-}
 
--- a/src/EM.h	Mon Apr 07 17:36:40 2014 +0100
+++ b/src/EM.h	Tue Apr 08 13:30:32 2014 +0100
@@ -25,7 +25,6 @@
     ~EM();
 
     void iterate(std::vector<double> column);
-    void report();
 
     const std::vector<double> &getEstimate() const { 
 	return m_estimate;
@@ -42,6 +41,7 @@
     typedef std::vector<std::vector<double> > Grid;
 
     V m_pitches;
+    Grid m_shifts;
     Grid m_sources;
 
     V m_estimate;
@@ -49,7 +49,6 @@
     
     int m_noteCount;
     int m_shiftCount; // 1 + 2 * max template shift
-    int m_pitchCount; // noteCount * shiftCount
     int m_binCount;
     int m_instrumentCount;
     
@@ -59,12 +58,12 @@
     int m_lowestPitch;
     int m_highestPitch;
 
-    void normalise(V &column);
-    void normaliseSources(Grid &grid);
+    void normaliseColumn(V &column);
+    void normaliseGrid(Grid &grid);
     void expectation(const V &column);
     void maximisation(const V &column);
 
-    const float *templateFor(int instrument, int pitch);
+    const float *templateFor(int instrument, int note, int shift);
     void rangeFor(int instrument, int &minPitch, int &maxPitch);
     bool inRange(int instrument, int pitch);
 };
--- a/src/Silvet.cpp	Mon Apr 07 17:36:40 2014 +0100
+++ b/src/Silvet.cpp	Tue Apr 08 13:30:32 2014 +0100
@@ -17,6 +17,7 @@
 #include "EM.h"
 
 #include "maths/MedianFilter.h"
+#include "maths/MathUtilities.h"
 #include "dsp/rateconversion/Resampler.h"
 
 #include "constant-q-cpp/cpp-qm-dsp/CQInterpolated.h"
@@ -35,8 +36,6 @@
 static int processingBPO = 60;
 static int processingHeight = 545;
 static int processingNotes = 88;
-static int processingShifts = 5;
-static int processingPitches = processingNotes * processingShifts;
 
 Silvet::Silvet(float inputSampleRate) :
     Plugin(inputSampleRate),
@@ -235,9 +234,9 @@
     d.description = "Estimated pitch activation matrix";
     d.unit = "";
     d.hasFixedBinCount = true;
-    d.binCount = processingPitches;
+    d.binCount = processingNotes;
     d.binNames.clear();
-    for (int i = 0; i < processingPitches; ++i) {
+    for (int i = 0; i < processingNotes; ++i) {
         d.binNames.push_back(noteName(i));
     }
     d.hasKnownExtents = false;
@@ -404,12 +403,12 @@
 
         vector<double> pitches = em.getPitchDistribution();
         
-        for (int j = 0; j < processingPitches; ++j) {
+        for (int j = 0; j < processingNotes; ++j) {
             pitches[j] *= sum;
         }
 
         Feature f;
-        for (int j = 0; j < processingPitches; ++j) {
+        for (int j = 0; j < processingNotes; ++j) {
             f.values.push_back(float(pitches[j]));
         }
         fs[m_pitchOutputNo].push_back(f);
@@ -504,12 +503,7 @@
     vector<double> filtered;
 
     for (int j = 0; j < processingNotes; ++j) {
-        double noteSum = 0.0;
-        for (int s = 0; s < processingShifts; ++s) {
-            double val = pitches[j * processingShifts + s];
-            noteSum += val;
-        }
-        m_postFilter[j]->push(noteSum);
+        m_postFilter[j]->push(pitches[j]);
         filtered.push_back(m_postFilter[j]->get());
     }
 
@@ -525,13 +519,13 @@
         strengths.insert(ValueIndexMap::value_type(filtered[j], j));
     }
 
-    set<int> active;
+    map<int, double> active;
     ValueIndexMap::const_iterator si = strengths.end();
     while (int(active.size()) < polyphony) {
         --si;
         if (si->first < threshold) break;
         cerr << si->second << " : " << si->first << endl;
-        active.insert(si->second);
+        active[si->second] = si->first;
         if (si == strengths.begin()) break;
     }
 
@@ -556,10 +550,10 @@
     // we have 25 columns per second
     double columnDuration = 1.0 / 25.0;
     
-    for (set<int>::const_iterator ni = m_pianoRoll[width-1].begin();
+    for (map<int, double>::const_iterator ni = m_pianoRoll[width-1].begin();
          ni != m_pianoRoll[width-1].end(); ++ni) {
 
-        int note = *ni;
+        int note = ni->first;
         
         if (active.find(note) != active.end()) {
             // the note is still playing
@@ -570,7 +564,10 @@
         int end = width;
         int start = end-1;
 
+        vector<double> strengths;
+
         while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) {
+            strengths.push_back(m_pianoRoll[start][note]);
             --start;
         }
         ++start;
@@ -582,13 +579,18 @@
             continue;
         }
 
+        double medianStrength = MathUtilities::median
+            (strengths.data(), strengths.size());
+        int velocity = medianStrength * 2;
+        if (velocity > 127) velocity = 127;
+
         Feature nf;
         nf.hasTimestamp = true;
         nf.timestamp = RealTime::fromSeconds(columnDuration * start);
         nf.hasDuration = true;
         nf.duration = RealTime::fromSeconds(columnDuration * duration);
         nf.values.push_back(noteFrequency(note));
-        nf.values.push_back(80.f); //!!! todo: calculate velocity
+        nf.values.push_back(velocity);
         nf.label = noteName(note);
         noteFeatures.push_back(nf);
     }
--- a/src/Silvet.h	Mon Apr 07 17:36:40 2014 +0100
+++ b/src/Silvet.h	Tue Apr 08 13:30:32 2014 +0100
@@ -27,6 +27,7 @@
 using std::string;
 using std::vector;
 using std::set;
+using std::map;
 
 class Resampler;
 class CQInterpolated;
@@ -75,7 +76,7 @@
     typedef vector<vector<double> > Grid;
 
     vector<MedianFilter<double> *> m_postFilter;
-    vector<set<int> > m_pianoRoll;
+    vector<map<int, double> > m_pianoRoll;
 
     Grid preProcess(const Grid &);
     FeatureList postProcess(const vector<double> &);