Mercurial > hg > silvet
changeset 55:384338fa460d preshift
Support shifts as an additional dimension (as in the original model). Also return velocity as well.
author | Chris Cannam |
---|---|
date | Tue, 08 Apr 2014 13:30:32 +0100 |
parents | a54df67e607e |
children | 4fa3ea96eb65 dcdd09d2abec |
files | Makefile.inc src/EM.cpp src/EM.h src/Silvet.cpp src/Silvet.h |
diffstat | 5 files changed, 102 insertions(+), 97 deletions(-) [+] |
line wrap: on
line diff
--- a/Makefile.inc Mon Apr 07 17:36:40 2014 +0100 +++ b/Makefile.inc Tue Apr 08 13:30:32 2014 +0100 @@ -55,8 +55,7 @@ src/EM.o: data/include/cello.h data/include/clarinet.h data/include/flute.h src/EM.o: data/include/guitar.h data/include/horn.h data/include/oboe.h src/EM.o: data/include/tenorsax.h data/include/violin.h -src/EM.o: data/include/piano-maps-SptkBGCl.h data/include/piano1.h -src/EM.o: data/include/piano2.h data/include/piano3.h +src/EM.o: data/include/piano-maps-SptkBGCl.h src/libmain.o: src/Silvet.h constant-q-cpp/cpp-qm-dsp/CQKernel.o: constant-q-cpp/cpp-qm-dsp/CQKernel.h constant-q-cpp/cpp-qm-dsp/ConstantQ.o: constant-q-cpp/cpp-qm-dsp/ConstantQ.h
--- a/src/EM.cpp Mon Apr 07 17:36:40 2014 +0100 +++ b/src/EM.cpp Tue Apr 08 13:30:32 2014 +0100 @@ -33,28 +33,31 @@ EM::EM() : m_noteCount(SILVET_TEMPLATE_NOTE_COUNT), m_shiftCount(SILVET_TEMPLATE_MAX_SHIFT * 2 + 1), - m_pitchCount(m_noteCount * m_shiftCount), m_binCount(SILVET_TEMPLATE_HEIGHT), m_instrumentCount(SILVET_TEMPLATE_COUNT), m_pitchSparsity(1.1), m_sourceSparsity(1.3) { - m_lowestPitch = - silvet_templates_lowest_note * m_shiftCount; - m_highestPitch = - silvet_templates_highest_note * m_shiftCount + m_shiftCount - 1; + m_lowestPitch = silvet_templates_lowest_note; + m_highestPitch = silvet_templates_highest_note; - m_pitches = V(m_pitchCount); + m_pitches = V(m_noteCount); + for (int n = 0; n < m_noteCount; ++n) { + m_pitches[n] = drand48(); + } - for (int n = 0; n < m_pitchCount; ++n) { - m_pitches[n] = drand48(); + m_shifts = Grid(m_shiftCount); + for (int f = 0; f < m_shiftCount; ++f) { + m_shifts[f] = V(m_noteCount); + for (int n = 0; n < m_noteCount; ++n) { + m_shifts[f][n] = drand48(); + } } m_sources = Grid(m_instrumentCount); - - for (int i = 0; i < m_instrumentCount; ++i) { - m_sources[i] = V(m_pitchCount); - for (int n = 0; n < m_pitchCount; ++n) { + for (int i = 0; i < m_instrumentCount; ++i) { + m_sources[i] = V(m_noteCount); + for (int n = 0; n < m_noteCount; ++n) { m_sources[i][n] = (inRange(i, n) ? 1.0 : 0.0); } } @@ -70,9 +73,8 @@ void EM::rangeFor(int instrument, int &minPitch, int &maxPitch) { - minPitch = silvet_templates[instrument].lowest * m_shiftCount; - maxPitch = silvet_templates[instrument].highest * m_shiftCount - + m_shiftCount - 1; + minPitch = silvet_templates[instrument].lowest; + maxPitch = silvet_templates[instrument].highest; } bool @@ -84,7 +86,7 @@ } void -EM::normalise(V &column) +EM::normaliseColumn(V &column) { double sum = 0.0; for (int i = 0; i < (int)column.size(); ++i) { @@ -96,19 +98,19 @@ } void -EM::normaliseSources(Grid &sources) +EM::normaliseGrid(Grid &grid) { - V denominators(sources[0].size()); + V denominators(grid[0].size()); - for (int i = 0; i < (int)sources.size(); ++i) { - for (int j = 0; j < (int)sources[i].size(); ++j) { - denominators[j] += sources[i][j]; + for (int i = 0; i < (int)grid.size(); ++i) { + for (int j = 0; j < (int)grid[i].size(); ++j) { + denominators[j] += grid[i][j]; } } - for (int i = 0; i < (int)sources.size(); ++i) { - for (int j = 0; j < (int)sources[i].size(); ++j) { - sources[i][j] /= denominators[j]; + for (int i = 0; i < (int)grid.size(); ++i) { + for (int j = 0; j < (int)grid[i].size(); ++j) { + grid[i][j] /= denominators[j]; } } } @@ -116,16 +118,14 @@ void EM::iterate(V column) { - normalise(column); + normaliseColumn(column); expectation(column); maximisation(column); } const float * -EM::templateFor(int instrument, int pitch) +EM::templateFor(int instrument, int note, int shift) { - int note = pitch / m_shiftCount; - int shift = pitch % m_shiftCount; return silvet_templates[instrument].data[note] + shift; } @@ -139,12 +139,15 @@ } for (int i = 0; i < m_instrumentCount; ++i) { - for (int n = 0; n < m_pitchCount; ++n) { - const float *w = templateFor(i, n); - double pitch = m_pitches[n]; - double source = m_sources[i][n]; - for (int j = 0; j < m_binCount; ++j) { - m_estimate[j] += w[j] * pitch * source; + for (int n = 0; n < m_noteCount; ++n) { + for (int f = 0; f < m_shiftCount; ++f) { + const float *w = templateFor(i, n, f); + double pitch = m_pitches[n]; + double source = m_sources[i][n]; + double shift = m_shifts[f][n]; + for (int j = 0; j < m_binCount; ++j) { + m_estimate[j] += w[j] * pitch * source * shift; + } } } } @@ -159,15 +162,18 @@ { V newPitches = m_pitches; - for (int n = 0; n < m_pitchCount; ++n) { + for (int n = 0; n < m_noteCount; ++n) { newPitches[n] = epsilon; if (n >= m_lowestPitch && n <= m_highestPitch) { for (int i = 0; i < m_instrumentCount; ++i) { - const float *w = templateFor(i, n); - double pitch = m_pitches[n]; - double source = m_sources[i][n]; - for (int j = 0; j < m_binCount; ++j) { - newPitches[n] += w[j] * m_q[j] * pitch * source; + for (int f = 0; f < m_shiftCount; ++f) { + const float *w = templateFor(i, n, f); + double pitch = m_pitches[n]; + double source = m_sources[i][n]; + double shift = m_shifts[f][n]; + for (int j = 0; j < m_binCount; ++j) { + newPitches[n] += w[j] * m_q[j] * pitch * source * shift; + } } } } @@ -175,19 +181,40 @@ newPitches[n] = pow(newPitches[n], m_pitchSparsity); } } - normalise(newPitches); + normaliseColumn(newPitches); + + Grid newShifts = m_shifts; + + for (int f = 0; f < m_shiftCount; ++f) { + for (int n = 0; n < m_noteCount; ++n) { + newShifts[f][n] = epsilon; + for (int i = 0; i < m_instrumentCount; ++i) { + const float *w = templateFor(i, n, f); + double pitch = m_pitches[n]; + double source = m_sources[i][n]; + double shift = m_shifts[f][n]; + for (int j = 0; j < m_binCount; ++j) { + newShifts[f][n] += w[j] * m_q[j] * pitch * source * shift; + } + } + } + } + normaliseGrid(newShifts); Grid newSources = m_sources; for (int i = 0; i < m_instrumentCount; ++i) { - for (int n = 0; n < m_pitchCount; ++n) { + for (int n = 0; n < m_noteCount; ++n) { newSources[i][n] = epsilon; if (inRange(i, n)) { - const float *w = templateFor(i, n); - double pitch = m_pitches[n]; - double source = m_sources[i][n]; - for (int j = 0; j < m_binCount; ++j) { - newSources[i][n] += w[j] * m_q[j] * pitch * source; + for (int f = 0; f < m_shiftCount; ++f) { + const float *w = templateFor(i, n, f); + double pitch = m_pitches[n]; + double source = m_sources[i][n]; + double shift = m_shifts[f][n]; + for (int j = 0; j < m_binCount; ++j) { + newSources[i][n] += w[j] * m_q[j] * pitch * source * shift; + } } } if (m_sourceSparsity != 1.0) { @@ -195,34 +222,11 @@ } } } - normaliseSources(newSources); + normaliseGrid(newSources); m_pitches = newPitches; + m_shifts = newShifts; m_sources = newSources; } -void -EM::report() -{ - vector<int> sounding; - for (int n = 0; n < m_pitchCount; ++n) { - if (m_pitches[n] > 0.05) { - sounding.push_back(n); - } - } - cerr << " sounding: "; - for (int i = 0; i < (int)sounding.size(); ++i) { - cerr << sounding[i] << " "; - int maxj = -1; - double maxs = 0.0; - for (int j = 0; j < m_instrumentCount; ++j) { - if (j == 0 || m_sources[j][sounding[i]] > maxs) { - maxj = j; - maxs = m_sources[j][sounding[i]]; - } - } - cerr << silvet_templates[maxj].name << " "; - } - cerr << endl; -}
--- a/src/EM.h Mon Apr 07 17:36:40 2014 +0100 +++ b/src/EM.h Tue Apr 08 13:30:32 2014 +0100 @@ -25,7 +25,6 @@ ~EM(); void iterate(std::vector<double> column); - void report(); const std::vector<double> &getEstimate() const { return m_estimate; @@ -42,6 +41,7 @@ typedef std::vector<std::vector<double> > Grid; V m_pitches; + Grid m_shifts; Grid m_sources; V m_estimate; @@ -49,7 +49,6 @@ int m_noteCount; int m_shiftCount; // 1 + 2 * max template shift - int m_pitchCount; // noteCount * shiftCount int m_binCount; int m_instrumentCount; @@ -59,12 +58,12 @@ int m_lowestPitch; int m_highestPitch; - void normalise(V &column); - void normaliseSources(Grid &grid); + void normaliseColumn(V &column); + void normaliseGrid(Grid &grid); void expectation(const V &column); void maximisation(const V &column); - const float *templateFor(int instrument, int pitch); + const float *templateFor(int instrument, int note, int shift); void rangeFor(int instrument, int &minPitch, int &maxPitch); bool inRange(int instrument, int pitch); };
--- a/src/Silvet.cpp Mon Apr 07 17:36:40 2014 +0100 +++ b/src/Silvet.cpp Tue Apr 08 13:30:32 2014 +0100 @@ -17,6 +17,7 @@ #include "EM.h" #include "maths/MedianFilter.h" +#include "maths/MathUtilities.h" #include "dsp/rateconversion/Resampler.h" #include "constant-q-cpp/cpp-qm-dsp/CQInterpolated.h" @@ -35,8 +36,6 @@ static int processingBPO = 60; static int processingHeight = 545; static int processingNotes = 88; -static int processingShifts = 5; -static int processingPitches = processingNotes * processingShifts; Silvet::Silvet(float inputSampleRate) : Plugin(inputSampleRate), @@ -235,9 +234,9 @@ d.description = "Estimated pitch activation matrix"; d.unit = ""; d.hasFixedBinCount = true; - d.binCount = processingPitches; + d.binCount = processingNotes; d.binNames.clear(); - for (int i = 0; i < processingPitches; ++i) { + for (int i = 0; i < processingNotes; ++i) { d.binNames.push_back(noteName(i)); } d.hasKnownExtents = false; @@ -404,12 +403,12 @@ vector<double> pitches = em.getPitchDistribution(); - for (int j = 0; j < processingPitches; ++j) { + for (int j = 0; j < processingNotes; ++j) { pitches[j] *= sum; } Feature f; - for (int j = 0; j < processingPitches; ++j) { + for (int j = 0; j < processingNotes; ++j) { f.values.push_back(float(pitches[j])); } fs[m_pitchOutputNo].push_back(f); @@ -504,12 +503,7 @@ vector<double> filtered; for (int j = 0; j < processingNotes; ++j) { - double noteSum = 0.0; - for (int s = 0; s < processingShifts; ++s) { - double val = pitches[j * processingShifts + s]; - noteSum += val; - } - m_postFilter[j]->push(noteSum); + m_postFilter[j]->push(pitches[j]); filtered.push_back(m_postFilter[j]->get()); } @@ -525,13 +519,13 @@ strengths.insert(ValueIndexMap::value_type(filtered[j], j)); } - set<int> active; + map<int, double> active; ValueIndexMap::const_iterator si = strengths.end(); while (int(active.size()) < polyphony) { --si; if (si->first < threshold) break; cerr << si->second << " : " << si->first << endl; - active.insert(si->second); + active[si->second] = si->first; if (si == strengths.begin()) break; } @@ -556,10 +550,10 @@ // we have 25 columns per second double columnDuration = 1.0 / 25.0; - for (set<int>::const_iterator ni = m_pianoRoll[width-1].begin(); + for (map<int, double>::const_iterator ni = m_pianoRoll[width-1].begin(); ni != m_pianoRoll[width-1].end(); ++ni) { - int note = *ni; + int note = ni->first; if (active.find(note) != active.end()) { // the note is still playing @@ -570,7 +564,10 @@ int end = width; int start = end-1; + vector<double> strengths; + while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) { + strengths.push_back(m_pianoRoll[start][note]); --start; } ++start; @@ -582,13 +579,18 @@ continue; } + double medianStrength = MathUtilities::median + (strengths.data(), strengths.size()); + int velocity = medianStrength * 2; + if (velocity > 127) velocity = 127; + Feature nf; nf.hasTimestamp = true; nf.timestamp = RealTime::fromSeconds(columnDuration * start); nf.hasDuration = true; nf.duration = RealTime::fromSeconds(columnDuration * duration); nf.values.push_back(noteFrequency(note)); - nf.values.push_back(80.f); //!!! todo: calculate velocity + nf.values.push_back(velocity); nf.label = noteName(note); noteFeatures.push_back(nf); }
--- a/src/Silvet.h Mon Apr 07 17:36:40 2014 +0100 +++ b/src/Silvet.h Tue Apr 08 13:30:32 2014 +0100 @@ -27,6 +27,7 @@ using std::string; using std::vector; using std::set; +using std::map; class Resampler; class CQInterpolated; @@ -75,7 +76,7 @@ typedef vector<vector<double> > Grid; vector<MedianFilter<double> *> m_postFilter; - vector<set<int> > m_pianoRoll; + vector<map<int, double> > m_pianoRoll; Grid preProcess(const Grid &); FeatureList postProcess(const vector<double> &);