Chris@34: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ Chris@34: Chris@34: /* Chris@34: Silvet Chris@34: Chris@34: A Vamp plugin for note transcription. Chris@34: Centre for Digital Music, Queen Mary University of London. Chris@34: Chris@34: This program is free software; you can redistribute it and/or Chris@34: modify it under the terms of the GNU General Public License as Chris@34: published by the Free Software Foundation; either version 2 of the Chris@34: License, or (at your option) any later version. See the file Chris@34: COPYING included with this distribution for more information. Chris@34: */ Chris@34: Chris@34: #include "EM.h" Chris@34: Chris@34: #include "data/include/templates.h" Chris@34: Chris@36: #include Chris@42: #include Chris@36: Chris@36: #include Chris@36: Chris@36: #include Chris@36: Chris@36: using std::vector; Chris@36: using std::cerr; Chris@36: using std::endl; Chris@36: Chris@35: static double epsilon = 1e-16; Chris@35: Chris@35: EM::EM() : Chris@35: m_notes(SILVET_TEMPLATE_NOTE_COUNT), Chris@35: m_bins(SILVET_TEMPLATE_HEIGHT), Chris@42: m_instruments(SILVET_TEMPLATE_COUNT), Chris@42: m_pitchSparsity(1.1), Chris@42: m_sourceSparsity(1.3) Chris@35: { Chris@35: m_lowest = 0; Chris@35: m_highest = m_notes - 1; Chris@35: Chris@35: for (int i = 0; i < m_instruments; ++i) { Chris@35: if (i == 0 || silvet_templates[i].lowest < m_lowest) { Chris@35: m_lowest = silvet_templates[i].lowest; Chris@35: } Chris@35: if (i == 0 || silvet_templates[i].highest > m_highest) { Chris@35: m_highest = silvet_templates[i].highest; Chris@35: } Chris@35: } Chris@35: Chris@35: m_pitches = V(m_notes); Chris@35: Chris@36: for (int n = 0; n < m_notes; ++n) { Chris@35: m_pitches[n] = drand48(); Chris@35: } Chris@35: Chris@35: m_sources = Grid(m_instruments); Chris@35: Chris@35: for (int i = 0; i < m_instruments; ++i) { Chris@35: m_sources[i] = V(m_notes); Chris@35: for (int n = 0; n < m_notes; ++n) { Chris@35: m_sources[i][n] = (inRange(i, n) ? 1.0 : 0.0); Chris@35: } Chris@35: } Chris@35: Chris@36: m_estimate = V(m_bins); Chris@35: m_q = V(m_bins); Chris@35: } Chris@35: Chris@35: EM::~EM() Chris@35: { Chris@35: } Chris@35: Chris@35: bool Chris@35: EM::inRange(int instrument, int note) Chris@35: { Chris@35: return (note >= silvet_templates[instrument].lowest && Chris@35: note <= silvet_templates[instrument].highest); Chris@35: } Chris@35: Chris@36: void Chris@36: EM::normalise(V &column) Chris@36: { Chris@36: double sum = 0.0; Chris@36: for (int i = 0; i < (int)column.size(); ++i) { Chris@36: sum += column[i]; Chris@36: } Chris@36: for (int i = 0; i < (int)column.size(); ++i) { Chris@36: column[i] /= sum; Chris@36: } Chris@36: } Chris@36: Chris@36: void Chris@36: EM::iterate(V column) Chris@36: { Chris@36: normalise(column); Chris@36: expectation(column); Chris@36: maximisation(column); Chris@36: } Chris@36: Chris@36: void Chris@36: EM::expectation(const V &column) Chris@36: { Chris@36: cerr << "."; Chris@36: Chris@36: for (int i = 0; i < m_bins; ++i) { Chris@36: m_estimate[i] = epsilon; Chris@36: } Chris@36: Chris@36: for (int i = 0; i < m_instruments; ++i) { Chris@36: for (int n = 0; n < m_notes; ++n) { Chris@36: float *w = silvet_templates[i].data[n]; Chris@36: double pitch = m_pitches[n]; Chris@36: double source = m_sources[i][n]; Chris@36: for (int j = 0; j < m_bins; ++j) { Chris@36: m_estimate[j] += w[j] * pitch * source; Chris@36: } Chris@36: } Chris@36: } Chris@36: Chris@36: for (int i = 0; i < m_bins; ++i) { Chris@36: m_q[i] = column[i] / m_estimate[i]; Chris@36: } Chris@36: } Chris@36: Chris@36: void Chris@36: EM::maximisation(const V &column) Chris@36: { Chris@36: V newPitches = m_pitches; Chris@36: Chris@36: for (int n = 0; n < m_notes; ++n) { Chris@36: newPitches[n] = epsilon; Chris@36: if (n >= m_lowest && n <= m_highest) { Chris@36: for (int i = 0; i < m_instruments; ++i) { Chris@36: float *w = silvet_templates[i].data[n]; Chris@36: double pitch = m_pitches[n]; Chris@36: double source = m_sources[i][n]; Chris@36: for (int j = 0; j < m_bins; ++j) { Chris@36: newPitches[n] += w[j] * m_q[j] * pitch * source; Chris@36: } Chris@36: } Chris@36: } Chris@42: if (m_pitchSparsity != 1.0) { Chris@42: newPitches[n] = pow(newPitches[n], m_pitchSparsity); Chris@42: } Chris@36: } Chris@36: normalise(newPitches); Chris@36: Chris@36: Grid newSources = m_sources; Chris@36: Chris@36: for (int i = 0; i < m_instruments; ++i) { Chris@36: for (int n = 0; n < m_notes; ++n) { Chris@36: newSources[i][n] = epsilon; Chris@36: if (inRange(i, n)) { Chris@36: float *w = silvet_templates[i].data[n]; Chris@38: double pitch = m_pitches[n]; Chris@38: double source = m_sources[i][n]; Chris@36: for (int j = 0; j < m_bins; ++j) { Chris@38: newSources[i][n] += w[j] * m_q[j] * pitch * source; Chris@36: } Chris@36: } Chris@42: if (m_sourceSparsity != 1.0) { Chris@42: newSources[i][n] = pow(newSources[i][n], m_sourceSparsity); Chris@42: } Chris@36: } Chris@36: normalise(newSources[i]); Chris@36: } Chris@36: Chris@36: m_pitches = newPitches; Chris@36: m_sources = newSources; Chris@36: } Chris@36: Chris@36: void Chris@36: EM::report() Chris@36: { Chris@36: vector sounding; Chris@36: for (int n = 0; n < m_notes; ++n) { Chris@36: if (m_pitches[n] > 0.05) { Chris@36: sounding.push_back(n); Chris@36: } Chris@36: } Chris@36: cerr << " sounding: "; Chris@36: for (int i = 0; i < (int)sounding.size(); ++i) { Chris@36: cerr << sounding[i] << " "; Chris@37: int maxj = -1; Chris@37: double maxs = 0.0; Chris@37: for (int j = 0; j < m_instruments; ++j) { Chris@37: if (j == 0 || m_sources[j][sounding[i]] > maxs) { Chris@37: maxj = j; Chris@37: maxs = m_sources[j][sounding[i]]; Chris@37: } Chris@37: } Chris@37: cerr << silvet_templates[maxj].name << " "; Chris@36: } Chris@36: cerr << endl; Chris@36: } Chris@36: