Chris@34: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ Chris@34: Chris@34: /* Chris@34: Silvet Chris@34: Chris@34: A Vamp plugin for note transcription. Chris@34: Centre for Digital Music, Queen Mary University of London. Chris@34: Chris@34: This program is free software; you can redistribute it and/or Chris@34: modify it under the terms of the GNU General Public License as Chris@34: published by the Free Software Foundation; either version 2 of the Chris@34: License, or (at your option) any later version. See the file Chris@34: COPYING included with this distribution for more information. Chris@34: */ Chris@34: Chris@34: #include "EM.h" Chris@34: Chris@34: #include "data/include/templates.h" Chris@34: Chris@36: #include Chris@42: #include Chris@36: Chris@36: #include Chris@36: Chris@91: #include "VectorOps.h" Chris@91: #include "Allocators.h" Chris@36: Chris@36: using std::vector; Chris@36: using std::cerr; Chris@36: using std::endl; Chris@36: Chris@91: using namespace breakfastquay; Chris@91: Chris@35: static double epsilon = 1e-16; Chris@35: Chris@35: EM::EM() : Chris@45: m_noteCount(SILVET_TEMPLATE_NOTE_COUNT), Chris@45: m_shiftCount(SILVET_TEMPLATE_MAX_SHIFT * 2 + 1), Chris@45: m_binCount(SILVET_TEMPLATE_HEIGHT), Chris@91: m_sourceCount(SILVET_TEMPLATE_COUNT), Chris@42: m_pitchSparsity(1.1), Chris@83: m_sourceSparsity(1.3), Chris@83: m_lowestPitch(silvet_templates_lowest_note), Chris@83: m_highestPitch(silvet_templates_highest_note) Chris@35: { Chris@91: m_pitches = allocate(m_noteCount); Chris@55: for (int n = 0; n < m_noteCount; ++n) { Chris@55: m_pitches[n] = drand48(); Chris@55: } Chris@35: Chris@91: m_shifts = allocate_channels(m_shiftCount, m_noteCount); Chris@55: for (int f = 0; f < m_shiftCount; ++f) { Chris@55: for (int n = 0; n < m_noteCount; ++n) { Chris@55: m_shifts[f][n] = drand48(); Chris@55: } Chris@35: } Chris@35: Chris@91: m_sources = allocate_channels(m_sourceCount, m_noteCount); Chris@91: for (int i = 0; i < m_sourceCount; ++i) { Chris@55: for (int n = 0; n < m_noteCount; ++n) { Chris@35: m_sources[i][n] = (inRange(i, n) ? 1.0 : 0.0); Chris@35: } Chris@35: } Chris@35: Chris@91: m_estimate = allocate(m_binCount); Chris@91: m_q = allocate(m_binCount); Chris@35: } Chris@35: Chris@35: EM::~EM() Chris@35: { Chris@92: deallocate(m_q); Chris@92: deallocate(m_estimate); Chris@92: deallocate_channels(m_sources, m_sourceCount); Chris@92: deallocate_channels(m_shifts, m_shiftCount); Chris@92: deallocate(m_pitches); Chris@35: } Chris@35: Chris@45: void Chris@45: EM::rangeFor(int instrument, int &minPitch, int &maxPitch) Chris@45: { Chris@55: minPitch = silvet_templates[instrument].lowest; Chris@55: maxPitch = silvet_templates[instrument].highest; Chris@45: } Chris@45: Chris@35: bool Chris@45: EM::inRange(int instrument, int pitch) Chris@35: { Chris@45: int minPitch, maxPitch; Chris@45: rangeFor(instrument, minPitch, maxPitch); Chris@45: return (pitch >= minPitch && pitch <= maxPitch); Chris@35: } Chris@35: Chris@36: void Chris@92: EM::normaliseColumn(double *column, int size) Chris@36: { Chris@92: double sum = v_sum(column, size); Chris@92: v_scale(column, 1.0 / sum, size); Chris@36: } Chris@36: Chris@36: void Chris@92: EM::normaliseGrid(double **grid, int size1, int size2) Chris@53: { Chris@92: double *denominators = allocate_and_zero(size2); Chris@53: Chris@92: for (int i = 0; i < size1; ++i) { Chris@92: for (int j = 0; j < size2; ++j) { Chris@55: denominators[j] += grid[i][j]; Chris@53: } Chris@53: } Chris@53: Chris@92: for (int i = 0; i < size1; ++i) { Chris@92: v_divide(grid[i], denominators, size2); Chris@53: } Chris@92: Chris@92: deallocate(denominators); Chris@53: } Chris@53: Chris@53: void Chris@92: EM::iterate(const double *column) Chris@36: { Chris@92: double *norm = allocate(m_binCount); Chris@92: v_copy(norm, column, m_binCount); Chris@92: normaliseColumn(norm, m_binCount); Chris@92: expectation(norm); Chris@92: maximisation(norm); Chris@36: } Chris@36: Chris@88: const double * Chris@55: EM::templateFor(int instrument, int note, int shift) Chris@45: { Chris@45: return silvet_templates[instrument].data[note] + shift; Chris@45: } Chris@45: Chris@36: void Chris@92: EM::expectation(const double *column) Chris@36: { Chris@62: // cerr << "."; Chris@36: Chris@45: for (int i = 0; i < m_binCount; ++i) { Chris@36: m_estimate[i] = epsilon; Chris@36: } Chris@36: Chris@91: for (int i = 0; i < m_sourceCount; ++i) { Chris@55: for (int n = 0; n < m_noteCount; ++n) { Chris@83: const double pitch = m_pitches[n]; Chris@83: const double source = m_sources[i][n]; Chris@55: for (int f = 0; f < m_shiftCount; ++f) { Chris@88: const double *w = templateFor(i, n, f); Chris@83: const double shift = m_shifts[f][n]; Chris@83: const double factor = pitch * source * shift; Chris@55: for (int j = 0; j < m_binCount; ++j) { Chris@83: m_estimate[j] += w[j] * factor; Chris@55: } Chris@36: } Chris@36: } Chris@36: } Chris@36: Chris@45: for (int i = 0; i < m_binCount; ++i) { Chris@36: m_q[i] = column[i] / m_estimate[i]; Chris@36: } Chris@36: } Chris@36: Chris@36: void Chris@92: EM::maximisation(const double *column) Chris@36: { Chris@92: double *newPitches = allocate(m_noteCount); Chris@92: v_set(newPitches, epsilon, m_noteCount); Chris@92: Chris@92: double **newShifts = allocate_channels(m_shiftCount, m_noteCount); Chris@92: for (int i = 0; i < m_shiftCount; ++i) { Chris@92: v_set(newShifts[i], epsilon, m_noteCount); Chris@92: } Chris@92: Chris@92: double **newSources = allocate_channels(m_sourceCount, m_noteCount); Chris@92: for (int i = 0; i < m_sourceCount; ++i) { Chris@92: v_set(newSources[i], epsilon, m_noteCount); Chris@92: } Chris@36: Chris@55: for (int n = 0; n < m_noteCount; ++n) { Chris@85: Chris@85: const double pitch = m_pitches[n]; Chris@85: Chris@85: for (int f = 0; f < m_shiftCount; ++f) { Chris@85: Chris@85: const double shift = m_shifts[f][n]; Chris@85: Chris@91: for (int i = 0; i < m_sourceCount; ++i) { Chris@85: Chris@83: const double source = m_sources[i][n]; Chris@89: const double factor = pitch * source * shift; Chris@88: const double *w = templateFor(i, n, f); Chris@85: Chris@86: if (n >= m_lowestPitch && n <= m_highestPitch) { Chris@85: Chris@86: for (int j = 0; j < m_binCount; ++j) { Chris@86: newPitches[n] += w[j] * m_q[j] * factor; Chris@85: } Chris@85: Chris@85: if (inRange(i, n)) { Chris@86: for (int j = 0; j < m_binCount; ++j) { Chris@86: newSources[i][n] += w[j] * m_q[j] * factor; Chris@86: } Chris@55: } Chris@36: } Chris@86: Chris@86: for (int j = 0; j < m_binCount; ++j) { Chris@86: newShifts[f][n] += w[j] * m_q[j] * factor; Chris@86: } Chris@36: } Chris@36: } Chris@85: } Chris@85: Chris@85: for (int n = 0; n < m_noteCount; ++n) { Chris@42: if (m_pitchSparsity != 1.0) { Chris@42: newPitches[n] = pow(newPitches[n], m_pitchSparsity); Chris@42: } Chris@85: if (m_sourceSparsity != 1.0) { Chris@91: for (int i = 0; i < m_sourceCount; ++i) { Chris@42: newSources[i][n] = pow(newSources[i][n], m_sourceSparsity); Chris@42: } Chris@36: } Chris@36: } Chris@85: Chris@92: normaliseColumn(newPitches, m_noteCount); Chris@92: normaliseGrid(newShifts, m_shiftCount, m_noteCount); Chris@92: normaliseGrid(newSources, m_sourceCount, m_noteCount); Chris@92: Chris@92: deallocate(m_pitches); Chris@92: deallocate_channels(m_shifts, m_shiftCount); Chris@92: deallocate_channels(m_sources, m_sourceCount); Chris@36: Chris@36: m_pitches = newPitches; Chris@55: m_shifts = newShifts; Chris@36: m_sources = newSources; Chris@36: } Chris@36: Chris@36: