view src/EM.cpp @ 53:9e2d1f6cd43a

Fix incorrect normalisation of source activation matrix
author Chris Cannam
date Mon, 07 Apr 2014 17:34:19 +0100
parents 9b17bbd16a5f
children a54df67e607e
line wrap: on
line source
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

/*
  Silvet

  A Vamp plugin for note transcription.
  Centre for Digital Music, Queen Mary University of London.
    
  This program is free software; you can redistribute it and/or
  modify it under the terms of the GNU General Public License as
  published by the Free Software Foundation; either version 2 of the
  License, or (at your option) any later version.  See the file
  COPYING included with this distribution for more information.
*/

#include "EM.h"

#include "data/include/templates.h"

#include <cstdlib>
#include <cmath>

#include <iostream>

#include <vector>

using std::vector;
using std::cerr;
using std::endl;

static double epsilon = 1e-16;

EM::EM() :
    m_notes(SILVET_TEMPLATE_NOTE_COUNT),
    m_bins(SILVET_TEMPLATE_HEIGHT),
    m_instruments(SILVET_TEMPLATE_COUNT),
    m_pitchSparsity(1.1),
    m_sourceSparsity(1.3)
{
    m_lowest = silvet_templates_lowest_note;
    m_highest = silvet_templates_highest_note;

    m_pitches = V(m_notes);

    for (int n = 0; n < m_notes; ++n) {
        m_pitches[n] = drand48();
    }
    
    m_sources = Grid(m_instruments);
    for (int i = 0; i < m_instruments; ++i) {
        m_sources[i] = V(m_notes);
        for (int n = 0; n < m_notes; ++n) {
            m_sources[i][n] = (inRange(i, n) ? 1.0 : 0.0);
        }
    }

    m_estimate = V(m_bins);
    m_q = V(m_bins);
}

EM::~EM()
{
}

bool
EM::inRange(int instrument, int note)
{
    return (note >= silvet_templates[instrument].lowest &&
            note <= silvet_templates[instrument].highest);
}

void
EM::normalise(V &column)
{
    double sum = 0.0;
    for (int i = 0; i < (int)column.size(); ++i) {
        sum += column[i];
    }
    for (int i = 0; i < (int)column.size(); ++i) {
        column[i] /= sum;
    }
}

void
EM::normaliseSources(Grid &sources)
{
    V denominators(sources[0].size());

    for (int i = 0; i < (int)sources.size(); ++i) {
        for (int j = 0; j < (int)sources[i].size(); ++j) {
            denominators[j] += sources[i][j];
        }
    }

    for (int i = 0; i < (int)sources.size(); ++i) {
        for (int j = 0; j < (int)sources[i].size(); ++j) {
            sources[i][j] /= denominators[j];
        }
    }
}

void
EM::iterate(V column)
{
    normalise(column);
    expectation(column);
    maximisation(column);
}

void
EM::expectation(const V &column)
{
    cerr << ".";

    for (int i = 0; i < m_bins; ++i) {
        m_estimate[i] = epsilon;
    }

    for (int i = 0; i < m_instruments; ++i) {
        for (int n = 0; n < m_notes; ++n) {
            float *w = silvet_templates[i].data[n];
            double pitch = m_pitches[n];
            double source = m_sources[i][n];
            for (int j = 0; j < m_bins; ++j) {
                m_estimate[j] += w[j] * pitch * source;
            }
        }
    }

    for (int i = 0; i < m_bins; ++i) {
        m_q[i] = column[i] / m_estimate[i];
    }
}

void
EM::maximisation(const V &column)
{
    V newPitches = m_pitches;

    for (int n = 0; n < m_notes; ++n) {
        newPitches[n] = epsilon;
        if (n >= m_lowest && n <= m_highest) {
            for (int i = 0; i < m_instruments; ++i) {
                float *w = silvet_templates[i].data[n];
                double pitch = m_pitches[n];
                double source = m_sources[i][n];
                for (int j = 0; j < m_bins; ++j) {
                    newPitches[n] += w[j] * m_q[j] * pitch * source;
                }
            }
        }
        if (m_pitchSparsity != 1.0) {
            newPitches[n] = pow(newPitches[n], m_pitchSparsity);
        }
    }
    normalise(newPitches);

    Grid newSources = m_sources;

    for (int i = 0; i < m_instruments; ++i) {
        for (int n = 0; n < m_notes; ++n) {
            newSources[i][n] = epsilon;
            if (inRange(i, n)) {
                float *w = silvet_templates[i].data[n];
                double pitch = m_pitches[n];
                double source = m_sources[i][n];
                for (int j = 0; j < m_bins; ++j) {
                    newSources[i][n] += w[j] * m_q[j] * pitch * source;
                }
            }
            if (m_sourceSparsity != 1.0) {
                newSources[i][n] = pow(newSources[i][n], m_sourceSparsity);
            }
        }
    }
    normaliseSources(newSources);

    m_pitches = newPitches;
    m_sources = newSources;
}

void
EM::report()
{
    vector<int> sounding;
    for (int n = 0; n < m_notes; ++n) {
        if (m_pitches[n] > 0.05) {
            sounding.push_back(n);
        }
    }
    cerr << " sounding: ";
    for (int i = 0; i < (int)sounding.size(); ++i) {
        cerr << sounding[i] << " ";
        int maxj = -1;
        double maxs = 0.0;
        for (int j = 0; j < m_instruments; ++j) {
            if (j == 0 || m_sources[j][sounding[i]] > maxs) {
                maxj = j;
                maxs = m_sources[j][sounding[i]];
            }
        }
        cerr << silvet_templates[maxj].name << " ";
    }
    cerr << endl;
}