view MonoNoteHMM.cpp @ 164:a7d9c6142f8f tip

Added tag v1.2 for changeset 4a97f7638ffd
author Chris Cannam
date Thu, 06 Feb 2020 15:02:47 +0000
parents 3faac48d491d
children
line wrap: on
line source
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

/*
    pYIN - A fundamental frequency estimator for monophonic audio
    Centre for Digital Music, Queen Mary, University of London.
    
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
*/

#include "MonoNoteHMM.h"

#include <boost/math/distributions.hpp>

#include <cstdio>
#include <cmath>

using std::vector;
using std::pair;

MonoNoteHMM::MonoNoteHMM(int fixedLag) :
    SparseHMM(fixedLag),
    par()
{
    build();
}

vector<double>
MonoNoteHMM::calculateObsProb(const vector<pair<double, double> > &pitchProb)
{
    // pitchProb is a list of pairs (pitches and their probabilities)
    
    size_t nCandidate = pitchProb.size();
    
    // what is the probability of pitched
    double pIsPitched = 0;
    for (size_t iCand = 0; iCand < nCandidate; ++iCand)
    {
        pIsPitched += pitchProb[iCand].second;
    }

    pIsPitched = pIsPitched * (1-par.priorWeight) + 
                     par.priorPitchedProb * par.priorWeight;

    vector<double> out = vector<double>(par.n);
    double tempProbSum = 0;
    for (size_t i = 0; i < par.n; ++i)
    {
        if (i % par.nSPP != 2)
        {
            // std::cerr << getMidiPitch(i) << std::endl;
            double tempProb = 0;
            if (nCandidate > 0)
            {
                double minDist = 10000.0;
                double minDistProb = 0;
                size_t minDistCandidate = 0;
                for (size_t iCand = 0; iCand < nCandidate; ++iCand)
                {
                    double currDist = std::abs(getMidiPitch(i)-
                                               pitchProb[iCand].first);
                    if (currDist < minDist)
                    {
                        minDist = currDist;
                        minDistProb = pitchProb[iCand].second;
                        minDistCandidate = iCand;
                    }
                }
                tempProb = std::pow(minDistProb, par.yinTrust) * 
                           boost::math::pdf(pitchDistr[i], 
                                            pitchProb[minDistCandidate].first);
            } else {
                tempProb = 1;
            }
            tempProbSum += tempProb;
            out[i] = tempProb;
        }
    }
    
    for (size_t i = 0; i < par.n; ++i)
    {
        if (i % par.nSPP != 2)
        {
            if (tempProbSum > 0) 
            {
                out[i] = out[i] / tempProbSum * pIsPitched;
            }
        } else {
            out[i] = (1-pIsPitched) / (par.nPPS * par.nS);
        }
    }
    
    return(out);
}

void
MonoNoteHMM::build()
{
    // the states are organised as follows:
    // 0-2. lowest pitch
    //    0. attack state
    //    1. stable state
    //    2. silent state
    // 3-5. second-lowest pitch
    //    3. attack state
    //    ...
    
    m_nState = par.n;

    // observation distributions
    for (size_t iState = 0; iState < par.n; ++iState)
    {
        pitchDistr.push_back(boost::math::normal(0,1));
        if (iState % par.nSPP == 2)
        {
            // silent state starts tracking
            m_init.push_back(1.0/(par.nS * par.nPPS));
        } else {
            m_init.push_back(0.0);            
        }
    }

    for (size_t iPitch = 0; iPitch < (par.nS * par.nPPS); ++iPitch)
    {
        size_t index = iPitch * par.nSPP;
        double mu = par.minPitch + iPitch * 1.0/par.nPPS;
        pitchDistr[index] = boost::math::normal(mu, par.sigmaYinPitchAttack);
        pitchDistr[index+1] = boost::math::normal(mu, par.sigmaYinPitchStable);
        pitchDistr[index+2] = boost::math::normal(mu, 1.0); // dummy
    }
    
    boost::math::normal noteDistanceDistr(0, par.sigma2Note);

    for (size_t iPitch = 0; iPitch < (par.nS * par.nPPS); ++iPitch)
    {
        // loop through all notes and set sparse transition probabilities
        size_t index = iPitch * par.nSPP;

        // transitions from attack state
        m_from.push_back(index);
        m_to.push_back(index);
        m_transProb.push_back(par.pAttackSelftrans);

        m_from.push_back(index);
        m_to.push_back(index+1);
        m_transProb.push_back(1-par.pAttackSelftrans);

        // transitions from stable state
        m_from.push_back(index+1);
        m_to.push_back(index+1); // to itself
        m_transProb.push_back(par.pStableSelftrans);
        
        m_from.push_back(index+1);
        m_to.push_back(index+2); // to silent
        m_transProb.push_back(par.pStable2Silent);

        // the "easy" transitions from silent state
        m_from.push_back(index+2);
        m_to.push_back(index+2);
        m_transProb.push_back(par.pSilentSelftrans);
        
        
        // the more complicated transitions from the silent
        double probSumSilent = 0;

        vector<double> tempTransProbSilent;
        for (size_t jPitch = 0; jPitch < (par.nS * par.nPPS); ++jPitch)
        {
            int fromPitch = iPitch;
            int toPitch = jPitch;
            double semitoneDistance = 
                std::abs(fromPitch - toPitch) * 1.0 / par.nPPS;
            

            if (semitoneDistance == 0 || 
                (semitoneDistance > par.minSemitoneDistance 
                 && semitoneDistance < par.maxJump))
            {
                size_t toIndex = jPitch * par.nSPP; // note attack index

                double tempWeightSilent = boost::math::pdf(noteDistanceDistr, 
                                                           semitoneDistance);
                probSumSilent += tempWeightSilent;

                tempTransProbSilent.push_back(tempWeightSilent);

                m_from.push_back(index+2);
                m_to.push_back(toIndex);
            }
        }
        for (size_t i = 0; i < tempTransProbSilent.size(); ++i)
        {
            m_transProb.push_back((1-par.pSilentSelftrans) * 
                                  tempTransProbSilent[i]/probSumSilent);
        }
    }
    m_nTrans = m_transProb.size();
    m_delta = vector<double>(m_nState);
    m_oldDelta = vector<double>(m_nState);
}

double
MonoNoteHMM::getMidiPitch(size_t index)
{
    return pitchDistr[index].mean();
}

double
MonoNoteHMM::getFrequency(size_t index)
{
    return 440 * pow(2, (pitchDistr[index].mean()-69)/12);
}