Chris@9
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
Chris@9
|
2
|
Chris@9
|
3 /*
|
Chris@9
|
4 pYIN - A fundamental frequency estimator for monophonic audio
|
Chris@9
|
5 Centre for Digital Music, Queen Mary, University of London.
|
Chris@9
|
6
|
Chris@9
|
7 This program is free software; you can redistribute it and/or
|
Chris@9
|
8 modify it under the terms of the GNU General Public License as
|
Chris@9
|
9 published by the Free Software Foundation; either version 2 of the
|
Chris@9
|
10 License, or (at your option) any later version. See the file
|
Chris@9
|
11 COPYING included with this distribution for more information.
|
Chris@9
|
12 */
|
Chris@9
|
13
|
matthiasm@0
|
14 #include "MonoNoteHMM.h"
|
matthiasm@0
|
15
|
matthiasm@0
|
16 #include <boost/math/distributions.hpp>
|
matthiasm@0
|
17
|
matthiasm@0
|
18 #include <cstdio>
|
matthiasm@0
|
19 #include <cmath>
|
matthiasm@0
|
20
|
matthiasm@0
|
21 using std::vector;
|
matthiasm@0
|
22 using std::pair;
|
matthiasm@0
|
23
|
matthiasm@0
|
24 MonoNoteHMM::MonoNoteHMM() :
|
matthiasm@0
|
25 par()
|
matthiasm@0
|
26 {
|
matthiasm@0
|
27 build();
|
matthiasm@0
|
28 }
|
matthiasm@0
|
29
|
matthiasm@0
|
30 const vector<double>
|
matthiasm@0
|
31 MonoNoteHMM::calculateObsProb(const vector<pair<double, double> > pitchProb)
|
matthiasm@0
|
32 {
|
matthiasm@0
|
33 // pitchProb is a list of pairs (pitches and their probabilities)
|
matthiasm@0
|
34
|
matthiasm@0
|
35 size_t nCandidate = pitchProb.size();
|
matthiasm@0
|
36
|
matthiasm@0
|
37 // what is the probability of pitched
|
matthiasm@0
|
38 double pIsPitched = 0;
|
matthiasm@0
|
39 for (size_t iCandidate = 0; iCandidate < nCandidate; ++iCandidate)
|
matthiasm@0
|
40 {
|
matthiasm@0
|
41 // pIsPitched = pitchProb[iCandidate].second > pIsPitched ? pitchProb[iCandidate].second : pIsPitched;
|
matthiasm@0
|
42 pIsPitched += pitchProb[iCandidate].second;
|
matthiasm@0
|
43 }
|
matthiasm@0
|
44
|
matthiasm@0
|
45 // pIsPitched = std::pow(pIsPitched, (1-par.priorWeight)) * std::pow(par.priorPitchedProb, par.priorWeight);
|
matthiasm@0
|
46 pIsPitched = pIsPitched * (1-par.priorWeight) + par.priorPitchedProb * par.priorWeight;
|
matthiasm@0
|
47
|
matthiasm@0
|
48 vector<double> out = vector<double>(par.n);
|
matthiasm@0
|
49 double tempProbSum = 0;
|
matthiasm@0
|
50 for (size_t i = 0; i < par.n; ++i)
|
matthiasm@0
|
51 {
|
matthiasm@0
|
52 if (i % 4 != 2)
|
matthiasm@0
|
53 {
|
matthiasm@0
|
54 // std::cerr << getMidiPitch(i) << std::endl;
|
matthiasm@0
|
55 double tempProb = 0;
|
matthiasm@0
|
56 if (nCandidate > 0)
|
matthiasm@0
|
57 {
|
matthiasm@0
|
58 double minDist = 10000.0;
|
matthiasm@0
|
59 double minDistProb = 0;
|
matthiasm@0
|
60 size_t minDistCandidate = 0;
|
matthiasm@0
|
61 for (size_t iCandidate = 0; iCandidate < nCandidate; ++iCandidate)
|
matthiasm@0
|
62 {
|
matthiasm@0
|
63 double currDist = std::abs(getMidiPitch(i)-pitchProb[iCandidate].first);
|
matthiasm@0
|
64 if (currDist < minDist)
|
matthiasm@0
|
65 {
|
matthiasm@0
|
66 minDist = currDist;
|
matthiasm@0
|
67 minDistProb = pitchProb[iCandidate].second;
|
matthiasm@0
|
68 minDistCandidate = iCandidate;
|
matthiasm@0
|
69 }
|
matthiasm@0
|
70 }
|
matthiasm@0
|
71 tempProb = std::pow(minDistProb, par.yinTrust) * boost::math::pdf(pitchDistr[i], pitchProb[minDistCandidate].first);
|
matthiasm@0
|
72 } else {
|
matthiasm@0
|
73 tempProb = 1;
|
matthiasm@0
|
74 }
|
matthiasm@0
|
75 tempProbSum += tempProb;
|
matthiasm@0
|
76 out[i] = tempProb;
|
matthiasm@0
|
77 }
|
matthiasm@0
|
78 }
|
matthiasm@0
|
79
|
matthiasm@0
|
80 for (size_t i = 0; i < par.n; ++i)
|
matthiasm@0
|
81 {
|
matthiasm@0
|
82 if (i % 4 != 2)
|
matthiasm@0
|
83 {
|
matthiasm@0
|
84 if (tempProbSum > 0)
|
matthiasm@0
|
85 {
|
matthiasm@0
|
86 out[i] = out[i] / tempProbSum * pIsPitched;
|
matthiasm@0
|
87 }
|
matthiasm@0
|
88 } else {
|
matthiasm@0
|
89 out[i] = (1-pIsPitched) / (par.nPPS * par.nS);
|
matthiasm@0
|
90 }
|
matthiasm@0
|
91 }
|
matthiasm@0
|
92
|
matthiasm@0
|
93 return(out);
|
matthiasm@0
|
94 }
|
matthiasm@0
|
95
|
matthiasm@0
|
96 void
|
matthiasm@0
|
97 MonoNoteHMM::build()
|
matthiasm@0
|
98 {
|
matthiasm@0
|
99 // the states are organised as follows:
|
matthiasm@0
|
100 // 0-2. lowest pitch
|
matthiasm@0
|
101 // 0. attack state
|
matthiasm@0
|
102 // 1. stable state
|
matthiasm@0
|
103 // 2. silent state
|
matthiasm@0
|
104 // 3. inter state
|
matthiasm@0
|
105 // 4-6. second-lowest pitch
|
matthiasm@0
|
106 // 4. attack state
|
matthiasm@0
|
107 // ...
|
matthiasm@0
|
108
|
matthiasm@0
|
109 // observation distributions
|
matthiasm@0
|
110 for (size_t iState = 0; iState < par.n; ++iState)
|
matthiasm@0
|
111 {
|
matthiasm@0
|
112 pitchDistr.push_back(boost::math::normal(0,1));
|
matthiasm@0
|
113 if (iState % 4 == 2)
|
matthiasm@0
|
114 {
|
matthiasm@0
|
115 // silent state starts tracking
|
matthiasm@0
|
116 init.push_back(1.0/(par.nS * par.nPPS));
|
matthiasm@0
|
117 } else {
|
matthiasm@0
|
118 init.push_back(0.0);
|
matthiasm@0
|
119 }
|
matthiasm@0
|
120 }
|
matthiasm@0
|
121
|
matthiasm@0
|
122 for (size_t iPitch = 0; iPitch < (par.nS * par.nPPS); ++iPitch)
|
matthiasm@0
|
123 {
|
matthiasm@0
|
124 size_t index = iPitch * par.nSPP;
|
matthiasm@0
|
125 double mu = par.minPitch + iPitch * 1.0/par.nPPS;
|
matthiasm@0
|
126 pitchDistr[index] = boost::math::normal(mu, par.sigmaYinPitchAttack);
|
matthiasm@0
|
127 pitchDistr[index+1] = boost::math::normal(mu, par.sigmaYinPitchStable);
|
matthiasm@0
|
128 pitchDistr[index+2] = boost::math::normal(mu, 1.0); // dummy
|
matthiasm@0
|
129 pitchDistr[index+3] = boost::math::normal(mu, par.sigmaYinPitchInter);
|
matthiasm@0
|
130 }
|
matthiasm@0
|
131
|
matthiasm@0
|
132 boost::math::normal noteDistanceDistr(0, par.sigma2Note);
|
matthiasm@0
|
133
|
matthiasm@0
|
134 for (size_t iPitch = 0; iPitch < (par.nS * par.nPPS); ++iPitch)
|
matthiasm@0
|
135 {
|
matthiasm@0
|
136 // loop through all notes and set sparse transition probabilities
|
matthiasm@0
|
137 size_t index = iPitch * par.nSPP;
|
matthiasm@0
|
138
|
matthiasm@0
|
139 // transitions from attack state
|
matthiasm@0
|
140 from.push_back(index);
|
matthiasm@0
|
141 to.push_back(index);
|
matthiasm@0
|
142 transProb.push_back(par.pAttackSelftrans);
|
matthiasm@0
|
143
|
matthiasm@0
|
144 from.push_back(index);
|
matthiasm@0
|
145 to.push_back(index+1);
|
matthiasm@0
|
146 transProb.push_back(1-par.pAttackSelftrans);
|
matthiasm@0
|
147
|
matthiasm@0
|
148 // transitions from stable state
|
matthiasm@0
|
149 from.push_back(index+1);
|
matthiasm@0
|
150 to.push_back(index+1); // to itself
|
matthiasm@0
|
151 transProb.push_back(par.pStableSelftrans);
|
matthiasm@0
|
152
|
matthiasm@0
|
153 from.push_back(index+1);
|
matthiasm@0
|
154 to.push_back(index+2); // to silent
|
matthiasm@0
|
155 transProb.push_back(par.pStable2Silent);
|
matthiasm@0
|
156
|
matthiasm@0
|
157 from.push_back(index+1);
|
matthiasm@0
|
158 to.push_back(index+3); // to inter-note
|
matthiasm@0
|
159 transProb.push_back(1-par.pStableSelftrans-par.pStable2Silent);
|
matthiasm@0
|
160
|
matthiasm@0
|
161 // the "easy" transitions from silent state
|
matthiasm@0
|
162 from.push_back(index+2);
|
matthiasm@0
|
163 to.push_back(index+2);
|
matthiasm@0
|
164 transProb.push_back(par.pSilentSelftrans);
|
matthiasm@0
|
165
|
matthiasm@0
|
166 // the "easy" inter state transition
|
matthiasm@0
|
167 from.push_back(index+3);
|
matthiasm@0
|
168 to.push_back(index+3);
|
matthiasm@0
|
169 transProb.push_back(par.pInterSelftrans);
|
matthiasm@0
|
170
|
matthiasm@0
|
171 // the more complicated transitions from the silent and inter state
|
matthiasm@0
|
172 double probSumSilent = 0;
|
matthiasm@0
|
173 double probSumInter = 0;
|
matthiasm@0
|
174 vector<double> tempTransProbInter;
|
matthiasm@0
|
175 vector<double> tempTransProbSilent;
|
matthiasm@0
|
176 for (size_t jPitch = 0; jPitch < (par.nS * par.nPPS); ++jPitch)
|
matthiasm@0
|
177 {
|
matthiasm@0
|
178 int fromPitch = iPitch;
|
matthiasm@0
|
179 int toPitch = jPitch;
|
matthiasm@0
|
180 double semitoneDistance = std::abs(fromPitch - toPitch) * 1.0 / par.nPPS;
|
matthiasm@0
|
181
|
matthiasm@0
|
182 // if (std::fmod(semitoneDistance, 1) == 0 && semitoneDistance > par.minSemitoneDistance)
|
matthiasm@0
|
183 if (semitoneDistance == 0 || (semitoneDistance > par.minSemitoneDistance && semitoneDistance < par.maxJump))
|
matthiasm@0
|
184 {
|
matthiasm@0
|
185 size_t toIndex = jPitch * par.nSPP; // note attack index
|
matthiasm@0
|
186
|
matthiasm@0
|
187 double tempWeightSilent = boost::math::pdf(noteDistanceDistr, semitoneDistance);
|
matthiasm@0
|
188 double tempWeightInter = semitoneDistance == 0 ? 0 : tempWeightSilent;
|
matthiasm@0
|
189 probSumSilent += tempWeightSilent;
|
matthiasm@0
|
190 probSumInter += tempWeightInter;
|
matthiasm@0
|
191
|
matthiasm@0
|
192 tempTransProbSilent.push_back(tempWeightSilent);
|
matthiasm@0
|
193 tempTransProbInter.push_back(tempWeightInter);
|
matthiasm@0
|
194
|
matthiasm@0
|
195 from.push_back(index+2);
|
matthiasm@0
|
196 to.push_back(toIndex);
|
matthiasm@0
|
197 from.push_back(index+3);
|
matthiasm@0
|
198 to.push_back(toIndex);
|
matthiasm@0
|
199 }
|
matthiasm@0
|
200 }
|
matthiasm@0
|
201 for (size_t i = 0; i < tempTransProbSilent.size(); ++i)
|
matthiasm@0
|
202 {
|
matthiasm@0
|
203 transProb.push_back((1-par.pSilentSelftrans) * tempTransProbSilent[i]/probSumSilent);
|
matthiasm@0
|
204 transProb.push_back((1-par.pInterSelftrans) * tempTransProbInter[i]/probSumInter);
|
matthiasm@0
|
205 }
|
matthiasm@0
|
206 }
|
matthiasm@0
|
207 }
|
matthiasm@0
|
208
|
matthiasm@0
|
209 double
|
matthiasm@0
|
210 MonoNoteHMM::getMidiPitch(size_t index)
|
matthiasm@0
|
211 {
|
matthiasm@0
|
212 return pitchDistr[index].mean();
|
matthiasm@0
|
213 }
|
matthiasm@0
|
214
|
matthiasm@0
|
215 double
|
matthiasm@0
|
216 MonoNoteHMM::getFrequency(size_t index)
|
matthiasm@0
|
217 {
|
matthiasm@0
|
218 return 440 * pow(2, (pitchDistr[index].mean()-69)/12);
|
matthiasm@0
|
219 }
|