Mercurial > hg > pyin
comparison MonoNoteHMM.cpp @ 0:99bac62ee2da
added PYIN sources, should be compileable
author | matthiasm |
---|---|
date | Wed, 27 Nov 2013 11:59:49 +0000 |
parents | |
children | 5945b8905d1f |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:99bac62ee2da |
---|---|
1 #include "MonoNoteHMM.h" | |
2 | |
3 #include <boost/math/distributions.hpp> | |
4 | |
5 #include <cstdio> | |
6 #include <cmath> | |
7 | |
8 using std::vector; | |
9 using std::pair; | |
10 | |
11 MonoNoteHMM::MonoNoteHMM() : | |
12 par() | |
13 { | |
14 build(); | |
15 } | |
16 | |
17 const vector<double> | |
18 MonoNoteHMM::calculateObsProb(const vector<pair<double, double> > pitchProb) | |
19 { | |
20 // pitchProb is a list of pairs (pitches and their probabilities) | |
21 | |
22 size_t nCandidate = pitchProb.size(); | |
23 | |
24 // what is the probability of pitched | |
25 double pIsPitched = 0; | |
26 for (size_t iCandidate = 0; iCandidate < nCandidate; ++iCandidate) | |
27 { | |
28 // pIsPitched = pitchProb[iCandidate].second > pIsPitched ? pitchProb[iCandidate].second : pIsPitched; | |
29 pIsPitched += pitchProb[iCandidate].second; | |
30 } | |
31 | |
32 // pIsPitched = std::pow(pIsPitched, (1-par.priorWeight)) * std::pow(par.priorPitchedProb, par.priorWeight); | |
33 pIsPitched = pIsPitched * (1-par.priorWeight) + par.priorPitchedProb * par.priorWeight; | |
34 | |
35 vector<double> out = vector<double>(par.n); | |
36 double tempProbSum = 0; | |
37 for (size_t i = 0; i < par.n; ++i) | |
38 { | |
39 if (i % 4 != 2) | |
40 { | |
41 // std::cerr << getMidiPitch(i) << std::endl; | |
42 double tempProb = 0; | |
43 if (nCandidate > 0) | |
44 { | |
45 double minDist = 10000.0; | |
46 double minDistProb = 0; | |
47 size_t minDistCandidate = 0; | |
48 for (size_t iCandidate = 0; iCandidate < nCandidate; ++iCandidate) | |
49 { | |
50 double currDist = std::abs(getMidiPitch(i)-pitchProb[iCandidate].first); | |
51 if (currDist < minDist) | |
52 { | |
53 minDist = currDist; | |
54 minDistProb = pitchProb[iCandidate].second; | |
55 minDistCandidate = iCandidate; | |
56 } | |
57 } | |
58 tempProb = std::pow(minDistProb, par.yinTrust) * boost::math::pdf(pitchDistr[i], pitchProb[minDistCandidate].first); | |
59 } else { | |
60 tempProb = 1; | |
61 } | |
62 tempProbSum += tempProb; | |
63 out[i] = tempProb; | |
64 } | |
65 } | |
66 | |
67 for (size_t i = 0; i < par.n; ++i) | |
68 { | |
69 if (i % 4 != 2) | |
70 { | |
71 if (tempProbSum > 0) | |
72 { | |
73 out[i] = out[i] / tempProbSum * pIsPitched; | |
74 } | |
75 } else { | |
76 out[i] = (1-pIsPitched) / (par.nPPS * par.nS); | |
77 } | |
78 } | |
79 | |
80 return(out); | |
81 } | |
82 | |
83 void | |
84 MonoNoteHMM::build() | |
85 { | |
86 // the states are organised as follows: | |
87 // 0-2. lowest pitch | |
88 // 0. attack state | |
89 // 1. stable state | |
90 // 2. silent state | |
91 // 3. inter state | |
92 // 4-6. second-lowest pitch | |
93 // 4. attack state | |
94 // ... | |
95 | |
96 // observation distributions | |
97 for (size_t iState = 0; iState < par.n; ++iState) | |
98 { | |
99 pitchDistr.push_back(boost::math::normal(0,1)); | |
100 if (iState % 4 == 2) | |
101 { | |
102 // silent state starts tracking | |
103 init.push_back(1.0/(par.nS * par.nPPS)); | |
104 } else { | |
105 init.push_back(0.0); | |
106 } | |
107 } | |
108 | |
109 for (size_t iPitch = 0; iPitch < (par.nS * par.nPPS); ++iPitch) | |
110 { | |
111 size_t index = iPitch * par.nSPP; | |
112 double mu = par.minPitch + iPitch * 1.0/par.nPPS; | |
113 pitchDistr[index] = boost::math::normal(mu, par.sigmaYinPitchAttack); | |
114 pitchDistr[index+1] = boost::math::normal(mu, par.sigmaYinPitchStable); | |
115 pitchDistr[index+2] = boost::math::normal(mu, 1.0); // dummy | |
116 pitchDistr[index+3] = boost::math::normal(mu, par.sigmaYinPitchInter); | |
117 } | |
118 | |
119 boost::math::normal noteDistanceDistr(0, par.sigma2Note); | |
120 | |
121 for (size_t iPitch = 0; iPitch < (par.nS * par.nPPS); ++iPitch) | |
122 { | |
123 // loop through all notes and set sparse transition probabilities | |
124 size_t index = iPitch * par.nSPP; | |
125 | |
126 // transitions from attack state | |
127 from.push_back(index); | |
128 to.push_back(index); | |
129 transProb.push_back(par.pAttackSelftrans); | |
130 | |
131 from.push_back(index); | |
132 to.push_back(index+1); | |
133 transProb.push_back(1-par.pAttackSelftrans); | |
134 | |
135 // transitions from stable state | |
136 from.push_back(index+1); | |
137 to.push_back(index+1); // to itself | |
138 transProb.push_back(par.pStableSelftrans); | |
139 | |
140 from.push_back(index+1); | |
141 to.push_back(index+2); // to silent | |
142 transProb.push_back(par.pStable2Silent); | |
143 | |
144 from.push_back(index+1); | |
145 to.push_back(index+3); // to inter-note | |
146 transProb.push_back(1-par.pStableSelftrans-par.pStable2Silent); | |
147 | |
148 // the "easy" transitions from silent state | |
149 from.push_back(index+2); | |
150 to.push_back(index+2); | |
151 transProb.push_back(par.pSilentSelftrans); | |
152 | |
153 // the "easy" inter state transition | |
154 from.push_back(index+3); | |
155 to.push_back(index+3); | |
156 transProb.push_back(par.pInterSelftrans); | |
157 | |
158 // the more complicated transitions from the silent and inter state | |
159 double probSumSilent = 0; | |
160 double probSumInter = 0; | |
161 vector<double> tempTransProbInter; | |
162 vector<double> tempTransProbSilent; | |
163 for (size_t jPitch = 0; jPitch < (par.nS * par.nPPS); ++jPitch) | |
164 { | |
165 int fromPitch = iPitch; | |
166 int toPitch = jPitch; | |
167 double semitoneDistance = std::abs(fromPitch - toPitch) * 1.0 / par.nPPS; | |
168 | |
169 // if (std::fmod(semitoneDistance, 1) == 0 && semitoneDistance > par.minSemitoneDistance) | |
170 if (semitoneDistance == 0 || (semitoneDistance > par.minSemitoneDistance && semitoneDistance < par.maxJump)) | |
171 { | |
172 size_t toIndex = jPitch * par.nSPP; // note attack index | |
173 | |
174 double tempWeightSilent = boost::math::pdf(noteDistanceDistr, semitoneDistance); | |
175 double tempWeightInter = semitoneDistance == 0 ? 0 : tempWeightSilent; | |
176 probSumSilent += tempWeightSilent; | |
177 probSumInter += tempWeightInter; | |
178 | |
179 tempTransProbSilent.push_back(tempWeightSilent); | |
180 tempTransProbInter.push_back(tempWeightInter); | |
181 | |
182 from.push_back(index+2); | |
183 to.push_back(toIndex); | |
184 from.push_back(index+3); | |
185 to.push_back(toIndex); | |
186 } | |
187 } | |
188 for (size_t i = 0; i < tempTransProbSilent.size(); ++i) | |
189 { | |
190 transProb.push_back((1-par.pSilentSelftrans) * tempTransProbSilent[i]/probSumSilent); | |
191 transProb.push_back((1-par.pInterSelftrans) * tempTransProbInter[i]/probSumInter); | |
192 } | |
193 } | |
194 } | |
195 | |
196 double | |
197 MonoNoteHMM::getMidiPitch(size_t index) | |
198 { | |
199 return pitchDistr[index].mean(); | |
200 } | |
201 | |
202 double | |
203 MonoNoteHMM::getFrequency(size_t index) | |
204 { | |
205 return 440 * pow(2, (pitchDistr[index].mean()-69)/12); | |
206 } |