Chris@9
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
Chris@9
|
2
|
Chris@9
|
3 /*
|
Chris@9
|
4 pYIN - A fundamental frequency estimator for monophonic audio
|
Chris@9
|
5 Centre for Digital Music, Queen Mary, University of London.
|
Chris@9
|
6
|
Chris@9
|
7 This program is free software; you can redistribute it and/or
|
Chris@9
|
8 modify it under the terms of the GNU General Public License as
|
Chris@9
|
9 published by the Free Software Foundation; either version 2 of the
|
Chris@9
|
10 License, or (at your option) any later version. See the file
|
Chris@9
|
11 COPYING included with this distribution for more information.
|
Chris@9
|
12 */
|
Chris@9
|
13
|
matthiasm@0
|
14 #include "MonoPitchHMM.h"
|
matthiasm@0
|
15
|
matthiasm@0
|
16 #include <boost/math/distributions.hpp>
|
matthiasm@0
|
17
|
matthiasm@0
|
18 #include <cstdio>
|
matthiasm@0
|
19 #include <cmath>
|
mail@130
|
20 #include <iostream>
|
matthiasm@0
|
21
|
matthiasm@0
|
22 using std::vector;
|
matthiasm@0
|
23 using std::pair;
|
matthiasm@0
|
24
|
mail@132
|
25 MonoPitchHMM::MonoPitchHMM(int fixedLag) :
|
mail@132
|
26 SparseHMM(fixedLag),
|
matthiasm@102
|
27 m_minFreq(61.735),
|
matthiasm@24
|
28 m_nBPS(5),
|
matthiasm@0
|
29 m_nPitch(0),
|
matthiasm@0
|
30 m_transitionWidth(0),
|
matthiasm@0
|
31 m_selfTrans(0.99),
|
matthiasm@0
|
32 m_yinTrust(.5),
|
matthiasm@0
|
33 m_freqs(0)
|
matthiasm@0
|
34 {
|
matthiasm@0
|
35 m_transitionWidth = 5*(m_nBPS/2) + 1;
|
matthiasm@102
|
36 m_nPitch = 69 * m_nBPS;
|
mail@130
|
37 m_nState = 2 * m_nPitch; // voiced and unvoiced
|
matthiasm@25
|
38 m_freqs = vector<double>(2*m_nPitch);
|
matthiasm@0
|
39 for (size_t iPitch = 0; iPitch < m_nPitch; ++iPitch)
|
matthiasm@0
|
40 {
|
matthiasm@0
|
41 m_freqs[iPitch] = m_minFreq * std::pow(2, iPitch * 1.0 / (12 * m_nBPS));
|
matthiasm@0
|
42 m_freqs[iPitch+m_nPitch] = -m_freqs[iPitch];
|
matthiasm@0
|
43 }
|
matthiasm@0
|
44 build();
|
matthiasm@0
|
45 }
|
matthiasm@0
|
46
|
matthiasm@0
|
47 const vector<double>
|
matthiasm@0
|
48 MonoPitchHMM::calculateObsProb(const vector<pair<double, double> > pitchProb)
|
matthiasm@0
|
49 {
|
matthiasm@0
|
50 vector<double> out = vector<double>(2*m_nPitch+1);
|
matthiasm@0
|
51 double probYinPitched = 0;
|
matthiasm@0
|
52 // BIN THE PITCHES
|
matthiasm@0
|
53 for (size_t iPair = 0; iPair < pitchProb.size(); ++iPair)
|
matthiasm@0
|
54 {
|
matthiasm@0
|
55 double freq = 440. * std::pow(2, (pitchProb[iPair].first - 69)/12);
|
matthiasm@0
|
56 if (freq <= m_minFreq) continue;
|
matthiasm@0
|
57 double d = 0;
|
matthiasm@0
|
58 double oldd = 1000;
|
matthiasm@0
|
59 for (size_t iPitch = 0; iPitch < m_nPitch; ++iPitch)
|
matthiasm@0
|
60 {
|
matthiasm@0
|
61 d = std::abs(freq-m_freqs[iPitch]);
|
matthiasm@0
|
62 if (oldd < d && iPitch > 0)
|
matthiasm@0
|
63 {
|
matthiasm@0
|
64 // previous bin must have been the closest
|
matthiasm@0
|
65 out[iPitch-1] = pitchProb[iPair].second;
|
matthiasm@0
|
66 probYinPitched += out[iPitch-1];
|
matthiasm@0
|
67 break;
|
matthiasm@0
|
68 }
|
matthiasm@0
|
69 oldd = d;
|
matthiasm@0
|
70 }
|
matthiasm@0
|
71 }
|
matthiasm@0
|
72
|
matthiasm@0
|
73 double probReallyPitched = m_yinTrust * probYinPitched;
|
matthiasm@58
|
74 // std::cerr << probReallyPitched << " " << probYinPitched << std::endl;
|
matthiasm@58
|
75 // damn, I forget what this is all about...
|
matthiasm@0
|
76 for (size_t iPitch = 0; iPitch < m_nPitch; ++iPitch)
|
matthiasm@0
|
77 {
|
matthiasm@0
|
78 if (probYinPitched > 0) out[iPitch] *= (probReallyPitched/probYinPitched) ;
|
matthiasm@0
|
79 out[iPitch+m_nPitch] = (1 - probReallyPitched) / m_nPitch;
|
matthiasm@0
|
80 }
|
matthiasm@0
|
81 // out[2*m_nPitch] = m_yinTrust * (1 - probYinPitched);
|
matthiasm@0
|
82 return(out);
|
matthiasm@0
|
83 }
|
matthiasm@0
|
84
|
matthiasm@0
|
85 void
|
matthiasm@0
|
86 MonoPitchHMM::build()
|
matthiasm@0
|
87 {
|
matthiasm@0
|
88 // INITIAL VECTOR
|
mail@130
|
89 m_init = vector<double>(2*m_nPitch, 1.0 / 2*m_nPitch);
|
matthiasm@0
|
90
|
matthiasm@0
|
91 // TRANSITIONS
|
matthiasm@0
|
92 for (size_t iPitch = 0; iPitch < m_nPitch; ++iPitch)
|
matthiasm@0
|
93 {
|
matthiasm@0
|
94 int theoreticalMinNextPitch = static_cast<int>(iPitch)-static_cast<int>(m_transitionWidth/2);
|
matthiasm@0
|
95 int minNextPitch = iPitch>m_transitionWidth/2 ? iPitch-m_transitionWidth/2 : 0;
|
matthiasm@0
|
96 int maxNextPitch = iPitch<m_nPitch-m_transitionWidth/2 ? iPitch+m_transitionWidth/2 : m_nPitch-1;
|
matthiasm@0
|
97
|
matthiasm@0
|
98 // WEIGHT VECTOR
|
matthiasm@0
|
99 double weightSum = 0;
|
matthiasm@0
|
100 vector<double> weights;
|
matthiasm@0
|
101 for (size_t i = minNextPitch; i <= maxNextPitch; ++i)
|
matthiasm@0
|
102 {
|
matthiasm@0
|
103 if (i <= iPitch)
|
matthiasm@0
|
104 {
|
matthiasm@0
|
105 weights.push_back(i-theoreticalMinNextPitch+1);
|
matthiasm@0
|
106 // weights.push_back(i-theoreticalMinNextPitch+1+m_transitionWidth/2);
|
matthiasm@0
|
107 } else {
|
matthiasm@0
|
108 weights.push_back(iPitch-theoreticalMinNextPitch+1-(i-iPitch));
|
matthiasm@0
|
109 // weights.push_back(iPitch-theoreticalMinNextPitch+1-(i-iPitch)+m_transitionWidth/2);
|
matthiasm@0
|
110 }
|
matthiasm@0
|
111 weightSum += weights[weights.size()-1];
|
matthiasm@0
|
112 }
|
matthiasm@0
|
113
|
matthiasm@0
|
114 // std::cerr << minNextPitch << " " << maxNextPitch << std::endl;
|
matthiasm@0
|
115 // TRANSITIONS TO CLOSE PITCH
|
matthiasm@0
|
116 for (size_t i = minNextPitch; i <= maxNextPitch; ++i)
|
matthiasm@0
|
117 {
|
mail@130
|
118 m_from.push_back(iPitch);
|
mail@130
|
119 m_to.push_back(i);
|
mail@130
|
120 m_transProb.push_back(weights[i-minNextPitch] / weightSum * m_selfTrans);
|
matthiasm@0
|
121
|
mail@130
|
122 m_from.push_back(iPitch);
|
mail@130
|
123 m_to.push_back(i+m_nPitch);
|
mail@130
|
124 m_transProb.push_back(weights[i-minNextPitch] / weightSum * (1-m_selfTrans));
|
matthiasm@0
|
125
|
mail@130
|
126 m_from.push_back(iPitch+m_nPitch);
|
mail@130
|
127 m_to.push_back(i+m_nPitch);
|
mail@130
|
128 m_transProb.push_back(weights[i-minNextPitch] / weightSum * m_selfTrans);
|
matthiasm@0
|
129 // transProb.push_back(weights[i-minNextPitch] / weightSum * 0.5);
|
matthiasm@0
|
130
|
mail@130
|
131 m_from.push_back(iPitch+m_nPitch);
|
mail@130
|
132 m_to.push_back(i);
|
mail@130
|
133 m_transProb.push_back(weights[i-minNextPitch] / weightSum * (1-m_selfTrans));
|
matthiasm@0
|
134 // transProb.push_back(weights[i-minNextPitch] / weightSum * 0.5);
|
matthiasm@0
|
135 }
|
matthiasm@0
|
136
|
matthiasm@0
|
137 // TRANSITION TO UNVOICED
|
matthiasm@0
|
138 // from.push_back(iPitch+m_nPitch);
|
matthiasm@0
|
139 // to.push_back(2*m_nPitch);
|
matthiasm@0
|
140 // transProb.push_back(1-m_selfTrans);
|
matthiasm@0
|
141
|
matthiasm@0
|
142 // TRANSITION FROM UNVOICED TO PITCH
|
matthiasm@25
|
143 // from.push_back(2*m_nPitch);
|
matthiasm@25
|
144 // to.push_back(iPitch+m_nPitch);
|
matthiasm@25
|
145 // transProb.push_back(1.0/m_nPitch);
|
matthiasm@0
|
146 }
|
matthiasm@0
|
147 // UNVOICED SELFTRANSITION
|
matthiasm@0
|
148 // from.push_back(2*m_nPitch);
|
matthiasm@0
|
149 // to.push_back(2*m_nPitch);
|
matthiasm@0
|
150 // transProb.push_back(m_selfTrans);
|
matthiasm@0
|
151
|
matthiasm@0
|
152 // for (size_t i = 0; i < from.size(); ++i) {
|
matthiasm@0
|
153 // std::cerr << "P(["<< from[i] << " --> " << to[i] << "]) = " << transProb[i] << std::endl;
|
matthiasm@0
|
154 // }
|
mail@130
|
155 m_nTrans = m_transProb.size();
|
mail@130
|
156 m_delta = vector<double>(m_nState);
|
mail@130
|
157 m_oldDelta = vector<double>(m_nState);
|
Chris@9
|
158 }
|
mail@132
|
159
|
mail@132
|
160 /*
|
mail@132
|
161 Takes a state number and a pitch-prob vector, then finds the pitch that would
|
mail@132
|
162 have been closest to the pitch of the state. Easy to understand? ;)
|
mail@132
|
163 */
|
mail@132
|
164 const float
|
mail@132
|
165 MonoPitchHMM::nearestFreq(int state, vector<pair<double, double> > pitchProb)
|
mail@132
|
166 {
|
mail@132
|
167 float hmmFreq = m_freqs[state];
|
mail@132
|
168 // std::cerr << "hmmFreq " << hmmFreq << std::endl;
|
mail@132
|
169 float bestFreq = 0;
|
mail@132
|
170 float leastDist = 10000;
|
mail@132
|
171 if (hmmFreq > 0)
|
mail@132
|
172 {
|
mail@132
|
173 // This was a Yin estimate, so try to get original pitch estimate back
|
mail@132
|
174 // ... a bit hacky, since we could have direclty saved the frequency
|
mail@132
|
175 // that was assigned to the HMM bin in hmm.calculateObsProb -- but would
|
mail@132
|
176 // have had to rethink the interface of that method.
|
mail@132
|
177
|
mail@132
|
178 // std::cerr << "pitch prob size " << pitchProb.size() << std::endl;
|
mail@132
|
179
|
mail@132
|
180 for (size_t iPt = 0; iPt < pitchProb.size(); ++iPt)
|
mail@132
|
181 {
|
mail@132
|
182 float freq = 440. *
|
mail@132
|
183 std::pow(2,
|
mail@132
|
184 (pitchProb[iPt].first - 69)/12);
|
mail@132
|
185 float dist = std::abs(hmmFreq-freq);
|
mail@132
|
186 if (dist < leastDist)
|
mail@132
|
187 {
|
mail@132
|
188 leastDist = dist;
|
mail@132
|
189 bestFreq = freq;
|
mail@132
|
190 }
|
mail@132
|
191 }
|
mail@132
|
192 } else {
|
mail@132
|
193 bestFreq = hmmFreq;
|
mail@132
|
194 }
|
mail@132
|
195 return bestFreq;
|
mail@132
|
196 } |