matthiasm@0
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
Chris@9
|
2
|
matthiasm@0
|
3 /*
|
Chris@9
|
4 pYIN - A fundamental frequency estimator for monophonic audio
|
Chris@9
|
5 Centre for Digital Music, Queen Mary, University of London.
|
Chris@9
|
6
|
Chris@9
|
7 This program is free software; you can redistribute it and/or
|
Chris@9
|
8 modify it under the terms of the GNU General Public License as
|
Chris@9
|
9 published by the Free Software Foundation; either version 2 of the
|
Chris@9
|
10 License, or (at your option) any later version. See the file
|
Chris@9
|
11 COPYING included with this distribution for more information.
|
matthiasm@0
|
12 */
|
matthiasm@0
|
13
|
matthiasm@0
|
14 #include "PYIN.h"
|
matthiasm@0
|
15 #include "MonoNote.h"
|
matthiasm@0
|
16 #include "MonoPitch.h"
|
matthiasm@0
|
17
|
matthiasm@0
|
18 #include "vamp-sdk/FFT.h"
|
matthiasm@0
|
19
|
matthiasm@0
|
20 #include <vector>
|
matthiasm@0
|
21 #include <algorithm>
|
matthiasm@0
|
22
|
matthiasm@0
|
23 #include <cstdio>
|
matthiasm@0
|
24 #include <cmath>
|
matthiasm@0
|
25 #include <complex>
|
matthiasm@0
|
26
|
matthiasm@0
|
27 using std::string;
|
matthiasm@0
|
28 using std::vector;
|
matthiasm@0
|
29 using Vamp::RealTime;
|
matthiasm@0
|
30
|
matthiasm@0
|
31
|
matthiasm@0
|
32 PYIN::PYIN(float inputSampleRate) :
|
matthiasm@0
|
33 Plugin(inputSampleRate),
|
matthiasm@0
|
34 m_channels(0),
|
matthiasm@0
|
35 m_stepSize(256),
|
matthiasm@0
|
36 m_blockSize(2048),
|
matthiasm@0
|
37 m_fmin(40),
|
matthiasm@0
|
38 m_fmax(700),
|
matthiasm@0
|
39 m_yin(2048, inputSampleRate, 0.0),
|
matthiasm@0
|
40 m_oF0Candidates(0),
|
matthiasm@0
|
41 m_oF0Probs(0),
|
matthiasm@0
|
42 m_oVoicedProb(0),
|
matthiasm@0
|
43 m_oCandidateSalience(0),
|
matthiasm@0
|
44 m_oSmoothedPitchTrack(0),
|
matthiasm@0
|
45 m_oNotes(0),
|
matthiasm@0
|
46 m_threshDistr(2.0f),
|
matthiasm@6
|
47 m_outputUnvoiced(0.0f),
|
matthiasm@21
|
48 m_minLocalFreq(0.f),
|
matthiasm@21
|
49 m_maxLocalFreq(5000.f),
|
matthiasm@21
|
50 m_leftBoundary(0.f),
|
matthiasm@21
|
51 m_rightBoundary(5000.f), // hack
|
matthiasm@0
|
52 m_pitchProb(0),
|
matthiasm@21
|
53 m_timestamp(0),
|
matthiasm@21
|
54 m_currentProgram("")
|
matthiasm@0
|
55 {
|
matthiasm@0
|
56 }
|
matthiasm@0
|
57
|
matthiasm@0
|
58 PYIN::~PYIN()
|
matthiasm@0
|
59 {
|
matthiasm@0
|
60 }
|
matthiasm@0
|
61
|
matthiasm@0
|
62 string
|
matthiasm@0
|
63 PYIN::getIdentifier() const
|
matthiasm@0
|
64 {
|
matthiasm@1
|
65 return "pyin";
|
matthiasm@0
|
66 }
|
matthiasm@0
|
67
|
matthiasm@0
|
68 string
|
matthiasm@0
|
69 PYIN::getName() const
|
matthiasm@0
|
70 {
|
matthiasm@1
|
71 return "pYin";
|
matthiasm@0
|
72 }
|
matthiasm@0
|
73
|
matthiasm@0
|
74 string
|
matthiasm@0
|
75 PYIN::getDescription() const
|
matthiasm@0
|
76 {
|
matthiasm@0
|
77 return "Monophonic pitch and note tracking based on a probabilistic Yin extension.";
|
matthiasm@0
|
78 }
|
matthiasm@0
|
79
|
matthiasm@0
|
80 string
|
matthiasm@0
|
81 PYIN::getMaker() const
|
matthiasm@0
|
82 {
|
matthiasm@0
|
83 return "Matthias Mauch";
|
matthiasm@0
|
84 }
|
matthiasm@0
|
85
|
matthiasm@0
|
86 int
|
matthiasm@0
|
87 PYIN::getPluginVersion() const
|
matthiasm@0
|
88 {
|
matthiasm@0
|
89 // Increment this each time you release a version that behaves
|
matthiasm@0
|
90 // differently from the previous one
|
matthiasm@0
|
91 return 1;
|
matthiasm@0
|
92 }
|
matthiasm@0
|
93
|
matthiasm@0
|
94 string
|
matthiasm@0
|
95 PYIN::getCopyright() const
|
matthiasm@0
|
96 {
|
matthiasm@0
|
97 return "GPL";
|
matthiasm@0
|
98 }
|
matthiasm@0
|
99
|
matthiasm@0
|
100 PYIN::InputDomain
|
matthiasm@0
|
101 PYIN::getInputDomain() const
|
matthiasm@0
|
102 {
|
matthiasm@0
|
103 return TimeDomain;
|
matthiasm@0
|
104 }
|
matthiasm@0
|
105
|
matthiasm@0
|
106 size_t
|
matthiasm@0
|
107 PYIN::getPreferredBlockSize() const
|
matthiasm@0
|
108 {
|
matthiasm@0
|
109 return 2048;
|
matthiasm@0
|
110 }
|
matthiasm@0
|
111
|
matthiasm@0
|
112 size_t
|
matthiasm@0
|
113 PYIN::getPreferredStepSize() const
|
matthiasm@0
|
114 {
|
matthiasm@0
|
115 return 256;
|
matthiasm@0
|
116 }
|
matthiasm@0
|
117
|
matthiasm@0
|
118 size_t
|
matthiasm@0
|
119 PYIN::getMinChannelCount() const
|
matthiasm@0
|
120 {
|
matthiasm@0
|
121 return 1;
|
matthiasm@0
|
122 }
|
matthiasm@0
|
123
|
matthiasm@0
|
124 size_t
|
matthiasm@0
|
125 PYIN::getMaxChannelCount() const
|
matthiasm@0
|
126 {
|
matthiasm@0
|
127 return 1;
|
matthiasm@0
|
128 }
|
matthiasm@0
|
129
|
matthiasm@0
|
130 PYIN::ParameterList
|
matthiasm@0
|
131 PYIN::getParameterDescriptors() const
|
matthiasm@0
|
132 {
|
matthiasm@0
|
133 ParameterList list;
|
matthiasm@0
|
134
|
matthiasm@0
|
135 ParameterDescriptor d;
|
matthiasm@0
|
136
|
matthiasm@0
|
137 d.identifier = "threshdistr";
|
matthiasm@0
|
138 d.name = "Yin threshold distribution";
|
matthiasm@0
|
139 d.description = ".";
|
matthiasm@0
|
140 d.unit = "";
|
matthiasm@0
|
141 d.minValue = 0.0f;
|
matthiasm@0
|
142 d.maxValue = 7.0f;
|
matthiasm@0
|
143 d.defaultValue = 2.0f;
|
matthiasm@0
|
144 d.isQuantized = true;
|
matthiasm@0
|
145 d.quantizeStep = 1.0f;
|
matthiasm@0
|
146 d.valueNames.push_back("Uniform");
|
matthiasm@0
|
147 d.valueNames.push_back("Beta (mean 0.10)");
|
matthiasm@0
|
148 d.valueNames.push_back("Beta (mean 0.15)");
|
matthiasm@0
|
149 d.valueNames.push_back("Beta (mean 0.20)");
|
matthiasm@0
|
150 d.valueNames.push_back("Beta (mean 0.30)");
|
matthiasm@0
|
151 d.valueNames.push_back("Single Value 0.10");
|
matthiasm@0
|
152 d.valueNames.push_back("Single Value 0.15");
|
matthiasm@0
|
153 d.valueNames.push_back("Single Value 0.20");
|
matthiasm@0
|
154 list.push_back(d);
|
matthiasm@0
|
155
|
matthiasm@0
|
156 d.identifier = "outputunvoiced";
|
matthiasm@0
|
157 d.valueNames.clear();
|
matthiasm@0
|
158 d.name = "Output estimates classified as unvoiced?";
|
matthiasm@0
|
159 d.description = ".";
|
matthiasm@0
|
160 d.unit = "";
|
matthiasm@0
|
161 d.minValue = 0.0f;
|
matthiasm@0
|
162 d.maxValue = 2.0f;
|
matthiasm@6
|
163 d.defaultValue = 0.0f;
|
matthiasm@0
|
164 d.isQuantized = true;
|
matthiasm@0
|
165 d.quantizeStep = 1.0f;
|
matthiasm@0
|
166 d.valueNames.push_back("No");
|
matthiasm@0
|
167 d.valueNames.push_back("Yes");
|
matthiasm@0
|
168 d.valueNames.push_back("Yes, as negative frequencies");
|
matthiasm@0
|
169 list.push_back(d);
|
matthiasm@21
|
170
|
matthiasm@21
|
171 d.identifier = "minlocalfreq";
|
matthiasm@21
|
172 d.valueNames.clear();
|
matthiasm@21
|
173 d.name = "Minimum local frequency.";
|
matthiasm@21
|
174 d.description = "Minimum frequency in selection.";
|
matthiasm@21
|
175 d.unit = "";
|
matthiasm@21
|
176 d.minValue = 50.f;
|
matthiasm@21
|
177 d.maxValue = 5000.f;
|
matthiasm@21
|
178 d.defaultValue = 50.f;
|
matthiasm@21
|
179 d.isQuantized = false;
|
matthiasm@21
|
180 d.quantizeStep = 0;
|
matthiasm@21
|
181 d.valueNames.clear();
|
matthiasm@21
|
182 list.push_back(d);
|
matthiasm@21
|
183
|
matthiasm@21
|
184 d.identifier = "maxlocalfreq";
|
matthiasm@21
|
185 d.valueNames.clear();
|
matthiasm@21
|
186 d.name = "Maximum local frequency.";
|
matthiasm@21
|
187 d.description = "Maximum frequency in selection.";
|
matthiasm@21
|
188 d.unit = "";
|
matthiasm@21
|
189 d.minValue = 50.f;
|
matthiasm@21
|
190 d.maxValue = 5000.f;
|
matthiasm@21
|
191 d.defaultValue = 5000.f;
|
matthiasm@21
|
192 d.isQuantized = false;
|
matthiasm@21
|
193 d.quantizeStep = 0;
|
matthiasm@21
|
194 d.valueNames.clear();
|
matthiasm@21
|
195 list.push_back(d);
|
matthiasm@21
|
196
|
matthiasm@21
|
197 d.identifier = "leftboundary";
|
matthiasm@21
|
198 d.valueNames.clear();
|
matthiasm@21
|
199 d.name = "Left boundary.";
|
matthiasm@21
|
200 d.description = "Left boundary of time selection.";
|
matthiasm@21
|
201 d.unit = "";
|
matthiasm@21
|
202 d.minValue = 0.f;
|
matthiasm@21
|
203 d.maxValue = 1000.f;
|
matthiasm@21
|
204 d.defaultValue = 0.f;
|
matthiasm@21
|
205 d.isQuantized = false;
|
matthiasm@21
|
206 d.quantizeStep = 0;
|
matthiasm@21
|
207 d.valueNames.clear();
|
matthiasm@21
|
208 list.push_back(d);
|
matthiasm@21
|
209
|
matthiasm@21
|
210 d.identifier = "rightboundary";
|
matthiasm@21
|
211 d.valueNames.clear();
|
matthiasm@21
|
212 d.name = "Right boundary.";
|
matthiasm@21
|
213 d.description = "Right boundary of time selection.";
|
matthiasm@21
|
214 d.unit = "";
|
matthiasm@21
|
215 d.minValue = 0.f;
|
matthiasm@21
|
216 d.maxValue = 1000.f;
|
matthiasm@21
|
217 d.defaultValue = 0.f;
|
matthiasm@21
|
218 d.isQuantized = false;
|
matthiasm@21
|
219 d.quantizeStep = 0;
|
matthiasm@21
|
220 d.valueNames.clear();
|
matthiasm@21
|
221 list.push_back(d);
|
matthiasm@0
|
222
|
matthiasm@0
|
223 return list;
|
matthiasm@0
|
224 }
|
matthiasm@0
|
225
|
matthiasm@0
|
226 float
|
matthiasm@0
|
227 PYIN::getParameter(string identifier) const
|
matthiasm@0
|
228 {
|
matthiasm@0
|
229 if (identifier == "threshdistr") {
|
matthiasm@0
|
230 return m_threshDistr;
|
matthiasm@0
|
231 }
|
matthiasm@0
|
232 if (identifier == "outputunvoiced") {
|
matthiasm@0
|
233 return m_outputUnvoiced;
|
matthiasm@0
|
234 }
|
matthiasm@21
|
235 if (identifier == "minlocalfreq") {
|
matthiasm@21
|
236 return m_minLocalFreq;
|
matthiasm@21
|
237 }
|
matthiasm@21
|
238 if (identifier == "maxlocalfreq") {
|
matthiasm@21
|
239 return m_maxLocalFreq;
|
matthiasm@21
|
240 }
|
matthiasm@21
|
241 if (identifier == "leftboundary") {
|
matthiasm@21
|
242 return m_leftBoundary;
|
matthiasm@21
|
243 }
|
matthiasm@21
|
244 if (identifier == "rightboundary") {
|
matthiasm@21
|
245 return m_rightBoundary;
|
matthiasm@21
|
246 }
|
matthiasm@0
|
247 return 0.f;
|
matthiasm@0
|
248 }
|
matthiasm@0
|
249
|
matthiasm@0
|
250 void
|
matthiasm@0
|
251 PYIN::setParameter(string identifier, float value)
|
matthiasm@0
|
252 {
|
matthiasm@21
|
253 m_currentProgram = "custom";
|
matthiasm@0
|
254 if (identifier == "threshdistr")
|
matthiasm@0
|
255 {
|
matthiasm@0
|
256 m_threshDistr = value;
|
matthiasm@0
|
257 }
|
matthiasm@0
|
258 if (identifier == "outputunvoiced")
|
matthiasm@0
|
259 {
|
matthiasm@0
|
260 m_outputUnvoiced = value;
|
matthiasm@0
|
261 }
|
matthiasm@21
|
262 if (identifier == "minlocalfreq")
|
matthiasm@21
|
263 {
|
matthiasm@21
|
264 m_minLocalFreq = value;
|
matthiasm@21
|
265 }
|
matthiasm@21
|
266 if (identifier == "maxlocalfreq")
|
matthiasm@21
|
267 {
|
matthiasm@21
|
268 m_maxLocalFreq = value;
|
matthiasm@21
|
269 }
|
matthiasm@21
|
270 if (identifier == "leftboundary")
|
matthiasm@21
|
271 {
|
matthiasm@21
|
272 m_leftBoundary = value;
|
matthiasm@21
|
273 }
|
matthiasm@21
|
274 if (identifier == "rightboundary")
|
matthiasm@21
|
275 {
|
matthiasm@21
|
276 m_rightBoundary = value;
|
matthiasm@21
|
277 }
|
matthiasm@0
|
278 }
|
matthiasm@0
|
279
|
matthiasm@0
|
280 PYIN::ProgramList
|
matthiasm@0
|
281 PYIN::getPrograms() const
|
matthiasm@0
|
282 {
|
matthiasm@0
|
283 ProgramList list;
|
matthiasm@21
|
284 list.push_back("default");
|
matthiasm@21
|
285 list.push_back("custom");
|
matthiasm@21
|
286 list.push_back("donttellme");
|
matthiasm@0
|
287 return list;
|
matthiasm@0
|
288 }
|
matthiasm@0
|
289
|
matthiasm@0
|
290 string
|
matthiasm@0
|
291 PYIN::getCurrentProgram() const
|
matthiasm@0
|
292 {
|
matthiasm@21
|
293 return m_currentProgram;
|
matthiasm@0
|
294 }
|
matthiasm@0
|
295
|
matthiasm@0
|
296 void
|
matthiasm@0
|
297 PYIN::selectProgram(string name)
|
matthiasm@0
|
298 {
|
matthiasm@21
|
299 if (name == "default") {
|
matthiasm@21
|
300 m_minLocalFreq = 0;
|
matthiasm@21
|
301 m_maxLocalFreq = 10000;
|
matthiasm@21
|
302 m_leftBoundary = 0;
|
matthiasm@21
|
303 m_rightBoundary = 5000;
|
matthiasm@21
|
304 }
|
matthiasm@21
|
305 if (name == "custom") {
|
matthiasm@21
|
306 // do nothing
|
matthiasm@21
|
307 }
|
matthiasm@21
|
308 if (name == "donttellme")
|
matthiasm@21
|
309 {
|
matthiasm@21
|
310 m_currentProgram = "donttellme";
|
matthiasm@21
|
311 m_minLocalFreq = 0;
|
matthiasm@21
|
312 m_maxLocalFreq = 400;
|
matthiasm@21
|
313 m_leftBoundary = 1.9;
|
matthiasm@21
|
314 m_rightBoundary = 2.9;
|
matthiasm@21
|
315 }
|
matthiasm@0
|
316 }
|
matthiasm@0
|
317
|
matthiasm@0
|
318 PYIN::OutputList
|
matthiasm@0
|
319 PYIN::getOutputDescriptors() const
|
matthiasm@0
|
320 {
|
matthiasm@0
|
321 OutputList outputs;
|
matthiasm@0
|
322
|
matthiasm@0
|
323 OutputDescriptor d;
|
matthiasm@0
|
324
|
matthiasm@0
|
325 int outputNumber = 0;
|
matthiasm@0
|
326
|
matthiasm@0
|
327 d.identifier = "f0candidates";
|
matthiasm@0
|
328 d.name = "F0 Candidates";
|
matthiasm@0
|
329 d.description = "Estimated fundamental frequency candidates.";
|
matthiasm@0
|
330 d.unit = "Hz";
|
matthiasm@0
|
331 d.hasFixedBinCount = false;
|
matthiasm@0
|
332 // d.binCount = 1;
|
matthiasm@0
|
333 d.hasKnownExtents = true;
|
matthiasm@0
|
334 d.minValue = m_fmin;
|
matthiasm@0
|
335 d.maxValue = 500;
|
matthiasm@0
|
336 d.isQuantized = false;
|
matthiasm@0
|
337 d.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
338 d.sampleRate = (m_inputSampleRate / m_stepSize);
|
matthiasm@0
|
339 d.hasDuration = false;
|
matthiasm@0
|
340 outputs.push_back(d);
|
matthiasm@0
|
341 m_oF0Candidates = outputNumber++;
|
matthiasm@0
|
342
|
matthiasm@0
|
343 d.identifier = "f0probs";
|
matthiasm@0
|
344 d.name = "Candidate Probabilities";
|
matthiasm@0
|
345 d.description = "Probabilities of estimated fundamental frequency candidates.";
|
matthiasm@0
|
346 d.unit = "";
|
matthiasm@0
|
347 d.hasFixedBinCount = false;
|
matthiasm@0
|
348 // d.binCount = 1;
|
matthiasm@0
|
349 d.hasKnownExtents = true;
|
matthiasm@0
|
350 d.minValue = 0;
|
matthiasm@0
|
351 d.maxValue = 1;
|
matthiasm@0
|
352 d.isQuantized = false;
|
matthiasm@0
|
353 d.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
354 d.sampleRate = (m_inputSampleRate / m_stepSize);
|
matthiasm@0
|
355 d.hasDuration = false;
|
matthiasm@0
|
356 outputs.push_back(d);
|
matthiasm@0
|
357 m_oF0Probs = outputNumber++;
|
matthiasm@0
|
358
|
matthiasm@0
|
359 d.identifier = "voicedprob";
|
matthiasm@0
|
360 d.name = "Voiced Probability";
|
matthiasm@0
|
361 d.description = "Probability that the signal is voiced according to Probabilistic Yin.";
|
matthiasm@0
|
362 d.unit = "";
|
matthiasm@0
|
363 d.hasFixedBinCount = true;
|
matthiasm@0
|
364 d.binCount = 1;
|
matthiasm@0
|
365 d.hasKnownExtents = true;
|
matthiasm@0
|
366 d.minValue = 0;
|
matthiasm@0
|
367 d.maxValue = 1;
|
matthiasm@0
|
368 d.isQuantized = false;
|
matthiasm@0
|
369 d.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
370 d.sampleRate = (m_inputSampleRate / m_stepSize);
|
matthiasm@0
|
371 d.hasDuration = false;
|
matthiasm@0
|
372 outputs.push_back(d);
|
matthiasm@0
|
373 m_oVoicedProb = outputNumber++;
|
matthiasm@0
|
374
|
matthiasm@0
|
375 d.identifier = "candidatesalience";
|
matthiasm@0
|
376 d.name = "Candidate Salience";
|
matthiasm@0
|
377 d.description = "Candidate Salience";
|
matthiasm@0
|
378 d.hasFixedBinCount = true;
|
matthiasm@0
|
379 d.binCount = m_blockSize / 2;
|
matthiasm@0
|
380 d.hasKnownExtents = true;
|
matthiasm@0
|
381 d.minValue = 0;
|
matthiasm@0
|
382 d.maxValue = 1;
|
matthiasm@0
|
383 d.isQuantized = false;
|
matthiasm@0
|
384 d.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
385 d.sampleRate = (m_inputSampleRate / m_stepSize);
|
matthiasm@0
|
386 d.hasDuration = false;
|
matthiasm@0
|
387 outputs.push_back(d);
|
matthiasm@0
|
388 m_oCandidateSalience = outputNumber++;
|
matthiasm@0
|
389
|
matthiasm@0
|
390 d.identifier = "smoothedpitchtrack";
|
matthiasm@0
|
391 d.name = "Smoothed Pitch Track";
|
matthiasm@0
|
392 d.description = ".";
|
matthiasm@0
|
393 d.unit = "Hz";
|
matthiasm@0
|
394 d.hasFixedBinCount = true;
|
matthiasm@0
|
395 d.binCount = 1;
|
matthiasm@0
|
396 d.hasKnownExtents = false;
|
matthiasm@0
|
397 // d.minValue = 0;
|
matthiasm@0
|
398 // d.maxValue = 1;
|
matthiasm@0
|
399 d.isQuantized = false;
|
matthiasm@0
|
400 d.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
401 d.sampleRate = (m_inputSampleRate / m_stepSize);
|
matthiasm@0
|
402 d.hasDuration = false;
|
matthiasm@0
|
403 outputs.push_back(d);
|
matthiasm@0
|
404 m_oSmoothedPitchTrack = outputNumber++;
|
matthiasm@0
|
405
|
matthiasm@0
|
406 d.identifier = "notes";
|
matthiasm@0
|
407 d.name = "Notes";
|
matthiasm@0
|
408 d.description = "Derived fixed-pitch note frequencies";
|
matthiasm@0
|
409 // d.unit = "MIDI unit";
|
matthiasm@0
|
410 d.unit = "Hz";
|
matthiasm@0
|
411 d.hasFixedBinCount = true;
|
matthiasm@0
|
412 d.binCount = 1;
|
matthiasm@0
|
413 d.hasKnownExtents = false;
|
matthiasm@0
|
414 d.isQuantized = false;
|
matthiasm@0
|
415 d.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@0
|
416 d.sampleRate = (m_inputSampleRate / m_stepSize);
|
matthiasm@0
|
417 d.hasDuration = true;
|
matthiasm@0
|
418 outputs.push_back(d);
|
matthiasm@0
|
419 m_oNotes = outputNumber++;
|
matthiasm@0
|
420
|
matthiasm@0
|
421 return outputs;
|
matthiasm@0
|
422 }
|
matthiasm@0
|
423
|
matthiasm@0
|
424 bool
|
matthiasm@0
|
425 PYIN::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@0
|
426 {
|
matthiasm@0
|
427 if (channels < getMinChannelCount() ||
|
matthiasm@0
|
428 channels > getMaxChannelCount()) return false;
|
matthiasm@0
|
429
|
Chris@9
|
430 /*
|
matthiasm@0
|
431 std::cerr << "PYIN::initialise: channels = " << channels
|
matthiasm@0
|
432 << ", stepSize = " << stepSize << ", blockSize = " << blockSize
|
matthiasm@0
|
433 << std::endl;
|
Chris@9
|
434 */
|
matthiasm@0
|
435 m_channels = channels;
|
matthiasm@0
|
436 m_stepSize = stepSize;
|
matthiasm@0
|
437 m_blockSize = blockSize;
|
matthiasm@0
|
438
|
matthiasm@0
|
439 reset();
|
matthiasm@0
|
440
|
matthiasm@0
|
441 return true;
|
matthiasm@0
|
442 }
|
matthiasm@0
|
443
|
matthiasm@0
|
444 void
|
matthiasm@0
|
445 PYIN::reset()
|
matthiasm@0
|
446 {
|
matthiasm@0
|
447 m_yin.setThresholdDistr(m_threshDistr);
|
matthiasm@0
|
448 m_yin.setFrameSize(m_blockSize);
|
matthiasm@0
|
449
|
matthiasm@0
|
450 m_pitchProb.clear();
|
matthiasm@0
|
451 m_timestamp.clear();
|
Chris@9
|
452 /*
|
matthiasm@0
|
453 std::cerr << "PYIN::reset"
|
matthiasm@0
|
454 << ", blockSize = " << m_blockSize
|
matthiasm@0
|
455 << std::endl;
|
Chris@9
|
456 */
|
matthiasm@0
|
457 }
|
matthiasm@0
|
458
|
matthiasm@0
|
459 PYIN::FeatureSet
|
matthiasm@0
|
460 PYIN::process(const float *const *inputBuffers, RealTime timestamp)
|
matthiasm@0
|
461 {
|
matthiasm@0
|
462 timestamp = timestamp + Vamp::RealTime::frame2RealTime(m_blockSize/4, lrintf(m_inputSampleRate));
|
matthiasm@0
|
463 FeatureSet fs;
|
matthiasm@0
|
464
|
matthiasm@0
|
465 double *dInputBuffers = new double[m_blockSize];
|
matthiasm@0
|
466 for (size_t i = 0; i < m_blockSize; ++i) dInputBuffers[i] = inputBuffers[0][i];
|
matthiasm@0
|
467
|
matthiasm@21
|
468 Yin::YinOutput yo;
|
matthiasm@21
|
469 float floatTime = timestamp.sec + timestamp.nsec * 1.0 / 1000000000;
|
matthiasm@21
|
470 std::cerr << timestamp << " " << floatTime << std::endl;
|
matthiasm@21
|
471 if (floatTime > m_leftBoundary && floatTime < m_rightBoundary) {
|
matthiasm@21
|
472 // constrained
|
matthiasm@21
|
473 yo = m_yin.processProbabilisticYin(dInputBuffers, m_minLocalFreq, m_maxLocalFreq);
|
matthiasm@21
|
474 } else {
|
matthiasm@21
|
475 yo = m_yin.processProbabilisticYin(dInputBuffers);
|
matthiasm@21
|
476 }
|
matthiasm@21
|
477
|
matthiasm@0
|
478
|
matthiasm@0
|
479 Feature f;
|
matthiasm@0
|
480 f.hasTimestamp = true;
|
matthiasm@0
|
481 f.timestamp = timestamp;
|
matthiasm@0
|
482 for (size_t i = 0; i < yo.freqProb.size(); ++i)
|
matthiasm@0
|
483 {
|
matthiasm@0
|
484 f.values.push_back(yo.freqProb[i].first);
|
matthiasm@0
|
485 }
|
matthiasm@0
|
486 fs[m_oF0Candidates].push_back(f);
|
matthiasm@0
|
487
|
matthiasm@0
|
488 f.values.clear();
|
matthiasm@0
|
489 float voicedProb = 0;
|
matthiasm@0
|
490 for (size_t i = 0; i < yo.freqProb.size(); ++i)
|
matthiasm@0
|
491 {
|
matthiasm@0
|
492 f.values.push_back(yo.freqProb[i].second);
|
matthiasm@0
|
493 voicedProb += yo.freqProb[i].second;
|
matthiasm@0
|
494 }
|
matthiasm@0
|
495 fs[m_oF0Probs].push_back(f);
|
matthiasm@0
|
496
|
matthiasm@0
|
497 f.values.clear();
|
matthiasm@0
|
498 f.values.push_back(voicedProb);
|
matthiasm@0
|
499 fs[m_oVoicedProb].push_back(f);
|
matthiasm@0
|
500
|
matthiasm@0
|
501 f.values.clear();
|
matthiasm@0
|
502 float salienceSum = 0;
|
matthiasm@0
|
503 for (size_t iBin = 0; iBin < yo.salience.size(); ++iBin)
|
matthiasm@0
|
504 {
|
matthiasm@0
|
505 f.values.push_back(yo.salience[iBin]);
|
matthiasm@0
|
506 salienceSum += yo.salience[iBin];
|
matthiasm@0
|
507 }
|
matthiasm@0
|
508 fs[m_oCandidateSalience].push_back(f);
|
matthiasm@0
|
509
|
matthiasm@0
|
510 delete [] dInputBuffers;
|
matthiasm@0
|
511
|
matthiasm@0
|
512 vector<pair<double, double> > tempPitchProb;
|
matthiasm@0
|
513 for (size_t iCandidate = 0; iCandidate < yo.freqProb.size(); ++iCandidate)
|
matthiasm@0
|
514 {
|
matthiasm@0
|
515 double tempPitch = 12 * std::log(yo.freqProb[iCandidate].first/440)/std::log(2.) + 69;
|
matthiasm@0
|
516 tempPitchProb.push_back(pair<double, double>
|
matthiasm@0
|
517 (tempPitch, yo.freqProb[iCandidate].second));
|
matthiasm@0
|
518 }
|
matthiasm@0
|
519 m_pitchProb.push_back(tempPitchProb);
|
matthiasm@0
|
520
|
matthiasm@0
|
521 m_timestamp.push_back(timestamp);
|
matthiasm@0
|
522
|
matthiasm@0
|
523 return fs;
|
matthiasm@0
|
524 }
|
matthiasm@0
|
525
|
matthiasm@0
|
526 PYIN::FeatureSet
|
matthiasm@0
|
527 PYIN::getRemainingFeatures()
|
matthiasm@0
|
528 {
|
matthiasm@0
|
529 FeatureSet fs;
|
matthiasm@0
|
530 Feature f;
|
matthiasm@0
|
531 f.hasTimestamp = true;
|
matthiasm@0
|
532 f.hasDuration = false;
|
matthiasm@0
|
533
|
Chris@4
|
534 if (m_pitchProb.empty()) {
|
Chris@4
|
535 return fs;
|
Chris@4
|
536 }
|
Chris@4
|
537
|
matthiasm@0
|
538 // MONO-PITCH STUFF
|
matthiasm@0
|
539 MonoPitch mp;
|
Chris@9
|
540 // std::cerr << "before viterbi" << std::endl;
|
matthiasm@0
|
541 vector<float> mpOut = mp.process(m_pitchProb);
|
matthiasm@0
|
542 // std::cerr << "after viterbi " << mpOut.size() << " "<< m_timestamp.size() << std::endl;
|
matthiasm@0
|
543 for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame)
|
matthiasm@0
|
544 {
|
matthiasm@0
|
545 if (mpOut[iFrame] < 0 && (m_outputUnvoiced==0)) continue;
|
matthiasm@0
|
546 f.timestamp = m_timestamp[iFrame];
|
matthiasm@0
|
547 f.values.clear();
|
matthiasm@0
|
548 if (m_outputUnvoiced == 1)
|
matthiasm@0
|
549 {
|
matthiasm@0
|
550 f.values.push_back(abs(mpOut[iFrame]));
|
matthiasm@0
|
551 } else {
|
matthiasm@0
|
552 f.values.push_back(mpOut[iFrame]);
|
matthiasm@0
|
553 }
|
matthiasm@0
|
554
|
matthiasm@0
|
555 fs[m_oSmoothedPitchTrack].push_back(f);
|
matthiasm@0
|
556 }
|
matthiasm@0
|
557
|
matthiasm@1
|
558 // MONO-NOTE STUFF
|
matthiasm@1
|
559 MonoNote mn;
|
matthiasm@1
|
560 std::vector<std::vector<std::pair<double, double> > > smoothedPitch;
|
matthiasm@1
|
561 for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame) {
|
matthiasm@1
|
562 std::vector<std::pair<double, double> > temp;
|
matthiasm@1
|
563 if (mpOut[iFrame] > 0)
|
matthiasm@1
|
564 {
|
matthiasm@1
|
565 double tempPitch = 12 * std::log(mpOut[iFrame]/440)/std::log(2.) + 69;
|
matthiasm@1
|
566 temp.push_back(std::pair<double,double>(tempPitch, .9));
|
matthiasm@1
|
567 }
|
matthiasm@1
|
568 smoothedPitch.push_back(temp);
|
matthiasm@1
|
569 }
|
matthiasm@0
|
570 // vector<MonoNote::FrameOutput> mnOut = mn.process(m_pitchProb);
|
matthiasm@1
|
571 vector<MonoNote::FrameOutput> mnOut = mn.process(smoothedPitch);
|
matthiasm@1
|
572
|
matthiasm@6
|
573 // turning feature into a note feature
|
matthiasm@1
|
574 f.hasTimestamp = true;
|
matthiasm@1
|
575 f.hasDuration = true;
|
matthiasm@1
|
576 f.values.clear();
|
matthiasm@6
|
577
|
matthiasm@6
|
578 int onsetFrame = 0;
|
matthiasm@6
|
579 bool isVoiced = 0;
|
matthiasm@6
|
580 bool oldIsVoiced = 0;
|
matthiasm@6
|
581 size_t nFrame = m_pitchProb.size();
|
matthiasm@1
|
582
|
matthiasm@6
|
583 std::vector<float> notePitchTrack; // collects pitches for one note at a time
|
matthiasm@6
|
584 for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
|
matthiasm@1
|
585 {
|
matthiasm@6
|
586 isVoiced = mnOut[iFrame].noteState < 3 && smoothedPitch[iFrame].size() > 0;
|
matthiasm@6
|
587 if (isVoiced && iFrame != nFrame-1)
|
matthiasm@1
|
588 {
|
matthiasm@6
|
589 if (oldIsVoiced == 0) // beginning of a note
|
matthiasm@1
|
590 {
|
matthiasm@6
|
591 onsetFrame = iFrame;
|
matthiasm@6
|
592 notePitchTrack.clear();
|
matthiasm@1
|
593 }
|
matthiasm@6
|
594 float pitch = smoothedPitch[iFrame][0].first;
|
matthiasm@6
|
595 notePitchTrack.push_back(pitch); // add to the note's pitch track
|
matthiasm@6
|
596 } else { // not currently voiced
|
matthiasm@6
|
597 if (oldIsVoiced == 1 && notePitchTrack.size() > 4) // end of the note
|
matthiasm@6
|
598 {
|
matthiasm@1
|
599 std::sort(notePitchTrack.begin(), notePitchTrack.end());
|
matthiasm@6
|
600 float medianPitch = notePitchTrack[notePitchTrack.size()/2];
|
matthiasm@6
|
601 float medianFreq = std::pow(2,(medianPitch - 69) / 12) * 440;
|
matthiasm@6
|
602 f.values.clear();
|
matthiasm@6
|
603 f.values.push_back(medianFreq);
|
matthiasm@6
|
604 f.timestamp = m_timestamp[onsetFrame];
|
matthiasm@6
|
605 f.duration = m_timestamp[iFrame] - m_timestamp[onsetFrame];
|
matthiasm@5
|
606 fs[m_oNotes].push_back(f);
|
matthiasm@1
|
607 }
|
matthiasm@1
|
608 }
|
matthiasm@6
|
609 oldIsVoiced = isVoiced;
|
matthiasm@1
|
610 }
|
matthiasm@0
|
611 return fs;
|
matthiasm@0
|
612 }
|