matthiasm@0
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
Chris@9
|
2
|
matthiasm@0
|
3 /*
|
Chris@9
|
4 pYIN - A fundamental frequency estimator for monophonic audio
|
Chris@9
|
5 Centre for Digital Music, Queen Mary, University of London.
|
Chris@9
|
6
|
Chris@9
|
7 This program is free software; you can redistribute it and/or
|
Chris@9
|
8 modify it under the terms of the GNU General Public License as
|
Chris@9
|
9 published by the Free Software Foundation; either version 2 of the
|
Chris@9
|
10 License, or (at your option) any later version. See the file
|
Chris@9
|
11 COPYING included with this distribution for more information.
|
matthiasm@0
|
12 */
|
matthiasm@0
|
13
|
matthiasm@36
|
14 #include "PYinVamp.h"
|
matthiasm@0
|
15 #include "MonoNote.h"
|
matthiasm@0
|
16 #include "MonoPitch.h"
|
matthiasm@0
|
17
|
matthiasm@0
|
18 #include <vector>
|
matthiasm@0
|
19 #include <algorithm>
|
matthiasm@0
|
20
|
matthiasm@0
|
21 #include <cstdio>
|
matthiasm@0
|
22 #include <cmath>
|
matthiasm@0
|
23 #include <complex>
|
matthiasm@0
|
24
|
matthiasm@0
|
25 using std::string;
|
matthiasm@0
|
26 using std::vector;
|
matthiasm@0
|
27 using Vamp::RealTime;
|
matthiasm@0
|
28
|
matthiasm@0
|
29
|
matthiasm@36
|
30 PYinVamp::PYinVamp(float inputSampleRate) :
|
matthiasm@0
|
31 Plugin(inputSampleRate),
|
matthiasm@0
|
32 m_channels(0),
|
matthiasm@0
|
33 m_stepSize(256),
|
matthiasm@0
|
34 m_blockSize(2048),
|
matthiasm@0
|
35 m_fmin(40),
|
matthiasm@58
|
36 m_fmax(1600),
|
matthiasm@0
|
37 m_yin(2048, inputSampleRate, 0.0),
|
matthiasm@0
|
38 m_oF0Candidates(0),
|
matthiasm@0
|
39 m_oF0Probs(0),
|
matthiasm@0
|
40 m_oVoicedProb(0),
|
matthiasm@0
|
41 m_oCandidateSalience(0),
|
matthiasm@0
|
42 m_oSmoothedPitchTrack(0),
|
matthiasm@0
|
43 m_oNotes(0),
|
matthiasm@0
|
44 m_threshDistr(2.0f),
|
matthiasm@6
|
45 m_outputUnvoiced(0.0f),
|
matthiasm@70
|
46 m_preciseTime(0.0f),
|
matthiasm@117
|
47 m_lowAmp(0.1f),
|
matthiasm@117
|
48 m_onsetSensitivity(0.7f),
|
matthiasm@117
|
49 m_pruneThresh(0.1f),
|
matthiasm@0
|
50 m_pitchProb(0),
|
matthiasm@103
|
51 m_timestamp(0),
|
matthiasm@103
|
52 m_level(0)
|
matthiasm@0
|
53 {
|
matthiasm@0
|
54 }
|
matthiasm@0
|
55
|
matthiasm@36
|
56 PYinVamp::~PYinVamp()
|
matthiasm@0
|
57 {
|
matthiasm@0
|
58 }
|
matthiasm@0
|
59
|
matthiasm@0
|
60 string
|
matthiasm@36
|
61 PYinVamp::getIdentifier() const
|
matthiasm@0
|
62 {
|
matthiasm@1
|
63 return "pyin";
|
matthiasm@0
|
64 }
|
matthiasm@0
|
65
|
matthiasm@0
|
66 string
|
matthiasm@36
|
67 PYinVamp::getName() const
|
matthiasm@0
|
68 {
|
matthiasm@1
|
69 return "pYin";
|
matthiasm@0
|
70 }
|
matthiasm@0
|
71
|
matthiasm@0
|
72 string
|
matthiasm@36
|
73 PYinVamp::getDescription() const
|
matthiasm@0
|
74 {
|
matthiasm@0
|
75 return "Monophonic pitch and note tracking based on a probabilistic Yin extension.";
|
matthiasm@0
|
76 }
|
matthiasm@0
|
77
|
matthiasm@0
|
78 string
|
matthiasm@36
|
79 PYinVamp::getMaker() const
|
matthiasm@0
|
80 {
|
matthiasm@0
|
81 return "Matthias Mauch";
|
matthiasm@0
|
82 }
|
matthiasm@0
|
83
|
matthiasm@0
|
84 int
|
matthiasm@36
|
85 PYinVamp::getPluginVersion() const
|
matthiasm@0
|
86 {
|
matthiasm@0
|
87 // Increment this each time you release a version that behaves
|
matthiasm@0
|
88 // differently from the previous one
|
Chris@125
|
89 return 2;
|
matthiasm@0
|
90 }
|
matthiasm@0
|
91
|
matthiasm@0
|
92 string
|
matthiasm@36
|
93 PYinVamp::getCopyright() const
|
matthiasm@0
|
94 {
|
matthiasm@0
|
95 return "GPL";
|
matthiasm@0
|
96 }
|
matthiasm@0
|
97
|
matthiasm@36
|
98 PYinVamp::InputDomain
|
matthiasm@36
|
99 PYinVamp::getInputDomain() const
|
matthiasm@0
|
100 {
|
matthiasm@0
|
101 return TimeDomain;
|
matthiasm@0
|
102 }
|
matthiasm@0
|
103
|
matthiasm@0
|
104 size_t
|
matthiasm@36
|
105 PYinVamp::getPreferredBlockSize() const
|
matthiasm@0
|
106 {
|
matthiasm@0
|
107 return 2048;
|
matthiasm@0
|
108 }
|
matthiasm@0
|
109
|
matthiasm@0
|
110 size_t
|
matthiasm@36
|
111 PYinVamp::getPreferredStepSize() const
|
matthiasm@0
|
112 {
|
matthiasm@0
|
113 return 256;
|
matthiasm@0
|
114 }
|
matthiasm@0
|
115
|
matthiasm@0
|
116 size_t
|
matthiasm@36
|
117 PYinVamp::getMinChannelCount() const
|
matthiasm@0
|
118 {
|
matthiasm@0
|
119 return 1;
|
matthiasm@0
|
120 }
|
matthiasm@0
|
121
|
matthiasm@0
|
122 size_t
|
matthiasm@36
|
123 PYinVamp::getMaxChannelCount() const
|
matthiasm@0
|
124 {
|
matthiasm@0
|
125 return 1;
|
matthiasm@0
|
126 }
|
matthiasm@0
|
127
|
matthiasm@36
|
128 PYinVamp::ParameterList
|
matthiasm@36
|
129 PYinVamp::getParameterDescriptors() const
|
matthiasm@0
|
130 {
|
matthiasm@0
|
131 ParameterList list;
|
matthiasm@0
|
132
|
matthiasm@0
|
133 ParameterDescriptor d;
|
matthiasm@0
|
134
|
matthiasm@0
|
135 d.identifier = "threshdistr";
|
matthiasm@0
|
136 d.name = "Yin threshold distribution";
|
matthiasm@0
|
137 d.description = ".";
|
matthiasm@0
|
138 d.unit = "";
|
matthiasm@0
|
139 d.minValue = 0.0f;
|
matthiasm@0
|
140 d.maxValue = 7.0f;
|
matthiasm@0
|
141 d.defaultValue = 2.0f;
|
matthiasm@0
|
142 d.isQuantized = true;
|
matthiasm@0
|
143 d.quantizeStep = 1.0f;
|
matthiasm@0
|
144 d.valueNames.push_back("Uniform");
|
matthiasm@0
|
145 d.valueNames.push_back("Beta (mean 0.10)");
|
matthiasm@0
|
146 d.valueNames.push_back("Beta (mean 0.15)");
|
matthiasm@0
|
147 d.valueNames.push_back("Beta (mean 0.20)");
|
matthiasm@0
|
148 d.valueNames.push_back("Beta (mean 0.30)");
|
matthiasm@0
|
149 d.valueNames.push_back("Single Value 0.10");
|
matthiasm@0
|
150 d.valueNames.push_back("Single Value 0.15");
|
matthiasm@0
|
151 d.valueNames.push_back("Single Value 0.20");
|
matthiasm@0
|
152 list.push_back(d);
|
matthiasm@0
|
153
|
matthiasm@0
|
154 d.identifier = "outputunvoiced";
|
matthiasm@0
|
155 d.valueNames.clear();
|
matthiasm@0
|
156 d.name = "Output estimates classified as unvoiced?";
|
matthiasm@0
|
157 d.description = ".";
|
matthiasm@0
|
158 d.unit = "";
|
matthiasm@0
|
159 d.minValue = 0.0f;
|
matthiasm@0
|
160 d.maxValue = 2.0f;
|
matthiasm@6
|
161 d.defaultValue = 0.0f;
|
matthiasm@0
|
162 d.isQuantized = true;
|
matthiasm@0
|
163 d.quantizeStep = 1.0f;
|
matthiasm@0
|
164 d.valueNames.push_back("No");
|
matthiasm@0
|
165 d.valueNames.push_back("Yes");
|
matthiasm@0
|
166 d.valueNames.push_back("Yes, as negative frequencies");
|
matthiasm@0
|
167 list.push_back(d);
|
matthiasm@0
|
168
|
matthiasm@70
|
169 d.identifier = "precisetime";
|
matthiasm@70
|
170 d.valueNames.clear();
|
matthiasm@70
|
171 d.name = "Use non-standard precise YIN timing (slow).";
|
matthiasm@70
|
172 d.description = ".";
|
matthiasm@70
|
173 d.unit = "";
|
matthiasm@70
|
174 d.minValue = 0.0f;
|
matthiasm@70
|
175 d.maxValue = 1.0f;
|
matthiasm@70
|
176 d.defaultValue = 0.0f;
|
matthiasm@70
|
177 d.isQuantized = true;
|
matthiasm@70
|
178 d.quantizeStep = 1.0f;
|
matthiasm@70
|
179 list.push_back(d);
|
matthiasm@70
|
180
|
matthiasm@72
|
181 d.identifier = "lowampsuppression";
|
matthiasm@72
|
182 d.valueNames.clear();
|
matthiasm@72
|
183 d.name = "Suppress low amplitude pitch estimates.";
|
matthiasm@72
|
184 d.description = ".";
|
matthiasm@72
|
185 d.unit = "";
|
matthiasm@72
|
186 d.minValue = 0.0f;
|
matthiasm@72
|
187 d.maxValue = 1.0f;
|
matthiasm@73
|
188 d.defaultValue = 0.1f;
|
matthiasm@72
|
189 d.isQuantized = false;
|
matthiasm@72
|
190 list.push_back(d);
|
matthiasm@70
|
191
|
matthiasm@107
|
192 d.identifier = "onsetsensitivity";
|
matthiasm@107
|
193 d.valueNames.clear();
|
matthiasm@107
|
194 d.name = "Onset sensitivity";
|
matthiasm@107
|
195 d.description = "Adds additional note onsets when RMS increases.";
|
matthiasm@107
|
196 d.unit = "";
|
matthiasm@107
|
197 d.minValue = 0.0f;
|
matthiasm@107
|
198 d.maxValue = 1.0f;
|
matthiasm@117
|
199 d.defaultValue = 0.7f;
|
matthiasm@108
|
200 d.isQuantized = false;
|
matthiasm@108
|
201 list.push_back(d);
|
matthiasm@108
|
202
|
matthiasm@108
|
203 d.identifier = "prunethresh";
|
matthiasm@108
|
204 d.valueNames.clear();
|
matthiasm@108
|
205 d.name = "Duration pruning threshold.";
|
matthiasm@108
|
206 d.description = "Prune notes that are shorter than this value.";
|
matthiasm@108
|
207 d.unit = "";
|
matthiasm@108
|
208 d.minValue = 0.0f;
|
matthiasm@108
|
209 d.maxValue = 0.2f;
|
matthiasm@117
|
210 d.defaultValue = 0.1f;
|
matthiasm@107
|
211 d.isQuantized = false;
|
matthiasm@107
|
212 list.push_back(d);
|
matthiasm@107
|
213
|
matthiasm@0
|
214 return list;
|
matthiasm@0
|
215 }
|
matthiasm@0
|
216
|
matthiasm@0
|
217 float
|
matthiasm@36
|
218 PYinVamp::getParameter(string identifier) const
|
matthiasm@0
|
219 {
|
matthiasm@0
|
220 if (identifier == "threshdistr") {
|
matthiasm@0
|
221 return m_threshDistr;
|
matthiasm@0
|
222 }
|
matthiasm@0
|
223 if (identifier == "outputunvoiced") {
|
matthiasm@0
|
224 return m_outputUnvoiced;
|
matthiasm@0
|
225 }
|
matthiasm@70
|
226 if (identifier == "precisetime") {
|
matthiasm@70
|
227 return m_preciseTime;
|
matthiasm@70
|
228 }
|
matthiasm@72
|
229 if (identifier == "lowampsuppression") {
|
matthiasm@72
|
230 return m_lowAmp;
|
matthiasm@72
|
231 }
|
matthiasm@107
|
232 if (identifier == "onsetsensitivity") {
|
matthiasm@107
|
233 return m_onsetSensitivity;
|
matthiasm@107
|
234 }
|
matthiasm@108
|
235 if (identifier == "prunethresh") {
|
matthiasm@108
|
236 return m_pruneThresh;
|
matthiasm@108
|
237 }
|
matthiasm@0
|
238 return 0.f;
|
matthiasm@0
|
239 }
|
matthiasm@0
|
240
|
matthiasm@0
|
241 void
|
matthiasm@36
|
242 PYinVamp::setParameter(string identifier, float value)
|
matthiasm@0
|
243 {
|
matthiasm@0
|
244 if (identifier == "threshdistr")
|
matthiasm@0
|
245 {
|
matthiasm@0
|
246 m_threshDistr = value;
|
matthiasm@0
|
247 }
|
matthiasm@0
|
248 if (identifier == "outputunvoiced")
|
matthiasm@0
|
249 {
|
matthiasm@0
|
250 m_outputUnvoiced = value;
|
matthiasm@0
|
251 }
|
matthiasm@70
|
252 if (identifier == "precisetime")
|
matthiasm@70
|
253 {
|
matthiasm@70
|
254 m_preciseTime = value;
|
matthiasm@70
|
255 }
|
matthiasm@72
|
256 if (identifier == "lowampsuppression")
|
matthiasm@72
|
257 {
|
matthiasm@72
|
258 m_lowAmp = value;
|
matthiasm@72
|
259 }
|
matthiasm@107
|
260 if (identifier == "onsetsensitivity")
|
matthiasm@107
|
261 {
|
matthiasm@107
|
262 m_onsetSensitivity = value;
|
matthiasm@107
|
263 }
|
matthiasm@108
|
264 if (identifier == "prunethresh")
|
matthiasm@108
|
265 {
|
matthiasm@108
|
266 m_pruneThresh = value;
|
matthiasm@108
|
267 }
|
matthiasm@0
|
268 }
|
matthiasm@0
|
269
|
matthiasm@36
|
270 PYinVamp::ProgramList
|
matthiasm@36
|
271 PYinVamp::getPrograms() const
|
matthiasm@0
|
272 {
|
matthiasm@0
|
273 ProgramList list;
|
matthiasm@0
|
274 return list;
|
matthiasm@0
|
275 }
|
matthiasm@0
|
276
|
matthiasm@0
|
277 string
|
matthiasm@36
|
278 PYinVamp::getCurrentProgram() const
|
matthiasm@0
|
279 {
|
matthiasm@0
|
280 return ""; // no programs
|
matthiasm@0
|
281 }
|
matthiasm@0
|
282
|
matthiasm@0
|
283 void
|
Chris@138
|
284 PYinVamp::selectProgram(string)
|
matthiasm@0
|
285 {
|
matthiasm@0
|
286 }
|
matthiasm@0
|
287
|
matthiasm@36
|
288 PYinVamp::OutputList
|
matthiasm@36
|
289 PYinVamp::getOutputDescriptors() const
|
matthiasm@0
|
290 {
|
matthiasm@0
|
291 OutputList outputs;
|
matthiasm@0
|
292
|
matthiasm@0
|
293 OutputDescriptor d;
|
matthiasm@0
|
294
|
matthiasm@0
|
295 int outputNumber = 0;
|
matthiasm@0
|
296
|
matthiasm@0
|
297 d.identifier = "f0candidates";
|
matthiasm@0
|
298 d.name = "F0 Candidates";
|
matthiasm@0
|
299 d.description = "Estimated fundamental frequency candidates.";
|
matthiasm@0
|
300 d.unit = "Hz";
|
matthiasm@0
|
301 d.hasFixedBinCount = false;
|
matthiasm@0
|
302 // d.binCount = 1;
|
matthiasm@0
|
303 d.hasKnownExtents = true;
|
matthiasm@0
|
304 d.minValue = m_fmin;
|
matthiasm@0
|
305 d.maxValue = 500;
|
matthiasm@0
|
306 d.isQuantized = false;
|
matthiasm@0
|
307 d.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
308 d.sampleRate = (m_inputSampleRate / m_stepSize);
|
matthiasm@0
|
309 d.hasDuration = false;
|
matthiasm@0
|
310 outputs.push_back(d);
|
matthiasm@0
|
311 m_oF0Candidates = outputNumber++;
|
matthiasm@0
|
312
|
matthiasm@0
|
313 d.identifier = "f0probs";
|
matthiasm@0
|
314 d.name = "Candidate Probabilities";
|
matthiasm@0
|
315 d.description = "Probabilities of estimated fundamental frequency candidates.";
|
matthiasm@0
|
316 d.unit = "";
|
matthiasm@0
|
317 d.hasFixedBinCount = false;
|
matthiasm@0
|
318 // d.binCount = 1;
|
matthiasm@0
|
319 d.hasKnownExtents = true;
|
matthiasm@0
|
320 d.minValue = 0;
|
matthiasm@0
|
321 d.maxValue = 1;
|
matthiasm@0
|
322 d.isQuantized = false;
|
matthiasm@0
|
323 d.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
324 d.sampleRate = (m_inputSampleRate / m_stepSize);
|
matthiasm@0
|
325 d.hasDuration = false;
|
matthiasm@0
|
326 outputs.push_back(d);
|
matthiasm@0
|
327 m_oF0Probs = outputNumber++;
|
matthiasm@0
|
328
|
matthiasm@0
|
329 d.identifier = "voicedprob";
|
matthiasm@0
|
330 d.name = "Voiced Probability";
|
matthiasm@0
|
331 d.description = "Probability that the signal is voiced according to Probabilistic Yin.";
|
matthiasm@0
|
332 d.unit = "";
|
matthiasm@0
|
333 d.hasFixedBinCount = true;
|
matthiasm@0
|
334 d.binCount = 1;
|
matthiasm@0
|
335 d.hasKnownExtents = true;
|
matthiasm@0
|
336 d.minValue = 0;
|
matthiasm@0
|
337 d.maxValue = 1;
|
matthiasm@0
|
338 d.isQuantized = false;
|
matthiasm@0
|
339 d.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
340 d.sampleRate = (m_inputSampleRate / m_stepSize);
|
matthiasm@0
|
341 d.hasDuration = false;
|
matthiasm@0
|
342 outputs.push_back(d);
|
matthiasm@0
|
343 m_oVoicedProb = outputNumber++;
|
matthiasm@0
|
344
|
matthiasm@0
|
345 d.identifier = "candidatesalience";
|
matthiasm@0
|
346 d.name = "Candidate Salience";
|
matthiasm@0
|
347 d.description = "Candidate Salience";
|
matthiasm@0
|
348 d.hasFixedBinCount = true;
|
matthiasm@0
|
349 d.binCount = m_blockSize / 2;
|
matthiasm@0
|
350 d.hasKnownExtents = true;
|
matthiasm@0
|
351 d.minValue = 0;
|
matthiasm@0
|
352 d.maxValue = 1;
|
matthiasm@0
|
353 d.isQuantized = false;
|
matthiasm@0
|
354 d.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
355 d.sampleRate = (m_inputSampleRate / m_stepSize);
|
matthiasm@0
|
356 d.hasDuration = false;
|
matthiasm@0
|
357 outputs.push_back(d);
|
matthiasm@0
|
358 m_oCandidateSalience = outputNumber++;
|
matthiasm@0
|
359
|
matthiasm@0
|
360 d.identifier = "smoothedpitchtrack";
|
matthiasm@0
|
361 d.name = "Smoothed Pitch Track";
|
matthiasm@0
|
362 d.description = ".";
|
matthiasm@0
|
363 d.unit = "Hz";
|
matthiasm@0
|
364 d.hasFixedBinCount = true;
|
matthiasm@0
|
365 d.binCount = 1;
|
matthiasm@0
|
366 d.hasKnownExtents = false;
|
matthiasm@0
|
367 // d.minValue = 0;
|
matthiasm@0
|
368 // d.maxValue = 1;
|
matthiasm@0
|
369 d.isQuantized = false;
|
matthiasm@0
|
370 d.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
371 d.sampleRate = (m_inputSampleRate / m_stepSize);
|
matthiasm@0
|
372 d.hasDuration = false;
|
matthiasm@0
|
373 outputs.push_back(d);
|
matthiasm@0
|
374 m_oSmoothedPitchTrack = outputNumber++;
|
matthiasm@0
|
375
|
matthiasm@0
|
376 d.identifier = "notes";
|
matthiasm@0
|
377 d.name = "Notes";
|
matthiasm@0
|
378 d.description = "Derived fixed-pitch note frequencies";
|
matthiasm@0
|
379 // d.unit = "MIDI unit";
|
matthiasm@0
|
380 d.unit = "Hz";
|
matthiasm@0
|
381 d.hasFixedBinCount = true;
|
matthiasm@0
|
382 d.binCount = 1;
|
matthiasm@0
|
383 d.hasKnownExtents = false;
|
matthiasm@0
|
384 d.isQuantized = false;
|
matthiasm@0
|
385 d.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@0
|
386 d.sampleRate = (m_inputSampleRate / m_stepSize);
|
matthiasm@0
|
387 d.hasDuration = true;
|
matthiasm@0
|
388 outputs.push_back(d);
|
matthiasm@0
|
389 m_oNotes = outputNumber++;
|
matthiasm@0
|
390
|
matthiasm@0
|
391 return outputs;
|
matthiasm@0
|
392 }
|
matthiasm@0
|
393
|
matthiasm@0
|
394 bool
|
matthiasm@36
|
395 PYinVamp::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@0
|
396 {
|
matthiasm@0
|
397 if (channels < getMinChannelCount() ||
|
matthiasm@0
|
398 channels > getMaxChannelCount()) return false;
|
matthiasm@0
|
399
|
Chris@9
|
400 /*
|
matthiasm@36
|
401 std::cerr << "PYinVamp::initialise: channels = " << channels
|
matthiasm@0
|
402 << ", stepSize = " << stepSize << ", blockSize = " << blockSize
|
matthiasm@0
|
403 << std::endl;
|
Chris@9
|
404 */
|
matthiasm@0
|
405 m_channels = channels;
|
matthiasm@0
|
406 m_stepSize = stepSize;
|
matthiasm@0
|
407 m_blockSize = blockSize;
|
matthiasm@0
|
408
|
matthiasm@0
|
409 reset();
|
matthiasm@0
|
410
|
matthiasm@0
|
411 return true;
|
matthiasm@0
|
412 }
|
matthiasm@0
|
413
|
matthiasm@0
|
414 void
|
matthiasm@36
|
415 PYinVamp::reset()
|
matthiasm@0
|
416 {
|
matthiasm@0
|
417 m_yin.setThresholdDistr(m_threshDistr);
|
matthiasm@0
|
418 m_yin.setFrameSize(m_blockSize);
|
matthiasm@117
|
419 m_yin.setFast(!m_preciseTime);
|
matthiasm@0
|
420
|
matthiasm@0
|
421 m_pitchProb.clear();
|
matthiasm@0
|
422 m_timestamp.clear();
|
matthiasm@103
|
423 m_level.clear();
|
Chris@9
|
424 /*
|
matthiasm@36
|
425 std::cerr << "PYinVamp::reset"
|
matthiasm@0
|
426 << ", blockSize = " << m_blockSize
|
matthiasm@0
|
427 << std::endl;
|
Chris@9
|
428 */
|
matthiasm@0
|
429 }
|
matthiasm@0
|
430
|
matthiasm@36
|
431 PYinVamp::FeatureSet
|
matthiasm@36
|
432 PYinVamp::process(const float *const *inputBuffers, RealTime timestamp)
|
matthiasm@0
|
433 {
|
matthiasm@77
|
434 int offset = m_preciseTime == 1.0 ? m_blockSize/2 : m_blockSize/4;
|
matthiasm@77
|
435 timestamp = timestamp + Vamp::RealTime::frame2RealTime(offset, lrintf(m_inputSampleRate));
|
matthiasm@77
|
436
|
matthiasm@0
|
437 FeatureSet fs;
|
matthiasm@0
|
438
|
matthiasm@46
|
439 float rms = 0;
|
matthiasm@46
|
440
|
matthiasm@0
|
441 double *dInputBuffers = new double[m_blockSize];
|
matthiasm@46
|
442 for (size_t i = 0; i < m_blockSize; ++i) {
|
matthiasm@46
|
443 dInputBuffers[i] = inputBuffers[0][i];
|
matthiasm@46
|
444 rms += inputBuffers[0][i] * inputBuffers[0][i];
|
matthiasm@46
|
445 }
|
matthiasm@46
|
446 rms /= m_blockSize;
|
matthiasm@46
|
447 rms = sqrt(rms);
|
matthiasm@116
|
448
|
matthiasm@72
|
449 bool isLowAmplitude = (rms < m_lowAmp);
|
matthiasm@0
|
450
|
matthiasm@0
|
451 Yin::YinOutput yo = m_yin.processProbabilisticYin(dInputBuffers);
|
matthiasm@27
|
452 delete [] dInputBuffers;
|
matthiasm@27
|
453
|
matthiasm@103
|
454 m_level.push_back(yo.rms);
|
matthiasm@103
|
455
|
matthiasm@27
|
456 // First, get the things out of the way that we don't want to output
|
matthiasm@27
|
457 // immediately, but instead save for later.
|
matthiasm@27
|
458 vector<pair<double, double> > tempPitchProb;
|
matthiasm@27
|
459 for (size_t iCandidate = 0; iCandidate < yo.freqProb.size(); ++iCandidate)
|
matthiasm@27
|
460 {
|
matthiasm@27
|
461 double tempPitch = 12 * std::log(yo.freqProb[iCandidate].first/440)/std::log(2.) + 69;
|
matthiasm@50
|
462 if (!isLowAmplitude)
|
matthiasm@116
|
463 {
|
matthiasm@46
|
464 tempPitchProb.push_back(pair<double, double>
|
matthiasm@46
|
465 (tempPitch, yo.freqProb[iCandidate].second));
|
matthiasm@116
|
466 } else {
|
matthiasm@116
|
467 float factor = ((rms+0.01*m_lowAmp)/(1.01*m_lowAmp));
|
matthiasm@46
|
468 tempPitchProb.push_back(pair<double, double>
|
matthiasm@65
|
469 (tempPitch, yo.freqProb[iCandidate].second*factor));
|
matthiasm@65
|
470 }
|
matthiasm@27
|
471 }
|
matthiasm@27
|
472 m_pitchProb.push_back(tempPitchProb);
|
matthiasm@27
|
473 m_timestamp.push_back(timestamp);
|
matthiasm@27
|
474
|
matthiasm@27
|
475 // F0 CANDIDATES
|
matthiasm@0
|
476 Feature f;
|
matthiasm@0
|
477 f.hasTimestamp = true;
|
matthiasm@0
|
478 f.timestamp = timestamp;
|
matthiasm@0
|
479 for (size_t i = 0; i < yo.freqProb.size(); ++i)
|
matthiasm@0
|
480 {
|
matthiasm@0
|
481 f.values.push_back(yo.freqProb[i].first);
|
matthiasm@0
|
482 }
|
matthiasm@0
|
483 fs[m_oF0Candidates].push_back(f);
|
matthiasm@0
|
484
|
matthiasm@27
|
485 // VOICEDPROB
|
matthiasm@0
|
486 f.values.clear();
|
matthiasm@0
|
487 float voicedProb = 0;
|
matthiasm@0
|
488 for (size_t i = 0; i < yo.freqProb.size(); ++i)
|
matthiasm@0
|
489 {
|
matthiasm@0
|
490 f.values.push_back(yo.freqProb[i].second);
|
matthiasm@0
|
491 voicedProb += yo.freqProb[i].second;
|
matthiasm@0
|
492 }
|
matthiasm@0
|
493 fs[m_oF0Probs].push_back(f);
|
matthiasm@0
|
494
|
mail@128
|
495 f.values.clear();
|
matthiasm@0
|
496 f.values.push_back(voicedProb);
|
matthiasm@0
|
497 fs[m_oVoicedProb].push_back(f);
|
matthiasm@0
|
498
|
matthiasm@27
|
499 // SALIENCE -- maybe this should eventually disappear
|
matthiasm@0
|
500 f.values.clear();
|
matthiasm@0
|
501 float salienceSum = 0;
|
matthiasm@0
|
502 for (size_t iBin = 0; iBin < yo.salience.size(); ++iBin)
|
matthiasm@0
|
503 {
|
matthiasm@0
|
504 f.values.push_back(yo.salience[iBin]);
|
matthiasm@0
|
505 salienceSum += yo.salience[iBin];
|
matthiasm@0
|
506 }
|
matthiasm@0
|
507 fs[m_oCandidateSalience].push_back(f);
|
matthiasm@0
|
508
|
matthiasm@0
|
509 return fs;
|
matthiasm@0
|
510 }
|
matthiasm@0
|
511
|
matthiasm@36
|
512 PYinVamp::FeatureSet
|
matthiasm@36
|
513 PYinVamp::getRemainingFeatures()
|
matthiasm@0
|
514 {
|
matthiasm@0
|
515 FeatureSet fs;
|
matthiasm@0
|
516 Feature f;
|
matthiasm@0
|
517 f.hasTimestamp = true;
|
matthiasm@0
|
518 f.hasDuration = false;
|
matthiasm@0
|
519
|
Chris@4
|
520 if (m_pitchProb.empty()) {
|
Chris@4
|
521 return fs;
|
Chris@4
|
522 }
|
Chris@4
|
523
|
matthiasm@0
|
524 // MONO-PITCH STUFF
|
matthiasm@0
|
525 MonoPitch mp;
|
matthiasm@0
|
526 vector<float> mpOut = mp.process(m_pitchProb);
|
matthiasm@0
|
527 for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame)
|
matthiasm@0
|
528 {
|
matthiasm@0
|
529 if (mpOut[iFrame] < 0 && (m_outputUnvoiced==0)) continue;
|
matthiasm@0
|
530 f.timestamp = m_timestamp[iFrame];
|
matthiasm@0
|
531 f.values.clear();
|
matthiasm@0
|
532 if (m_outputUnvoiced == 1)
|
matthiasm@0
|
533 {
|
matthiasm@26
|
534 f.values.push_back(fabs(mpOut[iFrame]));
|
matthiasm@0
|
535 } else {
|
matthiasm@0
|
536 f.values.push_back(mpOut[iFrame]);
|
matthiasm@0
|
537 }
|
matthiasm@0
|
538
|
matthiasm@0
|
539 fs[m_oSmoothedPitchTrack].push_back(f);
|
matthiasm@0
|
540 }
|
matthiasm@0
|
541
|
matthiasm@1
|
542 // MONO-NOTE STUFF
|
Chris@122
|
543 // std::cerr << "Mono Note Stuff" << std::endl;
|
matthiasm@1
|
544 MonoNote mn;
|
matthiasm@1
|
545 std::vector<std::vector<std::pair<double, double> > > smoothedPitch;
|
matthiasm@1
|
546 for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame) {
|
matthiasm@1
|
547 std::vector<std::pair<double, double> > temp;
|
matthiasm@1
|
548 if (mpOut[iFrame] > 0)
|
matthiasm@1
|
549 {
|
matthiasm@1
|
550 double tempPitch = 12 * std::log(mpOut[iFrame]/440)/std::log(2.) + 69;
|
matthiasm@1
|
551 temp.push_back(std::pair<double,double>(tempPitch, .9));
|
matthiasm@1
|
552 }
|
matthiasm@1
|
553 smoothedPitch.push_back(temp);
|
matthiasm@1
|
554 }
|
matthiasm@0
|
555 // vector<MonoNote::FrameOutput> mnOut = mn.process(m_pitchProb);
|
matthiasm@1
|
556 vector<MonoNote::FrameOutput> mnOut = mn.process(smoothedPitch);
|
matthiasm@1
|
557
|
matthiasm@6
|
558 // turning feature into a note feature
|
matthiasm@1
|
559 f.hasTimestamp = true;
|
matthiasm@1
|
560 f.hasDuration = true;
|
matthiasm@1
|
561 f.values.clear();
|
matthiasm@6
|
562
|
matthiasm@6
|
563 int onsetFrame = 0;
|
matthiasm@6
|
564 bool isVoiced = 0;
|
matthiasm@6
|
565 bool oldIsVoiced = 0;
|
matthiasm@6
|
566 size_t nFrame = m_pitchProb.size();
|
matthiasm@108
|
567
|
matthiasm@108
|
568 float minNoteFrames = (m_inputSampleRate*m_pruneThresh) / m_stepSize;
|
matthiasm@1
|
569
|
matthiasm@6
|
570 std::vector<float> notePitchTrack; // collects pitches for one note at a time
|
matthiasm@6
|
571 for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
|
matthiasm@1
|
572 {
|
matthiasm@103
|
573 isVoiced = mnOut[iFrame].noteState < 3
|
matthiasm@103
|
574 && smoothedPitch[iFrame].size() > 0
|
matthiasm@106
|
575 && (iFrame >= nFrame-2
|
matthiasm@107
|
576 || ((m_level[iFrame]/m_level[iFrame+2]) > m_onsetSensitivity));
|
matthiasm@108
|
577 // std::cerr << m_level[iFrame]/m_level[iFrame-1] << " " << isVoiced << std::endl;
|
matthiasm@6
|
578 if (isVoiced && iFrame != nFrame-1)
|
matthiasm@1
|
579 {
|
matthiasm@6
|
580 if (oldIsVoiced == 0) // beginning of a note
|
matthiasm@1
|
581 {
|
matthiasm@6
|
582 onsetFrame = iFrame;
|
matthiasm@1
|
583 }
|
matthiasm@6
|
584 float pitch = smoothedPitch[iFrame][0].first;
|
matthiasm@6
|
585 notePitchTrack.push_back(pitch); // add to the note's pitch track
|
matthiasm@6
|
586 } else { // not currently voiced
|
matthiasm@108
|
587 if (oldIsVoiced == 1) // end of note
|
matthiasm@6
|
588 {
|
matthiasm@118
|
589 // std::cerr << notePitchTrack.size() << " " << minNoteFrames << std::endl;
|
matthiasm@108
|
590 if (notePitchTrack.size() >= minNoteFrames)
|
matthiasm@108
|
591 {
|
matthiasm@108
|
592 std::sort(notePitchTrack.begin(), notePitchTrack.end());
|
matthiasm@108
|
593 float medianPitch = notePitchTrack[notePitchTrack.size()/2];
|
matthiasm@108
|
594 float medianFreq = std::pow(2,(medianPitch - 69) / 12) * 440;
|
matthiasm@108
|
595 f.values.clear();
|
matthiasm@108
|
596 f.values.push_back(medianFreq);
|
matthiasm@108
|
597 f.timestamp = m_timestamp[onsetFrame];
|
matthiasm@108
|
598 f.duration = m_timestamp[iFrame] - m_timestamp[onsetFrame];
|
matthiasm@108
|
599 fs[m_oNotes].push_back(f);
|
matthiasm@108
|
600 }
|
matthiasm@108
|
601 notePitchTrack.clear();
|
matthiasm@1
|
602 }
|
matthiasm@1
|
603 }
|
matthiasm@6
|
604 oldIsVoiced = isVoiced;
|
matthiasm@1
|
605 }
|
matthiasm@0
|
606 return fs;
|
matthiasm@0
|
607 }
|