matthiasm@0
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
matthiasm@0
|
2
|
Chris@9
|
3 /*
|
Chris@9
|
4 pYIN - A fundamental frequency estimator for monophonic audio
|
Chris@9
|
5 Centre for Digital Music, Queen Mary, University of London.
|
Chris@9
|
6
|
Chris@9
|
7 This program is free software; you can redistribute it and/or
|
Chris@9
|
8 modify it under the terms of the GNU General Public License as
|
Chris@9
|
9 published by the Free Software Foundation; either version 2 of the
|
Chris@9
|
10 License, or (at your option) any later version. See the file
|
Chris@9
|
11 COPYING included with this distribution for more information.
|
Chris@9
|
12 */
|
Chris@9
|
13
|
matthiasm@0
|
14 #include "VampYin.h"
|
matthiasm@0
|
15 #include "MonoNote.h"
|
matthiasm@0
|
16
|
matthiasm@0
|
17 #include "vamp-sdk/FFT.h"
|
matthiasm@0
|
18
|
matthiasm@0
|
19 #include <vector>
|
matthiasm@0
|
20 #include <algorithm>
|
matthiasm@0
|
21
|
matthiasm@0
|
22 #include <cstdio>
|
matthiasm@0
|
23 #include <cmath>
|
matthiasm@0
|
24 #include <complex>
|
matthiasm@0
|
25
|
matthiasm@0
|
26 using std::string;
|
matthiasm@0
|
27 using std::vector;
|
matthiasm@0
|
28 using Vamp::RealTime;
|
matthiasm@0
|
29
|
matthiasm@0
|
30
|
matthiasm@0
|
31 VampYin::VampYin(float inputSampleRate) :
|
matthiasm@0
|
32 Plugin(inputSampleRate),
|
matthiasm@0
|
33 m_channels(0),
|
matthiasm@0
|
34 m_stepSize(256),
|
matthiasm@0
|
35 m_blockSize(2048),
|
matthiasm@0
|
36 m_fmin(40),
|
matthiasm@0
|
37 m_fmax(1000),
|
matthiasm@0
|
38 m_yin(2048, inputSampleRate, 0.0),
|
matthiasm@0
|
39 m_outNoF0(0),
|
matthiasm@0
|
40 m_outNoPeriodicity(0),
|
matthiasm@0
|
41 m_outNoRms(0),
|
matthiasm@0
|
42 m_outNoSalience(0),
|
matthiasm@0
|
43 m_yinParameter(0.15f),
|
Chris@4
|
44 m_outputUnvoiced(2.0f)
|
matthiasm@0
|
45 {
|
matthiasm@0
|
46 }
|
matthiasm@0
|
47
|
matthiasm@0
|
48 VampYin::~VampYin()
|
matthiasm@0
|
49 {
|
matthiasm@0
|
50 }
|
matthiasm@0
|
51
|
matthiasm@0
|
52 string
|
matthiasm@0
|
53 VampYin::getIdentifier() const
|
matthiasm@0
|
54 {
|
matthiasm@0
|
55 return "yin";
|
matthiasm@0
|
56 }
|
matthiasm@0
|
57
|
matthiasm@0
|
58 string
|
matthiasm@0
|
59 VampYin::getName() const
|
matthiasm@0
|
60 {
|
matthiasm@0
|
61 return "Yin";
|
matthiasm@0
|
62 }
|
matthiasm@0
|
63
|
matthiasm@0
|
64 string
|
matthiasm@0
|
65 VampYin::getDescription() const
|
matthiasm@0
|
66 {
|
matthiasm@0
|
67 return "A vamp implementation of the Yin algorithm for monophonic frequency estimation.";
|
matthiasm@0
|
68 }
|
matthiasm@0
|
69
|
matthiasm@0
|
70 string
|
matthiasm@0
|
71 VampYin::getMaker() const
|
matthiasm@0
|
72 {
|
matthiasm@0
|
73 return "Matthias Mauch";
|
matthiasm@0
|
74 }
|
matthiasm@0
|
75
|
matthiasm@0
|
76 int
|
matthiasm@0
|
77 VampYin::getPluginVersion() const
|
matthiasm@0
|
78 {
|
matthiasm@0
|
79 // Increment this each time you release a version that behaves
|
matthiasm@0
|
80 // differently from the previous one
|
matthiasm@0
|
81 return 1;
|
matthiasm@0
|
82 }
|
matthiasm@0
|
83
|
matthiasm@0
|
84 string
|
matthiasm@0
|
85 VampYin::getCopyright() const
|
matthiasm@0
|
86 {
|
matthiasm@0
|
87 return "GPL";
|
matthiasm@0
|
88 }
|
matthiasm@0
|
89
|
matthiasm@0
|
90 VampYin::InputDomain
|
matthiasm@0
|
91 VampYin::getInputDomain() const
|
matthiasm@0
|
92 {
|
matthiasm@0
|
93 return TimeDomain;
|
matthiasm@0
|
94 }
|
matthiasm@0
|
95
|
matthiasm@0
|
96 size_t
|
matthiasm@0
|
97 VampYin::getPreferredBlockSize() const
|
matthiasm@0
|
98 {
|
matthiasm@0
|
99 return 2048;
|
matthiasm@0
|
100 }
|
matthiasm@0
|
101
|
matthiasm@0
|
102 size_t
|
matthiasm@0
|
103 VampYin::getPreferredStepSize() const
|
matthiasm@0
|
104 {
|
matthiasm@0
|
105 return 256;
|
matthiasm@0
|
106 }
|
matthiasm@0
|
107
|
matthiasm@0
|
108 size_t
|
matthiasm@0
|
109 VampYin::getMinChannelCount() const
|
matthiasm@0
|
110 {
|
matthiasm@0
|
111 return 1;
|
matthiasm@0
|
112 }
|
matthiasm@0
|
113
|
matthiasm@0
|
114 size_t
|
matthiasm@0
|
115 VampYin::getMaxChannelCount() const
|
matthiasm@0
|
116 {
|
matthiasm@0
|
117 return 1;
|
matthiasm@0
|
118 }
|
matthiasm@0
|
119
|
matthiasm@0
|
120 VampYin::ParameterList
|
matthiasm@0
|
121 VampYin::getParameterDescriptors() const
|
matthiasm@0
|
122 {
|
matthiasm@0
|
123 ParameterList list;
|
matthiasm@0
|
124
|
matthiasm@0
|
125 ParameterDescriptor d;
|
matthiasm@0
|
126 d.identifier = "yinThreshold";
|
matthiasm@0
|
127 d.name = "Yin threshold";
|
matthiasm@0
|
128 d.description = "The greedy Yin search for a low value difference function is done once a dip lower than this threshold is reached.";
|
matthiasm@0
|
129 d.unit = "";
|
matthiasm@0
|
130 d.minValue = 0.025f;
|
matthiasm@0
|
131 d.maxValue = 1.0f;
|
matthiasm@0
|
132 d.defaultValue = 0.15f;
|
matthiasm@0
|
133 d.isQuantized = true;
|
matthiasm@0
|
134 d.quantizeStep = 0.025f;
|
matthiasm@0
|
135
|
matthiasm@0
|
136 list.push_back(d);
|
matthiasm@0
|
137
|
matthiasm@0
|
138 // d.identifier = "removeunvoiced";
|
matthiasm@0
|
139 // d.name = "Remove pitches classified as unvoiced.";
|
matthiasm@0
|
140 // d.description = "If ticked, then the pitch estimator will return the most likely pitch, even if it 'thinks' there isn't any.";
|
matthiasm@0
|
141 // d.unit = "";
|
matthiasm@0
|
142 // d.minValue = 0.0f;
|
matthiasm@0
|
143 // d.maxValue = 1.0f;
|
matthiasm@0
|
144 // d.defaultValue = 0.0f;
|
matthiasm@0
|
145 // d.isQuantized = true;
|
matthiasm@0
|
146 // d.quantizeStep = 1.0f;
|
matthiasm@0
|
147 // d.valueNames.clear();
|
matthiasm@0
|
148 // list.push_back(d);
|
matthiasm@0
|
149
|
matthiasm@0
|
150 d.identifier = "outputunvoiced";
|
matthiasm@0
|
151 d.valueNames.clear();
|
matthiasm@0
|
152 d.name = "Output estimates classified as unvoiced?";
|
matthiasm@0
|
153 d.description = ".";
|
matthiasm@0
|
154 d.unit = "";
|
matthiasm@0
|
155 d.minValue = 0.0f;
|
matthiasm@0
|
156 d.maxValue = 2.0f;
|
matthiasm@0
|
157 d.defaultValue = 2.0f;
|
matthiasm@0
|
158 d.isQuantized = true;
|
matthiasm@0
|
159 d.quantizeStep = 1.0f;
|
matthiasm@0
|
160 d.valueNames.push_back("No");
|
matthiasm@0
|
161 d.valueNames.push_back("Yes");
|
matthiasm@0
|
162 d.valueNames.push_back("Yes, as negative frequencies");
|
matthiasm@0
|
163 list.push_back(d);
|
matthiasm@0
|
164
|
matthiasm@0
|
165 return list;
|
matthiasm@0
|
166 }
|
matthiasm@0
|
167
|
matthiasm@0
|
168 float
|
matthiasm@0
|
169 VampYin::getParameter(string identifier) const
|
matthiasm@0
|
170 {
|
matthiasm@0
|
171 if (identifier == "yinThreshold") {
|
matthiasm@0
|
172 return m_yinParameter;
|
matthiasm@0
|
173 }
|
matthiasm@0
|
174 if (identifier == "outputunvoiced") {
|
matthiasm@0
|
175 return m_outputUnvoiced;
|
matthiasm@0
|
176 }
|
matthiasm@0
|
177 return 0.f;
|
matthiasm@0
|
178 }
|
matthiasm@0
|
179
|
matthiasm@0
|
180 void
|
matthiasm@0
|
181 VampYin::setParameter(string identifier, float value)
|
matthiasm@0
|
182 {
|
matthiasm@0
|
183 if (identifier == "yinThreshold")
|
matthiasm@0
|
184 {
|
matthiasm@0
|
185 m_yinParameter = value;
|
matthiasm@0
|
186 }
|
matthiasm@0
|
187 if (identifier == "outputunvoiced")
|
matthiasm@0
|
188 {
|
matthiasm@0
|
189 m_outputUnvoiced = value;
|
matthiasm@0
|
190 }
|
matthiasm@0
|
191 }
|
matthiasm@0
|
192
|
matthiasm@0
|
193 VampYin::ProgramList
|
matthiasm@0
|
194 VampYin::getPrograms() const
|
matthiasm@0
|
195 {
|
matthiasm@0
|
196 ProgramList list;
|
matthiasm@0
|
197 return list;
|
matthiasm@0
|
198 }
|
matthiasm@0
|
199
|
matthiasm@0
|
200 string
|
matthiasm@0
|
201 VampYin::getCurrentProgram() const
|
matthiasm@0
|
202 {
|
matthiasm@0
|
203 return ""; // no programs
|
matthiasm@0
|
204 }
|
matthiasm@0
|
205
|
matthiasm@0
|
206 void
|
matthiasm@0
|
207 VampYin::selectProgram(string name)
|
matthiasm@0
|
208 {
|
matthiasm@0
|
209 }
|
matthiasm@0
|
210
|
matthiasm@0
|
211 VampYin::OutputList
|
matthiasm@0
|
212 VampYin::getOutputDescriptors() const
|
matthiasm@0
|
213 {
|
matthiasm@0
|
214 OutputList outputs;
|
matthiasm@0
|
215
|
matthiasm@0
|
216 OutputDescriptor d;
|
matthiasm@0
|
217
|
matthiasm@0
|
218 int outputNumber = 0;
|
matthiasm@0
|
219
|
matthiasm@0
|
220 d.identifier = "f0";
|
matthiasm@0
|
221 d.name = "Estimated f0";
|
matthiasm@0
|
222 d.description = "Estimated fundamental frequency";
|
matthiasm@0
|
223 d.unit = "Hz";
|
matthiasm@0
|
224 d.hasFixedBinCount = true;
|
matthiasm@0
|
225 d.binCount = 1;
|
matthiasm@0
|
226 d.hasKnownExtents = true;
|
matthiasm@0
|
227 d.minValue = m_fmin;
|
matthiasm@0
|
228 d.maxValue = 500;
|
matthiasm@0
|
229 d.isQuantized = false;
|
matthiasm@0
|
230 d.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
231 d.sampleRate = (m_inputSampleRate / m_stepSize);
|
matthiasm@0
|
232 d.hasDuration = false;
|
matthiasm@0
|
233 outputs.push_back(d);
|
matthiasm@0
|
234 m_outNoF0 = outputNumber++;
|
matthiasm@0
|
235
|
matthiasm@0
|
236 d.identifier = "periodicity";
|
matthiasm@0
|
237 d.name = "Periodicity";
|
matthiasm@0
|
238 d.description = "by-product of Yin f0 estimation";
|
matthiasm@0
|
239 d.unit = "";
|
matthiasm@0
|
240 d.hasFixedBinCount = true;
|
matthiasm@0
|
241 d.binCount = 1;
|
matthiasm@0
|
242 d.hasKnownExtents = true;
|
matthiasm@0
|
243 d.minValue = 0;
|
matthiasm@0
|
244 d.maxValue = 1;
|
matthiasm@0
|
245 d.isQuantized = false;
|
matthiasm@0
|
246 d.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
247 d.sampleRate = (m_inputSampleRate / m_stepSize);
|
matthiasm@0
|
248 d.hasDuration = false;
|
matthiasm@0
|
249 outputs.push_back(d);
|
matthiasm@0
|
250 m_outNoPeriodicity = outputNumber++;
|
matthiasm@0
|
251
|
matthiasm@0
|
252 d.identifier = "rms";
|
Chris@15
|
253 d.name = "Root mean square";
|
matthiasm@0
|
254 d.description = "Root mean square of the waveform.";
|
matthiasm@0
|
255 d.unit = "";
|
matthiasm@0
|
256 d.hasFixedBinCount = true;
|
matthiasm@0
|
257 d.binCount = 1;
|
matthiasm@0
|
258 d.hasKnownExtents = true;
|
matthiasm@0
|
259 d.minValue = 0;
|
matthiasm@0
|
260 d.maxValue = 1;
|
matthiasm@0
|
261 d.isQuantized = false;
|
matthiasm@0
|
262 d.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
263 d.sampleRate = (m_inputSampleRate / m_stepSize);
|
matthiasm@0
|
264 d.hasDuration = false;
|
matthiasm@0
|
265 outputs.push_back(d);
|
matthiasm@0
|
266 m_outNoRms = outputNumber++;
|
matthiasm@0
|
267
|
matthiasm@0
|
268 d.identifier = "salience";
|
matthiasm@0
|
269 d.name = "Salience";
|
matthiasm@0
|
270 d.description = "Yin Salience";
|
matthiasm@0
|
271 d.hasFixedBinCount = true;
|
matthiasm@0
|
272 d.binCount = m_blockSize / 2;
|
matthiasm@0
|
273 d.hasKnownExtents = true;
|
matthiasm@0
|
274 d.minValue = 0;
|
matthiasm@0
|
275 d.maxValue = 1;
|
matthiasm@0
|
276 d.isQuantized = false;
|
matthiasm@0
|
277 d.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
278 d.sampleRate = (m_inputSampleRate / m_stepSize);
|
matthiasm@0
|
279 d.hasDuration = false;
|
matthiasm@0
|
280 outputs.push_back(d);
|
matthiasm@0
|
281 m_outNoSalience = outputNumber++;
|
matthiasm@0
|
282
|
matthiasm@0
|
283 return outputs;
|
matthiasm@0
|
284 }
|
matthiasm@0
|
285
|
matthiasm@0
|
286 bool
|
matthiasm@0
|
287 VampYin::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@0
|
288 {
|
matthiasm@0
|
289 if (channels < getMinChannelCount() ||
|
matthiasm@0
|
290 channels > getMaxChannelCount()) return false;
|
matthiasm@0
|
291
|
Chris@9
|
292 /*
|
matthiasm@0
|
293 std::cerr << "VampYin::initialise: channels = " << channels
|
matthiasm@0
|
294 << ", stepSize = " << stepSize << ", blockSize = " << blockSize
|
matthiasm@0
|
295 << std::endl;
|
Chris@9
|
296 */
|
matthiasm@0
|
297 m_channels = channels;
|
matthiasm@0
|
298 m_stepSize = stepSize;
|
matthiasm@0
|
299 m_blockSize = blockSize;
|
matthiasm@0
|
300
|
matthiasm@0
|
301 reset();
|
matthiasm@0
|
302
|
matthiasm@0
|
303 return true;
|
matthiasm@0
|
304 }
|
matthiasm@0
|
305
|
matthiasm@0
|
306 void
|
matthiasm@0
|
307 VampYin::reset()
|
matthiasm@0
|
308 {
|
matthiasm@0
|
309 m_yin.setThreshold(m_yinParameter);
|
matthiasm@0
|
310 m_yin.setFrameSize(m_blockSize);
|
Chris@9
|
311 /*
|
matthiasm@0
|
312 std::cerr << "VampYin::reset: yin threshold set to " << (m_yinParameter)
|
matthiasm@0
|
313 << ", blockSize = " << m_blockSize
|
matthiasm@0
|
314 << std::endl;
|
Chris@9
|
315 */
|
matthiasm@0
|
316 }
|
matthiasm@0
|
317
|
matthiasm@0
|
318 VampYin::FeatureSet
|
matthiasm@0
|
319 VampYin::process(const float *const *inputBuffers, RealTime timestamp)
|
matthiasm@0
|
320 {
|
matthiasm@0
|
321 timestamp = timestamp + Vamp::RealTime::frame2RealTime(m_blockSize/4, lrintf(m_inputSampleRate));
|
matthiasm@0
|
322 FeatureSet fs;
|
matthiasm@0
|
323
|
matthiasm@0
|
324 double *dInputBuffers = new double[m_blockSize];
|
matthiasm@0
|
325 for (size_t i = 0; i < m_blockSize; ++i) dInputBuffers[i] = inputBuffers[0][i];
|
matthiasm@0
|
326
|
matthiasm@0
|
327 Yin::YinOutput yo = m_yin.process(dInputBuffers);
|
matthiasm@0
|
328 // std::cerr << "f0 in VampYin: " << yo.f0 << std::endl;
|
matthiasm@0
|
329 Feature f;
|
matthiasm@0
|
330 f.hasTimestamp = true;
|
matthiasm@0
|
331 f.timestamp = timestamp;
|
matthiasm@0
|
332 if (m_outputUnvoiced == 0.0f)
|
matthiasm@0
|
333 {
|
matthiasm@0
|
334 // std::cerr << "f0 in VampYin: " << yo.f0 << std::endl;
|
matthiasm@0
|
335 if (yo.f0 > 0 && yo.f0 < m_fmax && yo.f0 > m_fmin) {
|
matthiasm@0
|
336 f.values.push_back(yo.f0);
|
matthiasm@0
|
337 fs[m_outNoF0].push_back(f);
|
matthiasm@0
|
338 }
|
matthiasm@0
|
339 } else if (m_outputUnvoiced == 1.0f)
|
matthiasm@0
|
340 {
|
matthiasm@0
|
341 if (abs(yo.f0) < m_fmax && abs(yo.f0) > m_fmin) {
|
matthiasm@0
|
342 f.values.push_back(abs(yo.f0));
|
matthiasm@0
|
343 fs[m_outNoF0].push_back(f);
|
matthiasm@0
|
344 }
|
matthiasm@0
|
345 } else
|
matthiasm@0
|
346 {
|
matthiasm@0
|
347 if (abs(yo.f0) < m_fmax && abs(yo.f0) > m_fmin) {
|
matthiasm@0
|
348 f.values.push_back(yo.f0);
|
matthiasm@0
|
349 fs[m_outNoF0].push_back(f);
|
matthiasm@0
|
350 }
|
matthiasm@0
|
351 }
|
matthiasm@0
|
352
|
matthiasm@0
|
353 f.values.clear();
|
matthiasm@0
|
354 f.values.push_back(yo.rms);
|
matthiasm@0
|
355 fs[m_outNoRms].push_back(f);
|
matthiasm@0
|
356
|
matthiasm@0
|
357 f.values.clear();
|
matthiasm@0
|
358 for (size_t iBin = 0; iBin < yo.salience.size(); ++iBin)
|
matthiasm@0
|
359 {
|
matthiasm@0
|
360 f.values.push_back(yo.salience[iBin]);
|
matthiasm@0
|
361 }
|
matthiasm@0
|
362 fs[m_outNoSalience].push_back(f);
|
matthiasm@0
|
363
|
matthiasm@0
|
364 f.values.clear();
|
matthiasm@0
|
365 // f.values[0] = yo.periodicity;
|
matthiasm@0
|
366 f.values.push_back(yo.periodicity);
|
matthiasm@0
|
367 fs[m_outNoPeriodicity].push_back(f);
|
matthiasm@0
|
368
|
matthiasm@0
|
369 delete [] dInputBuffers;
|
matthiasm@0
|
370
|
matthiasm@0
|
371 return fs;
|
matthiasm@0
|
372 }
|
matthiasm@0
|
373
|
matthiasm@0
|
374 VampYin::FeatureSet
|
matthiasm@0
|
375 VampYin::getRemainingFeatures()
|
matthiasm@0
|
376 {
|
matthiasm@0
|
377 FeatureSet fs;
|
matthiasm@0
|
378 return fs;
|
matthiasm@0
|
379 }
|