matthiasm@0
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
Chris@9
|
2
|
matthiasm@0
|
3 /*
|
Chris@9
|
4 pYIN - A fundamental frequency estimator for monophonic audio
|
Chris@9
|
5 Centre for Digital Music, Queen Mary, University of London.
|
Chris@9
|
6
|
Chris@9
|
7 This program is free software; you can redistribute it and/or
|
Chris@9
|
8 modify it under the terms of the GNU General Public License as
|
Chris@9
|
9 published by the Free Software Foundation; either version 2 of the
|
Chris@9
|
10 License, or (at your option) any later version. See the file
|
Chris@9
|
11 COPYING included with this distribution for more information.
|
matthiasm@0
|
12 */
|
matthiasm@0
|
13
|
matthiasm@0
|
14 #include "PYIN.h"
|
matthiasm@0
|
15 #include "MonoNote.h"
|
matthiasm@0
|
16 #include "MonoPitch.h"
|
matthiasm@0
|
17
|
matthiasm@0
|
18 #include "vamp-sdk/FFT.h"
|
matthiasm@0
|
19
|
matthiasm@0
|
20 #include <vector>
|
matthiasm@0
|
21 #include <algorithm>
|
matthiasm@0
|
22
|
matthiasm@0
|
23 #include <cstdio>
|
matthiasm@0
|
24 #include <cmath>
|
matthiasm@0
|
25 #include <complex>
|
matthiasm@0
|
26
|
matthiasm@0
|
27 using std::string;
|
matthiasm@0
|
28 using std::vector;
|
matthiasm@0
|
29 using Vamp::RealTime;
|
matthiasm@0
|
30
|
matthiasm@0
|
31
|
matthiasm@0
|
32 PYIN::PYIN(float inputSampleRate) :
|
matthiasm@0
|
33 Plugin(inputSampleRate),
|
matthiasm@0
|
34 m_channels(0),
|
matthiasm@0
|
35 m_stepSize(256),
|
matthiasm@0
|
36 m_blockSize(2048),
|
matthiasm@0
|
37 m_fmin(40),
|
matthiasm@0
|
38 m_fmax(700),
|
matthiasm@0
|
39 m_yin(2048, inputSampleRate, 0.0),
|
matthiasm@0
|
40 m_oF0Candidates(0),
|
matthiasm@0
|
41 m_oF0Probs(0),
|
matthiasm@0
|
42 m_oVoicedProb(0),
|
matthiasm@0
|
43 m_oCandidateSalience(0),
|
matthiasm@0
|
44 m_oSmoothedPitchTrack(0),
|
matthiasm@0
|
45 m_oNotes(0),
|
matthiasm@0
|
46 m_threshDistr(2.0f),
|
matthiasm@6
|
47 m_outputUnvoiced(0.0f),
|
matthiasm@95
|
48 m_onsetSensitivity(0.0),
|
matthiasm@0
|
49 m_pitchProb(0),
|
matthiasm@91
|
50 m_timestamp(0),
|
matthiasm@91
|
51 m_level(0)
|
matthiasm@0
|
52 {
|
matthiasm@0
|
53 }
|
matthiasm@0
|
54
|
matthiasm@0
|
55 PYIN::~PYIN()
|
matthiasm@0
|
56 {
|
matthiasm@0
|
57 }
|
matthiasm@0
|
58
|
matthiasm@0
|
59 string
|
matthiasm@0
|
60 PYIN::getIdentifier() const
|
matthiasm@0
|
61 {
|
matthiasm@1
|
62 return "pyin";
|
matthiasm@0
|
63 }
|
matthiasm@0
|
64
|
matthiasm@0
|
65 string
|
matthiasm@0
|
66 PYIN::getName() const
|
matthiasm@0
|
67 {
|
matthiasm@1
|
68 return "pYin";
|
matthiasm@0
|
69 }
|
matthiasm@0
|
70
|
matthiasm@0
|
71 string
|
matthiasm@0
|
72 PYIN::getDescription() const
|
matthiasm@0
|
73 {
|
matthiasm@0
|
74 return "Monophonic pitch and note tracking based on a probabilistic Yin extension.";
|
matthiasm@0
|
75 }
|
matthiasm@0
|
76
|
matthiasm@0
|
77 string
|
matthiasm@0
|
78 PYIN::getMaker() const
|
matthiasm@0
|
79 {
|
matthiasm@0
|
80 return "Matthias Mauch";
|
matthiasm@0
|
81 }
|
matthiasm@0
|
82
|
matthiasm@0
|
83 int
|
matthiasm@0
|
84 PYIN::getPluginVersion() const
|
matthiasm@0
|
85 {
|
matthiasm@0
|
86 // Increment this each time you release a version that behaves
|
matthiasm@0
|
87 // differently from the previous one
|
matthiasm@0
|
88 return 1;
|
matthiasm@0
|
89 }
|
matthiasm@0
|
90
|
matthiasm@0
|
91 string
|
matthiasm@0
|
92 PYIN::getCopyright() const
|
matthiasm@0
|
93 {
|
matthiasm@0
|
94 return "GPL";
|
matthiasm@0
|
95 }
|
matthiasm@0
|
96
|
matthiasm@0
|
97 PYIN::InputDomain
|
matthiasm@0
|
98 PYIN::getInputDomain() const
|
matthiasm@0
|
99 {
|
matthiasm@0
|
100 return TimeDomain;
|
matthiasm@0
|
101 }
|
matthiasm@0
|
102
|
matthiasm@0
|
103 size_t
|
matthiasm@0
|
104 PYIN::getPreferredBlockSize() const
|
matthiasm@0
|
105 {
|
matthiasm@0
|
106 return 2048;
|
matthiasm@0
|
107 }
|
matthiasm@0
|
108
|
matthiasm@0
|
109 size_t
|
matthiasm@0
|
110 PYIN::getPreferredStepSize() const
|
matthiasm@0
|
111 {
|
matthiasm@0
|
112 return 256;
|
matthiasm@0
|
113 }
|
matthiasm@0
|
114
|
matthiasm@0
|
115 size_t
|
matthiasm@0
|
116 PYIN::getMinChannelCount() const
|
matthiasm@0
|
117 {
|
matthiasm@0
|
118 return 1;
|
matthiasm@0
|
119 }
|
matthiasm@0
|
120
|
matthiasm@0
|
121 size_t
|
matthiasm@0
|
122 PYIN::getMaxChannelCount() const
|
matthiasm@0
|
123 {
|
matthiasm@0
|
124 return 1;
|
matthiasm@0
|
125 }
|
matthiasm@0
|
126
|
matthiasm@0
|
127 PYIN::ParameterList
|
matthiasm@0
|
128 PYIN::getParameterDescriptors() const
|
matthiasm@0
|
129 {
|
matthiasm@0
|
130 ParameterList list;
|
matthiasm@0
|
131
|
matthiasm@0
|
132 ParameterDescriptor d;
|
matthiasm@0
|
133
|
matthiasm@0
|
134 d.identifier = "threshdistr";
|
matthiasm@0
|
135 d.name = "Yin threshold distribution";
|
matthiasm@0
|
136 d.description = ".";
|
matthiasm@0
|
137 d.unit = "";
|
matthiasm@0
|
138 d.minValue = 0.0f;
|
matthiasm@0
|
139 d.maxValue = 7.0f;
|
matthiasm@0
|
140 d.defaultValue = 2.0f;
|
matthiasm@0
|
141 d.isQuantized = true;
|
matthiasm@0
|
142 d.quantizeStep = 1.0f;
|
matthiasm@0
|
143 d.valueNames.push_back("Uniform");
|
matthiasm@0
|
144 d.valueNames.push_back("Beta (mean 0.10)");
|
matthiasm@0
|
145 d.valueNames.push_back("Beta (mean 0.15)");
|
matthiasm@0
|
146 d.valueNames.push_back("Beta (mean 0.20)");
|
matthiasm@0
|
147 d.valueNames.push_back("Beta (mean 0.30)");
|
matthiasm@0
|
148 d.valueNames.push_back("Single Value 0.10");
|
matthiasm@0
|
149 d.valueNames.push_back("Single Value 0.15");
|
matthiasm@0
|
150 d.valueNames.push_back("Single Value 0.20");
|
matthiasm@0
|
151 list.push_back(d);
|
matthiasm@0
|
152
|
matthiasm@0
|
153 d.identifier = "outputunvoiced";
|
matthiasm@0
|
154 d.valueNames.clear();
|
matthiasm@0
|
155 d.name = "Output estimates classified as unvoiced?";
|
matthiasm@0
|
156 d.description = ".";
|
matthiasm@0
|
157 d.unit = "";
|
matthiasm@0
|
158 d.minValue = 0.0f;
|
matthiasm@0
|
159 d.maxValue = 2.0f;
|
matthiasm@6
|
160 d.defaultValue = 0.0f;
|
matthiasm@0
|
161 d.isQuantized = true;
|
matthiasm@0
|
162 d.quantizeStep = 1.0f;
|
matthiasm@0
|
163 d.valueNames.push_back("No");
|
matthiasm@0
|
164 d.valueNames.push_back("Yes");
|
matthiasm@0
|
165 d.valueNames.push_back("Yes, as negative frequencies");
|
matthiasm@0
|
166 list.push_back(d);
|
matthiasm@0
|
167
|
matthiasm@95
|
168 d.identifier = "onsetsensitivity";
|
matthiasm@95
|
169 d.valueNames.clear();
|
matthiasm@95
|
170 d.name = "Onset sensitivity";
|
matthiasm@95
|
171 d.description = "Adds additional note onsets when RMS increases.";
|
matthiasm@95
|
172 d.unit = "";
|
matthiasm@95
|
173 d.minValue = 0.0f;
|
matthiasm@95
|
174 d.maxValue = 1.0f;
|
matthiasm@95
|
175 d.defaultValue = 0.0f;
|
matthiasm@95
|
176 d.isQuantized = false;
|
matthiasm@95
|
177 list.push_back(d);
|
matthiasm@95
|
178
|
matthiasm@0
|
179 return list;
|
matthiasm@0
|
180 }
|
matthiasm@0
|
181
|
matthiasm@0
|
182 float
|
matthiasm@0
|
183 PYIN::getParameter(string identifier) const
|
matthiasm@0
|
184 {
|
matthiasm@0
|
185 if (identifier == "threshdistr") {
|
matthiasm@0
|
186 return m_threshDistr;
|
matthiasm@0
|
187 }
|
matthiasm@0
|
188 if (identifier == "outputunvoiced") {
|
matthiasm@0
|
189 return m_outputUnvoiced;
|
matthiasm@0
|
190 }
|
matthiasm@95
|
191 if (identifier == "onsetsensitivity") {
|
matthiasm@95
|
192 return m_onsetSensitivity;
|
matthiasm@95
|
193 }
|
matthiasm@0
|
194 return 0.f;
|
matthiasm@0
|
195 }
|
matthiasm@0
|
196
|
matthiasm@0
|
197 void
|
matthiasm@0
|
198 PYIN::setParameter(string identifier, float value)
|
matthiasm@0
|
199 {
|
matthiasm@0
|
200 if (identifier == "threshdistr")
|
matthiasm@0
|
201 {
|
matthiasm@0
|
202 m_threshDistr = value;
|
matthiasm@0
|
203 }
|
matthiasm@0
|
204 if (identifier == "outputunvoiced")
|
matthiasm@0
|
205 {
|
matthiasm@0
|
206 m_outputUnvoiced = value;
|
matthiasm@0
|
207 }
|
matthiasm@95
|
208 if (identifier == "onsetsensitivity")
|
matthiasm@95
|
209 {
|
matthiasm@95
|
210 m_onsetSensitivity = value;
|
matthiasm@95
|
211 }
|
matthiasm@0
|
212 }
|
matthiasm@0
|
213
|
matthiasm@0
|
214 PYIN::ProgramList
|
matthiasm@0
|
215 PYIN::getPrograms() const
|
matthiasm@0
|
216 {
|
matthiasm@0
|
217 ProgramList list;
|
matthiasm@0
|
218 return list;
|
matthiasm@0
|
219 }
|
matthiasm@0
|
220
|
matthiasm@0
|
221 string
|
matthiasm@0
|
222 PYIN::getCurrentProgram() const
|
matthiasm@0
|
223 {
|
matthiasm@0
|
224 return ""; // no programs
|
matthiasm@0
|
225 }
|
matthiasm@0
|
226
|
matthiasm@0
|
227 void
|
matthiasm@0
|
228 PYIN::selectProgram(string name)
|
matthiasm@0
|
229 {
|
matthiasm@0
|
230 }
|
matthiasm@0
|
231
|
matthiasm@0
|
232 PYIN::OutputList
|
matthiasm@0
|
233 PYIN::getOutputDescriptors() const
|
matthiasm@0
|
234 {
|
matthiasm@0
|
235 OutputList outputs;
|
matthiasm@0
|
236
|
matthiasm@0
|
237 OutputDescriptor d;
|
matthiasm@0
|
238
|
matthiasm@0
|
239 int outputNumber = 0;
|
matthiasm@0
|
240
|
matthiasm@0
|
241 d.identifier = "f0candidates";
|
matthiasm@0
|
242 d.name = "F0 Candidates";
|
matthiasm@0
|
243 d.description = "Estimated fundamental frequency candidates.";
|
matthiasm@0
|
244 d.unit = "Hz";
|
matthiasm@0
|
245 d.hasFixedBinCount = false;
|
matthiasm@0
|
246 // d.binCount = 1;
|
matthiasm@0
|
247 d.hasKnownExtents = true;
|
matthiasm@0
|
248 d.minValue = m_fmin;
|
matthiasm@0
|
249 d.maxValue = 500;
|
matthiasm@0
|
250 d.isQuantized = false;
|
matthiasm@0
|
251 d.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
252 d.sampleRate = (m_inputSampleRate / m_stepSize);
|
matthiasm@0
|
253 d.hasDuration = false;
|
matthiasm@0
|
254 outputs.push_back(d);
|
matthiasm@0
|
255 m_oF0Candidates = outputNumber++;
|
matthiasm@0
|
256
|
matthiasm@0
|
257 d.identifier = "f0probs";
|
matthiasm@0
|
258 d.name = "Candidate Probabilities";
|
matthiasm@0
|
259 d.description = "Probabilities of estimated fundamental frequency candidates.";
|
matthiasm@0
|
260 d.unit = "";
|
matthiasm@0
|
261 d.hasFixedBinCount = false;
|
matthiasm@0
|
262 // d.binCount = 1;
|
matthiasm@0
|
263 d.hasKnownExtents = true;
|
matthiasm@0
|
264 d.minValue = 0;
|
matthiasm@0
|
265 d.maxValue = 1;
|
matthiasm@0
|
266 d.isQuantized = false;
|
matthiasm@0
|
267 d.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
268 d.sampleRate = (m_inputSampleRate / m_stepSize);
|
matthiasm@0
|
269 d.hasDuration = false;
|
matthiasm@0
|
270 outputs.push_back(d);
|
matthiasm@0
|
271 m_oF0Probs = outputNumber++;
|
matthiasm@0
|
272
|
matthiasm@0
|
273 d.identifier = "voicedprob";
|
matthiasm@0
|
274 d.name = "Voiced Probability";
|
matthiasm@0
|
275 d.description = "Probability that the signal is voiced according to Probabilistic Yin.";
|
matthiasm@0
|
276 d.unit = "";
|
matthiasm@0
|
277 d.hasFixedBinCount = true;
|
matthiasm@0
|
278 d.binCount = 1;
|
matthiasm@0
|
279 d.hasKnownExtents = true;
|
matthiasm@0
|
280 d.minValue = 0;
|
matthiasm@0
|
281 d.maxValue = 1;
|
matthiasm@0
|
282 d.isQuantized = false;
|
matthiasm@0
|
283 d.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
284 d.sampleRate = (m_inputSampleRate / m_stepSize);
|
matthiasm@0
|
285 d.hasDuration = false;
|
matthiasm@0
|
286 outputs.push_back(d);
|
matthiasm@0
|
287 m_oVoicedProb = outputNumber++;
|
matthiasm@0
|
288
|
matthiasm@0
|
289 d.identifier = "candidatesalience";
|
matthiasm@0
|
290 d.name = "Candidate Salience";
|
matthiasm@0
|
291 d.description = "Candidate Salience";
|
matthiasm@0
|
292 d.hasFixedBinCount = true;
|
matthiasm@0
|
293 d.binCount = m_blockSize / 2;
|
matthiasm@0
|
294 d.hasKnownExtents = true;
|
matthiasm@0
|
295 d.minValue = 0;
|
matthiasm@0
|
296 d.maxValue = 1;
|
matthiasm@0
|
297 d.isQuantized = false;
|
matthiasm@0
|
298 d.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
299 d.sampleRate = (m_inputSampleRate / m_stepSize);
|
matthiasm@0
|
300 d.hasDuration = false;
|
matthiasm@0
|
301 outputs.push_back(d);
|
matthiasm@0
|
302 m_oCandidateSalience = outputNumber++;
|
matthiasm@0
|
303
|
matthiasm@0
|
304 d.identifier = "smoothedpitchtrack";
|
matthiasm@0
|
305 d.name = "Smoothed Pitch Track";
|
matthiasm@0
|
306 d.description = ".";
|
matthiasm@0
|
307 d.unit = "Hz";
|
matthiasm@0
|
308 d.hasFixedBinCount = true;
|
matthiasm@0
|
309 d.binCount = 1;
|
matthiasm@0
|
310 d.hasKnownExtents = false;
|
matthiasm@0
|
311 // d.minValue = 0;
|
matthiasm@0
|
312 // d.maxValue = 1;
|
matthiasm@0
|
313 d.isQuantized = false;
|
matthiasm@0
|
314 d.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
315 d.sampleRate = (m_inputSampleRate / m_stepSize);
|
matthiasm@0
|
316 d.hasDuration = false;
|
matthiasm@0
|
317 outputs.push_back(d);
|
matthiasm@0
|
318 m_oSmoothedPitchTrack = outputNumber++;
|
matthiasm@0
|
319
|
matthiasm@0
|
320 d.identifier = "notes";
|
matthiasm@0
|
321 d.name = "Notes";
|
matthiasm@0
|
322 d.description = "Derived fixed-pitch note frequencies";
|
matthiasm@0
|
323 // d.unit = "MIDI unit";
|
matthiasm@0
|
324 d.unit = "Hz";
|
matthiasm@0
|
325 d.hasFixedBinCount = true;
|
matthiasm@0
|
326 d.binCount = 1;
|
matthiasm@0
|
327 d.hasKnownExtents = false;
|
matthiasm@0
|
328 d.isQuantized = false;
|
matthiasm@0
|
329 d.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@0
|
330 d.sampleRate = (m_inputSampleRate / m_stepSize);
|
matthiasm@0
|
331 d.hasDuration = true;
|
matthiasm@0
|
332 outputs.push_back(d);
|
matthiasm@0
|
333 m_oNotes = outputNumber++;
|
matthiasm@0
|
334
|
matthiasm@0
|
335 return outputs;
|
matthiasm@0
|
336 }
|
matthiasm@0
|
337
|
matthiasm@0
|
338 bool
|
matthiasm@0
|
339 PYIN::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@0
|
340 {
|
matthiasm@0
|
341 if (channels < getMinChannelCount() ||
|
matthiasm@0
|
342 channels > getMaxChannelCount()) return false;
|
matthiasm@0
|
343
|
Chris@9
|
344 /*
|
matthiasm@0
|
345 std::cerr << "PYIN::initialise: channels = " << channels
|
matthiasm@0
|
346 << ", stepSize = " << stepSize << ", blockSize = " << blockSize
|
matthiasm@0
|
347 << std::endl;
|
Chris@9
|
348 */
|
matthiasm@0
|
349 m_channels = channels;
|
matthiasm@0
|
350 m_stepSize = stepSize;
|
matthiasm@0
|
351 m_blockSize = blockSize;
|
matthiasm@0
|
352
|
matthiasm@0
|
353 reset();
|
matthiasm@0
|
354
|
matthiasm@0
|
355 return true;
|
matthiasm@0
|
356 }
|
matthiasm@0
|
357
|
matthiasm@0
|
358 void
|
matthiasm@0
|
359 PYIN::reset()
|
matthiasm@0
|
360 {
|
matthiasm@0
|
361 m_yin.setThresholdDistr(m_threshDistr);
|
matthiasm@0
|
362 m_yin.setFrameSize(m_blockSize);
|
matthiasm@0
|
363
|
matthiasm@0
|
364 m_pitchProb.clear();
|
matthiasm@0
|
365 m_timestamp.clear();
|
matthiasm@91
|
366 m_level.clear();
|
Chris@9
|
367 /*
|
matthiasm@0
|
368 std::cerr << "PYIN::reset"
|
matthiasm@0
|
369 << ", blockSize = " << m_blockSize
|
matthiasm@0
|
370 << std::endl;
|
Chris@9
|
371 */
|
matthiasm@0
|
372 }
|
matthiasm@0
|
373
|
matthiasm@0
|
374 PYIN::FeatureSet
|
matthiasm@0
|
375 PYIN::process(const float *const *inputBuffers, RealTime timestamp)
|
matthiasm@0
|
376 {
|
matthiasm@0
|
377 timestamp = timestamp + Vamp::RealTime::frame2RealTime(m_blockSize/4, lrintf(m_inputSampleRate));
|
matthiasm@0
|
378 FeatureSet fs;
|
matthiasm@0
|
379
|
matthiasm@0
|
380 double *dInputBuffers = new double[m_blockSize];
|
matthiasm@0
|
381 for (size_t i = 0; i < m_blockSize; ++i) dInputBuffers[i] = inputBuffers[0][i];
|
matthiasm@0
|
382
|
matthiasm@0
|
383 Yin::YinOutput yo = m_yin.processProbabilisticYin(dInputBuffers);
|
matthiasm@27
|
384 delete [] dInputBuffers;
|
matthiasm@27
|
385
|
matthiasm@91
|
386 m_level.push_back(yo.rms);
|
matthiasm@91
|
387
|
matthiasm@27
|
388 // First, get the things out of the way that we don't want to output
|
matthiasm@27
|
389 // immediately, but instead save for later.
|
matthiasm@27
|
390 vector<pair<double, double> > tempPitchProb;
|
matthiasm@27
|
391 for (size_t iCandidate = 0; iCandidate < yo.freqProb.size(); ++iCandidate)
|
matthiasm@27
|
392 {
|
matthiasm@27
|
393 double tempPitch = 12 * std::log(yo.freqProb[iCandidate].first/440)/std::log(2.) + 69;
|
matthiasm@27
|
394 tempPitchProb.push_back(pair<double, double>
|
matthiasm@27
|
395 (tempPitch, yo.freqProb[iCandidate].second));
|
matthiasm@27
|
396 }
|
matthiasm@27
|
397 m_pitchProb.push_back(tempPitchProb);
|
matthiasm@27
|
398 m_timestamp.push_back(timestamp);
|
matthiasm@27
|
399
|
matthiasm@27
|
400 // F0 CANDIDATES
|
matthiasm@0
|
401 Feature f;
|
matthiasm@0
|
402 f.hasTimestamp = true;
|
matthiasm@0
|
403 f.timestamp = timestamp;
|
matthiasm@0
|
404 for (size_t i = 0; i < yo.freqProb.size(); ++i)
|
matthiasm@0
|
405 {
|
matthiasm@0
|
406 f.values.push_back(yo.freqProb[i].first);
|
matthiasm@0
|
407 }
|
matthiasm@0
|
408 fs[m_oF0Candidates].push_back(f);
|
matthiasm@0
|
409
|
matthiasm@27
|
410 // VOICEDPROB
|
matthiasm@0
|
411 f.values.clear();
|
matthiasm@0
|
412 float voicedProb = 0;
|
matthiasm@0
|
413 for (size_t i = 0; i < yo.freqProb.size(); ++i)
|
matthiasm@0
|
414 {
|
matthiasm@0
|
415 f.values.push_back(yo.freqProb[i].second);
|
matthiasm@0
|
416 voicedProb += yo.freqProb[i].second;
|
matthiasm@0
|
417 }
|
matthiasm@0
|
418 fs[m_oF0Probs].push_back(f);
|
matthiasm@0
|
419
|
matthiasm@0
|
420 f.values.clear();
|
matthiasm@0
|
421 f.values.push_back(voicedProb);
|
matthiasm@0
|
422 fs[m_oVoicedProb].push_back(f);
|
matthiasm@0
|
423
|
matthiasm@27
|
424 // SALIENCE -- maybe this should eventually disappear
|
matthiasm@0
|
425 f.values.clear();
|
matthiasm@0
|
426 float salienceSum = 0;
|
matthiasm@0
|
427 for (size_t iBin = 0; iBin < yo.salience.size(); ++iBin)
|
matthiasm@0
|
428 {
|
matthiasm@0
|
429 f.values.push_back(yo.salience[iBin]);
|
matthiasm@0
|
430 salienceSum += yo.salience[iBin];
|
matthiasm@0
|
431 }
|
matthiasm@0
|
432 fs[m_oCandidateSalience].push_back(f);
|
matthiasm@0
|
433
|
matthiasm@0
|
434 return fs;
|
matthiasm@0
|
435 }
|
matthiasm@0
|
436
|
matthiasm@0
|
437 PYIN::FeatureSet
|
matthiasm@0
|
438 PYIN::getRemainingFeatures()
|
matthiasm@0
|
439 {
|
matthiasm@0
|
440 FeatureSet fs;
|
matthiasm@0
|
441 Feature f;
|
matthiasm@0
|
442 f.hasTimestamp = true;
|
matthiasm@0
|
443 f.hasDuration = false;
|
matthiasm@0
|
444
|
Chris@4
|
445 if (m_pitchProb.empty()) {
|
Chris@4
|
446 return fs;
|
Chris@4
|
447 }
|
Chris@4
|
448
|
matthiasm@0
|
449 // MONO-PITCH STUFF
|
matthiasm@0
|
450 MonoPitch mp;
|
matthiasm@0
|
451 vector<float> mpOut = mp.process(m_pitchProb);
|
matthiasm@0
|
452 for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame)
|
matthiasm@0
|
453 {
|
matthiasm@0
|
454 if (mpOut[iFrame] < 0 && (m_outputUnvoiced==0)) continue;
|
matthiasm@0
|
455 f.timestamp = m_timestamp[iFrame];
|
matthiasm@0
|
456 f.values.clear();
|
matthiasm@0
|
457 if (m_outputUnvoiced == 1)
|
matthiasm@0
|
458 {
|
matthiasm@26
|
459 f.values.push_back(fabs(mpOut[iFrame]));
|
matthiasm@0
|
460 } else {
|
matthiasm@0
|
461 f.values.push_back(mpOut[iFrame]);
|
matthiasm@0
|
462 }
|
matthiasm@0
|
463
|
matthiasm@0
|
464 fs[m_oSmoothedPitchTrack].push_back(f);
|
matthiasm@0
|
465 }
|
matthiasm@0
|
466
|
matthiasm@1
|
467 // MONO-NOTE STUFF
|
matthiasm@1
|
468 MonoNote mn;
|
matthiasm@1
|
469 std::vector<std::vector<std::pair<double, double> > > smoothedPitch;
|
matthiasm@1
|
470 for (size_t iFrame = 0; iFrame < mpOut.size(); ++iFrame) {
|
matthiasm@1
|
471 std::vector<std::pair<double, double> > temp;
|
matthiasm@1
|
472 if (mpOut[iFrame] > 0)
|
matthiasm@1
|
473 {
|
matthiasm@1
|
474 double tempPitch = 12 * std::log(mpOut[iFrame]/440)/std::log(2.) + 69;
|
matthiasm@1
|
475 temp.push_back(std::pair<double,double>(tempPitch, .9));
|
matthiasm@1
|
476 }
|
matthiasm@1
|
477 smoothedPitch.push_back(temp);
|
matthiasm@1
|
478 }
|
matthiasm@0
|
479 // vector<MonoNote::FrameOutput> mnOut = mn.process(m_pitchProb);
|
matthiasm@1
|
480 vector<MonoNote::FrameOutput> mnOut = mn.process(smoothedPitch);
|
matthiasm@1
|
481
|
matthiasm@6
|
482 // turning feature into a note feature
|
matthiasm@1
|
483 f.hasTimestamp = true;
|
matthiasm@1
|
484 f.hasDuration = true;
|
matthiasm@1
|
485 f.values.clear();
|
matthiasm@6
|
486
|
matthiasm@6
|
487 int onsetFrame = 0;
|
matthiasm@6
|
488 bool isVoiced = 0;
|
matthiasm@6
|
489 bool oldIsVoiced = 0;
|
matthiasm@6
|
490 size_t nFrame = m_pitchProb.size();
|
matthiasm@1
|
491
|
matthiasm@6
|
492 std::vector<float> notePitchTrack; // collects pitches for one note at a time
|
matthiasm@6
|
493 for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
|
matthiasm@1
|
494 {
|
matthiasm@91
|
495 isVoiced = mnOut[iFrame].noteState < 3
|
matthiasm@91
|
496 && smoothedPitch[iFrame].size() > 0
|
matthiasm@94
|
497 && (iFrame >= nFrame-2
|
matthiasm@95
|
498 || ((m_level[iFrame]/m_level[iFrame+2]) > m_onsetSensitivity));
|
matthiasm@91
|
499 // std::cerr << m_level[iFrame]/m_level[iFrame-1] << std::endl;
|
matthiasm@6
|
500 if (isVoiced && iFrame != nFrame-1)
|
matthiasm@1
|
501 {
|
matthiasm@6
|
502 if (oldIsVoiced == 0) // beginning of a note
|
matthiasm@1
|
503 {
|
matthiasm@6
|
504 onsetFrame = iFrame;
|
matthiasm@6
|
505 notePitchTrack.clear();
|
matthiasm@1
|
506 }
|
matthiasm@6
|
507 float pitch = smoothedPitch[iFrame][0].first;
|
matthiasm@6
|
508 notePitchTrack.push_back(pitch); // add to the note's pitch track
|
matthiasm@6
|
509 } else { // not currently voiced
|
matthiasm@95
|
510 if (oldIsVoiced == 1 && notePitchTrack.size() > 17) // end of note
|
matthiasm@6
|
511 {
|
matthiasm@1
|
512 std::sort(notePitchTrack.begin(), notePitchTrack.end());
|
matthiasm@6
|
513 float medianPitch = notePitchTrack[notePitchTrack.size()/2];
|
matthiasm@6
|
514 float medianFreq = std::pow(2,(medianPitch - 69) / 12) * 440;
|
matthiasm@6
|
515 f.values.clear();
|
matthiasm@6
|
516 f.values.push_back(medianFreq);
|
matthiasm@6
|
517 f.timestamp = m_timestamp[onsetFrame];
|
matthiasm@6
|
518 f.duration = m_timestamp[iFrame] - m_timestamp[onsetFrame];
|
matthiasm@5
|
519 fs[m_oNotes].push_back(f);
|
matthiasm@1
|
520 }
|
matthiasm@1
|
521 }
|
matthiasm@6
|
522 oldIsVoiced = isVoiced;
|
matthiasm@1
|
523 }
|
matthiasm@0
|
524 return fs;
|
matthiasm@0
|
525 }
|