matthiasm@32
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
matthiasm@32
|
2
|
matthiasm@32
|
3 /*
|
matthiasm@32
|
4 pYIN - A fundamental frequency estimator for monophonic audio
|
matthiasm@32
|
5 Centre for Digital Music, Queen Mary, University of London.
|
matthiasm@32
|
6
|
matthiasm@32
|
7 This program is free software; you can redistribute it and/or
|
matthiasm@32
|
8 modify it under the terms of the GNU General Public License as
|
matthiasm@32
|
9 published by the Free Software Foundation; either version 2 of the
|
matthiasm@32
|
10 License, or (at your option) any later version. See the file
|
matthiasm@32
|
11 COPYING included with this distribution for more information.
|
matthiasm@32
|
12 */
|
matthiasm@32
|
13
|
matthiasm@32
|
14 #include "LocalCandidatePYIN.h"
|
matthiasm@32
|
15 #include "MonoPitch.h"
|
matthiasm@32
|
16 #include "YinUtil.h"
|
matthiasm@32
|
17
|
matthiasm@32
|
18 #include "vamp-sdk/FFT.h"
|
matthiasm@32
|
19
|
matthiasm@32
|
20 #include <vector>
|
matthiasm@32
|
21 #include <algorithm>
|
matthiasm@32
|
22
|
matthiasm@32
|
23 #include <cstdio>
|
matthiasm@32
|
24 #include <sstream>
|
matthiasm@32
|
25 // #include <iostream>
|
matthiasm@32
|
26 #include <cmath>
|
matthiasm@32
|
27 #include <complex>
|
Chris@39
|
28 #include <map>
|
matthiasm@32
|
29
|
matthiasm@46
|
30 #include <boost/math/distributions.hpp>
|
matthiasm@46
|
31
|
matthiasm@32
|
32 using std::string;
|
matthiasm@32
|
33 using std::vector;
|
Chris@39
|
34 using std::map;
|
matthiasm@32
|
35 using Vamp::RealTime;
|
matthiasm@32
|
36
|
matthiasm@32
|
37
|
matthiasm@32
|
38 LocalCandidatePYIN::LocalCandidatePYIN(float inputSampleRate) :
|
matthiasm@32
|
39 Plugin(inputSampleRate),
|
matthiasm@32
|
40 m_channels(0),
|
matthiasm@32
|
41 m_stepSize(256),
|
matthiasm@32
|
42 m_blockSize(2048),
|
matthiasm@32
|
43 m_fmin(40),
|
matthiasm@32
|
44 m_fmax(700),
|
matthiasm@32
|
45 m_oPitchTrackCandidates(0),
|
matthiasm@32
|
46 m_threshDistr(2.0f),
|
matthiasm@32
|
47 m_outputUnvoiced(0.0f),
|
matthiasm@70
|
48 m_preciseTime(0.0f),
|
matthiasm@32
|
49 m_pitchProb(0),
|
matthiasm@32
|
50 m_timestamp(0),
|
matthiasm@48
|
51 m_nCandidate(13)
|
matthiasm@32
|
52 {
|
matthiasm@32
|
53 }
|
matthiasm@32
|
54
|
matthiasm@32
|
55 LocalCandidatePYIN::~LocalCandidatePYIN()
|
matthiasm@32
|
56 {
|
matthiasm@32
|
57 }
|
matthiasm@32
|
58
|
matthiasm@32
|
59 string
|
matthiasm@32
|
60 LocalCandidatePYIN::getIdentifier() const
|
matthiasm@32
|
61 {
|
matthiasm@32
|
62 return "localcandidatepyin";
|
matthiasm@32
|
63 }
|
matthiasm@32
|
64
|
matthiasm@32
|
65 string
|
matthiasm@32
|
66 LocalCandidatePYIN::getName() const
|
matthiasm@32
|
67 {
|
matthiasm@32
|
68 return "Local Candidate PYIN";
|
matthiasm@32
|
69 }
|
matthiasm@32
|
70
|
matthiasm@32
|
71 string
|
matthiasm@32
|
72 LocalCandidatePYIN::getDescription() const
|
matthiasm@32
|
73 {
|
matthiasm@32
|
74 return "Monophonic pitch and note tracking based on a probabilistic Yin extension.";
|
matthiasm@32
|
75 }
|
matthiasm@32
|
76
|
matthiasm@32
|
77 string
|
matthiasm@32
|
78 LocalCandidatePYIN::getMaker() const
|
matthiasm@32
|
79 {
|
matthiasm@32
|
80 return "Matthias Mauch";
|
matthiasm@32
|
81 }
|
matthiasm@32
|
82
|
matthiasm@32
|
83 int
|
matthiasm@32
|
84 LocalCandidatePYIN::getPluginVersion() const
|
matthiasm@32
|
85 {
|
matthiasm@32
|
86 // Increment this each time you release a version that behaves
|
matthiasm@32
|
87 // differently from the previous one
|
Chris@125
|
88 return 2;
|
matthiasm@32
|
89 }
|
matthiasm@32
|
90
|
matthiasm@32
|
91 string
|
matthiasm@32
|
92 LocalCandidatePYIN::getCopyright() const
|
matthiasm@32
|
93 {
|
matthiasm@32
|
94 return "GPL";
|
matthiasm@32
|
95 }
|
matthiasm@32
|
96
|
matthiasm@32
|
97 LocalCandidatePYIN::InputDomain
|
matthiasm@32
|
98 LocalCandidatePYIN::getInputDomain() const
|
matthiasm@32
|
99 {
|
matthiasm@32
|
100 return TimeDomain;
|
matthiasm@32
|
101 }
|
matthiasm@32
|
102
|
matthiasm@32
|
103 size_t
|
matthiasm@32
|
104 LocalCandidatePYIN::getPreferredBlockSize() const
|
matthiasm@32
|
105 {
|
matthiasm@32
|
106 return 2048;
|
matthiasm@32
|
107 }
|
matthiasm@32
|
108
|
matthiasm@32
|
109 size_t
|
matthiasm@32
|
110 LocalCandidatePYIN::getPreferredStepSize() const
|
matthiasm@32
|
111 {
|
matthiasm@32
|
112 return 256;
|
matthiasm@32
|
113 }
|
matthiasm@32
|
114
|
matthiasm@32
|
115 size_t
|
matthiasm@32
|
116 LocalCandidatePYIN::getMinChannelCount() const
|
matthiasm@32
|
117 {
|
matthiasm@32
|
118 return 1;
|
matthiasm@32
|
119 }
|
matthiasm@32
|
120
|
matthiasm@32
|
121 size_t
|
matthiasm@32
|
122 LocalCandidatePYIN::getMaxChannelCount() const
|
matthiasm@32
|
123 {
|
matthiasm@32
|
124 return 1;
|
matthiasm@32
|
125 }
|
matthiasm@32
|
126
|
matthiasm@32
|
127 LocalCandidatePYIN::ParameterList
|
matthiasm@32
|
128 LocalCandidatePYIN::getParameterDescriptors() const
|
matthiasm@32
|
129 {
|
matthiasm@32
|
130 ParameterList list;
|
matthiasm@32
|
131
|
matthiasm@32
|
132 ParameterDescriptor d;
|
matthiasm@32
|
133
|
matthiasm@32
|
134 d.identifier = "threshdistr";
|
matthiasm@32
|
135 d.name = "Yin threshold distribution";
|
matthiasm@32
|
136 d.description = ".";
|
matthiasm@32
|
137 d.unit = "";
|
matthiasm@32
|
138 d.minValue = 0.0f;
|
matthiasm@32
|
139 d.maxValue = 7.0f;
|
matthiasm@32
|
140 d.defaultValue = 2.0f;
|
matthiasm@32
|
141 d.isQuantized = true;
|
matthiasm@32
|
142 d.quantizeStep = 1.0f;
|
matthiasm@32
|
143 d.valueNames.push_back("Uniform");
|
matthiasm@32
|
144 d.valueNames.push_back("Beta (mean 0.10)");
|
matthiasm@32
|
145 d.valueNames.push_back("Beta (mean 0.15)");
|
matthiasm@32
|
146 d.valueNames.push_back("Beta (mean 0.20)");
|
matthiasm@32
|
147 d.valueNames.push_back("Beta (mean 0.30)");
|
matthiasm@32
|
148 d.valueNames.push_back("Single Value 0.10");
|
matthiasm@32
|
149 d.valueNames.push_back("Single Value 0.15");
|
matthiasm@32
|
150 d.valueNames.push_back("Single Value 0.20");
|
matthiasm@32
|
151 list.push_back(d);
|
matthiasm@32
|
152
|
matthiasm@32
|
153 d.identifier = "outputunvoiced";
|
matthiasm@32
|
154 d.valueNames.clear();
|
matthiasm@32
|
155 d.name = "Output estimates classified as unvoiced?";
|
matthiasm@32
|
156 d.description = ".";
|
matthiasm@32
|
157 d.unit = "";
|
matthiasm@32
|
158 d.minValue = 0.0f;
|
matthiasm@32
|
159 d.maxValue = 2.0f;
|
matthiasm@32
|
160 d.defaultValue = 0.0f;
|
matthiasm@32
|
161 d.isQuantized = true;
|
matthiasm@32
|
162 d.quantizeStep = 1.0f;
|
matthiasm@32
|
163 d.valueNames.push_back("No");
|
matthiasm@32
|
164 d.valueNames.push_back("Yes");
|
matthiasm@32
|
165 d.valueNames.push_back("Yes, as negative frequencies");
|
matthiasm@32
|
166 list.push_back(d);
|
matthiasm@32
|
167
|
matthiasm@70
|
168 d.identifier = "precisetime";
|
matthiasm@70
|
169 d.valueNames.clear();
|
matthiasm@70
|
170 d.name = "Use non-standard precise YIN timing (slow).";
|
matthiasm@70
|
171 d.description = ".";
|
matthiasm@70
|
172 d.unit = "";
|
matthiasm@70
|
173 d.minValue = 0.0f;
|
matthiasm@70
|
174 d.maxValue = 1.0f;
|
matthiasm@70
|
175 d.defaultValue = 0.0f;
|
matthiasm@70
|
176 d.isQuantized = true;
|
matthiasm@70
|
177 d.quantizeStep = 1.0f;
|
matthiasm@70
|
178 list.push_back(d);
|
matthiasm@70
|
179
|
matthiasm@32
|
180 return list;
|
matthiasm@32
|
181 }
|
matthiasm@32
|
182
|
matthiasm@32
|
183 float
|
matthiasm@32
|
184 LocalCandidatePYIN::getParameter(string identifier) const
|
matthiasm@32
|
185 {
|
matthiasm@32
|
186 if (identifier == "threshdistr") {
|
matthiasm@32
|
187 return m_threshDistr;
|
matthiasm@32
|
188 }
|
matthiasm@32
|
189 if (identifier == "outputunvoiced") {
|
matthiasm@32
|
190 return m_outputUnvoiced;
|
matthiasm@32
|
191 }
|
matthiasm@70
|
192 if (identifier == "precisetime") {
|
matthiasm@70
|
193 return m_preciseTime;
|
matthiasm@70
|
194 }
|
matthiasm@32
|
195 return 0.f;
|
matthiasm@32
|
196 }
|
matthiasm@32
|
197
|
matthiasm@32
|
198 void
|
matthiasm@32
|
199 LocalCandidatePYIN::setParameter(string identifier, float value)
|
matthiasm@32
|
200 {
|
matthiasm@32
|
201 if (identifier == "threshdistr")
|
matthiasm@32
|
202 {
|
matthiasm@32
|
203 m_threshDistr = value;
|
matthiasm@32
|
204 }
|
matthiasm@32
|
205 if (identifier == "outputunvoiced")
|
matthiasm@32
|
206 {
|
matthiasm@32
|
207 m_outputUnvoiced = value;
|
matthiasm@32
|
208 }
|
matthiasm@70
|
209 if (identifier == "precisetime")
|
matthiasm@70
|
210 {
|
matthiasm@70
|
211 m_preciseTime = value;
|
matthiasm@70
|
212 }
|
matthiasm@32
|
213 }
|
matthiasm@32
|
214
|
matthiasm@32
|
215 LocalCandidatePYIN::ProgramList
|
matthiasm@32
|
216 LocalCandidatePYIN::getPrograms() const
|
matthiasm@32
|
217 {
|
matthiasm@32
|
218 ProgramList list;
|
matthiasm@32
|
219 return list;
|
matthiasm@32
|
220 }
|
matthiasm@32
|
221
|
matthiasm@32
|
222 string
|
matthiasm@32
|
223 LocalCandidatePYIN::getCurrentProgram() const
|
matthiasm@32
|
224 {
|
matthiasm@32
|
225 return ""; // no programs
|
matthiasm@32
|
226 }
|
matthiasm@32
|
227
|
matthiasm@32
|
228 void
|
matthiasm@32
|
229 LocalCandidatePYIN::selectProgram(string name)
|
matthiasm@32
|
230 {
|
matthiasm@32
|
231 }
|
matthiasm@32
|
232
|
matthiasm@32
|
233 LocalCandidatePYIN::OutputList
|
matthiasm@32
|
234 LocalCandidatePYIN::getOutputDescriptors() const
|
matthiasm@32
|
235 {
|
matthiasm@32
|
236 OutputList outputs;
|
matthiasm@32
|
237
|
matthiasm@32
|
238 OutputDescriptor d;
|
matthiasm@32
|
239
|
matthiasm@32
|
240 d.identifier = "pitchtrackcandidates";
|
matthiasm@32
|
241 d.name = "Pitch track candidates";
|
matthiasm@32
|
242 d.description = "Multiple candidate pitch tracks.";
|
matthiasm@32
|
243 d.unit = "Hz";
|
matthiasm@32
|
244 d.hasFixedBinCount = false;
|
matthiasm@32
|
245 d.hasKnownExtents = true;
|
matthiasm@32
|
246 d.minValue = m_fmin;
|
Chris@39
|
247 d.maxValue = 500; //!!!???
|
matthiasm@32
|
248 d.isQuantized = false;
|
matthiasm@32
|
249 d.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@32
|
250 d.sampleRate = (m_inputSampleRate / m_stepSize);
|
matthiasm@32
|
251 d.hasDuration = false;
|
matthiasm@32
|
252 outputs.push_back(d);
|
matthiasm@32
|
253
|
matthiasm@32
|
254 return outputs;
|
matthiasm@32
|
255 }
|
matthiasm@32
|
256
|
matthiasm@32
|
257 bool
|
matthiasm@32
|
258 LocalCandidatePYIN::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@32
|
259 {
|
matthiasm@32
|
260 if (channels < getMinChannelCount() ||
|
matthiasm@32
|
261 channels > getMaxChannelCount()) return false;
|
matthiasm@32
|
262
|
matthiasm@32
|
263 /*
|
matthiasm@32
|
264 std::cerr << "LocalCandidatePYIN::initialise: channels = " << channels
|
matthiasm@32
|
265 << ", stepSize = " << stepSize << ", blockSize = " << blockSize
|
matthiasm@32
|
266 << std::endl;
|
matthiasm@32
|
267 */
|
matthiasm@32
|
268 m_channels = channels;
|
matthiasm@32
|
269 m_stepSize = stepSize;
|
matthiasm@32
|
270 m_blockSize = blockSize;
|
matthiasm@32
|
271
|
matthiasm@32
|
272 reset();
|
matthiasm@32
|
273
|
matthiasm@32
|
274 return true;
|
matthiasm@32
|
275 }
|
matthiasm@32
|
276
|
matthiasm@32
|
277 void
|
matthiasm@32
|
278 LocalCandidatePYIN::reset()
|
matthiasm@32
|
279 {
|
matthiasm@32
|
280 m_pitchProb.clear();
|
matthiasm@32
|
281 m_timestamp.clear();
|
matthiasm@32
|
282 /*
|
matthiasm@32
|
283 std::cerr << "LocalCandidatePYIN::reset"
|
matthiasm@32
|
284 << ", blockSize = " << m_blockSize
|
matthiasm@32
|
285 << std::endl;
|
matthiasm@32
|
286 */
|
matthiasm@32
|
287 }
|
matthiasm@32
|
288
|
matthiasm@32
|
289 LocalCandidatePYIN::FeatureSet
|
matthiasm@32
|
290 LocalCandidatePYIN::process(const float *const *inputBuffers, RealTime timestamp)
|
matthiasm@32
|
291 {
|
matthiasm@77
|
292 int offset = m_preciseTime == 1.0 ? m_blockSize/2 : m_blockSize/4;
|
matthiasm@77
|
293 timestamp = timestamp + Vamp::RealTime::frame2RealTime(offset, lrintf(m_inputSampleRate));
|
matthiasm@32
|
294
|
matthiasm@32
|
295 double *dInputBuffers = new double[m_blockSize];
|
matthiasm@32
|
296 for (size_t i = 0; i < m_blockSize; ++i) dInputBuffers[i] = inputBuffers[0][i];
|
matthiasm@32
|
297
|
matthiasm@32
|
298 size_t yinBufferSize = m_blockSize/2;
|
matthiasm@32
|
299 double* yinBuffer = new double[yinBufferSize];
|
matthiasm@70
|
300 if (!m_preciseTime) YinUtil::fastDifference(dInputBuffers, yinBuffer, yinBufferSize);
|
matthiasm@70
|
301 else YinUtil::slowDifference(dInputBuffers, yinBuffer, yinBufferSize);
|
matthiasm@32
|
302
|
matthiasm@32
|
303 delete [] dInputBuffers;
|
matthiasm@32
|
304
|
matthiasm@32
|
305 YinUtil::cumulativeDifference(yinBuffer, yinBufferSize);
|
matthiasm@32
|
306
|
matthiasm@46
|
307 float minFrequency = 60;
|
matthiasm@46
|
308 float maxFrequency = 900;
|
matthiasm@46
|
309 vector<double> peakProbability = YinUtil::yinProb(yinBuffer,
|
matthiasm@46
|
310 m_threshDistr,
|
matthiasm@46
|
311 yinBufferSize,
|
matthiasm@46
|
312 m_inputSampleRate/maxFrequency,
|
matthiasm@46
|
313 m_inputSampleRate/minFrequency);
|
matthiasm@46
|
314
|
matthiasm@46
|
315 vector<pair<double, double> > tempPitchProb;
|
matthiasm@46
|
316 for (size_t iBuf = 0; iBuf < yinBufferSize; ++iBuf)
|
matthiasm@32
|
317 {
|
matthiasm@46
|
318 if (peakProbability[iBuf] > 0)
|
matthiasm@32
|
319 {
|
matthiasm@46
|
320 double currentF0 =
|
matthiasm@46
|
321 m_inputSampleRate * (1.0 /
|
matthiasm@46
|
322 YinUtil::parabolicInterpolation(yinBuffer, iBuf, yinBufferSize));
|
matthiasm@46
|
323 double tempPitch = 12 * std::log(currentF0/440)/std::log(2.) + 69;
|
matthiasm@46
|
324 tempPitchProb.push_back(pair<double, double>(tempPitch, peakProbability[iBuf]));
|
matthiasm@32
|
325 }
|
matthiasm@32
|
326 }
|
matthiasm@46
|
327 m_pitchProb.push_back(tempPitchProb);
|
matthiasm@32
|
328 m_timestamp.push_back(timestamp);
|
matthiasm@32
|
329
|
matthiasm@76
|
330 delete[] yinBuffer;
|
matthiasm@76
|
331
|
Chris@39
|
332 return FeatureSet();
|
matthiasm@32
|
333 }
|
matthiasm@32
|
334
|
matthiasm@32
|
335 LocalCandidatePYIN::FeatureSet
|
matthiasm@32
|
336 LocalCandidatePYIN::getRemainingFeatures()
|
matthiasm@32
|
337 {
|
Chris@39
|
338 // timestamp -> candidate number -> value
|
Chris@39
|
339 map<RealTime, map<int, float> > featureValues;
|
matthiasm@32
|
340
|
matthiasm@37
|
341 // std::cerr << "in remaining features" << std::endl;
|
matthiasm@32
|
342
|
matthiasm@32
|
343 if (m_pitchProb.empty()) {
|
Chris@39
|
344 return FeatureSet();
|
matthiasm@32
|
345 }
|
matthiasm@32
|
346
|
matthiasm@32
|
347 // MONO-PITCH STUFF
|
matthiasm@32
|
348 MonoPitch mp;
|
matthiasm@32
|
349 size_t nFrame = m_timestamp.size();
|
matthiasm@32
|
350 vector<vector<float> > pitchTracks;
|
matthiasm@32
|
351 vector<float> freqSum = vector<float>(m_nCandidate);
|
matthiasm@32
|
352 vector<float> freqNumber = vector<float>(m_nCandidate);
|
matthiasm@32
|
353 vector<float> freqMean = vector<float>(m_nCandidate);
|
matthiasm@44
|
354
|
matthiasm@46
|
355 boost::math::normal normalDist(0, 8); // semitones sd
|
matthiasm@46
|
356 float maxNormalDist = boost::math::pdf(normalDist, 0);
|
matthiasm@46
|
357
|
matthiasm@110
|
358 // Viterbi-decode multiple times with different frequencies emphasised
|
matthiasm@32
|
359 for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate)
|
matthiasm@32
|
360 {
|
matthiasm@32
|
361 pitchTracks.push_back(vector<float>(nFrame));
|
matthiasm@46
|
362 vector<vector<pair<double,double> > > tempPitchProb;
|
matthiasm@46
|
363 float centrePitch = 45 + 3 * iCandidate;
|
matthiasm@109
|
364
|
matthiasm@46
|
365 for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) {
|
matthiasm@60
|
366 tempPitchProb.push_back(vector<pair<double,double> >());
|
matthiasm@46
|
367 float sumProb = 0;
|
matthiasm@46
|
368 float pitch = 0;
|
matthiasm@46
|
369 float prob = 0;
|
matthiasm@109
|
370 for (size_t iProb = 0; iProb < m_pitchProb[iFrame].size(); ++iProb)
|
matthiasm@109
|
371 {
|
matthiasm@109
|
372 pitch = m_pitchProb[iFrame][iProb].first;
|
matthiasm@109
|
373 prob = m_pitchProb[iFrame][iProb].second *
|
matthiasm@109
|
374 boost::math::pdf(normalDist, pitch-centrePitch) /
|
matthiasm@109
|
375 maxNormalDist * 2;
|
matthiasm@46
|
376 sumProb += prob;
|
matthiasm@109
|
377 tempPitchProb[iFrame].push_back(
|
matthiasm@109
|
378 pair<double,double>(pitch,prob));
|
matthiasm@46
|
379 }
|
matthiasm@109
|
380 for (size_t iProb = 0; iProb < m_pitchProb[iFrame].size(); ++iProb)
|
matthiasm@109
|
381 {
|
matthiasm@46
|
382 tempPitchProb[iFrame][iProb].second /= sumProb;
|
matthiasm@46
|
383 }
|
matthiasm@46
|
384 }
|
matthiasm@109
|
385
|
matthiasm@46
|
386 vector<float> mpOut = mp.process(tempPitchProb);
|
matthiasm@44
|
387 float prevFreq = 0;
|
matthiasm@32
|
388 for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
|
matthiasm@32
|
389 {
|
matthiasm@32
|
390 if (mpOut[iFrame] > 0) {
|
matthiasm@109
|
391
|
matthiasm@32
|
392 pitchTracks[iCandidate][iFrame] = mpOut[iFrame];
|
matthiasm@32
|
393 freqSum[iCandidate] += mpOut[iFrame];
|
matthiasm@32
|
394 freqNumber[iCandidate]++;
|
matthiasm@44
|
395 prevFreq = mpOut[iFrame];
|
matthiasm@109
|
396
|
matthiasm@32
|
397 }
|
matthiasm@32
|
398 }
|
matthiasm@32
|
399 freqMean[iCandidate] = freqSum[iCandidate]*1.0/freqNumber[iCandidate];
|
matthiasm@32
|
400 }
|
matthiasm@32
|
401
|
matthiasm@37
|
402 // find near duplicate pitch tracks
|
matthiasm@34
|
403 vector<size_t> duplicates;
|
matthiasm@34
|
404 for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate) {
|
matthiasm@34
|
405 for (size_t jCandidate = iCandidate+1; jCandidate < m_nCandidate; ++jCandidate) {
|
matthiasm@34
|
406 size_t countEqual = 0;
|
matthiasm@34
|
407 for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
|
matthiasm@34
|
408 {
|
matthiasm@46
|
409 if ((pitchTracks[jCandidate][iFrame] == 0 && pitchTracks[iCandidate][iFrame] == 0) ||
|
matthiasm@46
|
410 fabs(pitchTracks[iCandidate][iFrame]/pitchTracks[jCandidate][iFrame]-1)<0.01)
|
matthiasm@34
|
411 countEqual++;
|
matthiasm@34
|
412 }
|
matthiasm@46
|
413 // std::cerr << "proportion equal: " << (countEqual * 1.0 / nFrame) << std::endl;
|
matthiasm@34
|
414 if (countEqual * 1.0 / nFrame > 0.8) {
|
matthiasm@34
|
415 if (freqNumber[iCandidate] > freqNumber[jCandidate]) {
|
matthiasm@34
|
416 duplicates.push_back(jCandidate);
|
matthiasm@46
|
417 } else if (iCandidate < jCandidate) {
|
matthiasm@34
|
418 duplicates.push_back(iCandidate);
|
matthiasm@34
|
419 }
|
matthiasm@34
|
420 }
|
matthiasm@34
|
421 }
|
matthiasm@34
|
422 }
|
matthiasm@34
|
423
|
matthiasm@37
|
424 // now find non-duplicate pitch tracks
|
Chris@39
|
425 map<int, int> candidateActuals;
|
Chris@39
|
426 map<int, std::string> candidateLabels;
|
Chris@39
|
427
|
matthiasm@46
|
428 vector<vector<float> > outputFrequencies;
|
matthiasm@60
|
429 for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) outputFrequencies.push_back(vector<float>());
|
matthiasm@46
|
430
|
matthiasm@32
|
431 int actualCandidateNumber = 0;
|
matthiasm@110
|
432 for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate)
|
matthiasm@110
|
433 {
|
matthiasm@34
|
434 bool isDuplicate = false;
|
matthiasm@34
|
435 for (size_t i = 0; i < duplicates.size(); ++i) {
|
matthiasm@110
|
436
|
matthiasm@34
|
437 if (duplicates[i] == iCandidate) {
|
matthiasm@34
|
438 isDuplicate = true;
|
matthiasm@34
|
439 break;
|
matthiasm@34
|
440 }
|
matthiasm@34
|
441 }
|
matthiasm@46
|
442 if (!isDuplicate && freqNumber[iCandidate] > 0.5*nFrame)
|
matthiasm@32
|
443 {
|
matthiasm@32
|
444 std::ostringstream convert;
|
matthiasm@32
|
445 convert << actualCandidateNumber++;
|
Chris@39
|
446 candidateLabels[iCandidate] = convert.str();
|
Chris@39
|
447 candidateActuals[iCandidate] = actualCandidateNumber;
|
matthiasm@46
|
448 // std::cerr << iCandidate << " " << actualCandidateNumber << " " << freqNumber[iCandidate] << " " << freqMean[iCandidate] << std::endl;
|
matthiasm@32
|
449 for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
|
matthiasm@32
|
450 {
|
matthiasm@32
|
451 if (pitchTracks[iCandidate][iFrame] > 0)
|
matthiasm@32
|
452 {
|
matthiasm@46
|
453 // featureValues[m_timestamp[iFrame]][iCandidate] =
|
matthiasm@46
|
454 // pitchTracks[iCandidate][iFrame];
|
matthiasm@46
|
455 outputFrequencies[iFrame].push_back(pitchTracks[iCandidate][iFrame]);
|
matthiasm@60
|
456 } else {
|
matthiasm@60
|
457 outputFrequencies[iFrame].push_back(0);
|
matthiasm@32
|
458 }
|
matthiasm@32
|
459 }
|
matthiasm@32
|
460 }
|
matthiasm@43
|
461 // fs[m_oPitchTrackCandidates].push_back(f);
|
matthiasm@32
|
462 }
|
matthiasm@32
|
463
|
Chris@39
|
464 // adapt our features so as to return a stack of candidate values
|
Chris@39
|
465 // per frame
|
Chris@39
|
466
|
Chris@39
|
467 FeatureSet fs;
|
Chris@39
|
468
|
matthiasm@46
|
469 for (size_t iFrame = 0; iFrame < nFrame; ++iFrame){
|
Chris@39
|
470 Feature f;
|
Chris@39
|
471 f.hasTimestamp = true;
|
matthiasm@46
|
472 f.timestamp = m_timestamp[iFrame];
|
matthiasm@46
|
473 f.values = outputFrequencies[iFrame];
|
Chris@39
|
474 fs[0].push_back(f);
|
Chris@39
|
475 }
|
matthiasm@46
|
476
|
matthiasm@46
|
477 // I stopped using Chris's map stuff below because I couldn't get my head around it
|
matthiasm@46
|
478 //
|
matthiasm@46
|
479 // for (map<RealTime, map<int, float> >::const_iterator i =
|
matthiasm@46
|
480 // featureValues.begin(); i != featureValues.end(); ++i) {
|
matthiasm@46
|
481 // Feature f;
|
matthiasm@46
|
482 // f.hasTimestamp = true;
|
matthiasm@46
|
483 // f.timestamp = i->first;
|
matthiasm@46
|
484 // int nextCandidate = candidateActuals.begin()->second;
|
matthiasm@46
|
485 // for (map<int, float>::const_iterator j =
|
matthiasm@46
|
486 // i->second.begin(); j != i->second.end(); ++j) {
|
matthiasm@46
|
487 // while (candidateActuals[j->first] > nextCandidate) {
|
matthiasm@46
|
488 // f.values.push_back(0);
|
matthiasm@46
|
489 // ++nextCandidate;
|
matthiasm@46
|
490 // }
|
matthiasm@46
|
491 // f.values.push_back(j->second);
|
matthiasm@46
|
492 // nextCandidate = j->first + 1;
|
matthiasm@46
|
493 // }
|
matthiasm@46
|
494 // //!!! can't use labels?
|
matthiasm@46
|
495 // fs[0].push_back(f);
|
matthiasm@46
|
496 // }
|
matthiasm@32
|
497
|
matthiasm@32
|
498 return fs;
|
matthiasm@32
|
499 }
|