matthiasm@32
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
matthiasm@32
|
2
|
matthiasm@32
|
3 /*
|
matthiasm@32
|
4 pYIN - A fundamental frequency estimator for monophonic audio
|
matthiasm@32
|
5 Centre for Digital Music, Queen Mary, University of London.
|
matthiasm@32
|
6
|
matthiasm@32
|
7 This program is free software; you can redistribute it and/or
|
matthiasm@32
|
8 modify it under the terms of the GNU General Public License as
|
matthiasm@32
|
9 published by the Free Software Foundation; either version 2 of the
|
matthiasm@32
|
10 License, or (at your option) any later version. See the file
|
matthiasm@32
|
11 COPYING included with this distribution for more information.
|
matthiasm@32
|
12 */
|
matthiasm@32
|
13
|
matthiasm@32
|
14 #include "LocalCandidatePYIN.h"
|
matthiasm@32
|
15 #include "MonoPitch.h"
|
matthiasm@32
|
16 #include "YinUtil.h"
|
matthiasm@32
|
17
|
matthiasm@32
|
18 #include "vamp-sdk/FFT.h"
|
matthiasm@32
|
19
|
matthiasm@32
|
20 #include <vector>
|
matthiasm@32
|
21 #include <algorithm>
|
matthiasm@32
|
22
|
matthiasm@32
|
23 #include <cstdio>
|
matthiasm@32
|
24 #include <sstream>
|
matthiasm@32
|
25 // #include <iostream>
|
matthiasm@32
|
26 #include <cmath>
|
matthiasm@32
|
27 #include <complex>
|
Chris@39
|
28 #include <map>
|
matthiasm@32
|
29
|
matthiasm@46
|
30 #include <boost/math/distributions.hpp>
|
matthiasm@46
|
31
|
matthiasm@32
|
32 using std::string;
|
matthiasm@32
|
33 using std::vector;
|
Chris@39
|
34 using std::map;
|
matthiasm@32
|
35 using Vamp::RealTime;
|
matthiasm@32
|
36
|
matthiasm@32
|
37
|
matthiasm@32
|
38 LocalCandidatePYIN::LocalCandidatePYIN(float inputSampleRate) :
|
matthiasm@32
|
39 Plugin(inputSampleRate),
|
matthiasm@32
|
40 m_channels(0),
|
matthiasm@32
|
41 m_stepSize(256),
|
matthiasm@32
|
42 m_blockSize(2048),
|
matthiasm@32
|
43 m_fmin(40),
|
matthiasm@32
|
44 m_fmax(700),
|
matthiasm@32
|
45 m_yin(2048, inputSampleRate, 0.0),
|
matthiasm@32
|
46 m_oPitchTrackCandidates(0),
|
matthiasm@32
|
47 m_threshDistr(2.0f),
|
matthiasm@32
|
48 m_outputUnvoiced(0.0f),
|
matthiasm@32
|
49 m_pitchProb(0),
|
matthiasm@32
|
50 m_timestamp(0),
|
matthiasm@48
|
51 m_nCandidate(13)
|
matthiasm@32
|
52 {
|
matthiasm@32
|
53 }
|
matthiasm@32
|
54
|
matthiasm@32
|
55 LocalCandidatePYIN::~LocalCandidatePYIN()
|
matthiasm@32
|
56 {
|
matthiasm@32
|
57 }
|
matthiasm@32
|
58
|
matthiasm@32
|
59 string
|
matthiasm@32
|
60 LocalCandidatePYIN::getIdentifier() const
|
matthiasm@32
|
61 {
|
matthiasm@32
|
62 return "localcandidatepyin";
|
matthiasm@32
|
63 }
|
matthiasm@32
|
64
|
matthiasm@32
|
65 string
|
matthiasm@32
|
66 LocalCandidatePYIN::getName() const
|
matthiasm@32
|
67 {
|
matthiasm@32
|
68 return "Local Candidate PYIN";
|
matthiasm@32
|
69 }
|
matthiasm@32
|
70
|
matthiasm@32
|
71 string
|
matthiasm@32
|
72 LocalCandidatePYIN::getDescription() const
|
matthiasm@32
|
73 {
|
matthiasm@32
|
74 return "Monophonic pitch and note tracking based on a probabilistic Yin extension.";
|
matthiasm@32
|
75 }
|
matthiasm@32
|
76
|
matthiasm@32
|
77 string
|
matthiasm@32
|
78 LocalCandidatePYIN::getMaker() const
|
matthiasm@32
|
79 {
|
matthiasm@32
|
80 return "Matthias Mauch";
|
matthiasm@32
|
81 }
|
matthiasm@32
|
82
|
matthiasm@32
|
83 int
|
matthiasm@32
|
84 LocalCandidatePYIN::getPluginVersion() const
|
matthiasm@32
|
85 {
|
matthiasm@32
|
86 // Increment this each time you release a version that behaves
|
matthiasm@32
|
87 // differently from the previous one
|
matthiasm@32
|
88 return 1;
|
matthiasm@32
|
89 }
|
matthiasm@32
|
90
|
matthiasm@32
|
91 string
|
matthiasm@32
|
92 LocalCandidatePYIN::getCopyright() const
|
matthiasm@32
|
93 {
|
matthiasm@32
|
94 return "GPL";
|
matthiasm@32
|
95 }
|
matthiasm@32
|
96
|
matthiasm@32
|
97 LocalCandidatePYIN::InputDomain
|
matthiasm@32
|
98 LocalCandidatePYIN::getInputDomain() const
|
matthiasm@32
|
99 {
|
matthiasm@32
|
100 return TimeDomain;
|
matthiasm@32
|
101 }
|
matthiasm@32
|
102
|
matthiasm@32
|
103 size_t
|
matthiasm@32
|
104 LocalCandidatePYIN::getPreferredBlockSize() const
|
matthiasm@32
|
105 {
|
matthiasm@32
|
106 return 2048;
|
matthiasm@32
|
107 }
|
matthiasm@32
|
108
|
matthiasm@32
|
109 size_t
|
matthiasm@32
|
110 LocalCandidatePYIN::getPreferredStepSize() const
|
matthiasm@32
|
111 {
|
matthiasm@32
|
112 return 256;
|
matthiasm@32
|
113 }
|
matthiasm@32
|
114
|
matthiasm@32
|
115 size_t
|
matthiasm@32
|
116 LocalCandidatePYIN::getMinChannelCount() const
|
matthiasm@32
|
117 {
|
matthiasm@32
|
118 return 1;
|
matthiasm@32
|
119 }
|
matthiasm@32
|
120
|
matthiasm@32
|
121 size_t
|
matthiasm@32
|
122 LocalCandidatePYIN::getMaxChannelCount() const
|
matthiasm@32
|
123 {
|
matthiasm@32
|
124 return 1;
|
matthiasm@32
|
125 }
|
matthiasm@32
|
126
|
matthiasm@32
|
127 LocalCandidatePYIN::ParameterList
|
matthiasm@32
|
128 LocalCandidatePYIN::getParameterDescriptors() const
|
matthiasm@32
|
129 {
|
matthiasm@32
|
130 ParameterList list;
|
matthiasm@32
|
131
|
matthiasm@32
|
132 ParameterDescriptor d;
|
matthiasm@32
|
133
|
matthiasm@32
|
134 d.identifier = "threshdistr";
|
matthiasm@32
|
135 d.name = "Yin threshold distribution";
|
matthiasm@32
|
136 d.description = ".";
|
matthiasm@32
|
137 d.unit = "";
|
matthiasm@32
|
138 d.minValue = 0.0f;
|
matthiasm@32
|
139 d.maxValue = 7.0f;
|
matthiasm@32
|
140 d.defaultValue = 2.0f;
|
matthiasm@32
|
141 d.isQuantized = true;
|
matthiasm@32
|
142 d.quantizeStep = 1.0f;
|
matthiasm@32
|
143 d.valueNames.push_back("Uniform");
|
matthiasm@32
|
144 d.valueNames.push_back("Beta (mean 0.10)");
|
matthiasm@32
|
145 d.valueNames.push_back("Beta (mean 0.15)");
|
matthiasm@32
|
146 d.valueNames.push_back("Beta (mean 0.20)");
|
matthiasm@32
|
147 d.valueNames.push_back("Beta (mean 0.30)");
|
matthiasm@32
|
148 d.valueNames.push_back("Single Value 0.10");
|
matthiasm@32
|
149 d.valueNames.push_back("Single Value 0.15");
|
matthiasm@32
|
150 d.valueNames.push_back("Single Value 0.20");
|
matthiasm@32
|
151 list.push_back(d);
|
matthiasm@32
|
152
|
matthiasm@32
|
153 d.identifier = "outputunvoiced";
|
matthiasm@32
|
154 d.valueNames.clear();
|
matthiasm@32
|
155 d.name = "Output estimates classified as unvoiced?";
|
matthiasm@32
|
156 d.description = ".";
|
matthiasm@32
|
157 d.unit = "";
|
matthiasm@32
|
158 d.minValue = 0.0f;
|
matthiasm@32
|
159 d.maxValue = 2.0f;
|
matthiasm@32
|
160 d.defaultValue = 0.0f;
|
matthiasm@32
|
161 d.isQuantized = true;
|
matthiasm@32
|
162 d.quantizeStep = 1.0f;
|
matthiasm@32
|
163 d.valueNames.push_back("No");
|
matthiasm@32
|
164 d.valueNames.push_back("Yes");
|
matthiasm@32
|
165 d.valueNames.push_back("Yes, as negative frequencies");
|
matthiasm@32
|
166 list.push_back(d);
|
matthiasm@32
|
167
|
matthiasm@32
|
168 return list;
|
matthiasm@32
|
169 }
|
matthiasm@32
|
170
|
matthiasm@32
|
171 float
|
matthiasm@32
|
172 LocalCandidatePYIN::getParameter(string identifier) const
|
matthiasm@32
|
173 {
|
matthiasm@32
|
174 if (identifier == "threshdistr") {
|
matthiasm@32
|
175 return m_threshDistr;
|
matthiasm@32
|
176 }
|
matthiasm@32
|
177 if (identifier == "outputunvoiced") {
|
matthiasm@32
|
178 return m_outputUnvoiced;
|
matthiasm@32
|
179 }
|
matthiasm@32
|
180 return 0.f;
|
matthiasm@32
|
181 }
|
matthiasm@32
|
182
|
matthiasm@32
|
183 void
|
matthiasm@32
|
184 LocalCandidatePYIN::setParameter(string identifier, float value)
|
matthiasm@32
|
185 {
|
matthiasm@32
|
186 if (identifier == "threshdistr")
|
matthiasm@32
|
187 {
|
matthiasm@32
|
188 m_threshDistr = value;
|
matthiasm@32
|
189 }
|
matthiasm@32
|
190 if (identifier == "outputunvoiced")
|
matthiasm@32
|
191 {
|
matthiasm@32
|
192 m_outputUnvoiced = value;
|
matthiasm@32
|
193 }
|
matthiasm@32
|
194
|
matthiasm@32
|
195 }
|
matthiasm@32
|
196
|
matthiasm@32
|
197 LocalCandidatePYIN::ProgramList
|
matthiasm@32
|
198 LocalCandidatePYIN::getPrograms() const
|
matthiasm@32
|
199 {
|
matthiasm@32
|
200 ProgramList list;
|
matthiasm@32
|
201 return list;
|
matthiasm@32
|
202 }
|
matthiasm@32
|
203
|
matthiasm@32
|
204 string
|
matthiasm@32
|
205 LocalCandidatePYIN::getCurrentProgram() const
|
matthiasm@32
|
206 {
|
matthiasm@32
|
207 return ""; // no programs
|
matthiasm@32
|
208 }
|
matthiasm@32
|
209
|
matthiasm@32
|
210 void
|
matthiasm@32
|
211 LocalCandidatePYIN::selectProgram(string name)
|
matthiasm@32
|
212 {
|
matthiasm@32
|
213 }
|
matthiasm@32
|
214
|
matthiasm@32
|
215 LocalCandidatePYIN::OutputList
|
matthiasm@32
|
216 LocalCandidatePYIN::getOutputDescriptors() const
|
matthiasm@32
|
217 {
|
matthiasm@32
|
218 OutputList outputs;
|
matthiasm@32
|
219
|
matthiasm@32
|
220 OutputDescriptor d;
|
matthiasm@32
|
221
|
matthiasm@32
|
222 int outputNumber = 0;
|
matthiasm@32
|
223
|
matthiasm@32
|
224 d.identifier = "pitchtrackcandidates";
|
matthiasm@32
|
225 d.name = "Pitch track candidates";
|
matthiasm@32
|
226 d.description = "Multiple candidate pitch tracks.";
|
matthiasm@32
|
227 d.unit = "Hz";
|
matthiasm@32
|
228 d.hasFixedBinCount = false;
|
matthiasm@32
|
229 d.hasKnownExtents = true;
|
matthiasm@32
|
230 d.minValue = m_fmin;
|
Chris@39
|
231 d.maxValue = 500; //!!!???
|
matthiasm@32
|
232 d.isQuantized = false;
|
matthiasm@32
|
233 d.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@32
|
234 d.sampleRate = (m_inputSampleRate / m_stepSize);
|
matthiasm@32
|
235 d.hasDuration = false;
|
matthiasm@32
|
236 outputs.push_back(d);
|
matthiasm@32
|
237
|
matthiasm@32
|
238 return outputs;
|
matthiasm@32
|
239 }
|
matthiasm@32
|
240
|
matthiasm@32
|
241 bool
|
matthiasm@32
|
242 LocalCandidatePYIN::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@32
|
243 {
|
matthiasm@32
|
244 if (channels < getMinChannelCount() ||
|
matthiasm@32
|
245 channels > getMaxChannelCount()) return false;
|
matthiasm@32
|
246
|
matthiasm@32
|
247 /*
|
matthiasm@32
|
248 std::cerr << "LocalCandidatePYIN::initialise: channels = " << channels
|
matthiasm@32
|
249 << ", stepSize = " << stepSize << ", blockSize = " << blockSize
|
matthiasm@32
|
250 << std::endl;
|
matthiasm@32
|
251 */
|
matthiasm@32
|
252 m_channels = channels;
|
matthiasm@32
|
253 m_stepSize = stepSize;
|
matthiasm@32
|
254 m_blockSize = blockSize;
|
matthiasm@32
|
255
|
matthiasm@32
|
256 reset();
|
matthiasm@32
|
257
|
matthiasm@32
|
258 return true;
|
matthiasm@32
|
259 }
|
matthiasm@32
|
260
|
matthiasm@32
|
261 void
|
matthiasm@32
|
262 LocalCandidatePYIN::reset()
|
matthiasm@32
|
263 {
|
matthiasm@32
|
264 m_yin.setThresholdDistr(m_threshDistr);
|
matthiasm@32
|
265 m_yin.setFrameSize(m_blockSize);
|
matthiasm@32
|
266
|
matthiasm@32
|
267 m_pitchProb.clear();
|
matthiasm@32
|
268 m_timestamp.clear();
|
matthiasm@32
|
269 /*
|
matthiasm@32
|
270 std::cerr << "LocalCandidatePYIN::reset"
|
matthiasm@32
|
271 << ", blockSize = " << m_blockSize
|
matthiasm@32
|
272 << std::endl;
|
matthiasm@32
|
273 */
|
matthiasm@32
|
274 }
|
matthiasm@32
|
275
|
matthiasm@32
|
276 LocalCandidatePYIN::FeatureSet
|
matthiasm@32
|
277 LocalCandidatePYIN::process(const float *const *inputBuffers, RealTime timestamp)
|
matthiasm@32
|
278 {
|
matthiasm@60
|
279 timestamp = timestamp + Vamp::RealTime::frame2RealTime(m_blockSize/2, lrintf(m_inputSampleRate));
|
matthiasm@32
|
280
|
matthiasm@32
|
281 double *dInputBuffers = new double[m_blockSize];
|
matthiasm@32
|
282 for (size_t i = 0; i < m_blockSize; ++i) dInputBuffers[i] = inputBuffers[0][i];
|
matthiasm@32
|
283
|
matthiasm@32
|
284 size_t yinBufferSize = m_blockSize/2;
|
matthiasm@32
|
285 double* yinBuffer = new double[yinBufferSize];
|
matthiasm@60
|
286 YinUtil::slowDifference(dInputBuffers, yinBuffer, yinBufferSize);
|
matthiasm@32
|
287
|
matthiasm@32
|
288 delete [] dInputBuffers;
|
matthiasm@32
|
289
|
matthiasm@32
|
290 YinUtil::cumulativeDifference(yinBuffer, yinBufferSize);
|
matthiasm@32
|
291
|
matthiasm@46
|
292 float minFrequency = 60;
|
matthiasm@46
|
293 float maxFrequency = 900;
|
matthiasm@46
|
294 vector<double> peakProbability = YinUtil::yinProb(yinBuffer,
|
matthiasm@46
|
295 m_threshDistr,
|
matthiasm@46
|
296 yinBufferSize,
|
matthiasm@46
|
297 m_inputSampleRate/maxFrequency,
|
matthiasm@46
|
298 m_inputSampleRate/minFrequency);
|
matthiasm@46
|
299
|
matthiasm@46
|
300 vector<pair<double, double> > tempPitchProb;
|
matthiasm@46
|
301 for (size_t iBuf = 0; iBuf < yinBufferSize; ++iBuf)
|
matthiasm@32
|
302 {
|
matthiasm@46
|
303 if (peakProbability[iBuf] > 0)
|
matthiasm@32
|
304 {
|
matthiasm@46
|
305 double currentF0 =
|
matthiasm@46
|
306 m_inputSampleRate * (1.0 /
|
matthiasm@46
|
307 YinUtil::parabolicInterpolation(yinBuffer, iBuf, yinBufferSize));
|
matthiasm@46
|
308 double tempPitch = 12 * std::log(currentF0/440)/std::log(2.) + 69;
|
matthiasm@46
|
309 if (tempPitch != tempPitch) std::cerr << "AAAAAAAAA! " << currentF0 << " " << (m_inputSampleRate * 1.0 / iBuf) << std::endl;
|
matthiasm@46
|
310 tempPitchProb.push_back(pair<double, double>(tempPitch, peakProbability[iBuf]));
|
matthiasm@32
|
311 }
|
matthiasm@32
|
312 }
|
matthiasm@46
|
313 m_pitchProb.push_back(tempPitchProb);
|
matthiasm@32
|
314 m_timestamp.push_back(timestamp);
|
matthiasm@32
|
315
|
Chris@39
|
316 return FeatureSet();
|
matthiasm@32
|
317 }
|
matthiasm@32
|
318
|
matthiasm@32
|
319 LocalCandidatePYIN::FeatureSet
|
matthiasm@32
|
320 LocalCandidatePYIN::getRemainingFeatures()
|
matthiasm@32
|
321 {
|
Chris@39
|
322 // timestamp -> candidate number -> value
|
Chris@39
|
323 map<RealTime, map<int, float> > featureValues;
|
matthiasm@32
|
324
|
matthiasm@37
|
325 // std::cerr << "in remaining features" << std::endl;
|
matthiasm@32
|
326
|
matthiasm@32
|
327 if (m_pitchProb.empty()) {
|
Chris@39
|
328 return FeatureSet();
|
matthiasm@32
|
329 }
|
matthiasm@32
|
330
|
matthiasm@32
|
331 // MONO-PITCH STUFF
|
matthiasm@32
|
332 MonoPitch mp;
|
matthiasm@32
|
333 size_t nFrame = m_timestamp.size();
|
matthiasm@32
|
334 vector<vector<float> > pitchTracks;
|
matthiasm@32
|
335 vector<float> freqSum = vector<float>(m_nCandidate);
|
matthiasm@32
|
336 vector<float> freqNumber = vector<float>(m_nCandidate);
|
matthiasm@32
|
337 vector<float> freqMean = vector<float>(m_nCandidate);
|
matthiasm@44
|
338
|
matthiasm@46
|
339 boost::math::normal normalDist(0, 8); // semitones sd
|
matthiasm@46
|
340 float maxNormalDist = boost::math::pdf(normalDist, 0);
|
matthiasm@46
|
341
|
matthiasm@32
|
342 for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate)
|
matthiasm@32
|
343 {
|
matthiasm@32
|
344 pitchTracks.push_back(vector<float>(nFrame));
|
matthiasm@46
|
345 vector<vector<pair<double,double> > > tempPitchProb;
|
matthiasm@46
|
346 float centrePitch = 45 + 3 * iCandidate;
|
matthiasm@46
|
347 for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) {
|
matthiasm@60
|
348 tempPitchProb.push_back(vector<pair<double,double> >());
|
matthiasm@46
|
349 float sumProb = 0;
|
matthiasm@46
|
350 float pitch = 0;
|
matthiasm@46
|
351 float prob = 0;
|
matthiasm@46
|
352 for (size_t iProb = 0; iProb < m_pitchProb[iFrame].size(); ++iProb) {
|
matthiasm@46
|
353 pitch = m_pitchProb[iFrame][iProb].first;
|
matthiasm@46
|
354 // std::cerr << pitch << " " << m_pitchProb[iFrame][iProb].second << std::endl;
|
matthiasm@48
|
355 prob = m_pitchProb[iFrame][iProb].second * boost::math::pdf(normalDist, pitch-centrePitch) / maxNormalDist * 2;
|
matthiasm@46
|
356 sumProb += prob;
|
matthiasm@46
|
357 tempPitchProb[iFrame].push_back(pair<double,double>(pitch,prob));
|
matthiasm@46
|
358 // std::cerr << m_timestamp[iFrame] << " " << iCandidate << " " << centrePitch << " " << pitch << " " << prob << std::endl;
|
matthiasm@46
|
359 }
|
matthiasm@46
|
360 for (size_t iProb = 0; iProb < m_pitchProb[iFrame].size(); ++iProb) {
|
matthiasm@46
|
361 tempPitchProb[iFrame][iProb].second /= sumProb;
|
matthiasm@46
|
362 }
|
matthiasm@46
|
363 }
|
matthiasm@46
|
364 vector<float> mpOut = mp.process(tempPitchProb);
|
matthiasm@44
|
365 float prevFreq = 0;
|
matthiasm@32
|
366 for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
|
matthiasm@32
|
367 {
|
matthiasm@32
|
368 if (mpOut[iFrame] > 0) {
|
matthiasm@46
|
369 // if (prevFreq>0 && fabs(log2(mpOut[iFrame]/prevFreq)) > 0.1) {
|
matthiasm@46
|
370 // for (int jFrame = iFrame; jFrame != -1; --jFrame) {
|
matthiasm@46
|
371 // // hack: setting all freqs to 0 -- will be eliminated later
|
matthiasm@46
|
372 // pitchTracks[iCandidate][jFrame] = 0;
|
matthiasm@46
|
373 // }
|
matthiasm@46
|
374 // break;
|
matthiasm@46
|
375 // }
|
matthiasm@32
|
376 pitchTracks[iCandidate][iFrame] = mpOut[iFrame];
|
matthiasm@32
|
377 freqSum[iCandidate] += mpOut[iFrame];
|
matthiasm@32
|
378 freqNumber[iCandidate]++;
|
matthiasm@44
|
379 prevFreq = mpOut[iFrame];
|
matthiasm@32
|
380 }
|
matthiasm@32
|
381 }
|
matthiasm@32
|
382 freqMean[iCandidate] = freqSum[iCandidate]*1.0/freqNumber[iCandidate];
|
matthiasm@32
|
383 }
|
matthiasm@32
|
384
|
matthiasm@37
|
385 // find near duplicate pitch tracks
|
matthiasm@34
|
386 vector<size_t> duplicates;
|
matthiasm@34
|
387 for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate) {
|
matthiasm@34
|
388 for (size_t jCandidate = iCandidate+1; jCandidate < m_nCandidate; ++jCandidate) {
|
matthiasm@34
|
389 size_t countEqual = 0;
|
matthiasm@34
|
390 for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
|
matthiasm@34
|
391 {
|
matthiasm@46
|
392 if ((pitchTracks[jCandidate][iFrame] == 0 && pitchTracks[iCandidate][iFrame] == 0) ||
|
matthiasm@46
|
393 fabs(pitchTracks[iCandidate][iFrame]/pitchTracks[jCandidate][iFrame]-1)<0.01)
|
matthiasm@34
|
394 countEqual++;
|
matthiasm@34
|
395 }
|
matthiasm@46
|
396 // std::cerr << "proportion equal: " << (countEqual * 1.0 / nFrame) << std::endl;
|
matthiasm@34
|
397 if (countEqual * 1.0 / nFrame > 0.8) {
|
matthiasm@34
|
398 if (freqNumber[iCandidate] > freqNumber[jCandidate]) {
|
matthiasm@34
|
399 duplicates.push_back(jCandidate);
|
matthiasm@46
|
400 } else if (iCandidate < jCandidate) {
|
matthiasm@34
|
401 duplicates.push_back(iCandidate);
|
matthiasm@34
|
402 }
|
matthiasm@34
|
403 }
|
matthiasm@34
|
404 }
|
matthiasm@34
|
405 }
|
matthiasm@34
|
406
|
matthiasm@37
|
407 // now find non-duplicate pitch tracks
|
Chris@39
|
408 map<int, int> candidateActuals;
|
Chris@39
|
409 map<int, std::string> candidateLabels;
|
Chris@39
|
410
|
matthiasm@46
|
411 vector<vector<float> > outputFrequencies;
|
matthiasm@60
|
412 for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) outputFrequencies.push_back(vector<float>());
|
matthiasm@46
|
413
|
matthiasm@32
|
414 int actualCandidateNumber = 0;
|
matthiasm@32
|
415 for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate) {
|
matthiasm@34
|
416 bool isDuplicate = false;
|
matthiasm@34
|
417 for (size_t i = 0; i < duplicates.size(); ++i) {
|
matthiasm@37
|
418 // std::cerr << duplicates[i] << std::endl;
|
matthiasm@34
|
419 if (duplicates[i] == iCandidate) {
|
matthiasm@34
|
420 isDuplicate = true;
|
matthiasm@34
|
421 break;
|
matthiasm@34
|
422 }
|
matthiasm@34
|
423 }
|
matthiasm@46
|
424 if (!isDuplicate && freqNumber[iCandidate] > 0.5*nFrame)
|
matthiasm@32
|
425 {
|
matthiasm@32
|
426 std::ostringstream convert;
|
matthiasm@32
|
427 convert << actualCandidateNumber++;
|
Chris@39
|
428 candidateLabels[iCandidate] = convert.str();
|
Chris@39
|
429 candidateActuals[iCandidate] = actualCandidateNumber;
|
matthiasm@46
|
430 // std::cerr << iCandidate << " " << actualCandidateNumber << " " << freqNumber[iCandidate] << " " << freqMean[iCandidate] << std::endl;
|
matthiasm@32
|
431 for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
|
matthiasm@32
|
432 {
|
matthiasm@32
|
433 if (pitchTracks[iCandidate][iFrame] > 0)
|
matthiasm@32
|
434 {
|
matthiasm@46
|
435 // featureValues[m_timestamp[iFrame]][iCandidate] =
|
matthiasm@46
|
436 // pitchTracks[iCandidate][iFrame];
|
matthiasm@46
|
437 outputFrequencies[iFrame].push_back(pitchTracks[iCandidate][iFrame]);
|
matthiasm@60
|
438 } else {
|
matthiasm@60
|
439 outputFrequencies[iFrame].push_back(0);
|
matthiasm@32
|
440 }
|
matthiasm@32
|
441 }
|
matthiasm@32
|
442 }
|
matthiasm@43
|
443 // fs[m_oPitchTrackCandidates].push_back(f);
|
matthiasm@32
|
444 }
|
matthiasm@32
|
445
|
Chris@39
|
446 // adapt our features so as to return a stack of candidate values
|
Chris@39
|
447 // per frame
|
Chris@39
|
448
|
Chris@39
|
449 FeatureSet fs;
|
Chris@39
|
450
|
matthiasm@46
|
451 for (size_t iFrame = 0; iFrame < nFrame; ++iFrame){
|
Chris@39
|
452 Feature f;
|
Chris@39
|
453 f.hasTimestamp = true;
|
matthiasm@46
|
454 f.timestamp = m_timestamp[iFrame];
|
matthiasm@46
|
455 f.values = outputFrequencies[iFrame];
|
Chris@39
|
456 fs[0].push_back(f);
|
Chris@39
|
457 }
|
matthiasm@46
|
458
|
matthiasm@46
|
459 // I stopped using Chris's map stuff below because I couldn't get my head around it
|
matthiasm@46
|
460 //
|
matthiasm@46
|
461 // for (map<RealTime, map<int, float> >::const_iterator i =
|
matthiasm@46
|
462 // featureValues.begin(); i != featureValues.end(); ++i) {
|
matthiasm@46
|
463 // Feature f;
|
matthiasm@46
|
464 // f.hasTimestamp = true;
|
matthiasm@46
|
465 // f.timestamp = i->first;
|
matthiasm@46
|
466 // int nextCandidate = candidateActuals.begin()->second;
|
matthiasm@46
|
467 // for (map<int, float>::const_iterator j =
|
matthiasm@46
|
468 // i->second.begin(); j != i->second.end(); ++j) {
|
matthiasm@46
|
469 // while (candidateActuals[j->first] > nextCandidate) {
|
matthiasm@46
|
470 // f.values.push_back(0);
|
matthiasm@46
|
471 // ++nextCandidate;
|
matthiasm@46
|
472 // }
|
matthiasm@46
|
473 // f.values.push_back(j->second);
|
matthiasm@46
|
474 // nextCandidate = j->first + 1;
|
matthiasm@46
|
475 // }
|
matthiasm@46
|
476 // //!!! can't use labels?
|
matthiasm@46
|
477 // fs[0].push_back(f);
|
matthiasm@46
|
478 // }
|
matthiasm@32
|
479
|
matthiasm@32
|
480 return fs;
|
matthiasm@32
|
481 }
|