matthiasm@32
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
matthiasm@32
|
2
|
matthiasm@32
|
3 /*
|
matthiasm@32
|
4 pYIN - A fundamental frequency estimator for monophonic audio
|
matthiasm@32
|
5 Centre for Digital Music, Queen Mary, University of London.
|
matthiasm@32
|
6
|
matthiasm@32
|
7 This program is free software; you can redistribute it and/or
|
matthiasm@32
|
8 modify it under the terms of the GNU General Public License as
|
matthiasm@32
|
9 published by the Free Software Foundation; either version 2 of the
|
matthiasm@32
|
10 License, or (at your option) any later version. See the file
|
matthiasm@32
|
11 COPYING included with this distribution for more information.
|
matthiasm@32
|
12 */
|
matthiasm@32
|
13
|
matthiasm@32
|
14 #include "LocalCandidatePYIN.h"
|
matthiasm@32
|
15 #include "MonoPitch.h"
|
matthiasm@32
|
16 #include "YinUtil.h"
|
matthiasm@32
|
17
|
matthiasm@32
|
18 #include "vamp-sdk/FFT.h"
|
matthiasm@32
|
19
|
matthiasm@32
|
20 #include <vector>
|
matthiasm@32
|
21 #include <algorithm>
|
matthiasm@32
|
22
|
matthiasm@32
|
23 #include <cstdio>
|
matthiasm@32
|
24 #include <sstream>
|
matthiasm@32
|
25 // #include <iostream>
|
matthiasm@32
|
26 #include <cmath>
|
matthiasm@32
|
27 #include <complex>
|
Chris@39
|
28 #include <map>
|
matthiasm@32
|
29
|
matthiasm@46
|
30 #include <boost/math/distributions.hpp>
|
matthiasm@46
|
31
|
matthiasm@32
|
32 using std::string;
|
matthiasm@32
|
33 using std::vector;
|
Chris@39
|
34 using std::map;
|
matthiasm@32
|
35 using Vamp::RealTime;
|
matthiasm@32
|
36
|
matthiasm@32
|
37
|
matthiasm@32
|
38 LocalCandidatePYIN::LocalCandidatePYIN(float inputSampleRate) :
|
matthiasm@32
|
39 Plugin(inputSampleRate),
|
matthiasm@32
|
40 m_channels(0),
|
matthiasm@32
|
41 m_stepSize(256),
|
matthiasm@32
|
42 m_blockSize(2048),
|
matthiasm@32
|
43 m_fmin(40),
|
matthiasm@32
|
44 m_fmax(700),
|
matthiasm@32
|
45 m_yin(2048, inputSampleRate, 0.0),
|
matthiasm@32
|
46 m_oPitchTrackCandidates(0),
|
matthiasm@32
|
47 m_threshDistr(2.0f),
|
matthiasm@32
|
48 m_outputUnvoiced(0.0f),
|
matthiasm@32
|
49 m_pitchProb(0),
|
matthiasm@32
|
50 m_timestamp(0),
|
matthiasm@48
|
51 m_nCandidate(13)
|
matthiasm@32
|
52 {
|
matthiasm@32
|
53 }
|
matthiasm@32
|
54
|
matthiasm@32
|
55 LocalCandidatePYIN::~LocalCandidatePYIN()
|
matthiasm@32
|
56 {
|
matthiasm@32
|
57 }
|
matthiasm@32
|
58
|
matthiasm@32
|
59 string
|
matthiasm@32
|
60 LocalCandidatePYIN::getIdentifier() const
|
matthiasm@32
|
61 {
|
matthiasm@32
|
62 return "localcandidatepyin";
|
matthiasm@32
|
63 }
|
matthiasm@32
|
64
|
matthiasm@32
|
65 string
|
matthiasm@32
|
66 LocalCandidatePYIN::getName() const
|
matthiasm@32
|
67 {
|
matthiasm@32
|
68 return "Local Candidate PYIN";
|
matthiasm@32
|
69 }
|
matthiasm@32
|
70
|
matthiasm@32
|
71 string
|
matthiasm@32
|
72 LocalCandidatePYIN::getDescription() const
|
matthiasm@32
|
73 {
|
matthiasm@32
|
74 return "Monophonic pitch and note tracking based on a probabilistic Yin extension.";
|
matthiasm@32
|
75 }
|
matthiasm@32
|
76
|
matthiasm@32
|
77 string
|
matthiasm@32
|
78 LocalCandidatePYIN::getMaker() const
|
matthiasm@32
|
79 {
|
matthiasm@32
|
80 return "Matthias Mauch";
|
matthiasm@32
|
81 }
|
matthiasm@32
|
82
|
matthiasm@32
|
83 int
|
matthiasm@32
|
84 LocalCandidatePYIN::getPluginVersion() const
|
matthiasm@32
|
85 {
|
matthiasm@32
|
86 // Increment this each time you release a version that behaves
|
matthiasm@32
|
87 // differently from the previous one
|
matthiasm@32
|
88 return 1;
|
matthiasm@32
|
89 }
|
matthiasm@32
|
90
|
matthiasm@32
|
91 string
|
matthiasm@32
|
92 LocalCandidatePYIN::getCopyright() const
|
matthiasm@32
|
93 {
|
matthiasm@32
|
94 return "GPL";
|
matthiasm@32
|
95 }
|
matthiasm@32
|
96
|
matthiasm@32
|
97 LocalCandidatePYIN::InputDomain
|
matthiasm@32
|
98 LocalCandidatePYIN::getInputDomain() const
|
matthiasm@32
|
99 {
|
matthiasm@32
|
100 return TimeDomain;
|
matthiasm@32
|
101 }
|
matthiasm@32
|
102
|
matthiasm@32
|
103 size_t
|
matthiasm@32
|
104 LocalCandidatePYIN::getPreferredBlockSize() const
|
matthiasm@32
|
105 {
|
matthiasm@32
|
106 return 2048;
|
matthiasm@32
|
107 }
|
matthiasm@32
|
108
|
matthiasm@32
|
109 size_t
|
matthiasm@32
|
110 LocalCandidatePYIN::getPreferredStepSize() const
|
matthiasm@32
|
111 {
|
matthiasm@32
|
112 return 256;
|
matthiasm@32
|
113 }
|
matthiasm@32
|
114
|
matthiasm@32
|
115 size_t
|
matthiasm@32
|
116 LocalCandidatePYIN::getMinChannelCount() const
|
matthiasm@32
|
117 {
|
matthiasm@32
|
118 return 1;
|
matthiasm@32
|
119 }
|
matthiasm@32
|
120
|
matthiasm@32
|
121 size_t
|
matthiasm@32
|
122 LocalCandidatePYIN::getMaxChannelCount() const
|
matthiasm@32
|
123 {
|
matthiasm@32
|
124 return 1;
|
matthiasm@32
|
125 }
|
matthiasm@32
|
126
|
matthiasm@32
|
127 LocalCandidatePYIN::ParameterList
|
matthiasm@32
|
128 LocalCandidatePYIN::getParameterDescriptors() const
|
matthiasm@32
|
129 {
|
matthiasm@32
|
130 ParameterList list;
|
matthiasm@32
|
131
|
matthiasm@32
|
132 ParameterDescriptor d;
|
matthiasm@32
|
133
|
matthiasm@32
|
134 d.identifier = "threshdistr";
|
matthiasm@32
|
135 d.name = "Yin threshold distribution";
|
matthiasm@32
|
136 d.description = ".";
|
matthiasm@32
|
137 d.unit = "";
|
matthiasm@32
|
138 d.minValue = 0.0f;
|
matthiasm@32
|
139 d.maxValue = 7.0f;
|
matthiasm@32
|
140 d.defaultValue = 2.0f;
|
matthiasm@32
|
141 d.isQuantized = true;
|
matthiasm@32
|
142 d.quantizeStep = 1.0f;
|
matthiasm@32
|
143 d.valueNames.push_back("Uniform");
|
matthiasm@32
|
144 d.valueNames.push_back("Beta (mean 0.10)");
|
matthiasm@32
|
145 d.valueNames.push_back("Beta (mean 0.15)");
|
matthiasm@32
|
146 d.valueNames.push_back("Beta (mean 0.20)");
|
matthiasm@32
|
147 d.valueNames.push_back("Beta (mean 0.30)");
|
matthiasm@32
|
148 d.valueNames.push_back("Single Value 0.10");
|
matthiasm@32
|
149 d.valueNames.push_back("Single Value 0.15");
|
matthiasm@32
|
150 d.valueNames.push_back("Single Value 0.20");
|
matthiasm@32
|
151 list.push_back(d);
|
matthiasm@32
|
152
|
matthiasm@32
|
153 d.identifier = "outputunvoiced";
|
matthiasm@32
|
154 d.valueNames.clear();
|
matthiasm@32
|
155 d.name = "Output estimates classified as unvoiced?";
|
matthiasm@32
|
156 d.description = ".";
|
matthiasm@32
|
157 d.unit = "";
|
matthiasm@32
|
158 d.minValue = 0.0f;
|
matthiasm@32
|
159 d.maxValue = 2.0f;
|
matthiasm@32
|
160 d.defaultValue = 0.0f;
|
matthiasm@32
|
161 d.isQuantized = true;
|
matthiasm@32
|
162 d.quantizeStep = 1.0f;
|
matthiasm@32
|
163 d.valueNames.push_back("No");
|
matthiasm@32
|
164 d.valueNames.push_back("Yes");
|
matthiasm@32
|
165 d.valueNames.push_back("Yes, as negative frequencies");
|
matthiasm@32
|
166 list.push_back(d);
|
matthiasm@32
|
167
|
matthiasm@32
|
168 return list;
|
matthiasm@32
|
169 }
|
matthiasm@32
|
170
|
matthiasm@32
|
171 float
|
matthiasm@32
|
172 LocalCandidatePYIN::getParameter(string identifier) const
|
matthiasm@32
|
173 {
|
matthiasm@32
|
174 if (identifier == "threshdistr") {
|
matthiasm@32
|
175 return m_threshDistr;
|
matthiasm@32
|
176 }
|
matthiasm@32
|
177 if (identifier == "outputunvoiced") {
|
matthiasm@32
|
178 return m_outputUnvoiced;
|
matthiasm@32
|
179 }
|
matthiasm@32
|
180 return 0.f;
|
matthiasm@32
|
181 }
|
matthiasm@32
|
182
|
matthiasm@32
|
183 void
|
matthiasm@32
|
184 LocalCandidatePYIN::setParameter(string identifier, float value)
|
matthiasm@32
|
185 {
|
matthiasm@32
|
186 if (identifier == "threshdistr")
|
matthiasm@32
|
187 {
|
matthiasm@32
|
188 m_threshDistr = value;
|
matthiasm@32
|
189 }
|
matthiasm@32
|
190 if (identifier == "outputunvoiced")
|
matthiasm@32
|
191 {
|
matthiasm@32
|
192 m_outputUnvoiced = value;
|
matthiasm@32
|
193 }
|
matthiasm@32
|
194
|
matthiasm@32
|
195 }
|
matthiasm@32
|
196
|
matthiasm@32
|
197 LocalCandidatePYIN::ProgramList
|
matthiasm@32
|
198 LocalCandidatePYIN::getPrograms() const
|
matthiasm@32
|
199 {
|
matthiasm@32
|
200 ProgramList list;
|
matthiasm@32
|
201 return list;
|
matthiasm@32
|
202 }
|
matthiasm@32
|
203
|
matthiasm@32
|
204 string
|
matthiasm@32
|
205 LocalCandidatePYIN::getCurrentProgram() const
|
matthiasm@32
|
206 {
|
matthiasm@32
|
207 return ""; // no programs
|
matthiasm@32
|
208 }
|
matthiasm@32
|
209
|
matthiasm@32
|
210 void
|
matthiasm@32
|
211 LocalCandidatePYIN::selectProgram(string name)
|
matthiasm@32
|
212 {
|
matthiasm@32
|
213 }
|
matthiasm@32
|
214
|
matthiasm@32
|
215 LocalCandidatePYIN::OutputList
|
matthiasm@32
|
216 LocalCandidatePYIN::getOutputDescriptors() const
|
matthiasm@32
|
217 {
|
matthiasm@32
|
218 OutputList outputs;
|
matthiasm@32
|
219
|
matthiasm@32
|
220 OutputDescriptor d;
|
matthiasm@32
|
221
|
matthiasm@32
|
222 int outputNumber = 0;
|
matthiasm@32
|
223
|
matthiasm@32
|
224 d.identifier = "pitchtrackcandidates";
|
matthiasm@32
|
225 d.name = "Pitch track candidates";
|
matthiasm@32
|
226 d.description = "Multiple candidate pitch tracks.";
|
matthiasm@32
|
227 d.unit = "Hz";
|
matthiasm@32
|
228 d.hasFixedBinCount = false;
|
matthiasm@32
|
229 d.hasKnownExtents = true;
|
matthiasm@32
|
230 d.minValue = m_fmin;
|
Chris@39
|
231 d.maxValue = 500; //!!!???
|
matthiasm@32
|
232 d.isQuantized = false;
|
matthiasm@32
|
233 d.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@32
|
234 d.sampleRate = (m_inputSampleRate / m_stepSize);
|
matthiasm@32
|
235 d.hasDuration = false;
|
matthiasm@32
|
236 outputs.push_back(d);
|
matthiasm@32
|
237
|
matthiasm@32
|
238 return outputs;
|
matthiasm@32
|
239 }
|
matthiasm@32
|
240
|
matthiasm@32
|
241 bool
|
matthiasm@32
|
242 LocalCandidatePYIN::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@32
|
243 {
|
matthiasm@32
|
244 if (channels < getMinChannelCount() ||
|
matthiasm@32
|
245 channels > getMaxChannelCount()) return false;
|
matthiasm@32
|
246
|
matthiasm@32
|
247 /*
|
matthiasm@32
|
248 std::cerr << "LocalCandidatePYIN::initialise: channels = " << channels
|
matthiasm@32
|
249 << ", stepSize = " << stepSize << ", blockSize = " << blockSize
|
matthiasm@32
|
250 << std::endl;
|
matthiasm@32
|
251 */
|
matthiasm@32
|
252 m_channels = channels;
|
matthiasm@32
|
253 m_stepSize = stepSize;
|
matthiasm@32
|
254 m_blockSize = blockSize;
|
matthiasm@32
|
255
|
matthiasm@32
|
256 reset();
|
matthiasm@32
|
257
|
matthiasm@32
|
258 return true;
|
matthiasm@32
|
259 }
|
matthiasm@32
|
260
|
matthiasm@32
|
261 void
|
matthiasm@32
|
262 LocalCandidatePYIN::reset()
|
matthiasm@32
|
263 {
|
matthiasm@32
|
264 m_yin.setThresholdDistr(m_threshDistr);
|
matthiasm@32
|
265 m_yin.setFrameSize(m_blockSize);
|
matthiasm@32
|
266
|
matthiasm@32
|
267 m_pitchProb.clear();
|
matthiasm@32
|
268 for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate)
|
matthiasm@32
|
269 {
|
matthiasm@46
|
270 m_pitchProb.push_back(vector<pair<double, double> >());
|
matthiasm@32
|
271 }
|
matthiasm@32
|
272 m_timestamp.clear();
|
matthiasm@32
|
273 /*
|
matthiasm@32
|
274 std::cerr << "LocalCandidatePYIN::reset"
|
matthiasm@32
|
275 << ", blockSize = " << m_blockSize
|
matthiasm@32
|
276 << std::endl;
|
matthiasm@32
|
277 */
|
matthiasm@32
|
278 }
|
matthiasm@32
|
279
|
matthiasm@32
|
280 LocalCandidatePYIN::FeatureSet
|
matthiasm@32
|
281 LocalCandidatePYIN::process(const float *const *inputBuffers, RealTime timestamp)
|
matthiasm@32
|
282 {
|
matthiasm@48
|
283 // I don't understand why I should have to make this very weird 11
|
matthiasm@48
|
284 // step-size left-shift, but it does get it in sync with the normal pYIN
|
matthiasm@48
|
285 timestamp = timestamp - Vamp::RealTime::frame2RealTime(11 * m_stepSize, lrintf(m_inputSampleRate));
|
matthiasm@32
|
286
|
matthiasm@32
|
287 double *dInputBuffers = new double[m_blockSize];
|
matthiasm@32
|
288 for (size_t i = 0; i < m_blockSize; ++i) dInputBuffers[i] = inputBuffers[0][i];
|
matthiasm@32
|
289
|
matthiasm@32
|
290 size_t yinBufferSize = m_blockSize/2;
|
matthiasm@32
|
291 double* yinBuffer = new double[yinBufferSize];
|
matthiasm@32
|
292 YinUtil::fastDifference(dInputBuffers, yinBuffer, yinBufferSize);
|
matthiasm@32
|
293
|
matthiasm@32
|
294 delete [] dInputBuffers;
|
matthiasm@32
|
295
|
matthiasm@32
|
296 YinUtil::cumulativeDifference(yinBuffer, yinBufferSize);
|
matthiasm@32
|
297
|
matthiasm@46
|
298 float minFrequency = 60;
|
matthiasm@46
|
299 float maxFrequency = 900;
|
matthiasm@46
|
300 vector<double> peakProbability = YinUtil::yinProb(yinBuffer,
|
matthiasm@46
|
301 m_threshDistr,
|
matthiasm@46
|
302 yinBufferSize,
|
matthiasm@46
|
303 m_inputSampleRate/maxFrequency,
|
matthiasm@46
|
304 m_inputSampleRate/minFrequency);
|
matthiasm@46
|
305
|
matthiasm@46
|
306 vector<pair<double, double> > tempPitchProb;
|
matthiasm@46
|
307 for (size_t iBuf = 0; iBuf < yinBufferSize; ++iBuf)
|
matthiasm@32
|
308 {
|
matthiasm@46
|
309 if (peakProbability[iBuf] > 0)
|
matthiasm@32
|
310 {
|
matthiasm@46
|
311 double currentF0 =
|
matthiasm@46
|
312 m_inputSampleRate * (1.0 /
|
matthiasm@46
|
313 YinUtil::parabolicInterpolation(yinBuffer, iBuf, yinBufferSize));
|
matthiasm@46
|
314 double tempPitch = 12 * std::log(currentF0/440)/std::log(2.) + 69;
|
matthiasm@46
|
315 if (tempPitch != tempPitch) std::cerr << "AAAAAAAAA! " << currentF0 << " " << (m_inputSampleRate * 1.0 / iBuf) << std::endl;
|
matthiasm@46
|
316 tempPitchProb.push_back(pair<double, double>(tempPitch, peakProbability[iBuf]));
|
matthiasm@32
|
317 }
|
matthiasm@32
|
318 }
|
matthiasm@46
|
319 m_pitchProb.push_back(tempPitchProb);
|
matthiasm@32
|
320 m_timestamp.push_back(timestamp);
|
matthiasm@32
|
321
|
Chris@39
|
322 return FeatureSet();
|
matthiasm@32
|
323 }
|
matthiasm@32
|
324
|
matthiasm@32
|
325 LocalCandidatePYIN::FeatureSet
|
matthiasm@32
|
326 LocalCandidatePYIN::getRemainingFeatures()
|
matthiasm@32
|
327 {
|
Chris@39
|
328 // timestamp -> candidate number -> value
|
Chris@39
|
329 map<RealTime, map<int, float> > featureValues;
|
matthiasm@32
|
330
|
matthiasm@37
|
331 // std::cerr << "in remaining features" << std::endl;
|
matthiasm@32
|
332
|
matthiasm@32
|
333 if (m_pitchProb.empty()) {
|
Chris@39
|
334 return FeatureSet();
|
matthiasm@32
|
335 }
|
matthiasm@32
|
336
|
matthiasm@32
|
337 // MONO-PITCH STUFF
|
matthiasm@32
|
338 MonoPitch mp;
|
matthiasm@32
|
339 size_t nFrame = m_timestamp.size();
|
matthiasm@32
|
340 vector<vector<float> > pitchTracks;
|
matthiasm@32
|
341 vector<float> freqSum = vector<float>(m_nCandidate);
|
matthiasm@32
|
342 vector<float> freqNumber = vector<float>(m_nCandidate);
|
matthiasm@32
|
343 vector<float> freqMean = vector<float>(m_nCandidate);
|
matthiasm@44
|
344
|
matthiasm@46
|
345 boost::math::normal normalDist(0, 8); // semitones sd
|
matthiasm@46
|
346 float maxNormalDist = boost::math::pdf(normalDist, 0);
|
matthiasm@46
|
347
|
matthiasm@32
|
348 for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate)
|
matthiasm@32
|
349 {
|
matthiasm@32
|
350 pitchTracks.push_back(vector<float>(nFrame));
|
matthiasm@46
|
351 vector<vector<pair<double,double> > > tempPitchProb;
|
matthiasm@46
|
352 float centrePitch = 45 + 3 * iCandidate;
|
matthiasm@46
|
353 for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) {
|
matthiasm@46
|
354 tempPitchProb.push_back(vector<pair<double,double> >(0));
|
matthiasm@46
|
355 float sumProb = 0;
|
matthiasm@46
|
356 float pitch = 0;
|
matthiasm@46
|
357 float prob = 0;
|
matthiasm@46
|
358 for (size_t iProb = 0; iProb < m_pitchProb[iFrame].size(); ++iProb) {
|
matthiasm@46
|
359 pitch = m_pitchProb[iFrame][iProb].first;
|
matthiasm@46
|
360 // std::cerr << pitch << " " << m_pitchProb[iFrame][iProb].second << std::endl;
|
matthiasm@48
|
361 prob = m_pitchProb[iFrame][iProb].second * boost::math::pdf(normalDist, pitch-centrePitch) / maxNormalDist * 2;
|
matthiasm@46
|
362 sumProb += prob;
|
matthiasm@46
|
363 tempPitchProb[iFrame].push_back(pair<double,double>(pitch,prob));
|
matthiasm@46
|
364 // std::cerr << m_timestamp[iFrame] << " " << iCandidate << " " << centrePitch << " " << pitch << " " << prob << std::endl;
|
matthiasm@46
|
365 }
|
matthiasm@46
|
366 for (size_t iProb = 0; iProb < m_pitchProb[iFrame].size(); ++iProb) {
|
matthiasm@46
|
367 tempPitchProb[iFrame][iProb].second /= sumProb;
|
matthiasm@46
|
368 }
|
matthiasm@46
|
369 }
|
matthiasm@46
|
370 vector<float> mpOut = mp.process(tempPitchProb);
|
matthiasm@44
|
371 float prevFreq = 0;
|
matthiasm@32
|
372 for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
|
matthiasm@32
|
373 {
|
matthiasm@32
|
374 if (mpOut[iFrame] > 0) {
|
matthiasm@46
|
375 // if (prevFreq>0 && fabs(log2(mpOut[iFrame]/prevFreq)) > 0.1) {
|
matthiasm@46
|
376 // for (int jFrame = iFrame; jFrame != -1; --jFrame) {
|
matthiasm@46
|
377 // // hack: setting all freqs to 0 -- will be eliminated later
|
matthiasm@46
|
378 // pitchTracks[iCandidate][jFrame] = 0;
|
matthiasm@46
|
379 // }
|
matthiasm@46
|
380 // break;
|
matthiasm@46
|
381 // }
|
matthiasm@32
|
382 pitchTracks[iCandidate][iFrame] = mpOut[iFrame];
|
matthiasm@32
|
383 freqSum[iCandidate] += mpOut[iFrame];
|
matthiasm@32
|
384 freqNumber[iCandidate]++;
|
matthiasm@44
|
385 prevFreq = mpOut[iFrame];
|
matthiasm@32
|
386 }
|
matthiasm@32
|
387 }
|
matthiasm@32
|
388 freqMean[iCandidate] = freqSum[iCandidate]*1.0/freqNumber[iCandidate];
|
matthiasm@32
|
389 }
|
matthiasm@32
|
390
|
matthiasm@37
|
391 // find near duplicate pitch tracks
|
matthiasm@34
|
392 vector<size_t> duplicates;
|
matthiasm@34
|
393 for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate) {
|
matthiasm@34
|
394 for (size_t jCandidate = iCandidate+1; jCandidate < m_nCandidate; ++jCandidate) {
|
matthiasm@34
|
395 size_t countEqual = 0;
|
matthiasm@34
|
396 for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
|
matthiasm@34
|
397 {
|
matthiasm@46
|
398 if ((pitchTracks[jCandidate][iFrame] == 0 && pitchTracks[iCandidate][iFrame] == 0) ||
|
matthiasm@46
|
399 fabs(pitchTracks[iCandidate][iFrame]/pitchTracks[jCandidate][iFrame]-1)<0.01)
|
matthiasm@34
|
400 countEqual++;
|
matthiasm@34
|
401 }
|
matthiasm@46
|
402 // std::cerr << "proportion equal: " << (countEqual * 1.0 / nFrame) << std::endl;
|
matthiasm@34
|
403 if (countEqual * 1.0 / nFrame > 0.8) {
|
matthiasm@34
|
404 if (freqNumber[iCandidate] > freqNumber[jCandidate]) {
|
matthiasm@34
|
405 duplicates.push_back(jCandidate);
|
matthiasm@46
|
406 } else if (iCandidate < jCandidate) {
|
matthiasm@34
|
407 duplicates.push_back(iCandidate);
|
matthiasm@34
|
408 }
|
matthiasm@34
|
409 }
|
matthiasm@34
|
410 }
|
matthiasm@34
|
411 }
|
matthiasm@34
|
412
|
matthiasm@46
|
413 // std::cerr << "n duplicate: " << duplicates.size() << std::endl;
|
matthiasm@46
|
414 for (size_t iDup = 0; iDup < duplicates.size(); ++ iDup) {
|
matthiasm@46
|
415 // std::cerr << "duplicate: " << iDup << std::endl;
|
matthiasm@46
|
416 }
|
matthiasm@46
|
417
|
matthiasm@37
|
418 // now find non-duplicate pitch tracks
|
Chris@39
|
419 map<int, int> candidateActuals;
|
Chris@39
|
420 map<int, std::string> candidateLabels;
|
Chris@39
|
421
|
matthiasm@46
|
422 vector<vector<float> > outputFrequencies;
|
matthiasm@46
|
423 for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) outputFrequencies.push_back(vector<float>(0));
|
matthiasm@46
|
424
|
matthiasm@32
|
425 int actualCandidateNumber = 0;
|
matthiasm@32
|
426 for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate) {
|
matthiasm@34
|
427 bool isDuplicate = false;
|
matthiasm@34
|
428 for (size_t i = 0; i < duplicates.size(); ++i) {
|
matthiasm@37
|
429 // std::cerr << duplicates[i] << std::endl;
|
matthiasm@34
|
430 if (duplicates[i] == iCandidate) {
|
matthiasm@34
|
431 isDuplicate = true;
|
matthiasm@34
|
432 break;
|
matthiasm@34
|
433 }
|
matthiasm@34
|
434 }
|
matthiasm@46
|
435 if (!isDuplicate && freqNumber[iCandidate] > 0.5*nFrame)
|
matthiasm@32
|
436 {
|
matthiasm@32
|
437 std::ostringstream convert;
|
matthiasm@32
|
438 convert << actualCandidateNumber++;
|
Chris@39
|
439 candidateLabels[iCandidate] = convert.str();
|
Chris@39
|
440 candidateActuals[iCandidate] = actualCandidateNumber;
|
matthiasm@46
|
441 // std::cerr << iCandidate << " " << actualCandidateNumber << " " << freqNumber[iCandidate] << " " << freqMean[iCandidate] << std::endl;
|
matthiasm@32
|
442 for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
|
matthiasm@32
|
443 {
|
matthiasm@32
|
444 if (pitchTracks[iCandidate][iFrame] > 0)
|
matthiasm@32
|
445 {
|
matthiasm@46
|
446 // featureValues[m_timestamp[iFrame]][iCandidate] =
|
matthiasm@46
|
447 // pitchTracks[iCandidate][iFrame];
|
matthiasm@46
|
448 outputFrequencies[iFrame].push_back(pitchTracks[iCandidate][iFrame]);
|
matthiasm@32
|
449 }
|
matthiasm@32
|
450 }
|
matthiasm@32
|
451 }
|
matthiasm@43
|
452 // fs[m_oPitchTrackCandidates].push_back(f);
|
matthiasm@32
|
453 }
|
matthiasm@32
|
454
|
Chris@39
|
455 // adapt our features so as to return a stack of candidate values
|
Chris@39
|
456 // per frame
|
Chris@39
|
457
|
Chris@39
|
458 FeatureSet fs;
|
Chris@39
|
459
|
matthiasm@46
|
460 for (size_t iFrame = 0; iFrame < nFrame; ++iFrame){
|
Chris@39
|
461 Feature f;
|
Chris@39
|
462 f.hasTimestamp = true;
|
matthiasm@46
|
463 f.timestamp = m_timestamp[iFrame];
|
matthiasm@46
|
464 f.values = outputFrequencies[iFrame];
|
Chris@39
|
465 fs[0].push_back(f);
|
Chris@39
|
466 }
|
matthiasm@46
|
467
|
matthiasm@46
|
468 // I stopped using Chris's map stuff below because I couldn't get my head around it
|
matthiasm@46
|
469 //
|
matthiasm@46
|
470 // for (map<RealTime, map<int, float> >::const_iterator i =
|
matthiasm@46
|
471 // featureValues.begin(); i != featureValues.end(); ++i) {
|
matthiasm@46
|
472 // Feature f;
|
matthiasm@46
|
473 // f.hasTimestamp = true;
|
matthiasm@46
|
474 // f.timestamp = i->first;
|
matthiasm@46
|
475 // int nextCandidate = candidateActuals.begin()->second;
|
matthiasm@46
|
476 // for (map<int, float>::const_iterator j =
|
matthiasm@46
|
477 // i->second.begin(); j != i->second.end(); ++j) {
|
matthiasm@46
|
478 // while (candidateActuals[j->first] > nextCandidate) {
|
matthiasm@46
|
479 // f.values.push_back(0);
|
matthiasm@46
|
480 // ++nextCandidate;
|
matthiasm@46
|
481 // }
|
matthiasm@46
|
482 // f.values.push_back(j->second);
|
matthiasm@46
|
483 // nextCandidate = j->first + 1;
|
matthiasm@46
|
484 // }
|
matthiasm@46
|
485 // //!!! can't use labels?
|
matthiasm@46
|
486 // fs[0].push_back(f);
|
matthiasm@46
|
487 // }
|
matthiasm@32
|
488
|
matthiasm@32
|
489 return fs;
|
matthiasm@32
|
490 }
|