matthiasm@32
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
matthiasm@32
|
2
|
matthiasm@32
|
3 /*
|
matthiasm@32
|
4 pYIN - A fundamental frequency estimator for monophonic audio
|
matthiasm@32
|
5 Centre for Digital Music, Queen Mary, University of London.
|
matthiasm@32
|
6
|
matthiasm@32
|
7 This program is free software; you can redistribute it and/or
|
matthiasm@32
|
8 modify it under the terms of the GNU General Public License as
|
matthiasm@32
|
9 published by the Free Software Foundation; either version 2 of the
|
matthiasm@32
|
10 License, or (at your option) any later version. See the file
|
matthiasm@32
|
11 COPYING included with this distribution for more information.
|
matthiasm@32
|
12 */
|
matthiasm@32
|
13
|
matthiasm@32
|
14 #include "LocalCandidatePYIN.h"
|
mail@132
|
15 #include "MonoPitchHMM.h"
|
matthiasm@32
|
16 #include "YinUtil.h"
|
matthiasm@32
|
17
|
matthiasm@32
|
18 #include "vamp-sdk/FFT.h"
|
matthiasm@32
|
19
|
matthiasm@32
|
20 #include <vector>
|
matthiasm@32
|
21 #include <algorithm>
|
matthiasm@32
|
22
|
matthiasm@32
|
23 #include <cstdio>
|
matthiasm@32
|
24 #include <sstream>
|
matthiasm@32
|
25 // #include <iostream>
|
matthiasm@32
|
26 #include <cmath>
|
matthiasm@32
|
27 #include <complex>
|
Chris@39
|
28 #include <map>
|
matthiasm@32
|
29
|
matthiasm@46
|
30 #include <boost/math/distributions.hpp>
|
matthiasm@46
|
31
|
matthiasm@32
|
32 using std::string;
|
matthiasm@32
|
33 using std::vector;
|
Chris@39
|
34 using std::map;
|
matthiasm@32
|
35 using Vamp::RealTime;
|
matthiasm@32
|
36
|
matthiasm@32
|
37
|
matthiasm@32
|
38 LocalCandidatePYIN::LocalCandidatePYIN(float inputSampleRate) :
|
matthiasm@32
|
39 Plugin(inputSampleRate),
|
matthiasm@32
|
40 m_channels(0),
|
matthiasm@32
|
41 m_stepSize(256),
|
matthiasm@32
|
42 m_blockSize(2048),
|
matthiasm@32
|
43 m_fmin(40),
|
matthiasm@32
|
44 m_fmax(700),
|
matthiasm@32
|
45 m_oPitchTrackCandidates(0),
|
matthiasm@32
|
46 m_threshDistr(2.0f),
|
matthiasm@32
|
47 m_outputUnvoiced(0.0f),
|
matthiasm@70
|
48 m_preciseTime(0.0f),
|
matthiasm@32
|
49 m_pitchProb(0),
|
matthiasm@32
|
50 m_timestamp(0),
|
Chris@136
|
51 m_nCandidate(13),
|
Chris@136
|
52 m_yinUtil(0)
|
matthiasm@32
|
53 {
|
matthiasm@32
|
54 }
|
matthiasm@32
|
55
|
matthiasm@32
|
56 LocalCandidatePYIN::~LocalCandidatePYIN()
|
matthiasm@32
|
57 {
|
Chris@136
|
58 delete m_yinUtil;
|
matthiasm@32
|
59 }
|
matthiasm@32
|
60
|
matthiasm@32
|
61 string
|
matthiasm@32
|
62 LocalCandidatePYIN::getIdentifier() const
|
matthiasm@32
|
63 {
|
matthiasm@32
|
64 return "localcandidatepyin";
|
matthiasm@32
|
65 }
|
matthiasm@32
|
66
|
matthiasm@32
|
67 string
|
matthiasm@32
|
68 LocalCandidatePYIN::getName() const
|
matthiasm@32
|
69 {
|
matthiasm@32
|
70 return "Local Candidate PYIN";
|
matthiasm@32
|
71 }
|
matthiasm@32
|
72
|
matthiasm@32
|
73 string
|
matthiasm@32
|
74 LocalCandidatePYIN::getDescription() const
|
matthiasm@32
|
75 {
|
matthiasm@32
|
76 return "Monophonic pitch and note tracking based on a probabilistic Yin extension.";
|
matthiasm@32
|
77 }
|
matthiasm@32
|
78
|
matthiasm@32
|
79 string
|
matthiasm@32
|
80 LocalCandidatePYIN::getMaker() const
|
matthiasm@32
|
81 {
|
matthiasm@32
|
82 return "Matthias Mauch";
|
matthiasm@32
|
83 }
|
matthiasm@32
|
84
|
matthiasm@32
|
85 int
|
matthiasm@32
|
86 LocalCandidatePYIN::getPluginVersion() const
|
matthiasm@32
|
87 {
|
matthiasm@32
|
88 // Increment this each time you release a version that behaves
|
matthiasm@32
|
89 // differently from the previous one
|
Chris@143
|
90 return 3;
|
matthiasm@32
|
91 }
|
matthiasm@32
|
92
|
matthiasm@32
|
93 string
|
matthiasm@32
|
94 LocalCandidatePYIN::getCopyright() const
|
matthiasm@32
|
95 {
|
matthiasm@32
|
96 return "GPL";
|
matthiasm@32
|
97 }
|
matthiasm@32
|
98
|
matthiasm@32
|
99 LocalCandidatePYIN::InputDomain
|
matthiasm@32
|
100 LocalCandidatePYIN::getInputDomain() const
|
matthiasm@32
|
101 {
|
matthiasm@32
|
102 return TimeDomain;
|
matthiasm@32
|
103 }
|
matthiasm@32
|
104
|
matthiasm@32
|
105 size_t
|
matthiasm@32
|
106 LocalCandidatePYIN::getPreferredBlockSize() const
|
matthiasm@32
|
107 {
|
matthiasm@32
|
108 return 2048;
|
matthiasm@32
|
109 }
|
matthiasm@32
|
110
|
matthiasm@32
|
111 size_t
|
matthiasm@32
|
112 LocalCandidatePYIN::getPreferredStepSize() const
|
matthiasm@32
|
113 {
|
matthiasm@32
|
114 return 256;
|
matthiasm@32
|
115 }
|
matthiasm@32
|
116
|
matthiasm@32
|
117 size_t
|
matthiasm@32
|
118 LocalCandidatePYIN::getMinChannelCount() const
|
matthiasm@32
|
119 {
|
matthiasm@32
|
120 return 1;
|
matthiasm@32
|
121 }
|
matthiasm@32
|
122
|
matthiasm@32
|
123 size_t
|
matthiasm@32
|
124 LocalCandidatePYIN::getMaxChannelCount() const
|
matthiasm@32
|
125 {
|
matthiasm@32
|
126 return 1;
|
matthiasm@32
|
127 }
|
matthiasm@32
|
128
|
matthiasm@32
|
129 LocalCandidatePYIN::ParameterList
|
matthiasm@32
|
130 LocalCandidatePYIN::getParameterDescriptors() const
|
matthiasm@32
|
131 {
|
matthiasm@32
|
132 ParameterList list;
|
matthiasm@32
|
133
|
matthiasm@32
|
134 ParameterDescriptor d;
|
matthiasm@32
|
135
|
matthiasm@32
|
136 d.identifier = "threshdistr";
|
matthiasm@32
|
137 d.name = "Yin threshold distribution";
|
matthiasm@32
|
138 d.description = ".";
|
matthiasm@32
|
139 d.unit = "";
|
matthiasm@32
|
140 d.minValue = 0.0f;
|
matthiasm@32
|
141 d.maxValue = 7.0f;
|
matthiasm@32
|
142 d.defaultValue = 2.0f;
|
matthiasm@32
|
143 d.isQuantized = true;
|
matthiasm@32
|
144 d.quantizeStep = 1.0f;
|
matthiasm@32
|
145 d.valueNames.push_back("Uniform");
|
matthiasm@32
|
146 d.valueNames.push_back("Beta (mean 0.10)");
|
matthiasm@32
|
147 d.valueNames.push_back("Beta (mean 0.15)");
|
matthiasm@32
|
148 d.valueNames.push_back("Beta (mean 0.20)");
|
matthiasm@32
|
149 d.valueNames.push_back("Beta (mean 0.30)");
|
matthiasm@32
|
150 d.valueNames.push_back("Single Value 0.10");
|
matthiasm@32
|
151 d.valueNames.push_back("Single Value 0.15");
|
matthiasm@32
|
152 d.valueNames.push_back("Single Value 0.20");
|
matthiasm@32
|
153 list.push_back(d);
|
matthiasm@32
|
154
|
matthiasm@32
|
155 d.identifier = "outputunvoiced";
|
matthiasm@32
|
156 d.valueNames.clear();
|
matthiasm@32
|
157 d.name = "Output estimates classified as unvoiced?";
|
matthiasm@32
|
158 d.description = ".";
|
matthiasm@32
|
159 d.unit = "";
|
matthiasm@32
|
160 d.minValue = 0.0f;
|
matthiasm@32
|
161 d.maxValue = 2.0f;
|
matthiasm@32
|
162 d.defaultValue = 0.0f;
|
matthiasm@32
|
163 d.isQuantized = true;
|
matthiasm@32
|
164 d.quantizeStep = 1.0f;
|
matthiasm@32
|
165 d.valueNames.push_back("No");
|
matthiasm@32
|
166 d.valueNames.push_back("Yes");
|
matthiasm@32
|
167 d.valueNames.push_back("Yes, as negative frequencies");
|
matthiasm@32
|
168 list.push_back(d);
|
matthiasm@32
|
169
|
matthiasm@70
|
170 d.identifier = "precisetime";
|
matthiasm@70
|
171 d.valueNames.clear();
|
matthiasm@70
|
172 d.name = "Use non-standard precise YIN timing (slow).";
|
matthiasm@70
|
173 d.description = ".";
|
matthiasm@70
|
174 d.unit = "";
|
matthiasm@70
|
175 d.minValue = 0.0f;
|
matthiasm@70
|
176 d.maxValue = 1.0f;
|
matthiasm@70
|
177 d.defaultValue = 0.0f;
|
matthiasm@70
|
178 d.isQuantized = true;
|
matthiasm@70
|
179 d.quantizeStep = 1.0f;
|
matthiasm@70
|
180 list.push_back(d);
|
matthiasm@70
|
181
|
matthiasm@32
|
182 return list;
|
matthiasm@32
|
183 }
|
matthiasm@32
|
184
|
matthiasm@32
|
185 float
|
matthiasm@32
|
186 LocalCandidatePYIN::getParameter(string identifier) const
|
matthiasm@32
|
187 {
|
matthiasm@32
|
188 if (identifier == "threshdistr") {
|
matthiasm@32
|
189 return m_threshDistr;
|
matthiasm@32
|
190 }
|
matthiasm@32
|
191 if (identifier == "outputunvoiced") {
|
matthiasm@32
|
192 return m_outputUnvoiced;
|
matthiasm@32
|
193 }
|
matthiasm@70
|
194 if (identifier == "precisetime") {
|
matthiasm@70
|
195 return m_preciseTime;
|
matthiasm@70
|
196 }
|
matthiasm@32
|
197 return 0.f;
|
matthiasm@32
|
198 }
|
matthiasm@32
|
199
|
matthiasm@32
|
200 void
|
matthiasm@32
|
201 LocalCandidatePYIN::setParameter(string identifier, float value)
|
matthiasm@32
|
202 {
|
matthiasm@32
|
203 if (identifier == "threshdistr")
|
matthiasm@32
|
204 {
|
matthiasm@32
|
205 m_threshDistr = value;
|
matthiasm@32
|
206 }
|
matthiasm@32
|
207 if (identifier == "outputunvoiced")
|
matthiasm@32
|
208 {
|
matthiasm@32
|
209 m_outputUnvoiced = value;
|
matthiasm@32
|
210 }
|
matthiasm@70
|
211 if (identifier == "precisetime")
|
matthiasm@70
|
212 {
|
matthiasm@70
|
213 m_preciseTime = value;
|
matthiasm@70
|
214 }
|
matthiasm@32
|
215 }
|
matthiasm@32
|
216
|
matthiasm@32
|
217 LocalCandidatePYIN::ProgramList
|
matthiasm@32
|
218 LocalCandidatePYIN::getPrograms() const
|
matthiasm@32
|
219 {
|
matthiasm@32
|
220 ProgramList list;
|
matthiasm@32
|
221 return list;
|
matthiasm@32
|
222 }
|
matthiasm@32
|
223
|
matthiasm@32
|
224 string
|
matthiasm@32
|
225 LocalCandidatePYIN::getCurrentProgram() const
|
matthiasm@32
|
226 {
|
matthiasm@32
|
227 return ""; // no programs
|
matthiasm@32
|
228 }
|
matthiasm@32
|
229
|
matthiasm@32
|
230 void
|
Chris@138
|
231 LocalCandidatePYIN::selectProgram(string)
|
matthiasm@32
|
232 {
|
matthiasm@32
|
233 }
|
matthiasm@32
|
234
|
matthiasm@32
|
235 LocalCandidatePYIN::OutputList
|
matthiasm@32
|
236 LocalCandidatePYIN::getOutputDescriptors() const
|
matthiasm@32
|
237 {
|
matthiasm@32
|
238 OutputList outputs;
|
matthiasm@32
|
239
|
matthiasm@32
|
240 OutputDescriptor d;
|
matthiasm@32
|
241
|
matthiasm@32
|
242 d.identifier = "pitchtrackcandidates";
|
matthiasm@32
|
243 d.name = "Pitch track candidates";
|
matthiasm@32
|
244 d.description = "Multiple candidate pitch tracks.";
|
matthiasm@32
|
245 d.unit = "Hz";
|
matthiasm@32
|
246 d.hasFixedBinCount = false;
|
matthiasm@32
|
247 d.hasKnownExtents = true;
|
matthiasm@32
|
248 d.minValue = m_fmin;
|
Chris@39
|
249 d.maxValue = 500; //!!!???
|
matthiasm@32
|
250 d.isQuantized = false;
|
matthiasm@32
|
251 d.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@32
|
252 d.sampleRate = (m_inputSampleRate / m_stepSize);
|
matthiasm@32
|
253 d.hasDuration = false;
|
matthiasm@32
|
254 outputs.push_back(d);
|
matthiasm@32
|
255
|
matthiasm@32
|
256 return outputs;
|
matthiasm@32
|
257 }
|
matthiasm@32
|
258
|
matthiasm@32
|
259 bool
|
matthiasm@32
|
260 LocalCandidatePYIN::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@32
|
261 {
|
matthiasm@32
|
262 if (channels < getMinChannelCount() ||
|
matthiasm@32
|
263 channels > getMaxChannelCount()) return false;
|
matthiasm@32
|
264
|
matthiasm@32
|
265 /*
|
matthiasm@32
|
266 std::cerr << "LocalCandidatePYIN::initialise: channels = " << channels
|
matthiasm@32
|
267 << ", stepSize = " << stepSize << ", blockSize = " << blockSize
|
matthiasm@32
|
268 << std::endl;
|
matthiasm@32
|
269 */
|
matthiasm@32
|
270 m_channels = channels;
|
matthiasm@32
|
271 m_stepSize = stepSize;
|
matthiasm@32
|
272 m_blockSize = blockSize;
|
Chris@136
|
273
|
Chris@136
|
274 m_yinUtil = new YinUtil(m_blockSize/2);
|
matthiasm@32
|
275
|
matthiasm@32
|
276 reset();
|
matthiasm@32
|
277
|
matthiasm@32
|
278 return true;
|
matthiasm@32
|
279 }
|
matthiasm@32
|
280
|
matthiasm@32
|
281 void
|
matthiasm@32
|
282 LocalCandidatePYIN::reset()
|
matthiasm@32
|
283 {
|
matthiasm@32
|
284 m_pitchProb.clear();
|
matthiasm@32
|
285 m_timestamp.clear();
|
matthiasm@32
|
286 /*
|
matthiasm@32
|
287 std::cerr << "LocalCandidatePYIN::reset"
|
matthiasm@32
|
288 << ", blockSize = " << m_blockSize
|
matthiasm@32
|
289 << std::endl;
|
matthiasm@32
|
290 */
|
matthiasm@32
|
291 }
|
matthiasm@32
|
292
|
matthiasm@32
|
293 LocalCandidatePYIN::FeatureSet
|
matthiasm@32
|
294 LocalCandidatePYIN::process(const float *const *inputBuffers, RealTime timestamp)
|
matthiasm@32
|
295 {
|
matthiasm@77
|
296 int offset = m_preciseTime == 1.0 ? m_blockSize/2 : m_blockSize/4;
|
matthiasm@77
|
297 timestamp = timestamp + Vamp::RealTime::frame2RealTime(offset, lrintf(m_inputSampleRate));
|
matthiasm@32
|
298
|
matthiasm@32
|
299 double *dInputBuffers = new double[m_blockSize];
|
matthiasm@32
|
300 for (size_t i = 0; i < m_blockSize; ++i) dInputBuffers[i] = inputBuffers[0][i];
|
matthiasm@32
|
301
|
matthiasm@32
|
302 size_t yinBufferSize = m_blockSize/2;
|
matthiasm@32
|
303 double* yinBuffer = new double[yinBufferSize];
|
Chris@136
|
304 if (!m_preciseTime) m_yinUtil->fastDifference(dInputBuffers, yinBuffer);
|
Chris@136
|
305 else m_yinUtil->slowDifference(dInputBuffers, yinBuffer);
|
matthiasm@32
|
306
|
matthiasm@32
|
307 delete [] dInputBuffers;
|
matthiasm@32
|
308
|
Chris@136
|
309 m_yinUtil->cumulativeDifference(yinBuffer);
|
matthiasm@32
|
310
|
matthiasm@46
|
311 float minFrequency = 60;
|
matthiasm@46
|
312 float maxFrequency = 900;
|
Chris@136
|
313 vector<double> peakProbability = m_yinUtil->yinProb(yinBuffer,
|
Chris@136
|
314 m_threshDistr,
|
Chris@136
|
315 m_inputSampleRate/maxFrequency,
|
Chris@136
|
316 m_inputSampleRate/minFrequency);
|
matthiasm@46
|
317
|
matthiasm@46
|
318 vector<pair<double, double> > tempPitchProb;
|
matthiasm@46
|
319 for (size_t iBuf = 0; iBuf < yinBufferSize; ++iBuf)
|
matthiasm@32
|
320 {
|
matthiasm@46
|
321 if (peakProbability[iBuf] > 0)
|
matthiasm@32
|
322 {
|
matthiasm@46
|
323 double currentF0 =
|
matthiasm@46
|
324 m_inputSampleRate * (1.0 /
|
Chris@136
|
325 m_yinUtil->parabolicInterpolation(yinBuffer, iBuf));
|
matthiasm@46
|
326 double tempPitch = 12 * std::log(currentF0/440)/std::log(2.) + 69;
|
matthiasm@46
|
327 tempPitchProb.push_back(pair<double, double>(tempPitch, peakProbability[iBuf]));
|
matthiasm@32
|
328 }
|
matthiasm@32
|
329 }
|
matthiasm@46
|
330 m_pitchProb.push_back(tempPitchProb);
|
matthiasm@32
|
331 m_timestamp.push_back(timestamp);
|
matthiasm@32
|
332
|
matthiasm@76
|
333 delete[] yinBuffer;
|
matthiasm@76
|
334
|
Chris@39
|
335 return FeatureSet();
|
matthiasm@32
|
336 }
|
matthiasm@32
|
337
|
matthiasm@32
|
338 LocalCandidatePYIN::FeatureSet
|
matthiasm@32
|
339 LocalCandidatePYIN::getRemainingFeatures()
|
matthiasm@32
|
340 {
|
Chris@39
|
341 // timestamp -> candidate number -> value
|
Chris@39
|
342 map<RealTime, map<int, float> > featureValues;
|
matthiasm@32
|
343
|
matthiasm@37
|
344 // std::cerr << "in remaining features" << std::endl;
|
matthiasm@32
|
345
|
matthiasm@32
|
346 if (m_pitchProb.empty()) {
|
Chris@39
|
347 return FeatureSet();
|
matthiasm@32
|
348 }
|
matthiasm@32
|
349
|
matthiasm@32
|
350 // MONO-PITCH STUFF
|
mail@132
|
351 MonoPitchHMM hmm(0);
|
matthiasm@32
|
352 size_t nFrame = m_timestamp.size();
|
matthiasm@32
|
353 vector<vector<float> > pitchTracks;
|
matthiasm@32
|
354 vector<float> freqSum = vector<float>(m_nCandidate);
|
matthiasm@32
|
355 vector<float> freqNumber = vector<float>(m_nCandidate);
|
matthiasm@32
|
356 vector<float> freqMean = vector<float>(m_nCandidate);
|
matthiasm@44
|
357
|
matthiasm@46
|
358 boost::math::normal normalDist(0, 8); // semitones sd
|
matthiasm@46
|
359 float maxNormalDist = boost::math::pdf(normalDist, 0);
|
matthiasm@46
|
360
|
matthiasm@110
|
361 // Viterbi-decode multiple times with different frequencies emphasised
|
matthiasm@32
|
362 for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate)
|
matthiasm@32
|
363 {
|
matthiasm@32
|
364 pitchTracks.push_back(vector<float>(nFrame));
|
mail@132
|
365 vector<pair<double,double> > tempPitchProb;
|
mail@132
|
366 vector<vector<double> > tempObsProb;
|
matthiasm@46
|
367 float centrePitch = 45 + 3 * iCandidate;
|
matthiasm@109
|
368
|
matthiasm@46
|
369 for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) {
|
matthiasm@46
|
370 float sumProb = 0;
|
matthiasm@46
|
371 float pitch = 0;
|
matthiasm@46
|
372 float prob = 0;
|
matthiasm@109
|
373 for (size_t iProb = 0; iProb < m_pitchProb[iFrame].size(); ++iProb)
|
matthiasm@109
|
374 {
|
matthiasm@109
|
375 pitch = m_pitchProb[iFrame][iProb].first;
|
matthiasm@109
|
376 prob = m_pitchProb[iFrame][iProb].second *
|
matthiasm@109
|
377 boost::math::pdf(normalDist, pitch-centrePitch) /
|
matthiasm@109
|
378 maxNormalDist * 2;
|
matthiasm@46
|
379 sumProb += prob;
|
mail@132
|
380 tempPitchProb.push_back(
|
matthiasm@109
|
381 pair<double,double>(pitch,prob));
|
matthiasm@46
|
382 }
|
matthiasm@109
|
383 for (size_t iProb = 0; iProb < m_pitchProb[iFrame].size(); ++iProb)
|
matthiasm@109
|
384 {
|
mail@132
|
385 tempPitchProb[iProb].second /= sumProb;
|
matthiasm@46
|
386 }
|
mail@132
|
387 tempObsProb.push_back(hmm.calculateObsProb(tempPitchProb));
|
matthiasm@46
|
388 }
|
matthiasm@109
|
389
|
mail@132
|
390 vector<int> rawPitchPath = hmm.decodeViterbi(tempObsProb);
|
mail@132
|
391 vector<float> mpOut;
|
mail@132
|
392
|
mail@132
|
393 for (size_t iFrame = 0; iFrame < rawPitchPath.size(); ++iFrame)
|
mail@132
|
394 {
|
mail@132
|
395 float freq = hmm.nearestFreq(rawPitchPath[iFrame],
|
Chris@141
|
396 m_pitchProb[iFrame]);
|
mail@132
|
397 mpOut.push_back(freq); // for note processing below
|
mail@132
|
398 }
|
mail@132
|
399
|
Chris@141
|
400 for (size_t iFrame = 0; iFrame < rawPitchPath.size(); ++iFrame)
|
matthiasm@32
|
401 {
|
matthiasm@32
|
402 if (mpOut[iFrame] > 0) {
|
matthiasm@109
|
403
|
matthiasm@32
|
404 pitchTracks[iCandidate][iFrame] = mpOut[iFrame];
|
matthiasm@32
|
405 freqSum[iCandidate] += mpOut[iFrame];
|
matthiasm@32
|
406 freqNumber[iCandidate]++;
|
matthiasm@32
|
407 }
|
matthiasm@32
|
408 }
|
matthiasm@32
|
409 freqMean[iCandidate] = freqSum[iCandidate]*1.0/freqNumber[iCandidate];
|
matthiasm@32
|
410 }
|
matthiasm@32
|
411
|
matthiasm@37
|
412 // find near duplicate pitch tracks
|
matthiasm@34
|
413 vector<size_t> duplicates;
|
matthiasm@34
|
414 for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate) {
|
matthiasm@34
|
415 for (size_t jCandidate = iCandidate+1; jCandidate < m_nCandidate; ++jCandidate) {
|
matthiasm@34
|
416 size_t countEqual = 0;
|
matthiasm@34
|
417 for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
|
matthiasm@34
|
418 {
|
matthiasm@46
|
419 if ((pitchTracks[jCandidate][iFrame] == 0 && pitchTracks[iCandidate][iFrame] == 0) ||
|
matthiasm@46
|
420 fabs(pitchTracks[iCandidate][iFrame]/pitchTracks[jCandidate][iFrame]-1)<0.01)
|
matthiasm@34
|
421 countEqual++;
|
matthiasm@34
|
422 }
|
matthiasm@46
|
423 // std::cerr << "proportion equal: " << (countEqual * 1.0 / nFrame) << std::endl;
|
matthiasm@34
|
424 if (countEqual * 1.0 / nFrame > 0.8) {
|
matthiasm@34
|
425 if (freqNumber[iCandidate] > freqNumber[jCandidate]) {
|
matthiasm@34
|
426 duplicates.push_back(jCandidate);
|
matthiasm@46
|
427 } else if (iCandidate < jCandidate) {
|
matthiasm@34
|
428 duplicates.push_back(iCandidate);
|
matthiasm@34
|
429 }
|
matthiasm@34
|
430 }
|
matthiasm@34
|
431 }
|
matthiasm@34
|
432 }
|
matthiasm@34
|
433
|
matthiasm@37
|
434 // now find non-duplicate pitch tracks
|
Chris@39
|
435 map<int, int> candidateActuals;
|
Chris@39
|
436 map<int, std::string> candidateLabels;
|
Chris@39
|
437
|
matthiasm@46
|
438 vector<vector<float> > outputFrequencies;
|
matthiasm@60
|
439 for (size_t iFrame = 0; iFrame < nFrame; ++iFrame) outputFrequencies.push_back(vector<float>());
|
matthiasm@46
|
440
|
matthiasm@32
|
441 int actualCandidateNumber = 0;
|
matthiasm@110
|
442 for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate)
|
matthiasm@110
|
443 {
|
matthiasm@34
|
444 bool isDuplicate = false;
|
matthiasm@34
|
445 for (size_t i = 0; i < duplicates.size(); ++i) {
|
matthiasm@110
|
446
|
matthiasm@34
|
447 if (duplicates[i] == iCandidate) {
|
matthiasm@34
|
448 isDuplicate = true;
|
matthiasm@34
|
449 break;
|
matthiasm@34
|
450 }
|
matthiasm@34
|
451 }
|
matthiasm@46
|
452 if (!isDuplicate && freqNumber[iCandidate] > 0.5*nFrame)
|
matthiasm@32
|
453 {
|
matthiasm@32
|
454 std::ostringstream convert;
|
matthiasm@32
|
455 convert << actualCandidateNumber++;
|
Chris@39
|
456 candidateLabels[iCandidate] = convert.str();
|
Chris@39
|
457 candidateActuals[iCandidate] = actualCandidateNumber;
|
matthiasm@46
|
458 // std::cerr << iCandidate << " " << actualCandidateNumber << " " << freqNumber[iCandidate] << " " << freqMean[iCandidate] << std::endl;
|
matthiasm@32
|
459 for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
|
matthiasm@32
|
460 {
|
matthiasm@32
|
461 if (pitchTracks[iCandidate][iFrame] > 0)
|
matthiasm@32
|
462 {
|
matthiasm@46
|
463 // featureValues[m_timestamp[iFrame]][iCandidate] =
|
matthiasm@46
|
464 // pitchTracks[iCandidate][iFrame];
|
matthiasm@46
|
465 outputFrequencies[iFrame].push_back(pitchTracks[iCandidate][iFrame]);
|
matthiasm@60
|
466 } else {
|
matthiasm@60
|
467 outputFrequencies[iFrame].push_back(0);
|
matthiasm@32
|
468 }
|
matthiasm@32
|
469 }
|
matthiasm@32
|
470 }
|
matthiasm@43
|
471 // fs[m_oPitchTrackCandidates].push_back(f);
|
matthiasm@32
|
472 }
|
matthiasm@32
|
473
|
Chris@39
|
474 // adapt our features so as to return a stack of candidate values
|
Chris@39
|
475 // per frame
|
Chris@39
|
476
|
Chris@39
|
477 FeatureSet fs;
|
Chris@39
|
478
|
matthiasm@46
|
479 for (size_t iFrame = 0; iFrame < nFrame; ++iFrame){
|
Chris@39
|
480 Feature f;
|
Chris@39
|
481 f.hasTimestamp = true;
|
matthiasm@46
|
482 f.timestamp = m_timestamp[iFrame];
|
matthiasm@46
|
483 f.values = outputFrequencies[iFrame];
|
Chris@39
|
484 fs[0].push_back(f);
|
Chris@39
|
485 }
|
matthiasm@46
|
486
|
matthiasm@46
|
487 // I stopped using Chris's map stuff below because I couldn't get my head around it
|
matthiasm@46
|
488 //
|
matthiasm@46
|
489 // for (map<RealTime, map<int, float> >::const_iterator i =
|
matthiasm@46
|
490 // featureValues.begin(); i != featureValues.end(); ++i) {
|
matthiasm@46
|
491 // Feature f;
|
matthiasm@46
|
492 // f.hasTimestamp = true;
|
matthiasm@46
|
493 // f.timestamp = i->first;
|
matthiasm@46
|
494 // int nextCandidate = candidateActuals.begin()->second;
|
matthiasm@46
|
495 // for (map<int, float>::const_iterator j =
|
matthiasm@46
|
496 // i->second.begin(); j != i->second.end(); ++j) {
|
matthiasm@46
|
497 // while (candidateActuals[j->first] > nextCandidate) {
|
matthiasm@46
|
498 // f.values.push_back(0);
|
matthiasm@46
|
499 // ++nextCandidate;
|
matthiasm@46
|
500 // }
|
matthiasm@46
|
501 // f.values.push_back(j->second);
|
matthiasm@46
|
502 // nextCandidate = j->first + 1;
|
matthiasm@46
|
503 // }
|
matthiasm@46
|
504 // //!!! can't use labels?
|
matthiasm@46
|
505 // fs[0].push_back(f);
|
matthiasm@46
|
506 // }
|
matthiasm@32
|
507
|
matthiasm@32
|
508 return fs;
|
matthiasm@32
|
509 }
|