Chris@47
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
Chris@47
|
2
|
Chris@47
|
3 /*
|
Chris@47
|
4 Centre for Digital Music, Queen Mary University of London.
|
Chris@47
|
5
|
Chris@47
|
6 This program is free software; you can redistribute it and/or
|
Chris@47
|
7 modify it under the terms of the GNU General Public License as
|
Chris@47
|
8 published by the Free Software Foundation; either version 2 of the
|
Chris@47
|
9 License, or (at your option) any later version. See the file
|
Chris@47
|
10 COPYING included with this distribution for more information.
|
Chris@47
|
11 */
|
Chris@47
|
12
|
Chris@55
|
13 #include "TuningDifference.h"
|
Chris@47
|
14
|
Chris@47
|
15 #include <iostream>
|
Chris@47
|
16
|
Chris@47
|
17 #include <cmath>
|
Chris@47
|
18 #include <cstdio>
|
Chris@47
|
19 #include <climits>
|
Chris@47
|
20
|
Chris@47
|
21 #include <algorithm>
|
Chris@47
|
22 #include <numeric>
|
Chris@47
|
23
|
Chris@47
|
24 using namespace std;
|
Chris@47
|
25
|
Chris@47
|
// Convert a pitch number plus an offset in cents into a frequency in Hz.
// Pitch 69 with zero offset maps to concertA; every 12 pitch steps
// (or 1200 cents) doubles the frequency.
static double pitchToFrequency(int pitch,
                               double centsOffset = 0.,
                               double concertA = 440.)
{
    const double fractionalPitch = double(pitch) + (centsOffset / 100.);
    const double octavesFromA = (fractionalPitch - 69.0) / 12.0;
    return concertA * std::pow(2.0, octavesFromA);
}
|
Chris@47
|
33
|
Chris@47
|
34 static double frequencyForCentsAbove440(double cents)
|
Chris@47
|
35 {
|
Chris@47
|
36 return pitchToFrequency(69, cents, 440.);
|
Chris@47
|
37 }
|
Chris@47
|
38
|
Chris@47
|
// Default parameter values, used both to initialise the plugin members
// in the constructor and to fill in the parameter descriptors. They are
// only ever read, so they are declared const.
static const float defaultMaxDuration = 0.f;  // seconds; zero means no limit
static const int defaultMaxSemis = 5;         // search range, in semitones
static const bool defaultFineTuning = true;   // fine tuning stage enabled
|
Chris@47
|
42
|
Chris@55
|
43 TuningDifference::TuningDifference(float inputSampleRate) :
|
Chris@47
|
44 Plugin(inputSampleRate),
|
Chris@47
|
45 m_channelCount(0),
|
Chris@47
|
46 m_bpo(120),
|
Chris@47
|
47 m_blockSize(0),
|
Chris@47
|
48 m_frameCount(0),
|
Chris@47
|
49 m_maxDuration(defaultMaxDuration),
|
Chris@47
|
50 m_maxSemis(defaultMaxSemis),
|
Chris@47
|
51 m_fineTuning(defaultFineTuning)
|
Chris@47
|
52 {
|
Chris@47
|
53 }
|
Chris@47
|
54
|
Chris@55
|
55 TuningDifference::~TuningDifference()
|
Chris@47
|
56 {
|
Chris@47
|
57 }
|
Chris@47
|
58
|
Chris@47
|
59 string
|
Chris@55
|
60 TuningDifference::getIdentifier() const
|
Chris@47
|
61 {
|
Chris@55
|
62 return "tuning-difference";
|
Chris@47
|
63 }
|
Chris@47
|
64
|
Chris@47
|
65 string
|
Chris@55
|
66 TuningDifference::getName() const
|
Chris@47
|
67 {
|
Chris@55
|
68 return "Tuning Difference";
|
Chris@47
|
69 }
|
Chris@47
|
70
|
Chris@47
|
71 string
|
Chris@55
|
72 TuningDifference::getDescription() const
|
Chris@47
|
73 {
|
Chris@47
|
74 return "Estimate the tuning frequencies of a set of recordings at once, by comparing them to a reference recording of the same music whose tuning frequency is known";
|
Chris@47
|
75 }
|
Chris@47
|
76
|
Chris@47
|
77 string
|
Chris@55
|
78 TuningDifference::getMaker() const
|
Chris@47
|
79 {
|
Chris@47
|
80 return "Chris Cannam";
|
Chris@47
|
81 }
|
Chris@47
|
82
|
Chris@47
|
83 int
|
Chris@55
|
84 TuningDifference::getPluginVersion() const
|
Chris@47
|
85 {
|
Chris@47
|
86 // Increment this each time you release a version that behaves
|
Chris@47
|
87 // differently from the previous one
|
Chris@47
|
88 return 3;
|
Chris@47
|
89 }
|
Chris@47
|
90
|
Chris@47
|
91 string
|
Chris@55
|
92 TuningDifference::getCopyright() const
|
Chris@47
|
93 {
|
Chris@47
|
94 // This function is not ideally named. It does not necessarily
|
Chris@47
|
95 // need to say who made the plugin -- getMaker does that -- but it
|
Chris@47
|
96 // should indicate the terms under which it is distributed. For
|
Chris@47
|
97 // example, "Copyright (year). All Rights Reserved", or "GPL"
|
Chris@47
|
98 return "GPL";
|
Chris@47
|
99 }
|
Chris@47
|
100
|
Chris@55
|
101 TuningDifference::InputDomain
|
Chris@55
|
102 TuningDifference::getInputDomain() const
|
Chris@47
|
103 {
|
Chris@47
|
104 return TimeDomain;
|
Chris@47
|
105 }
|
Chris@47
|
106
|
Chris@47
|
107 size_t
|
Chris@55
|
108 TuningDifference::getPreferredBlockSize() const
|
Chris@47
|
109 {
|
Chris@47
|
110 return 0;
|
Chris@47
|
111 }
|
Chris@47
|
112
|
Chris@47
|
113 size_t
|
Chris@55
|
114 TuningDifference::getPreferredStepSize() const
|
Chris@47
|
115 {
|
Chris@47
|
116 return 0;
|
Chris@47
|
117 }
|
Chris@47
|
118
|
Chris@47
|
119 size_t
|
Chris@55
|
120 TuningDifference::getMinChannelCount() const
|
Chris@47
|
121 {
|
Chris@47
|
122 return 2;
|
Chris@47
|
123 }
|
Chris@47
|
124
|
Chris@47
|
125 size_t
|
Chris@55
|
126 TuningDifference::getMaxChannelCount() const
|
Chris@47
|
127 {
|
Chris@52
|
128 return 1000;
|
Chris@47
|
129 }
|
Chris@47
|
130
|
Chris@55
|
131 TuningDifference::ParameterList
|
Chris@55
|
132 TuningDifference::getParameterDescriptors() const
|
Chris@47
|
133 {
|
Chris@47
|
134 ParameterList list;
|
Chris@47
|
135
|
Chris@47
|
136 ParameterDescriptor desc;
|
Chris@47
|
137
|
Chris@47
|
138 desc.identifier = "maxduration";
|
Chris@47
|
139 desc.name = "Maximum duration to analyse";
|
Chris@47
|
140 desc.description = "The maximum duration (in seconds) to consider from either input file, always taken from the start of the input. Zero means there is no limit.";
|
Chris@47
|
141 desc.minValue = 0;
|
Chris@47
|
142 desc.maxValue = 3600;
|
Chris@47
|
143 desc.defaultValue = defaultMaxDuration;
|
Chris@47
|
144 desc.isQuantized = false;
|
Chris@47
|
145 desc.unit = "s";
|
Chris@47
|
146 list.push_back(desc);
|
Chris@47
|
147
|
Chris@47
|
148 desc.identifier = "maxrange";
|
Chris@47
|
149 desc.name = "Maximum range in semitones";
|
Chris@47
|
150 desc.description = "The maximum difference in semitones that will be searched.";
|
Chris@47
|
151 desc.minValue = 1;
|
Chris@47
|
152 desc.maxValue = 11;
|
Chris@47
|
153 desc.defaultValue = defaultMaxSemis;
|
Chris@47
|
154 desc.isQuantized = true;
|
Chris@47
|
155 desc.quantizeStep = 1;
|
Chris@47
|
156 desc.unit = "semitones";
|
Chris@47
|
157 list.push_back(desc);
|
Chris@47
|
158
|
Chris@47
|
159 desc.identifier = "finetuning";
|
Chris@47
|
160 desc.name = "Fine tuning";
|
Chris@47
|
161 desc.description = "Use a fine tuning stage to increase nominal resolution from 10 cents to 1 cent.";
|
Chris@47
|
162 desc.minValue = 0;
|
Chris@47
|
163 desc.maxValue = 1;
|
Chris@47
|
164 desc.defaultValue = (defaultFineTuning ? 1.f : 0.f);
|
Chris@47
|
165 desc.isQuantized = true;
|
Chris@47
|
166 desc.quantizeStep = 1;
|
Chris@47
|
167 desc.unit = "";
|
Chris@47
|
168 list.push_back(desc);
|
Chris@47
|
169
|
Chris@47
|
170 return list;
|
Chris@47
|
171 }
|
Chris@47
|
172
|
Chris@47
|
173 float
|
Chris@55
|
174 TuningDifference::getParameter(string id) const
|
Chris@47
|
175 {
|
Chris@47
|
176 if (id == "maxduration") {
|
Chris@47
|
177 return m_maxDuration;
|
Chris@47
|
178 } else if (id == "maxrange") {
|
Chris@47
|
179 return float(m_maxSemis);
|
Chris@47
|
180 } else if (id == "finetuning") {
|
Chris@47
|
181 return m_fineTuning ? 1.f : 0.f;
|
Chris@47
|
182 }
|
Chris@47
|
183 return 0;
|
Chris@47
|
184 }
|
Chris@47
|
185
|
Chris@47
|
186 void
|
Chris@55
|
187 TuningDifference::setParameter(string id, float value)
|
Chris@47
|
188 {
|
Chris@47
|
189 if (id == "maxduration") {
|
Chris@47
|
190 m_maxDuration = value;
|
Chris@47
|
191 } else if (id == "maxrange") {
|
Chris@47
|
192 m_maxSemis = int(roundf(value));
|
Chris@47
|
193 } else if (id == "finetuning") {
|
Chris@47
|
194 m_fineTuning = (value > 0.5f);
|
Chris@47
|
195 }
|
Chris@47
|
196 }
|
Chris@47
|
197
|
Chris@55
|
198 TuningDifference::ProgramList
|
Chris@55
|
199 TuningDifference::getPrograms() const
|
Chris@47
|
200 {
|
Chris@47
|
201 ProgramList list;
|
Chris@47
|
202 return list;
|
Chris@47
|
203 }
|
Chris@47
|
204
|
Chris@47
|
205 string
|
Chris@55
|
206 TuningDifference::getCurrentProgram() const
|
Chris@47
|
207 {
|
Chris@47
|
208 return ""; // no programs
|
Chris@47
|
209 }
|
Chris@47
|
210
|
Chris@47
|
211 void
|
Chris@55
|
212 TuningDifference::selectProgram(string)
|
Chris@47
|
213 {
|
Chris@47
|
214 }
|
Chris@47
|
215
|
Chris@55
|
216 TuningDifference::OutputList
|
Chris@55
|
217 TuningDifference::getOutputDescriptors() const
|
Chris@47
|
218 {
|
Chris@47
|
219 OutputList list;
|
Chris@47
|
220
|
Chris@47
|
221 OutputDescriptor d;
|
Chris@47
|
222 d.identifier = "cents";
|
Chris@47
|
223 d.name = "Tuning Differences";
|
Chris@47
|
224 d.description = "A single feature vector containing a value for each input channel after the first (reference) channel, containing the difference in averaged frequency profile between that channel and the reference channel, in cents. A positive value means the corresponding channel is higher than the reference.";
|
Chris@47
|
225 d.unit = "cents";
|
Chris@54
|
226 d.hasFixedBinCount = true;
|
Chris@47
|
227 if (m_channelCount > 1) {
|
Chris@47
|
228 d.binCount = m_channelCount - 1;
|
Chris@47
|
229 } else {
|
Chris@54
|
230 d.binCount = 1;
|
Chris@47
|
231 }
|
Chris@47
|
232 d.hasKnownExtents = false;
|
Chris@47
|
233 d.isQuantized = false;
|
Chris@47
|
234 d.sampleType = OutputDescriptor::VariableSampleRate;
|
Chris@47
|
235 d.hasDuration = false;
|
Chris@47
|
236 m_outputs[d.identifier] = int(list.size());
|
Chris@47
|
237 list.push_back(d);
|
Chris@47
|
238
|
Chris@47
|
239 d.identifier = "tuningfreq";
|
Chris@47
|
240 d.name = "Relative Tuning Frequencies";
|
Chris@47
|
241 d.description = "A single feature vector containing a value for each input channel after the first (reference) channel, containing the tuning frequency of that channel, if the reference channel is assumed to contain the same music as it at a tuning frequency of A=440Hz.";
|
Chris@47
|
242 d.unit = "hz";
|
Chris@54
|
243 d.hasFixedBinCount = true;
|
Chris@47
|
244 if (m_channelCount > 1) {
|
Chris@47
|
245 d.binCount = m_channelCount - 1;
|
Chris@47
|
246 } else {
|
Chris@54
|
247 d.binCount = 1;
|
Chris@47
|
248 }
|
Chris@47
|
249 d.hasKnownExtents = false;
|
Chris@47
|
250 d.isQuantized = false;
|
Chris@47
|
251 d.sampleType = OutputDescriptor::VariableSampleRate;
|
Chris@47
|
252 d.hasDuration = false;
|
Chris@47
|
253 m_outputs[d.identifier] = int(list.size());
|
Chris@47
|
254 list.push_back(d);
|
Chris@47
|
255
|
Chris@47
|
256 d.identifier = "reffeature";
|
Chris@47
|
257 d.name = "Reference Feature";
|
Chris@47
|
258 d.description = "Chroma feature from reference channel.";
|
Chris@47
|
259 d.unit = "";
|
Chris@47
|
260 d.hasFixedBinCount = true;
|
Chris@47
|
261 d.binCount = m_bpo;
|
Chris@47
|
262 d.hasKnownExtents = false;
|
Chris@47
|
263 d.isQuantized = false;
|
Chris@47
|
264 d.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@47
|
265 d.sampleRate = 1;
|
Chris@47
|
266 d.hasDuration = false;
|
Chris@47
|
267 m_outputs[d.identifier] = int(list.size());
|
Chris@47
|
268 list.push_back(d);
|
Chris@47
|
269
|
Chris@47
|
270 d.identifier = "otherfeature";
|
Chris@47
|
271 d.name = "Other Features";
|
Chris@47
|
272 d.description = "Series of chroma feature vectors from the non-reference audio channels, before rotation.";
|
Chris@47
|
273 d.unit = "";
|
Chris@47
|
274 d.hasFixedBinCount = true;
|
Chris@47
|
275 d.binCount = m_bpo;
|
Chris@47
|
276 d.hasKnownExtents = false;
|
Chris@47
|
277 d.isQuantized = false;
|
Chris@47
|
278 d.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@47
|
279 d.sampleRate = 1;
|
Chris@47
|
280 d.hasDuration = false;
|
Chris@47
|
281 m_outputs[d.identifier] = int(list.size());
|
Chris@47
|
282 list.push_back(d);
|
Chris@47
|
283
|
Chris@47
|
284 d.identifier = "rotfeature";
|
Chris@47
|
285 d.name = "Other Features at Rotated Frequency";
|
Chris@48
|
286 d.description = "Series of chroma feature vectors from the non-reference audio channels, calculated with the tuning frequency obtained from rotation matching. Note that this does not take into account any fine tuning, only the basic rotation match.";
|
Chris@47
|
287 d.unit = "";
|
Chris@47
|
288 d.hasFixedBinCount = true;
|
Chris@47
|
289 d.binCount = m_bpo;
|
Chris@47
|
290 d.hasKnownExtents = false;
|
Chris@47
|
291 d.isQuantized = false;
|
Chris@47
|
292 d.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@47
|
293 d.sampleRate = 1;
|
Chris@47
|
294 d.hasDuration = false;
|
Chris@47
|
295 m_outputs[d.identifier] = int(list.size());
|
Chris@47
|
296 list.push_back(d);
|
Chris@47
|
297
|
Chris@47
|
298 return list;
|
Chris@47
|
299 }
|
Chris@47
|
300
|
Chris@47
|
301 bool
|
Chris@55
|
302 TuningDifference::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
Chris@47
|
303 {
|
Chris@47
|
304 if (channels < getMinChannelCount()) return false;
|
Chris@47
|
305 if (stepSize != blockSize) return false;
|
Chris@47
|
306 if (m_blockSize > INT_MAX) return false;
|
Chris@47
|
307
|
Chris@50
|
308 m_channelCount = int(channels);
|
Chris@47
|
309 m_blockSize = int(blockSize);
|
Chris@47
|
310
|
Chris@47
|
311 reset();
|
Chris@47
|
312
|
Chris@47
|
313 return true;
|
Chris@47
|
314 }
|
Chris@47
|
315
|
Chris@47
|
316 void
|
Chris@55
|
317 TuningDifference::reset()
|
Chris@47
|
318 {
|
Chris@50
|
319 Chromagram::Parameters params(paramsForTuningFrequency(440.));
|
Chris@50
|
320 m_reference.clear();
|
Chris@50
|
321 m_refChroma.reset(new Chromagram(params));
|
Chris@50
|
322 m_refTotals = TFeature(m_bpo, 0.0);
|
Chris@50
|
323 m_refFeatures.clear();
|
Chris@50
|
324 m_otherChroma.clear();
|
Chris@50
|
325 for (int i = 1; i < m_channelCount; ++i) {
|
Chris@50
|
326 m_otherChroma.push_back(std::make_shared<Chromagram>(params));
|
Chris@47
|
327 }
|
Chris@50
|
328 m_otherTotals = vector<TFeature>(m_channelCount-1, TFeature(m_bpo, 0.0));
|
Chris@50
|
329 m_frameCount = 0;
|
Chris@47
|
330 }
|
Chris@47
|
331
|
Chris@47
|
// Accumulate into a the circular first difference of b: for each index
// i, a[i] grows by b[i] minus the preceding element of b, where the
// element preceding index 0 is the last element. a must have at least
// as many elements as b.
template<typename T>
void addTo(std::vector<T> &a, const std::vector<T> &b)
{
    const int count = int(b.size());
    if (count == 0) return;

    T previous = b[count - 1];  // wrap-around neighbour of element 0
    for (int i = 0; i < count; ++i) {
        const T delta = b[i] - previous;
        a[i] += delta;
        previous = b[i];
    }
}
|
Chris@47
|
343
|
Chris@47
|
// Sum of absolute element-wise differences between a and b. b must
// have at least as many elements as a; only the first a.size() elements
// of b are examined.
template<typename T>
T distance(const std::vector<T> &a, const std::vector<T> &b)
{
    T total = T();
    auto other = b.begin();
    for (auto it = a.begin(); it != a.end(); ++it, ++other) {
        const T gap = T(std::fabs(*it - *other));
        total = total + gap;
    }
    return total;
}
|
Chris@47
|
350
|
Chris@55
|
351 TuningDifference::TFeature
|
Chris@55
|
352 TuningDifference::computeFeatureFromTotals(const TFeature &totals) const
|
Chris@47
|
353 {
|
Chris@47
|
354 if (m_frameCount == 0) return totals;
|
Chris@47
|
355
|
Chris@47
|
356 TFeature feature(m_bpo);
|
Chris@69
|
357 double max = 0.0;
|
Chris@47
|
358
|
Chris@47
|
359 for (int i = 0; i < m_bpo; ++i) {
|
Chris@47
|
360 double value = totals[i] / m_frameCount;
|
Chris@69
|
361 feature[i] = value;
|
Chris@69
|
362 if (fabs(value) > max) {
|
Chris@69
|
363 max = fabs(value);
|
Chris@69
|
364 }
|
Chris@47
|
365 }
|
Chris@47
|
366
|
Chris@69
|
367 if (max > 0.0) {
|
Chris@50
|
368 for (int i = 0; i < m_bpo; ++i) {
|
Chris@69
|
369 feature[i] /= max;
|
Chris@50
|
370 }
|
Chris@47
|
371 }
|
Chris@47
|
372
|
Chris@47
|
373 return feature;
|
Chris@47
|
374 }
|
Chris@47
|
375
|
Chris@47
|
376 Chromagram::Parameters
|
Chris@55
|
377 TuningDifference::paramsForTuningFrequency(double hz) const
|
Chris@47
|
378 {
|
Chris@47
|
379 Chromagram::Parameters params(m_inputSampleRate);
|
Chris@47
|
380 params.lowestOctave = 2;
|
Chris@47
|
381 params.octaveCount = 4;
|
Chris@47
|
382 params.binsPerOctave = m_bpo;
|
Chris@47
|
383 params.tuningFrequency = hz;
|
Chris@47
|
384 params.atomHopFactor = 0.5;
|
Chris@47
|
385 params.window = CQParameters::Hann;
|
Chris@47
|
386 return params;
|
Chris@47
|
387 }
|
Chris@47
|
388
|
Chris@55
|
389 TuningDifference::TFeature
|
Chris@55
|
390 TuningDifference::computeFeatureFromSignal(const Signal &signal,
|
Chris@55
|
391 double hz) const
|
Chris@47
|
392 {
|
Chris@47
|
393 Chromagram chromagram(paramsForTuningFrequency(hz));
|
Chris@47
|
394
|
Chris@47
|
395 TFeature totals(m_bpo, 0.0);
|
Chris@47
|
396
|
Chris@47
|
397 cerr << "computeFeatureFromSignal: hz = " << hz << ", frame count = " << m_frameCount << endl;
|
Chris@47
|
398
|
Chris@47
|
399 for (int i = 0; i < m_frameCount; ++i) {
|
Chris@47
|
400 Signal::const_iterator first = signal.begin() + i * m_blockSize;
|
Chris@47
|
401 Signal::const_iterator last = first + m_blockSize;
|
Chris@47
|
402 if (last > signal.end()) last = signal.end();
|
Chris@47
|
403 CQBase::RealSequence input(first, last);
|
Chris@47
|
404 input.resize(m_blockSize);
|
Chris@47
|
405 CQBase::RealBlock block = chromagram.process(input);
|
Chris@47
|
406 for (const auto &v: block) addTo(totals, v);
|
Chris@47
|
407 }
|
Chris@47
|
408
|
Chris@47
|
409 return computeFeatureFromTotals(totals);
|
Chris@47
|
410 }
|
Chris@47
|
411
|
Chris@55
|
412 TuningDifference::FeatureSet
|
Chris@55
|
413 TuningDifference::process(const float *const *inputBuffers, Vamp::RealTime)
|
Chris@47
|
414 {
|
Chris@47
|
415 if (m_maxDuration > 0) {
|
Chris@47
|
416 int maxFrames = int((m_maxDuration * m_inputSampleRate) /
|
Chris@47
|
417 float(m_blockSize));
|
Chris@47
|
418 if (m_frameCount > maxFrames) return FeatureSet();
|
Chris@47
|
419 }
|
Chris@50
|
420
|
Chris@47
|
421 CQBase::RealBlock block;
|
Chris@47
|
422 CQBase::RealSequence input;
|
Chris@47
|
423
|
Chris@47
|
424 input = CQBase::RealSequence
|
Chris@47
|
425 (inputBuffers[0], inputBuffers[0] + m_blockSize);
|
Chris@47
|
426 block = m_refChroma->process(input);
|
Chris@47
|
427 for (const auto &v: block) addTo(m_refTotals, v);
|
Chris@47
|
428
|
Chris@50
|
429 if (m_fineTuning) {
|
Chris@50
|
430 m_reference.insert(m_reference.end(),
|
Chris@50
|
431 inputBuffers[0],
|
Chris@50
|
432 inputBuffers[0] + m_blockSize);
|
Chris@50
|
433 }
|
Chris@50
|
434
|
Chris@47
|
435 for (int c = 1; c < m_channelCount; ++c) {
|
Chris@50
|
436 input = CQBase::RealSequence
|
Chris@50
|
437 (inputBuffers[c], inputBuffers[c] + m_blockSize);
|
Chris@50
|
438 block = m_otherChroma[c-1]->process(input);
|
Chris@50
|
439 for (const auto &v: block) addTo(m_otherTotals[c-1], v);
|
Chris@47
|
440 }
|
Chris@47
|
441
|
Chris@47
|
442 ++m_frameCount;
|
Chris@47
|
443 return FeatureSet();
|
Chris@47
|
444 }
|
Chris@47
|
445
|
Chris@55
|
446 TuningDifference::FeatureSet
|
Chris@55
|
447 TuningDifference::getRemainingFeatures()
|
Chris@47
|
448 {
|
Chris@47
|
449 FeatureSet fs;
|
Chris@47
|
450 if (m_frameCount == 0) return fs;
|
Chris@47
|
451
|
Chris@50
|
452 m_refFeatures[0] = computeFeatureFromTotals(m_refTotals);
|
Chris@47
|
453
|
Chris@47
|
454 Feature f;
|
Chris@47
|
455 f.hasTimestamp = true;
|
Chris@47
|
456 f.timestamp = Vamp::RealTime::zeroTime;
|
Chris@47
|
457 f.values.clear();
|
Chris@47
|
458 fs[m_outputs["cents"]].push_back(f);
|
Chris@47
|
459 fs[m_outputs["tuningfreq"]].push_back(f);
|
Chris@47
|
460
|
Chris@47
|
461 for (int c = 1; c < m_channelCount; ++c) {
|
Chris@47
|
462 getRemainingFeaturesForChannel(c, fs);
|
Chris@47
|
463 }
|
Chris@47
|
464
|
Chris@47
|
465 return fs;
|
Chris@47
|
466 }
|
Chris@47
|
467
|
Chris@47
|
468 void
|
Chris@55
|
469 TuningDifference::getRemainingFeaturesForChannel(int channel,
|
Chris@55
|
470 FeatureSet &fs)
|
Chris@47
|
471 {
|
Chris@50
|
472 TFeature otherFeature =
|
Chris@50
|
473 computeFeatureFromTotals(m_otherTotals[channel-1]);
|
Chris@47
|
474
|
Chris@47
|
475 Feature f;
|
Chris@47
|
476 f.hasTimestamp = true;
|
Chris@47
|
477 f.timestamp = Vamp::RealTime::zeroTime;
|
Chris@47
|
478
|
Chris@47
|
479 f.values.clear();
|
Chris@50
|
480 for (auto v: m_refFeatures[0]) f.values.push_back(float(v));
|
Chris@47
|
481 fs[m_outputs["reffeature"]].push_back(f);
|
Chris@47
|
482
|
Chris@47
|
483 f.values.clear();
|
Chris@47
|
484 for (auto v: otherFeature) f.values.push_back(float(v));
|
Chris@47
|
485 fs[m_outputs["otherfeature"]].push_back(f);
|
Chris@47
|
486
|
Chris@50
|
487 int rotation = findBestRotation(m_refFeatures[0], otherFeature);
|
Chris@47
|
488
|
Chris@47
|
489 int coarseCents = -(rotation * 1200) / m_bpo;
|
Chris@47
|
490
|
Chris@47
|
491 cerr << "channel " << channel << ": rotation " << rotation << " -> cents " << coarseCents << endl;
|
Chris@47
|
492
|
Chris@50
|
493 TFeature rotatedFeature = otherFeature;
|
Chris@47
|
494 if (rotation != 0) {
|
Chris@50
|
495 rotateFeature(rotatedFeature, rotation);
|
Chris@47
|
496 }
|
Chris@47
|
497
|
Chris@47
|
498 f.values.clear();
|
Chris@50
|
499 for (auto v: rotatedFeature) f.values.push_back(float(v));
|
Chris@47
|
500 fs[m_outputs["rotfeature"]].push_back(f);
|
Chris@47
|
501
|
Chris@48
|
502 if (m_fineTuning) {
|
Chris@48
|
503
|
Chris@55
|
504 pair<int, double> fine =
|
Chris@55
|
505 findFineFrequency(rotatedFeature, coarseCents);
|
Chris@50
|
506
|
Chris@48
|
507 int fineCents = fine.first;
|
Chris@48
|
508 double fineHz = fine.second;
|
Chris@47
|
509
|
Chris@48
|
510 fs[m_outputs["cents"]][0].values.push_back(float(fineCents));
|
Chris@48
|
511 fs[m_outputs["tuningfreq"]][0].values.push_back(float(fineHz));
|
Chris@47
|
512
|
Chris@48
|
513 cerr << "channel " << channel << ": overall best Hz = " << fineHz << endl;
|
Chris@48
|
514
|
Chris@48
|
515 } else {
|
Chris@48
|
516
|
Chris@48
|
517 fs[m_outputs["cents"]][0].values.push_back(float(coarseCents));
|
Chris@48
|
518 fs[m_outputs["tuningfreq"]][0].values.push_back
|
Chris@48
|
519 (float(frequencyForCentsAbove440(coarseCents)));
|
Chris@48
|
520 }
|
Chris@47
|
521 }
|
Chris@47
|
522
|
Chris@50
|
523 void
|
Chris@55
|
524 TuningDifference::rotateFeature(TFeature &r, int rotation) const
|
Chris@50
|
525 {
|
Chris@50
|
526 if (rotation < 0) {
|
Chris@50
|
527 rotate(r.begin(), r.begin() - rotation, r.end());
|
Chris@50
|
528 } else {
|
Chris@50
|
529 rotate(r.begin(), r.end() - rotation, r.end());
|
Chris@50
|
530 }
|
Chris@50
|
531 }
|
Chris@50
|
532
|
Chris@50
|
533 double
|
Chris@55
|
534 TuningDifference::featureDistance(const TFeature &ref,
|
Chris@55
|
535 const TFeature &other,
|
Chris@55
|
536 int rotation) const
|
Chris@50
|
537 {
|
Chris@50
|
538 if (rotation == 0) {
|
Chris@50
|
539 return distance(ref, other);
|
Chris@50
|
540 } else {
|
Chris@50
|
541 // A positive rotation pushes the tuning frequency up for this
|
Chris@50
|
542 // chroma, negative one pulls it down. If a positive rotation
|
Chris@50
|
543 // makes this chroma match an un-rotated reference, then this
|
Chris@50
|
544 // chroma must have initially been lower than the reference.
|
Chris@50
|
545 TFeature r(other);
|
Chris@50
|
546 rotateFeature(r, rotation);
|
Chris@50
|
547 return distance(ref, r);
|
Chris@50
|
548 }
|
Chris@50
|
549 }
|
Chris@50
|
550
|
Chris@50
|
551 int
|
Chris@55
|
552 TuningDifference::findBestRotation(const TFeature &ref,
|
Chris@55
|
553 const TFeature &other) const
|
Chris@50
|
554 {
|
Chris@50
|
555 map<double, int> dists;
|
Chris@50
|
556
|
Chris@50
|
557 int maxRotation = (m_bpo * m_maxSemis) / 12;
|
Chris@50
|
558
|
Chris@50
|
559 for (int r = -maxRotation; r <= maxRotation; ++r) {
|
Chris@50
|
560 double dist = featureDistance(ref, other, r);
|
Chris@50
|
561 dists[dist] = r;
|
Chris@50
|
562 }
|
Chris@50
|
563
|
Chris@50
|
564 int best = dists.begin()->second;
|
Chris@50
|
565
|
Chris@50
|
566 return best;
|
Chris@50
|
567 }
|
Chris@50
|
568
|
Chris@50
|
569 pair<int, double>
|
Chris@55
|
570 TuningDifference::findFineFrequency(const TFeature &rotatedOtherFeature,
|
Chris@55
|
571 int coarseCents)
|
Chris@50
|
572 {
|
Chris@50
|
573 int coarseResolution = 1200 / m_bpo;
|
Chris@50
|
574 int searchDistance = coarseResolution/2 - 1;
|
Chris@50
|
575
|
Chris@50
|
576 int bestCents = coarseCents;
|
Chris@50
|
577 double bestHz = frequencyForCentsAbove440(coarseCents);
|
Chris@50
|
578
|
Chris@50
|
579 cerr << "findFineFrequency: coarse frequency is " << bestHz << endl;
|
Chris@50
|
580 cerr << "searchDistance = " << searchDistance << endl;
|
Chris@50
|
581
|
Chris@50
|
582 double bestScore = 0;
|
Chris@50
|
583 bool firstScore = true;
|
Chris@50
|
584
|
Chris@50
|
585 for (int sign = -1; sign <= 1; sign += 2) {
|
Chris@50
|
586 for (int offset = (sign < 0 ? 0 : 1);
|
Chris@50
|
587 offset <= searchDistance;
|
Chris@50
|
588 ++offset) {
|
Chris@50
|
589
|
Chris@50
|
590 int fineCents = coarseCents + sign * offset;
|
Chris@50
|
591 double fineHz = frequencyForCentsAbove440(fineCents);
|
Chris@50
|
592
|
Chris@50
|
593 cerr << "trying with fineCents = " << fineCents << "..." << endl;
|
Chris@50
|
594
|
Chris@50
|
595 // compare the rotated "other" chroma with a reference
|
Chris@50
|
596 // chroma shifted by the offset in the opposite direction
|
Chris@50
|
597
|
Chris@50
|
598 int compensatingCents = -sign * offset;
|
Chris@50
|
599 TFeature compensatedReference;
|
Chris@50
|
600
|
Chris@50
|
601 if (m_refFeatures.find(compensatingCents) == m_refFeatures.end()) {
|
Chris@50
|
602 double compensatingHz = frequencyForCentsAbove440
|
Chris@50
|
603 (compensatingCents);
|
Chris@50
|
604
|
Chris@50
|
605 compensatedReference = computeFeatureFromSignal
|
Chris@50
|
606 (m_reference, compensatingHz);
|
Chris@50
|
607
|
Chris@50
|
608 m_refFeatures[compensatingCents] = compensatedReference;
|
Chris@50
|
609
|
Chris@50
|
610 } else {
|
Chris@50
|
611
|
Chris@50
|
612 compensatedReference = m_refFeatures[compensatingCents];
|
Chris@50
|
613 }
|
Chris@50
|
614
|
Chris@50
|
615 double fineScore = featureDistance(compensatedReference,
|
Chris@50
|
616 rotatedOtherFeature,
|
Chris@50
|
617 0); // we are rotated already
|
Chris@50
|
618
|
Chris@50
|
619 cerr << "fine offset = " << offset << ", cents = " << fineCents
|
Chris@50
|
620 << ", Hz = " << fineHz << ", score " << fineScore
|
Chris@50
|
621 << " (best score so far " << bestScore << ")" << endl;
|
Chris@50
|
622
|
Chris@50
|
623 if ((fineScore < bestScore) || firstScore) {
|
Chris@50
|
624 cerr << "is good!" << endl;
|
Chris@50
|
625 bestScore = fineScore;
|
Chris@50
|
626 bestCents = fineCents;
|
Chris@50
|
627 bestHz = fineHz;
|
Chris@50
|
628 firstScore = false;
|
Chris@50
|
629 } else {
|
Chris@50
|
630 break;
|
Chris@50
|
631 }
|
Chris@50
|
632 }
|
Chris@50
|
633 }
|
Chris@50
|
634
|
Chris@50
|
635 return pair<int, double>(bestCents, bestHz);
|
Chris@50
|
636 }
|
Chris@50
|
637
|