Mercurial > hg > tuning-difference
comparison src/TuningDifference.cpp @ 21:d660db57e902
Rearrange code, include subrepo etc
author | Chris Cannam |
---|---|
date | Thu, 05 Feb 2015 10:13:31 +0000 |
parents | chroma-compare-plugin/TuningDifference.cpp@331a520cdadb |
children | 6a75d371938f |
comparison
equal
deleted
inserted
replaced
20:331a520cdadb | 21:d660db57e902 |
---|---|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ | |
2 | |
3 /* | |
4 Centre for Digital Music, Queen Mary University of London. | |
5 | |
6 This program is free software; you can redistribute it and/or | |
7 modify it under the terms of the GNU General Public License as | |
8 published by the Free Software Foundation; either version 2 of the | |
9 License, or (at your option) any later version. See the file | |
10 COPYING included with this distribution for more information. | |
11 */ | |
12 | |
#include "TuningDifference.h"

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <functional>
#include <iostream>
#include <numeric>
#include <vector>
21 | |
22 using namespace std; | |
23 | |
/**
 * Convert a MIDI-style pitch number, plus an optional offset in cents,
 * into a frequency in Hz.
 *
 * Pitch 69 with no offset maps to concertA, and every 12 pitch steps
 * (or 1200 cents) doubles the frequency.
 */
static double pitchToFrequency(int pitch,
                               double centsOffset = 0.,
                               double concertA = 440.)
{
    const double fractionalPitch = double(pitch) + (centsOffset / 100.);
    const double octavesFromA = (fractionalPitch - 69.0) / 12.0;
    return concertA * std::pow(2.0, octavesFromA);
}
31 | |
32 static double frequencyForCentsAbove440(double cents) | |
33 { | |
34 return pitchToFrequency(69, cents, 440.); | |
35 } | |
36 | |
37 TuningDifference::TuningDifference(float inputSampleRate) : | |
38 Plugin(inputSampleRate), | |
39 m_bpo(60), | |
40 m_refChroma(new Chromagram(paramsForTuningFrequency(440.))), | |
41 m_blockSize(0), | |
42 m_frameCount(0) | |
43 { | |
44 } | |
45 | |
46 TuningDifference::~TuningDifference() | |
47 { | |
48 } | |
49 | |
50 string | |
51 TuningDifference::getIdentifier() const | |
52 { | |
53 return "tuning-difference"; | |
54 } | |
55 | |
56 string | |
57 TuningDifference::getName() const | |
58 { | |
59 return "Tuning Difference"; | |
60 } | |
61 | |
62 string | |
63 TuningDifference::getDescription() const | |
64 { | |
65 // Return something helpful here! | |
66 return ""; | |
67 } | |
68 | |
69 string | |
70 TuningDifference::getMaker() const | |
71 { | |
72 // Your name here | |
73 return ""; | |
74 } | |
75 | |
76 int | |
77 TuningDifference::getPluginVersion() const | |
78 { | |
79 // Increment this each time you release a version that behaves | |
80 // differently from the previous one | |
81 return 1; | |
82 } | |
83 | |
84 string | |
85 TuningDifference::getCopyright() const | |
86 { | |
87 // This function is not ideally named. It does not necessarily | |
88 // need to say who made the plugin -- getMaker does that -- but it | |
89 // should indicate the terms under which it is distributed. For | |
90 // example, "Copyright (year). All Rights Reserved", or "GPL" | |
91 return ""; | |
92 } | |
93 | |
94 TuningDifference::InputDomain | |
95 TuningDifference::getInputDomain() const | |
96 { | |
97 return TimeDomain; | |
98 } | |
99 | |
100 size_t | |
101 TuningDifference::getPreferredBlockSize() const | |
102 { | |
103 return 0; | |
104 } | |
105 | |
106 size_t | |
107 TuningDifference::getPreferredStepSize() const | |
108 { | |
109 return 0; | |
110 } | |
111 | |
112 size_t | |
113 TuningDifference::getMinChannelCount() const | |
114 { | |
115 return 2; | |
116 } | |
117 | |
118 size_t | |
119 TuningDifference::getMaxChannelCount() const | |
120 { | |
121 return 2; | |
122 } | |
123 | |
124 TuningDifference::ParameterList | |
125 TuningDifference::getParameterDescriptors() const | |
126 { | |
127 ParameterList list; | |
128 //!!! parameter: max search range | |
129 //!!! parameter: fine search precision | |
130 return list; | |
131 } | |
132 | |
133 float | |
134 TuningDifference::getParameter(string) const | |
135 { | |
136 return 0; | |
137 } | |
138 | |
139 void | |
140 TuningDifference::setParameter(string, float) | |
141 { | |
142 } | |
143 | |
144 TuningDifference::ProgramList | |
145 TuningDifference::getPrograms() const | |
146 { | |
147 ProgramList list; | |
148 return list; | |
149 } | |
150 | |
151 string | |
152 TuningDifference::getCurrentProgram() const | |
153 { | |
154 return ""; // no programs | |
155 } | |
156 | |
157 void | |
158 TuningDifference::selectProgram(string) | |
159 { | |
160 } | |
161 | |
162 TuningDifference::OutputList | |
163 TuningDifference::getOutputDescriptors() const | |
164 { | |
165 OutputList list; | |
166 | |
167 OutputDescriptor d; | |
168 d.identifier = "cents"; | |
169 d.name = "Tuning Difference"; | |
170 d.description = "Difference in averaged frequency profile between channels 1 and 2, in cents. A positive value means channel 2 is higher."; | |
171 d.unit = "cents"; | |
172 d.hasFixedBinCount = true; | |
173 d.binCount = 1; | |
174 d.hasKnownExtents = false; | |
175 d.isQuantized = false; | |
176 d.sampleType = OutputDescriptor::VariableSampleRate; | |
177 d.hasDuration = false; | |
178 m_outputs[d.identifier] = list.size(); | |
179 list.push_back(d); | |
180 | |
181 d.identifier = "tuningfreq"; | |
182 d.name = "Relative Tuning Frequency"; | |
183 d.description = "Tuning frequency of channel 2, if channel 1 is assumed to contain the same music as it at a tuning frequency of A=440Hz."; | |
184 d.unit = "hz"; | |
185 d.hasFixedBinCount = true; | |
186 d.binCount = 1; | |
187 d.hasKnownExtents = false; | |
188 d.isQuantized = false; | |
189 d.sampleType = OutputDescriptor::VariableSampleRate; | |
190 d.hasDuration = false; | |
191 m_outputs[d.identifier] = list.size(); | |
192 list.push_back(d); | |
193 | |
194 d.identifier = "reffeature"; | |
195 d.name = "Reference Feature"; | |
196 d.description = "Chroma feature from reference audio."; | |
197 d.unit = ""; | |
198 d.hasFixedBinCount = true; | |
199 d.binCount = m_bpo; | |
200 d.hasKnownExtents = false; | |
201 d.isQuantized = false; | |
202 d.sampleType = OutputDescriptor::FixedSampleRate; | |
203 d.sampleRate = 1; | |
204 d.hasDuration = false; | |
205 m_outputs[d.identifier] = list.size(); | |
206 list.push_back(d); | |
207 | |
208 d.identifier = "otherfeature"; | |
209 d.name = "Other Feature"; | |
210 d.description = "Chroma feature from other audio, before rotation."; | |
211 d.unit = ""; | |
212 d.hasFixedBinCount = true; | |
213 d.binCount = m_bpo; | |
214 d.hasKnownExtents = false; | |
215 d.isQuantized = false; | |
216 d.sampleType = OutputDescriptor::FixedSampleRate; | |
217 d.sampleRate = 1; | |
218 d.hasDuration = false; | |
219 m_outputs[d.identifier] = list.size(); | |
220 list.push_back(d); | |
221 | |
222 d.identifier = "rotfeature"; | |
223 d.name = "Other Feature at Rotated Frequency"; | |
224 d.description = "Chroma feature from reference audio calculated with the tuning frequency obtained from rotation matching."; | |
225 d.unit = ""; | |
226 d.hasFixedBinCount = true; | |
227 d.binCount = m_bpo; | |
228 d.hasKnownExtents = false; | |
229 d.isQuantized = false; | |
230 d.sampleType = OutputDescriptor::FixedSampleRate; | |
231 d.sampleRate = 1; | |
232 d.hasDuration = false; | |
233 m_outputs[d.identifier] = list.size(); | |
234 list.push_back(d); | |
235 | |
236 return list; | |
237 } | |
238 | |
239 bool | |
240 TuningDifference::initialise(size_t channels, size_t stepSize, size_t blockSize) | |
241 { | |
242 if (channels < getMinChannelCount() || | |
243 channels > getMaxChannelCount()) return false; | |
244 | |
245 if (stepSize != blockSize) return false; | |
246 | |
247 m_blockSize = blockSize; | |
248 | |
249 reset(); | |
250 | |
251 return true; | |
252 } | |
253 | |
254 void | |
255 TuningDifference::reset() | |
256 { | |
257 if (m_frameCount > 0) { | |
258 m_refChroma.reset(new Chromagram(paramsForTuningFrequency(440.))); | |
259 m_frameCount = 0; | |
260 } | |
261 m_refTotals = TFeature(m_bpo, 0.0); | |
262 m_other.clear(); | |
263 } | |
264 | |
/**
 * Add the elements of b onto the corresponding elements of a, in place.
 *
 * Only the elements the two vectors have in common are touched: if b
 * is shorter than a, the tail of a is left unchanged rather than
 * reading past the end of b; if b is longer, its extra elements are
 * ignored. a is never resized.
 */
template<typename T>
void addTo(std::vector<T> &a, const std::vector<T> &b)
{
    const typename std::vector<T>::size_type n = std::min(a.size(), b.size());
    std::transform(a.begin(), a.begin() + n, b.begin(), a.begin(),
                   std::plus<T>());
}
270 | |
/**
 * Return the sum of absolute element-wise differences between a and b
 * (the L1 or "city block" distance).
 *
 * If the vectors differ in length, only the elements they have in
 * common are compared, so a shorter b is never read past its end.
 * Two empty vectors are at distance T().
 */
template<typename T>
T distance(const std::vector<T> &a, const std::vector<T> &b)
{
    const typename std::vector<T>::size_type n = std::min(a.size(), b.size());
    return std::inner_product(a.begin(), a.begin() + n, b.begin(), T(),
                              std::plus<T>(),
                              [](T x, T y) { return std::fabs(x - y); });
}
277 | |
278 TuningDifference::TFeature | |
279 TuningDifference::computeFeatureFromTotals(const TFeature &totals) const | |
280 { | |
281 if (m_frameCount == 0) return totals; | |
282 | |
283 TFeature feature(m_bpo); | |
284 double sum = 0.0; | |
285 | |
286 for (int i = 0; i < m_bpo; ++i) { | |
287 double value = totals[i] / m_frameCount; | |
288 feature[i] += value; | |
289 sum += value; | |
290 } | |
291 | |
292 for (int i = 0; i < m_bpo; ++i) { | |
293 feature[i] /= sum; | |
294 } | |
295 | |
296 cerr << "computeFeatureFromTotals: feature values:" << endl; | |
297 for (auto v: feature) cerr << v << " "; | |
298 cerr << endl; | |
299 | |
300 return feature; | |
301 } | |
302 | |
303 Chromagram::Parameters | |
304 TuningDifference::paramsForTuningFrequency(double hz) const | |
305 { | |
306 Chromagram::Parameters params(m_inputSampleRate); | |
307 params.lowestOctave = 0; | |
308 params.octaveCount = 6; | |
309 params.binsPerOctave = m_bpo; | |
310 params.tuningFrequency = hz; | |
311 params.atomHopFactor = 0.5; | |
312 return params; | |
313 } | |
314 | |
315 TuningDifference::TFeature | |
316 TuningDifference::computeFeatureFromSignal(const Signal &signal, double hz) const | |
317 { | |
318 Chromagram chromagram(paramsForTuningFrequency(hz)); | |
319 | |
320 TFeature totals(m_bpo, 0.0); | |
321 | |
322 cerr << "computeFeatureFromSignal: hz = " << hz << ", frame count = " << m_frameCount << endl; | |
323 | |
324 for (int i = 0; i < m_frameCount; ++i) { | |
325 Signal::const_iterator first = signal.begin() + i * m_blockSize; | |
326 Signal::const_iterator last = first + m_blockSize; | |
327 if (last > signal.end()) last = signal.end(); | |
328 CQBase::RealSequence input(first, last); | |
329 input.resize(m_blockSize); | |
330 CQBase::RealBlock block = chromagram.process(input); | |
331 for (const auto &v: block) addTo(totals, v); | |
332 } | |
333 | |
334 return computeFeatureFromTotals(totals); | |
335 } | |
336 | |
337 TuningDifference::FeatureSet | |
338 TuningDifference::process(const float *const *inputBuffers, Vamp::RealTime) | |
339 { | |
340 CQBase::RealBlock block; | |
341 CQBase::RealSequence input; | |
342 | |
343 input = CQBase::RealSequence | |
344 (inputBuffers[0], inputBuffers[0] + m_blockSize); | |
345 block = m_refChroma->process(input); | |
346 for (const auto &v: block) addTo(m_refTotals, v); | |
347 | |
348 m_other.insert(m_other.end(), | |
349 inputBuffers[1], inputBuffers[1] + m_blockSize); | |
350 | |
351 ++m_frameCount; | |
352 return FeatureSet(); | |
353 } | |
354 | |
355 double | |
356 TuningDifference::featureDistance(const TFeature &other, int rotation) const | |
357 { | |
358 if (rotation == 0) { | |
359 return distance(m_refFeature, other); | |
360 } else { | |
361 // A positive rotation pushes the tuning frequency up for this | |
362 // chroma, negative one pulls it down. If a positive rotation | |
363 // makes this chroma match an un-rotated reference, then this | |
364 // chroma must have initially been lower than the reference. | |
365 TFeature r(other); | |
366 if (rotation < 0) { | |
367 rotate(r.begin(), r.begin() - rotation, r.end()); | |
368 } else { | |
369 rotate(r.begin(), r.end() - rotation, r.end()); | |
370 } | |
371 return distance(m_refFeature, r); | |
372 } | |
373 } | |
374 | |
375 int | |
376 TuningDifference::findBestRotation(const TFeature &other) const | |
377 { | |
378 map<double, int> dists; | |
379 | |
380 int maxSemis = 6; | |
381 int maxRotation = (m_bpo * maxSemis) / 12; | |
382 | |
383 for (int r = -maxRotation; r <= maxRotation; ++r) { | |
384 double dist = featureDistance(other, r); | |
385 dists[dist] = r; | |
386 cerr << "rotation " << r << ": score " << dist << endl; | |
387 } | |
388 | |
389 int best = dists.begin()->second; | |
390 | |
391 cerr << "best is " << best << endl; | |
392 return best; | |
393 } | |
394 | |
395 pair<int, double> | |
396 TuningDifference::findFineFrequency(int coarseCents, double coarseScore) | |
397 { | |
398 int coarseResolution = 1200 / m_bpo; | |
399 int searchDistance = coarseResolution/2 - 1; | |
400 | |
401 double bestScore = coarseScore; | |
402 int bestCents = coarseCents; | |
403 double bestHz = frequencyForCentsAbove440(coarseCents); | |
404 | |
405 cerr << "corresponding coarse Hz " << bestHz << " scores " << coarseScore << endl; | |
406 cerr << "searchDistance = " << searchDistance << endl; | |
407 | |
408 for (int sign = -1; sign <= 1; sign += 2) { | |
409 for (int offset = 1; offset <= searchDistance; ++offset) { | |
410 | |
411 int fineCents = coarseCents + sign * offset; | |
412 | |
413 cerr << "trying with fineCents = " << fineCents << "..." << endl; | |
414 | |
415 double fineHz = frequencyForCentsAbove440(fineCents); | |
416 TFeature fineFeature = computeFeatureFromSignal(m_other, fineHz); | |
417 double fineScore = featureDistance(fineFeature); | |
418 | |
419 cerr << "fine offset = " << offset << ", cents = " << fineCents | |
420 << ", Hz = " << fineHz << ", score " << fineScore | |
421 << " (best score so far " << bestScore << ")" << endl; | |
422 | |
423 if (fineScore < bestScore) { | |
424 cerr << "is good!" << endl; | |
425 bestScore = fineScore; | |
426 bestCents = fineCents; | |
427 bestHz = fineHz; | |
428 } else { | |
429 break; | |
430 } | |
431 } | |
432 } | |
433 | |
434 //!!! could keep a vector of scores & then interpolate... | |
435 | |
436 return pair<int, double>(bestCents, bestHz); | |
437 } | |
438 | |
439 TuningDifference::FeatureSet | |
440 TuningDifference::getRemainingFeatures() | |
441 { | |
442 FeatureSet fs; | |
443 if (m_frameCount == 0) return fs; | |
444 | |
445 m_refFeature = computeFeatureFromTotals(m_refTotals); | |
446 TFeature otherFeature = computeFeatureFromSignal(m_other, 440.); | |
447 | |
448 Feature f; | |
449 | |
450 f.values.clear(); | |
451 for (auto v: m_refFeature) f.values.push_back(v); | |
452 fs[m_outputs["reffeature"]].push_back(f); | |
453 | |
454 f.values.clear(); | |
455 for (auto v: otherFeature) f.values.push_back(v); | |
456 fs[m_outputs["otherfeature"]].push_back(f); | |
457 | |
458 int rotation = findBestRotation(otherFeature); | |
459 | |
460 int coarseCents = -(rotation * 1200) / m_bpo; | |
461 | |
462 cerr << "rotation " << rotation << " -> cents " << coarseCents << endl; | |
463 | |
464 double coarseHz = frequencyForCentsAbove440(coarseCents); | |
465 | |
466 TFeature coarseFeature = computeFeatureFromSignal(m_other, coarseHz); | |
467 double coarseScore = featureDistance(coarseFeature); | |
468 | |
469 cerr << "corresponding Hz " << coarseHz << " scores " << coarseScore << endl; | |
470 | |
471 //!!! This should be returning the fine chroma, not the coarse | |
472 f.values.clear(); | |
473 for (auto v: coarseFeature) f.values.push_back(v); | |
474 fs[m_outputs["rotfeature"]].push_back(f); | |
475 | |
476 pair<int, double> fine = findFineFrequency(coarseCents, coarseScore); | |
477 int fineCents = fine.first; | |
478 double fineHz = fine.second; | |
479 | |
480 f.values.clear(); | |
481 f.values.push_back(fineHz); | |
482 fs[m_outputs["tuningfreq"]].push_back(f); | |
483 | |
484 f.values.clear(); | |
485 f.values.push_back(fineCents); | |
486 fs[m_outputs["cents"]].push_back(f); | |
487 | |
488 cerr << "overall best Hz = " << fineHz << endl; | |
489 | |
490 return fs; | |
491 } | |
492 |