Mercurial > hg > silvet
comparison src/Silvet.cpp @ 314:f98ba4f47e49 livemode
Merge from default branch
author | Chris Cannam |
---|---|
date | Tue, 28 Apr 2015 11:24:23 +0100 |
parents | 5a181a427ac8 fa2ffbb786df |
children | f3e10617a60d |
comparison
equal
deleted
inserted
replaced
308:26be240475b5 | 314:f98ba4f47e49 |
---|---|
22 #include "constant-q-cpp/src/dsp/Resampler.h" | 22 #include "constant-q-cpp/src/dsp/Resampler.h" |
23 #include "flattendynamics-ladspa.h" | 23 #include "flattendynamics-ladspa.h" |
24 #include "LiveInstruments.h" | 24 #include "LiveInstruments.h" |
25 | 25 |
26 #include <vector> | 26 #include <vector> |
27 #include <future> | |
27 | 28 |
28 #include <cstdio> | 29 #include <cstdio> |
29 | 30 |
30 using std::vector; | 31 using std::vector; |
31 using std::cout; | 32 using std::cout; |
32 using std::cerr; | 33 using std::cerr; |
33 using std::endl; | 34 using std::endl; |
35 using std::pair; | |
36 using std::future; | |
37 using std::async; | |
34 using Vamp::RealTime; | 38 using Vamp::RealTime; |
35 | 39 |
36 static int processingSampleRate = 44100; | 40 static int processingSampleRate = 44100; |
37 | 41 |
38 static int binsPerSemitoneLive = 1; | 42 static int binsPerSemitoneLive = 1; |
49 m_flattener(0), | 53 m_flattener(0), |
50 m_cq(0), | 54 m_cq(0), |
51 m_mode(HighQualityMode), | 55 m_mode(HighQualityMode), |
52 m_fineTuning(false), | 56 m_fineTuning(false), |
53 m_instrument(0), | 57 m_instrument(0), |
54 m_colsPerSec(50) | 58 m_colsPerSec(50), |
59 m_haveStartTime(false) | |
55 { | 60 { |
56 } | 61 } |
57 | 62 |
58 Silvet::~Silvet() | 63 Silvet::~Silvet() |
59 { | 64 { |
293 d.sampleRate = m_colsPerSec; | 298 d.sampleRate = m_colsPerSec; |
294 d.hasDuration = false; | 299 d.hasDuration = false; |
295 m_pitchOutputNo = list.size(); | 300 m_pitchOutputNo = list.size(); |
296 list.push_back(d); | 301 list.push_back(d); |
297 | 302 |
303 d.identifier = "chroma"; | |
304 d.name = "Pitch chroma distribution"; | |
305 d.description = "Pitch chroma distribution formed by wrapping the un-thresholded pitch activation distribution into a single octave of semitone bins."; | |
306 d.unit = ""; | |
307 d.hasFixedBinCount = true; | |
308 d.binCount = 12; | |
309 d.binNames.clear(); | |
310 if (m_cq) { | |
311 for (int i = 0; i < 12; ++i) { | |
312 d.binNames.push_back(chromaName(i)); | |
313 } | |
314 } | |
315 d.hasKnownExtents = false; | |
316 d.isQuantized = false; | |
317 d.sampleType = OutputDescriptor::FixedSampleRate; | |
318 d.sampleRate = m_colsPerSec; | |
319 d.hasDuration = false; | |
320 m_chromaOutputNo = list.size(); | |
321 list.push_back(d); | |
322 | |
298 d.identifier = "templates"; | 323 d.identifier = "templates"; |
299 d.name = "Templates"; | 324 d.name = "Templates"; |
300 d.description = "Constant-Q spectral templates for the selected instrument pack."; | 325 d.description = "Constant-Q spectral templates for the selected instrument pack."; |
301 d.unit = ""; | 326 d.unit = ""; |
302 d.hasFixedBinCount = true; | 327 d.hasFixedBinCount = true; |
326 | 351 |
327 return list; | 352 return list; |
328 } | 353 } |
329 | 354 |
330 std::string | 355 std::string |
331 Silvet::noteName(int note, int shift, int shiftCount) const | 356 Silvet::chromaName(int pitch) const |
332 { | 357 { |
333 static const char *names[] = { | 358 static const char *names[] = { |
334 "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#" | 359 "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#" |
335 }; | 360 }; |
336 | 361 |
337 const char *n = names[note % 12]; | 362 return names[pitch]; |
363 } | |
364 | |
365 std::string | |
366 Silvet::noteName(int note, int shift, int shiftCount) const | |
367 { | |
368 string n = chromaName(note % 12); | |
338 | 369 |
339 int oct = (note + 9) / 12; | 370 int oct = (note + 9) / 12; |
340 | 371 |
341 char buf[30]; | 372 char buf[30]; |
342 | 373 |
346 pshift = | 377 pshift = |
347 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount; | 378 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount; |
348 } | 379 } |
349 | 380 |
350 if (pshift > 0.f) { | 381 if (pshift > 0.f) { |
351 sprintf(buf, "%s%d+%dc", n, oct, int(round(pshift * 100))); | 382 sprintf(buf, "%s%d+%dc", n.c_str(), oct, int(round(pshift * 100))); |
352 } else if (pshift < 0.f) { | 383 } else if (pshift < 0.f) { |
353 sprintf(buf, "%s%d-%dc", n, oct, int(round((-pshift) * 100))); | 384 sprintf(buf, "%s%d-%dc", n.c_str(), oct, int(round((-pshift) * 100))); |
354 } else { | 385 } else { |
355 sprintf(buf, "%s%d", n, oct); | 386 sprintf(buf, "%s%d", n.c_str(), oct); |
356 } | 387 } |
357 | 388 |
358 return buf; | 389 return buf; |
359 } | 390 } |
360 | 391 |
490 m_pianoRoll.clear(); | 521 m_pianoRoll.clear(); |
491 m_inputGains.clear(); | 522 m_inputGains.clear(); |
492 m_columnCount = 0; | 523 m_columnCount = 0; |
493 m_resampledCount = 0; | 524 m_resampledCount = 0; |
494 m_startTime = RealTime::zeroTime; | 525 m_startTime = RealTime::zeroTime; |
526 m_haveStartTime = false; | |
495 } | 527 } |
496 | 528 |
497 Silvet::FeatureSet | 529 Silvet::FeatureSet |
498 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp) | 530 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp) |
499 { | 531 { |
500 FeatureSet fs; | 532 FeatureSet fs; |
501 | 533 |
502 if (m_columnCount == 0) { | 534 if (!m_haveStartTime) { |
535 | |
503 m_startTime = timestamp; | 536 m_startTime = timestamp; |
537 m_haveStartTime = true; | |
538 | |
504 insertTemplateFeatures(fs); | 539 insertTemplateFeatures(fs); |
505 } | 540 } |
506 | 541 |
507 vector<float> flattened(m_blockSize); | 542 vector<float> flattened(m_blockSize); |
508 float gain = 1.f; | 543 float gain = 1.f; |
595 fs[m_fcqOutputNo].push_back(f); | 630 fs[m_fcqOutputNo].push_back(f); |
596 } | 631 } |
597 | 632 |
598 int width = filtered.size(); | 633 int width = filtered.size(); |
599 | 634 |
600 int iterations = (m_mode == HighQualityMode ? 20 : 10); | 635 Grid localPitches(width); |
601 | |
602 Grid localPitches(width, vector<double>(pack.templateNoteCount, 0.0)); | |
603 | 636 |
604 bool wantShifts = (m_mode == HighQualityMode) && m_fineTuning; | 637 bool wantShifts = (m_mode == HighQualityMode) && m_fineTuning; |
605 int shiftCount = 1; | 638 int shiftCount = 1; |
606 if (wantShifts) { | 639 if (wantShifts) { |
607 shiftCount = pack.templateMaxShift * 2 + 1; | 640 shiftCount = pack.templateMaxShift * 2 + 1; |
608 } | 641 } |
609 | 642 |
610 vector<vector<int> > localBestShifts; | 643 vector<vector<int> > localBestShifts; |
611 if (wantShifts) { | 644 if (wantShifts) { |
612 localBestShifts = | 645 localBestShifts = vector<vector<int> >(width); |
613 vector<vector<int> >(width, vector<int>(pack.templateNoteCount, 0)); | 646 } |
614 } | 647 |
615 | 648 #ifndef MAX_EM_THREADS |
616 double columnThreshold = 1e-5; | 649 #define MAX_EM_THREADS 8 |
617 | 650 #endif |
618 if (m_mode == LiveMode) { | 651 |
619 columnThreshold /= 20; | 652 #if (defined(MAX_EM_THREADS) && (MAX_EM_THREADS > 1)) |
620 } | 653 for (int i = 0; i < width; ) { |
621 | 654 typedef future<pair<vector<double>, vector<int>>> EMFuture; |
622 #pragma omp parallel for | 655 vector<EMFuture> results; |
656 for (int j = 0; j < MAX_EM_THREADS && i + j < width; ++j) { | |
657 results.push_back | |
658 (async(std::launch::async, | |
659 [&](int index) { | |
660 return applyEM(pack, filtered.at(index), wantShifts); | |
661 }, i + j)); | |
662 } | |
663 for (int j = 0; j < MAX_EM_THREADS && i + j < width; ++j) { | |
664 auto out = results[j].get(); | |
665 localPitches[i+j] = out.first; | |
666 if (wantShifts) localBestShifts[i+j] = out.second; | |
667 } | |
668 i += MAX_EM_THREADS; | |
669 } | |
670 #else | |
623 for (int i = 0; i < width; ++i) { | 671 for (int i = 0; i < width; ++i) { |
624 | 672 auto out = applyEM(pack, filtered.at(i), wantShifts); |
625 double sum = 0.0; | 673 localPitches[i] = out.first; |
626 for (int j = 0; j < pack.templateHeight; ++j) { | 674 if (wantShifts) localBestShifts[i] = out.second; |
627 sum += filtered.at(i).at(j); | 675 } |
628 } | 676 #endif |
629 if (sum < columnThreshold) continue; | |
630 | |
631 EM em(&pack, m_mode == HighQualityMode); | |
632 | |
633 em.setPitchSparsity(pack.pitchSparsity); | |
634 em.setSourceSparsity(pack.sourceSparsity); | |
635 | |
636 for (int j = 0; j < iterations; ++j) { | |
637 em.iterate(filtered.at(i).data()); | |
638 } | |
639 | |
640 const float *pitchDist = em.getPitchDistribution(); | |
641 const float *const *shiftDist = em.getShifts(); | |
642 | |
643 for (int j = 0; j < pack.templateNoteCount; ++j) { | |
644 | |
645 localPitches[i][j] = pitchDist[j] * sum; | |
646 | |
647 int bestShift = 0; | |
648 float bestShiftValue = 0.0; | |
649 if (wantShifts) { | |
650 for (int k = 0; k < shiftCount; ++k) { | |
651 float value = shiftDist[k][j]; | |
652 if (k == 0 || value > bestShiftValue) { | |
653 bestShiftValue = value; | |
654 bestShift = k; | |
655 } | |
656 } | |
657 localBestShifts[i][j] = bestShift; | |
658 } | |
659 } | |
660 } | |
661 | 677 |
662 for (int i = 0; i < width; ++i) { | 678 for (int i = 0; i < width; ++i) { |
663 | 679 |
680 // This returns a filtered column, and pushes the | |
681 // up-to-max-polyphony activation column to m_pianoRoll | |
664 vector<double> filtered = postProcess | 682 vector<double> filtered = postProcess |
665 (localPitches[i], localBestShifts[i], wantShifts); | 683 (localPitches[i], localBestShifts[i], wantShifts); |
666 | 684 |
685 RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1); | |
686 float inputGain = getInputGainAt(timestamp); | |
687 | |
667 Feature f; | 688 Feature f; |
668 for (int j = 0; j < (int)filtered.size(); ++j) { | 689 for (int j = 0; j < (int)filtered.size(); ++j) { |
669 float v(filtered[j]); | 690 float v = filtered[j]; |
670 if (v < pack.levelThreshold) v = 0.f; | 691 if (v < pack.levelThreshold) v = 0.f; |
671 f.values.push_back(v); | 692 f.values.push_back(v / inputGain); |
672 } | 693 } |
673 fs[m_pitchOutputNo].push_back(f); | 694 fs[m_pitchOutputNo].push_back(f); |
695 | |
696 f.values.clear(); | |
697 f.values.resize(12); | |
698 for (int j = 0; j < (int)filtered.size(); ++j) { | |
699 f.values[j % 12] += filtered[j] / inputGain; | |
700 } | |
701 fs[m_chromaOutputNo].push_back(f); | |
674 | 702 |
675 FeatureList noteFeatures = noteTrack(shiftCount); | 703 FeatureList noteFeatures = noteTrack(shiftCount); |
676 | 704 |
677 for (FeatureList::const_iterator fi = noteFeatures.begin(); | 705 for (FeatureList::const_iterator fi = noteFeatures.begin(); |
678 fi != noteFeatures.end(); ++fi) { | 706 fi != noteFeatures.end(); ++fi) { |
679 fs[m_notesOutputNo].push_back(*fi); | 707 fs[m_notesOutputNo].push_back(*fi); |
680 } | 708 } |
681 } | 709 } |
710 } | |
711 | |
712 pair<vector<double>, vector<int> > | |
713 Silvet::applyEM(const InstrumentPack &pack, | |
714 const vector<double> &column, | |
715 bool wantShifts) | |
716 { | |
717 double columnThreshold = 1e-5; | |
718 | |
719 if (m_mode == LiveMode) { | |
720 columnThreshold /= 20; | |
721 } | |
722 | |
723 vector<double> pitches(pack.templateNoteCount, 0.0); | |
724 vector<int> bestShifts; | |
725 | |
726 double sum = 0.0; | |
727 for (int j = 0; j < pack.templateHeight; ++j) { | |
728 sum += column.at(j); | |
729 } | |
730 if (sum < columnThreshold) return { pitches, bestShifts }; | |
731 | |
732 EM em(&pack, m_mode == HighQualityMode); | |
733 | |
734 em.setPitchSparsity(pack.pitchSparsity); | |
735 em.setSourceSparsity(pack.sourceSparsity); | |
736 | |
737 int iterations = (m_mode == HighQualityMode ? 20 : 10); | |
738 | |
739 for (int j = 0; j < iterations; ++j) { | |
740 em.iterate(column.data()); | |
741 } | |
742 | |
743 const float *pitchDist = em.getPitchDistribution(); | |
744 const float *const *shiftDist = em.getShifts(); | |
745 | |
746 int shiftCount = 1; | |
747 if (wantShifts) { | |
748 shiftCount = pack.templateMaxShift * 2 + 1; | |
749 } | |
750 | |
751 for (int j = 0; j < pack.templateNoteCount; ++j) { | |
752 | |
753 pitches[j] = pitchDist[j] * sum; | |
754 | |
755 int bestShift = 0; | |
756 float bestShiftValue = 0.0; | |
757 if (wantShifts) { | |
758 for (int k = 0; k < shiftCount; ++k) { | |
759 float value = shiftDist[k][j]; | |
760 if (k == 0 || value > bestShiftValue) { | |
761 bestShiftValue = value; | |
762 bestShift = k; | |
763 } | |
764 } | |
765 bestShifts.push_back(bestShift); | |
766 } | |
767 } | |
768 | |
769 return { pitches, bestShifts }; | |
682 } | 770 } |
683 | 771 |
684 Silvet::Grid | 772 Silvet::Grid |
685 Silvet::preProcess(const Grid &in) | 773 Silvet::preProcess(const Grid &in) |
686 { | 774 { |
941 shiftCount, | 1029 shiftCount, |
942 partVelocity)); | 1030 partVelocity)); |
943 } | 1031 } |
944 } | 1032 } |
945 | 1033 |
1034 RealTime | |
1035 Silvet::getColumnTimestamp(int column) | |
1036 { | |
1037 double columnDuration = 1.0 / m_colsPerSec; | |
1038 int postFilterLatency = int(m_postFilter[0]->getSize() / 2); | |
1039 | |
1040 return m_startTime + RealTime::fromSeconds | |
1041 (columnDuration * (column - postFilterLatency) + 0.02); | |
1042 } | |
1043 | |
946 Silvet::Feature | 1044 Silvet::Feature |
947 Silvet::makeNoteFeature(int start, | 1045 Silvet::makeNoteFeature(int start, |
948 int end, | 1046 int end, |
949 int note, | 1047 int note, |
950 int shift, | 1048 int shift, |
951 int shiftCount, | 1049 int shiftCount, |
952 int velocity) | 1050 int velocity) |
953 { | 1051 { |
954 double columnDuration = 1.0 / m_colsPerSec; | |
955 int postFilterLatency = int(m_postFilter[0]->getSize() / 2); | |
956 | |
957 Feature f; | 1052 Feature f; |
958 | 1053 |
959 f.hasTimestamp = true; | 1054 f.hasTimestamp = true; |
960 f.timestamp = m_startTime + RealTime::fromSeconds | 1055 f.timestamp = getColumnTimestamp(start); |
961 (columnDuration * (start - postFilterLatency) + 0.02); | |
962 | 1056 |
963 f.hasDuration = true; | 1057 f.hasDuration = true; |
964 f.duration = RealTime::fromSeconds | 1058 f.duration = getColumnTimestamp(end) - f.timestamp; |
965 (columnDuration * (end - start)); | |
966 | 1059 |
967 f.values.clear(); | 1060 f.values.clear(); |
968 | 1061 |
969 f.values.push_back | 1062 f.values.push_back |
970 (noteFrequency(note, shift, shiftCount)); | 1063 (noteFrequency(note, shift, shiftCount)); |