comparison src/Silvet.cpp @ 314:f98ba4f47e49 livemode

Merge from default branch
author Chris Cannam
date Tue, 28 Apr 2015 11:24:23 +0100
parents 5a181a427ac8 fa2ffbb786df
children f3e10617a60d
comparison
equal deleted inserted replaced
308:26be240475b5 314:f98ba4f47e49
22 #include "constant-q-cpp/src/dsp/Resampler.h" 22 #include "constant-q-cpp/src/dsp/Resampler.h"
23 #include "flattendynamics-ladspa.h" 23 #include "flattendynamics-ladspa.h"
24 #include "LiveInstruments.h" 24 #include "LiveInstruments.h"
25 25
26 #include <vector> 26 #include <vector>
27 #include <future>
27 28
28 #include <cstdio> 29 #include <cstdio>
29 30
30 using std::vector; 31 using std::vector;
31 using std::cout; 32 using std::cout;
32 using std::cerr; 33 using std::cerr;
33 using std::endl; 34 using std::endl;
35 using std::pair;
36 using std::future;
37 using std::async;
34 using Vamp::RealTime; 38 using Vamp::RealTime;
35 39
36 static int processingSampleRate = 44100; 40 static int processingSampleRate = 44100;
37 41
38 static int binsPerSemitoneLive = 1; 42 static int binsPerSemitoneLive = 1;
49 m_flattener(0), 53 m_flattener(0),
50 m_cq(0), 54 m_cq(0),
51 m_mode(HighQualityMode), 55 m_mode(HighQualityMode),
52 m_fineTuning(false), 56 m_fineTuning(false),
53 m_instrument(0), 57 m_instrument(0),
54 m_colsPerSec(50) 58 m_colsPerSec(50),
59 m_haveStartTime(false)
55 { 60 {
56 } 61 }
57 62
58 Silvet::~Silvet() 63 Silvet::~Silvet()
59 { 64 {
293 d.sampleRate = m_colsPerSec; 298 d.sampleRate = m_colsPerSec;
294 d.hasDuration = false; 299 d.hasDuration = false;
295 m_pitchOutputNo = list.size(); 300 m_pitchOutputNo = list.size();
296 list.push_back(d); 301 list.push_back(d);
297 302
303 d.identifier = "chroma";
304 d.name = "Pitch chroma distribution";
305 d.description = "Pitch chroma distribution formed by wrapping the un-thresholded pitch activation distribution into a single octave of semitone bins.";
306 d.unit = "";
307 d.hasFixedBinCount = true;
308 d.binCount = 12;
309 d.binNames.clear();
310 if (m_cq) {
311 for (int i = 0; i < 12; ++i) {
312 d.binNames.push_back(chromaName(i));
313 }
314 }
315 d.hasKnownExtents = false;
316 d.isQuantized = false;
317 d.sampleType = OutputDescriptor::FixedSampleRate;
318 d.sampleRate = m_colsPerSec;
319 d.hasDuration = false;
320 m_chromaOutputNo = list.size();
321 list.push_back(d);
322
298 d.identifier = "templates"; 323 d.identifier = "templates";
299 d.name = "Templates"; 324 d.name = "Templates";
300 d.description = "Constant-Q spectral templates for the selected instrument pack."; 325 d.description = "Constant-Q spectral templates for the selected instrument pack.";
301 d.unit = ""; 326 d.unit = "";
302 d.hasFixedBinCount = true; 327 d.hasFixedBinCount = true;
326 351
327 return list; 352 return list;
328 } 353 }
329 354
330 std::string 355 std::string
331 Silvet::noteName(int note, int shift, int shiftCount) const 356 Silvet::chromaName(int pitch) const
332 { 357 {
333 static const char *names[] = { 358 static const char *names[] = {
334 "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#" 359 "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"
335 }; 360 };
336 361
337 const char *n = names[note % 12]; 362 return names[pitch];
363 }
364
365 std::string
366 Silvet::noteName(int note, int shift, int shiftCount) const
367 {
368 string n = chromaName(note % 12);
338 369
339 int oct = (note + 9) / 12; 370 int oct = (note + 9) / 12;
340 371
341 char buf[30]; 372 char buf[30];
342 373
346 pshift = 377 pshift =
347 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount; 378 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
348 } 379 }
349 380
350 if (pshift > 0.f) { 381 if (pshift > 0.f) {
351 sprintf(buf, "%s%d+%dc", n, oct, int(round(pshift * 100))); 382 sprintf(buf, "%s%d+%dc", n.c_str(), oct, int(round(pshift * 100)));
352 } else if (pshift < 0.f) { 383 } else if (pshift < 0.f) {
353 sprintf(buf, "%s%d-%dc", n, oct, int(round((-pshift) * 100))); 384 sprintf(buf, "%s%d-%dc", n.c_str(), oct, int(round((-pshift) * 100)));
354 } else { 385 } else {
355 sprintf(buf, "%s%d", n, oct); 386 sprintf(buf, "%s%d", n.c_str(), oct);
356 } 387 }
357 388
358 return buf; 389 return buf;
359 } 390 }
360 391
490 m_pianoRoll.clear(); 521 m_pianoRoll.clear();
491 m_inputGains.clear(); 522 m_inputGains.clear();
492 m_columnCount = 0; 523 m_columnCount = 0;
493 m_resampledCount = 0; 524 m_resampledCount = 0;
494 m_startTime = RealTime::zeroTime; 525 m_startTime = RealTime::zeroTime;
526 m_haveStartTime = false;
495 } 527 }
496 528
497 Silvet::FeatureSet 529 Silvet::FeatureSet
498 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp) 530 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
499 { 531 {
500 FeatureSet fs; 532 FeatureSet fs;
501 533
502 if (m_columnCount == 0) { 534 if (!m_haveStartTime) {
535
503 m_startTime = timestamp; 536 m_startTime = timestamp;
537 m_haveStartTime = true;
538
504 insertTemplateFeatures(fs); 539 insertTemplateFeatures(fs);
505 } 540 }
506 541
507 vector<float> flattened(m_blockSize); 542 vector<float> flattened(m_blockSize);
508 float gain = 1.f; 543 float gain = 1.f;
595 fs[m_fcqOutputNo].push_back(f); 630 fs[m_fcqOutputNo].push_back(f);
596 } 631 }
597 632
598 int width = filtered.size(); 633 int width = filtered.size();
599 634
600 int iterations = (m_mode == HighQualityMode ? 20 : 10); 635 Grid localPitches(width);
601
602 Grid localPitches(width, vector<double>(pack.templateNoteCount, 0.0));
603 636
604 bool wantShifts = (m_mode == HighQualityMode) && m_fineTuning; 637 bool wantShifts = (m_mode == HighQualityMode) && m_fineTuning;
605 int shiftCount = 1; 638 int shiftCount = 1;
606 if (wantShifts) { 639 if (wantShifts) {
607 shiftCount = pack.templateMaxShift * 2 + 1; 640 shiftCount = pack.templateMaxShift * 2 + 1;
608 } 641 }
609 642
610 vector<vector<int> > localBestShifts; 643 vector<vector<int> > localBestShifts;
611 if (wantShifts) { 644 if (wantShifts) {
612 localBestShifts = 645 localBestShifts = vector<vector<int> >(width);
613 vector<vector<int> >(width, vector<int>(pack.templateNoteCount, 0)); 646 }
614 } 647
615 648 #ifndef MAX_EM_THREADS
616 double columnThreshold = 1e-5; 649 #define MAX_EM_THREADS 8
617 650 #endif
618 if (m_mode == LiveMode) { 651
619 columnThreshold /= 20; 652 #if (defined(MAX_EM_THREADS) && (MAX_EM_THREADS > 1))
620 } 653 for (int i = 0; i < width; ) {
621 654 typedef future<pair<vector<double>, vector<int>>> EMFuture;
622 #pragma omp parallel for 655 vector<EMFuture> results;
656 for (int j = 0; j < MAX_EM_THREADS && i + j < width; ++j) {
657 results.push_back
658 (async(std::launch::async,
659 [&](int index) {
660 return applyEM(pack, filtered.at(index), wantShifts);
661 }, i + j));
662 }
663 for (int j = 0; j < MAX_EM_THREADS && i + j < width; ++j) {
664 auto out = results[j].get();
665 localPitches[i+j] = out.first;
666 if (wantShifts) localBestShifts[i+j] = out.second;
667 }
668 i += MAX_EM_THREADS;
669 }
670 #else
623 for (int i = 0; i < width; ++i) { 671 for (int i = 0; i < width; ++i) {
624 672 auto out = applyEM(pack, filtered.at(i), wantShifts);
625 double sum = 0.0; 673 localPitches[i] = out.first;
626 for (int j = 0; j < pack.templateHeight; ++j) { 674 if (wantShifts) localBestShifts[i] = out.second;
627 sum += filtered.at(i).at(j); 675 }
628 } 676 #endif
629 if (sum < columnThreshold) continue;
630
631 EM em(&pack, m_mode == HighQualityMode);
632
633 em.setPitchSparsity(pack.pitchSparsity);
634 em.setSourceSparsity(pack.sourceSparsity);
635
636 for (int j = 0; j < iterations; ++j) {
637 em.iterate(filtered.at(i).data());
638 }
639
640 const float *pitchDist = em.getPitchDistribution();
641 const float *const *shiftDist = em.getShifts();
642
643 for (int j = 0; j < pack.templateNoteCount; ++j) {
644
645 localPitches[i][j] = pitchDist[j] * sum;
646
647 int bestShift = 0;
648 float bestShiftValue = 0.0;
649 if (wantShifts) {
650 for (int k = 0; k < shiftCount; ++k) {
651 float value = shiftDist[k][j];
652 if (k == 0 || value > bestShiftValue) {
653 bestShiftValue = value;
654 bestShift = k;
655 }
656 }
657 localBestShifts[i][j] = bestShift;
658 }
659 }
660 }
661 677
662 for (int i = 0; i < width; ++i) { 678 for (int i = 0; i < width; ++i) {
663 679
680 // This returns a filtered column, and pushes the
681 // up-to-max-polyphony activation column to m_pianoRoll
664 vector<double> filtered = postProcess 682 vector<double> filtered = postProcess
665 (localPitches[i], localBestShifts[i], wantShifts); 683 (localPitches[i], localBestShifts[i], wantShifts);
666 684
685 RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1);
686 float inputGain = getInputGainAt(timestamp);
687
667 Feature f; 688 Feature f;
668 for (int j = 0; j < (int)filtered.size(); ++j) { 689 for (int j = 0; j < (int)filtered.size(); ++j) {
669 float v(filtered[j]); 690 float v = filtered[j];
670 if (v < pack.levelThreshold) v = 0.f; 691 if (v < pack.levelThreshold) v = 0.f;
671 f.values.push_back(v); 692 f.values.push_back(v / inputGain);
672 } 693 }
673 fs[m_pitchOutputNo].push_back(f); 694 fs[m_pitchOutputNo].push_back(f);
695
696 f.values.clear();
697 f.values.resize(12);
698 for (int j = 0; j < (int)filtered.size(); ++j) {
699 f.values[j % 12] += filtered[j] / inputGain;
700 }
701 fs[m_chromaOutputNo].push_back(f);
674 702
675 FeatureList noteFeatures = noteTrack(shiftCount); 703 FeatureList noteFeatures = noteTrack(shiftCount);
676 704
677 for (FeatureList::const_iterator fi = noteFeatures.begin(); 705 for (FeatureList::const_iterator fi = noteFeatures.begin();
678 fi != noteFeatures.end(); ++fi) { 706 fi != noteFeatures.end(); ++fi) {
679 fs[m_notesOutputNo].push_back(*fi); 707 fs[m_notesOutputNo].push_back(*fi);
680 } 708 }
681 } 709 }
710 }
711
712 pair<vector<double>, vector<int> >
713 Silvet::applyEM(const InstrumentPack &pack,
714 const vector<double> &column,
715 bool wantShifts)
716 {
717 double columnThreshold = 1e-5;
718
719 if (m_mode == LiveMode) {
720 columnThreshold /= 20;
721 }
722
723 vector<double> pitches(pack.templateNoteCount, 0.0);
724 vector<int> bestShifts;
725
726 double sum = 0.0;
727 for (int j = 0; j < pack.templateHeight; ++j) {
728 sum += column.at(j);
729 }
730 if (sum < columnThreshold) return { pitches, bestShifts };
731
732 EM em(&pack, m_mode == HighQualityMode);
733
734 em.setPitchSparsity(pack.pitchSparsity);
735 em.setSourceSparsity(pack.sourceSparsity);
736
737 int iterations = (m_mode == HighQualityMode ? 20 : 10);
738
739 for (int j = 0; j < iterations; ++j) {
740 em.iterate(column.data());
741 }
742
743 const float *pitchDist = em.getPitchDistribution();
744 const float *const *shiftDist = em.getShifts();
745
746 int shiftCount = 1;
747 if (wantShifts) {
748 shiftCount = pack.templateMaxShift * 2 + 1;
749 }
750
751 for (int j = 0; j < pack.templateNoteCount; ++j) {
752
753 pitches[j] = pitchDist[j] * sum;
754
755 int bestShift = 0;
756 float bestShiftValue = 0.0;
757 if (wantShifts) {
758 for (int k = 0; k < shiftCount; ++k) {
759 float value = shiftDist[k][j];
760 if (k == 0 || value > bestShiftValue) {
761 bestShiftValue = value;
762 bestShift = k;
763 }
764 }
765 bestShifts.push_back(bestShift);
766 }
767 }
768
769 return { pitches, bestShifts };
682 } 770 }
683 771
684 Silvet::Grid 772 Silvet::Grid
685 Silvet::preProcess(const Grid &in) 773 Silvet::preProcess(const Grid &in)
686 { 774 {
941 shiftCount, 1029 shiftCount,
942 partVelocity)); 1030 partVelocity));
943 } 1031 }
944 } 1032 }
945 1033
1034 RealTime
1035 Silvet::getColumnTimestamp(int column)
1036 {
1037 double columnDuration = 1.0 / m_colsPerSec;
1038 int postFilterLatency = int(m_postFilter[0]->getSize() / 2);
1039
1040 return m_startTime + RealTime::fromSeconds
1041 (columnDuration * (column - postFilterLatency) + 0.02);
1042 }
1043
946 Silvet::Feature 1044 Silvet::Feature
947 Silvet::makeNoteFeature(int start, 1045 Silvet::makeNoteFeature(int start,
948 int end, 1046 int end,
949 int note, 1047 int note,
950 int shift, 1048 int shift,
951 int shiftCount, 1049 int shiftCount,
952 int velocity) 1050 int velocity)
953 { 1051 {
954 double columnDuration = 1.0 / m_colsPerSec;
955 int postFilterLatency = int(m_postFilter[0]->getSize() / 2);
956
957 Feature f; 1052 Feature f;
958 1053
959 f.hasTimestamp = true; 1054 f.hasTimestamp = true;
960 f.timestamp = m_startTime + RealTime::fromSeconds 1055 f.timestamp = getColumnTimestamp(start);
961 (columnDuration * (start - postFilterLatency) + 0.02);
962 1056
963 f.hasDuration = true; 1057 f.hasDuration = true;
964 f.duration = RealTime::fromSeconds 1058 f.duration = getColumnTimestamp(end) - f.timestamp;
965 (columnDuration * (end - start));
966 1059
967 f.values.clear(); 1060 f.values.clear();
968 1061
969 f.values.push_back 1062 f.values.push_back
970 (noteFrequency(note, shift, shiftCount)); 1063 (noteFrequency(note, shift, shiftCount));