comparison src/Silvet.cpp @ 331:e8e37f471650 livemode

Merge
author Chris Cannam
date Wed, 29 Apr 2015 18:58:00 +0100
parents 8f5cfd7dbaa5 6f8fa7fc8fdc
children 19c17cd0c7d8
comparison
equal deleted inserted replaced
330:8f5cfd7dbaa5 331:e8e37f471650
249 d.isQuantized = false; 249 d.isQuantized = false;
250 d.sampleType = OutputDescriptor::VariableSampleRate; 250 d.sampleType = OutputDescriptor::VariableSampleRate;
251 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62); 251 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
252 d.hasDuration = true; 252 d.hasDuration = true;
253 m_notesOutputNo = list.size(); 253 m_notesOutputNo = list.size();
254 list.push_back(d);
255
256 d.identifier = "onsets";
257 d.name = "Note onsets";
258 d.description = "Note onsets, without durations. These can be calculated sooner than complete notes, because it isn't necessary to wait for a note to finish before returning its feature. Each event has time, estimated fundamental frequency in Hz, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture.";
259 d.unit = "Hz";
260 d.hasFixedBinCount = true;
261 d.binCount = 2;
262 d.binNames.push_back("Frequency");
263 d.binNames.push_back("Velocity");
264 d.hasKnownExtents = false;
265 d.isQuantized = false;
266 d.sampleType = OutputDescriptor::VariableSampleRate;
267 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
268 d.hasDuration = false;
269 m_onsetsOutputNo = list.size();
254 list.push_back(d); 270 list.push_back(d);
255 271
256 d.identifier = "timefreq"; 272 d.identifier = "timefreq";
257 d.name = "Time-frequency distribution"; 273 d.name = "Time-frequency distribution";
258 d.description = "Filtered constant-Q time-frequency distribution as used as input to the expectation-maximisation algorithm."; 274 d.description = "Filtered constant-Q time-frequency distribution as used as input to the expectation-maximisation algorithm.";
289 d.hasFixedBinCount = true; 305 d.hasFixedBinCount = true;
290 d.binCount = getPack(0).templateNoteCount; 306 d.binCount = getPack(0).templateNoteCount;
291 d.binNames.clear(); 307 d.binNames.clear();
292 if (m_cq) { 308 if (m_cq) {
293 for (int i = 0; i < getPack(0).templateNoteCount; ++i) { 309 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
294 d.binNames.push_back(noteName(i, 0, 1)); 310 d.binNames.push_back(getNoteName(i, 0, 1));
295 } 311 }
296 } 312 }
297 d.hasKnownExtents = false; 313 d.hasKnownExtents = false;
298 d.isQuantized = false; 314 d.isQuantized = false;
299 d.sampleType = OutputDescriptor::FixedSampleRate; 315 d.sampleType = OutputDescriptor::FixedSampleRate;
309 d.hasFixedBinCount = true; 325 d.hasFixedBinCount = true;
310 d.binCount = 12; 326 d.binCount = 12;
311 d.binNames.clear(); 327 d.binNames.clear();
312 if (m_cq) { 328 if (m_cq) {
313 for (int i = 0; i < 12; ++i) { 329 for (int i = 0; i < 12; ++i) {
314 d.binNames.push_back(chromaName(i)); 330 d.binNames.push_back(getChromaName(i));
315 } 331 }
316 } 332 }
317 d.hasKnownExtents = false; 333 d.hasKnownExtents = false;
318 d.isQuantized = false; 334 d.isQuantized = false;
319 d.sampleType = OutputDescriptor::FixedSampleRate; 335 d.sampleType = OutputDescriptor::FixedSampleRate;
353 369
354 return list; 370 return list;
355 } 371 }
356 372
357 std::string 373 std::string
358 Silvet::chromaName(int pitch) const 374 Silvet::getChromaName(int pitch) const
359 { 375 {
360 static const char *names[] = { 376 static const char *names[] = {
361 "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#" 377 "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"
362 }; 378 };
363 379
364 return names[pitch]; 380 return names[pitch];
365 } 381 }
366 382
367 std::string 383 std::string
368 Silvet::noteName(int note, int shift, int shiftCount) const 384 Silvet::getNoteName(int note, int shift, int shiftCount) const
369 { 385 {
370 string n = chromaName(note % 12); 386 string n = getChromaName(note % 12);
371 387
372 int oct = (note + 9) / 12; 388 int oct = (note + 9) / 12;
373 389
374 char buf[30]; 390 char buf[30];
375 391
376 float pshift = 0.f; 392 float pshift = 0.f;
377 if (shiftCount > 1) { 393 if (shiftCount > 1) {
378 // see noteFrequency below 394 // see getNoteFrequency below
379 pshift = 395 pshift =
380 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount; 396 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
381 } 397 }
382 398
383 if (pshift > 0.f) { 399 if (pshift > 0.f) {
390 406
391 return buf; 407 return buf;
392 } 408 }
393 409
394 float 410 float
395 Silvet::noteFrequency(int note, int shift, int shiftCount) const 411 Silvet::getNoteFrequency(int note, int shift, int shiftCount) const
396 { 412 {
397 // Convert shift number to a pitch shift. The given shift number 413 // Convert shift number to a pitch shift. The given shift number
398 // is an offset into the template array, which starts with some 414 // is an offset into the template array, which starts with some
399 // zeros, followed by the template, then some trailing zeros. 415 // zeros, followed by the template, then some trailing zeros.
400 // 416 //
692 } 708 }
693 } 709 }
694 710
695 for (int i = 0; i < width; ++i) { 711 for (int i = 0; i < width; ++i) {
696 712
697 // This returns a filtered column, and pushes the 713 vector<double> filtered;
698 // up-to-max-polyphony activation column to m_pianoRoll 714
699 vector<double> filtered = postProcess 715 for (int j = 0; j < pack.templateNoteCount; ++j) {
700 (localPitches[i], localBestShifts[i], wantShifts); 716 m_postFilter[j]->push(localPitches[i][j]);
717 filtered.push_back(m_postFilter[j]->get());
718 }
701 719
702 RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1); 720 RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1);
703 float inputGain = getInputGainAt(timestamp); 721 float inputGain = getInputGainAt(timestamp);
704 722
705 Feature f; 723 Feature f;
714 f.values.resize(12); 732 f.values.resize(12);
715 for (int j = 0; j < (int)filtered.size(); ++j) { 733 for (int j = 0; j < (int)filtered.size(); ++j) {
716 f.values[j % 12] += filtered[j] / inputGain; 734 f.values[j % 12] += filtered[j] / inputGain;
717 } 735 }
718 fs[m_chromaOutputNo].push_back(f); 736 fs[m_chromaOutputNo].push_back(f);
719 737
720 FeatureList noteFeatures = noteTrack(shiftCount); 738 // This pushes the up-to-max-polyphony activation column to
721 739 // m_pianoRoll
740 postProcess(filtered, localBestShifts[i], wantShifts);
741
742 auto events = noteTrack(shiftCount);
743
744 FeatureList noteFeatures = events.first;
722 for (FeatureList::const_iterator fi = noteFeatures.begin(); 745 for (FeatureList::const_iterator fi = noteFeatures.begin();
723 fi != noteFeatures.end(); ++fi) { 746 fi != noteFeatures.end(); ++fi) {
724 fs[m_notesOutputNo].push_back(*fi); 747 fs[m_notesOutputNo].push_back(*fi);
748 }
749
750 FeatureList onsetFeatures = events.second;
751 for (FeatureList::const_iterator fi = onsetFeatures.begin();
752 fi != onsetFeatures.end(); ++fi) {
753 fs[m_onsetsOutputNo].push_back(*fi);
725 } 754 }
726 } 755 }
727 } 756 }
728 757
729 pair<vector<double>, vector<int> > 758 pair<vector<double>, vector<int> >
871 } 900 }
872 901
873 return out; 902 return out;
874 } 903 }
875 904
876 vector<double> 905 void
877 Silvet::postProcess(const vector<double> &pitches, 906 Silvet::postProcess(const vector<double> &pitches,
878 const vector<int> &bestShifts, 907 const vector<int> &bestShifts,
879 bool wantShifts) 908 bool wantShifts)
880 { 909 {
881 const InstrumentPack &pack(getPack(m_instrument)); 910 const InstrumentPack &pack(getPack(m_instrument));
882 911
883 vector<double> filtered; 912 // Threshold for level and reduce number of candidate pitches
913
914 typedef std::multimap<double, int> ValueIndexMap;
915
916 ValueIndexMap strengths;
884 917
885 for (int j = 0; j < pack.templateNoteCount; ++j) { 918 for (int j = 0; j < pack.templateNoteCount; ++j) {
886 m_postFilter[j]->push(pitches[j]); 919
887 filtered.push_back(m_postFilter[j]->get()); 920 double strength = pitches[j];
888 } 921 if (strength < pack.levelThreshold) continue;
889 922
890 if (m_mode == LiveMode) {
891 // In live mode with only a 12-bpo CQ, we are very likely to 923 // In live mode with only a 12-bpo CQ, we are very likely to
892 // get clusters of two or three high scores at a time for 924 // get clusters of two or three high scores at a time for
893 // neighbouring semitones. Eliminate these by picking only the 925 // neighbouring semitones. Eliminate these by picking only the
894 // peaks. This means we can't recognise actual semitone chords 926 // peaks. This means we can't recognise actual semitone chords
895 // if they ever appear, but it's not as if live mode is good 927 // if they ever appear, but it's not as if live mode is good
896 // enough for that to be a big deal anyway. 928 // enough for that to be a big deal anyway.
897 for (int j = 0; j < pack.templateNoteCount; ++j) { 929 if (m_mode == LiveMode) {
898 if (j > 0 && j + 1 < pack.templateNoteCount && 930 if (j == 0 ||
899 filtered[j] >= filtered[j-1] && 931 j + 1 == pack.templateNoteCount ||
900 filtered[j] >= filtered[j+1]) { 932 pitches[j] < pitches[j-1] ||
901 } else { 933 pitches[j] < pitches[j+1]) {
902 filtered[j] = 0.0; 934 continue;
903 } 935 }
904 } 936 }
905 } 937
906
907 // Threshold for level and reduce number of candidate pitches
908
909 typedef std::multimap<double, int> ValueIndexMap;
910
911 ValueIndexMap strengths;
912
913 for (int j = 0; j < pack.templateNoteCount; ++j) {
914 double strength = filtered[j];
915 if (strength < pack.levelThreshold) continue;
916 strengths.insert(ValueIndexMap::value_type(strength, j)); 938 strengths.insert(ValueIndexMap::value_type(strength, j));
917 } 939 }
918 940
919 ValueIndexMap::const_iterator si = strengths.end(); 941 ValueIndexMap::const_iterator si = strengths.end();
920 942
939 961
940 if (wantShifts) { 962 if (wantShifts) {
941 m_pianoRollShifts.push_back(activeShifts); 963 m_pianoRollShifts.push_back(activeShifts);
942 } 964 }
943 965
944 return filtered; 966 return;
945 } 967 }
946 968
947 Vamp::Plugin::FeatureList 969 pair<Vamp::Plugin::FeatureList, Vamp::Plugin::FeatureList>
948 Silvet::noteTrack(int shiftCount) 970 Silvet::noteTrack(int shiftCount)
949 { 971 {
950 // Minimum duration pruning, and conversion to notes. We can only 972 // Minimum duration pruning, and conversion to notes. We can only
951 // report notes that have just ended (i.e. that are absent in the 973 // report notes that have just ended (i.e. that are absent in the
952 // latest active set but present in the prior set in the piano 974 // latest active set but present in the prior set in the piano
959 const map<int, double> &active = m_pianoRoll[width]; 981 const map<int, double> &active = m_pianoRoll[width];
960 982
961 double columnDuration = 1.0 / m_colsPerSec; 983 double columnDuration = 1.0 / m_colsPerSec;
962 984
963 // only keep notes >= 100ms or thereabouts 985 // only keep notes >= 100ms or thereabouts
964 int durationThreshold = floor(0.1 / columnDuration); // columns 986 double durationThrSec = 0.1;
987 if (m_mode == LiveMode) durationThrSec = 0.07;
988 int durationThreshold = floor(durationThrSec / columnDuration); // in cols
965 if (durationThreshold < 1) durationThreshold = 1; 989 if (durationThreshold < 1) durationThreshold = 1;
966 990
967 FeatureList noteFeatures; 991 FeatureList noteFeatures, onsetFeatures;
968 992
969 if (width < durationThreshold + 1) { 993 if (width < durationThreshold + 1) {
970 return noteFeatures; 994 return { noteFeatures, onsetFeatures };
971 } 995 }
972 996
973 //!!! try: repeated note detection? (look for change in first derivative of the pitch matrix) 997 //!!! try: repeated note detection? (look for change in first derivative of the pitch matrix)
974 998
975 for (map<int, double>::const_iterator ni = m_pianoRoll[width-1].begin(); 999 for (map<int, double>::const_iterator ni = m_pianoRoll[width-1].begin();
976 ni != m_pianoRoll[width-1].end(); ++ni) { 1000 ni != m_pianoRoll[width-1].end(); ++ni) {
977 1001
978 int note = ni->first; 1002 int note = ni->first;
979 1003
980 if (active.find(note) != active.end()) {
981 // the note is still playing
982 continue;
983 }
984
985 // the note was playing but just ended
986 int end = width; 1004 int end = width;
987 int start = end-1; 1005 int start = end-1;
988 1006
989 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) { 1007 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) {
990 --start; 1008 --start;
991 } 1009 }
992 ++start; 1010 ++start;
993 1011
994 if ((end - start) < durationThreshold) { 1012 int duration = end - start;
1013
1014 if (duration < durationThreshold) {
995 continue; 1015 continue;
996 } 1016 }
997 1017
998 emitNote(start, end, note, shiftCount, noteFeatures); 1018 if (duration == durationThreshold) {
1019 emitOnset(start, note, shiftCount, onsetFeatures);
1020 }
1021
1022 if (active.find(note) == active.end()) {
1023 // the note was playing but just ended
1024 emitNote(start, end, note, shiftCount, noteFeatures);
1025 }
999 } 1026 }
1000 1027
1001 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl; 1028 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl;
1002 1029
1003 return noteFeatures; 1030 return { noteFeatures, onsetFeatures };
1004 } 1031 }
1005 1032
1006 void 1033 void
1007 Silvet::emitNote(int start, int end, int note, int shiftCount, 1034 Silvet::emitNote(int start, int end, int note, int shiftCount,
1008 FeatureList &noteFeatures) 1035 FeatureList &noteFeatures)
1009 { 1036 {
1010 int partStart = start; 1037 int partStart = start;
1011 int partShift = 0; 1038 int partShift = 0;
1012 int partVelocity = 0; 1039 double partStrength = 0;
1013 1040
1014 int partThreshold = floor(0.05 * m_colsPerSec); 1041 int partThreshold = floor(0.05 * m_colsPerSec);
1015 1042
1016 for (int i = start; i != end; ++i) { 1043 for (int i = start; i != end; ++i) {
1017 1044
1035 noteFeatures.push_back(makeNoteFeature(partStart, 1062 noteFeatures.push_back(makeNoteFeature(partStart,
1036 i, 1063 i,
1037 note, 1064 note,
1038 partShift, 1065 partShift,
1039 shiftCount, 1066 shiftCount,
1040 partVelocity)); 1067 partStrength));
1041 partStart = i; 1068 partStart = i;
1042 partShift = shift; 1069 partShift = shift;
1043 partVelocity = 0; 1070 partStrength = 0;
1044 } 1071 }
1045 } 1072 }
1046 1073
1047 int v; 1074 if (strength > partStrength) {
1048 if (m_mode == LiveMode) { 1075 partStrength = strength;
1049 v = round(strength * 20);
1050 } else {
1051 v = round(strength * 2);
1052 }
1053 if (v > partVelocity) {
1054 partVelocity = v;
1055 } 1076 }
1056 } 1077 }
1057 1078
1058 if (end >= partStart + partThreshold) { 1079 if (end >= partStart + partThreshold) {
1059 noteFeatures.push_back(makeNoteFeature(partStart, 1080 noteFeatures.push_back(makeNoteFeature(partStart,
1060 end, 1081 end,
1061 note, 1082 note,
1062 partShift, 1083 partShift,
1063 shiftCount, 1084 shiftCount,
1064 partVelocity)); 1085 partStrength));
1065 } 1086 }
1087 }
1088
1089 void
1090 Silvet::emitOnset(int start, int note, int shiftCount,
1091 FeatureList &onsetFeatures)
1092 {
1093 int len = int(m_pianoRoll.size());
1094
1095 double onsetStrength = 0;
1096
1097 int shift = 0;
1098 if (shiftCount > 1) {
1099 shift = m_pianoRollShifts[start][note];
1100 }
1101
1102 for (int i = start; i < len; ++i) {
1103 double strength = m_pianoRoll[i][note];
1104 if (strength > onsetStrength) {
1105 onsetStrength = strength;
1106 }
1107 }
1108
1109 onsetFeatures.push_back(makeOnsetFeature(start,
1110 note,
1111 shift,
1112 shiftCount,
1113 onsetStrength));
1066 } 1114 }
1067 1115
1068 RealTime 1116 RealTime
1069 Silvet::getColumnTimestamp(int column) 1117 Silvet::getColumnTimestamp(int column)
1070 { 1118 {
1079 Silvet::makeNoteFeature(int start, 1127 Silvet::makeNoteFeature(int start,
1080 int end, 1128 int end,
1081 int note, 1129 int note,
1082 int shift, 1130 int shift,
1083 int shiftCount, 1131 int shiftCount,
1084 int velocity) 1132 double strength)
1085 { 1133 {
1086 Feature f; 1134 Feature f;
1087 1135
1088 f.hasTimestamp = true; 1136 f.hasTimestamp = true;
1089 f.timestamp = getColumnTimestamp(start); 1137 f.timestamp = getColumnTimestamp(start);
1090 1138
1091 f.hasDuration = true; 1139 f.hasDuration = true;
1092 f.duration = getColumnTimestamp(end) - f.timestamp; 1140 f.duration = getColumnTimestamp(end) - f.timestamp;
1093 1141
1094 f.values.clear(); 1142 f.values.clear();
1095 1143 f.values.push_back(getNoteFrequency(note, shift, shiftCount));
1096 f.values.push_back 1144 f.values.push_back(getVelocityFor(strength, start));
1097 (noteFrequency(note, shift, shiftCount)); 1145
1098 1146 f.label = getNoteName(note, shift, shiftCount);
1099 float inputGain = getInputGainAt(f.timestamp);
1100 // cerr << "adjusting velocity from " << velocity << " to " << round(velocity/inputGain) << endl;
1101 velocity = round(velocity / inputGain);
1102 if (velocity > 127) velocity = 127;
1103 if (velocity < 1) velocity = 1;
1104 f.values.push_back(velocity);
1105
1106 f.label = noteName(note, shift, shiftCount);
1107 1147
1108 return f; 1148 return f;
1149 }
1150
1151 Silvet::Feature
1152 Silvet::makeOnsetFeature(int start,
1153 int note,
1154 int shift,
1155 int shiftCount,
1156 double strength)
1157 {
1158 Feature f;
1159
1160 f.hasTimestamp = true;
1161 f.timestamp = getColumnTimestamp(start);
1162
1163 f.hasDuration = false;
1164
1165 f.values.clear();
1166 f.values.push_back(getNoteFrequency(note, shift, shiftCount));
1167 f.values.push_back(getVelocityFor(strength, start));
1168
1169 f.label = getNoteName(note, shift, shiftCount);
1170
1171 return f;
1172 }
1173
1174 int
1175 Silvet::getVelocityFor(double strength, int column)
1176 {
1177 RealTime rt = getColumnTimestamp(column + 1);
1178
1179 float inputGain = getInputGainAt(rt);
1180
1181 double scale = 2.0;
1182 if (m_mode == LiveMode) scale = 20.0;
1183
1184 double velocity = round((strength * scale) / inputGain);
1185
1186 if (velocity > 127.0) velocity = 127.0;
1187 if (velocity < 1.0) velocity = 1.0; // assume surpassed 0 threshold already
1188
1189 return int(velocity);
1109 } 1190 }
1110 1191
1111 float 1192 float
1112 Silvet::getInputGainAt(RealTime t) 1193 Silvet::getInputGainAt(RealTime t)
1113 { 1194 {