Mercurial > hg > silvet
comparison src/Silvet.cpp @ 331:e8e37f471650 livemode
Merge
author | Chris Cannam |
---|---|
date | Wed, 29 Apr 2015 18:58:00 +0100 |
parents | 8f5cfd7dbaa5 6f8fa7fc8fdc |
children | 19c17cd0c7d8 |
comparison
equal
deleted
inserted
replaced
330:8f5cfd7dbaa5 | 331:e8e37f471650 |
---|---|
249 d.isQuantized = false; | 249 d.isQuantized = false; |
250 d.sampleType = OutputDescriptor::VariableSampleRate; | 250 d.sampleType = OutputDescriptor::VariableSampleRate; |
251 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62); | 251 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62); |
252 d.hasDuration = true; | 252 d.hasDuration = true; |
253 m_notesOutputNo = list.size(); | 253 m_notesOutputNo = list.size(); |
254 list.push_back(d); | |
255 | |
256 d.identifier = "onsets"; | |
257 d.name = "Note onsets"; | |
258 d.description = "Note onsets, without durations. These can be calculated sooner than complete notes, because it isn't necessary to wait for a note to finish before returning its feature. Each event has time, estimated fundamental frequency in Hz, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture."; | |
259 d.unit = "Hz"; | |
260 d.hasFixedBinCount = true; | |
261 d.binCount = 2; | |
262 d.binNames.push_back("Frequency"); | |
263 d.binNames.push_back("Velocity"); | |
264 d.hasKnownExtents = false; | |
265 d.isQuantized = false; | |
266 d.sampleType = OutputDescriptor::VariableSampleRate; | |
267 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62); | |
268 d.hasDuration = false; | |
269 m_onsetsOutputNo = list.size(); | |
254 list.push_back(d); | 270 list.push_back(d); |
255 | 271 |
256 d.identifier = "timefreq"; | 272 d.identifier = "timefreq"; |
257 d.name = "Time-frequency distribution"; | 273 d.name = "Time-frequency distribution"; |
258 d.description = "Filtered constant-Q time-frequency distribution as used as input to the expectation-maximisation algorithm."; | 274 d.description = "Filtered constant-Q time-frequency distribution as used as input to the expectation-maximisation algorithm."; |
289 d.hasFixedBinCount = true; | 305 d.hasFixedBinCount = true; |
290 d.binCount = getPack(0).templateNoteCount; | 306 d.binCount = getPack(0).templateNoteCount; |
291 d.binNames.clear(); | 307 d.binNames.clear(); |
292 if (m_cq) { | 308 if (m_cq) { |
293 for (int i = 0; i < getPack(0).templateNoteCount; ++i) { | 309 for (int i = 0; i < getPack(0).templateNoteCount; ++i) { |
294 d.binNames.push_back(noteName(i, 0, 1)); | 310 d.binNames.push_back(getNoteName(i, 0, 1)); |
295 } | 311 } |
296 } | 312 } |
297 d.hasKnownExtents = false; | 313 d.hasKnownExtents = false; |
298 d.isQuantized = false; | 314 d.isQuantized = false; |
299 d.sampleType = OutputDescriptor::FixedSampleRate; | 315 d.sampleType = OutputDescriptor::FixedSampleRate; |
309 d.hasFixedBinCount = true; | 325 d.hasFixedBinCount = true; |
310 d.binCount = 12; | 326 d.binCount = 12; |
311 d.binNames.clear(); | 327 d.binNames.clear(); |
312 if (m_cq) { | 328 if (m_cq) { |
313 for (int i = 0; i < 12; ++i) { | 329 for (int i = 0; i < 12; ++i) { |
314 d.binNames.push_back(chromaName(i)); | 330 d.binNames.push_back(getChromaName(i)); |
315 } | 331 } |
316 } | 332 } |
317 d.hasKnownExtents = false; | 333 d.hasKnownExtents = false; |
318 d.isQuantized = false; | 334 d.isQuantized = false; |
319 d.sampleType = OutputDescriptor::FixedSampleRate; | 335 d.sampleType = OutputDescriptor::FixedSampleRate; |
353 | 369 |
354 return list; | 370 return list; |
355 } | 371 } |
356 | 372 |
357 std::string | 373 std::string |
358 Silvet::chromaName(int pitch) const | 374 Silvet::getChromaName(int pitch) const |
359 { | 375 { |
360 static const char *names[] = { | 376 static const char *names[] = { |
361 "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#" | 377 "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#" |
362 }; | 378 }; |
363 | 379 |
364 return names[pitch]; | 380 return names[pitch]; |
365 } | 381 } |
366 | 382 |
367 std::string | 383 std::string |
368 Silvet::noteName(int note, int shift, int shiftCount) const | 384 Silvet::getNoteName(int note, int shift, int shiftCount) const |
369 { | 385 { |
370 string n = chromaName(note % 12); | 386 string n = getChromaName(note % 12); |
371 | 387 |
372 int oct = (note + 9) / 12; | 388 int oct = (note + 9) / 12; |
373 | 389 |
374 char buf[30]; | 390 char buf[30]; |
375 | 391 |
376 float pshift = 0.f; | 392 float pshift = 0.f; |
377 if (shiftCount > 1) { | 393 if (shiftCount > 1) { |
378 // see noteFrequency below | 394 // see getNoteFrequency below |
379 pshift = | 395 pshift = |
380 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount; | 396 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount; |
381 } | 397 } |
382 | 398 |
383 if (pshift > 0.f) { | 399 if (pshift > 0.f) { |
390 | 406 |
391 return buf; | 407 return buf; |
392 } | 408 } |
393 | 409 |
394 float | 410 float |
395 Silvet::noteFrequency(int note, int shift, int shiftCount) const | 411 Silvet::getNoteFrequency(int note, int shift, int shiftCount) const |
396 { | 412 { |
397 // Convert shift number to a pitch shift. The given shift number | 413 // Convert shift number to a pitch shift. The given shift number |
398 // is an offset into the template array, which starts with some | 414 // is an offset into the template array, which starts with some |
399 // zeros, followed by the template, then some trailing zeros. | 415 // zeros, followed by the template, then some trailing zeros. |
400 // | 416 // |
692 } | 708 } |
693 } | 709 } |
694 | 710 |
695 for (int i = 0; i < width; ++i) { | 711 for (int i = 0; i < width; ++i) { |
696 | 712 |
697 // This returns a filtered column, and pushes the | 713 vector<double> filtered; |
698 // up-to-max-polyphony activation column to m_pianoRoll | 714 |
699 vector<double> filtered = postProcess | 715 for (int j = 0; j < pack.templateNoteCount; ++j) { |
700 (localPitches[i], localBestShifts[i], wantShifts); | 716 m_postFilter[j]->push(localPitches[i][j]); |
717 filtered.push_back(m_postFilter[j]->get()); | |
718 } | |
701 | 719 |
702 RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1); | 720 RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1); |
703 float inputGain = getInputGainAt(timestamp); | 721 float inputGain = getInputGainAt(timestamp); |
704 | 722 |
705 Feature f; | 723 Feature f; |
714 f.values.resize(12); | 732 f.values.resize(12); |
715 for (int j = 0; j < (int)filtered.size(); ++j) { | 733 for (int j = 0; j < (int)filtered.size(); ++j) { |
716 f.values[j % 12] += filtered[j] / inputGain; | 734 f.values[j % 12] += filtered[j] / inputGain; |
717 } | 735 } |
718 fs[m_chromaOutputNo].push_back(f); | 736 fs[m_chromaOutputNo].push_back(f); |
719 | 737 |
720 FeatureList noteFeatures = noteTrack(shiftCount); | 738 // This pushes the up-to-max-polyphony activation column to |
721 | 739 // m_pianoRoll |
740 postProcess(filtered, localBestShifts[i], wantShifts); | |
741 | |
742 auto events = noteTrack(shiftCount); | |
743 | |
744 FeatureList noteFeatures = events.first; | |
722 for (FeatureList::const_iterator fi = noteFeatures.begin(); | 745 for (FeatureList::const_iterator fi = noteFeatures.begin(); |
723 fi != noteFeatures.end(); ++fi) { | 746 fi != noteFeatures.end(); ++fi) { |
724 fs[m_notesOutputNo].push_back(*fi); | 747 fs[m_notesOutputNo].push_back(*fi); |
748 } | |
749 | |
750 FeatureList onsetFeatures = events.second; | |
751 for (FeatureList::const_iterator fi = onsetFeatures.begin(); | |
752 fi != onsetFeatures.end(); ++fi) { | |
753 fs[m_onsetsOutputNo].push_back(*fi); | |
725 } | 754 } |
726 } | 755 } |
727 } | 756 } |
728 | 757 |
729 pair<vector<double>, vector<int> > | 758 pair<vector<double>, vector<int> > |
871 } | 900 } |
872 | 901 |
873 return out; | 902 return out; |
874 } | 903 } |
875 | 904 |
876 vector<double> | 905 void |
877 Silvet::postProcess(const vector<double> &pitches, | 906 Silvet::postProcess(const vector<double> &pitches, |
878 const vector<int> &bestShifts, | 907 const vector<int> &bestShifts, |
879 bool wantShifts) | 908 bool wantShifts) |
880 { | 909 { |
881 const InstrumentPack &pack(getPack(m_instrument)); | 910 const InstrumentPack &pack(getPack(m_instrument)); |
882 | 911 |
883 vector<double> filtered; | 912 // Threshold for level and reduce number of candidate pitches |
913 | |
914 typedef std::multimap<double, int> ValueIndexMap; | |
915 | |
916 ValueIndexMap strengths; | |
884 | 917 |
885 for (int j = 0; j < pack.templateNoteCount; ++j) { | 918 for (int j = 0; j < pack.templateNoteCount; ++j) { |
886 m_postFilter[j]->push(pitches[j]); | 919 |
887 filtered.push_back(m_postFilter[j]->get()); | 920 double strength = pitches[j]; |
888 } | 921 if (strength < pack.levelThreshold) continue; |
889 | 922 |
890 if (m_mode == LiveMode) { | |
891 // In live mode with only a 12-bpo CQ, we are very likely to | 923 // In live mode with only a 12-bpo CQ, we are very likely to |
892 // get clusters of two or three high scores at a time for | 924 // get clusters of two or three high scores at a time for |
893 // neighbouring semitones. Eliminate these by picking only the | 925 // neighbouring semitones. Eliminate these by picking only the |
894 // peaks. This means we can't recognise actual semitone chords | 926 // peaks. This means we can't recognise actual semitone chords |
895 // if they ever appear, but it's not as if live mode is good | 927 // if they ever appear, but it's not as if live mode is good |
896 // enough for that to be a big deal anyway. | 928 // enough for that to be a big deal anyway. |
897 for (int j = 0; j < pack.templateNoteCount; ++j) { | 929 if (m_mode == LiveMode) { |
898 if (j > 0 && j + 1 < pack.templateNoteCount && | 930 if (j == 0 || |
899 filtered[j] >= filtered[j-1] && | 931 j + 1 == pack.templateNoteCount || |
900 filtered[j] >= filtered[j+1]) { | 932 pitches[j] < pitches[j-1] || |
901 } else { | 933 pitches[j] < pitches[j+1]) { |
902 filtered[j] = 0.0; | 934 continue; |
903 } | 935 } |
904 } | 936 } |
905 } | 937 |
906 | |
907 // Threshold for level and reduce number of candidate pitches | |
908 | |
909 typedef std::multimap<double, int> ValueIndexMap; | |
910 | |
911 ValueIndexMap strengths; | |
912 | |
913 for (int j = 0; j < pack.templateNoteCount; ++j) { | |
914 double strength = filtered[j]; | |
915 if (strength < pack.levelThreshold) continue; | |
916 strengths.insert(ValueIndexMap::value_type(strength, j)); | 938 strengths.insert(ValueIndexMap::value_type(strength, j)); |
917 } | 939 } |
918 | 940 |
919 ValueIndexMap::const_iterator si = strengths.end(); | 941 ValueIndexMap::const_iterator si = strengths.end(); |
920 | 942 |
939 | 961 |
940 if (wantShifts) { | 962 if (wantShifts) { |
941 m_pianoRollShifts.push_back(activeShifts); | 963 m_pianoRollShifts.push_back(activeShifts); |
942 } | 964 } |
943 | 965 |
944 return filtered; | 966 return; |
945 } | 967 } |
946 | 968 |
947 Vamp::Plugin::FeatureList | 969 pair<Vamp::Plugin::FeatureList, Vamp::Plugin::FeatureList> |
948 Silvet::noteTrack(int shiftCount) | 970 Silvet::noteTrack(int shiftCount) |
949 { | 971 { |
950 // Minimum duration pruning, and conversion to notes. We can only | 972 // Minimum duration pruning, and conversion to notes. We can only |
951 // report notes that have just ended (i.e. that are absent in the | 973 // report notes that have just ended (i.e. that are absent in the |
952 // latest active set but present in the prior set in the piano | 974 // latest active set but present in the prior set in the piano |
959 const map<int, double> &active = m_pianoRoll[width]; | 981 const map<int, double> &active = m_pianoRoll[width]; |
960 | 982 |
961 double columnDuration = 1.0 / m_colsPerSec; | 983 double columnDuration = 1.0 / m_colsPerSec; |
962 | 984 |
963 // only keep notes >= 100ms or thereabouts | 985 // only keep notes >= 100ms or thereabouts |
964 int durationThreshold = floor(0.1 / columnDuration); // columns | 986 double durationThrSec = 0.1; |
987 if (m_mode == LiveMode) durationThrSec = 0.07; | |
988 int durationThreshold = floor(durationThrSec / columnDuration); // in cols | |
965 if (durationThreshold < 1) durationThreshold = 1; | 989 if (durationThreshold < 1) durationThreshold = 1; |
966 | 990 |
967 FeatureList noteFeatures; | 991 FeatureList noteFeatures, onsetFeatures; |
968 | 992 |
969 if (width < durationThreshold + 1) { | 993 if (width < durationThreshold + 1) { |
970 return noteFeatures; | 994 return { noteFeatures, onsetFeatures }; |
971 } | 995 } |
972 | 996 |
973 //!!! try: repeated note detection? (look for change in first derivative of the pitch matrix) | 997 //!!! try: repeated note detection? (look for change in first derivative of the pitch matrix) |
974 | 998 |
975 for (map<int, double>::const_iterator ni = m_pianoRoll[width-1].begin(); | 999 for (map<int, double>::const_iterator ni = m_pianoRoll[width-1].begin(); |
976 ni != m_pianoRoll[width-1].end(); ++ni) { | 1000 ni != m_pianoRoll[width-1].end(); ++ni) { |
977 | 1001 |
978 int note = ni->first; | 1002 int note = ni->first; |
979 | 1003 |
980 if (active.find(note) != active.end()) { | |
981 // the note is still playing | |
982 continue; | |
983 } | |
984 | |
985 // the note was playing but just ended | |
986 int end = width; | 1004 int end = width; |
987 int start = end-1; | 1005 int start = end-1; |
988 | 1006 |
989 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) { | 1007 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) { |
990 --start; | 1008 --start; |
991 } | 1009 } |
992 ++start; | 1010 ++start; |
993 | 1011 |
994 if ((end - start) < durationThreshold) { | 1012 int duration = end - start; |
1013 | |
1014 if (duration < durationThreshold) { | |
995 continue; | 1015 continue; |
996 } | 1016 } |
997 | 1017 |
998 emitNote(start, end, note, shiftCount, noteFeatures); | 1018 if (duration == durationThreshold) { |
1019 emitOnset(start, note, shiftCount, onsetFeatures); | |
1020 } | |
1021 | |
1022 if (active.find(note) == active.end()) { | |
1023 // the note was playing but just ended | |
1024 emitNote(start, end, note, shiftCount, noteFeatures); | |
1025 } | |
999 } | 1026 } |
1000 | 1027 |
1001 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl; | 1028 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl; |
1002 | 1029 |
1003 return noteFeatures; | 1030 return { noteFeatures, onsetFeatures }; |
1004 } | 1031 } |
1005 | 1032 |
1006 void | 1033 void |
1007 Silvet::emitNote(int start, int end, int note, int shiftCount, | 1034 Silvet::emitNote(int start, int end, int note, int shiftCount, |
1008 FeatureList &noteFeatures) | 1035 FeatureList &noteFeatures) |
1009 { | 1036 { |
1010 int partStart = start; | 1037 int partStart = start; |
1011 int partShift = 0; | 1038 int partShift = 0; |
1012 int partVelocity = 0; | 1039 double partStrength = 0; |
1013 | 1040 |
1014 int partThreshold = floor(0.05 * m_colsPerSec); | 1041 int partThreshold = floor(0.05 * m_colsPerSec); |
1015 | 1042 |
1016 for (int i = start; i != end; ++i) { | 1043 for (int i = start; i != end; ++i) { |
1017 | 1044 |
1035 noteFeatures.push_back(makeNoteFeature(partStart, | 1062 noteFeatures.push_back(makeNoteFeature(partStart, |
1036 i, | 1063 i, |
1037 note, | 1064 note, |
1038 partShift, | 1065 partShift, |
1039 shiftCount, | 1066 shiftCount, |
1040 partVelocity)); | 1067 partStrength)); |
1041 partStart = i; | 1068 partStart = i; |
1042 partShift = shift; | 1069 partShift = shift; |
1043 partVelocity = 0; | 1070 partStrength = 0; |
1044 } | 1071 } |
1045 } | 1072 } |
1046 | 1073 |
1047 int v; | 1074 if (strength > partStrength) { |
1048 if (m_mode == LiveMode) { | 1075 partStrength = strength; |
1049 v = round(strength * 20); | |
1050 } else { | |
1051 v = round(strength * 2); | |
1052 } | |
1053 if (v > partVelocity) { | |
1054 partVelocity = v; | |
1055 } | 1076 } |
1056 } | 1077 } |
1057 | 1078 |
1058 if (end >= partStart + partThreshold) { | 1079 if (end >= partStart + partThreshold) { |
1059 noteFeatures.push_back(makeNoteFeature(partStart, | 1080 noteFeatures.push_back(makeNoteFeature(partStart, |
1060 end, | 1081 end, |
1061 note, | 1082 note, |
1062 partShift, | 1083 partShift, |
1063 shiftCount, | 1084 shiftCount, |
1064 partVelocity)); | 1085 partStrength)); |
1065 } | 1086 } |
1087 } | |
1088 | |
1089 void | |
1090 Silvet::emitOnset(int start, int note, int shiftCount, | |
1091 FeatureList &onsetFeatures) | |
1092 { | |
1093 int len = int(m_pianoRoll.size()); | |
1094 | |
1095 double onsetStrength = 0; | |
1096 | |
1097 int shift = 0; | |
1098 if (shiftCount > 1) { | |
1099 shift = m_pianoRollShifts[start][note]; | |
1100 } | |
1101 | |
1102 for (int i = start; i < len; ++i) { | |
1103 double strength = m_pianoRoll[i][note]; | |
1104 if (strength > onsetStrength) { | |
1105 onsetStrength = strength; | |
1106 } | |
1107 } | |
1108 | |
1109 onsetFeatures.push_back(makeOnsetFeature(start, | |
1110 note, | |
1111 shift, | |
1112 shiftCount, | |
1113 onsetStrength)); | |
1066 } | 1114 } |
1067 | 1115 |
1068 RealTime | 1116 RealTime |
1069 Silvet::getColumnTimestamp(int column) | 1117 Silvet::getColumnTimestamp(int column) |
1070 { | 1118 { |
1079 Silvet::makeNoteFeature(int start, | 1127 Silvet::makeNoteFeature(int start, |
1080 int end, | 1128 int end, |
1081 int note, | 1129 int note, |
1082 int shift, | 1130 int shift, |
1083 int shiftCount, | 1131 int shiftCount, |
1084 int velocity) | 1132 double strength) |
1085 { | 1133 { |
1086 Feature f; | 1134 Feature f; |
1087 | 1135 |
1088 f.hasTimestamp = true; | 1136 f.hasTimestamp = true; |
1089 f.timestamp = getColumnTimestamp(start); | 1137 f.timestamp = getColumnTimestamp(start); |
1090 | 1138 |
1091 f.hasDuration = true; | 1139 f.hasDuration = true; |
1092 f.duration = getColumnTimestamp(end) - f.timestamp; | 1140 f.duration = getColumnTimestamp(end) - f.timestamp; |
1093 | 1141 |
1094 f.values.clear(); | 1142 f.values.clear(); |
1095 | 1143 f.values.push_back(getNoteFrequency(note, shift, shiftCount)); |
1096 f.values.push_back | 1144 f.values.push_back(getVelocityFor(strength, start)); |
1097 (noteFrequency(note, shift, shiftCount)); | 1145 |
1098 | 1146 f.label = getNoteName(note, shift, shiftCount); |
1099 float inputGain = getInputGainAt(f.timestamp); | |
1100 // cerr << "adjusting velocity from " << velocity << " to " << round(velocity/inputGain) << endl; | |
1101 velocity = round(velocity / inputGain); | |
1102 if (velocity > 127) velocity = 127; | |
1103 if (velocity < 1) velocity = 1; | |
1104 f.values.push_back(velocity); | |
1105 | |
1106 f.label = noteName(note, shift, shiftCount); | |
1107 | 1147 |
1108 return f; | 1148 return f; |
1149 } | |
1150 | |
1151 Silvet::Feature | |
1152 Silvet::makeOnsetFeature(int start, | |
1153 int note, | |
1154 int shift, | |
1155 int shiftCount, | |
1156 double strength) | |
1157 { | |
1158 Feature f; | |
1159 | |
1160 f.hasTimestamp = true; | |
1161 f.timestamp = getColumnTimestamp(start); | |
1162 | |
1163 f.hasDuration = false; | |
1164 | |
1165 f.values.clear(); | |
1166 f.values.push_back(getNoteFrequency(note, shift, shiftCount)); | |
1167 f.values.push_back(getVelocityFor(strength, start)); | |
1168 | |
1169 f.label = getNoteName(note, shift, shiftCount); | |
1170 | |
1171 return f; | |
1172 } | |
1173 | |
1174 int | |
1175 Silvet::getVelocityFor(double strength, int column) | |
1176 { | |
1177 RealTime rt = getColumnTimestamp(column + 1); | |
1178 | |
1179 float inputGain = getInputGainAt(rt); | |
1180 | |
1181 double scale = 2.0; | |
1182 if (m_mode == LiveMode) scale = 20.0; | |
1183 | |
1184 double velocity = round((strength * scale) / inputGain); | |
1185 | |
1186 if (velocity > 127.0) velocity = 127.0; | |
1187 if (velocity < 1.0) velocity = 1.0; // assume surpassed 0 threshold already | |
1188 | |
1189 return int(velocity); | |
1109 } | 1190 } |
1110 | 1191 |
1111 float | 1192 float |
1112 Silvet::getInputGainAt(RealTime t) | 1193 Silvet::getInputGainAt(RealTime t) |
1113 { | 1194 { |