Chris@31
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
Chris@31
|
2
|
Chris@31
|
3 /*
|
Chris@31
|
4 Silvet
|
Chris@31
|
5
|
Chris@31
|
6 A Vamp plugin for note transcription.
|
Chris@31
|
7 Centre for Digital Music, Queen Mary University of London.
|
Chris@31
|
8
|
Chris@31
|
9 This program is free software; you can redistribute it and/or
|
Chris@31
|
10 modify it under the terms of the GNU General Public License as
|
Chris@31
|
11 published by the Free Software Foundation; either version 2 of the
|
Chris@31
|
12 License, or (at your option) any later version. See the file
|
Chris@31
|
13 COPYING included with this distribution for more information.
|
Chris@31
|
14 */
|
Chris@31
|
15
|
Chris@31
|
16 #include "Silvet.h"
|
Chris@34
|
17 #include "EM.h"
|
Chris@31
|
18
|
Chris@152
|
19 #include <cq/CQSpectrogram.h>
|
Chris@31
|
20
|
Chris@152
|
21 #include "MedianFilter.h"
|
Chris@152
|
22 #include "constant-q-cpp/src/dsp/Resampler.h"
|
Chris@246
|
23 #include "flattendynamics-ladspa.h"
|
Chris@298
|
24 #include "LiveInstruments.h"
|
Chris@31
|
25
|
Chris@31
|
26 #include <vector>
|
Chris@312
|
27 #include <future>
|
Chris@31
|
28
|
Chris@32
|
29 #include <cstdio>
|
Chris@32
|
30
|
Chris@31
|
31 using std::vector;
|
Chris@48
|
32 using std::cout;
|
Chris@31
|
33 using std::cerr;
|
Chris@31
|
34 using std::endl;
|
Chris@311
|
35 using std::pair;
|
Chris@312
|
36 using std::future;
|
Chris@312
|
37 using std::async;
|
Chris@40
|
38 using Vamp::RealTime;
|
Chris@31
|
39
|
Chris@31
|
// Sample rate (Hz) at which all internal processing is carried out.
// Input at any other rate is resampled to this rate (see
// Silvet::reset() and Silvet::process()).
static const int processingSampleRate = 44100;

// Constant-Q bins per semitone used in Live mode and in the other
// processing modes respectively (see Silvet::reset()).
static const int binsPerSemitoneLive = 1;
static const int binsPerSemitoneNormal = 5;

// Inclusive range of input sample rates (Hz) accepted by
// Silvet::initialise().
static const int minInputSampleRate = 100;
static const int maxInputSampleRate = 192000;
|
Chris@272
|
47
|
Chris@31
|
48 Silvet::Silvet(float inputSampleRate) :
|
Chris@31
|
49 Plugin(inputSampleRate),
|
Chris@161
|
50 m_instruments(InstrumentPack::listInstrumentPacks()),
|
Chris@298
|
51 m_liveInstruments(LiveAdapter::adaptAll(m_instruments)),
|
Chris@31
|
52 m_resampler(0),
|
Chris@246
|
53 m_flattener(0),
|
Chris@110
|
54 m_cq(0),
|
Chris@297
|
55 m_mode(HighQualityMode),
|
Chris@166
|
56 m_fineTuning(false),
|
Chris@178
|
57 m_instrument(0),
|
Chris@313
|
58 m_colsPerSec(50),
|
Chris@313
|
59 m_haveStartTime(false)
|
Chris@31
|
60 {
|
Chris@31
|
61 }
|
Chris@31
|
62
|
Chris@31
|
63 Silvet::~Silvet()
|
Chris@31
|
64 {
|
Chris@31
|
65 delete m_resampler;
|
Chris@246
|
66 delete m_flattener;
|
Chris@31
|
67 delete m_cq;
|
Chris@41
|
68 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
|
Chris@41
|
69 delete m_postFilter[i];
|
Chris@41
|
70 }
|
Chris@31
|
71 }
|
Chris@31
|
72
|
Chris@31
|
73 string
|
Chris@31
|
74 Silvet::getIdentifier() const
|
Chris@31
|
75 {
|
Chris@31
|
76 return "silvet";
|
Chris@31
|
77 }
|
Chris@31
|
78
|
Chris@31
|
79 string
|
Chris@31
|
80 Silvet::getName() const
|
Chris@31
|
81 {
|
Chris@31
|
82 return "Silvet Note Transcription";
|
Chris@31
|
83 }
|
Chris@31
|
84
|
Chris@31
|
85 string
|
Chris@31
|
86 Silvet::getDescription() const
|
Chris@31
|
87 {
|
Chris@191
|
88 return "Estimate the note onsets, pitches, and durations that make up a music recording.";
|
Chris@31
|
89 }
|
Chris@31
|
90
|
Chris@31
|
91 string
|
Chris@31
|
92 Silvet::getMaker() const
|
Chris@31
|
93 {
|
Chris@191
|
94 return "Queen Mary, University of London";
|
Chris@31
|
95 }
|
Chris@31
|
96
|
Chris@31
|
97 int
|
Chris@31
|
98 Silvet::getPluginVersion() const
|
Chris@31
|
99 {
|
Chris@309
|
100 return 3;
|
Chris@31
|
101 }
|
Chris@31
|
102
|
Chris@31
|
103 string
|
Chris@31
|
104 Silvet::getCopyright() const
|
Chris@31
|
105 {
|
Chris@191
|
106 return "Method by Emmanouil Benetos and Simon Dixon; plugin by Chris Cannam and Emmanouil Benetos. GPL licence.";
|
Chris@31
|
107 }
|
Chris@31
|
108
|
Chris@31
|
109 Silvet::InputDomain
|
Chris@31
|
110 Silvet::getInputDomain() const
|
Chris@31
|
111 {
|
Chris@31
|
112 return TimeDomain;
|
Chris@31
|
113 }
|
Chris@31
|
114
|
Chris@31
|
115 size_t
|
Chris@31
|
116 Silvet::getPreferredBlockSize() const
|
Chris@31
|
117 {
|
Chris@31
|
118 return 0;
|
Chris@31
|
119 }
|
Chris@31
|
120
|
Chris@31
|
121 size_t
|
Chris@31
|
122 Silvet::getPreferredStepSize() const
|
Chris@31
|
123 {
|
Chris@31
|
124 return 0;
|
Chris@31
|
125 }
|
Chris@31
|
126
|
Chris@31
|
127 size_t
|
Chris@31
|
128 Silvet::getMinChannelCount() const
|
Chris@31
|
129 {
|
Chris@31
|
130 return 1;
|
Chris@31
|
131 }
|
Chris@31
|
132
|
Chris@31
|
133 size_t
|
Chris@31
|
134 Silvet::getMaxChannelCount() const
|
Chris@31
|
135 {
|
Chris@31
|
136 return 1;
|
Chris@31
|
137 }
|
Chris@31
|
138
|
Chris@31
|
139 Silvet::ParameterList
|
Chris@31
|
140 Silvet::getParameterDescriptors() const
|
Chris@31
|
141 {
|
Chris@31
|
142 ParameterList list;
|
Chris@110
|
143
|
Chris@110
|
144 ParameterDescriptor desc;
|
Chris@110
|
145 desc.identifier = "mode";
|
Chris@110
|
146 desc.name = "Processing mode";
|
Chris@110
|
147 desc.unit = "";
|
Chris@297
|
148 desc.description = "Sets the tradeoff of processing speed against transcription quality. Draft mode is tuned in favour of overall speed; Live mode is tuned in favour of lower latency; while Intensive mode (the default) will almost always produce the best results.";
|
Chris@110
|
149 desc.minValue = 0;
|
Chris@297
|
150 desc.maxValue = 2;
|
Chris@113
|
151 desc.defaultValue = 1;
|
Chris@110
|
152 desc.isQuantized = true;
|
Chris@110
|
153 desc.quantizeStep = 1;
|
Chris@166
|
154 desc.valueNames.push_back("Draft (faster)");
|
Chris@165
|
155 desc.valueNames.push_back("Intensive (higher quality)");
|
Chris@297
|
156 desc.valueNames.push_back("Live (lower latency)");
|
Chris@161
|
157 list.push_back(desc);
|
Chris@161
|
158
|
Chris@176
|
159 desc.identifier = "instrument";
|
Chris@176
|
160 desc.name = "Instrument";
|
Chris@161
|
161 desc.unit = "";
|
Chris@271
|
162 desc.description = "The instrument or instruments known to be present in the recording. This affects the set of instrument templates used, as well as the expected level of polyphony in the output. Using a more limited set of instruments than the default will also make the plugin run faster.\nNote that this plugin cannot isolate instruments: you can't use this setting to request notes from only one instrument in a recording with several. Instead, use this as a hint to the plugin about which instruments are actually present.";
|
Chris@161
|
163 desc.minValue = 0;
|
Chris@162
|
164 desc.maxValue = m_instruments.size()-1;
|
Chris@162
|
165 desc.defaultValue = 0;
|
Chris@161
|
166 desc.isQuantized = true;
|
Chris@161
|
167 desc.quantizeStep = 1;
|
Chris@161
|
168 desc.valueNames.clear();
|
Chris@162
|
169 for (int i = 0; i < int(m_instruments.size()); ++i) {
|
Chris@162
|
170 desc.valueNames.push_back(m_instruments[i].name);
|
Chris@162
|
171 }
|
Chris@166
|
172 list.push_back(desc);
|
Chris@161
|
173
|
Chris@166
|
174 desc.identifier = "finetune";
|
Chris@166
|
175 desc.name = "Return fine pitch estimates";
|
Chris@166
|
176 desc.unit = "";
|
Chris@271
|
177 desc.description = "Return pitch estimates at finer than semitone resolution. This works only in Intensive mode. Notes that appear to drift in pitch will be split up into shorter notes with individually finer pitches.";
|
Chris@166
|
178 desc.minValue = 0;
|
Chris@166
|
179 desc.maxValue = 1;
|
Chris@166
|
180 desc.defaultValue = 0;
|
Chris@166
|
181 desc.isQuantized = true;
|
Chris@166
|
182 desc.quantizeStep = 1;
|
Chris@166
|
183 desc.valueNames.clear();
|
Chris@110
|
184 list.push_back(desc);
|
Chris@110
|
185
|
Chris@31
|
186 return list;
|
Chris@31
|
187 }
|
Chris@31
|
188
|
Chris@31
|
189 float
|
Chris@31
|
190 Silvet::getParameter(string identifier) const
|
Chris@31
|
191 {
|
Chris@110
|
192 if (identifier == "mode") {
|
Chris@297
|
193 return (float)(int)m_mode;
|
Chris@166
|
194 } else if (identifier == "finetune") {
|
Chris@166
|
195 return m_fineTuning ? 1.f : 0.f;
|
Chris@176
|
196 } else if (identifier == "instrument") {
|
Chris@162
|
197 return m_instrument;
|
Chris@110
|
198 }
|
Chris@31
|
199 return 0;
|
Chris@31
|
200 }
|
Chris@31
|
201
|
Chris@31
|
202 void
|
Chris@31
|
203 Silvet::setParameter(string identifier, float value)
|
Chris@31
|
204 {
|
Chris@110
|
205 if (identifier == "mode") {
|
Chris@297
|
206 m_mode = (ProcessingMode)(int)(value + 0.5);
|
Chris@166
|
207 } else if (identifier == "finetune") {
|
Chris@166
|
208 m_fineTuning = (value > 0.5);
|
Chris@176
|
209 } else if (identifier == "instrument") {
|
Chris@162
|
210 m_instrument = lrintf(value);
|
Chris@110
|
211 }
|
Chris@31
|
212 }
|
Chris@31
|
213
|
Chris@31
|
214 Silvet::ProgramList
|
Chris@31
|
215 Silvet::getPrograms() const
|
Chris@31
|
216 {
|
Chris@31
|
217 ProgramList list;
|
Chris@31
|
218 return list;
|
Chris@31
|
219 }
|
Chris@31
|
220
|
Chris@31
|
221 string
|
Chris@31
|
222 Silvet::getCurrentProgram() const
|
Chris@31
|
223 {
|
Chris@31
|
224 return "";
|
Chris@31
|
225 }
|
Chris@31
|
226
|
Chris@31
|
227 void
|
Chris@31
|
228 Silvet::selectProgram(string name)
|
Chris@31
|
229 {
|
Chris@31
|
230 }
|
Chris@31
|
231
|
Chris@31
|
232 Silvet::OutputList
|
Chris@31
|
233 Silvet::getOutputDescriptors() const
|
Chris@31
|
234 {
|
Chris@31
|
235 OutputList list;
|
Chris@31
|
236
|
Chris@31
|
237 OutputDescriptor d;
|
Chris@51
|
238 d.identifier = "notes";
|
Chris@51
|
239 d.name = "Note transcription";
|
Chris@271
|
240 d.description = "Overall note transcription. Each note has time, duration, estimated pitch, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture.";
|
Chris@41
|
241 d.unit = "Hz";
|
Chris@31
|
242 d.hasFixedBinCount = true;
|
Chris@31
|
243 d.binCount = 2;
|
Chris@41
|
244 d.binNames.push_back("Frequency");
|
Chris@31
|
245 d.binNames.push_back("Velocity");
|
Chris@31
|
246 d.hasKnownExtents = false;
|
Chris@31
|
247 d.isQuantized = false;
|
Chris@31
|
248 d.sampleType = OutputDescriptor::VariableSampleRate;
|
Chris@246
|
249 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
|
Chris@31
|
250 d.hasDuration = true;
|
Chris@32
|
251 m_notesOutputNo = list.size();
|
Chris@32
|
252 list.push_back(d);
|
Chris@32
|
253
|
Chris@178
|
254 d.identifier = "timefreq";
|
Chris@178
|
255 d.name = "Time-frequency distribution";
|
Chris@271
|
256 d.description = "Filtered constant-Q time-frequency distribution as used as input to the expectation-maximisation algorithm.";
|
Chris@178
|
257 d.unit = "";
|
Chris@178
|
258 d.hasFixedBinCount = true;
|
Chris@298
|
259 d.binCount = getPack(0).templateHeight;
|
Chris@178
|
260 d.binNames.clear();
|
Chris@178
|
261 if (m_cq) {
|
Chris@294
|
262 char name[50];
|
Chris@298
|
263 for (int i = 0; i < getPack(0).templateHeight; ++i) {
|
Chris@178
|
264 // We have a 600-bin (10 oct 60-bin CQ) of which the
|
Chris@178
|
265 // lowest-frequency 55 bins have been dropped, for a
|
Chris@178
|
266 // 545-bin template. The native CQ bins go high->low
|
Chris@178
|
267 // frequency though, so these are still the first 545 bins
|
Chris@178
|
268 // as reported by getBinFrequency, though in reverse order
|
Chris@178
|
269 float freq = m_cq->getBinFrequency
|
Chris@298
|
270 (getPack(0).templateHeight - i - 1);
|
Chris@178
|
271 sprintf(name, "%.1f Hz", freq);
|
Chris@178
|
272 d.binNames.push_back(name);
|
Chris@178
|
273 }
|
Chris@178
|
274 }
|
Chris@178
|
275 d.hasKnownExtents = false;
|
Chris@178
|
276 d.isQuantized = false;
|
Chris@178
|
277 d.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@178
|
278 d.sampleRate = m_colsPerSec;
|
Chris@178
|
279 d.hasDuration = false;
|
Chris@178
|
280 m_fcqOutputNo = list.size();
|
Chris@178
|
281 list.push_back(d);
|
Chris@178
|
282
|
Chris@294
|
283 d.identifier = "pitchactivation";
|
Chris@294
|
284 d.name = "Pitch activation distribution";
|
Chris@294
|
285 d.description = "Pitch activation distribution resulting from expectation-maximisation algorithm, prior to note extraction.";
|
Chris@294
|
286 d.unit = "";
|
Chris@294
|
287 d.hasFixedBinCount = true;
|
Chris@298
|
288 d.binCount = getPack(0).templateNoteCount;
|
Chris@294
|
289 d.binNames.clear();
|
Chris@294
|
290 if (m_cq) {
|
Chris@298
|
291 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
|
Chris@294
|
292 d.binNames.push_back(noteName(i, 0, 1));
|
Chris@294
|
293 }
|
Chris@294
|
294 }
|
Chris@294
|
295 d.hasKnownExtents = false;
|
Chris@294
|
296 d.isQuantized = false;
|
Chris@294
|
297 d.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@294
|
298 d.sampleRate = m_colsPerSec;
|
Chris@294
|
299 d.hasDuration = false;
|
Chris@294
|
300 m_pitchOutputNo = list.size();
|
Chris@294
|
301 list.push_back(d);
|
Chris@294
|
302
|
Chris@309
|
303 d.identifier = "chroma";
|
Chris@309
|
304 d.name = "Pitch chroma distribution";
|
Chris@309
|
305 d.description = "Pitch chroma distribution formed by wrapping the un-thresholded pitch activation distribution into a single octave of semitone bins.";
|
Chris@309
|
306 d.unit = "";
|
Chris@309
|
307 d.hasFixedBinCount = true;
|
Chris@309
|
308 d.binCount = 12;
|
Chris@309
|
309 d.binNames.clear();
|
Chris@309
|
310 if (m_cq) {
|
Chris@309
|
311 for (int i = 0; i < 12; ++i) {
|
Chris@309
|
312 d.binNames.push_back(chromaName(i));
|
Chris@309
|
313 }
|
Chris@309
|
314 }
|
Chris@309
|
315 d.hasKnownExtents = false;
|
Chris@309
|
316 d.isQuantized = false;
|
Chris@309
|
317 d.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@309
|
318 d.sampleRate = m_colsPerSec;
|
Chris@309
|
319 d.hasDuration = false;
|
Chris@309
|
320 m_chromaOutputNo = list.size();
|
Chris@309
|
321 list.push_back(d);
|
Chris@309
|
322
|
Chris@302
|
323 d.identifier = "templates";
|
Chris@302
|
324 d.name = "Templates";
|
Chris@302
|
325 d.description = "Constant-Q spectral templates for the selected instrument pack.";
|
Chris@302
|
326 d.unit = "";
|
Chris@302
|
327 d.hasFixedBinCount = true;
|
Chris@302
|
328 d.binCount = getPack(0).templateHeight;
|
Chris@302
|
329 d.binNames.clear();
|
Chris@302
|
330 if (m_cq) {
|
Chris@302
|
331 char name[50];
|
Chris@302
|
332 for (int i = 0; i < getPack(0).templateHeight; ++i) {
|
Chris@302
|
333 // We have a 600-bin (10 oct 60-bin CQ) of which the
|
Chris@302
|
334 // lowest-frequency 55 bins have been dropped, for a
|
Chris@302
|
335 // 545-bin template. The native CQ bins go high->low
|
Chris@302
|
336 // frequency though, so these are still the first 545 bins
|
Chris@302
|
337 // as reported by getBinFrequency, though in reverse order
|
Chris@302
|
338 float freq = m_cq->getBinFrequency
|
Chris@302
|
339 (getPack(0).templateHeight - i - 1);
|
Chris@302
|
340 sprintf(name, "%.1f Hz", freq);
|
Chris@302
|
341 d.binNames.push_back(name);
|
Chris@302
|
342 }
|
Chris@302
|
343 }
|
Chris@302
|
344 d.hasKnownExtents = false;
|
Chris@302
|
345 d.isQuantized = false;
|
Chris@302
|
346 d.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@302
|
347 d.sampleRate = m_colsPerSec;
|
Chris@302
|
348 d.hasDuration = false;
|
Chris@302
|
349 m_templateOutputNo = list.size();
|
Chris@302
|
350 list.push_back(d);
|
Chris@302
|
351
|
Chris@31
|
352 return list;
|
Chris@31
|
353 }
|
Chris@31
|
354
|
Chris@38
|
355 std::string
|
Chris@309
|
356 Silvet::chromaName(int pitch) const
|
Chris@38
|
357 {
|
Chris@38
|
358 static const char *names[] = {
|
Chris@38
|
359 "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"
|
Chris@38
|
360 };
|
Chris@38
|
361
|
Chris@309
|
362 return names[pitch];
|
Chris@309
|
363 }
|
Chris@309
|
364
|
Chris@309
|
365 std::string
|
Chris@309
|
366 Silvet::noteName(int note, int shift, int shiftCount) const
|
Chris@309
|
367 {
|
Chris@309
|
368 string n = chromaName(note % 12);
|
Chris@38
|
369
|
Chris@175
|
370 int oct = (note + 9) / 12;
|
Chris@38
|
371
|
Chris@175
|
372 char buf[30];
|
Chris@175
|
373
|
Chris@175
|
374 float pshift = 0.f;
|
Chris@175
|
375 if (shiftCount > 1) {
|
Chris@175
|
376 // see noteFrequency below
|
Chris@175
|
377 pshift =
|
Chris@175
|
378 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
|
Chris@175
|
379 }
|
Chris@175
|
380
|
Chris@175
|
381 if (pshift > 0.f) {
|
Chris@309
|
382 sprintf(buf, "%s%d+%dc", n.c_str(), oct, int(round(pshift * 100)));
|
Chris@175
|
383 } else if (pshift < 0.f) {
|
Chris@309
|
384 sprintf(buf, "%s%d-%dc", n.c_str(), oct, int(round((-pshift) * 100)));
|
Chris@175
|
385 } else {
|
Chris@309
|
386 sprintf(buf, "%s%d", n.c_str(), oct);
|
Chris@175
|
387 }
|
Chris@38
|
388
|
Chris@38
|
389 return buf;
|
Chris@38
|
390 }
|
Chris@38
|
391
|
Chris@41
|
392 float
|
Chris@168
|
393 Silvet::noteFrequency(int note, int shift, int shiftCount) const
|
Chris@41
|
394 {
|
Chris@169
|
395 // Convert shift number to a pitch shift. The given shift number
|
Chris@169
|
396 // is an offset into the template array, which starts with some
|
Chris@169
|
397 // zeros, followed by the template, then some trailing zeros.
|
Chris@169
|
398 //
|
Chris@169
|
399 // Example: if we have templateMaxShift == 2 and thus shiftCount
|
Chris@169
|
400 // == 5, then the number will be in the range 0-4 and the template
|
Chris@169
|
401 // will have 2 zeros at either end. Thus number 2 represents the
|
Chris@169
|
402 // template "as recorded", for a pitch shift of 0; smaller indices
|
Chris@169
|
403 // represent moving the template *up* in pitch (by introducing
|
Chris@169
|
404 // zeros at the start, which is the low-frequency end), for a
|
Chris@169
|
405 // positive pitch shift; and higher values represent moving it
|
Chris@169
|
406 // down in pitch, for a negative pitch shift.
|
Chris@169
|
407
|
Chris@175
|
408 float pshift = 0.f;
|
Chris@175
|
409 if (shiftCount > 1) {
|
Chris@175
|
410 pshift =
|
Chris@175
|
411 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
|
Chris@175
|
412 }
|
Chris@169
|
413
|
Chris@301
|
414 float freq = float(27.5 * pow(2.0, (note + pshift) / 12.0));
|
Chris@301
|
415
|
Chris@303
|
416 // cerr << "note = " << note << ", shift = " << shift << ", shiftCount = "
|
Chris@303
|
417 // << shiftCount << ", obtained freq = " << freq << endl;
|
Chris@301
|
418
|
Chris@301
|
419 return freq;
|
Chris@41
|
420 }
|
Chris@41
|
421
|
Chris@31
|
422 bool
|
Chris@31
|
423 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
Chris@31
|
424 {
|
Chris@272
|
425 if (m_inputSampleRate < minInputSampleRate ||
|
Chris@272
|
426 m_inputSampleRate > maxInputSampleRate) {
|
Chris@272
|
427 cerr << "Silvet::initialise: Unsupported input sample rate "
|
Chris@272
|
428 << m_inputSampleRate << " (supported min " << minInputSampleRate
|
Chris@272
|
429 << ", max " << maxInputSampleRate << ")" << endl;
|
Chris@272
|
430 return false;
|
Chris@272
|
431 }
|
Chris@272
|
432
|
Chris@31
|
433 if (channels < getMinChannelCount() ||
|
Chris@272
|
434 channels > getMaxChannelCount()) {
|
Chris@272
|
435 cerr << "Silvet::initialise: Unsupported channel count " << channels
|
Chris@272
|
436 << " (supported min " << getMinChannelCount() << ", max "
|
Chris@272
|
437 << getMaxChannelCount() << ")" << endl;
|
Chris@272
|
438 return false;
|
Chris@272
|
439 }
|
Chris@31
|
440
|
Chris@31
|
441 if (stepSize != blockSize) {
|
Chris@31
|
442 cerr << "Silvet::initialise: Step size must be the same as block size ("
|
Chris@31
|
443 << stepSize << " != " << blockSize << ")" << endl;
|
Chris@31
|
444 return false;
|
Chris@31
|
445 }
|
Chris@31
|
446
|
Chris@31
|
447 m_blockSize = blockSize;
|
Chris@31
|
448
|
Chris@31
|
449 reset();
|
Chris@31
|
450
|
Chris@31
|
451 return true;
|
Chris@31
|
452 }
|
Chris@31
|
453
|
Chris@31
|
454 void
|
Chris@31
|
455 Silvet::reset()
|
Chris@31
|
456 {
|
Chris@31
|
457 delete m_resampler;
|
Chris@246
|
458 delete m_flattener;
|
Chris@31
|
459 delete m_cq;
|
Chris@31
|
460
|
Chris@31
|
461 if (m_inputSampleRate != processingSampleRate) {
|
Chris@31
|
462 m_resampler = new Resampler(m_inputSampleRate, processingSampleRate);
|
Chris@31
|
463 } else {
|
Chris@31
|
464 m_resampler = 0;
|
Chris@31
|
465 }
|
Chris@31
|
466
|
Chris@246
|
467 m_flattener = new FlattenDynamics(m_inputSampleRate); // before resampling
|
Chris@246
|
468 m_flattener->reset();
|
Chris@246
|
469
|
Chris@301
|
470 // this happens to be processingSampleRate / 3, and is the top
|
Chris@301
|
471 // freq used for the EM templates:
|
Chris@301
|
472 double maxFreq = 14700;
|
Chris@301
|
473
|
Chris@301
|
474 if (m_mode == LiveMode) {
|
Chris@301
|
475 // We only have 12 bpo rather than 60, so we need the top bin
|
Chris@301
|
476 // to be the middle one of the top 5, i.e. 2/5 of a semitone
|
Chris@301
|
477 // lower than 14700
|
Chris@301
|
478 maxFreq *= powf(2.0, -1.0 / 30.0);
|
Chris@301
|
479 }
|
Chris@301
|
480
|
Chris@173
|
481 double minFreq = 27.5;
|
Chris@173
|
482
|
Chris@297
|
483 if (m_mode != HighQualityMode) {
|
Chris@173
|
484 // We don't actually return any notes from the bottom octave,
|
Chris@173
|
485 // so we can just pad with zeros
|
Chris@173
|
486 minFreq *= 2;
|
Chris@173
|
487 }
|
Chris@173
|
488
|
Chris@298
|
489 int bpo = 12 *
|
Chris@298
|
490 (m_mode == LiveMode ? binsPerSemitoneLive : binsPerSemitoneNormal);
|
Chris@301
|
491
|
Chris@154
|
492 CQParameters params(processingSampleRate,
|
Chris@173
|
493 minFreq,
|
Chris@303
|
494 maxFreq,
|
Chris@298
|
495 bpo);
|
Chris@154
|
496
|
Chris@155
|
497 params.q = 0.95; // MIREX code uses 0.8, but it seems 0.9 or lower
|
Chris@155
|
498 // drops the FFT size to 512 from 1024 and alters
|
Chris@155
|
499 // some other processing parameters, making
|
Chris@155
|
500 // everything much, much slower. Could be a flaw
|
Chris@155
|
501 // in the CQ parameter calculations, must check
|
Chris@154
|
502 params.atomHopFactor = 0.3;
|
Chris@154
|
503 params.threshold = 0.0005;
|
Chris@172
|
504 params.window = CQParameters::Hann;
|
Chris@154
|
505
|
Chris@154
|
506 m_cq = new CQSpectrogram(params, CQSpectrogram::InterpolateLinear);
|
Chris@31
|
507
|
Chris@303
|
508 // cerr << "CQ bins = " << m_cq->getTotalBins() << endl;
|
Chris@303
|
509 // cerr << "CQ min freq = " << m_cq->getMinFrequency() << " (and for confirmation, freq of bin 0 = " << m_cq->getBinFrequency(0) << ")" << endl;
|
Chris@297
|
510
|
Chris@297
|
511 m_colsPerSec = (m_mode == DraftMode ? 25 : 50);
|
Chris@165
|
512
|
Chris@41
|
513 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
|
Chris@41
|
514 delete m_postFilter[i];
|
Chris@41
|
515 }
|
Chris@41
|
516 m_postFilter.clear();
|
Chris@303
|
517 int postFilterLength = 3;
|
Chris@298
|
518 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
|
Chris@303
|
519 m_postFilter.push_back(new MedianFilter<double>(postFilterLength));
|
Chris@41
|
520 }
|
Chris@41
|
521 m_pianoRoll.clear();
|
Chris@246
|
522 m_inputGains.clear();
|
Chris@32
|
523 m_columnCount = 0;
|
Chris@272
|
524 m_resampledCount = 0;
|
Chris@40
|
525 m_startTime = RealTime::zeroTime;
|
Chris@313
|
526 m_haveStartTime = false;
|
Chris@31
|
527 }
|
Chris@31
|
528
|
Chris@31
|
529 Silvet::FeatureSet
|
Chris@31
|
530 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
Chris@31
|
531 {
|
Chris@302
|
532 FeatureSet fs;
|
Chris@302
|
533
|
Chris@313
|
534 if (!m_haveStartTime) {
|
Chris@314
|
535
|
Chris@40
|
536 m_startTime = timestamp;
|
Chris@313
|
537 m_haveStartTime = true;
|
Chris@314
|
538
|
Chris@302
|
539 insertTemplateFeatures(fs);
|
Chris@40
|
540 }
|
Chris@246
|
541
|
Chris@246
|
542 vector<float> flattened(m_blockSize);
|
Chris@246
|
543 float gain = 1.f;
|
Chris@246
|
544 m_flattener->connectInputPort
|
Chris@246
|
545 (FlattenDynamics::AudioInputPort, inputBuffers[0]);
|
Chris@246
|
546 m_flattener->connectOutputPort
|
Chris@246
|
547 (FlattenDynamics::AudioOutputPort, &flattened[0]);
|
Chris@246
|
548 m_flattener->connectOutputPort
|
Chris@246
|
549 (FlattenDynamics::GainOutputPort, &gain);
|
Chris@246
|
550 m_flattener->process(m_blockSize);
|
Chris@246
|
551
|
Chris@252
|
552 m_inputGains[timestamp] = gain;
|
Chris@40
|
553
|
Chris@31
|
554 vector<double> data;
|
Chris@40
|
555 for (int i = 0; i < m_blockSize; ++i) {
|
Chris@246
|
556 double d = flattened[i];
|
Chris@235
|
557 data.push_back(d);
|
Chris@40
|
558 }
|
Chris@31
|
559
|
Chris@31
|
560 if (m_resampler) {
|
Chris@272
|
561
|
Chris@31
|
562 data = m_resampler->process(data.data(), data.size());
|
Chris@272
|
563
|
Chris@272
|
564 int hadCount = m_resampledCount;
|
Chris@272
|
565 m_resampledCount += data.size();
|
Chris@272
|
566
|
Chris@272
|
567 int resamplerLatency = m_resampler->getLatency();
|
Chris@272
|
568
|
Chris@272
|
569 if (hadCount < resamplerLatency) {
|
Chris@272
|
570 int stillToDrop = resamplerLatency - hadCount;
|
Chris@272
|
571 if (stillToDrop >= int(data.size())) {
|
Chris@302
|
572 return fs;
|
Chris@272
|
573 } else {
|
Chris@272
|
574 data = vector<double>(data.begin() + stillToDrop, data.end());
|
Chris@272
|
575 }
|
Chris@272
|
576 }
|
Chris@31
|
577 }
|
Chris@272
|
578
|
Chris@32
|
579 Grid cqout = m_cq->process(data);
|
Chris@302
|
580 transcribe(cqout, fs);
|
Chris@51
|
581 return fs;
|
Chris@34
|
582 }
|
Chris@34
|
583
|
Chris@34
|
584 Silvet::FeatureSet
|
Chris@34
|
585 Silvet::getRemainingFeatures()
|
Chris@34
|
586 {
|
Chris@145
|
587 Grid cqout = m_cq->getRemainingOutput();
|
Chris@302
|
588 FeatureSet fs;
|
Chris@302
|
589 if (m_columnCount == 0) {
|
Chris@302
|
590 // process() was never called, but we still want these
|
Chris@302
|
591 insertTemplateFeatures(fs);
|
Chris@302
|
592 } else {
|
Chris@302
|
593 transcribe(cqout, fs);
|
Chris@302
|
594 }
|
Chris@51
|
595 return fs;
|
Chris@34
|
596 }
|
Chris@34
|
597
|
Chris@302
|
598 void
|
Chris@302
|
599 Silvet::insertTemplateFeatures(FeatureSet &fs)
|
Chris@302
|
600 {
|
Chris@302
|
601 const InstrumentPack &pack = getPack(m_instrument);
|
Chris@302
|
602 for (int i = 0; i < int(pack.templates.size()) * pack.templateNoteCount; ++i) {
|
Chris@302
|
603 RealTime timestamp = RealTime::fromSeconds(double(i) / m_colsPerSec);
|
Chris@302
|
604 Feature f;
|
Chris@302
|
605 char buffer[50];
|
Chris@302
|
606 sprintf(buffer, "Note %d", i + 1);
|
Chris@302
|
607 f.label = buffer;
|
Chris@302
|
608 f.hasTimestamp = true;
|
Chris@302
|
609 f.timestamp = timestamp;
|
Chris@302
|
610 f.values = pack.templates[i / pack.templateNoteCount]
|
Chris@302
|
611 .data[i % pack.templateNoteCount];
|
Chris@302
|
612 fs[m_templateOutputNo].push_back(f);
|
Chris@302
|
613 }
|
Chris@302
|
614 }
|
Chris@302
|
615
|
Chris@302
|
616 void
|
Chris@302
|
617 Silvet::transcribe(const Grid &cqout, Silvet::FeatureSet &fs)
|
Chris@34
|
618 {
|
Chris@32
|
619 Grid filtered = preProcess(cqout);
|
Chris@31
|
620
|
Chris@302
|
621 if (filtered.empty()) return;
|
Chris@170
|
622
|
Chris@298
|
623 const InstrumentPack &pack(getPack(m_instrument));
|
Chris@104
|
624
|
Chris@178
|
625 for (int i = 0; i < (int)filtered.size(); ++i) {
|
Chris@178
|
626 Feature f;
|
Chris@178
|
627 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@178
|
628 f.values.push_back(float(filtered[i][j]));
|
Chris@178
|
629 }
|
Chris@178
|
630 fs[m_fcqOutputNo].push_back(f);
|
Chris@178
|
631 }
|
Chris@178
|
632
|
Chris@34
|
633 int width = filtered.size();
|
Chris@34
|
634
|
Chris@311
|
635 Grid localPitches(width);
|
Chris@170
|
636
|
Chris@297
|
637 bool wantShifts = (m_mode == HighQualityMode) && m_fineTuning;
|
Chris@170
|
638 int shiftCount = 1;
|
Chris@170
|
639 if (wantShifts) {
|
Chris@170
|
640 shiftCount = pack.templateMaxShift * 2 + 1;
|
Chris@170
|
641 }
|
Chris@170
|
642
|
Chris@170
|
643 vector<vector<int> > localBestShifts;
|
Chris@170
|
644 if (wantShifts) {
|
Chris@311
|
645 localBestShifts = vector<vector<int> >(width);
|
Chris@170
|
646 }
|
Chris@170
|
647
|
Chris@312
|
648 #ifndef MAX_EM_THREADS
|
Chris@312
|
649 #define MAX_EM_THREADS 8
|
Chris@312
|
650 #endif
|
Chris@312
|
651
|
Chris@312
|
652 #if (defined(MAX_EM_THREADS) && (MAX_EM_THREADS > 1))
|
Chris@312
|
653 for (int i = 0; i < width; ) {
|
Chris@312
|
654 typedef future<pair<vector<double>, vector<int>>> EMFuture;
|
Chris@312
|
655 vector<EMFuture> results;
|
Chris@312
|
656 for (int j = 0; j < MAX_EM_THREADS && i + j < width; ++j) {
|
Chris@312
|
657 results.push_back
|
Chris@312
|
658 (async(std::launch::async,
|
Chris@312
|
659 [&](int index) {
|
Chris@312
|
660 return applyEM(pack, filtered.at(index), wantShifts);
|
Chris@312
|
661 }, i + j));
|
Chris@312
|
662 }
|
Chris@312
|
663 for (int j = 0; j < MAX_EM_THREADS && i + j < width; ++j) {
|
Chris@312
|
664 auto out = results[j].get();
|
Chris@312
|
665 localPitches[i+j] = out.first;
|
Chris@312
|
666 if (wantShifts) localBestShifts[i+j] = out.second;
|
Chris@312
|
667 }
|
Chris@312
|
668 i += MAX_EM_THREADS;
|
Chris@312
|
669 }
|
Chris@312
|
670 #else
|
Chris@123
|
671 for (int i = 0; i < width; ++i) {
|
Chris@311
|
672 auto out = applyEM(pack, filtered.at(i), wantShifts);
|
Chris@311
|
673 localPitches[i] = out.first;
|
Chris@311
|
674 if (wantShifts) localBestShifts[i] = out.second;
|
Chris@123
|
675 }
|
Chris@312
|
676 #endif
|
Chris@305
|
677
|
Chris@166
|
678 for (int i = 0; i < width; ++i) {
|
Chris@37
|
679
|
Chris@309
|
680 // This returns a filtered column, and pushes the
|
Chris@309
|
681 // up-to-max-polyphony activation column to m_pianoRoll
|
Chris@294
|
682 vector<double> filtered = postProcess
|
Chris@294
|
683 (localPitches[i], localBestShifts[i], wantShifts);
|
Chris@294
|
684
|
Chris@309
|
685 RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1);
|
Chris@309
|
686 float inputGain = getInputGainAt(timestamp);
|
Chris@309
|
687
|
Chris@294
|
688 Feature f;
|
Chris@294
|
689 for (int j = 0; j < (int)filtered.size(); ++j) {
|
Chris@309
|
690 float v = filtered[j];
|
Chris@294
|
691 if (v < pack.levelThreshold) v = 0.f;
|
Chris@309
|
692 f.values.push_back(v / inputGain);
|
Chris@294
|
693 }
|
Chris@294
|
694 fs[m_pitchOutputNo].push_back(f);
|
Chris@309
|
695
|
Chris@309
|
696 f.values.clear();
|
Chris@309
|
697 f.values.resize(12);
|
Chris@309
|
698 for (int j = 0; j < (int)filtered.size(); ++j) {
|
Chris@309
|
699 f.values[j % 12] += filtered[j] / inputGain;
|
Chris@309
|
700 }
|
Chris@309
|
701 fs[m_chromaOutputNo].push_back(f);
|
Chris@166
|
702
|
Chris@168
|
703 FeatureList noteFeatures = noteTrack(shiftCount);
|
Chris@38
|
704
|
Chris@123
|
705 for (FeatureList::const_iterator fi = noteFeatures.begin();
|
Chris@123
|
706 fi != noteFeatures.end(); ++fi) {
|
Chris@123
|
707 fs[m_notesOutputNo].push_back(*fi);
|
Chris@40
|
708 }
|
Chris@34
|
709 }
|
Chris@31
|
710 }
|
Chris@31
|
711
|
Chris@311
|
712 pair<vector<double>, vector<int> >
|
Chris@311
|
713 Silvet::applyEM(const InstrumentPack &pack,
|
Chris@311
|
714 const vector<double> &column,
|
Chris@311
|
715 bool wantShifts)
|
Chris@311
|
716 {
|
Chris@311
|
717 double columnThreshold = 1e-5;
|
Chris@311
|
718
|
Chris@314
|
719 if (m_mode == LiveMode) {
|
Chris@314
|
720 columnThreshold /= 20;
|
Chris@314
|
721 }
|
Chris@314
|
722
|
Chris@311
|
723 vector<double> pitches(pack.templateNoteCount, 0.0);
|
Chris@311
|
724 vector<int> bestShifts;
|
Chris@311
|
725
|
Chris@311
|
726 double sum = 0.0;
|
Chris@311
|
727 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@311
|
728 sum += column.at(j);
|
Chris@311
|
729 }
|
Chris@311
|
730 if (sum < columnThreshold) return { pitches, bestShifts };
|
Chris@311
|
731
|
Chris@314
|
732 EM em(&pack, m_mode == HighQualityMode);
|
Chris@311
|
733
|
Chris@311
|
734 em.setPitchSparsity(pack.pitchSparsity);
|
Chris@311
|
735 em.setSourceSparsity(pack.sourceSparsity);
|
Chris@311
|
736
|
Chris@314
|
737 int iterations = (m_mode == HighQualityMode ? 20 : 10);
|
Chris@311
|
738
|
Chris@311
|
739 for (int j = 0; j < iterations; ++j) {
|
Chris@311
|
740 em.iterate(column.data());
|
Chris@311
|
741 }
|
Chris@311
|
742
|
Chris@311
|
743 const float *pitchDist = em.getPitchDistribution();
|
Chris@311
|
744 const float *const *shiftDist = em.getShifts();
|
Chris@311
|
745
|
Chris@311
|
746 int shiftCount = 1;
|
Chris@311
|
747 if (wantShifts) {
|
Chris@311
|
748 shiftCount = pack.templateMaxShift * 2 + 1;
|
Chris@311
|
749 }
|
Chris@311
|
750
|
Chris@311
|
751 for (int j = 0; j < pack.templateNoteCount; ++j) {
|
Chris@311
|
752
|
Chris@311
|
753 pitches[j] = pitchDist[j] * sum;
|
Chris@311
|
754
|
Chris@311
|
755 int bestShift = 0;
|
Chris@311
|
756 float bestShiftValue = 0.0;
|
Chris@311
|
757 if (wantShifts) {
|
Chris@311
|
758 for (int k = 0; k < shiftCount; ++k) {
|
Chris@311
|
759 float value = shiftDist[k][j];
|
Chris@311
|
760 if (k == 0 || value > bestShiftValue) {
|
Chris@311
|
761 bestShiftValue = value;
|
Chris@311
|
762 bestShift = k;
|
Chris@311
|
763 }
|
Chris@311
|
764 }
|
Chris@311
|
765 bestShifts.push_back(bestShift);
|
Chris@311
|
766 }
|
Chris@311
|
767 }
|
Chris@311
|
768
|
Chris@311
|
769 return { pitches, bestShifts };
|
Chris@311
|
770 }
|
Chris@311
|
771
|
Chris@32
|
772 Silvet::Grid
|
Chris@32
|
773 Silvet::preProcess(const Grid &in)
|
Chris@32
|
774 {
|
Chris@32
|
775 int width = in.size();
|
Chris@32
|
776
|
Chris@165
|
777 int spacing = processingSampleRate / m_colsPerSec;
|
Chris@32
|
778
|
Chris@165
|
779 // need to be careful that col spacing is an integer number of samples!
|
Chris@165
|
780 assert(spacing * m_colsPerSec == processingSampleRate);
|
Chris@32
|
781
|
Chris@32
|
782 Grid out;
|
Chris@32
|
783
|
Chris@58
|
784 // We count the CQ latency in terms of processing hops, but
|
Chris@58
|
785 // actually it probably isn't an exact number of hops so this
|
Chris@58
|
786 // isn't quite accurate. But the small constant offset is
|
Chris@165
|
787 // practically irrelevant compared to the jitter from the frame
|
Chris@165
|
788 // size we reduce to in a moment
|
Chris@33
|
789 int latentColumns = m_cq->getLatency() / m_cq->getColumnHop();
|
Chris@33
|
790
|
Chris@298
|
791 const InstrumentPack &pack(getPack(m_instrument));
|
Chris@176
|
792
|
Chris@32
|
793 for (int i = 0; i < width; ++i) {
|
Chris@32
|
794
|
Chris@33
|
795 if (m_columnCount < latentColumns) {
|
Chris@33
|
796 ++m_columnCount;
|
Chris@33
|
797 continue;
|
Chris@33
|
798 }
|
Chris@33
|
799
|
Chris@32
|
800 int prevSampleNo = (m_columnCount - 1) * m_cq->getColumnHop();
|
Chris@32
|
801 int sampleNo = m_columnCount * m_cq->getColumnHop();
|
Chris@32
|
802
|
Chris@32
|
803 bool select = (sampleNo / spacing != prevSampleNo / spacing);
|
Chris@32
|
804
|
Chris@32
|
805 if (select) {
|
Chris@32
|
806 vector<double> inCol = in[i];
|
Chris@176
|
807 vector<double> outCol(pack.templateHeight);
|
Chris@32
|
808
|
Chris@178
|
809 // In HQ mode, the CQ returns 600 bins and we ignore the
|
Chris@298
|
810 // lowest 55 of them (assuming binsPerSemitone == 5).
|
Chris@178
|
811 //
|
Chris@297
|
812 // In draft and live mode the CQ is an octave shorter,
|
Chris@300
|
813 // returning 540 bins or equivalent, so we instead pad
|
Chris@300
|
814 // them with an additional 5 or equivalent zeros.
|
Chris@178
|
815 //
|
Chris@178
|
816 // We also need to reverse the column as we go, since the
|
Chris@178
|
817 // raw CQ has the high frequencies first and we need it
|
Chris@178
|
818 // the other way around.
|
Chris@32
|
819
|
Chris@298
|
820 int bps = (m_mode == LiveMode ?
|
Chris@298
|
821 binsPerSemitoneLive : binsPerSemitoneNormal);
|
Chris@298
|
822
|
Chris@297
|
823 if (m_mode == HighQualityMode) {
|
Chris@178
|
824 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@298
|
825 int ix = inCol.size() - j - (11 * bps);
|
Chris@178
|
826 outCol[j] = inCol[ix];
|
Chris@178
|
827 }
|
Chris@178
|
828 } else {
|
Chris@298
|
829 for (int j = 0; j < bps; ++j) {
|
Chris@178
|
830 outCol[j] = 0.0;
|
Chris@178
|
831 }
|
Chris@298
|
832 for (int j = bps; j < pack.templateHeight; ++j) {
|
Chris@298
|
833 int ix = inCol.size() - j + (bps-1);
|
Chris@178
|
834 outCol[j] = inCol[ix];
|
Chris@178
|
835 }
|
Chris@46
|
836 }
|
Chris@32
|
837
|
Chris@46
|
838 vector<double> noiseLevel1 =
|
Chris@298
|
839 MedianFilter<double>::filter(8 * bps, outCol);
|
Chris@176
|
840 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@46
|
841 noiseLevel1[j] = std::min(outCol[j], noiseLevel1[j]);
|
Chris@46
|
842 }
|
Chris@32
|
843
|
Chris@46
|
844 vector<double> noiseLevel2 =
|
Chris@298
|
845 MedianFilter<double>::filter(8 * bps, noiseLevel1);
|
Chris@176
|
846 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@46
|
847 outCol[j] = std::max(outCol[j] - noiseLevel2[j], 0.0);
|
Chris@32
|
848 }
|
Chris@32
|
849
|
Chris@165
|
850 out.push_back(outCol);
|
Chris@32
|
851 }
|
Chris@32
|
852
|
Chris@32
|
853 ++m_columnCount;
|
Chris@32
|
854 }
|
Chris@32
|
855
|
Chris@32
|
856 return out;
|
Chris@32
|
857 }
|
Chris@32
|
858
|
Chris@294
|
859 vector<double>
|
Chris@170
|
860 Silvet::postProcess(const vector<double> &pitches,
|
Chris@170
|
861 const vector<int> &bestShifts,
|
Chris@170
|
862 bool wantShifts)
|
Chris@166
|
863 {
|
Chris@298
|
864 const InstrumentPack &pack(getPack(m_instrument));
|
Chris@176
|
865
|
Chris@41
|
866 vector<double> filtered;
|
Chris@41
|
867
|
Chris@176
|
868 for (int j = 0; j < pack.templateNoteCount; ++j) {
|
Chris@170
|
869 m_postFilter[j]->push(pitches[j]);
|
Chris@41
|
870 filtered.push_back(m_postFilter[j]->get());
|
Chris@41
|
871 }
|
Chris@41
|
872
|
Chris@41
|
873 // Threshold for level and reduce number of candidate pitches
|
Chris@41
|
874
|
Chris@41
|
875 typedef std::multimap<double, int> ValueIndexMap;
|
Chris@41
|
876
|
Chris@41
|
877 ValueIndexMap strengths;
|
Chris@166
|
878
|
Chris@176
|
879 for (int j = 0; j < pack.templateNoteCount; ++j) {
|
Chris@166
|
880 double strength = filtered[j];
|
Chris@183
|
881 if (strength < pack.levelThreshold) continue;
|
Chris@168
|
882 strengths.insert(ValueIndexMap::value_type(strength, j));
|
Chris@168
|
883 }
|
Chris@166
|
884
|
Chris@168
|
885 ValueIndexMap::const_iterator si = strengths.end();
|
Chris@167
|
886
|
Chris@168
|
887 map<int, double> active;
|
Chris@168
|
888 map<int, int> activeShifts;
|
Chris@168
|
889
|
Chris@183
|
890 while (int(active.size()) < pack.maxPolyphony && si != strengths.begin()) {
|
Chris@168
|
891
|
Chris@168
|
892 --si;
|
Chris@168
|
893
|
Chris@168
|
894 double strength = si->first;
|
Chris@168
|
895 int j = si->second;
|
Chris@168
|
896
|
Chris@168
|
897 active[j] = strength;
|
Chris@168
|
898
|
Chris@170
|
899 if (wantShifts) {
|
Chris@170
|
900 activeShifts[j] = bestShifts[j];
|
Chris@167
|
901 }
|
Chris@41
|
902 }
|
Chris@41
|
903
|
Chris@168
|
904 m_pianoRoll.push_back(active);
|
Chris@170
|
905
|
Chris@170
|
906 if (wantShifts) {
|
Chris@168
|
907 m_pianoRollShifts.push_back(activeShifts);
|
Chris@41
|
908 }
|
Chris@294
|
909
|
Chris@294
|
910 return filtered;
|
Chris@166
|
911 }
|
Chris@166
|
912
|
Chris@166
|
913 Vamp::Plugin::FeatureList
|
Chris@168
|
914 Silvet::noteTrack(int shiftCount)
|
Chris@166
|
915 {
|
Chris@41
|
916 // Minimum duration pruning, and conversion to notes. We can only
|
Chris@41
|
917 // report notes that have just ended (i.e. that are absent in the
|
Chris@168
|
918 // latest active set but present in the prior set in the piano
|
Chris@41
|
919 // roll) -- any notes that ended earlier will have been reported
|
Chris@41
|
920 // already, and if they haven't ended, we don't know their
|
Chris@41
|
921 // duration.
|
Chris@41
|
922
|
Chris@168
|
923 int width = m_pianoRoll.size() - 1;
|
Chris@168
|
924
|
Chris@168
|
925 const map<int, double> &active = m_pianoRoll[width];
|
Chris@41
|
926
|
Chris@165
|
927 double columnDuration = 1.0 / m_colsPerSec;
|
Chris@165
|
928
|
Chris@165
|
929 // only keep notes >= 100ms or thereabouts
|
Chris@165
|
930 int durationThreshold = floor(0.1 / columnDuration); // columns
|
Chris@165
|
931 if (durationThreshold < 1) durationThreshold = 1;
|
Chris@41
|
932
|
Chris@41
|
933 FeatureList noteFeatures;
|
Chris@41
|
934
|
Chris@41
|
935 if (width < durationThreshold + 1) {
|
Chris@41
|
936 return noteFeatures;
|
Chris@41
|
937 }
|
Chris@41
|
938
|
Chris@150
|
939 //!!! try: repeated note detection? (look for change in first derivative of the pitch matrix)
|
Chris@150
|
940
|
Chris@55
|
941 for (map<int, double>::const_iterator ni = m_pianoRoll[width-1].begin();
|
Chris@41
|
942 ni != m_pianoRoll[width-1].end(); ++ni) {
|
Chris@41
|
943
|
Chris@55
|
944 int note = ni->first;
|
Chris@41
|
945
|
Chris@41
|
946 if (active.find(note) != active.end()) {
|
Chris@41
|
947 // the note is still playing
|
Chris@41
|
948 continue;
|
Chris@41
|
949 }
|
Chris@41
|
950
|
Chris@41
|
951 // the note was playing but just ended
|
Chris@41
|
952 int end = width;
|
Chris@41
|
953 int start = end-1;
|
Chris@41
|
954
|
Chris@41
|
955 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) {
|
Chris@41
|
956 --start;
|
Chris@41
|
957 }
|
Chris@41
|
958 ++start;
|
Chris@41
|
959
|
Chris@169
|
960 if ((end - start) < durationThreshold) {
|
Chris@41
|
961 continue;
|
Chris@41
|
962 }
|
Chris@41
|
963
|
Chris@169
|
964 emitNote(start, end, note, shiftCount, noteFeatures);
|
Chris@41
|
965 }
|
Chris@41
|
966
|
Chris@62
|
967 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl;
|
Chris@41
|
968
|
Chris@41
|
969 return noteFeatures;
|
Chris@41
|
970 }
|
Chris@41
|
971
|
Chris@169
|
972 void
|
Chris@169
|
973 Silvet::emitNote(int start, int end, int note, int shiftCount,
|
Chris@169
|
974 FeatureList ¬eFeatures)
|
Chris@169
|
975 {
|
Chris@169
|
976 int partStart = start;
|
Chris@169
|
977 int partShift = 0;
|
Chris@169
|
978 int partVelocity = 0;
|
Chris@169
|
979
|
Chris@252
|
980 int partThreshold = floor(0.05 * m_colsPerSec);
|
Chris@169
|
981
|
Chris@169
|
982 for (int i = start; i != end; ++i) {
|
Chris@169
|
983
|
Chris@169
|
984 double strength = m_pianoRoll[i][note];
|
Chris@169
|
985
|
Chris@169
|
986 int shift = 0;
|
Chris@169
|
987
|
Chris@169
|
988 if (shiftCount > 1) {
|
Chris@169
|
989
|
Chris@169
|
990 shift = m_pianoRollShifts[i][note];
|
Chris@169
|
991
|
Chris@169
|
992 if (i == partStart) {
|
Chris@169
|
993 partShift = shift;
|
Chris@169
|
994 }
|
Chris@169
|
995
|
Chris@169
|
996 if (i > partStart + partThreshold && shift != partShift) {
|
Chris@169
|
997
|
Chris@169
|
998 // cerr << "i = " << i << ", partStart = " << partStart << ", shift = " << shift << ", partShift = " << partShift << endl;
|
Chris@169
|
999
|
Chris@169
|
1000 // pitch has changed, emit an intermediate note
|
Chris@252
|
1001 noteFeatures.push_back(makeNoteFeature(partStart,
|
Chris@252
|
1002 i,
|
Chris@252
|
1003 note,
|
Chris@252
|
1004 partShift,
|
Chris@252
|
1005 shiftCount,
|
Chris@252
|
1006 partVelocity));
|
Chris@169
|
1007 partStart = i;
|
Chris@169
|
1008 partShift = shift;
|
Chris@169
|
1009 partVelocity = 0;
|
Chris@169
|
1010 }
|
Chris@169
|
1011 }
|
Chris@169
|
1012
|
Chris@303
|
1013 int v;
|
Chris@303
|
1014 if (m_mode == LiveMode) {
|
Chris@303
|
1015 v = round(strength * 30);
|
Chris@303
|
1016 } else {
|
Chris@303
|
1017 v = round(strength * 2);
|
Chris@303
|
1018 }
|
Chris@169
|
1019 if (v > partVelocity) {
|
Chris@169
|
1020 partVelocity = v;
|
Chris@169
|
1021 }
|
Chris@169
|
1022 }
|
Chris@169
|
1023
|
Chris@169
|
1024 if (end >= partStart + partThreshold) {
|
Chris@252
|
1025 noteFeatures.push_back(makeNoteFeature(partStart,
|
Chris@252
|
1026 end,
|
Chris@252
|
1027 note,
|
Chris@252
|
1028 partShift,
|
Chris@252
|
1029 shiftCount,
|
Chris@252
|
1030 partVelocity));
|
Chris@169
|
1031 }
|
Chris@169
|
1032 }
|
Chris@252
|
1033
|
Chris@309
|
1034 RealTime
|
Chris@309
|
1035 Silvet::getColumnTimestamp(int column)
|
Chris@309
|
1036 {
|
Chris@309
|
1037 double columnDuration = 1.0 / m_colsPerSec;
|
Chris@309
|
1038 int postFilterLatency = int(m_postFilter[0]->getSize() / 2);
|
Chris@309
|
1039
|
Chris@309
|
1040 return m_startTime + RealTime::fromSeconds
|
Chris@309
|
1041 (columnDuration * (column - postFilterLatency) + 0.02);
|
Chris@309
|
1042 }
|
Chris@309
|
1043
|
Chris@252
|
1044 Silvet::Feature
|
Chris@252
|
1045 Silvet::makeNoteFeature(int start,
|
Chris@252
|
1046 int end,
|
Chris@252
|
1047 int note,
|
Chris@252
|
1048 int shift,
|
Chris@252
|
1049 int shiftCount,
|
Chris@252
|
1050 int velocity)
|
Chris@252
|
1051 {
|
Chris@252
|
1052 Feature f;
|
Chris@252
|
1053
|
Chris@252
|
1054 f.hasTimestamp = true;
|
Chris@309
|
1055 f.timestamp = getColumnTimestamp(start);
|
Chris@252
|
1056
|
Chris@252
|
1057 f.hasDuration = true;
|
Chris@309
|
1058 f.duration = getColumnTimestamp(end) - f.timestamp;
|
Chris@252
|
1059
|
Chris@252
|
1060 f.values.clear();
|
Chris@252
|
1061
|
Chris@252
|
1062 f.values.push_back
|
Chris@252
|
1063 (noteFrequency(note, shift, shiftCount));
|
Chris@252
|
1064
|
Chris@252
|
1065 float inputGain = getInputGainAt(f.timestamp);
|
Chris@252
|
1066 // cerr << "adjusting velocity from " << velocity << " to " << round(velocity/inputGain) << endl;
|
Chris@252
|
1067 velocity = round(velocity / inputGain);
|
Chris@252
|
1068 if (velocity > 127) velocity = 127;
|
Chris@252
|
1069 if (velocity < 1) velocity = 1;
|
Chris@252
|
1070 f.values.push_back(velocity);
|
Chris@252
|
1071
|
Chris@252
|
1072 f.label = noteName(note, shift, shiftCount);
|
Chris@252
|
1073
|
Chris@252
|
1074 return f;
|
Chris@252
|
1075 }
|
Chris@252
|
1076
|
Chris@252
|
1077 float
|
Chris@252
|
1078 Silvet::getInputGainAt(RealTime t)
|
Chris@252
|
1079 {
|
Chris@252
|
1080 map<RealTime, float>::const_iterator i = m_inputGains.lower_bound(t);
|
Chris@252
|
1081
|
Chris@252
|
1082 if (i == m_inputGains.end()) {
|
Chris@252
|
1083 if (i != m_inputGains.begin()) {
|
Chris@252
|
1084 --i;
|
Chris@252
|
1085 } else {
|
Chris@252
|
1086 return 1.f; // no data
|
Chris@252
|
1087 }
|
Chris@252
|
1088 }
|
Chris@252
|
1089
|
Chris@252
|
1090 // cerr << "gain at time " << t << " = " << i->second << endl;
|
Chris@252
|
1091
|
Chris@252
|
1092 return i->second;
|
Chris@252
|
1093 }
|
Chris@252
|
1094
|