Chris@31
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
Chris@31
|
2
|
Chris@31
|
3 /*
|
Chris@31
|
4 Silvet
|
Chris@31
|
5
|
Chris@31
|
6 A Vamp plugin for note transcription.
|
Chris@31
|
7 Centre for Digital Music, Queen Mary University of London.
|
Chris@31
|
8
|
Chris@31
|
9 This program is free software; you can redistribute it and/or
|
Chris@31
|
10 modify it under the terms of the GNU General Public License as
|
Chris@31
|
11 published by the Free Software Foundation; either version 2 of the
|
Chris@31
|
12 License, or (at your option) any later version. See the file
|
Chris@31
|
13 COPYING included with this distribution for more information.
|
Chris@31
|
14 */
|
Chris@31
|
15
|
Chris@31
|
16 #include "Silvet.h"
|
Chris@34
|
17 #include "EM.h"
|
Chris@31
|
18
|
Chris@152
|
19 #include <cq/CQSpectrogram.h>
|
Chris@31
|
20
|
Chris@152
|
21 #include "MedianFilter.h"
|
Chris@152
|
22 #include "constant-q-cpp/src/dsp/Resampler.h"
|
Chris@246
|
23 #include "flattendynamics-ladspa.h"
|
Chris@31
|
24
|
Chris@31
|
25 #include <vector>
|
Chris@312
|
26 #include <future>
|
Chris@31
|
27
|
Chris@32
|
28 #include <cstdio>
|
Chris@32
|
29
|
Chris@31
|
30 using std::vector;
|
Chris@48
|
31 using std::cout;
|
Chris@31
|
32 using std::cerr;
|
Chris@31
|
33 using std::endl;
|
Chris@311
|
34 using std::pair;
|
Chris@312
|
35 using std::future;
|
Chris@312
|
36 using std::async;
|
Chris@40
|
37 using Vamp::RealTime;
|
Chris@31
|
38
|
Chris@31
|
39 static int processingSampleRate = 44100;
|
Chris@31
|
40 static int processingBPO = 60;
|
Chris@170
|
41
|
Chris@272
|
42 static int minInputSampleRate = 100;
|
Chris@272
|
43 static int maxInputSampleRate = 192000;
|
Chris@272
|
44
|
Chris@31
|
45 Silvet::Silvet(float inputSampleRate) :
|
Chris@31
|
46 Plugin(inputSampleRate),
|
Chris@161
|
47 m_instruments(InstrumentPack::listInstrumentPacks()),
|
Chris@31
|
48 m_resampler(0),
|
Chris@246
|
49 m_flattener(0),
|
Chris@110
|
50 m_cq(0),
|
Chris@162
|
51 m_hqMode(true),
|
Chris@166
|
52 m_fineTuning(false),
|
Chris@178
|
53 m_instrument(0),
|
Chris@313
|
54 m_colsPerSec(50),
|
Chris@313
|
55 m_haveStartTime(false)
|
Chris@31
|
56 {
|
Chris@31
|
57 }
|
Chris@31
|
58
|
Chris@31
|
59 Silvet::~Silvet()
|
Chris@31
|
60 {
|
Chris@31
|
61 delete m_resampler;
|
Chris@246
|
62 delete m_flattener;
|
Chris@31
|
63 delete m_cq;
|
Chris@41
|
64 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
|
Chris@41
|
65 delete m_postFilter[i];
|
Chris@41
|
66 }
|
Chris@31
|
67 }
|
Chris@31
|
68
|
Chris@31
|
69 string
|
Chris@31
|
70 Silvet::getIdentifier() const
|
Chris@31
|
71 {
|
Chris@31
|
72 return "silvet";
|
Chris@31
|
73 }
|
Chris@31
|
74
|
Chris@31
|
75 string
|
Chris@31
|
76 Silvet::getName() const
|
Chris@31
|
77 {
|
Chris@31
|
78 return "Silvet Note Transcription";
|
Chris@31
|
79 }
|
Chris@31
|
80
|
Chris@31
|
81 string
|
Chris@31
|
82 Silvet::getDescription() const
|
Chris@31
|
83 {
|
Chris@191
|
84 return "Estimate the note onsets, pitches, and durations that make up a music recording.";
|
Chris@31
|
85 }
|
Chris@31
|
86
|
Chris@31
|
87 string
|
Chris@31
|
88 Silvet::getMaker() const
|
Chris@31
|
89 {
|
Chris@191
|
90 return "Queen Mary, University of London";
|
Chris@31
|
91 }
|
Chris@31
|
92
|
Chris@31
|
93 int
|
Chris@31
|
94 Silvet::getPluginVersion() const
|
Chris@31
|
95 {
|
Chris@309
|
96 return 3;
|
Chris@31
|
97 }
|
Chris@31
|
98
|
Chris@31
|
99 string
|
Chris@31
|
100 Silvet::getCopyright() const
|
Chris@31
|
101 {
|
Chris@191
|
102 return "Method by Emmanouil Benetos and Simon Dixon; plugin by Chris Cannam and Emmanouil Benetos. GPL licence.";
|
Chris@31
|
103 }
|
Chris@31
|
104
|
Chris@31
|
105 Silvet::InputDomain
|
Chris@31
|
106 Silvet::getInputDomain() const
|
Chris@31
|
107 {
|
Chris@31
|
108 return TimeDomain;
|
Chris@31
|
109 }
|
Chris@31
|
110
|
Chris@31
|
111 size_t
|
Chris@31
|
112 Silvet::getPreferredBlockSize() const
|
Chris@31
|
113 {
|
Chris@31
|
114 return 0;
|
Chris@31
|
115 }
|
Chris@31
|
116
|
Chris@31
|
117 size_t
|
Chris@31
|
118 Silvet::getPreferredStepSize() const
|
Chris@31
|
119 {
|
Chris@31
|
120 return 0;
|
Chris@31
|
121 }
|
Chris@31
|
122
|
Chris@31
|
123 size_t
|
Chris@31
|
124 Silvet::getMinChannelCount() const
|
Chris@31
|
125 {
|
Chris@31
|
126 return 1;
|
Chris@31
|
127 }
|
Chris@31
|
128
|
Chris@31
|
129 size_t
|
Chris@31
|
130 Silvet::getMaxChannelCount() const
|
Chris@31
|
131 {
|
Chris@31
|
132 return 1;
|
Chris@31
|
133 }
|
Chris@31
|
134
|
Chris@31
|
135 Silvet::ParameterList
|
Chris@31
|
136 Silvet::getParameterDescriptors() const
|
Chris@31
|
137 {
|
Chris@31
|
138 ParameterList list;
|
Chris@110
|
139
|
Chris@110
|
140 ParameterDescriptor desc;
|
Chris@110
|
141 desc.identifier = "mode";
|
Chris@110
|
142 desc.name = "Processing mode";
|
Chris@110
|
143 desc.unit = "";
|
Chris@271
|
144 desc.description = "Sets the tradeoff of processing speed against transcription quality. Draft mode modifies a number of internal parameters in favour of speed. Intensive mode (the default) will almost always produce better results.";
|
Chris@110
|
145 desc.minValue = 0;
|
Chris@110
|
146 desc.maxValue = 1;
|
Chris@113
|
147 desc.defaultValue = 1;
|
Chris@110
|
148 desc.isQuantized = true;
|
Chris@110
|
149 desc.quantizeStep = 1;
|
Chris@166
|
150 desc.valueNames.push_back("Draft (faster)");
|
Chris@165
|
151 desc.valueNames.push_back("Intensive (higher quality)");
|
Chris@161
|
152 list.push_back(desc);
|
Chris@161
|
153
|
Chris@176
|
154 desc.identifier = "instrument";
|
Chris@176
|
155 desc.name = "Instrument";
|
Chris@161
|
156 desc.unit = "";
|
Chris@271
|
157 desc.description = "The instrument or instruments known to be present in the recording. This affects the set of instrument templates used, as well as the expected level of polyphony in the output. Using a more limited set of instruments than the default will also make the plugin run faster.\nNote that this plugin cannot isolate instruments: you can't use this setting to request notes from only one instrument in a recording with several. Instead, use this as a hint to the plugin about which instruments are actually present.";
|
Chris@161
|
158 desc.minValue = 0;
|
Chris@162
|
159 desc.maxValue = m_instruments.size()-1;
|
Chris@162
|
160 desc.defaultValue = 0;
|
Chris@161
|
161 desc.isQuantized = true;
|
Chris@161
|
162 desc.quantizeStep = 1;
|
Chris@161
|
163 desc.valueNames.clear();
|
Chris@162
|
164 for (int i = 0; i < int(m_instruments.size()); ++i) {
|
Chris@162
|
165 desc.valueNames.push_back(m_instruments[i].name);
|
Chris@162
|
166 }
|
Chris@166
|
167 list.push_back(desc);
|
Chris@161
|
168
|
Chris@166
|
169 desc.identifier = "finetune";
|
Chris@166
|
170 desc.name = "Return fine pitch estimates";
|
Chris@166
|
171 desc.unit = "";
|
Chris@271
|
172 desc.description = "Return pitch estimates at finer than semitone resolution. This works only in Intensive mode. Notes that appear to drift in pitch will be split up into shorter notes with individually finer pitches.";
|
Chris@166
|
173 desc.minValue = 0;
|
Chris@166
|
174 desc.maxValue = 1;
|
Chris@166
|
175 desc.defaultValue = 0;
|
Chris@166
|
176 desc.isQuantized = true;
|
Chris@166
|
177 desc.quantizeStep = 1;
|
Chris@166
|
178 desc.valueNames.clear();
|
Chris@110
|
179 list.push_back(desc);
|
Chris@110
|
180
|
Chris@31
|
181 return list;
|
Chris@31
|
182 }
|
Chris@31
|
183
|
Chris@31
|
184 float
|
Chris@31
|
185 Silvet::getParameter(string identifier) const
|
Chris@31
|
186 {
|
Chris@110
|
187 if (identifier == "mode") {
|
Chris@110
|
188 return m_hqMode ? 1.f : 0.f;
|
Chris@166
|
189 } else if (identifier == "finetune") {
|
Chris@166
|
190 return m_fineTuning ? 1.f : 0.f;
|
Chris@176
|
191 } else if (identifier == "instrument") {
|
Chris@162
|
192 return m_instrument;
|
Chris@110
|
193 }
|
Chris@31
|
194 return 0;
|
Chris@31
|
195 }
|
Chris@31
|
196
|
Chris@31
|
197 void
|
Chris@31
|
198 Silvet::setParameter(string identifier, float value)
|
Chris@31
|
199 {
|
Chris@110
|
200 if (identifier == "mode") {
|
Chris@110
|
201 m_hqMode = (value > 0.5);
|
Chris@166
|
202 } else if (identifier == "finetune") {
|
Chris@166
|
203 m_fineTuning = (value > 0.5);
|
Chris@176
|
204 } else if (identifier == "instrument") {
|
Chris@162
|
205 m_instrument = lrintf(value);
|
Chris@110
|
206 }
|
Chris@31
|
207 }
|
Chris@31
|
208
|
Chris@31
|
209 Silvet::ProgramList
|
Chris@31
|
210 Silvet::getPrograms() const
|
Chris@31
|
211 {
|
Chris@31
|
212 ProgramList list;
|
Chris@31
|
213 return list;
|
Chris@31
|
214 }
|
Chris@31
|
215
|
Chris@31
|
216 string
|
Chris@31
|
217 Silvet::getCurrentProgram() const
|
Chris@31
|
218 {
|
Chris@31
|
219 return "";
|
Chris@31
|
220 }
|
Chris@31
|
221
|
Chris@31
|
222 void
|
Chris@31
|
223 Silvet::selectProgram(string name)
|
Chris@31
|
224 {
|
Chris@31
|
225 }
|
Chris@31
|
226
|
Chris@31
|
227 Silvet::OutputList
|
Chris@31
|
228 Silvet::getOutputDescriptors() const
|
Chris@31
|
229 {
|
Chris@31
|
230 OutputList list;
|
Chris@31
|
231
|
Chris@31
|
232 OutputDescriptor d;
|
Chris@51
|
233 d.identifier = "notes";
|
Chris@51
|
234 d.name = "Note transcription";
|
Chris@271
|
235 d.description = "Overall note transcription. Each note has time, duration, estimated pitch, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture.";
|
Chris@41
|
236 d.unit = "Hz";
|
Chris@31
|
237 d.hasFixedBinCount = true;
|
Chris@31
|
238 d.binCount = 2;
|
Chris@41
|
239 d.binNames.push_back("Frequency");
|
Chris@31
|
240 d.binNames.push_back("Velocity");
|
Chris@31
|
241 d.hasKnownExtents = false;
|
Chris@31
|
242 d.isQuantized = false;
|
Chris@31
|
243 d.sampleType = OutputDescriptor::VariableSampleRate;
|
Chris@246
|
244 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
|
Chris@31
|
245 d.hasDuration = true;
|
Chris@32
|
246 m_notesOutputNo = list.size();
|
Chris@32
|
247 list.push_back(d);
|
Chris@32
|
248
|
Chris@178
|
249 d.identifier = "timefreq";
|
Chris@178
|
250 d.name = "Time-frequency distribution";
|
Chris@271
|
251 d.description = "Filtered constant-Q time-frequency distribution as used as input to the expectation-maximisation algorithm.";
|
Chris@178
|
252 d.unit = "";
|
Chris@178
|
253 d.hasFixedBinCount = true;
|
Chris@178
|
254 d.binCount = m_instruments[0].templateHeight;
|
Chris@178
|
255 d.binNames.clear();
|
Chris@178
|
256 if (m_cq) {
|
Chris@294
|
257 char name[50];
|
Chris@178
|
258 for (int i = 0; i < m_instruments[0].templateHeight; ++i) {
|
Chris@178
|
259 // We have a 600-bin (10 oct 60-bin CQ) of which the
|
Chris@178
|
260 // lowest-frequency 55 bins have been dropped, for a
|
Chris@178
|
261 // 545-bin template. The native CQ bins go high->low
|
Chris@178
|
262 // frequency though, so these are still the first 545 bins
|
Chris@178
|
263 // as reported by getBinFrequency, though in reverse order
|
Chris@178
|
264 float freq = m_cq->getBinFrequency
|
Chris@178
|
265 (m_instruments[0].templateHeight - i - 1);
|
Chris@178
|
266 sprintf(name, "%.1f Hz", freq);
|
Chris@178
|
267 d.binNames.push_back(name);
|
Chris@178
|
268 }
|
Chris@178
|
269 }
|
Chris@178
|
270 d.hasKnownExtents = false;
|
Chris@178
|
271 d.isQuantized = false;
|
Chris@178
|
272 d.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@178
|
273 d.sampleRate = m_colsPerSec;
|
Chris@178
|
274 d.hasDuration = false;
|
Chris@178
|
275 m_fcqOutputNo = list.size();
|
Chris@178
|
276 list.push_back(d);
|
Chris@178
|
277
|
Chris@294
|
278 d.identifier = "pitchactivation";
|
Chris@294
|
279 d.name = "Pitch activation distribution";
|
Chris@294
|
280 d.description = "Pitch activation distribution resulting from expectation-maximisation algorithm, prior to note extraction.";
|
Chris@294
|
281 d.unit = "";
|
Chris@294
|
282 d.hasFixedBinCount = true;
|
Chris@294
|
283 d.binCount = m_instruments[0].templateNoteCount;
|
Chris@294
|
284 d.binNames.clear();
|
Chris@294
|
285 if (m_cq) {
|
Chris@294
|
286 for (int i = 0; i < m_instruments[0].templateNoteCount; ++i) {
|
Chris@294
|
287 d.binNames.push_back(noteName(i, 0, 1));
|
Chris@294
|
288 }
|
Chris@294
|
289 }
|
Chris@294
|
290 d.hasKnownExtents = false;
|
Chris@294
|
291 d.isQuantized = false;
|
Chris@294
|
292 d.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@294
|
293 d.sampleRate = m_colsPerSec;
|
Chris@294
|
294 d.hasDuration = false;
|
Chris@294
|
295 m_pitchOutputNo = list.size();
|
Chris@294
|
296 list.push_back(d);
|
Chris@294
|
297
|
Chris@309
|
298 d.identifier = "chroma";
|
Chris@309
|
299 d.name = "Pitch chroma distribution";
|
Chris@309
|
300 d.description = "Pitch chroma distribution formed by wrapping the un-thresholded pitch activation distribution into a single octave of semitone bins.";
|
Chris@309
|
301 d.unit = "";
|
Chris@309
|
302 d.hasFixedBinCount = true;
|
Chris@309
|
303 d.binCount = 12;
|
Chris@309
|
304 d.binNames.clear();
|
Chris@309
|
305 if (m_cq) {
|
Chris@309
|
306 for (int i = 0; i < 12; ++i) {
|
Chris@309
|
307 d.binNames.push_back(chromaName(i));
|
Chris@309
|
308 }
|
Chris@309
|
309 }
|
Chris@309
|
310 d.hasKnownExtents = false;
|
Chris@309
|
311 d.isQuantized = false;
|
Chris@309
|
312 d.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@309
|
313 d.sampleRate = m_colsPerSec;
|
Chris@309
|
314 d.hasDuration = false;
|
Chris@309
|
315 m_chromaOutputNo = list.size();
|
Chris@309
|
316 list.push_back(d);
|
Chris@309
|
317
|
Chris@31
|
318 return list;
|
Chris@31
|
319 }
|
Chris@31
|
320
|
Chris@38
|
321 std::string
|
Chris@309
|
322 Silvet::chromaName(int pitch) const
|
Chris@38
|
323 {
|
Chris@38
|
324 static const char *names[] = {
|
Chris@38
|
325 "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"
|
Chris@38
|
326 };
|
Chris@38
|
327
|
Chris@309
|
328 return names[pitch];
|
Chris@309
|
329 }
|
Chris@309
|
330
|
Chris@309
|
331 std::string
|
Chris@309
|
332 Silvet::noteName(int note, int shift, int shiftCount) const
|
Chris@309
|
333 {
|
Chris@309
|
334 string n = chromaName(note % 12);
|
Chris@38
|
335
|
Chris@175
|
336 int oct = (note + 9) / 12;
|
Chris@38
|
337
|
Chris@175
|
338 char buf[30];
|
Chris@175
|
339
|
Chris@175
|
340 float pshift = 0.f;
|
Chris@175
|
341 if (shiftCount > 1) {
|
Chris@175
|
342 // see noteFrequency below
|
Chris@175
|
343 pshift =
|
Chris@175
|
344 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
|
Chris@175
|
345 }
|
Chris@175
|
346
|
Chris@175
|
347 if (pshift > 0.f) {
|
Chris@309
|
348 sprintf(buf, "%s%d+%dc", n.c_str(), oct, int(round(pshift * 100)));
|
Chris@175
|
349 } else if (pshift < 0.f) {
|
Chris@309
|
350 sprintf(buf, "%s%d-%dc", n.c_str(), oct, int(round((-pshift) * 100)));
|
Chris@175
|
351 } else {
|
Chris@309
|
352 sprintf(buf, "%s%d", n.c_str(), oct);
|
Chris@175
|
353 }
|
Chris@38
|
354
|
Chris@38
|
355 return buf;
|
Chris@38
|
356 }
|
Chris@38
|
357
|
Chris@41
|
358 float
|
Chris@168
|
359 Silvet::noteFrequency(int note, int shift, int shiftCount) const
|
Chris@41
|
360 {
|
Chris@169
|
361 // Convert shift number to a pitch shift. The given shift number
|
Chris@169
|
362 // is an offset into the template array, which starts with some
|
Chris@169
|
363 // zeros, followed by the template, then some trailing zeros.
|
Chris@169
|
364 //
|
Chris@169
|
365 // Example: if we have templateMaxShift == 2 and thus shiftCount
|
Chris@169
|
366 // == 5, then the number will be in the range 0-4 and the template
|
Chris@169
|
367 // will have 2 zeros at either end. Thus number 2 represents the
|
Chris@169
|
368 // template "as recorded", for a pitch shift of 0; smaller indices
|
Chris@169
|
369 // represent moving the template *up* in pitch (by introducing
|
Chris@169
|
370 // zeros at the start, which is the low-frequency end), for a
|
Chris@169
|
371 // positive pitch shift; and higher values represent moving it
|
Chris@169
|
372 // down in pitch, for a negative pitch shift.
|
Chris@169
|
373
|
Chris@175
|
374 float pshift = 0.f;
|
Chris@175
|
375 if (shiftCount > 1) {
|
Chris@175
|
376 pshift =
|
Chris@175
|
377 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
|
Chris@175
|
378 }
|
Chris@169
|
379
|
Chris@169
|
380 return float(27.5 * pow(2.0, (note + pshift) / 12.0));
|
Chris@41
|
381 }
|
Chris@41
|
382
|
Chris@31
|
383 bool
|
Chris@31
|
384 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
Chris@31
|
385 {
|
Chris@272
|
386 if (m_inputSampleRate < minInputSampleRate ||
|
Chris@272
|
387 m_inputSampleRate > maxInputSampleRate) {
|
Chris@272
|
388 cerr << "Silvet::initialise: Unsupported input sample rate "
|
Chris@272
|
389 << m_inputSampleRate << " (supported min " << minInputSampleRate
|
Chris@272
|
390 << ", max " << maxInputSampleRate << ")" << endl;
|
Chris@272
|
391 return false;
|
Chris@272
|
392 }
|
Chris@272
|
393
|
Chris@31
|
394 if (channels < getMinChannelCount() ||
|
Chris@272
|
395 channels > getMaxChannelCount()) {
|
Chris@272
|
396 cerr << "Silvet::initialise: Unsupported channel count " << channels
|
Chris@272
|
397 << " (supported min " << getMinChannelCount() << ", max "
|
Chris@272
|
398 << getMaxChannelCount() << ")" << endl;
|
Chris@272
|
399 return false;
|
Chris@272
|
400 }
|
Chris@31
|
401
|
Chris@31
|
402 if (stepSize != blockSize) {
|
Chris@31
|
403 cerr << "Silvet::initialise: Step size must be the same as block size ("
|
Chris@31
|
404 << stepSize << " != " << blockSize << ")" << endl;
|
Chris@31
|
405 return false;
|
Chris@31
|
406 }
|
Chris@31
|
407
|
Chris@31
|
408 m_blockSize = blockSize;
|
Chris@31
|
409
|
Chris@31
|
410 reset();
|
Chris@31
|
411
|
Chris@31
|
412 return true;
|
Chris@31
|
413 }
|
Chris@31
|
414
|
Chris@31
|
415 void
|
Chris@31
|
416 Silvet::reset()
|
Chris@31
|
417 {
|
Chris@31
|
418 delete m_resampler;
|
Chris@246
|
419 delete m_flattener;
|
Chris@31
|
420 delete m_cq;
|
Chris@31
|
421
|
Chris@31
|
422 if (m_inputSampleRate != processingSampleRate) {
|
Chris@31
|
423 m_resampler = new Resampler(m_inputSampleRate, processingSampleRate);
|
Chris@31
|
424 } else {
|
Chris@31
|
425 m_resampler = 0;
|
Chris@31
|
426 }
|
Chris@31
|
427
|
Chris@246
|
428 m_flattener = new FlattenDynamics(m_inputSampleRate); // before resampling
|
Chris@246
|
429 m_flattener->reset();
|
Chris@246
|
430
|
Chris@173
|
431 double minFreq = 27.5;
|
Chris@173
|
432
|
Chris@173
|
433 if (!m_hqMode) {
|
Chris@173
|
434 // We don't actually return any notes from the bottom octave,
|
Chris@173
|
435 // so we can just pad with zeros
|
Chris@173
|
436 minFreq *= 2;
|
Chris@173
|
437 }
|
Chris@173
|
438
|
Chris@154
|
439 CQParameters params(processingSampleRate,
|
Chris@173
|
440 minFreq,
|
Chris@154
|
441 processingSampleRate / 3,
|
Chris@154
|
442 processingBPO);
|
Chris@154
|
443
|
Chris@155
|
444 params.q = 0.95; // MIREX code uses 0.8, but it seems 0.9 or lower
|
Chris@155
|
445 // drops the FFT size to 512 from 1024 and alters
|
Chris@155
|
446 // some other processing parameters, making
|
Chris@155
|
447 // everything much, much slower. Could be a flaw
|
Chris@155
|
448 // in the CQ parameter calculations, must check
|
Chris@154
|
449 params.atomHopFactor = 0.3;
|
Chris@154
|
450 params.threshold = 0.0005;
|
Chris@172
|
451 params.window = CQParameters::Hann;
|
Chris@154
|
452
|
Chris@154
|
453 m_cq = new CQSpectrogram(params, CQSpectrogram::InterpolateLinear);
|
Chris@31
|
454
|
Chris@165
|
455 m_colsPerSec = m_hqMode ? 50 : 25;
|
Chris@165
|
456
|
Chris@41
|
457 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
|
Chris@41
|
458 delete m_postFilter[i];
|
Chris@41
|
459 }
|
Chris@41
|
460 m_postFilter.clear();
|
Chris@176
|
461 for (int i = 0; i < m_instruments[0].templateNoteCount; ++i) {
|
Chris@41
|
462 m_postFilter.push_back(new MedianFilter<double>(3));
|
Chris@41
|
463 }
|
Chris@41
|
464 m_pianoRoll.clear();
|
Chris@246
|
465 m_inputGains.clear();
|
Chris@32
|
466 m_columnCount = 0;
|
Chris@272
|
467 m_resampledCount = 0;
|
Chris@40
|
468 m_startTime = RealTime::zeroTime;
|
Chris@313
|
469 m_haveStartTime = false;
|
Chris@31
|
470 }
|
Chris@31
|
471
|
Chris@31
|
472 Silvet::FeatureSet
|
Chris@31
|
473 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
Chris@31
|
474 {
|
Chris@313
|
475 if (!m_haveStartTime) {
|
Chris@40
|
476 m_startTime = timestamp;
|
Chris@313
|
477 m_haveStartTime = true;
|
Chris@40
|
478 }
|
Chris@246
|
479
|
Chris@246
|
480 vector<float> flattened(m_blockSize);
|
Chris@246
|
481 float gain = 1.f;
|
Chris@246
|
482 m_flattener->connectInputPort
|
Chris@246
|
483 (FlattenDynamics::AudioInputPort, inputBuffers[0]);
|
Chris@246
|
484 m_flattener->connectOutputPort
|
Chris@246
|
485 (FlattenDynamics::AudioOutputPort, &flattened[0]);
|
Chris@246
|
486 m_flattener->connectOutputPort
|
Chris@246
|
487 (FlattenDynamics::GainOutputPort, &gain);
|
Chris@246
|
488 m_flattener->process(m_blockSize);
|
Chris@246
|
489
|
Chris@252
|
490 m_inputGains[timestamp] = gain;
|
Chris@40
|
491
|
Chris@31
|
492 vector<double> data;
|
Chris@40
|
493 for (int i = 0; i < m_blockSize; ++i) {
|
Chris@246
|
494 double d = flattened[i];
|
Chris@235
|
495 data.push_back(d);
|
Chris@40
|
496 }
|
Chris@31
|
497
|
Chris@31
|
498 if (m_resampler) {
|
Chris@272
|
499
|
Chris@31
|
500 data = m_resampler->process(data.data(), data.size());
|
Chris@272
|
501
|
Chris@272
|
502 int hadCount = m_resampledCount;
|
Chris@272
|
503 m_resampledCount += data.size();
|
Chris@272
|
504
|
Chris@272
|
505 int resamplerLatency = m_resampler->getLatency();
|
Chris@272
|
506
|
Chris@272
|
507 if (hadCount < resamplerLatency) {
|
Chris@272
|
508 int stillToDrop = resamplerLatency - hadCount;
|
Chris@272
|
509 if (stillToDrop >= int(data.size())) {
|
Chris@272
|
510 return FeatureSet();
|
Chris@272
|
511 } else {
|
Chris@272
|
512 data = vector<double>(data.begin() + stillToDrop, data.end());
|
Chris@272
|
513 }
|
Chris@272
|
514 }
|
Chris@31
|
515 }
|
Chris@272
|
516
|
Chris@32
|
517 Grid cqout = m_cq->process(data);
|
Chris@51
|
518 FeatureSet fs = transcribe(cqout);
|
Chris@51
|
519 return fs;
|
Chris@34
|
520 }
|
Chris@34
|
521
|
Chris@34
|
522 Silvet::FeatureSet
|
Chris@34
|
523 Silvet::getRemainingFeatures()
|
Chris@34
|
524 {
|
Chris@145
|
525 Grid cqout = m_cq->getRemainingOutput();
|
Chris@51
|
526 FeatureSet fs = transcribe(cqout);
|
Chris@51
|
527 return fs;
|
Chris@34
|
528 }
|
Chris@34
|
529
|
Chris@34
|
530 Silvet::FeatureSet
|
Chris@34
|
531 Silvet::transcribe(const Grid &cqout)
|
Chris@34
|
532 {
|
Chris@32
|
533 Grid filtered = preProcess(cqout);
|
Chris@31
|
534
|
Chris@32
|
535 FeatureSet fs;
|
Chris@32
|
536
|
Chris@104
|
537 if (filtered.empty()) return fs;
|
Chris@170
|
538
|
Chris@170
|
539 const InstrumentPack &pack = m_instruments[m_instrument];
|
Chris@104
|
540
|
Chris@178
|
541 for (int i = 0; i < (int)filtered.size(); ++i) {
|
Chris@178
|
542 Feature f;
|
Chris@178
|
543 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@178
|
544 f.values.push_back(float(filtered[i][j]));
|
Chris@178
|
545 }
|
Chris@178
|
546 fs[m_fcqOutputNo].push_back(f);
|
Chris@178
|
547 }
|
Chris@178
|
548
|
Chris@34
|
549 int width = filtered.size();
|
Chris@34
|
550
|
Chris@311
|
551 Grid localPitches(width);
|
Chris@170
|
552
|
Chris@170
|
553 bool wantShifts = m_hqMode && m_fineTuning;
|
Chris@170
|
554 int shiftCount = 1;
|
Chris@170
|
555 if (wantShifts) {
|
Chris@170
|
556 shiftCount = pack.templateMaxShift * 2 + 1;
|
Chris@170
|
557 }
|
Chris@170
|
558
|
Chris@170
|
559 vector<vector<int> > localBestShifts;
|
Chris@170
|
560 if (wantShifts) {
|
Chris@311
|
561 localBestShifts = vector<vector<int> >(width);
|
Chris@170
|
562 }
|
Chris@170
|
563
|
Chris@312
|
564 #ifndef MAX_EM_THREADS
|
Chris@312
|
565 #define MAX_EM_THREADS 8
|
Chris@312
|
566 #endif
|
Chris@312
|
567
|
Chris@312
|
568 #if (defined(MAX_EM_THREADS) && (MAX_EM_THREADS > 1))
|
Chris@312
|
569 for (int i = 0; i < width; ) {
|
Chris@312
|
570 typedef future<pair<vector<double>, vector<int>>> EMFuture;
|
Chris@312
|
571 vector<EMFuture> results;
|
Chris@312
|
572 for (int j = 0; j < MAX_EM_THREADS && i + j < width; ++j) {
|
Chris@312
|
573 results.push_back
|
Chris@312
|
574 (async(std::launch::async,
|
Chris@312
|
575 [&](int index) {
|
Chris@312
|
576 return applyEM(pack, filtered.at(index), wantShifts);
|
Chris@312
|
577 }, i + j));
|
Chris@312
|
578 }
|
Chris@312
|
579 for (int j = 0; j < MAX_EM_THREADS && i + j < width; ++j) {
|
Chris@312
|
580 auto out = results[j].get();
|
Chris@312
|
581 localPitches[i+j] = out.first;
|
Chris@312
|
582 if (wantShifts) localBestShifts[i+j] = out.second;
|
Chris@312
|
583 }
|
Chris@312
|
584 i += MAX_EM_THREADS;
|
Chris@312
|
585 }
|
Chris@312
|
586 #else
|
Chris@123
|
587 for (int i = 0; i < width; ++i) {
|
Chris@311
|
588 auto out = applyEM(pack, filtered.at(i), wantShifts);
|
Chris@311
|
589 localPitches[i] = out.first;
|
Chris@311
|
590 if (wantShifts) localBestShifts[i] = out.second;
|
Chris@123
|
591 }
|
Chris@312
|
592 #endif
|
Chris@312
|
593
|
Chris@166
|
594 for (int i = 0; i < width; ++i) {
|
Chris@37
|
595
|
Chris@309
|
596 // This returns a filtered column, and pushes the
|
Chris@309
|
597 // up-to-max-polyphony activation column to m_pianoRoll
|
Chris@294
|
598 vector<double> filtered = postProcess
|
Chris@294
|
599 (localPitches[i], localBestShifts[i], wantShifts);
|
Chris@294
|
600
|
Chris@309
|
601 RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1);
|
Chris@309
|
602 float inputGain = getInputGainAt(timestamp);
|
Chris@309
|
603
|
Chris@294
|
604 Feature f;
|
Chris@294
|
605 for (int j = 0; j < (int)filtered.size(); ++j) {
|
Chris@309
|
606 float v = filtered[j];
|
Chris@294
|
607 if (v < pack.levelThreshold) v = 0.f;
|
Chris@309
|
608 f.values.push_back(v / inputGain);
|
Chris@294
|
609 }
|
Chris@294
|
610 fs[m_pitchOutputNo].push_back(f);
|
Chris@309
|
611
|
Chris@309
|
612 f.values.clear();
|
Chris@309
|
613 f.values.resize(12);
|
Chris@309
|
614 for (int j = 0; j < (int)filtered.size(); ++j) {
|
Chris@309
|
615 f.values[j % 12] += filtered[j] / inputGain;
|
Chris@309
|
616 }
|
Chris@309
|
617 fs[m_chromaOutputNo].push_back(f);
|
Chris@166
|
618
|
Chris@168
|
619 FeatureList noteFeatures = noteTrack(shiftCount);
|
Chris@38
|
620
|
Chris@123
|
621 for (FeatureList::const_iterator fi = noteFeatures.begin();
|
Chris@123
|
622 fi != noteFeatures.end(); ++fi) {
|
Chris@123
|
623 fs[m_notesOutputNo].push_back(*fi);
|
Chris@40
|
624 }
|
Chris@34
|
625 }
|
Chris@34
|
626
|
Chris@32
|
627 return fs;
|
Chris@31
|
628 }
|
Chris@31
|
629
|
Chris@311
|
630 pair<vector<double>, vector<int> >
|
Chris@311
|
631 Silvet::applyEM(const InstrumentPack &pack,
|
Chris@311
|
632 const vector<double> &column,
|
Chris@311
|
633 bool wantShifts)
|
Chris@311
|
634 {
|
Chris@311
|
635 double columnThreshold = 1e-5;
|
Chris@311
|
636
|
Chris@311
|
637 vector<double> pitches(pack.templateNoteCount, 0.0);
|
Chris@311
|
638 vector<int> bestShifts;
|
Chris@311
|
639
|
Chris@311
|
640 double sum = 0.0;
|
Chris@311
|
641 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@311
|
642 sum += column.at(j);
|
Chris@311
|
643 }
|
Chris@311
|
644 if (sum < columnThreshold) return { pitches, bestShifts };
|
Chris@311
|
645
|
Chris@311
|
646 EM em(&pack, m_hqMode);
|
Chris@311
|
647
|
Chris@311
|
648 em.setPitchSparsity(pack.pitchSparsity);
|
Chris@311
|
649 em.setSourceSparsity(pack.sourceSparsity);
|
Chris@311
|
650
|
Chris@311
|
651 int iterations = m_hqMode ? 20 : 10;
|
Chris@311
|
652
|
Chris@311
|
653 for (int j = 0; j < iterations; ++j) {
|
Chris@311
|
654 em.iterate(column.data());
|
Chris@311
|
655 }
|
Chris@311
|
656
|
Chris@311
|
657 const float *pitchDist = em.getPitchDistribution();
|
Chris@311
|
658 const float *const *shiftDist = em.getShifts();
|
Chris@311
|
659
|
Chris@311
|
660 int shiftCount = 1;
|
Chris@311
|
661 if (wantShifts) {
|
Chris@311
|
662 shiftCount = pack.templateMaxShift * 2 + 1;
|
Chris@311
|
663 }
|
Chris@311
|
664
|
Chris@311
|
665 for (int j = 0; j < pack.templateNoteCount; ++j) {
|
Chris@311
|
666
|
Chris@311
|
667 pitches[j] = pitchDist[j] * sum;
|
Chris@311
|
668
|
Chris@311
|
669 int bestShift = 0;
|
Chris@311
|
670 float bestShiftValue = 0.0;
|
Chris@311
|
671 if (wantShifts) {
|
Chris@311
|
672 for (int k = 0; k < shiftCount; ++k) {
|
Chris@311
|
673 float value = shiftDist[k][j];
|
Chris@311
|
674 if (k == 0 || value > bestShiftValue) {
|
Chris@311
|
675 bestShiftValue = value;
|
Chris@311
|
676 bestShift = k;
|
Chris@311
|
677 }
|
Chris@311
|
678 }
|
Chris@311
|
679 bestShifts.push_back(bestShift);
|
Chris@311
|
680 }
|
Chris@311
|
681 }
|
Chris@311
|
682
|
Chris@311
|
683 return { pitches, bestShifts };
|
Chris@311
|
684 }
|
Chris@311
|
685
|
Chris@32
|
686 Silvet::Grid
|
Chris@32
|
687 Silvet::preProcess(const Grid &in)
|
Chris@32
|
688 {
|
Chris@32
|
689 int width = in.size();
|
Chris@32
|
690
|
Chris@165
|
691 int spacing = processingSampleRate / m_colsPerSec;
|
Chris@32
|
692
|
Chris@165
|
693 // need to be careful that col spacing is an integer number of samples!
|
Chris@165
|
694 assert(spacing * m_colsPerSec == processingSampleRate);
|
Chris@32
|
695
|
Chris@32
|
696 Grid out;
|
Chris@32
|
697
|
Chris@58
|
698 // We count the CQ latency in terms of processing hops, but
|
Chris@58
|
699 // actually it probably isn't an exact number of hops so this
|
Chris@58
|
700 // isn't quite accurate. But the small constant offset is
|
Chris@165
|
701 // practically irrelevant compared to the jitter from the frame
|
Chris@165
|
702 // size we reduce to in a moment
|
Chris@33
|
703 int latentColumns = m_cq->getLatency() / m_cq->getColumnHop();
|
Chris@33
|
704
|
Chris@176
|
705 const InstrumentPack &pack = m_instruments[m_instrument];
|
Chris@176
|
706
|
Chris@32
|
707 for (int i = 0; i < width; ++i) {
|
Chris@32
|
708
|
Chris@33
|
709 if (m_columnCount < latentColumns) {
|
Chris@33
|
710 ++m_columnCount;
|
Chris@33
|
711 continue;
|
Chris@33
|
712 }
|
Chris@33
|
713
|
Chris@32
|
714 int prevSampleNo = (m_columnCount - 1) * m_cq->getColumnHop();
|
Chris@32
|
715 int sampleNo = m_columnCount * m_cq->getColumnHop();
|
Chris@32
|
716
|
Chris@32
|
717 bool select = (sampleNo / spacing != prevSampleNo / spacing);
|
Chris@32
|
718
|
Chris@32
|
719 if (select) {
|
Chris@32
|
720 vector<double> inCol = in[i];
|
Chris@176
|
721 vector<double> outCol(pack.templateHeight);
|
Chris@32
|
722
|
Chris@178
|
723 // In HQ mode, the CQ returns 600 bins and we ignore the
|
Chris@178
|
724 // lowest 55 of them.
|
Chris@178
|
725 //
|
Chris@178
|
726 // In draft mode the CQ is an octave shorter, returning
|
Chris@178
|
727 // 540 bins, so we instead pad them with an additional 5
|
Chris@178
|
728 // zeros.
|
Chris@178
|
729 //
|
Chris@178
|
730 // We also need to reverse the column as we go, since the
|
Chris@178
|
731 // raw CQ has the high frequencies first and we need it
|
Chris@178
|
732 // the other way around.
|
Chris@32
|
733
|
Chris@178
|
734 if (m_hqMode) {
|
Chris@178
|
735 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@178
|
736 int ix = inCol.size() - j - 55;
|
Chris@178
|
737 outCol[j] = inCol[ix];
|
Chris@178
|
738 }
|
Chris@178
|
739 } else {
|
Chris@178
|
740 for (int j = 0; j < 5; ++j) {
|
Chris@178
|
741 outCol[j] = 0.0;
|
Chris@178
|
742 }
|
Chris@178
|
743 for (int j = 5; j < pack.templateHeight; ++j) {
|
Chris@178
|
744 int ix = inCol.size() - j + 4;
|
Chris@178
|
745 outCol[j] = inCol[ix];
|
Chris@178
|
746 }
|
Chris@46
|
747 }
|
Chris@32
|
748
|
Chris@46
|
749 vector<double> noiseLevel1 =
|
Chris@46
|
750 MedianFilter<double>::filter(40, outCol);
|
Chris@176
|
751 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@46
|
752 noiseLevel1[j] = std::min(outCol[j], noiseLevel1[j]);
|
Chris@46
|
753 }
|
Chris@32
|
754
|
Chris@46
|
755 vector<double> noiseLevel2 =
|
Chris@46
|
756 MedianFilter<double>::filter(40, noiseLevel1);
|
Chris@176
|
757 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@46
|
758 outCol[j] = std::max(outCol[j] - noiseLevel2[j], 0.0);
|
Chris@32
|
759 }
|
Chris@32
|
760
|
Chris@165
|
761 out.push_back(outCol);
|
Chris@32
|
762 }
|
Chris@32
|
763
|
Chris@32
|
764 ++m_columnCount;
|
Chris@32
|
765 }
|
Chris@32
|
766
|
Chris@32
|
767 return out;
|
Chris@32
|
768 }
|
Chris@32
|
769
|
Chris@294
|
770 vector<double>
|
Chris@170
|
771 Silvet::postProcess(const vector<double> &pitches,
|
Chris@170
|
772 const vector<int> &bestShifts,
|
Chris@170
|
773 bool wantShifts)
|
Chris@166
|
774 {
|
Chris@176
|
775 const InstrumentPack &pack = m_instruments[m_instrument];
|
Chris@176
|
776
|
Chris@41
|
777 vector<double> filtered;
|
Chris@41
|
778
|
Chris@176
|
779 for (int j = 0; j < pack.templateNoteCount; ++j) {
|
Chris@170
|
780 m_postFilter[j]->push(pitches[j]);
|
Chris@41
|
781 filtered.push_back(m_postFilter[j]->get());
|
Chris@41
|
782 }
|
Chris@41
|
783
|
Chris@41
|
784 // Threshold for level and reduce number of candidate pitches
|
Chris@41
|
785
|
Chris@41
|
786 typedef std::multimap<double, int> ValueIndexMap;
|
Chris@41
|
787
|
Chris@41
|
788 ValueIndexMap strengths;
|
Chris@166
|
789
|
Chris@176
|
790 for (int j = 0; j < pack.templateNoteCount; ++j) {
|
Chris@166
|
791 double strength = filtered[j];
|
Chris@183
|
792 if (strength < pack.levelThreshold) continue;
|
Chris@168
|
793 strengths.insert(ValueIndexMap::value_type(strength, j));
|
Chris@168
|
794 }
|
Chris@166
|
795
|
Chris@168
|
796 ValueIndexMap::const_iterator si = strengths.end();
|
Chris@167
|
797
|
Chris@168
|
798 map<int, double> active;
|
Chris@168
|
799 map<int, int> activeShifts;
|
Chris@168
|
800
|
Chris@183
|
801 while (int(active.size()) < pack.maxPolyphony && si != strengths.begin()) {
|
Chris@168
|
802
|
Chris@168
|
803 --si;
|
Chris@168
|
804
|
Chris@168
|
805 double strength = si->first;
|
Chris@168
|
806 int j = si->second;
|
Chris@168
|
807
|
Chris@168
|
808 active[j] = strength;
|
Chris@168
|
809
|
Chris@170
|
810 if (wantShifts) {
|
Chris@170
|
811 activeShifts[j] = bestShifts[j];
|
Chris@167
|
812 }
|
Chris@41
|
813 }
|
Chris@41
|
814
|
Chris@168
|
815 m_pianoRoll.push_back(active);
|
Chris@170
|
816
|
Chris@170
|
817 if (wantShifts) {
|
Chris@168
|
818 m_pianoRollShifts.push_back(activeShifts);
|
Chris@41
|
819 }
|
Chris@294
|
820
|
Chris@294
|
821 return filtered;
|
Chris@166
|
822 }
|
Chris@166
|
823
|
Chris@166
|
824 Vamp::Plugin::FeatureList
|
Chris@168
|
825 Silvet::noteTrack(int shiftCount)
|
Chris@166
|
826 {
|
Chris@41
|
827 // Minimum duration pruning, and conversion to notes. We can only
|
Chris@41
|
828 // report notes that have just ended (i.e. that are absent in the
|
Chris@168
|
829 // latest active set but present in the prior set in the piano
|
Chris@41
|
830 // roll) -- any notes that ended earlier will have been reported
|
Chris@41
|
831 // already, and if they haven't ended, we don't know their
|
Chris@41
|
832 // duration.
|
Chris@41
|
833
|
Chris@168
|
834 int width = m_pianoRoll.size() - 1;
|
Chris@168
|
835
|
Chris@168
|
836 const map<int, double> &active = m_pianoRoll[width];
|
Chris@41
|
837
|
Chris@165
|
838 double columnDuration = 1.0 / m_colsPerSec;
|
Chris@165
|
839
|
Chris@165
|
840 // only keep notes >= 100ms or thereabouts
|
Chris@165
|
841 int durationThreshold = floor(0.1 / columnDuration); // columns
|
Chris@165
|
842 if (durationThreshold < 1) durationThreshold = 1;
|
Chris@41
|
843
|
Chris@41
|
844 FeatureList noteFeatures;
|
Chris@41
|
845
|
Chris@41
|
846 if (width < durationThreshold + 1) {
|
Chris@41
|
847 return noteFeatures;
|
Chris@41
|
848 }
|
Chris@41
|
849
|
Chris@150
|
850 //!!! try: repeated note detection? (look for change in first derivative of the pitch matrix)
|
Chris@150
|
851
|
Chris@55
|
852 for (map<int, double>::const_iterator ni = m_pianoRoll[width-1].begin();
|
Chris@41
|
853 ni != m_pianoRoll[width-1].end(); ++ni) {
|
Chris@41
|
854
|
Chris@55
|
855 int note = ni->first;
|
Chris@41
|
856
|
Chris@41
|
857 if (active.find(note) != active.end()) {
|
Chris@41
|
858 // the note is still playing
|
Chris@41
|
859 continue;
|
Chris@41
|
860 }
|
Chris@41
|
861
|
Chris@41
|
862 // the note was playing but just ended
|
Chris@41
|
863 int end = width;
|
Chris@41
|
864 int start = end-1;
|
Chris@41
|
865
|
Chris@41
|
866 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) {
|
Chris@41
|
867 --start;
|
Chris@41
|
868 }
|
Chris@41
|
869 ++start;
|
Chris@41
|
870
|
Chris@169
|
871 if ((end - start) < durationThreshold) {
|
Chris@41
|
872 continue;
|
Chris@41
|
873 }
|
Chris@41
|
874
|
Chris@169
|
875 emitNote(start, end, note, shiftCount, noteFeatures);
|
Chris@41
|
876 }
|
Chris@41
|
877
|
Chris@62
|
878 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl;
|
Chris@41
|
879
|
Chris@41
|
880 return noteFeatures;
|
Chris@41
|
881 }
|
Chris@41
|
882
|
Chris@169
|
883 void
|
Chris@169
|
884 Silvet::emitNote(int start, int end, int note, int shiftCount,
|
Chris@169
|
885 FeatureList ¬eFeatures)
|
Chris@169
|
886 {
|
Chris@169
|
887 int partStart = start;
|
Chris@169
|
888 int partShift = 0;
|
Chris@169
|
889 int partVelocity = 0;
|
Chris@169
|
890
|
Chris@252
|
891 int partThreshold = floor(0.05 * m_colsPerSec);
|
Chris@169
|
892
|
Chris@169
|
893 for (int i = start; i != end; ++i) {
|
Chris@169
|
894
|
Chris@169
|
895 double strength = m_pianoRoll[i][note];
|
Chris@169
|
896
|
Chris@169
|
897 int shift = 0;
|
Chris@169
|
898
|
Chris@169
|
899 if (shiftCount > 1) {
|
Chris@169
|
900
|
Chris@169
|
901 shift = m_pianoRollShifts[i][note];
|
Chris@169
|
902
|
Chris@169
|
903 if (i == partStart) {
|
Chris@169
|
904 partShift = shift;
|
Chris@169
|
905 }
|
Chris@169
|
906
|
Chris@169
|
907 if (i > partStart + partThreshold && shift != partShift) {
|
Chris@169
|
908
|
Chris@169
|
909 // cerr << "i = " << i << ", partStart = " << partStart << ", shift = " << shift << ", partShift = " << partShift << endl;
|
Chris@169
|
910
|
Chris@169
|
911 // pitch has changed, emit an intermediate note
|
Chris@252
|
912 noteFeatures.push_back(makeNoteFeature(partStart,
|
Chris@252
|
913 i,
|
Chris@252
|
914 note,
|
Chris@252
|
915 partShift,
|
Chris@252
|
916 shiftCount,
|
Chris@252
|
917 partVelocity));
|
Chris@169
|
918 partStart = i;
|
Chris@169
|
919 partShift = shift;
|
Chris@169
|
920 partVelocity = 0;
|
Chris@169
|
921 }
|
Chris@169
|
922 }
|
Chris@169
|
923
|
Chris@246
|
924 int v = round(strength * 2);
|
Chris@169
|
925 if (v > partVelocity) {
|
Chris@169
|
926 partVelocity = v;
|
Chris@169
|
927 }
|
Chris@169
|
928 }
|
Chris@169
|
929
|
Chris@169
|
930 if (end >= partStart + partThreshold) {
|
Chris@252
|
931 noteFeatures.push_back(makeNoteFeature(partStart,
|
Chris@252
|
932 end,
|
Chris@252
|
933 note,
|
Chris@252
|
934 partShift,
|
Chris@252
|
935 shiftCount,
|
Chris@252
|
936 partVelocity));
|
Chris@169
|
937 }
|
Chris@169
|
938 }
|
Chris@252
|
939
|
Chris@309
|
940 RealTime
|
Chris@309
|
941 Silvet::getColumnTimestamp(int column)
|
Chris@309
|
942 {
|
Chris@309
|
943 double columnDuration = 1.0 / m_colsPerSec;
|
Chris@309
|
944 int postFilterLatency = int(m_postFilter[0]->getSize() / 2);
|
Chris@309
|
945
|
Chris@309
|
946 return m_startTime + RealTime::fromSeconds
|
Chris@309
|
947 (columnDuration * (column - postFilterLatency) + 0.02);
|
Chris@309
|
948 }
|
Chris@309
|
949
|
Chris@252
|
950 Silvet::Feature
|
Chris@252
|
951 Silvet::makeNoteFeature(int start,
|
Chris@252
|
952 int end,
|
Chris@252
|
953 int note,
|
Chris@252
|
954 int shift,
|
Chris@252
|
955 int shiftCount,
|
Chris@252
|
956 int velocity)
|
Chris@252
|
957 {
|
Chris@252
|
958 Feature f;
|
Chris@252
|
959
|
Chris@252
|
960 f.hasTimestamp = true;
|
Chris@309
|
961 f.timestamp = getColumnTimestamp(start);
|
Chris@252
|
962
|
Chris@252
|
963 f.hasDuration = true;
|
Chris@309
|
964 f.duration = getColumnTimestamp(end) - f.timestamp;
|
Chris@252
|
965
|
Chris@252
|
966 f.values.clear();
|
Chris@252
|
967
|
Chris@252
|
968 f.values.push_back
|
Chris@252
|
969 (noteFrequency(note, shift, shiftCount));
|
Chris@252
|
970
|
Chris@252
|
971 float inputGain = getInputGainAt(f.timestamp);
|
Chris@252
|
972 // cerr << "adjusting velocity from " << velocity << " to " << round(velocity/inputGain) << endl;
|
Chris@252
|
973 velocity = round(velocity / inputGain);
|
Chris@252
|
974 if (velocity > 127) velocity = 127;
|
Chris@252
|
975 if (velocity < 1) velocity = 1;
|
Chris@252
|
976 f.values.push_back(velocity);
|
Chris@252
|
977
|
Chris@252
|
978 f.label = noteName(note, shift, shiftCount);
|
Chris@252
|
979
|
Chris@252
|
980 return f;
|
Chris@252
|
981 }
|
Chris@252
|
982
|
Chris@252
|
983 float
|
Chris@252
|
984 Silvet::getInputGainAt(RealTime t)
|
Chris@252
|
985 {
|
Chris@252
|
986 map<RealTime, float>::const_iterator i = m_inputGains.lower_bound(t);
|
Chris@252
|
987
|
Chris@252
|
988 if (i == m_inputGains.end()) {
|
Chris@252
|
989 if (i != m_inputGains.begin()) {
|
Chris@252
|
990 --i;
|
Chris@252
|
991 } else {
|
Chris@252
|
992 return 1.f; // no data
|
Chris@252
|
993 }
|
Chris@252
|
994 }
|
Chris@252
|
995
|
Chris@252
|
996 // cerr << "gain at time " << t << " = " << i->second << endl;
|
Chris@252
|
997
|
Chris@252
|
998 return i->second;
|
Chris@252
|
999 }
|
Chris@252
|
1000
|