Chris@31
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
Chris@31
|
2
|
Chris@31
|
3 /*
|
Chris@31
|
4 Silvet
|
Chris@31
|
5
|
Chris@31
|
6 A Vamp plugin for note transcription.
|
Chris@31
|
7 Centre for Digital Music, Queen Mary University of London.
|
Chris@31
|
8
|
Chris@31
|
9 This program is free software; you can redistribute it and/or
|
Chris@31
|
10 modify it under the terms of the GNU General Public License as
|
Chris@31
|
11 published by the Free Software Foundation; either version 2 of the
|
Chris@31
|
12 License, or (at your option) any later version. See the file
|
Chris@31
|
13 COPYING included with this distribution for more information.
|
Chris@31
|
14 */
|
Chris@31
|
15
|
Chris@31
|
16 #include "Silvet.h"
|
Chris@34
|
17 #include "EM.h"
|
Chris@31
|
18
|
Chris@152
|
19 #include <cq/CQSpectrogram.h>
|
Chris@31
|
20
|
Chris@152
|
21 #include "MedianFilter.h"
|
Chris@152
|
22 #include "constant-q-cpp/src/dsp/Resampler.h"
|
Chris@246
|
23 #include "flattendynamics-ladspa.h"
|
Chris@31
|
24
|
Chris@31
|
25 #include <vector>
|
Chris@312
|
26 #include <future>
|
Chris@31
|
27
|
Chris@32
|
28 #include <cstdio>
|
Chris@32
|
29
|
Chris@31
|
30 using std::vector;
|
Chris@48
|
31 using std::cout;
|
Chris@31
|
32 using std::cerr;
|
Chris@31
|
33 using std::endl;
|
Chris@311
|
34 using std::pair;
|
Chris@312
|
35 using std::future;
|
Chris@312
|
36 using std::async;
|
Chris@40
|
37 using Vamp::RealTime;
|
Chris@31
|
38
|
Chris@31
|
39 static int processingSampleRate = 44100;
|
Chris@31
|
40 static int processingBPO = 60;
|
Chris@170
|
41
|
Chris@272
|
42 static int minInputSampleRate = 100;
|
Chris@272
|
43 static int maxInputSampleRate = 192000;
|
Chris@272
|
44
|
Chris@31
|
45 Silvet::Silvet(float inputSampleRate) :
|
Chris@31
|
46 Plugin(inputSampleRate),
|
Chris@161
|
47 m_instruments(InstrumentPack::listInstrumentPacks()),
|
Chris@31
|
48 m_resampler(0),
|
Chris@246
|
49 m_flattener(0),
|
Chris@110
|
50 m_cq(0),
|
Chris@162
|
51 m_hqMode(true),
|
Chris@166
|
52 m_fineTuning(false),
|
Chris@178
|
53 m_instrument(0),
|
Chris@178
|
54 m_colsPerSec(50)
|
Chris@31
|
55 {
|
Chris@31
|
56 }
|
Chris@31
|
57
|
Chris@31
|
58 Silvet::~Silvet()
|
Chris@31
|
59 {
|
Chris@31
|
60 delete m_resampler;
|
Chris@246
|
61 delete m_flattener;
|
Chris@31
|
62 delete m_cq;
|
Chris@41
|
63 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
|
Chris@41
|
64 delete m_postFilter[i];
|
Chris@41
|
65 }
|
Chris@31
|
66 }
|
Chris@31
|
67
|
Chris@31
|
68 string
|
Chris@31
|
69 Silvet::getIdentifier() const
|
Chris@31
|
70 {
|
Chris@31
|
71 return "silvet";
|
Chris@31
|
72 }
|
Chris@31
|
73
|
Chris@31
|
74 string
|
Chris@31
|
75 Silvet::getName() const
|
Chris@31
|
76 {
|
Chris@31
|
77 return "Silvet Note Transcription";
|
Chris@31
|
78 }
|
Chris@31
|
79
|
Chris@31
|
80 string
|
Chris@31
|
81 Silvet::getDescription() const
|
Chris@31
|
82 {
|
Chris@191
|
83 return "Estimate the note onsets, pitches, and durations that make up a music recording.";
|
Chris@31
|
84 }
|
Chris@31
|
85
|
Chris@31
|
86 string
|
Chris@31
|
87 Silvet::getMaker() const
|
Chris@31
|
88 {
|
Chris@191
|
89 return "Queen Mary, University of London";
|
Chris@31
|
90 }
|
Chris@31
|
91
|
Chris@31
|
92 int
|
Chris@31
|
93 Silvet::getPluginVersion() const
|
Chris@31
|
94 {
|
Chris@309
|
95 return 3;
|
Chris@31
|
96 }
|
Chris@31
|
97
|
Chris@31
|
98 string
|
Chris@31
|
99 Silvet::getCopyright() const
|
Chris@31
|
100 {
|
Chris@191
|
101 return "Method by Emmanouil Benetos and Simon Dixon; plugin by Chris Cannam and Emmanouil Benetos. GPL licence.";
|
Chris@31
|
102 }
|
Chris@31
|
103
|
Chris@31
|
104 Silvet::InputDomain
|
Chris@31
|
105 Silvet::getInputDomain() const
|
Chris@31
|
106 {
|
Chris@31
|
107 return TimeDomain;
|
Chris@31
|
108 }
|
Chris@31
|
109
|
Chris@31
|
110 size_t
|
Chris@31
|
111 Silvet::getPreferredBlockSize() const
|
Chris@31
|
112 {
|
Chris@31
|
113 return 0;
|
Chris@31
|
114 }
|
Chris@31
|
115
|
Chris@31
|
116 size_t
|
Chris@31
|
117 Silvet::getPreferredStepSize() const
|
Chris@31
|
118 {
|
Chris@31
|
119 return 0;
|
Chris@31
|
120 }
|
Chris@31
|
121
|
Chris@31
|
122 size_t
|
Chris@31
|
123 Silvet::getMinChannelCount() const
|
Chris@31
|
124 {
|
Chris@31
|
125 return 1;
|
Chris@31
|
126 }
|
Chris@31
|
127
|
Chris@31
|
128 size_t
|
Chris@31
|
129 Silvet::getMaxChannelCount() const
|
Chris@31
|
130 {
|
Chris@31
|
131 return 1;
|
Chris@31
|
132 }
|
Chris@31
|
133
|
Chris@31
|
134 Silvet::ParameterList
|
Chris@31
|
135 Silvet::getParameterDescriptors() const
|
Chris@31
|
136 {
|
Chris@31
|
137 ParameterList list;
|
Chris@110
|
138
|
Chris@110
|
139 ParameterDescriptor desc;
|
Chris@110
|
140 desc.identifier = "mode";
|
Chris@110
|
141 desc.name = "Processing mode";
|
Chris@110
|
142 desc.unit = "";
|
Chris@271
|
143 desc.description = "Sets the tradeoff of processing speed against transcription quality. Draft mode modifies a number of internal parameters in favour of speed. Intensive mode (the default) will almost always produce better results.";
|
Chris@110
|
144 desc.minValue = 0;
|
Chris@110
|
145 desc.maxValue = 1;
|
Chris@113
|
146 desc.defaultValue = 1;
|
Chris@110
|
147 desc.isQuantized = true;
|
Chris@110
|
148 desc.quantizeStep = 1;
|
Chris@166
|
149 desc.valueNames.push_back("Draft (faster)");
|
Chris@165
|
150 desc.valueNames.push_back("Intensive (higher quality)");
|
Chris@161
|
151 list.push_back(desc);
|
Chris@161
|
152
|
Chris@176
|
153 desc.identifier = "instrument";
|
Chris@176
|
154 desc.name = "Instrument";
|
Chris@161
|
155 desc.unit = "";
|
Chris@271
|
156 desc.description = "The instrument or instruments known to be present in the recording. This affects the set of instrument templates used, as well as the expected level of polyphony in the output. Using a more limited set of instruments than the default will also make the plugin run faster.\nNote that this plugin cannot isolate instruments: you can't use this setting to request notes from only one instrument in a recording with several. Instead, use this as a hint to the plugin about which instruments are actually present.";
|
Chris@161
|
157 desc.minValue = 0;
|
Chris@162
|
158 desc.maxValue = m_instruments.size()-1;
|
Chris@162
|
159 desc.defaultValue = 0;
|
Chris@161
|
160 desc.isQuantized = true;
|
Chris@161
|
161 desc.quantizeStep = 1;
|
Chris@161
|
162 desc.valueNames.clear();
|
Chris@162
|
163 for (int i = 0; i < int(m_instruments.size()); ++i) {
|
Chris@162
|
164 desc.valueNames.push_back(m_instruments[i].name);
|
Chris@162
|
165 }
|
Chris@166
|
166 list.push_back(desc);
|
Chris@161
|
167
|
Chris@166
|
168 desc.identifier = "finetune";
|
Chris@166
|
169 desc.name = "Return fine pitch estimates";
|
Chris@166
|
170 desc.unit = "";
|
Chris@271
|
171 desc.description = "Return pitch estimates at finer than semitone resolution. This works only in Intensive mode. Notes that appear to drift in pitch will be split up into shorter notes with individually finer pitches.";
|
Chris@166
|
172 desc.minValue = 0;
|
Chris@166
|
173 desc.maxValue = 1;
|
Chris@166
|
174 desc.defaultValue = 0;
|
Chris@166
|
175 desc.isQuantized = true;
|
Chris@166
|
176 desc.quantizeStep = 1;
|
Chris@166
|
177 desc.valueNames.clear();
|
Chris@110
|
178 list.push_back(desc);
|
Chris@110
|
179
|
Chris@31
|
180 return list;
|
Chris@31
|
181 }
|
Chris@31
|
182
|
Chris@31
|
183 float
|
Chris@31
|
184 Silvet::getParameter(string identifier) const
|
Chris@31
|
185 {
|
Chris@110
|
186 if (identifier == "mode") {
|
Chris@110
|
187 return m_hqMode ? 1.f : 0.f;
|
Chris@166
|
188 } else if (identifier == "finetune") {
|
Chris@166
|
189 return m_fineTuning ? 1.f : 0.f;
|
Chris@176
|
190 } else if (identifier == "instrument") {
|
Chris@162
|
191 return m_instrument;
|
Chris@110
|
192 }
|
Chris@31
|
193 return 0;
|
Chris@31
|
194 }
|
Chris@31
|
195
|
Chris@31
|
196 void
|
Chris@31
|
197 Silvet::setParameter(string identifier, float value)
|
Chris@31
|
198 {
|
Chris@110
|
199 if (identifier == "mode") {
|
Chris@110
|
200 m_hqMode = (value > 0.5);
|
Chris@166
|
201 } else if (identifier == "finetune") {
|
Chris@166
|
202 m_fineTuning = (value > 0.5);
|
Chris@176
|
203 } else if (identifier == "instrument") {
|
Chris@162
|
204 m_instrument = lrintf(value);
|
Chris@110
|
205 }
|
Chris@31
|
206 }
|
Chris@31
|
207
|
Chris@31
|
208 Silvet::ProgramList
|
Chris@31
|
209 Silvet::getPrograms() const
|
Chris@31
|
210 {
|
Chris@31
|
211 ProgramList list;
|
Chris@31
|
212 return list;
|
Chris@31
|
213 }
|
Chris@31
|
214
|
Chris@31
|
215 string
|
Chris@31
|
216 Silvet::getCurrentProgram() const
|
Chris@31
|
217 {
|
Chris@31
|
218 return "";
|
Chris@31
|
219 }
|
Chris@31
|
220
|
Chris@31
|
221 void
|
Chris@31
|
222 Silvet::selectProgram(string name)
|
Chris@31
|
223 {
|
Chris@31
|
224 }
|
Chris@31
|
225
|
Chris@31
|
226 Silvet::OutputList
|
Chris@31
|
227 Silvet::getOutputDescriptors() const
|
Chris@31
|
228 {
|
Chris@31
|
229 OutputList list;
|
Chris@31
|
230
|
Chris@31
|
231 OutputDescriptor d;
|
Chris@51
|
232 d.identifier = "notes";
|
Chris@51
|
233 d.name = "Note transcription";
|
Chris@271
|
234 d.description = "Overall note transcription. Each note has time, duration, estimated pitch, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture.";
|
Chris@41
|
235 d.unit = "Hz";
|
Chris@31
|
236 d.hasFixedBinCount = true;
|
Chris@31
|
237 d.binCount = 2;
|
Chris@41
|
238 d.binNames.push_back("Frequency");
|
Chris@31
|
239 d.binNames.push_back("Velocity");
|
Chris@31
|
240 d.hasKnownExtents = false;
|
Chris@31
|
241 d.isQuantized = false;
|
Chris@31
|
242 d.sampleType = OutputDescriptor::VariableSampleRate;
|
Chris@246
|
243 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
|
Chris@31
|
244 d.hasDuration = true;
|
Chris@32
|
245 m_notesOutputNo = list.size();
|
Chris@32
|
246 list.push_back(d);
|
Chris@32
|
247
|
Chris@178
|
248 d.identifier = "timefreq";
|
Chris@178
|
249 d.name = "Time-frequency distribution";
|
Chris@271
|
250 d.description = "Filtered constant-Q time-frequency distribution as used as input to the expectation-maximisation algorithm.";
|
Chris@178
|
251 d.unit = "";
|
Chris@178
|
252 d.hasFixedBinCount = true;
|
Chris@178
|
253 d.binCount = m_instruments[0].templateHeight;
|
Chris@178
|
254 d.binNames.clear();
|
Chris@178
|
255 if (m_cq) {
|
Chris@294
|
256 char name[50];
|
Chris@178
|
257 for (int i = 0; i < m_instruments[0].templateHeight; ++i) {
|
Chris@178
|
258 // We have a 600-bin (10 oct 60-bin CQ) of which the
|
Chris@178
|
259 // lowest-frequency 55 bins have been dropped, for a
|
Chris@178
|
260 // 545-bin template. The native CQ bins go high->low
|
Chris@178
|
261 // frequency though, so these are still the first 545 bins
|
Chris@178
|
262 // as reported by getBinFrequency, though in reverse order
|
Chris@178
|
263 float freq = m_cq->getBinFrequency
|
Chris@178
|
264 (m_instruments[0].templateHeight - i - 1);
|
Chris@178
|
265 sprintf(name, "%.1f Hz", freq);
|
Chris@178
|
266 d.binNames.push_back(name);
|
Chris@178
|
267 }
|
Chris@178
|
268 }
|
Chris@178
|
269 d.hasKnownExtents = false;
|
Chris@178
|
270 d.isQuantized = false;
|
Chris@178
|
271 d.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@178
|
272 d.sampleRate = m_colsPerSec;
|
Chris@178
|
273 d.hasDuration = false;
|
Chris@178
|
274 m_fcqOutputNo = list.size();
|
Chris@178
|
275 list.push_back(d);
|
Chris@178
|
276
|
Chris@294
|
277 d.identifier = "pitchactivation";
|
Chris@294
|
278 d.name = "Pitch activation distribution";
|
Chris@294
|
279 d.description = "Pitch activation distribution resulting from expectation-maximisation algorithm, prior to note extraction.";
|
Chris@294
|
280 d.unit = "";
|
Chris@294
|
281 d.hasFixedBinCount = true;
|
Chris@294
|
282 d.binCount = m_instruments[0].templateNoteCount;
|
Chris@294
|
283 d.binNames.clear();
|
Chris@294
|
284 if (m_cq) {
|
Chris@294
|
285 for (int i = 0; i < m_instruments[0].templateNoteCount; ++i) {
|
Chris@294
|
286 d.binNames.push_back(noteName(i, 0, 1));
|
Chris@294
|
287 }
|
Chris@294
|
288 }
|
Chris@294
|
289 d.hasKnownExtents = false;
|
Chris@294
|
290 d.isQuantized = false;
|
Chris@294
|
291 d.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@294
|
292 d.sampleRate = m_colsPerSec;
|
Chris@294
|
293 d.hasDuration = false;
|
Chris@294
|
294 m_pitchOutputNo = list.size();
|
Chris@294
|
295 list.push_back(d);
|
Chris@294
|
296
|
Chris@309
|
297 d.identifier = "chroma";
|
Chris@309
|
298 d.name = "Pitch chroma distribution";
|
Chris@309
|
299 d.description = "Pitch chroma distribution formed by wrapping the un-thresholded pitch activation distribution into a single octave of semitone bins.";
|
Chris@309
|
300 d.unit = "";
|
Chris@309
|
301 d.hasFixedBinCount = true;
|
Chris@309
|
302 d.binCount = 12;
|
Chris@309
|
303 d.binNames.clear();
|
Chris@309
|
304 if (m_cq) {
|
Chris@309
|
305 for (int i = 0; i < 12; ++i) {
|
Chris@309
|
306 d.binNames.push_back(chromaName(i));
|
Chris@309
|
307 }
|
Chris@309
|
308 }
|
Chris@309
|
309 d.hasKnownExtents = false;
|
Chris@309
|
310 d.isQuantized = false;
|
Chris@309
|
311 d.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@309
|
312 d.sampleRate = m_colsPerSec;
|
Chris@309
|
313 d.hasDuration = false;
|
Chris@309
|
314 m_chromaOutputNo = list.size();
|
Chris@309
|
315 list.push_back(d);
|
Chris@309
|
316
|
Chris@31
|
317 return list;
|
Chris@31
|
318 }
|
Chris@31
|
319
|
Chris@38
|
320 std::string
|
Chris@309
|
321 Silvet::chromaName(int pitch) const
|
Chris@38
|
322 {
|
Chris@38
|
323 static const char *names[] = {
|
Chris@38
|
324 "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"
|
Chris@38
|
325 };
|
Chris@38
|
326
|
Chris@309
|
327 return names[pitch];
|
Chris@309
|
328 }
|
Chris@309
|
329
|
Chris@309
|
330 std::string
|
Chris@309
|
331 Silvet::noteName(int note, int shift, int shiftCount) const
|
Chris@309
|
332 {
|
Chris@309
|
333 string n = chromaName(note % 12);
|
Chris@38
|
334
|
Chris@175
|
335 int oct = (note + 9) / 12;
|
Chris@38
|
336
|
Chris@175
|
337 char buf[30];
|
Chris@175
|
338
|
Chris@175
|
339 float pshift = 0.f;
|
Chris@175
|
340 if (shiftCount > 1) {
|
Chris@175
|
341 // see noteFrequency below
|
Chris@175
|
342 pshift =
|
Chris@175
|
343 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
|
Chris@175
|
344 }
|
Chris@175
|
345
|
Chris@175
|
346 if (pshift > 0.f) {
|
Chris@309
|
347 sprintf(buf, "%s%d+%dc", n.c_str(), oct, int(round(pshift * 100)));
|
Chris@175
|
348 } else if (pshift < 0.f) {
|
Chris@309
|
349 sprintf(buf, "%s%d-%dc", n.c_str(), oct, int(round((-pshift) * 100)));
|
Chris@175
|
350 } else {
|
Chris@309
|
351 sprintf(buf, "%s%d", n.c_str(), oct);
|
Chris@175
|
352 }
|
Chris@38
|
353
|
Chris@38
|
354 return buf;
|
Chris@38
|
355 }
|
Chris@38
|
356
|
Chris@41
|
357 float
|
Chris@168
|
358 Silvet::noteFrequency(int note, int shift, int shiftCount) const
|
Chris@41
|
359 {
|
Chris@169
|
360 // Convert shift number to a pitch shift. The given shift number
|
Chris@169
|
361 // is an offset into the template array, which starts with some
|
Chris@169
|
362 // zeros, followed by the template, then some trailing zeros.
|
Chris@169
|
363 //
|
Chris@169
|
364 // Example: if we have templateMaxShift == 2 and thus shiftCount
|
Chris@169
|
365 // == 5, then the number will be in the range 0-4 and the template
|
Chris@169
|
366 // will have 2 zeros at either end. Thus number 2 represents the
|
Chris@169
|
367 // template "as recorded", for a pitch shift of 0; smaller indices
|
Chris@169
|
368 // represent moving the template *up* in pitch (by introducing
|
Chris@169
|
369 // zeros at the start, which is the low-frequency end), for a
|
Chris@169
|
370 // positive pitch shift; and higher values represent moving it
|
Chris@169
|
371 // down in pitch, for a negative pitch shift.
|
Chris@169
|
372
|
Chris@175
|
373 float pshift = 0.f;
|
Chris@175
|
374 if (shiftCount > 1) {
|
Chris@175
|
375 pshift =
|
Chris@175
|
376 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
|
Chris@175
|
377 }
|
Chris@169
|
378
|
Chris@169
|
379 return float(27.5 * pow(2.0, (note + pshift) / 12.0));
|
Chris@41
|
380 }
|
Chris@41
|
381
|
Chris@31
|
382 bool
|
Chris@31
|
383 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
Chris@31
|
384 {
|
Chris@272
|
385 if (m_inputSampleRate < minInputSampleRate ||
|
Chris@272
|
386 m_inputSampleRate > maxInputSampleRate) {
|
Chris@272
|
387 cerr << "Silvet::initialise: Unsupported input sample rate "
|
Chris@272
|
388 << m_inputSampleRate << " (supported min " << minInputSampleRate
|
Chris@272
|
389 << ", max " << maxInputSampleRate << ")" << endl;
|
Chris@272
|
390 return false;
|
Chris@272
|
391 }
|
Chris@272
|
392
|
Chris@31
|
393 if (channels < getMinChannelCount() ||
|
Chris@272
|
394 channels > getMaxChannelCount()) {
|
Chris@272
|
395 cerr << "Silvet::initialise: Unsupported channel count " << channels
|
Chris@272
|
396 << " (supported min " << getMinChannelCount() << ", max "
|
Chris@272
|
397 << getMaxChannelCount() << ")" << endl;
|
Chris@272
|
398 return false;
|
Chris@272
|
399 }
|
Chris@31
|
400
|
Chris@31
|
401 if (stepSize != blockSize) {
|
Chris@31
|
402 cerr << "Silvet::initialise: Step size must be the same as block size ("
|
Chris@31
|
403 << stepSize << " != " << blockSize << ")" << endl;
|
Chris@31
|
404 return false;
|
Chris@31
|
405 }
|
Chris@31
|
406
|
Chris@31
|
407 m_blockSize = blockSize;
|
Chris@31
|
408
|
Chris@31
|
409 reset();
|
Chris@31
|
410
|
Chris@31
|
411 return true;
|
Chris@31
|
412 }
|
Chris@31
|
413
|
Chris@31
|
414 void
|
Chris@31
|
415 Silvet::reset()
|
Chris@31
|
416 {
|
Chris@31
|
417 delete m_resampler;
|
Chris@246
|
418 delete m_flattener;
|
Chris@31
|
419 delete m_cq;
|
Chris@31
|
420
|
Chris@31
|
421 if (m_inputSampleRate != processingSampleRate) {
|
Chris@31
|
422 m_resampler = new Resampler(m_inputSampleRate, processingSampleRate);
|
Chris@31
|
423 } else {
|
Chris@31
|
424 m_resampler = 0;
|
Chris@31
|
425 }
|
Chris@31
|
426
|
Chris@246
|
427 m_flattener = new FlattenDynamics(m_inputSampleRate); // before resampling
|
Chris@246
|
428 m_flattener->reset();
|
Chris@246
|
429
|
Chris@173
|
430 double minFreq = 27.5;
|
Chris@173
|
431
|
Chris@173
|
432 if (!m_hqMode) {
|
Chris@173
|
433 // We don't actually return any notes from the bottom octave,
|
Chris@173
|
434 // so we can just pad with zeros
|
Chris@173
|
435 minFreq *= 2;
|
Chris@173
|
436 }
|
Chris@173
|
437
|
Chris@154
|
438 CQParameters params(processingSampleRate,
|
Chris@173
|
439 minFreq,
|
Chris@154
|
440 processingSampleRate / 3,
|
Chris@154
|
441 processingBPO);
|
Chris@154
|
442
|
Chris@155
|
443 params.q = 0.95; // MIREX code uses 0.8, but it seems 0.9 or lower
|
Chris@155
|
444 // drops the FFT size to 512 from 1024 and alters
|
Chris@155
|
445 // some other processing parameters, making
|
Chris@155
|
446 // everything much, much slower. Could be a flaw
|
Chris@155
|
447 // in the CQ parameter calculations, must check
|
Chris@154
|
448 params.atomHopFactor = 0.3;
|
Chris@154
|
449 params.threshold = 0.0005;
|
Chris@172
|
450 params.window = CQParameters::Hann;
|
Chris@154
|
451
|
Chris@154
|
452 m_cq = new CQSpectrogram(params, CQSpectrogram::InterpolateLinear);
|
Chris@31
|
453
|
Chris@165
|
454 m_colsPerSec = m_hqMode ? 50 : 25;
|
Chris@165
|
455
|
Chris@41
|
456 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
|
Chris@41
|
457 delete m_postFilter[i];
|
Chris@41
|
458 }
|
Chris@41
|
459 m_postFilter.clear();
|
Chris@176
|
460 for (int i = 0; i < m_instruments[0].templateNoteCount; ++i) {
|
Chris@41
|
461 m_postFilter.push_back(new MedianFilter<double>(3));
|
Chris@41
|
462 }
|
Chris@41
|
463 m_pianoRoll.clear();
|
Chris@246
|
464 m_inputGains.clear();
|
Chris@32
|
465 m_columnCount = 0;
|
Chris@272
|
466 m_resampledCount = 0;
|
Chris@40
|
467 m_startTime = RealTime::zeroTime;
|
Chris@31
|
468 }
|
Chris@31
|
469
|
Chris@31
|
470 Silvet::FeatureSet
|
Chris@31
|
471 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
Chris@31
|
472 {
|
Chris@40
|
473 if (m_columnCount == 0) {
|
Chris@40
|
474 m_startTime = timestamp;
|
Chris@40
|
475 }
|
Chris@246
|
476
|
Chris@246
|
477 vector<float> flattened(m_blockSize);
|
Chris@246
|
478 float gain = 1.f;
|
Chris@246
|
479 m_flattener->connectInputPort
|
Chris@246
|
480 (FlattenDynamics::AudioInputPort, inputBuffers[0]);
|
Chris@246
|
481 m_flattener->connectOutputPort
|
Chris@246
|
482 (FlattenDynamics::AudioOutputPort, &flattened[0]);
|
Chris@246
|
483 m_flattener->connectOutputPort
|
Chris@246
|
484 (FlattenDynamics::GainOutputPort, &gain);
|
Chris@246
|
485 m_flattener->process(m_blockSize);
|
Chris@246
|
486
|
Chris@252
|
487 m_inputGains[timestamp] = gain;
|
Chris@40
|
488
|
Chris@31
|
489 vector<double> data;
|
Chris@40
|
490 for (int i = 0; i < m_blockSize; ++i) {
|
Chris@246
|
491 double d = flattened[i];
|
Chris@235
|
492 data.push_back(d);
|
Chris@40
|
493 }
|
Chris@31
|
494
|
Chris@31
|
495 if (m_resampler) {
|
Chris@272
|
496
|
Chris@31
|
497 data = m_resampler->process(data.data(), data.size());
|
Chris@272
|
498
|
Chris@272
|
499 int hadCount = m_resampledCount;
|
Chris@272
|
500 m_resampledCount += data.size();
|
Chris@272
|
501
|
Chris@272
|
502 int resamplerLatency = m_resampler->getLatency();
|
Chris@272
|
503
|
Chris@272
|
504 if (hadCount < resamplerLatency) {
|
Chris@272
|
505 int stillToDrop = resamplerLatency - hadCount;
|
Chris@272
|
506 if (stillToDrop >= int(data.size())) {
|
Chris@272
|
507 return FeatureSet();
|
Chris@272
|
508 } else {
|
Chris@272
|
509 data = vector<double>(data.begin() + stillToDrop, data.end());
|
Chris@272
|
510 }
|
Chris@272
|
511 }
|
Chris@31
|
512 }
|
Chris@272
|
513
|
Chris@32
|
514 Grid cqout = m_cq->process(data);
|
Chris@51
|
515 FeatureSet fs = transcribe(cqout);
|
Chris@51
|
516 return fs;
|
Chris@34
|
517 }
|
Chris@34
|
518
|
Chris@34
|
519 Silvet::FeatureSet
|
Chris@34
|
520 Silvet::getRemainingFeatures()
|
Chris@34
|
521 {
|
Chris@145
|
522 Grid cqout = m_cq->getRemainingOutput();
|
Chris@51
|
523 FeatureSet fs = transcribe(cqout);
|
Chris@51
|
524 return fs;
|
Chris@34
|
525 }
|
Chris@34
|
526
|
Chris@34
|
527 Silvet::FeatureSet
|
Chris@34
|
528 Silvet::transcribe(const Grid &cqout)
|
Chris@34
|
529 {
|
Chris@32
|
530 Grid filtered = preProcess(cqout);
|
Chris@31
|
531
|
Chris@32
|
532 FeatureSet fs;
|
Chris@32
|
533
|
Chris@104
|
534 if (filtered.empty()) return fs;
|
Chris@170
|
535
|
Chris@170
|
536 const InstrumentPack &pack = m_instruments[m_instrument];
|
Chris@104
|
537
|
Chris@178
|
538 for (int i = 0; i < (int)filtered.size(); ++i) {
|
Chris@178
|
539 Feature f;
|
Chris@178
|
540 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@178
|
541 f.values.push_back(float(filtered[i][j]));
|
Chris@178
|
542 }
|
Chris@178
|
543 fs[m_fcqOutputNo].push_back(f);
|
Chris@178
|
544 }
|
Chris@178
|
545
|
Chris@34
|
546 int width = filtered.size();
|
Chris@34
|
547
|
Chris@311
|
548 Grid localPitches(width);
|
Chris@170
|
549
|
Chris@170
|
550 bool wantShifts = m_hqMode && m_fineTuning;
|
Chris@170
|
551 int shiftCount = 1;
|
Chris@170
|
552 if (wantShifts) {
|
Chris@170
|
553 shiftCount = pack.templateMaxShift * 2 + 1;
|
Chris@170
|
554 }
|
Chris@170
|
555
|
Chris@170
|
556 vector<vector<int> > localBestShifts;
|
Chris@170
|
557 if (wantShifts) {
|
Chris@311
|
558 localBestShifts = vector<vector<int> >(width);
|
Chris@170
|
559 }
|
Chris@170
|
560
|
Chris@312
|
561 #ifndef MAX_EM_THREADS
|
Chris@312
|
562 #define MAX_EM_THREADS 8
|
Chris@312
|
563 #endif
|
Chris@312
|
564
|
Chris@312
|
565 #if (defined(MAX_EM_THREADS) && (MAX_EM_THREADS > 1))
|
Chris@312
|
566 for (int i = 0; i < width; ) {
|
Chris@312
|
567 typedef future<pair<vector<double>, vector<int>>> EMFuture;
|
Chris@312
|
568 vector<EMFuture> results;
|
Chris@312
|
569 for (int j = 0; j < MAX_EM_THREADS && i + j < width; ++j) {
|
Chris@312
|
570 results.push_back
|
Chris@312
|
571 (async(std::launch::async,
|
Chris@312
|
572 [&](int index) {
|
Chris@312
|
573 return applyEM(pack, filtered.at(index), wantShifts);
|
Chris@312
|
574 }, i + j));
|
Chris@312
|
575 }
|
Chris@312
|
576 for (int j = 0; j < MAX_EM_THREADS && i + j < width; ++j) {
|
Chris@312
|
577 auto out = results[j].get();
|
Chris@312
|
578 localPitches[i+j] = out.first;
|
Chris@312
|
579 if (wantShifts) localBestShifts[i+j] = out.second;
|
Chris@312
|
580 }
|
Chris@312
|
581 i += MAX_EM_THREADS;
|
Chris@312
|
582 }
|
Chris@312
|
583 #else
|
Chris@123
|
584 for (int i = 0; i < width; ++i) {
|
Chris@311
|
585 auto out = applyEM(pack, filtered.at(i), wantShifts);
|
Chris@311
|
586 localPitches[i] = out.first;
|
Chris@311
|
587 if (wantShifts) localBestShifts[i] = out.second;
|
Chris@123
|
588 }
|
Chris@312
|
589 #endif
|
Chris@312
|
590
|
Chris@166
|
591 for (int i = 0; i < width; ++i) {
|
Chris@37
|
592
|
Chris@309
|
593 // This returns a filtered column, and pushes the
|
Chris@309
|
594 // up-to-max-polyphony activation column to m_pianoRoll
|
Chris@294
|
595 vector<double> filtered = postProcess
|
Chris@294
|
596 (localPitches[i], localBestShifts[i], wantShifts);
|
Chris@294
|
597
|
Chris@309
|
598 RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1);
|
Chris@309
|
599 float inputGain = getInputGainAt(timestamp);
|
Chris@309
|
600
|
Chris@294
|
601 Feature f;
|
Chris@294
|
602 for (int j = 0; j < (int)filtered.size(); ++j) {
|
Chris@309
|
603 float v = filtered[j];
|
Chris@294
|
604 if (v < pack.levelThreshold) v = 0.f;
|
Chris@309
|
605 f.values.push_back(v / inputGain);
|
Chris@294
|
606 }
|
Chris@294
|
607 fs[m_pitchOutputNo].push_back(f);
|
Chris@309
|
608
|
Chris@309
|
609 f.values.clear();
|
Chris@309
|
610 f.values.resize(12);
|
Chris@309
|
611 for (int j = 0; j < (int)filtered.size(); ++j) {
|
Chris@309
|
612 f.values[j % 12] += filtered[j] / inputGain;
|
Chris@309
|
613 }
|
Chris@309
|
614 fs[m_chromaOutputNo].push_back(f);
|
Chris@166
|
615
|
Chris@168
|
616 FeatureList noteFeatures = noteTrack(shiftCount);
|
Chris@38
|
617
|
Chris@123
|
618 for (FeatureList::const_iterator fi = noteFeatures.begin();
|
Chris@123
|
619 fi != noteFeatures.end(); ++fi) {
|
Chris@123
|
620 fs[m_notesOutputNo].push_back(*fi);
|
Chris@40
|
621 }
|
Chris@34
|
622 }
|
Chris@34
|
623
|
Chris@32
|
624 return fs;
|
Chris@31
|
625 }
|
Chris@31
|
626
|
Chris@311
|
627 pair<vector<double>, vector<int> >
|
Chris@311
|
628 Silvet::applyEM(const InstrumentPack &pack,
|
Chris@311
|
629 const vector<double> &column,
|
Chris@311
|
630 bool wantShifts)
|
Chris@311
|
631 {
|
Chris@311
|
632 double columnThreshold = 1e-5;
|
Chris@311
|
633
|
Chris@311
|
634 vector<double> pitches(pack.templateNoteCount, 0.0);
|
Chris@311
|
635 vector<int> bestShifts;
|
Chris@311
|
636
|
Chris@311
|
637 double sum = 0.0;
|
Chris@311
|
638 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@311
|
639 sum += column.at(j);
|
Chris@311
|
640 }
|
Chris@311
|
641 if (sum < columnThreshold) return { pitches, bestShifts };
|
Chris@311
|
642
|
Chris@311
|
643 EM em(&pack, m_hqMode);
|
Chris@311
|
644
|
Chris@311
|
645 em.setPitchSparsity(pack.pitchSparsity);
|
Chris@311
|
646 em.setSourceSparsity(pack.sourceSparsity);
|
Chris@311
|
647
|
Chris@311
|
648 int iterations = m_hqMode ? 20 : 10;
|
Chris@311
|
649
|
Chris@311
|
650 for (int j = 0; j < iterations; ++j) {
|
Chris@311
|
651 em.iterate(column.data());
|
Chris@311
|
652 }
|
Chris@311
|
653
|
Chris@311
|
654 const float *pitchDist = em.getPitchDistribution();
|
Chris@311
|
655 const float *const *shiftDist = em.getShifts();
|
Chris@311
|
656
|
Chris@311
|
657 int shiftCount = 1;
|
Chris@311
|
658 if (wantShifts) {
|
Chris@311
|
659 shiftCount = pack.templateMaxShift * 2 + 1;
|
Chris@311
|
660 }
|
Chris@311
|
661
|
Chris@311
|
662 for (int j = 0; j < pack.templateNoteCount; ++j) {
|
Chris@311
|
663
|
Chris@311
|
664 pitches[j] = pitchDist[j] * sum;
|
Chris@311
|
665
|
Chris@311
|
666 int bestShift = 0;
|
Chris@311
|
667 float bestShiftValue = 0.0;
|
Chris@311
|
668 if (wantShifts) {
|
Chris@311
|
669 for (int k = 0; k < shiftCount; ++k) {
|
Chris@311
|
670 float value = shiftDist[k][j];
|
Chris@311
|
671 if (k == 0 || value > bestShiftValue) {
|
Chris@311
|
672 bestShiftValue = value;
|
Chris@311
|
673 bestShift = k;
|
Chris@311
|
674 }
|
Chris@311
|
675 }
|
Chris@311
|
676 bestShifts.push_back(bestShift);
|
Chris@311
|
677 }
|
Chris@311
|
678 }
|
Chris@311
|
679
|
Chris@311
|
680 return { pitches, bestShifts };
|
Chris@311
|
681 }
|
Chris@311
|
682
|
Chris@32
|
683 Silvet::Grid
|
Chris@32
|
684 Silvet::preProcess(const Grid &in)
|
Chris@32
|
685 {
|
Chris@32
|
686 int width = in.size();
|
Chris@32
|
687
|
Chris@165
|
688 int spacing = processingSampleRate / m_colsPerSec;
|
Chris@32
|
689
|
Chris@165
|
690 // need to be careful that col spacing is an integer number of samples!
|
Chris@165
|
691 assert(spacing * m_colsPerSec == processingSampleRate);
|
Chris@32
|
692
|
Chris@32
|
693 Grid out;
|
Chris@32
|
694
|
Chris@58
|
695 // We count the CQ latency in terms of processing hops, but
|
Chris@58
|
696 // actually it probably isn't an exact number of hops so this
|
Chris@58
|
697 // isn't quite accurate. But the small constant offset is
|
Chris@165
|
698 // practically irrelevant compared to the jitter from the frame
|
Chris@165
|
699 // size we reduce to in a moment
|
Chris@33
|
700 int latentColumns = m_cq->getLatency() / m_cq->getColumnHop();
|
Chris@33
|
701
|
Chris@176
|
702 const InstrumentPack &pack = m_instruments[m_instrument];
|
Chris@176
|
703
|
Chris@32
|
704 for (int i = 0; i < width; ++i) {
|
Chris@32
|
705
|
Chris@33
|
706 if (m_columnCount < latentColumns) {
|
Chris@33
|
707 ++m_columnCount;
|
Chris@33
|
708 continue;
|
Chris@33
|
709 }
|
Chris@33
|
710
|
Chris@32
|
711 int prevSampleNo = (m_columnCount - 1) * m_cq->getColumnHop();
|
Chris@32
|
712 int sampleNo = m_columnCount * m_cq->getColumnHop();
|
Chris@32
|
713
|
Chris@32
|
714 bool select = (sampleNo / spacing != prevSampleNo / spacing);
|
Chris@32
|
715
|
Chris@32
|
716 if (select) {
|
Chris@32
|
717 vector<double> inCol = in[i];
|
Chris@176
|
718 vector<double> outCol(pack.templateHeight);
|
Chris@32
|
719
|
Chris@178
|
720 // In HQ mode, the CQ returns 600 bins and we ignore the
|
Chris@178
|
721 // lowest 55 of them.
|
Chris@178
|
722 //
|
Chris@178
|
723 // In draft mode the CQ is an octave shorter, returning
|
Chris@178
|
724 // 540 bins, so we instead pad them with an additional 5
|
Chris@178
|
725 // zeros.
|
Chris@178
|
726 //
|
Chris@178
|
727 // We also need to reverse the column as we go, since the
|
Chris@178
|
728 // raw CQ has the high frequencies first and we need it
|
Chris@178
|
729 // the other way around.
|
Chris@32
|
730
|
Chris@178
|
731 if (m_hqMode) {
|
Chris@178
|
732 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@178
|
733 int ix = inCol.size() - j - 55;
|
Chris@178
|
734 outCol[j] = inCol[ix];
|
Chris@178
|
735 }
|
Chris@178
|
736 } else {
|
Chris@178
|
737 for (int j = 0; j < 5; ++j) {
|
Chris@178
|
738 outCol[j] = 0.0;
|
Chris@178
|
739 }
|
Chris@178
|
740 for (int j = 5; j < pack.templateHeight; ++j) {
|
Chris@178
|
741 int ix = inCol.size() - j + 4;
|
Chris@178
|
742 outCol[j] = inCol[ix];
|
Chris@178
|
743 }
|
Chris@46
|
744 }
|
Chris@32
|
745
|
Chris@46
|
746 vector<double> noiseLevel1 =
|
Chris@46
|
747 MedianFilter<double>::filter(40, outCol);
|
Chris@176
|
748 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@46
|
749 noiseLevel1[j] = std::min(outCol[j], noiseLevel1[j]);
|
Chris@46
|
750 }
|
Chris@32
|
751
|
Chris@46
|
752 vector<double> noiseLevel2 =
|
Chris@46
|
753 MedianFilter<double>::filter(40, noiseLevel1);
|
Chris@176
|
754 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@46
|
755 outCol[j] = std::max(outCol[j] - noiseLevel2[j], 0.0);
|
Chris@32
|
756 }
|
Chris@32
|
757
|
Chris@165
|
758 out.push_back(outCol);
|
Chris@32
|
759 }
|
Chris@32
|
760
|
Chris@32
|
761 ++m_columnCount;
|
Chris@32
|
762 }
|
Chris@32
|
763
|
Chris@32
|
764 return out;
|
Chris@32
|
765 }
|
Chris@32
|
766
|
Chris@294
|
767 vector<double>
|
Chris@170
|
768 Silvet::postProcess(const vector<double> &pitches,
|
Chris@170
|
769 const vector<int> &bestShifts,
|
Chris@170
|
770 bool wantShifts)
|
Chris@166
|
771 {
|
Chris@176
|
772 const InstrumentPack &pack = m_instruments[m_instrument];
|
Chris@176
|
773
|
Chris@41
|
774 vector<double> filtered;
|
Chris@41
|
775
|
Chris@176
|
776 for (int j = 0; j < pack.templateNoteCount; ++j) {
|
Chris@170
|
777 m_postFilter[j]->push(pitches[j]);
|
Chris@41
|
778 filtered.push_back(m_postFilter[j]->get());
|
Chris@41
|
779 }
|
Chris@41
|
780
|
Chris@41
|
781 // Threshold for level and reduce number of candidate pitches
|
Chris@41
|
782
|
Chris@41
|
783 typedef std::multimap<double, int> ValueIndexMap;
|
Chris@41
|
784
|
Chris@41
|
785 ValueIndexMap strengths;
|
Chris@166
|
786
|
Chris@176
|
787 for (int j = 0; j < pack.templateNoteCount; ++j) {
|
Chris@166
|
788 double strength = filtered[j];
|
Chris@183
|
789 if (strength < pack.levelThreshold) continue;
|
Chris@168
|
790 strengths.insert(ValueIndexMap::value_type(strength, j));
|
Chris@168
|
791 }
|
Chris@166
|
792
|
Chris@168
|
793 ValueIndexMap::const_iterator si = strengths.end();
|
Chris@167
|
794
|
Chris@168
|
795 map<int, double> active;
|
Chris@168
|
796 map<int, int> activeShifts;
|
Chris@168
|
797
|
Chris@183
|
798 while (int(active.size()) < pack.maxPolyphony && si != strengths.begin()) {
|
Chris@168
|
799
|
Chris@168
|
800 --si;
|
Chris@168
|
801
|
Chris@168
|
802 double strength = si->first;
|
Chris@168
|
803 int j = si->second;
|
Chris@168
|
804
|
Chris@168
|
805 active[j] = strength;
|
Chris@168
|
806
|
Chris@170
|
807 if (wantShifts) {
|
Chris@170
|
808 activeShifts[j] = bestShifts[j];
|
Chris@167
|
809 }
|
Chris@41
|
810 }
|
Chris@41
|
811
|
Chris@168
|
812 m_pianoRoll.push_back(active);
|
Chris@170
|
813
|
Chris@170
|
814 if (wantShifts) {
|
Chris@168
|
815 m_pianoRollShifts.push_back(activeShifts);
|
Chris@41
|
816 }
|
Chris@294
|
817
|
Chris@294
|
818 return filtered;
|
Chris@166
|
819 }
|
Chris@166
|
820
|
Chris@166
|
821 Vamp::Plugin::FeatureList
|
Chris@168
|
822 Silvet::noteTrack(int shiftCount)
|
Chris@166
|
823 {
|
Chris@41
|
824 // Minimum duration pruning, and conversion to notes. We can only
|
Chris@41
|
825 // report notes that have just ended (i.e. that are absent in the
|
Chris@168
|
826 // latest active set but present in the prior set in the piano
|
Chris@41
|
827 // roll) -- any notes that ended earlier will have been reported
|
Chris@41
|
828 // already, and if they haven't ended, we don't know their
|
Chris@41
|
829 // duration.
|
Chris@41
|
830
|
Chris@168
|
831 int width = m_pianoRoll.size() - 1;
|
Chris@168
|
832
|
Chris@168
|
833 const map<int, double> &active = m_pianoRoll[width];
|
Chris@41
|
834
|
Chris@165
|
835 double columnDuration = 1.0 / m_colsPerSec;
|
Chris@165
|
836
|
Chris@165
|
837 // only keep notes >= 100ms or thereabouts
|
Chris@165
|
838 int durationThreshold = floor(0.1 / columnDuration); // columns
|
Chris@165
|
839 if (durationThreshold < 1) durationThreshold = 1;
|
Chris@41
|
840
|
Chris@41
|
841 FeatureList noteFeatures;
|
Chris@41
|
842
|
Chris@41
|
843 if (width < durationThreshold + 1) {
|
Chris@41
|
844 return noteFeatures;
|
Chris@41
|
845 }
|
Chris@41
|
846
|
Chris@150
|
847 //!!! try: repeated note detection? (look for change in first derivative of the pitch matrix)
|
Chris@150
|
848
|
Chris@55
|
849 for (map<int, double>::const_iterator ni = m_pianoRoll[width-1].begin();
|
Chris@41
|
850 ni != m_pianoRoll[width-1].end(); ++ni) {
|
Chris@41
|
851
|
Chris@55
|
852 int note = ni->first;
|
Chris@41
|
853
|
Chris@41
|
854 if (active.find(note) != active.end()) {
|
Chris@41
|
855 // the note is still playing
|
Chris@41
|
856 continue;
|
Chris@41
|
857 }
|
Chris@41
|
858
|
Chris@41
|
859 // the note was playing but just ended
|
Chris@41
|
860 int end = width;
|
Chris@41
|
861 int start = end-1;
|
Chris@41
|
862
|
Chris@41
|
863 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) {
|
Chris@41
|
864 --start;
|
Chris@41
|
865 }
|
Chris@41
|
866 ++start;
|
Chris@41
|
867
|
Chris@169
|
868 if ((end - start) < durationThreshold) {
|
Chris@41
|
869 continue;
|
Chris@41
|
870 }
|
Chris@41
|
871
|
Chris@169
|
872 emitNote(start, end, note, shiftCount, noteFeatures);
|
Chris@41
|
873 }
|
Chris@41
|
874
|
Chris@62
|
875 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl;
|
Chris@41
|
876
|
Chris@41
|
877 return noteFeatures;
|
Chris@41
|
878 }
|
Chris@41
|
879
|
Chris@169
|
880 void
|
Chris@169
|
881 Silvet::emitNote(int start, int end, int note, int shiftCount,
|
Chris@169
|
882 FeatureList ¬eFeatures)
|
Chris@169
|
883 {
|
Chris@169
|
884 int partStart = start;
|
Chris@169
|
885 int partShift = 0;
|
Chris@169
|
886 int partVelocity = 0;
|
Chris@169
|
887
|
Chris@252
|
888 int partThreshold = floor(0.05 * m_colsPerSec);
|
Chris@169
|
889
|
Chris@169
|
890 for (int i = start; i != end; ++i) {
|
Chris@169
|
891
|
Chris@169
|
892 double strength = m_pianoRoll[i][note];
|
Chris@169
|
893
|
Chris@169
|
894 int shift = 0;
|
Chris@169
|
895
|
Chris@169
|
896 if (shiftCount > 1) {
|
Chris@169
|
897
|
Chris@169
|
898 shift = m_pianoRollShifts[i][note];
|
Chris@169
|
899
|
Chris@169
|
900 if (i == partStart) {
|
Chris@169
|
901 partShift = shift;
|
Chris@169
|
902 }
|
Chris@169
|
903
|
Chris@169
|
904 if (i > partStart + partThreshold && shift != partShift) {
|
Chris@169
|
905
|
Chris@169
|
906 // cerr << "i = " << i << ", partStart = " << partStart << ", shift = " << shift << ", partShift = " << partShift << endl;
|
Chris@169
|
907
|
Chris@169
|
908 // pitch has changed, emit an intermediate note
|
Chris@252
|
909 noteFeatures.push_back(makeNoteFeature(partStart,
|
Chris@252
|
910 i,
|
Chris@252
|
911 note,
|
Chris@252
|
912 partShift,
|
Chris@252
|
913 shiftCount,
|
Chris@252
|
914 partVelocity));
|
Chris@169
|
915 partStart = i;
|
Chris@169
|
916 partShift = shift;
|
Chris@169
|
917 partVelocity = 0;
|
Chris@169
|
918 }
|
Chris@169
|
919 }
|
Chris@169
|
920
|
Chris@246
|
921 int v = round(strength * 2);
|
Chris@169
|
922 if (v > partVelocity) {
|
Chris@169
|
923 partVelocity = v;
|
Chris@169
|
924 }
|
Chris@169
|
925 }
|
Chris@169
|
926
|
Chris@169
|
927 if (end >= partStart + partThreshold) {
|
Chris@252
|
928 noteFeatures.push_back(makeNoteFeature(partStart,
|
Chris@252
|
929 end,
|
Chris@252
|
930 note,
|
Chris@252
|
931 partShift,
|
Chris@252
|
932 shiftCount,
|
Chris@252
|
933 partVelocity));
|
Chris@169
|
934 }
|
Chris@169
|
935 }
|
Chris@252
|
936
|
Chris@309
|
937 RealTime
|
Chris@309
|
938 Silvet::getColumnTimestamp(int column)
|
Chris@309
|
939 {
|
Chris@309
|
940 double columnDuration = 1.0 / m_colsPerSec;
|
Chris@309
|
941 int postFilterLatency = int(m_postFilter[0]->getSize() / 2);
|
Chris@309
|
942
|
Chris@309
|
943 return m_startTime + RealTime::fromSeconds
|
Chris@309
|
944 (columnDuration * (column - postFilterLatency) + 0.02);
|
Chris@309
|
945 }
|
Chris@309
|
946
|
Chris@252
|
947 Silvet::Feature
|
Chris@252
|
948 Silvet::makeNoteFeature(int start,
|
Chris@252
|
949 int end,
|
Chris@252
|
950 int note,
|
Chris@252
|
951 int shift,
|
Chris@252
|
952 int shiftCount,
|
Chris@252
|
953 int velocity)
|
Chris@252
|
954 {
|
Chris@252
|
955 Feature f;
|
Chris@252
|
956
|
Chris@252
|
957 f.hasTimestamp = true;
|
Chris@309
|
958 f.timestamp = getColumnTimestamp(start);
|
Chris@252
|
959
|
Chris@252
|
960 f.hasDuration = true;
|
Chris@309
|
961 f.duration = getColumnTimestamp(end) - f.timestamp;
|
Chris@252
|
962
|
Chris@252
|
963 f.values.clear();
|
Chris@252
|
964
|
Chris@252
|
965 f.values.push_back
|
Chris@252
|
966 (noteFrequency(note, shift, shiftCount));
|
Chris@252
|
967
|
Chris@252
|
968 float inputGain = getInputGainAt(f.timestamp);
|
Chris@252
|
969 // cerr << "adjusting velocity from " << velocity << " to " << round(velocity/inputGain) << endl;
|
Chris@252
|
970 velocity = round(velocity / inputGain);
|
Chris@252
|
971 if (velocity > 127) velocity = 127;
|
Chris@252
|
972 if (velocity < 1) velocity = 1;
|
Chris@252
|
973 f.values.push_back(velocity);
|
Chris@252
|
974
|
Chris@252
|
975 f.label = noteName(note, shift, shiftCount);
|
Chris@252
|
976
|
Chris@252
|
977 return f;
|
Chris@252
|
978 }
|
Chris@252
|
979
|
Chris@252
|
980 float
|
Chris@252
|
981 Silvet::getInputGainAt(RealTime t)
|
Chris@252
|
982 {
|
Chris@252
|
983 map<RealTime, float>::const_iterator i = m_inputGains.lower_bound(t);
|
Chris@252
|
984
|
Chris@252
|
985 if (i == m_inputGains.end()) {
|
Chris@252
|
986 if (i != m_inputGains.begin()) {
|
Chris@252
|
987 --i;
|
Chris@252
|
988 } else {
|
Chris@252
|
989 return 1.f; // no data
|
Chris@252
|
990 }
|
Chris@252
|
991 }
|
Chris@252
|
992
|
Chris@252
|
993 // cerr << "gain at time " << t << " = " << i->second << endl;
|
Chris@252
|
994
|
Chris@252
|
995 return i->second;
|
Chris@252
|
996 }
|
Chris@252
|
997
|