Chris@31
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
Chris@31
|
2
|
Chris@31
|
3 /*
|
Chris@31
|
4 Silvet
|
Chris@31
|
5
|
Chris@31
|
6 A Vamp plugin for note transcription.
|
Chris@31
|
7 Centre for Digital Music, Queen Mary University of London.
|
Chris@31
|
8
|
Chris@31
|
9 This program is free software; you can redistribute it and/or
|
Chris@31
|
10 modify it under the terms of the GNU General Public License as
|
Chris@31
|
11 published by the Free Software Foundation; either version 2 of the
|
Chris@31
|
12 License, or (at your option) any later version. See the file
|
Chris@31
|
13 COPYING included with this distribution for more information.
|
Chris@31
|
14 */
|
Chris@31
|
15
|
Chris@31
|
16 #include "Silvet.h"
|
Chris@34
|
17 #include "EM.h"
|
Chris@31
|
18
|
Chris@152
|
19 #include <cq/CQSpectrogram.h>
|
Chris@31
|
20
|
Chris@152
|
21 #include "MedianFilter.h"
|
Chris@152
|
22 #include "constant-q-cpp/src/dsp/Resampler.h"
|
Chris@246
|
23 #include "flattendynamics-ladspa.h"
|
Chris@31
|
24
|
Chris@31
|
25 #include <vector>
|
Chris@31
|
26
|
Chris@32
|
27 #include <cstdio>
|
Chris@32
|
28
|
Chris@31
|
29 using std::vector;
|
Chris@48
|
30 using std::cout;
|
Chris@31
|
31 using std::cerr;
|
Chris@31
|
32 using std::endl;
|
Chris@311
|
33 using std::pair;
|
Chris@40
|
34 using Vamp::RealTime;
|
Chris@31
|
35
|
Chris@31
|
36 static int processingSampleRate = 44100;
|
Chris@31
|
37 static int processingBPO = 60;
|
Chris@170
|
38
|
Chris@272
|
39 static int minInputSampleRate = 100;
|
Chris@272
|
40 static int maxInputSampleRate = 192000;
|
Chris@272
|
41
|
Chris@31
|
42 Silvet::Silvet(float inputSampleRate) :
|
Chris@31
|
43 Plugin(inputSampleRate),
|
Chris@161
|
44 m_instruments(InstrumentPack::listInstrumentPacks()),
|
Chris@31
|
45 m_resampler(0),
|
Chris@246
|
46 m_flattener(0),
|
Chris@110
|
47 m_cq(0),
|
Chris@162
|
48 m_hqMode(true),
|
Chris@166
|
49 m_fineTuning(false),
|
Chris@178
|
50 m_instrument(0),
|
Chris@178
|
51 m_colsPerSec(50)
|
Chris@31
|
52 {
|
Chris@31
|
53 }
|
Chris@31
|
54
|
Chris@31
|
55 Silvet::~Silvet()
|
Chris@31
|
56 {
|
Chris@31
|
57 delete m_resampler;
|
Chris@246
|
58 delete m_flattener;
|
Chris@31
|
59 delete m_cq;
|
Chris@41
|
60 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
|
Chris@41
|
61 delete m_postFilter[i];
|
Chris@41
|
62 }
|
Chris@31
|
63 }
|
Chris@31
|
64
|
Chris@31
|
65 string
|
Chris@31
|
66 Silvet::getIdentifier() const
|
Chris@31
|
67 {
|
Chris@31
|
68 return "silvet";
|
Chris@31
|
69 }
|
Chris@31
|
70
|
Chris@31
|
71 string
|
Chris@31
|
72 Silvet::getName() const
|
Chris@31
|
73 {
|
Chris@31
|
74 return "Silvet Note Transcription";
|
Chris@31
|
75 }
|
Chris@31
|
76
|
Chris@31
|
77 string
|
Chris@31
|
78 Silvet::getDescription() const
|
Chris@31
|
79 {
|
Chris@191
|
80 return "Estimate the note onsets, pitches, and durations that make up a music recording.";
|
Chris@31
|
81 }
|
Chris@31
|
82
|
Chris@31
|
83 string
|
Chris@31
|
84 Silvet::getMaker() const
|
Chris@31
|
85 {
|
Chris@191
|
86 return "Queen Mary, University of London";
|
Chris@31
|
87 }
|
Chris@31
|
88
|
Chris@31
|
89 int
|
Chris@31
|
90 Silvet::getPluginVersion() const
|
Chris@31
|
91 {
|
Chris@309
|
92 return 3;
|
Chris@31
|
93 }
|
Chris@31
|
94
|
Chris@31
|
95 string
|
Chris@31
|
96 Silvet::getCopyright() const
|
Chris@31
|
97 {
|
Chris@191
|
98 return "Method by Emmanouil Benetos and Simon Dixon; plugin by Chris Cannam and Emmanouil Benetos. GPL licence.";
|
Chris@31
|
99 }
|
Chris@31
|
100
|
Chris@31
|
101 Silvet::InputDomain
|
Chris@31
|
102 Silvet::getInputDomain() const
|
Chris@31
|
103 {
|
Chris@31
|
104 return TimeDomain;
|
Chris@31
|
105 }
|
Chris@31
|
106
|
Chris@31
|
107 size_t
|
Chris@31
|
108 Silvet::getPreferredBlockSize() const
|
Chris@31
|
109 {
|
Chris@31
|
110 return 0;
|
Chris@31
|
111 }
|
Chris@31
|
112
|
Chris@31
|
113 size_t
|
Chris@31
|
114 Silvet::getPreferredStepSize() const
|
Chris@31
|
115 {
|
Chris@31
|
116 return 0;
|
Chris@31
|
117 }
|
Chris@31
|
118
|
Chris@31
|
119 size_t
|
Chris@31
|
120 Silvet::getMinChannelCount() const
|
Chris@31
|
121 {
|
Chris@31
|
122 return 1;
|
Chris@31
|
123 }
|
Chris@31
|
124
|
Chris@31
|
125 size_t
|
Chris@31
|
126 Silvet::getMaxChannelCount() const
|
Chris@31
|
127 {
|
Chris@31
|
128 return 1;
|
Chris@31
|
129 }
|
Chris@31
|
130
|
Chris@31
|
131 Silvet::ParameterList
|
Chris@31
|
132 Silvet::getParameterDescriptors() const
|
Chris@31
|
133 {
|
Chris@31
|
134 ParameterList list;
|
Chris@110
|
135
|
Chris@110
|
136 ParameterDescriptor desc;
|
Chris@110
|
137 desc.identifier = "mode";
|
Chris@110
|
138 desc.name = "Processing mode";
|
Chris@110
|
139 desc.unit = "";
|
Chris@271
|
140 desc.description = "Sets the tradeoff of processing speed against transcription quality. Draft mode modifies a number of internal parameters in favour of speed. Intensive mode (the default) will almost always produce better results.";
|
Chris@110
|
141 desc.minValue = 0;
|
Chris@110
|
142 desc.maxValue = 1;
|
Chris@113
|
143 desc.defaultValue = 1;
|
Chris@110
|
144 desc.isQuantized = true;
|
Chris@110
|
145 desc.quantizeStep = 1;
|
Chris@166
|
146 desc.valueNames.push_back("Draft (faster)");
|
Chris@165
|
147 desc.valueNames.push_back("Intensive (higher quality)");
|
Chris@161
|
148 list.push_back(desc);
|
Chris@161
|
149
|
Chris@176
|
150 desc.identifier = "instrument";
|
Chris@176
|
151 desc.name = "Instrument";
|
Chris@161
|
152 desc.unit = "";
|
Chris@271
|
153 desc.description = "The instrument or instruments known to be present in the recording. This affects the set of instrument templates used, as well as the expected level of polyphony in the output. Using a more limited set of instruments than the default will also make the plugin run faster.\nNote that this plugin cannot isolate instruments: you can't use this setting to request notes from only one instrument in a recording with several. Instead, use this as a hint to the plugin about which instruments are actually present.";
|
Chris@161
|
154 desc.minValue = 0;
|
Chris@162
|
155 desc.maxValue = m_instruments.size()-1;
|
Chris@162
|
156 desc.defaultValue = 0;
|
Chris@161
|
157 desc.isQuantized = true;
|
Chris@161
|
158 desc.quantizeStep = 1;
|
Chris@161
|
159 desc.valueNames.clear();
|
Chris@162
|
160 for (int i = 0; i < int(m_instruments.size()); ++i) {
|
Chris@162
|
161 desc.valueNames.push_back(m_instruments[i].name);
|
Chris@162
|
162 }
|
Chris@166
|
163 list.push_back(desc);
|
Chris@161
|
164
|
Chris@166
|
165 desc.identifier = "finetune";
|
Chris@166
|
166 desc.name = "Return fine pitch estimates";
|
Chris@166
|
167 desc.unit = "";
|
Chris@271
|
168 desc.description = "Return pitch estimates at finer than semitone resolution. This works only in Intensive mode. Notes that appear to drift in pitch will be split up into shorter notes with individually finer pitches.";
|
Chris@166
|
169 desc.minValue = 0;
|
Chris@166
|
170 desc.maxValue = 1;
|
Chris@166
|
171 desc.defaultValue = 0;
|
Chris@166
|
172 desc.isQuantized = true;
|
Chris@166
|
173 desc.quantizeStep = 1;
|
Chris@166
|
174 desc.valueNames.clear();
|
Chris@110
|
175 list.push_back(desc);
|
Chris@110
|
176
|
Chris@31
|
177 return list;
|
Chris@31
|
178 }
|
Chris@31
|
179
|
Chris@31
|
180 float
|
Chris@31
|
181 Silvet::getParameter(string identifier) const
|
Chris@31
|
182 {
|
Chris@110
|
183 if (identifier == "mode") {
|
Chris@110
|
184 return m_hqMode ? 1.f : 0.f;
|
Chris@166
|
185 } else if (identifier == "finetune") {
|
Chris@166
|
186 return m_fineTuning ? 1.f : 0.f;
|
Chris@176
|
187 } else if (identifier == "instrument") {
|
Chris@162
|
188 return m_instrument;
|
Chris@110
|
189 }
|
Chris@31
|
190 return 0;
|
Chris@31
|
191 }
|
Chris@31
|
192
|
Chris@31
|
193 void
|
Chris@31
|
194 Silvet::setParameter(string identifier, float value)
|
Chris@31
|
195 {
|
Chris@110
|
196 if (identifier == "mode") {
|
Chris@110
|
197 m_hqMode = (value > 0.5);
|
Chris@166
|
198 } else if (identifier == "finetune") {
|
Chris@166
|
199 m_fineTuning = (value > 0.5);
|
Chris@176
|
200 } else if (identifier == "instrument") {
|
Chris@162
|
201 m_instrument = lrintf(value);
|
Chris@110
|
202 }
|
Chris@31
|
203 }
|
Chris@31
|
204
|
Chris@31
|
205 Silvet::ProgramList
|
Chris@31
|
206 Silvet::getPrograms() const
|
Chris@31
|
207 {
|
Chris@31
|
208 ProgramList list;
|
Chris@31
|
209 return list;
|
Chris@31
|
210 }
|
Chris@31
|
211
|
Chris@31
|
212 string
|
Chris@31
|
213 Silvet::getCurrentProgram() const
|
Chris@31
|
214 {
|
Chris@31
|
215 return "";
|
Chris@31
|
216 }
|
Chris@31
|
217
|
Chris@31
|
218 void
|
Chris@31
|
219 Silvet::selectProgram(string name)
|
Chris@31
|
220 {
|
Chris@31
|
221 }
|
Chris@31
|
222
|
Chris@31
|
223 Silvet::OutputList
|
Chris@31
|
224 Silvet::getOutputDescriptors() const
|
Chris@31
|
225 {
|
Chris@31
|
226 OutputList list;
|
Chris@31
|
227
|
Chris@31
|
228 OutputDescriptor d;
|
Chris@51
|
229 d.identifier = "notes";
|
Chris@51
|
230 d.name = "Note transcription";
|
Chris@271
|
231 d.description = "Overall note transcription. Each note has time, duration, estimated pitch, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture.";
|
Chris@41
|
232 d.unit = "Hz";
|
Chris@31
|
233 d.hasFixedBinCount = true;
|
Chris@31
|
234 d.binCount = 2;
|
Chris@41
|
235 d.binNames.push_back("Frequency");
|
Chris@31
|
236 d.binNames.push_back("Velocity");
|
Chris@31
|
237 d.hasKnownExtents = false;
|
Chris@31
|
238 d.isQuantized = false;
|
Chris@31
|
239 d.sampleType = OutputDescriptor::VariableSampleRate;
|
Chris@246
|
240 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
|
Chris@31
|
241 d.hasDuration = true;
|
Chris@32
|
242 m_notesOutputNo = list.size();
|
Chris@32
|
243 list.push_back(d);
|
Chris@32
|
244
|
Chris@178
|
245 d.identifier = "timefreq";
|
Chris@178
|
246 d.name = "Time-frequency distribution";
|
Chris@271
|
247 d.description = "Filtered constant-Q time-frequency distribution as used as input to the expectation-maximisation algorithm.";
|
Chris@178
|
248 d.unit = "";
|
Chris@178
|
249 d.hasFixedBinCount = true;
|
Chris@178
|
250 d.binCount = m_instruments[0].templateHeight;
|
Chris@178
|
251 d.binNames.clear();
|
Chris@178
|
252 if (m_cq) {
|
Chris@294
|
253 char name[50];
|
Chris@178
|
254 for (int i = 0; i < m_instruments[0].templateHeight; ++i) {
|
Chris@178
|
255 // We have a 600-bin (10 oct 60-bin CQ) of which the
|
Chris@178
|
256 // lowest-frequency 55 bins have been dropped, for a
|
Chris@178
|
257 // 545-bin template. The native CQ bins go high->low
|
Chris@178
|
258 // frequency though, so these are still the first 545 bins
|
Chris@178
|
259 // as reported by getBinFrequency, though in reverse order
|
Chris@178
|
260 float freq = m_cq->getBinFrequency
|
Chris@178
|
261 (m_instruments[0].templateHeight - i - 1);
|
Chris@178
|
262 sprintf(name, "%.1f Hz", freq);
|
Chris@178
|
263 d.binNames.push_back(name);
|
Chris@178
|
264 }
|
Chris@178
|
265 }
|
Chris@178
|
266 d.hasKnownExtents = false;
|
Chris@178
|
267 d.isQuantized = false;
|
Chris@178
|
268 d.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@178
|
269 d.sampleRate = m_colsPerSec;
|
Chris@178
|
270 d.hasDuration = false;
|
Chris@178
|
271 m_fcqOutputNo = list.size();
|
Chris@178
|
272 list.push_back(d);
|
Chris@178
|
273
|
Chris@294
|
274 d.identifier = "pitchactivation";
|
Chris@294
|
275 d.name = "Pitch activation distribution";
|
Chris@294
|
276 d.description = "Pitch activation distribution resulting from expectation-maximisation algorithm, prior to note extraction.";
|
Chris@294
|
277 d.unit = "";
|
Chris@294
|
278 d.hasFixedBinCount = true;
|
Chris@294
|
279 d.binCount = m_instruments[0].templateNoteCount;
|
Chris@294
|
280 d.binNames.clear();
|
Chris@294
|
281 if (m_cq) {
|
Chris@294
|
282 for (int i = 0; i < m_instruments[0].templateNoteCount; ++i) {
|
Chris@294
|
283 d.binNames.push_back(noteName(i, 0, 1));
|
Chris@294
|
284 }
|
Chris@294
|
285 }
|
Chris@294
|
286 d.hasKnownExtents = false;
|
Chris@294
|
287 d.isQuantized = false;
|
Chris@294
|
288 d.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@294
|
289 d.sampleRate = m_colsPerSec;
|
Chris@294
|
290 d.hasDuration = false;
|
Chris@294
|
291 m_pitchOutputNo = list.size();
|
Chris@294
|
292 list.push_back(d);
|
Chris@294
|
293
|
Chris@309
|
294 d.identifier = "chroma";
|
Chris@309
|
295 d.name = "Pitch chroma distribution";
|
Chris@309
|
296 d.description = "Pitch chroma distribution formed by wrapping the un-thresholded pitch activation distribution into a single octave of semitone bins.";
|
Chris@309
|
297 d.unit = "";
|
Chris@309
|
298 d.hasFixedBinCount = true;
|
Chris@309
|
299 d.binCount = 12;
|
Chris@309
|
300 d.binNames.clear();
|
Chris@309
|
301 if (m_cq) {
|
Chris@309
|
302 for (int i = 0; i < 12; ++i) {
|
Chris@309
|
303 d.binNames.push_back(chromaName(i));
|
Chris@309
|
304 }
|
Chris@309
|
305 }
|
Chris@309
|
306 d.hasKnownExtents = false;
|
Chris@309
|
307 d.isQuantized = false;
|
Chris@309
|
308 d.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@309
|
309 d.sampleRate = m_colsPerSec;
|
Chris@309
|
310 d.hasDuration = false;
|
Chris@309
|
311 m_chromaOutputNo = list.size();
|
Chris@309
|
312 list.push_back(d);
|
Chris@309
|
313
|
Chris@31
|
314 return list;
|
Chris@31
|
315 }
|
Chris@31
|
316
|
Chris@38
|
317 std::string
|
Chris@309
|
318 Silvet::chromaName(int pitch) const
|
Chris@38
|
319 {
|
Chris@38
|
320 static const char *names[] = {
|
Chris@38
|
321 "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"
|
Chris@38
|
322 };
|
Chris@38
|
323
|
Chris@309
|
324 return names[pitch];
|
Chris@309
|
325 }
|
Chris@309
|
326
|
Chris@309
|
327 std::string
|
Chris@309
|
328 Silvet::noteName(int note, int shift, int shiftCount) const
|
Chris@309
|
329 {
|
Chris@309
|
330 string n = chromaName(note % 12);
|
Chris@38
|
331
|
Chris@175
|
332 int oct = (note + 9) / 12;
|
Chris@38
|
333
|
Chris@175
|
334 char buf[30];
|
Chris@175
|
335
|
Chris@175
|
336 float pshift = 0.f;
|
Chris@175
|
337 if (shiftCount > 1) {
|
Chris@175
|
338 // see noteFrequency below
|
Chris@175
|
339 pshift =
|
Chris@175
|
340 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
|
Chris@175
|
341 }
|
Chris@175
|
342
|
Chris@175
|
343 if (pshift > 0.f) {
|
Chris@309
|
344 sprintf(buf, "%s%d+%dc", n.c_str(), oct, int(round(pshift * 100)));
|
Chris@175
|
345 } else if (pshift < 0.f) {
|
Chris@309
|
346 sprintf(buf, "%s%d-%dc", n.c_str(), oct, int(round((-pshift) * 100)));
|
Chris@175
|
347 } else {
|
Chris@309
|
348 sprintf(buf, "%s%d", n.c_str(), oct);
|
Chris@175
|
349 }
|
Chris@38
|
350
|
Chris@38
|
351 return buf;
|
Chris@38
|
352 }
|
Chris@38
|
353
|
Chris@41
|
354 float
|
Chris@168
|
355 Silvet::noteFrequency(int note, int shift, int shiftCount) const
|
Chris@41
|
356 {
|
Chris@169
|
357 // Convert shift number to a pitch shift. The given shift number
|
Chris@169
|
358 // is an offset into the template array, which starts with some
|
Chris@169
|
359 // zeros, followed by the template, then some trailing zeros.
|
Chris@169
|
360 //
|
Chris@169
|
361 // Example: if we have templateMaxShift == 2 and thus shiftCount
|
Chris@169
|
362 // == 5, then the number will be in the range 0-4 and the template
|
Chris@169
|
363 // will have 2 zeros at either end. Thus number 2 represents the
|
Chris@169
|
364 // template "as recorded", for a pitch shift of 0; smaller indices
|
Chris@169
|
365 // represent moving the template *up* in pitch (by introducing
|
Chris@169
|
366 // zeros at the start, which is the low-frequency end), for a
|
Chris@169
|
367 // positive pitch shift; and higher values represent moving it
|
Chris@169
|
368 // down in pitch, for a negative pitch shift.
|
Chris@169
|
369
|
Chris@175
|
370 float pshift = 0.f;
|
Chris@175
|
371 if (shiftCount > 1) {
|
Chris@175
|
372 pshift =
|
Chris@175
|
373 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
|
Chris@175
|
374 }
|
Chris@169
|
375
|
Chris@169
|
376 return float(27.5 * pow(2.0, (note + pshift) / 12.0));
|
Chris@41
|
377 }
|
Chris@41
|
378
|
Chris@31
|
379 bool
|
Chris@31
|
380 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
Chris@31
|
381 {
|
Chris@272
|
382 if (m_inputSampleRate < minInputSampleRate ||
|
Chris@272
|
383 m_inputSampleRate > maxInputSampleRate) {
|
Chris@272
|
384 cerr << "Silvet::initialise: Unsupported input sample rate "
|
Chris@272
|
385 << m_inputSampleRate << " (supported min " << minInputSampleRate
|
Chris@272
|
386 << ", max " << maxInputSampleRate << ")" << endl;
|
Chris@272
|
387 return false;
|
Chris@272
|
388 }
|
Chris@272
|
389
|
Chris@31
|
390 if (channels < getMinChannelCount() ||
|
Chris@272
|
391 channels > getMaxChannelCount()) {
|
Chris@272
|
392 cerr << "Silvet::initialise: Unsupported channel count " << channels
|
Chris@272
|
393 << " (supported min " << getMinChannelCount() << ", max "
|
Chris@272
|
394 << getMaxChannelCount() << ")" << endl;
|
Chris@272
|
395 return false;
|
Chris@272
|
396 }
|
Chris@31
|
397
|
Chris@31
|
398 if (stepSize != blockSize) {
|
Chris@31
|
399 cerr << "Silvet::initialise: Step size must be the same as block size ("
|
Chris@31
|
400 << stepSize << " != " << blockSize << ")" << endl;
|
Chris@31
|
401 return false;
|
Chris@31
|
402 }
|
Chris@31
|
403
|
Chris@31
|
404 m_blockSize = blockSize;
|
Chris@31
|
405
|
Chris@31
|
406 reset();
|
Chris@31
|
407
|
Chris@31
|
408 return true;
|
Chris@31
|
409 }
|
Chris@31
|
410
|
Chris@31
|
411 void
|
Chris@31
|
412 Silvet::reset()
|
Chris@31
|
413 {
|
Chris@31
|
414 delete m_resampler;
|
Chris@246
|
415 delete m_flattener;
|
Chris@31
|
416 delete m_cq;
|
Chris@31
|
417
|
Chris@31
|
418 if (m_inputSampleRate != processingSampleRate) {
|
Chris@31
|
419 m_resampler = new Resampler(m_inputSampleRate, processingSampleRate);
|
Chris@31
|
420 } else {
|
Chris@31
|
421 m_resampler = 0;
|
Chris@31
|
422 }
|
Chris@31
|
423
|
Chris@246
|
424 m_flattener = new FlattenDynamics(m_inputSampleRate); // before resampling
|
Chris@246
|
425 m_flattener->reset();
|
Chris@246
|
426
|
Chris@173
|
427 double minFreq = 27.5;
|
Chris@173
|
428
|
Chris@173
|
429 if (!m_hqMode) {
|
Chris@173
|
430 // We don't actually return any notes from the bottom octave,
|
Chris@173
|
431 // so we can just pad with zeros
|
Chris@173
|
432 minFreq *= 2;
|
Chris@173
|
433 }
|
Chris@173
|
434
|
Chris@154
|
435 CQParameters params(processingSampleRate,
|
Chris@173
|
436 minFreq,
|
Chris@154
|
437 processingSampleRate / 3,
|
Chris@154
|
438 processingBPO);
|
Chris@154
|
439
|
Chris@155
|
440 params.q = 0.95; // MIREX code uses 0.8, but it seems 0.9 or lower
|
Chris@155
|
441 // drops the FFT size to 512 from 1024 and alters
|
Chris@155
|
442 // some other processing parameters, making
|
Chris@155
|
443 // everything much, much slower. Could be a flaw
|
Chris@155
|
444 // in the CQ parameter calculations, must check
|
Chris@154
|
445 params.atomHopFactor = 0.3;
|
Chris@154
|
446 params.threshold = 0.0005;
|
Chris@172
|
447 params.window = CQParameters::Hann;
|
Chris@154
|
448
|
Chris@154
|
449 m_cq = new CQSpectrogram(params, CQSpectrogram::InterpolateLinear);
|
Chris@31
|
450
|
Chris@165
|
451 m_colsPerSec = m_hqMode ? 50 : 25;
|
Chris@165
|
452
|
Chris@41
|
453 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
|
Chris@41
|
454 delete m_postFilter[i];
|
Chris@41
|
455 }
|
Chris@41
|
456 m_postFilter.clear();
|
Chris@176
|
457 for (int i = 0; i < m_instruments[0].templateNoteCount; ++i) {
|
Chris@41
|
458 m_postFilter.push_back(new MedianFilter<double>(3));
|
Chris@41
|
459 }
|
Chris@41
|
460 m_pianoRoll.clear();
|
Chris@246
|
461 m_inputGains.clear();
|
Chris@32
|
462 m_columnCount = 0;
|
Chris@272
|
463 m_resampledCount = 0;
|
Chris@40
|
464 m_startTime = RealTime::zeroTime;
|
Chris@31
|
465 }
|
Chris@31
|
466
|
Chris@31
|
467 Silvet::FeatureSet
|
Chris@31
|
468 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
Chris@31
|
469 {
|
Chris@40
|
470 if (m_columnCount == 0) {
|
Chris@40
|
471 m_startTime = timestamp;
|
Chris@40
|
472 }
|
Chris@246
|
473
|
Chris@246
|
474 vector<float> flattened(m_blockSize);
|
Chris@246
|
475 float gain = 1.f;
|
Chris@246
|
476 m_flattener->connectInputPort
|
Chris@246
|
477 (FlattenDynamics::AudioInputPort, inputBuffers[0]);
|
Chris@246
|
478 m_flattener->connectOutputPort
|
Chris@246
|
479 (FlattenDynamics::AudioOutputPort, &flattened[0]);
|
Chris@246
|
480 m_flattener->connectOutputPort
|
Chris@246
|
481 (FlattenDynamics::GainOutputPort, &gain);
|
Chris@246
|
482 m_flattener->process(m_blockSize);
|
Chris@246
|
483
|
Chris@252
|
484 m_inputGains[timestamp] = gain;
|
Chris@40
|
485
|
Chris@31
|
486 vector<double> data;
|
Chris@40
|
487 for (int i = 0; i < m_blockSize; ++i) {
|
Chris@246
|
488 double d = flattened[i];
|
Chris@235
|
489 data.push_back(d);
|
Chris@40
|
490 }
|
Chris@31
|
491
|
Chris@31
|
492 if (m_resampler) {
|
Chris@272
|
493
|
Chris@31
|
494 data = m_resampler->process(data.data(), data.size());
|
Chris@272
|
495
|
Chris@272
|
496 int hadCount = m_resampledCount;
|
Chris@272
|
497 m_resampledCount += data.size();
|
Chris@272
|
498
|
Chris@272
|
499 int resamplerLatency = m_resampler->getLatency();
|
Chris@272
|
500
|
Chris@272
|
501 if (hadCount < resamplerLatency) {
|
Chris@272
|
502 int stillToDrop = resamplerLatency - hadCount;
|
Chris@272
|
503 if (stillToDrop >= int(data.size())) {
|
Chris@272
|
504 return FeatureSet();
|
Chris@272
|
505 } else {
|
Chris@272
|
506 data = vector<double>(data.begin() + stillToDrop, data.end());
|
Chris@272
|
507 }
|
Chris@272
|
508 }
|
Chris@31
|
509 }
|
Chris@272
|
510
|
Chris@32
|
511 Grid cqout = m_cq->process(data);
|
Chris@51
|
512 FeatureSet fs = transcribe(cqout);
|
Chris@51
|
513 return fs;
|
Chris@34
|
514 }
|
Chris@34
|
515
|
Chris@34
|
516 Silvet::FeatureSet
|
Chris@34
|
517 Silvet::getRemainingFeatures()
|
Chris@34
|
518 {
|
Chris@145
|
519 Grid cqout = m_cq->getRemainingOutput();
|
Chris@51
|
520 FeatureSet fs = transcribe(cqout);
|
Chris@51
|
521 return fs;
|
Chris@34
|
522 }
|
Chris@34
|
523
|
Chris@34
|
524 Silvet::FeatureSet
|
Chris@34
|
525 Silvet::transcribe(const Grid &cqout)
|
Chris@34
|
526 {
|
Chris@32
|
527 Grid filtered = preProcess(cqout);
|
Chris@31
|
528
|
Chris@32
|
529 FeatureSet fs;
|
Chris@32
|
530
|
Chris@104
|
531 if (filtered.empty()) return fs;
|
Chris@170
|
532
|
Chris@170
|
533 const InstrumentPack &pack = m_instruments[m_instrument];
|
Chris@104
|
534
|
Chris@178
|
535 for (int i = 0; i < (int)filtered.size(); ++i) {
|
Chris@178
|
536 Feature f;
|
Chris@178
|
537 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@178
|
538 f.values.push_back(float(filtered[i][j]));
|
Chris@178
|
539 }
|
Chris@178
|
540 fs[m_fcqOutputNo].push_back(f);
|
Chris@178
|
541 }
|
Chris@178
|
542
|
Chris@34
|
543 int width = filtered.size();
|
Chris@34
|
544
|
Chris@311
|
545 Grid localPitches(width);
|
Chris@170
|
546
|
Chris@170
|
547 bool wantShifts = m_hqMode && m_fineTuning;
|
Chris@170
|
548 int shiftCount = 1;
|
Chris@170
|
549 if (wantShifts) {
|
Chris@170
|
550 shiftCount = pack.templateMaxShift * 2 + 1;
|
Chris@170
|
551 }
|
Chris@170
|
552
|
Chris@170
|
553 vector<vector<int> > localBestShifts;
|
Chris@170
|
554 if (wantShifts) {
|
Chris@311
|
555 localBestShifts = vector<vector<int> >(width);
|
Chris@170
|
556 }
|
Chris@170
|
557
|
Chris@123
|
558 for (int i = 0; i < width; ++i) {
|
Chris@311
|
559 auto out = applyEM(pack, filtered.at(i), wantShifts);
|
Chris@311
|
560 localPitches[i] = out.first;
|
Chris@311
|
561 if (wantShifts) localBestShifts[i] = out.second;
|
Chris@123
|
562 }
|
Chris@166
|
563
|
Chris@166
|
564 for (int i = 0; i < width; ++i) {
|
Chris@37
|
565
|
Chris@309
|
566 // This returns a filtered column, and pushes the
|
Chris@309
|
567 // up-to-max-polyphony activation column to m_pianoRoll
|
Chris@294
|
568 vector<double> filtered = postProcess
|
Chris@294
|
569 (localPitches[i], localBestShifts[i], wantShifts);
|
Chris@294
|
570
|
Chris@309
|
571 RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1);
|
Chris@309
|
572 float inputGain = getInputGainAt(timestamp);
|
Chris@309
|
573
|
Chris@294
|
574 Feature f;
|
Chris@294
|
575 for (int j = 0; j < (int)filtered.size(); ++j) {
|
Chris@309
|
576 float v = filtered[j];
|
Chris@294
|
577 if (v < pack.levelThreshold) v = 0.f;
|
Chris@309
|
578 f.values.push_back(v / inputGain);
|
Chris@294
|
579 }
|
Chris@294
|
580 fs[m_pitchOutputNo].push_back(f);
|
Chris@309
|
581
|
Chris@309
|
582 f.values.clear();
|
Chris@309
|
583 f.values.resize(12);
|
Chris@309
|
584 for (int j = 0; j < (int)filtered.size(); ++j) {
|
Chris@309
|
585 f.values[j % 12] += filtered[j] / inputGain;
|
Chris@309
|
586 }
|
Chris@309
|
587 fs[m_chromaOutputNo].push_back(f);
|
Chris@166
|
588
|
Chris@168
|
589 FeatureList noteFeatures = noteTrack(shiftCount);
|
Chris@38
|
590
|
Chris@123
|
591 for (FeatureList::const_iterator fi = noteFeatures.begin();
|
Chris@123
|
592 fi != noteFeatures.end(); ++fi) {
|
Chris@123
|
593 fs[m_notesOutputNo].push_back(*fi);
|
Chris@40
|
594 }
|
Chris@34
|
595 }
|
Chris@34
|
596
|
Chris@32
|
597 return fs;
|
Chris@31
|
598 }
|
Chris@31
|
599
|
Chris@311
|
600 pair<vector<double>, vector<int> >
|
Chris@311
|
601 Silvet::applyEM(const InstrumentPack &pack,
|
Chris@311
|
602 const vector<double> &column,
|
Chris@311
|
603 bool wantShifts)
|
Chris@311
|
604 {
|
Chris@311
|
605 double columnThreshold = 1e-5;
|
Chris@311
|
606
|
Chris@311
|
607 vector<double> pitches(pack.templateNoteCount, 0.0);
|
Chris@311
|
608 vector<int> bestShifts;
|
Chris@311
|
609
|
Chris@311
|
610 double sum = 0.0;
|
Chris@311
|
611 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@311
|
612 sum += column.at(j);
|
Chris@311
|
613 }
|
Chris@311
|
614 if (sum < columnThreshold) return { pitches, bestShifts };
|
Chris@311
|
615
|
Chris@311
|
616 EM em(&pack, m_hqMode);
|
Chris@311
|
617
|
Chris@311
|
618 em.setPitchSparsity(pack.pitchSparsity);
|
Chris@311
|
619 em.setSourceSparsity(pack.sourceSparsity);
|
Chris@311
|
620
|
Chris@311
|
621 int iterations = m_hqMode ? 20 : 10;
|
Chris@311
|
622
|
Chris@311
|
623 for (int j = 0; j < iterations; ++j) {
|
Chris@311
|
624 em.iterate(column.data());
|
Chris@311
|
625 }
|
Chris@311
|
626
|
Chris@311
|
627 const float *pitchDist = em.getPitchDistribution();
|
Chris@311
|
628 const float *const *shiftDist = em.getShifts();
|
Chris@311
|
629
|
Chris@311
|
630 int shiftCount = 1;
|
Chris@311
|
631 if (wantShifts) {
|
Chris@311
|
632 shiftCount = pack.templateMaxShift * 2 + 1;
|
Chris@311
|
633 }
|
Chris@311
|
634
|
Chris@311
|
635 for (int j = 0; j < pack.templateNoteCount; ++j) {
|
Chris@311
|
636
|
Chris@311
|
637 pitches[j] = pitchDist[j] * sum;
|
Chris@311
|
638
|
Chris@311
|
639 int bestShift = 0;
|
Chris@311
|
640 float bestShiftValue = 0.0;
|
Chris@311
|
641 if (wantShifts) {
|
Chris@311
|
642 for (int k = 0; k < shiftCount; ++k) {
|
Chris@311
|
643 float value = shiftDist[k][j];
|
Chris@311
|
644 if (k == 0 || value > bestShiftValue) {
|
Chris@311
|
645 bestShiftValue = value;
|
Chris@311
|
646 bestShift = k;
|
Chris@311
|
647 }
|
Chris@311
|
648 }
|
Chris@311
|
649 bestShifts.push_back(bestShift);
|
Chris@311
|
650 }
|
Chris@311
|
651 }
|
Chris@311
|
652
|
Chris@311
|
653 return { pitches, bestShifts };
|
Chris@311
|
654 }
|
Chris@311
|
655
|
Chris@32
|
656 Silvet::Grid
|
Chris@32
|
657 Silvet::preProcess(const Grid &in)
|
Chris@32
|
658 {
|
Chris@32
|
659 int width = in.size();
|
Chris@32
|
660
|
Chris@165
|
661 int spacing = processingSampleRate / m_colsPerSec;
|
Chris@32
|
662
|
Chris@165
|
663 // need to be careful that col spacing is an integer number of samples!
|
Chris@165
|
664 assert(spacing * m_colsPerSec == processingSampleRate);
|
Chris@32
|
665
|
Chris@32
|
666 Grid out;
|
Chris@32
|
667
|
Chris@58
|
668 // We count the CQ latency in terms of processing hops, but
|
Chris@58
|
669 // actually it probably isn't an exact number of hops so this
|
Chris@58
|
670 // isn't quite accurate. But the small constant offset is
|
Chris@165
|
671 // practically irrelevant compared to the jitter from the frame
|
Chris@165
|
672 // size we reduce to in a moment
|
Chris@33
|
673 int latentColumns = m_cq->getLatency() / m_cq->getColumnHop();
|
Chris@33
|
674
|
Chris@176
|
675 const InstrumentPack &pack = m_instruments[m_instrument];
|
Chris@176
|
676
|
Chris@32
|
677 for (int i = 0; i < width; ++i) {
|
Chris@32
|
678
|
Chris@33
|
679 if (m_columnCount < latentColumns) {
|
Chris@33
|
680 ++m_columnCount;
|
Chris@33
|
681 continue;
|
Chris@33
|
682 }
|
Chris@33
|
683
|
Chris@32
|
684 int prevSampleNo = (m_columnCount - 1) * m_cq->getColumnHop();
|
Chris@32
|
685 int sampleNo = m_columnCount * m_cq->getColumnHop();
|
Chris@32
|
686
|
Chris@32
|
687 bool select = (sampleNo / spacing != prevSampleNo / spacing);
|
Chris@32
|
688
|
Chris@32
|
689 if (select) {
|
Chris@32
|
690 vector<double> inCol = in[i];
|
Chris@176
|
691 vector<double> outCol(pack.templateHeight);
|
Chris@32
|
692
|
Chris@178
|
693 // In HQ mode, the CQ returns 600 bins and we ignore the
|
Chris@178
|
694 // lowest 55 of them.
|
Chris@178
|
695 //
|
Chris@178
|
696 // In draft mode the CQ is an octave shorter, returning
|
Chris@178
|
697 // 540 bins, so we instead pad them with an additional 5
|
Chris@178
|
698 // zeros.
|
Chris@178
|
699 //
|
Chris@178
|
700 // We also need to reverse the column as we go, since the
|
Chris@178
|
701 // raw CQ has the high frequencies first and we need it
|
Chris@178
|
702 // the other way around.
|
Chris@32
|
703
|
Chris@178
|
704 if (m_hqMode) {
|
Chris@178
|
705 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@178
|
706 int ix = inCol.size() - j - 55;
|
Chris@178
|
707 outCol[j] = inCol[ix];
|
Chris@178
|
708 }
|
Chris@178
|
709 } else {
|
Chris@178
|
710 for (int j = 0; j < 5; ++j) {
|
Chris@178
|
711 outCol[j] = 0.0;
|
Chris@178
|
712 }
|
Chris@178
|
713 for (int j = 5; j < pack.templateHeight; ++j) {
|
Chris@178
|
714 int ix = inCol.size() - j + 4;
|
Chris@178
|
715 outCol[j] = inCol[ix];
|
Chris@178
|
716 }
|
Chris@46
|
717 }
|
Chris@32
|
718
|
Chris@46
|
719 vector<double> noiseLevel1 =
|
Chris@46
|
720 MedianFilter<double>::filter(40, outCol);
|
Chris@176
|
721 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@46
|
722 noiseLevel1[j] = std::min(outCol[j], noiseLevel1[j]);
|
Chris@46
|
723 }
|
Chris@32
|
724
|
Chris@46
|
725 vector<double> noiseLevel2 =
|
Chris@46
|
726 MedianFilter<double>::filter(40, noiseLevel1);
|
Chris@176
|
727 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@46
|
728 outCol[j] = std::max(outCol[j] - noiseLevel2[j], 0.0);
|
Chris@32
|
729 }
|
Chris@32
|
730
|
Chris@165
|
731 out.push_back(outCol);
|
Chris@32
|
732 }
|
Chris@32
|
733
|
Chris@32
|
734 ++m_columnCount;
|
Chris@32
|
735 }
|
Chris@32
|
736
|
Chris@32
|
737 return out;
|
Chris@32
|
738 }
|
Chris@32
|
739
|
Chris@294
|
740 vector<double>
|
Chris@170
|
741 Silvet::postProcess(const vector<double> &pitches,
|
Chris@170
|
742 const vector<int> &bestShifts,
|
Chris@170
|
743 bool wantShifts)
|
Chris@166
|
744 {
|
Chris@176
|
745 const InstrumentPack &pack = m_instruments[m_instrument];
|
Chris@176
|
746
|
Chris@41
|
747 vector<double> filtered;
|
Chris@41
|
748
|
Chris@176
|
749 for (int j = 0; j < pack.templateNoteCount; ++j) {
|
Chris@170
|
750 m_postFilter[j]->push(pitches[j]);
|
Chris@41
|
751 filtered.push_back(m_postFilter[j]->get());
|
Chris@41
|
752 }
|
Chris@41
|
753
|
Chris@41
|
754 // Threshold for level and reduce number of candidate pitches
|
Chris@41
|
755
|
Chris@41
|
756 typedef std::multimap<double, int> ValueIndexMap;
|
Chris@41
|
757
|
Chris@41
|
758 ValueIndexMap strengths;
|
Chris@166
|
759
|
Chris@176
|
760 for (int j = 0; j < pack.templateNoteCount; ++j) {
|
Chris@166
|
761 double strength = filtered[j];
|
Chris@183
|
762 if (strength < pack.levelThreshold) continue;
|
Chris@168
|
763 strengths.insert(ValueIndexMap::value_type(strength, j));
|
Chris@168
|
764 }
|
Chris@166
|
765
|
Chris@168
|
766 ValueIndexMap::const_iterator si = strengths.end();
|
Chris@167
|
767
|
Chris@168
|
768 map<int, double> active;
|
Chris@168
|
769 map<int, int> activeShifts;
|
Chris@168
|
770
|
Chris@183
|
771 while (int(active.size()) < pack.maxPolyphony && si != strengths.begin()) {
|
Chris@168
|
772
|
Chris@168
|
773 --si;
|
Chris@168
|
774
|
Chris@168
|
775 double strength = si->first;
|
Chris@168
|
776 int j = si->second;
|
Chris@168
|
777
|
Chris@168
|
778 active[j] = strength;
|
Chris@168
|
779
|
Chris@170
|
780 if (wantShifts) {
|
Chris@170
|
781 activeShifts[j] = bestShifts[j];
|
Chris@167
|
782 }
|
Chris@41
|
783 }
|
Chris@41
|
784
|
Chris@168
|
785 m_pianoRoll.push_back(active);
|
Chris@170
|
786
|
Chris@170
|
787 if (wantShifts) {
|
Chris@168
|
788 m_pianoRollShifts.push_back(activeShifts);
|
Chris@41
|
789 }
|
Chris@294
|
790
|
Chris@294
|
791 return filtered;
|
Chris@166
|
792 }
|
Chris@166
|
793
|
Chris@166
|
794 Vamp::Plugin::FeatureList
|
Chris@168
|
795 Silvet::noteTrack(int shiftCount)
|
Chris@166
|
796 {
|
Chris@41
|
797 // Minimum duration pruning, and conversion to notes. We can only
|
Chris@41
|
798 // report notes that have just ended (i.e. that are absent in the
|
Chris@168
|
799 // latest active set but present in the prior set in the piano
|
Chris@41
|
800 // roll) -- any notes that ended earlier will have been reported
|
Chris@41
|
801 // already, and if they haven't ended, we don't know their
|
Chris@41
|
802 // duration.
|
Chris@41
|
803
|
Chris@168
|
804 int width = m_pianoRoll.size() - 1;
|
Chris@168
|
805
|
Chris@168
|
806 const map<int, double> &active = m_pianoRoll[width];
|
Chris@41
|
807
|
Chris@165
|
808 double columnDuration = 1.0 / m_colsPerSec;
|
Chris@165
|
809
|
Chris@165
|
810 // only keep notes >= 100ms or thereabouts
|
Chris@165
|
811 int durationThreshold = floor(0.1 / columnDuration); // columns
|
Chris@165
|
812 if (durationThreshold < 1) durationThreshold = 1;
|
Chris@41
|
813
|
Chris@41
|
814 FeatureList noteFeatures;
|
Chris@41
|
815
|
Chris@41
|
816 if (width < durationThreshold + 1) {
|
Chris@41
|
817 return noteFeatures;
|
Chris@41
|
818 }
|
Chris@41
|
819
|
Chris@150
|
820 //!!! try: repeated note detection? (look for change in first derivative of the pitch matrix)
|
Chris@150
|
821
|
Chris@55
|
822 for (map<int, double>::const_iterator ni = m_pianoRoll[width-1].begin();
|
Chris@41
|
823 ni != m_pianoRoll[width-1].end(); ++ni) {
|
Chris@41
|
824
|
Chris@55
|
825 int note = ni->first;
|
Chris@41
|
826
|
Chris@41
|
827 if (active.find(note) != active.end()) {
|
Chris@41
|
828 // the note is still playing
|
Chris@41
|
829 continue;
|
Chris@41
|
830 }
|
Chris@41
|
831
|
Chris@41
|
832 // the note was playing but just ended
|
Chris@41
|
833 int end = width;
|
Chris@41
|
834 int start = end-1;
|
Chris@41
|
835
|
Chris@41
|
836 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) {
|
Chris@41
|
837 --start;
|
Chris@41
|
838 }
|
Chris@41
|
839 ++start;
|
Chris@41
|
840
|
Chris@169
|
841 if ((end - start) < durationThreshold) {
|
Chris@41
|
842 continue;
|
Chris@41
|
843 }
|
Chris@41
|
844
|
Chris@169
|
845 emitNote(start, end, note, shiftCount, noteFeatures);
|
Chris@41
|
846 }
|
Chris@41
|
847
|
Chris@62
|
848 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl;
|
Chris@41
|
849
|
Chris@41
|
850 return noteFeatures;
|
Chris@41
|
851 }
|
Chris@41
|
852
|
Chris@169
|
853 void
|
Chris@169
|
854 Silvet::emitNote(int start, int end, int note, int shiftCount,
|
Chris@169
|
855 FeatureList ¬eFeatures)
|
Chris@169
|
856 {
|
Chris@169
|
857 int partStart = start;
|
Chris@169
|
858 int partShift = 0;
|
Chris@169
|
859 int partVelocity = 0;
|
Chris@169
|
860
|
Chris@252
|
861 int partThreshold = floor(0.05 * m_colsPerSec);
|
Chris@169
|
862
|
Chris@169
|
863 for (int i = start; i != end; ++i) {
|
Chris@169
|
864
|
Chris@169
|
865 double strength = m_pianoRoll[i][note];
|
Chris@169
|
866
|
Chris@169
|
867 int shift = 0;
|
Chris@169
|
868
|
Chris@169
|
869 if (shiftCount > 1) {
|
Chris@169
|
870
|
Chris@169
|
871 shift = m_pianoRollShifts[i][note];
|
Chris@169
|
872
|
Chris@169
|
873 if (i == partStart) {
|
Chris@169
|
874 partShift = shift;
|
Chris@169
|
875 }
|
Chris@169
|
876
|
Chris@169
|
877 if (i > partStart + partThreshold && shift != partShift) {
|
Chris@169
|
878
|
Chris@169
|
879 // cerr << "i = " << i << ", partStart = " << partStart << ", shift = " << shift << ", partShift = " << partShift << endl;
|
Chris@169
|
880
|
Chris@169
|
881 // pitch has changed, emit an intermediate note
|
Chris@252
|
882 noteFeatures.push_back(makeNoteFeature(partStart,
|
Chris@252
|
883 i,
|
Chris@252
|
884 note,
|
Chris@252
|
885 partShift,
|
Chris@252
|
886 shiftCount,
|
Chris@252
|
887 partVelocity));
|
Chris@169
|
888 partStart = i;
|
Chris@169
|
889 partShift = shift;
|
Chris@169
|
890 partVelocity = 0;
|
Chris@169
|
891 }
|
Chris@169
|
892 }
|
Chris@169
|
893
|
Chris@246
|
894 int v = round(strength * 2);
|
Chris@169
|
895 if (v > partVelocity) {
|
Chris@169
|
896 partVelocity = v;
|
Chris@169
|
897 }
|
Chris@169
|
898 }
|
Chris@169
|
899
|
Chris@169
|
900 if (end >= partStart + partThreshold) {
|
Chris@252
|
901 noteFeatures.push_back(makeNoteFeature(partStart,
|
Chris@252
|
902 end,
|
Chris@252
|
903 note,
|
Chris@252
|
904 partShift,
|
Chris@252
|
905 shiftCount,
|
Chris@252
|
906 partVelocity));
|
Chris@169
|
907 }
|
Chris@169
|
908 }
|
Chris@252
|
909
|
Chris@309
|
910 RealTime
|
Chris@309
|
911 Silvet::getColumnTimestamp(int column)
|
Chris@309
|
912 {
|
Chris@309
|
913 double columnDuration = 1.0 / m_colsPerSec;
|
Chris@309
|
914 int postFilterLatency = int(m_postFilter[0]->getSize() / 2);
|
Chris@309
|
915
|
Chris@309
|
916 return m_startTime + RealTime::fromSeconds
|
Chris@309
|
917 (columnDuration * (column - postFilterLatency) + 0.02);
|
Chris@309
|
918 }
|
Chris@309
|
919
|
Chris@252
|
920 Silvet::Feature
|
Chris@252
|
921 Silvet::makeNoteFeature(int start,
|
Chris@252
|
922 int end,
|
Chris@252
|
923 int note,
|
Chris@252
|
924 int shift,
|
Chris@252
|
925 int shiftCount,
|
Chris@252
|
926 int velocity)
|
Chris@252
|
927 {
|
Chris@252
|
928 Feature f;
|
Chris@252
|
929
|
Chris@252
|
930 f.hasTimestamp = true;
|
Chris@309
|
931 f.timestamp = getColumnTimestamp(start);
|
Chris@252
|
932
|
Chris@252
|
933 f.hasDuration = true;
|
Chris@309
|
934 f.duration = getColumnTimestamp(end) - f.timestamp;
|
Chris@252
|
935
|
Chris@252
|
936 f.values.clear();
|
Chris@252
|
937
|
Chris@252
|
938 f.values.push_back
|
Chris@252
|
939 (noteFrequency(note, shift, shiftCount));
|
Chris@252
|
940
|
Chris@252
|
941 float inputGain = getInputGainAt(f.timestamp);
|
Chris@252
|
942 // cerr << "adjusting velocity from " << velocity << " to " << round(velocity/inputGain) << endl;
|
Chris@252
|
943 velocity = round(velocity / inputGain);
|
Chris@252
|
944 if (velocity > 127) velocity = 127;
|
Chris@252
|
945 if (velocity < 1) velocity = 1;
|
Chris@252
|
946 f.values.push_back(velocity);
|
Chris@252
|
947
|
Chris@252
|
948 f.label = noteName(note, shift, shiftCount);
|
Chris@252
|
949
|
Chris@252
|
950 return f;
|
Chris@252
|
951 }
|
Chris@252
|
952
|
Chris@252
|
953 float
|
Chris@252
|
954 Silvet::getInputGainAt(RealTime t)
|
Chris@252
|
955 {
|
Chris@252
|
956 map<RealTime, float>::const_iterator i = m_inputGains.lower_bound(t);
|
Chris@252
|
957
|
Chris@252
|
958 if (i == m_inputGains.end()) {
|
Chris@252
|
959 if (i != m_inputGains.begin()) {
|
Chris@252
|
960 --i;
|
Chris@252
|
961 } else {
|
Chris@252
|
962 return 1.f; // no data
|
Chris@252
|
963 }
|
Chris@252
|
964 }
|
Chris@252
|
965
|
Chris@252
|
966 // cerr << "gain at time " << t << " = " << i->second << endl;
|
Chris@252
|
967
|
Chris@252
|
968 return i->second;
|
Chris@252
|
969 }
|
Chris@252
|
970
|