Chris@31
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
Chris@31
|
2
|
Chris@31
|
3 /*
|
Chris@31
|
4 Silvet
|
Chris@31
|
5
|
Chris@31
|
6 A Vamp plugin for note transcription.
|
Chris@31
|
7 Centre for Digital Music, Queen Mary University of London.
|
Chris@31
|
8
|
Chris@31
|
9 This program is free software; you can redistribute it and/or
|
Chris@31
|
10 modify it under the terms of the GNU General Public License as
|
Chris@31
|
11 published by the Free Software Foundation; either version 2 of the
|
Chris@31
|
12 License, or (at your option) any later version. See the file
|
Chris@31
|
13 COPYING included with this distribution for more information.
|
Chris@31
|
14 */
|
Chris@31
|
15
|
Chris@31
|
16 #include "Silvet.h"
|
Chris@34
|
17 #include "EM.h"
|
Chris@31
|
18
|
Chris@152
|
19 #include <cq/CQSpectrogram.h>
|
Chris@31
|
20
|
Chris@152
|
21 #include "MedianFilter.h"
|
Chris@152
|
22 #include "constant-q-cpp/src/dsp/Resampler.h"
|
Chris@246
|
23 #include "flattendynamics-ladspa.h"
|
Chris@31
|
24
|
Chris@31
|
25 #include <vector>
|
Chris@31
|
26
|
Chris@32
|
27 #include <cstdio>
|
Chris@32
|
28
|
Chris@31
|
29 using std::vector;
|
Chris@48
|
30 using std::cout;
|
Chris@31
|
31 using std::cerr;
|
Chris@31
|
32 using std::endl;
|
Chris@40
|
33 using Vamp::RealTime;
|
Chris@31
|
34
|
Chris@31
|
35 static int processingSampleRate = 44100;
|
Chris@31
|
36 static int processingBPO = 60;
|
Chris@170
|
37
|
Chris@272
|
38 static int minInputSampleRate = 100;
|
Chris@272
|
39 static int maxInputSampleRate = 192000;
|
Chris@272
|
40
|
Chris@31
|
41 Silvet::Silvet(float inputSampleRate) :
|
Chris@31
|
42 Plugin(inputSampleRate),
|
Chris@161
|
43 m_instruments(InstrumentPack::listInstrumentPacks()),
|
Chris@31
|
44 m_resampler(0),
|
Chris@246
|
45 m_flattener(0),
|
Chris@110
|
46 m_cq(0),
|
Chris@162
|
47 m_hqMode(true),
|
Chris@166
|
48 m_fineTuning(false),
|
Chris@178
|
49 m_instrument(0),
|
Chris@178
|
50 m_colsPerSec(50)
|
Chris@31
|
51 {
|
Chris@31
|
52 }
|
Chris@31
|
53
|
Chris@31
|
54 Silvet::~Silvet()
|
Chris@31
|
55 {
|
Chris@31
|
56 delete m_resampler;
|
Chris@246
|
57 delete m_flattener;
|
Chris@31
|
58 delete m_cq;
|
Chris@41
|
59 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
|
Chris@41
|
60 delete m_postFilter[i];
|
Chris@41
|
61 }
|
Chris@31
|
62 }
|
Chris@31
|
63
|
Chris@31
|
64 string
|
Chris@31
|
65 Silvet::getIdentifier() const
|
Chris@31
|
66 {
|
Chris@31
|
67 return "silvet";
|
Chris@31
|
68 }
|
Chris@31
|
69
|
Chris@31
|
70 string
|
Chris@31
|
71 Silvet::getName() const
|
Chris@31
|
72 {
|
Chris@31
|
73 return "Silvet Note Transcription";
|
Chris@31
|
74 }
|
Chris@31
|
75
|
Chris@31
|
76 string
|
Chris@31
|
77 Silvet::getDescription() const
|
Chris@31
|
78 {
|
Chris@191
|
79 return "Estimate the note onsets, pitches, and durations that make up a music recording.";
|
Chris@31
|
80 }
|
Chris@31
|
81
|
Chris@31
|
82 string
|
Chris@31
|
83 Silvet::getMaker() const
|
Chris@31
|
84 {
|
Chris@191
|
85 return "Queen Mary, University of London";
|
Chris@31
|
86 }
|
Chris@31
|
87
|
Chris@31
|
88 int
|
Chris@31
|
89 Silvet::getPluginVersion() const
|
Chris@31
|
90 {
|
Chris@309
|
91 return 3;
|
Chris@31
|
92 }
|
Chris@31
|
93
|
Chris@31
|
94 string
|
Chris@31
|
95 Silvet::getCopyright() const
|
Chris@31
|
96 {
|
Chris@191
|
97 return "Method by Emmanouil Benetos and Simon Dixon; plugin by Chris Cannam and Emmanouil Benetos. GPL licence.";
|
Chris@31
|
98 }
|
Chris@31
|
99
|
Chris@31
|
100 Silvet::InputDomain
|
Chris@31
|
101 Silvet::getInputDomain() const
|
Chris@31
|
102 {
|
Chris@31
|
103 return TimeDomain;
|
Chris@31
|
104 }
|
Chris@31
|
105
|
Chris@31
|
106 size_t
|
Chris@31
|
107 Silvet::getPreferredBlockSize() const
|
Chris@31
|
108 {
|
Chris@31
|
109 return 0;
|
Chris@31
|
110 }
|
Chris@31
|
111
|
Chris@31
|
112 size_t
|
Chris@31
|
113 Silvet::getPreferredStepSize() const
|
Chris@31
|
114 {
|
Chris@31
|
115 return 0;
|
Chris@31
|
116 }
|
Chris@31
|
117
|
Chris@31
|
118 size_t
|
Chris@31
|
119 Silvet::getMinChannelCount() const
|
Chris@31
|
120 {
|
Chris@31
|
121 return 1;
|
Chris@31
|
122 }
|
Chris@31
|
123
|
Chris@31
|
124 size_t
|
Chris@31
|
125 Silvet::getMaxChannelCount() const
|
Chris@31
|
126 {
|
Chris@31
|
127 return 1;
|
Chris@31
|
128 }
|
Chris@31
|
129
|
Chris@31
|
130 Silvet::ParameterList
|
Chris@31
|
131 Silvet::getParameterDescriptors() const
|
Chris@31
|
132 {
|
Chris@31
|
133 ParameterList list;
|
Chris@110
|
134
|
Chris@110
|
135 ParameterDescriptor desc;
|
Chris@110
|
136 desc.identifier = "mode";
|
Chris@110
|
137 desc.name = "Processing mode";
|
Chris@110
|
138 desc.unit = "";
|
Chris@271
|
139 desc.description = "Sets the tradeoff of processing speed against transcription quality. Draft mode modifies a number of internal parameters in favour of speed. Intensive mode (the default) will almost always produce better results.";
|
Chris@110
|
140 desc.minValue = 0;
|
Chris@110
|
141 desc.maxValue = 1;
|
Chris@113
|
142 desc.defaultValue = 1;
|
Chris@110
|
143 desc.isQuantized = true;
|
Chris@110
|
144 desc.quantizeStep = 1;
|
Chris@166
|
145 desc.valueNames.push_back("Draft (faster)");
|
Chris@165
|
146 desc.valueNames.push_back("Intensive (higher quality)");
|
Chris@161
|
147 list.push_back(desc);
|
Chris@161
|
148
|
Chris@176
|
149 desc.identifier = "instrument";
|
Chris@176
|
150 desc.name = "Instrument";
|
Chris@161
|
151 desc.unit = "";
|
Chris@271
|
152 desc.description = "The instrument or instruments known to be present in the recording. This affects the set of instrument templates used, as well as the expected level of polyphony in the output. Using a more limited set of instruments than the default will also make the plugin run faster.\nNote that this plugin cannot isolate instruments: you can't use this setting to request notes from only one instrument in a recording with several. Instead, use this as a hint to the plugin about which instruments are actually present.";
|
Chris@161
|
153 desc.minValue = 0;
|
Chris@162
|
154 desc.maxValue = m_instruments.size()-1;
|
Chris@162
|
155 desc.defaultValue = 0;
|
Chris@161
|
156 desc.isQuantized = true;
|
Chris@161
|
157 desc.quantizeStep = 1;
|
Chris@161
|
158 desc.valueNames.clear();
|
Chris@162
|
159 for (int i = 0; i < int(m_instruments.size()); ++i) {
|
Chris@162
|
160 desc.valueNames.push_back(m_instruments[i].name);
|
Chris@162
|
161 }
|
Chris@166
|
162 list.push_back(desc);
|
Chris@161
|
163
|
Chris@166
|
164 desc.identifier = "finetune";
|
Chris@166
|
165 desc.name = "Return fine pitch estimates";
|
Chris@166
|
166 desc.unit = "";
|
Chris@271
|
167 desc.description = "Return pitch estimates at finer than semitone resolution. This works only in Intensive mode. Notes that appear to drift in pitch will be split up into shorter notes with individually finer pitches.";
|
Chris@166
|
168 desc.minValue = 0;
|
Chris@166
|
169 desc.maxValue = 1;
|
Chris@166
|
170 desc.defaultValue = 0;
|
Chris@166
|
171 desc.isQuantized = true;
|
Chris@166
|
172 desc.quantizeStep = 1;
|
Chris@166
|
173 desc.valueNames.clear();
|
Chris@110
|
174 list.push_back(desc);
|
Chris@110
|
175
|
Chris@31
|
176 return list;
|
Chris@31
|
177 }
|
Chris@31
|
178
|
Chris@31
|
179 float
|
Chris@31
|
180 Silvet::getParameter(string identifier) const
|
Chris@31
|
181 {
|
Chris@110
|
182 if (identifier == "mode") {
|
Chris@110
|
183 return m_hqMode ? 1.f : 0.f;
|
Chris@166
|
184 } else if (identifier == "finetune") {
|
Chris@166
|
185 return m_fineTuning ? 1.f : 0.f;
|
Chris@176
|
186 } else if (identifier == "instrument") {
|
Chris@162
|
187 return m_instrument;
|
Chris@110
|
188 }
|
Chris@31
|
189 return 0;
|
Chris@31
|
190 }
|
Chris@31
|
191
|
Chris@31
|
192 void
|
Chris@31
|
193 Silvet::setParameter(string identifier, float value)
|
Chris@31
|
194 {
|
Chris@110
|
195 if (identifier == "mode") {
|
Chris@110
|
196 m_hqMode = (value > 0.5);
|
Chris@166
|
197 } else if (identifier == "finetune") {
|
Chris@166
|
198 m_fineTuning = (value > 0.5);
|
Chris@176
|
199 } else if (identifier == "instrument") {
|
Chris@162
|
200 m_instrument = lrintf(value);
|
Chris@110
|
201 }
|
Chris@31
|
202 }
|
Chris@31
|
203
|
Chris@31
|
204 Silvet::ProgramList
|
Chris@31
|
205 Silvet::getPrograms() const
|
Chris@31
|
206 {
|
Chris@31
|
207 ProgramList list;
|
Chris@31
|
208 return list;
|
Chris@31
|
209 }
|
Chris@31
|
210
|
Chris@31
|
211 string
|
Chris@31
|
212 Silvet::getCurrentProgram() const
|
Chris@31
|
213 {
|
Chris@31
|
214 return "";
|
Chris@31
|
215 }
|
Chris@31
|
216
|
Chris@31
|
217 void
|
Chris@31
|
218 Silvet::selectProgram(string name)
|
Chris@31
|
219 {
|
Chris@31
|
220 }
|
Chris@31
|
221
|
Chris@31
|
222 Silvet::OutputList
|
Chris@31
|
223 Silvet::getOutputDescriptors() const
|
Chris@31
|
224 {
|
Chris@31
|
225 OutputList list;
|
Chris@31
|
226
|
Chris@31
|
227 OutputDescriptor d;
|
Chris@51
|
228 d.identifier = "notes";
|
Chris@51
|
229 d.name = "Note transcription";
|
Chris@271
|
230 d.description = "Overall note transcription. Each note has time, duration, estimated pitch, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture.";
|
Chris@41
|
231 d.unit = "Hz";
|
Chris@31
|
232 d.hasFixedBinCount = true;
|
Chris@31
|
233 d.binCount = 2;
|
Chris@41
|
234 d.binNames.push_back("Frequency");
|
Chris@31
|
235 d.binNames.push_back("Velocity");
|
Chris@31
|
236 d.hasKnownExtents = false;
|
Chris@31
|
237 d.isQuantized = false;
|
Chris@31
|
238 d.sampleType = OutputDescriptor::VariableSampleRate;
|
Chris@246
|
239 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
|
Chris@31
|
240 d.hasDuration = true;
|
Chris@32
|
241 m_notesOutputNo = list.size();
|
Chris@32
|
242 list.push_back(d);
|
Chris@32
|
243
|
Chris@178
|
244 d.identifier = "timefreq";
|
Chris@178
|
245 d.name = "Time-frequency distribution";
|
Chris@271
|
246 d.description = "Filtered constant-Q time-frequency distribution as used as input to the expectation-maximisation algorithm.";
|
Chris@178
|
247 d.unit = "";
|
Chris@178
|
248 d.hasFixedBinCount = true;
|
Chris@178
|
249 d.binCount = m_instruments[0].templateHeight;
|
Chris@178
|
250 d.binNames.clear();
|
Chris@178
|
251 if (m_cq) {
|
Chris@294
|
252 char name[50];
|
Chris@178
|
253 for (int i = 0; i < m_instruments[0].templateHeight; ++i) {
|
Chris@178
|
254 // We have a 600-bin (10 oct 60-bin CQ) of which the
|
Chris@178
|
255 // lowest-frequency 55 bins have been dropped, for a
|
Chris@178
|
256 // 545-bin template. The native CQ bins go high->low
|
Chris@178
|
257 // frequency though, so these are still the first 545 bins
|
Chris@178
|
258 // as reported by getBinFrequency, though in reverse order
|
Chris@178
|
259 float freq = m_cq->getBinFrequency
|
Chris@178
|
260 (m_instruments[0].templateHeight - i - 1);
|
Chris@178
|
261 sprintf(name, "%.1f Hz", freq);
|
Chris@178
|
262 d.binNames.push_back(name);
|
Chris@178
|
263 }
|
Chris@178
|
264 }
|
Chris@178
|
265 d.hasKnownExtents = false;
|
Chris@178
|
266 d.isQuantized = false;
|
Chris@178
|
267 d.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@178
|
268 d.sampleRate = m_colsPerSec;
|
Chris@178
|
269 d.hasDuration = false;
|
Chris@178
|
270 m_fcqOutputNo = list.size();
|
Chris@178
|
271 list.push_back(d);
|
Chris@178
|
272
|
Chris@294
|
273 d.identifier = "pitchactivation";
|
Chris@294
|
274 d.name = "Pitch activation distribution";
|
Chris@294
|
275 d.description = "Pitch activation distribution resulting from expectation-maximisation algorithm, prior to note extraction.";
|
Chris@294
|
276 d.unit = "";
|
Chris@294
|
277 d.hasFixedBinCount = true;
|
Chris@294
|
278 d.binCount = m_instruments[0].templateNoteCount;
|
Chris@294
|
279 d.binNames.clear();
|
Chris@294
|
280 if (m_cq) {
|
Chris@294
|
281 for (int i = 0; i < m_instruments[0].templateNoteCount; ++i) {
|
Chris@294
|
282 d.binNames.push_back(noteName(i, 0, 1));
|
Chris@294
|
283 }
|
Chris@294
|
284 }
|
Chris@294
|
285 d.hasKnownExtents = false;
|
Chris@294
|
286 d.isQuantized = false;
|
Chris@294
|
287 d.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@294
|
288 d.sampleRate = m_colsPerSec;
|
Chris@294
|
289 d.hasDuration = false;
|
Chris@294
|
290 m_pitchOutputNo = list.size();
|
Chris@294
|
291 list.push_back(d);
|
Chris@294
|
292
|
Chris@309
|
293 d.identifier = "chroma";
|
Chris@309
|
294 d.name = "Pitch chroma distribution";
|
Chris@309
|
295 d.description = "Pitch chroma distribution formed by wrapping the un-thresholded pitch activation distribution into a single octave of semitone bins.";
|
Chris@309
|
296 d.unit = "";
|
Chris@309
|
297 d.hasFixedBinCount = true;
|
Chris@309
|
298 d.binCount = 12;
|
Chris@309
|
299 d.binNames.clear();
|
Chris@309
|
300 if (m_cq) {
|
Chris@309
|
301 for (int i = 0; i < 12; ++i) {
|
Chris@309
|
302 d.binNames.push_back(chromaName(i));
|
Chris@309
|
303 }
|
Chris@309
|
304 }
|
Chris@309
|
305 d.hasKnownExtents = false;
|
Chris@309
|
306 d.isQuantized = false;
|
Chris@309
|
307 d.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@309
|
308 d.sampleRate = m_colsPerSec;
|
Chris@309
|
309 d.hasDuration = false;
|
Chris@309
|
310 m_chromaOutputNo = list.size();
|
Chris@309
|
311 list.push_back(d);
|
Chris@309
|
312
|
Chris@31
|
313 return list;
|
Chris@31
|
314 }
|
Chris@31
|
315
|
Chris@38
|
316 std::string
|
Chris@309
|
317 Silvet::chromaName(int pitch) const
|
Chris@38
|
318 {
|
Chris@38
|
319 static const char *names[] = {
|
Chris@38
|
320 "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"
|
Chris@38
|
321 };
|
Chris@38
|
322
|
Chris@309
|
323 return names[pitch];
|
Chris@309
|
324 }
|
Chris@309
|
325
|
Chris@309
|
326 std::string
|
Chris@309
|
327 Silvet::noteName(int note, int shift, int shiftCount) const
|
Chris@309
|
328 {
|
Chris@309
|
329 string n = chromaName(note % 12);
|
Chris@38
|
330
|
Chris@175
|
331 int oct = (note + 9) / 12;
|
Chris@38
|
332
|
Chris@175
|
333 char buf[30];
|
Chris@175
|
334
|
Chris@175
|
335 float pshift = 0.f;
|
Chris@175
|
336 if (shiftCount > 1) {
|
Chris@175
|
337 // see noteFrequency below
|
Chris@175
|
338 pshift =
|
Chris@175
|
339 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
|
Chris@175
|
340 }
|
Chris@175
|
341
|
Chris@175
|
342 if (pshift > 0.f) {
|
Chris@309
|
343 sprintf(buf, "%s%d+%dc", n.c_str(), oct, int(round(pshift * 100)));
|
Chris@175
|
344 } else if (pshift < 0.f) {
|
Chris@309
|
345 sprintf(buf, "%s%d-%dc", n.c_str(), oct, int(round((-pshift) * 100)));
|
Chris@175
|
346 } else {
|
Chris@309
|
347 sprintf(buf, "%s%d", n.c_str(), oct);
|
Chris@175
|
348 }
|
Chris@38
|
349
|
Chris@38
|
350 return buf;
|
Chris@38
|
351 }
|
Chris@38
|
352
|
Chris@41
|
353 float
|
Chris@168
|
354 Silvet::noteFrequency(int note, int shift, int shiftCount) const
|
Chris@41
|
355 {
|
Chris@169
|
356 // Convert shift number to a pitch shift. The given shift number
|
Chris@169
|
357 // is an offset into the template array, which starts with some
|
Chris@169
|
358 // zeros, followed by the template, then some trailing zeros.
|
Chris@169
|
359 //
|
Chris@169
|
360 // Example: if we have templateMaxShift == 2 and thus shiftCount
|
Chris@169
|
361 // == 5, then the number will be in the range 0-4 and the template
|
Chris@169
|
362 // will have 2 zeros at either end. Thus number 2 represents the
|
Chris@169
|
363 // template "as recorded", for a pitch shift of 0; smaller indices
|
Chris@169
|
364 // represent moving the template *up* in pitch (by introducing
|
Chris@169
|
365 // zeros at the start, which is the low-frequency end), for a
|
Chris@169
|
366 // positive pitch shift; and higher values represent moving it
|
Chris@169
|
367 // down in pitch, for a negative pitch shift.
|
Chris@169
|
368
|
Chris@175
|
369 float pshift = 0.f;
|
Chris@175
|
370 if (shiftCount > 1) {
|
Chris@175
|
371 pshift =
|
Chris@175
|
372 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
|
Chris@175
|
373 }
|
Chris@169
|
374
|
Chris@169
|
375 return float(27.5 * pow(2.0, (note + pshift) / 12.0));
|
Chris@41
|
376 }
|
Chris@41
|
377
|
Chris@31
|
378 bool
|
Chris@31
|
379 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
Chris@31
|
380 {
|
Chris@272
|
381 if (m_inputSampleRate < minInputSampleRate ||
|
Chris@272
|
382 m_inputSampleRate > maxInputSampleRate) {
|
Chris@272
|
383 cerr << "Silvet::initialise: Unsupported input sample rate "
|
Chris@272
|
384 << m_inputSampleRate << " (supported min " << minInputSampleRate
|
Chris@272
|
385 << ", max " << maxInputSampleRate << ")" << endl;
|
Chris@272
|
386 return false;
|
Chris@272
|
387 }
|
Chris@272
|
388
|
Chris@31
|
389 if (channels < getMinChannelCount() ||
|
Chris@272
|
390 channels > getMaxChannelCount()) {
|
Chris@272
|
391 cerr << "Silvet::initialise: Unsupported channel count " << channels
|
Chris@272
|
392 << " (supported min " << getMinChannelCount() << ", max "
|
Chris@272
|
393 << getMaxChannelCount() << ")" << endl;
|
Chris@272
|
394 return false;
|
Chris@272
|
395 }
|
Chris@31
|
396
|
Chris@31
|
397 if (stepSize != blockSize) {
|
Chris@31
|
398 cerr << "Silvet::initialise: Step size must be the same as block size ("
|
Chris@31
|
399 << stepSize << " != " << blockSize << ")" << endl;
|
Chris@31
|
400 return false;
|
Chris@31
|
401 }
|
Chris@31
|
402
|
Chris@31
|
403 m_blockSize = blockSize;
|
Chris@31
|
404
|
Chris@31
|
405 reset();
|
Chris@31
|
406
|
Chris@31
|
407 return true;
|
Chris@31
|
408 }
|
Chris@31
|
409
|
Chris@31
|
410 void
|
Chris@31
|
411 Silvet::reset()
|
Chris@31
|
412 {
|
Chris@31
|
413 delete m_resampler;
|
Chris@246
|
414 delete m_flattener;
|
Chris@31
|
415 delete m_cq;
|
Chris@31
|
416
|
Chris@31
|
417 if (m_inputSampleRate != processingSampleRate) {
|
Chris@31
|
418 m_resampler = new Resampler(m_inputSampleRate, processingSampleRate);
|
Chris@31
|
419 } else {
|
Chris@31
|
420 m_resampler = 0;
|
Chris@31
|
421 }
|
Chris@31
|
422
|
Chris@246
|
423 m_flattener = new FlattenDynamics(m_inputSampleRate); // before resampling
|
Chris@246
|
424 m_flattener->reset();
|
Chris@246
|
425
|
Chris@173
|
426 double minFreq = 27.5;
|
Chris@173
|
427
|
Chris@173
|
428 if (!m_hqMode) {
|
Chris@173
|
429 // We don't actually return any notes from the bottom octave,
|
Chris@173
|
430 // so we can just pad with zeros
|
Chris@173
|
431 minFreq *= 2;
|
Chris@173
|
432 }
|
Chris@173
|
433
|
Chris@154
|
434 CQParameters params(processingSampleRate,
|
Chris@173
|
435 minFreq,
|
Chris@154
|
436 processingSampleRate / 3,
|
Chris@154
|
437 processingBPO);
|
Chris@154
|
438
|
Chris@155
|
439 params.q = 0.95; // MIREX code uses 0.8, but it seems 0.9 or lower
|
Chris@155
|
440 // drops the FFT size to 512 from 1024 and alters
|
Chris@155
|
441 // some other processing parameters, making
|
Chris@155
|
442 // everything much, much slower. Could be a flaw
|
Chris@155
|
443 // in the CQ parameter calculations, must check
|
Chris@154
|
444 params.atomHopFactor = 0.3;
|
Chris@154
|
445 params.threshold = 0.0005;
|
Chris@172
|
446 params.window = CQParameters::Hann;
|
Chris@154
|
447
|
Chris@154
|
448 m_cq = new CQSpectrogram(params, CQSpectrogram::InterpolateLinear);
|
Chris@31
|
449
|
Chris@165
|
450 m_colsPerSec = m_hqMode ? 50 : 25;
|
Chris@165
|
451
|
Chris@41
|
452 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
|
Chris@41
|
453 delete m_postFilter[i];
|
Chris@41
|
454 }
|
Chris@41
|
455 m_postFilter.clear();
|
Chris@176
|
456 for (int i = 0; i < m_instruments[0].templateNoteCount; ++i) {
|
Chris@41
|
457 m_postFilter.push_back(new MedianFilter<double>(3));
|
Chris@41
|
458 }
|
Chris@41
|
459 m_pianoRoll.clear();
|
Chris@246
|
460 m_inputGains.clear();
|
Chris@32
|
461 m_columnCount = 0;
|
Chris@272
|
462 m_resampledCount = 0;
|
Chris@40
|
463 m_startTime = RealTime::zeroTime;
|
Chris@31
|
464 }
|
Chris@31
|
465
|
Chris@31
|
466 Silvet::FeatureSet
|
Chris@31
|
467 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
Chris@31
|
468 {
|
Chris@40
|
469 if (m_columnCount == 0) {
|
Chris@40
|
470 m_startTime = timestamp;
|
Chris@40
|
471 }
|
Chris@246
|
472
|
Chris@246
|
473 vector<float> flattened(m_blockSize);
|
Chris@246
|
474 float gain = 1.f;
|
Chris@246
|
475 m_flattener->connectInputPort
|
Chris@246
|
476 (FlattenDynamics::AudioInputPort, inputBuffers[0]);
|
Chris@246
|
477 m_flattener->connectOutputPort
|
Chris@246
|
478 (FlattenDynamics::AudioOutputPort, &flattened[0]);
|
Chris@246
|
479 m_flattener->connectOutputPort
|
Chris@246
|
480 (FlattenDynamics::GainOutputPort, &gain);
|
Chris@246
|
481 m_flattener->process(m_blockSize);
|
Chris@246
|
482
|
Chris@252
|
483 m_inputGains[timestamp] = gain;
|
Chris@40
|
484
|
Chris@31
|
485 vector<double> data;
|
Chris@40
|
486 for (int i = 0; i < m_blockSize; ++i) {
|
Chris@246
|
487 double d = flattened[i];
|
Chris@235
|
488 data.push_back(d);
|
Chris@40
|
489 }
|
Chris@31
|
490
|
Chris@31
|
491 if (m_resampler) {
|
Chris@272
|
492
|
Chris@31
|
493 data = m_resampler->process(data.data(), data.size());
|
Chris@272
|
494
|
Chris@272
|
495 int hadCount = m_resampledCount;
|
Chris@272
|
496 m_resampledCount += data.size();
|
Chris@272
|
497
|
Chris@272
|
498 int resamplerLatency = m_resampler->getLatency();
|
Chris@272
|
499
|
Chris@272
|
500 if (hadCount < resamplerLatency) {
|
Chris@272
|
501 int stillToDrop = resamplerLatency - hadCount;
|
Chris@272
|
502 if (stillToDrop >= int(data.size())) {
|
Chris@272
|
503 return FeatureSet();
|
Chris@272
|
504 } else {
|
Chris@272
|
505 data = vector<double>(data.begin() + stillToDrop, data.end());
|
Chris@272
|
506 }
|
Chris@272
|
507 }
|
Chris@31
|
508 }
|
Chris@272
|
509
|
Chris@32
|
510 Grid cqout = m_cq->process(data);
|
Chris@51
|
511 FeatureSet fs = transcribe(cqout);
|
Chris@51
|
512 return fs;
|
Chris@34
|
513 }
|
Chris@34
|
514
|
Chris@34
|
515 Silvet::FeatureSet
|
Chris@34
|
516 Silvet::getRemainingFeatures()
|
Chris@34
|
517 {
|
Chris@145
|
518 Grid cqout = m_cq->getRemainingOutput();
|
Chris@51
|
519 FeatureSet fs = transcribe(cqout);
|
Chris@51
|
520 return fs;
|
Chris@34
|
521 }
|
Chris@34
|
522
|
Chris@34
|
523 Silvet::FeatureSet
|
Chris@34
|
524 Silvet::transcribe(const Grid &cqout)
|
Chris@34
|
525 {
|
Chris@32
|
526 Grid filtered = preProcess(cqout);
|
Chris@31
|
527
|
Chris@32
|
528 FeatureSet fs;
|
Chris@32
|
529
|
Chris@104
|
530 if (filtered.empty()) return fs;
|
Chris@170
|
531
|
Chris@170
|
532 const InstrumentPack &pack = m_instruments[m_instrument];
|
Chris@104
|
533
|
Chris@178
|
534 for (int i = 0; i < (int)filtered.size(); ++i) {
|
Chris@178
|
535 Feature f;
|
Chris@178
|
536 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@178
|
537 f.values.push_back(float(filtered[i][j]));
|
Chris@178
|
538 }
|
Chris@178
|
539 fs[m_fcqOutputNo].push_back(f);
|
Chris@178
|
540 }
|
Chris@178
|
541
|
Chris@34
|
542 int width = filtered.size();
|
Chris@34
|
543
|
Chris@164
|
544 int iterations = m_hqMode ? 20 : 10;
|
Chris@34
|
545
|
Chris@176
|
546 Grid localPitches(width, vector<double>(pack.templateNoteCount, 0.0));
|
Chris@170
|
547
|
Chris@170
|
548 bool wantShifts = m_hqMode && m_fineTuning;
|
Chris@170
|
549 int shiftCount = 1;
|
Chris@170
|
550 if (wantShifts) {
|
Chris@170
|
551 shiftCount = pack.templateMaxShift * 2 + 1;
|
Chris@170
|
552 }
|
Chris@170
|
553
|
Chris@170
|
554 vector<vector<int> > localBestShifts;
|
Chris@170
|
555 if (wantShifts) {
|
Chris@170
|
556 localBestShifts =
|
Chris@176
|
557 vector<vector<int> >(width, vector<int>(pack.templateNoteCount, 0));
|
Chris@170
|
558 }
|
Chris@170
|
559
|
Chris@305
|
560 double columnThreshold = 1e-5;
|
Chris@305
|
561
|
Chris@123
|
562 #pragma omp parallel for
|
Chris@123
|
563 for (int i = 0; i < width; ++i) {
|
Chris@104
|
564
|
Chris@170
|
565 double sum = 0.0;
|
Chris@176
|
566 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@170
|
567 sum += filtered.at(i).at(j);
|
Chris@170
|
568 }
|
Chris@305
|
569 if (sum < columnThreshold) continue;
|
Chris@170
|
570
|
Chris@170
|
571 EM em(&pack, m_hqMode);
|
Chris@170
|
572
|
Chris@183
|
573 em.setPitchSparsity(pack.pitchSparsity);
|
Chris@213
|
574 em.setSourceSparsity(pack.sourceSparsity);
|
Chris@183
|
575
|
Chris@170
|
576 for (int j = 0; j < iterations; ++j) {
|
Chris@170
|
577 em.iterate(filtered.at(i).data());
|
Chris@37
|
578 }
|
Chris@37
|
579
|
Chris@170
|
580 const float *pitchDist = em.getPitchDistribution();
|
Chris@170
|
581 const float *const *shiftDist = em.getShifts();
|
Chris@37
|
582
|
Chris@176
|
583 for (int j = 0; j < pack.templateNoteCount; ++j) {
|
Chris@104
|
584
|
Chris@170
|
585 localPitches[i][j] = pitchDist[j] * sum;
|
Chris@170
|
586
|
Chris@170
|
587 int bestShift = 0;
|
Chris@179
|
588 float bestShiftValue = 0.0;
|
Chris@170
|
589 if (wantShifts) {
|
Chris@170
|
590 for (int k = 0; k < shiftCount; ++k) {
|
Chris@179
|
591 float value = shiftDist[k][j];
|
Chris@179
|
592 if (k == 0 || value > bestShiftValue) {
|
Chris@179
|
593 bestShiftValue = value;
|
Chris@170
|
594 bestShift = k;
|
Chris@170
|
595 }
|
Chris@170
|
596 }
|
Chris@170
|
597 localBestShifts[i][j] = bestShift;
|
Chris@170
|
598 }
|
Chris@123
|
599 }
|
Chris@123
|
600 }
|
Chris@166
|
601
|
Chris@166
|
602 for (int i = 0; i < width; ++i) {
|
Chris@37
|
603
|
Chris@309
|
604 // This returns a filtered column, and pushes the
|
Chris@309
|
605 // up-to-max-polyphony activation column to m_pianoRoll
|
Chris@294
|
606 vector<double> filtered = postProcess
|
Chris@294
|
607 (localPitches[i], localBestShifts[i], wantShifts);
|
Chris@294
|
608
|
Chris@309
|
609 RealTime timestamp = getColumnTimestamp(m_pianoRoll.size() - 1);
|
Chris@309
|
610 float inputGain = getInputGainAt(timestamp);
|
Chris@309
|
611
|
Chris@294
|
612 Feature f;
|
Chris@294
|
613 for (int j = 0; j < (int)filtered.size(); ++j) {
|
Chris@309
|
614 float v = filtered[j];
|
Chris@294
|
615 if (v < pack.levelThreshold) v = 0.f;
|
Chris@309
|
616 f.values.push_back(v / inputGain);
|
Chris@294
|
617 }
|
Chris@294
|
618 fs[m_pitchOutputNo].push_back(f);
|
Chris@309
|
619
|
Chris@309
|
620 f.values.clear();
|
Chris@309
|
621 f.values.resize(12);
|
Chris@309
|
622 for (int j = 0; j < (int)filtered.size(); ++j) {
|
Chris@309
|
623 f.values[j % 12] += filtered[j] / inputGain;
|
Chris@309
|
624 }
|
Chris@309
|
625 fs[m_chromaOutputNo].push_back(f);
|
Chris@166
|
626
|
Chris@168
|
627 FeatureList noteFeatures = noteTrack(shiftCount);
|
Chris@38
|
628
|
Chris@123
|
629 for (FeatureList::const_iterator fi = noteFeatures.begin();
|
Chris@123
|
630 fi != noteFeatures.end(); ++fi) {
|
Chris@123
|
631 fs[m_notesOutputNo].push_back(*fi);
|
Chris@40
|
632 }
|
Chris@34
|
633 }
|
Chris@34
|
634
|
Chris@32
|
635 return fs;
|
Chris@31
|
636 }
|
Chris@31
|
637
|
Chris@32
|
638 Silvet::Grid
|
Chris@32
|
639 Silvet::preProcess(const Grid &in)
|
Chris@32
|
640 {
|
Chris@32
|
641 int width = in.size();
|
Chris@32
|
642
|
Chris@165
|
643 int spacing = processingSampleRate / m_colsPerSec;
|
Chris@32
|
644
|
Chris@165
|
645 // need to be careful that col spacing is an integer number of samples!
|
Chris@165
|
646 assert(spacing * m_colsPerSec == processingSampleRate);
|
Chris@32
|
647
|
Chris@32
|
648 Grid out;
|
Chris@32
|
649
|
Chris@58
|
650 // We count the CQ latency in terms of processing hops, but
|
Chris@58
|
651 // actually it probably isn't an exact number of hops so this
|
Chris@58
|
652 // isn't quite accurate. But the small constant offset is
|
Chris@165
|
653 // practically irrelevant compared to the jitter from the frame
|
Chris@165
|
654 // size we reduce to in a moment
|
Chris@33
|
655 int latentColumns = m_cq->getLatency() / m_cq->getColumnHop();
|
Chris@33
|
656
|
Chris@176
|
657 const InstrumentPack &pack = m_instruments[m_instrument];
|
Chris@176
|
658
|
Chris@32
|
659 for (int i = 0; i < width; ++i) {
|
Chris@32
|
660
|
Chris@33
|
661 if (m_columnCount < latentColumns) {
|
Chris@33
|
662 ++m_columnCount;
|
Chris@33
|
663 continue;
|
Chris@33
|
664 }
|
Chris@33
|
665
|
Chris@32
|
666 int prevSampleNo = (m_columnCount - 1) * m_cq->getColumnHop();
|
Chris@32
|
667 int sampleNo = m_columnCount * m_cq->getColumnHop();
|
Chris@32
|
668
|
Chris@32
|
669 bool select = (sampleNo / spacing != prevSampleNo / spacing);
|
Chris@32
|
670
|
Chris@32
|
671 if (select) {
|
Chris@32
|
672 vector<double> inCol = in[i];
|
Chris@176
|
673 vector<double> outCol(pack.templateHeight);
|
Chris@32
|
674
|
Chris@178
|
675 // In HQ mode, the CQ returns 600 bins and we ignore the
|
Chris@178
|
676 // lowest 55 of them.
|
Chris@178
|
677 //
|
Chris@178
|
678 // In draft mode the CQ is an octave shorter, returning
|
Chris@178
|
679 // 540 bins, so we instead pad them with an additional 5
|
Chris@178
|
680 // zeros.
|
Chris@178
|
681 //
|
Chris@178
|
682 // We also need to reverse the column as we go, since the
|
Chris@178
|
683 // raw CQ has the high frequencies first and we need it
|
Chris@178
|
684 // the other way around.
|
Chris@32
|
685
|
Chris@178
|
686 if (m_hqMode) {
|
Chris@178
|
687 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@178
|
688 int ix = inCol.size() - j - 55;
|
Chris@178
|
689 outCol[j] = inCol[ix];
|
Chris@178
|
690 }
|
Chris@178
|
691 } else {
|
Chris@178
|
692 for (int j = 0; j < 5; ++j) {
|
Chris@178
|
693 outCol[j] = 0.0;
|
Chris@178
|
694 }
|
Chris@178
|
695 for (int j = 5; j < pack.templateHeight; ++j) {
|
Chris@178
|
696 int ix = inCol.size() - j + 4;
|
Chris@178
|
697 outCol[j] = inCol[ix];
|
Chris@178
|
698 }
|
Chris@46
|
699 }
|
Chris@32
|
700
|
Chris@46
|
701 vector<double> noiseLevel1 =
|
Chris@46
|
702 MedianFilter<double>::filter(40, outCol);
|
Chris@176
|
703 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@46
|
704 noiseLevel1[j] = std::min(outCol[j], noiseLevel1[j]);
|
Chris@46
|
705 }
|
Chris@32
|
706
|
Chris@46
|
707 vector<double> noiseLevel2 =
|
Chris@46
|
708 MedianFilter<double>::filter(40, noiseLevel1);
|
Chris@176
|
709 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@46
|
710 outCol[j] = std::max(outCol[j] - noiseLevel2[j], 0.0);
|
Chris@32
|
711 }
|
Chris@32
|
712
|
Chris@165
|
713 out.push_back(outCol);
|
Chris@32
|
714 }
|
Chris@32
|
715
|
Chris@32
|
716 ++m_columnCount;
|
Chris@32
|
717 }
|
Chris@32
|
718
|
Chris@32
|
719 return out;
|
Chris@32
|
720 }
|
Chris@32
|
721
|
Chris@294
|
722 vector<double>
|
Chris@170
|
723 Silvet::postProcess(const vector<double> &pitches,
|
Chris@170
|
724 const vector<int> &bestShifts,
|
Chris@170
|
725 bool wantShifts)
|
Chris@166
|
726 {
|
Chris@176
|
727 const InstrumentPack &pack = m_instruments[m_instrument];
|
Chris@176
|
728
|
Chris@41
|
729 vector<double> filtered;
|
Chris@41
|
730
|
Chris@176
|
731 for (int j = 0; j < pack.templateNoteCount; ++j) {
|
Chris@170
|
732 m_postFilter[j]->push(pitches[j]);
|
Chris@41
|
733 filtered.push_back(m_postFilter[j]->get());
|
Chris@41
|
734 }
|
Chris@41
|
735
|
Chris@41
|
736 // Threshold for level and reduce number of candidate pitches
|
Chris@41
|
737
|
Chris@41
|
738 typedef std::multimap<double, int> ValueIndexMap;
|
Chris@41
|
739
|
Chris@41
|
740 ValueIndexMap strengths;
|
Chris@166
|
741
|
Chris@176
|
742 for (int j = 0; j < pack.templateNoteCount; ++j) {
|
Chris@166
|
743 double strength = filtered[j];
|
Chris@183
|
744 if (strength < pack.levelThreshold) continue;
|
Chris@168
|
745 strengths.insert(ValueIndexMap::value_type(strength, j));
|
Chris@168
|
746 }
|
Chris@166
|
747
|
Chris@168
|
748 ValueIndexMap::const_iterator si = strengths.end();
|
Chris@167
|
749
|
Chris@168
|
750 map<int, double> active;
|
Chris@168
|
751 map<int, int> activeShifts;
|
Chris@168
|
752
|
Chris@183
|
753 while (int(active.size()) < pack.maxPolyphony && si != strengths.begin()) {
|
Chris@168
|
754
|
Chris@168
|
755 --si;
|
Chris@168
|
756
|
Chris@168
|
757 double strength = si->first;
|
Chris@168
|
758 int j = si->second;
|
Chris@168
|
759
|
Chris@168
|
760 active[j] = strength;
|
Chris@168
|
761
|
Chris@170
|
762 if (wantShifts) {
|
Chris@170
|
763 activeShifts[j] = bestShifts[j];
|
Chris@167
|
764 }
|
Chris@41
|
765 }
|
Chris@41
|
766
|
Chris@168
|
767 m_pianoRoll.push_back(active);
|
Chris@170
|
768
|
Chris@170
|
769 if (wantShifts) {
|
Chris@168
|
770 m_pianoRollShifts.push_back(activeShifts);
|
Chris@41
|
771 }
|
Chris@294
|
772
|
Chris@294
|
773 return filtered;
|
Chris@166
|
774 }
|
Chris@166
|
775
|
Chris@166
|
776 Vamp::Plugin::FeatureList
|
Chris@168
|
777 Silvet::noteTrack(int shiftCount)
|
Chris@166
|
778 {
|
Chris@41
|
779 // Minimum duration pruning, and conversion to notes. We can only
|
Chris@41
|
780 // report notes that have just ended (i.e. that are absent in the
|
Chris@168
|
781 // latest active set but present in the prior set in the piano
|
Chris@41
|
782 // roll) -- any notes that ended earlier will have been reported
|
Chris@41
|
783 // already, and if they haven't ended, we don't know their
|
Chris@41
|
784 // duration.
|
Chris@41
|
785
|
Chris@168
|
786 int width = m_pianoRoll.size() - 1;
|
Chris@168
|
787
|
Chris@168
|
788 const map<int, double> &active = m_pianoRoll[width];
|
Chris@41
|
789
|
Chris@165
|
790 double columnDuration = 1.0 / m_colsPerSec;
|
Chris@165
|
791
|
Chris@165
|
792 // only keep notes >= 100ms or thereabouts
|
Chris@165
|
793 int durationThreshold = floor(0.1 / columnDuration); // columns
|
Chris@165
|
794 if (durationThreshold < 1) durationThreshold = 1;
|
Chris@41
|
795
|
Chris@41
|
796 FeatureList noteFeatures;
|
Chris@41
|
797
|
Chris@41
|
798 if (width < durationThreshold + 1) {
|
Chris@41
|
799 return noteFeatures;
|
Chris@41
|
800 }
|
Chris@41
|
801
|
Chris@150
|
802 //!!! try: repeated note detection? (look for change in first derivative of the pitch matrix)
|
Chris@150
|
803
|
Chris@55
|
804 for (map<int, double>::const_iterator ni = m_pianoRoll[width-1].begin();
|
Chris@41
|
805 ni != m_pianoRoll[width-1].end(); ++ni) {
|
Chris@41
|
806
|
Chris@55
|
807 int note = ni->first;
|
Chris@41
|
808
|
Chris@41
|
809 if (active.find(note) != active.end()) {
|
Chris@41
|
810 // the note is still playing
|
Chris@41
|
811 continue;
|
Chris@41
|
812 }
|
Chris@41
|
813
|
Chris@41
|
814 // the note was playing but just ended
|
Chris@41
|
815 int end = width;
|
Chris@41
|
816 int start = end-1;
|
Chris@41
|
817
|
Chris@41
|
818 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) {
|
Chris@41
|
819 --start;
|
Chris@41
|
820 }
|
Chris@41
|
821 ++start;
|
Chris@41
|
822
|
Chris@169
|
823 if ((end - start) < durationThreshold) {
|
Chris@41
|
824 continue;
|
Chris@41
|
825 }
|
Chris@41
|
826
|
Chris@169
|
827 emitNote(start, end, note, shiftCount, noteFeatures);
|
Chris@41
|
828 }
|
Chris@41
|
829
|
Chris@62
|
830 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl;
|
Chris@41
|
831
|
Chris@41
|
832 return noteFeatures;
|
Chris@41
|
833 }
|
Chris@41
|
834
|
Chris@169
|
835 void
|
Chris@169
|
836 Silvet::emitNote(int start, int end, int note, int shiftCount,
|
Chris@169
|
837 FeatureList ¬eFeatures)
|
Chris@169
|
838 {
|
Chris@169
|
839 int partStart = start;
|
Chris@169
|
840 int partShift = 0;
|
Chris@169
|
841 int partVelocity = 0;
|
Chris@169
|
842
|
Chris@252
|
843 int partThreshold = floor(0.05 * m_colsPerSec);
|
Chris@169
|
844
|
Chris@169
|
845 for (int i = start; i != end; ++i) {
|
Chris@169
|
846
|
Chris@169
|
847 double strength = m_pianoRoll[i][note];
|
Chris@169
|
848
|
Chris@169
|
849 int shift = 0;
|
Chris@169
|
850
|
Chris@169
|
851 if (shiftCount > 1) {
|
Chris@169
|
852
|
Chris@169
|
853 shift = m_pianoRollShifts[i][note];
|
Chris@169
|
854
|
Chris@169
|
855 if (i == partStart) {
|
Chris@169
|
856 partShift = shift;
|
Chris@169
|
857 }
|
Chris@169
|
858
|
Chris@169
|
859 if (i > partStart + partThreshold && shift != partShift) {
|
Chris@169
|
860
|
Chris@169
|
861 // cerr << "i = " << i << ", partStart = " << partStart << ", shift = " << shift << ", partShift = " << partShift << endl;
|
Chris@169
|
862
|
Chris@169
|
863 // pitch has changed, emit an intermediate note
|
Chris@252
|
864 noteFeatures.push_back(makeNoteFeature(partStart,
|
Chris@252
|
865 i,
|
Chris@252
|
866 note,
|
Chris@252
|
867 partShift,
|
Chris@252
|
868 shiftCount,
|
Chris@252
|
869 partVelocity));
|
Chris@169
|
870 partStart = i;
|
Chris@169
|
871 partShift = shift;
|
Chris@169
|
872 partVelocity = 0;
|
Chris@169
|
873 }
|
Chris@169
|
874 }
|
Chris@169
|
875
|
Chris@246
|
876 int v = round(strength * 2);
|
Chris@169
|
877 if (v > partVelocity) {
|
Chris@169
|
878 partVelocity = v;
|
Chris@169
|
879 }
|
Chris@169
|
880 }
|
Chris@169
|
881
|
Chris@169
|
882 if (end >= partStart + partThreshold) {
|
Chris@252
|
883 noteFeatures.push_back(makeNoteFeature(partStart,
|
Chris@252
|
884 end,
|
Chris@252
|
885 note,
|
Chris@252
|
886 partShift,
|
Chris@252
|
887 shiftCount,
|
Chris@252
|
888 partVelocity));
|
Chris@169
|
889 }
|
Chris@169
|
890 }
|
Chris@252
|
891
|
Chris@309
|
892 RealTime
|
Chris@309
|
893 Silvet::getColumnTimestamp(int column)
|
Chris@309
|
894 {
|
Chris@309
|
895 double columnDuration = 1.0 / m_colsPerSec;
|
Chris@309
|
896 int postFilterLatency = int(m_postFilter[0]->getSize() / 2);
|
Chris@309
|
897
|
Chris@309
|
898 return m_startTime + RealTime::fromSeconds
|
Chris@309
|
899 (columnDuration * (column - postFilterLatency) + 0.02);
|
Chris@309
|
900 }
|
Chris@309
|
901
|
Chris@252
|
902 Silvet::Feature
|
Chris@252
|
903 Silvet::makeNoteFeature(int start,
|
Chris@252
|
904 int end,
|
Chris@252
|
905 int note,
|
Chris@252
|
906 int shift,
|
Chris@252
|
907 int shiftCount,
|
Chris@252
|
908 int velocity)
|
Chris@252
|
909 {
|
Chris@252
|
910 Feature f;
|
Chris@252
|
911
|
Chris@252
|
912 f.hasTimestamp = true;
|
Chris@309
|
913 f.timestamp = getColumnTimestamp(start);
|
Chris@252
|
914
|
Chris@252
|
915 f.hasDuration = true;
|
Chris@309
|
916 f.duration = getColumnTimestamp(end) - f.timestamp;
|
Chris@252
|
917
|
Chris@252
|
918 f.values.clear();
|
Chris@252
|
919
|
Chris@252
|
920 f.values.push_back
|
Chris@252
|
921 (noteFrequency(note, shift, shiftCount));
|
Chris@252
|
922
|
Chris@252
|
923 float inputGain = getInputGainAt(f.timestamp);
|
Chris@252
|
924 // cerr << "adjusting velocity from " << velocity << " to " << round(velocity/inputGain) << endl;
|
Chris@252
|
925 velocity = round(velocity / inputGain);
|
Chris@252
|
926 if (velocity > 127) velocity = 127;
|
Chris@252
|
927 if (velocity < 1) velocity = 1;
|
Chris@252
|
928 f.values.push_back(velocity);
|
Chris@252
|
929
|
Chris@252
|
930 f.label = noteName(note, shift, shiftCount);
|
Chris@252
|
931
|
Chris@252
|
932 return f;
|
Chris@252
|
933 }
|
Chris@252
|
934
|
Chris@252
|
935 float
|
Chris@252
|
936 Silvet::getInputGainAt(RealTime t)
|
Chris@252
|
937 {
|
Chris@252
|
938 map<RealTime, float>::const_iterator i = m_inputGains.lower_bound(t);
|
Chris@252
|
939
|
Chris@252
|
940 if (i == m_inputGains.end()) {
|
Chris@252
|
941 if (i != m_inputGains.begin()) {
|
Chris@252
|
942 --i;
|
Chris@252
|
943 } else {
|
Chris@252
|
944 return 1.f; // no data
|
Chris@252
|
945 }
|
Chris@252
|
946 }
|
Chris@252
|
947
|
Chris@252
|
948 // cerr << "gain at time " << t << " = " << i->second << endl;
|
Chris@252
|
949
|
Chris@252
|
950 return i->second;
|
Chris@252
|
951 }
|
Chris@252
|
952
|