Chris@31
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
Chris@31
|
2
|
Chris@31
|
3 /*
|
Chris@31
|
4 Silvet
|
Chris@31
|
5
|
Chris@31
|
6 A Vamp plugin for note transcription.
|
Chris@31
|
7 Centre for Digital Music, Queen Mary University of London.
|
Chris@31
|
8
|
Chris@31
|
9 This program is free software; you can redistribute it and/or
|
Chris@31
|
10 modify it under the terms of the GNU General Public License as
|
Chris@31
|
11 published by the Free Software Foundation; either version 2 of the
|
Chris@31
|
12 License, or (at your option) any later version. See the file
|
Chris@31
|
13 COPYING included with this distribution for more information.
|
Chris@31
|
14 */
|
Chris@31
|
15
|
Chris@31
|
16 #include "Silvet.h"
|
Chris@34
|
17 #include "EM.h"
|
Chris@31
|
18
|
Chris@152
|
19 #include <cq/CQSpectrogram.h>
|
Chris@31
|
20
|
Chris@152
|
21 #include "MedianFilter.h"
|
Chris@152
|
22 #include "constant-q-cpp/src/dsp/Resampler.h"
|
Chris@246
|
23 #include "flattendynamics-ladspa.h"
|
Chris@298
|
24 #include "LiveInstruments.h"
|
Chris@31
|
25
|
Chris@31
|
26 #include <vector>
|
Chris@31
|
27
|
Chris@32
|
28 #include <cstdio>
|
Chris@32
|
29
|
Chris@31
|
30 using std::vector;
|
Chris@48
|
31 using std::cout;
|
Chris@31
|
32 using std::cerr;
|
Chris@31
|
33 using std::endl;
|
Chris@40
|
34 using Vamp::RealTime;
|
Chris@31
|
35
|
Chris@31
|
// Sample rate at which the constant-Q transform is run. Input at any
// other rate is resampled to this rate (see Silvet::reset).
static int processingSampleRate = 44100;

// Constant-Q resolution in bins per semitone: one value for live mode,
// another for the remaining processing modes.
static int binsPerSemitoneNormal = 5;
static int binsPerSemitoneLive = 1;

// Inclusive range of input sample rates accepted by Silvet::initialise.
static int maxInputSampleRate = 192000;
static int minInputSampleRate = 100;
|
Chris@272
|
43
|
Chris@31
|
44 Silvet::Silvet(float inputSampleRate) :
|
Chris@31
|
45 Plugin(inputSampleRate),
|
Chris@161
|
46 m_instruments(InstrumentPack::listInstrumentPacks()),
|
Chris@298
|
47 m_liveInstruments(LiveAdapter::adaptAll(m_instruments)),
|
Chris@31
|
48 m_resampler(0),
|
Chris@246
|
49 m_flattener(0),
|
Chris@110
|
50 m_cq(0),
|
Chris@297
|
51 m_mode(HighQualityMode),
|
Chris@166
|
52 m_fineTuning(false),
|
Chris@178
|
53 m_instrument(0),
|
Chris@178
|
54 m_colsPerSec(50)
|
Chris@31
|
55 {
|
Chris@31
|
56 }
|
Chris@31
|
57
|
Chris@31
|
58 Silvet::~Silvet()
|
Chris@31
|
59 {
|
Chris@31
|
60 delete m_resampler;
|
Chris@246
|
61 delete m_flattener;
|
Chris@31
|
62 delete m_cq;
|
Chris@41
|
63 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
|
Chris@41
|
64 delete m_postFilter[i];
|
Chris@41
|
65 }
|
Chris@31
|
66 }
|
Chris@31
|
67
|
Chris@31
|
68 string
|
Chris@31
|
69 Silvet::getIdentifier() const
|
Chris@31
|
70 {
|
Chris@31
|
71 return "silvet";
|
Chris@31
|
72 }
|
Chris@31
|
73
|
Chris@31
|
74 string
|
Chris@31
|
75 Silvet::getName() const
|
Chris@31
|
76 {
|
Chris@31
|
77 return "Silvet Note Transcription";
|
Chris@31
|
78 }
|
Chris@31
|
79
|
Chris@31
|
80 string
|
Chris@31
|
81 Silvet::getDescription() const
|
Chris@31
|
82 {
|
Chris@191
|
83 return "Estimate the note onsets, pitches, and durations that make up a music recording.";
|
Chris@31
|
84 }
|
Chris@31
|
85
|
Chris@31
|
86 string
|
Chris@31
|
87 Silvet::getMaker() const
|
Chris@31
|
88 {
|
Chris@191
|
89 return "Queen Mary, University of London";
|
Chris@31
|
90 }
|
Chris@31
|
91
|
Chris@31
|
92 int
|
Chris@31
|
93 Silvet::getPluginVersion() const
|
Chris@31
|
94 {
|
Chris@295
|
95 return 2;
|
Chris@31
|
96 }
|
Chris@31
|
97
|
Chris@31
|
98 string
|
Chris@31
|
99 Silvet::getCopyright() const
|
Chris@31
|
100 {
|
Chris@191
|
101 return "Method by Emmanouil Benetos and Simon Dixon; plugin by Chris Cannam and Emmanouil Benetos. GPL licence.";
|
Chris@31
|
102 }
|
Chris@31
|
103
|
Chris@31
|
104 Silvet::InputDomain
|
Chris@31
|
105 Silvet::getInputDomain() const
|
Chris@31
|
106 {
|
Chris@31
|
107 return TimeDomain;
|
Chris@31
|
108 }
|
Chris@31
|
109
|
Chris@31
|
110 size_t
|
Chris@31
|
111 Silvet::getPreferredBlockSize() const
|
Chris@31
|
112 {
|
Chris@31
|
113 return 0;
|
Chris@31
|
114 }
|
Chris@31
|
115
|
Chris@31
|
116 size_t
|
Chris@31
|
117 Silvet::getPreferredStepSize() const
|
Chris@31
|
118 {
|
Chris@31
|
119 return 0;
|
Chris@31
|
120 }
|
Chris@31
|
121
|
Chris@31
|
122 size_t
|
Chris@31
|
123 Silvet::getMinChannelCount() const
|
Chris@31
|
124 {
|
Chris@31
|
125 return 1;
|
Chris@31
|
126 }
|
Chris@31
|
127
|
Chris@31
|
128 size_t
|
Chris@31
|
129 Silvet::getMaxChannelCount() const
|
Chris@31
|
130 {
|
Chris@31
|
131 return 1;
|
Chris@31
|
132 }
|
Chris@31
|
133
|
Chris@31
|
134 Silvet::ParameterList
|
Chris@31
|
135 Silvet::getParameterDescriptors() const
|
Chris@31
|
136 {
|
Chris@31
|
137 ParameterList list;
|
Chris@110
|
138
|
Chris@110
|
139 ParameterDescriptor desc;
|
Chris@110
|
140 desc.identifier = "mode";
|
Chris@110
|
141 desc.name = "Processing mode";
|
Chris@110
|
142 desc.unit = "";
|
Chris@297
|
143 desc.description = "Sets the tradeoff of processing speed against transcription quality. Draft mode is tuned in favour of overall speed; Live mode is tuned in favour of lower latency; while Intensive mode (the default) will almost always produce the best results.";
|
Chris@110
|
144 desc.minValue = 0;
|
Chris@297
|
145 desc.maxValue = 2;
|
Chris@113
|
146 desc.defaultValue = 1;
|
Chris@110
|
147 desc.isQuantized = true;
|
Chris@110
|
148 desc.quantizeStep = 1;
|
Chris@166
|
149 desc.valueNames.push_back("Draft (faster)");
|
Chris@165
|
150 desc.valueNames.push_back("Intensive (higher quality)");
|
Chris@297
|
151 desc.valueNames.push_back("Live (lower latency)");
|
Chris@161
|
152 list.push_back(desc);
|
Chris@161
|
153
|
Chris@176
|
154 desc.identifier = "instrument";
|
Chris@176
|
155 desc.name = "Instrument";
|
Chris@161
|
156 desc.unit = "";
|
Chris@271
|
157 desc.description = "The instrument or instruments known to be present in the recording. This affects the set of instrument templates used, as well as the expected level of polyphony in the output. Using a more limited set of instruments than the default will also make the plugin run faster.\nNote that this plugin cannot isolate instruments: you can't use this setting to request notes from only one instrument in a recording with several. Instead, use this as a hint to the plugin about which instruments are actually present.";
|
Chris@161
|
158 desc.minValue = 0;
|
Chris@162
|
159 desc.maxValue = m_instruments.size()-1;
|
Chris@162
|
160 desc.defaultValue = 0;
|
Chris@161
|
161 desc.isQuantized = true;
|
Chris@161
|
162 desc.quantizeStep = 1;
|
Chris@161
|
163 desc.valueNames.clear();
|
Chris@162
|
164 for (int i = 0; i < int(m_instruments.size()); ++i) {
|
Chris@162
|
165 desc.valueNames.push_back(m_instruments[i].name);
|
Chris@162
|
166 }
|
Chris@166
|
167 list.push_back(desc);
|
Chris@161
|
168
|
Chris@166
|
169 desc.identifier = "finetune";
|
Chris@166
|
170 desc.name = "Return fine pitch estimates";
|
Chris@166
|
171 desc.unit = "";
|
Chris@271
|
172 desc.description = "Return pitch estimates at finer than semitone resolution. This works only in Intensive mode. Notes that appear to drift in pitch will be split up into shorter notes with individually finer pitches.";
|
Chris@166
|
173 desc.minValue = 0;
|
Chris@166
|
174 desc.maxValue = 1;
|
Chris@166
|
175 desc.defaultValue = 0;
|
Chris@166
|
176 desc.isQuantized = true;
|
Chris@166
|
177 desc.quantizeStep = 1;
|
Chris@166
|
178 desc.valueNames.clear();
|
Chris@110
|
179 list.push_back(desc);
|
Chris@110
|
180
|
Chris@31
|
181 return list;
|
Chris@31
|
182 }
|
Chris@31
|
183
|
Chris@31
|
184 float
|
Chris@31
|
185 Silvet::getParameter(string identifier) const
|
Chris@31
|
186 {
|
Chris@110
|
187 if (identifier == "mode") {
|
Chris@297
|
188 return (float)(int)m_mode;
|
Chris@166
|
189 } else if (identifier == "finetune") {
|
Chris@166
|
190 return m_fineTuning ? 1.f : 0.f;
|
Chris@176
|
191 } else if (identifier == "instrument") {
|
Chris@162
|
192 return m_instrument;
|
Chris@110
|
193 }
|
Chris@31
|
194 return 0;
|
Chris@31
|
195 }
|
Chris@31
|
196
|
Chris@31
|
197 void
|
Chris@31
|
198 Silvet::setParameter(string identifier, float value)
|
Chris@31
|
199 {
|
Chris@110
|
200 if (identifier == "mode") {
|
Chris@297
|
201 m_mode = (ProcessingMode)(int)(value + 0.5);
|
Chris@166
|
202 } else if (identifier == "finetune") {
|
Chris@166
|
203 m_fineTuning = (value > 0.5);
|
Chris@176
|
204 } else if (identifier == "instrument") {
|
Chris@162
|
205 m_instrument = lrintf(value);
|
Chris@110
|
206 }
|
Chris@31
|
207 }
|
Chris@31
|
208
|
Chris@31
|
209 Silvet::ProgramList
|
Chris@31
|
210 Silvet::getPrograms() const
|
Chris@31
|
211 {
|
Chris@31
|
212 ProgramList list;
|
Chris@31
|
213 return list;
|
Chris@31
|
214 }
|
Chris@31
|
215
|
Chris@31
|
216 string
|
Chris@31
|
217 Silvet::getCurrentProgram() const
|
Chris@31
|
218 {
|
Chris@31
|
219 return "";
|
Chris@31
|
220 }
|
Chris@31
|
221
|
Chris@31
|
222 void
|
Chris@31
|
223 Silvet::selectProgram(string name)
|
Chris@31
|
224 {
|
Chris@31
|
225 }
|
Chris@31
|
226
|
Chris@31
|
227 Silvet::OutputList
|
Chris@31
|
228 Silvet::getOutputDescriptors() const
|
Chris@31
|
229 {
|
Chris@31
|
230 OutputList list;
|
Chris@31
|
231
|
Chris@31
|
232 OutputDescriptor d;
|
Chris@51
|
233 d.identifier = "notes";
|
Chris@51
|
234 d.name = "Note transcription";
|
Chris@271
|
235 d.description = "Overall note transcription. Each note has time, duration, estimated pitch, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture.";
|
Chris@41
|
236 d.unit = "Hz";
|
Chris@31
|
237 d.hasFixedBinCount = true;
|
Chris@31
|
238 d.binCount = 2;
|
Chris@41
|
239 d.binNames.push_back("Frequency");
|
Chris@31
|
240 d.binNames.push_back("Velocity");
|
Chris@31
|
241 d.hasKnownExtents = false;
|
Chris@31
|
242 d.isQuantized = false;
|
Chris@31
|
243 d.sampleType = OutputDescriptor::VariableSampleRate;
|
Chris@246
|
244 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
|
Chris@31
|
245 d.hasDuration = true;
|
Chris@32
|
246 m_notesOutputNo = list.size();
|
Chris@32
|
247 list.push_back(d);
|
Chris@32
|
248
|
Chris@178
|
249 d.identifier = "timefreq";
|
Chris@178
|
250 d.name = "Time-frequency distribution";
|
Chris@271
|
251 d.description = "Filtered constant-Q time-frequency distribution as used as input to the expectation-maximisation algorithm.";
|
Chris@178
|
252 d.unit = "";
|
Chris@178
|
253 d.hasFixedBinCount = true;
|
Chris@298
|
254 d.binCount = getPack(0).templateHeight;
|
Chris@178
|
255 d.binNames.clear();
|
Chris@178
|
256 if (m_cq) {
|
Chris@294
|
257 char name[50];
|
Chris@298
|
258 for (int i = 0; i < getPack(0).templateHeight; ++i) {
|
Chris@178
|
259 // We have a 600-bin (10 oct 60-bin CQ) of which the
|
Chris@178
|
260 // lowest-frequency 55 bins have been dropped, for a
|
Chris@178
|
261 // 545-bin template. The native CQ bins go high->low
|
Chris@178
|
262 // frequency though, so these are still the first 545 bins
|
Chris@178
|
263 // as reported by getBinFrequency, though in reverse order
|
Chris@178
|
264 float freq = m_cq->getBinFrequency
|
Chris@298
|
265 (getPack(0).templateHeight - i - 1);
|
Chris@178
|
266 sprintf(name, "%.1f Hz", freq);
|
Chris@178
|
267 d.binNames.push_back(name);
|
Chris@178
|
268 }
|
Chris@178
|
269 }
|
Chris@178
|
270 d.hasKnownExtents = false;
|
Chris@178
|
271 d.isQuantized = false;
|
Chris@178
|
272 d.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@178
|
273 d.sampleRate = m_colsPerSec;
|
Chris@178
|
274 d.hasDuration = false;
|
Chris@178
|
275 m_fcqOutputNo = list.size();
|
Chris@178
|
276 list.push_back(d);
|
Chris@178
|
277
|
Chris@294
|
278 d.identifier = "pitchactivation";
|
Chris@294
|
279 d.name = "Pitch activation distribution";
|
Chris@294
|
280 d.description = "Pitch activation distribution resulting from expectation-maximisation algorithm, prior to note extraction.";
|
Chris@294
|
281 d.unit = "";
|
Chris@294
|
282 d.hasFixedBinCount = true;
|
Chris@298
|
283 d.binCount = getPack(0).templateNoteCount;
|
Chris@294
|
284 d.binNames.clear();
|
Chris@294
|
285 if (m_cq) {
|
Chris@298
|
286 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
|
Chris@294
|
287 d.binNames.push_back(noteName(i, 0, 1));
|
Chris@294
|
288 }
|
Chris@294
|
289 }
|
Chris@294
|
290 d.hasKnownExtents = false;
|
Chris@294
|
291 d.isQuantized = false;
|
Chris@294
|
292 d.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@294
|
293 d.sampleRate = m_colsPerSec;
|
Chris@294
|
294 d.hasDuration = false;
|
Chris@294
|
295 m_pitchOutputNo = list.size();
|
Chris@294
|
296 list.push_back(d);
|
Chris@294
|
297
|
Chris@31
|
298 return list;
|
Chris@31
|
299 }
|
Chris@31
|
300
|
Chris@38
|
301 std::string
|
Chris@175
|
302 Silvet::noteName(int note, int shift, int shiftCount) const
|
Chris@38
|
303 {
|
Chris@38
|
304 static const char *names[] = {
|
Chris@38
|
305 "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"
|
Chris@38
|
306 };
|
Chris@38
|
307
|
Chris@175
|
308 const char *n = names[note % 12];
|
Chris@38
|
309
|
Chris@175
|
310 int oct = (note + 9) / 12;
|
Chris@38
|
311
|
Chris@175
|
312 char buf[30];
|
Chris@175
|
313
|
Chris@175
|
314 float pshift = 0.f;
|
Chris@175
|
315 if (shiftCount > 1) {
|
Chris@175
|
316 // see noteFrequency below
|
Chris@175
|
317 pshift =
|
Chris@175
|
318 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
|
Chris@175
|
319 }
|
Chris@175
|
320
|
Chris@175
|
321 if (pshift > 0.f) {
|
Chris@175
|
322 sprintf(buf, "%s%d+%dc", n, oct, int(round(pshift * 100)));
|
Chris@175
|
323 } else if (pshift < 0.f) {
|
Chris@175
|
324 sprintf(buf, "%s%d-%dc", n, oct, int(round((-pshift) * 100)));
|
Chris@175
|
325 } else {
|
Chris@175
|
326 sprintf(buf, "%s%d", n, oct);
|
Chris@175
|
327 }
|
Chris@38
|
328
|
Chris@38
|
329 return buf;
|
Chris@38
|
330 }
|
Chris@38
|
331
|
Chris@41
|
332 float
|
Chris@168
|
333 Silvet::noteFrequency(int note, int shift, int shiftCount) const
|
Chris@41
|
334 {
|
Chris@169
|
335 // Convert shift number to a pitch shift. The given shift number
|
Chris@169
|
336 // is an offset into the template array, which starts with some
|
Chris@169
|
337 // zeros, followed by the template, then some trailing zeros.
|
Chris@169
|
338 //
|
Chris@169
|
339 // Example: if we have templateMaxShift == 2 and thus shiftCount
|
Chris@169
|
340 // == 5, then the number will be in the range 0-4 and the template
|
Chris@169
|
341 // will have 2 zeros at either end. Thus number 2 represents the
|
Chris@169
|
342 // template "as recorded", for a pitch shift of 0; smaller indices
|
Chris@169
|
343 // represent moving the template *up* in pitch (by introducing
|
Chris@169
|
344 // zeros at the start, which is the low-frequency end), for a
|
Chris@169
|
345 // positive pitch shift; and higher values represent moving it
|
Chris@169
|
346 // down in pitch, for a negative pitch shift.
|
Chris@169
|
347
|
Chris@175
|
348 float pshift = 0.f;
|
Chris@175
|
349 if (shiftCount > 1) {
|
Chris@175
|
350 pshift =
|
Chris@175
|
351 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
|
Chris@175
|
352 }
|
Chris@169
|
353
|
Chris@301
|
354 float freq = float(27.5 * pow(2.0, (note + pshift) / 12.0));
|
Chris@301
|
355
|
Chris@301
|
356 cerr << "note = " << note << ", shift = " << shift << ", shiftCount = "
|
Chris@301
|
357 << shiftCount << ", obtained freq = " << freq << endl;
|
Chris@301
|
358
|
Chris@301
|
359 return freq;
|
Chris@41
|
360 }
|
Chris@41
|
361
|
Chris@31
|
362 bool
|
Chris@31
|
363 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
Chris@31
|
364 {
|
Chris@272
|
365 if (m_inputSampleRate < minInputSampleRate ||
|
Chris@272
|
366 m_inputSampleRate > maxInputSampleRate) {
|
Chris@272
|
367 cerr << "Silvet::initialise: Unsupported input sample rate "
|
Chris@272
|
368 << m_inputSampleRate << " (supported min " << minInputSampleRate
|
Chris@272
|
369 << ", max " << maxInputSampleRate << ")" << endl;
|
Chris@272
|
370 return false;
|
Chris@272
|
371 }
|
Chris@272
|
372
|
Chris@31
|
373 if (channels < getMinChannelCount() ||
|
Chris@272
|
374 channels > getMaxChannelCount()) {
|
Chris@272
|
375 cerr << "Silvet::initialise: Unsupported channel count " << channels
|
Chris@272
|
376 << " (supported min " << getMinChannelCount() << ", max "
|
Chris@272
|
377 << getMaxChannelCount() << ")" << endl;
|
Chris@272
|
378 return false;
|
Chris@272
|
379 }
|
Chris@31
|
380
|
Chris@31
|
381 if (stepSize != blockSize) {
|
Chris@31
|
382 cerr << "Silvet::initialise: Step size must be the same as block size ("
|
Chris@31
|
383 << stepSize << " != " << blockSize << ")" << endl;
|
Chris@31
|
384 return false;
|
Chris@31
|
385 }
|
Chris@31
|
386
|
Chris@31
|
387 m_blockSize = blockSize;
|
Chris@31
|
388
|
Chris@31
|
389 reset();
|
Chris@31
|
390
|
Chris@31
|
391 return true;
|
Chris@31
|
392 }
|
Chris@31
|
393
|
Chris@31
|
394 void
|
Chris@31
|
395 Silvet::reset()
|
Chris@31
|
396 {
|
Chris@31
|
397 delete m_resampler;
|
Chris@246
|
398 delete m_flattener;
|
Chris@31
|
399 delete m_cq;
|
Chris@31
|
400
|
Chris@31
|
401 if (m_inputSampleRate != processingSampleRate) {
|
Chris@31
|
402 m_resampler = new Resampler(m_inputSampleRate, processingSampleRate);
|
Chris@31
|
403 } else {
|
Chris@31
|
404 m_resampler = 0;
|
Chris@31
|
405 }
|
Chris@31
|
406
|
Chris@246
|
407 m_flattener = new FlattenDynamics(m_inputSampleRate); // before resampling
|
Chris@246
|
408 m_flattener->reset();
|
Chris@246
|
409
|
Chris@301
|
410 // this happens to be processingSampleRate / 3, and is the top
|
Chris@301
|
411 // freq used for the EM templates:
|
Chris@301
|
412 double maxFreq = 14700;
|
Chris@301
|
413
|
Chris@301
|
414 if (m_mode == LiveMode) {
|
Chris@301
|
415 // We only have 12 bpo rather than 60, so we need the top bin
|
Chris@301
|
416 // to be the middle one of the top 5, i.e. 2/5 of a semitone
|
Chris@301
|
417 // lower than 14700
|
Chris@301
|
418 maxFreq *= powf(2.0, -1.0 / 30.0);
|
Chris@301
|
419 }
|
Chris@301
|
420
|
Chris@173
|
421 double minFreq = 27.5;
|
Chris@173
|
422
|
Chris@297
|
423 if (m_mode != HighQualityMode) {
|
Chris@173
|
424 // We don't actually return any notes from the bottom octave,
|
Chris@173
|
425 // so we can just pad with zeros
|
Chris@173
|
426 minFreq *= 2;
|
Chris@173
|
427 }
|
Chris@173
|
428
|
Chris@298
|
429 int bpo = 12 *
|
Chris@298
|
430 (m_mode == LiveMode ? binsPerSemitoneLive : binsPerSemitoneNormal);
|
Chris@301
|
431
|
Chris@154
|
432 CQParameters params(processingSampleRate,
|
Chris@173
|
433 minFreq,
|
Chris@154
|
434 processingSampleRate / 3,
|
Chris@298
|
435 bpo);
|
Chris@154
|
436
|
Chris@155
|
437 params.q = 0.95; // MIREX code uses 0.8, but it seems 0.9 or lower
|
Chris@155
|
438 // drops the FFT size to 512 from 1024 and alters
|
Chris@155
|
439 // some other processing parameters, making
|
Chris@155
|
440 // everything much, much slower. Could be a flaw
|
Chris@155
|
441 // in the CQ parameter calculations, must check
|
Chris@154
|
442 params.atomHopFactor = 0.3;
|
Chris@154
|
443 params.threshold = 0.0005;
|
Chris@172
|
444 params.window = CQParameters::Hann;
|
Chris@154
|
445
|
Chris@154
|
446 m_cq = new CQSpectrogram(params, CQSpectrogram::InterpolateLinear);
|
Chris@31
|
447
|
Chris@301
|
448 cerr << "CQ bins = " << m_cq->getTotalBins() << endl;
|
Chris@301
|
449 cerr << "CQ min freq = " << m_cq->getMinFrequency() << " (and for confirmation, freq of bin 0 = " << m_cq->getBinFrequency(0) << ")" << endl;
|
Chris@297
|
450
|
Chris@297
|
451 m_colsPerSec = (m_mode == DraftMode ? 25 : 50);
|
Chris@165
|
452
|
Chris@41
|
453 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
|
Chris@41
|
454 delete m_postFilter[i];
|
Chris@41
|
455 }
|
Chris@41
|
456 m_postFilter.clear();
|
Chris@298
|
457 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
|
Chris@41
|
458 m_postFilter.push_back(new MedianFilter<double>(3));
|
Chris@41
|
459 }
|
Chris@41
|
460 m_pianoRoll.clear();
|
Chris@246
|
461 m_inputGains.clear();
|
Chris@32
|
462 m_columnCount = 0;
|
Chris@272
|
463 m_resampledCount = 0;
|
Chris@40
|
464 m_startTime = RealTime::zeroTime;
|
Chris@31
|
465 }
|
Chris@31
|
466
|
Chris@31
|
467 Silvet::FeatureSet
|
Chris@31
|
468 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
Chris@31
|
469 {
|
Chris@40
|
470 if (m_columnCount == 0) {
|
Chris@40
|
471 m_startTime = timestamp;
|
Chris@40
|
472 }
|
Chris@246
|
473
|
Chris@246
|
474 vector<float> flattened(m_blockSize);
|
Chris@246
|
475 float gain = 1.f;
|
Chris@246
|
476 m_flattener->connectInputPort
|
Chris@246
|
477 (FlattenDynamics::AudioInputPort, inputBuffers[0]);
|
Chris@246
|
478 m_flattener->connectOutputPort
|
Chris@246
|
479 (FlattenDynamics::AudioOutputPort, &flattened[0]);
|
Chris@246
|
480 m_flattener->connectOutputPort
|
Chris@246
|
481 (FlattenDynamics::GainOutputPort, &gain);
|
Chris@246
|
482 m_flattener->process(m_blockSize);
|
Chris@246
|
483
|
Chris@252
|
484 m_inputGains[timestamp] = gain;
|
Chris@40
|
485
|
Chris@31
|
486 vector<double> data;
|
Chris@40
|
487 for (int i = 0; i < m_blockSize; ++i) {
|
Chris@246
|
488 double d = flattened[i];
|
Chris@235
|
489 data.push_back(d);
|
Chris@40
|
490 }
|
Chris@31
|
491
|
Chris@31
|
492 if (m_resampler) {
|
Chris@272
|
493
|
Chris@31
|
494 data = m_resampler->process(data.data(), data.size());
|
Chris@272
|
495
|
Chris@272
|
496 int hadCount = m_resampledCount;
|
Chris@272
|
497 m_resampledCount += data.size();
|
Chris@272
|
498
|
Chris@272
|
499 int resamplerLatency = m_resampler->getLatency();
|
Chris@272
|
500
|
Chris@272
|
501 if (hadCount < resamplerLatency) {
|
Chris@272
|
502 int stillToDrop = resamplerLatency - hadCount;
|
Chris@272
|
503 if (stillToDrop >= int(data.size())) {
|
Chris@272
|
504 return FeatureSet();
|
Chris@272
|
505 } else {
|
Chris@272
|
506 data = vector<double>(data.begin() + stillToDrop, data.end());
|
Chris@272
|
507 }
|
Chris@272
|
508 }
|
Chris@31
|
509 }
|
Chris@272
|
510
|
Chris@32
|
511 Grid cqout = m_cq->process(data);
|
Chris@51
|
512 FeatureSet fs = transcribe(cqout);
|
Chris@51
|
513 return fs;
|
Chris@34
|
514 }
|
Chris@34
|
515
|
Chris@34
|
516 Silvet::FeatureSet
|
Chris@34
|
517 Silvet::getRemainingFeatures()
|
Chris@34
|
518 {
|
Chris@145
|
519 Grid cqout = m_cq->getRemainingOutput();
|
Chris@51
|
520 FeatureSet fs = transcribe(cqout);
|
Chris@51
|
521 return fs;
|
Chris@34
|
522 }
|
Chris@34
|
523
|
Chris@34
|
524 Silvet::FeatureSet
|
Chris@34
|
525 Silvet::transcribe(const Grid &cqout)
|
Chris@34
|
526 {
|
Chris@32
|
527 Grid filtered = preProcess(cqout);
|
Chris@31
|
528
|
Chris@32
|
529 FeatureSet fs;
|
Chris@32
|
530
|
Chris@104
|
531 if (filtered.empty()) return fs;
|
Chris@170
|
532
|
Chris@298
|
533 const InstrumentPack &pack(getPack(m_instrument));
|
Chris@104
|
534
|
Chris@178
|
535 for (int i = 0; i < (int)filtered.size(); ++i) {
|
Chris@178
|
536 Feature f;
|
Chris@178
|
537 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@178
|
538 f.values.push_back(float(filtered[i][j]));
|
Chris@178
|
539 }
|
Chris@178
|
540 fs[m_fcqOutputNo].push_back(f);
|
Chris@178
|
541 }
|
Chris@178
|
542
|
Chris@34
|
543 int width = filtered.size();
|
Chris@34
|
544
|
Chris@297
|
545 int iterations = (m_mode == HighQualityMode ? 20 : 10);
|
Chris@34
|
546
|
Chris@176
|
547 Grid localPitches(width, vector<double>(pack.templateNoteCount, 0.0));
|
Chris@170
|
548
|
Chris@297
|
549 bool wantShifts = (m_mode == HighQualityMode) && m_fineTuning;
|
Chris@170
|
550 int shiftCount = 1;
|
Chris@170
|
551 if (wantShifts) {
|
Chris@170
|
552 shiftCount = pack.templateMaxShift * 2 + 1;
|
Chris@170
|
553 }
|
Chris@170
|
554
|
Chris@170
|
555 vector<vector<int> > localBestShifts;
|
Chris@170
|
556 if (wantShifts) {
|
Chris@170
|
557 localBestShifts =
|
Chris@176
|
558 vector<vector<int> >(width, vector<int>(pack.templateNoteCount, 0));
|
Chris@170
|
559 }
|
Chris@170
|
560
|
Chris@170
|
561 vector<bool> present(width, false);
|
Chris@37
|
562
|
Chris@123
|
563 #pragma omp parallel for
|
Chris@123
|
564 for (int i = 0; i < width; ++i) {
|
Chris@104
|
565
|
Chris@170
|
566 double sum = 0.0;
|
Chris@176
|
567 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@170
|
568 sum += filtered.at(i).at(j);
|
Chris@170
|
569 }
|
Chris@170
|
570 if (sum < 1e-5) continue;
|
Chris@170
|
571
|
Chris@170
|
572 present[i] = true;
|
Chris@170
|
573
|
Chris@297
|
574 EM em(&pack, m_mode == HighQualityMode);
|
Chris@170
|
575
|
Chris@183
|
576 em.setPitchSparsity(pack.pitchSparsity);
|
Chris@213
|
577 em.setSourceSparsity(pack.sourceSparsity);
|
Chris@183
|
578
|
Chris@170
|
579 for (int j = 0; j < iterations; ++j) {
|
Chris@170
|
580 em.iterate(filtered.at(i).data());
|
Chris@37
|
581 }
|
Chris@37
|
582
|
Chris@170
|
583 const float *pitchDist = em.getPitchDistribution();
|
Chris@170
|
584 const float *const *shiftDist = em.getShifts();
|
Chris@37
|
585
|
Chris@176
|
586 for (int j = 0; j < pack.templateNoteCount; ++j) {
|
Chris@104
|
587
|
Chris@170
|
588 localPitches[i][j] = pitchDist[j] * sum;
|
Chris@170
|
589
|
Chris@170
|
590 int bestShift = 0;
|
Chris@179
|
591 float bestShiftValue = 0.0;
|
Chris@170
|
592 if (wantShifts) {
|
Chris@170
|
593 for (int k = 0; k < shiftCount; ++k) {
|
Chris@179
|
594 float value = shiftDist[k][j];
|
Chris@179
|
595 if (k == 0 || value > bestShiftValue) {
|
Chris@179
|
596 bestShiftValue = value;
|
Chris@170
|
597 bestShift = k;
|
Chris@170
|
598 }
|
Chris@170
|
599 }
|
Chris@170
|
600 localBestShifts[i][j] = bestShift;
|
Chris@170
|
601 }
|
Chris@123
|
602 }
|
Chris@123
|
603 }
|
Chris@166
|
604
|
Chris@166
|
605 for (int i = 0; i < width; ++i) {
|
Chris@37
|
606
|
Chris@170
|
607 if (!present[i]) {
|
Chris@170
|
608 // silent column
|
Chris@176
|
609 for (int j = 0; j < pack.templateNoteCount; ++j) {
|
Chris@170
|
610 m_postFilter[j]->push(0.0);
|
Chris@170
|
611 }
|
Chris@168
|
612 m_pianoRoll.push_back(map<int, double>());
|
Chris@170
|
613 if (wantShifts) {
|
Chris@168
|
614 m_pianoRollShifts.push_back(map<int, int>());
|
Chris@168
|
615 }
|
Chris@166
|
616 continue;
|
Chris@166
|
617 }
|
Chris@166
|
618
|
Chris@294
|
619 vector<double> filtered = postProcess
|
Chris@294
|
620 (localPitches[i], localBestShifts[i], wantShifts);
|
Chris@294
|
621
|
Chris@294
|
622 Feature f;
|
Chris@294
|
623 for (int j = 0; j < (int)filtered.size(); ++j) {
|
Chris@294
|
624 float v(filtered[j]);
|
Chris@294
|
625 if (v < pack.levelThreshold) v = 0.f;
|
Chris@294
|
626 f.values.push_back(v);
|
Chris@294
|
627 }
|
Chris@294
|
628 fs[m_pitchOutputNo].push_back(f);
|
Chris@166
|
629
|
Chris@168
|
630 FeatureList noteFeatures = noteTrack(shiftCount);
|
Chris@38
|
631
|
Chris@123
|
632 for (FeatureList::const_iterator fi = noteFeatures.begin();
|
Chris@123
|
633 fi != noteFeatures.end(); ++fi) {
|
Chris@123
|
634 fs[m_notesOutputNo].push_back(*fi);
|
Chris@40
|
635 }
|
Chris@34
|
636 }
|
Chris@34
|
637
|
Chris@32
|
638 return fs;
|
Chris@31
|
639 }
|
Chris@31
|
640
|
Chris@32
|
641 Silvet::Grid
|
Chris@32
|
642 Silvet::preProcess(const Grid &in)
|
Chris@32
|
643 {
|
Chris@32
|
644 int width = in.size();
|
Chris@32
|
645
|
Chris@165
|
646 int spacing = processingSampleRate / m_colsPerSec;
|
Chris@32
|
647
|
Chris@165
|
648 // need to be careful that col spacing is an integer number of samples!
|
Chris@165
|
649 assert(spacing * m_colsPerSec == processingSampleRate);
|
Chris@32
|
650
|
Chris@32
|
651 Grid out;
|
Chris@32
|
652
|
Chris@58
|
653 // We count the CQ latency in terms of processing hops, but
|
Chris@58
|
654 // actually it probably isn't an exact number of hops so this
|
Chris@58
|
655 // isn't quite accurate. But the small constant offset is
|
Chris@165
|
656 // practically irrelevant compared to the jitter from the frame
|
Chris@165
|
657 // size we reduce to in a moment
|
Chris@33
|
658 int latentColumns = m_cq->getLatency() / m_cq->getColumnHop();
|
Chris@33
|
659
|
Chris@298
|
660 const InstrumentPack &pack(getPack(m_instrument));
|
Chris@176
|
661
|
Chris@32
|
662 for (int i = 0; i < width; ++i) {
|
Chris@32
|
663
|
Chris@33
|
664 if (m_columnCount < latentColumns) {
|
Chris@33
|
665 ++m_columnCount;
|
Chris@33
|
666 continue;
|
Chris@33
|
667 }
|
Chris@33
|
668
|
Chris@32
|
669 int prevSampleNo = (m_columnCount - 1) * m_cq->getColumnHop();
|
Chris@32
|
670 int sampleNo = m_columnCount * m_cq->getColumnHop();
|
Chris@32
|
671
|
Chris@32
|
672 bool select = (sampleNo / spacing != prevSampleNo / spacing);
|
Chris@32
|
673
|
Chris@32
|
674 if (select) {
|
Chris@32
|
675 vector<double> inCol = in[i];
|
Chris@176
|
676 vector<double> outCol(pack.templateHeight);
|
Chris@32
|
677
|
Chris@178
|
678 // In HQ mode, the CQ returns 600 bins and we ignore the
|
Chris@298
|
679 // lowest 55 of them (assuming binsPerSemitone == 5).
|
Chris@178
|
680 //
|
Chris@297
|
681 // In draft and live mode the CQ is an octave shorter,
|
Chris@300
|
682 // returning 540 bins or equivalent, so we instead pad
|
Chris@300
|
683 // them with an additional 5 or equivalent zeros.
|
Chris@178
|
684 //
|
Chris@178
|
685 // We also need to reverse the column as we go, since the
|
Chris@178
|
686 // raw CQ has the high frequencies first and we need it
|
Chris@178
|
687 // the other way around.
|
Chris@32
|
688
|
Chris@298
|
689 int bps = (m_mode == LiveMode ?
|
Chris@298
|
690 binsPerSemitoneLive : binsPerSemitoneNormal);
|
Chris@298
|
691
|
Chris@297
|
692 if (m_mode == HighQualityMode) {
|
Chris@178
|
693 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@298
|
694 int ix = inCol.size() - j - (11 * bps);
|
Chris@178
|
695 outCol[j] = inCol[ix];
|
Chris@178
|
696 }
|
Chris@178
|
697 } else {
|
Chris@298
|
698 for (int j = 0; j < bps; ++j) {
|
Chris@178
|
699 outCol[j] = 0.0;
|
Chris@178
|
700 }
|
Chris@298
|
701 for (int j = bps; j < pack.templateHeight; ++j) {
|
Chris@298
|
702 int ix = inCol.size() - j + (bps-1);
|
Chris@178
|
703 outCol[j] = inCol[ix];
|
Chris@178
|
704 }
|
Chris@46
|
705 }
|
Chris@32
|
706
|
Chris@46
|
707 vector<double> noiseLevel1 =
|
Chris@298
|
708 MedianFilter<double>::filter(8 * bps, outCol);
|
Chris@176
|
709 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@46
|
710 noiseLevel1[j] = std::min(outCol[j], noiseLevel1[j]);
|
Chris@46
|
711 }
|
Chris@32
|
712
|
Chris@46
|
713 vector<double> noiseLevel2 =
|
Chris@298
|
714 MedianFilter<double>::filter(8 * bps, noiseLevel1);
|
Chris@176
|
715 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@46
|
716 outCol[j] = std::max(outCol[j] - noiseLevel2[j], 0.0);
|
Chris@32
|
717 }
|
Chris@32
|
718
|
Chris@165
|
719 out.push_back(outCol);
|
Chris@32
|
720 }
|
Chris@32
|
721
|
Chris@32
|
722 ++m_columnCount;
|
Chris@32
|
723 }
|
Chris@32
|
724
|
Chris@32
|
725 return out;
|
Chris@32
|
726 }
|
Chris@32
|
727
|
Chris@294
|
728 vector<double>
|
Chris@170
|
729 Silvet::postProcess(const vector<double> &pitches,
|
Chris@170
|
730 const vector<int> &bestShifts,
|
Chris@170
|
731 bool wantShifts)
|
Chris@166
|
732 {
|
Chris@298
|
733 const InstrumentPack &pack(getPack(m_instrument));
|
Chris@176
|
734
|
Chris@41
|
735 vector<double> filtered;
|
Chris@41
|
736
|
Chris@176
|
737 for (int j = 0; j < pack.templateNoteCount; ++j) {
|
Chris@170
|
738 m_postFilter[j]->push(pitches[j]);
|
Chris@41
|
739 filtered.push_back(m_postFilter[j]->get());
|
Chris@41
|
740 }
|
Chris@41
|
741
|
Chris@41
|
742 // Threshold for level and reduce number of candidate pitches
|
Chris@41
|
743
|
Chris@41
|
744 typedef std::multimap<double, int> ValueIndexMap;
|
Chris@41
|
745
|
Chris@41
|
746 ValueIndexMap strengths;
|
Chris@166
|
747
|
Chris@176
|
748 for (int j = 0; j < pack.templateNoteCount; ++j) {
|
Chris@166
|
749 double strength = filtered[j];
|
Chris@183
|
750 if (strength < pack.levelThreshold) continue;
|
Chris@168
|
751 strengths.insert(ValueIndexMap::value_type(strength, j));
|
Chris@168
|
752 }
|
Chris@166
|
753
|
Chris@168
|
754 ValueIndexMap::const_iterator si = strengths.end();
|
Chris@167
|
755
|
Chris@168
|
756 map<int, double> active;
|
Chris@168
|
757 map<int, int> activeShifts;
|
Chris@168
|
758
|
Chris@183
|
759 while (int(active.size()) < pack.maxPolyphony && si != strengths.begin()) {
|
Chris@168
|
760
|
Chris@168
|
761 --si;
|
Chris@168
|
762
|
Chris@168
|
763 double strength = si->first;
|
Chris@168
|
764 int j = si->second;
|
Chris@168
|
765
|
Chris@168
|
766 active[j] = strength;
|
Chris@168
|
767
|
Chris@170
|
768 if (wantShifts) {
|
Chris@170
|
769 activeShifts[j] = bestShifts[j];
|
Chris@167
|
770 }
|
Chris@41
|
771 }
|
Chris@41
|
772
|
Chris@168
|
773 m_pianoRoll.push_back(active);
|
Chris@170
|
774
|
Chris@170
|
775 if (wantShifts) {
|
Chris@168
|
776 m_pianoRollShifts.push_back(activeShifts);
|
Chris@41
|
777 }
|
Chris@294
|
778
|
Chris@294
|
779 return filtered;
|
Chris@166
|
780 }
|
Chris@166
|
781
|
Chris@166
|
782 Vamp::Plugin::FeatureList
|
Chris@168
|
783 Silvet::noteTrack(int shiftCount)
|
Chris@166
|
784 {
|
Chris@41
|
785 // Minimum duration pruning, and conversion to notes. We can only
|
Chris@41
|
786 // report notes that have just ended (i.e. that are absent in the
|
Chris@168
|
787 // latest active set but present in the prior set in the piano
|
Chris@41
|
788 // roll) -- any notes that ended earlier will have been reported
|
Chris@41
|
789 // already, and if they haven't ended, we don't know their
|
Chris@41
|
790 // duration.
|
Chris@41
|
791
|
Chris@168
|
792 int width = m_pianoRoll.size() - 1;
|
Chris@168
|
793
|
Chris@168
|
794 const map<int, double> &active = m_pianoRoll[width];
|
Chris@41
|
795
|
Chris@165
|
796 double columnDuration = 1.0 / m_colsPerSec;
|
Chris@165
|
797
|
Chris@165
|
798 // only keep notes >= 100ms or thereabouts
|
Chris@165
|
799 int durationThreshold = floor(0.1 / columnDuration); // columns
|
Chris@165
|
800 if (durationThreshold < 1) durationThreshold = 1;
|
Chris@41
|
801
|
Chris@41
|
802 FeatureList noteFeatures;
|
Chris@41
|
803
|
Chris@41
|
804 if (width < durationThreshold + 1) {
|
Chris@41
|
805 return noteFeatures;
|
Chris@41
|
806 }
|
Chris@41
|
807
|
Chris@150
|
808 //!!! try: repeated note detection? (look for change in first derivative of the pitch matrix)
|
Chris@150
|
809
|
Chris@55
|
810 for (map<int, double>::const_iterator ni = m_pianoRoll[width-1].begin();
|
Chris@41
|
811 ni != m_pianoRoll[width-1].end(); ++ni) {
|
Chris@41
|
812
|
Chris@55
|
813 int note = ni->first;
|
Chris@41
|
814
|
Chris@41
|
815 if (active.find(note) != active.end()) {
|
Chris@41
|
816 // the note is still playing
|
Chris@41
|
817 continue;
|
Chris@41
|
818 }
|
Chris@41
|
819
|
Chris@41
|
820 // the note was playing but just ended
|
Chris@41
|
821 int end = width;
|
Chris@41
|
822 int start = end-1;
|
Chris@41
|
823
|
Chris@41
|
824 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) {
|
Chris@41
|
825 --start;
|
Chris@41
|
826 }
|
Chris@41
|
827 ++start;
|
Chris@41
|
828
|
Chris@169
|
829 if ((end - start) < durationThreshold) {
|
Chris@41
|
830 continue;
|
Chris@41
|
831 }
|
Chris@41
|
832
|
Chris@169
|
833 emitNote(start, end, note, shiftCount, noteFeatures);
|
Chris@41
|
834 }
|
Chris@41
|
835
|
Chris@62
|
836 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl;
|
Chris@41
|
837
|
Chris@41
|
838 return noteFeatures;
|
Chris@41
|
839 }
|
Chris@41
|
840
|
Chris@169
|
841 void
|
Chris@169
|
842 Silvet::emitNote(int start, int end, int note, int shiftCount,
|
Chris@169
|
843 FeatureList ¬eFeatures)
|
Chris@169
|
844 {
|
Chris@169
|
845 int partStart = start;
|
Chris@169
|
846 int partShift = 0;
|
Chris@169
|
847 int partVelocity = 0;
|
Chris@169
|
848
|
Chris@252
|
849 int partThreshold = floor(0.05 * m_colsPerSec);
|
Chris@169
|
850
|
Chris@169
|
851 for (int i = start; i != end; ++i) {
|
Chris@169
|
852
|
Chris@169
|
853 double strength = m_pianoRoll[i][note];
|
Chris@169
|
854
|
Chris@169
|
855 int shift = 0;
|
Chris@169
|
856
|
Chris@169
|
857 if (shiftCount > 1) {
|
Chris@169
|
858
|
Chris@169
|
859 shift = m_pianoRollShifts[i][note];
|
Chris@169
|
860
|
Chris@169
|
861 if (i == partStart) {
|
Chris@169
|
862 partShift = shift;
|
Chris@169
|
863 }
|
Chris@169
|
864
|
Chris@169
|
865 if (i > partStart + partThreshold && shift != partShift) {
|
Chris@169
|
866
|
Chris@169
|
867 // cerr << "i = " << i << ", partStart = " << partStart << ", shift = " << shift << ", partShift = " << partShift << endl;
|
Chris@169
|
868
|
Chris@169
|
869 // pitch has changed, emit an intermediate note
|
Chris@252
|
870 noteFeatures.push_back(makeNoteFeature(partStart,
|
Chris@252
|
871 i,
|
Chris@252
|
872 note,
|
Chris@252
|
873 partShift,
|
Chris@252
|
874 shiftCount,
|
Chris@252
|
875 partVelocity));
|
Chris@169
|
876 partStart = i;
|
Chris@169
|
877 partShift = shift;
|
Chris@169
|
878 partVelocity = 0;
|
Chris@169
|
879 }
|
Chris@169
|
880 }
|
Chris@169
|
881
|
Chris@246
|
882 int v = round(strength * 2);
|
Chris@169
|
883 if (v > partVelocity) {
|
Chris@169
|
884 partVelocity = v;
|
Chris@169
|
885 }
|
Chris@169
|
886 }
|
Chris@169
|
887
|
Chris@169
|
888 if (end >= partStart + partThreshold) {
|
Chris@252
|
889 noteFeatures.push_back(makeNoteFeature(partStart,
|
Chris@252
|
890 end,
|
Chris@252
|
891 note,
|
Chris@252
|
892 partShift,
|
Chris@252
|
893 shiftCount,
|
Chris@252
|
894 partVelocity));
|
Chris@169
|
895 }
|
Chris@169
|
896 }
|
Chris@252
|
897
|
Chris@252
|
898 Silvet::Feature
|
Chris@252
|
899 Silvet::makeNoteFeature(int start,
|
Chris@252
|
900 int end,
|
Chris@252
|
901 int note,
|
Chris@252
|
902 int shift,
|
Chris@252
|
903 int shiftCount,
|
Chris@252
|
904 int velocity)
|
Chris@252
|
905 {
|
Chris@252
|
906 double columnDuration = 1.0 / m_colsPerSec;
|
Chris@252
|
907 int postFilterLatency = int(m_postFilter[0]->getSize() / 2);
|
Chris@252
|
908
|
Chris@252
|
909 Feature f;
|
Chris@252
|
910
|
Chris@252
|
911 f.hasTimestamp = true;
|
Chris@285
|
912 f.timestamp = m_startTime + RealTime::fromSeconds
|
Chris@252
|
913 (columnDuration * (start - postFilterLatency) + 0.02);
|
Chris@252
|
914
|
Chris@252
|
915 f.hasDuration = true;
|
Chris@252
|
916 f.duration = RealTime::fromSeconds
|
Chris@252
|
917 (columnDuration * (end - start));
|
Chris@252
|
918
|
Chris@252
|
919 f.values.clear();
|
Chris@252
|
920
|
Chris@252
|
921 f.values.push_back
|
Chris@252
|
922 (noteFrequency(note, shift, shiftCount));
|
Chris@252
|
923
|
Chris@252
|
924 float inputGain = getInputGainAt(f.timestamp);
|
Chris@252
|
925 // cerr << "adjusting velocity from " << velocity << " to " << round(velocity/inputGain) << endl;
|
Chris@252
|
926 velocity = round(velocity / inputGain);
|
Chris@252
|
927 if (velocity > 127) velocity = 127;
|
Chris@252
|
928 if (velocity < 1) velocity = 1;
|
Chris@252
|
929 f.values.push_back(velocity);
|
Chris@252
|
930
|
Chris@252
|
931 f.label = noteName(note, shift, shiftCount);
|
Chris@252
|
932
|
Chris@252
|
933 return f;
|
Chris@252
|
934 }
|
Chris@252
|
935
|
Chris@252
|
936 float
|
Chris@252
|
937 Silvet::getInputGainAt(RealTime t)
|
Chris@252
|
938 {
|
Chris@252
|
939 map<RealTime, float>::const_iterator i = m_inputGains.lower_bound(t);
|
Chris@252
|
940
|
Chris@252
|
941 if (i == m_inputGains.end()) {
|
Chris@252
|
942 if (i != m_inputGains.begin()) {
|
Chris@252
|
943 --i;
|
Chris@252
|
944 } else {
|
Chris@252
|
945 return 1.f; // no data
|
Chris@252
|
946 }
|
Chris@252
|
947 }
|
Chris@252
|
948
|
Chris@252
|
949 // cerr << "gain at time " << t << " = " << i->second << endl;
|
Chris@252
|
950
|
Chris@252
|
951 return i->second;
|
Chris@252
|
952 }
|
Chris@252
|
953
|