Chris@31
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
Chris@31
|
2
|
Chris@31
|
3 /*
|
Chris@31
|
4 Silvet
|
Chris@31
|
5
|
Chris@31
|
6 A Vamp plugin for note transcription.
|
Chris@31
|
7 Centre for Digital Music, Queen Mary University of London.
|
Chris@31
|
8
|
Chris@31
|
9 This program is free software; you can redistribute it and/or
|
Chris@31
|
10 modify it under the terms of the GNU General Public License as
|
Chris@31
|
11 published by the Free Software Foundation; either version 2 of the
|
Chris@31
|
12 License, or (at your option) any later version. See the file
|
Chris@31
|
13 COPYING included with this distribution for more information.
|
Chris@31
|
14 */
|
Chris@31
|
15
|
Chris@31
|
16 #include "Silvet.h"
|
Chris@34
|
17 #include "EM.h"
|
Chris@31
|
18
|
Chris@152
|
19 #include <cq/CQSpectrogram.h>
|
Chris@31
|
20
|
Chris@152
|
21 #include "MedianFilter.h"
|
Chris@152
|
22 #include "constant-q-cpp/src/dsp/Resampler.h"
|
Chris@246
|
23 #include "flattendynamics-ladspa.h"
|
Chris@298
|
24 #include "LiveInstruments.h"
|
Chris@31
|
25
|
Chris@31
|
26 #include <vector>
|
Chris@31
|
27
|
Chris@32
|
28 #include <cstdio>
|
Chris@32
|
29
|
Chris@31
|
30 using std::vector;
|
Chris@48
|
31 using std::cout;
|
Chris@31
|
32 using std::cerr;
|
Chris@31
|
33 using std::endl;
|
Chris@40
|
34 using Vamp::RealTime;
|
Chris@31
|
35
|
Chris@31
|
// Sample rate used for the constant-Q transform; reset() creates a
// resampler whenever the host input rate differs from this.
static int processingSampleRate = 44100;

// Constant-Q bins per semitone: one value for Live mode, one for the
// other modes (see reset() and preProcess()).
static int binsPerSemitoneLive = 1;
static int binsPerSemitoneNormal = 5;

// Inclusive range of input sample rates accepted by initialise().
static int minInputSampleRate = 100;
static int maxInputSampleRate = 192000;
|
Chris@272
|
43
|
Chris@31
|
44 Silvet::Silvet(float inputSampleRate) :
|
Chris@31
|
45 Plugin(inputSampleRate),
|
Chris@161
|
46 m_instruments(InstrumentPack::listInstrumentPacks()),
|
Chris@298
|
47 m_liveInstruments(LiveAdapter::adaptAll(m_instruments)),
|
Chris@31
|
48 m_resampler(0),
|
Chris@246
|
49 m_flattener(0),
|
Chris@110
|
50 m_cq(0),
|
Chris@297
|
51 m_mode(HighQualityMode),
|
Chris@166
|
52 m_fineTuning(false),
|
Chris@178
|
53 m_instrument(0),
|
Chris@178
|
54 m_colsPerSec(50)
|
Chris@31
|
55 {
|
Chris@31
|
56 }
|
Chris@31
|
57
|
Chris@31
|
58 Silvet::~Silvet()
|
Chris@31
|
59 {
|
Chris@31
|
60 delete m_resampler;
|
Chris@246
|
61 delete m_flattener;
|
Chris@31
|
62 delete m_cq;
|
Chris@41
|
63 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
|
Chris@41
|
64 delete m_postFilter[i];
|
Chris@41
|
65 }
|
Chris@31
|
66 }
|
Chris@31
|
67
|
Chris@31
|
68 string
|
Chris@31
|
69 Silvet::getIdentifier() const
|
Chris@31
|
70 {
|
Chris@31
|
71 return "silvet";
|
Chris@31
|
72 }
|
Chris@31
|
73
|
Chris@31
|
74 string
|
Chris@31
|
75 Silvet::getName() const
|
Chris@31
|
76 {
|
Chris@31
|
77 return "Silvet Note Transcription";
|
Chris@31
|
78 }
|
Chris@31
|
79
|
Chris@31
|
80 string
|
Chris@31
|
81 Silvet::getDescription() const
|
Chris@31
|
82 {
|
Chris@191
|
83 return "Estimate the note onsets, pitches, and durations that make up a music recording.";
|
Chris@31
|
84 }
|
Chris@31
|
85
|
Chris@31
|
86 string
|
Chris@31
|
87 Silvet::getMaker() const
|
Chris@31
|
88 {
|
Chris@191
|
89 return "Queen Mary, University of London";
|
Chris@31
|
90 }
|
Chris@31
|
91
|
Chris@31
|
92 int
|
Chris@31
|
93 Silvet::getPluginVersion() const
|
Chris@31
|
94 {
|
Chris@295
|
95 return 2;
|
Chris@31
|
96 }
|
Chris@31
|
97
|
Chris@31
|
98 string
|
Chris@31
|
99 Silvet::getCopyright() const
|
Chris@31
|
100 {
|
Chris@191
|
101 return "Method by Emmanouil Benetos and Simon Dixon; plugin by Chris Cannam and Emmanouil Benetos. GPL licence.";
|
Chris@31
|
102 }
|
Chris@31
|
103
|
Chris@31
|
104 Silvet::InputDomain
|
Chris@31
|
105 Silvet::getInputDomain() const
|
Chris@31
|
106 {
|
Chris@31
|
107 return TimeDomain;
|
Chris@31
|
108 }
|
Chris@31
|
109
|
Chris@31
|
110 size_t
|
Chris@31
|
111 Silvet::getPreferredBlockSize() const
|
Chris@31
|
112 {
|
Chris@31
|
113 return 0;
|
Chris@31
|
114 }
|
Chris@31
|
115
|
Chris@31
|
116 size_t
|
Chris@31
|
117 Silvet::getPreferredStepSize() const
|
Chris@31
|
118 {
|
Chris@31
|
119 return 0;
|
Chris@31
|
120 }
|
Chris@31
|
121
|
Chris@31
|
122 size_t
|
Chris@31
|
123 Silvet::getMinChannelCount() const
|
Chris@31
|
124 {
|
Chris@31
|
125 return 1;
|
Chris@31
|
126 }
|
Chris@31
|
127
|
Chris@31
|
128 size_t
|
Chris@31
|
129 Silvet::getMaxChannelCount() const
|
Chris@31
|
130 {
|
Chris@31
|
131 return 1;
|
Chris@31
|
132 }
|
Chris@31
|
133
|
Chris@31
|
134 Silvet::ParameterList
|
Chris@31
|
135 Silvet::getParameterDescriptors() const
|
Chris@31
|
136 {
|
Chris@31
|
137 ParameterList list;
|
Chris@110
|
138
|
Chris@110
|
139 ParameterDescriptor desc;
|
Chris@110
|
140 desc.identifier = "mode";
|
Chris@110
|
141 desc.name = "Processing mode";
|
Chris@110
|
142 desc.unit = "";
|
Chris@297
|
143 desc.description = "Sets the tradeoff of processing speed against transcription quality. Draft mode is tuned in favour of overall speed; Live mode is tuned in favour of lower latency; while Intensive mode (the default) will almost always produce the best results.";
|
Chris@110
|
144 desc.minValue = 0;
|
Chris@297
|
145 desc.maxValue = 2;
|
Chris@113
|
146 desc.defaultValue = 1;
|
Chris@110
|
147 desc.isQuantized = true;
|
Chris@110
|
148 desc.quantizeStep = 1;
|
Chris@166
|
149 desc.valueNames.push_back("Draft (faster)");
|
Chris@165
|
150 desc.valueNames.push_back("Intensive (higher quality)");
|
Chris@297
|
151 desc.valueNames.push_back("Live (lower latency)");
|
Chris@161
|
152 list.push_back(desc);
|
Chris@161
|
153
|
Chris@176
|
154 desc.identifier = "instrument";
|
Chris@176
|
155 desc.name = "Instrument";
|
Chris@161
|
156 desc.unit = "";
|
Chris@271
|
157 desc.description = "The instrument or instruments known to be present in the recording. This affects the set of instrument templates used, as well as the expected level of polyphony in the output. Using a more limited set of instruments than the default will also make the plugin run faster.\nNote that this plugin cannot isolate instruments: you can't use this setting to request notes from only one instrument in a recording with several. Instead, use this as a hint to the plugin about which instruments are actually present.";
|
Chris@161
|
158 desc.minValue = 0;
|
Chris@162
|
159 desc.maxValue = m_instruments.size()-1;
|
Chris@162
|
160 desc.defaultValue = 0;
|
Chris@161
|
161 desc.isQuantized = true;
|
Chris@161
|
162 desc.quantizeStep = 1;
|
Chris@161
|
163 desc.valueNames.clear();
|
Chris@162
|
164 for (int i = 0; i < int(m_instruments.size()); ++i) {
|
Chris@162
|
165 desc.valueNames.push_back(m_instruments[i].name);
|
Chris@162
|
166 }
|
Chris@166
|
167 list.push_back(desc);
|
Chris@161
|
168
|
Chris@166
|
169 desc.identifier = "finetune";
|
Chris@166
|
170 desc.name = "Return fine pitch estimates";
|
Chris@166
|
171 desc.unit = "";
|
Chris@271
|
172 desc.description = "Return pitch estimates at finer than semitone resolution. This works only in Intensive mode. Notes that appear to drift in pitch will be split up into shorter notes with individually finer pitches.";
|
Chris@166
|
173 desc.minValue = 0;
|
Chris@166
|
174 desc.maxValue = 1;
|
Chris@166
|
175 desc.defaultValue = 0;
|
Chris@166
|
176 desc.isQuantized = true;
|
Chris@166
|
177 desc.quantizeStep = 1;
|
Chris@166
|
178 desc.valueNames.clear();
|
Chris@110
|
179 list.push_back(desc);
|
Chris@110
|
180
|
Chris@31
|
181 return list;
|
Chris@31
|
182 }
|
Chris@31
|
183
|
Chris@31
|
184 float
|
Chris@31
|
185 Silvet::getParameter(string identifier) const
|
Chris@31
|
186 {
|
Chris@110
|
187 if (identifier == "mode") {
|
Chris@297
|
188 return (float)(int)m_mode;
|
Chris@166
|
189 } else if (identifier == "finetune") {
|
Chris@166
|
190 return m_fineTuning ? 1.f : 0.f;
|
Chris@176
|
191 } else if (identifier == "instrument") {
|
Chris@162
|
192 return m_instrument;
|
Chris@110
|
193 }
|
Chris@31
|
194 return 0;
|
Chris@31
|
195 }
|
Chris@31
|
196
|
Chris@31
|
197 void
|
Chris@31
|
198 Silvet::setParameter(string identifier, float value)
|
Chris@31
|
199 {
|
Chris@110
|
200 if (identifier == "mode") {
|
Chris@297
|
201 m_mode = (ProcessingMode)(int)(value + 0.5);
|
Chris@166
|
202 } else if (identifier == "finetune") {
|
Chris@166
|
203 m_fineTuning = (value > 0.5);
|
Chris@176
|
204 } else if (identifier == "instrument") {
|
Chris@162
|
205 m_instrument = lrintf(value);
|
Chris@110
|
206 }
|
Chris@31
|
207 }
|
Chris@31
|
208
|
Chris@31
|
209 Silvet::ProgramList
|
Chris@31
|
210 Silvet::getPrograms() const
|
Chris@31
|
211 {
|
Chris@31
|
212 ProgramList list;
|
Chris@31
|
213 return list;
|
Chris@31
|
214 }
|
Chris@31
|
215
|
Chris@31
|
216 string
|
Chris@31
|
217 Silvet::getCurrentProgram() const
|
Chris@31
|
218 {
|
Chris@31
|
219 return "";
|
Chris@31
|
220 }
|
Chris@31
|
221
|
Chris@31
|
222 void
|
Chris@31
|
223 Silvet::selectProgram(string name)
|
Chris@31
|
224 {
|
Chris@31
|
225 }
|
Chris@31
|
226
|
Chris@31
|
227 Silvet::OutputList
|
Chris@31
|
228 Silvet::getOutputDescriptors() const
|
Chris@31
|
229 {
|
Chris@31
|
230 OutputList list;
|
Chris@31
|
231
|
Chris@31
|
232 OutputDescriptor d;
|
Chris@51
|
233 d.identifier = "notes";
|
Chris@51
|
234 d.name = "Note transcription";
|
Chris@271
|
235 d.description = "Overall note transcription. Each note has time, duration, estimated pitch, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture.";
|
Chris@41
|
236 d.unit = "Hz";
|
Chris@31
|
237 d.hasFixedBinCount = true;
|
Chris@31
|
238 d.binCount = 2;
|
Chris@41
|
239 d.binNames.push_back("Frequency");
|
Chris@31
|
240 d.binNames.push_back("Velocity");
|
Chris@31
|
241 d.hasKnownExtents = false;
|
Chris@31
|
242 d.isQuantized = false;
|
Chris@31
|
243 d.sampleType = OutputDescriptor::VariableSampleRate;
|
Chris@246
|
244 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
|
Chris@31
|
245 d.hasDuration = true;
|
Chris@32
|
246 m_notesOutputNo = list.size();
|
Chris@32
|
247 list.push_back(d);
|
Chris@32
|
248
|
Chris@178
|
249 d.identifier = "timefreq";
|
Chris@178
|
250 d.name = "Time-frequency distribution";
|
Chris@271
|
251 d.description = "Filtered constant-Q time-frequency distribution as used as input to the expectation-maximisation algorithm.";
|
Chris@178
|
252 d.unit = "";
|
Chris@178
|
253 d.hasFixedBinCount = true;
|
Chris@298
|
254 d.binCount = getPack(0).templateHeight;
|
Chris@178
|
255 d.binNames.clear();
|
Chris@178
|
256 if (m_cq) {
|
Chris@294
|
257 char name[50];
|
Chris@298
|
258 for (int i = 0; i < getPack(0).templateHeight; ++i) {
|
Chris@178
|
259 // We have a 600-bin (10 oct 60-bin CQ) of which the
|
Chris@178
|
260 // lowest-frequency 55 bins have been dropped, for a
|
Chris@178
|
261 // 545-bin template. The native CQ bins go high->low
|
Chris@178
|
262 // frequency though, so these are still the first 545 bins
|
Chris@178
|
263 // as reported by getBinFrequency, though in reverse order
|
Chris@178
|
264 float freq = m_cq->getBinFrequency
|
Chris@298
|
265 (getPack(0).templateHeight - i - 1);
|
Chris@178
|
266 sprintf(name, "%.1f Hz", freq);
|
Chris@178
|
267 d.binNames.push_back(name);
|
Chris@178
|
268 }
|
Chris@178
|
269 }
|
Chris@178
|
270 d.hasKnownExtents = false;
|
Chris@178
|
271 d.isQuantized = false;
|
Chris@178
|
272 d.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@178
|
273 d.sampleRate = m_colsPerSec;
|
Chris@178
|
274 d.hasDuration = false;
|
Chris@178
|
275 m_fcqOutputNo = list.size();
|
Chris@178
|
276 list.push_back(d);
|
Chris@178
|
277
|
Chris@294
|
278 d.identifier = "pitchactivation";
|
Chris@294
|
279 d.name = "Pitch activation distribution";
|
Chris@294
|
280 d.description = "Pitch activation distribution resulting from expectation-maximisation algorithm, prior to note extraction.";
|
Chris@294
|
281 d.unit = "";
|
Chris@294
|
282 d.hasFixedBinCount = true;
|
Chris@298
|
283 d.binCount = getPack(0).templateNoteCount;
|
Chris@294
|
284 d.binNames.clear();
|
Chris@294
|
285 if (m_cq) {
|
Chris@298
|
286 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
|
Chris@294
|
287 d.binNames.push_back(noteName(i, 0, 1));
|
Chris@294
|
288 }
|
Chris@294
|
289 }
|
Chris@294
|
290 d.hasKnownExtents = false;
|
Chris@294
|
291 d.isQuantized = false;
|
Chris@294
|
292 d.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@294
|
293 d.sampleRate = m_colsPerSec;
|
Chris@294
|
294 d.hasDuration = false;
|
Chris@294
|
295 m_pitchOutputNo = list.size();
|
Chris@294
|
296 list.push_back(d);
|
Chris@294
|
297
|
Chris@302
|
298 d.identifier = "templates";
|
Chris@302
|
299 d.name = "Templates";
|
Chris@302
|
300 d.description = "Constant-Q spectral templates for the selected instrument pack.";
|
Chris@302
|
301 d.unit = "";
|
Chris@302
|
302 d.hasFixedBinCount = true;
|
Chris@302
|
303 d.binCount = getPack(0).templateHeight;
|
Chris@302
|
304 d.binNames.clear();
|
Chris@302
|
305 if (m_cq) {
|
Chris@302
|
306 char name[50];
|
Chris@302
|
307 for (int i = 0; i < getPack(0).templateHeight; ++i) {
|
Chris@302
|
308 // We have a 600-bin (10 oct 60-bin CQ) of which the
|
Chris@302
|
309 // lowest-frequency 55 bins have been dropped, for a
|
Chris@302
|
310 // 545-bin template. The native CQ bins go high->low
|
Chris@302
|
311 // frequency though, so these are still the first 545 bins
|
Chris@302
|
312 // as reported by getBinFrequency, though in reverse order
|
Chris@302
|
313 float freq = m_cq->getBinFrequency
|
Chris@302
|
314 (getPack(0).templateHeight - i - 1);
|
Chris@302
|
315 sprintf(name, "%.1f Hz", freq);
|
Chris@302
|
316 d.binNames.push_back(name);
|
Chris@302
|
317 }
|
Chris@302
|
318 }
|
Chris@302
|
319 d.hasKnownExtents = false;
|
Chris@302
|
320 d.isQuantized = false;
|
Chris@302
|
321 d.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@302
|
322 d.sampleRate = m_colsPerSec;
|
Chris@302
|
323 d.hasDuration = false;
|
Chris@302
|
324 m_templateOutputNo = list.size();
|
Chris@302
|
325 list.push_back(d);
|
Chris@302
|
326
|
Chris@31
|
327 return list;
|
Chris@31
|
328 }
|
Chris@31
|
329
|
Chris@38
|
330 std::string
|
Chris@175
|
331 Silvet::noteName(int note, int shift, int shiftCount) const
|
Chris@38
|
332 {
|
Chris@38
|
333 static const char *names[] = {
|
Chris@38
|
334 "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"
|
Chris@38
|
335 };
|
Chris@38
|
336
|
Chris@175
|
337 const char *n = names[note % 12];
|
Chris@38
|
338
|
Chris@175
|
339 int oct = (note + 9) / 12;
|
Chris@38
|
340
|
Chris@175
|
341 char buf[30];
|
Chris@175
|
342
|
Chris@175
|
343 float pshift = 0.f;
|
Chris@175
|
344 if (shiftCount > 1) {
|
Chris@175
|
345 // see noteFrequency below
|
Chris@175
|
346 pshift =
|
Chris@175
|
347 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
|
Chris@175
|
348 }
|
Chris@175
|
349
|
Chris@175
|
350 if (pshift > 0.f) {
|
Chris@175
|
351 sprintf(buf, "%s%d+%dc", n, oct, int(round(pshift * 100)));
|
Chris@175
|
352 } else if (pshift < 0.f) {
|
Chris@175
|
353 sprintf(buf, "%s%d-%dc", n, oct, int(round((-pshift) * 100)));
|
Chris@175
|
354 } else {
|
Chris@175
|
355 sprintf(buf, "%s%d", n, oct);
|
Chris@175
|
356 }
|
Chris@38
|
357
|
Chris@38
|
358 return buf;
|
Chris@38
|
359 }
|
Chris@38
|
360
|
Chris@41
|
361 float
|
Chris@168
|
362 Silvet::noteFrequency(int note, int shift, int shiftCount) const
|
Chris@41
|
363 {
|
Chris@169
|
364 // Convert shift number to a pitch shift. The given shift number
|
Chris@169
|
365 // is an offset into the template array, which starts with some
|
Chris@169
|
366 // zeros, followed by the template, then some trailing zeros.
|
Chris@169
|
367 //
|
Chris@169
|
368 // Example: if we have templateMaxShift == 2 and thus shiftCount
|
Chris@169
|
369 // == 5, then the number will be in the range 0-4 and the template
|
Chris@169
|
370 // will have 2 zeros at either end. Thus number 2 represents the
|
Chris@169
|
371 // template "as recorded", for a pitch shift of 0; smaller indices
|
Chris@169
|
372 // represent moving the template *up* in pitch (by introducing
|
Chris@169
|
373 // zeros at the start, which is the low-frequency end), for a
|
Chris@169
|
374 // positive pitch shift; and higher values represent moving it
|
Chris@169
|
375 // down in pitch, for a negative pitch shift.
|
Chris@169
|
376
|
Chris@175
|
377 float pshift = 0.f;
|
Chris@175
|
378 if (shiftCount > 1) {
|
Chris@175
|
379 pshift =
|
Chris@175
|
380 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
|
Chris@175
|
381 }
|
Chris@169
|
382
|
Chris@301
|
383 float freq = float(27.5 * pow(2.0, (note + pshift) / 12.0));
|
Chris@301
|
384
|
Chris@303
|
385 // cerr << "note = " << note << ", shift = " << shift << ", shiftCount = "
|
Chris@303
|
386 // << shiftCount << ", obtained freq = " << freq << endl;
|
Chris@301
|
387
|
Chris@301
|
388 return freq;
|
Chris@41
|
389 }
|
Chris@41
|
390
|
Chris@31
|
391 bool
|
Chris@31
|
392 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
Chris@31
|
393 {
|
Chris@272
|
394 if (m_inputSampleRate < minInputSampleRate ||
|
Chris@272
|
395 m_inputSampleRate > maxInputSampleRate) {
|
Chris@272
|
396 cerr << "Silvet::initialise: Unsupported input sample rate "
|
Chris@272
|
397 << m_inputSampleRate << " (supported min " << minInputSampleRate
|
Chris@272
|
398 << ", max " << maxInputSampleRate << ")" << endl;
|
Chris@272
|
399 return false;
|
Chris@272
|
400 }
|
Chris@272
|
401
|
Chris@31
|
402 if (channels < getMinChannelCount() ||
|
Chris@272
|
403 channels > getMaxChannelCount()) {
|
Chris@272
|
404 cerr << "Silvet::initialise: Unsupported channel count " << channels
|
Chris@272
|
405 << " (supported min " << getMinChannelCount() << ", max "
|
Chris@272
|
406 << getMaxChannelCount() << ")" << endl;
|
Chris@272
|
407 return false;
|
Chris@272
|
408 }
|
Chris@31
|
409
|
Chris@31
|
410 if (stepSize != blockSize) {
|
Chris@31
|
411 cerr << "Silvet::initialise: Step size must be the same as block size ("
|
Chris@31
|
412 << stepSize << " != " << blockSize << ")" << endl;
|
Chris@31
|
413 return false;
|
Chris@31
|
414 }
|
Chris@31
|
415
|
Chris@31
|
416 m_blockSize = blockSize;
|
Chris@31
|
417
|
Chris@31
|
418 reset();
|
Chris@31
|
419
|
Chris@31
|
420 return true;
|
Chris@31
|
421 }
|
Chris@31
|
422
|
Chris@31
|
423 void
|
Chris@31
|
424 Silvet::reset()
|
Chris@31
|
425 {
|
Chris@31
|
426 delete m_resampler;
|
Chris@246
|
427 delete m_flattener;
|
Chris@31
|
428 delete m_cq;
|
Chris@31
|
429
|
Chris@31
|
430 if (m_inputSampleRate != processingSampleRate) {
|
Chris@31
|
431 m_resampler = new Resampler(m_inputSampleRate, processingSampleRate);
|
Chris@31
|
432 } else {
|
Chris@31
|
433 m_resampler = 0;
|
Chris@31
|
434 }
|
Chris@31
|
435
|
Chris@246
|
436 m_flattener = new FlattenDynamics(m_inputSampleRate); // before resampling
|
Chris@246
|
437 m_flattener->reset();
|
Chris@246
|
438
|
Chris@301
|
439 // this happens to be processingSampleRate / 3, and is the top
|
Chris@301
|
440 // freq used for the EM templates:
|
Chris@301
|
441 double maxFreq = 14700;
|
Chris@301
|
442
|
Chris@301
|
443 if (m_mode == LiveMode) {
|
Chris@301
|
444 // We only have 12 bpo rather than 60, so we need the top bin
|
Chris@301
|
445 // to be the middle one of the top 5, i.e. 2/5 of a semitone
|
Chris@301
|
446 // lower than 14700
|
Chris@301
|
447 maxFreq *= powf(2.0, -1.0 / 30.0);
|
Chris@301
|
448 }
|
Chris@301
|
449
|
Chris@173
|
450 double minFreq = 27.5;
|
Chris@173
|
451
|
Chris@297
|
452 if (m_mode != HighQualityMode) {
|
Chris@173
|
453 // We don't actually return any notes from the bottom octave,
|
Chris@173
|
454 // so we can just pad with zeros
|
Chris@173
|
455 minFreq *= 2;
|
Chris@173
|
456 }
|
Chris@173
|
457
|
Chris@298
|
458 int bpo = 12 *
|
Chris@298
|
459 (m_mode == LiveMode ? binsPerSemitoneLive : binsPerSemitoneNormal);
|
Chris@301
|
460
|
Chris@154
|
461 CQParameters params(processingSampleRate,
|
Chris@173
|
462 minFreq,
|
Chris@303
|
463 maxFreq,
|
Chris@298
|
464 bpo);
|
Chris@154
|
465
|
Chris@155
|
466 params.q = 0.95; // MIREX code uses 0.8, but it seems 0.9 or lower
|
Chris@155
|
467 // drops the FFT size to 512 from 1024 and alters
|
Chris@155
|
468 // some other processing parameters, making
|
Chris@155
|
469 // everything much, much slower. Could be a flaw
|
Chris@155
|
470 // in the CQ parameter calculations, must check
|
Chris@154
|
471 params.atomHopFactor = 0.3;
|
Chris@154
|
472 params.threshold = 0.0005;
|
Chris@172
|
473 params.window = CQParameters::Hann;
|
Chris@154
|
474
|
Chris@154
|
475 m_cq = new CQSpectrogram(params, CQSpectrogram::InterpolateLinear);
|
Chris@31
|
476
|
Chris@303
|
477 // cerr << "CQ bins = " << m_cq->getTotalBins() << endl;
|
Chris@303
|
478 // cerr << "CQ min freq = " << m_cq->getMinFrequency() << " (and for confirmation, freq of bin 0 = " << m_cq->getBinFrequency(0) << ")" << endl;
|
Chris@297
|
479
|
Chris@297
|
480 m_colsPerSec = (m_mode == DraftMode ? 25 : 50);
|
Chris@165
|
481
|
Chris@41
|
482 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
|
Chris@41
|
483 delete m_postFilter[i];
|
Chris@41
|
484 }
|
Chris@41
|
485 m_postFilter.clear();
|
Chris@303
|
486 int postFilterLength = 3;
|
Chris@298
|
487 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
|
Chris@303
|
488 m_postFilter.push_back(new MedianFilter<double>(postFilterLength));
|
Chris@41
|
489 }
|
Chris@41
|
490 m_pianoRoll.clear();
|
Chris@246
|
491 m_inputGains.clear();
|
Chris@32
|
492 m_columnCount = 0;
|
Chris@272
|
493 m_resampledCount = 0;
|
Chris@40
|
494 m_startTime = RealTime::zeroTime;
|
Chris@31
|
495 }
|
Chris@31
|
496
|
Chris@31
|
497 Silvet::FeatureSet
|
Chris@31
|
498 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
Chris@31
|
499 {
|
Chris@302
|
500 FeatureSet fs;
|
Chris@302
|
501
|
Chris@40
|
502 if (m_columnCount == 0) {
|
Chris@40
|
503 m_startTime = timestamp;
|
Chris@302
|
504 insertTemplateFeatures(fs);
|
Chris@40
|
505 }
|
Chris@246
|
506
|
Chris@246
|
507 vector<float> flattened(m_blockSize);
|
Chris@246
|
508 float gain = 1.f;
|
Chris@246
|
509 m_flattener->connectInputPort
|
Chris@246
|
510 (FlattenDynamics::AudioInputPort, inputBuffers[0]);
|
Chris@246
|
511 m_flattener->connectOutputPort
|
Chris@246
|
512 (FlattenDynamics::AudioOutputPort, &flattened[0]);
|
Chris@246
|
513 m_flattener->connectOutputPort
|
Chris@246
|
514 (FlattenDynamics::GainOutputPort, &gain);
|
Chris@246
|
515 m_flattener->process(m_blockSize);
|
Chris@246
|
516
|
Chris@252
|
517 m_inputGains[timestamp] = gain;
|
Chris@40
|
518
|
Chris@31
|
519 vector<double> data;
|
Chris@40
|
520 for (int i = 0; i < m_blockSize; ++i) {
|
Chris@246
|
521 double d = flattened[i];
|
Chris@235
|
522 data.push_back(d);
|
Chris@40
|
523 }
|
Chris@31
|
524
|
Chris@31
|
525 if (m_resampler) {
|
Chris@272
|
526
|
Chris@31
|
527 data = m_resampler->process(data.data(), data.size());
|
Chris@272
|
528
|
Chris@272
|
529 int hadCount = m_resampledCount;
|
Chris@272
|
530 m_resampledCount += data.size();
|
Chris@272
|
531
|
Chris@272
|
532 int resamplerLatency = m_resampler->getLatency();
|
Chris@272
|
533
|
Chris@272
|
534 if (hadCount < resamplerLatency) {
|
Chris@272
|
535 int stillToDrop = resamplerLatency - hadCount;
|
Chris@272
|
536 if (stillToDrop >= int(data.size())) {
|
Chris@302
|
537 return fs;
|
Chris@272
|
538 } else {
|
Chris@272
|
539 data = vector<double>(data.begin() + stillToDrop, data.end());
|
Chris@272
|
540 }
|
Chris@272
|
541 }
|
Chris@31
|
542 }
|
Chris@272
|
543
|
Chris@32
|
544 Grid cqout = m_cq->process(data);
|
Chris@302
|
545 transcribe(cqout, fs);
|
Chris@51
|
546 return fs;
|
Chris@34
|
547 }
|
Chris@34
|
548
|
Chris@34
|
549 Silvet::FeatureSet
|
Chris@34
|
550 Silvet::getRemainingFeatures()
|
Chris@34
|
551 {
|
Chris@145
|
552 Grid cqout = m_cq->getRemainingOutput();
|
Chris@302
|
553 FeatureSet fs;
|
Chris@302
|
554 if (m_columnCount == 0) {
|
Chris@302
|
555 // process() was never called, but we still want these
|
Chris@302
|
556 insertTemplateFeatures(fs);
|
Chris@302
|
557 } else {
|
Chris@302
|
558 transcribe(cqout, fs);
|
Chris@302
|
559 }
|
Chris@51
|
560 return fs;
|
Chris@34
|
561 }
|
Chris@34
|
562
|
Chris@302
|
563 void
|
Chris@302
|
564 Silvet::insertTemplateFeatures(FeatureSet &fs)
|
Chris@302
|
565 {
|
Chris@302
|
566 const InstrumentPack &pack = getPack(m_instrument);
|
Chris@302
|
567 for (int i = 0; i < int(pack.templates.size()) * pack.templateNoteCount; ++i) {
|
Chris@302
|
568 RealTime timestamp = RealTime::fromSeconds(double(i) / m_colsPerSec);
|
Chris@302
|
569 Feature f;
|
Chris@302
|
570 char buffer[50];
|
Chris@302
|
571 sprintf(buffer, "Note %d", i + 1);
|
Chris@302
|
572 f.label = buffer;
|
Chris@302
|
573 f.hasTimestamp = true;
|
Chris@302
|
574 f.timestamp = timestamp;
|
Chris@302
|
575 f.values = pack.templates[i / pack.templateNoteCount]
|
Chris@302
|
576 .data[i % pack.templateNoteCount];
|
Chris@302
|
577 fs[m_templateOutputNo].push_back(f);
|
Chris@302
|
578 }
|
Chris@302
|
579 }
|
Chris@302
|
580
|
Chris@302
|
581 void
|
Chris@302
|
582 Silvet::transcribe(const Grid &cqout, Silvet::FeatureSet &fs)
|
Chris@34
|
583 {
|
Chris@32
|
584 Grid filtered = preProcess(cqout);
|
Chris@31
|
585
|
Chris@302
|
586 if (filtered.empty()) return;
|
Chris@170
|
587
|
Chris@298
|
588 const InstrumentPack &pack(getPack(m_instrument));
|
Chris@104
|
589
|
Chris@178
|
590 for (int i = 0; i < (int)filtered.size(); ++i) {
|
Chris@178
|
591 Feature f;
|
Chris@178
|
592 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@178
|
593 f.values.push_back(float(filtered[i][j]));
|
Chris@178
|
594 }
|
Chris@178
|
595 fs[m_fcqOutputNo].push_back(f);
|
Chris@178
|
596 }
|
Chris@178
|
597
|
Chris@34
|
598 int width = filtered.size();
|
Chris@34
|
599
|
Chris@297
|
600 int iterations = (m_mode == HighQualityMode ? 20 : 10);
|
Chris@34
|
601
|
Chris@176
|
602 Grid localPitches(width, vector<double>(pack.templateNoteCount, 0.0));
|
Chris@170
|
603
|
Chris@297
|
604 bool wantShifts = (m_mode == HighQualityMode) && m_fineTuning;
|
Chris@170
|
605 int shiftCount = 1;
|
Chris@170
|
606 if (wantShifts) {
|
Chris@170
|
607 shiftCount = pack.templateMaxShift * 2 + 1;
|
Chris@170
|
608 }
|
Chris@170
|
609
|
Chris@170
|
610 vector<vector<int> > localBestShifts;
|
Chris@170
|
611 if (wantShifts) {
|
Chris@170
|
612 localBestShifts =
|
Chris@176
|
613 vector<vector<int> >(width, vector<int>(pack.templateNoteCount, 0));
|
Chris@170
|
614 }
|
Chris@170
|
615
|
Chris@170
|
616 vector<bool> present(width, false);
|
Chris@37
|
617
|
Chris@123
|
618 #pragma omp parallel for
|
Chris@123
|
619 for (int i = 0; i < width; ++i) {
|
Chris@104
|
620
|
Chris@170
|
621 double sum = 0.0;
|
Chris@176
|
622 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@170
|
623 sum += filtered.at(i).at(j);
|
Chris@170
|
624 }
|
Chris@170
|
625 if (sum < 1e-5) continue;
|
Chris@170
|
626
|
Chris@170
|
627 present[i] = true;
|
Chris@170
|
628
|
Chris@297
|
629 EM em(&pack, m_mode == HighQualityMode);
|
Chris@170
|
630
|
Chris@183
|
631 em.setPitchSparsity(pack.pitchSparsity);
|
Chris@213
|
632 em.setSourceSparsity(pack.sourceSparsity);
|
Chris@183
|
633
|
Chris@170
|
634 for (int j = 0; j < iterations; ++j) {
|
Chris@170
|
635 em.iterate(filtered.at(i).data());
|
Chris@37
|
636 }
|
Chris@37
|
637
|
Chris@170
|
638 const float *pitchDist = em.getPitchDistribution();
|
Chris@170
|
639 const float *const *shiftDist = em.getShifts();
|
Chris@37
|
640
|
Chris@176
|
641 for (int j = 0; j < pack.templateNoteCount; ++j) {
|
Chris@104
|
642
|
Chris@170
|
643 localPitches[i][j] = pitchDist[j] * sum;
|
Chris@170
|
644
|
Chris@170
|
645 int bestShift = 0;
|
Chris@179
|
646 float bestShiftValue = 0.0;
|
Chris@170
|
647 if (wantShifts) {
|
Chris@170
|
648 for (int k = 0; k < shiftCount; ++k) {
|
Chris@179
|
649 float value = shiftDist[k][j];
|
Chris@179
|
650 if (k == 0 || value > bestShiftValue) {
|
Chris@179
|
651 bestShiftValue = value;
|
Chris@170
|
652 bestShift = k;
|
Chris@170
|
653 }
|
Chris@170
|
654 }
|
Chris@170
|
655 localBestShifts[i][j] = bestShift;
|
Chris@170
|
656 }
|
Chris@123
|
657 }
|
Chris@123
|
658 }
|
Chris@166
|
659
|
Chris@166
|
660 for (int i = 0; i < width; ++i) {
|
Chris@37
|
661
|
Chris@170
|
662 if (!present[i]) {
|
Chris@170
|
663 // silent column
|
Chris@176
|
664 for (int j = 0; j < pack.templateNoteCount; ++j) {
|
Chris@170
|
665 m_postFilter[j]->push(0.0);
|
Chris@170
|
666 }
|
Chris@168
|
667 m_pianoRoll.push_back(map<int, double>());
|
Chris@170
|
668 if (wantShifts) {
|
Chris@168
|
669 m_pianoRollShifts.push_back(map<int, int>());
|
Chris@168
|
670 }
|
Chris@166
|
671 continue;
|
Chris@166
|
672 }
|
Chris@166
|
673
|
Chris@294
|
674 vector<double> filtered = postProcess
|
Chris@294
|
675 (localPitches[i], localBestShifts[i], wantShifts);
|
Chris@294
|
676
|
Chris@294
|
677 Feature f;
|
Chris@294
|
678 for (int j = 0; j < (int)filtered.size(); ++j) {
|
Chris@294
|
679 float v(filtered[j]);
|
Chris@294
|
680 if (v < pack.levelThreshold) v = 0.f;
|
Chris@294
|
681 f.values.push_back(v);
|
Chris@294
|
682 }
|
Chris@294
|
683 fs[m_pitchOutputNo].push_back(f);
|
Chris@166
|
684
|
Chris@168
|
685 FeatureList noteFeatures = noteTrack(shiftCount);
|
Chris@38
|
686
|
Chris@123
|
687 for (FeatureList::const_iterator fi = noteFeatures.begin();
|
Chris@123
|
688 fi != noteFeatures.end(); ++fi) {
|
Chris@123
|
689 fs[m_notesOutputNo].push_back(*fi);
|
Chris@40
|
690 }
|
Chris@34
|
691 }
|
Chris@31
|
692 }
|
Chris@31
|
693
|
Chris@32
|
694 Silvet::Grid
|
Chris@32
|
695 Silvet::preProcess(const Grid &in)
|
Chris@32
|
696 {
|
Chris@32
|
697 int width = in.size();
|
Chris@32
|
698
|
Chris@165
|
699 int spacing = processingSampleRate / m_colsPerSec;
|
Chris@32
|
700
|
Chris@165
|
701 // need to be careful that col spacing is an integer number of samples!
|
Chris@165
|
702 assert(spacing * m_colsPerSec == processingSampleRate);
|
Chris@32
|
703
|
Chris@32
|
704 Grid out;
|
Chris@32
|
705
|
Chris@58
|
706 // We count the CQ latency in terms of processing hops, but
|
Chris@58
|
707 // actually it probably isn't an exact number of hops so this
|
Chris@58
|
708 // isn't quite accurate. But the small constant offset is
|
Chris@165
|
709 // practically irrelevant compared to the jitter from the frame
|
Chris@165
|
710 // size we reduce to in a moment
|
Chris@33
|
711 int latentColumns = m_cq->getLatency() / m_cq->getColumnHop();
|
Chris@33
|
712
|
Chris@298
|
713 const InstrumentPack &pack(getPack(m_instrument));
|
Chris@176
|
714
|
Chris@32
|
715 for (int i = 0; i < width; ++i) {
|
Chris@32
|
716
|
Chris@33
|
717 if (m_columnCount < latentColumns) {
|
Chris@33
|
718 ++m_columnCount;
|
Chris@33
|
719 continue;
|
Chris@33
|
720 }
|
Chris@33
|
721
|
Chris@32
|
722 int prevSampleNo = (m_columnCount - 1) * m_cq->getColumnHop();
|
Chris@32
|
723 int sampleNo = m_columnCount * m_cq->getColumnHop();
|
Chris@32
|
724
|
Chris@32
|
725 bool select = (sampleNo / spacing != prevSampleNo / spacing);
|
Chris@32
|
726
|
Chris@32
|
727 if (select) {
|
Chris@32
|
728 vector<double> inCol = in[i];
|
Chris@176
|
729 vector<double> outCol(pack.templateHeight);
|
Chris@32
|
730
|
Chris@178
|
731 // In HQ mode, the CQ returns 600 bins and we ignore the
|
Chris@298
|
732 // lowest 55 of them (assuming binsPerSemitone == 5).
|
Chris@178
|
733 //
|
Chris@297
|
734 // In draft and live mode the CQ is an octave shorter,
|
Chris@300
|
735 // returning 540 bins or equivalent, so we instead pad
|
Chris@300
|
736 // them with an additional 5 or equivalent zeros.
|
Chris@178
|
737 //
|
Chris@178
|
738 // We also need to reverse the column as we go, since the
|
Chris@178
|
739 // raw CQ has the high frequencies first and we need it
|
Chris@178
|
740 // the other way around.
|
Chris@32
|
741
|
Chris@298
|
742 int bps = (m_mode == LiveMode ?
|
Chris@298
|
743 binsPerSemitoneLive : binsPerSemitoneNormal);
|
Chris@298
|
744
|
Chris@297
|
745 if (m_mode == HighQualityMode) {
|
Chris@178
|
746 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@298
|
747 int ix = inCol.size() - j - (11 * bps);
|
Chris@178
|
748 outCol[j] = inCol[ix];
|
Chris@178
|
749 }
|
Chris@178
|
750 } else {
|
Chris@298
|
751 for (int j = 0; j < bps; ++j) {
|
Chris@178
|
752 outCol[j] = 0.0;
|
Chris@178
|
753 }
|
Chris@298
|
754 for (int j = bps; j < pack.templateHeight; ++j) {
|
Chris@298
|
755 int ix = inCol.size() - j + (bps-1);
|
Chris@178
|
756 outCol[j] = inCol[ix];
|
Chris@178
|
757 }
|
Chris@46
|
758 }
|
Chris@32
|
759
|
Chris@46
|
760 vector<double> noiseLevel1 =
|
Chris@298
|
761 MedianFilter<double>::filter(8 * bps, outCol);
|
Chris@176
|
762 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@46
|
763 noiseLevel1[j] = std::min(outCol[j], noiseLevel1[j]);
|
Chris@46
|
764 }
|
Chris@32
|
765
|
Chris@46
|
766 vector<double> noiseLevel2 =
|
Chris@298
|
767 MedianFilter<double>::filter(8 * bps, noiseLevel1);
|
Chris@176
|
768 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@46
|
769 outCol[j] = std::max(outCol[j] - noiseLevel2[j], 0.0);
|
Chris@32
|
770 }
|
Chris@32
|
771
|
Chris@165
|
772 out.push_back(outCol);
|
Chris@32
|
773 }
|
Chris@32
|
774
|
Chris@32
|
775 ++m_columnCount;
|
Chris@32
|
776 }
|
Chris@32
|
777
|
Chris@32
|
778 return out;
|
Chris@32
|
779 }
|
Chris@32
|
780
|
Chris@294
|
781 vector<double>
|
Chris@170
|
782 Silvet::postProcess(const vector<double> &pitches,
|
Chris@170
|
783 const vector<int> &bestShifts,
|
Chris@170
|
784 bool wantShifts)
|
Chris@166
|
785 {
|
Chris@298
|
786 const InstrumentPack &pack(getPack(m_instrument));
|
Chris@176
|
787
|
Chris@41
|
788 vector<double> filtered;
|
Chris@41
|
789
|
Chris@176
|
790 for (int j = 0; j < pack.templateNoteCount; ++j) {
|
Chris@170
|
791 m_postFilter[j]->push(pitches[j]);
|
Chris@41
|
792 filtered.push_back(m_postFilter[j]->get());
|
Chris@41
|
793 }
|
Chris@41
|
794
|
Chris@41
|
795 // Threshold for level and reduce number of candidate pitches
|
Chris@41
|
796
|
Chris@41
|
797 typedef std::multimap<double, int> ValueIndexMap;
|
Chris@41
|
798
|
Chris@41
|
799 ValueIndexMap strengths;
|
Chris@166
|
800
|
Chris@176
|
801 for (int j = 0; j < pack.templateNoteCount; ++j) {
|
Chris@166
|
802 double strength = filtered[j];
|
Chris@183
|
803 if (strength < pack.levelThreshold) continue;
|
Chris@168
|
804 strengths.insert(ValueIndexMap::value_type(strength, j));
|
Chris@168
|
805 }
|
Chris@166
|
806
|
Chris@168
|
807 ValueIndexMap::const_iterator si = strengths.end();
|
Chris@167
|
808
|
Chris@168
|
809 map<int, double> active;
|
Chris@168
|
810 map<int, int> activeShifts;
|
Chris@168
|
811
|
Chris@183
|
812 while (int(active.size()) < pack.maxPolyphony && si != strengths.begin()) {
|
Chris@168
|
813
|
Chris@168
|
814 --si;
|
Chris@168
|
815
|
Chris@168
|
816 double strength = si->first;
|
Chris@168
|
817 int j = si->second;
|
Chris@168
|
818
|
Chris@168
|
819 active[j] = strength;
|
Chris@168
|
820
|
Chris@170
|
821 if (wantShifts) {
|
Chris@170
|
822 activeShifts[j] = bestShifts[j];
|
Chris@167
|
823 }
|
Chris@41
|
824 }
|
Chris@41
|
825
|
Chris@168
|
826 m_pianoRoll.push_back(active);
|
Chris@170
|
827
|
Chris@170
|
828 if (wantShifts) {
|
Chris@168
|
829 m_pianoRollShifts.push_back(activeShifts);
|
Chris@41
|
830 }
|
Chris@294
|
831
|
Chris@294
|
832 return filtered;
|
Chris@166
|
833 }
|
Chris@166
|
834
|
Chris@166
|
835 Vamp::Plugin::FeatureList
|
Chris@168
|
836 Silvet::noteTrack(int shiftCount)
|
Chris@166
|
837 {
|
Chris@41
|
838 // Minimum duration pruning, and conversion to notes. We can only
|
Chris@41
|
839 // report notes that have just ended (i.e. that are absent in the
|
Chris@168
|
840 // latest active set but present in the prior set in the piano
|
Chris@41
|
841 // roll) -- any notes that ended earlier will have been reported
|
Chris@41
|
842 // already, and if they haven't ended, we don't know their
|
Chris@41
|
843 // duration.
|
Chris@41
|
844
|
Chris@168
|
845 int width = m_pianoRoll.size() - 1;
|
Chris@168
|
846
|
Chris@168
|
847 const map<int, double> &active = m_pianoRoll[width];
|
Chris@41
|
848
|
Chris@165
|
849 double columnDuration = 1.0 / m_colsPerSec;
|
Chris@165
|
850
|
Chris@165
|
851 // only keep notes >= 100ms or thereabouts
|
Chris@165
|
852 int durationThreshold = floor(0.1 / columnDuration); // columns
|
Chris@165
|
853 if (durationThreshold < 1) durationThreshold = 1;
|
Chris@41
|
854
|
Chris@41
|
855 FeatureList noteFeatures;
|
Chris@41
|
856
|
Chris@41
|
857 if (width < durationThreshold + 1) {
|
Chris@41
|
858 return noteFeatures;
|
Chris@41
|
859 }
|
Chris@41
|
860
|
Chris@150
|
861 //!!! try: repeated note detection? (look for change in first derivative of the pitch matrix)
|
Chris@150
|
862
|
Chris@55
|
863 for (map<int, double>::const_iterator ni = m_pianoRoll[width-1].begin();
|
Chris@41
|
864 ni != m_pianoRoll[width-1].end(); ++ni) {
|
Chris@41
|
865
|
Chris@55
|
866 int note = ni->first;
|
Chris@41
|
867
|
Chris@41
|
868 if (active.find(note) != active.end()) {
|
Chris@41
|
869 // the note is still playing
|
Chris@41
|
870 continue;
|
Chris@41
|
871 }
|
Chris@41
|
872
|
Chris@41
|
873 // the note was playing but just ended
|
Chris@41
|
874 int end = width;
|
Chris@41
|
875 int start = end-1;
|
Chris@41
|
876
|
Chris@41
|
877 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) {
|
Chris@41
|
878 --start;
|
Chris@41
|
879 }
|
Chris@41
|
880 ++start;
|
Chris@41
|
881
|
Chris@169
|
882 if ((end - start) < durationThreshold) {
|
Chris@41
|
883 continue;
|
Chris@41
|
884 }
|
Chris@41
|
885
|
Chris@169
|
886 emitNote(start, end, note, shiftCount, noteFeatures);
|
Chris@41
|
887 }
|
Chris@41
|
888
|
Chris@62
|
889 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl;
|
Chris@41
|
890
|
Chris@41
|
891 return noteFeatures;
|
Chris@41
|
892 }
|
Chris@41
|
893
|
Chris@169
|
894 void
|
Chris@169
|
895 Silvet::emitNote(int start, int end, int note, int shiftCount,
|
Chris@169
|
896 FeatureList ¬eFeatures)
|
Chris@169
|
897 {
|
Chris@169
|
898 int partStart = start;
|
Chris@169
|
899 int partShift = 0;
|
Chris@169
|
900 int partVelocity = 0;
|
Chris@169
|
901
|
Chris@252
|
902 int partThreshold = floor(0.05 * m_colsPerSec);
|
Chris@169
|
903
|
Chris@169
|
904 for (int i = start; i != end; ++i) {
|
Chris@169
|
905
|
Chris@169
|
906 double strength = m_pianoRoll[i][note];
|
Chris@169
|
907
|
Chris@169
|
908 int shift = 0;
|
Chris@169
|
909
|
Chris@169
|
910 if (shiftCount > 1) {
|
Chris@169
|
911
|
Chris@169
|
912 shift = m_pianoRollShifts[i][note];
|
Chris@169
|
913
|
Chris@169
|
914 if (i == partStart) {
|
Chris@169
|
915 partShift = shift;
|
Chris@169
|
916 }
|
Chris@169
|
917
|
Chris@169
|
918 if (i > partStart + partThreshold && shift != partShift) {
|
Chris@169
|
919
|
Chris@169
|
920 // cerr << "i = " << i << ", partStart = " << partStart << ", shift = " << shift << ", partShift = " << partShift << endl;
|
Chris@169
|
921
|
Chris@169
|
922 // pitch has changed, emit an intermediate note
|
Chris@252
|
923 noteFeatures.push_back(makeNoteFeature(partStart,
|
Chris@252
|
924 i,
|
Chris@252
|
925 note,
|
Chris@252
|
926 partShift,
|
Chris@252
|
927 shiftCount,
|
Chris@252
|
928 partVelocity));
|
Chris@169
|
929 partStart = i;
|
Chris@169
|
930 partShift = shift;
|
Chris@169
|
931 partVelocity = 0;
|
Chris@169
|
932 }
|
Chris@169
|
933 }
|
Chris@169
|
934
|
Chris@303
|
935 int v;
|
Chris@303
|
936 if (m_mode == LiveMode) {
|
Chris@303
|
937 v = round(strength * 30);
|
Chris@303
|
938 } else {
|
Chris@303
|
939 v = round(strength * 2);
|
Chris@303
|
940 }
|
Chris@169
|
941 if (v > partVelocity) {
|
Chris@169
|
942 partVelocity = v;
|
Chris@169
|
943 }
|
Chris@169
|
944 }
|
Chris@169
|
945
|
Chris@169
|
946 if (end >= partStart + partThreshold) {
|
Chris@252
|
947 noteFeatures.push_back(makeNoteFeature(partStart,
|
Chris@252
|
948 end,
|
Chris@252
|
949 note,
|
Chris@252
|
950 partShift,
|
Chris@252
|
951 shiftCount,
|
Chris@252
|
952 partVelocity));
|
Chris@169
|
953 }
|
Chris@169
|
954 }
|
Chris@252
|
955
|
Chris@252
|
956 Silvet::Feature
|
Chris@252
|
957 Silvet::makeNoteFeature(int start,
|
Chris@252
|
958 int end,
|
Chris@252
|
959 int note,
|
Chris@252
|
960 int shift,
|
Chris@252
|
961 int shiftCount,
|
Chris@252
|
962 int velocity)
|
Chris@252
|
963 {
|
Chris@252
|
964 double columnDuration = 1.0 / m_colsPerSec;
|
Chris@252
|
965 int postFilterLatency = int(m_postFilter[0]->getSize() / 2);
|
Chris@252
|
966
|
Chris@252
|
967 Feature f;
|
Chris@252
|
968
|
Chris@252
|
969 f.hasTimestamp = true;
|
Chris@285
|
970 f.timestamp = m_startTime + RealTime::fromSeconds
|
Chris@252
|
971 (columnDuration * (start - postFilterLatency) + 0.02);
|
Chris@252
|
972
|
Chris@252
|
973 f.hasDuration = true;
|
Chris@252
|
974 f.duration = RealTime::fromSeconds
|
Chris@252
|
975 (columnDuration * (end - start));
|
Chris@252
|
976
|
Chris@252
|
977 f.values.clear();
|
Chris@252
|
978
|
Chris@252
|
979 f.values.push_back
|
Chris@252
|
980 (noteFrequency(note, shift, shiftCount));
|
Chris@252
|
981
|
Chris@252
|
982 float inputGain = getInputGainAt(f.timestamp);
|
Chris@252
|
983 // cerr << "adjusting velocity from " << velocity << " to " << round(velocity/inputGain) << endl;
|
Chris@252
|
984 velocity = round(velocity / inputGain);
|
Chris@252
|
985 if (velocity > 127) velocity = 127;
|
Chris@252
|
986 if (velocity < 1) velocity = 1;
|
Chris@252
|
987 f.values.push_back(velocity);
|
Chris@252
|
988
|
Chris@252
|
989 f.label = noteName(note, shift, shiftCount);
|
Chris@252
|
990
|
Chris@252
|
991 return f;
|
Chris@252
|
992 }
|
Chris@252
|
993
|
Chris@252
|
994 float
|
Chris@252
|
995 Silvet::getInputGainAt(RealTime t)
|
Chris@252
|
996 {
|
Chris@252
|
997 map<RealTime, float>::const_iterator i = m_inputGains.lower_bound(t);
|
Chris@252
|
998
|
Chris@252
|
999 if (i == m_inputGains.end()) {
|
Chris@252
|
1000 if (i != m_inputGains.begin()) {
|
Chris@252
|
1001 --i;
|
Chris@252
|
1002 } else {
|
Chris@252
|
1003 return 1.f; // no data
|
Chris@252
|
1004 }
|
Chris@252
|
1005 }
|
Chris@252
|
1006
|
Chris@252
|
1007 // cerr << "gain at time " << t << " = " << i->second << endl;
|
Chris@252
|
1008
|
Chris@252
|
1009 return i->second;
|
Chris@252
|
1010 }
|
Chris@252
|
1011
|