Chris@31
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
Chris@31
|
2
|
Chris@31
|
3 /*
|
Chris@31
|
4 Silvet
|
Chris@31
|
5
|
Chris@31
|
6 A Vamp plugin for note transcription.
|
Chris@31
|
7 Centre for Digital Music, Queen Mary University of London.
|
Chris@31
|
8
|
Chris@31
|
9 This program is free software; you can redistribute it and/or
|
Chris@31
|
10 modify it under the terms of the GNU General Public License as
|
Chris@31
|
11 published by the Free Software Foundation; either version 2 of the
|
Chris@31
|
12 License, or (at your option) any later version. See the file
|
Chris@31
|
13 COPYING included with this distribution for more information.
|
Chris@31
|
14 */
|
Chris@31
|
15
|
Chris@31
|
16 #include "Silvet.h"
|
Chris@34
|
17 #include "EM.h"
|
Chris@31
|
18
|
Chris@152
|
19 #include <cq/CQSpectrogram.h>
|
Chris@31
|
20
|
Chris@152
|
21 #include "MedianFilter.h"
|
Chris@152
|
22 #include "constant-q-cpp/src/dsp/Resampler.h"
|
Chris@246
|
23 #include "flattendynamics-ladspa.h"
|
Chris@298
|
24 #include "LiveInstruments.h"
|
Chris@31
|
25
|
Chris@31
|
26 #include <vector>
|
Chris@31
|
27
|
Chris@32
|
28 #include <cstdio>
|
Chris@32
|
29
|
Chris@31
|
30 using std::vector;
|
Chris@48
|
31 using std::cout;
|
Chris@31
|
32 using std::cerr;
|
Chris@31
|
33 using std::endl;
|
Chris@40
|
34 using Vamp::RealTime;
|
Chris@31
|
35
|
Chris@31
|
// Sample rate (Hz) used for the constant-Q transform. reset() creates a
// resampler whenever the plugin's input sample rate differs from this.
static int processingSampleRate = 44100;

// Constant-Q bins per semitone: one value for Live mode, one for the
// other modes. reset() multiplies the selected value by 12 to obtain
// the bins-per-octave passed to the constant-Q transform.
static int binsPerSemitoneLive = 1;
static int binsPerSemitoneNormal = 5;

// Inclusive range of input sample rates accepted by initialise().
static int minInputSampleRate = 100;
static int maxInputSampleRate = 192000;
|
Chris@272
|
43
|
Chris@31
|
44 Silvet::Silvet(float inputSampleRate) :
|
Chris@31
|
45 Plugin(inputSampleRate),
|
Chris@161
|
46 m_instruments(InstrumentPack::listInstrumentPacks()),
|
Chris@298
|
47 m_liveInstruments(LiveAdapter::adaptAll(m_instruments)),
|
Chris@31
|
48 m_resampler(0),
|
Chris@246
|
49 m_flattener(0),
|
Chris@110
|
50 m_cq(0),
|
Chris@297
|
51 m_mode(HighQualityMode),
|
Chris@166
|
52 m_fineTuning(false),
|
Chris@178
|
53 m_instrument(0),
|
Chris@178
|
54 m_colsPerSec(50)
|
Chris@31
|
55 {
|
Chris@31
|
56 }
|
Chris@31
|
57
|
Chris@31
|
58 Silvet::~Silvet()
|
Chris@31
|
59 {
|
Chris@31
|
60 delete m_resampler;
|
Chris@246
|
61 delete m_flattener;
|
Chris@31
|
62 delete m_cq;
|
Chris@41
|
63 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
|
Chris@41
|
64 delete m_postFilter[i];
|
Chris@41
|
65 }
|
Chris@31
|
66 }
|
Chris@31
|
67
|
Chris@31
|
68 string
|
Chris@31
|
69 Silvet::getIdentifier() const
|
Chris@31
|
70 {
|
Chris@31
|
71 return "silvet";
|
Chris@31
|
72 }
|
Chris@31
|
73
|
Chris@31
|
74 string
|
Chris@31
|
75 Silvet::getName() const
|
Chris@31
|
76 {
|
Chris@31
|
77 return "Silvet Note Transcription";
|
Chris@31
|
78 }
|
Chris@31
|
79
|
Chris@31
|
80 string
|
Chris@31
|
81 Silvet::getDescription() const
|
Chris@31
|
82 {
|
Chris@191
|
83 return "Estimate the note onsets, pitches, and durations that make up a music recording.";
|
Chris@31
|
84 }
|
Chris@31
|
85
|
Chris@31
|
86 string
|
Chris@31
|
87 Silvet::getMaker() const
|
Chris@31
|
88 {
|
Chris@191
|
89 return "Queen Mary, University of London";
|
Chris@31
|
90 }
|
Chris@31
|
91
|
Chris@31
|
92 int
|
Chris@31
|
93 Silvet::getPluginVersion() const
|
Chris@31
|
94 {
|
Chris@295
|
95 return 2;
|
Chris@31
|
96 }
|
Chris@31
|
97
|
Chris@31
|
98 string
|
Chris@31
|
99 Silvet::getCopyright() const
|
Chris@31
|
100 {
|
Chris@191
|
101 return "Method by Emmanouil Benetos and Simon Dixon; plugin by Chris Cannam and Emmanouil Benetos. GPL licence.";
|
Chris@31
|
102 }
|
Chris@31
|
103
|
Chris@31
|
104 Silvet::InputDomain
|
Chris@31
|
105 Silvet::getInputDomain() const
|
Chris@31
|
106 {
|
Chris@31
|
107 return TimeDomain;
|
Chris@31
|
108 }
|
Chris@31
|
109
|
Chris@31
|
110 size_t
|
Chris@31
|
111 Silvet::getPreferredBlockSize() const
|
Chris@31
|
112 {
|
Chris@31
|
113 return 0;
|
Chris@31
|
114 }
|
Chris@31
|
115
|
Chris@31
|
116 size_t
|
Chris@31
|
117 Silvet::getPreferredStepSize() const
|
Chris@31
|
118 {
|
Chris@31
|
119 return 0;
|
Chris@31
|
120 }
|
Chris@31
|
121
|
Chris@31
|
122 size_t
|
Chris@31
|
123 Silvet::getMinChannelCount() const
|
Chris@31
|
124 {
|
Chris@31
|
125 return 1;
|
Chris@31
|
126 }
|
Chris@31
|
127
|
Chris@31
|
128 size_t
|
Chris@31
|
129 Silvet::getMaxChannelCount() const
|
Chris@31
|
130 {
|
Chris@31
|
131 return 1;
|
Chris@31
|
132 }
|
Chris@31
|
133
|
Chris@31
|
134 Silvet::ParameterList
|
Chris@31
|
135 Silvet::getParameterDescriptors() const
|
Chris@31
|
136 {
|
Chris@31
|
137 ParameterList list;
|
Chris@110
|
138
|
Chris@110
|
139 ParameterDescriptor desc;
|
Chris@110
|
140 desc.identifier = "mode";
|
Chris@110
|
141 desc.name = "Processing mode";
|
Chris@110
|
142 desc.unit = "";
|
Chris@297
|
143 desc.description = "Sets the tradeoff of processing speed against transcription quality. Draft mode is tuned in favour of overall speed; Live mode is tuned in favour of lower latency; while Intensive mode (the default) will almost always produce the best results.";
|
Chris@110
|
144 desc.minValue = 0;
|
Chris@297
|
145 desc.maxValue = 2;
|
Chris@113
|
146 desc.defaultValue = 1;
|
Chris@110
|
147 desc.isQuantized = true;
|
Chris@110
|
148 desc.quantizeStep = 1;
|
Chris@166
|
149 desc.valueNames.push_back("Draft (faster)");
|
Chris@165
|
150 desc.valueNames.push_back("Intensive (higher quality)");
|
Chris@297
|
151 desc.valueNames.push_back("Live (lower latency)");
|
Chris@161
|
152 list.push_back(desc);
|
Chris@161
|
153
|
Chris@176
|
154 desc.identifier = "instrument";
|
Chris@176
|
155 desc.name = "Instrument";
|
Chris@161
|
156 desc.unit = "";
|
Chris@271
|
157 desc.description = "The instrument or instruments known to be present in the recording. This affects the set of instrument templates used, as well as the expected level of polyphony in the output. Using a more limited set of instruments than the default will also make the plugin run faster.\nNote that this plugin cannot isolate instruments: you can't use this setting to request notes from only one instrument in a recording with several. Instead, use this as a hint to the plugin about which instruments are actually present.";
|
Chris@161
|
158 desc.minValue = 0;
|
Chris@162
|
159 desc.maxValue = m_instruments.size()-1;
|
Chris@162
|
160 desc.defaultValue = 0;
|
Chris@161
|
161 desc.isQuantized = true;
|
Chris@161
|
162 desc.quantizeStep = 1;
|
Chris@161
|
163 desc.valueNames.clear();
|
Chris@162
|
164 for (int i = 0; i < int(m_instruments.size()); ++i) {
|
Chris@162
|
165 desc.valueNames.push_back(m_instruments[i].name);
|
Chris@162
|
166 }
|
Chris@166
|
167 list.push_back(desc);
|
Chris@161
|
168
|
Chris@166
|
169 desc.identifier = "finetune";
|
Chris@166
|
170 desc.name = "Return fine pitch estimates";
|
Chris@166
|
171 desc.unit = "";
|
Chris@271
|
172 desc.description = "Return pitch estimates at finer than semitone resolution. This works only in Intensive mode. Notes that appear to drift in pitch will be split up into shorter notes with individually finer pitches.";
|
Chris@166
|
173 desc.minValue = 0;
|
Chris@166
|
174 desc.maxValue = 1;
|
Chris@166
|
175 desc.defaultValue = 0;
|
Chris@166
|
176 desc.isQuantized = true;
|
Chris@166
|
177 desc.quantizeStep = 1;
|
Chris@166
|
178 desc.valueNames.clear();
|
Chris@110
|
179 list.push_back(desc);
|
Chris@110
|
180
|
Chris@31
|
181 return list;
|
Chris@31
|
182 }
|
Chris@31
|
183
|
Chris@31
|
184 float
|
Chris@31
|
185 Silvet::getParameter(string identifier) const
|
Chris@31
|
186 {
|
Chris@110
|
187 if (identifier == "mode") {
|
Chris@297
|
188 return (float)(int)m_mode;
|
Chris@166
|
189 } else if (identifier == "finetune") {
|
Chris@166
|
190 return m_fineTuning ? 1.f : 0.f;
|
Chris@176
|
191 } else if (identifier == "instrument") {
|
Chris@162
|
192 return m_instrument;
|
Chris@110
|
193 }
|
Chris@31
|
194 return 0;
|
Chris@31
|
195 }
|
Chris@31
|
196
|
Chris@31
|
197 void
|
Chris@31
|
198 Silvet::setParameter(string identifier, float value)
|
Chris@31
|
199 {
|
Chris@110
|
200 if (identifier == "mode") {
|
Chris@297
|
201 m_mode = (ProcessingMode)(int)(value + 0.5);
|
Chris@166
|
202 } else if (identifier == "finetune") {
|
Chris@166
|
203 m_fineTuning = (value > 0.5);
|
Chris@176
|
204 } else if (identifier == "instrument") {
|
Chris@162
|
205 m_instrument = lrintf(value);
|
Chris@110
|
206 }
|
Chris@31
|
207 }
|
Chris@31
|
208
|
Chris@31
|
209 Silvet::ProgramList
|
Chris@31
|
210 Silvet::getPrograms() const
|
Chris@31
|
211 {
|
Chris@31
|
212 ProgramList list;
|
Chris@31
|
213 return list;
|
Chris@31
|
214 }
|
Chris@31
|
215
|
Chris@31
|
216 string
|
Chris@31
|
217 Silvet::getCurrentProgram() const
|
Chris@31
|
218 {
|
Chris@31
|
219 return "";
|
Chris@31
|
220 }
|
Chris@31
|
221
|
Chris@31
|
222 void
|
Chris@31
|
223 Silvet::selectProgram(string name)
|
Chris@31
|
224 {
|
Chris@31
|
225 }
|
Chris@31
|
226
|
Chris@31
|
227 Silvet::OutputList
|
Chris@31
|
228 Silvet::getOutputDescriptors() const
|
Chris@31
|
229 {
|
Chris@31
|
230 OutputList list;
|
Chris@31
|
231
|
Chris@31
|
232 OutputDescriptor d;
|
Chris@51
|
233 d.identifier = "notes";
|
Chris@51
|
234 d.name = "Note transcription";
|
Chris@271
|
235 d.description = "Overall note transcription. Each note has time, duration, estimated pitch, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture.";
|
Chris@41
|
236 d.unit = "Hz";
|
Chris@31
|
237 d.hasFixedBinCount = true;
|
Chris@31
|
238 d.binCount = 2;
|
Chris@41
|
239 d.binNames.push_back("Frequency");
|
Chris@31
|
240 d.binNames.push_back("Velocity");
|
Chris@31
|
241 d.hasKnownExtents = false;
|
Chris@31
|
242 d.isQuantized = false;
|
Chris@31
|
243 d.sampleType = OutputDescriptor::VariableSampleRate;
|
Chris@246
|
244 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
|
Chris@31
|
245 d.hasDuration = true;
|
Chris@32
|
246 m_notesOutputNo = list.size();
|
Chris@32
|
247 list.push_back(d);
|
Chris@32
|
248
|
Chris@178
|
249 d.identifier = "timefreq";
|
Chris@178
|
250 d.name = "Time-frequency distribution";
|
Chris@271
|
251 d.description = "Filtered constant-Q time-frequency distribution as used as input to the expectation-maximisation algorithm.";
|
Chris@178
|
252 d.unit = "";
|
Chris@178
|
253 d.hasFixedBinCount = true;
|
Chris@298
|
254 d.binCount = getPack(0).templateHeight;
|
Chris@178
|
255 d.binNames.clear();
|
Chris@178
|
256 if (m_cq) {
|
Chris@294
|
257 char name[50];
|
Chris@298
|
258 for (int i = 0; i < getPack(0).templateHeight; ++i) {
|
Chris@178
|
259 // We have a 600-bin (10 oct 60-bin CQ) of which the
|
Chris@178
|
260 // lowest-frequency 55 bins have been dropped, for a
|
Chris@178
|
261 // 545-bin template. The native CQ bins go high->low
|
Chris@178
|
262 // frequency though, so these are still the first 545 bins
|
Chris@178
|
263 // as reported by getBinFrequency, though in reverse order
|
Chris@178
|
264 float freq = m_cq->getBinFrequency
|
Chris@298
|
265 (getPack(0).templateHeight - i - 1);
|
Chris@178
|
266 sprintf(name, "%.1f Hz", freq);
|
Chris@178
|
267 d.binNames.push_back(name);
|
Chris@178
|
268 }
|
Chris@178
|
269 }
|
Chris@178
|
270 d.hasKnownExtents = false;
|
Chris@178
|
271 d.isQuantized = false;
|
Chris@178
|
272 d.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@178
|
273 d.sampleRate = m_colsPerSec;
|
Chris@178
|
274 d.hasDuration = false;
|
Chris@178
|
275 m_fcqOutputNo = list.size();
|
Chris@178
|
276 list.push_back(d);
|
Chris@178
|
277
|
Chris@294
|
278 d.identifier = "pitchactivation";
|
Chris@294
|
279 d.name = "Pitch activation distribution";
|
Chris@294
|
280 d.description = "Pitch activation distribution resulting from expectation-maximisation algorithm, prior to note extraction.";
|
Chris@294
|
281 d.unit = "";
|
Chris@294
|
282 d.hasFixedBinCount = true;
|
Chris@298
|
283 d.binCount = getPack(0).templateNoteCount;
|
Chris@294
|
284 d.binNames.clear();
|
Chris@294
|
285 if (m_cq) {
|
Chris@298
|
286 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
|
Chris@294
|
287 d.binNames.push_back(noteName(i, 0, 1));
|
Chris@294
|
288 }
|
Chris@294
|
289 }
|
Chris@294
|
290 d.hasKnownExtents = false;
|
Chris@294
|
291 d.isQuantized = false;
|
Chris@294
|
292 d.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@294
|
293 d.sampleRate = m_colsPerSec;
|
Chris@294
|
294 d.hasDuration = false;
|
Chris@294
|
295 m_pitchOutputNo = list.size();
|
Chris@294
|
296 list.push_back(d);
|
Chris@294
|
297
|
Chris@302
|
298 d.identifier = "templates";
|
Chris@302
|
299 d.name = "Templates";
|
Chris@302
|
300 d.description = "Constant-Q spectral templates for the selected instrument pack.";
|
Chris@302
|
301 d.unit = "";
|
Chris@302
|
302 d.hasFixedBinCount = true;
|
Chris@302
|
303 d.binCount = getPack(0).templateHeight;
|
Chris@302
|
304 d.binNames.clear();
|
Chris@302
|
305 if (m_cq) {
|
Chris@302
|
306 char name[50];
|
Chris@302
|
307 for (int i = 0; i < getPack(0).templateHeight; ++i) {
|
Chris@302
|
308 // We have a 600-bin (10 oct 60-bin CQ) of which the
|
Chris@302
|
309 // lowest-frequency 55 bins have been dropped, for a
|
Chris@302
|
310 // 545-bin template. The native CQ bins go high->low
|
Chris@302
|
311 // frequency though, so these are still the first 545 bins
|
Chris@302
|
312 // as reported by getBinFrequency, though in reverse order
|
Chris@302
|
313 float freq = m_cq->getBinFrequency
|
Chris@302
|
314 (getPack(0).templateHeight - i - 1);
|
Chris@302
|
315 sprintf(name, "%.1f Hz", freq);
|
Chris@302
|
316 d.binNames.push_back(name);
|
Chris@302
|
317 }
|
Chris@302
|
318 }
|
Chris@302
|
319 d.hasKnownExtents = false;
|
Chris@302
|
320 d.isQuantized = false;
|
Chris@302
|
321 d.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@302
|
322 d.sampleRate = m_colsPerSec;
|
Chris@302
|
323 d.hasDuration = false;
|
Chris@302
|
324 m_templateOutputNo = list.size();
|
Chris@302
|
325 list.push_back(d);
|
Chris@302
|
326
|
Chris@31
|
327 return list;
|
Chris@31
|
328 }
|
Chris@31
|
329
|
Chris@38
|
330 std::string
|
Chris@175
|
331 Silvet::noteName(int note, int shift, int shiftCount) const
|
Chris@38
|
332 {
|
Chris@38
|
333 static const char *names[] = {
|
Chris@38
|
334 "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"
|
Chris@38
|
335 };
|
Chris@38
|
336
|
Chris@175
|
337 const char *n = names[note % 12];
|
Chris@38
|
338
|
Chris@175
|
339 int oct = (note + 9) / 12;
|
Chris@38
|
340
|
Chris@175
|
341 char buf[30];
|
Chris@175
|
342
|
Chris@175
|
343 float pshift = 0.f;
|
Chris@175
|
344 if (shiftCount > 1) {
|
Chris@175
|
345 // see noteFrequency below
|
Chris@175
|
346 pshift =
|
Chris@175
|
347 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
|
Chris@175
|
348 }
|
Chris@175
|
349
|
Chris@175
|
350 if (pshift > 0.f) {
|
Chris@175
|
351 sprintf(buf, "%s%d+%dc", n, oct, int(round(pshift * 100)));
|
Chris@175
|
352 } else if (pshift < 0.f) {
|
Chris@175
|
353 sprintf(buf, "%s%d-%dc", n, oct, int(round((-pshift) * 100)));
|
Chris@175
|
354 } else {
|
Chris@175
|
355 sprintf(buf, "%s%d", n, oct);
|
Chris@175
|
356 }
|
Chris@38
|
357
|
Chris@38
|
358 return buf;
|
Chris@38
|
359 }
|
Chris@38
|
360
|
Chris@41
|
361 float
|
Chris@168
|
362 Silvet::noteFrequency(int note, int shift, int shiftCount) const
|
Chris@41
|
363 {
|
Chris@169
|
364 // Convert shift number to a pitch shift. The given shift number
|
Chris@169
|
365 // is an offset into the template array, which starts with some
|
Chris@169
|
366 // zeros, followed by the template, then some trailing zeros.
|
Chris@169
|
367 //
|
Chris@169
|
368 // Example: if we have templateMaxShift == 2 and thus shiftCount
|
Chris@169
|
369 // == 5, then the number will be in the range 0-4 and the template
|
Chris@169
|
370 // will have 2 zeros at either end. Thus number 2 represents the
|
Chris@169
|
371 // template "as recorded", for a pitch shift of 0; smaller indices
|
Chris@169
|
372 // represent moving the template *up* in pitch (by introducing
|
Chris@169
|
373 // zeros at the start, which is the low-frequency end), for a
|
Chris@169
|
374 // positive pitch shift; and higher values represent moving it
|
Chris@169
|
375 // down in pitch, for a negative pitch shift.
|
Chris@169
|
376
|
Chris@175
|
377 float pshift = 0.f;
|
Chris@175
|
378 if (shiftCount > 1) {
|
Chris@175
|
379 pshift =
|
Chris@175
|
380 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
|
Chris@175
|
381 }
|
Chris@169
|
382
|
Chris@301
|
383 float freq = float(27.5 * pow(2.0, (note + pshift) / 12.0));
|
Chris@301
|
384
|
Chris@301
|
385 cerr << "note = " << note << ", shift = " << shift << ", shiftCount = "
|
Chris@301
|
386 << shiftCount << ", obtained freq = " << freq << endl;
|
Chris@301
|
387
|
Chris@301
|
388 return freq;
|
Chris@41
|
389 }
|
Chris@41
|
390
|
Chris@31
|
391 bool
|
Chris@31
|
392 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
Chris@31
|
393 {
|
Chris@272
|
394 if (m_inputSampleRate < minInputSampleRate ||
|
Chris@272
|
395 m_inputSampleRate > maxInputSampleRate) {
|
Chris@272
|
396 cerr << "Silvet::initialise: Unsupported input sample rate "
|
Chris@272
|
397 << m_inputSampleRate << " (supported min " << minInputSampleRate
|
Chris@272
|
398 << ", max " << maxInputSampleRate << ")" << endl;
|
Chris@272
|
399 return false;
|
Chris@272
|
400 }
|
Chris@272
|
401
|
Chris@31
|
402 if (channels < getMinChannelCount() ||
|
Chris@272
|
403 channels > getMaxChannelCount()) {
|
Chris@272
|
404 cerr << "Silvet::initialise: Unsupported channel count " << channels
|
Chris@272
|
405 << " (supported min " << getMinChannelCount() << ", max "
|
Chris@272
|
406 << getMaxChannelCount() << ")" << endl;
|
Chris@272
|
407 return false;
|
Chris@272
|
408 }
|
Chris@31
|
409
|
Chris@31
|
410 if (stepSize != blockSize) {
|
Chris@31
|
411 cerr << "Silvet::initialise: Step size must be the same as block size ("
|
Chris@31
|
412 << stepSize << " != " << blockSize << ")" << endl;
|
Chris@31
|
413 return false;
|
Chris@31
|
414 }
|
Chris@31
|
415
|
Chris@31
|
416 m_blockSize = blockSize;
|
Chris@31
|
417
|
Chris@31
|
418 reset();
|
Chris@31
|
419
|
Chris@31
|
420 return true;
|
Chris@31
|
421 }
|
Chris@31
|
422
|
Chris@31
|
423 void
|
Chris@31
|
424 Silvet::reset()
|
Chris@31
|
425 {
|
Chris@31
|
426 delete m_resampler;
|
Chris@246
|
427 delete m_flattener;
|
Chris@31
|
428 delete m_cq;
|
Chris@31
|
429
|
Chris@31
|
430 if (m_inputSampleRate != processingSampleRate) {
|
Chris@31
|
431 m_resampler = new Resampler(m_inputSampleRate, processingSampleRate);
|
Chris@31
|
432 } else {
|
Chris@31
|
433 m_resampler = 0;
|
Chris@31
|
434 }
|
Chris@31
|
435
|
Chris@246
|
436 m_flattener = new FlattenDynamics(m_inputSampleRate); // before resampling
|
Chris@246
|
437 m_flattener->reset();
|
Chris@246
|
438
|
Chris@301
|
439 // this happens to be processingSampleRate / 3, and is the top
|
Chris@301
|
440 // freq used for the EM templates:
|
Chris@301
|
441 double maxFreq = 14700;
|
Chris@301
|
442
|
Chris@301
|
443 if (m_mode == LiveMode) {
|
Chris@301
|
444 // We only have 12 bpo rather than 60, so we need the top bin
|
Chris@301
|
445 // to be the middle one of the top 5, i.e. 2/5 of a semitone
|
Chris@301
|
446 // lower than 14700
|
Chris@301
|
447 maxFreq *= powf(2.0, -1.0 / 30.0);
|
Chris@301
|
448 }
|
Chris@301
|
449
|
Chris@173
|
450 double minFreq = 27.5;
|
Chris@173
|
451
|
Chris@297
|
452 if (m_mode != HighQualityMode) {
|
Chris@173
|
453 // We don't actually return any notes from the bottom octave,
|
Chris@173
|
454 // so we can just pad with zeros
|
Chris@173
|
455 minFreq *= 2;
|
Chris@173
|
456 }
|
Chris@173
|
457
|
Chris@298
|
458 int bpo = 12 *
|
Chris@298
|
459 (m_mode == LiveMode ? binsPerSemitoneLive : binsPerSemitoneNormal);
|
Chris@301
|
460
|
Chris@154
|
461 CQParameters params(processingSampleRate,
|
Chris@173
|
462 minFreq,
|
Chris@154
|
463 processingSampleRate / 3,
|
Chris@298
|
464 bpo);
|
Chris@154
|
465
|
Chris@155
|
466 params.q = 0.95; // MIREX code uses 0.8, but it seems 0.9 or lower
|
Chris@155
|
467 // drops the FFT size to 512 from 1024 and alters
|
Chris@155
|
468 // some other processing parameters, making
|
Chris@155
|
469 // everything much, much slower. Could be a flaw
|
Chris@155
|
470 // in the CQ parameter calculations, must check
|
Chris@154
|
471 params.atomHopFactor = 0.3;
|
Chris@154
|
472 params.threshold = 0.0005;
|
Chris@172
|
473 params.window = CQParameters::Hann;
|
Chris@154
|
474
|
Chris@154
|
475 m_cq = new CQSpectrogram(params, CQSpectrogram::InterpolateLinear);
|
Chris@31
|
476
|
Chris@301
|
477 cerr << "CQ bins = " << m_cq->getTotalBins() << endl;
|
Chris@301
|
478 cerr << "CQ min freq = " << m_cq->getMinFrequency() << " (and for confirmation, freq of bin 0 = " << m_cq->getBinFrequency(0) << ")" << endl;
|
Chris@297
|
479
|
Chris@297
|
480 m_colsPerSec = (m_mode == DraftMode ? 25 : 50);
|
Chris@165
|
481
|
Chris@41
|
482 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
|
Chris@41
|
483 delete m_postFilter[i];
|
Chris@41
|
484 }
|
Chris@41
|
485 m_postFilter.clear();
|
Chris@298
|
486 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
|
Chris@41
|
487 m_postFilter.push_back(new MedianFilter<double>(3));
|
Chris@41
|
488 }
|
Chris@41
|
489 m_pianoRoll.clear();
|
Chris@246
|
490 m_inputGains.clear();
|
Chris@32
|
491 m_columnCount = 0;
|
Chris@272
|
492 m_resampledCount = 0;
|
Chris@40
|
493 m_startTime = RealTime::zeroTime;
|
Chris@31
|
494 }
|
Chris@31
|
495
|
Chris@31
|
496 Silvet::FeatureSet
|
Chris@31
|
497 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
Chris@31
|
498 {
|
Chris@302
|
499 FeatureSet fs;
|
Chris@302
|
500
|
Chris@40
|
501 if (m_columnCount == 0) {
|
Chris@40
|
502 m_startTime = timestamp;
|
Chris@302
|
503 insertTemplateFeatures(fs);
|
Chris@40
|
504 }
|
Chris@246
|
505
|
Chris@246
|
506 vector<float> flattened(m_blockSize);
|
Chris@246
|
507 float gain = 1.f;
|
Chris@246
|
508 m_flattener->connectInputPort
|
Chris@246
|
509 (FlattenDynamics::AudioInputPort, inputBuffers[0]);
|
Chris@246
|
510 m_flattener->connectOutputPort
|
Chris@246
|
511 (FlattenDynamics::AudioOutputPort, &flattened[0]);
|
Chris@246
|
512 m_flattener->connectOutputPort
|
Chris@246
|
513 (FlattenDynamics::GainOutputPort, &gain);
|
Chris@246
|
514 m_flattener->process(m_blockSize);
|
Chris@246
|
515
|
Chris@252
|
516 m_inputGains[timestamp] = gain;
|
Chris@40
|
517
|
Chris@31
|
518 vector<double> data;
|
Chris@40
|
519 for (int i = 0; i < m_blockSize; ++i) {
|
Chris@246
|
520 double d = flattened[i];
|
Chris@235
|
521 data.push_back(d);
|
Chris@40
|
522 }
|
Chris@31
|
523
|
Chris@31
|
524 if (m_resampler) {
|
Chris@272
|
525
|
Chris@31
|
526 data = m_resampler->process(data.data(), data.size());
|
Chris@272
|
527
|
Chris@272
|
528 int hadCount = m_resampledCount;
|
Chris@272
|
529 m_resampledCount += data.size();
|
Chris@272
|
530
|
Chris@272
|
531 int resamplerLatency = m_resampler->getLatency();
|
Chris@272
|
532
|
Chris@272
|
533 if (hadCount < resamplerLatency) {
|
Chris@272
|
534 int stillToDrop = resamplerLatency - hadCount;
|
Chris@272
|
535 if (stillToDrop >= int(data.size())) {
|
Chris@302
|
536 return fs;
|
Chris@272
|
537 } else {
|
Chris@272
|
538 data = vector<double>(data.begin() + stillToDrop, data.end());
|
Chris@272
|
539 }
|
Chris@272
|
540 }
|
Chris@31
|
541 }
|
Chris@272
|
542
|
Chris@32
|
543 Grid cqout = m_cq->process(data);
|
Chris@302
|
544 transcribe(cqout, fs);
|
Chris@51
|
545 return fs;
|
Chris@34
|
546 }
|
Chris@34
|
547
|
Chris@34
|
548 Silvet::FeatureSet
|
Chris@34
|
549 Silvet::getRemainingFeatures()
|
Chris@34
|
550 {
|
Chris@145
|
551 Grid cqout = m_cq->getRemainingOutput();
|
Chris@302
|
552 FeatureSet fs;
|
Chris@302
|
553 if (m_columnCount == 0) {
|
Chris@302
|
554 // process() was never called, but we still want these
|
Chris@302
|
555 insertTemplateFeatures(fs);
|
Chris@302
|
556 } else {
|
Chris@302
|
557 transcribe(cqout, fs);
|
Chris@302
|
558 }
|
Chris@51
|
559 return fs;
|
Chris@34
|
560 }
|
Chris@34
|
561
|
Chris@302
|
562 void
|
Chris@302
|
563 Silvet::insertTemplateFeatures(FeatureSet &fs)
|
Chris@302
|
564 {
|
Chris@302
|
565 const InstrumentPack &pack = getPack(m_instrument);
|
Chris@302
|
566 for (int i = 0; i < int(pack.templates.size()) * pack.templateNoteCount; ++i) {
|
Chris@302
|
567 RealTime timestamp = RealTime::fromSeconds(double(i) / m_colsPerSec);
|
Chris@302
|
568 Feature f;
|
Chris@302
|
569 char buffer[50];
|
Chris@302
|
570 sprintf(buffer, "Note %d", i + 1);
|
Chris@302
|
571 f.label = buffer;
|
Chris@302
|
572 f.hasTimestamp = true;
|
Chris@302
|
573 f.timestamp = timestamp;
|
Chris@302
|
574 f.values = pack.templates[i / pack.templateNoteCount]
|
Chris@302
|
575 .data[i % pack.templateNoteCount];
|
Chris@302
|
576 fs[m_templateOutputNo].push_back(f);
|
Chris@302
|
577 }
|
Chris@302
|
578 }
|
Chris@302
|
579
|
Chris@302
|
580 void
|
Chris@302
|
581 Silvet::transcribe(const Grid &cqout, Silvet::FeatureSet &fs)
|
Chris@34
|
582 {
|
Chris@32
|
583 Grid filtered = preProcess(cqout);
|
Chris@31
|
584
|
Chris@302
|
585 if (filtered.empty()) return;
|
Chris@170
|
586
|
Chris@298
|
587 const InstrumentPack &pack(getPack(m_instrument));
|
Chris@104
|
588
|
Chris@178
|
589 for (int i = 0; i < (int)filtered.size(); ++i) {
|
Chris@178
|
590 Feature f;
|
Chris@178
|
591 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@178
|
592 f.values.push_back(float(filtered[i][j]));
|
Chris@178
|
593 }
|
Chris@178
|
594 fs[m_fcqOutputNo].push_back(f);
|
Chris@178
|
595 }
|
Chris@178
|
596
|
Chris@34
|
597 int width = filtered.size();
|
Chris@34
|
598
|
Chris@297
|
599 int iterations = (m_mode == HighQualityMode ? 20 : 10);
|
Chris@34
|
600
|
Chris@176
|
601 Grid localPitches(width, vector<double>(pack.templateNoteCount, 0.0));
|
Chris@170
|
602
|
Chris@297
|
603 bool wantShifts = (m_mode == HighQualityMode) && m_fineTuning;
|
Chris@170
|
604 int shiftCount = 1;
|
Chris@170
|
605 if (wantShifts) {
|
Chris@170
|
606 shiftCount = pack.templateMaxShift * 2 + 1;
|
Chris@170
|
607 }
|
Chris@170
|
608
|
Chris@170
|
609 vector<vector<int> > localBestShifts;
|
Chris@170
|
610 if (wantShifts) {
|
Chris@170
|
611 localBestShifts =
|
Chris@176
|
612 vector<vector<int> >(width, vector<int>(pack.templateNoteCount, 0));
|
Chris@170
|
613 }
|
Chris@170
|
614
|
Chris@170
|
615 vector<bool> present(width, false);
|
Chris@37
|
616
|
Chris@123
|
617 #pragma omp parallel for
|
Chris@123
|
618 for (int i = 0; i < width; ++i) {
|
Chris@104
|
619
|
Chris@170
|
620 double sum = 0.0;
|
Chris@176
|
621 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@170
|
622 sum += filtered.at(i).at(j);
|
Chris@170
|
623 }
|
Chris@170
|
624 if (sum < 1e-5) continue;
|
Chris@170
|
625
|
Chris@170
|
626 present[i] = true;
|
Chris@170
|
627
|
Chris@297
|
628 EM em(&pack, m_mode == HighQualityMode);
|
Chris@170
|
629
|
Chris@183
|
630 em.setPitchSparsity(pack.pitchSparsity);
|
Chris@213
|
631 em.setSourceSparsity(pack.sourceSparsity);
|
Chris@183
|
632
|
Chris@170
|
633 for (int j = 0; j < iterations; ++j) {
|
Chris@170
|
634 em.iterate(filtered.at(i).data());
|
Chris@37
|
635 }
|
Chris@37
|
636
|
Chris@170
|
637 const float *pitchDist = em.getPitchDistribution();
|
Chris@170
|
638 const float *const *shiftDist = em.getShifts();
|
Chris@37
|
639
|
Chris@176
|
640 for (int j = 0; j < pack.templateNoteCount; ++j) {
|
Chris@104
|
641
|
Chris@170
|
642 localPitches[i][j] = pitchDist[j] * sum;
|
Chris@170
|
643
|
Chris@170
|
644 int bestShift = 0;
|
Chris@179
|
645 float bestShiftValue = 0.0;
|
Chris@170
|
646 if (wantShifts) {
|
Chris@170
|
647 for (int k = 0; k < shiftCount; ++k) {
|
Chris@179
|
648 float value = shiftDist[k][j];
|
Chris@179
|
649 if (k == 0 || value > bestShiftValue) {
|
Chris@179
|
650 bestShiftValue = value;
|
Chris@170
|
651 bestShift = k;
|
Chris@170
|
652 }
|
Chris@170
|
653 }
|
Chris@170
|
654 localBestShifts[i][j] = bestShift;
|
Chris@170
|
655 }
|
Chris@123
|
656 }
|
Chris@123
|
657 }
|
Chris@166
|
658
|
Chris@166
|
659 for (int i = 0; i < width; ++i) {
|
Chris@37
|
660
|
Chris@170
|
661 if (!present[i]) {
|
Chris@170
|
662 // silent column
|
Chris@176
|
663 for (int j = 0; j < pack.templateNoteCount; ++j) {
|
Chris@170
|
664 m_postFilter[j]->push(0.0);
|
Chris@170
|
665 }
|
Chris@168
|
666 m_pianoRoll.push_back(map<int, double>());
|
Chris@170
|
667 if (wantShifts) {
|
Chris@168
|
668 m_pianoRollShifts.push_back(map<int, int>());
|
Chris@168
|
669 }
|
Chris@166
|
670 continue;
|
Chris@166
|
671 }
|
Chris@166
|
672
|
Chris@294
|
673 vector<double> filtered = postProcess
|
Chris@294
|
674 (localPitches[i], localBestShifts[i], wantShifts);
|
Chris@294
|
675
|
Chris@294
|
676 Feature f;
|
Chris@294
|
677 for (int j = 0; j < (int)filtered.size(); ++j) {
|
Chris@294
|
678 float v(filtered[j]);
|
Chris@294
|
679 if (v < pack.levelThreshold) v = 0.f;
|
Chris@294
|
680 f.values.push_back(v);
|
Chris@294
|
681 }
|
Chris@294
|
682 fs[m_pitchOutputNo].push_back(f);
|
Chris@166
|
683
|
Chris@168
|
684 FeatureList noteFeatures = noteTrack(shiftCount);
|
Chris@38
|
685
|
Chris@123
|
686 for (FeatureList::const_iterator fi = noteFeatures.begin();
|
Chris@123
|
687 fi != noteFeatures.end(); ++fi) {
|
Chris@123
|
688 fs[m_notesOutputNo].push_back(*fi);
|
Chris@40
|
689 }
|
Chris@34
|
690 }
|
Chris@31
|
691 }
|
Chris@31
|
692
|
Chris@32
|
693 Silvet::Grid
|
Chris@32
|
694 Silvet::preProcess(const Grid &in)
|
Chris@32
|
695 {
|
Chris@32
|
696 int width = in.size();
|
Chris@32
|
697
|
Chris@165
|
698 int spacing = processingSampleRate / m_colsPerSec;
|
Chris@32
|
699
|
Chris@165
|
700 // need to be careful that col spacing is an integer number of samples!
|
Chris@165
|
701 assert(spacing * m_colsPerSec == processingSampleRate);
|
Chris@32
|
702
|
Chris@32
|
703 Grid out;
|
Chris@32
|
704
|
Chris@58
|
705 // We count the CQ latency in terms of processing hops, but
|
Chris@58
|
706 // actually it probably isn't an exact number of hops so this
|
Chris@58
|
707 // isn't quite accurate. But the small constant offset is
|
Chris@165
|
708 // practically irrelevant compared to the jitter from the frame
|
Chris@165
|
709 // size we reduce to in a moment
|
Chris@33
|
710 int latentColumns = m_cq->getLatency() / m_cq->getColumnHop();
|
Chris@33
|
711
|
Chris@298
|
712 const InstrumentPack &pack(getPack(m_instrument));
|
Chris@176
|
713
|
Chris@32
|
714 for (int i = 0; i < width; ++i) {
|
Chris@32
|
715
|
Chris@33
|
716 if (m_columnCount < latentColumns) {
|
Chris@33
|
717 ++m_columnCount;
|
Chris@33
|
718 continue;
|
Chris@33
|
719 }
|
Chris@33
|
720
|
Chris@32
|
721 int prevSampleNo = (m_columnCount - 1) * m_cq->getColumnHop();
|
Chris@32
|
722 int sampleNo = m_columnCount * m_cq->getColumnHop();
|
Chris@32
|
723
|
Chris@32
|
724 bool select = (sampleNo / spacing != prevSampleNo / spacing);
|
Chris@32
|
725
|
Chris@32
|
726 if (select) {
|
Chris@32
|
727 vector<double> inCol = in[i];
|
Chris@176
|
728 vector<double> outCol(pack.templateHeight);
|
Chris@32
|
729
|
Chris@178
|
730 // In HQ mode, the CQ returns 600 bins and we ignore the
|
Chris@298
|
731 // lowest 55 of them (assuming binsPerSemitone == 5).
|
Chris@178
|
732 //
|
Chris@297
|
733 // In draft and live mode the CQ is an octave shorter,
|
Chris@300
|
734 // returning 540 bins or equivalent, so we instead pad
|
Chris@300
|
735 // them with an additional 5 or equivalent zeros.
|
Chris@178
|
736 //
|
Chris@178
|
737 // We also need to reverse the column as we go, since the
|
Chris@178
|
738 // raw CQ has the high frequencies first and we need it
|
Chris@178
|
739 // the other way around.
|
Chris@32
|
740
|
Chris@298
|
741 int bps = (m_mode == LiveMode ?
|
Chris@298
|
742 binsPerSemitoneLive : binsPerSemitoneNormal);
|
Chris@298
|
743
|
Chris@297
|
744 if (m_mode == HighQualityMode) {
|
Chris@178
|
745 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@298
|
746 int ix = inCol.size() - j - (11 * bps);
|
Chris@178
|
747 outCol[j] = inCol[ix];
|
Chris@178
|
748 }
|
Chris@178
|
749 } else {
|
Chris@298
|
750 for (int j = 0; j < bps; ++j) {
|
Chris@178
|
751 outCol[j] = 0.0;
|
Chris@178
|
752 }
|
Chris@298
|
753 for (int j = bps; j < pack.templateHeight; ++j) {
|
Chris@298
|
754 int ix = inCol.size() - j + (bps-1);
|
Chris@178
|
755 outCol[j] = inCol[ix];
|
Chris@178
|
756 }
|
Chris@46
|
757 }
|
Chris@32
|
758
|
Chris@46
|
759 vector<double> noiseLevel1 =
|
Chris@298
|
760 MedianFilter<double>::filter(8 * bps, outCol);
|
Chris@176
|
761 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@46
|
762 noiseLevel1[j] = std::min(outCol[j], noiseLevel1[j]);
|
Chris@46
|
763 }
|
Chris@32
|
764
|
Chris@46
|
765 vector<double> noiseLevel2 =
|
Chris@298
|
766 MedianFilter<double>::filter(8 * bps, noiseLevel1);
|
Chris@176
|
767 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@46
|
768 outCol[j] = std::max(outCol[j] - noiseLevel2[j], 0.0);
|
Chris@32
|
769 }
|
Chris@32
|
770
|
Chris@165
|
771 out.push_back(outCol);
|
Chris@32
|
772 }
|
Chris@32
|
773
|
Chris@32
|
774 ++m_columnCount;
|
Chris@32
|
775 }
|
Chris@32
|
776
|
Chris@32
|
777 return out;
|
Chris@32
|
778 }
|
Chris@32
|
779
|
Chris@294
|
780 vector<double>
|
Chris@170
|
781 Silvet::postProcess(const vector<double> &pitches,
|
Chris@170
|
782 const vector<int> &bestShifts,
|
Chris@170
|
783 bool wantShifts)
|
Chris@166
|
784 {
|
Chris@298
|
785 const InstrumentPack &pack(getPack(m_instrument));
|
Chris@176
|
786
|
Chris@41
|
787 vector<double> filtered;
|
Chris@41
|
788
|
Chris@176
|
789 for (int j = 0; j < pack.templateNoteCount; ++j) {
|
Chris@170
|
790 m_postFilter[j]->push(pitches[j]);
|
Chris@41
|
791 filtered.push_back(m_postFilter[j]->get());
|
Chris@41
|
792 }
|
Chris@41
|
793
|
Chris@41
|
794 // Threshold for level and reduce number of candidate pitches
|
Chris@41
|
795
|
Chris@41
|
796 typedef std::multimap<double, int> ValueIndexMap;
|
Chris@41
|
797
|
Chris@41
|
798 ValueIndexMap strengths;
|
Chris@166
|
799
|
Chris@176
|
800 for (int j = 0; j < pack.templateNoteCount; ++j) {
|
Chris@166
|
801 double strength = filtered[j];
|
Chris@183
|
802 if (strength < pack.levelThreshold) continue;
|
Chris@168
|
803 strengths.insert(ValueIndexMap::value_type(strength, j));
|
Chris@168
|
804 }
|
Chris@166
|
805
|
Chris@168
|
806 ValueIndexMap::const_iterator si = strengths.end();
|
Chris@167
|
807
|
Chris@168
|
808 map<int, double> active;
|
Chris@168
|
809 map<int, int> activeShifts;
|
Chris@168
|
810
|
Chris@183
|
811 while (int(active.size()) < pack.maxPolyphony && si != strengths.begin()) {
|
Chris@168
|
812
|
Chris@168
|
813 --si;
|
Chris@168
|
814
|
Chris@168
|
815 double strength = si->first;
|
Chris@168
|
816 int j = si->second;
|
Chris@168
|
817
|
Chris@168
|
818 active[j] = strength;
|
Chris@168
|
819
|
Chris@170
|
820 if (wantShifts) {
|
Chris@170
|
821 activeShifts[j] = bestShifts[j];
|
Chris@167
|
822 }
|
Chris@41
|
823 }
|
Chris@41
|
824
|
Chris@168
|
825 m_pianoRoll.push_back(active);
|
Chris@170
|
826
|
Chris@170
|
827 if (wantShifts) {
|
Chris@168
|
828 m_pianoRollShifts.push_back(activeShifts);
|
Chris@41
|
829 }
|
Chris@294
|
830
|
Chris@294
|
831 return filtered;
|
Chris@166
|
832 }
|
Chris@166
|
833
|
Chris@166
|
834 Vamp::Plugin::FeatureList
|
Chris@168
|
835 Silvet::noteTrack(int shiftCount)
|
Chris@166
|
836 {
|
Chris@41
|
837 // Minimum duration pruning, and conversion to notes. We can only
|
Chris@41
|
838 // report notes that have just ended (i.e. that are absent in the
|
Chris@168
|
839 // latest active set but present in the prior set in the piano
|
Chris@41
|
840 // roll) -- any notes that ended earlier will have been reported
|
Chris@41
|
841 // already, and if they haven't ended, we don't know their
|
Chris@41
|
842 // duration.
|
Chris@41
|
843
|
Chris@168
|
844 int width = m_pianoRoll.size() - 1;
|
Chris@168
|
845
|
Chris@168
|
846 const map<int, double> &active = m_pianoRoll[width];
|
Chris@41
|
847
|
Chris@165
|
848 double columnDuration = 1.0 / m_colsPerSec;
|
Chris@165
|
849
|
Chris@165
|
850 // only keep notes >= 100ms or thereabouts
|
Chris@165
|
851 int durationThreshold = floor(0.1 / columnDuration); // columns
|
Chris@165
|
852 if (durationThreshold < 1) durationThreshold = 1;
|
Chris@41
|
853
|
Chris@41
|
854 FeatureList noteFeatures;
|
Chris@41
|
855
|
Chris@41
|
856 if (width < durationThreshold + 1) {
|
Chris@41
|
857 return noteFeatures;
|
Chris@41
|
858 }
|
Chris@41
|
859
|
Chris@150
|
860 //!!! try: repeated note detection? (look for change in first derivative of the pitch matrix)
|
Chris@150
|
861
|
Chris@55
|
862 for (map<int, double>::const_iterator ni = m_pianoRoll[width-1].begin();
|
Chris@41
|
863 ni != m_pianoRoll[width-1].end(); ++ni) {
|
Chris@41
|
864
|
Chris@55
|
865 int note = ni->first;
|
Chris@41
|
866
|
Chris@41
|
867 if (active.find(note) != active.end()) {
|
Chris@41
|
868 // the note is still playing
|
Chris@41
|
869 continue;
|
Chris@41
|
870 }
|
Chris@41
|
871
|
Chris@41
|
872 // the note was playing but just ended
|
Chris@41
|
873 int end = width;
|
Chris@41
|
874 int start = end-1;
|
Chris@41
|
875
|
Chris@41
|
876 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) {
|
Chris@41
|
877 --start;
|
Chris@41
|
878 }
|
Chris@41
|
879 ++start;
|
Chris@41
|
880
|
Chris@169
|
881 if ((end - start) < durationThreshold) {
|
Chris@41
|
882 continue;
|
Chris@41
|
883 }
|
Chris@41
|
884
|
Chris@169
|
885 emitNote(start, end, note, shiftCount, noteFeatures);
|
Chris@41
|
886 }
|
Chris@41
|
887
|
Chris@62
|
888 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl;
|
Chris@41
|
889
|
Chris@41
|
890 return noteFeatures;
|
Chris@41
|
891 }
|
Chris@41
|
892
|
Chris@169
|
893 void
|
Chris@169
|
894 Silvet::emitNote(int start, int end, int note, int shiftCount,
|
Chris@169
|
895 FeatureList ¬eFeatures)
|
Chris@169
|
896 {
|
Chris@169
|
897 int partStart = start;
|
Chris@169
|
898 int partShift = 0;
|
Chris@169
|
899 int partVelocity = 0;
|
Chris@169
|
900
|
Chris@252
|
901 int partThreshold = floor(0.05 * m_colsPerSec);
|
Chris@169
|
902
|
Chris@169
|
903 for (int i = start; i != end; ++i) {
|
Chris@169
|
904
|
Chris@169
|
905 double strength = m_pianoRoll[i][note];
|
Chris@169
|
906
|
Chris@169
|
907 int shift = 0;
|
Chris@169
|
908
|
Chris@169
|
909 if (shiftCount > 1) {
|
Chris@169
|
910
|
Chris@169
|
911 shift = m_pianoRollShifts[i][note];
|
Chris@169
|
912
|
Chris@169
|
913 if (i == partStart) {
|
Chris@169
|
914 partShift = shift;
|
Chris@169
|
915 }
|
Chris@169
|
916
|
Chris@169
|
917 if (i > partStart + partThreshold && shift != partShift) {
|
Chris@169
|
918
|
Chris@169
|
919 // cerr << "i = " << i << ", partStart = " << partStart << ", shift = " << shift << ", partShift = " << partShift << endl;
|
Chris@169
|
920
|
Chris@169
|
921 // pitch has changed, emit an intermediate note
|
Chris@252
|
922 noteFeatures.push_back(makeNoteFeature(partStart,
|
Chris@252
|
923 i,
|
Chris@252
|
924 note,
|
Chris@252
|
925 partShift,
|
Chris@252
|
926 shiftCount,
|
Chris@252
|
927 partVelocity));
|
Chris@169
|
928 partStart = i;
|
Chris@169
|
929 partShift = shift;
|
Chris@169
|
930 partVelocity = 0;
|
Chris@169
|
931 }
|
Chris@169
|
932 }
|
Chris@169
|
933
|
Chris@246
|
934 int v = round(strength * 2);
|
Chris@169
|
935 if (v > partVelocity) {
|
Chris@169
|
936 partVelocity = v;
|
Chris@169
|
937 }
|
Chris@169
|
938 }
|
Chris@169
|
939
|
Chris@169
|
940 if (end >= partStart + partThreshold) {
|
Chris@252
|
941 noteFeatures.push_back(makeNoteFeature(partStart,
|
Chris@252
|
942 end,
|
Chris@252
|
943 note,
|
Chris@252
|
944 partShift,
|
Chris@252
|
945 shiftCount,
|
Chris@252
|
946 partVelocity));
|
Chris@169
|
947 }
|
Chris@169
|
948 }
|
Chris@252
|
949
|
Chris@252
|
950 Silvet::Feature
|
Chris@252
|
951 Silvet::makeNoteFeature(int start,
|
Chris@252
|
952 int end,
|
Chris@252
|
953 int note,
|
Chris@252
|
954 int shift,
|
Chris@252
|
955 int shiftCount,
|
Chris@252
|
956 int velocity)
|
Chris@252
|
957 {
|
Chris@252
|
958 double columnDuration = 1.0 / m_colsPerSec;
|
Chris@252
|
959 int postFilterLatency = int(m_postFilter[0]->getSize() / 2);
|
Chris@252
|
960
|
Chris@252
|
961 Feature f;
|
Chris@252
|
962
|
Chris@252
|
963 f.hasTimestamp = true;
|
Chris@285
|
964 f.timestamp = m_startTime + RealTime::fromSeconds
|
Chris@252
|
965 (columnDuration * (start - postFilterLatency) + 0.02);
|
Chris@252
|
966
|
Chris@252
|
967 f.hasDuration = true;
|
Chris@252
|
968 f.duration = RealTime::fromSeconds
|
Chris@252
|
969 (columnDuration * (end - start));
|
Chris@252
|
970
|
Chris@252
|
971 f.values.clear();
|
Chris@252
|
972
|
Chris@252
|
973 f.values.push_back
|
Chris@252
|
974 (noteFrequency(note, shift, shiftCount));
|
Chris@252
|
975
|
Chris@252
|
976 float inputGain = getInputGainAt(f.timestamp);
|
Chris@252
|
977 // cerr << "adjusting velocity from " << velocity << " to " << round(velocity/inputGain) << endl;
|
Chris@252
|
978 velocity = round(velocity / inputGain);
|
Chris@252
|
979 if (velocity > 127) velocity = 127;
|
Chris@252
|
980 if (velocity < 1) velocity = 1;
|
Chris@252
|
981 f.values.push_back(velocity);
|
Chris@252
|
982
|
Chris@252
|
983 f.label = noteName(note, shift, shiftCount);
|
Chris@252
|
984
|
Chris@252
|
985 return f;
|
Chris@252
|
986 }
|
Chris@252
|
987
|
Chris@252
|
988 float
|
Chris@252
|
989 Silvet::getInputGainAt(RealTime t)
|
Chris@252
|
990 {
|
Chris@252
|
991 map<RealTime, float>::const_iterator i = m_inputGains.lower_bound(t);
|
Chris@252
|
992
|
Chris@252
|
993 if (i == m_inputGains.end()) {
|
Chris@252
|
994 if (i != m_inputGains.begin()) {
|
Chris@252
|
995 --i;
|
Chris@252
|
996 } else {
|
Chris@252
|
997 return 1.f; // no data
|
Chris@252
|
998 }
|
Chris@252
|
999 }
|
Chris@252
|
1000
|
Chris@252
|
1001 // cerr << "gain at time " << t << " = " << i->second << endl;
|
Chris@252
|
1002
|
Chris@252
|
1003 return i->second;
|
Chris@252
|
1004 }
|
Chris@252
|
1005
|