Chris@31
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
Chris@31
|
2
|
Chris@31
|
3 /*
|
Chris@31
|
4 Silvet
|
Chris@31
|
5
|
Chris@31
|
6 A Vamp plugin for note transcription.
|
Chris@31
|
7 Centre for Digital Music, Queen Mary University of London.
|
Chris@31
|
8
|
Chris@31
|
9 This program is free software; you can redistribute it and/or
|
Chris@31
|
10 modify it under the terms of the GNU General Public License as
|
Chris@31
|
11 published by the Free Software Foundation; either version 2 of the
|
Chris@31
|
12 License, or (at your option) any later version. See the file
|
Chris@31
|
13 COPYING included with this distribution for more information.
|
Chris@31
|
14 */
|
Chris@31
|
15
|
Chris@31
|
16 #include "Silvet.h"
|
Chris@34
|
17 #include "EM.h"
|
Chris@31
|
18
|
Chris@152
|
19 #include <cq/CQSpectrogram.h>
|
Chris@31
|
20
|
Chris@152
|
21 #include "MedianFilter.h"
|
Chris@152
|
22 #include "constant-q-cpp/src/dsp/Resampler.h"
|
Chris@246
|
23 #include "flattendynamics-ladspa.h"
|
Chris@298
|
24 #include "LiveInstruments.h"
|
Chris@31
|
25
|
Chris@31
|
26 #include <vector>
|
Chris@31
|
27
|
Chris@32
|
28 #include <cstdio>
|
Chris@32
|
29
|
Chris@31
|
30 using std::vector;
|
Chris@48
|
31 using std::cout;
|
Chris@31
|
32 using std::cerr;
|
Chris@31
|
33 using std::endl;
|
Chris@40
|
34 using Vamp::RealTime;
|
Chris@31
|
35
|
// Sample rate at which the constant-Q analysis runs. Input at any
// other rate is resampled to this rate (see Silvet::reset).
static const int processingSampleRate = 44100;

// Number of constant-Q bins per semitone used in Live mode and in the
// other processing modes respectively.
static const int binsPerSemitoneLive = 1;
static const int binsPerSemitoneNormal = 5;

// Inclusive range of input sample rates accepted by Silvet::initialise.
static const int minInputSampleRate = 100;
static const int maxInputSampleRate = 192000;
|
Chris@272
|
43
|
Chris@31
|
44 Silvet::Silvet(float inputSampleRate) :
|
Chris@31
|
45 Plugin(inputSampleRate),
|
Chris@161
|
46 m_instruments(InstrumentPack::listInstrumentPacks()),
|
Chris@298
|
47 m_liveInstruments(LiveAdapter::adaptAll(m_instruments)),
|
Chris@31
|
48 m_resampler(0),
|
Chris@246
|
49 m_flattener(0),
|
Chris@110
|
50 m_cq(0),
|
Chris@297
|
51 m_mode(HighQualityMode),
|
Chris@166
|
52 m_fineTuning(false),
|
Chris@178
|
53 m_instrument(0),
|
Chris@178
|
54 m_colsPerSec(50)
|
Chris@31
|
55 {
|
Chris@31
|
56 }
|
Chris@31
|
57
|
Chris@31
|
58 Silvet::~Silvet()
|
Chris@31
|
59 {
|
Chris@31
|
60 delete m_resampler;
|
Chris@246
|
61 delete m_flattener;
|
Chris@31
|
62 delete m_cq;
|
Chris@41
|
63 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
|
Chris@41
|
64 delete m_postFilter[i];
|
Chris@41
|
65 }
|
Chris@31
|
66 }
|
Chris@31
|
67
|
Chris@31
|
68 string
|
Chris@31
|
69 Silvet::getIdentifier() const
|
Chris@31
|
70 {
|
Chris@31
|
71 return "silvet";
|
Chris@31
|
72 }
|
Chris@31
|
73
|
Chris@31
|
74 string
|
Chris@31
|
75 Silvet::getName() const
|
Chris@31
|
76 {
|
Chris@31
|
77 return "Silvet Note Transcription";
|
Chris@31
|
78 }
|
Chris@31
|
79
|
Chris@31
|
80 string
|
Chris@31
|
81 Silvet::getDescription() const
|
Chris@31
|
82 {
|
Chris@191
|
83 return "Estimate the note onsets, pitches, and durations that make up a music recording.";
|
Chris@31
|
84 }
|
Chris@31
|
85
|
Chris@31
|
86 string
|
Chris@31
|
87 Silvet::getMaker() const
|
Chris@31
|
88 {
|
Chris@191
|
89 return "Queen Mary, University of London";
|
Chris@31
|
90 }
|
Chris@31
|
91
|
Chris@31
|
92 int
|
Chris@31
|
93 Silvet::getPluginVersion() const
|
Chris@31
|
94 {
|
Chris@304
|
95 return 3;
|
Chris@31
|
96 }
|
Chris@31
|
97
|
Chris@31
|
98 string
|
Chris@31
|
99 Silvet::getCopyright() const
|
Chris@31
|
100 {
|
Chris@191
|
101 return "Method by Emmanouil Benetos and Simon Dixon; plugin by Chris Cannam and Emmanouil Benetos. GPL licence.";
|
Chris@31
|
102 }
|
Chris@31
|
103
|
Chris@31
|
104 Silvet::InputDomain
|
Chris@31
|
105 Silvet::getInputDomain() const
|
Chris@31
|
106 {
|
Chris@31
|
107 return TimeDomain;
|
Chris@31
|
108 }
|
Chris@31
|
109
|
Chris@31
|
110 size_t
|
Chris@31
|
111 Silvet::getPreferredBlockSize() const
|
Chris@31
|
112 {
|
Chris@31
|
113 return 0;
|
Chris@31
|
114 }
|
Chris@31
|
115
|
Chris@31
|
116 size_t
|
Chris@31
|
117 Silvet::getPreferredStepSize() const
|
Chris@31
|
118 {
|
Chris@31
|
119 return 0;
|
Chris@31
|
120 }
|
Chris@31
|
121
|
Chris@31
|
122 size_t
|
Chris@31
|
123 Silvet::getMinChannelCount() const
|
Chris@31
|
124 {
|
Chris@31
|
125 return 1;
|
Chris@31
|
126 }
|
Chris@31
|
127
|
Chris@31
|
128 size_t
|
Chris@31
|
129 Silvet::getMaxChannelCount() const
|
Chris@31
|
130 {
|
Chris@31
|
131 return 1;
|
Chris@31
|
132 }
|
Chris@31
|
133
|
Chris@31
|
134 Silvet::ParameterList
|
Chris@31
|
135 Silvet::getParameterDescriptors() const
|
Chris@31
|
136 {
|
Chris@31
|
137 ParameterList list;
|
Chris@110
|
138
|
Chris@110
|
139 ParameterDescriptor desc;
|
Chris@110
|
140 desc.identifier = "mode";
|
Chris@110
|
141 desc.name = "Processing mode";
|
Chris@110
|
142 desc.unit = "";
|
Chris@297
|
143 desc.description = "Sets the tradeoff of processing speed against transcription quality. Draft mode is tuned in favour of overall speed; Live mode is tuned in favour of lower latency; while Intensive mode (the default) will almost always produce the best results.";
|
Chris@110
|
144 desc.minValue = 0;
|
Chris@297
|
145 desc.maxValue = 2;
|
Chris@113
|
146 desc.defaultValue = 1;
|
Chris@110
|
147 desc.isQuantized = true;
|
Chris@110
|
148 desc.quantizeStep = 1;
|
Chris@166
|
149 desc.valueNames.push_back("Draft (faster)");
|
Chris@165
|
150 desc.valueNames.push_back("Intensive (higher quality)");
|
Chris@297
|
151 desc.valueNames.push_back("Live (lower latency)");
|
Chris@161
|
152 list.push_back(desc);
|
Chris@161
|
153
|
Chris@176
|
154 desc.identifier = "instrument";
|
Chris@176
|
155 desc.name = "Instrument";
|
Chris@161
|
156 desc.unit = "";
|
Chris@271
|
157 desc.description = "The instrument or instruments known to be present in the recording. This affects the set of instrument templates used, as well as the expected level of polyphony in the output. Using a more limited set of instruments than the default will also make the plugin run faster.\nNote that this plugin cannot isolate instruments: you can't use this setting to request notes from only one instrument in a recording with several. Instead, use this as a hint to the plugin about which instruments are actually present.";
|
Chris@161
|
158 desc.minValue = 0;
|
Chris@162
|
159 desc.maxValue = m_instruments.size()-1;
|
Chris@162
|
160 desc.defaultValue = 0;
|
Chris@161
|
161 desc.isQuantized = true;
|
Chris@161
|
162 desc.quantizeStep = 1;
|
Chris@161
|
163 desc.valueNames.clear();
|
Chris@162
|
164 for (int i = 0; i < int(m_instruments.size()); ++i) {
|
Chris@162
|
165 desc.valueNames.push_back(m_instruments[i].name);
|
Chris@162
|
166 }
|
Chris@166
|
167 list.push_back(desc);
|
Chris@161
|
168
|
Chris@166
|
169 desc.identifier = "finetune";
|
Chris@166
|
170 desc.name = "Return fine pitch estimates";
|
Chris@166
|
171 desc.unit = "";
|
Chris@271
|
172 desc.description = "Return pitch estimates at finer than semitone resolution. This works only in Intensive mode. Notes that appear to drift in pitch will be split up into shorter notes with individually finer pitches.";
|
Chris@166
|
173 desc.minValue = 0;
|
Chris@166
|
174 desc.maxValue = 1;
|
Chris@166
|
175 desc.defaultValue = 0;
|
Chris@166
|
176 desc.isQuantized = true;
|
Chris@166
|
177 desc.quantizeStep = 1;
|
Chris@166
|
178 desc.valueNames.clear();
|
Chris@110
|
179 list.push_back(desc);
|
Chris@110
|
180
|
Chris@31
|
181 return list;
|
Chris@31
|
182 }
|
Chris@31
|
183
|
Chris@31
|
184 float
|
Chris@31
|
185 Silvet::getParameter(string identifier) const
|
Chris@31
|
186 {
|
Chris@110
|
187 if (identifier == "mode") {
|
Chris@297
|
188 return (float)(int)m_mode;
|
Chris@166
|
189 } else if (identifier == "finetune") {
|
Chris@166
|
190 return m_fineTuning ? 1.f : 0.f;
|
Chris@176
|
191 } else if (identifier == "instrument") {
|
Chris@162
|
192 return m_instrument;
|
Chris@110
|
193 }
|
Chris@31
|
194 return 0;
|
Chris@31
|
195 }
|
Chris@31
|
196
|
Chris@31
|
197 void
|
Chris@31
|
198 Silvet::setParameter(string identifier, float value)
|
Chris@31
|
199 {
|
Chris@110
|
200 if (identifier == "mode") {
|
Chris@297
|
201 m_mode = (ProcessingMode)(int)(value + 0.5);
|
Chris@166
|
202 } else if (identifier == "finetune") {
|
Chris@166
|
203 m_fineTuning = (value > 0.5);
|
Chris@176
|
204 } else if (identifier == "instrument") {
|
Chris@162
|
205 m_instrument = lrintf(value);
|
Chris@110
|
206 }
|
Chris@31
|
207 }
|
Chris@31
|
208
|
Chris@31
|
209 Silvet::ProgramList
|
Chris@31
|
210 Silvet::getPrograms() const
|
Chris@31
|
211 {
|
Chris@31
|
212 ProgramList list;
|
Chris@31
|
213 return list;
|
Chris@31
|
214 }
|
Chris@31
|
215
|
Chris@31
|
216 string
|
Chris@31
|
217 Silvet::getCurrentProgram() const
|
Chris@31
|
218 {
|
Chris@31
|
219 return "";
|
Chris@31
|
220 }
|
Chris@31
|
221
|
Chris@31
|
222 void
|
Chris@31
|
223 Silvet::selectProgram(string name)
|
Chris@31
|
224 {
|
Chris@31
|
225 }
|
Chris@31
|
226
|
Chris@31
|
227 Silvet::OutputList
|
Chris@31
|
228 Silvet::getOutputDescriptors() const
|
Chris@31
|
229 {
|
Chris@31
|
230 OutputList list;
|
Chris@31
|
231
|
Chris@31
|
232 OutputDescriptor d;
|
Chris@51
|
233 d.identifier = "notes";
|
Chris@51
|
234 d.name = "Note transcription";
|
Chris@271
|
235 d.description = "Overall note transcription. Each note has time, duration, estimated pitch, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture.";
|
Chris@41
|
236 d.unit = "Hz";
|
Chris@31
|
237 d.hasFixedBinCount = true;
|
Chris@31
|
238 d.binCount = 2;
|
Chris@41
|
239 d.binNames.push_back("Frequency");
|
Chris@31
|
240 d.binNames.push_back("Velocity");
|
Chris@31
|
241 d.hasKnownExtents = false;
|
Chris@31
|
242 d.isQuantized = false;
|
Chris@31
|
243 d.sampleType = OutputDescriptor::VariableSampleRate;
|
Chris@246
|
244 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
|
Chris@31
|
245 d.hasDuration = true;
|
Chris@32
|
246 m_notesOutputNo = list.size();
|
Chris@32
|
247 list.push_back(d);
|
Chris@32
|
248
|
Chris@178
|
249 d.identifier = "timefreq";
|
Chris@178
|
250 d.name = "Time-frequency distribution";
|
Chris@271
|
251 d.description = "Filtered constant-Q time-frequency distribution as used as input to the expectation-maximisation algorithm.";
|
Chris@178
|
252 d.unit = "";
|
Chris@178
|
253 d.hasFixedBinCount = true;
|
Chris@298
|
254 d.binCount = getPack(0).templateHeight;
|
Chris@178
|
255 d.binNames.clear();
|
Chris@178
|
256 if (m_cq) {
|
Chris@294
|
257 char name[50];
|
Chris@298
|
258 for (int i = 0; i < getPack(0).templateHeight; ++i) {
|
Chris@178
|
259 // We have a 600-bin (10 oct 60-bin CQ) of which the
|
Chris@178
|
260 // lowest-frequency 55 bins have been dropped, for a
|
Chris@178
|
261 // 545-bin template. The native CQ bins go high->low
|
Chris@178
|
262 // frequency though, so these are still the first 545 bins
|
Chris@178
|
263 // as reported by getBinFrequency, though in reverse order
|
Chris@178
|
264 float freq = m_cq->getBinFrequency
|
Chris@298
|
265 (getPack(0).templateHeight - i - 1);
|
Chris@178
|
266 sprintf(name, "%.1f Hz", freq);
|
Chris@178
|
267 d.binNames.push_back(name);
|
Chris@178
|
268 }
|
Chris@178
|
269 }
|
Chris@178
|
270 d.hasKnownExtents = false;
|
Chris@178
|
271 d.isQuantized = false;
|
Chris@178
|
272 d.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@178
|
273 d.sampleRate = m_colsPerSec;
|
Chris@178
|
274 d.hasDuration = false;
|
Chris@178
|
275 m_fcqOutputNo = list.size();
|
Chris@178
|
276 list.push_back(d);
|
Chris@178
|
277
|
Chris@294
|
278 d.identifier = "pitchactivation";
|
Chris@294
|
279 d.name = "Pitch activation distribution";
|
Chris@294
|
280 d.description = "Pitch activation distribution resulting from expectation-maximisation algorithm, prior to note extraction.";
|
Chris@294
|
281 d.unit = "";
|
Chris@294
|
282 d.hasFixedBinCount = true;
|
Chris@298
|
283 d.binCount = getPack(0).templateNoteCount;
|
Chris@294
|
284 d.binNames.clear();
|
Chris@294
|
285 if (m_cq) {
|
Chris@298
|
286 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
|
Chris@294
|
287 d.binNames.push_back(noteName(i, 0, 1));
|
Chris@294
|
288 }
|
Chris@294
|
289 }
|
Chris@294
|
290 d.hasKnownExtents = false;
|
Chris@294
|
291 d.isQuantized = false;
|
Chris@294
|
292 d.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@294
|
293 d.sampleRate = m_colsPerSec;
|
Chris@294
|
294 d.hasDuration = false;
|
Chris@294
|
295 m_pitchOutputNo = list.size();
|
Chris@294
|
296 list.push_back(d);
|
Chris@294
|
297
|
Chris@302
|
298 d.identifier = "templates";
|
Chris@302
|
299 d.name = "Templates";
|
Chris@302
|
300 d.description = "Constant-Q spectral templates for the selected instrument pack.";
|
Chris@302
|
301 d.unit = "";
|
Chris@302
|
302 d.hasFixedBinCount = true;
|
Chris@302
|
303 d.binCount = getPack(0).templateHeight;
|
Chris@302
|
304 d.binNames.clear();
|
Chris@302
|
305 if (m_cq) {
|
Chris@302
|
306 char name[50];
|
Chris@302
|
307 for (int i = 0; i < getPack(0).templateHeight; ++i) {
|
Chris@302
|
308 // We have a 600-bin (10 oct 60-bin CQ) of which the
|
Chris@302
|
309 // lowest-frequency 55 bins have been dropped, for a
|
Chris@302
|
310 // 545-bin template. The native CQ bins go high->low
|
Chris@302
|
311 // frequency though, so these are still the first 545 bins
|
Chris@302
|
312 // as reported by getBinFrequency, though in reverse order
|
Chris@302
|
313 float freq = m_cq->getBinFrequency
|
Chris@302
|
314 (getPack(0).templateHeight - i - 1);
|
Chris@302
|
315 sprintf(name, "%.1f Hz", freq);
|
Chris@302
|
316 d.binNames.push_back(name);
|
Chris@302
|
317 }
|
Chris@302
|
318 }
|
Chris@302
|
319 d.hasKnownExtents = false;
|
Chris@302
|
320 d.isQuantized = false;
|
Chris@302
|
321 d.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@302
|
322 d.sampleRate = m_colsPerSec;
|
Chris@302
|
323 d.hasDuration = false;
|
Chris@302
|
324 m_templateOutputNo = list.size();
|
Chris@302
|
325 list.push_back(d);
|
Chris@302
|
326
|
Chris@31
|
327 return list;
|
Chris@31
|
328 }
|
Chris@31
|
329
|
Chris@38
|
330 std::string
|
Chris@175
|
331 Silvet::noteName(int note, int shift, int shiftCount) const
|
Chris@38
|
332 {
|
Chris@38
|
333 static const char *names[] = {
|
Chris@38
|
334 "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"
|
Chris@38
|
335 };
|
Chris@38
|
336
|
Chris@175
|
337 const char *n = names[note % 12];
|
Chris@38
|
338
|
Chris@175
|
339 int oct = (note + 9) / 12;
|
Chris@38
|
340
|
Chris@175
|
341 char buf[30];
|
Chris@175
|
342
|
Chris@175
|
343 float pshift = 0.f;
|
Chris@175
|
344 if (shiftCount > 1) {
|
Chris@175
|
345 // see noteFrequency below
|
Chris@175
|
346 pshift =
|
Chris@175
|
347 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
|
Chris@175
|
348 }
|
Chris@175
|
349
|
Chris@175
|
350 if (pshift > 0.f) {
|
Chris@175
|
351 sprintf(buf, "%s%d+%dc", n, oct, int(round(pshift * 100)));
|
Chris@175
|
352 } else if (pshift < 0.f) {
|
Chris@175
|
353 sprintf(buf, "%s%d-%dc", n, oct, int(round((-pshift) * 100)));
|
Chris@175
|
354 } else {
|
Chris@175
|
355 sprintf(buf, "%s%d", n, oct);
|
Chris@175
|
356 }
|
Chris@38
|
357
|
Chris@38
|
358 return buf;
|
Chris@38
|
359 }
|
Chris@38
|
360
|
Chris@41
|
361 float
|
Chris@168
|
362 Silvet::noteFrequency(int note, int shift, int shiftCount) const
|
Chris@41
|
363 {
|
Chris@169
|
364 // Convert shift number to a pitch shift. The given shift number
|
Chris@169
|
365 // is an offset into the template array, which starts with some
|
Chris@169
|
366 // zeros, followed by the template, then some trailing zeros.
|
Chris@169
|
367 //
|
Chris@169
|
368 // Example: if we have templateMaxShift == 2 and thus shiftCount
|
Chris@169
|
369 // == 5, then the number will be in the range 0-4 and the template
|
Chris@169
|
370 // will have 2 zeros at either end. Thus number 2 represents the
|
Chris@169
|
371 // template "as recorded", for a pitch shift of 0; smaller indices
|
Chris@169
|
372 // represent moving the template *up* in pitch (by introducing
|
Chris@169
|
373 // zeros at the start, which is the low-frequency end), for a
|
Chris@169
|
374 // positive pitch shift; and higher values represent moving it
|
Chris@169
|
375 // down in pitch, for a negative pitch shift.
|
Chris@169
|
376
|
Chris@175
|
377 float pshift = 0.f;
|
Chris@175
|
378 if (shiftCount > 1) {
|
Chris@175
|
379 pshift =
|
Chris@175
|
380 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
|
Chris@175
|
381 }
|
Chris@169
|
382
|
Chris@301
|
383 float freq = float(27.5 * pow(2.0, (note + pshift) / 12.0));
|
Chris@301
|
384
|
Chris@303
|
385 // cerr << "note = " << note << ", shift = " << shift << ", shiftCount = "
|
Chris@303
|
386 // << shiftCount << ", obtained freq = " << freq << endl;
|
Chris@301
|
387
|
Chris@301
|
388 return freq;
|
Chris@41
|
389 }
|
Chris@41
|
390
|
Chris@31
|
391 bool
|
Chris@31
|
392 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
Chris@31
|
393 {
|
Chris@272
|
394 if (m_inputSampleRate < minInputSampleRate ||
|
Chris@272
|
395 m_inputSampleRate > maxInputSampleRate) {
|
Chris@272
|
396 cerr << "Silvet::initialise: Unsupported input sample rate "
|
Chris@272
|
397 << m_inputSampleRate << " (supported min " << minInputSampleRate
|
Chris@272
|
398 << ", max " << maxInputSampleRate << ")" << endl;
|
Chris@272
|
399 return false;
|
Chris@272
|
400 }
|
Chris@272
|
401
|
Chris@31
|
402 if (channels < getMinChannelCount() ||
|
Chris@272
|
403 channels > getMaxChannelCount()) {
|
Chris@272
|
404 cerr << "Silvet::initialise: Unsupported channel count " << channels
|
Chris@272
|
405 << " (supported min " << getMinChannelCount() << ", max "
|
Chris@272
|
406 << getMaxChannelCount() << ")" << endl;
|
Chris@272
|
407 return false;
|
Chris@272
|
408 }
|
Chris@31
|
409
|
Chris@31
|
410 if (stepSize != blockSize) {
|
Chris@31
|
411 cerr << "Silvet::initialise: Step size must be the same as block size ("
|
Chris@31
|
412 << stepSize << " != " << blockSize << ")" << endl;
|
Chris@31
|
413 return false;
|
Chris@31
|
414 }
|
Chris@31
|
415
|
Chris@31
|
416 m_blockSize = blockSize;
|
Chris@31
|
417
|
Chris@31
|
418 reset();
|
Chris@31
|
419
|
Chris@31
|
420 return true;
|
Chris@31
|
421 }
|
Chris@31
|
422
|
Chris@31
|
423 void
|
Chris@31
|
424 Silvet::reset()
|
Chris@31
|
425 {
|
Chris@31
|
426 delete m_resampler;
|
Chris@246
|
427 delete m_flattener;
|
Chris@31
|
428 delete m_cq;
|
Chris@31
|
429
|
Chris@31
|
430 if (m_inputSampleRate != processingSampleRate) {
|
Chris@31
|
431 m_resampler = new Resampler(m_inputSampleRate, processingSampleRate);
|
Chris@31
|
432 } else {
|
Chris@31
|
433 m_resampler = 0;
|
Chris@31
|
434 }
|
Chris@31
|
435
|
Chris@246
|
436 m_flattener = new FlattenDynamics(m_inputSampleRate); // before resampling
|
Chris@246
|
437 m_flattener->reset();
|
Chris@246
|
438
|
Chris@301
|
439 // this happens to be processingSampleRate / 3, and is the top
|
Chris@301
|
440 // freq used for the EM templates:
|
Chris@301
|
441 double maxFreq = 14700;
|
Chris@301
|
442
|
Chris@301
|
443 if (m_mode == LiveMode) {
|
Chris@301
|
444 // We only have 12 bpo rather than 60, so we need the top bin
|
Chris@301
|
445 // to be the middle one of the top 5, i.e. 2/5 of a semitone
|
Chris@301
|
446 // lower than 14700
|
Chris@301
|
447 maxFreq *= powf(2.0, -1.0 / 30.0);
|
Chris@301
|
448 }
|
Chris@301
|
449
|
Chris@173
|
450 double minFreq = 27.5;
|
Chris@173
|
451
|
Chris@297
|
452 if (m_mode != HighQualityMode) {
|
Chris@173
|
453 // We don't actually return any notes from the bottom octave,
|
Chris@173
|
454 // so we can just pad with zeros
|
Chris@173
|
455 minFreq *= 2;
|
Chris@173
|
456 }
|
Chris@173
|
457
|
Chris@298
|
458 int bpo = 12 *
|
Chris@298
|
459 (m_mode == LiveMode ? binsPerSemitoneLive : binsPerSemitoneNormal);
|
Chris@301
|
460
|
Chris@154
|
461 CQParameters params(processingSampleRate,
|
Chris@173
|
462 minFreq,
|
Chris@303
|
463 maxFreq,
|
Chris@298
|
464 bpo);
|
Chris@154
|
465
|
Chris@155
|
466 params.q = 0.95; // MIREX code uses 0.8, but it seems 0.9 or lower
|
Chris@155
|
467 // drops the FFT size to 512 from 1024 and alters
|
Chris@155
|
468 // some other processing parameters, making
|
Chris@155
|
469 // everything much, much slower. Could be a flaw
|
Chris@155
|
470 // in the CQ parameter calculations, must check
|
Chris@154
|
471 params.atomHopFactor = 0.3;
|
Chris@154
|
472 params.threshold = 0.0005;
|
Chris@172
|
473 params.window = CQParameters::Hann;
|
Chris@154
|
474
|
Chris@154
|
475 m_cq = new CQSpectrogram(params, CQSpectrogram::InterpolateLinear);
|
Chris@31
|
476
|
Chris@303
|
477 // cerr << "CQ bins = " << m_cq->getTotalBins() << endl;
|
Chris@303
|
478 // cerr << "CQ min freq = " << m_cq->getMinFrequency() << " (and for confirmation, freq of bin 0 = " << m_cq->getBinFrequency(0) << ")" << endl;
|
Chris@297
|
479
|
Chris@297
|
480 m_colsPerSec = (m_mode == DraftMode ? 25 : 50);
|
Chris@165
|
481
|
Chris@41
|
482 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
|
Chris@41
|
483 delete m_postFilter[i];
|
Chris@41
|
484 }
|
Chris@41
|
485 m_postFilter.clear();
|
Chris@303
|
486 int postFilterLength = 3;
|
Chris@298
|
487 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
|
Chris@303
|
488 m_postFilter.push_back(new MedianFilter<double>(postFilterLength));
|
Chris@41
|
489 }
|
Chris@41
|
490 m_pianoRoll.clear();
|
Chris@246
|
491 m_inputGains.clear();
|
Chris@32
|
492 m_columnCount = 0;
|
Chris@272
|
493 m_resampledCount = 0;
|
Chris@40
|
494 m_startTime = RealTime::zeroTime;
|
Chris@31
|
495 }
|
Chris@31
|
496
|
Chris@31
|
497 Silvet::FeatureSet
|
Chris@31
|
498 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
Chris@31
|
499 {
|
Chris@302
|
500 FeatureSet fs;
|
Chris@302
|
501
|
Chris@40
|
502 if (m_columnCount == 0) {
|
Chris@40
|
503 m_startTime = timestamp;
|
Chris@302
|
504 insertTemplateFeatures(fs);
|
Chris@40
|
505 }
|
Chris@246
|
506
|
Chris@246
|
507 vector<float> flattened(m_blockSize);
|
Chris@246
|
508 float gain = 1.f;
|
Chris@246
|
509 m_flattener->connectInputPort
|
Chris@246
|
510 (FlattenDynamics::AudioInputPort, inputBuffers[0]);
|
Chris@246
|
511 m_flattener->connectOutputPort
|
Chris@246
|
512 (FlattenDynamics::AudioOutputPort, &flattened[0]);
|
Chris@246
|
513 m_flattener->connectOutputPort
|
Chris@246
|
514 (FlattenDynamics::GainOutputPort, &gain);
|
Chris@246
|
515 m_flattener->process(m_blockSize);
|
Chris@246
|
516
|
Chris@252
|
517 m_inputGains[timestamp] = gain;
|
Chris@40
|
518
|
Chris@31
|
519 vector<double> data;
|
Chris@40
|
520 for (int i = 0; i < m_blockSize; ++i) {
|
Chris@246
|
521 double d = flattened[i];
|
Chris@235
|
522 data.push_back(d);
|
Chris@40
|
523 }
|
Chris@31
|
524
|
Chris@31
|
525 if (m_resampler) {
|
Chris@272
|
526
|
Chris@31
|
527 data = m_resampler->process(data.data(), data.size());
|
Chris@272
|
528
|
Chris@272
|
529 int hadCount = m_resampledCount;
|
Chris@272
|
530 m_resampledCount += data.size();
|
Chris@272
|
531
|
Chris@272
|
532 int resamplerLatency = m_resampler->getLatency();
|
Chris@272
|
533
|
Chris@272
|
534 if (hadCount < resamplerLatency) {
|
Chris@272
|
535 int stillToDrop = resamplerLatency - hadCount;
|
Chris@272
|
536 if (stillToDrop >= int(data.size())) {
|
Chris@302
|
537 return fs;
|
Chris@272
|
538 } else {
|
Chris@272
|
539 data = vector<double>(data.begin() + stillToDrop, data.end());
|
Chris@272
|
540 }
|
Chris@272
|
541 }
|
Chris@31
|
542 }
|
Chris@272
|
543
|
Chris@32
|
544 Grid cqout = m_cq->process(data);
|
Chris@302
|
545 transcribe(cqout, fs);
|
Chris@51
|
546 return fs;
|
Chris@34
|
547 }
|
Chris@34
|
548
|
Chris@34
|
549 Silvet::FeatureSet
|
Chris@34
|
550 Silvet::getRemainingFeatures()
|
Chris@34
|
551 {
|
Chris@145
|
552 Grid cqout = m_cq->getRemainingOutput();
|
Chris@302
|
553 FeatureSet fs;
|
Chris@302
|
554 if (m_columnCount == 0) {
|
Chris@302
|
555 // process() was never called, but we still want these
|
Chris@302
|
556 insertTemplateFeatures(fs);
|
Chris@302
|
557 } else {
|
Chris@302
|
558 transcribe(cqout, fs);
|
Chris@302
|
559 }
|
Chris@51
|
560 return fs;
|
Chris@34
|
561 }
|
Chris@34
|
562
|
Chris@302
|
563 void
|
Chris@302
|
564 Silvet::insertTemplateFeatures(FeatureSet &fs)
|
Chris@302
|
565 {
|
Chris@302
|
566 const InstrumentPack &pack = getPack(m_instrument);
|
Chris@302
|
567 for (int i = 0; i < int(pack.templates.size()) * pack.templateNoteCount; ++i) {
|
Chris@302
|
568 RealTime timestamp = RealTime::fromSeconds(double(i) / m_colsPerSec);
|
Chris@302
|
569 Feature f;
|
Chris@302
|
570 char buffer[50];
|
Chris@302
|
571 sprintf(buffer, "Note %d", i + 1);
|
Chris@302
|
572 f.label = buffer;
|
Chris@302
|
573 f.hasTimestamp = true;
|
Chris@302
|
574 f.timestamp = timestamp;
|
Chris@302
|
575 f.values = pack.templates[i / pack.templateNoteCount]
|
Chris@302
|
576 .data[i % pack.templateNoteCount];
|
Chris@302
|
577 fs[m_templateOutputNo].push_back(f);
|
Chris@302
|
578 }
|
Chris@302
|
579 }
|
Chris@302
|
580
|
Chris@302
|
581 void
|
Chris@302
|
582 Silvet::transcribe(const Grid &cqout, Silvet::FeatureSet &fs)
|
Chris@34
|
583 {
|
Chris@32
|
584 Grid filtered = preProcess(cqout);
|
Chris@31
|
585
|
Chris@302
|
586 if (filtered.empty()) return;
|
Chris@170
|
587
|
Chris@298
|
588 const InstrumentPack &pack(getPack(m_instrument));
|
Chris@104
|
589
|
Chris@178
|
590 for (int i = 0; i < (int)filtered.size(); ++i) {
|
Chris@178
|
591 Feature f;
|
Chris@178
|
592 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@178
|
593 f.values.push_back(float(filtered[i][j]));
|
Chris@178
|
594 }
|
Chris@178
|
595 fs[m_fcqOutputNo].push_back(f);
|
Chris@178
|
596 }
|
Chris@178
|
597
|
Chris@34
|
598 int width = filtered.size();
|
Chris@34
|
599
|
Chris@297
|
600 int iterations = (m_mode == HighQualityMode ? 20 : 10);
|
Chris@34
|
601
|
Chris@176
|
602 Grid localPitches(width, vector<double>(pack.templateNoteCount, 0.0));
|
Chris@170
|
603
|
Chris@297
|
604 bool wantShifts = (m_mode == HighQualityMode) && m_fineTuning;
|
Chris@170
|
605 int shiftCount = 1;
|
Chris@170
|
606 if (wantShifts) {
|
Chris@170
|
607 shiftCount = pack.templateMaxShift * 2 + 1;
|
Chris@170
|
608 }
|
Chris@170
|
609
|
Chris@170
|
610 vector<vector<int> > localBestShifts;
|
Chris@170
|
611 if (wantShifts) {
|
Chris@170
|
612 localBestShifts =
|
Chris@176
|
613 vector<vector<int> >(width, vector<int>(pack.templateNoteCount, 0));
|
Chris@170
|
614 }
|
Chris@170
|
615
|
Chris@305
|
616 double columnThreshold = 1e-5;
|
Chris@305
|
617
|
Chris@123
|
618 #pragma omp parallel for
|
Chris@123
|
619 for (int i = 0; i < width; ++i) {
|
Chris@104
|
620
|
Chris@170
|
621 double sum = 0.0;
|
Chris@176
|
622 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@170
|
623 sum += filtered.at(i).at(j);
|
Chris@170
|
624 }
|
Chris@305
|
625 if (sum < columnThreshold) continue;
|
Chris@170
|
626
|
Chris@297
|
627 EM em(&pack, m_mode == HighQualityMode);
|
Chris@170
|
628
|
Chris@183
|
629 em.setPitchSparsity(pack.pitchSparsity);
|
Chris@213
|
630 em.setSourceSparsity(pack.sourceSparsity);
|
Chris@183
|
631
|
Chris@170
|
632 for (int j = 0; j < iterations; ++j) {
|
Chris@170
|
633 em.iterate(filtered.at(i).data());
|
Chris@37
|
634 }
|
Chris@37
|
635
|
Chris@170
|
636 const float *pitchDist = em.getPitchDistribution();
|
Chris@170
|
637 const float *const *shiftDist = em.getShifts();
|
Chris@37
|
638
|
Chris@176
|
639 for (int j = 0; j < pack.templateNoteCount; ++j) {
|
Chris@104
|
640
|
Chris@170
|
641 localPitches[i][j] = pitchDist[j] * sum;
|
Chris@170
|
642
|
Chris@170
|
643 int bestShift = 0;
|
Chris@179
|
644 float bestShiftValue = 0.0;
|
Chris@170
|
645 if (wantShifts) {
|
Chris@170
|
646 for (int k = 0; k < shiftCount; ++k) {
|
Chris@179
|
647 float value = shiftDist[k][j];
|
Chris@179
|
648 if (k == 0 || value > bestShiftValue) {
|
Chris@179
|
649 bestShiftValue = value;
|
Chris@170
|
650 bestShift = k;
|
Chris@170
|
651 }
|
Chris@170
|
652 }
|
Chris@170
|
653 localBestShifts[i][j] = bestShift;
|
Chris@170
|
654 }
|
Chris@123
|
655 }
|
Chris@123
|
656 }
|
Chris@166
|
657
|
Chris@166
|
658 for (int i = 0; i < width; ++i) {
|
Chris@37
|
659
|
Chris@294
|
660 vector<double> filtered = postProcess
|
Chris@294
|
661 (localPitches[i], localBestShifts[i], wantShifts);
|
Chris@294
|
662
|
Chris@294
|
663 Feature f;
|
Chris@294
|
664 for (int j = 0; j < (int)filtered.size(); ++j) {
|
Chris@294
|
665 float v(filtered[j]);
|
Chris@294
|
666 if (v < pack.levelThreshold) v = 0.f;
|
Chris@294
|
667 f.values.push_back(v);
|
Chris@294
|
668 }
|
Chris@294
|
669 fs[m_pitchOutputNo].push_back(f);
|
Chris@166
|
670
|
Chris@168
|
671 FeatureList noteFeatures = noteTrack(shiftCount);
|
Chris@38
|
672
|
Chris@123
|
673 for (FeatureList::const_iterator fi = noteFeatures.begin();
|
Chris@123
|
674 fi != noteFeatures.end(); ++fi) {
|
Chris@123
|
675 fs[m_notesOutputNo].push_back(*fi);
|
Chris@40
|
676 }
|
Chris@34
|
677 }
|
Chris@31
|
678 }
|
Chris@31
|
679
|
Chris@32
|
680 Silvet::Grid
|
Chris@32
|
681 Silvet::preProcess(const Grid &in)
|
Chris@32
|
682 {
|
Chris@32
|
683 int width = in.size();
|
Chris@32
|
684
|
Chris@165
|
685 int spacing = processingSampleRate / m_colsPerSec;
|
Chris@32
|
686
|
Chris@165
|
687 // need to be careful that col spacing is an integer number of samples!
|
Chris@165
|
688 assert(spacing * m_colsPerSec == processingSampleRate);
|
Chris@32
|
689
|
Chris@32
|
690 Grid out;
|
Chris@32
|
691
|
Chris@58
|
692 // We count the CQ latency in terms of processing hops, but
|
Chris@58
|
693 // actually it probably isn't an exact number of hops so this
|
Chris@58
|
694 // isn't quite accurate. But the small constant offset is
|
Chris@165
|
695 // practically irrelevant compared to the jitter from the frame
|
Chris@165
|
696 // size we reduce to in a moment
|
Chris@33
|
697 int latentColumns = m_cq->getLatency() / m_cq->getColumnHop();
|
Chris@33
|
698
|
Chris@298
|
699 const InstrumentPack &pack(getPack(m_instrument));
|
Chris@176
|
700
|
Chris@32
|
701 for (int i = 0; i < width; ++i) {
|
Chris@32
|
702
|
Chris@33
|
703 if (m_columnCount < latentColumns) {
|
Chris@33
|
704 ++m_columnCount;
|
Chris@33
|
705 continue;
|
Chris@33
|
706 }
|
Chris@33
|
707
|
Chris@32
|
708 int prevSampleNo = (m_columnCount - 1) * m_cq->getColumnHop();
|
Chris@32
|
709 int sampleNo = m_columnCount * m_cq->getColumnHop();
|
Chris@32
|
710
|
Chris@32
|
711 bool select = (sampleNo / spacing != prevSampleNo / spacing);
|
Chris@32
|
712
|
Chris@32
|
713 if (select) {
|
Chris@32
|
714 vector<double> inCol = in[i];
|
Chris@176
|
715 vector<double> outCol(pack.templateHeight);
|
Chris@32
|
716
|
Chris@178
|
717 // In HQ mode, the CQ returns 600 bins and we ignore the
|
Chris@298
|
718 // lowest 55 of them (assuming binsPerSemitone == 5).
|
Chris@178
|
719 //
|
Chris@297
|
720 // In draft and live mode the CQ is an octave shorter,
|
Chris@300
|
721 // returning 540 bins or equivalent, so we instead pad
|
Chris@300
|
722 // them with an additional 5 or equivalent zeros.
|
Chris@178
|
723 //
|
Chris@178
|
724 // We also need to reverse the column as we go, since the
|
Chris@178
|
725 // raw CQ has the high frequencies first and we need it
|
Chris@178
|
726 // the other way around.
|
Chris@32
|
727
|
Chris@298
|
728 int bps = (m_mode == LiveMode ?
|
Chris@298
|
729 binsPerSemitoneLive : binsPerSemitoneNormal);
|
Chris@298
|
730
|
Chris@297
|
731 if (m_mode == HighQualityMode) {
|
Chris@178
|
732 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@298
|
733 int ix = inCol.size() - j - (11 * bps);
|
Chris@178
|
734 outCol[j] = inCol[ix];
|
Chris@178
|
735 }
|
Chris@178
|
736 } else {
|
Chris@298
|
737 for (int j = 0; j < bps; ++j) {
|
Chris@178
|
738 outCol[j] = 0.0;
|
Chris@178
|
739 }
|
Chris@298
|
740 for (int j = bps; j < pack.templateHeight; ++j) {
|
Chris@298
|
741 int ix = inCol.size() - j + (bps-1);
|
Chris@178
|
742 outCol[j] = inCol[ix];
|
Chris@178
|
743 }
|
Chris@46
|
744 }
|
Chris@32
|
745
|
Chris@46
|
746 vector<double> noiseLevel1 =
|
Chris@298
|
747 MedianFilter<double>::filter(8 * bps, outCol);
|
Chris@176
|
748 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@46
|
749 noiseLevel1[j] = std::min(outCol[j], noiseLevel1[j]);
|
Chris@46
|
750 }
|
Chris@32
|
751
|
Chris@46
|
752 vector<double> noiseLevel2 =
|
Chris@298
|
753 MedianFilter<double>::filter(8 * bps, noiseLevel1);
|
Chris@176
|
754 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@46
|
755 outCol[j] = std::max(outCol[j] - noiseLevel2[j], 0.0);
|
Chris@32
|
756 }
|
Chris@32
|
757
|
Chris@165
|
758 out.push_back(outCol);
|
Chris@32
|
759 }
|
Chris@32
|
760
|
Chris@32
|
761 ++m_columnCount;
|
Chris@32
|
762 }
|
Chris@32
|
763
|
Chris@32
|
764 return out;
|
Chris@32
|
765 }
|
Chris@32
|
766
|
Chris@294
|
767 vector<double>
|
Chris@170
|
768 Silvet::postProcess(const vector<double> &pitches,
|
Chris@170
|
769 const vector<int> &bestShifts,
|
Chris@170
|
770 bool wantShifts)
|
Chris@166
|
771 {
|
Chris@298
|
772 const InstrumentPack &pack(getPack(m_instrument));
|
Chris@176
|
773
|
Chris@41
|
774 vector<double> filtered;
|
Chris@41
|
775
|
Chris@176
|
776 for (int j = 0; j < pack.templateNoteCount; ++j) {
|
Chris@170
|
777 m_postFilter[j]->push(pitches[j]);
|
Chris@41
|
778 filtered.push_back(m_postFilter[j]->get());
|
Chris@41
|
779 }
|
Chris@41
|
780
|
Chris@41
|
781 // Threshold for level and reduce number of candidate pitches
|
Chris@41
|
782
|
Chris@41
|
783 typedef std::multimap<double, int> ValueIndexMap;
|
Chris@41
|
784
|
Chris@41
|
785 ValueIndexMap strengths;
|
Chris@166
|
786
|
Chris@176
|
787 for (int j = 0; j < pack.templateNoteCount; ++j) {
|
Chris@166
|
788 double strength = filtered[j];
|
Chris@183
|
789 if (strength < pack.levelThreshold) continue;
|
Chris@168
|
790 strengths.insert(ValueIndexMap::value_type(strength, j));
|
Chris@168
|
791 }
|
Chris@166
|
792
|
Chris@168
|
793 ValueIndexMap::const_iterator si = strengths.end();
|
Chris@167
|
794
|
Chris@168
|
795 map<int, double> active;
|
Chris@168
|
796 map<int, int> activeShifts;
|
Chris@168
|
797
|
Chris@183
|
798 while (int(active.size()) < pack.maxPolyphony && si != strengths.begin()) {
|
Chris@168
|
799
|
Chris@168
|
800 --si;
|
Chris@168
|
801
|
Chris@168
|
802 double strength = si->first;
|
Chris@168
|
803 int j = si->second;
|
Chris@168
|
804
|
Chris@168
|
805 active[j] = strength;
|
Chris@168
|
806
|
Chris@170
|
807 if (wantShifts) {
|
Chris@170
|
808 activeShifts[j] = bestShifts[j];
|
Chris@167
|
809 }
|
Chris@41
|
810 }
|
Chris@41
|
811
|
Chris@168
|
812 m_pianoRoll.push_back(active);
|
Chris@170
|
813
|
Chris@170
|
814 if (wantShifts) {
|
Chris@168
|
815 m_pianoRollShifts.push_back(activeShifts);
|
Chris@41
|
816 }
|
Chris@294
|
817
|
Chris@294
|
818 return filtered;
|
Chris@166
|
819 }
|
Chris@166
|
820
|
Chris@166
|
821 Vamp::Plugin::FeatureList
|
Chris@168
|
822 Silvet::noteTrack(int shiftCount)
|
Chris@166
|
823 {
|
Chris@41
|
824 // Minimum duration pruning, and conversion to notes. We can only
|
Chris@41
|
825 // report notes that have just ended (i.e. that are absent in the
|
Chris@168
|
826 // latest active set but present in the prior set in the piano
|
Chris@41
|
827 // roll) -- any notes that ended earlier will have been reported
|
Chris@41
|
828 // already, and if they haven't ended, we don't know their
|
Chris@41
|
829 // duration.
|
Chris@41
|
830
|
Chris@168
|
831 int width = m_pianoRoll.size() - 1;
|
Chris@168
|
832
|
Chris@168
|
833 const map<int, double> &active = m_pianoRoll[width];
|
Chris@41
|
834
|
Chris@165
|
835 double columnDuration = 1.0 / m_colsPerSec;
|
Chris@165
|
836
|
Chris@165
|
837 // only keep notes >= 100ms or thereabouts
|
Chris@165
|
838 int durationThreshold = floor(0.1 / columnDuration); // columns
|
Chris@165
|
839 if (durationThreshold < 1) durationThreshold = 1;
|
Chris@41
|
840
|
Chris@41
|
841 FeatureList noteFeatures;
|
Chris@41
|
842
|
Chris@41
|
843 if (width < durationThreshold + 1) {
|
Chris@41
|
844 return noteFeatures;
|
Chris@41
|
845 }
|
Chris@41
|
846
|
Chris@150
|
847 //!!! try: repeated note detection? (look for change in first derivative of the pitch matrix)
|
Chris@150
|
848
|
Chris@55
|
849 for (map<int, double>::const_iterator ni = m_pianoRoll[width-1].begin();
|
Chris@41
|
850 ni != m_pianoRoll[width-1].end(); ++ni) {
|
Chris@41
|
851
|
Chris@55
|
852 int note = ni->first;
|
Chris@41
|
853
|
Chris@41
|
854 if (active.find(note) != active.end()) {
|
Chris@41
|
855 // the note is still playing
|
Chris@41
|
856 continue;
|
Chris@41
|
857 }
|
Chris@41
|
858
|
Chris@41
|
859 // the note was playing but just ended
|
Chris@41
|
860 int end = width;
|
Chris@41
|
861 int start = end-1;
|
Chris@41
|
862
|
Chris@41
|
863 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) {
|
Chris@41
|
864 --start;
|
Chris@41
|
865 }
|
Chris@41
|
866 ++start;
|
Chris@41
|
867
|
Chris@169
|
868 if ((end - start) < durationThreshold) {
|
Chris@41
|
869 continue;
|
Chris@41
|
870 }
|
Chris@41
|
871
|
Chris@169
|
872 emitNote(start, end, note, shiftCount, noteFeatures);
|
Chris@41
|
873 }
|
Chris@41
|
874
|
Chris@62
|
875 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl;
|
Chris@41
|
876
|
Chris@41
|
877 return noteFeatures;
|
Chris@41
|
878 }
|
Chris@41
|
879
|
Chris@169
|
880 void
|
Chris@169
|
881 Silvet::emitNote(int start, int end, int note, int shiftCount,
|
Chris@169
|
882 FeatureList ¬eFeatures)
|
Chris@169
|
883 {
|
Chris@169
|
884 int partStart = start;
|
Chris@169
|
885 int partShift = 0;
|
Chris@169
|
886 int partVelocity = 0;
|
Chris@169
|
887
|
Chris@252
|
888 int partThreshold = floor(0.05 * m_colsPerSec);
|
Chris@169
|
889
|
Chris@169
|
890 for (int i = start; i != end; ++i) {
|
Chris@169
|
891
|
Chris@169
|
892 double strength = m_pianoRoll[i][note];
|
Chris@169
|
893
|
Chris@169
|
894 int shift = 0;
|
Chris@169
|
895
|
Chris@169
|
896 if (shiftCount > 1) {
|
Chris@169
|
897
|
Chris@169
|
898 shift = m_pianoRollShifts[i][note];
|
Chris@169
|
899
|
Chris@169
|
900 if (i == partStart) {
|
Chris@169
|
901 partShift = shift;
|
Chris@169
|
902 }
|
Chris@169
|
903
|
Chris@169
|
904 if (i > partStart + partThreshold && shift != partShift) {
|
Chris@169
|
905
|
Chris@169
|
906 // cerr << "i = " << i << ", partStart = " << partStart << ", shift = " << shift << ", partShift = " << partShift << endl;
|
Chris@169
|
907
|
Chris@169
|
908 // pitch has changed, emit an intermediate note
|
Chris@252
|
909 noteFeatures.push_back(makeNoteFeature(partStart,
|
Chris@252
|
910 i,
|
Chris@252
|
911 note,
|
Chris@252
|
912 partShift,
|
Chris@252
|
913 shiftCount,
|
Chris@252
|
914 partVelocity));
|
Chris@169
|
915 partStart = i;
|
Chris@169
|
916 partShift = shift;
|
Chris@169
|
917 partVelocity = 0;
|
Chris@169
|
918 }
|
Chris@169
|
919 }
|
Chris@169
|
920
|
Chris@303
|
921 int v;
|
Chris@303
|
922 if (m_mode == LiveMode) {
|
Chris@303
|
923 v = round(strength * 30);
|
Chris@303
|
924 } else {
|
Chris@303
|
925 v = round(strength * 2);
|
Chris@303
|
926 }
|
Chris@169
|
927 if (v > partVelocity) {
|
Chris@169
|
928 partVelocity = v;
|
Chris@169
|
929 }
|
Chris@169
|
930 }
|
Chris@169
|
931
|
Chris@169
|
932 if (end >= partStart + partThreshold) {
|
Chris@252
|
933 noteFeatures.push_back(makeNoteFeature(partStart,
|
Chris@252
|
934 end,
|
Chris@252
|
935 note,
|
Chris@252
|
936 partShift,
|
Chris@252
|
937 shiftCount,
|
Chris@252
|
938 partVelocity));
|
Chris@169
|
939 }
|
Chris@169
|
940 }
|
Chris@252
|
941
|
Chris@252
|
942 Silvet::Feature
|
Chris@252
|
943 Silvet::makeNoteFeature(int start,
|
Chris@252
|
944 int end,
|
Chris@252
|
945 int note,
|
Chris@252
|
946 int shift,
|
Chris@252
|
947 int shiftCount,
|
Chris@252
|
948 int velocity)
|
Chris@252
|
949 {
|
Chris@252
|
950 double columnDuration = 1.0 / m_colsPerSec;
|
Chris@252
|
951 int postFilterLatency = int(m_postFilter[0]->getSize() / 2);
|
Chris@252
|
952
|
Chris@252
|
953 Feature f;
|
Chris@252
|
954
|
Chris@252
|
955 f.hasTimestamp = true;
|
Chris@285
|
956 f.timestamp = m_startTime + RealTime::fromSeconds
|
Chris@252
|
957 (columnDuration * (start - postFilterLatency) + 0.02);
|
Chris@252
|
958
|
Chris@252
|
959 f.hasDuration = true;
|
Chris@252
|
960 f.duration = RealTime::fromSeconds
|
Chris@252
|
961 (columnDuration * (end - start));
|
Chris@252
|
962
|
Chris@252
|
963 f.values.clear();
|
Chris@252
|
964
|
Chris@252
|
965 f.values.push_back
|
Chris@252
|
966 (noteFrequency(note, shift, shiftCount));
|
Chris@252
|
967
|
Chris@252
|
968 float inputGain = getInputGainAt(f.timestamp);
|
Chris@252
|
969 // cerr << "adjusting velocity from " << velocity << " to " << round(velocity/inputGain) << endl;
|
Chris@252
|
970 velocity = round(velocity / inputGain);
|
Chris@252
|
971 if (velocity > 127) velocity = 127;
|
Chris@252
|
972 if (velocity < 1) velocity = 1;
|
Chris@252
|
973 f.values.push_back(velocity);
|
Chris@252
|
974
|
Chris@252
|
975 f.label = noteName(note, shift, shiftCount);
|
Chris@252
|
976
|
Chris@252
|
977 return f;
|
Chris@252
|
978 }
|
Chris@252
|
979
|
Chris@252
|
980 float
|
Chris@252
|
981 Silvet::getInputGainAt(RealTime t)
|
Chris@252
|
982 {
|
Chris@252
|
983 map<RealTime, float>::const_iterator i = m_inputGains.lower_bound(t);
|
Chris@252
|
984
|
Chris@252
|
985 if (i == m_inputGains.end()) {
|
Chris@252
|
986 if (i != m_inputGains.begin()) {
|
Chris@252
|
987 --i;
|
Chris@252
|
988 } else {
|
Chris@252
|
989 return 1.f; // no data
|
Chris@252
|
990 }
|
Chris@252
|
991 }
|
Chris@252
|
992
|
Chris@252
|
993 // cerr << "gain at time " << t << " = " << i->second << endl;
|
Chris@252
|
994
|
Chris@252
|
995 return i->second;
|
Chris@252
|
996 }
|
Chris@252
|
997
|