Chris@31
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
Chris@31
|
2
|
Chris@31
|
3 /*
|
Chris@31
|
4 Silvet
|
Chris@31
|
5
|
Chris@31
|
6 A Vamp plugin for note transcription.
|
Chris@31
|
7 Centre for Digital Music, Queen Mary University of London.
|
Chris@31
|
8
|
Chris@31
|
9 This program is free software; you can redistribute it and/or
|
Chris@31
|
10 modify it under the terms of the GNU General Public License as
|
Chris@31
|
11 published by the Free Software Foundation; either version 2 of the
|
Chris@31
|
12 License, or (at your option) any later version. See the file
|
Chris@31
|
13 COPYING included with this distribution for more information.
|
Chris@31
|
14 */
|
Chris@31
|
15
|
Chris@31
|
16 #include "Silvet.h"
|
Chris@34
|
17 #include "EM.h"
|
Chris@31
|
18
|
Chris@152
|
19 #include <cq/CQSpectrogram.h>
|
Chris@31
|
20
|
Chris@152
|
21 #include "MedianFilter.h"
|
Chris@152
|
22 #include "constant-q-cpp/src/dsp/Resampler.h"
|
Chris@246
|
23 #include "flattendynamics-ladspa.h"
|
Chris@298
|
24 #include "LiveInstruments.h"
|
Chris@31
|
25
|
Chris@31
|
26 #include <vector>
|
Chris@31
|
27
|
Chris@32
|
28 #include <cstdio>
|
Chris@32
|
29
|
Chris@31
|
30 using std::vector;
|
Chris@48
|
31 using std::cout;
|
Chris@31
|
32 using std::cerr;
|
Chris@31
|
33 using std::endl;
|
Chris@40
|
34 using Vamp::RealTime;
|
Chris@31
|
35
|
// Sample rate at which the constant-Q transform is run. reset()
// creates a resampler whenever the input rate differs from this.
static const int processingSampleRate = 44100;

// Constant-Q resolution in bins per semitone: one value for Live
// mode, one for the other processing modes.
static const int binsPerSemitoneLive = 1;
static const int binsPerSemitoneNormal = 5;

// Inclusive range of input sample rates accepted by initialise().
static const int minInputSampleRate = 100;
static const int maxInputSampleRate = 192000;
Chris@272
|
43
|
Chris@31
|
44 Silvet::Silvet(float inputSampleRate) :
|
Chris@31
|
45 Plugin(inputSampleRate),
|
Chris@161
|
46 m_instruments(InstrumentPack::listInstrumentPacks()),
|
Chris@298
|
47 m_liveInstruments(LiveAdapter::adaptAll(m_instruments)),
|
Chris@31
|
48 m_resampler(0),
|
Chris@246
|
49 m_flattener(0),
|
Chris@110
|
50 m_cq(0),
|
Chris@297
|
51 m_mode(HighQualityMode),
|
Chris@166
|
52 m_fineTuning(false),
|
Chris@178
|
53 m_instrument(0),
|
Chris@178
|
54 m_colsPerSec(50)
|
Chris@31
|
55 {
|
Chris@31
|
56 }
|
Chris@31
|
57
|
Chris@31
|
58 Silvet::~Silvet()
|
Chris@31
|
59 {
|
Chris@31
|
60 delete m_resampler;
|
Chris@246
|
61 delete m_flattener;
|
Chris@31
|
62 delete m_cq;
|
Chris@41
|
63 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
|
Chris@41
|
64 delete m_postFilter[i];
|
Chris@41
|
65 }
|
Chris@31
|
66 }
|
Chris@31
|
67
|
Chris@31
|
68 string
|
Chris@31
|
69 Silvet::getIdentifier() const
|
Chris@31
|
70 {
|
Chris@31
|
71 return "silvet";
|
Chris@31
|
72 }
|
Chris@31
|
73
|
Chris@31
|
74 string
|
Chris@31
|
75 Silvet::getName() const
|
Chris@31
|
76 {
|
Chris@31
|
77 return "Silvet Note Transcription";
|
Chris@31
|
78 }
|
Chris@31
|
79
|
Chris@31
|
80 string
|
Chris@31
|
81 Silvet::getDescription() const
|
Chris@31
|
82 {
|
Chris@191
|
83 return "Estimate the note onsets, pitches, and durations that make up a music recording.";
|
Chris@31
|
84 }
|
Chris@31
|
85
|
Chris@31
|
86 string
|
Chris@31
|
87 Silvet::getMaker() const
|
Chris@31
|
88 {
|
Chris@191
|
89 return "Queen Mary, University of London";
|
Chris@31
|
90 }
|
Chris@31
|
91
|
Chris@31
|
92 int
|
Chris@31
|
93 Silvet::getPluginVersion() const
|
Chris@31
|
94 {
|
Chris@304
|
95 return 3;
|
Chris@31
|
96 }
|
Chris@31
|
97
|
Chris@31
|
98 string
|
Chris@31
|
99 Silvet::getCopyright() const
|
Chris@31
|
100 {
|
Chris@191
|
101 return "Method by Emmanouil Benetos and Simon Dixon; plugin by Chris Cannam and Emmanouil Benetos. GPL licence.";
|
Chris@31
|
102 }
|
Chris@31
|
103
|
Chris@31
|
104 Silvet::InputDomain
|
Chris@31
|
105 Silvet::getInputDomain() const
|
Chris@31
|
106 {
|
Chris@31
|
107 return TimeDomain;
|
Chris@31
|
108 }
|
Chris@31
|
109
|
Chris@31
|
110 size_t
|
Chris@31
|
111 Silvet::getPreferredBlockSize() const
|
Chris@31
|
112 {
|
Chris@31
|
113 return 0;
|
Chris@31
|
114 }
|
Chris@31
|
115
|
Chris@31
|
116 size_t
|
Chris@31
|
117 Silvet::getPreferredStepSize() const
|
Chris@31
|
118 {
|
Chris@31
|
119 return 0;
|
Chris@31
|
120 }
|
Chris@31
|
121
|
Chris@31
|
122 size_t
|
Chris@31
|
123 Silvet::getMinChannelCount() const
|
Chris@31
|
124 {
|
Chris@31
|
125 return 1;
|
Chris@31
|
126 }
|
Chris@31
|
127
|
Chris@31
|
128 size_t
|
Chris@31
|
129 Silvet::getMaxChannelCount() const
|
Chris@31
|
130 {
|
Chris@31
|
131 return 1;
|
Chris@31
|
132 }
|
Chris@31
|
133
|
Chris@31
|
134 Silvet::ParameterList
|
Chris@31
|
135 Silvet::getParameterDescriptors() const
|
Chris@31
|
136 {
|
Chris@31
|
137 ParameterList list;
|
Chris@110
|
138
|
Chris@110
|
139 ParameterDescriptor desc;
|
Chris@110
|
140 desc.identifier = "mode";
|
Chris@110
|
141 desc.name = "Processing mode";
|
Chris@110
|
142 desc.unit = "";
|
Chris@297
|
143 desc.description = "Sets the tradeoff of processing speed against transcription quality. Draft mode is tuned in favour of overall speed; Live mode is tuned in favour of lower latency; while Intensive mode (the default) will almost always produce the best results.";
|
Chris@110
|
144 desc.minValue = 0;
|
Chris@297
|
145 desc.maxValue = 2;
|
Chris@113
|
146 desc.defaultValue = 1;
|
Chris@110
|
147 desc.isQuantized = true;
|
Chris@110
|
148 desc.quantizeStep = 1;
|
Chris@166
|
149 desc.valueNames.push_back("Draft (faster)");
|
Chris@165
|
150 desc.valueNames.push_back("Intensive (higher quality)");
|
Chris@297
|
151 desc.valueNames.push_back("Live (lower latency)");
|
Chris@161
|
152 list.push_back(desc);
|
Chris@161
|
153
|
Chris@176
|
154 desc.identifier = "instrument";
|
Chris@176
|
155 desc.name = "Instrument";
|
Chris@161
|
156 desc.unit = "";
|
Chris@271
|
157 desc.description = "The instrument or instruments known to be present in the recording. This affects the set of instrument templates used, as well as the expected level of polyphony in the output. Using a more limited set of instruments than the default will also make the plugin run faster.\nNote that this plugin cannot isolate instruments: you can't use this setting to request notes from only one instrument in a recording with several. Instead, use this as a hint to the plugin about which instruments are actually present.";
|
Chris@161
|
158 desc.minValue = 0;
|
Chris@162
|
159 desc.maxValue = m_instruments.size()-1;
|
Chris@162
|
160 desc.defaultValue = 0;
|
Chris@161
|
161 desc.isQuantized = true;
|
Chris@161
|
162 desc.quantizeStep = 1;
|
Chris@161
|
163 desc.valueNames.clear();
|
Chris@162
|
164 for (int i = 0; i < int(m_instruments.size()); ++i) {
|
Chris@162
|
165 desc.valueNames.push_back(m_instruments[i].name);
|
Chris@162
|
166 }
|
Chris@166
|
167 list.push_back(desc);
|
Chris@161
|
168
|
Chris@166
|
169 desc.identifier = "finetune";
|
Chris@166
|
170 desc.name = "Return fine pitch estimates";
|
Chris@166
|
171 desc.unit = "";
|
Chris@271
|
172 desc.description = "Return pitch estimates at finer than semitone resolution. This works only in Intensive mode. Notes that appear to drift in pitch will be split up into shorter notes with individually finer pitches.";
|
Chris@166
|
173 desc.minValue = 0;
|
Chris@166
|
174 desc.maxValue = 1;
|
Chris@166
|
175 desc.defaultValue = 0;
|
Chris@166
|
176 desc.isQuantized = true;
|
Chris@166
|
177 desc.quantizeStep = 1;
|
Chris@166
|
178 desc.valueNames.clear();
|
Chris@110
|
179 list.push_back(desc);
|
Chris@110
|
180
|
Chris@31
|
181 return list;
|
Chris@31
|
182 }
|
Chris@31
|
183
|
Chris@31
|
184 float
|
Chris@31
|
185 Silvet::getParameter(string identifier) const
|
Chris@31
|
186 {
|
Chris@110
|
187 if (identifier == "mode") {
|
Chris@297
|
188 return (float)(int)m_mode;
|
Chris@166
|
189 } else if (identifier == "finetune") {
|
Chris@166
|
190 return m_fineTuning ? 1.f : 0.f;
|
Chris@176
|
191 } else if (identifier == "instrument") {
|
Chris@162
|
192 return m_instrument;
|
Chris@110
|
193 }
|
Chris@31
|
194 return 0;
|
Chris@31
|
195 }
|
Chris@31
|
196
|
Chris@31
|
197 void
|
Chris@31
|
198 Silvet::setParameter(string identifier, float value)
|
Chris@31
|
199 {
|
Chris@110
|
200 if (identifier == "mode") {
|
Chris@297
|
201 m_mode = (ProcessingMode)(int)(value + 0.5);
|
Chris@166
|
202 } else if (identifier == "finetune") {
|
Chris@166
|
203 m_fineTuning = (value > 0.5);
|
Chris@176
|
204 } else if (identifier == "instrument") {
|
Chris@162
|
205 m_instrument = lrintf(value);
|
Chris@110
|
206 }
|
Chris@31
|
207 }
|
Chris@31
|
208
|
Chris@31
|
209 Silvet::ProgramList
|
Chris@31
|
210 Silvet::getPrograms() const
|
Chris@31
|
211 {
|
Chris@31
|
212 ProgramList list;
|
Chris@31
|
213 return list;
|
Chris@31
|
214 }
|
Chris@31
|
215
|
Chris@31
|
216 string
|
Chris@31
|
217 Silvet::getCurrentProgram() const
|
Chris@31
|
218 {
|
Chris@31
|
219 return "";
|
Chris@31
|
220 }
|
Chris@31
|
221
|
Chris@31
|
222 void
|
Chris@31
|
223 Silvet::selectProgram(string name)
|
Chris@31
|
224 {
|
Chris@31
|
225 }
|
Chris@31
|
226
|
Chris@31
|
227 Silvet::OutputList
|
Chris@31
|
228 Silvet::getOutputDescriptors() const
|
Chris@31
|
229 {
|
Chris@31
|
230 OutputList list;
|
Chris@31
|
231
|
Chris@31
|
232 OutputDescriptor d;
|
Chris@51
|
233 d.identifier = "notes";
|
Chris@51
|
234 d.name = "Note transcription";
|
Chris@329
|
235 d.description = "Overall note transcription. Each note has time, duration, estimated fundamental frequency, and a synthetic MIDI velocity (1-127) estimated from the strength of the pitch in the mixture.";
|
Chris@41
|
236 d.unit = "Hz";
|
Chris@31
|
237 d.hasFixedBinCount = true;
|
Chris@31
|
238 d.binCount = 2;
|
Chris@41
|
239 d.binNames.push_back("Frequency");
|
Chris@31
|
240 d.binNames.push_back("Velocity");
|
Chris@31
|
241 d.hasKnownExtents = false;
|
Chris@31
|
242 d.isQuantized = false;
|
Chris@31
|
243 d.sampleType = OutputDescriptor::VariableSampleRate;
|
Chris@246
|
244 d.sampleRate = processingSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
|
Chris@31
|
245 d.hasDuration = true;
|
Chris@32
|
246 m_notesOutputNo = list.size();
|
Chris@32
|
247 list.push_back(d);
|
Chris@32
|
248
|
Chris@178
|
249 d.identifier = "timefreq";
|
Chris@178
|
250 d.name = "Time-frequency distribution";
|
Chris@271
|
251 d.description = "Filtered constant-Q time-frequency distribution as used as input to the expectation-maximisation algorithm.";
|
Chris@178
|
252 d.unit = "";
|
Chris@178
|
253 d.hasFixedBinCount = true;
|
Chris@298
|
254 d.binCount = getPack(0).templateHeight;
|
Chris@178
|
255 d.binNames.clear();
|
Chris@178
|
256 if (m_cq) {
|
Chris@294
|
257 char name[50];
|
Chris@298
|
258 for (int i = 0; i < getPack(0).templateHeight; ++i) {
|
Chris@178
|
259 // We have a 600-bin (10 oct 60-bin CQ) of which the
|
Chris@178
|
260 // lowest-frequency 55 bins have been dropped, for a
|
Chris@178
|
261 // 545-bin template. The native CQ bins go high->low
|
Chris@178
|
262 // frequency though, so these are still the first 545 bins
|
Chris@178
|
263 // as reported by getBinFrequency, though in reverse order
|
Chris@178
|
264 float freq = m_cq->getBinFrequency
|
Chris@298
|
265 (getPack(0).templateHeight - i - 1);
|
Chris@178
|
266 sprintf(name, "%.1f Hz", freq);
|
Chris@178
|
267 d.binNames.push_back(name);
|
Chris@178
|
268 }
|
Chris@178
|
269 }
|
Chris@178
|
270 d.hasKnownExtents = false;
|
Chris@178
|
271 d.isQuantized = false;
|
Chris@178
|
272 d.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@178
|
273 d.sampleRate = m_colsPerSec;
|
Chris@178
|
274 d.hasDuration = false;
|
Chris@178
|
275 m_fcqOutputNo = list.size();
|
Chris@178
|
276 list.push_back(d);
|
Chris@178
|
277
|
Chris@294
|
278 d.identifier = "pitchactivation";
|
Chris@294
|
279 d.name = "Pitch activation distribution";
|
Chris@294
|
280 d.description = "Pitch activation distribution resulting from expectation-maximisation algorithm, prior to note extraction.";
|
Chris@294
|
281 d.unit = "";
|
Chris@294
|
282 d.hasFixedBinCount = true;
|
Chris@298
|
283 d.binCount = getPack(0).templateNoteCount;
|
Chris@294
|
284 d.binNames.clear();
|
Chris@294
|
285 if (m_cq) {
|
Chris@298
|
286 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
|
Chris@294
|
287 d.binNames.push_back(noteName(i, 0, 1));
|
Chris@294
|
288 }
|
Chris@294
|
289 }
|
Chris@294
|
290 d.hasKnownExtents = false;
|
Chris@294
|
291 d.isQuantized = false;
|
Chris@294
|
292 d.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@294
|
293 d.sampleRate = m_colsPerSec;
|
Chris@294
|
294 d.hasDuration = false;
|
Chris@294
|
295 m_pitchOutputNo = list.size();
|
Chris@294
|
296 list.push_back(d);
|
Chris@294
|
297
|
Chris@302
|
298 d.identifier = "templates";
|
Chris@302
|
299 d.name = "Templates";
|
Chris@302
|
300 d.description = "Constant-Q spectral templates for the selected instrument pack.";
|
Chris@302
|
301 d.unit = "";
|
Chris@302
|
302 d.hasFixedBinCount = true;
|
Chris@302
|
303 d.binCount = getPack(0).templateHeight;
|
Chris@302
|
304 d.binNames.clear();
|
Chris@302
|
305 if (m_cq) {
|
Chris@302
|
306 char name[50];
|
Chris@302
|
307 for (int i = 0; i < getPack(0).templateHeight; ++i) {
|
Chris@302
|
308 // We have a 600-bin (10 oct 60-bin CQ) of which the
|
Chris@302
|
309 // lowest-frequency 55 bins have been dropped, for a
|
Chris@302
|
310 // 545-bin template. The native CQ bins go high->low
|
Chris@302
|
311 // frequency though, so these are still the first 545 bins
|
Chris@302
|
312 // as reported by getBinFrequency, though in reverse order
|
Chris@302
|
313 float freq = m_cq->getBinFrequency
|
Chris@302
|
314 (getPack(0).templateHeight - i - 1);
|
Chris@302
|
315 sprintf(name, "%.1f Hz", freq);
|
Chris@302
|
316 d.binNames.push_back(name);
|
Chris@302
|
317 }
|
Chris@302
|
318 }
|
Chris@302
|
319 d.hasKnownExtents = false;
|
Chris@302
|
320 d.isQuantized = false;
|
Chris@302
|
321 d.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@302
|
322 d.sampleRate = m_colsPerSec;
|
Chris@302
|
323 d.hasDuration = false;
|
Chris@302
|
324 m_templateOutputNo = list.size();
|
Chris@302
|
325 list.push_back(d);
|
Chris@302
|
326
|
Chris@31
|
327 return list;
|
Chris@31
|
328 }
|
Chris@31
|
329
|
Chris@38
|
330 std::string
|
Chris@175
|
331 Silvet::noteName(int note, int shift, int shiftCount) const
|
Chris@38
|
332 {
|
Chris@38
|
333 static const char *names[] = {
|
Chris@38
|
334 "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"
|
Chris@38
|
335 };
|
Chris@38
|
336
|
Chris@175
|
337 const char *n = names[note % 12];
|
Chris@38
|
338
|
Chris@175
|
339 int oct = (note + 9) / 12;
|
Chris@38
|
340
|
Chris@175
|
341 char buf[30];
|
Chris@175
|
342
|
Chris@175
|
343 float pshift = 0.f;
|
Chris@175
|
344 if (shiftCount > 1) {
|
Chris@175
|
345 // see noteFrequency below
|
Chris@175
|
346 pshift =
|
Chris@175
|
347 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
|
Chris@175
|
348 }
|
Chris@175
|
349
|
Chris@175
|
350 if (pshift > 0.f) {
|
Chris@175
|
351 sprintf(buf, "%s%d+%dc", n, oct, int(round(pshift * 100)));
|
Chris@175
|
352 } else if (pshift < 0.f) {
|
Chris@175
|
353 sprintf(buf, "%s%d-%dc", n, oct, int(round((-pshift) * 100)));
|
Chris@175
|
354 } else {
|
Chris@175
|
355 sprintf(buf, "%s%d", n, oct);
|
Chris@175
|
356 }
|
Chris@38
|
357
|
Chris@38
|
358 return buf;
|
Chris@38
|
359 }
|
Chris@38
|
360
|
Chris@41
|
361 float
|
Chris@168
|
362 Silvet::noteFrequency(int note, int shift, int shiftCount) const
|
Chris@41
|
363 {
|
Chris@169
|
364 // Convert shift number to a pitch shift. The given shift number
|
Chris@169
|
365 // is an offset into the template array, which starts with some
|
Chris@169
|
366 // zeros, followed by the template, then some trailing zeros.
|
Chris@169
|
367 //
|
Chris@169
|
368 // Example: if we have templateMaxShift == 2 and thus shiftCount
|
Chris@169
|
369 // == 5, then the number will be in the range 0-4 and the template
|
Chris@169
|
370 // will have 2 zeros at either end. Thus number 2 represents the
|
Chris@169
|
371 // template "as recorded", for a pitch shift of 0; smaller indices
|
Chris@169
|
372 // represent moving the template *up* in pitch (by introducing
|
Chris@169
|
373 // zeros at the start, which is the low-frequency end), for a
|
Chris@169
|
374 // positive pitch shift; and higher values represent moving it
|
Chris@169
|
375 // down in pitch, for a negative pitch shift.
|
Chris@169
|
376
|
Chris@175
|
377 float pshift = 0.f;
|
Chris@175
|
378 if (shiftCount > 1) {
|
Chris@175
|
379 pshift =
|
Chris@175
|
380 float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount;
|
Chris@175
|
381 }
|
Chris@169
|
382
|
Chris@301
|
383 float freq = float(27.5 * pow(2.0, (note + pshift) / 12.0));
|
Chris@301
|
384
|
Chris@303
|
385 // cerr << "note = " << note << ", shift = " << shift << ", shiftCount = "
|
Chris@303
|
386 // << shiftCount << ", obtained freq = " << freq << endl;
|
Chris@301
|
387
|
Chris@301
|
388 return freq;
|
Chris@41
|
389 }
|
Chris@41
|
390
|
Chris@31
|
391 bool
|
Chris@31
|
392 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
Chris@31
|
393 {
|
Chris@272
|
394 if (m_inputSampleRate < minInputSampleRate ||
|
Chris@272
|
395 m_inputSampleRate > maxInputSampleRate) {
|
Chris@272
|
396 cerr << "Silvet::initialise: Unsupported input sample rate "
|
Chris@272
|
397 << m_inputSampleRate << " (supported min " << minInputSampleRate
|
Chris@272
|
398 << ", max " << maxInputSampleRate << ")" << endl;
|
Chris@272
|
399 return false;
|
Chris@272
|
400 }
|
Chris@272
|
401
|
Chris@31
|
402 if (channels < getMinChannelCount() ||
|
Chris@272
|
403 channels > getMaxChannelCount()) {
|
Chris@272
|
404 cerr << "Silvet::initialise: Unsupported channel count " << channels
|
Chris@272
|
405 << " (supported min " << getMinChannelCount() << ", max "
|
Chris@272
|
406 << getMaxChannelCount() << ")" << endl;
|
Chris@272
|
407 return false;
|
Chris@272
|
408 }
|
Chris@31
|
409
|
Chris@31
|
410 if (stepSize != blockSize) {
|
Chris@31
|
411 cerr << "Silvet::initialise: Step size must be the same as block size ("
|
Chris@31
|
412 << stepSize << " != " << blockSize << ")" << endl;
|
Chris@31
|
413 return false;
|
Chris@31
|
414 }
|
Chris@31
|
415
|
Chris@31
|
416 m_blockSize = blockSize;
|
Chris@31
|
417
|
Chris@31
|
418 reset();
|
Chris@31
|
419
|
Chris@31
|
420 return true;
|
Chris@31
|
421 }
|
Chris@31
|
422
|
Chris@31
|
423 void
|
Chris@31
|
424 Silvet::reset()
|
Chris@31
|
425 {
|
Chris@31
|
426 delete m_resampler;
|
Chris@246
|
427 delete m_flattener;
|
Chris@31
|
428 delete m_cq;
|
Chris@31
|
429
|
Chris@31
|
430 if (m_inputSampleRate != processingSampleRate) {
|
Chris@31
|
431 m_resampler = new Resampler(m_inputSampleRate, processingSampleRate);
|
Chris@31
|
432 } else {
|
Chris@31
|
433 m_resampler = 0;
|
Chris@31
|
434 }
|
Chris@31
|
435
|
Chris@246
|
436 m_flattener = new FlattenDynamics(m_inputSampleRate); // before resampling
|
Chris@246
|
437 m_flattener->reset();
|
Chris@246
|
438
|
Chris@301
|
439 // this happens to be processingSampleRate / 3, and is the top
|
Chris@301
|
440 // freq used for the EM templates:
|
Chris@301
|
441 double maxFreq = 14700;
|
Chris@301
|
442
|
Chris@301
|
443 if (m_mode == LiveMode) {
|
Chris@301
|
444 // We only have 12 bpo rather than 60, so we need the top bin
|
Chris@301
|
445 // to be the middle one of the top 5, i.e. 2/5 of a semitone
|
Chris@301
|
446 // lower than 14700
|
Chris@301
|
447 maxFreq *= powf(2.0, -1.0 / 30.0);
|
Chris@301
|
448 }
|
Chris@301
|
449
|
Chris@173
|
450 double minFreq = 27.5;
|
Chris@173
|
451
|
Chris@297
|
452 if (m_mode != HighQualityMode) {
|
Chris@173
|
453 // We don't actually return any notes from the bottom octave,
|
Chris@173
|
454 // so we can just pad with zeros
|
Chris@173
|
455 minFreq *= 2;
|
Chris@173
|
456 }
|
Chris@173
|
457
|
Chris@298
|
458 int bpo = 12 *
|
Chris@298
|
459 (m_mode == LiveMode ? binsPerSemitoneLive : binsPerSemitoneNormal);
|
Chris@301
|
460
|
Chris@154
|
461 CQParameters params(processingSampleRate,
|
Chris@173
|
462 minFreq,
|
Chris@303
|
463 maxFreq,
|
Chris@298
|
464 bpo);
|
Chris@154
|
465
|
Chris@155
|
466 params.q = 0.95; // MIREX code uses 0.8, but it seems 0.9 or lower
|
Chris@155
|
467 // drops the FFT size to 512 from 1024 and alters
|
Chris@155
|
468 // some other processing parameters, making
|
Chris@155
|
469 // everything much, much slower. Could be a flaw
|
Chris@155
|
470 // in the CQ parameter calculations, must check
|
Chris@154
|
471 params.atomHopFactor = 0.3;
|
Chris@154
|
472 params.threshold = 0.0005;
|
Chris@172
|
473 params.window = CQParameters::Hann;
|
Chris@154
|
474
|
Chris@154
|
475 m_cq = new CQSpectrogram(params, CQSpectrogram::InterpolateLinear);
|
Chris@31
|
476
|
Chris@303
|
477 // cerr << "CQ bins = " << m_cq->getTotalBins() << endl;
|
Chris@303
|
478 // cerr << "CQ min freq = " << m_cq->getMinFrequency() << " (and for confirmation, freq of bin 0 = " << m_cq->getBinFrequency(0) << ")" << endl;
|
Chris@297
|
479
|
Chris@297
|
480 m_colsPerSec = (m_mode == DraftMode ? 25 : 50);
|
Chris@165
|
481
|
Chris@41
|
482 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
|
Chris@41
|
483 delete m_postFilter[i];
|
Chris@41
|
484 }
|
Chris@41
|
485 m_postFilter.clear();
|
Chris@303
|
486 int postFilterLength = 3;
|
Chris@298
|
487 for (int i = 0; i < getPack(0).templateNoteCount; ++i) {
|
Chris@303
|
488 m_postFilter.push_back(new MedianFilter<double>(postFilterLength));
|
Chris@41
|
489 }
|
Chris@41
|
490 m_pianoRoll.clear();
|
Chris@246
|
491 m_inputGains.clear();
|
Chris@32
|
492 m_columnCount = 0;
|
Chris@272
|
493 m_resampledCount = 0;
|
Chris@40
|
494 m_startTime = RealTime::zeroTime;
|
Chris@31
|
495 }
|
Chris@31
|
496
|
Chris@31
|
497 Silvet::FeatureSet
|
Chris@31
|
498 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
Chris@31
|
499 {
|
Chris@302
|
500 FeatureSet fs;
|
Chris@302
|
501
|
Chris@40
|
502 if (m_columnCount == 0) {
|
Chris@40
|
503 m_startTime = timestamp;
|
Chris@302
|
504 insertTemplateFeatures(fs);
|
Chris@40
|
505 }
|
Chris@246
|
506
|
Chris@246
|
507 vector<float> flattened(m_blockSize);
|
Chris@246
|
508 float gain = 1.f;
|
Chris@246
|
509 m_flattener->connectInputPort
|
Chris@246
|
510 (FlattenDynamics::AudioInputPort, inputBuffers[0]);
|
Chris@246
|
511 m_flattener->connectOutputPort
|
Chris@246
|
512 (FlattenDynamics::AudioOutputPort, &flattened[0]);
|
Chris@246
|
513 m_flattener->connectOutputPort
|
Chris@246
|
514 (FlattenDynamics::GainOutputPort, &gain);
|
Chris@246
|
515 m_flattener->process(m_blockSize);
|
Chris@246
|
516
|
Chris@252
|
517 m_inputGains[timestamp] = gain;
|
Chris@40
|
518
|
Chris@31
|
519 vector<double> data;
|
Chris@40
|
520 for (int i = 0; i < m_blockSize; ++i) {
|
Chris@246
|
521 double d = flattened[i];
|
Chris@235
|
522 data.push_back(d);
|
Chris@40
|
523 }
|
Chris@31
|
524
|
Chris@31
|
525 if (m_resampler) {
|
Chris@272
|
526
|
Chris@31
|
527 data = m_resampler->process(data.data(), data.size());
|
Chris@272
|
528
|
Chris@272
|
529 int hadCount = m_resampledCount;
|
Chris@272
|
530 m_resampledCount += data.size();
|
Chris@272
|
531
|
Chris@272
|
532 int resamplerLatency = m_resampler->getLatency();
|
Chris@272
|
533
|
Chris@272
|
534 if (hadCount < resamplerLatency) {
|
Chris@272
|
535 int stillToDrop = resamplerLatency - hadCount;
|
Chris@272
|
536 if (stillToDrop >= int(data.size())) {
|
Chris@302
|
537 return fs;
|
Chris@272
|
538 } else {
|
Chris@272
|
539 data = vector<double>(data.begin() + stillToDrop, data.end());
|
Chris@272
|
540 }
|
Chris@272
|
541 }
|
Chris@31
|
542 }
|
Chris@272
|
543
|
Chris@32
|
544 Grid cqout = m_cq->process(data);
|
Chris@302
|
545 transcribe(cqout, fs);
|
Chris@51
|
546 return fs;
|
Chris@34
|
547 }
|
Chris@34
|
548
|
Chris@34
|
549 Silvet::FeatureSet
|
Chris@34
|
550 Silvet::getRemainingFeatures()
|
Chris@34
|
551 {
|
Chris@145
|
552 Grid cqout = m_cq->getRemainingOutput();
|
Chris@302
|
553 FeatureSet fs;
|
Chris@302
|
554 if (m_columnCount == 0) {
|
Chris@302
|
555 // process() was never called, but we still want these
|
Chris@302
|
556 insertTemplateFeatures(fs);
|
Chris@302
|
557 } else {
|
Chris@302
|
558 transcribe(cqout, fs);
|
Chris@302
|
559 }
|
Chris@51
|
560 return fs;
|
Chris@34
|
561 }
|
Chris@34
|
562
|
Chris@302
|
563 void
|
Chris@302
|
564 Silvet::insertTemplateFeatures(FeatureSet &fs)
|
Chris@302
|
565 {
|
Chris@302
|
566 const InstrumentPack &pack = getPack(m_instrument);
|
Chris@302
|
567 for (int i = 0; i < int(pack.templates.size()) * pack.templateNoteCount; ++i) {
|
Chris@302
|
568 RealTime timestamp = RealTime::fromSeconds(double(i) / m_colsPerSec);
|
Chris@302
|
569 Feature f;
|
Chris@302
|
570 char buffer[50];
|
Chris@302
|
571 sprintf(buffer, "Note %d", i + 1);
|
Chris@302
|
572 f.label = buffer;
|
Chris@302
|
573 f.hasTimestamp = true;
|
Chris@302
|
574 f.timestamp = timestamp;
|
Chris@302
|
575 f.values = pack.templates[i / pack.templateNoteCount]
|
Chris@302
|
576 .data[i % pack.templateNoteCount];
|
Chris@302
|
577 fs[m_templateOutputNo].push_back(f);
|
Chris@302
|
578 }
|
Chris@302
|
579 }
|
Chris@302
|
580
|
Chris@302
|
581 void
|
Chris@302
|
582 Silvet::transcribe(const Grid &cqout, Silvet::FeatureSet &fs)
|
Chris@34
|
583 {
|
Chris@32
|
584 Grid filtered = preProcess(cqout);
|
Chris@31
|
585
|
Chris@302
|
586 if (filtered.empty()) return;
|
Chris@170
|
587
|
Chris@298
|
588 const InstrumentPack &pack(getPack(m_instrument));
|
Chris@104
|
589
|
Chris@178
|
590 for (int i = 0; i < (int)filtered.size(); ++i) {
|
Chris@178
|
591 Feature f;
|
Chris@178
|
592 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@178
|
593 f.values.push_back(float(filtered[i][j]));
|
Chris@178
|
594 }
|
Chris@178
|
595 fs[m_fcqOutputNo].push_back(f);
|
Chris@178
|
596 }
|
Chris@178
|
597
|
Chris@34
|
598 int width = filtered.size();
|
Chris@34
|
599
|
Chris@297
|
600 int iterations = (m_mode == HighQualityMode ? 20 : 10);
|
Chris@34
|
601
|
Chris@176
|
602 Grid localPitches(width, vector<double>(pack.templateNoteCount, 0.0));
|
Chris@170
|
603
|
Chris@297
|
604 bool wantShifts = (m_mode == HighQualityMode) && m_fineTuning;
|
Chris@170
|
605 int shiftCount = 1;
|
Chris@170
|
606 if (wantShifts) {
|
Chris@170
|
607 shiftCount = pack.templateMaxShift * 2 + 1;
|
Chris@170
|
608 }
|
Chris@170
|
609
|
Chris@170
|
610 vector<vector<int> > localBestShifts;
|
Chris@170
|
611 if (wantShifts) {
|
Chris@170
|
612 localBestShifts =
|
Chris@176
|
613 vector<vector<int> >(width, vector<int>(pack.templateNoteCount, 0));
|
Chris@170
|
614 }
|
Chris@170
|
615
|
Chris@305
|
616 double columnThreshold = 1e-5;
|
Chris@307
|
617
|
Chris@307
|
618 if (m_mode == LiveMode) {
|
Chris@307
|
619 columnThreshold /= 20;
|
Chris@307
|
620 }
|
Chris@305
|
621
|
Chris@123
|
622 #pragma omp parallel for
|
Chris@123
|
623 for (int i = 0; i < width; ++i) {
|
Chris@104
|
624
|
Chris@170
|
625 double sum = 0.0;
|
Chris@176
|
626 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@170
|
627 sum += filtered.at(i).at(j);
|
Chris@170
|
628 }
|
Chris@305
|
629 if (sum < columnThreshold) continue;
|
Chris@170
|
630
|
Chris@297
|
631 EM em(&pack, m_mode == HighQualityMode);
|
Chris@170
|
632
|
Chris@183
|
633 em.setPitchSparsity(pack.pitchSparsity);
|
Chris@213
|
634 em.setSourceSparsity(pack.sourceSparsity);
|
Chris@183
|
635
|
Chris@170
|
636 for (int j = 0; j < iterations; ++j) {
|
Chris@170
|
637 em.iterate(filtered.at(i).data());
|
Chris@37
|
638 }
|
Chris@37
|
639
|
Chris@170
|
640 const float *pitchDist = em.getPitchDistribution();
|
Chris@170
|
641 const float *const *shiftDist = em.getShifts();
|
Chris@37
|
642
|
Chris@176
|
643 for (int j = 0; j < pack.templateNoteCount; ++j) {
|
Chris@104
|
644
|
Chris@170
|
645 localPitches[i][j] = pitchDist[j] * sum;
|
Chris@170
|
646
|
Chris@170
|
647 int bestShift = 0;
|
Chris@179
|
648 float bestShiftValue = 0.0;
|
Chris@170
|
649 if (wantShifts) {
|
Chris@170
|
650 for (int k = 0; k < shiftCount; ++k) {
|
Chris@179
|
651 float value = shiftDist[k][j];
|
Chris@179
|
652 if (k == 0 || value > bestShiftValue) {
|
Chris@179
|
653 bestShiftValue = value;
|
Chris@170
|
654 bestShift = k;
|
Chris@170
|
655 }
|
Chris@170
|
656 }
|
Chris@170
|
657 localBestShifts[i][j] = bestShift;
|
Chris@170
|
658 }
|
Chris@123
|
659 }
|
Chris@123
|
660 }
|
Chris@166
|
661
|
Chris@166
|
662 for (int i = 0; i < width; ++i) {
|
Chris@37
|
663
|
Chris@294
|
664 vector<double> filtered = postProcess
|
Chris@294
|
665 (localPitches[i], localBestShifts[i], wantShifts);
|
Chris@294
|
666
|
Chris@294
|
667 Feature f;
|
Chris@294
|
668 for (int j = 0; j < (int)filtered.size(); ++j) {
|
Chris@294
|
669 float v(filtered[j]);
|
Chris@294
|
670 if (v < pack.levelThreshold) v = 0.f;
|
Chris@294
|
671 f.values.push_back(v);
|
Chris@294
|
672 }
|
Chris@294
|
673 fs[m_pitchOutputNo].push_back(f);
|
Chris@166
|
674
|
Chris@168
|
675 FeatureList noteFeatures = noteTrack(shiftCount);
|
Chris@38
|
676
|
Chris@123
|
677 for (FeatureList::const_iterator fi = noteFeatures.begin();
|
Chris@123
|
678 fi != noteFeatures.end(); ++fi) {
|
Chris@123
|
679 fs[m_notesOutputNo].push_back(*fi);
|
Chris@40
|
680 }
|
Chris@34
|
681 }
|
Chris@31
|
682 }
|
Chris@31
|
683
|
Chris@32
|
684 Silvet::Grid
|
Chris@32
|
685 Silvet::preProcess(const Grid &in)
|
Chris@32
|
686 {
|
Chris@32
|
687 int width = in.size();
|
Chris@32
|
688
|
Chris@165
|
689 int spacing = processingSampleRate / m_colsPerSec;
|
Chris@32
|
690
|
Chris@165
|
691 // need to be careful that col spacing is an integer number of samples!
|
Chris@165
|
692 assert(spacing * m_colsPerSec == processingSampleRate);
|
Chris@32
|
693
|
Chris@32
|
694 Grid out;
|
Chris@32
|
695
|
Chris@58
|
696 // We count the CQ latency in terms of processing hops, but
|
Chris@58
|
697 // actually it probably isn't an exact number of hops so this
|
Chris@58
|
698 // isn't quite accurate. But the small constant offset is
|
Chris@165
|
699 // practically irrelevant compared to the jitter from the frame
|
Chris@165
|
700 // size we reduce to in a moment
|
Chris@33
|
701 int latentColumns = m_cq->getLatency() / m_cq->getColumnHop();
|
Chris@33
|
702
|
Chris@298
|
703 const InstrumentPack &pack(getPack(m_instrument));
|
Chris@176
|
704
|
Chris@32
|
705 for (int i = 0; i < width; ++i) {
|
Chris@32
|
706
|
Chris@33
|
707 if (m_columnCount < latentColumns) {
|
Chris@33
|
708 ++m_columnCount;
|
Chris@33
|
709 continue;
|
Chris@33
|
710 }
|
Chris@33
|
711
|
Chris@32
|
712 int prevSampleNo = (m_columnCount - 1) * m_cq->getColumnHop();
|
Chris@32
|
713 int sampleNo = m_columnCount * m_cq->getColumnHop();
|
Chris@32
|
714
|
Chris@32
|
715 bool select = (sampleNo / spacing != prevSampleNo / spacing);
|
Chris@32
|
716
|
Chris@32
|
717 if (select) {
|
Chris@32
|
718 vector<double> inCol = in[i];
|
Chris@176
|
719 vector<double> outCol(pack.templateHeight);
|
Chris@32
|
720
|
Chris@178
|
721 // In HQ mode, the CQ returns 600 bins and we ignore the
|
Chris@298
|
722 // lowest 55 of them (assuming binsPerSemitone == 5).
|
Chris@178
|
723 //
|
Chris@297
|
724 // In draft and live mode the CQ is an octave shorter,
|
Chris@300
|
725 // returning 540 bins or equivalent, so we instead pad
|
Chris@300
|
726 // them with an additional 5 or equivalent zeros.
|
Chris@178
|
727 //
|
Chris@178
|
728 // We also need to reverse the column as we go, since the
|
Chris@178
|
729 // raw CQ has the high frequencies first and we need it
|
Chris@178
|
730 // the other way around.
|
Chris@32
|
731
|
Chris@298
|
732 int bps = (m_mode == LiveMode ?
|
Chris@298
|
733 binsPerSemitoneLive : binsPerSemitoneNormal);
|
Chris@298
|
734
|
Chris@297
|
735 if (m_mode == HighQualityMode) {
|
Chris@178
|
736 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@298
|
737 int ix = inCol.size() - j - (11 * bps);
|
Chris@178
|
738 outCol[j] = inCol[ix];
|
Chris@178
|
739 }
|
Chris@178
|
740 } else {
|
Chris@298
|
741 for (int j = 0; j < bps; ++j) {
|
Chris@178
|
742 outCol[j] = 0.0;
|
Chris@178
|
743 }
|
Chris@298
|
744 for (int j = bps; j < pack.templateHeight; ++j) {
|
Chris@298
|
745 int ix = inCol.size() - j + (bps-1);
|
Chris@178
|
746 outCol[j] = inCol[ix];
|
Chris@178
|
747 }
|
Chris@46
|
748 }
|
Chris@32
|
749
|
Chris@46
|
750 vector<double> noiseLevel1 =
|
Chris@298
|
751 MedianFilter<double>::filter(8 * bps, outCol);
|
Chris@176
|
752 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@46
|
753 noiseLevel1[j] = std::min(outCol[j], noiseLevel1[j]);
|
Chris@46
|
754 }
|
Chris@32
|
755
|
Chris@46
|
756 vector<double> noiseLevel2 =
|
Chris@298
|
757 MedianFilter<double>::filter(8 * bps, noiseLevel1);
|
Chris@176
|
758 for (int j = 0; j < pack.templateHeight; ++j) {
|
Chris@46
|
759 outCol[j] = std::max(outCol[j] - noiseLevel2[j], 0.0);
|
Chris@32
|
760 }
|
Chris@32
|
761
|
Chris@165
|
762 out.push_back(outCol);
|
Chris@32
|
763 }
|
Chris@32
|
764
|
Chris@32
|
765 ++m_columnCount;
|
Chris@32
|
766 }
|
Chris@32
|
767
|
Chris@32
|
768 return out;
|
Chris@32
|
769 }
|
Chris@32
|
770
|
Chris@294
|
771 vector<double>
|
Chris@170
|
772 Silvet::postProcess(const vector<double> &pitches,
|
Chris@170
|
773 const vector<int> &bestShifts,
|
Chris@170
|
774 bool wantShifts)
|
Chris@166
|
775 {
|
Chris@298
|
776 const InstrumentPack &pack(getPack(m_instrument));
|
Chris@176
|
777
|
Chris@41
|
778 vector<double> filtered;
|
Chris@41
|
779
|
Chris@176
|
780 for (int j = 0; j < pack.templateNoteCount; ++j) {
|
Chris@170
|
781 m_postFilter[j]->push(pitches[j]);
|
Chris@41
|
782 filtered.push_back(m_postFilter[j]->get());
|
Chris@41
|
783 }
|
Chris@41
|
784
|
Chris@41
|
785 // Threshold for level and reduce number of candidate pitches
|
Chris@41
|
786
|
Chris@41
|
787 typedef std::multimap<double, int> ValueIndexMap;
|
Chris@41
|
788
|
Chris@41
|
789 ValueIndexMap strengths;
|
Chris@166
|
790
|
Chris@176
|
791 for (int j = 0; j < pack.templateNoteCount; ++j) {
|
Chris@166
|
792 double strength = filtered[j];
|
Chris@183
|
793 if (strength < pack.levelThreshold) continue;
|
Chris@168
|
794 strengths.insert(ValueIndexMap::value_type(strength, j));
|
Chris@168
|
795 }
|
Chris@166
|
796
|
Chris@168
|
797 ValueIndexMap::const_iterator si = strengths.end();
|
Chris@167
|
798
|
Chris@168
|
799 map<int, double> active;
|
Chris@168
|
800 map<int, int> activeShifts;
|
Chris@168
|
801
|
Chris@183
|
802 while (int(active.size()) < pack.maxPolyphony && si != strengths.begin()) {
|
Chris@168
|
803
|
Chris@168
|
804 --si;
|
Chris@168
|
805
|
Chris@168
|
806 double strength = si->first;
|
Chris@168
|
807 int j = si->second;
|
Chris@168
|
808
|
Chris@168
|
809 active[j] = strength;
|
Chris@168
|
810
|
Chris@170
|
811 if (wantShifts) {
|
Chris@170
|
812 activeShifts[j] = bestShifts[j];
|
Chris@167
|
813 }
|
Chris@41
|
814 }
|
Chris@41
|
815
|
Chris@168
|
816 m_pianoRoll.push_back(active);
|
Chris@170
|
817
|
Chris@170
|
818 if (wantShifts) {
|
Chris@168
|
819 m_pianoRollShifts.push_back(activeShifts);
|
Chris@41
|
820 }
|
Chris@294
|
821
|
Chris@294
|
822 return filtered;
|
Chris@166
|
823 }
|
Chris@166
|
824
|
Chris@166
|
825 Vamp::Plugin::FeatureList
|
Chris@168
|
826 Silvet::noteTrack(int shiftCount)
|
Chris@166
|
827 {
|
Chris@41
|
828 // Minimum duration pruning, and conversion to notes. We can only
|
Chris@41
|
829 // report notes that have just ended (i.e. that are absent in the
|
Chris@168
|
830 // latest active set but present in the prior set in the piano
|
Chris@41
|
831 // roll) -- any notes that ended earlier will have been reported
|
Chris@41
|
832 // already, and if they haven't ended, we don't know their
|
Chris@41
|
833 // duration.
|
Chris@41
|
834
|
Chris@168
|
835 int width = m_pianoRoll.size() - 1;
|
Chris@168
|
836
|
Chris@168
|
837 const map<int, double> &active = m_pianoRoll[width];
|
Chris@41
|
838
|
Chris@165
|
839 double columnDuration = 1.0 / m_colsPerSec;
|
Chris@165
|
840
|
Chris@165
|
841 // only keep notes >= 100ms or thereabouts
|
Chris@165
|
842 int durationThreshold = floor(0.1 / columnDuration); // columns
|
Chris@165
|
843 if (durationThreshold < 1) durationThreshold = 1;
|
Chris@41
|
844
|
Chris@41
|
845 FeatureList noteFeatures;
|
Chris@41
|
846
|
Chris@41
|
847 if (width < durationThreshold + 1) {
|
Chris@41
|
848 return noteFeatures;
|
Chris@41
|
849 }
|
Chris@41
|
850
|
Chris@150
|
851 //!!! try: repeated note detection? (look for change in first derivative of the pitch matrix)
|
Chris@150
|
852
|
Chris@55
|
853 for (map<int, double>::const_iterator ni = m_pianoRoll[width-1].begin();
|
Chris@41
|
854 ni != m_pianoRoll[width-1].end(); ++ni) {
|
Chris@41
|
855
|
Chris@55
|
856 int note = ni->first;
|
Chris@41
|
857
|
Chris@41
|
858 if (active.find(note) != active.end()) {
|
Chris@41
|
859 // the note is still playing
|
Chris@41
|
860 continue;
|
Chris@41
|
861 }
|
Chris@41
|
862
|
Chris@41
|
863 // the note was playing but just ended
|
Chris@41
|
864 int end = width;
|
Chris@41
|
865 int start = end-1;
|
Chris@41
|
866
|
Chris@41
|
867 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) {
|
Chris@41
|
868 --start;
|
Chris@41
|
869 }
|
Chris@41
|
870 ++start;
|
Chris@41
|
871
|
Chris@169
|
872 if ((end - start) < durationThreshold) {
|
Chris@41
|
873 continue;
|
Chris@41
|
874 }
|
Chris@41
|
875
|
Chris@169
|
876 emitNote(start, end, note, shiftCount, noteFeatures);
|
Chris@41
|
877 }
|
Chris@41
|
878
|
Chris@62
|
879 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl;
|
Chris@41
|
880
|
Chris@41
|
881 return noteFeatures;
|
Chris@41
|
882 }
|
Chris@41
|
883
|
Chris@169
|
884 void
|
Chris@169
|
885 Silvet::emitNote(int start, int end, int note, int shiftCount,
|
Chris@169
|
886 FeatureList ¬eFeatures)
|
Chris@169
|
887 {
|
Chris@169
|
888 int partStart = start;
|
Chris@169
|
889 int partShift = 0;
|
Chris@169
|
890 int partVelocity = 0;
|
Chris@169
|
891
|
Chris@252
|
892 int partThreshold = floor(0.05 * m_colsPerSec);
|
Chris@169
|
893
|
Chris@169
|
894 for (int i = start; i != end; ++i) {
|
Chris@169
|
895
|
Chris@169
|
896 double strength = m_pianoRoll[i][note];
|
Chris@169
|
897
|
Chris@169
|
898 int shift = 0;
|
Chris@169
|
899
|
Chris@169
|
900 if (shiftCount > 1) {
|
Chris@169
|
901
|
Chris@169
|
902 shift = m_pianoRollShifts[i][note];
|
Chris@169
|
903
|
Chris@169
|
904 if (i == partStart) {
|
Chris@169
|
905 partShift = shift;
|
Chris@169
|
906 }
|
Chris@169
|
907
|
Chris@169
|
908 if (i > partStart + partThreshold && shift != partShift) {
|
Chris@169
|
909
|
Chris@169
|
910 // cerr << "i = " << i << ", partStart = " << partStart << ", shift = " << shift << ", partShift = " << partShift << endl;
|
Chris@169
|
911
|
Chris@169
|
912 // pitch has changed, emit an intermediate note
|
Chris@252
|
913 noteFeatures.push_back(makeNoteFeature(partStart,
|
Chris@252
|
914 i,
|
Chris@252
|
915 note,
|
Chris@252
|
916 partShift,
|
Chris@252
|
917 shiftCount,
|
Chris@252
|
918 partVelocity));
|
Chris@169
|
919 partStart = i;
|
Chris@169
|
920 partShift = shift;
|
Chris@169
|
921 partVelocity = 0;
|
Chris@169
|
922 }
|
Chris@169
|
923 }
|
Chris@169
|
924
|
Chris@303
|
925 int v;
|
Chris@303
|
926 if (m_mode == LiveMode) {
|
Chris@303
|
927 v = round(strength * 30);
|
Chris@303
|
928 } else {
|
Chris@303
|
929 v = round(strength * 2);
|
Chris@303
|
930 }
|
Chris@169
|
931 if (v > partVelocity) {
|
Chris@169
|
932 partVelocity = v;
|
Chris@169
|
933 }
|
Chris@169
|
934 }
|
Chris@169
|
935
|
Chris@169
|
936 if (end >= partStart + partThreshold) {
|
Chris@252
|
937 noteFeatures.push_back(makeNoteFeature(partStart,
|
Chris@252
|
938 end,
|
Chris@252
|
939 note,
|
Chris@252
|
940 partShift,
|
Chris@252
|
941 shiftCount,
|
Chris@252
|
942 partVelocity));
|
Chris@169
|
943 }
|
Chris@169
|
944 }
|
Chris@252
|
945
|
Chris@252
|
946 Silvet::Feature
|
Chris@252
|
947 Silvet::makeNoteFeature(int start,
|
Chris@252
|
948 int end,
|
Chris@252
|
949 int note,
|
Chris@252
|
950 int shift,
|
Chris@252
|
951 int shiftCount,
|
Chris@252
|
952 int velocity)
|
Chris@252
|
953 {
|
Chris@252
|
954 double columnDuration = 1.0 / m_colsPerSec;
|
Chris@252
|
955 int postFilterLatency = int(m_postFilter[0]->getSize() / 2);
|
Chris@252
|
956
|
Chris@252
|
957 Feature f;
|
Chris@252
|
958
|
Chris@252
|
959 f.hasTimestamp = true;
|
Chris@285
|
960 f.timestamp = m_startTime + RealTime::fromSeconds
|
Chris@252
|
961 (columnDuration * (start - postFilterLatency) + 0.02);
|
Chris@252
|
962
|
Chris@252
|
963 f.hasDuration = true;
|
Chris@252
|
964 f.duration = RealTime::fromSeconds
|
Chris@252
|
965 (columnDuration * (end - start));
|
Chris@252
|
966
|
Chris@252
|
967 f.values.clear();
|
Chris@252
|
968
|
Chris@252
|
969 f.values.push_back
|
Chris@252
|
970 (noteFrequency(note, shift, shiftCount));
|
Chris@252
|
971
|
Chris@252
|
972 float inputGain = getInputGainAt(f.timestamp);
|
Chris@252
|
973 // cerr << "adjusting velocity from " << velocity << " to " << round(velocity/inputGain) << endl;
|
Chris@252
|
974 velocity = round(velocity / inputGain);
|
Chris@252
|
975 if (velocity > 127) velocity = 127;
|
Chris@252
|
976 if (velocity < 1) velocity = 1;
|
Chris@252
|
977 f.values.push_back(velocity);
|
Chris@252
|
978
|
Chris@252
|
979 f.label = noteName(note, shift, shiftCount);
|
Chris@252
|
980
|
Chris@252
|
981 return f;
|
Chris@252
|
982 }
|
Chris@252
|
983
|
Chris@252
|
984 float
|
Chris@252
|
985 Silvet::getInputGainAt(RealTime t)
|
Chris@252
|
986 {
|
Chris@252
|
987 map<RealTime, float>::const_iterator i = m_inputGains.lower_bound(t);
|
Chris@252
|
988
|
Chris@252
|
989 if (i == m_inputGains.end()) {
|
Chris@252
|
990 if (i != m_inputGains.begin()) {
|
Chris@252
|
991 --i;
|
Chris@252
|
992 } else {
|
Chris@252
|
993 return 1.f; // no data
|
Chris@252
|
994 }
|
Chris@252
|
995 }
|
Chris@252
|
996
|
Chris@252
|
997 // cerr << "gain at time " << t << " = " << i->second << endl;
|
Chris@252
|
998
|
Chris@252
|
999 return i->second;
|
Chris@252
|
1000 }
|
Chris@252
|
1001
|