Chris@31
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
Chris@31
|
2
|
Chris@31
|
3 /*
|
Chris@31
|
4 Silvet
|
Chris@31
|
5
|
Chris@31
|
6 A Vamp plugin for note transcription.
|
Chris@31
|
7 Centre for Digital Music, Queen Mary University of London.
|
Chris@31
|
8
|
Chris@31
|
9 This program is free software; you can redistribute it and/or
|
Chris@31
|
10 modify it under the terms of the GNU General Public License as
|
Chris@31
|
11 published by the Free Software Foundation; either version 2 of the
|
Chris@31
|
12 License, or (at your option) any later version. See the file
|
Chris@31
|
13 COPYING included with this distribution for more information.
|
Chris@31
|
14 */
|
Chris@31
|
15
|
Chris@31
|
16 #include "Silvet.h"
|
Chris@34
|
17 #include "EM.h"
|
Chris@31
|
18
|
Chris@32
|
19 #include "maths/MedianFilter.h"
|
Chris@55
|
20 #include "maths/MathUtilities.h"
|
Chris@31
|
21 #include "dsp/rateconversion/Resampler.h"
|
Chris@31
|
22
|
Chris@32
|
23 #include "constant-q-cpp/cpp-qm-dsp/CQInterpolated.h"
|
Chris@31
|
24
|
Chris@31
|
25 #include <vector>
|
Chris@31
|
26
|
Chris@32
|
27 #include <cstdio>
|
Chris@32
|
28
|
Chris@31
|
29 using std::vector;
|
Chris@48
|
30 using std::cout;
|
Chris@31
|
31 using std::cerr;
|
Chris@31
|
32 using std::endl;
|
Chris@40
|
33 using Vamp::RealTime;
|
Chris@31
|
34
|
Chris@31
|
// Fixed processing configuration for this file. These values are only
// ever read, so they are declared const to prevent accidental
// modification.

// Sample rate (Hz) the constant-Q transform runs at; input at any
// other rate is resampled to this first
static const int processingSampleRate = 44100;

// Constant-Q resolution in bins per octave
static const int processingBPO = 60;

// Number of constant-Q bins retained per column after pre-processing
static const int processingHeight = 545;

// Number of pitch bins in the pitch activation output
static const int processingNotes = 88;
|
Chris@31
|
39
|
Chris@31
|
40 Silvet::Silvet(float inputSampleRate) :
|
Chris@31
|
41 Plugin(inputSampleRate),
|
Chris@31
|
42 m_resampler(0),
|
Chris@110
|
43 m_cq(0),
|
Chris@113
|
44 m_hqMode(true)
|
Chris@31
|
45 {
|
Chris@114
|
46 EM::initialise();
|
Chris@31
|
47 }
|
Chris@31
|
48
|
Chris@31
|
49 Silvet::~Silvet()
|
Chris@31
|
50 {
|
Chris@31
|
51 delete m_resampler;
|
Chris@31
|
52 delete m_cq;
|
Chris@41
|
53 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
|
Chris@41
|
54 delete m_postFilter[i];
|
Chris@41
|
55 }
|
Chris@31
|
56 }
|
Chris@31
|
57
|
Chris@31
|
58 string
|
Chris@31
|
59 Silvet::getIdentifier() const
|
Chris@31
|
60 {
|
Chris@31
|
61 return "silvet";
|
Chris@31
|
62 }
|
Chris@31
|
63
|
Chris@31
|
64 string
|
Chris@31
|
65 Silvet::getName() const
|
Chris@31
|
66 {
|
Chris@31
|
67 return "Silvet Note Transcription";
|
Chris@31
|
68 }
|
Chris@31
|
69
|
Chris@31
|
70 string
|
Chris@31
|
71 Silvet::getDescription() const
|
Chris@31
|
72 {
|
Chris@31
|
73 // Return something helpful here!
|
Chris@31
|
74 return "";
|
Chris@31
|
75 }
|
Chris@31
|
76
|
Chris@31
|
77 string
|
Chris@31
|
78 Silvet::getMaker() const
|
Chris@31
|
79 {
|
Chris@31
|
80 // Your name here
|
Chris@31
|
81 return "";
|
Chris@31
|
82 }
|
Chris@31
|
83
|
Chris@31
|
84 int
|
Chris@31
|
85 Silvet::getPluginVersion() const
|
Chris@31
|
86 {
|
Chris@31
|
87 return 1;
|
Chris@31
|
88 }
|
Chris@31
|
89
|
Chris@31
|
90 string
|
Chris@31
|
91 Silvet::getCopyright() const
|
Chris@31
|
92 {
|
Chris@31
|
93 // This function is not ideally named. It does not necessarily
|
Chris@31
|
94 // need to say who made the plugin -- getMaker does that -- but it
|
Chris@31
|
95 // should indicate the terms under which it is distributed. For
|
Chris@31
|
96 // example, "Copyright (year). All Rights Reserved", or "GPL"
|
Chris@31
|
97 return "";
|
Chris@31
|
98 }
|
Chris@31
|
99
|
Chris@31
|
100 Silvet::InputDomain
|
Chris@31
|
101 Silvet::getInputDomain() const
|
Chris@31
|
102 {
|
Chris@31
|
103 return TimeDomain;
|
Chris@31
|
104 }
|
Chris@31
|
105
|
Chris@31
|
106 size_t
|
Chris@31
|
107 Silvet::getPreferredBlockSize() const
|
Chris@31
|
108 {
|
Chris@31
|
109 return 0;
|
Chris@31
|
110 }
|
Chris@31
|
111
|
Chris@31
|
112 size_t
|
Chris@31
|
113 Silvet::getPreferredStepSize() const
|
Chris@31
|
114 {
|
Chris@31
|
115 return 0;
|
Chris@31
|
116 }
|
Chris@31
|
117
|
Chris@31
|
118 size_t
|
Chris@31
|
119 Silvet::getMinChannelCount() const
|
Chris@31
|
120 {
|
Chris@31
|
121 return 1;
|
Chris@31
|
122 }
|
Chris@31
|
123
|
Chris@31
|
124 size_t
|
Chris@31
|
125 Silvet::getMaxChannelCount() const
|
Chris@31
|
126 {
|
Chris@31
|
127 return 1;
|
Chris@31
|
128 }
|
Chris@31
|
129
|
Chris@31
|
130 Silvet::ParameterList
|
Chris@31
|
131 Silvet::getParameterDescriptors() const
|
Chris@31
|
132 {
|
Chris@31
|
133 ParameterList list;
|
Chris@110
|
134
|
Chris@110
|
135 ParameterDescriptor desc;
|
Chris@110
|
136 desc.identifier = "mode";
|
Chris@110
|
137 desc.name = "Processing mode";
|
Chris@110
|
138 desc.unit = "";
|
Chris@110
|
139 desc.description = "Determines the tradeoff of processing speed against transcription quality";
|
Chris@110
|
140 desc.minValue = 0;
|
Chris@110
|
141 desc.maxValue = 1;
|
Chris@113
|
142 desc.defaultValue = 1;
|
Chris@110
|
143 desc.isQuantized = true;
|
Chris@110
|
144 desc.quantizeStep = 1;
|
Chris@110
|
145 desc.valueNames.push_back("Draft (faster)");
|
Chris@110
|
146 desc.valueNames.push_back("Intensive (higher quality)");
|
Chris@110
|
147 list.push_back(desc);
|
Chris@110
|
148
|
Chris@31
|
149 return list;
|
Chris@31
|
150 }
|
Chris@31
|
151
|
Chris@31
|
152 float
|
Chris@31
|
153 Silvet::getParameter(string identifier) const
|
Chris@31
|
154 {
|
Chris@110
|
155 if (identifier == "mode") {
|
Chris@110
|
156 return m_hqMode ? 1.f : 0.f;
|
Chris@110
|
157 }
|
Chris@31
|
158 return 0;
|
Chris@31
|
159 }
|
Chris@31
|
160
|
Chris@31
|
161 void
|
Chris@31
|
162 Silvet::setParameter(string identifier, float value)
|
Chris@31
|
163 {
|
Chris@110
|
164 if (identifier == "mode") {
|
Chris@110
|
165 m_hqMode = (value > 0.5);
|
Chris@110
|
166 }
|
Chris@31
|
167 }
|
Chris@31
|
168
|
Chris@31
|
169 Silvet::ProgramList
|
Chris@31
|
170 Silvet::getPrograms() const
|
Chris@31
|
171 {
|
Chris@31
|
172 ProgramList list;
|
Chris@31
|
173 return list;
|
Chris@31
|
174 }
|
Chris@31
|
175
|
Chris@31
|
176 string
|
Chris@31
|
177 Silvet::getCurrentProgram() const
|
Chris@31
|
178 {
|
Chris@31
|
179 return "";
|
Chris@31
|
180 }
|
Chris@31
|
181
|
Chris@31
|
182 void
|
Chris@31
|
183 Silvet::selectProgram(string name)
|
Chris@31
|
184 {
|
Chris@31
|
185 }
|
Chris@31
|
186
|
Chris@31
|
187 Silvet::OutputList
|
Chris@31
|
188 Silvet::getOutputDescriptors() const
|
Chris@31
|
189 {
|
Chris@31
|
190 OutputList list;
|
Chris@31
|
191
|
Chris@31
|
192 OutputDescriptor d;
|
Chris@51
|
193 d.identifier = "notes";
|
Chris@51
|
194 d.name = "Note transcription";
|
Chris@51
|
195 d.description = "Overall note transcription across all instruments";
|
Chris@41
|
196 d.unit = "Hz";
|
Chris@31
|
197 d.hasFixedBinCount = true;
|
Chris@31
|
198 d.binCount = 2;
|
Chris@41
|
199 d.binNames.push_back("Frequency");
|
Chris@31
|
200 d.binNames.push_back("Velocity");
|
Chris@31
|
201 d.hasKnownExtents = false;
|
Chris@31
|
202 d.isQuantized = false;
|
Chris@31
|
203 d.sampleType = OutputDescriptor::VariableSampleRate;
|
Chris@51
|
204 d.sampleRate = m_inputSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
|
Chris@31
|
205 d.hasDuration = true;
|
Chris@32
|
206 m_notesOutputNo = list.size();
|
Chris@32
|
207 list.push_back(d);
|
Chris@32
|
208
|
Chris@51
|
209 d.identifier = "cq";
|
Chris@51
|
210 d.name = "Raw constant-Q";
|
Chris@51
|
211 d.description = "Unfiltered constant-Q time-frequency distribution";
|
Chris@51
|
212 d.unit = "";
|
Chris@51
|
213 d.hasFixedBinCount = true;
|
Chris@51
|
214 d.binCount = processingHeight + 55;
|
Chris@51
|
215 d.binNames.clear();
|
Chris@51
|
216 if (m_cq) {
|
Chris@51
|
217 char name[20];
|
Chris@51
|
218 for (int i = 0; i < processingHeight + 55; ++i) {
|
Chris@51
|
219 float freq = m_cq->getBinFrequency(i);
|
Chris@51
|
220 sprintf(name, "%.1f Hz", freq);
|
Chris@51
|
221 d.binNames.push_back(name);
|
Chris@51
|
222 }
|
Chris@51
|
223 }
|
Chris@51
|
224 d.hasKnownExtents = false;
|
Chris@51
|
225 d.isQuantized = false;
|
Chris@51
|
226 d.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@51
|
227 d.sampleRate = m_inputSampleRate / (m_cq ? m_cq->getColumnHop() : 62);
|
Chris@51
|
228 d.hasDuration = false;
|
Chris@51
|
229 m_cqOutputNo = list.size();
|
Chris@51
|
230 list.push_back(d);
|
Chris@51
|
231
|
Chris@32
|
232 d.identifier = "inputgrid";
|
Chris@51
|
233 d.name = "Filtered constant-Q";
|
Chris@51
|
234 d.description = "Filtered constant-Q time-frequency distribution used as input to the PLCA step";
|
Chris@32
|
235 d.unit = "";
|
Chris@32
|
236 d.hasFixedBinCount = true;
|
Chris@32
|
237 d.binCount = processingHeight;
|
Chris@32
|
238 d.binNames.clear();
|
Chris@32
|
239 if (m_cq) {
|
Chris@32
|
240 char name[20];
|
Chris@32
|
241 for (int i = 0; i < processingHeight; ++i) {
|
Chris@32
|
242 float freq = m_cq->getBinFrequency(i + 55);
|
Chris@32
|
243 sprintf(name, "%.1f Hz", freq);
|
Chris@32
|
244 d.binNames.push_back(name);
|
Chris@32
|
245 }
|
Chris@32
|
246 }
|
Chris@32
|
247 d.hasKnownExtents = false;
|
Chris@32
|
248 d.isQuantized = false;
|
Chris@32
|
249 d.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@32
|
250 d.sampleRate = 25;
|
Chris@32
|
251 d.hasDuration = false;
|
Chris@51
|
252 m_fcqOutputNo = list.size();
|
Chris@31
|
253 list.push_back(d);
|
Chris@31
|
254
|
Chris@51
|
255 d.identifier = "pitches";
|
Chris@51
|
256 d.name = "Pitch activation";
|
Chris@51
|
257 d.description = "Estimated pitch activation matrix";
|
Chris@38
|
258 d.unit = "";
|
Chris@38
|
259 d.hasFixedBinCount = true;
|
Chris@55
|
260 d.binCount = processingNotes;
|
Chris@38
|
261 d.binNames.clear();
|
Chris@55
|
262 for (int i = 0; i < processingNotes; ++i) {
|
Chris@38
|
263 d.binNames.push_back(noteName(i));
|
Chris@38
|
264 }
|
Chris@38
|
265 d.hasKnownExtents = false;
|
Chris@38
|
266 d.isQuantized = false;
|
Chris@38
|
267 d.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@38
|
268 d.sampleRate = 25;
|
Chris@38
|
269 d.hasDuration = false;
|
Chris@38
|
270 m_pitchOutputNo = list.size();
|
Chris@38
|
271 list.push_back(d);
|
Chris@38
|
272
|
Chris@31
|
273 return list;
|
Chris@31
|
274 }
|
Chris@31
|
275
|
Chris@38
|
276 std::string
|
Chris@38
|
277 Silvet::noteName(int i) const
|
Chris@38
|
278 {
|
Chris@38
|
279 static const char *names[] = {
|
Chris@38
|
280 "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"
|
Chris@38
|
281 };
|
Chris@38
|
282
|
Chris@38
|
283 const char *n = names[i % 12];
|
Chris@38
|
284
|
Chris@38
|
285 int oct = (i + 9) / 12;
|
Chris@38
|
286
|
Chris@38
|
287 char buf[20];
|
Chris@38
|
288 sprintf(buf, "%s%d", n, oct);
|
Chris@38
|
289
|
Chris@38
|
290 return buf;
|
Chris@38
|
291 }
|
Chris@38
|
292
|
Chris@41
|
293 float
|
Chris@41
|
294 Silvet::noteFrequency(int note) const
|
Chris@41
|
295 {
|
Chris@41
|
296 return float(27.5 * pow(2.0, note / 12.0));
|
Chris@41
|
297 }
|
Chris@41
|
298
|
Chris@31
|
299 bool
|
Chris@31
|
300 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
Chris@31
|
301 {
|
Chris@31
|
302 if (channels < getMinChannelCount() ||
|
Chris@31
|
303 channels > getMaxChannelCount()) return false;
|
Chris@31
|
304
|
Chris@31
|
305 if (stepSize != blockSize) {
|
Chris@31
|
306 cerr << "Silvet::initialise: Step size must be the same as block size ("
|
Chris@31
|
307 << stepSize << " != " << blockSize << ")" << endl;
|
Chris@31
|
308 return false;
|
Chris@31
|
309 }
|
Chris@31
|
310
|
Chris@31
|
311 m_blockSize = blockSize;
|
Chris@31
|
312
|
Chris@31
|
313 reset();
|
Chris@31
|
314
|
Chris@31
|
315 return true;
|
Chris@31
|
316 }
|
Chris@31
|
317
|
Chris@31
|
318 void
|
Chris@31
|
319 Silvet::reset()
|
Chris@31
|
320 {
|
Chris@31
|
321 delete m_resampler;
|
Chris@31
|
322 delete m_cq;
|
Chris@31
|
323
|
Chris@31
|
324 if (m_inputSampleRate != processingSampleRate) {
|
Chris@31
|
325 m_resampler = new Resampler(m_inputSampleRate, processingSampleRate);
|
Chris@31
|
326 } else {
|
Chris@31
|
327 m_resampler = 0;
|
Chris@31
|
328 }
|
Chris@31
|
329
|
Chris@32
|
330 m_cq = new CQInterpolated
|
Chris@32
|
331 (processingSampleRate, 27.5, processingSampleRate / 3, processingBPO,
|
Chris@32
|
332 CQInterpolated::Linear);
|
Chris@31
|
333
|
Chris@41
|
334 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
|
Chris@41
|
335 delete m_postFilter[i];
|
Chris@41
|
336 }
|
Chris@41
|
337 m_postFilter.clear();
|
Chris@41
|
338 for (int i = 0; i < processingNotes; ++i) {
|
Chris@41
|
339 m_postFilter.push_back(new MedianFilter<double>(3));
|
Chris@41
|
340 }
|
Chris@41
|
341 m_pianoRoll.clear();
|
Chris@32
|
342 m_columnCount = 0;
|
Chris@32
|
343 m_reducedColumnCount = 0;
|
Chris@40
|
344 m_startTime = RealTime::zeroTime;
|
Chris@31
|
345 }
|
Chris@31
|
346
|
Chris@31
|
347 Silvet::FeatureSet
|
Chris@31
|
348 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
Chris@31
|
349 {
|
Chris@40
|
350 if (m_columnCount == 0) {
|
Chris@40
|
351 m_startTime = timestamp;
|
Chris@40
|
352 }
|
Chris@40
|
353
|
Chris@31
|
354 vector<double> data;
|
Chris@40
|
355 for (int i = 0; i < m_blockSize; ++i) {
|
Chris@40
|
356 data.push_back(inputBuffers[0][i]);
|
Chris@40
|
357 }
|
Chris@31
|
358
|
Chris@31
|
359 if (m_resampler) {
|
Chris@31
|
360 data = m_resampler->process(data.data(), data.size());
|
Chris@31
|
361 }
|
Chris@31
|
362
|
Chris@32
|
363 Grid cqout = m_cq->process(data);
|
Chris@51
|
364 FeatureSet fs = transcribe(cqout);
|
Chris@51
|
365
|
Chris@51
|
366 for (int i = 0; i < (int)cqout.size(); ++i) {
|
Chris@51
|
367 Feature f;
|
Chris@51
|
368 for (int j = 0; j < (int)cqout[i].size(); ++j) {
|
Chris@51
|
369 f.values.push_back(float(cqout[i][j]));
|
Chris@51
|
370 }
|
Chris@51
|
371 fs[m_cqOutputNo].push_back(f);
|
Chris@51
|
372 }
|
Chris@51
|
373
|
Chris@51
|
374 return fs;
|
Chris@34
|
375 }
|
Chris@34
|
376
|
Chris@34
|
377 Silvet::FeatureSet
|
Chris@34
|
378 Silvet::getRemainingFeatures()
|
Chris@34
|
379 {
|
Chris@34
|
380 Grid cqout = m_cq->getRemainingBlocks();
|
Chris@51
|
381 FeatureSet fs = transcribe(cqout);
|
Chris@51
|
382
|
Chris@51
|
383 for (int i = 0; i < (int)cqout.size(); ++i) {
|
Chris@51
|
384 Feature f;
|
Chris@51
|
385 for (int j = 0; j < (int)cqout[i].size(); ++j) {
|
Chris@51
|
386 f.values.push_back(float(cqout[i][j]));
|
Chris@51
|
387 }
|
Chris@51
|
388 fs[m_cqOutputNo].push_back(f);
|
Chris@51
|
389 }
|
Chris@51
|
390
|
Chris@51
|
391 return fs;
|
Chris@34
|
392 }
|
Chris@34
|
393
|
Chris@34
|
394 Silvet::FeatureSet
|
Chris@34
|
395 Silvet::transcribe(const Grid &cqout)
|
Chris@34
|
396 {
|
Chris@32
|
397 Grid filtered = preProcess(cqout);
|
Chris@31
|
398
|
Chris@32
|
399 FeatureSet fs;
|
Chris@32
|
400
|
Chris@104
|
401 if (filtered.empty()) return fs;
|
Chris@104
|
402
|
Chris@32
|
403 for (int i = 0; i < (int)filtered.size(); ++i) {
|
Chris@32
|
404 Feature f;
|
Chris@32
|
405 for (int j = 0; j < processingHeight; ++j) {
|
Chris@32
|
406 f.values.push_back(float(filtered[i][j]));
|
Chris@32
|
407 }
|
Chris@51
|
408 fs[m_fcqOutputNo].push_back(f);
|
Chris@32
|
409 }
|
Chris@32
|
410
|
Chris@34
|
411 int width = filtered.size();
|
Chris@34
|
412
|
Chris@34
|
413 int iterations = 12;
|
Chris@34
|
414
|
Chris@104
|
415 int stride = 8;
|
Chris@37
|
416
|
Chris@104
|
417 for (int i = 0; i < width; i += stride) {
|
Chris@104
|
418
|
Chris@104
|
419 int chunk = stride;
|
Chris@104
|
420 if (i + chunk > width) {
|
Chris@104
|
421 chunk = width - i;
|
Chris@37
|
422 }
|
Chris@37
|
423
|
Chris@104
|
424 vector<vector<double> > pitchSubMatrix
|
Chris@104
|
425 (chunk, vector<double>(processingNotes));
|
Chris@37
|
426
|
Chris@104
|
427 #pragma omp parallel for
|
Chris@104
|
428 for (int k = 0; k < chunk; ++k) {
|
Chris@104
|
429
|
Chris@104
|
430 double sum = 0.0;
|
Chris@104
|
431 for (int j = 0; j < processingHeight; ++j) {
|
Chris@104
|
432 sum += filtered[i + k][j];
|
Chris@104
|
433 }
|
Chris@104
|
434
|
Chris@104
|
435 if (sum < 1e-5) continue;
|
Chris@104
|
436
|
Chris@112
|
437 EM em(m_hqMode);
|
Chris@104
|
438 for (int j = 0; j < iterations; ++j) {
|
Chris@106
|
439 em.iterate(filtered[i + k].data());
|
Chris@104
|
440 }
|
Chris@104
|
441
|
Chris@106
|
442 const double *pitches = em.getPitchDistribution();
|
Chris@104
|
443
|
Chris@104
|
444 for (int j = 0; j < processingNotes; ++j) {
|
Chris@104
|
445 pitchSubMatrix[k][j] = pitches[j] * sum;
|
Chris@104
|
446 }
|
Chris@34
|
447 }
|
Chris@104
|
448
|
Chris@104
|
449 for (int k = 0; k < chunk; ++k) {
|
Chris@37
|
450
|
Chris@104
|
451 const vector<double> &pitches = pitchSubMatrix[k];
|
Chris@41
|
452
|
Chris@104
|
453 Feature f;
|
Chris@104
|
454 for (int j = 0; j < processingNotes; ++j) {
|
Chris@104
|
455 f.values.push_back(float(pitches[j]));
|
Chris@104
|
456 }
|
Chris@104
|
457 fs[m_pitchOutputNo].push_back(f);
|
Chris@38
|
458
|
Chris@104
|
459 FeatureList noteFeatures = postProcess(pitches);
|
Chris@45
|
460
|
Chris@104
|
461 for (FeatureList::const_iterator fi = noteFeatures.begin();
|
Chris@104
|
462 fi != noteFeatures.end(); ++fi) {
|
Chris@104
|
463 fs[m_notesOutputNo].push_back(*fi);
|
Chris@104
|
464 }
|
Chris@40
|
465 }
|
Chris@34
|
466 }
|
Chris@34
|
467
|
Chris@32
|
468 return fs;
|
Chris@31
|
469 }
|
Chris@31
|
470
|
Chris@32
|
471 Silvet::Grid
|
Chris@32
|
472 Silvet::preProcess(const Grid &in)
|
Chris@32
|
473 {
|
Chris@32
|
474 int width = in.size();
|
Chris@32
|
475
|
Chris@32
|
476 // reduce to 100 columns per second, or one column every 441 samples
|
Chris@32
|
477
|
Chris@32
|
478 int spacing = processingSampleRate / 100;
|
Chris@32
|
479
|
Chris@32
|
480 Grid out;
|
Chris@32
|
481
|
Chris@58
|
482 // We count the CQ latency in terms of processing hops, but
|
Chris@58
|
483 // actually it probably isn't an exact number of hops so this
|
Chris@58
|
484 // isn't quite accurate. But the small constant offset is
|
Chris@58
|
485 // practically irrelevant compared to the jitter from the 40ms
|
Chris@58
|
486 // frame size we reduce to in a moment
|
Chris@33
|
487 int latentColumns = m_cq->getLatency() / m_cq->getColumnHop();
|
Chris@33
|
488
|
Chris@32
|
489 for (int i = 0; i < width; ++i) {
|
Chris@32
|
490
|
Chris@33
|
491 if (m_columnCount < latentColumns) {
|
Chris@33
|
492 ++m_columnCount;
|
Chris@33
|
493 continue;
|
Chris@33
|
494 }
|
Chris@33
|
495
|
Chris@32
|
496 int prevSampleNo = (m_columnCount - 1) * m_cq->getColumnHop();
|
Chris@32
|
497 int sampleNo = m_columnCount * m_cq->getColumnHop();
|
Chris@32
|
498
|
Chris@32
|
499 bool select = (sampleNo / spacing != prevSampleNo / spacing);
|
Chris@32
|
500
|
Chris@32
|
501 if (select) {
|
Chris@32
|
502 vector<double> inCol = in[i];
|
Chris@32
|
503 vector<double> outCol(processingHeight);
|
Chris@32
|
504
|
Chris@32
|
505 // we reverse the column as we go (the CQ output is
|
Chris@32
|
506 // "upside-down", with high frequencies at the start of
|
Chris@32
|
507 // each column, and we want it the other way around) and
|
Chris@32
|
508 // then ignore the first 55 (lowest-frequency) bins,
|
Chris@32
|
509 // giving us 545 bins instead of 600
|
Chris@32
|
510
|
Chris@32
|
511 for (int j = 0; j < processingHeight; ++j) {
|
Chris@46
|
512 int ix = inCol.size() - j - 55;
|
Chris@46
|
513 outCol[j] = inCol[ix];
|
Chris@46
|
514 }
|
Chris@32
|
515
|
Chris@46
|
516 vector<double> noiseLevel1 =
|
Chris@46
|
517 MedianFilter<double>::filter(40, outCol);
|
Chris@46
|
518 for (int j = 0; j < processingHeight; ++j) {
|
Chris@46
|
519 noiseLevel1[j] = std::min(outCol[j], noiseLevel1[j]);
|
Chris@46
|
520 }
|
Chris@32
|
521
|
Chris@46
|
522 vector<double> noiseLevel2 =
|
Chris@46
|
523 MedianFilter<double>::filter(40, noiseLevel1);
|
Chris@46
|
524 for (int j = 0; j < processingHeight; ++j) {
|
Chris@46
|
525 outCol[j] = std::max(outCol[j] - noiseLevel2[j], 0.0);
|
Chris@32
|
526 }
|
Chris@32
|
527
|
Chris@32
|
528 // then we only use every fourth filtered column, for 25
|
Chris@32
|
529 // columns per second in the eventual grid
|
Chris@32
|
530
|
Chris@32
|
531 if (m_reducedColumnCount % 4 == 0) {
|
Chris@32
|
532 out.push_back(outCol);
|
Chris@32
|
533 }
|
Chris@32
|
534
|
Chris@32
|
535 ++m_reducedColumnCount;
|
Chris@32
|
536 }
|
Chris@32
|
537
|
Chris@32
|
538 ++m_columnCount;
|
Chris@32
|
539 }
|
Chris@32
|
540
|
Chris@32
|
541 return out;
|
Chris@32
|
542 }
|
Chris@32
|
543
|
Chris@41
|
544 Vamp::Plugin::FeatureList
|
Chris@41
|
545 Silvet::postProcess(const vector<double> &pitches)
|
Chris@41
|
546 {
|
Chris@41
|
547 vector<double> filtered;
|
Chris@41
|
548
|
Chris@41
|
549 for (int j = 0; j < processingNotes; ++j) {
|
Chris@55
|
550 m_postFilter[j]->push(pitches[j]);
|
Chris@41
|
551 filtered.push_back(m_postFilter[j]->get());
|
Chris@41
|
552 }
|
Chris@41
|
553
|
Chris@69
|
554 int postFilterLatency = int(m_postFilter[0]->getSize() / 2);
|
Chris@69
|
555
|
Chris@41
|
556 // Threshold for level and reduce number of candidate pitches
|
Chris@41
|
557
|
Chris@41
|
558 int polyphony = 5;
|
Chris@41
|
559 double threshold = 4.8;
|
Chris@41
|
560
|
Chris@41
|
561 typedef std::multimap<double, int> ValueIndexMap;
|
Chris@41
|
562
|
Chris@41
|
563 ValueIndexMap strengths;
|
Chris@41
|
564 for (int j = 0; j < processingNotes; ++j) {
|
Chris@41
|
565 strengths.insert(ValueIndexMap::value_type(filtered[j], j));
|
Chris@41
|
566 }
|
Chris@41
|
567
|
Chris@55
|
568 map<int, double> active;
|
Chris@41
|
569 ValueIndexMap::const_iterator si = strengths.end();
|
Chris@45
|
570 while (int(active.size()) < polyphony) {
|
Chris@41
|
571 --si;
|
Chris@41
|
572 if (si->first < threshold) break;
|
Chris@41
|
573 cerr << si->second << " : " << si->first << endl;
|
Chris@55
|
574 active[si->second] = si->first;
|
Chris@45
|
575 if (si == strengths.begin()) break;
|
Chris@41
|
576 }
|
Chris@41
|
577
|
Chris@41
|
578 // Minimum duration pruning, and conversion to notes. We can only
|
Chris@41
|
579 // report notes that have just ended (i.e. that are absent in the
|
Chris@41
|
580 // latest active set but present in the last set in the piano
|
Chris@41
|
581 // roll) -- any notes that ended earlier will have been reported
|
Chris@41
|
582 // already, and if they haven't ended, we don't know their
|
Chris@41
|
583 // duration.
|
Chris@41
|
584
|
Chris@41
|
585 int width = m_pianoRoll.size();
|
Chris@41
|
586
|
Chris@41
|
587 int durationThreshold = 2; // columns
|
Chris@41
|
588
|
Chris@41
|
589 FeatureList noteFeatures;
|
Chris@41
|
590
|
Chris@41
|
591 if (width < durationThreshold + 1) {
|
Chris@41
|
592 m_pianoRoll.push_back(active);
|
Chris@41
|
593 return noteFeatures;
|
Chris@41
|
594 }
|
Chris@41
|
595
|
Chris@41
|
596 // we have 25 columns per second
|
Chris@41
|
597 double columnDuration = 1.0 / 25.0;
|
Chris@41
|
598
|
Chris@55
|
599 for (map<int, double>::const_iterator ni = m_pianoRoll[width-1].begin();
|
Chris@41
|
600 ni != m_pianoRoll[width-1].end(); ++ni) {
|
Chris@41
|
601
|
Chris@55
|
602 int note = ni->first;
|
Chris@41
|
603
|
Chris@41
|
604 if (active.find(note) != active.end()) {
|
Chris@41
|
605 // the note is still playing
|
Chris@41
|
606 continue;
|
Chris@41
|
607 }
|
Chris@41
|
608
|
Chris@41
|
609 // the note was playing but just ended
|
Chris@41
|
610 int end = width;
|
Chris@41
|
611 int start = end-1;
|
Chris@41
|
612
|
Chris@57
|
613 double maxStrength = 0.0;
|
Chris@55
|
614
|
Chris@41
|
615 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) {
|
Chris@57
|
616 double strength = m_pianoRoll[start][note];
|
Chris@57
|
617 if (strength > maxStrength) {
|
Chris@57
|
618 maxStrength = strength;
|
Chris@57
|
619 }
|
Chris@41
|
620 --start;
|
Chris@41
|
621 }
|
Chris@41
|
622 ++start;
|
Chris@41
|
623
|
Chris@41
|
624 int duration = width - start;
|
Chris@62
|
625 // cerr << "duration " << duration << " for just-ended note " << note << endl;
|
Chris@41
|
626 if (duration < durationThreshold) {
|
Chris@41
|
627 // spurious
|
Chris@41
|
628 continue;
|
Chris@41
|
629 }
|
Chris@41
|
630
|
Chris@57
|
631 int velocity = maxStrength * 2;
|
Chris@55
|
632 if (velocity > 127) velocity = 127;
|
Chris@55
|
633
|
Chris@62
|
634 cerr << "Found a genuine note, starting at " << columnDuration * start << " with duration " << columnDuration * duration << endl;
|
Chris@62
|
635
|
Chris@41
|
636 Feature nf;
|
Chris@41
|
637 nf.hasTimestamp = true;
|
Chris@69
|
638 nf.timestamp = RealTime::fromSeconds
|
Chris@69
|
639 (columnDuration * (start - postFilterLatency));
|
Chris@41
|
640 nf.hasDuration = true;
|
Chris@69
|
641 nf.duration = RealTime::fromSeconds
|
Chris@69
|
642 (columnDuration * duration);
|
Chris@41
|
643 nf.values.push_back(noteFrequency(note));
|
Chris@55
|
644 nf.values.push_back(velocity);
|
Chris@41
|
645 nf.label = noteName(note);
|
Chris@41
|
646 noteFeatures.push_back(nf);
|
Chris@41
|
647 }
|
Chris@41
|
648
|
Chris@41
|
649 m_pianoRoll.push_back(active);
|
Chris@41
|
650
|
Chris@62
|
651 // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl;
|
Chris@41
|
652
|
Chris@41
|
653 return noteFeatures;
|
Chris@41
|
654 }
|
Chris@41
|
655
|