comparison src/Silvet.cpp @ 41:b49597c93132

Return an actual note transcription
author Chris Cannam
date Sat, 05 Apr 2014 17:57:50 +0100
parents 303c06efa8d2
children e92376d450b0 d7f1c10f4e1d
comparison
equal deleted inserted replaced
40:303c06efa8d2 41:b49597c93132
48 delete m_cq; 48 delete m_cq;
49 for (int i = 0; i < (int)m_filterA.size(); ++i) { 49 for (int i = 0; i < (int)m_filterA.size(); ++i) {
50 delete m_filterA[i]; 50 delete m_filterA[i];
51 delete m_filterB[i]; 51 delete m_filterB[i];
52 } 52 }
53 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
54 delete m_postFilter[i];
55 }
53 } 56 }
54 57
55 string 58 string
56 Silvet::getIdentifier() const 59 Silvet::getIdentifier() const
57 { 60 {
167 170
168 OutputDescriptor d; 171 OutputDescriptor d;
169 d.identifier = "transcription"; 172 d.identifier = "transcription";
170 d.name = "Transcription"; 173 d.name = "Transcription";
171 d.description = ""; //!!! 174 d.description = ""; //!!!
172 d.unit = "MIDI Pitch"; 175 d.unit = "Hz";
173 d.hasFixedBinCount = true; 176 d.hasFixedBinCount = true;
174 d.binCount = 2; 177 d.binCount = 2;
175 d.binNames.push_back("Note"); 178 d.binNames.push_back("Frequency");
176 d.binNames.push_back("Velocity"); 179 d.binNames.push_back("Velocity");
177 d.hasKnownExtents = false; 180 d.hasKnownExtents = false;
178 d.isQuantized = false; 181 d.isQuantized = false;
179 d.sampleType = OutputDescriptor::VariableSampleRate; 182 d.sampleType = OutputDescriptor::VariableSampleRate;
180 d.sampleRate = m_inputSampleRate / (m_cq ? m_cq->getColumnHop() : 256); 183 d.sampleRate = m_inputSampleRate / (m_cq ? m_cq->getColumnHop() : 256);
241 sprintf(buf, "%s%d", n, oct); 244 sprintf(buf, "%s%d", n, oct);
242 245
243 return buf; 246 return buf;
244 } 247 }
245 248
249 float
250 Silvet::noteFrequency(int note) const
251 {
252 return float(27.5 * pow(2.0, note / 12.0));
253 }
254
246 bool 255 bool
247 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize) 256 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize)
248 { 257 {
249 if (channels < getMinChannelCount() || 258 if (channels < getMinChannelCount() ||
250 channels > getMaxChannelCount()) return false; 259 channels > getMaxChannelCount()) return false;
280 289
281 for (int i = 0; i < (int)m_filterA.size(); ++i) { 290 for (int i = 0; i < (int)m_filterA.size(); ++i) {
282 delete m_filterA[i]; 291 delete m_filterA[i];
283 delete m_filterB[i]; 292 delete m_filterB[i];
284 } 293 }
294 for (int i = 0; i < (int)m_postFilter.size(); ++i) {
295 delete m_postFilter[i];
296 }
285 m_filterA.clear(); 297 m_filterA.clear();
286 m_filterB.clear(); 298 m_filterB.clear();
299 m_postFilter.clear();
287 for (int i = 0; i < processingHeight; ++i) { 300 for (int i = 0; i < processingHeight; ++i) {
288 m_filterA.push_back(new MedianFilter<double>(40)); 301 m_filterA.push_back(new MedianFilter<double>(40));
289 m_filterB.push_back(new MedianFilter<double>(40)); 302 m_filterB.push_back(new MedianFilter<double>(40));
290 } 303 }
304 for (int i = 0; i < processingNotes; ++i) {
305 m_postFilter.push_back(new MedianFilter<double>(3));
306 }
307 m_pianoRoll.clear();
291 m_columnCount = 0; 308 m_columnCount = 0;
292 m_reducedColumnCount = 0; 309 m_reducedColumnCount = 0;
293 m_transcribedColumnCount = 0;
294 m_startTime = RealTime::zeroTime; 310 m_startTime = RealTime::zeroTime;
295 } 311 }
296 312
297 Silvet::FeatureSet 313 Silvet::FeatureSet
298 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp) 314 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
338 354
339 int width = filtered.size(); 355 int width = filtered.size();
340 356
341 int iterations = 12; 357 int iterations = 12;
342 358
343 // we have 25 columns per second
344 double columnDuration = 1.0 / 25.0;
345
346 for (int i = 0; i < width; ++i) { 359 for (int i = 0; i < width; ++i) {
347
348 RealTime t = m_startTime +
349 RealTime::fromSeconds(m_transcribedColumnCount * columnDuration);
350
351 ++m_transcribedColumnCount;
352 360
353 double sum = 0.0; 361 double sum = 0.0;
354 for (int j = 0; j < processingHeight; ++j) { 362 for (int j = 0; j < processingHeight; ++j) {
355 sum += filtered[i][j]; 363 sum += filtered[i][j];
356 } 364 }
362 for (int j = 0; j < iterations; ++j) { 370 for (int j = 0; j < iterations; ++j) {
363 em.iterate(filtered[i]); 371 em.iterate(filtered[i]);
364 } 372 }
365 373
366 vector<double> pitches = em.getPitchDistribution(); 374 vector<double> pitches = em.getPitchDistribution();
375
376 for (int j = 0; j < processingNotes; ++j) {
377 pitches[j] *= sum;
378 }
379
367 Feature f; 380 Feature f;
368 for (int j = 0; j < (int)pitches.size(); ++j) { 381 for (int j = 0; j < processingNotes; ++j) {
369 f.values.push_back(float(pitches[j] * sum)); 382 f.values.push_back(float(pitches[j]));
370 } 383 }
371 fs[m_pitchOutputNo].push_back(f); 384 fs[m_pitchOutputNo].push_back(f);
372 385
373 //!!! fake notes 386 FeatureList noteFeatures = postProcess(pitches);
374 for (int j = 0; j < (int)pitches.size(); ++j) { 387 for (FeatureList::const_iterator fi = noteFeatures.begin();
375 if (pitches[j] * sum > 5) { 388 fi != noteFeatures.end(); ++fi) {
376 cerr << "pitch " << j << " level: " << pitches[j] * sum << endl; 389 fs[m_notesOutputNo].push_back(*fi);
377 Feature nf; 390 }
378 nf.hasTimestamp = true;
379 nf.timestamp = t;
380 nf.hasDuration = true;
381 nf.duration = RealTime::fromSeconds(columnDuration);
382 nf.values.push_back(j + 21);
383 float velocity = pitches[j] * sum * 2;
384 if (velocity > 127.f) velocity = 127.f;
385 nf.values.push_back(velocity);
386 fs[m_notesOutputNo].push_back(nf);
387 }
388 }
389
390 //!!! now do something with the results from em!
391 em.report();
392 } 391 }
393 392
394 return fs; 393 return fs;
395 } 394 }
396 395
460 } 459 }
461 460
462 return out; 461 return out;
463 } 462 }
464 463
464 Vamp::Plugin::FeatureList
465 Silvet::postProcess(const vector<double> &pitches)
466 {
467 vector<double> filtered;
468
469 for (int j = 0; j < processingNotes; ++j) {
470 m_postFilter[j]->push(pitches[j]);
471 filtered.push_back(m_postFilter[j]->get());
472 }
473
474 // Threshold for level and reduce number of candidate pitches
475
476 int polyphony = 5;
477 double threshold = 4.8;
478
479 typedef std::multimap<double, int> ValueIndexMap;
480
481 ValueIndexMap strengths;
482 for (int j = 0; j < processingNotes; ++j) {
483 strengths.insert(ValueIndexMap::value_type(filtered[j], j));
484 }
485
486 set<int> active;
487 ValueIndexMap::const_iterator si = strengths.end();
488 for (int j = 0; j < polyphony; ++j) {
489 --si;
490 if (si->first < threshold) break;
491 cerr << si->second << " : " << si->first << endl;
492 active.insert(si->second);
493 }
494
495 // Minimum duration pruning, and conversion to notes. We can only
496 // report notes that have just ended (i.e. that are absent in the
497 // latest active set but present in the last set in the piano
498 // roll) -- any notes that ended earlier will have been reported
499 // already, and if they haven't ended, we don't know their
500 // duration.
501
502 int width = m_pianoRoll.size();
503
504 int durationThreshold = 2; // columns
505
506 FeatureList noteFeatures;
507
508 if (width < durationThreshold + 1) {
509 m_pianoRoll.push_back(active);
510 return noteFeatures;
511 }
512
513 // we have 25 columns per second
514 double columnDuration = 1.0 / 25.0;
515
516 for (set<int>::const_iterator ni = m_pianoRoll[width-1].begin();
517 ni != m_pianoRoll[width-1].end(); ++ni) {
518
519 int note = *ni;
520
521 if (active.find(note) != active.end()) {
522 // the note is still playing
523 continue;
524 }
525
526 // the note was playing but just ended
527 int end = width;
528 int start = end-1;
529
530 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) {
531 --start;
532 }
533 ++start;
534
535 int duration = width - start;
536 cerr << "duration " << duration << " for just-ended note " << note << endl;
537 if (duration < durationThreshold) {
538 // spurious
539 continue;
540 }
541
542 Feature nf;
543 nf.hasTimestamp = true;
544 nf.timestamp = RealTime::fromSeconds(columnDuration * start);
545 nf.hasDuration = true;
546 nf.duration = RealTime::fromSeconds(columnDuration * duration);
547 nf.values.push_back(noteFrequency(note));
548 nf.values.push_back(80.f); //!!! todo: calculate velocity
549 nf.label = noteName(note);
550 noteFeatures.push_back(nf);
551 }
552
553 m_pianoRoll.push_back(active);
554
555 cerr << "returning " << noteFeatures.size() << " complete notes" << endl;
556
557 return noteFeatures;
558 }
559