Mercurial > hg > silvet
comparison src/Silvet.cpp @ 41:b49597c93132
Return an actual note transcription
author | Chris Cannam |
---|---|
date | Sat, 05 Apr 2014 17:57:50 +0100 |
parents | 303c06efa8d2 |
children | e92376d450b0 d7f1c10f4e1d |
comparison
equal
deleted
inserted
replaced
40:303c06efa8d2 | 41:b49597c93132 |
---|---|
48 delete m_cq; | 48 delete m_cq; |
49 for (int i = 0; i < (int)m_filterA.size(); ++i) { | 49 for (int i = 0; i < (int)m_filterA.size(); ++i) { |
50 delete m_filterA[i]; | 50 delete m_filterA[i]; |
51 delete m_filterB[i]; | 51 delete m_filterB[i]; |
52 } | 52 } |
| | 53 for (int i = 0; i < (int)m_postFilter.size(); ++i) { |
| | 54 delete m_postFilter[i]; |
| | 55 } |
53 } | 56 } |
54 | 57 |
55 string | 58 string |
56 Silvet::getIdentifier() const | 59 Silvet::getIdentifier() const |
57 { | 60 { |
167 | 170 |
168 OutputDescriptor d; | 171 OutputDescriptor d; |
169 d.identifier = "transcription"; | 172 d.identifier = "transcription"; |
170 d.name = "Transcription"; | 173 d.name = "Transcription"; |
171 d.description = ""; //!!! | 174 d.description = ""; //!!! |
172 d.unit = "MIDI Pitch"; | 175 d.unit = "Hz"; |
173 d.hasFixedBinCount = true; | 176 d.hasFixedBinCount = true; |
174 d.binCount = 2; | 177 d.binCount = 2; |
175 d.binNames.push_back("Note"); | 178 d.binNames.push_back("Frequency"); |
176 d.binNames.push_back("Velocity"); | 179 d.binNames.push_back("Velocity"); |
177 d.hasKnownExtents = false; | 180 d.hasKnownExtents = false; |
178 d.isQuantized = false; | 181 d.isQuantized = false; |
179 d.sampleType = OutputDescriptor::VariableSampleRate; | 182 d.sampleType = OutputDescriptor::VariableSampleRate; |
180 d.sampleRate = m_inputSampleRate / (m_cq ? m_cq->getColumnHop() : 256); | 183 d.sampleRate = m_inputSampleRate / (m_cq ? m_cq->getColumnHop() : 256); |
241 sprintf(buf, "%s%d", n, oct); | 244 sprintf(buf, "%s%d", n, oct); |
242 | 245 |
243 return buf; | 246 return buf; |
244 } | 247 } |
245 | 248 |
| | 249 float |
| | 250 Silvet::noteFrequency(int note) const |
| | 251 { |
| | 252 return float(27.5 * pow(2.0, note / 12.0)); |
| | 253 } |
| | 254 |
246 bool | 255 bool |
247 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize) | 256 Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize) |
248 { | 257 { |
249 if (channels < getMinChannelCount() || | 258 if (channels < getMinChannelCount() || |
250 channels > getMaxChannelCount()) return false; | 259 channels > getMaxChannelCount()) return false; |
280 | 289 |
281 for (int i = 0; i < (int)m_filterA.size(); ++i) { | 290 for (int i = 0; i < (int)m_filterA.size(); ++i) { |
282 delete m_filterA[i]; | 291 delete m_filterA[i]; |
283 delete m_filterB[i]; | 292 delete m_filterB[i]; |
284 } | 293 } |
| | 294 for (int i = 0; i < (int)m_postFilter.size(); ++i) { |
| | 295 delete m_postFilter[i]; |
| | 296 } |
285 m_filterA.clear(); | 297 m_filterA.clear(); |
286 m_filterB.clear(); | 298 m_filterB.clear(); |
| | 299 m_postFilter.clear(); |
287 for (int i = 0; i < processingHeight; ++i) { | 300 for (int i = 0; i < processingHeight; ++i) { |
288 m_filterA.push_back(new MedianFilter<double>(40)); | 301 m_filterA.push_back(new MedianFilter<double>(40)); |
289 m_filterB.push_back(new MedianFilter<double>(40)); | 302 m_filterB.push_back(new MedianFilter<double>(40)); |
290 } | 303 } |
| | 304 for (int i = 0; i < processingNotes; ++i) { |
| | 305 m_postFilter.push_back(new MedianFilter<double>(3)); |
| | 306 } |
| | 307 m_pianoRoll.clear(); |
291 m_columnCount = 0; | 308 m_columnCount = 0; |
292 m_reducedColumnCount = 0; | 309 m_reducedColumnCount = 0; |
293 m_transcribedColumnCount = 0; | |
294 m_startTime = RealTime::zeroTime; | 310 m_startTime = RealTime::zeroTime; |
295 } | 311 } |
296 | 312 |
297 Silvet::FeatureSet | 313 Silvet::FeatureSet |
298 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp) | 314 Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp) |
338 | 354 |
339 int width = filtered.size(); | 355 int width = filtered.size(); |
340 | 356 |
341 int iterations = 12; | 357 int iterations = 12; |
342 | 358 |
343 // we have 25 columns per second | |
344 double columnDuration = 1.0 / 25.0; | |
345 | |
346 for (int i = 0; i < width; ++i) { | 359 for (int i = 0; i < width; ++i) { |
347 | |
348 RealTime t = m_startTime + | |
349 RealTime::fromSeconds(m_transcribedColumnCount * columnDuration); | |
350 | |
351 ++m_transcribedColumnCount; | |
352 | 360 |
353 double sum = 0.0; | 361 double sum = 0.0; |
354 for (int j = 0; j < processingHeight; ++j) { | 362 for (int j = 0; j < processingHeight; ++j) { |
355 sum += filtered[i][j]; | 363 sum += filtered[i][j]; |
356 } | 364 } |
362 for (int j = 0; j < iterations; ++j) { | 370 for (int j = 0; j < iterations; ++j) { |
363 em.iterate(filtered[i]); | 371 em.iterate(filtered[i]); |
364 } | 372 } |
365 | 373 |
366 vector<double> pitches = em.getPitchDistribution(); | 374 vector<double> pitches = em.getPitchDistribution(); |
| | 375 |
| | 376 for (int j = 0; j < processingNotes; ++j) { |
| | 377 pitches[j] *= sum; |
| | 378 } |
| | 379 |
367 Feature f; | 380 Feature f; |
368 for (int j = 0; j < (int)pitches.size(); ++j) { | 381 for (int j = 0; j < processingNotes; ++j) { |
369 f.values.push_back(float(pitches[j] * sum)); | 382 f.values.push_back(float(pitches[j])); |
370 } | 383 } |
371 fs[m_pitchOutputNo].push_back(f); | 384 fs[m_pitchOutputNo].push_back(f); |
372 | 385 |
373 //!!! fake notes | 386 FeatureList noteFeatures = postProcess(pitches); |
374 for (int j = 0; j < (int)pitches.size(); ++j) { | 387 for (FeatureList::const_iterator fi = noteFeatures.begin(); |
375 if (pitches[j] * sum > 5) { | 388 fi != noteFeatures.end(); ++fi) { |
376 cerr << "pitch " << j << " level: " << pitches[j] * sum << endl; | 389 fs[m_notesOutputNo].push_back(*fi); |
377 Feature nf; | 390 } |
378 nf.hasTimestamp = true; | |
379 nf.timestamp = t; | |
380 nf.hasDuration = true; | |
381 nf.duration = RealTime::fromSeconds(columnDuration); | |
382 nf.values.push_back(j + 21); | |
383 float velocity = pitches[j] * sum * 2; | |
384 if (velocity > 127.f) velocity = 127.f; | |
385 nf.values.push_back(velocity); | |
386 fs[m_notesOutputNo].push_back(nf); | |
387 } | |
388 } | |
389 | |
390 //!!! now do something with the results from em! | |
391 em.report(); | |
392 } | 391 } |
393 | 392 |
394 return fs; | 393 return fs; |
395 } | 394 } |
396 | 395 |
460 } | 459 } |
461 | 460 |
462 return out; | 461 return out; |
463 } | 462 } |
464 | 463 |
| | 464 Vamp::Plugin::FeatureList |
| | 465 Silvet::postProcess(const vector<double> &pitches) |
| | 466 { |
| | 467 vector<double> filtered; |
| | 468 |
| | 469 for (int j = 0; j < processingNotes; ++j) { |
| | 470 m_postFilter[j]->push(pitches[j]); |
| | 471 filtered.push_back(m_postFilter[j]->get()); |
| | 472 } |
| | 473 |
| | 474 // Threshold for level and reduce number of candidate pitches |
| | 475 |
| | 476 int polyphony = 5; |
| | 477 double threshold = 4.8; |
| | 478 |
| | 479 typedef std::multimap<double, int> ValueIndexMap; |
| | 480 |
| | 481 ValueIndexMap strengths; |
| | 482 for (int j = 0; j < processingNotes; ++j) { |
| | 483 strengths.insert(ValueIndexMap::value_type(filtered[j], j)); |
| | 484 } |
| | 485 |
| | 486 set<int> active; |
| | 487 ValueIndexMap::const_iterator si = strengths.end(); |
| | 488 for (int j = 0; j < polyphony; ++j) { |
| | 489 --si; |
| | 490 if (si->first < threshold) break; |
| | 491 cerr << si->second << " : " << si->first << endl; |
| | 492 active.insert(si->second); |
| | 493 } |
| | 494 |
| | 495 // Minimum duration pruning, and conversion to notes. We can only |
| | 496 // report notes that have just ended (i.e. that are absent in the |
| | 497 // latest active set but present in the last set in the piano |
| | 498 // roll) -- any notes that ended earlier will have been reported |
| | 499 // already, and if they haven't ended, we don't know their |
| | 500 // duration. |
| | 501 |
| | 502 int width = m_pianoRoll.size(); |
| | 503 |
| | 504 int durationThreshold = 2; // columns |
| | 505 |
| | 506 FeatureList noteFeatures; |
| | 507 |
| | 508 if (width < durationThreshold + 1) { |
| | 509 m_pianoRoll.push_back(active); |
| | 510 return noteFeatures; |
| | 511 } |
| | 512 |
| | 513 // we have 25 columns per second |
| | 514 double columnDuration = 1.0 / 25.0; |
| | 515 |
| | 516 for (set<int>::const_iterator ni = m_pianoRoll[width-1].begin(); |
| | 517 ni != m_pianoRoll[width-1].end(); ++ni) { |
| | 518 |
| | 519 int note = *ni; |
| | 520 |
| | 521 if (active.find(note) != active.end()) { |
| | 522 // the note is still playing |
| | 523 continue; |
| | 524 } |
| | 525 |
| | 526 // the note was playing but just ended |
| | 527 int end = width; |
| | 528 int start = end-1; |
| | 529 |
| | 530 while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) { |
| | 531 --start; |
| | 532 } |
| | 533 ++start; |
| | 534 |
| | 535 int duration = width - start; |
| | 536 cerr << "duration " << duration << " for just-ended note " << note << endl; |
| | 537 if (duration < durationThreshold) { |
| | 538 // spurious |
| | 539 continue; |
| | 540 } |
| | 541 |
| | 542 Feature nf; |
| | 543 nf.hasTimestamp = true; |
| | 544 nf.timestamp = RealTime::fromSeconds(columnDuration * start); |
| | 545 nf.hasDuration = true; |
| | 546 nf.duration = RealTime::fromSeconds(columnDuration * duration); |
| | 547 nf.values.push_back(noteFrequency(note)); |
| | 548 nf.values.push_back(80.f); //!!! todo: calculate velocity |
| | 549 nf.label = noteName(note); |
| | 550 noteFeatures.push_back(nf); |
| | 551 } |
| | 552 |
| | 553 m_pianoRoll.push_back(active); |
| | 554 |
| | 555 cerr << "returning " << noteFeatures.size() << " complete notes" << endl; |
| | 556 |
| | 557 return noteFeatures; |
| | 558 } |
| | 559 |