# HG changeset patch # User Chris Cannam # Date 1400669634 -3600 # Node ID 7c0ce480a7a4aecebd5a122c24a71ab0fe6ed456 # Parent f73be84f5c904b27928ba4410cd5c04d9cc1c642# Parent 2114b2545c271c394cd562c9eff8ade8768ad50a Merge from branch finetune diff -r f73be84f5c90 -r 7c0ce480a7a4 src/EM.h --- a/src/EM.h Tue May 20 16:48:33 2014 +0100 +++ b/src/EM.h Wed May 21 11:53:54 2014 +0100 @@ -29,6 +29,7 @@ int getBinCount() const { return m_binCount; } int getNoteCount() const { return m_noteCount; } int getSourceCount() const { return m_sourceCount; } + int getShiftCount() const { return m_shiftCount; } /** * Carry out one iteration using the given column as input. The @@ -60,6 +61,15 @@ const float *const *getSources() const { return m_sources; } + + /** + * Return the shift distribution for the current estimate. The + * returned pointer refers to getShiftCount() arrays of + * getNoteCount() values. + */ + const float *const *getShifts() const { + return m_shifts; + } private: const InstrumentPack *m_pack; diff -r f73be84f5c90 -r 7c0ce480a7a4 src/Silvet.cpp --- a/src/Silvet.cpp Tue May 20 16:48:33 2014 +0100 +++ b/src/Silvet.cpp Wed May 21 11:53:54 2014 +0100 @@ -33,6 +33,8 @@ static int processingSampleRate = 44100; static int processingBPO = 60; + +//!!! todo: replace these two with values from instrument pack static int processingHeight = 545; static int processingNotes = 88; @@ -42,6 +44,7 @@ m_resampler(0), m_cq(0), m_hqMode(true), + m_fineTuning(false), m_instrument(0) { } @@ -142,7 +145,7 @@ desc.defaultValue = 1; desc.isQuantized = true; desc.quantizeStep = 1; - desc.valueNames.push_back("Draft (faster)"); + desc.valueNames.push_back("Draft (faster)"); desc.valueNames.push_back("Intensive (higher quality)"); list.push_back(desc); @@ -159,7 +162,18 @@ for (int i = 0; i < int(m_instruments.size()); ++i) { desc.valueNames.push_back(m_instruments[i].name); } + list.push_back(desc); + desc.identifier = "finetune"; + desc.name = "Return fine pitch estimates"; + desc.unit = ""; + desc.description = "Return pitch estimates at finer than semitone resolution (works only in Intensive mode)"; + desc.minValue = 0; + desc.maxValue = 1; + desc.defaultValue = 0; + desc.isQuantized = true; + desc.quantizeStep = 1; + desc.valueNames.clear(); list.push_back(desc); return list; @@ -170,6 +184,8 @@ { if (identifier == "mode") { return m_hqMode ? 1.f : 0.f; + } else if (identifier == "finetune") { + return m_fineTuning ? 1.f : 0.f; } else if (identifier == "soloinstrument") { return m_instrument; } @@ -181,6 +197,8 @@ { if (identifier == "mode") { m_hqMode = (value > 0.5); + } else if (identifier == "finetune") { + m_fineTuning = (value > 0.5); } else if (identifier == "soloinstrument") { m_instrument = lrintf(value); } @@ -247,9 +265,25 @@ } float -Silvet::noteFrequency(int note) const +Silvet::noteFrequency(int note, int shift, int shiftCount) const { - return float(27.5 * pow(2.0, note / 12.0)); + // Convert shift number to a pitch shift. The given shift number + // is an offset into the template array, which starts with some + // zeros, followed by the template, then some trailing zeros. + // + // Example: if we have templateMaxShift == 2 and thus shiftCount + // == 5, then the number will be in the range 0-4 and the template + // will have 2 zeros at either end. Thus number 2 represents the + // template "as recorded", for a pitch shift of 0; smaller indices + // represent moving the template *up* in pitch (by introducing + // zeros at the start, which is the low-frequency end), for a + // positive pitch shift; and higher values represent moving it + // down in pitch, for a negative pitch shift. + + float pshift = + float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount; + + return float(27.5 * pow(2.0, (note + pshift) / 12.0)); } bool @@ -350,12 +384,29 @@ FeatureSet fs; if (filtered.empty()) return fs; + + const InstrumentPack &pack = m_instruments[m_instrument]; int width = filtered.size(); int iterations = m_hqMode ? 20 : 10; - Grid pitchMatrix(width, vector(processingNotes)); + //!!! pitches or notes? [terminology] + Grid localPitches(width, vector(processingNotes, 0.0)); + + bool wantShifts = m_hqMode && m_fineTuning; + int shiftCount = 1; + if (wantShifts) { + shiftCount = pack.templateMaxShift * 2 + 1; + } + + vector > localBestShifts; + if (wantShifts) { + localBestShifts = + vector >(width, vector(processingNotes, 0)); + } + + vector present(width, false); #pragma omp parallel for for (int i = 0; i < width; ++i) { @@ -364,25 +415,54 @@ for (int j = 0; j < processingHeight; ++j) { sum += filtered.at(i).at(j); } - if (sum < 1e-5) continue; - EM em(&m_instruments[m_instrument], m_hqMode); + present[i] = true; + + EM em(&pack, m_hqMode); for (int j = 0; j < iterations; ++j) { em.iterate(filtered.at(i).data()); } - - const float *pitches = em.getPitchDistribution(); + + const float *pitchDist = em.getPitchDistribution(); + const float *const *shiftDist = em.getShifts(); for (int j = 0; j < processingNotes; ++j) { - pitchMatrix[i][j] = pitches[j] * sum; + + localPitches[i][j] = pitchDist[j] * sum; + + int bestShift = 0; + int bestShiftValue = 0.0; + if (wantShifts) { + for (int k = 0; k < shiftCount; ++k) { + if (k == 0 || shiftDist[k][j] > bestShiftValue) { + bestShiftValue = shiftDist[k][j]; + bestShift = k; + } + } + localBestShifts[i][j] = bestShift; + } } } + + for (int i = 0; i < width; ++i) { - for (int i = 0; i < width; ++i) { + if (!present[i]) { + // silent column + for (int j = 0; j < processingNotes; ++j) { + m_postFilter[j]->push(0.0); + } + m_pianoRoll.push_back(map()); + if (wantShifts) { + m_pianoRollShifts.push_back(map()); + } + continue; + } + + postProcess(localPitches[i], localBestShifts[i], wantShifts); - FeatureList noteFeatures = postProcess(pitchMatrix[i]); + FeatureList noteFeatures = noteTrack(shiftCount); for (FeatureList::const_iterator fi = noteFeatures.begin(); fi != noteFeatures.end(); ++fi) { @@ -460,9 +540,11 @@ return out; } -Vamp::Plugin::FeatureList -Silvet::postProcess(const vector &pitches) -{ +void +Silvet::postProcess(const vector &pitches, + const vector &bestShifts, + bool wantShifts) +{ vector filtered; for (int j = 0; j < processingNotes; ++j) { @@ -470,8 +552,6 @@ filtered.push_back(m_postFilter[j]->get()); } - int postFilterLatency = int(m_postFilter[0]->getSize() / 2); - // Threshold for level and reduce number of candidate pitches int polyphony = 5; @@ -483,28 +563,52 @@ typedef std::multimap ValueIndexMap; ValueIndexMap strengths; + for (int j = 0; j < processingNotes; ++j) { - strengths.insert(ValueIndexMap::value_type(filtered[j], j)); + double strength = filtered[j]; + if (strength < threshold) continue; + strengths.insert(ValueIndexMap::value_type(strength, j)); } + ValueIndexMap::const_iterator si = strengths.end(); + map active; - ValueIndexMap::const_iterator si = strengths.end(); - while (int(active.size()) < polyphony) { + map activeShifts; + + while (int(active.size()) < polyphony && si != strengths.begin()) { + --si; - if (si->first < threshold) break; -// cerr << si->second << " : " << si->first << endl; - active[si->second] = si->first; - if (si == strengths.begin()) break; + + double strength = si->first; + int j = si->second; + + active[j] = strength; + + if (wantShifts) { + activeShifts[j] = bestShifts[j]; + } } + m_pianoRoll.push_back(active); + + if (wantShifts) { + m_pianoRollShifts.push_back(activeShifts); + } +} + +Vamp::Plugin::FeatureList +Silvet::noteTrack(int shiftCount) +{ // Minimum duration pruning, and conversion to notes. We can only // report notes that have just ended (i.e. that are absent in the - // latest active set but present in the last set in the piano + // latest active set but present in the prior set in the piano // roll) -- any notes that ended earlier will have been reported // already, and if they haven't ended, we don't know their // duration. - int width = m_pianoRoll.size(); + int width = m_pianoRoll.size() - 1; + + const map &active = m_pianoRoll[width]; double columnDuration = 1.0 / m_colsPerSec; @@ -515,11 +619,9 @@ FeatureList noteFeatures; if (width < durationThreshold + 1) { - m_pianoRoll.push_back(active); return noteFeatures; } - //!!! try: 20ms intervals in intensive mode //!!! try: repeated note detection? (look for change in first derivative of the pitch matrix) for (map::const_iterator ni = m_pianoRoll[width-1].begin(); @@ -536,46 +638,92 @@ int end = width; int start = end-1; - double maxStrength = 0.0; - while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) { - double strength = m_pianoRoll[start][note]; - if (strength > maxStrength) { - maxStrength = strength; - } --start; } ++start; - int duration = width - start; -// cerr << "duration " << duration << " for just-ended note " << note << endl; - if (duration < durationThreshold) { - // spurious + if ((end - start) < durationThreshold) { continue; } - int velocity = maxStrength * 2; - if (velocity > 127) velocity = 127; - -// cerr << "Found a genuine note, starting at " << columnDuration * start << " with duration " << columnDuration * duration << endl; - - Feature nf; - nf.hasTimestamp = true; - nf.timestamp = RealTime::fromSeconds - (columnDuration * (start - postFilterLatency) + 0.02); - nf.hasDuration = true; - nf.duration = RealTime::fromSeconds - (columnDuration * duration); - nf.values.push_back(noteFrequency(note)); - nf.values.push_back(velocity); - nf.label = noteName(note); - noteFeatures.push_back(nf); + emitNote(start, end, note, shiftCount, noteFeatures); } - m_pianoRoll.push_back(active); - // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl; return noteFeatures; } +void +Silvet::emitNote(int start, int end, int note, int shiftCount, + FeatureList ¬eFeatures) +{ + int partStart = start; + int partShift = 0; + int partVelocity = 0; + + Feature f; + f.hasTimestamp = true; + f.hasDuration = true; + + double columnDuration = 1.0 / m_colsPerSec; + int postFilterLatency = int(m_postFilter[0]->getSize() / 2); + int partThreshold = floor(0.05 / columnDuration); + + for (int i = start; i != end; ++i) { + + double strength = m_pianoRoll[i][note]; + + int shift = 0; + + if (shiftCount > 1) { + + shift = m_pianoRollShifts[i][note]; + + if (i == partStart) { + partShift = shift; + } + + if (i > partStart + partThreshold && shift != partShift) { + +// cerr << "i = " << i << ", partStart = " << partStart << ", shift = " << shift << ", partShift = " << partShift << endl; + + // pitch has changed, emit an intermediate note + f.timestamp = RealTime::fromSeconds + (columnDuration * (partStart - postFilterLatency) + 0.02); + f.duration = RealTime::fromSeconds + (columnDuration * (i - partStart)); + f.values.clear(); + f.values.push_back + (noteFrequency(note, partShift, shiftCount)); + f.values.push_back(partVelocity); + f.label = noteName(note); + noteFeatures.push_back(f); + partStart = i; + partShift = shift; + partVelocity = 0; + } + } + + int v = strength * 2; + if (v > 127) v = 127; + + if (v > partVelocity) { + partVelocity = v; + } + } + + if (end >= partStart + partThreshold) { + f.timestamp = RealTime::fromSeconds + (columnDuration * (partStart - postFilterLatency) + 0.02); + f.duration = RealTime::fromSeconds + (columnDuration * (end - partStart)); + f.values.clear(); + f.values.push_back + (noteFrequency(note, partShift, shiftCount)); + f.values.push_back(partVelocity); + f.label = noteName(note); + noteFeatures.push_back(f); + } +} diff -r f73be84f5c90 -r 7c0ce480a7a4 src/Silvet.h --- a/src/Silvet.h Tue May 20 16:48:33 2014 +0100 +++ b/src/Silvet.h Wed May 21 11:53:54 2014 +0100 @@ -77,6 +77,7 @@ CQSpectrogram *m_cq; bool m_hqMode; + bool m_fineTuning; int m_instrument; int m_colsPerSec; @@ -84,13 +85,23 @@ vector *> m_postFilter; vector > m_pianoRoll; + vector > m_pianoRollShifts; Grid preProcess(const Grid &); - FeatureList postProcess(const vector &); + + void postProcess(const vector &pitches, + const vector &bestShifts, + bool wantShifts); // -> piano roll column + + FeatureList noteTrack(int shiftCount); + + void emitNote(int start, int end, int note, int shiftCount, + FeatureList ¬eFeatures); + FeatureSet transcribe(const Grid &); string noteName(int n) const; - float noteFrequency(int n) const; + float noteFrequency(int n, int shift, int shiftCount) const; int m_blockSize; int m_columnCount;