Chris@31: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ Chris@31: Chris@31: /* Chris@31: Silvet Chris@31: Chris@31: A Vamp plugin for note transcription. Chris@31: Centre for Digital Music, Queen Mary University of London. Chris@31: Chris@31: This program is free software; you can redistribute it and/or Chris@31: modify it under the terms of the GNU General Public License as Chris@31: published by the Free Software Foundation; either version 2 of the Chris@31: License, or (at your option) any later version. See the file Chris@31: COPYING included with this distribution for more information. Chris@31: */ Chris@31: Chris@31: #include "Silvet.h" Chris@34: #include "EM.h" Chris@31: Chris@152: #include Chris@31: Chris@152: #include "MedianFilter.h" Chris@152: #include "constant-q-cpp/src/dsp/Resampler.h" Chris@31: Chris@31: #include Chris@31: Chris@32: #include Chris@32: Chris@31: using std::vector; Chris@48: using std::cout; Chris@31: using std::cerr; Chris@31: using std::endl; Chris@40: using Vamp::RealTime; Chris@31: Chris@31: static int processingSampleRate = 44100; Chris@31: static int processingBPO = 60; Chris@170: Chris@31: Silvet::Silvet(float inputSampleRate) : Chris@31: Plugin(inputSampleRate), Chris@161: m_instruments(InstrumentPack::listInstrumentPacks()), Chris@31: m_resampler(0), Chris@110: m_cq(0), Chris@162: m_hqMode(true), Chris@166: m_fineTuning(false), Chris@178: m_instrument(0), Chris@178: m_colsPerSec(50) Chris@31: { Chris@31: } Chris@31: Chris@31: Silvet::~Silvet() Chris@31: { Chris@31: delete m_resampler; Chris@31: delete m_cq; Chris@41: for (int i = 0; i < (int)m_postFilter.size(); ++i) { Chris@41: delete m_postFilter[i]; Chris@41: } Chris@31: } Chris@31: Chris@31: string Chris@31: Silvet::getIdentifier() const Chris@31: { Chris@31: return "silvet"; Chris@31: } Chris@31: Chris@31: string Chris@31: Silvet::getName() const Chris@31: { Chris@31: return "Silvet Note Transcription"; Chris@31: } Chris@31: Chris@31: string Chris@31: Silvet::getDescription() const Chris@31: { Chris@31: // Return something helpful here! Chris@31: return ""; Chris@31: } Chris@31: Chris@31: string Chris@31: Silvet::getMaker() const Chris@31: { Chris@31: // Your name here Chris@31: return ""; Chris@31: } Chris@31: Chris@31: int Chris@31: Silvet::getPluginVersion() const Chris@31: { Chris@31: return 1; Chris@31: } Chris@31: Chris@31: string Chris@31: Silvet::getCopyright() const Chris@31: { Chris@31: // This function is not ideally named. It does not necessarily Chris@31: // need to say who made the plugin -- getMaker does that -- but it Chris@31: // should indicate the terms under which it is distributed. For Chris@31: // example, "Copyright (year). All Rights Reserved", or "GPL" Chris@31: return ""; Chris@31: } Chris@31: Chris@31: Silvet::InputDomain Chris@31: Silvet::getInputDomain() const Chris@31: { Chris@31: return TimeDomain; Chris@31: } Chris@31: Chris@31: size_t Chris@31: Silvet::getPreferredBlockSize() const Chris@31: { Chris@31: return 0; Chris@31: } Chris@31: Chris@31: size_t Chris@31: Silvet::getPreferredStepSize() const Chris@31: { Chris@31: return 0; Chris@31: } Chris@31: Chris@31: size_t Chris@31: Silvet::getMinChannelCount() const Chris@31: { Chris@31: return 1; Chris@31: } Chris@31: Chris@31: size_t Chris@31: Silvet::getMaxChannelCount() const Chris@31: { Chris@31: return 1; Chris@31: } Chris@31: Chris@31: Silvet::ParameterList Chris@31: Silvet::getParameterDescriptors() const Chris@31: { Chris@31: ParameterList list; Chris@110: Chris@110: ParameterDescriptor desc; Chris@110: desc.identifier = "mode"; Chris@110: desc.name = "Processing mode"; Chris@110: desc.unit = ""; Chris@110: desc.description = "Determines the tradeoff of processing speed against transcription quality"; Chris@110: desc.minValue = 0; Chris@110: desc.maxValue = 1; Chris@113: desc.defaultValue = 1; Chris@110: desc.isQuantized = true; Chris@110: desc.quantizeStep = 1; Chris@166: desc.valueNames.push_back("Draft (faster)"); Chris@165: desc.valueNames.push_back("Intensive (higher quality)"); Chris@161: list.push_back(desc); Chris@161: Chris@176: desc.identifier = "instrument"; Chris@176: desc.name = "Instrument"; Chris@161: desc.unit = ""; Chris@162: desc.description = "The instrument known to be present in the recording, if there is only one"; Chris@161: desc.minValue = 0; Chris@162: desc.maxValue = m_instruments.size()-1; Chris@162: desc.defaultValue = 0; Chris@161: desc.isQuantized = true; Chris@161: desc.quantizeStep = 1; Chris@161: desc.valueNames.clear(); Chris@162: for (int i = 0; i < int(m_instruments.size()); ++i) { Chris@162: desc.valueNames.push_back(m_instruments[i].name); Chris@162: } Chris@166: list.push_back(desc); Chris@161: Chris@166: desc.identifier = "finetune"; Chris@166: desc.name = "Return fine pitch estimates"; Chris@166: desc.unit = ""; Chris@166: desc.description = "Return pitch estimates at finer than semitone resolution (works only in Intensive mode)"; Chris@166: desc.minValue = 0; Chris@166: desc.maxValue = 1; Chris@166: desc.defaultValue = 0; Chris@166: desc.isQuantized = true; Chris@166: desc.quantizeStep = 1; Chris@166: desc.valueNames.clear(); Chris@110: list.push_back(desc); Chris@110: Chris@31: return list; Chris@31: } Chris@31: Chris@31: float Chris@31: Silvet::getParameter(string identifier) const Chris@31: { Chris@110: if (identifier == "mode") { Chris@110: return m_hqMode ? 1.f : 0.f; Chris@166: } else if (identifier == "finetune") { Chris@166: return m_fineTuning ? 1.f : 0.f; Chris@176: } else if (identifier == "instrument") { Chris@162: return m_instrument; Chris@110: } Chris@31: return 0; Chris@31: } Chris@31: Chris@31: void Chris@31: Silvet::setParameter(string identifier, float value) Chris@31: { Chris@110: if (identifier == "mode") { Chris@110: m_hqMode = (value > 0.5); Chris@166: } else if (identifier == "finetune") { Chris@166: m_fineTuning = (value > 0.5); Chris@176: } else if (identifier == "instrument") { Chris@162: m_instrument = lrintf(value); Chris@110: } Chris@31: } Chris@31: Chris@31: Silvet::ProgramList Chris@31: Silvet::getPrograms() const Chris@31: { Chris@31: ProgramList list; Chris@31: return list; Chris@31: } Chris@31: Chris@31: string Chris@31: Silvet::getCurrentProgram() const Chris@31: { Chris@31: return ""; Chris@31: } Chris@31: Chris@31: void Chris@31: Silvet::selectProgram(string name) Chris@31: { Chris@31: } Chris@31: Chris@31: Silvet::OutputList Chris@31: Silvet::getOutputDescriptors() const Chris@31: { Chris@31: OutputList list; Chris@31: Chris@31: OutputDescriptor d; Chris@51: d.identifier = "notes"; Chris@51: d.name = "Note transcription"; Chris@162: d.description = "Overall note transcription across selected instruments"; Chris@41: d.unit = "Hz"; Chris@31: d.hasFixedBinCount = true; Chris@31: d.binCount = 2; Chris@41: d.binNames.push_back("Frequency"); Chris@31: d.binNames.push_back("Velocity"); Chris@31: d.hasKnownExtents = false; Chris@31: d.isQuantized = false; Chris@31: d.sampleType = OutputDescriptor::VariableSampleRate; Chris@51: d.sampleRate = m_inputSampleRate / (m_cq ? m_cq->getColumnHop() : 62); Chris@31: d.hasDuration = true; Chris@32: m_notesOutputNo = list.size(); Chris@32: list.push_back(d); Chris@32: Chris@178: d.identifier = "timefreq"; Chris@178: d.name = "Time-frequency distribution"; Chris@178: d.description = "Filtered constant-Q time-frequency distribution used as input to the expectation-maximisation algorithm"; Chris@178: d.unit = ""; Chris@178: d.hasFixedBinCount = true; Chris@178: d.binCount = m_instruments[0].templateHeight; Chris@178: d.binNames.clear(); Chris@178: if (m_cq) { Chris@178: char name[20]; Chris@178: for (int i = 0; i < m_instruments[0].templateHeight; ++i) { Chris@178: // We have a 600-bin (10 oct 60-bin CQ) of which the Chris@178: // lowest-frequency 55 bins have been dropped, for a Chris@178: // 545-bin template. The native CQ bins go high->low Chris@178: // frequency though, so these are still the first 545 bins Chris@178: // as reported by getBinFrequency, though in reverse order Chris@178: float freq = m_cq->getBinFrequency Chris@178: (m_instruments[0].templateHeight - i - 1); Chris@178: sprintf(name, "%.1f Hz", freq); Chris@178: d.binNames.push_back(name); Chris@178: } Chris@178: } Chris@178: d.hasKnownExtents = false; Chris@178: d.isQuantized = false; Chris@178: d.sampleType = OutputDescriptor::FixedSampleRate; Chris@178: d.sampleRate = m_colsPerSec; Chris@178: d.hasDuration = false; Chris@178: m_fcqOutputNo = list.size(); Chris@178: list.push_back(d); Chris@178: Chris@31: return list; Chris@31: } Chris@31: Chris@38: std::string Chris@175: Silvet::noteName(int note, int shift, int shiftCount) const Chris@38: { Chris@38: static const char *names[] = { Chris@38: "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#" Chris@38: }; Chris@38: Chris@175: const char *n = names[note % 12]; Chris@38: Chris@175: int oct = (note + 9) / 12; Chris@38: Chris@175: char buf[30]; Chris@175: Chris@175: float pshift = 0.f; Chris@175: if (shiftCount > 1) { Chris@175: // see noteFrequency below Chris@175: pshift = Chris@175: float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount; Chris@175: } Chris@175: Chris@175: if (pshift > 0.f) { Chris@175: sprintf(buf, "%s%d+%dc", n, oct, int(round(pshift * 100))); Chris@175: } else if (pshift < 0.f) { Chris@175: sprintf(buf, "%s%d-%dc", n, oct, int(round((-pshift) * 100))); Chris@175: } else { Chris@175: sprintf(buf, "%s%d", n, oct); Chris@175: } Chris@38: Chris@38: return buf; Chris@38: } Chris@38: Chris@41: float Chris@168: Silvet::noteFrequency(int note, int shift, int shiftCount) const Chris@41: { Chris@169: // Convert shift number to a pitch shift. The given shift number Chris@169: // is an offset into the template array, which starts with some Chris@169: // zeros, followed by the template, then some trailing zeros. Chris@169: // Chris@169: // Example: if we have templateMaxShift == 2 and thus shiftCount Chris@169: // == 5, then the number will be in the range 0-4 and the template Chris@169: // will have 2 zeros at either end. Thus number 2 represents the Chris@169: // template "as recorded", for a pitch shift of 0; smaller indices Chris@169: // represent moving the template *up* in pitch (by introducing Chris@169: // zeros at the start, which is the low-frequency end), for a Chris@169: // positive pitch shift; and higher values represent moving it Chris@169: // down in pitch, for a negative pitch shift. Chris@169: Chris@175: float pshift = 0.f; Chris@175: if (shiftCount > 1) { Chris@175: pshift = Chris@175: float((shiftCount - shift) - int(shiftCount / 2) - 1) / shiftCount; Chris@175: } Chris@169: Chris@169: return float(27.5 * pow(2.0, (note + pshift) / 12.0)); Chris@41: } Chris@41: Chris@31: bool Chris@31: Silvet::initialise(size_t channels, size_t stepSize, size_t blockSize) Chris@31: { Chris@31: if (channels < getMinChannelCount() || Chris@31: channels > getMaxChannelCount()) return false; Chris@31: Chris@31: if (stepSize != blockSize) { Chris@31: cerr << "Silvet::initialise: Step size must be the same as block size (" Chris@31: << stepSize << " != " << blockSize << ")" << endl; Chris@31: return false; Chris@31: } Chris@31: Chris@31: m_blockSize = blockSize; Chris@31: Chris@31: reset(); Chris@31: Chris@31: return true; Chris@31: } Chris@31: Chris@31: void Chris@31: Silvet::reset() Chris@31: { Chris@31: delete m_resampler; Chris@31: delete m_cq; Chris@31: Chris@31: if (m_inputSampleRate != processingSampleRate) { Chris@31: m_resampler = new Resampler(m_inputSampleRate, processingSampleRate); Chris@31: } else { Chris@31: m_resampler = 0; Chris@31: } Chris@31: Chris@173: double minFreq = 27.5; Chris@173: Chris@173: if (!m_hqMode) { Chris@173: // We don't actually return any notes from the bottom octave, Chris@173: // so we can just pad with zeros Chris@173: minFreq *= 2; Chris@173: } Chris@173: Chris@154: CQParameters params(processingSampleRate, Chris@173: minFreq, Chris@154: processingSampleRate / 3, Chris@154: processingBPO); Chris@154: Chris@155: params.q = 0.95; // MIREX code uses 0.8, but it seems 0.9 or lower Chris@155: // drops the FFT size to 512 from 1024 and alters Chris@155: // some other processing parameters, making Chris@155: // everything much, much slower. Could be a flaw Chris@155: // in the CQ parameter calculations, must check Chris@154: params.atomHopFactor = 0.3; Chris@154: params.threshold = 0.0005; Chris@172: params.window = CQParameters::Hann; Chris@154: Chris@154: m_cq = new CQSpectrogram(params, CQSpectrogram::InterpolateLinear); Chris@31: Chris@165: m_colsPerSec = m_hqMode ? 50 : 25; Chris@165: Chris@41: for (int i = 0; i < (int)m_postFilter.size(); ++i) { Chris@41: delete m_postFilter[i]; Chris@41: } Chris@41: m_postFilter.clear(); Chris@176: for (int i = 0; i < m_instruments[0].templateNoteCount; ++i) { Chris@41: m_postFilter.push_back(new MedianFilter(3)); Chris@41: } Chris@41: m_pianoRoll.clear(); Chris@32: m_columnCount = 0; Chris@40: m_startTime = RealTime::zeroTime; Chris@31: } Chris@31: Chris@31: Silvet::FeatureSet Chris@31: Silvet::process(const float *const *inputBuffers, Vamp::RealTime timestamp) Chris@31: { Chris@40: if (m_columnCount == 0) { Chris@40: m_startTime = timestamp; Chris@40: } Chris@40: Chris@31: vector data; Chris@40: for (int i = 0; i < m_blockSize; ++i) { Chris@40: data.push_back(inputBuffers[0][i]); Chris@40: } Chris@31: Chris@31: if (m_resampler) { Chris@31: data = m_resampler->process(data.data(), data.size()); Chris@31: } Chris@31: Chris@32: Grid cqout = m_cq->process(data); Chris@51: FeatureSet fs = transcribe(cqout); Chris@51: return fs; Chris@34: } Chris@34: Chris@34: Silvet::FeatureSet Chris@34: Silvet::getRemainingFeatures() Chris@34: { Chris@145: Grid cqout = m_cq->getRemainingOutput(); Chris@51: FeatureSet fs = transcribe(cqout); Chris@51: return fs; Chris@34: } Chris@34: Chris@34: Silvet::FeatureSet Chris@34: Silvet::transcribe(const Grid &cqout) Chris@34: { Chris@32: Grid filtered = preProcess(cqout); Chris@31: Chris@32: FeatureSet fs; Chris@32: Chris@104: if (filtered.empty()) return fs; Chris@170: Chris@170: const InstrumentPack &pack = m_instruments[m_instrument]; Chris@104: Chris@178: for (int i = 0; i < (int)filtered.size(); ++i) { Chris@178: Feature f; Chris@178: for (int j = 0; j < pack.templateHeight; ++j) { Chris@178: f.values.push_back(float(filtered[i][j])); Chris@178: } Chris@178: fs[m_fcqOutputNo].push_back(f); Chris@178: } Chris@178: Chris@34: int width = filtered.size(); Chris@34: Chris@164: int iterations = m_hqMode ? 20 : 10; Chris@34: Chris@170: //!!! pitches or notes? [terminology] Chris@176: Grid localPitches(width, vector(pack.templateNoteCount, 0.0)); Chris@170: Chris@170: bool wantShifts = m_hqMode && m_fineTuning; Chris@170: int shiftCount = 1; Chris@170: if (wantShifts) { Chris@170: shiftCount = pack.templateMaxShift * 2 + 1; Chris@170: } Chris@170: Chris@170: vector > localBestShifts; Chris@170: if (wantShifts) { Chris@170: localBestShifts = Chris@176: vector >(width, vector(pack.templateNoteCount, 0)); Chris@170: } Chris@170: Chris@170: vector present(width, false); Chris@37: Chris@123: #pragma omp parallel for Chris@123: for (int i = 0; i < width; ++i) { Chris@104: Chris@170: double sum = 0.0; Chris@176: for (int j = 0; j < pack.templateHeight; ++j) { Chris@170: sum += filtered.at(i).at(j); Chris@170: } Chris@170: if (sum < 1e-5) continue; Chris@170: Chris@170: present[i] = true; Chris@170: Chris@170: EM em(&pack, m_hqMode); Chris@170: Chris@170: for (int j = 0; j < iterations; ++j) { Chris@170: em.iterate(filtered.at(i).data()); Chris@37: } Chris@37: Chris@170: const float *pitchDist = em.getPitchDistribution(); Chris@170: const float *const *shiftDist = em.getShifts(); Chris@37: Chris@176: for (int j = 0; j < pack.templateNoteCount; ++j) { Chris@104: Chris@170: localPitches[i][j] = pitchDist[j] * sum; Chris@170: Chris@170: int bestShift = 0; Chris@170: int bestShiftValue = 0.0; Chris@170: if (wantShifts) { Chris@170: for (int k = 0; k < shiftCount; ++k) { Chris@170: if (k == 0 || shiftDist[k][j] > bestShiftValue) { Chris@170: bestShiftValue = shiftDist[k][j]; Chris@170: bestShift = k; Chris@170: } Chris@170: } Chris@170: localBestShifts[i][j] = bestShift; Chris@170: } Chris@123: } Chris@123: } Chris@166: Chris@166: for (int i = 0; i < width; ++i) { Chris@37: Chris@170: if (!present[i]) { Chris@170: // silent column Chris@176: for (int j = 0; j < pack.templateNoteCount; ++j) { Chris@170: m_postFilter[j]->push(0.0); Chris@170: } Chris@168: m_pianoRoll.push_back(map()); Chris@170: if (wantShifts) { Chris@168: m_pianoRollShifts.push_back(map()); Chris@168: } Chris@166: continue; Chris@166: } Chris@166: Chris@170: postProcess(localPitches[i], localBestShifts[i], wantShifts); Chris@166: Chris@168: FeatureList noteFeatures = noteTrack(shiftCount); Chris@38: Chris@123: for (FeatureList::const_iterator fi = noteFeatures.begin(); Chris@123: fi != noteFeatures.end(); ++fi) { Chris@123: fs[m_notesOutputNo].push_back(*fi); Chris@40: } Chris@34: } Chris@34: Chris@32: return fs; Chris@31: } Chris@31: Chris@32: Silvet::Grid Chris@32: Silvet::preProcess(const Grid &in) Chris@32: { Chris@32: int width = in.size(); Chris@32: Chris@165: int spacing = processingSampleRate / m_colsPerSec; Chris@32: Chris@165: // need to be careful that col spacing is an integer number of samples! Chris@165: assert(spacing * m_colsPerSec == processingSampleRate); Chris@32: Chris@32: Grid out; Chris@32: Chris@58: // We count the CQ latency in terms of processing hops, but Chris@58: // actually it probably isn't an exact number of hops so this Chris@58: // isn't quite accurate. But the small constant offset is Chris@165: // practically irrelevant compared to the jitter from the frame Chris@165: // size we reduce to in a moment Chris@33: int latentColumns = m_cq->getLatency() / m_cq->getColumnHop(); Chris@33: Chris@176: const InstrumentPack &pack = m_instruments[m_instrument]; Chris@176: Chris@32: for (int i = 0; i < width; ++i) { Chris@32: Chris@33: if (m_columnCount < latentColumns) { Chris@33: ++m_columnCount; Chris@33: continue; Chris@33: } Chris@33: Chris@32: int prevSampleNo = (m_columnCount - 1) * m_cq->getColumnHop(); Chris@32: int sampleNo = m_columnCount * m_cq->getColumnHop(); Chris@32: Chris@32: bool select = (sampleNo / spacing != prevSampleNo / spacing); Chris@32: Chris@32: if (select) { Chris@32: vector inCol = in[i]; Chris@176: vector outCol(pack.templateHeight); Chris@32: Chris@178: // In HQ mode, the CQ returns 600 bins and we ignore the Chris@178: // lowest 55 of them. Chris@178: // Chris@178: // In draft mode the CQ is an octave shorter, returning Chris@178: // 540 bins, so we instead pad them with an additional 5 Chris@178: // zeros. Chris@178: // Chris@178: // We also need to reverse the column as we go, since the Chris@178: // raw CQ has the high frequencies first and we need it Chris@178: // the other way around. Chris@32: Chris@178: if (m_hqMode) { Chris@178: for (int j = 0; j < pack.templateHeight; ++j) { Chris@178: int ix = inCol.size() - j - 55; Chris@178: outCol[j] = inCol[ix]; Chris@178: } Chris@178: } else { Chris@178: for (int j = 0; j < 5; ++j) { Chris@178: outCol[j] = 0.0; Chris@178: } Chris@178: for (int j = 5; j < pack.templateHeight; ++j) { Chris@178: int ix = inCol.size() - j + 4; Chris@178: outCol[j] = inCol[ix]; Chris@178: } Chris@46: } Chris@32: Chris@46: vector noiseLevel1 = Chris@46: MedianFilter::filter(40, outCol); Chris@176: for (int j = 0; j < pack.templateHeight; ++j) { Chris@46: noiseLevel1[j] = std::min(outCol[j], noiseLevel1[j]); Chris@46: } Chris@32: Chris@46: vector noiseLevel2 = Chris@46: MedianFilter::filter(40, noiseLevel1); Chris@176: for (int j = 0; j < pack.templateHeight; ++j) { Chris@46: outCol[j] = std::max(outCol[j] - noiseLevel2[j], 0.0); Chris@32: } Chris@32: Chris@165: out.push_back(outCol); Chris@32: } Chris@32: Chris@32: ++m_columnCount; Chris@32: } Chris@32: Chris@32: return out; Chris@32: } Chris@32: Chris@168: void Chris@170: Silvet::postProcess(const vector &pitches, Chris@170: const vector &bestShifts, Chris@170: bool wantShifts) Chris@166: { Chris@176: const InstrumentPack &pack = m_instruments[m_instrument]; Chris@176: Chris@41: vector filtered; Chris@41: Chris@176: for (int j = 0; j < pack.templateNoteCount; ++j) { Chris@170: m_postFilter[j]->push(pitches[j]); Chris@41: filtered.push_back(m_postFilter[j]->get()); Chris@41: } Chris@41: Chris@41: // Threshold for level and reduce number of candidate pitches Chris@41: Chris@41: int polyphony = 5; Chris@150: Chris@150: //!!! make this a parameter (was 4.8, try adjusting, compare levels against matlab code) Chris@150: double threshold = 6; Chris@154: // double threshold = 4.8; Chris@41: Chris@41: typedef std::multimap ValueIndexMap; Chris@41: Chris@41: ValueIndexMap strengths; Chris@166: Chris@176: for (int j = 0; j < pack.templateNoteCount; ++j) { Chris@166: double strength = filtered[j]; Chris@166: if (strength < threshold) continue; Chris@168: strengths.insert(ValueIndexMap::value_type(strength, j)); Chris@168: } Chris@166: Chris@168: ValueIndexMap::const_iterator si = strengths.end(); Chris@167: Chris@168: map active; Chris@168: map activeShifts; Chris@168: Chris@168: while (int(active.size()) < polyphony && si != strengths.begin()) { Chris@168: Chris@168: --si; Chris@168: Chris@168: double strength = si->first; Chris@168: int j = si->second; Chris@168: Chris@168: active[j] = strength; Chris@168: Chris@170: if (wantShifts) { Chris@170: activeShifts[j] = bestShifts[j]; Chris@167: } Chris@41: } Chris@41: Chris@168: m_pianoRoll.push_back(active); Chris@170: Chris@170: if (wantShifts) { Chris@168: m_pianoRollShifts.push_back(activeShifts); Chris@41: } Chris@166: } Chris@166: Chris@166: Vamp::Plugin::FeatureList Chris@168: Silvet::noteTrack(int shiftCount) Chris@166: { Chris@41: // Minimum duration pruning, and conversion to notes. We can only Chris@41: // report notes that have just ended (i.e. that are absent in the Chris@168: // latest active set but present in the prior set in the piano Chris@41: // roll) -- any notes that ended earlier will have been reported Chris@41: // already, and if they haven't ended, we don't know their Chris@41: // duration. Chris@41: Chris@168: int width = m_pianoRoll.size() - 1; Chris@168: Chris@168: const map &active = m_pianoRoll[width]; Chris@41: Chris@165: double columnDuration = 1.0 / m_colsPerSec; Chris@165: Chris@165: // only keep notes >= 100ms or thereabouts Chris@165: int durationThreshold = floor(0.1 / columnDuration); // columns Chris@165: if (durationThreshold < 1) durationThreshold = 1; Chris@41: Chris@41: FeatureList noteFeatures; Chris@41: Chris@41: if (width < durationThreshold + 1) { Chris@41: return noteFeatures; Chris@41: } Chris@41: Chris@150: //!!! try: repeated note detection? (look for change in first derivative of the pitch matrix) Chris@150: Chris@55: for (map::const_iterator ni = m_pianoRoll[width-1].begin(); Chris@41: ni != m_pianoRoll[width-1].end(); ++ni) { Chris@41: Chris@55: int note = ni->first; Chris@41: Chris@41: if (active.find(note) != active.end()) { Chris@41: // the note is still playing Chris@41: continue; Chris@41: } Chris@41: Chris@41: // the note was playing but just ended Chris@41: int end = width; Chris@41: int start = end-1; Chris@41: Chris@41: while (m_pianoRoll[start].find(note) != m_pianoRoll[start].end()) { Chris@41: --start; Chris@41: } Chris@41: ++start; Chris@41: Chris@169: if ((end - start) < durationThreshold) { Chris@41: continue; Chris@41: } Chris@41: Chris@169: emitNote(start, end, note, shiftCount, noteFeatures); Chris@41: } Chris@41: Chris@62: // cerr << "returning " << noteFeatures.size() << " complete note(s) " << endl; Chris@41: Chris@41: return noteFeatures; Chris@41: } Chris@41: Chris@169: void Chris@169: Silvet::emitNote(int start, int end, int note, int shiftCount, Chris@169: FeatureList ¬eFeatures) Chris@169: { Chris@169: int partStart = start; Chris@169: int partShift = 0; Chris@169: int partVelocity = 0; Chris@169: Chris@169: Feature f; Chris@169: f.hasTimestamp = true; Chris@169: f.hasDuration = true; Chris@169: Chris@169: double columnDuration = 1.0 / m_colsPerSec; Chris@169: int postFilterLatency = int(m_postFilter[0]->getSize() / 2); Chris@169: int partThreshold = floor(0.05 / columnDuration); Chris@169: Chris@169: for (int i = start; i != end; ++i) { Chris@169: Chris@169: double strength = m_pianoRoll[i][note]; Chris@169: Chris@169: int shift = 0; Chris@169: Chris@169: if (shiftCount > 1) { Chris@169: Chris@169: shift = m_pianoRollShifts[i][note]; Chris@169: Chris@169: if (i == partStart) { Chris@169: partShift = shift; Chris@169: } Chris@169: Chris@169: if (i > partStart + partThreshold && shift != partShift) { Chris@169: Chris@169: // cerr << "i = " << i << ", partStart = " << partStart << ", shift = " << shift << ", partShift = " << partShift << endl; Chris@169: Chris@169: // pitch has changed, emit an intermediate note Chris@169: f.timestamp = RealTime::fromSeconds Chris@169: (columnDuration * (partStart - postFilterLatency) + 0.02); Chris@169: f.duration = RealTime::fromSeconds Chris@169: (columnDuration * (i - partStart)); Chris@169: f.values.clear(); Chris@169: f.values.push_back Chris@169: (noteFrequency(note, partShift, shiftCount)); Chris@169: f.values.push_back(partVelocity); Chris@175: f.label = noteName(note, partShift, shiftCount); Chris@169: noteFeatures.push_back(f); Chris@169: partStart = i; Chris@169: partShift = shift; Chris@169: partVelocity = 0; Chris@169: } Chris@169: } Chris@169: Chris@169: int v = strength * 2; Chris@169: if (v > 127) v = 127; Chris@169: Chris@169: if (v > partVelocity) { Chris@169: partVelocity = v; Chris@169: } Chris@169: } Chris@169: Chris@169: if (end >= partStart + partThreshold) { Chris@169: f.timestamp = RealTime::fromSeconds Chris@169: (columnDuration * (partStart - postFilterLatency) + 0.02); Chris@169: f.duration = RealTime::fromSeconds Chris@169: (columnDuration * (end - partStart)); Chris@169: f.values.clear(); Chris@169: f.values.push_back Chris@169: (noteFrequency(note, partShift, shiftCount)); Chris@169: f.values.push_back(partVelocity); Chris@175: f.label = noteName(note, partShift, shiftCount); Chris@169: noteFeatures.push_back(f); Chris@169: } Chris@169: }