Chris@7: Chris@7: #include "TipicVampPlugin.h" Chris@7: Chris@19: #include "PitchFilterbank.h" Chris@19: #include "CRP.h" Chris@26: #include "Chroma.h" Chris@39: #include "CENS.h" Chris@35: #include "FeatureDownsample.h" Chris@19: Chris@9: #include Chris@27: #include Chris@9: Chris@9: using namespace std; Chris@9: Chris@15: static const float defaultTuningFrequency = 440.f; Chris@15: Chris@7: Tipic::Tipic(float inputSampleRate) : Chris@9: Plugin(inputSampleRate), Chris@9: m_stepSize(0), Chris@9: m_blockSize(0), Chris@15: m_tuningFrequency(defaultTuningFrequency), Chris@15: m_filterbank(0), Chris@19: m_crp(0), Chris@26: m_chroma(0), Chris@26: m_logChroma(0), Chris@39: m_cens(0), Chris@19: m_pitchOutputNo(-1), Chris@26: m_cpOutputNo(-1), Chris@26: m_clpOutputNo(-1), Chris@39: m_censOutputNo(-1), Chris@19: m_crpOutputNo(-1) Chris@7: { Chris@7: } Chris@7: Chris@7: Tipic::~Tipic() Chris@7: { Chris@15: delete m_filterbank; Chris@26: delete m_crp; Chris@26: delete m_chroma; Chris@26: delete m_logChroma; Chris@39: delete m_cens; Chris@35: Chris@35: for (auto &d: m_downsamplers) delete d.second; Chris@7: } Chris@7: Chris@7: string Chris@7: Tipic::getIdentifier() const Chris@7: { Chris@7: return "tipic"; Chris@7: } Chris@7: Chris@7: string Chris@7: Tipic::getName() const Chris@7: { Chris@47: return "TIPIC"; Chris@7: } Chris@7: Chris@7: string Chris@7: Tipic::getDescription() const Chris@7: { Chris@47: return "Chroma and pitch features, including DCT-reduced chroma with extra timbre invariance. Based on the MATLAB Chroma Toolbox by Müller and Ewert, adapted to use causal filters."; Chris@7: } Chris@7: Chris@7: string Chris@7: Tipic::getMaker() const Chris@7: { Chris@41: return "Queen Mary, University of London"; Chris@7: } Chris@7: Chris@7: int Chris@7: Tipic::getPluginVersion() const Chris@7: { Chris@47: return 1; Chris@7: } Chris@7: Chris@7: string Chris@7: Tipic::getCopyright() const Chris@7: { Chris@54: return "Methods by Sebastian Ewert and Meinard Müller, plugin by Chris Cannam. GPL licence."; Chris@7: } Chris@7: Chris@7: Tipic::InputDomain Chris@7: Tipic::getInputDomain() const Chris@7: { Chris@7: return TimeDomain; Chris@7: } Chris@7: Chris@7: size_t Chris@7: Tipic::getPreferredBlockSize() const Chris@7: { Chris@41: return 0; Chris@7: } Chris@7: Chris@7: size_t Chris@7: Tipic::getPreferredStepSize() const Chris@7: { Chris@41: return 0; Chris@7: } Chris@7: Chris@7: size_t Chris@7: Tipic::getMinChannelCount() const Chris@7: { Chris@7: return 1; Chris@7: } Chris@7: Chris@7: size_t Chris@7: Tipic::getMaxChannelCount() const Chris@7: { Chris@7: return 1; Chris@7: } Chris@7: Chris@7: Tipic::ParameterList Chris@7: Tipic::getParameterDescriptors() const Chris@7: { Chris@7: ParameterList list; Chris@15: Chris@15: ParameterDescriptor desc; Chris@15: desc.identifier = "tuning"; Chris@15: desc.name = "Tuning Frequency"; Chris@15: desc.unit = "Hz"; Chris@15: desc.description = "Frequency of concert A"; Chris@15: desc.minValue = 360; Chris@15: desc.maxValue = 500; Chris@15: desc.defaultValue = defaultTuningFrequency; Chris@15: desc.isQuantized = false; Chris@15: list.push_back(desc); Chris@15: Chris@7: return list; Chris@7: } Chris@7: Chris@7: float Chris@7: Tipic::getParameter(string identifier) const Chris@7: { Chris@15: if (identifier == "tuning") { Chris@15: return m_tuningFrequency; Chris@15: } Chris@7: return 0; Chris@7: } Chris@7: Chris@7: void Chris@7: Tipic::setParameter(string identifier, float value) Chris@7: { Chris@15: if (identifier == "tuning") { Chris@15: m_tuningFrequency = value; Chris@15: } Chris@7: } Chris@7: Chris@7: Tipic::ProgramList Chris@7: Tipic::getPrograms() const Chris@7: { Chris@7: ProgramList list; Chris@7: return list; Chris@7: } Chris@7: Chris@7: string Chris@7: Tipic::getCurrentProgram() const Chris@7: { Chris@7: return ""; // no programs Chris@7: } Chris@7: Chris@7: void Chris@50: Tipic::selectProgram(string) Chris@7: { Chris@7: } Chris@7: Chris@27: static vector noteNames Chris@27: { "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B" }; Chris@27: Chris@27: static std::string noteName(int i) Chris@27: { Chris@27: string name = noteNames[i % 12]; Chris@27: int oct = i / 12 - 1; Chris@27: ostringstream sstr; Chris@27: sstr << i << " " << name << oct << ends; Chris@27: return sstr.str(); Chris@27: } Chris@27: Chris@7: Tipic::OutputList Chris@7: Tipic::getOutputDescriptors() const Chris@7: { Chris@7: OutputList list; Chris@7: Chris@47: string downIdSuffix = "-smoothed"; Chris@47: string downNamePrefix = "Smoothed "; Chris@47: string downDescSuffix = ", smoothed by 10x downsampling"; Chris@47: Chris@7: OutputDescriptor d; Chris@7: d.identifier = "pitch"; Chris@47: d.name = "Pitch Representation"; Chris@47: d.description = "Short-time energy content of the signal within 88 semitone-tuned frequency bands"; Chris@7: d.unit = ""; Chris@7: d.hasFixedBinCount = true; Chris@27: int min = 0, max = 0; Chris@27: PitchFilterbank::getPitchRange(min, max); Chris@27: d.binCount = max - min + 1; Chris@27: d.binNames.clear(); Chris@27: for (int p = min; p <= max; ++p) { Chris@27: d.binNames.push_back(noteName(p)); Chris@27: } Chris@7: d.hasKnownExtents = false; Chris@7: d.isQuantized = false; Chris@7: d.sampleType = OutputDescriptor::FixedSampleRate; Chris@32: d.sampleRate = PitchFilterbank::getOutputSampleRate(); Chris@7: d.hasDuration = false; Chris@9: m_pitchOutputNo = list.size(); Chris@7: list.push_back(d); Chris@7: Chris@47: d.identifier += downIdSuffix; Chris@47: d.name = downNamePrefix + d.name; Chris@47: d.description += downDescSuffix; Chris@35: d.sampleRate /= 10.0; Chris@35: list.push_back(d); Chris@35: Chris@37: d.identifier = "chroma"; Chris@26: d.name = "Chroma Pitch Features"; Chris@47: d.description = "CP Chroma Pitch features derived by summing the Pitch Representation into a single octave"; Chris@26: d.unit = ""; Chris@26: d.hasFixedBinCount = true; Chris@26: d.binCount = 12; Chris@27: d.binNames = noteNames; Chris@26: d.hasKnownExtents = false; Chris@26: d.isQuantized = false; Chris@26: d.sampleType = OutputDescriptor::FixedSampleRate; Chris@32: d.sampleRate = PitchFilterbank::getOutputSampleRate(); Chris@26: d.hasDuration = false; Chris@26: m_cpOutputNo = list.size(); Chris@26: list.push_back(d); Chris@26: Chris@47: d.identifier += downIdSuffix; Chris@47: d.name = downNamePrefix + d.name; Chris@47: d.description += downDescSuffix; Chris@35: d.sampleRate /= 10.0; Chris@35: list.push_back(d); Chris@35: Chris@26: d.identifier = "clp"; Chris@26: d.name = "Chroma Log Pitch Features"; Chris@47: d.description = "CLP Chroma Logarithmic Pitch features derived by summing log of the Pitch Representation energy values into a single octave"; Chris@35: d.sampleRate = PitchFilterbank::getOutputSampleRate(); Chris@26: m_clpOutputNo = list.size(); Chris@26: list.push_back(d); Chris@26: Chris@47: d.identifier += downIdSuffix; Chris@47: d.name = downNamePrefix + d.name; Chris@47: d.description += downDescSuffix; Chris@35: d.sampleRate /= 10.0; Chris@35: list.push_back(d); Chris@35: Chris@39: d.identifier = "cens"; Chris@39: d.name = "Chroma Energy Normalised Statistics Features"; Chris@47: d.description = "CENS statistical features based on L1 normalized pitch energy distribions"; Chris@39: d.sampleRate = PitchFilterbank::getOutputSampleRate() / 10.0; Chris@39: m_censOutputNo = list.size(); Chris@39: list.push_back(d); Chris@39: Chris@19: d.identifier = "crp"; Chris@47: d.name = "Chroma DCT-Reduced Pitch Features"; Chris@47: d.description = "CRP Chroma DCT-Reduced Log Pitch features, providing some timbre-invariance by discarding timbre-related information from lower cepstral coefficients"; Chris@35: d.sampleRate = PitchFilterbank::getOutputSampleRate(); Chris@19: m_crpOutputNo = list.size(); Chris@19: list.push_back(d); Chris@19: Chris@47: d.identifier += downIdSuffix; Chris@47: d.name = downNamePrefix + d.name; Chris@47: d.description += downDescSuffix; Chris@35: d.sampleRate /= 10.0; Chris@35: list.push_back(d); Chris@35: Chris@7: return list; Chris@7: } Chris@7: Chris@7: bool Chris@7: Tipic::initialise(size_t channels, size_t stepSize, size_t blockSize) Chris@7: { Chris@19: if (m_inputSampleRate > 192000) { Chris@19: cerr << "ERROR: Tipic::initialise: Max sample rate 192000 exceeded " Chris@19: << "(requested rate = " << m_inputSampleRate << ")" << endl; Chris@19: return false; Chris@19: } Chris@19: Chris@9: if (m_pitchOutputNo < 0) { Chris@9: // getOutputDescriptors has never been called, it sets up the Chris@9: // outputNo members Chris@9: (void)getOutputDescriptors(); Chris@9: } Chris@26: if (m_pitchOutputNo < 0 || Chris@26: m_cpOutputNo < 0 || Chris@26: m_clpOutputNo < 0 || Chris@39: m_censOutputNo < 0 || Chris@26: m_crpOutputNo < 0) { Chris@19: throw std::logic_error("setup went wrong"); Chris@19: } Chris@9: Chris@7: if (channels < getMinChannelCount() || Chris@9: channels > getMaxChannelCount()) { Chris@19: cerr << "ERROR: Tipic::initialise: wrong number of channels supplied (only 1 supported)" << endl; Chris@9: return false; Chris@9: } Chris@7: Chris@9: m_stepSize = stepSize; Chris@9: m_blockSize = blockSize; Chris@9: Chris@9: if (m_stepSize != m_blockSize) { Chris@9: cerr << "ERROR: initialise: step size and block size must be equal" << endl; Chris@9: return false; Chris@9: } Chris@9: Chris@9: reset(); Chris@9: Chris@7: return true; Chris@7: } Chris@7: Chris@7: void Chris@7: Tipic::reset() Chris@7: { Chris@15: if (!m_filterbank) { Chris@26: Chris@15: m_filterbank = new PitchFilterbank(m_inputSampleRate, m_tuningFrequency); Chris@26: Chris@19: m_crp = new CRP({}); Chris@26: Chris@26: m_chroma = new Chroma({}); Chris@26: Chris@26: Chroma::Parameters params; Chris@26: params.applyLogCompression = true; Chris@26: m_logChroma = new Chroma(params); Chris@39: Chris@39: m_cens = new CENS({}); Chris@15: } Chris@26: Chris@15: m_filterbank->reset(); Chris@35: Chris@35: for (auto &d: m_downsamplers) d.second->reset(); Chris@7: } Chris@7: Chris@7: Tipic::FeatureSet Chris@50: Tipic::process(const float *const *inputBuffers, Vamp::RealTime) Chris@7: { Chris@19: RealSequence in; Chris@9: in.resize(m_blockSize); Chris@42: for (int i = 0; i < m_blockSize; ++i) { Chris@42: in[i] = inputBuffers[0][i]; Chris@42: } Chris@9: Chris@19: RealBlock pitchFiltered = m_filterbank->process(in); Chris@26: Chris@26: RealBlock cp = m_chroma->process(pitchFiltered); Chris@26: RealBlock clp = m_logChroma->process(pitchFiltered); Chris@39: RealBlock cens = m_cens->process(pitchFiltered); Chris@26: RealBlock crp = m_crp->process(pitchFiltered); Chris@9: Chris@9: FeatureSet fs; Chris@36: addFeatures(fs, m_pitchOutputNo, pitchFiltered, false); Chris@36: addFeatures(fs, m_cpOutputNo, cp, false); Chris@36: addFeatures(fs, m_clpOutputNo, clp, false); Chris@39: addFeatures(fs, m_censOutputNo, cens, false); Chris@36: addFeatures(fs, m_crpOutputNo, crp, false); Chris@9: return fs; Chris@7: } Chris@7: Chris@7: Tipic::FeatureSet Chris@7: Tipic::getRemainingFeatures() Chris@7: { Chris@19: RealBlock pitchFiltered = m_filterbank->getRemainingOutput(); Chris@26: Chris@26: RealBlock cp = m_chroma->process(pitchFiltered); Chris@26: RealBlock clp = m_logChroma->process(pitchFiltered); Chris@39: RealBlock cens = m_cens->process(pitchFiltered); Chris@26: RealBlock crp = m_crp->process(pitchFiltered); Chris@9: Chris@9: FeatureSet fs; Chris@36: addFeatures(fs, m_pitchOutputNo, pitchFiltered, true); Chris@36: addFeatures(fs, m_cpOutputNo, cp, true); Chris@36: addFeatures(fs, m_clpOutputNo, clp, true); Chris@39: addFeatures(fs, m_censOutputNo, cens, true); Chris@36: addFeatures(fs, m_crpOutputNo, crp, true); Chris@9: return fs; Chris@7: } Chris@7: Chris@9: void Chris@36: Tipic::addFeatures(FeatureSet &fs, int outputNo, const RealBlock &block, bool final) Chris@9: { Chris@35: if (block.empty()) return; Chris@39: Chris@39: int downsampledOutputNo = outputNo + 1; Chris@39: if (outputNo == m_censOutputNo) { Chris@39: // CENS exists only in downsampled form Chris@39: downsampledOutputNo = outputNo; Chris@39: } Chris@39: Chris@42: int n = block.size(); Chris@42: Chris@39: if (outputNo != downsampledOutputNo) { Chris@42: for (int i = 0; i < n; ++i) { Chris@39: Feature f; Chris@39: int h = block[i].size(); Chris@39: f.values.resize(h); Chris@42: for (int j = 0; j < h; ++j) { Chris@42: f.values[j] = block[i][j]; Chris@42: } Chris@39: fs[outputNo].push_back(f); Chris@39: } Chris@9: } Chris@37: Chris@35: if (m_downsamplers.find(outputNo) == m_downsamplers.end()) { Chris@35: FeatureDownsample::Parameters params; Chris@35: params.featureSize = block[0].size(); Chris@35: m_downsamplers[outputNo] = new FeatureDownsample(params); Chris@35: } Chris@35: Chris@35: RealBlock downsampled = m_downsamplers[outputNo]->process(block); Chris@36: Chris@36: if (final) { Chris@36: RealBlock remaining = m_downsamplers[outputNo]->getRemainingOutput(); Chris@36: downsampled.insert(downsampled.end(), remaining.begin(), remaining.end()); Chris@36: } Chris@42: Chris@42: n = downsampled.size(); Chris@35: Chris@42: for (int i = 0; i < n; ++i) { Chris@35: Feature f; Chris@35: int h = downsampled[i].size(); Chris@35: f.values.resize(h); Chris@42: for (int j = 0; j < h; ++j) { Chris@42: f.values[j] = downsampled[i][j]; Chris@42: } Chris@39: fs[downsampledOutputNo].push_back(f); Chris@35: } Chris@9: }