Chris@23: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ matthiasm@0: Chris@35: /* Chris@35: NNLS-Chroma / Chordino Chris@35: Chris@35: Audio feature extraction plugins for chromagram and chord Chris@35: estimation. Chris@35: Chris@35: Centre for Digital Music, Queen Mary University of London. Chris@35: This file copyright 2008-2010 Matthias Mauch and QMUL. Chris@35: Chris@35: This program is free software; you can redistribute it and/or Chris@35: modify it under the terms of the GNU General Public License as Chris@35: published by the Free Software Foundation; either version 2 of the Chris@35: License, or (at your option) any later version. See the file Chris@35: COPYING included with this distribution for more information. Chris@35: */ Chris@35: Chris@35: #include "NNLSBase.h" Chris@27: Chris@27: #include "chromamethods.h" Chris@27: Chris@27: #include Chris@27: #include matthiasm@0: #include matthiasm@9: Chris@27: #include matthiasm@0: matthiasm@122: static bool debug_on = false; matthiasm@0: Chris@35: NNLSBase::NNLSBase(float inputSampleRate) : Chris@23: Plugin(inputSampleRate), mail@89: m_frameCount(0), Chris@35: m_logSpectrum(0), Chris@23: m_blockSize(0), Chris@23: m_stepSize(0), Chris@23: m_lengthOfNoteIndex(0), mail@80: m_meanTunings(0), mail@80: m_localTunings(0), mail@41: m_whitening(1.0), Chris@23: m_preset(0.0), matthiasm@92: m_useNNLS(1.0), matthiasm@92: m_localTuning(0.0), Chris@23: m_kernelValue(0), Chris@23: m_kernelFftIndex(0), Chris@23: m_kernelNoteIndex(0), Chris@23: m_dict(0), matthiasm@92: m_tuneLocal(0.0), Chris@23: m_doNormalizeChroma(0), matthiasm@92: m_rollon(0.0), matthiasm@95: m_boostN(0.1), matthiasm@42: m_s(0.7), mail@115: m_harte_syntax(0), mail@80: sinvalues(0), mail@80: cosvalues(0) matthiasm@0: { Chris@35: if (debug_on) cerr << "--> NNLSBase" << endl; Chris@23: // make the *note* dictionary matrix Chris@23: m_dict = new float[nNote * 84]; matthiasm@122: for (int i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0; matthiasm@0: } matthiasm@0: matthiasm@0: Chris@35: NNLSBase::~NNLSBase() matthiasm@0: { Chris@35: if (debug_on) cerr << "--> ~NNLSBase" << endl; Chris@23: delete [] m_dict; matthiasm@0: } matthiasm@0: matthiasm@0: string Chris@35: NNLSBase::getMaker() const matthiasm@0: { Chris@23: if (debug_on) cerr << "--> getMaker" << endl; matthiasm@0: // Your name here matthiasm@0: return "Matthias Mauch"; matthiasm@0: } matthiasm@0: matthiasm@0: int Chris@35: NNLSBase::getPluginVersion() const matthiasm@0: { Chris@23: if (debug_on) cerr << "--> getPluginVersion" << endl; matthiasm@0: // Increment this each time you release a version that behaves matthiasm@0: // differently from the previous one Chris@170: return 5; matthiasm@0: } matthiasm@0: matthiasm@0: string Chris@35: NNLSBase::getCopyright() const matthiasm@0: { Chris@23: if (debug_on) cerr << "--> getCopyright" << endl; matthiasm@0: // This function is not ideally named. It does not necessarily matthiasm@0: // need to say who made the plugin -- getMaker does that -- but it matthiasm@0: // should indicate the terms under which it is distributed. For matthiasm@0: // example, "Copyright (year). All Rights Reserved", or "GPL" Chris@35: return "GPL"; matthiasm@0: } matthiasm@0: Chris@35: NNLSBase::InputDomain Chris@35: NNLSBase::getInputDomain() const matthiasm@0: { Chris@23: if (debug_on) cerr << "--> getInputDomain" << endl; matthiasm@0: return FrequencyDomain; matthiasm@0: } matthiasm@0: matthiasm@0: size_t Chris@35: NNLSBase::getPreferredBlockSize() const matthiasm@0: { Chris@23: if (debug_on) cerr << "--> getPreferredBlockSize" << endl; matthiasm@0: return 16384; // 0 means "I can handle any block size" matthiasm@0: } matthiasm@0: matthiasm@0: size_t Chris@35: NNLSBase::getPreferredStepSize() const matthiasm@0: { Chris@23: if (debug_on) cerr << "--> getPreferredStepSize" << endl; matthiasm@0: return 2048; // 0 means "anything sensible"; in practice this Chris@23: // means the same as the block size for TimeDomain Chris@23: // plugins, or half of it for FrequencyDomain plugins matthiasm@0: } matthiasm@0: matthiasm@0: size_t Chris@35: NNLSBase::getMinChannelCount() const matthiasm@0: { Chris@23: if (debug_on) cerr << "--> getMinChannelCount" << endl; matthiasm@0: return 1; matthiasm@0: } matthiasm@0: matthiasm@0: size_t Chris@35: NNLSBase::getMaxChannelCount() const matthiasm@0: { Chris@23: if (debug_on) cerr << "--> getMaxChannelCount" << endl; matthiasm@0: return 1; matthiasm@0: } matthiasm@0: Chris@35: NNLSBase::ParameterList Chris@35: NNLSBase::getParameterDescriptors() const matthiasm@0: { Chris@23: if (debug_on) cerr << "--> getParameterDescriptors" << endl; matthiasm@0: ParameterList list; matthiasm@0: matthiasm@42: ParameterDescriptor d; matthiasm@42: d.identifier = "useNNLS"; matthiasm@42: d.name = "use approximate transcription (NNLS)"; matthiasm@42: d.description = "Toggles approximate transcription (NNLS)."; matthiasm@42: d.unit = ""; matthiasm@42: d.minValue = 0.0; matthiasm@42: d.maxValue = 1.0; matthiasm@42: d.defaultValue = 1.0; matthiasm@42: d.isQuantized = true; matthiasm@42: d.quantizeStep = 1.0; matthiasm@42: list.push_back(d); matthiasm@42: mail@41: ParameterDescriptor d0; mail@41: d0.identifier = "rollon"; mail@115: d0.name = "bass noise threshold"; mail@115: d0.description = "Consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds the quantile [bass noise threshold] x [total energy] will be set to 0. A threshold value of 0 means that no bins will be changed."; matthiasm@59: d0.unit = "%"; mail@41: d0.minValue = 0; matthiasm@59: d0.maxValue = 5; mail@41: d0.defaultValue = 0; matthiasm@48: d0.isQuantized = true; matthiasm@59: d0.quantizeStep = 0.5; mail@41: list.push_back(d0); matthiasm@4: matthiasm@4: ParameterDescriptor d1; matthiasm@4: d1.identifier = "tuningmode"; matthiasm@4: d1.name = "tuning mode"; matthiasm@4: d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing."; matthiasm@4: d1.unit = ""; matthiasm@4: d1.minValue = 0; matthiasm@4: d1.maxValue = 1; matthiasm@4: d1.defaultValue = 0; matthiasm@4: d1.isQuantized = true; matthiasm@4: d1.valueNames.push_back("global tuning"); matthiasm@4: d1.valueNames.push_back("local tuning"); matthiasm@4: d1.quantizeStep = 1.0; matthiasm@4: list.push_back(d1); matthiasm@4: mail@41: ParameterDescriptor d2; mail@41: d2.identifier = "whitening"; mail@41: d2.name = "spectral whitening"; mail@41: d2.description = "Spectral whitening: no whitening - 0; whitening - 1."; mail@41: d2.unit = ""; mail@41: d2.isQuantized = true; mail@41: d2.minValue = 0.0; mail@41: d2.maxValue = 1.0; mail@41: d2.defaultValue = 1.0; mail@41: d2.isQuantized = false; mail@41: list.push_back(d2); mail@41: mail@41: ParameterDescriptor d3; mail@41: d3.identifier = "s"; mail@41: d3.name = "spectral shape"; mail@41: d3.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics."; mail@41: d3.unit = ""; mail@41: d3.minValue = 0.5; mail@41: d3.maxValue = 0.9; mail@41: d3.defaultValue = 0.7; mail@41: d3.isQuantized = false; mail@41: list.push_back(d3); mail@41: Chris@23: ParameterDescriptor d4; matthiasm@12: d4.identifier = "chromanormalize"; matthiasm@12: d4.name = "chroma normalization"; matthiasm@12: d4.description = "How shall the chroma vector be normalized?"; matthiasm@12: d4.unit = ""; matthiasm@12: d4.minValue = 0; matthiasm@13: d4.maxValue = 3; matthiasm@12: d4.defaultValue = 0; matthiasm@12: d4.isQuantized = true; matthiasm@13: d4.valueNames.push_back("none"); matthiasm@13: d4.valueNames.push_back("maximum norm"); Chris@23: d4.valueNames.push_back("L1 norm"); Chris@23: d4.valueNames.push_back("L2 norm"); matthiasm@12: d4.quantizeStep = 1.0; matthiasm@12: list.push_back(d4); matthiasm@4: matthiasm@0: return list; matthiasm@0: } matthiasm@0: matthiasm@0: float Chris@35: NNLSBase::getParameter(string identifier) const matthiasm@0: { Chris@23: if (debug_on) cerr << "--> getParameter" << endl; matthiasm@42: if (identifier == "useNNLS") { matthiasm@42: return m_useNNLS; matthiasm@0: } matthiasm@0: mail@41: if (identifier == "whitening") { mail@41: return m_whitening; mail@41: } mail@41: mail@41: if (identifier == "s") { mail@41: return m_s; matthiasm@0: } matthiasm@17: Chris@23: if (identifier == "rollon") { matthiasm@17: return m_rollon; matthiasm@17: } matthiasm@0: mail@89: if (identifier == "boostn") { mail@89: return m_boostN; mail@89: } mail@89: matthiasm@0: if (identifier == "tuningmode") { matthiasm@0: if (m_tuneLocal) { matthiasm@0: return 1.0; matthiasm@0: } else { matthiasm@0: return 0.0; matthiasm@0: } matthiasm@0: } Chris@23: if (identifier == "preset") { Chris@23: return m_preset; matthiasm@3: } Chris@23: if (identifier == "chromanormalize") { Chris@23: return m_doNormalizeChroma; matthiasm@12: } matthiasm@50: mail@112: if (identifier == "usehartesyntax") { mail@115: return m_harte_syntax; mail@112: } mail@112: matthiasm@0: return 0; matthiasm@0: matthiasm@0: } matthiasm@0: matthiasm@0: void Chris@35: NNLSBase::setParameter(string identifier, float value) matthiasm@0: { Chris@164: // cerr << "setParameter (" << identifier << ") -> " << value << endl; Chris@164: Chris@23: if (debug_on) cerr << "--> setParameter" << endl; matthiasm@42: if (identifier == "useNNLS") { matthiasm@42: m_useNNLS = (int) value; matthiasm@0: } matthiasm@0: mail@41: if (identifier == "whitening") { mail@41: m_whitening = value; matthiasm@0: } matthiasm@0: mail@41: if (identifier == "s") { mail@41: m_s = value; mail@41: } mail@41: mail@89: if (identifier == "boostn") { mail@89: m_boostN = value; mail@89: } mail@89: matthiasm@0: if (identifier == "tuningmode") { mail@60: // m_tuneLocal = (value > 0) ? true : false; mail@60: m_tuneLocal = value; matthiasm@0: // cerr << "m_tuneLocal :" << m_tuneLocal << endl; matthiasm@0: } matthiasm@42: // if (identifier == "preset") { matthiasm@42: // m_preset = value; matthiasm@42: // if (m_preset == 0.0) { matthiasm@42: // m_tuneLocal = false; matthiasm@42: // m_whitening = 1.0; matthiasm@42: // m_dictID = 0.0; matthiasm@42: // } matthiasm@42: // if (m_preset == 1.0) { matthiasm@42: // m_tuneLocal = false; matthiasm@42: // m_whitening = 1.0; matthiasm@42: // m_dictID = 1.0; matthiasm@42: // } matthiasm@42: // if (m_preset == 2.0) { matthiasm@42: // m_tuneLocal = false; matthiasm@42: // m_whitening = 0.7; matthiasm@42: // m_dictID = 0.0; matthiasm@42: // } matthiasm@42: // } Chris@23: if (identifier == "chromanormalize") { Chris@23: m_doNormalizeChroma = value; Chris@23: } matthiasm@17: Chris@23: if (identifier == "rollon") { Chris@23: m_rollon = value; Chris@23: } mail@112: mail@112: if (identifier == "usehartesyntax") { mail@115: m_harte_syntax = value; mail@112: } matthiasm@0: } matthiasm@0: Chris@35: NNLSBase::ProgramList Chris@35: NNLSBase::getPrograms() const matthiasm@0: { Chris@23: if (debug_on) cerr << "--> getPrograms" << endl; matthiasm@0: ProgramList list; matthiasm@0: matthiasm@0: // If you have no programs, return an empty list (or simply don't matthiasm@0: // implement this function or getCurrentProgram/selectProgram) matthiasm@0: matthiasm@0: return list; matthiasm@0: } matthiasm@0: matthiasm@0: string Chris@35: NNLSBase::getCurrentProgram() const matthiasm@0: { Chris@23: if (debug_on) cerr << "--> getCurrentProgram" << endl; matthiasm@0: return ""; // no programs matthiasm@0: } matthiasm@0: matthiasm@0: void Chris@35: NNLSBase::selectProgram(string name) matthiasm@0: { Chris@23: if (debug_on) cerr << "--> selectProgram" << endl; matthiasm@0: } matthiasm@0: matthiasm@0: matthiasm@0: bool Chris@35: NNLSBase::initialise(size_t channels, size_t stepSize, size_t blockSize) matthiasm@0: { Chris@23: if (debug_on) { Chris@23: cerr << "--> initialise"; Chris@23: } matthiasm@1: mail@100: dictionaryMatrix(m_dict, m_s); mail@100: mail@80: // make things for tuning estimation mail@80: for (int iBPS = 0; iBPS < nBPS; ++iBPS) { mail@80: sinvalues.push_back(sin(2*M_PI*(iBPS*1.0/nBPS))); mail@80: cosvalues.push_back(cos(2*M_PI*(iBPS*1.0/nBPS))); mail@80: } mail@80: mail@80: mail@80: // make hamming window of length 1/2 octave mail@76: int hamwinlength = nBPS * 6 + 1; mail@76: float hamwinsum = 0; mail@76: for (int i = 0; i < hamwinlength; ++i) { mail@76: hw.push_back(0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1))); mail@76: hamwinsum += 0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1)); mail@76: } mail@77: for (int i = 0; i < hamwinlength; ++i) hw[i] = hw[i] / hamwinsum; mail@80: mail@80: mail@80: // initialise the tuning mail@80: for (int iBPS = 0; iBPS < nBPS; ++iBPS) { mail@80: m_meanTunings.push_back(0); mail@80: m_localTunings.push_back(0); mail@80: } mail@76: matthiasm@0: if (channels < getMinChannelCount() || matthiasm@0: channels > getMaxChannelCount()) return false; matthiasm@0: m_blockSize = blockSize; matthiasm@0: m_stepSize = stepSize; Chris@35: m_frameCount = 0; mail@77: int tempn = nNote * m_blockSize/2; Chris@23: // cerr << "length of tempkernel : " << tempn << endl; Chris@23: float *tempkernel; matthiasm@1: Chris@23: tempkernel = new float[tempn]; matthiasm@1: Chris@23: logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel); Chris@23: m_kernelValue.clear(); Chris@23: m_kernelFftIndex.clear(); Chris@23: m_kernelNoteIndex.clear(); Chris@23: int countNonzero = 0; Chris@91: for (int iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix matthiasm@122: for (int iFFT = 0; iFFT < static_cast(blockSize/2); ++iFFT) { Chris@23: if (tempkernel[iFFT + blockSize/2 * iNote] > 0) { Chris@23: m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]); Chris@23: if (tempkernel[iFFT + blockSize/2 * iNote] > 0) { Chris@23: countNonzero++; Chris@23: } Chris@23: m_kernelFftIndex.push_back(iFFT); Chris@23: m_kernelNoteIndex.push_back(iNote); Chris@23: } Chris@23: } Chris@23: } Chris@23: // cerr << "nonzero count : " << countNonzero << endl; Chris@23: delete [] tempkernel; Chris@35: /* Chris@23: ofstream myfile; Chris@23: myfile.open ("matrix.txt"); matthiasm@3: // myfile << "Writing this to a file.\n"; Chris@23: for (int i = 0; i < nNote * 84; ++i) { Chris@23: myfile << m_dict[i] << endl; Chris@23: } matthiasm@3: myfile.close(); Chris@35: */ matthiasm@0: return true; matthiasm@0: } matthiasm@0: matthiasm@0: void Chris@35: NNLSBase::reset() matthiasm@0: { Chris@23: if (debug_on) cerr << "--> reset"; matthiasm@4: matthiasm@0: // Clear buffers, reset stored values, etc Chris@35: m_frameCount = 0; matthiasm@42: // m_dictID = 0; Chris@35: m_logSpectrum.clear(); mail@80: for (int iBPS = 0; iBPS < nBPS; ++iBPS) { mail@80: m_meanTunings[iBPS] = 0; mail@80: m_localTunings[iBPS] = 0; mail@80: } Chris@23: m_localTuning.clear(); matthiasm@0: } matthiasm@0: Chris@35: void Chris@35: NNLSBase::baseProcess(const float *const *inputBuffers, Vamp::RealTime timestamp) matthiasm@0: { Chris@35: m_frameCount++; Chris@23: float *magnitude = new float[m_blockSize/2]; matthiasm@0: Chris@23: const float *fbuf = inputBuffers[0]; Chris@23: float energysum = 0; Chris@23: // make magnitude Chris@23: float maxmag = -10000; matthiasm@122: for (int iBin = 0; iBin < static_cast(m_blockSize/2); iBin++) { Chris@23: magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] + matthiasm@93: fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]); matthiasm@95: if (magnitude[iBin]>m_blockSize*1.0) magnitude[iBin] = m_blockSize; // a valid audio signal (between -1 and 1) should not be limited here. Chris@23: if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin]; Chris@23: if (m_rollon > 0) { Chris@23: energysum += pow(magnitude[iBin],2); Chris@23: } Chris@23: } matthiasm@14: Chris@23: float cumenergy = 0; Chris@23: if (m_rollon > 0) { matthiasm@122: for (int iBin = 2; iBin < static_cast(m_blockSize/2); iBin++) { Chris@23: cumenergy += pow(magnitude[iBin],2); matthiasm@59: if (cumenergy < energysum * m_rollon / 100) magnitude[iBin-2] = 0; Chris@23: else break; Chris@23: } Chris@23: } matthiasm@17: matthiasm@147: if (maxmag < m_blockSize * 2.0 / 16384.0) { // this is not quite right, I think Chris@23: // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl; matthiasm@122: for (int iBin = 0; iBin < static_cast(m_blockSize/2); iBin++) { Chris@23: magnitude[iBin] = 0; Chris@23: } Chris@23: } matthiasm@4: Chris@23: // note magnitude mapping using pre-calculated matrix Chris@23: float *nm = new float[nNote]; // note magnitude Chris@91: for (int iNote = 0; iNote < nNote; iNote++) { Chris@23: nm[iNote] = 0; // initialise as 0 Chris@23: } Chris@23: int binCount = 0; Chris@23: for (vector::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) { Chris@23: // cerr << "."; Chris@23: nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount]; Chris@23: // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl; Chris@23: binCount++; Chris@23: } Chris@23: // cerr << nm[20]; Chris@23: // cerr << endl; matthiasm@0: matthiasm@0: Chris@35: float one_over_N = 1.0/m_frameCount; matthiasm@0: // update means of complex tuning variables mail@80: for (int iBPS = 0; iBPS < nBPS; ++iBPS) m_meanTunings[iBPS] *= float(m_frameCount-1)*one_over_N; mail@80: mail@80: for (int iTone = 0; iTone < round(nNote*0.62/nBPS)*nBPS+1; iTone = iTone + nBPS) { mail@80: for (int iBPS = 0; iBPS < nBPS; ++iBPS) m_meanTunings[iBPS] += nm[iTone + iBPS]*one_over_N; Chris@23: float ratioOld = 0.997; mail@80: for (int iBPS = 0; iBPS < nBPS; ++iBPS) { mail@80: m_localTunings[iBPS] *= ratioOld; mail@80: m_localTunings[iBPS] += nm[iTone + iBPS] * (1 - ratioOld); mail@80: } matthiasm@0: } matthiasm@0: // if (m_tuneLocal) { Chris@23: // local tuning mail@80: // float localTuningImag = sinvalue * m_localTunings[1] - sinvalue * m_localTunings[2]; mail@80: // float localTuningReal = m_localTunings[0] + cosvalue * m_localTunings[1] + cosvalue * m_localTunings[2]; mail@80: mail@80: float localTuningImag = 0; mail@80: float localTuningReal = 0; mail@80: for (int iBPS = 0; iBPS < nBPS; ++iBPS) { mail@80: localTuningReal += m_localTunings[iBPS] * cosvalues[iBPS]; mail@80: localTuningImag += m_localTunings[iBPS] * sinvalues[iBPS]; mail@80: } mail@80: Chris@23: float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI); Chris@23: m_localTuning.push_back(normalisedtuning); matthiasm@0: Chris@23: Feature f1; // logfreqspec Chris@23: f1.hasTimestamp = true; matthiasm@0: f1.timestamp = timestamp; Chris@91: for (int iNote = 0; iNote < nNote; iNote++) { Chris@23: f1.values.push_back(nm[iNote]); Chris@23: } matthiasm@0: matthiasm@0: // deletes matthiasm@0: delete[] magnitude; matthiasm@0: delete[] nm; matthiasm@0: Chris@35: m_logSpectrum.push_back(f1); // remember note magnitude matthiasm@0: } matthiasm@0: