Chris@23: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ matthiasm@0: Chris@35: /* Chris@35: NNLS-Chroma / Chordino Chris@35: Chris@35: Audio feature extraction plugins for chromagram and chord Chris@35: estimation. Chris@35: Chris@35: Centre for Digital Music, Queen Mary University of London. Chris@35: This file copyright 2008-2010 Matthias Mauch and QMUL. Chris@35: Chris@35: This program is free software; you can redistribute it and/or Chris@35: modify it under the terms of the GNU General Public License as Chris@35: published by the Free Software Foundation; either version 2 of the Chris@35: License, or (at your option) any later version. See the file Chris@35: COPYING included with this distribution for more information. Chris@35: */ Chris@35: Chris@35: #include "NNLSBase.h" Chris@27: Chris@27: #include "chromamethods.h" Chris@27: Chris@27: #include Chris@27: #include matthiasm@0: #include matthiasm@9: Chris@27: #include matthiasm@0: matthiasm@0: const bool debug_on = false; matthiasm@0: Chris@35: NNLSBase::NNLSBase(float inputSampleRate) : Chris@23: Plugin(inputSampleRate), Chris@35: m_logSpectrum(0), Chris@23: m_blockSize(0), Chris@23: m_stepSize(0), Chris@23: m_lengthOfNoteIndex(0), mail@80: m_meanTunings(0), mail@80: m_localTunings(0), mail@41: m_whitening(1.0), Chris@23: m_preset(0.0), Chris@23: m_localTuning(0), Chris@23: m_kernelValue(0), Chris@23: m_kernelFftIndex(0), Chris@23: m_kernelNoteIndex(0), Chris@23: m_dict(0), mail@60: m_tuneLocal(0), Chris@23: m_chorddict(0), Chris@23: m_chordnames(0), Chris@23: m_doNormalizeChroma(0), mail@60: m_rollon(0), matthiasm@42: m_s(0.7), matthiasm@50: m_useNNLS(1), mail@80: m_useHMM(1), mail@80: sinvalues(0), mail@80: cosvalues(0) matthiasm@0: { Chris@35: if (debug_on) cerr << "--> NNLSBase" << endl; matthiasm@7: Chris@23: // make the *note* dictionary matrix Chris@23: m_dict = new float[nNote * 84]; Chris@23: for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0; mail@41: dictionaryMatrix(m_dict, 0.7); matthiasm@7: Chris@23: // get the *chord* dictionary from file (if the file exists) Chris@23: m_chordnames = chordDictionary(&m_chorddict); matthiasm@0: } matthiasm@0: matthiasm@0: Chris@35: NNLSBase::~NNLSBase() matthiasm@0: { Chris@35: if (debug_on) cerr << "--> ~NNLSBase" << endl; Chris@23: delete [] m_dict; matthiasm@0: } matthiasm@0: matthiasm@0: string Chris@35: NNLSBase::getMaker() const matthiasm@0: { Chris@23: if (debug_on) cerr << "--> getMaker" << endl; matthiasm@0: // Your name here matthiasm@0: return "Matthias Mauch"; matthiasm@0: } matthiasm@0: matthiasm@0: int Chris@35: NNLSBase::getPluginVersion() const matthiasm@0: { Chris@23: if (debug_on) cerr << "--> getPluginVersion" << endl; matthiasm@0: // Increment this each time you release a version that behaves matthiasm@0: // differently from the previous one matthiasm@0: return 1; matthiasm@0: } matthiasm@0: matthiasm@0: string Chris@35: NNLSBase::getCopyright() const matthiasm@0: { Chris@23: if (debug_on) cerr << "--> getCopyright" << endl; matthiasm@0: // This function is not ideally named. It does not necessarily matthiasm@0: // need to say who made the plugin -- getMaker does that -- but it matthiasm@0: // should indicate the terms under which it is distributed. For matthiasm@0: // example, "Copyright (year). All Rights Reserved", or "GPL" Chris@35: return "GPL"; matthiasm@0: } matthiasm@0: Chris@35: NNLSBase::InputDomain Chris@35: NNLSBase::getInputDomain() const matthiasm@0: { Chris@23: if (debug_on) cerr << "--> getInputDomain" << endl; matthiasm@0: return FrequencyDomain; matthiasm@0: } matthiasm@0: matthiasm@0: size_t Chris@35: NNLSBase::getPreferredBlockSize() const matthiasm@0: { Chris@23: if (debug_on) cerr << "--> getPreferredBlockSize" << endl; matthiasm@0: return 16384; // 0 means "I can handle any block size" matthiasm@0: } matthiasm@0: matthiasm@0: size_t Chris@35: NNLSBase::getPreferredStepSize() const matthiasm@0: { Chris@23: if (debug_on) cerr << "--> getPreferredStepSize" << endl; matthiasm@0: return 2048; // 0 means "anything sensible"; in practice this Chris@23: // means the same as the block size for TimeDomain Chris@23: // plugins, or half of it for FrequencyDomain plugins matthiasm@0: } matthiasm@0: matthiasm@0: size_t Chris@35: NNLSBase::getMinChannelCount() const matthiasm@0: { Chris@23: if (debug_on) cerr << "--> getMinChannelCount" << endl; matthiasm@0: return 1; matthiasm@0: } matthiasm@0: matthiasm@0: size_t Chris@35: NNLSBase::getMaxChannelCount() const matthiasm@0: { Chris@23: if (debug_on) cerr << "--> getMaxChannelCount" << endl; matthiasm@0: return 1; matthiasm@0: } matthiasm@0: Chris@35: NNLSBase::ParameterList Chris@35: NNLSBase::getParameterDescriptors() const matthiasm@0: { Chris@23: if (debug_on) cerr << "--> getParameterDescriptors" << endl; matthiasm@0: ParameterList list; matthiasm@0: matthiasm@42: ParameterDescriptor d; matthiasm@42: d.identifier = "useNNLS"; matthiasm@42: d.name = "use approximate transcription (NNLS)"; matthiasm@42: d.description = "Toggles approximate transcription (NNLS)."; matthiasm@42: d.unit = ""; matthiasm@42: d.minValue = 0.0; matthiasm@42: d.maxValue = 1.0; matthiasm@42: d.defaultValue = 1.0; matthiasm@42: d.isQuantized = true; matthiasm@42: d.quantizeStep = 1.0; matthiasm@42: list.push_back(d); matthiasm@42: mail@41: ParameterDescriptor d0; mail@41: d0.identifier = "rollon"; mail@41: d0.name = "spectral roll-on"; matthiasm@58: d0.description = "Consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds the quantile [spectral roll on] x [total energy] will be set to 0. A value of 0 means that no bins will be changed."; matthiasm@59: d0.unit = "%"; mail@41: d0.minValue = 0; matthiasm@59: d0.maxValue = 5; mail@41: d0.defaultValue = 0; matthiasm@48: d0.isQuantized = true; matthiasm@59: d0.quantizeStep = 0.5; mail@41: list.push_back(d0); matthiasm@4: matthiasm@4: ParameterDescriptor d1; matthiasm@4: d1.identifier = "tuningmode"; matthiasm@4: d1.name = "tuning mode"; matthiasm@4: d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing."; matthiasm@4: d1.unit = ""; matthiasm@4: d1.minValue = 0; matthiasm@4: d1.maxValue = 1; matthiasm@4: d1.defaultValue = 0; matthiasm@4: d1.isQuantized = true; matthiasm@4: d1.valueNames.push_back("global tuning"); matthiasm@4: d1.valueNames.push_back("local tuning"); matthiasm@4: d1.quantizeStep = 1.0; matthiasm@4: list.push_back(d1); matthiasm@4: mail@41: ParameterDescriptor d2; mail@41: d2.identifier = "whitening"; mail@41: d2.name = "spectral whitening"; mail@41: d2.description = "Spectral whitening: no whitening - 0; whitening - 1."; mail@41: d2.unit = ""; mail@41: d2.isQuantized = true; mail@41: d2.minValue = 0.0; mail@41: d2.maxValue = 1.0; mail@41: d2.defaultValue = 1.0; mail@41: d2.isQuantized = false; mail@41: list.push_back(d2); mail@41: mail@41: ParameterDescriptor d3; mail@41: d3.identifier = "s"; mail@41: d3.name = "spectral shape"; mail@41: d3.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics."; mail@41: d3.unit = ""; mail@41: d3.minValue = 0.5; mail@41: d3.maxValue = 0.9; mail@41: d3.defaultValue = 0.7; mail@41: d3.isQuantized = false; mail@41: list.push_back(d3); mail@41: Chris@23: ParameterDescriptor d4; matthiasm@12: d4.identifier = "chromanormalize"; matthiasm@12: d4.name = "chroma normalization"; matthiasm@12: d4.description = "How shall the chroma vector be normalized?"; matthiasm@12: d4.unit = ""; matthiasm@12: d4.minValue = 0; matthiasm@13: d4.maxValue = 3; matthiasm@12: d4.defaultValue = 0; matthiasm@12: d4.isQuantized = true; matthiasm@13: d4.valueNames.push_back("none"); matthiasm@13: d4.valueNames.push_back("maximum norm"); Chris@23: d4.valueNames.push_back("L1 norm"); Chris@23: d4.valueNames.push_back("L2 norm"); matthiasm@12: d4.quantizeStep = 1.0; matthiasm@12: list.push_back(d4); matthiasm@4: matthiasm@0: return list; matthiasm@0: } matthiasm@0: matthiasm@0: float Chris@35: NNLSBase::getParameter(string identifier) const matthiasm@0: { Chris@23: if (debug_on) cerr << "--> getParameter" << endl; matthiasm@42: if (identifier == "useNNLS") { matthiasm@42: return m_useNNLS; matthiasm@0: } matthiasm@0: mail@41: if (identifier == "whitening") { mail@41: return m_whitening; mail@41: } mail@41: mail@41: if (identifier == "s") { mail@41: return m_s; matthiasm@0: } matthiasm@17: Chris@23: if (identifier == "rollon") { matthiasm@17: return m_rollon; matthiasm@17: } matthiasm@0: matthiasm@0: if (identifier == "tuningmode") { matthiasm@0: if (m_tuneLocal) { matthiasm@0: return 1.0; matthiasm@0: } else { matthiasm@0: return 0.0; matthiasm@0: } matthiasm@0: } Chris@23: if (identifier == "preset") { Chris@23: return m_preset; matthiasm@3: } Chris@23: if (identifier == "chromanormalize") { Chris@23: return m_doNormalizeChroma; matthiasm@12: } matthiasm@50: matthiasm@50: if (identifier == "useHMM") { matthiasm@50: return m_useHMM; matthiasm@50: } matthiasm@50: matthiasm@0: return 0; matthiasm@0: matthiasm@0: } matthiasm@0: matthiasm@0: void Chris@35: NNLSBase::setParameter(string identifier, float value) matthiasm@0: { Chris@23: if (debug_on) cerr << "--> setParameter" << endl; matthiasm@42: if (identifier == "useNNLS") { matthiasm@42: m_useNNLS = (int) value; matthiasm@0: } matthiasm@0: mail@41: if (identifier == "whitening") { mail@41: m_whitening = value; matthiasm@0: } matthiasm@0: mail@41: if (identifier == "s") { mail@41: m_s = value; mail@41: } mail@41: matthiasm@50: if (identifier == "useHMM") { matthiasm@50: m_useHMM = value; matthiasm@50: } matthiasm@50: matthiasm@0: if (identifier == "tuningmode") { mail@60: // m_tuneLocal = (value > 0) ? true : false; mail@60: m_tuneLocal = value; matthiasm@0: // cerr << "m_tuneLocal :" << m_tuneLocal << endl; matthiasm@0: } matthiasm@42: // if (identifier == "preset") { matthiasm@42: // m_preset = value; matthiasm@42: // if (m_preset == 0.0) { matthiasm@42: // m_tuneLocal = false; matthiasm@42: // m_whitening = 1.0; matthiasm@42: // m_dictID = 0.0; matthiasm@42: // } matthiasm@42: // if (m_preset == 1.0) { matthiasm@42: // m_tuneLocal = false; matthiasm@42: // m_whitening = 1.0; matthiasm@42: // m_dictID = 1.0; matthiasm@42: // } matthiasm@42: // if (m_preset == 2.0) { matthiasm@42: // m_tuneLocal = false; matthiasm@42: // m_whitening = 0.7; matthiasm@42: // m_dictID = 0.0; matthiasm@42: // } matthiasm@42: // } Chris@23: if (identifier == "chromanormalize") { Chris@23: m_doNormalizeChroma = value; Chris@23: } matthiasm@17: Chris@23: if (identifier == "rollon") { Chris@23: m_rollon = value; Chris@23: } matthiasm@0: } matthiasm@0: Chris@35: NNLSBase::ProgramList Chris@35: NNLSBase::getPrograms() const matthiasm@0: { Chris@23: if (debug_on) cerr << "--> getPrograms" << endl; matthiasm@0: ProgramList list; matthiasm@0: matthiasm@0: // If you have no programs, return an empty list (or simply don't matthiasm@0: // implement this function or getCurrentProgram/selectProgram) matthiasm@0: matthiasm@0: return list; matthiasm@0: } matthiasm@0: matthiasm@0: string Chris@35: NNLSBase::getCurrentProgram() const matthiasm@0: { Chris@23: if (debug_on) cerr << "--> getCurrentProgram" << endl; matthiasm@0: return ""; // no programs matthiasm@0: } matthiasm@0: matthiasm@0: void Chris@35: NNLSBase::selectProgram(string name) matthiasm@0: { Chris@23: if (debug_on) cerr << "--> selectProgram" << endl; matthiasm@0: } matthiasm@0: matthiasm@0: matthiasm@0: bool Chris@35: NNLSBase::initialise(size_t channels, size_t stepSize, size_t blockSize) matthiasm@0: { Chris@23: if (debug_on) { Chris@23: cerr << "--> initialise"; Chris@23: } matthiasm@1: mail@80: // make things for tuning estimation mail@80: for (int iBPS = 0; iBPS < nBPS; ++iBPS) { mail@80: sinvalues.push_back(sin(2*M_PI*(iBPS*1.0/nBPS))); mail@80: cosvalues.push_back(cos(2*M_PI*(iBPS*1.0/nBPS))); mail@80: } mail@80: mail@80: mail@80: // make hamming window of length 1/2 octave mail@76: int hamwinlength = nBPS * 6 + 1; mail@76: float hamwinsum = 0; mail@76: for (int i = 0; i < hamwinlength; ++i) { mail@76: hw.push_back(0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1))); mail@76: hamwinsum += 0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1)); mail@76: } mail@77: for (int i = 0; i < hamwinlength; ++i) hw[i] = hw[i] / hamwinsum; mail@80: mail@80: mail@80: // initialise the tuning mail@80: for (int iBPS = 0; iBPS < nBPS; ++iBPS) { mail@80: m_meanTunings.push_back(0); mail@80: m_localTunings.push_back(0); mail@80: } mail@76: matthiasm@0: if (channels < getMinChannelCount() || matthiasm@0: channels > getMaxChannelCount()) return false; matthiasm@0: m_blockSize = blockSize; matthiasm@0: m_stepSize = stepSize; Chris@35: m_frameCount = 0; mail@77: int tempn = nNote * m_blockSize/2; Chris@23: // cerr << "length of tempkernel : " << tempn << endl; Chris@23: float *tempkernel; matthiasm@1: Chris@23: tempkernel = new float[tempn]; matthiasm@1: Chris@23: logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel); Chris@23: m_kernelValue.clear(); Chris@23: m_kernelFftIndex.clear(); Chris@23: m_kernelNoteIndex.clear(); Chris@23: int countNonzero = 0; Chris@23: for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix Chris@23: for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) { Chris@23: if (tempkernel[iFFT + blockSize/2 * iNote] > 0) { Chris@23: m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]); Chris@23: if (tempkernel[iFFT + blockSize/2 * iNote] > 0) { Chris@23: countNonzero++; Chris@23: } Chris@23: m_kernelFftIndex.push_back(iFFT); Chris@23: m_kernelNoteIndex.push_back(iNote); Chris@23: } Chris@23: } Chris@23: } Chris@23: // cerr << "nonzero count : " << countNonzero << endl; Chris@23: delete [] tempkernel; Chris@35: /* Chris@23: ofstream myfile; Chris@23: myfile.open ("matrix.txt"); matthiasm@3: // myfile << "Writing this to a file.\n"; Chris@23: for (int i = 0; i < nNote * 84; ++i) { Chris@23: myfile << m_dict[i] << endl; Chris@23: } matthiasm@3: myfile.close(); Chris@35: */ matthiasm@0: return true; matthiasm@0: } matthiasm@0: matthiasm@0: void Chris@35: NNLSBase::reset() matthiasm@0: { Chris@23: if (debug_on) cerr << "--> reset"; matthiasm@4: matthiasm@0: // Clear buffers, reset stored values, etc Chris@35: m_frameCount = 0; matthiasm@42: // m_dictID = 0; Chris@35: m_logSpectrum.clear(); mail@80: for (int iBPS = 0; iBPS < nBPS; ++iBPS) { mail@80: m_meanTunings[iBPS] = 0; mail@80: m_localTunings[iBPS] = 0; mail@80: } Chris@23: m_localTuning.clear(); matthiasm@0: } matthiasm@0: Chris@35: void Chris@35: NNLSBase::baseProcess(const float *const *inputBuffers, Vamp::RealTime timestamp) matthiasm@0: { Chris@35: m_frameCount++; Chris@23: float *magnitude = new float[m_blockSize/2]; matthiasm@0: Chris@23: const float *fbuf = inputBuffers[0]; Chris@23: float energysum = 0; Chris@23: // make magnitude Chris@23: float maxmag = -10000; Chris@23: for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) { Chris@23: magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] + Chris@23: fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]); Chris@23: if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin]; Chris@23: if (m_rollon > 0) { Chris@23: energysum += pow(magnitude[iBin],2); Chris@23: } Chris@23: } matthiasm@14: Chris@23: float cumenergy = 0; Chris@23: if (m_rollon > 0) { Chris@23: for (size_t iBin = 2; iBin < m_blockSize/2; iBin++) { Chris@23: cumenergy += pow(magnitude[iBin],2); matthiasm@59: if (cumenergy < energysum * m_rollon / 100) magnitude[iBin-2] = 0; Chris@23: else break; Chris@23: } Chris@23: } matthiasm@17: Chris@23: if (maxmag < 2) { Chris@23: // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl; Chris@23: for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) { Chris@23: magnitude[iBin] = 0; Chris@23: } Chris@23: } matthiasm@4: Chris@23: // note magnitude mapping using pre-calculated matrix Chris@23: float *nm = new float[nNote]; // note magnitude Chris@23: for (size_t iNote = 0; iNote < nNote; iNote++) { Chris@23: nm[iNote] = 0; // initialise as 0 Chris@23: } Chris@23: int binCount = 0; Chris@23: for (vector::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) { Chris@23: // cerr << "."; Chris@23: nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount]; Chris@23: // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl; Chris@23: binCount++; Chris@23: } Chris@23: // cerr << nm[20]; Chris@23: // cerr << endl; matthiasm@0: matthiasm@0: Chris@35: float one_over_N = 1.0/m_frameCount; matthiasm@0: // update means of complex tuning variables mail@80: for (int iBPS = 0; iBPS < nBPS; ++iBPS) m_meanTunings[iBPS] *= float(m_frameCount-1)*one_over_N; mail@80: mail@80: for (int iTone = 0; iTone < round(nNote*0.62/nBPS)*nBPS+1; iTone = iTone + nBPS) { mail@80: for (int iBPS = 0; iBPS < nBPS; ++iBPS) m_meanTunings[iBPS] += nm[iTone + iBPS]*one_over_N; Chris@23: float ratioOld = 0.997; mail@80: for (int iBPS = 0; iBPS < nBPS; ++iBPS) { mail@80: m_localTunings[iBPS] *= ratioOld; mail@80: m_localTunings[iBPS] += nm[iTone + iBPS] * (1 - ratioOld); mail@80: } matthiasm@0: } matthiasm@0: // if (m_tuneLocal) { Chris@23: // local tuning mail@80: // float localTuningImag = sinvalue * m_localTunings[1] - sinvalue * m_localTunings[2]; mail@80: // float localTuningReal = m_localTunings[0] + cosvalue * m_localTunings[1] + cosvalue * m_localTunings[2]; mail@80: mail@80: float localTuningImag = 0; mail@80: float localTuningReal = 0; mail@80: for (int iBPS = 0; iBPS < nBPS; ++iBPS) { mail@80: localTuningReal += m_localTunings[iBPS] * cosvalues[iBPS]; mail@80: localTuningImag += m_localTunings[iBPS] * sinvalues[iBPS]; mail@80: } mail@80: Chris@23: float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI); Chris@23: m_localTuning.push_back(normalisedtuning); matthiasm@0: Chris@23: Feature f1; // logfreqspec Chris@23: f1.hasTimestamp = true; matthiasm@0: f1.timestamp = timestamp; Chris@23: for (size_t iNote = 0; iNote < nNote; iNote++) { Chris@23: f1.values.push_back(nm[iNote]); Chris@23: } matthiasm@0: matthiasm@0: // deletes matthiasm@0: delete[] magnitude; matthiasm@0: delete[] nm; matthiasm@0: Chris@35: m_logSpectrum.push_back(f1); // remember note magnitude matthiasm@0: } matthiasm@0: Chris@35: Chris@35: #ifdef NOT_DEFINED Chris@35: Chris@35: NNLSBase::FeatureSet Chris@35: NNLSBase::getRemainingFeatures() matthiasm@0: { mail@81: // if (debug_on) cerr << "--> getRemainingFeatures" << endl; mail@81: FeatureSet fsOut; mail@81: // if (m_logSpectrum.size() == 0) return fsOut; mail@81: // int nChord = m_chordnames.size(); mail@81: // // mail@81: // /** Calculate Tuning mail@81: // calculate tuning from (using the angle of the complex number defined by the mail@81: // cumulative mean real and imag values) mail@81: // **/ mail@81: // float meanTuningImag = sinvalue * m_meanTunings[1] - sinvalue * m_meanTunings[2]; mail@81: // float meanTuningReal = m_meanTunings[0] + cosvalue * m_meanTunings[1] + cosvalue * m_meanTunings[2]; mail@81: // float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI)); mail@81: // float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI); mail@81: // int intShift = floor(normalisedtuning * 3); mail@81: // float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this mail@81: // mail@81: // char buffer0 [50]; mail@81: // mail@81: // sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning); mail@81: // mail@81: // // cerr << "normalisedtuning: " << normalisedtuning << '\n'; mail@81: // mail@81: // // push tuning to FeatureSet fsOut mail@81: // Feature f0; // tuning mail@81: // f0.hasTimestamp = true; mail@81: // f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));; mail@81: // f0.label = buffer0; mail@81: // fsOut[0].push_back(f0); mail@81: // mail@81: // /** Tune Log-Frequency Spectrogram mail@81: // calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to mail@81: // perform linear interpolation on the existing log-frequency spectrogram (kinda f1). mail@81: // **/ mail@81: // cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... "; mail@81: // mail@81: // float tempValue = 0; mail@81: // float dbThreshold = 0; // relative to the background spectrum mail@81: // float thresh = pow(10,dbThreshold/20); mail@81: // // cerr << "tune local ? " << m_tuneLocal << endl; mail@81: // int count = 0; mail@81: // mail@81: // for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) { mail@81: // Feature f1 = *i; mail@81: // Feature f2; // tuned log-frequency spectrum mail@81: // f2.hasTimestamp = true; mail@81: // f2.timestamp = f1.timestamp; mail@81: // f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero mail@81: // mail@81: // if (m_tuneLocal == 1.0) { mail@81: // intShift = floor(m_localTuning[count] * 3); mail@81: // floatShift = m_localTuning[count] * 3 - intShift; // floatShift is a really bad name for this mail@81: // } mail@81: // mail@81: // // cerr << intShift << " " << floatShift << endl; mail@81: // mail@81: // for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins mail@81: // tempValue = f1.values[k + intShift] * (1-floatShift) + f1.values[k+intShift+1] * floatShift; mail@81: // f2.values.push_back(tempValue); mail@81: // } mail@81: // mail@81: // f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge mail@81: // vector runningmean = SpecialConvolution(f2.values,hw); mail@81: // vector runningstd; mail@81: // for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance) mail@81: // runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i])); mail@81: // } mail@81: // runningstd = SpecialConvolution(runningstd,hw); // second step convolve mail@81: // for (int i = 0; i < nNote; i++) { mail@81: // runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std mail@81: // if (runningstd[i] > 0) { mail@81: // // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ? mail@81: // // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0; mail@81: // f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ? mail@81: // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0; mail@81: // } mail@81: // if (f2.values[i] < 0) { mail@81: // cerr << "ERROR: negative value in logfreq spectrum" << endl; mail@81: // } mail@81: // } mail@81: // fsOut[2].push_back(f2); mail@81: // count++; mail@81: // } mail@81: // cerr << "done." << endl; mail@81: // mail@81: // /** Semitone spectrum and chromagrams mail@81: // Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum mail@81: // is inferred using a non-negative least squares algorithm. mail@81: // Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means mail@81: // bass and treble stacked onto each other). mail@81: // **/ mail@81: // if (m_useNNLS == 0) { mail@81: // cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... "; mail@81: // } else { mail@81: // cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... "; mail@81: // } Chris@23: // mail@81: // mail@81: // vector > chordogram; mail@81: // vector > scoreChordogram; mail@81: // vector chordchange = vector(fsOut[2].size(),0); mail@81: // vector oldchroma = vector(12,0); mail@81: // vector oldbasschroma = vector(12,0); mail@81: // count = 0; mail@81: // mail@81: // for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) { mail@81: // Feature f2 = *it; // logfreq spectrum mail@81: // Feature f3; // semitone spectrum mail@81: // Feature f4; // treble chromagram mail@81: // Feature f5; // bass chromagram mail@81: // Feature f6; // treble and bass chromagram mail@81: // mail@81: // f3.hasTimestamp = true; mail@81: // f3.timestamp = f2.timestamp; mail@81: // mail@81: // f4.hasTimestamp = true; mail@81: // f4.timestamp = f2.timestamp; mail@81: // mail@81: // f5.hasTimestamp = true; mail@81: // f5.timestamp = f2.timestamp; mail@81: // mail@81: // f6.hasTimestamp = true; mail@81: // f6.timestamp = f2.timestamp; mail@81: // mail@81: // float b[nNote]; mail@81: // mail@81: // bool some_b_greater_zero = false; mail@81: // float sumb = 0; mail@81: // for (int i = 0; i < nNote; i++) { mail@81: // // b[i] = m_dict[(nNote * count + i) % (nNote * 84)]; mail@81: // b[i] = f2.values[i]; mail@81: // sumb += b[i]; mail@81: // if (b[i] > 0) { mail@81: // some_b_greater_zero = true; mail@81: // } mail@81: // } mail@81: // mail@81: // // here's where the non-negative least squares algorithm calculates the note activation x mail@81: // mail@81: // vector chroma = vector(12, 0); mail@81: // vector basschroma = vector(12, 0); mail@81: // float currval; mail@81: // unsigned iSemitone = 0; mail@81: // mail@81: // if (some_b_greater_zero) { mail@81: // if (m_useNNLS == 0) { mail@81: // for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) { mail@81: // currval = 0; mail@81: // currval += b[iNote + 1 + -1] * 0.5; mail@81: // currval += b[iNote + 1 + 0] * 1.0; mail@81: // currval += b[iNote + 1 + 1] * 0.5; mail@81: // f3.values.push_back(currval); mail@81: // chroma[iSemitone % 12] += currval * treblewindow[iSemitone]; mail@81: // basschroma[iSemitone % 12] += currval * basswindow[iSemitone]; mail@81: // iSemitone++; mail@81: // } mail@81: // mail@81: // } else { mail@81: // float x[84+1000]; mail@81: // for (int i = 1; i < 1084; ++i) x[i] = 1.0; mail@81: // vector signifIndex; mail@81: // int index=0; mail@81: // sumb /= 84.0; mail@81: // for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) { mail@81: // float currval = 0; mail@81: // currval += b[iNote + 1 + -1]; mail@81: // currval += b[iNote + 1 + 0]; mail@81: // currval += b[iNote + 1 + 1]; mail@81: // if (currval > 0) signifIndex.push_back(index); mail@81: // f3.values.push_back(0); // fill the values, change later mail@81: // index++; mail@81: // } mail@81: // float rnorm; mail@81: // float w[84+1000]; mail@81: // float zz[84+1000]; mail@81: // int indx[84+1000]; mail@81: // int mode; mail@81: // int dictsize = nNote*signifIndex.size(); mail@81: // // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl; mail@81: // float *curr_dict = new float[dictsize]; mail@81: // for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) { mail@81: // for (unsigned iBin = 0; iBin < nNote; iBin++) { mail@81: // curr_dict[iNote * nNote + iBin] = 1.0 * m_dict[signifIndex[iNote] * nNote + iBin]; mail@81: // } mail@81: // } mail@81: // nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode); mail@81: // delete [] curr_dict; mail@81: // for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) { mail@81: // f3.values[signifIndex[iNote]] = x[iNote]; mail@81: // // cerr << mode << endl; mail@81: // chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]]; mail@81: // basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]]; mail@81: // } mail@81: // } mail@81: // } mail@81: // mail@81: // mail@81: // mail@81: // mail@81: // f4.values = chroma; mail@81: // f5.values = basschroma; mail@81: // chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas mail@81: // f6.values = chroma; mail@81: // mail@81: // if (m_doNormalizeChroma > 0) { mail@81: // vector chromanorm = vector(3,0); mail@81: // switch (int(m_doNormalizeChroma)) { mail@81: // case 0: // should never end up here mail@81: // break; mail@81: // case 1: mail@81: // chromanorm[0] = *max_element(f4.values.begin(), f4.values.end()); mail@81: // chromanorm[1] = *max_element(f5.values.begin(), f5.values.end()); mail@81: // chromanorm[2] = max(chromanorm[0], chromanorm[1]); mail@81: // break; mail@81: // case 2: mail@81: // for (vector::iterator it = f4.values.begin(); it != f4.values.end(); ++it) { mail@81: // chromanorm[0] += *it; mail@81: // } mail@81: // for (vector::iterator it = f5.values.begin(); it != f5.values.end(); ++it) { mail@81: // chromanorm[1] += *it; mail@81: // } mail@81: // for (vector::iterator it = f6.values.begin(); it != f6.values.end(); ++it) { mail@81: // chromanorm[2] += *it; mail@81: // } mail@81: // break; mail@81: // case 3: mail@81: // for (vector::iterator it = f4.values.begin(); it != f4.values.end(); ++it) { mail@81: // chromanorm[0] += pow(*it,2); mail@81: // } mail@81: // chromanorm[0] = sqrt(chromanorm[0]); mail@81: // for (vector::iterator it = f5.values.begin(); it != f5.values.end(); ++it) { mail@81: // chromanorm[1] += pow(*it,2); mail@81: // } mail@81: // chromanorm[1] = sqrt(chromanorm[1]); mail@81: // for (vector::iterator it = f6.values.begin(); it != f6.values.end(); ++it) { mail@81: // chromanorm[2] += pow(*it,2); mail@81: // } mail@81: // chromanorm[2] = sqrt(chromanorm[2]); mail@81: // break; mail@81: // } mail@81: // if (chromanorm[0] > 0) { mail@81: // for (int i = 0; i < f4.values.size(); i++) { mail@81: // f4.values[i] /= chromanorm[0]; mail@81: // } mail@81: // } mail@81: // if (chromanorm[1] > 0) { mail@81: // for (int i = 0; i < f5.values.size(); i++) { mail@81: // f5.values[i] /= chromanorm[1]; mail@81: // } mail@81: // } mail@81: // if (chromanorm[2] > 0) { mail@81: // for (int i = 0; i < f6.values.size(); i++) { mail@81: // f6.values[i] /= chromanorm[2]; mail@81: // } mail@81: // } mail@81: // mail@81: // } mail@81: // mail@81: // // local chord estimation mail@81: // vector currentChordSalience; mail@81: // float tempchordvalue = 0; mail@81: // float sumchordvalue = 0; mail@81: // mail@81: // for (int iChord = 0; iChord < nChord; iChord++) { mail@81: // tempchordvalue = 0; mail@81: // for (int iBin = 0; iBin < 12; iBin++) { mail@81: // tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin]; mail@81: // } mail@81: // for (int iBin = 12; iBin < 24; iBin++) { mail@81: // tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin]; mail@81: // } mail@81: // sumchordvalue+=tempchordvalue; mail@81: // currentChordSalience.push_back(tempchordvalue); mail@81: // } mail@81: // if (sumchordvalue > 0) { mail@81: // for (int iChord = 0; iChord < nChord; iChord++) { mail@81: // currentChordSalience[iChord] /= sumchordvalue; mail@81: // } mail@81: // } else { mail@81: // currentChordSalience[nChord-1] = 1.0; mail@81: // } mail@81: // chordogram.push_back(currentChordSalience); mail@81: // mail@81: // fsOut[3].push_back(f3); mail@81: // fsOut[4].push_back(f4); mail@81: // fsOut[5].push_back(f5); mail@81: // fsOut[6].push_back(f6); mail@81: // count++; mail@81: // } mail@81: // cerr << "done." << endl; mail@81: // mail@81: // mail@81: // /* Simple chord estimation mail@81: // I just take the local chord estimates ("currentChordSalience") and average them over time, then mail@81: // take the maximum. Very simple, don't do this at home... mail@81: // */ mail@81: // cerr << "[NNLS Chroma Plugin] Chord Estimation ... "; mail@81: // count = 0; mail@81: // int halfwindowlength = m_inputSampleRate / m_stepSize; mail@81: // vector chordSequence; mail@81: // for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram mail@81: // vector temp = vector(nChord,0); mail@81: // scoreChordogram.push_back(temp); mail@81: // } mail@81: // for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) { mail@81: // int startIndex = count + 1; mail@81: // int endIndex = count + 2 * halfwindowlength; mail@81: // mail@81: // float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1); mail@81: // mail@81: // vector chordCandidates; mail@81: // for (unsigned iChord = 0; iChord < nChord-1; iChord++) { mail@81: // // float currsum = 0; mail@81: // // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) { mail@81: // // currsum += chordogram[iFrame][iChord]; mail@81: // // } mail@81: // // if (currsum > chordThreshold) chordCandidates.push_back(iChord); mail@81: // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) { mail@81: // if (chordogram[iFrame][iChord] > chordThreshold) { mail@81: // chordCandidates.push_back(iChord); mail@81: // break; mail@81: // } mail@81: // } mail@81: // } mail@81: // chordCandidates.push_back(nChord-1); mail@81: // // cerr << chordCandidates.size() << endl; mail@81: // mail@81: // float maxval = 0; // will be the value of the most salient *chord change* in this frame mail@81: // float maxindex = 0; //... and the index thereof mail@81: // unsigned bestchordL = nChord-1; // index of the best "left" chord mail@81: // unsigned bestchordR = nChord-1; // index of the best "right" chord mail@81: // mail@81: // for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) { mail@81: // // now find the max values on both sides of iWF mail@81: // // left side: mail@81: // float maxL = 0; mail@81: // unsigned maxindL = nChord-1; mail@81: // for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) { mail@81: // unsigned iChord = chordCandidates[kChord]; mail@81: // float currsum = 0; mail@81: // for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) { mail@81: // currsum += chordogram[count+iFrame][iChord]; mail@81: // } mail@81: // if (iChord == nChord-1) currsum *= 0.8; mail@81: // if (currsum > maxL) { mail@81: // maxL = currsum; mail@81: // maxindL = iChord; mail@81: // } mail@81: // } mail@81: // // right side: mail@81: // float maxR = 0; mail@81: // unsigned maxindR = nChord-1; mail@81: // for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) { mail@81: // unsigned iChord = chordCandidates[kChord]; mail@81: // float currsum = 0; mail@81: // for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) { mail@81: // currsum += chordogram[count+iFrame][iChord]; mail@81: // } mail@81: // if (iChord == nChord-1) currsum *= 0.8; mail@81: // if (currsum > maxR) { mail@81: // maxR = currsum; mail@81: // maxindR = iChord; mail@81: // } mail@81: // } mail@81: // if (maxL+maxR > maxval) { mail@81: // maxval = maxL+maxR; mail@81: // maxindex = iWF; mail@81: // bestchordL = maxindL; mail@81: // bestchordR = maxindR; mail@81: // } mail@81: // mail@81: // } mail@81: // // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl; mail@81: // // add a score to every chord-frame-point that was part of a maximum mail@81: // for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) { mail@81: // scoreChordogram[iFrame+count][bestchordL]++; mail@81: // } mail@81: // for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) { mail@81: // scoreChordogram[iFrame+count][bestchordR]++; mail@81: // } mail@81: // if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength; mail@81: // count++; mail@81: // } mail@81: // // cerr << "******* agent finished *******" << endl; mail@81: // count = 0; mail@81: // for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { mail@81: // float maxval = 0; // will be the value of the most salient chord in this frame mail@81: // float maxindex = 0; //... and the index thereof mail@81: // for (unsigned iChord = 0; iChord < nChord; iChord++) { mail@81: // if (scoreChordogram[count][iChord] > maxval) { mail@81: // maxval = scoreChordogram[count][iChord]; mail@81: // maxindex = iChord; mail@81: // // cerr << iChord << endl; mail@81: // } mail@81: // } mail@81: // chordSequence.push_back(maxindex); mail@81: // // cerr << "before modefilter, maxindex: " << maxindex << endl; mail@81: // count++; mail@81: // } mail@81: // // cerr << "******* mode filter done *******" << endl; mail@81: // mail@81: // mail@81: // // mode filter on chordSequence mail@81: // count = 0; mail@81: // string oldChord = ""; mail@81: // for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { mail@81: // Feature f6 = *it; mail@81: // Feature f7; // chord estimate mail@81: // f7.hasTimestamp = true; mail@81: // f7.timestamp = f6.timestamp; mail@81: // Feature f8; // chord estimate mail@81: // f8.hasTimestamp = true; mail@81: // f8.timestamp = f6.timestamp; mail@81: // mail@81: // vector chordCount = vector(nChord,0); mail@81: // int maxChordCount = 0; mail@81: // int maxChordIndex = nChord-1; mail@81: // string maxChord; mail@81: // int startIndex = max(count - halfwindowlength/2,0); mail@81: // int endIndex = min(int(chordogram.size()), count + halfwindowlength/2); mail@81: // for (int i = startIndex; i < endIndex; i++) { mail@81: // chordCount[chordSequence[i]]++; mail@81: // if (chordCount[chordSequence[i]] > maxChordCount) { mail@81: // // cerr << "start index " << startIndex << endl; mail@81: // maxChordCount++; mail@81: // maxChordIndex = chordSequence[i]; mail@81: // maxChord = m_chordnames[maxChordIndex]; mail@81: // } mail@81: // } mail@81: // // chordSequence[count] = maxChordIndex; mail@81: // // cerr << maxChordIndex << endl; mail@81: // f8.values.push_back(chordchange[count]/(halfwindowlength*2)); mail@81: // // cerr << chordchange[count] << endl; mail@81: // fsOut[9].push_back(f8); mail@81: // if (oldChord != maxChord) { mail@81: // oldChord = maxChord; mail@81: // mail@81: // // char buffer1 [50]; mail@81: // // if (maxChordIndex < nChord - 1) { mail@81: // // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]); mail@81: // // } else { mail@81: // // sprintf(buffer1, "N"); mail@81: // // } mail@81: // // f7.label = buffer1; mail@81: // f7.label = m_chordnames[maxChordIndex]; mail@81: // fsOut[7].push_back(f7); mail@81: // } mail@81: // count++; mail@81: // } mail@81: // Feature f7; // last chord estimate mail@81: // f7.hasTimestamp = true; mail@81: // f7.timestamp = fsOut[6][fsOut[6].size()-1].timestamp; mail@81: // f7.label = "N"; mail@81: // fsOut[7].push_back(f7); mail@81: // cerr << "done." << endl; mail@81: // // // musicity mail@81: // // count = 0; mail@81: // // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2 mail@81: // // vector musicityValue; mail@81: // // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) { mail@81: // // Feature f4 = *it; mail@81: // // mail@81: // // int startIndex = max(count - musicitykernelwidth/2,0); mail@81: // // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1); mail@81: // // float chromasum = 0; mail@81: // // float diffsum = 0; mail@81: // // for (int k = 0; k < 12; k++) { mail@81: // // for (int i = startIndex + 1; i < endIndex; i++) { mail@81: // // chromasum += pow(fsOut[4][i].values[k],2); mail@81: // // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]); mail@81: // // } mail@81: // // } mail@81: // // diffsum /= chromasum; mail@81: // // musicityValue.push_back(diffsum); mail@81: // // count++; mail@81: // // } mail@81: // // mail@81: // // float musicityThreshold = 0.44; mail@81: // // if (m_stepSize == 4096) { mail@81: // // musicityThreshold = 0.74; mail@81: // // } mail@81: // // if (m_stepSize == 4410) { mail@81: // // musicityThreshold = 0.77; mail@81: // // } mail@81: // // mail@81: // // count = 0; mail@81: // // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) { mail@81: // // Feature f4 = *it; mail@81: // // Feature f8; // musicity mail@81: // // Feature f9; // musicity segmenter mail@81: // // mail@81: // // f8.hasTimestamp = true; mail@81: // // f8.timestamp = f4.timestamp; mail@81: // // f9.hasTimestamp = true; mail@81: // // f9.timestamp = f4.timestamp; mail@81: // // mail@81: // // int startIndex = max(count - musicitykernelwidth/2,0); mail@81: // // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1); mail@81: // // int musicityCount = 0; mail@81: // // for (int i = startIndex; i <= endIndex; i++) { mail@81: // // if (musicityValue[i] > musicityThreshold) musicityCount++; mail@81: // // } mail@81: // // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1); mail@81: // // mail@81: // // if (isSpeech) { mail@81: // // if (oldlabeltype != 2) { mail@81: // // f9.label = "Speech"; mail@81: // // fsOut[9].push_back(f9); mail@81: // // oldlabeltype = 2; mail@81: // // } mail@81: // // } else { mail@81: // // if (oldlabeltype != 1) { mail@81: // // f9.label = "Music"; mail@81: // // fsOut[9].push_back(f9); mail@81: // // oldlabeltype = 1; mail@81: // // } mail@81: // // } mail@81: // // f8.values.push_back(musicityValue[count]); mail@81: // // fsOut[8].push_back(f8); mail@81: // // count++; mail@81: // // } Chris@23: return fsOut; matthiasm@0: matthiasm@0: } matthiasm@0: Chris@35: #endif