Mercurial > hg > nnls-chroma
diff NNLSChroma.cpp @ 35:cf8898a0174c matthiasm-plugin
* Split out NNLSChroma plugin into three plugins (chroma, chordino, tuning) with a common base class.
There's still quite a lot of duplication between the getRemainingFeatures functions.
Also add copyright / copying headers, etc.
author | Chris Cannam |
---|---|
date | Fri, 22 Oct 2010 11:30:21 +0100 |
parents | da3195577172 |
children | 3c261b864e49 |
line wrap: on
line diff
--- a/NNLSChroma.cpp Fri Oct 22 16:19:40 2010 +0900 +++ b/NNLSChroma.cpp Fri Oct 22 11:30:21 2010 +0100 @@ -1,5 +1,21 @@ /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ +/* + NNLS-Chroma / Chordino + + Audio feature extraction plugins for chromagram and chord + estimation. + + Centre for Digital Music, Queen Mary University of London. + This file copyright 2008-2010 Matthias Mauch and QMUL. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + #include "NNLSChroma.h" #include "chromamethods.h" @@ -15,49 +31,14 @@ const vector<float> hw(hammingwind, hammingwind+19); NNLSChroma::NNLSChroma(float inputSampleRate) : - Plugin(inputSampleRate), - m_fl(0), - m_blockSize(0), - m_stepSize(0), - m_lengthOfNoteIndex(0), - m_meanTuning0(0), - m_meanTuning1(0), - m_meanTuning2(0), - m_localTuning0(0), - m_localTuning1(0), - m_localTuning2(0), - m_paling(1.0), - m_preset(0.0), - m_localTuning(0), - m_kernelValue(0), - m_kernelFftIndex(0), - m_kernelNoteIndex(0), - m_dict(0), - m_tuneLocal(false), - m_dictID(0), - m_chorddict(0), - m_chordnames(0), - m_doNormalizeChroma(0), - m_rollon(0.01) + NNLSBase(inputSampleRate) { if (debug_on) cerr << "--> NNLSChroma" << endl; - - // make the *note* dictionary matrix - m_dict = new float[nNote * 84]; - for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0; - dictionaryMatrix(m_dict); - - // get the *chord* dictionary from file (if the file exists) - m_chordnames = chordDictionary(&m_chorddict); } - NNLSChroma::~NNLSChroma() { if (debug_on) cerr << "--> ~NNLSChroma" << endl; - delete [] m_dict; - // delete [] m_chorddict; - // delete m_chordnames; } string @@ -77,270 +58,10 @@ string NNLSChroma::getDescription() const { - // Return something helpful here! if (debug_on) cerr << "--> getDescription" << endl; return "This plugin provides a number of features derived from a log-frequency amplitude spectrum of the DFT: some variants of the log-frequency spectrum, including a semitone spectrum derived from approximate transcription using the NNLS algorithm; based on this semitone spectrum, chroma features and a simple chord estimate."; } -string -NNLSChroma::getMaker() const -{ - if (debug_on) cerr << "--> getMaker" << endl; - // Your name here - return "Matthias Mauch"; -} - -int -NNLSChroma::getPluginVersion() const -{ - if (debug_on) cerr << "--> getPluginVersion" << endl; - // Increment this each time you release a version that behaves - // differently from the previous one - return 1; -} - -string -NNLSChroma::getCopyright() const -{ - if (debug_on) cerr << "--> getCopyright" << endl; - // This function is not ideally named. It does not necessarily - // need to say who made the plugin -- getMaker does that -- but it - // should indicate the terms under which it is distributed. For - // example, "Copyright (year). All Rights Reserved", or "GPL" - return "Copyright (2010). All rights reserved."; -} - -NNLSChroma::InputDomain -NNLSChroma::getInputDomain() const -{ - if (debug_on) cerr << "--> getInputDomain" << endl; - return FrequencyDomain; -} - -size_t -NNLSChroma::getPreferredBlockSize() const -{ - if (debug_on) cerr << "--> getPreferredBlockSize" << endl; - return 16384; // 0 means "I can handle any block size" -} - -size_t -NNLSChroma::getPreferredStepSize() const -{ - if (debug_on) cerr << "--> getPreferredStepSize" << endl; - return 2048; // 0 means "anything sensible"; in practice this - // means the same as the block size for TimeDomain - // plugins, or half of it for FrequencyDomain plugins -} - -size_t -NNLSChroma::getMinChannelCount() const -{ - if (debug_on) cerr << "--> getMinChannelCount" << endl; - return 1; -} - -size_t -NNLSChroma::getMaxChannelCount() const -{ - if (debug_on) cerr << "--> getMaxChannelCount" << endl; - return 1; -} - -NNLSChroma::ParameterList -NNLSChroma::getParameterDescriptors() const -{ - if (debug_on) cerr << "--> getParameterDescriptors" << endl; - ParameterList list; - - ParameterDescriptor d3; - d3.identifier = "preset"; - d3.name = "preset"; - d3.description = "Spectral paling: no paling - 0; whitening - 1."; - d3.unit = ""; - d3.isQuantized = true; - d3.quantizeStep = 1; - d3.minValue = 0.0; - d3.maxValue = 3.0; - d3.defaultValue = 0.0; - d3.valueNames.push_back("polyphonic pop"); - d3.valueNames.push_back("polyphonic pop (fast)"); - d3.valueNames.push_back("solo keyboard"); - d3.valueNames.push_back("manual"); - list.push_back(d3); - - ParameterDescriptor d5; - d5.identifier = "rollon"; - d5.name = "spectral roll-on"; - d5.description = "The bins below the spectral roll-on quantile will be set to 0."; - d5.unit = ""; - d5.minValue = 0; - d5.maxValue = 1; - d5.defaultValue = 0; - d5.isQuantized = false; - list.push_back(d5); - - // ParameterDescriptor d0; - // d0.identifier = "notedict"; - // d0.name = "note dictionary"; - // d0.description = "Notes in different note dictionaries differ by their spectral shapes."; - // d0.unit = ""; - // d0.minValue = 0; - // d0.maxValue = 1; - // d0.defaultValue = 0; - // d0.isQuantized = true; - // d0.valueNames.push_back("s = 0.6"); - // d0.valueNames.push_back("no NNLS"); - // d0.quantizeStep = 1.0; - // list.push_back(d0); - - ParameterDescriptor d1; - d1.identifier = "tuningmode"; - d1.name = "tuning mode"; - d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing."; - d1.unit = ""; - d1.minValue = 0; - d1.maxValue = 1; - d1.defaultValue = 0; - d1.isQuantized = true; - d1.valueNames.push_back("global tuning"); - d1.valueNames.push_back("local tuning"); - d1.quantizeStep = 1.0; - list.push_back(d1); - - // ParameterDescriptor d2; - // d2.identifier = "paling"; - // d2.name = "spectral paling"; - // d2.description = "Spectral paling: no paling - 0; whitening - 1."; - // d2.unit = ""; - // d2.isQuantized = true; - // // d2.quantizeStep = 0.1; - // d2.minValue = 0.0; - // d2.maxValue = 1.0; - // d2.defaultValue = 1.0; - // d2.isQuantized = false; - // list.push_back(d2); - ParameterDescriptor d4; - d4.identifier = "chromanormalize"; - d4.name = "chroma normalization"; - d4.description = "How shall the chroma vector be normalized?"; - d4.unit = ""; - d4.minValue = 0; - d4.maxValue = 3; - d4.defaultValue = 0; - d4.isQuantized = true; - d4.valueNames.push_back("none"); - d4.valueNames.push_back("maximum norm"); - d4.valueNames.push_back("L1 norm"); - d4.valueNames.push_back("L2 norm"); - d4.quantizeStep = 1.0; - list.push_back(d4); - - return list; -} - -float -NNLSChroma::getParameter(string identifier) const -{ - if (debug_on) cerr << "--> getParameter" << endl; - if (identifier == "notedict") { - return m_dictID; - } - - if (identifier == "paling") { - return m_paling; - } - - if (identifier == "rollon") { - return m_rollon; - } - - if (identifier == "tuningmode") { - if (m_tuneLocal) { - return 1.0; - } else { - return 0.0; - } - } - if (identifier == "preset") { - return m_preset; - } - if (identifier == "chromanormalize") { - return m_doNormalizeChroma; - } - return 0; - -} - -void -NNLSChroma::setParameter(string identifier, float value) -{ - if (debug_on) cerr << "--> setParameter" << endl; - if (identifier == "notedict") { - m_dictID = (int) value; - } - - if (identifier == "paling") { - m_paling = value; - } - - if (identifier == "tuningmode") { - m_tuneLocal = (value > 0) ? true : false; - // cerr << "m_tuneLocal :" << m_tuneLocal << endl; - } - if (identifier == "preset") { - m_preset = value; - if (m_preset == 0.0) { - m_tuneLocal = false; - m_paling = 1.0; - m_dictID = 0.0; - } - if (m_preset == 1.0) { - m_tuneLocal = false; - m_paling = 1.0; - m_dictID = 1.0; - } - if (m_preset == 2.0) { - m_tuneLocal = false; - m_paling = 0.7; - m_dictID = 0.0; - } - } - if (identifier == "chromanormalize") { - m_doNormalizeChroma = value; - } - - if (identifier == "rollon") { - m_rollon = value; - } -} - -NNLSChroma::ProgramList -NNLSChroma::getPrograms() const -{ - if (debug_on) cerr << "--> getPrograms" << endl; - ProgramList list; - - // If you have no programs, return an empty list (or simply don't - // implement this function or getCurrentProgram/selectProgram) - - return list; -} - -string -NNLSChroma::getCurrentProgram() const -{ - if (debug_on) cerr << "--> getCurrentProgram" << endl; - return ""; // no programs -} - -void -NNLSChroma::selectProgram(string name) -{ - if (debug_on) cerr << "--> selectProgram" << endl; -} - - NNLSChroma::OutputList NNLSChroma::getOutputDescriptors() const { @@ -357,26 +78,8 @@ } } - // int nNote = 84; + int index = 0; - // See OutputDescriptor documentation for the possibilities here. - // Every plugin must have at least one output. - - OutputDescriptor d0; - d0.identifier = "tuning"; - d0.name = "Tuning"; - d0.description = "The concert pitch."; - d0.unit = "Hz"; - d0.hasFixedBinCount = true; - d0.binCount = 0; - d0.hasKnownExtents = true; - d0.minValue = 427.47; - d0.maxValue = 452.89; - d0.isQuantized = false; - d0.sampleType = OutputDescriptor::VariableSampleRate; - d0.hasDuration = false; - list.push_back(d0); - OutputDescriptor d1; d1.identifier = "logfreqspec"; d1.name = "Log-Frequency Spectrum"; @@ -390,6 +93,7 @@ d1.hasDuration = false; d1.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; list.push_back(d1); + m_outputLogSpec = index++; OutputDescriptor d2; d2.identifier = "tunedlogfreqspec"; @@ -404,6 +108,7 @@ d2.hasDuration = false; d2.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; list.push_back(d2); + m_outputTunedSpec = index++; OutputDescriptor d3; d3.identifier = "semitonespectrum"; @@ -418,6 +123,7 @@ d3.hasDuration = false; d3.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; list.push_back(d3); + m_outputSemiSpec = index++; OutputDescriptor d4; d4.identifier = "chroma"; @@ -433,6 +139,7 @@ d4.hasDuration = false; d4.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; list.push_back(d4); + m_outputChroma = index++; OutputDescriptor d5; d5.identifier = "basschroma"; @@ -448,6 +155,7 @@ d5.hasDuration = false; d5.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; list.push_back(d5); + m_outputBassChroma = index++; OutputDescriptor d6; d6.identifier = "bothchroma"; @@ -463,69 +171,7 @@ d6.hasDuration = false; d6.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; list.push_back(d6); - - OutputDescriptor d7; - d7.identifier = "simplechord"; - d7.name = "Simple Chord Estimate"; - d7.description = "A simple chord estimate based on the inner product of chord templates with the smoothed chroma."; - d7.unit = ""; - d7.hasFixedBinCount = true; - d7.binCount = 0; - d7.hasKnownExtents = false; - d7.isQuantized = false; - d7.sampleType = OutputDescriptor::VariableSampleRate; - d7.hasDuration = false; - d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; - list.push_back(d7); - - // - // OutputDescriptor d9; - // d9.identifier = "inconsistencysegment"; - // d9.name = "Harmonic inconsistency segmenter"; - // d9.description = "Segments the audio based on the harmonic inconsistency value into speech and music."; - // d9.unit = ""; - // d9.hasFixedBinCount = true; - // d9.binCount = 0; - // d9.hasKnownExtents = true; - // d9.minValue = 0.1; - // d9.maxValue = 0.9; - // d9.isQuantized = false; - // d9.sampleType = OutputDescriptor::VariableSampleRate; - // d9.hasDuration = false; - // d9.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; - // list.push_back(d9); - // - OutputDescriptor d10; - d10.identifier = "localtuning"; - d10.name = "Local tuning"; - d10.description = "Tuning based on the history up to this timestamp."; - d10.unit = "Hz"; - d10.hasFixedBinCount = true; - d10.binCount = 1; - d10.hasKnownExtents = true; - d10.minValue = 427.47; - d10.maxValue = 452.89; - d10.isQuantized = false; - d10.sampleType = OutputDescriptor::FixedSampleRate; - d10.hasDuration = false; - // d10.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; - list.push_back(d10); - - OutputDescriptor d8; - d8.identifier = "harmonicchange"; - d8.name = "Harmonic change value"; - d8.description = "Harmonic change."; - d8.unit = ""; - d8.hasFixedBinCount = true; - d8.binCount = 1; - d8.hasKnownExtents = true; - d8.minValue = 0.0; - d8.maxValue = 0.999; - d8.isQuantized = false; - d8.sampleType = OutputDescriptor::FixedSampleRate; - d8.hasDuration = false; - // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; - list.push_back(d8); + m_outputBothChroma = index++; return list; } @@ -537,44 +183,11 @@ if (debug_on) { cerr << "--> initialise"; } - - if (channels < getMinChannelCount() || - channels > getMaxChannelCount()) return false; - m_blockSize = blockSize; - m_stepSize = stepSize; - frameCount = 0; - int tempn = 256 * m_blockSize/2; - // cerr << "length of tempkernel : " << tempn << endl; - float *tempkernel; - tempkernel = new float[tempn]; + if (!NNLSBase::initialise(channels, stepSize, blockSize)) { + return false; + } - logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel); - m_kernelValue.clear(); - m_kernelFftIndex.clear(); - m_kernelNoteIndex.clear(); - int countNonzero = 0; - for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix - for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) { - if (tempkernel[iFFT + blockSize/2 * iNote] > 0) { - m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]); - if (tempkernel[iFFT + blockSize/2 * iNote] > 0) { - countNonzero++; - } - m_kernelFftIndex.push_back(iFFT); - m_kernelNoteIndex.push_back(iNote); - } - } - } - // cerr << "nonzero count : " << countNonzero << endl; - delete [] tempkernel; - ofstream myfile; - myfile.open ("matrix.txt"); - // myfile << "Writing this to a file.\n"; - for (int i = 0; i < nNote * 84; ++i) { - myfile << m_dict[i] << endl; - } - myfile.close(); return true; } @@ -582,122 +195,18 @@ NNLSChroma::reset() { if (debug_on) cerr << "--> reset"; - - // Clear buffers, reset stored values, etc - frameCount = 0; - m_dictID = 0; - m_fl.clear(); - m_meanTuning0 = 0; - m_meanTuning1 = 0; - m_meanTuning2 = 0; - m_localTuning0 = 0; - m_localTuning1 = 0; - m_localTuning2 = 0; - m_localTuning.clear(); + NNLSBase::reset(); } NNLSChroma::FeatureSet NNLSChroma::process(const float *const *inputBuffers, Vamp::RealTime timestamp) { if (debug_on) cerr << "--> process" << endl; - frameCount++; - float *magnitude = new float[m_blockSize/2]; - - Feature f10; // local tuning - f10.hasTimestamp = true; - f10.timestamp = timestamp; - const float *fbuf = inputBuffers[0]; - float energysum = 0; - // make magnitude - float maxmag = -10000; - for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) { - magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] + - fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]); - if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin]; - if (m_rollon > 0) { - energysum += pow(magnitude[iBin],2); - } - } - - float cumenergy = 0; - if (m_rollon > 0) { - for (size_t iBin = 2; iBin < m_blockSize/2; iBin++) { - cumenergy += pow(magnitude[iBin],2); - if (cumenergy < energysum * m_rollon) magnitude[iBin-2] = 0; - else break; - } - } - - if (maxmag < 2) { - // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl; - for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) { - magnitude[iBin] = 0; - } - } - - // note magnitude mapping using pre-calculated matrix - float *nm = new float[nNote]; // note magnitude - for (size_t iNote = 0; iNote < nNote; iNote++) { - nm[iNote] = 0; // initialise as 0 - } - int binCount = 0; - for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) { - // cerr << "."; - nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount]; - // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl; - binCount++; - } - // cerr << nm[20]; - // cerr << endl; - - - float one_over_N = 1.0/frameCount; - // update means of complex tuning variables - m_meanTuning0 *= float(frameCount-1)*one_over_N; - m_meanTuning1 *= float(frameCount-1)*one_over_N; - m_meanTuning2 *= float(frameCount-1)*one_over_N; - - for (int iTone = 0; iTone < 160; iTone = iTone + 3) { - m_meanTuning0 += nm[iTone + 0]*one_over_N; - m_meanTuning1 += nm[iTone + 1]*one_over_N; - m_meanTuning2 += nm[iTone + 2]*one_over_N; - float ratioOld = 0.997; - m_localTuning0 *= ratioOld; m_localTuning0 += nm[iTone + 0] * (1 - ratioOld); - m_localTuning1 *= ratioOld; m_localTuning1 += nm[iTone + 1] * (1 - ratioOld); - m_localTuning2 *= ratioOld; m_localTuning2 += nm[iTone + 2] * (1 - ratioOld); - } - - // if (m_tuneLocal) { - // local tuning - float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2; - float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2; - float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI); - m_localTuning.push_back(normalisedtuning); - float tuning440 = 440 * pow(2,normalisedtuning/12); - f10.values.push_back(tuning440); - // cerr << tuning440 << endl; - // } - - Feature f1; // logfreqspec - f1.hasTimestamp = true; - f1.timestamp = timestamp; - for (size_t iNote = 0; iNote < nNote; iNote++) { - f1.values.push_back(nm[iNote]); - } + + NNLSBase::baseProcess(inputBuffers, timestamp); FeatureSet fs; - fs[1].push_back(f1); - fs[8].push_back(f10); - - // deletes - delete[] magnitude; - delete[] nm; - - m_fl.push_back(f1); // remember note magnitude for getRemainingFeatures - char * pPath; - pPath = getenv ("VAMP_PATH"); - - + fs[m_outputLogSpec].push_back(m_logSpectrum[m_logSpectrum.size()-1]); return fs; } @@ -706,8 +215,7 @@ { if (debug_on) cerr << "--> getRemainingFeatures" << endl; FeatureSet fsOut; - if (m_fl.size() == 0) return fsOut; - int nChord = m_chordnames.size(); + if (m_logSpectrum.size() == 0) return fsOut; // /** Calculate Tuning calculate tuning from (using the angle of the complex number defined by the @@ -726,13 +234,6 @@ // cerr << "normalisedtuning: " << normalisedtuning << '\n'; - // push tuning to FeatureSet fsOut - Feature f0; // tuning - f0.hasTimestamp = true; - f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));; - f0.label = buffer0; - fsOut[0].push_back(f0); - /** Tune Log-Frequency Spectrogram calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to perform linear interpolation on the existing log-frequency spectrogram (kinda f1). @@ -745,7 +246,7 @@ // cerr << "tune local ? " << m_tuneLocal << endl; int count = 0; - for (FeatureList::iterator i = m_fl.begin(); i != m_fl.end(); ++i) { + for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) { Feature f1 = *i; Feature f2; // tuned log-frequency spectrum f2.hasTimestamp = true; @@ -783,7 +284,7 @@ cerr << "ERROR: negative value in logfreq spectrum" << endl; } } - fsOut[2].push_back(f2); + fsOut[m_outputTunedSpec].push_back(f2); count++; } cerr << "done." << endl; @@ -801,9 +302,6 @@ } - vector<vector<float> > chordogram; - vector<vector<int> > scoreChordogram; - vector<float> chordchange = vector<float>(fsOut[2].size(),0); vector<float> oldchroma = vector<float>(12,0); vector<float> oldbasschroma = vector<float>(12,0); count = 0; @@ -827,7 +325,7 @@ f6.hasTimestamp = true; f6.timestamp = f2.timestamp; - float b[256]; + float b[256]; bool some_b_greater_zero = false; float sumb = 0; @@ -861,34 +359,34 @@ } } else { - float x[84+1000]; + float x[84+1000]; for (int i = 1; i < 1084; ++i) x[i] = 1.0; vector<int> signifIndex; int index=0; sumb /= 84.0; for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) { float currval = 0; - currval += b[iNote + 1 + -1]; - currval += b[iNote + 1 + 0]; + currval += b[iNote + 1 + -1]; + currval += b[iNote + 1 + 0]; currval += b[iNote + 1 + 1]; if (currval > 0) signifIndex.push_back(index); f3.values.push_back(0); // fill the values, change later index++; } - float rnorm; - float w[84+1000]; - float zz[84+1000]; + float rnorm; + float w[84+1000]; + float zz[84+1000]; int indx[84+1000]; int mode; int dictsize = 256*signifIndex.size(); // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl; - float *curr_dict = new float[dictsize]; + float *curr_dict = new float[dictsize]; for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) { for (unsigned iBin = 0; iBin < 256; iBin++) { curr_dict[iNote * 256 + iBin] = 1.0 * m_dict[signifIndex[iNote] * 256 + iBin]; } } - nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode); + nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode); delete [] curr_dict; for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) { f3.values[signifIndex[iNote]] = x[iNote]; @@ -899,9 +397,6 @@ } } - - - f4.values = chroma; f5.values = basschroma; chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas @@ -958,272 +453,16 @@ f6.values[i] /= chromanorm[2]; } } - } - // local chord estimation - vector<float> currentChordSalience; - float tempchordvalue = 0; - float sumchordvalue = 0; - - for (int iChord = 0; iChord < nChord; iChord++) { - tempchordvalue = 0; - for (int iBin = 0; iBin < 12; iBin++) { - tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin]; - } - for (int iBin = 12; iBin < 24; iBin++) { - tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin]; - } - sumchordvalue+=tempchordvalue; - currentChordSalience.push_back(tempchordvalue); - } - if (sumchordvalue > 0) { - for (int iChord = 0; iChord < nChord; iChord++) { - currentChordSalience[iChord] /= sumchordvalue; - } - } else { - currentChordSalience[nChord-1] = 1.0; - } - chordogram.push_back(currentChordSalience); - - fsOut[3].push_back(f3); - fsOut[4].push_back(f4); - fsOut[5].push_back(f5); - fsOut[6].push_back(f6); + fsOut[m_outputSemiSpec].push_back(f3); + fsOut[m_outputChroma].push_back(f4); + fsOut[m_outputBassChroma].push_back(f5); + fsOut[m_outputBothChroma].push_back(f6); count++; } cerr << "done." << endl; - - /* Simple chord estimation - I just take the local chord estimates ("currentChordSalience") and average them over time, then - take the maximum. Very simple, don't do this at home... - */ - cerr << "[NNLS Chroma Plugin] Chord Estimation ... "; - count = 0; - int halfwindowlength = m_inputSampleRate / m_stepSize; - vector<int> chordSequence; - for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram - vector<int> temp = vector<int>(nChord,0); - scoreChordogram.push_back(temp); - } - for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) { - int startIndex = count + 1; - int endIndex = count + 2 * halfwindowlength; - - float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1); - - vector<int> chordCandidates; - for (unsigned iChord = 0; iChord < nChord-1; iChord++) { - // float currsum = 0; - // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) { - // currsum += chordogram[iFrame][iChord]; - // } - // if (currsum > chordThreshold) chordCandidates.push_back(iChord); - for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) { - if (chordogram[iFrame][iChord] > chordThreshold) { - chordCandidates.push_back(iChord); - break; - } - } - } - chordCandidates.push_back(nChord-1); - // cerr << chordCandidates.size() << endl; - - float maxval = 0; // will be the value of the most salient *chord change* in this frame - float maxindex = 0; //... and the index thereof - unsigned bestchordL = nChord-1; // index of the best "left" chord - unsigned bestchordR = nChord-1; // index of the best "right" chord - - for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) { - // now find the max values on both sides of iWF - // left side: - float maxL = 0; - unsigned maxindL = nChord-1; - for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) { - unsigned iChord = chordCandidates[kChord]; - float currsum = 0; - for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) { - currsum += chordogram[count+iFrame][iChord]; - } - if (iChord == nChord-1) currsum *= 0.8; - if (currsum > maxL) { - maxL = currsum; - maxindL = iChord; - } - } - // right side: - float maxR = 0; - unsigned maxindR = nChord-1; - for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) { - unsigned iChord = chordCandidates[kChord]; - float currsum = 0; - for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) { - currsum += chordogram[count+iFrame][iChord]; - } - if (iChord == nChord-1) currsum *= 0.8; - if (currsum > maxR) { - maxR = currsum; - maxindR = iChord; - } - } - if (maxL+maxR > maxval) { - maxval = maxL+maxR; - maxindex = iWF; - bestchordL = maxindL; - bestchordR = maxindR; - } - - } - // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl; - // add a score to every chord-frame-point that was part of a maximum - for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) { - scoreChordogram[iFrame+count][bestchordL]++; - } - for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) { - scoreChordogram[iFrame+count][bestchordR]++; - } - if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength; - count++; - } - // cerr << "******* agent finished *******" << endl; - count = 0; - for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { - float maxval = 0; // will be the value of the most salient chord in this frame - float maxindex = 0; //... and the index thereof - for (unsigned iChord = 0; iChord < nChord; iChord++) { - if (scoreChordogram[count][iChord] > maxval) { - maxval = scoreChordogram[count][iChord]; - maxindex = iChord; - // cerr << iChord << endl; - } - } - chordSequence.push_back(maxindex); - // cerr << "before modefilter, maxindex: " << maxindex << endl; - count++; - } - // cerr << "******* mode filter done *******" << endl; - - - // mode filter on chordSequence - count = 0; - string oldChord = ""; - for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { - Feature f6 = *it; - Feature f7; // chord estimate - f7.hasTimestamp = true; - f7.timestamp = f6.timestamp; - Feature f8; // chord estimate - f8.hasTimestamp = true; - f8.timestamp = f6.timestamp; - - vector<int> chordCount = vector<int>(nChord,0); - int maxChordCount = 0; - int maxChordIndex = nChord-1; - string maxChord; - int startIndex = max(count - halfwindowlength/2,0); - int endIndex = min(int(chordogram.size()), count + halfwindowlength/2); - for (int i = startIndex; i < endIndex; i++) { - chordCount[chordSequence[i]]++; - if (chordCount[chordSequence[i]] > maxChordCount) { - // cerr << "start index " << startIndex << endl; - maxChordCount++; - maxChordIndex = chordSequence[i]; - maxChord = m_chordnames[maxChordIndex]; - } - } - // chordSequence[count] = maxChordIndex; - // cerr << maxChordIndex << endl; - f8.values.push_back(chordchange[count]/(halfwindowlength*2)); - // cerr << chordchange[count] << endl; - fsOut[9].push_back(f8); - if (oldChord != maxChord) { - oldChord = maxChord; - - // char buffer1 [50]; - // if (maxChordIndex < nChord - 1) { - // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]); - // } else { - // sprintf(buffer1, "N"); - // } - // f7.label = buffer1; - f7.label = m_chordnames[maxChordIndex]; - fsOut[7].push_back(f7); - } - count++; - } - Feature f7; // last chord estimate - f7.hasTimestamp = true; - f7.timestamp = fsOut[6][fsOut[6].size()-1].timestamp; - f7.label = "N"; - fsOut[7].push_back(f7); - cerr << "done." << endl; - // // musicity - // count = 0; - // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2 - // vector<float> musicityValue; - // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) { - // Feature f4 = *it; - // - // int startIndex = max(count - musicitykernelwidth/2,0); - // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1); - // float chromasum = 0; - // float diffsum = 0; - // for (int k = 0; k < 12; k++) { - // for (int i = startIndex + 1; i < endIndex; i++) { - // chromasum += pow(fsOut[4][i].values[k],2); - // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]); - // } - // } - // diffsum /= chromasum; - // musicityValue.push_back(diffsum); - // count++; - // } - // - // float musicityThreshold = 0.44; - // if (m_stepSize == 4096) { - // musicityThreshold = 0.74; - // } - // if (m_stepSize == 4410) { - // musicityThreshold = 0.77; - // } - // - // count = 0; - // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) { - // Feature f4 = *it; - // Feature f8; // musicity - // Feature f9; // musicity segmenter - // - // f8.hasTimestamp = true; - // f8.timestamp = f4.timestamp; - // f9.hasTimestamp = true; - // f9.timestamp = f4.timestamp; - // - // int startIndex = max(count - musicitykernelwidth/2,0); - // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1); - // int musicityCount = 0; - // for (int i = startIndex; i <= endIndex; i++) { - // if (musicityValue[i] > musicityThreshold) musicityCount++; - // } - // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1); - // - // if (isSpeech) { - // if (oldlabeltype != 2) { - // f9.label = "Speech"; - // fsOut[9].push_back(f9); - // oldlabeltype = 2; - // } - // } else { - // if (oldlabeltype != 1) { - // f9.label = "Music"; - // fsOut[9].push_back(f9); - // oldlabeltype = 1; - // } - // } - // f8.values.push_back(musicityValue[count]); - // fsOut[8].push_back(f8); - // count++; - // } return fsOut; }