Mercurial > hg > nnls-chroma
diff NNLSChroma.cpp @ 17:e72c4ad432d8 matthiasm-plugin
sorry, long time no commit
author | matthiasm |
---|---|
date | Wed, 29 Sep 2010 11:58:00 +0000 |
parents | 6f7fc832cb44 |
children | 444c344681f3 |
line wrap: on
line diff
--- a/NNLSChroma.cpp Wed Sep 29 11:18:15 2010 +0000 +++ b/NNLSChroma.cpp Wed Sep 29 11:58:00 2010 +0000 @@ -46,6 +46,23 @@ {"G","","A","Bb","B","C","","D","","E","F","F#"}, {"Ab","","Bb","Cb","C","Db","","Eb","","F","Gb","G"} }; + + +// const char* bassnames[12][12] ={ +// {"1","","2","b3","3","4","","5","","6","b7","7"}, +// {"1","","2","b3","3","4","","5","","6","b7","7"}, +// {"1","","2","b3","3","4","","5","","6","b7","7"}, +// {"1","","2","b3","3","4","","5","","6","b7","7"}, +// {"1","","2","b3","3","4","","5","","6","b7","7"}, +// {"1","","2","b3","3","4","","5","","6","b7","7"}, +// {"1","","2","b3","3","4","","5","","6","b7","7"}, +// {"1","","2","b3","3","4","","5","","6","b7","7"}, +// {"1","","2","b3","3","4","","5","","6","b7","7"}, +// {"1","","2","b3","3","4","","5","","6","b7","7"}, +// {"1","","2","b3","3","4","","5","","6","b7","7"}, +// {"1","","2","b3","3","4","","5","","6","b7","7"}, +// }; + const vector<float> hw(hammingwind, hammingwind+19); const int nNote = 256; @@ -184,7 +201,7 @@ return true; } -bool dictionaryMatrix(float* dm) { +void dictionaryMatrix(float* dm) { int binspersemitone = 3; // this must be 3 int minoctave = 0; // this must be 0 int maxoctave = 7; // this must be 7 @@ -247,7 +264,7 @@ // string instring[] = ",1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\nm,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0\n6,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0\n7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0\nmaj7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1\nmin7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0\n,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\n,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\ndim,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0\naug,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0\n"; typedef tokenizer<char_separator<char> > Tok; // char_separator<char> sep; // default constructed - char_separator<char> sep(",; ",":"); + char_separator<char> sep(",; ","="); iostreams::stream<iostreams::file_source> chordDictFile(chordDictFilename.c_str()); string line; int iElement = 0; @@ -267,7 +284,7 @@ string tempString = *tok_iter; // cerr << tempString << endl; if (tok_iter == tok.begin()) { // either the chord name or a colon - if (tempString == ":") { + if (tempString == "=") { chordType = ""; } else { chordType = tempString; @@ -306,7 +323,7 @@ } else { os << notenames[12+iSemitone] << chordType << "/" << slashNotation; } - + // cerr << os.str() << endl; loadedChordNames.push_back(os.str()); } } @@ -392,7 +409,8 @@ m_dictID(0), m_chorddict(0), m_chordnames(0), - m_doNormalizeChroma(0) + m_doNormalizeChroma(0), + m_rollon(0.01) { if (debug_on) cerr << "--> NNLSChroma" << endl; @@ -523,6 +541,17 @@ d3.valueNames.push_back("manual"); list.push_back(d3); + ParameterDescriptor d5; + d5.identifier = "rollon"; + d5.name = "spectral roll-on"; + d5.description = "The bins below the spectral roll-on quantile will be set to 0."; + d5.unit = ""; + d5.minValue = 0; + d5.maxValue = 1; + d5.defaultValue = 0; + d5.isQuantized = false; + list.push_back(d5); + // ParameterDescriptor d0; // d0.identifier = "notedict"; // d0.name = "note dictionary"; @@ -593,6 +622,10 @@ if (identifier == "paling") { return m_paling; } + + if (identifier == "rollon") { + return m_rollon; + } if (identifier == "tuningmode") { if (m_tuneLocal) { @@ -648,6 +681,10 @@ if (identifier == "chromanormalize") { m_doNormalizeChroma = value; } + + if (identifier == "rollon") { + m_rollon = value; + } } NNLSChroma::ProgramList @@ -813,19 +850,6 @@ d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; list.push_back(d7); - // OutputDescriptor d8; - // d8.identifier = "inconsistency"; - // d8.name = "Harmonic inconsistency value"; - // d8.description = "Harmonic inconsistency. Indicates music if low, non-music or speech when high."; - // d8.unit = ""; - // d8.hasFixedBinCount = true; - // d8.binCount = 1; - // d8.hasKnownExtents = false; - // d8.isQuantized = false; - // d8.sampleType = OutputDescriptor::FixedSampleRate; - // d8.hasDuration = false; - // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; - // list.push_back(d8); // // OutputDescriptor d9; // d9.identifier = "inconsistencysegment"; @@ -844,20 +868,36 @@ // list.push_back(d9); // OutputDescriptor d10; - d10.identifier = "localtuning"; - d10.name = "Local tuning"; - d10.description = "Tuning based on the history up to this timestamp."; - d10.unit = "Hz"; - d10.hasFixedBinCount = true; - d10.binCount = 1; - d10.hasKnownExtents = true; - d10.minValue = 427.47; - d10.maxValue = 452.89; - d10.isQuantized = false; - d10.sampleType = OutputDescriptor::FixedSampleRate; - d10.hasDuration = false; - // d10.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; - list.push_back(d10); + d10.identifier = "localtuning"; + d10.name = "Local tuning"; + d10.description = "Tuning based on the history up to this timestamp."; + d10.unit = "Hz"; + d10.hasFixedBinCount = true; + d10.binCount = 1; + d10.hasKnownExtents = true; + d10.minValue = 427.47; + d10.maxValue = 452.89; + d10.isQuantized = false; + d10.sampleType = OutputDescriptor::FixedSampleRate; + d10.hasDuration = false; + // d10.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; + list.push_back(d10); + + OutputDescriptor d8; + d8.identifier = "harmonicchange"; + d8.name = "Harmonic change value"; + d8.description = "Harmonic change."; + d8.unit = ""; + d8.hasFixedBinCount = true; + d8.binCount = 1; + d8.hasKnownExtents = true; + d8.minValue = 0.0; + d8.maxValue = 0.999; + d8.isQuantized = false; + d8.sampleType = OutputDescriptor::FixedSampleRate; + d8.hasDuration = false; + // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; + list.push_back(d8); return list; } @@ -939,16 +979,28 @@ f10.hasTimestamp = true; f10.timestamp = timestamp; const float *fbuf = inputBuffers[0]; - + float energysum = 0; // make magnitude float maxmag = -10000; for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) { magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] + fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]); if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin]; + if (m_rollon > 0) { + energysum += pow(magnitude[iBin],2); + } } - if (maxmag < 10) { + float cumenergy = 0; + if (m_rollon > 0) { + for (size_t iBin = 2; iBin < m_blockSize/2; iBin++) { + cumenergy += pow(magnitude[iBin],2); + if (cumenergy < energysum * m_rollon) magnitude[iBin-2] = 0; + else break; + } + } + + if (maxmag < 2) { // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl; for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) { magnitude[iBin] = 0; @@ -1057,7 +1109,7 @@ calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to perform linear interpolation on the existing log-frequency spectrogram (kinda f1). **/ - cerr << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... "; + cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... "; float tempValue = 0; float dbThreshold = 0; // relative to the background spectrum @@ -1123,6 +1175,7 @@ vector<vector<float> > chordogram; vector<vector<int> > scoreChordogram; + vector<float> chordchange = vector<float>(fsOut[2].size(),0); vector<float> oldchroma = vector<float>(12,0); vector<float> oldbasschroma = vector<float>(12,0); count = 0; @@ -1296,9 +1349,13 @@ sumchordvalue+=tempchordvalue; currentChordSalience.push_back(tempchordvalue); } - for (int iChord = 0; iChord < nChord; iChord++) { - currentChordSalience[iChord] /= sumchordvalue; - } + if (sumchordvalue > 0) { + for (int iChord = 0; iChord < nChord; iChord++) { + currentChordSalience[iChord] /= sumchordvalue; + } + } else { + currentChordSalience[nChord-1] = 1.0; + } chordogram.push_back(currentChordSalience); fsOut[3].push_back(f3); @@ -1398,6 +1455,7 @@ for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) { scoreChordogram[iFrame+count][bestchordR]++; } + if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength; count++; } // cerr << "******* agent finished *******" << endl; @@ -1427,6 +1485,10 @@ Feature f7; // chord estimate f7.hasTimestamp = true; f7.timestamp = f6.timestamp; + Feature f8; // chord estimate + f8.hasTimestamp = true; + f8.timestamp = f6.timestamp; + vector<int> chordCount = vector<int>(nChord,0); int maxChordCount = 0; int maxChordIndex = nChord-1; @@ -1444,6 +1506,9 @@ } // chordSequence[count] = maxChordIndex; // cerr << maxChordIndex << endl; + f8.values.push_back(chordchange[count]/(halfwindowlength*2)); + // cerr << chordchange[count] << endl; + fsOut[9].push_back(f8); if (oldChord != maxChord) { oldChord = maxChord; @@ -1459,6 +1524,11 @@ } count++; } + Feature f7; // last chord estimate + f7.hasTimestamp = true; + f7.timestamp = fsOut[6][fsOut[6].size()-1].timestamp; + f7.label = "N"; + fsOut[7].push_back(f7); cerr << "done." << endl; // // musicity // count = 0;