comparison NNLSBase.cpp @ 81:4270f3039ab0 matthiasm-plugin

dont remember, sorry
author Matthias Mauch <mail@matthiasmauch.net>
date Mon, 15 Nov 2010 11:01:36 +0900
parents 026a5c0ee2c2
children e5c16976513d
comparison	80:026a5c0ee2c2 (parent) with 81:4270f3039ab0

In 81:4270f3039ab0 every line of the body of getRemainingFeatures() (a block that is already excluded from the build by #ifdef NOT_DEFINED) is commented out with a leading "//"; only the function frame, the declaration "FeatureSet fsOut;" and the final "return fsOut;" keep their code. The parent revision 80:026a5c0ee2c2 reads:

#ifdef NOT_DEFINED

NNLSBase::FeatureSet
NNLSBase::getRemainingFeatures()
{
    if (debug_on) cerr << "--> getRemainingFeatures" << endl;
    FeatureSet fsOut;
    if (m_logSpectrum.size() == 0) return fsOut;
    int nChord = m_chordnames.size();
    //
    /** Calculate Tuning
        calculate tuning from (using the angle of the complex number defined by the
        cumulative mean real and imag values)
    **/
    float meanTuningImag = sinvalue * m_meanTunings[1] - sinvalue * m_meanTunings[2];
    float meanTuningReal = m_meanTunings[0] + cosvalue * m_meanTunings[1] + cosvalue * m_meanTunings[2];
    float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
    float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
    int intShift = floor(normalisedtuning * 3);
    float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this
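    // Note (added, not in the original source): m_meanTunings[0..2] accumulate the three
    // log-frequency bins per semitone; treating them as a phasor (presumably with
    // sinvalue = sin(2*pi/3) and cosvalue = cos(2*pi/3)), atan2 gives the average tuning offset.
    // normalisedtuning is that offset in semitones (roughly -0.5 .. 0.5), so multiplying by 3
    // converts it into a bin shift, split into integer and fractional parts.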

    char buffer0 [50];

    sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);

    // cerr << "normalisedtuning: " << normalisedtuning << '\n';

    // push tuning to FeatureSet fsOut
    Feature f0; // tuning
    f0.hasTimestamp = true;
    f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));;
    f0.label = buffer0;
    fsOut[0].push_back(f0);

    /** Tune Log-Frequency Spectrogram
        calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
        perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
    **/
    cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... ";

    float tempValue = 0;
    float dbThreshold = 0; // relative to the background spectrum
    float thresh = pow(10,dbThreshold/20);
    // cerr << "tune local ? " << m_tuneLocal << endl;
    int count = 0;

    for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
        Feature f1 = *i;
        Feature f2; // tuned log-frequency spectrum
        f2.hasTimestamp = true;
        f2.timestamp = f1.timestamp;
        f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero

        if (m_tuneLocal == 1.0) {
            intShift = floor(m_localTuning[count] * 3);
            floatShift = m_localTuning[count] * 3 - intShift; // floatShift is a really bad name for this
        }

        // cerr << intShift << " " << floatShift << endl;

        for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
            tempValue = f1.values[k + intShift] * (1-floatShift) + f1.values[k+intShift+1] * floatShift;
            f2.values.push_back(tempValue);
        }

        f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
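        // Note (added): the next few lines perform a background subtraction / whitening step:
        // SpecialConvolution with the window hw gives a running mean over the log-frequency bins,
        // the same convolution of the squared deviations gives a running standard deviation, and
        // each bin is replaced by (value - mean) / std^m_whitening, floored at zero.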
        vector<float> runningmean = SpecialConvolution(f2.values,hw);
        vector<float> runningstd;
        for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance)
            runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
        }
        runningstd = SpecialConvolution(runningstd,hw); // second step convolve
        for (int i = 0; i < nNote; i++) {
            runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
            if (runningstd[i] > 0) {
                // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
                //     (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
                f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
                    (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
            }
            if (f2.values[i] < 0) {
                cerr << "ERROR: negative value in logfreq spectrum" << endl;
            }
        }
        fsOut[2].push_back(f2);
        count++;
    }
    cerr << "done." << endl;

    /** Semitone spectrum and chromagrams
        Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
        is inferred using a non-negative least squares algorithm.
        Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
        bass and treble stacked onto each other).
    **/
    if (m_useNNLS == 0) {
        cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... ";
    } else {
        cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... ";
    }


    vector<vector<float> > chordogram;
    vector<vector<int> > scoreChordogram;
    vector<float> chordchange = vector<float>(fsOut[2].size(),0);
    vector<float> oldchroma = vector<float>(12,0);
    vector<float> oldbasschroma = vector<float>(12,0);
    count = 0;

    for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) {
        Feature f2 = *it; // logfreq spectrum
        Feature f3; // semitone spectrum
        Feature f4; // treble chromagram
        Feature f5; // bass chromagram
        Feature f6; // treble and bass chromagram

        f3.hasTimestamp = true;
        f3.timestamp = f2.timestamp;

        f4.hasTimestamp = true;
        f4.timestamp = f2.timestamp;

        f5.hasTimestamp = true;
        f5.timestamp = f2.timestamp;

        f6.hasTimestamp = true;
        f6.timestamp = f2.timestamp;

        float b[nNote];

        bool some_b_greater_zero = false;
        float sumb = 0;
        for (int i = 0; i < nNote; i++) {
            // b[i] = m_dict[(nNote * count + i) % (nNote * 84)];
            b[i] = f2.values[i];
            sumb += b[i];
            if (b[i] > 0) {
                some_b_greater_zero = true;
            }
        }

        // here's where the non-negative least squares algorithm calculates the note activation x

        vector<float> chroma = vector<float>(12, 0);
        vector<float> basschroma = vector<float>(12, 0);
        float currval;
        unsigned iSemitone = 0;

        if (some_b_greater_zero) {
            if (m_useNNLS == 0) {
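                // Note (added): with NNLS disabled, each semitone value is apparently just a
                // weighted sum of the three log-frequency bins around its centre (weights 0.5, 1,
                // 0.5); the treble and bass chromagrams then accumulate it through the
                // treblewindow/basswindow profiles.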
                for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
                    currval = 0;
                    currval += b[iNote + 1 + -1] * 0.5;
                    currval += b[iNote + 1 + 0] * 1.0;
                    currval += b[iNote + 1 + 1] * 0.5;
                    f3.values.push_back(currval);
                    chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
                    basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
                    iSemitone++;
                }

            } else {
                float x[84+1000];
                for (int i = 1; i < 1084; ++i) x[i] = 1.0;
                vector<int> signifIndex;
                int index=0;
                sumb /= 84.0;
                for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
                    float currval = 0;
                    currval += b[iNote + 1 + -1];
                    currval += b[iNote + 1 + 0];
                    currval += b[iNote + 1 + 1];
                    if (currval > 0) signifIndex.push_back(index);
                    f3.values.push_back(0); // fill the values, change later
                    index++;
                }
                float rnorm;
                float w[84+1000];
                float zz[84+1000];
                int indx[84+1000];
                int mode;
                int dictsize = nNote*signifIndex.size();
                // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
                float *curr_dict = new float[dictsize];
                for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
                    for (unsigned iBin = 0; iBin < nNote; iBin++) {
                        curr_dict[iNote * nNote + iBin] = 1.0 * m_dict[signifIndex[iNote] * nNote + iBin];
                    }
                }
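                // Note (added): nnls() appears to be the Lawson/Hanson routine; it solves
                //     minimise || curr_dict * x - b ||   subject to x >= 0,
                // with curr_dict stored column-major (leading dimension nNote, one column per
                // candidate note), so x returns the non-negative note activations.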
                nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
                delete [] curr_dict;
                for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
                    f3.values[signifIndex[iNote]] = x[iNote];
                    // cerr << mode << endl;
                    chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
                    basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
                }
            }
        }




        f4.values = chroma;
        f5.values = basschroma;
        chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
        f6.values = chroma;

        if (m_doNormalizeChroma > 0) {
            vector<float> chromanorm = vector<float>(3,0);
            switch (int(m_doNormalizeChroma)) {
            case 0: // should never end up here
                break;
            case 1:
                chromanorm[0] = *max_element(f4.values.begin(), f4.values.end());
                chromanorm[1] = *max_element(f5.values.begin(), f5.values.end());
                chromanorm[2] = max(chromanorm[0], chromanorm[1]);
                break;
            case 2:
                for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
                    chromanorm[0] += *it;
                }
                for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
                    chromanorm[1] += *it;
                }
                for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
                    chromanorm[2] += *it;
                }
                break;
            case 3:
                for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
                    chromanorm[0] += pow(*it,2);
                }
                chromanorm[0] = sqrt(chromanorm[0]);
                for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
                    chromanorm[1] += pow(*it,2);
                }
                chromanorm[1] = sqrt(chromanorm[1]);
                for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
                    chromanorm[2] += pow(*it,2);
                }
                chromanorm[2] = sqrt(chromanorm[2]);
                break;
            }
            if (chromanorm[0] > 0) {
                for (int i = 0; i < f4.values.size(); i++) {
                    f4.values[i] /= chromanorm[0];
                }
            }
            if (chromanorm[1] > 0) {
                for (int i = 0; i < f5.values.size(); i++) {
                    f5.values[i] /= chromanorm[1];
                }
            }
            if (chromanorm[2] > 0) {
                for (int i = 0; i < f6.values.size(); i++) {
                    f6.values[i] /= chromanorm[2];
                }
            }

        }

        // local chord estimation
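        // Note (added): m_chorddict seems to hold one 24-element template per chord, matching the
        // stacked bass+treble chroma built above; each chord's salience is the inner product of
        // its template with that 24-bin vector, normalised below so the saliences of a frame sum to 1.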
        vector<float> currentChordSalience;
        float tempchordvalue = 0;
        float sumchordvalue = 0;

        for (int iChord = 0; iChord < nChord; iChord++) {
            tempchordvalue = 0;
            for (int iBin = 0; iBin < 12; iBin++) {
                tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
            }
            for (int iBin = 12; iBin < 24; iBin++) {
                tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
            }
            sumchordvalue+=tempchordvalue;
            currentChordSalience.push_back(tempchordvalue);
        }
        if (sumchordvalue > 0) {
            for (int iChord = 0; iChord < nChord; iChord++) {
                currentChordSalience[iChord] /= sumchordvalue;
            }
        } else {
            currentChordSalience[nChord-1] = 1.0;
        }
        chordogram.push_back(currentChordSalience);

        fsOut[3].push_back(f3);
        fsOut[4].push_back(f4);
        fsOut[5].push_back(f5);
        fsOut[6].push_back(f6);
        count++;
    }
    cerr << "done." << endl;


    /* Simple chord estimation
       I just take the local chord estimates ("currentChordSalience") and average them over time, then
       take the maximum. Very simple, don't do this at home...
    */
    cerr << "[NNLS Chroma Plugin] Chord Estimation ... ";
    count = 0;
    int halfwindowlength = m_inputSampleRate / m_stepSize;
    vector<int> chordSequence;
    for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram
        vector<int> temp = vector<int>(nChord,0);
        scoreChordogram.push_back(temp);
    }
    for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) {
        int startIndex = count + 1;
        int endIndex = count + 2 * halfwindowlength;

        float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);

        vector<int> chordCandidates;
        for (unsigned iChord = 0; iChord < nChord-1; iChord++) {
            // float currsum = 0;
            // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
            //     currsum += chordogram[iFrame][iChord];
            // }
            // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
            for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
                if (chordogram[iFrame][iChord] > chordThreshold) {
                    chordCandidates.push_back(iChord);
                    break;
                }
            }
        }
        chordCandidates.push_back(nChord-1);
        // cerr << chordCandidates.size() << endl;

        float maxval = 0; // will be the value of the most salient *chord change* in this frame
        float maxindex = 0; //... and the index thereof
        unsigned bestchordL = nChord-1; // index of the best "left" chord
        unsigned bestchordR = nChord-1; // index of the best "right" chord

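        // Note (added): for every split point iWF of the 2*halfwindowlength-frame window, the loop
        // below finds the chord with the largest summed salience on each side (the no-chord
        // candidate nChord-1 is slightly penalised); the best split votes for its left and right
        // chords in scoreChordogram and adds a weight to chordchange at the split position.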
        for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
            // now find the max values on both sides of iWF
            // left side:
            float maxL = 0;
            unsigned maxindL = nChord-1;
            for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
                unsigned iChord = chordCandidates[kChord];
                float currsum = 0;
                for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
                    currsum += chordogram[count+iFrame][iChord];
                }
                if (iChord == nChord-1) currsum *= 0.8;
                if (currsum > maxL) {
                    maxL = currsum;
                    maxindL = iChord;
                }
            }
            // right side:
            float maxR = 0;
            unsigned maxindR = nChord-1;
            for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
                unsigned iChord = chordCandidates[kChord];
                float currsum = 0;
                for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
                    currsum += chordogram[count+iFrame][iChord];
                }
                if (iChord == nChord-1) currsum *= 0.8;
                if (currsum > maxR) {
                    maxR = currsum;
                    maxindR = iChord;
                }
            }
            if (maxL+maxR > maxval) {
                maxval = maxL+maxR;
                maxindex = iWF;
                bestchordL = maxindL;
                bestchordR = maxindR;
            }

        }
        // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
        // add a score to every chord-frame-point that was part of a maximum
        for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
            scoreChordogram[iFrame+count][bestchordL]++;
        }
        for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
            scoreChordogram[iFrame+count][bestchordR]++;
        }
        if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
        count++;
    }
    // cerr << "******* agent finished *******" << endl;
    count = 0;
    for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
        float maxval = 0; // will be the value of the most salient chord in this frame
        float maxindex = 0; //... and the index thereof
        for (unsigned iChord = 0; iChord < nChord; iChord++) {
            if (scoreChordogram[count][iChord] > maxval) {
                maxval = scoreChordogram[count][iChord];
                maxindex = iChord;
                // cerr << iChord << endl;
            }
        }
        chordSequence.push_back(maxindex);
        // cerr << "before modefilter, maxindex: " << maxindex << endl;
        count++;
    }
    // cerr << "******* mode filter done *******" << endl;


    // mode filter on chordSequence
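    // Note (added): for each frame the estimate below is (roughly) the most frequent entry of
    // chordSequence within halfwindowlength/2 frames on either side; a new chord label (f7) is
    // emitted only when that estimate changes, and f8 carries the chord-change strength
    // accumulated above.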
    count = 0;
    string oldChord = "";
    for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
        Feature f6 = *it;
        Feature f7; // chord estimate
        f7.hasTimestamp = true;
        f7.timestamp = f6.timestamp;
        Feature f8; // chord estimate
        f8.hasTimestamp = true;
        f8.timestamp = f6.timestamp;

        vector<int> chordCount = vector<int>(nChord,0);
        int maxChordCount = 0;
        int maxChordIndex = nChord-1;
        string maxChord;
        int startIndex = max(count - halfwindowlength/2,0);
        int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
        for (int i = startIndex; i < endIndex; i++) {
            chordCount[chordSequence[i]]++;
            if (chordCount[chordSequence[i]] > maxChordCount) {
                // cerr << "start index " << startIndex << endl;
                maxChordCount++;
                maxChordIndex = chordSequence[i];
                maxChord = m_chordnames[maxChordIndex];
            }
        }
        // chordSequence[count] = maxChordIndex;
        // cerr << maxChordIndex << endl;
        f8.values.push_back(chordchange[count]/(halfwindowlength*2));
        // cerr << chordchange[count] << endl;
        fsOut[9].push_back(f8);
        if (oldChord != maxChord) {
            oldChord = maxChord;

            // char buffer1 [50];
            // if (maxChordIndex < nChord - 1) {
            //     sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
            // } else {
            //     sprintf(buffer1, "N");
            // }
            // f7.label = buffer1;
            f7.label = m_chordnames[maxChordIndex];
            fsOut[7].push_back(f7);
        }
        count++;
    }
    Feature f7; // last chord estimate
    f7.hasTimestamp = true;
    f7.timestamp = fsOut[6][fsOut[6].size()-1].timestamp;
    f7.label = "N";
    fsOut[7].push_back(f7);
    cerr << "done." << endl;
    // // musicity
    // count = 0;
    // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
    // vector<float> musicityValue;
    // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
    // Feature f4 = *it;
    //
    // int startIndex = max(count - musicitykernelwidth/2,0);
    // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
    // float chromasum = 0;
    // float diffsum = 0;
    // for (int k = 0; k < 12; k++) {
    // for (int i = startIndex + 1; i < endIndex; i++) {
    // chromasum += pow(fsOut[4][i].values[k],2);
    // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]);
    // }
    // }
    // diffsum /= chromasum;
    // musicityValue.push_back(diffsum);
    // count++;
    // }
    //
    // float musicityThreshold = 0.44;
    // if (m_stepSize == 4096) {
    // musicityThreshold = 0.74;
    // }
    // if (m_stepSize == 4410) {
    // musicityThreshold = 0.77;
    // }
    //
    // count = 0;
    // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
    // Feature f4 = *it;
    // Feature f8; // musicity
    // Feature f9; // musicity segmenter
    //
    // f8.hasTimestamp = true;
    // f8.timestamp = f4.timestamp;
    // f9.hasTimestamp = true;
    // f9.timestamp = f4.timestamp;
    //
    // int startIndex = max(count - musicitykernelwidth/2,0);
    // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
    // int musicityCount = 0;
    // for (int i = startIndex; i <= endIndex; i++) {
    // if (musicityValue[i] > musicityThreshold) musicityCount++;
    // }
    // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1);
    //
    // if (isSpeech) {
    // if (oldlabeltype != 2) {
    // f9.label = "Speech";
    // fsOut[9].push_back(f9);
    // oldlabeltype = 2;
    // }
    // } else {
    // if (oldlabeltype != 1) {
    // f9.label = "Music";
    // fsOut[9].push_back(f9);
    // oldlabeltype = 1;
    // }
    // }
    // f8.values.push_back(musicityValue[count]);
    // fsOut[8].push_back(f8);
    // count++;
    // }
    return fsOut;

}

#endif