Chris@23: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ matthiasm@0: Chris@35: /* Chris@35: NNLS-Chroma / Chordino Chris@35: Chris@35: Audio feature extraction plugins for chromagram and chord Chris@35: estimation. Chris@35: Chris@35: Centre for Digital Music, Queen Mary University of London. Chris@35: This file copyright 2008-2010 Matthias Mauch and QMUL. Chris@35: Chris@35: This program is free software; you can redistribute it and/or Chris@35: modify it under the terms of the GNU General Public License as Chris@35: published by the Free Software Foundation; either version 2 of the Chris@35: License, or (at your option) any later version. See the file Chris@35: COPYING included with this distribution for more information. Chris@35: */ Chris@35: Chris@35: #include "Chordino.h" Chris@27: Chris@27: #include "chromamethods.h" matthiasm@43: #include "viterbi.h" Chris@27: Chris@27: #include Chris@27: #include matthiasm@0: #include matthiasm@9: Chris@27: #include matthiasm@0: matthiasm@0: const bool debug_on = false; matthiasm@0: Chris@35: Chordino::Chordino(float inputSampleRate) : matthiasm@86: NNLSBase(inputSampleRate), matthiasm@86: m_chorddict(0), matthiasm@86: m_chordnotes(0), matthiasm@86: m_chordnames(0) matthiasm@0: { Chris@35: if (debug_on) cerr << "--> Chordino" << endl; matthiasm@0: } matthiasm@0: Chris@35: Chordino::~Chordino() matthiasm@0: { Chris@35: if (debug_on) cerr << "--> ~Chordino" << endl; matthiasm@0: } matthiasm@0: matthiasm@0: string Chris@35: Chordino::getIdentifier() const matthiasm@0: { Chris@23: if (debug_on) cerr << "--> getIdentifier" << endl; Chris@35: return "chordino"; matthiasm@0: } matthiasm@0: matthiasm@0: string Chris@35: Chordino::getName() const matthiasm@0: { Chris@23: if (debug_on) cerr << "--> getName" << endl; Chris@35: return "Chordino"; matthiasm@0: } matthiasm@0: matthiasm@0: string Chris@35: Chordino::getDescription() const matthiasm@0: { Chris@23: if (debug_on) cerr << "--> getDescription" << endl; matthiasm@58: return "Chordino provides a simple chord transcription based on NNLS Chroma (as in the NNLS Chroma plugin). Chord profiles given by the user in the file chord.dict are used to calculate frame-wise chord similarities. Two simple (non-state-of-the-art!) algorithms are available that smooth these to provide a chord transcription: a simple chord change method, and a standard HMM/Viterbi approach."; matthiasm@0: } matthiasm@0: matthiasm@50: Chordino::ParameterList matthiasm@50: Chordino::getParameterDescriptors() const matthiasm@50: { matthiasm@50: if (debug_on) cerr << "--> getParameterDescriptors" << endl; matthiasm@50: ParameterList list; matthiasm@50: mail@118: ParameterDescriptor useNNLSParam; mail@118: useNNLSParam.identifier = "useNNLS"; mail@118: useNNLSParam.name = "use approximate transcription (NNLS)"; mail@118: useNNLSParam.description = "Toggles approximate transcription (NNLS)."; mail@118: useNNLSParam.unit = ""; mail@118: useNNLSParam.minValue = 0.0; mail@118: useNNLSParam.maxValue = 1.0; mail@118: useNNLSParam.defaultValue = 1.0; mail@118: useNNLSParam.isQuantized = true; mail@118: useNNLSParam.quantizeStep = 1.0; mail@118: list.push_back(useNNLSParam); matthiasm@50: mail@118: ParameterDescriptor useHMMParam; mail@118: useHMMParam.identifier = "useHMM"; mail@118: useHMMParam.name = "HMM (Viterbi decoding)"; mail@118: useHMMParam.description = "Turns on Viterbi decoding (when off, the simple chord estimator is used)."; mail@118: useHMMParam.unit = ""; mail@118: useHMMParam.minValue = 0.0; mail@118: useHMMParam.maxValue = 1.0; mail@118: useHMMParam.defaultValue = 1.0; mail@118: useHMMParam.isQuantized = true; mail@118: useHMMParam.quantizeStep = 1.0; mail@118: list.push_back(useHMMParam); matthiasm@50: mail@118: ParameterDescriptor rollonParam; mail@118: rollonParam.identifier = "rollon"; mail@118: rollonParam.name = "bass noise threshold"; mail@118: rollonParam.description = "Consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds the quantile [bass noise threshold] x [total energy] will be set to 0. A threshold value of 0 means that no bins will be changed."; mail@118: rollonParam.unit = "%"; mail@118: rollonParam.minValue = 0; mail@118: rollonParam.maxValue = 5; mail@118: rollonParam.defaultValue = 0.0; mail@118: rollonParam.isQuantized = true; mail@118: rollonParam.quantizeStep = 0.5; mail@118: list.push_back(rollonParam); matthiasm@50: mail@118: ParameterDescriptor tuningmodeParam; mail@118: tuningmodeParam.identifier = "tuningmode"; mail@118: tuningmodeParam.name = "tuning mode"; mail@118: tuningmodeParam.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing."; mail@118: tuningmodeParam.unit = ""; mail@118: tuningmodeParam.minValue = 0; mail@118: tuningmodeParam.maxValue = 1; mail@118: tuningmodeParam.defaultValue = 0.0; mail@118: tuningmodeParam.isQuantized = true; mail@118: tuningmodeParam.valueNames.push_back("global tuning"); mail@118: tuningmodeParam.valueNames.push_back("local tuning"); mail@118: tuningmodeParam.quantizeStep = 1.0; mail@118: list.push_back(tuningmodeParam); matthiasm@50: mail@118: ParameterDescriptor whiteningParam; mail@118: whiteningParam.identifier = "whitening"; mail@118: whiteningParam.name = "spectral whitening"; mail@118: whiteningParam.description = "Spectral whitening: no whitening - 0; whitening - 1."; mail@118: whiteningParam.unit = ""; mail@118: whiteningParam.isQuantized = true; mail@118: whiteningParam.minValue = 0.0; mail@118: whiteningParam.maxValue = 1.0; mail@118: whiteningParam.defaultValue = 1.0; mail@118: whiteningParam.isQuantized = false; mail@118: list.push_back(whiteningParam); matthiasm@50: mail@118: ParameterDescriptor spectralShapeParam; mail@118: spectralShapeParam.identifier = "spectralshape"; mail@118: spectralShapeParam.name = "spectral shape"; mail@118: spectralShapeParam.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics."; mail@118: spectralShapeParam.unit = ""; mail@118: spectralShapeParam.minValue = 0.5; mail@118: spectralShapeParam.maxValue = 0.9; mail@118: spectralShapeParam.defaultValue = 0.7; mail@118: spectralShapeParam.isQuantized = false; mail@118: list.push_back(spectralShapeParam); matthiasm@50: mail@118: ParameterDescriptor boostnParam; mail@118: boostnParam.identifier = "boostn"; mail@118: boostnParam.name = "boost N"; mail@118: boostnParam.description = "Boost likelihood of the N (no chord) label."; mail@118: boostnParam.unit = ""; mail@118: boostnParam.minValue = 0.0; mail@118: boostnParam.maxValue = 1.0; mail@118: boostnParam.defaultValue = 0.1; mail@118: boostnParam.isQuantized = false; mail@118: list.push_back(boostnParam); matthiasm@50: mail@118: ParameterDescriptor usehartesyntaxParam; mail@118: usehartesyntaxParam.identifier = "usehartesyntax"; mail@118: usehartesyntaxParam.name = "use Harte syntax"; mail@118: usehartesyntaxParam.description = "Use the chord syntax proposed by Harte"; mail@118: usehartesyntaxParam.unit = ""; mail@118: usehartesyntaxParam.minValue = 0.0; mail@118: usehartesyntaxParam.maxValue = 1.0; mail@118: usehartesyntaxParam.defaultValue = 0.0; mail@118: usehartesyntaxParam.isQuantized = true; mail@118: usehartesyntaxParam.quantizeStep = 1.0; mail@118: usehartesyntaxParam.valueNames.push_back("no"); mail@118: usehartesyntaxParam.valueNames.push_back("yes"); mail@118: list.push_back(usehartesyntaxParam); mail@112: matthiasm@50: return list; matthiasm@50: } matthiasm@50: Chris@35: Chordino::OutputList Chris@35: Chordino::getOutputDescriptors() const matthiasm@0: { Chris@23: if (debug_on) cerr << "--> getOutputDescriptors" << endl; matthiasm@0: OutputList list; matthiasm@0: Chris@35: int index = 0; matthiasm@0: matthiasm@0: OutputDescriptor d7; matthiasm@0: d7.identifier = "simplechord"; Chris@36: d7.name = "Chord Estimate"; matthiasm@58: d7.description = "Estimated chord times and labels. Two simple (non-state-of-the-art!) algorithms are available that smooth these to provide a chord transcription: a simple chord change method, and a standard HMM/Viterbi approach."; matthiasm@0: d7.unit = ""; matthiasm@0: d7.hasFixedBinCount = true; matthiasm@0: d7.binCount = 0; matthiasm@0: d7.hasKnownExtents = false; matthiasm@0: d7.isQuantized = false; matthiasm@0: d7.sampleType = OutputDescriptor::VariableSampleRate; matthiasm@0: d7.hasDuration = false; matthiasm@0: d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; matthiasm@0: list.push_back(d7); Chris@35: m_outputChords = index++; matthiasm@0: matthiasm@86: OutputDescriptor chordnotes; matthiasm@86: chordnotes.identifier = "chordnotes"; matthiasm@86: chordnotes.name = "Note Representation of Chord Estimate"; matthiasm@86: chordnotes.description = "A simple represenation of the estimated chord with bass note (if applicable) and chord notes."; matthiasm@86: chordnotes.unit = "MIDI units"; matthiasm@86: chordnotes.hasFixedBinCount = true; matthiasm@86: chordnotes.binCount = 1; matthiasm@86: chordnotes.hasKnownExtents = true; matthiasm@86: chordnotes.minValue = 0; matthiasm@86: chordnotes.maxValue = 127; matthiasm@86: chordnotes.isQuantized = true; matthiasm@86: chordnotes.quantizeStep = 1; matthiasm@86: chordnotes.sampleType = OutputDescriptor::VariableSampleRate; matthiasm@86: chordnotes.hasDuration = true; matthiasm@86: chordnotes.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; matthiasm@86: list.push_back(chordnotes); matthiasm@86: m_outputChordnotes = index++; matthiasm@86: Chris@23: OutputDescriptor d8; mail@60: d8.identifier = "harmonicchange"; Chris@36: d8.name = "Harmonic Change Value"; matthiasm@58: d8.description = "An indication of the likelihood of harmonic change. Depends on the chord dictionary. Calculation is different depending on whether the Viterbi algorithm is used for chord estimation, or the simple chord estimate."; matthiasm@17: d8.unit = ""; matthiasm@17: d8.hasFixedBinCount = true; matthiasm@17: d8.binCount = 1; mail@60: d8.hasKnownExtents = false; matthiasm@17: d8.isQuantized = false; matthiasm@17: d8.sampleType = OutputDescriptor::FixedSampleRate; matthiasm@17: d8.hasDuration = false; matthiasm@17: list.push_back(d8); Chris@35: m_outputHarmonicChange = index++; matthiasm@1: matthiasm@107: OutputDescriptor loglikelihood; matthiasm@107: loglikelihood.identifier = "loglikelihood"; mail@124: loglikelihood.name = "Simple Chord Log-likelihood"; mail@124: loglikelihood.description = "Logarithm of the likelihood value of the simple chord estimate."; matthiasm@107: loglikelihood.unit = ""; matthiasm@107: loglikelihood.hasFixedBinCount = true; matthiasm@107: loglikelihood.binCount = 1; matthiasm@107: loglikelihood.hasKnownExtents = false; matthiasm@107: loglikelihood.isQuantized = false; matthiasm@107: loglikelihood.sampleType = OutputDescriptor::FixedSampleRate; matthiasm@107: loglikelihood.hasDuration = false; matthiasm@107: list.push_back(loglikelihood); matthiasm@107: m_outputLoglikelihood = index++; matthiasm@106: matthiasm@0: return list; matthiasm@0: } matthiasm@0: matthiasm@0: bool Chris@35: Chordino::initialise(size_t channels, size_t stepSize, size_t blockSize) matthiasm@0: { Chris@23: if (debug_on) { Chris@23: cerr << "--> initialise"; Chris@23: } mail@76: Chris@35: if (!NNLSBase::initialise(channels, stepSize, blockSize)) { Chris@35: return false; Chris@35: } mail@115: m_chordnames = chordDictionary(&m_chorddict, &m_chordnotes, m_boostN, m_harte_syntax); matthiasm@0: return true; matthiasm@0: } matthiasm@0: matthiasm@0: void Chris@35: Chordino::reset() matthiasm@0: { Chris@23: if (debug_on) cerr << "--> reset"; Chris@35: NNLSBase::reset(); matthiasm@0: } matthiasm@0: Chris@35: Chordino::FeatureSet Chris@35: Chordino::process(const float *const *inputBuffers, Vamp::RealTime timestamp) matthiasm@0: { Chris@23: if (debug_on) cerr << "--> process" << endl; matthiasm@0: Chris@35: NNLSBase::baseProcess(inputBuffers, timestamp); matthiasm@0: Chris@35: return FeatureSet(); matthiasm@0: } matthiasm@0: Chris@35: Chordino::FeatureSet Chris@35: Chordino::getRemainingFeatures() matthiasm@0: { mail@89: // cerr << hw[0] << hw[1] << endl; mail@89: if (debug_on) cerr << "--> getRemainingFeatures" << endl; Chris@23: FeatureSet fsOut; Chris@35: if (m_logSpectrum.size() == 0) return fsOut; Chris@23: int nChord = m_chordnames.size(); Chris@23: // Chris@23: /** Calculate Tuning Chris@23: calculate tuning from (using the angle of the complex number defined by the Chris@23: cumulative mean real and imag values) Chris@23: **/ mail@80: float meanTuningImag = 0; mail@80: float meanTuningReal = 0; mail@80: for (int iBPS = 0; iBPS < nBPS; ++iBPS) { mail@80: meanTuningReal += m_meanTunings[iBPS] * cosvalues[iBPS]; mail@80: meanTuningImag += m_meanTunings[iBPS] * sinvalues[iBPS]; mail@80: } Chris@23: float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI)); Chris@23: float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI); Chris@23: int intShift = floor(normalisedtuning * 3); mail@80: float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this matthiasm@1: Chris@23: char buffer0 [50]; matthiasm@1: Chris@23: sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning); matthiasm@1: matthiasm@1: Chris@23: /** Tune Log-Frequency Spectrogram matthiasm@43: calculate a tuned log-frequency spectrogram (currentTunedSpec): use the tuning estimated above (kinda f0) to Chris@91: perform linear interpolation on the existing log-frequency spectrogram (kinda currentLogSpectrum). Chris@23: **/ Chris@35: cerr << endl << "[Chordino Plugin] Tuning Log-Frequency Spectrogram ... "; matthiasm@13: Chris@23: int count = 0; matthiasm@1: Chris@35: FeatureList tunedSpec; matthiasm@43: int nFrame = m_logSpectrum.size(); matthiasm@43: matthiasm@43: vector timestamps; Chris@35: Chris@35: for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) { Chris@91: Feature currentLogSpectrum = *i; matthiasm@43: Feature currentTunedSpec; // tuned log-frequency spectrum matthiasm@43: currentTunedSpec.hasTimestamp = true; Chris@91: currentTunedSpec.timestamp = currentLogSpectrum.timestamp; Chris@91: timestamps.push_back(currentLogSpectrum.timestamp); matthiasm@43: currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // set lower edge to zero matthiasm@1: Chris@23: if (m_tuneLocal) { Chris@23: intShift = floor(m_localTuning[count] * 3); mail@80: floatShift = m_localTuning[count] * 3 - intShift; // floatShift is a really bad name for this Chris@23: } matthiasm@1: mail@80: // cerr << intShift << " " << floatShift << endl; matthiasm@1: Chris@91: for (int k = 2; k < (int)currentLogSpectrum.values.size() - 3; ++k) { // interpolate all inner bins mail@115: float tempValue = currentLogSpectrum.values[k + intShift] * (1-floatShift) + currentLogSpectrum.values[k+intShift+1] * floatShift; matthiasm@43: currentTunedSpec.values.push_back(tempValue); Chris@23: } matthiasm@1: matthiasm@43: currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // upper edge matthiasm@43: vector runningmean = SpecialConvolution(currentTunedSpec.values,hw); Chris@23: vector runningstd; mail@77: for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance) matthiasm@43: runningstd.push_back((currentTunedSpec.values[i] - runningmean[i]) * (currentTunedSpec.values[i] - runningmean[i])); Chris@23: } Chris@23: runningstd = SpecialConvolution(runningstd,hw); // second step convolve mail@77: for (int i = 0; i < nNote; i++) { Chris@23: runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std Chris@23: if (runningstd[i] > 0) { matthiasm@43: // currentTunedSpec.values[i] = (currentTunedSpec.values[i] / runningmean[i]) > thresh ? matthiasm@43: // (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0; matthiasm@43: currentTunedSpec.values[i] = (currentTunedSpec.values[i] - runningmean[i]) > 0 ? matthiasm@43: (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0; Chris@23: } matthiasm@43: if (currentTunedSpec.values[i] < 0) { Chris@23: cerr << "ERROR: negative value in logfreq spectrum" << endl; Chris@23: } Chris@23: } matthiasm@43: tunedSpec.push_back(currentTunedSpec); Chris@23: count++; Chris@23: } Chris@23: cerr << "done." << endl; matthiasm@1: Chris@23: /** Semitone spectrum and chromagrams Chris@23: Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum Chris@23: is inferred using a non-negative least squares algorithm. Chris@23: Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means Chris@23: bass and treble stacked onto each other). Chris@23: **/ matthiasm@42: if (m_useNNLS == 0) { Chris@35: cerr << "[Chordino Plugin] Mapping to semitone spectrum and chroma ... "; Chris@23: } else { Chris@35: cerr << "[Chordino Plugin] Performing NNLS and mapping to chroma ... "; Chris@23: } matthiasm@13: matthiasm@1: matthiasm@43: vector > chordogram; Chris@23: vector > scoreChordogram; Chris@35: vector chordchange = vector(tunedSpec.size(),0); Chris@23: count = 0; matthiasm@9: Chris@35: FeatureList chromaList; matthiasm@43: matthiasm@43: Chris@35: Chris@35: for (FeatureList::iterator it = tunedSpec.begin(); it != tunedSpec.end(); ++it) { matthiasm@43: Feature currentTunedSpec = *it; // logfreq spectrum matthiasm@43: Feature currentChromas; // treble and bass chromagram Chris@35: matthiasm@43: currentChromas.hasTimestamp = true; matthiasm@43: currentChromas.timestamp = currentTunedSpec.timestamp; Chris@35: mail@77: float b[nNote]; matthiasm@1: Chris@23: bool some_b_greater_zero = false; Chris@23: float sumb = 0; mail@77: for (int i = 0; i < nNote; i++) { mail@77: // b[i] = m_dict[(nNote * count + i) % (nNote * 84)]; matthiasm@43: b[i] = currentTunedSpec.values[i]; Chris@23: sumb += b[i]; Chris@23: if (b[i] > 0) { Chris@23: some_b_greater_zero = true; Chris@23: } Chris@23: } matthiasm@1: Chris@23: // here's where the non-negative least squares algorithm calculates the note activation x matthiasm@1: Chris@23: vector chroma = vector(12, 0); Chris@23: vector basschroma = vector(12, 0); Chris@23: float currval; Chris@91: int iSemitone = 0; matthiasm@1: Chris@23: if (some_b_greater_zero) { matthiasm@42: if (m_useNNLS == 0) { Chris@91: for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) { Chris@23: currval = 0; mail@81: for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) { mail@81: currval += b[iNote + iBPS] * (1-abs(iBPS*1.0/(nBPS/2+1))); mail@81: } Chris@23: chroma[iSemitone % 12] += currval * treblewindow[iSemitone]; Chris@23: basschroma[iSemitone % 12] += currval * basswindow[iSemitone]; Chris@23: iSemitone++; Chris@23: } matthiasm@1: Chris@23: } else { Chris@35: float x[84+1000]; Chris@23: for (int i = 1; i < 1084; ++i) x[i] = 1.0; Chris@23: vector signifIndex; Chris@23: int index=0; Chris@23: sumb /= 84.0; Chris@91: for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) { Chris@23: float currval = 0; mail@81: for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) { mail@81: currval += b[iNote + iBPS]; mail@81: } Chris@23: if (currval > 0) signifIndex.push_back(index); Chris@23: index++; Chris@23: } Chris@35: float rnorm; Chris@35: float w[84+1000]; Chris@35: float zz[84+1000]; Chris@23: int indx[84+1000]; Chris@23: int mode; mail@77: int dictsize = nNote*signifIndex.size(); mail@81: // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl; Chris@35: float *curr_dict = new float[dictsize]; Chris@91: for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) { Chris@91: for (int iBin = 0; iBin < nNote; iBin++) { mail@77: curr_dict[iNote * nNote + iBin] = 1.0 * m_dict[signifIndex[iNote] * nNote + iBin]; Chris@23: } Chris@23: } Chris@35: nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode); Chris@23: delete [] curr_dict; Chris@91: for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) { Chris@23: // cerr << mode << endl; Chris@23: chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]]; Chris@23: basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]]; Chris@23: } Chris@23: } Chris@23: } Chris@35: Chris@35: vector origchroma = chroma; Chris@23: chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas matthiasm@43: currentChromas.values = chroma; Chris@35: Chris@23: if (m_doNormalizeChroma > 0) { Chris@23: vector chromanorm = vector(3,0); Chris@23: switch (int(m_doNormalizeChroma)) { Chris@23: case 0: // should never end up here Chris@23: break; Chris@23: case 1: Chris@35: chromanorm[0] = *max_element(origchroma.begin(), origchroma.end()); Chris@35: chromanorm[1] = *max_element(basschroma.begin(), basschroma.end()); Chris@23: chromanorm[2] = max(chromanorm[0], chromanorm[1]); Chris@23: break; Chris@23: case 2: Chris@35: for (vector::iterator it = chroma.begin(); it != chroma.end(); ++it) { Chris@23: chromanorm[2] += *it; Chris@23: } Chris@23: break; Chris@23: case 3: Chris@35: for (vector::iterator it = chroma.begin(); it != chroma.end(); ++it) { Chris@23: chromanorm[2] += pow(*it,2); Chris@23: } Chris@23: chromanorm[2] = sqrt(chromanorm[2]); Chris@23: break; Chris@23: } Chris@23: if (chromanorm[2] > 0) { Chris@91: for (int i = 0; i < (int)chroma.size(); i++) { matthiasm@43: currentChromas.values[i] /= chromanorm[2]; Chris@23: } Chris@23: } Chris@23: } Chris@35: matthiasm@43: chromaList.push_back(currentChromas); Chris@35: Chris@23: // local chord estimation matthiasm@43: vector currentChordSalience; matthiasm@43: double tempchordvalue = 0; matthiasm@43: double sumchordvalue = 0; matthiasm@9: Chris@23: for (int iChord = 0; iChord < nChord; iChord++) { Chris@23: tempchordvalue = 0; Chris@23: for (int iBin = 0; iBin < 12; iBin++) { matthiasm@44: tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin]; Chris@23: } Chris@23: for (int iBin = 12; iBin < 24; iBin++) { Chris@23: tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin]; Chris@23: } matthiasm@48: if (iChord == nChord-1) tempchordvalue *= .7; matthiasm@48: if (tempchordvalue < 0) tempchordvalue = 0.0; matthiasm@50: tempchordvalue = pow(1.3,tempchordvalue); Chris@23: sumchordvalue+=tempchordvalue; Chris@23: currentChordSalience.push_back(tempchordvalue); Chris@23: } Chris@23: if (sumchordvalue > 0) { Chris@23: for (int iChord = 0; iChord < nChord; iChord++) { Chris@23: currentChordSalience[iChord] /= sumchordvalue; Chris@23: } Chris@23: } else { Chris@23: currentChordSalience[nChord-1] = 1.0; Chris@23: } Chris@23: chordogram.push_back(currentChordSalience); matthiasm@1: Chris@23: count++; Chris@23: } Chris@23: cerr << "done." << endl; matthiasm@13: matthiasm@86: vector oldnotes; matthiasm@10: matthiasm@50: // bool m_useHMM = true; // this will go into the chordino header file. matthiasm@50: if (m_useHMM == 1.0) { matthiasm@44: cerr << "[Chordino Plugin] HMM Chord Estimation ... "; matthiasm@43: int oldchord = nChord-1; matthiasm@48: double selftransprob = 0.99; matthiasm@43: matthiasm@48: // vector init = vector(nChord,1.0/nChord); matthiasm@48: vector init = vector(nChord,0); init[nChord-1] = 1; matthiasm@48: matthiasm@50: double *delta; matthiasm@50: delta = (double *)malloc(sizeof(double)*nFrame*nChord); matthiasm@50: matthiasm@43: vector > trans; matthiasm@43: for (int iChord = 0; iChord < nChord; iChord++) { matthiasm@43: vector temp = vector(nChord,(1-selftransprob)/(nChord-1)); matthiasm@43: temp[iChord] = selftransprob; matthiasm@43: trans.push_back(temp); matthiasm@43: } matthiasm@106: vector scale; matthiasm@106: vector chordpath = ViterbiPath(init, trans, chordogram, delta, &scale); matthiasm@106: matthiasm@48: matthiasm@48: Feature chord_feature; // chord estimate matthiasm@48: chord_feature.hasTimestamp = true; matthiasm@48: chord_feature.timestamp = timestamps[0]; matthiasm@48: chord_feature.label = m_chordnames[chordpath[0]]; mail@60: fsOut[m_outputChords].push_back(chord_feature); matthiasm@43: mail@60: chordchange[0] = 0; Chris@91: for (int iFrame = 1; iFrame < (int)chordpath.size(); ++iFrame) { matthiasm@43: // cerr << chordpath[iFrame] << endl; matthiasm@48: if (chordpath[iFrame] != oldchord ) { matthiasm@86: // chord matthiasm@43: Feature chord_feature; // chord estimate matthiasm@43: chord_feature.hasTimestamp = true; matthiasm@43: chord_feature.timestamp = timestamps[iFrame]; matthiasm@43: chord_feature.label = m_chordnames[chordpath[iFrame]]; mail@60: fsOut[m_outputChords].push_back(chord_feature); matthiasm@43: oldchord = chordpath[iFrame]; matthiasm@86: // chord notes Chris@91: for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord matthiasm@86: oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[iFrame]; matthiasm@86: fsOut[m_outputChordnotes].push_back(oldnotes[iNote]); matthiasm@86: } matthiasm@86: oldnotes.clear(); Chris@91: for (int iNote = 0; iNote < (int)m_chordnotes[chordpath[iFrame]].size(); ++iNote) { // prepare notes of current chord matthiasm@86: Feature chordnote_feature; matthiasm@86: chordnote_feature.hasTimestamp = true; matthiasm@86: chordnote_feature.timestamp = timestamps[iFrame]; matthiasm@86: chordnote_feature.values.push_back(m_chordnotes[chordpath[iFrame]][iNote]); matthiasm@86: chordnote_feature.hasDuration = true; matthiasm@86: chordnote_feature.duration = -timestamps[iFrame]; // this will be corrected at the next chord matthiasm@86: oldnotes.push_back(chordnote_feature); matthiasm@86: } Chris@23: } matthiasm@50: /* calculating simple chord change prob */ matthiasm@50: for (int iChord = 0; iChord < nChord; iChord++) { matthiasm@50: chordchange[iFrame-1] += delta[(iFrame-1)*nChord + iChord] * log(delta[(iFrame-1)*nChord + iChord]/delta[iFrame*nChord + iChord]); matthiasm@50: } Chris@23: } matthiasm@43: matthiasm@106: float logscale = 0; matthiasm@106: for (int iFrame = 0; iFrame < nFrame; ++iFrame) { matthiasm@106: logscale -= log(scale[iFrame]); matthiasm@106: Feature loglikelihood; matthiasm@106: loglikelihood.hasTimestamp = true; matthiasm@106: loglikelihood.timestamp = timestamps[iFrame]; matthiasm@106: loglikelihood.values.push_back(-log(scale[iFrame])); matthiasm@106: // cerr << chordchange[iFrame] << endl; matthiasm@107: fsOut[m_outputLoglikelihood].push_back(loglikelihood); matthiasm@106: } matthiasm@106: logscale /= nFrame; mail@111: // cerr << "loglik" << logscale << endl; matthiasm@106: matthiasm@106: matthiasm@43: // cerr << chordpath[0] << endl; matthiasm@43: } else { matthiasm@43: /* Simple chord estimation matthiasm@43: I just take the local chord estimates ("currentChordSalience") and average them over time, then matthiasm@43: take the maximum. Very simple, don't do this at home... matthiasm@43: */ matthiasm@44: cerr << "[Chordino Plugin] Simple Chord Estimation ... "; matthiasm@43: count = 0; matthiasm@43: int halfwindowlength = m_inputSampleRate / m_stepSize; matthiasm@43: vector chordSequence; matthiasm@43: for (vector::iterator it = timestamps.begin(); it != timestamps.end(); ++it) { // initialise the score chordogram matthiasm@43: vector temp = vector(nChord,0); matthiasm@43: scoreChordogram.push_back(temp); matthiasm@43: } matthiasm@43: for (vector::iterator it = timestamps.begin(); it < timestamps.end()-2*halfwindowlength-1; ++it) { matthiasm@43: int startIndex = count + 1; matthiasm@43: int endIndex = count + 2 * halfwindowlength; matthiasm@43: matthiasm@43: float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1); matthiasm@43: matthiasm@43: vector chordCandidates; Chris@91: for (int iChord = 0; iChord+1 < nChord; iChord++) { matthiasm@43: // float currsum = 0; Chris@91: // for (int iFrame = startIndex; iFrame < endIndex; ++iFrame) { matthiasm@43: // currsum += chordogram[iFrame][iChord]; matthiasm@43: // } matthiasm@43: // if (currsum > chordThreshold) chordCandidates.push_back(iChord); Chris@91: for (int iFrame = startIndex; iFrame < endIndex; ++iFrame) { matthiasm@43: if (chordogram[iFrame][iChord] > chordThreshold) { matthiasm@43: chordCandidates.push_back(iChord); matthiasm@43: break; matthiasm@43: } Chris@23: } Chris@23: } matthiasm@43: chordCandidates.push_back(nChord-1); matthiasm@43: // cerr << chordCandidates.size() << endl; matthiasm@43: matthiasm@43: float maxval = 0; // will be the value of the most salient *chord change* in this frame matthiasm@43: float maxindex = 0; //... and the index thereof Chris@91: int bestchordL = nChord-1; // index of the best "left" chord Chris@91: int bestchordR = nChord-1; // index of the best "right" chord matthiasm@43: matthiasm@43: for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) { matthiasm@43: // now find the max values on both sides of iWF matthiasm@43: // left side: matthiasm@43: float maxL = 0; Chris@91: int maxindL = nChord-1; Chris@91: for (int kChord = 0; kChord < (int)chordCandidates.size(); kChord++) { Chris@91: int iChord = chordCandidates[kChord]; matthiasm@43: float currsum = 0; Chris@91: for (int iFrame = 0; iFrame < iWF-1; ++iFrame) { matthiasm@43: currsum += chordogram[count+iFrame][iChord]; matthiasm@43: } matthiasm@43: if (iChord == nChord-1) currsum *= 0.8; matthiasm@43: if (currsum > maxL) { matthiasm@43: maxL = currsum; matthiasm@43: maxindL = iChord; matthiasm@43: } matthiasm@43: } matthiasm@43: // right side: matthiasm@43: float maxR = 0; Chris@91: int maxindR = nChord-1; Chris@91: for (int kChord = 0; kChord < (int)chordCandidates.size(); kChord++) { Chris@91: int iChord = chordCandidates[kChord]; matthiasm@43: float currsum = 0; Chris@91: for (int iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) { matthiasm@43: currsum += chordogram[count+iFrame][iChord]; matthiasm@43: } matthiasm@43: if (iChord == nChord-1) currsum *= 0.8; matthiasm@43: if (currsum > maxR) { matthiasm@43: maxR = currsum; matthiasm@43: maxindR = iChord; matthiasm@43: } matthiasm@43: } matthiasm@43: if (maxL+maxR > maxval) { matthiasm@43: maxval = maxL+maxR; matthiasm@43: maxindex = iWF; matthiasm@43: bestchordL = maxindL; matthiasm@43: bestchordR = maxindR; matthiasm@43: } matthiasm@43: Chris@23: } matthiasm@43: // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl; matthiasm@43: // add a score to every chord-frame-point that was part of a maximum Chris@91: for (int iFrame = 0; iFrame < maxindex-1; ++iFrame) { matthiasm@43: scoreChordogram[iFrame+count][bestchordL]++; matthiasm@43: } Chris@91: for (int iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) { matthiasm@43: scoreChordogram[iFrame+count][bestchordR]++; matthiasm@43: } matthiasm@50: if (bestchordL != bestchordR) { matthiasm@50: chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength; matthiasm@50: } matthiasm@43: count++; Chris@23: } matthiasm@43: // cerr << "******* agent finished *******" << endl; matthiasm@43: count = 0; matthiasm@43: for (vector::iterator it = timestamps.begin(); it != timestamps.end(); ++it) { matthiasm@43: float maxval = 0; // will be the value of the most salient chord in this frame matthiasm@43: float maxindex = 0; //... and the index thereof Chris@91: for (int iChord = 0; iChord < nChord; iChord++) { matthiasm@43: if (scoreChordogram[count][iChord] > maxval) { matthiasm@43: maxval = scoreChordogram[count][iChord]; matthiasm@43: maxindex = iChord; matthiasm@43: // cerr << iChord << endl; matthiasm@43: } matthiasm@43: } matthiasm@43: chordSequence.push_back(maxindex); matthiasm@43: count++; Chris@23: } matthiasm@43: matthiasm@43: matthiasm@43: // mode filter on chordSequence matthiasm@43: count = 0; matthiasm@43: string oldChord = ""; matthiasm@43: for (vector::iterator it = timestamps.begin(); it != timestamps.end(); ++it) { matthiasm@43: Feature chord_feature; // chord estimate matthiasm@43: chord_feature.hasTimestamp = true; matthiasm@43: chord_feature.timestamp = *it; matthiasm@43: // Feature currentChord; // chord estimate matthiasm@43: // currentChord.hasTimestamp = true; matthiasm@43: // currentChord.timestamp = currentChromas.timestamp; matthiasm@43: matthiasm@43: vector chordCount = vector(nChord,0); matthiasm@43: int maxChordCount = 0; matthiasm@43: int maxChordIndex = nChord-1; matthiasm@43: string maxChord; matthiasm@43: int startIndex = max(count - halfwindowlength/2,0); matthiasm@43: int endIndex = min(int(chordogram.size()), count + halfwindowlength/2); matthiasm@43: for (int i = startIndex; i < endIndex; i++) { matthiasm@43: chordCount[chordSequence[i]]++; matthiasm@43: if (chordCount[chordSequence[i]] > maxChordCount) { matthiasm@43: // cerr << "start index " << startIndex << endl; matthiasm@43: maxChordCount++; matthiasm@43: maxChordIndex = chordSequence[i]; matthiasm@43: maxChord = m_chordnames[maxChordIndex]; matthiasm@43: } matthiasm@43: } matthiasm@43: // chordSequence[count] = maxChordIndex; matthiasm@43: // cerr << maxChordIndex << endl; matthiasm@50: // cerr << chordchange[count] << endl; matthiasm@43: if (oldChord != maxChord) { matthiasm@43: oldChord = maxChord; matthiasm@43: chord_feature.label = m_chordnames[maxChordIndex]; mail@60: fsOut[m_outputChords].push_back(chord_feature); Chris@91: for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord matthiasm@86: oldnotes[iNote].duration = oldnotes[iNote].duration + chord_feature.timestamp; matthiasm@86: fsOut[m_outputChordnotes].push_back(oldnotes[iNote]); matthiasm@86: } matthiasm@86: oldnotes.clear(); Chris@91: for (int iNote = 0; iNote < (int)m_chordnotes[maxChordIndex].size(); ++iNote) { // prepare notes of current chord matthiasm@86: Feature chordnote_feature; matthiasm@86: chordnote_feature.hasTimestamp = true; matthiasm@86: chordnote_feature.timestamp = chord_feature.timestamp; matthiasm@86: chordnote_feature.values.push_back(m_chordnotes[maxChordIndex][iNote]); matthiasm@86: chordnote_feature.hasDuration = true; matthiasm@86: chordnote_feature.duration = -chord_feature.timestamp; // this will be corrected at the next chord matthiasm@86: oldnotes.push_back(chordnote_feature); matthiasm@86: } matthiasm@43: } matthiasm@43: count++; Chris@23: } Chris@23: } matthiasm@43: Feature chord_feature; // last chord estimate matthiasm@43: chord_feature.hasTimestamp = true; matthiasm@43: chord_feature.timestamp = timestamps[timestamps.size()-1]; matthiasm@43: chord_feature.label = "N"; mail@60: fsOut[m_outputChords].push_back(chord_feature); matthiasm@86: Chris@91: for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord matthiasm@86: oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[timestamps.size()-1]; matthiasm@86: fsOut[m_outputChordnotes].push_back(oldnotes[iNote]); matthiasm@86: } matthiasm@86: Chris@23: cerr << "done." << endl; matthiasm@50: matthiasm@50: for (int iFrame = 0; iFrame < nFrame; iFrame++) { matthiasm@50: Feature chordchange_feature; matthiasm@50: chordchange_feature.hasTimestamp = true; matthiasm@50: chordchange_feature.timestamp = timestamps[iFrame]; matthiasm@50: chordchange_feature.values.push_back(chordchange[iFrame]); mail@60: // cerr << chordchange[iFrame] << endl; mail@60: fsOut[m_outputHarmonicChange].push_back(chordchange_feature); matthiasm@50: } matthiasm@50: mail@60: // for (int iFrame = 0; iFrame < nFrame; iFrame++) cerr << fsOut[m_outputHarmonicChange][iFrame].values[0] << endl; matthiasm@50: matthiasm@50: Chris@23: return fsOut; matthiasm@0: }