| Chris@23 | 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */ | 
| matthiasm@0 | 2 | 
| Chris@35 | 3 /* | 
| Chris@35 | 4   NNLS-Chroma / Chordino | 
| Chris@35 | 5 | 
| Chris@35 | 6   Audio feature extraction plugins for chromagram and chord | 
| Chris@35 | 7   estimation. | 
| Chris@35 | 8 | 
| Chris@35 | 9   Centre for Digital Music, Queen Mary University of London. | 
| Chris@35 | 10   This file copyright 2008-2010 Matthias Mauch and QMUL. | 
| Chris@35 | 11 | 
| Chris@35 | 12   This program is free software; you can redistribute it and/or | 
| Chris@35 | 13   modify it under the terms of the GNU General Public License as | 
| Chris@35 | 14   published by the Free Software Foundation; either version 2 of the | 
| Chris@35 | 15   License, or (at your option) any later version.  See the file | 
| Chris@35 | 16   COPYING included with this distribution for more information. | 
| Chris@35 | 17 */ | 
| Chris@35 | 18 | 
| Chris@35 | 19 #include "Chordino.h" | 
| Chris@27 | 20 | 
| Chris@27 | 21 #include "chromamethods.h" | 
| matthiasm@43 | 22 #include "viterbi.h" | 
| Chris@27 | 23 | 
| Chris@27 | 24 #include <cstdlib> | 
| Chris@27 | 25 #include <fstream> | 
| matthiasm@0 | 26 #include <cmath> | 
| matthiasm@9 | 27 | 
| Chris@27 | 28 #include <algorithm> | 
| matthiasm@0 | 29 | 
// Compile-time switch for the "--> <method>" trace messages that the
// methods in this file write to cerr.
const bool debug_on = false;

// Kernel passed to SpecialConvolution() in getRemainingFeatures() to obtain
// the running mean and running variance of each spectral frame.  It is a
// copy of the first 19 entries of hammingwind, which is declared outside
// this file (presumably in chromamethods.h -- TODO confirm).
const vector<float> hw(hammingwind, hammingwind+19);
| matthiasm@0 | 33 | 
| Chris@35 | 34 Chordino::Chordino(float inputSampleRate) : | 
| Chris@35 | 35     NNLSBase(inputSampleRate) | 
| matthiasm@0 | 36 { | 
| Chris@35 | 37     if (debug_on) cerr << "--> Chordino" << endl; | 
| matthiasm@0 | 38 } | 
| matthiasm@0 | 39 | 
| Chris@35 | 40 Chordino::~Chordino() | 
| matthiasm@0 | 41 { | 
| Chris@35 | 42     if (debug_on) cerr << "--> ~Chordino" << endl; | 
| matthiasm@0 | 43 } | 
| matthiasm@0 | 44 | 
| matthiasm@0 | 45 string | 
| Chris@35 | 46 Chordino::getIdentifier() const | 
| matthiasm@0 | 47 { | 
| Chris@23 | 48     if (debug_on) cerr << "--> getIdentifier" << endl; | 
| Chris@35 | 49     return "chordino"; | 
| matthiasm@0 | 50 } | 
| matthiasm@0 | 51 | 
| matthiasm@0 | 52 string | 
| Chris@35 | 53 Chordino::getName() const | 
| matthiasm@0 | 54 { | 
| Chris@23 | 55     if (debug_on) cerr << "--> getName" << endl; | 
| Chris@35 | 56     return "Chordino"; | 
| matthiasm@0 | 57 } | 
| matthiasm@0 | 58 | 
| matthiasm@0 | 59 string | 
| Chris@35 | 60 Chordino::getDescription() const | 
| matthiasm@0 | 61 { | 
| Chris@23 | 62     if (debug_on) cerr << "--> getDescription" << endl; | 
| matthiasm@13 | 63     return "This plugin provides a number of features derived from a log-frequency amplitude spectrum of the DFT: some variants of the log-frequency spectrum, including a semitone spectrum derived from approximate transcription using the NNLS algorithm; based on this semitone spectrum, chroma features and a simple chord estimate."; | 
| matthiasm@0 | 64 } | 
| matthiasm@0 | 65 | 
| Chris@35 | 66 Chordino::OutputList | 
| Chris@35 | 67 Chordino::getOutputDescriptors() const | 
| matthiasm@0 | 68 { | 
| Chris@23 | 69     if (debug_on) cerr << "--> getOutputDescriptors" << endl; | 
| matthiasm@0 | 70     OutputList list; | 
| matthiasm@0 | 71 | 
| Chris@35 | 72     int index = 0; | 
| matthiasm@0 | 73 | 
| matthiasm@0 | 74     OutputDescriptor d7; | 
| matthiasm@0 | 75     d7.identifier = "simplechord"; | 
| Chris@36 | 76     d7.name = "Chord Estimate"; | 
| matthiasm@0 | 77     d7.description = "A simple chord estimate based on the inner product of chord templates with the smoothed chroma."; | 
| matthiasm@0 | 78     d7.unit = ""; | 
| matthiasm@0 | 79     d7.hasFixedBinCount = true; | 
| matthiasm@0 | 80     d7.binCount = 0; | 
| matthiasm@0 | 81     d7.hasKnownExtents = false; | 
| matthiasm@0 | 82     d7.isQuantized = false; | 
| matthiasm@0 | 83     d7.sampleType = OutputDescriptor::VariableSampleRate; | 
| matthiasm@0 | 84     d7.hasDuration = false; | 
| matthiasm@0 | 85     d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; | 
| matthiasm@0 | 86     list.push_back(d7); | 
| Chris@35 | 87     m_outputChords = index++; | 
| matthiasm@0 | 88 | 
| Chris@23 | 89     OutputDescriptor d8; | 
| matthiasm@17 | 90     d8.identifier = "harmonicchange"; | 
| Chris@36 | 91     d8.name = "Harmonic Change Value"; | 
| matthiasm@17 | 92     d8.description = "Harmonic change."; | 
| matthiasm@17 | 93     d8.unit = ""; | 
| matthiasm@17 | 94     d8.hasFixedBinCount = true; | 
| matthiasm@17 | 95     d8.binCount = 1; | 
| matthiasm@17 | 96     d8.hasKnownExtents = true; | 
| Chris@23 | 97     d8.minValue = 0.0; | 
| Chris@23 | 98     d8.maxValue = 0.999; | 
| matthiasm@17 | 99     d8.isQuantized = false; | 
| matthiasm@17 | 100     d8.sampleType = OutputDescriptor::FixedSampleRate; | 
| matthiasm@17 | 101     d8.hasDuration = false; | 
| matthiasm@17 | 102     // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; | 
| matthiasm@17 | 103     list.push_back(d8); | 
| Chris@35 | 104     m_outputHarmonicChange = index++; | 
| matthiasm@1 | 105 | 
| matthiasm@0 | 106     return list; | 
| matthiasm@0 | 107 } | 
| matthiasm@0 | 108 | 
| matthiasm@0 | 109 bool | 
| Chris@35 | 110 Chordino::initialise(size_t channels, size_t stepSize, size_t blockSize) | 
| matthiasm@0 | 111 { | 
| Chris@23 | 112     if (debug_on) { | 
| Chris@23 | 113         cerr << "--> initialise"; | 
| Chris@23 | 114     } | 
| matthiasm@1 | 115 | 
| Chris@35 | 116     if (!NNLSBase::initialise(channels, stepSize, blockSize)) { | 
| Chris@35 | 117         return false; | 
| Chris@35 | 118     } | 
| matthiasm@1 | 119 | 
| matthiasm@0 | 120     return true; | 
| matthiasm@0 | 121 } | 
| matthiasm@0 | 122 | 
| matthiasm@0 | 123 void | 
| Chris@35 | 124 Chordino::reset() | 
| matthiasm@0 | 125 { | 
| Chris@23 | 126     if (debug_on) cerr << "--> reset"; | 
| Chris@35 | 127     NNLSBase::reset(); | 
| matthiasm@0 | 128 } | 
| matthiasm@0 | 129 | 
| Chris@35 | 130 Chordino::FeatureSet | 
| Chris@35 | 131 Chordino::process(const float *const *inputBuffers, Vamp::RealTime timestamp) | 
| matthiasm@0 | 132 { | 
| Chris@23 | 133     if (debug_on) cerr << "--> process" << endl; | 
| matthiasm@0 | 134 | 
| Chris@35 | 135     NNLSBase::baseProcess(inputBuffers, timestamp); | 
| matthiasm@0 | 136 | 
| Chris@35 | 137     return FeatureSet(); | 
| matthiasm@0 | 138 } | 
| matthiasm@0 | 139 | 
| Chris@35 | 140 Chordino::FeatureSet | 
| Chris@35 | 141 Chordino::getRemainingFeatures() | 
| matthiasm@0 | 142 { | 
| Chris@23 | 143     if (debug_on) cerr << "--> getRemainingFeatures" << endl; | 
| Chris@23 | 144     FeatureSet fsOut; | 
| Chris@35 | 145     if (m_logSpectrum.size() == 0) return fsOut; | 
| Chris@23 | 146     int nChord = m_chordnames.size(); | 
| Chris@23 | 147     // | 
| Chris@23 | 148     /**  Calculate Tuning | 
| Chris@23 | 149          calculate tuning from (using the angle of the complex number defined by the | 
| Chris@23 | 150          cumulative mean real and imag values) | 
| Chris@23 | 151     **/ | 
| Chris@23 | 152     float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2; | 
| Chris@23 | 153     float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2; | 
| Chris@23 | 154     float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI)); | 
| Chris@23 | 155     float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI); | 
| Chris@23 | 156     int intShift = floor(normalisedtuning * 3); | 
| Chris@23 | 157     float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this | 
| matthiasm@1 | 158 | 
| Chris@23 | 159     char buffer0 [50]; | 
| matthiasm@1 | 160 | 
| Chris@23 | 161     sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning); | 
| matthiasm@1 | 162 | 
| matthiasm@1 | 163 | 
| Chris@23 | 164     /** Tune Log-Frequency Spectrogram | 
| matthiasm@43 | 165         calculate a tuned log-frequency spectrogram (currentTunedSpec): use the tuning estimated above (kinda f0) to | 
| matthiasm@43 | 166         perform linear interpolation on the existing log-frequency spectrogram (kinda currentLogSpectum). | 
| Chris@23 | 167     **/ | 
| Chris@35 | 168     cerr << endl << "[Chordino Plugin] Tuning Log-Frequency Spectrogram ... "; | 
| matthiasm@13 | 169 | 
| Chris@23 | 170     float tempValue = 0; | 
| Chris@23 | 171     float dbThreshold = 0; // relative to the background spectrum | 
| Chris@23 | 172     float thresh = pow(10,dbThreshold/20); | 
| Chris@23 | 173     // cerr << "tune local ? " << m_tuneLocal << endl; | 
| Chris@23 | 174     int count = 0; | 
| matthiasm@1 | 175 | 
| Chris@35 | 176     FeatureList tunedSpec; | 
| matthiasm@43 | 177     int nFrame = m_logSpectrum.size(); | 
| matthiasm@43 | 178 | 
| matthiasm@43 | 179     vector<Vamp::RealTime> timestamps; | 
| Chris@35 | 180 | 
| Chris@35 | 181     for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) { | 
| matthiasm@43 | 182         Feature currentLogSpectum = *i; | 
| matthiasm@43 | 183         Feature currentTunedSpec; // tuned log-frequency spectrum | 
| matthiasm@43 | 184         currentTunedSpec.hasTimestamp = true; | 
| matthiasm@43 | 185         currentTunedSpec.timestamp = currentLogSpectum.timestamp; | 
| matthiasm@43 | 186         timestamps.push_back(currentLogSpectum.timestamp); | 
| matthiasm@43 | 187         currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // set lower edge to zero | 
| matthiasm@1 | 188 | 
| Chris@23 | 189         if (m_tuneLocal) { | 
| Chris@23 | 190             intShift = floor(m_localTuning[count] * 3); | 
| Chris@23 | 191             intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this | 
| Chris@23 | 192         } | 
| matthiasm@1 | 193 | 
| Chris@23 | 194         // cerr << intShift << " " << intFactor << endl; | 
| matthiasm@1 | 195 | 
| matthiasm@43 | 196         for (unsigned k = 2; k < currentLogSpectum.values.size() - 3; ++k) { // interpolate all inner bins | 
| matthiasm@43 | 197             tempValue = currentLogSpectum.values[k + intShift] * (1-intFactor) + currentLogSpectum.values[k+intShift+1] * intFactor; | 
| matthiasm@43 | 198             currentTunedSpec.values.push_back(tempValue); | 
| Chris@23 | 199         } | 
| matthiasm@1 | 200 | 
| matthiasm@43 | 201         currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // upper edge | 
| matthiasm@43 | 202         vector<float> runningmean = SpecialConvolution(currentTunedSpec.values,hw); | 
| Chris@23 | 203         vector<float> runningstd; | 
| Chris@23 | 204         for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance) | 
| matthiasm@43 | 205             runningstd.push_back((currentTunedSpec.values[i] - runningmean[i]) * (currentTunedSpec.values[i] - runningmean[i])); | 
| Chris@23 | 206         } | 
| Chris@23 | 207         runningstd = SpecialConvolution(runningstd,hw); // second step convolve | 
| Chris@23 | 208         for (int i = 0; i < 256; i++) { | 
| Chris@23 | 209             runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std | 
| Chris@23 | 210             if (runningstd[i] > 0) { | 
| matthiasm@43 | 211                 // currentTunedSpec.values[i] = (currentTunedSpec.values[i] / runningmean[i]) > thresh ? | 
| matthiasm@43 | 212                 // 		                    (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0; | 
| matthiasm@43 | 213                 currentTunedSpec.values[i] = (currentTunedSpec.values[i] - runningmean[i]) > 0 ? | 
| matthiasm@43 | 214                     (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0; | 
| Chris@23 | 215             } | 
| matthiasm@43 | 216             if (currentTunedSpec.values[i] < 0) { | 
| Chris@23 | 217                 cerr << "ERROR: negative value in logfreq spectrum" << endl; | 
| Chris@23 | 218             } | 
| Chris@23 | 219         } | 
| matthiasm@43 | 220         tunedSpec.push_back(currentTunedSpec); | 
| Chris@23 | 221         count++; | 
| Chris@23 | 222     } | 
| Chris@23 | 223     cerr << "done." << endl; | 
| matthiasm@1 | 224 | 
| Chris@23 | 225     /** Semitone spectrum and chromagrams | 
| Chris@23 | 226         Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum | 
| Chris@23 | 227         is inferred using a non-negative least squares algorithm. | 
| Chris@23 | 228         Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means | 
| Chris@23 | 229         bass and treble stacked onto each other). | 
| Chris@23 | 230     **/ | 
| matthiasm@42 | 231     if (m_useNNLS == 0) { | 
| Chris@35 | 232         cerr << "[Chordino Plugin] Mapping to semitone spectrum and chroma ... "; | 
| Chris@23 | 233     } else { | 
| Chris@35 | 234         cerr << "[Chordino Plugin] Performing NNLS and mapping to chroma ... "; | 
| Chris@23 | 235     } | 
| matthiasm@13 | 236 | 
| matthiasm@1 | 237 | 
| matthiasm@43 | 238     vector<vector<double> > chordogram; | 
| Chris@23 | 239     vector<vector<int> > scoreChordogram; | 
| Chris@35 | 240     vector<float> chordchange = vector<float>(tunedSpec.size(),0); | 
| Chris@23 | 241     count = 0; | 
| matthiasm@9 | 242 | 
| Chris@35 | 243     FeatureList chromaList; | 
| matthiasm@43 | 244 | 
| matthiasm@43 | 245 | 
| Chris@35 | 246 | 
| Chris@35 | 247     for (FeatureList::iterator it = tunedSpec.begin(); it != tunedSpec.end(); ++it) { | 
| matthiasm@43 | 248         Feature currentTunedSpec = *it; // logfreq spectrum | 
| matthiasm@43 | 249         Feature currentChromas; // treble and bass chromagram | 
| Chris@35 | 250 | 
| matthiasm@43 | 251         currentChromas.hasTimestamp = true; | 
| matthiasm@43 | 252         currentChromas.timestamp = currentTunedSpec.timestamp; | 
| Chris@35 | 253 | 
| Chris@35 | 254         float b[256]; | 
| matthiasm@1 | 255 | 
| Chris@23 | 256         bool some_b_greater_zero = false; | 
| Chris@23 | 257         float sumb = 0; | 
| Chris@23 | 258         for (int i = 0; i < 256; i++) { | 
| Chris@23 | 259             // b[i] = m_dict[(256 * count + i) % (256 * 84)]; | 
| matthiasm@43 | 260             b[i] = currentTunedSpec.values[i]; | 
| Chris@23 | 261             sumb += b[i]; | 
| Chris@23 | 262             if (b[i] > 0) { | 
| Chris@23 | 263                 some_b_greater_zero = true; | 
| Chris@23 | 264             } | 
| Chris@23 | 265         } | 
| matthiasm@1 | 266 | 
| Chris@23 | 267         // here's where the non-negative least squares algorithm calculates the note activation x | 
| matthiasm@1 | 268 | 
| Chris@23 | 269         vector<float> chroma = vector<float>(12, 0); | 
| Chris@23 | 270         vector<float> basschroma = vector<float>(12, 0); | 
| Chris@23 | 271         float currval; | 
| Chris@23 | 272         unsigned iSemitone = 0; | 
| matthiasm@1 | 273 | 
| Chris@23 | 274         if (some_b_greater_zero) { | 
| matthiasm@42 | 275             if (m_useNNLS == 0) { | 
| Chris@23 | 276                 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) { | 
| Chris@23 | 277                     currval = 0; | 
| Chris@35 | 278                     currval += b[iNote + 1 + -1] * 0.5; | 
| Chris@35 | 279                     currval += b[iNote + 1 +  0] * 1.0; | 
| Chris@35 | 280                     currval += b[iNote + 1 +  1] * 0.5; | 
| Chris@23 | 281                     chroma[iSemitone % 12] += currval * treblewindow[iSemitone]; | 
| Chris@23 | 282                     basschroma[iSemitone % 12] += currval * basswindow[iSemitone]; | 
| Chris@23 | 283                     iSemitone++; | 
| Chris@23 | 284                 } | 
| matthiasm@1 | 285 | 
| Chris@23 | 286             } else { | 
| Chris@35 | 287                 float x[84+1000]; | 
| Chris@23 | 288                 for (int i = 1; i < 1084; ++i) x[i] = 1.0; | 
| Chris@23 | 289                 vector<int> signifIndex; | 
| Chris@23 | 290                 int index=0; | 
| Chris@23 | 291                 sumb /= 84.0; | 
| Chris@23 | 292                 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) { | 
| Chris@23 | 293                     float currval = 0; | 
| Chris@23 | 294                     currval += b[iNote + 1 + -1]; | 
| Chris@23 | 295                     currval += b[iNote + 1 +  0]; | 
| Chris@23 | 296                     currval += b[iNote + 1 +  1]; | 
| Chris@23 | 297                     if (currval > 0) signifIndex.push_back(index); | 
| Chris@23 | 298                     index++; | 
| Chris@23 | 299                 } | 
| Chris@35 | 300                 float rnorm; | 
| Chris@35 | 301                 float w[84+1000]; | 
| Chris@35 | 302                 float zz[84+1000]; | 
| Chris@23 | 303                 int indx[84+1000]; | 
| Chris@23 | 304                 int mode; | 
| Chris@23 | 305                 int dictsize = 256*signifIndex.size(); | 
| Chris@35 | 306                 float *curr_dict = new float[dictsize]; | 
| Chris@23 | 307                 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) { | 
| Chris@23 | 308                     for (unsigned iBin = 0; iBin < 256; iBin++) { | 
| Chris@23 | 309                         curr_dict[iNote * 256 + iBin] = 1.0 * m_dict[signifIndex[iNote] * 256 + iBin]; | 
| Chris@23 | 310                     } | 
| Chris@23 | 311                 } | 
| Chris@35 | 312                 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode); | 
| Chris@23 | 313                 delete [] curr_dict; | 
| Chris@23 | 314                 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) { | 
| Chris@23 | 315                     // cerr << mode << endl; | 
| Chris@23 | 316                     chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]]; | 
| Chris@23 | 317                     basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]]; | 
| Chris@23 | 318                 } | 
| Chris@23 | 319             } | 
| Chris@23 | 320         } | 
| Chris@35 | 321 | 
| Chris@35 | 322         vector<float> origchroma = chroma; | 
| Chris@23 | 323         chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas | 
| matthiasm@43 | 324         currentChromas.values = chroma; | 
| Chris@35 | 325 | 
| Chris@23 | 326         if (m_doNormalizeChroma > 0) { | 
| Chris@23 | 327             vector<float> chromanorm = vector<float>(3,0); | 
| Chris@23 | 328             switch (int(m_doNormalizeChroma)) { | 
| Chris@23 | 329             case 0: // should never end up here | 
| Chris@23 | 330                 break; | 
| Chris@23 | 331             case 1: | 
| Chris@35 | 332                 chromanorm[0] = *max_element(origchroma.begin(), origchroma.end()); | 
| Chris@35 | 333                 chromanorm[1] = *max_element(basschroma.begin(), basschroma.end()); | 
| Chris@23 | 334                 chromanorm[2] = max(chromanorm[0], chromanorm[1]); | 
| Chris@23 | 335                 break; | 
| Chris@23 | 336             case 2: | 
| Chris@35 | 337                 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) { | 
| Chris@23 | 338                     chromanorm[2] += *it; | 
| Chris@23 | 339                 } | 
| Chris@23 | 340                 break; | 
| Chris@23 | 341             case 3: | 
| Chris@35 | 342                 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) { | 
| Chris@23 | 343                     chromanorm[2] += pow(*it,2); | 
| Chris@23 | 344                 } | 
| Chris@23 | 345                 chromanorm[2] = sqrt(chromanorm[2]); | 
| Chris@23 | 346                 break; | 
| Chris@23 | 347             } | 
| Chris@23 | 348             if (chromanorm[2] > 0) { | 
| Chris@35 | 349                 for (int i = 0; i < chroma.size(); i++) { | 
| matthiasm@43 | 350                     currentChromas.values[i] /= chromanorm[2]; | 
| Chris@23 | 351                 } | 
| Chris@23 | 352             } | 
| Chris@23 | 353         } | 
| Chris@35 | 354 | 
| matthiasm@43 | 355         chromaList.push_back(currentChromas); | 
| Chris@35 | 356 | 
| Chris@23 | 357         // local chord estimation | 
| matthiasm@43 | 358         vector<double> currentChordSalience; | 
| matthiasm@43 | 359         double tempchordvalue = 0; | 
| matthiasm@43 | 360         double sumchordvalue = 0; | 
| matthiasm@9 | 361 | 
| Chris@23 | 362         for (int iChord = 0; iChord < nChord; iChord++) { | 
| Chris@23 | 363             tempchordvalue = 0; | 
| Chris@23 | 364             for (int iBin = 0; iBin < 12; iBin++) { | 
| matthiasm@44 | 365                 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin]; | 
| Chris@23 | 366             } | 
| Chris@23 | 367             for (int iBin = 12; iBin < 24; iBin++) { | 
| Chris@23 | 368                 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin]; | 
| Chris@23 | 369             } | 
| matthiasm@44 | 370             if (tempchordvalue < 0) tempchordvalue = 0; | 
| Chris@23 | 371             sumchordvalue+=tempchordvalue; | 
| Chris@23 | 372             currentChordSalience.push_back(tempchordvalue); | 
| Chris@23 | 373         } | 
| Chris@23 | 374         if (sumchordvalue > 0) { | 
| Chris@23 | 375             for (int iChord = 0; iChord < nChord; iChord++) { | 
| Chris@23 | 376                 currentChordSalience[iChord] /= sumchordvalue; | 
| Chris@23 | 377             } | 
| Chris@23 | 378         } else { | 
| Chris@23 | 379             currentChordSalience[nChord-1] = 1.0; | 
| Chris@23 | 380         } | 
| Chris@23 | 381         chordogram.push_back(currentChordSalience); | 
| matthiasm@1 | 382 | 
| Chris@23 | 383         count++; | 
| Chris@23 | 384     } | 
| Chris@23 | 385     cerr << "done." << endl; | 
| matthiasm@13 | 386 | 
| matthiasm@10 | 387 | 
| matthiasm@43 | 388     bool m_useHMM = true; // this will go into the chordino header file. | 
| matthiasm@43 | 389 	if (m_useHMM) { | 
| matthiasm@44 | 390         cerr << "[Chordino Plugin] HMM Chord Estimation ... "; | 
| matthiasm@43 | 391         int oldchord = nChord-1; | 
| matthiasm@44 | 392         double selftransprob = 0.9; | 
| matthiasm@43 | 393 | 
| matthiasm@43 | 394         vector<double> init = vector<double>(nChord,1.0/nChord); | 
| matthiasm@43 | 395         vector<vector<double> > trans; | 
| matthiasm@43 | 396         for (int iChord = 0; iChord < nChord; iChord++) { | 
| matthiasm@43 | 397             vector<double> temp = vector<double>(nChord,(1-selftransprob)/(nChord-1)); | 
| matthiasm@43 | 398             temp[iChord] = selftransprob; | 
| matthiasm@43 | 399             trans.push_back(temp); | 
| matthiasm@43 | 400         } | 
| matthiasm@43 | 401         vector<int> chordpath = ViterbiPath(init,trans,chordogram); | 
| matthiasm@43 | 402 | 
| matthiasm@43 | 403         for (int iFrame = 0; iFrame < chordpath.size(); ++iFrame) { | 
| matthiasm@43 | 404             // cerr << chordpath[iFrame] << endl; | 
| matthiasm@43 | 405             if (chordpath[iFrame] != oldchord) { | 
| matthiasm@43 | 406                 Feature chord_feature; // chord estimate | 
| matthiasm@43 | 407                 chord_feature.hasTimestamp = true; | 
| matthiasm@43 | 408                 chord_feature.timestamp = timestamps[iFrame]; | 
| matthiasm@43 | 409                 chord_feature.label = m_chordnames[chordpath[iFrame]]; | 
| matthiasm@43 | 410                 fsOut[0].push_back(chord_feature); | 
| matthiasm@43 | 411                 oldchord = chordpath[iFrame]; | 
| Chris@23 | 412             } | 
| Chris@23 | 413         } | 
| matthiasm@43 | 414 | 
| matthiasm@43 | 415         // cerr << chordpath[0] << endl; | 
| matthiasm@43 | 416 	} else { | 
| matthiasm@43 | 417         /* Simple chord estimation | 
| matthiasm@43 | 418            I just take the local chord estimates ("currentChordSalience") and average them over time, then | 
| matthiasm@43 | 419            take the maximum. Very simple, don't do this at home... | 
| matthiasm@43 | 420         */ | 
| matthiasm@44 | 421         cerr << "[Chordino Plugin] Simple Chord Estimation ... "; | 
| matthiasm@43 | 422         count = 0; | 
| matthiasm@43 | 423         int halfwindowlength = m_inputSampleRate / m_stepSize; | 
| matthiasm@43 | 424         vector<int> chordSequence; | 
| matthiasm@43 | 425         for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) { // initialise the score chordogram | 
| matthiasm@43 | 426             vector<int> temp = vector<int>(nChord,0); | 
| matthiasm@43 | 427             scoreChordogram.push_back(temp); | 
| matthiasm@43 | 428         } | 
| matthiasm@43 | 429         for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it < timestamps.end()-2*halfwindowlength-1; ++it) { | 
| matthiasm@43 | 430             int startIndex = count + 1; | 
| matthiasm@43 | 431             int endIndex = count + 2 * halfwindowlength; | 
| matthiasm@43 | 432 | 
| matthiasm@43 | 433             float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1); | 
| matthiasm@43 | 434 | 
| matthiasm@43 | 435             vector<int> chordCandidates; | 
| matthiasm@43 | 436             for (unsigned iChord = 0; iChord < nChord-1; iChord++) { | 
| matthiasm@43 | 437                 // float currsum = 0; | 
| matthiasm@43 | 438                 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) { | 
| matthiasm@43 | 439                 //  currsum += chordogram[iFrame][iChord]; | 
| matthiasm@43 | 440                 // } | 
| matthiasm@43 | 441                 //                 if (currsum > chordThreshold) chordCandidates.push_back(iChord); | 
| matthiasm@43 | 442                 for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) { | 
| matthiasm@43 | 443                     if (chordogram[iFrame][iChord] > chordThreshold) { | 
| matthiasm@43 | 444                         chordCandidates.push_back(iChord); | 
| matthiasm@43 | 445                         break; | 
| matthiasm@43 | 446                     } | 
| Chris@23 | 447                 } | 
| Chris@23 | 448             } | 
| matthiasm@43 | 449             chordCandidates.push_back(nChord-1); | 
| matthiasm@43 | 450             // cerr << chordCandidates.size() << endl; | 
| matthiasm@43 | 451 | 
| matthiasm@43 | 452             float maxval = 0; // will be the value of the most salient *chord change* in this frame | 
| matthiasm@43 | 453             float maxindex = 0; //... and the index thereof | 
| matthiasm@43 | 454             unsigned bestchordL = nChord-1; // index of the best "left" chord | 
| matthiasm@43 | 455             unsigned bestchordR = nChord-1; // index of the best "right" chord | 
| matthiasm@43 | 456 | 
| matthiasm@43 | 457             for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) { | 
| matthiasm@43 | 458                 // now find the max values on both sides of iWF | 
| matthiasm@43 | 459                 // left side: | 
| matthiasm@43 | 460                 float maxL = 0; | 
| matthiasm@43 | 461                 unsigned maxindL = nChord-1; | 
| matthiasm@43 | 462                 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) { | 
| matthiasm@43 | 463                     unsigned iChord = chordCandidates[kChord]; | 
| matthiasm@43 | 464                     float currsum = 0; | 
| matthiasm@43 | 465                     for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) { | 
| matthiasm@43 | 466                         currsum += chordogram[count+iFrame][iChord]; | 
| matthiasm@43 | 467                     } | 
| matthiasm@43 | 468                     if (iChord == nChord-1) currsum *= 0.8; | 
| matthiasm@43 | 469                     if (currsum > maxL) { | 
| matthiasm@43 | 470                         maxL = currsum; | 
| matthiasm@43 | 471                         maxindL = iChord; | 
| matthiasm@43 | 472                     } | 
| matthiasm@43 | 473                 } | 
| matthiasm@43 | 474                 // right side: | 
| matthiasm@43 | 475                 float maxR = 0; | 
| matthiasm@43 | 476                 unsigned maxindR = nChord-1; | 
| matthiasm@43 | 477                 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) { | 
| matthiasm@43 | 478                     unsigned iChord = chordCandidates[kChord]; | 
| matthiasm@43 | 479                     float currsum = 0; | 
| matthiasm@43 | 480                     for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) { | 
| matthiasm@43 | 481                         currsum += chordogram[count+iFrame][iChord]; | 
| matthiasm@43 | 482                     } | 
| matthiasm@43 | 483                     if (iChord == nChord-1) currsum *= 0.8; | 
| matthiasm@43 | 484                     if (currsum > maxR) { | 
| matthiasm@43 | 485                         maxR = currsum; | 
| matthiasm@43 | 486                         maxindR = iChord; | 
| matthiasm@43 | 487                     } | 
| matthiasm@43 | 488                 } | 
| matthiasm@43 | 489                 if (maxL+maxR > maxval) { | 
| matthiasm@43 | 490                     maxval = maxL+maxR; | 
| matthiasm@43 | 491                     maxindex = iWF; | 
| matthiasm@43 | 492                     bestchordL = maxindL; | 
| matthiasm@43 | 493                     bestchordR = maxindR; | 
| matthiasm@43 | 494                 } | 
| matthiasm@43 | 495 | 
| Chris@23 | 496             } | 
| matthiasm@43 | 497             // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl; | 
| matthiasm@43 | 498             // add a score to every chord-frame-point that was part of a maximum | 
| matthiasm@43 | 499             for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) { | 
| matthiasm@43 | 500                 scoreChordogram[iFrame+count][bestchordL]++; | 
| matthiasm@43 | 501             } | 
| matthiasm@43 | 502             for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) { | 
| matthiasm@43 | 503                 scoreChordogram[iFrame+count][bestchordR]++; | 
| matthiasm@43 | 504             } | 
| matthiasm@43 | 505             if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength; | 
| matthiasm@43 | 506             count++; | 
| Chris@23 | 507         } | 
| matthiasm@43 | 508         // cerr << "*******  agent finished   *******" << endl; | 
| matthiasm@43 | 509         count = 0; | 
| matthiasm@43 | 510         for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) { | 
| matthiasm@43 | 511             float maxval = 0; // will be the value of the most salient chord in this frame | 
| matthiasm@43 | 512             float maxindex = 0; //... and the index thereof | 
| matthiasm@43 | 513             for (unsigned iChord = 0; iChord < nChord; iChord++) { | 
| matthiasm@43 | 514                 if (scoreChordogram[count][iChord] > maxval) { | 
| matthiasm@43 | 515                     maxval = scoreChordogram[count][iChord]; | 
| matthiasm@43 | 516                     maxindex = iChord; | 
| matthiasm@43 | 517                     // cerr << iChord << endl; | 
| matthiasm@43 | 518                 } | 
| matthiasm@43 | 519             } | 
| matthiasm@43 | 520             chordSequence.push_back(maxindex); | 
| matthiasm@43 | 521             count++; | 
| Chris@23 | 522         } | 
| matthiasm@43 | 523 | 
| matthiasm@43 | 524 | 
| matthiasm@43 | 525         // mode filter on chordSequence | 
| matthiasm@43 | 526         count = 0; | 
| matthiasm@43 | 527         string oldChord = ""; | 
| matthiasm@43 | 528         for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) { | 
| matthiasm@43 | 529             Feature chord_feature; // chord estimate | 
| matthiasm@43 | 530             chord_feature.hasTimestamp = true; | 
| matthiasm@43 | 531             chord_feature.timestamp = *it; | 
| matthiasm@43 | 532             // Feature currentChord; // chord estimate | 
| matthiasm@43 | 533             // currentChord.hasTimestamp = true; | 
| matthiasm@43 | 534             // currentChord.timestamp = currentChromas.timestamp; | 
| matthiasm@43 | 535 | 
| matthiasm@43 | 536             vector<int> chordCount = vector<int>(nChord,0); | 
| matthiasm@43 | 537             int maxChordCount = 0; | 
| matthiasm@43 | 538             int maxChordIndex = nChord-1; | 
| matthiasm@43 | 539             string maxChord; | 
| matthiasm@43 | 540             int startIndex = max(count - halfwindowlength/2,0); | 
| matthiasm@43 | 541             int endIndex = min(int(chordogram.size()), count + halfwindowlength/2); | 
| matthiasm@43 | 542             for (int i = startIndex; i < endIndex; i++) { | 
| matthiasm@43 | 543                 chordCount[chordSequence[i]]++; | 
| matthiasm@43 | 544                 if (chordCount[chordSequence[i]] > maxChordCount) { | 
| matthiasm@43 | 545                     // cerr << "start index " << startIndex << endl; | 
| matthiasm@43 | 546                     maxChordCount++; | 
| matthiasm@43 | 547                     maxChordIndex = chordSequence[i]; | 
| matthiasm@43 | 548                     maxChord = m_chordnames[maxChordIndex]; | 
| matthiasm@43 | 549                 } | 
| matthiasm@43 | 550             } | 
| matthiasm@43 | 551             // chordSequence[count] = maxChordIndex; | 
| matthiasm@43 | 552             // cerr << maxChordIndex << endl; | 
| matthiasm@43 | 553             // cerr << chordchange[count] << endl; | 
| matthiasm@43 | 554             // fsOut[9].push_back(currentChord); | 
| matthiasm@43 | 555             if (oldChord != maxChord) { | 
| matthiasm@43 | 556                 oldChord = maxChord; | 
| matthiasm@43 | 557                 chord_feature.label = m_chordnames[maxChordIndex]; | 
| matthiasm@43 | 558                 fsOut[0].push_back(chord_feature); | 
| matthiasm@43 | 559             } | 
| matthiasm@43 | 560             count++; | 
| Chris@23 | 561         } | 
| Chris@23 | 562     } | 
| matthiasm@43 | 563     Feature chord_feature; // last chord estimate | 
| matthiasm@43 | 564     chord_feature.hasTimestamp = true; | 
| matthiasm@43 | 565     chord_feature.timestamp = timestamps[timestamps.size()-1]; | 
| matthiasm@43 | 566     chord_feature.label = "N"; | 
| matthiasm@43 | 567     fsOut[0].push_back(chord_feature); | 
| Chris@23 | 568     cerr << "done." << endl; | 
| Chris@23 | 569     return fsOut; | 
| matthiasm@0 | 570 } |