annotate Chordino.cpp @ 44:109d3b2c7105 matthiasm-plugin

regarding the chord estimation:\n * tweaked chord templates\n * that means that the original method also changed
author matthiasm
date Mon, 25 Oct 2010 01:58:37 +0900
parents 131801714118
children 6e76c7710fa1
rev   line source
Chris@23 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
matthiasm@0 2
Chris@35 3 /*
Chris@35 4 NNLS-Chroma / Chordino
Chris@35 5
Chris@35 6 Audio feature extraction plugins for chromagram and chord
Chris@35 7 estimation.
Chris@35 8
Chris@35 9 Centre for Digital Music, Queen Mary University of London.
Chris@35 10 This file copyright 2008-2010 Matthias Mauch and QMUL.
Chris@35 11
Chris@35 12 This program is free software; you can redistribute it and/or
Chris@35 13 modify it under the terms of the GNU General Public License as
Chris@35 14 published by the Free Software Foundation; either version 2 of the
Chris@35 15 License, or (at your option) any later version. See the file
Chris@35 16 COPYING included with this distribution for more information.
Chris@35 17 */
Chris@35 18
Chris@35 19 #include "Chordino.h"
Chris@27 20
Chris@27 21 #include "chromamethods.h"
matthiasm@43 22 #include "viterbi.h"
Chris@27 23
Chris@27 24 #include <cstdlib>
Chris@27 25 #include <fstream>
matthiasm@0 26 #include <cmath>
matthiasm@9 27
Chris@27 28 #include <algorithm>
matthiasm@0 29
matthiasm@0 30 const bool debug_on = false;
matthiasm@0 31
Chris@27 32 const vector<float> hw(hammingwind, hammingwind+19);
matthiasm@0 33
Chris@35 34 Chordino::Chordino(float inputSampleRate) :
Chris@35 35 NNLSBase(inputSampleRate)
matthiasm@0 36 {
Chris@35 37 if (debug_on) cerr << "--> Chordino" << endl;
matthiasm@0 38 }
matthiasm@0 39
Chris@35 40 Chordino::~Chordino()
matthiasm@0 41 {
Chris@35 42 if (debug_on) cerr << "--> ~Chordino" << endl;
matthiasm@0 43 }
matthiasm@0 44
matthiasm@0 45 string
Chris@35 46 Chordino::getIdentifier() const
matthiasm@0 47 {
Chris@23 48 if (debug_on) cerr << "--> getIdentifier" << endl;
Chris@35 49 return "chordino";
matthiasm@0 50 }
matthiasm@0 51
matthiasm@0 52 string
Chris@35 53 Chordino::getName() const
matthiasm@0 54 {
Chris@23 55 if (debug_on) cerr << "--> getName" << endl;
Chris@35 56 return "Chordino";
matthiasm@0 57 }
matthiasm@0 58
matthiasm@0 59 string
Chris@35 60 Chordino::getDescription() const
matthiasm@0 61 {
Chris@23 62 if (debug_on) cerr << "--> getDescription" << endl;
matthiasm@13 63 return "This plugin provides a number of features derived from a log-frequency amplitude spectrum of the DFT: some variants of the log-frequency spectrum, including a semitone spectrum derived from approximate transcription using the NNLS algorithm; based on this semitone spectrum, chroma features and a simple chord estimate.";
matthiasm@0 64 }
matthiasm@0 65
Chris@35 66 Chordino::OutputList
Chris@35 67 Chordino::getOutputDescriptors() const
matthiasm@0 68 {
Chris@23 69 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
matthiasm@0 70 OutputList list;
matthiasm@0 71
Chris@35 72 int index = 0;
matthiasm@0 73
matthiasm@0 74 OutputDescriptor d7;
matthiasm@0 75 d7.identifier = "simplechord";
Chris@36 76 d7.name = "Chord Estimate";
matthiasm@0 77 d7.description = "A simple chord estimate based on the inner product of chord templates with the smoothed chroma.";
matthiasm@0 78 d7.unit = "";
matthiasm@0 79 d7.hasFixedBinCount = true;
matthiasm@0 80 d7.binCount = 0;
matthiasm@0 81 d7.hasKnownExtents = false;
matthiasm@0 82 d7.isQuantized = false;
matthiasm@0 83 d7.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@0 84 d7.hasDuration = false;
matthiasm@0 85 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 86 list.push_back(d7);
Chris@35 87 m_outputChords = index++;
matthiasm@0 88
Chris@23 89 OutputDescriptor d8;
matthiasm@17 90 d8.identifier = "harmonicchange";
Chris@36 91 d8.name = "Harmonic Change Value";
matthiasm@17 92 d8.description = "Harmonic change.";
matthiasm@17 93 d8.unit = "";
matthiasm@17 94 d8.hasFixedBinCount = true;
matthiasm@17 95 d8.binCount = 1;
matthiasm@17 96 d8.hasKnownExtents = true;
Chris@23 97 d8.minValue = 0.0;
Chris@23 98 d8.maxValue = 0.999;
matthiasm@17 99 d8.isQuantized = false;
matthiasm@17 100 d8.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@17 101 d8.hasDuration = false;
matthiasm@17 102 // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@17 103 list.push_back(d8);
Chris@35 104 m_outputHarmonicChange = index++;
matthiasm@1 105
matthiasm@0 106 return list;
matthiasm@0 107 }
matthiasm@0 108
matthiasm@0 109 bool
Chris@35 110 Chordino::initialise(size_t channels, size_t stepSize, size_t blockSize)
matthiasm@0 111 {
Chris@23 112 if (debug_on) {
Chris@23 113 cerr << "--> initialise";
Chris@23 114 }
matthiasm@1 115
Chris@35 116 if (!NNLSBase::initialise(channels, stepSize, blockSize)) {
Chris@35 117 return false;
Chris@35 118 }
matthiasm@1 119
matthiasm@0 120 return true;
matthiasm@0 121 }
matthiasm@0 122
matthiasm@0 123 void
Chris@35 124 Chordino::reset()
matthiasm@0 125 {
Chris@23 126 if (debug_on) cerr << "--> reset";
Chris@35 127 NNLSBase::reset();
matthiasm@0 128 }
matthiasm@0 129
Chris@35 130 Chordino::FeatureSet
Chris@35 131 Chordino::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
matthiasm@0 132 {
Chris@23 133 if (debug_on) cerr << "--> process" << endl;
matthiasm@0 134
Chris@35 135 NNLSBase::baseProcess(inputBuffers, timestamp);
matthiasm@0 136
Chris@35 137 return FeatureSet();
matthiasm@0 138 }
matthiasm@0 139
Chris@35 140 Chordino::FeatureSet
Chris@35 141 Chordino::getRemainingFeatures()
matthiasm@0 142 {
Chris@23 143 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
Chris@23 144 FeatureSet fsOut;
Chris@35 145 if (m_logSpectrum.size() == 0) return fsOut;
Chris@23 146 int nChord = m_chordnames.size();
Chris@23 147 //
Chris@23 148 /** Calculate Tuning
Chris@23 149 calculate tuning from (using the angle of the complex number defined by the
Chris@23 150 cumulative mean real and imag values)
Chris@23 151 **/
Chris@23 152 float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
Chris@23 153 float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
Chris@23 154 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
Chris@23 155 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
Chris@23 156 int intShift = floor(normalisedtuning * 3);
Chris@23 157 float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
matthiasm@1 158
Chris@23 159 char buffer0 [50];
matthiasm@1 160
Chris@23 161 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
matthiasm@1 162
matthiasm@1 163
Chris@23 164 /** Tune Log-Frequency Spectrogram
matthiasm@43 165 calculate a tuned log-frequency spectrogram (currentTunedSpec): use the tuning estimated above (kinda f0) to
matthiasm@43 166 perform linear interpolation on the existing log-frequency spectrogram (kinda currentLogSpectum).
Chris@23 167 **/
Chris@35 168 cerr << endl << "[Chordino Plugin] Tuning Log-Frequency Spectrogram ... ";
matthiasm@13 169
Chris@23 170 float tempValue = 0;
Chris@23 171 float dbThreshold = 0; // relative to the background spectrum
Chris@23 172 float thresh = pow(10,dbThreshold/20);
Chris@23 173 // cerr << "tune local ? " << m_tuneLocal << endl;
Chris@23 174 int count = 0;
matthiasm@1 175
Chris@35 176 FeatureList tunedSpec;
matthiasm@43 177 int nFrame = m_logSpectrum.size();
matthiasm@43 178
matthiasm@43 179 vector<Vamp::RealTime> timestamps;
Chris@35 180
Chris@35 181 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
matthiasm@43 182 Feature currentLogSpectum = *i;
matthiasm@43 183 Feature currentTunedSpec; // tuned log-frequency spectrum
matthiasm@43 184 currentTunedSpec.hasTimestamp = true;
matthiasm@43 185 currentTunedSpec.timestamp = currentLogSpectum.timestamp;
matthiasm@43 186 timestamps.push_back(currentLogSpectum.timestamp);
matthiasm@43 187 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // set lower edge to zero
matthiasm@1 188
Chris@23 189 if (m_tuneLocal) {
Chris@23 190 intShift = floor(m_localTuning[count] * 3);
Chris@23 191 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
Chris@23 192 }
matthiasm@1 193
Chris@23 194 // cerr << intShift << " " << intFactor << endl;
matthiasm@1 195
matthiasm@43 196 for (unsigned k = 2; k < currentLogSpectum.values.size() - 3; ++k) { // interpolate all inner bins
matthiasm@43 197 tempValue = currentLogSpectum.values[k + intShift] * (1-intFactor) + currentLogSpectum.values[k+intShift+1] * intFactor;
matthiasm@43 198 currentTunedSpec.values.push_back(tempValue);
Chris@23 199 }
matthiasm@1 200
matthiasm@43 201 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // upper edge
matthiasm@43 202 vector<float> runningmean = SpecialConvolution(currentTunedSpec.values,hw);
Chris@23 203 vector<float> runningstd;
Chris@23 204 for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance)
matthiasm@43 205 runningstd.push_back((currentTunedSpec.values[i] - runningmean[i]) * (currentTunedSpec.values[i] - runningmean[i]));
Chris@23 206 }
Chris@23 207 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
Chris@23 208 for (int i = 0; i < 256; i++) {
Chris@23 209 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
Chris@23 210 if (runningstd[i] > 0) {
matthiasm@43 211 // currentTunedSpec.values[i] = (currentTunedSpec.values[i] / runningmean[i]) > thresh ?
matthiasm@43 212 // (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
matthiasm@43 213 currentTunedSpec.values[i] = (currentTunedSpec.values[i] - runningmean[i]) > 0 ?
matthiasm@43 214 (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
Chris@23 215 }
matthiasm@43 216 if (currentTunedSpec.values[i] < 0) {
Chris@23 217 cerr << "ERROR: negative value in logfreq spectrum" << endl;
Chris@23 218 }
Chris@23 219 }
matthiasm@43 220 tunedSpec.push_back(currentTunedSpec);
Chris@23 221 count++;
Chris@23 222 }
Chris@23 223 cerr << "done." << endl;
matthiasm@1 224
Chris@23 225 /** Semitone spectrum and chromagrams
Chris@23 226 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
Chris@23 227 is inferred using a non-negative least squares algorithm.
Chris@23 228 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
Chris@23 229 bass and treble stacked onto each other).
Chris@23 230 **/
matthiasm@42 231 if (m_useNNLS == 0) {
Chris@35 232 cerr << "[Chordino Plugin] Mapping to semitone spectrum and chroma ... ";
Chris@23 233 } else {
Chris@35 234 cerr << "[Chordino Plugin] Performing NNLS and mapping to chroma ... ";
Chris@23 235 }
matthiasm@13 236
matthiasm@1 237
matthiasm@43 238 vector<vector<double> > chordogram;
Chris@23 239 vector<vector<int> > scoreChordogram;
Chris@35 240 vector<float> chordchange = vector<float>(tunedSpec.size(),0);
Chris@23 241 count = 0;
matthiasm@9 242
Chris@35 243 FeatureList chromaList;
matthiasm@43 244
matthiasm@43 245
Chris@35 246
Chris@35 247 for (FeatureList::iterator it = tunedSpec.begin(); it != tunedSpec.end(); ++it) {
matthiasm@43 248 Feature currentTunedSpec = *it; // logfreq spectrum
matthiasm@43 249 Feature currentChromas; // treble and bass chromagram
Chris@35 250
matthiasm@43 251 currentChromas.hasTimestamp = true;
matthiasm@43 252 currentChromas.timestamp = currentTunedSpec.timestamp;
Chris@35 253
Chris@35 254 float b[256];
matthiasm@1 255
Chris@23 256 bool some_b_greater_zero = false;
Chris@23 257 float sumb = 0;
Chris@23 258 for (int i = 0; i < 256; i++) {
Chris@23 259 // b[i] = m_dict[(256 * count + i) % (256 * 84)];
matthiasm@43 260 b[i] = currentTunedSpec.values[i];
Chris@23 261 sumb += b[i];
Chris@23 262 if (b[i] > 0) {
Chris@23 263 some_b_greater_zero = true;
Chris@23 264 }
Chris@23 265 }
matthiasm@1 266
Chris@23 267 // here's where the non-negative least squares algorithm calculates the note activation x
matthiasm@1 268
Chris@23 269 vector<float> chroma = vector<float>(12, 0);
Chris@23 270 vector<float> basschroma = vector<float>(12, 0);
Chris@23 271 float currval;
Chris@23 272 unsigned iSemitone = 0;
matthiasm@1 273
Chris@23 274 if (some_b_greater_zero) {
matthiasm@42 275 if (m_useNNLS == 0) {
Chris@23 276 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
Chris@23 277 currval = 0;
Chris@35 278 currval += b[iNote + 1 + -1] * 0.5;
Chris@35 279 currval += b[iNote + 1 + 0] * 1.0;
Chris@35 280 currval += b[iNote + 1 + 1] * 0.5;
Chris@23 281 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
Chris@23 282 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
Chris@23 283 iSemitone++;
Chris@23 284 }
matthiasm@1 285
Chris@23 286 } else {
Chris@35 287 float x[84+1000];
Chris@23 288 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
Chris@23 289 vector<int> signifIndex;
Chris@23 290 int index=0;
Chris@23 291 sumb /= 84.0;
Chris@23 292 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
Chris@23 293 float currval = 0;
Chris@23 294 currval += b[iNote + 1 + -1];
Chris@23 295 currval += b[iNote + 1 + 0];
Chris@23 296 currval += b[iNote + 1 + 1];
Chris@23 297 if (currval > 0) signifIndex.push_back(index);
Chris@23 298 index++;
Chris@23 299 }
Chris@35 300 float rnorm;
Chris@35 301 float w[84+1000];
Chris@35 302 float zz[84+1000];
Chris@23 303 int indx[84+1000];
Chris@23 304 int mode;
Chris@23 305 int dictsize = 256*signifIndex.size();
Chris@35 306 float *curr_dict = new float[dictsize];
Chris@23 307 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
Chris@23 308 for (unsigned iBin = 0; iBin < 256; iBin++) {
Chris@23 309 curr_dict[iNote * 256 + iBin] = 1.0 * m_dict[signifIndex[iNote] * 256 + iBin];
Chris@23 310 }
Chris@23 311 }
Chris@35 312 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
Chris@23 313 delete [] curr_dict;
Chris@23 314 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
Chris@23 315 // cerr << mode << endl;
Chris@23 316 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
Chris@23 317 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
Chris@23 318 }
Chris@23 319 }
Chris@23 320 }
Chris@35 321
Chris@35 322 vector<float> origchroma = chroma;
Chris@23 323 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
matthiasm@43 324 currentChromas.values = chroma;
Chris@35 325
Chris@23 326 if (m_doNormalizeChroma > 0) {
Chris@23 327 vector<float> chromanorm = vector<float>(3,0);
Chris@23 328 switch (int(m_doNormalizeChroma)) {
Chris@23 329 case 0: // should never end up here
Chris@23 330 break;
Chris@23 331 case 1:
Chris@35 332 chromanorm[0] = *max_element(origchroma.begin(), origchroma.end());
Chris@35 333 chromanorm[1] = *max_element(basschroma.begin(), basschroma.end());
Chris@23 334 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
Chris@23 335 break;
Chris@23 336 case 2:
Chris@35 337 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
Chris@23 338 chromanorm[2] += *it;
Chris@23 339 }
Chris@23 340 break;
Chris@23 341 case 3:
Chris@35 342 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
Chris@23 343 chromanorm[2] += pow(*it,2);
Chris@23 344 }
Chris@23 345 chromanorm[2] = sqrt(chromanorm[2]);
Chris@23 346 break;
Chris@23 347 }
Chris@23 348 if (chromanorm[2] > 0) {
Chris@35 349 for (int i = 0; i < chroma.size(); i++) {
matthiasm@43 350 currentChromas.values[i] /= chromanorm[2];
Chris@23 351 }
Chris@23 352 }
Chris@23 353 }
Chris@35 354
matthiasm@43 355 chromaList.push_back(currentChromas);
Chris@35 356
Chris@23 357 // local chord estimation
matthiasm@43 358 vector<double> currentChordSalience;
matthiasm@43 359 double tempchordvalue = 0;
matthiasm@43 360 double sumchordvalue = 0;
matthiasm@9 361
Chris@23 362 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23 363 tempchordvalue = 0;
Chris@23 364 for (int iBin = 0; iBin < 12; iBin++) {
matthiasm@44 365 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23 366 }
Chris@23 367 for (int iBin = 12; iBin < 24; iBin++) {
Chris@23 368 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23 369 }
matthiasm@44 370 if (tempchordvalue < 0) tempchordvalue = 0;
Chris@23 371 sumchordvalue+=tempchordvalue;
Chris@23 372 currentChordSalience.push_back(tempchordvalue);
Chris@23 373 }
Chris@23 374 if (sumchordvalue > 0) {
Chris@23 375 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23 376 currentChordSalience[iChord] /= sumchordvalue;
Chris@23 377 }
Chris@23 378 } else {
Chris@23 379 currentChordSalience[nChord-1] = 1.0;
Chris@23 380 }
Chris@23 381 chordogram.push_back(currentChordSalience);
matthiasm@1 382
Chris@23 383 count++;
Chris@23 384 }
Chris@23 385 cerr << "done." << endl;
matthiasm@13 386
matthiasm@10 387
matthiasm@43 388 bool m_useHMM = true; // this will go into the chordino header file.
matthiasm@43 389 if (m_useHMM) {
matthiasm@44 390 cerr << "[Chordino Plugin] HMM Chord Estimation ... ";
matthiasm@43 391 int oldchord = nChord-1;
matthiasm@44 392 double selftransprob = 0.9;
matthiasm@43 393
matthiasm@43 394 vector<double> init = vector<double>(nChord,1.0/nChord);
matthiasm@43 395 vector<vector<double> > trans;
matthiasm@43 396 for (int iChord = 0; iChord < nChord; iChord++) {
matthiasm@43 397 vector<double> temp = vector<double>(nChord,(1-selftransprob)/(nChord-1));
matthiasm@43 398 temp[iChord] = selftransprob;
matthiasm@43 399 trans.push_back(temp);
matthiasm@43 400 }
matthiasm@43 401 vector<int> chordpath = ViterbiPath(init,trans,chordogram);
matthiasm@43 402
matthiasm@43 403 for (int iFrame = 0; iFrame < chordpath.size(); ++iFrame) {
matthiasm@43 404 // cerr << chordpath[iFrame] << endl;
matthiasm@43 405 if (chordpath[iFrame] != oldchord) {
matthiasm@43 406 Feature chord_feature; // chord estimate
matthiasm@43 407 chord_feature.hasTimestamp = true;
matthiasm@43 408 chord_feature.timestamp = timestamps[iFrame];
matthiasm@43 409 chord_feature.label = m_chordnames[chordpath[iFrame]];
matthiasm@43 410 fsOut[0].push_back(chord_feature);
matthiasm@43 411 oldchord = chordpath[iFrame];
Chris@23 412 }
Chris@23 413 }
matthiasm@43 414
matthiasm@43 415 // cerr << chordpath[0] << endl;
matthiasm@43 416 } else {
matthiasm@43 417 /* Simple chord estimation
matthiasm@43 418 I just take the local chord estimates ("currentChordSalience") and average them over time, then
matthiasm@43 419 take the maximum. Very simple, don't do this at home...
matthiasm@43 420 */
matthiasm@44 421 cerr << "[Chordino Plugin] Simple Chord Estimation ... ";
matthiasm@43 422 count = 0;
matthiasm@43 423 int halfwindowlength = m_inputSampleRate / m_stepSize;
matthiasm@43 424 vector<int> chordSequence;
matthiasm@43 425 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) { // initialise the score chordogram
matthiasm@43 426 vector<int> temp = vector<int>(nChord,0);
matthiasm@43 427 scoreChordogram.push_back(temp);
matthiasm@43 428 }
matthiasm@43 429 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it < timestamps.end()-2*halfwindowlength-1; ++it) {
matthiasm@43 430 int startIndex = count + 1;
matthiasm@43 431 int endIndex = count + 2 * halfwindowlength;
matthiasm@43 432
matthiasm@43 433 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
matthiasm@43 434
matthiasm@43 435 vector<int> chordCandidates;
matthiasm@43 436 for (unsigned iChord = 0; iChord < nChord-1; iChord++) {
matthiasm@43 437 // float currsum = 0;
matthiasm@43 438 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
matthiasm@43 439 // currsum += chordogram[iFrame][iChord];
matthiasm@43 440 // }
matthiasm@43 441 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
matthiasm@43 442 for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
matthiasm@43 443 if (chordogram[iFrame][iChord] > chordThreshold) {
matthiasm@43 444 chordCandidates.push_back(iChord);
matthiasm@43 445 break;
matthiasm@43 446 }
Chris@23 447 }
Chris@23 448 }
matthiasm@43 449 chordCandidates.push_back(nChord-1);
matthiasm@43 450 // cerr << chordCandidates.size() << endl;
matthiasm@43 451
matthiasm@43 452 float maxval = 0; // will be the value of the most salient *chord change* in this frame
matthiasm@43 453 float maxindex = 0; //... and the index thereof
matthiasm@43 454 unsigned bestchordL = nChord-1; // index of the best "left" chord
matthiasm@43 455 unsigned bestchordR = nChord-1; // index of the best "right" chord
matthiasm@43 456
matthiasm@43 457 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
matthiasm@43 458 // now find the max values on both sides of iWF
matthiasm@43 459 // left side:
matthiasm@43 460 float maxL = 0;
matthiasm@43 461 unsigned maxindL = nChord-1;
matthiasm@43 462 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
matthiasm@43 463 unsigned iChord = chordCandidates[kChord];
matthiasm@43 464 float currsum = 0;
matthiasm@43 465 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
matthiasm@43 466 currsum += chordogram[count+iFrame][iChord];
matthiasm@43 467 }
matthiasm@43 468 if (iChord == nChord-1) currsum *= 0.8;
matthiasm@43 469 if (currsum > maxL) {
matthiasm@43 470 maxL = currsum;
matthiasm@43 471 maxindL = iChord;
matthiasm@43 472 }
matthiasm@43 473 }
matthiasm@43 474 // right side:
matthiasm@43 475 float maxR = 0;
matthiasm@43 476 unsigned maxindR = nChord-1;
matthiasm@43 477 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
matthiasm@43 478 unsigned iChord = chordCandidates[kChord];
matthiasm@43 479 float currsum = 0;
matthiasm@43 480 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
matthiasm@43 481 currsum += chordogram[count+iFrame][iChord];
matthiasm@43 482 }
matthiasm@43 483 if (iChord == nChord-1) currsum *= 0.8;
matthiasm@43 484 if (currsum > maxR) {
matthiasm@43 485 maxR = currsum;
matthiasm@43 486 maxindR = iChord;
matthiasm@43 487 }
matthiasm@43 488 }
matthiasm@43 489 if (maxL+maxR > maxval) {
matthiasm@43 490 maxval = maxL+maxR;
matthiasm@43 491 maxindex = iWF;
matthiasm@43 492 bestchordL = maxindL;
matthiasm@43 493 bestchordR = maxindR;
matthiasm@43 494 }
matthiasm@43 495
Chris@23 496 }
matthiasm@43 497 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
matthiasm@43 498 // add a score to every chord-frame-point that was part of a maximum
matthiasm@43 499 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
matthiasm@43 500 scoreChordogram[iFrame+count][bestchordL]++;
matthiasm@43 501 }
matthiasm@43 502 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
matthiasm@43 503 scoreChordogram[iFrame+count][bestchordR]++;
matthiasm@43 504 }
matthiasm@43 505 if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
matthiasm@43 506 count++;
Chris@23 507 }
matthiasm@43 508 // cerr << "******* agent finished *******" << endl;
matthiasm@43 509 count = 0;
matthiasm@43 510 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) {
matthiasm@43 511 float maxval = 0; // will be the value of the most salient chord in this frame
matthiasm@43 512 float maxindex = 0; //... and the index thereof
matthiasm@43 513 for (unsigned iChord = 0; iChord < nChord; iChord++) {
matthiasm@43 514 if (scoreChordogram[count][iChord] > maxval) {
matthiasm@43 515 maxval = scoreChordogram[count][iChord];
matthiasm@43 516 maxindex = iChord;
matthiasm@43 517 // cerr << iChord << endl;
matthiasm@43 518 }
matthiasm@43 519 }
matthiasm@43 520 chordSequence.push_back(maxindex);
matthiasm@43 521 count++;
Chris@23 522 }
matthiasm@43 523
matthiasm@43 524
matthiasm@43 525 // mode filter on chordSequence
matthiasm@43 526 count = 0;
matthiasm@43 527 string oldChord = "";
matthiasm@43 528 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) {
matthiasm@43 529 Feature chord_feature; // chord estimate
matthiasm@43 530 chord_feature.hasTimestamp = true;
matthiasm@43 531 chord_feature.timestamp = *it;
matthiasm@43 532 // Feature currentChord; // chord estimate
matthiasm@43 533 // currentChord.hasTimestamp = true;
matthiasm@43 534 // currentChord.timestamp = currentChromas.timestamp;
matthiasm@43 535
matthiasm@43 536 vector<int> chordCount = vector<int>(nChord,0);
matthiasm@43 537 int maxChordCount = 0;
matthiasm@43 538 int maxChordIndex = nChord-1;
matthiasm@43 539 string maxChord;
matthiasm@43 540 int startIndex = max(count - halfwindowlength/2,0);
matthiasm@43 541 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
matthiasm@43 542 for (int i = startIndex; i < endIndex; i++) {
matthiasm@43 543 chordCount[chordSequence[i]]++;
matthiasm@43 544 if (chordCount[chordSequence[i]] > maxChordCount) {
matthiasm@43 545 // cerr << "start index " << startIndex << endl;
matthiasm@43 546 maxChordCount++;
matthiasm@43 547 maxChordIndex = chordSequence[i];
matthiasm@43 548 maxChord = m_chordnames[maxChordIndex];
matthiasm@43 549 }
matthiasm@43 550 }
matthiasm@43 551 // chordSequence[count] = maxChordIndex;
matthiasm@43 552 // cerr << maxChordIndex << endl;
matthiasm@43 553 // cerr << chordchange[count] << endl;
matthiasm@43 554 // fsOut[9].push_back(currentChord);
matthiasm@43 555 if (oldChord != maxChord) {
matthiasm@43 556 oldChord = maxChord;
matthiasm@43 557 chord_feature.label = m_chordnames[maxChordIndex];
matthiasm@43 558 fsOut[0].push_back(chord_feature);
matthiasm@43 559 }
matthiasm@43 560 count++;
Chris@23 561 }
Chris@23 562 }
matthiasm@43 563 Feature chord_feature; // last chord estimate
matthiasm@43 564 chord_feature.hasTimestamp = true;
matthiasm@43 565 chord_feature.timestamp = timestamps[timestamps.size()-1];
matthiasm@43 566 chord_feature.label = "N";
matthiasm@43 567 fsOut[0].push_back(chord_feature);
Chris@23 568 cerr << "done." << endl;
Chris@23 569 return fsOut;
matthiasm@0 570 }