annotate Chordino.cpp @ 35:cf8898a0174c matthiasm-plugin

* Split out NNLSChroma plugin into three plugins (chroma, chordino, tuning) with a common base class. There's still quite a lot of duplication between the getRemainingFeatures functions. Also add copyright / copying headers, etc.
author Chris Cannam
date Fri, 22 Oct 2010 11:30:21 +0100
parents NNLSChroma.cpp@da3195577172
children 7409ab74c63b
rev   line source
Chris@23 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
matthiasm@0 2
Chris@35 3 /*
Chris@35 4 NNLS-Chroma / Chordino
Chris@35 5
Chris@35 6 Audio feature extraction plugins for chromagram and chord
Chris@35 7 estimation.
Chris@35 8
Chris@35 9 Centre for Digital Music, Queen Mary University of London.
Chris@35 10 This file copyright 2008-2010 Matthias Mauch and QMUL.
Chris@35 11
Chris@35 12 This program is free software; you can redistribute it and/or
Chris@35 13 modify it under the terms of the GNU General Public License as
Chris@35 14 published by the Free Software Foundation; either version 2 of the
Chris@35 15 License, or (at your option) any later version. See the file
Chris@35 16 COPYING included with this distribution for more information.
Chris@35 17 */
Chris@35 18
Chris@35 19 #include "Chordino.h"
Chris@27 20
Chris@27 21 #include "chromamethods.h"
Chris@27 22
Chris@27 23 #include <cstdlib>
Chris@27 24 #include <fstream>
matthiasm@0 25 #include <cmath>
matthiasm@9 26
Chris@27 27 #include <algorithm>
matthiasm@0 28
matthiasm@0 29 const bool debug_on = false;
matthiasm@0 30
Chris@27 31 const vector<float> hw(hammingwind, hammingwind+19);
matthiasm@0 32
Chris@35 33 Chordino::Chordino(float inputSampleRate) :
Chris@35 34 NNLSBase(inputSampleRate)
matthiasm@0 35 {
Chris@35 36 if (debug_on) cerr << "--> Chordino" << endl;
matthiasm@0 37 }
matthiasm@0 38
Chris@35 39 Chordino::~Chordino()
matthiasm@0 40 {
Chris@35 41 if (debug_on) cerr << "--> ~Chordino" << endl;
matthiasm@0 42 }
matthiasm@0 43
matthiasm@0 44 string
Chris@35 45 Chordino::getIdentifier() const
matthiasm@0 46 {
Chris@23 47 if (debug_on) cerr << "--> getIdentifier" << endl;
Chris@35 48 return "chordino";
matthiasm@0 49 }
matthiasm@0 50
matthiasm@0 51 string
Chris@35 52 Chordino::getName() const
matthiasm@0 53 {
Chris@23 54 if (debug_on) cerr << "--> getName" << endl;
Chris@35 55 return "Chordino";
matthiasm@0 56 }
matthiasm@0 57
matthiasm@0 58 string
Chris@35 59 Chordino::getDescription() const
matthiasm@0 60 {
Chris@23 61 if (debug_on) cerr << "--> getDescription" << endl;
matthiasm@13 62 return "This plugin provides a number of features derived from a log-frequency amplitude spectrum of the DFT: some variants of the log-frequency spectrum, including a semitone spectrum derived from approximate transcription using the NNLS algorithm; based on this semitone spectrum, chroma features and a simple chord estimate.";
matthiasm@0 63 }
matthiasm@0 64
Chris@35 65 Chordino::OutputList
Chris@35 66 Chordino::getOutputDescriptors() const
matthiasm@0 67 {
Chris@23 68 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
matthiasm@0 69 OutputList list;
matthiasm@0 70
Chris@35 71 int index = 0;
matthiasm@0 72
matthiasm@0 73 OutputDescriptor d7;
matthiasm@0 74 d7.identifier = "simplechord";
matthiasm@0 75 d7.name = "Simple Chord Estimate";
matthiasm@0 76 d7.description = "A simple chord estimate based on the inner product of chord templates with the smoothed chroma.";
matthiasm@0 77 d7.unit = "";
matthiasm@0 78 d7.hasFixedBinCount = true;
matthiasm@0 79 d7.binCount = 0;
matthiasm@0 80 d7.hasKnownExtents = false;
matthiasm@0 81 d7.isQuantized = false;
matthiasm@0 82 d7.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@0 83 d7.hasDuration = false;
matthiasm@0 84 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 85 list.push_back(d7);
Chris@35 86 m_outputChords = index++;
matthiasm@0 87
Chris@23 88 OutputDescriptor d8;
matthiasm@17 89 d8.identifier = "harmonicchange";
matthiasm@17 90 d8.name = "Harmonic change value";
matthiasm@17 91 d8.description = "Harmonic change.";
matthiasm@17 92 d8.unit = "";
matthiasm@17 93 d8.hasFixedBinCount = true;
matthiasm@17 94 d8.binCount = 1;
matthiasm@17 95 d8.hasKnownExtents = true;
Chris@23 96 d8.minValue = 0.0;
Chris@23 97 d8.maxValue = 0.999;
matthiasm@17 98 d8.isQuantized = false;
matthiasm@17 99 d8.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@17 100 d8.hasDuration = false;
matthiasm@17 101 // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@17 102 list.push_back(d8);
Chris@35 103 m_outputHarmonicChange = index++;
matthiasm@1 104
matthiasm@0 105 return list;
matthiasm@0 106 }
matthiasm@0 107
matthiasm@0 108 bool
Chris@35 109 Chordino::initialise(size_t channels, size_t stepSize, size_t blockSize)
matthiasm@0 110 {
Chris@23 111 if (debug_on) {
Chris@23 112 cerr << "--> initialise";
Chris@23 113 }
matthiasm@1 114
Chris@35 115 if (!NNLSBase::initialise(channels, stepSize, blockSize)) {
Chris@35 116 return false;
Chris@35 117 }
matthiasm@1 118
matthiasm@0 119 return true;
matthiasm@0 120 }
matthiasm@0 121
matthiasm@0 122 void
Chris@35 123 Chordino::reset()
matthiasm@0 124 {
Chris@23 125 if (debug_on) cerr << "--> reset";
Chris@35 126 NNLSBase::reset();
matthiasm@0 127 }
matthiasm@0 128
Chris@35 129 Chordino::FeatureSet
Chris@35 130 Chordino::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
matthiasm@0 131 {
Chris@23 132 if (debug_on) cerr << "--> process" << endl;
matthiasm@0 133
Chris@35 134 NNLSBase::baseProcess(inputBuffers, timestamp);
matthiasm@0 135
Chris@35 136 return FeatureSet();
matthiasm@0 137 }
matthiasm@0 138
Chris@35 139 Chordino::FeatureSet
Chris@35 140 Chordino::getRemainingFeatures()
matthiasm@0 141 {
Chris@23 142 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
Chris@23 143 FeatureSet fsOut;
Chris@35 144 if (m_logSpectrum.size() == 0) return fsOut;
Chris@23 145 int nChord = m_chordnames.size();
Chris@23 146 //
Chris@23 147 /** Calculate Tuning
Chris@23 148 calculate tuning from (using the angle of the complex number defined by the
Chris@23 149 cumulative mean real and imag values)
Chris@23 150 **/
Chris@23 151 float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
Chris@23 152 float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
Chris@23 153 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
Chris@23 154 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
Chris@23 155 int intShift = floor(normalisedtuning * 3);
Chris@23 156 float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
matthiasm@1 157
Chris@23 158 char buffer0 [50];
matthiasm@1 159
Chris@23 160 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
matthiasm@1 161
matthiasm@1 162
Chris@23 163 /** Tune Log-Frequency Spectrogram
Chris@23 164 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
Chris@23 165 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
Chris@23 166 **/
Chris@35 167 cerr << endl << "[Chordino Plugin] Tuning Log-Frequency Spectrogram ... ";
matthiasm@13 168
Chris@23 169 float tempValue = 0;
Chris@23 170 float dbThreshold = 0; // relative to the background spectrum
Chris@23 171 float thresh = pow(10,dbThreshold/20);
Chris@23 172 // cerr << "tune local ? " << m_tuneLocal << endl;
Chris@23 173 int count = 0;
matthiasm@1 174
Chris@35 175 FeatureList tunedSpec;
Chris@35 176
Chris@35 177 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
Chris@23 178 Feature f1 = *i;
Chris@23 179 Feature f2; // tuned log-frequency spectrum
Chris@23 180 f2.hasTimestamp = true;
Chris@23 181 f2.timestamp = f1.timestamp;
Chris@23 182 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
matthiasm@1 183
Chris@23 184 if (m_tuneLocal) {
Chris@23 185 intShift = floor(m_localTuning[count] * 3);
Chris@23 186 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
Chris@23 187 }
matthiasm@1 188
Chris@23 189 // cerr << intShift << " " << intFactor << endl;
matthiasm@1 190
Chris@23 191 for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
Chris@23 192 tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor;
Chris@23 193 f2.values.push_back(tempValue);
Chris@23 194 }
matthiasm@1 195
Chris@23 196 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
Chris@23 197 vector<float> runningmean = SpecialConvolution(f2.values,hw);
Chris@23 198 vector<float> runningstd;
Chris@23 199 for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance)
Chris@23 200 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
Chris@23 201 }
Chris@23 202 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
Chris@23 203 for (int i = 0; i < 256; i++) {
Chris@23 204 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
Chris@23 205 if (runningstd[i] > 0) {
Chris@23 206 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
Chris@23 207 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
Chris@23 208 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
Chris@23 209 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
Chris@23 210 }
Chris@23 211 if (f2.values[i] < 0) {
Chris@23 212 cerr << "ERROR: negative value in logfreq spectrum" << endl;
Chris@23 213 }
Chris@23 214 }
Chris@35 215 tunedSpec.push_back(f2);
Chris@23 216 count++;
Chris@23 217 }
Chris@23 218 cerr << "done." << endl;
matthiasm@1 219
Chris@23 220 /** Semitone spectrum and chromagrams
Chris@23 221 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
Chris@23 222 is inferred using a non-negative least squares algorithm.
Chris@23 223 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
Chris@23 224 bass and treble stacked onto each other).
Chris@23 225 **/
Chris@23 226 if (m_dictID == 1) {
Chris@35 227 cerr << "[Chordino Plugin] Mapping to semitone spectrum and chroma ... ";
Chris@23 228 } else {
Chris@35 229 cerr << "[Chordino Plugin] Performing NNLS and mapping to chroma ... ";
Chris@23 230 }
matthiasm@13 231
matthiasm@1 232
Chris@23 233 vector<vector<float> > chordogram;
Chris@23 234 vector<vector<int> > scoreChordogram;
Chris@35 235 vector<float> chordchange = vector<float>(tunedSpec.size(),0);
Chris@23 236 count = 0;
matthiasm@9 237
Chris@35 238 FeatureList chromaList;
Chris@35 239
Chris@35 240 for (FeatureList::iterator it = tunedSpec.begin(); it != tunedSpec.end(); ++it) {
Chris@23 241 Feature f2 = *it; // logfreq spectrum
Chris@23 242 Feature f6; // treble and bass chromagram
Chris@35 243
Chris@23 244 f6.hasTimestamp = true;
Chris@23 245 f6.timestamp = f2.timestamp;
Chris@35 246
Chris@35 247 float b[256];
matthiasm@1 248
Chris@23 249 bool some_b_greater_zero = false;
Chris@23 250 float sumb = 0;
Chris@23 251 for (int i = 0; i < 256; i++) {
Chris@23 252 // b[i] = m_dict[(256 * count + i) % (256 * 84)];
Chris@23 253 b[i] = f2.values[i];
Chris@23 254 sumb += b[i];
Chris@23 255 if (b[i] > 0) {
Chris@23 256 some_b_greater_zero = true;
Chris@23 257 }
Chris@23 258 }
matthiasm@1 259
Chris@23 260 // here's where the non-negative least squares algorithm calculates the note activation x
matthiasm@1 261
Chris@23 262 vector<float> chroma = vector<float>(12, 0);
Chris@23 263 vector<float> basschroma = vector<float>(12, 0);
Chris@23 264 float currval;
Chris@23 265 unsigned iSemitone = 0;
matthiasm@1 266
Chris@23 267 if (some_b_greater_zero) {
Chris@23 268 if (m_dictID == 1) {
Chris@23 269 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
Chris@23 270 currval = 0;
Chris@35 271 currval += b[iNote + 1 + -1] * 0.5;
Chris@35 272 currval += b[iNote + 1 + 0] * 1.0;
Chris@35 273 currval += b[iNote + 1 + 1] * 0.5;
Chris@23 274 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
Chris@23 275 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
Chris@23 276 iSemitone++;
Chris@23 277 }
matthiasm@1 278
Chris@23 279 } else {
Chris@35 280 float x[84+1000];
Chris@23 281 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
Chris@23 282 vector<int> signifIndex;
Chris@23 283 int index=0;
Chris@23 284 sumb /= 84.0;
Chris@23 285 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
Chris@23 286 float currval = 0;
Chris@23 287 currval += b[iNote + 1 + -1];
Chris@23 288 currval += b[iNote + 1 + 0];
Chris@23 289 currval += b[iNote + 1 + 1];
Chris@23 290 if (currval > 0) signifIndex.push_back(index);
Chris@23 291 index++;
Chris@23 292 }
Chris@35 293 float rnorm;
Chris@35 294 float w[84+1000];
Chris@35 295 float zz[84+1000];
Chris@23 296 int indx[84+1000];
Chris@23 297 int mode;
Chris@23 298 int dictsize = 256*signifIndex.size();
Chris@35 299 float *curr_dict = new float[dictsize];
Chris@23 300 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
Chris@23 301 for (unsigned iBin = 0; iBin < 256; iBin++) {
Chris@23 302 curr_dict[iNote * 256 + iBin] = 1.0 * m_dict[signifIndex[iNote] * 256 + iBin];
Chris@23 303 }
Chris@23 304 }
Chris@35 305 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
Chris@23 306 delete [] curr_dict;
Chris@23 307 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
Chris@23 308 // cerr << mode << endl;
Chris@23 309 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
Chris@23 310 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
Chris@23 311 }
Chris@23 312 }
Chris@23 313 }
Chris@35 314
Chris@35 315 vector<float> origchroma = chroma;
Chris@23 316 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
Chris@35 317 f6.values = chroma;
Chris@35 318
Chris@23 319 if (m_doNormalizeChroma > 0) {
Chris@23 320 vector<float> chromanorm = vector<float>(3,0);
Chris@23 321 switch (int(m_doNormalizeChroma)) {
Chris@23 322 case 0: // should never end up here
Chris@23 323 break;
Chris@23 324 case 1:
Chris@35 325 chromanorm[0] = *max_element(origchroma.begin(), origchroma.end());
Chris@35 326 chromanorm[1] = *max_element(basschroma.begin(), basschroma.end());
Chris@23 327 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
Chris@23 328 break;
Chris@23 329 case 2:
Chris@35 330 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
Chris@23 331 chromanorm[2] += *it;
Chris@23 332 }
Chris@23 333 break;
Chris@23 334 case 3:
Chris@35 335 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
Chris@23 336 chromanorm[2] += pow(*it,2);
Chris@23 337 }
Chris@23 338 chromanorm[2] = sqrt(chromanorm[2]);
Chris@23 339 break;
Chris@23 340 }
Chris@23 341 if (chromanorm[2] > 0) {
Chris@35 342 for (int i = 0; i < chroma.size(); i++) {
Chris@23 343 f6.values[i] /= chromanorm[2];
Chris@23 344 }
Chris@23 345 }
Chris@23 346 }
Chris@35 347
Chris@35 348 chromaList.push_back(f6);
Chris@35 349
Chris@23 350 // local chord estimation
Chris@23 351 vector<float> currentChordSalience;
Chris@23 352 float tempchordvalue = 0;
Chris@23 353 float sumchordvalue = 0;
matthiasm@9 354
Chris@23 355 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23 356 tempchordvalue = 0;
Chris@23 357 for (int iBin = 0; iBin < 12; iBin++) {
Chris@23 358 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23 359 }
Chris@23 360 for (int iBin = 12; iBin < 24; iBin++) {
Chris@23 361 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23 362 }
Chris@23 363 sumchordvalue+=tempchordvalue;
Chris@23 364 currentChordSalience.push_back(tempchordvalue);
Chris@23 365 }
Chris@23 366 if (sumchordvalue > 0) {
Chris@23 367 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23 368 currentChordSalience[iChord] /= sumchordvalue;
Chris@23 369 }
Chris@23 370 } else {
Chris@23 371 currentChordSalience[nChord-1] = 1.0;
Chris@23 372 }
Chris@23 373 chordogram.push_back(currentChordSalience);
matthiasm@1 374
Chris@23 375 count++;
Chris@23 376 }
Chris@23 377 cerr << "done." << endl;
matthiasm@13 378
matthiasm@10 379
Chris@23 380 /* Simple chord estimation
Chris@23 381 I just take the local chord estimates ("currentChordSalience") and average them over time, then
Chris@23 382 take the maximum. Very simple, don't do this at home...
Chris@23 383 */
Chris@35 384 cerr << "[Chordino Plugin] Chord Estimation ... ";
Chris@23 385 count = 0;
Chris@23 386 int halfwindowlength = m_inputSampleRate / m_stepSize;
Chris@23 387 vector<int> chordSequence;
Chris@35 388
Chris@35 389 for (FeatureList::iterator it = chromaList.begin(); it != chromaList.end(); ++it) { // initialise the score chordogram
Chris@23 390 vector<int> temp = vector<int>(nChord,0);
Chris@23 391 scoreChordogram.push_back(temp);
Chris@23 392 }
Chris@35 393
Chris@35 394 for (FeatureList::iterator it = chromaList.begin(); it < chromaList.end()-2*halfwindowlength-1; ++it) {
Chris@23 395 int startIndex = count + 1;
Chris@23 396 int endIndex = count + 2 * halfwindowlength;
matthiasm@10 397
Chris@23 398 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
matthiasm@10 399
Chris@23 400 vector<int> chordCandidates;
Chris@23 401 for (unsigned iChord = 0; iChord < nChord-1; iChord++) {
Chris@23 402 // float currsum = 0;
Chris@23 403 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
Chris@23 404 // currsum += chordogram[iFrame][iChord];
Chris@23 405 // }
Chris@23 406 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
Chris@23 407 for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
Chris@23 408 if (chordogram[iFrame][iChord] > chordThreshold) {
Chris@23 409 chordCandidates.push_back(iChord);
Chris@23 410 break;
Chris@23 411 }
Chris@23 412 }
Chris@23 413 }
Chris@23 414 chordCandidates.push_back(nChord-1);
Chris@35 415 // cerr << chordCandidates.size() << endl;
Chris@23 416
Chris@23 417 float maxval = 0; // will be the value of the most salient *chord change* in this frame
Chris@23 418 float maxindex = 0; //... and the index thereof
Chris@23 419 unsigned bestchordL = nChord-1; // index of the best "left" chord
Chris@23 420 unsigned bestchordR = nChord-1; // index of the best "right" chord
Chris@23 421
Chris@23 422 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
Chris@23 423 // now find the max values on both sides of iWF
Chris@23 424 // left side:
Chris@23 425 float maxL = 0;
Chris@23 426 unsigned maxindL = nChord-1;
Chris@23 427 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
Chris@23 428 unsigned iChord = chordCandidates[kChord];
Chris@23 429 float currsum = 0;
Chris@23 430 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
Chris@23 431 currsum += chordogram[count+iFrame][iChord];
matthiasm@10 432 }
Chris@23 433 if (iChord == nChord-1) currsum *= 0.8;
Chris@23 434 if (currsum > maxL) {
Chris@23 435 maxL = currsum;
Chris@23 436 maxindL = iChord;
Chris@23 437 }
Chris@23 438 }
Chris@23 439 // right side:
Chris@23 440 float maxR = 0;
Chris@23 441 unsigned maxindR = nChord-1;
Chris@23 442 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
Chris@23 443 unsigned iChord = chordCandidates[kChord];
Chris@23 444 float currsum = 0;
Chris@23 445 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
Chris@23 446 currsum += chordogram[count+iFrame][iChord];
Chris@23 447 }
Chris@23 448 if (iChord == nChord-1) currsum *= 0.8;
Chris@23 449 if (currsum > maxR) {
Chris@23 450 maxR = currsum;
Chris@23 451 maxindR = iChord;
Chris@23 452 }
Chris@23 453 }
Chris@23 454 if (maxL+maxR > maxval) {
Chris@23 455 maxval = maxL+maxR;
Chris@23 456 maxindex = iWF;
Chris@23 457 bestchordL = maxindL;
Chris@23 458 bestchordR = maxindR;
Chris@23 459 }
matthiasm@3 460
Chris@23 461 }
Chris@35 462 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
Chris@23 463 // add a score to every chord-frame-point that was part of a maximum
Chris@23 464 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
Chris@23 465 scoreChordogram[iFrame+count][bestchordL]++;
Chris@23 466 }
Chris@23 467 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
Chris@23 468 scoreChordogram[iFrame+count][bestchordR]++;
Chris@23 469 }
Chris@23 470 if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
Chris@23 471 count++;
Chris@23 472 }
Chris@35 473 // cerr << "******* agent finished *******" << endl;
Chris@23 474 count = 0;
Chris@35 475 for (FeatureList::iterator it = chromaList.begin(); it != chromaList.end(); ++it) {
Chris@23 476 float maxval = 0; // will be the value of the most salient chord in this frame
Chris@23 477 float maxindex = 0; //... and the index thereof
Chris@23 478 for (unsigned iChord = 0; iChord < nChord; iChord++) {
Chris@23 479 if (scoreChordogram[count][iChord] > maxval) {
Chris@23 480 maxval = scoreChordogram[count][iChord];
Chris@23 481 maxindex = iChord;
Chris@23 482 // cerr << iChord << endl;
Chris@23 483 }
Chris@23 484 }
Chris@23 485 chordSequence.push_back(maxindex);
Chris@23 486 // cerr << "before modefilter, maxindex: " << maxindex << endl;
Chris@23 487 count++;
Chris@23 488 }
Chris@35 489 // cerr << "******* mode filter done *******" << endl;
matthiasm@10 490
matthiasm@3 491
Chris@23 492 // mode filter on chordSequence
Chris@23 493 count = 0;
Chris@23 494 string oldChord = "";
Chris@35 495 for (FeatureList::iterator it = chromaList.begin(); it != chromaList.end(); ++it) {
Chris@23 496 Feature f6 = *it;
Chris@23 497 Feature f7; // chord estimate
Chris@23 498 f7.hasTimestamp = true;
Chris@23 499 f7.timestamp = f6.timestamp;
Chris@23 500 Feature f8; // chord estimate
Chris@23 501 f8.hasTimestamp = true;
Chris@23 502 f8.timestamp = f6.timestamp;
matthiasm@17 503
Chris@23 504 vector<int> chordCount = vector<int>(nChord,0);
Chris@23 505 int maxChordCount = 0;
Chris@23 506 int maxChordIndex = nChord-1;
Chris@23 507 string maxChord;
Chris@23 508 int startIndex = max(count - halfwindowlength/2,0);
Chris@23 509 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
Chris@23 510 for (int i = startIndex; i < endIndex; i++) {
Chris@23 511 chordCount[chordSequence[i]]++;
Chris@23 512 if (chordCount[chordSequence[i]] > maxChordCount) {
Chris@23 513 // cerr << "start index " << startIndex << endl;
Chris@23 514 maxChordCount++;
Chris@23 515 maxChordIndex = chordSequence[i];
Chris@23 516 maxChord = m_chordnames[maxChordIndex];
Chris@23 517 }
Chris@23 518 }
Chris@23 519 // chordSequence[count] = maxChordIndex;
Chris@23 520 // cerr << maxChordIndex << endl;
Chris@23 521 f8.values.push_back(chordchange[count]/(halfwindowlength*2));
Chris@23 522 // cerr << chordchange[count] << endl;
Chris@35 523 fsOut[m_outputHarmonicChange].push_back(f8);
Chris@23 524 if (oldChord != maxChord) {
Chris@23 525 oldChord = maxChord;
Chris@23 526 f7.label = m_chordnames[maxChordIndex];
Chris@35 527 fsOut[m_outputChords].push_back(f7);
Chris@23 528 }
Chris@23 529 count++;
Chris@23 530 }
Chris@23 531 Feature f7; // last chord estimate
Chris@23 532 f7.hasTimestamp = true;
Chris@35 533 f7.timestamp = chromaList[chromaList.size()-1].timestamp;
Chris@23 534 f7.label = "N";
Chris@35 535 fsOut[m_outputChords].push_back(f7);
Chris@23 536 cerr << "done." << endl;
Chris@35 537
Chris@23 538 return fsOut;
matthiasm@0 539
matthiasm@0 540 }
matthiasm@0 541