annotate Chordino.cpp @ 81:4270f3039ab0 matthiasm-plugin

dont remember, sorry
author Matthias Mauch <mail@matthiasmauch.net>
date Mon, 15 Nov 2010 11:01:36 +0900
parents 026a5c0ee2c2
children e5c16976513d
rev   line source
Chris@23 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
matthiasm@0 2
Chris@35 3 /*
Chris@35 4 NNLS-Chroma / Chordino
Chris@35 5
Chris@35 6 Audio feature extraction plugins for chromagram and chord
Chris@35 7 estimation.
Chris@35 8
Chris@35 9 Centre for Digital Music, Queen Mary University of London.
Chris@35 10 This file copyright 2008-2010 Matthias Mauch and QMUL.
Chris@35 11
Chris@35 12 This program is free software; you can redistribute it and/or
Chris@35 13 modify it under the terms of the GNU General Public License as
Chris@35 14 published by the Free Software Foundation; either version 2 of the
Chris@35 15 License, or (at your option) any later version. See the file
Chris@35 16 COPYING included with this distribution for more information.
Chris@35 17 */
Chris@35 18
Chris@35 19 #include "Chordino.h"
Chris@27 20
Chris@27 21 #include "chromamethods.h"
matthiasm@43 22 #include "viterbi.h"
Chris@27 23
Chris@27 24 #include <cstdlib>
Chris@27 25 #include <fstream>
matthiasm@0 26 #include <cmath>
matthiasm@9 27
Chris@27 28 #include <algorithm>
matthiasm@0 29
matthiasm@0 30 const bool debug_on = false;
matthiasm@0 31
Chris@35 32 Chordino::Chordino(float inputSampleRate) :
Chris@35 33 NNLSBase(inputSampleRate)
matthiasm@0 34 {
Chris@35 35 if (debug_on) cerr << "--> Chordino" << endl;
matthiasm@0 36 }
matthiasm@0 37
Chris@35 38 Chordino::~Chordino()
matthiasm@0 39 {
Chris@35 40 if (debug_on) cerr << "--> ~Chordino" << endl;
matthiasm@0 41 }
matthiasm@0 42
matthiasm@0 43 string
Chris@35 44 Chordino::getIdentifier() const
matthiasm@0 45 {
Chris@23 46 if (debug_on) cerr << "--> getIdentifier" << endl;
Chris@35 47 return "chordino";
matthiasm@0 48 }
matthiasm@0 49
matthiasm@0 50 string
Chris@35 51 Chordino::getName() const
matthiasm@0 52 {
Chris@23 53 if (debug_on) cerr << "--> getName" << endl;
Chris@35 54 return "Chordino";
matthiasm@0 55 }
matthiasm@0 56
matthiasm@0 57 string
Chris@35 58 Chordino::getDescription() const
matthiasm@0 59 {
Chris@23 60 if (debug_on) cerr << "--> getDescription" << endl;
matthiasm@58 61 return "Chordino provides a simple chord transcription based on NNLS Chroma (as in the NNLS Chroma plugin). Chord profiles given by the user in the file chord.dict are used to calculate frame-wise chord similarities. Two simple (non-state-of-the-art!) algorithms are available that smooth these to provide a chord transcription: a simple chord change method, and a standard HMM/Viterbi approach.";
matthiasm@0 62 }
matthiasm@0 63
matthiasm@50 64 Chordino::ParameterList
matthiasm@50 65 Chordino::getParameterDescriptors() const
matthiasm@50 66 {
matthiasm@50 67 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
matthiasm@50 68 ParameterList list;
matthiasm@50 69
matthiasm@50 70 ParameterDescriptor d;
matthiasm@50 71 d.identifier = "useNNLS";
matthiasm@50 72 d.name = "use approximate transcription (NNLS)";
matthiasm@50 73 d.description = "Toggles approximate transcription (NNLS).";
matthiasm@50 74 d.unit = "";
matthiasm@50 75 d.minValue = 0.0;
matthiasm@50 76 d.maxValue = 1.0;
matthiasm@50 77 d.defaultValue = 1.0;
matthiasm@50 78 d.isQuantized = true;
matthiasm@50 79 d.quantizeStep = 1.0;
matthiasm@50 80 list.push_back(d);
matthiasm@50 81
matthiasm@50 82 ParameterDescriptor d4;
matthiasm@50 83 d4.identifier = "useHMM";
matthiasm@53 84 d4.name = "HMM (Viterbi decoding)";
matthiasm@50 85 d4.description = "Turns on Viterbi decoding (when off, the simple chord estimator is used).";
matthiasm@50 86 d4.unit = "";
matthiasm@50 87 d4.minValue = 0.0;
matthiasm@50 88 d4.maxValue = 1.0;
matthiasm@50 89 d4.defaultValue = 1.0;
matthiasm@50 90 d4.isQuantized = true;
matthiasm@50 91 d4.quantizeStep = 1.0;
matthiasm@50 92 list.push_back(d4);
matthiasm@50 93
matthiasm@50 94 ParameterDescriptor d0;
matthiasm@50 95 d0.identifier = "rollon";
matthiasm@50 96 d0.name = "spectral roll-on";
matthiasm@58 97 d0.description = "Consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds the quantile [spectral roll on] x [total energy] will be set to 0. A value of 0 means that no bins will be changed.";
matthiasm@59 98 d0.unit = "%";
matthiasm@50 99 d0.minValue = 0;
mail@76 100 d0.maxValue = 5;
matthiasm@50 101 d0.defaultValue = 0;
matthiasm@50 102 d0.isQuantized = true;
mail@76 103 d0.quantizeStep = 0.5;
matthiasm@50 104 list.push_back(d0);
matthiasm@50 105
matthiasm@50 106 ParameterDescriptor d1;
matthiasm@50 107 d1.identifier = "tuningmode";
matthiasm@50 108 d1.name = "tuning mode";
matthiasm@50 109 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
matthiasm@50 110 d1.unit = "";
matthiasm@50 111 d1.minValue = 0;
matthiasm@50 112 d1.maxValue = 1;
matthiasm@50 113 d1.defaultValue = 0;
matthiasm@50 114 d1.isQuantized = true;
matthiasm@50 115 d1.valueNames.push_back("global tuning");
matthiasm@50 116 d1.valueNames.push_back("local tuning");
matthiasm@50 117 d1.quantizeStep = 1.0;
matthiasm@50 118 list.push_back(d1);
matthiasm@50 119
matthiasm@50 120 ParameterDescriptor d2;
matthiasm@50 121 d2.identifier = "whitening";
matthiasm@50 122 d2.name = "spectral whitening";
matthiasm@50 123 d2.description = "Spectral whitening: no whitening - 0; whitening - 1.";
matthiasm@50 124 d2.unit = "";
matthiasm@50 125 d2.isQuantized = true;
matthiasm@50 126 d2.minValue = 0.0;
matthiasm@50 127 d2.maxValue = 1.0;
matthiasm@50 128 d2.defaultValue = 1.0;
matthiasm@50 129 d2.isQuantized = false;
matthiasm@50 130 list.push_back(d2);
matthiasm@50 131
matthiasm@50 132 ParameterDescriptor d3;
matthiasm@50 133 d3.identifier = "s";
matthiasm@50 134 d3.name = "spectral shape";
matthiasm@50 135 d3.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
matthiasm@50 136 d3.unit = "";
matthiasm@50 137 d3.minValue = 0.5;
matthiasm@50 138 d3.maxValue = 0.9;
matthiasm@50 139 d3.defaultValue = 0.7;
matthiasm@50 140 d3.isQuantized = false;
matthiasm@50 141 list.push_back(d3);
matthiasm@50 142
matthiasm@50 143 // ParameterDescriptor d4;
matthiasm@50 144 // d4.identifier = "chromanormalize";
matthiasm@50 145 // d4.name = "chroma normalization";
matthiasm@50 146 // d4.description = "How shall the chroma vector be normalized?";
matthiasm@50 147 // d4.unit = "";
matthiasm@50 148 // d4.minValue = 0;
matthiasm@50 149 // d4.maxValue = 3;
matthiasm@50 150 // d4.defaultValue = 0;
matthiasm@50 151 // d4.isQuantized = true;
matthiasm@50 152 // d4.valueNames.push_back("none");
matthiasm@50 153 // d4.valueNames.push_back("maximum norm");
matthiasm@50 154 // d4.valueNames.push_back("L1 norm");
matthiasm@50 155 // d4.valueNames.push_back("L2 norm");
matthiasm@50 156 // d4.quantizeStep = 1.0;
matthiasm@50 157 // list.push_back(d4);
matthiasm@50 158
matthiasm@50 159 return list;
matthiasm@50 160 }
matthiasm@50 161
Chris@35 162 Chordino::OutputList
Chris@35 163 Chordino::getOutputDescriptors() const
matthiasm@0 164 {
Chris@23 165 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
matthiasm@0 166 OutputList list;
matthiasm@0 167
Chris@35 168 int index = 0;
matthiasm@0 169
matthiasm@0 170 OutputDescriptor d7;
matthiasm@0 171 d7.identifier = "simplechord";
Chris@36 172 d7.name = "Chord Estimate";
matthiasm@58 173 d7.description = "Estimated chord times and labels. Two simple (non-state-of-the-art!) algorithms are available that smooth these to provide a chord transcription: a simple chord change method, and a standard HMM/Viterbi approach.";
matthiasm@0 174 d7.unit = "";
matthiasm@0 175 d7.hasFixedBinCount = true;
matthiasm@0 176 d7.binCount = 0;
matthiasm@0 177 d7.hasKnownExtents = false;
matthiasm@0 178 d7.isQuantized = false;
matthiasm@0 179 d7.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@0 180 d7.hasDuration = false;
matthiasm@0 181 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 182 list.push_back(d7);
Chris@35 183 m_outputChords = index++;
matthiasm@0 184
Chris@23 185 OutputDescriptor d8;
mail@60 186 d8.identifier = "harmonicchange";
Chris@36 187 d8.name = "Harmonic Change Value";
matthiasm@58 188 d8.description = "An indication of the likelihood of harmonic change. Depends on the chord dictionary. Calculation is different depending on whether the Viterbi algorithm is used for chord estimation, or the simple chord estimate.";
matthiasm@17 189 d8.unit = "";
matthiasm@17 190 d8.hasFixedBinCount = true;
matthiasm@17 191 d8.binCount = 1;
mail@60 192 d8.hasKnownExtents = false;
mail@60 193 // d8.minValue = 0.0;
mail@60 194 // d8.maxValue = 0.999;
matthiasm@17 195 d8.isQuantized = false;
matthiasm@17 196 d8.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@17 197 d8.hasDuration = false;
matthiasm@17 198 // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@17 199 list.push_back(d8);
Chris@35 200 m_outputHarmonicChange = index++;
matthiasm@1 201
matthiasm@0 202 return list;
matthiasm@0 203 }
matthiasm@0 204
matthiasm@0 205 bool
Chris@35 206 Chordino::initialise(size_t channels, size_t stepSize, size_t blockSize)
matthiasm@0 207 {
Chris@23 208 if (debug_on) {
Chris@23 209 cerr << "--> initialise";
Chris@23 210 }
mail@76 211
Chris@35 212 if (!NNLSBase::initialise(channels, stepSize, blockSize)) {
Chris@35 213 return false;
Chris@35 214 }
matthiasm@1 215
matthiasm@0 216 return true;
matthiasm@0 217 }
matthiasm@0 218
matthiasm@0 219 void
Chris@35 220 Chordino::reset()
matthiasm@0 221 {
Chris@23 222 if (debug_on) cerr << "--> reset";
Chris@35 223 NNLSBase::reset();
matthiasm@0 224 }
matthiasm@0 225
Chris@35 226 Chordino::FeatureSet
Chris@35 227 Chordino::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
matthiasm@0 228 {
Chris@23 229 if (debug_on) cerr << "--> process" << endl;
matthiasm@0 230
Chris@35 231 NNLSBase::baseProcess(inputBuffers, timestamp);
matthiasm@0 232
Chris@35 233 return FeatureSet();
matthiasm@0 234 }
matthiasm@0 235
Chris@35 236 Chordino::FeatureSet
Chris@35 237 Chordino::getRemainingFeatures()
matthiasm@0 238 {
mail@76 239 cerr << hw[0] << hw[1] << endl;
Chris@23 240 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
Chris@23 241 FeatureSet fsOut;
Chris@35 242 if (m_logSpectrum.size() == 0) return fsOut;
Chris@23 243 int nChord = m_chordnames.size();
Chris@23 244 //
Chris@23 245 /** Calculate Tuning
Chris@23 246 calculate tuning from (using the angle of the complex number defined by the
Chris@23 247 cumulative mean real and imag values)
Chris@23 248 **/
mail@80 249 float meanTuningImag = 0;
mail@80 250 float meanTuningReal = 0;
mail@80 251 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
mail@80 252 meanTuningReal += m_meanTunings[iBPS] * cosvalues[iBPS];
mail@80 253 meanTuningImag += m_meanTunings[iBPS] * sinvalues[iBPS];
mail@80 254 }
Chris@23 255 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
Chris@23 256 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
Chris@23 257 int intShift = floor(normalisedtuning * 3);
mail@80 258 float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this
matthiasm@1 259
Chris@23 260 char buffer0 [50];
matthiasm@1 261
Chris@23 262 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
matthiasm@1 263
matthiasm@1 264
Chris@23 265 /** Tune Log-Frequency Spectrogram
matthiasm@43 266 calculate a tuned log-frequency spectrogram (currentTunedSpec): use the tuning estimated above (kinda f0) to
matthiasm@43 267 perform linear interpolation on the existing log-frequency spectrogram (kinda currentLogSpectum).
Chris@23 268 **/
Chris@35 269 cerr << endl << "[Chordino Plugin] Tuning Log-Frequency Spectrogram ... ";
matthiasm@13 270
Chris@23 271 float tempValue = 0;
Chris@23 272 float dbThreshold = 0; // relative to the background spectrum
Chris@23 273 float thresh = pow(10,dbThreshold/20);
Chris@23 274 // cerr << "tune local ? " << m_tuneLocal << endl;
Chris@23 275 int count = 0;
matthiasm@1 276
Chris@35 277 FeatureList tunedSpec;
matthiasm@43 278 int nFrame = m_logSpectrum.size();
matthiasm@43 279
matthiasm@43 280 vector<Vamp::RealTime> timestamps;
Chris@35 281
Chris@35 282 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
matthiasm@43 283 Feature currentLogSpectum = *i;
matthiasm@43 284 Feature currentTunedSpec; // tuned log-frequency spectrum
matthiasm@43 285 currentTunedSpec.hasTimestamp = true;
matthiasm@43 286 currentTunedSpec.timestamp = currentLogSpectum.timestamp;
matthiasm@43 287 timestamps.push_back(currentLogSpectum.timestamp);
matthiasm@43 288 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // set lower edge to zero
matthiasm@1 289
Chris@23 290 if (m_tuneLocal) {
Chris@23 291 intShift = floor(m_localTuning[count] * 3);
mail@80 292 floatShift = m_localTuning[count] * 3 - intShift; // floatShift is a really bad name for this
Chris@23 293 }
matthiasm@1 294
mail@80 295 // cerr << intShift << " " << floatShift << endl;
matthiasm@1 296
matthiasm@43 297 for (unsigned k = 2; k < currentLogSpectum.values.size() - 3; ++k) { // interpolate all inner bins
mail@80 298 tempValue = currentLogSpectum.values[k + intShift] * (1-floatShift) + currentLogSpectum.values[k+intShift+1] * floatShift;
matthiasm@43 299 currentTunedSpec.values.push_back(tempValue);
Chris@23 300 }
matthiasm@1 301
matthiasm@43 302 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // upper edge
matthiasm@43 303 vector<float> runningmean = SpecialConvolution(currentTunedSpec.values,hw);
Chris@23 304 vector<float> runningstd;
mail@77 305 for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance)
matthiasm@43 306 runningstd.push_back((currentTunedSpec.values[i] - runningmean[i]) * (currentTunedSpec.values[i] - runningmean[i]));
Chris@23 307 }
Chris@23 308 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
mail@77 309 for (int i = 0; i < nNote; i++) {
Chris@23 310 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
Chris@23 311 if (runningstd[i] > 0) {
matthiasm@43 312 // currentTunedSpec.values[i] = (currentTunedSpec.values[i] / runningmean[i]) > thresh ?
matthiasm@43 313 // (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
matthiasm@43 314 currentTunedSpec.values[i] = (currentTunedSpec.values[i] - runningmean[i]) > 0 ?
matthiasm@43 315 (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
Chris@23 316 }
matthiasm@43 317 if (currentTunedSpec.values[i] < 0) {
Chris@23 318 cerr << "ERROR: negative value in logfreq spectrum" << endl;
Chris@23 319 }
Chris@23 320 }
matthiasm@43 321 tunedSpec.push_back(currentTunedSpec);
Chris@23 322 count++;
Chris@23 323 }
Chris@23 324 cerr << "done." << endl;
matthiasm@1 325
Chris@23 326 /** Semitone spectrum and chromagrams
Chris@23 327 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
Chris@23 328 is inferred using a non-negative least squares algorithm.
Chris@23 329 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
Chris@23 330 bass and treble stacked onto each other).
Chris@23 331 **/
matthiasm@42 332 if (m_useNNLS == 0) {
Chris@35 333 cerr << "[Chordino Plugin] Mapping to semitone spectrum and chroma ... ";
Chris@23 334 } else {
Chris@35 335 cerr << "[Chordino Plugin] Performing NNLS and mapping to chroma ... ";
Chris@23 336 }
matthiasm@13 337
matthiasm@1 338
matthiasm@43 339 vector<vector<double> > chordogram;
Chris@23 340 vector<vector<int> > scoreChordogram;
Chris@35 341 vector<float> chordchange = vector<float>(tunedSpec.size(),0);
Chris@23 342 count = 0;
matthiasm@9 343
Chris@35 344 FeatureList chromaList;
matthiasm@43 345
matthiasm@43 346
Chris@35 347
Chris@35 348 for (FeatureList::iterator it = tunedSpec.begin(); it != tunedSpec.end(); ++it) {
matthiasm@43 349 Feature currentTunedSpec = *it; // logfreq spectrum
matthiasm@43 350 Feature currentChromas; // treble and bass chromagram
Chris@35 351
matthiasm@43 352 currentChromas.hasTimestamp = true;
matthiasm@43 353 currentChromas.timestamp = currentTunedSpec.timestamp;
Chris@35 354
mail@77 355 float b[nNote];
matthiasm@1 356
Chris@23 357 bool some_b_greater_zero = false;
Chris@23 358 float sumb = 0;
mail@77 359 for (int i = 0; i < nNote; i++) {
mail@77 360 // b[i] = m_dict[(nNote * count + i) % (nNote * 84)];
matthiasm@43 361 b[i] = currentTunedSpec.values[i];
Chris@23 362 sumb += b[i];
Chris@23 363 if (b[i] > 0) {
Chris@23 364 some_b_greater_zero = true;
Chris@23 365 }
Chris@23 366 }
matthiasm@1 367
Chris@23 368 // here's where the non-negative least squares algorithm calculates the note activation x
matthiasm@1 369
Chris@23 370 vector<float> chroma = vector<float>(12, 0);
Chris@23 371 vector<float> basschroma = vector<float>(12, 0);
Chris@23 372 float currval;
Chris@23 373 unsigned iSemitone = 0;
matthiasm@1 374
Chris@23 375 if (some_b_greater_zero) {
matthiasm@42 376 if (m_useNNLS == 0) {
mail@81 377 for (unsigned iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
Chris@23 378 currval = 0;
mail@81 379 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
mail@81 380 currval += b[iNote + iBPS] * (1-abs(iBPS*1.0/(nBPS/2+1)));
mail@81 381 }
Chris@23 382 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
Chris@23 383 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
Chris@23 384 iSemitone++;
Chris@23 385 }
matthiasm@1 386
Chris@23 387 } else {
Chris@35 388 float x[84+1000];
Chris@23 389 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
Chris@23 390 vector<int> signifIndex;
Chris@23 391 int index=0;
Chris@23 392 sumb /= 84.0;
mail@81 393 for (unsigned iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
Chris@23 394 float currval = 0;
mail@81 395 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
mail@81 396 currval += b[iNote + iBPS];
mail@81 397 }
Chris@23 398 if (currval > 0) signifIndex.push_back(index);
Chris@23 399 index++;
Chris@23 400 }
Chris@35 401 float rnorm;
Chris@35 402 float w[84+1000];
Chris@35 403 float zz[84+1000];
Chris@23 404 int indx[84+1000];
Chris@23 405 int mode;
mail@77 406 int dictsize = nNote*signifIndex.size();
mail@81 407 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
Chris@35 408 float *curr_dict = new float[dictsize];
Chris@23 409 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
mail@77 410 for (unsigned iBin = 0; iBin < nNote; iBin++) {
mail@77 411 curr_dict[iNote * nNote + iBin] = 1.0 * m_dict[signifIndex[iNote] * nNote + iBin];
Chris@23 412 }
Chris@23 413 }
Chris@35 414 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
Chris@23 415 delete [] curr_dict;
Chris@23 416 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
Chris@23 417 // cerr << mode << endl;
Chris@23 418 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
Chris@23 419 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
Chris@23 420 }
Chris@23 421 }
Chris@23 422 }
Chris@35 423
Chris@35 424 vector<float> origchroma = chroma;
Chris@23 425 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
matthiasm@43 426 currentChromas.values = chroma;
Chris@35 427
Chris@23 428 if (m_doNormalizeChroma > 0) {
Chris@23 429 vector<float> chromanorm = vector<float>(3,0);
Chris@23 430 switch (int(m_doNormalizeChroma)) {
Chris@23 431 case 0: // should never end up here
Chris@23 432 break;
Chris@23 433 case 1:
Chris@35 434 chromanorm[0] = *max_element(origchroma.begin(), origchroma.end());
Chris@35 435 chromanorm[1] = *max_element(basschroma.begin(), basschroma.end());
Chris@23 436 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
Chris@23 437 break;
Chris@23 438 case 2:
Chris@35 439 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
Chris@23 440 chromanorm[2] += *it;
Chris@23 441 }
Chris@23 442 break;
Chris@23 443 case 3:
Chris@35 444 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
Chris@23 445 chromanorm[2] += pow(*it,2);
Chris@23 446 }
Chris@23 447 chromanorm[2] = sqrt(chromanorm[2]);
Chris@23 448 break;
Chris@23 449 }
Chris@23 450 if (chromanorm[2] > 0) {
Chris@35 451 for (int i = 0; i < chroma.size(); i++) {
matthiasm@43 452 currentChromas.values[i] /= chromanorm[2];
Chris@23 453 }
Chris@23 454 }
Chris@23 455 }
Chris@35 456
matthiasm@43 457 chromaList.push_back(currentChromas);
Chris@35 458
Chris@23 459 // local chord estimation
matthiasm@43 460 vector<double> currentChordSalience;
matthiasm@43 461 double tempchordvalue = 0;
matthiasm@43 462 double sumchordvalue = 0;
matthiasm@9 463
Chris@23 464 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23 465 tempchordvalue = 0;
Chris@23 466 for (int iBin = 0; iBin < 12; iBin++) {
matthiasm@44 467 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23 468 }
Chris@23 469 for (int iBin = 12; iBin < 24; iBin++) {
Chris@23 470 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23 471 }
matthiasm@48 472 if (iChord == nChord-1) tempchordvalue *= .7;
matthiasm@48 473 if (tempchordvalue < 0) tempchordvalue = 0.0;
matthiasm@50 474 tempchordvalue = pow(1.3,tempchordvalue);
Chris@23 475 sumchordvalue+=tempchordvalue;
Chris@23 476 currentChordSalience.push_back(tempchordvalue);
Chris@23 477 }
Chris@23 478 if (sumchordvalue > 0) {
Chris@23 479 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23 480 currentChordSalience[iChord] /= sumchordvalue;
Chris@23 481 }
Chris@23 482 } else {
Chris@23 483 currentChordSalience[nChord-1] = 1.0;
Chris@23 484 }
Chris@23 485 chordogram.push_back(currentChordSalience);
matthiasm@1 486
Chris@23 487 count++;
Chris@23 488 }
Chris@23 489 cerr << "done." << endl;
matthiasm@13 490
matthiasm@10 491
matthiasm@50 492 // bool m_useHMM = true; // this will go into the chordino header file.
matthiasm@50 493 if (m_useHMM == 1.0) {
matthiasm@44 494 cerr << "[Chordino Plugin] HMM Chord Estimation ... ";
matthiasm@43 495 int oldchord = nChord-1;
matthiasm@48 496 double selftransprob = 0.99;
matthiasm@43 497
matthiasm@48 498 // vector<double> init = vector<double>(nChord,1.0/nChord);
matthiasm@48 499 vector<double> init = vector<double>(nChord,0); init[nChord-1] = 1;
matthiasm@48 500
matthiasm@50 501 double *delta;
matthiasm@50 502 delta = (double *)malloc(sizeof(double)*nFrame*nChord);
matthiasm@50 503
matthiasm@43 504 vector<vector<double> > trans;
matthiasm@43 505 for (int iChord = 0; iChord < nChord; iChord++) {
matthiasm@43 506 vector<double> temp = vector<double>(nChord,(1-selftransprob)/(nChord-1));
matthiasm@43 507 temp[iChord] = selftransprob;
matthiasm@43 508 trans.push_back(temp);
matthiasm@43 509 }
matthiasm@50 510 vector<int> chordpath = ViterbiPath(init, trans, chordogram, delta);
matthiasm@48 511
matthiasm@48 512
matthiasm@48 513 Feature chord_feature; // chord estimate
matthiasm@48 514 chord_feature.hasTimestamp = true;
matthiasm@48 515 chord_feature.timestamp = timestamps[0];
matthiasm@48 516 chord_feature.label = m_chordnames[chordpath[0]];
mail@60 517 fsOut[m_outputChords].push_back(chord_feature);
matthiasm@43 518
mail@60 519 chordchange[0] = 0;
matthiasm@50 520 for (int iFrame = 1; iFrame < chordpath.size(); ++iFrame) {
matthiasm@43 521 // cerr << chordpath[iFrame] << endl;
matthiasm@48 522 if (chordpath[iFrame] != oldchord ) {
matthiasm@43 523 Feature chord_feature; // chord estimate
matthiasm@43 524 chord_feature.hasTimestamp = true;
matthiasm@43 525 chord_feature.timestamp = timestamps[iFrame];
matthiasm@43 526 chord_feature.label = m_chordnames[chordpath[iFrame]];
mail@60 527 fsOut[m_outputChords].push_back(chord_feature);
matthiasm@43 528 oldchord = chordpath[iFrame];
Chris@23 529 }
matthiasm@50 530 /* calculating simple chord change prob */
matthiasm@50 531 for (int iChord = 0; iChord < nChord; iChord++) {
matthiasm@50 532 chordchange[iFrame-1] += delta[(iFrame-1)*nChord + iChord] * log(delta[(iFrame-1)*nChord + iChord]/delta[iFrame*nChord + iChord]);
matthiasm@50 533 }
Chris@23 534 }
matthiasm@43 535
matthiasm@43 536 // cerr << chordpath[0] << endl;
matthiasm@43 537 } else {
matthiasm@43 538 /* Simple chord estimation
matthiasm@43 539 I just take the local chord estimates ("currentChordSalience") and average them over time, then
matthiasm@43 540 take the maximum. Very simple, don't do this at home...
matthiasm@43 541 */
matthiasm@44 542 cerr << "[Chordino Plugin] Simple Chord Estimation ... ";
matthiasm@43 543 count = 0;
matthiasm@43 544 int halfwindowlength = m_inputSampleRate / m_stepSize;
matthiasm@43 545 vector<int> chordSequence;
matthiasm@43 546 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) { // initialise the score chordogram
matthiasm@43 547 vector<int> temp = vector<int>(nChord,0);
matthiasm@43 548 scoreChordogram.push_back(temp);
matthiasm@43 549 }
matthiasm@43 550 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it < timestamps.end()-2*halfwindowlength-1; ++it) {
matthiasm@43 551 int startIndex = count + 1;
matthiasm@43 552 int endIndex = count + 2 * halfwindowlength;
matthiasm@43 553
matthiasm@43 554 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
matthiasm@43 555
matthiasm@43 556 vector<int> chordCandidates;
matthiasm@43 557 for (unsigned iChord = 0; iChord < nChord-1; iChord++) {
matthiasm@43 558 // float currsum = 0;
matthiasm@43 559 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
matthiasm@43 560 // currsum += chordogram[iFrame][iChord];
matthiasm@43 561 // }
matthiasm@43 562 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
matthiasm@43 563 for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
matthiasm@43 564 if (chordogram[iFrame][iChord] > chordThreshold) {
matthiasm@43 565 chordCandidates.push_back(iChord);
matthiasm@43 566 break;
matthiasm@43 567 }
Chris@23 568 }
Chris@23 569 }
matthiasm@43 570 chordCandidates.push_back(nChord-1);
matthiasm@43 571 // cerr << chordCandidates.size() << endl;
matthiasm@43 572
matthiasm@43 573 float maxval = 0; // will be the value of the most salient *chord change* in this frame
matthiasm@43 574 float maxindex = 0; //... and the index thereof
matthiasm@43 575 unsigned bestchordL = nChord-1; // index of the best "left" chord
matthiasm@43 576 unsigned bestchordR = nChord-1; // index of the best "right" chord
matthiasm@43 577
matthiasm@43 578 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
matthiasm@43 579 // now find the max values on both sides of iWF
matthiasm@43 580 // left side:
matthiasm@43 581 float maxL = 0;
matthiasm@43 582 unsigned maxindL = nChord-1;
matthiasm@43 583 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
matthiasm@43 584 unsigned iChord = chordCandidates[kChord];
matthiasm@43 585 float currsum = 0;
matthiasm@43 586 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
matthiasm@43 587 currsum += chordogram[count+iFrame][iChord];
matthiasm@43 588 }
matthiasm@43 589 if (iChord == nChord-1) currsum *= 0.8;
matthiasm@43 590 if (currsum > maxL) {
matthiasm@43 591 maxL = currsum;
matthiasm@43 592 maxindL = iChord;
matthiasm@43 593 }
matthiasm@43 594 }
matthiasm@43 595 // right side:
matthiasm@43 596 float maxR = 0;
matthiasm@43 597 unsigned maxindR = nChord-1;
matthiasm@43 598 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
matthiasm@43 599 unsigned iChord = chordCandidates[kChord];
matthiasm@43 600 float currsum = 0;
matthiasm@43 601 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
matthiasm@43 602 currsum += chordogram[count+iFrame][iChord];
matthiasm@43 603 }
matthiasm@43 604 if (iChord == nChord-1) currsum *= 0.8;
matthiasm@43 605 if (currsum > maxR) {
matthiasm@43 606 maxR = currsum;
matthiasm@43 607 maxindR = iChord;
matthiasm@43 608 }
matthiasm@43 609 }
matthiasm@43 610 if (maxL+maxR > maxval) {
matthiasm@43 611 maxval = maxL+maxR;
matthiasm@43 612 maxindex = iWF;
matthiasm@43 613 bestchordL = maxindL;
matthiasm@43 614 bestchordR = maxindR;
matthiasm@43 615 }
matthiasm@43 616
Chris@23 617 }
matthiasm@43 618 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
matthiasm@43 619 // add a score to every chord-frame-point that was part of a maximum
matthiasm@43 620 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
matthiasm@43 621 scoreChordogram[iFrame+count][bestchordL]++;
matthiasm@43 622 }
matthiasm@43 623 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
matthiasm@43 624 scoreChordogram[iFrame+count][bestchordR]++;
matthiasm@43 625 }
matthiasm@50 626 if (bestchordL != bestchordR) {
matthiasm@50 627 chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
matthiasm@50 628 }
matthiasm@43 629 count++;
Chris@23 630 }
matthiasm@43 631 // cerr << "******* agent finished *******" << endl;
matthiasm@43 632 count = 0;
matthiasm@43 633 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) {
matthiasm@43 634 float maxval = 0; // will be the value of the most salient chord in this frame
matthiasm@43 635 float maxindex = 0; //... and the index thereof
matthiasm@43 636 for (unsigned iChord = 0; iChord < nChord; iChord++) {
matthiasm@43 637 if (scoreChordogram[count][iChord] > maxval) {
matthiasm@43 638 maxval = scoreChordogram[count][iChord];
matthiasm@43 639 maxindex = iChord;
matthiasm@43 640 // cerr << iChord << endl;
matthiasm@43 641 }
matthiasm@43 642 }
matthiasm@43 643 chordSequence.push_back(maxindex);
matthiasm@43 644 count++;
Chris@23 645 }
matthiasm@43 646
matthiasm@43 647
matthiasm@43 648 // mode filter on chordSequence
matthiasm@43 649 count = 0;
matthiasm@43 650 string oldChord = "";
matthiasm@43 651 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) {
matthiasm@43 652 Feature chord_feature; // chord estimate
matthiasm@43 653 chord_feature.hasTimestamp = true;
matthiasm@43 654 chord_feature.timestamp = *it;
matthiasm@43 655 // Feature currentChord; // chord estimate
matthiasm@43 656 // currentChord.hasTimestamp = true;
matthiasm@43 657 // currentChord.timestamp = currentChromas.timestamp;
matthiasm@43 658
matthiasm@43 659 vector<int> chordCount = vector<int>(nChord,0);
matthiasm@43 660 int maxChordCount = 0;
matthiasm@43 661 int maxChordIndex = nChord-1;
matthiasm@43 662 string maxChord;
matthiasm@43 663 int startIndex = max(count - halfwindowlength/2,0);
matthiasm@43 664 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
matthiasm@43 665 for (int i = startIndex; i < endIndex; i++) {
matthiasm@43 666 chordCount[chordSequence[i]]++;
matthiasm@43 667 if (chordCount[chordSequence[i]] > maxChordCount) {
matthiasm@43 668 // cerr << "start index " << startIndex << endl;
matthiasm@43 669 maxChordCount++;
matthiasm@43 670 maxChordIndex = chordSequence[i];
matthiasm@43 671 maxChord = m_chordnames[maxChordIndex];
matthiasm@43 672 }
matthiasm@43 673 }
matthiasm@43 674 // chordSequence[count] = maxChordIndex;
matthiasm@43 675 // cerr << maxChordIndex << endl;
matthiasm@50 676 // cerr << chordchange[count] << endl;
matthiasm@43 677 if (oldChord != maxChord) {
matthiasm@43 678 oldChord = maxChord;
matthiasm@43 679 chord_feature.label = m_chordnames[maxChordIndex];
mail@60 680 fsOut[m_outputChords].push_back(chord_feature);
matthiasm@43 681 }
matthiasm@43 682 count++;
Chris@23 683 }
Chris@23 684 }
matthiasm@43 685 Feature chord_feature; // last chord estimate
matthiasm@43 686 chord_feature.hasTimestamp = true;
matthiasm@43 687 chord_feature.timestamp = timestamps[timestamps.size()-1];
matthiasm@43 688 chord_feature.label = "N";
mail@60 689 fsOut[m_outputChords].push_back(chord_feature);
Chris@23 690 cerr << "done." << endl;
matthiasm@50 691
matthiasm@50 692 for (int iFrame = 0; iFrame < nFrame; iFrame++) {
matthiasm@50 693 Feature chordchange_feature;
matthiasm@50 694 chordchange_feature.hasTimestamp = true;
matthiasm@50 695 chordchange_feature.timestamp = timestamps[iFrame];
matthiasm@50 696 chordchange_feature.values.push_back(chordchange[iFrame]);
mail@60 697 // cerr << chordchange[iFrame] << endl;
mail@60 698 fsOut[m_outputHarmonicChange].push_back(chordchange_feature);
matthiasm@50 699 }
matthiasm@50 700
mail@60 701 // for (int iFrame = 0; iFrame < nFrame; iFrame++) cerr << fsOut[m_outputHarmonicChange][iFrame].values[0] << endl;
matthiasm@50 702
matthiasm@50 703
Chris@23 704 return fsOut;
matthiasm@0 705 }