annotate Chordino.cpp @ 112:846b552ea3b0 monophonicness

Harte syntax as option in Chordino
author Matthias Mauch <mail@matthiasmauch.net>
date Tue, 29 Mar 2011 15:12:19 +0100
parents 96cea9c05046
children 5bcba43e2317
rev   line source
Chris@23 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
matthiasm@0 2
Chris@35 3 /*
Chris@35 4 NNLS-Chroma / Chordino
Chris@35 5
Chris@35 6 Audio feature extraction plugins for chromagram and chord
Chris@35 7 estimation.
Chris@35 8
Chris@35 9 Centre for Digital Music, Queen Mary University of London.
Chris@35 10 This file copyright 2008-2010 Matthias Mauch and QMUL.
Chris@35 11
Chris@35 12 This program is free software; you can redistribute it and/or
Chris@35 13 modify it under the terms of the GNU General Public License as
Chris@35 14 published by the Free Software Foundation; either version 2 of the
Chris@35 15 License, or (at your option) any later version. See the file
Chris@35 16 COPYING included with this distribution for more information.
Chris@35 17 */
Chris@35 18
Chris@35 19 #include "Chordino.h"
Chris@27 20
Chris@27 21 #include "chromamethods.h"
matthiasm@43 22 #include "viterbi.h"
Chris@27 23
Chris@27 24 #include <cstdlib>
Chris@27 25 #include <fstream>
matthiasm@0 26 #include <cmath>
matthiasm@9 27
Chris@27 28 #include <algorithm>
matthiasm@0 29
// Switch for the "--> <method name>" trace lines that the methods in this
// file write to cerr. It is a constant set to false, so tracing is off.
static const bool debug_on = false;
matthiasm@0 31
Chris@35 32 Chordino::Chordino(float inputSampleRate) :
matthiasm@86 33 NNLSBase(inputSampleRate),
matthiasm@86 34 m_chorddict(0),
matthiasm@86 35 m_chordnotes(0),
matthiasm@86 36 m_chordnames(0)
matthiasm@0 37 {
Chris@35 38 if (debug_on) cerr << "--> Chordino" << endl;
matthiasm@86 39 // get the *chord* dictionary from file (if the file exists)
matthiasm@86 40
matthiasm@0 41 }
matthiasm@0 42
Chris@35 43 Chordino::~Chordino()
matthiasm@0 44 {
Chris@35 45 if (debug_on) cerr << "--> ~Chordino" << endl;
matthiasm@0 46 }
matthiasm@0 47
matthiasm@0 48 string
Chris@35 49 Chordino::getIdentifier() const
matthiasm@0 50 {
Chris@23 51 if (debug_on) cerr << "--> getIdentifier" << endl;
Chris@35 52 return "chordino";
matthiasm@0 53 }
matthiasm@0 54
matthiasm@0 55 string
Chris@35 56 Chordino::getName() const
matthiasm@0 57 {
Chris@23 58 if (debug_on) cerr << "--> getName" << endl;
Chris@35 59 return "Chordino";
matthiasm@0 60 }
matthiasm@0 61
matthiasm@0 62 string
Chris@35 63 Chordino::getDescription() const
matthiasm@0 64 {
Chris@23 65 if (debug_on) cerr << "--> getDescription" << endl;
matthiasm@58 66 return "Chordino provides a simple chord transcription based on NNLS Chroma (as in the NNLS Chroma plugin). Chord profiles given by the user in the file chord.dict are used to calculate frame-wise chord similarities. Two simple (non-state-of-the-art!) algorithms are available that smooth these to provide a chord transcription: a simple chord change method, and a standard HMM/Viterbi approach.";
matthiasm@0 67 }
matthiasm@0 68
matthiasm@50 69 Chordino::ParameterList
matthiasm@50 70 Chordino::getParameterDescriptors() const
matthiasm@50 71 {
matthiasm@50 72 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
matthiasm@50 73 ParameterList list;
matthiasm@50 74
matthiasm@50 75 ParameterDescriptor d;
matthiasm@50 76 d.identifier = "useNNLS";
matthiasm@50 77 d.name = "use approximate transcription (NNLS)";
matthiasm@50 78 d.description = "Toggles approximate transcription (NNLS).";
matthiasm@50 79 d.unit = "";
matthiasm@50 80 d.minValue = 0.0;
matthiasm@50 81 d.maxValue = 1.0;
matthiasm@50 82 d.defaultValue = 1.0;
matthiasm@50 83 d.isQuantized = true;
matthiasm@50 84 d.quantizeStep = 1.0;
matthiasm@50 85 list.push_back(d);
matthiasm@50 86
matthiasm@50 87 ParameterDescriptor d4;
matthiasm@50 88 d4.identifier = "useHMM";
matthiasm@53 89 d4.name = "HMM (Viterbi decoding)";
matthiasm@50 90 d4.description = "Turns on Viterbi decoding (when off, the simple chord estimator is used).";
matthiasm@50 91 d4.unit = "";
matthiasm@50 92 d4.minValue = 0.0;
matthiasm@50 93 d4.maxValue = 1.0;
matthiasm@50 94 d4.defaultValue = 1.0;
matthiasm@50 95 d4.isQuantized = true;
matthiasm@50 96 d4.quantizeStep = 1.0;
matthiasm@50 97 list.push_back(d4);
matthiasm@50 98
matthiasm@50 99 ParameterDescriptor d0;
matthiasm@50 100 d0.identifier = "rollon";
matthiasm@50 101 d0.name = "spectral roll-on";
matthiasm@58 102 d0.description = "Consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds the quantile [spectral roll on] x [total energy] will be set to 0. A value of 0 means that no bins will be changed.";
matthiasm@59 103 d0.unit = "%";
matthiasm@50 104 d0.minValue = 0;
mail@76 105 d0.maxValue = 5;
matthiasm@92 106 d0.defaultValue = 0.0;
matthiasm@50 107 d0.isQuantized = true;
mail@76 108 d0.quantizeStep = 0.5;
matthiasm@50 109 list.push_back(d0);
matthiasm@50 110
matthiasm@50 111 ParameterDescriptor d1;
matthiasm@50 112 d1.identifier = "tuningmode";
matthiasm@50 113 d1.name = "tuning mode";
matthiasm@50 114 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
matthiasm@50 115 d1.unit = "";
matthiasm@50 116 d1.minValue = 0;
matthiasm@50 117 d1.maxValue = 1;
matthiasm@92 118 d1.defaultValue = 0.0;
matthiasm@50 119 d1.isQuantized = true;
matthiasm@50 120 d1.valueNames.push_back("global tuning");
matthiasm@50 121 d1.valueNames.push_back("local tuning");
matthiasm@50 122 d1.quantizeStep = 1.0;
matthiasm@50 123 list.push_back(d1);
matthiasm@50 124
matthiasm@50 125 ParameterDescriptor d2;
matthiasm@50 126 d2.identifier = "whitening";
matthiasm@50 127 d2.name = "spectral whitening";
matthiasm@50 128 d2.description = "Spectral whitening: no whitening - 0; whitening - 1.";
matthiasm@50 129 d2.unit = "";
matthiasm@50 130 d2.isQuantized = true;
matthiasm@50 131 d2.minValue = 0.0;
matthiasm@50 132 d2.maxValue = 1.0;
matthiasm@50 133 d2.defaultValue = 1.0;
matthiasm@50 134 d2.isQuantized = false;
matthiasm@50 135 list.push_back(d2);
matthiasm@50 136
matthiasm@50 137 ParameterDescriptor d3;
matthiasm@50 138 d3.identifier = "s";
matthiasm@50 139 d3.name = "spectral shape";
matthiasm@50 140 d3.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
matthiasm@50 141 d3.unit = "";
matthiasm@50 142 d3.minValue = 0.5;
matthiasm@50 143 d3.maxValue = 0.9;
matthiasm@50 144 d3.defaultValue = 0.7;
matthiasm@50 145 d3.isQuantized = false;
matthiasm@50 146 list.push_back(d3);
matthiasm@50 147
mail@89 148 ParameterDescriptor boostn;
mail@89 149 boostn.identifier = "boostn";
mail@89 150 boostn.name = "boost N";
matthiasm@95 151 boostn.description = "Boost likelihood of the N (no chord) label.";
mail@89 152 boostn.unit = "";
matthiasm@95 153 boostn.minValue = 0.0;
matthiasm@95 154 boostn.maxValue = 1.0;
matthiasm@95 155 boostn.defaultValue = 0.1;
mail@89 156 boostn.isQuantized = false;
mail@89 157 list.push_back(boostn);
matthiasm@50 158
mail@112 159 ParameterDescriptor usehartesyntax;
mail@112 160 usehartesyntax.identifier = "usehartesyntax";
mail@112 161 usehartesyntax.name = "use Harte syntax";
mail@112 162 usehartesyntax.description = "Use the chord syntax proposed by Harte";
mail@112 163 usehartesyntax.unit = "";
mail@112 164 usehartesyntax.minValue = 0.0;
mail@112 165 usehartesyntax.maxValue = 1.0;
mail@112 166 usehartesyntax.defaultValue = 0.0;
mail@112 167 usehartesyntax.isQuantized = true;
mail@112 168 usehartesyntax.quantizeStep = 1.0;
mail@112 169 usehartesyntax.valueNames.push_back("no");
mail@112 170 usehartesyntax.valueNames.push_back("yes");
mail@112 171 list.push_back(usehartesyntax);
mail@112 172
matthiasm@50 173 return list;
matthiasm@50 174 }
matthiasm@50 175
Chris@35 176 Chordino::OutputList
Chris@35 177 Chordino::getOutputDescriptors() const
matthiasm@0 178 {
Chris@23 179 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
matthiasm@0 180 OutputList list;
matthiasm@0 181
Chris@35 182 int index = 0;
matthiasm@0 183
matthiasm@0 184 OutputDescriptor d7;
matthiasm@0 185 d7.identifier = "simplechord";
Chris@36 186 d7.name = "Chord Estimate";
matthiasm@58 187 d7.description = "Estimated chord times and labels. Two simple (non-state-of-the-art!) algorithms are available that smooth these to provide a chord transcription: a simple chord change method, and a standard HMM/Viterbi approach.";
matthiasm@0 188 d7.unit = "";
matthiasm@0 189 d7.hasFixedBinCount = true;
matthiasm@0 190 d7.binCount = 0;
matthiasm@0 191 d7.hasKnownExtents = false;
matthiasm@0 192 d7.isQuantized = false;
matthiasm@0 193 d7.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@0 194 d7.hasDuration = false;
matthiasm@0 195 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 196 list.push_back(d7);
Chris@35 197 m_outputChords = index++;
matthiasm@0 198
matthiasm@86 199 OutputDescriptor chordnotes;
matthiasm@86 200 chordnotes.identifier = "chordnotes";
matthiasm@86 201 chordnotes.name = "Note Representation of Chord Estimate";
matthiasm@86 202 chordnotes.description = "A simple represenation of the estimated chord with bass note (if applicable) and chord notes.";
matthiasm@86 203 chordnotes.unit = "MIDI units";
matthiasm@86 204 chordnotes.hasFixedBinCount = true;
matthiasm@86 205 chordnotes.binCount = 1;
matthiasm@86 206 chordnotes.hasKnownExtents = true;
matthiasm@86 207 chordnotes.minValue = 0;
matthiasm@86 208 chordnotes.maxValue = 127;
matthiasm@86 209 chordnotes.isQuantized = true;
matthiasm@86 210 chordnotes.quantizeStep = 1;
matthiasm@86 211 chordnotes.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@86 212 chordnotes.hasDuration = true;
matthiasm@86 213 chordnotes.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@86 214 list.push_back(chordnotes);
matthiasm@86 215 m_outputChordnotes = index++;
matthiasm@86 216
Chris@23 217 OutputDescriptor d8;
mail@60 218 d8.identifier = "harmonicchange";
Chris@36 219 d8.name = "Harmonic Change Value";
matthiasm@58 220 d8.description = "An indication of the likelihood of harmonic change. Depends on the chord dictionary. Calculation is different depending on whether the Viterbi algorithm is used for chord estimation, or the simple chord estimate.";
matthiasm@17 221 d8.unit = "";
matthiasm@17 222 d8.hasFixedBinCount = true;
matthiasm@17 223 d8.binCount = 1;
mail@60 224 d8.hasKnownExtents = false;
mail@60 225 // d8.minValue = 0.0;
mail@60 226 // d8.maxValue = 0.999;
matthiasm@17 227 d8.isQuantized = false;
matthiasm@17 228 d8.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@17 229 d8.hasDuration = false;
matthiasm@17 230 // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@17 231 list.push_back(d8);
Chris@35 232 m_outputHarmonicChange = index++;
matthiasm@1 233
matthiasm@107 234 OutputDescriptor loglikelihood;
matthiasm@107 235 loglikelihood.identifier = "loglikelihood";
matthiasm@107 236 loglikelihood.name = "chord estimate log-likelihood";
matthiasm@107 237 loglikelihood.description = ".";
matthiasm@107 238 loglikelihood.unit = "";
matthiasm@107 239 loglikelihood.hasFixedBinCount = true;
matthiasm@107 240 loglikelihood.binCount = 1;
matthiasm@107 241 loglikelihood.hasKnownExtents = false;
matthiasm@107 242 loglikelihood.isQuantized = false;
matthiasm@107 243 loglikelihood.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@107 244 loglikelihood.hasDuration = false;
matthiasm@107 245 // loglikelihood.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@107 246 list.push_back(loglikelihood);
matthiasm@107 247 m_outputLoglikelihood = index++;
matthiasm@106 248
matthiasm@0 249 return list;
matthiasm@0 250 }
matthiasm@0 251
matthiasm@0 252 bool
Chris@35 253 Chordino::initialise(size_t channels, size_t stepSize, size_t blockSize)
matthiasm@0 254 {
Chris@23 255 if (debug_on) {
Chris@23 256 cerr << "--> initialise";
Chris@23 257 }
mail@76 258
Chris@35 259 if (!NNLSBase::initialise(channels, stepSize, blockSize)) {
Chris@35 260 return false;
Chris@35 261 }
mail@112 262 m_chordnames = chordDictionary(&m_chorddict, &m_chordnotes, m_boostN, m_useHarte);
matthiasm@0 263 return true;
matthiasm@0 264 }
matthiasm@0 265
matthiasm@0 266 void
Chris@35 267 Chordino::reset()
matthiasm@0 268 {
Chris@23 269 if (debug_on) cerr << "--> reset";
Chris@35 270 NNLSBase::reset();
matthiasm@0 271 }
matthiasm@0 272
Chris@35 273 Chordino::FeatureSet
Chris@35 274 Chordino::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
matthiasm@0 275 {
Chris@23 276 if (debug_on) cerr << "--> process" << endl;
matthiasm@0 277
Chris@35 278 NNLSBase::baseProcess(inputBuffers, timestamp);
matthiasm@0 279
Chris@35 280 return FeatureSet();
matthiasm@0 281 }
matthiasm@0 282
Chris@35 283 Chordino::FeatureSet
Chris@35 284 Chordino::getRemainingFeatures()
matthiasm@0 285 {
mail@89 286 // cerr << hw[0] << hw[1] << endl;
mail@89 287 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
Chris@23 288 FeatureSet fsOut;
Chris@35 289 if (m_logSpectrum.size() == 0) return fsOut;
Chris@23 290 int nChord = m_chordnames.size();
Chris@23 291 //
Chris@23 292 /** Calculate Tuning
Chris@23 293 calculate tuning from (using the angle of the complex number defined by the
Chris@23 294 cumulative mean real and imag values)
Chris@23 295 **/
mail@80 296 float meanTuningImag = 0;
mail@80 297 float meanTuningReal = 0;
mail@80 298 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
mail@80 299 meanTuningReal += m_meanTunings[iBPS] * cosvalues[iBPS];
mail@80 300 meanTuningImag += m_meanTunings[iBPS] * sinvalues[iBPS];
mail@80 301 }
Chris@23 302 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
Chris@23 303 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
Chris@23 304 int intShift = floor(normalisedtuning * 3);
mail@80 305 float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this
matthiasm@1 306
Chris@23 307 char buffer0 [50];
matthiasm@1 308
Chris@23 309 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
matthiasm@1 310
matthiasm@1 311
Chris@23 312 /** Tune Log-Frequency Spectrogram
matthiasm@43 313 calculate a tuned log-frequency spectrogram (currentTunedSpec): use the tuning estimated above (kinda f0) to
Chris@91 314 perform linear interpolation on the existing log-frequency spectrogram (kinda currentLogSpectrum).
Chris@23 315 **/
Chris@35 316 cerr << endl << "[Chordino Plugin] Tuning Log-Frequency Spectrogram ... ";
matthiasm@13 317
Chris@23 318 float tempValue = 0;
Chris@23 319 float dbThreshold = 0; // relative to the background spectrum
Chris@23 320 float thresh = pow(10,dbThreshold/20);
Chris@23 321 // cerr << "tune local ? " << m_tuneLocal << endl;
Chris@23 322 int count = 0;
matthiasm@1 323
Chris@35 324 FeatureList tunedSpec;
matthiasm@43 325 int nFrame = m_logSpectrum.size();
matthiasm@43 326
matthiasm@43 327 vector<Vamp::RealTime> timestamps;
Chris@35 328
Chris@35 329 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
Chris@91 330 Feature currentLogSpectrum = *i;
matthiasm@43 331 Feature currentTunedSpec; // tuned log-frequency spectrum
matthiasm@43 332 currentTunedSpec.hasTimestamp = true;
Chris@91 333 currentTunedSpec.timestamp = currentLogSpectrum.timestamp;
Chris@91 334 timestamps.push_back(currentLogSpectrum.timestamp);
matthiasm@43 335 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // set lower edge to zero
matthiasm@1 336
Chris@23 337 if (m_tuneLocal) {
Chris@23 338 intShift = floor(m_localTuning[count] * 3);
mail@80 339 floatShift = m_localTuning[count] * 3 - intShift; // floatShift is a really bad name for this
Chris@23 340 }
matthiasm@1 341
mail@80 342 // cerr << intShift << " " << floatShift << endl;
matthiasm@1 343
Chris@91 344 for (int k = 2; k < (int)currentLogSpectrum.values.size() - 3; ++k) { // interpolate all inner bins
Chris@91 345 tempValue = currentLogSpectrum.values[k + intShift] * (1-floatShift) + currentLogSpectrum.values[k+intShift+1] * floatShift;
matthiasm@43 346 currentTunedSpec.values.push_back(tempValue);
Chris@23 347 }
matthiasm@1 348
matthiasm@43 349 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // upper edge
matthiasm@43 350 vector<float> runningmean = SpecialConvolution(currentTunedSpec.values,hw);
Chris@23 351 vector<float> runningstd;
mail@77 352 for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance)
matthiasm@43 353 runningstd.push_back((currentTunedSpec.values[i] - runningmean[i]) * (currentTunedSpec.values[i] - runningmean[i]));
Chris@23 354 }
Chris@23 355 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
mail@77 356 for (int i = 0; i < nNote; i++) {
Chris@23 357 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
Chris@23 358 if (runningstd[i] > 0) {
matthiasm@43 359 // currentTunedSpec.values[i] = (currentTunedSpec.values[i] / runningmean[i]) > thresh ?
matthiasm@43 360 // (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
matthiasm@43 361 currentTunedSpec.values[i] = (currentTunedSpec.values[i] - runningmean[i]) > 0 ?
matthiasm@43 362 (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
Chris@23 363 }
matthiasm@43 364 if (currentTunedSpec.values[i] < 0) {
Chris@23 365 cerr << "ERROR: negative value in logfreq spectrum" << endl;
Chris@23 366 }
Chris@23 367 }
matthiasm@43 368 tunedSpec.push_back(currentTunedSpec);
Chris@23 369 count++;
Chris@23 370 }
Chris@23 371 cerr << "done." << endl;
matthiasm@1 372
Chris@23 373 /** Semitone spectrum and chromagrams
Chris@23 374 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
Chris@23 375 is inferred using a non-negative least squares algorithm.
Chris@23 376 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
Chris@23 377 bass and treble stacked onto each other).
Chris@23 378 **/
matthiasm@42 379 if (m_useNNLS == 0) {
Chris@35 380 cerr << "[Chordino Plugin] Mapping to semitone spectrum and chroma ... ";
Chris@23 381 } else {
Chris@35 382 cerr << "[Chordino Plugin] Performing NNLS and mapping to chroma ... ";
Chris@23 383 }
matthiasm@13 384
matthiasm@1 385
matthiasm@43 386 vector<vector<double> > chordogram;
Chris@23 387 vector<vector<int> > scoreChordogram;
Chris@35 388 vector<float> chordchange = vector<float>(tunedSpec.size(),0);
Chris@23 389 count = 0;
matthiasm@9 390
Chris@35 391 FeatureList chromaList;
matthiasm@43 392
matthiasm@43 393
Chris@35 394
Chris@35 395 for (FeatureList::iterator it = tunedSpec.begin(); it != tunedSpec.end(); ++it) {
matthiasm@43 396 Feature currentTunedSpec = *it; // logfreq spectrum
matthiasm@43 397 Feature currentChromas; // treble and bass chromagram
Chris@35 398
matthiasm@43 399 currentChromas.hasTimestamp = true;
matthiasm@43 400 currentChromas.timestamp = currentTunedSpec.timestamp;
Chris@35 401
mail@77 402 float b[nNote];
matthiasm@1 403
Chris@23 404 bool some_b_greater_zero = false;
Chris@23 405 float sumb = 0;
mail@77 406 for (int i = 0; i < nNote; i++) {
mail@77 407 // b[i] = m_dict[(nNote * count + i) % (nNote * 84)];
matthiasm@43 408 b[i] = currentTunedSpec.values[i];
Chris@23 409 sumb += b[i];
Chris@23 410 if (b[i] > 0) {
Chris@23 411 some_b_greater_zero = true;
Chris@23 412 }
Chris@23 413 }
matthiasm@1 414
Chris@23 415 // here's where the non-negative least squares algorithm calculates the note activation x
matthiasm@1 416
Chris@23 417 vector<float> chroma = vector<float>(12, 0);
Chris@23 418 vector<float> basschroma = vector<float>(12, 0);
Chris@23 419 float currval;
Chris@91 420 int iSemitone = 0;
matthiasm@1 421
Chris@23 422 if (some_b_greater_zero) {
matthiasm@42 423 if (m_useNNLS == 0) {
Chris@91 424 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
Chris@23 425 currval = 0;
mail@81 426 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
mail@81 427 currval += b[iNote + iBPS] * (1-abs(iBPS*1.0/(nBPS/2+1)));
mail@81 428 }
Chris@23 429 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
Chris@23 430 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
Chris@23 431 iSemitone++;
Chris@23 432 }
matthiasm@1 433
Chris@23 434 } else {
Chris@35 435 float x[84+1000];
Chris@23 436 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
Chris@23 437 vector<int> signifIndex;
Chris@23 438 int index=0;
Chris@23 439 sumb /= 84.0;
Chris@91 440 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
Chris@23 441 float currval = 0;
mail@81 442 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
mail@81 443 currval += b[iNote + iBPS];
mail@81 444 }
Chris@23 445 if (currval > 0) signifIndex.push_back(index);
Chris@23 446 index++;
Chris@23 447 }
Chris@35 448 float rnorm;
Chris@35 449 float w[84+1000];
Chris@35 450 float zz[84+1000];
Chris@23 451 int indx[84+1000];
Chris@23 452 int mode;
mail@77 453 int dictsize = nNote*signifIndex.size();
mail@81 454 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
Chris@35 455 float *curr_dict = new float[dictsize];
Chris@91 456 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
Chris@91 457 for (int iBin = 0; iBin < nNote; iBin++) {
mail@77 458 curr_dict[iNote * nNote + iBin] = 1.0 * m_dict[signifIndex[iNote] * nNote + iBin];
Chris@23 459 }
Chris@23 460 }
Chris@35 461 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
Chris@23 462 delete [] curr_dict;
Chris@91 463 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
Chris@23 464 // cerr << mode << endl;
Chris@23 465 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
Chris@23 466 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
Chris@23 467 }
Chris@23 468 }
Chris@23 469 }
Chris@35 470
Chris@35 471 vector<float> origchroma = chroma;
Chris@23 472 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
matthiasm@43 473 currentChromas.values = chroma;
Chris@35 474
Chris@23 475 if (m_doNormalizeChroma > 0) {
Chris@23 476 vector<float> chromanorm = vector<float>(3,0);
Chris@23 477 switch (int(m_doNormalizeChroma)) {
Chris@23 478 case 0: // should never end up here
Chris@23 479 break;
Chris@23 480 case 1:
Chris@35 481 chromanorm[0] = *max_element(origchroma.begin(), origchroma.end());
Chris@35 482 chromanorm[1] = *max_element(basschroma.begin(), basschroma.end());
Chris@23 483 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
Chris@23 484 break;
Chris@23 485 case 2:
Chris@35 486 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
Chris@23 487 chromanorm[2] += *it;
Chris@23 488 }
Chris@23 489 break;
Chris@23 490 case 3:
Chris@35 491 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
Chris@23 492 chromanorm[2] += pow(*it,2);
Chris@23 493 }
Chris@23 494 chromanorm[2] = sqrt(chromanorm[2]);
Chris@23 495 break;
Chris@23 496 }
Chris@23 497 if (chromanorm[2] > 0) {
Chris@91 498 for (int i = 0; i < (int)chroma.size(); i++) {
matthiasm@43 499 currentChromas.values[i] /= chromanorm[2];
Chris@23 500 }
Chris@23 501 }
Chris@23 502 }
Chris@35 503
matthiasm@43 504 chromaList.push_back(currentChromas);
Chris@35 505
Chris@23 506 // local chord estimation
matthiasm@43 507 vector<double> currentChordSalience;
matthiasm@43 508 double tempchordvalue = 0;
matthiasm@43 509 double sumchordvalue = 0;
matthiasm@9 510
Chris@23 511 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23 512 tempchordvalue = 0;
Chris@23 513 for (int iBin = 0; iBin < 12; iBin++) {
matthiasm@44 514 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23 515 }
Chris@23 516 for (int iBin = 12; iBin < 24; iBin++) {
Chris@23 517 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23 518 }
matthiasm@48 519 if (iChord == nChord-1) tempchordvalue *= .7;
matthiasm@48 520 if (tempchordvalue < 0) tempchordvalue = 0.0;
matthiasm@50 521 tempchordvalue = pow(1.3,tempchordvalue);
Chris@23 522 sumchordvalue+=tempchordvalue;
Chris@23 523 currentChordSalience.push_back(tempchordvalue);
Chris@23 524 }
Chris@23 525 if (sumchordvalue > 0) {
Chris@23 526 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23 527 currentChordSalience[iChord] /= sumchordvalue;
Chris@23 528 }
Chris@23 529 } else {
Chris@23 530 currentChordSalience[nChord-1] = 1.0;
Chris@23 531 }
Chris@23 532 chordogram.push_back(currentChordSalience);
matthiasm@1 533
Chris@23 534 count++;
Chris@23 535 }
Chris@23 536 cerr << "done." << endl;
matthiasm@13 537
matthiasm@86 538 vector<Feature> oldnotes;
matthiasm@10 539
matthiasm@50 540 // bool m_useHMM = true; // this will go into the chordino header file.
matthiasm@50 541 if (m_useHMM == 1.0) {
matthiasm@44 542 cerr << "[Chordino Plugin] HMM Chord Estimation ... ";
matthiasm@43 543 int oldchord = nChord-1;
matthiasm@48 544 double selftransprob = 0.99;
matthiasm@43 545
matthiasm@48 546 // vector<double> init = vector<double>(nChord,1.0/nChord);
matthiasm@48 547 vector<double> init = vector<double>(nChord,0); init[nChord-1] = 1;
matthiasm@48 548
matthiasm@50 549 double *delta;
matthiasm@50 550 delta = (double *)malloc(sizeof(double)*nFrame*nChord);
matthiasm@50 551
matthiasm@43 552 vector<vector<double> > trans;
matthiasm@43 553 for (int iChord = 0; iChord < nChord; iChord++) {
matthiasm@43 554 vector<double> temp = vector<double>(nChord,(1-selftransprob)/(nChord-1));
matthiasm@43 555 temp[iChord] = selftransprob;
matthiasm@43 556 trans.push_back(temp);
matthiasm@43 557 }
matthiasm@106 558 vector<double> scale;
matthiasm@106 559 vector<int> chordpath = ViterbiPath(init, trans, chordogram, delta, &scale);
matthiasm@106 560
matthiasm@48 561
matthiasm@48 562 Feature chord_feature; // chord estimate
matthiasm@48 563 chord_feature.hasTimestamp = true;
matthiasm@48 564 chord_feature.timestamp = timestamps[0];
matthiasm@48 565 chord_feature.label = m_chordnames[chordpath[0]];
mail@60 566 fsOut[m_outputChords].push_back(chord_feature);
matthiasm@43 567
mail@60 568 chordchange[0] = 0;
Chris@91 569 for (int iFrame = 1; iFrame < (int)chordpath.size(); ++iFrame) {
matthiasm@43 570 // cerr << chordpath[iFrame] << endl;
matthiasm@48 571 if (chordpath[iFrame] != oldchord ) {
matthiasm@86 572 // chord
matthiasm@43 573 Feature chord_feature; // chord estimate
matthiasm@43 574 chord_feature.hasTimestamp = true;
matthiasm@43 575 chord_feature.timestamp = timestamps[iFrame];
matthiasm@43 576 chord_feature.label = m_chordnames[chordpath[iFrame]];
mail@60 577 fsOut[m_outputChords].push_back(chord_feature);
matthiasm@43 578 oldchord = chordpath[iFrame];
matthiasm@86 579 // chord notes
Chris@91 580 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
matthiasm@86 581 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[iFrame];
matthiasm@86 582 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
matthiasm@86 583 }
matthiasm@86 584 oldnotes.clear();
Chris@91 585 for (int iNote = 0; iNote < (int)m_chordnotes[chordpath[iFrame]].size(); ++iNote) { // prepare notes of current chord
matthiasm@86 586 Feature chordnote_feature;
matthiasm@86 587 chordnote_feature.hasTimestamp = true;
matthiasm@86 588 chordnote_feature.timestamp = timestamps[iFrame];
matthiasm@86 589 chordnote_feature.values.push_back(m_chordnotes[chordpath[iFrame]][iNote]);
matthiasm@86 590 chordnote_feature.hasDuration = true;
matthiasm@86 591 chordnote_feature.duration = -timestamps[iFrame]; // this will be corrected at the next chord
matthiasm@86 592 oldnotes.push_back(chordnote_feature);
matthiasm@86 593 }
Chris@23 594 }
matthiasm@50 595 /* calculating simple chord change prob */
matthiasm@50 596 for (int iChord = 0; iChord < nChord; iChord++) {
matthiasm@50 597 chordchange[iFrame-1] += delta[(iFrame-1)*nChord + iChord] * log(delta[(iFrame-1)*nChord + iChord]/delta[iFrame*nChord + iChord]);
matthiasm@50 598 }
Chris@23 599 }
matthiasm@43 600
matthiasm@106 601 float logscale = 0;
matthiasm@106 602 for (int iFrame = 0; iFrame < nFrame; ++iFrame) {
matthiasm@106 603 logscale -= log(scale[iFrame]);
matthiasm@106 604 Feature loglikelihood;
matthiasm@106 605 loglikelihood.hasTimestamp = true;
matthiasm@106 606 loglikelihood.timestamp = timestamps[iFrame];
matthiasm@106 607 loglikelihood.values.push_back(-log(scale[iFrame]));
matthiasm@106 608 // cerr << chordchange[iFrame] << endl;
matthiasm@107 609 fsOut[m_outputLoglikelihood].push_back(loglikelihood);
matthiasm@106 610 }
matthiasm@106 611 logscale /= nFrame;
mail@111 612 // cerr << "loglik" << logscale << endl;
matthiasm@106 613
matthiasm@106 614
matthiasm@43 615 // cerr << chordpath[0] << endl;
matthiasm@43 616 } else {
matthiasm@43 617 /* Simple chord estimation
matthiasm@43 618 I just take the local chord estimates ("currentChordSalience") and average them over time, then
matthiasm@43 619 take the maximum. Very simple, don't do this at home...
matthiasm@43 620 */
matthiasm@44 621 cerr << "[Chordino Plugin] Simple Chord Estimation ... ";
matthiasm@43 622 count = 0;
matthiasm@43 623 int halfwindowlength = m_inputSampleRate / m_stepSize;
matthiasm@43 624 vector<int> chordSequence;
matthiasm@43 625 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) { // initialise the score chordogram
matthiasm@43 626 vector<int> temp = vector<int>(nChord,0);
matthiasm@43 627 scoreChordogram.push_back(temp);
matthiasm@43 628 }
matthiasm@43 629 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it < timestamps.end()-2*halfwindowlength-1; ++it) {
matthiasm@43 630 int startIndex = count + 1;
matthiasm@43 631 int endIndex = count + 2 * halfwindowlength;
matthiasm@43 632
matthiasm@43 633 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
matthiasm@43 634
matthiasm@43 635 vector<int> chordCandidates;
Chris@91 636 for (int iChord = 0; iChord+1 < nChord; iChord++) {
matthiasm@43 637 // float currsum = 0;
Chris@91 638 // for (int iFrame = startIndex; iFrame < endIndex; ++iFrame) {
matthiasm@43 639 // currsum += chordogram[iFrame][iChord];
matthiasm@43 640 // }
matthiasm@43 641 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
Chris@91 642 for (int iFrame = startIndex; iFrame < endIndex; ++iFrame) {
matthiasm@43 643 if (chordogram[iFrame][iChord] > chordThreshold) {
matthiasm@43 644 chordCandidates.push_back(iChord);
matthiasm@43 645 break;
matthiasm@43 646 }
Chris@23 647 }
Chris@23 648 }
matthiasm@43 649 chordCandidates.push_back(nChord-1);
matthiasm@43 650 // cerr << chordCandidates.size() << endl;
matthiasm@43 651
matthiasm@43 652 float maxval = 0; // will be the value of the most salient *chord change* in this frame
matthiasm@43 653 float maxindex = 0; //... and the index thereof
Chris@91 654 int bestchordL = nChord-1; // index of the best "left" chord
Chris@91 655 int bestchordR = nChord-1; // index of the best "right" chord
matthiasm@43 656
matthiasm@43 657 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
matthiasm@43 658 // now find the max values on both sides of iWF
matthiasm@43 659 // left side:
matthiasm@43 660 float maxL = 0;
Chris@91 661 int maxindL = nChord-1;
Chris@91 662 for (int kChord = 0; kChord < (int)chordCandidates.size(); kChord++) {
Chris@91 663 int iChord = chordCandidates[kChord];
matthiasm@43 664 float currsum = 0;
Chris@91 665 for (int iFrame = 0; iFrame < iWF-1; ++iFrame) {
matthiasm@43 666 currsum += chordogram[count+iFrame][iChord];
matthiasm@43 667 }
matthiasm@43 668 if (iChord == nChord-1) currsum *= 0.8;
matthiasm@43 669 if (currsum > maxL) {
matthiasm@43 670 maxL = currsum;
matthiasm@43 671 maxindL = iChord;
matthiasm@43 672 }
matthiasm@43 673 }
matthiasm@43 674 // right side:
matthiasm@43 675 float maxR = 0;
Chris@91 676 int maxindR = nChord-1;
Chris@91 677 for (int kChord = 0; kChord < (int)chordCandidates.size(); kChord++) {
Chris@91 678 int iChord = chordCandidates[kChord];
matthiasm@43 679 float currsum = 0;
Chris@91 680 for (int iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
matthiasm@43 681 currsum += chordogram[count+iFrame][iChord];
matthiasm@43 682 }
matthiasm@43 683 if (iChord == nChord-1) currsum *= 0.8;
matthiasm@43 684 if (currsum > maxR) {
matthiasm@43 685 maxR = currsum;
matthiasm@43 686 maxindR = iChord;
matthiasm@43 687 }
matthiasm@43 688 }
matthiasm@43 689 if (maxL+maxR > maxval) {
matthiasm@43 690 maxval = maxL+maxR;
matthiasm@43 691 maxindex = iWF;
matthiasm@43 692 bestchordL = maxindL;
matthiasm@43 693 bestchordR = maxindR;
matthiasm@43 694 }
matthiasm@43 695
Chris@23 696 }
matthiasm@43 697 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
matthiasm@43 698 // add a score to every chord-frame-point that was part of a maximum
Chris@91 699 for (int iFrame = 0; iFrame < maxindex-1; ++iFrame) {
matthiasm@43 700 scoreChordogram[iFrame+count][bestchordL]++;
matthiasm@43 701 }
Chris@91 702 for (int iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
matthiasm@43 703 scoreChordogram[iFrame+count][bestchordR]++;
matthiasm@43 704 }
matthiasm@50 705 if (bestchordL != bestchordR) {
matthiasm@50 706 chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
matthiasm@50 707 }
matthiasm@43 708 count++;
Chris@23 709 }
matthiasm@43 710 // cerr << "******* agent finished *******" << endl;
matthiasm@43 711 count = 0;
matthiasm@43 712 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) {
matthiasm@43 713 float maxval = 0; // will be the value of the most salient chord in this frame
matthiasm@43 714 float maxindex = 0; //... and the index thereof
Chris@91 715 for (int iChord = 0; iChord < nChord; iChord++) {
matthiasm@43 716 if (scoreChordogram[count][iChord] > maxval) {
matthiasm@43 717 maxval = scoreChordogram[count][iChord];
matthiasm@43 718 maxindex = iChord;
matthiasm@43 719 // cerr << iChord << endl;
matthiasm@43 720 }
matthiasm@43 721 }
matthiasm@43 722 chordSequence.push_back(maxindex);
matthiasm@43 723 count++;
Chris@23 724 }
matthiasm@43 725
matthiasm@43 726
matthiasm@43 727 // mode filter on chordSequence
matthiasm@43 728 count = 0;
matthiasm@43 729 string oldChord = "";
matthiasm@43 730 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) {
matthiasm@43 731 Feature chord_feature; // chord estimate
matthiasm@43 732 chord_feature.hasTimestamp = true;
matthiasm@43 733 chord_feature.timestamp = *it;
matthiasm@43 734 // Feature currentChord; // chord estimate
matthiasm@43 735 // currentChord.hasTimestamp = true;
matthiasm@43 736 // currentChord.timestamp = currentChromas.timestamp;
matthiasm@43 737
matthiasm@43 738 vector<int> chordCount = vector<int>(nChord,0);
matthiasm@43 739 int maxChordCount = 0;
matthiasm@43 740 int maxChordIndex = nChord-1;
matthiasm@43 741 string maxChord;
matthiasm@43 742 int startIndex = max(count - halfwindowlength/2,0);
matthiasm@43 743 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
matthiasm@43 744 for (int i = startIndex; i < endIndex; i++) {
matthiasm@43 745 chordCount[chordSequence[i]]++;
matthiasm@43 746 if (chordCount[chordSequence[i]] > maxChordCount) {
matthiasm@43 747 // cerr << "start index " << startIndex << endl;
matthiasm@43 748 maxChordCount++;
matthiasm@43 749 maxChordIndex = chordSequence[i];
matthiasm@43 750 maxChord = m_chordnames[maxChordIndex];
matthiasm@43 751 }
matthiasm@43 752 }
matthiasm@43 753 // chordSequence[count] = maxChordIndex;
matthiasm@43 754 // cerr << maxChordIndex << endl;
matthiasm@50 755 // cerr << chordchange[count] << endl;
matthiasm@43 756 if (oldChord != maxChord) {
matthiasm@43 757 oldChord = maxChord;
matthiasm@43 758 chord_feature.label = m_chordnames[maxChordIndex];
mail@60 759 fsOut[m_outputChords].push_back(chord_feature);
Chris@91 760 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
matthiasm@86 761 oldnotes[iNote].duration = oldnotes[iNote].duration + chord_feature.timestamp;
matthiasm@86 762 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
matthiasm@86 763 }
matthiasm@86 764 oldnotes.clear();
Chris@91 765 for (int iNote = 0; iNote < (int)m_chordnotes[maxChordIndex].size(); ++iNote) { // prepare notes of current chord
matthiasm@86 766 Feature chordnote_feature;
matthiasm@86 767 chordnote_feature.hasTimestamp = true;
matthiasm@86 768 chordnote_feature.timestamp = chord_feature.timestamp;
matthiasm@86 769 chordnote_feature.values.push_back(m_chordnotes[maxChordIndex][iNote]);
matthiasm@86 770 chordnote_feature.hasDuration = true;
matthiasm@86 771 chordnote_feature.duration = -chord_feature.timestamp; // this will be corrected at the next chord
matthiasm@86 772 oldnotes.push_back(chordnote_feature);
matthiasm@86 773 }
matthiasm@43 774 }
matthiasm@43 775 count++;
Chris@23 776 }
Chris@23 777 }
matthiasm@43 778 Feature chord_feature; // last chord estimate
matthiasm@43 779 chord_feature.hasTimestamp = true;
matthiasm@43 780 chord_feature.timestamp = timestamps[timestamps.size()-1];
matthiasm@43 781 chord_feature.label = "N";
mail@60 782 fsOut[m_outputChords].push_back(chord_feature);
matthiasm@86 783
Chris@91 784 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
matthiasm@86 785 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[timestamps.size()-1];
matthiasm@86 786 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
matthiasm@86 787 }
matthiasm@86 788
Chris@23 789 cerr << "done." << endl;
matthiasm@50 790
matthiasm@50 791 for (int iFrame = 0; iFrame < nFrame; iFrame++) {
matthiasm@50 792 Feature chordchange_feature;
matthiasm@50 793 chordchange_feature.hasTimestamp = true;
matthiasm@50 794 chordchange_feature.timestamp = timestamps[iFrame];
matthiasm@50 795 chordchange_feature.values.push_back(chordchange[iFrame]);
mail@60 796 // cerr << chordchange[iFrame] << endl;
mail@60 797 fsOut[m_outputHarmonicChange].push_back(chordchange_feature);
matthiasm@50 798 }
matthiasm@50 799
mail@60 800 // for (int iFrame = 0; iFrame < nFrame; iFrame++) cerr << fsOut[m_outputHarmonicChange][iFrame].values[0] << endl;
matthiasm@50 801
matthiasm@50 802
Chris@23 803 return fsOut;
matthiasm@0 804 }