annotate Chordino.cpp @ 106:99b87ce4bb70 monophonicness

implemented loglikelihood stuff
author matthiasm
date Sun, 19 Dec 2010 21:55:01 +0900
parents dab7e7bfeba1
children ea5d533536ab
rev   line source
Chris@23 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
matthiasm@0 2
Chris@35 3 /*
Chris@35 4 NNLS-Chroma / Chordino
Chris@35 5
Chris@35 6 Audio feature extraction plugins for chromagram and chord
Chris@35 7 estimation.
Chris@35 8
Chris@35 9 Centre for Digital Music, Queen Mary University of London.
Chris@35 10 This file copyright 2008-2010 Matthias Mauch and QMUL.
Chris@35 11
Chris@35 12 This program is free software; you can redistribute it and/or
Chris@35 13 modify it under the terms of the GNU General Public License as
Chris@35 14 published by the Free Software Foundation; either version 2 of the
Chris@35 15 License, or (at your option) any later version. See the file
Chris@35 16 COPYING included with this distribution for more information.
Chris@35 17 */
Chris@35 18
Chris@35 19 #include "Chordino.h"
Chris@27 20
Chris@27 21 #include "chromamethods.h"
matthiasm@43 22 #include "viterbi.h"
Chris@27 23
Chris@27 24 #include <cstdlib>
Chris@27 25 #include <fstream>
matthiasm@0 26 #include <cmath>
matthiasm@9 27
Chris@27 28 #include <algorithm>
matthiasm@0 29
matthiasm@0 30 const bool debug_on = false;
matthiasm@0 31
Chris@35 32 Chordino::Chordino(float inputSampleRate) :
matthiasm@86 33 NNLSBase(inputSampleRate),
matthiasm@86 34 m_chorddict(0),
matthiasm@86 35 m_chordnotes(0),
matthiasm@86 36 m_chordnames(0)
matthiasm@0 37 {
Chris@35 38 if (debug_on) cerr << "--> Chordino" << endl;
matthiasm@86 39 // get the *chord* dictionary from file (if the file exists)
matthiasm@86 40
matthiasm@0 41 }
matthiasm@0 42
Chris@35 43 Chordino::~Chordino()
matthiasm@0 44 {
Chris@35 45 if (debug_on) cerr << "--> ~Chordino" << endl;
matthiasm@0 46 }
matthiasm@0 47
matthiasm@0 48 string
Chris@35 49 Chordino::getIdentifier() const
matthiasm@0 50 {
Chris@23 51 if (debug_on) cerr << "--> getIdentifier" << endl;
Chris@35 52 return "chordino";
matthiasm@0 53 }
matthiasm@0 54
matthiasm@0 55 string
Chris@35 56 Chordino::getName() const
matthiasm@0 57 {
Chris@23 58 if (debug_on) cerr << "--> getName" << endl;
Chris@35 59 return "Chordino";
matthiasm@0 60 }
matthiasm@0 61
matthiasm@0 62 string
Chris@35 63 Chordino::getDescription() const
matthiasm@0 64 {
Chris@23 65 if (debug_on) cerr << "--> getDescription" << endl;
matthiasm@58 66 return "Chordino provides a simple chord transcription based on NNLS Chroma (as in the NNLS Chroma plugin). Chord profiles given by the user in the file chord.dict are used to calculate frame-wise chord similarities. Two simple (non-state-of-the-art!) algorithms are available that smooth these to provide a chord transcription: a simple chord change method, and a standard HMM/Viterbi approach.";
matthiasm@0 67 }
matthiasm@0 68
matthiasm@50 69 Chordino::ParameterList
matthiasm@50 70 Chordino::getParameterDescriptors() const
matthiasm@50 71 {
matthiasm@50 72 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
matthiasm@50 73 ParameterList list;
matthiasm@50 74
matthiasm@50 75 ParameterDescriptor d;
matthiasm@50 76 d.identifier = "useNNLS";
matthiasm@50 77 d.name = "use approximate transcription (NNLS)";
matthiasm@50 78 d.description = "Toggles approximate transcription (NNLS).";
matthiasm@50 79 d.unit = "";
matthiasm@50 80 d.minValue = 0.0;
matthiasm@50 81 d.maxValue = 1.0;
matthiasm@50 82 d.defaultValue = 1.0;
matthiasm@50 83 d.isQuantized = true;
matthiasm@50 84 d.quantizeStep = 1.0;
matthiasm@50 85 list.push_back(d);
matthiasm@50 86
matthiasm@50 87 ParameterDescriptor d4;
matthiasm@50 88 d4.identifier = "useHMM";
matthiasm@53 89 d4.name = "HMM (Viterbi decoding)";
matthiasm@50 90 d4.description = "Turns on Viterbi decoding (when off, the simple chord estimator is used).";
matthiasm@50 91 d4.unit = "";
matthiasm@50 92 d4.minValue = 0.0;
matthiasm@50 93 d4.maxValue = 1.0;
matthiasm@50 94 d4.defaultValue = 1.0;
matthiasm@50 95 d4.isQuantized = true;
matthiasm@50 96 d4.quantizeStep = 1.0;
matthiasm@50 97 list.push_back(d4);
matthiasm@50 98
matthiasm@50 99 ParameterDescriptor d0;
matthiasm@50 100 d0.identifier = "rollon";
matthiasm@50 101 d0.name = "spectral roll-on";
matthiasm@58 102 d0.description = "Consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds the quantile [spectral roll on] x [total energy] will be set to 0. A value of 0 means that no bins will be changed.";
matthiasm@59 103 d0.unit = "%";
matthiasm@50 104 d0.minValue = 0;
mail@76 105 d0.maxValue = 5;
matthiasm@92 106 d0.defaultValue = 0.0;
matthiasm@50 107 d0.isQuantized = true;
mail@76 108 d0.quantizeStep = 0.5;
matthiasm@50 109 list.push_back(d0);
matthiasm@50 110
matthiasm@50 111 ParameterDescriptor d1;
matthiasm@50 112 d1.identifier = "tuningmode";
matthiasm@50 113 d1.name = "tuning mode";
matthiasm@50 114 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
matthiasm@50 115 d1.unit = "";
matthiasm@50 116 d1.minValue = 0;
matthiasm@50 117 d1.maxValue = 1;
matthiasm@92 118 d1.defaultValue = 0.0;
matthiasm@50 119 d1.isQuantized = true;
matthiasm@50 120 d1.valueNames.push_back("global tuning");
matthiasm@50 121 d1.valueNames.push_back("local tuning");
matthiasm@50 122 d1.quantizeStep = 1.0;
matthiasm@50 123 list.push_back(d1);
matthiasm@50 124
matthiasm@50 125 ParameterDescriptor d2;
matthiasm@50 126 d2.identifier = "whitening";
matthiasm@50 127 d2.name = "spectral whitening";
matthiasm@50 128 d2.description = "Spectral whitening: no whitening - 0; whitening - 1.";
matthiasm@50 129 d2.unit = "";
matthiasm@50 130 d2.isQuantized = true;
matthiasm@50 131 d2.minValue = 0.0;
matthiasm@50 132 d2.maxValue = 1.0;
matthiasm@50 133 d2.defaultValue = 1.0;
matthiasm@50 134 d2.isQuantized = false;
matthiasm@50 135 list.push_back(d2);
matthiasm@50 136
matthiasm@50 137 ParameterDescriptor d3;
matthiasm@50 138 d3.identifier = "s";
matthiasm@50 139 d3.name = "spectral shape";
matthiasm@50 140 d3.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
matthiasm@50 141 d3.unit = "";
matthiasm@50 142 d3.minValue = 0.5;
matthiasm@50 143 d3.maxValue = 0.9;
matthiasm@50 144 d3.defaultValue = 0.7;
matthiasm@50 145 d3.isQuantized = false;
matthiasm@50 146 list.push_back(d3);
matthiasm@50 147
mail@89 148 ParameterDescriptor boostn;
mail@89 149 boostn.identifier = "boostn";
mail@89 150 boostn.name = "boost N";
matthiasm@95 151 boostn.description = "Boost likelihood of the N (no chord) label.";
mail@89 152 boostn.unit = "";
matthiasm@95 153 boostn.minValue = 0.0;
matthiasm@95 154 boostn.maxValue = 1.0;
matthiasm@95 155 boostn.defaultValue = 0.1;
mail@89 156 boostn.isQuantized = false;
mail@89 157 list.push_back(boostn);
matthiasm@50 158
matthiasm@50 159 return list;
matthiasm@50 160 }
matthiasm@50 161
Chris@35 162 Chordino::OutputList
Chris@35 163 Chordino::getOutputDescriptors() const
matthiasm@0 164 {
Chris@23 165 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
matthiasm@0 166 OutputList list;
matthiasm@0 167
Chris@35 168 int index = 0;
matthiasm@0 169
matthiasm@0 170 OutputDescriptor d7;
matthiasm@0 171 d7.identifier = "simplechord";
Chris@36 172 d7.name = "Chord Estimate";
matthiasm@58 173 d7.description = "Estimated chord times and labels. Two simple (non-state-of-the-art!) algorithms are available that smooth these to provide a chord transcription: a simple chord change method, and a standard HMM/Viterbi approach.";
matthiasm@0 174 d7.unit = "";
matthiasm@0 175 d7.hasFixedBinCount = true;
matthiasm@0 176 d7.binCount = 0;
matthiasm@0 177 d7.hasKnownExtents = false;
matthiasm@0 178 d7.isQuantized = false;
matthiasm@0 179 d7.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@0 180 d7.hasDuration = false;
matthiasm@0 181 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 182 list.push_back(d7);
Chris@35 183 m_outputChords = index++;
matthiasm@0 184
matthiasm@86 185 OutputDescriptor chordnotes;
matthiasm@86 186 chordnotes.identifier = "chordnotes";
matthiasm@86 187 chordnotes.name = "Note Representation of Chord Estimate";
matthiasm@86 188 chordnotes.description = "A simple represenation of the estimated chord with bass note (if applicable) and chord notes.";
matthiasm@86 189 chordnotes.unit = "MIDI units";
matthiasm@86 190 chordnotes.hasFixedBinCount = true;
matthiasm@86 191 chordnotes.binCount = 1;
matthiasm@86 192 chordnotes.hasKnownExtents = true;
matthiasm@86 193 chordnotes.minValue = 0;
matthiasm@86 194 chordnotes.maxValue = 127;
matthiasm@86 195 chordnotes.isQuantized = true;
matthiasm@86 196 chordnotes.quantizeStep = 1;
matthiasm@86 197 chordnotes.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@86 198 chordnotes.hasDuration = true;
matthiasm@86 199 chordnotes.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@86 200 list.push_back(chordnotes);
matthiasm@86 201 m_outputChordnotes = index++;
matthiasm@86 202
Chris@23 203 OutputDescriptor d8;
mail@60 204 d8.identifier = "harmonicchange";
Chris@36 205 d8.name = "Harmonic Change Value";
matthiasm@58 206 d8.description = "An indication of the likelihood of harmonic change. Depends on the chord dictionary. Calculation is different depending on whether the Viterbi algorithm is used for chord estimation, or the simple chord estimate.";
matthiasm@17 207 d8.unit = "";
matthiasm@17 208 d8.hasFixedBinCount = true;
matthiasm@17 209 d8.binCount = 1;
mail@60 210 d8.hasKnownExtents = false;
mail@60 211 // d8.minValue = 0.0;
mail@60 212 // d8.maxValue = 0.999;
matthiasm@17 213 d8.isQuantized = false;
matthiasm@17 214 d8.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@17 215 d8.hasDuration = false;
matthiasm@17 216 // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@17 217 list.push_back(d8);
Chris@35 218 m_outputHarmonicChange = index++;
matthiasm@1 219
matthiasm@106 220 OutputDescriptor meanloglikelihood;
matthiasm@106 221 meanloglikelihood.identifier = "meanloglikelihood";
matthiasm@106 222 meanloglikelihood.name = "chord estimate mean log-likelihood";
matthiasm@106 223 meanloglikelihood.description = ".";
matthiasm@106 224 meanloglikelihood.unit = "";
matthiasm@106 225 meanloglikelihood.hasFixedBinCount = true;
matthiasm@106 226 meanloglikelihood.binCount = 1;
matthiasm@106 227 meanloglikelihood.hasKnownExtents = false;
matthiasm@106 228 meanloglikelihood.isQuantized = false;
matthiasm@106 229 meanloglikelihood.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@106 230 meanloglikelihood.hasDuration = false;
matthiasm@106 231 // meanloglikelihood.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@106 232 list.push_back(meanloglikelihood);
matthiasm@106 233 m_outputMeanloglikelihood = index++;
matthiasm@106 234
matthiasm@0 235 return list;
matthiasm@0 236 }
matthiasm@0 237
matthiasm@0 238 bool
Chris@35 239 Chordino::initialise(size_t channels, size_t stepSize, size_t blockSize)
matthiasm@0 240 {
Chris@23 241 if (debug_on) {
Chris@23 242 cerr << "--> initialise";
Chris@23 243 }
mail@76 244
Chris@35 245 if (!NNLSBase::initialise(channels, stepSize, blockSize)) {
Chris@35 246 return false;
Chris@35 247 }
mail@89 248 m_chordnames = chordDictionary(&m_chorddict, &m_chordnotes, m_boostN);
matthiasm@0 249 return true;
matthiasm@0 250 }
matthiasm@0 251
matthiasm@0 252 void
Chris@35 253 Chordino::reset()
matthiasm@0 254 {
Chris@23 255 if (debug_on) cerr << "--> reset";
Chris@35 256 NNLSBase::reset();
matthiasm@0 257 }
matthiasm@0 258
Chris@35 259 Chordino::FeatureSet
Chris@35 260 Chordino::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
matthiasm@0 261 {
Chris@23 262 if (debug_on) cerr << "--> process" << endl;
matthiasm@0 263
Chris@35 264 NNLSBase::baseProcess(inputBuffers, timestamp);
matthiasm@0 265
Chris@35 266 return FeatureSet();
matthiasm@0 267 }
matthiasm@0 268
Chris@35 269 Chordino::FeatureSet
Chris@35 270 Chordino::getRemainingFeatures()
matthiasm@0 271 {
mail@89 272 // cerr << hw[0] << hw[1] << endl;
mail@89 273 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
Chris@23 274 FeatureSet fsOut;
Chris@35 275 if (m_logSpectrum.size() == 0) return fsOut;
Chris@23 276 int nChord = m_chordnames.size();
Chris@23 277 //
Chris@23 278 /** Calculate Tuning
Chris@23 279 calculate tuning from (using the angle of the complex number defined by the
Chris@23 280 cumulative mean real and imag values)
Chris@23 281 **/
mail@80 282 float meanTuningImag = 0;
mail@80 283 float meanTuningReal = 0;
mail@80 284 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
mail@80 285 meanTuningReal += m_meanTunings[iBPS] * cosvalues[iBPS];
mail@80 286 meanTuningImag += m_meanTunings[iBPS] * sinvalues[iBPS];
mail@80 287 }
Chris@23 288 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
Chris@23 289 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
Chris@23 290 int intShift = floor(normalisedtuning * 3);
mail@80 291 float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this
matthiasm@1 292
Chris@23 293 char buffer0 [50];
matthiasm@1 294
Chris@23 295 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
matthiasm@1 296
matthiasm@1 297
Chris@23 298 /** Tune Log-Frequency Spectrogram
matthiasm@43 299 calculate a tuned log-frequency spectrogram (currentTunedSpec): use the tuning estimated above (kinda f0) to
Chris@91 300 perform linear interpolation on the existing log-frequency spectrogram (kinda currentLogSpectrum).
Chris@23 301 **/
Chris@35 302 cerr << endl << "[Chordino Plugin] Tuning Log-Frequency Spectrogram ... ";
matthiasm@13 303
Chris@23 304 float tempValue = 0;
Chris@23 305 float dbThreshold = 0; // relative to the background spectrum
Chris@23 306 float thresh = pow(10,dbThreshold/20);
Chris@23 307 // cerr << "tune local ? " << m_tuneLocal << endl;
Chris@23 308 int count = 0;
matthiasm@1 309
Chris@35 310 FeatureList tunedSpec;
matthiasm@43 311 int nFrame = m_logSpectrum.size();
matthiasm@43 312
matthiasm@43 313 vector<Vamp::RealTime> timestamps;
Chris@35 314
Chris@35 315 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
Chris@91 316 Feature currentLogSpectrum = *i;
matthiasm@43 317 Feature currentTunedSpec; // tuned log-frequency spectrum
matthiasm@43 318 currentTunedSpec.hasTimestamp = true;
Chris@91 319 currentTunedSpec.timestamp = currentLogSpectrum.timestamp;
Chris@91 320 timestamps.push_back(currentLogSpectrum.timestamp);
matthiasm@43 321 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // set lower edge to zero
matthiasm@1 322
Chris@23 323 if (m_tuneLocal) {
Chris@23 324 intShift = floor(m_localTuning[count] * 3);
mail@80 325 floatShift = m_localTuning[count] * 3 - intShift; // floatShift is a really bad name for this
Chris@23 326 }
matthiasm@1 327
mail@80 328 // cerr << intShift << " " << floatShift << endl;
matthiasm@1 329
Chris@91 330 for (int k = 2; k < (int)currentLogSpectrum.values.size() - 3; ++k) { // interpolate all inner bins
Chris@91 331 tempValue = currentLogSpectrum.values[k + intShift] * (1-floatShift) + currentLogSpectrum.values[k+intShift+1] * floatShift;
matthiasm@43 332 currentTunedSpec.values.push_back(tempValue);
Chris@23 333 }
matthiasm@1 334
matthiasm@43 335 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // upper edge
matthiasm@43 336 vector<float> runningmean = SpecialConvolution(currentTunedSpec.values,hw);
Chris@23 337 vector<float> runningstd;
mail@77 338 for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance)
matthiasm@43 339 runningstd.push_back((currentTunedSpec.values[i] - runningmean[i]) * (currentTunedSpec.values[i] - runningmean[i]));
Chris@23 340 }
Chris@23 341 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
mail@77 342 for (int i = 0; i < nNote; i++) {
Chris@23 343 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
Chris@23 344 if (runningstd[i] > 0) {
matthiasm@43 345 // currentTunedSpec.values[i] = (currentTunedSpec.values[i] / runningmean[i]) > thresh ?
matthiasm@43 346 // (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
matthiasm@43 347 currentTunedSpec.values[i] = (currentTunedSpec.values[i] - runningmean[i]) > 0 ?
matthiasm@43 348 (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
Chris@23 349 }
matthiasm@43 350 if (currentTunedSpec.values[i] < 0) {
Chris@23 351 cerr << "ERROR: negative value in logfreq spectrum" << endl;
Chris@23 352 }
Chris@23 353 }
matthiasm@43 354 tunedSpec.push_back(currentTunedSpec);
Chris@23 355 count++;
Chris@23 356 }
Chris@23 357 cerr << "done." << endl;
matthiasm@1 358
Chris@23 359 /** Semitone spectrum and chromagrams
Chris@23 360 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
Chris@23 361 is inferred using a non-negative least squares algorithm.
Chris@23 362 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
Chris@23 363 bass and treble stacked onto each other).
Chris@23 364 **/
matthiasm@42 365 if (m_useNNLS == 0) {
Chris@35 366 cerr << "[Chordino Plugin] Mapping to semitone spectrum and chroma ... ";
Chris@23 367 } else {
Chris@35 368 cerr << "[Chordino Plugin] Performing NNLS and mapping to chroma ... ";
Chris@23 369 }
matthiasm@13 370
matthiasm@1 371
matthiasm@43 372 vector<vector<double> > chordogram;
Chris@23 373 vector<vector<int> > scoreChordogram;
Chris@35 374 vector<float> chordchange = vector<float>(tunedSpec.size(),0);
Chris@23 375 count = 0;
matthiasm@9 376
Chris@35 377 FeatureList chromaList;
matthiasm@43 378
matthiasm@43 379
Chris@35 380
Chris@35 381 for (FeatureList::iterator it = tunedSpec.begin(); it != tunedSpec.end(); ++it) {
matthiasm@43 382 Feature currentTunedSpec = *it; // logfreq spectrum
matthiasm@43 383 Feature currentChromas; // treble and bass chromagram
Chris@35 384
matthiasm@43 385 currentChromas.hasTimestamp = true;
matthiasm@43 386 currentChromas.timestamp = currentTunedSpec.timestamp;
Chris@35 387
mail@77 388 float b[nNote];
matthiasm@1 389
Chris@23 390 bool some_b_greater_zero = false;
Chris@23 391 float sumb = 0;
mail@77 392 for (int i = 0; i < nNote; i++) {
mail@77 393 // b[i] = m_dict[(nNote * count + i) % (nNote * 84)];
matthiasm@43 394 b[i] = currentTunedSpec.values[i];
Chris@23 395 sumb += b[i];
Chris@23 396 if (b[i] > 0) {
Chris@23 397 some_b_greater_zero = true;
Chris@23 398 }
Chris@23 399 }
matthiasm@1 400
Chris@23 401 // here's where the non-negative least squares algorithm calculates the note activation x
matthiasm@1 402
Chris@23 403 vector<float> chroma = vector<float>(12, 0);
Chris@23 404 vector<float> basschroma = vector<float>(12, 0);
Chris@23 405 float currval;
Chris@91 406 int iSemitone = 0;
matthiasm@1 407
Chris@23 408 if (some_b_greater_zero) {
matthiasm@42 409 if (m_useNNLS == 0) {
Chris@91 410 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
Chris@23 411 currval = 0;
mail@81 412 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
mail@81 413 currval += b[iNote + iBPS] * (1-abs(iBPS*1.0/(nBPS/2+1)));
mail@81 414 }
Chris@23 415 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
Chris@23 416 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
Chris@23 417 iSemitone++;
Chris@23 418 }
matthiasm@1 419
Chris@23 420 } else {
Chris@35 421 float x[84+1000];
Chris@23 422 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
Chris@23 423 vector<int> signifIndex;
Chris@23 424 int index=0;
Chris@23 425 sumb /= 84.0;
Chris@91 426 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
Chris@23 427 float currval = 0;
mail@81 428 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
mail@81 429 currval += b[iNote + iBPS];
mail@81 430 }
Chris@23 431 if (currval > 0) signifIndex.push_back(index);
Chris@23 432 index++;
Chris@23 433 }
Chris@35 434 float rnorm;
Chris@35 435 float w[84+1000];
Chris@35 436 float zz[84+1000];
Chris@23 437 int indx[84+1000];
Chris@23 438 int mode;
mail@77 439 int dictsize = nNote*signifIndex.size();
mail@81 440 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
Chris@35 441 float *curr_dict = new float[dictsize];
Chris@91 442 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
Chris@91 443 for (int iBin = 0; iBin < nNote; iBin++) {
mail@77 444 curr_dict[iNote * nNote + iBin] = 1.0 * m_dict[signifIndex[iNote] * nNote + iBin];
Chris@23 445 }
Chris@23 446 }
Chris@35 447 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
Chris@23 448 delete [] curr_dict;
Chris@91 449 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
Chris@23 450 // cerr << mode << endl;
Chris@23 451 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
Chris@23 452 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
Chris@23 453 }
Chris@23 454 }
Chris@23 455 }
Chris@35 456
Chris@35 457 vector<float> origchroma = chroma;
Chris@23 458 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
matthiasm@43 459 currentChromas.values = chroma;
Chris@35 460
Chris@23 461 if (m_doNormalizeChroma > 0) {
Chris@23 462 vector<float> chromanorm = vector<float>(3,0);
Chris@23 463 switch (int(m_doNormalizeChroma)) {
Chris@23 464 case 0: // should never end up here
Chris@23 465 break;
Chris@23 466 case 1:
Chris@35 467 chromanorm[0] = *max_element(origchroma.begin(), origchroma.end());
Chris@35 468 chromanorm[1] = *max_element(basschroma.begin(), basschroma.end());
Chris@23 469 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
Chris@23 470 break;
Chris@23 471 case 2:
Chris@35 472 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
Chris@23 473 chromanorm[2] += *it;
Chris@23 474 }
Chris@23 475 break;
Chris@23 476 case 3:
Chris@35 477 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
Chris@23 478 chromanorm[2] += pow(*it,2);
Chris@23 479 }
Chris@23 480 chromanorm[2] = sqrt(chromanorm[2]);
Chris@23 481 break;
Chris@23 482 }
Chris@23 483 if (chromanorm[2] > 0) {
Chris@91 484 for (int i = 0; i < (int)chroma.size(); i++) {
matthiasm@43 485 currentChromas.values[i] /= chromanorm[2];
Chris@23 486 }
Chris@23 487 }
Chris@23 488 }
Chris@35 489
matthiasm@43 490 chromaList.push_back(currentChromas);
Chris@35 491
Chris@23 492 // local chord estimation
matthiasm@43 493 vector<double> currentChordSalience;
matthiasm@43 494 double tempchordvalue = 0;
matthiasm@43 495 double sumchordvalue = 0;
matthiasm@9 496
Chris@23 497 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23 498 tempchordvalue = 0;
Chris@23 499 for (int iBin = 0; iBin < 12; iBin++) {
matthiasm@44 500 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23 501 }
Chris@23 502 for (int iBin = 12; iBin < 24; iBin++) {
Chris@23 503 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23 504 }
matthiasm@48 505 if (iChord == nChord-1) tempchordvalue *= .7;
matthiasm@48 506 if (tempchordvalue < 0) tempchordvalue = 0.0;
matthiasm@50 507 tempchordvalue = pow(1.3,tempchordvalue);
Chris@23 508 sumchordvalue+=tempchordvalue;
Chris@23 509 currentChordSalience.push_back(tempchordvalue);
Chris@23 510 }
Chris@23 511 if (sumchordvalue > 0) {
Chris@23 512 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23 513 currentChordSalience[iChord] /= sumchordvalue;
Chris@23 514 }
Chris@23 515 } else {
Chris@23 516 currentChordSalience[nChord-1] = 1.0;
Chris@23 517 }
Chris@23 518 chordogram.push_back(currentChordSalience);
matthiasm@1 519
Chris@23 520 count++;
Chris@23 521 }
Chris@23 522 cerr << "done." << endl;
matthiasm@13 523
matthiasm@86 524 vector<Feature> oldnotes;
matthiasm@10 525
matthiasm@50 526 // bool m_useHMM = true; // this will go into the chordino header file.
matthiasm@50 527 if (m_useHMM == 1.0) {
matthiasm@44 528 cerr << "[Chordino Plugin] HMM Chord Estimation ... ";
matthiasm@43 529 int oldchord = nChord-1;
matthiasm@48 530 double selftransprob = 0.99;
matthiasm@43 531
matthiasm@48 532 // vector<double> init = vector<double>(nChord,1.0/nChord);
matthiasm@48 533 vector<double> init = vector<double>(nChord,0); init[nChord-1] = 1;
matthiasm@48 534
matthiasm@50 535 double *delta;
matthiasm@50 536 delta = (double *)malloc(sizeof(double)*nFrame*nChord);
matthiasm@50 537
matthiasm@43 538 vector<vector<double> > trans;
matthiasm@43 539 for (int iChord = 0; iChord < nChord; iChord++) {
matthiasm@43 540 vector<double> temp = vector<double>(nChord,(1-selftransprob)/(nChord-1));
matthiasm@43 541 temp[iChord] = selftransprob;
matthiasm@43 542 trans.push_back(temp);
matthiasm@43 543 }
matthiasm@106 544 vector<double> scale;
matthiasm@106 545 vector<int> chordpath = ViterbiPath(init, trans, chordogram, delta, &scale);
matthiasm@106 546
matthiasm@48 547
matthiasm@48 548 Feature chord_feature; // chord estimate
matthiasm@48 549 chord_feature.hasTimestamp = true;
matthiasm@48 550 chord_feature.timestamp = timestamps[0];
matthiasm@48 551 chord_feature.label = m_chordnames[chordpath[0]];
mail@60 552 fsOut[m_outputChords].push_back(chord_feature);
matthiasm@43 553
mail@60 554 chordchange[0] = 0;
Chris@91 555 for (int iFrame = 1; iFrame < (int)chordpath.size(); ++iFrame) {
matthiasm@43 556 // cerr << chordpath[iFrame] << endl;
matthiasm@48 557 if (chordpath[iFrame] != oldchord ) {
matthiasm@86 558 // chord
matthiasm@43 559 Feature chord_feature; // chord estimate
matthiasm@43 560 chord_feature.hasTimestamp = true;
matthiasm@43 561 chord_feature.timestamp = timestamps[iFrame];
matthiasm@43 562 chord_feature.label = m_chordnames[chordpath[iFrame]];
mail@60 563 fsOut[m_outputChords].push_back(chord_feature);
matthiasm@43 564 oldchord = chordpath[iFrame];
matthiasm@86 565 // chord notes
Chris@91 566 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
matthiasm@86 567 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[iFrame];
matthiasm@86 568 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
matthiasm@86 569 }
matthiasm@86 570 oldnotes.clear();
Chris@91 571 for (int iNote = 0; iNote < (int)m_chordnotes[chordpath[iFrame]].size(); ++iNote) { // prepare notes of current chord
matthiasm@86 572 Feature chordnote_feature;
matthiasm@86 573 chordnote_feature.hasTimestamp = true;
matthiasm@86 574 chordnote_feature.timestamp = timestamps[iFrame];
matthiasm@86 575 chordnote_feature.values.push_back(m_chordnotes[chordpath[iFrame]][iNote]);
matthiasm@86 576 chordnote_feature.hasDuration = true;
matthiasm@86 577 chordnote_feature.duration = -timestamps[iFrame]; // this will be corrected at the next chord
matthiasm@86 578 oldnotes.push_back(chordnote_feature);
matthiasm@86 579 }
Chris@23 580 }
matthiasm@50 581 /* calculating simple chord change prob */
matthiasm@50 582 for (int iChord = 0; iChord < nChord; iChord++) {
matthiasm@50 583 chordchange[iFrame-1] += delta[(iFrame-1)*nChord + iChord] * log(delta[(iFrame-1)*nChord + iChord]/delta[iFrame*nChord + iChord]);
matthiasm@50 584 }
Chris@23 585 }
matthiasm@43 586
matthiasm@106 587 float logscale = 0;
matthiasm@106 588 for (int iFrame = 0; iFrame < nFrame; ++iFrame) {
matthiasm@106 589 logscale -= log(scale[iFrame]);
matthiasm@106 590 Feature loglikelihood;
matthiasm@106 591 loglikelihood.hasTimestamp = true;
matthiasm@106 592 loglikelihood.timestamp = timestamps[iFrame];
matthiasm@106 593 loglikelihood.values.push_back(-log(scale[iFrame]));
matthiasm@106 594 // cerr << chordchange[iFrame] << endl;
matthiasm@106 595 fsOut[m_outputMeanloglikelihood].push_back(loglikelihood);
matthiasm@106 596 }
matthiasm@106 597 logscale /= nFrame;
matthiasm@106 598 cerr << "loglik" << logscale << endl;
matthiasm@106 599
matthiasm@106 600
matthiasm@43 601 // cerr << chordpath[0] << endl;
matthiasm@43 602 } else {
matthiasm@43 603 /* Simple chord estimation
matthiasm@43 604 I just take the local chord estimates ("currentChordSalience") and average them over time, then
matthiasm@43 605 take the maximum. Very simple, don't do this at home...
matthiasm@43 606 */
matthiasm@44 607 cerr << "[Chordino Plugin] Simple Chord Estimation ... ";
matthiasm@43 608 count = 0;
matthiasm@43 609 int halfwindowlength = m_inputSampleRate / m_stepSize;
matthiasm@43 610 vector<int> chordSequence;
matthiasm@43 611 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) { // initialise the score chordogram
matthiasm@43 612 vector<int> temp = vector<int>(nChord,0);
matthiasm@43 613 scoreChordogram.push_back(temp);
matthiasm@43 614 }
matthiasm@43 615 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it < timestamps.end()-2*halfwindowlength-1; ++it) {
matthiasm@43 616 int startIndex = count + 1;
matthiasm@43 617 int endIndex = count + 2 * halfwindowlength;
matthiasm@43 618
matthiasm@43 619 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
matthiasm@43 620
matthiasm@43 621 vector<int> chordCandidates;
Chris@91 622 for (int iChord = 0; iChord+1 < nChord; iChord++) {
matthiasm@43 623 // float currsum = 0;
Chris@91 624 // for (int iFrame = startIndex; iFrame < endIndex; ++iFrame) {
matthiasm@43 625 // currsum += chordogram[iFrame][iChord];
matthiasm@43 626 // }
matthiasm@43 627 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
Chris@91 628 for (int iFrame = startIndex; iFrame < endIndex; ++iFrame) {
matthiasm@43 629 if (chordogram[iFrame][iChord] > chordThreshold) {
matthiasm@43 630 chordCandidates.push_back(iChord);
matthiasm@43 631 break;
matthiasm@43 632 }
Chris@23 633 }
Chris@23 634 }
matthiasm@43 635 chordCandidates.push_back(nChord-1);
matthiasm@43 636 // cerr << chordCandidates.size() << endl;
matthiasm@43 637
matthiasm@43 638 float maxval = 0; // will be the value of the most salient *chord change* in this frame
matthiasm@43 639 float maxindex = 0; //... and the index thereof
Chris@91 640 int bestchordL = nChord-1; // index of the best "left" chord
Chris@91 641 int bestchordR = nChord-1; // index of the best "right" chord
matthiasm@43 642
matthiasm@43 643 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
matthiasm@43 644 // now find the max values on both sides of iWF
matthiasm@43 645 // left side:
matthiasm@43 646 float maxL = 0;
Chris@91 647 int maxindL = nChord-1;
Chris@91 648 for (int kChord = 0; kChord < (int)chordCandidates.size(); kChord++) {
Chris@91 649 int iChord = chordCandidates[kChord];
matthiasm@43 650 float currsum = 0;
Chris@91 651 for (int iFrame = 0; iFrame < iWF-1; ++iFrame) {
matthiasm@43 652 currsum += chordogram[count+iFrame][iChord];
matthiasm@43 653 }
matthiasm@43 654 if (iChord == nChord-1) currsum *= 0.8;
matthiasm@43 655 if (currsum > maxL) {
matthiasm@43 656 maxL = currsum;
matthiasm@43 657 maxindL = iChord;
matthiasm@43 658 }
matthiasm@43 659 }
matthiasm@43 660 // right side:
matthiasm@43 661 float maxR = 0;
Chris@91 662 int maxindR = nChord-1;
Chris@91 663 for (int kChord = 0; kChord < (int)chordCandidates.size(); kChord++) {
Chris@91 664 int iChord = chordCandidates[kChord];
matthiasm@43 665 float currsum = 0;
Chris@91 666 for (int iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
matthiasm@43 667 currsum += chordogram[count+iFrame][iChord];
matthiasm@43 668 }
matthiasm@43 669 if (iChord == nChord-1) currsum *= 0.8;
matthiasm@43 670 if (currsum > maxR) {
matthiasm@43 671 maxR = currsum;
matthiasm@43 672 maxindR = iChord;
matthiasm@43 673 }
matthiasm@43 674 }
matthiasm@43 675 if (maxL+maxR > maxval) {
matthiasm@43 676 maxval = maxL+maxR;
matthiasm@43 677 maxindex = iWF;
matthiasm@43 678 bestchordL = maxindL;
matthiasm@43 679 bestchordR = maxindR;
matthiasm@43 680 }
matthiasm@43 681
Chris@23 682 }
matthiasm@43 683 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
matthiasm@43 684 // add a score to every chord-frame-point that was part of a maximum
Chris@91 685 for (int iFrame = 0; iFrame < maxindex-1; ++iFrame) {
matthiasm@43 686 scoreChordogram[iFrame+count][bestchordL]++;
matthiasm@43 687 }
Chris@91 688 for (int iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
matthiasm@43 689 scoreChordogram[iFrame+count][bestchordR]++;
matthiasm@43 690 }
matthiasm@50 691 if (bestchordL != bestchordR) {
matthiasm@50 692 chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
matthiasm@50 693 }
matthiasm@43 694 count++;
Chris@23 695 }
matthiasm@43 696 // cerr << "******* agent finished *******" << endl;
matthiasm@43 697 count = 0;
matthiasm@43 698 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) {
matthiasm@43 699 float maxval = 0; // will be the value of the most salient chord in this frame
matthiasm@43 700 float maxindex = 0; //... and the index thereof
Chris@91 701 for (int iChord = 0; iChord < nChord; iChord++) {
matthiasm@43 702 if (scoreChordogram[count][iChord] > maxval) {
matthiasm@43 703 maxval = scoreChordogram[count][iChord];
matthiasm@43 704 maxindex = iChord;
matthiasm@43 705 // cerr << iChord << endl;
matthiasm@43 706 }
matthiasm@43 707 }
matthiasm@43 708 chordSequence.push_back(maxindex);
matthiasm@43 709 count++;
Chris@23 710 }
matthiasm@43 711
matthiasm@43 712
matthiasm@43 713 // mode filter on chordSequence
matthiasm@43 714 count = 0;
matthiasm@43 715 string oldChord = "";
matthiasm@43 716 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) {
matthiasm@43 717 Feature chord_feature; // chord estimate
matthiasm@43 718 chord_feature.hasTimestamp = true;
matthiasm@43 719 chord_feature.timestamp = *it;
matthiasm@43 720 // Feature currentChord; // chord estimate
matthiasm@43 721 // currentChord.hasTimestamp = true;
matthiasm@43 722 // currentChord.timestamp = currentChromas.timestamp;
matthiasm@43 723
matthiasm@43 724 vector<int> chordCount = vector<int>(nChord,0);
matthiasm@43 725 int maxChordCount = 0;
matthiasm@43 726 int maxChordIndex = nChord-1;
matthiasm@43 727 string maxChord;
matthiasm@43 728 int startIndex = max(count - halfwindowlength/2,0);
matthiasm@43 729 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
matthiasm@43 730 for (int i = startIndex; i < endIndex; i++) {
matthiasm@43 731 chordCount[chordSequence[i]]++;
matthiasm@43 732 if (chordCount[chordSequence[i]] > maxChordCount) {
matthiasm@43 733 // cerr << "start index " << startIndex << endl;
matthiasm@43 734 maxChordCount++;
matthiasm@43 735 maxChordIndex = chordSequence[i];
matthiasm@43 736 maxChord = m_chordnames[maxChordIndex];
matthiasm@43 737 }
matthiasm@43 738 }
matthiasm@43 739 // chordSequence[count] = maxChordIndex;
matthiasm@43 740 // cerr << maxChordIndex << endl;
matthiasm@50 741 // cerr << chordchange[count] << endl;
matthiasm@43 742 if (oldChord != maxChord) {
matthiasm@43 743 oldChord = maxChord;
matthiasm@43 744 chord_feature.label = m_chordnames[maxChordIndex];
mail@60 745 fsOut[m_outputChords].push_back(chord_feature);
Chris@91 746 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
matthiasm@86 747 oldnotes[iNote].duration = oldnotes[iNote].duration + chord_feature.timestamp;
matthiasm@86 748 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
matthiasm@86 749 }
matthiasm@86 750 oldnotes.clear();
Chris@91 751 for (int iNote = 0; iNote < (int)m_chordnotes[maxChordIndex].size(); ++iNote) { // prepare notes of current chord
matthiasm@86 752 Feature chordnote_feature;
matthiasm@86 753 chordnote_feature.hasTimestamp = true;
matthiasm@86 754 chordnote_feature.timestamp = chord_feature.timestamp;
matthiasm@86 755 chordnote_feature.values.push_back(m_chordnotes[maxChordIndex][iNote]);
matthiasm@86 756 chordnote_feature.hasDuration = true;
matthiasm@86 757 chordnote_feature.duration = -chord_feature.timestamp; // this will be corrected at the next chord
matthiasm@86 758 oldnotes.push_back(chordnote_feature);
matthiasm@86 759 }
matthiasm@43 760 }
matthiasm@43 761 count++;
Chris@23 762 }
Chris@23 763 }
matthiasm@43 764 Feature chord_feature; // last chord estimate
matthiasm@43 765 chord_feature.hasTimestamp = true;
matthiasm@43 766 chord_feature.timestamp = timestamps[timestamps.size()-1];
matthiasm@43 767 chord_feature.label = "N";
mail@60 768 fsOut[m_outputChords].push_back(chord_feature);
matthiasm@86 769
Chris@91 770 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
matthiasm@86 771 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[timestamps.size()-1];
matthiasm@86 772 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
matthiasm@86 773 }
matthiasm@86 774
Chris@23 775 cerr << "done." << endl;
matthiasm@50 776
matthiasm@50 777 for (int iFrame = 0; iFrame < nFrame; iFrame++) {
matthiasm@50 778 Feature chordchange_feature;
matthiasm@50 779 chordchange_feature.hasTimestamp = true;
matthiasm@50 780 chordchange_feature.timestamp = timestamps[iFrame];
matthiasm@50 781 chordchange_feature.values.push_back(chordchange[iFrame]);
mail@60 782 // cerr << chordchange[iFrame] << endl;
mail@60 783 fsOut[m_outputHarmonicChange].push_back(chordchange_feature);
matthiasm@50 784 }
matthiasm@50 785
mail@60 786 // for (int iFrame = 0; iFrame < nFrame; iFrame++) cerr << fsOut[m_outputHarmonicChange][iFrame].values[0] << endl;
matthiasm@50 787
matthiasm@50 788
Chris@23 789 return fsOut;
matthiasm@0 790 }