annotate Chordino.cpp @ 184:82d5d11b68d7 tip

Update library URI so it's not document-local
author Chris Cannam
date Wed, 22 Apr 2020 14:21:25 +0100
parents d95c4cdef8af
children
rev   line source
Chris@23 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
matthiasm@0 2
Chris@35 3 /*
Chris@35 4 NNLS-Chroma / Chordino
Chris@35 5
Chris@35 6 Audio feature extraction plugins for chromagram and chord
Chris@35 7 estimation.
Chris@35 8
Chris@35 9 Centre for Digital Music, Queen Mary University of London.
Chris@35 10 This file copyright 2008-2010 Matthias Mauch and QMUL.
Chris@35 11
Chris@35 12 This program is free software; you can redistribute it and/or
Chris@35 13 modify it under the terms of the GNU General Public License as
Chris@35 14 published by the Free Software Foundation; either version 2 of the
Chris@35 15 License, or (at your option) any later version. See the file
Chris@35 16 COPYING included with this distribution for more information.
Chris@35 17 */
Chris@35 18
Chris@35 19 #include "Chordino.h"
Chris@27 20
Chris@27 21 #include "chromamethods.h"
matthiasm@43 22 #include "viterbi.h"
Chris@27 23
Chris@27 24 #include <cstdlib>
Chris@27 25 #include <fstream>
matthiasm@0 26 #include <cmath>
matthiasm@9 27
Chris@27 28 #include <algorithm>
matthiasm@0 29
/// File-local switch for the "--> ..." trace messages written to cerr in this file.
static const bool debug_on = false;
matthiasm@0 31
Chris@35 32 Chordino::Chordino(float inputSampleRate) :
matthiasm@86 33 NNLSBase(inputSampleRate),
matthiasm@86 34 m_chorddict(0),
matthiasm@86 35 m_chordnotes(0),
matthiasm@86 36 m_chordnames(0)
matthiasm@0 37 {
Chris@35 38 if (debug_on) cerr << "--> Chordino" << endl;
matthiasm@0 39 }
matthiasm@0 40
Chris@35 41 Chordino::~Chordino()
matthiasm@0 42 {
Chris@35 43 if (debug_on) cerr << "--> ~Chordino" << endl;
matthiasm@0 44 }
matthiasm@0 45
matthiasm@0 46 string
Chris@35 47 Chordino::getIdentifier() const
matthiasm@0 48 {
Chris@23 49 if (debug_on) cerr << "--> getIdentifier" << endl;
Chris@35 50 return "chordino";
matthiasm@0 51 }
matthiasm@0 52
matthiasm@0 53 string
Chris@35 54 Chordino::getName() const
matthiasm@0 55 {
Chris@23 56 if (debug_on) cerr << "--> getName" << endl;
Chris@35 57 return "Chordino";
matthiasm@0 58 }
matthiasm@0 59
matthiasm@0 60 string
Chris@35 61 Chordino::getDescription() const
matthiasm@0 62 {
Chris@23 63 if (debug_on) cerr << "--> getDescription" << endl;
Chris@149 64 return "Chordino provides a simple chord transcription based on NNLS Chroma (as in the NNLS Chroma plugin). Chord profiles given by the user in the file chord.dict are used to calculate frame-wise chord similarities. A simple (non-state-of-the-art!) algorithm smooths these to provide a chord transcription using a standard HMM/Viterbi approach.";
matthiasm@0 65 }
matthiasm@0 66
matthiasm@50 67 Chordino::ParameterList
matthiasm@50 68 Chordino::getParameterDescriptors() const
matthiasm@50 69 {
matthiasm@50 70 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
matthiasm@50 71 ParameterList list;
matthiasm@50 72
mail@118 73 ParameterDescriptor useNNLSParam;
mail@118 74 useNNLSParam.identifier = "useNNLS";
mail@118 75 useNNLSParam.name = "use approximate transcription (NNLS)";
mail@118 76 useNNLSParam.description = "Toggles approximate transcription (NNLS).";
mail@118 77 useNNLSParam.unit = "";
mail@118 78 useNNLSParam.minValue = 0.0;
mail@118 79 useNNLSParam.maxValue = 1.0;
mail@118 80 useNNLSParam.defaultValue = 1.0;
mail@118 81 useNNLSParam.isQuantized = true;
mail@118 82 useNNLSParam.quantizeStep = 1.0;
mail@118 83 list.push_back(useNNLSParam);
matthiasm@50 84
mail@118 85 ParameterDescriptor rollonParam;
mail@118 86 rollonParam.identifier = "rollon";
mail@118 87 rollonParam.name = "bass noise threshold";
mail@118 88 rollonParam.description = "Consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds the quantile [bass noise threshold] x [total energy] will be set to 0. A threshold value of 0 means that no bins will be changed.";
mail@118 89 rollonParam.unit = "%";
mail@118 90 rollonParam.minValue = 0;
mail@118 91 rollonParam.maxValue = 5;
mail@118 92 rollonParam.defaultValue = 0.0;
mail@118 93 rollonParam.isQuantized = true;
mail@118 94 rollonParam.quantizeStep = 0.5;
mail@118 95 list.push_back(rollonParam);
matthiasm@50 96
mail@118 97 ParameterDescriptor tuningmodeParam;
mail@118 98 tuningmodeParam.identifier = "tuningmode";
mail@118 99 tuningmodeParam.name = "tuning mode";
mail@118 100 tuningmodeParam.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
mail@118 101 tuningmodeParam.unit = "";
mail@118 102 tuningmodeParam.minValue = 0;
mail@118 103 tuningmodeParam.maxValue = 1;
mail@118 104 tuningmodeParam.defaultValue = 0.0;
mail@118 105 tuningmodeParam.isQuantized = true;
mail@118 106 tuningmodeParam.valueNames.push_back("global tuning");
mail@118 107 tuningmodeParam.valueNames.push_back("local tuning");
mail@118 108 tuningmodeParam.quantizeStep = 1.0;
mail@118 109 list.push_back(tuningmodeParam);
matthiasm@50 110
mail@118 111 ParameterDescriptor whiteningParam;
mail@118 112 whiteningParam.identifier = "whitening";
mail@118 113 whiteningParam.name = "spectral whitening";
mail@118 114 whiteningParam.description = "Spectral whitening: no whitening - 0; whitening - 1.";
mail@118 115 whiteningParam.unit = "";
mail@118 116 whiteningParam.isQuantized = true;
mail@118 117 whiteningParam.minValue = 0.0;
mail@118 118 whiteningParam.maxValue = 1.0;
mail@118 119 whiteningParam.defaultValue = 1.0;
mail@118 120 whiteningParam.isQuantized = false;
mail@118 121 list.push_back(whiteningParam);
matthiasm@50 122
mail@118 123 ParameterDescriptor spectralShapeParam;
Chris@164 124 spectralShapeParam.identifier = "s";
mail@118 125 spectralShapeParam.name = "spectral shape";
mail@118 126 spectralShapeParam.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
mail@118 127 spectralShapeParam.unit = "";
mail@118 128 spectralShapeParam.minValue = 0.5;
mail@118 129 spectralShapeParam.maxValue = 0.9;
mail@118 130 spectralShapeParam.defaultValue = 0.7;
mail@118 131 spectralShapeParam.isQuantized = false;
mail@118 132 list.push_back(spectralShapeParam);
matthiasm@50 133
mail@118 134 ParameterDescriptor boostnParam;
mail@118 135 boostnParam.identifier = "boostn";
mail@118 136 boostnParam.name = "boost N";
mail@118 137 boostnParam.description = "Boost likelihood of the N (no chord) label.";
mail@118 138 boostnParam.unit = "";
mail@118 139 boostnParam.minValue = 0.0;
mail@118 140 boostnParam.maxValue = 1.0;
mail@118 141 boostnParam.defaultValue = 0.1;
mail@118 142 boostnParam.isQuantized = false;
mail@118 143 list.push_back(boostnParam);
matthiasm@50 144
mail@118 145 ParameterDescriptor usehartesyntaxParam;
mail@118 146 usehartesyntaxParam.identifier = "usehartesyntax";
mail@118 147 usehartesyntaxParam.name = "use Harte syntax";
mail@118 148 usehartesyntaxParam.description = "Use the chord syntax proposed by Harte";
mail@118 149 usehartesyntaxParam.unit = "";
mail@118 150 usehartesyntaxParam.minValue = 0.0;
mail@118 151 usehartesyntaxParam.maxValue = 1.0;
mail@118 152 usehartesyntaxParam.defaultValue = 0.0;
mail@118 153 usehartesyntaxParam.isQuantized = true;
mail@118 154 usehartesyntaxParam.quantizeStep = 1.0;
mail@118 155 usehartesyntaxParam.valueNames.push_back("no");
mail@118 156 usehartesyntaxParam.valueNames.push_back("yes");
mail@118 157 list.push_back(usehartesyntaxParam);
mail@112 158
matthiasm@50 159 return list;
matthiasm@50 160 }
matthiasm@50 161
Chris@35 162 Chordino::OutputList
Chris@35 163 Chordino::getOutputDescriptors() const
matthiasm@0 164 {
Chris@23 165 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
matthiasm@0 166 OutputList list;
matthiasm@0 167
Chris@35 168 int index = 0;
matthiasm@0 169
Chris@164 170 float featureRate =
Chris@164 171 (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
Chris@164 172
matthiasm@0 173 OutputDescriptor d7;
matthiasm@0 174 d7.identifier = "simplechord";
Chris@36 175 d7.name = "Chord Estimate";
matthiasm@133 176 d7.description = "Estimated chord times and labels.";
matthiasm@0 177 d7.unit = "";
matthiasm@0 178 d7.hasFixedBinCount = true;
matthiasm@0 179 d7.binCount = 0;
matthiasm@0 180 d7.hasKnownExtents = false;
matthiasm@0 181 d7.isQuantized = false;
matthiasm@0 182 d7.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@0 183 d7.hasDuration = false;
Chris@164 184 d7.sampleRate = featureRate;
matthiasm@0 185 list.push_back(d7);
Chris@35 186 m_outputChords = index++;
matthiasm@0 187
matthiasm@86 188 OutputDescriptor chordnotes;
matthiasm@86 189 chordnotes.identifier = "chordnotes";
matthiasm@86 190 chordnotes.name = "Note Representation of Chord Estimate";
Chris@149 191 chordnotes.description = "A simple representation of the estimated chord with bass note (if applicable) and chord notes.";
matthiasm@86 192 chordnotes.unit = "MIDI units";
matthiasm@86 193 chordnotes.hasFixedBinCount = true;
matthiasm@86 194 chordnotes.binCount = 1;
matthiasm@86 195 chordnotes.hasKnownExtents = true;
matthiasm@86 196 chordnotes.minValue = 0;
matthiasm@86 197 chordnotes.maxValue = 127;
matthiasm@86 198 chordnotes.isQuantized = true;
matthiasm@86 199 chordnotes.quantizeStep = 1;
matthiasm@86 200 chordnotes.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@86 201 chordnotes.hasDuration = true;
Chris@164 202 chordnotes.sampleRate = featureRate;
matthiasm@86 203 list.push_back(chordnotes);
matthiasm@86 204 m_outputChordnotes = index++;
matthiasm@86 205
Chris@23 206 OutputDescriptor d8;
mail@60 207 d8.identifier = "harmonicchange";
Chris@36 208 d8.name = "Harmonic Change Value";
matthiasm@58 209 d8.description = "An indication of the likelihood of harmonic change. Depends on the chord dictionary. Calculation is different depending on whether the Viterbi algorithm is used for chord estimation, or the simple chord estimate.";
matthiasm@17 210 d8.unit = "";
matthiasm@17 211 d8.hasFixedBinCount = true;
matthiasm@17 212 d8.binCount = 1;
mail@60 213 d8.hasKnownExtents = false;
matthiasm@17 214 d8.isQuantized = false;
matthiasm@17 215 d8.sampleType = OutputDescriptor::FixedSampleRate;
Chris@164 216 d8.sampleRate = featureRate;
matthiasm@17 217 d8.hasDuration = false;
matthiasm@17 218 list.push_back(d8);
Chris@35 219 m_outputHarmonicChange = index++;
matthiasm@1 220
matthiasm@107 221 OutputDescriptor loglikelihood;
matthiasm@107 222 loglikelihood.identifier = "loglikelihood";
mail@126 223 loglikelihood.name = "Log-Likelihood of Chord Estimate";
mail@124 224 loglikelihood.description = "Logarithm of the likelihood value of the simple chord estimate.";
matthiasm@107 225 loglikelihood.unit = "";
matthiasm@107 226 loglikelihood.hasFixedBinCount = true;
matthiasm@107 227 loglikelihood.binCount = 1;
matthiasm@107 228 loglikelihood.hasKnownExtents = false;
matthiasm@107 229 loglikelihood.isQuantized = false;
matthiasm@107 230 loglikelihood.sampleType = OutputDescriptor::FixedSampleRate;
Chris@164 231 loglikelihood.sampleRate = featureRate;
matthiasm@107 232 loglikelihood.hasDuration = false;
matthiasm@107 233 list.push_back(loglikelihood);
matthiasm@107 234 m_outputLoglikelihood = index++;
matthiasm@106 235
matthiasm@0 236 return list;
matthiasm@0 237 }
matthiasm@0 238
matthiasm@0 239 bool
Chris@35 240 Chordino::initialise(size_t channels, size_t stepSize, size_t blockSize)
matthiasm@0 241 {
Chris@23 242 if (debug_on) {
Chris@23 243 cerr << "--> initialise";
Chris@23 244 }
mail@76 245
Chris@35 246 if (!NNLSBase::initialise(channels, stepSize, blockSize)) {
Chris@35 247 return false;
Chris@35 248 }
mail@115 249 m_chordnames = chordDictionary(&m_chorddict, &m_chordnotes, m_boostN, m_harte_syntax);
matthiasm@0 250 return true;
matthiasm@0 251 }
matthiasm@0 252
matthiasm@0 253 void
Chris@35 254 Chordino::reset()
matthiasm@0 255 {
Chris@23 256 if (debug_on) cerr << "--> reset";
Chris@35 257 NNLSBase::reset();
matthiasm@0 258 }
matthiasm@0 259
Chris@35 260 Chordino::FeatureSet
Chris@35 261 Chordino::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
matthiasm@0 262 {
Chris@23 263 if (debug_on) cerr << "--> process" << endl;
matthiasm@0 264
Chris@35 265 NNLSBase::baseProcess(inputBuffers, timestamp);
matthiasm@0 266
Chris@35 267 return FeatureSet();
matthiasm@0 268 }
matthiasm@0 269
Chris@35 270 Chordino::FeatureSet
Chris@35 271 Chordino::getRemainingFeatures()
matthiasm@0 272 {
mail@89 273 // cerr << hw[0] << hw[1] << endl;
mail@89 274 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
Chris@23 275 FeatureSet fsOut;
Chris@35 276 if (m_logSpectrum.size() == 0) return fsOut;
Chris@23 277 int nChord = m_chordnames.size();
Chris@23 278 //
Chris@23 279 /** Calculate Tuning
Chris@23 280 calculate tuning from (using the angle of the complex number defined by the
Chris@23 281 cumulative mean real and imag values)
Chris@23 282 **/
mail@80 283 float meanTuningImag = 0;
mail@80 284 float meanTuningReal = 0;
mail@80 285 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
mail@80 286 meanTuningReal += m_meanTunings[iBPS] * cosvalues[iBPS];
mail@80 287 meanTuningImag += m_meanTunings[iBPS] * sinvalues[iBPS];
mail@80 288 }
Chris@23 289 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
Chris@23 290 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
Chris@23 291 int intShift = floor(normalisedtuning * 3);
mail@80 292 float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this
matthiasm@1 293
Chris@23 294 char buffer0 [50];
matthiasm@1 295
Chris@23 296 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
matthiasm@1 297
matthiasm@1 298
Chris@23 299 /** Tune Log-Frequency Spectrogram
matthiasm@43 300 calculate a tuned log-frequency spectrogram (currentTunedSpec): use the tuning estimated above (kinda f0) to
Chris@91 301 perform linear interpolation on the existing log-frequency spectrogram (kinda currentLogSpectrum).
Chris@23 302 **/
Chris@163 303 if (debug_on) cerr << endl << "[Chordino Plugin] Tuning Log-Frequency Spectrogram ... ";
matthiasm@13 304
Chris@23 305 int count = 0;
matthiasm@1 306
Chris@35 307 FeatureList tunedSpec;
matthiasm@43 308 int nFrame = m_logSpectrum.size();
matthiasm@43 309
matthiasm@43 310 vector<Vamp::RealTime> timestamps;
Chris@35 311
Chris@35 312 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
Chris@91 313 Feature currentLogSpectrum = *i;
matthiasm@43 314 Feature currentTunedSpec; // tuned log-frequency spectrum
matthiasm@43 315 currentTunedSpec.hasTimestamp = true;
Chris@91 316 currentTunedSpec.timestamp = currentLogSpectrum.timestamp;
Chris@91 317 timestamps.push_back(currentLogSpectrum.timestamp);
matthiasm@43 318 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // set lower edge to zero
matthiasm@1 319
Chris@23 320 if (m_tuneLocal) {
Chris@23 321 intShift = floor(m_localTuning[count] * 3);
mail@80 322 floatShift = m_localTuning[count] * 3 - intShift; // floatShift is a really bad name for this
Chris@23 323 }
matthiasm@1 324
mail@80 325 // cerr << intShift << " " << floatShift << endl;
matthiasm@1 326
Chris@91 327 for (int k = 2; k < (int)currentLogSpectrum.values.size() - 3; ++k) { // interpolate all inner bins
mail@115 328 float tempValue = currentLogSpectrum.values[k + intShift] * (1-floatShift) + currentLogSpectrum.values[k+intShift+1] * floatShift;
matthiasm@43 329 currentTunedSpec.values.push_back(tempValue);
Chris@23 330 }
matthiasm@1 331
matthiasm@43 332 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // upper edge
matthiasm@43 333 vector<float> runningmean = SpecialConvolution(currentTunedSpec.values,hw);
Chris@23 334 vector<float> runningstd;
mail@77 335 for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance)
matthiasm@43 336 runningstd.push_back((currentTunedSpec.values[i] - runningmean[i]) * (currentTunedSpec.values[i] - runningmean[i]));
Chris@23 337 }
Chris@23 338 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
mail@77 339 for (int i = 0; i < nNote; i++) {
Chris@23 340 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
Chris@23 341 if (runningstd[i] > 0) {
matthiasm@43 342 // currentTunedSpec.values[i] = (currentTunedSpec.values[i] / runningmean[i]) > thresh ?
matthiasm@43 343 // (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
matthiasm@43 344 currentTunedSpec.values[i] = (currentTunedSpec.values[i] - runningmean[i]) > 0 ?
matthiasm@43 345 (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
Chris@23 346 }
matthiasm@43 347 if (currentTunedSpec.values[i] < 0) {
Chris@23 348 cerr << "ERROR: negative value in logfreq spectrum" << endl;
Chris@23 349 }
Chris@23 350 }
matthiasm@43 351 tunedSpec.push_back(currentTunedSpec);
Chris@23 352 count++;
Chris@23 353 }
Chris@163 354 if (debug_on) cerr << "done." << endl;
matthiasm@1 355
Chris@23 356 /** Semitone spectrum and chromagrams
Chris@23 357 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
Chris@23 358 is inferred using a non-negative least squares algorithm.
Chris@23 359 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
Chris@23 360 bass and treble stacked onto each other).
Chris@23 361 **/
matthiasm@42 362 if (m_useNNLS == 0) {
Chris@163 363 if (debug_on) cerr << "[Chordino Plugin] Mapping to semitone spectrum and chroma ... ";
Chris@23 364 } else {
Chris@163 365 if (debug_on) cerr << "[Chordino Plugin] Performing NNLS and mapping to chroma ... ";
Chris@23 366 }
matthiasm@13 367
matthiasm@1 368
matthiasm@43 369 vector<vector<double> > chordogram;
Chris@23 370 vector<vector<int> > scoreChordogram;
Chris@35 371 vector<float> chordchange = vector<float>(tunedSpec.size(),0);
Chris@23 372 count = 0;
matthiasm@9 373
Chris@35 374 FeatureList chromaList;
matthiasm@43 375
Chris@164 376 bool clipwarned = false;
Chris@35 377
Chris@35 378 for (FeatureList::iterator it = tunedSpec.begin(); it != tunedSpec.end(); ++it) {
matthiasm@43 379 Feature currentTunedSpec = *it; // logfreq spectrum
matthiasm@43 380 Feature currentChromas; // treble and bass chromagram
Chris@35 381
matthiasm@43 382 currentChromas.hasTimestamp = true;
matthiasm@43 383 currentChromas.timestamp = currentTunedSpec.timestamp;
Chris@35 384
mail@77 385 float b[nNote];
matthiasm@1 386
Chris@23 387 bool some_b_greater_zero = false;
Chris@23 388 float sumb = 0;
mail@77 389 for (int i = 0; i < nNote; i++) {
mail@77 390 // b[i] = m_dict[(nNote * count + i) % (nNote * 84)];
matthiasm@43 391 b[i] = currentTunedSpec.values[i];
Chris@23 392 sumb += b[i];
Chris@23 393 if (b[i] > 0) {
Chris@23 394 some_b_greater_zero = true;
Chris@23 395 }
Chris@23 396 }
matthiasm@1 397
Chris@23 398 // here's where the non-negative least squares algorithm calculates the note activation x
matthiasm@1 399
Chris@23 400 vector<float> chroma = vector<float>(12, 0);
Chris@23 401 vector<float> basschroma = vector<float>(12, 0);
Chris@23 402 float currval;
Chris@91 403 int iSemitone = 0;
matthiasm@1 404
Chris@23 405 if (some_b_greater_zero) {
matthiasm@42 406 if (m_useNNLS == 0) {
Chris@91 407 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
Chris@23 408 currval = 0;
mail@81 409 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
mail@81 410 currval += b[iNote + iBPS] * (1-abs(iBPS*1.0/(nBPS/2+1)));
mail@81 411 }
Chris@23 412 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
Chris@23 413 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
Chris@23 414 iSemitone++;
Chris@23 415 }
matthiasm@1 416
Chris@23 417 } else {
Chris@35 418 float x[84+1000];
Chris@23 419 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
Chris@23 420 vector<int> signifIndex;
Chris@23 421 int index=0;
Chris@23 422 sumb /= 84.0;
Chris@91 423 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
Chris@23 424 float currval = 0;
mail@81 425 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
mail@81 426 currval += b[iNote + iBPS];
mail@81 427 }
Chris@23 428 if (currval > 0) signifIndex.push_back(index);
Chris@23 429 index++;
Chris@23 430 }
Chris@35 431 float rnorm;
Chris@35 432 float w[84+1000];
Chris@35 433 float zz[84+1000];
Chris@23 434 int indx[84+1000];
Chris@23 435 int mode;
mail@77 436 int dictsize = nNote*signifIndex.size();
mail@81 437 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
Chris@35 438 float *curr_dict = new float[dictsize];
Chris@91 439 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
Chris@91 440 for (int iBin = 0; iBin < nNote; iBin++) {
mail@77 441 curr_dict[iNote * nNote + iBin] = 1.0 * m_dict[signifIndex[iNote] * nNote + iBin];
Chris@23 442 }
Chris@23 443 }
Chris@35 444 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
Chris@23 445 delete [] curr_dict;
Chris@91 446 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
Chris@23 447 // cerr << mode << endl;
Chris@23 448 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
Chris@23 449 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
Chris@23 450 }
Chris@23 451 }
Chris@23 452 }
Chris@35 453
Chris@35 454 vector<float> origchroma = chroma;
Chris@23 455 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
matthiasm@43 456 currentChromas.values = chroma;
Chris@164 457
Chris@23 458 if (m_doNormalizeChroma > 0) {
Chris@23 459 vector<float> chromanorm = vector<float>(3,0);
Chris@23 460 switch (int(m_doNormalizeChroma)) {
Chris@23 461 case 0: // should never end up here
Chris@23 462 break;
Chris@23 463 case 1:
Chris@35 464 chromanorm[0] = *max_element(origchroma.begin(), origchroma.end());
Chris@35 465 chromanorm[1] = *max_element(basschroma.begin(), basschroma.end());
Chris@23 466 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
Chris@23 467 break;
Chris@23 468 case 2:
Chris@35 469 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
Chris@23 470 chromanorm[2] += *it;
Chris@23 471 }
Chris@23 472 break;
Chris@23 473 case 3:
Chris@35 474 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
Chris@23 475 chromanorm[2] += pow(*it,2);
Chris@23 476 }
Chris@23 477 chromanorm[2] = sqrt(chromanorm[2]);
Chris@23 478 break;
Chris@23 479 }
Chris@23 480 if (chromanorm[2] > 0) {
Chris@91 481 for (int i = 0; i < (int)chroma.size(); i++) {
matthiasm@43 482 currentChromas.values[i] /= chromanorm[2];
Chris@23 483 }
Chris@23 484 }
Chris@23 485 }
Chris@35 486
matthiasm@43 487 chromaList.push_back(currentChromas);
Chris@35 488
Chris@23 489 // local chord estimation
matthiasm@43 490 vector<double> currentChordSalience;
matthiasm@43 491 double tempchordvalue = 0;
matthiasm@43 492 double sumchordvalue = 0;
matthiasm@9 493
Chris@23 494 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23 495 tempchordvalue = 0;
Chris@23 496 for (int iBin = 0; iBin < 12; iBin++) {
Chris@164 497 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23 498 }
Chris@23 499 for (int iBin = 12; iBin < 24; iBin++) {
Chris@23 500 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23 501 }
matthiasm@48 502 if (iChord == nChord-1) tempchordvalue *= .7;
matthiasm@48 503 if (tempchordvalue < 0) tempchordvalue = 0.0;
Chris@168 504 if (tempchordvalue > 200.0) {
Chris@164 505 if (!clipwarned) {
Chris@164 506 cerr << "WARNING: interim chroma contains extreme chord value " << tempchordvalue << ", clipping this and any others that appear" << endl;
Chris@164 507 clipwarned = true;
Chris@164 508 }
Chris@168 509 tempchordvalue = 200.0;
Chris@164 510 }
Chris@164 511 tempchordvalue = pow(1.3, tempchordvalue);
Chris@164 512 sumchordvalue += tempchordvalue;
Chris@23 513 currentChordSalience.push_back(tempchordvalue);
Chris@23 514 }
Chris@23 515 if (sumchordvalue > 0) {
Chris@23 516 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23 517 currentChordSalience[iChord] /= sumchordvalue;
Chris@23 518 }
Chris@23 519 } else {
Chris@23 520 currentChordSalience[nChord-1] = 1.0;
Chris@23 521 }
Chris@23 522 chordogram.push_back(currentChordSalience);
matthiasm@1 523
Chris@23 524 count++;
Chris@23 525 }
Chris@163 526 if (debug_on) cerr << "done." << endl;
matthiasm@13 527
matthiasm@86 528 vector<Feature> oldnotes;
matthiasm@10 529
Chris@163 530 if (debug_on) cerr << "[Chordino Plugin] HMM Chord Estimation ... ";
matthiasm@131 531 int oldchord = nChord-1;
matthiasm@131 532 double selftransprob = 0.99;
matthiasm@131 533
matthiasm@131 534 // vector<double> init = vector<double>(nChord,1.0/nChord);
matthiasm@131 535 vector<double> init = vector<double>(nChord,0); init[nChord-1] = 1;
matthiasm@131 536
matthiasm@131 537 double *delta;
matthiasm@131 538 delta = (double *)malloc(sizeof(double)*nFrame*nChord);
matthiasm@131 539
matthiasm@131 540 vector<vector<double> > trans;
matthiasm@131 541 for (int iChord = 0; iChord < nChord; iChord++) {
matthiasm@131 542 vector<double> temp = vector<double>(nChord,(1-selftransprob)/(nChord-1));
matthiasm@131 543 temp[iChord] = selftransprob;
matthiasm@131 544 trans.push_back(temp);
matthiasm@131 545 }
matthiasm@131 546 vector<double> scale;
matthiasm@131 547 vector<int> chordpath = ViterbiPath(init, trans, chordogram, delta, &scale);
matthiasm@131 548
matthiasm@131 549 Feature chord_feature; // chord estimate
matthiasm@131 550 chord_feature.hasTimestamp = true;
matthiasm@131 551 chord_feature.timestamp = timestamps[0];
matthiasm@131 552 chord_feature.label = m_chordnames[chordpath[0]];
matthiasm@131 553 fsOut[m_outputChords].push_back(chord_feature);
matthiasm@131 554
matthiasm@131 555 chordchange[0] = 0;
matthiasm@131 556 for (int iFrame = 1; iFrame < (int)chordpath.size(); ++iFrame) {
matthiasm@131 557 if (chordpath[iFrame] != oldchord ) {
matthiasm@131 558 // chord
matthiasm@131 559 Feature chord_feature; // chord estimate
matthiasm@131 560 chord_feature.hasTimestamp = true;
matthiasm@131 561 chord_feature.timestamp = timestamps[iFrame];
matthiasm@131 562 chord_feature.label = m_chordnames[chordpath[iFrame]];
matthiasm@131 563 fsOut[m_outputChords].push_back(chord_feature);
matthiasm@131 564 oldchord = chordpath[iFrame];
matthiasm@131 565 // chord notes
matthiasm@131 566 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
matthiasm@131 567 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[iFrame];
matthiasm@131 568 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
Chris@23 569 }
matthiasm@131 570 oldnotes.clear();
matthiasm@131 571 for (int iNote = 0; iNote < (int)m_chordnotes[chordpath[iFrame]].size(); ++iNote) { // prepare notes of current chord
matthiasm@131 572 Feature chordnote_feature;
matthiasm@131 573 chordnote_feature.hasTimestamp = true;
matthiasm@131 574 chordnote_feature.timestamp = timestamps[iFrame];
matthiasm@131 575 chordnote_feature.values.push_back(m_chordnotes[chordpath[iFrame]][iNote]);
matthiasm@131 576 chordnote_feature.hasDuration = true;
matthiasm@131 577 chordnote_feature.duration = -timestamps[iFrame]; // this will be corrected at the next chord
matthiasm@131 578 oldnotes.push_back(chordnote_feature);
matthiasm@50 579 }
Chris@23 580 }
matthiasm@131 581 /* calculating simple chord change prob */
matthiasm@131 582 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@164 583 double num = delta[(iFrame-1) * nChord + iChord];
Chris@164 584 double denom = delta[iFrame * nChord + iChord];
Chris@164 585 double eps = 1e-7;
Chris@164 586 if (denom < eps) denom = eps;
Chris@164 587 chordchange[iFrame-1] += num * log(num / denom + eps);
Chris@23 588 }
Chris@23 589 }
matthiasm@131 590
matthiasm@131 591 float logscale = 0;
matthiasm@131 592 for (int iFrame = 0; iFrame < nFrame; ++iFrame) {
matthiasm@131 593 logscale -= log(scale[iFrame]);
matthiasm@131 594 Feature loglikelihood;
matthiasm@131 595 loglikelihood.hasTimestamp = true;
matthiasm@131 596 loglikelihood.timestamp = timestamps[iFrame];
matthiasm@131 597 loglikelihood.values.push_back(-log(scale[iFrame]));
matthiasm@131 598 // cerr << chordchange[iFrame] << endl;
matthiasm@131 599 fsOut[m_outputLoglikelihood].push_back(loglikelihood);
matthiasm@131 600 }
matthiasm@131 601 logscale /= nFrame;
matthiasm@131 602
matthiasm@43 603 chord_feature.hasTimestamp = true;
matthiasm@43 604 chord_feature.timestamp = timestamps[timestamps.size()-1];
matthiasm@43 605 chord_feature.label = "N";
mail@60 606 fsOut[m_outputChords].push_back(chord_feature);
matthiasm@86 607
Chris@91 608 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
matthiasm@86 609 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[timestamps.size()-1];
matthiasm@86 610 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
matthiasm@86 611 }
matthiasm@86 612
Chris@163 613 if (debug_on) cerr << "done." << endl;
Chris@159 614
matthiasm@50 615 for (int iFrame = 0; iFrame < nFrame; iFrame++) {
matthiasm@50 616 Feature chordchange_feature;
matthiasm@50 617 chordchange_feature.hasTimestamp = true;
matthiasm@50 618 chordchange_feature.timestamp = timestamps[iFrame];
matthiasm@50 619 chordchange_feature.values.push_back(chordchange[iFrame]);
Chris@164 620 // cerr << "putting value " << chordchange[iFrame] << " at time " << chordchange_feature.timestamp << endl;
mail@60 621 fsOut[m_outputHarmonicChange].push_back(chordchange_feature);
matthiasm@50 622 }
Chris@161 623
Chris@161 624 free(delta);
matthiasm@50 625
mail@60 626 // for (int iFrame = 0; iFrame < nFrame; iFrame++) cerr << fsOut[m_outputHarmonicChange][iFrame].values[0] << endl;
matthiasm@50 627
Chris@23 628 return fsOut;
matthiasm@0 629 }