annotate Chordino.cpp @ 159:f01e5707b804

Typo fix
author Chris Cannam
date Fri, 05 Dec 2014 10:46:04 +0000
parents 5c1a25b3daf0
children ed3e7d4bcdaf
rev   line source
Chris@23 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
matthiasm@0 2
Chris@35 3 /*
Chris@35 4 NNLS-Chroma / Chordino
Chris@35 5
Chris@35 6 Audio feature extraction plugins for chromagram and chord
Chris@35 7 estimation.
Chris@35 8
Chris@35 9 Centre for Digital Music, Queen Mary University of London.
Chris@35 10 This file copyright 2008-2010 Matthias Mauch and QMUL.
Chris@35 11
Chris@35 12 This program is free software; you can redistribute it and/or
Chris@35 13 modify it under the terms of the GNU General Public License as
Chris@35 14 published by the Free Software Foundation; either version 2 of the
Chris@35 15 License, or (at your option) any later version. See the file
Chris@35 16 COPYING included with this distribution for more information.
Chris@35 17 */
Chris@35 18
Chris@35 19 #include "Chordino.h"
Chris@27 20
Chris@27 21 #include "chromamethods.h"
matthiasm@43 22 #include "viterbi.h"
Chris@27 23
Chris@27 24 #include <cstdlib>
Chris@27 25 #include <fstream>
matthiasm@0 26 #include <cmath>
matthiasm@9 27
Chris@27 28 #include <algorithm>
matthiasm@0 29
// Compile-time switch: when true, each method writes a "--> name" trace line to cerr.
const bool debug_on(false);
matthiasm@0 31
Chris@35 32 Chordino::Chordino(float inputSampleRate) :
matthiasm@86 33 NNLSBase(inputSampleRate),
matthiasm@86 34 m_chorddict(0),
matthiasm@86 35 m_chordnotes(0),
matthiasm@86 36 m_chordnames(0)
matthiasm@0 37 {
Chris@35 38 if (debug_on) cerr << "--> Chordino" << endl;
matthiasm@0 39 }
matthiasm@0 40
Chris@35 41 Chordino::~Chordino()
matthiasm@0 42 {
Chris@35 43 if (debug_on) cerr << "--> ~Chordino" << endl;
matthiasm@0 44 }
matthiasm@0 45
matthiasm@0 46 string
Chris@35 47 Chordino::getIdentifier() const
matthiasm@0 48 {
Chris@23 49 if (debug_on) cerr << "--> getIdentifier" << endl;
Chris@35 50 return "chordino";
matthiasm@0 51 }
matthiasm@0 52
matthiasm@0 53 string
Chris@35 54 Chordino::getName() const
matthiasm@0 55 {
Chris@23 56 if (debug_on) cerr << "--> getName" << endl;
Chris@35 57 return "Chordino";
matthiasm@0 58 }
matthiasm@0 59
matthiasm@0 60 string
Chris@35 61 Chordino::getDescription() const
matthiasm@0 62 {
Chris@23 63 if (debug_on) cerr << "--> getDescription" << endl;
matthiasm@133 64 return "Chordino provides a simple chord transcription based on NNLS Chroma (as in the NNLS Chroma plugin). Chord profiles given by the user in the file chord.dict are used to calculate frame-wise chord similarities. A simple (non-state-of-the-art!) algorithm smoothes these to provide a chord transcription using a standard HMM/Viterbi approach.";
matthiasm@0 65 }
matthiasm@0 66
matthiasm@50 67 Chordino::ParameterList
matthiasm@50 68 Chordino::getParameterDescriptors() const
matthiasm@50 69 {
matthiasm@50 70 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
matthiasm@50 71 ParameterList list;
matthiasm@50 72
mail@118 73 ParameterDescriptor useNNLSParam;
mail@118 74 useNNLSParam.identifier = "useNNLS";
mail@118 75 useNNLSParam.name = "use approximate transcription (NNLS)";
mail@118 76 useNNLSParam.description = "Toggles approximate transcription (NNLS).";
mail@118 77 useNNLSParam.unit = "";
mail@118 78 useNNLSParam.minValue = 0.0;
mail@118 79 useNNLSParam.maxValue = 1.0;
mail@118 80 useNNLSParam.defaultValue = 1.0;
mail@118 81 useNNLSParam.isQuantized = true;
mail@118 82 useNNLSParam.quantizeStep = 1.0;
mail@118 83 list.push_back(useNNLSParam);
matthiasm@50 84
mail@118 85 ParameterDescriptor rollonParam;
mail@118 86 rollonParam.identifier = "rollon";
mail@118 87 rollonParam.name = "bass noise threshold";
mail@118 88 rollonParam.description = "Consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds the quantile [bass noise threshold] x [total energy] will be set to 0. A threshold value of 0 means that no bins will be changed.";
mail@118 89 rollonParam.unit = "%";
mail@118 90 rollonParam.minValue = 0;
mail@118 91 rollonParam.maxValue = 5;
mail@118 92 rollonParam.defaultValue = 0.0;
mail@118 93 rollonParam.isQuantized = true;
mail@118 94 rollonParam.quantizeStep = 0.5;
mail@118 95 list.push_back(rollonParam);
matthiasm@50 96
mail@118 97 ParameterDescriptor tuningmodeParam;
mail@118 98 tuningmodeParam.identifier = "tuningmode";
mail@118 99 tuningmodeParam.name = "tuning mode";
mail@118 100 tuningmodeParam.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
mail@118 101 tuningmodeParam.unit = "";
mail@118 102 tuningmodeParam.minValue = 0;
mail@118 103 tuningmodeParam.maxValue = 1;
mail@118 104 tuningmodeParam.defaultValue = 0.0;
mail@118 105 tuningmodeParam.isQuantized = true;
mail@118 106 tuningmodeParam.valueNames.push_back("global tuning");
mail@118 107 tuningmodeParam.valueNames.push_back("local tuning");
mail@118 108 tuningmodeParam.quantizeStep = 1.0;
mail@118 109 list.push_back(tuningmodeParam);
matthiasm@50 110
mail@118 111 ParameterDescriptor whiteningParam;
mail@118 112 whiteningParam.identifier = "whitening";
mail@118 113 whiteningParam.name = "spectral whitening";
mail@118 114 whiteningParam.description = "Spectral whitening: no whitening - 0; whitening - 1.";
mail@118 115 whiteningParam.unit = "";
mail@118 116 whiteningParam.isQuantized = true;
mail@118 117 whiteningParam.minValue = 0.0;
mail@118 118 whiteningParam.maxValue = 1.0;
mail@118 119 whiteningParam.defaultValue = 1.0;
mail@118 120 whiteningParam.isQuantized = false;
mail@118 121 list.push_back(whiteningParam);
matthiasm@50 122
mail@118 123 ParameterDescriptor spectralShapeParam;
mail@118 124 spectralShapeParam.identifier = "spectralshape";
mail@118 125 spectralShapeParam.name = "spectral shape";
mail@118 126 spectralShapeParam.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
mail@118 127 spectralShapeParam.unit = "";
mail@118 128 spectralShapeParam.minValue = 0.5;
mail@118 129 spectralShapeParam.maxValue = 0.9;
mail@118 130 spectralShapeParam.defaultValue = 0.7;
mail@118 131 spectralShapeParam.isQuantized = false;
mail@118 132 list.push_back(spectralShapeParam);
matthiasm@50 133
mail@118 134 ParameterDescriptor boostnParam;
mail@118 135 boostnParam.identifier = "boostn";
mail@118 136 boostnParam.name = "boost N";
mail@118 137 boostnParam.description = "Boost likelihood of the N (no chord) label.";
mail@118 138 boostnParam.unit = "";
mail@118 139 boostnParam.minValue = 0.0;
mail@118 140 boostnParam.maxValue = 1.0;
mail@118 141 boostnParam.defaultValue = 0.1;
mail@118 142 boostnParam.isQuantized = false;
mail@118 143 list.push_back(boostnParam);
matthiasm@50 144
mail@118 145 ParameterDescriptor usehartesyntaxParam;
mail@118 146 usehartesyntaxParam.identifier = "usehartesyntax";
mail@118 147 usehartesyntaxParam.name = "use Harte syntax";
mail@118 148 usehartesyntaxParam.description = "Use the chord syntax proposed by Harte";
mail@118 149 usehartesyntaxParam.unit = "";
mail@118 150 usehartesyntaxParam.minValue = 0.0;
mail@118 151 usehartesyntaxParam.maxValue = 1.0;
mail@118 152 usehartesyntaxParam.defaultValue = 0.0;
mail@118 153 usehartesyntaxParam.isQuantized = true;
mail@118 154 usehartesyntaxParam.quantizeStep = 1.0;
mail@118 155 usehartesyntaxParam.valueNames.push_back("no");
mail@118 156 usehartesyntaxParam.valueNames.push_back("yes");
mail@118 157 list.push_back(usehartesyntaxParam);
mail@112 158
matthiasm@50 159 return list;
matthiasm@50 160 }
matthiasm@50 161
Chris@35 162 Chordino::OutputList
Chris@35 163 Chordino::getOutputDescriptors() const
matthiasm@0 164 {
Chris@23 165 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
matthiasm@0 166 OutputList list;
matthiasm@0 167
Chris@35 168 int index = 0;
matthiasm@0 169
matthiasm@0 170 OutputDescriptor d7;
matthiasm@0 171 d7.identifier = "simplechord";
Chris@36 172 d7.name = "Chord Estimate";
matthiasm@133 173 d7.description = "Estimated chord times and labels.";
matthiasm@0 174 d7.unit = "";
matthiasm@0 175 d7.hasFixedBinCount = true;
matthiasm@0 176 d7.binCount = 0;
matthiasm@0 177 d7.hasKnownExtents = false;
matthiasm@0 178 d7.isQuantized = false;
matthiasm@0 179 d7.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@0 180 d7.hasDuration = false;
matthiasm@0 181 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 182 list.push_back(d7);
Chris@35 183 m_outputChords = index++;
matthiasm@0 184
matthiasm@86 185 OutputDescriptor chordnotes;
matthiasm@86 186 chordnotes.identifier = "chordnotes";
matthiasm@86 187 chordnotes.name = "Note Representation of Chord Estimate";
Chris@159 188 chordnotes.description = "A simple representation of the estimated chord with bass note (if applicable) and chord notes.";
matthiasm@86 189 chordnotes.unit = "MIDI units";
matthiasm@86 190 chordnotes.hasFixedBinCount = true;
matthiasm@86 191 chordnotes.binCount = 1;
matthiasm@86 192 chordnotes.hasKnownExtents = true;
matthiasm@86 193 chordnotes.minValue = 0;
matthiasm@86 194 chordnotes.maxValue = 127;
matthiasm@86 195 chordnotes.isQuantized = true;
matthiasm@86 196 chordnotes.quantizeStep = 1;
matthiasm@86 197 chordnotes.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@86 198 chordnotes.hasDuration = true;
matthiasm@86 199 chordnotes.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@86 200 list.push_back(chordnotes);
matthiasm@86 201 m_outputChordnotes = index++;
matthiasm@86 202
Chris@23 203 OutputDescriptor d8;
mail@60 204 d8.identifier = "harmonicchange";
Chris@36 205 d8.name = "Harmonic Change Value";
matthiasm@58 206 d8.description = "An indication of the likelihood of harmonic change. Depends on the chord dictionary. Calculation is different depending on whether the Viterbi algorithm is used for chord estimation, or the simple chord estimate.";
matthiasm@17 207 d8.unit = "";
matthiasm@17 208 d8.hasFixedBinCount = true;
matthiasm@17 209 d8.binCount = 1;
mail@60 210 d8.hasKnownExtents = false;
matthiasm@17 211 d8.isQuantized = false;
matthiasm@17 212 d8.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@17 213 d8.hasDuration = false;
matthiasm@17 214 list.push_back(d8);
Chris@35 215 m_outputHarmonicChange = index++;
matthiasm@1 216
matthiasm@107 217 OutputDescriptor loglikelihood;
matthiasm@107 218 loglikelihood.identifier = "loglikelihood";
mail@126 219 loglikelihood.name = "Log-Likelihood of Chord Estimate";
mail@124 220 loglikelihood.description = "Logarithm of the likelihood value of the simple chord estimate.";
matthiasm@107 221 loglikelihood.unit = "";
matthiasm@107 222 loglikelihood.hasFixedBinCount = true;
matthiasm@107 223 loglikelihood.binCount = 1;
matthiasm@107 224 loglikelihood.hasKnownExtents = false;
matthiasm@107 225 loglikelihood.isQuantized = false;
matthiasm@107 226 loglikelihood.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@107 227 loglikelihood.hasDuration = false;
matthiasm@107 228 list.push_back(loglikelihood);
matthiasm@107 229 m_outputLoglikelihood = index++;
matthiasm@106 230
matthiasm@0 231 return list;
matthiasm@0 232 }
matthiasm@0 233
matthiasm@0 234 bool
Chris@35 235 Chordino::initialise(size_t channels, size_t stepSize, size_t blockSize)
matthiasm@0 236 {
Chris@23 237 if (debug_on) {
Chris@23 238 cerr << "--> initialise";
Chris@23 239 }
mail@76 240
Chris@35 241 if (!NNLSBase::initialise(channels, stepSize, blockSize)) {
Chris@35 242 return false;
Chris@35 243 }
mail@115 244 m_chordnames = chordDictionary(&m_chorddict, &m_chordnotes, m_boostN, m_harte_syntax);
matthiasm@0 245 return true;
matthiasm@0 246 }
matthiasm@0 247
matthiasm@0 248 void
Chris@35 249 Chordino::reset()
matthiasm@0 250 {
Chris@23 251 if (debug_on) cerr << "--> reset";
Chris@35 252 NNLSBase::reset();
matthiasm@0 253 }
matthiasm@0 254
Chris@35 255 Chordino::FeatureSet
Chris@35 256 Chordino::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
matthiasm@0 257 {
Chris@23 258 if (debug_on) cerr << "--> process" << endl;
matthiasm@0 259
Chris@35 260 NNLSBase::baseProcess(inputBuffers, timestamp);
matthiasm@0 261
Chris@35 262 return FeatureSet();
matthiasm@0 263 }
matthiasm@0 264
Chris@35 265 Chordino::FeatureSet
Chris@35 266 Chordino::getRemainingFeatures()
matthiasm@0 267 {
mail@89 268 // cerr << hw[0] << hw[1] << endl;
mail@89 269 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
Chris@23 270 FeatureSet fsOut;
Chris@35 271 if (m_logSpectrum.size() == 0) return fsOut;
Chris@23 272 int nChord = m_chordnames.size();
Chris@23 273 //
Chris@23 274 /** Calculate Tuning
Chris@23 275 calculate tuning from (using the angle of the complex number defined by the
Chris@23 276 cumulative mean real and imag values)
Chris@23 277 **/
mail@80 278 float meanTuningImag = 0;
mail@80 279 float meanTuningReal = 0;
mail@80 280 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
mail@80 281 meanTuningReal += m_meanTunings[iBPS] * cosvalues[iBPS];
mail@80 282 meanTuningImag += m_meanTunings[iBPS] * sinvalues[iBPS];
mail@80 283 }
Chris@23 284 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
Chris@23 285 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
Chris@23 286 int intShift = floor(normalisedtuning * 3);
mail@80 287 float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this
matthiasm@1 288
Chris@23 289 char buffer0 [50];
matthiasm@1 290
Chris@23 291 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
matthiasm@1 292
matthiasm@1 293
Chris@23 294 /** Tune Log-Frequency Spectrogram
matthiasm@43 295 calculate a tuned log-frequency spectrogram (currentTunedSpec): use the tuning estimated above (kinda f0) to
Chris@91 296 perform linear interpolation on the existing log-frequency spectrogram (kinda currentLogSpectrum).
Chris@23 297 **/
Chris@35 298 cerr << endl << "[Chordino Plugin] Tuning Log-Frequency Spectrogram ... ";
matthiasm@13 299
Chris@23 300 int count = 0;
matthiasm@1 301
Chris@35 302 FeatureList tunedSpec;
matthiasm@43 303 int nFrame = m_logSpectrum.size();
matthiasm@43 304
matthiasm@43 305 vector<Vamp::RealTime> timestamps;
Chris@35 306
Chris@35 307 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
Chris@91 308 Feature currentLogSpectrum = *i;
matthiasm@43 309 Feature currentTunedSpec; // tuned log-frequency spectrum
matthiasm@43 310 currentTunedSpec.hasTimestamp = true;
Chris@91 311 currentTunedSpec.timestamp = currentLogSpectrum.timestamp;
Chris@91 312 timestamps.push_back(currentLogSpectrum.timestamp);
matthiasm@43 313 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // set lower edge to zero
matthiasm@1 314
Chris@23 315 if (m_tuneLocal) {
Chris@23 316 intShift = floor(m_localTuning[count] * 3);
mail@80 317 floatShift = m_localTuning[count] * 3 - intShift; // floatShift is a really bad name for this
Chris@23 318 }
matthiasm@1 319
mail@80 320 // cerr << intShift << " " << floatShift << endl;
matthiasm@1 321
Chris@91 322 for (int k = 2; k < (int)currentLogSpectrum.values.size() - 3; ++k) { // interpolate all inner bins
mail@115 323 float tempValue = currentLogSpectrum.values[k + intShift] * (1-floatShift) + currentLogSpectrum.values[k+intShift+1] * floatShift;
matthiasm@43 324 currentTunedSpec.values.push_back(tempValue);
Chris@23 325 }
matthiasm@1 326
matthiasm@43 327 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // upper edge
matthiasm@43 328 vector<float> runningmean = SpecialConvolution(currentTunedSpec.values,hw);
Chris@23 329 vector<float> runningstd;
mail@77 330 for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance)
matthiasm@43 331 runningstd.push_back((currentTunedSpec.values[i] - runningmean[i]) * (currentTunedSpec.values[i] - runningmean[i]));
Chris@23 332 }
Chris@23 333 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
mail@77 334 for (int i = 0; i < nNote; i++) {
Chris@23 335 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
Chris@23 336 if (runningstd[i] > 0) {
matthiasm@43 337 // currentTunedSpec.values[i] = (currentTunedSpec.values[i] / runningmean[i]) > thresh ?
matthiasm@43 338 // (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
matthiasm@43 339 currentTunedSpec.values[i] = (currentTunedSpec.values[i] - runningmean[i]) > 0 ?
matthiasm@43 340 (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
Chris@23 341 }
matthiasm@43 342 if (currentTunedSpec.values[i] < 0) {
Chris@23 343 cerr << "ERROR: negative value in logfreq spectrum" << endl;
Chris@23 344 }
Chris@23 345 }
matthiasm@43 346 tunedSpec.push_back(currentTunedSpec);
Chris@23 347 count++;
Chris@23 348 }
Chris@23 349 cerr << "done." << endl;
matthiasm@1 350
Chris@23 351 /** Semitone spectrum and chromagrams
Chris@23 352 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
Chris@23 353 is inferred using a non-negative least squares algorithm.
Chris@23 354 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
Chris@23 355 bass and treble stacked onto each other).
Chris@23 356 **/
matthiasm@42 357 if (m_useNNLS == 0) {
Chris@35 358 cerr << "[Chordino Plugin] Mapping to semitone spectrum and chroma ... ";
Chris@23 359 } else {
Chris@35 360 cerr << "[Chordino Plugin] Performing NNLS and mapping to chroma ... ";
Chris@23 361 }
matthiasm@13 362
matthiasm@1 363
matthiasm@43 364 vector<vector<double> > chordogram;
Chris@23 365 vector<vector<int> > scoreChordogram;
Chris@35 366 vector<float> chordchange = vector<float>(tunedSpec.size(),0);
Chris@23 367 count = 0;
matthiasm@9 368
Chris@35 369 FeatureList chromaList;
matthiasm@43 370
matthiasm@43 371
Chris@35 372
Chris@35 373 for (FeatureList::iterator it = tunedSpec.begin(); it != tunedSpec.end(); ++it) {
matthiasm@43 374 Feature currentTunedSpec = *it; // logfreq spectrum
matthiasm@43 375 Feature currentChromas; // treble and bass chromagram
Chris@35 376
matthiasm@43 377 currentChromas.hasTimestamp = true;
matthiasm@43 378 currentChromas.timestamp = currentTunedSpec.timestamp;
Chris@35 379
mail@77 380 float b[nNote];
matthiasm@1 381
Chris@23 382 bool some_b_greater_zero = false;
Chris@23 383 float sumb = 0;
mail@77 384 for (int i = 0; i < nNote; i++) {
mail@77 385 // b[i] = m_dict[(nNote * count + i) % (nNote * 84)];
matthiasm@43 386 b[i] = currentTunedSpec.values[i];
Chris@23 387 sumb += b[i];
Chris@23 388 if (b[i] > 0) {
Chris@23 389 some_b_greater_zero = true;
Chris@23 390 }
Chris@23 391 }
matthiasm@1 392
Chris@23 393 // here's where the non-negative least squares algorithm calculates the note activation x
matthiasm@1 394
Chris@23 395 vector<float> chroma = vector<float>(12, 0);
Chris@23 396 vector<float> basschroma = vector<float>(12, 0);
Chris@23 397 float currval;
Chris@91 398 int iSemitone = 0;
matthiasm@1 399
Chris@23 400 if (some_b_greater_zero) {
matthiasm@42 401 if (m_useNNLS == 0) {
Chris@91 402 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
Chris@23 403 currval = 0;
mail@81 404 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
mail@81 405 currval += b[iNote + iBPS] * (1-abs(iBPS*1.0/(nBPS/2+1)));
mail@81 406 }
Chris@23 407 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
Chris@23 408 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
Chris@23 409 iSemitone++;
Chris@23 410 }
matthiasm@1 411
Chris@23 412 } else {
Chris@35 413 float x[84+1000];
Chris@23 414 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
Chris@23 415 vector<int> signifIndex;
Chris@23 416 int index=0;
Chris@23 417 sumb /= 84.0;
Chris@91 418 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
Chris@23 419 float currval = 0;
mail@81 420 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
mail@81 421 currval += b[iNote + iBPS];
mail@81 422 }
Chris@23 423 if (currval > 0) signifIndex.push_back(index);
Chris@23 424 index++;
Chris@23 425 }
Chris@35 426 float rnorm;
Chris@35 427 float w[84+1000];
Chris@35 428 float zz[84+1000];
Chris@23 429 int indx[84+1000];
Chris@23 430 int mode;
mail@77 431 int dictsize = nNote*signifIndex.size();
mail@81 432 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
Chris@35 433 float *curr_dict = new float[dictsize];
Chris@91 434 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
Chris@91 435 for (int iBin = 0; iBin < nNote; iBin++) {
mail@77 436 curr_dict[iNote * nNote + iBin] = 1.0 * m_dict[signifIndex[iNote] * nNote + iBin];
Chris@23 437 }
Chris@23 438 }
Chris@35 439 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
Chris@23 440 delete [] curr_dict;
Chris@91 441 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
Chris@23 442 // cerr << mode << endl;
Chris@23 443 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
Chris@23 444 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
Chris@23 445 }
Chris@23 446 }
Chris@23 447 }
Chris@35 448
Chris@35 449 vector<float> origchroma = chroma;
Chris@23 450 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
matthiasm@43 451 currentChromas.values = chroma;
Chris@35 452
Chris@23 453 if (m_doNormalizeChroma > 0) {
Chris@23 454 vector<float> chromanorm = vector<float>(3,0);
Chris@23 455 switch (int(m_doNormalizeChroma)) {
Chris@23 456 case 0: // should never end up here
Chris@23 457 break;
Chris@23 458 case 1:
Chris@35 459 chromanorm[0] = *max_element(origchroma.begin(), origchroma.end());
Chris@35 460 chromanorm[1] = *max_element(basschroma.begin(), basschroma.end());
Chris@23 461 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
Chris@23 462 break;
Chris@23 463 case 2:
Chris@35 464 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
Chris@23 465 chromanorm[2] += *it;
Chris@23 466 }
Chris@23 467 break;
Chris@23 468 case 3:
Chris@35 469 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
Chris@23 470 chromanorm[2] += pow(*it,2);
Chris@23 471 }
Chris@23 472 chromanorm[2] = sqrt(chromanorm[2]);
Chris@23 473 break;
Chris@23 474 }
Chris@23 475 if (chromanorm[2] > 0) {
Chris@91 476 for (int i = 0; i < (int)chroma.size(); i++) {
matthiasm@43 477 currentChromas.values[i] /= chromanorm[2];
Chris@23 478 }
Chris@23 479 }
Chris@23 480 }
Chris@35 481
matthiasm@43 482 chromaList.push_back(currentChromas);
Chris@35 483
Chris@23 484 // local chord estimation
matthiasm@43 485 vector<double> currentChordSalience;
matthiasm@43 486 double tempchordvalue = 0;
matthiasm@43 487 double sumchordvalue = 0;
matthiasm@9 488
Chris@23 489 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23 490 tempchordvalue = 0;
Chris@23 491 for (int iBin = 0; iBin < 12; iBin++) {
matthiasm@44 492 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23 493 }
Chris@23 494 for (int iBin = 12; iBin < 24; iBin++) {
Chris@23 495 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23 496 }
matthiasm@48 497 if (iChord == nChord-1) tempchordvalue *= .7;
matthiasm@48 498 if (tempchordvalue < 0) tempchordvalue = 0.0;
matthiasm@50 499 tempchordvalue = pow(1.3,tempchordvalue);
Chris@23 500 sumchordvalue+=tempchordvalue;
Chris@23 501 currentChordSalience.push_back(tempchordvalue);
Chris@23 502 }
Chris@23 503 if (sumchordvalue > 0) {
Chris@23 504 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23 505 currentChordSalience[iChord] /= sumchordvalue;
Chris@23 506 }
Chris@23 507 } else {
Chris@23 508 currentChordSalience[nChord-1] = 1.0;
Chris@23 509 }
Chris@23 510 chordogram.push_back(currentChordSalience);
matthiasm@1 511
Chris@23 512 count++;
Chris@23 513 }
Chris@23 514 cerr << "done." << endl;
matthiasm@13 515
matthiasm@86 516 vector<Feature> oldnotes;
matthiasm@10 517
matthiasm@131 518 cerr << "[Chordino Plugin] HMM Chord Estimation ... ";
matthiasm@131 519 int oldchord = nChord-1;
matthiasm@131 520 double selftransprob = 0.99;
matthiasm@131 521
matthiasm@131 522 // vector<double> init = vector<double>(nChord,1.0/nChord);
matthiasm@131 523 vector<double> init = vector<double>(nChord,0); init[nChord-1] = 1;
matthiasm@131 524
matthiasm@131 525 double *delta;
matthiasm@131 526 delta = (double *)malloc(sizeof(double)*nFrame*nChord);
matthiasm@131 527
matthiasm@131 528 vector<vector<double> > trans;
matthiasm@131 529 for (int iChord = 0; iChord < nChord; iChord++) {
matthiasm@131 530 vector<double> temp = vector<double>(nChord,(1-selftransprob)/(nChord-1));
matthiasm@131 531 temp[iChord] = selftransprob;
matthiasm@131 532 trans.push_back(temp);
matthiasm@131 533 }
matthiasm@131 534 vector<double> scale;
matthiasm@131 535 vector<int> chordpath = ViterbiPath(init, trans, chordogram, delta, &scale);
matthiasm@131 536
matthiasm@131 537 Feature chord_feature; // chord estimate
matthiasm@131 538 chord_feature.hasTimestamp = true;
matthiasm@131 539 chord_feature.timestamp = timestamps[0];
matthiasm@131 540 chord_feature.label = m_chordnames[chordpath[0]];
matthiasm@131 541 fsOut[m_outputChords].push_back(chord_feature);
matthiasm@131 542
matthiasm@131 543 chordchange[0] = 0;
matthiasm@131 544 for (int iFrame = 1; iFrame < (int)chordpath.size(); ++iFrame) {
matthiasm@131 545 if (chordpath[iFrame] != oldchord ) {
matthiasm@131 546 // chord
matthiasm@131 547 Feature chord_feature; // chord estimate
matthiasm@131 548 chord_feature.hasTimestamp = true;
matthiasm@131 549 chord_feature.timestamp = timestamps[iFrame];
matthiasm@131 550 chord_feature.label = m_chordnames[chordpath[iFrame]];
matthiasm@131 551 fsOut[m_outputChords].push_back(chord_feature);
matthiasm@131 552 oldchord = chordpath[iFrame];
matthiasm@131 553 // chord notes
matthiasm@131 554 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
matthiasm@131 555 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[iFrame];
matthiasm@131 556 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
Chris@23 557 }
matthiasm@131 558 oldnotes.clear();
matthiasm@131 559 for (int iNote = 0; iNote < (int)m_chordnotes[chordpath[iFrame]].size(); ++iNote) { // prepare notes of current chord
matthiasm@131 560 Feature chordnote_feature;
matthiasm@131 561 chordnote_feature.hasTimestamp = true;
matthiasm@131 562 chordnote_feature.timestamp = timestamps[iFrame];
matthiasm@131 563 chordnote_feature.values.push_back(m_chordnotes[chordpath[iFrame]][iNote]);
matthiasm@131 564 chordnote_feature.hasDuration = true;
matthiasm@131 565 chordnote_feature.duration = -timestamps[iFrame]; // this will be corrected at the next chord
matthiasm@131 566 oldnotes.push_back(chordnote_feature);
matthiasm@50 567 }
Chris@23 568 }
matthiasm@131 569 /* calculating simple chord change prob */
matthiasm@131 570 for (int iChord = 0; iChord < nChord; iChord++) {
matthiasm@131 571 chordchange[iFrame-1] += delta[(iFrame-1)*nChord + iChord] * log(delta[(iFrame-1)*nChord + iChord]/delta[iFrame*nChord + iChord]);
Chris@23 572 }
Chris@23 573 }
matthiasm@131 574
matthiasm@131 575 float logscale = 0;
matthiasm@131 576 for (int iFrame = 0; iFrame < nFrame; ++iFrame) {
matthiasm@131 577 logscale -= log(scale[iFrame]);
matthiasm@131 578 Feature loglikelihood;
matthiasm@131 579 loglikelihood.hasTimestamp = true;
matthiasm@131 580 loglikelihood.timestamp = timestamps[iFrame];
matthiasm@131 581 loglikelihood.values.push_back(-log(scale[iFrame]));
matthiasm@131 582 // cerr << chordchange[iFrame] << endl;
matthiasm@131 583 fsOut[m_outputLoglikelihood].push_back(loglikelihood);
matthiasm@131 584 }
matthiasm@131 585 logscale /= nFrame;
matthiasm@131 586
matthiasm@43 587 chord_feature.hasTimestamp = true;
matthiasm@43 588 chord_feature.timestamp = timestamps[timestamps.size()-1];
matthiasm@43 589 chord_feature.label = "N";
mail@60 590 fsOut[m_outputChords].push_back(chord_feature);
matthiasm@86 591
Chris@91 592 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
matthiasm@86 593 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[timestamps.size()-1];
matthiasm@86 594 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
matthiasm@86 595 }
matthiasm@86 596
Chris@23 597 cerr << "done." << endl;
Chris@159 598
matthiasm@50 599 for (int iFrame = 0; iFrame < nFrame; iFrame++) {
matthiasm@50 600 Feature chordchange_feature;
matthiasm@50 601 chordchange_feature.hasTimestamp = true;
matthiasm@50 602 chordchange_feature.timestamp = timestamps[iFrame];
matthiasm@50 603 chordchange_feature.values.push_back(chordchange[iFrame]);
mail@60 604 // cerr << chordchange[iFrame] << endl;
mail@60 605 fsOut[m_outputHarmonicChange].push_back(chordchange_feature);
matthiasm@50 606 }
matthiasm@50 607
mail@60 608 // for (int iFrame = 0; iFrame < nFrame; iFrame++) cerr << fsOut[m_outputHarmonicChange][iFrame].values[0] << endl;
matthiasm@50 609
Chris@23 610 return fsOut;
matthiasm@0 611 }