annotate NNLSBase.cpp @ 80:026a5c0ee2c2 matthiasm-plugin

bins per semitone can now be chosen in chromamethods.h
author Matthias Mauch <mail@matthiasmauch.net>
date Thu, 11 Nov 2010 15:11:05 +0900
parents ba930176df5b
children 4270f3039ab0
rev   line source
Chris@23 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
matthiasm@0 2
Chris@35 3 /*
Chris@35 4 NNLS-Chroma / Chordino
Chris@35 5
Chris@35 6 Audio feature extraction plugins for chromagram and chord
Chris@35 7 estimation.
Chris@35 8
Chris@35 9 Centre for Digital Music, Queen Mary University of London.
Chris@35 10 This file copyright 2008-2010 Matthias Mauch and QMUL.
Chris@35 11
Chris@35 12 This program is free software; you can redistribute it and/or
Chris@35 13 modify it under the terms of the GNU General Public License as
Chris@35 14 published by the Free Software Foundation; either version 2 of the
Chris@35 15 License, or (at your option) any later version. See the file
Chris@35 16 COPYING included with this distribution for more information.
Chris@35 17 */
Chris@35 18
Chris@35 19 #include "NNLSBase.h"
Chris@27 20
Chris@27 21 #include "chromamethods.h"
Chris@27 22
Chris@27 23 #include <cstdlib>
Chris@27 24 #include <fstream>
matthiasm@0 25 #include <cmath>
matthiasm@9 26
Chris@27 27 #include <algorithm>
matthiasm@0 28
// Set to true to have the methods in this file write "--> <method name>" trace lines to cerr.
matthiasm@0 29 const bool debug_on = false;
matthiasm@0 30
Chris@35 31 NNLSBase::NNLSBase(float inputSampleRate) :
Chris@23 32 Plugin(inputSampleRate),
Chris@35 33 m_logSpectrum(0),
Chris@23 34 m_blockSize(0),
Chris@23 35 m_stepSize(0),
Chris@23 36 m_lengthOfNoteIndex(0),
mail@80 37 m_meanTunings(0),
mail@80 38 m_localTunings(0),
mail@41 39 m_whitening(1.0),
Chris@23 40 m_preset(0.0),
Chris@23 41 m_localTuning(0),
Chris@23 42 m_kernelValue(0),
Chris@23 43 m_kernelFftIndex(0),
Chris@23 44 m_kernelNoteIndex(0),
Chris@23 45 m_dict(0),
mail@60 46 m_tuneLocal(0),
Chris@23 47 m_chorddict(0),
Chris@23 48 m_chordnames(0),
Chris@23 49 m_doNormalizeChroma(0),
mail@60 50 m_rollon(0),
matthiasm@42 51 m_s(0.7),
matthiasm@50 52 m_useNNLS(1),
mail@80 53 m_useHMM(1),
mail@80 54 sinvalues(0),
mail@80 55 cosvalues(0)
matthiasm@0 56 {
Chris@35 57 if (debug_on) cerr << "--> NNLSBase" << endl;
matthiasm@7 58
Chris@23 59 // make the *note* dictionary matrix
Chris@23 60 m_dict = new float[nNote * 84];
Chris@23 61 for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0;
mail@41 62 dictionaryMatrix(m_dict, 0.7);
matthiasm@7 63
Chris@23 64 // get the *chord* dictionary from file (if the file exists)
Chris@23 65 m_chordnames = chordDictionary(&m_chorddict);
matthiasm@0 66 }
matthiasm@0 67
matthiasm@0 68
Chris@35 69 NNLSBase::~NNLSBase()
matthiasm@0 70 {
Chris@35 71 if (debug_on) cerr << "--> ~NNLSBase" << endl;
Chris@23 72 delete [] m_dict;
matthiasm@0 73 }
matthiasm@0 74
matthiasm@0 75 string
Chris@35 76 NNLSBase::getMaker() const
matthiasm@0 77 {
Chris@23 78 if (debug_on) cerr << "--> getMaker" << endl;
matthiasm@0 79 // Your name here
matthiasm@0 80 return "Matthias Mauch";
matthiasm@0 81 }
matthiasm@0 82
matthiasm@0 83 int
Chris@35 84 NNLSBase::getPluginVersion() const
matthiasm@0 85 {
Chris@23 86 if (debug_on) cerr << "--> getPluginVersion" << endl;
matthiasm@0 87 // Increment this each time you release a version that behaves
matthiasm@0 88 // differently from the previous one
matthiasm@0 89 return 1;
matthiasm@0 90 }
matthiasm@0 91
matthiasm@0 92 string
Chris@35 93 NNLSBase::getCopyright() const
matthiasm@0 94 {
Chris@23 95 if (debug_on) cerr << "--> getCopyright" << endl;
matthiasm@0 96 // This function is not ideally named. It does not necessarily
matthiasm@0 97 // need to say who made the plugin -- getMaker does that -- but it
matthiasm@0 98 // should indicate the terms under which it is distributed. For
matthiasm@0 99 // example, "Copyright (year). All Rights Reserved", or "GPL"
Chris@35 100 return "GPL";
matthiasm@0 101 }
matthiasm@0 102
Chris@35 103 NNLSBase::InputDomain
Chris@35 104 NNLSBase::getInputDomain() const
matthiasm@0 105 {
Chris@23 106 if (debug_on) cerr << "--> getInputDomain" << endl;
matthiasm@0 107 return FrequencyDomain;
matthiasm@0 108 }
matthiasm@0 109
matthiasm@0 110 size_t
Chris@35 111 NNLSBase::getPreferredBlockSize() const
matthiasm@0 112 {
Chris@23 113 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
matthiasm@0 114 return 16384; // 0 means "I can handle any block size"
matthiasm@0 115 }
matthiasm@0 116
matthiasm@0 117 size_t
Chris@35 118 NNLSBase::getPreferredStepSize() const
matthiasm@0 119 {
Chris@23 120 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
matthiasm@0 121 return 2048; // 0 means "anything sensible"; in practice this
Chris@23 122 // means the same as the block size for TimeDomain
Chris@23 123 // plugins, or half of it for FrequencyDomain plugins
matthiasm@0 124 }
matthiasm@0 125
matthiasm@0 126 size_t
Chris@35 127 NNLSBase::getMinChannelCount() const
matthiasm@0 128 {
Chris@23 129 if (debug_on) cerr << "--> getMinChannelCount" << endl;
matthiasm@0 130 return 1;
matthiasm@0 131 }
matthiasm@0 132
matthiasm@0 133 size_t
Chris@35 134 NNLSBase::getMaxChannelCount() const
matthiasm@0 135 {
Chris@23 136 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
matthiasm@0 137 return 1;
matthiasm@0 138 }
matthiasm@0 139
Chris@35 140 NNLSBase::ParameterList
Chris@35 141 NNLSBase::getParameterDescriptors() const
matthiasm@0 142 {
Chris@23 143 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
matthiasm@0 144 ParameterList list;
matthiasm@0 145
matthiasm@42 146 ParameterDescriptor d;
matthiasm@42 147 d.identifier = "useNNLS";
matthiasm@42 148 d.name = "use approximate transcription (NNLS)";
matthiasm@42 149 d.description = "Toggles approximate transcription (NNLS).";
matthiasm@42 150 d.unit = "";
matthiasm@42 151 d.minValue = 0.0;
matthiasm@42 152 d.maxValue = 1.0;
matthiasm@42 153 d.defaultValue = 1.0;
matthiasm@42 154 d.isQuantized = true;
matthiasm@42 155 d.quantizeStep = 1.0;
matthiasm@42 156 list.push_back(d);
matthiasm@42 157
mail@41 158 ParameterDescriptor d0;
mail@41 159 d0.identifier = "rollon";
mail@41 160 d0.name = "spectral roll-on";
matthiasm@58 161 d0.description = "Consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds the quantile [spectral roll on] x [total energy] will be set to 0. A value of 0 means that no bins will be changed.";
matthiasm@59 162 d0.unit = "%";
mail@41 163 d0.minValue = 0;
matthiasm@59 164 d0.maxValue = 5;
mail@41 165 d0.defaultValue = 0;
matthiasm@48 166 d0.isQuantized = true;
matthiasm@59 167 d0.quantizeStep = 0.5;
mail@41 168 list.push_back(d0);
matthiasm@4 169
matthiasm@4 170 ParameterDescriptor d1;
matthiasm@4 171 d1.identifier = "tuningmode";
matthiasm@4 172 d1.name = "tuning mode";
matthiasm@4 173 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
matthiasm@4 174 d1.unit = "";
matthiasm@4 175 d1.minValue = 0;
matthiasm@4 176 d1.maxValue = 1;
matthiasm@4 177 d1.defaultValue = 0;
matthiasm@4 178 d1.isQuantized = true;
matthiasm@4 179 d1.valueNames.push_back("global tuning");
matthiasm@4 180 d1.valueNames.push_back("local tuning");
matthiasm@4 181 d1.quantizeStep = 1.0;
matthiasm@4 182 list.push_back(d1);
matthiasm@4 183
mail@41 184 ParameterDescriptor d2;
mail@41 185 d2.identifier = "whitening";
mail@41 186 d2.name = "spectral whitening";
mail@41 187 d2.description = "Spectral whitening: no whitening - 0; whitening - 1.";
mail@41 188 d2.unit = "";
mail@41 189 d2.isQuantized = true;
mail@41 190 d2.minValue = 0.0;
mail@41 191 d2.maxValue = 1.0;
mail@41 192 d2.defaultValue = 1.0;
mail@41 193 d2.isQuantized = false;
mail@41 194 list.push_back(d2);
mail@41 195
mail@41 196 ParameterDescriptor d3;
mail@41 197 d3.identifier = "s";
mail@41 198 d3.name = "spectral shape";
mail@41 199 d3.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
mail@41 200 d3.unit = "";
mail@41 201 d3.minValue = 0.5;
mail@41 202 d3.maxValue = 0.9;
mail@41 203 d3.defaultValue = 0.7;
mail@41 204 d3.isQuantized = false;
mail@41 205 list.push_back(d3);
mail@41 206
Chris@23 207 ParameterDescriptor d4;
matthiasm@12 208 d4.identifier = "chromanormalize";
matthiasm@12 209 d4.name = "chroma normalization";
matthiasm@12 210 d4.description = "How shall the chroma vector be normalized?";
matthiasm@12 211 d4.unit = "";
matthiasm@12 212 d4.minValue = 0;
matthiasm@13 213 d4.maxValue = 3;
matthiasm@12 214 d4.defaultValue = 0;
matthiasm@12 215 d4.isQuantized = true;
matthiasm@13 216 d4.valueNames.push_back("none");
matthiasm@13 217 d4.valueNames.push_back("maximum norm");
Chris@23 218 d4.valueNames.push_back("L1 norm");
Chris@23 219 d4.valueNames.push_back("L2 norm");
matthiasm@12 220 d4.quantizeStep = 1.0;
matthiasm@12 221 list.push_back(d4);
matthiasm@4 222
matthiasm@0 223 return list;
matthiasm@0 224 }
matthiasm@0 225
matthiasm@0 226 float
Chris@35 227 NNLSBase::getParameter(string identifier) const
matthiasm@0 228 {
Chris@23 229 if (debug_on) cerr << "--> getParameter" << endl;
matthiasm@42 230 if (identifier == "useNNLS") {
matthiasm@42 231 return m_useNNLS;
matthiasm@0 232 }
matthiasm@0 233
mail@41 234 if (identifier == "whitening") {
mail@41 235 return m_whitening;
mail@41 236 }
mail@41 237
mail@41 238 if (identifier == "s") {
mail@41 239 return m_s;
matthiasm@0 240 }
matthiasm@17 241
Chris@23 242 if (identifier == "rollon") {
matthiasm@17 243 return m_rollon;
matthiasm@17 244 }
matthiasm@0 245
matthiasm@0 246 if (identifier == "tuningmode") {
matthiasm@0 247 if (m_tuneLocal) {
matthiasm@0 248 return 1.0;
matthiasm@0 249 } else {
matthiasm@0 250 return 0.0;
matthiasm@0 251 }
matthiasm@0 252 }
Chris@23 253 if (identifier == "preset") {
Chris@23 254 return m_preset;
matthiasm@3 255 }
Chris@23 256 if (identifier == "chromanormalize") {
Chris@23 257 return m_doNormalizeChroma;
matthiasm@12 258 }
matthiasm@50 259
matthiasm@50 260 if (identifier == "useHMM") {
matthiasm@50 261 return m_useHMM;
matthiasm@50 262 }
matthiasm@50 263
matthiasm@0 264 return 0;
matthiasm@0 265
matthiasm@0 266 }
matthiasm@0 267
matthiasm@0 268 void
Chris@35 269 NNLSBase::setParameter(string identifier, float value)
matthiasm@0 270 {
Chris@23 271 if (debug_on) cerr << "--> setParameter" << endl;
matthiasm@42 272 if (identifier == "useNNLS") {
matthiasm@42 273 m_useNNLS = (int) value;
matthiasm@0 274 }
matthiasm@0 275
mail@41 276 if (identifier == "whitening") {
mail@41 277 m_whitening = value;
matthiasm@0 278 }
matthiasm@0 279
mail@41 280 if (identifier == "s") {
mail@41 281 m_s = value;
mail@41 282 }
mail@41 283
matthiasm@50 284 if (identifier == "useHMM") {
matthiasm@50 285 m_useHMM = value;
matthiasm@50 286 }
matthiasm@50 287
matthiasm@0 288 if (identifier == "tuningmode") {
mail@60 289 // m_tuneLocal = (value > 0) ? true : false;
mail@60 290 m_tuneLocal = value;
matthiasm@0 291 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
matthiasm@0 292 }
matthiasm@42 293 // if (identifier == "preset") {
matthiasm@42 294 // m_preset = value;
matthiasm@42 295 // if (m_preset == 0.0) {
matthiasm@42 296 // m_tuneLocal = false;
matthiasm@42 297 // m_whitening = 1.0;
matthiasm@42 298 // m_dictID = 0.0;
matthiasm@42 299 // }
matthiasm@42 300 // if (m_preset == 1.0) {
matthiasm@42 301 // m_tuneLocal = false;
matthiasm@42 302 // m_whitening = 1.0;
matthiasm@42 303 // m_dictID = 1.0;
matthiasm@42 304 // }
matthiasm@42 305 // if (m_preset == 2.0) {
matthiasm@42 306 // m_tuneLocal = false;
matthiasm@42 307 // m_whitening = 0.7;
matthiasm@42 308 // m_dictID = 0.0;
matthiasm@42 309 // }
matthiasm@42 310 // }
Chris@23 311 if (identifier == "chromanormalize") {
Chris@23 312 m_doNormalizeChroma = value;
Chris@23 313 }
matthiasm@17 314
Chris@23 315 if (identifier == "rollon") {
Chris@23 316 m_rollon = value;
Chris@23 317 }
matthiasm@0 318 }
matthiasm@0 319
Chris@35 320 NNLSBase::ProgramList
Chris@35 321 NNLSBase::getPrograms() const
matthiasm@0 322 {
Chris@23 323 if (debug_on) cerr << "--> getPrograms" << endl;
matthiasm@0 324 ProgramList list;
matthiasm@0 325
matthiasm@0 326 // If you have no programs, return an empty list (or simply don't
matthiasm@0 327 // implement this function or getCurrentProgram/selectProgram)
matthiasm@0 328
matthiasm@0 329 return list;
matthiasm@0 330 }
matthiasm@0 331
matthiasm@0 332 string
Chris@35 333 NNLSBase::getCurrentProgram() const
matthiasm@0 334 {
Chris@23 335 if (debug_on) cerr << "--> getCurrentProgram" << endl;
matthiasm@0 336 return ""; // no programs
matthiasm@0 337 }
matthiasm@0 338
matthiasm@0 339 void
Chris@35 340 NNLSBase::selectProgram(string name)
matthiasm@0 341 {
Chris@23 342 if (debug_on) cerr << "--> selectProgram" << endl;
matthiasm@0 343 }
matthiasm@0 344
matthiasm@0 345
matthiasm@0 346 bool
Chris@35 347 NNLSBase::initialise(size_t channels, size_t stepSize, size_t blockSize)
matthiasm@0 348 {
Chris@23 349 if (debug_on) {
Chris@23 350 cerr << "--> initialise";
Chris@23 351 }
matthiasm@1 352
mail@80 353 // make things for tuning estimation
mail@80 354 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
mail@80 355 sinvalues.push_back(sin(2*M_PI*(iBPS*1.0/nBPS)));
mail@80 356 cosvalues.push_back(cos(2*M_PI*(iBPS*1.0/nBPS)));
mail@80 357 }
mail@80 358
mail@80 359
mail@80 360 // make hamming window of length 1/2 octave
mail@76 361 int hamwinlength = nBPS * 6 + 1;
mail@76 362 float hamwinsum = 0;
mail@76 363 for (int i = 0; i < hamwinlength; ++i) {
mail@76 364 hw.push_back(0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1)));
mail@76 365 hamwinsum += 0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1));
mail@76 366 }
mail@77 367 for (int i = 0; i < hamwinlength; ++i) hw[i] = hw[i] / hamwinsum;
mail@80 368
mail@80 369
mail@80 370 // initialise the tuning
mail@80 371 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
mail@80 372 m_meanTunings.push_back(0);
mail@80 373 m_localTunings.push_back(0);
mail@80 374 }
mail@76 375
matthiasm@0 376 if (channels < getMinChannelCount() ||
matthiasm@0 377 channels > getMaxChannelCount()) return false;
matthiasm@0 378 m_blockSize = blockSize;
matthiasm@0 379 m_stepSize = stepSize;
Chris@35 380 m_frameCount = 0;
mail@77 381 int tempn = nNote * m_blockSize/2;
Chris@23 382 // cerr << "length of tempkernel : " << tempn << endl;
Chris@23 383 float *tempkernel;
matthiasm@1 384
Chris@23 385 tempkernel = new float[tempn];
matthiasm@1 386
Chris@23 387 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
Chris@23 388 m_kernelValue.clear();
Chris@23 389 m_kernelFftIndex.clear();
Chris@23 390 m_kernelNoteIndex.clear();
Chris@23 391 int countNonzero = 0;
Chris@23 392 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
Chris@23 393 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) {
Chris@23 394 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
Chris@23 395 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
Chris@23 396 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
Chris@23 397 countNonzero++;
Chris@23 398 }
Chris@23 399 m_kernelFftIndex.push_back(iFFT);
Chris@23 400 m_kernelNoteIndex.push_back(iNote);
Chris@23 401 }
Chris@23 402 }
Chris@23 403 }
Chris@23 404 // cerr << "nonzero count : " << countNonzero << endl;
Chris@23 405 delete [] tempkernel;
Chris@35 406 /*
Chris@23 407 ofstream myfile;
Chris@23 408 myfile.open ("matrix.txt");
matthiasm@3 409 // myfile << "Writing this to a file.\n";
Chris@23 410 for (int i = 0; i < nNote * 84; ++i) {
Chris@23 411 myfile << m_dict[i] << endl;
Chris@23 412 }
matthiasm@3 413 myfile.close();
Chris@35 414 */
matthiasm@0 415 return true;
matthiasm@0 416 }
matthiasm@0 417
matthiasm@0 418 void
Chris@35 419 NNLSBase::reset()
matthiasm@0 420 {
Chris@23 421 if (debug_on) cerr << "--> reset";
matthiasm@4 422
matthiasm@0 423 // Clear buffers, reset stored values, etc
Chris@35 424 m_frameCount = 0;
matthiasm@42 425 // m_dictID = 0;
Chris@35 426 m_logSpectrum.clear();
mail@80 427 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
mail@80 428 m_meanTunings[iBPS] = 0;
mail@80 429 m_localTunings[iBPS] = 0;
mail@80 430 }
Chris@23 431 m_localTuning.clear();
matthiasm@0 432 }
matthiasm@0 433
Chris@35 434 void
Chris@35 435 NNLSBase::baseProcess(const float *const *inputBuffers, Vamp::RealTime timestamp)
matthiasm@0 436 {
Chris@35 437 m_frameCount++;
Chris@23 438 float *magnitude = new float[m_blockSize/2];
matthiasm@0 439
Chris@23 440 const float *fbuf = inputBuffers[0];
Chris@23 441 float energysum = 0;
Chris@23 442 // make magnitude
Chris@23 443 float maxmag = -10000;
Chris@23 444 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
Chris@23 445 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
Chris@23 446 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
Chris@23 447 if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin];
Chris@23 448 if (m_rollon > 0) {
Chris@23 449 energysum += pow(magnitude[iBin],2);
Chris@23 450 }
Chris@23 451 }
matthiasm@14 452
Chris@23 453 float cumenergy = 0;
Chris@23 454 if (m_rollon > 0) {
Chris@23 455 for (size_t iBin = 2; iBin < m_blockSize/2; iBin++) {
Chris@23 456 cumenergy += pow(magnitude[iBin],2);
matthiasm@59 457 if (cumenergy < energysum * m_rollon / 100) magnitude[iBin-2] = 0;
Chris@23 458 else break;
Chris@23 459 }
Chris@23 460 }
matthiasm@17 461
Chris@23 462 if (maxmag < 2) {
Chris@23 463 // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl;
Chris@23 464 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
Chris@23 465 magnitude[iBin] = 0;
Chris@23 466 }
Chris@23 467 }
matthiasm@4 468
Chris@23 469 // note magnitude mapping using pre-calculated matrix
Chris@23 470 float *nm = new float[nNote]; // note magnitude
Chris@23 471 for (size_t iNote = 0; iNote < nNote; iNote++) {
Chris@23 472 nm[iNote] = 0; // initialise as 0
Chris@23 473 }
Chris@23 474 int binCount = 0;
Chris@23 475 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
Chris@23 476 // cerr << ".";
Chris@23 477 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
Chris@23 478 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl;
Chris@23 479 binCount++;
Chris@23 480 }
Chris@23 481 // cerr << nm[20];
Chris@23 482 // cerr << endl;
matthiasm@0 483
matthiasm@0 484
Chris@35 485 float one_over_N = 1.0/m_frameCount;
matthiasm@0 486 // update means of complex tuning variables
mail@80 487 for (int iBPS = 0; iBPS < nBPS; ++iBPS) m_meanTunings[iBPS] *= float(m_frameCount-1)*one_over_N;
mail@80 488
mail@80 489 for (int iTone = 0; iTone < round(nNote*0.62/nBPS)*nBPS+1; iTone = iTone + nBPS) {
mail@80 490 for (int iBPS = 0; iBPS < nBPS; ++iBPS) m_meanTunings[iBPS] += nm[iTone + iBPS]*one_over_N;
Chris@23 491 float ratioOld = 0.997;
mail@80 492 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
mail@80 493 m_localTunings[iBPS] *= ratioOld;
mail@80 494 m_localTunings[iBPS] += nm[iTone + iBPS] * (1 - ratioOld);
mail@80 495 }
matthiasm@0 496 }
matthiasm@0 497 // if (m_tuneLocal) {
Chris@23 498 // local tuning
mail@80 499 // float localTuningImag = sinvalue * m_localTunings[1] - sinvalue * m_localTunings[2];
mail@80 500 // float localTuningReal = m_localTunings[0] + cosvalue * m_localTunings[1] + cosvalue * m_localTunings[2];
mail@80 501
mail@80 502 float localTuningImag = 0;
mail@80 503 float localTuningReal = 0;
mail@80 504 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
mail@80 505 localTuningReal += m_localTunings[iBPS] * cosvalues[iBPS];
mail@80 506 localTuningImag += m_localTunings[iBPS] * sinvalues[iBPS];
mail@80 507 }
mail@80 508
Chris@23 509 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
Chris@23 510 m_localTuning.push_back(normalisedtuning);
matthiasm@0 511
Chris@23 512 Feature f1; // logfreqspec
Chris@23 513 f1.hasTimestamp = true;
matthiasm@0 514 f1.timestamp = timestamp;
Chris@23 515 for (size_t iNote = 0; iNote < nNote; iNote++) {
Chris@23 516 f1.values.push_back(nm[iNote]);
Chris@23 517 }
matthiasm@0 518
matthiasm@0 519 // deletes
matthiasm@0 520 delete[] magnitude;
matthiasm@0 521 delete[] nm;
matthiasm@0 522
Chris@35 523 m_logSpectrum.push_back(f1); // remember note magnitude
matthiasm@0 524 }
matthiasm@0 525
Chris@35 526
Chris@35 527 #ifdef NOT_DEFINED
Chris@35 528
Chris@35 529 NNLSBase::FeatureSet
Chris@35 530 NNLSBase::getRemainingFeatures()
matthiasm@0 531 {
Chris@23 532 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
Chris@23 533 FeatureSet fsOut;
Chris@35 534 if (m_logSpectrum.size() == 0) return fsOut;
Chris@23 535 int nChord = m_chordnames.size();
Chris@23 536 //
Chris@23 537 /** Calculate Tuning
Chris@23 538 calculate tuning from (using the angle of the complex number defined by the
Chris@23 539 cumulative mean real and imag values)
Chris@23 540 **/
mail@80 541 float meanTuningImag = sinvalue * m_meanTunings[1] - sinvalue * m_meanTunings[2];
mail@80 542 float meanTuningReal = m_meanTunings[0] + cosvalue * m_meanTunings[1] + cosvalue * m_meanTunings[2];
Chris@23 543 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
Chris@23 544 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
Chris@23 545 int intShift = floor(normalisedtuning * 3);
mail@80 546 float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this
matthiasm@1 547
Chris@23 548 char buffer0 [50];
matthiasm@1 549
Chris@23 550 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
matthiasm@1 551
Chris@23 552 // cerr << "normalisedtuning: " << normalisedtuning << '\n';
matthiasm@1 553
Chris@23 554 // push tuning to FeatureSet fsOut
Chris@23 555 Feature f0; // tuning
Chris@23 556 f0.hasTimestamp = true;
Chris@23 557 f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));;
Chris@23 558 f0.label = buffer0;
Chris@23 559 fsOut[0].push_back(f0);
matthiasm@1 560
Chris@23 561 /** Tune Log-Frequency Spectrogram
Chris@23 562 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
Chris@23 563 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
Chris@23 564 **/
Chris@23 565 cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... ";
matthiasm@13 566
Chris@23 567 float tempValue = 0;
Chris@23 568 float dbThreshold = 0; // relative to the background spectrum
Chris@23 569 float thresh = pow(10,dbThreshold/20);
Chris@23 570 // cerr << "tune local ? " << m_tuneLocal << endl;
Chris@23 571 int count = 0;
matthiasm@1 572
Chris@35 573 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
Chris@23 574 Feature f1 = *i;
Chris@23 575 Feature f2; // tuned log-frequency spectrum
Chris@23 576 f2.hasTimestamp = true;
Chris@23 577 f2.timestamp = f1.timestamp;
Chris@23 578 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
matthiasm@1 579
mail@60 580 if (m_tuneLocal == 1.0) {
Chris@23 581 intShift = floor(m_localTuning[count] * 3);
mail@80 582 floatShift = m_localTuning[count] * 3 - intShift; // floatShift is a really bad name for this
Chris@23 583 }
matthiasm@1 584
mail@80 585 // cerr << intShift << " " << floatShift << endl;
matthiasm@1 586
Chris@23 587 for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
mail@80 588 tempValue = f1.values[k + intShift] * (1-floatShift) + f1.values[k+intShift+1] * floatShift;
Chris@23 589 f2.values.push_back(tempValue);
Chris@23 590 }
matthiasm@1 591
Chris@23 592 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
Chris@23 593 vector<float> runningmean = SpecialConvolution(f2.values,hw);
Chris@23 594 vector<float> runningstd;
mail@77 595 for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance)
Chris@23 596 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
Chris@23 597 }
Chris@23 598 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
mail@77 599 for (int i = 0; i < nNote; i++) {
Chris@23 600 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
Chris@23 601 if (runningstd[i] > 0) {
Chris@23 602 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
mail@41 603 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
Chris@23 604 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
mail@41 605 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
Chris@23 606 }
Chris@23 607 if (f2.values[i] < 0) {
Chris@23 608 cerr << "ERROR: negative value in logfreq spectrum" << endl;
Chris@23 609 }
Chris@23 610 }
Chris@23 611 fsOut[2].push_back(f2);
Chris@23 612 count++;
Chris@23 613 }
Chris@23 614 cerr << "done." << endl;
matthiasm@1 615
Chris@23 616 /** Semitone spectrum and chromagrams
Chris@23 617 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
Chris@23 618 is inferred using a non-negative least squares algorithm.
Chris@23 619 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
Chris@23 620 bass and treble stacked onto each other).
Chris@23 621 **/
matthiasm@42 622 if (m_useNNLS == 0) {
Chris@23 623 cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... ";
Chris@23 624 } else {
Chris@23 625 cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... ";
Chris@23 626 }
matthiasm@13 627
matthiasm@1 628
Chris@23 629 vector<vector<float> > chordogram;
Chris@23 630 vector<vector<int> > scoreChordogram;
Chris@23 631 vector<float> chordchange = vector<float>(fsOut[2].size(),0);
Chris@23 632 vector<float> oldchroma = vector<float>(12,0);
Chris@23 633 vector<float> oldbasschroma = vector<float>(12,0);
Chris@23 634 count = 0;
matthiasm@9 635
Chris@23 636 for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) {
Chris@23 637 Feature f2 = *it; // logfreq spectrum
Chris@23 638 Feature f3; // semitone spectrum
Chris@23 639 Feature f4; // treble chromagram
Chris@23 640 Feature f5; // bass chromagram
Chris@23 641 Feature f6; // treble and bass chromagram
matthiasm@1 642
Chris@23 643 f3.hasTimestamp = true;
Chris@23 644 f3.timestamp = f2.timestamp;
matthiasm@1 645
Chris@23 646 f4.hasTimestamp = true;
Chris@23 647 f4.timestamp = f2.timestamp;
matthiasm@1 648
Chris@23 649 f5.hasTimestamp = true;
Chris@23 650 f5.timestamp = f2.timestamp;
matthiasm@1 651
Chris@23 652 f6.hasTimestamp = true;
Chris@23 653 f6.timestamp = f2.timestamp;
matthiasm@1 654
mail@77 655 float b[nNote];
matthiasm@1 656
Chris@23 657 bool some_b_greater_zero = false;
Chris@23 658 float sumb = 0;
mail@77 659 for (int i = 0; i < nNote; i++) {
mail@77 660 // b[i] = m_dict[(nNote * count + i) % (nNote * 84)];
Chris@23 661 b[i] = f2.values[i];
Chris@23 662 sumb += b[i];
Chris@23 663 if (b[i] > 0) {
Chris@23 664 some_b_greater_zero = true;
Chris@23 665 }
Chris@23 666 }
matthiasm@1 667
Chris@23 668 // here's where the non-negative least squares algorithm calculates the note activation x
matthiasm@1 669
Chris@23 670 vector<float> chroma = vector<float>(12, 0);
Chris@23 671 vector<float> basschroma = vector<float>(12, 0);
Chris@23 672 float currval;
Chris@23 673 unsigned iSemitone = 0;
matthiasm@1 674
Chris@23 675 if (some_b_greater_zero) {
matthiasm@42 676 if (m_useNNLS == 0) {
Chris@23 677 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
Chris@23 678 currval = 0;
Chris@23 679 currval += b[iNote + 1 + -1] * 0.5;
Chris@23 680 currval += b[iNote + 1 + 0] * 1.0;
Chris@23 681 currval += b[iNote + 1 + 1] * 0.5;
Chris@23 682 f3.values.push_back(currval);
Chris@23 683 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
Chris@23 684 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
Chris@23 685 iSemitone++;
Chris@23 686 }
matthiasm@1 687
Chris@23 688 } else {
Chris@29 689 float x[84+1000];
Chris@23 690 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
Chris@23 691 vector<int> signifIndex;
Chris@23 692 int index=0;
Chris@23 693 sumb /= 84.0;
Chris@23 694 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
Chris@23 695 float currval = 0;
Chris@23 696 currval += b[iNote + 1 + -1];
Chris@23 697 currval += b[iNote + 1 + 0];
Chris@23 698 currval += b[iNote + 1 + 1];
Chris@23 699 if (currval > 0) signifIndex.push_back(index);
Chris@23 700 f3.values.push_back(0); // fill the values, change later
Chris@23 701 index++;
Chris@23 702 }
Chris@29 703 float rnorm;
Chris@29 704 float w[84+1000];
Chris@29 705 float zz[84+1000];
Chris@23 706 int indx[84+1000];
Chris@23 707 int mode;
mail@77 708 int dictsize = nNote*signifIndex.size();
Chris@23 709 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
Chris@29 710 float *curr_dict = new float[dictsize];
Chris@23 711 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
mail@77 712 for (unsigned iBin = 0; iBin < nNote; iBin++) {
mail@77 713 curr_dict[iNote * nNote + iBin] = 1.0 * m_dict[signifIndex[iNote] * nNote + iBin];
Chris@23 714 }
Chris@23 715 }
Chris@29 716 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
Chris@23 717 delete [] curr_dict;
Chris@23 718 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
Chris@23 719 f3.values[signifIndex[iNote]] = x[iNote];
Chris@23 720 // cerr << mode << endl;
Chris@23 721 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
Chris@23 722 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
Chris@23 723 }
Chris@23 724 }
Chris@23 725 }
matthiasm@13 726
matthiasm@10 727
matthiasm@12 728
matthiasm@13 729
Chris@23 730 f4.values = chroma;
Chris@23 731 f5.values = basschroma;
Chris@23 732 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
Chris@23 733 f6.values = chroma;
matthiasm@1 734
Chris@23 735 if (m_doNormalizeChroma > 0) {
Chris@23 736 vector<float> chromanorm = vector<float>(3,0);
Chris@23 737 switch (int(m_doNormalizeChroma)) {
Chris@23 738 case 0: // should never end up here
Chris@23 739 break;
Chris@23 740 case 1:
Chris@23 741 chromanorm[0] = *max_element(f4.values.begin(), f4.values.end());
Chris@23 742 chromanorm[1] = *max_element(f5.values.begin(), f5.values.end());
Chris@23 743 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
Chris@23 744 break;
Chris@23 745 case 2:
Chris@23 746 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
Chris@23 747 chromanorm[0] += *it;
Chris@23 748 }
Chris@23 749 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
Chris@23 750 chromanorm[1] += *it;
Chris@23 751 }
Chris@23 752 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
Chris@23 753 chromanorm[2] += *it;
Chris@23 754 }
Chris@23 755 break;
Chris@23 756 case 3:
Chris@23 757 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
Chris@23 758 chromanorm[0] += pow(*it,2);
Chris@23 759 }
Chris@23 760 chromanorm[0] = sqrt(chromanorm[0]);
Chris@23 761 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
Chris@23 762 chromanorm[1] += pow(*it,2);
Chris@23 763 }
Chris@23 764 chromanorm[1] = sqrt(chromanorm[1]);
Chris@23 765 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
Chris@23 766 chromanorm[2] += pow(*it,2);
Chris@23 767 }
Chris@23 768 chromanorm[2] = sqrt(chromanorm[2]);
Chris@23 769 break;
Chris@23 770 }
Chris@23 771 if (chromanorm[0] > 0) {
Chris@23 772 for (int i = 0; i < f4.values.size(); i++) {
Chris@23 773 f4.values[i] /= chromanorm[0];
Chris@23 774 }
Chris@23 775 }
Chris@23 776 if (chromanorm[1] > 0) {
Chris@23 777 for (int i = 0; i < f5.values.size(); i++) {
Chris@23 778 f5.values[i] /= chromanorm[1];
Chris@23 779 }
Chris@23 780 }
Chris@23 781 if (chromanorm[2] > 0) {
Chris@23 782 for (int i = 0; i < f6.values.size(); i++) {
Chris@23 783 f6.values[i] /= chromanorm[2];
Chris@23 784 }
Chris@23 785 }
matthiasm@13 786
Chris@23 787 }
matthiasm@13 788
Chris@23 789 // local chord estimation
Chris@23 790 vector<float> currentChordSalience;
Chris@23 791 float tempchordvalue = 0;
Chris@23 792 float sumchordvalue = 0;
matthiasm@9 793
Chris@23 794 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23 795 tempchordvalue = 0;
Chris@23 796 for (int iBin = 0; iBin < 12; iBin++) {
Chris@23 797 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23 798 }
Chris@23 799 for (int iBin = 12; iBin < 24; iBin++) {
Chris@23 800 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23 801 }
Chris@23 802 sumchordvalue+=tempchordvalue;
Chris@23 803 currentChordSalience.push_back(tempchordvalue);
Chris@23 804 }
Chris@23 805 if (sumchordvalue > 0) {
Chris@23 806 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23 807 currentChordSalience[iChord] /= sumchordvalue;
Chris@23 808 }
Chris@23 809 } else {
Chris@23 810 currentChordSalience[nChord-1] = 1.0;
Chris@23 811 }
Chris@23 812 chordogram.push_back(currentChordSalience);
matthiasm@1 813
Chris@23 814 fsOut[3].push_back(f3);
Chris@23 815 fsOut[4].push_back(f4);
Chris@23 816 fsOut[5].push_back(f5);
Chris@23 817 fsOut[6].push_back(f6);
Chris@23 818 count++;
Chris@23 819 }
Chris@23 820 cerr << "done." << endl;
matthiasm@13 821
matthiasm@10 822
Chris@23 823 /* Simple chord estimation
Chris@23 824 I just take the local chord estimates ("currentChordSalience") and average them over time, then
Chris@23 825 take the maximum. Very simple, don't do this at home...
Chris@23 826 */
Chris@23 827 cerr << "[NNLS Chroma Plugin] Chord Estimation ... ";
Chris@23 828 count = 0;
Chris@23 829 int halfwindowlength = m_inputSampleRate / m_stepSize;
Chris@23 830 vector<int> chordSequence;
Chris@23 831 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram
Chris@23 832 vector<int> temp = vector<int>(nChord,0);
Chris@23 833 scoreChordogram.push_back(temp);
Chris@23 834 }
Chris@23 835 for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) {
Chris@23 836 int startIndex = count + 1;
Chris@23 837 int endIndex = count + 2 * halfwindowlength;
matthiasm@10 838
Chris@23 839 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
matthiasm@10 840
Chris@23 841 vector<int> chordCandidates;
Chris@23 842 for (unsigned iChord = 0; iChord < nChord-1; iChord++) {
Chris@23 843 // float currsum = 0;
Chris@23 844 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
Chris@23 845 // currsum += chordogram[iFrame][iChord];
Chris@23 846 // }
Chris@23 847 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
Chris@23 848 for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
Chris@23 849 if (chordogram[iFrame][iChord] > chordThreshold) {
Chris@23 850 chordCandidates.push_back(iChord);
Chris@23 851 break;
Chris@23 852 }
Chris@23 853 }
Chris@23 854 }
Chris@23 855 chordCandidates.push_back(nChord-1);
Chris@23 856 // cerr << chordCandidates.size() << endl;
Chris@23 857
Chris@23 858 float maxval = 0; // will be the value of the most salient *chord change* in this frame
Chris@23 859 float maxindex = 0; //... and the index thereof
Chris@23 860 unsigned bestchordL = nChord-1; // index of the best "left" chord
Chris@23 861 unsigned bestchordR = nChord-1; // index of the best "right" chord
Chris@23 862
Chris@23 863 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
Chris@23 864 // now find the max values on both sides of iWF
Chris@23 865 // left side:
Chris@23 866 float maxL = 0;
Chris@23 867 unsigned maxindL = nChord-1;
Chris@23 868 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
Chris@23 869 unsigned iChord = chordCandidates[kChord];
Chris@23 870 float currsum = 0;
Chris@23 871 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
Chris@23 872 currsum += chordogram[count+iFrame][iChord];
matthiasm@10 873 }
Chris@23 874 if (iChord == nChord-1) currsum *= 0.8;
Chris@23 875 if (currsum > maxL) {
Chris@23 876 maxL = currsum;
Chris@23 877 maxindL = iChord;
Chris@23 878 }
Chris@23 879 }
Chris@23 880 // right side:
Chris@23 881 float maxR = 0;
Chris@23 882 unsigned maxindR = nChord-1;
Chris@23 883 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
Chris@23 884 unsigned iChord = chordCandidates[kChord];
Chris@23 885 float currsum = 0;
Chris@23 886 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
Chris@23 887 currsum += chordogram[count+iFrame][iChord];
Chris@23 888 }
Chris@23 889 if (iChord == nChord-1) currsum *= 0.8;
Chris@23 890 if (currsum > maxR) {
Chris@23 891 maxR = currsum;
Chris@23 892 maxindR = iChord;
Chris@23 893 }
Chris@23 894 }
Chris@23 895 if (maxL+maxR > maxval) {
Chris@23 896 maxval = maxL+maxR;
Chris@23 897 maxindex = iWF;
Chris@23 898 bestchordL = maxindL;
Chris@23 899 bestchordR = maxindR;
Chris@23 900 }
matthiasm@3 901
Chris@23 902 }
Chris@23 903 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
Chris@23 904 // add a score to every chord-frame-point that was part of a maximum
Chris@23 905 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
Chris@23 906 scoreChordogram[iFrame+count][bestchordL]++;
Chris@23 907 }
Chris@23 908 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
Chris@23 909 scoreChordogram[iFrame+count][bestchordR]++;
Chris@23 910 }
Chris@23 911 if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
Chris@23 912 count++;
Chris@23 913 }
Chris@23 914 // cerr << "******* agent finished *******" << endl;
Chris@23 915 count = 0;
Chris@23 916 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
Chris@23 917 float maxval = 0; // will be the value of the most salient chord in this frame
Chris@23 918 float maxindex = 0; //... and the index thereof
Chris@23 919 for (unsigned iChord = 0; iChord < nChord; iChord++) {
Chris@23 920 if (scoreChordogram[count][iChord] > maxval) {
Chris@23 921 maxval = scoreChordogram[count][iChord];
Chris@23 922 maxindex = iChord;
Chris@23 923 // cerr << iChord << endl;
Chris@23 924 }
Chris@23 925 }
Chris@23 926 chordSequence.push_back(maxindex);
Chris@23 927 // cerr << "before modefilter, maxindex: " << maxindex << endl;
Chris@23 928 count++;
Chris@23 929 }
Chris@23 930 // cerr << "******* mode filter done *******" << endl;
matthiasm@10 931
matthiasm@3 932
Chris@23 933 // mode filter on chordSequence
Chris@23 934 count = 0;
Chris@23 935 string oldChord = "";
Chris@23 936 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
Chris@23 937 Feature f6 = *it;
Chris@23 938 Feature f7; // chord estimate
Chris@23 939 f7.hasTimestamp = true;
Chris@23 940 f7.timestamp = f6.timestamp;
Chris@23 941 Feature f8; // chord estimate
Chris@23 942 f8.hasTimestamp = true;
Chris@23 943 f8.timestamp = f6.timestamp;
matthiasm@17 944
Chris@23 945 vector<int> chordCount = vector<int>(nChord,0);
Chris@23 946 int maxChordCount = 0;
Chris@23 947 int maxChordIndex = nChord-1;
Chris@23 948 string maxChord;
Chris@23 949 int startIndex = max(count - halfwindowlength/2,0);
Chris@23 950 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
Chris@23 951 for (int i = startIndex; i < endIndex; i++) {
Chris@23 952 chordCount[chordSequence[i]]++;
Chris@23 953 if (chordCount[chordSequence[i]] > maxChordCount) {
Chris@23 954 // cerr << "start index " << startIndex << endl;
Chris@23 955 maxChordCount++;
Chris@23 956 maxChordIndex = chordSequence[i];
Chris@23 957 maxChord = m_chordnames[maxChordIndex];
Chris@23 958 }
Chris@23 959 }
Chris@23 960 // chordSequence[count] = maxChordIndex;
Chris@23 961 // cerr << maxChordIndex << endl;
Chris@23 962 f8.values.push_back(chordchange[count]/(halfwindowlength*2));
Chris@23 963 // cerr << chordchange[count] << endl;
Chris@23 964 fsOut[9].push_back(f8);
Chris@23 965 if (oldChord != maxChord) {
Chris@23 966 oldChord = maxChord;
matthiasm@3 967
Chris@23 968 // char buffer1 [50];
Chris@23 969 // if (maxChordIndex < nChord - 1) {
Chris@23 970 // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
Chris@23 971 // } else {
Chris@23 972 // sprintf(buffer1, "N");
Chris@23 973 // }
Chris@23 974 // f7.label = buffer1;
Chris@23 975 f7.label = m_chordnames[maxChordIndex];
Chris@23 976 fsOut[7].push_back(f7);
Chris@23 977 }
Chris@23 978 count++;
Chris@23 979 }
Chris@23 980 Feature f7; // last chord estimate
Chris@23 981 f7.hasTimestamp = true;
Chris@23 982 f7.timestamp = fsOut[6][fsOut[6].size()-1].timestamp;
Chris@23 983 f7.label = "N";
Chris@23 984 fsOut[7].push_back(f7);
Chris@23 985 cerr << "done." << endl;
Chris@23 986 // // musicity
Chris@23 987 // count = 0;
Chris@23 988 // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
Chris@23 989 // vector<float> musicityValue;
Chris@23 990 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
Chris@23 991 // Feature f4 = *it;
Chris@23 992 //
Chris@23 993 // int startIndex = max(count - musicitykernelwidth/2,0);
Chris@23 994 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
Chris@23 995 // float chromasum = 0;
Chris@23 996 // float diffsum = 0;
Chris@23 997 // for (int k = 0; k < 12; k++) {
Chris@23 998 // for (int i = startIndex + 1; i < endIndex; i++) {
Chris@23 999 // chromasum += pow(fsOut[4][i].values[k],2);
Chris@23 1000 // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]);
Chris@23 1001 // }
Chris@23 1002 // }
Chris@23 1003 // diffsum /= chromasum;
Chris@23 1004 // musicityValue.push_back(diffsum);
Chris@23 1005 // count++;
Chris@23 1006 // }
Chris@23 1007 //
Chris@23 1008 // float musicityThreshold = 0.44;
Chris@23 1009 // if (m_stepSize == 4096) {
Chris@23 1010 // musicityThreshold = 0.74;
Chris@23 1011 // }
Chris@23 1012 // if (m_stepSize == 4410) {
Chris@23 1013 // musicityThreshold = 0.77;
Chris@23 1014 // }
Chris@23 1015 //
Chris@23 1016 // count = 0;
Chris@23 1017 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
Chris@23 1018 // Feature f4 = *it;
Chris@23 1019 // Feature f8; // musicity
Chris@23 1020 // Feature f9; // musicity segmenter
Chris@23 1021 //
Chris@23 1022 // f8.hasTimestamp = true;
Chris@23 1023 // f8.timestamp = f4.timestamp;
Chris@23 1024 // f9.hasTimestamp = true;
Chris@23 1025 // f9.timestamp = f4.timestamp;
Chris@23 1026 //
Chris@23 1027 // int startIndex = max(count - musicitykernelwidth/2,0);
Chris@23 1028 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
Chris@23 1029 // int musicityCount = 0;
Chris@23 1030 // for (int i = startIndex; i <= endIndex; i++) {
Chris@23 1031 // if (musicityValue[i] > musicityThreshold) musicityCount++;
Chris@23 1032 // }
Chris@23 1033 // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1);
Chris@23 1034 //
Chris@23 1035 // if (isSpeech) {
Chris@23 1036 // if (oldlabeltype != 2) {
Chris@23 1037 // f9.label = "Speech";
Chris@23 1038 // fsOut[9].push_back(f9);
Chris@23 1039 // oldlabeltype = 2;
Chris@23 1040 // }
Chris@23 1041 // } else {
Chris@23 1042 // if (oldlabeltype != 1) {
Chris@23 1043 // f9.label = "Music";
Chris@23 1044 // fsOut[9].push_back(f9);
Chris@23 1045 // oldlabeltype = 1;
Chris@23 1046 // }
Chris@23 1047 // }
Chris@23 1048 // f8.values.push_back(musicityValue[count]);
Chris@23 1049 // fsOut[8].push_back(f8);
Chris@23 1050 // count++;
Chris@23 1051 // }
Chris@23 1052 return fsOut;
matthiasm@0 1053
matthiasm@0 1054 }
matthiasm@0 1055
Chris@35 1056 #endif