annotate NNLSBase.cpp @ 60:9a1f83057e84 matthiasm-plugin

harmonic change had no identifier, changed that in .cpp and .n3 files
author Matthias Mauch <mail@matthiasmauch.net>
date Tue, 26 Oct 2010 13:01:54 +0900
parents 1ccb883b585f
children d398e73b46e0
rev   line source
Chris@23 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
matthiasm@0 2
Chris@35 3 /*
Chris@35 4 NNLS-Chroma / Chordino
Chris@35 5
Chris@35 6 Audio feature extraction plugins for chromagram and chord
Chris@35 7 estimation.
Chris@35 8
Chris@35 9 Centre for Digital Music, Queen Mary University of London.
Chris@35 10 This file copyright 2008-2010 Matthias Mauch and QMUL.
Chris@35 11
Chris@35 12 This program is free software; you can redistribute it and/or
Chris@35 13 modify it under the terms of the GNU General Public License as
Chris@35 14 published by the Free Software Foundation; either version 2 of the
Chris@35 15 License, or (at your option) any later version. See the file
Chris@35 16 COPYING included with this distribution for more information.
Chris@35 17 */
Chris@35 18
Chris@35 19 #include "NNLSBase.h"
Chris@27 20
Chris@27 21 #include "chromamethods.h"
Chris@27 22
Chris@27 23 #include <cstdlib>
Chris@27 24 #include <fstream>
matthiasm@0 25 #include <cmath>
matthiasm@9 26
Chris@27 27 #include <algorithm>
matthiasm@0 28
matthiasm@0 29 const bool debug_on = false;
matthiasm@0 30
Chris@27 31 const vector<float> hw(hammingwind, hammingwind+19);
matthiasm@0 32
Chris@35 33 NNLSBase::NNLSBase(float inputSampleRate) :
Chris@23 34 Plugin(inputSampleRate),
Chris@35 35 m_logSpectrum(0),
Chris@23 36 m_blockSize(0),
Chris@23 37 m_stepSize(0),
Chris@23 38 m_lengthOfNoteIndex(0),
Chris@23 39 m_meanTuning0(0),
Chris@23 40 m_meanTuning1(0),
Chris@23 41 m_meanTuning2(0),
Chris@23 42 m_localTuning0(0),
Chris@23 43 m_localTuning1(0),
Chris@23 44 m_localTuning2(0),
mail@41 45 m_whitening(1.0),
Chris@23 46 m_preset(0.0),
Chris@23 47 m_localTuning(0),
Chris@23 48 m_kernelValue(0),
Chris@23 49 m_kernelFftIndex(0),
Chris@23 50 m_kernelNoteIndex(0),
Chris@23 51 m_dict(0),
mail@60 52 m_tuneLocal(0),
Chris@23 53 m_chorddict(0),
Chris@23 54 m_chordnames(0),
Chris@23 55 m_doNormalizeChroma(0),
mail@60 56 m_rollon(0),
matthiasm@42 57 m_s(0.7),
matthiasm@50 58 m_useNNLS(1),
matthiasm@50 59 m_useHMM(1)
matthiasm@0 60 {
Chris@35 61 if (debug_on) cerr << "--> NNLSBase" << endl;
matthiasm@7 62
Chris@23 63 // make the *note* dictionary matrix
Chris@23 64 m_dict = new float[nNote * 84];
Chris@23 65 for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0;
mail@41 66 dictionaryMatrix(m_dict, 0.7);
matthiasm@7 67
Chris@23 68 // get the *chord* dictionary from file (if the file exists)
Chris@23 69 m_chordnames = chordDictionary(&m_chorddict);
matthiasm@0 70 }
matthiasm@0 71
matthiasm@0 72
Chris@35 73 NNLSBase::~NNLSBase()
matthiasm@0 74 {
Chris@35 75 if (debug_on) cerr << "--> ~NNLSBase" << endl;
Chris@23 76 delete [] m_dict;
matthiasm@0 77 }
matthiasm@0 78
matthiasm@0 79 string
Chris@35 80 NNLSBase::getMaker() const
matthiasm@0 81 {
Chris@23 82 if (debug_on) cerr << "--> getMaker" << endl;
matthiasm@0 83 // Your name here
matthiasm@0 84 return "Matthias Mauch";
matthiasm@0 85 }
matthiasm@0 86
matthiasm@0 87 int
Chris@35 88 NNLSBase::getPluginVersion() const
matthiasm@0 89 {
Chris@23 90 if (debug_on) cerr << "--> getPluginVersion" << endl;
matthiasm@0 91 // Increment this each time you release a version that behaves
matthiasm@0 92 // differently from the previous one
matthiasm@0 93 return 1;
matthiasm@0 94 }
matthiasm@0 95
matthiasm@0 96 string
Chris@35 97 NNLSBase::getCopyright() const
matthiasm@0 98 {
Chris@23 99 if (debug_on) cerr << "--> getCopyright" << endl;
matthiasm@0 100 // This function is not ideally named. It does not necessarily
matthiasm@0 101 // need to say who made the plugin -- getMaker does that -- but it
matthiasm@0 102 // should indicate the terms under which it is distributed. For
matthiasm@0 103 // example, "Copyright (year). All Rights Reserved", or "GPL"
Chris@35 104 return "GPL";
matthiasm@0 105 }
matthiasm@0 106
Chris@35 107 NNLSBase::InputDomain
Chris@35 108 NNLSBase::getInputDomain() const
matthiasm@0 109 {
Chris@23 110 if (debug_on) cerr << "--> getInputDomain" << endl;
matthiasm@0 111 return FrequencyDomain;
matthiasm@0 112 }
matthiasm@0 113
matthiasm@0 114 size_t
Chris@35 115 NNLSBase::getPreferredBlockSize() const
matthiasm@0 116 {
Chris@23 117 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
matthiasm@0 118 return 16384; // 0 means "I can handle any block size"
matthiasm@0 119 }
matthiasm@0 120
matthiasm@0 121 size_t
Chris@35 122 NNLSBase::getPreferredStepSize() const
matthiasm@0 123 {
Chris@23 124 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
matthiasm@0 125 return 2048; // 0 means "anything sensible"; in practice this
Chris@23 126 // means the same as the block size for TimeDomain
Chris@23 127 // plugins, or half of it for FrequencyDomain plugins
matthiasm@0 128 }
matthiasm@0 129
matthiasm@0 130 size_t
Chris@35 131 NNLSBase::getMinChannelCount() const
matthiasm@0 132 {
Chris@23 133 if (debug_on) cerr << "--> getMinChannelCount" << endl;
matthiasm@0 134 return 1;
matthiasm@0 135 }
matthiasm@0 136
matthiasm@0 137 size_t
Chris@35 138 NNLSBase::getMaxChannelCount() const
matthiasm@0 139 {
Chris@23 140 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
matthiasm@0 141 return 1;
matthiasm@0 142 }
matthiasm@0 143
Chris@35 144 NNLSBase::ParameterList
Chris@35 145 NNLSBase::getParameterDescriptors() const
matthiasm@0 146 {
Chris@23 147 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
matthiasm@0 148 ParameterList list;
matthiasm@0 149
matthiasm@42 150 ParameterDescriptor d;
matthiasm@42 151 d.identifier = "useNNLS";
matthiasm@42 152 d.name = "use approximate transcription (NNLS)";
matthiasm@42 153 d.description = "Toggles approximate transcription (NNLS).";
matthiasm@42 154 d.unit = "";
matthiasm@42 155 d.minValue = 0.0;
matthiasm@42 156 d.maxValue = 1.0;
matthiasm@42 157 d.defaultValue = 1.0;
matthiasm@42 158 d.isQuantized = true;
matthiasm@42 159 d.quantizeStep = 1.0;
matthiasm@42 160 list.push_back(d);
matthiasm@42 161
mail@41 162 ParameterDescriptor d0;
mail@41 163 d0.identifier = "rollon";
mail@41 164 d0.name = "spectral roll-on";
matthiasm@58 165 d0.description = "Consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds the quantile [spectral roll on] x [total energy] will be set to 0. A value of 0 means that no bins will be changed.";
matthiasm@59 166 d0.unit = "%";
mail@41 167 d0.minValue = 0;
matthiasm@59 168 d0.maxValue = 5;
mail@41 169 d0.defaultValue = 0;
matthiasm@48 170 d0.isQuantized = true;
matthiasm@59 171 d0.quantizeStep = 0.5;
mail@41 172 list.push_back(d0);
matthiasm@4 173
matthiasm@4 174 ParameterDescriptor d1;
matthiasm@4 175 d1.identifier = "tuningmode";
matthiasm@4 176 d1.name = "tuning mode";
matthiasm@4 177 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
matthiasm@4 178 d1.unit = "";
matthiasm@4 179 d1.minValue = 0;
matthiasm@4 180 d1.maxValue = 1;
matthiasm@4 181 d1.defaultValue = 0;
matthiasm@4 182 d1.isQuantized = true;
matthiasm@4 183 d1.valueNames.push_back("global tuning");
matthiasm@4 184 d1.valueNames.push_back("local tuning");
matthiasm@4 185 d1.quantizeStep = 1.0;
matthiasm@4 186 list.push_back(d1);
matthiasm@4 187
mail@41 188 ParameterDescriptor d2;
mail@41 189 d2.identifier = "whitening";
mail@41 190 d2.name = "spectral whitening";
mail@41 191 d2.description = "Spectral whitening: no whitening - 0; whitening - 1.";
mail@41 192 d2.unit = "";
mail@41 193 d2.isQuantized = true;
mail@41 194 d2.minValue = 0.0;
mail@41 195 d2.maxValue = 1.0;
mail@41 196 d2.defaultValue = 1.0;
mail@41 197 d2.isQuantized = false;
mail@41 198 list.push_back(d2);
mail@41 199
mail@41 200 ParameterDescriptor d3;
mail@41 201 d3.identifier = "s";
mail@41 202 d3.name = "spectral shape";
mail@41 203 d3.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
mail@41 204 d3.unit = "";
mail@41 205 d3.minValue = 0.5;
mail@41 206 d3.maxValue = 0.9;
mail@41 207 d3.defaultValue = 0.7;
mail@41 208 d3.isQuantized = false;
mail@41 209 list.push_back(d3);
mail@41 210
Chris@23 211 ParameterDescriptor d4;
matthiasm@12 212 d4.identifier = "chromanormalize";
matthiasm@12 213 d4.name = "chroma normalization";
matthiasm@12 214 d4.description = "How shall the chroma vector be normalized?";
matthiasm@12 215 d4.unit = "";
matthiasm@12 216 d4.minValue = 0;
matthiasm@13 217 d4.maxValue = 3;
matthiasm@12 218 d4.defaultValue = 0;
matthiasm@12 219 d4.isQuantized = true;
matthiasm@13 220 d4.valueNames.push_back("none");
matthiasm@13 221 d4.valueNames.push_back("maximum norm");
Chris@23 222 d4.valueNames.push_back("L1 norm");
Chris@23 223 d4.valueNames.push_back("L2 norm");
matthiasm@12 224 d4.quantizeStep = 1.0;
matthiasm@12 225 list.push_back(d4);
matthiasm@4 226
matthiasm@0 227 return list;
matthiasm@0 228 }
matthiasm@0 229
matthiasm@0 230 float
Chris@35 231 NNLSBase::getParameter(string identifier) const
matthiasm@0 232 {
Chris@23 233 if (debug_on) cerr << "--> getParameter" << endl;
matthiasm@42 234 if (identifier == "useNNLS") {
matthiasm@42 235 return m_useNNLS;
matthiasm@0 236 }
matthiasm@0 237
mail@41 238 if (identifier == "whitening") {
mail@41 239 return m_whitening;
mail@41 240 }
mail@41 241
mail@41 242 if (identifier == "s") {
mail@41 243 return m_s;
matthiasm@0 244 }
matthiasm@17 245
Chris@23 246 if (identifier == "rollon") {
matthiasm@17 247 return m_rollon;
matthiasm@17 248 }
matthiasm@0 249
matthiasm@0 250 if (identifier == "tuningmode") {
matthiasm@0 251 if (m_tuneLocal) {
matthiasm@0 252 return 1.0;
matthiasm@0 253 } else {
matthiasm@0 254 return 0.0;
matthiasm@0 255 }
matthiasm@0 256 }
Chris@23 257 if (identifier == "preset") {
Chris@23 258 return m_preset;
matthiasm@3 259 }
Chris@23 260 if (identifier == "chromanormalize") {
Chris@23 261 return m_doNormalizeChroma;
matthiasm@12 262 }
matthiasm@50 263
matthiasm@50 264 if (identifier == "useHMM") {
matthiasm@50 265 return m_useHMM;
matthiasm@50 266 }
matthiasm@50 267
matthiasm@0 268 return 0;
matthiasm@0 269
matthiasm@0 270 }
matthiasm@0 271
matthiasm@0 272 void
Chris@35 273 NNLSBase::setParameter(string identifier, float value)
matthiasm@0 274 {
Chris@23 275 if (debug_on) cerr << "--> setParameter" << endl;
matthiasm@42 276 if (identifier == "useNNLS") {
matthiasm@42 277 m_useNNLS = (int) value;
matthiasm@0 278 }
matthiasm@0 279
mail@41 280 if (identifier == "whitening") {
mail@41 281 m_whitening = value;
matthiasm@0 282 }
matthiasm@0 283
mail@41 284 if (identifier == "s") {
mail@41 285 m_s = value;
mail@41 286 }
mail@41 287
matthiasm@50 288 if (identifier == "useHMM") {
matthiasm@50 289 m_useHMM = value;
matthiasm@50 290 }
matthiasm@50 291
matthiasm@0 292 if (identifier == "tuningmode") {
mail@60 293 // m_tuneLocal = (value > 0) ? true : false;
mail@60 294 m_tuneLocal = value;
matthiasm@0 295 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
matthiasm@0 296 }
matthiasm@42 297 // if (identifier == "preset") {
matthiasm@42 298 // m_preset = value;
matthiasm@42 299 // if (m_preset == 0.0) {
matthiasm@42 300 // m_tuneLocal = false;
matthiasm@42 301 // m_whitening = 1.0;
matthiasm@42 302 // m_dictID = 0.0;
matthiasm@42 303 // }
matthiasm@42 304 // if (m_preset == 1.0) {
matthiasm@42 305 // m_tuneLocal = false;
matthiasm@42 306 // m_whitening = 1.0;
matthiasm@42 307 // m_dictID = 1.0;
matthiasm@42 308 // }
matthiasm@42 309 // if (m_preset == 2.0) {
matthiasm@42 310 // m_tuneLocal = false;
matthiasm@42 311 // m_whitening = 0.7;
matthiasm@42 312 // m_dictID = 0.0;
matthiasm@42 313 // }
matthiasm@42 314 // }
Chris@23 315 if (identifier == "chromanormalize") {
Chris@23 316 m_doNormalizeChroma = value;
Chris@23 317 }
matthiasm@17 318
Chris@23 319 if (identifier == "rollon") {
Chris@23 320 m_rollon = value;
Chris@23 321 }
matthiasm@0 322 }
matthiasm@0 323
Chris@35 324 NNLSBase::ProgramList
Chris@35 325 NNLSBase::getPrograms() const
matthiasm@0 326 {
Chris@23 327 if (debug_on) cerr << "--> getPrograms" << endl;
matthiasm@0 328 ProgramList list;
matthiasm@0 329
matthiasm@0 330 // If you have no programs, return an empty list (or simply don't
matthiasm@0 331 // implement this function or getCurrentProgram/selectProgram)
matthiasm@0 332
matthiasm@0 333 return list;
matthiasm@0 334 }
matthiasm@0 335
matthiasm@0 336 string
Chris@35 337 NNLSBase::getCurrentProgram() const
matthiasm@0 338 {
Chris@23 339 if (debug_on) cerr << "--> getCurrentProgram" << endl;
matthiasm@0 340 return ""; // no programs
matthiasm@0 341 }
matthiasm@0 342
matthiasm@0 343 void
Chris@35 344 NNLSBase::selectProgram(string name)
matthiasm@0 345 {
Chris@23 346 if (debug_on) cerr << "--> selectProgram" << endl;
matthiasm@0 347 }
matthiasm@0 348
matthiasm@0 349
matthiasm@0 350 bool
Chris@35 351 NNLSBase::initialise(size_t channels, size_t stepSize, size_t blockSize)
matthiasm@0 352 {
Chris@23 353 if (debug_on) {
Chris@23 354 cerr << "--> initialise";
Chris@23 355 }
matthiasm@1 356
matthiasm@0 357 if (channels < getMinChannelCount() ||
matthiasm@0 358 channels > getMaxChannelCount()) return false;
matthiasm@0 359 m_blockSize = blockSize;
matthiasm@0 360 m_stepSize = stepSize;
Chris@35 361 m_frameCount = 0;
Chris@23 362 int tempn = 256 * m_blockSize/2;
Chris@23 363 // cerr << "length of tempkernel : " << tempn << endl;
Chris@23 364 float *tempkernel;
matthiasm@1 365
Chris@23 366 tempkernel = new float[tempn];
matthiasm@1 367
Chris@23 368 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
Chris@23 369 m_kernelValue.clear();
Chris@23 370 m_kernelFftIndex.clear();
Chris@23 371 m_kernelNoteIndex.clear();
Chris@23 372 int countNonzero = 0;
Chris@23 373 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
Chris@23 374 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) {
Chris@23 375 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
Chris@23 376 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
Chris@23 377 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
Chris@23 378 countNonzero++;
Chris@23 379 }
Chris@23 380 m_kernelFftIndex.push_back(iFFT);
Chris@23 381 m_kernelNoteIndex.push_back(iNote);
Chris@23 382 }
Chris@23 383 }
Chris@23 384 }
Chris@23 385 // cerr << "nonzero count : " << countNonzero << endl;
Chris@23 386 delete [] tempkernel;
Chris@35 387 /*
Chris@23 388 ofstream myfile;
Chris@23 389 myfile.open ("matrix.txt");
matthiasm@3 390 // myfile << "Writing this to a file.\n";
Chris@23 391 for (int i = 0; i < nNote * 84; ++i) {
Chris@23 392 myfile << m_dict[i] << endl;
Chris@23 393 }
matthiasm@3 394 myfile.close();
Chris@35 395 */
matthiasm@0 396 return true;
matthiasm@0 397 }
matthiasm@0 398
matthiasm@0 399 void
Chris@35 400 NNLSBase::reset()
matthiasm@0 401 {
Chris@23 402 if (debug_on) cerr << "--> reset";
matthiasm@4 403
matthiasm@0 404 // Clear buffers, reset stored values, etc
Chris@35 405 m_frameCount = 0;
matthiasm@42 406 // m_dictID = 0;
Chris@35 407 m_logSpectrum.clear();
Chris@23 408 m_meanTuning0 = 0;
Chris@23 409 m_meanTuning1 = 0;
Chris@23 410 m_meanTuning2 = 0;
Chris@23 411 m_localTuning0 = 0;
Chris@23 412 m_localTuning1 = 0;
Chris@23 413 m_localTuning2 = 0;
Chris@23 414 m_localTuning.clear();
matthiasm@0 415 }
matthiasm@0 416
Chris@35 417 void
Chris@35 418 NNLSBase::baseProcess(const float *const *inputBuffers, Vamp::RealTime timestamp)
matthiasm@0 419 {
Chris@35 420 m_frameCount++;
Chris@23 421 float *magnitude = new float[m_blockSize/2];
matthiasm@0 422
Chris@23 423 const float *fbuf = inputBuffers[0];
Chris@23 424 float energysum = 0;
Chris@23 425 // make magnitude
Chris@23 426 float maxmag = -10000;
Chris@23 427 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
Chris@23 428 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
Chris@23 429 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
Chris@23 430 if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin];
Chris@23 431 if (m_rollon > 0) {
Chris@23 432 energysum += pow(magnitude[iBin],2);
Chris@23 433 }
Chris@23 434 }
matthiasm@14 435
Chris@23 436 float cumenergy = 0;
Chris@23 437 if (m_rollon > 0) {
Chris@23 438 for (size_t iBin = 2; iBin < m_blockSize/2; iBin++) {
Chris@23 439 cumenergy += pow(magnitude[iBin],2);
matthiasm@59 440 if (cumenergy < energysum * m_rollon / 100) magnitude[iBin-2] = 0;
Chris@23 441 else break;
Chris@23 442 }
Chris@23 443 }
matthiasm@17 444
Chris@23 445 if (maxmag < 2) {
Chris@23 446 // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl;
Chris@23 447 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
Chris@23 448 magnitude[iBin] = 0;
Chris@23 449 }
Chris@23 450 }
matthiasm@4 451
Chris@23 452 // note magnitude mapping using pre-calculated matrix
Chris@23 453 float *nm = new float[nNote]; // note magnitude
Chris@23 454 for (size_t iNote = 0; iNote < nNote; iNote++) {
Chris@23 455 nm[iNote] = 0; // initialise as 0
Chris@23 456 }
Chris@23 457 int binCount = 0;
Chris@23 458 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
Chris@23 459 // cerr << ".";
Chris@23 460 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
Chris@23 461 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl;
Chris@23 462 binCount++;
Chris@23 463 }
Chris@23 464 // cerr << nm[20];
Chris@23 465 // cerr << endl;
matthiasm@0 466
matthiasm@0 467
Chris@35 468 float one_over_N = 1.0/m_frameCount;
matthiasm@0 469 // update means of complex tuning variables
Chris@35 470 m_meanTuning0 *= float(m_frameCount-1)*one_over_N;
Chris@35 471 m_meanTuning1 *= float(m_frameCount-1)*one_over_N;
Chris@35 472 m_meanTuning2 *= float(m_frameCount-1)*one_over_N;
matthiasm@0 473
matthiasm@0 474 for (int iTone = 0; iTone < 160; iTone = iTone + 3) {
matthiasm@0 475 m_meanTuning0 += nm[iTone + 0]*one_over_N;
matthiasm@0 476 m_meanTuning1 += nm[iTone + 1]*one_over_N;
matthiasm@0 477 m_meanTuning2 += nm[iTone + 2]*one_over_N;
Chris@23 478 float ratioOld = 0.997;
matthiasm@3 479 m_localTuning0 *= ratioOld; m_localTuning0 += nm[iTone + 0] * (1 - ratioOld);
matthiasm@3 480 m_localTuning1 *= ratioOld; m_localTuning1 += nm[iTone + 1] * (1 - ratioOld);
matthiasm@3 481 m_localTuning2 *= ratioOld; m_localTuning2 += nm[iTone + 2] * (1 - ratioOld);
matthiasm@0 482 }
matthiasm@0 483
matthiasm@0 484 // if (m_tuneLocal) {
Chris@23 485 // local tuning
Chris@23 486 float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2;
Chris@23 487 float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2;
Chris@23 488 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
Chris@23 489 m_localTuning.push_back(normalisedtuning);
matthiasm@0 490
Chris@23 491 Feature f1; // logfreqspec
Chris@23 492 f1.hasTimestamp = true;
matthiasm@0 493 f1.timestamp = timestamp;
Chris@23 494 for (size_t iNote = 0; iNote < nNote; iNote++) {
Chris@23 495 f1.values.push_back(nm[iNote]);
Chris@23 496 }
matthiasm@0 497
matthiasm@0 498 // deletes
matthiasm@0 499 delete[] magnitude;
matthiasm@0 500 delete[] nm;
matthiasm@0 501
Chris@35 502 m_logSpectrum.push_back(f1); // remember note magnitude
matthiasm@0 503 }
matthiasm@0 504
Chris@35 505
Chris@35 506 #ifdef NOT_DEFINED
Chris@35 507
Chris@35 508 NNLSBase::FeatureSet
Chris@35 509 NNLSBase::getRemainingFeatures()
matthiasm@0 510 {
Chris@23 511 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
Chris@23 512 FeatureSet fsOut;
Chris@35 513 if (m_logSpectrum.size() == 0) return fsOut;
Chris@23 514 int nChord = m_chordnames.size();
Chris@23 515 //
Chris@23 516 /** Calculate Tuning
Chris@23 517 calculate tuning from (using the angle of the complex number defined by the
Chris@23 518 cumulative mean real and imag values)
Chris@23 519 **/
Chris@23 520 float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
Chris@23 521 float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
Chris@23 522 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
Chris@23 523 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
Chris@23 524 int intShift = floor(normalisedtuning * 3);
Chris@23 525 float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
matthiasm@1 526
Chris@23 527 char buffer0 [50];
matthiasm@1 528
Chris@23 529 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
matthiasm@1 530
Chris@23 531 // cerr << "normalisedtuning: " << normalisedtuning << '\n';
matthiasm@1 532
Chris@23 533 // push tuning to FeatureSet fsOut
Chris@23 534 Feature f0; // tuning
Chris@23 535 f0.hasTimestamp = true;
Chris@23 536 f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));;
Chris@23 537 f0.label = buffer0;
Chris@23 538 fsOut[0].push_back(f0);
matthiasm@1 539
Chris@23 540 /** Tune Log-Frequency Spectrogram
Chris@23 541 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
Chris@23 542 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
Chris@23 543 **/
Chris@23 544 cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... ";
matthiasm@13 545
Chris@23 546 float tempValue = 0;
Chris@23 547 float dbThreshold = 0; // relative to the background spectrum
Chris@23 548 float thresh = pow(10,dbThreshold/20);
Chris@23 549 // cerr << "tune local ? " << m_tuneLocal << endl;
Chris@23 550 int count = 0;
matthiasm@1 551
Chris@35 552 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
Chris@23 553 Feature f1 = *i;
Chris@23 554 Feature f2; // tuned log-frequency spectrum
Chris@23 555 f2.hasTimestamp = true;
Chris@23 556 f2.timestamp = f1.timestamp;
Chris@23 557 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
matthiasm@1 558
mail@60 559 if (m_tuneLocal == 1.0) {
Chris@23 560 intShift = floor(m_localTuning[count] * 3);
Chris@23 561 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
Chris@23 562 }
matthiasm@1 563
Chris@23 564 // cerr << intShift << " " << intFactor << endl;
matthiasm@1 565
Chris@23 566 for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
Chris@23 567 tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor;
Chris@23 568 f2.values.push_back(tempValue);
Chris@23 569 }
matthiasm@1 570
Chris@23 571 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
Chris@23 572 vector<float> runningmean = SpecialConvolution(f2.values,hw);
Chris@23 573 vector<float> runningstd;
Chris@23 574 for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance)
Chris@23 575 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
Chris@23 576 }
Chris@23 577 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
Chris@23 578 for (int i = 0; i < 256; i++) {
Chris@23 579 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
Chris@23 580 if (runningstd[i] > 0) {
Chris@23 581 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
mail@41 582 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
Chris@23 583 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
mail@41 584 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
Chris@23 585 }
Chris@23 586 if (f2.values[i] < 0) {
Chris@23 587 cerr << "ERROR: negative value in logfreq spectrum" << endl;
Chris@23 588 }
Chris@23 589 }
Chris@23 590 fsOut[2].push_back(f2);
Chris@23 591 count++;
Chris@23 592 }
Chris@23 593 cerr << "done." << endl;
matthiasm@1 594
Chris@23 595 /** Semitone spectrum and chromagrams
Chris@23 596 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
Chris@23 597 is inferred using a non-negative least squares algorithm.
Chris@23 598 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
Chris@23 599 bass and treble stacked onto each other).
Chris@23 600 **/
matthiasm@42 601 if (m_useNNLS == 0) {
Chris@23 602 cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... ";
Chris@23 603 } else {
Chris@23 604 cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... ";
Chris@23 605 }
matthiasm@13 606
matthiasm@1 607
Chris@23 608 vector<vector<float> > chordogram;
Chris@23 609 vector<vector<int> > scoreChordogram;
Chris@23 610 vector<float> chordchange = vector<float>(fsOut[2].size(),0);
Chris@23 611 vector<float> oldchroma = vector<float>(12,0);
Chris@23 612 vector<float> oldbasschroma = vector<float>(12,0);
Chris@23 613 count = 0;
matthiasm@9 614
Chris@23 615 for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) {
Chris@23 616 Feature f2 = *it; // logfreq spectrum
Chris@23 617 Feature f3; // semitone spectrum
Chris@23 618 Feature f4; // treble chromagram
Chris@23 619 Feature f5; // bass chromagram
Chris@23 620 Feature f6; // treble and bass chromagram
matthiasm@1 621
Chris@23 622 f3.hasTimestamp = true;
Chris@23 623 f3.timestamp = f2.timestamp;
matthiasm@1 624
Chris@23 625 f4.hasTimestamp = true;
Chris@23 626 f4.timestamp = f2.timestamp;
matthiasm@1 627
Chris@23 628 f5.hasTimestamp = true;
Chris@23 629 f5.timestamp = f2.timestamp;
matthiasm@1 630
Chris@23 631 f6.hasTimestamp = true;
Chris@23 632 f6.timestamp = f2.timestamp;
matthiasm@1 633
Chris@29 634 float b[256];
matthiasm@1 635
Chris@23 636 bool some_b_greater_zero = false;
Chris@23 637 float sumb = 0;
Chris@23 638 for (int i = 0; i < 256; i++) {
Chris@23 639 // b[i] = m_dict[(256 * count + i) % (256 * 84)];
Chris@23 640 b[i] = f2.values[i];
Chris@23 641 sumb += b[i];
Chris@23 642 if (b[i] > 0) {
Chris@23 643 some_b_greater_zero = true;
Chris@23 644 }
Chris@23 645 }
matthiasm@1 646
Chris@23 647 // here's where the non-negative least squares algorithm calculates the note activation x
matthiasm@1 648
Chris@23 649 vector<float> chroma = vector<float>(12, 0);
Chris@23 650 vector<float> basschroma = vector<float>(12, 0);
Chris@23 651 float currval;
Chris@23 652 unsigned iSemitone = 0;
matthiasm@1 653
Chris@23 654 if (some_b_greater_zero) {
matthiasm@42 655 if (m_useNNLS == 0) {
Chris@23 656 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
Chris@23 657 currval = 0;
Chris@23 658 currval += b[iNote + 1 + -1] * 0.5;
Chris@23 659 currval += b[iNote + 1 + 0] * 1.0;
Chris@23 660 currval += b[iNote + 1 + 1] * 0.5;
Chris@23 661 f3.values.push_back(currval);
Chris@23 662 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
Chris@23 663 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
Chris@23 664 iSemitone++;
Chris@23 665 }
matthiasm@1 666
Chris@23 667 } else {
Chris@29 668 float x[84+1000];
Chris@23 669 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
Chris@23 670 vector<int> signifIndex;
Chris@23 671 int index=0;
Chris@23 672 sumb /= 84.0;
Chris@23 673 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
Chris@23 674 float currval = 0;
Chris@23 675 currval += b[iNote + 1 + -1];
Chris@23 676 currval += b[iNote + 1 + 0];
Chris@23 677 currval += b[iNote + 1 + 1];
Chris@23 678 if (currval > 0) signifIndex.push_back(index);
Chris@23 679 f3.values.push_back(0); // fill the values, change later
Chris@23 680 index++;
Chris@23 681 }
Chris@29 682 float rnorm;
Chris@29 683 float w[84+1000];
Chris@29 684 float zz[84+1000];
Chris@23 685 int indx[84+1000];
Chris@23 686 int mode;
Chris@23 687 int dictsize = 256*signifIndex.size();
Chris@23 688 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
Chris@29 689 float *curr_dict = new float[dictsize];
Chris@23 690 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
Chris@23 691 for (unsigned iBin = 0; iBin < 256; iBin++) {
Chris@23 692 curr_dict[iNote * 256 + iBin] = 1.0 * m_dict[signifIndex[iNote] * 256 + iBin];
Chris@23 693 }
Chris@23 694 }
Chris@29 695 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
Chris@23 696 delete [] curr_dict;
Chris@23 697 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
Chris@23 698 f3.values[signifIndex[iNote]] = x[iNote];
Chris@23 699 // cerr << mode << endl;
Chris@23 700 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
Chris@23 701 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
Chris@23 702 }
Chris@23 703 }
Chris@23 704 }
matthiasm@13 705
matthiasm@10 706
matthiasm@12 707
matthiasm@13 708
Chris@23 709 f4.values = chroma;
Chris@23 710 f5.values = basschroma;
Chris@23 711 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
Chris@23 712 f6.values = chroma;
matthiasm@1 713
Chris@23 714 if (m_doNormalizeChroma > 0) {
Chris@23 715 vector<float> chromanorm = vector<float>(3,0);
Chris@23 716 switch (int(m_doNormalizeChroma)) {
Chris@23 717 case 0: // should never end up here
Chris@23 718 break;
Chris@23 719 case 1:
Chris@23 720 chromanorm[0] = *max_element(f4.values.begin(), f4.values.end());
Chris@23 721 chromanorm[1] = *max_element(f5.values.begin(), f5.values.end());
Chris@23 722 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
Chris@23 723 break;
Chris@23 724 case 2:
Chris@23 725 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
Chris@23 726 chromanorm[0] += *it;
Chris@23 727 }
Chris@23 728 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
Chris@23 729 chromanorm[1] += *it;
Chris@23 730 }
Chris@23 731 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
Chris@23 732 chromanorm[2] += *it;
Chris@23 733 }
Chris@23 734 break;
Chris@23 735 case 3:
Chris@23 736 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
Chris@23 737 chromanorm[0] += pow(*it,2);
Chris@23 738 }
Chris@23 739 chromanorm[0] = sqrt(chromanorm[0]);
Chris@23 740 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
Chris@23 741 chromanorm[1] += pow(*it,2);
Chris@23 742 }
Chris@23 743 chromanorm[1] = sqrt(chromanorm[1]);
Chris@23 744 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
Chris@23 745 chromanorm[2] += pow(*it,2);
Chris@23 746 }
Chris@23 747 chromanorm[2] = sqrt(chromanorm[2]);
Chris@23 748 break;
Chris@23 749 }
Chris@23 750 if (chromanorm[0] > 0) {
Chris@23 751 for (int i = 0; i < f4.values.size(); i++) {
Chris@23 752 f4.values[i] /= chromanorm[0];
Chris@23 753 }
Chris@23 754 }
Chris@23 755 if (chromanorm[1] > 0) {
Chris@23 756 for (int i = 0; i < f5.values.size(); i++) {
Chris@23 757 f5.values[i] /= chromanorm[1];
Chris@23 758 }
Chris@23 759 }
Chris@23 760 if (chromanorm[2] > 0) {
Chris@23 761 for (int i = 0; i < f6.values.size(); i++) {
Chris@23 762 f6.values[i] /= chromanorm[2];
Chris@23 763 }
Chris@23 764 }
matthiasm@13 765
Chris@23 766 }
matthiasm@13 767
Chris@23 768 // local chord estimation
Chris@23 769 vector<float> currentChordSalience;
Chris@23 770 float tempchordvalue = 0;
Chris@23 771 float sumchordvalue = 0;
matthiasm@9 772
Chris@23 773 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23 774 tempchordvalue = 0;
Chris@23 775 for (int iBin = 0; iBin < 12; iBin++) {
Chris@23 776 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23 777 }
Chris@23 778 for (int iBin = 12; iBin < 24; iBin++) {
Chris@23 779 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23 780 }
Chris@23 781 sumchordvalue+=tempchordvalue;
Chris@23 782 currentChordSalience.push_back(tempchordvalue);
Chris@23 783 }
Chris@23 784 if (sumchordvalue > 0) {
Chris@23 785 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23 786 currentChordSalience[iChord] /= sumchordvalue;
Chris@23 787 }
Chris@23 788 } else {
Chris@23 789 currentChordSalience[nChord-1] = 1.0;
Chris@23 790 }
Chris@23 791 chordogram.push_back(currentChordSalience);
matthiasm@1 792
Chris@23 793 fsOut[3].push_back(f3);
Chris@23 794 fsOut[4].push_back(f4);
Chris@23 795 fsOut[5].push_back(f5);
Chris@23 796 fsOut[6].push_back(f6);
Chris@23 797 count++;
Chris@23 798 }
Chris@23 799 cerr << "done." << endl;
matthiasm@13 800
matthiasm@10 801
Chris@23 802 /* Simple chord estimation
Chris@23 803 I just take the local chord estimates ("currentChordSalience") and average them over time, then
Chris@23 804 take the maximum. Very simple, don't do this at home...
Chris@23 805 */
Chris@23 806 cerr << "[NNLS Chroma Plugin] Chord Estimation ... ";
Chris@23 807 count = 0;
Chris@23 808 int halfwindowlength = m_inputSampleRate / m_stepSize;
Chris@23 809 vector<int> chordSequence;
Chris@23 810 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram
Chris@23 811 vector<int> temp = vector<int>(nChord,0);
Chris@23 812 scoreChordogram.push_back(temp);
Chris@23 813 }
Chris@23 814 for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) {
Chris@23 815 int startIndex = count + 1;
Chris@23 816 int endIndex = count + 2 * halfwindowlength;
matthiasm@10 817
Chris@23 818 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
matthiasm@10 819
Chris@23 820 vector<int> chordCandidates;
Chris@23 821 for (unsigned iChord = 0; iChord < nChord-1; iChord++) {
Chris@23 822 // float currsum = 0;
Chris@23 823 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
Chris@23 824 // currsum += chordogram[iFrame][iChord];
Chris@23 825 // }
Chris@23 826 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
Chris@23 827 for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
Chris@23 828 if (chordogram[iFrame][iChord] > chordThreshold) {
Chris@23 829 chordCandidates.push_back(iChord);
Chris@23 830 break;
Chris@23 831 }
Chris@23 832 }
Chris@23 833 }
Chris@23 834 chordCandidates.push_back(nChord-1);
Chris@23 835 // cerr << chordCandidates.size() << endl;
Chris@23 836
Chris@23 837 float maxval = 0; // will be the value of the most salient *chord change* in this frame
Chris@23 838 float maxindex = 0; //... and the index thereof
Chris@23 839 unsigned bestchordL = nChord-1; // index of the best "left" chord
Chris@23 840 unsigned bestchordR = nChord-1; // index of the best "right" chord
Chris@23 841
Chris@23 842 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
Chris@23 843 // now find the max values on both sides of iWF
Chris@23 844 // left side:
Chris@23 845 float maxL = 0;
Chris@23 846 unsigned maxindL = nChord-1;
Chris@23 847 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
Chris@23 848 unsigned iChord = chordCandidates[kChord];
Chris@23 849 float currsum = 0;
Chris@23 850 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
Chris@23 851 currsum += chordogram[count+iFrame][iChord];
matthiasm@10 852 }
Chris@23 853 if (iChord == nChord-1) currsum *= 0.8;
Chris@23 854 if (currsum > maxL) {
Chris@23 855 maxL = currsum;
Chris@23 856 maxindL = iChord;
Chris@23 857 }
Chris@23 858 }
Chris@23 859 // right side:
Chris@23 860 float maxR = 0;
Chris@23 861 unsigned maxindR = nChord-1;
Chris@23 862 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
Chris@23 863 unsigned iChord = chordCandidates[kChord];
Chris@23 864 float currsum = 0;
Chris@23 865 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
Chris@23 866 currsum += chordogram[count+iFrame][iChord];
Chris@23 867 }
Chris@23 868 if (iChord == nChord-1) currsum *= 0.8;
Chris@23 869 if (currsum > maxR) {
Chris@23 870 maxR = currsum;
Chris@23 871 maxindR = iChord;
Chris@23 872 }
Chris@23 873 }
Chris@23 874 if (maxL+maxR > maxval) {
Chris@23 875 maxval = maxL+maxR;
Chris@23 876 maxindex = iWF;
Chris@23 877 bestchordL = maxindL;
Chris@23 878 bestchordR = maxindR;
Chris@23 879 }
matthiasm@3 880
Chris@23 881 }
Chris@23 882 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
Chris@23 883 // add a score to every chord-frame-point that was part of a maximum
Chris@23 884 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
Chris@23 885 scoreChordogram[iFrame+count][bestchordL]++;
Chris@23 886 }
Chris@23 887 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
Chris@23 888 scoreChordogram[iFrame+count][bestchordR]++;
Chris@23 889 }
Chris@23 890 if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
Chris@23 891 count++;
Chris@23 892 }
Chris@23 893 // cerr << "******* agent finished *******" << endl;
Chris@23 894 count = 0;
Chris@23 895 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
Chris@23 896 float maxval = 0; // will be the value of the most salient chord in this frame
Chris@23 897 float maxindex = 0; //... and the index thereof
Chris@23 898 for (unsigned iChord = 0; iChord < nChord; iChord++) {
Chris@23 899 if (scoreChordogram[count][iChord] > maxval) {
Chris@23 900 maxval = scoreChordogram[count][iChord];
Chris@23 901 maxindex = iChord;
Chris@23 902 // cerr << iChord << endl;
Chris@23 903 }
Chris@23 904 }
Chris@23 905 chordSequence.push_back(maxindex);
Chris@23 906 // cerr << "before modefilter, maxindex: " << maxindex << endl;
Chris@23 907 count++;
Chris@23 908 }
Chris@23 909 // cerr << "******* mode filter done *******" << endl;
matthiasm@10 910
matthiasm@3 911
Chris@23 912 // mode filter on chordSequence
Chris@23 913 count = 0;
Chris@23 914 string oldChord = "";
Chris@23 915 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
Chris@23 916 Feature f6 = *it;
Chris@23 917 Feature f7; // chord estimate
Chris@23 918 f7.hasTimestamp = true;
Chris@23 919 f7.timestamp = f6.timestamp;
Chris@23 920 Feature f8; // chord change value (pushed to fsOut[9])
Chris@23 921 f8.hasTimestamp = true;
Chris@23 922 f8.timestamp = f6.timestamp;
matthiasm@17 923
Chris@23 924 vector<int> chordCount = vector<int>(nChord,0);
Chris@23 925 int maxChordCount = 0;
Chris@23 926 int maxChordIndex = nChord-1;
Chris@23 927 string maxChord;
Chris@23 928 int startIndex = max(count - halfwindowlength/2,0);
Chris@23 929 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
Chris@23 930 for (int i = startIndex; i < endIndex; i++) {
Chris@23 931 chordCount[chordSequence[i]]++;
Chris@23 932 if (chordCount[chordSequence[i]] > maxChordCount) {
Chris@23 933 // cerr << "start index " << startIndex << endl;
Chris@23 934 maxChordCount++;
Chris@23 935 maxChordIndex = chordSequence[i];
Chris@23 936 maxChord = m_chordnames[maxChordIndex];
Chris@23 937 }
Chris@23 938 }
Chris@23 939 // chordSequence[count] = maxChordIndex;
Chris@23 940 // cerr << maxChordIndex << endl;
Chris@23 941 f8.values.push_back(chordchange[count]/(halfwindowlength*2));
Chris@23 942 // cerr << chordchange[count] << endl;
Chris@23 943 fsOut[9].push_back(f8);
Chris@23 944 if (oldChord != maxChord) {
Chris@23 945 oldChord = maxChord;
matthiasm@3 946
Chris@23 947 // char buffer1 [50];
Chris@23 948 // if (maxChordIndex < nChord - 1) {
Chris@23 949 // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
Chris@23 950 // } else {
Chris@23 951 // sprintf(buffer1, "N");
Chris@23 952 // }
Chris@23 953 // f7.label = buffer1;
Chris@23 954 f7.label = m_chordnames[maxChordIndex];
Chris@23 955 fsOut[7].push_back(f7);
Chris@23 956 }
Chris@23 957 count++;
Chris@23 958 }
Chris@23 959 Feature f7; // last chord estimate
Chris@23 960 f7.hasTimestamp = true;
Chris@23 961 f7.timestamp = fsOut[6][fsOut[6].size()-1].timestamp;
Chris@23 962 f7.label = "N";
Chris@23 963 fsOut[7].push_back(f7);
Chris@23 964 cerr << "done." << endl;
Chris@23 965 // // musicity
Chris@23 966 // count = 0;
Chris@23 967 // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
Chris@23 968 // vector<float> musicityValue;
Chris@23 969 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
Chris@23 970 // Feature f4 = *it;
Chris@23 971 //
Chris@23 972 // int startIndex = max(count - musicitykernelwidth/2,0);
Chris@23 973 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
Chris@23 974 // float chromasum = 0;
Chris@23 975 // float diffsum = 0;
Chris@23 976 // for (int k = 0; k < 12; k++) {
Chris@23 977 // for (int i = startIndex + 1; i < endIndex; i++) {
Chris@23 978 // chromasum += pow(fsOut[4][i].values[k],2);
Chris@23 979 // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]);
Chris@23 980 // }
Chris@23 981 // }
Chris@23 982 // diffsum /= chromasum;
Chris@23 983 // musicityValue.push_back(diffsum);
Chris@23 984 // count++;
Chris@23 985 // }
Chris@23 986 //
Chris@23 987 // float musicityThreshold = 0.44;
Chris@23 988 // if (m_stepSize == 4096) {
Chris@23 989 // musicityThreshold = 0.74;
Chris@23 990 // }
Chris@23 991 // if (m_stepSize == 4410) {
Chris@23 992 // musicityThreshold = 0.77;
Chris@23 993 // }
Chris@23 994 //
Chris@23 995 // count = 0;
Chris@23 996 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
Chris@23 997 // Feature f4 = *it;
Chris@23 998 // Feature f8; // musicity
Chris@23 999 // Feature f9; // musicity segmenter
Chris@23 1000 //
Chris@23 1001 // f8.hasTimestamp = true;
Chris@23 1002 // f8.timestamp = f4.timestamp;
Chris@23 1003 // f9.hasTimestamp = true;
Chris@23 1004 // f9.timestamp = f4.timestamp;
Chris@23 1005 //
Chris@23 1006 // int startIndex = max(count - musicitykernelwidth/2,0);
Chris@23 1007 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
Chris@23 1008 // int musicityCount = 0;
Chris@23 1009 // for (int i = startIndex; i <= endIndex; i++) {
Chris@23 1010 // if (musicityValue[i] > musicityThreshold) musicityCount++;
Chris@23 1011 // }
Chris@23 1012 // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1);
Chris@23 1013 //
Chris@23 1014 // if (isSpeech) {
Chris@23 1015 // if (oldlabeltype != 2) {
Chris@23 1016 // f9.label = "Speech";
Chris@23 1017 // fsOut[9].push_back(f9);
Chris@23 1018 // oldlabeltype = 2;
Chris@23 1019 // }
Chris@23 1020 // } else {
Chris@23 1021 // if (oldlabeltype != 1) {
Chris@23 1022 // f9.label = "Music";
Chris@23 1023 // fsOut[9].push_back(f9);
Chris@23 1024 // oldlabeltype = 1;
Chris@23 1025 // }
Chris@23 1026 // }
Chris@23 1027 // f8.values.push_back(musicityValue[count]);
Chris@23 1028 // fsOut[8].push_back(f8);
Chris@23 1029 // count++;
Chris@23 1030 // }
Chris@23 1031 return fsOut;
matthiasm@0 1032
matthiasm@0 1033 }
matthiasm@0 1034
Chris@35 1035 #endif