annotate NNLSBase.cpp @ 58:01bc078f5f61 matthiasm-plugin

updated plugin and some parameter and output descriptions. adjusted the n3 file (only skeleton so far).
author matthiasm
date Mon, 25 Oct 2010 22:57:47 +0900
parents b6cddb109482
children 1ccb883b585f
rev   line source
Chris@23 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
matthiasm@0 2
Chris@35 3 /*
Chris@35 4 NNLS-Chroma / Chordino
Chris@35 5
Chris@35 6 Audio feature extraction plugins for chromagram and chord
Chris@35 7 estimation.
Chris@35 8
Chris@35 9 Centre for Digital Music, Queen Mary University of London.
Chris@35 10 This file copyright 2008-2010 Matthias Mauch and QMUL.
Chris@35 11
Chris@35 12 This program is free software; you can redistribute it and/or
Chris@35 13 modify it under the terms of the GNU General Public License as
Chris@35 14 published by the Free Software Foundation; either version 2 of the
Chris@35 15 License, or (at your option) any later version. See the file
Chris@35 16 COPYING included with this distribution for more information.
Chris@35 17 */
Chris@35 18
Chris@35 19 #include "NNLSBase.h"
Chris@27 20
Chris@27 21 #include "chromamethods.h"
Chris@27 22
Chris@27 23 #include <cstdlib>
Chris@27 24 #include <fstream>
matthiasm@0 25 #include <cmath>
matthiasm@9 26
Chris@27 27 #include <algorithm>
matthiasm@0 28
matthiasm@0 29 const bool debug_on = false;
matthiasm@0 30
Chris@27 31 const vector<float> hw(hammingwind, hammingwind+19);
matthiasm@0 32
Chris@35 33 NNLSBase::NNLSBase(float inputSampleRate) :
Chris@23 34 Plugin(inputSampleRate),
Chris@35 35 m_logSpectrum(0),
Chris@23 36 m_blockSize(0),
Chris@23 37 m_stepSize(0),
Chris@23 38 m_lengthOfNoteIndex(0),
Chris@23 39 m_meanTuning0(0),
Chris@23 40 m_meanTuning1(0),
Chris@23 41 m_meanTuning2(0),
Chris@23 42 m_localTuning0(0),
Chris@23 43 m_localTuning1(0),
Chris@23 44 m_localTuning2(0),
mail@41 45 m_whitening(1.0),
Chris@23 46 m_preset(0.0),
Chris@23 47 m_localTuning(0),
Chris@23 48 m_kernelValue(0),
Chris@23 49 m_kernelFftIndex(0),
Chris@23 50 m_kernelNoteIndex(0),
Chris@23 51 m_dict(0),
Chris@23 52 m_tuneLocal(false),
Chris@23 53 m_chorddict(0),
Chris@23 54 m_chordnames(0),
Chris@23 55 m_doNormalizeChroma(0),
mail@41 56 m_rollon(0.0),
matthiasm@42 57 m_s(0.7),
matthiasm@50 58 m_useNNLS(1),
matthiasm@50 59 m_useHMM(1)
matthiasm@0 60 {
Chris@35 61 if (debug_on) cerr << "--> NNLSBase" << endl;
matthiasm@7 62
Chris@23 63 // make the *note* dictionary matrix
Chris@23 64 m_dict = new float[nNote * 84];
Chris@23 65 for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0;
mail@41 66 dictionaryMatrix(m_dict, 0.7);
matthiasm@7 67
Chris@23 68 // get the *chord* dictionary from file (if the file exists)
Chris@23 69 m_chordnames = chordDictionary(&m_chorddict);
matthiasm@0 70 }
matthiasm@0 71
matthiasm@0 72
Chris@35 73 NNLSBase::~NNLSBase()
matthiasm@0 74 {
Chris@35 75 if (debug_on) cerr << "--> ~NNLSBase" << endl;
Chris@23 76 delete [] m_dict;
matthiasm@0 77 }
matthiasm@0 78
matthiasm@0 79 string
Chris@35 80 NNLSBase::getMaker() const
matthiasm@0 81 {
Chris@23 82 if (debug_on) cerr << "--> getMaker" << endl;
matthiasm@0 83 // Your name here
matthiasm@0 84 return "Matthias Mauch";
matthiasm@0 85 }
matthiasm@0 86
matthiasm@0 87 int
Chris@35 88 NNLSBase::getPluginVersion() const
matthiasm@0 89 {
Chris@23 90 if (debug_on) cerr << "--> getPluginVersion" << endl;
matthiasm@0 91 // Increment this each time you release a version that behaves
matthiasm@0 92 // differently from the previous one
matthiasm@0 93 return 1;
matthiasm@0 94 }
matthiasm@0 95
matthiasm@0 96 string
Chris@35 97 NNLSBase::getCopyright() const
matthiasm@0 98 {
Chris@23 99 if (debug_on) cerr << "--> getCopyright" << endl;
matthiasm@0 100 // This function is not ideally named. It does not necessarily
matthiasm@0 101 // need to say who made the plugin -- getMaker does that -- but it
matthiasm@0 102 // should indicate the terms under which it is distributed. For
matthiasm@0 103 // example, "Copyright (year). All Rights Reserved", or "GPL"
Chris@35 104 return "GPL";
matthiasm@0 105 }
matthiasm@0 106
Chris@35 107 NNLSBase::InputDomain
Chris@35 108 NNLSBase::getInputDomain() const
matthiasm@0 109 {
Chris@23 110 if (debug_on) cerr << "--> getInputDomain" << endl;
matthiasm@0 111 return FrequencyDomain;
matthiasm@0 112 }
matthiasm@0 113
matthiasm@0 114 size_t
Chris@35 115 NNLSBase::getPreferredBlockSize() const
matthiasm@0 116 {
Chris@23 117 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
matthiasm@0 118 return 16384; // 0 means "I can handle any block size"
matthiasm@0 119 }
matthiasm@0 120
matthiasm@0 121 size_t
Chris@35 122 NNLSBase::getPreferredStepSize() const
matthiasm@0 123 {
Chris@23 124 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
matthiasm@0 125 return 2048; // 0 means "anything sensible"; in practice this
Chris@23 126 // means the same as the block size for TimeDomain
Chris@23 127 // plugins, or half of it for FrequencyDomain plugins
matthiasm@0 128 }
matthiasm@0 129
matthiasm@0 130 size_t
Chris@35 131 NNLSBase::getMinChannelCount() const
matthiasm@0 132 {
Chris@23 133 if (debug_on) cerr << "--> getMinChannelCount" << endl;
matthiasm@0 134 return 1;
matthiasm@0 135 }
matthiasm@0 136
matthiasm@0 137 size_t
Chris@35 138 NNLSBase::getMaxChannelCount() const
matthiasm@0 139 {
Chris@23 140 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
matthiasm@0 141 return 1;
matthiasm@0 142 }
matthiasm@0 143
Chris@35 144 NNLSBase::ParameterList
Chris@35 145 NNLSBase::getParameterDescriptors() const
matthiasm@0 146 {
Chris@23 147 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
matthiasm@0 148 ParameterList list;
matthiasm@0 149
matthiasm@42 150 ParameterDescriptor d;
matthiasm@42 151 d.identifier = "useNNLS";
matthiasm@42 152 d.name = "use approximate transcription (NNLS)";
matthiasm@42 153 d.description = "Toggles approximate transcription (NNLS).";
matthiasm@42 154 d.unit = "";
matthiasm@42 155 d.minValue = 0.0;
matthiasm@42 156 d.maxValue = 1.0;
matthiasm@42 157 d.defaultValue = 1.0;
matthiasm@42 158 d.isQuantized = true;
matthiasm@42 159 d.quantizeStep = 1.0;
matthiasm@42 160 list.push_back(d);
matthiasm@42 161
mail@41 162 ParameterDescriptor d0;
mail@41 163 d0.identifier = "rollon";
mail@41 164 d0.name = "spectral roll-on";
matthiasm@58 165 d0.description = "Consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds the quantile [spectral roll on] x [total energy] will be set to 0. A value of 0 means that no bins will be changed.";
mail@41 166 d0.unit = "";
mail@41 167 d0.minValue = 0;
mail@41 168 d0.maxValue = 0.05;
mail@41 169 d0.defaultValue = 0;
matthiasm@48 170 d0.isQuantized = true;
matthiasm@48 171 d0.quantizeStep = 0.005;
mail@41 172 list.push_back(d0);
matthiasm@4 173
matthiasm@4 174 ParameterDescriptor d1;
matthiasm@4 175 d1.identifier = "tuningmode";
matthiasm@4 176 d1.name = "tuning mode";
matthiasm@4 177 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
matthiasm@4 178 d1.unit = "";
matthiasm@4 179 d1.minValue = 0;
matthiasm@4 180 d1.maxValue = 1;
matthiasm@4 181 d1.defaultValue = 0;
matthiasm@4 182 d1.isQuantized = true;
matthiasm@4 183 d1.valueNames.push_back("global tuning");
matthiasm@4 184 d1.valueNames.push_back("local tuning");
matthiasm@4 185 d1.quantizeStep = 1.0;
matthiasm@4 186 list.push_back(d1);
matthiasm@4 187
mail@41 188 ParameterDescriptor d2;
mail@41 189 d2.identifier = "whitening";
mail@41 190 d2.name = "spectral whitening";
mail@41 191 d2.description = "Spectral whitening: no whitening - 0; whitening - 1.";
mail@41 192 d2.unit = "";
mail@41 193 d2.isQuantized = true;
mail@41 194 d2.minValue = 0.0;
mail@41 195 d2.maxValue = 1.0;
mail@41 196 d2.defaultValue = 1.0;
mail@41 197 d2.isQuantized = false;
mail@41 198 list.push_back(d2);
mail@41 199
mail@41 200 ParameterDescriptor d3;
mail@41 201 d3.identifier = "s";
mail@41 202 d3.name = "spectral shape";
mail@41 203 d3.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
mail@41 204 d3.unit = "";
mail@41 205 d3.minValue = 0.5;
mail@41 206 d3.maxValue = 0.9;
mail@41 207 d3.defaultValue = 0.7;
mail@41 208 d3.isQuantized = false;
mail@41 209 list.push_back(d3);
mail@41 210
Chris@23 211 ParameterDescriptor d4;
matthiasm@12 212 d4.identifier = "chromanormalize";
matthiasm@12 213 d4.name = "chroma normalization";
matthiasm@12 214 d4.description = "How shall the chroma vector be normalized?";
matthiasm@12 215 d4.unit = "";
matthiasm@12 216 d4.minValue = 0;
matthiasm@13 217 d4.maxValue = 3;
matthiasm@12 218 d4.defaultValue = 0;
matthiasm@12 219 d4.isQuantized = true;
matthiasm@13 220 d4.valueNames.push_back("none");
matthiasm@13 221 d4.valueNames.push_back("maximum norm");
Chris@23 222 d4.valueNames.push_back("L1 norm");
Chris@23 223 d4.valueNames.push_back("L2 norm");
matthiasm@12 224 d4.quantizeStep = 1.0;
matthiasm@12 225 list.push_back(d4);
matthiasm@4 226
matthiasm@0 227 return list;
matthiasm@0 228 }
matthiasm@0 229
matthiasm@0 230 float
Chris@35 231 NNLSBase::getParameter(string identifier) const
matthiasm@0 232 {
Chris@23 233 if (debug_on) cerr << "--> getParameter" << endl;
matthiasm@42 234 if (identifier == "useNNLS") {
matthiasm@42 235 return m_useNNLS;
matthiasm@0 236 }
matthiasm@0 237
mail@41 238 if (identifier == "whitening") {
mail@41 239 return m_whitening;
mail@41 240 }
mail@41 241
mail@41 242 if (identifier == "s") {
mail@41 243 return m_s;
matthiasm@0 244 }
matthiasm@17 245
Chris@23 246 if (identifier == "rollon") {
matthiasm@17 247 return m_rollon;
matthiasm@17 248 }
matthiasm@0 249
matthiasm@0 250 if (identifier == "tuningmode") {
matthiasm@0 251 if (m_tuneLocal) {
matthiasm@0 252 return 1.0;
matthiasm@0 253 } else {
matthiasm@0 254 return 0.0;
matthiasm@0 255 }
matthiasm@0 256 }
Chris@23 257 if (identifier == "preset") {
Chris@23 258 return m_preset;
matthiasm@3 259 }
Chris@23 260 if (identifier == "chromanormalize") {
Chris@23 261 return m_doNormalizeChroma;
matthiasm@12 262 }
matthiasm@50 263
matthiasm@50 264 if (identifier == "useHMM") {
matthiasm@50 265 return m_useHMM;
matthiasm@50 266 }
matthiasm@50 267
matthiasm@0 268 return 0;
matthiasm@0 269
matthiasm@0 270 }
matthiasm@0 271
matthiasm@0 272 void
Chris@35 273 NNLSBase::setParameter(string identifier, float value)
matthiasm@0 274 {
Chris@23 275 if (debug_on) cerr << "--> setParameter" << endl;
matthiasm@42 276 if (identifier == "useNNLS") {
matthiasm@42 277 m_useNNLS = (int) value;
matthiasm@0 278 }
matthiasm@0 279
mail@41 280 if (identifier == "whitening") {
mail@41 281 m_whitening = value;
matthiasm@0 282 }
matthiasm@0 283
mail@41 284 if (identifier == "s") {
mail@41 285 m_s = value;
mail@41 286 }
mail@41 287
matthiasm@50 288 if (identifier == "useHMM") {
matthiasm@50 289 m_useHMM = value;
matthiasm@50 290 }
matthiasm@50 291
matthiasm@0 292 if (identifier == "tuningmode") {
matthiasm@0 293 m_tuneLocal = (value > 0) ? true : false;
matthiasm@0 294 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
matthiasm@0 295 }
matthiasm@42 296 // if (identifier == "preset") {
matthiasm@42 297 // m_preset = value;
matthiasm@42 298 // if (m_preset == 0.0) {
matthiasm@42 299 // m_tuneLocal = false;
matthiasm@42 300 // m_whitening = 1.0;
matthiasm@42 301 // m_dictID = 0.0;
matthiasm@42 302 // }
matthiasm@42 303 // if (m_preset == 1.0) {
matthiasm@42 304 // m_tuneLocal = false;
matthiasm@42 305 // m_whitening = 1.0;
matthiasm@42 306 // m_dictID = 1.0;
matthiasm@42 307 // }
matthiasm@42 308 // if (m_preset == 2.0) {
matthiasm@42 309 // m_tuneLocal = false;
matthiasm@42 310 // m_whitening = 0.7;
matthiasm@42 311 // m_dictID = 0.0;
matthiasm@42 312 // }
matthiasm@42 313 // }
Chris@23 314 if (identifier == "chromanormalize") {
Chris@23 315 m_doNormalizeChroma = value;
Chris@23 316 }
matthiasm@17 317
Chris@23 318 if (identifier == "rollon") {
Chris@23 319 m_rollon = value;
Chris@23 320 }
matthiasm@0 321 }
matthiasm@0 322
Chris@35 323 NNLSBase::ProgramList
Chris@35 324 NNLSBase::getPrograms() const
matthiasm@0 325 {
Chris@23 326 if (debug_on) cerr << "--> getPrograms" << endl;
matthiasm@0 327 ProgramList list;
matthiasm@0 328
matthiasm@0 329 // If you have no programs, return an empty list (or simply don't
matthiasm@0 330 // implement this function or getCurrentProgram/selectProgram)
matthiasm@0 331
matthiasm@0 332 return list;
matthiasm@0 333 }
matthiasm@0 334
matthiasm@0 335 string
Chris@35 336 NNLSBase::getCurrentProgram() const
matthiasm@0 337 {
Chris@23 338 if (debug_on) cerr << "--> getCurrentProgram" << endl;
matthiasm@0 339 return ""; // no programs
matthiasm@0 340 }
matthiasm@0 341
matthiasm@0 342 void
Chris@35 343 NNLSBase::selectProgram(string name)
matthiasm@0 344 {
Chris@23 345 if (debug_on) cerr << "--> selectProgram" << endl;
matthiasm@0 346 }
matthiasm@0 347
matthiasm@0 348
matthiasm@0 349 bool
Chris@35 350 NNLSBase::initialise(size_t channels, size_t stepSize, size_t blockSize)
matthiasm@0 351 {
Chris@23 352 if (debug_on) {
Chris@23 353 cerr << "--> initialise";
Chris@23 354 }
matthiasm@1 355
matthiasm@0 356 if (channels < getMinChannelCount() ||
matthiasm@0 357 channels > getMaxChannelCount()) return false;
matthiasm@0 358 m_blockSize = blockSize;
matthiasm@0 359 m_stepSize = stepSize;
Chris@35 360 m_frameCount = 0;
Chris@23 361 int tempn = 256 * m_blockSize/2;
Chris@23 362 // cerr << "length of tempkernel : " << tempn << endl;
Chris@23 363 float *tempkernel;
matthiasm@1 364
Chris@23 365 tempkernel = new float[tempn];
matthiasm@1 366
Chris@23 367 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
Chris@23 368 m_kernelValue.clear();
Chris@23 369 m_kernelFftIndex.clear();
Chris@23 370 m_kernelNoteIndex.clear();
Chris@23 371 int countNonzero = 0;
Chris@23 372 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
Chris@23 373 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) {
Chris@23 374 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
Chris@23 375 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
Chris@23 376 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
Chris@23 377 countNonzero++;
Chris@23 378 }
Chris@23 379 m_kernelFftIndex.push_back(iFFT);
Chris@23 380 m_kernelNoteIndex.push_back(iNote);
Chris@23 381 }
Chris@23 382 }
Chris@23 383 }
Chris@23 384 // cerr << "nonzero count : " << countNonzero << endl;
Chris@23 385 delete [] tempkernel;
Chris@35 386 /*
Chris@23 387 ofstream myfile;
Chris@23 388 myfile.open ("matrix.txt");
matthiasm@3 389 // myfile << "Writing this to a file.\n";
Chris@23 390 for (int i = 0; i < nNote * 84; ++i) {
Chris@23 391 myfile << m_dict[i] << endl;
Chris@23 392 }
matthiasm@3 393 myfile.close();
Chris@35 394 */
matthiasm@0 395 return true;
matthiasm@0 396 }
matthiasm@0 397
matthiasm@0 398 void
Chris@35 399 NNLSBase::reset()
matthiasm@0 400 {
Chris@23 401 if (debug_on) cerr << "--> reset";
matthiasm@4 402
matthiasm@0 403 // Clear buffers, reset stored values, etc
Chris@35 404 m_frameCount = 0;
matthiasm@42 405 // m_dictID = 0;
Chris@35 406 m_logSpectrum.clear();
Chris@23 407 m_meanTuning0 = 0;
Chris@23 408 m_meanTuning1 = 0;
Chris@23 409 m_meanTuning2 = 0;
Chris@23 410 m_localTuning0 = 0;
Chris@23 411 m_localTuning1 = 0;
Chris@23 412 m_localTuning2 = 0;
Chris@23 413 m_localTuning.clear();
matthiasm@0 414 }
matthiasm@0 415
Chris@35 416 void
Chris@35 417 NNLSBase::baseProcess(const float *const *inputBuffers, Vamp::RealTime timestamp)
matthiasm@0 418 {
Chris@35 419 m_frameCount++;
Chris@23 420 float *magnitude = new float[m_blockSize/2];
matthiasm@0 421
Chris@23 422 const float *fbuf = inputBuffers[0];
Chris@23 423 float energysum = 0;
Chris@23 424 // make magnitude
Chris@23 425 float maxmag = -10000;
Chris@23 426 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
Chris@23 427 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
Chris@23 428 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
Chris@23 429 if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin];
Chris@23 430 if (m_rollon > 0) {
Chris@23 431 energysum += pow(magnitude[iBin],2);
Chris@23 432 }
Chris@23 433 }
matthiasm@14 434
Chris@23 435 float cumenergy = 0;
Chris@23 436 if (m_rollon > 0) {
Chris@23 437 for (size_t iBin = 2; iBin < m_blockSize/2; iBin++) {
Chris@23 438 cumenergy += pow(magnitude[iBin],2);
Chris@23 439 if (cumenergy < energysum * m_rollon) magnitude[iBin-2] = 0;
Chris@23 440 else break;
Chris@23 441 }
Chris@23 442 }
matthiasm@17 443
Chris@23 444 if (maxmag < 2) {
Chris@23 445 // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl;
Chris@23 446 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
Chris@23 447 magnitude[iBin] = 0;
Chris@23 448 }
Chris@23 449 }
matthiasm@4 450
Chris@23 451 // note magnitude mapping using pre-calculated matrix
Chris@23 452 float *nm = new float[nNote]; // note magnitude
Chris@23 453 for (size_t iNote = 0; iNote < nNote; iNote++) {
Chris@23 454 nm[iNote] = 0; // initialise as 0
Chris@23 455 }
Chris@23 456 int binCount = 0;
Chris@23 457 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
Chris@23 458 // cerr << ".";
Chris@23 459 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
Chris@23 460 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl;
Chris@23 461 binCount++;
Chris@23 462 }
Chris@23 463 // cerr << nm[20];
Chris@23 464 // cerr << endl;
matthiasm@0 465
matthiasm@0 466
Chris@35 467 float one_over_N = 1.0/m_frameCount;
matthiasm@0 468 // update means of complex tuning variables
Chris@35 469 m_meanTuning0 *= float(m_frameCount-1)*one_over_N;
Chris@35 470 m_meanTuning1 *= float(m_frameCount-1)*one_over_N;
Chris@35 471 m_meanTuning2 *= float(m_frameCount-1)*one_over_N;
matthiasm@0 472
matthiasm@0 473 for (int iTone = 0; iTone < 160; iTone = iTone + 3) {
matthiasm@0 474 m_meanTuning0 += nm[iTone + 0]*one_over_N;
matthiasm@0 475 m_meanTuning1 += nm[iTone + 1]*one_over_N;
matthiasm@0 476 m_meanTuning2 += nm[iTone + 2]*one_over_N;
Chris@23 477 float ratioOld = 0.997;
matthiasm@3 478 m_localTuning0 *= ratioOld; m_localTuning0 += nm[iTone + 0] * (1 - ratioOld);
matthiasm@3 479 m_localTuning1 *= ratioOld; m_localTuning1 += nm[iTone + 1] * (1 - ratioOld);
matthiasm@3 480 m_localTuning2 *= ratioOld; m_localTuning2 += nm[iTone + 2] * (1 - ratioOld);
matthiasm@0 481 }
matthiasm@0 482
matthiasm@0 483 // if (m_tuneLocal) {
Chris@23 484 // local tuning
Chris@23 485 float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2;
Chris@23 486 float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2;
Chris@23 487 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
Chris@23 488 m_localTuning.push_back(normalisedtuning);
matthiasm@0 489
Chris@23 490 Feature f1; // logfreqspec
Chris@23 491 f1.hasTimestamp = true;
matthiasm@0 492 f1.timestamp = timestamp;
Chris@23 493 for (size_t iNote = 0; iNote < nNote; iNote++) {
Chris@23 494 f1.values.push_back(nm[iNote]);
Chris@23 495 }
matthiasm@0 496
matthiasm@0 497 // deletes
matthiasm@0 498 delete[] magnitude;
matthiasm@0 499 delete[] nm;
matthiasm@0 500
Chris@35 501 m_logSpectrum.push_back(f1); // remember note magnitude
matthiasm@0 502 }
matthiasm@0 503
Chris@35 504
Chris@35 505 #ifdef NOT_DEFINED
Chris@35 506
Chris@35 507 NNLSBase::FeatureSet
Chris@35 508 NNLSBase::getRemainingFeatures()
matthiasm@0 509 {
Chris@23 510 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
Chris@23 511 FeatureSet fsOut;
Chris@35 512 if (m_logSpectrum.size() == 0) return fsOut;
Chris@23 513 int nChord = m_chordnames.size();
Chris@23 514 //
Chris@23 515 /** Calculate Tuning
Chris@23 516 calculate tuning from (using the angle of the complex number defined by the
Chris@23 517 cumulative mean real and imag values)
Chris@23 518 **/
Chris@23 519 float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
Chris@23 520 float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
Chris@23 521 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
Chris@23 522 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
Chris@23 523 int intShift = floor(normalisedtuning * 3);
Chris@23 524 float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
matthiasm@1 525
Chris@23 526 char buffer0 [50];
matthiasm@1 527
Chris@23 528 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
matthiasm@1 529
Chris@23 530 // cerr << "normalisedtuning: " << normalisedtuning << '\n';
matthiasm@1 531
Chris@23 532 // push tuning to FeatureSet fsOut
Chris@23 533 Feature f0; // tuning
Chris@23 534 f0.hasTimestamp = true;
Chris@23 535 f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));;
Chris@23 536 f0.label = buffer0;
Chris@23 537 fsOut[0].push_back(f0);
matthiasm@1 538
Chris@23 539 /** Tune Log-Frequency Spectrogram
Chris@23 540 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
Chris@23 541 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
Chris@23 542 **/
Chris@23 543 cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... ";
matthiasm@13 544
Chris@23 545 float tempValue = 0;
Chris@23 546 float dbThreshold = 0; // relative to the background spectrum
Chris@23 547 float thresh = pow(10,dbThreshold/20);
Chris@23 548 // cerr << "tune local ? " << m_tuneLocal << endl;
Chris@23 549 int count = 0;
matthiasm@1 550
Chris@35 551 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
Chris@23 552 Feature f1 = *i;
Chris@23 553 Feature f2; // tuned log-frequency spectrum
Chris@23 554 f2.hasTimestamp = true;
Chris@23 555 f2.timestamp = f1.timestamp;
Chris@23 556 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
matthiasm@1 557
Chris@23 558 if (m_tuneLocal) {
Chris@23 559 intShift = floor(m_localTuning[count] * 3);
Chris@23 560 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
Chris@23 561 }
matthiasm@1 562
Chris@23 563 // cerr << intShift << " " << intFactor << endl;
matthiasm@1 564
Chris@23 565 for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
Chris@23 566 tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor;
Chris@23 567 f2.values.push_back(tempValue);
Chris@23 568 }
matthiasm@1 569
Chris@23 570 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
Chris@23 571 vector<float> runningmean = SpecialConvolution(f2.values,hw);
Chris@23 572 vector<float> runningstd;
Chris@23 573 for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance)
Chris@23 574 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
Chris@23 575 }
Chris@23 576 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
Chris@23 577 for (int i = 0; i < 256; i++) {
Chris@23 578 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
Chris@23 579 if (runningstd[i] > 0) {
Chris@23 580 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
mail@41 581 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
Chris@23 582 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
mail@41 583 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
Chris@23 584 }
Chris@23 585 if (f2.values[i] < 0) {
Chris@23 586 cerr << "ERROR: negative value in logfreq spectrum" << endl;
Chris@23 587 }
Chris@23 588 }
Chris@23 589 fsOut[2].push_back(f2);
Chris@23 590 count++;
Chris@23 591 }
Chris@23 592 cerr << "done." << endl;
matthiasm@1 593
Chris@23 594 /** Semitone spectrum and chromagrams
Chris@23 595 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
Chris@23 596 is inferred using a non-negative least squares algorithm.
Chris@23 597 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
Chris@23 598 bass and treble stacked onto each other).
Chris@23 599 **/
matthiasm@42 600 if (m_useNNLS == 0) {
Chris@23 601 cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... ";
Chris@23 602 } else {
Chris@23 603 cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... ";
Chris@23 604 }
matthiasm@13 605
matthiasm@1 606
Chris@23 607 vector<vector<float> > chordogram;
Chris@23 608 vector<vector<int> > scoreChordogram;
Chris@23 609 vector<float> chordchange = vector<float>(fsOut[2].size(),0);
Chris@23 610 vector<float> oldchroma = vector<float>(12,0);
Chris@23 611 vector<float> oldbasschroma = vector<float>(12,0);
Chris@23 612 count = 0;
matthiasm@9 613
Chris@23 614 for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) {
Chris@23 615 Feature f2 = *it; // logfreq spectrum
Chris@23 616 Feature f3; // semitone spectrum
Chris@23 617 Feature f4; // treble chromagram
Chris@23 618 Feature f5; // bass chromagram
Chris@23 619 Feature f6; // treble and bass chromagram
matthiasm@1 620
Chris@23 621 f3.hasTimestamp = true;
Chris@23 622 f3.timestamp = f2.timestamp;
matthiasm@1 623
Chris@23 624 f4.hasTimestamp = true;
Chris@23 625 f4.timestamp = f2.timestamp;
matthiasm@1 626
Chris@23 627 f5.hasTimestamp = true;
Chris@23 628 f5.timestamp = f2.timestamp;
matthiasm@1 629
Chris@23 630 f6.hasTimestamp = true;
Chris@23 631 f6.timestamp = f2.timestamp;
matthiasm@1 632
Chris@29 633 float b[256];
matthiasm@1 634
Chris@23 635 bool some_b_greater_zero = false;
Chris@23 636 float sumb = 0;
Chris@23 637 for (int i = 0; i < 256; i++) {
Chris@23 638 // b[i] = m_dict[(256 * count + i) % (256 * 84)];
Chris@23 639 b[i] = f2.values[i];
Chris@23 640 sumb += b[i];
Chris@23 641 if (b[i] > 0) {
Chris@23 642 some_b_greater_zero = true;
Chris@23 643 }
Chris@23 644 }
matthiasm@1 645
Chris@23 646 // here's where the non-negative least squares algorithm calculates the note activation x
matthiasm@1 647
Chris@23 648 vector<float> chroma = vector<float>(12, 0);
Chris@23 649 vector<float> basschroma = vector<float>(12, 0);
Chris@23 650 float currval;
Chris@23 651 unsigned iSemitone = 0;
matthiasm@1 652
Chris@23 653 if (some_b_greater_zero) {
matthiasm@42 654 if (m_useNNLS == 0) {
Chris@23 655 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
Chris@23 656 currval = 0;
Chris@23 657 currval += b[iNote + 1 + -1] * 0.5;
Chris@23 658 currval += b[iNote + 1 + 0] * 1.0;
Chris@23 659 currval += b[iNote + 1 + 1] * 0.5;
Chris@23 660 f3.values.push_back(currval);
Chris@23 661 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
Chris@23 662 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
Chris@23 663 iSemitone++;
Chris@23 664 }
matthiasm@1 665
Chris@23 666 } else {
Chris@29 667 float x[84+1000];
Chris@23 668 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
Chris@23 669 vector<int> signifIndex;
Chris@23 670 int index=0;
Chris@23 671 sumb /= 84.0;
Chris@23 672 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
Chris@23 673 float currval = 0;
Chris@23 674 currval += b[iNote + 1 + -1];
Chris@23 675 currval += b[iNote + 1 + 0];
Chris@23 676 currval += b[iNote + 1 + 1];
Chris@23 677 if (currval > 0) signifIndex.push_back(index);
Chris@23 678 f3.values.push_back(0); // fill the values, change later
Chris@23 679 index++;
Chris@23 680 }
Chris@29 681 float rnorm;
Chris@29 682 float w[84+1000];
Chris@29 683 float zz[84+1000];
Chris@23 684 int indx[84+1000];
Chris@23 685 int mode;
Chris@23 686 int dictsize = 256*signifIndex.size();
Chris@23 687 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
Chris@29 688 float *curr_dict = new float[dictsize];
Chris@23 689 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
Chris@23 690 for (unsigned iBin = 0; iBin < 256; iBin++) {
Chris@23 691 curr_dict[iNote * 256 + iBin] = 1.0 * m_dict[signifIndex[iNote] * 256 + iBin];
Chris@23 692 }
Chris@23 693 }
Chris@29 694 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
Chris@23 695 delete [] curr_dict;
Chris@23 696 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
Chris@23 697 f3.values[signifIndex[iNote]] = x[iNote];
Chris@23 698 // cerr << mode << endl;
Chris@23 699 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
Chris@23 700 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
Chris@23 701 }
Chris@23 702 }
Chris@23 703 }
matthiasm@13 704
matthiasm@10 705
matthiasm@12 706
matthiasm@13 707
Chris@23 708 f4.values = chroma;
Chris@23 709 f5.values = basschroma;
Chris@23 710 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
Chris@23 711 f6.values = chroma;
matthiasm@1 712
Chris@23 713 if (m_doNormalizeChroma > 0) {
Chris@23 714 vector<float> chromanorm = vector<float>(3,0);
Chris@23 715 switch (int(m_doNormalizeChroma)) {
Chris@23 716 case 0: // should never end up here
Chris@23 717 break;
Chris@23 718 case 1:
Chris@23 719 chromanorm[0] = *max_element(f4.values.begin(), f4.values.end());
Chris@23 720 chromanorm[1] = *max_element(f5.values.begin(), f5.values.end());
Chris@23 721 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
Chris@23 722 break;
Chris@23 723 case 2:
Chris@23 724 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
Chris@23 725 chromanorm[0] += *it;
Chris@23 726 }
Chris@23 727 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
Chris@23 728 chromanorm[1] += *it;
Chris@23 729 }
Chris@23 730 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
Chris@23 731 chromanorm[2] += *it;
Chris@23 732 }
Chris@23 733 break;
Chris@23 734 case 3:
Chris@23 735 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
Chris@23 736 chromanorm[0] += pow(*it,2);
Chris@23 737 }
Chris@23 738 chromanorm[0] = sqrt(chromanorm[0]);
Chris@23 739 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
Chris@23 740 chromanorm[1] += pow(*it,2);
Chris@23 741 }
Chris@23 742 chromanorm[1] = sqrt(chromanorm[1]);
Chris@23 743 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
Chris@23 744 chromanorm[2] += pow(*it,2);
Chris@23 745 }
Chris@23 746 chromanorm[2] = sqrt(chromanorm[2]);
Chris@23 747 break;
Chris@23 748 }
Chris@23 749 if (chromanorm[0] > 0) {
Chris@23 750 for (int i = 0; i < f4.values.size(); i++) {
Chris@23 751 f4.values[i] /= chromanorm[0];
Chris@23 752 }
Chris@23 753 }
Chris@23 754 if (chromanorm[1] > 0) {
Chris@23 755 for (int i = 0; i < f5.values.size(); i++) {
Chris@23 756 f5.values[i] /= chromanorm[1];
Chris@23 757 }
Chris@23 758 }
Chris@23 759 if (chromanorm[2] > 0) {
Chris@23 760 for (int i = 0; i < f6.values.size(); i++) {
Chris@23 761 f6.values[i] /= chromanorm[2];
Chris@23 762 }
Chris@23 763 }
matthiasm@13 764
Chris@23 765 }
matthiasm@13 766
Chris@23 767 // local chord estimation
Chris@23 768 vector<float> currentChordSalience;
Chris@23 769 float tempchordvalue = 0;
Chris@23 770 float sumchordvalue = 0;
matthiasm@9 771
Chris@23 772 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23 773 tempchordvalue = 0;
Chris@23 774 for (int iBin = 0; iBin < 12; iBin++) {
Chris@23 775 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23 776 }
Chris@23 777 for (int iBin = 12; iBin < 24; iBin++) {
Chris@23 778 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23 779 }
Chris@23 780 sumchordvalue+=tempchordvalue;
Chris@23 781 currentChordSalience.push_back(tempchordvalue);
Chris@23 782 }
Chris@23 783 if (sumchordvalue > 0) {
Chris@23 784 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23 785 currentChordSalience[iChord] /= sumchordvalue;
Chris@23 786 }
Chris@23 787 } else {
Chris@23 788 currentChordSalience[nChord-1] = 1.0;
Chris@23 789 }
Chris@23 790 chordogram.push_back(currentChordSalience);
matthiasm@1 791
Chris@23 792 fsOut[3].push_back(f3);
Chris@23 793 fsOut[4].push_back(f4);
Chris@23 794 fsOut[5].push_back(f5);
Chris@23 795 fsOut[6].push_back(f6);
Chris@23 796 count++;
Chris@23 797 }
Chris@23 798 cerr << "done." << endl;
matthiasm@13 799
matthiasm@10 800
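Chris@23 801 /* Simple chord estimation
Chris@23 802 For every window of 2*halfwindowlength frames I sum the local chord estimates ("currentChordSalience", stored in chordogram) on the left and on the right of each candidate change point, and give a score (in scoreChordogram) to the best left/right chord pair; then I
Chris@23 803 take the maximum-scoring chord in every frame and mode-filter the resulting chord sequence. Very simple, don't do this at home...
Chris@23 804 */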
Chris@23 805 cerr << "[NNLS Chroma Plugin] Chord Estimation ... ";
Chris@23 806 count = 0;
Chris@23 807 int halfwindowlength = m_inputSampleRate / m_stepSize;
Chris@23 808 vector<int> chordSequence;
Chris@23 809 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram
Chris@23 810 vector<int> temp = vector<int>(nChord,0);
Chris@23 811 scoreChordogram.push_back(temp);
Chris@23 812 }
Chris@23 813 for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) {
Chris@23 814 int startIndex = count + 1;
Chris@23 815 int endIndex = count + 2 * halfwindowlength;
matthiasm@10 816
Chris@23 817 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
matthiasm@10 818
Chris@23 819 vector<int> chordCandidates;
Chris@23 820 for (unsigned iChord = 0; iChord < nChord-1; iChord++) {
Chris@23 821 // float currsum = 0;
Chris@23 822 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
Chris@23 823 // currsum += chordogram[iFrame][iChord];
Chris@23 824 // }
Chris@23 825 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
Chris@23 826 for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
Chris@23 827 if (chordogram[iFrame][iChord] > chordThreshold) {
Chris@23 828 chordCandidates.push_back(iChord);
Chris@23 829 break;
Chris@23 830 }
Chris@23 831 }
Chris@23 832 }
Chris@23 833 chordCandidates.push_back(nChord-1);
Chris@23 834 // cerr << chordCandidates.size() << endl;
Chris@23 835
Chris@23 836 float maxval = 0; // will be the value of the most salient *chord change* in this frame
Chris@23 837 float maxindex = 0; //... and the index thereof
Chris@23 838 unsigned bestchordL = nChord-1; // index of the best "left" chord
Chris@23 839 unsigned bestchordR = nChord-1; // index of the best "right" chord
Chris@23 840
Chris@23 841 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
Chris@23 842 // now find the max values on both sides of iWF
Chris@23 843 // left side:
Chris@23 844 float maxL = 0;
Chris@23 845 unsigned maxindL = nChord-1;
Chris@23 846 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
Chris@23 847 unsigned iChord = chordCandidates[kChord];
Chris@23 848 float currsum = 0;
Chris@23 849 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
Chris@23 850 currsum += chordogram[count+iFrame][iChord];
matthiasm@10 851 }
Chris@23 852 if (iChord == nChord-1) currsum *= 0.8;
Chris@23 853 if (currsum > maxL) {
Chris@23 854 maxL = currsum;
Chris@23 855 maxindL = iChord;
Chris@23 856 }
Chris@23 857 }
Chris@23 858 // right side:
Chris@23 859 float maxR = 0;
Chris@23 860 unsigned maxindR = nChord-1;
Chris@23 861 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
Chris@23 862 unsigned iChord = chordCandidates[kChord];
Chris@23 863 float currsum = 0;
Chris@23 864 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
Chris@23 865 currsum += chordogram[count+iFrame][iChord];
Chris@23 866 }
Chris@23 867 if (iChord == nChord-1) currsum *= 0.8;
Chris@23 868 if (currsum > maxR) {
Chris@23 869 maxR = currsum;
Chris@23 870 maxindR = iChord;
Chris@23 871 }
Chris@23 872 }
Chris@23 873 if (maxL+maxR > maxval) {
Chris@23 874 maxval = maxL+maxR;
Chris@23 875 maxindex = iWF;
Chris@23 876 bestchordL = maxindL;
Chris@23 877 bestchordR = maxindR;
Chris@23 878 }
matthiasm@3 879
Chris@23 880 }
Chris@23 881 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
Chris@23 882 // add a score to every chord-frame-point that was part of a maximum
Chris@23 883 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
Chris@23 884 scoreChordogram[iFrame+count][bestchordL]++;
Chris@23 885 }
Chris@23 886 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
Chris@23 887 scoreChordogram[iFrame+count][bestchordR]++;
Chris@23 888 }
Chris@23 889 if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
Chris@23 890 count++;
Chris@23 891 }
Chris@23 892 // cerr << "******* agent finished *******" << endl;
Chris@23 893 count = 0;
Chris@23 894 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
Chris@23 895 float maxval = 0; // will be the value of the most salient chord in this frame
Chris@23 896 float maxindex = 0; //... and the index thereof
Chris@23 897 for (unsigned iChord = 0; iChord < nChord; iChord++) {
Chris@23 898 if (scoreChordogram[count][iChord] > maxval) {
Chris@23 899 maxval = scoreChordogram[count][iChord];
Chris@23 900 maxindex = iChord;
Chris@23 901 // cerr << iChord << endl;
Chris@23 902 }
Chris@23 903 }
Chris@23 904 chordSequence.push_back(maxindex);
Chris@23 905 // cerr << "before modefilter, maxindex: " << maxindex << endl;
Chris@23 906 count++;
Chris@23 907 }
Chris@23 908 // cerr << "******* mode filter done *******" << endl;
matthiasm@10 909
matthiasm@3 910
Chris@23 911 // mode filter on chordSequence
Chris@23 912 count = 0;
Chris@23 913 string oldChord = "";
Chris@23 914 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
Chris@23 915 Feature f6 = *it;
Chris@23 916 Feature f7; // chord estimate
Chris@23 917 f7.hasTimestamp = true;
Chris@23 918 f7.timestamp = f6.timestamp;
Chris@23 919 Feature f8; // chord estimate
Chris@23 920 f8.hasTimestamp = true;
Chris@23 921 f8.timestamp = f6.timestamp;
matthiasm@17 922
Chris@23 923 vector<int> chordCount = vector<int>(nChord,0);
Chris@23 924 int maxChordCount = 0;
Chris@23 925 int maxChordIndex = nChord-1;
Chris@23 926 string maxChord;
Chris@23 927 int startIndex = max(count - halfwindowlength/2,0);
Chris@23 928 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
Chris@23 929 for (int i = startIndex; i < endIndex; i++) {
Chris@23 930 chordCount[chordSequence[i]]++;
Chris@23 931 if (chordCount[chordSequence[i]] > maxChordCount) {
Chris@23 932 // cerr << "start index " << startIndex << endl;
Chris@23 933 maxChordCount++;
Chris@23 934 maxChordIndex = chordSequence[i];
Chris@23 935 maxChord = m_chordnames[maxChordIndex];
Chris@23 936 }
Chris@23 937 }
Chris@23 938 // chordSequence[count] = maxChordIndex;
Chris@23 939 // cerr << maxChordIndex << endl;
Chris@23 940 f8.values.push_back(chordchange[count]/(halfwindowlength*2));
Chris@23 941 // cerr << chordchange[count] << endl;
Chris@23 942 fsOut[9].push_back(f8);
Chris@23 943 if (oldChord != maxChord) {
Chris@23 944 oldChord = maxChord;
matthiasm@3 945
Chris@23 946 // char buffer1 [50];
Chris@23 947 // if (maxChordIndex < nChord - 1) {
Chris@23 948 // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
Chris@23 949 // } else {
Chris@23 950 // sprintf(buffer1, "N");
Chris@23 951 // }
Chris@23 952 // f7.label = buffer1;
Chris@23 953 f7.label = m_chordnames[maxChordIndex];
Chris@23 954 fsOut[7].push_back(f7);
Chris@23 955 }
Chris@23 956 count++;
Chris@23 957 }
Chris@23 958 Feature f7; // last chord estimate
Chris@23 959 f7.hasTimestamp = true;
Chris@23 960 f7.timestamp = fsOut[6][fsOut[6].size()-1].timestamp;
Chris@23 961 f7.label = "N";
Chris@23 962 fsOut[7].push_back(f7);
Chris@23 963 cerr << "done." << endl;
Chris@23 964 // // musicity
Chris@23 965 // count = 0;
Chris@23 966 // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
Chris@23 967 // vector<float> musicityValue;
Chris@23 968 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
Chris@23 969 // Feature f4 = *it;
Chris@23 970 //
Chris@23 971 // int startIndex = max(count - musicitykernelwidth/2,0);
Chris@23 972 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
Chris@23 973 // float chromasum = 0;
Chris@23 974 // float diffsum = 0;
Chris@23 975 // for (int k = 0; k < 12; k++) {
Chris@23 976 // for (int i = startIndex + 1; i < endIndex; i++) {
Chris@23 977 // chromasum += pow(fsOut[4][i].values[k],2);
Chris@23 978 // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]);
Chris@23 979 // }
Chris@23 980 // }
Chris@23 981 // diffsum /= chromasum;
Chris@23 982 // musicityValue.push_back(diffsum);
Chris@23 983 // count++;
Chris@23 984 // }
Chris@23 985 //
Chris@23 986 // float musicityThreshold = 0.44;
Chris@23 987 // if (m_stepSize == 4096) {
Chris@23 988 // musicityThreshold = 0.74;
Chris@23 989 // }
Chris@23 990 // if (m_stepSize == 4410) {
Chris@23 991 // musicityThreshold = 0.77;
Chris@23 992 // }
Chris@23 993 //
Chris@23 994 // count = 0;
Chris@23 995 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
Chris@23 996 // Feature f4 = *it;
Chris@23 997 // Feature f8; // musicity
Chris@23 998 // Feature f9; // musicity segmenter
Chris@23 999 //
Chris@23 1000 // f8.hasTimestamp = true;
Chris@23 1001 // f8.timestamp = f4.timestamp;
Chris@23 1002 // f9.hasTimestamp = true;
Chris@23 1003 // f9.timestamp = f4.timestamp;
Chris@23 1004 //
Chris@23 1005 // int startIndex = max(count - musicitykernelwidth/2,0);
Chris@23 1006 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
Chris@23 1007 // int musicityCount = 0;
Chris@23 1008 // for (int i = startIndex; i <= endIndex; i++) {
Chris@23 1009 // if (musicityValue[i] > musicityThreshold) musicityCount++;
Chris@23 1010 // }
Chris@23 1011 // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1);
Chris@23 1012 //
Chris@23 1013 // if (isSpeech) {
Chris@23 1014 // if (oldlabeltype != 2) {
Chris@23 1015 // f9.label = "Speech";
Chris@23 1016 // fsOut[9].push_back(f9);
Chris@23 1017 // oldlabeltype = 2;
Chris@23 1018 // }
Chris@23 1019 // } else {
Chris@23 1020 // if (oldlabeltype != 1) {
Chris@23 1021 // f9.label = "Music";
Chris@23 1022 // fsOut[9].push_back(f9);
Chris@23 1023 // oldlabeltype = 1;
Chris@23 1024 // }
Chris@23 1025 // }
Chris@23 1026 // f8.values.push_back(musicityValue[count]);
Chris@23 1027 // fsOut[8].push_back(f8);
Chris@23 1028 // count++;
Chris@23 1029 // }
Chris@23 1030 return fsOut;
matthiasm@0 1031
matthiasm@0 1032 }
matthiasm@0 1033
Chris@35 1034 #endif