annotate NNLSBase.cpp @ 41:d6bb9b43ac1c matthiasm-plugin

README and new parameters, not tested yet
author Matthias Mauch <mail@matthiasmauch.net>
date Fri, 22 Oct 2010 21:43:57 +0900
parents cf8898a0174c
children d01f94d58ef0
rev   line source
Chris@23 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
matthiasm@0 2
Chris@35 3 /*
Chris@35 4 NNLS-Chroma / Chordino
Chris@35 5
Chris@35 6 Audio feature extraction plugins for chromagram and chord
Chris@35 7 estimation.
Chris@35 8
Chris@35 9 Centre for Digital Music, Queen Mary University of London.
Chris@35 10 This file copyright 2008-2010 Matthias Mauch and QMUL.
Chris@35 11
Chris@35 12 This program is free software; you can redistribute it and/or
Chris@35 13 modify it under the terms of the GNU General Public License as
Chris@35 14 published by the Free Software Foundation; either version 2 of the
Chris@35 15 License, or (at your option) any later version. See the file
Chris@35 16 COPYING included with this distribution for more information.
Chris@35 17 */
Chris@35 18
Chris@35 19 #include "NNLSBase.h"
Chris@27 20
Chris@27 21 #include "chromamethods.h"
Chris@27 22
Chris@27 23 #include <cstdlib>
Chris@27 24 #include <fstream>
matthiasm@0 25 #include <cmath>
matthiasm@9 26
Chris@27 27 #include <algorithm>
matthiasm@0 28
matthiasm@0 29 const bool debug_on = false;
matthiasm@0 30
Chris@27 31 const vector<float> hw(hammingwind, hammingwind+19);
matthiasm@0 32
Chris@35 33 NNLSBase::NNLSBase(float inputSampleRate) :
Chris@23 34 Plugin(inputSampleRate),
Chris@35 35 m_logSpectrum(0),
Chris@23 36 m_blockSize(0),
Chris@23 37 m_stepSize(0),
Chris@23 38 m_lengthOfNoteIndex(0),
Chris@23 39 m_meanTuning0(0),
Chris@23 40 m_meanTuning1(0),
Chris@23 41 m_meanTuning2(0),
Chris@23 42 m_localTuning0(0),
Chris@23 43 m_localTuning1(0),
Chris@23 44 m_localTuning2(0),
mail@41 45 m_whitening(1.0),
Chris@23 46 m_preset(0.0),
Chris@23 47 m_localTuning(0),
Chris@23 48 m_kernelValue(0),
Chris@23 49 m_kernelFftIndex(0),
Chris@23 50 m_kernelNoteIndex(0),
Chris@23 51 m_dict(0),
Chris@23 52 m_tuneLocal(false),
Chris@23 53 m_dictID(0),
Chris@23 54 m_chorddict(0),
Chris@23 55 m_chordnames(0),
Chris@23 56 m_doNormalizeChroma(0),
mail@41 57 m_rollon(0.0),
mail@41 58 m_s(0.7)
matthiasm@0 59 {
Chris@35 60 if (debug_on) cerr << "--> NNLSBase" << endl;
matthiasm@7 61
Chris@23 62 // make the *note* dictionary matrix
Chris@23 63 m_dict = new float[nNote * 84];
Chris@23 64 for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0;
mail@41 65 dictionaryMatrix(m_dict, 0.7);
matthiasm@7 66
Chris@23 67 // get the *chord* dictionary from file (if the file exists)
Chris@23 68 m_chordnames = chordDictionary(&m_chorddict);
matthiasm@0 69 }
matthiasm@0 70
matthiasm@0 71
Chris@35 72 NNLSBase::~NNLSBase()
matthiasm@0 73 {
Chris@35 74 if (debug_on) cerr << "--> ~NNLSBase" << endl;
Chris@23 75 delete [] m_dict;
matthiasm@0 76 }
matthiasm@0 77
matthiasm@0 78 string
Chris@35 79 NNLSBase::getMaker() const
matthiasm@0 80 {
Chris@23 81 if (debug_on) cerr << "--> getMaker" << endl;
matthiasm@0 82 // Your name here
matthiasm@0 83 return "Matthias Mauch";
matthiasm@0 84 }
matthiasm@0 85
matthiasm@0 86 int
Chris@35 87 NNLSBase::getPluginVersion() const
matthiasm@0 88 {
Chris@23 89 if (debug_on) cerr << "--> getPluginVersion" << endl;
matthiasm@0 90 // Increment this each time you release a version that behaves
matthiasm@0 91 // differently from the previous one
matthiasm@0 92 return 1;
matthiasm@0 93 }
matthiasm@0 94
matthiasm@0 95 string
Chris@35 96 NNLSBase::getCopyright() const
matthiasm@0 97 {
Chris@23 98 if (debug_on) cerr << "--> getCopyright" << endl;
matthiasm@0 99 // This function is not ideally named. It does not necessarily
matthiasm@0 100 // need to say who made the plugin -- getMaker does that -- but it
matthiasm@0 101 // should indicate the terms under which it is distributed. For
matthiasm@0 102 // example, "Copyright (year). All Rights Reserved", or "GPL"
Chris@35 103 return "GPL";
matthiasm@0 104 }
matthiasm@0 105
Chris@35 106 NNLSBase::InputDomain
Chris@35 107 NNLSBase::getInputDomain() const
matthiasm@0 108 {
Chris@23 109 if (debug_on) cerr << "--> getInputDomain" << endl;
matthiasm@0 110 return FrequencyDomain;
matthiasm@0 111 }
matthiasm@0 112
matthiasm@0 113 size_t
Chris@35 114 NNLSBase::getPreferredBlockSize() const
matthiasm@0 115 {
Chris@23 116 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
matthiasm@0 117 return 16384; // 0 means "I can handle any block size"
matthiasm@0 118 }
matthiasm@0 119
matthiasm@0 120 size_t
Chris@35 121 NNLSBase::getPreferredStepSize() const
matthiasm@0 122 {
Chris@23 123 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
matthiasm@0 124 return 2048; // 0 means "anything sensible"; in practice this
Chris@23 125 // means the same as the block size for TimeDomain
Chris@23 126 // plugins, or half of it for FrequencyDomain plugins
matthiasm@0 127 }
matthiasm@0 128
matthiasm@0 129 size_t
Chris@35 130 NNLSBase::getMinChannelCount() const
matthiasm@0 131 {
Chris@23 132 if (debug_on) cerr << "--> getMinChannelCount" << endl;
matthiasm@0 133 return 1;
matthiasm@0 134 }
matthiasm@0 135
matthiasm@0 136 size_t
Chris@35 137 NNLSBase::getMaxChannelCount() const
matthiasm@0 138 {
Chris@23 139 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
matthiasm@0 140 return 1;
matthiasm@0 141 }
matthiasm@0 142
Chris@35 143 NNLSBase::ParameterList
Chris@35 144 NNLSBase::getParameterDescriptors() const
matthiasm@0 145 {
Chris@23 146 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
matthiasm@0 147 ParameterList list;
matthiasm@0 148
mail@41 149 ParameterDescriptor d0;
mail@41 150 d0.identifier = "rollon";
mail@41 151 d0.name = "spectral roll-on";
mail@41 152 d0.description = "The bins below the spectral roll-on quantile will be set to 0.";
mail@41 153 d0.unit = "";
mail@41 154 d0.minValue = 0;
mail@41 155 d0.maxValue = 0.05;
mail@41 156 d0.defaultValue = 0;
mail@41 157 d0.isQuantized = false;
mail@41 158 list.push_back(d0);
matthiasm@4 159
matthiasm@4 160 ParameterDescriptor d1;
matthiasm@4 161 d1.identifier = "tuningmode";
matthiasm@4 162 d1.name = "tuning mode";
matthiasm@4 163 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
matthiasm@4 164 d1.unit = "";
matthiasm@4 165 d1.minValue = 0;
matthiasm@4 166 d1.maxValue = 1;
matthiasm@4 167 d1.defaultValue = 0;
matthiasm@4 168 d1.isQuantized = true;
matthiasm@4 169 d1.valueNames.push_back("global tuning");
matthiasm@4 170 d1.valueNames.push_back("local tuning");
matthiasm@4 171 d1.quantizeStep = 1.0;
matthiasm@4 172 list.push_back(d1);
matthiasm@4 173
mail@41 174 ParameterDescriptor d2;
mail@41 175 d2.identifier = "whitening";
mail@41 176 d2.name = "spectral whitening";
mail@41 177 d2.description = "Spectral whitening: no whitening - 0; whitening - 1.";
mail@41 178 d2.unit = "";
mail@41 179 d2.isQuantized = true;
mail@41 180 d2.minValue = 0.0;
mail@41 181 d2.maxValue = 1.0;
mail@41 182 d2.defaultValue = 1.0;
mail@41 183 d2.isQuantized = false;
mail@41 184 list.push_back(d2);
mail@41 185
mail@41 186 ParameterDescriptor d3;
mail@41 187 d3.identifier = "s";
mail@41 188 d3.name = "spectral shape";
mail@41 189 d3.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
mail@41 190 d3.unit = "";
mail@41 191 d3.minValue = 0.5;
mail@41 192 d3.maxValue = 0.9;
mail@41 193 d3.defaultValue = 0.7;
mail@41 194 d3.isQuantized = false;
mail@41 195 list.push_back(d3);
mail@41 196
Chris@23 197 ParameterDescriptor d4;
matthiasm@12 198 d4.identifier = "chromanormalize";
matthiasm@12 199 d4.name = "chroma normalization";
matthiasm@12 200 d4.description = "How shall the chroma vector be normalized?";
matthiasm@12 201 d4.unit = "";
matthiasm@12 202 d4.minValue = 0;
matthiasm@13 203 d4.maxValue = 3;
matthiasm@12 204 d4.defaultValue = 0;
matthiasm@12 205 d4.isQuantized = true;
matthiasm@13 206 d4.valueNames.push_back("none");
matthiasm@13 207 d4.valueNames.push_back("maximum norm");
Chris@23 208 d4.valueNames.push_back("L1 norm");
Chris@23 209 d4.valueNames.push_back("L2 norm");
matthiasm@12 210 d4.quantizeStep = 1.0;
matthiasm@12 211 list.push_back(d4);
matthiasm@4 212
matthiasm@0 213 return list;
matthiasm@0 214 }
matthiasm@0 215
matthiasm@0 216 float
Chris@35 217 NNLSBase::getParameter(string identifier) const
matthiasm@0 218 {
Chris@23 219 if (debug_on) cerr << "--> getParameter" << endl;
matthiasm@0 220 if (identifier == "notedict") {
matthiasm@0 221 return m_dictID;
matthiasm@0 222 }
matthiasm@0 223
mail@41 224 if (identifier == "whitening") {
mail@41 225 return m_whitening;
mail@41 226 }
mail@41 227
mail@41 228 if (identifier == "s") {
mail@41 229 return m_s;
matthiasm@0 230 }
matthiasm@17 231
Chris@23 232 if (identifier == "rollon") {
matthiasm@17 233 return m_rollon;
matthiasm@17 234 }
matthiasm@0 235
matthiasm@0 236 if (identifier == "tuningmode") {
matthiasm@0 237 if (m_tuneLocal) {
matthiasm@0 238 return 1.0;
matthiasm@0 239 } else {
matthiasm@0 240 return 0.0;
matthiasm@0 241 }
matthiasm@0 242 }
Chris@23 243 if (identifier == "preset") {
Chris@23 244 return m_preset;
matthiasm@3 245 }
Chris@23 246 if (identifier == "chromanormalize") {
Chris@23 247 return m_doNormalizeChroma;
matthiasm@12 248 }
matthiasm@0 249 return 0;
matthiasm@0 250
matthiasm@0 251 }
matthiasm@0 252
matthiasm@0 253 void
Chris@35 254 NNLSBase::setParameter(string identifier, float value)
matthiasm@0 255 {
Chris@23 256 if (debug_on) cerr << "--> setParameter" << endl;
matthiasm@0 257 if (identifier == "notedict") {
matthiasm@0 258 m_dictID = (int) value;
matthiasm@0 259 }
matthiasm@0 260
mail@41 261 if (identifier == "whitening") {
mail@41 262 m_whitening = value;
matthiasm@0 263 }
matthiasm@0 264
mail@41 265 if (identifier == "s") {
mail@41 266 m_s = value;
mail@41 267 }
mail@41 268
matthiasm@0 269 if (identifier == "tuningmode") {
matthiasm@0 270 m_tuneLocal = (value > 0) ? true : false;
matthiasm@0 271 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
matthiasm@0 272 }
matthiasm@3 273 if (identifier == "preset") {
matthiasm@3 274 m_preset = value;
Chris@23 275 if (m_preset == 0.0) {
Chris@23 276 m_tuneLocal = false;
mail@41 277 m_whitening = 1.0;
Chris@23 278 m_dictID = 0.0;
Chris@23 279 }
Chris@23 280 if (m_preset == 1.0) {
Chris@23 281 m_tuneLocal = false;
mail@41 282 m_whitening = 1.0;
Chris@23 283 m_dictID = 1.0;
Chris@23 284 }
Chris@23 285 if (m_preset == 2.0) {
Chris@23 286 m_tuneLocal = false;
mail@41 287 m_whitening = 0.7;
Chris@23 288 m_dictID = 0.0;
Chris@23 289 }
matthiasm@3 290 }
Chris@23 291 if (identifier == "chromanormalize") {
Chris@23 292 m_doNormalizeChroma = value;
Chris@23 293 }
matthiasm@17 294
Chris@23 295 if (identifier == "rollon") {
Chris@23 296 m_rollon = value;
Chris@23 297 }
matthiasm@0 298 }
matthiasm@0 299
Chris@35 300 NNLSBase::ProgramList
Chris@35 301 NNLSBase::getPrograms() const
matthiasm@0 302 {
Chris@23 303 if (debug_on) cerr << "--> getPrograms" << endl;
matthiasm@0 304 ProgramList list;
matthiasm@0 305
matthiasm@0 306 // If you have no programs, return an empty list (or simply don't
matthiasm@0 307 // implement this function or getCurrentProgram/selectProgram)
matthiasm@0 308
matthiasm@0 309 return list;
matthiasm@0 310 }
matthiasm@0 311
matthiasm@0 312 string
Chris@35 313 NNLSBase::getCurrentProgram() const
matthiasm@0 314 {
Chris@23 315 if (debug_on) cerr << "--> getCurrentProgram" << endl;
matthiasm@0 316 return ""; // no programs
matthiasm@0 317 }
matthiasm@0 318
matthiasm@0 319 void
Chris@35 320 NNLSBase::selectProgram(string name)
matthiasm@0 321 {
Chris@23 322 if (debug_on) cerr << "--> selectProgram" << endl;
matthiasm@0 323 }
matthiasm@0 324
matthiasm@0 325
matthiasm@0 326 bool
Chris@35 327 NNLSBase::initialise(size_t channels, size_t stepSize, size_t blockSize)
matthiasm@0 328 {
Chris@23 329 if (debug_on) {
Chris@23 330 cerr << "--> initialise";
Chris@23 331 }
matthiasm@1 332
matthiasm@0 333 if (channels < getMinChannelCount() ||
matthiasm@0 334 channels > getMaxChannelCount()) return false;
matthiasm@0 335 m_blockSize = blockSize;
matthiasm@0 336 m_stepSize = stepSize;
Chris@35 337 m_frameCount = 0;
Chris@23 338 int tempn = 256 * m_blockSize/2;
Chris@23 339 // cerr << "length of tempkernel : " << tempn << endl;
Chris@23 340 float *tempkernel;
matthiasm@1 341
Chris@23 342 tempkernel = new float[tempn];
matthiasm@1 343
Chris@23 344 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
Chris@23 345 m_kernelValue.clear();
Chris@23 346 m_kernelFftIndex.clear();
Chris@23 347 m_kernelNoteIndex.clear();
Chris@23 348 int countNonzero = 0;
Chris@23 349 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
Chris@23 350 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) {
Chris@23 351 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
Chris@23 352 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
Chris@23 353 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
Chris@23 354 countNonzero++;
Chris@23 355 }
Chris@23 356 m_kernelFftIndex.push_back(iFFT);
Chris@23 357 m_kernelNoteIndex.push_back(iNote);
Chris@23 358 }
Chris@23 359 }
Chris@23 360 }
Chris@23 361 // cerr << "nonzero count : " << countNonzero << endl;
Chris@23 362 delete [] tempkernel;
Chris@35 363 /*
Chris@23 364 ofstream myfile;
Chris@23 365 myfile.open ("matrix.txt");
matthiasm@3 366 // myfile << "Writing this to a file.\n";
Chris@23 367 for (int i = 0; i < nNote * 84; ++i) {
Chris@23 368 myfile << m_dict[i] << endl;
Chris@23 369 }
matthiasm@3 370 myfile.close();
Chris@35 371 */
matthiasm@0 372 return true;
matthiasm@0 373 }
matthiasm@0 374
matthiasm@0 375 void
Chris@35 376 NNLSBase::reset()
matthiasm@0 377 {
Chris@23 378 if (debug_on) cerr << "--> reset";
matthiasm@4 379
matthiasm@0 380 // Clear buffers, reset stored values, etc
Chris@35 381 m_frameCount = 0;
Chris@23 382 m_dictID = 0;
Chris@35 383 m_logSpectrum.clear();
Chris@23 384 m_meanTuning0 = 0;
Chris@23 385 m_meanTuning1 = 0;
Chris@23 386 m_meanTuning2 = 0;
Chris@23 387 m_localTuning0 = 0;
Chris@23 388 m_localTuning1 = 0;
Chris@23 389 m_localTuning2 = 0;
Chris@23 390 m_localTuning.clear();
matthiasm@0 391 }
matthiasm@0 392
Chris@35 393 void
Chris@35 394 NNLSBase::baseProcess(const float *const *inputBuffers, Vamp::RealTime timestamp)
matthiasm@0 395 {
Chris@35 396 m_frameCount++;
Chris@23 397 float *magnitude = new float[m_blockSize/2];
matthiasm@0 398
Chris@23 399 const float *fbuf = inputBuffers[0];
Chris@23 400 float energysum = 0;
Chris@23 401 // make magnitude
Chris@23 402 float maxmag = -10000;
Chris@23 403 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
Chris@23 404 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
Chris@23 405 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
Chris@23 406 if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin];
Chris@23 407 if (m_rollon > 0) {
Chris@23 408 energysum += pow(magnitude[iBin],2);
Chris@23 409 }
Chris@23 410 }
matthiasm@14 411
Chris@23 412 float cumenergy = 0;
Chris@23 413 if (m_rollon > 0) {
Chris@23 414 for (size_t iBin = 2; iBin < m_blockSize/2; iBin++) {
Chris@23 415 cumenergy += pow(magnitude[iBin],2);
Chris@23 416 if (cumenergy < energysum * m_rollon) magnitude[iBin-2] = 0;
Chris@23 417 else break;
Chris@23 418 }
Chris@23 419 }
matthiasm@17 420
Chris@23 421 if (maxmag < 2) {
Chris@23 422 // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl;
Chris@23 423 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
Chris@23 424 magnitude[iBin] = 0;
Chris@23 425 }
Chris@23 426 }
matthiasm@4 427
Chris@23 428 // note magnitude mapping using pre-calculated matrix
Chris@23 429 float *nm = new float[nNote]; // note magnitude
Chris@23 430 for (size_t iNote = 0; iNote < nNote; iNote++) {
Chris@23 431 nm[iNote] = 0; // initialise as 0
Chris@23 432 }
Chris@23 433 int binCount = 0;
Chris@23 434 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
Chris@23 435 // cerr << ".";
Chris@23 436 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
Chris@23 437 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl;
Chris@23 438 binCount++;
Chris@23 439 }
Chris@23 440 // cerr << nm[20];
Chris@23 441 // cerr << endl;
matthiasm@0 442
matthiasm@0 443
Chris@35 444 float one_over_N = 1.0/m_frameCount;
matthiasm@0 445 // update means of complex tuning variables
Chris@35 446 m_meanTuning0 *= float(m_frameCount-1)*one_over_N;
Chris@35 447 m_meanTuning1 *= float(m_frameCount-1)*one_over_N;
Chris@35 448 m_meanTuning2 *= float(m_frameCount-1)*one_over_N;
matthiasm@0 449
matthiasm@0 450 for (int iTone = 0; iTone < 160; iTone = iTone + 3) {
matthiasm@0 451 m_meanTuning0 += nm[iTone + 0]*one_over_N;
matthiasm@0 452 m_meanTuning1 += nm[iTone + 1]*one_over_N;
matthiasm@0 453 m_meanTuning2 += nm[iTone + 2]*one_over_N;
Chris@23 454 float ratioOld = 0.997;
matthiasm@3 455 m_localTuning0 *= ratioOld; m_localTuning0 += nm[iTone + 0] * (1 - ratioOld);
matthiasm@3 456 m_localTuning1 *= ratioOld; m_localTuning1 += nm[iTone + 1] * (1 - ratioOld);
matthiasm@3 457 m_localTuning2 *= ratioOld; m_localTuning2 += nm[iTone + 2] * (1 - ratioOld);
matthiasm@0 458 }
matthiasm@0 459
matthiasm@0 460 // if (m_tuneLocal) {
Chris@23 461 // local tuning
Chris@23 462 float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2;
Chris@23 463 float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2;
Chris@23 464 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
Chris@23 465 m_localTuning.push_back(normalisedtuning);
matthiasm@0 466
Chris@23 467 Feature f1; // logfreqspec
Chris@23 468 f1.hasTimestamp = true;
matthiasm@0 469 f1.timestamp = timestamp;
Chris@23 470 for (size_t iNote = 0; iNote < nNote; iNote++) {
Chris@23 471 f1.values.push_back(nm[iNote]);
Chris@23 472 }
matthiasm@0 473
matthiasm@0 474 // deletes
matthiasm@0 475 delete[] magnitude;
matthiasm@0 476 delete[] nm;
matthiasm@0 477
Chris@35 478 m_logSpectrum.push_back(f1); // remember note magnitude
matthiasm@0 479 }
matthiasm@0 480
Chris@35 481
Chris@35 482 #ifdef NOT_DEFINED
Chris@35 483
Chris@35 484 NNLSBase::FeatureSet
Chris@35 485 NNLSBase::getRemainingFeatures()
matthiasm@0 486 {
Chris@23 487 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
Chris@23 488 FeatureSet fsOut;
Chris@35 489 if (m_logSpectrum.size() == 0) return fsOut;
Chris@23 490 int nChord = m_chordnames.size();
Chris@23 491 //
Chris@23 492 /** Calculate Tuning
Chris@23 493 calculate tuning from (using the angle of the complex number defined by the
Chris@23 494 cumulative mean real and imag values)
Chris@23 495 **/
Chris@23 496 float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
Chris@23 497 float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
Chris@23 498 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
Chris@23 499 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
Chris@23 500 int intShift = floor(normalisedtuning * 3);
Chris@23 501 float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
matthiasm@1 502
Chris@23 503 char buffer0 [50];
matthiasm@1 504
Chris@23 505 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
matthiasm@1 506
Chris@23 507 // cerr << "normalisedtuning: " << normalisedtuning << '\n';
matthiasm@1 508
Chris@23 509 // push tuning to FeatureSet fsOut
Chris@23 510 Feature f0; // tuning
Chris@23 511 f0.hasTimestamp = true;
Chris@23 512 f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));;
Chris@23 513 f0.label = buffer0;
Chris@23 514 fsOut[0].push_back(f0);
matthiasm@1 515
Chris@23 516 /** Tune Log-Frequency Spectrogram
Chris@23 517 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
Chris@23 518 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
Chris@23 519 **/
Chris@23 520 cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... ";
matthiasm@13 521
Chris@23 522 float tempValue = 0;
Chris@23 523 float dbThreshold = 0; // relative to the background spectrum
Chris@23 524 float thresh = pow(10,dbThreshold/20);
Chris@23 525 // cerr << "tune local ? " << m_tuneLocal << endl;
Chris@23 526 int count = 0;
matthiasm@1 527
Chris@35 528 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
Chris@23 529 Feature f1 = *i;
Chris@23 530 Feature f2; // tuned log-frequency spectrum
Chris@23 531 f2.hasTimestamp = true;
Chris@23 532 f2.timestamp = f1.timestamp;
Chris@23 533 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
matthiasm@1 534
Chris@23 535 if (m_tuneLocal) {
Chris@23 536 intShift = floor(m_localTuning[count] * 3);
Chris@23 537 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
Chris@23 538 }
matthiasm@1 539
Chris@23 540 // cerr << intShift << " " << intFactor << endl;
matthiasm@1 541
Chris@23 542 for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
Chris@23 543 tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor;
Chris@23 544 f2.values.push_back(tempValue);
Chris@23 545 }
matthiasm@1 546
Chris@23 547 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
Chris@23 548 vector<float> runningmean = SpecialConvolution(f2.values,hw);
Chris@23 549 vector<float> runningstd;
Chris@23 550 for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance)
Chris@23 551 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
Chris@23 552 }
Chris@23 553 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
Chris@23 554 for (int i = 0; i < 256; i++) {
Chris@23 555 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
Chris@23 556 if (runningstd[i] > 0) {
Chris@23 557 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
mail@41 558 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
Chris@23 559 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
mail@41 560 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
Chris@23 561 }
Chris@23 562 if (f2.values[i] < 0) {
Chris@23 563 cerr << "ERROR: negative value in logfreq spectrum" << endl;
Chris@23 564 }
Chris@23 565 }
Chris@23 566 fsOut[2].push_back(f2);
Chris@23 567 count++;
Chris@23 568 }
Chris@23 569 cerr << "done." << endl;
matthiasm@1 570
Chris@23 571 /** Semitone spectrum and chromagrams
Chris@23 572 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
Chris@23 573 is inferred using a non-negative least squares algorithm.
Chris@23 574 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
Chris@23 575 bass and treble stacked onto each other).
Chris@23 576 **/
Chris@23 577 if (m_dictID == 1) {
Chris@23 578 cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... ";
Chris@23 579 } else {
Chris@23 580 cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... ";
Chris@23 581 }
matthiasm@13 582
matthiasm@1 583
Chris@23 584 vector<vector<float> > chordogram;
Chris@23 585 vector<vector<int> > scoreChordogram;
Chris@23 586 vector<float> chordchange = vector<float>(fsOut[2].size(),0);
Chris@23 587 vector<float> oldchroma = vector<float>(12,0);
Chris@23 588 vector<float> oldbasschroma = vector<float>(12,0);
Chris@23 589 count = 0;
matthiasm@9 590
Chris@23 591 for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) {
Chris@23 592 Feature f2 = *it; // logfreq spectrum
Chris@23 593 Feature f3; // semitone spectrum
Chris@23 594 Feature f4; // treble chromagram
Chris@23 595 Feature f5; // bass chromagram
Chris@23 596 Feature f6; // treble and bass chromagram
matthiasm@1 597
Chris@23 598 f3.hasTimestamp = true;
Chris@23 599 f3.timestamp = f2.timestamp;
matthiasm@1 600
Chris@23 601 f4.hasTimestamp = true;
Chris@23 602 f4.timestamp = f2.timestamp;
matthiasm@1 603
Chris@23 604 f5.hasTimestamp = true;
Chris@23 605 f5.timestamp = f2.timestamp;
matthiasm@1 606
Chris@23 607 f6.hasTimestamp = true;
Chris@23 608 f6.timestamp = f2.timestamp;
matthiasm@1 609
Chris@29 610 float b[256];
matthiasm@1 611
Chris@23 612 bool some_b_greater_zero = false;
Chris@23 613 float sumb = 0;
Chris@23 614 for (int i = 0; i < 256; i++) {
Chris@23 615 // b[i] = m_dict[(256 * count + i) % (256 * 84)];
Chris@23 616 b[i] = f2.values[i];
Chris@23 617 sumb += b[i];
Chris@23 618 if (b[i] > 0) {
Chris@23 619 some_b_greater_zero = true;
Chris@23 620 }
Chris@23 621 }
matthiasm@1 622
Chris@23 623 // here's where the non-negative least squares algorithm calculates the note activation x
matthiasm@1 624
Chris@23 625 vector<float> chroma = vector<float>(12, 0);
Chris@23 626 vector<float> basschroma = vector<float>(12, 0);
Chris@23 627 float currval;
Chris@23 628 unsigned iSemitone = 0;
matthiasm@1 629
Chris@23 630 if (some_b_greater_zero) {
Chris@23 631 if (m_dictID == 1) {
Chris@23 632 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
Chris@23 633 currval = 0;
Chris@23 634 currval += b[iNote + 1 + -1] * 0.5;
Chris@23 635 currval += b[iNote + 1 + 0] * 1.0;
Chris@23 636 currval += b[iNote + 1 + 1] * 0.5;
Chris@23 637 f3.values.push_back(currval);
Chris@23 638 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
Chris@23 639 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
Chris@23 640 iSemitone++;
Chris@23 641 }
matthiasm@1 642
Chris@23 643 } else {
Chris@29 644 float x[84+1000];
Chris@23 645 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
Chris@23 646 vector<int> signifIndex;
Chris@23 647 int index=0;
Chris@23 648 sumb /= 84.0;
Chris@23 649 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
Chris@23 650 float currval = 0;
Chris@23 651 currval += b[iNote + 1 + -1];
Chris@23 652 currval += b[iNote + 1 + 0];
Chris@23 653 currval += b[iNote + 1 + 1];
Chris@23 654 if (currval > 0) signifIndex.push_back(index);
Chris@23 655 f3.values.push_back(0); // fill the values, change later
Chris@23 656 index++;
Chris@23 657 }
Chris@29 658 float rnorm;
Chris@29 659 float w[84+1000];
Chris@29 660 float zz[84+1000];
Chris@23 661 int indx[84+1000];
Chris@23 662 int mode;
Chris@23 663 int dictsize = 256*signifIndex.size();
Chris@23 664 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
Chris@29 665 float *curr_dict = new float[dictsize];
Chris@23 666 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
Chris@23 667 for (unsigned iBin = 0; iBin < 256; iBin++) {
Chris@23 668 curr_dict[iNote * 256 + iBin] = 1.0 * m_dict[signifIndex[iNote] * 256 + iBin];
Chris@23 669 }
Chris@23 670 }
Chris@29 671 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
Chris@23 672 delete [] curr_dict;
Chris@23 673 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
Chris@23 674 f3.values[signifIndex[iNote]] = x[iNote];
Chris@23 675 // cerr << mode << endl;
Chris@23 676 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
Chris@23 677 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
Chris@23 678 }
Chris@23 679 }
Chris@23 680 }
matthiasm@13 681
matthiasm@10 682
matthiasm@12 683
matthiasm@13 684
Chris@23 685 f4.values = chroma;
Chris@23 686 f5.values = basschroma;
Chris@23 687 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
Chris@23 688 f6.values = chroma;
matthiasm@1 689
Chris@23 690 if (m_doNormalizeChroma > 0) {
Chris@23 691 vector<float> chromanorm = vector<float>(3,0);
Chris@23 692 switch (int(m_doNormalizeChroma)) {
Chris@23 693 case 0: // should never end up here
Chris@23 694 break;
Chris@23 695 case 1:
Chris@23 696 chromanorm[0] = *max_element(f4.values.begin(), f4.values.end());
Chris@23 697 chromanorm[1] = *max_element(f5.values.begin(), f5.values.end());
Chris@23 698 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
Chris@23 699 break;
Chris@23 700 case 2:
Chris@23 701 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
Chris@23 702 chromanorm[0] += *it;
Chris@23 703 }
Chris@23 704 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
Chris@23 705 chromanorm[1] += *it;
Chris@23 706 }
Chris@23 707 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
Chris@23 708 chromanorm[2] += *it;
Chris@23 709 }
Chris@23 710 break;
Chris@23 711 case 3:
Chris@23 712 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
Chris@23 713 chromanorm[0] += pow(*it,2);
Chris@23 714 }
Chris@23 715 chromanorm[0] = sqrt(chromanorm[0]);
Chris@23 716 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
Chris@23 717 chromanorm[1] += pow(*it,2);
Chris@23 718 }
Chris@23 719 chromanorm[1] = sqrt(chromanorm[1]);
Chris@23 720 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
Chris@23 721 chromanorm[2] += pow(*it,2);
Chris@23 722 }
Chris@23 723 chromanorm[2] = sqrt(chromanorm[2]);
Chris@23 724 break;
Chris@23 725 }
Chris@23 726 if (chromanorm[0] > 0) {
Chris@23 727 for (int i = 0; i < f4.values.size(); i++) {
Chris@23 728 f4.values[i] /= chromanorm[0];
Chris@23 729 }
Chris@23 730 }
Chris@23 731 if (chromanorm[1] > 0) {
Chris@23 732 for (int i = 0; i < f5.values.size(); i++) {
Chris@23 733 f5.values[i] /= chromanorm[1];
Chris@23 734 }
Chris@23 735 }
Chris@23 736 if (chromanorm[2] > 0) {
Chris@23 737 for (int i = 0; i < f6.values.size(); i++) {
Chris@23 738 f6.values[i] /= chromanorm[2];
Chris@23 739 }
Chris@23 740 }
matthiasm@13 741
Chris@23 742 }
matthiasm@13 743
Chris@23 744 // local chord estimation
Chris@23 745 vector<float> currentChordSalience;
Chris@23 746 float tempchordvalue = 0;
Chris@23 747 float sumchordvalue = 0;
matthiasm@9 748
Chris@23 749 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23 750 tempchordvalue = 0;
Chris@23 751 for (int iBin = 0; iBin < 12; iBin++) {
Chris@23 752 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23 753 }
Chris@23 754 for (int iBin = 12; iBin < 24; iBin++) {
Chris@23 755 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23 756 }
Chris@23 757 sumchordvalue+=tempchordvalue;
Chris@23 758 currentChordSalience.push_back(tempchordvalue);
Chris@23 759 }
Chris@23 760 if (sumchordvalue > 0) {
Chris@23 761 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23 762 currentChordSalience[iChord] /= sumchordvalue;
Chris@23 763 }
Chris@23 764 } else {
Chris@23 765 currentChordSalience[nChord-1] = 1.0;
Chris@23 766 }
Chris@23 767 chordogram.push_back(currentChordSalience);
matthiasm@1 768
Chris@23 769 fsOut[3].push_back(f3);
Chris@23 770 fsOut[4].push_back(f4);
Chris@23 771 fsOut[5].push_back(f5);
Chris@23 772 fsOut[6].push_back(f6);
Chris@23 773 count++;
Chris@23 774 }
Chris@23 775 cerr << "done." << endl;
matthiasm@13 776
matthiasm@10 777
Chris@23 778 /* Simple chord estimation
Chris@23 779 I just take the local chord estimates ("currentChordSalience") and average them over time, then
Chris@23 780 take the maximum. Very simple, don't do this at home...
Chris@23 781 */
Chris@23 782 cerr << "[NNLS Chroma Plugin] Chord Estimation ... ";
Chris@23 783 count = 0;
Chris@23 784 int halfwindowlength = m_inputSampleRate / m_stepSize;
Chris@23 785 vector<int> chordSequence;
Chris@23 786 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram
Chris@23 787 vector<int> temp = vector<int>(nChord,0);
Chris@23 788 scoreChordogram.push_back(temp);
Chris@23 789 }
Chris@23 790 for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) {
Chris@23 791 int startIndex = count + 1;
Chris@23 792 int endIndex = count + 2 * halfwindowlength;
matthiasm@10 793
Chris@23 794 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
matthiasm@10 795
Chris@23 796 vector<int> chordCandidates;
Chris@23 797 for (unsigned iChord = 0; iChord < nChord-1; iChord++) {
Chris@23 798 // float currsum = 0;
Chris@23 799 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
Chris@23 800 // currsum += chordogram[iFrame][iChord];
Chris@23 801 // }
Chris@23 802 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
Chris@23 803 for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
Chris@23 804 if (chordogram[iFrame][iChord] > chordThreshold) {
Chris@23 805 chordCandidates.push_back(iChord);
Chris@23 806 break;
Chris@23 807 }
Chris@23 808 }
Chris@23 809 }
Chris@23 810 chordCandidates.push_back(nChord-1);
Chris@23 811 // cerr << chordCandidates.size() << endl;
Chris@23 812
Chris@23 813 float maxval = 0; // will be the value of the most salient *chord change* in this frame
Chris@23 814 float maxindex = 0; //... and the index thereof
Chris@23 815 unsigned bestchordL = nChord-1; // index of the best "left" chord
Chris@23 816 unsigned bestchordR = nChord-1; // index of the best "right" chord
Chris@23 817
Chris@23 818 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
Chris@23 819 // now find the max values on both sides of iWF
Chris@23 820 // left side:
Chris@23 821 float maxL = 0;
Chris@23 822 unsigned maxindL = nChord-1;
Chris@23 823 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
Chris@23 824 unsigned iChord = chordCandidates[kChord];
Chris@23 825 float currsum = 0;
Chris@23 826 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
Chris@23 827 currsum += chordogram[count+iFrame][iChord];
matthiasm@10 828 }
Chris@23 829 if (iChord == nChord-1) currsum *= 0.8;
Chris@23 830 if (currsum > maxL) {
Chris@23 831 maxL = currsum;
Chris@23 832 maxindL = iChord;
Chris@23 833 }
Chris@23 834 }
Chris@23 835 // right side:
Chris@23 836 float maxR = 0;
Chris@23 837 unsigned maxindR = nChord-1;
Chris@23 838 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
Chris@23 839 unsigned iChord = chordCandidates[kChord];
Chris@23 840 float currsum = 0;
Chris@23 841 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
Chris@23 842 currsum += chordogram[count+iFrame][iChord];
Chris@23 843 }
Chris@23 844 if (iChord == nChord-1) currsum *= 0.8;
Chris@23 845 if (currsum > maxR) {
Chris@23 846 maxR = currsum;
Chris@23 847 maxindR = iChord;
Chris@23 848 }
Chris@23 849 }
Chris@23 850 if (maxL+maxR > maxval) {
Chris@23 851 maxval = maxL+maxR;
Chris@23 852 maxindex = iWF;
Chris@23 853 bestchordL = maxindL;
Chris@23 854 bestchordR = maxindR;
Chris@23 855 }
matthiasm@3 856
Chris@23 857 }
Chris@23 858 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
Chris@23 859 // add a score to every chord-frame-point that was part of a maximum
Chris@23 860 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
Chris@23 861 scoreChordogram[iFrame+count][bestchordL]++;
Chris@23 862 }
Chris@23 863 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
Chris@23 864 scoreChordogram[iFrame+count][bestchordR]++;
Chris@23 865 }
Chris@23 866 if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
Chris@23 867 count++;
Chris@23 868 }
Chris@23 869 // cerr << "******* agent finished *******" << endl;
Chris@23 870 count = 0;
Chris@23 871 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
Chris@23 872 float maxval = 0; // will be the value of the most salient chord in this frame
Chris@23 873 float maxindex = 0; //... and the index thereof
Chris@23 874 for (unsigned iChord = 0; iChord < nChord; iChord++) {
Chris@23 875 if (scoreChordogram[count][iChord] > maxval) {
Chris@23 876 maxval = scoreChordogram[count][iChord];
Chris@23 877 maxindex = iChord;
Chris@23 878 // cerr << iChord << endl;
Chris@23 879 }
Chris@23 880 }
Chris@23 881 chordSequence.push_back(maxindex);
Chris@23 882 // cerr << "before modefilter, maxindex: " << maxindex << endl;
Chris@23 883 count++;
Chris@23 884 }
Chris@23 885 // cerr << "******* mode filter done *******" << endl;
matthiasm@10 886
matthiasm@3 887
Chris@23 888 // mode filter on chordSequence
Chris@23 889 count = 0;
Chris@23 890 string oldChord = "";
Chris@23 891 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
Chris@23 892 Feature f6 = *it;
Chris@23 893 Feature f7; // chord estimate
Chris@23 894 f7.hasTimestamp = true;
Chris@23 895 f7.timestamp = f6.timestamp;
Chris@23 896 Feature f8; // chord change value (pushed to fsOut[9])
Chris@23 897 f8.hasTimestamp = true;
Chris@23 898 f8.timestamp = f6.timestamp;
matthiasm@17 899
Chris@23 900 vector<int> chordCount = vector<int>(nChord,0);
Chris@23 901 int maxChordCount = 0;
Chris@23 902 int maxChordIndex = nChord-1;
Chris@23 903 string maxChord;
Chris@23 904 int startIndex = max(count - halfwindowlength/2,0);
Chris@23 905 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
Chris@23 906 for (int i = startIndex; i < endIndex; i++) {
Chris@23 907 chordCount[chordSequence[i]]++;
Chris@23 908 if (chordCount[chordSequence[i]] > maxChordCount) {
Chris@23 909 // cerr << "start index " << startIndex << endl;
Chris@23 910 maxChordCount++;
Chris@23 911 maxChordIndex = chordSequence[i];
Chris@23 912 maxChord = m_chordnames[maxChordIndex];
Chris@23 913 }
Chris@23 914 }
Chris@23 915 // chordSequence[count] = maxChordIndex;
Chris@23 916 // cerr << maxChordIndex << endl;
Chris@23 917 f8.values.push_back(chordchange[count]/(halfwindowlength*2));
Chris@23 918 // cerr << chordchange[count] << endl;
Chris@23 919 fsOut[9].push_back(f8);
Chris@23 920 if (oldChord != maxChord) {
Chris@23 921 oldChord = maxChord;
matthiasm@3 922
Chris@23 923 // char buffer1 [50];
Chris@23 924 // if (maxChordIndex < nChord - 1) {
Chris@23 925 // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
Chris@23 926 // } else {
Chris@23 927 // sprintf(buffer1, "N");
Chris@23 928 // }
Chris@23 929 // f7.label = buffer1;
Chris@23 930 f7.label = m_chordnames[maxChordIndex];
Chris@23 931 fsOut[7].push_back(f7);
Chris@23 932 }
Chris@23 933 count++;
Chris@23 934 }
Chris@23 935 Feature f7; // last chord estimate
Chris@23 936 f7.hasTimestamp = true;
Chris@23 937 f7.timestamp = fsOut[6][fsOut[6].size()-1].timestamp;
Chris@23 938 f7.label = "N";
Chris@23 939 fsOut[7].push_back(f7);
Chris@23 940 cerr << "done." << endl;
Chris@23 941 // // musicity
Chris@23 942 // count = 0;
Chris@23 943 // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
Chris@23 944 // vector<float> musicityValue;
Chris@23 945 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
Chris@23 946 // Feature f4 = *it;
Chris@23 947 //
Chris@23 948 // int startIndex = max(count - musicitykernelwidth/2,0);
Chris@23 949 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
Chris@23 950 // float chromasum = 0;
Chris@23 951 // float diffsum = 0;
Chris@23 952 // for (int k = 0; k < 12; k++) {
Chris@23 953 // for (int i = startIndex + 1; i < endIndex; i++) {
Chris@23 954 // chromasum += pow(fsOut[4][i].values[k],2);
Chris@23 955 // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]);
Chris@23 956 // }
Chris@23 957 // }
Chris@23 958 // diffsum /= chromasum;
Chris@23 959 // musicityValue.push_back(diffsum);
Chris@23 960 // count++;
Chris@23 961 // }
Chris@23 962 //
Chris@23 963 // float musicityThreshold = 0.44;
Chris@23 964 // if (m_stepSize == 4096) {
Chris@23 965 // musicityThreshold = 0.74;
Chris@23 966 // }
Chris@23 967 // if (m_stepSize == 4410) {
Chris@23 968 // musicityThreshold = 0.77;
Chris@23 969 // }
Chris@23 970 //
Chris@23 971 // count = 0;
Chris@23 972 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
Chris@23 973 // Feature f4 = *it;
Chris@23 974 // Feature f8; // musicity
Chris@23 975 // Feature f9; // musicity segmenter
Chris@23 976 //
Chris@23 977 // f8.hasTimestamp = true;
Chris@23 978 // f8.timestamp = f4.timestamp;
Chris@23 979 // f9.hasTimestamp = true;
Chris@23 980 // f9.timestamp = f4.timestamp;
Chris@23 981 //
Chris@23 982 // int startIndex = max(count - musicitykernelwidth/2,0);
Chris@23 983 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
Chris@23 984 // int musicityCount = 0;
Chris@23 985 // for (int i = startIndex; i <= endIndex; i++) {
Chris@23 986 // if (musicityValue[i] > musicityThreshold) musicityCount++;
Chris@23 987 // }
Chris@23 988 // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1);
Chris@23 989 //
Chris@23 990 // if (isSpeech) {
Chris@23 991 // if (oldlabeltype != 2) {
Chris@23 992 // f9.label = "Speech";
Chris@23 993 // fsOut[9].push_back(f9);
Chris@23 994 // oldlabeltype = 2;
Chris@23 995 // }
Chris@23 996 // } else {
Chris@23 997 // if (oldlabeltype != 1) {
Chris@23 998 // f9.label = "Music";
Chris@23 999 // fsOut[9].push_back(f9);
Chris@23 1000 // oldlabeltype = 1;
Chris@23 1001 // }
Chris@23 1002 // }
Chris@23 1003 // f8.values.push_back(musicityValue[count]);
Chris@23 1004 // fsOut[8].push_back(f8);
Chris@23 1005 // count++;
Chris@23 1006 // }
Chris@23 1007 return fsOut;
matthiasm@0 1008
matthiasm@0 1009 }
matthiasm@0 1010
Chris@35 1011 #endif