annotate NNLSBase.cpp @ 48:6e76c7710fa1 matthiasm-plugin

removed subtraction in chroma dictionary, added to-the-power-of-1.5 in chordino
author matthiasm
date Mon, 25 Oct 2010 16:58:32 +0900
parents d01f94d58ef0
children b6cddb109482
rev   line source
Chris@23 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
matthiasm@0 2
Chris@35 3 /*
Chris@35 4 NNLS-Chroma / Chordino
Chris@35 5
Chris@35 6 Audio feature extraction plugins for chromagram and chord
Chris@35 7 estimation.
Chris@35 8
Chris@35 9 Centre for Digital Music, Queen Mary University of London.
Chris@35 10 This file copyright 2008-2010 Matthias Mauch and QMUL.
Chris@35 11
Chris@35 12 This program is free software; you can redistribute it and/or
Chris@35 13 modify it under the terms of the GNU General Public License as
Chris@35 14 published by the Free Software Foundation; either version 2 of the
Chris@35 15 License, or (at your option) any later version. See the file
Chris@35 16 COPYING included with this distribution for more information.
Chris@35 17 */
Chris@35 18
Chris@35 19 #include "NNLSBase.h"
Chris@27 20
Chris@27 21 #include "chromamethods.h"
Chris@27 22
Chris@27 23 #include <cstdlib>
Chris@27 24 #include <fstream>
matthiasm@0 25 #include <cmath>
matthiasm@9 26
Chris@27 27 #include <algorithm>
matthiasm@0 28
matthiasm@0 29 const bool debug_on = false;
matthiasm@0 30
Chris@27 31 const vector<float> hw(hammingwind, hammingwind+19);
matthiasm@0 32
Chris@35 33 NNLSBase::NNLSBase(float inputSampleRate) :
Chris@23 34 Plugin(inputSampleRate),
Chris@35 35 m_logSpectrum(0),
Chris@23 36 m_blockSize(0),
Chris@23 37 m_stepSize(0),
Chris@23 38 m_lengthOfNoteIndex(0),
Chris@23 39 m_meanTuning0(0),
Chris@23 40 m_meanTuning1(0),
Chris@23 41 m_meanTuning2(0),
Chris@23 42 m_localTuning0(0),
Chris@23 43 m_localTuning1(0),
Chris@23 44 m_localTuning2(0),
mail@41 45 m_whitening(1.0),
Chris@23 46 m_preset(0.0),
Chris@23 47 m_localTuning(0),
Chris@23 48 m_kernelValue(0),
Chris@23 49 m_kernelFftIndex(0),
Chris@23 50 m_kernelNoteIndex(0),
Chris@23 51 m_dict(0),
Chris@23 52 m_tuneLocal(false),
Chris@23 53 m_chorddict(0),
Chris@23 54 m_chordnames(0),
Chris@23 55 m_doNormalizeChroma(0),
mail@41 56 m_rollon(0.0),
matthiasm@42 57 m_s(0.7),
matthiasm@42 58 m_useNNLS(1)
matthiasm@0 59 {
Chris@35 60 if (debug_on) cerr << "--> NNLSBase" << endl;
matthiasm@7 61
Chris@23 62 // make the *note* dictionary matrix
Chris@23 63 m_dict = new float[nNote * 84];
Chris@23 64 for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0;
mail@41 65 dictionaryMatrix(m_dict, 0.7);
matthiasm@7 66
Chris@23 67 // get the *chord* dictionary from file (if the file exists)
Chris@23 68 m_chordnames = chordDictionary(&m_chorddict);
matthiasm@0 69 }
matthiasm@0 70
matthiasm@0 71
Chris@35 72 NNLSBase::~NNLSBase()
matthiasm@0 73 {
Chris@35 74 if (debug_on) cerr << "--> ~NNLSBase" << endl;
Chris@23 75 delete [] m_dict;
matthiasm@0 76 }
matthiasm@0 77
matthiasm@0 78 string
Chris@35 79 NNLSBase::getMaker() const
matthiasm@0 80 {
Chris@23 81 if (debug_on) cerr << "--> getMaker" << endl;
matthiasm@0 82 // Your name here
matthiasm@0 83 return "Matthias Mauch";
matthiasm@0 84 }
matthiasm@0 85
matthiasm@0 86 int
Chris@35 87 NNLSBase::getPluginVersion() const
matthiasm@0 88 {
Chris@23 89 if (debug_on) cerr << "--> getPluginVersion" << endl;
matthiasm@0 90 // Increment this each time you release a version that behaves
matthiasm@0 91 // differently from the previous one
matthiasm@0 92 return 1;
matthiasm@0 93 }
matthiasm@0 94
matthiasm@0 95 string
Chris@35 96 NNLSBase::getCopyright() const
matthiasm@0 97 {
Chris@23 98 if (debug_on) cerr << "--> getCopyright" << endl;
matthiasm@0 99 // This function is not ideally named. It does not necessarily
matthiasm@0 100 // need to say who made the plugin -- getMaker does that -- but it
matthiasm@0 101 // should indicate the terms under which it is distributed. For
matthiasm@0 102 // example, "Copyright (year). All Rights Reserved", or "GPL"
Chris@35 103 return "GPL";
matthiasm@0 104 }
matthiasm@0 105
Chris@35 106 NNLSBase::InputDomain
Chris@35 107 NNLSBase::getInputDomain() const
matthiasm@0 108 {
Chris@23 109 if (debug_on) cerr << "--> getInputDomain" << endl;
matthiasm@0 110 return FrequencyDomain;
matthiasm@0 111 }
matthiasm@0 112
matthiasm@0 113 size_t
Chris@35 114 NNLSBase::getPreferredBlockSize() const
matthiasm@0 115 {
Chris@23 116 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
matthiasm@0 117 return 16384; // 0 means "I can handle any block size"
matthiasm@0 118 }
matthiasm@0 119
matthiasm@0 120 size_t
Chris@35 121 NNLSBase::getPreferredStepSize() const
matthiasm@0 122 {
Chris@23 123 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
matthiasm@0 124 return 2048; // 0 means "anything sensible"; in practice this
Chris@23 125 // means the same as the block size for TimeDomain
Chris@23 126 // plugins, or half of it for FrequencyDomain plugins
matthiasm@0 127 }
matthiasm@0 128
matthiasm@0 129 size_t
Chris@35 130 NNLSBase::getMinChannelCount() const
matthiasm@0 131 {
Chris@23 132 if (debug_on) cerr << "--> getMinChannelCount" << endl;
matthiasm@0 133 return 1;
matthiasm@0 134 }
matthiasm@0 135
matthiasm@0 136 size_t
Chris@35 137 NNLSBase::getMaxChannelCount() const
matthiasm@0 138 {
Chris@23 139 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
matthiasm@0 140 return 1;
matthiasm@0 141 }
matthiasm@0 142
Chris@35 143 NNLSBase::ParameterList
Chris@35 144 NNLSBase::getParameterDescriptors() const
matthiasm@0 145 {
Chris@23 146 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
matthiasm@0 147 ParameterList list;
matthiasm@0 148
matthiasm@42 149 ParameterDescriptor d;
matthiasm@42 150 d.identifier = "useNNLS";
matthiasm@42 151 d.name = "use approximate transcription (NNLS)";
matthiasm@42 152 d.description = "Toggles approximate transcription (NNLS).";
matthiasm@42 153 d.unit = "";
matthiasm@42 154 d.minValue = 0.0;
matthiasm@42 155 d.maxValue = 1.0;
matthiasm@42 156 d.defaultValue = 1.0;
matthiasm@42 157 d.isQuantized = true;
matthiasm@42 158 d.quantizeStep = 1.0;
matthiasm@42 159 list.push_back(d);
matthiasm@42 160
mail@41 161 ParameterDescriptor d0;
mail@41 162 d0.identifier = "rollon";
mail@41 163 d0.name = "spectral roll-on";
mail@41 164 d0.description = "The bins below the spectral roll-on quantile will be set to 0.";
mail@41 165 d0.unit = "";
mail@41 166 d0.minValue = 0;
mail@41 167 d0.maxValue = 0.05;
mail@41 168 d0.defaultValue = 0;
matthiasm@48 169 d0.isQuantized = true;
matthiasm@48 170 d0.quantizeStep = 0.005;
mail@41 171 list.push_back(d0);
matthiasm@4 172
matthiasm@4 173 ParameterDescriptor d1;
matthiasm@4 174 d1.identifier = "tuningmode";
matthiasm@4 175 d1.name = "tuning mode";
matthiasm@4 176 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
matthiasm@4 177 d1.unit = "";
matthiasm@4 178 d1.minValue = 0;
matthiasm@4 179 d1.maxValue = 1;
matthiasm@4 180 d1.defaultValue = 0;
matthiasm@4 181 d1.isQuantized = true;
matthiasm@4 182 d1.valueNames.push_back("global tuning");
matthiasm@4 183 d1.valueNames.push_back("local tuning");
matthiasm@4 184 d1.quantizeStep = 1.0;
matthiasm@4 185 list.push_back(d1);
matthiasm@4 186
mail@41 187 ParameterDescriptor d2;
mail@41 188 d2.identifier = "whitening";
mail@41 189 d2.name = "spectral whitening";
mail@41 190 d2.description = "Spectral whitening: no whitening - 0; whitening - 1.";
mail@41 191 d2.unit = "";
mail@41 192 d2.isQuantized = true;
mail@41 193 d2.minValue = 0.0;
mail@41 194 d2.maxValue = 1.0;
mail@41 195 d2.defaultValue = 1.0;
mail@41 196 d2.isQuantized = false;
mail@41 197 list.push_back(d2);
mail@41 198
mail@41 199 ParameterDescriptor d3;
mail@41 200 d3.identifier = "s";
mail@41 201 d3.name = "spectral shape";
mail@41 202 d3.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
mail@41 203 d3.unit = "";
mail@41 204 d3.minValue = 0.5;
mail@41 205 d3.maxValue = 0.9;
mail@41 206 d3.defaultValue = 0.7;
mail@41 207 d3.isQuantized = false;
mail@41 208 list.push_back(d3);
mail@41 209
Chris@23 210 ParameterDescriptor d4;
matthiasm@12 211 d4.identifier = "chromanormalize";
matthiasm@12 212 d4.name = "chroma normalization";
matthiasm@12 213 d4.description = "How shall the chroma vector be normalized?";
matthiasm@12 214 d4.unit = "";
matthiasm@12 215 d4.minValue = 0;
matthiasm@13 216 d4.maxValue = 3;
matthiasm@12 217 d4.defaultValue = 0;
matthiasm@12 218 d4.isQuantized = true;
matthiasm@13 219 d4.valueNames.push_back("none");
matthiasm@13 220 d4.valueNames.push_back("maximum norm");
Chris@23 221 d4.valueNames.push_back("L1 norm");
Chris@23 222 d4.valueNames.push_back("L2 norm");
matthiasm@12 223 d4.quantizeStep = 1.0;
matthiasm@12 224 list.push_back(d4);
matthiasm@4 225
matthiasm@0 226 return list;
matthiasm@0 227 }
matthiasm@0 228
matthiasm@0 229 float
Chris@35 230 NNLSBase::getParameter(string identifier) const
matthiasm@0 231 {
Chris@23 232 if (debug_on) cerr << "--> getParameter" << endl;
matthiasm@42 233 if (identifier == "useNNLS") {
matthiasm@42 234 return m_useNNLS;
matthiasm@0 235 }
matthiasm@0 236
mail@41 237 if (identifier == "whitening") {
mail@41 238 return m_whitening;
mail@41 239 }
mail@41 240
mail@41 241 if (identifier == "s") {
mail@41 242 return m_s;
matthiasm@0 243 }
matthiasm@17 244
Chris@23 245 if (identifier == "rollon") {
matthiasm@17 246 return m_rollon;
matthiasm@17 247 }
matthiasm@0 248
matthiasm@0 249 if (identifier == "tuningmode") {
matthiasm@0 250 if (m_tuneLocal) {
matthiasm@0 251 return 1.0;
matthiasm@0 252 } else {
matthiasm@0 253 return 0.0;
matthiasm@0 254 }
matthiasm@0 255 }
Chris@23 256 if (identifier == "preset") {
Chris@23 257 return m_preset;
matthiasm@3 258 }
Chris@23 259 if (identifier == "chromanormalize") {
Chris@23 260 return m_doNormalizeChroma;
matthiasm@12 261 }
matthiasm@0 262 return 0;
matthiasm@0 263
matthiasm@0 264 }
matthiasm@0 265
matthiasm@0 266 void
Chris@35 267 NNLSBase::setParameter(string identifier, float value)
matthiasm@0 268 {
Chris@23 269 if (debug_on) cerr << "--> setParameter" << endl;
matthiasm@42 270 if (identifier == "useNNLS") {
matthiasm@42 271 m_useNNLS = (int) value;
matthiasm@0 272 }
matthiasm@0 273
mail@41 274 if (identifier == "whitening") {
mail@41 275 m_whitening = value;
matthiasm@0 276 }
matthiasm@0 277
mail@41 278 if (identifier == "s") {
mail@41 279 m_s = value;
mail@41 280 }
mail@41 281
matthiasm@0 282 if (identifier == "tuningmode") {
matthiasm@0 283 m_tuneLocal = (value > 0) ? true : false;
matthiasm@0 284 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
matthiasm@0 285 }
matthiasm@42 286 // if (identifier == "preset") {
matthiasm@42 287 // m_preset = value;
matthiasm@42 288 // if (m_preset == 0.0) {
matthiasm@42 289 // m_tuneLocal = false;
matthiasm@42 290 // m_whitening = 1.0;
matthiasm@42 291 // m_dictID = 0.0;
matthiasm@42 292 // }
matthiasm@42 293 // if (m_preset == 1.0) {
matthiasm@42 294 // m_tuneLocal = false;
matthiasm@42 295 // m_whitening = 1.0;
matthiasm@42 296 // m_dictID = 1.0;
matthiasm@42 297 // }
matthiasm@42 298 // if (m_preset == 2.0) {
matthiasm@42 299 // m_tuneLocal = false;
matthiasm@42 300 // m_whitening = 0.7;
matthiasm@42 301 // m_dictID = 0.0;
matthiasm@42 302 // }
matthiasm@42 303 // }
Chris@23 304 if (identifier == "chromanormalize") {
Chris@23 305 m_doNormalizeChroma = value;
Chris@23 306 }
matthiasm@17 307
Chris@23 308 if (identifier == "rollon") {
Chris@23 309 m_rollon = value;
Chris@23 310 }
matthiasm@0 311 }
matthiasm@0 312
Chris@35 313 NNLSBase::ProgramList
Chris@35 314 NNLSBase::getPrograms() const
matthiasm@0 315 {
Chris@23 316 if (debug_on) cerr << "--> getPrograms" << endl;
matthiasm@0 317 ProgramList list;
matthiasm@0 318
matthiasm@0 319 // If you have no programs, return an empty list (or simply don't
matthiasm@0 320 // implement this function or getCurrentProgram/selectProgram)
matthiasm@0 321
matthiasm@0 322 return list;
matthiasm@0 323 }
matthiasm@0 324
matthiasm@0 325 string
Chris@35 326 NNLSBase::getCurrentProgram() const
matthiasm@0 327 {
Chris@23 328 if (debug_on) cerr << "--> getCurrentProgram" << endl;
matthiasm@0 329 return ""; // no programs
matthiasm@0 330 }
matthiasm@0 331
matthiasm@0 332 void
Chris@35 333 NNLSBase::selectProgram(string name)
matthiasm@0 334 {
Chris@23 335 if (debug_on) cerr << "--> selectProgram" << endl;
matthiasm@0 336 }
matthiasm@0 337
matthiasm@0 338
matthiasm@0 339 bool
Chris@35 340 NNLSBase::initialise(size_t channels, size_t stepSize, size_t blockSize)
matthiasm@0 341 {
Chris@23 342 if (debug_on) {
Chris@23 343 cerr << "--> initialise";
Chris@23 344 }
matthiasm@1 345
matthiasm@0 346 if (channels < getMinChannelCount() ||
matthiasm@0 347 channels > getMaxChannelCount()) return false;
matthiasm@0 348 m_blockSize = blockSize;
matthiasm@0 349 m_stepSize = stepSize;
Chris@35 350 m_frameCount = 0;
Chris@23 351 int tempn = 256 * m_blockSize/2;
Chris@23 352 // cerr << "length of tempkernel : " << tempn << endl;
Chris@23 353 float *tempkernel;
matthiasm@1 354
Chris@23 355 tempkernel = new float[tempn];
matthiasm@1 356
Chris@23 357 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
Chris@23 358 m_kernelValue.clear();
Chris@23 359 m_kernelFftIndex.clear();
Chris@23 360 m_kernelNoteIndex.clear();
Chris@23 361 int countNonzero = 0;
Chris@23 362 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
Chris@23 363 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) {
Chris@23 364 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
Chris@23 365 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
Chris@23 366 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
Chris@23 367 countNonzero++;
Chris@23 368 }
Chris@23 369 m_kernelFftIndex.push_back(iFFT);
Chris@23 370 m_kernelNoteIndex.push_back(iNote);
Chris@23 371 }
Chris@23 372 }
Chris@23 373 }
Chris@23 374 // cerr << "nonzero count : " << countNonzero << endl;
Chris@23 375 delete [] tempkernel;
Chris@35 376 /*
Chris@23 377 ofstream myfile;
Chris@23 378 myfile.open ("matrix.txt");
matthiasm@3 379 // myfile << "Writing this to a file.\n";
Chris@23 380 for (int i = 0; i < nNote * 84; ++i) {
Chris@23 381 myfile << m_dict[i] << endl;
Chris@23 382 }
matthiasm@3 383 myfile.close();
Chris@35 384 */
matthiasm@0 385 return true;
matthiasm@0 386 }
matthiasm@0 387
matthiasm@0 388 void
Chris@35 389 NNLSBase::reset()
matthiasm@0 390 {
Chris@23 391 if (debug_on) cerr << "--> reset";
matthiasm@4 392
matthiasm@0 393 // Clear buffers, reset stored values, etc
Chris@35 394 m_frameCount = 0;
matthiasm@42 395 // m_dictID = 0;
Chris@35 396 m_logSpectrum.clear();
Chris@23 397 m_meanTuning0 = 0;
Chris@23 398 m_meanTuning1 = 0;
Chris@23 399 m_meanTuning2 = 0;
Chris@23 400 m_localTuning0 = 0;
Chris@23 401 m_localTuning1 = 0;
Chris@23 402 m_localTuning2 = 0;
Chris@23 403 m_localTuning.clear();
matthiasm@0 404 }
matthiasm@0 405
Chris@35 406 void
Chris@35 407 NNLSBase::baseProcess(const float *const *inputBuffers, Vamp::RealTime timestamp)
matthiasm@0 408 {
Chris@35 409 m_frameCount++;
Chris@23 410 float *magnitude = new float[m_blockSize/2];
matthiasm@0 411
Chris@23 412 const float *fbuf = inputBuffers[0];
Chris@23 413 float energysum = 0;
Chris@23 414 // make magnitude
Chris@23 415 float maxmag = -10000;
Chris@23 416 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
Chris@23 417 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
Chris@23 418 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
Chris@23 419 if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin];
Chris@23 420 if (m_rollon > 0) {
Chris@23 421 energysum += pow(magnitude[iBin],2);
Chris@23 422 }
Chris@23 423 }
matthiasm@14 424
Chris@23 425 float cumenergy = 0;
Chris@23 426 if (m_rollon > 0) {
Chris@23 427 for (size_t iBin = 2; iBin < m_blockSize/2; iBin++) {
Chris@23 428 cumenergy += pow(magnitude[iBin],2);
Chris@23 429 if (cumenergy < energysum * m_rollon) magnitude[iBin-2] = 0;
Chris@23 430 else break;
Chris@23 431 }
Chris@23 432 }
matthiasm@17 433
Chris@23 434 if (maxmag < 2) {
Chris@23 435 // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl;
Chris@23 436 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
Chris@23 437 magnitude[iBin] = 0;
Chris@23 438 }
Chris@23 439 }
matthiasm@4 440
Chris@23 441 // note magnitude mapping using pre-calculated matrix
Chris@23 442 float *nm = new float[nNote]; // note magnitude
Chris@23 443 for (size_t iNote = 0; iNote < nNote; iNote++) {
Chris@23 444 nm[iNote] = 0; // initialise as 0
Chris@23 445 }
Chris@23 446 int binCount = 0;
Chris@23 447 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
Chris@23 448 // cerr << ".";
Chris@23 449 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
Chris@23 450 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl;
Chris@23 451 binCount++;
Chris@23 452 }
Chris@23 453 // cerr << nm[20];
Chris@23 454 // cerr << endl;
matthiasm@0 455
matthiasm@0 456
Chris@35 457 float one_over_N = 1.0/m_frameCount;
matthiasm@0 458 // update means of complex tuning variables
Chris@35 459 m_meanTuning0 *= float(m_frameCount-1)*one_over_N;
Chris@35 460 m_meanTuning1 *= float(m_frameCount-1)*one_over_N;
Chris@35 461 m_meanTuning2 *= float(m_frameCount-1)*one_over_N;
matthiasm@0 462
matthiasm@0 463 for (int iTone = 0; iTone < 160; iTone = iTone + 3) {
matthiasm@0 464 m_meanTuning0 += nm[iTone + 0]*one_over_N;
matthiasm@0 465 m_meanTuning1 += nm[iTone + 1]*one_over_N;
matthiasm@0 466 m_meanTuning2 += nm[iTone + 2]*one_over_N;
Chris@23 467 float ratioOld = 0.997;
matthiasm@3 468 m_localTuning0 *= ratioOld; m_localTuning0 += nm[iTone + 0] * (1 - ratioOld);
matthiasm@3 469 m_localTuning1 *= ratioOld; m_localTuning1 += nm[iTone + 1] * (1 - ratioOld);
matthiasm@3 470 m_localTuning2 *= ratioOld; m_localTuning2 += nm[iTone + 2] * (1 - ratioOld);
matthiasm@0 471 }
matthiasm@0 472
matthiasm@0 473 // if (m_tuneLocal) {
Chris@23 474 // local tuning
Chris@23 475 float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2;
Chris@23 476 float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2;
Chris@23 477 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
Chris@23 478 m_localTuning.push_back(normalisedtuning);
matthiasm@0 479
Chris@23 480 Feature f1; // logfreqspec
Chris@23 481 f1.hasTimestamp = true;
matthiasm@0 482 f1.timestamp = timestamp;
Chris@23 483 for (size_t iNote = 0; iNote < nNote; iNote++) {
Chris@23 484 f1.values.push_back(nm[iNote]);
Chris@23 485 }
matthiasm@0 486
matthiasm@0 487 // deletes
matthiasm@0 488 delete[] magnitude;
matthiasm@0 489 delete[] nm;
matthiasm@0 490
Chris@35 491 m_logSpectrum.push_back(f1); // remember note magnitude
matthiasm@0 492 }
matthiasm@0 493
Chris@35 494
Chris@35 495 #ifdef NOT_DEFINED
Chris@35 496
Chris@35 497 NNLSBase::FeatureSet
Chris@35 498 NNLSBase::getRemainingFeatures()
matthiasm@0 499 {
Chris@23 500 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
Chris@23 501 FeatureSet fsOut;
Chris@35 502 if (m_logSpectrum.size() == 0) return fsOut;
Chris@23 503 int nChord = m_chordnames.size();
Chris@23 504 //
Chris@23 505 /** Calculate Tuning
Chris@23 506 calculate tuning from (using the angle of the complex number defined by the
Chris@23 507 cumulative mean real and imag values)
Chris@23 508 **/
Chris@23 509 float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
Chris@23 510 float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
Chris@23 511 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
Chris@23 512 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
Chris@23 513 int intShift = floor(normalisedtuning * 3);
Chris@23 514 float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
matthiasm@1 515
Chris@23 516 char buffer0 [50];
matthiasm@1 517
Chris@23 518 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
matthiasm@1 519
Chris@23 520 // cerr << "normalisedtuning: " << normalisedtuning << '\n';
matthiasm@1 521
Chris@23 522 // push tuning to FeatureSet fsOut
Chris@23 523 Feature f0; // tuning
Chris@23 524 f0.hasTimestamp = true;
Chris@23 525 f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));;
Chris@23 526 f0.label = buffer0;
Chris@23 527 fsOut[0].push_back(f0);
matthiasm@1 528
Chris@23 529 /** Tune Log-Frequency Spectrogram
Chris@23 530 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
Chris@23 531 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
Chris@23 532 **/
Chris@23 533 cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... ";
matthiasm@13 534
Chris@23 535 float tempValue = 0;
Chris@23 536 float dbThreshold = 0; // relative to the background spectrum
Chris@23 537 float thresh = pow(10,dbThreshold/20);
Chris@23 538 // cerr << "tune local ? " << m_tuneLocal << endl;
Chris@23 539 int count = 0;
matthiasm@1 540
Chris@35 541 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
Chris@23 542 Feature f1 = *i;
Chris@23 543 Feature f2; // tuned log-frequency spectrum
Chris@23 544 f2.hasTimestamp = true;
Chris@23 545 f2.timestamp = f1.timestamp;
Chris@23 546 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
matthiasm@1 547
Chris@23 548 if (m_tuneLocal) {
Chris@23 549 intShift = floor(m_localTuning[count] * 3);
Chris@23 550 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
Chris@23 551 }
matthiasm@1 552
Chris@23 553 // cerr << intShift << " " << intFactor << endl;
matthiasm@1 554
Chris@23 555 for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
Chris@23 556 tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor;
Chris@23 557 f2.values.push_back(tempValue);
Chris@23 558 }
matthiasm@1 559
Chris@23 560 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
Chris@23 561 vector<float> runningmean = SpecialConvolution(f2.values,hw);
Chris@23 562 vector<float> runningstd;
Chris@23 563 for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance)
Chris@23 564 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
Chris@23 565 }
Chris@23 566 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
Chris@23 567 for (int i = 0; i < 256; i++) {
Chris@23 568 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
Chris@23 569 if (runningstd[i] > 0) {
Chris@23 570 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
mail@41 571 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
Chris@23 572 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
mail@41 573 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
Chris@23 574 }
Chris@23 575 if (f2.values[i] < 0) {
Chris@23 576 cerr << "ERROR: negative value in logfreq spectrum" << endl;
Chris@23 577 }
Chris@23 578 }
Chris@23 579 fsOut[2].push_back(f2);
Chris@23 580 count++;
Chris@23 581 }
Chris@23 582 cerr << "done." << endl;
matthiasm@1 583
Chris@23 584 /** Semitone spectrum and chromagrams
Chris@23 585 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
Chris@23 586 is inferred using a non-negative least squares algorithm.
Chris@23 587 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
Chris@23 588 bass and treble stacked onto each other).
Chris@23 589 **/
matthiasm@42 590 if (m_useNNLS == 0) {
Chris@23 591 cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... ";
Chris@23 592 } else {
Chris@23 593 cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... ";
Chris@23 594 }
matthiasm@13 595
matthiasm@1 596
Chris@23 597 vector<vector<float> > chordogram;
Chris@23 598 vector<vector<int> > scoreChordogram;
Chris@23 599 vector<float> chordchange = vector<float>(fsOut[2].size(),0);
Chris@23 600 vector<float> oldchroma = vector<float>(12,0);
Chris@23 601 vector<float> oldbasschroma = vector<float>(12,0);
Chris@23 602 count = 0;
matthiasm@9 603
Chris@23 604 for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) {
Chris@23 605 Feature f2 = *it; // logfreq spectrum
Chris@23 606 Feature f3; // semitone spectrum
Chris@23 607 Feature f4; // treble chromagram
Chris@23 608 Feature f5; // bass chromagram
Chris@23 609 Feature f6; // treble and bass chromagram
matthiasm@1 610
Chris@23 611 f3.hasTimestamp = true;
Chris@23 612 f3.timestamp = f2.timestamp;
matthiasm@1 613
Chris@23 614 f4.hasTimestamp = true;
Chris@23 615 f4.timestamp = f2.timestamp;
matthiasm@1 616
Chris@23 617 f5.hasTimestamp = true;
Chris@23 618 f5.timestamp = f2.timestamp;
matthiasm@1 619
Chris@23 620 f6.hasTimestamp = true;
Chris@23 621 f6.timestamp = f2.timestamp;
matthiasm@1 622
Chris@29 623 float b[256];
matthiasm@1 624
Chris@23 625 bool some_b_greater_zero = false;
Chris@23 626 float sumb = 0;
Chris@23 627 for (int i = 0; i < 256; i++) {
Chris@23 628 // b[i] = m_dict[(256 * count + i) % (256 * 84)];
Chris@23 629 b[i] = f2.values[i];
Chris@23 630 sumb += b[i];
Chris@23 631 if (b[i] > 0) {
Chris@23 632 some_b_greater_zero = true;
Chris@23 633 }
Chris@23 634 }
matthiasm@1 635
Chris@23 636 // here's where the non-negative least squares algorithm calculates the note activation x
matthiasm@1 637
Chris@23 638 vector<float> chroma = vector<float>(12, 0);
Chris@23 639 vector<float> basschroma = vector<float>(12, 0);
Chris@23 640 float currval;
Chris@23 641 unsigned iSemitone = 0;
matthiasm@1 642
Chris@23 643 if (some_b_greater_zero) {
matthiasm@42 644 if (m_useNNLS == 0) {
Chris@23 645 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
Chris@23 646 currval = 0;
Chris@23 647 currval += b[iNote + 1 + -1] * 0.5;
Chris@23 648 currval += b[iNote + 1 + 0] * 1.0;
Chris@23 649 currval += b[iNote + 1 + 1] * 0.5;
Chris@23 650 f3.values.push_back(currval);
Chris@23 651 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
Chris@23 652 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
Chris@23 653 iSemitone++;
Chris@23 654 }
matthiasm@1 655
Chris@23 656 } else {
Chris@29 657 float x[84+1000];
Chris@23 658 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
Chris@23 659 vector<int> signifIndex;
Chris@23 660 int index=0;
Chris@23 661 sumb /= 84.0;
Chris@23 662 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
Chris@23 663 float currval = 0;
Chris@23 664 currval += b[iNote + 1 + -1];
Chris@23 665 currval += b[iNote + 1 + 0];
Chris@23 666 currval += b[iNote + 1 + 1];
Chris@23 667 if (currval > 0) signifIndex.push_back(index);
Chris@23 668 f3.values.push_back(0); // fill the values, change later
Chris@23 669 index++;
Chris@23 670 }
Chris@29 671 float rnorm;
Chris@29 672 float w[84+1000];
Chris@29 673 float zz[84+1000];
Chris@23 674 int indx[84+1000];
Chris@23 675 int mode;
Chris@23 676 int dictsize = 256*signifIndex.size();
Chris@23 677 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
Chris@29 678 float *curr_dict = new float[dictsize];
Chris@23 679 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
Chris@23 680 for (unsigned iBin = 0; iBin < 256; iBin++) {
Chris@23 681 curr_dict[iNote * 256 + iBin] = 1.0 * m_dict[signifIndex[iNote] * 256 + iBin];
Chris@23 682 }
Chris@23 683 }
Chris@29 684 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
Chris@23 685 delete [] curr_dict;
Chris@23 686 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
Chris@23 687 f3.values[signifIndex[iNote]] = x[iNote];
Chris@23 688 // cerr << mode << endl;
Chris@23 689 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
Chris@23 690 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
Chris@23 691 }
Chris@23 692 }
Chris@23 693 }
matthiasm@13 694
matthiasm@10 695
matthiasm@12 696
matthiasm@13 697
Chris@23 698 f4.values = chroma;
Chris@23 699 f5.values = basschroma;
Chris@23 700 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
Chris@23 701 f6.values = chroma;
matthiasm@1 702
Chris@23 703 if (m_doNormalizeChroma > 0) {
Chris@23 704 vector<float> chromanorm = vector<float>(3,0);
Chris@23 705 switch (int(m_doNormalizeChroma)) {
Chris@23 706 case 0: // should never end up here
Chris@23 707 break;
Chris@23 708 case 1:
Chris@23 709 chromanorm[0] = *max_element(f4.values.begin(), f4.values.end());
Chris@23 710 chromanorm[1] = *max_element(f5.values.begin(), f5.values.end());
Chris@23 711 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
Chris@23 712 break;
Chris@23 713 case 2:
Chris@23 714 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
Chris@23 715 chromanorm[0] += *it;
Chris@23 716 }
Chris@23 717 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
Chris@23 718 chromanorm[1] += *it;
Chris@23 719 }
Chris@23 720 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
Chris@23 721 chromanorm[2] += *it;
Chris@23 722 }
Chris@23 723 break;
Chris@23 724 case 3:
Chris@23 725 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
Chris@23 726 chromanorm[0] += pow(*it,2);
Chris@23 727 }
Chris@23 728 chromanorm[0] = sqrt(chromanorm[0]);
Chris@23 729 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
Chris@23 730 chromanorm[1] += pow(*it,2);
Chris@23 731 }
Chris@23 732 chromanorm[1] = sqrt(chromanorm[1]);
Chris@23 733 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
Chris@23 734 chromanorm[2] += pow(*it,2);
Chris@23 735 }
Chris@23 736 chromanorm[2] = sqrt(chromanorm[2]);
Chris@23 737 break;
Chris@23 738 }
Chris@23 739 if (chromanorm[0] > 0) {
Chris@23 740 for (int i = 0; i < f4.values.size(); i++) {
Chris@23 741 f4.values[i] /= chromanorm[0];
Chris@23 742 }
Chris@23 743 }
Chris@23 744 if (chromanorm[1] > 0) {
Chris@23 745 for (int i = 0; i < f5.values.size(); i++) {
Chris@23 746 f5.values[i] /= chromanorm[1];
Chris@23 747 }
Chris@23 748 }
Chris@23 749 if (chromanorm[2] > 0) {
Chris@23 750 for (int i = 0; i < f6.values.size(); i++) {
Chris@23 751 f6.values[i] /= chromanorm[2];
Chris@23 752 }
Chris@23 753 }
matthiasm@13 754
Chris@23 755 }
matthiasm@13 756
Chris@23 757 // local chord estimation
Chris@23 758 vector<float> currentChordSalience;
Chris@23 759 float tempchordvalue = 0;
Chris@23 760 float sumchordvalue = 0;
matthiasm@9 761
Chris@23 762 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23 763 tempchordvalue = 0;
Chris@23 764 for (int iBin = 0; iBin < 12; iBin++) {
Chris@23 765 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23 766 }
Chris@23 767 for (int iBin = 12; iBin < 24; iBin++) {
Chris@23 768 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23 769 }
Chris@23 770 sumchordvalue+=tempchordvalue;
Chris@23 771 currentChordSalience.push_back(tempchordvalue);
Chris@23 772 }
Chris@23 773 if (sumchordvalue > 0) {
Chris@23 774 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23 775 currentChordSalience[iChord] /= sumchordvalue;
Chris@23 776 }
Chris@23 777 } else {
Chris@23 778 currentChordSalience[nChord-1] = 1.0;
Chris@23 779 }
Chris@23 780 chordogram.push_back(currentChordSalience);
matthiasm@1 781
Chris@23 782 fsOut[3].push_back(f3);
Chris@23 783 fsOut[4].push_back(f4);
Chris@23 784 fsOut[5].push_back(f5);
Chris@23 785 fsOut[6].push_back(f6);
Chris@23 786 count++;
Chris@23 787 }
Chris@23 788 cerr << "done." << endl;
matthiasm@13 789
matthiasm@10 790
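Chris@23 791 /* Simple chord estimation
Chris@23 792 I take the local chord estimates ("currentChordSalience", stored per frame in chordogram) and, within a sliding window, sum them over time on either side of every possible change point; the best-scoring left/right chord pair
Chris@23 793 votes into scoreChordogram, and the chord with the maximum score is taken for each frame (followed by a mode filter). Very simple, don't do this at home...
Chris@23 794 */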
Chris@23 795 cerr << "[NNLS Chroma Plugin] Chord Estimation ... ";
Chris@23 796 count = 0;
Chris@23 797 int halfwindowlength = m_inputSampleRate / m_stepSize;
Chris@23 798 vector<int> chordSequence;
Chris@23 799 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram
Chris@23 800 vector<int> temp = vector<int>(nChord,0);
Chris@23 801 scoreChordogram.push_back(temp);
Chris@23 802 }
Chris@23 803 for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) {
Chris@23 804 int startIndex = count + 1;
Chris@23 805 int endIndex = count + 2 * halfwindowlength;
matthiasm@10 806
Chris@23 807 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
matthiasm@10 808
Chris@23 809 vector<int> chordCandidates;
Chris@23 810 for (unsigned iChord = 0; iChord < nChord-1; iChord++) {
Chris@23 811 // float currsum = 0;
Chris@23 812 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
Chris@23 813 // currsum += chordogram[iFrame][iChord];
Chris@23 814 // }
Chris@23 815 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
Chris@23 816 for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
Chris@23 817 if (chordogram[iFrame][iChord] > chordThreshold) {
Chris@23 818 chordCandidates.push_back(iChord);
Chris@23 819 break;
Chris@23 820 }
Chris@23 821 }
Chris@23 822 }
Chris@23 823 chordCandidates.push_back(nChord-1);
Chris@23 824 // cerr << chordCandidates.size() << endl;
Chris@23 825
Chris@23 826 float maxval = 0; // will be the value of the most salient *chord change* in this frame
Chris@23 827 float maxindex = 0; //... and the index thereof
Chris@23 828 unsigned bestchordL = nChord-1; // index of the best "left" chord
Chris@23 829 unsigned bestchordR = nChord-1; // index of the best "right" chord
Chris@23 830
Chris@23 831 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
Chris@23 832 // now find the max values on both sides of iWF
Chris@23 833 // left side:
Chris@23 834 float maxL = 0;
Chris@23 835 unsigned maxindL = nChord-1;
Chris@23 836 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
Chris@23 837 unsigned iChord = chordCandidates[kChord];
Chris@23 838 float currsum = 0;
Chris@23 839 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
Chris@23 840 currsum += chordogram[count+iFrame][iChord];
matthiasm@10 841 }
Chris@23 842 if (iChord == nChord-1) currsum *= 0.8;
Chris@23 843 if (currsum > maxL) {
Chris@23 844 maxL = currsum;
Chris@23 845 maxindL = iChord;
Chris@23 846 }
Chris@23 847 }
Chris@23 848 // right side:
Chris@23 849 float maxR = 0;
Chris@23 850 unsigned maxindR = nChord-1;
Chris@23 851 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
Chris@23 852 unsigned iChord = chordCandidates[kChord];
Chris@23 853 float currsum = 0;
Chris@23 854 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
Chris@23 855 currsum += chordogram[count+iFrame][iChord];
Chris@23 856 }
Chris@23 857 if (iChord == nChord-1) currsum *= 0.8;
Chris@23 858 if (currsum > maxR) {
Chris@23 859 maxR = currsum;
Chris@23 860 maxindR = iChord;
Chris@23 861 }
Chris@23 862 }
Chris@23 863 if (maxL+maxR > maxval) {
Chris@23 864 maxval = maxL+maxR;
Chris@23 865 maxindex = iWF;
Chris@23 866 bestchordL = maxindL;
Chris@23 867 bestchordR = maxindR;
Chris@23 868 }
matthiasm@3 869
Chris@23 870 }
Chris@23 871 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
Chris@23 872 // add a score to every chord-frame-point that was part of a maximum
Chris@23 873 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
Chris@23 874 scoreChordogram[iFrame+count][bestchordL]++;
Chris@23 875 }
Chris@23 876 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
Chris@23 877 scoreChordogram[iFrame+count][bestchordR]++;
Chris@23 878 }
Chris@23 879 if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
Chris@23 880 count++;
Chris@23 881 }
Chris@23 882 // cerr << "******* agent finished *******" << endl;
Chris@23 883 count = 0;
Chris@23 884 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
Chris@23 885 float maxval = 0; // will be the value of the most salient chord in this frame
Chris@23 886 float maxindex = 0; //... and the index thereof
Chris@23 887 for (unsigned iChord = 0; iChord < nChord; iChord++) {
Chris@23 888 if (scoreChordogram[count][iChord] > maxval) {
Chris@23 889 maxval = scoreChordogram[count][iChord];
Chris@23 890 maxindex = iChord;
Chris@23 891 // cerr << iChord << endl;
Chris@23 892 }
Chris@23 893 }
Chris@23 894 chordSequence.push_back(maxindex);
Chris@23 895 // cerr << "before modefilter, maxindex: " << maxindex << endl;
Chris@23 896 count++;
Chris@23 897 }
Chris@23 898 // cerr << "******* mode filter done *******" << endl;
matthiasm@10 899
matthiasm@3 900
Chris@23 901 // mode filter on chordSequence
Chris@23 902 count = 0;
Chris@23 903 string oldChord = "";
Chris@23 904 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
Chris@23 905 Feature f6 = *it;
Chris@23 906 Feature f7; // chord estimate
Chris@23 907 f7.hasTimestamp = true;
Chris@23 908 f7.timestamp = f6.timestamp;
Chris@23 909 Feature f8; // chord change value (normalised chordchange for this frame), not a chord estimate
Chris@23 910 f8.hasTimestamp = true;
Chris@23 911 f8.timestamp = f6.timestamp;
matthiasm@17 912
Chris@23 913 vector<int> chordCount = vector<int>(nChord,0);
Chris@23 914 int maxChordCount = 0;
Chris@23 915 int maxChordIndex = nChord-1;
Chris@23 916 string maxChord;
Chris@23 917 int startIndex = max(count - halfwindowlength/2,0);
Chris@23 918 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
Chris@23 919 for (int i = startIndex; i < endIndex; i++) {
Chris@23 920 chordCount[chordSequence[i]]++;
Chris@23 921 if (chordCount[chordSequence[i]] > maxChordCount) {
Chris@23 922 // cerr << "start index " << startIndex << endl;
Chris@23 923 maxChordCount++;
Chris@23 924 maxChordIndex = chordSequence[i];
Chris@23 925 maxChord = m_chordnames[maxChordIndex];
Chris@23 926 }
Chris@23 927 }
Chris@23 928 // chordSequence[count] = maxChordIndex;
Chris@23 929 // cerr << maxChordIndex << endl;
Chris@23 930 f8.values.push_back(chordchange[count]/(halfwindowlength*2));
Chris@23 931 // cerr << chordchange[count] << endl;
Chris@23 932 fsOut[9].push_back(f8);
Chris@23 933 if (oldChord != maxChord) {
Chris@23 934 oldChord = maxChord;
matthiasm@3 935
Chris@23 936 // char buffer1 [50];
Chris@23 937 // if (maxChordIndex < nChord - 1) {
Chris@23 938 // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
Chris@23 939 // } else {
Chris@23 940 // sprintf(buffer1, "N");
Chris@23 941 // }
Chris@23 942 // f7.label = buffer1;
Chris@23 943 f7.label = m_chordnames[maxChordIndex];
Chris@23 944 fsOut[7].push_back(f7);
Chris@23 945 }
Chris@23 946 count++;
Chris@23 947 }
Chris@23 948 Feature f7; // last chord estimate
Chris@23 949 f7.hasTimestamp = true;
Chris@23 950 f7.timestamp = fsOut[6][fsOut[6].size()-1].timestamp;
Chris@23 951 f7.label = "N";
Chris@23 952 fsOut[7].push_back(f7);
Chris@23 953 cerr << "done." << endl;
Chris@23 954 // // musicity
Chris@23 955 // count = 0;
Chris@23 956 // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
Chris@23 957 // vector<float> musicityValue;
Chris@23 958 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
Chris@23 959 // Feature f4 = *it;
Chris@23 960 //
Chris@23 961 // int startIndex = max(count - musicitykernelwidth/2,0);
Chris@23 962 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
Chris@23 963 // float chromasum = 0;
Chris@23 964 // float diffsum = 0;
Chris@23 965 // for (int k = 0; k < 12; k++) {
Chris@23 966 // for (int i = startIndex + 1; i < endIndex; i++) {
Chris@23 967 // chromasum += pow(fsOut[4][i].values[k],2);
Chris@23 968 // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]);
Chris@23 969 // }
Chris@23 970 // }
Chris@23 971 // diffsum /= chromasum;
Chris@23 972 // musicityValue.push_back(diffsum);
Chris@23 973 // count++;
Chris@23 974 // }
Chris@23 975 //
Chris@23 976 // float musicityThreshold = 0.44;
Chris@23 977 // if (m_stepSize == 4096) {
Chris@23 978 // musicityThreshold = 0.74;
Chris@23 979 // }
Chris@23 980 // if (m_stepSize == 4410) {
Chris@23 981 // musicityThreshold = 0.77;
Chris@23 982 // }
Chris@23 983 //
Chris@23 984 // count = 0;
Chris@23 985 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
Chris@23 986 // Feature f4 = *it;
Chris@23 987 // Feature f8; // musicity
Chris@23 988 // Feature f9; // musicity segmenter
Chris@23 989 //
Chris@23 990 // f8.hasTimestamp = true;
Chris@23 991 // f8.timestamp = f4.timestamp;
Chris@23 992 // f9.hasTimestamp = true;
Chris@23 993 // f9.timestamp = f4.timestamp;
Chris@23 994 //
Chris@23 995 // int startIndex = max(count - musicitykernelwidth/2,0);
Chris@23 996 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
Chris@23 997 // int musicityCount = 0;
Chris@23 998 // for (int i = startIndex; i <= endIndex; i++) {
Chris@23 999 // if (musicityValue[i] > musicityThreshold) musicityCount++;
Chris@23 1000 // }
Chris@23 1001 // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1);
Chris@23 1002 //
Chris@23 1003 // if (isSpeech) {
Chris@23 1004 // if (oldlabeltype != 2) {
Chris@23 1005 // f9.label = "Speech";
Chris@23 1006 // fsOut[9].push_back(f9);
Chris@23 1007 // oldlabeltype = 2;
Chris@23 1008 // }
Chris@23 1009 // } else {
Chris@23 1010 // if (oldlabeltype != 1) {
Chris@23 1011 // f9.label = "Music";
Chris@23 1012 // fsOut[9].push_back(f9);
Chris@23 1013 // oldlabeltype = 1;
Chris@23 1014 // }
Chris@23 1015 // }
Chris@23 1016 // f8.values.push_back(musicityValue[count]);
Chris@23 1017 // fsOut[8].push_back(f8);
Chris@23 1018 // count++;
Chris@23 1019 // }
Chris@23 1020 return fsOut;
matthiasm@0 1021
matthiasm@0 1022 }
matthiasm@0 1023
Chris@35 1024 #endif