annotate NNLSBase.cpp @ 76:d398e73b46e0 matthiasm-plugin

It's broken at the moment; work in progress (making the number of bins per semitone variable, i.e. not necessarily 3).
author Matthias Mauch <mail@matthiasmauch.net>
date Wed, 10 Nov 2010 22:52:46 +0900
parents 9a1f83057e84
children ba930176df5b
rev   line source
Chris@23 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
matthiasm@0 2
Chris@35 3 /*
Chris@35 4 NNLS-Chroma / Chordino
Chris@35 5
Chris@35 6 Audio feature extraction plugins for chromagram and chord
Chris@35 7 estimation.
Chris@35 8
Chris@35 9 Centre for Digital Music, Queen Mary University of London.
Chris@35 10 This file copyright 2008-2010 Matthias Mauch and QMUL.
Chris@35 11
Chris@35 12 This program is free software; you can redistribute it and/or
Chris@35 13 modify it under the terms of the GNU General Public License as
Chris@35 14 published by the Free Software Foundation; either version 2 of the
Chris@35 15 License, or (at your option) any later version. See the file
Chris@35 16 COPYING included with this distribution for more information.
Chris@35 17 */
Chris@35 18
Chris@35 19 #include "NNLSBase.h"
Chris@27 20
Chris@27 21 #include "chromamethods.h"
Chris@27 22
Chris@27 23 #include <cstdlib>
Chris@27 24 #include <fstream>
matthiasm@0 25 #include <cmath>
matthiasm@9 26
Chris@27 27 #include <algorithm>
matthiasm@0 28
// File-wide tracing switch: when true, each plugin entry point in this
// file announces itself on cerr.
const bool debug_on(false);
matthiasm@0 30
Chris@35 31 NNLSBase::NNLSBase(float inputSampleRate) :
Chris@23 32 Plugin(inputSampleRate),
Chris@35 33 m_logSpectrum(0),
Chris@23 34 m_blockSize(0),
Chris@23 35 m_stepSize(0),
Chris@23 36 m_lengthOfNoteIndex(0),
Chris@23 37 m_meanTuning0(0),
Chris@23 38 m_meanTuning1(0),
Chris@23 39 m_meanTuning2(0),
Chris@23 40 m_localTuning0(0),
Chris@23 41 m_localTuning1(0),
Chris@23 42 m_localTuning2(0),
mail@41 43 m_whitening(1.0),
Chris@23 44 m_preset(0.0),
Chris@23 45 m_localTuning(0),
Chris@23 46 m_kernelValue(0),
Chris@23 47 m_kernelFftIndex(0),
Chris@23 48 m_kernelNoteIndex(0),
Chris@23 49 m_dict(0),
mail@60 50 m_tuneLocal(0),
Chris@23 51 m_chorddict(0),
Chris@23 52 m_chordnames(0),
Chris@23 53 m_doNormalizeChroma(0),
mail@60 54 m_rollon(0),
matthiasm@42 55 m_s(0.7),
matthiasm@50 56 m_useNNLS(1),
matthiasm@50 57 m_useHMM(1)
matthiasm@0 58 {
Chris@35 59 if (debug_on) cerr << "--> NNLSBase" << endl;
matthiasm@7 60
Chris@23 61 // make the *note* dictionary matrix
Chris@23 62 m_dict = new float[nNote * 84];
Chris@23 63 for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0;
mail@41 64 dictionaryMatrix(m_dict, 0.7);
matthiasm@7 65
Chris@23 66 // get the *chord* dictionary from file (if the file exists)
Chris@23 67 m_chordnames = chordDictionary(&m_chorddict);
matthiasm@0 68 }
matthiasm@0 69
matthiasm@0 70
Chris@35 71 NNLSBase::~NNLSBase()
matthiasm@0 72 {
Chris@35 73 if (debug_on) cerr << "--> ~NNLSBase" << endl;
Chris@23 74 delete [] m_dict;
matthiasm@0 75 }
matthiasm@0 76
matthiasm@0 77 string
Chris@35 78 NNLSBase::getMaker() const
matthiasm@0 79 {
Chris@23 80 if (debug_on) cerr << "--> getMaker" << endl;
matthiasm@0 81 // Your name here
matthiasm@0 82 return "Matthias Mauch";
matthiasm@0 83 }
matthiasm@0 84
matthiasm@0 85 int
Chris@35 86 NNLSBase::getPluginVersion() const
matthiasm@0 87 {
Chris@23 88 if (debug_on) cerr << "--> getPluginVersion" << endl;
matthiasm@0 89 // Increment this each time you release a version that behaves
matthiasm@0 90 // differently from the previous one
matthiasm@0 91 return 1;
matthiasm@0 92 }
matthiasm@0 93
matthiasm@0 94 string
Chris@35 95 NNLSBase::getCopyright() const
matthiasm@0 96 {
Chris@23 97 if (debug_on) cerr << "--> getCopyright" << endl;
matthiasm@0 98 // This function is not ideally named. It does not necessarily
matthiasm@0 99 // need to say who made the plugin -- getMaker does that -- but it
matthiasm@0 100 // should indicate the terms under which it is distributed. For
matthiasm@0 101 // example, "Copyright (year). All Rights Reserved", or "GPL"
Chris@35 102 return "GPL";
matthiasm@0 103 }
matthiasm@0 104
Chris@35 105 NNLSBase::InputDomain
Chris@35 106 NNLSBase::getInputDomain() const
matthiasm@0 107 {
Chris@23 108 if (debug_on) cerr << "--> getInputDomain" << endl;
matthiasm@0 109 return FrequencyDomain;
matthiasm@0 110 }
matthiasm@0 111
matthiasm@0 112 size_t
Chris@35 113 NNLSBase::getPreferredBlockSize() const
matthiasm@0 114 {
Chris@23 115 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
matthiasm@0 116 return 16384; // 0 means "I can handle any block size"
matthiasm@0 117 }
matthiasm@0 118
matthiasm@0 119 size_t
Chris@35 120 NNLSBase::getPreferredStepSize() const
matthiasm@0 121 {
Chris@23 122 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
matthiasm@0 123 return 2048; // 0 means "anything sensible"; in practice this
Chris@23 124 // means the same as the block size for TimeDomain
Chris@23 125 // plugins, or half of it for FrequencyDomain plugins
matthiasm@0 126 }
matthiasm@0 127
matthiasm@0 128 size_t
Chris@35 129 NNLSBase::getMinChannelCount() const
matthiasm@0 130 {
Chris@23 131 if (debug_on) cerr << "--> getMinChannelCount" << endl;
matthiasm@0 132 return 1;
matthiasm@0 133 }
matthiasm@0 134
matthiasm@0 135 size_t
Chris@35 136 NNLSBase::getMaxChannelCount() const
matthiasm@0 137 {
Chris@23 138 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
matthiasm@0 139 return 1;
matthiasm@0 140 }
matthiasm@0 141
Chris@35 142 NNLSBase::ParameterList
Chris@35 143 NNLSBase::getParameterDescriptors() const
matthiasm@0 144 {
Chris@23 145 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
matthiasm@0 146 ParameterList list;
matthiasm@0 147
matthiasm@42 148 ParameterDescriptor d;
matthiasm@42 149 d.identifier = "useNNLS";
matthiasm@42 150 d.name = "use approximate transcription (NNLS)";
matthiasm@42 151 d.description = "Toggles approximate transcription (NNLS).";
matthiasm@42 152 d.unit = "";
matthiasm@42 153 d.minValue = 0.0;
matthiasm@42 154 d.maxValue = 1.0;
matthiasm@42 155 d.defaultValue = 1.0;
matthiasm@42 156 d.isQuantized = true;
matthiasm@42 157 d.quantizeStep = 1.0;
matthiasm@42 158 list.push_back(d);
matthiasm@42 159
mail@41 160 ParameterDescriptor d0;
mail@41 161 d0.identifier = "rollon";
mail@41 162 d0.name = "spectral roll-on";
matthiasm@58 163 d0.description = "Consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds the quantile [spectral roll on] x [total energy] will be set to 0. A value of 0 means that no bins will be changed.";
matthiasm@59 164 d0.unit = "%";
mail@41 165 d0.minValue = 0;
matthiasm@59 166 d0.maxValue = 5;
mail@41 167 d0.defaultValue = 0;
matthiasm@48 168 d0.isQuantized = true;
matthiasm@59 169 d0.quantizeStep = 0.5;
mail@41 170 list.push_back(d0);
matthiasm@4 171
matthiasm@4 172 ParameterDescriptor d1;
matthiasm@4 173 d1.identifier = "tuningmode";
matthiasm@4 174 d1.name = "tuning mode";
matthiasm@4 175 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
matthiasm@4 176 d1.unit = "";
matthiasm@4 177 d1.minValue = 0;
matthiasm@4 178 d1.maxValue = 1;
matthiasm@4 179 d1.defaultValue = 0;
matthiasm@4 180 d1.isQuantized = true;
matthiasm@4 181 d1.valueNames.push_back("global tuning");
matthiasm@4 182 d1.valueNames.push_back("local tuning");
matthiasm@4 183 d1.quantizeStep = 1.0;
matthiasm@4 184 list.push_back(d1);
matthiasm@4 185
mail@41 186 ParameterDescriptor d2;
mail@41 187 d2.identifier = "whitening";
mail@41 188 d2.name = "spectral whitening";
mail@41 189 d2.description = "Spectral whitening: no whitening - 0; whitening - 1.";
mail@41 190 d2.unit = "";
mail@41 191 d2.isQuantized = true;
mail@41 192 d2.minValue = 0.0;
mail@41 193 d2.maxValue = 1.0;
mail@41 194 d2.defaultValue = 1.0;
mail@41 195 d2.isQuantized = false;
mail@41 196 list.push_back(d2);
mail@41 197
mail@41 198 ParameterDescriptor d3;
mail@41 199 d3.identifier = "s";
mail@41 200 d3.name = "spectral shape";
mail@41 201 d3.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
mail@41 202 d3.unit = "";
mail@41 203 d3.minValue = 0.5;
mail@41 204 d3.maxValue = 0.9;
mail@41 205 d3.defaultValue = 0.7;
mail@41 206 d3.isQuantized = false;
mail@41 207 list.push_back(d3);
mail@41 208
Chris@23 209 ParameterDescriptor d4;
matthiasm@12 210 d4.identifier = "chromanormalize";
matthiasm@12 211 d4.name = "chroma normalization";
matthiasm@12 212 d4.description = "How shall the chroma vector be normalized?";
matthiasm@12 213 d4.unit = "";
matthiasm@12 214 d4.minValue = 0;
matthiasm@13 215 d4.maxValue = 3;
matthiasm@12 216 d4.defaultValue = 0;
matthiasm@12 217 d4.isQuantized = true;
matthiasm@13 218 d4.valueNames.push_back("none");
matthiasm@13 219 d4.valueNames.push_back("maximum norm");
Chris@23 220 d4.valueNames.push_back("L1 norm");
Chris@23 221 d4.valueNames.push_back("L2 norm");
matthiasm@12 222 d4.quantizeStep = 1.0;
matthiasm@12 223 list.push_back(d4);
matthiasm@4 224
matthiasm@0 225 return list;
matthiasm@0 226 }
matthiasm@0 227
matthiasm@0 228 float
Chris@35 229 NNLSBase::getParameter(string identifier) const
matthiasm@0 230 {
Chris@23 231 if (debug_on) cerr << "--> getParameter" << endl;
matthiasm@42 232 if (identifier == "useNNLS") {
matthiasm@42 233 return m_useNNLS;
matthiasm@0 234 }
matthiasm@0 235
mail@41 236 if (identifier == "whitening") {
mail@41 237 return m_whitening;
mail@41 238 }
mail@41 239
mail@41 240 if (identifier == "s") {
mail@41 241 return m_s;
matthiasm@0 242 }
matthiasm@17 243
Chris@23 244 if (identifier == "rollon") {
matthiasm@17 245 return m_rollon;
matthiasm@17 246 }
matthiasm@0 247
matthiasm@0 248 if (identifier == "tuningmode") {
matthiasm@0 249 if (m_tuneLocal) {
matthiasm@0 250 return 1.0;
matthiasm@0 251 } else {
matthiasm@0 252 return 0.0;
matthiasm@0 253 }
matthiasm@0 254 }
Chris@23 255 if (identifier == "preset") {
Chris@23 256 return m_preset;
matthiasm@3 257 }
Chris@23 258 if (identifier == "chromanormalize") {
Chris@23 259 return m_doNormalizeChroma;
matthiasm@12 260 }
matthiasm@50 261
matthiasm@50 262 if (identifier == "useHMM") {
matthiasm@50 263 return m_useHMM;
matthiasm@50 264 }
matthiasm@50 265
matthiasm@0 266 return 0;
matthiasm@0 267
matthiasm@0 268 }
matthiasm@0 269
matthiasm@0 270 void
Chris@35 271 NNLSBase::setParameter(string identifier, float value)
matthiasm@0 272 {
Chris@23 273 if (debug_on) cerr << "--> setParameter" << endl;
matthiasm@42 274 if (identifier == "useNNLS") {
matthiasm@42 275 m_useNNLS = (int) value;
matthiasm@0 276 }
matthiasm@0 277
mail@41 278 if (identifier == "whitening") {
mail@41 279 m_whitening = value;
matthiasm@0 280 }
matthiasm@0 281
mail@41 282 if (identifier == "s") {
mail@41 283 m_s = value;
mail@41 284 }
mail@41 285
matthiasm@50 286 if (identifier == "useHMM") {
matthiasm@50 287 m_useHMM = value;
matthiasm@50 288 }
matthiasm@50 289
matthiasm@0 290 if (identifier == "tuningmode") {
mail@60 291 // m_tuneLocal = (value > 0) ? true : false;
mail@60 292 m_tuneLocal = value;
matthiasm@0 293 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
matthiasm@0 294 }
matthiasm@42 295 // if (identifier == "preset") {
matthiasm@42 296 // m_preset = value;
matthiasm@42 297 // if (m_preset == 0.0) {
matthiasm@42 298 // m_tuneLocal = false;
matthiasm@42 299 // m_whitening = 1.0;
matthiasm@42 300 // m_dictID = 0.0;
matthiasm@42 301 // }
matthiasm@42 302 // if (m_preset == 1.0) {
matthiasm@42 303 // m_tuneLocal = false;
matthiasm@42 304 // m_whitening = 1.0;
matthiasm@42 305 // m_dictID = 1.0;
matthiasm@42 306 // }
matthiasm@42 307 // if (m_preset == 2.0) {
matthiasm@42 308 // m_tuneLocal = false;
matthiasm@42 309 // m_whitening = 0.7;
matthiasm@42 310 // m_dictID = 0.0;
matthiasm@42 311 // }
matthiasm@42 312 // }
Chris@23 313 if (identifier == "chromanormalize") {
Chris@23 314 m_doNormalizeChroma = value;
Chris@23 315 }
matthiasm@17 316
Chris@23 317 if (identifier == "rollon") {
Chris@23 318 m_rollon = value;
Chris@23 319 }
matthiasm@0 320 }
matthiasm@0 321
Chris@35 322 NNLSBase::ProgramList
Chris@35 323 NNLSBase::getPrograms() const
matthiasm@0 324 {
Chris@23 325 if (debug_on) cerr << "--> getPrograms" << endl;
matthiasm@0 326 ProgramList list;
matthiasm@0 327
matthiasm@0 328 // If you have no programs, return an empty list (or simply don't
matthiasm@0 329 // implement this function or getCurrentProgram/selectProgram)
matthiasm@0 330
matthiasm@0 331 return list;
matthiasm@0 332 }
matthiasm@0 333
matthiasm@0 334 string
Chris@35 335 NNLSBase::getCurrentProgram() const
matthiasm@0 336 {
Chris@23 337 if (debug_on) cerr << "--> getCurrentProgram" << endl;
matthiasm@0 338 return ""; // no programs
matthiasm@0 339 }
matthiasm@0 340
matthiasm@0 341 void
Chris@35 342 NNLSBase::selectProgram(string name)
matthiasm@0 343 {
Chris@23 344 if (debug_on) cerr << "--> selectProgram" << endl;
matthiasm@0 345 }
matthiasm@0 346
matthiasm@0 347
matthiasm@0 348 bool
Chris@35 349 NNLSBase::initialise(size_t channels, size_t stepSize, size_t blockSize)
matthiasm@0 350 {
Chris@23 351 if (debug_on) {
Chris@23 352 cerr << "--> initialise";
Chris@23 353 }
matthiasm@1 354
mail@76 355 int hamwinlength = nBPS * 6 + 1;
mail@76 356 float hamwinsum = 0;
mail@76 357 for (int i = 0; i < hamwinlength; ++i) {
mail@76 358 hw.push_back(0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1)));
mail@76 359 hamwinsum += 0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1));
mail@76 360 }
mail@76 361 for (int i = 0; i < hamwinlength; ++i) hw[i] = round(hw[i] / hamwinsum * 10000)*1.0/10000;
mail@76 362
matthiasm@0 363 if (channels < getMinChannelCount() ||
matthiasm@0 364 channels > getMaxChannelCount()) return false;
matthiasm@0 365 m_blockSize = blockSize;
matthiasm@0 366 m_stepSize = stepSize;
Chris@35 367 m_frameCount = 0;
Chris@23 368 int tempn = 256 * m_blockSize/2;
Chris@23 369 // cerr << "length of tempkernel : " << tempn << endl;
Chris@23 370 float *tempkernel;
matthiasm@1 371
Chris@23 372 tempkernel = new float[tempn];
matthiasm@1 373
Chris@23 374 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
Chris@23 375 m_kernelValue.clear();
Chris@23 376 m_kernelFftIndex.clear();
Chris@23 377 m_kernelNoteIndex.clear();
Chris@23 378 int countNonzero = 0;
Chris@23 379 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
Chris@23 380 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) {
Chris@23 381 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
Chris@23 382 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
Chris@23 383 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
Chris@23 384 countNonzero++;
Chris@23 385 }
Chris@23 386 m_kernelFftIndex.push_back(iFFT);
Chris@23 387 m_kernelNoteIndex.push_back(iNote);
Chris@23 388 }
Chris@23 389 }
Chris@23 390 }
Chris@23 391 // cerr << "nonzero count : " << countNonzero << endl;
Chris@23 392 delete [] tempkernel;
Chris@35 393 /*
Chris@23 394 ofstream myfile;
Chris@23 395 myfile.open ("matrix.txt");
matthiasm@3 396 // myfile << "Writing this to a file.\n";
Chris@23 397 for (int i = 0; i < nNote * 84; ++i) {
Chris@23 398 myfile << m_dict[i] << endl;
Chris@23 399 }
matthiasm@3 400 myfile.close();
Chris@35 401 */
matthiasm@0 402 return true;
matthiasm@0 403 }
matthiasm@0 404
matthiasm@0 405 void
Chris@35 406 NNLSBase::reset()
matthiasm@0 407 {
Chris@23 408 if (debug_on) cerr << "--> reset";
matthiasm@4 409
matthiasm@0 410 // Clear buffers, reset stored values, etc
Chris@35 411 m_frameCount = 0;
matthiasm@42 412 // m_dictID = 0;
Chris@35 413 m_logSpectrum.clear();
Chris@23 414 m_meanTuning0 = 0;
Chris@23 415 m_meanTuning1 = 0;
Chris@23 416 m_meanTuning2 = 0;
Chris@23 417 m_localTuning0 = 0;
Chris@23 418 m_localTuning1 = 0;
Chris@23 419 m_localTuning2 = 0;
Chris@23 420 m_localTuning.clear();
matthiasm@0 421 }
matthiasm@0 422
Chris@35 423 void
Chris@35 424 NNLSBase::baseProcess(const float *const *inputBuffers, Vamp::RealTime timestamp)
matthiasm@0 425 {
Chris@35 426 m_frameCount++;
Chris@23 427 float *magnitude = new float[m_blockSize/2];
matthiasm@0 428
Chris@23 429 const float *fbuf = inputBuffers[0];
Chris@23 430 float energysum = 0;
Chris@23 431 // make magnitude
Chris@23 432 float maxmag = -10000;
Chris@23 433 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
Chris@23 434 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
Chris@23 435 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
Chris@23 436 if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin];
Chris@23 437 if (m_rollon > 0) {
Chris@23 438 energysum += pow(magnitude[iBin],2);
Chris@23 439 }
Chris@23 440 }
matthiasm@14 441
Chris@23 442 float cumenergy = 0;
Chris@23 443 if (m_rollon > 0) {
Chris@23 444 for (size_t iBin = 2; iBin < m_blockSize/2; iBin++) {
Chris@23 445 cumenergy += pow(magnitude[iBin],2);
matthiasm@59 446 if (cumenergy < energysum * m_rollon / 100) magnitude[iBin-2] = 0;
Chris@23 447 else break;
Chris@23 448 }
Chris@23 449 }
matthiasm@17 450
Chris@23 451 if (maxmag < 2) {
Chris@23 452 // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl;
Chris@23 453 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
Chris@23 454 magnitude[iBin] = 0;
Chris@23 455 }
Chris@23 456 }
matthiasm@4 457
Chris@23 458 // note magnitude mapping using pre-calculated matrix
Chris@23 459 float *nm = new float[nNote]; // note magnitude
Chris@23 460 for (size_t iNote = 0; iNote < nNote; iNote++) {
Chris@23 461 nm[iNote] = 0; // initialise as 0
Chris@23 462 }
Chris@23 463 int binCount = 0;
Chris@23 464 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
Chris@23 465 // cerr << ".";
Chris@23 466 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
Chris@23 467 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl;
Chris@23 468 binCount++;
Chris@23 469 }
Chris@23 470 // cerr << nm[20];
Chris@23 471 // cerr << endl;
matthiasm@0 472
matthiasm@0 473
Chris@35 474 float one_over_N = 1.0/m_frameCount;
matthiasm@0 475 // update means of complex tuning variables
Chris@35 476 m_meanTuning0 *= float(m_frameCount-1)*one_over_N;
Chris@35 477 m_meanTuning1 *= float(m_frameCount-1)*one_over_N;
Chris@35 478 m_meanTuning2 *= float(m_frameCount-1)*one_over_N;
matthiasm@0 479
matthiasm@0 480 for (int iTone = 0; iTone < 160; iTone = iTone + 3) {
matthiasm@0 481 m_meanTuning0 += nm[iTone + 0]*one_over_N;
matthiasm@0 482 m_meanTuning1 += nm[iTone + 1]*one_over_N;
matthiasm@0 483 m_meanTuning2 += nm[iTone + 2]*one_over_N;
Chris@23 484 float ratioOld = 0.997;
matthiasm@3 485 m_localTuning0 *= ratioOld; m_localTuning0 += nm[iTone + 0] * (1 - ratioOld);
matthiasm@3 486 m_localTuning1 *= ratioOld; m_localTuning1 += nm[iTone + 1] * (1 - ratioOld);
matthiasm@3 487 m_localTuning2 *= ratioOld; m_localTuning2 += nm[iTone + 2] * (1 - ratioOld);
matthiasm@0 488 }
matthiasm@0 489
matthiasm@0 490 // if (m_tuneLocal) {
Chris@23 491 // local tuning
Chris@23 492 float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2;
Chris@23 493 float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2;
Chris@23 494 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
Chris@23 495 m_localTuning.push_back(normalisedtuning);
matthiasm@0 496
Chris@23 497 Feature f1; // logfreqspec
Chris@23 498 f1.hasTimestamp = true;
matthiasm@0 499 f1.timestamp = timestamp;
Chris@23 500 for (size_t iNote = 0; iNote < nNote; iNote++) {
Chris@23 501 f1.values.push_back(nm[iNote]);
Chris@23 502 }
matthiasm@0 503
matthiasm@0 504 // deletes
matthiasm@0 505 delete[] magnitude;
matthiasm@0 506 delete[] nm;
matthiasm@0 507
Chris@35 508 m_logSpectrum.push_back(f1); // remember note magnitude
matthiasm@0 509 }
matthiasm@0 510
Chris@35 511
Chris@35 512 #ifdef NOT_DEFINED
Chris@35 513
Chris@35 514 NNLSBase::FeatureSet
Chris@35 515 NNLSBase::getRemainingFeatures()
matthiasm@0 516 {
Chris@23 517 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
Chris@23 518 FeatureSet fsOut;
Chris@35 519 if (m_logSpectrum.size() == 0) return fsOut;
Chris@23 520 int nChord = m_chordnames.size();
Chris@23 521 //
Chris@23 522 /** Calculate Tuning
Chris@23 523 calculate tuning from (using the angle of the complex number defined by the
Chris@23 524 cumulative mean real and imag values)
Chris@23 525 **/
Chris@23 526 float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
Chris@23 527 float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
Chris@23 528 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
Chris@23 529 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
Chris@23 530 int intShift = floor(normalisedtuning * 3);
Chris@23 531 float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
matthiasm@1 532
Chris@23 533 char buffer0 [50];
matthiasm@1 534
Chris@23 535 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
matthiasm@1 536
Chris@23 537 // cerr << "normalisedtuning: " << normalisedtuning << '\n';
matthiasm@1 538
Chris@23 539 // push tuning to FeatureSet fsOut
Chris@23 540 Feature f0; // tuning
Chris@23 541 f0.hasTimestamp = true;
Chris@23 542 f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));;
Chris@23 543 f0.label = buffer0;
Chris@23 544 fsOut[0].push_back(f0);
matthiasm@1 545
Chris@23 546 /** Tune Log-Frequency Spectrogram
Chris@23 547 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
Chris@23 548 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
Chris@23 549 **/
Chris@23 550 cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... ";
matthiasm@13 551
Chris@23 552 float tempValue = 0;
Chris@23 553 float dbThreshold = 0; // relative to the background spectrum
Chris@23 554 float thresh = pow(10,dbThreshold/20);
Chris@23 555 // cerr << "tune local ? " << m_tuneLocal << endl;
Chris@23 556 int count = 0;
matthiasm@1 557
Chris@35 558 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
Chris@23 559 Feature f1 = *i;
Chris@23 560 Feature f2; // tuned log-frequency spectrum
Chris@23 561 f2.hasTimestamp = true;
Chris@23 562 f2.timestamp = f1.timestamp;
Chris@23 563 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
matthiasm@1 564
mail@60 565 if (m_tuneLocal == 1.0) {
Chris@23 566 intShift = floor(m_localTuning[count] * 3);
Chris@23 567 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
Chris@23 568 }
matthiasm@1 569
Chris@23 570 // cerr << intShift << " " << intFactor << endl;
matthiasm@1 571
Chris@23 572 for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
Chris@23 573 tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor;
Chris@23 574 f2.values.push_back(tempValue);
Chris@23 575 }
matthiasm@1 576
Chris@23 577 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
Chris@23 578 vector<float> runningmean = SpecialConvolution(f2.values,hw);
Chris@23 579 vector<float> runningstd;
Chris@23 580 for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance)
Chris@23 581 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
Chris@23 582 }
Chris@23 583 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
Chris@23 584 for (int i = 0; i < 256; i++) {
Chris@23 585 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
Chris@23 586 if (runningstd[i] > 0) {
Chris@23 587 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
mail@41 588 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
Chris@23 589 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
mail@41 590 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
Chris@23 591 }
Chris@23 592 if (f2.values[i] < 0) {
Chris@23 593 cerr << "ERROR: negative value in logfreq spectrum" << endl;
Chris@23 594 }
Chris@23 595 }
Chris@23 596 fsOut[2].push_back(f2);
Chris@23 597 count++;
Chris@23 598 }
Chris@23 599 cerr << "done." << endl;
matthiasm@1 600
Chris@23 601 /** Semitone spectrum and chromagrams
Chris@23 602 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
Chris@23 603 is inferred using a non-negative least squares algorithm.
Chris@23 604 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
Chris@23 605 bass and treble stacked onto each other).
Chris@23 606 **/
matthiasm@42 607 if (m_useNNLS == 0) {
Chris@23 608 cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... ";
Chris@23 609 } else {
Chris@23 610 cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... ";
Chris@23 611 }
matthiasm@13 612
matthiasm@1 613
Chris@23 614 vector<vector<float> > chordogram;
Chris@23 615 vector<vector<int> > scoreChordogram;
Chris@23 616 vector<float> chordchange = vector<float>(fsOut[2].size(),0);
Chris@23 617 vector<float> oldchroma = vector<float>(12,0);
Chris@23 618 vector<float> oldbasschroma = vector<float>(12,0);
Chris@23 619 count = 0;
matthiasm@9 620
Chris@23 621 for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) {
Chris@23 622 Feature f2 = *it; // logfreq spectrum
Chris@23 623 Feature f3; // semitone spectrum
Chris@23 624 Feature f4; // treble chromagram
Chris@23 625 Feature f5; // bass chromagram
Chris@23 626 Feature f6; // treble and bass chromagram
matthiasm@1 627
Chris@23 628 f3.hasTimestamp = true;
Chris@23 629 f3.timestamp = f2.timestamp;
matthiasm@1 630
Chris@23 631 f4.hasTimestamp = true;
Chris@23 632 f4.timestamp = f2.timestamp;
matthiasm@1 633
Chris@23 634 f5.hasTimestamp = true;
Chris@23 635 f5.timestamp = f2.timestamp;
matthiasm@1 636
Chris@23 637 f6.hasTimestamp = true;
Chris@23 638 f6.timestamp = f2.timestamp;
matthiasm@1 639
Chris@29 640 float b[256];
matthiasm@1 641
Chris@23 642 bool some_b_greater_zero = false;
Chris@23 643 float sumb = 0;
Chris@23 644 for (int i = 0; i < 256; i++) {
Chris@23 645 // b[i] = m_dict[(256 * count + i) % (256 * 84)];
Chris@23 646 b[i] = f2.values[i];
Chris@23 647 sumb += b[i];
Chris@23 648 if (b[i] > 0) {
Chris@23 649 some_b_greater_zero = true;
Chris@23 650 }
Chris@23 651 }
matthiasm@1 652
Chris@23 653 // here's where the non-negative least squares algorithm calculates the note activation x
matthiasm@1 654
Chris@23 655 vector<float> chroma = vector<float>(12, 0);
Chris@23 656 vector<float> basschroma = vector<float>(12, 0);
Chris@23 657 float currval;
Chris@23 658 unsigned iSemitone = 0;
matthiasm@1 659
Chris@23 660 if (some_b_greater_zero) {
matthiasm@42 661 if (m_useNNLS == 0) {
Chris@23 662 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
Chris@23 663 currval = 0;
Chris@23 664 currval += b[iNote + 1 + -1] * 0.5;
Chris@23 665 currval += b[iNote + 1 + 0] * 1.0;
Chris@23 666 currval += b[iNote + 1 + 1] * 0.5;
Chris@23 667 f3.values.push_back(currval);
Chris@23 668 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
Chris@23 669 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
Chris@23 670 iSemitone++;
Chris@23 671 }
matthiasm@1 672
Chris@23 673 } else {
Chris@29 674 float x[84+1000];
Chris@23 675 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
Chris@23 676 vector<int> signifIndex;
Chris@23 677 int index=0;
Chris@23 678 sumb /= 84.0;
Chris@23 679 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
Chris@23 680 float currval = 0;
Chris@23 681 currval += b[iNote + 1 + -1];
Chris@23 682 currval += b[iNote + 1 + 0];
Chris@23 683 currval += b[iNote + 1 + 1];
Chris@23 684 if (currval > 0) signifIndex.push_back(index);
Chris@23 685 f3.values.push_back(0); // fill the values, change later
Chris@23 686 index++;
Chris@23 687 }
Chris@29 688 float rnorm;
Chris@29 689 float w[84+1000];
Chris@29 690 float zz[84+1000];
Chris@23 691 int indx[84+1000];
Chris@23 692 int mode;
Chris@23 693 int dictsize = 256*signifIndex.size();
Chris@23 694 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
Chris@29 695 float *curr_dict = new float[dictsize];
Chris@23 696 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
Chris@23 697 for (unsigned iBin = 0; iBin < 256; iBin++) {
Chris@23 698 curr_dict[iNote * 256 + iBin] = 1.0 * m_dict[signifIndex[iNote] * 256 + iBin];
Chris@23 699 }
Chris@23 700 }
Chris@29 701 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
Chris@23 702 delete [] curr_dict;
Chris@23 703 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
Chris@23 704 f3.values[signifIndex[iNote]] = x[iNote];
Chris@23 705 // cerr << mode << endl;
Chris@23 706 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
Chris@23 707 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
Chris@23 708 }
Chris@23 709 }
Chris@23 710 }
matthiasm@13 711
matthiasm@10 712
matthiasm@12 713
matthiasm@13 714
Chris@23 715 f4.values = chroma;
Chris@23 716 f5.values = basschroma;
Chris@23 717 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
Chris@23 718 f6.values = chroma;
matthiasm@1 719
Chris@23 720 if (m_doNormalizeChroma > 0) {
Chris@23 721 vector<float> chromanorm = vector<float>(3,0);
Chris@23 722 switch (int(m_doNormalizeChroma)) {
Chris@23 723 case 0: // should never end up here
Chris@23 724 break;
Chris@23 725 case 1:
Chris@23 726 chromanorm[0] = *max_element(f4.values.begin(), f4.values.end());
Chris@23 727 chromanorm[1] = *max_element(f5.values.begin(), f5.values.end());
Chris@23 728 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
Chris@23 729 break;
Chris@23 730 case 2:
Chris@23 731 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
Chris@23 732 chromanorm[0] += *it;
Chris@23 733 }
Chris@23 734 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
Chris@23 735 chromanorm[1] += *it;
Chris@23 736 }
Chris@23 737 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
Chris@23 738 chromanorm[2] += *it;
Chris@23 739 }
Chris@23 740 break;
Chris@23 741 case 3:
Chris@23 742 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
Chris@23 743 chromanorm[0] += pow(*it,2);
Chris@23 744 }
Chris@23 745 chromanorm[0] = sqrt(chromanorm[0]);
Chris@23 746 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
Chris@23 747 chromanorm[1] += pow(*it,2);
Chris@23 748 }
Chris@23 749 chromanorm[1] = sqrt(chromanorm[1]);
Chris@23 750 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
Chris@23 751 chromanorm[2] += pow(*it,2);
Chris@23 752 }
Chris@23 753 chromanorm[2] = sqrt(chromanorm[2]);
Chris@23 754 break;
Chris@23 755 }
Chris@23 756 if (chromanorm[0] > 0) {
Chris@23 757 for (int i = 0; i < f4.values.size(); i++) {
Chris@23 758 f4.values[i] /= chromanorm[0];
Chris@23 759 }
Chris@23 760 }
Chris@23 761 if (chromanorm[1] > 0) {
Chris@23 762 for (int i = 0; i < f5.values.size(); i++) {
Chris@23 763 f5.values[i] /= chromanorm[1];
Chris@23 764 }
Chris@23 765 }
Chris@23 766 if (chromanorm[2] > 0) {
Chris@23 767 for (int i = 0; i < f6.values.size(); i++) {
Chris@23 768 f6.values[i] /= chromanorm[2];
Chris@23 769 }
Chris@23 770 }
matthiasm@13 771
Chris@23 772 }
matthiasm@13 773
Chris@23 774 // local chord estimation
Chris@23 775 vector<float> currentChordSalience;
Chris@23 776 float tempchordvalue = 0;
Chris@23 777 float sumchordvalue = 0;
matthiasm@9 778
Chris@23 779 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23 780 tempchordvalue = 0;
Chris@23 781 for (int iBin = 0; iBin < 12; iBin++) {
Chris@23 782 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23 783 }
Chris@23 784 for (int iBin = 12; iBin < 24; iBin++) {
Chris@23 785 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23 786 }
Chris@23 787 sumchordvalue+=tempchordvalue;
Chris@23 788 currentChordSalience.push_back(tempchordvalue);
Chris@23 789 }
Chris@23 790 if (sumchordvalue > 0) {
Chris@23 791 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23 792 currentChordSalience[iChord] /= sumchordvalue;
Chris@23 793 }
Chris@23 794 } else {
Chris@23 795 currentChordSalience[nChord-1] = 1.0;
Chris@23 796 }
Chris@23 797 chordogram.push_back(currentChordSalience);
matthiasm@1 798
Chris@23 799 fsOut[3].push_back(f3);
Chris@23 800 fsOut[4].push_back(f4);
Chris@23 801 fsOut[5].push_back(f5);
Chris@23 802 fsOut[6].push_back(f6);
Chris@23 803 count++;
Chris@23 804 }
Chris@23 805 cerr << "done." << endl;
matthiasm@13 806
matthiasm@10 807
Chris@23 808 /* Simple chord estimation
Chris@23 809 I just take the local chord estimates ("currentChordSalience") and average them over time, then
Chris@23 810 take the maximum. Very simple, don't do this at home...
Chris@23 811 */
Chris@23 812 cerr << "[NNLS Chroma Plugin] Chord Estimation ... ";
Chris@23 813 count = 0;
Chris@23 814 int halfwindowlength = m_inputSampleRate / m_stepSize;
Chris@23 815 vector<int> chordSequence;
Chris@23 816 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram
Chris@23 817 vector<int> temp = vector<int>(nChord,0);
Chris@23 818 scoreChordogram.push_back(temp);
Chris@23 819 }
Chris@23 820 for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) {
Chris@23 821 int startIndex = count + 1;
Chris@23 822 int endIndex = count + 2 * halfwindowlength;
matthiasm@10 823
Chris@23 824 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
matthiasm@10 825
Chris@23 826 vector<int> chordCandidates;
Chris@23 827 for (unsigned iChord = 0; iChord < nChord-1; iChord++) {
Chris@23 828 // float currsum = 0;
Chris@23 829 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
Chris@23 830 // currsum += chordogram[iFrame][iChord];
Chris@23 831 // }
Chris@23 832 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
Chris@23 833 for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
Chris@23 834 if (chordogram[iFrame][iChord] > chordThreshold) {
Chris@23 835 chordCandidates.push_back(iChord);
Chris@23 836 break;
Chris@23 837 }
Chris@23 838 }
Chris@23 839 }
Chris@23 840 chordCandidates.push_back(nChord-1);
Chris@23 841 // cerr << chordCandidates.size() << endl;
Chris@23 842
Chris@23 843 float maxval = 0; // will be the value of the most salient *chord change* in this frame
Chris@23 844 float maxindex = 0; //... and the index thereof
Chris@23 845 unsigned bestchordL = nChord-1; // index of the best "left" chord
Chris@23 846 unsigned bestchordR = nChord-1; // index of the best "right" chord
Chris@23 847
Chris@23 848 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
Chris@23 849 // now find the max values on both sides of iWF
Chris@23 850 // left side:
Chris@23 851 float maxL = 0;
Chris@23 852 unsigned maxindL = nChord-1;
Chris@23 853 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
Chris@23 854 unsigned iChord = chordCandidates[kChord];
Chris@23 855 float currsum = 0;
Chris@23 856 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
Chris@23 857 currsum += chordogram[count+iFrame][iChord];
matthiasm@10 858 }
Chris@23 859 if (iChord == nChord-1) currsum *= 0.8;
Chris@23 860 if (currsum > maxL) {
Chris@23 861 maxL = currsum;
Chris@23 862 maxindL = iChord;
Chris@23 863 }
Chris@23 864 }
Chris@23 865 // right side:
Chris@23 866 float maxR = 0;
Chris@23 867 unsigned maxindR = nChord-1;
Chris@23 868 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
Chris@23 869 unsigned iChord = chordCandidates[kChord];
Chris@23 870 float currsum = 0;
Chris@23 871 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
Chris@23 872 currsum += chordogram[count+iFrame][iChord];
Chris@23 873 }
Chris@23 874 if (iChord == nChord-1) currsum *= 0.8;
Chris@23 875 if (currsum > maxR) {
Chris@23 876 maxR = currsum;
Chris@23 877 maxindR = iChord;
Chris@23 878 }
Chris@23 879 }
Chris@23 880 if (maxL+maxR > maxval) {
Chris@23 881 maxval = maxL+maxR;
Chris@23 882 maxindex = iWF;
Chris@23 883 bestchordL = maxindL;
Chris@23 884 bestchordR = maxindR;
Chris@23 885 }
matthiasm@3 886
Chris@23 887 }
Chris@23 888 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
Chris@23 889 // add a score to every chord-frame-point that was part of a maximum
Chris@23 890 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
Chris@23 891 scoreChordogram[iFrame+count][bestchordL]++;
Chris@23 892 }
Chris@23 893 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
Chris@23 894 scoreChordogram[iFrame+count][bestchordR]++;
Chris@23 895 }
Chris@23 896 if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
Chris@23 897 count++;
Chris@23 898 }
Chris@23 899 // cerr << "******* agent finished *******" << endl;
Chris@23 900 count = 0;
Chris@23 901 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
Chris@23 902 float maxval = 0; // will be the value of the most salient chord in this frame
Chris@23 903 float maxindex = 0; //... and the index thereof
Chris@23 904 for (unsigned iChord = 0; iChord < nChord; iChord++) {
Chris@23 905 if (scoreChordogram[count][iChord] > maxval) {
Chris@23 906 maxval = scoreChordogram[count][iChord];
Chris@23 907 maxindex = iChord;
Chris@23 908 // cerr << iChord << endl;
Chris@23 909 }
Chris@23 910 }
Chris@23 911 chordSequence.push_back(maxindex);
Chris@23 912 // cerr << "before modefilter, maxindex: " << maxindex << endl;
Chris@23 913 count++;
Chris@23 914 }
Chris@23 915 // cerr << "******* mode filter done *******" << endl;
matthiasm@10 916
matthiasm@3 917
Chris@23 918 // mode filter on chordSequence
Chris@23 919 count = 0;
Chris@23 920 string oldChord = "";
Chris@23 921 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
Chris@23 922 Feature f6 = *it;
Chris@23 923 Feature f7; // chord estimate
Chris@23 924 f7.hasTimestamp = true;
Chris@23 925 f7.timestamp = f6.timestamp;
Chris@23 926 Feature f8; // chord estimate
Chris@23 927 f8.hasTimestamp = true;
Chris@23 928 f8.timestamp = f6.timestamp;
matthiasm@17 929
Chris@23 930 vector<int> chordCount = vector<int>(nChord,0);
Chris@23 931 int maxChordCount = 0;
Chris@23 932 int maxChordIndex = nChord-1;
Chris@23 933 string maxChord;
Chris@23 934 int startIndex = max(count - halfwindowlength/2,0);
Chris@23 935 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
Chris@23 936 for (int i = startIndex; i < endIndex; i++) {
Chris@23 937 chordCount[chordSequence[i]]++;
Chris@23 938 if (chordCount[chordSequence[i]] > maxChordCount) {
Chris@23 939 // cerr << "start index " << startIndex << endl;
Chris@23 940 maxChordCount++;
Chris@23 941 maxChordIndex = chordSequence[i];
Chris@23 942 maxChord = m_chordnames[maxChordIndex];
Chris@23 943 }
Chris@23 944 }
Chris@23 945 // chordSequence[count] = maxChordIndex;
Chris@23 946 // cerr << maxChordIndex << endl;
Chris@23 947 f8.values.push_back(chordchange[count]/(halfwindowlength*2));
Chris@23 948 // cerr << chordchange[count] << endl;
Chris@23 949 fsOut[9].push_back(f8);
Chris@23 950 if (oldChord != maxChord) {
Chris@23 951 oldChord = maxChord;
matthiasm@3 952
Chris@23 953 // char buffer1 [50];
Chris@23 954 // if (maxChordIndex < nChord - 1) {
Chris@23 955 // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
Chris@23 956 // } else {
Chris@23 957 // sprintf(buffer1, "N");
Chris@23 958 // }
Chris@23 959 // f7.label = buffer1;
Chris@23 960 f7.label = m_chordnames[maxChordIndex];
Chris@23 961 fsOut[7].push_back(f7);
Chris@23 962 }
Chris@23 963 count++;
Chris@23 964 }
Chris@23 965 Feature f7; // last chord estimate
Chris@23 966 f7.hasTimestamp = true;
Chris@23 967 f7.timestamp = fsOut[6][fsOut[6].size()-1].timestamp;
Chris@23 968 f7.label = "N";
Chris@23 969 fsOut[7].push_back(f7);
Chris@23 970 cerr << "done." << endl;
Chris@23 971 // // musicity
Chris@23 972 // count = 0;
Chris@23 973 // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
Chris@23 974 // vector<float> musicityValue;
Chris@23 975 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
Chris@23 976 // Feature f4 = *it;
Chris@23 977 //
Chris@23 978 // int startIndex = max(count - musicitykernelwidth/2,0);
Chris@23 979 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
Chris@23 980 // float chromasum = 0;
Chris@23 981 // float diffsum = 0;
Chris@23 982 // for (int k = 0; k < 12; k++) {
Chris@23 983 // for (int i = startIndex + 1; i < endIndex; i++) {
Chris@23 984 // chromasum += pow(fsOut[4][i].values[k],2);
Chris@23 985 // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]);
Chris@23 986 // }
Chris@23 987 // }
Chris@23 988 // diffsum /= chromasum;
Chris@23 989 // musicityValue.push_back(diffsum);
Chris@23 990 // count++;
Chris@23 991 // }
Chris@23 992 //
Chris@23 993 // float musicityThreshold = 0.44;
Chris@23 994 // if (m_stepSize == 4096) {
Chris@23 995 // musicityThreshold = 0.74;
Chris@23 996 // }
Chris@23 997 // if (m_stepSize == 4410) {
Chris@23 998 // musicityThreshold = 0.77;
Chris@23 999 // }
Chris@23 1000 //
Chris@23 1001 // count = 0;
Chris@23 1002 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
Chris@23 1003 // Feature f4 = *it;
Chris@23 1004 // Feature f8; // musicity
Chris@23 1005 // Feature f9; // musicity segmenter
Chris@23 1006 //
Chris@23 1007 // f8.hasTimestamp = true;
Chris@23 1008 // f8.timestamp = f4.timestamp;
Chris@23 1009 // f9.hasTimestamp = true;
Chris@23 1010 // f9.timestamp = f4.timestamp;
Chris@23 1011 //
Chris@23 1012 // int startIndex = max(count - musicitykernelwidth/2,0);
Chris@23 1013 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
Chris@23 1014 // int musicityCount = 0;
Chris@23 1015 // for (int i = startIndex; i <= endIndex; i++) {
Chris@23 1016 // if (musicityValue[i] > musicityThreshold) musicityCount++;
Chris@23 1017 // }
Chris@23 1018 // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1);
Chris@23 1019 //
Chris@23 1020 // if (isSpeech) {
Chris@23 1021 // if (oldlabeltype != 2) {
Chris@23 1022 // f9.label = "Speech";
Chris@23 1023 // fsOut[9].push_back(f9);
Chris@23 1024 // oldlabeltype = 2;
Chris@23 1025 // }
Chris@23 1026 // } else {
Chris@23 1027 // if (oldlabeltype != 1) {
Chris@23 1028 // f9.label = "Music";
Chris@23 1029 // fsOut[9].push_back(f9);
Chris@23 1030 // oldlabeltype = 1;
Chris@23 1031 // }
Chris@23 1032 // }
Chris@23 1033 // f8.values.push_back(musicityValue[count]);
Chris@23 1034 // fsOut[8].push_back(f8);
Chris@23 1035 // count++;
Chris@23 1036 // }
Chris@23 1037 return fsOut;
matthiasm@0 1038
matthiasm@0 1039 }
matthiasm@0 1040
Chris@35 1041 #endif