annotate NNLSBase.cpp @ 42:d01f94d58ef0 matthiasm-plugin

* added new parameter that toggles NNLS
author matthiasm
date Sun, 24 Oct 2010 20:43:11 +0900
parents d6bb9b43ac1c
children 6e76c7710fa1
rev   line source
Chris@23 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
matthiasm@0 2
Chris@35 3 /*
Chris@35 4 NNLS-Chroma / Chordino
Chris@35 5
Chris@35 6 Audio feature extraction plugins for chromagram and chord
Chris@35 7 estimation.
Chris@35 8
Chris@35 9 Centre for Digital Music, Queen Mary University of London.
Chris@35 10 This file copyright 2008-2010 Matthias Mauch and QMUL.
Chris@35 11
Chris@35 12 This program is free software; you can redistribute it and/or
Chris@35 13 modify it under the terms of the GNU General Public License as
Chris@35 14 published by the Free Software Foundation; either version 2 of the
Chris@35 15 License, or (at your option) any later version. See the file
Chris@35 16 COPYING included with this distribution for more information.
Chris@35 17 */
Chris@35 18
Chris@35 19 #include "NNLSBase.h"
Chris@27 20
Chris@27 21 #include "chromamethods.h"
Chris@27 22
Chris@27 23 #include <cstdlib>
Chris@27 24 #include <fstream>
matthiasm@0 25 #include <cmath>
matthiasm@9 26
Chris@27 27 #include <algorithm>
matthiasm@0 28
matthiasm@0 29 const bool debug_on = false;
matthiasm@0 30
Chris@27 31 const vector<float> hw(hammingwind, hammingwind+19);
matthiasm@0 32
Chris@35 33 NNLSBase::NNLSBase(float inputSampleRate) :
Chris@23 34 Plugin(inputSampleRate),
Chris@35 35 m_logSpectrum(0),
Chris@23 36 m_blockSize(0),
Chris@23 37 m_stepSize(0),
Chris@23 38 m_lengthOfNoteIndex(0),
Chris@23 39 m_meanTuning0(0),
Chris@23 40 m_meanTuning1(0),
Chris@23 41 m_meanTuning2(0),
Chris@23 42 m_localTuning0(0),
Chris@23 43 m_localTuning1(0),
Chris@23 44 m_localTuning2(0),
mail@41 45 m_whitening(1.0),
Chris@23 46 m_preset(0.0),
Chris@23 47 m_localTuning(0),
Chris@23 48 m_kernelValue(0),
Chris@23 49 m_kernelFftIndex(0),
Chris@23 50 m_kernelNoteIndex(0),
Chris@23 51 m_dict(0),
Chris@23 52 m_tuneLocal(false),
Chris@23 53 m_chorddict(0),
Chris@23 54 m_chordnames(0),
Chris@23 55 m_doNormalizeChroma(0),
mail@41 56 m_rollon(0.0),
matthiasm@42 57 m_s(0.7),
matthiasm@42 58 m_useNNLS(1)
matthiasm@0 59 {
Chris@35 60 if (debug_on) cerr << "--> NNLSBase" << endl;
matthiasm@7 61
Chris@23 62 // make the *note* dictionary matrix
Chris@23 63 m_dict = new float[nNote * 84];
Chris@23 64 for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0;
mail@41 65 dictionaryMatrix(m_dict, 0.7);
matthiasm@7 66
Chris@23 67 // get the *chord* dictionary from file (if the file exists)
Chris@23 68 m_chordnames = chordDictionary(&m_chorddict);
matthiasm@0 69 }
matthiasm@0 70
matthiasm@0 71
Chris@35 72 NNLSBase::~NNLSBase()
matthiasm@0 73 {
Chris@35 74 if (debug_on) cerr << "--> ~NNLSBase" << endl;
Chris@23 75 delete [] m_dict;
matthiasm@0 76 }
matthiasm@0 77
matthiasm@0 78 string
Chris@35 79 NNLSBase::getMaker() const
matthiasm@0 80 {
Chris@23 81 if (debug_on) cerr << "--> getMaker" << endl;
matthiasm@0 82 // Your name here
matthiasm@0 83 return "Matthias Mauch";
matthiasm@0 84 }
matthiasm@0 85
matthiasm@0 86 int
Chris@35 87 NNLSBase::getPluginVersion() const
matthiasm@0 88 {
Chris@23 89 if (debug_on) cerr << "--> getPluginVersion" << endl;
matthiasm@0 90 // Increment this each time you release a version that behaves
matthiasm@0 91 // differently from the previous one
matthiasm@0 92 return 1;
matthiasm@0 93 }
matthiasm@0 94
matthiasm@0 95 string
Chris@35 96 NNLSBase::getCopyright() const
matthiasm@0 97 {
Chris@23 98 if (debug_on) cerr << "--> getCopyright" << endl;
matthiasm@0 99 // This function is not ideally named. It does not necessarily
matthiasm@0 100 // need to say who made the plugin -- getMaker does that -- but it
matthiasm@0 101 // should indicate the terms under which it is distributed. For
matthiasm@0 102 // example, "Copyright (year). All Rights Reserved", or "GPL"
Chris@35 103 return "GPL";
matthiasm@0 104 }
matthiasm@0 105
Chris@35 106 NNLSBase::InputDomain
Chris@35 107 NNLSBase::getInputDomain() const
matthiasm@0 108 {
Chris@23 109 if (debug_on) cerr << "--> getInputDomain" << endl;
matthiasm@0 110 return FrequencyDomain;
matthiasm@0 111 }
matthiasm@0 112
matthiasm@0 113 size_t
Chris@35 114 NNLSBase::getPreferredBlockSize() const
matthiasm@0 115 {
Chris@23 116 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
matthiasm@0 117 return 16384; // 0 means "I can handle any block size"
matthiasm@0 118 }
matthiasm@0 119
matthiasm@0 120 size_t
Chris@35 121 NNLSBase::getPreferredStepSize() const
matthiasm@0 122 {
Chris@23 123 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
matthiasm@0 124 return 2048; // 0 means "anything sensible"; in practice this
Chris@23 125 // means the same as the block size for TimeDomain
Chris@23 126 // plugins, or half of it for FrequencyDomain plugins
matthiasm@0 127 }
matthiasm@0 128
matthiasm@0 129 size_t
Chris@35 130 NNLSBase::getMinChannelCount() const
matthiasm@0 131 {
Chris@23 132 if (debug_on) cerr << "--> getMinChannelCount" << endl;
matthiasm@0 133 return 1;
matthiasm@0 134 }
matthiasm@0 135
matthiasm@0 136 size_t
Chris@35 137 NNLSBase::getMaxChannelCount() const
matthiasm@0 138 {
Chris@23 139 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
matthiasm@0 140 return 1;
matthiasm@0 141 }
matthiasm@0 142
Chris@35 143 NNLSBase::ParameterList
Chris@35 144 NNLSBase::getParameterDescriptors() const
matthiasm@0 145 {
Chris@23 146 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
matthiasm@0 147 ParameterList list;
matthiasm@0 148
matthiasm@42 149 ParameterDescriptor d;
matthiasm@42 150 d.identifier = "useNNLS";
matthiasm@42 151 d.name = "use approximate transcription (NNLS)";
matthiasm@42 152 d.description = "Toggles approximate transcription (NNLS).";
matthiasm@42 153 d.unit = "";
matthiasm@42 154 d.minValue = 0.0;
matthiasm@42 155 d.maxValue = 1.0;
matthiasm@42 156 d.defaultValue = 1.0;
matthiasm@42 157 d.isQuantized = true;
matthiasm@42 158 d.quantizeStep = 1.0;
matthiasm@42 159 list.push_back(d);
matthiasm@42 160
mail@41 161 ParameterDescriptor d0;
mail@41 162 d0.identifier = "rollon";
mail@41 163 d0.name = "spectral roll-on";
mail@41 164 d0.description = "The bins below the spectral roll-on quantile will be set to 0.";
mail@41 165 d0.unit = "";
mail@41 166 d0.minValue = 0;
mail@41 167 d0.maxValue = 0.05;
mail@41 168 d0.defaultValue = 0;
mail@41 169 d0.isQuantized = false;
mail@41 170 list.push_back(d0);
matthiasm@4 171
matthiasm@4 172 ParameterDescriptor d1;
matthiasm@4 173 d1.identifier = "tuningmode";
matthiasm@4 174 d1.name = "tuning mode";
matthiasm@4 175 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
matthiasm@4 176 d1.unit = "";
matthiasm@4 177 d1.minValue = 0;
matthiasm@4 178 d1.maxValue = 1;
matthiasm@4 179 d1.defaultValue = 0;
matthiasm@4 180 d1.isQuantized = true;
matthiasm@4 181 d1.valueNames.push_back("global tuning");
matthiasm@4 182 d1.valueNames.push_back("local tuning");
matthiasm@4 183 d1.quantizeStep = 1.0;
matthiasm@4 184 list.push_back(d1);
matthiasm@4 185
mail@41 186 ParameterDescriptor d2;
mail@41 187 d2.identifier = "whitening";
mail@41 188 d2.name = "spectral whitening";
mail@41 189 d2.description = "Spectral whitening: no whitening - 0; whitening - 1.";
mail@41 190 d2.unit = "";
mail@41 191 d2.isQuantized = true;
mail@41 192 d2.minValue = 0.0;
mail@41 193 d2.maxValue = 1.0;
mail@41 194 d2.defaultValue = 1.0;
mail@41 195 d2.isQuantized = false;
mail@41 196 list.push_back(d2);
mail@41 197
mail@41 198 ParameterDescriptor d3;
mail@41 199 d3.identifier = "s";
mail@41 200 d3.name = "spectral shape";
mail@41 201 d3.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
mail@41 202 d3.unit = "";
mail@41 203 d3.minValue = 0.5;
mail@41 204 d3.maxValue = 0.9;
mail@41 205 d3.defaultValue = 0.7;
mail@41 206 d3.isQuantized = false;
mail@41 207 list.push_back(d3);
mail@41 208
Chris@23 209 ParameterDescriptor d4;
matthiasm@12 210 d4.identifier = "chromanormalize";
matthiasm@12 211 d4.name = "chroma normalization";
matthiasm@12 212 d4.description = "How shall the chroma vector be normalized?";
matthiasm@12 213 d4.unit = "";
matthiasm@12 214 d4.minValue = 0;
matthiasm@13 215 d4.maxValue = 3;
matthiasm@12 216 d4.defaultValue = 0;
matthiasm@12 217 d4.isQuantized = true;
matthiasm@13 218 d4.valueNames.push_back("none");
matthiasm@13 219 d4.valueNames.push_back("maximum norm");
Chris@23 220 d4.valueNames.push_back("L1 norm");
Chris@23 221 d4.valueNames.push_back("L2 norm");
matthiasm@12 222 d4.quantizeStep = 1.0;
matthiasm@12 223 list.push_back(d4);
matthiasm@4 224
matthiasm@0 225 return list;
matthiasm@0 226 }
matthiasm@0 227
matthiasm@0 228 float
Chris@35 229 NNLSBase::getParameter(string identifier) const
matthiasm@0 230 {
Chris@23 231 if (debug_on) cerr << "--> getParameter" << endl;
matthiasm@42 232 if (identifier == "useNNLS") {
matthiasm@42 233 return m_useNNLS;
matthiasm@0 234 }
matthiasm@0 235
mail@41 236 if (identifier == "whitening") {
mail@41 237 return m_whitening;
mail@41 238 }
mail@41 239
mail@41 240 if (identifier == "s") {
mail@41 241 return m_s;
matthiasm@0 242 }
matthiasm@17 243
Chris@23 244 if (identifier == "rollon") {
matthiasm@17 245 return m_rollon;
matthiasm@17 246 }
matthiasm@0 247
matthiasm@0 248 if (identifier == "tuningmode") {
matthiasm@0 249 if (m_tuneLocal) {
matthiasm@0 250 return 1.0;
matthiasm@0 251 } else {
matthiasm@0 252 return 0.0;
matthiasm@0 253 }
matthiasm@0 254 }
Chris@23 255 if (identifier == "preset") {
Chris@23 256 return m_preset;
matthiasm@3 257 }
Chris@23 258 if (identifier == "chromanormalize") {
Chris@23 259 return m_doNormalizeChroma;
matthiasm@12 260 }
matthiasm@0 261 return 0;
matthiasm@0 262
matthiasm@0 263 }
matthiasm@0 264
matthiasm@0 265 void
Chris@35 266 NNLSBase::setParameter(string identifier, float value)
matthiasm@0 267 {
Chris@23 268 if (debug_on) cerr << "--> setParameter" << endl;
matthiasm@42 269 if (identifier == "useNNLS") {
matthiasm@42 270 m_useNNLS = (int) value;
matthiasm@0 271 }
matthiasm@0 272
mail@41 273 if (identifier == "whitening") {
mail@41 274 m_whitening = value;
matthiasm@0 275 }
matthiasm@0 276
mail@41 277 if (identifier == "s") {
mail@41 278 m_s = value;
mail@41 279 }
mail@41 280
matthiasm@0 281 if (identifier == "tuningmode") {
matthiasm@0 282 m_tuneLocal = (value > 0) ? true : false;
matthiasm@0 283 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
matthiasm@0 284 }
matthiasm@42 285 // if (identifier == "preset") {
matthiasm@42 286 // m_preset = value;
matthiasm@42 287 // if (m_preset == 0.0) {
matthiasm@42 288 // m_tuneLocal = false;
matthiasm@42 289 // m_whitening = 1.0;
matthiasm@42 290 // m_dictID = 0.0;
matthiasm@42 291 // }
matthiasm@42 292 // if (m_preset == 1.0) {
matthiasm@42 293 // m_tuneLocal = false;
matthiasm@42 294 // m_whitening = 1.0;
matthiasm@42 295 // m_dictID = 1.0;
matthiasm@42 296 // }
matthiasm@42 297 // if (m_preset == 2.0) {
matthiasm@42 298 // m_tuneLocal = false;
matthiasm@42 299 // m_whitening = 0.7;
matthiasm@42 300 // m_dictID = 0.0;
matthiasm@42 301 // }
matthiasm@42 302 // }
Chris@23 303 if (identifier == "chromanormalize") {
Chris@23 304 m_doNormalizeChroma = value;
Chris@23 305 }
matthiasm@17 306
Chris@23 307 if (identifier == "rollon") {
Chris@23 308 m_rollon = value;
Chris@23 309 }
matthiasm@0 310 }
matthiasm@0 311
Chris@35 312 NNLSBase::ProgramList
Chris@35 313 NNLSBase::getPrograms() const
matthiasm@0 314 {
Chris@23 315 if (debug_on) cerr << "--> getPrograms" << endl;
matthiasm@0 316 ProgramList list;
matthiasm@0 317
matthiasm@0 318 // If you have no programs, return an empty list (or simply don't
matthiasm@0 319 // implement this function or getCurrentProgram/selectProgram)
matthiasm@0 320
matthiasm@0 321 return list;
matthiasm@0 322 }
matthiasm@0 323
matthiasm@0 324 string
Chris@35 325 NNLSBase::getCurrentProgram() const
matthiasm@0 326 {
Chris@23 327 if (debug_on) cerr << "--> getCurrentProgram" << endl;
matthiasm@0 328 return ""; // no programs
matthiasm@0 329 }
matthiasm@0 330
matthiasm@0 331 void
Chris@35 332 NNLSBase::selectProgram(string name)
matthiasm@0 333 {
Chris@23 334 if (debug_on) cerr << "--> selectProgram" << endl;
matthiasm@0 335 }
matthiasm@0 336
matthiasm@0 337
matthiasm@0 338 bool
Chris@35 339 NNLSBase::initialise(size_t channels, size_t stepSize, size_t blockSize)
matthiasm@0 340 {
Chris@23 341 if (debug_on) {
Chris@23 342 cerr << "--> initialise";
Chris@23 343 }
matthiasm@1 344
matthiasm@0 345 if (channels < getMinChannelCount() ||
matthiasm@0 346 channels > getMaxChannelCount()) return false;
matthiasm@0 347 m_blockSize = blockSize;
matthiasm@0 348 m_stepSize = stepSize;
Chris@35 349 m_frameCount = 0;
Chris@23 350 int tempn = 256 * m_blockSize/2;
Chris@23 351 // cerr << "length of tempkernel : " << tempn << endl;
Chris@23 352 float *tempkernel;
matthiasm@1 353
Chris@23 354 tempkernel = new float[tempn];
matthiasm@1 355
Chris@23 356 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
Chris@23 357 m_kernelValue.clear();
Chris@23 358 m_kernelFftIndex.clear();
Chris@23 359 m_kernelNoteIndex.clear();
Chris@23 360 int countNonzero = 0;
Chris@23 361 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
Chris@23 362 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) {
Chris@23 363 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
Chris@23 364 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
Chris@23 365 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
Chris@23 366 countNonzero++;
Chris@23 367 }
Chris@23 368 m_kernelFftIndex.push_back(iFFT);
Chris@23 369 m_kernelNoteIndex.push_back(iNote);
Chris@23 370 }
Chris@23 371 }
Chris@23 372 }
Chris@23 373 // cerr << "nonzero count : " << countNonzero << endl;
Chris@23 374 delete [] tempkernel;
Chris@35 375 /*
Chris@23 376 ofstream myfile;
Chris@23 377 myfile.open ("matrix.txt");
matthiasm@3 378 // myfile << "Writing this to a file.\n";
Chris@23 379 for (int i = 0; i < nNote * 84; ++i) {
Chris@23 380 myfile << m_dict[i] << endl;
Chris@23 381 }
matthiasm@3 382 myfile.close();
Chris@35 383 */
matthiasm@0 384 return true;
matthiasm@0 385 }
matthiasm@0 386
matthiasm@0 387 void
Chris@35 388 NNLSBase::reset()
matthiasm@0 389 {
Chris@23 390 if (debug_on) cerr << "--> reset";
matthiasm@4 391
matthiasm@0 392 // Clear buffers, reset stored values, etc
Chris@35 393 m_frameCount = 0;
matthiasm@42 394 // m_dictID = 0;
Chris@35 395 m_logSpectrum.clear();
Chris@23 396 m_meanTuning0 = 0;
Chris@23 397 m_meanTuning1 = 0;
Chris@23 398 m_meanTuning2 = 0;
Chris@23 399 m_localTuning0 = 0;
Chris@23 400 m_localTuning1 = 0;
Chris@23 401 m_localTuning2 = 0;
Chris@23 402 m_localTuning.clear();
matthiasm@0 403 }
matthiasm@0 404
Chris@35 405 void
Chris@35 406 NNLSBase::baseProcess(const float *const *inputBuffers, Vamp::RealTime timestamp)
matthiasm@0 407 {
Chris@35 408 m_frameCount++;
Chris@23 409 float *magnitude = new float[m_blockSize/2];
matthiasm@0 410
Chris@23 411 const float *fbuf = inputBuffers[0];
Chris@23 412 float energysum = 0;
Chris@23 413 // make magnitude
Chris@23 414 float maxmag = -10000;
Chris@23 415 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
Chris@23 416 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
Chris@23 417 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
Chris@23 418 if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin];
Chris@23 419 if (m_rollon > 0) {
Chris@23 420 energysum += pow(magnitude[iBin],2);
Chris@23 421 }
Chris@23 422 }
matthiasm@14 423
Chris@23 424 float cumenergy = 0;
Chris@23 425 if (m_rollon > 0) {
Chris@23 426 for (size_t iBin = 2; iBin < m_blockSize/2; iBin++) {
Chris@23 427 cumenergy += pow(magnitude[iBin],2);
Chris@23 428 if (cumenergy < energysum * m_rollon) magnitude[iBin-2] = 0;
Chris@23 429 else break;
Chris@23 430 }
Chris@23 431 }
matthiasm@17 432
Chris@23 433 if (maxmag < 2) {
Chris@23 434 // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl;
Chris@23 435 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
Chris@23 436 magnitude[iBin] = 0;
Chris@23 437 }
Chris@23 438 }
matthiasm@4 439
Chris@23 440 // note magnitude mapping using pre-calculated matrix
Chris@23 441 float *nm = new float[nNote]; // note magnitude
Chris@23 442 for (size_t iNote = 0; iNote < nNote; iNote++) {
Chris@23 443 nm[iNote] = 0; // initialise as 0
Chris@23 444 }
Chris@23 445 int binCount = 0;
Chris@23 446 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
Chris@23 447 // cerr << ".";
Chris@23 448 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
Chris@23 449 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl;
Chris@23 450 binCount++;
Chris@23 451 }
Chris@23 452 // cerr << nm[20];
Chris@23 453 // cerr << endl;
matthiasm@0 454
matthiasm@0 455
Chris@35 456 float one_over_N = 1.0/m_frameCount;
matthiasm@0 457 // update means of complex tuning variables
Chris@35 458 m_meanTuning0 *= float(m_frameCount-1)*one_over_N;
Chris@35 459 m_meanTuning1 *= float(m_frameCount-1)*one_over_N;
Chris@35 460 m_meanTuning2 *= float(m_frameCount-1)*one_over_N;
matthiasm@0 461
matthiasm@0 462 for (int iTone = 0; iTone < 160; iTone = iTone + 3) {
matthiasm@0 463 m_meanTuning0 += nm[iTone + 0]*one_over_N;
matthiasm@0 464 m_meanTuning1 += nm[iTone + 1]*one_over_N;
matthiasm@0 465 m_meanTuning2 += nm[iTone + 2]*one_over_N;
Chris@23 466 float ratioOld = 0.997;
matthiasm@3 467 m_localTuning0 *= ratioOld; m_localTuning0 += nm[iTone + 0] * (1 - ratioOld);
matthiasm@3 468 m_localTuning1 *= ratioOld; m_localTuning1 += nm[iTone + 1] * (1 - ratioOld);
matthiasm@3 469 m_localTuning2 *= ratioOld; m_localTuning2 += nm[iTone + 2] * (1 - ratioOld);
matthiasm@0 470 }
matthiasm@0 471
matthiasm@0 472 // if (m_tuneLocal) {
Chris@23 473 // local tuning
Chris@23 474 float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2;
Chris@23 475 float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2;
Chris@23 476 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
Chris@23 477 m_localTuning.push_back(normalisedtuning);
matthiasm@0 478
Chris@23 479 Feature f1; // logfreqspec
Chris@23 480 f1.hasTimestamp = true;
matthiasm@0 481 f1.timestamp = timestamp;
Chris@23 482 for (size_t iNote = 0; iNote < nNote; iNote++) {
Chris@23 483 f1.values.push_back(nm[iNote]);
Chris@23 484 }
matthiasm@0 485
matthiasm@0 486 // deletes
matthiasm@0 487 delete[] magnitude;
matthiasm@0 488 delete[] nm;
matthiasm@0 489
Chris@35 490 m_logSpectrum.push_back(f1); // remember note magnitude
matthiasm@0 491 }
matthiasm@0 492
Chris@35 493
Chris@35 494 #ifdef NOT_DEFINED
Chris@35 495
Chris@35 496 NNLSBase::FeatureSet
Chris@35 497 NNLSBase::getRemainingFeatures()
matthiasm@0 498 {
Chris@23 499 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
Chris@23 500 FeatureSet fsOut;
Chris@35 501 if (m_logSpectrum.size() == 0) return fsOut;
Chris@23 502 int nChord = m_chordnames.size();
Chris@23 503 //
Chris@23 504 /** Calculate Tuning
Chris@23 505 calculate tuning from (using the angle of the complex number defined by the
Chris@23 506 cumulative mean real and imag values)
Chris@23 507 **/
Chris@23 508 float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
Chris@23 509 float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
Chris@23 510 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
Chris@23 511 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
Chris@23 512 int intShift = floor(normalisedtuning * 3);
Chris@23 513 float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
matthiasm@1 514
Chris@23 515 char buffer0 [50];
matthiasm@1 516
Chris@23 517 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
matthiasm@1 518
Chris@23 519 // cerr << "normalisedtuning: " << normalisedtuning << '\n';
matthiasm@1 520
Chris@23 521 // push tuning to FeatureSet fsOut
Chris@23 522 Feature f0; // tuning
Chris@23 523 f0.hasTimestamp = true;
Chris@23 524 f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));;
Chris@23 525 f0.label = buffer0;
Chris@23 526 fsOut[0].push_back(f0);
matthiasm@1 527
Chris@23 528 /** Tune Log-Frequency Spectrogram
Chris@23 529 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
Chris@23 530 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
Chris@23 531 **/
Chris@23 532 cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... ";
matthiasm@13 533
Chris@23 534 float tempValue = 0;
Chris@23 535 float dbThreshold = 0; // relative to the background spectrum
Chris@23 536 float thresh = pow(10,dbThreshold/20);
Chris@23 537 // cerr << "tune local ? " << m_tuneLocal << endl;
Chris@23 538 int count = 0;
matthiasm@1 539
Chris@35 540 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
Chris@23 541 Feature f1 = *i;
Chris@23 542 Feature f2; // tuned log-frequency spectrum
Chris@23 543 f2.hasTimestamp = true;
Chris@23 544 f2.timestamp = f1.timestamp;
Chris@23 545 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
matthiasm@1 546
Chris@23 547 if (m_tuneLocal) {
Chris@23 548 intShift = floor(m_localTuning[count] * 3);
Chris@23 549 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
Chris@23 550 }
matthiasm@1 551
Chris@23 552 // cerr << intShift << " " << intFactor << endl;
matthiasm@1 553
Chris@23 554 for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
Chris@23 555 tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor;
Chris@23 556 f2.values.push_back(tempValue);
Chris@23 557 }
matthiasm@1 558
Chris@23 559 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
Chris@23 560 vector<float> runningmean = SpecialConvolution(f2.values,hw);
Chris@23 561 vector<float> runningstd;
Chris@23 562 for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance)
Chris@23 563 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
Chris@23 564 }
Chris@23 565 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
Chris@23 566 for (int i = 0; i < 256; i++) {
Chris@23 567 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
Chris@23 568 if (runningstd[i] > 0) {
Chris@23 569 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
mail@41 570 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
Chris@23 571 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
mail@41 572 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
Chris@23 573 }
Chris@23 574 if (f2.values[i] < 0) {
Chris@23 575 cerr << "ERROR: negative value in logfreq spectrum" << endl;
Chris@23 576 }
Chris@23 577 }
Chris@23 578 fsOut[2].push_back(f2);
Chris@23 579 count++;
Chris@23 580 }
Chris@23 581 cerr << "done." << endl;
matthiasm@1 582
Chris@23 583 /** Semitone spectrum and chromagrams
Chris@23 584 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
Chris@23 585 is inferred using a non-negative least squares algorithm.
Chris@23 586 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
Chris@23 587 bass and treble stacked onto each other).
Chris@23 588 **/
matthiasm@42 589 if (m_useNNLS == 0) {
Chris@23 590 cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... ";
Chris@23 591 } else {
Chris@23 592 cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... ";
Chris@23 593 }
matthiasm@13 594
matthiasm@1 595
Chris@23 596 vector<vector<float> > chordogram;
Chris@23 597 vector<vector<int> > scoreChordogram;
Chris@23 598 vector<float> chordchange = vector<float>(fsOut[2].size(),0);
Chris@23 599 vector<float> oldchroma = vector<float>(12,0);
Chris@23 600 vector<float> oldbasschroma = vector<float>(12,0);
Chris@23 601 count = 0;
matthiasm@9 602
Chris@23 603 for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) {
Chris@23 604 Feature f2 = *it; // logfreq spectrum
Chris@23 605 Feature f3; // semitone spectrum
Chris@23 606 Feature f4; // treble chromagram
Chris@23 607 Feature f5; // bass chromagram
Chris@23 608 Feature f6; // treble and bass chromagram
matthiasm@1 609
Chris@23 610 f3.hasTimestamp = true;
Chris@23 611 f3.timestamp = f2.timestamp;
matthiasm@1 612
Chris@23 613 f4.hasTimestamp = true;
Chris@23 614 f4.timestamp = f2.timestamp;
matthiasm@1 615
Chris@23 616 f5.hasTimestamp = true;
Chris@23 617 f5.timestamp = f2.timestamp;
matthiasm@1 618
Chris@23 619 f6.hasTimestamp = true;
Chris@23 620 f6.timestamp = f2.timestamp;
matthiasm@1 621
Chris@29 622 float b[256];
matthiasm@1 623
Chris@23 624 bool some_b_greater_zero = false;
Chris@23 625 float sumb = 0;
Chris@23 626 for (int i = 0; i < 256; i++) {
Chris@23 627 // b[i] = m_dict[(256 * count + i) % (256 * 84)];
Chris@23 628 b[i] = f2.values[i];
Chris@23 629 sumb += b[i];
Chris@23 630 if (b[i] > 0) {
Chris@23 631 some_b_greater_zero = true;
Chris@23 632 }
Chris@23 633 }
matthiasm@1 634
Chris@23 635 // here's where the non-negative least squares algorithm calculates the note activation x
matthiasm@1 636
Chris@23 637 vector<float> chroma = vector<float>(12, 0);
Chris@23 638 vector<float> basschroma = vector<float>(12, 0);
Chris@23 639 float currval;
Chris@23 640 unsigned iSemitone = 0;
matthiasm@1 641
Chris@23 642 if (some_b_greater_zero) {
matthiasm@42 643 if (m_useNNLS == 0) {
Chris@23 644 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
Chris@23 645 currval = 0;
Chris@23 646 currval += b[iNote + 1 + -1] * 0.5;
Chris@23 647 currval += b[iNote + 1 + 0] * 1.0;
Chris@23 648 currval += b[iNote + 1 + 1] * 0.5;
Chris@23 649 f3.values.push_back(currval);
Chris@23 650 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
Chris@23 651 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
Chris@23 652 iSemitone++;
Chris@23 653 }
matthiasm@1 654
Chris@23 655 } else {
Chris@29 656 float x[84+1000];
Chris@23 657 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
Chris@23 658 vector<int> signifIndex;
Chris@23 659 int index=0;
Chris@23 660 sumb /= 84.0;
Chris@23 661 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
Chris@23 662 float currval = 0;
Chris@23 663 currval += b[iNote + 1 + -1];
Chris@23 664 currval += b[iNote + 1 + 0];
Chris@23 665 currval += b[iNote + 1 + 1];
Chris@23 666 if (currval > 0) signifIndex.push_back(index);
Chris@23 667 f3.values.push_back(0); // fill the values, change later
Chris@23 668 index++;
Chris@23 669 }
Chris@29 670 float rnorm;
Chris@29 671 float w[84+1000];
Chris@29 672 float zz[84+1000];
Chris@23 673 int indx[84+1000];
Chris@23 674 int mode;
Chris@23 675 int dictsize = 256*signifIndex.size();
Chris@23 676 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
Chris@29 677 float *curr_dict = new float[dictsize];
Chris@23 678 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
Chris@23 679 for (unsigned iBin = 0; iBin < 256; iBin++) {
Chris@23 680 curr_dict[iNote * 256 + iBin] = 1.0 * m_dict[signifIndex[iNote] * 256 + iBin];
Chris@23 681 }
Chris@23 682 }
Chris@29 683 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
Chris@23 684 delete [] curr_dict;
Chris@23 685 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
Chris@23 686 f3.values[signifIndex[iNote]] = x[iNote];
Chris@23 687 // cerr << mode << endl;
Chris@23 688 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
Chris@23 689 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
Chris@23 690 }
Chris@23 691 }
Chris@23 692 }
matthiasm@13 693
matthiasm@10 694
matthiasm@12 695
matthiasm@13 696
Chris@23 697 f4.values = chroma;
Chris@23 698 f5.values = basschroma;
Chris@23 699 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
Chris@23 700 f6.values = chroma;
matthiasm@1 701
Chris@23 702 if (m_doNormalizeChroma > 0) {
Chris@23 703 vector<float> chromanorm = vector<float>(3,0);
Chris@23 704 switch (int(m_doNormalizeChroma)) {
Chris@23 705 case 0: // should never end up here
Chris@23 706 break;
Chris@23 707 case 1:
Chris@23 708 chromanorm[0] = *max_element(f4.values.begin(), f4.values.end());
Chris@23 709 chromanorm[1] = *max_element(f5.values.begin(), f5.values.end());
Chris@23 710 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
Chris@23 711 break;
Chris@23 712 case 2:
Chris@23 713 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
Chris@23 714 chromanorm[0] += *it;
Chris@23 715 }
Chris@23 716 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
Chris@23 717 chromanorm[1] += *it;
Chris@23 718 }
Chris@23 719 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
Chris@23 720 chromanorm[2] += *it;
Chris@23 721 }
Chris@23 722 break;
Chris@23 723 case 3:
Chris@23 724 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
Chris@23 725 chromanorm[0] += pow(*it,2);
Chris@23 726 }
Chris@23 727 chromanorm[0] = sqrt(chromanorm[0]);
Chris@23 728 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
Chris@23 729 chromanorm[1] += pow(*it,2);
Chris@23 730 }
Chris@23 731 chromanorm[1] = sqrt(chromanorm[1]);
Chris@23 732 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
Chris@23 733 chromanorm[2] += pow(*it,2);
Chris@23 734 }
Chris@23 735 chromanorm[2] = sqrt(chromanorm[2]);
Chris@23 736 break;
Chris@23 737 }
Chris@23 738 if (chromanorm[0] > 0) {
Chris@23 739 for (int i = 0; i < f4.values.size(); i++) {
Chris@23 740 f4.values[i] /= chromanorm[0];
Chris@23 741 }
Chris@23 742 }
Chris@23 743 if (chromanorm[1] > 0) {
Chris@23 744 for (int i = 0; i < f5.values.size(); i++) {
Chris@23 745 f5.values[i] /= chromanorm[1];
Chris@23 746 }
Chris@23 747 }
Chris@23 748 if (chromanorm[2] > 0) {
Chris@23 749 for (int i = 0; i < f6.values.size(); i++) {
Chris@23 750 f6.values[i] /= chromanorm[2];
Chris@23 751 }
Chris@23 752 }
matthiasm@13 753
Chris@23 754 }
matthiasm@13 755
Chris@23 756 // local chord estimation
Chris@23 757 vector<float> currentChordSalience;
Chris@23 758 float tempchordvalue = 0;
Chris@23 759 float sumchordvalue = 0;
matthiasm@9 760
Chris@23 761 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23 762 tempchordvalue = 0;
Chris@23 763 for (int iBin = 0; iBin < 12; iBin++) {
Chris@23 764 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23 765 }
Chris@23 766 for (int iBin = 12; iBin < 24; iBin++) {
Chris@23 767 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23 768 }
Chris@23 769 sumchordvalue+=tempchordvalue;
Chris@23 770 currentChordSalience.push_back(tempchordvalue);
Chris@23 771 }
Chris@23 772 if (sumchordvalue > 0) {
Chris@23 773 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23 774 currentChordSalience[iChord] /= sumchordvalue;
Chris@23 775 }
Chris@23 776 } else {
Chris@23 777 currentChordSalience[nChord-1] = 1.0;
Chris@23 778 }
Chris@23 779 chordogram.push_back(currentChordSalience);
matthiasm@1 780
Chris@23 781 fsOut[3].push_back(f3);
Chris@23 782 fsOut[4].push_back(f4);
Chris@23 783 fsOut[5].push_back(f5);
Chris@23 784 fsOut[6].push_back(f6);
Chris@23 785 count++;
Chris@23 786 }
Chris@23 787 cerr << "done." << endl;
matthiasm@13 788
matthiasm@10 789
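Chris@23 790 /* Simple chord estimation
Chris@23 791 I just take the local chord estimates ("currentChordSalience"), sum them over time on either side of every candidate change point in a sliding window,
Chris@23 792 let the best left/right chord pair vote into scoreChordogram, then take the per-frame maximum (followed by a mode filter). Very simple, don't do this at home...
Chris@23 793 */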
Chris@23 794 cerr << "[NNLS Chroma Plugin] Chord Estimation ... ";
Chris@23 795 count = 0;
Chris@23 796 int halfwindowlength = m_inputSampleRate / m_stepSize;
Chris@23 797 vector<int> chordSequence;
Chris@23 798 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram
Chris@23 799 vector<int> temp = vector<int>(nChord,0);
Chris@23 800 scoreChordogram.push_back(temp);
Chris@23 801 }
Chris@23 802 for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) {
Chris@23 803 int startIndex = count + 1;
Chris@23 804 int endIndex = count + 2 * halfwindowlength;
matthiasm@10 805
Chris@23 806 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
matthiasm@10 807
Chris@23 808 vector<int> chordCandidates;
Chris@23 809 for (unsigned iChord = 0; iChord < nChord-1; iChord++) {
Chris@23 810 // float currsum = 0;
Chris@23 811 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
Chris@23 812 // currsum += chordogram[iFrame][iChord];
Chris@23 813 // }
Chris@23 814 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
Chris@23 815 for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
Chris@23 816 if (chordogram[iFrame][iChord] > chordThreshold) {
Chris@23 817 chordCandidates.push_back(iChord);
Chris@23 818 break;
Chris@23 819 }
Chris@23 820 }
Chris@23 821 }
Chris@23 822 chordCandidates.push_back(nChord-1);
Chris@23 823 // cerr << chordCandidates.size() << endl;
Chris@23 824
Chris@23 825 float maxval = 0; // will be the value of the most salient *chord change* in this frame
Chris@23 826 float maxindex = 0; //... and the index thereof
Chris@23 827 unsigned bestchordL = nChord-1; // index of the best "left" chord
Chris@23 828 unsigned bestchordR = nChord-1; // index of the best "right" chord
Chris@23 829
Chris@23 830 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
Chris@23 831 // now find the max values on both sides of iWF
Chris@23 832 // left side:
Chris@23 833 float maxL = 0;
Chris@23 834 unsigned maxindL = nChord-1;
Chris@23 835 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
Chris@23 836 unsigned iChord = chordCandidates[kChord];
Chris@23 837 float currsum = 0;
Chris@23 838 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
Chris@23 839 currsum += chordogram[count+iFrame][iChord];
matthiasm@10 840 }
Chris@23 841 if (iChord == nChord-1) currsum *= 0.8;
Chris@23 842 if (currsum > maxL) {
Chris@23 843 maxL = currsum;
Chris@23 844 maxindL = iChord;
Chris@23 845 }
Chris@23 846 }
Chris@23 847 // right side:
Chris@23 848 float maxR = 0;
Chris@23 849 unsigned maxindR = nChord-1;
Chris@23 850 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
Chris@23 851 unsigned iChord = chordCandidates[kChord];
Chris@23 852 float currsum = 0;
Chris@23 853 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
Chris@23 854 currsum += chordogram[count+iFrame][iChord];
Chris@23 855 }
Chris@23 856 if (iChord == nChord-1) currsum *= 0.8;
Chris@23 857 if (currsum > maxR) {
Chris@23 858 maxR = currsum;
Chris@23 859 maxindR = iChord;
Chris@23 860 }
Chris@23 861 }
Chris@23 862 if (maxL+maxR > maxval) {
Chris@23 863 maxval = maxL+maxR;
Chris@23 864 maxindex = iWF;
Chris@23 865 bestchordL = maxindL;
Chris@23 866 bestchordR = maxindR;
Chris@23 867 }
matthiasm@3 868
Chris@23 869 }
Chris@23 870 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
Chris@23 871 // add a score to every chord-frame-point that was part of a maximum
Chris@23 872 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
Chris@23 873 scoreChordogram[iFrame+count][bestchordL]++;
Chris@23 874 }
Chris@23 875 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
Chris@23 876 scoreChordogram[iFrame+count][bestchordR]++;
Chris@23 877 }
Chris@23 878 if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
Chris@23 879 count++;
Chris@23 880 }
Chris@23 881 // cerr << "******* agent finished *******" << endl;
Chris@23 882 count = 0;
Chris@23 883 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
Chris@23 884 float maxval = 0; // will be the value of the most salient chord in this frame
Chris@23 885 float maxindex = 0; //... and the index thereof
Chris@23 886 for (unsigned iChord = 0; iChord < nChord; iChord++) {
Chris@23 887 if (scoreChordogram[count][iChord] > maxval) {
Chris@23 888 maxval = scoreChordogram[count][iChord];
Chris@23 889 maxindex = iChord;
Chris@23 890 // cerr << iChord << endl;
Chris@23 891 }
Chris@23 892 }
Chris@23 893 chordSequence.push_back(maxindex);
Chris@23 894 // cerr << "before modefilter, maxindex: " << maxindex << endl;
Chris@23 895 count++;
Chris@23 896 }
Chris@23 897 // cerr << "******* mode filter done *******" << endl;
matthiasm@10 898
matthiasm@3 899
Chris@23 900 // mode filter on chordSequence
Chris@23 901 count = 0;
Chris@23 902 string oldChord = "";
Chris@23 903 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
Chris@23 904 Feature f6 = *it;
Chris@23 905 Feature f7; // chord estimate
Chris@23 906 f7.hasTimestamp = true;
Chris@23 907 f7.timestamp = f6.timestamp;
Chris@23 908 Feature f8; // chord change value (normalised chordchange, pushed to fsOut[9])
Chris@23 909 f8.hasTimestamp = true;
Chris@23 910 f8.timestamp = f6.timestamp;
matthiasm@17 911
Chris@23 912 vector<int> chordCount = vector<int>(nChord,0);
Chris@23 913 int maxChordCount = 0;
Chris@23 914 int maxChordIndex = nChord-1;
Chris@23 915 string maxChord;
Chris@23 916 int startIndex = max(count - halfwindowlength/2,0);
Chris@23 917 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
Chris@23 918 for (int i = startIndex; i < endIndex; i++) {
Chris@23 919 chordCount[chordSequence[i]]++;
Chris@23 920 if (chordCount[chordSequence[i]] > maxChordCount) {
Chris@23 921 // cerr << "start index " << startIndex << endl;
Chris@23 922 maxChordCount++;
Chris@23 923 maxChordIndex = chordSequence[i];
Chris@23 924 maxChord = m_chordnames[maxChordIndex];
Chris@23 925 }
Chris@23 926 }
Chris@23 927 // chordSequence[count] = maxChordIndex;
Chris@23 928 // cerr << maxChordIndex << endl;
Chris@23 929 f8.values.push_back(chordchange[count]/(halfwindowlength*2));
Chris@23 930 // cerr << chordchange[count] << endl;
Chris@23 931 fsOut[9].push_back(f8);
Chris@23 932 if (oldChord != maxChord) {
Chris@23 933 oldChord = maxChord;
matthiasm@3 934
Chris@23 935 // char buffer1 [50];
Chris@23 936 // if (maxChordIndex < nChord - 1) {
Chris@23 937 // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
Chris@23 938 // } else {
Chris@23 939 // sprintf(buffer1, "N");
Chris@23 940 // }
Chris@23 941 // f7.label = buffer1;
Chris@23 942 f7.label = m_chordnames[maxChordIndex];
Chris@23 943 fsOut[7].push_back(f7);
Chris@23 944 }
Chris@23 945 count++;
Chris@23 946 }
Chris@23 947 Feature f7; // last chord estimate
Chris@23 948 f7.hasTimestamp = true;
Chris@23 949 f7.timestamp = fsOut[6][fsOut[6].size()-1].timestamp;
Chris@23 950 f7.label = "N";
Chris@23 951 fsOut[7].push_back(f7);
Chris@23 952 cerr << "done." << endl;
Chris@23 953 // // musicity
Chris@23 954 // count = 0;
Chris@23 955 // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
Chris@23 956 // vector<float> musicityValue;
Chris@23 957 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
Chris@23 958 // Feature f4 = *it;
Chris@23 959 //
Chris@23 960 // int startIndex = max(count - musicitykernelwidth/2,0);
Chris@23 961 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
Chris@23 962 // float chromasum = 0;
Chris@23 963 // float diffsum = 0;
Chris@23 964 // for (int k = 0; k < 12; k++) {
Chris@23 965 // for (int i = startIndex + 1; i < endIndex; i++) {
Chris@23 966 // chromasum += pow(fsOut[4][i].values[k],2);
Chris@23 967 // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]);
Chris@23 968 // }
Chris@23 969 // }
Chris@23 970 // diffsum /= chromasum;
Chris@23 971 // musicityValue.push_back(diffsum);
Chris@23 972 // count++;
Chris@23 973 // }
Chris@23 974 //
Chris@23 975 // float musicityThreshold = 0.44;
Chris@23 976 // if (m_stepSize == 4096) {
Chris@23 977 // musicityThreshold = 0.74;
Chris@23 978 // }
Chris@23 979 // if (m_stepSize == 4410) {
Chris@23 980 // musicityThreshold = 0.77;
Chris@23 981 // }
Chris@23 982 //
Chris@23 983 // count = 0;
Chris@23 984 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
Chris@23 985 // Feature f4 = *it;
Chris@23 986 // Feature f8; // musicity
Chris@23 987 // Feature f9; // musicity segmenter
Chris@23 988 //
Chris@23 989 // f8.hasTimestamp = true;
Chris@23 990 // f8.timestamp = f4.timestamp;
Chris@23 991 // f9.hasTimestamp = true;
Chris@23 992 // f9.timestamp = f4.timestamp;
Chris@23 993 //
Chris@23 994 // int startIndex = max(count - musicitykernelwidth/2,0);
Chris@23 995 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
Chris@23 996 // int musicityCount = 0;
Chris@23 997 // for (int i = startIndex; i <= endIndex; i++) {
Chris@23 998 // if (musicityValue[i] > musicityThreshold) musicityCount++;
Chris@23 999 // }
Chris@23 1000 // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1);
Chris@23 1001 //
Chris@23 1002 // if (isSpeech) {
Chris@23 1003 // if (oldlabeltype != 2) {
Chris@23 1004 // f9.label = "Speech";
Chris@23 1005 // fsOut[9].push_back(f9);
Chris@23 1006 // oldlabeltype = 2;
Chris@23 1007 // }
Chris@23 1008 // } else {
Chris@23 1009 // if (oldlabeltype != 1) {
Chris@23 1010 // f9.label = "Music";
Chris@23 1011 // fsOut[9].push_back(f9);
Chris@23 1012 // oldlabeltype = 1;
Chris@23 1013 // }
Chris@23 1014 // }
Chris@23 1015 // f8.values.push_back(musicityValue[count]);
Chris@23 1016 // fsOut[8].push_back(f8);
Chris@23 1017 // count++;
Chris@23 1018 // }
Chris@23 1019 return fsOut;
matthiasm@0 1020
matthiasm@0 1021 }
matthiasm@0 1022
Chris@35 1023 #endif