annotate NNLSBase.cpp @ 35:cf8898a0174c matthiasm-plugin

* Split out NNLSChroma plugin into three plugins (chroma, chordino, tuning) with a common base class. There's still quite a lot of duplication between the getRemainingFeatures functions. Also add copyright / copying headers, etc.
author Chris Cannam
date Fri, 22 Oct 2010 11:30:21 +0100
parents NNLSChroma.cpp@da3195577172
children d6bb9b43ac1c
rev   line source
Chris@23 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
matthiasm@0 2
Chris@35 3 /*
Chris@35 4 NNLS-Chroma / Chordino
Chris@35 5
Chris@35 6 Audio feature extraction plugins for chromagram and chord
Chris@35 7 estimation.
Chris@35 8
Chris@35 9 Centre for Digital Music, Queen Mary University of London.
Chris@35 10 This file copyright 2008-2010 Matthias Mauch and QMUL.
Chris@35 11
Chris@35 12 This program is free software; you can redistribute it and/or
Chris@35 13 modify it under the terms of the GNU General Public License as
Chris@35 14 published by the Free Software Foundation; either version 2 of the
Chris@35 15 License, or (at your option) any later version. See the file
Chris@35 16 COPYING included with this distribution for more information.
Chris@35 17 */
Chris@35 18
Chris@35 19 #include "NNLSBase.h"
Chris@27 20
Chris@27 21 #include "chromamethods.h"
Chris@27 22
Chris@27 23 #include <cstdlib>
Chris@27 24 #include <fstream>
matthiasm@0 25 #include <cmath>
matthiasm@9 26
Chris@27 27 #include <algorithm>
matthiasm@0 28
matthiasm@0 29 const bool debug_on = false;
matthiasm@0 30
Chris@27 31 const vector<float> hw(hammingwind, hammingwind+19);
matthiasm@0 32
Chris@35 33 NNLSBase::NNLSBase(float inputSampleRate) :
Chris@23 34 Plugin(inputSampleRate),
Chris@35 35 m_logSpectrum(0),
Chris@23 36 m_blockSize(0),
Chris@23 37 m_stepSize(0),
Chris@23 38 m_lengthOfNoteIndex(0),
Chris@23 39 m_meanTuning0(0),
Chris@23 40 m_meanTuning1(0),
Chris@23 41 m_meanTuning2(0),
Chris@23 42 m_localTuning0(0),
Chris@23 43 m_localTuning1(0),
Chris@23 44 m_localTuning2(0),
Chris@23 45 m_paling(1.0),
Chris@23 46 m_preset(0.0),
Chris@23 47 m_localTuning(0),
Chris@23 48 m_kernelValue(0),
Chris@23 49 m_kernelFftIndex(0),
Chris@23 50 m_kernelNoteIndex(0),
Chris@23 51 m_dict(0),
Chris@23 52 m_tuneLocal(false),
Chris@23 53 m_dictID(0),
Chris@23 54 m_chorddict(0),
Chris@23 55 m_chordnames(0),
Chris@23 56 m_doNormalizeChroma(0),
Chris@23 57 m_rollon(0.01)
matthiasm@0 58 {
Chris@35 59 if (debug_on) cerr << "--> NNLSBase" << endl;
matthiasm@7 60
Chris@23 61 // make the *note* dictionary matrix
Chris@23 62 m_dict = new float[nNote * 84];
Chris@23 63 for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0;
Chris@23 64 dictionaryMatrix(m_dict);
matthiasm@7 65
Chris@23 66 // get the *chord* dictionary from file (if the file exists)
Chris@23 67 m_chordnames = chordDictionary(&m_chorddict);
matthiasm@0 68 }
matthiasm@0 69
matthiasm@0 70
Chris@35 71 NNLSBase::~NNLSBase()
matthiasm@0 72 {
Chris@35 73 if (debug_on) cerr << "--> ~NNLSBase" << endl;
Chris@23 74 delete [] m_dict;
matthiasm@0 75 }
matthiasm@0 76
matthiasm@0 77 string
Chris@35 78 NNLSBase::getMaker() const
matthiasm@0 79 {
Chris@23 80 if (debug_on) cerr << "--> getMaker" << endl;
matthiasm@0 81 // Your name here
matthiasm@0 82 return "Matthias Mauch";
matthiasm@0 83 }
matthiasm@0 84
matthiasm@0 85 int
Chris@35 86 NNLSBase::getPluginVersion() const
matthiasm@0 87 {
Chris@23 88 if (debug_on) cerr << "--> getPluginVersion" << endl;
matthiasm@0 89 // Increment this each time you release a version that behaves
matthiasm@0 90 // differently from the previous one
matthiasm@0 91 return 1;
matthiasm@0 92 }
matthiasm@0 93
matthiasm@0 94 string
Chris@35 95 NNLSBase::getCopyright() const
matthiasm@0 96 {
Chris@23 97 if (debug_on) cerr << "--> getCopyright" << endl;
matthiasm@0 98 // This function is not ideally named. It does not necessarily
matthiasm@0 99 // need to say who made the plugin -- getMaker does that -- but it
matthiasm@0 100 // should indicate the terms under which it is distributed. For
matthiasm@0 101 // example, "Copyright (year). All Rights Reserved", or "GPL"
Chris@35 102 return "GPL";
matthiasm@0 103 }
matthiasm@0 104
Chris@35 105 NNLSBase::InputDomain
Chris@35 106 NNLSBase::getInputDomain() const
matthiasm@0 107 {
Chris@23 108 if (debug_on) cerr << "--> getInputDomain" << endl;
matthiasm@0 109 return FrequencyDomain;
matthiasm@0 110 }
matthiasm@0 111
matthiasm@0 112 size_t
Chris@35 113 NNLSBase::getPreferredBlockSize() const
matthiasm@0 114 {
Chris@23 115 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
matthiasm@0 116 return 16384; // 0 means "I can handle any block size"
matthiasm@0 117 }
matthiasm@0 118
matthiasm@0 119 size_t
Chris@35 120 NNLSBase::getPreferredStepSize() const
matthiasm@0 121 {
Chris@23 122 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
matthiasm@0 123 return 2048; // 0 means "anything sensible"; in practice this
Chris@23 124 // means the same as the block size for TimeDomain
Chris@23 125 // plugins, or half of it for FrequencyDomain plugins
matthiasm@0 126 }
matthiasm@0 127
matthiasm@0 128 size_t
Chris@35 129 NNLSBase::getMinChannelCount() const
matthiasm@0 130 {
Chris@23 131 if (debug_on) cerr << "--> getMinChannelCount" << endl;
matthiasm@0 132 return 1;
matthiasm@0 133 }
matthiasm@0 134
matthiasm@0 135 size_t
Chris@35 136 NNLSBase::getMaxChannelCount() const
matthiasm@0 137 {
Chris@23 138 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
matthiasm@0 139 return 1;
matthiasm@0 140 }
matthiasm@0 141
Chris@35 142 NNLSBase::ParameterList
Chris@35 143 NNLSBase::getParameterDescriptors() const
matthiasm@0 144 {
Chris@23 145 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
matthiasm@0 146 ParameterList list;
matthiasm@0 147
matthiasm@3 148 ParameterDescriptor d3;
matthiasm@3 149 d3.identifier = "preset";
matthiasm@3 150 d3.name = "preset";
matthiasm@3 151 d3.description = "Spectral paling: no paling - 0; whitening - 1.";
matthiasm@3 152 d3.unit = "";
Chris@23 153 d3.isQuantized = true;
Chris@23 154 d3.quantizeStep = 1;
matthiasm@3 155 d3.minValue = 0.0;
matthiasm@4 156 d3.maxValue = 3.0;
matthiasm@3 157 d3.defaultValue = 0.0;
matthiasm@3 158 d3.valueNames.push_back("polyphonic pop");
Chris@23 159 d3.valueNames.push_back("polyphonic pop (fast)");
matthiasm@3 160 d3.valueNames.push_back("solo keyboard");
Chris@23 161 d3.valueNames.push_back("manual");
matthiasm@3 162 list.push_back(d3);
matthiasm@4 163
matthiasm@17 164 ParameterDescriptor d5;
Chris@23 165 d5.identifier = "rollon";
Chris@23 166 d5.name = "spectral roll-on";
Chris@23 167 d5.description = "The bins below the spectral roll-on quantile will be set to 0.";
Chris@23 168 d5.unit = "";
Chris@23 169 d5.minValue = 0;
Chris@23 170 d5.maxValue = 1;
Chris@23 171 d5.defaultValue = 0;
Chris@23 172 d5.isQuantized = false;
Chris@23 173 list.push_back(d5);
matthiasm@17 174
matthiasm@4 175 // ParameterDescriptor d0;
matthiasm@4 176 // d0.identifier = "notedict";
matthiasm@4 177 // d0.name = "note dictionary";
matthiasm@4 178 // d0.description = "Notes in different note dictionaries differ by their spectral shapes.";
matthiasm@4 179 // d0.unit = "";
matthiasm@4 180 // d0.minValue = 0;
matthiasm@4 181 // d0.maxValue = 1;
matthiasm@4 182 // d0.defaultValue = 0;
matthiasm@4 183 // d0.isQuantized = true;
matthiasm@4 184 // d0.valueNames.push_back("s = 0.6");
matthiasm@4 185 // d0.valueNames.push_back("no NNLS");
matthiasm@4 186 // d0.quantizeStep = 1.0;
matthiasm@4 187 // list.push_back(d0);
matthiasm@4 188
matthiasm@4 189 ParameterDescriptor d1;
matthiasm@4 190 d1.identifier = "tuningmode";
matthiasm@4 191 d1.name = "tuning mode";
matthiasm@4 192 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
matthiasm@4 193 d1.unit = "";
matthiasm@4 194 d1.minValue = 0;
matthiasm@4 195 d1.maxValue = 1;
matthiasm@4 196 d1.defaultValue = 0;
matthiasm@4 197 d1.isQuantized = true;
matthiasm@4 198 d1.valueNames.push_back("global tuning");
matthiasm@4 199 d1.valueNames.push_back("local tuning");
matthiasm@4 200 d1.quantizeStep = 1.0;
matthiasm@4 201 list.push_back(d1);
matthiasm@4 202
Chris@23 203 // ParameterDescriptor d2;
Chris@23 204 // d2.identifier = "paling";
Chris@23 205 // d2.name = "spectral paling";
Chris@23 206 // d2.description = "Spectral paling: no paling - 0; whitening - 1.";
Chris@23 207 // d2.unit = "";
Chris@23 208 // d2.isQuantized = true;
Chris@23 209 // // d2.quantizeStep = 0.1;
Chris@23 210 // d2.minValue = 0.0;
Chris@23 211 // d2.maxValue = 1.0;
Chris@23 212 // d2.defaultValue = 1.0;
Chris@23 213 // d2.isQuantized = false;
Chris@23 214 // list.push_back(d2);
Chris@23 215 ParameterDescriptor d4;
matthiasm@12 216 d4.identifier = "chromanormalize";
matthiasm@12 217 d4.name = "chroma normalization";
matthiasm@12 218 d4.description = "How shall the chroma vector be normalized?";
matthiasm@12 219 d4.unit = "";
matthiasm@12 220 d4.minValue = 0;
matthiasm@13 221 d4.maxValue = 3;
matthiasm@12 222 d4.defaultValue = 0;
matthiasm@12 223 d4.isQuantized = true;
matthiasm@13 224 d4.valueNames.push_back("none");
matthiasm@13 225 d4.valueNames.push_back("maximum norm");
Chris@23 226 d4.valueNames.push_back("L1 norm");
Chris@23 227 d4.valueNames.push_back("L2 norm");
matthiasm@12 228 d4.quantizeStep = 1.0;
matthiasm@12 229 list.push_back(d4);
matthiasm@4 230
matthiasm@0 231 return list;
matthiasm@0 232 }
matthiasm@0 233
matthiasm@0 234 float
Chris@35 235 NNLSBase::getParameter(string identifier) const
matthiasm@0 236 {
Chris@23 237 if (debug_on) cerr << "--> getParameter" << endl;
matthiasm@0 238 if (identifier == "notedict") {
matthiasm@0 239 return m_dictID;
matthiasm@0 240 }
matthiasm@0 241
matthiasm@0 242 if (identifier == "paling") {
matthiasm@0 243 return m_paling;
matthiasm@0 244 }
matthiasm@17 245
Chris@23 246 if (identifier == "rollon") {
matthiasm@17 247 return m_rollon;
matthiasm@17 248 }
matthiasm@0 249
matthiasm@0 250 if (identifier == "tuningmode") {
matthiasm@0 251 if (m_tuneLocal) {
matthiasm@0 252 return 1.0;
matthiasm@0 253 } else {
matthiasm@0 254 return 0.0;
matthiasm@0 255 }
matthiasm@0 256 }
Chris@23 257 if (identifier == "preset") {
Chris@23 258 return m_preset;
matthiasm@3 259 }
Chris@23 260 if (identifier == "chromanormalize") {
Chris@23 261 return m_doNormalizeChroma;
matthiasm@12 262 }
matthiasm@0 263 return 0;
matthiasm@0 264
matthiasm@0 265 }
matthiasm@0 266
matthiasm@0 267 void
Chris@35 268 NNLSBase::setParameter(string identifier, float value)
matthiasm@0 269 {
Chris@23 270 if (debug_on) cerr << "--> setParameter" << endl;
matthiasm@0 271 if (identifier == "notedict") {
matthiasm@0 272 m_dictID = (int) value;
matthiasm@0 273 }
matthiasm@0 274
matthiasm@0 275 if (identifier == "paling") {
matthiasm@0 276 m_paling = value;
matthiasm@0 277 }
matthiasm@0 278
matthiasm@0 279 if (identifier == "tuningmode") {
matthiasm@0 280 m_tuneLocal = (value > 0) ? true : false;
matthiasm@0 281 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
matthiasm@0 282 }
matthiasm@3 283 if (identifier == "preset") {
matthiasm@3 284 m_preset = value;
Chris@23 285 if (m_preset == 0.0) {
Chris@23 286 m_tuneLocal = false;
Chris@23 287 m_paling = 1.0;
Chris@23 288 m_dictID = 0.0;
Chris@23 289 }
Chris@23 290 if (m_preset == 1.0) {
Chris@23 291 m_tuneLocal = false;
Chris@23 292 m_paling = 1.0;
Chris@23 293 m_dictID = 1.0;
Chris@23 294 }
Chris@23 295 if (m_preset == 2.0) {
Chris@23 296 m_tuneLocal = false;
Chris@23 297 m_paling = 0.7;
Chris@23 298 m_dictID = 0.0;
Chris@23 299 }
matthiasm@3 300 }
Chris@23 301 if (identifier == "chromanormalize") {
Chris@23 302 m_doNormalizeChroma = value;
Chris@23 303 }
matthiasm@17 304
Chris@23 305 if (identifier == "rollon") {
Chris@23 306 m_rollon = value;
Chris@23 307 }
matthiasm@0 308 }
matthiasm@0 309
Chris@35 310 NNLSBase::ProgramList
Chris@35 311 NNLSBase::getPrograms() const
matthiasm@0 312 {
Chris@23 313 if (debug_on) cerr << "--> getPrograms" << endl;
matthiasm@0 314 ProgramList list;
matthiasm@0 315
matthiasm@0 316 // If you have no programs, return an empty list (or simply don't
matthiasm@0 317 // implement this function or getCurrentProgram/selectProgram)
matthiasm@0 318
matthiasm@0 319 return list;
matthiasm@0 320 }
matthiasm@0 321
matthiasm@0 322 string
Chris@35 323 NNLSBase::getCurrentProgram() const
matthiasm@0 324 {
Chris@23 325 if (debug_on) cerr << "--> getCurrentProgram" << endl;
matthiasm@0 326 return ""; // no programs
matthiasm@0 327 }
matthiasm@0 328
matthiasm@0 329 void
Chris@35 330 NNLSBase::selectProgram(string name)
matthiasm@0 331 {
Chris@23 332 if (debug_on) cerr << "--> selectProgram" << endl;
matthiasm@0 333 }
matthiasm@0 334
matthiasm@0 335
matthiasm@0 336 bool
Chris@35 337 NNLSBase::initialise(size_t channels, size_t stepSize, size_t blockSize)
matthiasm@0 338 {
Chris@23 339 if (debug_on) {
Chris@23 340 cerr << "--> initialise";
Chris@23 341 }
matthiasm@1 342
matthiasm@0 343 if (channels < getMinChannelCount() ||
matthiasm@0 344 channels > getMaxChannelCount()) return false;
matthiasm@0 345 m_blockSize = blockSize;
matthiasm@0 346 m_stepSize = stepSize;
Chris@35 347 m_frameCount = 0;
Chris@23 348 int tempn = 256 * m_blockSize/2;
Chris@23 349 // cerr << "length of tempkernel : " << tempn << endl;
Chris@23 350 float *tempkernel;
matthiasm@1 351
Chris@23 352 tempkernel = new float[tempn];
matthiasm@1 353
Chris@23 354 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
Chris@23 355 m_kernelValue.clear();
Chris@23 356 m_kernelFftIndex.clear();
Chris@23 357 m_kernelNoteIndex.clear();
Chris@23 358 int countNonzero = 0;
Chris@23 359 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
Chris@23 360 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) {
Chris@23 361 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
Chris@23 362 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
Chris@23 363 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
Chris@23 364 countNonzero++;
Chris@23 365 }
Chris@23 366 m_kernelFftIndex.push_back(iFFT);
Chris@23 367 m_kernelNoteIndex.push_back(iNote);
Chris@23 368 }
Chris@23 369 }
Chris@23 370 }
Chris@23 371 // cerr << "nonzero count : " << countNonzero << endl;
Chris@23 372 delete [] tempkernel;
Chris@35 373 /*
Chris@23 374 ofstream myfile;
Chris@23 375 myfile.open ("matrix.txt");
matthiasm@3 376 // myfile << "Writing this to a file.\n";
Chris@23 377 for (int i = 0; i < nNote * 84; ++i) {
Chris@23 378 myfile << m_dict[i] << endl;
Chris@23 379 }
matthiasm@3 380 myfile.close();
Chris@35 381 */
matthiasm@0 382 return true;
matthiasm@0 383 }
matthiasm@0 384
matthiasm@0 385 void
Chris@35 386 NNLSBase::reset()
matthiasm@0 387 {
Chris@23 388 if (debug_on) cerr << "--> reset";
matthiasm@4 389
matthiasm@0 390 // Clear buffers, reset stored values, etc
Chris@35 391 m_frameCount = 0;
Chris@23 392 m_dictID = 0;
Chris@35 393 m_logSpectrum.clear();
Chris@23 394 m_meanTuning0 = 0;
Chris@23 395 m_meanTuning1 = 0;
Chris@23 396 m_meanTuning2 = 0;
Chris@23 397 m_localTuning0 = 0;
Chris@23 398 m_localTuning1 = 0;
Chris@23 399 m_localTuning2 = 0;
Chris@23 400 m_localTuning.clear();
matthiasm@0 401 }
matthiasm@0 402
Chris@35 403 void
Chris@35 404 NNLSBase::baseProcess(const float *const *inputBuffers, Vamp::RealTime timestamp)
matthiasm@0 405 {
Chris@35 406 m_frameCount++;
Chris@23 407 float *magnitude = new float[m_blockSize/2];
matthiasm@0 408
Chris@23 409 const float *fbuf = inputBuffers[0];
Chris@23 410 float energysum = 0;
Chris@23 411 // make magnitude
Chris@23 412 float maxmag = -10000;
Chris@23 413 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
Chris@23 414 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
Chris@23 415 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
Chris@23 416 if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin];
Chris@23 417 if (m_rollon > 0) {
Chris@23 418 energysum += pow(magnitude[iBin],2);
Chris@23 419 }
Chris@23 420 }
matthiasm@14 421
Chris@23 422 float cumenergy = 0;
Chris@23 423 if (m_rollon > 0) {
Chris@23 424 for (size_t iBin = 2; iBin < m_blockSize/2; iBin++) {
Chris@23 425 cumenergy += pow(magnitude[iBin],2);
Chris@23 426 if (cumenergy < energysum * m_rollon) magnitude[iBin-2] = 0;
Chris@23 427 else break;
Chris@23 428 }
Chris@23 429 }
matthiasm@17 430
Chris@23 431 if (maxmag < 2) {
Chris@23 432 // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl;
Chris@23 433 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
Chris@23 434 magnitude[iBin] = 0;
Chris@23 435 }
Chris@23 436 }
matthiasm@4 437
Chris@23 438 // note magnitude mapping using pre-calculated matrix
Chris@23 439 float *nm = new float[nNote]; // note magnitude
Chris@23 440 for (size_t iNote = 0; iNote < nNote; iNote++) {
Chris@23 441 nm[iNote] = 0; // initialise as 0
Chris@23 442 }
Chris@23 443 int binCount = 0;
Chris@23 444 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
Chris@23 445 // cerr << ".";
Chris@23 446 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
Chris@23 447 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl;
Chris@23 448 binCount++;
Chris@23 449 }
Chris@23 450 // cerr << nm[20];
Chris@23 451 // cerr << endl;
matthiasm@0 452
matthiasm@0 453
Chris@35 454 float one_over_N = 1.0/m_frameCount;
matthiasm@0 455 // update means of complex tuning variables
Chris@35 456 m_meanTuning0 *= float(m_frameCount-1)*one_over_N;
Chris@35 457 m_meanTuning1 *= float(m_frameCount-1)*one_over_N;
Chris@35 458 m_meanTuning2 *= float(m_frameCount-1)*one_over_N;
matthiasm@0 459
matthiasm@0 460 for (int iTone = 0; iTone < 160; iTone = iTone + 3) {
matthiasm@0 461 m_meanTuning0 += nm[iTone + 0]*one_over_N;
matthiasm@0 462 m_meanTuning1 += nm[iTone + 1]*one_over_N;
matthiasm@0 463 m_meanTuning2 += nm[iTone + 2]*one_over_N;
Chris@23 464 float ratioOld = 0.997;
matthiasm@3 465 m_localTuning0 *= ratioOld; m_localTuning0 += nm[iTone + 0] * (1 - ratioOld);
matthiasm@3 466 m_localTuning1 *= ratioOld; m_localTuning1 += nm[iTone + 1] * (1 - ratioOld);
matthiasm@3 467 m_localTuning2 *= ratioOld; m_localTuning2 += nm[iTone + 2] * (1 - ratioOld);
matthiasm@0 468 }
matthiasm@0 469
matthiasm@0 470 // if (m_tuneLocal) {
Chris@23 471 // local tuning
Chris@23 472 float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2;
Chris@23 473 float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2;
Chris@23 474 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
Chris@23 475 m_localTuning.push_back(normalisedtuning);
matthiasm@0 476
Chris@23 477 Feature f1; // logfreqspec
Chris@23 478 f1.hasTimestamp = true;
matthiasm@0 479 f1.timestamp = timestamp;
Chris@23 480 for (size_t iNote = 0; iNote < nNote; iNote++) {
Chris@23 481 f1.values.push_back(nm[iNote]);
Chris@23 482 }
matthiasm@0 483
matthiasm@0 484 // deletes
matthiasm@0 485 delete[] magnitude;
matthiasm@0 486 delete[] nm;
matthiasm@0 487
Chris@35 488 m_logSpectrum.push_back(f1); // remember note magnitude
matthiasm@0 489 }
matthiasm@0 490
Chris@35 491
Chris@35 492 #ifdef NOT_DEFINED
Chris@35 493
Chris@35 494 NNLSBase::FeatureSet
Chris@35 495 NNLSBase::getRemainingFeatures()
matthiasm@0 496 {
Chris@23 497 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
Chris@23 498 FeatureSet fsOut;
Chris@35 499 if (m_logSpectrum.size() == 0) return fsOut;
Chris@23 500 int nChord = m_chordnames.size();
Chris@23 501 //
Chris@23 502 /** Calculate Tuning
Chris@23 503 calculate tuning from (using the angle of the complex number defined by the
Chris@23 504 cumulative mean real and imag values)
Chris@23 505 **/
Chris@23 506 float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
Chris@23 507 float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
Chris@23 508 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
Chris@23 509 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
Chris@23 510 int intShift = floor(normalisedtuning * 3);
Chris@23 511 float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
matthiasm@1 512
Chris@23 513 char buffer0 [50];
matthiasm@1 514
Chris@23 515 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
matthiasm@1 516
Chris@23 517 // cerr << "normalisedtuning: " << normalisedtuning << '\n';
matthiasm@1 518
Chris@23 519 // push tuning to FeatureSet fsOut
Chris@23 520 Feature f0; // tuning
Chris@23 521 f0.hasTimestamp = true;
Chris@23 522 f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));;
Chris@23 523 f0.label = buffer0;
Chris@23 524 fsOut[0].push_back(f0);
matthiasm@1 525
Chris@23 526 /** Tune Log-Frequency Spectrogram
Chris@23 527 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
Chris@23 528 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
Chris@23 529 **/
Chris@23 530 cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... ";
matthiasm@13 531
Chris@23 532 float tempValue = 0;
Chris@23 533 float dbThreshold = 0; // relative to the background spectrum
Chris@23 534 float thresh = pow(10,dbThreshold/20);
Chris@23 535 // cerr << "tune local ? " << m_tuneLocal << endl;
Chris@23 536 int count = 0;
matthiasm@1 537
Chris@35 538 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
Chris@23 539 Feature f1 = *i;
Chris@23 540 Feature f2; // tuned log-frequency spectrum
Chris@23 541 f2.hasTimestamp = true;
Chris@23 542 f2.timestamp = f1.timestamp;
Chris@23 543 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
matthiasm@1 544
Chris@23 545 if (m_tuneLocal) {
Chris@23 546 intShift = floor(m_localTuning[count] * 3);
Chris@23 547 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
Chris@23 548 }
matthiasm@1 549
Chris@23 550 // cerr << intShift << " " << intFactor << endl;
matthiasm@1 551
Chris@23 552 for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
Chris@23 553 tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor;
Chris@23 554 f2.values.push_back(tempValue);
Chris@23 555 }
matthiasm@1 556
Chris@23 557 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
Chris@23 558 vector<float> runningmean = SpecialConvolution(f2.values,hw);
Chris@23 559 vector<float> runningstd;
Chris@23 560 for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance)
Chris@23 561 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
Chris@23 562 }
Chris@23 563 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
Chris@23 564 for (int i = 0; i < 256; i++) {
Chris@23 565 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
Chris@23 566 if (runningstd[i] > 0) {
Chris@23 567 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
Chris@23 568 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
Chris@23 569 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
Chris@23 570 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
Chris@23 571 }
Chris@23 572 if (f2.values[i] < 0) {
Chris@23 573 cerr << "ERROR: negative value in logfreq spectrum" << endl;
Chris@23 574 }
Chris@23 575 }
Chris@23 576 fsOut[2].push_back(f2);
Chris@23 577 count++;
Chris@23 578 }
Chris@23 579 cerr << "done." << endl;
matthiasm@1 580
Chris@23 581 /** Semitone spectrum and chromagrams
Chris@23 582 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
Chris@23 583 is inferred using a non-negative least squares algorithm.
Chris@23 584 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
Chris@23 585 bass and treble stacked onto each other).
Chris@23 586 **/
Chris@23 587 if (m_dictID == 1) {
Chris@23 588 cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... ";
Chris@23 589 } else {
Chris@23 590 cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... ";
Chris@23 591 }
matthiasm@13 592
matthiasm@1 593
Chris@23 594 vector<vector<float> > chordogram;
Chris@23 595 vector<vector<int> > scoreChordogram;
Chris@23 596 vector<float> chordchange = vector<float>(fsOut[2].size(),0);
Chris@23 597 vector<float> oldchroma = vector<float>(12,0);
Chris@23 598 vector<float> oldbasschroma = vector<float>(12,0);
Chris@23 599 count = 0;
matthiasm@9 600
Chris@23 601 for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) {
Chris@23 602 Feature f2 = *it; // logfreq spectrum
Chris@23 603 Feature f3; // semitone spectrum
Chris@23 604 Feature f4; // treble chromagram
Chris@23 605 Feature f5; // bass chromagram
Chris@23 606 Feature f6; // treble and bass chromagram
matthiasm@1 607
Chris@23 608 f3.hasTimestamp = true;
Chris@23 609 f3.timestamp = f2.timestamp;
matthiasm@1 610
Chris@23 611 f4.hasTimestamp = true;
Chris@23 612 f4.timestamp = f2.timestamp;
matthiasm@1 613
Chris@23 614 f5.hasTimestamp = true;
Chris@23 615 f5.timestamp = f2.timestamp;
matthiasm@1 616
Chris@23 617 f6.hasTimestamp = true;
Chris@23 618 f6.timestamp = f2.timestamp;
matthiasm@1 619
Chris@29 620 float b[256];
matthiasm@1 621
Chris@23 622 bool some_b_greater_zero = false;
Chris@23 623 float sumb = 0;
Chris@23 624 for (int i = 0; i < 256; i++) {
Chris@23 625 // b[i] = m_dict[(256 * count + i) % (256 * 84)];
Chris@23 626 b[i] = f2.values[i];
Chris@23 627 sumb += b[i];
Chris@23 628 if (b[i] > 0) {
Chris@23 629 some_b_greater_zero = true;
Chris@23 630 }
Chris@23 631 }
matthiasm@1 632
Chris@23 633 // here's where the non-negative least squares algorithm calculates the note activation x
matthiasm@1 634
Chris@23 635 vector<float> chroma = vector<float>(12, 0);
Chris@23 636 vector<float> basschroma = vector<float>(12, 0);
Chris@23 637 float currval;
Chris@23 638 unsigned iSemitone = 0;
matthiasm@1 639
Chris@23 640 if (some_b_greater_zero) {
Chris@23 641 if (m_dictID == 1) {
Chris@23 642 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
Chris@23 643 currval = 0;
Chris@23 644 currval += b[iNote + 1 + -1] * 0.5;
Chris@23 645 currval += b[iNote + 1 + 0] * 1.0;
Chris@23 646 currval += b[iNote + 1 + 1] * 0.5;
Chris@23 647 f3.values.push_back(currval);
Chris@23 648 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
Chris@23 649 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
Chris@23 650 iSemitone++;
Chris@23 651 }
matthiasm@1 652
Chris@23 653 } else {
Chris@29 654 float x[84+1000];
Chris@23 655 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
Chris@23 656 vector<int> signifIndex;
Chris@23 657 int index=0;
Chris@23 658 sumb /= 84.0;
Chris@23 659 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
Chris@23 660 float currval = 0;
Chris@23 661 currval += b[iNote + 1 + -1];
Chris@23 662 currval += b[iNote + 1 + 0];
Chris@23 663 currval += b[iNote + 1 + 1];
Chris@23 664 if (currval > 0) signifIndex.push_back(index);
Chris@23 665 f3.values.push_back(0); // fill the values, change later
Chris@23 666 index++;
Chris@23 667 }
Chris@29 668 float rnorm;
Chris@29 669 float w[84+1000];
Chris@29 670 float zz[84+1000];
Chris@23 671 int indx[84+1000];
Chris@23 672 int mode;
Chris@23 673 int dictsize = 256*signifIndex.size();
Chris@23 674 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
Chris@29 675 float *curr_dict = new float[dictsize];
Chris@23 676 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
Chris@23 677 for (unsigned iBin = 0; iBin < 256; iBin++) {
Chris@23 678 curr_dict[iNote * 256 + iBin] = 1.0 * m_dict[signifIndex[iNote] * 256 + iBin];
Chris@23 679 }
Chris@23 680 }
Chris@29 681 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
Chris@23 682 delete [] curr_dict;
Chris@23 683 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
Chris@23 684 f3.values[signifIndex[iNote]] = x[iNote];
Chris@23 685 // cerr << mode << endl;
Chris@23 686 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
Chris@23 687 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
Chris@23 688 }
Chris@23 689 }
Chris@23 690 }
matthiasm@13 691
matthiasm@10 692
matthiasm@12 693
matthiasm@13 694
Chris@23 695 f4.values = chroma;
Chris@23 696 f5.values = basschroma;
Chris@23 697 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
Chris@23 698 f6.values = chroma;
matthiasm@1 699
Chris@23 700 if (m_doNormalizeChroma > 0) {
Chris@23 701 vector<float> chromanorm = vector<float>(3,0);
Chris@23 702 switch (int(m_doNormalizeChroma)) {
Chris@23 703 case 0: // should never end up here
Chris@23 704 break;
Chris@23 705 case 1:
Chris@23 706 chromanorm[0] = *max_element(f4.values.begin(), f4.values.end());
Chris@23 707 chromanorm[1] = *max_element(f5.values.begin(), f5.values.end());
Chris@23 708 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
Chris@23 709 break;
Chris@23 710 case 2:
Chris@23 711 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
Chris@23 712 chromanorm[0] += *it;
Chris@23 713 }
Chris@23 714 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
Chris@23 715 chromanorm[1] += *it;
Chris@23 716 }
Chris@23 717 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
Chris@23 718 chromanorm[2] += *it;
Chris@23 719 }
Chris@23 720 break;
Chris@23 721 case 3:
Chris@23 722 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
Chris@23 723 chromanorm[0] += pow(*it,2);
Chris@23 724 }
Chris@23 725 chromanorm[0] = sqrt(chromanorm[0]);
Chris@23 726 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
Chris@23 727 chromanorm[1] += pow(*it,2);
Chris@23 728 }
Chris@23 729 chromanorm[1] = sqrt(chromanorm[1]);
Chris@23 730 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
Chris@23 731 chromanorm[2] += pow(*it,2);
Chris@23 732 }
Chris@23 733 chromanorm[2] = sqrt(chromanorm[2]);
Chris@23 734 break;
Chris@23 735 }
Chris@23 736 if (chromanorm[0] > 0) {
Chris@23 737 for (int i = 0; i < f4.values.size(); i++) {
Chris@23 738 f4.values[i] /= chromanorm[0];
Chris@23 739 }
Chris@23 740 }
Chris@23 741 if (chromanorm[1] > 0) {
Chris@23 742 for (int i = 0; i < f5.values.size(); i++) {
Chris@23 743 f5.values[i] /= chromanorm[1];
Chris@23 744 }
Chris@23 745 }
Chris@23 746 if (chromanorm[2] > 0) {
Chris@23 747 for (int i = 0; i < f6.values.size(); i++) {
Chris@23 748 f6.values[i] /= chromanorm[2];
Chris@23 749 }
Chris@23 750 }
matthiasm@13 751
Chris@23 752 }
matthiasm@13 753
Chris@23 754 // local chord estimation
Chris@23 755 vector<float> currentChordSalience;
Chris@23 756 float tempchordvalue = 0;
Chris@23 757 float sumchordvalue = 0;
matthiasm@9 758
Chris@23 759 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23 760 tempchordvalue = 0;
Chris@23 761 for (int iBin = 0; iBin < 12; iBin++) {
Chris@23 762 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23 763 }
Chris@23 764 for (int iBin = 12; iBin < 24; iBin++) {
Chris@23 765 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23 766 }
Chris@23 767 sumchordvalue+=tempchordvalue;
Chris@23 768 currentChordSalience.push_back(tempchordvalue);
Chris@23 769 }
Chris@23 770 if (sumchordvalue > 0) {
Chris@23 771 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23 772 currentChordSalience[iChord] /= sumchordvalue;
Chris@23 773 }
Chris@23 774 } else {
Chris@23 775 currentChordSalience[nChord-1] = 1.0;
Chris@23 776 }
Chris@23 777 chordogram.push_back(currentChordSalience);
matthiasm@1 778
Chris@23 779 fsOut[3].push_back(f3);
Chris@23 780 fsOut[4].push_back(f4);
Chris@23 781 fsOut[5].push_back(f5);
Chris@23 782 fsOut[6].push_back(f6);
Chris@23 783 count++;
Chris@23 784 }
Chris@23 785 cerr << "done." << endl;
matthiasm@13 786
matthiasm@10 787
Chris@23 788 /* Simple chord estimation
Chris@23 789 I just take the local chord estimates ("currentChordSalience") and average them over time, then
Chris@23 790 take the maximum. Very simple, don't do this at home...
Chris@23 791 */
Chris@23 792 cerr << "[NNLS Chroma Plugin] Chord Estimation ... ";
Chris@23 793 count = 0;
Chris@23 794 int halfwindowlength = m_inputSampleRate / m_stepSize;
Chris@23 795 vector<int> chordSequence;
Chris@23 796 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram
Chris@23 797 vector<int> temp = vector<int>(nChord,0);
Chris@23 798 scoreChordogram.push_back(temp);
Chris@23 799 }
Chris@23 800 for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) {
Chris@23 801 int startIndex = count + 1;
Chris@23 802 int endIndex = count + 2 * halfwindowlength;
matthiasm@10 803
Chris@23 804 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
matthiasm@10 805
Chris@23 806 vector<int> chordCandidates;
Chris@23 807 for (unsigned iChord = 0; iChord < nChord-1; iChord++) {
Chris@23 808 // float currsum = 0;
Chris@23 809 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
Chris@23 810 // currsum += chordogram[iFrame][iChord];
Chris@23 811 // }
Chris@23 812 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
Chris@23 813 for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
Chris@23 814 if (chordogram[iFrame][iChord] > chordThreshold) {
Chris@23 815 chordCandidates.push_back(iChord);
Chris@23 816 break;
Chris@23 817 }
Chris@23 818 }
Chris@23 819 }
Chris@23 820 chordCandidates.push_back(nChord-1);
Chris@23 821 // cerr << chordCandidates.size() << endl;
Chris@23 822
Chris@23 823 float maxval = 0; // will be the value of the most salient *chord change* in this frame
Chris@23 824 float maxindex = 0; //... and the index thereof
Chris@23 825 unsigned bestchordL = nChord-1; // index of the best "left" chord
Chris@23 826 unsigned bestchordR = nChord-1; // index of the best "right" chord
Chris@23 827
Chris@23 828 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
Chris@23 829 // now find the max values on both sides of iWF
Chris@23 830 // left side:
Chris@23 831 float maxL = 0;
Chris@23 832 unsigned maxindL = nChord-1;
Chris@23 833 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
Chris@23 834 unsigned iChord = chordCandidates[kChord];
Chris@23 835 float currsum = 0;
Chris@23 836 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
Chris@23 837 currsum += chordogram[count+iFrame][iChord];
matthiasm@10 838 }
Chris@23 839 if (iChord == nChord-1) currsum *= 0.8;
Chris@23 840 if (currsum > maxL) {
Chris@23 841 maxL = currsum;
Chris@23 842 maxindL = iChord;
Chris@23 843 }
Chris@23 844 }
Chris@23 845 // right side:
Chris@23 846 float maxR = 0;
Chris@23 847 unsigned maxindR = nChord-1;
Chris@23 848 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
Chris@23 849 unsigned iChord = chordCandidates[kChord];
Chris@23 850 float currsum = 0;
Chris@23 851 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
Chris@23 852 currsum += chordogram[count+iFrame][iChord];
Chris@23 853 }
Chris@23 854 if (iChord == nChord-1) currsum *= 0.8;
Chris@23 855 if (currsum > maxR) {
Chris@23 856 maxR = currsum;
Chris@23 857 maxindR = iChord;
Chris@23 858 }
Chris@23 859 }
Chris@23 860 if (maxL+maxR > maxval) {
Chris@23 861 maxval = maxL+maxR;
Chris@23 862 maxindex = iWF;
Chris@23 863 bestchordL = maxindL;
Chris@23 864 bestchordR = maxindR;
Chris@23 865 }
matthiasm@3 866
Chris@23 867 }
Chris@23 868 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
Chris@23 869 // add a score to every chord-frame-point that was part of a maximum
Chris@23 870 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
Chris@23 871 scoreChordogram[iFrame+count][bestchordL]++;
Chris@23 872 }
Chris@23 873 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
Chris@23 874 scoreChordogram[iFrame+count][bestchordR]++;
Chris@23 875 }
Chris@23 876 if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
Chris@23 877 count++;
Chris@23 878 }
Chris@23 879 // cerr << "******* agent finished *******" << endl;
Chris@23 880 count = 0;
Chris@23 881 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
Chris@23 882 float maxval = 0; // will be the value of the most salient chord in this frame
Chris@23 883 float maxindex = 0; //... and the index thereof
Chris@23 884 for (unsigned iChord = 0; iChord < nChord; iChord++) {
Chris@23 885 if (scoreChordogram[count][iChord] > maxval) {
Chris@23 886 maxval = scoreChordogram[count][iChord];
Chris@23 887 maxindex = iChord;
Chris@23 888 // cerr << iChord << endl;
Chris@23 889 }
Chris@23 890 }
Chris@23 891 chordSequence.push_back(maxindex);
Chris@23 892 // cerr << "before modefilter, maxindex: " << maxindex << endl;
Chris@23 893 count++;
Chris@23 894 }
Chris@23 895 // cerr << "******* mode filter done *******" << endl;
matthiasm@10 896
matthiasm@3 897
Chris@23 898 // mode filter on chordSequence
Chris@23 899 count = 0;
Chris@23 900 string oldChord = "";
Chris@23 901 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
Chris@23 902 Feature f6 = *it;
Chris@23 903 Feature f7; // chord estimate
Chris@23 904 f7.hasTimestamp = true;
Chris@23 905 f7.timestamp = f6.timestamp;
Chris@23 906 Feature f8; // chord estimate
Chris@23 907 f8.hasTimestamp = true;
Chris@23 908 f8.timestamp = f6.timestamp;
matthiasm@17 909
Chris@23 910 vector<int> chordCount = vector<int>(nChord,0);
Chris@23 911 int maxChordCount = 0;
Chris@23 912 int maxChordIndex = nChord-1;
Chris@23 913 string maxChord;
Chris@23 914 int startIndex = max(count - halfwindowlength/2,0);
Chris@23 915 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
Chris@23 916 for (int i = startIndex; i < endIndex; i++) {
Chris@23 917 chordCount[chordSequence[i]]++;
Chris@23 918 if (chordCount[chordSequence[i]] > maxChordCount) {
Chris@23 919 // cerr << "start index " << startIndex << endl;
Chris@23 920 maxChordCount++;
Chris@23 921 maxChordIndex = chordSequence[i];
Chris@23 922 maxChord = m_chordnames[maxChordIndex];
Chris@23 923 }
Chris@23 924 }
Chris@23 925 // chordSequence[count] = maxChordIndex;
Chris@23 926 // cerr << maxChordIndex << endl;
Chris@23 927 f8.values.push_back(chordchange[count]/(halfwindowlength*2));
Chris@23 928 // cerr << chordchange[count] << endl;
Chris@23 929 fsOut[9].push_back(f8);
Chris@23 930 if (oldChord != maxChord) {
Chris@23 931 oldChord = maxChord;
matthiasm@3 932
Chris@23 933 // char buffer1 [50];
Chris@23 934 // if (maxChordIndex < nChord - 1) {
Chris@23 935 // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
Chris@23 936 // } else {
Chris@23 937 // sprintf(buffer1, "N");
Chris@23 938 // }
Chris@23 939 // f7.label = buffer1;
Chris@23 940 f7.label = m_chordnames[maxChordIndex];
Chris@23 941 fsOut[7].push_back(f7);
Chris@23 942 }
Chris@23 943 count++;
Chris@23 944 }
Chris@23 945 Feature f7; // last chord estimate
Chris@23 946 f7.hasTimestamp = true;
Chris@23 947 f7.timestamp = fsOut[6][fsOut[6].size()-1].timestamp;
Chris@23 948 f7.label = "N";
Chris@23 949 fsOut[7].push_back(f7);
Chris@23 950 cerr << "done." << endl;
Chris@23 951 // // musicity
Chris@23 952 // count = 0;
Chris@23 953 // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
Chris@23 954 // vector<float> musicityValue;
Chris@23 955 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
Chris@23 956 // Feature f4 = *it;
Chris@23 957 //
Chris@23 958 // int startIndex = max(count - musicitykernelwidth/2,0);
Chris@23 959 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
Chris@23 960 // float chromasum = 0;
Chris@23 961 // float diffsum = 0;
Chris@23 962 // for (int k = 0; k < 12; k++) {
Chris@23 963 // for (int i = startIndex + 1; i < endIndex; i++) {
Chris@23 964 // chromasum += pow(fsOut[4][i].values[k],2);
Chris@23 965 // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]);
Chris@23 966 // }
Chris@23 967 // }
Chris@23 968 // diffsum /= chromasum;
Chris@23 969 // musicityValue.push_back(diffsum);
Chris@23 970 // count++;
Chris@23 971 // }
Chris@23 972 //
Chris@23 973 // float musicityThreshold = 0.44;
Chris@23 974 // if (m_stepSize == 4096) {
Chris@23 975 // musicityThreshold = 0.74;
Chris@23 976 // }
Chris@23 977 // if (m_stepSize == 4410) {
Chris@23 978 // musicityThreshold = 0.77;
Chris@23 979 // }
Chris@23 980 //
Chris@23 981 // count = 0;
Chris@23 982 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
Chris@23 983 // Feature f4 = *it;
Chris@23 984 // Feature f8; // musicity
Chris@23 985 // Feature f9; // musicity segmenter
Chris@23 986 //
Chris@23 987 // f8.hasTimestamp = true;
Chris@23 988 // f8.timestamp = f4.timestamp;
Chris@23 989 // f9.hasTimestamp = true;
Chris@23 990 // f9.timestamp = f4.timestamp;
Chris@23 991 //
Chris@23 992 // int startIndex = max(count - musicitykernelwidth/2,0);
Chris@23 993 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
Chris@23 994 // int musicityCount = 0;
Chris@23 995 // for (int i = startIndex; i <= endIndex; i++) {
Chris@23 996 // if (musicityValue[i] > musicityThreshold) musicityCount++;
Chris@23 997 // }
Chris@23 998 // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1);
Chris@23 999 //
Chris@23 1000 // if (isSpeech) {
Chris@23 1001 // if (oldlabeltype != 2) {
Chris@23 1002 // f9.label = "Speech";
Chris@23 1003 // fsOut[9].push_back(f9);
Chris@23 1004 // oldlabeltype = 2;
Chris@23 1005 // }
Chris@23 1006 // } else {
Chris@23 1007 // if (oldlabeltype != 1) {
Chris@23 1008 // f9.label = "Music";
Chris@23 1009 // fsOut[9].push_back(f9);
Chris@23 1010 // oldlabeltype = 1;
Chris@23 1011 // }
Chris@23 1012 // }
Chris@23 1013 // f8.values.push_back(musicityValue[count]);
Chris@23 1014 // fsOut[8].push_back(f8);
Chris@23 1015 // count++;
Chris@23 1016 // }
Chris@23 1017 return fsOut;
matthiasm@0 1018
matthiasm@0 1019 }
matthiasm@0 1020
Chris@35 1021 #endif