annotate NNLSChroma.cpp @ 27:690bd9148467 matthiasm-plugin

* Split out some common code into chromamethods.cpp from NNLSChroma.cpp (the latter is destined to become the chroma plugin only, eventually)
author Chris Cannam
date Thu, 21 Oct 2010 16:34:58 +0100
parents 93c836cfb8c5
children da3195577172
rev   line source
Chris@23 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
matthiasm@0 2
matthiasm@0 3 #include "NNLSChroma.h"
Chris@27 4
Chris@27 5 #include "chromamethods.h"
Chris@27 6
Chris@27 7 #include <cstdlib>
Chris@27 8 #include <fstream>
matthiasm@0 9 #include <cmath>
matthiasm@9 10
Chris@27 11 #include <algorithm>
matthiasm@0 12
matthiasm@0 13 const bool debug_on = false;
matthiasm@0 14
Chris@27 15 const vector<float> hw(hammingwind, hammingwind+19);
matthiasm@0 16
matthiasm@0 17 NNLSChroma::NNLSChroma(float inputSampleRate) :
Chris@23 18 Plugin(inputSampleRate),
Chris@23 19 m_fl(0),
Chris@23 20 m_blockSize(0),
Chris@23 21 m_stepSize(0),
Chris@23 22 m_lengthOfNoteIndex(0),
Chris@23 23 m_meanTuning0(0),
Chris@23 24 m_meanTuning1(0),
Chris@23 25 m_meanTuning2(0),
Chris@23 26 m_localTuning0(0),
Chris@23 27 m_localTuning1(0),
Chris@23 28 m_localTuning2(0),
Chris@23 29 m_paling(1.0),
Chris@23 30 m_preset(0.0),
Chris@23 31 m_localTuning(0),
Chris@23 32 m_kernelValue(0),
Chris@23 33 m_kernelFftIndex(0),
Chris@23 34 m_kernelNoteIndex(0),
Chris@23 35 m_dict(0),
Chris@23 36 m_tuneLocal(false),
Chris@23 37 m_dictID(0),
Chris@23 38 m_chorddict(0),
Chris@23 39 m_chordnames(0),
Chris@23 40 m_doNormalizeChroma(0),
Chris@23 41 m_rollon(0.01)
matthiasm@0 42 {
Chris@23 43 if (debug_on) cerr << "--> NNLSChroma" << endl;
matthiasm@7 44
Chris@23 45 // make the *note* dictionary matrix
Chris@23 46 m_dict = new float[nNote * 84];
Chris@23 47 for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0;
Chris@23 48 dictionaryMatrix(m_dict);
matthiasm@7 49
Chris@23 50 // get the *chord* dictionary from file (if the file exists)
Chris@23 51 m_chordnames = chordDictionary(&m_chorddict);
matthiasm@0 52 }
matthiasm@0 53
matthiasm@0 54
matthiasm@0 55 NNLSChroma::~NNLSChroma()
matthiasm@0 56 {
Chris@23 57 if (debug_on) cerr << "--> ~NNLSChroma" << endl;
Chris@23 58 delete [] m_dict;
Chris@23 59 // delete [] m_chorddict;
Chris@23 60 // delete m_chordnames;
matthiasm@0 61 }
matthiasm@0 62
matthiasm@0 63 string
matthiasm@0 64 NNLSChroma::getIdentifier() const
matthiasm@0 65 {
Chris@23 66 if (debug_on) cerr << "--> getIdentifier" << endl;
matthiasm@0 67 return "nnls_chroma";
matthiasm@0 68 }
matthiasm@0 69
matthiasm@0 70 string
matthiasm@0 71 NNLSChroma::getName() const
matthiasm@0 72 {
Chris@23 73 if (debug_on) cerr << "--> getName" << endl;
matthiasm@0 74 return "NNLS Chroma";
matthiasm@0 75 }
matthiasm@0 76
matthiasm@0 77 string
matthiasm@0 78 NNLSChroma::getDescription() const
matthiasm@0 79 {
matthiasm@0 80 // Return something helpful here!
Chris@23 81 if (debug_on) cerr << "--> getDescription" << endl;
matthiasm@13 82 return "This plugin provides a number of features derived from a log-frequency amplitude spectrum of the DFT: some variants of the log-frequency spectrum, including a semitone spectrum derived from approximate transcription using the NNLS algorithm; based on this semitone spectrum, chroma features and a simple chord estimate.";
matthiasm@0 83 }
matthiasm@0 84
matthiasm@0 85 string
matthiasm@0 86 NNLSChroma::getMaker() const
matthiasm@0 87 {
Chris@23 88 if (debug_on) cerr << "--> getMaker" << endl;
matthiasm@0 89 // Your name here
matthiasm@0 90 return "Matthias Mauch";
matthiasm@0 91 }
matthiasm@0 92
matthiasm@0 93 int
matthiasm@0 94 NNLSChroma::getPluginVersion() const
matthiasm@0 95 {
Chris@23 96 if (debug_on) cerr << "--> getPluginVersion" << endl;
matthiasm@0 97 // Increment this each time you release a version that behaves
matthiasm@0 98 // differently from the previous one
matthiasm@0 99 return 1;
matthiasm@0 100 }
matthiasm@0 101
matthiasm@0 102 string
matthiasm@0 103 NNLSChroma::getCopyright() const
matthiasm@0 104 {
Chris@23 105 if (debug_on) cerr << "--> getCopyright" << endl;
matthiasm@0 106 // This function is not ideally named. It does not necessarily
matthiasm@0 107 // need to say who made the plugin -- getMaker does that -- but it
matthiasm@0 108 // should indicate the terms under which it is distributed. For
matthiasm@0 109 // example, "Copyright (year). All Rights Reserved", or "GPL"
matthiasm@0 110 return "Copyright (2010). All rights reserved.";
matthiasm@0 111 }
matthiasm@0 112
matthiasm@0 113 NNLSChroma::InputDomain
matthiasm@0 114 NNLSChroma::getInputDomain() const
matthiasm@0 115 {
Chris@23 116 if (debug_on) cerr << "--> getInputDomain" << endl;
matthiasm@0 117 return FrequencyDomain;
matthiasm@0 118 }
matthiasm@0 119
matthiasm@0 120 size_t
matthiasm@0 121 NNLSChroma::getPreferredBlockSize() const
matthiasm@0 122 {
Chris@23 123 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
matthiasm@0 124 return 16384; // 0 means "I can handle any block size"
matthiasm@0 125 }
matthiasm@0 126
matthiasm@0 127 size_t
matthiasm@0 128 NNLSChroma::getPreferredStepSize() const
matthiasm@0 129 {
Chris@23 130 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
matthiasm@0 131 return 2048; // 0 means "anything sensible"; in practice this
Chris@23 132 // means the same as the block size for TimeDomain
Chris@23 133 // plugins, or half of it for FrequencyDomain plugins
matthiasm@0 134 }
matthiasm@0 135
matthiasm@0 136 size_t
matthiasm@0 137 NNLSChroma::getMinChannelCount() const
matthiasm@0 138 {
Chris@23 139 if (debug_on) cerr << "--> getMinChannelCount" << endl;
matthiasm@0 140 return 1;
matthiasm@0 141 }
matthiasm@0 142
matthiasm@0 143 size_t
matthiasm@0 144 NNLSChroma::getMaxChannelCount() const
matthiasm@0 145 {
Chris@23 146 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
matthiasm@0 147 return 1;
matthiasm@0 148 }
matthiasm@0 149
matthiasm@0 150 NNLSChroma::ParameterList
matthiasm@0 151 NNLSChroma::getParameterDescriptors() const
matthiasm@0 152 {
Chris@23 153 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
matthiasm@0 154 ParameterList list;
matthiasm@0 155
matthiasm@3 156 ParameterDescriptor d3;
matthiasm@3 157 d3.identifier = "preset";
matthiasm@3 158 d3.name = "preset";
matthiasm@3 159 d3.description = "Spectral paling: no paling - 0; whitening - 1.";
matthiasm@3 160 d3.unit = "";
Chris@23 161 d3.isQuantized = true;
Chris@23 162 d3.quantizeStep = 1;
matthiasm@3 163 d3.minValue = 0.0;
matthiasm@4 164 d3.maxValue = 3.0;
matthiasm@3 165 d3.defaultValue = 0.0;
matthiasm@3 166 d3.valueNames.push_back("polyphonic pop");
Chris@23 167 d3.valueNames.push_back("polyphonic pop (fast)");
matthiasm@3 168 d3.valueNames.push_back("solo keyboard");
Chris@23 169 d3.valueNames.push_back("manual");
matthiasm@3 170 list.push_back(d3);
matthiasm@4 171
matthiasm@17 172 ParameterDescriptor d5;
Chris@23 173 d5.identifier = "rollon";
Chris@23 174 d5.name = "spectral roll-on";
Chris@23 175 d5.description = "The bins below the spectral roll-on quantile will be set to 0.";
Chris@23 176 d5.unit = "";
Chris@23 177 d5.minValue = 0;
Chris@23 178 d5.maxValue = 1;
Chris@23 179 d5.defaultValue = 0;
Chris@23 180 d5.isQuantized = false;
Chris@23 181 list.push_back(d5);
matthiasm@17 182
matthiasm@4 183 // ParameterDescriptor d0;
matthiasm@4 184 // d0.identifier = "notedict";
matthiasm@4 185 // d0.name = "note dictionary";
matthiasm@4 186 // d0.description = "Notes in different note dictionaries differ by their spectral shapes.";
matthiasm@4 187 // d0.unit = "";
matthiasm@4 188 // d0.minValue = 0;
matthiasm@4 189 // d0.maxValue = 1;
matthiasm@4 190 // d0.defaultValue = 0;
matthiasm@4 191 // d0.isQuantized = true;
matthiasm@4 192 // d0.valueNames.push_back("s = 0.6");
matthiasm@4 193 // d0.valueNames.push_back("no NNLS");
matthiasm@4 194 // d0.quantizeStep = 1.0;
matthiasm@4 195 // list.push_back(d0);
matthiasm@4 196
matthiasm@4 197 ParameterDescriptor d1;
matthiasm@4 198 d1.identifier = "tuningmode";
matthiasm@4 199 d1.name = "tuning mode";
matthiasm@4 200 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
matthiasm@4 201 d1.unit = "";
matthiasm@4 202 d1.minValue = 0;
matthiasm@4 203 d1.maxValue = 1;
matthiasm@4 204 d1.defaultValue = 0;
matthiasm@4 205 d1.isQuantized = true;
matthiasm@4 206 d1.valueNames.push_back("global tuning");
matthiasm@4 207 d1.valueNames.push_back("local tuning");
matthiasm@4 208 d1.quantizeStep = 1.0;
matthiasm@4 209 list.push_back(d1);
matthiasm@4 210
Chris@23 211 // ParameterDescriptor d2;
Chris@23 212 // d2.identifier = "paling";
Chris@23 213 // d2.name = "spectral paling";
Chris@23 214 // d2.description = "Spectral paling: no paling - 0; whitening - 1.";
Chris@23 215 // d2.unit = "";
Chris@23 216 // d2.isQuantized = true;
Chris@23 217 // // d2.quantizeStep = 0.1;
Chris@23 218 // d2.minValue = 0.0;
Chris@23 219 // d2.maxValue = 1.0;
Chris@23 220 // d2.defaultValue = 1.0;
Chris@23 221 // d2.isQuantized = false;
Chris@23 222 // list.push_back(d2);
Chris@23 223 ParameterDescriptor d4;
matthiasm@12 224 d4.identifier = "chromanormalize";
matthiasm@12 225 d4.name = "chroma normalization";
matthiasm@12 226 d4.description = "How shall the chroma vector be normalized?";
matthiasm@12 227 d4.unit = "";
matthiasm@12 228 d4.minValue = 0;
matthiasm@13 229 d4.maxValue = 3;
matthiasm@12 230 d4.defaultValue = 0;
matthiasm@12 231 d4.isQuantized = true;
matthiasm@13 232 d4.valueNames.push_back("none");
matthiasm@13 233 d4.valueNames.push_back("maximum norm");
Chris@23 234 d4.valueNames.push_back("L1 norm");
Chris@23 235 d4.valueNames.push_back("L2 norm");
matthiasm@12 236 d4.quantizeStep = 1.0;
matthiasm@12 237 list.push_back(d4);
matthiasm@4 238
matthiasm@0 239 return list;
matthiasm@0 240 }
matthiasm@0 241
matthiasm@0 242 float
matthiasm@0 243 NNLSChroma::getParameter(string identifier) const
matthiasm@0 244 {
Chris@23 245 if (debug_on) cerr << "--> getParameter" << endl;
matthiasm@0 246 if (identifier == "notedict") {
matthiasm@0 247 return m_dictID;
matthiasm@0 248 }
matthiasm@0 249
matthiasm@0 250 if (identifier == "paling") {
matthiasm@0 251 return m_paling;
matthiasm@0 252 }
matthiasm@17 253
Chris@23 254 if (identifier == "rollon") {
matthiasm@17 255 return m_rollon;
matthiasm@17 256 }
matthiasm@0 257
matthiasm@0 258 if (identifier == "tuningmode") {
matthiasm@0 259 if (m_tuneLocal) {
matthiasm@0 260 return 1.0;
matthiasm@0 261 } else {
matthiasm@0 262 return 0.0;
matthiasm@0 263 }
matthiasm@0 264 }
Chris@23 265 if (identifier == "preset") {
Chris@23 266 return m_preset;
matthiasm@3 267 }
Chris@23 268 if (identifier == "chromanormalize") {
Chris@23 269 return m_doNormalizeChroma;
matthiasm@12 270 }
matthiasm@0 271 return 0;
matthiasm@0 272
matthiasm@0 273 }
matthiasm@0 274
matthiasm@0 275 void
matthiasm@0 276 NNLSChroma::setParameter(string identifier, float value)
matthiasm@0 277 {
Chris@23 278 if (debug_on) cerr << "--> setParameter" << endl;
matthiasm@0 279 if (identifier == "notedict") {
matthiasm@0 280 m_dictID = (int) value;
matthiasm@0 281 }
matthiasm@0 282
matthiasm@0 283 if (identifier == "paling") {
matthiasm@0 284 m_paling = value;
matthiasm@0 285 }
matthiasm@0 286
matthiasm@0 287 if (identifier == "tuningmode") {
matthiasm@0 288 m_tuneLocal = (value > 0) ? true : false;
matthiasm@0 289 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
matthiasm@0 290 }
matthiasm@3 291 if (identifier == "preset") {
matthiasm@3 292 m_preset = value;
Chris@23 293 if (m_preset == 0.0) {
Chris@23 294 m_tuneLocal = false;
Chris@23 295 m_paling = 1.0;
Chris@23 296 m_dictID = 0.0;
Chris@23 297 }
Chris@23 298 if (m_preset == 1.0) {
Chris@23 299 m_tuneLocal = false;
Chris@23 300 m_paling = 1.0;
Chris@23 301 m_dictID = 1.0;
Chris@23 302 }
Chris@23 303 if (m_preset == 2.0) {
Chris@23 304 m_tuneLocal = false;
Chris@23 305 m_paling = 0.7;
Chris@23 306 m_dictID = 0.0;
Chris@23 307 }
matthiasm@3 308 }
Chris@23 309 if (identifier == "chromanormalize") {
Chris@23 310 m_doNormalizeChroma = value;
Chris@23 311 }
matthiasm@17 312
Chris@23 313 if (identifier == "rollon") {
Chris@23 314 m_rollon = value;
Chris@23 315 }
matthiasm@0 316 }
matthiasm@0 317
matthiasm@0 318 NNLSChroma::ProgramList
matthiasm@0 319 NNLSChroma::getPrograms() const
matthiasm@0 320 {
Chris@23 321 if (debug_on) cerr << "--> getPrograms" << endl;
matthiasm@0 322 ProgramList list;
matthiasm@0 323
matthiasm@0 324 // If you have no programs, return an empty list (or simply don't
matthiasm@0 325 // implement this function or getCurrentProgram/selectProgram)
matthiasm@0 326
matthiasm@0 327 return list;
matthiasm@0 328 }
matthiasm@0 329
matthiasm@0 330 string
matthiasm@0 331 NNLSChroma::getCurrentProgram() const
matthiasm@0 332 {
Chris@23 333 if (debug_on) cerr << "--> getCurrentProgram" << endl;
matthiasm@0 334 return ""; // no programs
matthiasm@0 335 }
matthiasm@0 336
matthiasm@0 337 void
matthiasm@0 338 NNLSChroma::selectProgram(string name)
matthiasm@0 339 {
Chris@23 340 if (debug_on) cerr << "--> selectProgram" << endl;
matthiasm@0 341 }
matthiasm@0 342
matthiasm@0 343
matthiasm@0 344 NNLSChroma::OutputList
matthiasm@0 345 NNLSChroma::getOutputDescriptors() const
matthiasm@0 346 {
Chris@23 347 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
matthiasm@0 348 OutputList list;
matthiasm@0 349
matthiasm@0 350 // Make chroma names for the binNames property
matthiasm@0 351 vector<string> chromanames;
matthiasm@0 352 vector<string> bothchromanames;
matthiasm@0 353 for (int iNote = 0; iNote < 24; iNote++) {
matthiasm@0 354 bothchromanames.push_back(notenames[iNote]);
matthiasm@0 355 if (iNote < 12) {
matthiasm@0 356 chromanames.push_back(notenames[iNote]);
matthiasm@0 357 }
matthiasm@0 358 }
matthiasm@0 359
Chris@23 360 // int nNote = 84;
matthiasm@0 361
matthiasm@0 362 // See OutputDescriptor documentation for the possibilities here.
matthiasm@0 363 // Every plugin must have at least one output.
matthiasm@0 364
matthiasm@0 365 OutputDescriptor d0;
matthiasm@0 366 d0.identifier = "tuning";
matthiasm@0 367 d0.name = "Tuning";
matthiasm@0 368 d0.description = "The concert pitch.";
matthiasm@0 369 d0.unit = "Hz";
matthiasm@0 370 d0.hasFixedBinCount = true;
matthiasm@0 371 d0.binCount = 0;
matthiasm@0 372 d0.hasKnownExtents = true;
Chris@23 373 d0.minValue = 427.47;
Chris@23 374 d0.maxValue = 452.89;
matthiasm@0 375 d0.isQuantized = false;
matthiasm@0 376 d0.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@0 377 d0.hasDuration = false;
matthiasm@0 378 list.push_back(d0);
matthiasm@0 379
Chris@23 380 OutputDescriptor d1;
matthiasm@0 381 d1.identifier = "logfreqspec";
matthiasm@0 382 d1.name = "Log-Frequency Spectrum";
matthiasm@0 383 d1.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping.";
matthiasm@0 384 d1.unit = "";
matthiasm@0 385 d1.hasFixedBinCount = true;
matthiasm@0 386 d1.binCount = nNote;
matthiasm@0 387 d1.hasKnownExtents = false;
matthiasm@0 388 d1.isQuantized = false;
matthiasm@0 389 d1.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 390 d1.hasDuration = false;
matthiasm@0 391 d1.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 392 list.push_back(d1);
matthiasm@0 393
Chris@23 394 OutputDescriptor d2;
matthiasm@0 395 d2.identifier = "tunedlogfreqspec";
matthiasm@0 396 d2.name = "Tuned Log-Frequency Spectrum";
matthiasm@0 397 d2.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping, then its tuned using the estimated tuning frequency.";
matthiasm@0 398 d2.unit = "";
matthiasm@0 399 d2.hasFixedBinCount = true;
matthiasm@0 400 d2.binCount = 256;
matthiasm@0 401 d2.hasKnownExtents = false;
matthiasm@0 402 d2.isQuantized = false;
matthiasm@0 403 d2.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 404 d2.hasDuration = false;
matthiasm@0 405 d2.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 406 list.push_back(d2);
matthiasm@0 407
matthiasm@0 408 OutputDescriptor d3;
matthiasm@0 409 d3.identifier = "semitonespectrum";
matthiasm@0 410 d3.name = "Semitone Spectrum";
matthiasm@0 411 d3.description = "A semitone-spaced log-frequency spectrum derived from the third-of-a-semitone-spaced tuned log-frequency spectrum.";
matthiasm@0 412 d3.unit = "";
matthiasm@0 413 d3.hasFixedBinCount = true;
matthiasm@0 414 d3.binCount = 84;
matthiasm@0 415 d3.hasKnownExtents = false;
matthiasm@0 416 d3.isQuantized = false;
matthiasm@0 417 d3.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 418 d3.hasDuration = false;
matthiasm@0 419 d3.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 420 list.push_back(d3);
matthiasm@0 421
matthiasm@0 422 OutputDescriptor d4;
matthiasm@0 423 d4.identifier = "chroma";
matthiasm@0 424 d4.name = "Chromagram";
matthiasm@0 425 d4.description = "Tuning-adjusted chromagram from NNLS soft transcription, with an emphasis on the medium note range.";
matthiasm@0 426 d4.unit = "";
matthiasm@0 427 d4.hasFixedBinCount = true;
matthiasm@0 428 d4.binCount = 12;
matthiasm@0 429 d4.binNames = chromanames;
matthiasm@0 430 d4.hasKnownExtents = false;
matthiasm@0 431 d4.isQuantized = false;
matthiasm@0 432 d4.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 433 d4.hasDuration = false;
matthiasm@0 434 d4.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 435 list.push_back(d4);
matthiasm@0 436
matthiasm@0 437 OutputDescriptor d5;
matthiasm@0 438 d5.identifier = "basschroma";
matthiasm@0 439 d5.name = "Bass Chromagram";
matthiasm@0 440 d5.description = "Tuning-adjusted bass chromagram from NNLS soft transcription, with an emphasis on the bass note range.";
matthiasm@0 441 d5.unit = "";
matthiasm@0 442 d5.hasFixedBinCount = true;
matthiasm@0 443 d5.binCount = 12;
matthiasm@0 444 d5.binNames = chromanames;
matthiasm@0 445 d5.hasKnownExtents = false;
matthiasm@0 446 d5.isQuantized = false;
matthiasm@0 447 d5.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 448 d5.hasDuration = false;
matthiasm@0 449 d5.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 450 list.push_back(d5);
matthiasm@0 451
matthiasm@0 452 OutputDescriptor d6;
matthiasm@0 453 d6.identifier = "bothchroma";
matthiasm@0 454 d6.name = "Chromagram and Bass Chromagram";
matthiasm@0 455 d6.description = "Tuning-adjusted chromagram and bass chromagram (stacked on top of each other) from NNLS soft transcription.";
matthiasm@0 456 d6.unit = "";
matthiasm@0 457 d6.hasFixedBinCount = true;
matthiasm@0 458 d6.binCount = 24;
matthiasm@0 459 d6.binNames = bothchromanames;
matthiasm@0 460 d6.hasKnownExtents = false;
matthiasm@0 461 d6.isQuantized = false;
matthiasm@0 462 d6.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 463 d6.hasDuration = false;
matthiasm@0 464 d6.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 465 list.push_back(d6);
matthiasm@0 466
matthiasm@0 467 OutputDescriptor d7;
matthiasm@0 468 d7.identifier = "simplechord";
matthiasm@0 469 d7.name = "Simple Chord Estimate";
matthiasm@0 470 d7.description = "A simple chord estimate based on the inner product of chord templates with the smoothed chroma.";
matthiasm@0 471 d7.unit = "";
matthiasm@0 472 d7.hasFixedBinCount = true;
matthiasm@0 473 d7.binCount = 0;
matthiasm@0 474 d7.hasKnownExtents = false;
matthiasm@0 475 d7.isQuantized = false;
matthiasm@0 476 d7.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@0 477 d7.hasDuration = false;
matthiasm@0 478 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 479 list.push_back(d7);
matthiasm@0 480
Chris@23 481 //
Chris@23 482 // OutputDescriptor d9;
Chris@23 483 // d9.identifier = "inconsistencysegment";
Chris@23 484 // d9.name = "Harmonic inconsistency segmenter";
Chris@23 485 // d9.description = "Segments the audio based on the harmonic inconsistency value into speech and music.";
Chris@23 486 // d9.unit = "";
Chris@23 487 // d9.hasFixedBinCount = true;
Chris@23 488 // d9.binCount = 0;
Chris@23 489 // d9.hasKnownExtents = true;
Chris@23 490 // d9.minValue = 0.1;
Chris@23 491 // d9.maxValue = 0.9;
Chris@23 492 // d9.isQuantized = false;
Chris@23 493 // d9.sampleType = OutputDescriptor::VariableSampleRate;
Chris@23 494 // d9.hasDuration = false;
Chris@23 495 // d9.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
Chris@23 496 // list.push_back(d9);
Chris@23 497 //
Chris@23 498 OutputDescriptor d10;
Chris@23 499 d10.identifier = "localtuning";
Chris@23 500 d10.name = "Local tuning";
Chris@23 501 d10.description = "Tuning based on the history up to this timestamp.";
Chris@23 502 d10.unit = "Hz";
Chris@23 503 d10.hasFixedBinCount = true;
Chris@23 504 d10.binCount = 1;
Chris@23 505 d10.hasKnownExtents = true;
Chris@23 506 d10.minValue = 427.47;
Chris@23 507 d10.maxValue = 452.89;
Chris@23 508 d10.isQuantized = false;
Chris@23 509 d10.sampleType = OutputDescriptor::FixedSampleRate;
Chris@23 510 d10.hasDuration = false;
Chris@23 511 // d10.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
Chris@23 512 list.push_back(d10);
matthiasm@17 513
Chris@23 514 OutputDescriptor d8;
matthiasm@17 515 d8.identifier = "harmonicchange";
matthiasm@17 516 d8.name = "Harmonic change value";
matthiasm@17 517 d8.description = "Harmonic change.";
matthiasm@17 518 d8.unit = "";
matthiasm@17 519 d8.hasFixedBinCount = true;
matthiasm@17 520 d8.binCount = 1;
matthiasm@17 521 d8.hasKnownExtents = true;
Chris@23 522 d8.minValue = 0.0;
Chris@23 523 d8.maxValue = 0.999;
matthiasm@17 524 d8.isQuantized = false;
matthiasm@17 525 d8.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@17 526 d8.hasDuration = false;
matthiasm@17 527 // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@17 528 list.push_back(d8);
matthiasm@1 529
matthiasm@0 530 return list;
matthiasm@0 531 }
matthiasm@0 532
matthiasm@0 533
matthiasm@0 534 bool
matthiasm@0 535 NNLSChroma::initialise(size_t channels, size_t stepSize, size_t blockSize)
matthiasm@0 536 {
Chris@23 537 if (debug_on) {
Chris@23 538 cerr << "--> initialise";
Chris@23 539 }
matthiasm@1 540
matthiasm@0 541 if (channels < getMinChannelCount() ||
matthiasm@0 542 channels > getMaxChannelCount()) return false;
matthiasm@0 543 m_blockSize = blockSize;
matthiasm@0 544 m_stepSize = stepSize;
matthiasm@0 545 frameCount = 0;
Chris@23 546 int tempn = 256 * m_blockSize/2;
Chris@23 547 // cerr << "length of tempkernel : " << tempn << endl;
Chris@23 548 float *tempkernel;
matthiasm@1 549
Chris@23 550 tempkernel = new float[tempn];
matthiasm@1 551
Chris@23 552 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
Chris@23 553 m_kernelValue.clear();
Chris@23 554 m_kernelFftIndex.clear();
Chris@23 555 m_kernelNoteIndex.clear();
Chris@23 556 int countNonzero = 0;
Chris@23 557 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
Chris@23 558 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) {
Chris@23 559 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
Chris@23 560 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
Chris@23 561 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
Chris@23 562 countNonzero++;
Chris@23 563 }
Chris@23 564 m_kernelFftIndex.push_back(iFFT);
Chris@23 565 m_kernelNoteIndex.push_back(iNote);
Chris@23 566 }
Chris@23 567 }
Chris@23 568 }
Chris@23 569 // cerr << "nonzero count : " << countNonzero << endl;
Chris@23 570 delete [] tempkernel;
Chris@23 571 ofstream myfile;
Chris@23 572 myfile.open ("matrix.txt");
matthiasm@3 573 // myfile << "Writing this to a file.\n";
Chris@23 574 for (int i = 0; i < nNote * 84; ++i) {
Chris@23 575 myfile << m_dict[i] << endl;
Chris@23 576 }
matthiasm@3 577 myfile.close();
matthiasm@0 578 return true;
matthiasm@0 579 }
matthiasm@0 580
matthiasm@0 581 void
matthiasm@0 582 NNLSChroma::reset()
matthiasm@0 583 {
Chris@23 584 if (debug_on) cerr << "--> reset";
matthiasm@4 585
matthiasm@0 586 // Clear buffers, reset stored values, etc
Chris@23 587 frameCount = 0;
Chris@23 588 m_dictID = 0;
Chris@23 589 m_fl.clear();
Chris@23 590 m_meanTuning0 = 0;
Chris@23 591 m_meanTuning1 = 0;
Chris@23 592 m_meanTuning2 = 0;
Chris@23 593 m_localTuning0 = 0;
Chris@23 594 m_localTuning1 = 0;
Chris@23 595 m_localTuning2 = 0;
Chris@23 596 m_localTuning.clear();
matthiasm@0 597 }
matthiasm@0 598
matthiasm@0 599 NNLSChroma::FeatureSet
matthiasm@0 600 NNLSChroma::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
matthiasm@0 601 {
Chris@23 602 if (debug_on) cerr << "--> process" << endl;
Chris@23 603 frameCount++;
Chris@23 604 float *magnitude = new float[m_blockSize/2];
matthiasm@0 605
Chris@23 606 Feature f10; // local tuning
Chris@23 607 f10.hasTimestamp = true;
Chris@23 608 f10.timestamp = timestamp;
Chris@23 609 const float *fbuf = inputBuffers[0];
Chris@23 610 float energysum = 0;
Chris@23 611 // make magnitude
Chris@23 612 float maxmag = -10000;
Chris@23 613 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
Chris@23 614 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
Chris@23 615 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
Chris@23 616 if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin];
Chris@23 617 if (m_rollon > 0) {
Chris@23 618 energysum += pow(magnitude[iBin],2);
Chris@23 619 }
Chris@23 620 }
matthiasm@14 621
Chris@23 622 float cumenergy = 0;
Chris@23 623 if (m_rollon > 0) {
Chris@23 624 for (size_t iBin = 2; iBin < m_blockSize/2; iBin++) {
Chris@23 625 cumenergy += pow(magnitude[iBin],2);
Chris@23 626 if (cumenergy < energysum * m_rollon) magnitude[iBin-2] = 0;
Chris@23 627 else break;
Chris@23 628 }
Chris@23 629 }
matthiasm@17 630
Chris@23 631 if (maxmag < 2) {
Chris@23 632 // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl;
Chris@23 633 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
Chris@23 634 magnitude[iBin] = 0;
Chris@23 635 }
Chris@23 636 }
matthiasm@4 637
Chris@23 638 // note magnitude mapping using pre-calculated matrix
Chris@23 639 float *nm = new float[nNote]; // note magnitude
Chris@23 640 for (size_t iNote = 0; iNote < nNote; iNote++) {
Chris@23 641 nm[iNote] = 0; // initialise as 0
Chris@23 642 }
Chris@23 643 int binCount = 0;
Chris@23 644 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
Chris@23 645 // cerr << ".";
Chris@23 646 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
Chris@23 647 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl;
Chris@23 648 binCount++;
Chris@23 649 }
Chris@23 650 // cerr << nm[20];
Chris@23 651 // cerr << endl;
matthiasm@0 652
matthiasm@0 653
matthiasm@0 654 float one_over_N = 1.0/frameCount;
matthiasm@0 655 // update means of complex tuning variables
matthiasm@0 656 m_meanTuning0 *= float(frameCount-1)*one_over_N;
matthiasm@0 657 m_meanTuning1 *= float(frameCount-1)*one_over_N;
matthiasm@0 658 m_meanTuning2 *= float(frameCount-1)*one_over_N;
matthiasm@0 659
matthiasm@0 660 for (int iTone = 0; iTone < 160; iTone = iTone + 3) {
matthiasm@0 661 m_meanTuning0 += nm[iTone + 0]*one_over_N;
matthiasm@0 662 m_meanTuning1 += nm[iTone + 1]*one_over_N;
matthiasm@0 663 m_meanTuning2 += nm[iTone + 2]*one_over_N;
Chris@23 664 float ratioOld = 0.997;
matthiasm@3 665 m_localTuning0 *= ratioOld; m_localTuning0 += nm[iTone + 0] * (1 - ratioOld);
matthiasm@3 666 m_localTuning1 *= ratioOld; m_localTuning1 += nm[iTone + 1] * (1 - ratioOld);
matthiasm@3 667 m_localTuning2 *= ratioOld; m_localTuning2 += nm[iTone + 2] * (1 - ratioOld);
matthiasm@0 668 }
matthiasm@0 669
matthiasm@0 670 // if (m_tuneLocal) {
Chris@23 671 // local tuning
Chris@23 672 float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2;
Chris@23 673 float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2;
Chris@23 674 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
Chris@23 675 m_localTuning.push_back(normalisedtuning);
Chris@23 676 float tuning440 = 440 * pow(2,normalisedtuning/12);
Chris@23 677 f10.values.push_back(tuning440);
Chris@23 678 // cerr << tuning440 << endl;
matthiasm@0 679 // }
matthiasm@0 680
Chris@23 681 Feature f1; // logfreqspec
Chris@23 682 f1.hasTimestamp = true;
matthiasm@0 683 f1.timestamp = timestamp;
Chris@23 684 for (size_t iNote = 0; iNote < nNote; iNote++) {
Chris@23 685 f1.values.push_back(nm[iNote]);
Chris@23 686 }
matthiasm@0 687
Chris@23 688 FeatureSet fs;
Chris@23 689 fs[1].push_back(f1);
matthiasm@3 690 fs[8].push_back(f10);
matthiasm@0 691
matthiasm@0 692 // deletes
matthiasm@0 693 delete[] magnitude;
matthiasm@0 694 delete[] nm;
matthiasm@0 695
matthiasm@0 696 m_fl.push_back(f1); // remember note magnitude for getRemainingFeatures
Chris@23 697 char * pPath;
Chris@23 698 pPath = getenv ("VAMP_PATH");
matthiasm@7 699
matthiasm@7 700
Chris@23 701 return fs;
matthiasm@0 702 }
matthiasm@0 703
matthiasm@0 704 NNLSChroma::FeatureSet
matthiasm@0 705 NNLSChroma::getRemainingFeatures()
matthiasm@0 706 {
Chris@23 707 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
Chris@23 708 FeatureSet fsOut;
Chris@23 709 if (m_fl.size() == 0) return fsOut;
Chris@23 710 int nChord = m_chordnames.size();
Chris@23 711 //
Chris@23 712 /** Calculate Tuning
Chris@23 713 calculate tuning from (using the angle of the complex number defined by the
Chris@23 714 cumulative mean real and imag values)
Chris@23 715 **/
Chris@23 716 float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
Chris@23 717 float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
Chris@23 718 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
Chris@23 719 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
Chris@23 720 int intShift = floor(normalisedtuning * 3);
Chris@23 721 float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
matthiasm@1 722
Chris@23 723 char buffer0 [50];
matthiasm@1 724
Chris@23 725 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
matthiasm@1 726
Chris@23 727 // cerr << "normalisedtuning: " << normalisedtuning << '\n';
matthiasm@1 728
Chris@23 729 // push tuning to FeatureSet fsOut
Chris@23 730 Feature f0; // tuning
Chris@23 731 f0.hasTimestamp = true;
Chris@23 732 f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));;
Chris@23 733 f0.label = buffer0;
Chris@23 734 fsOut[0].push_back(f0);
matthiasm@1 735
Chris@23 736 /** Tune Log-Frequency Spectrogram
Chris@23 737 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
Chris@23 738 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
Chris@23 739 **/
Chris@23 740 cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... ";
matthiasm@13 741
Chris@23 742 float tempValue = 0;
Chris@23 743 float dbThreshold = 0; // relative to the background spectrum
Chris@23 744 float thresh = pow(10,dbThreshold/20);
Chris@23 745 // cerr << "tune local ? " << m_tuneLocal << endl;
Chris@23 746 int count = 0;
matthiasm@1 747
Chris@23 748 for (FeatureList::iterator i = m_fl.begin(); i != m_fl.end(); ++i) {
Chris@23 749 Feature f1 = *i;
Chris@23 750 Feature f2; // tuned log-frequency spectrum
Chris@23 751 f2.hasTimestamp = true;
Chris@23 752 f2.timestamp = f1.timestamp;
Chris@23 753 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
matthiasm@1 754
Chris@23 755 if (m_tuneLocal) {
Chris@23 756 intShift = floor(m_localTuning[count] * 3);
Chris@23 757 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
Chris@23 758 }
matthiasm@1 759
Chris@23 760 // cerr << intShift << " " << intFactor << endl;
matthiasm@1 761
Chris@23 762 for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
Chris@23 763 tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor;
Chris@23 764 f2.values.push_back(tempValue);
Chris@23 765 }
matthiasm@1 766
Chris@23 767 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
Chris@23 768 vector<float> runningmean = SpecialConvolution(f2.values,hw);
Chris@23 769 vector<float> runningstd;
Chris@23 770 for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance)
Chris@23 771 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
Chris@23 772 }
Chris@23 773 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
Chris@23 774 for (int i = 0; i < 256; i++) {
Chris@23 775 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
Chris@23 776 if (runningstd[i] > 0) {
Chris@23 777 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
Chris@23 778 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
Chris@23 779 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
Chris@23 780 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
Chris@23 781 }
Chris@23 782 if (f2.values[i] < 0) {
Chris@23 783 cerr << "ERROR: negative value in logfreq spectrum" << endl;
Chris@23 784 }
Chris@23 785 }
Chris@23 786 fsOut[2].push_back(f2);
Chris@23 787 count++;
Chris@23 788 }
Chris@23 789 cerr << "done." << endl;
matthiasm@1 790
Chris@23 791 /** Semitone spectrum and chromagrams
Chris@23 792 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
Chris@23 793 is inferred using a non-negative least squares algorithm.
Chris@23 794 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
Chris@23 795 bass and treble stacked onto each other).
Chris@23 796 **/
Chris@23 797 if (m_dictID == 1) {
Chris@23 798 cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... ";
Chris@23 799 } else {
Chris@23 800 cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... ";
Chris@23 801 }
matthiasm@13 802
matthiasm@1 803
Chris@23 804 vector<vector<float> > chordogram;
Chris@23 805 vector<vector<int> > scoreChordogram;
Chris@23 806 vector<float> chordchange = vector<float>(fsOut[2].size(),0);
Chris@23 807 vector<float> oldchroma = vector<float>(12,0);
Chris@23 808 vector<float> oldbasschroma = vector<float>(12,0);
Chris@23 809 count = 0;
matthiasm@9 810
Chris@23 811 for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) {
Chris@23 812 Feature f2 = *it; // logfreq spectrum
Chris@23 813 Feature f3; // semitone spectrum
Chris@23 814 Feature f4; // treble chromagram
Chris@23 815 Feature f5; // bass chromagram
Chris@23 816 Feature f6; // treble and bass chromagram
matthiasm@1 817
Chris@23 818 f3.hasTimestamp = true;
Chris@23 819 f3.timestamp = f2.timestamp;
matthiasm@1 820
Chris@23 821 f4.hasTimestamp = true;
Chris@23 822 f4.timestamp = f2.timestamp;
matthiasm@1 823
Chris@23 824 f5.hasTimestamp = true;
Chris@23 825 f5.timestamp = f2.timestamp;
matthiasm@1 826
Chris@23 827 f6.hasTimestamp = true;
Chris@23 828 f6.timestamp = f2.timestamp;
matthiasm@1 829
Chris@23 830 double b[256];
matthiasm@1 831
Chris@23 832 bool some_b_greater_zero = false;
Chris@23 833 float sumb = 0;
Chris@23 834 for (int i = 0; i < 256; i++) {
Chris@23 835 // b[i] = m_dict[(256 * count + i) % (256 * 84)];
Chris@23 836 b[i] = f2.values[i];
Chris@23 837 sumb += b[i];
Chris@23 838 if (b[i] > 0) {
Chris@23 839 some_b_greater_zero = true;
Chris@23 840 }
Chris@23 841 }
matthiasm@1 842
Chris@23 843 // here's where the non-negative least squares algorithm calculates the note activation x
matthiasm@1 844
Chris@23 845 vector<float> chroma = vector<float>(12, 0);
Chris@23 846 vector<float> basschroma = vector<float>(12, 0);
Chris@23 847 float currval;
Chris@23 848 unsigned iSemitone = 0;
matthiasm@1 849
Chris@23 850 if (some_b_greater_zero) {
Chris@23 851 if (m_dictID == 1) {
Chris@23 852 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
Chris@23 853 currval = 0;
Chris@23 854 currval += b[iNote + 1 + -1] * 0.5;
Chris@23 855 currval += b[iNote + 1 + 0] * 1.0;
Chris@23 856 currval += b[iNote + 1 + 1] * 0.5;
Chris@23 857 f3.values.push_back(currval);
Chris@23 858 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
Chris@23 859 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
Chris@23 860 iSemitone++;
Chris@23 861 }
matthiasm@1 862
Chris@23 863 } else {
Chris@23 864 double x[84+1000];
Chris@23 865 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
Chris@23 866 vector<int> signifIndex;
Chris@23 867 int index=0;
Chris@23 868 sumb /= 84.0;
Chris@23 869 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
Chris@23 870 float currval = 0;
Chris@23 871 currval += b[iNote + 1 + -1];
Chris@23 872 currval += b[iNote + 1 + 0];
Chris@23 873 currval += b[iNote + 1 + 1];
Chris@23 874 if (currval > 0) signifIndex.push_back(index);
Chris@23 875 f3.values.push_back(0); // fill the values, change later
Chris@23 876 index++;
Chris@23 877 }
Chris@23 878 double rnorm;
Chris@23 879 double w[84+1000];
Chris@23 880 double zz[84+1000];
Chris@23 881 int indx[84+1000];
Chris@23 882 int mode;
Chris@23 883 int dictsize = 256*signifIndex.size();
Chris@23 884 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
Chris@23 885 double *curr_dict = new double[dictsize];
Chris@23 886 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
Chris@23 887 for (unsigned iBin = 0; iBin < 256; iBin++) {
Chris@23 888 curr_dict[iNote * 256 + iBin] = 1.0 * m_dict[signifIndex[iNote] * 256 + iBin];
Chris@23 889 }
Chris@23 890 }
Chris@23 891 int sz = signifIndex.size();
Chris@23 892 int nn = nNote;
Chris@23 893 NNLS(curr_dict, &nn, &nn, &sz, b, x, &rnorm, w, zz, indx, &mode);
Chris@23 894 delete [] curr_dict;
Chris@23 895 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
Chris@23 896 f3.values[signifIndex[iNote]] = x[iNote];
Chris@23 897 // cerr << mode << endl;
Chris@23 898 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
Chris@23 899 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
Chris@23 900 }
Chris@23 901 }
Chris@23 902 }
matthiasm@13 903
matthiasm@10 904
matthiasm@12 905
matthiasm@13 906
Chris@23 907 f4.values = chroma;
Chris@23 908 f5.values = basschroma;
Chris@23 909 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
Chris@23 910 f6.values = chroma;
matthiasm@1 911
Chris@23 912 if (m_doNormalizeChroma > 0) {
Chris@23 913 vector<float> chromanorm = vector<float>(3,0);
Chris@23 914 switch (int(m_doNormalizeChroma)) {
Chris@23 915 case 0: // should never end up here
Chris@23 916 break;
Chris@23 917 case 1:
Chris@23 918 chromanorm[0] = *max_element(f4.values.begin(), f4.values.end());
Chris@23 919 chromanorm[1] = *max_element(f5.values.begin(), f5.values.end());
Chris@23 920 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
Chris@23 921 break;
Chris@23 922 case 2:
Chris@23 923 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
Chris@23 924 chromanorm[0] += *it;
Chris@23 925 }
Chris@23 926 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
Chris@23 927 chromanorm[1] += *it;
Chris@23 928 }
Chris@23 929 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
Chris@23 930 chromanorm[2] += *it;
Chris@23 931 }
Chris@23 932 break;
Chris@23 933 case 3:
Chris@23 934 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
Chris@23 935 chromanorm[0] += pow(*it,2);
Chris@23 936 }
Chris@23 937 chromanorm[0] = sqrt(chromanorm[0]);
Chris@23 938 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
Chris@23 939 chromanorm[1] += pow(*it,2);
Chris@23 940 }
Chris@23 941 chromanorm[1] = sqrt(chromanorm[1]);
Chris@23 942 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
Chris@23 943 chromanorm[2] += pow(*it,2);
Chris@23 944 }
Chris@23 945 chromanorm[2] = sqrt(chromanorm[2]);
Chris@23 946 break;
Chris@23 947 }
Chris@23 948 if (chromanorm[0] > 0) {
Chris@23 949 for (int i = 0; i < f4.values.size(); i++) {
Chris@23 950 f4.values[i] /= chromanorm[0];
Chris@23 951 }
Chris@23 952 }
Chris@23 953 if (chromanorm[1] > 0) {
Chris@23 954 for (int i = 0; i < f5.values.size(); i++) {
Chris@23 955 f5.values[i] /= chromanorm[1];
Chris@23 956 }
Chris@23 957 }
Chris@23 958 if (chromanorm[2] > 0) {
Chris@23 959 for (int i = 0; i < f6.values.size(); i++) {
Chris@23 960 f6.values[i] /= chromanorm[2];
Chris@23 961 }
Chris@23 962 }
matthiasm@13 963
Chris@23 964 }
matthiasm@13 965
Chris@23 966 // local chord estimation
Chris@23 967 vector<float> currentChordSalience;
Chris@23 968 float tempchordvalue = 0;
Chris@23 969 float sumchordvalue = 0;
matthiasm@9 970
Chris@23 971 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23 972 tempchordvalue = 0;
Chris@23 973 for (int iBin = 0; iBin < 12; iBin++) {
Chris@23 974 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23 975 }
Chris@23 976 for (int iBin = 12; iBin < 24; iBin++) {
Chris@23 977 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23 978 }
Chris@23 979 sumchordvalue+=tempchordvalue;
Chris@23 980 currentChordSalience.push_back(tempchordvalue);
Chris@23 981 }
Chris@23 982 if (sumchordvalue > 0) {
Chris@23 983 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23 984 currentChordSalience[iChord] /= sumchordvalue;
Chris@23 985 }
Chris@23 986 } else {
Chris@23 987 currentChordSalience[nChord-1] = 1.0;
Chris@23 988 }
Chris@23 989 chordogram.push_back(currentChordSalience);
matthiasm@1 990
Chris@23 991 fsOut[3].push_back(f3);
Chris@23 992 fsOut[4].push_back(f4);
Chris@23 993 fsOut[5].push_back(f5);
Chris@23 994 fsOut[6].push_back(f6);
Chris@23 995 count++;
Chris@23 996 }
Chris@23 997 cerr << "done." << endl;
matthiasm@13 998
matthiasm@10 999
Chris@23 1000 /* Simple chord estimation
Chris@23 1001 I just take the local chord estimates ("currentChordSalience") and average them over time, then
Chris@23 1002 take the maximum. Very simple, don't do this at home...
Chris@23 1003 */
Chris@23 1004 cerr << "[NNLS Chroma Plugin] Chord Estimation ... ";
Chris@23 1005 count = 0;
Chris@23 1006 int halfwindowlength = m_inputSampleRate / m_stepSize;
Chris@23 1007 vector<int> chordSequence;
Chris@23 1008 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram
Chris@23 1009 vector<int> temp = vector<int>(nChord,0);
Chris@23 1010 scoreChordogram.push_back(temp);
Chris@23 1011 }
Chris@23 1012 for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) {
Chris@23 1013 int startIndex = count + 1;
Chris@23 1014 int endIndex = count + 2 * halfwindowlength;
matthiasm@10 1015
Chris@23 1016 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
matthiasm@10 1017
Chris@23 1018 vector<int> chordCandidates;
Chris@23 1019 for (unsigned iChord = 0; iChord < nChord-1; iChord++) {
Chris@23 1020 // float currsum = 0;
Chris@23 1021 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
Chris@23 1022 // currsum += chordogram[iFrame][iChord];
Chris@23 1023 // }
Chris@23 1024 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
Chris@23 1025 for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
Chris@23 1026 if (chordogram[iFrame][iChord] > chordThreshold) {
Chris@23 1027 chordCandidates.push_back(iChord);
Chris@23 1028 break;
Chris@23 1029 }
Chris@23 1030 }
Chris@23 1031 }
Chris@23 1032 chordCandidates.push_back(nChord-1);
Chris@23 1033 // cerr << chordCandidates.size() << endl;
Chris@23 1034
Chris@23 1035 float maxval = 0; // will be the value of the most salient *chord change* in this frame
Chris@23 1036 float maxindex = 0; //... and the index thereof
Chris@23 1037 unsigned bestchordL = nChord-1; // index of the best "left" chord
Chris@23 1038 unsigned bestchordR = nChord-1; // index of the best "right" chord
Chris@23 1039
Chris@23 1040 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
Chris@23 1041 // now find the max values on both sides of iWF
Chris@23 1042 // left side:
Chris@23 1043 float maxL = 0;
Chris@23 1044 unsigned maxindL = nChord-1;
Chris@23 1045 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
Chris@23 1046 unsigned iChord = chordCandidates[kChord];
Chris@23 1047 float currsum = 0;
Chris@23 1048 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
Chris@23 1049 currsum += chordogram[count+iFrame][iChord];
matthiasm@10 1050 }
Chris@23 1051 if (iChord == nChord-1) currsum *= 0.8;
Chris@23 1052 if (currsum > maxL) {
Chris@23 1053 maxL = currsum;
Chris@23 1054 maxindL = iChord;
Chris@23 1055 }
Chris@23 1056 }
Chris@23 1057 // right side:
Chris@23 1058 float maxR = 0;
Chris@23 1059 unsigned maxindR = nChord-1;
Chris@23 1060 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
Chris@23 1061 unsigned iChord = chordCandidates[kChord];
Chris@23 1062 float currsum = 0;
Chris@23 1063 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
Chris@23 1064 currsum += chordogram[count+iFrame][iChord];
Chris@23 1065 }
Chris@23 1066 if (iChord == nChord-1) currsum *= 0.8;
Chris@23 1067 if (currsum > maxR) {
Chris@23 1068 maxR = currsum;
Chris@23 1069 maxindR = iChord;
Chris@23 1070 }
Chris@23 1071 }
Chris@23 1072 if (maxL+maxR > maxval) {
Chris@23 1073 maxval = maxL+maxR;
Chris@23 1074 maxindex = iWF;
Chris@23 1075 bestchordL = maxindL;
Chris@23 1076 bestchordR = maxindR;
Chris@23 1077 }
matthiasm@3 1078
Chris@23 1079 }
Chris@23 1080 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
Chris@23 1081 // add a score to every chord-frame-point that was part of a maximum
Chris@23 1082 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
Chris@23 1083 scoreChordogram[iFrame+count][bestchordL]++;
Chris@23 1084 }
Chris@23 1085 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
Chris@23 1086 scoreChordogram[iFrame+count][bestchordR]++;
Chris@23 1087 }
Chris@23 1088 if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
Chris@23 1089 count++;
Chris@23 1090 }
Chris@23 1091 // cerr << "******* agent finished *******" << endl;
Chris@23 1092 count = 0;
Chris@23 1093 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
Chris@23 1094 float maxval = 0; // will be the value of the most salient chord in this frame
Chris@23 1095 float maxindex = 0; //... and the index thereof
Chris@23 1096 for (unsigned iChord = 0; iChord < nChord; iChord++) {
Chris@23 1097 if (scoreChordogram[count][iChord] > maxval) {
Chris@23 1098 maxval = scoreChordogram[count][iChord];
Chris@23 1099 maxindex = iChord;
Chris@23 1100 // cerr << iChord << endl;
Chris@23 1101 }
Chris@23 1102 }
Chris@23 1103 chordSequence.push_back(maxindex);
Chris@23 1104 // cerr << "before modefilter, maxindex: " << maxindex << endl;
Chris@23 1105 count++;
Chris@23 1106 }
Chris@23 1107 // cerr << "******* mode filter done *******" << endl;
matthiasm@10 1108
matthiasm@3 1109
Chris@23 1110 // mode filter on chordSequence
Chris@23 1111 count = 0;
Chris@23 1112 string oldChord = "";
Chris@23 1113 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
Chris@23 1114 Feature f6 = *it;
Chris@23 1115 Feature f7; // chord estimate
Chris@23 1116 f7.hasTimestamp = true;
Chris@23 1117 f7.timestamp = f6.timestamp;
Chris@23 1118 Feature f8; // chord estimate
Chris@23 1119 f8.hasTimestamp = true;
Chris@23 1120 f8.timestamp = f6.timestamp;
matthiasm@17 1121
Chris@23 1122 vector<int> chordCount = vector<int>(nChord,0);
Chris@23 1123 int maxChordCount = 0;
Chris@23 1124 int maxChordIndex = nChord-1;
Chris@23 1125 string maxChord;
Chris@23 1126 int startIndex = max(count - halfwindowlength/2,0);
Chris@23 1127 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
Chris@23 1128 for (int i = startIndex; i < endIndex; i++) {
Chris@23 1129 chordCount[chordSequence[i]]++;
Chris@23 1130 if (chordCount[chordSequence[i]] > maxChordCount) {
Chris@23 1131 // cerr << "start index " << startIndex << endl;
Chris@23 1132 maxChordCount++;
Chris@23 1133 maxChordIndex = chordSequence[i];
Chris@23 1134 maxChord = m_chordnames[maxChordIndex];
Chris@23 1135 }
Chris@23 1136 }
Chris@23 1137 // chordSequence[count] = maxChordIndex;
Chris@23 1138 // cerr << maxChordIndex << endl;
Chris@23 1139 f8.values.push_back(chordchange[count]/(halfwindowlength*2));
Chris@23 1140 // cerr << chordchange[count] << endl;
Chris@23 1141 fsOut[9].push_back(f8);
Chris@23 1142 if (oldChord != maxChord) {
Chris@23 1143 oldChord = maxChord;
matthiasm@3 1144
Chris@23 1145 // char buffer1 [50];
Chris@23 1146 // if (maxChordIndex < nChord - 1) {
Chris@23 1147 // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
Chris@23 1148 // } else {
Chris@23 1149 // sprintf(buffer1, "N");
Chris@23 1150 // }
Chris@23 1151 // f7.label = buffer1;
Chris@23 1152 f7.label = m_chordnames[maxChordIndex];
Chris@23 1153 fsOut[7].push_back(f7);
Chris@23 1154 }
Chris@23 1155 count++;
Chris@23 1156 }
Chris@23 1157 Feature f7; // last chord estimate
Chris@23 1158 f7.hasTimestamp = true;
Chris@23 1159 f7.timestamp = fsOut[6][fsOut[6].size()-1].timestamp;
Chris@23 1160 f7.label = "N";
Chris@23 1161 fsOut[7].push_back(f7);
Chris@23 1162 cerr << "done." << endl;
Chris@23 1163 // // musicity
Chris@23 1164 // count = 0;
Chris@23 1165 // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
Chris@23 1166 // vector<float> musicityValue;
Chris@23 1167 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
Chris@23 1168 // Feature f4 = *it;
Chris@23 1169 //
Chris@23 1170 // int startIndex = max(count - musicitykernelwidth/2,0);
Chris@23 1171 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
Chris@23 1172 // float chromasum = 0;
Chris@23 1173 // float diffsum = 0;
Chris@23 1174 // for (int k = 0; k < 12; k++) {
Chris@23 1175 // for (int i = startIndex + 1; i < endIndex; i++) {
Chris@23 1176 // chromasum += pow(fsOut[4][i].values[k],2);
Chris@23 1177 // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]);
Chris@23 1178 // }
Chris@23 1179 // }
Chris@23 1180 // diffsum /= chromasum;
Chris@23 1181 // musicityValue.push_back(diffsum);
Chris@23 1182 // count++;
Chris@23 1183 // }
Chris@23 1184 //
Chris@23 1185 // float musicityThreshold = 0.44;
Chris@23 1186 // if (m_stepSize == 4096) {
Chris@23 1187 // musicityThreshold = 0.74;
Chris@23 1188 // }
Chris@23 1189 // if (m_stepSize == 4410) {
Chris@23 1190 // musicityThreshold = 0.77;
Chris@23 1191 // }
Chris@23 1192 //
Chris@23 1193 // count = 0;
Chris@23 1194 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
Chris@23 1195 // Feature f4 = *it;
Chris@23 1196 // Feature f8; // musicity
Chris@23 1197 // Feature f9; // musicity segmenter
Chris@23 1198 //
Chris@23 1199 // f8.hasTimestamp = true;
Chris@23 1200 // f8.timestamp = f4.timestamp;
Chris@23 1201 // f9.hasTimestamp = true;
Chris@23 1202 // f9.timestamp = f4.timestamp;
Chris@23 1203 //
Chris@23 1204 // int startIndex = max(count - musicitykernelwidth/2,0);
Chris@23 1205 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
Chris@23 1206 // int musicityCount = 0;
Chris@23 1207 // for (int i = startIndex; i <= endIndex; i++) {
Chris@23 1208 // if (musicityValue[i] > musicityThreshold) musicityCount++;
Chris@23 1209 // }
Chris@23 1210 // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1);
Chris@23 1211 //
Chris@23 1212 // if (isSpeech) {
Chris@23 1213 // if (oldlabeltype != 2) {
Chris@23 1214 // f9.label = "Speech";
Chris@23 1215 // fsOut[9].push_back(f9);
Chris@23 1216 // oldlabeltype = 2;
Chris@23 1217 // }
Chris@23 1218 // } else {
Chris@23 1219 // if (oldlabeltype != 1) {
Chris@23 1220 // f9.label = "Music";
Chris@23 1221 // fsOut[9].push_back(f9);
Chris@23 1222 // oldlabeltype = 1;
Chris@23 1223 // }
Chris@23 1224 // }
Chris@23 1225 // f8.values.push_back(musicityValue[count]);
Chris@23 1226 // fsOut[8].push_back(f8);
Chris@23 1227 // count++;
Chris@23 1228 // }
Chris@23 1229 return fsOut;
matthiasm@0 1230
matthiasm@0 1231 }
matthiasm@0 1232