annotate NNLSChroma.cpp @ 29:da3195577172 matthiasm-plugin

* Revert to the previous version of nnls.c[c], which used single-precision floats (better for us). But we don't like the static local variables -- change those for thread-safety.
author Chris Cannam
date Thu, 21 Oct 2010 20:50:22 +0100
parents 690bd9148467
children cf8898a0174c
rev   line source
Chris@23 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
matthiasm@0 2
matthiasm@0 3 #include "NNLSChroma.h"
Chris@27 4
Chris@27 5 #include "chromamethods.h"
Chris@27 6
Chris@27 7 #include <cstdlib>
Chris@27 8 #include <fstream>
matthiasm@0 9 #include <cmath>
matthiasm@9 10
Chris@27 11 #include <algorithm>
matthiasm@0 12
matthiasm@0 13 const bool debug_on = false;
matthiasm@0 14
Chris@27 15 const vector<float> hw(hammingwind, hammingwind+19);
matthiasm@0 16
matthiasm@0 17 NNLSChroma::NNLSChroma(float inputSampleRate) :
Chris@23 18 Plugin(inputSampleRate),
Chris@23 19 m_fl(0),
Chris@23 20 m_blockSize(0),
Chris@23 21 m_stepSize(0),
Chris@23 22 m_lengthOfNoteIndex(0),
Chris@23 23 m_meanTuning0(0),
Chris@23 24 m_meanTuning1(0),
Chris@23 25 m_meanTuning2(0),
Chris@23 26 m_localTuning0(0),
Chris@23 27 m_localTuning1(0),
Chris@23 28 m_localTuning2(0),
Chris@23 29 m_paling(1.0),
Chris@23 30 m_preset(0.0),
Chris@23 31 m_localTuning(0),
Chris@23 32 m_kernelValue(0),
Chris@23 33 m_kernelFftIndex(0),
Chris@23 34 m_kernelNoteIndex(0),
Chris@23 35 m_dict(0),
Chris@23 36 m_tuneLocal(false),
Chris@23 37 m_dictID(0),
Chris@23 38 m_chorddict(0),
Chris@23 39 m_chordnames(0),
Chris@23 40 m_doNormalizeChroma(0),
Chris@23 41 m_rollon(0.01)
matthiasm@0 42 {
Chris@23 43 if (debug_on) cerr << "--> NNLSChroma" << endl;
matthiasm@7 44
Chris@23 45 // make the *note* dictionary matrix
Chris@23 46 m_dict = new float[nNote * 84];
Chris@23 47 for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0;
Chris@23 48 dictionaryMatrix(m_dict);
matthiasm@7 49
Chris@23 50 // get the *chord* dictionary from file (if the file exists)
Chris@23 51 m_chordnames = chordDictionary(&m_chorddict);
matthiasm@0 52 }
matthiasm@0 53
matthiasm@0 54
matthiasm@0 55 NNLSChroma::~NNLSChroma()
matthiasm@0 56 {
Chris@23 57 if (debug_on) cerr << "--> ~NNLSChroma" << endl;
Chris@23 58 delete [] m_dict;
Chris@23 59 // delete [] m_chorddict;
Chris@23 60 // delete m_chordnames;
matthiasm@0 61 }
matthiasm@0 62
matthiasm@0 63 string
matthiasm@0 64 NNLSChroma::getIdentifier() const
matthiasm@0 65 {
Chris@23 66 if (debug_on) cerr << "--> getIdentifier" << endl;
matthiasm@0 67 return "nnls_chroma";
matthiasm@0 68 }
matthiasm@0 69
matthiasm@0 70 string
matthiasm@0 71 NNLSChroma::getName() const
matthiasm@0 72 {
Chris@23 73 if (debug_on) cerr << "--> getName" << endl;
matthiasm@0 74 return "NNLS Chroma";
matthiasm@0 75 }
matthiasm@0 76
matthiasm@0 77 string
matthiasm@0 78 NNLSChroma::getDescription() const
matthiasm@0 79 {
matthiasm@0 80 // Return something helpful here!
Chris@23 81 if (debug_on) cerr << "--> getDescription" << endl;
matthiasm@13 82 return "This plugin provides a number of features derived from a log-frequency amplitude spectrum of the DFT: some variants of the log-frequency spectrum, including a semitone spectrum derived from approximate transcription using the NNLS algorithm; based on this semitone spectrum, chroma features and a simple chord estimate.";
matthiasm@0 83 }
matthiasm@0 84
matthiasm@0 85 string
matthiasm@0 86 NNLSChroma::getMaker() const
matthiasm@0 87 {
Chris@23 88 if (debug_on) cerr << "--> getMaker" << endl;
matthiasm@0 89 // Your name here
matthiasm@0 90 return "Matthias Mauch";
matthiasm@0 91 }
matthiasm@0 92
matthiasm@0 93 int
matthiasm@0 94 NNLSChroma::getPluginVersion() const
matthiasm@0 95 {
Chris@23 96 if (debug_on) cerr << "--> getPluginVersion" << endl;
matthiasm@0 97 // Increment this each time you release a version that behaves
matthiasm@0 98 // differently from the previous one
matthiasm@0 99 return 1;
matthiasm@0 100 }
matthiasm@0 101
matthiasm@0 102 string
matthiasm@0 103 NNLSChroma::getCopyright() const
matthiasm@0 104 {
Chris@23 105 if (debug_on) cerr << "--> getCopyright" << endl;
matthiasm@0 106 // This function is not ideally named. It does not necessarily
matthiasm@0 107 // need to say who made the plugin -- getMaker does that -- but it
matthiasm@0 108 // should indicate the terms under which it is distributed. For
matthiasm@0 109 // example, "Copyright (year). All Rights Reserved", or "GPL"
matthiasm@0 110 return "Copyright (2010). All rights reserved.";
matthiasm@0 111 }
matthiasm@0 112
matthiasm@0 113 NNLSChroma::InputDomain
matthiasm@0 114 NNLSChroma::getInputDomain() const
matthiasm@0 115 {
Chris@23 116 if (debug_on) cerr << "--> getInputDomain" << endl;
matthiasm@0 117 return FrequencyDomain;
matthiasm@0 118 }
matthiasm@0 119
matthiasm@0 120 size_t
matthiasm@0 121 NNLSChroma::getPreferredBlockSize() const
matthiasm@0 122 {
Chris@23 123 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
matthiasm@0 124 return 16384; // 0 means "I can handle any block size"
matthiasm@0 125 }
matthiasm@0 126
matthiasm@0 127 size_t
matthiasm@0 128 NNLSChroma::getPreferredStepSize() const
matthiasm@0 129 {
Chris@23 130 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
matthiasm@0 131 return 2048; // 0 means "anything sensible"; in practice this
Chris@23 132 // means the same as the block size for TimeDomain
Chris@23 133 // plugins, or half of it for FrequencyDomain plugins
matthiasm@0 134 }
matthiasm@0 135
matthiasm@0 136 size_t
matthiasm@0 137 NNLSChroma::getMinChannelCount() const
matthiasm@0 138 {
Chris@23 139 if (debug_on) cerr << "--> getMinChannelCount" << endl;
matthiasm@0 140 return 1;
matthiasm@0 141 }
matthiasm@0 142
matthiasm@0 143 size_t
matthiasm@0 144 NNLSChroma::getMaxChannelCount() const
matthiasm@0 145 {
Chris@23 146 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
matthiasm@0 147 return 1;
matthiasm@0 148 }
matthiasm@0 149
matthiasm@0 150 NNLSChroma::ParameterList
matthiasm@0 151 NNLSChroma::getParameterDescriptors() const
matthiasm@0 152 {
Chris@23 153 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
matthiasm@0 154 ParameterList list;
matthiasm@0 155
matthiasm@3 156 ParameterDescriptor d3;
matthiasm@3 157 d3.identifier = "preset";
matthiasm@3 158 d3.name = "preset";
matthiasm@3 159 d3.description = "Spectral paling: no paling - 0; whitening - 1.";
matthiasm@3 160 d3.unit = "";
Chris@23 161 d3.isQuantized = true;
Chris@23 162 d3.quantizeStep = 1;
matthiasm@3 163 d3.minValue = 0.0;
matthiasm@4 164 d3.maxValue = 3.0;
matthiasm@3 165 d3.defaultValue = 0.0;
matthiasm@3 166 d3.valueNames.push_back("polyphonic pop");
Chris@23 167 d3.valueNames.push_back("polyphonic pop (fast)");
matthiasm@3 168 d3.valueNames.push_back("solo keyboard");
Chris@23 169 d3.valueNames.push_back("manual");
matthiasm@3 170 list.push_back(d3);
matthiasm@4 171
matthiasm@17 172 ParameterDescriptor d5;
Chris@23 173 d5.identifier = "rollon";
Chris@23 174 d5.name = "spectral roll-on";
Chris@23 175 d5.description = "The bins below the spectral roll-on quantile will be set to 0.";
Chris@23 176 d5.unit = "";
Chris@23 177 d5.minValue = 0;
Chris@23 178 d5.maxValue = 1;
Chris@23 179 d5.defaultValue = 0;
Chris@23 180 d5.isQuantized = false;
Chris@23 181 list.push_back(d5);
matthiasm@17 182
matthiasm@4 183 // ParameterDescriptor d0;
matthiasm@4 184 // d0.identifier = "notedict";
matthiasm@4 185 // d0.name = "note dictionary";
matthiasm@4 186 // d0.description = "Notes in different note dictionaries differ by their spectral shapes.";
matthiasm@4 187 // d0.unit = "";
matthiasm@4 188 // d0.minValue = 0;
matthiasm@4 189 // d0.maxValue = 1;
matthiasm@4 190 // d0.defaultValue = 0;
matthiasm@4 191 // d0.isQuantized = true;
matthiasm@4 192 // d0.valueNames.push_back("s = 0.6");
matthiasm@4 193 // d0.valueNames.push_back("no NNLS");
matthiasm@4 194 // d0.quantizeStep = 1.0;
matthiasm@4 195 // list.push_back(d0);
matthiasm@4 196
matthiasm@4 197 ParameterDescriptor d1;
matthiasm@4 198 d1.identifier = "tuningmode";
matthiasm@4 199 d1.name = "tuning mode";
matthiasm@4 200 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
matthiasm@4 201 d1.unit = "";
matthiasm@4 202 d1.minValue = 0;
matthiasm@4 203 d1.maxValue = 1;
matthiasm@4 204 d1.defaultValue = 0;
matthiasm@4 205 d1.isQuantized = true;
matthiasm@4 206 d1.valueNames.push_back("global tuning");
matthiasm@4 207 d1.valueNames.push_back("local tuning");
matthiasm@4 208 d1.quantizeStep = 1.0;
matthiasm@4 209 list.push_back(d1);
matthiasm@4 210
Chris@23 211 // ParameterDescriptor d2;
Chris@23 212 // d2.identifier = "paling";
Chris@23 213 // d2.name = "spectral paling";
Chris@23 214 // d2.description = "Spectral paling: no paling - 0; whitening - 1.";
Chris@23 215 // d2.unit = "";
Chris@23 216 // d2.isQuantized = true;
Chris@23 217 // // d2.quantizeStep = 0.1;
Chris@23 218 // d2.minValue = 0.0;
Chris@23 219 // d2.maxValue = 1.0;
Chris@23 220 // d2.defaultValue = 1.0;
Chris@23 221 // d2.isQuantized = false;
Chris@23 222 // list.push_back(d2);
Chris@23 223 ParameterDescriptor d4;
matthiasm@12 224 d4.identifier = "chromanormalize";
matthiasm@12 225 d4.name = "chroma normalization";
matthiasm@12 226 d4.description = "How shall the chroma vector be normalized?";
matthiasm@12 227 d4.unit = "";
matthiasm@12 228 d4.minValue = 0;
matthiasm@13 229 d4.maxValue = 3;
matthiasm@12 230 d4.defaultValue = 0;
matthiasm@12 231 d4.isQuantized = true;
matthiasm@13 232 d4.valueNames.push_back("none");
matthiasm@13 233 d4.valueNames.push_back("maximum norm");
Chris@23 234 d4.valueNames.push_back("L1 norm");
Chris@23 235 d4.valueNames.push_back("L2 norm");
matthiasm@12 236 d4.quantizeStep = 1.0;
matthiasm@12 237 list.push_back(d4);
matthiasm@4 238
matthiasm@0 239 return list;
matthiasm@0 240 }
matthiasm@0 241
matthiasm@0 242 float
matthiasm@0 243 NNLSChroma::getParameter(string identifier) const
matthiasm@0 244 {
Chris@23 245 if (debug_on) cerr << "--> getParameter" << endl;
matthiasm@0 246 if (identifier == "notedict") {
matthiasm@0 247 return m_dictID;
matthiasm@0 248 }
matthiasm@0 249
matthiasm@0 250 if (identifier == "paling") {
matthiasm@0 251 return m_paling;
matthiasm@0 252 }
matthiasm@17 253
Chris@23 254 if (identifier == "rollon") {
matthiasm@17 255 return m_rollon;
matthiasm@17 256 }
matthiasm@0 257
matthiasm@0 258 if (identifier == "tuningmode") {
matthiasm@0 259 if (m_tuneLocal) {
matthiasm@0 260 return 1.0;
matthiasm@0 261 } else {
matthiasm@0 262 return 0.0;
matthiasm@0 263 }
matthiasm@0 264 }
Chris@23 265 if (identifier == "preset") {
Chris@23 266 return m_preset;
matthiasm@3 267 }
Chris@23 268 if (identifier == "chromanormalize") {
Chris@23 269 return m_doNormalizeChroma;
matthiasm@12 270 }
matthiasm@0 271 return 0;
matthiasm@0 272
matthiasm@0 273 }
matthiasm@0 274
matthiasm@0 275 void
matthiasm@0 276 NNLSChroma::setParameter(string identifier, float value)
matthiasm@0 277 {
Chris@23 278 if (debug_on) cerr << "--> setParameter" << endl;
matthiasm@0 279 if (identifier == "notedict") {
matthiasm@0 280 m_dictID = (int) value;
matthiasm@0 281 }
matthiasm@0 282
matthiasm@0 283 if (identifier == "paling") {
matthiasm@0 284 m_paling = value;
matthiasm@0 285 }
matthiasm@0 286
matthiasm@0 287 if (identifier == "tuningmode") {
matthiasm@0 288 m_tuneLocal = (value > 0) ? true : false;
matthiasm@0 289 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
matthiasm@0 290 }
matthiasm@3 291 if (identifier == "preset") {
matthiasm@3 292 m_preset = value;
Chris@23 293 if (m_preset == 0.0) {
Chris@23 294 m_tuneLocal = false;
Chris@23 295 m_paling = 1.0;
Chris@23 296 m_dictID = 0.0;
Chris@23 297 }
Chris@23 298 if (m_preset == 1.0) {
Chris@23 299 m_tuneLocal = false;
Chris@23 300 m_paling = 1.0;
Chris@23 301 m_dictID = 1.0;
Chris@23 302 }
Chris@23 303 if (m_preset == 2.0) {
Chris@23 304 m_tuneLocal = false;
Chris@23 305 m_paling = 0.7;
Chris@23 306 m_dictID = 0.0;
Chris@23 307 }
matthiasm@3 308 }
Chris@23 309 if (identifier == "chromanormalize") {
Chris@23 310 m_doNormalizeChroma = value;
Chris@23 311 }
matthiasm@17 312
Chris@23 313 if (identifier == "rollon") {
Chris@23 314 m_rollon = value;
Chris@23 315 }
matthiasm@0 316 }
matthiasm@0 317
matthiasm@0 318 NNLSChroma::ProgramList
matthiasm@0 319 NNLSChroma::getPrograms() const
matthiasm@0 320 {
Chris@23 321 if (debug_on) cerr << "--> getPrograms" << endl;
matthiasm@0 322 ProgramList list;
matthiasm@0 323
matthiasm@0 324 // If you have no programs, return an empty list (or simply don't
matthiasm@0 325 // implement this function or getCurrentProgram/selectProgram)
matthiasm@0 326
matthiasm@0 327 return list;
matthiasm@0 328 }
matthiasm@0 329
matthiasm@0 330 string
matthiasm@0 331 NNLSChroma::getCurrentProgram() const
matthiasm@0 332 {
Chris@23 333 if (debug_on) cerr << "--> getCurrentProgram" << endl;
matthiasm@0 334 return ""; // no programs
matthiasm@0 335 }
matthiasm@0 336
matthiasm@0 337 void
matthiasm@0 338 NNLSChroma::selectProgram(string name)
matthiasm@0 339 {
Chris@23 340 if (debug_on) cerr << "--> selectProgram" << endl;
matthiasm@0 341 }
matthiasm@0 342
matthiasm@0 343
matthiasm@0 344 NNLSChroma::OutputList
matthiasm@0 345 NNLSChroma::getOutputDescriptors() const
matthiasm@0 346 {
Chris@23 347 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
matthiasm@0 348 OutputList list;
matthiasm@0 349
matthiasm@0 350 // Make chroma names for the binNames property
matthiasm@0 351 vector<string> chromanames;
matthiasm@0 352 vector<string> bothchromanames;
matthiasm@0 353 for (int iNote = 0; iNote < 24; iNote++) {
matthiasm@0 354 bothchromanames.push_back(notenames[iNote]);
matthiasm@0 355 if (iNote < 12) {
matthiasm@0 356 chromanames.push_back(notenames[iNote]);
matthiasm@0 357 }
matthiasm@0 358 }
matthiasm@0 359
Chris@23 360 // int nNote = 84;
matthiasm@0 361
matthiasm@0 362 // See OutputDescriptor documentation for the possibilities here.
matthiasm@0 363 // Every plugin must have at least one output.
matthiasm@0 364
matthiasm@0 365 OutputDescriptor d0;
matthiasm@0 366 d0.identifier = "tuning";
matthiasm@0 367 d0.name = "Tuning";
matthiasm@0 368 d0.description = "The concert pitch.";
matthiasm@0 369 d0.unit = "Hz";
matthiasm@0 370 d0.hasFixedBinCount = true;
matthiasm@0 371 d0.binCount = 0;
matthiasm@0 372 d0.hasKnownExtents = true;
Chris@23 373 d0.minValue = 427.47;
Chris@23 374 d0.maxValue = 452.89;
matthiasm@0 375 d0.isQuantized = false;
matthiasm@0 376 d0.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@0 377 d0.hasDuration = false;
matthiasm@0 378 list.push_back(d0);
matthiasm@0 379
Chris@23 380 OutputDescriptor d1;
matthiasm@0 381 d1.identifier = "logfreqspec";
matthiasm@0 382 d1.name = "Log-Frequency Spectrum";
matthiasm@0 383 d1.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping.";
matthiasm@0 384 d1.unit = "";
matthiasm@0 385 d1.hasFixedBinCount = true;
matthiasm@0 386 d1.binCount = nNote;
matthiasm@0 387 d1.hasKnownExtents = false;
matthiasm@0 388 d1.isQuantized = false;
matthiasm@0 389 d1.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 390 d1.hasDuration = false;
matthiasm@0 391 d1.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 392 list.push_back(d1);
matthiasm@0 393
Chris@23 394 OutputDescriptor d2;
matthiasm@0 395 d2.identifier = "tunedlogfreqspec";
matthiasm@0 396 d2.name = "Tuned Log-Frequency Spectrum";
matthiasm@0 397 d2.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping, then its tuned using the estimated tuning frequency.";
matthiasm@0 398 d2.unit = "";
matthiasm@0 399 d2.hasFixedBinCount = true;
matthiasm@0 400 d2.binCount = 256;
matthiasm@0 401 d2.hasKnownExtents = false;
matthiasm@0 402 d2.isQuantized = false;
matthiasm@0 403 d2.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 404 d2.hasDuration = false;
matthiasm@0 405 d2.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 406 list.push_back(d2);
matthiasm@0 407
matthiasm@0 408 OutputDescriptor d3;
matthiasm@0 409 d3.identifier = "semitonespectrum";
matthiasm@0 410 d3.name = "Semitone Spectrum";
matthiasm@0 411 d3.description = "A semitone-spaced log-frequency spectrum derived from the third-of-a-semitone-spaced tuned log-frequency spectrum.";
matthiasm@0 412 d3.unit = "";
matthiasm@0 413 d3.hasFixedBinCount = true;
matthiasm@0 414 d3.binCount = 84;
matthiasm@0 415 d3.hasKnownExtents = false;
matthiasm@0 416 d3.isQuantized = false;
matthiasm@0 417 d3.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 418 d3.hasDuration = false;
matthiasm@0 419 d3.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 420 list.push_back(d3);
matthiasm@0 421
matthiasm@0 422 OutputDescriptor d4;
matthiasm@0 423 d4.identifier = "chroma";
matthiasm@0 424 d4.name = "Chromagram";
matthiasm@0 425 d4.description = "Tuning-adjusted chromagram from NNLS soft transcription, with an emphasis on the medium note range.";
matthiasm@0 426 d4.unit = "";
matthiasm@0 427 d4.hasFixedBinCount = true;
matthiasm@0 428 d4.binCount = 12;
matthiasm@0 429 d4.binNames = chromanames;
matthiasm@0 430 d4.hasKnownExtents = false;
matthiasm@0 431 d4.isQuantized = false;
matthiasm@0 432 d4.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 433 d4.hasDuration = false;
matthiasm@0 434 d4.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 435 list.push_back(d4);
matthiasm@0 436
matthiasm@0 437 OutputDescriptor d5;
matthiasm@0 438 d5.identifier = "basschroma";
matthiasm@0 439 d5.name = "Bass Chromagram";
matthiasm@0 440 d5.description = "Tuning-adjusted bass chromagram from NNLS soft transcription, with an emphasis on the bass note range.";
matthiasm@0 441 d5.unit = "";
matthiasm@0 442 d5.hasFixedBinCount = true;
matthiasm@0 443 d5.binCount = 12;
matthiasm@0 444 d5.binNames = chromanames;
matthiasm@0 445 d5.hasKnownExtents = false;
matthiasm@0 446 d5.isQuantized = false;
matthiasm@0 447 d5.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 448 d5.hasDuration = false;
matthiasm@0 449 d5.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 450 list.push_back(d5);
matthiasm@0 451
matthiasm@0 452 OutputDescriptor d6;
matthiasm@0 453 d6.identifier = "bothchroma";
matthiasm@0 454 d6.name = "Chromagram and Bass Chromagram";
matthiasm@0 455 d6.description = "Tuning-adjusted chromagram and bass chromagram (stacked on top of each other) from NNLS soft transcription.";
matthiasm@0 456 d6.unit = "";
matthiasm@0 457 d6.hasFixedBinCount = true;
matthiasm@0 458 d6.binCount = 24;
matthiasm@0 459 d6.binNames = bothchromanames;
matthiasm@0 460 d6.hasKnownExtents = false;
matthiasm@0 461 d6.isQuantized = false;
matthiasm@0 462 d6.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 463 d6.hasDuration = false;
matthiasm@0 464 d6.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 465 list.push_back(d6);
matthiasm@0 466
matthiasm@0 467 OutputDescriptor d7;
matthiasm@0 468 d7.identifier = "simplechord";
matthiasm@0 469 d7.name = "Simple Chord Estimate";
matthiasm@0 470 d7.description = "A simple chord estimate based on the inner product of chord templates with the smoothed chroma.";
matthiasm@0 471 d7.unit = "";
matthiasm@0 472 d7.hasFixedBinCount = true;
matthiasm@0 473 d7.binCount = 0;
matthiasm@0 474 d7.hasKnownExtents = false;
matthiasm@0 475 d7.isQuantized = false;
matthiasm@0 476 d7.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@0 477 d7.hasDuration = false;
matthiasm@0 478 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 479 list.push_back(d7);
matthiasm@0 480
Chris@23 481 //
Chris@23 482 // OutputDescriptor d9;
Chris@23 483 // d9.identifier = "inconsistencysegment";
Chris@23 484 // d9.name = "Harmonic inconsistency segmenter";
Chris@23 485 // d9.description = "Segments the audio based on the harmonic inconsistency value into speech and music.";
Chris@23 486 // d9.unit = "";
Chris@23 487 // d9.hasFixedBinCount = true;
Chris@23 488 // d9.binCount = 0;
Chris@23 489 // d9.hasKnownExtents = true;
Chris@23 490 // d9.minValue = 0.1;
Chris@23 491 // d9.maxValue = 0.9;
Chris@23 492 // d9.isQuantized = false;
Chris@23 493 // d9.sampleType = OutputDescriptor::VariableSampleRate;
Chris@23 494 // d9.hasDuration = false;
Chris@23 495 // d9.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
Chris@23 496 // list.push_back(d9);
Chris@23 497 //
Chris@23 498 OutputDescriptor d10;
Chris@23 499 d10.identifier = "localtuning";
Chris@23 500 d10.name = "Local tuning";
Chris@23 501 d10.description = "Tuning based on the history up to this timestamp.";
Chris@23 502 d10.unit = "Hz";
Chris@23 503 d10.hasFixedBinCount = true;
Chris@23 504 d10.binCount = 1;
Chris@23 505 d10.hasKnownExtents = true;
Chris@23 506 d10.minValue = 427.47;
Chris@23 507 d10.maxValue = 452.89;
Chris@23 508 d10.isQuantized = false;
Chris@23 509 d10.sampleType = OutputDescriptor::FixedSampleRate;
Chris@23 510 d10.hasDuration = false;
Chris@23 511 // d10.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
Chris@23 512 list.push_back(d10);
matthiasm@17 513
Chris@23 514 OutputDescriptor d8;
matthiasm@17 515 d8.identifier = "harmonicchange";
matthiasm@17 516 d8.name = "Harmonic change value";
matthiasm@17 517 d8.description = "Harmonic change.";
matthiasm@17 518 d8.unit = "";
matthiasm@17 519 d8.hasFixedBinCount = true;
matthiasm@17 520 d8.binCount = 1;
matthiasm@17 521 d8.hasKnownExtents = true;
Chris@23 522 d8.minValue = 0.0;
Chris@23 523 d8.maxValue = 0.999;
matthiasm@17 524 d8.isQuantized = false;
matthiasm@17 525 d8.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@17 526 d8.hasDuration = false;
matthiasm@17 527 // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@17 528 list.push_back(d8);
matthiasm@1 529
matthiasm@0 530 return list;
matthiasm@0 531 }
matthiasm@0 532
matthiasm@0 533
matthiasm@0 534 bool
matthiasm@0 535 NNLSChroma::initialise(size_t channels, size_t stepSize, size_t blockSize)
matthiasm@0 536 {
Chris@23 537 if (debug_on) {
Chris@23 538 cerr << "--> initialise";
Chris@23 539 }
matthiasm@1 540
matthiasm@0 541 if (channels < getMinChannelCount() ||
matthiasm@0 542 channels > getMaxChannelCount()) return false;
matthiasm@0 543 m_blockSize = blockSize;
matthiasm@0 544 m_stepSize = stepSize;
matthiasm@0 545 frameCount = 0;
Chris@23 546 int tempn = 256 * m_blockSize/2;
Chris@23 547 // cerr << "length of tempkernel : " << tempn << endl;
Chris@23 548 float *tempkernel;
matthiasm@1 549
Chris@23 550 tempkernel = new float[tempn];
matthiasm@1 551
Chris@23 552 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
Chris@23 553 m_kernelValue.clear();
Chris@23 554 m_kernelFftIndex.clear();
Chris@23 555 m_kernelNoteIndex.clear();
Chris@23 556 int countNonzero = 0;
Chris@23 557 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
Chris@23 558 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) {
Chris@23 559 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
Chris@23 560 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
Chris@23 561 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
Chris@23 562 countNonzero++;
Chris@23 563 }
Chris@23 564 m_kernelFftIndex.push_back(iFFT);
Chris@23 565 m_kernelNoteIndex.push_back(iNote);
Chris@23 566 }
Chris@23 567 }
Chris@23 568 }
Chris@23 569 // cerr << "nonzero count : " << countNonzero << endl;
Chris@23 570 delete [] tempkernel;
Chris@23 571 ofstream myfile;
Chris@23 572 myfile.open ("matrix.txt");
matthiasm@3 573 // myfile << "Writing this to a file.\n";
Chris@23 574 for (int i = 0; i < nNote * 84; ++i) {
Chris@23 575 myfile << m_dict[i] << endl;
Chris@23 576 }
matthiasm@3 577 myfile.close();
matthiasm@0 578 return true;
matthiasm@0 579 }
matthiasm@0 580
matthiasm@0 581 void
matthiasm@0 582 NNLSChroma::reset()
matthiasm@0 583 {
Chris@23 584 if (debug_on) cerr << "--> reset";
matthiasm@4 585
matthiasm@0 586 // Clear buffers, reset stored values, etc
Chris@23 587 frameCount = 0;
Chris@23 588 m_dictID = 0;
Chris@23 589 m_fl.clear();
Chris@23 590 m_meanTuning0 = 0;
Chris@23 591 m_meanTuning1 = 0;
Chris@23 592 m_meanTuning2 = 0;
Chris@23 593 m_localTuning0 = 0;
Chris@23 594 m_localTuning1 = 0;
Chris@23 595 m_localTuning2 = 0;
Chris@23 596 m_localTuning.clear();
matthiasm@0 597 }
matthiasm@0 598
matthiasm@0 599 NNLSChroma::FeatureSet
matthiasm@0 600 NNLSChroma::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
matthiasm@0 601 {
Chris@23 602 if (debug_on) cerr << "--> process" << endl;
Chris@23 603 frameCount++;
Chris@23 604 float *magnitude = new float[m_blockSize/2];
matthiasm@0 605
Chris@23 606 Feature f10; // local tuning
Chris@23 607 f10.hasTimestamp = true;
Chris@23 608 f10.timestamp = timestamp;
Chris@23 609 const float *fbuf = inputBuffers[0];
Chris@23 610 float energysum = 0;
Chris@23 611 // make magnitude
Chris@23 612 float maxmag = -10000;
Chris@23 613 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
Chris@23 614 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
Chris@23 615 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
Chris@23 616 if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin];
Chris@23 617 if (m_rollon > 0) {
Chris@23 618 energysum += pow(magnitude[iBin],2);
Chris@23 619 }
Chris@23 620 }
matthiasm@14 621
Chris@23 622 float cumenergy = 0;
Chris@23 623 if (m_rollon > 0) {
Chris@23 624 for (size_t iBin = 2; iBin < m_blockSize/2; iBin++) {
Chris@23 625 cumenergy += pow(magnitude[iBin],2);
Chris@23 626 if (cumenergy < energysum * m_rollon) magnitude[iBin-2] = 0;
Chris@23 627 else break;
Chris@23 628 }
Chris@23 629 }
matthiasm@17 630
Chris@23 631 if (maxmag < 2) {
Chris@23 632 // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl;
Chris@23 633 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
Chris@23 634 magnitude[iBin] = 0;
Chris@23 635 }
Chris@23 636 }
matthiasm@4 637
Chris@23 638 // note magnitude mapping using pre-calculated matrix
Chris@23 639 float *nm = new float[nNote]; // note magnitude
Chris@23 640 for (size_t iNote = 0; iNote < nNote; iNote++) {
Chris@23 641 nm[iNote] = 0; // initialise as 0
Chris@23 642 }
Chris@23 643 int binCount = 0;
Chris@23 644 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
Chris@23 645 // cerr << ".";
Chris@23 646 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
Chris@23 647 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl;
Chris@23 648 binCount++;
Chris@23 649 }
Chris@23 650 // cerr << nm[20];
Chris@23 651 // cerr << endl;
matthiasm@0 652
matthiasm@0 653
matthiasm@0 654 float one_over_N = 1.0/frameCount;
matthiasm@0 655 // update means of complex tuning variables
matthiasm@0 656 m_meanTuning0 *= float(frameCount-1)*one_over_N;
matthiasm@0 657 m_meanTuning1 *= float(frameCount-1)*one_over_N;
matthiasm@0 658 m_meanTuning2 *= float(frameCount-1)*one_over_N;
matthiasm@0 659
matthiasm@0 660 for (int iTone = 0; iTone < 160; iTone = iTone + 3) {
matthiasm@0 661 m_meanTuning0 += nm[iTone + 0]*one_over_N;
matthiasm@0 662 m_meanTuning1 += nm[iTone + 1]*one_over_N;
matthiasm@0 663 m_meanTuning2 += nm[iTone + 2]*one_over_N;
Chris@23 664 float ratioOld = 0.997;
matthiasm@3 665 m_localTuning0 *= ratioOld; m_localTuning0 += nm[iTone + 0] * (1 - ratioOld);
matthiasm@3 666 m_localTuning1 *= ratioOld; m_localTuning1 += nm[iTone + 1] * (1 - ratioOld);
matthiasm@3 667 m_localTuning2 *= ratioOld; m_localTuning2 += nm[iTone + 2] * (1 - ratioOld);
matthiasm@0 668 }
matthiasm@0 669
matthiasm@0 670 // if (m_tuneLocal) {
Chris@23 671 // local tuning
Chris@23 672 float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2;
Chris@23 673 float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2;
Chris@23 674 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
Chris@23 675 m_localTuning.push_back(normalisedtuning);
Chris@23 676 float tuning440 = 440 * pow(2,normalisedtuning/12);
Chris@23 677 f10.values.push_back(tuning440);
Chris@23 678 // cerr << tuning440 << endl;
matthiasm@0 679 // }
matthiasm@0 680
Chris@23 681 Feature f1; // logfreqspec
Chris@23 682 f1.hasTimestamp = true;
matthiasm@0 683 f1.timestamp = timestamp;
Chris@23 684 for (size_t iNote = 0; iNote < nNote; iNote++) {
Chris@23 685 f1.values.push_back(nm[iNote]);
Chris@23 686 }
matthiasm@0 687
Chris@23 688 FeatureSet fs;
Chris@23 689 fs[1].push_back(f1);
matthiasm@3 690 fs[8].push_back(f10);
matthiasm@0 691
matthiasm@0 692 // deletes
matthiasm@0 693 delete[] magnitude;
matthiasm@0 694 delete[] nm;
matthiasm@0 695
matthiasm@0 696 m_fl.push_back(f1); // remember note magnitude for getRemainingFeatures
Chris@23 697 char * pPath;
Chris@23 698 pPath = getenv ("VAMP_PATH");
matthiasm@7 699
matthiasm@7 700
Chris@23 701 return fs;
matthiasm@0 702 }
matthiasm@0 703
matthiasm@0 704 NNLSChroma::FeatureSet
matthiasm@0 705 NNLSChroma::getRemainingFeatures()
matthiasm@0 706 {
Chris@23 707 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
Chris@23 708 FeatureSet fsOut;
Chris@23 709 if (m_fl.size() == 0) return fsOut;
Chris@23 710 int nChord = m_chordnames.size();
Chris@23 711 //
Chris@23 712 /** Calculate Tuning
Chris@23 713 calculate tuning from (using the angle of the complex number defined by the
Chris@23 714 cumulative mean real and imag values)
Chris@23 715 **/
Chris@23 716 float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
Chris@23 717 float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
Chris@23 718 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
Chris@23 719 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
Chris@23 720 int intShift = floor(normalisedtuning * 3);
Chris@23 721 float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
matthiasm@1 722
Chris@23 723 char buffer0 [50];
matthiasm@1 724
Chris@23 725 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
matthiasm@1 726
Chris@23 727 // cerr << "normalisedtuning: " << normalisedtuning << '\n';
matthiasm@1 728
Chris@23 729 // push tuning to FeatureSet fsOut
Chris@23 730 Feature f0; // tuning
Chris@23 731 f0.hasTimestamp = true;
Chris@23 732 f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));;
Chris@23 733 f0.label = buffer0;
Chris@23 734 fsOut[0].push_back(f0);
matthiasm@1 735
Chris@23 736 /** Tune Log-Frequency Spectrogram
Chris@23 737 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
Chris@23 738 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
Chris@23 739 **/
Chris@23 740 cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... ";
matthiasm@13 741
Chris@23 742 float tempValue = 0;
Chris@23 743 float dbThreshold = 0; // relative to the background spectrum
Chris@23 744 float thresh = pow(10,dbThreshold/20);
Chris@23 745 // cerr << "tune local ? " << m_tuneLocal << endl;
Chris@23 746 int count = 0;
matthiasm@1 747
Chris@23 748 for (FeatureList::iterator i = m_fl.begin(); i != m_fl.end(); ++i) {
Chris@23 749 Feature f1 = *i;
Chris@23 750 Feature f2; // tuned log-frequency spectrum
Chris@23 751 f2.hasTimestamp = true;
Chris@23 752 f2.timestamp = f1.timestamp;
Chris@23 753 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
matthiasm@1 754
Chris@23 755 if (m_tuneLocal) {
Chris@23 756 intShift = floor(m_localTuning[count] * 3);
Chris@23 757 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
Chris@23 758 }
matthiasm@1 759
Chris@23 760 // cerr << intShift << " " << intFactor << endl;
matthiasm@1 761
Chris@23 762 for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
Chris@23 763 tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor;
Chris@23 764 f2.values.push_back(tempValue);
Chris@23 765 }
matthiasm@1 766
Chris@23 767 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
Chris@23 768 vector<float> runningmean = SpecialConvolution(f2.values,hw);
Chris@23 769 vector<float> runningstd;
Chris@23 770 for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance)
Chris@23 771 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
Chris@23 772 }
Chris@23 773 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
Chris@23 774 for (int i = 0; i < 256; i++) {
Chris@23 775 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
Chris@23 776 if (runningstd[i] > 0) {
Chris@23 777 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
Chris@23 778 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
Chris@23 779 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
Chris@23 780 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
Chris@23 781 }
Chris@23 782 if (f2.values[i] < 0) {
Chris@23 783 cerr << "ERROR: negative value in logfreq spectrum" << endl;
Chris@23 784 }
Chris@23 785 }
Chris@23 786 fsOut[2].push_back(f2);
Chris@23 787 count++;
Chris@23 788 }
Chris@23 789 cerr << "done." << endl;
matthiasm@1 790
Chris@23 791 /** Semitone spectrum and chromagrams
Chris@23 792 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
Chris@23 793 is inferred using a non-negative least squares algorithm.
Chris@23 794 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
Chris@23 795 bass and treble stacked onto each other).
Chris@23 796 **/
Chris@23 797 if (m_dictID == 1) {
Chris@23 798 cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... ";
Chris@23 799 } else {
Chris@23 800 cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... ";
Chris@23 801 }
matthiasm@13 802
matthiasm@1 803
Chris@23 804 vector<vector<float> > chordogram;
Chris@23 805 vector<vector<int> > scoreChordogram;
Chris@23 806 vector<float> chordchange = vector<float>(fsOut[2].size(),0);
Chris@23 807 vector<float> oldchroma = vector<float>(12,0);
Chris@23 808 vector<float> oldbasschroma = vector<float>(12,0);
Chris@23 809 count = 0;
matthiasm@9 810
Chris@23 811 for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) {
Chris@23 812 Feature f2 = *it; // logfreq spectrum
Chris@23 813 Feature f3; // semitone spectrum
Chris@23 814 Feature f4; // treble chromagram
Chris@23 815 Feature f5; // bass chromagram
Chris@23 816 Feature f6; // treble and bass chromagram
matthiasm@1 817
Chris@23 818 f3.hasTimestamp = true;
Chris@23 819 f3.timestamp = f2.timestamp;
matthiasm@1 820
Chris@23 821 f4.hasTimestamp = true;
Chris@23 822 f4.timestamp = f2.timestamp;
matthiasm@1 823
Chris@23 824 f5.hasTimestamp = true;
Chris@23 825 f5.timestamp = f2.timestamp;
matthiasm@1 826
Chris@23 827 f6.hasTimestamp = true;
Chris@23 828 f6.timestamp = f2.timestamp;
matthiasm@1 829
Chris@29 830 float b[256];
matthiasm@1 831
Chris@23 832 bool some_b_greater_zero = false;
Chris@23 833 float sumb = 0;
Chris@23 834 for (int i = 0; i < 256; i++) {
Chris@23 835 // b[i] = m_dict[(256 * count + i) % (256 * 84)];
Chris@23 836 b[i] = f2.values[i];
Chris@23 837 sumb += b[i];
Chris@23 838 if (b[i] > 0) {
Chris@23 839 some_b_greater_zero = true;
Chris@23 840 }
Chris@23 841 }
matthiasm@1 842
Chris@23 843 // here's where the non-negative least squares algorithm calculates the note activation x
matthiasm@1 844
Chris@23 845 vector<float> chroma = vector<float>(12, 0);
Chris@23 846 vector<float> basschroma = vector<float>(12, 0);
Chris@23 847 float currval;
Chris@23 848 unsigned iSemitone = 0;
matthiasm@1 849
Chris@23 850 if (some_b_greater_zero) {
Chris@23 851 if (m_dictID == 1) {
Chris@23 852 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
Chris@23 853 currval = 0;
Chris@23 854 currval += b[iNote + 1 + -1] * 0.5;
Chris@23 855 currval += b[iNote + 1 + 0] * 1.0;
Chris@23 856 currval += b[iNote + 1 + 1] * 0.5;
Chris@23 857 f3.values.push_back(currval);
Chris@23 858 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
Chris@23 859 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
Chris@23 860 iSemitone++;
Chris@23 861 }
matthiasm@1 862
Chris@23 863 } else {
Chris@29 864 float x[84+1000];
Chris@23 865 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
Chris@23 866 vector<int> signifIndex;
Chris@23 867 int index=0;
Chris@23 868 sumb /= 84.0;
Chris@23 869 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
Chris@23 870 float currval = 0;
Chris@23 871 currval += b[iNote + 1 + -1];
Chris@23 872 currval += b[iNote + 1 + 0];
Chris@23 873 currval += b[iNote + 1 + 1];
Chris@23 874 if (currval > 0) signifIndex.push_back(index);
Chris@23 875 f3.values.push_back(0); // fill the values, change later
Chris@23 876 index++;
Chris@23 877 }
Chris@29 878 float rnorm;
Chris@29 879 float w[84+1000];
Chris@29 880 float zz[84+1000];
Chris@23 881 int indx[84+1000];
Chris@23 882 int mode;
Chris@23 883 int dictsize = 256*signifIndex.size();
Chris@23 884 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
Chris@29 885 float *curr_dict = new float[dictsize];
Chris@23 886 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
Chris@23 887 for (unsigned iBin = 0; iBin < 256; iBin++) {
Chris@23 888 curr_dict[iNote * 256 + iBin] = 1.0 * m_dict[signifIndex[iNote] * 256 + iBin];
Chris@23 889 }
Chris@23 890 }
Chris@29 891 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
Chris@23 892 delete [] curr_dict;
Chris@23 893 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
Chris@23 894 f3.values[signifIndex[iNote]] = x[iNote];
Chris@23 895 // cerr << mode << endl;
Chris@23 896 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
Chris@23 897 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
Chris@23 898 }
Chris@23 899 }
Chris@23 900 }
matthiasm@13 901
matthiasm@10 902
matthiasm@12 903
matthiasm@13 904
Chris@23 905 f4.values = chroma;
Chris@23 906 f5.values = basschroma;
Chris@23 907 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
Chris@23 908 f6.values = chroma;
matthiasm@1 909
Chris@23 910 if (m_doNormalizeChroma > 0) {
Chris@23 911 vector<float> chromanorm = vector<float>(3,0);
Chris@23 912 switch (int(m_doNormalizeChroma)) {
Chris@23 913 case 0: // should never end up here
Chris@23 914 break;
Chris@23 915 case 1:
Chris@23 916 chromanorm[0] = *max_element(f4.values.begin(), f4.values.end());
Chris@23 917 chromanorm[1] = *max_element(f5.values.begin(), f5.values.end());
Chris@23 918 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
Chris@23 919 break;
Chris@23 920 case 2:
Chris@23 921 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
Chris@23 922 chromanorm[0] += *it;
Chris@23 923 }
Chris@23 924 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
Chris@23 925 chromanorm[1] += *it;
Chris@23 926 }
Chris@23 927 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
Chris@23 928 chromanorm[2] += *it;
Chris@23 929 }
Chris@23 930 break;
Chris@23 931 case 3:
Chris@23 932 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
Chris@23 933 chromanorm[0] += pow(*it,2);
Chris@23 934 }
Chris@23 935 chromanorm[0] = sqrt(chromanorm[0]);
Chris@23 936 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
Chris@23 937 chromanorm[1] += pow(*it,2);
Chris@23 938 }
Chris@23 939 chromanorm[1] = sqrt(chromanorm[1]);
Chris@23 940 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
Chris@23 941 chromanorm[2] += pow(*it,2);
Chris@23 942 }
Chris@23 943 chromanorm[2] = sqrt(chromanorm[2]);
Chris@23 944 break;
Chris@23 945 }
Chris@23 946 if (chromanorm[0] > 0) {
Chris@23 947 for (int i = 0; i < f4.values.size(); i++) {
Chris@23 948 f4.values[i] /= chromanorm[0];
Chris@23 949 }
Chris@23 950 }
Chris@23 951 if (chromanorm[1] > 0) {
Chris@23 952 for (int i = 0; i < f5.values.size(); i++) {
Chris@23 953 f5.values[i] /= chromanorm[1];
Chris@23 954 }
Chris@23 955 }
Chris@23 956 if (chromanorm[2] > 0) {
Chris@23 957 for (int i = 0; i < f6.values.size(); i++) {
Chris@23 958 f6.values[i] /= chromanorm[2];
Chris@23 959 }
Chris@23 960 }
matthiasm@13 961
Chris@23 962 }
matthiasm@13 963
Chris@23 964 // local chord estimation
Chris@23 965 vector<float> currentChordSalience;
Chris@23 966 float tempchordvalue = 0;
Chris@23 967 float sumchordvalue = 0;
matthiasm@9 968
Chris@23 969 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23 970 tempchordvalue = 0;
Chris@23 971 for (int iBin = 0; iBin < 12; iBin++) {
Chris@23 972 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23 973 }
Chris@23 974 for (int iBin = 12; iBin < 24; iBin++) {
Chris@23 975 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23 976 }
Chris@23 977 sumchordvalue+=tempchordvalue;
Chris@23 978 currentChordSalience.push_back(tempchordvalue);
Chris@23 979 }
Chris@23 980 if (sumchordvalue > 0) {
Chris@23 981 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23 982 currentChordSalience[iChord] /= sumchordvalue;
Chris@23 983 }
Chris@23 984 } else {
Chris@23 985 currentChordSalience[nChord-1] = 1.0;
Chris@23 986 }
Chris@23 987 chordogram.push_back(currentChordSalience);
matthiasm@1 988
Chris@23 989 fsOut[3].push_back(f3);
Chris@23 990 fsOut[4].push_back(f4);
Chris@23 991 fsOut[5].push_back(f5);
Chris@23 992 fsOut[6].push_back(f6);
Chris@23 993 count++;
Chris@23 994 }
Chris@23 995 cerr << "done." << endl;
matthiasm@13 996
matthiasm@10 997
Chris@23 998 /* Simple chord estimation
Chris@23 999 I just take the local chord estimates ("currentChordSalience") and average them over time, then
Chris@23 1000 take the maximum. Very simple, don't do this at home...
Chris@23 1001 */
Chris@23 1002 cerr << "[NNLS Chroma Plugin] Chord Estimation ... ";
Chris@23 1003 count = 0;
Chris@23 1004 int halfwindowlength = m_inputSampleRate / m_stepSize;
Chris@23 1005 vector<int> chordSequence;
Chris@23 1006 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram
Chris@23 1007 vector<int> temp = vector<int>(nChord,0);
Chris@23 1008 scoreChordogram.push_back(temp);
Chris@23 1009 }
Chris@23 1010 for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) {
Chris@23 1011 int startIndex = count + 1;
Chris@23 1012 int endIndex = count + 2 * halfwindowlength;
matthiasm@10 1013
Chris@23 1014 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
matthiasm@10 1015
Chris@23 1016 vector<int> chordCandidates;
Chris@23 1017 for (unsigned iChord = 0; iChord < nChord-1; iChord++) {
Chris@23 1018 // float currsum = 0;
Chris@23 1019 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
Chris@23 1020 // currsum += chordogram[iFrame][iChord];
Chris@23 1021 // }
Chris@23 1022 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
Chris@23 1023 for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
Chris@23 1024 if (chordogram[iFrame][iChord] > chordThreshold) {
Chris@23 1025 chordCandidates.push_back(iChord);
Chris@23 1026 break;
Chris@23 1027 }
Chris@23 1028 }
Chris@23 1029 }
Chris@23 1030 chordCandidates.push_back(nChord-1);
Chris@23 1031 // cerr << chordCandidates.size() << endl;
Chris@23 1032
Chris@23 1033 float maxval = 0; // will be the value of the most salient *chord change* in this frame
Chris@23 1034 float maxindex = 0; //... and the index thereof
Chris@23 1035 unsigned bestchordL = nChord-1; // index of the best "left" chord
Chris@23 1036 unsigned bestchordR = nChord-1; // index of the best "right" chord
Chris@23 1037
Chris@23 1038 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
Chris@23 1039 // now find the max values on both sides of iWF
Chris@23 1040 // left side:
Chris@23 1041 float maxL = 0;
Chris@23 1042 unsigned maxindL = nChord-1;
Chris@23 1043 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
Chris@23 1044 unsigned iChord = chordCandidates[kChord];
Chris@23 1045 float currsum = 0;
Chris@23 1046 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
Chris@23 1047 currsum += chordogram[count+iFrame][iChord];
matthiasm@10 1048 }
Chris@23 1049 if (iChord == nChord-1) currsum *= 0.8;
Chris@23 1050 if (currsum > maxL) {
Chris@23 1051 maxL = currsum;
Chris@23 1052 maxindL = iChord;
Chris@23 1053 }
Chris@23 1054 }
Chris@23 1055 // right side:
Chris@23 1056 float maxR = 0;
Chris@23 1057 unsigned maxindR = nChord-1;
Chris@23 1058 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
Chris@23 1059 unsigned iChord = chordCandidates[kChord];
Chris@23 1060 float currsum = 0;
Chris@23 1061 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
Chris@23 1062 currsum += chordogram[count+iFrame][iChord];
Chris@23 1063 }
Chris@23 1064 if (iChord == nChord-1) currsum *= 0.8;
Chris@23 1065 if (currsum > maxR) {
Chris@23 1066 maxR = currsum;
Chris@23 1067 maxindR = iChord;
Chris@23 1068 }
Chris@23 1069 }
Chris@23 1070 if (maxL+maxR > maxval) {
Chris@23 1071 maxval = maxL+maxR;
Chris@23 1072 maxindex = iWF;
Chris@23 1073 bestchordL = maxindL;
Chris@23 1074 bestchordR = maxindR;
Chris@23 1075 }
matthiasm@3 1076
Chris@23 1077 }
Chris@23 1078 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
Chris@23 1079 // add a score to every chord-frame-point that was part of a maximum
Chris@23 1080 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
Chris@23 1081 scoreChordogram[iFrame+count][bestchordL]++;
Chris@23 1082 }
Chris@23 1083 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
Chris@23 1084 scoreChordogram[iFrame+count][bestchordR]++;
Chris@23 1085 }
Chris@23 1086 if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
Chris@23 1087 count++;
Chris@23 1088 }
Chris@23 1089 // cerr << "******* agent finished *******" << endl;
Chris@23 1090 count = 0;
Chris@23 1091 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
Chris@23 1092 float maxval = 0; // will be the value of the most salient chord in this frame
Chris@23 1093 float maxindex = 0; //... and the index thereof
Chris@23 1094 for (unsigned iChord = 0; iChord < nChord; iChord++) {
Chris@23 1095 if (scoreChordogram[count][iChord] > maxval) {
Chris@23 1096 maxval = scoreChordogram[count][iChord];
Chris@23 1097 maxindex = iChord;
Chris@23 1098 // cerr << iChord << endl;
Chris@23 1099 }
Chris@23 1100 }
Chris@23 1101 chordSequence.push_back(maxindex);
Chris@23 1102 // cerr << "before modefilter, maxindex: " << maxindex << endl;
Chris@23 1103 count++;
Chris@23 1104 }
Chris@23 1105 // cerr << "******* mode filter done *******" << endl;
matthiasm@10 1106
matthiasm@3 1107
Chris@23 1108 // mode filter on chordSequence
Chris@23 1109 count = 0;
Chris@23 1110 string oldChord = "";
Chris@23 1111 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
Chris@23 1112 Feature f6 = *it;
Chris@23 1113 Feature f7; // chord estimate
Chris@23 1114 f7.hasTimestamp = true;
Chris@23 1115 f7.timestamp = f6.timestamp;
Chris@23 1116 Feature f8; // chord estimate
Chris@23 1117 f8.hasTimestamp = true;
Chris@23 1118 f8.timestamp = f6.timestamp;
matthiasm@17 1119
Chris@23 1120 vector<int> chordCount = vector<int>(nChord,0);
Chris@23 1121 int maxChordCount = 0;
Chris@23 1122 int maxChordIndex = nChord-1;
Chris@23 1123 string maxChord;
Chris@23 1124 int startIndex = max(count - halfwindowlength/2,0);
Chris@23 1125 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
Chris@23 1126 for (int i = startIndex; i < endIndex; i++) {
Chris@23 1127 chordCount[chordSequence[i]]++;
Chris@23 1128 if (chordCount[chordSequence[i]] > maxChordCount) {
Chris@23 1129 // cerr << "start index " << startIndex << endl;
Chris@23 1130 maxChordCount++;
Chris@23 1131 maxChordIndex = chordSequence[i];
Chris@23 1132 maxChord = m_chordnames[maxChordIndex];
Chris@23 1133 }
Chris@23 1134 }
Chris@23 1135 // chordSequence[count] = maxChordIndex;
Chris@23 1136 // cerr << maxChordIndex << endl;
Chris@23 1137 f8.values.push_back(chordchange[count]/(halfwindowlength*2));
Chris@23 1138 // cerr << chordchange[count] << endl;
Chris@23 1139 fsOut[9].push_back(f8);
Chris@23 1140 if (oldChord != maxChord) {
Chris@23 1141 oldChord = maxChord;
matthiasm@3 1142
Chris@23 1143 // char buffer1 [50];
Chris@23 1144 // if (maxChordIndex < nChord - 1) {
Chris@23 1145 // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
Chris@23 1146 // } else {
Chris@23 1147 // sprintf(buffer1, "N");
Chris@23 1148 // }
Chris@23 1149 // f7.label = buffer1;
Chris@23 1150 f7.label = m_chordnames[maxChordIndex];
Chris@23 1151 fsOut[7].push_back(f7);
Chris@23 1152 }
Chris@23 1153 count++;
Chris@23 1154 }
Chris@23 1155 Feature f7; // last chord estimate
Chris@23 1156 f7.hasTimestamp = true;
Chris@23 1157 f7.timestamp = fsOut[6][fsOut[6].size()-1].timestamp;
Chris@23 1158 f7.label = "N";
Chris@23 1159 fsOut[7].push_back(f7);
Chris@23 1160 cerr << "done." << endl;
Chris@23 1161 // // musicity
Chris@23 1162 // count = 0;
Chris@23 1163 // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
Chris@23 1164 // vector<float> musicityValue;
Chris@23 1165 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
Chris@23 1166 // Feature f4 = *it;
Chris@23 1167 //
Chris@23 1168 // int startIndex = max(count - musicitykernelwidth/2,0);
Chris@23 1169 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
Chris@23 1170 // float chromasum = 0;
Chris@23 1171 // float diffsum = 0;
Chris@23 1172 // for (int k = 0; k < 12; k++) {
Chris@23 1173 // for (int i = startIndex + 1; i < endIndex; i++) {
Chris@23 1174 // chromasum += pow(fsOut[4][i].values[k],2);
Chris@23 1175 // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]);
Chris@23 1176 // }
Chris@23 1177 // }
Chris@23 1178 // diffsum /= chromasum;
Chris@23 1179 // musicityValue.push_back(diffsum);
Chris@23 1180 // count++;
Chris@23 1181 // }
Chris@23 1182 //
Chris@23 1183 // float musicityThreshold = 0.44;
Chris@23 1184 // if (m_stepSize == 4096) {
Chris@23 1185 // musicityThreshold = 0.74;
Chris@23 1186 // }
Chris@23 1187 // if (m_stepSize == 4410) {
Chris@23 1188 // musicityThreshold = 0.77;
Chris@23 1189 // }
Chris@23 1190 //
Chris@23 1191 // count = 0;
Chris@23 1192 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
Chris@23 1193 // Feature f4 = *it;
Chris@23 1194 // Feature f8; // musicity
Chris@23 1195 // Feature f9; // musicity segmenter
Chris@23 1196 //
Chris@23 1197 // f8.hasTimestamp = true;
Chris@23 1198 // f8.timestamp = f4.timestamp;
Chris@23 1199 // f9.hasTimestamp = true;
Chris@23 1200 // f9.timestamp = f4.timestamp;
Chris@23 1201 //
Chris@23 1202 // int startIndex = max(count - musicitykernelwidth/2,0);
Chris@23 1203 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
Chris@23 1204 // int musicityCount = 0;
Chris@23 1205 // for (int i = startIndex; i <= endIndex; i++) {
Chris@23 1206 // if (musicityValue[i] > musicityThreshold) musicityCount++;
Chris@23 1207 // }
Chris@23 1208 // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1);
Chris@23 1209 //
Chris@23 1210 // if (isSpeech) {
Chris@23 1211 // if (oldlabeltype != 2) {
Chris@23 1212 // f9.label = "Speech";
Chris@23 1213 // fsOut[9].push_back(f9);
Chris@23 1214 // oldlabeltype = 2;
Chris@23 1215 // }
Chris@23 1216 // } else {
Chris@23 1217 // if (oldlabeltype != 1) {
Chris@23 1218 // f9.label = "Music";
Chris@23 1219 // fsOut[9].push_back(f9);
Chris@23 1220 // oldlabeltype = 1;
Chris@23 1221 // }
Chris@23 1222 // }
Chris@23 1223 // f8.values.push_back(musicityValue[count]);
Chris@23 1224 // fsOut[8].push_back(f8);
Chris@23 1225 // count++;
Chris@23 1226 // }
Chris@23 1227 return fsOut;
matthiasm@0 1228
matthiasm@0 1229 }
matthiasm@0 1230