nnls-chroma: Chordino.cpp annotate

annotate Chordino.cpp @ 95:dab7e7bfeba1 matthiasm-plugin 0.2

added release notes, updated README

author	matthiasm
date	Fri, 03 Dec 2010 00:46:12 +0900
parents	a76598852303
children	99b87ce4bb70

rev	line source
Chris@23	1 /* -- c-basic-offset: 4 indent-tabs-mode: nil -- vi:set ts=8 sts=4 sw=4: */
matthiasm@0	2
Chris@35	3 /*
Chris@35	4 NNLS-Chroma / Chordino
Chris@35	5
Chris@35	6 Audio feature extraction plugins for chromagram and chord
Chris@35	7 estimation.
Chris@35	8
Chris@35	9 Centre for Digital Music, Queen Mary University of London.
Chris@35	10 This file copyright 2008-2010 Matthias Mauch and QMUL.
Chris@35	11
Chris@35	12 This program is free software; you can redistribute it and/or
Chris@35	13 modify it under the terms of the GNU General Public License as
Chris@35	14 published by the Free Software Foundation; either version 2 of the
Chris@35	15 License, or (at your option) any later version. See the file
Chris@35	16 COPYING included with this distribution for more information.
Chris@35	17 */
Chris@35	18
Chris@35	19 #include "Chordino.h"
Chris@27	20
Chris@27	21 #include "chromamethods.h"
matthiasm@43	22 #include "viterbi.h"
Chris@27	23
Chris@27	24 #include <cstdlib>
Chris@27	25 #include <fstream>
matthiasm@0	26 #include <cmath>
matthiasm@9	27
Chris@27	28 #include <algorithm>
matthiasm@0	29
// Compile-time switch for the "--> ..." trace messages that the methods in
// this file write to cerr on entry. Disabled by default.
const bool debug_on = false;
matthiasm@0	31
Chris@35	32 Chordino::Chordino(float inputSampleRate) :
matthiasm@86	33 NNLSBase(inputSampleRate),
matthiasm@86	34 m_chorddict(0),
matthiasm@86	35 m_chordnotes(0),
matthiasm@86	36 m_chordnames(0)
matthiasm@0	37 {
Chris@35	38 if (debug_on) cerr << "--> Chordino" << endl;
matthiasm@86	39 // get the chord dictionary from file (if the file exists)
matthiasm@86	40
matthiasm@0	41 }
matthiasm@0	42
Chris@35	43 Chordino::~Chordino()
matthiasm@0	44 {
Chris@35	45 if (debug_on) cerr << "--> ~Chordino" << endl;
matthiasm@0	46 }
matthiasm@0	47
matthiasm@0	48 string
Chris@35	49 Chordino::getIdentifier() const
matthiasm@0	50 {
Chris@23	51 if (debug_on) cerr << "--> getIdentifier" << endl;
Chris@35	52 return "chordino";
matthiasm@0	53 }
matthiasm@0	54
matthiasm@0	55 string
Chris@35	56 Chordino::getName() const
matthiasm@0	57 {
Chris@23	58 if (debug_on) cerr << "--> getName" << endl;
Chris@35	59 return "Chordino";
matthiasm@0	60 }
matthiasm@0	61
matthiasm@0	62 string
Chris@35	63 Chordino::getDescription() const
matthiasm@0	64 {
Chris@23	65 if (debug_on) cerr << "--> getDescription" << endl;
matthiasm@58	66 return "Chordino provides a simple chord transcription based on NNLS Chroma (as in the NNLS Chroma plugin). Chord profiles given by the user in the file chord.dict are used to calculate frame-wise chord similarities. Two simple (non-state-of-the-art!) algorithms are available that smooth these to provide a chord transcription: a simple chord change method, and a standard HMM/Viterbi approach.";
matthiasm@0	67 }
matthiasm@0	68
matthiasm@50	69 Chordino::ParameterList
matthiasm@50	70 Chordino::getParameterDescriptors() const
matthiasm@50	71 {
matthiasm@50	72 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
matthiasm@50	73 ParameterList list;
matthiasm@50	74
matthiasm@50	75 ParameterDescriptor d;
matthiasm@50	76 d.identifier = "useNNLS";
matthiasm@50	77 d.name = "use approximate transcription (NNLS)";
matthiasm@50	78 d.description = "Toggles approximate transcription (NNLS).";
matthiasm@50	79 d.unit = "";
matthiasm@50	80 d.minValue = 0.0;
matthiasm@50	81 d.maxValue = 1.0;
matthiasm@50	82 d.defaultValue = 1.0;
matthiasm@50	83 d.isQuantized = true;
matthiasm@50	84 d.quantizeStep = 1.0;
matthiasm@50	85 list.push_back(d);
matthiasm@50	86
matthiasm@50	87 ParameterDescriptor d4;
matthiasm@50	88 d4.identifier = "useHMM";
matthiasm@53	89 d4.name = "HMM (Viterbi decoding)";
matthiasm@50	90 d4.description = "Turns on Viterbi decoding (when off, the simple chord estimator is used).";
matthiasm@50	91 d4.unit = "";
matthiasm@50	92 d4.minValue = 0.0;
matthiasm@50	93 d4.maxValue = 1.0;
matthiasm@50	94 d4.defaultValue = 1.0;
matthiasm@50	95 d4.isQuantized = true;
matthiasm@50	96 d4.quantizeStep = 1.0;
matthiasm@50	97 list.push_back(d4);
matthiasm@50	98
matthiasm@50	99 ParameterDescriptor d0;
matthiasm@50	100 d0.identifier = "rollon";
matthiasm@50	101 d0.name = "spectral roll-on";
matthiasm@58	102 d0.description = "Consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds the quantile [spectral roll on] x [total energy] will be set to 0. A value of 0 means that no bins will be changed.";
matthiasm@59	103 d0.unit = "%";
matthiasm@50	104 d0.minValue = 0;
mail@76	105 d0.maxValue = 5;
matthiasm@92	106 d0.defaultValue = 0.0;
matthiasm@50	107 d0.isQuantized = true;
mail@76	108 d0.quantizeStep = 0.5;
matthiasm@50	109 list.push_back(d0);
matthiasm@50	110
matthiasm@50	111 ParameterDescriptor d1;
matthiasm@50	112 d1.identifier = "tuningmode";
matthiasm@50	113 d1.name = "tuning mode";
matthiasm@50	114 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
matthiasm@50	115 d1.unit = "";
matthiasm@50	116 d1.minValue = 0;
matthiasm@50	117 d1.maxValue = 1;
matthiasm@92	118 d1.defaultValue = 0.0;
matthiasm@50	119 d1.isQuantized = true;
matthiasm@50	120 d1.valueNames.push_back("global tuning");
matthiasm@50	121 d1.valueNames.push_back("local tuning");
matthiasm@50	122 d1.quantizeStep = 1.0;
matthiasm@50	123 list.push_back(d1);
matthiasm@50	124
matthiasm@50	125 ParameterDescriptor d2;
matthiasm@50	126 d2.identifier = "whitening";
matthiasm@50	127 d2.name = "spectral whitening";
matthiasm@50	128 d2.description = "Spectral whitening: no whitening - 0; whitening - 1.";
matthiasm@50	129 d2.unit = "";
matthiasm@50	130 d2.isQuantized = true;
matthiasm@50	131 d2.minValue = 0.0;
matthiasm@50	132 d2.maxValue = 1.0;
matthiasm@50	133 d2.defaultValue = 1.0;
matthiasm@50	134 d2.isQuantized = false;
matthiasm@50	135 list.push_back(d2);
matthiasm@50	136
matthiasm@50	137 ParameterDescriptor d3;
matthiasm@50	138 d3.identifier = "s";
matthiasm@50	139 d3.name = "spectral shape";
matthiasm@50	140 d3.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
matthiasm@50	141 d3.unit = "";
matthiasm@50	142 d3.minValue = 0.5;
matthiasm@50	143 d3.maxValue = 0.9;
matthiasm@50	144 d3.defaultValue = 0.7;
matthiasm@50	145 d3.isQuantized = false;
matthiasm@50	146 list.push_back(d3);
matthiasm@50	147
mail@89	148 ParameterDescriptor boostn;
mail@89	149 boostn.identifier = "boostn";
mail@89	150 boostn.name = "boost N";
matthiasm@95	151 boostn.description = "Boost likelihood of the N (no chord) label.";
mail@89	152 boostn.unit = "";
matthiasm@95	153 boostn.minValue = 0.0;
matthiasm@95	154 boostn.maxValue = 1.0;
matthiasm@95	155 boostn.defaultValue = 0.1;
mail@89	156 boostn.isQuantized = false;
mail@89	157 list.push_back(boostn);
matthiasm@50	158
matthiasm@50	159 return list;
matthiasm@50	160 }
matthiasm@50	161
Chris@35	162 Chordino::OutputList
Chris@35	163 Chordino::getOutputDescriptors() const
matthiasm@0	164 {
Chris@23	165 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
matthiasm@0	166 OutputList list;
matthiasm@0	167
Chris@35	168 int index = 0;
matthiasm@0	169
matthiasm@0	170 OutputDescriptor d7;
matthiasm@0	171 d7.identifier = "simplechord";
Chris@36	172 d7.name = "Chord Estimate";
matthiasm@58	173 d7.description = "Estimated chord times and labels. Two simple (non-state-of-the-art!) algorithms are available that smooth these to provide a chord transcription: a simple chord change method, and a standard HMM/Viterbi approach.";
matthiasm@0	174 d7.unit = "";
matthiasm@0	175 d7.hasFixedBinCount = true;
matthiasm@0	176 d7.binCount = 0;
matthiasm@0	177 d7.hasKnownExtents = false;
matthiasm@0	178 d7.isQuantized = false;
matthiasm@0	179 d7.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@0	180 d7.hasDuration = false;
matthiasm@0	181 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0	182 list.push_back(d7);
Chris@35	183 m_outputChords = index++;
matthiasm@0	184
matthiasm@86	185 OutputDescriptor chordnotes;
matthiasm@86	186 chordnotes.identifier = "chordnotes";
matthiasm@86	187 chordnotes.name = "Note Representation of Chord Estimate";
matthiasm@86	188 chordnotes.description = "A simple represenation of the estimated chord with bass note (if applicable) and chord notes.";
matthiasm@86	189 chordnotes.unit = "MIDI units";
matthiasm@86	190 chordnotes.hasFixedBinCount = true;
matthiasm@86	191 chordnotes.binCount = 1;
matthiasm@86	192 chordnotes.hasKnownExtents = true;
matthiasm@86	193 chordnotes.minValue = 0;
matthiasm@86	194 chordnotes.maxValue = 127;
matthiasm@86	195 chordnotes.isQuantized = true;
matthiasm@86	196 chordnotes.quantizeStep = 1;
matthiasm@86	197 chordnotes.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@86	198 chordnotes.hasDuration = true;
matthiasm@86	199 chordnotes.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@86	200 list.push_back(chordnotes);
matthiasm@86	201 m_outputChordnotes = index++;
matthiasm@86	202
Chris@23	203 OutputDescriptor d8;
mail@60	204 d8.identifier = "harmonicchange";
Chris@36	205 d8.name = "Harmonic Change Value";
matthiasm@58	206 d8.description = "An indication of the likelihood of harmonic change. Depends on the chord dictionary. Calculation is different depending on whether the Viterbi algorithm is used for chord estimation, or the simple chord estimate.";
matthiasm@17	207 d8.unit = "";
matthiasm@17	208 d8.hasFixedBinCount = true;
matthiasm@17	209 d8.binCount = 1;
mail@60	210 d8.hasKnownExtents = false;
mail@60	211 // d8.minValue = 0.0;
mail@60	212 // d8.maxValue = 0.999;
matthiasm@17	213 d8.isQuantized = false;
matthiasm@17	214 d8.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@17	215 d8.hasDuration = false;
matthiasm@17	216 // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@17	217 list.push_back(d8);
Chris@35	218 m_outputHarmonicChange = index++;
matthiasm@1	219
matthiasm@0	220 return list;
matthiasm@0	221 }
matthiasm@0	222
matthiasm@0	223 bool
Chris@35	224 Chordino::initialise(size_t channels, size_t stepSize, size_t blockSize)
matthiasm@0	225 {
Chris@23	226 if (debug_on) {
Chris@23	227 cerr << "--> initialise";
Chris@23	228 }
mail@76	229
Chris@35	230 if (!NNLSBase::initialise(channels, stepSize, blockSize)) {
Chris@35	231 return false;
Chris@35	232 }
mail@89	233 m_chordnames = chordDictionary(&m_chorddict, &m_chordnotes, m_boostN);
matthiasm@0	234 return true;
matthiasm@0	235 }
matthiasm@0	236
matthiasm@0	237 void
Chris@35	238 Chordino::reset()
matthiasm@0	239 {
Chris@23	240 if (debug_on) cerr << "--> reset";
Chris@35	241 NNLSBase::reset();
matthiasm@0	242 }
matthiasm@0	243
Chris@35	244 Chordino::FeatureSet
Chris@35	245 Chordino::process(const float const inputBuffers, Vamp::RealTime timestamp)
matthiasm@0	246 {
Chris@23	247 if (debug_on) cerr << "--> process" << endl;
matthiasm@0	248
Chris@35	249 NNLSBase::baseProcess(inputBuffers, timestamp);
matthiasm@0	250
Chris@35	251 return FeatureSet();
matthiasm@0	252 }
matthiasm@0	253
Chris@35	254 Chordino::FeatureSet
Chris@35	255 Chordino::getRemainingFeatures()
matthiasm@0	256 {
mail@89	257 // cerr << hw[0] << hw[1] << endl;
mail@89	258 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
Chris@23	259 FeatureSet fsOut;
Chris@35	260 if (m_logSpectrum.size() == 0) return fsOut;
Chris@23	261 int nChord = m_chordnames.size();
Chris@23	262 //
Chris@23	263 /** Calculate Tuning
Chris@23	264 calculate tuning from (using the angle of the complex number defined by the
Chris@23	265 cumulative mean real and imag values)
Chris@23	266 **/
mail@80	267 float meanTuningImag = 0;
mail@80	268 float meanTuningReal = 0;
mail@80	269 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
mail@80	270 meanTuningReal += m_meanTunings[iBPS] * cosvalues[iBPS];
mail@80	271 meanTuningImag += m_meanTunings[iBPS] * sinvalues[iBPS];
mail@80	272 }
Chris@23	273 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
Chris@23	274 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
Chris@23	275 int intShift = floor(normalisedtuning * 3);
mail@80	276 float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this
matthiasm@1	277
Chris@23	278 char buffer0 [50];
matthiasm@1	279
Chris@23	280 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
matthiasm@1	281
matthiasm@1	282
Chris@23	283 /** Tune Log-Frequency Spectrogram
matthiasm@43	284 calculate a tuned log-frequency spectrogram (currentTunedSpec): use the tuning estimated above (kinda f0) to
Chris@91	285 perform linear interpolation on the existing log-frequency spectrogram (kinda currentLogSpectrum).
Chris@23	286 **/
Chris@35	287 cerr << endl << "[Chordino Plugin] Tuning Log-Frequency Spectrogram ... ";
matthiasm@13	288
Chris@23	289 float tempValue = 0;
Chris@23	290 float dbThreshold = 0; // relative to the background spectrum
Chris@23	291 float thresh = pow(10,dbThreshold/20);
Chris@23	292 // cerr << "tune local ? " << m_tuneLocal << endl;
Chris@23	293 int count = 0;
matthiasm@1	294
Chris@35	295 FeatureList tunedSpec;
matthiasm@43	296 int nFrame = m_logSpectrum.size();
matthiasm@43	297
matthiasm@43	298 vector<Vamp::RealTime> timestamps;
Chris@35	299
Chris@35	300 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
Chris@91	301 Feature currentLogSpectrum = *i;
matthiasm@43	302 Feature currentTunedSpec; // tuned log-frequency spectrum
matthiasm@43	303 currentTunedSpec.hasTimestamp = true;
Chris@91	304 currentTunedSpec.timestamp = currentLogSpectrum.timestamp;
Chris@91	305 timestamps.push_back(currentLogSpectrum.timestamp);
matthiasm@43	306 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // set lower edge to zero
matthiasm@1	307
Chris@23	308 if (m_tuneLocal) {
Chris@23	309 intShift = floor(m_localTuning[count] * 3);
mail@80	310 floatShift = m_localTuning[count] * 3 - intShift; // floatShift is a really bad name for this
Chris@23	311 }
matthiasm@1	312
mail@80	313 // cerr << intShift << " " << floatShift << endl;
matthiasm@1	314
Chris@91	315 for (int k = 2; k < (int)currentLogSpectrum.values.size() - 3; ++k) { // interpolate all inner bins
Chris@91	316 tempValue = currentLogSpectrum.values[k + intShift] * (1-floatShift) + currentLogSpectrum.values[k+intShift+1] * floatShift;
matthiasm@43	317 currentTunedSpec.values.push_back(tempValue);
Chris@23	318 }
matthiasm@1	319
matthiasm@43	320 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // upper edge
matthiasm@43	321 vector<float> runningmean = SpecialConvolution(currentTunedSpec.values,hw);
Chris@23	322 vector<float> runningstd;
mail@77	323 for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance)
matthiasm@43	324 runningstd.push_back((currentTunedSpec.values[i] - runningmean[i]) * (currentTunedSpec.values[i] - runningmean[i]));
Chris@23	325 }
Chris@23	326 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
mail@77	327 for (int i = 0; i < nNote; i++) {
Chris@23	328 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
Chris@23	329 if (runningstd[i] > 0) {
matthiasm@43	330 // currentTunedSpec.values[i] = (currentTunedSpec.values[i] / runningmean[i]) > thresh ?
matthiasm@43	331 // (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
matthiasm@43	332 currentTunedSpec.values[i] = (currentTunedSpec.values[i] - runningmean[i]) > 0 ?
matthiasm@43	333 (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
Chris@23	334 }
matthiasm@43	335 if (currentTunedSpec.values[i] < 0) {
Chris@23	336 cerr << "ERROR: negative value in logfreq spectrum" << endl;
Chris@23	337 }
Chris@23	338 }
matthiasm@43	339 tunedSpec.push_back(currentTunedSpec);
Chris@23	340 count++;
Chris@23	341 }
Chris@23	342 cerr << "done." << endl;
matthiasm@1	343
Chris@23	344 /** Semitone spectrum and chromagrams
Chris@23	345 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
Chris@23	346 is inferred using a non-negative least squares algorithm.
Chris@23	347 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
Chris@23	348 bass and treble stacked onto each other).
Chris@23	349 **/
matthiasm@42	350 if (m_useNNLS == 0) {
Chris@35	351 cerr << "[Chordino Plugin] Mapping to semitone spectrum and chroma ... ";
Chris@23	352 } else {
Chris@35	353 cerr << "[Chordino Plugin] Performing NNLS and mapping to chroma ... ";
Chris@23	354 }
matthiasm@13	355
matthiasm@1	356
matthiasm@43	357 vector<vector<double> > chordogram;
Chris@23	358 vector<vector<int> > scoreChordogram;
Chris@35	359 vector<float> chordchange = vector<float>(tunedSpec.size(),0);
Chris@23	360 count = 0;
matthiasm@9	361
Chris@35	362 FeatureList chromaList;
matthiasm@43	363
matthiasm@43	364
Chris@35	365
Chris@35	366 for (FeatureList::iterator it = tunedSpec.begin(); it != tunedSpec.end(); ++it) {
matthiasm@43	367 Feature currentTunedSpec = *it; // logfreq spectrum
matthiasm@43	368 Feature currentChromas; // treble and bass chromagram
Chris@35	369
matthiasm@43	370 currentChromas.hasTimestamp = true;
matthiasm@43	371 currentChromas.timestamp = currentTunedSpec.timestamp;
Chris@35	372
mail@77	373 float b[nNote];
matthiasm@1	374
Chris@23	375 bool some_b_greater_zero = false;
Chris@23	376 float sumb = 0;
mail@77	377 for (int i = 0; i < nNote; i++) {
mail@77	378 // b[i] = m_dict[(nNote * count + i) % (nNote * 84)];
matthiasm@43	379 b[i] = currentTunedSpec.values[i];
Chris@23	380 sumb += b[i];
Chris@23	381 if (b[i] > 0) {
Chris@23	382 some_b_greater_zero = true;
Chris@23	383 }
Chris@23	384 }
matthiasm@1	385
Chris@23	386 // here's where the non-negative least squares algorithm calculates the note activation x
matthiasm@1	387
Chris@23	388 vector<float> chroma = vector<float>(12, 0);
Chris@23	389 vector<float> basschroma = vector<float>(12, 0);
Chris@23	390 float currval;
Chris@91	391 int iSemitone = 0;
matthiasm@1	392
Chris@23	393 if (some_b_greater_zero) {
matthiasm@42	394 if (m_useNNLS == 0) {
Chris@91	395 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
Chris@23	396 currval = 0;
mail@81	397 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
mail@81	398 currval += b[iNote + iBPS] * (1-abs(iBPS*1.0/(nBPS/2+1)));
mail@81	399 }
Chris@23	400 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
Chris@23	401 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
Chris@23	402 iSemitone++;
Chris@23	403 }
matthiasm@1	404
Chris@23	405 } else {
Chris@35	406 float x[84+1000];
Chris@23	407 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
Chris@23	408 vector<int> signifIndex;
Chris@23	409 int index=0;
Chris@23	410 sumb /= 84.0;
Chris@91	411 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
Chris@23	412 float currval = 0;
mail@81	413 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
mail@81	414 currval += b[iNote + iBPS];
mail@81	415 }
Chris@23	416 if (currval > 0) signifIndex.push_back(index);
Chris@23	417 index++;
Chris@23	418 }
Chris@35	419 float rnorm;
Chris@35	420 float w[84+1000];
Chris@35	421 float zz[84+1000];
Chris@23	422 int indx[84+1000];
Chris@23	423 int mode;
mail@77	424 int dictsize = nNote*signifIndex.size();
mail@81	425 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
Chris@35	426 float *curr_dict = new float[dictsize];
Chris@91	427 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
Chris@91	428 for (int iBin = 0; iBin < nNote; iBin++) {
mail@77	429 curr_dict[iNote * nNote + iBin] = 1.0 * m_dict[signifIndex[iNote] * nNote + iBin];
Chris@23	430 }
Chris@23	431 }
Chris@35	432 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
Chris@23	433 delete [] curr_dict;
Chris@91	434 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
Chris@23	435 // cerr << mode << endl;
Chris@23	436 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
Chris@23	437 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
Chris@23	438 }
Chris@23	439 }
Chris@23	440 }
Chris@35	441
Chris@35	442 vector<float> origchroma = chroma;
Chris@23	443 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
matthiasm@43	444 currentChromas.values = chroma;
Chris@35	445
Chris@23	446 if (m_doNormalizeChroma > 0) {
Chris@23	447 vector<float> chromanorm = vector<float>(3,0);
Chris@23	448 switch (int(m_doNormalizeChroma)) {
Chris@23	449 case 0: // should never end up here
Chris@23	450 break;
Chris@23	451 case 1:
Chris@35	452 chromanorm[0] = *max_element(origchroma.begin(), origchroma.end());
Chris@35	453 chromanorm[1] = *max_element(basschroma.begin(), basschroma.end());
Chris@23	454 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
Chris@23	455 break;
Chris@23	456 case 2:
Chris@35	457 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
Chris@23	458 chromanorm[2] += *it;
Chris@23	459 }
Chris@23	460 break;
Chris@23	461 case 3:
Chris@35	462 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
Chris@23	463 chromanorm[2] += pow(*it,2);
Chris@23	464 }
Chris@23	465 chromanorm[2] = sqrt(chromanorm[2]);
Chris@23	466 break;
Chris@23	467 }
Chris@23	468 if (chromanorm[2] > 0) {
Chris@91	469 for (int i = 0; i < (int)chroma.size(); i++) {
matthiasm@43	470 currentChromas.values[i] /= chromanorm[2];
Chris@23	471 }
Chris@23	472 }
Chris@23	473 }
Chris@35	474
matthiasm@43	475 chromaList.push_back(currentChromas);
Chris@35	476
Chris@23	477 // local chord estimation
matthiasm@43	478 vector<double> currentChordSalience;
matthiasm@43	479 double tempchordvalue = 0;
matthiasm@43	480 double sumchordvalue = 0;
matthiasm@9	481
Chris@23	482 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23	483 tempchordvalue = 0;
Chris@23	484 for (int iBin = 0; iBin < 12; iBin++) {
matthiasm@44	485 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23	486 }
Chris@23	487 for (int iBin = 12; iBin < 24; iBin++) {
Chris@23	488 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23	489 }
matthiasm@48	490 if (iChord == nChord-1) tempchordvalue *= .7;
matthiasm@48	491 if (tempchordvalue < 0) tempchordvalue = 0.0;
matthiasm@50	492 tempchordvalue = pow(1.3,tempchordvalue);
Chris@23	493 sumchordvalue+=tempchordvalue;
Chris@23	494 currentChordSalience.push_back(tempchordvalue);
Chris@23	495 }
Chris@23	496 if (sumchordvalue > 0) {
Chris@23	497 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23	498 currentChordSalience[iChord] /= sumchordvalue;
Chris@23	499 }
Chris@23	500 } else {
Chris@23	501 currentChordSalience[nChord-1] = 1.0;
Chris@23	502 }
Chris@23	503 chordogram.push_back(currentChordSalience);
matthiasm@1	504
Chris@23	505 count++;
Chris@23	506 }
Chris@23	507 cerr << "done." << endl;
matthiasm@13	508
matthiasm@86	509 vector<Feature> oldnotes;
matthiasm@10	510
matthiasm@50	511 // bool m_useHMM = true; // this will go into the chordino header file.
matthiasm@50	512 if (m_useHMM == 1.0) {
matthiasm@44	513 cerr << "[Chordino Plugin] HMM Chord Estimation ... ";
matthiasm@43	514 int oldchord = nChord-1;
matthiasm@48	515 double selftransprob = 0.99;
matthiasm@43	516
matthiasm@48	517 // vector<double> init = vector<double>(nChord,1.0/nChord);
matthiasm@48	518 vector<double> init = vector<double>(nChord,0); init[nChord-1] = 1;
matthiasm@48	519
matthiasm@50	520 double *delta;
matthiasm@50	521 delta = (double )malloc(sizeof(double)nFrame*nChord);
matthiasm@50	522
matthiasm@43	523 vector<vector<double> > trans;
matthiasm@43	524 for (int iChord = 0; iChord < nChord; iChord++) {
matthiasm@43	525 vector<double> temp = vector<double>(nChord,(1-selftransprob)/(nChord-1));
matthiasm@43	526 temp[iChord] = selftransprob;
matthiasm@43	527 trans.push_back(temp);
matthiasm@43	528 }
matthiasm@50	529 vector<int> chordpath = ViterbiPath(init, trans, chordogram, delta);
matthiasm@48	530
matthiasm@48	531
matthiasm@48	532 Feature chord_feature; // chord estimate
matthiasm@48	533 chord_feature.hasTimestamp = true;
matthiasm@48	534 chord_feature.timestamp = timestamps[0];
matthiasm@48	535 chord_feature.label = m_chordnames[chordpath[0]];
mail@60	536 fsOut[m_outputChords].push_back(chord_feature);
matthiasm@43	537
mail@60	538 chordchange[0] = 0;
Chris@91	539 for (int iFrame = 1; iFrame < (int)chordpath.size(); ++iFrame) {
matthiasm@43	540 // cerr << chordpath[iFrame] << endl;
matthiasm@48	541 if (chordpath[iFrame] != oldchord ) {
matthiasm@86	542 // chord
matthiasm@43	543 Feature chord_feature; // chord estimate
matthiasm@43	544 chord_feature.hasTimestamp = true;
matthiasm@43	545 chord_feature.timestamp = timestamps[iFrame];
matthiasm@43	546 chord_feature.label = m_chordnames[chordpath[iFrame]];
mail@60	547 fsOut[m_outputChords].push_back(chord_feature);
matthiasm@43	548 oldchord = chordpath[iFrame];
matthiasm@86	549 // chord notes
Chris@91	550 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
matthiasm@86	551 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[iFrame];
matthiasm@86	552 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
matthiasm@86	553 }
matthiasm@86	554 oldnotes.clear();
Chris@91	555 for (int iNote = 0; iNote < (int)m_chordnotes[chordpath[iFrame]].size(); ++iNote) { // prepare notes of current chord
matthiasm@86	556 Feature chordnote_feature;
matthiasm@86	557 chordnote_feature.hasTimestamp = true;
matthiasm@86	558 chordnote_feature.timestamp = timestamps[iFrame];
matthiasm@86	559 chordnote_feature.values.push_back(m_chordnotes[chordpath[iFrame]][iNote]);
matthiasm@86	560 chordnote_feature.hasDuration = true;
matthiasm@86	561 chordnote_feature.duration = -timestamps[iFrame]; // this will be corrected at the next chord
matthiasm@86	562 oldnotes.push_back(chordnote_feature);
matthiasm@86	563 }
Chris@23	564 }
matthiasm@50	565 /* calculating simple chord change prob */
matthiasm@50	566 for (int iChord = 0; iChord < nChord; iChord++) {
matthiasm@50	567 chordchange[iFrame-1] += delta[(iFrame-1)nChord + iChord] log(delta[(iFrame-1)nChord + iChord]/delta[iFramenChord + iChord]);
matthiasm@50	568 }
Chris@23	569 }
matthiasm@43	570
matthiasm@43	571 // cerr << chordpath[0] << endl;
matthiasm@43	572 } else {
matthiasm@43	573 /* Simple chord estimation
matthiasm@43	574 I just take the local chord estimates ("currentChordSalience") and average them over time, then
matthiasm@43	575 take the maximum. Very simple, don't do this at home...
matthiasm@43	576 */
matthiasm@44	577 cerr << "[Chordino Plugin] Simple Chord Estimation ... ";
matthiasm@43	578 count = 0;
matthiasm@43	579 int halfwindowlength = m_inputSampleRate / m_stepSize;
matthiasm@43	580 vector<int> chordSequence;
matthiasm@43	581 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) { // initialise the score chordogram
matthiasm@43	582 vector<int> temp = vector<int>(nChord,0);
matthiasm@43	583 scoreChordogram.push_back(temp);
matthiasm@43	584 }
matthiasm@43	585 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it < timestamps.end()-2*halfwindowlength-1; ++it) {
matthiasm@43	586 int startIndex = count + 1;
matthiasm@43	587 int endIndex = count + 2 * halfwindowlength;
matthiasm@43	588
matthiasm@43	589 float chordThreshold = 2.5/nChord;//(2halfwindowlength+1);
matthiasm@43	590
matthiasm@43	591 vector<int> chordCandidates;
Chris@91	592 for (int iChord = 0; iChord+1 < nChord; iChord++) {
matthiasm@43	593 // float currsum = 0;
Chris@91	594 // for (int iFrame = startIndex; iFrame < endIndex; ++iFrame) {
matthiasm@43	595 // currsum += chordogram[iFrame][iChord];
matthiasm@43	596 // }
matthiasm@43	597 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
Chris@91	598 for (int iFrame = startIndex; iFrame < endIndex; ++iFrame) {
matthiasm@43	599 if (chordogram[iFrame][iChord] > chordThreshold) {
matthiasm@43	600 chordCandidates.push_back(iChord);
matthiasm@43	601 break;
matthiasm@43	602 }
Chris@23	603 }
Chris@23	604 }
matthiasm@43	605 chordCandidates.push_back(nChord-1);
matthiasm@43	606 // cerr << chordCandidates.size() << endl;
matthiasm@43	607
matthiasm@43	608 float maxval = 0; // will be the value of the most salient chord change in this frame
matthiasm@43	609 float maxindex = 0; //... and the index thereof
Chris@91	610 int bestchordL = nChord-1; // index of the best "left" chord
Chris@91	611 int bestchordR = nChord-1; // index of the best "right" chord
matthiasm@43	612
matthiasm@43	613 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
matthiasm@43	614 // now find the max values on both sides of iWF
matthiasm@43	615 // left side:
matthiasm@43	616 float maxL = 0;
Chris@91	617 int maxindL = nChord-1;
Chris@91	618 for (int kChord = 0; kChord < (int)chordCandidates.size(); kChord++) {
Chris@91	619 int iChord = chordCandidates[kChord];
matthiasm@43	620 float currsum = 0;
Chris@91	621 for (int iFrame = 0; iFrame < iWF-1; ++iFrame) {
matthiasm@43	622 currsum += chordogram[count+iFrame][iChord];
matthiasm@43	623 }
matthiasm@43	624 if (iChord == nChord-1) currsum *= 0.8;
matthiasm@43	625 if (currsum > maxL) {
matthiasm@43	626 maxL = currsum;
matthiasm@43	627 maxindL = iChord;
matthiasm@43	628 }
matthiasm@43	629 }
matthiasm@43	630 // right side:
matthiasm@43	631 float maxR = 0;
Chris@91	632 int maxindR = nChord-1;
Chris@91	633 for (int kChord = 0; kChord < (int)chordCandidates.size(); kChord++) {
Chris@91	634 int iChord = chordCandidates[kChord];
matthiasm@43	635 float currsum = 0;
Chris@91	636 for (int iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
matthiasm@43	637 currsum += chordogram[count+iFrame][iChord];
matthiasm@43	638 }
matthiasm@43	639 if (iChord == nChord-1) currsum *= 0.8;
matthiasm@43	640 if (currsum > maxR) {
matthiasm@43	641 maxR = currsum;
matthiasm@43	642 maxindR = iChord;
matthiasm@43	643 }
matthiasm@43	644 }
matthiasm@43	645 if (maxL+maxR > maxval) {
matthiasm@43	646 maxval = maxL+maxR;
matthiasm@43	647 maxindex = iWF;
matthiasm@43	648 bestchordL = maxindL;
matthiasm@43	649 bestchordR = maxindR;
matthiasm@43	650 }
matthiasm@43	651
Chris@23	652 }
matthiasm@43	653 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
matthiasm@43	654 // add a score to every chord-frame-point that was part of a maximum
Chris@91	655 for (int iFrame = 0; iFrame < maxindex-1; ++iFrame) {
matthiasm@43	656 scoreChordogram[iFrame+count][bestchordL]++;
matthiasm@43	657 }
Chris@91	658 for (int iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
matthiasm@43	659 scoreChordogram[iFrame+count][bestchordR]++;
matthiasm@43	660 }
matthiasm@50	661 if (bestchordL != bestchordR) {
matthiasm@50	662 chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
matthiasm@50	663 }
matthiasm@43	664 count++;
Chris@23	665 }
matthiasm@43	666 // cerr << "***** agent finished *****" << endl;
matthiasm@43	667 count = 0;
matthiasm@43	668 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) {
matthiasm@43	669 float maxval = 0; // will be the value of the most salient chord in this frame
matthiasm@43	670 float maxindex = 0; //... and the index thereof
Chris@91	671 for (int iChord = 0; iChord < nChord; iChord++) {
matthiasm@43	672 if (scoreChordogram[count][iChord] > maxval) {
matthiasm@43	673 maxval = scoreChordogram[count][iChord];
matthiasm@43	674 maxindex = iChord;
matthiasm@43	675 // cerr << iChord << endl;
matthiasm@43	676 }
matthiasm@43	677 }
matthiasm@43	678 chordSequence.push_back(maxindex);
matthiasm@43	679 count++;
Chris@23	680 }
matthiasm@43	681
matthiasm@43	682
matthiasm@43	683 // mode filter on chordSequence
matthiasm@43	684 count = 0;
matthiasm@43	685 string oldChord = "";
matthiasm@43	686 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) {
matthiasm@43	687 Feature chord_feature; // chord estimate
matthiasm@43	688 chord_feature.hasTimestamp = true;
matthiasm@43	689 chord_feature.timestamp = *it;
matthiasm@43	690 // Feature currentChord; // chord estimate
matthiasm@43	691 // currentChord.hasTimestamp = true;
matthiasm@43	692 // currentChord.timestamp = currentChromas.timestamp;
matthiasm@43	693
matthiasm@43	694 vector<int> chordCount = vector<int>(nChord,0);
matthiasm@43	695 int maxChordCount = 0;
matthiasm@43	696 int maxChordIndex = nChord-1;
matthiasm@43	697 string maxChord;
matthiasm@43	698 int startIndex = max(count - halfwindowlength/2,0);
matthiasm@43	699 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
matthiasm@43	700 for (int i = startIndex; i < endIndex; i++) {
matthiasm@43	701 chordCount[chordSequence[i]]++;
matthiasm@43	702 if (chordCount[chordSequence[i]] > maxChordCount) {
matthiasm@43	703 // cerr << "start index " << startIndex << endl;
matthiasm@43	704 maxChordCount++;
matthiasm@43	705 maxChordIndex = chordSequence[i];
matthiasm@43	706 maxChord = m_chordnames[maxChordIndex];
matthiasm@43	707 }
matthiasm@43	708 }
matthiasm@43	709 // chordSequence[count] = maxChordIndex;
matthiasm@43	710 // cerr << maxChordIndex << endl;
matthiasm@50	711 // cerr << chordchange[count] << endl;
matthiasm@43	712 if (oldChord != maxChord) {
matthiasm@43	713 oldChord = maxChord;
matthiasm@43	714 chord_feature.label = m_chordnames[maxChordIndex];
mail@60	715 fsOut[m_outputChords].push_back(chord_feature);
Chris@91	716 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
matthiasm@86	717 oldnotes[iNote].duration = oldnotes[iNote].duration + chord_feature.timestamp;
matthiasm@86	718 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
matthiasm@86	719 }
matthiasm@86	720 oldnotes.clear();
Chris@91	721 for (int iNote = 0; iNote < (int)m_chordnotes[maxChordIndex].size(); ++iNote) { // prepare notes of current chord
matthiasm@86	722 Feature chordnote_feature;
matthiasm@86	723 chordnote_feature.hasTimestamp = true;
matthiasm@86	724 chordnote_feature.timestamp = chord_feature.timestamp;
matthiasm@86	725 chordnote_feature.values.push_back(m_chordnotes[maxChordIndex][iNote]);
matthiasm@86	726 chordnote_feature.hasDuration = true;
matthiasm@86	727 chordnote_feature.duration = -chord_feature.timestamp; // this will be corrected at the next chord
matthiasm@86	728 oldnotes.push_back(chordnote_feature);
matthiasm@86	729 }
matthiasm@43	730 }
matthiasm@43	731 count++;
Chris@23	732 }
Chris@23	733 }
matthiasm@43	734 Feature chord_feature; // last chord estimate
matthiasm@43	735 chord_feature.hasTimestamp = true;
matthiasm@43	736 chord_feature.timestamp = timestamps[timestamps.size()-1];
matthiasm@43	737 chord_feature.label = "N";
mail@60	738 fsOut[m_outputChords].push_back(chord_feature);
matthiasm@86	739
Chris@91	740 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
matthiasm@86	741 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[timestamps.size()-1];
matthiasm@86	742 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
matthiasm@86	743 }
matthiasm@86	744
Chris@23	745 cerr << "done." << endl;
matthiasm@50	746
matthiasm@50	747 for (int iFrame = 0; iFrame < nFrame; iFrame++) {
matthiasm@50	748 Feature chordchange_feature;
matthiasm@50	749 chordchange_feature.hasTimestamp = true;
matthiasm@50	750 chordchange_feature.timestamp = timestamps[iFrame];
matthiasm@50	751 chordchange_feature.values.push_back(chordchange[iFrame]);
mail@60	752 // cerr << chordchange[iFrame] << endl;
mail@60	753 fsOut[m_outputHarmonicChange].push_back(chordchange_feature);
matthiasm@50	754 }
matthiasm@50	755
mail@60	756 // for (int iFrame = 0; iFrame < nFrame; iFrame++) cerr << fsOut[m_outputHarmonicChange][iFrame].values[0] << endl;
matthiasm@50	757
matthiasm@50	758
Chris@23	759 return fsOut;
matthiasm@0	760 }

Mercurial > hg > nnls-chroma

annotate Chordino.cpp @ 95:dab7e7bfeba1 matthiasm-plugin 0.2