nnls-chroma: Chordino.cpp annotate

annotate Chordino.cpp @ 86:e5c16976513d consonance

implemented note output for estimated chords

author	matthiasm
date	Sun, 28 Nov 2010 23:10:57 +0900
parents	4270f3039ab0
children	7af5312e66f8

rev	line source
Chris@23	1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
matthiasm@0	2
Chris@35	3 /*
Chris@35	4 NNLS-Chroma / Chordino
Chris@35	5
Chris@35	6 Audio feature extraction plugins for chromagram and chord
Chris@35	7 estimation.
Chris@35	8
Chris@35	9 Centre for Digital Music, Queen Mary University of London.
Chris@35	10 This file copyright 2008-2010 Matthias Mauch and QMUL.
Chris@35	11
Chris@35	12 This program is free software; you can redistribute it and/or
Chris@35	13 modify it under the terms of the GNU General Public License as
Chris@35	14 published by the Free Software Foundation; either version 2 of the
Chris@35	15 License, or (at your option) any later version. See the file
Chris@35	16 COPYING included with this distribution for more information.
Chris@35	17 */
Chris@35	18
Chris@35	19 #include "Chordino.h"
Chris@27	20
Chris@27	21 #include "chromamethods.h"
matthiasm@43	22 #include "viterbi.h"
Chris@27	23
Chris@27	24 #include <cstdlib>
Chris@27	25 #include <fstream>
matthiasm@0	26 #include <cmath>
matthiasm@9	27
Chris@27	28 #include <algorithm>
matthiasm@0	29
// File-wide switch for the "--> <function name>" trace lines that the methods
// below write to cerr; with the value false those guarded traces are skipped.
matthiasm@0	30 const bool debug_on = false;
matthiasm@0	31
// Constructor.
// Passes inputSampleRate on to the NNLSBase constructor, initialises the three
// chord-dictionary members with 0, and then populates them through
// chordDictionary().
Chris@35	32 Chordino::Chordino(float inputSampleRate) :
matthiasm@86	33 NNLSBase(inputSampleRate),
matthiasm@86	34 m_chorddict(0),
matthiasm@86	35 m_chordnotes(0),
matthiasm@86	36 m_chordnames(0)
matthiasm@0	37 {
Chris@35	38 if (debug_on) cerr << "--> Chordino" << endl;
matthiasm@86	39 // get the chord dictionary from file (if the file exists)
// chordDictionary() is defined outside this file: it receives the addresses of
// m_chorddict and m_chordnotes as output arguments, and its return value is
// stored as the list of chord names.
matthiasm@86	40 m_chordnames = chordDictionary(&m_chorddict, &m_chordnotes);
matthiasm@86	41
matthiasm@0	42 }
matthiasm@0	43
// Destructor. Performs no explicit cleanup of its own; it only writes a trace
// line when debug_on is set.
Chris@35	44 Chordino::~Chordino()
matthiasm@0	45 {
Chris@35	46 if (debug_on) cerr << "--> ~Chordino" << endl;
matthiasm@0	47 }
matthiasm@0	48
// Returns the fixed identifier string of this plugin, "chordino".
matthiasm@0	49 string
Chris@35	50 Chordino::getIdentifier() const
matthiasm@0	51 {
Chris@23	52 if (debug_on) cerr << "--> getIdentifier" << endl;
Chris@35	53 return "chordino";
matthiasm@0	54 }
matthiasm@0	55
// Returns the display name of this plugin, "Chordino".
matthiasm@0	56 string
Chris@35	57 Chordino::getName() const
matthiasm@0	58 {
Chris@23	59 if (debug_on) cerr << "--> getName" << endl;
Chris@35	60 return "Chordino";
matthiasm@0	61 }
matthiasm@0	62
// Returns the long, human-readable description of the plugin: what it is based
// on (NNLS Chroma), where chord profiles come from (chord.dict) and which two
// smoothing methods are offered.
matthiasm@0	63 string
Chris@35	64 Chordino::getDescription() const
matthiasm@0	65 {
Chris@23	66 if (debug_on) cerr << "--> getDescription" << endl;
matthiasm@58	67 return "Chordino provides a simple chord transcription based on NNLS Chroma (as in the NNLS Chroma plugin). Chord profiles given by the user in the file chord.dict are used to calculate frame-wise chord similarities. Two simple (non-state-of-the-art!) algorithms are available that smooth these to provide a chord transcription: a simple chord change method, and a standard HMM/Viterbi approach.";
matthiasm@0	68 }
matthiasm@0	69
// Builds and returns the list of user-adjustable parameters.
// Six descriptors are appended, in this order:
//   useNNLS, useHMM, rollon, tuningmode, whitening, s
// Each descriptor carries its identifier, display name, description, unit,
// value range, default and (where applicable) quantisation settings.
matthiasm@50	70 Chordino::ParameterList
matthiasm@50	71 Chordino::getParameterDescriptors() const
matthiasm@50	72 {
matthiasm@50	73 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
matthiasm@50	74 ParameterList list;
matthiasm@50	75
// useNNLS: on/off switch (0 or 1, default 1).
matthiasm@50	76 ParameterDescriptor d;
matthiasm@50	77 d.identifier = "useNNLS";
matthiasm@50	78 d.name = "use approximate transcription (NNLS)";
matthiasm@50	79 d.description = "Toggles approximate transcription (NNLS).";
matthiasm@50	80 d.unit = "";
matthiasm@50	81 d.minValue = 0.0;
matthiasm@50	82 d.maxValue = 1.0;
matthiasm@50	83 d.defaultValue = 1.0;
matthiasm@50	84 d.isQuantized = true;
matthiasm@50	85 d.quantizeStep = 1.0;
matthiasm@50	86 list.push_back(d);
matthiasm@50	87
// useHMM: on/off switch (0 or 1, default 1) selecting Viterbi decoding over
// the simple chord estimator.
matthiasm@50	88 ParameterDescriptor d4;
matthiasm@50	89 d4.identifier = "useHMM";
matthiasm@53	90 d4.name = "HMM (Viterbi decoding)";
matthiasm@50	91 d4.description = "Turns on Viterbi decoding (when off, the simple chord estimator is used).";
matthiasm@50	92 d4.unit = "";
matthiasm@50	93 d4.minValue = 0.0;
matthiasm@50	94 d4.maxValue = 1.0;
matthiasm@50	95 d4.defaultValue = 1.0;
matthiasm@50	96 d4.isQuantized = true;
matthiasm@50	97 d4.quantizeStep = 1.0;
matthiasm@50	98 list.push_back(d4);
matthiasm@50	99
// rollon: percentage in [0, 5] in steps of 0.5, default 0.
matthiasm@50	100 ParameterDescriptor d0;
matthiasm@50	101 d0.identifier = "rollon";
matthiasm@50	102 d0.name = "spectral roll-on";
matthiasm@58	103 d0.description = "Consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds the quantile [spectral roll on] x [total energy] will be set to 0. A value of 0 means that no bins will be changed.";
matthiasm@59	104 d0.unit = "%";
matthiasm@50	105 d0.minValue = 0;
mail@76	106 d0.maxValue = 5;
matthiasm@50	107 d0.defaultValue = 0;
matthiasm@50	108 d0.isQuantized = true;
mail@76	109 d0.quantizeStep = 0.5;
matthiasm@50	110 list.push_back(d0);
matthiasm@50	111
// tuningmode: two named values, 0 = "global tuning" (default), 1 = "local tuning".
matthiasm@50	112 ParameterDescriptor d1;
matthiasm@50	113 d1.identifier = "tuningmode";
matthiasm@50	114 d1.name = "tuning mode";
matthiasm@50	115 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
matthiasm@50	116 d1.unit = "";
matthiasm@50	117 d1.minValue = 0;
matthiasm@50	118 d1.maxValue = 1;
matthiasm@50	119 d1.defaultValue = 0;
matthiasm@50	120 d1.isQuantized = true;
matthiasm@50	121 d1.valueNames.push_back("global tuning");
matthiasm@50	122 d1.valueNames.push_back("local tuning");
matthiasm@50	123 d1.quantizeStep = 1.0;
matthiasm@50	124 list.push_back(d1);
matthiasm@50	125
// whitening: value in [0, 1], default 1. The descriptor is deliberately left
// non-quantized (isQuantized is assigned exactly once, to false), so any value
// inside the range may be chosen.
matthiasm@50	126 ParameterDescriptor d2;
matthiasm@50	127 d2.identifier = "whitening";
matthiasm@50	128 d2.name = "spectral whitening";
matthiasm@50	129 d2.description = "Spectral whitening: no whitening - 0; whitening - 1.";
matthiasm@50	130 d2.unit = "";
matthiasm@50	132 d2.minValue = 0.0;
matthiasm@50	133 d2.maxValue = 1.0;
matthiasm@50	134 d2.defaultValue = 1.0;
matthiasm@50	135 d2.isQuantized = false;
matthiasm@50	136 list.push_back(d2);
matthiasm@50	137
// s: continuous value in [0.5, 0.9], default 0.7.
matthiasm@50	138 ParameterDescriptor d3;
matthiasm@50	139 d3.identifier = "s";
matthiasm@50	140 d3.name = "spectral shape";
matthiasm@50	141 d3.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
matthiasm@50	142 d3.unit = "";
matthiasm@50	143 d3.minValue = 0.5;
matthiasm@50	144 d3.maxValue = 0.9;
matthiasm@50	145 d3.defaultValue = 0.7;
matthiasm@50	146 d3.isQuantized = false;
matthiasm@50	147 list.push_back(d3);
matthiasm@50	148
// Disabled "chromanormalize" parameter. Note that it reuses the variable name
// d4, which is already taken by the useHMM descriptor above, so it would need
// renaming before it could be re-enabled.
matthiasm@50	149 // ParameterDescriptor d4;
matthiasm@50	150 // d4.identifier = "chromanormalize";
matthiasm@50	151 // d4.name = "chroma normalization";
matthiasm@50	152 // d4.description = "How shall the chroma vector be normalized?";
matthiasm@50	153 // d4.unit = "";
matthiasm@50	154 // d4.minValue = 0;
matthiasm@50	155 // d4.maxValue = 3;
matthiasm@50	156 // d4.defaultValue = 0;
matthiasm@50	157 // d4.isQuantized = true;
matthiasm@50	158 // d4.valueNames.push_back("none");
matthiasm@50	159 // d4.valueNames.push_back("maximum norm");
matthiasm@50	160 // d4.valueNames.push_back("L1 norm");
matthiasm@50	161 // d4.valueNames.push_back("L2 norm");
matthiasm@50	162 // d4.quantizeStep = 1.0;
matthiasm@50	163 // list.push_back(d4);
matthiasm@50	164
matthiasm@50	165 return list;
matthiasm@50	166 }
matthiasm@50	167
// Builds and returns the list of plugin outputs.
// Three descriptors are appended, in this order:
//   simplechord, chordnotes, harmonicchange
// As each one is appended, its position in the list is stored in
// m_outputChords, m_outputChordnotes and m_outputHarmonicChange respectively;
// getRemainingFeatures() uses those members to index the returned FeatureSet.
// NOTE(review): this method is const yet assigns to those members, so they are
// presumably declared mutable in the header - confirm.
Chris@35	168 Chordino::OutputList
Chris@35	169 Chordino::getOutputDescriptors() const
matthiasm@0	170 {
Chris@23	171 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
matthiasm@0	172 OutputList list;
matthiasm@0	173
// Running position of the next descriptor pushed onto the list.
Chris@35	174 int index = 0;
matthiasm@0	175
// Output 0: chord labels. Zero bins per feature, variable sample rate, no duration.
matthiasm@0	176 OutputDescriptor d7;
matthiasm@0	177 d7.identifier = "simplechord";
Chris@36	178 d7.name = "Chord Estimate";
matthiasm@58	179 d7.description = "Estimated chord times and labels. Two simple (non-state-of-the-art!) algorithms are available that smooth these to provide a chord transcription: a simple chord change method, and a standard HMM/Viterbi approach.";
matthiasm@0	180 d7.unit = "";
matthiasm@0	181 d7.hasFixedBinCount = true;
matthiasm@0	182 d7.binCount = 0;
matthiasm@0	183 d7.hasKnownExtents = false;
matthiasm@0	184 d7.isQuantized = false;
matthiasm@0	185 d7.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@0	186 d7.hasDuration = false;
// While m_stepSize is still 0, a step of 2048 samples is assumed for the rate.
matthiasm@0	187 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0	188 list.push_back(d7);
Chris@35	189 m_outputChords = index++;
matthiasm@0	190
// Output 1: chord notes. One bin per feature, integer values in 0..127, with a
// duration on every feature.
matthiasm@86	191 OutputDescriptor chordnotes;
matthiasm@86	192 chordnotes.identifier = "chordnotes";
matthiasm@86	193 chordnotes.name = "Note Representation of Chord Estimate";
matthiasm@86	194 chordnotes.description = "A simple represenation of the estimated chord with bass note (if applicable) and chord notes.";
matthiasm@86	195 chordnotes.unit = "MIDI units";
matthiasm@86	196 chordnotes.hasFixedBinCount = true;
matthiasm@86	197 chordnotes.binCount = 1;
matthiasm@86	198 chordnotes.hasKnownExtents = true;
matthiasm@86	199 chordnotes.minValue = 0;
matthiasm@86	200 chordnotes.maxValue = 127;
matthiasm@86	201 chordnotes.isQuantized = true;
matthiasm@86	202 chordnotes.quantizeStep = 1;
matthiasm@86	203 chordnotes.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@86	204 chordnotes.hasDuration = true;
// Same rate rule as for the chord label output above.
matthiasm@86	205 chordnotes.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@86	206 list.push_back(chordnotes);
matthiasm@86	207 m_outputChordnotes = index++;
matthiasm@86	208
// Output 2: harmonic change value. One bin per feature, extents not declared.
Chris@23	209 OutputDescriptor d8;
mail@60	210 d8.identifier = "harmonicchange";
Chris@36	211 d8.name = "Harmonic Change Value";
matthiasm@58	212 d8.description = "An indication of the likelihood of harmonic change. Depends on the chord dictionary. Calculation is different depending on whether the Viterbi algorithm is used for chord estimation, or the simple chord estimate.";
matthiasm@17	213 d8.unit = "";
matthiasm@17	214 d8.hasFixedBinCount = true;
matthiasm@17	215 d8.binCount = 1;
mail@60	216 d8.hasKnownExtents = false;
mail@60	217 // d8.minValue = 0.0;
mail@60	218 // d8.maxValue = 0.999;
matthiasm@17	219 d8.isQuantized = false;
matthiasm@17	220 d8.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@17	221 d8.hasDuration = false;
// NOTE(review): sampleType is FixedSampleRate but the sampleRate assignment
// below is commented out, so d8.sampleRate keeps whatever OutputDescriptor's
// constructor gives it - confirm this is intended.
matthiasm@17	222 // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@17	223 list.push_back(d8);
Chris@35	224 m_outputHarmonicChange = index++;
matthiasm@1	225
matthiasm@0	226 return list;
matthiasm@0	227 }
matthiasm@0	228
// Initialises the plugin by delegating to NNLSBase::initialise() with the same
// channels, stepSize and blockSize.
// Returns false when the base class initialisation fails, true otherwise.
// No Chordino-specific setup happens here.
matthiasm@0	229 bool
Chris@35	230 Chordino::initialise(size_t channels, size_t stepSize, size_t blockSize)
matthiasm@0	231 {
// The trace below is written without a trailing newline.
Chris@23	232 if (debug_on) {
Chris@23	233 cerr << "--> initialise";
Chris@23	234 }
mail@76	235
Chris@35	236 if (!NNLSBase::initialise(channels, stepSize, blockSize)) {
Chris@35	237 return false;
Chris@35	238 }
matthiasm@1	239
matthiasm@0	240 return true;
matthiasm@0	241 }
matthiasm@0	242
// Resets the plugin by delegating to NNLSBase::reset(); Chordino adds nothing
// of its own. The debug trace is written without a trailing newline.
matthiasm@0	243 void
Chris@35	244 Chordino::reset()
matthiasm@0	245 {
Chris@23	246 if (debug_on) cerr << "--> reset";
Chris@35	247 NNLSBase::reset();
matthiasm@0	248 }
matthiasm@0	249
// Per-block processing entry point.
//   inputBuffers - input data for this block, passed through unchanged to the
//                  base class (pointer-to-pointer, as in the Vamp process() API)
//   timestamp    - time of this block, also passed through unchanged
// Returns an empty FeatureSet for every block: the data is only handed to
// NNLSBase::baseProcess() here, and the actual outputs are assembled in
// getRemainingFeatures().
Chris@35	250 Chordino::FeatureSet
Chris@35	251 Chordino::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
matthiasm@0	252 {
Chris@23	253 if (debug_on) cerr << "--> process" << endl;
matthiasm@0	254
Chris@35	255 NNLSBase::baseProcess(inputBuffers, timestamp);
matthiasm@0	256
Chris@35	257 return FeatureSet();
matthiasm@0	258 }
matthiasm@0	259
Chris@35	260 Chordino::FeatureSet
Chris@35	261 Chordino::getRemainingFeatures()
matthiasm@0	262 {
mail@76	263 cerr << hw[0] << hw[1] << endl;
Chris@23	264 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
Chris@23	265 FeatureSet fsOut;
Chris@35	266 if (m_logSpectrum.size() == 0) return fsOut;
Chris@23	267 int nChord = m_chordnames.size();
Chris@23	268 //
Chris@23	269 /** Calculate Tuning
Chris@23	270 calculate tuning from (using the angle of the complex number defined by the
Chris@23	271 cumulative mean real and imag values)
Chris@23	272 **/
mail@80	273 float meanTuningImag = 0;
mail@80	274 float meanTuningReal = 0;
mail@80	275 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
mail@80	276 meanTuningReal += m_meanTunings[iBPS] * cosvalues[iBPS];
mail@80	277 meanTuningImag += m_meanTunings[iBPS] * sinvalues[iBPS];
mail@80	278 }
Chris@23	279 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
Chris@23	280 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
Chris@23	281 int intShift = floor(normalisedtuning * 3);
mail@80	282 float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this
matthiasm@1	283
Chris@23	284 char buffer0 [50];
matthiasm@1	285
Chris@23	286 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
matthiasm@1	287
matthiasm@1	288
Chris@23	289 /** Tune Log-Frequency Spectrogram
matthiasm@43	290 calculate a tuned log-frequency spectrogram (currentTunedSpec): use the tuning estimated above (kinda f0) to
matthiasm@43	291 perform linear interpolation on the existing log-frequency spectrogram (kinda currentLogSpectum).
Chris@23	292 **/
Chris@35	293 cerr << endl << "[Chordino Plugin] Tuning Log-Frequency Spectrogram ... ";
matthiasm@13	294
Chris@23	295 float tempValue = 0;
Chris@23	296 float dbThreshold = 0; // relative to the background spectrum
Chris@23	297 float thresh = pow(10,dbThreshold/20);
Chris@23	298 // cerr << "tune local ? " << m_tuneLocal << endl;
Chris@23	299 int count = 0;
matthiasm@1	300
Chris@35	301 FeatureList tunedSpec;
matthiasm@43	302 int nFrame = m_logSpectrum.size();
matthiasm@43	303
matthiasm@43	304 vector<Vamp::RealTime> timestamps;
Chris@35	305
Chris@35	306 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
matthiasm@43	307 Feature currentLogSpectum = *i;
matthiasm@43	308 Feature currentTunedSpec; // tuned log-frequency spectrum
matthiasm@43	309 currentTunedSpec.hasTimestamp = true;
matthiasm@43	310 currentTunedSpec.timestamp = currentLogSpectum.timestamp;
matthiasm@43	311 timestamps.push_back(currentLogSpectum.timestamp);
matthiasm@43	312 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // set lower edge to zero
matthiasm@1	313
Chris@23	314 if (m_tuneLocal) {
Chris@23	315 intShift = floor(m_localTuning[count] * 3);
mail@80	316 floatShift = m_localTuning[count] * 3 - intShift; // floatShift is a really bad name for this
Chris@23	317 }
matthiasm@1	318
mail@80	319 // cerr << intShift << " " << floatShift << endl;
matthiasm@1	320
matthiasm@43	321 for (unsigned k = 2; k < currentLogSpectum.values.size() - 3; ++k) { // interpolate all inner bins
mail@80	322 tempValue = currentLogSpectum.values[k + intShift] * (1-floatShift) + currentLogSpectum.values[k+intShift+1] * floatShift;
matthiasm@43	323 currentTunedSpec.values.push_back(tempValue);
Chris@23	324 }
matthiasm@1	325
matthiasm@43	326 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // upper edge
matthiasm@43	327 vector<float> runningmean = SpecialConvolution(currentTunedSpec.values,hw);
Chris@23	328 vector<float> runningstd;
mail@77	329 for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance)
matthiasm@43	330 runningstd.push_back((currentTunedSpec.values[i] - runningmean[i]) * (currentTunedSpec.values[i] - runningmean[i]));
Chris@23	331 }
Chris@23	332 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
mail@77	333 for (int i = 0; i < nNote; i++) {
Chris@23	334 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
Chris@23	335 if (runningstd[i] > 0) {
matthiasm@43	336 // currentTunedSpec.values[i] = (currentTunedSpec.values[i] / runningmean[i]) > thresh ?
matthiasm@43	337 // (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
matthiasm@43	338 currentTunedSpec.values[i] = (currentTunedSpec.values[i] - runningmean[i]) > 0 ?
matthiasm@43	339 (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
Chris@23	340 }
matthiasm@43	341 if (currentTunedSpec.values[i] < 0) {
Chris@23	342 cerr << "ERROR: negative value in logfreq spectrum" << endl;
Chris@23	343 }
Chris@23	344 }
matthiasm@43	345 tunedSpec.push_back(currentTunedSpec);
Chris@23	346 count++;
Chris@23	347 }
Chris@23	348 cerr << "done." << endl;
matthiasm@1	349
Chris@23	350 /** Semitone spectrum and chromagrams
Chris@23	351 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
Chris@23	352 is inferred using a non-negative least squares algorithm.
Chris@23	353 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
Chris@23	354 bass and treble stacked onto each other).
Chris@23	355 **/
matthiasm@42	356 if (m_useNNLS == 0) {
Chris@35	357 cerr << "[Chordino Plugin] Mapping to semitone spectrum and chroma ... ";
Chris@23	358 } else {
Chris@35	359 cerr << "[Chordino Plugin] Performing NNLS and mapping to chroma ... ";
Chris@23	360 }
matthiasm@13	361
matthiasm@1	362
matthiasm@43	363 vector<vector<double> > chordogram;
Chris@23	364 vector<vector<int> > scoreChordogram;
Chris@35	365 vector<float> chordchange = vector<float>(tunedSpec.size(),0);
Chris@23	366 count = 0;
matthiasm@9	367
Chris@35	368 FeatureList chromaList;
matthiasm@43	369
matthiasm@43	370
Chris@35	371
Chris@35	372 for (FeatureList::iterator it = tunedSpec.begin(); it != tunedSpec.end(); ++it) {
matthiasm@43	373 Feature currentTunedSpec = *it; // logfreq spectrum
matthiasm@43	374 Feature currentChromas; // treble and bass chromagram
Chris@35	375
matthiasm@43	376 currentChromas.hasTimestamp = true;
matthiasm@43	377 currentChromas.timestamp = currentTunedSpec.timestamp;
Chris@35	378
mail@77	379 float b[nNote];
matthiasm@1	380
Chris@23	381 bool some_b_greater_zero = false;
Chris@23	382 float sumb = 0;
mail@77	383 for (int i = 0; i < nNote; i++) {
mail@77	384 // b[i] = m_dict[(nNote * count + i) % (nNote * 84)];
matthiasm@43	385 b[i] = currentTunedSpec.values[i];
Chris@23	386 sumb += b[i];
Chris@23	387 if (b[i] > 0) {
Chris@23	388 some_b_greater_zero = true;
Chris@23	389 }
Chris@23	390 }
matthiasm@1	391
Chris@23	392 // here's where the non-negative least squares algorithm calculates the note activation x
matthiasm@1	393
Chris@23	394 vector<float> chroma = vector<float>(12, 0);
Chris@23	395 vector<float> basschroma = vector<float>(12, 0);
Chris@23	396 float currval;
Chris@23	397 unsigned iSemitone = 0;
matthiasm@1	398
Chris@23	399 if (some_b_greater_zero) {
matthiasm@42	400 if (m_useNNLS == 0) {
mail@81	401 for (unsigned iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
Chris@23	402 currval = 0;
mail@81	403 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
mail@81	404 currval += b[iNote + iBPS] * (1-abs(iBPS*1.0/(nBPS/2+1)));
mail@81	405 }
Chris@23	406 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
Chris@23	407 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
Chris@23	408 iSemitone++;
Chris@23	409 }
matthiasm@1	410
Chris@23	411 } else {
Chris@35	412 float x[84+1000];
Chris@23	413 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
Chris@23	414 vector<int> signifIndex;
Chris@23	415 int index=0;
Chris@23	416 sumb /= 84.0;
mail@81	417 for (unsigned iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
Chris@23	418 float currval = 0;
mail@81	419 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
mail@81	420 currval += b[iNote + iBPS];
mail@81	421 }
Chris@23	422 if (currval > 0) signifIndex.push_back(index);
Chris@23	423 index++;
Chris@23	424 }
Chris@35	425 float rnorm;
Chris@35	426 float w[84+1000];
Chris@35	427 float zz[84+1000];
Chris@23	428 int indx[84+1000];
Chris@23	429 int mode;
mail@77	430 int dictsize = nNote*signifIndex.size();
mail@81	431 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
Chris@35	432 float *curr_dict = new float[dictsize];
Chris@23	433 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
mail@77	434 for (unsigned iBin = 0; iBin < nNote; iBin++) {
mail@77	435 curr_dict[iNote * nNote + iBin] = 1.0 * m_dict[signifIndex[iNote] * nNote + iBin];
Chris@23	436 }
Chris@23	437 }
Chris@35	438 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
Chris@23	439 delete [] curr_dict;
Chris@23	440 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
Chris@23	441 // cerr << mode << endl;
Chris@23	442 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
Chris@23	443 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
Chris@23	444 }
Chris@23	445 }
Chris@23	446 }
Chris@35	447
Chris@35	448 vector<float> origchroma = chroma;
Chris@23	449 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
matthiasm@43	450 currentChromas.values = chroma;
Chris@35	451
Chris@23	452 if (m_doNormalizeChroma > 0) {
Chris@23	453 vector<float> chromanorm = vector<float>(3,0);
Chris@23	454 switch (int(m_doNormalizeChroma)) {
Chris@23	455 case 0: // should never end up here
Chris@23	456 break;
Chris@23	457 case 1:
Chris@35	458 chromanorm[0] = *max_element(origchroma.begin(), origchroma.end());
Chris@35	459 chromanorm[1] = *max_element(basschroma.begin(), basschroma.end());
Chris@23	460 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
Chris@23	461 break;
Chris@23	462 case 2:
Chris@35	463 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
Chris@23	464 chromanorm[2] += *it;
Chris@23	465 }
Chris@23	466 break;
Chris@23	467 case 3:
Chris@35	468 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
Chris@23	469 chromanorm[2] += pow(*it,2);
Chris@23	470 }
Chris@23	471 chromanorm[2] = sqrt(chromanorm[2]);
Chris@23	472 break;
Chris@23	473 }
Chris@23	474 if (chromanorm[2] > 0) {
Chris@35	475 for (int i = 0; i < chroma.size(); i++) {
matthiasm@43	476 currentChromas.values[i] /= chromanorm[2];
Chris@23	477 }
Chris@23	478 }
Chris@23	479 }
Chris@35	480
matthiasm@43	481 chromaList.push_back(currentChromas);
Chris@35	482
Chris@23	483 // local chord estimation
matthiasm@43	484 vector<double> currentChordSalience;
matthiasm@43	485 double tempchordvalue = 0;
matthiasm@43	486 double sumchordvalue = 0;
matthiasm@9	487
Chris@23	488 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23	489 tempchordvalue = 0;
Chris@23	490 for (int iBin = 0; iBin < 12; iBin++) {
matthiasm@44	491 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23	492 }
Chris@23	493 for (int iBin = 12; iBin < 24; iBin++) {
Chris@23	494 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23	495 }
matthiasm@48	496 if (iChord == nChord-1) tempchordvalue *= .7;
matthiasm@48	497 if (tempchordvalue < 0) tempchordvalue = 0.0;
matthiasm@50	498 tempchordvalue = pow(1.3,tempchordvalue);
Chris@23	499 sumchordvalue+=tempchordvalue;
Chris@23	500 currentChordSalience.push_back(tempchordvalue);
Chris@23	501 }
Chris@23	502 if (sumchordvalue > 0) {
Chris@23	503 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23	504 currentChordSalience[iChord] /= sumchordvalue;
Chris@23	505 }
Chris@23	506 } else {
Chris@23	507 currentChordSalience[nChord-1] = 1.0;
Chris@23	508 }
Chris@23	509 chordogram.push_back(currentChordSalience);
matthiasm@1	510
Chris@23	511 count++;
Chris@23	512 }
Chris@23	513 cerr << "done." << endl;
matthiasm@13	514
matthiasm@86	515 vector<Feature> oldnotes;
matthiasm@10	516
matthiasm@50	517 // bool m_useHMM = true; // this will go into the chordino header file.
matthiasm@50	518 if (m_useHMM == 1.0) {
matthiasm@44	519 cerr << "[Chordino Plugin] HMM Chord Estimation ... ";
matthiasm@43	520 int oldchord = nChord-1;
matthiasm@48	521 double selftransprob = 0.99;
matthiasm@43	522
matthiasm@48	523 // vector<double> init = vector<double>(nChord,1.0/nChord);
matthiasm@48	524 vector<double> init = vector<double>(nChord,0); init[nChord-1] = 1;
matthiasm@48	525
matthiasm@50	526 double *delta;
matthiasm@50	527 delta = (double )malloc(sizeof(double)nFrame*nChord);
matthiasm@50	528
matthiasm@43	529 vector<vector<double> > trans;
matthiasm@43	530 for (int iChord = 0; iChord < nChord; iChord++) {
matthiasm@43	531 vector<double> temp = vector<double>(nChord,(1-selftransprob)/(nChord-1));
matthiasm@43	532 temp[iChord] = selftransprob;
matthiasm@43	533 trans.push_back(temp);
matthiasm@43	534 }
matthiasm@50	535 vector<int> chordpath = ViterbiPath(init, trans, chordogram, delta);
matthiasm@48	536
matthiasm@48	537
matthiasm@48	538 Feature chord_feature; // chord estimate
matthiasm@48	539 chord_feature.hasTimestamp = true;
matthiasm@48	540 chord_feature.timestamp = timestamps[0];
matthiasm@48	541 chord_feature.label = m_chordnames[chordpath[0]];
mail@60	542 fsOut[m_outputChords].push_back(chord_feature);
matthiasm@43	543
mail@60	544 chordchange[0] = 0;
matthiasm@50	545 for (int iFrame = 1; iFrame < chordpath.size(); ++iFrame) {
matthiasm@43	546 // cerr << chordpath[iFrame] << endl;
matthiasm@48	547 if (chordpath[iFrame] != oldchord ) {
matthiasm@86	548 // chord
matthiasm@43	549 Feature chord_feature; // chord estimate
matthiasm@43	550 chord_feature.hasTimestamp = true;
matthiasm@43	551 chord_feature.timestamp = timestamps[iFrame];
matthiasm@43	552 chord_feature.label = m_chordnames[chordpath[iFrame]];
mail@60	553 fsOut[m_outputChords].push_back(chord_feature);
matthiasm@43	554 oldchord = chordpath[iFrame];
matthiasm@86	555 // chord notes
matthiasm@86	556 for (int iNote = 0; iNote < oldnotes.size(); ++iNote) { // finish duration of old chord
matthiasm@86	557 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[iFrame];
matthiasm@86	558 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
matthiasm@86	559 }
matthiasm@86	560 oldnotes.clear();
matthiasm@86	561 for (int iNote = 0; iNote < m_chordnotes[chordpath[iFrame]].size(); ++iNote) { // prepare notes of current chord
matthiasm@86	562 Feature chordnote_feature;
matthiasm@86	563 chordnote_feature.hasTimestamp = true;
matthiasm@86	564 chordnote_feature.timestamp = timestamps[iFrame];
matthiasm@86	565 chordnote_feature.values.push_back(m_chordnotes[chordpath[iFrame]][iNote]);
matthiasm@86	566 chordnote_feature.hasDuration = true;
matthiasm@86	567 chordnote_feature.duration = -timestamps[iFrame]; // this will be corrected at the next chord
matthiasm@86	568 oldnotes.push_back(chordnote_feature);
matthiasm@86	569 }
Chris@23	570 }
matthiasm@50	571 /* calculating simple chord change prob */
matthiasm@50	572 for (int iChord = 0; iChord < nChord; iChord++) {
matthiasm@50	573 chordchange[iFrame-1] += delta[(iFrame-1)nChord + iChord] log(delta[(iFrame-1)nChord + iChord]/delta[iFramenChord + iChord]);
matthiasm@50	574 }
Chris@23	575 }
matthiasm@43	576
matthiasm@43	577 // cerr << chordpath[0] << endl;
matthiasm@43	578 } else {
matthiasm@43	579 /* Simple chord estimation
matthiasm@43	580 I just take the local chord estimates ("currentChordSalience") and average them over time, then
matthiasm@43	581 take the maximum. Very simple, don't do this at home...
matthiasm@43	582 */
matthiasm@44	583 cerr << "[Chordino Plugin] Simple Chord Estimation ... ";
matthiasm@43	584 count = 0;
matthiasm@43	585 int halfwindowlength = m_inputSampleRate / m_stepSize;
matthiasm@43	586 vector<int> chordSequence;
matthiasm@43	587 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) { // initialise the score chordogram
matthiasm@43	588 vector<int> temp = vector<int>(nChord,0);
matthiasm@43	589 scoreChordogram.push_back(temp);
matthiasm@43	590 }
matthiasm@43	591 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it < timestamps.end()-2*halfwindowlength-1; ++it) {
matthiasm@43	592 int startIndex = count + 1;
matthiasm@43	593 int endIndex = count + 2 * halfwindowlength;
matthiasm@43	594
matthiasm@43	595 float chordThreshold = 2.5/nChord;//(2halfwindowlength+1);
matthiasm@43	596
matthiasm@43	597 vector<int> chordCandidates;
matthiasm@43	598 for (unsigned iChord = 0; iChord < nChord-1; iChord++) {
matthiasm@43	599 // float currsum = 0;
matthiasm@43	600 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
matthiasm@43	601 // currsum += chordogram[iFrame][iChord];
matthiasm@43	602 // }
matthiasm@43	603 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
matthiasm@43	604 for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
matthiasm@43	605 if (chordogram[iFrame][iChord] > chordThreshold) {
matthiasm@43	606 chordCandidates.push_back(iChord);
matthiasm@43	607 break;
matthiasm@43	608 }
Chris@23	609 }
Chris@23	610 }
matthiasm@43	611 chordCandidates.push_back(nChord-1);
matthiasm@43	612 // cerr << chordCandidates.size() << endl;
matthiasm@43	613
matthiasm@43	614 float maxval = 0; // will be the value of the most salient chord change in this frame
matthiasm@43	615 float maxindex = 0; //... and the index thereof
matthiasm@43	616 unsigned bestchordL = nChord-1; // index of the best "left" chord
matthiasm@43	617 unsigned bestchordR = nChord-1; // index of the best "right" chord
matthiasm@43	618
matthiasm@43	619 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
matthiasm@43	620 // now find the max values on both sides of iWF
matthiasm@43	621 // left side:
matthiasm@43	622 float maxL = 0;
matthiasm@43	623 unsigned maxindL = nChord-1;
matthiasm@43	624 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
matthiasm@43	625 unsigned iChord = chordCandidates[kChord];
matthiasm@43	626 float currsum = 0;
matthiasm@43	627 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
matthiasm@43	628 currsum += chordogram[count+iFrame][iChord];
matthiasm@43	629 }
matthiasm@43	630 if (iChord == nChord-1) currsum *= 0.8;
matthiasm@43	631 if (currsum > maxL) {
matthiasm@43	632 maxL = currsum;
matthiasm@43	633 maxindL = iChord;
matthiasm@43	634 }
matthiasm@43	635 }
matthiasm@43	636 // right side:
matthiasm@43	637 float maxR = 0;
matthiasm@43	638 unsigned maxindR = nChord-1;
matthiasm@43	639 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
matthiasm@43	640 unsigned iChord = chordCandidates[kChord];
matthiasm@43	641 float currsum = 0;
matthiasm@43	642 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
matthiasm@43	643 currsum += chordogram[count+iFrame][iChord];
matthiasm@43	644 }
matthiasm@43	645 if (iChord == nChord-1) currsum *= 0.8;
matthiasm@43	646 if (currsum > maxR) {
matthiasm@43	647 maxR = currsum;
matthiasm@43	648 maxindR = iChord;
matthiasm@43	649 }
matthiasm@43	650 }
matthiasm@43	651 if (maxL+maxR > maxval) {
matthiasm@43	652 maxval = maxL+maxR;
matthiasm@43	653 maxindex = iWF;
matthiasm@43	654 bestchordL = maxindL;
matthiasm@43	655 bestchordR = maxindR;
matthiasm@43	656 }
matthiasm@43	657
Chris@23	658 }
matthiasm@43	659 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
matthiasm@43	660 // add a score to every chord-frame-point that was part of a maximum
matthiasm@43	661 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
matthiasm@43	662 scoreChordogram[iFrame+count][bestchordL]++;
matthiasm@43	663 }
matthiasm@43	664 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
matthiasm@43	665 scoreChordogram[iFrame+count][bestchordR]++;
matthiasm@43	666 }
matthiasm@50	667 if (bestchordL != bestchordR) {
matthiasm@50	668 chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
matthiasm@50	669 }
matthiasm@43	670 count++;
Chris@23	671 }
matthiasm@43	672 // cerr << "***** agent finished *****" << endl;
matthiasm@43	673 count = 0;
matthiasm@43	674 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) {
matthiasm@43	675 float maxval = 0; // will be the value of the most salient chord in this frame
matthiasm@43	676 float maxindex = 0; //... and the index thereof
matthiasm@43	677 for (unsigned iChord = 0; iChord < nChord; iChord++) {
matthiasm@43	678 if (scoreChordogram[count][iChord] > maxval) {
matthiasm@43	679 maxval = scoreChordogram[count][iChord];
matthiasm@43	680 maxindex = iChord;
matthiasm@43	681 // cerr << iChord << endl;
matthiasm@43	682 }
matthiasm@43	683 }
matthiasm@43	684 chordSequence.push_back(maxindex);
matthiasm@43	685 count++;
Chris@23	686 }
matthiasm@43	687
matthiasm@43	688
matthiasm@43	689 // mode filter on chordSequence
matthiasm@43	690 count = 0;
matthiasm@43	691 string oldChord = "";
matthiasm@43	692 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) {
matthiasm@43	693 Feature chord_feature; // chord estimate
matthiasm@43	694 chord_feature.hasTimestamp = true;
matthiasm@43	695 chord_feature.timestamp = *it;
matthiasm@43	696 // Feature currentChord; // chord estimate
matthiasm@43	697 // currentChord.hasTimestamp = true;
matthiasm@43	698 // currentChord.timestamp = currentChromas.timestamp;
matthiasm@43	699
matthiasm@43	700 vector<int> chordCount = vector<int>(nChord,0);
matthiasm@43	701 int maxChordCount = 0;
matthiasm@43	702 int maxChordIndex = nChord-1;
matthiasm@43	703 string maxChord;
matthiasm@43	704 int startIndex = max(count - halfwindowlength/2,0);
matthiasm@43	705 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
matthiasm@43	706 for (int i = startIndex; i < endIndex; i++) {
matthiasm@43	707 chordCount[chordSequence[i]]++;
matthiasm@43	708 if (chordCount[chordSequence[i]] > maxChordCount) {
matthiasm@43	709 // cerr << "start index " << startIndex << endl;
matthiasm@43	710 maxChordCount++;
matthiasm@43	711 maxChordIndex = chordSequence[i];
matthiasm@43	712 maxChord = m_chordnames[maxChordIndex];
matthiasm@43	713 }
matthiasm@43	714 }
matthiasm@43	715 // chordSequence[count] = maxChordIndex;
matthiasm@43	716 // cerr << maxChordIndex << endl;
matthiasm@50	717 // cerr << chordchange[count] << endl;
matthiasm@43	718 if (oldChord != maxChord) {
matthiasm@43	719 oldChord = maxChord;
matthiasm@43	720 chord_feature.label = m_chordnames[maxChordIndex];
mail@60	721 fsOut[m_outputChords].push_back(chord_feature);
matthiasm@86	722 for (int iNote = 0; iNote < oldnotes.size(); ++iNote) { // finish duration of old chord
matthiasm@86	723 oldnotes[iNote].duration = oldnotes[iNote].duration + chord_feature.timestamp;
matthiasm@86	724 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
matthiasm@86	725 }
matthiasm@86	726 oldnotes.clear();
matthiasm@86	727 for (int iNote = 0; iNote < m_chordnotes[maxChordIndex].size(); ++iNote) { // prepare notes of current chord
matthiasm@86	728 Feature chordnote_feature;
matthiasm@86	729 chordnote_feature.hasTimestamp = true;
matthiasm@86	730 chordnote_feature.timestamp = chord_feature.timestamp;
matthiasm@86	731 chordnote_feature.values.push_back(m_chordnotes[maxChordIndex][iNote]);
matthiasm@86	732 chordnote_feature.hasDuration = true;
matthiasm@86	733 chordnote_feature.duration = -chord_feature.timestamp; // this will be corrected at the next chord
matthiasm@86	734 oldnotes.push_back(chordnote_feature);
matthiasm@86	735 }
matthiasm@43	736 }
matthiasm@43	737 count++;
Chris@23	738 }
Chris@23	739 }
matthiasm@43	740 Feature chord_feature; // last chord estimate
matthiasm@43	741 chord_feature.hasTimestamp = true;
matthiasm@43	742 chord_feature.timestamp = timestamps[timestamps.size()-1];
matthiasm@43	743 chord_feature.label = "N";
mail@60	744 fsOut[m_outputChords].push_back(chord_feature);
matthiasm@86	745
matthiasm@86	746 for (int iNote = 0; iNote < oldnotes.size(); ++iNote) { // finish duration of old chord
matthiasm@86	747 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[timestamps.size()-1];
matthiasm@86	748 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
matthiasm@86	749 }
matthiasm@86	750
Chris@23	751 cerr << "done." << endl;
matthiasm@50	752
matthiasm@50	753 for (int iFrame = 0; iFrame < nFrame; iFrame++) {
matthiasm@50	754 Feature chordchange_feature;
matthiasm@50	755 chordchange_feature.hasTimestamp = true;
matthiasm@50	756 chordchange_feature.timestamp = timestamps[iFrame];
matthiasm@50	757 chordchange_feature.values.push_back(chordchange[iFrame]);
mail@60	758 // cerr << chordchange[iFrame] << endl;
mail@60	759 fsOut[m_outputHarmonicChange].push_back(chordchange_feature);
matthiasm@50	760 }
matthiasm@50	761
mail@60	762 // for (int iFrame = 0; iFrame < nFrame; iFrame++) cerr << fsOut[m_outputHarmonicChange][iFrame].values[0] << endl;
matthiasm@50	763
matthiasm@50	764
Chris@23	765 return fsOut;
matthiasm@0	766 }

Mercurial > hg > nnls-chroma

annotate Chordino.cpp @ 86:e5c16976513d consonance