nnls-chroma: Chordino.cpp annotate

annotate Chordino.cpp @ 44:109d3b2c7105 matthiasm-plugin

regarding the chord estimation: (1) tweaked the chord templates; (2) this means that the original method also changed

author	matthiasm
date	Mon, 25 Oct 2010 01:58:37 +0900
parents	131801714118
children	6e76c7710fa1

rev	line source
Chris@23	1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
matthiasm@0	2
Chris@35	3 /*
Chris@35	4 NNLS-Chroma / Chordino
Chris@35	5
Chris@35	6 Audio feature extraction plugins for chromagram and chord
Chris@35	7 estimation.
Chris@35	8
Chris@35	9 Centre for Digital Music, Queen Mary University of London.
Chris@35	10 This file copyright 2008-2010 Matthias Mauch and QMUL.
Chris@35	11
Chris@35	12 This program is free software; you can redistribute it and/or
Chris@35	13 modify it under the terms of the GNU General Public License as
Chris@35	14 published by the Free Software Foundation; either version 2 of the
Chris@35	15 License, or (at your option) any later version. See the file
Chris@35	16 COPYING included with this distribution for more information.
Chris@35	17 */
Chris@35	18
Chris@35	19 #include "Chordino.h"
Chris@27	20
Chris@27	21 #include "chromamethods.h"
matthiasm@43	22 #include "viterbi.h"
Chris@27	23
Chris@27	24 #include <cstdlib>
Chris@27	25 #include <fstream>
matthiasm@0	26 #include <cmath>
matthiasm@9	27
Chris@27	28 #include <algorithm>
matthiasm@0	29
matthiasm@0	30 const bool debug_on = false;
matthiasm@0	31
Chris@27	32 const vector<float> hw(hammingwind, hammingwind+19);
matthiasm@0	33
Chris@35	34 Chordino::Chordino(float inputSampleRate) :
Chris@35	35 NNLSBase(inputSampleRate)
matthiasm@0	36 {
Chris@35	37 if (debug_on) cerr << "--> Chordino" << endl;
matthiasm@0	38 }
matthiasm@0	39
Chris@35	40 Chordino::~Chordino()
matthiasm@0	41 {
Chris@35	42 if (debug_on) cerr << "--> ~Chordino" << endl;
matthiasm@0	43 }
matthiasm@0	44
matthiasm@0	45 string
Chris@35	46 Chordino::getIdentifier() const
matthiasm@0	47 {
Chris@23	48 if (debug_on) cerr << "--> getIdentifier" << endl;
Chris@35	49 return "chordino";
matthiasm@0	50 }
matthiasm@0	51
matthiasm@0	52 string
Chris@35	53 Chordino::getName() const
matthiasm@0	54 {
Chris@23	55 if (debug_on) cerr << "--> getName" << endl;
Chris@35	56 return "Chordino";
matthiasm@0	57 }
matthiasm@0	58
matthiasm@0	59 string
Chris@35	60 Chordino::getDescription() const
matthiasm@0	61 {
Chris@23	62 if (debug_on) cerr << "--> getDescription" << endl;
matthiasm@13	63 return "This plugin provides a number of features derived from a log-frequency amplitude spectrum of the DFT: some variants of the log-frequency spectrum, including a semitone spectrum derived from approximate transcription using the NNLS algorithm; based on this semitone spectrum, chroma features and a simple chord estimate.";
matthiasm@0	64 }
matthiasm@0	65
Chris@35	66 Chordino::OutputList
Chris@35	67 Chordino::getOutputDescriptors() const
matthiasm@0	68 {
Chris@23	69 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
matthiasm@0	70 OutputList list;
matthiasm@0	71
Chris@35	72 int index = 0;
matthiasm@0	73
matthiasm@0	74 OutputDescriptor d7;
matthiasm@0	75 d7.identifier = "simplechord";
Chris@36	76 d7.name = "Chord Estimate";
matthiasm@0	77 d7.description = "A simple chord estimate based on the inner product of chord templates with the smoothed chroma.";
matthiasm@0	78 d7.unit = "";
matthiasm@0	79 d7.hasFixedBinCount = true;
matthiasm@0	80 d7.binCount = 0;
matthiasm@0	81 d7.hasKnownExtents = false;
matthiasm@0	82 d7.isQuantized = false;
matthiasm@0	83 d7.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@0	84 d7.hasDuration = false;
matthiasm@0	85 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0	86 list.push_back(d7);
Chris@35	87 m_outputChords = index++;
matthiasm@0	88
Chris@23	89 OutputDescriptor d8;
matthiasm@17	90 d8.identifier = "harmonicchange";
Chris@36	91 d8.name = "Harmonic Change Value";
matthiasm@17	92 d8.description = "Harmonic change.";
matthiasm@17	93 d8.unit = "";
matthiasm@17	94 d8.hasFixedBinCount = true;
matthiasm@17	95 d8.binCount = 1;
matthiasm@17	96 d8.hasKnownExtents = true;
Chris@23	97 d8.minValue = 0.0;
Chris@23	98 d8.maxValue = 0.999;
matthiasm@17	99 d8.isQuantized = false;
matthiasm@17	100 d8.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@17	101 d8.hasDuration = false;
matthiasm@17	102 // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@17	103 list.push_back(d8);
Chris@35	104 m_outputHarmonicChange = index++;
matthiasm@1	105
matthiasm@0	106 return list;
matthiasm@0	107 }
matthiasm@0	108
matthiasm@0	109 bool
Chris@35	110 Chordino::initialise(size_t channels, size_t stepSize, size_t blockSize)
matthiasm@0	111 {
Chris@23	112 if (debug_on) {
Chris@23	113 cerr << "--> initialise";
Chris@23	114 }
matthiasm@1	115
Chris@35	116 if (!NNLSBase::initialise(channels, stepSize, blockSize)) {
Chris@35	117 return false;
Chris@35	118 }
matthiasm@1	119
matthiasm@0	120 return true;
matthiasm@0	121 }
matthiasm@0	122
matthiasm@0	123 void
Chris@35	124 Chordino::reset()
matthiasm@0	125 {
Chris@23	126 if (debug_on) cerr << "--> reset";
Chris@35	127 NNLSBase::reset();
matthiasm@0	128 }
matthiasm@0	129
Chris@35	130 Chordino::FeatureSet
Chris@35	131 Chordino::process(const float const inputBuffers, Vamp::RealTime timestamp)
matthiasm@0	132 {
Chris@23	133 if (debug_on) cerr << "--> process" << endl;
matthiasm@0	134
Chris@35	135 NNLSBase::baseProcess(inputBuffers, timestamp);
matthiasm@0	136
Chris@35	137 return FeatureSet();
matthiasm@0	138 }
matthiasm@0	139
Chris@35	140 Chordino::FeatureSet
Chris@35	141 Chordino::getRemainingFeatures()
matthiasm@0	142 {
Chris@23	143 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
Chris@23	144 FeatureSet fsOut;
Chris@35	145 if (m_logSpectrum.size() == 0) return fsOut;
Chris@23	146 int nChord = m_chordnames.size();
Chris@23	147 //
Chris@23	148 /** Calculate Tuning
Chris@23	149 calculate tuning from (using the angle of the complex number defined by the
Chris@23	150 cumulative mean real and imag values)
Chris@23	151 **/
Chris@23	152 float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
Chris@23	153 float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
Chris@23	154 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
Chris@23	155 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
Chris@23	156 int intShift = floor(normalisedtuning * 3);
Chris@23	157 float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
matthiasm@1	158
Chris@23	159 char buffer0 [50];
matthiasm@1	160
Chris@23	161 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
matthiasm@1	162
matthiasm@1	163
Chris@23	164 /** Tune Log-Frequency Spectrogram
matthiasm@43	165 calculate a tuned log-frequency spectrogram (currentTunedSpec): use the tuning estimated above (kinda f0) to
matthiasm@43	166 perform linear interpolation on the existing log-frequency spectrogram (kinda currentLogSpectum).
Chris@23	167 **/
Chris@35	168 cerr << endl << "[Chordino Plugin] Tuning Log-Frequency Spectrogram ... ";
matthiasm@13	169
Chris@23	170 float tempValue = 0;
Chris@23	171 float dbThreshold = 0; // relative to the background spectrum
Chris@23	172 float thresh = pow(10,dbThreshold/20);
Chris@23	173 // cerr << "tune local ? " << m_tuneLocal << endl;
Chris@23	174 int count = 0;
matthiasm@1	175
Chris@35	176 FeatureList tunedSpec;
matthiasm@43	177 int nFrame = m_logSpectrum.size();
matthiasm@43	178
matthiasm@43	179 vector<Vamp::RealTime> timestamps;
Chris@35	180
Chris@35	181 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
matthiasm@43	182 Feature currentLogSpectum = *i;
matthiasm@43	183 Feature currentTunedSpec; // tuned log-frequency spectrum
matthiasm@43	184 currentTunedSpec.hasTimestamp = true;
matthiasm@43	185 currentTunedSpec.timestamp = currentLogSpectum.timestamp;
matthiasm@43	186 timestamps.push_back(currentLogSpectum.timestamp);
matthiasm@43	187 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // set lower edge to zero
matthiasm@1	188
Chris@23	189 if (m_tuneLocal) {
Chris@23	190 intShift = floor(m_localTuning[count] * 3);
Chris@23	191 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
Chris@23	192 }
matthiasm@1	193
Chris@23	194 // cerr << intShift << " " << intFactor << endl;
matthiasm@1	195
matthiasm@43	196 for (unsigned k = 2; k < currentLogSpectum.values.size() - 3; ++k) { // interpolate all inner bins
matthiasm@43	197 tempValue = currentLogSpectum.values[k + intShift] * (1-intFactor) + currentLogSpectum.values[k+intShift+1] * intFactor;
matthiasm@43	198 currentTunedSpec.values.push_back(tempValue);
Chris@23	199 }
matthiasm@1	200
matthiasm@43	201 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // upper edge
matthiasm@43	202 vector<float> runningmean = SpecialConvolution(currentTunedSpec.values,hw);
Chris@23	203 vector<float> runningstd;
Chris@23	204 for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance)
matthiasm@43	205 runningstd.push_back((currentTunedSpec.values[i] - runningmean[i]) * (currentTunedSpec.values[i] - runningmean[i]));
Chris@23	206 }
Chris@23	207 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
Chris@23	208 for (int i = 0; i < 256; i++) {
Chris@23	209 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
Chris@23	210 if (runningstd[i] > 0) {
matthiasm@43	211 // currentTunedSpec.values[i] = (currentTunedSpec.values[i] / runningmean[i]) > thresh ?
matthiasm@43	212 // (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
matthiasm@43	213 currentTunedSpec.values[i] = (currentTunedSpec.values[i] - runningmean[i]) > 0 ?
matthiasm@43	214 (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
Chris@23	215 }
matthiasm@43	216 if (currentTunedSpec.values[i] < 0) {
Chris@23	217 cerr << "ERROR: negative value in logfreq spectrum" << endl;
Chris@23	218 }
Chris@23	219 }
matthiasm@43	220 tunedSpec.push_back(currentTunedSpec);
Chris@23	221 count++;
Chris@23	222 }
Chris@23	223 cerr << "done." << endl;
matthiasm@1	224
Chris@23	225 /** Semitone spectrum and chromagrams
Chris@23	226 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
Chris@23	227 is inferred using a non-negative least squares algorithm.
Chris@23	228 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
Chris@23	229 bass and treble stacked onto each other).
Chris@23	230 **/
matthiasm@42	231 if (m_useNNLS == 0) {
Chris@35	232 cerr << "[Chordino Plugin] Mapping to semitone spectrum and chroma ... ";
Chris@23	233 } else {
Chris@35	234 cerr << "[Chordino Plugin] Performing NNLS and mapping to chroma ... ";
Chris@23	235 }
matthiasm@13	236
matthiasm@1	237
matthiasm@43	238 vector<vector<double> > chordogram;
Chris@23	239 vector<vector<int> > scoreChordogram;
Chris@35	240 vector<float> chordchange = vector<float>(tunedSpec.size(),0);
Chris@23	241 count = 0;
matthiasm@9	242
Chris@35	243 FeatureList chromaList;
matthiasm@43	244
matthiasm@43	245
Chris@35	246
Chris@35	247 for (FeatureList::iterator it = tunedSpec.begin(); it != tunedSpec.end(); ++it) {
matthiasm@43	248 Feature currentTunedSpec = *it; // logfreq spectrum
matthiasm@43	249 Feature currentChromas; // treble and bass chromagram
Chris@35	250
matthiasm@43	251 currentChromas.hasTimestamp = true;
matthiasm@43	252 currentChromas.timestamp = currentTunedSpec.timestamp;
Chris@35	253
Chris@35	254 float b[256];
matthiasm@1	255
Chris@23	256 bool some_b_greater_zero = false;
Chris@23	257 float sumb = 0;
Chris@23	258 for (int i = 0; i < 256; i++) {
Chris@23	259 // b[i] = m_dict[(256 * count + i) % (256 * 84)];
matthiasm@43	260 b[i] = currentTunedSpec.values[i];
Chris@23	261 sumb += b[i];
Chris@23	262 if (b[i] > 0) {
Chris@23	263 some_b_greater_zero = true;
Chris@23	264 }
Chris@23	265 }
matthiasm@1	266
Chris@23	267 // here's where the non-negative least squares algorithm calculates the note activation x
matthiasm@1	268
Chris@23	269 vector<float> chroma = vector<float>(12, 0);
Chris@23	270 vector<float> basschroma = vector<float>(12, 0);
Chris@23	271 float currval;
Chris@23	272 unsigned iSemitone = 0;
matthiasm@1	273
Chris@23	274 if (some_b_greater_zero) {
matthiasm@42	275 if (m_useNNLS == 0) {
Chris@23	276 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
Chris@23	277 currval = 0;
Chris@35	278 currval += b[iNote + 1 + -1] * 0.5;
Chris@35	279 currval += b[iNote + 1 + 0] * 1.0;
Chris@35	280 currval += b[iNote + 1 + 1] * 0.5;
Chris@23	281 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
Chris@23	282 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
Chris@23	283 iSemitone++;
Chris@23	284 }
matthiasm@1	285
Chris@23	286 } else {
Chris@35	287 float x[84+1000];
Chris@23	288 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
Chris@23	289 vector<int> signifIndex;
Chris@23	290 int index=0;
Chris@23	291 sumb /= 84.0;
Chris@23	292 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
Chris@23	293 float currval = 0;
Chris@23	294 currval += b[iNote + 1 + -1];
Chris@23	295 currval += b[iNote + 1 + 0];
Chris@23	296 currval += b[iNote + 1 + 1];
Chris@23	297 if (currval > 0) signifIndex.push_back(index);
Chris@23	298 index++;
Chris@23	299 }
Chris@35	300 float rnorm;
Chris@35	301 float w[84+1000];
Chris@35	302 float zz[84+1000];
Chris@23	303 int indx[84+1000];
Chris@23	304 int mode;
Chris@23	305 int dictsize = 256*signifIndex.size();
Chris@35	306 float *curr_dict = new float[dictsize];
Chris@23	307 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
Chris@23	308 for (unsigned iBin = 0; iBin < 256; iBin++) {
Chris@23	309 curr_dict[iNote * 256 + iBin] = 1.0 * m_dict[signifIndex[iNote] * 256 + iBin];
Chris@23	310 }
Chris@23	311 }
Chris@35	312 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
Chris@23	313 delete [] curr_dict;
Chris@23	314 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
Chris@23	315 // cerr << mode << endl;
Chris@23	316 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
Chris@23	317 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
Chris@23	318 }
Chris@23	319 }
Chris@23	320 }
Chris@35	321
Chris@35	322 vector<float> origchroma = chroma;
Chris@23	323 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
matthiasm@43	324 currentChromas.values = chroma;
Chris@35	325
Chris@23	326 if (m_doNormalizeChroma > 0) {
Chris@23	327 vector<float> chromanorm = vector<float>(3,0);
Chris@23	328 switch (int(m_doNormalizeChroma)) {
Chris@23	329 case 0: // should never end up here
Chris@23	330 break;
Chris@23	331 case 1:
Chris@35	332 chromanorm[0] = *max_element(origchroma.begin(), origchroma.end());
Chris@35	333 chromanorm[1] = *max_element(basschroma.begin(), basschroma.end());
Chris@23	334 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
Chris@23	335 break;
Chris@23	336 case 2:
Chris@35	337 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
Chris@23	338 chromanorm[2] += *it;
Chris@23	339 }
Chris@23	340 break;
Chris@23	341 case 3:
Chris@35	342 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
Chris@23	343 chromanorm[2] += pow(*it,2);
Chris@23	344 }
Chris@23	345 chromanorm[2] = sqrt(chromanorm[2]);
Chris@23	346 break;
Chris@23	347 }
Chris@23	348 if (chromanorm[2] > 0) {
Chris@35	349 for (int i = 0; i < chroma.size(); i++) {
matthiasm@43	350 currentChromas.values[i] /= chromanorm[2];
Chris@23	351 }
Chris@23	352 }
Chris@23	353 }
Chris@35	354
matthiasm@43	355 chromaList.push_back(currentChromas);
Chris@35	356
Chris@23	357 // local chord estimation
matthiasm@43	358 vector<double> currentChordSalience;
matthiasm@43	359 double tempchordvalue = 0;
matthiasm@43	360 double sumchordvalue = 0;
matthiasm@9	361
Chris@23	362 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23	363 tempchordvalue = 0;
Chris@23	364 for (int iBin = 0; iBin < 12; iBin++) {
matthiasm@44	365 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23	366 }
Chris@23	367 for (int iBin = 12; iBin < 24; iBin++) {
Chris@23	368 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23	369 }
matthiasm@44	370 if (tempchordvalue < 0) tempchordvalue = 0;
Chris@23	371 sumchordvalue+=tempchordvalue;
Chris@23	372 currentChordSalience.push_back(tempchordvalue);
Chris@23	373 }
Chris@23	374 if (sumchordvalue > 0) {
Chris@23	375 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23	376 currentChordSalience[iChord] /= sumchordvalue;
Chris@23	377 }
Chris@23	378 } else {
Chris@23	379 currentChordSalience[nChord-1] = 1.0;
Chris@23	380 }
Chris@23	381 chordogram.push_back(currentChordSalience);
matthiasm@1	382
Chris@23	383 count++;
Chris@23	384 }
Chris@23	385 cerr << "done." << endl;
matthiasm@13	386
matthiasm@10	387
matthiasm@43	388 bool m_useHMM = true; // this will go into the chordino header file.
matthiasm@43	389 if (m_useHMM) {
matthiasm@44	390 cerr << "[Chordino Plugin] HMM Chord Estimation ... ";
matthiasm@43	391 int oldchord = nChord-1;
matthiasm@44	392 double selftransprob = 0.9;
matthiasm@43	393
matthiasm@43	394 vector<double> init = vector<double>(nChord,1.0/nChord);
matthiasm@43	395 vector<vector<double> > trans;
matthiasm@43	396 for (int iChord = 0; iChord < nChord; iChord++) {
matthiasm@43	397 vector<double> temp = vector<double>(nChord,(1-selftransprob)/(nChord-1));
matthiasm@43	398 temp[iChord] = selftransprob;
matthiasm@43	399 trans.push_back(temp);
matthiasm@43	400 }
matthiasm@43	401 vector<int> chordpath = ViterbiPath(init,trans,chordogram);
matthiasm@43	402
matthiasm@43	403 for (int iFrame = 0; iFrame < chordpath.size(); ++iFrame) {
matthiasm@43	404 // cerr << chordpath[iFrame] << endl;
matthiasm@43	405 if (chordpath[iFrame] != oldchord) {
matthiasm@43	406 Feature chord_feature; // chord estimate
matthiasm@43	407 chord_feature.hasTimestamp = true;
matthiasm@43	408 chord_feature.timestamp = timestamps[iFrame];
matthiasm@43	409 chord_feature.label = m_chordnames[chordpath[iFrame]];
matthiasm@43	410 fsOut[0].push_back(chord_feature);
matthiasm@43	411 oldchord = chordpath[iFrame];
Chris@23	412 }
Chris@23	413 }
matthiasm@43	414
matthiasm@43	415 // cerr << chordpath[0] << endl;
matthiasm@43	416 } else {
matthiasm@43	417 /* Simple chord estimation
matthiasm@43	418 I just take the local chord estimates ("currentChordSalience") and average them over time, then
matthiasm@43	419 take the maximum. Very simple, don't do this at home...
matthiasm@43	420 */
matthiasm@44	421 cerr << "[Chordino Plugin] Simple Chord Estimation ... ";
matthiasm@43	422 count = 0;
matthiasm@43	423 int halfwindowlength = m_inputSampleRate / m_stepSize;
matthiasm@43	424 vector<int> chordSequence;
matthiasm@43	425 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) { // initialise the score chordogram
matthiasm@43	426 vector<int> temp = vector<int>(nChord,0);
matthiasm@43	427 scoreChordogram.push_back(temp);
matthiasm@43	428 }
matthiasm@43	429 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it < timestamps.end()-2*halfwindowlength-1; ++it) {
matthiasm@43	430 int startIndex = count + 1;
matthiasm@43	431 int endIndex = count + 2 * halfwindowlength;
matthiasm@43	432
matthiasm@43	433 float chordThreshold = 2.5/nChord;//(2halfwindowlength+1);
matthiasm@43	434
matthiasm@43	435 vector<int> chordCandidates;
matthiasm@43	436 for (unsigned iChord = 0; iChord < nChord-1; iChord++) {
matthiasm@43	437 // float currsum = 0;
matthiasm@43	438 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
matthiasm@43	439 // currsum += chordogram[iFrame][iChord];
matthiasm@43	440 // }
matthiasm@43	441 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
matthiasm@43	442 for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
matthiasm@43	443 if (chordogram[iFrame][iChord] > chordThreshold) {
matthiasm@43	444 chordCandidates.push_back(iChord);
matthiasm@43	445 break;
matthiasm@43	446 }
Chris@23	447 }
Chris@23	448 }
matthiasm@43	449 chordCandidates.push_back(nChord-1);
matthiasm@43	450 // cerr << chordCandidates.size() << endl;
matthiasm@43	451
matthiasm@43	452 float maxval = 0; // will be the value of the most salient chord change in this frame
matthiasm@43	453 float maxindex = 0; //... and the index thereof
matthiasm@43	454 unsigned bestchordL = nChord-1; // index of the best "left" chord
matthiasm@43	455 unsigned bestchordR = nChord-1; // index of the best "right" chord
matthiasm@43	456
matthiasm@43	457 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
matthiasm@43	458 // now find the max values on both sides of iWF
matthiasm@43	459 // left side:
matthiasm@43	460 float maxL = 0;
matthiasm@43	461 unsigned maxindL = nChord-1;
matthiasm@43	462 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
matthiasm@43	463 unsigned iChord = chordCandidates[kChord];
matthiasm@43	464 float currsum = 0;
matthiasm@43	465 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
matthiasm@43	466 currsum += chordogram[count+iFrame][iChord];
matthiasm@43	467 }
matthiasm@43	468 if (iChord == nChord-1) currsum *= 0.8;
matthiasm@43	469 if (currsum > maxL) {
matthiasm@43	470 maxL = currsum;
matthiasm@43	471 maxindL = iChord;
matthiasm@43	472 }
matthiasm@43	473 }
matthiasm@43	474 // right side:
matthiasm@43	475 float maxR = 0;
matthiasm@43	476 unsigned maxindR = nChord-1;
matthiasm@43	477 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
matthiasm@43	478 unsigned iChord = chordCandidates[kChord];
matthiasm@43	479 float currsum = 0;
matthiasm@43	480 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
matthiasm@43	481 currsum += chordogram[count+iFrame][iChord];
matthiasm@43	482 }
matthiasm@43	483 if (iChord == nChord-1) currsum *= 0.8;
matthiasm@43	484 if (currsum > maxR) {
matthiasm@43	485 maxR = currsum;
matthiasm@43	486 maxindR = iChord;
matthiasm@43	487 }
matthiasm@43	488 }
matthiasm@43	489 if (maxL+maxR > maxval) {
matthiasm@43	490 maxval = maxL+maxR;
matthiasm@43	491 maxindex = iWF;
matthiasm@43	492 bestchordL = maxindL;
matthiasm@43	493 bestchordR = maxindR;
matthiasm@43	494 }
matthiasm@43	495
Chris@23	496 }
matthiasm@43	497 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
matthiasm@43	498 // add a score to every chord-frame-point that was part of a maximum
matthiasm@43	499 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
matthiasm@43	500 scoreChordogram[iFrame+count][bestchordL]++;
matthiasm@43	501 }
matthiasm@43	502 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
matthiasm@43	503 scoreChordogram[iFrame+count][bestchordR]++;
matthiasm@43	504 }
matthiasm@43	505 if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
matthiasm@43	506 count++;
Chris@23	507 }
matthiasm@43	508 // cerr << "***** agent finished *****" << endl;
matthiasm@43	509 count = 0;
matthiasm@43	510 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) {
matthiasm@43	511 float maxval = 0; // will be the value of the most salient chord in this frame
matthiasm@43	512 float maxindex = 0; //... and the index thereof
matthiasm@43	513 for (unsigned iChord = 0; iChord < nChord; iChord++) {
matthiasm@43	514 if (scoreChordogram[count][iChord] > maxval) {
matthiasm@43	515 maxval = scoreChordogram[count][iChord];
matthiasm@43	516 maxindex = iChord;
matthiasm@43	517 // cerr << iChord << endl;
matthiasm@43	518 }
matthiasm@43	519 }
matthiasm@43	520 chordSequence.push_back(maxindex);
matthiasm@43	521 count++;
Chris@23	522 }
matthiasm@43	523
matthiasm@43	524
matthiasm@43	525 // mode filter on chordSequence
matthiasm@43	526 count = 0;
matthiasm@43	527 string oldChord = "";
matthiasm@43	528 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) {
matthiasm@43	529 Feature chord_feature; // chord estimate
matthiasm@43	530 chord_feature.hasTimestamp = true;
matthiasm@43	531 chord_feature.timestamp = *it;
matthiasm@43	532 // Feature currentChord; // chord estimate
matthiasm@43	533 // currentChord.hasTimestamp = true;
matthiasm@43	534 // currentChord.timestamp = currentChromas.timestamp;
matthiasm@43	535
matthiasm@43	536 vector<int> chordCount = vector<int>(nChord,0);
matthiasm@43	537 int maxChordCount = 0;
matthiasm@43	538 int maxChordIndex = nChord-1;
matthiasm@43	539 string maxChord;
matthiasm@43	540 int startIndex = max(count - halfwindowlength/2,0);
matthiasm@43	541 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
matthiasm@43	542 for (int i = startIndex; i < endIndex; i++) {
matthiasm@43	543 chordCount[chordSequence[i]]++;
matthiasm@43	544 if (chordCount[chordSequence[i]] > maxChordCount) {
matthiasm@43	545 // cerr << "start index " << startIndex << endl;
matthiasm@43	546 maxChordCount++;
matthiasm@43	547 maxChordIndex = chordSequence[i];
matthiasm@43	548 maxChord = m_chordnames[maxChordIndex];
matthiasm@43	549 }
matthiasm@43	550 }
matthiasm@43	551 // chordSequence[count] = maxChordIndex;
matthiasm@43	552 // cerr << maxChordIndex << endl;
matthiasm@43	553 // cerr << chordchange[count] << endl;
matthiasm@43	554 // fsOut[9].push_back(currentChord);
matthiasm@43	555 if (oldChord != maxChord) {
matthiasm@43	556 oldChord = maxChord;
matthiasm@43	557 chord_feature.label = m_chordnames[maxChordIndex];
matthiasm@43	558 fsOut[0].push_back(chord_feature);
matthiasm@43	559 }
matthiasm@43	560 count++;
Chris@23	561 }
Chris@23	562 }
matthiasm@43	563 Feature chord_feature; // last chord estimate
matthiasm@43	564 chord_feature.hasTimestamp = true;
matthiasm@43	565 chord_feature.timestamp = timestamps[timestamps.size()-1];
matthiasm@43	566 chord_feature.label = "N";
matthiasm@43	567 fsOut[0].push_back(chord_feature);
Chris@23	568 cerr << "done." << endl;
Chris@23	569 return fsOut;
matthiasm@0	570 }

Mercurial > hg > nnls-chroma

annotate Chordino.cpp @ 44:109d3b2c7105 matthiasm-plugin