nnls-chroma: Chordino.cpp annotate

annotate Chordino.cpp @ 124:a17ff20fb897 darwintunes

made darwintunes branch

author	Matthias Mauch <mail@matthiasmauch.net>
date	Sat, 11 Jun 2011 17:41:23 +0100
parents	21181297da99
children	a5ee5fe71e52

rev	line source
Chris@23	1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
matthiasm@0	2
Chris@35	3 /*
Chris@35	4 NNLS-Chroma / Chordino
Chris@35	5
Chris@35	6 Audio feature extraction plugins for chromagram and chord
Chris@35	7 estimation.
Chris@35	8
Chris@35	9 Centre for Digital Music, Queen Mary University of London.
Chris@35	10 This file copyright 2008-2010 Matthias Mauch and QMUL.
Chris@35	11
Chris@35	12 This program is free software; you can redistribute it and/or
Chris@35	13 modify it under the terms of the GNU General Public License as
Chris@35	14 published by the Free Software Foundation; either version 2 of the
Chris@35	15 License, or (at your option) any later version. See the file
Chris@35	16 COPYING included with this distribution for more information.
Chris@35	17 */
Chris@35	18
Chris@35	19 #include "Chordino.h"
Chris@27	20
Chris@27	21 #include "chromamethods.h"
matthiasm@43	22 #include "viterbi.h"
Chris@27	23
Chris@27	24 #include <cstdlib>
Chris@27	25 #include <fstream>
matthiasm@0	26 #include <cmath>
matthiasm@9	27
Chris@27	28 #include <algorithm>
matthiasm@0	29
// File-wide switch guarding the "--> <method>" trace messages that the methods
// below write to cerr. It is a compile-time constant set to false, so those
// guarded statements do not execute.
matthiasm@0	30 const bool debug_on = false;
matthiasm@0	31
// Constructor. Forwards inputSampleRate to the NNLSBase constructor and
// initialises m_chorddict, m_chordnotes and m_chordnames with 0; these three
// members are later (re)assigned in initialise().
Chris@35	32 Chordino::Chordino(float inputSampleRate) :
matthiasm@86	33 NNLSBase(inputSampleRate),
matthiasm@86	34 m_chorddict(0),
matthiasm@86	35 m_chordnotes(0),
matthiasm@86	36 m_chordnames(0)
matthiasm@0	37 {
// Trace output only; disabled while debug_on is false.
Chris@35	38 if (debug_on) cerr << "--> Chordino" << endl;
matthiasm@0	39 }
matthiasm@0	40
// Destructor. Performs no explicit cleanup of its own; it only emits a trace
// message when debug_on is set.
Chris@35	41 Chordino::~Chordino()
matthiasm@0	42 {
Chris@35	43 if (debug_on) cerr << "--> ~Chordino" << endl;
matthiasm@0	44 }
matthiasm@0	45
// Returns the plugin's identifier, the fixed string "chordino".
matthiasm@0	46 string
Chris@35	47 Chordino::getIdentifier() const
matthiasm@0	48 {
Chris@23	49 if (debug_on) cerr << "--> getIdentifier" << endl;
Chris@35	50 return "chordino";
matthiasm@0	51 }
matthiasm@0	52
// Returns the plugin's display name, the fixed string "Chordino".
matthiasm@0	53 string
Chris@35	54 Chordino::getName() const
matthiasm@0	55 {
Chris@23	56 if (debug_on) cerr << "--> getName" << endl;
Chris@35	57 return "Chordino";
matthiasm@0	58 }
matthiasm@0	59
// Returns a fixed, human-readable description of the plugin: chord
// transcription based on NNLS Chroma, with either a simple chord-change
// method or HMM/Viterbi smoothing.
matthiasm@0	60 string
Chris@35	61 Chordino::getDescription() const
matthiasm@0	62 {
Chris@23	63 if (debug_on) cerr << "--> getDescription" << endl;
matthiasm@58	64 return "Chordino provides a simple chord transcription based on NNLS Chroma (as in the NNLS Chroma plugin). Chord profiles given by the user in the file chord.dict are used to calculate frame-wise chord similarities. Two simple (non-state-of-the-art!) algorithms are available that smooth these to provide a chord transcription: a simple chord change method, and a standard HMM/Viterbi approach.";
matthiasm@0	65 }
matthiasm@0	66
// Builds and returns the list of user-adjustable parameters, in this order:
//   useNNLS, useHMM, rollon, tuningmode, whitening, spectralshape, boostn,
//   usehartesyntax.
// Each descriptor carries its identifier, display name, description, unit,
// value range, default and quantisation settings. The list is rebuilt on
// every call; no member state is modified here.
matthiasm@50	67 Chordino::ParameterList
matthiasm@50	68 Chordino::getParameterDescriptors() const
matthiasm@50	69 {
matthiasm@50	70 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
matthiasm@50	71 ParameterList list;
matthiasm@50	72
// useNNLS: on/off toggle (0 or 1, step 1), default on.
mail@118	73 ParameterDescriptor useNNLSParam;
mail@118	74 useNNLSParam.identifier = "useNNLS";
mail@118	75 useNNLSParam.name = "use approximate transcription (NNLS)";
mail@118	76 useNNLSParam.description = "Toggles approximate transcription (NNLS).";
mail@118	77 useNNLSParam.unit = "";
mail@118	78 useNNLSParam.minValue = 0.0;
mail@118	79 useNNLSParam.maxValue = 1.0;
mail@118	80 useNNLSParam.defaultValue = 1.0;
mail@118	81 useNNLSParam.isQuantized = true;
mail@118	82 useNNLSParam.quantizeStep = 1.0;
mail@118	83 list.push_back(useNNLSParam);
matthiasm@50	84
// useHMM: on/off toggle (0 or 1, step 1), default on.
mail@118	85 ParameterDescriptor useHMMParam;
mail@118	86 useHMMParam.identifier = "useHMM";
mail@118	87 useHMMParam.name = "HMM (Viterbi decoding)";
mail@118	88 useHMMParam.description = "Turns on Viterbi decoding (when off, the simple chord estimator is used).";
mail@118	89 useHMMParam.unit = "";
mail@118	90 useHMMParam.minValue = 0.0;
mail@118	91 useHMMParam.maxValue = 1.0;
mail@118	92 useHMMParam.defaultValue = 1.0;
mail@118	93 useHMMParam.isQuantized = true;
mail@118	94 useHMMParam.quantizeStep = 1.0;
mail@118	95 list.push_back(useHMMParam);
matthiasm@50	96
// rollon: bass noise threshold in percent, 0..5 in steps of 0.5, default 0.
mail@118	97 ParameterDescriptor rollonParam;
mail@118	98 rollonParam.identifier = "rollon";
mail@118	99 rollonParam.name = "bass noise threshold";
mail@118	100 rollonParam.description = "Consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds the quantile [bass noise threshold] x [total energy] will be set to 0. A threshold value of 0 means that no bins will be changed.";
mail@118	101 rollonParam.unit = "%";
mail@118	102 rollonParam.minValue = 0;
mail@118	103 rollonParam.maxValue = 5;
mail@118	104 rollonParam.defaultValue = 0.0;
mail@118	105 rollonParam.isQuantized = true;
mail@118	106 rollonParam.quantizeStep = 0.5;
mail@118	107 list.push_back(rollonParam);
matthiasm@50	108
// tuningmode: two named values, 0 = "global tuning" (default), 1 = "local tuning".
mail@118	109 ParameterDescriptor tuningmodeParam;
mail@118	110 tuningmodeParam.identifier = "tuningmode";
mail@118	111 tuningmodeParam.name = "tuning mode";
mail@118	112 tuningmodeParam.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
mail@118	113 tuningmodeParam.unit = "";
mail@118	114 tuningmodeParam.minValue = 0;
mail@118	115 tuningmodeParam.maxValue = 1;
mail@118	116 tuningmodeParam.defaultValue = 0.0;
mail@118	117 tuningmodeParam.isQuantized = true;
mail@118	118 tuningmodeParam.valueNames.push_back("global tuning");
mail@118	119 tuningmodeParam.valueNames.push_back("local tuning");
mail@118	120 tuningmodeParam.quantizeStep = 1.0;
mail@118	121 list.push_back(tuningmodeParam);
matthiasm@50	122
// whitening: continuous value in 0..1, default 1.
// isQuantized used to be assigned true and then false within this descriptor;
// only the final assignment (false) ever took effect, so the dead first
// assignment has been dropped. The resulting descriptor is unchanged.
mail@118	123 ParameterDescriptor whiteningParam;
mail@118	124 whiteningParam.identifier = "whitening";
mail@118	125 whiteningParam.name = "spectral whitening";
mail@118	126 whiteningParam.description = "Spectral whitening: no whitening - 0; whitening - 1.";
mail@118	127 whiteningParam.unit = "";
mail@118	129 whiteningParam.minValue = 0.0;
mail@118	130 whiteningParam.maxValue = 1.0;
mail@118	131 whiteningParam.defaultValue = 1.0;
mail@118	132 whiteningParam.isQuantized = false;
mail@118	133 list.push_back(whiteningParam);
matthiasm@50	134
// spectralshape: continuous value in 0.5..0.9, default 0.7.
mail@118	135 ParameterDescriptor spectralShapeParam;
mail@118	136 spectralShapeParam.identifier = "spectralshape";
mail@118	137 spectralShapeParam.name = "spectral shape";
mail@118	138 spectralShapeParam.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
mail@118	139 spectralShapeParam.unit = "";
mail@118	140 spectralShapeParam.minValue = 0.5;
mail@118	141 spectralShapeParam.maxValue = 0.9;
mail@118	142 spectralShapeParam.defaultValue = 0.7;
mail@118	143 spectralShapeParam.isQuantized = false;
mail@118	144 list.push_back(spectralShapeParam);
matthiasm@50	145
// boostn: continuous value in 0..1, default 0.1.
mail@118	146 ParameterDescriptor boostnParam;
mail@118	147 boostnParam.identifier = "boostn";
mail@118	148 boostnParam.name = "boost N";
mail@118	149 boostnParam.description = "Boost likelihood of the N (no chord) label.";
mail@118	150 boostnParam.unit = "";
mail@118	151 boostnParam.minValue = 0.0;
mail@118	152 boostnParam.maxValue = 1.0;
mail@118	153 boostnParam.defaultValue = 0.1;
mail@118	154 boostnParam.isQuantized = false;
mail@118	155 list.push_back(boostnParam);
matthiasm@50	156
// usehartesyntax: two named values, 0 = "no" (default), 1 = "yes".
mail@118	157 ParameterDescriptor usehartesyntaxParam;
mail@118	158 usehartesyntaxParam.identifier = "usehartesyntax";
mail@118	159 usehartesyntaxParam.name = "use Harte syntax";
mail@118	160 usehartesyntaxParam.description = "Use the chord syntax proposed by Harte";
mail@118	161 usehartesyntaxParam.unit = "";
mail@118	162 usehartesyntaxParam.minValue = 0.0;
mail@118	163 usehartesyntaxParam.maxValue = 1.0;
mail@118	164 usehartesyntaxParam.defaultValue = 0.0;
mail@118	165 usehartesyntaxParam.isQuantized = true;
mail@118	166 usehartesyntaxParam.quantizeStep = 1.0;
mail@118	167 usehartesyntaxParam.valueNames.push_back("no");
mail@118	168 usehartesyntaxParam.valueNames.push_back("yes");
mail@118	169 list.push_back(usehartesyntaxParam);
mail@112	170
matthiasm@50	171 return list;
matthiasm@50	172 }
matthiasm@50	173
// Builds and returns the descriptors of the plugin's four outputs and, as a
// side effect, records each output's position in the list in the matching
// m_output* member:
//   0 simplechord    -> m_outputChords          (labels only, binCount 0)
//   1 chordnotes     -> m_outputChordnotes      (one value per feature, with duration)
//   2 harmonicchange -> m_outputHarmonicChange  (one value per frame)
//   3 loglikelihood  -> m_outputLoglikelihood   (one value per frame)
// The output rate used throughout is m_inputSampleRate/m_stepSize; while
// m_stepSize is still 0 a step of 2048 is used instead.
Chris@35	174 Chordino::OutputList
Chris@35	175 Chordino::getOutputDescriptors() const
matthiasm@0	176 {
Chris@23	177 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
matthiasm@0	178 OutputList list;
matthiasm@0	179
// Running position in the list; assigned to the m_output* members below.
Chris@35	180 int index = 0;
matthiasm@0	181
matthiasm@0	182 OutputDescriptor d7;
matthiasm@0	183 d7.identifier = "simplechord";
Chris@36	184 d7.name = "Chord Estimate";
matthiasm@58	185 d7.description = "Estimated chord times and labels. Two simple (non-state-of-the-art!) algorithms are available that smooth these to provide a chord transcription: a simple chord change method, and a standard HMM/Viterbi approach.";
matthiasm@0	186 d7.unit = "";
matthiasm@0	187 d7.hasFixedBinCount = true;
matthiasm@0	188 d7.binCount = 0;
matthiasm@0	189 d7.hasKnownExtents = false;
matthiasm@0	190 d7.isQuantized = false;
matthiasm@0	191 d7.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@0	192 d7.hasDuration = false;
matthiasm@0	193 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0	194 list.push_back(d7);
Chris@35	195 m_outputChords = index++;
matthiasm@0	196
matthiasm@86	197 OutputDescriptor chordnotes;
matthiasm@86	198 chordnotes.identifier = "chordnotes";
matthiasm@86	199 chordnotes.name = "Note Representation of Chord Estimate";
matthiasm@86	200 chordnotes.description = "A simple represenation of the estimated chord with bass note (if applicable) and chord notes.";
matthiasm@86	201 chordnotes.unit = "MIDI units";
matthiasm@86	202 chordnotes.hasFixedBinCount = true;
matthiasm@86	203 chordnotes.binCount = 1;
matthiasm@86	204 chordnotes.hasKnownExtents = true;
matthiasm@86	205 chordnotes.minValue = 0;
matthiasm@86	206 chordnotes.maxValue = 127;
matthiasm@86	207 chordnotes.isQuantized = true;
matthiasm@86	208 chordnotes.quantizeStep = 1;
matthiasm@86	209 chordnotes.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@86	210 chordnotes.hasDuration = true;
matthiasm@86	211 chordnotes.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@86	212 list.push_back(chordnotes);
matthiasm@86	213 m_outputChordnotes = index++;
matthiasm@86	214
Chris@23	215 OutputDescriptor d8;
mail@60	216 d8.identifier = "harmonicchange";
Chris@36	217 d8.name = "Harmonic Change Value";
matthiasm@58	218 d8.description = "An indication of the likelihood of harmonic change. Depends on the chord dictionary. Calculation is different depending on whether the Viterbi algorithm is used for chord estimation, or the simple chord estimate.";
matthiasm@17	219 d8.unit = "";
matthiasm@17	220 d8.hasFixedBinCount = true;
matthiasm@17	221 d8.binCount = 1;
mail@60	222 d8.hasKnownExtents = false;
matthiasm@17	223 d8.isQuantized = false;
matthiasm@17	224 d8.sampleType = OutputDescriptor::FixedSampleRate;
// A FixedSampleRate output must state its rate; this one was previously left
// unset. Use the same per-frame rate as the two chord outputs above.
d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@17	225 d8.hasDuration = false;
matthiasm@17	226 list.push_back(d8);
Chris@35	227 m_outputHarmonicChange = index++;
matthiasm@1	228
matthiasm@107	229 OutputDescriptor loglikelihood;
matthiasm@107	230 loglikelihood.identifier = "loglikelihood";
mail@124	231 loglikelihood.name = "Simple Chord Log-likelihood";
mail@124	232 loglikelihood.description = "Logarithm of the likelihood value of the simple chord estimate.";
matthiasm@107	233 loglikelihood.unit = "";
matthiasm@107	234 loglikelihood.hasFixedBinCount = true;
matthiasm@107	235 loglikelihood.binCount = 1;
matthiasm@107	236 loglikelihood.hasKnownExtents = false;
matthiasm@107	237 loglikelihood.isQuantized = false;
matthiasm@107	238 loglikelihood.sampleType = OutputDescriptor::FixedSampleRate;
// Same as for d8: give the fixed-rate output an explicit per-frame rate.
loglikelihood.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@107	239 loglikelihood.hasDuration = false;
matthiasm@107	240 list.push_back(loglikelihood);
matthiasm@107	241 m_outputLoglikelihood = index++;
matthiasm@106	242
matthiasm@0	243 return list;
matthiasm@0	244 }
matthiasm@0	245
// Initialises the plugin for the given channel count, step size and block size.
// Delegates to NNLSBase::initialise() with the same arguments and returns
// false if that call fails. On success, calls chordDictionary() with the
// addresses of m_chorddict and m_chordnotes plus the current m_boostN and
// m_harte_syntax settings, stores its result in m_chordnames, and returns true.
// NOTE(review): m_chorddict and m_chordnotes are presumably filled by
// chordDictionary() through the pointers passed in -- confirm in chromamethods.
matthiasm@0	246 bool
Chris@35	247 Chordino::initialise(size_t channels, size_t stepSize, size_t blockSize)
matthiasm@0	248 {
Chris@23	249 if (debug_on) {
Chris@23	250 cerr << "--> initialise";
Chris@23	251 }
mail@76	252
Chris@35	253 if (!NNLSBase::initialise(channels, stepSize, blockSize)) {
Chris@35	254 return false;
Chris@35	255 }
mail@115	256 m_chordnames = chordDictionary(&m_chorddict, &m_chordnotes, m_boostN, m_harte_syntax);
matthiasm@0	257 return true;
matthiasm@0	258 }
matthiasm@0	259
// Resets the plugin by delegating to NNLSBase::reset(); nothing
// Chordino-specific is cleared here.
matthiasm@0	260 void
Chris@35	261 Chordino::reset()
matthiasm@0	262 {
Chris@23	263 if (debug_on) cerr << "--> reset";
Chris@35	264 NNLSBase::reset();
matthiasm@0	265 }
matthiasm@0	266
// Handles one block of input.
//   inputBuffers - input data for this block, passed unchanged to
//                  NNLSBase::baseProcess()
//   timestamp    - time of this block, likewise passed through
// Always returns an empty FeatureSet: the features are assembled later, in
// getRemainingFeatures().
Chris@35	267 Chordino::FeatureSet
Chris@35	268 Chordino::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
matthiasm@0	269 {
Chris@23	270 if (debug_on) cerr << "--> process" << endl;
matthiasm@0	271
Chris@35	272 NNLSBase::baseProcess(inputBuffers, timestamp);
matthiasm@0	273
Chris@35	274 return FeatureSet();
matthiasm@0	275 }
matthiasm@0	276
Chris@35	277 Chordino::FeatureSet
Chris@35	278 Chordino::getRemainingFeatures()
matthiasm@0	279 {
mail@89	280 // cerr << hw[0] << hw[1] << endl;
mail@89	281 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
Chris@23	282 FeatureSet fsOut;
Chris@35	283 if (m_logSpectrum.size() == 0) return fsOut;
Chris@23	284 int nChord = m_chordnames.size();
Chris@23	285 //
Chris@23	286 /** Calculate Tuning
Chris@23	287 calculate tuning from (using the angle of the complex number defined by the
Chris@23	288 cumulative mean real and imag values)
Chris@23	289 **/
mail@80	290 float meanTuningImag = 0;
mail@80	291 float meanTuningReal = 0;
mail@80	292 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
mail@80	293 meanTuningReal += m_meanTunings[iBPS] * cosvalues[iBPS];
mail@80	294 meanTuningImag += m_meanTunings[iBPS] * sinvalues[iBPS];
mail@80	295 }
Chris@23	296 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
Chris@23	297 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
Chris@23	298 int intShift = floor(normalisedtuning * 3);
mail@80	299 float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this
matthiasm@1	300
Chris@23	301 char buffer0 [50];
matthiasm@1	302
Chris@23	303 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
matthiasm@1	304
matthiasm@1	305
Chris@23	306 /** Tune Log-Frequency Spectrogram
matthiasm@43	307 calculate a tuned log-frequency spectrogram (currentTunedSpec): use the tuning estimated above (kinda f0) to
Chris@91	308 perform linear interpolation on the existing log-frequency spectrogram (kinda currentLogSpectrum).
Chris@23	309 **/
Chris@35	310 cerr << endl << "[Chordino Plugin] Tuning Log-Frequency Spectrogram ... ";
matthiasm@13	311
Chris@23	312 int count = 0;
matthiasm@1	313
Chris@35	314 FeatureList tunedSpec;
matthiasm@43	315 int nFrame = m_logSpectrum.size();
matthiasm@43	316
matthiasm@43	317 vector<Vamp::RealTime> timestamps;
Chris@35	318
Chris@35	319 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
Chris@91	320 Feature currentLogSpectrum = *i;
matthiasm@43	321 Feature currentTunedSpec; // tuned log-frequency spectrum
matthiasm@43	322 currentTunedSpec.hasTimestamp = true;
Chris@91	323 currentTunedSpec.timestamp = currentLogSpectrum.timestamp;
Chris@91	324 timestamps.push_back(currentLogSpectrum.timestamp);
matthiasm@43	325 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // set lower edge to zero
matthiasm@1	326
Chris@23	327 if (m_tuneLocal) {
Chris@23	328 intShift = floor(m_localTuning[count] * 3);
mail@80	329 floatShift = m_localTuning[count] * 3 - intShift; // floatShift is a really bad name for this
Chris@23	330 }
matthiasm@1	331
mail@80	332 // cerr << intShift << " " << floatShift << endl;
matthiasm@1	333
Chris@91	334 for (int k = 2; k < (int)currentLogSpectrum.values.size() - 3; ++k) { // interpolate all inner bins
mail@115	335 float tempValue = currentLogSpectrum.values[k + intShift] * (1-floatShift) + currentLogSpectrum.values[k+intShift+1] * floatShift;
matthiasm@43	336 currentTunedSpec.values.push_back(tempValue);
Chris@23	337 }
matthiasm@1	338
matthiasm@43	339 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // upper edge
matthiasm@43	340 vector<float> runningmean = SpecialConvolution(currentTunedSpec.values,hw);
Chris@23	341 vector<float> runningstd;
mail@77	342 for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance)
matthiasm@43	343 runningstd.push_back((currentTunedSpec.values[i] - runningmean[i]) * (currentTunedSpec.values[i] - runningmean[i]));
Chris@23	344 }
Chris@23	345 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
mail@77	346 for (int i = 0; i < nNote; i++) {
Chris@23	347 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
Chris@23	348 if (runningstd[i] > 0) {
matthiasm@43	349 // currentTunedSpec.values[i] = (currentTunedSpec.values[i] / runningmean[i]) > thresh ?
matthiasm@43	350 // (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
matthiasm@43	351 currentTunedSpec.values[i] = (currentTunedSpec.values[i] - runningmean[i]) > 0 ?
matthiasm@43	352 (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
Chris@23	353 }
matthiasm@43	354 if (currentTunedSpec.values[i] < 0) {
Chris@23	355 cerr << "ERROR: negative value in logfreq spectrum" << endl;
Chris@23	356 }
Chris@23	357 }
matthiasm@43	358 tunedSpec.push_back(currentTunedSpec);
Chris@23	359 count++;
Chris@23	360 }
Chris@23	361 cerr << "done." << endl;
matthiasm@1	362
Chris@23	363 /** Semitone spectrum and chromagrams
Chris@23	364 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
Chris@23	365 is inferred using a non-negative least squares algorithm.
Chris@23	366 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
Chris@23	367 bass and treble stacked onto each other).
Chris@23	368 **/
matthiasm@42	369 if (m_useNNLS == 0) {
Chris@35	370 cerr << "[Chordino Plugin] Mapping to semitone spectrum and chroma ... ";
Chris@23	371 } else {
Chris@35	372 cerr << "[Chordino Plugin] Performing NNLS and mapping to chroma ... ";
Chris@23	373 }
matthiasm@13	374
matthiasm@1	375
matthiasm@43	376 vector<vector<double> > chordogram;
Chris@23	377 vector<vector<int> > scoreChordogram;
Chris@35	378 vector<float> chordchange = vector<float>(tunedSpec.size(),0);
Chris@23	379 count = 0;
matthiasm@9	380
Chris@35	381 FeatureList chromaList;
matthiasm@43	382
matthiasm@43	383
Chris@35	384
Chris@35	385 for (FeatureList::iterator it = tunedSpec.begin(); it != tunedSpec.end(); ++it) {
matthiasm@43	386 Feature currentTunedSpec = *it; // logfreq spectrum
matthiasm@43	387 Feature currentChromas; // treble and bass chromagram
Chris@35	388
matthiasm@43	389 currentChromas.hasTimestamp = true;
matthiasm@43	390 currentChromas.timestamp = currentTunedSpec.timestamp;
Chris@35	391
mail@77	392 float b[nNote];
matthiasm@1	393
Chris@23	394 bool some_b_greater_zero = false;
Chris@23	395 float sumb = 0;
mail@77	396 for (int i = 0; i < nNote; i++) {
mail@77	397 // b[i] = m_dict[(nNote * count + i) % (nNote * 84)];
matthiasm@43	398 b[i] = currentTunedSpec.values[i];
Chris@23	399 sumb += b[i];
Chris@23	400 if (b[i] > 0) {
Chris@23	401 some_b_greater_zero = true;
Chris@23	402 }
Chris@23	403 }
matthiasm@1	404
Chris@23	405 // here's where the non-negative least squares algorithm calculates the note activation x
matthiasm@1	406
Chris@23	407 vector<float> chroma = vector<float>(12, 0);
Chris@23	408 vector<float> basschroma = vector<float>(12, 0);
Chris@23	409 float currval;
Chris@91	410 int iSemitone = 0;
matthiasm@1	411
Chris@23	412 if (some_b_greater_zero) {
matthiasm@42	413 if (m_useNNLS == 0) {
Chris@91	414 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
Chris@23	415 currval = 0;
mail@81	416 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
mail@81	417 currval += b[iNote + iBPS] * (1-abs(iBPS*1.0/(nBPS/2+1)));
mail@81	418 }
Chris@23	419 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
Chris@23	420 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
Chris@23	421 iSemitone++;
Chris@23	422 }
matthiasm@1	423
Chris@23	424 } else {
Chris@35	425 float x[84+1000];
Chris@23	426 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
Chris@23	427 vector<int> signifIndex;
Chris@23	428 int index=0;
Chris@23	429 sumb /= 84.0;
Chris@91	430 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
Chris@23	431 float currval = 0;
mail@81	432 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
mail@81	433 currval += b[iNote + iBPS];
mail@81	434 }
Chris@23	435 if (currval > 0) signifIndex.push_back(index);
Chris@23	436 index++;
Chris@23	437 }
Chris@35	438 float rnorm;
Chris@35	439 float w[84+1000];
Chris@35	440 float zz[84+1000];
Chris@23	441 int indx[84+1000];
Chris@23	442 int mode;
mail@77	443 int dictsize = nNote*signifIndex.size();
mail@81	444 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
Chris@35	445 float *curr_dict = new float[dictsize];
Chris@91	446 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
Chris@91	447 for (int iBin = 0; iBin < nNote; iBin++) {
mail@77	448 curr_dict[iNote * nNote + iBin] = 1.0 * m_dict[signifIndex[iNote] * nNote + iBin];
Chris@23	449 }
Chris@23	450 }
Chris@35	451 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
Chris@23	452 delete [] curr_dict;
Chris@91	453 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
Chris@23	454 // cerr << mode << endl;
Chris@23	455 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
Chris@23	456 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
Chris@23	457 }
Chris@23	458 }
Chris@23	459 }
Chris@35	460
Chris@35	461 vector<float> origchroma = chroma;
Chris@23	462 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
matthiasm@43	463 currentChromas.values = chroma;
Chris@35	464
Chris@23	465 if (m_doNormalizeChroma > 0) {
Chris@23	466 vector<float> chromanorm = vector<float>(3,0);
Chris@23	467 switch (int(m_doNormalizeChroma)) {
Chris@23	468 case 0: // should never end up here
Chris@23	469 break;
Chris@23	470 case 1:
Chris@35	471 chromanorm[0] = *max_element(origchroma.begin(), origchroma.end());
Chris@35	472 chromanorm[1] = *max_element(basschroma.begin(), basschroma.end());
Chris@23	473 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
Chris@23	474 break;
Chris@23	475 case 2:
Chris@35	476 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
Chris@23	477 chromanorm[2] += *it;
Chris@23	478 }
Chris@23	479 break;
Chris@23	480 case 3:
Chris@35	481 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
Chris@23	482 chromanorm[2] += pow(*it,2);
Chris@23	483 }
Chris@23	484 chromanorm[2] = sqrt(chromanorm[2]);
Chris@23	485 break;
Chris@23	486 }
Chris@23	487 if (chromanorm[2] > 0) {
Chris@91	488 for (int i = 0; i < (int)chroma.size(); i++) {
matthiasm@43	489 currentChromas.values[i] /= chromanorm[2];
Chris@23	490 }
Chris@23	491 }
Chris@23	492 }
Chris@35	493
matthiasm@43	494 chromaList.push_back(currentChromas);
Chris@35	495
Chris@23	496 // local chord estimation
matthiasm@43	497 vector<double> currentChordSalience;
matthiasm@43	498 double tempchordvalue = 0;
matthiasm@43	499 double sumchordvalue = 0;
matthiasm@9	500
Chris@23	501 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23	502 tempchordvalue = 0;
Chris@23	503 for (int iBin = 0; iBin < 12; iBin++) {
matthiasm@44	504 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23	505 }
Chris@23	506 for (int iBin = 12; iBin < 24; iBin++) {
Chris@23	507 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23	508 }
matthiasm@48	509 if (iChord == nChord-1) tempchordvalue *= .7;
matthiasm@48	510 if (tempchordvalue < 0) tempchordvalue = 0.0;
matthiasm@50	511 tempchordvalue = pow(1.3,tempchordvalue);
Chris@23	512 sumchordvalue+=tempchordvalue;
Chris@23	513 currentChordSalience.push_back(tempchordvalue);
Chris@23	514 }
Chris@23	515 if (sumchordvalue > 0) {
Chris@23	516 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23	517 currentChordSalience[iChord] /= sumchordvalue;
Chris@23	518 }
Chris@23	519 } else {
Chris@23	520 currentChordSalience[nChord-1] = 1.0;
Chris@23	521 }
Chris@23	522 chordogram.push_back(currentChordSalience);
matthiasm@1	523
Chris@23	524 count++;
Chris@23	525 }
Chris@23	526 cerr << "done." << endl;
matthiasm@13	527
matthiasm@86	528 vector<Feature> oldnotes;
matthiasm@10	529
matthiasm@50	530 // bool m_useHMM = true; // this will go into the chordino header file.
matthiasm@50	531 if (m_useHMM == 1.0) {
matthiasm@44	532 cerr << "[Chordino Plugin] HMM Chord Estimation ... ";
matthiasm@43	533 int oldchord = nChord-1;
matthiasm@48	534 double selftransprob = 0.99;
matthiasm@43	535
matthiasm@48	536 // vector<double> init = vector<double>(nChord,1.0/nChord);
matthiasm@48	537 vector<double> init = vector<double>(nChord,0); init[nChord-1] = 1;
matthiasm@48	538
matthiasm@50	539 double *delta;
matthiasm@50	540 delta = (double *)malloc(sizeof(double)*nFrame*nChord);
matthiasm@50	541
matthiasm@43	542 vector<vector<double> > trans;
matthiasm@43	543 for (int iChord = 0; iChord < nChord; iChord++) {
matthiasm@43	544 vector<double> temp = vector<double>(nChord,(1-selftransprob)/(nChord-1));
matthiasm@43	545 temp[iChord] = selftransprob;
matthiasm@43	546 trans.push_back(temp);
matthiasm@43	547 }
matthiasm@106	548 vector<double> scale;
matthiasm@106	549 vector<int> chordpath = ViterbiPath(init, trans, chordogram, delta, &scale);
matthiasm@106	550
matthiasm@48	551
matthiasm@48	552 Feature chord_feature; // chord estimate
matthiasm@48	553 chord_feature.hasTimestamp = true;
matthiasm@48	554 chord_feature.timestamp = timestamps[0];
matthiasm@48	555 chord_feature.label = m_chordnames[chordpath[0]];
mail@60	556 fsOut[m_outputChords].push_back(chord_feature);
matthiasm@43	557
mail@60	558 chordchange[0] = 0;
Chris@91	559 for (int iFrame = 1; iFrame < (int)chordpath.size(); ++iFrame) {
matthiasm@43	560 // cerr << chordpath[iFrame] << endl;
matthiasm@48	561 if (chordpath[iFrame] != oldchord ) {
matthiasm@86	562 // chord
matthiasm@43	563 Feature chord_feature; // chord estimate
matthiasm@43	564 chord_feature.hasTimestamp = true;
matthiasm@43	565 chord_feature.timestamp = timestamps[iFrame];
matthiasm@43	566 chord_feature.label = m_chordnames[chordpath[iFrame]];
mail@60	567 fsOut[m_outputChords].push_back(chord_feature);
matthiasm@43	568 oldchord = chordpath[iFrame];
matthiasm@86	569 // chord notes
Chris@91	570 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
matthiasm@86	571 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[iFrame];
matthiasm@86	572 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
matthiasm@86	573 }
matthiasm@86	574 oldnotes.clear();
Chris@91	575 for (int iNote = 0; iNote < (int)m_chordnotes[chordpath[iFrame]].size(); ++iNote) { // prepare notes of current chord
matthiasm@86	576 Feature chordnote_feature;
matthiasm@86	577 chordnote_feature.hasTimestamp = true;
matthiasm@86	578 chordnote_feature.timestamp = timestamps[iFrame];
matthiasm@86	579 chordnote_feature.values.push_back(m_chordnotes[chordpath[iFrame]][iNote]);
matthiasm@86	580 chordnote_feature.hasDuration = true;
matthiasm@86	581 chordnote_feature.duration = -timestamps[iFrame]; // this will be corrected at the next chord
matthiasm@86	582 oldnotes.push_back(chordnote_feature);
matthiasm@86	583 }
Chris@23	584 }
matthiasm@50	585 /* calculating simple chord change prob */
matthiasm@50	586 for (int iChord = 0; iChord < nChord; iChord++) {
matthiasm@50	587 chordchange[iFrame-1] += delta[(iFrame-1)*nChord + iChord] * log(delta[(iFrame-1)*nChord + iChord]/delta[iFrame*nChord + iChord]);
matthiasm@50	588 }
Chris@23	589 }
matthiasm@43	590
matthiasm@106	591 float logscale = 0;
matthiasm@106	592 for (int iFrame = 0; iFrame < nFrame; ++iFrame) {
matthiasm@106	593 logscale -= log(scale[iFrame]);
matthiasm@106	594 Feature loglikelihood;
matthiasm@106	595 loglikelihood.hasTimestamp = true;
matthiasm@106	596 loglikelihood.timestamp = timestamps[iFrame];
matthiasm@106	597 loglikelihood.values.push_back(-log(scale[iFrame]));
matthiasm@106	598 // cerr << chordchange[iFrame] << endl;
matthiasm@107	599 fsOut[m_outputLoglikelihood].push_back(loglikelihood);
matthiasm@106	600 }
matthiasm@106	601 logscale /= nFrame;
mail@111	602 // cerr << "loglik" << logscale << endl;
matthiasm@106	603
matthiasm@106	604
matthiasm@43	605 // cerr << chordpath[0] << endl;
matthiasm@43	606 } else {
matthiasm@43	607 /* Simple chord estimation
matthiasm@43	608 I just take the local chord estimates ("currentChordSalience") and average them over time, then
matthiasm@43	609 take the maximum. Very simple, don't do this at home...
matthiasm@43	610 */
matthiasm@44	611 cerr << "[Chordino Plugin] Simple Chord Estimation ... ";
matthiasm@43	612 count = 0;
matthiasm@43	613 int halfwindowlength = m_inputSampleRate / m_stepSize;
matthiasm@43	614 vector<int> chordSequence;
matthiasm@43	615 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) { // initialise the score chordogram
matthiasm@43	616 vector<int> temp = vector<int>(nChord,0);
matthiasm@43	617 scoreChordogram.push_back(temp);
matthiasm@43	618 }
matthiasm@43	619 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it < timestamps.end()-2*halfwindowlength-1; ++it) {
matthiasm@43	620 int startIndex = count + 1;
matthiasm@43	621 int endIndex = count + 2 * halfwindowlength;
matthiasm@43	622
matthiasm@43	623 float chordThreshold = 2.5/nChord;//(2halfwindowlength+1);
matthiasm@43	624
matthiasm@43	625 vector<int> chordCandidates;
Chris@91	626 for (int iChord = 0; iChord+1 < nChord; iChord++) {
matthiasm@43	627 // float currsum = 0;
Chris@91	628 // for (int iFrame = startIndex; iFrame < endIndex; ++iFrame) {
matthiasm@43	629 // currsum += chordogram[iFrame][iChord];
matthiasm@43	630 // }
matthiasm@43	631 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
Chris@91	632 for (int iFrame = startIndex; iFrame < endIndex; ++iFrame) {
matthiasm@43	633 if (chordogram[iFrame][iChord] > chordThreshold) {
matthiasm@43	634 chordCandidates.push_back(iChord);
matthiasm@43	635 break;
matthiasm@43	636 }
Chris@23	637 }
Chris@23	638 }
matthiasm@43	639 chordCandidates.push_back(nChord-1);
matthiasm@43	640 // cerr << chordCandidates.size() << endl;
matthiasm@43	641
matthiasm@43	642 float maxval = 0; // will be the value of the most salient chord change in this frame
matthiasm@43	643 float maxindex = 0; //... and the index thereof
Chris@91	644 int bestchordL = nChord-1; // index of the best "left" chord
Chris@91	645 int bestchordR = nChord-1; // index of the best "right" chord
matthiasm@43	646
matthiasm@43	647 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
matthiasm@43	648 // now find the max values on both sides of iWF
matthiasm@43	649 // left side:
matthiasm@43	650 float maxL = 0;
Chris@91	651 int maxindL = nChord-1;
Chris@91	652 for (int kChord = 0; kChord < (int)chordCandidates.size(); kChord++) {
Chris@91	653 int iChord = chordCandidates[kChord];
matthiasm@43	654 float currsum = 0;
Chris@91	655 for (int iFrame = 0; iFrame < iWF-1; ++iFrame) {
matthiasm@43	656 currsum += chordogram[count+iFrame][iChord];
matthiasm@43	657 }
matthiasm@43	658 if (iChord == nChord-1) currsum *= 0.8;
matthiasm@43	659 if (currsum > maxL) {
matthiasm@43	660 maxL = currsum;
matthiasm@43	661 maxindL = iChord;
matthiasm@43	662 }
matthiasm@43	663 }
matthiasm@43	664 // right side:
matthiasm@43	665 float maxR = 0;
Chris@91	666 int maxindR = nChord-1;
Chris@91	667 for (int kChord = 0; kChord < (int)chordCandidates.size(); kChord++) {
Chris@91	668 int iChord = chordCandidates[kChord];
matthiasm@43	669 float currsum = 0;
Chris@91	670 for (int iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
matthiasm@43	671 currsum += chordogram[count+iFrame][iChord];
matthiasm@43	672 }
matthiasm@43	673 if (iChord == nChord-1) currsum *= 0.8;
matthiasm@43	674 if (currsum > maxR) {
matthiasm@43	675 maxR = currsum;
matthiasm@43	676 maxindR = iChord;
matthiasm@43	677 }
matthiasm@43	678 }
matthiasm@43	679 if (maxL+maxR > maxval) {
matthiasm@43	680 maxval = maxL+maxR;
matthiasm@43	681 maxindex = iWF;
matthiasm@43	682 bestchordL = maxindL;
matthiasm@43	683 bestchordR = maxindR;
matthiasm@43	684 }
matthiasm@43	685
Chris@23	686 }
matthiasm@43	687 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
matthiasm@43	688 // add a score to every chord-frame-point that was part of a maximum
Chris@91	689 for (int iFrame = 0; iFrame < maxindex-1; ++iFrame) {
matthiasm@43	690 scoreChordogram[iFrame+count][bestchordL]++;
matthiasm@43	691 }
Chris@91	692 for (int iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
matthiasm@43	693 scoreChordogram[iFrame+count][bestchordR]++;
matthiasm@43	694 }
matthiasm@50	695 if (bestchordL != bestchordR) {
matthiasm@50	696 chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
matthiasm@50	697 }
matthiasm@43	698 count++;
Chris@23	699 }
matthiasm@43	700 // cerr << "***** agent finished *****" << endl;
matthiasm@43	701 count = 0;
matthiasm@43	702 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) {
matthiasm@43	703 float maxval = 0; // will be the value of the most salient chord in this frame
matthiasm@43	704 float maxindex = 0; //... and the index thereof
Chris@91	705 for (int iChord = 0; iChord < nChord; iChord++) {
matthiasm@43	706 if (scoreChordogram[count][iChord] > maxval) {
matthiasm@43	707 maxval = scoreChordogram[count][iChord];
matthiasm@43	708 maxindex = iChord;
matthiasm@43	709 // cerr << iChord << endl;
matthiasm@43	710 }
matthiasm@43	711 }
matthiasm@43	712 chordSequence.push_back(maxindex);
matthiasm@43	713 count++;
Chris@23	714 }
matthiasm@43	715
matthiasm@43	716
matthiasm@43	717 // mode filter on chordSequence
matthiasm@43	718 count = 0;
matthiasm@43	719 string oldChord = "";
matthiasm@43	720 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) {
matthiasm@43	721 Feature chord_feature; // chord estimate
matthiasm@43	722 chord_feature.hasTimestamp = true;
matthiasm@43	723 chord_feature.timestamp = *it;
matthiasm@43	724 // Feature currentChord; // chord estimate
matthiasm@43	725 // currentChord.hasTimestamp = true;
matthiasm@43	726 // currentChord.timestamp = currentChromas.timestamp;
matthiasm@43	727
matthiasm@43	728 vector<int> chordCount = vector<int>(nChord,0);
matthiasm@43	729 int maxChordCount = 0;
matthiasm@43	730 int maxChordIndex = nChord-1;
matthiasm@43	731 string maxChord;
matthiasm@43	732 int startIndex = max(count - halfwindowlength/2,0);
matthiasm@43	733 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
matthiasm@43	734 for (int i = startIndex; i < endIndex; i++) {
matthiasm@43	735 chordCount[chordSequence[i]]++;
matthiasm@43	736 if (chordCount[chordSequence[i]] > maxChordCount) {
matthiasm@43	737 // cerr << "start index " << startIndex << endl;
matthiasm@43	738 maxChordCount++;
matthiasm@43	739 maxChordIndex = chordSequence[i];
matthiasm@43	740 maxChord = m_chordnames[maxChordIndex];
matthiasm@43	741 }
matthiasm@43	742 }
matthiasm@43	743 // chordSequence[count] = maxChordIndex;
matthiasm@43	744 // cerr << maxChordIndex << endl;
matthiasm@50	745 // cerr << chordchange[count] << endl;
matthiasm@43	746 if (oldChord != maxChord) {
matthiasm@43	747 oldChord = maxChord;
matthiasm@43	748 chord_feature.label = m_chordnames[maxChordIndex];
mail@60	749 fsOut[m_outputChords].push_back(chord_feature);
Chris@91	750 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
matthiasm@86	751 oldnotes[iNote].duration = oldnotes[iNote].duration + chord_feature.timestamp;
matthiasm@86	752 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
matthiasm@86	753 }
matthiasm@86	754 oldnotes.clear();
Chris@91	755 for (int iNote = 0; iNote < (int)m_chordnotes[maxChordIndex].size(); ++iNote) { // prepare notes of current chord
matthiasm@86	756 Feature chordnote_feature;
matthiasm@86	757 chordnote_feature.hasTimestamp = true;
matthiasm@86	758 chordnote_feature.timestamp = chord_feature.timestamp;
matthiasm@86	759 chordnote_feature.values.push_back(m_chordnotes[maxChordIndex][iNote]);
matthiasm@86	760 chordnote_feature.hasDuration = true;
matthiasm@86	761 chordnote_feature.duration = -chord_feature.timestamp; // this will be corrected at the next chord
matthiasm@86	762 oldnotes.push_back(chordnote_feature);
matthiasm@86	763 }
matthiasm@43	764 }
matthiasm@43	765 count++;
Chris@23	766 }
Chris@23	767 }
matthiasm@43	768 Feature chord_feature; // last chord estimate
matthiasm@43	769 chord_feature.hasTimestamp = true;
matthiasm@43	770 chord_feature.timestamp = timestamps[timestamps.size()-1];
matthiasm@43	771 chord_feature.label = "N";
mail@60	772 fsOut[m_outputChords].push_back(chord_feature);
matthiasm@86	773
Chris@91	774 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
matthiasm@86	775 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[timestamps.size()-1];
matthiasm@86	776 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
matthiasm@86	777 }
matthiasm@86	778
Chris@23	779 cerr << "done." << endl;
matthiasm@50	780
matthiasm@50	781 for (int iFrame = 0; iFrame < nFrame; iFrame++) {
matthiasm@50	782 Feature chordchange_feature;
matthiasm@50	783 chordchange_feature.hasTimestamp = true;
matthiasm@50	784 chordchange_feature.timestamp = timestamps[iFrame];
matthiasm@50	785 chordchange_feature.values.push_back(chordchange[iFrame]);
mail@60	786 // cerr << chordchange[iFrame] << endl;
mail@60	787 fsOut[m_outputHarmonicChange].push_back(chordchange_feature);
matthiasm@50	788 }
matthiasm@50	789
mail@60	790 // for (int iFrame = 0; iFrame < nFrame; iFrame++) cerr << fsOut[m_outputHarmonicChange][iFrame].values[0] << endl;
matthiasm@50	791
matthiasm@50	792
Chris@23	793 return fsOut;
matthiasm@0	794 }

Mercurial > hg > nnls-chroma

annotate Chordino.cpp @ 124:a17ff20fb897 darwintunes