nnls-chroma: Chordino.cpp annotate

annotate Chordino.cpp @ 184:82d5d11b68d7 tip

Update library URI so it's not document-local

author	Chris Cannam
date	Wed, 22 Apr 2020 14:21:25 +0100
parents	d95c4cdef8af
children

rev	line source
Chris@23	1 /* -- c-basic-offset: 4 indent-tabs-mode: nil -- vi:set ts=8 sts=4 sw=4: */
matthiasm@0	2
Chris@35	3 /*
Chris@35	4 NNLS-Chroma / Chordino
Chris@35	5
Chris@35	6 Audio feature extraction plugins for chromagram and chord
Chris@35	7 estimation.
Chris@35	8
Chris@35	9 Centre for Digital Music, Queen Mary University of London.
Chris@35	10 This file copyright 2008-2010 Matthias Mauch and QMUL.
Chris@35	11
Chris@35	12 This program is free software; you can redistribute it and/or
Chris@35	13 modify it under the terms of the GNU General Public License as
Chris@35	14 published by the Free Software Foundation; either version 2 of the
Chris@35	15 License, or (at your option) any later version. See the file
Chris@35	16 COPYING included with this distribution for more information.
Chris@35	17 */
Chris@35	18
Chris@35	19 #include "Chordino.h"
Chris@27	20
Chris@27	21 #include "chromamethods.h"
matthiasm@43	22 #include "viterbi.h"
Chris@27	23
Chris@27	24 #include <cstdlib>
Chris@27	25 #include <fstream>
matthiasm@0	26 #include <cmath>
matthiasm@9	27
Chris@27	28 #include <algorithm>
matthiasm@0	29
// Compile-time switch for the diagnostic trace written to cerr by the
// methods in this file; tracing is off while this is false.
bool const debug_on = false;
matthiasm@0	31
Chris@35	32 Chordino::Chordino(float inputSampleRate) :
matthiasm@86	33 NNLSBase(inputSampleRate),
matthiasm@86	34 m_chorddict(0),
matthiasm@86	35 m_chordnotes(0),
matthiasm@86	36 m_chordnames(0)
matthiasm@0	37 {
Chris@35	38 if (debug_on) cerr << "--> Chordino" << endl;
matthiasm@0	39 }
matthiasm@0	40
Chris@35	41 Chordino::~Chordino()
matthiasm@0	42 {
Chris@35	43 if (debug_on) cerr << "--> ~Chordino" << endl;
matthiasm@0	44 }
matthiasm@0	45
matthiasm@0	46 string
Chris@35	47 Chordino::getIdentifier() const
matthiasm@0	48 {
Chris@23	49 if (debug_on) cerr << "--> getIdentifier" << endl;
Chris@35	50 return "chordino";
matthiasm@0	51 }
matthiasm@0	52
matthiasm@0	53 string
Chris@35	54 Chordino::getName() const
matthiasm@0	55 {
Chris@23	56 if (debug_on) cerr << "--> getName" << endl;
Chris@35	57 return "Chordino";
matthiasm@0	58 }
matthiasm@0	59
matthiasm@0	60 string
Chris@35	61 Chordino::getDescription() const
matthiasm@0	62 {
Chris@23	63 if (debug_on) cerr << "--> getDescription" << endl;
Chris@149	64 return "Chordino provides a simple chord transcription based on NNLS Chroma (as in the NNLS Chroma plugin). Chord profiles given by the user in the file chord.dict are used to calculate frame-wise chord similarities. A simple (non-state-of-the-art!) algorithm smooths these to provide a chord transcription using a standard HMM/Viterbi approach.";
matthiasm@0	65 }
matthiasm@0	66
matthiasm@50	67 Chordino::ParameterList
matthiasm@50	68 Chordino::getParameterDescriptors() const
matthiasm@50	69 {
matthiasm@50	70 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
matthiasm@50	71 ParameterList list;
matthiasm@50	72
mail@118	73 ParameterDescriptor useNNLSParam;
mail@118	74 useNNLSParam.identifier = "useNNLS";
mail@118	75 useNNLSParam.name = "use approximate transcription (NNLS)";
mail@118	76 useNNLSParam.description = "Toggles approximate transcription (NNLS).";
mail@118	77 useNNLSParam.unit = "";
mail@118	78 useNNLSParam.minValue = 0.0;
mail@118	79 useNNLSParam.maxValue = 1.0;
mail@118	80 useNNLSParam.defaultValue = 1.0;
mail@118	81 useNNLSParam.isQuantized = true;
mail@118	82 useNNLSParam.quantizeStep = 1.0;
mail@118	83 list.push_back(useNNLSParam);
matthiasm@50	84
mail@118	85 ParameterDescriptor rollonParam;
mail@118	86 rollonParam.identifier = "rollon";
mail@118	87 rollonParam.name = "bass noise threshold";
mail@118	88 rollonParam.description = "Consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds the quantile [bass noise threshold] x [total energy] will be set to 0. A threshold value of 0 means that no bins will be changed.";
mail@118	89 rollonParam.unit = "%";
mail@118	90 rollonParam.minValue = 0;
mail@118	91 rollonParam.maxValue = 5;
mail@118	92 rollonParam.defaultValue = 0.0;
mail@118	93 rollonParam.isQuantized = true;
mail@118	94 rollonParam.quantizeStep = 0.5;
mail@118	95 list.push_back(rollonParam);
matthiasm@50	96
mail@118	97 ParameterDescriptor tuningmodeParam;
mail@118	98 tuningmodeParam.identifier = "tuningmode";
mail@118	99 tuningmodeParam.name = "tuning mode";
mail@118	100 tuningmodeParam.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
mail@118	101 tuningmodeParam.unit = "";
mail@118	102 tuningmodeParam.minValue = 0;
mail@118	103 tuningmodeParam.maxValue = 1;
mail@118	104 tuningmodeParam.defaultValue = 0.0;
mail@118	105 tuningmodeParam.isQuantized = true;
mail@118	106 tuningmodeParam.valueNames.push_back("global tuning");
mail@118	107 tuningmodeParam.valueNames.push_back("local tuning");
mail@118	108 tuningmodeParam.quantizeStep = 1.0;
mail@118	109 list.push_back(tuningmodeParam);
matthiasm@50	110
mail@118	111 ParameterDescriptor whiteningParam;
mail@118	112 whiteningParam.identifier = "whitening";
mail@118	113 whiteningParam.name = "spectral whitening";
mail@118	114 whiteningParam.description = "Spectral whitening: no whitening - 0; whitening - 1.";
mail@118	115 whiteningParam.unit = "";
mail@118	116 whiteningParam.isQuantized = true;
mail@118	117 whiteningParam.minValue = 0.0;
mail@118	118 whiteningParam.maxValue = 1.0;
mail@118	119 whiteningParam.defaultValue = 1.0;
mail@118	120 whiteningParam.isQuantized = false;
mail@118	121 list.push_back(whiteningParam);
matthiasm@50	122
mail@118	123 ParameterDescriptor spectralShapeParam;
Chris@164	124 spectralShapeParam.identifier = "s";
mail@118	125 spectralShapeParam.name = "spectral shape";
mail@118	126 spectralShapeParam.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
mail@118	127 spectralShapeParam.unit = "";
mail@118	128 spectralShapeParam.minValue = 0.5;
mail@118	129 spectralShapeParam.maxValue = 0.9;
mail@118	130 spectralShapeParam.defaultValue = 0.7;
mail@118	131 spectralShapeParam.isQuantized = false;
mail@118	132 list.push_back(spectralShapeParam);
matthiasm@50	133
mail@118	134 ParameterDescriptor boostnParam;
mail@118	135 boostnParam.identifier = "boostn";
mail@118	136 boostnParam.name = "boost N";
mail@118	137 boostnParam.description = "Boost likelihood of the N (no chord) label.";
mail@118	138 boostnParam.unit = "";
mail@118	139 boostnParam.minValue = 0.0;
mail@118	140 boostnParam.maxValue = 1.0;
mail@118	141 boostnParam.defaultValue = 0.1;
mail@118	142 boostnParam.isQuantized = false;
mail@118	143 list.push_back(boostnParam);
matthiasm@50	144
mail@118	145 ParameterDescriptor usehartesyntaxParam;
mail@118	146 usehartesyntaxParam.identifier = "usehartesyntax";
mail@118	147 usehartesyntaxParam.name = "use Harte syntax";
mail@118	148 usehartesyntaxParam.description = "Use the chord syntax proposed by Harte";
mail@118	149 usehartesyntaxParam.unit = "";
mail@118	150 usehartesyntaxParam.minValue = 0.0;
mail@118	151 usehartesyntaxParam.maxValue = 1.0;
mail@118	152 usehartesyntaxParam.defaultValue = 0.0;
mail@118	153 usehartesyntaxParam.isQuantized = true;
mail@118	154 usehartesyntaxParam.quantizeStep = 1.0;
mail@118	155 usehartesyntaxParam.valueNames.push_back("no");
mail@118	156 usehartesyntaxParam.valueNames.push_back("yes");
mail@118	157 list.push_back(usehartesyntaxParam);
mail@112	158
matthiasm@50	159 return list;
matthiasm@50	160 }
matthiasm@50	161
Chris@35	162 Chordino::OutputList
Chris@35	163 Chordino::getOutputDescriptors() const
matthiasm@0	164 {
Chris@23	165 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
matthiasm@0	166 OutputList list;
matthiasm@0	167
Chris@35	168 int index = 0;
matthiasm@0	169
Chris@164	170 float featureRate =
Chris@164	171 (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
Chris@164	172
matthiasm@0	173 OutputDescriptor d7;
matthiasm@0	174 d7.identifier = "simplechord";
Chris@36	175 d7.name = "Chord Estimate";
matthiasm@133	176 d7.description = "Estimated chord times and labels.";
matthiasm@0	177 d7.unit = "";
matthiasm@0	178 d7.hasFixedBinCount = true;
matthiasm@0	179 d7.binCount = 0;
matthiasm@0	180 d7.hasKnownExtents = false;
matthiasm@0	181 d7.isQuantized = false;
matthiasm@0	182 d7.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@0	183 d7.hasDuration = false;
Chris@164	184 d7.sampleRate = featureRate;
matthiasm@0	185 list.push_back(d7);
Chris@35	186 m_outputChords = index++;
matthiasm@0	187
matthiasm@86	188 OutputDescriptor chordnotes;
matthiasm@86	189 chordnotes.identifier = "chordnotes";
matthiasm@86	190 chordnotes.name = "Note Representation of Chord Estimate";
Chris@149	191 chordnotes.description = "A simple representation of the estimated chord with bass note (if applicable) and chord notes.";
matthiasm@86	192 chordnotes.unit = "MIDI units";
matthiasm@86	193 chordnotes.hasFixedBinCount = true;
matthiasm@86	194 chordnotes.binCount = 1;
matthiasm@86	195 chordnotes.hasKnownExtents = true;
matthiasm@86	196 chordnotes.minValue = 0;
matthiasm@86	197 chordnotes.maxValue = 127;
matthiasm@86	198 chordnotes.isQuantized = true;
matthiasm@86	199 chordnotes.quantizeStep = 1;
matthiasm@86	200 chordnotes.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@86	201 chordnotes.hasDuration = true;
Chris@164	202 chordnotes.sampleRate = featureRate;
matthiasm@86	203 list.push_back(chordnotes);
matthiasm@86	204 m_outputChordnotes = index++;
matthiasm@86	205
Chris@23	206 OutputDescriptor d8;
mail@60	207 d8.identifier = "harmonicchange";
Chris@36	208 d8.name = "Harmonic Change Value";
matthiasm@58	209 d8.description = "An indication of the likelihood of harmonic change. Depends on the chord dictionary. Calculation is different depending on whether the Viterbi algorithm is used for chord estimation, or the simple chord estimate.";
matthiasm@17	210 d8.unit = "";
matthiasm@17	211 d8.hasFixedBinCount = true;
matthiasm@17	212 d8.binCount = 1;
mail@60	213 d8.hasKnownExtents = false;
matthiasm@17	214 d8.isQuantized = false;
matthiasm@17	215 d8.sampleType = OutputDescriptor::FixedSampleRate;
Chris@164	216 d8.sampleRate = featureRate;
matthiasm@17	217 d8.hasDuration = false;
matthiasm@17	218 list.push_back(d8);
Chris@35	219 m_outputHarmonicChange = index++;
matthiasm@1	220
matthiasm@107	221 OutputDescriptor loglikelihood;
matthiasm@107	222 loglikelihood.identifier = "loglikelihood";
mail@126	223 loglikelihood.name = "Log-Likelihood of Chord Estimate";
mail@124	224 loglikelihood.description = "Logarithm of the likelihood value of the simple chord estimate.";
matthiasm@107	225 loglikelihood.unit = "";
matthiasm@107	226 loglikelihood.hasFixedBinCount = true;
matthiasm@107	227 loglikelihood.binCount = 1;
matthiasm@107	228 loglikelihood.hasKnownExtents = false;
matthiasm@107	229 loglikelihood.isQuantized = false;
matthiasm@107	230 loglikelihood.sampleType = OutputDescriptor::FixedSampleRate;
Chris@164	231 loglikelihood.sampleRate = featureRate;
matthiasm@107	232 loglikelihood.hasDuration = false;
matthiasm@107	233 list.push_back(loglikelihood);
matthiasm@107	234 m_outputLoglikelihood = index++;
matthiasm@106	235
matthiasm@0	236 return list;
matthiasm@0	237 }
matthiasm@0	238
matthiasm@0	239 bool
Chris@35	240 Chordino::initialise(size_t channels, size_t stepSize, size_t blockSize)
matthiasm@0	241 {
Chris@23	242 if (debug_on) {
Chris@23	243 cerr << "--> initialise";
Chris@23	244 }
mail@76	245
Chris@35	246 if (!NNLSBase::initialise(channels, stepSize, blockSize)) {
Chris@35	247 return false;
Chris@35	248 }
mail@115	249 m_chordnames = chordDictionary(&m_chorddict, &m_chordnotes, m_boostN, m_harte_syntax);
matthiasm@0	250 return true;
matthiasm@0	251 }
matthiasm@0	252
matthiasm@0	253 void
Chris@35	254 Chordino::reset()
matthiasm@0	255 {
Chris@23	256 if (debug_on) cerr << "--> reset";
Chris@35	257 NNLSBase::reset();
matthiasm@0	258 }
matthiasm@0	259
Chris@35	260 Chordino::FeatureSet
Chris@35	261 Chordino::process(const float const inputBuffers, Vamp::RealTime timestamp)
matthiasm@0	262 {
Chris@23	263 if (debug_on) cerr << "--> process" << endl;
matthiasm@0	264
Chris@35	265 NNLSBase::baseProcess(inputBuffers, timestamp);
matthiasm@0	266
Chris@35	267 return FeatureSet();
matthiasm@0	268 }
matthiasm@0	269
Chris@35	270 Chordino::FeatureSet
Chris@35	271 Chordino::getRemainingFeatures()
matthiasm@0	272 {
mail@89	273 // cerr << hw[0] << hw[1] << endl;
mail@89	274 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
Chris@23	275 FeatureSet fsOut;
Chris@35	276 if (m_logSpectrum.size() == 0) return fsOut;
Chris@23	277 int nChord = m_chordnames.size();
Chris@23	278 //
Chris@23	279 /** Calculate Tuning
Chris@23	280 calculate tuning from (using the angle of the complex number defined by the
Chris@23	281 cumulative mean real and imag values)
Chris@23	282 **/
mail@80	283 float meanTuningImag = 0;
mail@80	284 float meanTuningReal = 0;
mail@80	285 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
mail@80	286 meanTuningReal += m_meanTunings[iBPS] * cosvalues[iBPS];
mail@80	287 meanTuningImag += m_meanTunings[iBPS] * sinvalues[iBPS];
mail@80	288 }
Chris@23	289 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
Chris@23	290 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
Chris@23	291 int intShift = floor(normalisedtuning * 3);
mail@80	292 float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this
matthiasm@1	293
Chris@23	294 char buffer0 [50];
matthiasm@1	295
Chris@23	296 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
matthiasm@1	297
matthiasm@1	298
Chris@23	299 /** Tune Log-Frequency Spectrogram
matthiasm@43	300 calculate a tuned log-frequency spectrogram (currentTunedSpec): use the tuning estimated above (kinda f0) to
Chris@91	301 perform linear interpolation on the existing log-frequency spectrogram (kinda currentLogSpectrum).
Chris@23	302 **/
Chris@163	303 if (debug_on) cerr << endl << "[Chordino Plugin] Tuning Log-Frequency Spectrogram ... ";
matthiasm@13	304
Chris@23	305 int count = 0;
matthiasm@1	306
Chris@35	307 FeatureList tunedSpec;
matthiasm@43	308 int nFrame = m_logSpectrum.size();
matthiasm@43	309
matthiasm@43	310 vector<Vamp::RealTime> timestamps;
Chris@35	311
Chris@35	312 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
Chris@91	313 Feature currentLogSpectrum = *i;
matthiasm@43	314 Feature currentTunedSpec; // tuned log-frequency spectrum
matthiasm@43	315 currentTunedSpec.hasTimestamp = true;
Chris@91	316 currentTunedSpec.timestamp = currentLogSpectrum.timestamp;
Chris@91	317 timestamps.push_back(currentLogSpectrum.timestamp);
matthiasm@43	318 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // set lower edge to zero
matthiasm@1	319
Chris@23	320 if (m_tuneLocal) {
Chris@23	321 intShift = floor(m_localTuning[count] * 3);
mail@80	322 floatShift = m_localTuning[count] * 3 - intShift; // floatShift is a really bad name for this
Chris@23	323 }
matthiasm@1	324
mail@80	325 // cerr << intShift << " " << floatShift << endl;
matthiasm@1	326
Chris@91	327 for (int k = 2; k < (int)currentLogSpectrum.values.size() - 3; ++k) { // interpolate all inner bins
mail@115	328 float tempValue = currentLogSpectrum.values[k + intShift] * (1-floatShift) + currentLogSpectrum.values[k+intShift+1] * floatShift;
matthiasm@43	329 currentTunedSpec.values.push_back(tempValue);
Chris@23	330 }
matthiasm@1	331
matthiasm@43	332 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // upper edge
matthiasm@43	333 vector<float> runningmean = SpecialConvolution(currentTunedSpec.values,hw);
Chris@23	334 vector<float> runningstd;
mail@77	335 for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance)
matthiasm@43	336 runningstd.push_back((currentTunedSpec.values[i] - runningmean[i]) * (currentTunedSpec.values[i] - runningmean[i]));
Chris@23	337 }
Chris@23	338 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
mail@77	339 for (int i = 0; i < nNote; i++) {
Chris@23	340 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
Chris@23	341 if (runningstd[i] > 0) {
matthiasm@43	342 // currentTunedSpec.values[i] = (currentTunedSpec.values[i] / runningmean[i]) > thresh ?
matthiasm@43	343 // (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
matthiasm@43	344 currentTunedSpec.values[i] = (currentTunedSpec.values[i] - runningmean[i]) > 0 ?
matthiasm@43	345 (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
Chris@23	346 }
matthiasm@43	347 if (currentTunedSpec.values[i] < 0) {
Chris@23	348 cerr << "ERROR: negative value in logfreq spectrum" << endl;
Chris@23	349 }
Chris@23	350 }
matthiasm@43	351 tunedSpec.push_back(currentTunedSpec);
Chris@23	352 count++;
Chris@23	353 }
Chris@163	354 if (debug_on) cerr << "done." << endl;
matthiasm@1	355
Chris@23	356 /** Semitone spectrum and chromagrams
Chris@23	357 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
Chris@23	358 is inferred using a non-negative least squares algorithm.
Chris@23	359 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
Chris@23	360 bass and treble stacked onto each other).
Chris@23	361 **/
matthiasm@42	362 if (m_useNNLS == 0) {
Chris@163	363 if (debug_on) cerr << "[Chordino Plugin] Mapping to semitone spectrum and chroma ... ";
Chris@23	364 } else {
Chris@163	365 if (debug_on) cerr << "[Chordino Plugin] Performing NNLS and mapping to chroma ... ";
Chris@23	366 }
matthiasm@13	367
matthiasm@1	368
matthiasm@43	369 vector<vector<double> > chordogram;
Chris@23	370 vector<vector<int> > scoreChordogram;
Chris@35	371 vector<float> chordchange = vector<float>(tunedSpec.size(),0);
Chris@23	372 count = 0;
matthiasm@9	373
Chris@35	374 FeatureList chromaList;
matthiasm@43	375
Chris@164	376 bool clipwarned = false;
Chris@35	377
Chris@35	378 for (FeatureList::iterator it = tunedSpec.begin(); it != tunedSpec.end(); ++it) {
matthiasm@43	379 Feature currentTunedSpec = *it; // logfreq spectrum
matthiasm@43	380 Feature currentChromas; // treble and bass chromagram
Chris@35	381
matthiasm@43	382 currentChromas.hasTimestamp = true;
matthiasm@43	383 currentChromas.timestamp = currentTunedSpec.timestamp;
Chris@35	384
mail@77	385 float b[nNote];
matthiasm@1	386
Chris@23	387 bool some_b_greater_zero = false;
Chris@23	388 float sumb = 0;
mail@77	389 for (int i = 0; i < nNote; i++) {
mail@77	390 // b[i] = m_dict[(nNote * count + i) % (nNote * 84)];
matthiasm@43	391 b[i] = currentTunedSpec.values[i];
Chris@23	392 sumb += b[i];
Chris@23	393 if (b[i] > 0) {
Chris@23	394 some_b_greater_zero = true;
Chris@23	395 }
Chris@23	396 }
matthiasm@1	397
Chris@23	398 // here's where the non-negative least squares algorithm calculates the note activation x
matthiasm@1	399
Chris@23	400 vector<float> chroma = vector<float>(12, 0);
Chris@23	401 vector<float> basschroma = vector<float>(12, 0);
Chris@23	402 float currval;
Chris@91	403 int iSemitone = 0;
matthiasm@1	404
Chris@23	405 if (some_b_greater_zero) {
matthiasm@42	406 if (m_useNNLS == 0) {
Chris@91	407 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
Chris@23	408 currval = 0;
mail@81	409 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
mail@81	410 currval += b[iNote + iBPS] * (1-abs(iBPS*1.0/(nBPS/2+1)));
mail@81	411 }
Chris@23	412 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
Chris@23	413 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
Chris@23	414 iSemitone++;
Chris@23	415 }
matthiasm@1	416
Chris@23	417 } else {
Chris@35	418 float x[84+1000];
Chris@23	419 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
Chris@23	420 vector<int> signifIndex;
Chris@23	421 int index=0;
Chris@23	422 sumb /= 84.0;
Chris@91	423 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
Chris@23	424 float currval = 0;
mail@81	425 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
mail@81	426 currval += b[iNote + iBPS];
mail@81	427 }
Chris@23	428 if (currval > 0) signifIndex.push_back(index);
Chris@23	429 index++;
Chris@23	430 }
Chris@35	431 float rnorm;
Chris@35	432 float w[84+1000];
Chris@35	433 float zz[84+1000];
Chris@23	434 int indx[84+1000];
Chris@23	435 int mode;
mail@77	436 int dictsize = nNote*signifIndex.size();
mail@81	437 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
Chris@35	438 float *curr_dict = new float[dictsize];
Chris@91	439 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
Chris@91	440 for (int iBin = 0; iBin < nNote; iBin++) {
mail@77	441 curr_dict[iNote * nNote + iBin] = 1.0 * m_dict[signifIndex[iNote] * nNote + iBin];
Chris@23	442 }
Chris@23	443 }
Chris@35	444 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
Chris@23	445 delete [] curr_dict;
Chris@91	446 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
Chris@23	447 // cerr << mode << endl;
Chris@23	448 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
Chris@23	449 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
Chris@23	450 }
Chris@23	451 }
Chris@23	452 }
Chris@35	453
Chris@35	454 vector<float> origchroma = chroma;
Chris@23	455 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
matthiasm@43	456 currentChromas.values = chroma;
Chris@164	457
Chris@23	458 if (m_doNormalizeChroma > 0) {
Chris@23	459 vector<float> chromanorm = vector<float>(3,0);
Chris@23	460 switch (int(m_doNormalizeChroma)) {
Chris@23	461 case 0: // should never end up here
Chris@23	462 break;
Chris@23	463 case 1:
Chris@35	464 chromanorm[0] = *max_element(origchroma.begin(), origchroma.end());
Chris@35	465 chromanorm[1] = *max_element(basschroma.begin(), basschroma.end());
Chris@23	466 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
Chris@23	467 break;
Chris@23	468 case 2:
Chris@35	469 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
Chris@23	470 chromanorm[2] += *it;
Chris@23	471 }
Chris@23	472 break;
Chris@23	473 case 3:
Chris@35	474 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
Chris@23	475 chromanorm[2] += pow(*it,2);
Chris@23	476 }
Chris@23	477 chromanorm[2] = sqrt(chromanorm[2]);
Chris@23	478 break;
Chris@23	479 }
Chris@23	480 if (chromanorm[2] > 0) {
Chris@91	481 for (int i = 0; i < (int)chroma.size(); i++) {
matthiasm@43	482 currentChromas.values[i] /= chromanorm[2];
Chris@23	483 }
Chris@23	484 }
Chris@23	485 }
Chris@35	486
matthiasm@43	487 chromaList.push_back(currentChromas);
Chris@35	488
Chris@23	489 // local chord estimation
matthiasm@43	490 vector<double> currentChordSalience;
matthiasm@43	491 double tempchordvalue = 0;
matthiasm@43	492 double sumchordvalue = 0;
matthiasm@9	493
Chris@23	494 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23	495 tempchordvalue = 0;
Chris@23	496 for (int iBin = 0; iBin < 12; iBin++) {
Chris@164	497 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23	498 }
Chris@23	499 for (int iBin = 12; iBin < 24; iBin++) {
Chris@23	500 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23	501 }
matthiasm@48	502 if (iChord == nChord-1) tempchordvalue *= .7;
matthiasm@48	503 if (tempchordvalue < 0) tempchordvalue = 0.0;
Chris@168	504 if (tempchordvalue > 200.0) {
Chris@164	505 if (!clipwarned) {
Chris@164	506 cerr << "WARNING: interim chroma contains extreme chord value " << tempchordvalue << ", clipping this and any others that appear" << endl;
Chris@164	507 clipwarned = true;
Chris@164	508 }
Chris@168	509 tempchordvalue = 200.0;
Chris@164	510 }
Chris@164	511 tempchordvalue = pow(1.3, tempchordvalue);
Chris@164	512 sumchordvalue += tempchordvalue;
Chris@23	513 currentChordSalience.push_back(tempchordvalue);
Chris@23	514 }
Chris@23	515 if (sumchordvalue > 0) {
Chris@23	516 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23	517 currentChordSalience[iChord] /= sumchordvalue;
Chris@23	518 }
Chris@23	519 } else {
Chris@23	520 currentChordSalience[nChord-1] = 1.0;
Chris@23	521 }
Chris@23	522 chordogram.push_back(currentChordSalience);
matthiasm@1	523
Chris@23	524 count++;
Chris@23	525 }
Chris@163	526 if (debug_on) cerr << "done." << endl;
matthiasm@13	527
matthiasm@86	528 vector<Feature> oldnotes;
matthiasm@10	529
Chris@163	530 if (debug_on) cerr << "[Chordino Plugin] HMM Chord Estimation ... ";
matthiasm@131	531 int oldchord = nChord-1;
matthiasm@131	532 double selftransprob = 0.99;
matthiasm@131	533
matthiasm@131	534 // vector<double> init = vector<double>(nChord,1.0/nChord);
matthiasm@131	535 vector<double> init = vector<double>(nChord,0); init[nChord-1] = 1;
matthiasm@131	536
matthiasm@131	537 double *delta;
matthiasm@131	538 delta = (double )malloc(sizeof(double)nFrame*nChord);
matthiasm@131	539
matthiasm@131	540 vector<vector<double> > trans;
matthiasm@131	541 for (int iChord = 0; iChord < nChord; iChord++) {
matthiasm@131	542 vector<double> temp = vector<double>(nChord,(1-selftransprob)/(nChord-1));
matthiasm@131	543 temp[iChord] = selftransprob;
matthiasm@131	544 trans.push_back(temp);
matthiasm@131	545 }
matthiasm@131	546 vector<double> scale;
matthiasm@131	547 vector<int> chordpath = ViterbiPath(init, trans, chordogram, delta, &scale);
matthiasm@131	548
matthiasm@131	549 Feature chord_feature; // chord estimate
matthiasm@131	550 chord_feature.hasTimestamp = true;
matthiasm@131	551 chord_feature.timestamp = timestamps[0];
matthiasm@131	552 chord_feature.label = m_chordnames[chordpath[0]];
matthiasm@131	553 fsOut[m_outputChords].push_back(chord_feature);
matthiasm@131	554
matthiasm@131	555 chordchange[0] = 0;
matthiasm@131	556 for (int iFrame = 1; iFrame < (int)chordpath.size(); ++iFrame) {
matthiasm@131	557 if (chordpath[iFrame] != oldchord ) {
matthiasm@131	558 // chord
matthiasm@131	559 Feature chord_feature; // chord estimate
matthiasm@131	560 chord_feature.hasTimestamp = true;
matthiasm@131	561 chord_feature.timestamp = timestamps[iFrame];
matthiasm@131	562 chord_feature.label = m_chordnames[chordpath[iFrame]];
matthiasm@131	563 fsOut[m_outputChords].push_back(chord_feature);
matthiasm@131	564 oldchord = chordpath[iFrame];
matthiasm@131	565 // chord notes
matthiasm@131	566 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
matthiasm@131	567 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[iFrame];
matthiasm@131	568 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
Chris@23	569 }
matthiasm@131	570 oldnotes.clear();
matthiasm@131	571 for (int iNote = 0; iNote < (int)m_chordnotes[chordpath[iFrame]].size(); ++iNote) { // prepare notes of current chord
matthiasm@131	572 Feature chordnote_feature;
matthiasm@131	573 chordnote_feature.hasTimestamp = true;
matthiasm@131	574 chordnote_feature.timestamp = timestamps[iFrame];
matthiasm@131	575 chordnote_feature.values.push_back(m_chordnotes[chordpath[iFrame]][iNote]);
matthiasm@131	576 chordnote_feature.hasDuration = true;
matthiasm@131	577 chordnote_feature.duration = -timestamps[iFrame]; // this will be corrected at the next chord
matthiasm@131	578 oldnotes.push_back(chordnote_feature);
matthiasm@50	579 }
Chris@23	580 }
matthiasm@131	581 /* calculating simple chord change prob */
matthiasm@131	582 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@164	583 double num = delta[(iFrame-1) * nChord + iChord];
Chris@164	584 double denom = delta[iFrame * nChord + iChord];
Chris@164	585 double eps = 1e-7;
Chris@164	586 if (denom < eps) denom = eps;
Chris@164	587 chordchange[iFrame-1] += num * log(num / denom + eps);
Chris@23	588 }
Chris@23	589 }
matthiasm@131	590
matthiasm@131	591 float logscale = 0;
matthiasm@131	592 for (int iFrame = 0; iFrame < nFrame; ++iFrame) {
matthiasm@131	593 logscale -= log(scale[iFrame]);
matthiasm@131	594 Feature loglikelihood;
matthiasm@131	595 loglikelihood.hasTimestamp = true;
matthiasm@131	596 loglikelihood.timestamp = timestamps[iFrame];
matthiasm@131	597 loglikelihood.values.push_back(-log(scale[iFrame]));
matthiasm@131	598 // cerr << chordchange[iFrame] << endl;
matthiasm@131	599 fsOut[m_outputLoglikelihood].push_back(loglikelihood);
matthiasm@131	600 }
matthiasm@131	601 logscale /= nFrame;
matthiasm@131	602
matthiasm@43	603 chord_feature.hasTimestamp = true;
matthiasm@43	604 chord_feature.timestamp = timestamps[timestamps.size()-1];
matthiasm@43	605 chord_feature.label = "N";
mail@60	606 fsOut[m_outputChords].push_back(chord_feature);
matthiasm@86	607
Chris@91	608 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
matthiasm@86	609 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[timestamps.size()-1];
matthiasm@86	610 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
matthiasm@86	611 }
matthiasm@86	612
Chris@163	613 if (debug_on) cerr << "done." << endl;
Chris@159	614
matthiasm@50	615 for (int iFrame = 0; iFrame < nFrame; iFrame++) {
matthiasm@50	616 Feature chordchange_feature;
matthiasm@50	617 chordchange_feature.hasTimestamp = true;
matthiasm@50	618 chordchange_feature.timestamp = timestamps[iFrame];
matthiasm@50	619 chordchange_feature.values.push_back(chordchange[iFrame]);
Chris@164	620 // cerr << "putting value " << chordchange[iFrame] << " at time " << chordchange_feature.timestamp << endl;
mail@60	621 fsOut[m_outputHarmonicChange].push_back(chordchange_feature);
matthiasm@50	622 }
Chris@161	623
Chris@161	624 free(delta);
matthiasm@50	625
mail@60	626 // for (int iFrame = 0; iFrame < nFrame; iFrame++) cerr << fsOut[m_outputHarmonicChange][iFrame].values[0] << endl;
matthiasm@50	627
Chris@23	628 return fsOut;
matthiasm@0	629 }

Mercurial > hg > nnls-chroma

annotate Chordino.cpp @ 184:82d5d11b68d7 tip