nnls-chroma: Chordino.cpp annotate

annotate Chordino.cpp @ 133:5c1a25b3daf0 darwintunes

changed descriptions

author	matthiasm
date	Thu, 16 Jun 2011 18:32:41 +0100
parents	b547e7238bf5
children	d0ea842539a9 f01e5707b804

rev	line source
Chris@23	1 /* -- c-basic-offset: 4 indent-tabs-mode: nil -- vi:set ts=8 sts=4 sw=4: */
matthiasm@0	2
Chris@35	3 /*
Chris@35	4 NNLS-Chroma / Chordino
Chris@35	5
Chris@35	6 Audio feature extraction plugins for chromagram and chord
Chris@35	7 estimation.
Chris@35	8
Chris@35	9 Centre for Digital Music, Queen Mary University of London.
Chris@35	10 This file copyright 2008-2010 Matthias Mauch and QMUL.
Chris@35	11
Chris@35	12 This program is free software; you can redistribute it and/or
Chris@35	13 modify it under the terms of the GNU General Public License as
Chris@35	14 published by the Free Software Foundation; either version 2 of the
Chris@35	15 License, or (at your option) any later version. See the file
Chris@35	16 COPYING included with this distribution for more information.
Chris@35	17 */
Chris@35	18
Chris@35	19 #include "Chordino.h"
Chris@27	20
Chris@27	21 #include "chromamethods.h"
matthiasm@43	22 #include "viterbi.h"
Chris@27	23
Chris@27	24 #include <cstdlib>
Chris@27	25 #include <fstream>
matthiasm@0	26 #include <cmath>
matthiasm@9	27
Chris@27	28 #include <algorithm>
matthiasm@0	29
// File-wide switch for trace output: when true, the methods below print their
// name to cerr as they are entered. It is a constant, so it is changed by
// editing this line and rebuilding.
matthiasm@0	30 const bool debug_on = false;
matthiasm@0	31
Chris@35	32 Chordino::Chordino(float inputSampleRate) :
matthiasm@86	33 NNLSBase(inputSampleRate),
matthiasm@86	34 m_chorddict(0),
matthiasm@86	35 m_chordnotes(0),
matthiasm@86	36 m_chordnames(0)
matthiasm@0	37 {
Chris@35	38 if (debug_on) cerr << "--> Chordino" << endl;
matthiasm@0	39 }
matthiasm@0	40
Chris@35	41 Chordino::~Chordino()
matthiasm@0	42 {
Chris@35	43 if (debug_on) cerr << "--> ~Chordino" << endl;
matthiasm@0	44 }
matthiasm@0	45
matthiasm@0	46 string
Chris@35	47 Chordino::getIdentifier() const
matthiasm@0	48 {
Chris@23	49 if (debug_on) cerr << "--> getIdentifier" << endl;
Chris@35	50 return "chordino";
matthiasm@0	51 }
matthiasm@0	52
matthiasm@0	53 string
Chris@35	54 Chordino::getName() const
matthiasm@0	55 {
Chris@23	56 if (debug_on) cerr << "--> getName" << endl;
Chris@35	57 return "Chordino";
matthiasm@0	58 }
matthiasm@0	59
matthiasm@0	60 string
Chris@35	61 Chordino::getDescription() const
matthiasm@0	62 {
Chris@23	63 if (debug_on) cerr << "--> getDescription" << endl;
matthiasm@133	64 return "Chordino provides a simple chord transcription based on NNLS Chroma (as in the NNLS Chroma plugin). Chord profiles given by the user in the file chord.dict are used to calculate frame-wise chord similarities. A simple (non-state-of-the-art!) algorithm smoothes these to provide a chord transcription using a standard HMM/Viterbi approach.";
matthiasm@0	65 }
matthiasm@0	66
matthiasm@50	67 Chordino::ParameterList
matthiasm@50	68 Chordino::getParameterDescriptors() const
matthiasm@50	69 {
matthiasm@50	70 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
matthiasm@50	71 ParameterList list;
matthiasm@50	72
mail@118	73 ParameterDescriptor useNNLSParam;
mail@118	74 useNNLSParam.identifier = "useNNLS";
mail@118	75 useNNLSParam.name = "use approximate transcription (NNLS)";
mail@118	76 useNNLSParam.description = "Toggles approximate transcription (NNLS).";
mail@118	77 useNNLSParam.unit = "";
mail@118	78 useNNLSParam.minValue = 0.0;
mail@118	79 useNNLSParam.maxValue = 1.0;
mail@118	80 useNNLSParam.defaultValue = 1.0;
mail@118	81 useNNLSParam.isQuantized = true;
mail@118	82 useNNLSParam.quantizeStep = 1.0;
mail@118	83 list.push_back(useNNLSParam);
matthiasm@50	84
mail@118	85 ParameterDescriptor rollonParam;
mail@118	86 rollonParam.identifier = "rollon";
mail@118	87 rollonParam.name = "bass noise threshold";
mail@118	88 rollonParam.description = "Consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds the quantile [bass noise threshold] x [total energy] will be set to 0. A threshold value of 0 means that no bins will be changed.";
mail@118	89 rollonParam.unit = "%";
mail@118	90 rollonParam.minValue = 0;
mail@118	91 rollonParam.maxValue = 5;
mail@118	92 rollonParam.defaultValue = 0.0;
mail@118	93 rollonParam.isQuantized = true;
mail@118	94 rollonParam.quantizeStep = 0.5;
mail@118	95 list.push_back(rollonParam);
matthiasm@50	96
mail@118	97 ParameterDescriptor tuningmodeParam;
mail@118	98 tuningmodeParam.identifier = "tuningmode";
mail@118	99 tuningmodeParam.name = "tuning mode";
mail@118	100 tuningmodeParam.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
mail@118	101 tuningmodeParam.unit = "";
mail@118	102 tuningmodeParam.minValue = 0;
mail@118	103 tuningmodeParam.maxValue = 1;
mail@118	104 tuningmodeParam.defaultValue = 0.0;
mail@118	105 tuningmodeParam.isQuantized = true;
mail@118	106 tuningmodeParam.valueNames.push_back("global tuning");
mail@118	107 tuningmodeParam.valueNames.push_back("local tuning");
mail@118	108 tuningmodeParam.quantizeStep = 1.0;
mail@118	109 list.push_back(tuningmodeParam);
matthiasm@50	110
mail@118	111 ParameterDescriptor whiteningParam;
mail@118	112 whiteningParam.identifier = "whitening";
mail@118	113 whiteningParam.name = "spectral whitening";
mail@118	114 whiteningParam.description = "Spectral whitening: no whitening - 0; whitening - 1.";
mail@118	115 whiteningParam.unit = "";
mail@118	116 whiteningParam.isQuantized = true;
mail@118	117 whiteningParam.minValue = 0.0;
mail@118	118 whiteningParam.maxValue = 1.0;
mail@118	119 whiteningParam.defaultValue = 1.0;
mail@118	120 whiteningParam.isQuantized = false;
mail@118	121 list.push_back(whiteningParam);
matthiasm@50	122
mail@118	123 ParameterDescriptor spectralShapeParam;
mail@118	124 spectralShapeParam.identifier = "spectralshape";
mail@118	125 spectralShapeParam.name = "spectral shape";
mail@118	126 spectralShapeParam.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
mail@118	127 spectralShapeParam.unit = "";
mail@118	128 spectralShapeParam.minValue = 0.5;
mail@118	129 spectralShapeParam.maxValue = 0.9;
mail@118	130 spectralShapeParam.defaultValue = 0.7;
mail@118	131 spectralShapeParam.isQuantized = false;
mail@118	132 list.push_back(spectralShapeParam);
matthiasm@50	133
mail@118	134 ParameterDescriptor boostnParam;
mail@118	135 boostnParam.identifier = "boostn";
mail@118	136 boostnParam.name = "boost N";
mail@118	137 boostnParam.description = "Boost likelihood of the N (no chord) label.";
mail@118	138 boostnParam.unit = "";
mail@118	139 boostnParam.minValue = 0.0;
mail@118	140 boostnParam.maxValue = 1.0;
mail@118	141 boostnParam.defaultValue = 0.1;
mail@118	142 boostnParam.isQuantized = false;
mail@118	143 list.push_back(boostnParam);
matthiasm@50	144
mail@118	145 ParameterDescriptor usehartesyntaxParam;
mail@118	146 usehartesyntaxParam.identifier = "usehartesyntax";
mail@118	147 usehartesyntaxParam.name = "use Harte syntax";
mail@118	148 usehartesyntaxParam.description = "Use the chord syntax proposed by Harte";
mail@118	149 usehartesyntaxParam.unit = "";
mail@118	150 usehartesyntaxParam.minValue = 0.0;
mail@118	151 usehartesyntaxParam.maxValue = 1.0;
mail@118	152 usehartesyntaxParam.defaultValue = 0.0;
mail@118	153 usehartesyntaxParam.isQuantized = true;
mail@118	154 usehartesyntaxParam.quantizeStep = 1.0;
mail@118	155 usehartesyntaxParam.valueNames.push_back("no");
mail@118	156 usehartesyntaxParam.valueNames.push_back("yes");
mail@118	157 list.push_back(usehartesyntaxParam);
mail@112	158
matthiasm@50	159 return list;
matthiasm@50	160 }
matthiasm@50	161
Chris@35	162 Chordino::OutputList
Chris@35	163 Chordino::getOutputDescriptors() const
matthiasm@0	164 {
Chris@23	165 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
matthiasm@0	166 OutputList list;
matthiasm@0	167
Chris@35	168 int index = 0;
matthiasm@0	169
matthiasm@0	170 OutputDescriptor d7;
matthiasm@0	171 d7.identifier = "simplechord";
Chris@36	172 d7.name = "Chord Estimate";
matthiasm@133	173 d7.description = "Estimated chord times and labels.";
matthiasm@0	174 d7.unit = "";
matthiasm@0	175 d7.hasFixedBinCount = true;
matthiasm@0	176 d7.binCount = 0;
matthiasm@0	177 d7.hasKnownExtents = false;
matthiasm@0	178 d7.isQuantized = false;
matthiasm@0	179 d7.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@0	180 d7.hasDuration = false;
matthiasm@0	181 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0	182 list.push_back(d7);
Chris@35	183 m_outputChords = index++;
matthiasm@0	184
matthiasm@86	185 OutputDescriptor chordnotes;
matthiasm@86	186 chordnotes.identifier = "chordnotes";
matthiasm@86	187 chordnotes.name = "Note Representation of Chord Estimate";
matthiasm@86	188 chordnotes.description = "A simple represenation of the estimated chord with bass note (if applicable) and chord notes.";
matthiasm@86	189 chordnotes.unit = "MIDI units";
matthiasm@86	190 chordnotes.hasFixedBinCount = true;
matthiasm@86	191 chordnotes.binCount = 1;
matthiasm@86	192 chordnotes.hasKnownExtents = true;
matthiasm@86	193 chordnotes.minValue = 0;
matthiasm@86	194 chordnotes.maxValue = 127;
matthiasm@86	195 chordnotes.isQuantized = true;
matthiasm@86	196 chordnotes.quantizeStep = 1;
matthiasm@86	197 chordnotes.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@86	198 chordnotes.hasDuration = true;
matthiasm@86	199 chordnotes.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@86	200 list.push_back(chordnotes);
matthiasm@86	201 m_outputChordnotes = index++;
matthiasm@86	202
Chris@23	203 OutputDescriptor d8;
mail@60	204 d8.identifier = "harmonicchange";
Chris@36	205 d8.name = "Harmonic Change Value";
matthiasm@58	206 d8.description = "An indication of the likelihood of harmonic change. Depends on the chord dictionary. Calculation is different depending on whether the Viterbi algorithm is used for chord estimation, or the simple chord estimate.";
matthiasm@17	207 d8.unit = "";
matthiasm@17	208 d8.hasFixedBinCount = true;
matthiasm@17	209 d8.binCount = 1;
mail@60	210 d8.hasKnownExtents = false;
matthiasm@17	211 d8.isQuantized = false;
matthiasm@17	212 d8.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@17	213 d8.hasDuration = false;
matthiasm@17	214 list.push_back(d8);
Chris@35	215 m_outputHarmonicChange = index++;
matthiasm@1	216
matthiasm@107	217 OutputDescriptor loglikelihood;
matthiasm@107	218 loglikelihood.identifier = "loglikelihood";
mail@126	219 loglikelihood.name = "Log-Likelihood of Chord Estimate";
mail@124	220 loglikelihood.description = "Logarithm of the likelihood value of the simple chord estimate.";
matthiasm@107	221 loglikelihood.unit = "";
matthiasm@107	222 loglikelihood.hasFixedBinCount = true;
matthiasm@107	223 loglikelihood.binCount = 1;
matthiasm@107	224 loglikelihood.hasKnownExtents = false;
matthiasm@107	225 loglikelihood.isQuantized = false;
matthiasm@107	226 loglikelihood.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@107	227 loglikelihood.hasDuration = false;
matthiasm@107	228 list.push_back(loglikelihood);
matthiasm@107	229 m_outputLoglikelihood = index++;
matthiasm@106	230
matthiasm@0	231 return list;
matthiasm@0	232 }
matthiasm@0	233
matthiasm@0	234 bool
Chris@35	235 Chordino::initialise(size_t channels, size_t stepSize, size_t blockSize)
matthiasm@0	236 {
Chris@23	237 if (debug_on) {
Chris@23	238 cerr << "--> initialise";
Chris@23	239 }
mail@76	240
Chris@35	241 if (!NNLSBase::initialise(channels, stepSize, blockSize)) {
Chris@35	242 return false;
Chris@35	243 }
mail@115	244 m_chordnames = chordDictionary(&m_chorddict, &m_chordnotes, m_boostN, m_harte_syntax);
matthiasm@0	245 return true;
matthiasm@0	246 }
matthiasm@0	247
matthiasm@0	248 void
Chris@35	249 Chordino::reset()
matthiasm@0	250 {
Chris@23	251 if (debug_on) cerr << "--> reset";
Chris@35	252 NNLSBase::reset();
matthiasm@0	253 }
matthiasm@0	254
Chris@35	255 Chordino::FeatureSet
Chris@35	256 Chordino::process(const float const inputBuffers, Vamp::RealTime timestamp)
matthiasm@0	257 {
Chris@23	258 if (debug_on) cerr << "--> process" << endl;
matthiasm@0	259
Chris@35	260 NNLSBase::baseProcess(inputBuffers, timestamp);
matthiasm@0	261
Chris@35	262 return FeatureSet();
matthiasm@0	263 }
matthiasm@0	264
Chris@35	265 Chordino::FeatureSet
Chris@35	266 Chordino::getRemainingFeatures()
matthiasm@0	267 {
mail@89	268 // cerr << hw[0] << hw[1] << endl;
mail@89	269 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
Chris@23	270 FeatureSet fsOut;
Chris@35	271 if (m_logSpectrum.size() == 0) return fsOut;
Chris@23	272 int nChord = m_chordnames.size();
Chris@23	273 //
Chris@23	274 /** Calculate Tuning
Chris@23	275 calculate tuning from (using the angle of the complex number defined by the
Chris@23	276 cumulative mean real and imag values)
Chris@23	277 **/
mail@80	278 float meanTuningImag = 0;
mail@80	279 float meanTuningReal = 0;
mail@80	280 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
mail@80	281 meanTuningReal += m_meanTunings[iBPS] * cosvalues[iBPS];
mail@80	282 meanTuningImag += m_meanTunings[iBPS] * sinvalues[iBPS];
mail@80	283 }
Chris@23	284 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
Chris@23	285 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
Chris@23	286 int intShift = floor(normalisedtuning * 3);
mail@80	287 float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this
matthiasm@1	288
Chris@23	289 char buffer0 [50];
matthiasm@1	290
Chris@23	291 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
matthiasm@1	292
matthiasm@1	293
Chris@23	294 /** Tune Log-Frequency Spectrogram
matthiasm@43	295 calculate a tuned log-frequency spectrogram (currentTunedSpec): use the tuning estimated above (kinda f0) to
Chris@91	296 perform linear interpolation on the existing log-frequency spectrogram (kinda currentLogSpectrum).
Chris@23	297 **/
Chris@35	298 cerr << endl << "[Chordino Plugin] Tuning Log-Frequency Spectrogram ... ";
matthiasm@13	299
Chris@23	300 int count = 0;
matthiasm@1	301
Chris@35	302 FeatureList tunedSpec;
matthiasm@43	303 int nFrame = m_logSpectrum.size();
matthiasm@43	304
matthiasm@43	305 vector<Vamp::RealTime> timestamps;
Chris@35	306
Chris@35	307 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
Chris@91	308 Feature currentLogSpectrum = *i;
matthiasm@43	309 Feature currentTunedSpec; // tuned log-frequency spectrum
matthiasm@43	310 currentTunedSpec.hasTimestamp = true;
Chris@91	311 currentTunedSpec.timestamp = currentLogSpectrum.timestamp;
Chris@91	312 timestamps.push_back(currentLogSpectrum.timestamp);
matthiasm@43	313 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // set lower edge to zero
matthiasm@1	314
Chris@23	315 if (m_tuneLocal) {
Chris@23	316 intShift = floor(m_localTuning[count] * 3);
mail@80	317 floatShift = m_localTuning[count] * 3 - intShift; // floatShift is a really bad name for this
Chris@23	318 }
matthiasm@1	319
mail@80	320 // cerr << intShift << " " << floatShift << endl;
matthiasm@1	321
Chris@91	322 for (int k = 2; k < (int)currentLogSpectrum.values.size() - 3; ++k) { // interpolate all inner bins
mail@115	323 float tempValue = currentLogSpectrum.values[k + intShift] * (1-floatShift) + currentLogSpectrum.values[k+intShift+1] * floatShift;
matthiasm@43	324 currentTunedSpec.values.push_back(tempValue);
Chris@23	325 }
matthiasm@1	326
matthiasm@43	327 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // upper edge
matthiasm@43	328 vector<float> runningmean = SpecialConvolution(currentTunedSpec.values,hw);
Chris@23	329 vector<float> runningstd;
mail@77	330 for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance)
matthiasm@43	331 runningstd.push_back((currentTunedSpec.values[i] - runningmean[i]) * (currentTunedSpec.values[i] - runningmean[i]));
Chris@23	332 }
Chris@23	333 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
mail@77	334 for (int i = 0; i < nNote; i++) {
Chris@23	335 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
Chris@23	336 if (runningstd[i] > 0) {
matthiasm@43	337 // currentTunedSpec.values[i] = (currentTunedSpec.values[i] / runningmean[i]) > thresh ?
matthiasm@43	338 // (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
matthiasm@43	339 currentTunedSpec.values[i] = (currentTunedSpec.values[i] - runningmean[i]) > 0 ?
matthiasm@43	340 (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
Chris@23	341 }
matthiasm@43	342 if (currentTunedSpec.values[i] < 0) {
Chris@23	343 cerr << "ERROR: negative value in logfreq spectrum" << endl;
Chris@23	344 }
Chris@23	345 }
matthiasm@43	346 tunedSpec.push_back(currentTunedSpec);
Chris@23	347 count++;
Chris@23	348 }
Chris@23	349 cerr << "done." << endl;
matthiasm@1	350
Chris@23	351 /** Semitone spectrum and chromagrams
Chris@23	352 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
Chris@23	353 is inferred using a non-negative least squares algorithm.
Chris@23	354 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
Chris@23	355 bass and treble stacked onto each other).
Chris@23	356 **/
matthiasm@42	357 if (m_useNNLS == 0) {
Chris@35	358 cerr << "[Chordino Plugin] Mapping to semitone spectrum and chroma ... ";
Chris@23	359 } else {
Chris@35	360 cerr << "[Chordino Plugin] Performing NNLS and mapping to chroma ... ";
Chris@23	361 }
matthiasm@13	362
matthiasm@1	363
matthiasm@43	364 vector<vector<double> > chordogram;
Chris@23	365 vector<vector<int> > scoreChordogram;
Chris@35	366 vector<float> chordchange = vector<float>(tunedSpec.size(),0);
Chris@23	367 count = 0;
matthiasm@9	368
Chris@35	369 FeatureList chromaList;
matthiasm@43	370
matthiasm@43	371
Chris@35	372
Chris@35	373 for (FeatureList::iterator it = tunedSpec.begin(); it != tunedSpec.end(); ++it) {
matthiasm@43	374 Feature currentTunedSpec = *it; // logfreq spectrum
matthiasm@43	375 Feature currentChromas; // treble and bass chromagram
Chris@35	376
matthiasm@43	377 currentChromas.hasTimestamp = true;
matthiasm@43	378 currentChromas.timestamp = currentTunedSpec.timestamp;
Chris@35	379
mail@77	380 float b[nNote];
matthiasm@1	381
Chris@23	382 bool some_b_greater_zero = false;
Chris@23	383 float sumb = 0;
mail@77	384 for (int i = 0; i < nNote; i++) {
mail@77	385 // b[i] = m_dict[(nNote * count + i) % (nNote * 84)];
matthiasm@43	386 b[i] = currentTunedSpec.values[i];
Chris@23	387 sumb += b[i];
Chris@23	388 if (b[i] > 0) {
Chris@23	389 some_b_greater_zero = true;
Chris@23	390 }
Chris@23	391 }
matthiasm@1	392
Chris@23	393 // here's where the non-negative least squares algorithm calculates the note activation x
matthiasm@1	394
Chris@23	395 vector<float> chroma = vector<float>(12, 0);
Chris@23	396 vector<float> basschroma = vector<float>(12, 0);
Chris@23	397 float currval;
Chris@91	398 int iSemitone = 0;
matthiasm@1	399
Chris@23	400 if (some_b_greater_zero) {
matthiasm@42	401 if (m_useNNLS == 0) {
Chris@91	402 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
Chris@23	403 currval = 0;
mail@81	404 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
mail@81	405 currval += b[iNote + iBPS] * (1-abs(iBPS*1.0/(nBPS/2+1)));
mail@81	406 }
Chris@23	407 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
Chris@23	408 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
Chris@23	409 iSemitone++;
Chris@23	410 }
matthiasm@1	411
Chris@23	412 } else {
Chris@35	413 float x[84+1000];
Chris@23	414 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
Chris@23	415 vector<int> signifIndex;
Chris@23	416 int index=0;
Chris@23	417 sumb /= 84.0;
Chris@91	418 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
Chris@23	419 float currval = 0;
mail@81	420 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
mail@81	421 currval += b[iNote + iBPS];
mail@81	422 }
Chris@23	423 if (currval > 0) signifIndex.push_back(index);
Chris@23	424 index++;
Chris@23	425 }
Chris@35	426 float rnorm;
Chris@35	427 float w[84+1000];
Chris@35	428 float zz[84+1000];
Chris@23	429 int indx[84+1000];
Chris@23	430 int mode;
mail@77	431 int dictsize = nNote*signifIndex.size();
mail@81	432 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
Chris@35	433 float *curr_dict = new float[dictsize];
Chris@91	434 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
Chris@91	435 for (int iBin = 0; iBin < nNote; iBin++) {
mail@77	436 curr_dict[iNote * nNote + iBin] = 1.0 * m_dict[signifIndex[iNote] * nNote + iBin];
Chris@23	437 }
Chris@23	438 }
Chris@35	439 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
Chris@23	440 delete [] curr_dict;
Chris@91	441 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
Chris@23	442 // cerr << mode << endl;
Chris@23	443 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
Chris@23	444 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
Chris@23	445 }
Chris@23	446 }
Chris@23	447 }
Chris@35	448
Chris@35	449 vector<float> origchroma = chroma;
Chris@23	450 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
matthiasm@43	451 currentChromas.values = chroma;
Chris@35	452
Chris@23	453 if (m_doNormalizeChroma > 0) {
Chris@23	454 vector<float> chromanorm = vector<float>(3,0);
Chris@23	455 switch (int(m_doNormalizeChroma)) {
Chris@23	456 case 0: // should never end up here
Chris@23	457 break;
Chris@23	458 case 1:
Chris@35	459 chromanorm[0] = *max_element(origchroma.begin(), origchroma.end());
Chris@35	460 chromanorm[1] = *max_element(basschroma.begin(), basschroma.end());
Chris@23	461 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
Chris@23	462 break;
Chris@23	463 case 2:
Chris@35	464 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
Chris@23	465 chromanorm[2] += *it;
Chris@23	466 }
Chris@23	467 break;
Chris@23	468 case 3:
Chris@35	469 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
Chris@23	470 chromanorm[2] += pow(*it,2);
Chris@23	471 }
Chris@23	472 chromanorm[2] = sqrt(chromanorm[2]);
Chris@23	473 break;
Chris@23	474 }
Chris@23	475 if (chromanorm[2] > 0) {
Chris@91	476 for (int i = 0; i < (int)chroma.size(); i++) {
matthiasm@43	477 currentChromas.values[i] /= chromanorm[2];
Chris@23	478 }
Chris@23	479 }
Chris@23	480 }
Chris@35	481
matthiasm@43	482 chromaList.push_back(currentChromas);
Chris@35	483
Chris@23	484 // local chord estimation
matthiasm@43	485 vector<double> currentChordSalience;
matthiasm@43	486 double tempchordvalue = 0;
matthiasm@43	487 double sumchordvalue = 0;
matthiasm@9	488
Chris@23	489 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23	490 tempchordvalue = 0;
Chris@23	491 for (int iBin = 0; iBin < 12; iBin++) {
matthiasm@44	492 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23	493 }
Chris@23	494 for (int iBin = 12; iBin < 24; iBin++) {
Chris@23	495 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
Chris@23	496 }
matthiasm@48	497 if (iChord == nChord-1) tempchordvalue *= .7;
matthiasm@48	498 if (tempchordvalue < 0) tempchordvalue = 0.0;
matthiasm@50	499 tempchordvalue = pow(1.3,tempchordvalue);
Chris@23	500 sumchordvalue+=tempchordvalue;
Chris@23	501 currentChordSalience.push_back(tempchordvalue);
Chris@23	502 }
Chris@23	503 if (sumchordvalue > 0) {
Chris@23	504 for (int iChord = 0; iChord < nChord; iChord++) {
Chris@23	505 currentChordSalience[iChord] /= sumchordvalue;
Chris@23	506 }
Chris@23	507 } else {
Chris@23	508 currentChordSalience[nChord-1] = 1.0;
Chris@23	509 }
Chris@23	510 chordogram.push_back(currentChordSalience);
matthiasm@1	511
Chris@23	512 count++;
Chris@23	513 }
Chris@23	514 cerr << "done." << endl;
matthiasm@13	515
matthiasm@86	516 vector<Feature> oldnotes;
matthiasm@10	517
matthiasm@131	518 cerr << "[Chordino Plugin] HMM Chord Estimation ... ";
matthiasm@131	519 int oldchord = nChord-1;
matthiasm@131	520 double selftransprob = 0.99;
matthiasm@131	521
matthiasm@131	522 // vector<double> init = vector<double>(nChord,1.0/nChord);
matthiasm@131	523 vector<double> init = vector<double>(nChord,0); init[nChord-1] = 1;
matthiasm@131	524
matthiasm@131	525 double *delta;
matthiasm@131	526 delta = (double )malloc(sizeof(double)nFrame*nChord);
matthiasm@131	527
matthiasm@131	528 vector<vector<double> > trans;
matthiasm@131	529 for (int iChord = 0; iChord < nChord; iChord++) {
matthiasm@131	530 vector<double> temp = vector<double>(nChord,(1-selftransprob)/(nChord-1));
matthiasm@131	531 temp[iChord] = selftransprob;
matthiasm@131	532 trans.push_back(temp);
matthiasm@131	533 }
matthiasm@131	534 vector<double> scale;
matthiasm@131	535 vector<int> chordpath = ViterbiPath(init, trans, chordogram, delta, &scale);
matthiasm@131	536
matthiasm@48	537
matthiasm@131	538 Feature chord_feature; // chord estimate
matthiasm@131	539 chord_feature.hasTimestamp = true;
matthiasm@131	540 chord_feature.timestamp = timestamps[0];
matthiasm@131	541 chord_feature.label = m_chordnames[chordpath[0]];
matthiasm@131	542 fsOut[m_outputChords].push_back(chord_feature);
matthiasm@131	543
matthiasm@131	544 chordchange[0] = 0;
matthiasm@131	545 for (int iFrame = 1; iFrame < (int)chordpath.size(); ++iFrame) {
matthiasm@131	546 // cerr << chordpath[iFrame] << endl;
matthiasm@131	547 if (chordpath[iFrame] != oldchord ) {
matthiasm@131	548 // chord
matthiasm@131	549 Feature chord_feature; // chord estimate
matthiasm@131	550 chord_feature.hasTimestamp = true;
matthiasm@131	551 chord_feature.timestamp = timestamps[iFrame];
matthiasm@131	552 chord_feature.label = m_chordnames[chordpath[iFrame]];
matthiasm@131	553 fsOut[m_outputChords].push_back(chord_feature);
matthiasm@131	554 oldchord = chordpath[iFrame];
matthiasm@131	555 // chord notes
matthiasm@131	556 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
matthiasm@131	557 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[iFrame];
matthiasm@131	558 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
Chris@23	559 }
matthiasm@131	560 oldnotes.clear();
matthiasm@131	561 for (int iNote = 0; iNote < (int)m_chordnotes[chordpath[iFrame]].size(); ++iNote) { // prepare notes of current chord
matthiasm@131	562 Feature chordnote_feature;
matthiasm@131	563 chordnote_feature.hasTimestamp = true;
matthiasm@131	564 chordnote_feature.timestamp = timestamps[iFrame];
matthiasm@131	565 chordnote_feature.values.push_back(m_chordnotes[chordpath[iFrame]][iNote]);
matthiasm@131	566 chordnote_feature.hasDuration = true;
matthiasm@131	567 chordnote_feature.duration = -timestamps[iFrame]; // this will be corrected at the next chord
matthiasm@131	568 oldnotes.push_back(chordnote_feature);
matthiasm@50	569 }
Chris@23	570 }
matthiasm@131	571 /* calculating simple chord change prob */
matthiasm@131	572 for (int iChord = 0; iChord < nChord; iChord++) {
matthiasm@131	573 chordchange[iFrame-1] += delta[(iFrame-1)nChord + iChord] log(delta[(iFrame-1)nChord + iChord]/delta[iFramenChord + iChord]);
Chris@23	574 }
Chris@23	575 }
matthiasm@131	576
matthiasm@131	577 float logscale = 0;
matthiasm@131	578 for (int iFrame = 0; iFrame < nFrame; ++iFrame) {
matthiasm@131	579 logscale -= log(scale[iFrame]);
matthiasm@131	580 Feature loglikelihood;
matthiasm@131	581 loglikelihood.hasTimestamp = true;
matthiasm@131	582 loglikelihood.timestamp = timestamps[iFrame];
matthiasm@131	583 loglikelihood.values.push_back(-log(scale[iFrame]));
matthiasm@131	584 // cerr << chordchange[iFrame] << endl;
matthiasm@131	585 fsOut[m_outputLoglikelihood].push_back(loglikelihood);
matthiasm@131	586 }
matthiasm@131	587 logscale /= nFrame;
matthiasm@131	588
matthiasm@43	589 chord_feature.hasTimestamp = true;
matthiasm@43	590 chord_feature.timestamp = timestamps[timestamps.size()-1];
matthiasm@43	591 chord_feature.label = "N";
mail@60	592 fsOut[m_outputChords].push_back(chord_feature);
matthiasm@86	593
Chris@91	594 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
matthiasm@86	595 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[timestamps.size()-1];
matthiasm@86	596 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
matthiasm@86	597 }
matthiasm@86	598
Chris@23	599 cerr << "done." << endl;
matthiasm@50	600
matthiasm@50	601 for (int iFrame = 0; iFrame < nFrame; iFrame++) {
matthiasm@50	602 Feature chordchange_feature;
matthiasm@50	603 chordchange_feature.hasTimestamp = true;
matthiasm@50	604 chordchange_feature.timestamp = timestamps[iFrame];
matthiasm@50	605 chordchange_feature.values.push_back(chordchange[iFrame]);
mail@60	606 // cerr << chordchange[iFrame] << endl;
mail@60	607 fsOut[m_outputHarmonicChange].push_back(chordchange_feature);
matthiasm@50	608 }
matthiasm@50	609
mail@60	610 // for (int iFrame = 0; iFrame < nFrame; iFrame++) cerr << fsOut[m_outputHarmonicChange][iFrame].values[0] << endl;
matthiasm@50	611
matthiasm@50	612
Chris@23	613 return fsOut;
matthiasm@0	614 }

Mercurial > hg > nnls-chroma

annotate Chordino.cpp @ 133:5c1a25b3daf0 darwintunes