nnls-chroma: NNLSChroma.cpp annotate

annotate NNLSChroma.cpp @ 4:266d23a41cdc matthiasm-plugin

tested almost finished plugin, chord est with mode filter

author	matthiasm
date	Tue, 01 Jun 2010 09:41:31 +0000
parents	8360483a026e
children	84db8ce38fd3

rev	line source
matthiasm@0	1
matthiasm@0	2 #include "NNLSChroma.h"
matthiasm@0	3 #include <cmath>
matthiasm@0	4 #include <list>
matthiasm@0	5 #include <iostream>
matthiasm@3	6 #include <fstream>
matthiasm@0	7 #include <sstream>
matthiasm@0	8 #include <cassert>
matthiasm@0	9 #include <cstdio>
matthiasm@1	10 #include "nnls.h"
matthiasm@0	11 // #include "cblas.h"
matthiasm@0	12 #include "chorddict.cpp"
matthiasm@0	13 using namespace std;
matthiasm@0	14
matthiasm@0	15 const float sinvalue = 0.866025404;
matthiasm@0	16 const float cosvalue = -0.5;
matthiasm@0	17 const float hammingwind[19] = {0.0082, 0.0110, 0.0191, 0.0316, 0.0470, 0.0633, 0.0786, 0.0911, 0.0992, 0.1020, 0.0992, 0.0911, 0.0786, 0.0633, 0.0470, 0.0316, 0.0191, 0.0110, 0.0082};
matthiasm@0	18 const float basswindow[] = {0.001769, 0.015848, 0.043608, 0.084265, 0.136670, 0.199341, 0.270509, 0.348162, 0.430105, 0.514023, 0.597545, 0.678311, 0.754038, 0.822586, 0.882019, 0.930656, 0.967124, 0.990393, 0.999803, 0.995091, 0.976388, 0.944223, 0.899505, 0.843498, 0.777785, 0.704222, 0.624888, 0.542025, 0.457975, 0.375112, 0.295778, 0.222215, 0.156502, 0.100495, 0.055777, 0.023612, 0.004909, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000};
matthiasm@0	19 const float treblewindow[] = {0.000350, 0.003144, 0.008717, 0.017037, 0.028058, 0.041719, 0.057942, 0.076638, 0.097701, 0.121014, 0.146447, 0.173856, 0.203090, 0.233984, 0.266366, 0.300054, 0.334860, 0.370590, 0.407044, 0.444018, 0.481304, 0.518696, 0.555982, 0.592956, 0.629410, 0.665140, 0.699946, 0.733634, 0.766016, 0.796910, 0.826144, 0.853553, 0.878986, 0.902299, 0.923362, 0.942058, 0.958281, 0.971942, 0.982963, 0.991283, 0.996856, 0.999650, 0.999650, 0.996856, 0.991283, 0.982963, 0.971942, 0.958281, 0.942058, 0.923362, 0.902299, 0.878986, 0.853553, 0.826144, 0.796910, 0.766016, 0.733634, 0.699946, 0.665140, 0.629410, 0.592956, 0.555982, 0.518696, 0.481304, 0.444018, 0.407044, 0.370590, 0.334860, 0.300054, 0.266366, 0.233984, 0.203090, 0.173856, 0.146447, 0.121014, 0.097701, 0.076638, 0.057942, 0.041719, 0.028058, 0.017037, 0.008717, 0.003144, 0.000350};
matthiasm@0	20 const char* notenames[24] = {"A (bass)","Bb (bass)","B (bass)","C (bass)","C# (bass)","D (bass)","Eb (bass)","E (bass)","F (bass)","F# (bass)","G (bass)","Ab (bass)",
matthiasm@0	21 "A","Bb","B","C","C#","D","Eb","E","F","F#","G","Ab"};
matthiasm@0	22 const vector<float> hw(hammingwind, hammingwind+19);
matthiasm@0	23 const int nNote = 256;
matthiasm@0	24
matthiasm@0	25 /** Special Convolution
matthiasm@0	26 special convolution is as long as the convolvee, i.e. the first argument. in the valid core part of the
matthiasm@0	27 convolution it contains the usual convolution values, but the pads at the beginning (ending) have the same values
matthiasm@0	28 as the first (last) valid convolution bin.
matthiasm@0	29 **/
matthiasm@0	30
// Compile-time switch: when true, the plugin methods in this file print a
// "--> name" trace line to stderr on entry.
matthiasm@0	31 const bool debug_on = false;
matthiasm@0	32
/**
 * Special convolution of convolvee with an odd-length kernel.
 *
 * The result has exactly as many elements as convolvee. Every bin for which
 * the kernel fits completely inside the input holds the ordinary convolution
 * sum; the lenKernel/2 bins at either end, where it does not fit, repeat the
 * first (respectively last) valid bin. An input shorter than the kernel has
 * no valid bin at all, in which case an all-zero vector is returned.
 *
 * @param convolvee signal to be smoothed
 * @param kernel    convolution kernel; its length must be odd (asserted)
 * @return vector of convolvee.size() elements
 */
std::vector<float> SpecialConvolution(std::vector<float> convolvee, std::vector<float> kernel)
{
    const int lenConvolvee = static_cast<int>(convolvee.size());
    const int lenKernel = static_cast<int>(kernel.size());
    assert(lenKernel % 2 != 0); // no exception handling !!!

    // Size the result from the input instead of assuming 256 bins, so that a
    // longer input cannot write past the end of the buffer.
    std::vector<float> Z(convolvee.size(), 0);
    if (lenKernel == 0 || lenConvolvee < lenKernel) {
        return Z; // kernel never fits: nothing to compute, nothing to pad from
    }

    const int half = lenKernel / 2;

    // valid core: the kernel lies completely inside the input
    for (int n = lenKernel - 1; n < lenConvolvee; ++n) {
        float s = 0.0;
        for (int m = 0; m < lenKernel; ++m) {
            s += convolvee[n - m] * kernel[m];
        }
        Z[n - half] = s;
    }

    // lower and upper pads repeat the nearest valid bin
    for (int n = 0; n < half; ++n) {
        Z[n] = Z[half];
    }
    for (int n = lenConvolvee - half; n < lenConvolvee; ++n) {
        Z[n] = Z[lenConvolvee - half - 1];
    }
    return Z;
}
matthiasm@0	60
matthiasm@0	61 // vector<float> FftBin2Frequency(vector<float> binnumbers, int fs, int blocksize)
matthiasm@0	62 // {
matthiasm@0	63 // vector<float> freq(binnumbers.size, 0.0);
matthiasm@0	64 // for (unsigned i = 0; i < binnumbers.size; ++i) {
matthiasm@0	65 // freq[i] = (binnumbers[i]-1.0) * fs * 1.0 / blocksize;
matthiasm@0	66 // }
matthiasm@0	67 // return freq;
matthiasm@0	68 // }
matthiasm@0	69
/**
 * Raised-cosine pulse.
 *
 * Returns 0.5 * cos(2*pi*(x - centre)/width) + 0.5 when x lies within half a
 * width of centre, and 0 everywhere else. The value is 1 at the centre and
 * falls to 0 at centre +/- width/2.
 *
 * @param x      position at which the pulse is evaluated
 * @param centre position of the peak
 * @param width  full width of the pulse; a non-positive width yields 0
 */
float cospuls(float x, float centre, float width)
{
    // M_PI is not part of standard C++, so spell the constant out.
    const double pi = 3.14159265358979323846;

    if (!(width > 0)) {
        return 0.0; // avoids 0 * inf = NaN for a degenerate pulse
    }
    float recipwidth = 1.0/width;
    if (std::fabs(x - centre) <= 0.5 * width) {
        return std::cos((x-centre)*2*pi*recipwidth)*.5+.5;
    }
    return 0.0;
}
matthiasm@0	78
matthiasm@0	79 float pitchCospuls(float x, float centre, int binsperoctave)
matthiasm@0	80 {
matthiasm@0	81 float warpedf = -binsperoctave * (log2(centre) - log2(x));
matthiasm@0	82 float out = cospuls(warpedf, 0.0, 2.0);
matthiasm@0	83 // now scale to correct for note density
matthiasm@0	84 float c = log(2.0)/binsperoctave;
matthiasm@0	85 if (x > 0) {
matthiasm@0	86 out = out / (c * x);
matthiasm@0	87 } else {
matthiasm@0	88 out = 0;
matthiasm@0	89 }
matthiasm@0	90 return out;
matthiasm@0	91 }
matthiasm@0	92
matthiasm@0	93 bool logFreqMatrix(int fs, int blocksize, float *outmatrix) {
matthiasm@0	94
matthiasm@0	95 int binspersemitone = 3; // this must be 3
matthiasm@0	96 int minoctave = 0; // this must be 0
matthiasm@0	97 int maxoctave = 7; // this must be 7
matthiasm@1	98 int oversampling = 80;
matthiasm@0	99
matthiasm@0	100 // linear frequency vector
matthiasm@0	101 vector<float> fft_f;
matthiasm@0	102 for (int i = 0; i < blocksize/2; ++i) {
matthiasm@0	103 fft_f.push_back(i * (fs * 1.0 / blocksize));
matthiasm@0	104 }
matthiasm@0	105 float fft_width = fs * 2.0 / blocksize;
matthiasm@0	106
matthiasm@0	107 // linear oversampled frequency vector
matthiasm@0	108 vector<float> oversampled_f;
matthiasm@0	109 for (unsigned int i = 0; i < oversampling * blocksize/2; ++i) {
matthiasm@0	110 oversampled_f.push_back(i * ((fs * 1.0 / blocksize) / oversampling));
matthiasm@0	111 }
matthiasm@0	112
matthiasm@0	113 // pitch-spaced frequency vector
matthiasm@0	114 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone!
matthiasm@0	115 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone!
matthiasm@0	116 vector<float> cq_f;
matthiasm@0	117 float oob = 1.0/binspersemitone; // one over binspersemitone
matthiasm@0	118 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12
matthiasm@0	119 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69)));
matthiasm@0	120 for (int i = minMIDI + 1; i < maxMIDI; ++i) {
matthiasm@0	121 for (int k = -1; k < 2; ++k) {
matthiasm@0	122 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69)));
matthiasm@0	123 }
matthiasm@0	124 }
matthiasm@0	125 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69)));
matthiasm@0	126 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69)));
matthiasm@0	127
matthiasm@0	128 int nFFT = fft_f.size();
matthiasm@0	129
matthiasm@0	130 vector<float> fft_activation;
matthiasm@0	131 for (int iOS = 0; iOS < 2 * oversampling; ++iOS) {
matthiasm@0	132 float cosp = cospuls(oversampled_f[iOS],fft_f[1],fft_width);
matthiasm@0	133 fft_activation.push_back(cosp);
matthiasm@0	134 // cerr << cosp << endl;
matthiasm@0	135 }
matthiasm@0	136
matthiasm@0	137 float cq_activation;
matthiasm@0	138 for (int iFFT = 1; iFFT < nFFT; ++iFFT) {
matthiasm@0	139 // find frequency stretch where the oversampled vector can be non-zero (i.e. in a window of width fft_width around the current frequency)
matthiasm@0	140 int curr_start = oversampling * iFFT - oversampling;
matthiasm@0	141 int curr_end = oversampling * iFFT + oversampling; // don't know if I should add "+1" here
matthiasm@0	142 // cerr << oversampled_f[curr_start] << " " << fft_f[iFFT] << " " << oversampled_f[curr_end] << endl;
matthiasm@0	143 for (unsigned iCQ = 0; iCQ < cq_f.size(); ++iCQ) {
matthiasm@0	144 outmatrix[iFFT + nFFT * iCQ] = 0;
matthiasm@1	145 if (cq_f[iCQ] * pow(2.0, 0.084) + fft_width > fft_f[iFFT] && cq_f[iCQ] * pow(2.0, -0.084 * 2) - fft_width < fft_f[iFFT]) { // within a generous neighbourhood
matthiasm@0	146 for (int iOS = curr_start; iOS < curr_end; ++iOS) {
matthiasm@0	147 cq_activation = pitchCospuls(oversampled_f[iOS],cq_f[iCQ],binspersemitone*12);
matthiasm@0	148 // cerr << oversampled_f[iOS] << " " << cq_f[iCQ] << " " << cq_activation << endl;
matthiasm@0	149 outmatrix[iFFT + nFFT * iCQ] += cq_activation * fft_activation[iOS-curr_start];
matthiasm@0	150 }
matthiasm@0	151 // if (iCQ == 1 \|\| iCQ == 2) {
matthiasm@0	152 // cerr << " " << outmatrix[iFFT + nFFT * iCQ] << endl;
matthiasm@0	153 // }
matthiasm@0	154 }
matthiasm@0	155 }
matthiasm@0	156 }
matthiasm@0	157 return true;
matthiasm@0	158 }
matthiasm@0	159
matthiasm@3	160 bool dictionaryMatrix(float* dm) {
matthiasm@1	161 int binspersemitone = 3; // this must be 3
matthiasm@1	162 int minoctave = 0; // this must be 0
matthiasm@1	163 int maxoctave = 7; // this must be 7
matthiasm@4	164 float s_param = 0.7;
matthiasm@1	165
matthiasm@1	166 // pitch-spaced frequency vector
matthiasm@1	167 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone!
matthiasm@1	168 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone!
matthiasm@1	169 vector<float> cq_f;
matthiasm@1	170 float oob = 1.0/binspersemitone; // one over binspersemitone
matthiasm@1	171 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12
matthiasm@1	172 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69)));
matthiasm@1	173 for (int i = minMIDI + 1; i < maxMIDI; ++i) {
matthiasm@1	174 for (int k = -1; k < 2; ++k) {
matthiasm@1	175 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69)));
matthiasm@1	176 }
matthiasm@1	177 }
matthiasm@1	178 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69)));
matthiasm@1	179 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69)));
matthiasm@1	180
matthiasm@1	181 float curr_f;
matthiasm@1	182 float floatbin;
matthiasm@1	183 float curr_amp;
matthiasm@1	184 // now for every combination calculate the matrix element
matthiasm@1	185 for (unsigned iOut = 0; iOut < 12 * (maxoctave - minoctave); ++iOut) {
matthiasm@3	186 // cerr << iOut << endl;
matthiasm@1	187 for (unsigned iHarm = 1; iHarm <= 20; ++iHarm) {
matthiasm@1	188 curr_f = 440 * pow(2,(minMIDI-69+iOut)1.0/12) iHarm;
matthiasm@3	189 // if (curr_f > cq_f[nNote-1]) break;
matthiasm@3	190 floatbin = ((iOut + 1) * binspersemitone + 1) + binspersemitone * 12 * log2(iHarm);
matthiasm@3	191 // cerr << floatbin << endl;
matthiasm@1	192 curr_amp = pow(s_param,float(iHarm-1));
matthiasm@3	193 // cerr << "curramp" << curr_amp << endl;
matthiasm@1	194 for (unsigned iNote = 0; iNote < nNote; ++iNote) {
matthiasm@3	195 if (abs(iNote+1.0-floatbin)<2) {
matthiasm@3	196 dm[iNote + 256 * iOut] += cospuls(iNote+1.0, floatbin, binspersemitone + 0.0) * curr_amp;
matthiasm@3	197 // dm[iNote + nNote * iOut] += 1 * curr_amp;
matthiasm@3	198 }
matthiasm@1	199 }
matthiasm@3	200 }
matthiasm@1	201 }
matthiasm@3	202
matthiasm@3	203
matthiasm@1	204 }
matthiasm@1	205
matthiasm@0	206
matthiasm@0	207 NNLSChroma::NNLSChroma(float inputSampleRate) :
matthiasm@0	208 Plugin(inputSampleRate),
matthiasm@0	209 m_fl(0),
matthiasm@0	210 m_blockSize(0),
matthiasm@0	211 m_stepSize(0),
matthiasm@0	212 m_lengthOfNoteIndex(0),
matthiasm@0	213 m_meanTuning0(0),
matthiasm@0	214 m_meanTuning1(0),
matthiasm@0	215 m_meanTuning2(0),
matthiasm@0	216 m_localTuning0(0),
matthiasm@0	217 m_localTuning1(0),
matthiasm@0	218 m_localTuning2(0),
matthiasm@4	219 m_paling(1.0),
matthiasm@3	220 m_preset(0.0),
matthiasm@0	221 m_localTuning(0),
matthiasm@0	222 m_kernelValue(0),
matthiasm@0	223 m_kernelFftIndex(0),
matthiasm@0	224 m_kernelNoteIndex(0),
matthiasm@1	225 m_dict(0),
matthiasm@0	226 m_tuneLocal(false),
matthiasm@0	227 m_dictID(0)
matthiasm@0	228 {
matthiasm@0	229 if (debug_on) cerr << "--> NNLSChroma" << endl;
matthiasm@3	230 m_dict = new float[nNote * 84];
matthiasm@3	231 for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0;
matthiasm@1	232 dictionaryMatrix(m_dict);
matthiasm@0	233 }
matthiasm@0	234
matthiasm@0	235
matthiasm@0	236 NNLSChroma::~NNLSChroma()
matthiasm@0	237 {
matthiasm@0	238 if (debug_on) cerr << "--> ~NNLSChroma" << endl;
matthiasm@1	239 delete [] m_dict;
matthiasm@0	240 }
matthiasm@0	241
matthiasm@0	242 string
matthiasm@0	243 NNLSChroma::getIdentifier() const
matthiasm@0	244 {
matthiasm@0	245 if (debug_on) cerr << "--> getIdentifier" << endl;
matthiasm@0	246 return "nnls_chroma";
matthiasm@0	247 }
matthiasm@0	248
matthiasm@0	249 string
matthiasm@0	250 NNLSChroma::getName() const
matthiasm@0	251 {
matthiasm@0	252 if (debug_on) cerr << "--> getName" << endl;
matthiasm@0	253 return "NNLS Chroma";
matthiasm@0	254 }
matthiasm@0	255
matthiasm@0	256 string
matthiasm@0	257 NNLSChroma::getDescription() const
matthiasm@0	258 {
matthiasm@0	259 // Return something helpful here!
matthiasm@0	260 if (debug_on) cerr << "--> getDescription" << endl;
matthiasm@4	261 return "This plugin provides a number of features derived from a log-frequency amplitude spectrum (LAS) of the DFT: the LAS itself, a standard-tuned version thereof (the local and global tuning estimates can are also be output), an approximate transcription to semitone activation using non-linear least squares (NNLS). Furthermore chroma features and a simple chord estimate derived from this NNLS semitone transcription.";
matthiasm@0	262 }
matthiasm@0	263
matthiasm@0	264 string
matthiasm@0	265 NNLSChroma::getMaker() const
matthiasm@0	266 {
matthiasm@0	267 if (debug_on) cerr << "--> getMaker" << endl;
matthiasm@0	268 // Your name here
matthiasm@0	269 return "Matthias Mauch";
matthiasm@0	270 }
matthiasm@0	271
matthiasm@0	272 int
matthiasm@0	273 NNLSChroma::getPluginVersion() const
matthiasm@0	274 {
matthiasm@0	275 if (debug_on) cerr << "--> getPluginVersion" << endl;
matthiasm@0	276 // Increment this each time you release a version that behaves
matthiasm@0	277 // differently from the previous one
matthiasm@0	278 return 1;
matthiasm@0	279 }
matthiasm@0	280
matthiasm@0	281 string
matthiasm@0	282 NNLSChroma::getCopyright() const
matthiasm@0	283 {
matthiasm@0	284 if (debug_on) cerr << "--> getCopyright" << endl;
matthiasm@0	285 // This function is not ideally named. It does not necessarily
matthiasm@0	286 // need to say who made the plugin -- getMaker does that -- but it
matthiasm@0	287 // should indicate the terms under which it is distributed. For
matthiasm@0	288 // example, "Copyright (year). All Rights Reserved", or "GPL"
matthiasm@0	289 return "Copyright (2010). All rights reserved.";
matthiasm@0	290 }
matthiasm@0	291
matthiasm@0	292 NNLSChroma::InputDomain
matthiasm@0	293 NNLSChroma::getInputDomain() const
matthiasm@0	294 {
matthiasm@0	295 if (debug_on) cerr << "--> getInputDomain" << endl;
matthiasm@0	296 return FrequencyDomain;
matthiasm@0	297 }
matthiasm@0	298
matthiasm@0	299 size_t
matthiasm@0	300 NNLSChroma::getPreferredBlockSize() const
matthiasm@0	301 {
matthiasm@0	302 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
matthiasm@0	303 return 16384; // 0 means "I can handle any block size"
matthiasm@0	304 }
matthiasm@0	305
matthiasm@0	306 size_t
matthiasm@0	307 NNLSChroma::getPreferredStepSize() const
matthiasm@0	308 {
matthiasm@0	309 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
matthiasm@0	310 return 2048; // 0 means "anything sensible"; in practice this
matthiasm@0	311 // means the same as the block size for TimeDomain
matthiasm@0	312 // plugins, or half of it for FrequencyDomain plugins
matthiasm@0	313 }
matthiasm@0	314
matthiasm@0	315 size_t
matthiasm@0	316 NNLSChroma::getMinChannelCount() const
matthiasm@0	317 {
matthiasm@0	318 if (debug_on) cerr << "--> getMinChannelCount" << endl;
matthiasm@0	319 return 1;
matthiasm@0	320 }
matthiasm@0	321
matthiasm@0	322 size_t
matthiasm@0	323 NNLSChroma::getMaxChannelCount() const
matthiasm@0	324 {
matthiasm@0	325 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
matthiasm@0	326 return 1;
matthiasm@0	327 }
matthiasm@0	328
matthiasm@0	329 NNLSChroma::ParameterList
matthiasm@0	330 NNLSChroma::getParameterDescriptors() const
matthiasm@0	331 {
matthiasm@0	332 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
matthiasm@0	333 ParameterList list;
matthiasm@0	334
matthiasm@3	335 ParameterDescriptor d3;
matthiasm@3	336 d3.identifier = "preset";
matthiasm@3	337 d3.name = "preset";
matthiasm@3	338 d3.description = "Spectral paling: no paling - 0; whitening - 1.";
matthiasm@3	339 d3.unit = "";
matthiasm@3	340 d3.isQuantized = true;
matthiasm@3	341 d3.quantizeStep = 1;
matthiasm@3	342 d3.minValue = 0.0;
matthiasm@4	343 d3.maxValue = 3.0;
matthiasm@3	344 d3.defaultValue = 0.0;
matthiasm@3	345 d3.valueNames.push_back("polyphonic pop");
matthiasm@3	346 d3.valueNames.push_back("polyphonic pop (fast)");
matthiasm@3	347 d3.valueNames.push_back("solo keyboard");
matthiasm@3	348 d3.valueNames.push_back("manual");
matthiasm@3	349 list.push_back(d3);
matthiasm@4	350
matthiasm@4	351 // ParameterDescriptor d0;
matthiasm@4	352 // d0.identifier = "notedict";
matthiasm@4	353 // d0.name = "note dictionary";
matthiasm@4	354 // d0.description = "Notes in different note dictionaries differ by their spectral shapes.";
matthiasm@4	355 // d0.unit = "";
matthiasm@4	356 // d0.minValue = 0;
matthiasm@4	357 // d0.maxValue = 1;
matthiasm@4	358 // d0.defaultValue = 0;
matthiasm@4	359 // d0.isQuantized = true;
matthiasm@4	360 // d0.valueNames.push_back("s = 0.6");
matthiasm@4	361 // d0.valueNames.push_back("no NNLS");
matthiasm@4	362 // d0.quantizeStep = 1.0;
matthiasm@4	363 // list.push_back(d0);
matthiasm@4	364
matthiasm@4	365 ParameterDescriptor d1;
matthiasm@4	366 d1.identifier = "tuningmode";
matthiasm@4	367 d1.name = "tuning mode";
matthiasm@4	368 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
matthiasm@4	369 d1.unit = "";
matthiasm@4	370 d1.minValue = 0;
matthiasm@4	371 d1.maxValue = 1;
matthiasm@4	372 d1.defaultValue = 0;
matthiasm@4	373 d1.isQuantized = true;
matthiasm@4	374 d1.valueNames.push_back("global tuning");
matthiasm@4	375 d1.valueNames.push_back("local tuning");
matthiasm@4	376 d1.quantizeStep = 1.0;
matthiasm@4	377 list.push_back(d1);
matthiasm@4	378
matthiasm@4	379 // ParameterDescriptor d2;
matthiasm@4	380 // d2.identifier = "paling";
matthiasm@4	381 // d2.name = "spectral paling";
matthiasm@4	382 // d2.description = "Spectral paling: no paling - 0; whitening - 1.";
matthiasm@4	383 // d2.unit = "";
matthiasm@4	384 // d2.isQuantized = true;
matthiasm@4	385 // // d2.quantizeStep = 0.1;
matthiasm@4	386 // d2.minValue = 0.0;
matthiasm@4	387 // d2.maxValue = 1.0;
matthiasm@4	388 // d2.defaultValue = 1.0;
matthiasm@4	389 // d2.isQuantized = false;
matthiasm@4	390 // list.push_back(d2);
matthiasm@4	391
matthiasm@0	392 return list;
matthiasm@0	393 }
matthiasm@0	394
matthiasm@0	395 float
matthiasm@0	396 NNLSChroma::getParameter(string identifier) const
matthiasm@0	397 {
matthiasm@3	398 if (debug_on) cerr << "--> getParameter" << endl;
matthiasm@0	399 if (identifier == "notedict") {
matthiasm@0	400 return m_dictID;
matthiasm@0	401 }
matthiasm@0	402
matthiasm@0	403 if (identifier == "paling") {
matthiasm@0	404 return m_paling;
matthiasm@0	405 }
matthiasm@0	406
matthiasm@0	407 if (identifier == "tuningmode") {
matthiasm@0	408 if (m_tuneLocal) {
matthiasm@0	409 return 1.0;
matthiasm@0	410 } else {
matthiasm@0	411 return 0.0;
matthiasm@0	412 }
matthiasm@0	413 }
matthiasm@3	414 if (identifier == "preset") {
matthiasm@3	415 return m_preset;
matthiasm@3	416 }
matthiasm@0	417 return 0;
matthiasm@0	418
matthiasm@0	419 }
matthiasm@0	420
matthiasm@0	421 void
matthiasm@0	422 NNLSChroma::setParameter(string identifier, float value)
matthiasm@0	423 {
matthiasm@3	424 if (debug_on) cerr << "--> setParameter" << endl;
matthiasm@0	425 if (identifier == "notedict") {
matthiasm@0	426 m_dictID = (int) value;
matthiasm@0	427 }
matthiasm@0	428
matthiasm@0	429 if (identifier == "paling") {
matthiasm@0	430 m_paling = value;
matthiasm@0	431 }
matthiasm@0	432
matthiasm@0	433 if (identifier == "tuningmode") {
matthiasm@0	434 m_tuneLocal = (value > 0) ? true : false;
matthiasm@0	435 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
matthiasm@0	436 }
matthiasm@3	437 if (identifier == "preset") {
matthiasm@3	438 m_preset = value;
matthiasm@3	439 if (m_preset == 0.0) {
matthiasm@3	440 m_tuneLocal = false;
matthiasm@3	441 m_paling = 1.0;
matthiasm@3	442 m_dictID = 0.0;
matthiasm@3	443 }
matthiasm@3	444 if (m_preset == 1.0) {
matthiasm@3	445 m_tuneLocal = false;
matthiasm@3	446 m_paling = 1.0;
matthiasm@3	447 m_dictID = 1.0;
matthiasm@3	448 }
matthiasm@3	449 if (m_preset == 2.0) {
matthiasm@3	450 m_tuneLocal = false;
matthiasm@3	451 m_paling = 0.7;
matthiasm@3	452 m_dictID = 0.0;
matthiasm@3	453 }
matthiasm@3	454 }
matthiasm@0	455 }
matthiasm@0	456
matthiasm@0	457 NNLSChroma::ProgramList
matthiasm@0	458 NNLSChroma::getPrograms() const
matthiasm@0	459 {
matthiasm@0	460 if (debug_on) cerr << "--> getPrograms" << endl;
matthiasm@0	461 ProgramList list;
matthiasm@0	462
matthiasm@0	463 // If you have no programs, return an empty list (or simply don't
matthiasm@0	464 // implement this function or getCurrentProgram/selectProgram)
matthiasm@0	465
matthiasm@0	466 return list;
matthiasm@0	467 }
matthiasm@0	468
matthiasm@0	469 string
matthiasm@0	470 NNLSChroma::getCurrentProgram() const
matthiasm@0	471 {
matthiasm@0	472 if (debug_on) cerr << "--> getCurrentProgram" << endl;
matthiasm@0	473 return ""; // no programs
matthiasm@0	474 }
matthiasm@0	475
matthiasm@0	476 void
matthiasm@0	477 NNLSChroma::selectProgram(string name)
matthiasm@0	478 {
matthiasm@0	479 if (debug_on) cerr << "--> selectProgram" << endl;
matthiasm@0	480 }
matthiasm@0	481
matthiasm@0	482
matthiasm@0	483 NNLSChroma::OutputList
matthiasm@0	484 NNLSChroma::getOutputDescriptors() const
matthiasm@0	485 {
matthiasm@0	486 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
matthiasm@0	487 OutputList list;
matthiasm@0	488
matthiasm@0	489 // Make chroma names for the binNames property
matthiasm@0	490 vector<string> chromanames;
matthiasm@0	491 vector<string> bothchromanames;
matthiasm@0	492 for (int iNote = 0; iNote < 24; iNote++) {
matthiasm@0	493 bothchromanames.push_back(notenames[iNote]);
matthiasm@0	494 if (iNote < 12) {
matthiasm@0	495 chromanames.push_back(notenames[iNote]);
matthiasm@0	496 }
matthiasm@0	497 }
matthiasm@0	498
matthiasm@1	499 // int nNote = 84;
matthiasm@0	500
matthiasm@0	501 // See OutputDescriptor documentation for the possibilities here.
matthiasm@0	502 // Every plugin must have at least one output.
matthiasm@0	503
matthiasm@0	504 OutputDescriptor d0;
matthiasm@0	505 d0.identifier = "tuning";
matthiasm@0	506 d0.name = "Tuning";
matthiasm@0	507 d0.description = "The concert pitch.";
matthiasm@0	508 d0.unit = "Hz";
matthiasm@0	509 d0.hasFixedBinCount = true;
matthiasm@0	510 d0.binCount = 0;
matthiasm@0	511 d0.hasKnownExtents = true;
matthiasm@0	512 d0.minValue = 427.47;
matthiasm@0	513 d0.maxValue = 452.89;
matthiasm@0	514 d0.isQuantized = false;
matthiasm@0	515 d0.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@0	516 d0.hasDuration = false;
matthiasm@0	517 list.push_back(d0);
matthiasm@0	518
matthiasm@0	519 OutputDescriptor d1;
matthiasm@0	520 d1.identifier = "logfreqspec";
matthiasm@0	521 d1.name = "Log-Frequency Spectrum";
matthiasm@0	522 d1.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping.";
matthiasm@0	523 d1.unit = "";
matthiasm@0	524 d1.hasFixedBinCount = true;
matthiasm@0	525 d1.binCount = nNote;
matthiasm@0	526 d1.hasKnownExtents = false;
matthiasm@0	527 d1.isQuantized = false;
matthiasm@0	528 d1.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0	529 d1.hasDuration = false;
matthiasm@0	530 d1.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0	531 list.push_back(d1);
matthiasm@0	532
matthiasm@0	533 OutputDescriptor d2;
matthiasm@0	534 d2.identifier = "tunedlogfreqspec";
matthiasm@0	535 d2.name = "Tuned Log-Frequency Spectrum";
matthiasm@0	536 d2.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping, then its tuned using the estimated tuning frequency.";
matthiasm@0	537 d2.unit = "";
matthiasm@0	538 d2.hasFixedBinCount = true;
matthiasm@0	539 d2.binCount = 256;
matthiasm@0	540 d2.hasKnownExtents = false;
matthiasm@0	541 d2.isQuantized = false;
matthiasm@0	542 d2.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0	543 d2.hasDuration = false;
matthiasm@0	544 d2.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0	545 list.push_back(d2);
matthiasm@0	546
matthiasm@0	547 OutputDescriptor d3;
matthiasm@0	548 d3.identifier = "semitonespectrum";
matthiasm@0	549 d3.name = "Semitone Spectrum";
matthiasm@0	550 d3.description = "A semitone-spaced log-frequency spectrum derived from the third-of-a-semitone-spaced tuned log-frequency spectrum.";
matthiasm@0	551 d3.unit = "";
matthiasm@0	552 d3.hasFixedBinCount = true;
matthiasm@0	553 d3.binCount = 84;
matthiasm@0	554 d3.hasKnownExtents = false;
matthiasm@0	555 d3.isQuantized = false;
matthiasm@0	556 d3.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0	557 d3.hasDuration = false;
matthiasm@0	558 d3.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0	559 list.push_back(d3);
matthiasm@0	560
matthiasm@0	561 OutputDescriptor d4;
matthiasm@0	562 d4.identifier = "chroma";
matthiasm@0	563 d4.name = "Chromagram";
matthiasm@0	564 d4.description = "Tuning-adjusted chromagram from NNLS soft transcription, with an emphasis on the medium note range.";
matthiasm@0	565 d4.unit = "";
matthiasm@0	566 d4.hasFixedBinCount = true;
matthiasm@0	567 d4.binCount = 12;
matthiasm@0	568 d4.binNames = chromanames;
matthiasm@0	569 d4.hasKnownExtents = false;
matthiasm@0	570 d4.isQuantized = false;
matthiasm@0	571 d4.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0	572 d4.hasDuration = false;
matthiasm@0	573 d4.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0	574 list.push_back(d4);
matthiasm@0	575
matthiasm@0	576 OutputDescriptor d5;
matthiasm@0	577 d5.identifier = "basschroma";
matthiasm@0	578 d5.name = "Bass Chromagram";
matthiasm@0	579 d5.description = "Tuning-adjusted bass chromagram from NNLS soft transcription, with an emphasis on the bass note range.";
matthiasm@0	580 d5.unit = "";
matthiasm@0	581 d5.hasFixedBinCount = true;
matthiasm@0	582 d5.binCount = 12;
matthiasm@0	583 d5.binNames = chromanames;
matthiasm@0	584 d5.hasKnownExtents = false;
matthiasm@0	585 d5.isQuantized = false;
matthiasm@0	586 d5.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0	587 d5.hasDuration = false;
matthiasm@0	588 d5.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0	589 list.push_back(d5);
matthiasm@0	590
matthiasm@0	591 OutputDescriptor d6;
matthiasm@0	592 d6.identifier = "bothchroma";
matthiasm@0	593 d6.name = "Chromagram and Bass Chromagram";
matthiasm@0	594 d6.description = "Tuning-adjusted chromagram and bass chromagram (stacked on top of each other) from NNLS soft transcription.";
matthiasm@0	595 d6.unit = "";
matthiasm@0	596 d6.hasFixedBinCount = true;
matthiasm@0	597 d6.binCount = 24;
matthiasm@0	598 d6.binNames = bothchromanames;
matthiasm@0	599 d6.hasKnownExtents = false;
matthiasm@0	600 d6.isQuantized = false;
matthiasm@0	601 d6.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0	602 d6.hasDuration = false;
matthiasm@0	603 d6.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0	604 list.push_back(d6);
matthiasm@0	605
matthiasm@0	606 OutputDescriptor d7;
matthiasm@0	607 d7.identifier = "simplechord";
matthiasm@0	608 d7.name = "Simple Chord Estimate";
matthiasm@0	609 d7.description = "A simple chord estimate based on the inner product of chord templates with the smoothed chroma.";
matthiasm@0	610 d7.unit = "";
matthiasm@0	611 d7.hasFixedBinCount = true;
matthiasm@0	612 d7.binCount = 0;
matthiasm@0	613 d7.hasKnownExtents = false;
matthiasm@0	614 d7.isQuantized = false;
matthiasm@0	615 d7.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@0	616 d7.hasDuration = false;
matthiasm@0	617 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0	618 list.push_back(d7);
matthiasm@0	619
matthiasm@1	620 // OutputDescriptor d8;
matthiasm@1	621 // d8.identifier = "inconsistency";
matthiasm@1	622 // d8.name = "Harmonic inconsistency value";
matthiasm@1	623 // d8.description = "Harmonic inconsistency. Indicates music if low, non-music or speech when high.";
matthiasm@1	624 // d8.unit = "";
matthiasm@1	625 // d8.hasFixedBinCount = true;
matthiasm@1	626 // d8.binCount = 1;
matthiasm@1	627 // d8.hasKnownExtents = false;
matthiasm@1	628 // d8.isQuantized = false;
matthiasm@1	629 // d8.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@1	630 // d8.hasDuration = false;
matthiasm@1	631 // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@1	632 // list.push_back(d8);
matthiasm@1	633 //
matthiasm@1	634 // OutputDescriptor d9;
matthiasm@1	635 // d9.identifier = "inconsistencysegment";
matthiasm@1	636 // d9.name = "Harmonic inconsistency segmenter";
matthiasm@1	637 // d9.description = "Segments the audio based on the harmonic inconsistency value into speech and music.";
matthiasm@1	638 // d9.unit = "";
matthiasm@1	639 // d9.hasFixedBinCount = true;
matthiasm@1	640 // d9.binCount = 0;
matthiasm@1	641 // d9.hasKnownExtents = true;
matthiasm@1	642 // d9.minValue = 0.1;
matthiasm@1	643 // d9.maxValue = 0.9;
matthiasm@1	644 // d9.isQuantized = false;
matthiasm@1	645 // d9.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@1	646 // d9.hasDuration = false;
matthiasm@1	647 // d9.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@1	648 // list.push_back(d9);
matthiasm@1	649 //
matthiasm@1	650 OutputDescriptor d10;
matthiasm@1	651 d10.identifier = "localtuning";
matthiasm@1	652 d10.name = "Local tuning";
matthiasm@4	653 d10.description = "Tuning based on the history up to this timestamp.";
matthiasm@1	654 d10.unit = "Hz";
matthiasm@1	655 d10.hasFixedBinCount = true;
matthiasm@1	656 d10.binCount = 1;
matthiasm@1	657 d10.hasKnownExtents = true;
matthiasm@1	658 d10.minValue = 427.47;
matthiasm@1	659 d10.maxValue = 452.89;
matthiasm@1	660 d10.isQuantized = false;
matthiasm@3	661 d10.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@1	662 d10.hasDuration = false;
matthiasm@3	663 // d10.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@1	664 list.push_back(d10);
matthiasm@1	665
matthiasm@0	666 return list;
matthiasm@0	667 }
matthiasm@0	668
matthiasm@0	669
matthiasm@0	670 bool
matthiasm@0	671 NNLSChroma::initialise(size_t channels, size_t stepSize, size_t blockSize)
matthiasm@0	672 {
matthiasm@1	673 if (debug_on) {
matthiasm@1	674 cerr << "--> initialise";
matthiasm@1	675 }
matthiasm@1	676
matthiasm@0	677 if (channels < getMinChannelCount() \|\|
matthiasm@0	678 channels > getMaxChannelCount()) return false;
matthiasm@0	679 m_blockSize = blockSize;
matthiasm@0	680 m_stepSize = stepSize;
matthiasm@0	681 frameCount = 0;
matthiasm@0	682 int tempn = 256 * m_blockSize/2;
matthiasm@4	683 // cerr << "length of tempkernel : " << tempn << endl;
matthiasm@1	684 float *tempkernel;
matthiasm@1	685
matthiasm@1	686 tempkernel = new float[tempn];
matthiasm@1	687
matthiasm@0	688 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
matthiasm@1	689 m_kernelValue.clear();
matthiasm@1	690 m_kernelFftIndex.clear();
matthiasm@1	691 m_kernelNoteIndex.clear();
matthiasm@1	692 int countNonzero = 0;
matthiasm@0	693 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
matthiasm@1	694 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) {
matthiasm@1	695 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
matthiasm@1	696 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
matthiasm@0	697 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
matthiasm@1	698 countNonzero++;
matthiasm@0	699 }
matthiasm@1	700 m_kernelFftIndex.push_back(iFFT);
matthiasm@1	701 m_kernelNoteIndex.push_back(iNote);
matthiasm@0	702 }
matthiasm@0	703 }
matthiasm@1	704 }
matthiasm@4	705 // cerr << "nonzero count : " << countNonzero << endl;
matthiasm@1	706 delete [] tempkernel;
matthiasm@3	707 ofstream myfile;
matthiasm@3	708 myfile.open ("matrix.txt");
matthiasm@3	709 // myfile << "Writing this to a file.\n";
matthiasm@3	710 for (int i = 0; i < nNote * 84; ++i) {
matthiasm@3	711 myfile << m_dict[i] << endl;
matthiasm@3	712 }
matthiasm@3	713 myfile.close();
matthiasm@0	714 return true;
matthiasm@0	715 }
matthiasm@0	716
matthiasm@0	717 void
matthiasm@0	718 NNLSChroma::reset()
matthiasm@0	719 {
matthiasm@4	720 if (debug_on) cerr << "--> reset";
matthiasm@4	721
matthiasm@0	722 // Clear buffers, reset stored values, etc
matthiasm@4	723 frameCount = 0;
matthiasm@4	724 m_dictID = 0;
matthiasm@4	725 m_fl.clear();
matthiasm@4	726 m_meanTuning0 = 0;
matthiasm@4	727 m_meanTuning1 = 0;
matthiasm@4	728 m_meanTuning2 = 0;
matthiasm@4	729 m_localTuning0 = 0;
matthiasm@4	730 m_localTuning1 = 0;
matthiasm@4	731 m_localTuning2 = 0;
matthiasm@4	732 m_localTuning.clear();
matthiasm@0	733 }
matthiasm@0	734
matthiasm@0	735 NNLSChroma::FeatureSet
matthiasm@0	736 NNLSChroma::process(const float const inputBuffers, Vamp::RealTime timestamp)
matthiasm@0	737 {
matthiasm@4	738 if (debug_on) cerr << "--> process" << endl;
matthiasm@4	739
matthiasm@0	740 frameCount++;
matthiasm@0	741 float *magnitude = new float[m_blockSize/2];
matthiasm@0	742
matthiasm@0	743 Feature f10; // local tuning
matthiasm@3	744 f10.hasTimestamp = true;
matthiasm@4	745 f10.timestamp = timestamp;
matthiasm@0	746 const float *fbuf = inputBuffers[0];
matthiasm@0	747
matthiasm@0	748 // make magnitude
matthiasm@0	749 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
matthiasm@0	750 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
matthiasm@0	751 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
matthiasm@0	752 }
matthiasm@4	753
matthiasm@0	754 // note magnitude mapping using pre-calculated matrix
matthiasm@0	755 float *nm = new float[nNote]; // note magnitude
matthiasm@0	756 for (size_t iNote = 0; iNote < nNote; iNote++) {
matthiasm@0	757 nm[iNote] = 0; // initialise as 0
matthiasm@0	758 }
matthiasm@0	759 int binCount = 0;
matthiasm@0	760 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
matthiasm@0	761 // cerr << ".";
matthiasm@1	762 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
matthiasm@1	763 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl;
matthiasm@0	764 binCount++;
matthiasm@0	765 }
matthiasm@1	766 // cerr << nm[20];
matthiasm@1	767 // cerr << endl;
matthiasm@0	768
matthiasm@0	769
matthiasm@0	770 float one_over_N = 1.0/frameCount;
matthiasm@0	771 // update means of complex tuning variables
matthiasm@0	772 m_meanTuning0 = float(frameCount-1)one_over_N;
matthiasm@0	773 m_meanTuning1 = float(frameCount-1)one_over_N;
matthiasm@0	774 m_meanTuning2 = float(frameCount-1)one_over_N;
matthiasm@0	775
matthiasm@0	776 for (int iTone = 0; iTone < 160; iTone = iTone + 3) {
matthiasm@0	777 m_meanTuning0 += nm[iTone + 0]*one_over_N;
matthiasm@0	778 m_meanTuning1 += nm[iTone + 1]*one_over_N;
matthiasm@0	779 m_meanTuning2 += nm[iTone + 2]*one_over_N;
matthiasm@3	780 float ratioOld = 0.997;
matthiasm@3	781 m_localTuning0 = ratioOld; m_localTuning0 += nm[iTone + 0] (1 - ratioOld);
matthiasm@3	782 m_localTuning1 = ratioOld; m_localTuning1 += nm[iTone + 1] (1 - ratioOld);
matthiasm@3	783 m_localTuning2 = ratioOld; m_localTuning2 += nm[iTone + 2] (1 - ratioOld);
matthiasm@0	784 }
matthiasm@0	785
matthiasm@0	786 // if (m_tuneLocal) {
matthiasm@0	787 // local tuning
matthiasm@0	788 float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2;
matthiasm@0	789 float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2;
matthiasm@0	790 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
matthiasm@0	791 m_localTuning.push_back(normalisedtuning);
matthiasm@0	792 float tuning440 = 440 * pow(2,normalisedtuning/12);
matthiasm@0	793 f10.values.push_back(tuning440);
matthiasm@3	794 // cerr << tuning440 << endl;
matthiasm@0	795 // }
matthiasm@0	796
matthiasm@0	797 Feature f1; // logfreqspec
matthiasm@0	798 f1.hasTimestamp = true;
matthiasm@0	799 f1.timestamp = timestamp;
matthiasm@0	800 for (size_t iNote = 0; iNote < nNote; iNote++) {
matthiasm@0	801 f1.values.push_back(nm[iNote]);
matthiasm@0	802 }
matthiasm@0	803
matthiasm@0	804 FeatureSet fs;
matthiasm@0	805 fs[1].push_back(f1);
matthiasm@3	806 fs[8].push_back(f10);
matthiasm@0	807
matthiasm@0	808 // deletes
matthiasm@0	809 delete[] magnitude;
matthiasm@0	810 delete[] nm;
matthiasm@0	811
matthiasm@0	812 m_fl.push_back(f1); // remember note magnitude for getRemainingFeatures
matthiasm@0	813 return fs;
matthiasm@0	814 }
matthiasm@0	815
matthiasm@0	816 NNLSChroma::FeatureSet
matthiasm@0	817 NNLSChroma::getRemainingFeatures()
matthiasm@0	818 {
matthiasm@4	819 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
matthiasm@4	820 FeatureSet fsOut;
matthiasm@4	821 if (m_fl.size() == 0) return fsOut;
matthiasm@0	822 //
matthiasm@1	823 /** Calculate Tuning
matthiasm@1	824 calculate tuning from (using the angle of the complex number defined by the
matthiasm@1	825 cumulative mean real and imag values)
matthiasm@1	826 **/
matthiasm@1	827 float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
matthiasm@1	828 float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
matthiasm@1	829 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
matthiasm@1	830 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
matthiasm@1	831 int intShift = floor(normalisedtuning * 3);
matthiasm@1	832 float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
matthiasm@1	833
matthiasm@1	834 char buffer0 [50];
matthiasm@1	835
matthiasm@1	836 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
matthiasm@1	837
matthiasm@1	838 // cerr << "normalisedtuning: " << normalisedtuning << '\n';
matthiasm@1	839
matthiasm@1	840 // push tuning to FeatureSet fsOut
matthiasm@1	841 Feature f0; // tuning
matthiasm@1	842 f0.hasTimestamp = true;
matthiasm@1	843 f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));;
matthiasm@1	844 f0.label = buffer0;
matthiasm@1	845 fsOut[0].push_back(f0);
matthiasm@1	846
matthiasm@1	847 /** Tune Log-Frequency Spectrogram
matthiasm@1	848 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
matthiasm@1	849 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
matthiasm@1	850 **/
matthiasm@1	851
matthiasm@1	852 float tempValue = 0;
matthiasm@1	853 float dbThreshold = 0; // relative to the background spectrum
matthiasm@1	854 float thresh = pow(10,dbThreshold/20);
matthiasm@1	855 // cerr << "tune local ? " << m_tuneLocal << endl;
matthiasm@1	856 int count = 0;
matthiasm@1	857
matthiasm@1	858 for (FeatureList::iterator i = m_fl.begin(); i != m_fl.end(); ++i) {
matthiasm@1	859 Feature f1 = *i;
matthiasm@1	860 Feature f2; // tuned log-frequency spectrum
matthiasm@1	861 f2.hasTimestamp = true;
matthiasm@1	862 f2.timestamp = f1.timestamp;
matthiasm@1	863 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
matthiasm@1	864
matthiasm@1	865 if (m_tuneLocal) {
matthiasm@1	866 intShift = floor(m_localTuning[count] * 3);
matthiasm@1	867 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
matthiasm@1	868 }
matthiasm@1	869
matthiasm@1	870 // cerr << intShift << " " << intFactor << endl;
matthiasm@1	871
matthiasm@4	872 for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
matthiasm@1	873 tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor;
matthiasm@1	874 f2.values.push_back(tempValue);
matthiasm@1	875 }
matthiasm@1	876
matthiasm@1	877 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
matthiasm@1	878 vector<float> runningmean = SpecialConvolution(f2.values,hw);
matthiasm@1	879 vector<float> runningstd;
matthiasm@1	880 for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance)
matthiasm@1	881 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
matthiasm@1	882 }
matthiasm@1	883 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
matthiasm@1	884 for (int i = 0; i < 256; i++) {
matthiasm@1	885 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
matthiasm@1	886 if (runningstd[i] > 0) {
matthiasm@1	887 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
matthiasm@1	888 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
matthiasm@1	889 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
matthiasm@1	890 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
matthiasm@1	891 }
matthiasm@1	892 if (f2.values[i] < 0) {
matthiasm@1	893 cerr << "ERROR: negative value in logfreq spectrum" << endl;
matthiasm@1	894 }
matthiasm@1	895 }
matthiasm@1	896 fsOut[2].push_back(f2);
matthiasm@1	897 count++;
matthiasm@1	898 }
matthiasm@1	899
matthiasm@1	900 /** Semitone spectrum and chromagrams
matthiasm@1	901 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
matthiasm@1	902 is inferred using a non-negative least squares algorithm.
matthiasm@1	903 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
matthiasm@1	904 bass and treble stacked onto each other).
matthiasm@1	905 **/
matthiasm@1	906 // taucs_ccs_matrix* A_original_ordering = taucs_construct_sorted_ccs_matrix(nnlsdict06, nnls_m, nnls_n);
matthiasm@1	907
matthiasm@1	908 vector<vector<float> > chordogram;
matthiasm@3	909 vector<vector<int> > scoreChordogram;
matthiasm@1	910 vector<float> oldchroma = vector<float>(12,0);
matthiasm@1	911 vector<float> oldbasschroma = vector<float>(12,0);
matthiasm@1	912 count = 0;
matthiasm@1	913
matthiasm@1	914 for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) {
matthiasm@1	915 Feature f2 = *it; // logfreq spectrum
matthiasm@1	916 Feature f3; // semitone spectrum
matthiasm@1	917 Feature f4; // treble chromagram
matthiasm@1	918 Feature f5; // bass chromagram
matthiasm@1	919 Feature f6; // treble and bass chromagram
matthiasm@1	920
matthiasm@1	921 f3.hasTimestamp = true;
matthiasm@1	922 f3.timestamp = f2.timestamp;
matthiasm@1	923
matthiasm@1	924 f4.hasTimestamp = true;
matthiasm@1	925 f4.timestamp = f2.timestamp;
matthiasm@1	926
matthiasm@1	927 f5.hasTimestamp = true;
matthiasm@1	928 f5.timestamp = f2.timestamp;
matthiasm@1	929
matthiasm@1	930 f6.hasTimestamp = true;
matthiasm@1	931 f6.timestamp = f2.timestamp;
matthiasm@1	932
matthiasm@3	933 float b[256];
matthiasm@1	934
matthiasm@1	935 bool some_b_greater_zero = false;
matthiasm@3	936 float sumb = 0;
matthiasm@1	937 for (int i = 0; i < 256; i++) {
matthiasm@3	938 // b[i] = m_dict[(256 * count + i) % (256 * 84)];
matthiasm@3	939 b[i] = f2.values[i];
matthiasm@3	940 sumb += b[i];
matthiasm@1	941 if (b[i] > 0) {
matthiasm@1	942 some_b_greater_zero = true;
matthiasm@1	943 }
matthiasm@1	944 }
matthiasm@1	945
matthiasm@1	946 // here's where the non-negative least squares algorithm calculates the note activation x
matthiasm@1	947
matthiasm@1	948 vector<float> chroma = vector<float>(12, 0);
matthiasm@1	949 vector<float> basschroma = vector<float>(12, 0);
matthiasm@1	950 float currval;
matthiasm@1	951 unsigned iSemitone = 0;
matthiasm@1	952
matthiasm@1	953 if (some_b_greater_zero) {
matthiasm@3	954 if (m_dictID == 1) {
matthiasm@1	955 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
matthiasm@1	956 currval = 0;
matthiasm@3	957 currval += b[iNote + 1 + -1] * 0.5;
matthiasm@3	958 currval += b[iNote + 1 + 0] * 1.0;
matthiasm@3	959 currval += b[iNote + 1 + 1] * 0.5;
matthiasm@1	960 f3.values.push_back(currval);
matthiasm@1	961 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
matthiasm@1	962 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
matthiasm@1	963 iSemitone++;
matthiasm@1	964 }
matthiasm@1	965
matthiasm@1	966 } else {
matthiasm@3	967 float x[84+1000];
matthiasm@3	968 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
matthiasm@3	969 // for (int i = 0; i < 84; ++i) {
matthiasm@3	970 // x[i] = b[3*i+3];
matthiasm@3	971 // }
matthiasm@3	972 float rnorm;
matthiasm@3	973 float w[84+1000];
matthiasm@3	974 float zz[84+1000];
matthiasm@3	975 int indx[84+1000];
matthiasm@1	976 int mode;
matthiasm@3	977 float curr_dict[256*84];
matthiasm@3	978 for (unsigned i = 0; i < 256 * 84; ++i) {
matthiasm@3	979 curr_dict[i] = 1.0 * m_dict[i];
matthiasm@3	980 }
matthiasm@3	981 nnls(curr_dict, nNote, nNote, 84, b, x, &rnorm, w, zz, indx, &mode);
matthiasm@3	982 for (unsigned iNote = 0; iNote < 84; ++iNote) {
matthiasm@3	983 // for (unsigned kNote = 0; kNote < 256; ++kNote) {
matthiasm@3	984 // x[iNote] += m_dict[kNote + nNote * iNote] * b[kNote];
matthiasm@3	985 // }
matthiasm@3	986 f3.values.push_back(x[iNote]);
matthiasm@3	987 // cerr << mode << endl;
matthiasm@3	988 chroma[iNote % 12] += x[iNote] * treblewindow[iNote];
matthiasm@3	989 basschroma[iNote % 12] += x[iNote] * basswindow[iNote];
matthiasm@3	990 // iSemitone++;
matthiasm@3	991 }
matthiasm@1	992 }
matthiasm@1	993 }
matthiasm@1	994
matthiasm@1	995 f4.values = chroma;
matthiasm@1	996 f5.values = basschroma;
matthiasm@1	997 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
matthiasm@1	998 f6.values = chroma;
matthiasm@1	999
matthiasm@1	1000 // local chord estimation
matthiasm@1	1001 vector<float> currentChordSalience;
matthiasm@1	1002 float tempchordvalue = 0;
matthiasm@1	1003 float sumchordvalue = 0;
matthiasm@1	1004 int nChord = nChorddict / 24;
matthiasm@1	1005 for (int iChord = 0; iChord < nChord; iChord++) {
matthiasm@1	1006 tempchordvalue = 0;
matthiasm@1	1007 for (int iBin = 0; iBin < 12; iBin++) {
matthiasm@1	1008 tempchordvalue += chorddict[24 * iChord + iBin] * chroma[iBin];
matthiasm@1	1009 }
matthiasm@1	1010 for (int iBin = 12; iBin < 24; iBin++) {
matthiasm@1	1011 tempchordvalue += chorddict[24 * iChord + iBin] * chroma[iBin];
matthiasm@1	1012 }
matthiasm@1	1013 sumchordvalue+=tempchordvalue;
matthiasm@1	1014 currentChordSalience.push_back(tempchordvalue);
matthiasm@1	1015 }
matthiasm@1	1016 for (int iChord = 0; iChord < nChord; iChord++) {
matthiasm@1	1017 currentChordSalience[iChord] /= sumchordvalue;
matthiasm@1	1018 }
matthiasm@1	1019 chordogram.push_back(currentChordSalience);
matthiasm@1	1020
matthiasm@1	1021 fsOut[3].push_back(f3);
matthiasm@1	1022 fsOut[4].push_back(f4);
matthiasm@1	1023 fsOut[5].push_back(f5);
matthiasm@1	1024 fsOut[6].push_back(f6);
matthiasm@1	1025 count++;
matthiasm@1	1026 }
matthiasm@0	1027 // int musicitykernelwidth = (50 * 2048) / m_stepSize;
matthiasm@0	1028 //
matthiasm@3	1029 /* Simple chord estimation
matthiasm@3	1030 I just take the local chord estimates ("currentChordSalience") and average them over time, then
matthiasm@3	1031 take the maximum. Very simple, don't do this at home...
matthiasm@3	1032 */
matthiasm@3	1033 count = 0;
matthiasm@3	1034 int halfwindowlength = m_inputSampleRate / m_stepSize;
matthiasm@3	1035 int nChord = nChorddict / 24;
matthiasm@3	1036 vector<int> chordSequence;
matthiasm@3	1037 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram
matthiasm@3	1038 vector<int> temp = vector<int>(nChord,0);
matthiasm@3	1039 scoreChordogram.push_back(temp);
matthiasm@3	1040 }
matthiasm@4	1041 for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) {
matthiasm@3	1042 int startIndex = count + 1;
matthiasm@3	1043 int endIndex = count + 2 * halfwindowlength;
matthiasm@3	1044 vector<float> temp = vector<float>(nChord,0);
matthiasm@3	1045 float maxval = 0; // will be the value of the most salient chord in this frame
matthiasm@4	1046 float maxindex = 0; //... and the index thereof
matthiasm@3	1047 unsigned bestchordL = 0; // index of the best "left" chord
matthiasm@3	1048 unsigned bestchordR = 0; // index of the best "right" chord
matthiasm@4	1049 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
matthiasm@3	1050 // now find the max values on both sides of iWF
matthiasm@3	1051 // left side:
matthiasm@3	1052 float maxL = 0;
matthiasm@3	1053 unsigned maxindL = nChord-1;
matthiasm@3	1054 for (unsigned iChord = 0; iChord < nChord; iChord++) {
matthiasm@3	1055 float currsum = 0;
matthiasm@3	1056 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
matthiasm@3	1057 currsum += chordogram[count+iFrame][iChord];
matthiasm@3	1058 }
matthiasm@3	1059 if (iChord == nChord-1) currsum *= 0.8;
matthiasm@3	1060 if (currsum > maxL) {
matthiasm@3	1061 maxL = currsum;
matthiasm@3	1062 maxindL = iChord;
matthiasm@3	1063 }
matthiasm@3	1064 }
matthiasm@3	1065 // right side:
matthiasm@3	1066 float maxR = 0;
matthiasm@3	1067 unsigned maxindR = nChord-1;
matthiasm@3	1068 for (unsigned iChord = 0; iChord < nChord; iChord++) {
matthiasm@3	1069 float currsum = 0;
matthiasm@3	1070 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
matthiasm@3	1071 currsum += chordogram[count+iFrame][iChord];
matthiasm@3	1072 }
matthiasm@3	1073 if (iChord == nChord-1) currsum *= 0.8;
matthiasm@3	1074 if (currsum > maxR) {
matthiasm@3	1075 maxR = currsum;
matthiasm@3	1076 maxindR = iChord;
matthiasm@3	1077 }
matthiasm@3	1078 }
matthiasm@3	1079 if (maxL+maxR > maxval) {
matthiasm@3	1080 maxval = maxL+maxR;
matthiasm@3	1081 maxindex = iWF;
matthiasm@3	1082 bestchordL = maxindL;
matthiasm@3	1083 bestchordR = maxindR;
matthiasm@3	1084 }
matthiasm@3	1085
matthiasm@3	1086 }
matthiasm@3	1087 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
matthiasm@3	1088 // add a score to every chord-frame-point that was part of a maximum
matthiasm@3	1089 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
matthiasm@3	1090 scoreChordogram[iFrame+count][bestchordL]++;
matthiasm@3	1091 }
matthiasm@3	1092 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
matthiasm@3	1093 scoreChordogram[iFrame+count][bestchordR]++;
matthiasm@3	1094 }
matthiasm@3	1095 count++;
matthiasm@3	1096 }
matthiasm@3	1097
matthiasm@3	1098 count = 0;
matthiasm@3	1099 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
matthiasm@3	1100 float maxval = 0; // will be the value of the most salient chord in this frame
matthiasm@3	1101 float maxindex = 0; //... and the index thereof
matthiasm@3	1102 for (unsigned iChord = 0; iChord < nChord; iChord++) {
matthiasm@3	1103 if (scoreChordogram[count][iChord] > maxval) {
matthiasm@3	1104 maxval = scoreChordogram[count][iChord];
matthiasm@3	1105 maxindex = iChord;
matthiasm@4	1106 // cerr << iChord << endl;
matthiasm@3	1107 }
matthiasm@3	1108 }
matthiasm@3	1109 chordSequence.push_back(maxindex);
matthiasm@4	1110 // cerr << "before modefilter, maxindex: " << maxindex << endl;
matthiasm@3	1111 count++;
matthiasm@3	1112 }
matthiasm@3	1113
matthiasm@3	1114
matthiasm@3	1115 // mode filter on chordSequence
matthiasm@3	1116 count = 0;
matthiasm@3	1117 int oldChordIndex = -1;
matthiasm@3	1118 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
matthiasm@3	1119 Feature f6 = *it;
matthiasm@3	1120 Feature f7; // chord estimate
matthiasm@3	1121 f7.hasTimestamp = true;
matthiasm@3	1122 f7.timestamp = f6.timestamp;
matthiasm@3	1123 vector<int> chordCount = vector<int>(nChord,0);
matthiasm@3	1124 int maxChordCount = 0;
matthiasm@3	1125 int maxChordIndex = nChord-1;
matthiasm@4	1126 int startIndex = max(count - halfwindowlength/2,0);
matthiasm@4	1127 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
matthiasm@4	1128 for (int i = startIndex; i < endIndex; i++) {
matthiasm@4	1129 chordCount[chordSequence[i]]++;
matthiasm@4	1130 if (chordCount[chordSequence[i]] > maxChordCount) {
matthiasm@4	1131 cerr << "start index " << startIndex << endl;
matthiasm@4	1132 maxChordCount++;
matthiasm@4	1133 maxChordIndex = chordSequence[i];
matthiasm@4	1134 }
matthiasm@4	1135 }
matthiasm@4	1136 // chordSequence[count] = maxChordIndex;
matthiasm@4	1137 cerr << maxChordIndex << endl;
matthiasm@3	1138 if (oldChordIndex != maxChordIndex) {
matthiasm@3	1139 oldChordIndex = maxChordIndex;
matthiasm@3	1140
matthiasm@3	1141 char buffer1 [50];
matthiasm@3	1142 if (maxChordIndex < nChord - 1) {
matthiasm@3	1143 sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
matthiasm@3	1144 } else {
matthiasm@3	1145 sprintf(buffer1, "N");
matthiasm@3	1146 }
matthiasm@3	1147 f7.label = buffer1;
matthiasm@3	1148 fsOut[7].push_back(f7);
matthiasm@3	1149 }
matthiasm@3	1150 count++;
matthiasm@3	1151 }
matthiasm@0	1152 // // musicity
matthiasm@0	1153 // count = 0;
matthiasm@0	1154 // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
matthiasm@0	1155 // vector<float> musicityValue;
matthiasm@0	1156 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
matthiasm@0	1157 // Feature f4 = *it;
matthiasm@0	1158 //
matthiasm@0	1159 // int startIndex = max(count - musicitykernelwidth/2,0);
matthiasm@0	1160 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
matthiasm@0	1161 // float chromasum = 0;
matthiasm@0	1162 // float diffsum = 0;
matthiasm@0	1163 // for (int k = 0; k < 12; k++) {
matthiasm@0	1164 // for (int i = startIndex + 1; i < endIndex; i++) {
matthiasm@0	1165 // chromasum += pow(fsOut[4][i].values[k],2);
matthiasm@0	1166 // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]);
matthiasm@0	1167 // }
matthiasm@0	1168 // }
matthiasm@0	1169 // diffsum /= chromasum;
matthiasm@0	1170 // musicityValue.push_back(diffsum);
matthiasm@0	1171 // count++;
matthiasm@0	1172 // }
matthiasm@0	1173 //
matthiasm@0	1174 // float musicityThreshold = 0.44;
matthiasm@0	1175 // if (m_stepSize == 4096) {
matthiasm@0	1176 // musicityThreshold = 0.74;
matthiasm@0	1177 // }
matthiasm@0	1178 // if (m_stepSize == 4410) {
matthiasm@0	1179 // musicityThreshold = 0.77;
matthiasm@0	1180 // }
matthiasm@0	1181 //
matthiasm@0	1182 // count = 0;
matthiasm@0	1183 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
matthiasm@0	1184 // Feature f4 = *it;
matthiasm@0	1185 // Feature f8; // musicity
matthiasm@0	1186 // Feature f9; // musicity segmenter
matthiasm@0	1187 //
matthiasm@0	1188 // f8.hasTimestamp = true;
matthiasm@0	1189 // f8.timestamp = f4.timestamp;
matthiasm@0	1190 // f9.hasTimestamp = true;
matthiasm@0	1191 // f9.timestamp = f4.timestamp;
matthiasm@0	1192 //
matthiasm@0	1193 // int startIndex = max(count - musicitykernelwidth/2,0);
matthiasm@0	1194 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
matthiasm@0	1195 // int musicityCount = 0;
matthiasm@0	1196 // for (int i = startIndex; i <= endIndex; i++) {
matthiasm@0	1197 // if (musicityValue[i] > musicityThreshold) musicityCount++;
matthiasm@0	1198 // }
matthiasm@0	1199 // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1);
matthiasm@0	1200 //
matthiasm@0	1201 // if (isSpeech) {
matthiasm@0	1202 // if (oldlabeltype != 2) {
matthiasm@0	1203 // f9.label = "Speech";
matthiasm@0	1204 // fsOut[9].push_back(f9);
matthiasm@0	1205 // oldlabeltype = 2;
matthiasm@0	1206 // }
matthiasm@0	1207 // } else {
matthiasm@0	1208 // if (oldlabeltype != 1) {
matthiasm@0	1209 // f9.label = "Music";
matthiasm@0	1210 // fsOut[9].push_back(f9);
matthiasm@0	1211 // oldlabeltype = 1;
matthiasm@0	1212 // }
matthiasm@0	1213 // }
matthiasm@0	1214 // f8.values.push_back(musicityValue[count]);
matthiasm@0	1215 // fsOut[8].push_back(f8);
matthiasm@0	1216 // count++;
matthiasm@0	1217 // }
matthiasm@0	1218 return fsOut;
matthiasm@0	1219
matthiasm@0	1220 }
matthiasm@0	1221

Mercurial > hg > nnls-chroma

annotate NNLSChroma.cpp @ 4:266d23a41cdc matthiasm-plugin