nnls-chroma: NNLSChroma.cpp annotate

annotate NNLSChroma.cpp @ 3:8360483a026e matthiasm-plugin

new simple chord estimation

author	matthiasm
date	Mon, 31 May 2010 14:12:37 +0000
parents	2a491d71057d
children	266d23a41cdc

rev	line source
matthiasm@0	1
matthiasm@0	2 #include "NNLSChroma.h"
matthiasm@0	3 #include <cmath>
matthiasm@0	4 #include <list>
matthiasm@0	5 #include <iostream>
matthiasm@3	6 #include <fstream>
matthiasm@0	7 #include <sstream>
matthiasm@0	8 #include <cassert>
matthiasm@0	9 #include <cstdio>
matthiasm@1	10 #include "nnls.h"
matthiasm@0	11 // #include "cblas.h"
matthiasm@0	12 #include "chorddict.cpp"
matthiasm@0	13 using namespace std;
matthiasm@0	14
// sinvalue and cosvalue are numerically sin(2*pi/3) and cos(2*pi/3).
// Neither is referenced in the visible part of this file.
matthiasm@0	15 const float sinvalue = 0.866025404;
matthiasm@0	16 const float cosvalue = -0.5;
// 19 symmetric smoothing coefficients; copied into the vector `hw` below.
// NOTE(review): presumably a normalised Hamming window, judging by the name -- confirm.
matthiasm@0	17 const float hammingwind[19] = {0.0082, 0.0110, 0.0191, 0.0316, 0.0470, 0.0633, 0.0786, 0.0911, 0.0992, 0.1020, 0.0992, 0.0911, 0.0786, 0.0633, 0.0470, 0.0316, 0.0191, 0.0110, 0.0082};
matthiasm@0	18 const float basswindow[] = {0.001769, 0.015848, 0.043608, 0.084265, 0.136670, 0.199341, 0.270509, 0.348162, 0.430105, 0.514023, 0.597545, 0.678311, 0.754038, 0.822586, 0.882019, 0.930656, 0.967124, 0.990393, 0.999803, 0.995091, 0.976388, 0.944223, 0.899505, 0.843498, 0.777785, 0.704222, 0.624888, 0.542025, 0.457975, 0.375112, 0.295778, 0.222215, 0.156502, 0.100495, 0.055777, 0.023612, 0.004909, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000};
matthiasm@0	19 const float treblewindow[] = {0.000350, 0.003144, 0.008717, 0.017037, 0.028058, 0.041719, 0.057942, 0.076638, 0.097701, 0.121014, 0.146447, 0.173856, 0.203090, 0.233984, 0.266366, 0.300054, 0.334860, 0.370590, 0.407044, 0.444018, 0.481304, 0.518696, 0.555982, 0.592956, 0.629410, 0.665140, 0.699946, 0.733634, 0.766016, 0.796910, 0.826144, 0.853553, 0.878986, 0.902299, 0.923362, 0.942058, 0.958281, 0.971942, 0.982963, 0.991283, 0.996856, 0.999650, 0.999650, 0.996856, 0.991283, 0.982963, 0.971942, 0.958281, 0.942058, 0.923362, 0.902299, 0.878986, 0.853553, 0.826144, 0.796910, 0.766016, 0.733634, 0.699946, 0.665140, 0.629410, 0.592956, 0.555982, 0.518696, 0.481304, 0.444018, 0.407044, 0.370590, 0.334860, 0.300054, 0.266366, 0.233984, 0.203090, 0.173856, 0.146447, 0.121014, 0.097701, 0.076638, 0.057942, 0.041719, 0.028058, 0.017037, 0.008717, 0.003144, 0.000350};
// Bin labels for the chroma outputs: entries 0-11 are the "(bass)" labels,
// entries 12-23 the plain pitch-class labels, both starting at A.
// getOutputDescriptors() copies them into the binNames of its outputs.
matthiasm@0	20 const char* notenames[24] = {"A (bass)","Bb (bass)","B (bass)","C (bass)","C# (bass)","D (bass)","Eb (bass)","E (bass)","F (bass)","F# (bass)","G (bass)","Ab (bass)",
matthiasm@0	21 "A","Bb","B","C","C#","D","Eb","E","F","F#","G","Ab"};
// Vector copy of the 19 hammingwind coefficients.
matthiasm@0	22 const vector<float> hw(hammingwind, hammingwind+19);
// Number of log-frequency bins: the bin count of the "logfreqspec" output and
// the row length of the note dictionary (m_dict holds nNote * 84 floats).
matthiasm@0	23 const int nNote = 256;
matthiasm@0	24
matthiasm@0	25 /** Special Convolution
matthiasm@0	26 special convolution is as long as the convolvee, i.e. the first argument. in the valid core part of the
matthiasm@0	27 convolution it contains the usual convolution values, but the pads at the beginning (ending) have the same values
matthiasm@0	28 as the first (last) valid convolution bin.
matthiasm@0	29 **/
matthiasm@0	30
// Compile-time switch: when true, the functions in this file write
// "--> <function name>" trace lines to cerr.
matthiasm@0	31 const bool debug_on = false;
matthiasm@0	32
/**
 * Convolves `convolvee` with an odd-length `kernel` and returns a vector of
 * the same length as `convolvee`.
 *
 * The valid core of the result holds the ordinary convolution values. The
 * lenKernel/2 bins at either end, where the kernel would overhang the input,
 * are padded with the first (respectively last) valid value.
 *
 * The result is sized from the input; it used to be hard-coded to 256 bins,
 * which overran the buffer for longer inputs. If the input is shorter than
 * the kernel there is no valid core and an all-zero vector is returned.
 *
 * @param convolvee  signal to be smoothed
 * @param kernel     convolution kernel; its length must be odd (asserted)
 * @return           padded convolution, convolvee.size() elements long
 */
std::vector<float> SpecialConvolution(std::vector<float> convolvee, std::vector<float> kernel)
{
    const int lenConvolvee = static_cast<int>(convolvee.size());
    const int lenKernel = static_cast<int>(kernel.size());
    const int halfKernel = lenKernel / 2;

    assert(lenKernel % 2 != 0); // no exception handling !!!

    std::vector<float> Z(lenConvolvee, 0.0f);
    if (lenKernel == 0 || lenConvolvee < lenKernel) {
        return Z; // no position where the kernel fits entirely
    }

    // valid core: every position where the kernel lies fully inside the input
    for (int n = lenKernel - 1; n < lenConvolvee; ++n) {
        float s = 0.0f;
        for (int m = 0; m < lenKernel; ++m) {
            s += convolvee[n - m] * kernel[m];
        }
        Z[n - halfKernel] = s;
    }

    // fill lower and upper pads with the nearest valid value
    for (int n = 0; n < halfKernel; ++n) {
        Z[n] = Z[halfKernel];
    }
    for (int n = lenConvolvee - halfKernel; n < lenConvolvee; ++n) {
        Z[n] = Z[lenConvolvee - halfKernel - 1];
    }
    return Z;
}
matthiasm@0	60
matthiasm@0	61 // vector<float> FftBin2Frequency(vector<float> binnumbers, int fs, int blocksize)
matthiasm@0	62 // {
matthiasm@0	63 // vector<float> freq(binnumbers.size, 0.0);
matthiasm@0	64 // for (unsigned i = 0; i < binnumbers.size; ++i) {
matthiasm@0	65 // freq[i] = (binnumbers[i]-1.0) * fs * 1.0 / blocksize;
matthiasm@0	66 // }
matthiasm@0	67 // return freq;
matthiasm@0	68 // }
matthiasm@0	69
/**
 * Raised-cosine pulse of total width `width` centred on `centre`.
 *
 * Returns 0.5 * cos(2*pi*(x - centre)/width) + 0.5 when x lies within half a
 * width of the centre (1 at the centre, 0 at the edges) and 0 elsewhere.
 * A non-positive width yields 0 everywhere rather than dividing by zero.
 *
 * @param x       position at which the pulse is evaluated
 * @param centre  position of the pulse maximum
 * @param width   full width of the pulse
 */
float cospuls(float x, float centre, float width)
{
    if (!(width > 0)) {
        return 0.0;
    }
    // same value as the M_PI macro, which is not part of standard C++
    const double pi = 3.14159265358979323846;
    const float recipwidth = 1.0 / width;
    if (std::abs(x - centre) <= 0.5 * width) {
        return std::cos((x - centre) * 2 * pi * recipwidth) * .5 + .5;
    }
    return 0.0;
}
matthiasm@0	78
matthiasm@0	79 float pitchCospuls(float x, float centre, int binsperoctave)
matthiasm@0	80 {
matthiasm@0	81 float warpedf = -binsperoctave * (log2(centre) - log2(x));
matthiasm@0	82 float out = cospuls(warpedf, 0.0, 2.0);
matthiasm@0	83 // now scale to correct for note density
matthiasm@0	84 float c = log(2.0)/binsperoctave;
matthiasm@0	85 if (x > 0) {
matthiasm@0	86 out = out / (c * x);
matthiasm@0	87 } else {
matthiasm@0	88 out = 0;
matthiasm@0	89 }
matthiasm@0	90 return out;
matthiasm@0	91 }
matthiasm@0	92
matthiasm@0	93 bool logFreqMatrix(int fs, int blocksize, float *outmatrix) {
matthiasm@0	94
matthiasm@0	95 int binspersemitone = 3; // this must be 3
matthiasm@0	96 int minoctave = 0; // this must be 0
matthiasm@0	97 int maxoctave = 7; // this must be 7
matthiasm@1	98 int oversampling = 80;
matthiasm@0	99
matthiasm@0	100 // linear frequency vector
matthiasm@0	101 vector<float> fft_f;
matthiasm@0	102 for (int i = 0; i < blocksize/2; ++i) {
matthiasm@0	103 fft_f.push_back(i * (fs * 1.0 / blocksize));
matthiasm@0	104 }
matthiasm@0	105 float fft_width = fs * 2.0 / blocksize;
matthiasm@0	106
matthiasm@0	107 // linear oversampled frequency vector
matthiasm@0	108 vector<float> oversampled_f;
matthiasm@0	109 for (unsigned int i = 0; i < oversampling * blocksize/2; ++i) {
matthiasm@0	110 oversampled_f.push_back(i * ((fs * 1.0 / blocksize) / oversampling));
matthiasm@0	111 }
matthiasm@0	112
matthiasm@0	113 // pitch-spaced frequency vector
matthiasm@0	114 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone!
matthiasm@0	115 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone!
matthiasm@0	116 vector<float> cq_f;
matthiasm@0	117 float oob = 1.0/binspersemitone; // one over binspersemitone
matthiasm@0	118 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12
matthiasm@0	119 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69)));
matthiasm@0	120 for (int i = minMIDI + 1; i < maxMIDI; ++i) {
matthiasm@0	121 for (int k = -1; k < 2; ++k) {
matthiasm@0	122 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69)));
matthiasm@0	123 }
matthiasm@0	124 }
matthiasm@0	125 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69)));
matthiasm@0	126 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69)));
matthiasm@0	127
matthiasm@0	128 int nFFT = fft_f.size();
matthiasm@0	129
matthiasm@0	130 vector<float> fft_activation;
matthiasm@0	131 for (int iOS = 0; iOS < 2 * oversampling; ++iOS) {
matthiasm@0	132 float cosp = cospuls(oversampled_f[iOS],fft_f[1],fft_width);
matthiasm@0	133 fft_activation.push_back(cosp);
matthiasm@0	134 // cerr << cosp << endl;
matthiasm@0	135 }
matthiasm@0	136
matthiasm@0	137 float cq_activation;
matthiasm@0	138 for (int iFFT = 1; iFFT < nFFT; ++iFFT) {
matthiasm@0	139 // find frequency stretch where the oversampled vector can be non-zero (i.e. in a window of width fft_width around the current frequency)
matthiasm@0	140 int curr_start = oversampling * iFFT - oversampling;
matthiasm@0	141 int curr_end = oversampling * iFFT + oversampling; // don't know if I should add "+1" here
matthiasm@0	142 // cerr << oversampled_f[curr_start] << " " << fft_f[iFFT] << " " << oversampled_f[curr_end] << endl;
matthiasm@0	143 for (unsigned iCQ = 0; iCQ < cq_f.size(); ++iCQ) {
matthiasm@0	144 outmatrix[iFFT + nFFT * iCQ] = 0;
matthiasm@1	145 if (cq_f[iCQ] * pow(2.0, 0.084) + fft_width > fft_f[iFFT] && cq_f[iCQ] * pow(2.0, -0.084 * 2) - fft_width < fft_f[iFFT]) { // within a generous neighbourhood
matthiasm@0	146 for (int iOS = curr_start; iOS < curr_end; ++iOS) {
matthiasm@0	147 cq_activation = pitchCospuls(oversampled_f[iOS],cq_f[iCQ],binspersemitone*12);
matthiasm@0	148 // cerr << oversampled_f[iOS] << " " << cq_f[iCQ] << " " << cq_activation << endl;
matthiasm@0	149 outmatrix[iFFT + nFFT * iCQ] += cq_activation * fft_activation[iOS-curr_start];
matthiasm@0	150 }
matthiasm@0	151 // if (iCQ == 1 \|\| iCQ == 2) {
matthiasm@0	152 // cerr << " " << outmatrix[iFFT + nFFT * iCQ] << endl;
matthiasm@0	153 // }
matthiasm@0	154 }
matthiasm@0	155 }
matthiasm@0	156 }
matthiasm@0	157 return true;
matthiasm@0	158 }
matthiasm@0	159
matthiasm@3	160 bool dictionaryMatrix(float* dm) {
matthiasm@1	161 int binspersemitone = 3; // this must be 3
matthiasm@1	162 int minoctave = 0; // this must be 0
matthiasm@1	163 int maxoctave = 7; // this must be 7
matthiasm@1	164 float s_param = 0.6;
matthiasm@1	165
matthiasm@1	166 // pitch-spaced frequency vector
matthiasm@1	167 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone!
matthiasm@1	168 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone!
matthiasm@1	169 vector<float> cq_f;
matthiasm@1	170 float oob = 1.0/binspersemitone; // one over binspersemitone
matthiasm@1	171 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12
matthiasm@1	172 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69)));
matthiasm@1	173 for (int i = minMIDI + 1; i < maxMIDI; ++i) {
matthiasm@1	174 for (int k = -1; k < 2; ++k) {
matthiasm@1	175 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69)));
matthiasm@1	176 }
matthiasm@1	177 }
matthiasm@1	178 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69)));
matthiasm@1	179 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69)));
matthiasm@1	180
matthiasm@1	181 float curr_f;
matthiasm@1	182 float floatbin;
matthiasm@1	183 float curr_amp;
matthiasm@1	184 // now for every combination calculate the matrix element
matthiasm@1	185 for (unsigned iOut = 0; iOut < 12 * (maxoctave - minoctave); ++iOut) {
matthiasm@3	186 // cerr << iOut << endl;
matthiasm@1	187 for (unsigned iHarm = 1; iHarm <= 20; ++iHarm) {
matthiasm@1	188 curr_f = 440 * pow(2,(minMIDI-69+iOut)1.0/12) iHarm;
matthiasm@3	189 // if (curr_f > cq_f[nNote-1]) break;
matthiasm@3	190 floatbin = ((iOut + 1) * binspersemitone + 1) + binspersemitone * 12 * log2(iHarm);
matthiasm@3	191 // cerr << floatbin << endl;
matthiasm@1	192 curr_amp = pow(s_param,float(iHarm-1));
matthiasm@3	193 // cerr << "curramp" << curr_amp << endl;
matthiasm@1	194 for (unsigned iNote = 0; iNote < nNote; ++iNote) {
matthiasm@3	195 if (abs(iNote+1.0-floatbin)<2) {
matthiasm@3	196 dm[iNote + 256 * iOut] += cospuls(iNote+1.0, floatbin, binspersemitone + 0.0) * curr_amp;
matthiasm@3	197 // dm[iNote + nNote * iOut] += 1 * curr_amp;
matthiasm@3	198 }
matthiasm@1	199 }
matthiasm@3	200 }
matthiasm@1	201 }
matthiasm@3	202
matthiasm@3	203
matthiasm@1	204 }
matthiasm@1	205
matthiasm@0	206
matthiasm@0	207 NNLSChroma::NNLSChroma(float inputSampleRate) :
matthiasm@0	208 Plugin(inputSampleRate),
matthiasm@0	209 m_fl(0),
matthiasm@0	210 m_blockSize(0),
matthiasm@0	211 m_stepSize(0),
matthiasm@0	212 m_lengthOfNoteIndex(0),
matthiasm@0	213 m_meanTuning0(0),
matthiasm@0	214 m_meanTuning1(0),
matthiasm@0	215 m_meanTuning2(0),
matthiasm@0	216 m_localTuning0(0),
matthiasm@0	217 m_localTuning1(0),
matthiasm@0	218 m_localTuning2(0),
matthiasm@3	219 m_paling(0.8),
matthiasm@3	220 m_preset(0.0),
matthiasm@0	221 m_localTuning(0),
matthiasm@0	222 m_kernelValue(0),
matthiasm@0	223 m_kernelFftIndex(0),
matthiasm@0	224 m_kernelNoteIndex(0),
matthiasm@1	225 m_dict(0),
matthiasm@0	226 m_tuneLocal(false),
matthiasm@0	227 m_dictID(0)
matthiasm@0	228 {
matthiasm@0	229 if (debug_on) cerr << "--> NNLSChroma" << endl;
matthiasm@3	230 m_dict = new float[nNote * 84];
matthiasm@3	231 for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0;
matthiasm@1	232 dictionaryMatrix(m_dict);
matthiasm@0	233 }
matthiasm@0	234
matthiasm@0	235
matthiasm@0	236 NNLSChroma::~NNLSChroma()
matthiasm@0	237 {
matthiasm@0	238 if (debug_on) cerr << "--> ~NNLSChroma" << endl;
matthiasm@1	239 delete [] m_dict;
matthiasm@0	240 }
matthiasm@0	241
matthiasm@0	242 string
matthiasm@0	243 NNLSChroma::getIdentifier() const
matthiasm@0	244 {
matthiasm@0	245 if (debug_on) cerr << "--> getIdentifier" << endl;
matthiasm@0	246 return "nnls_chroma";
matthiasm@0	247 }
matthiasm@0	248
matthiasm@0	249 string
matthiasm@0	250 NNLSChroma::getName() const
matthiasm@0	251 {
matthiasm@0	252 if (debug_on) cerr << "--> getName" << endl;
matthiasm@0	253 return "NNLS Chroma";
matthiasm@0	254 }
matthiasm@0	255
matthiasm@0	256 string
matthiasm@0	257 NNLSChroma::getDescription() const
matthiasm@0	258 {
matthiasm@0	259 // Return something helpful here!
matthiasm@0	260 if (debug_on) cerr << "--> getDescription" << endl;
matthiasm@0	261 return "";
matthiasm@0	262 }
matthiasm@0	263
matthiasm@0	264 string
matthiasm@0	265 NNLSChroma::getMaker() const
matthiasm@0	266 {
matthiasm@0	267 if (debug_on) cerr << "--> getMaker" << endl;
matthiasm@0	268 // Your name here
matthiasm@0	269 return "Matthias Mauch";
matthiasm@0	270 }
matthiasm@0	271
matthiasm@0	272 int
matthiasm@0	273 NNLSChroma::getPluginVersion() const
matthiasm@0	274 {
matthiasm@0	275 if (debug_on) cerr << "--> getPluginVersion" << endl;
matthiasm@0	276 // Increment this each time you release a version that behaves
matthiasm@0	277 // differently from the previous one
matthiasm@0	278 return 1;
matthiasm@0	279 }
matthiasm@0	280
matthiasm@0	281 string
matthiasm@0	282 NNLSChroma::getCopyright() const
matthiasm@0	283 {
matthiasm@0	284 if (debug_on) cerr << "--> getCopyright" << endl;
matthiasm@0	285 // This function is not ideally named. It does not necessarily
matthiasm@0	286 // need to say who made the plugin -- getMaker does that -- but it
matthiasm@0	287 // should indicate the terms under which it is distributed. For
matthiasm@0	288 // example, "Copyright (year). All Rights Reserved", or "GPL"
matthiasm@0	289 return "Copyright (2010). All rights reserved.";
matthiasm@0	290 }
matthiasm@0	291
matthiasm@0	292 NNLSChroma::InputDomain
matthiasm@0	293 NNLSChroma::getInputDomain() const
matthiasm@0	294 {
matthiasm@0	295 if (debug_on) cerr << "--> getInputDomain" << endl;
matthiasm@0	296 return FrequencyDomain;
matthiasm@0	297 }
matthiasm@0	298
matthiasm@0	299 size_t
matthiasm@0	300 NNLSChroma::getPreferredBlockSize() const
matthiasm@0	301 {
matthiasm@0	302 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
matthiasm@0	303 return 16384; // 0 means "I can handle any block size"
matthiasm@0	304 }
matthiasm@0	305
matthiasm@0	306 size_t
matthiasm@0	307 NNLSChroma::getPreferredStepSize() const
matthiasm@0	308 {
matthiasm@0	309 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
matthiasm@0	310 return 2048; // 0 means "anything sensible"; in practice this
matthiasm@0	311 // means the same as the block size for TimeDomain
matthiasm@0	312 // plugins, or half of it for FrequencyDomain plugins
matthiasm@0	313 }
matthiasm@0	314
matthiasm@0	315 size_t
matthiasm@0	316 NNLSChroma::getMinChannelCount() const
matthiasm@0	317 {
matthiasm@0	318 if (debug_on) cerr << "--> getMinChannelCount" << endl;
matthiasm@0	319 return 1;
matthiasm@0	320 }
matthiasm@0	321
matthiasm@0	322 size_t
matthiasm@0	323 NNLSChroma::getMaxChannelCount() const
matthiasm@0	324 {
matthiasm@0	325 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
matthiasm@0	326 return 1;
matthiasm@0	327 }
matthiasm@0	328
matthiasm@0	329 NNLSChroma::ParameterList
matthiasm@0	330 NNLSChroma::getParameterDescriptors() const
matthiasm@0	331 {
matthiasm@0	332 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
matthiasm@0	333 ParameterList list;
matthiasm@0	334
matthiasm@0	335 ParameterDescriptor d0;
matthiasm@0	336 d0.identifier = "notedict";
matthiasm@0	337 d0.name = "note dictionary";
matthiasm@0	338 d0.description = "Notes in different note dictionaries differ by their spectral shapes.";
matthiasm@0	339 d0.unit = "";
matthiasm@0	340 d0.minValue = 0;
matthiasm@1	341 d0.maxValue = 1;
matthiasm@0	342 d0.defaultValue = 0;
matthiasm@0	343 d0.isQuantized = true;
matthiasm@0	344 d0.valueNames.push_back("s = 0.6");
matthiasm@1	345 // d0.valueNames.push_back("s = 0.9");
matthiasm@1	346 // d0.valueNames.push_back("s linearly spaced");
matthiasm@0	347 d0.valueNames.push_back("no NNLS");
matthiasm@0	348 d0.quantizeStep = 1.0;
matthiasm@0	349 list.push_back(d0);
matthiasm@0	350
matthiasm@0	351 ParameterDescriptor d1;
matthiasm@0	352 d1.identifier = "tuningmode";
matthiasm@0	353 d1.name = "tuning mode";
matthiasm@3	354 d1.description = "Tuning can be performed locally or on the whole extraction segment.";
matthiasm@0	355 d1.unit = "";
matthiasm@0	356 d1.minValue = 0;
matthiasm@0	357 d1.maxValue = 1;
matthiasm@0	358 d1.defaultValue = 1;
matthiasm@0	359 d1.isQuantized = true;
matthiasm@0	360 d1.valueNames.push_back("global tuning");
matthiasm@0	361 d1.valueNames.push_back("local tuning");
matthiasm@0	362 d1.quantizeStep = 1.0;
matthiasm@0	363 list.push_back(d1);
matthiasm@0	364
matthiasm@0	365 ParameterDescriptor d2;
matthiasm@0	366 d2.identifier = "paling";
matthiasm@0	367 d2.name = "spectral paling";
matthiasm@0	368 d2.description = "Spectral paling: no paling - 0; whitening - 1.";
matthiasm@0	369 d2.unit = "";
matthiasm@3	370 d2.isQuantized = true;
matthiasm@3	371 d2.quantizeStep = 0.1;
matthiasm@3	372 d2.minValue = 0.0;
matthiasm@3	373 d2.maxValue = 1.0;
matthiasm@0	374 d2.defaultValue = 0.5;
matthiasm@3	375 // d2.isQuantized = false;
matthiasm@0	376 list.push_back(d2);
matthiasm@0	377
matthiasm@3	378 ParameterDescriptor d3;
matthiasm@3	379 d3.identifier = "preset";
matthiasm@3	380 d3.name = "preset";
matthiasm@3	381 d3.description = "Spectral paling: no paling - 0; whitening - 1.";
matthiasm@3	382 d3.unit = "";
matthiasm@3	383 d3.isQuantized = true;
matthiasm@3	384 d3.quantizeStep = 1;
matthiasm@3	385 d3.minValue = 0.0;
matthiasm@3	386 d3.maxValue = 2.0;
matthiasm@3	387 d3.defaultValue = 0.0;
matthiasm@3	388 d3.valueNames.push_back("polyphonic pop");
matthiasm@3	389 d3.valueNames.push_back("polyphonic pop (fast)");
matthiasm@3	390 d3.valueNames.push_back("solo keyboard");
matthiasm@3	391 d3.valueNames.push_back("manual");
matthiasm@3	392 list.push_back(d3);
matthiasm@0	393 return list;
matthiasm@0	394 }
matthiasm@0	395
matthiasm@0	396 float
matthiasm@0	397 NNLSChroma::getParameter(string identifier) const
matthiasm@0	398 {
matthiasm@3	399 if (debug_on) cerr << "--> getParameter" << endl;
matthiasm@0	400 if (identifier == "notedict") {
matthiasm@0	401 return m_dictID;
matthiasm@0	402 }
matthiasm@0	403
matthiasm@0	404 if (identifier == "paling") {
matthiasm@0	405 return m_paling;
matthiasm@0	406 }
matthiasm@0	407
matthiasm@0	408 if (identifier == "tuningmode") {
matthiasm@0	409 if (m_tuneLocal) {
matthiasm@0	410 return 1.0;
matthiasm@0	411 } else {
matthiasm@0	412 return 0.0;
matthiasm@0	413 }
matthiasm@0	414 }
matthiasm@3	415 if (identifier == "preset") {
matthiasm@3	416 return m_preset;
matthiasm@3	417 }
matthiasm@0	418 return 0;
matthiasm@0	419
matthiasm@0	420 }
matthiasm@0	421
matthiasm@0	422 void
matthiasm@0	423 NNLSChroma::setParameter(string identifier, float value)
matthiasm@0	424 {
matthiasm@3	425 if (debug_on) cerr << "--> setParameter" << endl;
matthiasm@0	426 if (identifier == "notedict") {
matthiasm@0	427 m_dictID = (int) value;
matthiasm@0	428 }
matthiasm@0	429
matthiasm@0	430 if (identifier == "paling") {
matthiasm@0	431 m_paling = value;
matthiasm@0	432 }
matthiasm@0	433
matthiasm@0	434 if (identifier == "tuningmode") {
matthiasm@0	435 m_tuneLocal = (value > 0) ? true : false;
matthiasm@0	436 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
matthiasm@0	437 }
matthiasm@3	438 if (identifier == "preset") {
matthiasm@3	439 m_preset = value;
matthiasm@3	440 if (m_preset == 0.0) {
matthiasm@3	441 m_tuneLocal = false;
matthiasm@3	442 m_paling = 1.0;
matthiasm@3	443 m_dictID = 0.0;
matthiasm@3	444 }
matthiasm@3	445 if (m_preset == 1.0) {
matthiasm@3	446 m_tuneLocal = false;
matthiasm@3	447 m_paling = 1.0;
matthiasm@3	448 m_dictID = 1.0;
matthiasm@3	449 }
matthiasm@3	450 if (m_preset == 2.0) {
matthiasm@3	451 m_tuneLocal = false;
matthiasm@3	452 m_paling = 0.7;
matthiasm@3	453 m_dictID = 0.0;
matthiasm@3	454 }
matthiasm@3	455 }
matthiasm@0	456 }
matthiasm@0	457
matthiasm@0	458 NNLSChroma::ProgramList
matthiasm@0	459 NNLSChroma::getPrograms() const
matthiasm@0	460 {
matthiasm@0	461 if (debug_on) cerr << "--> getPrograms" << endl;
matthiasm@0	462 ProgramList list;
matthiasm@0	463
matthiasm@0	464 // If you have no programs, return an empty list (or simply don't
matthiasm@0	465 // implement this function or getCurrentProgram/selectProgram)
matthiasm@0	466
matthiasm@0	467 return list;
matthiasm@0	468 }
matthiasm@0	469
matthiasm@0	470 string
matthiasm@0	471 NNLSChroma::getCurrentProgram() const
matthiasm@0	472 {
matthiasm@0	473 if (debug_on) cerr << "--> getCurrentProgram" << endl;
matthiasm@0	474 return ""; // no programs
matthiasm@0	475 }
matthiasm@0	476
matthiasm@0	477 void
matthiasm@0	478 NNLSChroma::selectProgram(string name)
matthiasm@0	479 {
matthiasm@0	480 if (debug_on) cerr << "--> selectProgram" << endl;
matthiasm@0	481 }
matthiasm@0	482
matthiasm@0	483
matthiasm@0	484 NNLSChroma::OutputList
matthiasm@0	485 NNLSChroma::getOutputDescriptors() const
matthiasm@0	486 {
matthiasm@0	487 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
matthiasm@0	488 OutputList list;
matthiasm@0	489
matthiasm@0	490 // Make chroma names for the binNames property
matthiasm@0	491 vector<string> chromanames;
matthiasm@0	492 vector<string> bothchromanames;
matthiasm@0	493 for (int iNote = 0; iNote < 24; iNote++) {
matthiasm@0	494 bothchromanames.push_back(notenames[iNote]);
matthiasm@0	495 if (iNote < 12) {
matthiasm@0	496 chromanames.push_back(notenames[iNote]);
matthiasm@0	497 }
matthiasm@0	498 }
matthiasm@0	499
matthiasm@1	500 // int nNote = 84;
matthiasm@0	501
matthiasm@0	502 // See OutputDescriptor documentation for the possibilities here.
matthiasm@0	503 // Every plugin must have at least one output.
matthiasm@0	504
matthiasm@0	505 OutputDescriptor d0;
matthiasm@0	506 d0.identifier = "tuning";
matthiasm@0	507 d0.name = "Tuning";
matthiasm@0	508 d0.description = "The concert pitch.";
matthiasm@0	509 d0.unit = "Hz";
matthiasm@0	510 d0.hasFixedBinCount = true;
matthiasm@0	511 d0.binCount = 0;
matthiasm@0	512 d0.hasKnownExtents = true;
matthiasm@0	513 d0.minValue = 427.47;
matthiasm@0	514 d0.maxValue = 452.89;
matthiasm@0	515 d0.isQuantized = false;
matthiasm@0	516 d0.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@0	517 d0.hasDuration = false;
matthiasm@0	518 list.push_back(d0);
matthiasm@0	519
matthiasm@0	520 OutputDescriptor d1;
matthiasm@0	521 d1.identifier = "logfreqspec";
matthiasm@0	522 d1.name = "Log-Frequency Spectrum";
matthiasm@0	523 d1.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping.";
matthiasm@0	524 d1.unit = "";
matthiasm@0	525 d1.hasFixedBinCount = true;
matthiasm@0	526 d1.binCount = nNote;
matthiasm@0	527 d1.hasKnownExtents = false;
matthiasm@0	528 d1.isQuantized = false;
matthiasm@0	529 d1.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0	530 d1.hasDuration = false;
matthiasm@0	531 d1.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0	532 list.push_back(d1);
matthiasm@0	533
matthiasm@0	534 OutputDescriptor d2;
matthiasm@0	535 d2.identifier = "tunedlogfreqspec";
matthiasm@0	536 d2.name = "Tuned Log-Frequency Spectrum";
matthiasm@0	537 d2.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping, then its tuned using the estimated tuning frequency.";
matthiasm@0	538 d2.unit = "";
matthiasm@0	539 d2.hasFixedBinCount = true;
matthiasm@0	540 d2.binCount = 256;
matthiasm@0	541 d2.hasKnownExtents = false;
matthiasm@0	542 d2.isQuantized = false;
matthiasm@0	543 d2.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0	544 d2.hasDuration = false;
matthiasm@0	545 d2.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0	546 list.push_back(d2);
matthiasm@0	547
matthiasm@0	548 OutputDescriptor d3;
matthiasm@0	549 d3.identifier = "semitonespectrum";
matthiasm@0	550 d3.name = "Semitone Spectrum";
matthiasm@0	551 d3.description = "A semitone-spaced log-frequency spectrum derived from the third-of-a-semitone-spaced tuned log-frequency spectrum.";
matthiasm@0	552 d3.unit = "";
matthiasm@0	553 d3.hasFixedBinCount = true;
matthiasm@0	554 d3.binCount = 84;
matthiasm@0	555 d3.hasKnownExtents = false;
matthiasm@0	556 d3.isQuantized = false;
matthiasm@0	557 d3.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0	558 d3.hasDuration = false;
matthiasm@0	559 d3.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0	560 list.push_back(d3);
matthiasm@0	561
matthiasm@0	562 OutputDescriptor d4;
matthiasm@0	563 d4.identifier = "chroma";
matthiasm@0	564 d4.name = "Chromagram";
matthiasm@0	565 d4.description = "Tuning-adjusted chromagram from NNLS soft transcription, with an emphasis on the medium note range.";
matthiasm@0	566 d4.unit = "";
matthiasm@0	567 d4.hasFixedBinCount = true;
matthiasm@0	568 d4.binCount = 12;
matthiasm@0	569 d4.binNames = chromanames;
matthiasm@0	570 d4.hasKnownExtents = false;
matthiasm@0	571 d4.isQuantized = false;
matthiasm@0	572 d4.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0	573 d4.hasDuration = false;
matthiasm@0	574 d4.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0	575 list.push_back(d4);
matthiasm@0	576
matthiasm@0	577 OutputDescriptor d5;
matthiasm@0	578 d5.identifier = "basschroma";
matthiasm@0	579 d5.name = "Bass Chromagram";
matthiasm@0	580 d5.description = "Tuning-adjusted bass chromagram from NNLS soft transcription, with an emphasis on the bass note range.";
matthiasm@0	581 d5.unit = "";
matthiasm@0	582 d5.hasFixedBinCount = true;
matthiasm@0	583 d5.binCount = 12;
matthiasm@0	584 d5.binNames = chromanames;
matthiasm@0	585 d5.hasKnownExtents = false;
matthiasm@0	586 d5.isQuantized = false;
matthiasm@0	587 d5.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0	588 d5.hasDuration = false;
matthiasm@0	589 d5.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0	590 list.push_back(d5);
matthiasm@0	591
matthiasm@0	592 OutputDescriptor d6;
matthiasm@0	593 d6.identifier = "bothchroma";
matthiasm@0	594 d6.name = "Chromagram and Bass Chromagram";
matthiasm@0	595 d6.description = "Tuning-adjusted chromagram and bass chromagram (stacked on top of each other) from NNLS soft transcription.";
matthiasm@0	596 d6.unit = "";
matthiasm@0	597 d6.hasFixedBinCount = true;
matthiasm@0	598 d6.binCount = 24;
matthiasm@0	599 d6.binNames = bothchromanames;
matthiasm@0	600 d6.hasKnownExtents = false;
matthiasm@0	601 d6.isQuantized = false;
matthiasm@0	602 d6.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0	603 d6.hasDuration = false;
matthiasm@0	604 d6.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0	605 list.push_back(d6);
matthiasm@0	606
matthiasm@0	607 OutputDescriptor d7;
matthiasm@0	608 d7.identifier = "simplechord";
matthiasm@0	609 d7.name = "Simple Chord Estimate";
matthiasm@0	610 d7.description = "A simple chord estimate based on the inner product of chord templates with the smoothed chroma.";
matthiasm@0	611 d7.unit = "";
matthiasm@0	612 d7.hasFixedBinCount = true;
matthiasm@0	613 d7.binCount = 0;
matthiasm@0	614 d7.hasKnownExtents = false;
matthiasm@0	615 d7.isQuantized = false;
matthiasm@0	616 d7.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@0	617 d7.hasDuration = false;
matthiasm@0	618 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0	619 list.push_back(d7);
matthiasm@0	620
matthiasm@1	621 // OutputDescriptor d8;
matthiasm@1	622 // d8.identifier = "inconsistency";
matthiasm@1	623 // d8.name = "Harmonic inconsistency value";
matthiasm@1	624 // d8.description = "Harmonic inconsistency. Indicates music if low, non-music or speech when high.";
matthiasm@1	625 // d8.unit = "";
matthiasm@1	626 // d8.hasFixedBinCount = true;
matthiasm@1	627 // d8.binCount = 1;
matthiasm@1	628 // d8.hasKnownExtents = false;
matthiasm@1	629 // d8.isQuantized = false;
matthiasm@1	630 // d8.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@1	631 // d8.hasDuration = false;
matthiasm@1	632 // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@1	633 // list.push_back(d8);
matthiasm@1	634 //
matthiasm@1	635 // OutputDescriptor d9;
matthiasm@1	636 // d9.identifier = "inconsistencysegment";
matthiasm@1	637 // d9.name = "Harmonic inconsistency segmenter";
matthiasm@1	638 // d9.description = "Segments the audio based on the harmonic inconsistency value into speech and music.";
matthiasm@1	639 // d9.unit = "";
matthiasm@1	640 // d9.hasFixedBinCount = true;
matthiasm@1	641 // d9.binCount = 0;
matthiasm@1	642 // d9.hasKnownExtents = true;
matthiasm@1	643 // d9.minValue = 0.1;
matthiasm@1	644 // d9.maxValue = 0.9;
matthiasm@1	645 // d9.isQuantized = false;
matthiasm@1	646 // d9.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@1	647 // d9.hasDuration = false;
matthiasm@1	648 // d9.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@1	649 // list.push_back(d9);
matthiasm@1	650 //
matthiasm@1	651 OutputDescriptor d10;
matthiasm@1	652 d10.identifier = "localtuning";
matthiasm@1	653 d10.name = "Local tuning";
matthiasm@1	654 d10.description = "";
matthiasm@1	655 d10.unit = "Hz";
matthiasm@1	656 d10.hasFixedBinCount = true;
matthiasm@1	657 d10.binCount = 1;
matthiasm@1	658 d10.hasKnownExtents = true;
matthiasm@1	659 d10.minValue = 427.47;
matthiasm@1	660 d10.maxValue = 452.89;
matthiasm@1	661 d10.isQuantized = false;
matthiasm@3	662 d10.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@1	663 d10.hasDuration = false;
matthiasm@3	664 // d10.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@1	665 list.push_back(d10);
matthiasm@1	666
matthiasm@0	667 return list;
matthiasm@0	668 }
matthiasm@0	669
matthiasm@0	670
matthiasm@0	671 bool
matthiasm@0	672 NNLSChroma::initialise(size_t channels, size_t stepSize, size_t blockSize)
matthiasm@0	673 {
matthiasm@1	674 if (debug_on) {
matthiasm@1	675 cerr << "--> initialise";
matthiasm@1	676 }
matthiasm@1	677
matthiasm@0	678 if (channels < getMinChannelCount() \|\|
matthiasm@0	679 channels > getMaxChannelCount()) return false;
matthiasm@0	680 m_blockSize = blockSize;
matthiasm@0	681 m_stepSize = stepSize;
matthiasm@0	682 frameCount = 0;
matthiasm@0	683 int tempn = 256 * m_blockSize/2;
matthiasm@1	684 cerr << "length of tempkernel : " << tempn << endl;
matthiasm@1	685 float *tempkernel;
matthiasm@1	686
matthiasm@1	687 tempkernel = new float[tempn];
matthiasm@1	688
matthiasm@0	689 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
matthiasm@1	690 m_kernelValue.clear();
matthiasm@1	691 m_kernelFftIndex.clear();
matthiasm@1	692 m_kernelNoteIndex.clear();
matthiasm@1	693 int countNonzero = 0;
matthiasm@0	694 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
matthiasm@1	695 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) {
matthiasm@1	696 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
matthiasm@1	697 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
matthiasm@0	698 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
matthiasm@1	699 countNonzero++;
matthiasm@0	700 }
matthiasm@1	701 m_kernelFftIndex.push_back(iFFT);
matthiasm@1	702 m_kernelNoteIndex.push_back(iNote);
matthiasm@0	703 }
matthiasm@0	704 }
matthiasm@1	705 }
matthiasm@1	706 cerr << "nonzero count : " << countNonzero << endl;
matthiasm@1	707 delete [] tempkernel;
matthiasm@3	708 ofstream myfile;
matthiasm@3	709 myfile.open ("matrix.txt");
matthiasm@3	710 // myfile << "Writing this to a file.\n";
matthiasm@3	711 for (int i = 0; i < nNote * 84; ++i) {
matthiasm@3	712 myfile << m_dict[i] << endl;
matthiasm@3	713 }
matthiasm@3	714 myfile.close();
matthiasm@0	715 return true;
matthiasm@0	716 }
matthiasm@0	717
matthiasm@0	718 void
matthiasm@0	719 NNLSChroma::reset()
matthiasm@0	720 {
matthiasm@0	721 if (debug_on) cerr << "--> reset";
matthiasm@0	722 // Clear buffers, reset stored values, etc
matthiasm@0	723 frameCount = 0;
matthiasm@0	724 m_dictID = 0;
matthiasm@1	725 m_kernelValue.clear();
matthiasm@1	726 m_kernelFftIndex.clear();
matthiasm@1	727 m_kernelNoteIndex.clear();
matthiasm@0	728 }
matthiasm@0	729
matthiasm@0	730 NNLSChroma::FeatureSet
matthiasm@0	731 NNLSChroma::process(const float const inputBuffers, Vamp::RealTime timestamp)
matthiasm@0	732 {
matthiasm@0	733 if (debug_on) cerr << "--> process" << endl;
matthiasm@0	734 // int nNote = 84; // TODO: this should be globally set and/or depend on the kernel matrix
matthiasm@0	735
matthiasm@0	736 frameCount++;
matthiasm@0	737 float *magnitude = new float[m_blockSize/2];
matthiasm@0	738
matthiasm@0	739 Feature f10; // local tuning
matthiasm@3	740 f10.hasTimestamp = true;
matthiasm@3	741 f10.timestamp = timestamp - Vamp::RealTime::fromSeconds(0);
matthiasm@0	742 const float *fbuf = inputBuffers[0];
matthiasm@0	743
matthiasm@0	744 // make magnitude
matthiasm@0	745 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
matthiasm@0	746 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
matthiasm@0	747 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
matthiasm@1	748 // magnitude[iBin] = (iBin == frameCount - 1 \|\| frameCount < 2) ? 1.0 : 0.0;
matthiasm@0	749 }
matthiasm@0	750
matthiasm@0	751
matthiasm@0	752 // note magnitude mapping using pre-calculated matrix
matthiasm@0	753 float *nm = new float[nNote]; // note magnitude
matthiasm@0	754 for (size_t iNote = 0; iNote < nNote; iNote++) {
matthiasm@0	755 nm[iNote] = 0; // initialise as 0
matthiasm@0	756 }
matthiasm@0	757 int binCount = 0;
matthiasm@0	758 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
matthiasm@0	759 // cerr << ".";
matthiasm@1	760 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
matthiasm@1	761 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl;
matthiasm@0	762 binCount++;
matthiasm@0	763 }
matthiasm@1	764 // cerr << nm[20];
matthiasm@1	765 // cerr << endl;
matthiasm@0	766
matthiasm@0	767
matthiasm@0	768 float one_over_N = 1.0/frameCount;
matthiasm@0	769 // update means of complex tuning variables
matthiasm@0	770 m_meanTuning0 = float(frameCount-1)one_over_N;
matthiasm@0	771 m_meanTuning1 = float(frameCount-1)one_over_N;
matthiasm@0	772 m_meanTuning2 = float(frameCount-1)one_over_N;
matthiasm@0	773
matthiasm@0	774 for (int iTone = 0; iTone < 160; iTone = iTone + 3) {
matthiasm@0	775 m_meanTuning0 += nm[iTone + 0]*one_over_N;
matthiasm@0	776 m_meanTuning1 += nm[iTone + 1]*one_over_N;
matthiasm@0	777 m_meanTuning2 += nm[iTone + 2]*one_over_N;
matthiasm@3	778 float ratioOld = 0.997;
matthiasm@3	779 m_localTuning0 = ratioOld; m_localTuning0 += nm[iTone + 0] (1 - ratioOld);
matthiasm@3	780 m_localTuning1 = ratioOld; m_localTuning1 += nm[iTone + 1] (1 - ratioOld);
matthiasm@3	781 m_localTuning2 = ratioOld; m_localTuning2 += nm[iTone + 2] (1 - ratioOld);
matthiasm@0	782 }
matthiasm@0	783
matthiasm@0	784 // if (m_tuneLocal) {
matthiasm@0	785 // local tuning
matthiasm@0	786 float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2;
matthiasm@0	787 float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2;
matthiasm@0	788 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
matthiasm@0	789 m_localTuning.push_back(normalisedtuning);
matthiasm@0	790 float tuning440 = 440 * pow(2,normalisedtuning/12);
matthiasm@0	791 f10.values.push_back(tuning440);
matthiasm@3	792 // cerr << tuning440 << endl;
matthiasm@0	793 // }
matthiasm@0	794
matthiasm@0	795 Feature f1; // logfreqspec
matthiasm@0	796 f1.hasTimestamp = true;
matthiasm@0	797 f1.timestamp = timestamp;
matthiasm@0	798 for (size_t iNote = 0; iNote < nNote; iNote++) {
matthiasm@0	799 f1.values.push_back(nm[iNote]);
matthiasm@0	800 }
matthiasm@0	801
matthiasm@0	802 FeatureSet fs;
matthiasm@0	803 fs[1].push_back(f1);
matthiasm@3	804 fs[8].push_back(f10);
matthiasm@0	805
matthiasm@0	806 // deletes
matthiasm@0	807 delete[] magnitude;
matthiasm@0	808 delete[] nm;
matthiasm@0	809
matthiasm@0	810 m_fl.push_back(f1); // remember note magnitude for getRemainingFeatures
matthiasm@0	811 return fs;
matthiasm@0	812 }
matthiasm@0	813
matthiasm@0	814 NNLSChroma::FeatureSet
matthiasm@0	815 NNLSChroma::getRemainingFeatures()
matthiasm@0	816 {
matthiasm@0	817 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
matthiasm@0	818 FeatureSet fsOut;
matthiasm@0	819 //
matthiasm@1	820 /** Calculate Tuning
matthiasm@1	821 calculate tuning from (using the angle of the complex number defined by the
matthiasm@1	822 cumulative mean real and imag values)
matthiasm@1	823 **/
matthiasm@1	824 float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
matthiasm@1	825 float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
matthiasm@1	826 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
matthiasm@1	827 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
matthiasm@1	828 int intShift = floor(normalisedtuning * 3);
matthiasm@1	829 float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
matthiasm@1	830
matthiasm@1	831 char buffer0 [50];
matthiasm@1	832
matthiasm@1	833 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
matthiasm@1	834
matthiasm@1	835 // cerr << "normalisedtuning: " << normalisedtuning << '\n';
matthiasm@1	836
matthiasm@1	837 // push tuning to FeatureSet fsOut
matthiasm@1	838 Feature f0; // tuning
matthiasm@1	839 f0.hasTimestamp = true;
matthiasm@1	840 f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));;
matthiasm@1	841 f0.label = buffer0;
matthiasm@1	842 fsOut[0].push_back(f0);
matthiasm@1	843
matthiasm@1	844 /** Tune Log-Frequency Spectrogram
matthiasm@1	845 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
matthiasm@1	846 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
matthiasm@1	847 **/
matthiasm@1	848
matthiasm@1	849 float tempValue = 0;
matthiasm@1	850 float dbThreshold = 0; // relative to the background spectrum
matthiasm@1	851 float thresh = pow(10,dbThreshold/20);
matthiasm@1	852 // cerr << "tune local ? " << m_tuneLocal << endl;
matthiasm@1	853 int count = 0;
matthiasm@1	854
matthiasm@1	855 for (FeatureList::iterator i = m_fl.begin(); i != m_fl.end(); ++i) {
matthiasm@1	856 Feature f1 = *i;
matthiasm@1	857 Feature f2; // tuned log-frequency spectrum
matthiasm@1	858 f2.hasTimestamp = true;
matthiasm@1	859 f2.timestamp = f1.timestamp;
matthiasm@1	860 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
matthiasm@1	861
matthiasm@1	862 if (m_tuneLocal) {
matthiasm@1	863 intShift = floor(m_localTuning[count] * 3);
matthiasm@1	864 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
matthiasm@1	865 }
matthiasm@1	866
matthiasm@1	867 // cerr << intShift << " " << intFactor << endl;
matthiasm@1	868
matthiasm@1	869 for (int k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
matthiasm@1	870 tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor;
matthiasm@1	871 f2.values.push_back(tempValue);
matthiasm@1	872 }
matthiasm@1	873
matthiasm@1	874 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
matthiasm@1	875 vector<float> runningmean = SpecialConvolution(f2.values,hw);
matthiasm@1	876 vector<float> runningstd;
matthiasm@1	877 for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance)
matthiasm@1	878 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
matthiasm@1	879 }
matthiasm@1	880 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
matthiasm@1	881 for (int i = 0; i < 256; i++) {
matthiasm@1	882 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
matthiasm@1	883 if (runningstd[i] > 0) {
matthiasm@1	884 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
matthiasm@1	885 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
matthiasm@1	886 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
matthiasm@1	887 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
matthiasm@1	888 }
matthiasm@1	889 if (f2.values[i] < 0) {
matthiasm@1	890 cerr << "ERROR: negative value in logfreq spectrum" << endl;
matthiasm@1	891 }
matthiasm@1	892 }
matthiasm@1	893 fsOut[2].push_back(f2);
matthiasm@1	894 count++;
matthiasm@1	895 }
matthiasm@1	896
matthiasm@1	897 /** Semitone spectrum and chromagrams
matthiasm@1	898 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
matthiasm@1	899 is inferred using a non-negative least squares algorithm.
matthiasm@1	900 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
matthiasm@1	901 bass and treble stacked onto each other).
matthiasm@1	902 **/
matthiasm@1	903 // taucs_ccs_matrix* A_original_ordering = taucs_construct_sorted_ccs_matrix(nnlsdict06, nnls_m, nnls_n);
matthiasm@1	904
matthiasm@1	905 vector<vector<float> > chordogram;
matthiasm@3	906 vector<vector<int> > scoreChordogram;
matthiasm@1	907 vector<float> oldchroma = vector<float>(12,0);
matthiasm@1	908 vector<float> oldbasschroma = vector<float>(12,0);
matthiasm@1	909 count = 0;
matthiasm@1	910
matthiasm@1	911 for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) {
matthiasm@1	912 Feature f2 = *it; // logfreq spectrum
matthiasm@1	913 Feature f3; // semitone spectrum
matthiasm@1	914 Feature f4; // treble chromagram
matthiasm@1	915 Feature f5; // bass chromagram
matthiasm@1	916 Feature f6; // treble and bass chromagram
matthiasm@1	917
matthiasm@1	918 f3.hasTimestamp = true;
matthiasm@1	919 f3.timestamp = f2.timestamp;
matthiasm@1	920
matthiasm@1	921 f4.hasTimestamp = true;
matthiasm@1	922 f4.timestamp = f2.timestamp;
matthiasm@1	923
matthiasm@1	924 f5.hasTimestamp = true;
matthiasm@1	925 f5.timestamp = f2.timestamp;
matthiasm@1	926
matthiasm@1	927 f6.hasTimestamp = true;
matthiasm@1	928 f6.timestamp = f2.timestamp;
matthiasm@1	929
matthiasm@3	930 float b[256];
matthiasm@1	931
matthiasm@1	932 bool some_b_greater_zero = false;
matthiasm@3	933 float sumb = 0;
matthiasm@1	934 for (int i = 0; i < 256; i++) {
matthiasm@3	935 // b[i] = m_dict[(256 * count + i) % (256 * 84)];
matthiasm@3	936 b[i] = f2.values[i];
matthiasm@3	937 sumb += b[i];
matthiasm@1	938 if (b[i] > 0) {
matthiasm@1	939 some_b_greater_zero = true;
matthiasm@1	940 }
matthiasm@1	941 }
matthiasm@1	942
matthiasm@1	943 // here's where the non-negative least squares algorithm calculates the note activation x
matthiasm@1	944
matthiasm@1	945 vector<float> chroma = vector<float>(12, 0);
matthiasm@1	946 vector<float> basschroma = vector<float>(12, 0);
matthiasm@1	947 float currval;
matthiasm@1	948 unsigned iSemitone = 0;
matthiasm@1	949
matthiasm@1	950 if (some_b_greater_zero) {
matthiasm@3	951 if (m_dictID == 1) {
matthiasm@1	952 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
matthiasm@1	953 currval = 0;
matthiasm@3	954 currval += b[iNote + 1 + -1] * 0.5;
matthiasm@3	955 currval += b[iNote + 1 + 0] * 1.0;
matthiasm@3	956 currval += b[iNote + 1 + 1] * 0.5;
matthiasm@1	957 f3.values.push_back(currval);
matthiasm@1	958 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
matthiasm@1	959 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
matthiasm@1	960 iSemitone++;
matthiasm@1	961 }
matthiasm@1	962
matthiasm@1	963 } else {
matthiasm@3	964 float x[84+1000];
matthiasm@3	965 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
matthiasm@3	966 // for (int i = 0; i < 84; ++i) {
matthiasm@3	967 // x[i] = b[3*i+3];
matthiasm@3	968 // }
matthiasm@3	969 float rnorm;
matthiasm@3	970 float w[84+1000];
matthiasm@3	971 float zz[84+1000];
matthiasm@3	972 int indx[84+1000];
matthiasm@1	973 int mode;
matthiasm@3	974 float curr_dict[256*84];
matthiasm@3	975 for (unsigned i = 0; i < 256 * 84; ++i) {
matthiasm@3	976 curr_dict[i] = 1.0 * m_dict[i];
matthiasm@3	977 }
matthiasm@3	978 nnls(curr_dict, nNote, nNote, 84, b, x, &rnorm, w, zz, indx, &mode);
matthiasm@3	979 for (unsigned iNote = 0; iNote < 84; ++iNote) {
matthiasm@3	980 // for (unsigned kNote = 0; kNote < 256; ++kNote) {
matthiasm@3	981 // x[iNote] += m_dict[kNote + nNote * iNote] * b[kNote];
matthiasm@3	982 // }
matthiasm@3	983 f3.values.push_back(x[iNote]);
matthiasm@3	984 // cerr << mode << endl;
matthiasm@3	985 chroma[iNote % 12] += x[iNote] * treblewindow[iNote];
matthiasm@3	986 basschroma[iNote % 12] += x[iNote] * basswindow[iNote];
matthiasm@3	987 // iSemitone++;
matthiasm@3	988 }
matthiasm@1	989 }
matthiasm@1	990 }
matthiasm@1	991
matthiasm@1	992 f4.values = chroma;
matthiasm@1	993 f5.values = basschroma;
matthiasm@1	994 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
matthiasm@1	995 f6.values = chroma;
matthiasm@1	996
matthiasm@1	997 // local chord estimation
matthiasm@1	998 vector<float> currentChordSalience;
matthiasm@1	999 float tempchordvalue = 0;
matthiasm@1	1000 float sumchordvalue = 0;
matthiasm@1	1001 int nChord = nChorddict / 24;
matthiasm@1	1002 for (int iChord = 0; iChord < nChord; iChord++) {
matthiasm@1	1003 tempchordvalue = 0;
matthiasm@1	1004 for (int iBin = 0; iBin < 12; iBin++) {
matthiasm@1	1005 tempchordvalue += chorddict[24 * iChord + iBin] * chroma[iBin];
matthiasm@1	1006 }
matthiasm@1	1007 for (int iBin = 12; iBin < 24; iBin++) {
matthiasm@1	1008 tempchordvalue += chorddict[24 * iChord + iBin] * chroma[iBin];
matthiasm@1	1009 }
matthiasm@1	1010 sumchordvalue+=tempchordvalue;
matthiasm@1	1011 currentChordSalience.push_back(tempchordvalue);
matthiasm@1	1012 }
matthiasm@1	1013 for (int iChord = 0; iChord < nChord; iChord++) {
matthiasm@1	1014 currentChordSalience[iChord] /= sumchordvalue;
matthiasm@1	1015 }
matthiasm@1	1016 chordogram.push_back(currentChordSalience);
matthiasm@1	1017
matthiasm@1	1018 fsOut[3].push_back(f3);
matthiasm@1	1019 fsOut[4].push_back(f4);
matthiasm@1	1020 fsOut[5].push_back(f5);
matthiasm@1	1021 fsOut[6].push_back(f6);
matthiasm@1	1022 count++;
matthiasm@1	1023 }
matthiasm@0	1024 // int musicitykernelwidth = (50 * 2048) / m_stepSize;
matthiasm@0	1025 //
matthiasm@3	1026 /* Simple chord estimation
matthiasm@3	1027 I just take the local chord estimates ("currentChordSalience") and average them over time, then
matthiasm@3	1028 take the maximum. Very simple, don't do this at home...
matthiasm@3	1029 */
matthiasm@3	1030 count = 0;
matthiasm@3	1031 int halfwindowlength = m_inputSampleRate / m_stepSize;
matthiasm@3	1032 int nChord = nChorddict / 24;
matthiasm@3	1033 vector<int> chordSequence;
matthiasm@3	1034 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram
matthiasm@3	1035 vector<int> temp = vector<int>(nChord,0);
matthiasm@3	1036 scoreChordogram.push_back(temp);
matthiasm@3	1037 }
matthiasm@3	1038 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end()-2*halfwindowlength-1; ++it) {
matthiasm@3	1039 int startIndex = count + 1;
matthiasm@3	1040 int endIndex = count + 2 * halfwindowlength;
matthiasm@3	1041 vector<float> temp = vector<float>(nChord,0);
matthiasm@3	1042 float maxval = 0; // will be the value of the most salient chord in this frame
matthiasm@3	1043 float maxindex = nChord-1; //... and the index thereof
matthiasm@3	1044 unsigned bestchordL = 0; // index of the best "left" chord
matthiasm@3	1045 unsigned bestchordR = 0; // index of the best "right" chord
matthiasm@3	1046 for (unsigned iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
matthiasm@3	1047 // now find the max values on both sides of iWF
matthiasm@3	1048 // left side:
matthiasm@3	1049 float maxL = 0;
matthiasm@3	1050 unsigned maxindL = nChord-1;
matthiasm@3	1051 for (unsigned iChord = 0; iChord < nChord; iChord++) {
matthiasm@3	1052 float currsum = 0;
matthiasm@3	1053 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
matthiasm@3	1054 currsum += chordogram[count+iFrame][iChord];
matthiasm@3	1055 }
matthiasm@3	1056 if (iChord == nChord-1) currsum *= 0.8;
matthiasm@3	1057 if (currsum > maxL) {
matthiasm@3	1058 maxL = currsum;
matthiasm@3	1059 maxindL = iChord;
matthiasm@3	1060 }
matthiasm@3	1061 }
matthiasm@3	1062 // right side:
matthiasm@3	1063 float maxR = 0;
matthiasm@3	1064 unsigned maxindR = nChord-1;
matthiasm@3	1065 for (unsigned iChord = 0; iChord < nChord; iChord++) {
matthiasm@3	1066 float currsum = 0;
matthiasm@3	1067 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
matthiasm@3	1068 currsum += chordogram[count+iFrame][iChord];
matthiasm@3	1069 }
matthiasm@3	1070 if (iChord == nChord-1) currsum *= 0.8;
matthiasm@3	1071 if (currsum > maxR) {
matthiasm@3	1072 maxR = currsum;
matthiasm@3	1073 maxindR = iChord;
matthiasm@3	1074 }
matthiasm@3	1075 }
matthiasm@3	1076 if (maxL+maxR > maxval) {
matthiasm@3	1077 maxval = maxL+maxR;
matthiasm@3	1078 maxindex = iWF;
matthiasm@3	1079 bestchordL = maxindL;
matthiasm@3	1080 bestchordR = maxindR;
matthiasm@3	1081 }
matthiasm@3	1082
matthiasm@3	1083 }
matthiasm@3	1084 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
matthiasm@3	1085 // add a score to every chord-frame-point that was part of a maximum
matthiasm@3	1086 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
matthiasm@3	1087 scoreChordogram[iFrame+count][bestchordL]++;
matthiasm@3	1088 }
matthiasm@3	1089 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
matthiasm@3	1090 scoreChordogram[iFrame+count][bestchordR]++;
matthiasm@3	1091 }
matthiasm@3	1092 count++;
matthiasm@3	1093 }
matthiasm@3	1094
matthiasm@3	1095 count = 0;
matthiasm@3	1096 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
matthiasm@3	1097 float maxval = 0; // will be the value of the most salient chord in this frame
matthiasm@3	1098 float maxindex = 0; //... and the index thereof
matthiasm@3	1099 for (unsigned iChord = 0; iChord < nChord; iChord++) {
matthiasm@3	1100 if (scoreChordogram[count][iChord] > maxval) {
matthiasm@3	1101 maxval = scoreChordogram[count][iChord];
matthiasm@3	1102 maxindex = iChord;
matthiasm@3	1103 cerr << iChord << endl;
matthiasm@3	1104 }
matthiasm@3	1105 }
matthiasm@3	1106 chordSequence.push_back(maxindex);
matthiasm@3	1107 cerr << "before modefilter, maxindex: " << maxindex << endl;
matthiasm@3	1108 count++;
matthiasm@3	1109 }
matthiasm@3	1110
matthiasm@3	1111
matthiasm@3	1112 // mode filter on chordSequence
matthiasm@3	1113 count = 0;
matthiasm@3	1114 int oldChordIndex = -1;
matthiasm@3	1115 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
matthiasm@3	1116 Feature f6 = *it;
matthiasm@3	1117 Feature f7; // chord estimate
matthiasm@3	1118 f7.hasTimestamp = true;
matthiasm@3	1119 f7.timestamp = f6.timestamp;
matthiasm@3	1120 vector<int> chordCount = vector<int>(nChord,0);
matthiasm@3	1121 int maxChordCount = 0;
matthiasm@3	1122 int maxChordIndex = nChord-1;
matthiasm@3	1123 // int startIndex = max(count - halfwindowlength,0);
matthiasm@3	1124 // int endIndex = min(int(chordogram.size()), startIndex + halfwindowlength);
matthiasm@3	1125 // for (int i = startIndex; i < endIndex; i++) {
matthiasm@3	1126 // chordCount[chordSequence[i]]++;
matthiasm@3	1127 // if (chordCount[chordSequence[i]] > maxChordCount) {
matthiasm@3	1128 // maxChordCount++;
matthiasm@3	1129 // maxChordIndex = chordSequence[i];
matthiasm@3	1130 // }
matthiasm@3	1131 // }
matthiasm@3	1132 maxChordIndex = chordSequence[count];
matthiasm@3	1133 if (oldChordIndex != maxChordIndex) {
matthiasm@3	1134 oldChordIndex = maxChordIndex;
matthiasm@3	1135
matthiasm@3	1136 char buffer1 [50];
matthiasm@3	1137 if (maxChordIndex < nChord - 1) {
matthiasm@3	1138 sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
matthiasm@3	1139 } else {
matthiasm@3	1140 sprintf(buffer1, "N");
matthiasm@3	1141 }
matthiasm@3	1142 f7.label = buffer1;
matthiasm@3	1143 fsOut[7].push_back(f7);
matthiasm@3	1144 }
matthiasm@3	1145 count++;
matthiasm@3	1146 }
matthiasm@0	1147 // // musicity
matthiasm@0	1148 // count = 0;
matthiasm@0	1149 // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
matthiasm@0	1150 // vector<float> musicityValue;
matthiasm@0	1151 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
matthiasm@0	1152 // Feature f4 = *it;
matthiasm@0	1153 //
matthiasm@0	1154 // int startIndex = max(count - musicitykernelwidth/2,0);
matthiasm@0	1155 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
matthiasm@0	1156 // float chromasum = 0;
matthiasm@0	1157 // float diffsum = 0;
matthiasm@0	1158 // for (int k = 0; k < 12; k++) {
matthiasm@0	1159 // for (int i = startIndex + 1; i < endIndex; i++) {
matthiasm@0	1160 // chromasum += pow(fsOut[4][i].values[k],2);
matthiasm@0	1161 // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]);
matthiasm@0	1162 // }
matthiasm@0	1163 // }
matthiasm@0	1164 // diffsum /= chromasum;
matthiasm@0	1165 // musicityValue.push_back(diffsum);
matthiasm@0	1166 // count++;
matthiasm@0	1167 // }
matthiasm@0	1168 //
matthiasm@0	1169 // float musicityThreshold = 0.44;
matthiasm@0	1170 // if (m_stepSize == 4096) {
matthiasm@0	1171 // musicityThreshold = 0.74;
matthiasm@0	1172 // }
matthiasm@0	1173 // if (m_stepSize == 4410) {
matthiasm@0	1174 // musicityThreshold = 0.77;
matthiasm@0	1175 // }
matthiasm@0	1176 //
matthiasm@0	1177 // count = 0;
matthiasm@0	1178 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
matthiasm@0	1179 // Feature f4 = *it;
matthiasm@0	1180 // Feature f8; // musicity
matthiasm@0	1181 // Feature f9; // musicity segmenter
matthiasm@0	1182 //
matthiasm@0	1183 // f8.hasTimestamp = true;
matthiasm@0	1184 // f8.timestamp = f4.timestamp;
matthiasm@0	1185 // f9.hasTimestamp = true;
matthiasm@0	1186 // f9.timestamp = f4.timestamp;
matthiasm@0	1187 //
matthiasm@0	1188 // int startIndex = max(count - musicitykernelwidth/2,0);
matthiasm@0	1189 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
matthiasm@0	1190 // int musicityCount = 0;
matthiasm@0	1191 // for (int i = startIndex; i <= endIndex; i++) {
matthiasm@0	1192 // if (musicityValue[i] > musicityThreshold) musicityCount++;
matthiasm@0	1193 // }
matthiasm@0	1194 // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1);
matthiasm@0	1195 //
matthiasm@0	1196 // if (isSpeech) {
matthiasm@0	1197 // if (oldlabeltype != 2) {
matthiasm@0	1198 // f9.label = "Speech";
matthiasm@0	1199 // fsOut[9].push_back(f9);
matthiasm@0	1200 // oldlabeltype = 2;
matthiasm@0	1201 // }
matthiasm@0	1202 // } else {
matthiasm@0	1203 // if (oldlabeltype != 1) {
matthiasm@0	1204 // f9.label = "Music";
matthiasm@0	1205 // fsOut[9].push_back(f9);
matthiasm@0	1206 // oldlabeltype = 1;
matthiasm@0	1207 // }
matthiasm@0	1208 // }
matthiasm@0	1209 // f8.values.push_back(musicityValue[count]);
matthiasm@0	1210 // fsOut[8].push_back(f8);
matthiasm@0	1211 // count++;
matthiasm@0	1212 // }
matthiasm@0	1213 return fsOut;
matthiasm@0	1214
matthiasm@0	1215 }
matthiasm@0	1216

Mercurial > hg > nnls-chroma

annotate NNLSChroma.cpp @ 3:8360483a026e matthiasm-plugin