| matthiasm@0 | 1 | 
| matthiasm@0 | 2 #include "NNLSChroma.h" | 
| matthiasm@0 | 3 #include <cmath> | 
| matthiasm@0 | 4 #include <list> | 
| matthiasm@0 | 5 #include <iostream> | 
| matthiasm@3 | 6 #include <fstream> | 
| matthiasm@0 | 7 #include <sstream> | 
| matthiasm@0 | 8 #include <cassert> | 
| matthiasm@0 | 9 #include <cstdio> | 
| matthiasm@1 | 10 #include "nnls.h" | 
| matthiasm@0 | 11 // #include "cblas.h" | 
| matthiasm@0 | 12 #include "chorddict.cpp" | 
| matthiasm@0 | 13 using namespace std; | 
| matthiasm@0 | 14 | 
// Numerically the sine and cosine of 120 degrees (2*pi/3); their use is not
// visible in this part of the file.
const float sinvalue = 0.866025404;
const float cosvalue = -0.5;

// Symmetric 19-tap smoothing window; the coefficients sum to roughly 1.
const float hammingwind[19] = {
    0.0082, 0.0110, 0.0191, 0.0316, 0.0470, 0.0633, 0.0786, 0.0911, 0.0992,
    0.1020,
    0.0992, 0.0911, 0.0786, 0.0633, 0.0470, 0.0316, 0.0191, 0.0110, 0.0082
};

// Weighting profile that rises and falls over the first 37 entries and is
// zero for the remaining ones.
const float basswindow[] = {
    0.001769, 0.015848, 0.043608, 0.084265, 0.136670, 0.199341, 0.270509, 0.348162, 0.430105, 0.514023,
    0.597545, 0.678311, 0.754038, 0.822586, 0.882019, 0.930656, 0.967124, 0.990393, 0.999803, 0.995091,
    0.976388, 0.944223, 0.899505, 0.843498, 0.777785, 0.704222, 0.624888, 0.542025, 0.457975, 0.375112,
    0.295778, 0.222215, 0.156502, 0.100495, 0.055777, 0.023612, 0.004909,
    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000
};

// Symmetric weighting profile peaking in the middle of its 84 entries.
const float treblewindow[] = {
    0.000350, 0.003144, 0.008717, 0.017037, 0.028058, 0.041719, 0.057942, 0.076638, 0.097701, 0.121014,
    0.146447, 0.173856, 0.203090, 0.233984, 0.266366, 0.300054, 0.334860, 0.370590, 0.407044, 0.444018,
    0.481304, 0.518696, 0.555982, 0.592956, 0.629410, 0.665140, 0.699946, 0.733634, 0.766016, 0.796910,
    0.826144, 0.853553, 0.878986, 0.902299, 0.923362, 0.942058, 0.958281, 0.971942, 0.982963, 0.991283,
    0.996856, 0.999650,
    0.999650, 0.996856,
    0.991283, 0.982963, 0.971942, 0.958281, 0.942058, 0.923362, 0.902299, 0.878986, 0.853553, 0.826144,
    0.796910, 0.766016, 0.733634, 0.699946, 0.665140, 0.629410, 0.592956, 0.555982, 0.518696, 0.481304,
    0.444018, 0.407044, 0.370590, 0.334860, 0.300054, 0.266366, 0.233984, 0.203090, 0.173856, 0.146447,
    0.121014, 0.097701, 0.076638, 0.057942, 0.041719, 0.028058, 0.017037, 0.008717, 0.003144, 0.000350
};

// Bin labels: twelve bass pitch classes followed by the twelve plain pitch
// classes, both starting at A.
const char* notenames[24] = {
    "A  (bass)", "Bb (bass)", "B  (bass)", "C  (bass)", "C# (bass)", "D  (bass)",
    "Eb (bass)", "E  (bass)", "F  (bass)", "F# (bass)", "G  (bass)", "Ab (bass)",
    "A", "Bb", "B", "C", "C#", "D", "Eb", "E", "F", "F#", "G", "Ab"
};

// hammingwind as a vector, ready to be passed as a convolution kernel.
const std::vector<float> hw(hammingwind, hammingwind + 19);

// Number of bins of the log-frequency spectrum.
const int nNote = 256;
| matthiasm@0 | 24 | 
/** Special Convolution
special convolution is as long as the convolvee, i.e. the first argument. In the valid core part of the
convolution it contains the usual convolution values, but the pads at the beginning (ending) have the same
values as the first (last) valid convolution bin.

convolvee: the signal to be smoothed.
kernel:    the filter taps; its length must be odd so that it has a centre tap (checked by assert only).
returns:   a vector with convolvee.size() entries. If the convolvee is shorter than the kernel (or the
           kernel is empty) there is no valid bin at all and the result is all zeros.
**/

const bool debug_on = false;

std::vector<float> SpecialConvolution(std::vector<float> convolvee, std::vector<float> kernel)
{
    const int lenConvolvee = static_cast<int>(convolvee.size());
    const int lenKernel = static_cast<int>(kernel.size());
    assert(lenKernel % 2 != 0); // no exception handling !!!

    // Size the result from the input instead of a fixed 256, so that longer
    // inputs cannot write out of bounds.
    std::vector<float> Z(convolvee.size(), 0.0f);

    // Without a single valid bin there is nothing to copy into the pads.
    if (lenKernel == 0 || lenConvolvee < lenKernel) {
        return Z;
    }

    const int half = lenKernel / 2;

    // valid core: every output bin whose kernel support lies inside the input
    for (int n = lenKernel - 1; n < lenConvolvee; ++n) {
        float s = 0.0f;
        for (int m = 0; m < lenKernel; ++m) {
            s += convolvee[n - m] * kernel[m];
        }
        Z[n - half] = s;
    }

    // fill lower and upper pads with the first and last valid bin
    for (int n = 0; n < half; ++n) {
        Z[n] = Z[half];
    }
    for (int n = lenConvolvee - half; n < lenConvolvee; ++n) {
        Z[n] = Z[lenConvolvee - half - 1];
    }
    return Z;
}
| matthiasm@0 | 60 | 
| matthiasm@0 | 61 // vector<float> FftBin2Frequency(vector<float> binnumbers, int fs, int blocksize) | 
| matthiasm@0 | 62 // { | 
| matthiasm@0 | 63 // 	vector<float> freq(binnumbers.size, 0.0); | 
| matthiasm@0 | 64 // 	for (unsigned i = 0; i < binnumbers.size; ++i) { | 
| matthiasm@0 | 65 // 		freq[i] = (binnumbers[i]-1.0) * fs * 1.0 / blocksize; | 
| matthiasm@0 | 66 // 	} | 
| matthiasm@0 | 67 // 	return freq; | 
| matthiasm@0 | 68 // } | 
| matthiasm@0 | 69 | 
// Raised-cosine pulse: 1 at x == centre, falling to 0 at a distance of half
// the width, and exactly 0 everywhere outside that support.
float cospuls(float x, float centre, float width)
{
    const float offset = x - centre;
    // written as a negated "inside" test so that NaN input also yields 0
    if (!(std::abs(offset) <= 0.5 * width)) {
        return 0.0;
    }
    const float recipwidth = 1.0 / width;
    return std::cos(offset * 2 * M_PI * recipwidth) * .5 + .5;
}
| matthiasm@0 | 78 | 
| matthiasm@0 | 79 float pitchCospuls(float x, float centre, int binsperoctave) | 
| matthiasm@0 | 80 { | 
| matthiasm@0 | 81 	float warpedf = -binsperoctave * (log2(centre) - log2(x)); | 
| matthiasm@0 | 82 	float out = cospuls(warpedf, 0.0, 2.0); | 
| matthiasm@0 | 83 	// now scale to correct for note density | 
| matthiasm@0 | 84 	float c = log(2.0)/binsperoctave; | 
| matthiasm@0 | 85 	if (x > 0) { | 
| matthiasm@0 | 86 		out = out / (c * x); | 
| matthiasm@0 | 87 	} else { | 
| matthiasm@0 | 88 		out = 0; | 
| matthiasm@0 | 89 	} | 
| matthiasm@0 | 90 	return out; | 
| matthiasm@0 | 91 } | 
| matthiasm@0 | 92 | 
| matthiasm@0 | 93 bool logFreqMatrix(int fs, int blocksize, float *outmatrix) { | 
| matthiasm@0 | 94 | 
| matthiasm@0 | 95 	int binspersemitone = 3; // this must be 3 | 
| matthiasm@0 | 96 	int minoctave = 0; // this must be 0 | 
| matthiasm@0 | 97 	int maxoctave = 7; // this must be 7 | 
| matthiasm@1 | 98 	int oversampling = 80; | 
| matthiasm@0 | 99 | 
| matthiasm@0 | 100 	// linear frequency vector | 
| matthiasm@0 | 101 	vector<float> fft_f; | 
| matthiasm@0 | 102 	for (int i = 0; i < blocksize/2; ++i) { | 
| matthiasm@0 | 103 		fft_f.push_back(i * (fs * 1.0 / blocksize)); | 
| matthiasm@0 | 104 	} | 
| matthiasm@0 | 105 	float fft_width = fs * 2.0 / blocksize; | 
| matthiasm@0 | 106 | 
| matthiasm@0 | 107 	// linear oversampled frequency vector | 
| matthiasm@0 | 108 	vector<float> oversampled_f; | 
| matthiasm@0 | 109 	for (unsigned int i = 0; i < oversampling * blocksize/2; ++i) { | 
| matthiasm@0 | 110 		oversampled_f.push_back(i * ((fs * 1.0 / blocksize) / oversampling)); | 
| matthiasm@0 | 111 	} | 
| matthiasm@0 | 112 | 
| matthiasm@0 | 113 	// pitch-spaced frequency vector | 
| matthiasm@0 | 114 	int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone! | 
| matthiasm@0 | 115 	int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone! | 
| matthiasm@0 | 116 	vector<float> cq_f; | 
| matthiasm@0 | 117 	float oob = 1.0/binspersemitone; // one over binspersemitone | 
| matthiasm@0 | 118 	cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12 | 
| matthiasm@0 | 119 	cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69))); | 
| matthiasm@0 | 120 	for (int i = minMIDI + 1; i < maxMIDI; ++i) { | 
| matthiasm@0 | 121 		for (int k = -1; k < 2; ++k)	 { | 
| matthiasm@0 | 122 			cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69))); | 
| matthiasm@0 | 123 		} | 
| matthiasm@0 | 124 	} | 
| matthiasm@0 | 125 	cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69))); | 
| matthiasm@0 | 126 	cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69))); | 
| matthiasm@0 | 127 | 
| matthiasm@0 | 128 	int nFFT = fft_f.size(); | 
| matthiasm@0 | 129 | 
| matthiasm@0 | 130 	vector<float> fft_activation; | 
| matthiasm@0 | 131 	for (int iOS = 0; iOS < 2 * oversampling; ++iOS) { | 
| matthiasm@0 | 132 		float cosp = cospuls(oversampled_f[iOS],fft_f[1],fft_width); | 
| matthiasm@0 | 133 		fft_activation.push_back(cosp); | 
| matthiasm@0 | 134 		// cerr << cosp << endl; | 
| matthiasm@0 | 135 	} | 
| matthiasm@0 | 136 | 
| matthiasm@0 | 137 	float cq_activation; | 
| matthiasm@0 | 138 	for (int iFFT = 1; iFFT < nFFT; ++iFFT) { | 
| matthiasm@0 | 139 		// find frequency stretch where the oversampled vector can be non-zero (i.e. in a window of width fft_width around the current frequency) | 
| matthiasm@0 | 140 		int curr_start = oversampling * iFFT - oversampling; | 
| matthiasm@0 | 141 		int curr_end = oversampling * iFFT + oversampling; // don't know if I should add "+1" here | 
| matthiasm@0 | 142 		// cerr << oversampled_f[curr_start] << " " << fft_f[iFFT] << " " << oversampled_f[curr_end] << endl; | 
| matthiasm@0 | 143 		for (unsigned iCQ = 0; iCQ < cq_f.size(); ++iCQ) { | 
| matthiasm@0 | 144 			outmatrix[iFFT + nFFT * iCQ] = 0; | 
| matthiasm@1 | 145 			if (cq_f[iCQ] * pow(2.0, 0.084) + fft_width > fft_f[iFFT] && cq_f[iCQ] * pow(2.0, -0.084 * 2) - fft_width < fft_f[iFFT]) { // within a generous neighbourhood | 
| matthiasm@0 | 146 				for (int iOS = curr_start; iOS < curr_end; ++iOS) { | 
| matthiasm@0 | 147 					cq_activation = pitchCospuls(oversampled_f[iOS],cq_f[iCQ],binspersemitone*12); | 
| matthiasm@0 | 148 					// cerr << oversampled_f[iOS] << " " << cq_f[iCQ] << " " << cq_activation << endl; | 
| matthiasm@0 | 149 					outmatrix[iFFT + nFFT * iCQ] += cq_activation * fft_activation[iOS-curr_start]; | 
| matthiasm@0 | 150 				} | 
| matthiasm@0 | 151 				// if (iCQ == 1 || iCQ == 2) { | 
| matthiasm@0 | 152 				// 	cerr << " " << outmatrix[iFFT + nFFT * iCQ] << endl; | 
| matthiasm@0 | 153 				// } | 
| matthiasm@0 | 154 			} | 
| matthiasm@0 | 155 		} | 
| matthiasm@0 | 156 	} | 
| matthiasm@0 | 157 	return true; | 
| matthiasm@0 | 158 } | 
| matthiasm@0 | 159 | 
| matthiasm@3 | 160 bool dictionaryMatrix(float* dm) { | 
| matthiasm@1 | 161 	int binspersemitone = 3; // this must be 3 | 
| matthiasm@1 | 162 	int minoctave = 0; // this must be 0 | 
| matthiasm@1 | 163 	int maxoctave = 7; // this must be 7 | 
| matthiasm@4 | 164 	float s_param = 0.7; | 
| matthiasm@1 | 165 | 
| matthiasm@1 | 166 	// pitch-spaced frequency vector | 
| matthiasm@1 | 167 	int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone! | 
| matthiasm@1 | 168 	int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone! | 
| matthiasm@1 | 169 	vector<float> cq_f; | 
| matthiasm@1 | 170 	float oob = 1.0/binspersemitone; // one over binspersemitone | 
| matthiasm@1 | 171 	cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12 | 
| matthiasm@1 | 172 	cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69))); | 
| matthiasm@1 | 173 	for (int i = minMIDI + 1; i < maxMIDI; ++i) { | 
| matthiasm@1 | 174 		for (int k = -1; k < 2; ++k)	 { | 
| matthiasm@1 | 175 			cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69))); | 
| matthiasm@1 | 176 		} | 
| matthiasm@1 | 177 	} | 
| matthiasm@1 | 178 	cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69))); | 
| matthiasm@1 | 179 	cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69))); | 
| matthiasm@1 | 180 | 
| matthiasm@1 | 181 	float curr_f; | 
| matthiasm@1 | 182 	float floatbin; | 
| matthiasm@1 | 183 	float curr_amp; | 
| matthiasm@1 | 184 	// now for every combination calculate the matrix element | 
| matthiasm@1 | 185 	for (unsigned iOut = 0; iOut < 12 * (maxoctave - minoctave); ++iOut) { | 
| matthiasm@3 | 186 		// cerr << iOut << endl; | 
| matthiasm@1 | 187 		for (unsigned iHarm = 1; iHarm <= 20; ++iHarm) { | 
| matthiasm@1 | 188 			curr_f = 440 * pow(2,(minMIDI-69+iOut)*1.0/12) * iHarm; | 
| matthiasm@3 | 189 			// if (curr_f > cq_f[nNote-1])  break; | 
| matthiasm@3 | 190 			floatbin = ((iOut + 1) * binspersemitone + 1) + binspersemitone * 12 * log2(iHarm); | 
| matthiasm@3 | 191 			// cerr << floatbin << endl; | 
| matthiasm@1 | 192 			curr_amp = pow(s_param,float(iHarm-1)); | 
| matthiasm@3 | 193 			// cerr << "curramp" << curr_amp << endl; | 
| matthiasm@1 | 194 			for (unsigned iNote = 0; iNote < nNote; ++iNote) { | 
| matthiasm@3 | 195 				if (abs(iNote+1.0-floatbin)<2) { | 
| matthiasm@3 | 196 					dm[iNote  + 256 * iOut] += cospuls(iNote+1.0, floatbin, binspersemitone + 0.0) * curr_amp; | 
| matthiasm@3 | 197 					// dm[iNote + nNote * iOut] += 1 * curr_amp; | 
| matthiasm@3 | 198 				} | 
| matthiasm@1 | 199 			} | 
| matthiasm@3 | 200 		} | 
| matthiasm@1 | 201 	} | 
| matthiasm@3 | 202 | 
| matthiasm@3 | 203 | 
| matthiasm@1 | 204 } | 
| matthiasm@1 | 205 | 
| matthiasm@0 | 206 | 
| matthiasm@0 | 207 NNLSChroma::NNLSChroma(float inputSampleRate) : | 
| matthiasm@0 | 208   Plugin(inputSampleRate), | 
| matthiasm@0 | 209   m_fl(0), | 
| matthiasm@0 | 210   m_blockSize(0), | 
| matthiasm@0 | 211   m_stepSize(0), | 
| matthiasm@0 | 212   m_lengthOfNoteIndex(0), | 
| matthiasm@0 | 213   m_meanTuning0(0), | 
| matthiasm@0 | 214   m_meanTuning1(0), | 
| matthiasm@0 | 215   m_meanTuning2(0), | 
| matthiasm@0 | 216   m_localTuning0(0), | 
| matthiasm@0 | 217   m_localTuning1(0), | 
| matthiasm@0 | 218   m_localTuning2(0), | 
| matthiasm@4 | 219   m_paling(1.0), | 
| matthiasm@3 | 220   m_preset(0.0), | 
| matthiasm@0 | 221   m_localTuning(0), | 
| matthiasm@0 | 222   m_kernelValue(0), | 
| matthiasm@0 | 223   m_kernelFftIndex(0), | 
| matthiasm@0 | 224   m_kernelNoteIndex(0), | 
| matthiasm@1 | 225   m_dict(0), | 
| matthiasm@0 | 226   m_tuneLocal(false), | 
| matthiasm@0 | 227   m_dictID(0) | 
| matthiasm@0 | 228 { | 
| matthiasm@0 | 229 	if (debug_on) cerr << "--> NNLSChroma" << endl; | 
| matthiasm@3 | 230 	m_dict = new float[nNote * 84]; | 
| matthiasm@3 | 231 	for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0; | 
| matthiasm@1 | 232 	dictionaryMatrix(m_dict); | 
| matthiasm@0 | 233 } | 
| matthiasm@0 | 234 | 
| matthiasm@0 | 235 | 
| matthiasm@0 | 236 NNLSChroma::~NNLSChroma() | 
| matthiasm@0 | 237 { | 
| matthiasm@0 | 238 		if (debug_on) cerr << "--> ~NNLSChroma" << endl; | 
| matthiasm@1 | 239 		delete [] m_dict; | 
| matthiasm@0 | 240 } | 
| matthiasm@0 | 241 | 
| matthiasm@0 | 242 string | 
| matthiasm@0 | 243 NNLSChroma::getIdentifier() const | 
| matthiasm@0 | 244 { | 
| matthiasm@0 | 245 	if (debug_on) cerr << "--> getIdentifier" << endl; | 
| matthiasm@0 | 246     return "nnls_chroma"; | 
| matthiasm@0 | 247 } | 
| matthiasm@0 | 248 | 
| matthiasm@0 | 249 string | 
| matthiasm@0 | 250 NNLSChroma::getName() const | 
| matthiasm@0 | 251 { | 
| matthiasm@0 | 252 		if (debug_on) cerr << "--> getName" << endl; | 
| matthiasm@0 | 253     return "NNLS Chroma"; | 
| matthiasm@0 | 254 } | 
| matthiasm@0 | 255 | 
| matthiasm@0 | 256 string | 
| matthiasm@0 | 257 NNLSChroma::getDescription() const | 
| matthiasm@0 | 258 { | 
| matthiasm@0 | 259     // Return something helpful here! | 
| matthiasm@0 | 260 	if (debug_on) cerr << "--> getDescription" << endl; | 
| matthiasm@4 | 261     return "This plugin provides a number of features derived from a log-frequency amplitude spectrum (LAS) of the DFT: the LAS itself, a standard-tuned version thereof (the local and global tuning estimates can are also be output), an approximate transcription to semitone activation using non-linear least squares (NNLS). Furthermore chroma features and a simple chord estimate derived from this NNLS semitone transcription."; | 
| matthiasm@0 | 262 } | 
| matthiasm@0 | 263 | 
| matthiasm@0 | 264 string | 
| matthiasm@0 | 265 NNLSChroma::getMaker() const | 
| matthiasm@0 | 266 { | 
| matthiasm@0 | 267 		if (debug_on) cerr << "--> getMaker" << endl; | 
| matthiasm@0 | 268     // Your name here | 
| matthiasm@0 | 269     return "Matthias Mauch"; | 
| matthiasm@0 | 270 } | 
| matthiasm@0 | 271 | 
| matthiasm@0 | 272 int | 
| matthiasm@0 | 273 NNLSChroma::getPluginVersion() const | 
| matthiasm@0 | 274 { | 
| matthiasm@0 | 275 		if (debug_on) cerr << "--> getPluginVersion" << endl; | 
| matthiasm@0 | 276     // Increment this each time you release a version that behaves | 
| matthiasm@0 | 277     // differently from the previous one | 
| matthiasm@0 | 278     return 1; | 
| matthiasm@0 | 279 } | 
| matthiasm@0 | 280 | 
| matthiasm@0 | 281 string | 
| matthiasm@0 | 282 NNLSChroma::getCopyright() const | 
| matthiasm@0 | 283 { | 
| matthiasm@0 | 284 		if (debug_on) cerr << "--> getCopyright" << endl; | 
| matthiasm@0 | 285     // This function is not ideally named.  It does not necessarily | 
| matthiasm@0 | 286     // need to say who made the plugin -- getMaker does that -- but it | 
| matthiasm@0 | 287     // should indicate the terms under which it is distributed.  For | 
| matthiasm@0 | 288     // example, "Copyright (year). All Rights Reserved", or "GPL" | 
| matthiasm@0 | 289     return "Copyright (2010). All rights reserved."; | 
| matthiasm@0 | 290 } | 
| matthiasm@0 | 291 | 
| matthiasm@0 | 292 NNLSChroma::InputDomain | 
| matthiasm@0 | 293 NNLSChroma::getInputDomain() const | 
| matthiasm@0 | 294 { | 
| matthiasm@0 | 295 		if (debug_on) cerr << "--> getInputDomain" << endl; | 
| matthiasm@0 | 296     return FrequencyDomain; | 
| matthiasm@0 | 297 } | 
| matthiasm@0 | 298 | 
| matthiasm@0 | 299 size_t | 
| matthiasm@0 | 300 NNLSChroma::getPreferredBlockSize() const | 
| matthiasm@0 | 301 { | 
| matthiasm@0 | 302 		if (debug_on) cerr << "--> getPreferredBlockSize" << endl; | 
| matthiasm@0 | 303     return 16384; // 0 means "I can handle any block size" | 
| matthiasm@0 | 304 } | 
| matthiasm@0 | 305 | 
| matthiasm@0 | 306 size_t | 
| matthiasm@0 | 307 NNLSChroma::getPreferredStepSize() const | 
| matthiasm@0 | 308 { | 
| matthiasm@0 | 309 		if (debug_on) cerr << "--> getPreferredStepSize" << endl; | 
| matthiasm@0 | 310     return 2048; // 0 means "anything sensible"; in practice this | 
| matthiasm@0 | 311               // means the same as the block size for TimeDomain | 
| matthiasm@0 | 312               // plugins, or half of it for FrequencyDomain plugins | 
| matthiasm@0 | 313 } | 
| matthiasm@0 | 314 | 
| matthiasm@0 | 315 size_t | 
| matthiasm@0 | 316 NNLSChroma::getMinChannelCount() const | 
| matthiasm@0 | 317 { | 
| matthiasm@0 | 318 	if (debug_on) cerr << "--> getMinChannelCount" << endl; | 
| matthiasm@0 | 319     return 1; | 
| matthiasm@0 | 320 } | 
| matthiasm@0 | 321 | 
| matthiasm@0 | 322 size_t | 
| matthiasm@0 | 323 NNLSChroma::getMaxChannelCount() const | 
| matthiasm@0 | 324 { | 
| matthiasm@0 | 325 		if (debug_on) cerr << "--> getMaxChannelCount" << endl; | 
| matthiasm@0 | 326     return 1; | 
| matthiasm@0 | 327 } | 
| matthiasm@0 | 328 | 
| matthiasm@0 | 329 NNLSChroma::ParameterList | 
| matthiasm@0 | 330 NNLSChroma::getParameterDescriptors() const | 
| matthiasm@0 | 331 { | 
| matthiasm@0 | 332 		if (debug_on) cerr << "--> getParameterDescriptors" << endl; | 
| matthiasm@0 | 333     ParameterList list; | 
| matthiasm@0 | 334 | 
| matthiasm@3 | 335     ParameterDescriptor d3; | 
| matthiasm@3 | 336     d3.identifier = "preset"; | 
| matthiasm@3 | 337     d3.name = "preset"; | 
| matthiasm@3 | 338     d3.description = "Spectral paling: no paling - 0; whitening - 1."; | 
| matthiasm@3 | 339     d3.unit = ""; | 
| matthiasm@3 | 340 	d3.isQuantized = true; | 
| matthiasm@3 | 341 	d3.quantizeStep = 1; | 
| matthiasm@3 | 342     d3.minValue = 0.0; | 
| matthiasm@4 | 343     d3.maxValue = 3.0; | 
| matthiasm@3 | 344     d3.defaultValue = 0.0; | 
| matthiasm@3 | 345     d3.valueNames.push_back("polyphonic pop"); | 
| matthiasm@3 | 346 	d3.valueNames.push_back("polyphonic pop (fast)"); | 
| matthiasm@3 | 347     d3.valueNames.push_back("solo keyboard"); | 
| matthiasm@3 | 348 	d3.valueNames.push_back("manual"); | 
| matthiasm@3 | 349     list.push_back(d3); | 
| matthiasm@4 | 350 | 
| matthiasm@4 | 351     // ParameterDescriptor d0; | 
| matthiasm@4 | 352     //  d0.identifier = "notedict"; | 
| matthiasm@4 | 353     //  d0.name = "note dictionary"; | 
| matthiasm@4 | 354     //  d0.description = "Notes in different note dictionaries differ by their spectral shapes."; | 
| matthiasm@4 | 355     //  d0.unit = ""; | 
| matthiasm@4 | 356     //  d0.minValue = 0; | 
| matthiasm@4 | 357     //  d0.maxValue = 1; | 
| matthiasm@4 | 358     //  d0.defaultValue = 0; | 
| matthiasm@4 | 359     //  d0.isQuantized = true; | 
| matthiasm@4 | 360     //  d0.valueNames.push_back("s = 0.6"); | 
| matthiasm@4 | 361     //  d0.valueNames.push_back("no NNLS"); | 
| matthiasm@4 | 362     //  d0.quantizeStep = 1.0; | 
| matthiasm@4 | 363     //  list.push_back(d0); | 
| matthiasm@4 | 364 | 
| matthiasm@4 | 365     ParameterDescriptor d1; | 
| matthiasm@4 | 366     d1.identifier = "tuningmode"; | 
| matthiasm@4 | 367     d1.name = "tuning mode"; | 
| matthiasm@4 | 368     d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing."; | 
| matthiasm@4 | 369     d1.unit = ""; | 
| matthiasm@4 | 370     d1.minValue = 0; | 
| matthiasm@4 | 371     d1.maxValue = 1; | 
| matthiasm@4 | 372     d1.defaultValue = 0; | 
| matthiasm@4 | 373     d1.isQuantized = true; | 
| matthiasm@4 | 374     d1.valueNames.push_back("global tuning"); | 
| matthiasm@4 | 375     d1.valueNames.push_back("local tuning"); | 
| matthiasm@4 | 376     d1.quantizeStep = 1.0; | 
| matthiasm@4 | 377     list.push_back(d1); | 
| matthiasm@4 | 378 | 
| matthiasm@4 | 379 	//     ParameterDescriptor d2; | 
| matthiasm@4 | 380 	//     d2.identifier = "paling"; | 
| matthiasm@4 | 381 	//     d2.name = "spectral paling"; | 
| matthiasm@4 | 382 	//     d2.description = "Spectral paling: no paling - 0; whitening - 1."; | 
| matthiasm@4 | 383 	//     d2.unit = ""; | 
| matthiasm@4 | 384 	// d2.isQuantized = true; | 
| matthiasm@4 | 385 	// // d2.quantizeStep = 0.1; | 
| matthiasm@4 | 386 	//     d2.minValue = 0.0; | 
| matthiasm@4 | 387 	//     d2.maxValue = 1.0; | 
| matthiasm@4 | 388 	//     d2.defaultValue = 1.0; | 
| matthiasm@4 | 389 	//     d2.isQuantized = false; | 
| matthiasm@4 | 390 	//     list.push_back(d2); | 
| matthiasm@4 | 391 | 
| matthiasm@0 | 392     return list; | 
| matthiasm@0 | 393 } | 
| matthiasm@0 | 394 | 
| matthiasm@0 | 395 float | 
| matthiasm@0 | 396 NNLSChroma::getParameter(string identifier) const | 
| matthiasm@0 | 397 { | 
| matthiasm@3 | 398 	if (debug_on) cerr << "--> getParameter" << endl; | 
| matthiasm@0 | 399     if (identifier == "notedict") { | 
| matthiasm@0 | 400         return m_dictID; | 
| matthiasm@0 | 401     } | 
| matthiasm@0 | 402 | 
| matthiasm@0 | 403     if (identifier == "paling") { | 
| matthiasm@0 | 404         return m_paling; | 
| matthiasm@0 | 405     } | 
| matthiasm@0 | 406 | 
| matthiasm@0 | 407     if (identifier == "tuningmode") { | 
| matthiasm@0 | 408         if (m_tuneLocal) { | 
| matthiasm@0 | 409             return 1.0; | 
| matthiasm@0 | 410         } else { | 
| matthiasm@0 | 411             return 0.0; | 
| matthiasm@0 | 412         } | 
| matthiasm@0 | 413     } | 
| matthiasm@3 | 414 	if (identifier == "preset") { | 
| matthiasm@3 | 415 		return m_preset; | 
| matthiasm@3 | 416     } | 
| matthiasm@0 | 417     return 0; | 
| matthiasm@0 | 418 | 
| matthiasm@0 | 419 } | 
| matthiasm@0 | 420 | 
| matthiasm@0 | 421 void | 
| matthiasm@0 | 422 NNLSChroma::setParameter(string identifier, float value) | 
| matthiasm@0 | 423 { | 
| matthiasm@3 | 424 	if (debug_on) cerr << "--> setParameter" << endl; | 
| matthiasm@0 | 425     if (identifier == "notedict") { | 
| matthiasm@0 | 426         m_dictID = (int) value; | 
| matthiasm@0 | 427     } | 
| matthiasm@0 | 428 | 
| matthiasm@0 | 429     if (identifier == "paling") { | 
| matthiasm@0 | 430         m_paling = value; | 
| matthiasm@0 | 431     } | 
| matthiasm@0 | 432 | 
| matthiasm@0 | 433     if (identifier == "tuningmode") { | 
| matthiasm@0 | 434         m_tuneLocal = (value > 0) ? true : false; | 
| matthiasm@0 | 435         // cerr << "m_tuneLocal :" << m_tuneLocal << endl; | 
| matthiasm@0 | 436     } | 
| matthiasm@3 | 437     if (identifier == "preset") { | 
| matthiasm@3 | 438         m_preset = value; | 
| matthiasm@3 | 439 		if (m_preset == 0.0) { | 
| matthiasm@3 | 440 			m_tuneLocal = false; | 
| matthiasm@3 | 441 			m_paling = 1.0; | 
| matthiasm@3 | 442 			m_dictID = 0.0; | 
| matthiasm@3 | 443 		} | 
| matthiasm@3 | 444 		if (m_preset == 1.0) { | 
| matthiasm@3 | 445 			m_tuneLocal = false; | 
| matthiasm@3 | 446 			m_paling = 1.0; | 
| matthiasm@3 | 447 			m_dictID = 1.0; | 
| matthiasm@3 | 448 		} | 
| matthiasm@3 | 449 		if (m_preset == 2.0) { | 
| matthiasm@3 | 450 			m_tuneLocal = false; | 
| matthiasm@3 | 451 			m_paling = 0.7; | 
| matthiasm@3 | 452 			m_dictID = 0.0; | 
| matthiasm@3 | 453 		} | 
| matthiasm@3 | 454     } | 
| matthiasm@0 | 455 } | 
| matthiasm@0 | 456 | 
| matthiasm@0 | 457 NNLSChroma::ProgramList | 
| matthiasm@0 | 458 NNLSChroma::getPrograms() const | 
| matthiasm@0 | 459 { | 
| matthiasm@0 | 460 		if (debug_on) cerr << "--> getPrograms" << endl; | 
| matthiasm@0 | 461     ProgramList list; | 
| matthiasm@0 | 462 | 
| matthiasm@0 | 463     // If you have no programs, return an empty list (or simply don't | 
| matthiasm@0 | 464     // implement this function or getCurrentProgram/selectProgram) | 
| matthiasm@0 | 465 | 
| matthiasm@0 | 466     return list; | 
| matthiasm@0 | 467 } | 
| matthiasm@0 | 468 | 
| matthiasm@0 | 469 string | 
| matthiasm@0 | 470 NNLSChroma::getCurrentProgram() const | 
| matthiasm@0 | 471 { | 
| matthiasm@0 | 472 		if (debug_on) cerr << "--> getCurrentProgram" << endl; | 
| matthiasm@0 | 473     return ""; // no programs | 
| matthiasm@0 | 474 } | 
| matthiasm@0 | 475 | 
| matthiasm@0 | 476 void | 
| matthiasm@0 | 477 NNLSChroma::selectProgram(string name) | 
| matthiasm@0 | 478 { | 
| matthiasm@0 | 479 		if (debug_on) cerr << "--> selectProgram" << endl; | 
| matthiasm@0 | 480 } | 
| matthiasm@0 | 481 | 
| matthiasm@0 | 482 | 
| matthiasm@0 | 483 NNLSChroma::OutputList | 
| matthiasm@0 | 484 NNLSChroma::getOutputDescriptors() const | 
| matthiasm@0 | 485 { | 
| matthiasm@0 | 486 		if (debug_on) cerr << "--> getOutputDescriptors" << endl; | 
| matthiasm@0 | 487     OutputList list; | 
| matthiasm@0 | 488 | 
| matthiasm@0 | 489     // Make chroma names for the binNames property | 
| matthiasm@0 | 490     vector<string> chromanames; | 
| matthiasm@0 | 491     vector<string> bothchromanames; | 
| matthiasm@0 | 492     for (int iNote = 0; iNote < 24; iNote++) { | 
| matthiasm@0 | 493         bothchromanames.push_back(notenames[iNote]); | 
| matthiasm@0 | 494         if (iNote < 12) { | 
| matthiasm@0 | 495             chromanames.push_back(notenames[iNote]); | 
| matthiasm@0 | 496         } | 
| matthiasm@0 | 497     } | 
| matthiasm@0 | 498 | 
| matthiasm@1 | 499 	// int nNote = 84; | 
| matthiasm@0 | 500 | 
| matthiasm@0 | 501     // See OutputDescriptor documentation for the possibilities here. | 
| matthiasm@0 | 502     // Every plugin must have at least one output. | 
| matthiasm@0 | 503 | 
| matthiasm@0 | 504     OutputDescriptor d0; | 
| matthiasm@0 | 505     d0.identifier = "tuning"; | 
| matthiasm@0 | 506     d0.name = "Tuning"; | 
| matthiasm@0 | 507     d0.description = "The concert pitch."; | 
| matthiasm@0 | 508     d0.unit = "Hz"; | 
| matthiasm@0 | 509     d0.hasFixedBinCount = true; | 
| matthiasm@0 | 510     d0.binCount = 0; | 
| matthiasm@0 | 511     d0.hasKnownExtents = true; | 
| matthiasm@0 | 512 	d0.minValue = 427.47; | 
| matthiasm@0 | 513 	d0.maxValue = 452.89; | 
| matthiasm@0 | 514     d0.isQuantized = false; | 
| matthiasm@0 | 515     d0.sampleType = OutputDescriptor::VariableSampleRate; | 
| matthiasm@0 | 516     d0.hasDuration = false; | 
| matthiasm@0 | 517     list.push_back(d0); | 
| matthiasm@0 | 518 | 
| matthiasm@0 | 519 	OutputDescriptor d1; | 
| matthiasm@0 | 520     d1.identifier = "logfreqspec"; | 
| matthiasm@0 | 521     d1.name = "Log-Frequency Spectrum"; | 
| matthiasm@0 | 522     d1.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping."; | 
| matthiasm@0 | 523     d1.unit = ""; | 
| matthiasm@0 | 524     d1.hasFixedBinCount = true; | 
| matthiasm@0 | 525     d1.binCount = nNote; | 
| matthiasm@0 | 526     d1.hasKnownExtents = false; | 
| matthiasm@0 | 527     d1.isQuantized = false; | 
| matthiasm@0 | 528     d1.sampleType = OutputDescriptor::FixedSampleRate; | 
| matthiasm@0 | 529     d1.hasDuration = false; | 
| matthiasm@0 | 530     d1.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; | 
| matthiasm@0 | 531     list.push_back(d1); | 
| matthiasm@0 | 532 | 
| matthiasm@0 | 533 	OutputDescriptor d2; | 
| matthiasm@0 | 534     d2.identifier = "tunedlogfreqspec"; | 
| matthiasm@0 | 535     d2.name = "Tuned Log-Frequency Spectrum"; | 
| matthiasm@0 | 536     d2.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping, then its tuned using the estimated tuning frequency."; | 
| matthiasm@0 | 537     d2.unit = ""; | 
| matthiasm@0 | 538     d2.hasFixedBinCount = true; | 
| matthiasm@0 | 539     d2.binCount = 256; | 
| matthiasm@0 | 540     d2.hasKnownExtents = false; | 
| matthiasm@0 | 541     d2.isQuantized = false; | 
| matthiasm@0 | 542     d2.sampleType = OutputDescriptor::FixedSampleRate; | 
| matthiasm@0 | 543     d2.hasDuration = false; | 
| matthiasm@0 | 544     d2.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; | 
| matthiasm@0 | 545     list.push_back(d2); | 
| matthiasm@0 | 546 | 
| matthiasm@0 | 547     OutputDescriptor d3; | 
| matthiasm@0 | 548     d3.identifier = "semitonespectrum"; | 
| matthiasm@0 | 549     d3.name = "Semitone Spectrum"; | 
| matthiasm@0 | 550     d3.description = "A semitone-spaced log-frequency spectrum derived from the third-of-a-semitone-spaced tuned log-frequency spectrum."; | 
| matthiasm@0 | 551     d3.unit = ""; | 
| matthiasm@0 | 552     d3.hasFixedBinCount = true; | 
| matthiasm@0 | 553     d3.binCount = 84; | 
| matthiasm@0 | 554     d3.hasKnownExtents = false; | 
| matthiasm@0 | 555     d3.isQuantized = false; | 
| matthiasm@0 | 556     d3.sampleType = OutputDescriptor::FixedSampleRate; | 
| matthiasm@0 | 557     d3.hasDuration = false; | 
| matthiasm@0 | 558     d3.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; | 
| matthiasm@0 | 559     list.push_back(d3); | 
| matthiasm@0 | 560 | 
| matthiasm@0 | 561     OutputDescriptor d4; | 
| matthiasm@0 | 562     d4.identifier = "chroma"; | 
| matthiasm@0 | 563     d4.name = "Chromagram"; | 
| matthiasm@0 | 564     d4.description = "Tuning-adjusted chromagram from NNLS soft transcription, with an emphasis on the medium note range."; | 
| matthiasm@0 | 565     d4.unit = ""; | 
| matthiasm@0 | 566     d4.hasFixedBinCount = true; | 
| matthiasm@0 | 567     d4.binCount = 12; | 
| matthiasm@0 | 568     d4.binNames = chromanames; | 
| matthiasm@0 | 569     d4.hasKnownExtents = false; | 
| matthiasm@0 | 570     d4.isQuantized = false; | 
| matthiasm@0 | 571     d4.sampleType = OutputDescriptor::FixedSampleRate; | 
| matthiasm@0 | 572     d4.hasDuration = false; | 
| matthiasm@0 | 573     d4.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; | 
| matthiasm@0 | 574     list.push_back(d4); | 
| matthiasm@0 | 575 | 
| matthiasm@0 | 576     OutputDescriptor d5; | 
| matthiasm@0 | 577     d5.identifier = "basschroma"; | 
| matthiasm@0 | 578     d5.name = "Bass Chromagram"; | 
| matthiasm@0 | 579     d5.description = "Tuning-adjusted bass chromagram from NNLS soft transcription, with an emphasis on the bass note range."; | 
| matthiasm@0 | 580     d5.unit = ""; | 
| matthiasm@0 | 581     d5.hasFixedBinCount = true; | 
| matthiasm@0 | 582     d5.binCount = 12; | 
| matthiasm@0 | 583     d5.binNames = chromanames; | 
| matthiasm@0 | 584     d5.hasKnownExtents = false; | 
| matthiasm@0 | 585     d5.isQuantized = false; | 
| matthiasm@0 | 586     d5.sampleType = OutputDescriptor::FixedSampleRate; | 
| matthiasm@0 | 587     d5.hasDuration = false; | 
| matthiasm@0 | 588     d5.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; | 
| matthiasm@0 | 589     list.push_back(d5); | 
| matthiasm@0 | 590 | 
| matthiasm@0 | 591     OutputDescriptor d6; | 
| matthiasm@0 | 592     d6.identifier = "bothchroma"; | 
| matthiasm@0 | 593     d6.name = "Chromagram and Bass Chromagram"; | 
| matthiasm@0 | 594     d6.description = "Tuning-adjusted chromagram and bass chromagram (stacked on top of each other) from NNLS soft transcription."; | 
| matthiasm@0 | 595     d6.unit = ""; | 
| matthiasm@0 | 596     d6.hasFixedBinCount = true; | 
| matthiasm@0 | 597     d6.binCount = 24; | 
| matthiasm@0 | 598     d6.binNames = bothchromanames; | 
| matthiasm@0 | 599     d6.hasKnownExtents = false; | 
| matthiasm@0 | 600     d6.isQuantized = false; | 
| matthiasm@0 | 601     d6.sampleType = OutputDescriptor::FixedSampleRate; | 
| matthiasm@0 | 602     d6.hasDuration = false; | 
| matthiasm@0 | 603     d6.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; | 
| matthiasm@0 | 604     list.push_back(d6); | 
| matthiasm@0 | 605 | 
| matthiasm@0 | 606     OutputDescriptor d7; | 
| matthiasm@0 | 607     d7.identifier = "simplechord"; | 
| matthiasm@0 | 608     d7.name = "Simple Chord Estimate"; | 
| matthiasm@0 | 609     d7.description = "A simple chord estimate based on the inner product of chord templates with the smoothed chroma."; | 
| matthiasm@0 | 610     d7.unit = ""; | 
| matthiasm@0 | 611     d7.hasFixedBinCount = true; | 
| matthiasm@0 | 612     d7.binCount = 0; | 
| matthiasm@0 | 613     d7.hasKnownExtents = false; | 
| matthiasm@0 | 614     d7.isQuantized = false; | 
| matthiasm@0 | 615     d7.sampleType = OutputDescriptor::VariableSampleRate; | 
| matthiasm@0 | 616     d7.hasDuration = false; | 
| matthiasm@0 | 617     d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; | 
| matthiasm@0 | 618     list.push_back(d7); | 
| matthiasm@0 | 619 | 
| matthiasm@1 | 620   	//   OutputDescriptor d8; | 
| matthiasm@1 | 621   	//     d8.identifier = "inconsistency"; | 
| matthiasm@1 | 622   	//     d8.name = "Harmonic inconsistency value"; | 
| matthiasm@1 | 623   	//     d8.description = "Harmonic inconsistency. Indicates music if low, non-music or speech when high."; | 
| matthiasm@1 | 624   	//     d8.unit = ""; | 
| matthiasm@1 | 625   	//     d8.hasFixedBinCount = true; | 
| matthiasm@1 | 626   	//     d8.binCount = 1; | 
| matthiasm@1 | 627   	//     d8.hasKnownExtents = false; | 
| matthiasm@1 | 628   	//     d8.isQuantized = false; | 
| matthiasm@1 | 629   	//     d8.sampleType = OutputDescriptor::FixedSampleRate; | 
| matthiasm@1 | 630   	//     d8.hasDuration = false; | 
| matthiasm@1 | 631   	//     d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; | 
| matthiasm@1 | 632   	//     list.push_back(d8); | 
| matthiasm@1 | 633   	// | 
| matthiasm@1 | 634   	//     OutputDescriptor d9; | 
| matthiasm@1 | 635   	//     d9.identifier = "inconsistencysegment"; | 
| matthiasm@1 | 636   	//     d9.name = "Harmonic inconsistency segmenter"; | 
| matthiasm@1 | 637   	//     d9.description = "Segments the audio based on the harmonic inconsistency value into speech and music."; | 
| matthiasm@1 | 638   	//     d9.unit = ""; | 
| matthiasm@1 | 639   	//     d9.hasFixedBinCount = true; | 
| matthiasm@1 | 640   	//     d9.binCount = 0; | 
| matthiasm@1 | 641   	//     d9.hasKnownExtents = true; | 
| matthiasm@1 | 642   	//     d9.minValue = 0.1; | 
| matthiasm@1 | 643   	// d9.maxValue = 0.9; | 
| matthiasm@1 | 644   	//     d9.isQuantized = false; | 
| matthiasm@1 | 645   	//     d9.sampleType = OutputDescriptor::VariableSampleRate; | 
| matthiasm@1 | 646   	//     d9.hasDuration = false; | 
| matthiasm@1 | 647   	//     d9.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; | 
| matthiasm@1 | 648   	//     list.push_back(d9); | 
| matthiasm@1 | 649   	// | 
| matthiasm@1 | 650   	OutputDescriptor d10; | 
| matthiasm@1 | 651   	    d10.identifier = "localtuning"; | 
| matthiasm@1 | 652   	    d10.name = "Local tuning"; | 
| matthiasm@4 | 653   	    d10.description = "Tuning based on the history up to this timestamp."; | 
| matthiasm@1 | 654   	    d10.unit = "Hz"; | 
| matthiasm@1 | 655   	    d10.hasFixedBinCount = true; | 
| matthiasm@1 | 656   	    d10.binCount = 1; | 
| matthiasm@1 | 657   	    d10.hasKnownExtents = true; | 
| matthiasm@1 | 658 		d10.minValue = 427.47; | 
| matthiasm@1 | 659   		d10.maxValue = 452.89; | 
| matthiasm@1 | 660   	    d10.isQuantized = false; | 
| matthiasm@3 | 661   	    d10.sampleType = OutputDescriptor::FixedSampleRate; | 
| matthiasm@1 | 662   	    d10.hasDuration = false; | 
| matthiasm@3 | 663   	    // d10.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; | 
| matthiasm@1 | 664   	    list.push_back(d10); | 
| matthiasm@1 | 665 | 
| matthiasm@0 | 666     return list; | 
| matthiasm@0 | 667 } | 
| matthiasm@0 | 668 | 
| matthiasm@0 | 669 | 
| matthiasm@0 | 670 bool | 
| matthiasm@0 | 671 NNLSChroma::initialise(size_t channels, size_t stepSize, size_t blockSize) | 
| matthiasm@0 | 672 { | 
| matthiasm@1 | 673 	if (debug_on) { | 
| matthiasm@1 | 674 		cerr << "--> initialise"; | 
| matthiasm@1 | 675 	} | 
| matthiasm@1 | 676 | 
| matthiasm@0 | 677     if (channels < getMinChannelCount() || | 
| matthiasm@0 | 678 	channels > getMaxChannelCount()) return false; | 
| matthiasm@0 | 679     m_blockSize = blockSize; | 
| matthiasm@0 | 680     m_stepSize = stepSize; | 
| matthiasm@0 | 681     frameCount = 0; | 
| matthiasm@0 | 682 	int tempn = 256 * m_blockSize/2; | 
| matthiasm@4 | 683 	// cerr << "length of tempkernel : " <<  tempn << endl; | 
| matthiasm@1 | 684 	float *tempkernel; | 
| matthiasm@1 | 685 | 
| matthiasm@1 | 686 	tempkernel = new float[tempn]; | 
| matthiasm@1 | 687 | 
| matthiasm@0 | 688 	logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel); | 
| matthiasm@1 | 689 	m_kernelValue.clear(); | 
| matthiasm@1 | 690 	m_kernelFftIndex.clear(); | 
| matthiasm@1 | 691 	m_kernelNoteIndex.clear(); | 
| matthiasm@1 | 692 	int countNonzero = 0; | 
| matthiasm@0 | 693 	for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix | 
| matthiasm@1 | 694 		for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) { | 
| matthiasm@1 | 695 			if (tempkernel[iFFT + blockSize/2 * iNote] > 0) { | 
| matthiasm@1 | 696 				m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]); | 
| matthiasm@0 | 697 				if (tempkernel[iFFT + blockSize/2 * iNote] > 0) { | 
| matthiasm@1 | 698 					countNonzero++; | 
| matthiasm@0 | 699 				} | 
| matthiasm@1 | 700 				m_kernelFftIndex.push_back(iFFT); | 
| matthiasm@1 | 701 				m_kernelNoteIndex.push_back(iNote); | 
| matthiasm@0 | 702 			} | 
| matthiasm@0 | 703 		} | 
| matthiasm@1 | 704 	} | 
| matthiasm@4 | 705 	// cerr << "nonzero count : " << countNonzero << endl; | 
| matthiasm@1 | 706 	delete [] tempkernel; | 
| matthiasm@3 | 707 	ofstream myfile; | 
| matthiasm@3 | 708 	myfile.open ("matrix.txt"); | 
| matthiasm@3 | 709     // myfile << "Writing this to a file.\n"; | 
| matthiasm@3 | 710 	for (int i = 0; i < nNote * 84; ++i) { | 
| matthiasm@3 | 711 		myfile << m_dict[i] << endl; | 
| matthiasm@3 | 712 	} | 
| matthiasm@3 | 713     myfile.close(); | 
| matthiasm@0 | 714     return true; | 
| matthiasm@0 | 715 } | 
| matthiasm@0 | 716 | 
| matthiasm@0 | 717 void | 
| matthiasm@0 | 718 NNLSChroma::reset() | 
| matthiasm@0 | 719 { | 
| matthiasm@4 | 720 	if (debug_on) cerr << "--> reset"; | 
| matthiasm@4 | 721 | 
| matthiasm@0 | 722     // Clear buffers, reset stored values, etc | 
| matthiasm@4 | 723 	frameCount = 0; | 
| matthiasm@4 | 724 	m_dictID = 0; | 
| matthiasm@4 | 725 	m_fl.clear(); | 
| matthiasm@4 | 726 	m_meanTuning0 = 0; | 
| matthiasm@4 | 727 	m_meanTuning1 = 0; | 
| matthiasm@4 | 728 	m_meanTuning2 = 0; | 
| matthiasm@4 | 729 	m_localTuning0 = 0; | 
| matthiasm@4 | 730 	m_localTuning1 = 0; | 
| matthiasm@4 | 731 	m_localTuning2 = 0; | 
| matthiasm@4 | 732 	m_localTuning.clear(); | 
| matthiasm@0 | 733 } | 
| matthiasm@0 | 734 | 
| matthiasm@0 | 735 NNLSChroma::FeatureSet | 
| matthiasm@0 | 736 NNLSChroma::process(const float *const *inputBuffers, Vamp::RealTime timestamp) | 
| matthiasm@0 | 737 { | 
| matthiasm@4 | 738 	if (debug_on) cerr << "--> process" << endl; | 
| matthiasm@4 | 739 | 
| matthiasm@0 | 740 	frameCount++; | 
| matthiasm@0 | 741 	float *magnitude = new float[m_blockSize/2]; | 
| matthiasm@0 | 742 | 
| matthiasm@0 | 743 	Feature f10; // local tuning | 
| matthiasm@3 | 744 	f10.hasTimestamp = true; | 
| matthiasm@4 | 745 	f10.timestamp = timestamp; | 
| matthiasm@0 | 746 	const float *fbuf = inputBuffers[0]; | 
| matthiasm@0 | 747 | 
| matthiasm@0 | 748 	// make magnitude | 
| matthiasm@0 | 749 	for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) { | 
| matthiasm@0 | 750 		magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] + | 
| matthiasm@0 | 751 			fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]); | 
| matthiasm@0 | 752 	} | 
| matthiasm@4 | 753 | 
| matthiasm@0 | 754 	// note magnitude mapping using pre-calculated matrix | 
| matthiasm@0 | 755 	float *nm  = new float[nNote]; // note magnitude | 
| matthiasm@0 | 756 	for (size_t iNote = 0; iNote < nNote; iNote++) { | 
| matthiasm@0 | 757 		nm[iNote] = 0; // initialise as 0 | 
| matthiasm@0 | 758 	} | 
| matthiasm@0 | 759 	int binCount = 0; | 
| matthiasm@0 | 760 	for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) { | 
| matthiasm@0 | 761 		// cerr << "."; | 
| matthiasm@1 | 762 		nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount]; | 
| matthiasm@1 | 763 		// cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl; | 
| matthiasm@0 | 764 		binCount++; | 
| matthiasm@0 | 765 	} | 
| matthiasm@1 | 766 	// cerr << nm[20]; | 
| matthiasm@1 | 767 	// cerr << endl; | 
| matthiasm@0 | 768 | 
| matthiasm@0 | 769 | 
| matthiasm@0 | 770     float one_over_N = 1.0/frameCount; | 
| matthiasm@0 | 771     // update means of complex tuning variables | 
| matthiasm@0 | 772     m_meanTuning0 *= float(frameCount-1)*one_over_N; | 
| matthiasm@0 | 773     m_meanTuning1 *= float(frameCount-1)*one_over_N; | 
| matthiasm@0 | 774     m_meanTuning2 *= float(frameCount-1)*one_over_N; | 
| matthiasm@0 | 775 | 
| matthiasm@0 | 776     for (int iTone = 0; iTone < 160; iTone = iTone + 3) { | 
| matthiasm@0 | 777         m_meanTuning0 += nm[iTone + 0]*one_over_N; | 
| matthiasm@0 | 778     	m_meanTuning1 += nm[iTone + 1]*one_over_N; | 
| matthiasm@0 | 779     	m_meanTuning2 += nm[iTone + 2]*one_over_N; | 
| matthiasm@3 | 780 		float ratioOld = 0.997; | 
| matthiasm@3 | 781         m_localTuning0 *= ratioOld; m_localTuning0 += nm[iTone + 0] * (1 - ratioOld); | 
| matthiasm@3 | 782         m_localTuning1 *= ratioOld; m_localTuning1 += nm[iTone + 1] * (1 - ratioOld); | 
| matthiasm@3 | 783         m_localTuning2 *= ratioOld; m_localTuning2 += nm[iTone + 2] * (1 - ratioOld); | 
| matthiasm@0 | 784     } | 
| matthiasm@0 | 785 | 
| matthiasm@0 | 786     // if (m_tuneLocal) { | 
| matthiasm@0 | 787     	// local tuning | 
| matthiasm@0 | 788         float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2; | 
| matthiasm@0 | 789         float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2; | 
| matthiasm@0 | 790         float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI); | 
| matthiasm@0 | 791         m_localTuning.push_back(normalisedtuning); | 
| matthiasm@0 | 792         float tuning440 = 440 * pow(2,normalisedtuning/12); | 
| matthiasm@0 | 793         f10.values.push_back(tuning440); | 
| matthiasm@3 | 794 		// cerr << tuning440 << endl; | 
| matthiasm@0 | 795     // } | 
| matthiasm@0 | 796 | 
| matthiasm@0 | 797 	Feature f1; // logfreqspec | 
| matthiasm@0 | 798 	f1.hasTimestamp = true; | 
| matthiasm@0 | 799     f1.timestamp = timestamp; | 
| matthiasm@0 | 800 	for (size_t iNote = 0; iNote < nNote; iNote++) { | 
| matthiasm@0 | 801 		f1.values.push_back(nm[iNote]); | 
| matthiasm@0 | 802 	} | 
| matthiasm@0 | 803 | 
| matthiasm@0 | 804 	FeatureSet fs; | 
| matthiasm@0 | 805 	fs[1].push_back(f1); | 
| matthiasm@3 | 806     fs[8].push_back(f10); | 
| matthiasm@0 | 807 | 
| matthiasm@0 | 808     // deletes | 
| matthiasm@0 | 809     delete[] magnitude; | 
| matthiasm@0 | 810     delete[] nm; | 
| matthiasm@0 | 811 | 
| matthiasm@0 | 812     m_fl.push_back(f1); // remember note magnitude for getRemainingFeatures | 
| matthiasm@0 | 813 	return fs; | 
| matthiasm@0 | 814 } | 
| matthiasm@0 | 815 | 
| matthiasm@0 | 816 NNLSChroma::FeatureSet | 
| matthiasm@0 | 817 NNLSChroma::getRemainingFeatures() | 
| matthiasm@0 | 818 { | 
| matthiasm@4 | 819 	if (debug_on) cerr << "--> getRemainingFeatures" << endl; | 
| matthiasm@4 | 820 	FeatureSet fsOut; | 
| matthiasm@4 | 821 	if (m_fl.size() == 0) return fsOut; | 
| matthiasm@0 | 822 	// | 
| matthiasm@1 | 823 	/**  Calculate Tuning | 
| matthiasm@1 | 824 		calculate tuning from (using the angle of the complex number defined by the | 
| matthiasm@1 | 825 		cumulative mean real and imag values) | 
| matthiasm@1 | 826 		**/ | 
| matthiasm@1 | 827 		float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2; | 
| matthiasm@1 | 828 		    float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2; | 
| matthiasm@1 | 829 		    float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI)); | 
| matthiasm@1 | 830 		    float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI); | 
| matthiasm@1 | 831 		    int intShift = floor(normalisedtuning * 3); | 
| matthiasm@1 | 832 		    float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this | 
| matthiasm@1 | 833 | 
| matthiasm@1 | 834 		    char buffer0 [50]; | 
| matthiasm@1 | 835 | 
| matthiasm@1 | 836 		    sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning); | 
| matthiasm@1 | 837 | 
| matthiasm@1 | 838 		    // cerr << "normalisedtuning: " << normalisedtuning << '\n'; | 
| matthiasm@1 | 839 | 
| matthiasm@1 | 840 		    // push tuning to FeatureSet fsOut | 
| matthiasm@1 | 841 		Feature f0; // tuning | 
| matthiasm@1 | 842 		f0.hasTimestamp = true; | 
| matthiasm@1 | 843 		    f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));; | 
| matthiasm@1 | 844 		    f0.label = buffer0; | 
| matthiasm@1 | 845 		    fsOut[0].push_back(f0); | 
| matthiasm@1 | 846 | 
| matthiasm@1 | 847 		    /** Tune Log-Frequency Spectrogram | 
| matthiasm@1 | 848 		    calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to | 
| matthiasm@1 | 849 		    perform linear interpolation on the existing log-frequency spectrogram (kinda f1). | 
| matthiasm@1 | 850 		    **/ | 
| matthiasm@1 | 851 | 
| matthiasm@1 | 852 		    float tempValue = 0; | 
| matthiasm@1 | 853 		    float dbThreshold = 0; // relative to the background spectrum | 
| matthiasm@1 | 854 		    float thresh = pow(10,dbThreshold/20); | 
| matthiasm@1 | 855 		    // cerr << "tune local ? " << m_tuneLocal << endl; | 
| matthiasm@1 | 856 		    int count = 0; | 
| matthiasm@1 | 857 | 
| matthiasm@1 | 858 		    for (FeatureList::iterator i = m_fl.begin(); i != m_fl.end(); ++i) { | 
| matthiasm@1 | 859 		        Feature f1 = *i; | 
| matthiasm@1 | 860 		        Feature f2; // tuned log-frequency spectrum | 
| matthiasm@1 | 861 		        f2.hasTimestamp = true; | 
| matthiasm@1 | 862 		        f2.timestamp = f1.timestamp; | 
| matthiasm@1 | 863 		        f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero | 
| matthiasm@1 | 864 | 
| matthiasm@1 | 865 		        if (m_tuneLocal) { | 
| matthiasm@1 | 866 		            intShift = floor(m_localTuning[count] * 3); | 
| matthiasm@1 | 867 		            intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this | 
| matthiasm@1 | 868 		        } | 
| matthiasm@1 | 869 | 
| matthiasm@1 | 870 		        // cerr << intShift << " " << intFactor << endl; | 
| matthiasm@1 | 871 | 
| matthiasm@4 | 872 		        for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins | 
| matthiasm@1 | 873 		            tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor; | 
| matthiasm@1 | 874 		            f2.values.push_back(tempValue); | 
| matthiasm@1 | 875 		        } | 
| matthiasm@1 | 876 | 
| matthiasm@1 | 877 		        f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge | 
| matthiasm@1 | 878 		        vector<float> runningmean = SpecialConvolution(f2.values,hw); | 
| matthiasm@1 | 879 		        vector<float> runningstd; | 
| matthiasm@1 | 880 		        for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance) | 
| matthiasm@1 | 881 		            runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i])); | 
| matthiasm@1 | 882 		        } | 
| matthiasm@1 | 883 		        runningstd = SpecialConvolution(runningstd,hw); // second step convolve | 
| matthiasm@1 | 884 		        for (int i = 0; i < 256; i++) { | 
| matthiasm@1 | 885 		            runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std | 
| matthiasm@1 | 886 		            if (runningstd[i] > 0) { | 
| matthiasm@1 | 887 		                // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ? | 
| matthiasm@1 | 888 		                // 		                    (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0; | 
| matthiasm@1 | 889 						f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ? | 
| matthiasm@1 | 890 		                    (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0; | 
| matthiasm@1 | 891 		            } | 
| matthiasm@1 | 892 		            if (f2.values[i] < 0) { | 
| matthiasm@1 | 893 		                cerr << "ERROR: negative value in logfreq spectrum" << endl; | 
| matthiasm@1 | 894 		            } | 
| matthiasm@1 | 895 		        } | 
| matthiasm@1 | 896 		        fsOut[2].push_back(f2); | 
| matthiasm@1 | 897 		        count++; | 
| matthiasm@1 | 898 		    } | 
| matthiasm@1 | 899 | 
| matthiasm@1 | 900 	    /** Semitone spectrum and chromagrams | 
| matthiasm@1 | 901 	    Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum | 
| matthiasm@1 | 902 	    is inferred using a non-negative least squares algorithm. | 
| matthiasm@1 | 903 	    Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means | 
| matthiasm@1 | 904 	    bass and treble stacked onto each other). | 
| matthiasm@1 | 905 	    **/ | 
| matthiasm@1 | 906 	    // taucs_ccs_matrix* A_original_ordering = taucs_construct_sorted_ccs_matrix(nnlsdict06, nnls_m, nnls_n); | 
| matthiasm@1 | 907 | 
| matthiasm@1 | 908 	    vector<vector<float> > chordogram; | 
| matthiasm@3 | 909 		vector<vector<int> > scoreChordogram; | 
| matthiasm@1 | 910 	    vector<float> oldchroma = vector<float>(12,0); | 
| matthiasm@1 | 911 	    vector<float> oldbasschroma = vector<float>(12,0); | 
| matthiasm@1 | 912 	    count = 0; | 
| matthiasm@1 | 913 | 
| matthiasm@1 | 914 	    for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) { | 
| matthiasm@1 | 915 	        Feature f2 = *it; // logfreq spectrum | 
| matthiasm@1 | 916 	        Feature f3; // semitone spectrum | 
| matthiasm@1 | 917 	        Feature f4; // treble chromagram | 
| matthiasm@1 | 918 	        Feature f5; // bass chromagram | 
| matthiasm@1 | 919 	        Feature f6; // treble and bass chromagram | 
| matthiasm@1 | 920 | 
| matthiasm@1 | 921 	        f3.hasTimestamp = true; | 
| matthiasm@1 | 922 	        f3.timestamp = f2.timestamp; | 
| matthiasm@1 | 923 | 
| matthiasm@1 | 924 	        f4.hasTimestamp = true; | 
| matthiasm@1 | 925 	        f4.timestamp = f2.timestamp; | 
| matthiasm@1 | 926 | 
| matthiasm@1 | 927 	        f5.hasTimestamp = true; | 
| matthiasm@1 | 928 	        f5.timestamp = f2.timestamp; | 
| matthiasm@1 | 929 | 
| matthiasm@1 | 930 	        f6.hasTimestamp = true; | 
| matthiasm@1 | 931 	        f6.timestamp = f2.timestamp; | 
| matthiasm@1 | 932 | 
| matthiasm@3 | 933 			float b[256]; | 
| matthiasm@1 | 934 | 
| matthiasm@1 | 935 	        bool some_b_greater_zero = false; | 
| matthiasm@3 | 936 			float sumb = 0; | 
| matthiasm@1 | 937 	        for (int i = 0; i < 256; i++) { | 
| matthiasm@3 | 938 				// b[i] = m_dict[(256 * count + i) % (256 * 84)]; | 
| matthiasm@3 | 939 				b[i] = f2.values[i]; | 
| matthiasm@3 | 940 				sumb += b[i]; | 
| matthiasm@1 | 941 	            if (b[i] > 0) { | 
| matthiasm@1 | 942 	                some_b_greater_zero = true; | 
| matthiasm@1 | 943 	            } | 
| matthiasm@1 | 944 	        } | 
| matthiasm@1 | 945 | 
| matthiasm@1 | 946 	        // here's where the non-negative least squares algorithm calculates the note activation x | 
| matthiasm@1 | 947 | 
| matthiasm@1 | 948 	        vector<float> chroma = vector<float>(12, 0); | 
| matthiasm@1 | 949 	        vector<float> basschroma = vector<float>(12, 0); | 
| matthiasm@1 | 950 			float currval; | 
| matthiasm@1 | 951 			unsigned iSemitone = 0; | 
| matthiasm@1 | 952 | 
| matthiasm@1 | 953 			if (some_b_greater_zero) { | 
| matthiasm@3 | 954 				if (m_dictID == 1) { | 
| matthiasm@1 | 955 					for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) { | 
| matthiasm@1 | 956 						currval = 0; | 
| matthiasm@3 | 957 						currval += b[iNote + 1 + -1] * 0.5; | 
| matthiasm@3 | 958 						currval += b[iNote + 1 +  0] * 1.0; | 
| matthiasm@3 | 959 						currval += b[iNote + 1 +  1] * 0.5; | 
| matthiasm@1 | 960 						f3.values.push_back(currval); | 
| matthiasm@1 | 961 						chroma[iSemitone % 12] += currval * treblewindow[iSemitone]; | 
| matthiasm@1 | 962 						basschroma[iSemitone % 12] += currval * basswindow[iSemitone]; | 
| matthiasm@1 | 963 						iSemitone++; | 
| matthiasm@1 | 964 					} | 
| matthiasm@1 | 965 | 
| matthiasm@1 | 966 				} else { | 
| matthiasm@3 | 967 					float x[84+1000]; | 
| matthiasm@3 | 968 					for (int i = 1; i < 1084; ++i) x[i] = 1.0; | 
| matthiasm@3 | 969 					// for (int i = 0; i < 84; ++i) { | 
| matthiasm@3 | 970 					// 	x[i] = b[3*i+3]; | 
| matthiasm@3 | 971 					// } | 
| matthiasm@3 | 972 				    float rnorm; | 
| matthiasm@3 | 973 				    float w[84+1000]; | 
| matthiasm@3 | 974 				    float zz[84+1000]; | 
| matthiasm@3 | 975 				    int indx[84+1000]; | 
| matthiasm@1 | 976 				    int mode; | 
| matthiasm@3 | 977 					float curr_dict[256*84]; | 
| matthiasm@3 | 978 					for (unsigned i = 0; i < 256 * 84; ++i) { | 
| matthiasm@3 | 979 						curr_dict[i] = 1.0 * m_dict[i]; | 
| matthiasm@3 | 980 					} | 
| matthiasm@3 | 981 					nnls(curr_dict, nNote, nNote, 84, b, x, &rnorm, w, zz, indx, &mode); | 
| matthiasm@3 | 982 					for (unsigned iNote = 0; iNote < 84; ++iNote) { | 
| matthiasm@3 | 983 						// for	(unsigned kNote = 0; kNote < 256; ++kNote) { | 
| matthiasm@3 | 984 						// 						x[iNote] += m_dict[kNote + nNote * iNote] * b[kNote]; | 
| matthiasm@3 | 985 						// 					} | 
| matthiasm@3 | 986 						f3.values.push_back(x[iNote]); | 
| matthiasm@3 | 987 						// cerr << mode << endl; | 
| matthiasm@3 | 988 						chroma[iNote % 12] += x[iNote] * treblewindow[iNote]; | 
| matthiasm@3 | 989 						basschroma[iNote % 12] += x[iNote] * basswindow[iNote]; | 
| matthiasm@3 | 990 						// iSemitone++; | 
| matthiasm@3 | 991 					} | 
| matthiasm@1 | 992 				} | 
| matthiasm@1 | 993 			} | 
| matthiasm@1 | 994 | 
| matthiasm@1 | 995 	        f4.values = chroma; | 
| matthiasm@1 | 996 	        f5.values = basschroma; | 
| matthiasm@1 | 997 	        chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas | 
| matthiasm@1 | 998 	        f6.values = chroma; | 
| matthiasm@1 | 999 | 
| matthiasm@1 | 1000 	        // local chord estimation | 
| matthiasm@1 | 1001 	        vector<float> currentChordSalience; | 
| matthiasm@1 | 1002 	        float tempchordvalue = 0; | 
| matthiasm@1 | 1003 	        float sumchordvalue = 0; | 
| matthiasm@1 | 1004 	        int nChord = nChorddict / 24; | 
| matthiasm@1 | 1005 	        for (int iChord = 0; iChord < nChord; iChord++) { | 
| matthiasm@1 | 1006 	            tempchordvalue = 0; | 
| matthiasm@1 | 1007 	            for (int iBin = 0; iBin < 12; iBin++) { | 
| matthiasm@1 | 1008 	                tempchordvalue += chorddict[24 * iChord + iBin] * chroma[iBin]; | 
| matthiasm@1 | 1009 	            } | 
| matthiasm@1 | 1010 	            for (int iBin = 12; iBin < 24; iBin++) { | 
| matthiasm@1 | 1011 	                tempchordvalue += chorddict[24 * iChord + iBin] * chroma[iBin]; | 
| matthiasm@1 | 1012 	            } | 
| matthiasm@1 | 1013 	            sumchordvalue+=tempchordvalue; | 
| matthiasm@1 | 1014 	            currentChordSalience.push_back(tempchordvalue); | 
| matthiasm@1 | 1015 	        } | 
| matthiasm@1 | 1016 	        for (int iChord = 0; iChord < nChord; iChord++) { | 
| matthiasm@1 | 1017 	            currentChordSalience[iChord] /= sumchordvalue; | 
| matthiasm@1 | 1018 	        } | 
| matthiasm@1 | 1019 	        chordogram.push_back(currentChordSalience); | 
| matthiasm@1 | 1020 | 
| matthiasm@1 | 1021 	        fsOut[3].push_back(f3); | 
| matthiasm@1 | 1022 	        fsOut[4].push_back(f4); | 
| matthiasm@1 | 1023 	        fsOut[5].push_back(f5); | 
| matthiasm@1 | 1024 	        fsOut[6].push_back(f6); | 
| matthiasm@1 | 1025 	        count++; | 
| matthiasm@1 | 1026 	    } | 
| matthiasm@0 | 1027 	//     int musicitykernelwidth = (50 * 2048) / m_stepSize; | 
| matthiasm@0 | 1028 	// | 
| matthiasm@3 | 1029 	    /* Simple chord estimation | 
| matthiasm@3 | 1030 	    I just take the local chord estimates ("currentChordSalience") and average them over time, then | 
| matthiasm@3 | 1031 	    take the maximum. Very simple, don't do this at home... | 
| matthiasm@3 | 1032 	    */ | 
| matthiasm@3 | 1033 	    count = 0; | 
| matthiasm@3 | 1034 	    int halfwindowlength = m_inputSampleRate / m_stepSize; | 
| matthiasm@3 | 1035 	    int nChord = nChorddict / 24; | 
| matthiasm@3 | 1036 	    vector<int> chordSequence; | 
| matthiasm@3 | 1037   	 	for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram | 
| matthiasm@3 | 1038 			vector<int> temp = vector<int>(nChord,0); | 
| matthiasm@3 | 1039 			scoreChordogram.push_back(temp); | 
| matthiasm@3 | 1040 		} | 
| matthiasm@4 | 1041 	    for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) { | 
| matthiasm@3 | 1042 			int startIndex = count + 1; | 
| matthiasm@3 | 1043 			int endIndex = count + 2 * halfwindowlength; | 
| matthiasm@3 | 1044 	        vector<float> temp = vector<float>(nChord,0); | 
| matthiasm@3 | 1045 			float maxval = 0; // will be the value of the most salient chord in this frame | 
| matthiasm@4 | 1046 			float maxindex = 0; //... and the index thereof | 
| matthiasm@3 | 1047 			unsigned bestchordL = 0; // index of the best "left" chord | 
| matthiasm@3 | 1048  	 		unsigned bestchordR = 0; // index of the best "right" chord | 
| matthiasm@4 | 1049 			for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) { | 
| matthiasm@3 | 1050 				// now find the max values on both sides of iWF | 
| matthiasm@3 | 1051 				// left side: | 
| matthiasm@3 | 1052 				float maxL = 0; | 
| matthiasm@3 | 1053 				unsigned maxindL = nChord-1; | 
| matthiasm@3 | 1054 				for (unsigned iChord = 0; iChord < nChord; iChord++) { | 
| matthiasm@3 | 1055 					float currsum = 0; | 
| matthiasm@3 | 1056 					for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) { | 
| matthiasm@3 | 1057 						currsum += chordogram[count+iFrame][iChord]; | 
| matthiasm@3 | 1058 					} | 
| matthiasm@3 | 1059 					if (iChord == nChord-1) currsum *= 0.8; | 
| matthiasm@3 | 1060 					if (currsum > maxL) { | 
| matthiasm@3 | 1061 						maxL = currsum; | 
| matthiasm@3 | 1062 						maxindL = iChord; | 
| matthiasm@3 | 1063 					} | 
| matthiasm@3 | 1064 				} | 
| matthiasm@3 | 1065 				// right side: | 
| matthiasm@3 | 1066 				float maxR = 0; | 
| matthiasm@3 | 1067 				unsigned maxindR = nChord-1; | 
| matthiasm@3 | 1068 				for (unsigned iChord = 0; iChord < nChord; iChord++) { | 
| matthiasm@3 | 1069 					float currsum = 0; | 
| matthiasm@3 | 1070 					for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) { | 
| matthiasm@3 | 1071 						currsum += chordogram[count+iFrame][iChord]; | 
| matthiasm@3 | 1072 					} | 
| matthiasm@3 | 1073 					if (iChord == nChord-1) currsum *= 0.8; | 
| matthiasm@3 | 1074 					if (currsum > maxR) { | 
| matthiasm@3 | 1075 						maxR = currsum; | 
| matthiasm@3 | 1076 						maxindR = iChord; | 
| matthiasm@3 | 1077 					} | 
| matthiasm@3 | 1078 				} | 
| matthiasm@3 | 1079 				if (maxL+maxR > maxval) { | 
| matthiasm@3 | 1080 					maxval = maxL+maxR; | 
| matthiasm@3 | 1081 					maxindex = iWF; | 
| matthiasm@3 | 1082 					bestchordL = maxindL; | 
| matthiasm@3 | 1083 					bestchordR = maxindR; | 
| matthiasm@3 | 1084 				} | 
| matthiasm@3 | 1085 | 
| matthiasm@3 | 1086 			} | 
| matthiasm@3 | 1087 			// cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl; | 
| matthiasm@3 | 1088 			// add a score to every chord-frame-point that was part of a maximum | 
| matthiasm@3 | 1089 			for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) { | 
| matthiasm@3 | 1090 				scoreChordogram[iFrame+count][bestchordL]++; | 
| matthiasm@3 | 1091 			} | 
| matthiasm@3 | 1092 			for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) { | 
| matthiasm@3 | 1093 				scoreChordogram[iFrame+count][bestchordR]++; | 
| matthiasm@3 | 1094 			} | 
| matthiasm@3 | 1095 			count++; | 
| matthiasm@3 | 1096 	    } | 
| matthiasm@3 | 1097 | 
| matthiasm@3 | 1098 		count = 0; | 
| matthiasm@3 | 1099 	 	for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { | 
| matthiasm@3 | 1100 			float maxval = 0; // will be the value of the most salient chord in this frame | 
| matthiasm@3 | 1101 			float maxindex = 0; //... and the index thereof | 
| matthiasm@3 | 1102 			for (unsigned iChord = 0; iChord < nChord; iChord++) { | 
| matthiasm@3 | 1103 				if (scoreChordogram[count][iChord] > maxval) { | 
| matthiasm@3 | 1104 					maxval = scoreChordogram[count][iChord]; | 
| matthiasm@3 | 1105 					maxindex = iChord; | 
| matthiasm@4 | 1106 					// cerr << iChord << endl; | 
| matthiasm@3 | 1107 				} | 
| matthiasm@3 | 1108 			} | 
| matthiasm@3 | 1109 			chordSequence.push_back(maxindex); | 
| matthiasm@4 | 1110 			// cerr << "before modefilter, maxindex: " << maxindex << endl; | 
| matthiasm@3 | 1111 			count++; | 
| matthiasm@3 | 1112 		} | 
| matthiasm@3 | 1113 | 
| matthiasm@3 | 1114 | 
| matthiasm@3 | 1115 	    // mode filter on chordSequence | 
| matthiasm@3 | 1116 	    count = 0; | 
| matthiasm@3 | 1117 	    int oldChordIndex = -1; | 
| matthiasm@3 | 1118 	    for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { | 
| matthiasm@3 | 1119 			Feature f6 = *it; | 
| matthiasm@3 | 1120 			Feature f7; // chord estimate | 
| matthiasm@3 | 1121 			f7.hasTimestamp = true; | 
| matthiasm@3 | 1122 			f7.timestamp = f6.timestamp; | 
| matthiasm@3 | 1123 			vector<int> chordCount = vector<int>(nChord,0); | 
| matthiasm@3 | 1124 	        int maxChordCount = 0; | 
| matthiasm@3 | 1125 	        int maxChordIndex = nChord-1; | 
| matthiasm@4 | 1126 	        int startIndex = max(count - halfwindowlength/2,0); | 
| matthiasm@4 | 1127 	        int endIndex = min(int(chordogram.size()), count + halfwindowlength/2); | 
| matthiasm@4 | 1128 	        for (int i = startIndex; i < endIndex; i++) { | 
| matthiasm@4 | 1129 	            chordCount[chordSequence[i]]++; | 
| matthiasm@4 | 1130 	            if (chordCount[chordSequence[i]] > maxChordCount) { | 
| matthiasm@4 | 1131 					cerr << "start index " << startIndex << endl; | 
| matthiasm@4 | 1132 	                maxChordCount++; | 
| matthiasm@4 | 1133 	                maxChordIndex = chordSequence[i]; | 
| matthiasm@4 | 1134 	            } | 
| matthiasm@4 | 1135 	        } | 
| matthiasm@4 | 1136 			// chordSequence[count] = maxChordIndex; | 
| matthiasm@4 | 1137 			cerr << maxChordIndex << endl; | 
| matthiasm@3 | 1138 	        if (oldChordIndex != maxChordIndex) { | 
| matthiasm@3 | 1139 	            oldChordIndex = maxChordIndex; | 
| matthiasm@3 | 1140 | 
| matthiasm@3 | 1141 	            char buffer1 [50]; | 
| matthiasm@3 | 1142 	            if (maxChordIndex < nChord - 1) { | 
| matthiasm@3 | 1143 	                sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]); | 
| matthiasm@3 | 1144 	            } else { | 
| matthiasm@3 | 1145 	                sprintf(buffer1, "N"); | 
| matthiasm@3 | 1146 	            } | 
| matthiasm@3 | 1147 	            f7.label = buffer1; | 
| matthiasm@3 | 1148 	            fsOut[7].push_back(f7); | 
| matthiasm@3 | 1149 	        } | 
| matthiasm@3 | 1150 	        count++; | 
| matthiasm@3 | 1151 	    } | 
| matthiasm@0 | 1152 	//     // musicity | 
| matthiasm@0 | 1153 	//     count = 0; | 
| matthiasm@0 | 1154 	//     int oldlabeltype = 0; // start value is 0, music is 1, speech is 2 | 
| matthiasm@0 | 1155 	//     vector<float> musicityValue; | 
| matthiasm@0 | 1156 	//     for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) { | 
| matthiasm@0 | 1157 	//         Feature f4 = *it; | 
| matthiasm@0 | 1158 	// | 
| matthiasm@0 | 1159 	//         int startIndex = max(count - musicitykernelwidth/2,0); | 
| matthiasm@0 | 1160 	//         int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1); | 
| matthiasm@0 | 1161 	//         float chromasum = 0; | 
| matthiasm@0 | 1162 	//         float diffsum = 0; | 
| matthiasm@0 | 1163 	//         for (int k = 0; k < 12; k++) { | 
| matthiasm@0 | 1164 	//             for (int i = startIndex + 1; i < endIndex; i++) { | 
| matthiasm@0 | 1165 	//                 chromasum += pow(fsOut[4][i].values[k],2); | 
| matthiasm@0 | 1166 	//                 diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]); | 
| matthiasm@0 | 1167 	//             } | 
| matthiasm@0 | 1168 	//         } | 
| matthiasm@0 | 1169 	//         diffsum /= chromasum; | 
| matthiasm@0 | 1170 	//         musicityValue.push_back(diffsum); | 
| matthiasm@0 | 1171 	//         count++; | 
| matthiasm@0 | 1172 	//     } | 
| matthiasm@0 | 1173 	// | 
| matthiasm@0 | 1174 	//     float musicityThreshold = 0.44; | 
| matthiasm@0 | 1175 	//     if (m_stepSize == 4096) { | 
| matthiasm@0 | 1176 	//         musicityThreshold = 0.74; | 
| matthiasm@0 | 1177 	//     } | 
| matthiasm@0 | 1178 	//     if (m_stepSize == 4410) { | 
| matthiasm@0 | 1179 	//         musicityThreshold = 0.77; | 
| matthiasm@0 | 1180 	//     } | 
| matthiasm@0 | 1181 	// | 
| matthiasm@0 | 1182 	//     count = 0; | 
| matthiasm@0 | 1183 	//     for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) { | 
| matthiasm@0 | 1184 	//         Feature f4 = *it; | 
| matthiasm@0 | 1185 	//         Feature f8; // musicity | 
| matthiasm@0 | 1186 	//         Feature f9; // musicity segmenter | 
| matthiasm@0 | 1187 	// | 
| matthiasm@0 | 1188 	//         f8.hasTimestamp = true; | 
| matthiasm@0 | 1189 	//         f8.timestamp = f4.timestamp; | 
| matthiasm@0 | 1190 	//         f9.hasTimestamp = true; | 
| matthiasm@0 | 1191 	//         f9.timestamp = f4.timestamp; | 
| matthiasm@0 | 1192 	// | 
| matthiasm@0 | 1193 	//         int startIndex = max(count - musicitykernelwidth/2,0); | 
| matthiasm@0 | 1194 	//         int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1); | 
| matthiasm@0 | 1195 	//         int musicityCount = 0; | 
| matthiasm@0 | 1196 	//         for (int i = startIndex; i <= endIndex; i++) { | 
| matthiasm@0 | 1197 	//             if (musicityValue[i] > musicityThreshold) musicityCount++; | 
| matthiasm@0 | 1198 	//         } | 
| matthiasm@0 | 1199 	//         bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1); | 
| matthiasm@0 | 1200 	// | 
| matthiasm@0 | 1201 	//         if (isSpeech) { | 
| matthiasm@0 | 1202 	//             if (oldlabeltype != 2) { | 
| matthiasm@0 | 1203 	//                 f9.label = "Speech"; | 
| matthiasm@0 | 1204 	//                 fsOut[9].push_back(f9); | 
| matthiasm@0 | 1205 	//                 oldlabeltype = 2; | 
| matthiasm@0 | 1206 	//             } | 
| matthiasm@0 | 1207 	//         } else { | 
| matthiasm@0 | 1208 	//             if (oldlabeltype != 1) { | 
| matthiasm@0 | 1209 	//                 f9.label = "Music"; | 
| matthiasm@0 | 1210 	//                 fsOut[9].push_back(f9); | 
| matthiasm@0 | 1211 	//                 oldlabeltype = 1; | 
| matthiasm@0 | 1212 	//             } | 
| matthiasm@0 | 1213 	//         } | 
| matthiasm@0 | 1214 	//         f8.values.push_back(musicityValue[count]); | 
| matthiasm@0 | 1215 	//         fsOut[8].push_back(f8); | 
| matthiasm@0 | 1216 	//         count++; | 
| matthiasm@0 | 1217 	//      } | 
| matthiasm@0 | 1218      return fsOut; | 
| matthiasm@0 | 1219 | 
| matthiasm@0 | 1220 } | 
| matthiasm@0 | 1221 |