matthiasm@0
|
1
|
matthiasm@0
|
2 #include "NNLSChroma.h"
|
matthiasm@0
|
3 #include <cmath>
|
matthiasm@0
|
4 #include <list>
|
matthiasm@0
|
5 #include <iostream>
|
matthiasm@3
|
6 #include <fstream>
|
matthiasm@0
|
7 #include <sstream>
|
matthiasm@0
|
8 #include <cassert>
|
matthiasm@0
|
9 #include <cstdio>
|
matthiasm@1
|
10 #include "nnls.h"
|
matthiasm@0
|
11 // #include "cblas.h"
|
matthiasm@0
|
12 #include "chorddict.cpp"
|
matthiasm@0
|
13 using namespace std;
|
matthiasm@0
|
14
|
matthiasm@0
|
// Fixed trigonometric coefficients; presumably sin and cos of 120 degrees
// (2*pi/3) -- TODO confirm against the code that uses them.
const float sinvalue = 0.866025404;
const float cosvalue = -0.5;

// 19-tap symmetric smoothing kernel (named as a Hamming window). The same
// values are exposed as the vector `hw` below.
const float hammingwind[19] = {
    0.0082, 0.0110, 0.0191, 0.0316, 0.0470, 0.0633, 0.0786, 0.0911, 0.0992, 0.1020,
    0.0992, 0.0911, 0.0786, 0.0633, 0.0470, 0.0316, 0.0191, 0.0110, 0.0082
};

// 84-entry weighting profile: 37 non-zero leading entries followed by zeros.
// Presumably one weight per semitone for the bass chromagram -- TODO confirm.
const float basswindow[] = {
    0.001769, 0.015848, 0.043608, 0.084265, 0.136670, 0.199341, 0.270509, 0.348162, 0.430105, 0.514023,
    0.597545, 0.678311, 0.754038, 0.822586, 0.882019, 0.930656, 0.967124, 0.990393, 0.999803, 0.995091,
    0.976388, 0.944223, 0.899505, 0.843498, 0.777785, 0.704222, 0.624888, 0.542025, 0.457975, 0.375112,
    0.295778, 0.222215, 0.156502, 0.100495, 0.055777, 0.023612, 0.004909,
    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000
};

// 84-entry symmetric weighting profile peaking in the middle of the range.
// Presumably one weight per semitone for the treble chromagram -- TODO confirm.
const float treblewindow[] = {
    0.000350, 0.003144, 0.008717, 0.017037, 0.028058, 0.041719, 0.057942, 0.076638, 0.097701, 0.121014,
    0.146447, 0.173856, 0.203090, 0.233984, 0.266366, 0.300054, 0.334860, 0.370590, 0.407044, 0.444018,
    0.481304, 0.518696, 0.555982, 0.592956, 0.629410, 0.665140, 0.699946, 0.733634, 0.766016, 0.796910,
    0.826144, 0.853553, 0.878986, 0.902299, 0.923362, 0.942058, 0.958281, 0.971942, 0.982963, 0.991283,
    0.996856, 0.999650,
    0.999650, 0.996856,
    0.991283, 0.982963, 0.971942, 0.958281, 0.942058, 0.923362, 0.902299, 0.878986, 0.853553, 0.826144,
    0.796910, 0.766016, 0.733634, 0.699946, 0.665140, 0.629410, 0.592956, 0.555982, 0.518696, 0.481304,
    0.444018, 0.407044, 0.370590, 0.334860, 0.300054, 0.266366, 0.233984, 0.203090, 0.173856, 0.146447,
    0.121014, 0.097701, 0.076638, 0.057942, 0.041719, 0.028058, 0.017037, 0.008717, 0.003144, 0.000350
};

// Bin labels: entries 0..11 are the bass pitch classes, entries 12..23 the
// plain pitch classes, both starting at A.
const char* notenames[24] = {
    "A (bass)", "Bb (bass)", "B (bass)", "C (bass)", "C# (bass)", "D (bass)",
    "Eb (bass)", "E (bass)", "F (bass)", "F# (bass)", "G (bass)", "Ab (bass)",
    "A", "Bb", "B", "C", "C#", "D", "Eb", "E", "F", "F#", "G", "Ab"
};

// The smoothing kernel above as a vector, ready to pass to SpecialConvolution().
const std::vector<float> hw(hammingwind, hammingwind + 19);

// Number of bins in the log-frequency spectrum.
const int nNote = 256;

/** Special Convolution
    The special convolution is as long as the convolvee, i.e. the first argument.
    In the valid core part of the convolution it contains the usual convolution
    values, but the pads at the beginning (ending) have the same values as the
    first (last) valid convolution bin.
**/

// Compile-time switch for the "--> functionName" trace output on cerr.
const bool debug_on = false;
|
matthiasm@0
|
32
|
matthiasm@0
|
/**
 * "Special" convolution whose result is exactly as long as the convolvee.
 *
 * The valid core of the result holds the ordinary convolution values,
 * shifted so the kernel is centred on each output bin. The first and last
 * kernel.size()/2 bins, for which no full overlap exists, are filled with the
 * first and last valid value respectively.
 *
 * @param convolvee  signal to be smoothed; determines the output length
 * @param kernel     convolution kernel; must have odd length (asserted)
 * @return           vector of convolvee.size() elements; all zeros when the
 *                   kernel is empty or longer than the convolvee
 */
std::vector<float> SpecialConvolution(std::vector<float> convolvee, std::vector<float> kernel)
{
    const int lenConvolvee = static_cast<int>(convolvee.size());
    const int lenKernel = static_cast<int>(kernel.size());
    const int half = lenKernel / 2;

    // Size the result from the input instead of a hard-coded 256, so inputs
    // of any length are handled without out-of-range writes.
    std::vector<float> Z(convolvee.size(), 0.0f);
    assert(lenKernel % 2 != 0); // no exception handling !!!

    // Without at least one position of full overlap there is no valid core
    // to compute or to replicate into the pads.
    if (lenKernel == 0 || lenKernel > lenConvolvee) {
        return Z;
    }

    // valid core: every position where the kernel fully overlaps the input
    for (int n = lenKernel - 1; n < lenConvolvee; ++n) {
        float s = 0.0f;
        for (int m = 0; m < lenKernel; ++m) {
            s += convolvee[n - m] * kernel[m];
        }
        Z[n - half] = s;
    }

    // fill lower and upper pads with the nearest valid value
    for (int n = 0; n < half; ++n) {
        Z[n] = Z[half];
    }
    for (int n = lenConvolvee - half; n < lenConvolvee; ++n) {
        Z[n] = Z[lenConvolvee - half - 1];
    }
    return Z;
}
|
matthiasm@0
|
60
|
matthiasm@0
|
61 // vector<float> FftBin2Frequency(vector<float> binnumbers, int fs, int blocksize)
|
matthiasm@0
|
62 // {
|
matthiasm@0
|
63 // vector<float> freq(binnumbers.size, 0.0);
|
matthiasm@0
|
64 // for (unsigned i = 0; i < binnumbers.size; ++i) {
|
matthiasm@0
|
65 // freq[i] = (binnumbers[i]-1.0) * fs * 1.0 / blocksize;
|
matthiasm@0
|
66 // }
|
matthiasm@0
|
67 // return freq;
|
matthiasm@0
|
68 // }
|
matthiasm@0
|
69
|
matthiasm@0
|
/**
 * Raised-cosine pulse.
 *
 * Evaluates 0.5 + 0.5 * cos(2*pi*(x - centre)/width) inside the interval
 * |x - centre| <= width/2, and 0 outside it. The pulse is therefore 1 at
 * x == centre and falls to 0 at centre +/- width/2.
 *
 * @param x       position at which the pulse is evaluated
 * @param centre  position of the pulse maximum
 * @param width   total width of the non-zero part of the pulse
 * @return        pulse value
 */
float cospuls(float x, float centre, float width)
{
    // Same value as the POSIX M_PI macro, spelled out so the function does
    // not depend on a non-standard extension of <cmath>.
    const double pi = 3.14159265358979323846;
    float recipwidth = 1.0/width;
    // std::fabs keeps the distance in floating point; an unqualified abs()
    // can resolve to the integer ::abs and truncate the distance.
    if (std::fabs(x - centre) <= 0.5 * width) {
        return std::cos((x-centre)*2*pi*recipwidth)*.5+.5;
    }
    return 0.0;
}
|
matthiasm@0
|
78
|
matthiasm@0
|
79 float pitchCospuls(float x, float centre, int binsperoctave)
|
matthiasm@0
|
80 {
|
matthiasm@0
|
81 float warpedf = -binsperoctave * (log2(centre) - log2(x));
|
matthiasm@0
|
82 float out = cospuls(warpedf, 0.0, 2.0);
|
matthiasm@0
|
83 // now scale to correct for note density
|
matthiasm@0
|
84 float c = log(2.0)/binsperoctave;
|
matthiasm@0
|
85 if (x > 0) {
|
matthiasm@0
|
86 out = out / (c * x);
|
matthiasm@0
|
87 } else {
|
matthiasm@0
|
88 out = 0;
|
matthiasm@0
|
89 }
|
matthiasm@0
|
90 return out;
|
matthiasm@0
|
91 }
|
matthiasm@0
|
92
|
matthiasm@0
|
93 bool logFreqMatrix(int fs, int blocksize, float *outmatrix) {
|
matthiasm@0
|
94
|
matthiasm@0
|
95 int binspersemitone = 3; // this must be 3
|
matthiasm@0
|
96 int minoctave = 0; // this must be 0
|
matthiasm@0
|
97 int maxoctave = 7; // this must be 7
|
matthiasm@1
|
98 int oversampling = 80;
|
matthiasm@0
|
99
|
matthiasm@0
|
100 // linear frequency vector
|
matthiasm@0
|
101 vector<float> fft_f;
|
matthiasm@0
|
102 for (int i = 0; i < blocksize/2; ++i) {
|
matthiasm@0
|
103 fft_f.push_back(i * (fs * 1.0 / blocksize));
|
matthiasm@0
|
104 }
|
matthiasm@0
|
105 float fft_width = fs * 2.0 / blocksize;
|
matthiasm@0
|
106
|
matthiasm@0
|
107 // linear oversampled frequency vector
|
matthiasm@0
|
108 vector<float> oversampled_f;
|
matthiasm@0
|
109 for (unsigned int i = 0; i < oversampling * blocksize/2; ++i) {
|
matthiasm@0
|
110 oversampled_f.push_back(i * ((fs * 1.0 / blocksize) / oversampling));
|
matthiasm@0
|
111 }
|
matthiasm@0
|
112
|
matthiasm@0
|
113 // pitch-spaced frequency vector
|
matthiasm@0
|
114 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone!
|
matthiasm@0
|
115 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone!
|
matthiasm@0
|
116 vector<float> cq_f;
|
matthiasm@0
|
117 float oob = 1.0/binspersemitone; // one over binspersemitone
|
matthiasm@0
|
118 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12
|
matthiasm@0
|
119 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69)));
|
matthiasm@0
|
120 for (int i = minMIDI + 1; i < maxMIDI; ++i) {
|
matthiasm@0
|
121 for (int k = -1; k < 2; ++k) {
|
matthiasm@0
|
122 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69)));
|
matthiasm@0
|
123 }
|
matthiasm@0
|
124 }
|
matthiasm@0
|
125 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69)));
|
matthiasm@0
|
126 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69)));
|
matthiasm@0
|
127
|
matthiasm@0
|
128 int nFFT = fft_f.size();
|
matthiasm@0
|
129
|
matthiasm@0
|
130 vector<float> fft_activation;
|
matthiasm@0
|
131 for (int iOS = 0; iOS < 2 * oversampling; ++iOS) {
|
matthiasm@0
|
132 float cosp = cospuls(oversampled_f[iOS],fft_f[1],fft_width);
|
matthiasm@0
|
133 fft_activation.push_back(cosp);
|
matthiasm@0
|
134 // cerr << cosp << endl;
|
matthiasm@0
|
135 }
|
matthiasm@0
|
136
|
matthiasm@0
|
137 float cq_activation;
|
matthiasm@0
|
138 for (int iFFT = 1; iFFT < nFFT; ++iFFT) {
|
matthiasm@0
|
139 // find frequency stretch where the oversampled vector can be non-zero (i.e. in a window of width fft_width around the current frequency)
|
matthiasm@0
|
140 int curr_start = oversampling * iFFT - oversampling;
|
matthiasm@0
|
141 int curr_end = oversampling * iFFT + oversampling; // don't know if I should add "+1" here
|
matthiasm@0
|
142 // cerr << oversampled_f[curr_start] << " " << fft_f[iFFT] << " " << oversampled_f[curr_end] << endl;
|
matthiasm@0
|
143 for (unsigned iCQ = 0; iCQ < cq_f.size(); ++iCQ) {
|
matthiasm@0
|
144 outmatrix[iFFT + nFFT * iCQ] = 0;
|
matthiasm@1
|
145 if (cq_f[iCQ] * pow(2.0, 0.084) + fft_width > fft_f[iFFT] && cq_f[iCQ] * pow(2.0, -0.084 * 2) - fft_width < fft_f[iFFT]) { // within a generous neighbourhood
|
matthiasm@0
|
146 for (int iOS = curr_start; iOS < curr_end; ++iOS) {
|
matthiasm@0
|
147 cq_activation = pitchCospuls(oversampled_f[iOS],cq_f[iCQ],binspersemitone*12);
|
matthiasm@0
|
148 // cerr << oversampled_f[iOS] << " " << cq_f[iCQ] << " " << cq_activation << endl;
|
matthiasm@0
|
149 outmatrix[iFFT + nFFT * iCQ] += cq_activation * fft_activation[iOS-curr_start];
|
matthiasm@0
|
150 }
|
matthiasm@0
|
151 // if (iCQ == 1 || iCQ == 2) {
|
matthiasm@0
|
152 // cerr << " " << outmatrix[iFFT + nFFT * iCQ] << endl;
|
matthiasm@0
|
153 // }
|
matthiasm@0
|
154 }
|
matthiasm@0
|
155 }
|
matthiasm@0
|
156 }
|
matthiasm@0
|
157 return true;
|
matthiasm@0
|
158 }
|
matthiasm@0
|
159
|
matthiasm@3
|
160 bool dictionaryMatrix(float* dm) {
|
matthiasm@1
|
161 int binspersemitone = 3; // this must be 3
|
matthiasm@1
|
162 int minoctave = 0; // this must be 0
|
matthiasm@1
|
163 int maxoctave = 7; // this must be 7
|
matthiasm@1
|
164 float s_param = 0.6;
|
matthiasm@1
|
165
|
matthiasm@1
|
166 // pitch-spaced frequency vector
|
matthiasm@1
|
167 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone!
|
matthiasm@1
|
168 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone!
|
matthiasm@1
|
169 vector<float> cq_f;
|
matthiasm@1
|
170 float oob = 1.0/binspersemitone; // one over binspersemitone
|
matthiasm@1
|
171 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12
|
matthiasm@1
|
172 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69)));
|
matthiasm@1
|
173 for (int i = minMIDI + 1; i < maxMIDI; ++i) {
|
matthiasm@1
|
174 for (int k = -1; k < 2; ++k) {
|
matthiasm@1
|
175 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69)));
|
matthiasm@1
|
176 }
|
matthiasm@1
|
177 }
|
matthiasm@1
|
178 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69)));
|
matthiasm@1
|
179 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69)));
|
matthiasm@1
|
180
|
matthiasm@1
|
181 float curr_f;
|
matthiasm@1
|
182 float floatbin;
|
matthiasm@1
|
183 float curr_amp;
|
matthiasm@1
|
184 // now for every combination calculate the matrix element
|
matthiasm@1
|
185 for (unsigned iOut = 0; iOut < 12 * (maxoctave - minoctave); ++iOut) {
|
matthiasm@3
|
186 // cerr << iOut << endl;
|
matthiasm@1
|
187 for (unsigned iHarm = 1; iHarm <= 20; ++iHarm) {
|
matthiasm@1
|
188 curr_f = 440 * pow(2,(minMIDI-69+iOut)*1.0/12) * iHarm;
|
matthiasm@3
|
189 // if (curr_f > cq_f[nNote-1]) break;
|
matthiasm@3
|
190 floatbin = ((iOut + 1) * binspersemitone + 1) + binspersemitone * 12 * log2(iHarm);
|
matthiasm@3
|
191 // cerr << floatbin << endl;
|
matthiasm@1
|
192 curr_amp = pow(s_param,float(iHarm-1));
|
matthiasm@3
|
193 // cerr << "curramp" << curr_amp << endl;
|
matthiasm@1
|
194 for (unsigned iNote = 0; iNote < nNote; ++iNote) {
|
matthiasm@3
|
195 if (abs(iNote+1.0-floatbin)<2) {
|
matthiasm@3
|
196 dm[iNote + 256 * iOut] += cospuls(iNote+1.0, floatbin, binspersemitone + 0.0) * curr_amp;
|
matthiasm@3
|
197 // dm[iNote + nNote * iOut] += 1 * curr_amp;
|
matthiasm@3
|
198 }
|
matthiasm@1
|
199 }
|
matthiasm@3
|
200 }
|
matthiasm@1
|
201 }
|
matthiasm@3
|
202
|
matthiasm@3
|
203
|
matthiasm@1
|
204 }
|
matthiasm@1
|
205
|
matthiasm@0
|
206
|
matthiasm@0
|
207 NNLSChroma::NNLSChroma(float inputSampleRate) :
|
matthiasm@0
|
208 Plugin(inputSampleRate),
|
matthiasm@0
|
209 m_fl(0),
|
matthiasm@0
|
210 m_blockSize(0),
|
matthiasm@0
|
211 m_stepSize(0),
|
matthiasm@0
|
212 m_lengthOfNoteIndex(0),
|
matthiasm@0
|
213 m_meanTuning0(0),
|
matthiasm@0
|
214 m_meanTuning1(0),
|
matthiasm@0
|
215 m_meanTuning2(0),
|
matthiasm@0
|
216 m_localTuning0(0),
|
matthiasm@0
|
217 m_localTuning1(0),
|
matthiasm@0
|
218 m_localTuning2(0),
|
matthiasm@3
|
219 m_paling(0.8),
|
matthiasm@3
|
220 m_preset(0.0),
|
matthiasm@0
|
221 m_localTuning(0),
|
matthiasm@0
|
222 m_kernelValue(0),
|
matthiasm@0
|
223 m_kernelFftIndex(0),
|
matthiasm@0
|
224 m_kernelNoteIndex(0),
|
matthiasm@1
|
225 m_dict(0),
|
matthiasm@0
|
226 m_tuneLocal(false),
|
matthiasm@0
|
227 m_dictID(0)
|
matthiasm@0
|
228 {
|
matthiasm@0
|
229 if (debug_on) cerr << "--> NNLSChroma" << endl;
|
matthiasm@3
|
230 m_dict = new float[nNote * 84];
|
matthiasm@3
|
231 for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0;
|
matthiasm@1
|
232 dictionaryMatrix(m_dict);
|
matthiasm@0
|
233 }
|
matthiasm@0
|
234
|
matthiasm@0
|
235
|
matthiasm@0
|
236 NNLSChroma::~NNLSChroma()
|
matthiasm@0
|
237 {
|
matthiasm@0
|
238 if (debug_on) cerr << "--> ~NNLSChroma" << endl;
|
matthiasm@1
|
239 delete [] m_dict;
|
matthiasm@0
|
240 }
|
matthiasm@0
|
241
|
matthiasm@0
|
242 string
|
matthiasm@0
|
243 NNLSChroma::getIdentifier() const
|
matthiasm@0
|
244 {
|
matthiasm@0
|
245 if (debug_on) cerr << "--> getIdentifier" << endl;
|
matthiasm@0
|
246 return "nnls_chroma";
|
matthiasm@0
|
247 }
|
matthiasm@0
|
248
|
matthiasm@0
|
249 string
|
matthiasm@0
|
250 NNLSChroma::getName() const
|
matthiasm@0
|
251 {
|
matthiasm@0
|
252 if (debug_on) cerr << "--> getName" << endl;
|
matthiasm@0
|
253 return "NNLS Chroma";
|
matthiasm@0
|
254 }
|
matthiasm@0
|
255
|
matthiasm@0
|
256 string
|
matthiasm@0
|
257 NNLSChroma::getDescription() const
|
matthiasm@0
|
258 {
|
matthiasm@0
|
259 // Return something helpful here!
|
matthiasm@0
|
260 if (debug_on) cerr << "--> getDescription" << endl;
|
matthiasm@0
|
261 return "";
|
matthiasm@0
|
262 }
|
matthiasm@0
|
263
|
matthiasm@0
|
264 string
|
matthiasm@0
|
265 NNLSChroma::getMaker() const
|
matthiasm@0
|
266 {
|
matthiasm@0
|
267 if (debug_on) cerr << "--> getMaker" << endl;
|
matthiasm@0
|
268 // Your name here
|
matthiasm@0
|
269 return "Matthias Mauch";
|
matthiasm@0
|
270 }
|
matthiasm@0
|
271
|
matthiasm@0
|
272 int
|
matthiasm@0
|
273 NNLSChroma::getPluginVersion() const
|
matthiasm@0
|
274 {
|
matthiasm@0
|
275 if (debug_on) cerr << "--> getPluginVersion" << endl;
|
matthiasm@0
|
276 // Increment this each time you release a version that behaves
|
matthiasm@0
|
277 // differently from the previous one
|
matthiasm@0
|
278 return 1;
|
matthiasm@0
|
279 }
|
matthiasm@0
|
280
|
matthiasm@0
|
281 string
|
matthiasm@0
|
282 NNLSChroma::getCopyright() const
|
matthiasm@0
|
283 {
|
matthiasm@0
|
284 if (debug_on) cerr << "--> getCopyright" << endl;
|
matthiasm@0
|
285 // This function is not ideally named. It does not necessarily
|
matthiasm@0
|
286 // need to say who made the plugin -- getMaker does that -- but it
|
matthiasm@0
|
287 // should indicate the terms under which it is distributed. For
|
matthiasm@0
|
288 // example, "Copyright (year). All Rights Reserved", or "GPL"
|
matthiasm@0
|
289 return "Copyright (2010). All rights reserved.";
|
matthiasm@0
|
290 }
|
matthiasm@0
|
291
|
matthiasm@0
|
292 NNLSChroma::InputDomain
|
matthiasm@0
|
293 NNLSChroma::getInputDomain() const
|
matthiasm@0
|
294 {
|
matthiasm@0
|
295 if (debug_on) cerr << "--> getInputDomain" << endl;
|
matthiasm@0
|
296 return FrequencyDomain;
|
matthiasm@0
|
297 }
|
matthiasm@0
|
298
|
matthiasm@0
|
299 size_t
|
matthiasm@0
|
300 NNLSChroma::getPreferredBlockSize() const
|
matthiasm@0
|
301 {
|
matthiasm@0
|
302 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
|
matthiasm@0
|
303 return 16384; // 0 means "I can handle any block size"
|
matthiasm@0
|
304 }
|
matthiasm@0
|
305
|
matthiasm@0
|
306 size_t
|
matthiasm@0
|
307 NNLSChroma::getPreferredStepSize() const
|
matthiasm@0
|
308 {
|
matthiasm@0
|
309 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
|
matthiasm@0
|
310 return 2048; // 0 means "anything sensible"; in practice this
|
matthiasm@0
|
311 // means the same as the block size for TimeDomain
|
matthiasm@0
|
312 // plugins, or half of it for FrequencyDomain plugins
|
matthiasm@0
|
313 }
|
matthiasm@0
|
314
|
matthiasm@0
|
315 size_t
|
matthiasm@0
|
316 NNLSChroma::getMinChannelCount() const
|
matthiasm@0
|
317 {
|
matthiasm@0
|
318 if (debug_on) cerr << "--> getMinChannelCount" << endl;
|
matthiasm@0
|
319 return 1;
|
matthiasm@0
|
320 }
|
matthiasm@0
|
321
|
matthiasm@0
|
322 size_t
|
matthiasm@0
|
323 NNLSChroma::getMaxChannelCount() const
|
matthiasm@0
|
324 {
|
matthiasm@0
|
325 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
|
matthiasm@0
|
326 return 1;
|
matthiasm@0
|
327 }
|
matthiasm@0
|
328
|
matthiasm@0
|
329 NNLSChroma::ParameterList
|
matthiasm@0
|
330 NNLSChroma::getParameterDescriptors() const
|
matthiasm@0
|
331 {
|
matthiasm@0
|
332 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
|
matthiasm@0
|
333 ParameterList list;
|
matthiasm@0
|
334
|
matthiasm@0
|
335 ParameterDescriptor d0;
|
matthiasm@0
|
336 d0.identifier = "notedict";
|
matthiasm@0
|
337 d0.name = "note dictionary";
|
matthiasm@0
|
338 d0.description = "Notes in different note dictionaries differ by their spectral shapes.";
|
matthiasm@0
|
339 d0.unit = "";
|
matthiasm@0
|
340 d0.minValue = 0;
|
matthiasm@1
|
341 d0.maxValue = 1;
|
matthiasm@0
|
342 d0.defaultValue = 0;
|
matthiasm@0
|
343 d0.isQuantized = true;
|
matthiasm@0
|
344 d0.valueNames.push_back("s = 0.6");
|
matthiasm@1
|
345 // d0.valueNames.push_back("s = 0.9");
|
matthiasm@1
|
346 // d0.valueNames.push_back("s linearly spaced");
|
matthiasm@0
|
347 d0.valueNames.push_back("no NNLS");
|
matthiasm@0
|
348 d0.quantizeStep = 1.0;
|
matthiasm@0
|
349 list.push_back(d0);
|
matthiasm@0
|
350
|
matthiasm@0
|
351 ParameterDescriptor d1;
|
matthiasm@0
|
352 d1.identifier = "tuningmode";
|
matthiasm@0
|
353 d1.name = "tuning mode";
|
matthiasm@3
|
354 d1.description = "Tuning can be performed locally or on the whole extraction segment.";
|
matthiasm@0
|
355 d1.unit = "";
|
matthiasm@0
|
356 d1.minValue = 0;
|
matthiasm@0
|
357 d1.maxValue = 1;
|
matthiasm@0
|
358 d1.defaultValue = 1;
|
matthiasm@0
|
359 d1.isQuantized = true;
|
matthiasm@0
|
360 d1.valueNames.push_back("global tuning");
|
matthiasm@0
|
361 d1.valueNames.push_back("local tuning");
|
matthiasm@0
|
362 d1.quantizeStep = 1.0;
|
matthiasm@0
|
363 list.push_back(d1);
|
matthiasm@0
|
364
|
matthiasm@0
|
365 ParameterDescriptor d2;
|
matthiasm@0
|
366 d2.identifier = "paling";
|
matthiasm@0
|
367 d2.name = "spectral paling";
|
matthiasm@0
|
368 d2.description = "Spectral paling: no paling - 0; whitening - 1.";
|
matthiasm@0
|
369 d2.unit = "";
|
matthiasm@3
|
370 d2.isQuantized = true;
|
matthiasm@3
|
371 d2.quantizeStep = 0.1;
|
matthiasm@3
|
372 d2.minValue = 0.0;
|
matthiasm@3
|
373 d2.maxValue = 1.0;
|
matthiasm@0
|
374 d2.defaultValue = 0.5;
|
matthiasm@3
|
375 // d2.isQuantized = false;
|
matthiasm@0
|
376 list.push_back(d2);
|
matthiasm@0
|
377
|
matthiasm@3
|
378 ParameterDescriptor d3;
|
matthiasm@3
|
379 d3.identifier = "preset";
|
matthiasm@3
|
380 d3.name = "preset";
|
matthiasm@3
|
381 d3.description = "Spectral paling: no paling - 0; whitening - 1.";
|
matthiasm@3
|
382 d3.unit = "";
|
matthiasm@3
|
383 d3.isQuantized = true;
|
matthiasm@3
|
384 d3.quantizeStep = 1;
|
matthiasm@3
|
385 d3.minValue = 0.0;
|
matthiasm@3
|
386 d3.maxValue = 2.0;
|
matthiasm@3
|
387 d3.defaultValue = 0.0;
|
matthiasm@3
|
388 d3.valueNames.push_back("polyphonic pop");
|
matthiasm@3
|
389 d3.valueNames.push_back("polyphonic pop (fast)");
|
matthiasm@3
|
390 d3.valueNames.push_back("solo keyboard");
|
matthiasm@3
|
391 d3.valueNames.push_back("manual");
|
matthiasm@3
|
392 list.push_back(d3);
|
matthiasm@0
|
393 return list;
|
matthiasm@0
|
394 }
|
matthiasm@0
|
395
|
matthiasm@0
|
396 float
|
matthiasm@0
|
397 NNLSChroma::getParameter(string identifier) const
|
matthiasm@0
|
398 {
|
matthiasm@3
|
399 if (debug_on) cerr << "--> getParameter" << endl;
|
matthiasm@0
|
400 if (identifier == "notedict") {
|
matthiasm@0
|
401 return m_dictID;
|
matthiasm@0
|
402 }
|
matthiasm@0
|
403
|
matthiasm@0
|
404 if (identifier == "paling") {
|
matthiasm@0
|
405 return m_paling;
|
matthiasm@0
|
406 }
|
matthiasm@0
|
407
|
matthiasm@0
|
408 if (identifier == "tuningmode") {
|
matthiasm@0
|
409 if (m_tuneLocal) {
|
matthiasm@0
|
410 return 1.0;
|
matthiasm@0
|
411 } else {
|
matthiasm@0
|
412 return 0.0;
|
matthiasm@0
|
413 }
|
matthiasm@0
|
414 }
|
matthiasm@3
|
415 if (identifier == "preset") {
|
matthiasm@3
|
416 return m_preset;
|
matthiasm@3
|
417 }
|
matthiasm@0
|
418 return 0;
|
matthiasm@0
|
419
|
matthiasm@0
|
420 }
|
matthiasm@0
|
421
|
matthiasm@0
|
422 void
|
matthiasm@0
|
423 NNLSChroma::setParameter(string identifier, float value)
|
matthiasm@0
|
424 {
|
matthiasm@3
|
425 if (debug_on) cerr << "--> setParameter" << endl;
|
matthiasm@0
|
426 if (identifier == "notedict") {
|
matthiasm@0
|
427 m_dictID = (int) value;
|
matthiasm@0
|
428 }
|
matthiasm@0
|
429
|
matthiasm@0
|
430 if (identifier == "paling") {
|
matthiasm@0
|
431 m_paling = value;
|
matthiasm@0
|
432 }
|
matthiasm@0
|
433
|
matthiasm@0
|
434 if (identifier == "tuningmode") {
|
matthiasm@0
|
435 m_tuneLocal = (value > 0) ? true : false;
|
matthiasm@0
|
436 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
|
matthiasm@0
|
437 }
|
matthiasm@3
|
438 if (identifier == "preset") {
|
matthiasm@3
|
439 m_preset = value;
|
matthiasm@3
|
440 if (m_preset == 0.0) {
|
matthiasm@3
|
441 m_tuneLocal = false;
|
matthiasm@3
|
442 m_paling = 1.0;
|
matthiasm@3
|
443 m_dictID = 0.0;
|
matthiasm@3
|
444 }
|
matthiasm@3
|
445 if (m_preset == 1.0) {
|
matthiasm@3
|
446 m_tuneLocal = false;
|
matthiasm@3
|
447 m_paling = 1.0;
|
matthiasm@3
|
448 m_dictID = 1.0;
|
matthiasm@3
|
449 }
|
matthiasm@3
|
450 if (m_preset == 2.0) {
|
matthiasm@3
|
451 m_tuneLocal = false;
|
matthiasm@3
|
452 m_paling = 0.7;
|
matthiasm@3
|
453 m_dictID = 0.0;
|
matthiasm@3
|
454 }
|
matthiasm@3
|
455 }
|
matthiasm@0
|
456 }
|
matthiasm@0
|
457
|
matthiasm@0
|
458 NNLSChroma::ProgramList
|
matthiasm@0
|
459 NNLSChroma::getPrograms() const
|
matthiasm@0
|
460 {
|
matthiasm@0
|
461 if (debug_on) cerr << "--> getPrograms" << endl;
|
matthiasm@0
|
462 ProgramList list;
|
matthiasm@0
|
463
|
matthiasm@0
|
464 // If you have no programs, return an empty list (or simply don't
|
matthiasm@0
|
465 // implement this function or getCurrentProgram/selectProgram)
|
matthiasm@0
|
466
|
matthiasm@0
|
467 return list;
|
matthiasm@0
|
468 }
|
matthiasm@0
|
469
|
matthiasm@0
|
470 string
|
matthiasm@0
|
471 NNLSChroma::getCurrentProgram() const
|
matthiasm@0
|
472 {
|
matthiasm@0
|
473 if (debug_on) cerr << "--> getCurrentProgram" << endl;
|
matthiasm@0
|
474 return ""; // no programs
|
matthiasm@0
|
475 }
|
matthiasm@0
|
476
|
matthiasm@0
|
477 void
|
matthiasm@0
|
478 NNLSChroma::selectProgram(string name)
|
matthiasm@0
|
479 {
|
matthiasm@0
|
480 if (debug_on) cerr << "--> selectProgram" << endl;
|
matthiasm@0
|
481 }
|
matthiasm@0
|
482
|
matthiasm@0
|
483
|
matthiasm@0
|
484 NNLSChroma::OutputList
|
matthiasm@0
|
485 NNLSChroma::getOutputDescriptors() const
|
matthiasm@0
|
486 {
|
matthiasm@0
|
487 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
|
matthiasm@0
|
488 OutputList list;
|
matthiasm@0
|
489
|
matthiasm@0
|
490 // Make chroma names for the binNames property
|
matthiasm@0
|
491 vector<string> chromanames;
|
matthiasm@0
|
492 vector<string> bothchromanames;
|
matthiasm@0
|
493 for (int iNote = 0; iNote < 24; iNote++) {
|
matthiasm@0
|
494 bothchromanames.push_back(notenames[iNote]);
|
matthiasm@0
|
495 if (iNote < 12) {
|
matthiasm@0
|
496 chromanames.push_back(notenames[iNote]);
|
matthiasm@0
|
497 }
|
matthiasm@0
|
498 }
|
matthiasm@0
|
499
|
matthiasm@1
|
500 // int nNote = 84;
|
matthiasm@0
|
501
|
matthiasm@0
|
502 // See OutputDescriptor documentation for the possibilities here.
|
matthiasm@0
|
503 // Every plugin must have at least one output.
|
matthiasm@0
|
504
|
matthiasm@0
|
505 OutputDescriptor d0;
|
matthiasm@0
|
506 d0.identifier = "tuning";
|
matthiasm@0
|
507 d0.name = "Tuning";
|
matthiasm@0
|
508 d0.description = "The concert pitch.";
|
matthiasm@0
|
509 d0.unit = "Hz";
|
matthiasm@0
|
510 d0.hasFixedBinCount = true;
|
matthiasm@0
|
511 d0.binCount = 0;
|
matthiasm@0
|
512 d0.hasKnownExtents = true;
|
matthiasm@0
|
513 d0.minValue = 427.47;
|
matthiasm@0
|
514 d0.maxValue = 452.89;
|
matthiasm@0
|
515 d0.isQuantized = false;
|
matthiasm@0
|
516 d0.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@0
|
517 d0.hasDuration = false;
|
matthiasm@0
|
518 list.push_back(d0);
|
matthiasm@0
|
519
|
matthiasm@0
|
520 OutputDescriptor d1;
|
matthiasm@0
|
521 d1.identifier = "logfreqspec";
|
matthiasm@0
|
522 d1.name = "Log-Frequency Spectrum";
|
matthiasm@0
|
523 d1.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping.";
|
matthiasm@0
|
524 d1.unit = "";
|
matthiasm@0
|
525 d1.hasFixedBinCount = true;
|
matthiasm@0
|
526 d1.binCount = nNote;
|
matthiasm@0
|
527 d1.hasKnownExtents = false;
|
matthiasm@0
|
528 d1.isQuantized = false;
|
matthiasm@0
|
529 d1.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
530 d1.hasDuration = false;
|
matthiasm@0
|
531 d1.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
532 list.push_back(d1);
|
matthiasm@0
|
533
|
matthiasm@0
|
534 OutputDescriptor d2;
|
matthiasm@0
|
535 d2.identifier = "tunedlogfreqspec";
|
matthiasm@0
|
536 d2.name = "Tuned Log-Frequency Spectrum";
|
matthiasm@0
|
537 d2.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping, then its tuned using the estimated tuning frequency.";
|
matthiasm@0
|
538 d2.unit = "";
|
matthiasm@0
|
539 d2.hasFixedBinCount = true;
|
matthiasm@0
|
540 d2.binCount = 256;
|
matthiasm@0
|
541 d2.hasKnownExtents = false;
|
matthiasm@0
|
542 d2.isQuantized = false;
|
matthiasm@0
|
543 d2.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
544 d2.hasDuration = false;
|
matthiasm@0
|
545 d2.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
546 list.push_back(d2);
|
matthiasm@0
|
547
|
matthiasm@0
|
548 OutputDescriptor d3;
|
matthiasm@0
|
549 d3.identifier = "semitonespectrum";
|
matthiasm@0
|
550 d3.name = "Semitone Spectrum";
|
matthiasm@0
|
551 d3.description = "A semitone-spaced log-frequency spectrum derived from the third-of-a-semitone-spaced tuned log-frequency spectrum.";
|
matthiasm@0
|
552 d3.unit = "";
|
matthiasm@0
|
553 d3.hasFixedBinCount = true;
|
matthiasm@0
|
554 d3.binCount = 84;
|
matthiasm@0
|
555 d3.hasKnownExtents = false;
|
matthiasm@0
|
556 d3.isQuantized = false;
|
matthiasm@0
|
557 d3.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
558 d3.hasDuration = false;
|
matthiasm@0
|
559 d3.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
560 list.push_back(d3);
|
matthiasm@0
|
561
|
matthiasm@0
|
562 OutputDescriptor d4;
|
matthiasm@0
|
563 d4.identifier = "chroma";
|
matthiasm@0
|
564 d4.name = "Chromagram";
|
matthiasm@0
|
565 d4.description = "Tuning-adjusted chromagram from NNLS soft transcription, with an emphasis on the medium note range.";
|
matthiasm@0
|
566 d4.unit = "";
|
matthiasm@0
|
567 d4.hasFixedBinCount = true;
|
matthiasm@0
|
568 d4.binCount = 12;
|
matthiasm@0
|
569 d4.binNames = chromanames;
|
matthiasm@0
|
570 d4.hasKnownExtents = false;
|
matthiasm@0
|
571 d4.isQuantized = false;
|
matthiasm@0
|
572 d4.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
573 d4.hasDuration = false;
|
matthiasm@0
|
574 d4.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
575 list.push_back(d4);
|
matthiasm@0
|
576
|
matthiasm@0
|
577 OutputDescriptor d5;
|
matthiasm@0
|
578 d5.identifier = "basschroma";
|
matthiasm@0
|
579 d5.name = "Bass Chromagram";
|
matthiasm@0
|
580 d5.description = "Tuning-adjusted bass chromagram from NNLS soft transcription, with an emphasis on the bass note range.";
|
matthiasm@0
|
581 d5.unit = "";
|
matthiasm@0
|
582 d5.hasFixedBinCount = true;
|
matthiasm@0
|
583 d5.binCount = 12;
|
matthiasm@0
|
584 d5.binNames = chromanames;
|
matthiasm@0
|
585 d5.hasKnownExtents = false;
|
matthiasm@0
|
586 d5.isQuantized = false;
|
matthiasm@0
|
587 d5.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
588 d5.hasDuration = false;
|
matthiasm@0
|
589 d5.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
590 list.push_back(d5);
|
matthiasm@0
|
591
|
matthiasm@0
|
592 OutputDescriptor d6;
|
matthiasm@0
|
593 d6.identifier = "bothchroma";
|
matthiasm@0
|
594 d6.name = "Chromagram and Bass Chromagram";
|
matthiasm@0
|
595 d6.description = "Tuning-adjusted chromagram and bass chromagram (stacked on top of each other) from NNLS soft transcription.";
|
matthiasm@0
|
596 d6.unit = "";
|
matthiasm@0
|
597 d6.hasFixedBinCount = true;
|
matthiasm@0
|
598 d6.binCount = 24;
|
matthiasm@0
|
599 d6.binNames = bothchromanames;
|
matthiasm@0
|
600 d6.hasKnownExtents = false;
|
matthiasm@0
|
601 d6.isQuantized = false;
|
matthiasm@0
|
602 d6.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
603 d6.hasDuration = false;
|
matthiasm@0
|
604 d6.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
605 list.push_back(d6);
|
matthiasm@0
|
606
|
matthiasm@0
|
607 OutputDescriptor d7;
|
matthiasm@0
|
608 d7.identifier = "simplechord";
|
matthiasm@0
|
609 d7.name = "Simple Chord Estimate";
|
matthiasm@0
|
610 d7.description = "A simple chord estimate based on the inner product of chord templates with the smoothed chroma.";
|
matthiasm@0
|
611 d7.unit = "";
|
matthiasm@0
|
612 d7.hasFixedBinCount = true;
|
matthiasm@0
|
613 d7.binCount = 0;
|
matthiasm@0
|
614 d7.hasKnownExtents = false;
|
matthiasm@0
|
615 d7.isQuantized = false;
|
matthiasm@0
|
616 d7.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@0
|
617 d7.hasDuration = false;
|
matthiasm@0
|
618 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
619 list.push_back(d7);
|
matthiasm@0
|
620
|
matthiasm@1
|
621 // OutputDescriptor d8;
|
matthiasm@1
|
622 // d8.identifier = "inconsistency";
|
matthiasm@1
|
623 // d8.name = "Harmonic inconsistency value";
|
matthiasm@1
|
624 // d8.description = "Harmonic inconsistency. Indicates music if low, non-music or speech when high.";
|
matthiasm@1
|
625 // d8.unit = "";
|
matthiasm@1
|
626 // d8.hasFixedBinCount = true;
|
matthiasm@1
|
627 // d8.binCount = 1;
|
matthiasm@1
|
628 // d8.hasKnownExtents = false;
|
matthiasm@1
|
629 // d8.isQuantized = false;
|
matthiasm@1
|
630 // d8.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@1
|
631 // d8.hasDuration = false;
|
matthiasm@1
|
632 // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@1
|
633 // list.push_back(d8);
|
matthiasm@1
|
634 //
|
matthiasm@1
|
635 // OutputDescriptor d9;
|
matthiasm@1
|
636 // d9.identifier = "inconsistencysegment";
|
matthiasm@1
|
637 // d9.name = "Harmonic inconsistency segmenter";
|
matthiasm@1
|
638 // d9.description = "Segments the audio based on the harmonic inconsistency value into speech and music.";
|
matthiasm@1
|
639 // d9.unit = "";
|
matthiasm@1
|
640 // d9.hasFixedBinCount = true;
|
matthiasm@1
|
641 // d9.binCount = 0;
|
matthiasm@1
|
642 // d9.hasKnownExtents = true;
|
matthiasm@1
|
643 // d9.minValue = 0.1;
|
matthiasm@1
|
644 // d9.maxValue = 0.9;
|
matthiasm@1
|
645 // d9.isQuantized = false;
|
matthiasm@1
|
646 // d9.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@1
|
647 // d9.hasDuration = false;
|
matthiasm@1
|
648 // d9.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@1
|
649 // list.push_back(d9);
|
matthiasm@1
|
650 //
|
matthiasm@1
|
651 OutputDescriptor d10;
|
matthiasm@1
|
652 d10.identifier = "localtuning";
|
matthiasm@1
|
653 d10.name = "Local tuning";
|
matthiasm@1
|
654 d10.description = "";
|
matthiasm@1
|
655 d10.unit = "Hz";
|
matthiasm@1
|
656 d10.hasFixedBinCount = true;
|
matthiasm@1
|
657 d10.binCount = 1;
|
matthiasm@1
|
658 d10.hasKnownExtents = true;
|
matthiasm@1
|
659 d10.minValue = 427.47;
|
matthiasm@1
|
660 d10.maxValue = 452.89;
|
matthiasm@1
|
661 d10.isQuantized = false;
|
matthiasm@3
|
662 d10.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@1
|
663 d10.hasDuration = false;
|
matthiasm@3
|
664 // d10.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@1
|
665 list.push_back(d10);
|
matthiasm@1
|
666
|
matthiasm@0
|
667 return list;
|
matthiasm@0
|
668 }
|
matthiasm@0
|
669
|
matthiasm@0
|
670
|
matthiasm@0
|
671 bool
|
matthiasm@0
|
672 NNLSChroma::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@0
|
673 {
|
matthiasm@1
|
674 if (debug_on) {
|
matthiasm@1
|
675 cerr << "--> initialise";
|
matthiasm@1
|
676 }
|
matthiasm@1
|
677
|
matthiasm@0
|
678 if (channels < getMinChannelCount() ||
|
matthiasm@0
|
679 channels > getMaxChannelCount()) return false;
|
matthiasm@0
|
680 m_blockSize = blockSize;
|
matthiasm@0
|
681 m_stepSize = stepSize;
|
matthiasm@0
|
682 frameCount = 0;
|
matthiasm@0
|
683 int tempn = 256 * m_blockSize/2;
|
matthiasm@1
|
684 cerr << "length of tempkernel : " << tempn << endl;
|
matthiasm@1
|
685 float *tempkernel;
|
matthiasm@1
|
686
|
matthiasm@1
|
687 tempkernel = new float[tempn];
|
matthiasm@1
|
688
|
matthiasm@0
|
689 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
|
matthiasm@1
|
690 m_kernelValue.clear();
|
matthiasm@1
|
691 m_kernelFftIndex.clear();
|
matthiasm@1
|
692 m_kernelNoteIndex.clear();
|
matthiasm@1
|
693 int countNonzero = 0;
|
matthiasm@0
|
694 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
|
matthiasm@1
|
695 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) {
|
matthiasm@1
|
696 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
matthiasm@1
|
697 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
|
matthiasm@0
|
698 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
matthiasm@1
|
699 countNonzero++;
|
matthiasm@0
|
700 }
|
matthiasm@1
|
701 m_kernelFftIndex.push_back(iFFT);
|
matthiasm@1
|
702 m_kernelNoteIndex.push_back(iNote);
|
matthiasm@0
|
703 }
|
matthiasm@0
|
704 }
|
matthiasm@1
|
705 }
|
matthiasm@1
|
706 cerr << "nonzero count : " << countNonzero << endl;
|
matthiasm@1
|
707 delete [] tempkernel;
|
matthiasm@3
|
708 ofstream myfile;
|
matthiasm@3
|
709 myfile.open ("matrix.txt");
|
matthiasm@3
|
710 // myfile << "Writing this to a file.\n";
|
matthiasm@3
|
711 for (int i = 0; i < nNote * 84; ++i) {
|
matthiasm@3
|
712 myfile << m_dict[i] << endl;
|
matthiasm@3
|
713 }
|
matthiasm@3
|
714 myfile.close();
|
matthiasm@0
|
715 return true;
|
matthiasm@0
|
716 }
|
matthiasm@0
|
717
|
matthiasm@0
|
718 void
|
matthiasm@0
|
719 NNLSChroma::reset()
|
matthiasm@0
|
720 {
|
matthiasm@0
|
721 if (debug_on) cerr << "--> reset";
|
matthiasm@0
|
722 // Clear buffers, reset stored values, etc
|
matthiasm@0
|
723 frameCount = 0;
|
matthiasm@0
|
724 m_dictID = 0;
|
matthiasm@1
|
725 m_kernelValue.clear();
|
matthiasm@1
|
726 m_kernelFftIndex.clear();
|
matthiasm@1
|
727 m_kernelNoteIndex.clear();
|
matthiasm@0
|
728 }
|
matthiasm@0
|
729
|
matthiasm@0
|
730 NNLSChroma::FeatureSet
|
matthiasm@0
|
731 NNLSChroma::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
matthiasm@0
|
732 {
|
matthiasm@0
|
733 if (debug_on) cerr << "--> process" << endl;
|
matthiasm@0
|
734 // int nNote = 84; // TODO: this should be globally set and/or depend on the kernel matrix
|
matthiasm@0
|
735
|
matthiasm@0
|
736 frameCount++;
|
matthiasm@0
|
737 float *magnitude = new float[m_blockSize/2];
|
matthiasm@0
|
738
|
matthiasm@0
|
739 Feature f10; // local tuning
|
matthiasm@3
|
740 f10.hasTimestamp = true;
|
matthiasm@3
|
741 f10.timestamp = timestamp - Vamp::RealTime::fromSeconds(0);
|
matthiasm@0
|
742 const float *fbuf = inputBuffers[0];
|
matthiasm@0
|
743
|
matthiasm@0
|
744 // make magnitude
|
matthiasm@0
|
745 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
|
matthiasm@0
|
746 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
|
matthiasm@0
|
747 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
|
matthiasm@1
|
748 // magnitude[iBin] = (iBin == frameCount - 1 || frameCount < 2) ? 1.0 : 0.0;
|
matthiasm@0
|
749 }
|
matthiasm@0
|
750
|
matthiasm@0
|
751
|
matthiasm@0
|
752 // note magnitude mapping using pre-calculated matrix
|
matthiasm@0
|
753 float *nm = new float[nNote]; // note magnitude
|
matthiasm@0
|
754 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
matthiasm@0
|
755 nm[iNote] = 0; // initialise as 0
|
matthiasm@0
|
756 }
|
matthiasm@0
|
757 int binCount = 0;
|
matthiasm@0
|
758 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
|
matthiasm@0
|
759 // cerr << ".";
|
matthiasm@1
|
760 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
|
matthiasm@1
|
761 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl;
|
matthiasm@0
|
762 binCount++;
|
matthiasm@0
|
763 }
|
matthiasm@1
|
764 // cerr << nm[20];
|
matthiasm@1
|
765 // cerr << endl;
|
matthiasm@0
|
766
|
matthiasm@0
|
767
|
matthiasm@0
|
768 float one_over_N = 1.0/frameCount;
|
matthiasm@0
|
769 // update means of complex tuning variables
|
matthiasm@0
|
770 m_meanTuning0 *= float(frameCount-1)*one_over_N;
|
matthiasm@0
|
771 m_meanTuning1 *= float(frameCount-1)*one_over_N;
|
matthiasm@0
|
772 m_meanTuning2 *= float(frameCount-1)*one_over_N;
|
matthiasm@0
|
773
|
matthiasm@0
|
774 for (int iTone = 0; iTone < 160; iTone = iTone + 3) {
|
matthiasm@0
|
775 m_meanTuning0 += nm[iTone + 0]*one_over_N;
|
matthiasm@0
|
776 m_meanTuning1 += nm[iTone + 1]*one_over_N;
|
matthiasm@0
|
777 m_meanTuning2 += nm[iTone + 2]*one_over_N;
|
matthiasm@3
|
778 float ratioOld = 0.997;
|
matthiasm@3
|
779 m_localTuning0 *= ratioOld; m_localTuning0 += nm[iTone + 0] * (1 - ratioOld);
|
matthiasm@3
|
780 m_localTuning1 *= ratioOld; m_localTuning1 += nm[iTone + 1] * (1 - ratioOld);
|
matthiasm@3
|
781 m_localTuning2 *= ratioOld; m_localTuning2 += nm[iTone + 2] * (1 - ratioOld);
|
matthiasm@0
|
782 }
|
matthiasm@0
|
783
|
matthiasm@0
|
784 // if (m_tuneLocal) {
|
matthiasm@0
|
785 // local tuning
|
matthiasm@0
|
786 float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2;
|
matthiasm@0
|
787 float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2;
|
matthiasm@0
|
788 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
|
matthiasm@0
|
789 m_localTuning.push_back(normalisedtuning);
|
matthiasm@0
|
790 float tuning440 = 440 * pow(2,normalisedtuning/12);
|
matthiasm@0
|
791 f10.values.push_back(tuning440);
|
matthiasm@3
|
792 // cerr << tuning440 << endl;
|
matthiasm@0
|
793 // }
|
matthiasm@0
|
794
|
matthiasm@0
|
795 Feature f1; // logfreqspec
|
matthiasm@0
|
796 f1.hasTimestamp = true;
|
matthiasm@0
|
797 f1.timestamp = timestamp;
|
matthiasm@0
|
798 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
matthiasm@0
|
799 f1.values.push_back(nm[iNote]);
|
matthiasm@0
|
800 }
|
matthiasm@0
|
801
|
matthiasm@0
|
802 FeatureSet fs;
|
matthiasm@0
|
803 fs[1].push_back(f1);
|
matthiasm@3
|
804 fs[8].push_back(f10);
|
matthiasm@0
|
805
|
matthiasm@0
|
806 // deletes
|
matthiasm@0
|
807 delete[] magnitude;
|
matthiasm@0
|
808 delete[] nm;
|
matthiasm@0
|
809
|
matthiasm@0
|
810 m_fl.push_back(f1); // remember note magnitude for getRemainingFeatures
|
matthiasm@0
|
811 return fs;
|
matthiasm@0
|
812 }
|
matthiasm@0
|
813
|
matthiasm@0
|
814 NNLSChroma::FeatureSet
|
matthiasm@0
|
815 NNLSChroma::getRemainingFeatures()
|
matthiasm@0
|
816 {
|
matthiasm@0
|
817 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
|
matthiasm@0
|
818 FeatureSet fsOut;
|
matthiasm@0
|
819 //
|
matthiasm@1
|
820 /** Calculate Tuning
|
matthiasm@1
|
821 calculate tuning from (using the angle of the complex number defined by the
|
matthiasm@1
|
822 cumulative mean real and imag values)
|
matthiasm@1
|
823 **/
|
matthiasm@1
|
824 float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
|
matthiasm@1
|
825 float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
|
matthiasm@1
|
826 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
|
matthiasm@1
|
827 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
|
matthiasm@1
|
828 int intShift = floor(normalisedtuning * 3);
|
matthiasm@1
|
829 float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
|
matthiasm@1
|
830
|
matthiasm@1
|
831 char buffer0 [50];
|
matthiasm@1
|
832
|
matthiasm@1
|
833 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
|
matthiasm@1
|
834
|
matthiasm@1
|
835 // cerr << "normalisedtuning: " << normalisedtuning << '\n';
|
matthiasm@1
|
836
|
matthiasm@1
|
837 // push tuning to FeatureSet fsOut
|
matthiasm@1
|
838 Feature f0; // tuning
|
matthiasm@1
|
839 f0.hasTimestamp = true;
|
matthiasm@1
|
840 f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));;
|
matthiasm@1
|
841 f0.label = buffer0;
|
matthiasm@1
|
842 fsOut[0].push_back(f0);
|
matthiasm@1
|
843
|
matthiasm@1
|
844 /** Tune Log-Frequency Spectrogram
|
matthiasm@1
|
845 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
|
matthiasm@1
|
846 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
|
matthiasm@1
|
847 **/
|
matthiasm@1
|
848
|
matthiasm@1
|
849 float tempValue = 0;
|
matthiasm@1
|
850 float dbThreshold = 0; // relative to the background spectrum
|
matthiasm@1
|
851 float thresh = pow(10,dbThreshold/20);
|
matthiasm@1
|
852 // cerr << "tune local ? " << m_tuneLocal << endl;
|
matthiasm@1
|
853 int count = 0;
|
matthiasm@1
|
854
|
matthiasm@1
|
855 for (FeatureList::iterator i = m_fl.begin(); i != m_fl.end(); ++i) {
|
matthiasm@1
|
856 Feature f1 = *i;
|
matthiasm@1
|
857 Feature f2; // tuned log-frequency spectrum
|
matthiasm@1
|
858 f2.hasTimestamp = true;
|
matthiasm@1
|
859 f2.timestamp = f1.timestamp;
|
matthiasm@1
|
860 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
|
matthiasm@1
|
861
|
matthiasm@1
|
862 if (m_tuneLocal) {
|
matthiasm@1
|
863 intShift = floor(m_localTuning[count] * 3);
|
matthiasm@1
|
864 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
|
matthiasm@1
|
865 }
|
matthiasm@1
|
866
|
matthiasm@1
|
867 // cerr << intShift << " " << intFactor << endl;
|
matthiasm@1
|
868
|
matthiasm@1
|
869 for (int k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
|
matthiasm@1
|
870 tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor;
|
matthiasm@1
|
871 f2.values.push_back(tempValue);
|
matthiasm@1
|
872 }
|
matthiasm@1
|
873
|
matthiasm@1
|
874 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
|
matthiasm@1
|
875 vector<float> runningmean = SpecialConvolution(f2.values,hw);
|
matthiasm@1
|
876 vector<float> runningstd;
|
matthiasm@1
|
877 for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance)
|
matthiasm@1
|
878 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
|
matthiasm@1
|
879 }
|
matthiasm@1
|
880 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
|
matthiasm@1
|
881 for (int i = 0; i < 256; i++) {
|
matthiasm@1
|
882 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
|
matthiasm@1
|
883 if (runningstd[i] > 0) {
|
matthiasm@1
|
884 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
|
matthiasm@1
|
885 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
|
matthiasm@1
|
886 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
|
matthiasm@1
|
887 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
|
matthiasm@1
|
888 }
|
matthiasm@1
|
889 if (f2.values[i] < 0) {
|
matthiasm@1
|
890 cerr << "ERROR: negative value in logfreq spectrum" << endl;
|
matthiasm@1
|
891 }
|
matthiasm@1
|
892 }
|
matthiasm@1
|
893 fsOut[2].push_back(f2);
|
matthiasm@1
|
894 count++;
|
matthiasm@1
|
895 }
|
matthiasm@1
|
896
|
matthiasm@1
|
897 /** Semitone spectrum and chromagrams
|
matthiasm@1
|
898 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
|
matthiasm@1
|
899 is inferred using a non-negative least squares algorithm.
|
matthiasm@1
|
900 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
|
matthiasm@1
|
901 bass and treble stacked onto each other).
|
matthiasm@1
|
902 **/
|
matthiasm@1
|
903 // taucs_ccs_matrix* A_original_ordering = taucs_construct_sorted_ccs_matrix(nnlsdict06, nnls_m, nnls_n);
|
matthiasm@1
|
904
|
matthiasm@1
|
905 vector<vector<float> > chordogram;
|
matthiasm@3
|
906 vector<vector<int> > scoreChordogram;
|
matthiasm@1
|
907 vector<float> oldchroma = vector<float>(12,0);
|
matthiasm@1
|
908 vector<float> oldbasschroma = vector<float>(12,0);
|
matthiasm@1
|
909 count = 0;
|
matthiasm@1
|
910
|
matthiasm@1
|
911 for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) {
|
matthiasm@1
|
912 Feature f2 = *it; // logfreq spectrum
|
matthiasm@1
|
913 Feature f3; // semitone spectrum
|
matthiasm@1
|
914 Feature f4; // treble chromagram
|
matthiasm@1
|
915 Feature f5; // bass chromagram
|
matthiasm@1
|
916 Feature f6; // treble and bass chromagram
|
matthiasm@1
|
917
|
matthiasm@1
|
918 f3.hasTimestamp = true;
|
matthiasm@1
|
919 f3.timestamp = f2.timestamp;
|
matthiasm@1
|
920
|
matthiasm@1
|
921 f4.hasTimestamp = true;
|
matthiasm@1
|
922 f4.timestamp = f2.timestamp;
|
matthiasm@1
|
923
|
matthiasm@1
|
924 f5.hasTimestamp = true;
|
matthiasm@1
|
925 f5.timestamp = f2.timestamp;
|
matthiasm@1
|
926
|
matthiasm@1
|
927 f6.hasTimestamp = true;
|
matthiasm@1
|
928 f6.timestamp = f2.timestamp;
|
matthiasm@1
|
929
|
matthiasm@3
|
930 float b[256];
|
matthiasm@1
|
931
|
matthiasm@1
|
932 bool some_b_greater_zero = false;
|
matthiasm@3
|
933 float sumb = 0;
|
matthiasm@1
|
934 for (int i = 0; i < 256; i++) {
|
matthiasm@3
|
935 // b[i] = m_dict[(256 * count + i) % (256 * 84)];
|
matthiasm@3
|
936 b[i] = f2.values[i];
|
matthiasm@3
|
937 sumb += b[i];
|
matthiasm@1
|
938 if (b[i] > 0) {
|
matthiasm@1
|
939 some_b_greater_zero = true;
|
matthiasm@1
|
940 }
|
matthiasm@1
|
941 }
|
matthiasm@1
|
942
|
matthiasm@1
|
943 // here's where the non-negative least squares algorithm calculates the note activation x
|
matthiasm@1
|
944
|
matthiasm@1
|
945 vector<float> chroma = vector<float>(12, 0);
|
matthiasm@1
|
946 vector<float> basschroma = vector<float>(12, 0);
|
matthiasm@1
|
947 float currval;
|
matthiasm@1
|
948 unsigned iSemitone = 0;
|
matthiasm@1
|
949
|
matthiasm@1
|
950 if (some_b_greater_zero) {
|
matthiasm@3
|
951 if (m_dictID == 1) {
|
matthiasm@1
|
952 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
matthiasm@1
|
953 currval = 0;
|
matthiasm@3
|
954 currval += b[iNote + 1 + -1] * 0.5;
|
matthiasm@3
|
955 currval += b[iNote + 1 + 0] * 1.0;
|
matthiasm@3
|
956 currval += b[iNote + 1 + 1] * 0.5;
|
matthiasm@1
|
957 f3.values.push_back(currval);
|
matthiasm@1
|
958 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
|
matthiasm@1
|
959 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
|
matthiasm@1
|
960 iSemitone++;
|
matthiasm@1
|
961 }
|
matthiasm@1
|
962
|
matthiasm@1
|
963 } else {
|
matthiasm@3
|
964 float x[84+1000];
|
matthiasm@3
|
965 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
|
matthiasm@3
|
966 // for (int i = 0; i < 84; ++i) {
|
matthiasm@3
|
967 // x[i] = b[3*i+3];
|
matthiasm@3
|
968 // }
|
matthiasm@3
|
969 float rnorm;
|
matthiasm@3
|
970 float w[84+1000];
|
matthiasm@3
|
971 float zz[84+1000];
|
matthiasm@3
|
972 int indx[84+1000];
|
matthiasm@1
|
973 int mode;
|
matthiasm@3
|
974 float curr_dict[256*84];
|
matthiasm@3
|
975 for (unsigned i = 0; i < 256 * 84; ++i) {
|
matthiasm@3
|
976 curr_dict[i] = 1.0 * m_dict[i];
|
matthiasm@3
|
977 }
|
matthiasm@3
|
978 nnls(curr_dict, nNote, nNote, 84, b, x, &rnorm, w, zz, indx, &mode);
|
matthiasm@3
|
979 for (unsigned iNote = 0; iNote < 84; ++iNote) {
|
matthiasm@3
|
980 // for (unsigned kNote = 0; kNote < 256; ++kNote) {
|
matthiasm@3
|
981 // x[iNote] += m_dict[kNote + nNote * iNote] * b[kNote];
|
matthiasm@3
|
982 // }
|
matthiasm@3
|
983 f3.values.push_back(x[iNote]);
|
matthiasm@3
|
984 // cerr << mode << endl;
|
matthiasm@3
|
985 chroma[iNote % 12] += x[iNote] * treblewindow[iNote];
|
matthiasm@3
|
986 basschroma[iNote % 12] += x[iNote] * basswindow[iNote];
|
matthiasm@3
|
987 // iSemitone++;
|
matthiasm@3
|
988 }
|
matthiasm@1
|
989 }
|
matthiasm@1
|
990 }
|
matthiasm@1
|
991
|
matthiasm@1
|
992 f4.values = chroma;
|
matthiasm@1
|
993 f5.values = basschroma;
|
matthiasm@1
|
994 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
|
matthiasm@1
|
995 f6.values = chroma;
|
matthiasm@1
|
996
|
matthiasm@1
|
997 // local chord estimation
|
matthiasm@1
|
998 vector<float> currentChordSalience;
|
matthiasm@1
|
999 float tempchordvalue = 0;
|
matthiasm@1
|
1000 float sumchordvalue = 0;
|
matthiasm@1
|
1001 int nChord = nChorddict / 24;
|
matthiasm@1
|
1002 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@1
|
1003 tempchordvalue = 0;
|
matthiasm@1
|
1004 for (int iBin = 0; iBin < 12; iBin++) {
|
matthiasm@1
|
1005 tempchordvalue += chorddict[24 * iChord + iBin] * chroma[iBin];
|
matthiasm@1
|
1006 }
|
matthiasm@1
|
1007 for (int iBin = 12; iBin < 24; iBin++) {
|
matthiasm@1
|
1008 tempchordvalue += chorddict[24 * iChord + iBin] * chroma[iBin];
|
matthiasm@1
|
1009 }
|
matthiasm@1
|
1010 sumchordvalue+=tempchordvalue;
|
matthiasm@1
|
1011 currentChordSalience.push_back(tempchordvalue);
|
matthiasm@1
|
1012 }
|
matthiasm@1
|
1013 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@1
|
1014 currentChordSalience[iChord] /= sumchordvalue;
|
matthiasm@1
|
1015 }
|
matthiasm@1
|
1016 chordogram.push_back(currentChordSalience);
|
matthiasm@1
|
1017
|
matthiasm@1
|
1018 fsOut[3].push_back(f3);
|
matthiasm@1
|
1019 fsOut[4].push_back(f4);
|
matthiasm@1
|
1020 fsOut[5].push_back(f5);
|
matthiasm@1
|
1021 fsOut[6].push_back(f6);
|
matthiasm@1
|
1022 count++;
|
matthiasm@1
|
1023 }
|
matthiasm@0
|
1024 // int musicitykernelwidth = (50 * 2048) / m_stepSize;
|
matthiasm@0
|
1025 //
|
matthiasm@3
|
1026 /* Simple chord estimation
|
matthiasm@3
|
1027 I just take the local chord estimates ("currentChordSalience") and average them over time, then
|
matthiasm@3
|
1028 take the maximum. Very simple, don't do this at home...
|
matthiasm@3
|
1029 */
|
matthiasm@3
|
1030 count = 0;
|
matthiasm@3
|
1031 int halfwindowlength = m_inputSampleRate / m_stepSize;
|
matthiasm@3
|
1032 int nChord = nChorddict / 24;
|
matthiasm@3
|
1033 vector<int> chordSequence;
|
matthiasm@3
|
1034 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram
|
matthiasm@3
|
1035 vector<int> temp = vector<int>(nChord,0);
|
matthiasm@3
|
1036 scoreChordogram.push_back(temp);
|
matthiasm@3
|
1037 }
|
matthiasm@3
|
1038 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end()-2*halfwindowlength-1; ++it) {
|
matthiasm@3
|
1039 int startIndex = count + 1;
|
matthiasm@3
|
1040 int endIndex = count + 2 * halfwindowlength;
|
matthiasm@3
|
1041 vector<float> temp = vector<float>(nChord,0);
|
matthiasm@3
|
1042 float maxval = 0; // will be the value of the most salient chord in this frame
|
matthiasm@3
|
1043 float maxindex = nChord-1; //... and the index thereof
|
matthiasm@3
|
1044 unsigned bestchordL = 0; // index of the best "left" chord
|
matthiasm@3
|
1045 unsigned bestchordR = 0; // index of the best "right" chord
|
matthiasm@3
|
1046 for (unsigned iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
|
matthiasm@3
|
1047 // now find the max values on both sides of iWF
|
matthiasm@3
|
1048 // left side:
|
matthiasm@3
|
1049 float maxL = 0;
|
matthiasm@3
|
1050 unsigned maxindL = nChord-1;
|
matthiasm@3
|
1051 for (unsigned iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@3
|
1052 float currsum = 0;
|
matthiasm@3
|
1053 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
|
matthiasm@3
|
1054 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@3
|
1055 }
|
matthiasm@3
|
1056 if (iChord == nChord-1) currsum *= 0.8;
|
matthiasm@3
|
1057 if (currsum > maxL) {
|
matthiasm@3
|
1058 maxL = currsum;
|
matthiasm@3
|
1059 maxindL = iChord;
|
matthiasm@3
|
1060 }
|
matthiasm@3
|
1061 }
|
matthiasm@3
|
1062 // right side:
|
matthiasm@3
|
1063 float maxR = 0;
|
matthiasm@3
|
1064 unsigned maxindR = nChord-1;
|
matthiasm@3
|
1065 for (unsigned iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@3
|
1066 float currsum = 0;
|
matthiasm@3
|
1067 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
matthiasm@3
|
1068 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@3
|
1069 }
|
matthiasm@3
|
1070 if (iChord == nChord-1) currsum *= 0.8;
|
matthiasm@3
|
1071 if (currsum > maxR) {
|
matthiasm@3
|
1072 maxR = currsum;
|
matthiasm@3
|
1073 maxindR = iChord;
|
matthiasm@3
|
1074 }
|
matthiasm@3
|
1075 }
|
matthiasm@3
|
1076 if (maxL+maxR > maxval) {
|
matthiasm@3
|
1077 maxval = maxL+maxR;
|
matthiasm@3
|
1078 maxindex = iWF;
|
matthiasm@3
|
1079 bestchordL = maxindL;
|
matthiasm@3
|
1080 bestchordR = maxindR;
|
matthiasm@3
|
1081 }
|
matthiasm@3
|
1082
|
matthiasm@3
|
1083 }
|
matthiasm@3
|
1084 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
|
matthiasm@3
|
1085 // add a score to every chord-frame-point that was part of a maximum
|
matthiasm@3
|
1086 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
|
matthiasm@3
|
1087 scoreChordogram[iFrame+count][bestchordL]++;
|
matthiasm@3
|
1088 }
|
matthiasm@3
|
1089 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
matthiasm@3
|
1090 scoreChordogram[iFrame+count][bestchordR]++;
|
matthiasm@3
|
1091 }
|
matthiasm@3
|
1092 count++;
|
matthiasm@3
|
1093 }
|
matthiasm@3
|
1094
|
matthiasm@3
|
1095 count = 0;
|
matthiasm@3
|
1096 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
|
matthiasm@3
|
1097 float maxval = 0; // will be the value of the most salient chord in this frame
|
matthiasm@3
|
1098 float maxindex = 0; //... and the index thereof
|
matthiasm@3
|
1099 for (unsigned iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@3
|
1100 if (scoreChordogram[count][iChord] > maxval) {
|
matthiasm@3
|
1101 maxval = scoreChordogram[count][iChord];
|
matthiasm@3
|
1102 maxindex = iChord;
|
matthiasm@3
|
1103 cerr << iChord << endl;
|
matthiasm@3
|
1104 }
|
matthiasm@3
|
1105 }
|
matthiasm@3
|
1106 chordSequence.push_back(maxindex);
|
matthiasm@3
|
1107 cerr << "before modefilter, maxindex: " << maxindex << endl;
|
matthiasm@3
|
1108 count++;
|
matthiasm@3
|
1109 }
|
matthiasm@3
|
1110
|
matthiasm@3
|
1111
|
matthiasm@3
|
1112 // mode filter on chordSequence
|
matthiasm@3
|
1113 count = 0;
|
matthiasm@3
|
1114 int oldChordIndex = -1;
|
matthiasm@3
|
1115 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
|
matthiasm@3
|
1116 Feature f6 = *it;
|
matthiasm@3
|
1117 Feature f7; // chord estimate
|
matthiasm@3
|
1118 f7.hasTimestamp = true;
|
matthiasm@3
|
1119 f7.timestamp = f6.timestamp;
|
matthiasm@3
|
1120 vector<int> chordCount = vector<int>(nChord,0);
|
matthiasm@3
|
1121 int maxChordCount = 0;
|
matthiasm@3
|
1122 int maxChordIndex = nChord-1;
|
matthiasm@3
|
1123 // int startIndex = max(count - halfwindowlength,0);
|
matthiasm@3
|
1124 // int endIndex = min(int(chordogram.size()), startIndex + halfwindowlength);
|
matthiasm@3
|
1125 // for (int i = startIndex; i < endIndex; i++) {
|
matthiasm@3
|
1126 // chordCount[chordSequence[i]]++;
|
matthiasm@3
|
1127 // if (chordCount[chordSequence[i]] > maxChordCount) {
|
matthiasm@3
|
1128 // maxChordCount++;
|
matthiasm@3
|
1129 // maxChordIndex = chordSequence[i];
|
matthiasm@3
|
1130 // }
|
matthiasm@3
|
1131 // }
|
matthiasm@3
|
1132 maxChordIndex = chordSequence[count];
|
matthiasm@3
|
1133 if (oldChordIndex != maxChordIndex) {
|
matthiasm@3
|
1134 oldChordIndex = maxChordIndex;
|
matthiasm@3
|
1135
|
matthiasm@3
|
1136 char buffer1 [50];
|
matthiasm@3
|
1137 if (maxChordIndex < nChord - 1) {
|
matthiasm@3
|
1138 sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
|
matthiasm@3
|
1139 } else {
|
matthiasm@3
|
1140 sprintf(buffer1, "N");
|
matthiasm@3
|
1141 }
|
matthiasm@3
|
1142 f7.label = buffer1;
|
matthiasm@3
|
1143 fsOut[7].push_back(f7);
|
matthiasm@3
|
1144 }
|
matthiasm@3
|
1145 count++;
|
matthiasm@3
|
1146 }
|
matthiasm@0
|
1147 // // musicity
|
matthiasm@0
|
1148 // count = 0;
|
matthiasm@0
|
1149 // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
|
matthiasm@0
|
1150 // vector<float> musicityValue;
|
matthiasm@0
|
1151 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
|
matthiasm@0
|
1152 // Feature f4 = *it;
|
matthiasm@0
|
1153 //
|
matthiasm@0
|
1154 // int startIndex = max(count - musicitykernelwidth/2,0);
|
matthiasm@0
|
1155 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
|
matthiasm@0
|
1156 // float chromasum = 0;
|
matthiasm@0
|
1157 // float diffsum = 0;
|
matthiasm@0
|
1158 // for (int k = 0; k < 12; k++) {
|
matthiasm@0
|
1159 // for (int i = startIndex + 1; i < endIndex; i++) {
|
matthiasm@0
|
1160 // chromasum += pow(fsOut[4][i].values[k],2);
|
matthiasm@0
|
1161 // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]);
|
matthiasm@0
|
1162 // }
|
matthiasm@0
|
1163 // }
|
matthiasm@0
|
1164 // diffsum /= chromasum;
|
matthiasm@0
|
1165 // musicityValue.push_back(diffsum);
|
matthiasm@0
|
1166 // count++;
|
matthiasm@0
|
1167 // }
|
matthiasm@0
|
1168 //
|
matthiasm@0
|
1169 // float musicityThreshold = 0.44;
|
matthiasm@0
|
1170 // if (m_stepSize == 4096) {
|
matthiasm@0
|
1171 // musicityThreshold = 0.74;
|
matthiasm@0
|
1172 // }
|
matthiasm@0
|
1173 // if (m_stepSize == 4410) {
|
matthiasm@0
|
1174 // musicityThreshold = 0.77;
|
matthiasm@0
|
1175 // }
|
matthiasm@0
|
1176 //
|
matthiasm@0
|
1177 // count = 0;
|
matthiasm@0
|
1178 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
|
matthiasm@0
|
1179 // Feature f4 = *it;
|
matthiasm@0
|
1180 // Feature f8; // musicity
|
matthiasm@0
|
1181 // Feature f9; // musicity segmenter
|
matthiasm@0
|
1182 //
|
matthiasm@0
|
1183 // f8.hasTimestamp = true;
|
matthiasm@0
|
1184 // f8.timestamp = f4.timestamp;
|
matthiasm@0
|
1185 // f9.hasTimestamp = true;
|
matthiasm@0
|
1186 // f9.timestamp = f4.timestamp;
|
matthiasm@0
|
1187 //
|
matthiasm@0
|
1188 // int startIndex = max(count - musicitykernelwidth/2,0);
|
matthiasm@0
|
1189 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
|
matthiasm@0
|
1190 // int musicityCount = 0;
|
matthiasm@0
|
1191 // for (int i = startIndex; i <= endIndex; i++) {
|
matthiasm@0
|
1192 // if (musicityValue[i] > musicityThreshold) musicityCount++;
|
matthiasm@0
|
1193 // }
|
matthiasm@0
|
1194 // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1);
|
matthiasm@0
|
1195 //
|
matthiasm@0
|
1196 // if (isSpeech) {
|
matthiasm@0
|
1197 // if (oldlabeltype != 2) {
|
matthiasm@0
|
1198 // f9.label = "Speech";
|
matthiasm@0
|
1199 // fsOut[9].push_back(f9);
|
matthiasm@0
|
1200 // oldlabeltype = 2;
|
matthiasm@0
|
1201 // }
|
matthiasm@0
|
1202 // } else {
|
matthiasm@0
|
1203 // if (oldlabeltype != 1) {
|
matthiasm@0
|
1204 // f9.label = "Music";
|
matthiasm@0
|
1205 // fsOut[9].push_back(f9);
|
matthiasm@0
|
1206 // oldlabeltype = 1;
|
matthiasm@0
|
1207 // }
|
matthiasm@0
|
1208 // }
|
matthiasm@0
|
1209 // f8.values.push_back(musicityValue[count]);
|
matthiasm@0
|
1210 // fsOut[8].push_back(f8);
|
matthiasm@0
|
1211 // count++;
|
matthiasm@0
|
1212 // }
|
matthiasm@0
|
1213 return fsOut;
|
matthiasm@0
|
1214
|
matthiasm@0
|
1215 }
|
matthiasm@0
|
1216
|