matthiasm@0
|
1
|
matthiasm@0
|
2 #include "NNLSChroma.h"
|
matthiasm@0
|
3 #include <cmath>
|
matthiasm@0
|
4 #include <list>
|
matthiasm@0
|
5 #include <iostream>
|
matthiasm@3
|
6 #include <fstream>
|
matthiasm@0
|
7 #include <sstream>
|
matthiasm@0
|
8 #include <cassert>
|
matthiasm@0
|
9 #include <cstdio>
|
matthiasm@1
|
10 #include "nnls.h"
|
matthiasm@0
|
11 // #include "cblas.h"
|
matthiasm@0
|
12 #include "chorddict.cpp"
|
matthiasm@0
|
13 using namespace std;
|
matthiasm@0
|
14
|
matthiasm@0
|
15 const float sinvalue = 0.866025404;
|
matthiasm@0
|
16 const float cosvalue = -0.5;
|
matthiasm@0
|
17 const float hammingwind[19] = {0.0082, 0.0110, 0.0191, 0.0316, 0.0470, 0.0633, 0.0786, 0.0911, 0.0992, 0.1020, 0.0992, 0.0911, 0.0786, 0.0633, 0.0470, 0.0316, 0.0191, 0.0110, 0.0082};
|
matthiasm@0
|
18 const float basswindow[] = {0.001769, 0.015848, 0.043608, 0.084265, 0.136670, 0.199341, 0.270509, 0.348162, 0.430105, 0.514023, 0.597545, 0.678311, 0.754038, 0.822586, 0.882019, 0.930656, 0.967124, 0.990393, 0.999803, 0.995091, 0.976388, 0.944223, 0.899505, 0.843498, 0.777785, 0.704222, 0.624888, 0.542025, 0.457975, 0.375112, 0.295778, 0.222215, 0.156502, 0.100495, 0.055777, 0.023612, 0.004909, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000};
|
matthiasm@0
|
19 const float treblewindow[] = {0.000350, 0.003144, 0.008717, 0.017037, 0.028058, 0.041719, 0.057942, 0.076638, 0.097701, 0.121014, 0.146447, 0.173856, 0.203090, 0.233984, 0.266366, 0.300054, 0.334860, 0.370590, 0.407044, 0.444018, 0.481304, 0.518696, 0.555982, 0.592956, 0.629410, 0.665140, 0.699946, 0.733634, 0.766016, 0.796910, 0.826144, 0.853553, 0.878986, 0.902299, 0.923362, 0.942058, 0.958281, 0.971942, 0.982963, 0.991283, 0.996856, 0.999650, 0.999650, 0.996856, 0.991283, 0.982963, 0.971942, 0.958281, 0.942058, 0.923362, 0.902299, 0.878986, 0.853553, 0.826144, 0.796910, 0.766016, 0.733634, 0.699946, 0.665140, 0.629410, 0.592956, 0.555982, 0.518696, 0.481304, 0.444018, 0.407044, 0.370590, 0.334860, 0.300054, 0.266366, 0.233984, 0.203090, 0.173856, 0.146447, 0.121014, 0.097701, 0.076638, 0.057942, 0.041719, 0.028058, 0.017037, 0.008717, 0.003144, 0.000350};
|
matthiasm@0
|
20 const char* notenames[24] = {"A (bass)","Bb (bass)","B (bass)","C (bass)","C# (bass)","D (bass)","Eb (bass)","E (bass)","F (bass)","F# (bass)","G (bass)","Ab (bass)",
|
matthiasm@0
|
21 "A","Bb","B","C","C#","D","Eb","E","F","F#","G","Ab"};
|
matthiasm@0
|
22 const vector<float> hw(hammingwind, hammingwind+19);
|
matthiasm@0
|
23 const int nNote = 256;
|
matthiasm@0
|
24
|
matthiasm@0
|
25 /** Special Convolution
|
matthiasm@0
|
26 special convolution is as long as the convolvee, i.e. the first argument. in the valid core part of the
|
matthiasm@0
|
27 convolution it contains the usual convolution values, but the pads at the beginning (ending) have the same values
|
matthiasm@0
|
28 as the first (last) valid convolution bin.
|
matthiasm@0
|
29 **/
|
matthiasm@0
|
30
|
matthiasm@0
|
31 const bool debug_on = false;
|
matthiasm@0
|
32
|
matthiasm@0
|
/**
 * Special convolution of `convolvee` with an odd-length `kernel`.
 *
 * The result is as long as the convolvee. Its central ("valid") part holds
 * the usual convolution values; the kernel.size()/2 bins at the beginning
 * (end), for which the kernel does not fully overlap the signal, are padded
 * with the first (last) valid convolution value.
 *
 * @param convolvee  signal to be convolved
 * @param kernel     convolution kernel; its length must be odd (asserted)
 * @return vector with convolvee.size() elements; all zeros if the convolvee
 *         is shorter than the kernel, since no valid bin exists in that case
 */
vector<float> SpecialConvolution(vector<float> convolvee, vector<float> kernel)
{
    const int lenConvolvee = convolvee.size();
    const int lenKernel = kernel.size();
    const int halfKernel = lenKernel / 2;

    assert(lenKernel % 2 != 0); // no exception handling !!!

    // Size the result by the input instead of a fixed 256: a longer convolvee
    // used to write past the end of the buffer.
    vector<float> Z(lenConvolvee, 0);

    // Without a single valid bin there is nothing to compute or to pad from.
    if (lenKernel == 0 || lenConvolvee < lenKernel) return Z;

    // valid core: the kernel lies completely inside the convolvee
    for (int n = lenKernel - 1; n < lenConvolvee; n++) {
        float s = 0.0;
        for (int m = 0; m < lenKernel; m++) {
            s += convolvee[n-m] * kernel[m];
        }
        Z[n - halfKernel] = s;
    }

    // fill lower and upper pads with the nearest valid value
    for (int n = 0; n < halfKernel; n++) {
        Z[n] = Z[halfKernel];
    }
    for (int n = lenConvolvee - halfKernel; n < lenConvolvee; n++) {
        Z[n] = Z[lenConvolvee - halfKernel - 1];
    }
    return Z;
}
|
matthiasm@0
|
60
|
matthiasm@0
|
61 // vector<float> FftBin2Frequency(vector<float> binnumbers, int fs, int blocksize)
|
matthiasm@0
|
62 // {
|
matthiasm@0
|
63 // vector<float> freq(binnumbers.size, 0.0);
|
matthiasm@0
|
64 // for (unsigned i = 0; i < binnumbers.size; ++i) {
|
matthiasm@0
|
65 // freq[i] = (binnumbers[i]-1.0) * fs * 1.0 / blocksize;
|
matthiasm@0
|
66 // }
|
matthiasm@0
|
67 // return freq;
|
matthiasm@0
|
68 // }
|
matthiasm@0
|
69
|
matthiasm@0
|
/**
 * Raised-cosine pulse.
 *
 * @param x       position at which the pulse is evaluated
 * @param centre  position of the pulse maximum
 * @param width   total width of the pulse
 * @return 0.5 + 0.5 * cos(2*pi*(x - centre)/width) while |x - centre| is at
 *         most half the width, and 0 otherwise
 */
float cospuls(float x, float centre, float width)
{
    const float offset = x - centre;

    // outside the support of the pulse (also taken when the test is unordered)
    if (!(std::abs(offset) <= 0.5 * width)) {
        return 0.0;
    }

    const float recipwidth = 1.0/width;
    return cos(offset*2*M_PI*recipwidth)*.5+.5;
}
|
matthiasm@0
|
78
|
matthiasm@0
|
79 float pitchCospuls(float x, float centre, int binsperoctave)
|
matthiasm@0
|
80 {
|
matthiasm@0
|
81 float warpedf = -binsperoctave * (log2(centre) - log2(x));
|
matthiasm@0
|
82 float out = cospuls(warpedf, 0.0, 2.0);
|
matthiasm@0
|
83 // now scale to correct for note density
|
matthiasm@0
|
84 float c = log(2.0)/binsperoctave;
|
matthiasm@0
|
85 if (x > 0) {
|
matthiasm@0
|
86 out = out / (c * x);
|
matthiasm@0
|
87 } else {
|
matthiasm@0
|
88 out = 0;
|
matthiasm@0
|
89 }
|
matthiasm@0
|
90 return out;
|
matthiasm@0
|
91 }
|
matthiasm@0
|
92
|
matthiasm@0
|
93 bool logFreqMatrix(int fs, int blocksize, float *outmatrix) {
|
matthiasm@0
|
94
|
matthiasm@0
|
95 int binspersemitone = 3; // this must be 3
|
matthiasm@0
|
96 int minoctave = 0; // this must be 0
|
matthiasm@0
|
97 int maxoctave = 7; // this must be 7
|
matthiasm@1
|
98 int oversampling = 80;
|
matthiasm@0
|
99
|
matthiasm@0
|
100 // linear frequency vector
|
matthiasm@0
|
101 vector<float> fft_f;
|
matthiasm@0
|
102 for (int i = 0; i < blocksize/2; ++i) {
|
matthiasm@0
|
103 fft_f.push_back(i * (fs * 1.0 / blocksize));
|
matthiasm@0
|
104 }
|
matthiasm@0
|
105 float fft_width = fs * 2.0 / blocksize;
|
matthiasm@0
|
106
|
matthiasm@0
|
107 // linear oversampled frequency vector
|
matthiasm@0
|
108 vector<float> oversampled_f;
|
matthiasm@0
|
109 for (unsigned int i = 0; i < oversampling * blocksize/2; ++i) {
|
matthiasm@0
|
110 oversampled_f.push_back(i * ((fs * 1.0 / blocksize) / oversampling));
|
matthiasm@0
|
111 }
|
matthiasm@0
|
112
|
matthiasm@0
|
113 // pitch-spaced frequency vector
|
matthiasm@0
|
114 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone!
|
matthiasm@0
|
115 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone!
|
matthiasm@0
|
116 vector<float> cq_f;
|
matthiasm@0
|
117 float oob = 1.0/binspersemitone; // one over binspersemitone
|
matthiasm@0
|
118 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12
|
matthiasm@0
|
119 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69)));
|
matthiasm@0
|
120 for (int i = minMIDI + 1; i < maxMIDI; ++i) {
|
matthiasm@0
|
121 for (int k = -1; k < 2; ++k) {
|
matthiasm@0
|
122 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69)));
|
matthiasm@0
|
123 }
|
matthiasm@0
|
124 }
|
matthiasm@0
|
125 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69)));
|
matthiasm@0
|
126 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69)));
|
matthiasm@0
|
127
|
matthiasm@0
|
128 int nFFT = fft_f.size();
|
matthiasm@0
|
129
|
matthiasm@0
|
130 vector<float> fft_activation;
|
matthiasm@0
|
131 for (int iOS = 0; iOS < 2 * oversampling; ++iOS) {
|
matthiasm@0
|
132 float cosp = cospuls(oversampled_f[iOS],fft_f[1],fft_width);
|
matthiasm@0
|
133 fft_activation.push_back(cosp);
|
matthiasm@0
|
134 // cerr << cosp << endl;
|
matthiasm@0
|
135 }
|
matthiasm@0
|
136
|
matthiasm@0
|
137 float cq_activation;
|
matthiasm@0
|
138 for (int iFFT = 1; iFFT < nFFT; ++iFFT) {
|
matthiasm@0
|
139 // find frequency stretch where the oversampled vector can be non-zero (i.e. in a window of width fft_width around the current frequency)
|
matthiasm@0
|
140 int curr_start = oversampling * iFFT - oversampling;
|
matthiasm@0
|
141 int curr_end = oversampling * iFFT + oversampling; // don't know if I should add "+1" here
|
matthiasm@0
|
142 // cerr << oversampled_f[curr_start] << " " << fft_f[iFFT] << " " << oversampled_f[curr_end] << endl;
|
matthiasm@0
|
143 for (unsigned iCQ = 0; iCQ < cq_f.size(); ++iCQ) {
|
matthiasm@0
|
144 outmatrix[iFFT + nFFT * iCQ] = 0;
|
matthiasm@1
|
145 if (cq_f[iCQ] * pow(2.0, 0.084) + fft_width > fft_f[iFFT] && cq_f[iCQ] * pow(2.0, -0.084 * 2) - fft_width < fft_f[iFFT]) { // within a generous neighbourhood
|
matthiasm@0
|
146 for (int iOS = curr_start; iOS < curr_end; ++iOS) {
|
matthiasm@0
|
147 cq_activation = pitchCospuls(oversampled_f[iOS],cq_f[iCQ],binspersemitone*12);
|
matthiasm@0
|
148 // cerr << oversampled_f[iOS] << " " << cq_f[iCQ] << " " << cq_activation << endl;
|
matthiasm@0
|
149 outmatrix[iFFT + nFFT * iCQ] += cq_activation * fft_activation[iOS-curr_start];
|
matthiasm@0
|
150 }
|
matthiasm@0
|
151 // if (iCQ == 1 || iCQ == 2) {
|
matthiasm@0
|
152 // cerr << " " << outmatrix[iFFT + nFFT * iCQ] << endl;
|
matthiasm@0
|
153 // }
|
matthiasm@0
|
154 }
|
matthiasm@0
|
155 }
|
matthiasm@0
|
156 }
|
matthiasm@0
|
157 return true;
|
matthiasm@0
|
158 }
|
matthiasm@0
|
159
|
matthiasm@3
|
160 bool dictionaryMatrix(float* dm) {
|
matthiasm@1
|
161 int binspersemitone = 3; // this must be 3
|
matthiasm@1
|
162 int minoctave = 0; // this must be 0
|
matthiasm@1
|
163 int maxoctave = 7; // this must be 7
|
matthiasm@4
|
164 float s_param = 0.7;
|
matthiasm@1
|
165
|
matthiasm@1
|
166 // pitch-spaced frequency vector
|
matthiasm@1
|
167 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone!
|
matthiasm@1
|
168 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone!
|
matthiasm@1
|
169 vector<float> cq_f;
|
matthiasm@1
|
170 float oob = 1.0/binspersemitone; // one over binspersemitone
|
matthiasm@1
|
171 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12
|
matthiasm@1
|
172 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69)));
|
matthiasm@1
|
173 for (int i = minMIDI + 1; i < maxMIDI; ++i) {
|
matthiasm@1
|
174 for (int k = -1; k < 2; ++k) {
|
matthiasm@1
|
175 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69)));
|
matthiasm@1
|
176 }
|
matthiasm@1
|
177 }
|
matthiasm@1
|
178 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69)));
|
matthiasm@1
|
179 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69)));
|
matthiasm@1
|
180
|
matthiasm@1
|
181 float curr_f;
|
matthiasm@1
|
182 float floatbin;
|
matthiasm@1
|
183 float curr_amp;
|
matthiasm@1
|
184 // now for every combination calculate the matrix element
|
matthiasm@1
|
185 for (unsigned iOut = 0; iOut < 12 * (maxoctave - minoctave); ++iOut) {
|
matthiasm@3
|
186 // cerr << iOut << endl;
|
matthiasm@1
|
187 for (unsigned iHarm = 1; iHarm <= 20; ++iHarm) {
|
matthiasm@1
|
188 curr_f = 440 * pow(2,(minMIDI-69+iOut)*1.0/12) * iHarm;
|
matthiasm@3
|
189 // if (curr_f > cq_f[nNote-1]) break;
|
matthiasm@3
|
190 floatbin = ((iOut + 1) * binspersemitone + 1) + binspersemitone * 12 * log2(iHarm);
|
matthiasm@3
|
191 // cerr << floatbin << endl;
|
matthiasm@1
|
192 curr_amp = pow(s_param,float(iHarm-1));
|
matthiasm@3
|
193 // cerr << "curramp" << curr_amp << endl;
|
matthiasm@1
|
194 for (unsigned iNote = 0; iNote < nNote; ++iNote) {
|
matthiasm@3
|
195 if (abs(iNote+1.0-floatbin)<2) {
|
matthiasm@3
|
196 dm[iNote + 256 * iOut] += cospuls(iNote+1.0, floatbin, binspersemitone + 0.0) * curr_amp;
|
matthiasm@3
|
197 // dm[iNote + nNote * iOut] += 1 * curr_amp;
|
matthiasm@3
|
198 }
|
matthiasm@1
|
199 }
|
matthiasm@3
|
200 }
|
matthiasm@1
|
201 }
|
matthiasm@3
|
202
|
matthiasm@3
|
203
|
matthiasm@1
|
204 }
|
matthiasm@1
|
205
|
matthiasm@0
|
206
|
matthiasm@0
|
207 NNLSChroma::NNLSChroma(float inputSampleRate) :
|
matthiasm@0
|
208 Plugin(inputSampleRate),
|
matthiasm@0
|
209 m_fl(0),
|
matthiasm@0
|
210 m_blockSize(0),
|
matthiasm@0
|
211 m_stepSize(0),
|
matthiasm@0
|
212 m_lengthOfNoteIndex(0),
|
matthiasm@0
|
213 m_meanTuning0(0),
|
matthiasm@0
|
214 m_meanTuning1(0),
|
matthiasm@0
|
215 m_meanTuning2(0),
|
matthiasm@0
|
216 m_localTuning0(0),
|
matthiasm@0
|
217 m_localTuning1(0),
|
matthiasm@0
|
218 m_localTuning2(0),
|
matthiasm@4
|
219 m_paling(1.0),
|
matthiasm@3
|
220 m_preset(0.0),
|
matthiasm@0
|
221 m_localTuning(0),
|
matthiasm@0
|
222 m_kernelValue(0),
|
matthiasm@0
|
223 m_kernelFftIndex(0),
|
matthiasm@0
|
224 m_kernelNoteIndex(0),
|
matthiasm@1
|
225 m_dict(0),
|
matthiasm@0
|
226 m_tuneLocal(false),
|
matthiasm@0
|
227 m_dictID(0)
|
matthiasm@0
|
228 {
|
matthiasm@0
|
229 if (debug_on) cerr << "--> NNLSChroma" << endl;
|
matthiasm@3
|
230 m_dict = new float[nNote * 84];
|
matthiasm@3
|
231 for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0;
|
matthiasm@1
|
232 dictionaryMatrix(m_dict);
|
matthiasm@0
|
233 }
|
matthiasm@0
|
234
|
matthiasm@0
|
235
|
matthiasm@0
|
236 NNLSChroma::~NNLSChroma()
|
matthiasm@0
|
237 {
|
matthiasm@0
|
238 if (debug_on) cerr << "--> ~NNLSChroma" << endl;
|
matthiasm@1
|
239 delete [] m_dict;
|
matthiasm@0
|
240 }
|
matthiasm@0
|
241
|
matthiasm@0
|
242 string
|
matthiasm@0
|
243 NNLSChroma::getIdentifier() const
|
matthiasm@0
|
244 {
|
matthiasm@0
|
245 if (debug_on) cerr << "--> getIdentifier" << endl;
|
matthiasm@0
|
246 return "nnls_chroma";
|
matthiasm@0
|
247 }
|
matthiasm@0
|
248
|
matthiasm@0
|
249 string
|
matthiasm@0
|
250 NNLSChroma::getName() const
|
matthiasm@0
|
251 {
|
matthiasm@0
|
252 if (debug_on) cerr << "--> getName" << endl;
|
matthiasm@0
|
253 return "NNLS Chroma";
|
matthiasm@0
|
254 }
|
matthiasm@0
|
255
|
matthiasm@0
|
256 string
|
matthiasm@0
|
257 NNLSChroma::getDescription() const
|
matthiasm@0
|
258 {
|
matthiasm@0
|
259 // Return something helpful here!
|
matthiasm@0
|
260 if (debug_on) cerr << "--> getDescription" << endl;
|
matthiasm@4
|
261 return "This plugin provides a number of features derived from a log-frequency amplitude spectrum (LAS) of the DFT: the LAS itself, a standard-tuned version thereof (the local and global tuning estimates can are also be output), an approximate transcription to semitone activation using non-linear least squares (NNLS). Furthermore chroma features and a simple chord estimate derived from this NNLS semitone transcription.";
|
matthiasm@0
|
262 }
|
matthiasm@0
|
263
|
matthiasm@0
|
264 string
|
matthiasm@0
|
265 NNLSChroma::getMaker() const
|
matthiasm@0
|
266 {
|
matthiasm@0
|
267 if (debug_on) cerr << "--> getMaker" << endl;
|
matthiasm@0
|
268 // Your name here
|
matthiasm@0
|
269 return "Matthias Mauch";
|
matthiasm@0
|
270 }
|
matthiasm@0
|
271
|
matthiasm@0
|
272 int
|
matthiasm@0
|
273 NNLSChroma::getPluginVersion() const
|
matthiasm@0
|
274 {
|
matthiasm@0
|
275 if (debug_on) cerr << "--> getPluginVersion" << endl;
|
matthiasm@0
|
276 // Increment this each time you release a version that behaves
|
matthiasm@0
|
277 // differently from the previous one
|
matthiasm@0
|
278 return 1;
|
matthiasm@0
|
279 }
|
matthiasm@0
|
280
|
matthiasm@0
|
281 string
|
matthiasm@0
|
282 NNLSChroma::getCopyright() const
|
matthiasm@0
|
283 {
|
matthiasm@0
|
284 if (debug_on) cerr << "--> getCopyright" << endl;
|
matthiasm@0
|
285 // This function is not ideally named. It does not necessarily
|
matthiasm@0
|
286 // need to say who made the plugin -- getMaker does that -- but it
|
matthiasm@0
|
287 // should indicate the terms under which it is distributed. For
|
matthiasm@0
|
288 // example, "Copyright (year). All Rights Reserved", or "GPL"
|
matthiasm@0
|
289 return "Copyright (2010). All rights reserved.";
|
matthiasm@0
|
290 }
|
matthiasm@0
|
291
|
matthiasm@0
|
292 NNLSChroma::InputDomain
|
matthiasm@0
|
293 NNLSChroma::getInputDomain() const
|
matthiasm@0
|
294 {
|
matthiasm@0
|
295 if (debug_on) cerr << "--> getInputDomain" << endl;
|
matthiasm@0
|
296 return FrequencyDomain;
|
matthiasm@0
|
297 }
|
matthiasm@0
|
298
|
matthiasm@0
|
299 size_t
|
matthiasm@0
|
300 NNLSChroma::getPreferredBlockSize() const
|
matthiasm@0
|
301 {
|
matthiasm@0
|
302 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
|
matthiasm@0
|
303 return 16384; // 0 means "I can handle any block size"
|
matthiasm@0
|
304 }
|
matthiasm@0
|
305
|
matthiasm@0
|
306 size_t
|
matthiasm@0
|
307 NNLSChroma::getPreferredStepSize() const
|
matthiasm@0
|
308 {
|
matthiasm@0
|
309 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
|
matthiasm@0
|
310 return 2048; // 0 means "anything sensible"; in practice this
|
matthiasm@0
|
311 // means the same as the block size for TimeDomain
|
matthiasm@0
|
312 // plugins, or half of it for FrequencyDomain plugins
|
matthiasm@0
|
313 }
|
matthiasm@0
|
314
|
matthiasm@0
|
315 size_t
|
matthiasm@0
|
316 NNLSChroma::getMinChannelCount() const
|
matthiasm@0
|
317 {
|
matthiasm@0
|
318 if (debug_on) cerr << "--> getMinChannelCount" << endl;
|
matthiasm@0
|
319 return 1;
|
matthiasm@0
|
320 }
|
matthiasm@0
|
321
|
matthiasm@0
|
322 size_t
|
matthiasm@0
|
323 NNLSChroma::getMaxChannelCount() const
|
matthiasm@0
|
324 {
|
matthiasm@0
|
325 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
|
matthiasm@0
|
326 return 1;
|
matthiasm@0
|
327 }
|
matthiasm@0
|
328
|
matthiasm@0
|
329 NNLSChroma::ParameterList
|
matthiasm@0
|
330 NNLSChroma::getParameterDescriptors() const
|
matthiasm@0
|
331 {
|
matthiasm@0
|
332 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
|
matthiasm@0
|
333 ParameterList list;
|
matthiasm@0
|
334
|
matthiasm@3
|
335 ParameterDescriptor d3;
|
matthiasm@3
|
336 d3.identifier = "preset";
|
matthiasm@3
|
337 d3.name = "preset";
|
matthiasm@3
|
338 d3.description = "Spectral paling: no paling - 0; whitening - 1.";
|
matthiasm@3
|
339 d3.unit = "";
|
matthiasm@3
|
340 d3.isQuantized = true;
|
matthiasm@3
|
341 d3.quantizeStep = 1;
|
matthiasm@3
|
342 d3.minValue = 0.0;
|
matthiasm@4
|
343 d3.maxValue = 3.0;
|
matthiasm@3
|
344 d3.defaultValue = 0.0;
|
matthiasm@3
|
345 d3.valueNames.push_back("polyphonic pop");
|
matthiasm@3
|
346 d3.valueNames.push_back("polyphonic pop (fast)");
|
matthiasm@3
|
347 d3.valueNames.push_back("solo keyboard");
|
matthiasm@3
|
348 d3.valueNames.push_back("manual");
|
matthiasm@3
|
349 list.push_back(d3);
|
matthiasm@4
|
350
|
matthiasm@4
|
351 // ParameterDescriptor d0;
|
matthiasm@4
|
352 // d0.identifier = "notedict";
|
matthiasm@4
|
353 // d0.name = "note dictionary";
|
matthiasm@4
|
354 // d0.description = "Notes in different note dictionaries differ by their spectral shapes.";
|
matthiasm@4
|
355 // d0.unit = "";
|
matthiasm@4
|
356 // d0.minValue = 0;
|
matthiasm@4
|
357 // d0.maxValue = 1;
|
matthiasm@4
|
358 // d0.defaultValue = 0;
|
matthiasm@4
|
359 // d0.isQuantized = true;
|
matthiasm@4
|
360 // d0.valueNames.push_back("s = 0.6");
|
matthiasm@4
|
361 // d0.valueNames.push_back("no NNLS");
|
matthiasm@4
|
362 // d0.quantizeStep = 1.0;
|
matthiasm@4
|
363 // list.push_back(d0);
|
matthiasm@4
|
364
|
matthiasm@4
|
365 ParameterDescriptor d1;
|
matthiasm@4
|
366 d1.identifier = "tuningmode";
|
matthiasm@4
|
367 d1.name = "tuning mode";
|
matthiasm@4
|
368 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
|
matthiasm@4
|
369 d1.unit = "";
|
matthiasm@4
|
370 d1.minValue = 0;
|
matthiasm@4
|
371 d1.maxValue = 1;
|
matthiasm@4
|
372 d1.defaultValue = 0;
|
matthiasm@4
|
373 d1.isQuantized = true;
|
matthiasm@4
|
374 d1.valueNames.push_back("global tuning");
|
matthiasm@4
|
375 d1.valueNames.push_back("local tuning");
|
matthiasm@4
|
376 d1.quantizeStep = 1.0;
|
matthiasm@4
|
377 list.push_back(d1);
|
matthiasm@4
|
378
|
matthiasm@4
|
379 // ParameterDescriptor d2;
|
matthiasm@4
|
380 // d2.identifier = "paling";
|
matthiasm@4
|
381 // d2.name = "spectral paling";
|
matthiasm@4
|
382 // d2.description = "Spectral paling: no paling - 0; whitening - 1.";
|
matthiasm@4
|
383 // d2.unit = "";
|
matthiasm@4
|
384 // d2.isQuantized = true;
|
matthiasm@4
|
385 // // d2.quantizeStep = 0.1;
|
matthiasm@4
|
386 // d2.minValue = 0.0;
|
matthiasm@4
|
387 // d2.maxValue = 1.0;
|
matthiasm@4
|
388 // d2.defaultValue = 1.0;
|
matthiasm@4
|
389 // d2.isQuantized = false;
|
matthiasm@4
|
390 // list.push_back(d2);
|
matthiasm@4
|
391
|
matthiasm@0
|
392 return list;
|
matthiasm@0
|
393 }
|
matthiasm@0
|
394
|
matthiasm@0
|
395 float
|
matthiasm@0
|
396 NNLSChroma::getParameter(string identifier) const
|
matthiasm@0
|
397 {
|
matthiasm@3
|
398 if (debug_on) cerr << "--> getParameter" << endl;
|
matthiasm@0
|
399 if (identifier == "notedict") {
|
matthiasm@0
|
400 return m_dictID;
|
matthiasm@0
|
401 }
|
matthiasm@0
|
402
|
matthiasm@0
|
403 if (identifier == "paling") {
|
matthiasm@0
|
404 return m_paling;
|
matthiasm@0
|
405 }
|
matthiasm@0
|
406
|
matthiasm@0
|
407 if (identifier == "tuningmode") {
|
matthiasm@0
|
408 if (m_tuneLocal) {
|
matthiasm@0
|
409 return 1.0;
|
matthiasm@0
|
410 } else {
|
matthiasm@0
|
411 return 0.0;
|
matthiasm@0
|
412 }
|
matthiasm@0
|
413 }
|
matthiasm@3
|
414 if (identifier == "preset") {
|
matthiasm@3
|
415 return m_preset;
|
matthiasm@3
|
416 }
|
matthiasm@0
|
417 return 0;
|
matthiasm@0
|
418
|
matthiasm@0
|
419 }
|
matthiasm@0
|
420
|
matthiasm@0
|
421 void
|
matthiasm@0
|
422 NNLSChroma::setParameter(string identifier, float value)
|
matthiasm@0
|
423 {
|
matthiasm@3
|
424 if (debug_on) cerr << "--> setParameter" << endl;
|
matthiasm@0
|
425 if (identifier == "notedict") {
|
matthiasm@0
|
426 m_dictID = (int) value;
|
matthiasm@0
|
427 }
|
matthiasm@0
|
428
|
matthiasm@0
|
429 if (identifier == "paling") {
|
matthiasm@0
|
430 m_paling = value;
|
matthiasm@0
|
431 }
|
matthiasm@0
|
432
|
matthiasm@0
|
433 if (identifier == "tuningmode") {
|
matthiasm@0
|
434 m_tuneLocal = (value > 0) ? true : false;
|
matthiasm@0
|
435 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
|
matthiasm@0
|
436 }
|
matthiasm@3
|
437 if (identifier == "preset") {
|
matthiasm@3
|
438 m_preset = value;
|
matthiasm@3
|
439 if (m_preset == 0.0) {
|
matthiasm@3
|
440 m_tuneLocal = false;
|
matthiasm@3
|
441 m_paling = 1.0;
|
matthiasm@3
|
442 m_dictID = 0.0;
|
matthiasm@3
|
443 }
|
matthiasm@3
|
444 if (m_preset == 1.0) {
|
matthiasm@3
|
445 m_tuneLocal = false;
|
matthiasm@3
|
446 m_paling = 1.0;
|
matthiasm@3
|
447 m_dictID = 1.0;
|
matthiasm@3
|
448 }
|
matthiasm@3
|
449 if (m_preset == 2.0) {
|
matthiasm@3
|
450 m_tuneLocal = false;
|
matthiasm@3
|
451 m_paling = 0.7;
|
matthiasm@3
|
452 m_dictID = 0.0;
|
matthiasm@3
|
453 }
|
matthiasm@3
|
454 }
|
matthiasm@0
|
455 }
|
matthiasm@0
|
456
|
matthiasm@0
|
457 NNLSChroma::ProgramList
|
matthiasm@0
|
458 NNLSChroma::getPrograms() const
|
matthiasm@0
|
459 {
|
matthiasm@0
|
460 if (debug_on) cerr << "--> getPrograms" << endl;
|
matthiasm@0
|
461 ProgramList list;
|
matthiasm@0
|
462
|
matthiasm@0
|
463 // If you have no programs, return an empty list (or simply don't
|
matthiasm@0
|
464 // implement this function or getCurrentProgram/selectProgram)
|
matthiasm@0
|
465
|
matthiasm@0
|
466 return list;
|
matthiasm@0
|
467 }
|
matthiasm@0
|
468
|
matthiasm@0
|
469 string
|
matthiasm@0
|
470 NNLSChroma::getCurrentProgram() const
|
matthiasm@0
|
471 {
|
matthiasm@0
|
472 if (debug_on) cerr << "--> getCurrentProgram" << endl;
|
matthiasm@0
|
473 return ""; // no programs
|
matthiasm@0
|
474 }
|
matthiasm@0
|
475
|
matthiasm@0
|
476 void
|
matthiasm@0
|
477 NNLSChroma::selectProgram(string name)
|
matthiasm@0
|
478 {
|
matthiasm@0
|
479 if (debug_on) cerr << "--> selectProgram" << endl;
|
matthiasm@0
|
480 }
|
matthiasm@0
|
481
|
matthiasm@0
|
482
|
matthiasm@0
|
483 NNLSChroma::OutputList
|
matthiasm@0
|
484 NNLSChroma::getOutputDescriptors() const
|
matthiasm@0
|
485 {
|
matthiasm@0
|
486 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
|
matthiasm@0
|
487 OutputList list;
|
matthiasm@0
|
488
|
matthiasm@0
|
489 // Make chroma names for the binNames property
|
matthiasm@0
|
490 vector<string> chromanames;
|
matthiasm@0
|
491 vector<string> bothchromanames;
|
matthiasm@0
|
492 for (int iNote = 0; iNote < 24; iNote++) {
|
matthiasm@0
|
493 bothchromanames.push_back(notenames[iNote]);
|
matthiasm@0
|
494 if (iNote < 12) {
|
matthiasm@0
|
495 chromanames.push_back(notenames[iNote]);
|
matthiasm@0
|
496 }
|
matthiasm@0
|
497 }
|
matthiasm@0
|
498
|
matthiasm@1
|
499 // int nNote = 84;
|
matthiasm@0
|
500
|
matthiasm@0
|
501 // See OutputDescriptor documentation for the possibilities here.
|
matthiasm@0
|
502 // Every plugin must have at least one output.
|
matthiasm@0
|
503
|
matthiasm@0
|
504 OutputDescriptor d0;
|
matthiasm@0
|
505 d0.identifier = "tuning";
|
matthiasm@0
|
506 d0.name = "Tuning";
|
matthiasm@0
|
507 d0.description = "The concert pitch.";
|
matthiasm@0
|
508 d0.unit = "Hz";
|
matthiasm@0
|
509 d0.hasFixedBinCount = true;
|
matthiasm@0
|
510 d0.binCount = 0;
|
matthiasm@0
|
511 d0.hasKnownExtents = true;
|
matthiasm@0
|
512 d0.minValue = 427.47;
|
matthiasm@0
|
513 d0.maxValue = 452.89;
|
matthiasm@0
|
514 d0.isQuantized = false;
|
matthiasm@0
|
515 d0.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@0
|
516 d0.hasDuration = false;
|
matthiasm@0
|
517 list.push_back(d0);
|
matthiasm@0
|
518
|
matthiasm@0
|
519 OutputDescriptor d1;
|
matthiasm@0
|
520 d1.identifier = "logfreqspec";
|
matthiasm@0
|
521 d1.name = "Log-Frequency Spectrum";
|
matthiasm@0
|
522 d1.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping.";
|
matthiasm@0
|
523 d1.unit = "";
|
matthiasm@0
|
524 d1.hasFixedBinCount = true;
|
matthiasm@0
|
525 d1.binCount = nNote;
|
matthiasm@0
|
526 d1.hasKnownExtents = false;
|
matthiasm@0
|
527 d1.isQuantized = false;
|
matthiasm@0
|
528 d1.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
529 d1.hasDuration = false;
|
matthiasm@0
|
530 d1.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
531 list.push_back(d1);
|
matthiasm@0
|
532
|
matthiasm@0
|
533 OutputDescriptor d2;
|
matthiasm@0
|
534 d2.identifier = "tunedlogfreqspec";
|
matthiasm@0
|
535 d2.name = "Tuned Log-Frequency Spectrum";
|
matthiasm@0
|
536 d2.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping, then its tuned using the estimated tuning frequency.";
|
matthiasm@0
|
537 d2.unit = "";
|
matthiasm@0
|
538 d2.hasFixedBinCount = true;
|
matthiasm@0
|
539 d2.binCount = 256;
|
matthiasm@0
|
540 d2.hasKnownExtents = false;
|
matthiasm@0
|
541 d2.isQuantized = false;
|
matthiasm@0
|
542 d2.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
543 d2.hasDuration = false;
|
matthiasm@0
|
544 d2.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
545 list.push_back(d2);
|
matthiasm@0
|
546
|
matthiasm@0
|
547 OutputDescriptor d3;
|
matthiasm@0
|
548 d3.identifier = "semitonespectrum";
|
matthiasm@0
|
549 d3.name = "Semitone Spectrum";
|
matthiasm@0
|
550 d3.description = "A semitone-spaced log-frequency spectrum derived from the third-of-a-semitone-spaced tuned log-frequency spectrum.";
|
matthiasm@0
|
551 d3.unit = "";
|
matthiasm@0
|
552 d3.hasFixedBinCount = true;
|
matthiasm@0
|
553 d3.binCount = 84;
|
matthiasm@0
|
554 d3.hasKnownExtents = false;
|
matthiasm@0
|
555 d3.isQuantized = false;
|
matthiasm@0
|
556 d3.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
557 d3.hasDuration = false;
|
matthiasm@0
|
558 d3.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
559 list.push_back(d3);
|
matthiasm@0
|
560
|
matthiasm@0
|
561 OutputDescriptor d4;
|
matthiasm@0
|
562 d4.identifier = "chroma";
|
matthiasm@0
|
563 d4.name = "Chromagram";
|
matthiasm@0
|
564 d4.description = "Tuning-adjusted chromagram from NNLS soft transcription, with an emphasis on the medium note range.";
|
matthiasm@0
|
565 d4.unit = "";
|
matthiasm@0
|
566 d4.hasFixedBinCount = true;
|
matthiasm@0
|
567 d4.binCount = 12;
|
matthiasm@0
|
568 d4.binNames = chromanames;
|
matthiasm@0
|
569 d4.hasKnownExtents = false;
|
matthiasm@0
|
570 d4.isQuantized = false;
|
matthiasm@0
|
571 d4.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
572 d4.hasDuration = false;
|
matthiasm@0
|
573 d4.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
574 list.push_back(d4);
|
matthiasm@0
|
575
|
matthiasm@0
|
576 OutputDescriptor d5;
|
matthiasm@0
|
577 d5.identifier = "basschroma";
|
matthiasm@0
|
578 d5.name = "Bass Chromagram";
|
matthiasm@0
|
579 d5.description = "Tuning-adjusted bass chromagram from NNLS soft transcription, with an emphasis on the bass note range.";
|
matthiasm@0
|
580 d5.unit = "";
|
matthiasm@0
|
581 d5.hasFixedBinCount = true;
|
matthiasm@0
|
582 d5.binCount = 12;
|
matthiasm@0
|
583 d5.binNames = chromanames;
|
matthiasm@0
|
584 d5.hasKnownExtents = false;
|
matthiasm@0
|
585 d5.isQuantized = false;
|
matthiasm@0
|
586 d5.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
587 d5.hasDuration = false;
|
matthiasm@0
|
588 d5.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
589 list.push_back(d5);
|
matthiasm@0
|
590
|
matthiasm@0
|
591 OutputDescriptor d6;
|
matthiasm@0
|
592 d6.identifier = "bothchroma";
|
matthiasm@0
|
593 d6.name = "Chromagram and Bass Chromagram";
|
matthiasm@0
|
594 d6.description = "Tuning-adjusted chromagram and bass chromagram (stacked on top of each other) from NNLS soft transcription.";
|
matthiasm@0
|
595 d6.unit = "";
|
matthiasm@0
|
596 d6.hasFixedBinCount = true;
|
matthiasm@0
|
597 d6.binCount = 24;
|
matthiasm@0
|
598 d6.binNames = bothchromanames;
|
matthiasm@0
|
599 d6.hasKnownExtents = false;
|
matthiasm@0
|
600 d6.isQuantized = false;
|
matthiasm@0
|
601 d6.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
602 d6.hasDuration = false;
|
matthiasm@0
|
603 d6.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
604 list.push_back(d6);
|
matthiasm@0
|
605
|
matthiasm@0
|
606 OutputDescriptor d7;
|
matthiasm@0
|
607 d7.identifier = "simplechord";
|
matthiasm@0
|
608 d7.name = "Simple Chord Estimate";
|
matthiasm@0
|
609 d7.description = "A simple chord estimate based on the inner product of chord templates with the smoothed chroma.";
|
matthiasm@0
|
610 d7.unit = "";
|
matthiasm@0
|
611 d7.hasFixedBinCount = true;
|
matthiasm@0
|
612 d7.binCount = 0;
|
matthiasm@0
|
613 d7.hasKnownExtents = false;
|
matthiasm@0
|
614 d7.isQuantized = false;
|
matthiasm@0
|
615 d7.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@0
|
616 d7.hasDuration = false;
|
matthiasm@0
|
617 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
618 list.push_back(d7);
|
matthiasm@0
|
619
|
matthiasm@1
|
620 // OutputDescriptor d8;
|
matthiasm@1
|
621 // d8.identifier = "inconsistency";
|
matthiasm@1
|
622 // d8.name = "Harmonic inconsistency value";
|
matthiasm@1
|
623 // d8.description = "Harmonic inconsistency. Indicates music if low, non-music or speech when high.";
|
matthiasm@1
|
624 // d8.unit = "";
|
matthiasm@1
|
625 // d8.hasFixedBinCount = true;
|
matthiasm@1
|
626 // d8.binCount = 1;
|
matthiasm@1
|
627 // d8.hasKnownExtents = false;
|
matthiasm@1
|
628 // d8.isQuantized = false;
|
matthiasm@1
|
629 // d8.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@1
|
630 // d8.hasDuration = false;
|
matthiasm@1
|
631 // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@1
|
632 // list.push_back(d8);
|
matthiasm@1
|
633 //
|
matthiasm@1
|
634 // OutputDescriptor d9;
|
matthiasm@1
|
635 // d9.identifier = "inconsistencysegment";
|
matthiasm@1
|
636 // d9.name = "Harmonic inconsistency segmenter";
|
matthiasm@1
|
637 // d9.description = "Segments the audio based on the harmonic inconsistency value into speech and music.";
|
matthiasm@1
|
638 // d9.unit = "";
|
matthiasm@1
|
639 // d9.hasFixedBinCount = true;
|
matthiasm@1
|
640 // d9.binCount = 0;
|
matthiasm@1
|
641 // d9.hasKnownExtents = true;
|
matthiasm@1
|
642 // d9.minValue = 0.1;
|
matthiasm@1
|
643 // d9.maxValue = 0.9;
|
matthiasm@1
|
644 // d9.isQuantized = false;
|
matthiasm@1
|
645 // d9.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@1
|
646 // d9.hasDuration = false;
|
matthiasm@1
|
647 // d9.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@1
|
648 // list.push_back(d9);
|
matthiasm@1
|
649 //
|
matthiasm@1
|
650 OutputDescriptor d10;
|
matthiasm@1
|
651 d10.identifier = "localtuning";
|
matthiasm@1
|
652 d10.name = "Local tuning";
|
matthiasm@4
|
653 d10.description = "Tuning based on the history up to this timestamp.";
|
matthiasm@1
|
654 d10.unit = "Hz";
|
matthiasm@1
|
655 d10.hasFixedBinCount = true;
|
matthiasm@1
|
656 d10.binCount = 1;
|
matthiasm@1
|
657 d10.hasKnownExtents = true;
|
matthiasm@1
|
658 d10.minValue = 427.47;
|
matthiasm@1
|
659 d10.maxValue = 452.89;
|
matthiasm@1
|
660 d10.isQuantized = false;
|
matthiasm@3
|
661 d10.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@1
|
662 d10.hasDuration = false;
|
matthiasm@3
|
663 // d10.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@1
|
664 list.push_back(d10);
|
matthiasm@1
|
665
|
matthiasm@0
|
666 return list;
|
matthiasm@0
|
667 }
|
matthiasm@0
|
668
|
matthiasm@0
|
669
|
matthiasm@0
|
670 bool
|
matthiasm@0
|
671 NNLSChroma::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@0
|
672 {
|
matthiasm@1
|
673 if (debug_on) {
|
matthiasm@1
|
674 cerr << "--> initialise";
|
matthiasm@1
|
675 }
|
matthiasm@1
|
676
|
matthiasm@0
|
677 if (channels < getMinChannelCount() ||
|
matthiasm@0
|
678 channels > getMaxChannelCount()) return false;
|
matthiasm@0
|
679 m_blockSize = blockSize;
|
matthiasm@0
|
680 m_stepSize = stepSize;
|
matthiasm@0
|
681 frameCount = 0;
|
matthiasm@0
|
682 int tempn = 256 * m_blockSize/2;
|
matthiasm@4
|
683 // cerr << "length of tempkernel : " << tempn << endl;
|
matthiasm@1
|
684 float *tempkernel;
|
matthiasm@1
|
685
|
matthiasm@1
|
686 tempkernel = new float[tempn];
|
matthiasm@1
|
687
|
matthiasm@0
|
688 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
|
matthiasm@1
|
689 m_kernelValue.clear();
|
matthiasm@1
|
690 m_kernelFftIndex.clear();
|
matthiasm@1
|
691 m_kernelNoteIndex.clear();
|
matthiasm@1
|
692 int countNonzero = 0;
|
matthiasm@0
|
693 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
|
matthiasm@1
|
694 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) {
|
matthiasm@1
|
695 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
matthiasm@1
|
696 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
|
matthiasm@0
|
697 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
matthiasm@1
|
698 countNonzero++;
|
matthiasm@0
|
699 }
|
matthiasm@1
|
700 m_kernelFftIndex.push_back(iFFT);
|
matthiasm@1
|
701 m_kernelNoteIndex.push_back(iNote);
|
matthiasm@0
|
702 }
|
matthiasm@0
|
703 }
|
matthiasm@1
|
704 }
|
matthiasm@4
|
705 // cerr << "nonzero count : " << countNonzero << endl;
|
matthiasm@1
|
706 delete [] tempkernel;
|
matthiasm@3
|
707 ofstream myfile;
|
matthiasm@3
|
708 myfile.open ("matrix.txt");
|
matthiasm@3
|
709 // myfile << "Writing this to a file.\n";
|
matthiasm@3
|
710 for (int i = 0; i < nNote * 84; ++i) {
|
matthiasm@3
|
711 myfile << m_dict[i] << endl;
|
matthiasm@3
|
712 }
|
matthiasm@3
|
713 myfile.close();
|
matthiasm@0
|
714 return true;
|
matthiasm@0
|
715 }
|
matthiasm@0
|
716
|
matthiasm@0
|
717 void
|
matthiasm@0
|
718 NNLSChroma::reset()
|
matthiasm@0
|
719 {
|
matthiasm@4
|
720 if (debug_on) cerr << "--> reset";
|
matthiasm@4
|
721
|
matthiasm@0
|
722 // Clear buffers, reset stored values, etc
|
matthiasm@4
|
723 frameCount = 0;
|
matthiasm@4
|
724 m_dictID = 0;
|
matthiasm@4
|
725 m_fl.clear();
|
matthiasm@4
|
726 m_meanTuning0 = 0;
|
matthiasm@4
|
727 m_meanTuning1 = 0;
|
matthiasm@4
|
728 m_meanTuning2 = 0;
|
matthiasm@4
|
729 m_localTuning0 = 0;
|
matthiasm@4
|
730 m_localTuning1 = 0;
|
matthiasm@4
|
731 m_localTuning2 = 0;
|
matthiasm@4
|
732 m_localTuning.clear();
|
matthiasm@0
|
733 }
|
matthiasm@0
|
734
|
matthiasm@0
|
735 NNLSChroma::FeatureSet
|
matthiasm@0
|
736 NNLSChroma::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
matthiasm@0
|
737 {
|
matthiasm@4
|
738 if (debug_on) cerr << "--> process" << endl;
|
matthiasm@4
|
739
|
matthiasm@0
|
740 frameCount++;
|
matthiasm@0
|
741 float *magnitude = new float[m_blockSize/2];
|
matthiasm@0
|
742
|
matthiasm@0
|
743 Feature f10; // local tuning
|
matthiasm@3
|
744 f10.hasTimestamp = true;
|
matthiasm@4
|
745 f10.timestamp = timestamp;
|
matthiasm@0
|
746 const float *fbuf = inputBuffers[0];
|
matthiasm@0
|
747
|
matthiasm@0
|
748 // make magnitude
|
matthiasm@0
|
749 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
|
matthiasm@0
|
750 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
|
matthiasm@0
|
751 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
|
matthiasm@0
|
752 }
|
matthiasm@4
|
753
|
matthiasm@0
|
754 // note magnitude mapping using pre-calculated matrix
|
matthiasm@0
|
755 float *nm = new float[nNote]; // note magnitude
|
matthiasm@0
|
756 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
matthiasm@0
|
757 nm[iNote] = 0; // initialise as 0
|
matthiasm@0
|
758 }
|
matthiasm@0
|
759 int binCount = 0;
|
matthiasm@0
|
760 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
|
matthiasm@0
|
761 // cerr << ".";
|
matthiasm@1
|
762 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
|
matthiasm@1
|
763 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl;
|
matthiasm@0
|
764 binCount++;
|
matthiasm@0
|
765 }
|
matthiasm@1
|
766 // cerr << nm[20];
|
matthiasm@1
|
767 // cerr << endl;
|
matthiasm@0
|
768
|
matthiasm@0
|
769
|
matthiasm@0
|
770 float one_over_N = 1.0/frameCount;
|
matthiasm@0
|
771 // update means of complex tuning variables
|
matthiasm@0
|
772 m_meanTuning0 *= float(frameCount-1)*one_over_N;
|
matthiasm@0
|
773 m_meanTuning1 *= float(frameCount-1)*one_over_N;
|
matthiasm@0
|
774 m_meanTuning2 *= float(frameCount-1)*one_over_N;
|
matthiasm@0
|
775
|
matthiasm@0
|
776 for (int iTone = 0; iTone < 160; iTone = iTone + 3) {
|
matthiasm@0
|
777 m_meanTuning0 += nm[iTone + 0]*one_over_N;
|
matthiasm@0
|
778 m_meanTuning1 += nm[iTone + 1]*one_over_N;
|
matthiasm@0
|
779 m_meanTuning2 += nm[iTone + 2]*one_over_N;
|
matthiasm@3
|
780 float ratioOld = 0.997;
|
matthiasm@3
|
781 m_localTuning0 *= ratioOld; m_localTuning0 += nm[iTone + 0] * (1 - ratioOld);
|
matthiasm@3
|
782 m_localTuning1 *= ratioOld; m_localTuning1 += nm[iTone + 1] * (1 - ratioOld);
|
matthiasm@3
|
783 m_localTuning2 *= ratioOld; m_localTuning2 += nm[iTone + 2] * (1 - ratioOld);
|
matthiasm@0
|
784 }
|
matthiasm@0
|
785
|
matthiasm@0
|
786 // if (m_tuneLocal) {
|
matthiasm@0
|
787 // local tuning
|
matthiasm@0
|
788 float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2;
|
matthiasm@0
|
789 float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2;
|
matthiasm@0
|
790 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
|
matthiasm@0
|
791 m_localTuning.push_back(normalisedtuning);
|
matthiasm@0
|
792 float tuning440 = 440 * pow(2,normalisedtuning/12);
|
matthiasm@0
|
793 f10.values.push_back(tuning440);
|
matthiasm@3
|
794 // cerr << tuning440 << endl;
|
matthiasm@0
|
795 // }
|
matthiasm@0
|
796
|
matthiasm@0
|
797 Feature f1; // logfreqspec
|
matthiasm@0
|
798 f1.hasTimestamp = true;
|
matthiasm@0
|
799 f1.timestamp = timestamp;
|
matthiasm@0
|
800 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
matthiasm@0
|
801 f1.values.push_back(nm[iNote]);
|
matthiasm@0
|
802 }
|
matthiasm@0
|
803
|
matthiasm@0
|
804 FeatureSet fs;
|
matthiasm@0
|
805 fs[1].push_back(f1);
|
matthiasm@3
|
806 fs[8].push_back(f10);
|
matthiasm@0
|
807
|
matthiasm@0
|
808 // deletes
|
matthiasm@0
|
809 delete[] magnitude;
|
matthiasm@0
|
810 delete[] nm;
|
matthiasm@0
|
811
|
matthiasm@0
|
812 m_fl.push_back(f1); // remember note magnitude for getRemainingFeatures
|
matthiasm@0
|
813 return fs;
|
matthiasm@0
|
814 }
|
matthiasm@0
|
815
|
matthiasm@0
|
816 NNLSChroma::FeatureSet
|
matthiasm@0
|
817 NNLSChroma::getRemainingFeatures()
|
matthiasm@0
|
818 {
|
matthiasm@4
|
819 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
|
matthiasm@4
|
820 FeatureSet fsOut;
|
matthiasm@4
|
821 if (m_fl.size() == 0) return fsOut;
|
matthiasm@0
|
822 //
|
matthiasm@1
|
823 /** Calculate Tuning
|
matthiasm@1
|
824 calculate tuning from (using the angle of the complex number defined by the
|
matthiasm@1
|
825 cumulative mean real and imag values)
|
matthiasm@1
|
826 **/
|
matthiasm@1
|
827 float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
|
matthiasm@1
|
828 float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
|
matthiasm@1
|
829 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
|
matthiasm@1
|
830 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
|
matthiasm@1
|
831 int intShift = floor(normalisedtuning * 3);
|
matthiasm@1
|
832 float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
|
matthiasm@1
|
833
|
matthiasm@1
|
834 char buffer0 [50];
|
matthiasm@1
|
835
|
matthiasm@1
|
836 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
|
matthiasm@1
|
837
|
matthiasm@1
|
838 // cerr << "normalisedtuning: " << normalisedtuning << '\n';
|
matthiasm@1
|
839
|
matthiasm@1
|
840 // push tuning to FeatureSet fsOut
|
matthiasm@1
|
841 Feature f0; // tuning
|
matthiasm@1
|
842 f0.hasTimestamp = true;
|
matthiasm@1
|
843 f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));;
|
matthiasm@1
|
844 f0.label = buffer0;
|
matthiasm@1
|
845 fsOut[0].push_back(f0);
|
matthiasm@1
|
846
|
matthiasm@1
|
847 /** Tune Log-Frequency Spectrogram
|
matthiasm@1
|
848 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
|
matthiasm@1
|
849 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
|
matthiasm@1
|
850 **/
|
matthiasm@1
|
851
|
matthiasm@1
|
852 float tempValue = 0;
|
matthiasm@1
|
853 float dbThreshold = 0; // relative to the background spectrum
|
matthiasm@1
|
854 float thresh = pow(10,dbThreshold/20);
|
matthiasm@1
|
855 // cerr << "tune local ? " << m_tuneLocal << endl;
|
matthiasm@1
|
856 int count = 0;
|
matthiasm@1
|
857
|
matthiasm@1
|
858 for (FeatureList::iterator i = m_fl.begin(); i != m_fl.end(); ++i) {
|
matthiasm@1
|
859 Feature f1 = *i;
|
matthiasm@1
|
860 Feature f2; // tuned log-frequency spectrum
|
matthiasm@1
|
861 f2.hasTimestamp = true;
|
matthiasm@1
|
862 f2.timestamp = f1.timestamp;
|
matthiasm@1
|
863 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
|
matthiasm@1
|
864
|
matthiasm@1
|
865 if (m_tuneLocal) {
|
matthiasm@1
|
866 intShift = floor(m_localTuning[count] * 3);
|
matthiasm@1
|
867 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
|
matthiasm@1
|
868 }
|
matthiasm@1
|
869
|
matthiasm@1
|
870 // cerr << intShift << " " << intFactor << endl;
|
matthiasm@1
|
871
|
matthiasm@4
|
872 for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
|
matthiasm@1
|
873 tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor;
|
matthiasm@1
|
874 f2.values.push_back(tempValue);
|
matthiasm@1
|
875 }
|
matthiasm@1
|
876
|
matthiasm@1
|
877 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
|
matthiasm@1
|
878 vector<float> runningmean = SpecialConvolution(f2.values,hw);
|
matthiasm@1
|
879 vector<float> runningstd;
|
matthiasm@1
|
880 for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance)
|
matthiasm@1
|
881 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
|
matthiasm@1
|
882 }
|
matthiasm@1
|
883 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
|
matthiasm@1
|
884 for (int i = 0; i < 256; i++) {
|
matthiasm@1
|
885 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
|
matthiasm@1
|
886 if (runningstd[i] > 0) {
|
matthiasm@1
|
887 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
|
matthiasm@1
|
888 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
|
matthiasm@1
|
889 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
|
matthiasm@1
|
890 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
|
matthiasm@1
|
891 }
|
matthiasm@1
|
892 if (f2.values[i] < 0) {
|
matthiasm@1
|
893 cerr << "ERROR: negative value in logfreq spectrum" << endl;
|
matthiasm@1
|
894 }
|
matthiasm@1
|
895 }
|
matthiasm@1
|
896 fsOut[2].push_back(f2);
|
matthiasm@1
|
897 count++;
|
matthiasm@1
|
898 }
|
matthiasm@1
|
899
|
matthiasm@1
|
900 /** Semitone spectrum and chromagrams
|
matthiasm@1
|
901 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
|
matthiasm@1
|
902 is inferred using a non-negative least squares algorithm.
|
matthiasm@1
|
903 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
|
matthiasm@1
|
904 bass and treble stacked onto each other).
|
matthiasm@1
|
905 **/
|
matthiasm@1
|
906 // taucs_ccs_matrix* A_original_ordering = taucs_construct_sorted_ccs_matrix(nnlsdict06, nnls_m, nnls_n);
|
matthiasm@1
|
907
|
matthiasm@1
|
908 vector<vector<float> > chordogram;
|
matthiasm@3
|
909 vector<vector<int> > scoreChordogram;
|
matthiasm@1
|
910 vector<float> oldchroma = vector<float>(12,0);
|
matthiasm@1
|
911 vector<float> oldbasschroma = vector<float>(12,0);
|
matthiasm@1
|
912 count = 0;
|
matthiasm@1
|
913
|
matthiasm@1
|
914 for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) {
|
matthiasm@1
|
915 Feature f2 = *it; // logfreq spectrum
|
matthiasm@1
|
916 Feature f3; // semitone spectrum
|
matthiasm@1
|
917 Feature f4; // treble chromagram
|
matthiasm@1
|
918 Feature f5; // bass chromagram
|
matthiasm@1
|
919 Feature f6; // treble and bass chromagram
|
matthiasm@1
|
920
|
matthiasm@1
|
921 f3.hasTimestamp = true;
|
matthiasm@1
|
922 f3.timestamp = f2.timestamp;
|
matthiasm@1
|
923
|
matthiasm@1
|
924 f4.hasTimestamp = true;
|
matthiasm@1
|
925 f4.timestamp = f2.timestamp;
|
matthiasm@1
|
926
|
matthiasm@1
|
927 f5.hasTimestamp = true;
|
matthiasm@1
|
928 f5.timestamp = f2.timestamp;
|
matthiasm@1
|
929
|
matthiasm@1
|
930 f6.hasTimestamp = true;
|
matthiasm@1
|
931 f6.timestamp = f2.timestamp;
|
matthiasm@1
|
932
|
matthiasm@3
|
933 float b[256];
|
matthiasm@1
|
934
|
matthiasm@1
|
935 bool some_b_greater_zero = false;
|
matthiasm@3
|
936 float sumb = 0;
|
matthiasm@1
|
937 for (int i = 0; i < 256; i++) {
|
matthiasm@3
|
938 // b[i] = m_dict[(256 * count + i) % (256 * 84)];
|
matthiasm@3
|
939 b[i] = f2.values[i];
|
matthiasm@3
|
940 sumb += b[i];
|
matthiasm@1
|
941 if (b[i] > 0) {
|
matthiasm@1
|
942 some_b_greater_zero = true;
|
matthiasm@1
|
943 }
|
matthiasm@1
|
944 }
|
matthiasm@1
|
945
|
matthiasm@1
|
946 // here's where the non-negative least squares algorithm calculates the note activation x
|
matthiasm@1
|
947
|
matthiasm@1
|
948 vector<float> chroma = vector<float>(12, 0);
|
matthiasm@1
|
949 vector<float> basschroma = vector<float>(12, 0);
|
matthiasm@1
|
950 float currval;
|
matthiasm@1
|
951 unsigned iSemitone = 0;
|
matthiasm@1
|
952
|
matthiasm@1
|
953 if (some_b_greater_zero) {
|
matthiasm@3
|
954 if (m_dictID == 1) {
|
matthiasm@1
|
955 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
matthiasm@1
|
956 currval = 0;
|
matthiasm@3
|
957 currval += b[iNote + 1 + -1] * 0.5;
|
matthiasm@3
|
958 currval += b[iNote + 1 + 0] * 1.0;
|
matthiasm@3
|
959 currval += b[iNote + 1 + 1] * 0.5;
|
matthiasm@1
|
960 f3.values.push_back(currval);
|
matthiasm@1
|
961 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
|
matthiasm@1
|
962 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
|
matthiasm@1
|
963 iSemitone++;
|
matthiasm@1
|
964 }
|
matthiasm@1
|
965
|
matthiasm@1
|
966 } else {
|
matthiasm@3
|
967 float x[84+1000];
|
matthiasm@3
|
968 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
|
matthiasm@3
|
969 // for (int i = 0; i < 84; ++i) {
|
matthiasm@3
|
970 // x[i] = b[3*i+3];
|
matthiasm@3
|
971 // }
|
matthiasm@3
|
972 float rnorm;
|
matthiasm@3
|
973 float w[84+1000];
|
matthiasm@3
|
974 float zz[84+1000];
|
matthiasm@3
|
975 int indx[84+1000];
|
matthiasm@1
|
976 int mode;
|
matthiasm@3
|
977 float curr_dict[256*84];
|
matthiasm@3
|
978 for (unsigned i = 0; i < 256 * 84; ++i) {
|
matthiasm@3
|
979 curr_dict[i] = 1.0 * m_dict[i];
|
matthiasm@3
|
980 }
|
matthiasm@3
|
981 nnls(curr_dict, nNote, nNote, 84, b, x, &rnorm, w, zz, indx, &mode);
|
matthiasm@3
|
982 for (unsigned iNote = 0; iNote < 84; ++iNote) {
|
matthiasm@3
|
983 // for (unsigned kNote = 0; kNote < 256; ++kNote) {
|
matthiasm@3
|
984 // x[iNote] += m_dict[kNote + nNote * iNote] * b[kNote];
|
matthiasm@3
|
985 // }
|
matthiasm@3
|
986 f3.values.push_back(x[iNote]);
|
matthiasm@3
|
987 // cerr << mode << endl;
|
matthiasm@3
|
988 chroma[iNote % 12] += x[iNote] * treblewindow[iNote];
|
matthiasm@3
|
989 basschroma[iNote % 12] += x[iNote] * basswindow[iNote];
|
matthiasm@3
|
990 // iSemitone++;
|
matthiasm@3
|
991 }
|
matthiasm@1
|
992 }
|
matthiasm@1
|
993 }
|
matthiasm@1
|
994
|
matthiasm@1
|
995 f4.values = chroma;
|
matthiasm@1
|
996 f5.values = basschroma;
|
matthiasm@1
|
997 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
|
matthiasm@1
|
998 f6.values = chroma;
|
matthiasm@1
|
999
|
matthiasm@1
|
1000 // local chord estimation
|
matthiasm@1
|
1001 vector<float> currentChordSalience;
|
matthiasm@1
|
1002 float tempchordvalue = 0;
|
matthiasm@1
|
1003 float sumchordvalue = 0;
|
matthiasm@1
|
1004 int nChord = nChorddict / 24;
|
matthiasm@1
|
1005 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@1
|
1006 tempchordvalue = 0;
|
matthiasm@1
|
1007 for (int iBin = 0; iBin < 12; iBin++) {
|
matthiasm@1
|
1008 tempchordvalue += chorddict[24 * iChord + iBin] * chroma[iBin];
|
matthiasm@1
|
1009 }
|
matthiasm@1
|
1010 for (int iBin = 12; iBin < 24; iBin++) {
|
matthiasm@1
|
1011 tempchordvalue += chorddict[24 * iChord + iBin] * chroma[iBin];
|
matthiasm@1
|
1012 }
|
matthiasm@1
|
1013 sumchordvalue+=tempchordvalue;
|
matthiasm@1
|
1014 currentChordSalience.push_back(tempchordvalue);
|
matthiasm@1
|
1015 }
|
matthiasm@1
|
1016 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@1
|
1017 currentChordSalience[iChord] /= sumchordvalue;
|
matthiasm@1
|
1018 }
|
matthiasm@1
|
1019 chordogram.push_back(currentChordSalience);
|
matthiasm@1
|
1020
|
matthiasm@1
|
1021 fsOut[3].push_back(f3);
|
matthiasm@1
|
1022 fsOut[4].push_back(f4);
|
matthiasm@1
|
1023 fsOut[5].push_back(f5);
|
matthiasm@1
|
1024 fsOut[6].push_back(f6);
|
matthiasm@1
|
1025 count++;
|
matthiasm@1
|
1026 }
|
matthiasm@0
|
1027 // int musicitykernelwidth = (50 * 2048) / m_stepSize;
|
matthiasm@0
|
1028 //
|
matthiasm@3
|
1029 /* Simple chord estimation
|
matthiasm@3
|
1030 I just take the local chord estimates ("currentChordSalience") and average them over time, then
|
matthiasm@3
|
1031 take the maximum. Very simple, don't do this at home...
|
matthiasm@3
|
1032 */
|
matthiasm@3
|
1033 count = 0;
|
matthiasm@3
|
1034 int halfwindowlength = m_inputSampleRate / m_stepSize;
|
matthiasm@3
|
1035 int nChord = nChorddict / 24;
|
matthiasm@3
|
1036 vector<int> chordSequence;
|
matthiasm@3
|
1037 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram
|
matthiasm@3
|
1038 vector<int> temp = vector<int>(nChord,0);
|
matthiasm@3
|
1039 scoreChordogram.push_back(temp);
|
matthiasm@3
|
1040 }
|
matthiasm@4
|
1041 for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) {
|
matthiasm@3
|
1042 int startIndex = count + 1;
|
matthiasm@3
|
1043 int endIndex = count + 2 * halfwindowlength;
|
matthiasm@3
|
1044 vector<float> temp = vector<float>(nChord,0);
|
matthiasm@3
|
1045 float maxval = 0; // will be the value of the most salient chord in this frame
|
matthiasm@4
|
1046 float maxindex = 0; //... and the index thereof
|
matthiasm@3
|
1047 unsigned bestchordL = 0; // index of the best "left" chord
|
matthiasm@3
|
1048 unsigned bestchordR = 0; // index of the best "right" chord
|
matthiasm@4
|
1049 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
|
matthiasm@3
|
1050 // now find the max values on both sides of iWF
|
matthiasm@3
|
1051 // left side:
|
matthiasm@3
|
1052 float maxL = 0;
|
matthiasm@3
|
1053 unsigned maxindL = nChord-1;
|
matthiasm@3
|
1054 for (unsigned iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@3
|
1055 float currsum = 0;
|
matthiasm@3
|
1056 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
|
matthiasm@3
|
1057 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@3
|
1058 }
|
matthiasm@3
|
1059 if (iChord == nChord-1) currsum *= 0.8;
|
matthiasm@3
|
1060 if (currsum > maxL) {
|
matthiasm@3
|
1061 maxL = currsum;
|
matthiasm@3
|
1062 maxindL = iChord;
|
matthiasm@3
|
1063 }
|
matthiasm@3
|
1064 }
|
matthiasm@3
|
1065 // right side:
|
matthiasm@3
|
1066 float maxR = 0;
|
matthiasm@3
|
1067 unsigned maxindR = nChord-1;
|
matthiasm@3
|
1068 for (unsigned iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@3
|
1069 float currsum = 0;
|
matthiasm@3
|
1070 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
matthiasm@3
|
1071 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@3
|
1072 }
|
matthiasm@3
|
1073 if (iChord == nChord-1) currsum *= 0.8;
|
matthiasm@3
|
1074 if (currsum > maxR) {
|
matthiasm@3
|
1075 maxR = currsum;
|
matthiasm@3
|
1076 maxindR = iChord;
|
matthiasm@3
|
1077 }
|
matthiasm@3
|
1078 }
|
matthiasm@3
|
1079 if (maxL+maxR > maxval) {
|
matthiasm@3
|
1080 maxval = maxL+maxR;
|
matthiasm@3
|
1081 maxindex = iWF;
|
matthiasm@3
|
1082 bestchordL = maxindL;
|
matthiasm@3
|
1083 bestchordR = maxindR;
|
matthiasm@3
|
1084 }
|
matthiasm@3
|
1085
|
matthiasm@3
|
1086 }
|
matthiasm@3
|
1087 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
|
matthiasm@3
|
1088 // add a score to every chord-frame-point that was part of a maximum
|
matthiasm@3
|
1089 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
|
matthiasm@3
|
1090 scoreChordogram[iFrame+count][bestchordL]++;
|
matthiasm@3
|
1091 }
|
matthiasm@3
|
1092 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
matthiasm@3
|
1093 scoreChordogram[iFrame+count][bestchordR]++;
|
matthiasm@3
|
1094 }
|
matthiasm@3
|
1095 count++;
|
matthiasm@3
|
1096 }
|
matthiasm@3
|
1097
|
matthiasm@3
|
1098 count = 0;
|
matthiasm@3
|
1099 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
|
matthiasm@3
|
1100 float maxval = 0; // will be the value of the most salient chord in this frame
|
matthiasm@3
|
1101 float maxindex = 0; //... and the index thereof
|
matthiasm@3
|
1102 for (unsigned iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@3
|
1103 if (scoreChordogram[count][iChord] > maxval) {
|
matthiasm@3
|
1104 maxval = scoreChordogram[count][iChord];
|
matthiasm@3
|
1105 maxindex = iChord;
|
matthiasm@4
|
1106 // cerr << iChord << endl;
|
matthiasm@3
|
1107 }
|
matthiasm@3
|
1108 }
|
matthiasm@3
|
1109 chordSequence.push_back(maxindex);
|
matthiasm@4
|
1110 // cerr << "before modefilter, maxindex: " << maxindex << endl;
|
matthiasm@3
|
1111 count++;
|
matthiasm@3
|
1112 }
|
matthiasm@3
|
1113
|
matthiasm@3
|
1114
|
matthiasm@3
|
1115 // mode filter on chordSequence
|
matthiasm@3
|
1116 count = 0;
|
matthiasm@3
|
1117 int oldChordIndex = -1;
|
matthiasm@3
|
1118 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
|
matthiasm@3
|
1119 Feature f6 = *it;
|
matthiasm@3
|
1120 Feature f7; // chord estimate
|
matthiasm@3
|
1121 f7.hasTimestamp = true;
|
matthiasm@3
|
1122 f7.timestamp = f6.timestamp;
|
matthiasm@3
|
1123 vector<int> chordCount = vector<int>(nChord,0);
|
matthiasm@3
|
1124 int maxChordCount = 0;
|
matthiasm@3
|
1125 int maxChordIndex = nChord-1;
|
matthiasm@4
|
1126 int startIndex = max(count - halfwindowlength/2,0);
|
matthiasm@4
|
1127 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
|
matthiasm@4
|
1128 for (int i = startIndex; i < endIndex; i++) {
|
matthiasm@4
|
1129 chordCount[chordSequence[i]]++;
|
matthiasm@4
|
1130 if (chordCount[chordSequence[i]] > maxChordCount) {
|
matthiasm@4
|
1131 cerr << "start index " << startIndex << endl;
|
matthiasm@4
|
1132 maxChordCount++;
|
matthiasm@4
|
1133 maxChordIndex = chordSequence[i];
|
matthiasm@4
|
1134 }
|
matthiasm@4
|
1135 }
|
matthiasm@4
|
1136 // chordSequence[count] = maxChordIndex;
|
matthiasm@4
|
1137 cerr << maxChordIndex << endl;
|
matthiasm@3
|
1138 if (oldChordIndex != maxChordIndex) {
|
matthiasm@3
|
1139 oldChordIndex = maxChordIndex;
|
matthiasm@3
|
1140
|
matthiasm@3
|
1141 char buffer1 [50];
|
matthiasm@3
|
1142 if (maxChordIndex < nChord - 1) {
|
matthiasm@3
|
1143 sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
|
matthiasm@3
|
1144 } else {
|
matthiasm@3
|
1145 sprintf(buffer1, "N");
|
matthiasm@3
|
1146 }
|
matthiasm@3
|
1147 f7.label = buffer1;
|
matthiasm@3
|
1148 fsOut[7].push_back(f7);
|
matthiasm@3
|
1149 }
|
matthiasm@3
|
1150 count++;
|
matthiasm@3
|
1151 }
|
matthiasm@0
|
1152 // // musicity
|
matthiasm@0
|
1153 // count = 0;
|
matthiasm@0
|
1154 // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
|
matthiasm@0
|
1155 // vector<float> musicityValue;
|
matthiasm@0
|
1156 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
|
matthiasm@0
|
1157 // Feature f4 = *it;
|
matthiasm@0
|
1158 //
|
matthiasm@0
|
1159 // int startIndex = max(count - musicitykernelwidth/2,0);
|
matthiasm@0
|
1160 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
|
matthiasm@0
|
1161 // float chromasum = 0;
|
matthiasm@0
|
1162 // float diffsum = 0;
|
matthiasm@0
|
1163 // for (int k = 0; k < 12; k++) {
|
matthiasm@0
|
1164 // for (int i = startIndex + 1; i < endIndex; i++) {
|
matthiasm@0
|
1165 // chromasum += pow(fsOut[4][i].values[k],2);
|
matthiasm@0
|
1166 // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]);
|
matthiasm@0
|
1167 // }
|
matthiasm@0
|
1168 // }
|
matthiasm@0
|
1169 // diffsum /= chromasum;
|
matthiasm@0
|
1170 // musicityValue.push_back(diffsum);
|
matthiasm@0
|
1171 // count++;
|
matthiasm@0
|
1172 // }
|
matthiasm@0
|
1173 //
|
matthiasm@0
|
1174 // float musicityThreshold = 0.44;
|
matthiasm@0
|
1175 // if (m_stepSize == 4096) {
|
matthiasm@0
|
1176 // musicityThreshold = 0.74;
|
matthiasm@0
|
1177 // }
|
matthiasm@0
|
1178 // if (m_stepSize == 4410) {
|
matthiasm@0
|
1179 // musicityThreshold = 0.77;
|
matthiasm@0
|
1180 // }
|
matthiasm@0
|
1181 //
|
matthiasm@0
|
1182 // count = 0;
|
matthiasm@0
|
1183 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
|
matthiasm@0
|
1184 // Feature f4 = *it;
|
matthiasm@0
|
1185 // Feature f8; // musicity
|
matthiasm@0
|
1186 // Feature f9; // musicity segmenter
|
matthiasm@0
|
1187 //
|
matthiasm@0
|
1188 // f8.hasTimestamp = true;
|
matthiasm@0
|
1189 // f8.timestamp = f4.timestamp;
|
matthiasm@0
|
1190 // f9.hasTimestamp = true;
|
matthiasm@0
|
1191 // f9.timestamp = f4.timestamp;
|
matthiasm@0
|
1192 //
|
matthiasm@0
|
1193 // int startIndex = max(count - musicitykernelwidth/2,0);
|
matthiasm@0
|
1194 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
|
matthiasm@0
|
1195 // int musicityCount = 0;
|
matthiasm@0
|
1196 // for (int i = startIndex; i <= endIndex; i++) {
|
matthiasm@0
|
1197 // if (musicityValue[i] > musicityThreshold) musicityCount++;
|
matthiasm@0
|
1198 // }
|
matthiasm@0
|
1199 // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1);
|
matthiasm@0
|
1200 //
|
matthiasm@0
|
1201 // if (isSpeech) {
|
matthiasm@0
|
1202 // if (oldlabeltype != 2) {
|
matthiasm@0
|
1203 // f9.label = "Speech";
|
matthiasm@0
|
1204 // fsOut[9].push_back(f9);
|
matthiasm@0
|
1205 // oldlabeltype = 2;
|
matthiasm@0
|
1206 // }
|
matthiasm@0
|
1207 // } else {
|
matthiasm@0
|
1208 // if (oldlabeltype != 1) {
|
matthiasm@0
|
1209 // f9.label = "Music";
|
matthiasm@0
|
1210 // fsOut[9].push_back(f9);
|
matthiasm@0
|
1211 // oldlabeltype = 1;
|
matthiasm@0
|
1212 // }
|
matthiasm@0
|
1213 // }
|
matthiasm@0
|
1214 // f8.values.push_back(musicityValue[count]);
|
matthiasm@0
|
1215 // fsOut[8].push_back(f8);
|
matthiasm@0
|
1216 // count++;
|
matthiasm@0
|
1217 // }
|
matthiasm@0
|
1218 return fsOut;
|
matthiasm@0
|
1219
|
matthiasm@0
|
1220 }
|
matthiasm@0
|
1221
|