Chris@23
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
matthiasm@0
|
2
|
matthiasm@0
|
3 #include "NNLSChroma.h"
|
matthiasm@0
|
4 #include <cmath>
|
matthiasm@10
|
5 // #include <omp.h>
|
matthiasm@0
|
6 #include <list>
|
matthiasm@0
|
7 #include <iostream>
|
matthiasm@3
|
8 #include <fstream>
|
matthiasm@0
|
9 #include <sstream>
|
matthiasm@0
|
10 #include <cassert>
|
matthiasm@7
|
11 #include <cstdlib>
|
matthiasm@0
|
12 #include <cstdio>
|
matthiasm@7
|
13 #include <boost/tokenizer.hpp>
|
matthiasm@7
|
14 #include <boost/iostreams/device/file.hpp>
|
matthiasm@7
|
15 #include <boost/iostreams/stream.hpp>
|
matthiasm@7
|
16 #include <boost/lexical_cast.hpp>
|
matthiasm@1
|
17 #include "nnls.h"
|
matthiasm@0
|
18 #include "chorddict.cpp"
|
matthiasm@9
|
19
|
matthiasm@10
|
20 // #include <omp.h>
|
matthiasm@10
|
21 // #define N 1000
|
matthiasm@10
|
22 // #define CHUNKSIZE 100
|
matthiasm@9
|
23
|
matthiasm@9
|
24
|
matthiasm@0
|
25 using namespace std;
|
matthiasm@7
|
26 using namespace boost;
|
matthiasm@0
|
27
|
matthiasm@0
|
// Sine and cosine of 120 degrees (2*pi/3).
const float sinvalue = 0.866025404;
const float cosvalue = -0.5;

// 19-tap symmetric smoothing window (peak in the middle, coefficients sum
// to approximately 1).
const float hammingwind[19] = {
    0.0082, 0.0110, 0.0191, 0.0316, 0.0470, 0.0633, 0.0786, 0.0911, 0.0992,
    0.1020,
    0.0992, 0.0911, 0.0786, 0.0633, 0.0470, 0.0316, 0.0191, 0.0110, 0.0082
};
|
matthiasm@0
|
31 const float basswindow[] = {0.001769, 0.015848, 0.043608, 0.084265, 0.136670, 0.199341, 0.270509, 0.348162, 0.430105, 0.514023, 0.597545, 0.678311, 0.754038, 0.822586, 0.882019, 0.930656, 0.967124, 0.990393, 0.999803, 0.995091, 0.976388, 0.944223, 0.899505, 0.843498, 0.777785, 0.704222, 0.624888, 0.542025, 0.457975, 0.375112, 0.295778, 0.222215, 0.156502, 0.100495, 0.055777, 0.023612, 0.004909, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000};
|
matthiasm@0
|
32 const float treblewindow[] = {0.000350, 0.003144, 0.008717, 0.017037, 0.028058, 0.041719, 0.057942, 0.076638, 0.097701, 0.121014, 0.146447, 0.173856, 0.203090, 0.233984, 0.266366, 0.300054, 0.334860, 0.370590, 0.407044, 0.444018, 0.481304, 0.518696, 0.555982, 0.592956, 0.629410, 0.665140, 0.699946, 0.733634, 0.766016, 0.796910, 0.826144, 0.853553, 0.878986, 0.902299, 0.923362, 0.942058, 0.958281, 0.971942, 0.982963, 0.991283, 0.996856, 0.999650, 0.999650, 0.996856, 0.991283, 0.982963, 0.971942, 0.958281, 0.942058, 0.923362, 0.902299, 0.878986, 0.853553, 0.826144, 0.796910, 0.766016, 0.733634, 0.699946, 0.665140, 0.629410, 0.592956, 0.555982, 0.518696, 0.481304, 0.444018, 0.407044, 0.370590, 0.334860, 0.300054, 0.266366, 0.233984, 0.203090, 0.173856, 0.146447, 0.121014, 0.097701, 0.076638, 0.057942, 0.041719, 0.028058, 0.017037, 0.008717, 0.003144, 0.000350};
|
matthiasm@0
|
// Display names of the 24 pitch classes, starting from A: entries 0-11 are
// the bass notes, entries 12-23 the plain note names.
const char* notenames[24] = {
    "A (bass)", "Bb (bass)", "B (bass)", "C (bass)", "C# (bass)", "D (bass)",
    "Eb (bass)", "E (bass)", "F (bass)", "F# (bass)", "G (bass)", "Ab (bass)",
    "A", "Bb", "B", "C", "C#", "D", "Eb", "E", "F", "F#", "G", "Ab"
};

// bassnames[root][interval] is the spelling of the note `interval` semitones
// above `root` (roots indexed from A, as in notenames). The entries for
// intervals 1, 6 and 8 are empty strings.
const char* bassnames[12][12] = {
    {"A",  "", "B",  "C",  "C#", "D",  "", "E",  "", "F#", "G",  "G#"},
    {"Bb", "", "C",  "Db", "D",  "Eb", "", "F",  "", "G",  "Ab", "A"},
    {"B",  "", "C#", "D",  "D#", "E",  "", "F#", "", "G#", "A",  "A#"},
    {"C",  "", "D",  "Eb", "E",  "F",  "", "G",  "", "A",  "Bb", "B"},
    {"C#", "", "D#", "E",  "E#", "F#", "", "G#", "", "A#", "B",  "B#"},
    {"D",  "", "E",  "F",  "F#", "G",  "", "A",  "", "B",  "C",  "C#"},
    {"Eb", "", "F",  "Gb", "G",  "Ab", "", "Bb", "", "C",  "Db", "D"},
    {"E",  "", "F#", "G",  "G#", "A",  "", "B",  "", "C#", "D",  "D#"},
    {"F",  "", "G",  "Ab", "A",  "Bb", "", "C",  "", "D",  "Eb", "E"},
    {"F#", "", "G#", "A",  "A#", "B",  "", "C#", "", "D#", "E",  "E#"},
    {"G",  "", "A",  "Bb", "B",  "C",  "", "D",  "", "E",  "F",  "F#"},
    {"Ab", "", "Bb", "Cb", "C",  "Db", "", "Eb", "", "F",  "Gb", "G"}
};
|
matthiasm@17
|
50
|
matthiasm@17
|
51
|
matthiasm@17
|
52 // const char* bassnames[12][12] ={
|
matthiasm@17
|
53 // {"1","","2","b3","3","4","","5","","6","b7","7"},
|
matthiasm@17
|
54 // {"1","","2","b3","3","4","","5","","6","b7","7"},
|
matthiasm@17
|
55 // {"1","","2","b3","3","4","","5","","6","b7","7"},
|
matthiasm@17
|
56 // {"1","","2","b3","3","4","","5","","6","b7","7"},
|
matthiasm@17
|
57 // {"1","","2","b3","3","4","","5","","6","b7","7"},
|
matthiasm@17
|
58 // {"1","","2","b3","3","4","","5","","6","b7","7"},
|
matthiasm@17
|
59 // {"1","","2","b3","3","4","","5","","6","b7","7"},
|
matthiasm@17
|
60 // {"1","","2","b3","3","4","","5","","6","b7","7"},
|
matthiasm@17
|
61 // {"1","","2","b3","3","4","","5","","6","b7","7"},
|
matthiasm@17
|
62 // {"1","","2","b3","3","4","","5","","6","b7","7"},
|
matthiasm@17
|
63 // {"1","","2","b3","3","4","","5","","6","b7","7"},
|
matthiasm@17
|
64 // {"1","","2","b3","3","4","","5","","6","b7","7"},
|
matthiasm@17
|
65 // };
|
matthiasm@17
|
66
|
matthiasm@0
|
67 const vector<float> hw(hammingwind, hammingwind+19);
|
matthiasm@0
|
68 const int nNote = 256;
|
matthiasm@0
|
69
|
matthiasm@0
|
const bool debug_on = false;

/** Special Convolution
    The result is as long as the convolvee, i.e. the first argument. In the
    valid core part of the convolution it contains the usual convolution
    values, but the pads at the beginning (end) have the same value as the
    first (last) valid convolution bin.

    The kernel must have an odd number of elements; this is only checked
    with an assert. If the kernel is empty, or the convolvee is shorter than
    the kernel, there is no valid core and a vector of zeros is returned.
**/
std::vector<float> SpecialConvolution(std::vector<float> convolvee, std::vector<float> kernel)
{
    const int lenConvolvee = convolvee.size();
    const int lenKernel = kernel.size();
    const int halfKernel = lenKernel / 2;

    // Size the output from the input. A fixed size of 256 would be written
    // out of bounds as soon as the convolvee is longer than that.
    std::vector<float> Z(lenConvolvee, 0);
    assert(lenKernel % 2 != 0); // no exception handling !!!

    // Without a valid core every index computed below would be out of range.
    if (lenKernel == 0 || lenConvolvee < lenKernel) {
        return Z;
    }

    // valid core: the value for input position n is stored centred on the kernel
    for (int n = lenKernel - 1; n < lenConvolvee; n++) {
        float s = 0.0;
        for (int m = 0; m < lenKernel; m++) {
            s += convolvee[n - m] * kernel[m];
        }
        Z[n - halfKernel] = s;
    }

    // fill lower and upper pads with the nearest valid value
    for (int n = 0; n < halfKernel; n++) {
        Z[n] = Z[halfKernel];
    }
    for (int n = lenConvolvee - halfKernel; n < lenConvolvee; n++) {
        Z[n] = Z[lenConvolvee - halfKernel - 1];
    }
    return Z;
}
|
matthiasm@0
|
105
|
matthiasm@0
|
106 // vector<float> FftBin2Frequency(vector<float> binnumbers, int fs, int blocksize)
|
matthiasm@0
|
107 // {
|
matthiasm@0
|
108 // vector<float> freq(binnumbers.size, 0.0);
|
matthiasm@0
|
109 // for (unsigned i = 0; i < binnumbers.size; ++i) {
|
matthiasm@0
|
110 // freq[i] = (binnumbers[i]-1.0) * fs * 1.0 / blocksize;
|
matthiasm@0
|
111 // }
|
matthiasm@0
|
112 // return freq;
|
matthiasm@0
|
113 // }
|
matthiasm@0
|
114
|
matthiasm@0
|
/**
 * Raised-cosine pulse of the given width centred on `centre`.
 *
 * Returns 1 at x == centre, falls to 0 at a distance of width/2 and is 0
 * everywhere outside that interval. A width that is not positive describes
 * an empty pulse and yields 0.
 */
float cospuls(float x, float centre, float width)
{
    // Also rejects NaN; avoids 0 * inf = NaN for width == 0 at x == centre.
    if (!(width > 0)) {
        return 0.0;
    }
    // Local constant: M_PI is not part of standard C++.
    const double pi = 3.14159265358979323846;
    float recipwidth = 1.0/width;
    // std::fabs keeps the comparison in floating point; an unqualified abs
    // can resolve to the integer ::abs and truncate the distance.
    if (std::fabs(x - centre) <= 0.5 * width) {
        return std::cos((x-centre)*2*pi*recipwidth)*.5+.5;
    }
    return 0.0;
}
|
matthiasm@0
|
123
|
matthiasm@0
|
124 float pitchCospuls(float x, float centre, int binsperoctave)
|
matthiasm@0
|
125 {
|
Chris@23
|
126 float warpedf = -binsperoctave * (log2(centre) - log2(x));
|
Chris@23
|
127 float out = cospuls(warpedf, 0.0, 2.0);
|
Chris@23
|
128 // now scale to correct for note density
|
Chris@23
|
129 float c = log(2.0)/binsperoctave;
|
Chris@23
|
130 if (x > 0) {
|
Chris@23
|
131 out = out / (c * x);
|
Chris@23
|
132 } else {
|
Chris@23
|
133 out = 0;
|
Chris@23
|
134 }
|
Chris@23
|
135 return out;
|
matthiasm@0
|
136 }
|
matthiasm@0
|
137
|
matthiasm@0
|
138 bool logFreqMatrix(int fs, int blocksize, float *outmatrix) {
|
matthiasm@0
|
139
|
Chris@23
|
140 int binspersemitone = 3; // this must be 3
|
Chris@23
|
141 int minoctave = 0; // this must be 0
|
Chris@23
|
142 int maxoctave = 7; // this must be 7
|
Chris@23
|
143 int oversampling = 80;
|
matthiasm@0
|
144
|
Chris@23
|
145 // linear frequency vector
|
Chris@23
|
146 vector<float> fft_f;
|
Chris@23
|
147 for (int i = 0; i < blocksize/2; ++i) {
|
Chris@23
|
148 fft_f.push_back(i * (fs * 1.0 / blocksize));
|
Chris@23
|
149 }
|
Chris@23
|
150 float fft_width = fs * 2.0 / blocksize;
|
matthiasm@0
|
151
|
Chris@23
|
152 // linear oversampled frequency vector
|
Chris@23
|
153 vector<float> oversampled_f;
|
Chris@23
|
154 for (unsigned int i = 0; i < oversampling * blocksize/2; ++i) {
|
Chris@23
|
155 oversampled_f.push_back(i * ((fs * 1.0 / blocksize) / oversampling));
|
Chris@23
|
156 }
|
matthiasm@0
|
157
|
Chris@23
|
158 // pitch-spaced frequency vector
|
Chris@23
|
159 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone!
|
Chris@23
|
160 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone!
|
Chris@23
|
161 vector<float> cq_f;
|
Chris@23
|
162 float oob = 1.0/binspersemitone; // one over binspersemitone
|
Chris@23
|
163 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12
|
Chris@23
|
164 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69)));
|
Chris@23
|
165 for (int i = minMIDI + 1; i < maxMIDI; ++i) {
|
Chris@23
|
166 for (int k = -1; k < 2; ++k) {
|
Chris@23
|
167 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69)));
|
Chris@23
|
168 }
|
Chris@23
|
169 }
|
Chris@23
|
170 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69)));
|
Chris@23
|
171 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69)));
|
matthiasm@0
|
172
|
Chris@23
|
173 int nFFT = fft_f.size();
|
matthiasm@0
|
174
|
Chris@23
|
175 vector<float> fft_activation;
|
Chris@23
|
176 for (int iOS = 0; iOS < 2 * oversampling; ++iOS) {
|
Chris@23
|
177 float cosp = cospuls(oversampled_f[iOS],fft_f[1],fft_width);
|
Chris@23
|
178 fft_activation.push_back(cosp);
|
Chris@23
|
179 // cerr << cosp << endl;
|
Chris@23
|
180 }
|
matthiasm@0
|
181
|
Chris@23
|
182 float cq_activation;
|
Chris@23
|
183 for (int iFFT = 1; iFFT < nFFT; ++iFFT) {
|
Chris@23
|
184 // find frequency stretch where the oversampled vector can be non-zero (i.e. in a window of width fft_width around the current frequency)
|
Chris@23
|
185 int curr_start = oversampling * iFFT - oversampling;
|
Chris@23
|
186 int curr_end = oversampling * iFFT + oversampling; // don't know if I should add "+1" here
|
Chris@23
|
187 // cerr << oversampled_f[curr_start] << " " << fft_f[iFFT] << " " << oversampled_f[curr_end] << endl;
|
Chris@23
|
188 for (unsigned iCQ = 0; iCQ < cq_f.size(); ++iCQ) {
|
Chris@23
|
189 outmatrix[iFFT + nFFT * iCQ] = 0;
|
Chris@23
|
190 if (cq_f[iCQ] * pow(2.0, 0.084) + fft_width > fft_f[iFFT] && cq_f[iCQ] * pow(2.0, -0.084 * 2) - fft_width < fft_f[iFFT]) { // within a generous neighbourhood
|
Chris@23
|
191 for (int iOS = curr_start; iOS < curr_end; ++iOS) {
|
Chris@23
|
192 cq_activation = pitchCospuls(oversampled_f[iOS],cq_f[iCQ],binspersemitone*12);
|
Chris@23
|
193 // cerr << oversampled_f[iOS] << " " << cq_f[iCQ] << " " << cq_activation << endl;
|
Chris@23
|
194 outmatrix[iFFT + nFFT * iCQ] += cq_activation * fft_activation[iOS-curr_start];
|
Chris@23
|
195 }
|
Chris@23
|
196 // if (iCQ == 1 || iCQ == 2) {
|
Chris@23
|
197 // cerr << " " << outmatrix[iFFT + nFFT * iCQ] << endl;
|
Chris@23
|
198 // }
|
Chris@23
|
199 }
|
Chris@23
|
200 }
|
Chris@23
|
201 }
|
Chris@23
|
202 return true;
|
matthiasm@0
|
203 }
|
matthiasm@0
|
204
|
matthiasm@17
|
205 void dictionaryMatrix(float* dm) {
|
Chris@23
|
206 int binspersemitone = 3; // this must be 3
|
Chris@23
|
207 int minoctave = 0; // this must be 0
|
Chris@23
|
208 int maxoctave = 7; // this must be 7
|
Chris@23
|
209 float s_param = 0.7;
|
matthiasm@1
|
210
|
Chris@23
|
211 // pitch-spaced frequency vector
|
Chris@23
|
212 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone!
|
Chris@23
|
213 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone!
|
Chris@23
|
214 vector<float> cq_f;
|
Chris@23
|
215 float oob = 1.0/binspersemitone; // one over binspersemitone
|
Chris@23
|
216 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12
|
Chris@23
|
217 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69)));
|
Chris@23
|
218 for (int i = minMIDI + 1; i < maxMIDI; ++i) {
|
Chris@23
|
219 for (int k = -1; k < 2; ++k) {
|
Chris@23
|
220 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69)));
|
Chris@23
|
221 }
|
Chris@23
|
222 }
|
Chris@23
|
223 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69)));
|
Chris@23
|
224 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69)));
|
matthiasm@1
|
225
|
Chris@23
|
226 float curr_f;
|
Chris@23
|
227 float floatbin;
|
Chris@23
|
228 float curr_amp;
|
Chris@23
|
229 // now for every combination calculate the matrix element
|
Chris@23
|
230 for (unsigned iOut = 0; iOut < 12 * (maxoctave - minoctave); ++iOut) {
|
Chris@23
|
231 // cerr << iOut << endl;
|
Chris@23
|
232 for (unsigned iHarm = 1; iHarm <= 20; ++iHarm) {
|
Chris@23
|
233 curr_f = 440 * pow(2,(minMIDI-69+iOut)*1.0/12) * iHarm;
|
Chris@23
|
234 // if (curr_f > cq_f[nNote-1]) break;
|
Chris@23
|
235 floatbin = ((iOut + 1) * binspersemitone + 1) + binspersemitone * 12 * log2(iHarm);
|
Chris@23
|
236 // cerr << floatbin << endl;
|
Chris@23
|
237 curr_amp = pow(s_param,float(iHarm-1));
|
Chris@23
|
238 // cerr << "curramp" << curr_amp << endl;
|
Chris@23
|
239 for (unsigned iNote = 0; iNote < nNote; ++iNote) {
|
Chris@23
|
240 if (abs(iNote+1.0-floatbin)<2) {
|
Chris@23
|
241 dm[iNote + 256 * iOut] += cospuls(iNote+1.0, floatbin, binspersemitone + 0.0) * curr_amp;
|
Chris@23
|
242 // dm[iNote + nNote * iOut] += 1 * curr_amp;
|
Chris@23
|
243 }
|
Chris@23
|
244 }
|
Chris@23
|
245 }
|
Chris@23
|
246 }
|
matthiasm@3
|
247
|
matthiasm@3
|
248
|
matthiasm@1
|
249 }
|
matthiasm@1
|
250
|
matthiasm@7
|
/**
 * Return the value of the environment variable `key`, or an empty string
 * if the variable is not set.
 */
std::string get_env_var( std::string const & key ) {
    const char *raw = std::getenv(key.c_str());
    if (raw == NULL) {
        return std::string();
    }
    return std::string(raw);
}
|
matthiasm@7
|
260
|
matthiasm@7
|
261
|
matthiasm@9
|
262 vector<string> chordDictionary(vector<float> *mchorddict) {
|
Chris@23
|
263 // ifstream chordDictFile;
|
Chris@23
|
264 string chordDictFilename(get_env_var("VAMP_PATH")+"/chord.dict");
|
Chris@23
|
265 // string instring[] = ",1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\nm,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0\n6,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0\n7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0\nmaj7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1\nmin7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0\n,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\n,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\ndim,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0\naug,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0\n";
|
Chris@23
|
266 typedef tokenizer<char_separator<char> > Tok;
|
Chris@23
|
267 // char_separator<char> sep; // default constructed
|
Chris@23
|
268 char_separator<char> sep(",; ","=");
|
matthiasm@7
|
269 iostreams::stream<iostreams::file_source> chordDictFile(chordDictFilename.c_str());
|
matthiasm@7
|
270 string line;
|
Chris@23
|
271 int iElement = 0;
|
Chris@23
|
272 int nChord = 0;
|
matthiasm@7
|
273
|
Chris@23
|
274 vector<string> loadedChordNames;
|
Chris@23
|
275 vector<float> loadedChordDict;
|
Chris@23
|
276 if (chordDictFile.is_open()) {
|
Chris@23
|
277 while (std::getline(chordDictFile, line)) { // loop over lines in chord.dict file
|
Chris@23
|
278 // first, get the chord definition
|
Chris@23
|
279 string chordType;
|
Chris@23
|
280 vector<float> tempPCVector;
|
Chris@23
|
281 // cerr << line << endl;
|
Chris@23
|
282 if (!line.empty() && line.substr(0,1) != "#") {
|
Chris@23
|
283 Tok tok(line, sep);
|
Chris@23
|
284 for(Tok::iterator tok_iter = tok.begin(); tok_iter != tok.end(); ++tok_iter) { // loop over line elements
|
Chris@23
|
285 string tempString = *tok_iter;
|
Chris@23
|
286 // cerr << tempString << endl;
|
Chris@23
|
287 if (tok_iter == tok.begin()) { // either the chord name or a colon
|
Chris@23
|
288 if (tempString == "=") {
|
Chris@23
|
289 chordType = "";
|
Chris@23
|
290 } else {
|
Chris@23
|
291 chordType = tempString;
|
Chris@23
|
292 tok_iter++; // is this cheating ? :)
|
Chris@23
|
293 }
|
Chris@23
|
294 } else {
|
Chris@23
|
295 tempPCVector.push_back(lexical_cast<float>(*tok_iter));
|
Chris@23
|
296 }
|
Chris@23
|
297 }
|
matthiasm@7
|
298
|
Chris@23
|
299 // now make all 12 chords of every type
|
Chris@23
|
300 for (unsigned iSemitone = 0; iSemitone < 12; iSemitone++) {
|
Chris@23
|
301 // add bass slash notation
|
Chris@23
|
302 string slashNotation = "";
|
Chris@23
|
303 for (unsigned kSemitone = 1; kSemitone < 12; kSemitone++) {
|
Chris@23
|
304 if (tempPCVector[(kSemitone) % 12] > 0.99) {
|
Chris@23
|
305 slashNotation = bassnames[iSemitone][kSemitone];
|
Chris@23
|
306 }
|
Chris@23
|
307 }
|
Chris@23
|
308 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) { // bass pitch classes
|
Chris@23
|
309 // cerr << ((kSemitone - iSemitone + 12) % 12) << endl;
|
Chris@23
|
310 float bassValue = 0;
|
Chris@23
|
311 if (tempPCVector[(kSemitone - iSemitone + 12) % 12]==1) {
|
Chris@23
|
312 bassValue = 1;
|
Chris@23
|
313 } else {
|
Chris@23
|
314 if (tempPCVector[((kSemitone - iSemitone + 12) % 12) + 12] == 1) bassValue = 0.5;
|
Chris@23
|
315 }
|
Chris@23
|
316 loadedChordDict.push_back(bassValue);
|
Chris@23
|
317 }
|
Chris@23
|
318 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) { // chord pitch classes
|
Chris@23
|
319 loadedChordDict.push_back(tempPCVector[((kSemitone - iSemitone + 12) % 12) + 12]);
|
Chris@23
|
320 }
|
Chris@23
|
321 ostringstream os;
|
Chris@23
|
322 if (slashNotation.empty()) {
|
Chris@23
|
323 os << notenames[12+iSemitone] << chordType;
|
Chris@23
|
324 } else {
|
Chris@23
|
325 os << notenames[12+iSemitone] << chordType << "/" << slashNotation;
|
Chris@23
|
326 }
|
Chris@23
|
327 // cerr << os.str() << endl;
|
Chris@23
|
328 loadedChordNames.push_back(os.str());
|
Chris@23
|
329 }
|
Chris@23
|
330 }
|
Chris@23
|
331 }
|
Chris@23
|
332 // N type
|
Chris@23
|
333 loadedChordNames.push_back("N");
|
Chris@23
|
334 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) loadedChordDict.push_back(0.5);
|
Chris@23
|
335 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) loadedChordDict.push_back(1.0);
|
matthiasm@7
|
336
|
Chris@23
|
337 // normalise
|
Chris@23
|
338 float sum = 0;
|
Chris@23
|
339 for (int i = 0; i < loadedChordDict.size(); i++) {
|
Chris@23
|
340 sum += pow(loadedChordDict[i],2);
|
Chris@23
|
341 if (i % 24 == 23) {
|
Chris@23
|
342 float invertedsum = 1.0/sqrt(sum);
|
Chris@23
|
343 for (int k = 0; k < 24; k++) {
|
Chris@23
|
344 loadedChordDict[i-k] *= invertedsum;
|
Chris@23
|
345 }
|
Chris@23
|
346 sum = 0;
|
Chris@23
|
347 }
|
matthiasm@7
|
348
|
Chris@23
|
349 }
|
matthiasm@7
|
350
|
matthiasm@7
|
351
|
Chris@23
|
352 nChord = 0;
|
Chris@23
|
353 for (int i = 0; i < loadedChordNames.size(); i++) {
|
Chris@23
|
354 nChord++;
|
Chris@23
|
355 }
|
Chris@23
|
356 chordDictFile.close();
|
matthiasm@7
|
357
|
matthiasm@7
|
358
|
Chris@23
|
359 // mchorddict = new float[nChord*24];
|
Chris@23
|
360 for (int i = 0; i < nChord*24; i++) {
|
Chris@23
|
361 mchorddict->push_back(loadedChordDict[i]);
|
Chris@23
|
362 }
|
matthiasm@9
|
363
|
Chris@23
|
364 } else {// use default from chorddict.cpp
|
Chris@23
|
365 // mchorddict = new float[nChorddict];
|
Chris@23
|
366 for (int i = 0; i < nChorddict; i++) {
|
Chris@23
|
367 mchorddict->push_back(chorddict[i]);
|
Chris@23
|
368 }
|
matthiasm@7
|
369
|
Chris@23
|
370 nChord = nChorddict/24;
|
Chris@23
|
371 // mchordnames = new string[nChorddict/24];
|
Chris@23
|
372 char buffer1 [50];
|
Chris@23
|
373 for (int i = 0; i < nChorddict/24; i++) {
|
Chris@23
|
374 if (i < nChorddict/24 - 1) {
|
Chris@23
|
375 sprintf(buffer1, "%s%s", notenames[i % 12 + 12], chordtypes[i]);
|
Chris@23
|
376 } else {
|
Chris@23
|
377 sprintf(buffer1, "N");
|
Chris@23
|
378 }
|
Chris@23
|
379 ostringstream os;
|
Chris@23
|
380 os << buffer1;
|
Chris@23
|
381 loadedChordNames.push_back(os.str());
|
matthiasm@9
|
382
|
Chris@23
|
383 }
|
matthiasm@7
|
384
|
Chris@23
|
385 }
|
Chris@23
|
386 // cerr << "before leaving" << chordnames[1] << endl;
|
Chris@23
|
387 return loadedChordNames;
|
matthiasm@7
|
388 }
|
matthiasm@0
|
389
|
matthiasm@0
|
390 NNLSChroma::NNLSChroma(float inputSampleRate) :
|
Chris@23
|
391 Plugin(inputSampleRate),
|
Chris@23
|
392 m_fl(0),
|
Chris@23
|
393 m_blockSize(0),
|
Chris@23
|
394 m_stepSize(0),
|
Chris@23
|
395 m_lengthOfNoteIndex(0),
|
Chris@23
|
396 m_meanTuning0(0),
|
Chris@23
|
397 m_meanTuning1(0),
|
Chris@23
|
398 m_meanTuning2(0),
|
Chris@23
|
399 m_localTuning0(0),
|
Chris@23
|
400 m_localTuning1(0),
|
Chris@23
|
401 m_localTuning2(0),
|
Chris@23
|
402 m_paling(1.0),
|
Chris@23
|
403 m_preset(0.0),
|
Chris@23
|
404 m_localTuning(0),
|
Chris@23
|
405 m_kernelValue(0),
|
Chris@23
|
406 m_kernelFftIndex(0),
|
Chris@23
|
407 m_kernelNoteIndex(0),
|
Chris@23
|
408 m_dict(0),
|
Chris@23
|
409 m_tuneLocal(false),
|
Chris@23
|
410 m_dictID(0),
|
Chris@23
|
411 m_chorddict(0),
|
Chris@23
|
412 m_chordnames(0),
|
Chris@23
|
413 m_doNormalizeChroma(0),
|
Chris@23
|
414 m_rollon(0.01)
|
matthiasm@0
|
415 {
|
Chris@23
|
416 if (debug_on) cerr << "--> NNLSChroma" << endl;
|
matthiasm@7
|
417
|
Chris@23
|
418 // make the *note* dictionary matrix
|
Chris@23
|
419 m_dict = new float[nNote * 84];
|
Chris@23
|
420 for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0;
|
Chris@23
|
421 dictionaryMatrix(m_dict);
|
matthiasm@7
|
422
|
Chris@23
|
423 // get the *chord* dictionary from file (if the file exists)
|
Chris@23
|
424 m_chordnames = chordDictionary(&m_chorddict);
|
matthiasm@0
|
425 }
|
matthiasm@0
|
426
|
matthiasm@0
|
427
|
matthiasm@0
|
428 NNLSChroma::~NNLSChroma()
|
matthiasm@0
|
429 {
|
Chris@23
|
430 if (debug_on) cerr << "--> ~NNLSChroma" << endl;
|
Chris@23
|
431 delete [] m_dict;
|
Chris@23
|
432 // delete [] m_chorddict;
|
Chris@23
|
433 // delete m_chordnames;
|
matthiasm@0
|
434 }
|
matthiasm@0
|
435
|
matthiasm@0
|
436 string
|
matthiasm@0
|
437 NNLSChroma::getIdentifier() const
|
matthiasm@0
|
438 {
|
Chris@23
|
439 if (debug_on) cerr << "--> getIdentifier" << endl;
|
matthiasm@0
|
440 return "nnls_chroma";
|
matthiasm@0
|
441 }
|
matthiasm@0
|
442
|
matthiasm@0
|
443 string
|
matthiasm@0
|
444 NNLSChroma::getName() const
|
matthiasm@0
|
445 {
|
Chris@23
|
446 if (debug_on) cerr << "--> getName" << endl;
|
matthiasm@0
|
447 return "NNLS Chroma";
|
matthiasm@0
|
448 }
|
matthiasm@0
|
449
|
matthiasm@0
|
450 string
|
matthiasm@0
|
451 NNLSChroma::getDescription() const
|
matthiasm@0
|
452 {
|
matthiasm@0
|
453 // Return something helpful here!
|
Chris@23
|
454 if (debug_on) cerr << "--> getDescription" << endl;
|
matthiasm@13
|
455 return "This plugin provides a number of features derived from a log-frequency amplitude spectrum of the DFT: some variants of the log-frequency spectrum, including a semitone spectrum derived from approximate transcription using the NNLS algorithm; based on this semitone spectrum, chroma features and a simple chord estimate.";
|
matthiasm@0
|
456 }
|
matthiasm@0
|
457
|
matthiasm@0
|
458 string
|
matthiasm@0
|
459 NNLSChroma::getMaker() const
|
matthiasm@0
|
460 {
|
Chris@23
|
461 if (debug_on) cerr << "--> getMaker" << endl;
|
matthiasm@0
|
462 // Your name here
|
matthiasm@0
|
463 return "Matthias Mauch";
|
matthiasm@0
|
464 }
|
matthiasm@0
|
465
|
matthiasm@0
|
466 int
|
matthiasm@0
|
467 NNLSChroma::getPluginVersion() const
|
matthiasm@0
|
468 {
|
Chris@23
|
469 if (debug_on) cerr << "--> getPluginVersion" << endl;
|
matthiasm@0
|
470 // Increment this each time you release a version that behaves
|
matthiasm@0
|
471 // differently from the previous one
|
matthiasm@0
|
472 return 1;
|
matthiasm@0
|
473 }
|
matthiasm@0
|
474
|
matthiasm@0
|
475 string
|
matthiasm@0
|
476 NNLSChroma::getCopyright() const
|
matthiasm@0
|
477 {
|
Chris@23
|
478 if (debug_on) cerr << "--> getCopyright" << endl;
|
matthiasm@0
|
479 // This function is not ideally named. It does not necessarily
|
matthiasm@0
|
480 // need to say who made the plugin -- getMaker does that -- but it
|
matthiasm@0
|
481 // should indicate the terms under which it is distributed. For
|
matthiasm@0
|
482 // example, "Copyright (year). All Rights Reserved", or "GPL"
|
matthiasm@0
|
483 return "Copyright (2010). All rights reserved.";
|
matthiasm@0
|
484 }
|
matthiasm@0
|
485
|
matthiasm@0
|
486 NNLSChroma::InputDomain
|
matthiasm@0
|
487 NNLSChroma::getInputDomain() const
|
matthiasm@0
|
488 {
|
Chris@23
|
489 if (debug_on) cerr << "--> getInputDomain" << endl;
|
matthiasm@0
|
490 return FrequencyDomain;
|
matthiasm@0
|
491 }
|
matthiasm@0
|
492
|
matthiasm@0
|
493 size_t
|
matthiasm@0
|
494 NNLSChroma::getPreferredBlockSize() const
|
matthiasm@0
|
495 {
|
Chris@23
|
496 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
|
matthiasm@0
|
497 return 16384; // 0 means "I can handle any block size"
|
matthiasm@0
|
498 }
|
matthiasm@0
|
499
|
matthiasm@0
|
500 size_t
|
matthiasm@0
|
501 NNLSChroma::getPreferredStepSize() const
|
matthiasm@0
|
502 {
|
Chris@23
|
503 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
|
matthiasm@0
|
504 return 2048; // 0 means "anything sensible"; in practice this
|
Chris@23
|
505 // means the same as the block size for TimeDomain
|
Chris@23
|
506 // plugins, or half of it for FrequencyDomain plugins
|
matthiasm@0
|
507 }
|
matthiasm@0
|
508
|
matthiasm@0
|
509 size_t
|
matthiasm@0
|
510 NNLSChroma::getMinChannelCount() const
|
matthiasm@0
|
511 {
|
Chris@23
|
512 if (debug_on) cerr << "--> getMinChannelCount" << endl;
|
matthiasm@0
|
513 return 1;
|
matthiasm@0
|
514 }
|
matthiasm@0
|
515
|
matthiasm@0
|
516 size_t
|
matthiasm@0
|
517 NNLSChroma::getMaxChannelCount() const
|
matthiasm@0
|
518 {
|
Chris@23
|
519 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
|
matthiasm@0
|
520 return 1;
|
matthiasm@0
|
521 }
|
matthiasm@0
|
522
|
matthiasm@0
|
523 NNLSChroma::ParameterList
|
matthiasm@0
|
524 NNLSChroma::getParameterDescriptors() const
|
matthiasm@0
|
525 {
|
Chris@23
|
526 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
|
matthiasm@0
|
527 ParameterList list;
|
matthiasm@0
|
528
|
matthiasm@3
|
529 ParameterDescriptor d3;
|
matthiasm@3
|
530 d3.identifier = "preset";
|
matthiasm@3
|
531 d3.name = "preset";
|
matthiasm@3
|
532 d3.description = "Spectral paling: no paling - 0; whitening - 1.";
|
matthiasm@3
|
533 d3.unit = "";
|
Chris@23
|
534 d3.isQuantized = true;
|
Chris@23
|
535 d3.quantizeStep = 1;
|
matthiasm@3
|
536 d3.minValue = 0.0;
|
matthiasm@4
|
537 d3.maxValue = 3.0;
|
matthiasm@3
|
538 d3.defaultValue = 0.0;
|
matthiasm@3
|
539 d3.valueNames.push_back("polyphonic pop");
|
Chris@23
|
540 d3.valueNames.push_back("polyphonic pop (fast)");
|
matthiasm@3
|
541 d3.valueNames.push_back("solo keyboard");
|
Chris@23
|
542 d3.valueNames.push_back("manual");
|
matthiasm@3
|
543 list.push_back(d3);
|
matthiasm@4
|
544
|
matthiasm@17
|
545 ParameterDescriptor d5;
|
Chris@23
|
546 d5.identifier = "rollon";
|
Chris@23
|
547 d5.name = "spectral roll-on";
|
Chris@23
|
548 d5.description = "The bins below the spectral roll-on quantile will be set to 0.";
|
Chris@23
|
549 d5.unit = "";
|
Chris@23
|
550 d5.minValue = 0;
|
Chris@23
|
551 d5.maxValue = 1;
|
Chris@23
|
552 d5.defaultValue = 0;
|
Chris@23
|
553 d5.isQuantized = false;
|
Chris@23
|
554 list.push_back(d5);
|
matthiasm@17
|
555
|
matthiasm@4
|
556 // ParameterDescriptor d0;
|
matthiasm@4
|
557 // d0.identifier = "notedict";
|
matthiasm@4
|
558 // d0.name = "note dictionary";
|
matthiasm@4
|
559 // d0.description = "Notes in different note dictionaries differ by their spectral shapes.";
|
matthiasm@4
|
560 // d0.unit = "";
|
matthiasm@4
|
561 // d0.minValue = 0;
|
matthiasm@4
|
562 // d0.maxValue = 1;
|
matthiasm@4
|
563 // d0.defaultValue = 0;
|
matthiasm@4
|
564 // d0.isQuantized = true;
|
matthiasm@4
|
565 // d0.valueNames.push_back("s = 0.6");
|
matthiasm@4
|
566 // d0.valueNames.push_back("no NNLS");
|
matthiasm@4
|
567 // d0.quantizeStep = 1.0;
|
matthiasm@4
|
568 // list.push_back(d0);
|
matthiasm@4
|
569
|
matthiasm@4
|
570 ParameterDescriptor d1;
|
matthiasm@4
|
571 d1.identifier = "tuningmode";
|
matthiasm@4
|
572 d1.name = "tuning mode";
|
matthiasm@4
|
573 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
|
matthiasm@4
|
574 d1.unit = "";
|
matthiasm@4
|
575 d1.minValue = 0;
|
matthiasm@4
|
576 d1.maxValue = 1;
|
matthiasm@4
|
577 d1.defaultValue = 0;
|
matthiasm@4
|
578 d1.isQuantized = true;
|
matthiasm@4
|
579 d1.valueNames.push_back("global tuning");
|
matthiasm@4
|
580 d1.valueNames.push_back("local tuning");
|
matthiasm@4
|
581 d1.quantizeStep = 1.0;
|
matthiasm@4
|
582 list.push_back(d1);
|
matthiasm@4
|
583
|
Chris@23
|
584 // ParameterDescriptor d2;
|
Chris@23
|
585 // d2.identifier = "paling";
|
Chris@23
|
586 // d2.name = "spectral paling";
|
Chris@23
|
587 // d2.description = "Spectral paling: no paling - 0; whitening - 1.";
|
Chris@23
|
588 // d2.unit = "";
|
Chris@23
|
589 // d2.isQuantized = true;
|
Chris@23
|
590 // // d2.quantizeStep = 0.1;
|
Chris@23
|
591 // d2.minValue = 0.0;
|
Chris@23
|
592 // d2.maxValue = 1.0;
|
Chris@23
|
593 // d2.defaultValue = 1.0;
|
Chris@23
|
594 // d2.isQuantized = false;
|
Chris@23
|
595 // list.push_back(d2);
|
Chris@23
|
596 ParameterDescriptor d4;
|
matthiasm@12
|
597 d4.identifier = "chromanormalize";
|
matthiasm@12
|
598 d4.name = "chroma normalization";
|
matthiasm@12
|
599 d4.description = "How shall the chroma vector be normalized?";
|
matthiasm@12
|
600 d4.unit = "";
|
matthiasm@12
|
601 d4.minValue = 0;
|
matthiasm@13
|
602 d4.maxValue = 3;
|
matthiasm@12
|
603 d4.defaultValue = 0;
|
matthiasm@12
|
604 d4.isQuantized = true;
|
matthiasm@13
|
605 d4.valueNames.push_back("none");
|
matthiasm@13
|
606 d4.valueNames.push_back("maximum norm");
|
Chris@23
|
607 d4.valueNames.push_back("L1 norm");
|
Chris@23
|
608 d4.valueNames.push_back("L2 norm");
|
matthiasm@12
|
609 d4.quantizeStep = 1.0;
|
matthiasm@12
|
610 list.push_back(d4);
|
matthiasm@4
|
611
|
matthiasm@0
|
612 return list;
|
matthiasm@0
|
613 }
|
matthiasm@0
|
614
|
matthiasm@0
|
615 float
|
matthiasm@0
|
616 NNLSChroma::getParameter(string identifier) const
|
matthiasm@0
|
617 {
|
Chris@23
|
618 if (debug_on) cerr << "--> getParameter" << endl;
|
matthiasm@0
|
619 if (identifier == "notedict") {
|
matthiasm@0
|
620 return m_dictID;
|
matthiasm@0
|
621 }
|
matthiasm@0
|
622
|
matthiasm@0
|
623 if (identifier == "paling") {
|
matthiasm@0
|
624 return m_paling;
|
matthiasm@0
|
625 }
|
matthiasm@17
|
626
|
Chris@23
|
627 if (identifier == "rollon") {
|
matthiasm@17
|
628 return m_rollon;
|
matthiasm@17
|
629 }
|
matthiasm@0
|
630
|
matthiasm@0
|
631 if (identifier == "tuningmode") {
|
matthiasm@0
|
632 if (m_tuneLocal) {
|
matthiasm@0
|
633 return 1.0;
|
matthiasm@0
|
634 } else {
|
matthiasm@0
|
635 return 0.0;
|
matthiasm@0
|
636 }
|
matthiasm@0
|
637 }
|
Chris@23
|
638 if (identifier == "preset") {
|
Chris@23
|
639 return m_preset;
|
matthiasm@3
|
640 }
|
Chris@23
|
641 if (identifier == "chromanormalize") {
|
Chris@23
|
642 return m_doNormalizeChroma;
|
matthiasm@12
|
643 }
|
matthiasm@0
|
644 return 0;
|
matthiasm@0
|
645
|
matthiasm@0
|
646 }
|
matthiasm@0
|
647
|
matthiasm@0
|
648 void
|
matthiasm@0
|
649 NNLSChroma::setParameter(string identifier, float value)
|
matthiasm@0
|
650 {
|
Chris@23
|
651 if (debug_on) cerr << "--> setParameter" << endl;
|
matthiasm@0
|
652 if (identifier == "notedict") {
|
matthiasm@0
|
653 m_dictID = (int) value;
|
matthiasm@0
|
654 }
|
matthiasm@0
|
655
|
matthiasm@0
|
656 if (identifier == "paling") {
|
matthiasm@0
|
657 m_paling = value;
|
matthiasm@0
|
658 }
|
matthiasm@0
|
659
|
matthiasm@0
|
660 if (identifier == "tuningmode") {
|
matthiasm@0
|
661 m_tuneLocal = (value > 0) ? true : false;
|
matthiasm@0
|
662 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
|
matthiasm@0
|
663 }
|
matthiasm@3
|
664 if (identifier == "preset") {
|
matthiasm@3
|
665 m_preset = value;
|
Chris@23
|
666 if (m_preset == 0.0) {
|
Chris@23
|
667 m_tuneLocal = false;
|
Chris@23
|
668 m_paling = 1.0;
|
Chris@23
|
669 m_dictID = 0.0;
|
Chris@23
|
670 }
|
Chris@23
|
671 if (m_preset == 1.0) {
|
Chris@23
|
672 m_tuneLocal = false;
|
Chris@23
|
673 m_paling = 1.0;
|
Chris@23
|
674 m_dictID = 1.0;
|
Chris@23
|
675 }
|
Chris@23
|
676 if (m_preset == 2.0) {
|
Chris@23
|
677 m_tuneLocal = false;
|
Chris@23
|
678 m_paling = 0.7;
|
Chris@23
|
679 m_dictID = 0.0;
|
Chris@23
|
680 }
|
matthiasm@3
|
681 }
|
Chris@23
|
682 if (identifier == "chromanormalize") {
|
Chris@23
|
683 m_doNormalizeChroma = value;
|
Chris@23
|
684 }
|
matthiasm@17
|
685
|
Chris@23
|
686 if (identifier == "rollon") {
|
Chris@23
|
687 m_rollon = value;
|
Chris@23
|
688 }
|
matthiasm@0
|
689 }
|
matthiasm@0
|
690
|
matthiasm@0
|
691 NNLSChroma::ProgramList
|
matthiasm@0
|
692 NNLSChroma::getPrograms() const
|
matthiasm@0
|
693 {
|
Chris@23
|
694 if (debug_on) cerr << "--> getPrograms" << endl;
|
matthiasm@0
|
695 ProgramList list;
|
matthiasm@0
|
696
|
matthiasm@0
|
697 // If you have no programs, return an empty list (or simply don't
|
matthiasm@0
|
698 // implement this function or getCurrentProgram/selectProgram)
|
matthiasm@0
|
699
|
matthiasm@0
|
700 return list;
|
matthiasm@0
|
701 }
|
matthiasm@0
|
702
|
matthiasm@0
|
703 string
|
matthiasm@0
|
704 NNLSChroma::getCurrentProgram() const
|
matthiasm@0
|
705 {
|
Chris@23
|
706 if (debug_on) cerr << "--> getCurrentProgram" << endl;
|
matthiasm@0
|
707 return ""; // no programs
|
matthiasm@0
|
708 }
|
matthiasm@0
|
709
|
matthiasm@0
|
710 void
|
matthiasm@0
|
711 NNLSChroma::selectProgram(string name)
|
matthiasm@0
|
712 {
|
Chris@23
|
713 if (debug_on) cerr << "--> selectProgram" << endl;
|
matthiasm@0
|
714 }
|
matthiasm@0
|
715
|
matthiasm@0
|
716
|
matthiasm@0
|
717 NNLSChroma::OutputList
|
matthiasm@0
|
718 NNLSChroma::getOutputDescriptors() const
|
matthiasm@0
|
719 {
|
Chris@23
|
720 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
|
matthiasm@0
|
721 OutputList list;
|
matthiasm@0
|
722
|
matthiasm@0
|
723 // Make chroma names for the binNames property
|
matthiasm@0
|
724 vector<string> chromanames;
|
matthiasm@0
|
725 vector<string> bothchromanames;
|
matthiasm@0
|
726 for (int iNote = 0; iNote < 24; iNote++) {
|
matthiasm@0
|
727 bothchromanames.push_back(notenames[iNote]);
|
matthiasm@0
|
728 if (iNote < 12) {
|
matthiasm@0
|
729 chromanames.push_back(notenames[iNote]);
|
matthiasm@0
|
730 }
|
matthiasm@0
|
731 }
|
matthiasm@0
|
732
|
Chris@23
|
733 // int nNote = 84;
|
matthiasm@0
|
734
|
matthiasm@0
|
735 // See OutputDescriptor documentation for the possibilities here.
|
matthiasm@0
|
736 // Every plugin must have at least one output.
|
matthiasm@0
|
737
|
matthiasm@0
|
738 OutputDescriptor d0;
|
matthiasm@0
|
739 d0.identifier = "tuning";
|
matthiasm@0
|
740 d0.name = "Tuning";
|
matthiasm@0
|
741 d0.description = "The concert pitch.";
|
matthiasm@0
|
742 d0.unit = "Hz";
|
matthiasm@0
|
743 d0.hasFixedBinCount = true;
|
matthiasm@0
|
744 d0.binCount = 0;
|
matthiasm@0
|
745 d0.hasKnownExtents = true;
|
Chris@23
|
746 d0.minValue = 427.47;
|
Chris@23
|
747 d0.maxValue = 452.89;
|
matthiasm@0
|
748 d0.isQuantized = false;
|
matthiasm@0
|
749 d0.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@0
|
750 d0.hasDuration = false;
|
matthiasm@0
|
751 list.push_back(d0);
|
matthiasm@0
|
752
|
Chris@23
|
753 OutputDescriptor d1;
|
matthiasm@0
|
754 d1.identifier = "logfreqspec";
|
matthiasm@0
|
755 d1.name = "Log-Frequency Spectrum";
|
matthiasm@0
|
756 d1.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping.";
|
matthiasm@0
|
757 d1.unit = "";
|
matthiasm@0
|
758 d1.hasFixedBinCount = true;
|
matthiasm@0
|
759 d1.binCount = nNote;
|
matthiasm@0
|
760 d1.hasKnownExtents = false;
|
matthiasm@0
|
761 d1.isQuantized = false;
|
matthiasm@0
|
762 d1.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
763 d1.hasDuration = false;
|
matthiasm@0
|
764 d1.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
765 list.push_back(d1);
|
matthiasm@0
|
766
|
Chris@23
|
767 OutputDescriptor d2;
|
matthiasm@0
|
768 d2.identifier = "tunedlogfreqspec";
|
matthiasm@0
|
769 d2.name = "Tuned Log-Frequency Spectrum";
|
matthiasm@0
|
770 d2.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping, then its tuned using the estimated tuning frequency.";
|
matthiasm@0
|
771 d2.unit = "";
|
matthiasm@0
|
772 d2.hasFixedBinCount = true;
|
matthiasm@0
|
773 d2.binCount = 256;
|
matthiasm@0
|
774 d2.hasKnownExtents = false;
|
matthiasm@0
|
775 d2.isQuantized = false;
|
matthiasm@0
|
776 d2.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
777 d2.hasDuration = false;
|
matthiasm@0
|
778 d2.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
779 list.push_back(d2);
|
matthiasm@0
|
780
|
matthiasm@0
|
781 OutputDescriptor d3;
|
matthiasm@0
|
782 d3.identifier = "semitonespectrum";
|
matthiasm@0
|
783 d3.name = "Semitone Spectrum";
|
matthiasm@0
|
784 d3.description = "A semitone-spaced log-frequency spectrum derived from the third-of-a-semitone-spaced tuned log-frequency spectrum.";
|
matthiasm@0
|
785 d3.unit = "";
|
matthiasm@0
|
786 d3.hasFixedBinCount = true;
|
matthiasm@0
|
787 d3.binCount = 84;
|
matthiasm@0
|
788 d3.hasKnownExtents = false;
|
matthiasm@0
|
789 d3.isQuantized = false;
|
matthiasm@0
|
790 d3.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
791 d3.hasDuration = false;
|
matthiasm@0
|
792 d3.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
793 list.push_back(d3);
|
matthiasm@0
|
794
|
matthiasm@0
|
795 OutputDescriptor d4;
|
matthiasm@0
|
796 d4.identifier = "chroma";
|
matthiasm@0
|
797 d4.name = "Chromagram";
|
matthiasm@0
|
798 d4.description = "Tuning-adjusted chromagram from NNLS soft transcription, with an emphasis on the medium note range.";
|
matthiasm@0
|
799 d4.unit = "";
|
matthiasm@0
|
800 d4.hasFixedBinCount = true;
|
matthiasm@0
|
801 d4.binCount = 12;
|
matthiasm@0
|
802 d4.binNames = chromanames;
|
matthiasm@0
|
803 d4.hasKnownExtents = false;
|
matthiasm@0
|
804 d4.isQuantized = false;
|
matthiasm@0
|
805 d4.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
806 d4.hasDuration = false;
|
matthiasm@0
|
807 d4.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
808 list.push_back(d4);
|
matthiasm@0
|
809
|
matthiasm@0
|
810 OutputDescriptor d5;
|
matthiasm@0
|
811 d5.identifier = "basschroma";
|
matthiasm@0
|
812 d5.name = "Bass Chromagram";
|
matthiasm@0
|
813 d5.description = "Tuning-adjusted bass chromagram from NNLS soft transcription, with an emphasis on the bass note range.";
|
matthiasm@0
|
814 d5.unit = "";
|
matthiasm@0
|
815 d5.hasFixedBinCount = true;
|
matthiasm@0
|
816 d5.binCount = 12;
|
matthiasm@0
|
817 d5.binNames = chromanames;
|
matthiasm@0
|
818 d5.hasKnownExtents = false;
|
matthiasm@0
|
819 d5.isQuantized = false;
|
matthiasm@0
|
820 d5.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
821 d5.hasDuration = false;
|
matthiasm@0
|
822 d5.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
823 list.push_back(d5);
|
matthiasm@0
|
824
|
matthiasm@0
|
825 OutputDescriptor d6;
|
matthiasm@0
|
826 d6.identifier = "bothchroma";
|
matthiasm@0
|
827 d6.name = "Chromagram and Bass Chromagram";
|
matthiasm@0
|
828 d6.description = "Tuning-adjusted chromagram and bass chromagram (stacked on top of each other) from NNLS soft transcription.";
|
matthiasm@0
|
829 d6.unit = "";
|
matthiasm@0
|
830 d6.hasFixedBinCount = true;
|
matthiasm@0
|
831 d6.binCount = 24;
|
matthiasm@0
|
832 d6.binNames = bothchromanames;
|
matthiasm@0
|
833 d6.hasKnownExtents = false;
|
matthiasm@0
|
834 d6.isQuantized = false;
|
matthiasm@0
|
835 d6.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
836 d6.hasDuration = false;
|
matthiasm@0
|
837 d6.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
838 list.push_back(d6);
|
matthiasm@0
|
839
|
matthiasm@0
|
840 OutputDescriptor d7;
|
matthiasm@0
|
841 d7.identifier = "simplechord";
|
matthiasm@0
|
842 d7.name = "Simple Chord Estimate";
|
matthiasm@0
|
843 d7.description = "A simple chord estimate based on the inner product of chord templates with the smoothed chroma.";
|
matthiasm@0
|
844 d7.unit = "";
|
matthiasm@0
|
845 d7.hasFixedBinCount = true;
|
matthiasm@0
|
846 d7.binCount = 0;
|
matthiasm@0
|
847 d7.hasKnownExtents = false;
|
matthiasm@0
|
848 d7.isQuantized = false;
|
matthiasm@0
|
849 d7.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@0
|
850 d7.hasDuration = false;
|
matthiasm@0
|
851 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
852 list.push_back(d7);
|
matthiasm@0
|
853
|
Chris@23
|
854 //
|
Chris@23
|
855 // OutputDescriptor d9;
|
Chris@23
|
856 // d9.identifier = "inconsistencysegment";
|
Chris@23
|
857 // d9.name = "Harmonic inconsistency segmenter";
|
Chris@23
|
858 // d9.description = "Segments the audio based on the harmonic inconsistency value into speech and music.";
|
Chris@23
|
859 // d9.unit = "";
|
Chris@23
|
860 // d9.hasFixedBinCount = true;
|
Chris@23
|
861 // d9.binCount = 0;
|
Chris@23
|
862 // d9.hasKnownExtents = true;
|
Chris@23
|
863 // d9.minValue = 0.1;
|
Chris@23
|
864 // d9.maxValue = 0.9;
|
Chris@23
|
865 // d9.isQuantized = false;
|
Chris@23
|
866 // d9.sampleType = OutputDescriptor::VariableSampleRate;
|
Chris@23
|
867 // d9.hasDuration = false;
|
Chris@23
|
868 // d9.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
Chris@23
|
869 // list.push_back(d9);
|
Chris@23
|
870 //
|
Chris@23
|
871 OutputDescriptor d10;
|
Chris@23
|
872 d10.identifier = "localtuning";
|
Chris@23
|
873 d10.name = "Local tuning";
|
Chris@23
|
874 d10.description = "Tuning based on the history up to this timestamp.";
|
Chris@23
|
875 d10.unit = "Hz";
|
Chris@23
|
876 d10.hasFixedBinCount = true;
|
Chris@23
|
877 d10.binCount = 1;
|
Chris@23
|
878 d10.hasKnownExtents = true;
|
Chris@23
|
879 d10.minValue = 427.47;
|
Chris@23
|
880 d10.maxValue = 452.89;
|
Chris@23
|
881 d10.isQuantized = false;
|
Chris@23
|
882 d10.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@23
|
883 d10.hasDuration = false;
|
Chris@23
|
884 // d10.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
Chris@23
|
885 list.push_back(d10);
|
matthiasm@17
|
886
|
Chris@23
|
887 OutputDescriptor d8;
|
matthiasm@17
|
888 d8.identifier = "harmonicchange";
|
matthiasm@17
|
889 d8.name = "Harmonic change value";
|
matthiasm@17
|
890 d8.description = "Harmonic change.";
|
matthiasm@17
|
891 d8.unit = "";
|
matthiasm@17
|
892 d8.hasFixedBinCount = true;
|
matthiasm@17
|
893 d8.binCount = 1;
|
matthiasm@17
|
894 d8.hasKnownExtents = true;
|
Chris@23
|
895 d8.minValue = 0.0;
|
Chris@23
|
896 d8.maxValue = 0.999;
|
matthiasm@17
|
897 d8.isQuantized = false;
|
matthiasm@17
|
898 d8.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@17
|
899 d8.hasDuration = false;
|
matthiasm@17
|
900 // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@17
|
901 list.push_back(d8);
|
matthiasm@1
|
902
|
matthiasm@0
|
903 return list;
|
matthiasm@0
|
904 }
|
matthiasm@0
|
905
|
matthiasm@0
|
906
|
matthiasm@0
|
907 bool
|
matthiasm@0
|
908 NNLSChroma::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@0
|
909 {
|
Chris@23
|
910 if (debug_on) {
|
Chris@23
|
911 cerr << "--> initialise";
|
Chris@23
|
912 }
|
matthiasm@1
|
913
|
matthiasm@0
|
914 if (channels < getMinChannelCount() ||
|
matthiasm@0
|
915 channels > getMaxChannelCount()) return false;
|
matthiasm@0
|
916 m_blockSize = blockSize;
|
matthiasm@0
|
917 m_stepSize = stepSize;
|
matthiasm@0
|
918 frameCount = 0;
|
Chris@23
|
919 int tempn = 256 * m_blockSize/2;
|
Chris@23
|
920 // cerr << "length of tempkernel : " << tempn << endl;
|
Chris@23
|
921 float *tempkernel;
|
matthiasm@1
|
922
|
Chris@23
|
923 tempkernel = new float[tempn];
|
matthiasm@1
|
924
|
Chris@23
|
925 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
|
Chris@23
|
926 m_kernelValue.clear();
|
Chris@23
|
927 m_kernelFftIndex.clear();
|
Chris@23
|
928 m_kernelNoteIndex.clear();
|
Chris@23
|
929 int countNonzero = 0;
|
Chris@23
|
930 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
|
Chris@23
|
931 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) {
|
Chris@23
|
932 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
Chris@23
|
933 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
|
Chris@23
|
934 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
Chris@23
|
935 countNonzero++;
|
Chris@23
|
936 }
|
Chris@23
|
937 m_kernelFftIndex.push_back(iFFT);
|
Chris@23
|
938 m_kernelNoteIndex.push_back(iNote);
|
Chris@23
|
939 }
|
Chris@23
|
940 }
|
Chris@23
|
941 }
|
Chris@23
|
942 // cerr << "nonzero count : " << countNonzero << endl;
|
Chris@23
|
943 delete [] tempkernel;
|
Chris@23
|
944 ofstream myfile;
|
Chris@23
|
945 myfile.open ("matrix.txt");
|
matthiasm@3
|
946 // myfile << "Writing this to a file.\n";
|
Chris@23
|
947 for (int i = 0; i < nNote * 84; ++i) {
|
Chris@23
|
948 myfile << m_dict[i] << endl;
|
Chris@23
|
949 }
|
matthiasm@3
|
950 myfile.close();
|
matthiasm@0
|
951 return true;
|
matthiasm@0
|
952 }
|
matthiasm@0
|
953
|
matthiasm@0
|
954 void
|
matthiasm@0
|
955 NNLSChroma::reset()
|
matthiasm@0
|
956 {
|
Chris@23
|
957 if (debug_on) cerr << "--> reset";
|
matthiasm@4
|
958
|
matthiasm@0
|
959 // Clear buffers, reset stored values, etc
|
Chris@23
|
960 frameCount = 0;
|
Chris@23
|
961 m_dictID = 0;
|
Chris@23
|
962 m_fl.clear();
|
Chris@23
|
963 m_meanTuning0 = 0;
|
Chris@23
|
964 m_meanTuning1 = 0;
|
Chris@23
|
965 m_meanTuning2 = 0;
|
Chris@23
|
966 m_localTuning0 = 0;
|
Chris@23
|
967 m_localTuning1 = 0;
|
Chris@23
|
968 m_localTuning2 = 0;
|
Chris@23
|
969 m_localTuning.clear();
|
matthiasm@0
|
970 }
|
matthiasm@0
|
971
|
matthiasm@0
|
972 NNLSChroma::FeatureSet
|
matthiasm@0
|
973 NNLSChroma::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
matthiasm@0
|
974 {
|
Chris@23
|
975 if (debug_on) cerr << "--> process" << endl;
|
Chris@23
|
976 frameCount++;
|
Chris@23
|
977 float *magnitude = new float[m_blockSize/2];
|
matthiasm@0
|
978
|
Chris@23
|
979 Feature f10; // local tuning
|
Chris@23
|
980 f10.hasTimestamp = true;
|
Chris@23
|
981 f10.timestamp = timestamp;
|
Chris@23
|
982 const float *fbuf = inputBuffers[0];
|
Chris@23
|
983 float energysum = 0;
|
Chris@23
|
984 // make magnitude
|
Chris@23
|
985 float maxmag = -10000;
|
Chris@23
|
986 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
|
Chris@23
|
987 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
|
Chris@23
|
988 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
|
Chris@23
|
989 if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin];
|
Chris@23
|
990 if (m_rollon > 0) {
|
Chris@23
|
991 energysum += pow(magnitude[iBin],2);
|
Chris@23
|
992 }
|
Chris@23
|
993 }
|
matthiasm@14
|
994
|
Chris@23
|
995 float cumenergy = 0;
|
Chris@23
|
996 if (m_rollon > 0) {
|
Chris@23
|
997 for (size_t iBin = 2; iBin < m_blockSize/2; iBin++) {
|
Chris@23
|
998 cumenergy += pow(magnitude[iBin],2);
|
Chris@23
|
999 if (cumenergy < energysum * m_rollon) magnitude[iBin-2] = 0;
|
Chris@23
|
1000 else break;
|
Chris@23
|
1001 }
|
Chris@23
|
1002 }
|
matthiasm@17
|
1003
|
Chris@23
|
1004 if (maxmag < 2) {
|
Chris@23
|
1005 // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl;
|
Chris@23
|
1006 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
|
Chris@23
|
1007 magnitude[iBin] = 0;
|
Chris@23
|
1008 }
|
Chris@23
|
1009 }
|
matthiasm@4
|
1010
|
Chris@23
|
1011 // note magnitude mapping using pre-calculated matrix
|
Chris@23
|
1012 float *nm = new float[nNote]; // note magnitude
|
Chris@23
|
1013 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
Chris@23
|
1014 nm[iNote] = 0; // initialise as 0
|
Chris@23
|
1015 }
|
Chris@23
|
1016 int binCount = 0;
|
Chris@23
|
1017 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
|
Chris@23
|
1018 // cerr << ".";
|
Chris@23
|
1019 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
|
Chris@23
|
1020 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl;
|
Chris@23
|
1021 binCount++;
|
Chris@23
|
1022 }
|
Chris@23
|
1023 // cerr << nm[20];
|
Chris@23
|
1024 // cerr << endl;
|
matthiasm@0
|
1025
|
matthiasm@0
|
1026
|
matthiasm@0
|
1027 float one_over_N = 1.0/frameCount;
|
matthiasm@0
|
1028 // update means of complex tuning variables
|
matthiasm@0
|
1029 m_meanTuning0 *= float(frameCount-1)*one_over_N;
|
matthiasm@0
|
1030 m_meanTuning1 *= float(frameCount-1)*one_over_N;
|
matthiasm@0
|
1031 m_meanTuning2 *= float(frameCount-1)*one_over_N;
|
matthiasm@0
|
1032
|
matthiasm@0
|
1033 for (int iTone = 0; iTone < 160; iTone = iTone + 3) {
|
matthiasm@0
|
1034 m_meanTuning0 += nm[iTone + 0]*one_over_N;
|
matthiasm@0
|
1035 m_meanTuning1 += nm[iTone + 1]*one_over_N;
|
matthiasm@0
|
1036 m_meanTuning2 += nm[iTone + 2]*one_over_N;
|
Chris@23
|
1037 float ratioOld = 0.997;
|
matthiasm@3
|
1038 m_localTuning0 *= ratioOld; m_localTuning0 += nm[iTone + 0] * (1 - ratioOld);
|
matthiasm@3
|
1039 m_localTuning1 *= ratioOld; m_localTuning1 += nm[iTone + 1] * (1 - ratioOld);
|
matthiasm@3
|
1040 m_localTuning2 *= ratioOld; m_localTuning2 += nm[iTone + 2] * (1 - ratioOld);
|
matthiasm@0
|
1041 }
|
matthiasm@0
|
1042
|
matthiasm@0
|
1043 // if (m_tuneLocal) {
|
Chris@23
|
1044 // local tuning
|
Chris@23
|
1045 float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2;
|
Chris@23
|
1046 float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2;
|
Chris@23
|
1047 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
|
Chris@23
|
1048 m_localTuning.push_back(normalisedtuning);
|
Chris@23
|
1049 float tuning440 = 440 * pow(2,normalisedtuning/12);
|
Chris@23
|
1050 f10.values.push_back(tuning440);
|
Chris@23
|
1051 // cerr << tuning440 << endl;
|
matthiasm@0
|
1052 // }
|
matthiasm@0
|
1053
|
Chris@23
|
1054 Feature f1; // logfreqspec
|
Chris@23
|
1055 f1.hasTimestamp = true;
|
matthiasm@0
|
1056 f1.timestamp = timestamp;
|
Chris@23
|
1057 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
Chris@23
|
1058 f1.values.push_back(nm[iNote]);
|
Chris@23
|
1059 }
|
matthiasm@0
|
1060
|
Chris@23
|
1061 FeatureSet fs;
|
Chris@23
|
1062 fs[1].push_back(f1);
|
matthiasm@3
|
1063 fs[8].push_back(f10);
|
matthiasm@0
|
1064
|
matthiasm@0
|
1065 // deletes
|
matthiasm@0
|
1066 delete[] magnitude;
|
matthiasm@0
|
1067 delete[] nm;
|
matthiasm@0
|
1068
|
matthiasm@0
|
1069 m_fl.push_back(f1); // remember note magnitude for getRemainingFeatures
|
Chris@23
|
1070 char * pPath;
|
Chris@23
|
1071 pPath = getenv ("VAMP_PATH");
|
matthiasm@7
|
1072
|
matthiasm@7
|
1073
|
Chris@23
|
1074 return fs;
|
matthiasm@0
|
1075 }
|
matthiasm@0
|
1076
|
matthiasm@0
|
1077 NNLSChroma::FeatureSet
|
matthiasm@0
|
1078 NNLSChroma::getRemainingFeatures()
|
matthiasm@0
|
1079 {
|
Chris@23
|
1080 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
|
Chris@23
|
1081 FeatureSet fsOut;
|
Chris@23
|
1082 if (m_fl.size() == 0) return fsOut;
|
Chris@23
|
1083 int nChord = m_chordnames.size();
|
Chris@23
|
1084 //
|
Chris@23
|
1085 /** Calculate Tuning
|
Chris@23
|
1086 calculate tuning from (using the angle of the complex number defined by the
|
Chris@23
|
1087 cumulative mean real and imag values)
|
Chris@23
|
1088 **/
|
Chris@23
|
1089 float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
|
Chris@23
|
1090 float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
|
Chris@23
|
1091 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
|
Chris@23
|
1092 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
|
Chris@23
|
1093 int intShift = floor(normalisedtuning * 3);
|
Chris@23
|
1094 float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
|
matthiasm@1
|
1095
|
Chris@23
|
1096 char buffer0 [50];
|
matthiasm@1
|
1097
|
Chris@23
|
1098 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
|
matthiasm@1
|
1099
|
Chris@23
|
1100 // cerr << "normalisedtuning: " << normalisedtuning << '\n';
|
matthiasm@1
|
1101
|
Chris@23
|
1102 // push tuning to FeatureSet fsOut
|
Chris@23
|
1103 Feature f0; // tuning
|
Chris@23
|
1104 f0.hasTimestamp = true;
|
Chris@23
|
1105 f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));;
|
Chris@23
|
1106 f0.label = buffer0;
|
Chris@23
|
1107 fsOut[0].push_back(f0);
|
matthiasm@1
|
1108
|
Chris@23
|
1109 /** Tune Log-Frequency Spectrogram
|
Chris@23
|
1110 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
|
Chris@23
|
1111 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
|
Chris@23
|
1112 **/
|
Chris@23
|
1113 cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... ";
|
matthiasm@13
|
1114
|
Chris@23
|
1115 float tempValue = 0;
|
Chris@23
|
1116 float dbThreshold = 0; // relative to the background spectrum
|
Chris@23
|
1117 float thresh = pow(10,dbThreshold/20);
|
Chris@23
|
1118 // cerr << "tune local ? " << m_tuneLocal << endl;
|
Chris@23
|
1119 int count = 0;
|
matthiasm@1
|
1120
|
Chris@23
|
1121 for (FeatureList::iterator i = m_fl.begin(); i != m_fl.end(); ++i) {
|
Chris@23
|
1122 Feature f1 = *i;
|
Chris@23
|
1123 Feature f2; // tuned log-frequency spectrum
|
Chris@23
|
1124 f2.hasTimestamp = true;
|
Chris@23
|
1125 f2.timestamp = f1.timestamp;
|
Chris@23
|
1126 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
|
matthiasm@1
|
1127
|
Chris@23
|
1128 if (m_tuneLocal) {
|
Chris@23
|
1129 intShift = floor(m_localTuning[count] * 3);
|
Chris@23
|
1130 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
|
Chris@23
|
1131 }
|
matthiasm@1
|
1132
|
Chris@23
|
1133 // cerr << intShift << " " << intFactor << endl;
|
matthiasm@1
|
1134
|
Chris@23
|
1135 for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
|
Chris@23
|
1136 tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor;
|
Chris@23
|
1137 f2.values.push_back(tempValue);
|
Chris@23
|
1138 }
|
matthiasm@1
|
1139
|
Chris@23
|
1140 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
|
Chris@23
|
1141 vector<float> runningmean = SpecialConvolution(f2.values,hw);
|
Chris@23
|
1142 vector<float> runningstd;
|
Chris@23
|
1143 for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance)
|
Chris@23
|
1144 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
|
Chris@23
|
1145 }
|
Chris@23
|
1146 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
|
Chris@23
|
1147 for (int i = 0; i < 256; i++) {
|
Chris@23
|
1148 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
|
Chris@23
|
1149 if (runningstd[i] > 0) {
|
Chris@23
|
1150 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
|
Chris@23
|
1151 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
|
Chris@23
|
1152 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
|
Chris@23
|
1153 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
|
Chris@23
|
1154 }
|
Chris@23
|
1155 if (f2.values[i] < 0) {
|
Chris@23
|
1156 cerr << "ERROR: negative value in logfreq spectrum" << endl;
|
Chris@23
|
1157 }
|
Chris@23
|
1158 }
|
Chris@23
|
1159 fsOut[2].push_back(f2);
|
Chris@23
|
1160 count++;
|
Chris@23
|
1161 }
|
Chris@23
|
1162 cerr << "done." << endl;
|
matthiasm@1
|
1163
|
Chris@23
|
1164 /** Semitone spectrum and chromagrams
|
Chris@23
|
1165 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
|
Chris@23
|
1166 is inferred using a non-negative least squares algorithm.
|
Chris@23
|
1167 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
|
Chris@23
|
1168 bass and treble stacked onto each other).
|
Chris@23
|
1169 **/
|
Chris@23
|
1170 if (m_dictID == 1) {
|
Chris@23
|
1171 cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... ";
|
Chris@23
|
1172 } else {
|
Chris@23
|
1173 cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... ";
|
Chris@23
|
1174 }
|
matthiasm@13
|
1175
|
matthiasm@1
|
1176
|
Chris@23
|
1177 vector<vector<float> > chordogram;
|
Chris@23
|
1178 vector<vector<int> > scoreChordogram;
|
Chris@23
|
1179 vector<float> chordchange = vector<float>(fsOut[2].size(),0);
|
Chris@23
|
1180 vector<float> oldchroma = vector<float>(12,0);
|
Chris@23
|
1181 vector<float> oldbasschroma = vector<float>(12,0);
|
Chris@23
|
1182 count = 0;
|
matthiasm@9
|
1183
|
Chris@23
|
1184 for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) {
|
Chris@23
|
1185 Feature f2 = *it; // logfreq spectrum
|
Chris@23
|
1186 Feature f3; // semitone spectrum
|
Chris@23
|
1187 Feature f4; // treble chromagram
|
Chris@23
|
1188 Feature f5; // bass chromagram
|
Chris@23
|
1189 Feature f6; // treble and bass chromagram
|
matthiasm@1
|
1190
|
Chris@23
|
1191 f3.hasTimestamp = true;
|
Chris@23
|
1192 f3.timestamp = f2.timestamp;
|
matthiasm@1
|
1193
|
Chris@23
|
1194 f4.hasTimestamp = true;
|
Chris@23
|
1195 f4.timestamp = f2.timestamp;
|
matthiasm@1
|
1196
|
Chris@23
|
1197 f5.hasTimestamp = true;
|
Chris@23
|
1198 f5.timestamp = f2.timestamp;
|
matthiasm@1
|
1199
|
Chris@23
|
1200 f6.hasTimestamp = true;
|
Chris@23
|
1201 f6.timestamp = f2.timestamp;
|
matthiasm@1
|
1202
|
Chris@23
|
1203 double b[256];
|
matthiasm@1
|
1204
|
Chris@23
|
1205 bool some_b_greater_zero = false;
|
Chris@23
|
1206 float sumb = 0;
|
Chris@23
|
1207 for (int i = 0; i < 256; i++) {
|
Chris@23
|
1208 // b[i] = m_dict[(256 * count + i) % (256 * 84)];
|
Chris@23
|
1209 b[i] = f2.values[i];
|
Chris@23
|
1210 sumb += b[i];
|
Chris@23
|
1211 if (b[i] > 0) {
|
Chris@23
|
1212 some_b_greater_zero = true;
|
Chris@23
|
1213 }
|
Chris@23
|
1214 }
|
matthiasm@1
|
1215
|
Chris@23
|
1216 // here's where the non-negative least squares algorithm calculates the note activation x
|
matthiasm@1
|
1217
|
Chris@23
|
1218 vector<float> chroma = vector<float>(12, 0);
|
Chris@23
|
1219 vector<float> basschroma = vector<float>(12, 0);
|
Chris@23
|
1220 float currval;
|
Chris@23
|
1221 unsigned iSemitone = 0;
|
matthiasm@1
|
1222
|
Chris@23
|
1223 if (some_b_greater_zero) {
|
Chris@23
|
1224 if (m_dictID == 1) {
|
Chris@23
|
1225 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
Chris@23
|
1226 currval = 0;
|
Chris@23
|
1227 currval += b[iNote + 1 + -1] * 0.5;
|
Chris@23
|
1228 currval += b[iNote + 1 + 0] * 1.0;
|
Chris@23
|
1229 currval += b[iNote + 1 + 1] * 0.5;
|
Chris@23
|
1230 f3.values.push_back(currval);
|
Chris@23
|
1231 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
|
Chris@23
|
1232 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
|
Chris@23
|
1233 iSemitone++;
|
Chris@23
|
1234 }
|
matthiasm@1
|
1235
|
Chris@23
|
1236 } else {
|
Chris@23
|
1237 double x[84+1000];
|
Chris@23
|
1238 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
|
Chris@23
|
1239 vector<int> signifIndex;
|
Chris@23
|
1240 int index=0;
|
Chris@23
|
1241 sumb /= 84.0;
|
Chris@23
|
1242 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
Chris@23
|
1243 float currval = 0;
|
Chris@23
|
1244 currval += b[iNote + 1 + -1];
|
Chris@23
|
1245 currval += b[iNote + 1 + 0];
|
Chris@23
|
1246 currval += b[iNote + 1 + 1];
|
Chris@23
|
1247 if (currval > 0) signifIndex.push_back(index);
|
Chris@23
|
1248 f3.values.push_back(0); // fill the values, change later
|
Chris@23
|
1249 index++;
|
Chris@23
|
1250 }
|
Chris@23
|
1251 double rnorm;
|
Chris@23
|
1252 double w[84+1000];
|
Chris@23
|
1253 double zz[84+1000];
|
Chris@23
|
1254 int indx[84+1000];
|
Chris@23
|
1255 int mode;
|
Chris@23
|
1256 int dictsize = 256*signifIndex.size();
|
Chris@23
|
1257 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
|
Chris@23
|
1258 double *curr_dict = new double[dictsize];
|
Chris@23
|
1259 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
|
Chris@23
|
1260 for (unsigned iBin = 0; iBin < 256; iBin++) {
|
Chris@23
|
1261 curr_dict[iNote * 256 + iBin] = 1.0 * m_dict[signifIndex[iNote] * 256 + iBin];
|
Chris@23
|
1262 }
|
Chris@23
|
1263 }
|
Chris@23
|
1264 int sz = signifIndex.size();
|
Chris@23
|
1265 int nn = nNote;
|
Chris@23
|
1266 NNLS(curr_dict, &nn, &nn, &sz, b, x, &rnorm, w, zz, indx, &mode);
|
Chris@23
|
1267 delete [] curr_dict;
|
Chris@23
|
1268 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
|
Chris@23
|
1269 f3.values[signifIndex[iNote]] = x[iNote];
|
Chris@23
|
1270 // cerr << mode << endl;
|
Chris@23
|
1271 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
|
Chris@23
|
1272 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
|
Chris@23
|
1273 }
|
Chris@23
|
1274 }
|
Chris@23
|
1275 }
|
matthiasm@13
|
1276
|
matthiasm@10
|
1277
|
matthiasm@12
|
1278
|
matthiasm@13
|
1279
|
Chris@23
|
1280 f4.values = chroma;
|
Chris@23
|
1281 f5.values = basschroma;
|
Chris@23
|
1282 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
|
Chris@23
|
1283 f6.values = chroma;
|
matthiasm@1
|
1284
|
Chris@23
|
1285 if (m_doNormalizeChroma > 0) {
|
Chris@23
|
1286 vector<float> chromanorm = vector<float>(3,0);
|
Chris@23
|
1287 switch (int(m_doNormalizeChroma)) {
|
Chris@23
|
1288 case 0: // should never end up here
|
Chris@23
|
1289 break;
|
Chris@23
|
1290 case 1:
|
Chris@23
|
1291 chromanorm[0] = *max_element(f4.values.begin(), f4.values.end());
|
Chris@23
|
1292 chromanorm[1] = *max_element(f5.values.begin(), f5.values.end());
|
Chris@23
|
1293 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
|
Chris@23
|
1294 break;
|
Chris@23
|
1295 case 2:
|
Chris@23
|
1296 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
|
Chris@23
|
1297 chromanorm[0] += *it;
|
Chris@23
|
1298 }
|
Chris@23
|
1299 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
|
Chris@23
|
1300 chromanorm[1] += *it;
|
Chris@23
|
1301 }
|
Chris@23
|
1302 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
|
Chris@23
|
1303 chromanorm[2] += *it;
|
Chris@23
|
1304 }
|
Chris@23
|
1305 break;
|
Chris@23
|
1306 case 3:
|
Chris@23
|
1307 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
|
Chris@23
|
1308 chromanorm[0] += pow(*it,2);
|
Chris@23
|
1309 }
|
Chris@23
|
1310 chromanorm[0] = sqrt(chromanorm[0]);
|
Chris@23
|
1311 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
|
Chris@23
|
1312 chromanorm[1] += pow(*it,2);
|
Chris@23
|
1313 }
|
Chris@23
|
1314 chromanorm[1] = sqrt(chromanorm[1]);
|
Chris@23
|
1315 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
|
Chris@23
|
1316 chromanorm[2] += pow(*it,2);
|
Chris@23
|
1317 }
|
Chris@23
|
1318 chromanorm[2] = sqrt(chromanorm[2]);
|
Chris@23
|
1319 break;
|
Chris@23
|
1320 }
|
Chris@23
|
1321 if (chromanorm[0] > 0) {
|
Chris@23
|
1322 for (int i = 0; i < f4.values.size(); i++) {
|
Chris@23
|
1323 f4.values[i] /= chromanorm[0];
|
Chris@23
|
1324 }
|
Chris@23
|
1325 }
|
Chris@23
|
1326 if (chromanorm[1] > 0) {
|
Chris@23
|
1327 for (int i = 0; i < f5.values.size(); i++) {
|
Chris@23
|
1328 f5.values[i] /= chromanorm[1];
|
Chris@23
|
1329 }
|
Chris@23
|
1330 }
|
Chris@23
|
1331 if (chromanorm[2] > 0) {
|
Chris@23
|
1332 for (int i = 0; i < f6.values.size(); i++) {
|
Chris@23
|
1333 f6.values[i] /= chromanorm[2];
|
Chris@23
|
1334 }
|
Chris@23
|
1335 }
|
matthiasm@13
|
1336
|
Chris@23
|
1337 }
|
matthiasm@13
|
1338
|
Chris@23
|
1339 // local chord estimation
|
Chris@23
|
1340 vector<float> currentChordSalience;
|
Chris@23
|
1341 float tempchordvalue = 0;
|
Chris@23
|
1342 float sumchordvalue = 0;
|
matthiasm@9
|
1343
|
Chris@23
|
1344 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
1345 tempchordvalue = 0;
|
Chris@23
|
1346 for (int iBin = 0; iBin < 12; iBin++) {
|
Chris@23
|
1347 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
1348 }
|
Chris@23
|
1349 for (int iBin = 12; iBin < 24; iBin++) {
|
Chris@23
|
1350 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
1351 }
|
Chris@23
|
1352 sumchordvalue+=tempchordvalue;
|
Chris@23
|
1353 currentChordSalience.push_back(tempchordvalue);
|
Chris@23
|
1354 }
|
Chris@23
|
1355 if (sumchordvalue > 0) {
|
Chris@23
|
1356 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
1357 currentChordSalience[iChord] /= sumchordvalue;
|
Chris@23
|
1358 }
|
Chris@23
|
1359 } else {
|
Chris@23
|
1360 currentChordSalience[nChord-1] = 1.0;
|
Chris@23
|
1361 }
|
Chris@23
|
1362 chordogram.push_back(currentChordSalience);
|
matthiasm@1
|
1363
|
Chris@23
|
1364 fsOut[3].push_back(f3);
|
Chris@23
|
1365 fsOut[4].push_back(f4);
|
Chris@23
|
1366 fsOut[5].push_back(f5);
|
Chris@23
|
1367 fsOut[6].push_back(f6);
|
Chris@23
|
1368 count++;
|
Chris@23
|
1369 }
|
Chris@23
|
1370 cerr << "done." << endl;
|
matthiasm@13
|
1371
|
matthiasm@10
|
1372
|
Chris@23
|
1373 /* Simple chord estimation
|
Chris@23
|
1374 I just take the local chord estimates ("currentChordSalience") and average them over time, then
|
Chris@23
|
1375 take the maximum. Very simple, don't do this at home...
|
Chris@23
|
1376 */
|
Chris@23
|
1377 cerr << "[NNLS Chroma Plugin] Chord Estimation ... ";
|
Chris@23
|
1378 count = 0;
|
Chris@23
|
1379 int halfwindowlength = m_inputSampleRate / m_stepSize;
|
Chris@23
|
1380 vector<int> chordSequence;
|
Chris@23
|
1381 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram
|
Chris@23
|
1382 vector<int> temp = vector<int>(nChord,0);
|
Chris@23
|
1383 scoreChordogram.push_back(temp);
|
Chris@23
|
1384 }
|
Chris@23
|
1385 for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) {
|
Chris@23
|
1386 int startIndex = count + 1;
|
Chris@23
|
1387 int endIndex = count + 2 * halfwindowlength;
|
matthiasm@10
|
1388
|
Chris@23
|
1389 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
|
matthiasm@10
|
1390
|
Chris@23
|
1391 vector<int> chordCandidates;
|
Chris@23
|
1392 for (unsigned iChord = 0; iChord < nChord-1; iChord++) {
|
Chris@23
|
1393 // float currsum = 0;
|
Chris@23
|
1394 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
Chris@23
|
1395 // currsum += chordogram[iFrame][iChord];
|
Chris@23
|
1396 // }
|
Chris@23
|
1397 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
|
Chris@23
|
1398 for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
Chris@23
|
1399 if (chordogram[iFrame][iChord] > chordThreshold) {
|
Chris@23
|
1400 chordCandidates.push_back(iChord);
|
Chris@23
|
1401 break;
|
Chris@23
|
1402 }
|
Chris@23
|
1403 }
|
Chris@23
|
1404 }
|
Chris@23
|
1405 chordCandidates.push_back(nChord-1);
|
Chris@23
|
1406 // cerr << chordCandidates.size() << endl;
|
Chris@23
|
1407
|
Chris@23
|
1408 float maxval = 0; // will be the value of the most salient *chord change* in this frame
|
Chris@23
|
1409 float maxindex = 0; //... and the index thereof
|
Chris@23
|
1410 unsigned bestchordL = nChord-1; // index of the best "left" chord
|
Chris@23
|
1411 unsigned bestchordR = nChord-1; // index of the best "right" chord
|
Chris@23
|
1412
|
Chris@23
|
1413 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
|
Chris@23
|
1414 // now find the max values on both sides of iWF
|
Chris@23
|
1415 // left side:
|
Chris@23
|
1416 float maxL = 0;
|
Chris@23
|
1417 unsigned maxindL = nChord-1;
|
Chris@23
|
1418 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
|
Chris@23
|
1419 unsigned iChord = chordCandidates[kChord];
|
Chris@23
|
1420 float currsum = 0;
|
Chris@23
|
1421 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
|
Chris@23
|
1422 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@10
|
1423 }
|
Chris@23
|
1424 if (iChord == nChord-1) currsum *= 0.8;
|
Chris@23
|
1425 if (currsum > maxL) {
|
Chris@23
|
1426 maxL = currsum;
|
Chris@23
|
1427 maxindL = iChord;
|
Chris@23
|
1428 }
|
Chris@23
|
1429 }
|
Chris@23
|
1430 // right side:
|
Chris@23
|
1431 float maxR = 0;
|
Chris@23
|
1432 unsigned maxindR = nChord-1;
|
Chris@23
|
1433 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
|
Chris@23
|
1434 unsigned iChord = chordCandidates[kChord];
|
Chris@23
|
1435 float currsum = 0;
|
Chris@23
|
1436 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
Chris@23
|
1437 currsum += chordogram[count+iFrame][iChord];
|
Chris@23
|
1438 }
|
Chris@23
|
1439 if (iChord == nChord-1) currsum *= 0.8;
|
Chris@23
|
1440 if (currsum > maxR) {
|
Chris@23
|
1441 maxR = currsum;
|
Chris@23
|
1442 maxindR = iChord;
|
Chris@23
|
1443 }
|
Chris@23
|
1444 }
|
Chris@23
|
1445 if (maxL+maxR > maxval) {
|
Chris@23
|
1446 maxval = maxL+maxR;
|
Chris@23
|
1447 maxindex = iWF;
|
Chris@23
|
1448 bestchordL = maxindL;
|
Chris@23
|
1449 bestchordR = maxindR;
|
Chris@23
|
1450 }
|
matthiasm@3
|
1451
|
Chris@23
|
1452 }
|
Chris@23
|
1453 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
|
Chris@23
|
1454 // add a score to every chord-frame-point that was part of a maximum
|
Chris@23
|
1455 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
|
Chris@23
|
1456 scoreChordogram[iFrame+count][bestchordL]++;
|
Chris@23
|
1457 }
|
Chris@23
|
1458 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
Chris@23
|
1459 scoreChordogram[iFrame+count][bestchordR]++;
|
Chris@23
|
1460 }
|
Chris@23
|
1461 if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
|
Chris@23
|
1462 count++;
|
Chris@23
|
1463 }
|
Chris@23
|
1464 // cerr << "******* agent finished *******" << endl;
|
Chris@23
|
1465 count = 0;
|
Chris@23
|
1466 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
|
Chris@23
|
1467 float maxval = 0; // will be the value of the most salient chord in this frame
|
Chris@23
|
1468 float maxindex = 0; //... and the index thereof
|
Chris@23
|
1469 for (unsigned iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
1470 if (scoreChordogram[count][iChord] > maxval) {
|
Chris@23
|
1471 maxval = scoreChordogram[count][iChord];
|
Chris@23
|
1472 maxindex = iChord;
|
Chris@23
|
1473 // cerr << iChord << endl;
|
Chris@23
|
1474 }
|
Chris@23
|
1475 }
|
Chris@23
|
1476 chordSequence.push_back(maxindex);
|
Chris@23
|
1477 // cerr << "before modefilter, maxindex: " << maxindex << endl;
|
Chris@23
|
1478 count++;
|
Chris@23
|
1479 }
|
Chris@23
|
1480 // cerr << "******* mode filter done *******" << endl;
|
matthiasm@10
|
1481
|
matthiasm@3
|
1482
|
Chris@23
|
1483 // mode filter on chordSequence
|
Chris@23
|
1484 count = 0;
|
Chris@23
|
1485 string oldChord = "";
|
Chris@23
|
1486 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
|
Chris@23
|
1487 Feature f6 = *it;
|
Chris@23
|
1488 Feature f7; // chord estimate
|
Chris@23
|
1489 f7.hasTimestamp = true;
|
Chris@23
|
1490 f7.timestamp = f6.timestamp;
|
Chris@23
|
1491 Feature f8; // chord estimate
|
Chris@23
|
1492 f8.hasTimestamp = true;
|
Chris@23
|
1493 f8.timestamp = f6.timestamp;
|
matthiasm@17
|
1494
|
Chris@23
|
1495 vector<int> chordCount = vector<int>(nChord,0);
|
Chris@23
|
1496 int maxChordCount = 0;
|
Chris@23
|
1497 int maxChordIndex = nChord-1;
|
Chris@23
|
1498 string maxChord;
|
Chris@23
|
1499 int startIndex = max(count - halfwindowlength/2,0);
|
Chris@23
|
1500 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
|
Chris@23
|
1501 for (int i = startIndex; i < endIndex; i++) {
|
Chris@23
|
1502 chordCount[chordSequence[i]]++;
|
Chris@23
|
1503 if (chordCount[chordSequence[i]] > maxChordCount) {
|
Chris@23
|
1504 // cerr << "start index " << startIndex << endl;
|
Chris@23
|
1505 maxChordCount++;
|
Chris@23
|
1506 maxChordIndex = chordSequence[i];
|
Chris@23
|
1507 maxChord = m_chordnames[maxChordIndex];
|
Chris@23
|
1508 }
|
Chris@23
|
1509 }
|
Chris@23
|
1510 // chordSequence[count] = maxChordIndex;
|
Chris@23
|
1511 // cerr << maxChordIndex << endl;
|
Chris@23
|
1512 f8.values.push_back(chordchange[count]/(halfwindowlength*2));
|
Chris@23
|
1513 // cerr << chordchange[count] << endl;
|
Chris@23
|
1514 fsOut[9].push_back(f8);
|
Chris@23
|
1515 if (oldChord != maxChord) {
|
Chris@23
|
1516 oldChord = maxChord;
|
matthiasm@3
|
1517
|
Chris@23
|
1518 // char buffer1 [50];
|
Chris@23
|
1519 // if (maxChordIndex < nChord - 1) {
|
Chris@23
|
1520 // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
|
Chris@23
|
1521 // } else {
|
Chris@23
|
1522 // sprintf(buffer1, "N");
|
Chris@23
|
1523 // }
|
Chris@23
|
1524 // f7.label = buffer1;
|
Chris@23
|
1525 f7.label = m_chordnames[maxChordIndex];
|
Chris@23
|
1526 fsOut[7].push_back(f7);
|
Chris@23
|
1527 }
|
Chris@23
|
1528 count++;
|
Chris@23
|
1529 }
|
Chris@23
|
1530 Feature f7; // last chord estimate
|
Chris@23
|
1531 f7.hasTimestamp = true;
|
Chris@23
|
1532 f7.timestamp = fsOut[6][fsOut[6].size()-1].timestamp;
|
Chris@23
|
1533 f7.label = "N";
|
Chris@23
|
1534 fsOut[7].push_back(f7);
|
Chris@23
|
1535 cerr << "done." << endl;
|
Chris@23
|
1536 // // musicity
|
Chris@23
|
1537 // count = 0;
|
Chris@23
|
1538 // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
|
Chris@23
|
1539 // vector<float> musicityValue;
|
Chris@23
|
1540 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
|
Chris@23
|
1541 // Feature f4 = *it;
|
Chris@23
|
1542 //
|
Chris@23
|
1543 // int startIndex = max(count - musicitykernelwidth/2,0);
|
Chris@23
|
1544 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
|
Chris@23
|
1545 // float chromasum = 0;
|
Chris@23
|
1546 // float diffsum = 0;
|
Chris@23
|
1547 // for (int k = 0; k < 12; k++) {
|
Chris@23
|
1548 // for (int i = startIndex + 1; i < endIndex; i++) {
|
Chris@23
|
1549 // chromasum += pow(fsOut[4][i].values[k],2);
|
Chris@23
|
1550 // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]);
|
Chris@23
|
1551 // }
|
Chris@23
|
1552 // }
|
Chris@23
|
1553 // diffsum /= chromasum;
|
Chris@23
|
1554 // musicityValue.push_back(diffsum);
|
Chris@23
|
1555 // count++;
|
Chris@23
|
1556 // }
|
Chris@23
|
1557 //
|
Chris@23
|
1558 // float musicityThreshold = 0.44;
|
Chris@23
|
1559 // if (m_stepSize == 4096) {
|
Chris@23
|
1560 // musicityThreshold = 0.74;
|
Chris@23
|
1561 // }
|
Chris@23
|
1562 // if (m_stepSize == 4410) {
|
Chris@23
|
1563 // musicityThreshold = 0.77;
|
Chris@23
|
1564 // }
|
Chris@23
|
1565 //
|
Chris@23
|
1566 // count = 0;
|
Chris@23
|
1567 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
|
Chris@23
|
1568 // Feature f4 = *it;
|
Chris@23
|
1569 // Feature f8; // musicity
|
Chris@23
|
1570 // Feature f9; // musicity segmenter
|
Chris@23
|
1571 //
|
Chris@23
|
1572 // f8.hasTimestamp = true;
|
Chris@23
|
1573 // f8.timestamp = f4.timestamp;
|
Chris@23
|
1574 // f9.hasTimestamp = true;
|
Chris@23
|
1575 // f9.timestamp = f4.timestamp;
|
Chris@23
|
1576 //
|
Chris@23
|
1577 // int startIndex = max(count - musicitykernelwidth/2,0);
|
Chris@23
|
1578 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
|
Chris@23
|
1579 // int musicityCount = 0;
|
Chris@23
|
1580 // for (int i = startIndex; i <= endIndex; i++) {
|
Chris@23
|
1581 // if (musicityValue[i] > musicityThreshold) musicityCount++;
|
Chris@23
|
1582 // }
|
Chris@23
|
1583 // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1);
|
Chris@23
|
1584 //
|
Chris@23
|
1585 // if (isSpeech) {
|
Chris@23
|
1586 // if (oldlabeltype != 2) {
|
Chris@23
|
1587 // f9.label = "Speech";
|
Chris@23
|
1588 // fsOut[9].push_back(f9);
|
Chris@23
|
1589 // oldlabeltype = 2;
|
Chris@23
|
1590 // }
|
Chris@23
|
1591 // } else {
|
Chris@23
|
1592 // if (oldlabeltype != 1) {
|
Chris@23
|
1593 // f9.label = "Music";
|
Chris@23
|
1594 // fsOut[9].push_back(f9);
|
Chris@23
|
1595 // oldlabeltype = 1;
|
Chris@23
|
1596 // }
|
Chris@23
|
1597 // }
|
Chris@23
|
1598 // f8.values.push_back(musicityValue[count]);
|
Chris@23
|
1599 // fsOut[8].push_back(f8);
|
Chris@23
|
1600 // count++;
|
Chris@23
|
1601 // }
|
Chris@23
|
1602 return fsOut;
|
matthiasm@0
|
1603
|
matthiasm@0
|
1604 }
|
matthiasm@0
|
1605
|