matthiasm@0
|
1
|
matthiasm@0
|
2 #include "NNLSChroma.h"
|
matthiasm@0
|
3 #include <cmath>
|
matthiasm@0
|
4 #include <list>
|
matthiasm@0
|
5 #include <iostream>
|
matthiasm@3
|
6 #include <fstream>
|
matthiasm@0
|
7 #include <sstream>
|
matthiasm@0
|
8 #include <cassert>
|
matthiasm@7
|
9 #include <cstdlib>
|
matthiasm@0
|
10 #include <cstdio>
|
matthiasm@7
|
11 #include <boost/tokenizer.hpp>
|
matthiasm@7
|
12 #include <boost/iostreams/device/file.hpp>
|
matthiasm@7
|
13 #include <boost/iostreams/stream.hpp>
|
matthiasm@7
|
14 #include <boost/lexical_cast.hpp>
|
matthiasm@1
|
15 #include "nnls.h"
|
matthiasm@0
|
16 #include "chorddict.cpp"
|
matthiasm@9
|
17
|
matthiasm@9
|
18 #include <omp.h>
|
matthiasm@9
|
19 #define N 1000
|
matthiasm@9
|
20 #define CHUNKSIZE 100
|
matthiasm@9
|
21
|
matthiasm@9
|
22
|
matthiasm@0
|
23 using namespace std;
|
matthiasm@7
|
24 using namespace boost;
|
matthiasm@0
|
25
|
matthiasm@0
|
26 const float sinvalue = 0.866025404;
|
matthiasm@0
|
27 const float cosvalue = -0.5;
|
matthiasm@0
|
28 const float hammingwind[19] = {0.0082, 0.0110, 0.0191, 0.0316, 0.0470, 0.0633, 0.0786, 0.0911, 0.0992, 0.1020, 0.0992, 0.0911, 0.0786, 0.0633, 0.0470, 0.0316, 0.0191, 0.0110, 0.0082};
|
matthiasm@0
|
29 const float basswindow[] = {0.001769, 0.015848, 0.043608, 0.084265, 0.136670, 0.199341, 0.270509, 0.348162, 0.430105, 0.514023, 0.597545, 0.678311, 0.754038, 0.822586, 0.882019, 0.930656, 0.967124, 0.990393, 0.999803, 0.995091, 0.976388, 0.944223, 0.899505, 0.843498, 0.777785, 0.704222, 0.624888, 0.542025, 0.457975, 0.375112, 0.295778, 0.222215, 0.156502, 0.100495, 0.055777, 0.023612, 0.004909, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000};
|
matthiasm@0
|
30 const float treblewindow[] = {0.000350, 0.003144, 0.008717, 0.017037, 0.028058, 0.041719, 0.057942, 0.076638, 0.097701, 0.121014, 0.146447, 0.173856, 0.203090, 0.233984, 0.266366, 0.300054, 0.334860, 0.370590, 0.407044, 0.444018, 0.481304, 0.518696, 0.555982, 0.592956, 0.629410, 0.665140, 0.699946, 0.733634, 0.766016, 0.796910, 0.826144, 0.853553, 0.878986, 0.902299, 0.923362, 0.942058, 0.958281, 0.971942, 0.982963, 0.991283, 0.996856, 0.999650, 0.999650, 0.996856, 0.991283, 0.982963, 0.971942, 0.958281, 0.942058, 0.923362, 0.902299, 0.878986, 0.853553, 0.826144, 0.796910, 0.766016, 0.733634, 0.699946, 0.665140, 0.629410, 0.592956, 0.555982, 0.518696, 0.481304, 0.444018, 0.407044, 0.370590, 0.334860, 0.300054, 0.266366, 0.233984, 0.203090, 0.173856, 0.146447, 0.121014, 0.097701, 0.076638, 0.057942, 0.041719, 0.028058, 0.017037, 0.008717, 0.003144, 0.000350};
|
matthiasm@0
|
31 const char* notenames[24] = {"A (bass)","Bb (bass)","B (bass)","C (bass)","C# (bass)","D (bass)","Eb (bass)","E (bass)","F (bass)","F# (bass)","G (bass)","Ab (bass)",
|
matthiasm@0
|
32 "A","Bb","B","C","C#","D","Eb","E","F","F#","G","Ab"};
|
matthiasm@7
|
33
|
matthiasm@7
|
34 const char* bassnames[12][12] ={
|
matthiasm@7
|
35 {"A","","B","C","C#","D","","E","","F#","G","G#"},
|
matthiasm@7
|
36 {"Bb","","C","Db","D","Eb","","F","","G","Ab","A"},
|
matthiasm@7
|
37 {"B","","C#","D","D#","E","","F#","","G#","A","A#"},
|
matthiasm@7
|
38 {"C","","D","Eb","E","F","","G","","A","Bb","B"},
|
matthiasm@7
|
39 {"C#","","D#","E","E#","F#","","G#","","A#","B","B#"},
|
matthiasm@7
|
40 {"D","","E","F","F#","G","","A","","B","C","C#"},
|
matthiasm@7
|
41 {"Eb","","F","Gb","G","Ab","","Bb","","C","Db","D"},
|
matthiasm@7
|
42 {"E","","F#","G","G#","A","","B","","C#","D","D#"},
|
matthiasm@7
|
43 {"F","","G","Ab","A","Bb","","C","","D","Eb","E"},
|
matthiasm@7
|
44 {"F#","","G#","A","A#","B","","C#","","D#","E","E#"},
|
matthiasm@7
|
45 {"G","","A","Bb","B","C","","D","","E","F","F#"},
|
matthiasm@7
|
46 {"Ab","","Bb","Cb","C","Db","","Eb","","F","Gb","G"}
|
matthiasm@7
|
47 };
|
matthiasm@0
|
48 const vector<float> hw(hammingwind, hammingwind+19);
|
matthiasm@0
|
49 const int nNote = 256;
|
matthiasm@0
|
50
|
matthiasm@0
|
51 /** Special Convolution
|
matthiasm@0
|
52 special convolution is as long as the convolvee, i.e. the first argument. in the valid core part of the
|
matthiasm@0
|
53 convolution it contains the usual convolution values, but the pads at the beginning (ending) have the same values
|
matthiasm@0
|
54 as the first (last) valid convolution bin.
|
matthiasm@0
|
55 **/
|
matthiasm@0
|
56
|
matthiasm@0
|
57 const bool debug_on = false;
|
matthiasm@0
|
58
|
matthiasm@0
|
/**
 * "Special" convolution of `convolvee` with an odd-length `kernel`.
 *
 * The valid core of the result holds the ordinary convolution values, centred
 * on the kernel (the value computed at position n is stored at n - lenKernel/2).
 * The lenKernel/2 bins before the core are filled with the first valid value,
 * and the lenKernel/2 bins after it with the last valid value.
 *
 * @param convolvee signal to be smoothed
 * @param kernel    convolution kernel; its length must be odd
 * @return vector of at least 256 entries (the length this function has always
 *         returned); it grows to convolvee.size() when the input is longer, so
 *         that the core is never written out of bounds. Entries beyond the
 *         input length stay 0. If the input is shorter than the kernel there
 *         is no valid core and the result is all zeros.
 */
std::vector<float> SpecialConvolution(std::vector<float> convolvee, std::vector<float> kernel)
{
    const int lenConvolvee = static_cast<int>(convolvee.size());
    const int lenKernel = static_cast<int>(kernel.size());
    const int halfKernel = lenKernel / 2;

    // Historic fixed output length; kept as the minimum for existing callers.
    const int legacyLength = 256;
    const int lenOut = (lenConvolvee > legacyLength) ? lenConvolvee : legacyLength;

    std::vector<float> Z(lenOut, 0.0f);
    assert(lenKernel % 2 != 0); // no exception handling !!!

    // Without at least one full kernel overlap there is nothing to compute,
    // and the padding code below would index before the start of Z.
    if (lenKernel <= 0 || lenConvolvee < lenKernel) {
        return Z;
    }

    // valid core
    for (int n = lenKernel - 1; n < lenConvolvee; ++n) {
        float s = 0.0f;
        for (int m = 0; m < lenKernel; ++m) {
            s += convolvee[n - m] * kernel[m];
        }
        Z[n - halfKernel] = s;
    }

    // fill lower and upper pads with the nearest valid value
    for (int n = 0; n < halfKernel; ++n) {
        Z[n] = Z[halfKernel];
    }
    const float lastValid = Z[lenConvolvee - halfKernel - 1];
    for (int n = lenConvolvee - halfKernel; n < lenConvolvee; ++n) {
        Z[n] = lastValid;
    }
    return Z;
}
|
matthiasm@0
|
86
|
matthiasm@0
|
87 // vector<float> FftBin2Frequency(vector<float> binnumbers, int fs, int blocksize)
|
matthiasm@0
|
88 // {
|
matthiasm@0
|
89 // vector<float> freq(binnumbers.size, 0.0);
|
matthiasm@0
|
90 // for (unsigned i = 0; i < binnumbers.size; ++i) {
|
matthiasm@0
|
91 // freq[i] = (binnumbers[i]-1.0) * fs * 1.0 / blocksize;
|
matthiasm@0
|
92 // }
|
matthiasm@0
|
93 // return freq;
|
matthiasm@0
|
94 // }
|
matthiasm@0
|
95
|
matthiasm@0
|
/**
 * Raised-cosine pulse of total width `width` centred on `centre`.
 *
 * @param x      position at which the pulse is evaluated
 * @param centre position of the pulse maximum (value 1)
 * @param width  full width of the pulse; outside centre +/- width/2 the
 *               pulse is 0
 * @return pulse value in [0, 1]; 0 for a non-positive (or NaN) width
 */
float cospuls(float x, float centre, float width)
{
    // A pulse without extent has no support; this also avoids the
    // division by zero (and resulting NaN) in the reciprocal below.
    if (!(width > 0.0f)) {
        return 0.0;
    }
    float recipwidth = 1.0/width;
    const float offset = x - centre;
    // std::fabs: must be the floating-point absolute value, never ::abs(int)
    if (std::fabs(offset) <= 0.5 * width) {
        return std::cos(offset*2*M_PI*recipwidth)*.5+.5;
    }
    return 0.0;
}
|
matthiasm@0
|
104
|
matthiasm@0
|
105 float pitchCospuls(float x, float centre, int binsperoctave)
|
matthiasm@0
|
106 {
|
matthiasm@0
|
107 float warpedf = -binsperoctave * (log2(centre) - log2(x));
|
matthiasm@0
|
108 float out = cospuls(warpedf, 0.0, 2.0);
|
matthiasm@0
|
109 // now scale to correct for note density
|
matthiasm@0
|
110 float c = log(2.0)/binsperoctave;
|
matthiasm@0
|
111 if (x > 0) {
|
matthiasm@0
|
112 out = out / (c * x);
|
matthiasm@0
|
113 } else {
|
matthiasm@0
|
114 out = 0;
|
matthiasm@0
|
115 }
|
matthiasm@0
|
116 return out;
|
matthiasm@0
|
117 }
|
matthiasm@0
|
118
|
matthiasm@0
|
119 bool logFreqMatrix(int fs, int blocksize, float *outmatrix) {
|
matthiasm@0
|
120
|
matthiasm@0
|
121 int binspersemitone = 3; // this must be 3
|
matthiasm@0
|
122 int minoctave = 0; // this must be 0
|
matthiasm@0
|
123 int maxoctave = 7; // this must be 7
|
matthiasm@1
|
124 int oversampling = 80;
|
matthiasm@0
|
125
|
matthiasm@0
|
126 // linear frequency vector
|
matthiasm@0
|
127 vector<float> fft_f;
|
matthiasm@0
|
128 for (int i = 0; i < blocksize/2; ++i) {
|
matthiasm@0
|
129 fft_f.push_back(i * (fs * 1.0 / blocksize));
|
matthiasm@0
|
130 }
|
matthiasm@0
|
131 float fft_width = fs * 2.0 / blocksize;
|
matthiasm@0
|
132
|
matthiasm@0
|
133 // linear oversampled frequency vector
|
matthiasm@0
|
134 vector<float> oversampled_f;
|
matthiasm@0
|
135 for (unsigned int i = 0; i < oversampling * blocksize/2; ++i) {
|
matthiasm@0
|
136 oversampled_f.push_back(i * ((fs * 1.0 / blocksize) / oversampling));
|
matthiasm@0
|
137 }
|
matthiasm@0
|
138
|
matthiasm@0
|
139 // pitch-spaced frequency vector
|
matthiasm@0
|
140 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone!
|
matthiasm@0
|
141 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone!
|
matthiasm@0
|
142 vector<float> cq_f;
|
matthiasm@0
|
143 float oob = 1.0/binspersemitone; // one over binspersemitone
|
matthiasm@0
|
144 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12
|
matthiasm@0
|
145 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69)));
|
matthiasm@0
|
146 for (int i = minMIDI + 1; i < maxMIDI; ++i) {
|
matthiasm@0
|
147 for (int k = -1; k < 2; ++k) {
|
matthiasm@0
|
148 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69)));
|
matthiasm@0
|
149 }
|
matthiasm@0
|
150 }
|
matthiasm@0
|
151 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69)));
|
matthiasm@0
|
152 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69)));
|
matthiasm@0
|
153
|
matthiasm@0
|
154 int nFFT = fft_f.size();
|
matthiasm@0
|
155
|
matthiasm@0
|
156 vector<float> fft_activation;
|
matthiasm@0
|
157 for (int iOS = 0; iOS < 2 * oversampling; ++iOS) {
|
matthiasm@0
|
158 float cosp = cospuls(oversampled_f[iOS],fft_f[1],fft_width);
|
matthiasm@0
|
159 fft_activation.push_back(cosp);
|
matthiasm@0
|
160 // cerr << cosp << endl;
|
matthiasm@0
|
161 }
|
matthiasm@0
|
162
|
matthiasm@0
|
163 float cq_activation;
|
matthiasm@0
|
164 for (int iFFT = 1; iFFT < nFFT; ++iFFT) {
|
matthiasm@0
|
165 // find frequency stretch where the oversampled vector can be non-zero (i.e. in a window of width fft_width around the current frequency)
|
matthiasm@0
|
166 int curr_start = oversampling * iFFT - oversampling;
|
matthiasm@0
|
167 int curr_end = oversampling * iFFT + oversampling; // don't know if I should add "+1" here
|
matthiasm@0
|
168 // cerr << oversampled_f[curr_start] << " " << fft_f[iFFT] << " " << oversampled_f[curr_end] << endl;
|
matthiasm@0
|
169 for (unsigned iCQ = 0; iCQ < cq_f.size(); ++iCQ) {
|
matthiasm@0
|
170 outmatrix[iFFT + nFFT * iCQ] = 0;
|
matthiasm@1
|
171 if (cq_f[iCQ] * pow(2.0, 0.084) + fft_width > fft_f[iFFT] && cq_f[iCQ] * pow(2.0, -0.084 * 2) - fft_width < fft_f[iFFT]) { // within a generous neighbourhood
|
matthiasm@0
|
172 for (int iOS = curr_start; iOS < curr_end; ++iOS) {
|
matthiasm@0
|
173 cq_activation = pitchCospuls(oversampled_f[iOS],cq_f[iCQ],binspersemitone*12);
|
matthiasm@0
|
174 // cerr << oversampled_f[iOS] << " " << cq_f[iCQ] << " " << cq_activation << endl;
|
matthiasm@0
|
175 outmatrix[iFFT + nFFT * iCQ] += cq_activation * fft_activation[iOS-curr_start];
|
matthiasm@0
|
176 }
|
matthiasm@0
|
177 // if (iCQ == 1 || iCQ == 2) {
|
matthiasm@0
|
178 // cerr << " " << outmatrix[iFFT + nFFT * iCQ] << endl;
|
matthiasm@0
|
179 // }
|
matthiasm@0
|
180 }
|
matthiasm@0
|
181 }
|
matthiasm@0
|
182 }
|
matthiasm@0
|
183 return true;
|
matthiasm@0
|
184 }
|
matthiasm@0
|
185
|
matthiasm@3
|
186 bool dictionaryMatrix(float* dm) {
|
matthiasm@1
|
187 int binspersemitone = 3; // this must be 3
|
matthiasm@1
|
188 int minoctave = 0; // this must be 0
|
matthiasm@1
|
189 int maxoctave = 7; // this must be 7
|
matthiasm@4
|
190 float s_param = 0.7;
|
matthiasm@1
|
191
|
matthiasm@1
|
192 // pitch-spaced frequency vector
|
matthiasm@1
|
193 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone!
|
matthiasm@1
|
194 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone!
|
matthiasm@1
|
195 vector<float> cq_f;
|
matthiasm@1
|
196 float oob = 1.0/binspersemitone; // one over binspersemitone
|
matthiasm@1
|
197 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12
|
matthiasm@1
|
198 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69)));
|
matthiasm@1
|
199 for (int i = minMIDI + 1; i < maxMIDI; ++i) {
|
matthiasm@1
|
200 for (int k = -1; k < 2; ++k) {
|
matthiasm@1
|
201 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69)));
|
matthiasm@1
|
202 }
|
matthiasm@1
|
203 }
|
matthiasm@1
|
204 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69)));
|
matthiasm@1
|
205 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69)));
|
matthiasm@1
|
206
|
matthiasm@1
|
207 float curr_f;
|
matthiasm@1
|
208 float floatbin;
|
matthiasm@1
|
209 float curr_amp;
|
matthiasm@1
|
210 // now for every combination calculate the matrix element
|
matthiasm@1
|
211 for (unsigned iOut = 0; iOut < 12 * (maxoctave - minoctave); ++iOut) {
|
matthiasm@3
|
212 // cerr << iOut << endl;
|
matthiasm@1
|
213 for (unsigned iHarm = 1; iHarm <= 20; ++iHarm) {
|
matthiasm@1
|
214 curr_f = 440 * pow(2,(minMIDI-69+iOut)*1.0/12) * iHarm;
|
matthiasm@3
|
215 // if (curr_f > cq_f[nNote-1]) break;
|
matthiasm@3
|
216 floatbin = ((iOut + 1) * binspersemitone + 1) + binspersemitone * 12 * log2(iHarm);
|
matthiasm@3
|
217 // cerr << floatbin << endl;
|
matthiasm@1
|
218 curr_amp = pow(s_param,float(iHarm-1));
|
matthiasm@3
|
219 // cerr << "curramp" << curr_amp << endl;
|
matthiasm@1
|
220 for (unsigned iNote = 0; iNote < nNote; ++iNote) {
|
matthiasm@3
|
221 if (abs(iNote+1.0-floatbin)<2) {
|
matthiasm@3
|
222 dm[iNote + 256 * iOut] += cospuls(iNote+1.0, floatbin, binspersemitone + 0.0) * curr_amp;
|
matthiasm@3
|
223 // dm[iNote + nNote * iOut] += 1 * curr_amp;
|
matthiasm@3
|
224 }
|
matthiasm@1
|
225 }
|
matthiasm@3
|
226 }
|
matthiasm@1
|
227 }
|
matthiasm@3
|
228
|
matthiasm@3
|
229
|
matthiasm@1
|
230 }
|
matthiasm@1
|
231
|
matthiasm@7
|
/**
 * Look up the environment variable `key`.
 *
 * @return its value, or an empty string when the variable is not set
 */
std::string get_env_var( std::string const & key ) {
    const char * const raw = std::getenv( key.c_str() );
    return raw ? std::string(raw) : std::string();
}
|
matthiasm@7
|
241
|
matthiasm@7
|
242
|
matthiasm@9
|
243 vector<string> chordDictionary(vector<float> *mchorddict) {
|
matthiasm@7
|
244 // ifstream chordDictFile;
|
matthiasm@7
|
245 string chordDictFilename(get_env_var("VAMP_PATH")+"/chord.dict");
|
matthiasm@7
|
246 // string instring[] = ",1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\nm,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0\n6,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0\n7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0\nmaj7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1\nmin7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0\n,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\n,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\ndim,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0\naug,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0\n";
|
matthiasm@7
|
247 typedef tokenizer<char_separator<char> > Tok;
|
matthiasm@7
|
248 // char_separator<char> sep; // default constructed
|
matthiasm@7
|
249 char_separator<char> sep(",; ",":");
|
matthiasm@7
|
250 iostreams::stream<iostreams::file_source> chordDictFile(chordDictFilename.c_str());
|
matthiasm@7
|
251 string line;
|
matthiasm@7
|
252 int iElement = 0;
|
matthiasm@7
|
253 int nChord = 0;
|
matthiasm@7
|
254
|
matthiasm@7
|
255 vector<string> loadedChordNames;
|
matthiasm@7
|
256 vector<float> loadedChordDict;
|
matthiasm@7
|
257 if (chordDictFile.is_open()) {
|
matthiasm@7
|
258 while (std::getline(chordDictFile, line)) { // loop over lines in chord.dict file
|
matthiasm@7
|
259 // first, get the chord definition
|
matthiasm@7
|
260 string chordType;
|
matthiasm@7
|
261 vector<float> tempPCVector;
|
matthiasm@7
|
262 // cerr << line << endl;
|
matthiasm@7
|
263 if (!line.empty() && line.substr(0,1) != "#") {
|
matthiasm@7
|
264 Tok tok(line, sep);
|
matthiasm@7
|
265 for(Tok::iterator tok_iter = tok.begin(); tok_iter != tok.end(); ++tok_iter) { // loop over line elements
|
matthiasm@7
|
266 string tempString = *tok_iter;
|
matthiasm@7
|
267 // cerr << tempString << endl;
|
matthiasm@7
|
268 if (tok_iter == tok.begin()) { // either the chord name or a colon
|
matthiasm@7
|
269 if (tempString == ":") {
|
matthiasm@7
|
270 chordType = "";
|
matthiasm@7
|
271 } else {
|
matthiasm@7
|
272 chordType = tempString;
|
matthiasm@7
|
273 tok_iter++; // is this cheating ? :)
|
matthiasm@7
|
274 }
|
matthiasm@7
|
275 } else {
|
matthiasm@7
|
276 tempPCVector.push_back(lexical_cast<float>(*tok_iter));
|
matthiasm@7
|
277 }
|
matthiasm@7
|
278 }
|
matthiasm@7
|
279
|
matthiasm@7
|
280 // now make all 12 chords of every type
|
matthiasm@7
|
281 for (unsigned iSemitone = 0; iSemitone < 12; iSemitone++) {
|
matthiasm@7
|
282 // add bass slash notation
|
matthiasm@7
|
283 string slashNotation = "";
|
matthiasm@7
|
284 for (unsigned kSemitone = 1; kSemitone < 12; kSemitone++) {
|
matthiasm@7
|
285 if (tempPCVector[(kSemitone) % 12] > 0.99) {
|
matthiasm@7
|
286 slashNotation = bassnames[iSemitone][kSemitone];
|
matthiasm@7
|
287 }
|
matthiasm@7
|
288 }
|
matthiasm@7
|
289 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) { // bass pitch classes
|
matthiasm@9
|
290 // cerr << ((kSemitone - iSemitone + 12) % 12) << endl;
|
matthiasm@9
|
291 float bassValue = 0;
|
matthiasm@9
|
292 if (tempPCVector[(kSemitone - iSemitone + 12) % 12]==1) {
|
matthiasm@9
|
293 bassValue = 1;
|
matthiasm@9
|
294 } else {
|
matthiasm@9
|
295 if (tempPCVector[((kSemitone - iSemitone + 12) % 12) + 12] == 1) bassValue = 0.2;
|
matthiasm@9
|
296 }
|
matthiasm@9
|
297 loadedChordDict.push_back(bassValue);
|
matthiasm@7
|
298 }
|
matthiasm@7
|
299 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) { // chord pitch classes
|
matthiasm@7
|
300 loadedChordDict.push_back(tempPCVector[((kSemitone - iSemitone + 12) % 12) + 12]);
|
matthiasm@7
|
301 }
|
matthiasm@7
|
302 ostringstream os;
|
matthiasm@7
|
303 if (slashNotation.empty()) {
|
matthiasm@7
|
304 os << notenames[12+iSemitone] << chordType;
|
matthiasm@7
|
305 } else {
|
matthiasm@7
|
306 os << notenames[12+iSemitone] << chordType << "/" << slashNotation;
|
matthiasm@7
|
307 }
|
matthiasm@7
|
308
|
matthiasm@7
|
309 loadedChordNames.push_back(os.str());
|
matthiasm@7
|
310 }
|
matthiasm@7
|
311 }
|
matthiasm@7
|
312 }
|
matthiasm@7
|
313 // N type
|
matthiasm@7
|
314 loadedChordNames.push_back("N");
|
matthiasm@7
|
315 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) loadedChordDict.push_back(0.5);
|
matthiasm@7
|
316 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) loadedChordDict.push_back(1.0);
|
matthiasm@7
|
317
|
matthiasm@7
|
318 // normalise
|
matthiasm@7
|
319 float sum = 0;
|
matthiasm@7
|
320 for (int i = 0; i < loadedChordDict.size(); i++) {
|
matthiasm@7
|
321 sum += pow(loadedChordDict[i],2);
|
matthiasm@7
|
322 if (i % 24 == 23) {
|
matthiasm@7
|
323 float invertedsum = 1.0/sqrt(sum);
|
matthiasm@7
|
324 for (int k = 0; k < 24; k++) {
|
matthiasm@7
|
325 loadedChordDict[i-k] *= invertedsum;
|
matthiasm@7
|
326 }
|
matthiasm@7
|
327 sum = 0;
|
matthiasm@7
|
328 }
|
matthiasm@7
|
329
|
matthiasm@7
|
330 }
|
matthiasm@7
|
331
|
matthiasm@7
|
332
|
matthiasm@7
|
333 nChord = 0;
|
matthiasm@7
|
334 for (int i = 0; i < loadedChordNames.size(); i++) {
|
matthiasm@7
|
335 nChord++;
|
matthiasm@7
|
336 }
|
matthiasm@7
|
337 chordDictFile.close();
|
matthiasm@7
|
338
|
matthiasm@7
|
339
|
matthiasm@9
|
340 // mchorddict = new float[nChord*24];
|
matthiasm@7
|
341 for (int i = 0; i < nChord*24; i++) {
|
matthiasm@9
|
342 mchorddict->push_back(loadedChordDict[i]);
|
matthiasm@7
|
343 }
|
matthiasm@9
|
344
|
matthiasm@7
|
345 } else {// use default from chorddict.cpp
|
matthiasm@9
|
346 // mchorddict = new float[nChorddict];
|
matthiasm@7
|
347 for (int i = 0; i < nChorddict; i++) {
|
matthiasm@9
|
348 mchorddict->push_back(chorddict[i]);
|
matthiasm@7
|
349 }
|
matthiasm@7
|
350
|
matthiasm@7
|
351 nChord = nChorddict/24;
|
matthiasm@7
|
352 // mchordnames = new string[nChorddict/24];
|
matthiasm@7
|
353 char buffer1 [50];
|
matthiasm@7
|
354 for (int i = 0; i < nChorddict/24; i++) {
|
matthiasm@7
|
355 if (i < nChorddict/24 - 1) {
|
matthiasm@7
|
356 sprintf(buffer1, "%s%s", notenames[i % 12 + 12], chordtypes[i]);
|
matthiasm@7
|
357 } else {
|
matthiasm@7
|
358 sprintf(buffer1, "N");
|
matthiasm@7
|
359 }
|
matthiasm@7
|
360 ostringstream os;
|
matthiasm@7
|
361 os << buffer1;
|
matthiasm@9
|
362 loadedChordNames.push_back(os.str());
|
matthiasm@9
|
363
|
matthiasm@7
|
364 }
|
matthiasm@7
|
365
|
matthiasm@7
|
366 }
|
matthiasm@9
|
367 // cerr << "before leaving" << chordnames[1] << endl;
|
matthiasm@9
|
368 return loadedChordNames;
|
matthiasm@7
|
369 }
|
matthiasm@0
|
370
|
matthiasm@0
|
371 NNLSChroma::NNLSChroma(float inputSampleRate) :
|
matthiasm@0
|
372 Plugin(inputSampleRate),
|
matthiasm@0
|
373 m_fl(0),
|
matthiasm@0
|
374 m_blockSize(0),
|
matthiasm@0
|
375 m_stepSize(0),
|
matthiasm@0
|
376 m_lengthOfNoteIndex(0),
|
matthiasm@0
|
377 m_meanTuning0(0),
|
matthiasm@0
|
378 m_meanTuning1(0),
|
matthiasm@0
|
379 m_meanTuning2(0),
|
matthiasm@0
|
380 m_localTuning0(0),
|
matthiasm@0
|
381 m_localTuning1(0),
|
matthiasm@0
|
382 m_localTuning2(0),
|
matthiasm@4
|
383 m_paling(1.0),
|
matthiasm@3
|
384 m_preset(0.0),
|
matthiasm@0
|
385 m_localTuning(0),
|
matthiasm@0
|
386 m_kernelValue(0),
|
matthiasm@0
|
387 m_kernelFftIndex(0),
|
matthiasm@0
|
388 m_kernelNoteIndex(0),
|
matthiasm@1
|
389 m_dict(0),
|
matthiasm@0
|
390 m_tuneLocal(false),
|
matthiasm@7
|
391 m_dictID(0),
|
matthiasm@7
|
392 m_chorddict(0),
|
matthiasm@7
|
393 m_chordnames(0)
|
matthiasm@0
|
394 {
|
matthiasm@0
|
395 if (debug_on) cerr << "--> NNLSChroma" << endl;
|
matthiasm@7
|
396
|
matthiasm@7
|
397 // make the *note* dictionary matrix
|
matthiasm@3
|
398 m_dict = new float[nNote * 84];
|
matthiasm@3
|
399 for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0;
|
matthiasm@1
|
400 dictionaryMatrix(m_dict);
|
matthiasm@7
|
401
|
matthiasm@7
|
402 // get the *chord* dictionary from file (if the file exists)
|
matthiasm@9
|
403 m_chordnames = chordDictionary(&m_chorddict);
|
matthiasm@0
|
404 }
|
matthiasm@0
|
405
|
matthiasm@0
|
406
|
matthiasm@0
|
407 NNLSChroma::~NNLSChroma()
|
matthiasm@0
|
408 {
|
matthiasm@0
|
409 if (debug_on) cerr << "--> ~NNLSChroma" << endl;
|
matthiasm@1
|
410 delete [] m_dict;
|
matthiasm@9
|
411 // delete [] m_chorddict;
|
matthiasm@7
|
412 // delete m_chordnames;
|
matthiasm@0
|
413 }
|
matthiasm@0
|
414
|
matthiasm@0
|
415 string
|
matthiasm@0
|
416 NNLSChroma::getIdentifier() const
|
matthiasm@0
|
417 {
|
matthiasm@0
|
418 if (debug_on) cerr << "--> getIdentifier" << endl;
|
matthiasm@0
|
419 return "nnls_chroma";
|
matthiasm@0
|
420 }
|
matthiasm@0
|
421
|
matthiasm@0
|
422 string
|
matthiasm@0
|
423 NNLSChroma::getName() const
|
matthiasm@0
|
424 {
|
matthiasm@0
|
425 if (debug_on) cerr << "--> getName" << endl;
|
matthiasm@0
|
426 return "NNLS Chroma";
|
matthiasm@0
|
427 }
|
matthiasm@0
|
428
|
matthiasm@0
|
429 string
|
matthiasm@0
|
430 NNLSChroma::getDescription() const
|
matthiasm@0
|
431 {
|
matthiasm@0
|
432 // Return something helpful here!
|
matthiasm@0
|
433 if (debug_on) cerr << "--> getDescription" << endl;
|
matthiasm@4
|
434 return "This plugin provides a number of features derived from a log-frequency amplitude spectrum (LAS) of the DFT: the LAS itself, a standard-tuned version thereof (the local and global tuning estimates can are also be output), an approximate transcription to semitone activation using non-linear least squares (NNLS). Furthermore chroma features and a simple chord estimate derived from this NNLS semitone transcription.";
|
matthiasm@0
|
435 }
|
matthiasm@0
|
436
|
matthiasm@0
|
437 string
|
matthiasm@0
|
438 NNLSChroma::getMaker() const
|
matthiasm@0
|
439 {
|
matthiasm@0
|
440 if (debug_on) cerr << "--> getMaker" << endl;
|
matthiasm@0
|
441 // Your name here
|
matthiasm@0
|
442 return "Matthias Mauch";
|
matthiasm@0
|
443 }
|
matthiasm@0
|
444
|
matthiasm@0
|
445 int
|
matthiasm@0
|
446 NNLSChroma::getPluginVersion() const
|
matthiasm@0
|
447 {
|
matthiasm@0
|
448 if (debug_on) cerr << "--> getPluginVersion" << endl;
|
matthiasm@0
|
449 // Increment this each time you release a version that behaves
|
matthiasm@0
|
450 // differently from the previous one
|
matthiasm@0
|
451 return 1;
|
matthiasm@0
|
452 }
|
matthiasm@0
|
453
|
matthiasm@0
|
454 string
|
matthiasm@0
|
455 NNLSChroma::getCopyright() const
|
matthiasm@0
|
456 {
|
matthiasm@0
|
457 if (debug_on) cerr << "--> getCopyright" << endl;
|
matthiasm@0
|
458 // This function is not ideally named. It does not necessarily
|
matthiasm@0
|
459 // need to say who made the plugin -- getMaker does that -- but it
|
matthiasm@0
|
460 // should indicate the terms under which it is distributed. For
|
matthiasm@0
|
461 // example, "Copyright (year). All Rights Reserved", or "GPL"
|
matthiasm@0
|
462 return "Copyright (2010). All rights reserved.";
|
matthiasm@0
|
463 }
|
matthiasm@0
|
464
|
matthiasm@0
|
465 NNLSChroma::InputDomain
|
matthiasm@0
|
466 NNLSChroma::getInputDomain() const
|
matthiasm@0
|
467 {
|
matthiasm@0
|
468 if (debug_on) cerr << "--> getInputDomain" << endl;
|
matthiasm@0
|
469 return FrequencyDomain;
|
matthiasm@0
|
470 }
|
matthiasm@0
|
471
|
matthiasm@0
|
472 size_t
|
matthiasm@0
|
473 NNLSChroma::getPreferredBlockSize() const
|
matthiasm@0
|
474 {
|
matthiasm@0
|
475 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
|
matthiasm@0
|
476 return 16384; // 0 means "I can handle any block size"
|
matthiasm@0
|
477 }
|
matthiasm@0
|
478
|
matthiasm@0
|
479 size_t
|
matthiasm@0
|
480 NNLSChroma::getPreferredStepSize() const
|
matthiasm@0
|
481 {
|
matthiasm@0
|
482 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
|
matthiasm@0
|
483 return 2048; // 0 means "anything sensible"; in practice this
|
matthiasm@0
|
484 // means the same as the block size for TimeDomain
|
matthiasm@0
|
485 // plugins, or half of it for FrequencyDomain plugins
|
matthiasm@0
|
486 }
|
matthiasm@0
|
487
|
matthiasm@0
|
488 size_t
|
matthiasm@0
|
489 NNLSChroma::getMinChannelCount() const
|
matthiasm@0
|
490 {
|
matthiasm@0
|
491 if (debug_on) cerr << "--> getMinChannelCount" << endl;
|
matthiasm@0
|
492 return 1;
|
matthiasm@0
|
493 }
|
matthiasm@0
|
494
|
matthiasm@0
|
495 size_t
|
matthiasm@0
|
496 NNLSChroma::getMaxChannelCount() const
|
matthiasm@0
|
497 {
|
matthiasm@0
|
498 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
|
matthiasm@0
|
499 return 1;
|
matthiasm@0
|
500 }
|
matthiasm@0
|
501
|
matthiasm@0
|
502 NNLSChroma::ParameterList
|
matthiasm@0
|
503 NNLSChroma::getParameterDescriptors() const
|
matthiasm@0
|
504 {
|
matthiasm@0
|
505 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
|
matthiasm@0
|
506 ParameterList list;
|
matthiasm@0
|
507
|
matthiasm@3
|
508 ParameterDescriptor d3;
|
matthiasm@3
|
509 d3.identifier = "preset";
|
matthiasm@3
|
510 d3.name = "preset";
|
matthiasm@3
|
511 d3.description = "Spectral paling: no paling - 0; whitening - 1.";
|
matthiasm@3
|
512 d3.unit = "";
|
matthiasm@3
|
513 d3.isQuantized = true;
|
matthiasm@3
|
514 d3.quantizeStep = 1;
|
matthiasm@3
|
515 d3.minValue = 0.0;
|
matthiasm@4
|
516 d3.maxValue = 3.0;
|
matthiasm@3
|
517 d3.defaultValue = 0.0;
|
matthiasm@3
|
518 d3.valueNames.push_back("polyphonic pop");
|
matthiasm@3
|
519 d3.valueNames.push_back("polyphonic pop (fast)");
|
matthiasm@3
|
520 d3.valueNames.push_back("solo keyboard");
|
matthiasm@3
|
521 d3.valueNames.push_back("manual");
|
matthiasm@3
|
522 list.push_back(d3);
|
matthiasm@4
|
523
|
matthiasm@4
|
524 // ParameterDescriptor d0;
|
matthiasm@4
|
525 // d0.identifier = "notedict";
|
matthiasm@4
|
526 // d0.name = "note dictionary";
|
matthiasm@4
|
527 // d0.description = "Notes in different note dictionaries differ by their spectral shapes.";
|
matthiasm@4
|
528 // d0.unit = "";
|
matthiasm@4
|
529 // d0.minValue = 0;
|
matthiasm@4
|
530 // d0.maxValue = 1;
|
matthiasm@4
|
531 // d0.defaultValue = 0;
|
matthiasm@4
|
532 // d0.isQuantized = true;
|
matthiasm@4
|
533 // d0.valueNames.push_back("s = 0.6");
|
matthiasm@4
|
534 // d0.valueNames.push_back("no NNLS");
|
matthiasm@4
|
535 // d0.quantizeStep = 1.0;
|
matthiasm@4
|
536 // list.push_back(d0);
|
matthiasm@4
|
537
|
matthiasm@4
|
538 ParameterDescriptor d1;
|
matthiasm@4
|
539 d1.identifier = "tuningmode";
|
matthiasm@4
|
540 d1.name = "tuning mode";
|
matthiasm@4
|
541 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
|
matthiasm@4
|
542 d1.unit = "";
|
matthiasm@4
|
543 d1.minValue = 0;
|
matthiasm@4
|
544 d1.maxValue = 1;
|
matthiasm@4
|
545 d1.defaultValue = 0;
|
matthiasm@4
|
546 d1.isQuantized = true;
|
matthiasm@4
|
547 d1.valueNames.push_back("global tuning");
|
matthiasm@4
|
548 d1.valueNames.push_back("local tuning");
|
matthiasm@4
|
549 d1.quantizeStep = 1.0;
|
matthiasm@4
|
550 list.push_back(d1);
|
matthiasm@4
|
551
|
matthiasm@4
|
552 // ParameterDescriptor d2;
|
matthiasm@4
|
553 // d2.identifier = "paling";
|
matthiasm@4
|
554 // d2.name = "spectral paling";
|
matthiasm@4
|
555 // d2.description = "Spectral paling: no paling - 0; whitening - 1.";
|
matthiasm@4
|
556 // d2.unit = "";
|
matthiasm@4
|
557 // d2.isQuantized = true;
|
matthiasm@4
|
558 // // d2.quantizeStep = 0.1;
|
matthiasm@4
|
559 // d2.minValue = 0.0;
|
matthiasm@4
|
560 // d2.maxValue = 1.0;
|
matthiasm@4
|
561 // d2.defaultValue = 1.0;
|
matthiasm@4
|
562 // d2.isQuantized = false;
|
matthiasm@4
|
563 // list.push_back(d2);
|
matthiasm@4
|
564
|
matthiasm@0
|
565 return list;
|
matthiasm@0
|
566 }
|
matthiasm@0
|
567
|
matthiasm@0
|
568 float
|
matthiasm@0
|
569 NNLSChroma::getParameter(string identifier) const
|
matthiasm@0
|
570 {
|
matthiasm@3
|
571 if (debug_on) cerr << "--> getParameter" << endl;
|
matthiasm@0
|
572 if (identifier == "notedict") {
|
matthiasm@0
|
573 return m_dictID;
|
matthiasm@0
|
574 }
|
matthiasm@0
|
575
|
matthiasm@0
|
576 if (identifier == "paling") {
|
matthiasm@0
|
577 return m_paling;
|
matthiasm@0
|
578 }
|
matthiasm@0
|
579
|
matthiasm@0
|
580 if (identifier == "tuningmode") {
|
matthiasm@0
|
581 if (m_tuneLocal) {
|
matthiasm@0
|
582 return 1.0;
|
matthiasm@0
|
583 } else {
|
matthiasm@0
|
584 return 0.0;
|
matthiasm@0
|
585 }
|
matthiasm@0
|
586 }
|
matthiasm@3
|
587 if (identifier == "preset") {
|
matthiasm@3
|
588 return m_preset;
|
matthiasm@3
|
589 }
|
matthiasm@0
|
590 return 0;
|
matthiasm@0
|
591
|
matthiasm@0
|
592 }
|
matthiasm@0
|
593
|
matthiasm@0
|
594 void
|
matthiasm@0
|
595 NNLSChroma::setParameter(string identifier, float value)
|
matthiasm@0
|
596 {
|
matthiasm@3
|
597 if (debug_on) cerr << "--> setParameter" << endl;
|
matthiasm@0
|
598 if (identifier == "notedict") {
|
matthiasm@0
|
599 m_dictID = (int) value;
|
matthiasm@0
|
600 }
|
matthiasm@0
|
601
|
matthiasm@0
|
602 if (identifier == "paling") {
|
matthiasm@0
|
603 m_paling = value;
|
matthiasm@0
|
604 }
|
matthiasm@0
|
605
|
matthiasm@0
|
606 if (identifier == "tuningmode") {
|
matthiasm@0
|
607 m_tuneLocal = (value > 0) ? true : false;
|
matthiasm@0
|
608 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
|
matthiasm@0
|
609 }
|
matthiasm@3
|
610 if (identifier == "preset") {
|
matthiasm@3
|
611 m_preset = value;
|
matthiasm@3
|
612 if (m_preset == 0.0) {
|
matthiasm@3
|
613 m_tuneLocal = false;
|
matthiasm@3
|
614 m_paling = 1.0;
|
matthiasm@3
|
615 m_dictID = 0.0;
|
matthiasm@3
|
616 }
|
matthiasm@3
|
617 if (m_preset == 1.0) {
|
matthiasm@3
|
618 m_tuneLocal = false;
|
matthiasm@3
|
619 m_paling = 1.0;
|
matthiasm@3
|
620 m_dictID = 1.0;
|
matthiasm@3
|
621 }
|
matthiasm@3
|
622 if (m_preset == 2.0) {
|
matthiasm@3
|
623 m_tuneLocal = false;
|
matthiasm@3
|
624 m_paling = 0.7;
|
matthiasm@3
|
625 m_dictID = 0.0;
|
matthiasm@3
|
626 }
|
matthiasm@3
|
627 }
|
matthiasm@0
|
628 }
|
matthiasm@0
|
629
|
matthiasm@0
|
630 NNLSChroma::ProgramList
|
matthiasm@0
|
631 NNLSChroma::getPrograms() const
|
matthiasm@0
|
632 {
|
matthiasm@0
|
633 if (debug_on) cerr << "--> getPrograms" << endl;
|
matthiasm@0
|
634 ProgramList list;
|
matthiasm@0
|
635
|
matthiasm@0
|
636 // If you have no programs, return an empty list (or simply don't
|
matthiasm@0
|
637 // implement this function or getCurrentProgram/selectProgram)
|
matthiasm@0
|
638
|
matthiasm@0
|
639 return list;
|
matthiasm@0
|
640 }
|
matthiasm@0
|
641
|
matthiasm@0
|
642 string
|
matthiasm@0
|
643 NNLSChroma::getCurrentProgram() const
|
matthiasm@0
|
644 {
|
matthiasm@0
|
645 if (debug_on) cerr << "--> getCurrentProgram" << endl;
|
matthiasm@0
|
646 return ""; // no programs
|
matthiasm@0
|
647 }
|
matthiasm@0
|
648
|
matthiasm@0
|
649 void
|
matthiasm@0
|
650 NNLSChroma::selectProgram(string name)
|
matthiasm@0
|
651 {
|
matthiasm@0
|
652 if (debug_on) cerr << "--> selectProgram" << endl;
|
matthiasm@0
|
653 }
|
matthiasm@0
|
654
|
matthiasm@0
|
655
|
matthiasm@0
|
656 NNLSChroma::OutputList
|
matthiasm@0
|
657 NNLSChroma::getOutputDescriptors() const
|
matthiasm@0
|
658 {
|
matthiasm@0
|
659 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
|
matthiasm@0
|
660 OutputList list;
|
matthiasm@0
|
661
|
matthiasm@0
|
662 // Make chroma names for the binNames property
|
matthiasm@0
|
663 vector<string> chromanames;
|
matthiasm@0
|
664 vector<string> bothchromanames;
|
matthiasm@0
|
665 for (int iNote = 0; iNote < 24; iNote++) {
|
matthiasm@0
|
666 bothchromanames.push_back(notenames[iNote]);
|
matthiasm@0
|
667 if (iNote < 12) {
|
matthiasm@0
|
668 chromanames.push_back(notenames[iNote]);
|
matthiasm@0
|
669 }
|
matthiasm@0
|
670 }
|
matthiasm@0
|
671
|
matthiasm@1
|
672 // int nNote = 84;
|
matthiasm@0
|
673
|
matthiasm@0
|
674 // See OutputDescriptor documentation for the possibilities here.
|
matthiasm@0
|
675 // Every plugin must have at least one output.
|
matthiasm@0
|
676
|
matthiasm@0
|
677 OutputDescriptor d0;
|
matthiasm@0
|
678 d0.identifier = "tuning";
|
matthiasm@0
|
679 d0.name = "Tuning";
|
matthiasm@0
|
680 d0.description = "The concert pitch.";
|
matthiasm@0
|
681 d0.unit = "Hz";
|
matthiasm@0
|
682 d0.hasFixedBinCount = true;
|
matthiasm@0
|
683 d0.binCount = 0;
|
matthiasm@0
|
684 d0.hasKnownExtents = true;
|
matthiasm@0
|
685 d0.minValue = 427.47;
|
matthiasm@0
|
686 d0.maxValue = 452.89;
|
matthiasm@0
|
687 d0.isQuantized = false;
|
matthiasm@0
|
688 d0.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@0
|
689 d0.hasDuration = false;
|
matthiasm@0
|
690 list.push_back(d0);
|
matthiasm@0
|
691
|
matthiasm@0
|
692 OutputDescriptor d1;
|
matthiasm@0
|
693 d1.identifier = "logfreqspec";
|
matthiasm@0
|
694 d1.name = "Log-Frequency Spectrum";
|
matthiasm@0
|
695 d1.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping.";
|
matthiasm@0
|
696 d1.unit = "";
|
matthiasm@0
|
697 d1.hasFixedBinCount = true;
|
matthiasm@0
|
698 d1.binCount = nNote;
|
matthiasm@0
|
699 d1.hasKnownExtents = false;
|
matthiasm@0
|
700 d1.isQuantized = false;
|
matthiasm@0
|
701 d1.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
702 d1.hasDuration = false;
|
matthiasm@0
|
703 d1.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
704 list.push_back(d1);
|
matthiasm@0
|
705
|
matthiasm@0
|
706 OutputDescriptor d2;
|
matthiasm@0
|
707 d2.identifier = "tunedlogfreqspec";
|
matthiasm@0
|
708 d2.name = "Tuned Log-Frequency Spectrum";
|
matthiasm@0
|
709 d2.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping, then its tuned using the estimated tuning frequency.";
|
matthiasm@0
|
710 d2.unit = "";
|
matthiasm@0
|
711 d2.hasFixedBinCount = true;
|
matthiasm@0
|
712 d2.binCount = 256;
|
matthiasm@0
|
713 d2.hasKnownExtents = false;
|
matthiasm@0
|
714 d2.isQuantized = false;
|
matthiasm@0
|
715 d2.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
716 d2.hasDuration = false;
|
matthiasm@0
|
717 d2.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
718 list.push_back(d2);
|
matthiasm@0
|
719
|
matthiasm@0
|
720 OutputDescriptor d3;
|
matthiasm@0
|
721 d3.identifier = "semitonespectrum";
|
matthiasm@0
|
722 d3.name = "Semitone Spectrum";
|
matthiasm@0
|
723 d3.description = "A semitone-spaced log-frequency spectrum derived from the third-of-a-semitone-spaced tuned log-frequency spectrum.";
|
matthiasm@0
|
724 d3.unit = "";
|
matthiasm@0
|
725 d3.hasFixedBinCount = true;
|
matthiasm@0
|
726 d3.binCount = 84;
|
matthiasm@0
|
727 d3.hasKnownExtents = false;
|
matthiasm@0
|
728 d3.isQuantized = false;
|
matthiasm@0
|
729 d3.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
730 d3.hasDuration = false;
|
matthiasm@0
|
731 d3.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
732 list.push_back(d3);
|
matthiasm@0
|
733
|
matthiasm@0
|
734 OutputDescriptor d4;
|
matthiasm@0
|
735 d4.identifier = "chroma";
|
matthiasm@0
|
736 d4.name = "Chromagram";
|
matthiasm@0
|
737 d4.description = "Tuning-adjusted chromagram from NNLS soft transcription, with an emphasis on the medium note range.";
|
matthiasm@0
|
738 d4.unit = "";
|
matthiasm@0
|
739 d4.hasFixedBinCount = true;
|
matthiasm@0
|
740 d4.binCount = 12;
|
matthiasm@0
|
741 d4.binNames = chromanames;
|
matthiasm@0
|
742 d4.hasKnownExtents = false;
|
matthiasm@0
|
743 d4.isQuantized = false;
|
matthiasm@0
|
744 d4.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
745 d4.hasDuration = false;
|
matthiasm@0
|
746 d4.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
747 list.push_back(d4);
|
matthiasm@0
|
748
|
matthiasm@0
|
749 OutputDescriptor d5;
|
matthiasm@0
|
750 d5.identifier = "basschroma";
|
matthiasm@0
|
751 d5.name = "Bass Chromagram";
|
matthiasm@0
|
752 d5.description = "Tuning-adjusted bass chromagram from NNLS soft transcription, with an emphasis on the bass note range.";
|
matthiasm@0
|
753 d5.unit = "";
|
matthiasm@0
|
754 d5.hasFixedBinCount = true;
|
matthiasm@0
|
755 d5.binCount = 12;
|
matthiasm@0
|
756 d5.binNames = chromanames;
|
matthiasm@0
|
757 d5.hasKnownExtents = false;
|
matthiasm@0
|
758 d5.isQuantized = false;
|
matthiasm@0
|
759 d5.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
760 d5.hasDuration = false;
|
matthiasm@0
|
761 d5.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
762 list.push_back(d5);
|
matthiasm@0
|
763
|
matthiasm@0
|
764 OutputDescriptor d6;
|
matthiasm@0
|
765 d6.identifier = "bothchroma";
|
matthiasm@0
|
766 d6.name = "Chromagram and Bass Chromagram";
|
matthiasm@0
|
767 d6.description = "Tuning-adjusted chromagram and bass chromagram (stacked on top of each other) from NNLS soft transcription.";
|
matthiasm@0
|
768 d6.unit = "";
|
matthiasm@0
|
769 d6.hasFixedBinCount = true;
|
matthiasm@0
|
770 d6.binCount = 24;
|
matthiasm@0
|
771 d6.binNames = bothchromanames;
|
matthiasm@0
|
772 d6.hasKnownExtents = false;
|
matthiasm@0
|
773 d6.isQuantized = false;
|
matthiasm@0
|
774 d6.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
775 d6.hasDuration = false;
|
matthiasm@0
|
776 d6.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
777 list.push_back(d6);
|
matthiasm@0
|
778
|
matthiasm@0
|
779 OutputDescriptor d7;
|
matthiasm@0
|
780 d7.identifier = "simplechord";
|
matthiasm@0
|
781 d7.name = "Simple Chord Estimate";
|
matthiasm@0
|
782 d7.description = "A simple chord estimate based on the inner product of chord templates with the smoothed chroma.";
|
matthiasm@0
|
783 d7.unit = "";
|
matthiasm@0
|
784 d7.hasFixedBinCount = true;
|
matthiasm@0
|
785 d7.binCount = 0;
|
matthiasm@0
|
786 d7.hasKnownExtents = false;
|
matthiasm@0
|
787 d7.isQuantized = false;
|
matthiasm@0
|
788 d7.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@0
|
789 d7.hasDuration = false;
|
matthiasm@0
|
790 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
791 list.push_back(d7);
|
matthiasm@0
|
792
|
matthiasm@1
|
793 // OutputDescriptor d8;
|
matthiasm@1
|
794 // d8.identifier = "inconsistency";
|
matthiasm@1
|
795 // d8.name = "Harmonic inconsistency value";
|
matthiasm@1
|
796 // d8.description = "Harmonic inconsistency. Indicates music if low, non-music or speech when high.";
|
matthiasm@1
|
797 // d8.unit = "";
|
matthiasm@1
|
798 // d8.hasFixedBinCount = true;
|
matthiasm@1
|
799 // d8.binCount = 1;
|
matthiasm@1
|
800 // d8.hasKnownExtents = false;
|
matthiasm@1
|
801 // d8.isQuantized = false;
|
matthiasm@1
|
802 // d8.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@1
|
803 // d8.hasDuration = false;
|
matthiasm@1
|
804 // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@1
|
805 // list.push_back(d8);
|
matthiasm@1
|
806 //
|
matthiasm@1
|
807 // OutputDescriptor d9;
|
matthiasm@1
|
808 // d9.identifier = "inconsistencysegment";
|
matthiasm@1
|
809 // d9.name = "Harmonic inconsistency segmenter";
|
matthiasm@1
|
810 // d9.description = "Segments the audio based on the harmonic inconsistency value into speech and music.";
|
matthiasm@1
|
811 // d9.unit = "";
|
matthiasm@1
|
812 // d9.hasFixedBinCount = true;
|
matthiasm@1
|
813 // d9.binCount = 0;
|
matthiasm@1
|
814 // d9.hasKnownExtents = true;
|
matthiasm@1
|
815 // d9.minValue = 0.1;
|
matthiasm@1
|
816 // d9.maxValue = 0.9;
|
matthiasm@1
|
817 // d9.isQuantized = false;
|
matthiasm@1
|
818 // d9.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@1
|
819 // d9.hasDuration = false;
|
matthiasm@1
|
820 // d9.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@1
|
821 // list.push_back(d9);
|
matthiasm@1
|
822 //
|
matthiasm@1
|
823 OutputDescriptor d10;
|
matthiasm@1
|
824 d10.identifier = "localtuning";
|
matthiasm@1
|
825 d10.name = "Local tuning";
|
matthiasm@4
|
826 d10.description = "Tuning based on the history up to this timestamp.";
|
matthiasm@1
|
827 d10.unit = "Hz";
|
matthiasm@1
|
828 d10.hasFixedBinCount = true;
|
matthiasm@1
|
829 d10.binCount = 1;
|
matthiasm@1
|
830 d10.hasKnownExtents = true;
|
matthiasm@1
|
831 d10.minValue = 427.47;
|
matthiasm@1
|
832 d10.maxValue = 452.89;
|
matthiasm@1
|
833 d10.isQuantized = false;
|
matthiasm@3
|
834 d10.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@1
|
835 d10.hasDuration = false;
|
matthiasm@3
|
836 // d10.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@1
|
837 list.push_back(d10);
|
matthiasm@1
|
838
|
matthiasm@0
|
839 return list;
|
matthiasm@0
|
840 }
|
matthiasm@0
|
841
|
matthiasm@0
|
842
|
matthiasm@0
|
843 bool
|
matthiasm@0
|
844 NNLSChroma::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@0
|
845 {
|
matthiasm@1
|
846 if (debug_on) {
|
matthiasm@1
|
847 cerr << "--> initialise";
|
matthiasm@1
|
848 }
|
matthiasm@1
|
849
|
matthiasm@0
|
850 if (channels < getMinChannelCount() ||
|
matthiasm@0
|
851 channels > getMaxChannelCount()) return false;
|
matthiasm@0
|
852 m_blockSize = blockSize;
|
matthiasm@0
|
853 m_stepSize = stepSize;
|
matthiasm@0
|
854 frameCount = 0;
|
matthiasm@0
|
855 int tempn = 256 * m_blockSize/2;
|
matthiasm@4
|
856 // cerr << "length of tempkernel : " << tempn << endl;
|
matthiasm@1
|
857 float *tempkernel;
|
matthiasm@1
|
858
|
matthiasm@1
|
859 tempkernel = new float[tempn];
|
matthiasm@1
|
860
|
matthiasm@0
|
861 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
|
matthiasm@1
|
862 m_kernelValue.clear();
|
matthiasm@1
|
863 m_kernelFftIndex.clear();
|
matthiasm@1
|
864 m_kernelNoteIndex.clear();
|
matthiasm@1
|
865 int countNonzero = 0;
|
matthiasm@0
|
866 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
|
matthiasm@1
|
867 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) {
|
matthiasm@1
|
868 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
matthiasm@1
|
869 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
|
matthiasm@0
|
870 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
matthiasm@1
|
871 countNonzero++;
|
matthiasm@0
|
872 }
|
matthiasm@1
|
873 m_kernelFftIndex.push_back(iFFT);
|
matthiasm@1
|
874 m_kernelNoteIndex.push_back(iNote);
|
matthiasm@0
|
875 }
|
matthiasm@0
|
876 }
|
matthiasm@1
|
877 }
|
matthiasm@4
|
878 // cerr << "nonzero count : " << countNonzero << endl;
|
matthiasm@1
|
879 delete [] tempkernel;
|
matthiasm@3
|
880 ofstream myfile;
|
matthiasm@3
|
881 myfile.open ("matrix.txt");
|
matthiasm@3
|
882 // myfile << "Writing this to a file.\n";
|
matthiasm@3
|
883 for (int i = 0; i < nNote * 84; ++i) {
|
matthiasm@3
|
884 myfile << m_dict[i] << endl;
|
matthiasm@3
|
885 }
|
matthiasm@3
|
886 myfile.close();
|
matthiasm@0
|
887 return true;
|
matthiasm@0
|
888 }
|
matthiasm@0
|
889
|
matthiasm@0
|
890 void
|
matthiasm@0
|
891 NNLSChroma::reset()
|
matthiasm@0
|
892 {
|
matthiasm@4
|
893 if (debug_on) cerr << "--> reset";
|
matthiasm@4
|
894
|
matthiasm@0
|
895 // Clear buffers, reset stored values, etc
|
matthiasm@4
|
896 frameCount = 0;
|
matthiasm@4
|
897 m_dictID = 0;
|
matthiasm@4
|
898 m_fl.clear();
|
matthiasm@4
|
899 m_meanTuning0 = 0;
|
matthiasm@4
|
900 m_meanTuning1 = 0;
|
matthiasm@4
|
901 m_meanTuning2 = 0;
|
matthiasm@4
|
902 m_localTuning0 = 0;
|
matthiasm@4
|
903 m_localTuning1 = 0;
|
matthiasm@4
|
904 m_localTuning2 = 0;
|
matthiasm@4
|
905 m_localTuning.clear();
|
matthiasm@0
|
906 }
|
matthiasm@0
|
907
|
matthiasm@0
|
908 NNLSChroma::FeatureSet
|
matthiasm@0
|
909 NNLSChroma::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
matthiasm@0
|
910 {
|
matthiasm@4
|
911 if (debug_on) cerr << "--> process" << endl;
|
matthiasm@0
|
912 frameCount++;
|
matthiasm@0
|
913 float *magnitude = new float[m_blockSize/2];
|
matthiasm@0
|
914
|
matthiasm@0
|
915 Feature f10; // local tuning
|
matthiasm@3
|
916 f10.hasTimestamp = true;
|
matthiasm@4
|
917 f10.timestamp = timestamp;
|
matthiasm@0
|
918 const float *fbuf = inputBuffers[0];
|
matthiasm@0
|
919
|
matthiasm@0
|
920 // make magnitude
|
matthiasm@0
|
921 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
|
matthiasm@0
|
922 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
|
matthiasm@0
|
923 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
|
matthiasm@0
|
924 }
|
matthiasm@4
|
925
|
matthiasm@0
|
926 // note magnitude mapping using pre-calculated matrix
|
matthiasm@0
|
927 float *nm = new float[nNote]; // note magnitude
|
matthiasm@0
|
928 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
matthiasm@0
|
929 nm[iNote] = 0; // initialise as 0
|
matthiasm@0
|
930 }
|
matthiasm@0
|
931 int binCount = 0;
|
matthiasm@0
|
932 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
|
matthiasm@0
|
933 // cerr << ".";
|
matthiasm@1
|
934 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
|
matthiasm@1
|
935 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl;
|
matthiasm@0
|
936 binCount++;
|
matthiasm@0
|
937 }
|
matthiasm@1
|
938 // cerr << nm[20];
|
matthiasm@1
|
939 // cerr << endl;
|
matthiasm@0
|
940
|
matthiasm@0
|
941
|
matthiasm@0
|
942 float one_over_N = 1.0/frameCount;
|
matthiasm@0
|
943 // update means of complex tuning variables
|
matthiasm@0
|
944 m_meanTuning0 *= float(frameCount-1)*one_over_N;
|
matthiasm@0
|
945 m_meanTuning1 *= float(frameCount-1)*one_over_N;
|
matthiasm@0
|
946 m_meanTuning2 *= float(frameCount-1)*one_over_N;
|
matthiasm@0
|
947
|
matthiasm@0
|
948 for (int iTone = 0; iTone < 160; iTone = iTone + 3) {
|
matthiasm@0
|
949 m_meanTuning0 += nm[iTone + 0]*one_over_N;
|
matthiasm@0
|
950 m_meanTuning1 += nm[iTone + 1]*one_over_N;
|
matthiasm@0
|
951 m_meanTuning2 += nm[iTone + 2]*one_over_N;
|
matthiasm@3
|
952 float ratioOld = 0.997;
|
matthiasm@3
|
953 m_localTuning0 *= ratioOld; m_localTuning0 += nm[iTone + 0] * (1 - ratioOld);
|
matthiasm@3
|
954 m_localTuning1 *= ratioOld; m_localTuning1 += nm[iTone + 1] * (1 - ratioOld);
|
matthiasm@3
|
955 m_localTuning2 *= ratioOld; m_localTuning2 += nm[iTone + 2] * (1 - ratioOld);
|
matthiasm@0
|
956 }
|
matthiasm@0
|
957
|
matthiasm@0
|
958 // if (m_tuneLocal) {
|
matthiasm@0
|
959 // local tuning
|
matthiasm@0
|
960 float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2;
|
matthiasm@0
|
961 float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2;
|
matthiasm@0
|
962 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
|
matthiasm@0
|
963 m_localTuning.push_back(normalisedtuning);
|
matthiasm@0
|
964 float tuning440 = 440 * pow(2,normalisedtuning/12);
|
matthiasm@0
|
965 f10.values.push_back(tuning440);
|
matthiasm@3
|
966 // cerr << tuning440 << endl;
|
matthiasm@0
|
967 // }
|
matthiasm@0
|
968
|
matthiasm@0
|
969 Feature f1; // logfreqspec
|
matthiasm@0
|
970 f1.hasTimestamp = true;
|
matthiasm@0
|
971 f1.timestamp = timestamp;
|
matthiasm@0
|
972 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
matthiasm@0
|
973 f1.values.push_back(nm[iNote]);
|
matthiasm@0
|
974 }
|
matthiasm@0
|
975
|
matthiasm@0
|
976 FeatureSet fs;
|
matthiasm@0
|
977 fs[1].push_back(f1);
|
matthiasm@3
|
978 fs[8].push_back(f10);
|
matthiasm@0
|
979
|
matthiasm@0
|
980 // deletes
|
matthiasm@0
|
981 delete[] magnitude;
|
matthiasm@0
|
982 delete[] nm;
|
matthiasm@0
|
983
|
matthiasm@0
|
984 m_fl.push_back(f1); // remember note magnitude for getRemainingFeatures
|
matthiasm@7
|
985 char * pPath;
|
matthiasm@7
|
986 pPath = getenv ("VAMP_PATH");
|
matthiasm@7
|
987
|
matthiasm@7
|
988
|
matthiasm@0
|
989 return fs;
|
matthiasm@0
|
990 }
|
matthiasm@0
|
991
|
matthiasm@0
|
992 NNLSChroma::FeatureSet
|
matthiasm@0
|
993 NNLSChroma::getRemainingFeatures()
|
matthiasm@0
|
994 {
|
matthiasm@4
|
995 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
|
matthiasm@4
|
996 FeatureSet fsOut;
|
matthiasm@4
|
997 if (m_fl.size() == 0) return fsOut;
|
matthiasm@9
|
998 int nChord = m_chordnames.size();
|
matthiasm@0
|
999 //
|
matthiasm@1
|
1000 /** Calculate Tuning
|
matthiasm@1
|
1001 calculate tuning from (using the angle of the complex number defined by the
|
matthiasm@1
|
1002 cumulative mean real and imag values)
|
matthiasm@1
|
1003 **/
|
matthiasm@1
|
1004 float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
|
matthiasm@1
|
1005 float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
|
matthiasm@1
|
1006 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
|
matthiasm@1
|
1007 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
|
matthiasm@1
|
1008 int intShift = floor(normalisedtuning * 3);
|
matthiasm@1
|
1009 float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
|
matthiasm@1
|
1010
|
matthiasm@1
|
1011 char buffer0 [50];
|
matthiasm@1
|
1012
|
matthiasm@1
|
1013 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
|
matthiasm@1
|
1014
|
matthiasm@1
|
1015 // cerr << "normalisedtuning: " << normalisedtuning << '\n';
|
matthiasm@1
|
1016
|
matthiasm@1
|
1017 // push tuning to FeatureSet fsOut
|
matthiasm@1
|
1018 Feature f0; // tuning
|
matthiasm@1
|
1019 f0.hasTimestamp = true;
|
matthiasm@1
|
1020 f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));;
|
matthiasm@1
|
1021 f0.label = buffer0;
|
matthiasm@1
|
1022 fsOut[0].push_back(f0);
|
matthiasm@1
|
1023
|
matthiasm@1
|
1024 /** Tune Log-Frequency Spectrogram
|
matthiasm@1
|
1025 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
|
matthiasm@1
|
1026 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
|
matthiasm@1
|
1027 **/
|
matthiasm@1
|
1028
|
matthiasm@1
|
1029 float tempValue = 0;
|
matthiasm@1
|
1030 float dbThreshold = 0; // relative to the background spectrum
|
matthiasm@1
|
1031 float thresh = pow(10,dbThreshold/20);
|
matthiasm@1
|
1032 // cerr << "tune local ? " << m_tuneLocal << endl;
|
matthiasm@1
|
1033 int count = 0;
|
matthiasm@1
|
1034
|
matthiasm@1
|
1035 for (FeatureList::iterator i = m_fl.begin(); i != m_fl.end(); ++i) {
|
matthiasm@1
|
1036 Feature f1 = *i;
|
matthiasm@1
|
1037 Feature f2; // tuned log-frequency spectrum
|
matthiasm@1
|
1038 f2.hasTimestamp = true;
|
matthiasm@1
|
1039 f2.timestamp = f1.timestamp;
|
matthiasm@1
|
1040 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
|
matthiasm@1
|
1041
|
matthiasm@1
|
1042 if (m_tuneLocal) {
|
matthiasm@1
|
1043 intShift = floor(m_localTuning[count] * 3);
|
matthiasm@1
|
1044 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
|
matthiasm@1
|
1045 }
|
matthiasm@1
|
1046
|
matthiasm@1
|
1047 // cerr << intShift << " " << intFactor << endl;
|
matthiasm@1
|
1048
|
matthiasm@4
|
1049 for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
|
matthiasm@1
|
1050 tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor;
|
matthiasm@1
|
1051 f2.values.push_back(tempValue);
|
matthiasm@1
|
1052 }
|
matthiasm@1
|
1053
|
matthiasm@1
|
1054 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
|
matthiasm@1
|
1055 vector<float> runningmean = SpecialConvolution(f2.values,hw);
|
matthiasm@1
|
1056 vector<float> runningstd;
|
matthiasm@1
|
1057 for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance)
|
matthiasm@1
|
1058 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
|
matthiasm@1
|
1059 }
|
matthiasm@1
|
1060 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
|
matthiasm@1
|
1061 for (int i = 0; i < 256; i++) {
|
matthiasm@1
|
1062 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
|
matthiasm@1
|
1063 if (runningstd[i] > 0) {
|
matthiasm@1
|
1064 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
|
matthiasm@1
|
1065 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
|
matthiasm@1
|
1066 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
|
matthiasm@1
|
1067 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
|
matthiasm@1
|
1068 }
|
matthiasm@1
|
1069 if (f2.values[i] < 0) {
|
matthiasm@1
|
1070 cerr << "ERROR: negative value in logfreq spectrum" << endl;
|
matthiasm@1
|
1071 }
|
matthiasm@1
|
1072 }
|
matthiasm@1
|
1073 fsOut[2].push_back(f2);
|
matthiasm@1
|
1074 count++;
|
matthiasm@1
|
1075 }
|
matthiasm@1
|
1076
|
matthiasm@1
|
1077 /** Semitone spectrum and chromagrams
|
matthiasm@1
|
1078 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
|
matthiasm@1
|
1079 is inferred using a non-negative least squares algorithm.
|
matthiasm@1
|
1080 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
|
matthiasm@1
|
1081 bass and treble stacked onto each other).
|
matthiasm@1
|
1082 **/
|
matthiasm@1
|
1083 // taucs_ccs_matrix* A_original_ordering = taucs_construct_sorted_ccs_matrix(nnlsdict06, nnls_m, nnls_n);
|
matthiasm@1
|
1084
|
matthiasm@1
|
1085 vector<vector<float> > chordogram;
|
matthiasm@3
|
1086 vector<vector<int> > scoreChordogram;
|
matthiasm@1
|
1087 vector<float> oldchroma = vector<float>(12,0);
|
matthiasm@1
|
1088 vector<float> oldbasschroma = vector<float>(12,0);
|
matthiasm@1
|
1089 count = 0;
|
matthiasm@9
|
1090
|
matthiasm@1
|
1091 for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) {
|
matthiasm@1
|
1092 Feature f2 = *it; // logfreq spectrum
|
matthiasm@1
|
1093 Feature f3; // semitone spectrum
|
matthiasm@1
|
1094 Feature f4; // treble chromagram
|
matthiasm@1
|
1095 Feature f5; // bass chromagram
|
matthiasm@1
|
1096 Feature f6; // treble and bass chromagram
|
matthiasm@1
|
1097
|
matthiasm@1
|
1098 f3.hasTimestamp = true;
|
matthiasm@1
|
1099 f3.timestamp = f2.timestamp;
|
matthiasm@1
|
1100
|
matthiasm@1
|
1101 f4.hasTimestamp = true;
|
matthiasm@1
|
1102 f4.timestamp = f2.timestamp;
|
matthiasm@1
|
1103
|
matthiasm@1
|
1104 f5.hasTimestamp = true;
|
matthiasm@1
|
1105 f5.timestamp = f2.timestamp;
|
matthiasm@1
|
1106
|
matthiasm@1
|
1107 f6.hasTimestamp = true;
|
matthiasm@1
|
1108 f6.timestamp = f2.timestamp;
|
matthiasm@1
|
1109
|
matthiasm@3
|
1110 float b[256];
|
matthiasm@1
|
1111
|
matthiasm@1
|
1112 bool some_b_greater_zero = false;
|
matthiasm@3
|
1113 float sumb = 0;
|
matthiasm@1
|
1114 for (int i = 0; i < 256; i++) {
|
matthiasm@3
|
1115 // b[i] = m_dict[(256 * count + i) % (256 * 84)];
|
matthiasm@3
|
1116 b[i] = f2.values[i];
|
matthiasm@3
|
1117 sumb += b[i];
|
matthiasm@1
|
1118 if (b[i] > 0) {
|
matthiasm@1
|
1119 some_b_greater_zero = true;
|
matthiasm@1
|
1120 }
|
matthiasm@1
|
1121 }
|
matthiasm@1
|
1122
|
matthiasm@1
|
1123 // here's where the non-negative least squares algorithm calculates the note activation x
|
matthiasm@1
|
1124
|
matthiasm@1
|
1125 vector<float> chroma = vector<float>(12, 0);
|
matthiasm@1
|
1126 vector<float> basschroma = vector<float>(12, 0);
|
matthiasm@1
|
1127 float currval;
|
matthiasm@1
|
1128 unsigned iSemitone = 0;
|
matthiasm@1
|
1129
|
matthiasm@1
|
1130 if (some_b_greater_zero) {
|
matthiasm@3
|
1131 if (m_dictID == 1) {
|
matthiasm@1
|
1132 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
matthiasm@1
|
1133 currval = 0;
|
matthiasm@3
|
1134 currval += b[iNote + 1 + -1] * 0.5;
|
matthiasm@3
|
1135 currval += b[iNote + 1 + 0] * 1.0;
|
matthiasm@3
|
1136 currval += b[iNote + 1 + 1] * 0.5;
|
matthiasm@1
|
1137 f3.values.push_back(currval);
|
matthiasm@1
|
1138 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
|
matthiasm@1
|
1139 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
|
matthiasm@1
|
1140 iSemitone++;
|
matthiasm@1
|
1141 }
|
matthiasm@1
|
1142
|
matthiasm@1
|
1143 } else {
|
matthiasm@3
|
1144 float x[84+1000];
|
matthiasm@3
|
1145 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
|
matthiasm@3
|
1146 // for (int i = 0; i < 84; ++i) {
|
matthiasm@3
|
1147 // x[i] = b[3*i+3];
|
matthiasm@3
|
1148 // }
|
matthiasm@3
|
1149 float rnorm;
|
matthiasm@3
|
1150 float w[84+1000];
|
matthiasm@3
|
1151 float zz[84+1000];
|
matthiasm@3
|
1152 int indx[84+1000];
|
matthiasm@1
|
1153 int mode;
|
matthiasm@3
|
1154 float curr_dict[256*84];
|
matthiasm@3
|
1155 for (unsigned i = 0; i < 256 * 84; ++i) {
|
matthiasm@3
|
1156 curr_dict[i] = 1.0 * m_dict[i];
|
matthiasm@3
|
1157 }
|
matthiasm@3
|
1158 nnls(curr_dict, nNote, nNote, 84, b, x, &rnorm, w, zz, indx, &mode);
|
matthiasm@3
|
1159 for (unsigned iNote = 0; iNote < 84; ++iNote) {
|
matthiasm@3
|
1160 // for (unsigned kNote = 0; kNote < 256; ++kNote) {
|
matthiasm@3
|
1161 // x[iNote] += m_dict[kNote + nNote * iNote] * b[kNote];
|
matthiasm@3
|
1162 // }
|
matthiasm@3
|
1163 f3.values.push_back(x[iNote]);
|
matthiasm@3
|
1164 // cerr << mode << endl;
|
matthiasm@3
|
1165 chroma[iNote % 12] += x[iNote] * treblewindow[iNote];
|
matthiasm@3
|
1166 basschroma[iNote % 12] += x[iNote] * basswindow[iNote];
|
matthiasm@3
|
1167 // iSemitone++;
|
matthiasm@3
|
1168 }
|
matthiasm@1
|
1169 }
|
matthiasm@1
|
1170 }
|
matthiasm@1
|
1171
|
matthiasm@1
|
1172 f4.values = chroma;
|
matthiasm@1
|
1173 f5.values = basschroma;
|
matthiasm@1
|
1174 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
|
matthiasm@1
|
1175 f6.values = chroma;
|
matthiasm@1
|
1176
|
matthiasm@1
|
1177 // local chord estimation
|
matthiasm@1
|
1178 vector<float> currentChordSalience;
|
matthiasm@1
|
1179 float tempchordvalue = 0;
|
matthiasm@1
|
1180 float sumchordvalue = 0;
|
matthiasm@9
|
1181
|
matthiasm@1
|
1182 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@1
|
1183 tempchordvalue = 0;
|
matthiasm@1
|
1184 for (int iBin = 0; iBin < 12; iBin++) {
|
matthiasm@9
|
1185 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
matthiasm@1
|
1186 }
|
matthiasm@1
|
1187 for (int iBin = 12; iBin < 24; iBin++) {
|
matthiasm@9
|
1188 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
matthiasm@1
|
1189 }
|
matthiasm@1
|
1190 sumchordvalue+=tempchordvalue;
|
matthiasm@1
|
1191 currentChordSalience.push_back(tempchordvalue);
|
matthiasm@1
|
1192 }
|
matthiasm@1
|
1193 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@1
|
1194 currentChordSalience[iChord] /= sumchordvalue;
|
matthiasm@1
|
1195 }
|
matthiasm@1
|
1196 chordogram.push_back(currentChordSalience);
|
matthiasm@1
|
1197
|
matthiasm@1
|
1198 fsOut[3].push_back(f3);
|
matthiasm@1
|
1199 fsOut[4].push_back(f4);
|
matthiasm@1
|
1200 fsOut[5].push_back(f5);
|
matthiasm@1
|
1201 fsOut[6].push_back(f6);
|
matthiasm@1
|
1202 count++;
|
matthiasm@1
|
1203 }
|
matthiasm@0
|
1204 // int musicitykernelwidth = (50 * 2048) / m_stepSize;
|
matthiasm@0
|
1205 //
|
matthiasm@3
|
1206 /* Simple chord estimation
|
matthiasm@3
|
1207 I just take the local chord estimates ("currentChordSalience") and average them over time, then
|
matthiasm@3
|
1208 take the maximum. Very simple, don't do this at home...
|
matthiasm@3
|
1209 */
|
matthiasm@3
|
1210 count = 0;
|
matthiasm@3
|
1211 int halfwindowlength = m_inputSampleRate / m_stepSize;
|
matthiasm@3
|
1212 vector<int> chordSequence;
|
matthiasm@3
|
1213 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram
|
matthiasm@3
|
1214 vector<int> temp = vector<int>(nChord,0);
|
matthiasm@3
|
1215 scoreChordogram.push_back(temp);
|
matthiasm@3
|
1216 }
|
matthiasm@4
|
1217 for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) {
|
matthiasm@3
|
1218 int startIndex = count + 1;
|
matthiasm@3
|
1219 int endIndex = count + 2 * halfwindowlength;
|
matthiasm@3
|
1220 vector<float> temp = vector<float>(nChord,0);
|
matthiasm@3
|
1221 float maxval = 0; // will be the value of the most salient chord in this frame
|
matthiasm@4
|
1222 float maxindex = 0; //... and the index thereof
|
matthiasm@3
|
1223 unsigned bestchordL = 0; // index of the best "left" chord
|
matthiasm@3
|
1224 unsigned bestchordR = 0; // index of the best "right" chord
|
matthiasm@4
|
1225 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
|
matthiasm@3
|
1226 // now find the max values on both sides of iWF
|
matthiasm@3
|
1227 // left side:
|
matthiasm@3
|
1228 float maxL = 0;
|
matthiasm@3
|
1229 unsigned maxindL = nChord-1;
|
matthiasm@3
|
1230 for (unsigned iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@3
|
1231 float currsum = 0;
|
matthiasm@3
|
1232 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
|
matthiasm@3
|
1233 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@3
|
1234 }
|
matthiasm@3
|
1235 if (iChord == nChord-1) currsum *= 0.8;
|
matthiasm@3
|
1236 if (currsum > maxL) {
|
matthiasm@3
|
1237 maxL = currsum;
|
matthiasm@3
|
1238 maxindL = iChord;
|
matthiasm@3
|
1239 }
|
matthiasm@3
|
1240 }
|
matthiasm@3
|
1241 // right side:
|
matthiasm@3
|
1242 float maxR = 0;
|
matthiasm@3
|
1243 unsigned maxindR = nChord-1;
|
matthiasm@3
|
1244 for (unsigned iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@3
|
1245 float currsum = 0;
|
matthiasm@3
|
1246 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
matthiasm@3
|
1247 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@3
|
1248 }
|
matthiasm@3
|
1249 if (iChord == nChord-1) currsum *= 0.8;
|
matthiasm@3
|
1250 if (currsum > maxR) {
|
matthiasm@3
|
1251 maxR = currsum;
|
matthiasm@3
|
1252 maxindR = iChord;
|
matthiasm@3
|
1253 }
|
matthiasm@3
|
1254 }
|
matthiasm@3
|
1255 if (maxL+maxR > maxval) {
|
matthiasm@3
|
1256 maxval = maxL+maxR;
|
matthiasm@3
|
1257 maxindex = iWF;
|
matthiasm@3
|
1258 bestchordL = maxindL;
|
matthiasm@3
|
1259 bestchordR = maxindR;
|
matthiasm@3
|
1260 }
|
matthiasm@3
|
1261
|
matthiasm@3
|
1262 }
|
matthiasm@3
|
1263 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
|
matthiasm@3
|
1264 // add a score to every chord-frame-point that was part of a maximum
|
matthiasm@3
|
1265 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
|
matthiasm@3
|
1266 scoreChordogram[iFrame+count][bestchordL]++;
|
matthiasm@3
|
1267 }
|
matthiasm@3
|
1268 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
matthiasm@3
|
1269 scoreChordogram[iFrame+count][bestchordR]++;
|
matthiasm@3
|
1270 }
|
matthiasm@3
|
1271 count++;
|
matthiasm@3
|
1272 }
|
matthiasm@3
|
1273
|
matthiasm@3
|
1274 count = 0;
|
matthiasm@3
|
1275 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
|
matthiasm@3
|
1276 float maxval = 0; // will be the value of the most salient chord in this frame
|
matthiasm@3
|
1277 float maxindex = 0; //... and the index thereof
|
matthiasm@3
|
1278 for (unsigned iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@3
|
1279 if (scoreChordogram[count][iChord] > maxval) {
|
matthiasm@3
|
1280 maxval = scoreChordogram[count][iChord];
|
matthiasm@3
|
1281 maxindex = iChord;
|
matthiasm@4
|
1282 // cerr << iChord << endl;
|
matthiasm@3
|
1283 }
|
matthiasm@3
|
1284 }
|
matthiasm@3
|
1285 chordSequence.push_back(maxindex);
|
matthiasm@4
|
1286 // cerr << "before modefilter, maxindex: " << maxindex << endl;
|
matthiasm@3
|
1287 count++;
|
matthiasm@3
|
1288 }
|
matthiasm@3
|
1289
|
matthiasm@3
|
1290
|
matthiasm@3
|
1291 // mode filter on chordSequence
|
matthiasm@3
|
1292 count = 0;
|
matthiasm@3
|
1293 int oldChordIndex = -1;
|
matthiasm@3
|
1294 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
|
matthiasm@3
|
1295 Feature f6 = *it;
|
matthiasm@3
|
1296 Feature f7; // chord estimate
|
matthiasm@3
|
1297 f7.hasTimestamp = true;
|
matthiasm@3
|
1298 f7.timestamp = f6.timestamp;
|
matthiasm@3
|
1299 vector<int> chordCount = vector<int>(nChord,0);
|
matthiasm@3
|
1300 int maxChordCount = 0;
|
matthiasm@3
|
1301 int maxChordIndex = nChord-1;
|
matthiasm@4
|
1302 int startIndex = max(count - halfwindowlength/2,0);
|
matthiasm@4
|
1303 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
|
matthiasm@4
|
1304 for (int i = startIndex; i < endIndex; i++) {
|
matthiasm@4
|
1305 chordCount[chordSequence[i]]++;
|
matthiasm@4
|
1306 if (chordCount[chordSequence[i]] > maxChordCount) {
|
matthiasm@7
|
1307 // cerr << "start index " << startIndex << endl;
|
matthiasm@4
|
1308 maxChordCount++;
|
matthiasm@4
|
1309 maxChordIndex = chordSequence[i];
|
matthiasm@4
|
1310 }
|
matthiasm@4
|
1311 }
|
matthiasm@4
|
1312 // chordSequence[count] = maxChordIndex;
|
matthiasm@7
|
1313 // cerr << maxChordIndex << endl;
|
matthiasm@3
|
1314 if (oldChordIndex != maxChordIndex) {
|
matthiasm@3
|
1315 oldChordIndex = maxChordIndex;
|
matthiasm@3
|
1316
|
matthiasm@9
|
1317 // char buffer1 [50];
|
matthiasm@9
|
1318 // if (maxChordIndex < nChord - 1) {
|
matthiasm@9
|
1319 // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
|
matthiasm@9
|
1320 // } else {
|
matthiasm@9
|
1321 // sprintf(buffer1, "N");
|
matthiasm@9
|
1322 // }
|
matthiasm@9
|
1323 // f7.label = buffer1;
|
matthiasm@9
|
1324 f7.label = m_chordnames[maxChordIndex];
|
matthiasm@3
|
1325 fsOut[7].push_back(f7);
|
matthiasm@3
|
1326 }
|
matthiasm@3
|
1327 count++;
|
matthiasm@3
|
1328 }
|
matthiasm@0
|
1329 // // musicity
|
matthiasm@0
|
1330 // count = 0;
|
matthiasm@0
|
1331 // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
|
matthiasm@0
|
1332 // vector<float> musicityValue;
|
matthiasm@0
|
1333 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
|
matthiasm@0
|
1334 // Feature f4 = *it;
|
matthiasm@0
|
1335 //
|
matthiasm@0
|
1336 // int startIndex = max(count - musicitykernelwidth/2,0);
|
matthiasm@0
|
1337 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
|
matthiasm@0
|
1338 // float chromasum = 0;
|
matthiasm@0
|
1339 // float diffsum = 0;
|
matthiasm@0
|
1340 // for (int k = 0; k < 12; k++) {
|
matthiasm@0
|
1341 // for (int i = startIndex + 1; i < endIndex; i++) {
|
matthiasm@0
|
1342 // chromasum += pow(fsOut[4][i].values[k],2);
|
matthiasm@0
|
1343 // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]);
|
matthiasm@0
|
1344 // }
|
matthiasm@0
|
1345 // }
|
matthiasm@0
|
1346 // diffsum /= chromasum;
|
matthiasm@0
|
1347 // musicityValue.push_back(diffsum);
|
matthiasm@0
|
1348 // count++;
|
matthiasm@0
|
1349 // }
|
matthiasm@0
|
1350 //
|
matthiasm@0
|
1351 // float musicityThreshold = 0.44;
|
matthiasm@0
|
1352 // if (m_stepSize == 4096) {
|
matthiasm@0
|
1353 // musicityThreshold = 0.74;
|
matthiasm@0
|
1354 // }
|
matthiasm@0
|
1355 // if (m_stepSize == 4410) {
|
matthiasm@0
|
1356 // musicityThreshold = 0.77;
|
matthiasm@0
|
1357 // }
|
matthiasm@0
|
1358 //
|
matthiasm@0
|
1359 // count = 0;
|
matthiasm@0
|
1360 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
|
matthiasm@0
|
1361 // Feature f4 = *it;
|
matthiasm@0
|
1362 // Feature f8; // musicity
|
matthiasm@0
|
1363 // Feature f9; // musicity segmenter
|
matthiasm@0
|
1364 //
|
matthiasm@0
|
1365 // f8.hasTimestamp = true;
|
matthiasm@0
|
1366 // f8.timestamp = f4.timestamp;
|
matthiasm@0
|
1367 // f9.hasTimestamp = true;
|
matthiasm@0
|
1368 // f9.timestamp = f4.timestamp;
|
matthiasm@0
|
1369 //
|
matthiasm@0
|
1370 // int startIndex = max(count - musicitykernelwidth/2,0);
|
matthiasm@0
|
1371 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
|
matthiasm@0
|
1372 // int musicityCount = 0;
|
matthiasm@0
|
1373 // for (int i = startIndex; i <= endIndex; i++) {
|
matthiasm@0
|
1374 // if (musicityValue[i] > musicityThreshold) musicityCount++;
|
matthiasm@0
|
1375 // }
|
matthiasm@0
|
1376 // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1);
|
matthiasm@0
|
1377 //
|
matthiasm@0
|
1378 // if (isSpeech) {
|
matthiasm@0
|
1379 // if (oldlabeltype != 2) {
|
matthiasm@0
|
1380 // f9.label = "Speech";
|
matthiasm@0
|
1381 // fsOut[9].push_back(f9);
|
matthiasm@0
|
1382 // oldlabeltype = 2;
|
matthiasm@0
|
1383 // }
|
matthiasm@0
|
1384 // } else {
|
matthiasm@0
|
1385 // if (oldlabeltype != 1) {
|
matthiasm@0
|
1386 // f9.label = "Music";
|
matthiasm@0
|
1387 // fsOut[9].push_back(f9);
|
matthiasm@0
|
1388 // oldlabeltype = 1;
|
matthiasm@0
|
1389 // }
|
matthiasm@0
|
1390 // }
|
matthiasm@0
|
1391 // f8.values.push_back(musicityValue[count]);
|
matthiasm@0
|
1392 // fsOut[8].push_back(f8);
|
matthiasm@0
|
1393 // count++;
|
matthiasm@0
|
1394 // }
|
matthiasm@0
|
1395 return fsOut;
|
matthiasm@0
|
1396
|
matthiasm@0
|
1397 }
|
matthiasm@0
|
1398
|