matthiasm@0
|
1
|
matthiasm@0
|
2 #include "NNLSChroma.h"
|
matthiasm@0
|
3 #include <cmath>
|
matthiasm@10
|
4 // #include <omp.h>
|
matthiasm@0
|
5 #include <list>
|
matthiasm@0
|
6 #include <iostream>
|
matthiasm@3
|
7 #include <fstream>
|
matthiasm@0
|
8 #include <sstream>
|
matthiasm@0
|
9 #include <cassert>
|
matthiasm@7
|
10 #include <cstdlib>
|
matthiasm@0
|
11 #include <cstdio>
|
matthiasm@7
|
12 #include <boost/tokenizer.hpp>
|
matthiasm@7
|
13 #include <boost/iostreams/device/file.hpp>
|
matthiasm@7
|
14 #include <boost/iostreams/stream.hpp>
|
matthiasm@7
|
15 #include <boost/lexical_cast.hpp>
|
matthiasm@1
|
16 #include "nnls.h"
|
matthiasm@0
|
17 #include "chorddict.cpp"
|
matthiasm@9
|
18
|
matthiasm@10
|
19 // #include <omp.h>
|
matthiasm@10
|
20 // #define N 1000
|
matthiasm@10
|
21 // #define CHUNKSIZE 100
|
matthiasm@9
|
22
|
matthiasm@9
|
23
|
matthiasm@0
|
24 using namespace std;
|
matthiasm@7
|
25 using namespace boost;
|
matthiasm@0
|
26
|
matthiasm@0
|
// Numerically sin(120 degrees) and cos(120 degrees).
const float sinvalue = 0.866025404;
const float cosvalue = -0.5;

// Symmetric 19-tap smoothing window; the taps sum to roughly 1.
const float hammingwind[19] = {
    0.0082, 0.0110, 0.0191, 0.0316, 0.0470, 0.0633, 0.0786, 0.0911, 0.0992,
    0.1020,
    0.0992, 0.0911, 0.0786, 0.0633, 0.0470, 0.0316, 0.0191, 0.0110, 0.0082
};

// Weighting window with 84 entries; only the lowest 37 entries are non-zero.
const float basswindow[] = {
    0.001769, 0.015848, 0.043608, 0.084265, 0.136670, 0.199341,
    0.270509, 0.348162, 0.430105, 0.514023, 0.597545, 0.678311,
    0.754038, 0.822586, 0.882019, 0.930656, 0.967124, 0.990393,
    0.999803, 0.995091, 0.976388, 0.944223, 0.899505, 0.843498,
    0.777785, 0.704222, 0.624888, 0.542025, 0.457975, 0.375112,
    0.295778, 0.222215, 0.156502, 0.100495, 0.055777, 0.023612,
    0.004909, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000
};

// Weighting window with 84 entries, symmetric about its centre.
const float treblewindow[] = {
    0.000350, 0.003144, 0.008717, 0.017037, 0.028058, 0.041719,
    0.057942, 0.076638, 0.097701, 0.121014, 0.146447, 0.173856,
    0.203090, 0.233984, 0.266366, 0.300054, 0.334860, 0.370590,
    0.407044, 0.444018, 0.481304, 0.518696, 0.555982, 0.592956,
    0.629410, 0.665140, 0.699946, 0.733634, 0.766016, 0.796910,
    0.826144, 0.853553, 0.878986, 0.902299, 0.923362, 0.942058,
    0.958281, 0.971942, 0.982963, 0.991283, 0.996856, 0.999650,
    0.999650, 0.996856, 0.991283, 0.982963, 0.971942, 0.958281,
    0.942058, 0.923362, 0.902299, 0.878986, 0.853553, 0.826144,
    0.796910, 0.766016, 0.733634, 0.699946, 0.665140, 0.629410,
    0.592956, 0.555982, 0.518696, 0.481304, 0.444018, 0.407044,
    0.370590, 0.334860, 0.300054, 0.266366, 0.233984, 0.203090,
    0.173856, 0.146447, 0.121014, 0.097701, 0.076638, 0.057942,
    0.041719, 0.028058, 0.017037, 0.008717, 0.003144, 0.000350
};

// Display names: indices 0-11 are the bass pitch classes, 12-23 the plain
// pitch classes, both starting at A.
const char* notenames[24] = {
    "A (bass)", "Bb (bass)", "B (bass)", "C (bass)", "C# (bass)", "D (bass)",
    "Eb (bass)", "E (bass)", "F (bass)", "F# (bass)", "G (bass)", "Ab (bass)",
    "A", "Bb", "B", "C", "C#", "D", "Eb", "E", "F", "F#", "G", "Ab"
};

// bassnames[root][interval]: spelling of the bass note that lies `interval`
// semitones above `root` (root index 0 = A). Entries for intervals 1, 6 and 8
// are empty strings.
const char* bassnames[12][12] = {
    {"A",  "", "B",  "C",  "C#", "D",  "", "E",  "", "F#", "G",  "G#"},
    {"Bb", "", "C",  "Db", "D",  "Eb", "", "F",  "", "G",  "Ab", "A"},
    {"B",  "", "C#", "D",  "D#", "E",  "", "F#", "", "G#", "A",  "A#"},
    {"C",  "", "D",  "Eb", "E",  "F",  "", "G",  "", "A",  "Bb", "B"},
    {"C#", "", "D#", "E",  "E#", "F#", "", "G#", "", "A#", "B",  "B#"},
    {"D",  "", "E",  "F",  "F#", "G",  "", "A",  "", "B",  "C",  "C#"},
    {"Eb", "", "F",  "Gb", "G",  "Ab", "", "Bb", "", "C",  "Db", "D"},
    {"E",  "", "F#", "G",  "G#", "A",  "", "B",  "", "C#", "D",  "D#"},
    {"F",  "", "G",  "Ab", "A",  "Bb", "", "C",  "", "D",  "Eb", "E"},
    {"F#", "", "G#", "A",  "A#", "B",  "", "C#", "", "D#", "E",  "E#"},
    {"G",  "", "A",  "Bb", "B",  "C",  "", "D",  "", "E",  "F",  "F#"},
    {"Ab", "", "Bb", "Cb", "C",  "Db", "", "Eb", "", "F",  "Gb", "G"}
};

// The smoothing window above, wrapped in a vector.
const std::vector<float> hw(hammingwind,
                            hammingwind + sizeof(hammingwind) / sizeof(hammingwind[0]));

// Number of bins in the log-frequency (note) spectrum.
const int nNote = 256;
|
matthiasm@0
|
51
|
matthiasm@0
|
// Compile-time switch for the "--> function" trace output on cerr.
const bool debug_on = false;

/** Special Convolution
    The result covers the whole convolvee (the first argument). In the valid
    core part it contains the usual convolution values; the pads at the
    beginning (ending) repeat the first (last) valid convolution value.

    The returned vector has max(256, convolvee.size()) elements; elements
    beyond the convolvee length are zero. For inputs of up to 256 elements
    this is the historical fixed-size result; longer inputs used to write
    past the end of the 256-element result.

    The kernel length must be odd (checked with assert only). If the
    convolvee is shorter than the kernel there is no valid core, and an
    all-zero vector is returned.
**/
std::vector<float> SpecialConvolution(std::vector<float> convolvee, std::vector<float> kernel)
{
    const int lenConvolvee = static_cast<int>(convolvee.size());
    const int lenKernel = static_cast<int>(kernel.size());
    const int halfKernel = lenKernel / 2;
    const int minLength = 256;

    assert(lenKernel % 2 != 0); // no exception handling !!!

    const int lenResult = lenConvolvee > minLength ? lenConvolvee : minLength;
    std::vector<float> Z(static_cast<std::vector<float>::size_type>(lenResult), 0.0f);

    // No valid core: nothing to compute, and the padding below would
    // otherwise read from a negative index.
    if (lenKernel <= 0 || lenConvolvee < lenKernel) return Z;

    // valid core: every output bin for which the kernel fits completely
    for (int n = lenKernel - 1; n < lenConvolvee; n++) {
        float s = 0.0f;
        for (int m = 0; m < lenKernel; m++) {
            s += convolvee[n - m] * kernel[m];
        }
        Z[n - halfKernel] = s;
    }

    // fill lower and upper pads with the nearest valid value
    for (int n = 0; n < halfKernel; n++) {
        Z[n] = Z[halfKernel];
    }
    for (int n = lenConvolvee - halfKernel; n < lenConvolvee; n++) {
        Z[n] = Z[lenConvolvee - halfKernel - 1];
    }
    return Z;
}
|
matthiasm@0
|
87
|
matthiasm@0
|
88 // vector<float> FftBin2Frequency(vector<float> binnumbers, int fs, int blocksize)
|
matthiasm@0
|
89 // {
|
matthiasm@0
|
90 // vector<float> freq(binnumbers.size, 0.0);
|
matthiasm@0
|
91 // for (unsigned i = 0; i < binnumbers.size; ++i) {
|
matthiasm@0
|
92 // freq[i] = (binnumbers[i]-1.0) * fs * 1.0 / blocksize;
|
matthiasm@0
|
93 // }
|
matthiasm@0
|
94 // return freq;
|
matthiasm@0
|
95 // }
|
matthiasm@0
|
96
|
matthiasm@0
|
// Raised-cosine pulse of total width `width` centred on `centre`:
//   0.5 + 0.5 * cos(2*pi*(x - centre)/width)   for |x - centre| <= width/2
//   0                                          otherwise
// A non-positive width describes an empty pulse and yields 0 everywhere
// (width == 0 with x == centre used to evaluate 0 * inf and return NaN).
float cospuls(float x, float centre, float width)
{
    // same value as the POSIX M_PI macro, without depending on it
    const double pi = 3.14159265358979323846;

    if (!(width > 0)) return 0.0f;

    // negated "<=" so that a NaN argument still falls outside the pulse
    if (!(std::fabs(x - centre) <= 0.5 * width)) return 0.0f;

    const float recipwidth = 1.0 / width;
    return std::cos((x - centre) * 2 * pi * recipwidth) * .5 + .5;
}
|
matthiasm@0
|
105
|
matthiasm@0
|
106 float pitchCospuls(float x, float centre, int binsperoctave)
|
matthiasm@0
|
107 {
|
matthiasm@0
|
108 float warpedf = -binsperoctave * (log2(centre) - log2(x));
|
matthiasm@0
|
109 float out = cospuls(warpedf, 0.0, 2.0);
|
matthiasm@0
|
110 // now scale to correct for note density
|
matthiasm@0
|
111 float c = log(2.0)/binsperoctave;
|
matthiasm@0
|
112 if (x > 0) {
|
matthiasm@0
|
113 out = out / (c * x);
|
matthiasm@0
|
114 } else {
|
matthiasm@0
|
115 out = 0;
|
matthiasm@0
|
116 }
|
matthiasm@0
|
117 return out;
|
matthiasm@0
|
118 }
|
matthiasm@0
|
119
|
matthiasm@0
|
120 bool logFreqMatrix(int fs, int blocksize, float *outmatrix) {
|
matthiasm@0
|
121
|
matthiasm@0
|
122 int binspersemitone = 3; // this must be 3
|
matthiasm@0
|
123 int minoctave = 0; // this must be 0
|
matthiasm@0
|
124 int maxoctave = 7; // this must be 7
|
matthiasm@1
|
125 int oversampling = 80;
|
matthiasm@0
|
126
|
matthiasm@0
|
127 // linear frequency vector
|
matthiasm@0
|
128 vector<float> fft_f;
|
matthiasm@0
|
129 for (int i = 0; i < blocksize/2; ++i) {
|
matthiasm@0
|
130 fft_f.push_back(i * (fs * 1.0 / blocksize));
|
matthiasm@0
|
131 }
|
matthiasm@0
|
132 float fft_width = fs * 2.0 / blocksize;
|
matthiasm@0
|
133
|
matthiasm@0
|
134 // linear oversampled frequency vector
|
matthiasm@0
|
135 vector<float> oversampled_f;
|
matthiasm@0
|
136 for (unsigned int i = 0; i < oversampling * blocksize/2; ++i) {
|
matthiasm@0
|
137 oversampled_f.push_back(i * ((fs * 1.0 / blocksize) / oversampling));
|
matthiasm@0
|
138 }
|
matthiasm@0
|
139
|
matthiasm@0
|
140 // pitch-spaced frequency vector
|
matthiasm@0
|
141 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone!
|
matthiasm@0
|
142 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone!
|
matthiasm@0
|
143 vector<float> cq_f;
|
matthiasm@0
|
144 float oob = 1.0/binspersemitone; // one over binspersemitone
|
matthiasm@0
|
145 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12
|
matthiasm@0
|
146 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69)));
|
matthiasm@0
|
147 for (int i = minMIDI + 1; i < maxMIDI; ++i) {
|
matthiasm@0
|
148 for (int k = -1; k < 2; ++k) {
|
matthiasm@0
|
149 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69)));
|
matthiasm@0
|
150 }
|
matthiasm@0
|
151 }
|
matthiasm@0
|
152 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69)));
|
matthiasm@0
|
153 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69)));
|
matthiasm@0
|
154
|
matthiasm@0
|
155 int nFFT = fft_f.size();
|
matthiasm@0
|
156
|
matthiasm@0
|
157 vector<float> fft_activation;
|
matthiasm@0
|
158 for (int iOS = 0; iOS < 2 * oversampling; ++iOS) {
|
matthiasm@0
|
159 float cosp = cospuls(oversampled_f[iOS],fft_f[1],fft_width);
|
matthiasm@0
|
160 fft_activation.push_back(cosp);
|
matthiasm@0
|
161 // cerr << cosp << endl;
|
matthiasm@0
|
162 }
|
matthiasm@0
|
163
|
matthiasm@0
|
164 float cq_activation;
|
matthiasm@0
|
165 for (int iFFT = 1; iFFT < nFFT; ++iFFT) {
|
matthiasm@0
|
166 // find frequency stretch where the oversampled vector can be non-zero (i.e. in a window of width fft_width around the current frequency)
|
matthiasm@0
|
167 int curr_start = oversampling * iFFT - oversampling;
|
matthiasm@0
|
168 int curr_end = oversampling * iFFT + oversampling; // don't know if I should add "+1" here
|
matthiasm@0
|
169 // cerr << oversampled_f[curr_start] << " " << fft_f[iFFT] << " " << oversampled_f[curr_end] << endl;
|
matthiasm@0
|
170 for (unsigned iCQ = 0; iCQ < cq_f.size(); ++iCQ) {
|
matthiasm@0
|
171 outmatrix[iFFT + nFFT * iCQ] = 0;
|
matthiasm@1
|
172 if (cq_f[iCQ] * pow(2.0, 0.084) + fft_width > fft_f[iFFT] && cq_f[iCQ] * pow(2.0, -0.084 * 2) - fft_width < fft_f[iFFT]) { // within a generous neighbourhood
|
matthiasm@0
|
173 for (int iOS = curr_start; iOS < curr_end; ++iOS) {
|
matthiasm@0
|
174 cq_activation = pitchCospuls(oversampled_f[iOS],cq_f[iCQ],binspersemitone*12);
|
matthiasm@0
|
175 // cerr << oversampled_f[iOS] << " " << cq_f[iCQ] << " " << cq_activation << endl;
|
matthiasm@0
|
176 outmatrix[iFFT + nFFT * iCQ] += cq_activation * fft_activation[iOS-curr_start];
|
matthiasm@0
|
177 }
|
matthiasm@0
|
178 // if (iCQ == 1 || iCQ == 2) {
|
matthiasm@0
|
179 // cerr << " " << outmatrix[iFFT + nFFT * iCQ] << endl;
|
matthiasm@0
|
180 // }
|
matthiasm@0
|
181 }
|
matthiasm@0
|
182 }
|
matthiasm@0
|
183 }
|
matthiasm@0
|
184 return true;
|
matthiasm@0
|
185 }
|
matthiasm@0
|
186
|
matthiasm@3
|
187 bool dictionaryMatrix(float* dm) {
|
matthiasm@1
|
188 int binspersemitone = 3; // this must be 3
|
matthiasm@1
|
189 int minoctave = 0; // this must be 0
|
matthiasm@1
|
190 int maxoctave = 7; // this must be 7
|
matthiasm@4
|
191 float s_param = 0.7;
|
matthiasm@1
|
192
|
matthiasm@1
|
193 // pitch-spaced frequency vector
|
matthiasm@1
|
194 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone!
|
matthiasm@1
|
195 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone!
|
matthiasm@1
|
196 vector<float> cq_f;
|
matthiasm@1
|
197 float oob = 1.0/binspersemitone; // one over binspersemitone
|
matthiasm@1
|
198 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12
|
matthiasm@1
|
199 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69)));
|
matthiasm@1
|
200 for (int i = minMIDI + 1; i < maxMIDI; ++i) {
|
matthiasm@1
|
201 for (int k = -1; k < 2; ++k) {
|
matthiasm@1
|
202 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69)));
|
matthiasm@1
|
203 }
|
matthiasm@1
|
204 }
|
matthiasm@1
|
205 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69)));
|
matthiasm@1
|
206 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69)));
|
matthiasm@1
|
207
|
matthiasm@1
|
208 float curr_f;
|
matthiasm@1
|
209 float floatbin;
|
matthiasm@1
|
210 float curr_amp;
|
matthiasm@1
|
211 // now for every combination calculate the matrix element
|
matthiasm@1
|
212 for (unsigned iOut = 0; iOut < 12 * (maxoctave - minoctave); ++iOut) {
|
matthiasm@3
|
213 // cerr << iOut << endl;
|
matthiasm@1
|
214 for (unsigned iHarm = 1; iHarm <= 20; ++iHarm) {
|
matthiasm@1
|
215 curr_f = 440 * pow(2,(minMIDI-69+iOut)*1.0/12) * iHarm;
|
matthiasm@3
|
216 // if (curr_f > cq_f[nNote-1]) break;
|
matthiasm@3
|
217 floatbin = ((iOut + 1) * binspersemitone + 1) + binspersemitone * 12 * log2(iHarm);
|
matthiasm@3
|
218 // cerr << floatbin << endl;
|
matthiasm@1
|
219 curr_amp = pow(s_param,float(iHarm-1));
|
matthiasm@3
|
220 // cerr << "curramp" << curr_amp << endl;
|
matthiasm@1
|
221 for (unsigned iNote = 0; iNote < nNote; ++iNote) {
|
matthiasm@3
|
222 if (abs(iNote+1.0-floatbin)<2) {
|
matthiasm@3
|
223 dm[iNote + 256 * iOut] += cospuls(iNote+1.0, floatbin, binspersemitone + 0.0) * curr_amp;
|
matthiasm@3
|
224 // dm[iNote + nNote * iOut] += 1 * curr_amp;
|
matthiasm@3
|
225 }
|
matthiasm@1
|
226 }
|
matthiasm@3
|
227 }
|
matthiasm@1
|
228 }
|
matthiasm@3
|
229
|
matthiasm@3
|
230
|
matthiasm@1
|
231 }
|
matthiasm@1
|
232
|
matthiasm@7
|
// Returns the value of the environment variable `key`, or an empty string
// when the variable is not set.
std::string get_env_var(std::string const & key) {
    const char * const raw = std::getenv(key.c_str());
    if (raw == NULL) {
        return std::string();
    }
    return std::string(raw);
}
|
matthiasm@7
|
242
|
matthiasm@7
|
243
|
matthiasm@9
|
244 vector<string> chordDictionary(vector<float> *mchorddict) {
|
matthiasm@7
|
245 // ifstream chordDictFile;
|
matthiasm@7
|
246 string chordDictFilename(get_env_var("VAMP_PATH")+"/chord.dict");
|
matthiasm@7
|
247 // string instring[] = ",1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\nm,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0\n6,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0\n7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0\nmaj7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1\nmin7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0\n,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\n,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\ndim,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0\naug,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0\n";
|
matthiasm@7
|
248 typedef tokenizer<char_separator<char> > Tok;
|
matthiasm@7
|
249 // char_separator<char> sep; // default constructed
|
matthiasm@7
|
250 char_separator<char> sep(",; ",":");
|
matthiasm@7
|
251 iostreams::stream<iostreams::file_source> chordDictFile(chordDictFilename.c_str());
|
matthiasm@7
|
252 string line;
|
matthiasm@7
|
253 int iElement = 0;
|
matthiasm@7
|
254 int nChord = 0;
|
matthiasm@7
|
255
|
matthiasm@7
|
256 vector<string> loadedChordNames;
|
matthiasm@7
|
257 vector<float> loadedChordDict;
|
matthiasm@7
|
258 if (chordDictFile.is_open()) {
|
matthiasm@7
|
259 while (std::getline(chordDictFile, line)) { // loop over lines in chord.dict file
|
matthiasm@7
|
260 // first, get the chord definition
|
matthiasm@7
|
261 string chordType;
|
matthiasm@7
|
262 vector<float> tempPCVector;
|
matthiasm@7
|
263 // cerr << line << endl;
|
matthiasm@7
|
264 if (!line.empty() && line.substr(0,1) != "#") {
|
matthiasm@7
|
265 Tok tok(line, sep);
|
matthiasm@7
|
266 for(Tok::iterator tok_iter = tok.begin(); tok_iter != tok.end(); ++tok_iter) { // loop over line elements
|
matthiasm@7
|
267 string tempString = *tok_iter;
|
matthiasm@7
|
268 // cerr << tempString << endl;
|
matthiasm@7
|
269 if (tok_iter == tok.begin()) { // either the chord name or a colon
|
matthiasm@7
|
270 if (tempString == ":") {
|
matthiasm@7
|
271 chordType = "";
|
matthiasm@7
|
272 } else {
|
matthiasm@7
|
273 chordType = tempString;
|
matthiasm@7
|
274 tok_iter++; // is this cheating ? :)
|
matthiasm@7
|
275 }
|
matthiasm@7
|
276 } else {
|
matthiasm@7
|
277 tempPCVector.push_back(lexical_cast<float>(*tok_iter));
|
matthiasm@7
|
278 }
|
matthiasm@7
|
279 }
|
matthiasm@7
|
280
|
matthiasm@7
|
281 // now make all 12 chords of every type
|
matthiasm@7
|
282 for (unsigned iSemitone = 0; iSemitone < 12; iSemitone++) {
|
matthiasm@7
|
283 // add bass slash notation
|
matthiasm@7
|
284 string slashNotation = "";
|
matthiasm@7
|
285 for (unsigned kSemitone = 1; kSemitone < 12; kSemitone++) {
|
matthiasm@7
|
286 if (tempPCVector[(kSemitone) % 12] > 0.99) {
|
matthiasm@7
|
287 slashNotation = bassnames[iSemitone][kSemitone];
|
matthiasm@7
|
288 }
|
matthiasm@7
|
289 }
|
matthiasm@7
|
290 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) { // bass pitch classes
|
matthiasm@9
|
291 // cerr << ((kSemitone - iSemitone + 12) % 12) << endl;
|
matthiasm@9
|
292 float bassValue = 0;
|
matthiasm@9
|
293 if (tempPCVector[(kSemitone - iSemitone + 12) % 12]==1) {
|
matthiasm@9
|
294 bassValue = 1;
|
matthiasm@9
|
295 } else {
|
matthiasm@10
|
296 if (tempPCVector[((kSemitone - iSemitone + 12) % 12) + 12] == 1) bassValue = 0.5;
|
matthiasm@9
|
297 }
|
matthiasm@9
|
298 loadedChordDict.push_back(bassValue);
|
matthiasm@7
|
299 }
|
matthiasm@7
|
300 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) { // chord pitch classes
|
matthiasm@7
|
301 loadedChordDict.push_back(tempPCVector[((kSemitone - iSemitone + 12) % 12) + 12]);
|
matthiasm@7
|
302 }
|
matthiasm@7
|
303 ostringstream os;
|
matthiasm@7
|
304 if (slashNotation.empty()) {
|
matthiasm@7
|
305 os << notenames[12+iSemitone] << chordType;
|
matthiasm@7
|
306 } else {
|
matthiasm@7
|
307 os << notenames[12+iSemitone] << chordType << "/" << slashNotation;
|
matthiasm@7
|
308 }
|
matthiasm@7
|
309
|
matthiasm@7
|
310 loadedChordNames.push_back(os.str());
|
matthiasm@7
|
311 }
|
matthiasm@7
|
312 }
|
matthiasm@7
|
313 }
|
matthiasm@7
|
314 // N type
|
matthiasm@7
|
315 loadedChordNames.push_back("N");
|
matthiasm@7
|
316 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) loadedChordDict.push_back(0.5);
|
matthiasm@7
|
317 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) loadedChordDict.push_back(1.0);
|
matthiasm@7
|
318
|
matthiasm@7
|
319 // normalise
|
matthiasm@7
|
320 float sum = 0;
|
matthiasm@7
|
321 for (int i = 0; i < loadedChordDict.size(); i++) {
|
matthiasm@7
|
322 sum += pow(loadedChordDict[i],2);
|
matthiasm@7
|
323 if (i % 24 == 23) {
|
matthiasm@7
|
324 float invertedsum = 1.0/sqrt(sum);
|
matthiasm@7
|
325 for (int k = 0; k < 24; k++) {
|
matthiasm@7
|
326 loadedChordDict[i-k] *= invertedsum;
|
matthiasm@7
|
327 }
|
matthiasm@7
|
328 sum = 0;
|
matthiasm@7
|
329 }
|
matthiasm@7
|
330
|
matthiasm@7
|
331 }
|
matthiasm@7
|
332
|
matthiasm@7
|
333
|
matthiasm@7
|
334 nChord = 0;
|
matthiasm@7
|
335 for (int i = 0; i < loadedChordNames.size(); i++) {
|
matthiasm@7
|
336 nChord++;
|
matthiasm@7
|
337 }
|
matthiasm@7
|
338 chordDictFile.close();
|
matthiasm@7
|
339
|
matthiasm@7
|
340
|
matthiasm@9
|
341 // mchorddict = new float[nChord*24];
|
matthiasm@7
|
342 for (int i = 0; i < nChord*24; i++) {
|
matthiasm@9
|
343 mchorddict->push_back(loadedChordDict[i]);
|
matthiasm@7
|
344 }
|
matthiasm@9
|
345
|
matthiasm@7
|
346 } else {// use default from chorddict.cpp
|
matthiasm@9
|
347 // mchorddict = new float[nChorddict];
|
matthiasm@7
|
348 for (int i = 0; i < nChorddict; i++) {
|
matthiasm@9
|
349 mchorddict->push_back(chorddict[i]);
|
matthiasm@7
|
350 }
|
matthiasm@7
|
351
|
matthiasm@7
|
352 nChord = nChorddict/24;
|
matthiasm@7
|
353 // mchordnames = new string[nChorddict/24];
|
matthiasm@7
|
354 char buffer1 [50];
|
matthiasm@7
|
355 for (int i = 0; i < nChorddict/24; i++) {
|
matthiasm@7
|
356 if (i < nChorddict/24 - 1) {
|
matthiasm@7
|
357 sprintf(buffer1, "%s%s", notenames[i % 12 + 12], chordtypes[i]);
|
matthiasm@7
|
358 } else {
|
matthiasm@7
|
359 sprintf(buffer1, "N");
|
matthiasm@7
|
360 }
|
matthiasm@7
|
361 ostringstream os;
|
matthiasm@7
|
362 os << buffer1;
|
matthiasm@9
|
363 loadedChordNames.push_back(os.str());
|
matthiasm@9
|
364
|
matthiasm@7
|
365 }
|
matthiasm@7
|
366
|
matthiasm@7
|
367 }
|
matthiasm@9
|
368 // cerr << "before leaving" << chordnames[1] << endl;
|
matthiasm@9
|
369 return loadedChordNames;
|
matthiasm@7
|
370 }
|
matthiasm@0
|
371
|
matthiasm@0
|
372 NNLSChroma::NNLSChroma(float inputSampleRate) :
|
matthiasm@0
|
373 Plugin(inputSampleRate),
|
matthiasm@0
|
374 m_fl(0),
|
matthiasm@0
|
375 m_blockSize(0),
|
matthiasm@0
|
376 m_stepSize(0),
|
matthiasm@0
|
377 m_lengthOfNoteIndex(0),
|
matthiasm@0
|
378 m_meanTuning0(0),
|
matthiasm@0
|
379 m_meanTuning1(0),
|
matthiasm@0
|
380 m_meanTuning2(0),
|
matthiasm@0
|
381 m_localTuning0(0),
|
matthiasm@0
|
382 m_localTuning1(0),
|
matthiasm@0
|
383 m_localTuning2(0),
|
matthiasm@4
|
384 m_paling(1.0),
|
matthiasm@3
|
385 m_preset(0.0),
|
matthiasm@0
|
386 m_localTuning(0),
|
matthiasm@0
|
387 m_kernelValue(0),
|
matthiasm@0
|
388 m_kernelFftIndex(0),
|
matthiasm@0
|
389 m_kernelNoteIndex(0),
|
matthiasm@1
|
390 m_dict(0),
|
matthiasm@0
|
391 m_tuneLocal(false),
|
matthiasm@7
|
392 m_dictID(0),
|
matthiasm@7
|
393 m_chorddict(0),
|
matthiasm@7
|
394 m_chordnames(0)
|
matthiasm@0
|
395 {
|
matthiasm@0
|
396 if (debug_on) cerr << "--> NNLSChroma" << endl;
|
matthiasm@7
|
397
|
matthiasm@7
|
398 // make the *note* dictionary matrix
|
matthiasm@3
|
399 m_dict = new float[nNote * 84];
|
matthiasm@3
|
400 for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0;
|
matthiasm@1
|
401 dictionaryMatrix(m_dict);
|
matthiasm@7
|
402
|
matthiasm@7
|
403 // get the *chord* dictionary from file (if the file exists)
|
matthiasm@9
|
404 m_chordnames = chordDictionary(&m_chorddict);
|
matthiasm@0
|
405 }
|
matthiasm@0
|
406
|
matthiasm@0
|
407
|
matthiasm@0
|
408 NNLSChroma::~NNLSChroma()
|
matthiasm@0
|
409 {
|
matthiasm@0
|
410 if (debug_on) cerr << "--> ~NNLSChroma" << endl;
|
matthiasm@1
|
411 delete [] m_dict;
|
matthiasm@9
|
412 // delete [] m_chorddict;
|
matthiasm@7
|
413 // delete m_chordnames;
|
matthiasm@0
|
414 }
|
matthiasm@0
|
415
|
matthiasm@0
|
416 string
|
matthiasm@0
|
417 NNLSChroma::getIdentifier() const
|
matthiasm@0
|
418 {
|
matthiasm@0
|
419 if (debug_on) cerr << "--> getIdentifier" << endl;
|
matthiasm@0
|
420 return "nnls_chroma";
|
matthiasm@0
|
421 }
|
matthiasm@0
|
422
|
matthiasm@0
|
423 string
|
matthiasm@0
|
424 NNLSChroma::getName() const
|
matthiasm@0
|
425 {
|
matthiasm@0
|
426 if (debug_on) cerr << "--> getName" << endl;
|
matthiasm@0
|
427 return "NNLS Chroma";
|
matthiasm@0
|
428 }
|
matthiasm@0
|
429
|
matthiasm@0
|
430 string
|
matthiasm@0
|
431 NNLSChroma::getDescription() const
|
matthiasm@0
|
432 {
|
matthiasm@0
|
433 // Return something helpful here!
|
matthiasm@0
|
434 if (debug_on) cerr << "--> getDescription" << endl;
|
matthiasm@4
|
435 return "This plugin provides a number of features derived from a log-frequency amplitude spectrum (LAS) of the DFT: the LAS itself, a standard-tuned version thereof (the local and global tuning estimates can are also be output), an approximate transcription to semitone activation using non-linear least squares (NNLS). Furthermore chroma features and a simple chord estimate derived from this NNLS semitone transcription.";
|
matthiasm@0
|
436 }
|
matthiasm@0
|
437
|
matthiasm@0
|
438 string
|
matthiasm@0
|
439 NNLSChroma::getMaker() const
|
matthiasm@0
|
440 {
|
matthiasm@0
|
441 if (debug_on) cerr << "--> getMaker" << endl;
|
matthiasm@0
|
442 // Your name here
|
matthiasm@0
|
443 return "Matthias Mauch";
|
matthiasm@0
|
444 }
|
matthiasm@0
|
445
|
matthiasm@0
|
446 int
|
matthiasm@0
|
447 NNLSChroma::getPluginVersion() const
|
matthiasm@0
|
448 {
|
matthiasm@0
|
449 if (debug_on) cerr << "--> getPluginVersion" << endl;
|
matthiasm@0
|
450 // Increment this each time you release a version that behaves
|
matthiasm@0
|
451 // differently from the previous one
|
matthiasm@0
|
452 return 1;
|
matthiasm@0
|
453 }
|
matthiasm@0
|
454
|
matthiasm@0
|
455 string
|
matthiasm@0
|
456 NNLSChroma::getCopyright() const
|
matthiasm@0
|
457 {
|
matthiasm@0
|
458 if (debug_on) cerr << "--> getCopyright" << endl;
|
matthiasm@0
|
459 // This function is not ideally named. It does not necessarily
|
matthiasm@0
|
460 // need to say who made the plugin -- getMaker does that -- but it
|
matthiasm@0
|
461 // should indicate the terms under which it is distributed. For
|
matthiasm@0
|
462 // example, "Copyright (year). All Rights Reserved", or "GPL"
|
matthiasm@0
|
463 return "Copyright (2010). All rights reserved.";
|
matthiasm@0
|
464 }
|
matthiasm@0
|
465
|
matthiasm@0
|
466 NNLSChroma::InputDomain
|
matthiasm@0
|
467 NNLSChroma::getInputDomain() const
|
matthiasm@0
|
468 {
|
matthiasm@0
|
469 if (debug_on) cerr << "--> getInputDomain" << endl;
|
matthiasm@0
|
470 return FrequencyDomain;
|
matthiasm@0
|
471 }
|
matthiasm@0
|
472
|
matthiasm@0
|
473 size_t
|
matthiasm@0
|
474 NNLSChroma::getPreferredBlockSize() const
|
matthiasm@0
|
475 {
|
matthiasm@0
|
476 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
|
matthiasm@0
|
477 return 16384; // 0 means "I can handle any block size"
|
matthiasm@0
|
478 }
|
matthiasm@0
|
479
|
matthiasm@0
|
480 size_t
|
matthiasm@0
|
481 NNLSChroma::getPreferredStepSize() const
|
matthiasm@0
|
482 {
|
matthiasm@0
|
483 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
|
matthiasm@0
|
484 return 2048; // 0 means "anything sensible"; in practice this
|
matthiasm@0
|
485 // means the same as the block size for TimeDomain
|
matthiasm@0
|
486 // plugins, or half of it for FrequencyDomain plugins
|
matthiasm@0
|
487 }
|
matthiasm@0
|
488
|
matthiasm@0
|
489 size_t
|
matthiasm@0
|
490 NNLSChroma::getMinChannelCount() const
|
matthiasm@0
|
491 {
|
matthiasm@0
|
492 if (debug_on) cerr << "--> getMinChannelCount" << endl;
|
matthiasm@0
|
493 return 1;
|
matthiasm@0
|
494 }
|
matthiasm@0
|
495
|
matthiasm@0
|
496 size_t
|
matthiasm@0
|
497 NNLSChroma::getMaxChannelCount() const
|
matthiasm@0
|
498 {
|
matthiasm@0
|
499 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
|
matthiasm@0
|
500 return 1;
|
matthiasm@0
|
501 }
|
matthiasm@0
|
502
|
matthiasm@0
|
503 NNLSChroma::ParameterList
|
matthiasm@0
|
504 NNLSChroma::getParameterDescriptors() const
|
matthiasm@0
|
505 {
|
matthiasm@0
|
506 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
|
matthiasm@0
|
507 ParameterList list;
|
matthiasm@0
|
508
|
matthiasm@3
|
509 ParameterDescriptor d3;
|
matthiasm@3
|
510 d3.identifier = "preset";
|
matthiasm@3
|
511 d3.name = "preset";
|
matthiasm@3
|
512 d3.description = "Spectral paling: no paling - 0; whitening - 1.";
|
matthiasm@3
|
513 d3.unit = "";
|
matthiasm@3
|
514 d3.isQuantized = true;
|
matthiasm@3
|
515 d3.quantizeStep = 1;
|
matthiasm@3
|
516 d3.minValue = 0.0;
|
matthiasm@4
|
517 d3.maxValue = 3.0;
|
matthiasm@3
|
518 d3.defaultValue = 0.0;
|
matthiasm@3
|
519 d3.valueNames.push_back("polyphonic pop");
|
matthiasm@3
|
520 d3.valueNames.push_back("polyphonic pop (fast)");
|
matthiasm@3
|
521 d3.valueNames.push_back("solo keyboard");
|
matthiasm@3
|
522 d3.valueNames.push_back("manual");
|
matthiasm@3
|
523 list.push_back(d3);
|
matthiasm@4
|
524
|
matthiasm@4
|
525 // ParameterDescriptor d0;
|
matthiasm@4
|
526 // d0.identifier = "notedict";
|
matthiasm@4
|
527 // d0.name = "note dictionary";
|
matthiasm@4
|
528 // d0.description = "Notes in different note dictionaries differ by their spectral shapes.";
|
matthiasm@4
|
529 // d0.unit = "";
|
matthiasm@4
|
530 // d0.minValue = 0;
|
matthiasm@4
|
531 // d0.maxValue = 1;
|
matthiasm@4
|
532 // d0.defaultValue = 0;
|
matthiasm@4
|
533 // d0.isQuantized = true;
|
matthiasm@4
|
534 // d0.valueNames.push_back("s = 0.6");
|
matthiasm@4
|
535 // d0.valueNames.push_back("no NNLS");
|
matthiasm@4
|
536 // d0.quantizeStep = 1.0;
|
matthiasm@4
|
537 // list.push_back(d0);
|
matthiasm@4
|
538
|
matthiasm@4
|
539 ParameterDescriptor d1;
|
matthiasm@4
|
540 d1.identifier = "tuningmode";
|
matthiasm@4
|
541 d1.name = "tuning mode";
|
matthiasm@4
|
542 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
|
matthiasm@4
|
543 d1.unit = "";
|
matthiasm@4
|
544 d1.minValue = 0;
|
matthiasm@4
|
545 d1.maxValue = 1;
|
matthiasm@4
|
546 d1.defaultValue = 0;
|
matthiasm@4
|
547 d1.isQuantized = true;
|
matthiasm@4
|
548 d1.valueNames.push_back("global tuning");
|
matthiasm@4
|
549 d1.valueNames.push_back("local tuning");
|
matthiasm@4
|
550 d1.quantizeStep = 1.0;
|
matthiasm@4
|
551 list.push_back(d1);
|
matthiasm@4
|
552
|
matthiasm@4
|
553 // ParameterDescriptor d2;
|
matthiasm@4
|
554 // d2.identifier = "paling";
|
matthiasm@4
|
555 // d2.name = "spectral paling";
|
matthiasm@4
|
556 // d2.description = "Spectral paling: no paling - 0; whitening - 1.";
|
matthiasm@4
|
557 // d2.unit = "";
|
matthiasm@4
|
558 // d2.isQuantized = true;
|
matthiasm@4
|
559 // // d2.quantizeStep = 0.1;
|
matthiasm@4
|
560 // d2.minValue = 0.0;
|
matthiasm@4
|
561 // d2.maxValue = 1.0;
|
matthiasm@4
|
562 // d2.defaultValue = 1.0;
|
matthiasm@4
|
563 // d2.isQuantized = false;
|
matthiasm@4
|
564 // list.push_back(d2);
|
matthiasm@4
|
565
|
matthiasm@0
|
566 return list;
|
matthiasm@0
|
567 }
|
matthiasm@0
|
568
|
matthiasm@0
|
569 float
|
matthiasm@0
|
570 NNLSChroma::getParameter(string identifier) const
|
matthiasm@0
|
571 {
|
matthiasm@3
|
572 if (debug_on) cerr << "--> getParameter" << endl;
|
matthiasm@0
|
573 if (identifier == "notedict") {
|
matthiasm@0
|
574 return m_dictID;
|
matthiasm@0
|
575 }
|
matthiasm@0
|
576
|
matthiasm@0
|
577 if (identifier == "paling") {
|
matthiasm@0
|
578 return m_paling;
|
matthiasm@0
|
579 }
|
matthiasm@0
|
580
|
matthiasm@0
|
581 if (identifier == "tuningmode") {
|
matthiasm@0
|
582 if (m_tuneLocal) {
|
matthiasm@0
|
583 return 1.0;
|
matthiasm@0
|
584 } else {
|
matthiasm@0
|
585 return 0.0;
|
matthiasm@0
|
586 }
|
matthiasm@0
|
587 }
|
matthiasm@3
|
588 if (identifier == "preset") {
|
matthiasm@3
|
589 return m_preset;
|
matthiasm@3
|
590 }
|
matthiasm@0
|
591 return 0;
|
matthiasm@0
|
592
|
matthiasm@0
|
593 }
|
matthiasm@0
|
594
|
matthiasm@0
|
595 void
|
matthiasm@0
|
596 NNLSChroma::setParameter(string identifier, float value)
|
matthiasm@0
|
597 {
|
matthiasm@3
|
598 if (debug_on) cerr << "--> setParameter" << endl;
|
matthiasm@0
|
599 if (identifier == "notedict") {
|
matthiasm@0
|
600 m_dictID = (int) value;
|
matthiasm@0
|
601 }
|
matthiasm@0
|
602
|
matthiasm@0
|
603 if (identifier == "paling") {
|
matthiasm@0
|
604 m_paling = value;
|
matthiasm@0
|
605 }
|
matthiasm@0
|
606
|
matthiasm@0
|
607 if (identifier == "tuningmode") {
|
matthiasm@0
|
608 m_tuneLocal = (value > 0) ? true : false;
|
matthiasm@0
|
609 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
|
matthiasm@0
|
610 }
|
matthiasm@3
|
611 if (identifier == "preset") {
|
matthiasm@3
|
612 m_preset = value;
|
matthiasm@3
|
613 if (m_preset == 0.0) {
|
matthiasm@3
|
614 m_tuneLocal = false;
|
matthiasm@3
|
615 m_paling = 1.0;
|
matthiasm@3
|
616 m_dictID = 0.0;
|
matthiasm@3
|
617 }
|
matthiasm@3
|
618 if (m_preset == 1.0) {
|
matthiasm@3
|
619 m_tuneLocal = false;
|
matthiasm@3
|
620 m_paling = 1.0;
|
matthiasm@3
|
621 m_dictID = 1.0;
|
matthiasm@3
|
622 }
|
matthiasm@3
|
623 if (m_preset == 2.0) {
|
matthiasm@3
|
624 m_tuneLocal = false;
|
matthiasm@3
|
625 m_paling = 0.7;
|
matthiasm@3
|
626 m_dictID = 0.0;
|
matthiasm@3
|
627 }
|
matthiasm@3
|
628 }
|
matthiasm@0
|
629 }
|
matthiasm@0
|
630
|
matthiasm@0
|
631 NNLSChroma::ProgramList
|
matthiasm@0
|
632 NNLSChroma::getPrograms() const
|
matthiasm@0
|
633 {
|
matthiasm@0
|
634 if (debug_on) cerr << "--> getPrograms" << endl;
|
matthiasm@0
|
635 ProgramList list;
|
matthiasm@0
|
636
|
matthiasm@0
|
637 // If you have no programs, return an empty list (or simply don't
|
matthiasm@0
|
638 // implement this function or getCurrentProgram/selectProgram)
|
matthiasm@0
|
639
|
matthiasm@0
|
640 return list;
|
matthiasm@0
|
641 }
|
matthiasm@0
|
642
|
matthiasm@0
|
643 string
|
matthiasm@0
|
644 NNLSChroma::getCurrentProgram() const
|
matthiasm@0
|
645 {
|
matthiasm@0
|
646 if (debug_on) cerr << "--> getCurrentProgram" << endl;
|
matthiasm@0
|
647 return ""; // no programs
|
matthiasm@0
|
648 }
|
matthiasm@0
|
649
|
matthiasm@0
|
650 void
|
matthiasm@0
|
651 NNLSChroma::selectProgram(string name)
|
matthiasm@0
|
652 {
|
matthiasm@0
|
653 if (debug_on) cerr << "--> selectProgram" << endl;
|
matthiasm@0
|
654 }
|
matthiasm@0
|
655
|
matthiasm@0
|
656
|
matthiasm@0
|
657 NNLSChroma::OutputList
|
matthiasm@0
|
658 NNLSChroma::getOutputDescriptors() const
|
matthiasm@0
|
659 {
|
matthiasm@0
|
660 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
|
matthiasm@0
|
661 OutputList list;
|
matthiasm@0
|
662
|
matthiasm@0
|
663 // Make chroma names for the binNames property
|
matthiasm@0
|
664 vector<string> chromanames;
|
matthiasm@0
|
665 vector<string> bothchromanames;
|
matthiasm@0
|
666 for (int iNote = 0; iNote < 24; iNote++) {
|
matthiasm@0
|
667 bothchromanames.push_back(notenames[iNote]);
|
matthiasm@0
|
668 if (iNote < 12) {
|
matthiasm@0
|
669 chromanames.push_back(notenames[iNote]);
|
matthiasm@0
|
670 }
|
matthiasm@0
|
671 }
|
matthiasm@0
|
672
|
matthiasm@1
|
673 // int nNote = 84;
|
matthiasm@0
|
674
|
matthiasm@0
|
675 // See OutputDescriptor documentation for the possibilities here.
|
matthiasm@0
|
676 // Every plugin must have at least one output.
|
matthiasm@0
|
677
|
matthiasm@0
|
678 OutputDescriptor d0;
|
matthiasm@0
|
679 d0.identifier = "tuning";
|
matthiasm@0
|
680 d0.name = "Tuning";
|
matthiasm@0
|
681 d0.description = "The concert pitch.";
|
matthiasm@0
|
682 d0.unit = "Hz";
|
matthiasm@0
|
683 d0.hasFixedBinCount = true;
|
matthiasm@0
|
684 d0.binCount = 0;
|
matthiasm@0
|
685 d0.hasKnownExtents = true;
|
matthiasm@0
|
686 d0.minValue = 427.47;
|
matthiasm@0
|
687 d0.maxValue = 452.89;
|
matthiasm@0
|
688 d0.isQuantized = false;
|
matthiasm@0
|
689 d0.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@0
|
690 d0.hasDuration = false;
|
matthiasm@0
|
691 list.push_back(d0);
|
matthiasm@0
|
692
|
matthiasm@0
|
693 OutputDescriptor d1;
|
matthiasm@0
|
694 d1.identifier = "logfreqspec";
|
matthiasm@0
|
695 d1.name = "Log-Frequency Spectrum";
|
matthiasm@0
|
696 d1.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping.";
|
matthiasm@0
|
697 d1.unit = "";
|
matthiasm@0
|
698 d1.hasFixedBinCount = true;
|
matthiasm@0
|
699 d1.binCount = nNote;
|
matthiasm@0
|
700 d1.hasKnownExtents = false;
|
matthiasm@0
|
701 d1.isQuantized = false;
|
matthiasm@0
|
702 d1.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
703 d1.hasDuration = false;
|
matthiasm@0
|
704 d1.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
705 list.push_back(d1);
|
matthiasm@0
|
706
|
matthiasm@0
|
707 OutputDescriptor d2;
|
matthiasm@0
|
708 d2.identifier = "tunedlogfreqspec";
|
matthiasm@0
|
709 d2.name = "Tuned Log-Frequency Spectrum";
|
matthiasm@0
|
710 d2.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping, then its tuned using the estimated tuning frequency.";
|
matthiasm@0
|
711 d2.unit = "";
|
matthiasm@0
|
712 d2.hasFixedBinCount = true;
|
matthiasm@0
|
713 d2.binCount = 256;
|
matthiasm@0
|
714 d2.hasKnownExtents = false;
|
matthiasm@0
|
715 d2.isQuantized = false;
|
matthiasm@0
|
716 d2.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
717 d2.hasDuration = false;
|
matthiasm@0
|
718 d2.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
719 list.push_back(d2);
|
matthiasm@0
|
720
|
matthiasm@0
|
721 OutputDescriptor d3;
|
matthiasm@0
|
722 d3.identifier = "semitonespectrum";
|
matthiasm@0
|
723 d3.name = "Semitone Spectrum";
|
matthiasm@0
|
724 d3.description = "A semitone-spaced log-frequency spectrum derived from the third-of-a-semitone-spaced tuned log-frequency spectrum.";
|
matthiasm@0
|
725 d3.unit = "";
|
matthiasm@0
|
726 d3.hasFixedBinCount = true;
|
matthiasm@0
|
727 d3.binCount = 84;
|
matthiasm@0
|
728 d3.hasKnownExtents = false;
|
matthiasm@0
|
729 d3.isQuantized = false;
|
matthiasm@0
|
730 d3.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
731 d3.hasDuration = false;
|
matthiasm@0
|
732 d3.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
733 list.push_back(d3);
|
matthiasm@0
|
734
|
matthiasm@0
|
735 OutputDescriptor d4;
|
matthiasm@0
|
736 d4.identifier = "chroma";
|
matthiasm@0
|
737 d4.name = "Chromagram";
|
matthiasm@0
|
738 d4.description = "Tuning-adjusted chromagram from NNLS soft transcription, with an emphasis on the medium note range.";
|
matthiasm@0
|
739 d4.unit = "";
|
matthiasm@0
|
740 d4.hasFixedBinCount = true;
|
matthiasm@0
|
741 d4.binCount = 12;
|
matthiasm@0
|
742 d4.binNames = chromanames;
|
matthiasm@0
|
743 d4.hasKnownExtents = false;
|
matthiasm@0
|
744 d4.isQuantized = false;
|
matthiasm@0
|
745 d4.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
746 d4.hasDuration = false;
|
matthiasm@0
|
747 d4.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
748 list.push_back(d4);
|
matthiasm@0
|
749
|
matthiasm@0
|
750 OutputDescriptor d5;
|
matthiasm@0
|
751 d5.identifier = "basschroma";
|
matthiasm@0
|
752 d5.name = "Bass Chromagram";
|
matthiasm@0
|
753 d5.description = "Tuning-adjusted bass chromagram from NNLS soft transcription, with an emphasis on the bass note range.";
|
matthiasm@0
|
754 d5.unit = "";
|
matthiasm@0
|
755 d5.hasFixedBinCount = true;
|
matthiasm@0
|
756 d5.binCount = 12;
|
matthiasm@0
|
757 d5.binNames = chromanames;
|
matthiasm@0
|
758 d5.hasKnownExtents = false;
|
matthiasm@0
|
759 d5.isQuantized = false;
|
matthiasm@0
|
760 d5.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
761 d5.hasDuration = false;
|
matthiasm@0
|
762 d5.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
763 list.push_back(d5);
|
matthiasm@0
|
764
|
matthiasm@0
|
765 OutputDescriptor d6;
|
matthiasm@0
|
766 d6.identifier = "bothchroma";
|
matthiasm@0
|
767 d6.name = "Chromagram and Bass Chromagram";
|
matthiasm@0
|
768 d6.description = "Tuning-adjusted chromagram and bass chromagram (stacked on top of each other) from NNLS soft transcription.";
|
matthiasm@0
|
769 d6.unit = "";
|
matthiasm@0
|
770 d6.hasFixedBinCount = true;
|
matthiasm@0
|
771 d6.binCount = 24;
|
matthiasm@0
|
772 d6.binNames = bothchromanames;
|
matthiasm@0
|
773 d6.hasKnownExtents = false;
|
matthiasm@0
|
774 d6.isQuantized = false;
|
matthiasm@0
|
775 d6.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@0
|
776 d6.hasDuration = false;
|
matthiasm@0
|
777 d6.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
778 list.push_back(d6);
|
matthiasm@0
|
779
|
matthiasm@0
|
780 OutputDescriptor d7;
|
matthiasm@0
|
781 d7.identifier = "simplechord";
|
matthiasm@0
|
782 d7.name = "Simple Chord Estimate";
|
matthiasm@0
|
783 d7.description = "A simple chord estimate based on the inner product of chord templates with the smoothed chroma.";
|
matthiasm@0
|
784 d7.unit = "";
|
matthiasm@0
|
785 d7.hasFixedBinCount = true;
|
matthiasm@0
|
786 d7.binCount = 0;
|
matthiasm@0
|
787 d7.hasKnownExtents = false;
|
matthiasm@0
|
788 d7.isQuantized = false;
|
matthiasm@0
|
789 d7.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@0
|
790 d7.hasDuration = false;
|
matthiasm@0
|
791 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
792 list.push_back(d7);
|
matthiasm@0
|
793
|
matthiasm@1
|
794 // OutputDescriptor d8;
|
matthiasm@1
|
795 // d8.identifier = "inconsistency";
|
matthiasm@1
|
796 // d8.name = "Harmonic inconsistency value";
|
matthiasm@1
|
797 // d8.description = "Harmonic inconsistency. Indicates music if low, non-music or speech when high.";
|
matthiasm@1
|
798 // d8.unit = "";
|
matthiasm@1
|
799 // d8.hasFixedBinCount = true;
|
matthiasm@1
|
800 // d8.binCount = 1;
|
matthiasm@1
|
801 // d8.hasKnownExtents = false;
|
matthiasm@1
|
802 // d8.isQuantized = false;
|
matthiasm@1
|
803 // d8.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@1
|
804 // d8.hasDuration = false;
|
matthiasm@1
|
805 // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@1
|
806 // list.push_back(d8);
|
matthiasm@1
|
807 //
|
matthiasm@1
|
808 // OutputDescriptor d9;
|
matthiasm@1
|
809 // d9.identifier = "inconsistencysegment";
|
matthiasm@1
|
810 // d9.name = "Harmonic inconsistency segmenter";
|
matthiasm@1
|
811 // d9.description = "Segments the audio based on the harmonic inconsistency value into speech and music.";
|
matthiasm@1
|
812 // d9.unit = "";
|
matthiasm@1
|
813 // d9.hasFixedBinCount = true;
|
matthiasm@1
|
814 // d9.binCount = 0;
|
matthiasm@1
|
815 // d9.hasKnownExtents = true;
|
matthiasm@1
|
816 // d9.minValue = 0.1;
|
matthiasm@1
|
817 // d9.maxValue = 0.9;
|
matthiasm@1
|
818 // d9.isQuantized = false;
|
matthiasm@1
|
819 // d9.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@1
|
820 // d9.hasDuration = false;
|
matthiasm@1
|
821 // d9.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@1
|
822 // list.push_back(d9);
|
matthiasm@1
|
823 //
|
matthiasm@1
|
824 OutputDescriptor d10;
|
matthiasm@1
|
825 d10.identifier = "localtuning";
|
matthiasm@1
|
826 d10.name = "Local tuning";
|
matthiasm@4
|
827 d10.description = "Tuning based on the history up to this timestamp.";
|
matthiasm@1
|
828 d10.unit = "Hz";
|
matthiasm@1
|
829 d10.hasFixedBinCount = true;
|
matthiasm@1
|
830 d10.binCount = 1;
|
matthiasm@1
|
831 d10.hasKnownExtents = true;
|
matthiasm@1
|
832 d10.minValue = 427.47;
|
matthiasm@1
|
833 d10.maxValue = 452.89;
|
matthiasm@1
|
834 d10.isQuantized = false;
|
matthiasm@3
|
835 d10.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@1
|
836 d10.hasDuration = false;
|
matthiasm@3
|
837 // d10.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@1
|
838 list.push_back(d10);
|
matthiasm@1
|
839
|
matthiasm@0
|
840 return list;
|
matthiasm@0
|
841 }
|
matthiasm@0
|
842
|
matthiasm@0
|
843
|
matthiasm@0
|
844 bool
|
matthiasm@0
|
845 NNLSChroma::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@0
|
846 {
|
matthiasm@1
|
847 if (debug_on) {
|
matthiasm@1
|
848 cerr << "--> initialise";
|
matthiasm@1
|
849 }
|
matthiasm@1
|
850
|
matthiasm@0
|
851 if (channels < getMinChannelCount() ||
|
matthiasm@0
|
852 channels > getMaxChannelCount()) return false;
|
matthiasm@0
|
853 m_blockSize = blockSize;
|
matthiasm@0
|
854 m_stepSize = stepSize;
|
matthiasm@0
|
855 frameCount = 0;
|
matthiasm@0
|
856 int tempn = 256 * m_blockSize/2;
|
matthiasm@4
|
857 // cerr << "length of tempkernel : " << tempn << endl;
|
matthiasm@1
|
858 float *tempkernel;
|
matthiasm@1
|
859
|
matthiasm@1
|
860 tempkernel = new float[tempn];
|
matthiasm@1
|
861
|
matthiasm@0
|
862 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
|
matthiasm@1
|
863 m_kernelValue.clear();
|
matthiasm@1
|
864 m_kernelFftIndex.clear();
|
matthiasm@1
|
865 m_kernelNoteIndex.clear();
|
matthiasm@1
|
866 int countNonzero = 0;
|
matthiasm@0
|
867 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
|
matthiasm@1
|
868 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) {
|
matthiasm@1
|
869 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
matthiasm@1
|
870 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
|
matthiasm@0
|
871 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
|
matthiasm@1
|
872 countNonzero++;
|
matthiasm@0
|
873 }
|
matthiasm@1
|
874 m_kernelFftIndex.push_back(iFFT);
|
matthiasm@1
|
875 m_kernelNoteIndex.push_back(iNote);
|
matthiasm@0
|
876 }
|
matthiasm@0
|
877 }
|
matthiasm@1
|
878 }
|
matthiasm@4
|
879 // cerr << "nonzero count : " << countNonzero << endl;
|
matthiasm@1
|
880 delete [] tempkernel;
|
matthiasm@3
|
881 ofstream myfile;
|
matthiasm@3
|
882 myfile.open ("matrix.txt");
|
matthiasm@3
|
883 // myfile << "Writing this to a file.\n";
|
matthiasm@3
|
884 for (int i = 0; i < nNote * 84; ++i) {
|
matthiasm@3
|
885 myfile << m_dict[i] << endl;
|
matthiasm@3
|
886 }
|
matthiasm@3
|
887 myfile.close();
|
matthiasm@0
|
888 return true;
|
matthiasm@0
|
889 }
|
matthiasm@0
|
890
|
matthiasm@0
|
891 void
|
matthiasm@0
|
892 NNLSChroma::reset()
|
matthiasm@0
|
893 {
|
matthiasm@4
|
894 if (debug_on) cerr << "--> reset";
|
matthiasm@4
|
895
|
matthiasm@0
|
896 // Clear buffers, reset stored values, etc
|
matthiasm@4
|
897 frameCount = 0;
|
matthiasm@4
|
898 m_dictID = 0;
|
matthiasm@4
|
899 m_fl.clear();
|
matthiasm@4
|
900 m_meanTuning0 = 0;
|
matthiasm@4
|
901 m_meanTuning1 = 0;
|
matthiasm@4
|
902 m_meanTuning2 = 0;
|
matthiasm@4
|
903 m_localTuning0 = 0;
|
matthiasm@4
|
904 m_localTuning1 = 0;
|
matthiasm@4
|
905 m_localTuning2 = 0;
|
matthiasm@4
|
906 m_localTuning.clear();
|
matthiasm@0
|
907 }
|
matthiasm@0
|
908
|
matthiasm@0
|
909 NNLSChroma::FeatureSet
|
matthiasm@0
|
910 NNLSChroma::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
matthiasm@0
|
911 {
|
matthiasm@4
|
912 if (debug_on) cerr << "--> process" << endl;
|
matthiasm@0
|
913 frameCount++;
|
matthiasm@0
|
914 float *magnitude = new float[m_blockSize/2];
|
matthiasm@0
|
915
|
matthiasm@0
|
916 Feature f10; // local tuning
|
matthiasm@3
|
917 f10.hasTimestamp = true;
|
matthiasm@4
|
918 f10.timestamp = timestamp;
|
matthiasm@0
|
919 const float *fbuf = inputBuffers[0];
|
matthiasm@0
|
920
|
matthiasm@0
|
921 // make magnitude
|
matthiasm@0
|
922 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
|
matthiasm@0
|
923 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
|
matthiasm@0
|
924 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
|
matthiasm@0
|
925 }
|
matthiasm@4
|
926
|
matthiasm@0
|
927 // note magnitude mapping using pre-calculated matrix
|
matthiasm@0
|
928 float *nm = new float[nNote]; // note magnitude
|
matthiasm@0
|
929 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
matthiasm@0
|
930 nm[iNote] = 0; // initialise as 0
|
matthiasm@0
|
931 }
|
matthiasm@0
|
932 int binCount = 0;
|
matthiasm@0
|
933 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
|
matthiasm@0
|
934 // cerr << ".";
|
matthiasm@1
|
935 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
|
matthiasm@1
|
936 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl;
|
matthiasm@0
|
937 binCount++;
|
matthiasm@0
|
938 }
|
matthiasm@1
|
939 // cerr << nm[20];
|
matthiasm@1
|
940 // cerr << endl;
|
matthiasm@0
|
941
|
matthiasm@0
|
942
|
matthiasm@0
|
943 float one_over_N = 1.0/frameCount;
|
matthiasm@0
|
944 // update means of complex tuning variables
|
matthiasm@0
|
945 m_meanTuning0 *= float(frameCount-1)*one_over_N;
|
matthiasm@0
|
946 m_meanTuning1 *= float(frameCount-1)*one_over_N;
|
matthiasm@0
|
947 m_meanTuning2 *= float(frameCount-1)*one_over_N;
|
matthiasm@0
|
948
|
matthiasm@0
|
949 for (int iTone = 0; iTone < 160; iTone = iTone + 3) {
|
matthiasm@0
|
950 m_meanTuning0 += nm[iTone + 0]*one_over_N;
|
matthiasm@0
|
951 m_meanTuning1 += nm[iTone + 1]*one_over_N;
|
matthiasm@0
|
952 m_meanTuning2 += nm[iTone + 2]*one_over_N;
|
matthiasm@3
|
953 float ratioOld = 0.997;
|
matthiasm@3
|
954 m_localTuning0 *= ratioOld; m_localTuning0 += nm[iTone + 0] * (1 - ratioOld);
|
matthiasm@3
|
955 m_localTuning1 *= ratioOld; m_localTuning1 += nm[iTone + 1] * (1 - ratioOld);
|
matthiasm@3
|
956 m_localTuning2 *= ratioOld; m_localTuning2 += nm[iTone + 2] * (1 - ratioOld);
|
matthiasm@0
|
957 }
|
matthiasm@0
|
958
|
matthiasm@0
|
959 // if (m_tuneLocal) {
|
matthiasm@0
|
960 // local tuning
|
matthiasm@0
|
961 float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2;
|
matthiasm@0
|
962 float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2;
|
matthiasm@0
|
963 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
|
matthiasm@0
|
964 m_localTuning.push_back(normalisedtuning);
|
matthiasm@0
|
965 float tuning440 = 440 * pow(2,normalisedtuning/12);
|
matthiasm@0
|
966 f10.values.push_back(tuning440);
|
matthiasm@3
|
967 // cerr << tuning440 << endl;
|
matthiasm@0
|
968 // }
|
matthiasm@0
|
969
|
matthiasm@0
|
970 Feature f1; // logfreqspec
|
matthiasm@0
|
971 f1.hasTimestamp = true;
|
matthiasm@0
|
972 f1.timestamp = timestamp;
|
matthiasm@0
|
973 for (size_t iNote = 0; iNote < nNote; iNote++) {
|
matthiasm@0
|
974 f1.values.push_back(nm[iNote]);
|
matthiasm@0
|
975 }
|
matthiasm@0
|
976
|
matthiasm@0
|
977 FeatureSet fs;
|
matthiasm@0
|
978 fs[1].push_back(f1);
|
matthiasm@3
|
979 fs[8].push_back(f10);
|
matthiasm@0
|
980
|
matthiasm@0
|
981 // deletes
|
matthiasm@0
|
982 delete[] magnitude;
|
matthiasm@0
|
983 delete[] nm;
|
matthiasm@0
|
984
|
matthiasm@0
|
985 m_fl.push_back(f1); // remember note magnitude for getRemainingFeatures
|
matthiasm@7
|
986 char * pPath;
|
matthiasm@7
|
987 pPath = getenv ("VAMP_PATH");
|
matthiasm@7
|
988
|
matthiasm@7
|
989
|
matthiasm@0
|
990 return fs;
|
matthiasm@0
|
991 }
|
matthiasm@0
|
992
|
matthiasm@0
|
993 NNLSChroma::FeatureSet
|
matthiasm@0
|
994 NNLSChroma::getRemainingFeatures()
|
matthiasm@0
|
995 {
|
matthiasm@4
|
996 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
|
matthiasm@4
|
997 FeatureSet fsOut;
|
matthiasm@4
|
998 if (m_fl.size() == 0) return fsOut;
|
matthiasm@9
|
999 int nChord = m_chordnames.size();
|
matthiasm@0
|
1000 //
|
matthiasm@1
|
1001 /** Calculate Tuning
|
matthiasm@1
|
1002 calculate tuning from (using the angle of the complex number defined by the
|
matthiasm@1
|
1003 cumulative mean real and imag values)
|
matthiasm@1
|
1004 **/
|
matthiasm@1
|
1005 float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
|
matthiasm@1
|
1006 float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
|
matthiasm@1
|
1007 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
|
matthiasm@1
|
1008 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
|
matthiasm@1
|
1009 int intShift = floor(normalisedtuning * 3);
|
matthiasm@1
|
1010 float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
|
matthiasm@1
|
1011
|
matthiasm@1
|
1012 char buffer0 [50];
|
matthiasm@1
|
1013
|
matthiasm@1
|
1014 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
|
matthiasm@1
|
1015
|
matthiasm@1
|
1016 // cerr << "normalisedtuning: " << normalisedtuning << '\n';
|
matthiasm@1
|
1017
|
matthiasm@1
|
1018 // push tuning to FeatureSet fsOut
|
matthiasm@1
|
1019 Feature f0; // tuning
|
matthiasm@1
|
1020 f0.hasTimestamp = true;
|
matthiasm@1
|
1021 f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));;
|
matthiasm@1
|
1022 f0.label = buffer0;
|
matthiasm@1
|
1023 fsOut[0].push_back(f0);
|
matthiasm@1
|
1024
|
matthiasm@1
|
1025 /** Tune Log-Frequency Spectrogram
|
matthiasm@1
|
1026 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
|
matthiasm@1
|
1027 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
|
matthiasm@1
|
1028 **/
|
matthiasm@1
|
1029
|
matthiasm@1
|
1030 float tempValue = 0;
|
matthiasm@1
|
1031 float dbThreshold = 0; // relative to the background spectrum
|
matthiasm@1
|
1032 float thresh = pow(10,dbThreshold/20);
|
matthiasm@1
|
1033 // cerr << "tune local ? " << m_tuneLocal << endl;
|
matthiasm@1
|
1034 int count = 0;
|
matthiasm@1
|
1035
|
matthiasm@1
|
1036 for (FeatureList::iterator i = m_fl.begin(); i != m_fl.end(); ++i) {
|
matthiasm@1
|
1037 Feature f1 = *i;
|
matthiasm@1
|
1038 Feature f2; // tuned log-frequency spectrum
|
matthiasm@1
|
1039 f2.hasTimestamp = true;
|
matthiasm@1
|
1040 f2.timestamp = f1.timestamp;
|
matthiasm@1
|
1041 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
|
matthiasm@1
|
1042
|
matthiasm@1
|
1043 if (m_tuneLocal) {
|
matthiasm@1
|
1044 intShift = floor(m_localTuning[count] * 3);
|
matthiasm@1
|
1045 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
|
matthiasm@1
|
1046 }
|
matthiasm@1
|
1047
|
matthiasm@1
|
1048 // cerr << intShift << " " << intFactor << endl;
|
matthiasm@1
|
1049
|
matthiasm@4
|
1050 for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
|
matthiasm@1
|
1051 tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor;
|
matthiasm@1
|
1052 f2.values.push_back(tempValue);
|
matthiasm@1
|
1053 }
|
matthiasm@1
|
1054
|
matthiasm@1
|
1055 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
|
matthiasm@1
|
1056 vector<float> runningmean = SpecialConvolution(f2.values,hw);
|
matthiasm@1
|
1057 vector<float> runningstd;
|
matthiasm@1
|
1058 for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance)
|
matthiasm@1
|
1059 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
|
matthiasm@1
|
1060 }
|
matthiasm@1
|
1061 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
|
matthiasm@1
|
1062 for (int i = 0; i < 256; i++) {
|
matthiasm@1
|
1063 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
|
matthiasm@1
|
1064 if (runningstd[i] > 0) {
|
matthiasm@1
|
1065 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
|
matthiasm@1
|
1066 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
|
matthiasm@1
|
1067 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
|
matthiasm@1
|
1068 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
|
matthiasm@1
|
1069 }
|
matthiasm@1
|
1070 if (f2.values[i] < 0) {
|
matthiasm@1
|
1071 cerr << "ERROR: negative value in logfreq spectrum" << endl;
|
matthiasm@1
|
1072 }
|
matthiasm@1
|
1073 }
|
matthiasm@1
|
1074 fsOut[2].push_back(f2);
|
matthiasm@1
|
1075 count++;
|
matthiasm@1
|
1076 }
|
matthiasm@1
|
1077
|
matthiasm@1
|
1078 /** Semitone spectrum and chromagrams
|
matthiasm@1
|
1079 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
|
matthiasm@1
|
1080 is inferred using a non-negative least squares algorithm.
|
matthiasm@1
|
1081 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
|
matthiasm@1
|
1082 bass and treble stacked onto each other).
|
matthiasm@1
|
1083 **/
|
matthiasm@1
|
1084 // taucs_ccs_matrix* A_original_ordering = taucs_construct_sorted_ccs_matrix(nnlsdict06, nnls_m, nnls_n);
|
matthiasm@1
|
1085
|
matthiasm@1
|
1086 vector<vector<float> > chordogram;
|
matthiasm@3
|
1087 vector<vector<int> > scoreChordogram;
|
matthiasm@1
|
1088 vector<float> oldchroma = vector<float>(12,0);
|
matthiasm@1
|
1089 vector<float> oldbasschroma = vector<float>(12,0);
|
matthiasm@1
|
1090 count = 0;
|
matthiasm@9
|
1091
|
matthiasm@1
|
1092 for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) {
|
matthiasm@1
|
1093 Feature f2 = *it; // logfreq spectrum
|
matthiasm@1
|
1094 Feature f3; // semitone spectrum
|
matthiasm@1
|
1095 Feature f4; // treble chromagram
|
matthiasm@1
|
1096 Feature f5; // bass chromagram
|
matthiasm@1
|
1097 Feature f6; // treble and bass chromagram
|
matthiasm@1
|
1098
|
matthiasm@1
|
1099 f3.hasTimestamp = true;
|
matthiasm@1
|
1100 f3.timestamp = f2.timestamp;
|
matthiasm@1
|
1101
|
matthiasm@1
|
1102 f4.hasTimestamp = true;
|
matthiasm@1
|
1103 f4.timestamp = f2.timestamp;
|
matthiasm@1
|
1104
|
matthiasm@1
|
1105 f5.hasTimestamp = true;
|
matthiasm@1
|
1106 f5.timestamp = f2.timestamp;
|
matthiasm@1
|
1107
|
matthiasm@1
|
1108 f6.hasTimestamp = true;
|
matthiasm@1
|
1109 f6.timestamp = f2.timestamp;
|
matthiasm@1
|
1110
|
matthiasm@3
|
1111 float b[256];
|
matthiasm@1
|
1112
|
matthiasm@1
|
1113 bool some_b_greater_zero = false;
|
matthiasm@3
|
1114 float sumb = 0;
|
matthiasm@1
|
1115 for (int i = 0; i < 256; i++) {
|
matthiasm@3
|
1116 // b[i] = m_dict[(256 * count + i) % (256 * 84)];
|
matthiasm@3
|
1117 b[i] = f2.values[i];
|
matthiasm@3
|
1118 sumb += b[i];
|
matthiasm@1
|
1119 if (b[i] > 0) {
|
matthiasm@1
|
1120 some_b_greater_zero = true;
|
matthiasm@1
|
1121 }
|
matthiasm@1
|
1122 }
|
matthiasm@1
|
1123
|
matthiasm@1
|
1124 // here's where the non-negative least squares algorithm calculates the note activation x
|
matthiasm@1
|
1125
|
matthiasm@1
|
1126 vector<float> chroma = vector<float>(12, 0);
|
matthiasm@1
|
1127 vector<float> basschroma = vector<float>(12, 0);
|
matthiasm@1
|
1128 float currval;
|
matthiasm@1
|
1129 unsigned iSemitone = 0;
|
matthiasm@1
|
1130
|
matthiasm@1
|
1131 if (some_b_greater_zero) {
|
matthiasm@3
|
1132 if (m_dictID == 1) {
|
matthiasm@1
|
1133 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
matthiasm@1
|
1134 currval = 0;
|
matthiasm@3
|
1135 currval += b[iNote + 1 + -1] * 0.5;
|
matthiasm@3
|
1136 currval += b[iNote + 1 + 0] * 1.0;
|
matthiasm@3
|
1137 currval += b[iNote + 1 + 1] * 0.5;
|
matthiasm@1
|
1138 f3.values.push_back(currval);
|
matthiasm@1
|
1139 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
|
matthiasm@1
|
1140 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
|
matthiasm@1
|
1141 iSemitone++;
|
matthiasm@1
|
1142 }
|
matthiasm@1
|
1143
|
matthiasm@1
|
1144 } else {
|
matthiasm@3
|
1145 float x[84+1000];
|
matthiasm@3
|
1146 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
|
matthiasm@10
|
1147 vector<int> signifIndex;
|
matthiasm@10
|
1148 int index=0;
|
matthiasm@10
|
1149 sumb /= 84.0;
|
matthiasm@10
|
1150 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
matthiasm@10
|
1151 float currval = 0;
|
matthiasm@10
|
1152 currval += b[iNote + 1 + -1];
|
matthiasm@10
|
1153 currval += b[iNote + 1 + 0];
|
matthiasm@10
|
1154 currval += b[iNote + 1 + 1];
|
matthiasm@10
|
1155 if (currval > 0) signifIndex.push_back(index);
|
matthiasm@10
|
1156 f3.values.push_back(0); // fill the values, change later
|
matthiasm@10
|
1157 index++;
|
matthiasm@10
|
1158 }
|
matthiasm@3
|
1159 float rnorm;
|
matthiasm@3
|
1160 float w[84+1000];
|
matthiasm@3
|
1161 float zz[84+1000];
|
matthiasm@3
|
1162 int indx[84+1000];
|
matthiasm@1
|
1163 int mode;
|
matthiasm@10
|
1164 int dictsize = 256*signifIndex.size();
|
matthiasm@10
|
1165 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
|
matthiasm@10
|
1166 float *curr_dict = new float[dictsize];
|
matthiasm@10
|
1167 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
|
matthiasm@10
|
1168 for (unsigned iBin = 0; iBin < 256; iBin++) {
|
matthiasm@10
|
1169 curr_dict[iNote * 256 + iBin] = 1.0 * m_dict[signifIndex[iNote] * 256 + iBin];
|
matthiasm@10
|
1170 }
|
matthiasm@3
|
1171 }
|
matthiasm@10
|
1172 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
|
matthiasm@10
|
1173 delete [] curr_dict;
|
matthiasm@10
|
1174 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
|
matthiasm@10
|
1175 f3.values[signifIndex[iNote]] = x[iNote];
|
matthiasm@3
|
1176 // cerr << mode << endl;
|
matthiasm@10
|
1177 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
|
matthiasm@10
|
1178 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
|
matthiasm@3
|
1179 }
|
matthiasm@1
|
1180 }
|
matthiasm@1
|
1181 }
|
matthiasm@10
|
1182
|
matthiasm@1
|
1183 f4.values = chroma;
|
matthiasm@1
|
1184 f5.values = basschroma;
|
matthiasm@1
|
1185 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
|
matthiasm@1
|
1186 f6.values = chroma;
|
matthiasm@1
|
1187
|
matthiasm@1
|
1188 // local chord estimation
|
matthiasm@1
|
1189 vector<float> currentChordSalience;
|
matthiasm@1
|
1190 float tempchordvalue = 0;
|
matthiasm@1
|
1191 float sumchordvalue = 0;
|
matthiasm@9
|
1192
|
matthiasm@1
|
1193 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@1
|
1194 tempchordvalue = 0;
|
matthiasm@1
|
1195 for (int iBin = 0; iBin < 12; iBin++) {
|
matthiasm@9
|
1196 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
matthiasm@1
|
1197 }
|
matthiasm@1
|
1198 for (int iBin = 12; iBin < 24; iBin++) {
|
matthiasm@9
|
1199 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
matthiasm@1
|
1200 }
|
matthiasm@1
|
1201 sumchordvalue+=tempchordvalue;
|
matthiasm@1
|
1202 currentChordSalience.push_back(tempchordvalue);
|
matthiasm@1
|
1203 }
|
matthiasm@1
|
1204 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@1
|
1205 currentChordSalience[iChord] /= sumchordvalue;
|
matthiasm@1
|
1206 }
|
matthiasm@1
|
1207 chordogram.push_back(currentChordSalience);
|
matthiasm@1
|
1208
|
matthiasm@1
|
1209 fsOut[3].push_back(f3);
|
matthiasm@1
|
1210 fsOut[4].push_back(f4);
|
matthiasm@1
|
1211 fsOut[5].push_back(f5);
|
matthiasm@1
|
1212 fsOut[6].push_back(f6);
|
matthiasm@1
|
1213 count++;
|
matthiasm@1
|
1214 }
|
matthiasm@10
|
1215 cerr << "******* NNLS done *******" << endl;
|
matthiasm@10
|
1216
|
matthiasm@3
|
1217 /* Simple chord estimation
|
matthiasm@3
|
1218 I just take the local chord estimates ("currentChordSalience") and average them over time, then
|
matthiasm@3
|
1219 take the maximum. Very simple, don't do this at home...
|
matthiasm@3
|
1220 */
|
matthiasm@3
|
1221 count = 0;
|
matthiasm@3
|
1222 int halfwindowlength = m_inputSampleRate / m_stepSize;
|
matthiasm@3
|
1223 vector<int> chordSequence;
|
matthiasm@3
|
1224 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram
|
matthiasm@3
|
1225 vector<int> temp = vector<int>(nChord,0);
|
matthiasm@3
|
1226 scoreChordogram.push_back(temp);
|
matthiasm@3
|
1227 }
|
matthiasm@4
|
1228 for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) {
|
matthiasm@3
|
1229 int startIndex = count + 1;
|
matthiasm@3
|
1230 int endIndex = count + 2 * halfwindowlength;
|
matthiasm@10
|
1231
|
matthiasm@10
|
1232 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
|
matthiasm@10
|
1233
|
matthiasm@10
|
1234 vector<int> chordCandidates;
|
matthiasm@10
|
1235 for (unsigned iChord = 0; iChord < nChord-1; iChord++) {
|
matthiasm@10
|
1236 // float currsum = 0;
|
matthiasm@10
|
1237 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
matthiasm@10
|
1238 // currsum += chordogram[iFrame][iChord];
|
matthiasm@10
|
1239 // }
|
matthiasm@10
|
1240 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
|
matthiasm@10
|
1241 for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
matthiasm@10
|
1242 if (chordogram[iFrame][iChord] > chordThreshold) {
|
matthiasm@10
|
1243 chordCandidates.push_back(iChord);
|
matthiasm@10
|
1244 break;
|
matthiasm@10
|
1245 }
|
matthiasm@10
|
1246 }
|
matthiasm@10
|
1247 }
|
matthiasm@10
|
1248 chordCandidates.push_back(nChord-1);
|
matthiasm@10
|
1249 // cerr << chordCandidates.size() << endl;
|
matthiasm@10
|
1250
|
matthiasm@10
|
1251 float maxval = 0; // will be the value of the most salient *chord change* in this frame
|
matthiasm@4
|
1252 float maxindex = 0; //... and the index thereof
|
matthiasm@10
|
1253 unsigned bestchordL = nChord-1; // index of the best "left" chord
|
matthiasm@10
|
1254 unsigned bestchordR = nChord-1; // index of the best "right" chord
|
matthiasm@10
|
1255
|
matthiasm@4
|
1256 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
|
matthiasm@3
|
1257 // now find the max values on both sides of iWF
|
matthiasm@3
|
1258 // left side:
|
matthiasm@3
|
1259 float maxL = 0;
|
matthiasm@3
|
1260 unsigned maxindL = nChord-1;
|
matthiasm@10
|
1261 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
|
matthiasm@10
|
1262 unsigned iChord = chordCandidates[kChord];
|
matthiasm@3
|
1263 float currsum = 0;
|
matthiasm@3
|
1264 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
|
matthiasm@3
|
1265 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@3
|
1266 }
|
matthiasm@3
|
1267 if (iChord == nChord-1) currsum *= 0.8;
|
matthiasm@3
|
1268 if (currsum > maxL) {
|
matthiasm@3
|
1269 maxL = currsum;
|
matthiasm@3
|
1270 maxindL = iChord;
|
matthiasm@3
|
1271 }
|
matthiasm@3
|
1272 }
|
matthiasm@3
|
1273 // right side:
|
matthiasm@3
|
1274 float maxR = 0;
|
matthiasm@3
|
1275 unsigned maxindR = nChord-1;
|
matthiasm@10
|
1276 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
|
matthiasm@10
|
1277 unsigned iChord = chordCandidates[kChord];
|
matthiasm@3
|
1278 float currsum = 0;
|
matthiasm@3
|
1279 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
matthiasm@3
|
1280 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@3
|
1281 }
|
matthiasm@3
|
1282 if (iChord == nChord-1) currsum *= 0.8;
|
matthiasm@3
|
1283 if (currsum > maxR) {
|
matthiasm@3
|
1284 maxR = currsum;
|
matthiasm@3
|
1285 maxindR = iChord;
|
matthiasm@3
|
1286 }
|
matthiasm@3
|
1287 }
|
matthiasm@3
|
1288 if (maxL+maxR > maxval) {
|
matthiasm@3
|
1289 maxval = maxL+maxR;
|
matthiasm@3
|
1290 maxindex = iWF;
|
matthiasm@3
|
1291 bestchordL = maxindL;
|
matthiasm@3
|
1292 bestchordR = maxindR;
|
matthiasm@3
|
1293 }
|
matthiasm@3
|
1294
|
matthiasm@3
|
1295 }
|
matthiasm@3
|
1296 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
|
matthiasm@3
|
1297 // add a score to every chord-frame-point that was part of a maximum
|
matthiasm@3
|
1298 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
|
matthiasm@3
|
1299 scoreChordogram[iFrame+count][bestchordL]++;
|
matthiasm@3
|
1300 }
|
matthiasm@3
|
1301 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
matthiasm@3
|
1302 scoreChordogram[iFrame+count][bestchordR]++;
|
matthiasm@3
|
1303 }
|
matthiasm@3
|
1304 count++;
|
matthiasm@3
|
1305 }
|
matthiasm@10
|
1306 cerr << "******* agent finished *******" << endl;
|
matthiasm@3
|
1307 count = 0;
|
matthiasm@3
|
1308 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
|
matthiasm@3
|
1309 float maxval = 0; // will be the value of the most salient chord in this frame
|
matthiasm@3
|
1310 float maxindex = 0; //... and the index thereof
|
matthiasm@3
|
1311 for (unsigned iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@3
|
1312 if (scoreChordogram[count][iChord] > maxval) {
|
matthiasm@3
|
1313 maxval = scoreChordogram[count][iChord];
|
matthiasm@3
|
1314 maxindex = iChord;
|
matthiasm@4
|
1315 // cerr << iChord << endl;
|
matthiasm@3
|
1316 }
|
matthiasm@3
|
1317 }
|
matthiasm@3
|
1318 chordSequence.push_back(maxindex);
|
matthiasm@4
|
1319 // cerr << "before modefilter, maxindex: " << maxindex << endl;
|
matthiasm@3
|
1320 count++;
|
matthiasm@3
|
1321 }
|
matthiasm@10
|
1322 cerr << "******* mode filter done *******" << endl;
|
matthiasm@10
|
1323
|
matthiasm@3
|
1324
|
matthiasm@3
|
1325 // mode filter on chordSequence
|
matthiasm@3
|
1326 count = 0;
|
matthiasm@3
|
1327 int oldChordIndex = -1;
|
matthiasm@3
|
1328 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
|
matthiasm@3
|
1329 Feature f6 = *it;
|
matthiasm@3
|
1330 Feature f7; // chord estimate
|
matthiasm@3
|
1331 f7.hasTimestamp = true;
|
matthiasm@3
|
1332 f7.timestamp = f6.timestamp;
|
matthiasm@3
|
1333 vector<int> chordCount = vector<int>(nChord,0);
|
matthiasm@3
|
1334 int maxChordCount = 0;
|
matthiasm@3
|
1335 int maxChordIndex = nChord-1;
|
matthiasm@4
|
1336 int startIndex = max(count - halfwindowlength/2,0);
|
matthiasm@4
|
1337 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
|
matthiasm@4
|
1338 for (int i = startIndex; i < endIndex; i++) {
|
matthiasm@4
|
1339 chordCount[chordSequence[i]]++;
|
matthiasm@4
|
1340 if (chordCount[chordSequence[i]] > maxChordCount) {
|
matthiasm@7
|
1341 // cerr << "start index " << startIndex << endl;
|
matthiasm@4
|
1342 maxChordCount++;
|
matthiasm@4
|
1343 maxChordIndex = chordSequence[i];
|
matthiasm@4
|
1344 }
|
matthiasm@4
|
1345 }
|
matthiasm@4
|
1346 // chordSequence[count] = maxChordIndex;
|
matthiasm@7
|
1347 // cerr << maxChordIndex << endl;
|
matthiasm@3
|
1348 if (oldChordIndex != maxChordIndex) {
|
matthiasm@3
|
1349 oldChordIndex = maxChordIndex;
|
matthiasm@3
|
1350
|
matthiasm@9
|
1351 // char buffer1 [50];
|
matthiasm@9
|
1352 // if (maxChordIndex < nChord - 1) {
|
matthiasm@9
|
1353 // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
|
matthiasm@9
|
1354 // } else {
|
matthiasm@9
|
1355 // sprintf(buffer1, "N");
|
matthiasm@9
|
1356 // }
|
matthiasm@9
|
1357 // f7.label = buffer1;
|
matthiasm@9
|
1358 f7.label = m_chordnames[maxChordIndex];
|
matthiasm@3
|
1359 fsOut[7].push_back(f7);
|
matthiasm@3
|
1360 }
|
matthiasm@3
|
1361 count++;
|
matthiasm@3
|
1362 }
|
matthiasm@0
|
1363 // // musicity
|
matthiasm@0
|
1364 // count = 0;
|
matthiasm@0
|
1365 // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
|
matthiasm@0
|
1366 // vector<float> musicityValue;
|
matthiasm@0
|
1367 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
|
matthiasm@0
|
1368 // Feature f4 = *it;
|
matthiasm@0
|
1369 //
|
matthiasm@0
|
1370 // int startIndex = max(count - musicitykernelwidth/2,0);
|
matthiasm@0
|
1371 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
|
matthiasm@0
|
1372 // float chromasum = 0;
|
matthiasm@0
|
1373 // float diffsum = 0;
|
matthiasm@0
|
1374 // for (int k = 0; k < 12; k++) {
|
matthiasm@0
|
1375 // for (int i = startIndex + 1; i < endIndex; i++) {
|
matthiasm@0
|
1376 // chromasum += pow(fsOut[4][i].values[k],2);
|
matthiasm@0
|
1377 // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]);
|
matthiasm@0
|
1378 // }
|
matthiasm@0
|
1379 // }
|
matthiasm@0
|
1380 // diffsum /= chromasum;
|
matthiasm@0
|
1381 // musicityValue.push_back(diffsum);
|
matthiasm@0
|
1382 // count++;
|
matthiasm@0
|
1383 // }
|
matthiasm@0
|
1384 //
|
matthiasm@0
|
1385 // float musicityThreshold = 0.44;
|
matthiasm@0
|
1386 // if (m_stepSize == 4096) {
|
matthiasm@0
|
1387 // musicityThreshold = 0.74;
|
matthiasm@0
|
1388 // }
|
matthiasm@0
|
1389 // if (m_stepSize == 4410) {
|
matthiasm@0
|
1390 // musicityThreshold = 0.77;
|
matthiasm@0
|
1391 // }
|
matthiasm@0
|
1392 //
|
matthiasm@0
|
1393 // count = 0;
|
matthiasm@0
|
1394 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
|
matthiasm@0
|
1395 // Feature f4 = *it;
|
matthiasm@0
|
1396 // Feature f8; // musicity
|
matthiasm@0
|
1397 // Feature f9; // musicity segmenter
|
matthiasm@0
|
1398 //
|
matthiasm@0
|
1399 // f8.hasTimestamp = true;
|
matthiasm@0
|
1400 // f8.timestamp = f4.timestamp;
|
matthiasm@0
|
1401 // f9.hasTimestamp = true;
|
matthiasm@0
|
1402 // f9.timestamp = f4.timestamp;
|
matthiasm@0
|
1403 //
|
matthiasm@0
|
1404 // int startIndex = max(count - musicitykernelwidth/2,0);
|
matthiasm@0
|
1405 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
|
matthiasm@0
|
1406 // int musicityCount = 0;
|
matthiasm@0
|
1407 // for (int i = startIndex; i <= endIndex; i++) {
|
matthiasm@0
|
1408 // if (musicityValue[i] > musicityThreshold) musicityCount++;
|
matthiasm@0
|
1409 // }
|
matthiasm@0
|
1410 // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1);
|
matthiasm@0
|
1411 //
|
matthiasm@0
|
1412 // if (isSpeech) {
|
matthiasm@0
|
1413 // if (oldlabeltype != 2) {
|
matthiasm@0
|
1414 // f9.label = "Speech";
|
matthiasm@0
|
1415 // fsOut[9].push_back(f9);
|
matthiasm@0
|
1416 // oldlabeltype = 2;
|
matthiasm@0
|
1417 // }
|
matthiasm@0
|
1418 // } else {
|
matthiasm@0
|
1419 // if (oldlabeltype != 1) {
|
matthiasm@0
|
1420 // f9.label = "Music";
|
matthiasm@0
|
1421 // fsOut[9].push_back(f9);
|
matthiasm@0
|
1422 // oldlabeltype = 1;
|
matthiasm@0
|
1423 // }
|
matthiasm@0
|
1424 // }
|
matthiasm@0
|
1425 // f8.values.push_back(musicityValue[count]);
|
matthiasm@0
|
1426 // fsOut[8].push_back(f8);
|
matthiasm@0
|
1427 // count++;
|
matthiasm@0
|
1428 // }
|
matthiasm@0
|
1429 return fsOut;
|
matthiasm@0
|
1430
|
matthiasm@0
|
1431 }
|
matthiasm@0
|
1432
|