Mercurial > hg > nnls-chroma
comparison NNLSChroma.cpp @ 23:93c836cfb8c5 matthiasm-plugin
* Consistent indentation (spaces only)
author | Chris Cannam |
---|---|
date | Thu, 21 Oct 2010 12:12:23 +0100 |
parents | 444c344681f3 |
children | 690bd9148467 |
comparison
equal
deleted
inserted
replaced
22:444c344681f3 | 23:93c836cfb8c5 |
---|---|
| | 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ |
1 | 2 |
2 #include "NNLSChroma.h" | 3 #include "NNLSChroma.h" |
3 #include <cmath> | 4 #include <cmath> |
4 // #include <omp.h> | 5 // #include <omp.h> |
5 #include <list> | 6 #include <list> |
28 const float cosvalue = -0.5; | 29 const float cosvalue = -0.5; |
29 const float hammingwind[19] = {0.0082, 0.0110, 0.0191, 0.0316, 0.0470, 0.0633, 0.0786, 0.0911, 0.0992, 0.1020, 0.0992, 0.0911, 0.0786, 0.0633, 0.0470, 0.0316, 0.0191, 0.0110, 0.0082}; | 30 const float hammingwind[19] = {0.0082, 0.0110, 0.0191, 0.0316, 0.0470, 0.0633, 0.0786, 0.0911, 0.0992, 0.1020, 0.0992, 0.0911, 0.0786, 0.0633, 0.0470, 0.0316, 0.0191, 0.0110, 0.0082}; |
30 const float basswindow[] = {0.001769, 0.015848, 0.043608, 0.084265, 0.136670, 0.199341, 0.270509, 0.348162, 0.430105, 0.514023, 0.597545, 0.678311, 0.754038, 0.822586, 0.882019, 0.930656, 0.967124, 0.990393, 0.999803, 0.995091, 0.976388, 0.944223, 0.899505, 0.843498, 0.777785, 0.704222, 0.624888, 0.542025, 0.457975, 0.375112, 0.295778, 0.222215, 0.156502, 0.100495, 0.055777, 0.023612, 0.004909, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000}; | 31 const float basswindow[] = {0.001769, 0.015848, 0.043608, 0.084265, 0.136670, 0.199341, 0.270509, 0.348162, 0.430105, 0.514023, 0.597545, 0.678311, 0.754038, 0.822586, 0.882019, 0.930656, 0.967124, 0.990393, 0.999803, 0.995091, 0.976388, 0.944223, 0.899505, 0.843498, 0.777785, 0.704222, 0.624888, 0.542025, 0.457975, 0.375112, 0.295778, 0.222215, 0.156502, 0.100495, 0.055777, 0.023612, 0.004909, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000}; |
31 const float treblewindow[] = {0.000350, 0.003144, 0.008717, 0.017037, 0.028058, 0.041719, 0.057942, 0.076638, 0.097701, 0.121014, 0.146447, 0.173856, 0.203090, 0.233984, 0.266366, 0.300054, 0.334860, 0.370590, 0.407044, 0.444018, 0.481304, 0.518696, 0.555982, 0.592956, 0.629410, 0.665140, 0.699946, 0.733634, 0.766016, 0.796910, 0.826144, 0.853553, 0.878986, 0.902299, 0.923362, 0.942058, 0.958281, 0.971942, 0.982963, 0.991283, 0.996856, 0.999650, 0.999650, 0.996856, 0.991283, 0.982963, 0.971942, 0.958281, 0.942058, 0.923362, 0.902299, 0.878986, 0.853553, 0.826144, 0.796910, 0.766016, 0.733634, 0.699946, 0.665140, 0.629410, 0.592956, 0.555982, 0.518696, 0.481304, 0.444018, 0.407044, 0.370590, 0.334860, 0.300054, 0.266366, 0.233984, 0.203090, 0.173856, 0.146447, 0.121014, 0.097701, 0.076638, 0.057942, 0.041719, 0.028058, 0.017037, 0.008717, 0.003144, 0.000350}; | 32 const float treblewindow[] = {0.000350, 0.003144, 0.008717, 0.017037, 0.028058, 0.041719, 0.057942, 0.076638, 0.097701, 0.121014, 0.146447, 0.173856, 0.203090, 0.233984, 0.266366, 0.300054, 0.334860, 0.370590, 0.407044, 0.444018, 0.481304, 0.518696, 0.555982, 0.592956, 0.629410, 0.665140, 0.699946, 0.733634, 0.766016, 0.796910, 0.826144, 0.853553, 0.878986, 0.902299, 0.923362, 0.942058, 0.958281, 0.971942, 0.982963, 0.991283, 0.996856, 0.999650, 0.999650, 0.996856, 0.991283, 0.982963, 0.971942, 0.958281, 0.942058, 0.923362, 0.902299, 0.878986, 0.853553, 0.826144, 0.796910, 0.766016, 0.733634, 0.699946, 0.665140, 0.629410, 0.592956, 0.555982, 0.518696, 0.481304, 0.444018, 0.407044, 0.370590, 0.334860, 0.300054, 0.266366, 0.233984, 0.203090, 0.173856, 0.146447, 0.121014, 0.097701, 0.076638, 0.057942, 0.041719, 0.028058, 0.017037, 0.008717, 0.003144, 0.000350}; |
32 const char* notenames[24] = {"A (bass)","Bb (bass)","B (bass)","C (bass)","C# (bass)","D (bass)","Eb (bass)","E (bass)","F (bass)","F# (bass)","G (bass)","Ab (bass)", | 33 const char* notenames[24] = {"A (bass)","Bb (bass)","B (bass)","C (bass)","C# (bass)","D (bass)","Eb (bass)","E (bass)","F (bass)","F# (bass)","G (bass)","Ab (bass)", |
33 "A","Bb","B","C","C#","D","Eb","E","F","F#","G","Ab"}; | 34 "A","Bb","B","C","C#","D","Eb","E","F","F#","G","Ab"}; |
34 | 35 |
35 const char* bassnames[12][12] ={ | 36 const char* bassnames[12][12] ={ |
36 {"A","","B","C","C#","D","","E","","F#","G","G#"}, | 37 {"A","","B","C","C#","D","","E","","F#","G","G#"}, |
37 {"Bb","","C","Db","D","Eb","","F","","G","Ab","A"}, | 38 {"Bb","","C","Db","D","Eb","","F","","G","Ab","A"}, |
38 {"B","","C#","D","D#","E","","F#","","G#","A","A#"}, | 39 {"B","","C#","D","D#","E","","F#","","G#","A","A#"}, |
39 {"C","","D","Eb","E","F","","G","","A","Bb","B"}, | 40 {"C","","D","Eb","E","F","","G","","A","Bb","B"}, |
40 {"C#","","D#","E","E#","F#","","G#","","A#","B","B#"}, | 41 {"C#","","D#","E","E#","F#","","G#","","A#","B","B#"}, |
41 {"D","","E","F","F#","G","","A","","B","C","C#"}, | 42 {"D","","E","F","F#","G","","A","","B","C","C#"}, |
42 {"Eb","","F","Gb","G","Ab","","Bb","","C","Db","D"}, | 43 {"Eb","","F","Gb","G","Ab","","Bb","","C","Db","D"}, |
43 {"E","","F#","G","G#","A","","B","","C#","D","D#"}, | 44 {"E","","F#","G","G#","A","","B","","C#","D","D#"}, |
44 {"F","","G","Ab","A","Bb","","C","","D","Eb","E"}, | 45 {"F","","G","Ab","A","Bb","","C","","D","Eb","E"}, |
45 {"F#","","G#","A","A#","B","","C#","","D#","E","E#"}, | 46 {"F#","","G#","A","A#","B","","C#","","D#","E","E#"}, |
46 {"G","","A","Bb","B","C","","D","","E","F","F#"}, | 47 {"G","","A","Bb","B","C","","D","","E","F","F#"}, |
47 {"Ab","","Bb","Cb","C","Db","","Eb","","F","Gb","G"} | 48 {"Ab","","Bb","Cb","C","Db","","Eb","","F","Gb","G"} |
48 }; | 49 }; |
49 | 50 |
50 | 51 |
51 // const char* bassnames[12][12] ={ | 52 // const char* bassnames[12][12] ={ |
52 // {"1","","2","b3","3","4","","5","","6","b7","7"}, | 53 // {"1","","2","b3","3","4","","5","","6","b7","7"}, |
65 | 66 |
66 const vector<float> hw(hammingwind, hammingwind+19); | 67 const vector<float> hw(hammingwind, hammingwind+19); |
67 const int nNote = 256; | 68 const int nNote = 256; |
68 | 69 |
69 /** Special Convolution | 70 /** Special Convolution |
70 special convolution is as long as the convolvee, i.e. the first argument. in the valid core part of the | 71 special convolution is as long as the convolvee, i.e. the first argument. in the valid core part of the |
71 convolution it contains the usual convolution values, but the pads at the beginning (ending) have the same values | 72 convolution it contains the usual convolution values, but the pads at the beginning (ending) have the same values |
72 as the first (last) valid convolution bin. | 73 as the first (last) valid convolution bin. |
73 **/ | 74 **/ |
74 | 75 |
75 const bool debug_on = false; | 76 const bool debug_on = false; |
76 | 77 |
77 vector<float> SpecialConvolution(vector<float> convolvee, vector<float> kernel) | 78 vector<float> SpecialConvolution(vector<float> convolvee, vector<float> kernel) |
96 } | 97 } |
97 | 98 |
98 // fill upper and lower pads | 99 // fill upper and lower pads |
99 for (n = 0; n < lenKernel/2; n++) Z[n] = Z[lenKernel/2]; | 100 for (n = 0; n < lenKernel/2; n++) Z[n] = Z[lenKernel/2]; |
100 for (n = lenConvolvee; n < lenConvolvee +lenKernel/2; n++) Z[n - lenKernel/2] = | 101 for (n = lenConvolvee; n < lenConvolvee +lenKernel/2; n++) Z[n - lenKernel/2] = |
101 Z[lenConvolvee - lenKernel/2 - 1]; | 102 Z[lenConvolvee - lenKernel/2 - 1]; |
102 return Z; | 103 return Z; |
103 } | 104 } |
104 | 105 |
105 // vector<float> FftBin2Frequency(vector<float> binnumbers, int fs, int blocksize) | 106 // vector<float> FftBin2Frequency(vector<float> binnumbers, int fs, int blocksize) |
106 // { | 107 // { |
111 // return freq; | 112 // return freq; |
112 // } | 113 // } |
113 | 114 |
114 float cospuls(float x, float centre, float width) | 115 float cospuls(float x, float centre, float width) |
115 { | 116 { |
116 float recipwidth = 1.0/width; | 117 float recipwidth = 1.0/width; |
117 if (abs(x - centre) <= 0.5 * width) { | 118 if (abs(x - centre) <= 0.5 * width) { |
118 return cos((x-centre)*2*M_PI*recipwidth)*.5+.5; | 119 return cos((x-centre)*2*M_PI*recipwidth)*.5+.5; |
119 } | 120 } |
120 return 0.0; | 121 return 0.0; |
121 } | 122 } |
122 | 123 |
123 float pitchCospuls(float x, float centre, int binsperoctave) | 124 float pitchCospuls(float x, float centre, int binsperoctave) |
124 { | 125 { |
125 float warpedf = -binsperoctave * (log2(centre) - log2(x)); | 126 float warpedf = -binsperoctave * (log2(centre) - log2(x)); |
126 float out = cospuls(warpedf, 0.0, 2.0); | 127 float out = cospuls(warpedf, 0.0, 2.0); |
127 // now scale to correct for note density | 128 // now scale to correct for note density |
128 float c = log(2.0)/binsperoctave; | 129 float c = log(2.0)/binsperoctave; |
129 if (x > 0) { | 130 if (x > 0) { |
130 out = out / (c * x); | 131 out = out / (c * x); |
131 } else { | 132 } else { |
132 out = 0; | 133 out = 0; |
133 } | 134 } |
134 return out; | 135 return out; |
135 } | 136 } |
136 | 137 |
137 bool logFreqMatrix(int fs, int blocksize, float *outmatrix) { | 138 bool logFreqMatrix(int fs, int blocksize, float *outmatrix) { |
138 | 139 |
139 int binspersemitone = 3; // this must be 3 | 140 int binspersemitone = 3; // this must be 3 |
140 int minoctave = 0; // this must be 0 | 141 int minoctave = 0; // this must be 0 |
141 int maxoctave = 7; // this must be 7 | 142 int maxoctave = 7; // this must be 7 |
142 int oversampling = 80; | 143 int oversampling = 80; |
143 | 144 |
144 // linear frequency vector | 145 // linear frequency vector |
145 vector<float> fft_f; | 146 vector<float> fft_f; |
146 for (int i = 0; i < blocksize/2; ++i) { | 147 for (int i = 0; i < blocksize/2; ++i) { |
147 fft_f.push_back(i * (fs * 1.0 / blocksize)); | 148 fft_f.push_back(i * (fs * 1.0 / blocksize)); |
148 } | 149 } |
149 float fft_width = fs * 2.0 / blocksize; | 150 float fft_width = fs * 2.0 / blocksize; |
150 | 151 |
151 // linear oversampled frequency vector | 152 // linear oversampled frequency vector |
152 vector<float> oversampled_f; | 153 vector<float> oversampled_f; |
153 for (unsigned int i = 0; i < oversampling * blocksize/2; ++i) { | 154 for (unsigned int i = 0; i < oversampling * blocksize/2; ++i) { |
154 oversampled_f.push_back(i * ((fs * 1.0 / blocksize) / oversampling)); | 155 oversampled_f.push_back(i * ((fs * 1.0 / blocksize) / oversampling)); |
155 } | 156 } |
156 | 157 |
157 // pitch-spaced frequency vector | 158 // pitch-spaced frequency vector |
158 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone! | 159 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone! |
159 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone! | 160 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone! |
160 vector<float> cq_f; | 161 vector<float> cq_f; |
161 float oob = 1.0/binspersemitone; // one over binspersemitone | 162 float oob = 1.0/binspersemitone; // one over binspersemitone |
162 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12 | 163 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12 |
163 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69))); | 164 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69))); |
164 for (int i = minMIDI + 1; i < maxMIDI; ++i) { | 165 for (int i = minMIDI + 1; i < maxMIDI; ++i) { |
165 for (int k = -1; k < 2; ++k) { | 166 for (int k = -1; k < 2; ++k) { |
166 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69))); | 167 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69))); |
167 } | 168 } |
168 } | 169 } |
169 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69))); | 170 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69))); |
170 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69))); | 171 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69))); |
171 | 172 |
172 int nFFT = fft_f.size(); | 173 int nFFT = fft_f.size(); |
173 | 174 |
174 vector<float> fft_activation; | 175 vector<float> fft_activation; |
175 for (int iOS = 0; iOS < 2 * oversampling; ++iOS) { | 176 for (int iOS = 0; iOS < 2 * oversampling; ++iOS) { |
176 float cosp = cospuls(oversampled_f[iOS],fft_f[1],fft_width); | 177 float cosp = cospuls(oversampled_f[iOS],fft_f[1],fft_width); |
177 fft_activation.push_back(cosp); | 178 fft_activation.push_back(cosp); |
178 // cerr << cosp << endl; | 179 // cerr << cosp << endl; |
179 } | 180 } |
180 | 181 |
181 float cq_activation; | 182 float cq_activation; |
182 for (int iFFT = 1; iFFT < nFFT; ++iFFT) { | 183 for (int iFFT = 1; iFFT < nFFT; ++iFFT) { |
183 // find frequency stretch where the oversampled vector can be non-zero (i.e. in a window of width fft_width around the current frequency) | 184 // find frequency stretch where the oversampled vector can be non-zero (i.e. in a window of width fft_width around the current frequency) |
184 int curr_start = oversampling * iFFT - oversampling; | 185 int curr_start = oversampling * iFFT - oversampling; |
185 int curr_end = oversampling * iFFT + oversampling; // don't know if I should add "+1" here | 186 int curr_end = oversampling * iFFT + oversampling; // don't know if I should add "+1" here |
186 // cerr << oversampled_f[curr_start] << " " << fft_f[iFFT] << " " << oversampled_f[curr_end] << endl; | 187 // cerr << oversampled_f[curr_start] << " " << fft_f[iFFT] << " " << oversampled_f[curr_end] << endl; |
187 for (unsigned iCQ = 0; iCQ < cq_f.size(); ++iCQ) { | 188 for (unsigned iCQ = 0; iCQ < cq_f.size(); ++iCQ) { |
188 outmatrix[iFFT + nFFT * iCQ] = 0; | 189 outmatrix[iFFT + nFFT * iCQ] = 0; |
189 if (cq_f[iCQ] * pow(2.0, 0.084) + fft_width > fft_f[iFFT] && cq_f[iCQ] * pow(2.0, -0.084 * 2) - fft_width < fft_f[iFFT]) { // within a generous neighbourhood | 190 if (cq_f[iCQ] * pow(2.0, 0.084) + fft_width > fft_f[iFFT] && cq_f[iCQ] * pow(2.0, -0.084 * 2) - fft_width < fft_f[iFFT]) { // within a generous neighbourhood |
190 for (int iOS = curr_start; iOS < curr_end; ++iOS) { | 191 for (int iOS = curr_start; iOS < curr_end; ++iOS) { |
191 cq_activation = pitchCospuls(oversampled_f[iOS],cq_f[iCQ],binspersemitone*12); | 192 cq_activation = pitchCospuls(oversampled_f[iOS],cq_f[iCQ],binspersemitone*12); |
192 // cerr << oversampled_f[iOS] << " " << cq_f[iCQ] << " " << cq_activation << endl; | 193 // cerr << oversampled_f[iOS] << " " << cq_f[iCQ] << " " << cq_activation << endl; |
193 outmatrix[iFFT + nFFT * iCQ] += cq_activation * fft_activation[iOS-curr_start]; | 194 outmatrix[iFFT + nFFT * iCQ] += cq_activation * fft_activation[iOS-curr_start]; |
194 } | 195 } |
195 // if (iCQ == 1 || iCQ == 2) { | 196 // if (iCQ == 1 || iCQ == 2) { |
196 // cerr << " " << outmatrix[iFFT + nFFT * iCQ] << endl; | 197 // cerr << " " << outmatrix[iFFT + nFFT * iCQ] << endl; |
197 // } | 198 // } |
198 } | 199 } |
199 } | 200 } |
200 } | 201 } |
201 return true; | 202 return true; |
202 } | 203 } |
203 | 204 |
204 void dictionaryMatrix(float* dm) { | 205 void dictionaryMatrix(float* dm) { |
205 int binspersemitone = 3; // this must be 3 | 206 int binspersemitone = 3; // this must be 3 |
206 int minoctave = 0; // this must be 0 | 207 int minoctave = 0; // this must be 0 |
207 int maxoctave = 7; // this must be 7 | 208 int maxoctave = 7; // this must be 7 |
208 float s_param = 0.7; | 209 float s_param = 0.7; |
209 | 210 |
210 // pitch-spaced frequency vector | 211 // pitch-spaced frequency vector |
211 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone! | 212 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone! |
212 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone! | 213 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone! |
213 vector<float> cq_f; | 214 vector<float> cq_f; |
214 float oob = 1.0/binspersemitone; // one over binspersemitone | 215 float oob = 1.0/binspersemitone; // one over binspersemitone |
215 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12 | 216 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12 |
216 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69))); | 217 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69))); |
217 for (int i = minMIDI + 1; i < maxMIDI; ++i) { | 218 for (int i = minMIDI + 1; i < maxMIDI; ++i) { |
218 for (int k = -1; k < 2; ++k) { | 219 for (int k = -1; k < 2; ++k) { |
219 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69))); | 220 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69))); |
220 } | 221 } |
221 } | 222 } |
222 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69))); | 223 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69))); |
223 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69))); | 224 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69))); |
224 | 225 |
225 float curr_f; | 226 float curr_f; |
226 float floatbin; | 227 float floatbin; |
227 float curr_amp; | 228 float curr_amp; |
228 // now for every combination calculate the matrix element | 229 // now for every combination calculate the matrix element |
229 for (unsigned iOut = 0; iOut < 12 * (maxoctave - minoctave); ++iOut) { | 230 for (unsigned iOut = 0; iOut < 12 * (maxoctave - minoctave); ++iOut) { |
230 // cerr << iOut << endl; | 231 // cerr << iOut << endl; |
231 for (unsigned iHarm = 1; iHarm <= 20; ++iHarm) { | 232 for (unsigned iHarm = 1; iHarm <= 20; ++iHarm) { |
232 curr_f = 440 * pow(2,(minMIDI-69+iOut)*1.0/12) * iHarm; | 233 curr_f = 440 * pow(2,(minMIDI-69+iOut)*1.0/12) * iHarm; |
233 // if (curr_f > cq_f[nNote-1]) break; | 234 // if (curr_f > cq_f[nNote-1]) break; |
234 floatbin = ((iOut + 1) * binspersemitone + 1) + binspersemitone * 12 * log2(iHarm); | 235 floatbin = ((iOut + 1) * binspersemitone + 1) + binspersemitone * 12 * log2(iHarm); |
235 // cerr << floatbin << endl; | 236 // cerr << floatbin << endl; |
236 curr_amp = pow(s_param,float(iHarm-1)); | 237 curr_amp = pow(s_param,float(iHarm-1)); |
237 // cerr << "curramp" << curr_amp << endl; | 238 // cerr << "curramp" << curr_amp << endl; |
238 for (unsigned iNote = 0; iNote < nNote; ++iNote) { | 239 for (unsigned iNote = 0; iNote < nNote; ++iNote) { |
239 if (abs(iNote+1.0-floatbin)<2) { | 240 if (abs(iNote+1.0-floatbin)<2) { |
240 dm[iNote + 256 * iOut] += cospuls(iNote+1.0, floatbin, binspersemitone + 0.0) * curr_amp; | 241 dm[iNote + 256 * iOut] += cospuls(iNote+1.0, floatbin, binspersemitone + 0.0) * curr_amp; |
241 // dm[iNote + nNote * iOut] += 1 * curr_amp; | 242 // dm[iNote + nNote * iOut] += 1 * curr_amp; |
242 } | 243 } |
243 } | 244 } |
244 } | 245 } |
245 } | 246 } |
246 | 247 |
247 | 248 |
248 } | 249 } |
249 | 250 |
250 string get_env_var( std::string const & key ) { | 251 string get_env_var( std::string const & key ) { |
251 char * val; | 252 char * val; |
252 val = getenv( key.c_str() ); | 253 val = getenv( key.c_str() ); |
253 string retval; | 254 string retval; |
254 if (val != NULL) { | 255 if (val != NULL) { |
255 retval = val; | 256 retval = val; |
256 } | 257 } |
257 return retval; | 258 return retval; |
258 } | 259 } |
259 | 260 |
260 | 261 |
261 vector<string> chordDictionary(vector<float> *mchorddict) { | 262 vector<string> chordDictionary(vector<float> *mchorddict) { |
262 // ifstream chordDictFile; | 263 // ifstream chordDictFile; |
263 string chordDictFilename(get_env_var("VAMP_PATH")+"/chord.dict"); | 264 string chordDictFilename(get_env_var("VAMP_PATH")+"/chord.dict"); |
264 // string instring[] = ",1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\nm,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0\n6,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0\n7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0\nmaj7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1\nmin7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0\n,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\n,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\ndim,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0\naug,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0\n"; | 265 // string instring[] = ",1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\nm,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0\n6,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0\n7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0\nmaj7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1\nmin7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0\n,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\n,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\ndim,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0\naug,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0\n"; |
265 typedef tokenizer<char_separator<char> > Tok; | 266 typedef tokenizer<char_separator<char> > Tok; |
266 // char_separator<char> sep; // default constructed | 267 // char_separator<char> sep; // default constructed |
267 char_separator<char> sep(",; ","="); | 268 char_separator<char> sep(",; ","="); |
268 iostreams::stream<iostreams::file_source> chordDictFile(chordDictFilename.c_str()); | 269 iostreams::stream<iostreams::file_source> chordDictFile(chordDictFilename.c_str()); |
269 string line; | 270 string line; |
270 int iElement = 0; | 271 int iElement = 0; |
271 int nChord = 0; | 272 int nChord = 0; |
272 | 273 |
273 vector<string> loadedChordNames; | 274 vector<string> loadedChordNames; |
274 vector<float> loadedChordDict; | 275 vector<float> loadedChordDict; |
275 if (chordDictFile.is_open()) { | 276 if (chordDictFile.is_open()) { |
276 while (std::getline(chordDictFile, line)) { // loop over lines in chord.dict file | 277 while (std::getline(chordDictFile, line)) { // loop over lines in chord.dict file |
277 // first, get the chord definition | 278 // first, get the chord definition |
278 string chordType; | 279 string chordType; |
279 vector<float> tempPCVector; | 280 vector<float> tempPCVector; |
280 // cerr << line << endl; | 281 // cerr << line << endl; |
281 if (!line.empty() && line.substr(0,1) != "#") { | 282 if (!line.empty() && line.substr(0,1) != "#") { |
282 Tok tok(line, sep); | 283 Tok tok(line, sep); |
283 for(Tok::iterator tok_iter = tok.begin(); tok_iter != tok.end(); ++tok_iter) { // loop over line elements | 284 for(Tok::iterator tok_iter = tok.begin(); tok_iter != tok.end(); ++tok_iter) { // loop over line elements |
284 string tempString = *tok_iter; | 285 string tempString = *tok_iter; |
285 // cerr << tempString << endl; | 286 // cerr << tempString << endl; |
286 if (tok_iter == tok.begin()) { // either the chord name or a colon | 287 if (tok_iter == tok.begin()) { // either the chord name or a colon |
287 if (tempString == "=") { | 288 if (tempString == "=") { |
288 chordType = ""; | 289 chordType = ""; |
289 } else { | 290 } else { |
290 chordType = tempString; | 291 chordType = tempString; |
291 tok_iter++; // is this cheating ? :) | 292 tok_iter++; // is this cheating ? :) |
292 } | 293 } |
293 } else { | 294 } else { |
294 tempPCVector.push_back(lexical_cast<float>(*tok_iter)); | 295 tempPCVector.push_back(lexical_cast<float>(*tok_iter)); |
295 } | 296 } |
296 } | 297 } |
297 | 298 |
298 // now make all 12 chords of every type | 299 // now make all 12 chords of every type |
299 for (unsigned iSemitone = 0; iSemitone < 12; iSemitone++) { | 300 for (unsigned iSemitone = 0; iSemitone < 12; iSemitone++) { |
300 // add bass slash notation | 301 // add bass slash notation |
301 string slashNotation = ""; | 302 string slashNotation = ""; |
302 for (unsigned kSemitone = 1; kSemitone < 12; kSemitone++) { | 303 for (unsigned kSemitone = 1; kSemitone < 12; kSemitone++) { |
303 if (tempPCVector[(kSemitone) % 12] > 0.99) { | 304 if (tempPCVector[(kSemitone) % 12] > 0.99) { |
304 slashNotation = bassnames[iSemitone][kSemitone]; | 305 slashNotation = bassnames[iSemitone][kSemitone]; |
305 } | 306 } |
306 } | 307 } |
307 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) { // bass pitch classes | 308 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) { // bass pitch classes |
308 // cerr << ((kSemitone - iSemitone + 12) % 12) << endl; | 309 // cerr << ((kSemitone - iSemitone + 12) % 12) << endl; |
309 float bassValue = 0; | 310 float bassValue = 0; |
310 if (tempPCVector[(kSemitone - iSemitone + 12) % 12]==1) { | 311 if (tempPCVector[(kSemitone - iSemitone + 12) % 12]==1) { |
311 bassValue = 1; | 312 bassValue = 1; |
312 } else { | 313 } else { |
313 if (tempPCVector[((kSemitone - iSemitone + 12) % 12) + 12] == 1) bassValue = 0.5; | 314 if (tempPCVector[((kSemitone - iSemitone + 12) % 12) + 12] == 1) bassValue = 0.5; |
314 } | 315 } |
315 loadedChordDict.push_back(bassValue); | 316 loadedChordDict.push_back(bassValue); |
316 } | 317 } |
317 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) { // chord pitch classes | 318 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) { // chord pitch classes |
318 loadedChordDict.push_back(tempPCVector[((kSemitone - iSemitone + 12) % 12) + 12]); | 319 loadedChordDict.push_back(tempPCVector[((kSemitone - iSemitone + 12) % 12) + 12]); |
319 } | 320 } |
320 ostringstream os; | 321 ostringstream os; |
321 if (slashNotation.empty()) { | 322 if (slashNotation.empty()) { |
322 os << notenames[12+iSemitone] << chordType; | 323 os << notenames[12+iSemitone] << chordType; |
323 } else { | 324 } else { |
324 os << notenames[12+iSemitone] << chordType << "/" << slashNotation; | 325 os << notenames[12+iSemitone] << chordType << "/" << slashNotation; |
325 } | 326 } |
326 // cerr << os.str() << endl; | 327 // cerr << os.str() << endl; |
327 loadedChordNames.push_back(os.str()); | 328 loadedChordNames.push_back(os.str()); |
328 } | 329 } |
329 } | 330 } |
330 } | 331 } |
331 // N type | 332 // N type |
332 loadedChordNames.push_back("N"); | 333 loadedChordNames.push_back("N"); |
333 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) loadedChordDict.push_back(0.5); | 334 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) loadedChordDict.push_back(0.5); |
334 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) loadedChordDict.push_back(1.0); | 335 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) loadedChordDict.push_back(1.0); |
335 | 336 |
336 // normalise | 337 // normalise |
337 float sum = 0; | 338 float sum = 0; |
338 for (int i = 0; i < loadedChordDict.size(); i++) { | 339 for (int i = 0; i < loadedChordDict.size(); i++) { |
339 sum += pow(loadedChordDict[i],2); | 340 sum += pow(loadedChordDict[i],2); |
340 if (i % 24 == 23) { | 341 if (i % 24 == 23) { |
341 float invertedsum = 1.0/sqrt(sum); | 342 float invertedsum = 1.0/sqrt(sum); |
342 for (int k = 0; k < 24; k++) { | 343 for (int k = 0; k < 24; k++) { |
343 loadedChordDict[i-k] *= invertedsum; | 344 loadedChordDict[i-k] *= invertedsum; |
344 } | 345 } |
345 sum = 0; | 346 sum = 0; |
346 } | 347 } |
347 | 348 |
348 } | 349 } |
349 | 350 |
350 | 351 |
351 nChord = 0; | 352 nChord = 0; |
352 for (int i = 0; i < loadedChordNames.size(); i++) { | 353 for (int i = 0; i < loadedChordNames.size(); i++) { |
353 nChord++; | 354 nChord++; |
354 } | 355 } |
355 chordDictFile.close(); | 356 chordDictFile.close(); |
356 | 357 |
357 | 358 |
358 // mchorddict = new float[nChord*24]; | 359 // mchorddict = new float[nChord*24]; |
359 for (int i = 0; i < nChord*24; i++) { | 360 for (int i = 0; i < nChord*24; i++) { |
360 mchorddict->push_back(loadedChordDict[i]); | 361 mchorddict->push_back(loadedChordDict[i]); |
361 } | 362 } |
362 | 363 |
363 } else {// use default from chorddict.cpp | 364 } else {// use default from chorddict.cpp |
364 // mchorddict = new float[nChorddict]; | 365 // mchorddict = new float[nChorddict]; |
365 for (int i = 0; i < nChorddict; i++) { | 366 for (int i = 0; i < nChorddict; i++) { |
366 mchorddict->push_back(chorddict[i]); | 367 mchorddict->push_back(chorddict[i]); |
367 } | 368 } |
368 | 369 |
369 nChord = nChorddict/24; | 370 nChord = nChorddict/24; |
370 // mchordnames = new string[nChorddict/24]; | 371 // mchordnames = new string[nChorddict/24]; |
371 char buffer1 [50]; | 372 char buffer1 [50]; |
372 for (int i = 0; i < nChorddict/24; i++) { | 373 for (int i = 0; i < nChorddict/24; i++) { |
373 if (i < nChorddict/24 - 1) { | 374 if (i < nChorddict/24 - 1) { |
374 sprintf(buffer1, "%s%s", notenames[i % 12 + 12], chordtypes[i]); | 375 sprintf(buffer1, "%s%s", notenames[i % 12 + 12], chordtypes[i]); |
375 } else { | 376 } else { |
376 sprintf(buffer1, "N"); | 377 sprintf(buffer1, "N"); |
377 } | 378 } |
378 ostringstream os; | 379 ostringstream os; |
379 os << buffer1; | 380 os << buffer1; |
380 loadedChordNames.push_back(os.str()); | 381 loadedChordNames.push_back(os.str()); |
381 | 382 |
382 } | 383 } |
383 | 384 |
384 } | 385 } |
385 // cerr << "before leaving" << chordnames[1] << endl; | 386 // cerr << "before leaving" << chordnames[1] << endl; |
386 return loadedChordNames; | 387 return loadedChordNames; |
387 } | 388 } |
388 | 389 |
389 NNLSChroma::NNLSChroma(float inputSampleRate) : | 390 NNLSChroma::NNLSChroma(float inputSampleRate) : |
390 Plugin(inputSampleRate), | 391 Plugin(inputSampleRate), |
391 m_fl(0), | 392 m_fl(0), |
392 m_blockSize(0), | 393 m_blockSize(0), |
393 m_stepSize(0), | 394 m_stepSize(0), |
394 m_lengthOfNoteIndex(0), | 395 m_lengthOfNoteIndex(0), |
395 m_meanTuning0(0), | 396 m_meanTuning0(0), |
396 m_meanTuning1(0), | 397 m_meanTuning1(0), |
397 m_meanTuning2(0), | 398 m_meanTuning2(0), |
398 m_localTuning0(0), | 399 m_localTuning0(0), |
399 m_localTuning1(0), | 400 m_localTuning1(0), |
400 m_localTuning2(0), | 401 m_localTuning2(0), |
401 m_paling(1.0), | 402 m_paling(1.0), |
402 m_preset(0.0), | 403 m_preset(0.0), |
403 m_localTuning(0), | 404 m_localTuning(0), |
404 m_kernelValue(0), | 405 m_kernelValue(0), |
405 m_kernelFftIndex(0), | 406 m_kernelFftIndex(0), |
406 m_kernelNoteIndex(0), | 407 m_kernelNoteIndex(0), |
407 m_dict(0), | 408 m_dict(0), |
408 m_tuneLocal(false), | 409 m_tuneLocal(false), |
409 m_dictID(0), | 410 m_dictID(0), |
410 m_chorddict(0), | 411 m_chorddict(0), |
411 m_chordnames(0), | 412 m_chordnames(0), |
412 m_doNormalizeChroma(0), | 413 m_doNormalizeChroma(0), |
413 m_rollon(0.01) | 414 m_rollon(0.01) |
414 { | 415 { |
415 if (debug_on) cerr << "--> NNLSChroma" << endl; | 416 if (debug_on) cerr << "--> NNLSChroma" << endl; |
416 | 417 |
417 // make the *note* dictionary matrix | 418 // make the *note* dictionary matrix |
418 m_dict = new float[nNote * 84]; | 419 m_dict = new float[nNote * 84]; |
419 for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0; | 420 for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0; |
420 dictionaryMatrix(m_dict); | 421 dictionaryMatrix(m_dict); |
421 | 422 |
422 // get the *chord* dictionary from file (if the file exists) | 423 // get the *chord* dictionary from file (if the file exists) |
423 m_chordnames = chordDictionary(&m_chorddict); | 424 m_chordnames = chordDictionary(&m_chorddict); |
424 } | 425 } |
425 | 426 |
426 | 427 |
427 NNLSChroma::~NNLSChroma() | 428 NNLSChroma::~NNLSChroma() |
428 { | 429 { |
429 if (debug_on) cerr << "--> ~NNLSChroma" << endl; | 430 if (debug_on) cerr << "--> ~NNLSChroma" << endl; |
430 delete [] m_dict; | 431 delete [] m_dict; |
431 // delete [] m_chorddict; | 432 // delete [] m_chorddict; |
432 // delete m_chordnames; | 433 // delete m_chordnames; |
433 } | 434 } |
434 | 435 |
435 string | 436 string |
436 NNLSChroma::getIdentifier() const | 437 NNLSChroma::getIdentifier() const |
437 { | 438 { |
438 if (debug_on) cerr << "--> getIdentifier" << endl; | 439 if (debug_on) cerr << "--> getIdentifier" << endl; |
439 return "nnls_chroma"; | 440 return "nnls_chroma"; |
440 } | 441 } |
441 | 442 |
442 string | 443 string |
443 NNLSChroma::getName() const | 444 NNLSChroma::getName() const |
444 { | 445 { |
445 if (debug_on) cerr << "--> getName" << endl; | 446 if (debug_on) cerr << "--> getName" << endl; |
446 return "NNLS Chroma"; | 447 return "NNLS Chroma"; |
447 } | 448 } |
448 | 449 |
449 string | 450 string |
450 NNLSChroma::getDescription() const | 451 NNLSChroma::getDescription() const |
451 { | 452 { |
452 // Return something helpful here! | 453 // Return something helpful here! |
453 if (debug_on) cerr << "--> getDescription" << endl; | 454 if (debug_on) cerr << "--> getDescription" << endl; |
454 return "This plugin provides a number of features derived from a log-frequency amplitude spectrum of the DFT: some variants of the log-frequency spectrum, including a semitone spectrum derived from approximate transcription using the NNLS algorithm; based on this semitone spectrum, chroma features and a simple chord estimate."; | 455 return "This plugin provides a number of features derived from a log-frequency amplitude spectrum of the DFT: some variants of the log-frequency spectrum, including a semitone spectrum derived from approximate transcription using the NNLS algorithm; based on this semitone spectrum, chroma features and a simple chord estimate."; |
455 } | 456 } |
456 | 457 |
457 string | 458 string |
458 NNLSChroma::getMaker() const | 459 NNLSChroma::getMaker() const |
459 { | 460 { |
460 if (debug_on) cerr << "--> getMaker" << endl; | 461 if (debug_on) cerr << "--> getMaker" << endl; |
461 // Your name here | 462 // Your name here |
462 return "Matthias Mauch"; | 463 return "Matthias Mauch"; |
463 } | 464 } |
464 | 465 |
465 int | 466 int |
466 NNLSChroma::getPluginVersion() const | 467 NNLSChroma::getPluginVersion() const |
467 { | 468 { |
468 if (debug_on) cerr << "--> getPluginVersion" << endl; | 469 if (debug_on) cerr << "--> getPluginVersion" << endl; |
469 // Increment this each time you release a version that behaves | 470 // Increment this each time you release a version that behaves |
470 // differently from the previous one | 471 // differently from the previous one |
471 return 1; | 472 return 1; |
472 } | 473 } |
473 | 474 |
474 string | 475 string |
475 NNLSChroma::getCopyright() const | 476 NNLSChroma::getCopyright() const |
476 { | 477 { |
477 if (debug_on) cerr << "--> getCopyright" << endl; | 478 if (debug_on) cerr << "--> getCopyright" << endl; |
478 // This function is not ideally named. It does not necessarily | 479 // This function is not ideally named. It does not necessarily |
479 // need to say who made the plugin -- getMaker does that -- but it | 480 // need to say who made the plugin -- getMaker does that -- but it |
480 // should indicate the terms under which it is distributed. For | 481 // should indicate the terms under which it is distributed. For |
481 // example, "Copyright (year). All Rights Reserved", or "GPL" | 482 // example, "Copyright (year). All Rights Reserved", or "GPL" |
482 return "Copyright (2010). All rights reserved."; | 483 return "Copyright (2010). All rights reserved."; |
483 } | 484 } |
484 | 485 |
485 NNLSChroma::InputDomain | 486 NNLSChroma::InputDomain |
486 NNLSChroma::getInputDomain() const | 487 NNLSChroma::getInputDomain() const |
487 { | 488 { |
488 if (debug_on) cerr << "--> getInputDomain" << endl; | 489 if (debug_on) cerr << "--> getInputDomain" << endl; |
489 return FrequencyDomain; | 490 return FrequencyDomain; |
490 } | 491 } |
491 | 492 |
492 size_t | 493 size_t |
493 NNLSChroma::getPreferredBlockSize() const | 494 NNLSChroma::getPreferredBlockSize() const |
494 { | 495 { |
495 if (debug_on) cerr << "--> getPreferredBlockSize" << endl; | 496 if (debug_on) cerr << "--> getPreferredBlockSize" << endl; |
496 return 16384; // 0 means "I can handle any block size" | 497 return 16384; // 0 means "I can handle any block size" |
497 } | 498 } |
498 | 499 |
499 size_t | 500 size_t |
500 NNLSChroma::getPreferredStepSize() const | 501 NNLSChroma::getPreferredStepSize() const |
501 { | 502 { |
502 if (debug_on) cerr << "--> getPreferredStepSize" << endl; | 503 if (debug_on) cerr << "--> getPreferredStepSize" << endl; |
503 return 2048; // 0 means "anything sensible"; in practice this | 504 return 2048; // 0 means "anything sensible"; in practice this |
504 // means the same as the block size for TimeDomain | 505 // means the same as the block size for TimeDomain |
505 // plugins, or half of it for FrequencyDomain plugins | 506 // plugins, or half of it for FrequencyDomain plugins |
506 } | 507 } |
507 | 508 |
508 size_t | 509 size_t |
509 NNLSChroma::getMinChannelCount() const | 510 NNLSChroma::getMinChannelCount() const |
510 { | 511 { |
511 if (debug_on) cerr << "--> getMinChannelCount" << endl; | 512 if (debug_on) cerr << "--> getMinChannelCount" << endl; |
512 return 1; | 513 return 1; |
513 } | 514 } |
514 | 515 |
515 size_t | 516 size_t |
516 NNLSChroma::getMaxChannelCount() const | 517 NNLSChroma::getMaxChannelCount() const |
517 { | 518 { |
518 if (debug_on) cerr << "--> getMaxChannelCount" << endl; | 519 if (debug_on) cerr << "--> getMaxChannelCount" << endl; |
519 return 1; | 520 return 1; |
520 } | 521 } |
521 | 522 |
522 NNLSChroma::ParameterList | 523 NNLSChroma::ParameterList |
523 NNLSChroma::getParameterDescriptors() const | 524 NNLSChroma::getParameterDescriptors() const |
524 { | 525 { |
525 if (debug_on) cerr << "--> getParameterDescriptors" << endl; | 526 if (debug_on) cerr << "--> getParameterDescriptors" << endl; |
526 ParameterList list; | 527 ParameterList list; |
527 | 528 |
528 ParameterDescriptor d3; | 529 ParameterDescriptor d3; |
529 d3.identifier = "preset"; | 530 d3.identifier = "preset"; |
530 d3.name = "preset"; | 531 d3.name = "preset"; |
531 d3.description = "Spectral paling: no paling - 0; whitening - 1."; | 532 d3.description = "Spectral paling: no paling - 0; whitening - 1."; |
532 d3.unit = ""; | 533 d3.unit = ""; |
533 d3.isQuantized = true; | 534 d3.isQuantized = true; |
534 d3.quantizeStep = 1; | 535 d3.quantizeStep = 1; |
535 d3.minValue = 0.0; | 536 d3.minValue = 0.0; |
536 d3.maxValue = 3.0; | 537 d3.maxValue = 3.0; |
537 d3.defaultValue = 0.0; | 538 d3.defaultValue = 0.0; |
538 d3.valueNames.push_back("polyphonic pop"); | 539 d3.valueNames.push_back("polyphonic pop"); |
539 d3.valueNames.push_back("polyphonic pop (fast)"); | 540 d3.valueNames.push_back("polyphonic pop (fast)"); |
540 d3.valueNames.push_back("solo keyboard"); | 541 d3.valueNames.push_back("solo keyboard"); |
541 d3.valueNames.push_back("manual"); | 542 d3.valueNames.push_back("manual"); |
542 list.push_back(d3); | 543 list.push_back(d3); |
543 | 544 |
544 ParameterDescriptor d5; | 545 ParameterDescriptor d5; |
545 d5.identifier = "rollon"; | 546 d5.identifier = "rollon"; |
546 d5.name = "spectral roll-on"; | 547 d5.name = "spectral roll-on"; |
547 d5.description = "The bins below the spectral roll-on quantile will be set to 0."; | 548 d5.description = "The bins below the spectral roll-on quantile will be set to 0."; |
548 d5.unit = ""; | 549 d5.unit = ""; |
549 d5.minValue = 0; | 550 d5.minValue = 0; |
550 d5.maxValue = 1; | 551 d5.maxValue = 1; |
551 d5.defaultValue = 0; | 552 d5.defaultValue = 0; |
552 d5.isQuantized = false; | 553 d5.isQuantized = false; |
553 list.push_back(d5); | 554 list.push_back(d5); |
554 | 555 |
555 // ParameterDescriptor d0; | 556 // ParameterDescriptor d0; |
556 // d0.identifier = "notedict"; | 557 // d0.identifier = "notedict"; |
557 // d0.name = "note dictionary"; | 558 // d0.name = "note dictionary"; |
558 // d0.description = "Notes in different note dictionaries differ by their spectral shapes."; | 559 // d0.description = "Notes in different note dictionaries differ by their spectral shapes."; |
578 d1.valueNames.push_back("global tuning"); | 579 d1.valueNames.push_back("global tuning"); |
579 d1.valueNames.push_back("local tuning"); | 580 d1.valueNames.push_back("local tuning"); |
580 d1.quantizeStep = 1.0; | 581 d1.quantizeStep = 1.0; |
581 list.push_back(d1); | 582 list.push_back(d1); |
582 | 583 |
583 // ParameterDescriptor d2; | 584 // ParameterDescriptor d2; |
584 // d2.identifier = "paling"; | 585 // d2.identifier = "paling"; |
585 // d2.name = "spectral paling"; | 586 // d2.name = "spectral paling"; |
586 // d2.description = "Spectral paling: no paling - 0; whitening - 1."; | 587 // d2.description = "Spectral paling: no paling - 0; whitening - 1."; |
587 // d2.unit = ""; | 588 // d2.unit = ""; |
588 // d2.isQuantized = true; | 589 // d2.isQuantized = true; |
589 // // d2.quantizeStep = 0.1; | 590 // // d2.quantizeStep = 0.1; |
590 // d2.minValue = 0.0; | 591 // d2.minValue = 0.0; |
591 // d2.maxValue = 1.0; | 592 // d2.maxValue = 1.0; |
592 // d2.defaultValue = 1.0; | 593 // d2.defaultValue = 1.0; |
593 // d2.isQuantized = false; | 594 // d2.isQuantized = false; |
594 // list.push_back(d2); | 595 // list.push_back(d2); |
595 ParameterDescriptor d4; | 596 ParameterDescriptor d4; |
596 d4.identifier = "chromanormalize"; | 597 d4.identifier = "chromanormalize"; |
597 d4.name = "chroma normalization"; | 598 d4.name = "chroma normalization"; |
598 d4.description = "How shall the chroma vector be normalized?"; | 599 d4.description = "How shall the chroma vector be normalized?"; |
599 d4.unit = ""; | 600 d4.unit = ""; |
600 d4.minValue = 0; | 601 d4.minValue = 0; |
601 d4.maxValue = 3; | 602 d4.maxValue = 3; |
602 d4.defaultValue = 0; | 603 d4.defaultValue = 0; |
603 d4.isQuantized = true; | 604 d4.isQuantized = true; |
604 d4.valueNames.push_back("none"); | 605 d4.valueNames.push_back("none"); |
605 d4.valueNames.push_back("maximum norm"); | 606 d4.valueNames.push_back("maximum norm"); |
606 d4.valueNames.push_back("L1 norm"); | 607 d4.valueNames.push_back("L1 norm"); |
607 d4.valueNames.push_back("L2 norm"); | 608 d4.valueNames.push_back("L2 norm"); |
608 d4.quantizeStep = 1.0; | 609 d4.quantizeStep = 1.0; |
609 list.push_back(d4); | 610 list.push_back(d4); |
610 | 611 |
611 return list; | 612 return list; |
612 } | 613 } |
613 | 614 |
614 float | 615 float |
615 NNLSChroma::getParameter(string identifier) const | 616 NNLSChroma::getParameter(string identifier) const |
616 { | 617 { |
617 if (debug_on) cerr << "--> getParameter" << endl; | 618 if (debug_on) cerr << "--> getParameter" << endl; |
618 if (identifier == "notedict") { | 619 if (identifier == "notedict") { |
619 return m_dictID; | 620 return m_dictID; |
620 } | 621 } |
621 | 622 |
622 if (identifier == "paling") { | 623 if (identifier == "paling") { |
623 return m_paling; | 624 return m_paling; |
624 } | 625 } |
625 | 626 |
626 if (identifier == "rollon") { | 627 if (identifier == "rollon") { |
627 return m_rollon; | 628 return m_rollon; |
628 } | 629 } |
629 | 630 |
630 if (identifier == "tuningmode") { | 631 if (identifier == "tuningmode") { |
631 if (m_tuneLocal) { | 632 if (m_tuneLocal) { |
632 return 1.0; | 633 return 1.0; |
633 } else { | 634 } else { |
634 return 0.0; | 635 return 0.0; |
635 } | 636 } |
636 } | 637 } |
637 if (identifier == "preset") { | 638 if (identifier == "preset") { |
638 return m_preset; | 639 return m_preset; |
639 } | 640 } |
640 if (identifier == "chromanormalize") { | 641 if (identifier == "chromanormalize") { |
641 return m_doNormalizeChroma; | 642 return m_doNormalizeChroma; |
642 } | 643 } |
643 return 0; | 644 return 0; |
644 | 645 |
645 } | 646 } |
646 | 647 |
647 void | 648 void |
648 NNLSChroma::setParameter(string identifier, float value) | 649 NNLSChroma::setParameter(string identifier, float value) |
649 { | 650 { |
650 if (debug_on) cerr << "--> setParameter" << endl; | 651 if (debug_on) cerr << "--> setParameter" << endl; |
651 if (identifier == "notedict") { | 652 if (identifier == "notedict") { |
652 m_dictID = (int) value; | 653 m_dictID = (int) value; |
653 } | 654 } |
654 | 655 |
655 if (identifier == "paling") { | 656 if (identifier == "paling") { |
660 m_tuneLocal = (value > 0) ? true : false; | 661 m_tuneLocal = (value > 0) ? true : false; |
661 // cerr << "m_tuneLocal :" << m_tuneLocal << endl; | 662 // cerr << "m_tuneLocal :" << m_tuneLocal << endl; |
662 } | 663 } |
663 if (identifier == "preset") { | 664 if (identifier == "preset") { |
664 m_preset = value; | 665 m_preset = value; |
665 if (m_preset == 0.0) { | 666 if (m_preset == 0.0) { |
666 m_tuneLocal = false; | 667 m_tuneLocal = false; |
667 m_paling = 1.0; | 668 m_paling = 1.0; |
668 m_dictID = 0.0; | 669 m_dictID = 0.0; |
669 } | 670 } |
670 if (m_preset == 1.0) { | 671 if (m_preset == 1.0) { |
671 m_tuneLocal = false; | 672 m_tuneLocal = false; |
672 m_paling = 1.0; | 673 m_paling = 1.0; |
673 m_dictID = 1.0; | 674 m_dictID = 1.0; |
674 } | 675 } |
675 if (m_preset == 2.0) { | 676 if (m_preset == 2.0) { |
676 m_tuneLocal = false; | 677 m_tuneLocal = false; |
677 m_paling = 0.7; | 678 m_paling = 0.7; |
678 m_dictID = 0.0; | 679 m_dictID = 0.0; |
679 } | 680 } |
680 } | 681 } |
681 if (identifier == "chromanormalize") { | 682 if (identifier == "chromanormalize") { |
682 m_doNormalizeChroma = value; | 683 m_doNormalizeChroma = value; |
683 } | 684 } |
684 | 685 |
685 if (identifier == "rollon") { | 686 if (identifier == "rollon") { |
686 m_rollon = value; | 687 m_rollon = value; |
687 } | 688 } |
688 } | 689 } |
689 | 690 |
690 NNLSChroma::ProgramList | 691 NNLSChroma::ProgramList |
691 NNLSChroma::getPrograms() const | 692 NNLSChroma::getPrograms() const |
692 { | 693 { |
693 if (debug_on) cerr << "--> getPrograms" << endl; | 694 if (debug_on) cerr << "--> getPrograms" << endl; |
694 ProgramList list; | 695 ProgramList list; |
695 | 696 |
696 // If you have no programs, return an empty list (or simply don't | 697 // If you have no programs, return an empty list (or simply don't |
697 // implement this function or getCurrentProgram/selectProgram) | 698 // implement this function or getCurrentProgram/selectProgram) |
698 | 699 |
700 } | 701 } |
701 | 702 |
702 string | 703 string |
703 NNLSChroma::getCurrentProgram() const | 704 NNLSChroma::getCurrentProgram() const |
704 { | 705 { |
705 if (debug_on) cerr << "--> getCurrentProgram" << endl; | 706 if (debug_on) cerr << "--> getCurrentProgram" << endl; |
706 return ""; // no programs | 707 return ""; // no programs |
707 } | 708 } |
708 | 709 |
709 void | 710 void |
710 NNLSChroma::selectProgram(string name) | 711 NNLSChroma::selectProgram(string name) |
711 { | 712 { |
712 if (debug_on) cerr << "--> selectProgram" << endl; | 713 if (debug_on) cerr << "--> selectProgram" << endl; |
713 } | 714 } |
714 | 715 |
715 | 716 |
716 NNLSChroma::OutputList | 717 NNLSChroma::OutputList |
717 NNLSChroma::getOutputDescriptors() const | 718 NNLSChroma::getOutputDescriptors() const |
718 { | 719 { |
719 if (debug_on) cerr << "--> getOutputDescriptors" << endl; | 720 if (debug_on) cerr << "--> getOutputDescriptors" << endl; |
720 OutputList list; | 721 OutputList list; |
721 | 722 |
722 // Make chroma names for the binNames property | 723 // Make chroma names for the binNames property |
723 vector<string> chromanames; | 724 vector<string> chromanames; |
724 vector<string> bothchromanames; | 725 vector<string> bothchromanames; |
727 if (iNote < 12) { | 728 if (iNote < 12) { |
728 chromanames.push_back(notenames[iNote]); | 729 chromanames.push_back(notenames[iNote]); |
729 } | 730 } |
730 } | 731 } |
731 | 732 |
732 // int nNote = 84; | 733 // int nNote = 84; |
733 | 734 |
734 // See OutputDescriptor documentation for the possibilities here. | 735 // See OutputDescriptor documentation for the possibilities here. |
735 // Every plugin must have at least one output. | 736 // Every plugin must have at least one output. |
736 | 737 |
737 OutputDescriptor d0; | 738 OutputDescriptor d0; |
740 d0.description = "The concert pitch."; | 741 d0.description = "The concert pitch."; |
741 d0.unit = "Hz"; | 742 d0.unit = "Hz"; |
742 d0.hasFixedBinCount = true; | 743 d0.hasFixedBinCount = true; |
743 d0.binCount = 0; | 744 d0.binCount = 0; |
744 d0.hasKnownExtents = true; | 745 d0.hasKnownExtents = true; |
745 d0.minValue = 427.47; | 746 d0.minValue = 427.47; |
746 d0.maxValue = 452.89; | 747 d0.maxValue = 452.89; |
747 d0.isQuantized = false; | 748 d0.isQuantized = false; |
748 d0.sampleType = OutputDescriptor::VariableSampleRate; | 749 d0.sampleType = OutputDescriptor::VariableSampleRate; |
749 d0.hasDuration = false; | 750 d0.hasDuration = false; |
750 list.push_back(d0); | 751 list.push_back(d0); |
751 | 752 |
752 OutputDescriptor d1; | 753 OutputDescriptor d1; |
753 d1.identifier = "logfreqspec"; | 754 d1.identifier = "logfreqspec"; |
754 d1.name = "Log-Frequency Spectrum"; | 755 d1.name = "Log-Frequency Spectrum"; |
755 d1.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping."; | 756 d1.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping."; |
756 d1.unit = ""; | 757 d1.unit = ""; |
757 d1.hasFixedBinCount = true; | 758 d1.hasFixedBinCount = true; |
761 d1.sampleType = OutputDescriptor::FixedSampleRate; | 762 d1.sampleType = OutputDescriptor::FixedSampleRate; |
762 d1.hasDuration = false; | 763 d1.hasDuration = false; |
763 d1.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; | 764 d1.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; |
764 list.push_back(d1); | 765 list.push_back(d1); |
765 | 766 |
766 OutputDescriptor d2; | 767 OutputDescriptor d2; |
767 d2.identifier = "tunedlogfreqspec"; | 768 d2.identifier = "tunedlogfreqspec"; |
768 d2.name = "Tuned Log-Frequency Spectrum"; | 769 d2.name = "Tuned Log-Frequency Spectrum"; |
769 d2.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping, then its tuned using the estimated tuning frequency."; | 770 d2.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping, then its tuned using the estimated tuning frequency."; |
770 d2.unit = ""; | 771 d2.unit = ""; |
771 d2.hasFixedBinCount = true; | 772 d2.hasFixedBinCount = true; |
848 d7.sampleType = OutputDescriptor::VariableSampleRate; | 849 d7.sampleType = OutputDescriptor::VariableSampleRate; |
849 d7.hasDuration = false; | 850 d7.hasDuration = false; |
850 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; | 851 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; |
851 list.push_back(d7); | 852 list.push_back(d7); |
852 | 853 |
853 // | 854 // |
854 // OutputDescriptor d9; | 855 // OutputDescriptor d9; |
855 // d9.identifier = "inconsistencysegment"; | 856 // d9.identifier = "inconsistencysegment"; |
856 // d9.name = "Harmonic inconsistency segmenter"; | 857 // d9.name = "Harmonic inconsistency segmenter"; |
857 // d9.description = "Segments the audio based on the harmonic inconsistency value into speech and music."; | 858 // d9.description = "Segments the audio based on the harmonic inconsistency value into speech and music."; |
858 // d9.unit = ""; | 859 // d9.unit = ""; |
859 // d9.hasFixedBinCount = true; | 860 // d9.hasFixedBinCount = true; |
860 // d9.binCount = 0; | 861 // d9.binCount = 0; |
861 // d9.hasKnownExtents = true; | 862 // d9.hasKnownExtents = true; |
862 // d9.minValue = 0.1; | 863 // d9.minValue = 0.1; |
863 // d9.maxValue = 0.9; | 864 // d9.maxValue = 0.9; |
864 // d9.isQuantized = false; | 865 // d9.isQuantized = false; |
865 // d9.sampleType = OutputDescriptor::VariableSampleRate; | 866 // d9.sampleType = OutputDescriptor::VariableSampleRate; |
866 // d9.hasDuration = false; | 867 // d9.hasDuration = false; |
867 // d9.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; | 868 // d9.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; |
868 // list.push_back(d9); | 869 // list.push_back(d9); |
869 // | 870 // |
870 OutputDescriptor d10; | 871 OutputDescriptor d10; |
871 d10.identifier = "localtuning"; | 872 d10.identifier = "localtuning"; |
872 d10.name = "Local tuning"; | 873 d10.name = "Local tuning"; |
873 d10.description = "Tuning based on the history up to this timestamp."; | 874 d10.description = "Tuning based on the history up to this timestamp."; |
874 d10.unit = "Hz"; | 875 d10.unit = "Hz"; |
875 d10.hasFixedBinCount = true; | 876 d10.hasFixedBinCount = true; |
876 d10.binCount = 1; | 877 d10.binCount = 1; |
877 d10.hasKnownExtents = true; | 878 d10.hasKnownExtents = true; |
878 d10.minValue = 427.47; | 879 d10.minValue = 427.47; |
879 d10.maxValue = 452.89; | 880 d10.maxValue = 452.89; |
880 d10.isQuantized = false; | 881 d10.isQuantized = false; |
881 d10.sampleType = OutputDescriptor::FixedSampleRate; | 882 d10.sampleType = OutputDescriptor::FixedSampleRate; |
882 d10.hasDuration = false; | 883 d10.hasDuration = false; |
883 // d10.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; | 884 // d10.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; |
884 list.push_back(d10); | 885 list.push_back(d10); |
885 | 886 |
886 OutputDescriptor d8; | 887 OutputDescriptor d8; |
887 d8.identifier = "harmonicchange"; | 888 d8.identifier = "harmonicchange"; |
888 d8.name = "Harmonic change value"; | 889 d8.name = "Harmonic change value"; |
889 d8.description = "Harmonic change."; | 890 d8.description = "Harmonic change."; |
890 d8.unit = ""; | 891 d8.unit = ""; |
891 d8.hasFixedBinCount = true; | 892 d8.hasFixedBinCount = true; |
892 d8.binCount = 1; | 893 d8.binCount = 1; |
893 d8.hasKnownExtents = true; | 894 d8.hasKnownExtents = true; |
894 d8.minValue = 0.0; | 895 d8.minValue = 0.0; |
895 d8.maxValue = 0.999; | 896 d8.maxValue = 0.999; |
896 d8.isQuantized = false; | 897 d8.isQuantized = false; |
897 d8.sampleType = OutputDescriptor::FixedSampleRate; | 898 d8.sampleType = OutputDescriptor::FixedSampleRate; |
898 d8.hasDuration = false; | 899 d8.hasDuration = false; |
899 // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; | 900 // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; |
900 list.push_back(d8); | 901 list.push_back(d8); |
904 | 905 |
905 | 906 |
906 bool | 907 bool |
907 NNLSChroma::initialise(size_t channels, size_t stepSize, size_t blockSize) | 908 NNLSChroma::initialise(size_t channels, size_t stepSize, size_t blockSize) |
908 { | 909 { |
909 if (debug_on) { | 910 if (debug_on) { |
910 cerr << "--> initialise"; | 911 cerr << "--> initialise"; |
911 } | 912 } |
912 | 913 |
913 if (channels < getMinChannelCount() || | 914 if (channels < getMinChannelCount() || |
914 channels > getMaxChannelCount()) return false; | 915 channels > getMaxChannelCount()) return false; |
915 m_blockSize = blockSize; | 916 m_blockSize = blockSize; |
916 m_stepSize = stepSize; | 917 m_stepSize = stepSize; |
917 frameCount = 0; | 918 frameCount = 0; |
918 int tempn = 256 * m_blockSize/2; | 919 int tempn = 256 * m_blockSize/2; |
919 // cerr << "length of tempkernel : " << tempn << endl; | 920 // cerr << "length of tempkernel : " << tempn << endl; |
920 float *tempkernel; | 921 float *tempkernel; |
921 | 922 |
922 tempkernel = new float[tempn]; | 923 tempkernel = new float[tempn]; |
923 | 924 |
924 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel); | 925 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel); |
925 m_kernelValue.clear(); | 926 m_kernelValue.clear(); |
926 m_kernelFftIndex.clear(); | 927 m_kernelFftIndex.clear(); |
927 m_kernelNoteIndex.clear(); | 928 m_kernelNoteIndex.clear(); |
928 int countNonzero = 0; | 929 int countNonzero = 0; |
929 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix | 930 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix |
930 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) { | 931 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) { |
931 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) { | 932 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) { |
932 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]); | 933 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]); |
933 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) { | 934 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) { |
934 countNonzero++; | 935 countNonzero++; |
935 } | 936 } |
936 m_kernelFftIndex.push_back(iFFT); | 937 m_kernelFftIndex.push_back(iFFT); |
937 m_kernelNoteIndex.push_back(iNote); | 938 m_kernelNoteIndex.push_back(iNote); |
938 } | 939 } |
939 } | 940 } |
940 } | 941 } |
941 // cerr << "nonzero count : " << countNonzero << endl; | 942 // cerr << "nonzero count : " << countNonzero << endl; |
942 delete [] tempkernel; | 943 delete [] tempkernel; |
943 ofstream myfile; | 944 ofstream myfile; |
944 myfile.open ("matrix.txt"); | 945 myfile.open ("matrix.txt"); |
945 // myfile << "Writing this to a file.\n"; | 946 // myfile << "Writing this to a file.\n"; |
946 for (int i = 0; i < nNote * 84; ++i) { | 947 for (int i = 0; i < nNote * 84; ++i) { |
947 myfile << m_dict[i] << endl; | 948 myfile << m_dict[i] << endl; |
948 } | 949 } |
949 myfile.close(); | 950 myfile.close(); |
950 return true; | 951 return true; |
951 } | 952 } |
952 | 953 |
953 void | 954 void |
954 NNLSChroma::reset() | 955 NNLSChroma::reset() |
955 { | 956 { |
956 if (debug_on) cerr << "--> reset"; | 957 if (debug_on) cerr << "--> reset"; |
957 | 958 |
958 // Clear buffers, reset stored values, etc | 959 // Clear buffers, reset stored values, etc |
959 frameCount = 0; | 960 frameCount = 0; |
960 m_dictID = 0; | 961 m_dictID = 0; |
961 m_fl.clear(); | 962 m_fl.clear(); |
962 m_meanTuning0 = 0; | 963 m_meanTuning0 = 0; |
963 m_meanTuning1 = 0; | 964 m_meanTuning1 = 0; |
964 m_meanTuning2 = 0; | 965 m_meanTuning2 = 0; |
965 m_localTuning0 = 0; | 966 m_localTuning0 = 0; |
966 m_localTuning1 = 0; | 967 m_localTuning1 = 0; |
967 m_localTuning2 = 0; | 968 m_localTuning2 = 0; |
968 m_localTuning.clear(); | 969 m_localTuning.clear(); |
969 } | 970 } |
970 | 971 |
971 NNLSChroma::FeatureSet | 972 NNLSChroma::FeatureSet |
972 NNLSChroma::process(const float *const *inputBuffers, Vamp::RealTime timestamp) | 973 NNLSChroma::process(const float *const *inputBuffers, Vamp::RealTime timestamp) |
973 { | 974 { |
974 if (debug_on) cerr << "--> process" << endl; | 975 if (debug_on) cerr << "--> process" << endl; |
975 frameCount++; | 976 frameCount++; |
976 float *magnitude = new float[m_blockSize/2]; | 977 float *magnitude = new float[m_blockSize/2]; |
977 | 978 |
978 Feature f10; // local tuning | 979 Feature f10; // local tuning |
979 f10.hasTimestamp = true; | 980 f10.hasTimestamp = true; |
980 f10.timestamp = timestamp; | 981 f10.timestamp = timestamp; |
981 const float *fbuf = inputBuffers[0]; | 982 const float *fbuf = inputBuffers[0]; |
982 float energysum = 0; | 983 float energysum = 0; |
983 // make magnitude | 984 // make magnitude |
984 float maxmag = -10000; | 985 float maxmag = -10000; |
985 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) { | 986 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) { |
986 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] + | 987 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] + |
987 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]); | 988 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]); |
988 if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin]; | 989 if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin]; |
989 if (m_rollon > 0) { | 990 if (m_rollon > 0) { |
990 energysum += pow(magnitude[iBin],2); | 991 energysum += pow(magnitude[iBin],2); |
991 } | 992 } |
992 } | 993 } |
993 | 994 |
994 float cumenergy = 0; | 995 float cumenergy = 0; |
995 if (m_rollon > 0) { | 996 if (m_rollon > 0) { |
996 for (size_t iBin = 2; iBin < m_blockSize/2; iBin++) { | 997 for (size_t iBin = 2; iBin < m_blockSize/2; iBin++) { |
997 cumenergy += pow(magnitude[iBin],2); | 998 cumenergy += pow(magnitude[iBin],2); |
998 if (cumenergy < energysum * m_rollon) magnitude[iBin-2] = 0; | 999 if (cumenergy < energysum * m_rollon) magnitude[iBin-2] = 0; |
999 else break; | 1000 else break; |
1000 } | 1001 } |
1001 } | 1002 } |
1002 | 1003 |
1003 if (maxmag < 2) { | 1004 if (maxmag < 2) { |
1004 // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl; | 1005 // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl; |
1005 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) { | 1006 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) { |
1006 magnitude[iBin] = 0; | 1007 magnitude[iBin] = 0; |
1007 } | 1008 } |
1008 } | 1009 } |
1009 | 1010 |
1010 // note magnitude mapping using pre-calculated matrix | 1011 // note magnitude mapping using pre-calculated matrix |
1011 float *nm = new float[nNote]; // note magnitude | 1012 float *nm = new float[nNote]; // note magnitude |
1012 for (size_t iNote = 0; iNote < nNote; iNote++) { | 1013 for (size_t iNote = 0; iNote < nNote; iNote++) { |
1013 nm[iNote] = 0; // initialise as 0 | 1014 nm[iNote] = 0; // initialise as 0 |
1014 } | 1015 } |
1015 int binCount = 0; | 1016 int binCount = 0; |
1016 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) { | 1017 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) { |
1017 // cerr << "."; | 1018 // cerr << "."; |
1018 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount]; | 1019 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount]; |
1019 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl; | 1020 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl; |
1020 binCount++; | 1021 binCount++; |
1021 } | 1022 } |
1022 // cerr << nm[20]; | 1023 // cerr << nm[20]; |
1023 // cerr << endl; | 1024 // cerr << endl; |
1024 | 1025 |
1025 | 1026 |
1026 float one_over_N = 1.0/frameCount; | 1027 float one_over_N = 1.0/frameCount; |
1027 // update means of complex tuning variables | 1028 // update means of complex tuning variables |
1028 m_meanTuning0 *= float(frameCount-1)*one_over_N; | 1029 m_meanTuning0 *= float(frameCount-1)*one_over_N; |
1031 | 1032 |
1032 for (int iTone = 0; iTone < 160; iTone = iTone + 3) { | 1033 for (int iTone = 0; iTone < 160; iTone = iTone + 3) { |
1033 m_meanTuning0 += nm[iTone + 0]*one_over_N; | 1034 m_meanTuning0 += nm[iTone + 0]*one_over_N; |
1034 m_meanTuning1 += nm[iTone + 1]*one_over_N; | 1035 m_meanTuning1 += nm[iTone + 1]*one_over_N; |
1035 m_meanTuning2 += nm[iTone + 2]*one_over_N; | 1036 m_meanTuning2 += nm[iTone + 2]*one_over_N; |
1036 float ratioOld = 0.997; | 1037 float ratioOld = 0.997; |
1037 m_localTuning0 *= ratioOld; m_localTuning0 += nm[iTone + 0] * (1 - ratioOld); | 1038 m_localTuning0 *= ratioOld; m_localTuning0 += nm[iTone + 0] * (1 - ratioOld); |
1038 m_localTuning1 *= ratioOld; m_localTuning1 += nm[iTone + 1] * (1 - ratioOld); | 1039 m_localTuning1 *= ratioOld; m_localTuning1 += nm[iTone + 1] * (1 - ratioOld); |
1039 m_localTuning2 *= ratioOld; m_localTuning2 += nm[iTone + 2] * (1 - ratioOld); | 1040 m_localTuning2 *= ratioOld; m_localTuning2 += nm[iTone + 2] * (1 - ratioOld); |
1040 } | 1041 } |
1041 | 1042 |
1042 // if (m_tuneLocal) { | 1043 // if (m_tuneLocal) { |
1043 // local tuning | 1044 // local tuning |
1044 float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2; | 1045 float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2; |
1045 float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2; | 1046 float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2; |
1046 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI); | 1047 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI); |
1047 m_localTuning.push_back(normalisedtuning); | 1048 m_localTuning.push_back(normalisedtuning); |
1048 float tuning440 = 440 * pow(2,normalisedtuning/12); | 1049 float tuning440 = 440 * pow(2,normalisedtuning/12); |
1049 f10.values.push_back(tuning440); | 1050 f10.values.push_back(tuning440); |
1050 // cerr << tuning440 << endl; | 1051 // cerr << tuning440 << endl; |
1051 // } | 1052 // } |
1052 | 1053 |
1053 Feature f1; // logfreqspec | 1054 Feature f1; // logfreqspec |
1054 f1.hasTimestamp = true; | 1055 f1.hasTimestamp = true; |
1055 f1.timestamp = timestamp; | 1056 f1.timestamp = timestamp; |
1056 for (size_t iNote = 0; iNote < nNote; iNote++) { | 1057 for (size_t iNote = 0; iNote < nNote; iNote++) { |
1057 f1.values.push_back(nm[iNote]); | 1058 f1.values.push_back(nm[iNote]); |
1058 } | 1059 } |
1059 | 1060 |
1060 FeatureSet fs; | 1061 FeatureSet fs; |
1061 fs[1].push_back(f1); | 1062 fs[1].push_back(f1); |
1062 fs[8].push_back(f10); | 1063 fs[8].push_back(f10); |
1063 | 1064 |
1064 // deletes | 1065 // deletes |
1065 delete[] magnitude; | 1066 delete[] magnitude; |
1066 delete[] nm; | 1067 delete[] nm; |
1067 | 1068 |
1068 m_fl.push_back(f1); // remember note magnitude for getRemainingFeatures | 1069 m_fl.push_back(f1); // remember note magnitude for getRemainingFeatures |
1069 char * pPath; | 1070 char * pPath; |
1070 pPath = getenv ("VAMP_PATH"); | 1071 pPath = getenv ("VAMP_PATH"); |
1071 | 1072 |
1072 | 1073 |
1073 return fs; | 1074 return fs; |
1074 } | 1075 } |
1075 | 1076 |
1076 NNLSChroma::FeatureSet | 1077 NNLSChroma::FeatureSet |
1077 NNLSChroma::getRemainingFeatures() | 1078 NNLSChroma::getRemainingFeatures() |
1078 { | 1079 { |
1079 if (debug_on) cerr << "--> getRemainingFeatures" << endl; | 1080 if (debug_on) cerr << "--> getRemainingFeatures" << endl; |
1080 FeatureSet fsOut; | 1081 FeatureSet fsOut; |
1081 if (m_fl.size() == 0) return fsOut; | 1082 if (m_fl.size() == 0) return fsOut; |
1082 int nChord = m_chordnames.size(); | 1083 int nChord = m_chordnames.size(); |
1083 // | 1084 // |
1084 /** Calculate Tuning | 1085 /** Calculate Tuning |
1085 calculate tuning from (using the angle of the complex number defined by the | 1086 calculate tuning from (using the angle of the complex number defined by the |
1086 cumulative mean real and imag values) | 1087 cumulative mean real and imag values) |
1087 **/ | 1088 **/ |
1088 float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2; | 1089 float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2; |
1089 float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2; | 1090 float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2; |
1090 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI)); | 1091 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI)); |
1091 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI); | 1092 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI); |
1092 int intShift = floor(normalisedtuning * 3); | 1093 int intShift = floor(normalisedtuning * 3); |
1093 float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this | 1094 float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this |
1094 | 1095 |
1095 char buffer0 [50]; | 1096 char buffer0 [50]; |
1096 | 1097 |
1097 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning); | 1098 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning); |
1098 | 1099 |
1099 // cerr << "normalisedtuning: " << normalisedtuning << '\n'; | 1100 // cerr << "normalisedtuning: " << normalisedtuning << '\n'; |
1100 | 1101 |
1101 // push tuning to FeatureSet fsOut | 1102 // push tuning to FeatureSet fsOut |
1102 Feature f0; // tuning | 1103 Feature f0; // tuning |
1103 f0.hasTimestamp = true; | 1104 f0.hasTimestamp = true; |
1104 f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));; | 1105 f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));; |
1105 f0.label = buffer0; | 1106 f0.label = buffer0; |
1106 fsOut[0].push_back(f0); | 1107 fsOut[0].push_back(f0); |
1107 | 1108 |
1108 /** Tune Log-Frequency Spectrogram | 1109 /** Tune Log-Frequency Spectrogram |
1109 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to | 1110 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to |
1110 perform linear interpolation on the existing log-frequency spectrogram (kinda f1). | 1111 perform linear interpolation on the existing log-frequency spectrogram (kinda f1). |
1111 **/ | 1112 **/ |
1112 cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... "; | 1113 cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... "; |
1113 | 1114 |
1114 float tempValue = 0; | 1115 float tempValue = 0; |
1115 float dbThreshold = 0; // relative to the background spectrum | 1116 float dbThreshold = 0; // relative to the background spectrum |
1116 float thresh = pow(10,dbThreshold/20); | 1117 float thresh = pow(10,dbThreshold/20); |
1117 // cerr << "tune local ? " << m_tuneLocal << endl; | 1118 // cerr << "tune local ? " << m_tuneLocal << endl; |
1118 int count = 0; | 1119 int count = 0; |
1119 | 1120 |
1120 for (FeatureList::iterator i = m_fl.begin(); i != m_fl.end(); ++i) { | 1121 for (FeatureList::iterator i = m_fl.begin(); i != m_fl.end(); ++i) { |
1121 Feature f1 = *i; | 1122 Feature f1 = *i; |
1122 Feature f2; // tuned log-frequency spectrum | 1123 Feature f2; // tuned log-frequency spectrum |
1123 f2.hasTimestamp = true; | 1124 f2.hasTimestamp = true; |
1124 f2.timestamp = f1.timestamp; | 1125 f2.timestamp = f1.timestamp; |
1125 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero | 1126 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero |
1126 | 1127 |
1127 if (m_tuneLocal) { | 1128 if (m_tuneLocal) { |
1128 intShift = floor(m_localTuning[count] * 3); | 1129 intShift = floor(m_localTuning[count] * 3); |
1129 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this | 1130 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this |
1130 } | 1131 } |
1131 | 1132 |
1132 // cerr << intShift << " " << intFactor << endl; | 1133 // cerr << intShift << " " << intFactor << endl; |
1133 | 1134 |
1134 for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins | 1135 for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins |
1135 tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor; | 1136 tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor; |
1136 f2.values.push_back(tempValue); | 1137 f2.values.push_back(tempValue); |
1137 } | 1138 } |
1138 | 1139 |
1139 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge | 1140 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge |
1140 vector<float> runningmean = SpecialConvolution(f2.values,hw); | 1141 vector<float> runningmean = SpecialConvolution(f2.values,hw); |
1141 vector<float> runningstd; | 1142 vector<float> runningstd; |
1142 for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance) | 1143 for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance) |
1143 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i])); | 1144 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i])); |
1144 } | 1145 } |
1145 runningstd = SpecialConvolution(runningstd,hw); // second step convolve | 1146 runningstd = SpecialConvolution(runningstd,hw); // second step convolve |
1146 for (int i = 0; i < 256; i++) { | 1147 for (int i = 0; i < 256; i++) { |
1147 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std | 1148 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std |
1148 if (runningstd[i] > 0) { | 1149 if (runningstd[i] > 0) { |
1149 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ? | 1150 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ? |
1150 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0; | 1151 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0; |
1151 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ? | 1152 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ? |
1152 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0; | 1153 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0; |
1153 } | 1154 } |
1154 if (f2.values[i] < 0) { | 1155 if (f2.values[i] < 0) { |
1155 cerr << "ERROR: negative value in logfreq spectrum" << endl; | 1156 cerr << "ERROR: negative value in logfreq spectrum" << endl; |
1156 } | 1157 } |
1157 } | 1158 } |
1158 fsOut[2].push_back(f2); | 1159 fsOut[2].push_back(f2); |
1159 count++; | 1160 count++; |
1160 } | 1161 } |
1161 cerr << "done." << endl; | 1162 cerr << "done." << endl; |
1162 | 1163 |
1163 /** Semitone spectrum and chromagrams | 1164 /** Semitone spectrum and chromagrams |
1164 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum | 1165 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum |
1165 is inferred using a non-negative least squares algorithm. | 1166 is inferred using a non-negative least squares algorithm. |
1166 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means | 1167 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means |
1167 bass and treble stacked onto each other). | 1168 bass and treble stacked onto each other). |
1168 **/ | 1169 **/ |
1169 if (m_dictID == 1) { | 1170 if (m_dictID == 1) { |
1170 cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... "; | 1171 cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... "; |
1171 } else { | 1172 } else { |
1172 cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... "; | 1173 cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... "; |
1173 } | 1174 } |
1174 | 1175 |
1175 | 1176 |
1176 vector<vector<float> > chordogram; | 1177 vector<vector<float> > chordogram; |
1177 vector<vector<int> > scoreChordogram; | 1178 vector<vector<int> > scoreChordogram; |
1178 vector<float> chordchange = vector<float>(fsOut[2].size(),0); | 1179 vector<float> chordchange = vector<float>(fsOut[2].size(),0); |
1179 vector<float> oldchroma = vector<float>(12,0); | 1180 vector<float> oldchroma = vector<float>(12,0); |
1180 vector<float> oldbasschroma = vector<float>(12,0); | 1181 vector<float> oldbasschroma = vector<float>(12,0); |
1181 count = 0; | 1182 count = 0; |
1182 | 1183 |
1183 for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) { | 1184 for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) { |
1184 Feature f2 = *it; // logfreq spectrum | 1185 Feature f2 = *it; // logfreq spectrum |
1185 Feature f3; // semitone spectrum | 1186 Feature f3; // semitone spectrum |
1186 Feature f4; // treble chromagram | 1187 Feature f4; // treble chromagram |
1187 Feature f5; // bass chromagram | 1188 Feature f5; // bass chromagram |
1188 Feature f6; // treble and bass chromagram | 1189 Feature f6; // treble and bass chromagram |
1189 | 1190 |
1190 f3.hasTimestamp = true; | 1191 f3.hasTimestamp = true; |
1191 f3.timestamp = f2.timestamp; | 1192 f3.timestamp = f2.timestamp; |
1192 | 1193 |
1193 f4.hasTimestamp = true; | 1194 f4.hasTimestamp = true; |
1194 f4.timestamp = f2.timestamp; | 1195 f4.timestamp = f2.timestamp; |
1195 | 1196 |
1196 f5.hasTimestamp = true; | 1197 f5.hasTimestamp = true; |
1197 f5.timestamp = f2.timestamp; | 1198 f5.timestamp = f2.timestamp; |
1198 | 1199 |
1199 f6.hasTimestamp = true; | 1200 f6.hasTimestamp = true; |
1200 f6.timestamp = f2.timestamp; | 1201 f6.timestamp = f2.timestamp; |
1201 | 1202 |
1202 double b[256]; | 1203 double b[256]; |
1203 | 1204 |
1204 bool some_b_greater_zero = false; | 1205 bool some_b_greater_zero = false; |
1205 float sumb = 0; | 1206 float sumb = 0; |
1206 for (int i = 0; i < 256; i++) { | 1207 for (int i = 0; i < 256; i++) { |
1207 // b[i] = m_dict[(256 * count + i) % (256 * 84)]; | 1208 // b[i] = m_dict[(256 * count + i) % (256 * 84)]; |
1208 b[i] = f2.values[i]; | 1209 b[i] = f2.values[i]; |
1209 sumb += b[i]; | 1210 sumb += b[i]; |
1210 if (b[i] > 0) { | 1211 if (b[i] > 0) { |
1211 some_b_greater_zero = true; | 1212 some_b_greater_zero = true; |
1212 } | 1213 } |
1213 } | 1214 } |
1214 | 1215 |
1215 // here's where the non-negative least squares algorithm calculates the note activation x | 1216 // here's where the non-negative least squares algorithm calculates the note activation x |
1216 | 1217 |
1217 vector<float> chroma = vector<float>(12, 0); | 1218 vector<float> chroma = vector<float>(12, 0); |
1218 vector<float> basschroma = vector<float>(12, 0); | 1219 vector<float> basschroma = vector<float>(12, 0); |
1219 float currval; | 1220 float currval; |
1220 unsigned iSemitone = 0; | 1221 unsigned iSemitone = 0; |
1221 | 1222 |
1222 if (some_b_greater_zero) { | 1223 if (some_b_greater_zero) { |
1223 if (m_dictID == 1) { | 1224 if (m_dictID == 1) { |
1224 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) { | 1225 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) { |
1225 currval = 0; | 1226 currval = 0; |
1226 currval += b[iNote + 1 + -1] * 0.5; | 1227 currval += b[iNote + 1 + -1] * 0.5; |
1227 currval += b[iNote + 1 + 0] * 1.0; | 1228 currval += b[iNote + 1 + 0] * 1.0; |
1228 currval += b[iNote + 1 + 1] * 0.5; | 1229 currval += b[iNote + 1 + 1] * 0.5; |
1229 f3.values.push_back(currval); | 1230 f3.values.push_back(currval); |
1230 chroma[iSemitone % 12] += currval * treblewindow[iSemitone]; | 1231 chroma[iSemitone % 12] += currval * treblewindow[iSemitone]; |
1231 basschroma[iSemitone % 12] += currval * basswindow[iSemitone]; | 1232 basschroma[iSemitone % 12] += currval * basswindow[iSemitone]; |
1232 iSemitone++; | 1233 iSemitone++; |
1233 } | 1234 } |
1234 | 1235 |
1235 } else { | 1236 } else { |
1236 double x[84+1000]; | 1237 double x[84+1000]; |
1237 for (int i = 1; i < 1084; ++i) x[i] = 1.0; | 1238 for (int i = 1; i < 1084; ++i) x[i] = 1.0; |
1238 vector<int> signifIndex; | 1239 vector<int> signifIndex; |
1239 int index=0; | 1240 int index=0; |
1240 sumb /= 84.0; | 1241 sumb /= 84.0; |
1241 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) { | 1242 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) { |
1242 float currval = 0; | 1243 float currval = 0; |
1243 currval += b[iNote + 1 + -1]; | 1244 currval += b[iNote + 1 + -1]; |
1244 currval += b[iNote + 1 + 0]; | 1245 currval += b[iNote + 1 + 0]; |
1245 currval += b[iNote + 1 + 1]; | 1246 currval += b[iNote + 1 + 1]; |
1246 if (currval > 0) signifIndex.push_back(index); | 1247 if (currval > 0) signifIndex.push_back(index); |
1247 f3.values.push_back(0); // fill the values, change later | 1248 f3.values.push_back(0); // fill the values, change later |
1248 index++; | 1249 index++; |
1249 } | 1250 } |
1250 double rnorm; | 1251 double rnorm; |
1251 double w[84+1000]; | 1252 double w[84+1000]; |
1252 double zz[84+1000]; | 1253 double zz[84+1000]; |
1253 int indx[84+1000]; | 1254 int indx[84+1000]; |
1254 int mode; | 1255 int mode; |
1255 int dictsize = 256*signifIndex.size(); | 1256 int dictsize = 256*signifIndex.size(); |
1256 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl; | 1257 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl; |
1257 double *curr_dict = new double[dictsize]; | 1258 double *curr_dict = new double[dictsize]; |
1258 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) { | 1259 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) { |
1259 for (unsigned iBin = 0; iBin < 256; iBin++) { | 1260 for (unsigned iBin = 0; iBin < 256; iBin++) { |
1260 curr_dict[iNote * 256 + iBin] = 1.0 * m_dict[signifIndex[iNote] * 256 + iBin]; | 1261 curr_dict[iNote * 256 + iBin] = 1.0 * m_dict[signifIndex[iNote] * 256 + iBin]; |
1261 } | 1262 } |
1262 } | 1263 } |
1263 int sz = signifIndex.size(); | 1264 int sz = signifIndex.size(); |
1264 int nn = nNote; | 1265 int nn = nNote; |
1265 NNLS(curr_dict, &nn, &nn, &sz, b, x, &rnorm, w, zz, indx, &mode); | 1266 NNLS(curr_dict, &nn, &nn, &sz, b, x, &rnorm, w, zz, indx, &mode); |
1266 delete [] curr_dict; | 1267 delete [] curr_dict; |
1267 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) { | 1268 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) { |
1268 f3.values[signifIndex[iNote]] = x[iNote]; | 1269 f3.values[signifIndex[iNote]] = x[iNote]; |
1269 // cerr << mode << endl; | 1270 // cerr << mode << endl; |
1270 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]]; | 1271 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]]; |
1271 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]]; | 1272 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]]; |
1272 } | 1273 } |
1273 } | 1274 } |
1274 } | 1275 } |
1275 | 1276 |
1276 | 1277 |
1277 | 1278 |
1278 | 1279 |
1279 f4.values = chroma; | 1280 f4.values = chroma; |
1280 f5.values = basschroma; | 1281 f5.values = basschroma; |
1281 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas | 1282 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas |
1282 f6.values = chroma; | 1283 f6.values = chroma; |
1283 | 1284 |
1284 if (m_doNormalizeChroma > 0) { | 1285 if (m_doNormalizeChroma > 0) { |
1285 vector<float> chromanorm = vector<float>(3,0); | 1286 vector<float> chromanorm = vector<float>(3,0); |
1286 switch (int(m_doNormalizeChroma)) { | 1287 switch (int(m_doNormalizeChroma)) { |
1287 case 0: // should never end up here | 1288 case 0: // should never end up here |
1288 break; | 1289 break; |
1289 case 1: | 1290 case 1: |
1290 chromanorm[0] = *max_element(f4.values.begin(), f4.values.end()); | 1291 chromanorm[0] = *max_element(f4.values.begin(), f4.values.end()); |
1291 chromanorm[1] = *max_element(f5.values.begin(), f5.values.end()); | 1292 chromanorm[1] = *max_element(f5.values.begin(), f5.values.end()); |
1292 chromanorm[2] = max(chromanorm[0], chromanorm[1]); | 1293 chromanorm[2] = max(chromanorm[0], chromanorm[1]); |
1293 break; | 1294 break; |
1294 case 2: | 1295 case 2: |
1295 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) { | 1296 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) { |
1296 chromanorm[0] += *it; | 1297 chromanorm[0] += *it; |
1297 } | 1298 } |
1298 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) { | 1299 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) { |
1299 chromanorm[1] += *it; | 1300 chromanorm[1] += *it; |
1300 } | 1301 } |
1301 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) { | 1302 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) { |
1302 chromanorm[2] += *it; | 1303 chromanorm[2] += *it; |
1303 } | 1304 } |
1304 break; | 1305 break; |
1305 case 3: | 1306 case 3: |
1306 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) { | 1307 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) { |
1307 chromanorm[0] += pow(*it,2); | 1308 chromanorm[0] += pow(*it,2); |
1308 } | 1309 } |
1309 chromanorm[0] = sqrt(chromanorm[0]); | 1310 chromanorm[0] = sqrt(chromanorm[0]); |
1310 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) { | 1311 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) { |
1311 chromanorm[1] += pow(*it,2); | 1312 chromanorm[1] += pow(*it,2); |
1312 } | 1313 } |
1313 chromanorm[1] = sqrt(chromanorm[1]); | 1314 chromanorm[1] = sqrt(chromanorm[1]); |
1314 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) { | 1315 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) { |
1315 chromanorm[2] += pow(*it,2); | 1316 chromanorm[2] += pow(*it,2); |
1316 } | 1317 } |
1317 chromanorm[2] = sqrt(chromanorm[2]); | 1318 chromanorm[2] = sqrt(chromanorm[2]); |
1318 break; | 1319 break; |
1319 } | 1320 } |
1320 if (chromanorm[0] > 0) { | 1321 if (chromanorm[0] > 0) { |
1321 for (int i = 0; i < f4.values.size(); i++) { | 1322 for (int i = 0; i < f4.values.size(); i++) { |
1322 f4.values[i] /= chromanorm[0]; | 1323 f4.values[i] /= chromanorm[0]; |
1323 } | 1324 } |
1324 } | 1325 } |
1325 if (chromanorm[1] > 0) { | 1326 if (chromanorm[1] > 0) { |
1326 for (int i = 0; i < f5.values.size(); i++) { | 1327 for (int i = 0; i < f5.values.size(); i++) { |
1327 f5.values[i] /= chromanorm[1]; | 1328 f5.values[i] /= chromanorm[1]; |
1328 } | 1329 } |
1329 } | 1330 } |
1330 if (chromanorm[2] > 0) { | 1331 if (chromanorm[2] > 0) { |
1331 for (int i = 0; i < f6.values.size(); i++) { | 1332 for (int i = 0; i < f6.values.size(); i++) { |
1332 f6.values[i] /= chromanorm[2]; | 1333 f6.values[i] /= chromanorm[2]; |
1333 } | 1334 } |
1334 } | 1335 } |
1335 | 1336 |
1336 } | 1337 } |
1337 | 1338 |
1338 // local chord estimation | 1339 // local chord estimation |
1339 vector<float> currentChordSalience; | 1340 vector<float> currentChordSalience; |
1340 float tempchordvalue = 0; | 1341 float tempchordvalue = 0; |
1341 float sumchordvalue = 0; | 1342 float sumchordvalue = 0; |
1342 | 1343 |
1343 for (int iChord = 0; iChord < nChord; iChord++) { | 1344 for (int iChord = 0; iChord < nChord; iChord++) { |
1344 tempchordvalue = 0; | 1345 tempchordvalue = 0; |
1345 for (int iBin = 0; iBin < 12; iBin++) { | 1346 for (int iBin = 0; iBin < 12; iBin++) { |
1346 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin]; | 1347 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin]; |
1347 } | 1348 } |
1348 for (int iBin = 12; iBin < 24; iBin++) { | 1349 for (int iBin = 12; iBin < 24; iBin++) { |
1349 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin]; | 1350 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin]; |
1350 } | 1351 } |
1351 sumchordvalue+=tempchordvalue; | 1352 sumchordvalue+=tempchordvalue; |
1352 currentChordSalience.push_back(tempchordvalue); | 1353 currentChordSalience.push_back(tempchordvalue); |
1353 } | 1354 } |
1354 if (sumchordvalue > 0) { | 1355 if (sumchordvalue > 0) { |
1355 for (int iChord = 0; iChord < nChord; iChord++) { | 1356 for (int iChord = 0; iChord < nChord; iChord++) { |
1356 currentChordSalience[iChord] /= sumchordvalue; | 1357 currentChordSalience[iChord] /= sumchordvalue; |
1357 } | 1358 } |
1358 } else { | 1359 } else { |
1359 currentChordSalience[nChord-1] = 1.0; | 1360 currentChordSalience[nChord-1] = 1.0; |
1360 } | 1361 } |
1361 chordogram.push_back(currentChordSalience); | 1362 chordogram.push_back(currentChordSalience); |
1362 | 1363 |
1363 fsOut[3].push_back(f3); | 1364 fsOut[3].push_back(f3); |
1364 fsOut[4].push_back(f4); | 1365 fsOut[4].push_back(f4); |
1365 fsOut[5].push_back(f5); | 1366 fsOut[5].push_back(f5); |
1366 fsOut[6].push_back(f6); | 1367 fsOut[6].push_back(f6); |
1367 count++; | 1368 count++; |
1368 } | 1369 } |
1369 cerr << "done." << endl; | 1370 cerr << "done." << endl; |
1370 | 1371 |
1371 | 1372 |
1372 /* Simple chord estimation | 1373 /* Simple chord estimation |
1373 I just take the local chord estimates ("currentChordSalience") and average them over time, then | 1374 I just take the local chord estimates ("currentChordSalience") and average them over time, then |
1374 take the maximum. Very simple, don't do this at home... | 1375 take the maximum. Very simple, don't do this at home... |
1375 */ | 1376 */ |
1376 cerr << "[NNLS Chroma Plugin] Chord Estimation ... "; | 1377 cerr << "[NNLS Chroma Plugin] Chord Estimation ... "; |
1377 count = 0; | 1378 count = 0; |
1378 int halfwindowlength = m_inputSampleRate / m_stepSize; | 1379 int halfwindowlength = m_inputSampleRate / m_stepSize; |
1379 vector<int> chordSequence; | 1380 vector<int> chordSequence; |
1380 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram | 1381 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram |
1381 vector<int> temp = vector<int>(nChord,0); | 1382 vector<int> temp = vector<int>(nChord,0); |
1382 scoreChordogram.push_back(temp); | 1383 scoreChordogram.push_back(temp); |
1383 } | 1384 } |
1384 for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) { | 1385 for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) { |
1385 int startIndex = count + 1; | 1386 int startIndex = count + 1; |
1386 int endIndex = count + 2 * halfwindowlength; | 1387 int endIndex = count + 2 * halfwindowlength; |
1387 | 1388 |
1388 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1); | 1389 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1); |
1389 | 1390 |
1390 vector<int> chordCandidates; | 1391 vector<int> chordCandidates; |
1391 for (unsigned iChord = 0; iChord < nChord-1; iChord++) { | 1392 for (unsigned iChord = 0; iChord < nChord-1; iChord++) { |
1392 // float currsum = 0; | 1393 // float currsum = 0; |
1393 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) { | 1394 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) { |
1394 // currsum += chordogram[iFrame][iChord]; | 1395 // currsum += chordogram[iFrame][iChord]; |
1395 // } | 1396 // } |
1396 // if (currsum > chordThreshold) chordCandidates.push_back(iChord); | 1397 // if (currsum > chordThreshold) chordCandidates.push_back(iChord); |
1397 for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) { | 1398 for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) { |
1398 if (chordogram[iFrame][iChord] > chordThreshold) { | 1399 if (chordogram[iFrame][iChord] > chordThreshold) { |
1399 chordCandidates.push_back(iChord); | 1400 chordCandidates.push_back(iChord); |
1400 break; | 1401 break; |
1401 } | 1402 } |
1402 } | 1403 } |
1403 } | 1404 } |
1404 chordCandidates.push_back(nChord-1); | 1405 chordCandidates.push_back(nChord-1); |
1405 // cerr << chordCandidates.size() << endl; | 1406 // cerr << chordCandidates.size() << endl; |
1406 | 1407 |
1407 float maxval = 0; // will be the value of the most salient *chord change* in this frame | 1408 float maxval = 0; // will be the value of the most salient *chord change* in this frame |
1408 float maxindex = 0; //... and the index thereof | 1409 float maxindex = 0; //... and the index thereof |
1409 unsigned bestchordL = nChord-1; // index of the best "left" chord | 1410 unsigned bestchordL = nChord-1; // index of the best "left" chord |
1410 unsigned bestchordR = nChord-1; // index of the best "right" chord | 1411 unsigned bestchordR = nChord-1; // index of the best "right" chord |
1411 | 1412 |
1412 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) { | 1413 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) { |
1413 // now find the max values on both sides of iWF | 1414 // now find the max values on both sides of iWF |
1414 // left side: | 1415 // left side: |
1415 float maxL = 0; | 1416 float maxL = 0; |
1416 unsigned maxindL = nChord-1; | 1417 unsigned maxindL = nChord-1; |
1417 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) { | 1418 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) { |
1418 unsigned iChord = chordCandidates[kChord]; | 1419 unsigned iChord = chordCandidates[kChord]; |
1419 float currsum = 0; | 1420 float currsum = 0; |
1420 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) { | 1421 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) { |
1421 currsum += chordogram[count+iFrame][iChord]; | 1422 currsum += chordogram[count+iFrame][iChord]; |
1422 } | 1423 } |
1423 if (iChord == nChord-1) currsum *= 0.8; | 1424 if (iChord == nChord-1) currsum *= 0.8; |
1424 if (currsum > maxL) { | 1425 if (currsum > maxL) { |
1425 maxL = currsum; | 1426 maxL = currsum; |
1426 maxindL = iChord; | 1427 maxindL = iChord; |
1427 } | 1428 } |
1428 } | 1429 } |
1429 // right side: | 1430 // right side: |
1430 float maxR = 0; | 1431 float maxR = 0; |
1431 unsigned maxindR = nChord-1; | 1432 unsigned maxindR = nChord-1; |
1432 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) { | 1433 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) { |
1433 unsigned iChord = chordCandidates[kChord]; | 1434 unsigned iChord = chordCandidates[kChord]; |
1434 float currsum = 0; | 1435 float currsum = 0; |
1435 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) { | 1436 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) { |
1436 currsum += chordogram[count+iFrame][iChord]; | 1437 currsum += chordogram[count+iFrame][iChord]; |
1437 } | 1438 } |
1438 if (iChord == nChord-1) currsum *= 0.8; | 1439 if (iChord == nChord-1) currsum *= 0.8; |
1439 if (currsum > maxR) { | 1440 if (currsum > maxR) { |
1440 maxR = currsum; | 1441 maxR = currsum; |
1441 maxindR = iChord; | 1442 maxindR = iChord; |
1442 } | 1443 } |
1443 } | 1444 } |
1444 if (maxL+maxR > maxval) { | 1445 if (maxL+maxR > maxval) { |
1445 maxval = maxL+maxR; | 1446 maxval = maxL+maxR; |
1446 maxindex = iWF; | 1447 maxindex = iWF; |
1447 bestchordL = maxindL; | 1448 bestchordL = maxindL; |
1448 bestchordR = maxindR; | 1449 bestchordR = maxindR; |
1449 } | 1450 } |
1450 | 1451 |
1451 } | 1452 } |
1452 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl; | 1453 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl; |
1453 // add a score to every chord-frame-point that was part of a maximum | 1454 // add a score to every chord-frame-point that was part of a maximum |
1454 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) { | 1455 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) { |
1455 scoreChordogram[iFrame+count][bestchordL]++; | 1456 scoreChordogram[iFrame+count][bestchordL]++; |
1456 } | 1457 } |
1457 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) { | 1458 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) { |
1458 scoreChordogram[iFrame+count][bestchordR]++; | 1459 scoreChordogram[iFrame+count][bestchordR]++; |
1459 } | 1460 } |
1460 if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength; | 1461 if (bestchordL != bestchordR) chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength; |
1461 count++; | 1462 count++; |
1462 } | 1463 } |
1463 // cerr << "******* agent finished *******" << endl; | 1464 // cerr << "******* agent finished *******" << endl; |
1464 count = 0; | 1465 count = 0; |
1465 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { | 1466 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { |
1466 float maxval = 0; // will be the value of the most salient chord in this frame | 1467 float maxval = 0; // will be the value of the most salient chord in this frame |
1467 float maxindex = 0; //... and the index thereof | 1468 float maxindex = 0; //... and the index thereof |
1468 for (unsigned iChord = 0; iChord < nChord; iChord++) { | 1469 for (unsigned iChord = 0; iChord < nChord; iChord++) { |
1469 if (scoreChordogram[count][iChord] > maxval) { | 1470 if (scoreChordogram[count][iChord] > maxval) { |
1470 maxval = scoreChordogram[count][iChord]; | 1471 maxval = scoreChordogram[count][iChord]; |
1471 maxindex = iChord; | 1472 maxindex = iChord; |
1472 // cerr << iChord << endl; | 1473 // cerr << iChord << endl; |
1473 } | 1474 } |
1474 } | 1475 } |
1475 chordSequence.push_back(maxindex); | 1476 chordSequence.push_back(maxindex); |
1476 // cerr << "before modefilter, maxindex: " << maxindex << endl; | 1477 // cerr << "before modefilter, maxindex: " << maxindex << endl; |
1477 count++; | 1478 count++; |
1478 } | 1479 } |
1479 // cerr << "******* mode filter done *******" << endl; | 1480 // cerr << "******* mode filter done *******" << endl; |
1480 | 1481 |
1481 | 1482 |
1482 // mode filter on chordSequence | 1483 // mode filter on chordSequence |
1483 count = 0; | 1484 count = 0; |
1484 string oldChord = ""; | 1485 string oldChord = ""; |
1485 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { | 1486 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { |
1486 Feature f6 = *it; | 1487 Feature f6 = *it; |
1487 Feature f7; // chord estimate | 1488 Feature f7; // chord estimate |
1488 f7.hasTimestamp = true; | 1489 f7.hasTimestamp = true; |
1489 f7.timestamp = f6.timestamp; | 1490 f7.timestamp = f6.timestamp; |
1490 Feature f8; // chord estimate | 1491 Feature f8; // chord estimate |
1491 f8.hasTimestamp = true; | 1492 f8.hasTimestamp = true; |
1492 f8.timestamp = f6.timestamp; | 1493 f8.timestamp = f6.timestamp; |
1493 | 1494 |
1494 vector<int> chordCount = vector<int>(nChord,0); | 1495 vector<int> chordCount = vector<int>(nChord,0); |
1495 int maxChordCount = 0; | 1496 int maxChordCount = 0; |
1496 int maxChordIndex = nChord-1; | 1497 int maxChordIndex = nChord-1; |
1497 string maxChord; | 1498 string maxChord; |
1498 int startIndex = max(count - halfwindowlength/2,0); | 1499 int startIndex = max(count - halfwindowlength/2,0); |
1499 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2); | 1500 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2); |
1500 for (int i = startIndex; i < endIndex; i++) { | 1501 for (int i = startIndex; i < endIndex; i++) { |
1501 chordCount[chordSequence[i]]++; | 1502 chordCount[chordSequence[i]]++; |
1502 if (chordCount[chordSequence[i]] > maxChordCount) { | 1503 if (chordCount[chordSequence[i]] > maxChordCount) { |
1503 // cerr << "start index " << startIndex << endl; | 1504 // cerr << "start index " << startIndex << endl; |
1504 maxChordCount++; | 1505 maxChordCount++; |
1505 maxChordIndex = chordSequence[i]; | 1506 maxChordIndex = chordSequence[i]; |
1506 maxChord = m_chordnames[maxChordIndex]; | 1507 maxChord = m_chordnames[maxChordIndex]; |
1507 } | 1508 } |
1508 } | 1509 } |
1509 // chordSequence[count] = maxChordIndex; | 1510 // chordSequence[count] = maxChordIndex; |
1510 // cerr << maxChordIndex << endl; | 1511 // cerr << maxChordIndex << endl; |
1511 f8.values.push_back(chordchange[count]/(halfwindowlength*2)); | 1512 f8.values.push_back(chordchange[count]/(halfwindowlength*2)); |
1512 // cerr << chordchange[count] << endl; | 1513 // cerr << chordchange[count] << endl; |
1513 fsOut[9].push_back(f8); | 1514 fsOut[9].push_back(f8); |
1514 if (oldChord != maxChord) { | 1515 if (oldChord != maxChord) { |
1515 oldChord = maxChord; | 1516 oldChord = maxChord; |
1516 | 1517 |
1517 // char buffer1 [50]; | 1518 // char buffer1 [50]; |
1518 // if (maxChordIndex < nChord - 1) { | 1519 // if (maxChordIndex < nChord - 1) { |
1519 // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]); | 1520 // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]); |
1520 // } else { | 1521 // } else { |
1521 // sprintf(buffer1, "N"); | 1522 // sprintf(buffer1, "N"); |
1522 // } | 1523 // } |
1523 // f7.label = buffer1; | 1524 // f7.label = buffer1; |
1524 f7.label = m_chordnames[maxChordIndex]; | 1525 f7.label = m_chordnames[maxChordIndex]; |
1525 fsOut[7].push_back(f7); | 1526 fsOut[7].push_back(f7); |
1526 } | 1527 } |
1527 count++; | 1528 count++; |
1528 } | 1529 } |
1529 Feature f7; // last chord estimate | 1530 Feature f7; // last chord estimate |
1530 f7.hasTimestamp = true; | 1531 f7.hasTimestamp = true; |
1531 f7.timestamp = fsOut[6][fsOut[6].size()-1].timestamp; | 1532 f7.timestamp = fsOut[6][fsOut[6].size()-1].timestamp; |
1532 f7.label = "N"; | 1533 f7.label = "N"; |
1533 fsOut[7].push_back(f7); | 1534 fsOut[7].push_back(f7); |
1534 cerr << "done." << endl; | 1535 cerr << "done." << endl; |
1535 // // musicity | 1536 // // musicity |
1536 // count = 0; | 1537 // count = 0; |
1537 // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2 | 1538 // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2 |
1538 // vector<float> musicityValue; | 1539 // vector<float> musicityValue; |
1539 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) { | 1540 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) { |
1540 // Feature f4 = *it; | 1541 // Feature f4 = *it; |
1541 // | 1542 // |
1542 // int startIndex = max(count - musicitykernelwidth/2,0); | 1543 // int startIndex = max(count - musicitykernelwidth/2,0); |
1543 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1); | 1544 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1); |
1544 // float chromasum = 0; | 1545 // float chromasum = 0; |
1545 // float diffsum = 0; | 1546 // float diffsum = 0; |
1546 // for (int k = 0; k < 12; k++) { | 1547 // for (int k = 0; k < 12; k++) { |
1547 // for (int i = startIndex + 1; i < endIndex; i++) { | 1548 // for (int i = startIndex + 1; i < endIndex; i++) { |
1548 // chromasum += pow(fsOut[4][i].values[k],2); | 1549 // chromasum += pow(fsOut[4][i].values[k],2); |
1549 // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]); | 1550 // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]); |
1550 // } | 1551 // } |
1551 // } | 1552 // } |
1552 // diffsum /= chromasum; | 1553 // diffsum /= chromasum; |
1553 // musicityValue.push_back(diffsum); | 1554 // musicityValue.push_back(diffsum); |
1554 // count++; | 1555 // count++; |
1555 // } | 1556 // } |
1556 // | 1557 // |
1557 // float musicityThreshold = 0.44; | 1558 // float musicityThreshold = 0.44; |
1558 // if (m_stepSize == 4096) { | 1559 // if (m_stepSize == 4096) { |
1559 // musicityThreshold = 0.74; | 1560 // musicityThreshold = 0.74; |
1560 // } | 1561 // } |
1561 // if (m_stepSize == 4410) { | 1562 // if (m_stepSize == 4410) { |
1562 // musicityThreshold = 0.77; | 1563 // musicityThreshold = 0.77; |
1563 // } | 1564 // } |
1564 // | 1565 // |
1565 // count = 0; | 1566 // count = 0; |
1566 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) { | 1567 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) { |
1567 // Feature f4 = *it; | 1568 // Feature f4 = *it; |
1568 // Feature f8; // musicity | 1569 // Feature f8; // musicity |
1569 // Feature f9; // musicity segmenter | 1570 // Feature f9; // musicity segmenter |
1570 // | 1571 // |
1571 // f8.hasTimestamp = true; | 1572 // f8.hasTimestamp = true; |
1572 // f8.timestamp = f4.timestamp; | 1573 // f8.timestamp = f4.timestamp; |
1573 // f9.hasTimestamp = true; | 1574 // f9.hasTimestamp = true; |
1574 // f9.timestamp = f4.timestamp; | 1575 // f9.timestamp = f4.timestamp; |
1575 // | 1576 // |
1576 // int startIndex = max(count - musicitykernelwidth/2,0); | 1577 // int startIndex = max(count - musicitykernelwidth/2,0); |
1577 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1); | 1578 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1); |
1578 // int musicityCount = 0; | 1579 // int musicityCount = 0; |
1579 // for (int i = startIndex; i <= endIndex; i++) { | 1580 // for (int i = startIndex; i <= endIndex; i++) { |
1580 // if (musicityValue[i] > musicityThreshold) musicityCount++; | 1581 // if (musicityValue[i] > musicityThreshold) musicityCount++; |
1581 // } | 1582 // } |
1582 // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1); | 1583 // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1); |
1583 // | 1584 // |
1584 // if (isSpeech) { | 1585 // if (isSpeech) { |
1585 // if (oldlabeltype != 2) { | 1586 // if (oldlabeltype != 2) { |
1586 // f9.label = "Speech"; | 1587 // f9.label = "Speech"; |
1587 // fsOut[9].push_back(f9); | 1588 // fsOut[9].push_back(f9); |
1588 // oldlabeltype = 2; | 1589 // oldlabeltype = 2; |
1589 // } | 1590 // } |
1590 // } else { | 1591 // } else { |
1591 // if (oldlabeltype != 1) { | 1592 // if (oldlabeltype != 1) { |
1592 // f9.label = "Music"; | 1593 // f9.label = "Music"; |
1593 // fsOut[9].push_back(f9); | 1594 // fsOut[9].push_back(f9); |
1594 // oldlabeltype = 1; | 1595 // oldlabeltype = 1; |
1595 // } | 1596 // } |
1596 // } | 1597 // } |
1597 // f8.values.push_back(musicityValue[count]); | 1598 // f8.values.push_back(musicityValue[count]); |
1598 // fsOut[8].push_back(f8); | 1599 // fsOut[8].push_back(f8); |
1599 // count++; | 1600 // count++; |
1600 // } | 1601 // } |
1601 return fsOut; | 1602 return fsOut; |
1602 | 1603 |
1603 } | 1604 } |
1604 | 1605 |