Chris@0
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
Chris@7
|
2 /*
|
Chris@7
|
3 Permission is hereby granted, free of charge, to any person
|
Chris@7
|
4 obtaining a copy of this software and associated documentation
|
Chris@7
|
5 files (the "Software"), to deal in the Software without
|
Chris@7
|
6 restriction, including without limitation the rights to use, copy,
|
Chris@7
|
7 modify, merge, publish, distribute, sublicense, and/or sell copies
|
Chris@7
|
8 of the Software, and to permit persons to whom the Software is
|
Chris@7
|
9 furnished to do so, subject to the following conditions:
|
Chris@7
|
10
|
Chris@7
|
11 The above copyright notice and this permission notice shall be
|
Chris@7
|
12 included in all copies or substantial portions of the Software.
|
Chris@7
|
13
|
Chris@7
|
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
Chris@7
|
15 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
Chris@7
|
16 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
Chris@7
|
17 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
|
Chris@7
|
18 ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
|
Chris@7
|
19 CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
Chris@7
|
20 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
Chris@7
|
21 */
|
Chris@0
|
22
|
Chris@0
|
23 #include "SimpleCepstrum.h"
|
Chris@0
|
24
|
Chris@33
|
25 #include "vamp-sdk/FFT.h"
|
Chris@33
|
26
|
Chris@0
|
27 #include <vector>
|
Chris@0
|
28 #include <algorithm>
|
Chris@0
|
29
|
Chris@0
|
30 #include <cstdio>
|
Chris@0
|
31 #include <cmath>
|
Chris@4
|
32 #include <complex>
|
Chris@0
|
33
|
Chris@33
|
34 #if ( VAMP_SDK_MAJOR_VERSION < 2 || ( VAMP_SDK_MAJOR_VERSION == 2 && VAMP_SDK_MINOR_VERSION < 4 ) )
|
Chris@33
|
35 #error Vamp SDK version 2.4 or newer required
|
Chris@33
|
36 #endif
|
Chris@33
|
37
|
Chris@0
|
38 using std::string;
|
Chris@0
|
39
|
Chris@0
|
40 SimpleCepstrum::SimpleCepstrum(float inputSampleRate) :
|
Chris@0
|
41 Plugin(inputSampleRate),
|
Chris@0
|
42 m_channels(0),
|
Chris@0
|
43 m_stepSize(256),
|
Chris@0
|
44 m_blockSize(1024),
|
Chris@0
|
45 m_fmin(50),
|
Chris@0
|
46 m_fmax(1000),
|
Chris@10
|
47 m_histlen(1),
|
Chris@10
|
48 m_vflen(1),
|
Chris@3
|
49 m_clamp(false),
|
Chris@5
|
50 m_method(InverseSymmetric),
|
Chris@5
|
51 m_binFrom(0),
|
Chris@5
|
52 m_binTo(0),
|
Chris@5
|
53 m_bins(0),
|
Chris@5
|
54 m_history(0)
|
Chris@0
|
55 {
|
Chris@0
|
56 }
|
Chris@0
|
57
|
Chris@0
|
58 SimpleCepstrum::~SimpleCepstrum()
|
Chris@0
|
59 {
|
Chris@5
|
60 if (m_history) {
|
Chris@5
|
61 for (int i = 0; i < m_histlen; ++i) {
|
Chris@5
|
62 delete[] m_history[i];
|
Chris@5
|
63 }
|
Chris@5
|
64 delete[] m_history;
|
Chris@5
|
65 }
|
Chris@0
|
66 }
|
Chris@0
|
67
|
Chris@0
|
68 string
|
Chris@0
|
69 SimpleCepstrum::getIdentifier() const
|
Chris@0
|
70 {
|
Chris@0
|
71 return "simple-cepstrum";
|
Chris@0
|
72 }
|
Chris@0
|
73
|
Chris@0
|
74 string
|
Chris@0
|
75 SimpleCepstrum::getName() const
|
Chris@0
|
76 {
|
Chris@0
|
77 return "Simple Cepstrum";
|
Chris@0
|
78 }
|
Chris@0
|
79
|
Chris@0
|
80 string
|
Chris@0
|
81 SimpleCepstrum::getDescription() const
|
Chris@0
|
82 {
|
Chris@2
|
83 return "Return simple cepstral data from DFT bins. This plugin is intended for casual inspection of cepstral data. It returns a lot of different sorts of data and is quite slow; it's not a good way to extract a single feature rapidly.";
|
Chris@0
|
84 }
|
Chris@0
|
85
|
Chris@0
|
86 string
|
Chris@0
|
87 SimpleCepstrum::getMaker() const
|
Chris@0
|
88 {
|
Chris@7
|
89 return "Chris Cannam";
|
Chris@0
|
90 }
|
Chris@0
|
91
|
Chris@0
|
92 int
|
Chris@0
|
93 SimpleCepstrum::getPluginVersion() const
|
Chris@0
|
94 {
|
Chris@0
|
95 // Increment this each time you release a version that behaves
|
Chris@0
|
96 // differently from the previous one
|
Chris@0
|
97 return 1;
|
Chris@0
|
98 }
|
Chris@0
|
99
|
Chris@0
|
100 string
|
Chris@0
|
101 SimpleCepstrum::getCopyright() const
|
Chris@0
|
102 {
|
Chris@7
|
103 return "Freely redistributable (BSD license)";
|
Chris@0
|
104 }
|
Chris@0
|
105
|
Chris@0
|
106 SimpleCepstrum::InputDomain
|
Chris@0
|
107 SimpleCepstrum::getInputDomain() const
|
Chris@0
|
108 {
|
Chris@0
|
109 return FrequencyDomain;
|
Chris@0
|
110 }
|
Chris@0
|
111
|
Chris@0
|
112 size_t
|
Chris@0
|
113 SimpleCepstrum::getPreferredBlockSize() const
|
Chris@0
|
114 {
|
Chris@0
|
115 return 1024;
|
Chris@0
|
116 }
|
Chris@0
|
117
|
Chris@0
|
118 size_t
|
Chris@0
|
119 SimpleCepstrum::getPreferredStepSize() const
|
Chris@0
|
120 {
|
Chris@0
|
121 return 256;
|
Chris@0
|
122 }
|
Chris@0
|
123
|
Chris@0
|
124 size_t
|
Chris@0
|
125 SimpleCepstrum::getMinChannelCount() const
|
Chris@0
|
126 {
|
Chris@0
|
127 return 1;
|
Chris@0
|
128 }
|
Chris@0
|
129
|
Chris@0
|
130 size_t
|
Chris@0
|
131 SimpleCepstrum::getMaxChannelCount() const
|
Chris@0
|
132 {
|
Chris@0
|
133 return 1;
|
Chris@0
|
134 }
|
Chris@0
|
135
|
Chris@0
|
136 SimpleCepstrum::ParameterList
|
Chris@0
|
137 SimpleCepstrum::getParameterDescriptors() const
|
Chris@0
|
138 {
|
Chris@0
|
139 ParameterList list;
|
Chris@0
|
140
|
Chris@0
|
141 ParameterDescriptor d;
|
Chris@0
|
142
|
Chris@0
|
143 d.identifier = "fmin";
|
Chris@0
|
144 d.name = "Minimum frequency";
|
Chris@0
|
145 d.description = "";
|
Chris@0
|
146 d.unit = "Hz";
|
Chris@0
|
147 d.minValue = m_inputSampleRate / m_blockSize;
|
Chris@0
|
148 d.maxValue = m_inputSampleRate / 2;
|
Chris@0
|
149 d.defaultValue = 50;
|
Chris@0
|
150 d.isQuantized = false;
|
Chris@0
|
151 list.push_back(d);
|
Chris@0
|
152
|
Chris@0
|
153 d.identifier = "fmax";
|
Chris@0
|
154 d.name = "Maximum frequency";
|
Chris@0
|
155 d.description = "";
|
Chris@0
|
156 d.unit = "Hz";
|
Chris@0
|
157 d.minValue = m_inputSampleRate / m_blockSize;
|
Chris@0
|
158 d.maxValue = m_inputSampleRate / 2;
|
Chris@0
|
159 d.defaultValue = 1000;
|
Chris@0
|
160 d.isQuantized = false;
|
Chris@0
|
161 list.push_back(d);
|
Chris@0
|
162
|
Chris@5
|
163 d.identifier = "histlen";
|
Chris@5
|
164 d.name = "Mean filter history length";
|
Chris@5
|
165 d.description = "";
|
Chris@5
|
166 d.unit = "";
|
Chris@5
|
167 d.minValue = 1;
|
Chris@5
|
168 d.maxValue = 10;
|
Chris@10
|
169 d.defaultValue = 1;
|
Chris@5
|
170 d.isQuantized = true;
|
Chris@5
|
171 d.quantizeStep = 1;
|
Chris@5
|
172 list.push_back(d);
|
Chris@5
|
173
|
Chris@10
|
174 d.identifier = "vflen";
|
Chris@10
|
175 d.name = "Vertical filter length";
|
Chris@10
|
176 d.description = "";
|
Chris@10
|
177 d.unit = "";
|
Chris@10
|
178 d.minValue = 1;
|
Chris@10
|
179 d.maxValue = 11;
|
Chris@10
|
180 d.defaultValue = 1;
|
Chris@10
|
181 d.isQuantized = true;
|
Chris@10
|
182 d.quantizeStep = 2;
|
Chris@10
|
183 list.push_back(d);
|
Chris@10
|
184
|
Chris@3
|
185 d.identifier = "method";
|
Chris@3
|
186 d.name = "Cepstrum transform method";
|
Chris@3
|
187 d.unit = "";
|
Chris@3
|
188 d.minValue = 0;
|
Chris@5
|
189 d.maxValue = 4;
|
Chris@3
|
190 d.defaultValue = 0;
|
Chris@3
|
191 d.isQuantized = true;
|
Chris@3
|
192 d.quantizeStep = 1;
|
Chris@3
|
193 d.valueNames.push_back("Inverse symmetric");
|
Chris@3
|
194 d.valueNames.push_back("Inverse asymmetric");
|
Chris@4
|
195 d.valueNames.push_back("Inverse complex");
|
Chris@3
|
196 d.valueNames.push_back("Forward magnitude");
|
Chris@3
|
197 d.valueNames.push_back("Forward difference");
|
Chris@3
|
198 list.push_back(d);
|
Chris@3
|
199
|
Chris@10
|
200 d.identifier = "clamp";
|
Chris@10
|
201 d.name = "Clamp negative values in cepstrum at zero";
|
Chris@10
|
202 d.unit = "";
|
Chris@10
|
203 d.minValue = 0;
|
Chris@10
|
204 d.maxValue = 1;
|
Chris@10
|
205 d.defaultValue = 0;
|
Chris@10
|
206 d.isQuantized = true;
|
Chris@10
|
207 d.quantizeStep = 1;
|
Chris@10
|
208 d.valueNames.clear();
|
Chris@10
|
209 list.push_back(d);
|
Chris@10
|
210
|
Chris@0
|
211 return list;
|
Chris@0
|
212 }
|
Chris@0
|
213
|
Chris@0
|
214 float
|
Chris@0
|
215 SimpleCepstrum::getParameter(string identifier) const
|
Chris@0
|
216 {
|
Chris@0
|
217 if (identifier == "fmin") return m_fmin;
|
Chris@0
|
218 else if (identifier == "fmax") return m_fmax;
|
Chris@5
|
219 else if (identifier == "histlen") return m_histlen;
|
Chris@10
|
220 else if (identifier == "vflen") return m_vflen;
|
Chris@10
|
221 else if (identifier == "clamp") return (m_clamp ? 1 : 0);
|
Chris@3
|
222 else if (identifier == "method") return (int)m_method;
|
Chris@0
|
223 else return 0.f;
|
Chris@0
|
224 }
|
Chris@0
|
225
|
Chris@0
|
226 void
|
Chris@0
|
227 SimpleCepstrum::setParameter(string identifier, float value)
|
Chris@0
|
228 {
|
Chris@0
|
229 if (identifier == "fmin") m_fmin = value;
|
Chris@0
|
230 else if (identifier == "fmax") m_fmax = value;
|
Chris@5
|
231 else if (identifier == "histlen") m_histlen = value;
|
Chris@10
|
232 else if (identifier == "vflen") m_vflen = value;
|
Chris@10
|
233 else if (identifier == "clamp") m_clamp = (value > 0.5);
|
Chris@3
|
234 else if (identifier == "method") m_method = Method(int(value + 0.5));
|
Chris@0
|
235 }
|
Chris@0
|
236
|
Chris@0
|
237 SimpleCepstrum::ProgramList
|
Chris@0
|
238 SimpleCepstrum::getPrograms() const
|
Chris@0
|
239 {
|
Chris@0
|
240 ProgramList list;
|
Chris@0
|
241 return list;
|
Chris@0
|
242 }
|
Chris@0
|
243
|
Chris@0
|
244 string
|
Chris@0
|
245 SimpleCepstrum::getCurrentProgram() const
|
Chris@0
|
246 {
|
Chris@0
|
247 return ""; // no programs
|
Chris@0
|
248 }
|
Chris@0
|
249
|
Chris@0
|
250 void
|
Chris@0
|
251 SimpleCepstrum::selectProgram(string name)
|
Chris@0
|
252 {
|
Chris@0
|
253 }
|
Chris@0
|
254
|
Chris@0
|
255 SimpleCepstrum::OutputList
|
Chris@0
|
256 SimpleCepstrum::getOutputDescriptors() const
|
Chris@0
|
257 {
|
Chris@0
|
258 OutputList outputs;
|
Chris@0
|
259
|
Chris@0
|
260 int n = 0;
|
Chris@0
|
261
|
Chris@0
|
262 OutputDescriptor d;
|
Chris@2
|
263
|
Chris@7
|
264 d.identifier = "raw_cepstral_peak";
|
Chris@7
|
265 d.name = "Frequency corresponding to raw cepstral peak";
|
Chris@24
|
266 d.description = "Return the frequency whose period corresponds to the quefrency with the maximum bin value within the specified range of the cepstrum";
|
Chris@6
|
267 d.unit = "Hz";
|
Chris@0
|
268 d.hasFixedBinCount = true;
|
Chris@0
|
269 d.binCount = 1;
|
Chris@0
|
270 d.hasKnownExtents = true;
|
Chris@0
|
271 d.minValue = m_fmin;
|
Chris@0
|
272 d.maxValue = m_fmax;
|
Chris@0
|
273 d.isQuantized = false;
|
Chris@0
|
274 d.sampleType = OutputDescriptor::OneSamplePerStep;
|
Chris@0
|
275 d.hasDuration = false;
|
Chris@5
|
276 m_pkOutput = n++;
|
Chris@0
|
277 outputs.push_back(d);
|
Chris@0
|
278
|
Chris@24
|
279 d.identifier = "interpolated_peak";
|
Chris@24
|
280 d.name = "Interpolated peak frequency";
|
Chris@24
|
281 d.description = "Return the frequency whose period corresponds to the quefrency with the maximum bin value within the specified range of the cepstrum, using cubic interpolation to estimate the peak quefrency to finer than single bin resolution";
|
Chris@24
|
282 m_ipkOutput = n++;
|
Chris@24
|
283 outputs.push_back(d);
|
Chris@24
|
284
|
Chris@0
|
285 d.identifier = "variance";
|
Chris@0
|
286 d.name = "Variance of cepstral bins in range";
|
Chris@0
|
287 d.unit = "";
|
Chris@2
|
288 d.description = "Return the variance of bin values within the specified range of the cepstrum";
|
Chris@6
|
289 d.hasKnownExtents = false;
|
Chris@0
|
290 m_varOutput = n++;
|
Chris@0
|
291 outputs.push_back(d);
|
Chris@0
|
292
|
Chris@0
|
293 d.identifier = "peak";
|
Chris@8
|
294 d.name = "Value at peak";
|
Chris@0
|
295 d.unit = "";
|
Chris@2
|
296 d.description = "Return the value found in the maximum-valued bin within the specified range of the cepstrum";
|
Chris@0
|
297 m_pvOutput = n++;
|
Chris@0
|
298 outputs.push_back(d);
|
Chris@0
|
299
|
Chris@5
|
300 d.identifier = "peak_to_rms";
|
Chris@5
|
301 d.name = "Peak-to-RMS distance";
|
Chris@5
|
302 d.unit = "";
|
Chris@5
|
303 d.description = "Return the difference between maximum and root mean square bin values within the specified range of the cepstrum";
|
Chris@5
|
304 m_p2rOutput = n++;
|
Chris@5
|
305 outputs.push_back(d);
|
Chris@5
|
306
|
Chris@6
|
307 d.identifier = "peak_proportion";
|
Chris@7
|
308 d.name = "Energy around peak";
|
Chris@6
|
309 d.unit = "";
|
Chris@7
|
310 d.description = "Return the proportion of total energy that is found in the bins around the peak bin (as far as the nearest local minima), within the specified range of the cepstrum";
|
Chris@6
|
311 m_ppOutput = n++;
|
Chris@6
|
312 outputs.push_back(d);
|
Chris@6
|
313
|
Chris@20
|
314 d.identifier = "peak_to_second_peak";
|
Chris@27
|
315 d.name = "Peak to second-peak difference";
|
Chris@20
|
316 d.unit = "";
|
Chris@27
|
317 d.description = "Return the difference between the value found in the peak bin within the specified range of the cepstrum, and that found in the next highest peak";
|
Chris@20
|
318 m_pkoOutput = n++;
|
Chris@20
|
319 outputs.push_back(d);
|
Chris@20
|
320
|
Chris@6
|
321 d.identifier = "total";
|
Chris@6
|
322 d.name = "Total energy";
|
Chris@6
|
323 d.unit = "";
|
Chris@7
|
324 d.description = "Return the total energy found in all bins within the specified range of the cepstrum";
|
Chris@6
|
325 m_totOutput = n++;
|
Chris@6
|
326 outputs.push_back(d);
|
Chris@6
|
327
|
Chris@0
|
328 d.identifier = "cepstrum";
|
Chris@0
|
329 d.name = "Cepstrum";
|
Chris@0
|
330 d.unit = "";
|
Chris@2
|
331 d.description = "The unprocessed cepstrum bins within the specified range";
|
Chris@0
|
332
|
Chris@0
|
333 int from = int(m_inputSampleRate / m_fmax);
|
Chris@0
|
334 int to = int(m_inputSampleRate / m_fmin);
|
Chris@0
|
335 if (to >= (int)m_blockSize / 2) {
|
Chris@0
|
336 to = m_blockSize / 2 - 1;
|
Chris@0
|
337 }
|
Chris@0
|
338 d.binCount = to - from + 1;
|
Chris@0
|
339 for (int i = from; i <= to; ++i) {
|
Chris@0
|
340 float freq = m_inputSampleRate / i;
|
Chris@5
|
341 char buffer[20];
|
Chris@2
|
342 sprintf(buffer, "%.2f Hz", freq);
|
Chris@0
|
343 d.binNames.push_back(buffer);
|
Chris@0
|
344 }
|
Chris@0
|
345
|
Chris@0
|
346 d.hasKnownExtents = false;
|
Chris@0
|
347 m_cepOutput = n++;
|
Chris@0
|
348 outputs.push_back(d);
|
Chris@0
|
349
|
Chris@0
|
350 d.identifier = "am";
|
Chris@5
|
351 d.name = "Cepstrum bins relative to RMS";
|
Chris@5
|
352 d.description = "The cepstrum bins within the specified range, expressed as a value relative to the root mean square bin value in the range, with values below the RMS clamped to zero";
|
Chris@0
|
353 m_amOutput = n++;
|
Chris@0
|
354 outputs.push_back(d);
|
Chris@0
|
355
|
Chris@2
|
356 d.identifier = "env";
|
Chris@2
|
357 d.name = "Spectral envelope";
|
Chris@2
|
358 d.description = "Envelope calculated from the cepstral values below the specified minimum";
|
Chris@2
|
359 d.binCount = m_blockSize/2 + 1;
|
Chris@2
|
360 d.binNames.clear();
|
Chris@7
|
361 for (int i = 0; i < (int)d.binCount; ++i) {
|
Chris@2
|
362 float freq = (m_inputSampleRate / m_blockSize) * i;
|
Chris@5
|
363 char buffer[20];
|
Chris@2
|
364 sprintf(buffer, "%.2f Hz", freq);
|
Chris@2
|
365 d.binNames.push_back(buffer);
|
Chris@2
|
366 }
|
Chris@2
|
367 m_envOutput = n++;
|
Chris@2
|
368 outputs.push_back(d);
|
Chris@2
|
369
|
Chris@2
|
370 d.identifier = "es";
|
Chris@2
|
371 d.name = "Spectrum without envelope";
|
Chris@2
|
372 d.description = "Magnitude of spectrum values divided by calculated envelope values, to deconvolve the envelope";
|
Chris@2
|
373 m_esOutput = n++;
|
Chris@2
|
374 outputs.push_back(d);
|
Chris@2
|
375
|
Chris@0
|
376 return outputs;
|
Chris@0
|
377 }
|
Chris@0
|
378
|
Chris@0
|
379 bool
|
Chris@0
|
380 SimpleCepstrum::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
Chris@0
|
381 {
|
Chris@0
|
382 if (channels < getMinChannelCount() ||
|
Chris@0
|
383 channels > getMaxChannelCount()) return false;
|
Chris@0
|
384
|
Chris@0
|
385 // std::cerr << "SimpleCepstrum::initialise: channels = " << channels
|
Chris@0
|
386 // << ", stepSize = " << stepSize << ", blockSize = " << blockSize
|
Chris@0
|
387 // << std::endl;
|
Chris@0
|
388
|
Chris@0
|
389 m_channels = channels;
|
Chris@0
|
390 m_stepSize = stepSize;
|
Chris@0
|
391 m_blockSize = blockSize;
|
Chris@0
|
392
|
Chris@5
|
393 m_binFrom = int(m_inputSampleRate / m_fmax);
|
Chris@5
|
394 m_binTo = int(m_inputSampleRate / m_fmin);
|
Chris@5
|
395
|
Chris@7
|
396 if (m_binTo >= (int)m_blockSize / 2) {
|
Chris@5
|
397 m_binTo = m_blockSize / 2 - 1;
|
Chris@5
|
398 }
|
Chris@5
|
399
|
Chris@5
|
400 m_bins = (m_binTo - m_binFrom) + 1;
|
Chris@5
|
401
|
Chris@5
|
402 m_history = new double *[m_histlen];
|
Chris@5
|
403 for (int i = 0; i < m_histlen; ++i) {
|
Chris@5
|
404 m_history[i] = new double[m_bins];
|
Chris@5
|
405 }
|
Chris@5
|
406
|
Chris@5
|
407 reset();
|
Chris@5
|
408
|
Chris@0
|
409 return true;
|
Chris@0
|
410 }
|
Chris@0
|
411
|
Chris@0
|
412 void
|
Chris@0
|
413 SimpleCepstrum::reset()
|
Chris@0
|
414 {
|
Chris@5
|
415 for (int i = 0; i < m_histlen; ++i) {
|
Chris@5
|
416 for (int j = 0; j < m_bins; ++j) {
|
Chris@5
|
417 m_history[i][j] = 0.0;
|
Chris@5
|
418 }
|
Chris@5
|
419 }
|
Chris@5
|
420 }
|
Chris@5
|
421
|
Chris@5
|
422 void
|
Chris@5
|
423 SimpleCepstrum::filter(const double *cep, double *result)
|
Chris@5
|
424 {
|
Chris@5
|
425 int hix = m_histlen - 1; // current history index
|
Chris@5
|
426
|
Chris@5
|
427 // roll back the history
|
Chris@5
|
428 if (m_histlen > 1) {
|
Chris@5
|
429 double *oldest = m_history[0];
|
Chris@5
|
430 for (int i = 1; i < m_histlen; ++i) {
|
Chris@5
|
431 m_history[i-1] = m_history[i];
|
Chris@5
|
432 }
|
Chris@5
|
433 // and stick this back in the newest spot, to recycle
|
Chris@5
|
434 m_history[hix] = oldest;
|
Chris@5
|
435 }
|
Chris@5
|
436
|
Chris@5
|
437 for (int i = 0; i < m_bins; ++i) {
|
Chris@10
|
438 double v = 0;
|
Chris@10
|
439 int n = 0;
|
Chris@10
|
440 // average according to the vertical filter length
|
Chris@10
|
441 for (int j = -m_vflen/2; j <= m_vflen/2; ++j) {
|
Chris@10
|
442 int ix = i + m_binFrom + j;
|
Chris@10
|
443 if (ix >= 0 && ix < m_blockSize) {
|
Chris@10
|
444 v += cep[ix];
|
Chris@10
|
445 ++n;
|
Chris@10
|
446 }
|
Chris@10
|
447 }
|
Chris@10
|
448 m_history[hix][i] = v / n;
|
Chris@5
|
449 }
|
Chris@5
|
450
|
Chris@5
|
451 for (int i = 0; i < m_bins; ++i) {
|
Chris@5
|
452 double mean = 0.0;
|
Chris@5
|
453 for (int j = 0; j < m_histlen; ++j) {
|
Chris@5
|
454 mean += m_history[j][i];
|
Chris@5
|
455 }
|
Chris@5
|
456 mean /= m_histlen;
|
Chris@5
|
457 result[i] = mean;
|
Chris@5
|
458 }
|
Chris@5
|
459 }
|
Chris@24
|
460
|
Chris@24
|
461 double
|
Chris@24
|
462 SimpleCepstrum::cubicInterpolate(const double y[4], double x)
|
Chris@24
|
463 {
|
Chris@24
|
464 double a0 = y[3] - y[2] - y[0] + y[1];
|
Chris@24
|
465 double a1 = y[0] - y[1] - a0;
|
Chris@24
|
466 double a2 = y[2] - y[0];
|
Chris@24
|
467 double a3 = y[1];
|
Chris@24
|
468 return
|
Chris@24
|
469 a0 * x * x * x +
|
Chris@24
|
470 a1 * x * x +
|
Chris@24
|
471 a2 * x +
|
Chris@24
|
472 a3;
|
Chris@24
|
473 }
|
Chris@24
|
474
|
Chris@24
|
475 double
|
Chris@24
|
476 SimpleCepstrum::findInterpolatedPeak(const double *in, int maxbin)
|
Chris@24
|
477 {
|
Chris@24
|
478 if (maxbin < 2 || maxbin > m_bins - 3) {
|
Chris@24
|
479 return maxbin;
|
Chris@24
|
480 }
|
Chris@24
|
481
|
Chris@24
|
482 double maxval = 0.0;
|
Chris@24
|
483 double maxidx = maxbin;
|
Chris@24
|
484
|
Chris@24
|
485 const int divisions = 10;
|
Chris@24
|
486 double y[4];
|
Chris@24
|
487
|
Chris@24
|
488 y[0] = in[maxbin-1];
|
Chris@24
|
489 y[1] = in[maxbin];
|
Chris@24
|
490 y[2] = in[maxbin+1];
|
Chris@24
|
491 y[3] = in[maxbin+2];
|
Chris@24
|
492 for (int i = 0; i < divisions; ++i) {
|
Chris@24
|
493 double probe = double(i) / double(divisions);
|
Chris@24
|
494 double value = cubicInterpolate(y, probe);
|
Chris@24
|
495 if (value > maxval) {
|
Chris@24
|
496 maxval = value;
|
Chris@24
|
497 maxidx = maxbin + probe;
|
Chris@24
|
498 }
|
Chris@24
|
499 }
|
Chris@24
|
500
|
Chris@24
|
501 y[3] = y[2];
|
Chris@24
|
502 y[2] = y[1];
|
Chris@24
|
503 y[1] = y[0];
|
Chris@24
|
504 y[0] = in[maxbin-2];
|
Chris@24
|
505 for (int i = 0; i < divisions; ++i) {
|
Chris@24
|
506 double probe = double(i) / double(divisions);
|
Chris@24
|
507 double value = cubicInterpolate(y, probe);
|
Chris@24
|
508 if (value > maxval) {
|
Chris@24
|
509 maxval = value;
|
Chris@24
|
510 maxidx = maxbin - 1 + probe;
|
Chris@24
|
511 }
|
Chris@24
|
512 }
|
Chris@24
|
513
|
Chris@24
|
514 /*
|
Chris@24
|
515 std::cerr << "centre = " << maxbin << ": ["
|
Chris@24
|
516 << in[maxbin-2] << ","
|
Chris@24
|
517 << in[maxbin-1] << ","
|
Chris@24
|
518 << in[maxbin] << ","
|
Chris@24
|
519 << in[maxbin+1] << ","
|
Chris@24
|
520 << in[maxbin+2] << "] -> " << maxidx << std::endl;
|
Chris@24
|
521 */
|
Chris@24
|
522
|
Chris@24
|
523 return maxidx;
|
Chris@24
|
524 }
|
Chris@5
|
525
|
Chris@5
|
526 void
|
Chris@5
|
527 SimpleCepstrum::addStatisticalOutputs(FeatureSet &fs, const double *data)
|
Chris@5
|
528 {
|
Chris@5
|
529 int n = m_bins;
|
Chris@5
|
530
|
Chris@6
|
531 double maxval = 0.0;
|
Chris@5
|
532 int maxbin = 0;
|
Chris@5
|
533
|
Chris@5
|
534 for (int i = 0; i < n; ++i) {
|
Chris@5
|
535 if (data[i] > maxval) {
|
Chris@5
|
536 maxval = data[i];
|
Chris@6
|
537 maxbin = i;
|
Chris@5
|
538 }
|
Chris@5
|
539 }
|
Chris@5
|
540
|
Chris@20
|
541 double nextPeakVal = 0.0;
|
Chris@20
|
542
|
Chris@20
|
543 for (int i = 1; i+1 < n; ++i) {
|
Chris@20
|
544 if (data[i] > data[i-1] &&
|
Chris@20
|
545 data[i] > data[i+1] &&
|
Chris@20
|
546 i != maxbin &&
|
Chris@20
|
547 data[i] > nextPeakVal) {
|
Chris@20
|
548 nextPeakVal = data[i];
|
Chris@20
|
549 }
|
Chris@20
|
550 }
|
Chris@20
|
551
|
Chris@5
|
552 Feature rf;
|
Chris@24
|
553 Feature irf;
|
Chris@6
|
554 if (maxval > 0.0) {
|
Chris@6
|
555 rf.values.push_back(m_inputSampleRate / (maxbin + m_binFrom));
|
Chris@24
|
556 double cimax = findInterpolatedPeak(data, maxbin);
|
Chris@24
|
557 irf.values.push_back(m_inputSampleRate / (cimax + m_binFrom));
|
Chris@5
|
558 } else {
|
Chris@5
|
559 rf.values.push_back(0);
|
Chris@24
|
560 irf.values.push_back(0);
|
Chris@5
|
561 }
|
Chris@5
|
562 fs[m_pkOutput].push_back(rf);
|
Chris@24
|
563 fs[m_ipkOutput].push_back(irf);
|
Chris@5
|
564
|
Chris@6
|
565 double total = 0;
|
Chris@5
|
566 for (int i = 0; i < n; ++i) {
|
Chris@6
|
567 total += data[i];
|
Chris@5
|
568 }
|
Chris@5
|
569
|
Chris@6
|
570 Feature tot;
|
Chris@6
|
571 tot.values.push_back(total);
|
Chris@6
|
572 fs[m_totOutput].push_back(tot);
|
Chris@6
|
573
|
Chris@6
|
574 double mean = total / n;
|
Chris@6
|
575
|
Chris@6
|
576 double totsqr = 0;
|
Chris@8
|
577 double abstot = 0;
|
Chris@5
|
578 for (int i = 0; i < n; ++i) {
|
Chris@6
|
579 totsqr += data[i] * data[i];
|
Chris@8
|
580 abstot += fabs(data[i]);
|
Chris@5
|
581 }
|
Chris@6
|
582 double rms = sqrt(totsqr / n);
|
Chris@5
|
583
|
Chris@5
|
584 double variance = 0;
|
Chris@5
|
585 for (int i = 0; i < n; ++i) {
|
Chris@5
|
586 double dev = fabs(data[i] - mean);
|
Chris@5
|
587 variance += dev * dev;
|
Chris@5
|
588 }
|
Chris@5
|
589 variance /= n;
|
Chris@5
|
590
|
Chris@6
|
591 double aroundPeak = 0.0;
|
Chris@6
|
592 double peakProportion = 0.0;
|
Chris@6
|
593 if (maxval > 0.0) {
|
Chris@7
|
594 aroundPeak += fabs(maxval);
|
Chris@6
|
595 int i = maxbin - 1;
|
Chris@6
|
596 while (i > 0 && data[i] <= data[i+1]) {
|
Chris@7
|
597 aroundPeak += fabs(data[i]);
|
Chris@6
|
598 --i;
|
Chris@6
|
599 }
|
Chris@6
|
600 i = maxbin + 1;
|
Chris@6
|
601 while (i < n && data[i] <= data[i-1]) {
|
Chris@7
|
602 aroundPeak += fabs(data[i]);
|
Chris@6
|
603 ++i;
|
Chris@6
|
604 }
|
Chris@6
|
605 }
|
Chris@8
|
606 peakProportion = aroundPeak / abstot;
|
Chris@6
|
607 Feature pp;
|
Chris@6
|
608 pp.values.push_back(peakProportion);
|
Chris@6
|
609 fs[m_ppOutput].push_back(pp);
|
Chris@6
|
610
|
Chris@5
|
611 Feature vf;
|
Chris@5
|
612 vf.values.push_back(variance);
|
Chris@5
|
613 fs[m_varOutput].push_back(vf);
|
Chris@5
|
614
|
Chris@5
|
615 Feature pr;
|
Chris@5
|
616 pr.values.push_back(maxval - rms);
|
Chris@5
|
617 fs[m_p2rOutput].push_back(pr);
|
Chris@5
|
618
|
Chris@5
|
619 Feature pv;
|
Chris@5
|
620 pv.values.push_back(maxval);
|
Chris@5
|
621 fs[m_pvOutput].push_back(pv);
|
Chris@5
|
622
|
Chris@20
|
623 Feature pko;
|
Chris@20
|
624 if (nextPeakVal != 0.0) {
|
Chris@27
|
625 pko.values.push_back(maxval - nextPeakVal);
|
Chris@20
|
626 } else {
|
Chris@20
|
627 pko.values.push_back(0.0);
|
Chris@20
|
628 }
|
Chris@20
|
629 fs[m_pkoOutput].push_back(pko);
|
Chris@20
|
630
|
Chris@5
|
631 Feature am;
|
Chris@5
|
632 for (int i = 0; i < n; ++i) {
|
Chris@5
|
633 if (data[i] < rms) am.values.push_back(0);
|
Chris@5
|
634 else am.values.push_back(data[i] - rms);
|
Chris@5
|
635 }
|
Chris@5
|
636 fs[m_amOutput].push_back(am);
|
Chris@5
|
637 }
|
Chris@5
|
638
|
Chris@5
|
639 void
|
Chris@5
|
640 SimpleCepstrum::addEnvelopeOutputs(FeatureSet &fs, const float *const *inputBuffers, const double *cep)
|
Chris@5
|
641 {
|
Chris@5
|
642 // Wipe the higher cepstral bins in order to calculate the
|
Chris@5
|
643 // envelope. This calculation uses the raw cepstrum, not the
|
Chris@5
|
644 // filtered values (because only values "in frequency range" are
|
Chris@5
|
645 // filtered).
|
Chris@5
|
646 int bs = m_blockSize;
|
Chris@5
|
647 int hs = m_blockSize/2 + 1;
|
Chris@5
|
648
|
Chris@5
|
649 double *ecep = new double[bs];
|
Chris@5
|
650 for (int i = 0; i < m_binFrom; ++i) {
|
Chris@5
|
651 ecep[i] = cep[i] / bs;
|
Chris@5
|
652 }
|
Chris@5
|
653 for (int i = m_binFrom; i < bs; ++i) {
|
Chris@5
|
654 ecep[i] = 0;
|
Chris@5
|
655 }
|
Chris@5
|
656 ecep[0] /= 2;
|
Chris@5
|
657 ecep[m_binFrom-1] /= 2;
|
Chris@5
|
658
|
Chris@5
|
659 double *env = new double[bs];
|
Chris@5
|
660 double *io = new double[bs];
|
Chris@7
|
661
|
Chris@7
|
662 //!!! This is only right if the previous transform was an inverse one!
|
Chris@33
|
663 Vamp::FFT::forward(bs, ecep, 0, env, io);
|
Chris@5
|
664
|
Chris@5
|
665 for (int i = 0; i < hs; ++i) {
|
Chris@5
|
666 env[i] = exp(env[i]);
|
Chris@5
|
667 }
|
Chris@5
|
668 Feature envf;
|
Chris@5
|
669 for (int i = 0; i < hs; ++i) {
|
Chris@5
|
670 envf.values.push_back(env[i]);
|
Chris@5
|
671 }
|
Chris@5
|
672 fs[m_envOutput].push_back(envf);
|
Chris@5
|
673
|
Chris@5
|
674 Feature es;
|
Chris@5
|
675 for (int i = 0; i < hs; ++i) {
|
Chris@5
|
676 double re = inputBuffers[0][i*2 ] / env[i];
|
Chris@5
|
677 double im = inputBuffers[0][i*2+1] / env[i];
|
Chris@5
|
678 double mag = sqrt(re*re + im*im);
|
Chris@5
|
679 es.values.push_back(mag);
|
Chris@5
|
680 }
|
Chris@5
|
681 fs[m_esOutput].push_back(es);
|
Chris@5
|
682
|
Chris@5
|
683 delete[] env;
|
Chris@5
|
684 delete[] ecep;
|
Chris@5
|
685 delete[] io;
|
Chris@0
|
686 }
|
Chris@0
|
687
|
Chris@0
|
688 SimpleCepstrum::FeatureSet
|
Chris@0
|
689 SimpleCepstrum::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
Chris@0
|
690 {
|
Chris@1
|
691 FeatureSet fs;
|
Chris@1
|
692
|
Chris@0
|
693 int bs = m_blockSize;
|
Chris@0
|
694 int hs = m_blockSize/2 + 1;
|
Chris@0
|
695
|
Chris@5
|
696 double *rawcep = new double[bs];
|
Chris@3
|
697 double *io = new double[bs];
|
Chris@3
|
698
|
Chris@4
|
699 if (m_method != InverseComplex) {
|
Chris@3
|
700
|
Chris@4
|
701 double *logmag = new double[bs];
|
Chris@4
|
702
|
Chris@4
|
703 for (int i = 0; i < hs; ++i) {
|
Chris@3
|
704
|
Chris@4
|
705 double power =
|
Chris@4
|
706 inputBuffers[0][i*2 ] * inputBuffers[0][i*2 ] +
|
Chris@4
|
707 inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1];
|
Chris@5
|
708 double mag = sqrt(power);
|
Chris@3
|
709
|
Chris@5
|
710 double lm = log(mag + 0.00000001);
|
Chris@4
|
711
|
Chris@4
|
712 switch (m_method) {
|
Chris@4
|
713 case InverseSymmetric:
|
Chris@4
|
714 logmag[i] = lm;
|
Chris@4
|
715 if (i > 0) logmag[bs - i] = lm;
|
Chris@4
|
716 break;
|
Chris@4
|
717 case InverseAsymmetric:
|
Chris@4
|
718 logmag[i] = lm;
|
Chris@4
|
719 if (i > 0) logmag[bs - i] = 0;
|
Chris@4
|
720 break;
|
Chris@4
|
721 default:
|
Chris@4
|
722 logmag[bs/2 + i - 1] = lm;
|
Chris@4
|
723 if (i < hs-1) {
|
Chris@4
|
724 logmag[bs/2 - i - 1] = lm;
|
Chris@4
|
725 }
|
Chris@4
|
726 break;
|
Chris@3
|
727 }
|
Chris@3
|
728 }
|
Chris@4
|
729
|
Chris@4
|
730 if (m_method == InverseSymmetric ||
|
Chris@4
|
731 m_method == InverseAsymmetric) {
|
Chris@4
|
732
|
Chris@33
|
733 Vamp::FFT::inverse(bs, logmag, 0, rawcep, io);
|
Chris@4
|
734
|
Chris@4
|
735 } else {
|
Chris@4
|
736
|
Chris@33
|
737 Vamp::FFT::forward(bs, logmag, 0, rawcep, io);
|
Chris@4
|
738
|
Chris@4
|
739 if (m_method == ForwardDifference) {
|
Chris@4
|
740 for (int i = 0; i < hs; ++i) {
|
Chris@5
|
741 rawcep[i] = fabs(io[i]) - fabs(rawcep[i]);
|
Chris@4
|
742 }
|
Chris@4
|
743 } else {
|
Chris@4
|
744 for (int i = 0; i < hs; ++i) {
|
Chris@5
|
745 rawcep[i] = sqrt(rawcep[i]*rawcep[i] + io[i]*io[i]);
|
Chris@4
|
746 }
|
Chris@4
|
747 }
|
Chris@4
|
748 }
|
Chris@4
|
749
|
Chris@4
|
750 delete[] logmag;
|
Chris@4
|
751
|
Chris@4
|
752 } else { // InverseComplex
|
Chris@4
|
753
|
Chris@4
|
754 double *ri = new double[bs];
|
Chris@4
|
755 double *ii = new double[bs];
|
Chris@4
|
756
|
Chris@4
|
757 for (int i = 0; i < hs; ++i) {
|
Chris@4
|
758 double re = inputBuffers[0][i*2];
|
Chris@4
|
759 double im = inputBuffers[0][i*2+1];
|
Chris@4
|
760 std::complex<double> c(re, im);
|
Chris@4
|
761 std::complex<double> clog = std::log(c);
|
Chris@4
|
762 ri[i] = clog.real();
|
Chris@4
|
763 ii[i] = clog.imag();
|
Chris@4
|
764 if (i > 0) {
|
Chris@4
|
765 ri[bs - i] = ri[i];
|
Chris@4
|
766 ii[bs - i] = -ii[i];
|
Chris@4
|
767 }
|
Chris@4
|
768 }
|
Chris@4
|
769
|
Chris@33
|
770 Vamp::FFT::inverse(bs, ri, ii, rawcep, io);
|
Chris@4
|
771
|
Chris@4
|
772 delete[] ri;
|
Chris@4
|
773 delete[] ii;
|
Chris@3
|
774 }
|
Chris@0
|
775
|
Chris@0
|
776 if (m_clamp) {
|
Chris@0
|
777 for (int i = 0; i < bs; ++i) {
|
Chris@5
|
778 if (rawcep[i] < 0) rawcep[i] = 0;
|
Chris@0
|
779 }
|
Chris@0
|
780 }
|
Chris@0
|
781
|
Chris@5
|
782 delete[] io;
|
Chris@0
|
783
|
Chris@5
|
784 double *latest = new double[m_bins];
|
Chris@5
|
785 filter(rawcep, latest);
|
Chris@5
|
786
|
Chris@5
|
787 int n = m_bins;
|
Chris@0
|
788
|
Chris@0
|
789 Feature cf;
|
Chris@5
|
790 for (int i = 0; i < n; ++i) {
|
Chris@5
|
791 cf.values.push_back(latest[i]);
|
Chris@0
|
792 }
|
Chris@0
|
793 fs[m_cepOutput].push_back(cf);
|
Chris@0
|
794
|
Chris@5
|
795 addStatisticalOutputs(fs, latest);
|
Chris@0
|
796
|
Chris@5
|
797 addEnvelopeOutputs(fs, inputBuffers, rawcep);
|
Chris@0
|
798
|
Chris@5
|
799 delete[] latest;
|
Chris@7
|
800 delete[] rawcep;
|
Chris@0
|
801
|
Chris@0
|
802 return fs;
|
Chris@0
|
803 }
|
Chris@0
|
804
|
Chris@0
|
805 SimpleCepstrum::FeatureSet
|
Chris@0
|
806 SimpleCepstrum::getRemainingFeatures()
|
Chris@0
|
807 {
|
Chris@0
|
808 FeatureSet fs;
|
Chris@0
|
809 return fs;
|
Chris@0
|
810 }
|