Chris@0
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
Chris@7
|
2 /*
|
Chris@38
|
3 This file is Copyright (c) 2012 Chris Cannam
|
Chris@38
|
4
|
Chris@7
|
5 Permission is hereby granted, free of charge, to any person
|
Chris@7
|
6 obtaining a copy of this software and associated documentation
|
Chris@7
|
7 files (the "Software"), to deal in the Software without
|
Chris@7
|
8 restriction, including without limitation the rights to use, copy,
|
Chris@7
|
9 modify, merge, publish, distribute, sublicense, and/or sell copies
|
Chris@7
|
10 of the Software, and to permit persons to whom the Software is
|
Chris@7
|
11 furnished to do so, subject to the following conditions:
|
Chris@7
|
12
|
Chris@7
|
13 The above copyright notice and this permission notice shall be
|
Chris@7
|
14 included in all copies or substantial portions of the Software.
|
Chris@7
|
15
|
Chris@7
|
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
Chris@7
|
17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
Chris@7
|
18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
Chris@7
|
19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
|
Chris@7
|
20 ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
|
Chris@7
|
21 CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
Chris@7
|
22 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
Chris@7
|
23 */
|
Chris@0
|
24
|
Chris@0
|
25 #include "SimpleCepstrum.h"
|
Chris@0
|
26
|
Chris@33
|
27 #include "vamp-sdk/FFT.h"
|
Chris@33
|
28
|
Chris@0
|
29 #include <vector>
|
Chris@0
|
30 #include <algorithm>
|
Chris@0
|
31
|
Chris@0
|
32 #include <cstdio>
|
Chris@0
|
33 #include <cmath>
|
Chris@4
|
34 #include <complex>
|
Chris@0
|
35
|
Chris@33
|
36 #if ( VAMP_SDK_MAJOR_VERSION < 2 || ( VAMP_SDK_MAJOR_VERSION == 2 && VAMP_SDK_MINOR_VERSION < 4 ) )
|
Chris@33
|
37 #error Vamp SDK version 2.4 or newer required
|
Chris@33
|
38 #endif
|
Chris@33
|
39
|
Chris@0
|
40 using std::string;
|
Chris@0
|
41
|
Chris@0
|
42 SimpleCepstrum::SimpleCepstrum(float inputSampleRate) :
|
Chris@0
|
43 Plugin(inputSampleRate),
|
Chris@0
|
44 m_channels(0),
|
Chris@0
|
45 m_stepSize(256),
|
Chris@0
|
46 m_blockSize(1024),
|
Chris@0
|
47 m_fmin(50),
|
Chris@0
|
48 m_fmax(1000),
|
Chris@10
|
49 m_histlen(1),
|
Chris@10
|
50 m_vflen(1),
|
Chris@3
|
51 m_clamp(false),
|
Chris@5
|
52 m_method(InverseSymmetric),
|
Chris@5
|
53 m_binFrom(0),
|
Chris@5
|
54 m_binTo(0),
|
Chris@5
|
55 m_bins(0),
|
Chris@5
|
56 m_history(0)
|
Chris@0
|
57 {
|
Chris@0
|
58 }
|
Chris@0
|
59
|
Chris@0
|
60 SimpleCepstrum::~SimpleCepstrum()
|
Chris@0
|
61 {
|
Chris@5
|
62 if (m_history) {
|
Chris@5
|
63 for (int i = 0; i < m_histlen; ++i) {
|
Chris@5
|
64 delete[] m_history[i];
|
Chris@5
|
65 }
|
Chris@5
|
66 delete[] m_history;
|
Chris@5
|
67 }
|
Chris@0
|
68 }
|
Chris@0
|
69
|
Chris@0
|
70 string
|
Chris@0
|
71 SimpleCepstrum::getIdentifier() const
|
Chris@0
|
72 {
|
Chris@0
|
73 return "simple-cepstrum";
|
Chris@0
|
74 }
|
Chris@0
|
75
|
Chris@0
|
76 string
|
Chris@0
|
77 SimpleCepstrum::getName() const
|
Chris@0
|
78 {
|
Chris@0
|
79 return "Simple Cepstrum";
|
Chris@0
|
80 }
|
Chris@0
|
81
|
Chris@0
|
82 string
|
Chris@0
|
83 SimpleCepstrum::getDescription() const
|
Chris@0
|
84 {
|
Chris@2
|
85 return "Return simple cepstral data from DFT bins. This plugin is intended for casual inspection of cepstral data. It returns a lot of different sorts of data and is quite slow; it's not a good way to extract a single feature rapidly.";
|
Chris@0
|
86 }
|
Chris@0
|
87
|
Chris@0
|
88 string
|
Chris@0
|
89 SimpleCepstrum::getMaker() const
|
Chris@0
|
90 {
|
Chris@7
|
91 return "Chris Cannam";
|
Chris@0
|
92 }
|
Chris@0
|
93
|
Chris@0
|
94 int
|
Chris@0
|
95 SimpleCepstrum::getPluginVersion() const
|
Chris@0
|
96 {
|
Chris@0
|
97 // Increment this each time you release a version that behaves
|
Chris@0
|
98 // differently from the previous one
|
Chris@0
|
99 return 1;
|
Chris@0
|
100 }
|
Chris@0
|
101
|
Chris@0
|
102 string
|
Chris@0
|
103 SimpleCepstrum::getCopyright() const
|
Chris@0
|
104 {
|
Chris@7
|
105 return "Freely redistributable (BSD license)";
|
Chris@0
|
106 }
|
Chris@0
|
107
|
Chris@0
|
108 SimpleCepstrum::InputDomain
|
Chris@0
|
109 SimpleCepstrum::getInputDomain() const
|
Chris@0
|
110 {
|
Chris@0
|
111 return FrequencyDomain;
|
Chris@0
|
112 }
|
Chris@0
|
113
|
Chris@0
|
114 size_t
|
Chris@0
|
115 SimpleCepstrum::getPreferredBlockSize() const
|
Chris@0
|
116 {
|
Chris@0
|
117 return 1024;
|
Chris@0
|
118 }
|
Chris@0
|
119
|
Chris@0
|
120 size_t
|
Chris@0
|
121 SimpleCepstrum::getPreferredStepSize() const
|
Chris@0
|
122 {
|
Chris@0
|
123 return 256;
|
Chris@0
|
124 }
|
Chris@0
|
125
|
Chris@0
|
126 size_t
|
Chris@0
|
127 SimpleCepstrum::getMinChannelCount() const
|
Chris@0
|
128 {
|
Chris@0
|
129 return 1;
|
Chris@0
|
130 }
|
Chris@0
|
131
|
Chris@0
|
132 size_t
|
Chris@0
|
133 SimpleCepstrum::getMaxChannelCount() const
|
Chris@0
|
134 {
|
Chris@0
|
135 return 1;
|
Chris@0
|
136 }
|
Chris@0
|
137
|
Chris@0
|
138 SimpleCepstrum::ParameterList
|
Chris@0
|
139 SimpleCepstrum::getParameterDescriptors() const
|
Chris@0
|
140 {
|
Chris@0
|
141 ParameterList list;
|
Chris@0
|
142
|
Chris@0
|
143 ParameterDescriptor d;
|
Chris@0
|
144
|
Chris@0
|
145 d.identifier = "fmin";
|
Chris@0
|
146 d.name = "Minimum frequency";
|
Chris@43
|
147 d.description = "Frequency whose period corresponds to the quefrency of the last cepstrum bin in range";
|
Chris@0
|
148 d.unit = "Hz";
|
Chris@0
|
149 d.minValue = m_inputSampleRate / m_blockSize;
|
Chris@0
|
150 d.maxValue = m_inputSampleRate / 2;
|
Chris@0
|
151 d.defaultValue = 50;
|
Chris@0
|
152 d.isQuantized = false;
|
Chris@0
|
153 list.push_back(d);
|
Chris@0
|
154
|
Chris@0
|
155 d.identifier = "fmax";
|
Chris@0
|
156 d.name = "Maximum frequency";
|
Chris@43
|
157 d.description = "Frequency whose period corresponds to the quefrency of the first cepstrum bin in range";
|
Chris@0
|
158 d.unit = "Hz";
|
Chris@0
|
159 d.minValue = m_inputSampleRate / m_blockSize;
|
Chris@0
|
160 d.maxValue = m_inputSampleRate / 2;
|
Chris@0
|
161 d.defaultValue = 1000;
|
Chris@0
|
162 d.isQuantized = false;
|
Chris@0
|
163 list.push_back(d);
|
Chris@0
|
164
|
Chris@5
|
165 d.identifier = "histlen";
|
Chris@5
|
166 d.name = "Mean filter history length";
|
Chris@43
|
167 d.description = "Length of mean filter used for smoothing cepstrum across time bins";
|
Chris@5
|
168 d.unit = "";
|
Chris@5
|
169 d.minValue = 1;
|
Chris@5
|
170 d.maxValue = 10;
|
Chris@10
|
171 d.defaultValue = 1;
|
Chris@5
|
172 d.isQuantized = true;
|
Chris@5
|
173 d.quantizeStep = 1;
|
Chris@5
|
174 list.push_back(d);
|
Chris@5
|
175
|
Chris@10
|
176 d.identifier = "vflen";
|
Chris@10
|
177 d.name = "Vertical filter length";
|
Chris@43
|
178 d.description = "Length of mean filter used for smoothing cepstrum across quefrency bins";
|
Chris@10
|
179 d.unit = "";
|
Chris@10
|
180 d.minValue = 1;
|
Chris@10
|
181 d.maxValue = 11;
|
Chris@10
|
182 d.defaultValue = 1;
|
Chris@10
|
183 d.isQuantized = true;
|
Chris@10
|
184 d.quantizeStep = 2;
|
Chris@10
|
185 list.push_back(d);
|
Chris@10
|
186
|
Chris@3
|
187 d.identifier = "method";
|
Chris@3
|
188 d.name = "Cepstrum transform method";
|
Chris@44
|
189 d.description = "Method to use for calculating cepstrum, starting from the complex short-time Fourier transform of the input audio.\nInverse symmetric - Real part of inverse FFT of log magnitude spectrum, with frequencies above Nyquist reflecting those below it.\nInverse asymmetric - Real part of inverse FFT of log magnitude spectrum, with frequencies above Nyquist set to zero.\nInverse complex - Real part of inverse FFT of complex log spectrum.\nForward magnitude - Magnitude of forward FFT of log magnitude spectrum.\nForward difference - Difference between imaginary and real parts of forward FFT of log magnitude spectrum";
|
Chris@44
|
190 d.unit = "";
|
Chris@3
|
191 d.minValue = 0;
|
Chris@5
|
192 d.maxValue = 4;
|
Chris@3
|
193 d.defaultValue = 0;
|
Chris@3
|
194 d.isQuantized = true;
|
Chris@3
|
195 d.quantizeStep = 1;
|
Chris@3
|
196 d.valueNames.push_back("Inverse symmetric");
|
Chris@3
|
197 d.valueNames.push_back("Inverse asymmetric");
|
Chris@4
|
198 d.valueNames.push_back("Inverse complex");
|
Chris@3
|
199 d.valueNames.push_back("Forward magnitude");
|
Chris@3
|
200 d.valueNames.push_back("Forward difference");
|
Chris@3
|
201 list.push_back(d);
|
Chris@3
|
202
|
Chris@10
|
203 d.identifier = "clamp";
|
Chris@10
|
204 d.name = "Clamp negative values in cepstrum at zero";
|
Chris@44
|
205 d.description = "If set, no negative values will be returned; they will be replaced by zeroes";
|
Chris@44
|
206 d.unit = "";
|
Chris@10
|
207 d.minValue = 0;
|
Chris@10
|
208 d.maxValue = 1;
|
Chris@10
|
209 d.defaultValue = 0;
|
Chris@10
|
210 d.isQuantized = true;
|
Chris@10
|
211 d.quantizeStep = 1;
|
Chris@10
|
212 d.valueNames.clear();
|
Chris@10
|
213 list.push_back(d);
|
Chris@10
|
214
|
Chris@0
|
215 return list;
|
Chris@0
|
216 }
|
Chris@0
|
217
|
Chris@0
|
218 float
|
Chris@0
|
219 SimpleCepstrum::getParameter(string identifier) const
|
Chris@0
|
220 {
|
Chris@0
|
221 if (identifier == "fmin") return m_fmin;
|
Chris@0
|
222 else if (identifier == "fmax") return m_fmax;
|
Chris@5
|
223 else if (identifier == "histlen") return m_histlen;
|
Chris@10
|
224 else if (identifier == "vflen") return m_vflen;
|
Chris@10
|
225 else if (identifier == "clamp") return (m_clamp ? 1 : 0);
|
Chris@3
|
226 else if (identifier == "method") return (int)m_method;
|
Chris@0
|
227 else return 0.f;
|
Chris@0
|
228 }
|
Chris@0
|
229
|
Chris@0
|
230 void
|
Chris@0
|
231 SimpleCepstrum::setParameter(string identifier, float value)
|
Chris@0
|
232 {
|
Chris@0
|
233 if (identifier == "fmin") m_fmin = value;
|
Chris@0
|
234 else if (identifier == "fmax") m_fmax = value;
|
Chris@5
|
235 else if (identifier == "histlen") m_histlen = value;
|
Chris@10
|
236 else if (identifier == "vflen") m_vflen = value;
|
Chris@10
|
237 else if (identifier == "clamp") m_clamp = (value > 0.5);
|
Chris@3
|
238 else if (identifier == "method") m_method = Method(int(value + 0.5));
|
Chris@0
|
239 }
|
Chris@0
|
240
|
Chris@0
|
241 SimpleCepstrum::ProgramList
|
Chris@0
|
242 SimpleCepstrum::getPrograms() const
|
Chris@0
|
243 {
|
Chris@0
|
244 ProgramList list;
|
Chris@0
|
245 return list;
|
Chris@0
|
246 }
|
Chris@0
|
247
|
Chris@0
|
248 string
|
Chris@0
|
249 SimpleCepstrum::getCurrentProgram() const
|
Chris@0
|
250 {
|
Chris@0
|
251 return ""; // no programs
|
Chris@0
|
252 }
|
Chris@0
|
253
|
Chris@0
|
254 void
|
Chris@0
|
255 SimpleCepstrum::selectProgram(string name)
|
Chris@0
|
256 {
|
Chris@0
|
257 }
|
Chris@0
|
258
|
Chris@0
|
259 SimpleCepstrum::OutputList
|
Chris@0
|
260 SimpleCepstrum::getOutputDescriptors() const
|
Chris@0
|
261 {
|
Chris@0
|
262 OutputList outputs;
|
Chris@0
|
263
|
Chris@0
|
264 int n = 0;
|
Chris@0
|
265
|
Chris@0
|
266 OutputDescriptor d;
|
Chris@2
|
267
|
Chris@7
|
268 d.identifier = "raw_cepstral_peak";
|
Chris@7
|
269 d.name = "Frequency corresponding to raw cepstral peak";
|
Chris@24
|
270 d.description = "Return the frequency whose period corresponds to the quefrency with the maximum bin value within the specified range of the cepstrum";
|
Chris@6
|
271 d.unit = "Hz";
|
Chris@0
|
272 d.hasFixedBinCount = true;
|
Chris@0
|
273 d.binCount = 1;
|
Chris@0
|
274 d.hasKnownExtents = true;
|
Chris@0
|
275 d.minValue = m_fmin;
|
Chris@0
|
276 d.maxValue = m_fmax;
|
Chris@0
|
277 d.isQuantized = false;
|
Chris@0
|
278 d.sampleType = OutputDescriptor::OneSamplePerStep;
|
Chris@0
|
279 d.hasDuration = false;
|
Chris@5
|
280 m_pkOutput = n++;
|
Chris@0
|
281 outputs.push_back(d);
|
Chris@0
|
282
|
Chris@24
|
283 d.identifier = "interpolated_peak";
|
Chris@24
|
284 d.name = "Interpolated peak frequency";
|
Chris@40
|
285 d.description = "Return the frequency whose period corresponds to the quefrency with the maximum bin value within the specified range of the cepstrum, using parabolic interpolation to estimate the peak quefrency to finer than single bin resolution";
|
Chris@24
|
286 m_ipkOutput = n++;
|
Chris@24
|
287 outputs.push_back(d);
|
Chris@24
|
288
|
Chris@0
|
289 d.identifier = "variance";
|
Chris@0
|
290 d.name = "Variance of cepstral bins in range";
|
Chris@0
|
291 d.unit = "";
|
Chris@2
|
292 d.description = "Return the variance of bin values within the specified range of the cepstrum";
|
Chris@6
|
293 d.hasKnownExtents = false;
|
Chris@0
|
294 m_varOutput = n++;
|
Chris@0
|
295 outputs.push_back(d);
|
Chris@0
|
296
|
Chris@0
|
297 d.identifier = "peak";
|
Chris@8
|
298 d.name = "Value at peak";
|
Chris@0
|
299 d.unit = "";
|
Chris@2
|
300 d.description = "Return the value found in the maximum-valued bin within the specified range of the cepstrum";
|
Chris@0
|
301 m_pvOutput = n++;
|
Chris@0
|
302 outputs.push_back(d);
|
Chris@0
|
303
|
Chris@5
|
304 d.identifier = "peak_to_rms";
|
Chris@5
|
305 d.name = "Peak-to-RMS distance";
|
Chris@5
|
306 d.unit = "";
|
Chris@5
|
307 d.description = "Return the difference between maximum and root mean square bin values within the specified range of the cepstrum";
|
Chris@5
|
308 m_p2rOutput = n++;
|
Chris@5
|
309 outputs.push_back(d);
|
Chris@5
|
310
|
Chris@6
|
311 d.identifier = "peak_proportion";
|
Chris@7
|
312 d.name = "Energy around peak";
|
Chris@6
|
313 d.unit = "";
|
Chris@7
|
314 d.description = "Return the proportion of total energy that is found in the bins around the peak bin (as far as the nearest local minima), within the specified range of the cepstrum";
|
Chris@6
|
315 m_ppOutput = n++;
|
Chris@6
|
316 outputs.push_back(d);
|
Chris@6
|
317
|
Chris@20
|
318 d.identifier = "peak_to_second_peak";
|
Chris@27
|
319 d.name = "Peak to second-peak difference";
|
Chris@20
|
320 d.unit = "";
|
Chris@27
|
321 d.description = "Return the difference between the value found in the peak bin within the specified range of the cepstrum, and that found in the next highest peak";
|
Chris@20
|
322 m_pkoOutput = n++;
|
Chris@20
|
323 outputs.push_back(d);
|
Chris@20
|
324
|
Chris@6
|
325 d.identifier = "total";
|
Chris@6
|
326 d.name = "Total energy";
|
Chris@6
|
327 d.unit = "";
|
Chris@7
|
328 d.description = "Return the total energy found in all bins within the specified range of the cepstrum";
|
Chris@6
|
329 m_totOutput = n++;
|
Chris@6
|
330 outputs.push_back(d);
|
Chris@6
|
331
|
Chris@0
|
332 d.identifier = "cepstrum";
|
Chris@0
|
333 d.name = "Cepstrum";
|
Chris@0
|
334 d.unit = "";
|
Chris@2
|
335 d.description = "The unprocessed cepstrum bins within the specified range";
|
Chris@0
|
336
|
Chris@0
|
337 int from = int(m_inputSampleRate / m_fmax);
|
Chris@0
|
338 int to = int(m_inputSampleRate / m_fmin);
|
Chris@43
|
339 if (from >= (int)m_blockSize / 2) {
|
Chris@43
|
340 from = m_blockSize / 2 - 1;
|
Chris@43
|
341 }
|
Chris@0
|
342 if (to >= (int)m_blockSize / 2) {
|
Chris@0
|
343 to = m_blockSize / 2 - 1;
|
Chris@0
|
344 }
|
Chris@43
|
345 if (to < from) {
|
Chris@43
|
346 to = from;
|
Chris@43
|
347 }
|
Chris@0
|
348 d.binCount = to - from + 1;
|
Chris@0
|
349 for (int i = from; i <= to; ++i) {
|
Chris@0
|
350 float freq = m_inputSampleRate / i;
|
Chris@43
|
351 char buffer[50];
|
Chris@2
|
352 sprintf(buffer, "%.2f Hz", freq);
|
Chris@0
|
353 d.binNames.push_back(buffer);
|
Chris@0
|
354 }
|
Chris@0
|
355
|
Chris@0
|
356 d.hasKnownExtents = false;
|
Chris@0
|
357 m_cepOutput = n++;
|
Chris@0
|
358 outputs.push_back(d);
|
Chris@0
|
359
|
Chris@0
|
360 d.identifier = "am";
|
Chris@5
|
361 d.name = "Cepstrum bins relative to RMS";
|
Chris@5
|
362 d.description = "The cepstrum bins within the specified range, expressed as a value relative to the root mean square bin value in the range, with values below the RMS clamped to zero";
|
Chris@0
|
363 m_amOutput = n++;
|
Chris@0
|
364 outputs.push_back(d);
|
Chris@0
|
365
|
Chris@2
|
366 d.identifier = "env";
|
Chris@2
|
367 d.name = "Spectral envelope";
|
Chris@2
|
368 d.description = "Envelope calculated from the cepstral values below the specified minimum";
|
Chris@2
|
369 d.binCount = m_blockSize/2 + 1;
|
Chris@2
|
370 d.binNames.clear();
|
Chris@7
|
371 for (int i = 0; i < (int)d.binCount; ++i) {
|
Chris@2
|
372 float freq = (m_inputSampleRate / m_blockSize) * i;
|
Chris@43
|
373 char buffer[50];
|
Chris@2
|
374 sprintf(buffer, "%.2f Hz", freq);
|
Chris@2
|
375 d.binNames.push_back(buffer);
|
Chris@2
|
376 }
|
Chris@2
|
377 m_envOutput = n++;
|
Chris@2
|
378 outputs.push_back(d);
|
Chris@2
|
379
|
Chris@2
|
380 d.identifier = "es";
|
Chris@2
|
381 d.name = "Spectrum without envelope";
|
Chris@2
|
382 d.description = "Magnitude of spectrum values divided by calculated envelope values, to deconvolve the envelope";
|
Chris@2
|
383 m_esOutput = n++;
|
Chris@2
|
384 outputs.push_back(d);
|
Chris@2
|
385
|
Chris@0
|
386 return outputs;
|
Chris@0
|
387 }
|
Chris@0
|
388
|
Chris@0
|
389 bool
|
Chris@0
|
390 SimpleCepstrum::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
Chris@0
|
391 {
|
Chris@0
|
392 if (channels < getMinChannelCount() ||
|
Chris@0
|
393 channels > getMaxChannelCount()) return false;
|
Chris@0
|
394
|
Chris@0
|
395 // std::cerr << "SimpleCepstrum::initialise: channels = " << channels
|
Chris@0
|
396 // << ", stepSize = " << stepSize << ", blockSize = " << blockSize
|
Chris@0
|
397 // << std::endl;
|
Chris@0
|
398
|
Chris@0
|
399 m_channels = channels;
|
Chris@0
|
400 m_stepSize = stepSize;
|
Chris@0
|
401 m_blockSize = blockSize;
|
Chris@0
|
402
|
Chris@5
|
403 m_binFrom = int(m_inputSampleRate / m_fmax);
|
Chris@43
|
404 m_binTo = int(m_inputSampleRate / m_fmin);
|
Chris@5
|
405
|
Chris@43
|
406 if (m_binFrom >= (int)m_blockSize / 2) {
|
Chris@43
|
407 m_binFrom = m_blockSize / 2 - 1;
|
Chris@43
|
408 }
|
Chris@7
|
409 if (m_binTo >= (int)m_blockSize / 2) {
|
Chris@5
|
410 m_binTo = m_blockSize / 2 - 1;
|
Chris@5
|
411 }
|
Chris@43
|
412 if (m_binTo < m_binFrom) {
|
Chris@43
|
413 m_binTo = m_binFrom;
|
Chris@43
|
414 }
|
Chris@5
|
415
|
Chris@5
|
416 m_bins = (m_binTo - m_binFrom) + 1;
|
Chris@5
|
417
|
Chris@5
|
418 m_history = new double *[m_histlen];
|
Chris@5
|
419 for (int i = 0; i < m_histlen; ++i) {
|
Chris@5
|
420 m_history[i] = new double[m_bins];
|
Chris@5
|
421 }
|
Chris@5
|
422
|
Chris@5
|
423 reset();
|
Chris@5
|
424
|
Chris@0
|
425 return true;
|
Chris@0
|
426 }
|
Chris@0
|
427
|
Chris@0
|
428 void
|
Chris@0
|
429 SimpleCepstrum::reset()
|
Chris@0
|
430 {
|
Chris@5
|
431 for (int i = 0; i < m_histlen; ++i) {
|
Chris@5
|
432 for (int j = 0; j < m_bins; ++j) {
|
Chris@5
|
433 m_history[i][j] = 0.0;
|
Chris@5
|
434 }
|
Chris@5
|
435 }
|
Chris@5
|
436 }
|
Chris@5
|
437
|
Chris@5
|
438 void
|
Chris@5
|
439 SimpleCepstrum::filter(const double *cep, double *result)
|
Chris@5
|
440 {
|
Chris@5
|
441 int hix = m_histlen - 1; // current history index
|
Chris@5
|
442
|
Chris@5
|
443 // roll back the history
|
Chris@5
|
444 if (m_histlen > 1) {
|
Chris@5
|
445 double *oldest = m_history[0];
|
Chris@5
|
446 for (int i = 1; i < m_histlen; ++i) {
|
Chris@5
|
447 m_history[i-1] = m_history[i];
|
Chris@5
|
448 }
|
Chris@5
|
449 // and stick this back in the newest spot, to recycle
|
Chris@5
|
450 m_history[hix] = oldest;
|
Chris@5
|
451 }
|
Chris@5
|
452
|
Chris@5
|
453 for (int i = 0; i < m_bins; ++i) {
|
Chris@10
|
454 double v = 0;
|
Chris@10
|
455 int n = 0;
|
Chris@10
|
456 // average according to the vertical filter length
|
Chris@10
|
457 for (int j = -m_vflen/2; j <= m_vflen/2; ++j) {
|
Chris@10
|
458 int ix = i + m_binFrom + j;
|
Chris@39
|
459 if (ix >= 0 && ix < (int)m_blockSize) {
|
Chris@10
|
460 v += cep[ix];
|
Chris@10
|
461 ++n;
|
Chris@10
|
462 }
|
Chris@10
|
463 }
|
Chris@10
|
464 m_history[hix][i] = v / n;
|
Chris@5
|
465 }
|
Chris@5
|
466
|
Chris@5
|
467 for (int i = 0; i < m_bins; ++i) {
|
Chris@5
|
468 double mean = 0.0;
|
Chris@5
|
469 for (int j = 0; j < m_histlen; ++j) {
|
Chris@5
|
470 mean += m_history[j][i];
|
Chris@5
|
471 }
|
Chris@5
|
472 mean /= m_histlen;
|
Chris@5
|
473 result[i] = mean;
|
Chris@5
|
474 }
|
Chris@5
|
475 }
|
Chris@24
|
476
|
Chris@24
|
477 double
|
Chris@24
|
478 SimpleCepstrum::findInterpolatedPeak(const double *in, int maxbin)
|
Chris@24
|
479 {
|
Chris@40
|
480 // after jos,
|
Chris@40
|
481 // https://ccrma.stanford.edu/~jos/sasp/Quadratic_Interpolation_Spectral_Peaks.html
|
Chris@40
|
482
|
Chris@40
|
483 if (maxbin < 1 || maxbin > m_bins - 2) {
|
Chris@24
|
484 return maxbin;
|
Chris@24
|
485 }
|
Chris@24
|
486
|
Chris@40
|
487 double alpha = in[maxbin-1];
|
Chris@40
|
488 double beta = in[maxbin];
|
Chris@40
|
489 double gamma = in[maxbin+1];
|
Chris@24
|
490
|
Chris@40
|
491 double denom = (alpha - 2*beta + gamma);
|
Chris@24
|
492
|
Chris@40
|
493 if (denom == 0) {
|
Chris@40
|
494 // flat
|
Chris@40
|
495 return maxbin;
|
Chris@24
|
496 }
|
Chris@24
|
497
|
Chris@40
|
498 double p = ((alpha - gamma) / denom) / 2.0;
|
Chris@24
|
499
|
Chris@40
|
500 return double(maxbin) + p;
|
Chris@24
|
501 }
|
Chris@5
|
502
|
Chris@5
|
503 void
|
Chris@5
|
504 SimpleCepstrum::addStatisticalOutputs(FeatureSet &fs, const double *data)
|
Chris@5
|
505 {
|
Chris@5
|
506 int n = m_bins;
|
Chris@5
|
507
|
Chris@6
|
508 double maxval = 0.0;
|
Chris@5
|
509 int maxbin = 0;
|
Chris@5
|
510
|
Chris@5
|
511 for (int i = 0; i < n; ++i) {
|
Chris@5
|
512 if (data[i] > maxval) {
|
Chris@5
|
513 maxval = data[i];
|
Chris@6
|
514 maxbin = i;
|
Chris@5
|
515 }
|
Chris@5
|
516 }
|
Chris@5
|
517
|
Chris@20
|
518 double nextPeakVal = 0.0;
|
Chris@20
|
519
|
Chris@20
|
520 for (int i = 1; i+1 < n; ++i) {
|
Chris@20
|
521 if (data[i] > data[i-1] &&
|
Chris@20
|
522 data[i] > data[i+1] &&
|
Chris@20
|
523 i != maxbin &&
|
Chris@20
|
524 data[i] > nextPeakVal) {
|
Chris@20
|
525 nextPeakVal = data[i];
|
Chris@20
|
526 }
|
Chris@20
|
527 }
|
Chris@20
|
528
|
Chris@5
|
529 Feature rf;
|
Chris@24
|
530 Feature irf;
|
Chris@6
|
531 if (maxval > 0.0) {
|
Chris@6
|
532 rf.values.push_back(m_inputSampleRate / (maxbin + m_binFrom));
|
Chris@24
|
533 double cimax = findInterpolatedPeak(data, maxbin);
|
Chris@24
|
534 irf.values.push_back(m_inputSampleRate / (cimax + m_binFrom));
|
Chris@5
|
535 } else {
|
Chris@5
|
536 rf.values.push_back(0);
|
Chris@24
|
537 irf.values.push_back(0);
|
Chris@5
|
538 }
|
Chris@5
|
539 fs[m_pkOutput].push_back(rf);
|
Chris@24
|
540 fs[m_ipkOutput].push_back(irf);
|
Chris@5
|
541
|
Chris@6
|
542 double total = 0;
|
Chris@5
|
543 for (int i = 0; i < n; ++i) {
|
Chris@6
|
544 total += data[i];
|
Chris@5
|
545 }
|
Chris@5
|
546
|
Chris@6
|
547 Feature tot;
|
Chris@6
|
548 tot.values.push_back(total);
|
Chris@6
|
549 fs[m_totOutput].push_back(tot);
|
Chris@6
|
550
|
Chris@6
|
551 double mean = total / n;
|
Chris@6
|
552
|
Chris@6
|
553 double totsqr = 0;
|
Chris@8
|
554 double abstot = 0;
|
Chris@5
|
555 for (int i = 0; i < n; ++i) {
|
Chris@6
|
556 totsqr += data[i] * data[i];
|
Chris@8
|
557 abstot += fabs(data[i]);
|
Chris@5
|
558 }
|
Chris@6
|
559 double rms = sqrt(totsqr / n);
|
Chris@5
|
560
|
Chris@5
|
561 double variance = 0;
|
Chris@5
|
562 for (int i = 0; i < n; ++i) {
|
Chris@5
|
563 double dev = fabs(data[i] - mean);
|
Chris@5
|
564 variance += dev * dev;
|
Chris@5
|
565 }
|
Chris@5
|
566 variance /= n;
|
Chris@5
|
567
|
Chris@6
|
568 double aroundPeak = 0.0;
|
Chris@6
|
569 double peakProportion = 0.0;
|
Chris@6
|
570 if (maxval > 0.0) {
|
Chris@7
|
571 aroundPeak += fabs(maxval);
|
Chris@6
|
572 int i = maxbin - 1;
|
Chris@6
|
573 while (i > 0 && data[i] <= data[i+1]) {
|
Chris@7
|
574 aroundPeak += fabs(data[i]);
|
Chris@6
|
575 --i;
|
Chris@6
|
576 }
|
Chris@6
|
577 i = maxbin + 1;
|
Chris@6
|
578 while (i < n && data[i] <= data[i-1]) {
|
Chris@7
|
579 aroundPeak += fabs(data[i]);
|
Chris@6
|
580 ++i;
|
Chris@6
|
581 }
|
Chris@6
|
582 }
|
Chris@8
|
583 peakProportion = aroundPeak / abstot;
|
Chris@6
|
584 Feature pp;
|
Chris@6
|
585 pp.values.push_back(peakProportion);
|
Chris@6
|
586 fs[m_ppOutput].push_back(pp);
|
Chris@6
|
587
|
Chris@5
|
588 Feature vf;
|
Chris@5
|
589 vf.values.push_back(variance);
|
Chris@5
|
590 fs[m_varOutput].push_back(vf);
|
Chris@5
|
591
|
Chris@5
|
592 Feature pr;
|
Chris@5
|
593 pr.values.push_back(maxval - rms);
|
Chris@5
|
594 fs[m_p2rOutput].push_back(pr);
|
Chris@5
|
595
|
Chris@5
|
596 Feature pv;
|
Chris@5
|
597 pv.values.push_back(maxval);
|
Chris@5
|
598 fs[m_pvOutput].push_back(pv);
|
Chris@5
|
599
|
Chris@20
|
600 Feature pko;
|
Chris@20
|
601 if (nextPeakVal != 0.0) {
|
Chris@27
|
602 pko.values.push_back(maxval - nextPeakVal);
|
Chris@20
|
603 } else {
|
Chris@20
|
604 pko.values.push_back(0.0);
|
Chris@20
|
605 }
|
Chris@20
|
606 fs[m_pkoOutput].push_back(pko);
|
Chris@20
|
607
|
Chris@5
|
608 Feature am;
|
Chris@5
|
609 for (int i = 0; i < n; ++i) {
|
Chris@5
|
610 if (data[i] < rms) am.values.push_back(0);
|
Chris@5
|
611 else am.values.push_back(data[i] - rms);
|
Chris@5
|
612 }
|
Chris@5
|
613 fs[m_amOutput].push_back(am);
|
Chris@5
|
614 }
|
Chris@5
|
615
|
Chris@5
|
616 void
|
Chris@5
|
617 SimpleCepstrum::addEnvelopeOutputs(FeatureSet &fs, const float *const *inputBuffers, const double *cep)
|
Chris@5
|
618 {
|
Chris@5
|
619 // Wipe the higher cepstral bins in order to calculate the
|
Chris@5
|
620 // envelope. This calculation uses the raw cepstrum, not the
|
Chris@5
|
621 // filtered values (because only values "in frequency range" are
|
Chris@5
|
622 // filtered).
|
Chris@5
|
623 int bs = m_blockSize;
|
Chris@5
|
624 int hs = m_blockSize/2 + 1;
|
Chris@5
|
625
|
Chris@5
|
626 double *ecep = new double[bs];
|
Chris@5
|
627 for (int i = 0; i < m_binFrom; ++i) {
|
Chris@5
|
628 ecep[i] = cep[i] / bs;
|
Chris@5
|
629 }
|
Chris@5
|
630 for (int i = m_binFrom; i < bs; ++i) {
|
Chris@5
|
631 ecep[i] = 0;
|
Chris@5
|
632 }
|
Chris@5
|
633 ecep[0] /= 2;
|
Chris@43
|
634 if (m_binFrom > 0) {
|
Chris@43
|
635 ecep[m_binFrom-1] /= 2;
|
Chris@43
|
636 }
|
Chris@5
|
637
|
Chris@5
|
638 double *env = new double[bs];
|
Chris@5
|
639 double *io = new double[bs];
|
Chris@7
|
640
|
Chris@7
|
641 //!!! This is only right if the previous transform was an inverse one!
|
Chris@33
|
642 Vamp::FFT::forward(bs, ecep, 0, env, io);
|
Chris@5
|
643
|
Chris@5
|
644 for (int i = 0; i < hs; ++i) {
|
Chris@5
|
645 env[i] = exp(env[i]);
|
Chris@5
|
646 }
|
Chris@5
|
647 Feature envf;
|
Chris@5
|
648 for (int i = 0; i < hs; ++i) {
|
Chris@5
|
649 envf.values.push_back(env[i]);
|
Chris@5
|
650 }
|
Chris@5
|
651 fs[m_envOutput].push_back(envf);
|
Chris@5
|
652
|
Chris@5
|
653 Feature es;
|
Chris@5
|
654 for (int i = 0; i < hs; ++i) {
|
Chris@5
|
655 double re = inputBuffers[0][i*2 ] / env[i];
|
Chris@5
|
656 double im = inputBuffers[0][i*2+1] / env[i];
|
Chris@5
|
657 double mag = sqrt(re*re + im*im);
|
Chris@5
|
658 es.values.push_back(mag);
|
Chris@5
|
659 }
|
Chris@5
|
660 fs[m_esOutput].push_back(es);
|
Chris@5
|
661
|
Chris@5
|
662 delete[] env;
|
Chris@5
|
663 delete[] ecep;
|
Chris@5
|
664 delete[] io;
|
Chris@0
|
665 }
|
Chris@0
|
666
|
Chris@0
|
667 SimpleCepstrum::FeatureSet
|
Chris@0
|
668 SimpleCepstrum::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
Chris@0
|
669 {
|
Chris@1
|
670 FeatureSet fs;
|
Chris@1
|
671
|
Chris@0
|
672 int bs = m_blockSize;
|
Chris@0
|
673 int hs = m_blockSize/2 + 1;
|
Chris@0
|
674
|
Chris@5
|
675 double *rawcep = new double[bs];
|
Chris@3
|
676 double *io = new double[bs];
|
Chris@3
|
677
|
Chris@4
|
678 if (m_method != InverseComplex) {
|
Chris@3
|
679
|
Chris@4
|
680 double *logmag = new double[bs];
|
Chris@4
|
681
|
Chris@4
|
682 for (int i = 0; i < hs; ++i) {
|
Chris@3
|
683
|
Chris@4
|
684 double power =
|
Chris@4
|
685 inputBuffers[0][i*2 ] * inputBuffers[0][i*2 ] +
|
Chris@4
|
686 inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1];
|
Chris@5
|
687 double mag = sqrt(power);
|
Chris@3
|
688
|
Chris@5
|
689 double lm = log(mag + 0.00000001);
|
Chris@4
|
690
|
Chris@4
|
691 switch (m_method) {
|
Chris@4
|
692 case InverseSymmetric:
|
Chris@4
|
693 logmag[i] = lm;
|
Chris@4
|
694 if (i > 0) logmag[bs - i] = lm;
|
Chris@4
|
695 break;
|
Chris@4
|
696 case InverseAsymmetric:
|
Chris@4
|
697 logmag[i] = lm;
|
Chris@4
|
698 if (i > 0) logmag[bs - i] = 0;
|
Chris@4
|
699 break;
|
Chris@4
|
700 default:
|
Chris@4
|
701 logmag[bs/2 + i - 1] = lm;
|
Chris@4
|
702 if (i < hs-1) {
|
Chris@4
|
703 logmag[bs/2 - i - 1] = lm;
|
Chris@4
|
704 }
|
Chris@4
|
705 break;
|
Chris@3
|
706 }
|
Chris@3
|
707 }
|
Chris@4
|
708
|
Chris@4
|
709 if (m_method == InverseSymmetric ||
|
Chris@4
|
710 m_method == InverseAsymmetric) {
|
Chris@4
|
711
|
Chris@33
|
712 Vamp::FFT::inverse(bs, logmag, 0, rawcep, io);
|
Chris@4
|
713
|
Chris@4
|
714 } else {
|
Chris@4
|
715
|
Chris@33
|
716 Vamp::FFT::forward(bs, logmag, 0, rawcep, io);
|
Chris@4
|
717
|
Chris@4
|
718 if (m_method == ForwardDifference) {
|
Chris@4
|
719 for (int i = 0; i < hs; ++i) {
|
Chris@5
|
720 rawcep[i] = fabs(io[i]) - fabs(rawcep[i]);
|
Chris@4
|
721 }
|
Chris@4
|
722 } else {
|
Chris@4
|
723 for (int i = 0; i < hs; ++i) {
|
Chris@5
|
724 rawcep[i] = sqrt(rawcep[i]*rawcep[i] + io[i]*io[i]);
|
Chris@4
|
725 }
|
Chris@4
|
726 }
|
Chris@4
|
727 }
|
Chris@4
|
728
|
Chris@4
|
729 delete[] logmag;
|
Chris@4
|
730
|
Chris@4
|
731 } else { // InverseComplex
|
Chris@4
|
732
|
Chris@4
|
733 double *ri = new double[bs];
|
Chris@4
|
734 double *ii = new double[bs];
|
Chris@4
|
735
|
Chris@4
|
736 for (int i = 0; i < hs; ++i) {
|
Chris@4
|
737 double re = inputBuffers[0][i*2];
|
Chris@4
|
738 double im = inputBuffers[0][i*2+1];
|
Chris@4
|
739 std::complex<double> c(re, im);
|
Chris@4
|
740 std::complex<double> clog = std::log(c);
|
Chris@4
|
741 ri[i] = clog.real();
|
Chris@4
|
742 ii[i] = clog.imag();
|
Chris@4
|
743 if (i > 0) {
|
Chris@4
|
744 ri[bs - i] = ri[i];
|
Chris@4
|
745 ii[bs - i] = -ii[i];
|
Chris@4
|
746 }
|
Chris@4
|
747 }
|
Chris@4
|
748
|
Chris@33
|
749 Vamp::FFT::inverse(bs, ri, ii, rawcep, io);
|
Chris@4
|
750
|
Chris@4
|
751 delete[] ri;
|
Chris@4
|
752 delete[] ii;
|
Chris@3
|
753 }
|
Chris@0
|
754
|
Chris@0
|
755 if (m_clamp) {
|
Chris@0
|
756 for (int i = 0; i < bs; ++i) {
|
Chris@5
|
757 if (rawcep[i] < 0) rawcep[i] = 0;
|
Chris@0
|
758 }
|
Chris@0
|
759 }
|
Chris@0
|
760
|
Chris@5
|
761 delete[] io;
|
Chris@0
|
762
|
Chris@5
|
763 double *latest = new double[m_bins];
|
Chris@5
|
764 filter(rawcep, latest);
|
Chris@5
|
765
|
Chris@5
|
766 int n = m_bins;
|
Chris@0
|
767
|
Chris@0
|
768 Feature cf;
|
Chris@5
|
769 for (int i = 0; i < n; ++i) {
|
Chris@5
|
770 cf.values.push_back(latest[i]);
|
Chris@0
|
771 }
|
Chris@0
|
772 fs[m_cepOutput].push_back(cf);
|
Chris@0
|
773
|
Chris@5
|
774 addStatisticalOutputs(fs, latest);
|
Chris@0
|
775
|
Chris@5
|
776 addEnvelopeOutputs(fs, inputBuffers, rawcep);
|
Chris@0
|
777
|
Chris@5
|
778 delete[] latest;
|
Chris@7
|
779 delete[] rawcep;
|
Chris@0
|
780
|
Chris@0
|
781 return fs;
|
Chris@0
|
782 }
|
Chris@0
|
783
|
Chris@0
|
784 SimpleCepstrum::FeatureSet
|
Chris@0
|
785 SimpleCepstrum::getRemainingFeatures()
|
Chris@0
|
786 {
|
Chris@0
|
787 FeatureSet fs;
|
Chris@0
|
788 return fs;
|
Chris@0
|
789 }
|