Chris@0
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
Chris@7
|
2 /*
|
Chris@38
|
3 This file is Copyright (c) 2012 Chris Cannam
|
Chris@38
|
4
|
Chris@7
|
5 Permission is hereby granted, free of charge, to any person
|
Chris@7
|
6 obtaining a copy of this software and associated documentation
|
Chris@7
|
7 files (the "Software"), to deal in the Software without
|
Chris@7
|
8 restriction, including without limitation the rights to use, copy,
|
Chris@7
|
9 modify, merge, publish, distribute, sublicense, and/or sell copies
|
Chris@7
|
10 of the Software, and to permit persons to whom the Software is
|
Chris@7
|
11 furnished to do so, subject to the following conditions:
|
Chris@7
|
12
|
Chris@7
|
13 The above copyright notice and this permission notice shall be
|
Chris@7
|
14 included in all copies or substantial portions of the Software.
|
Chris@7
|
15
|
Chris@7
|
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
Chris@7
|
17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
Chris@7
|
18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
Chris@7
|
19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
|
Chris@7
|
20 ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
|
Chris@7
|
21 CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
Chris@7
|
22 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
Chris@7
|
23 */
|
Chris@0
|
24
|
Chris@0
|
25 #include "SimpleCepstrum.h"
|
Chris@0
|
26
|
Chris@33
|
27 #include "vamp-sdk/FFT.h"
|
Chris@33
|
28
|
Chris@0
|
29 #include <vector>
|
Chris@0
|
30 #include <algorithm>
|
Chris@0
|
31
|
Chris@0
|
32 #include <cstdio>
|
Chris@0
|
33 #include <cmath>
|
Chris@4
|
34 #include <complex>
|
Chris@0
|
35
|
Chris@33
|
36 #if ( VAMP_SDK_MAJOR_VERSION < 2 || ( VAMP_SDK_MAJOR_VERSION == 2 && VAMP_SDK_MINOR_VERSION < 4 ) )
|
Chris@33
|
37 #error Vamp SDK version 2.4 or newer required
|
Chris@33
|
38 #endif
|
Chris@33
|
39
|
Chris@0
|
40 using std::string;
|
Chris@0
|
41
|
Chris@0
|
42 SimpleCepstrum::SimpleCepstrum(float inputSampleRate) :
|
Chris@0
|
43 Plugin(inputSampleRate),
|
Chris@0
|
44 m_channels(0),
|
Chris@0
|
45 m_stepSize(256),
|
Chris@0
|
46 m_blockSize(1024),
|
Chris@0
|
47 m_fmin(50),
|
Chris@0
|
48 m_fmax(1000),
|
Chris@10
|
49 m_histlen(1),
|
Chris@10
|
50 m_vflen(1),
|
Chris@3
|
51 m_clamp(false),
|
Chris@5
|
52 m_method(InverseSymmetric),
|
Chris@5
|
53 m_binFrom(0),
|
Chris@5
|
54 m_binTo(0),
|
Chris@5
|
55 m_bins(0),
|
Chris@5
|
56 m_history(0)
|
Chris@0
|
57 {
|
Chris@0
|
58 }
|
Chris@0
|
59
|
Chris@0
|
60 SimpleCepstrum::~SimpleCepstrum()
|
Chris@0
|
61 {
|
Chris@5
|
62 if (m_history) {
|
Chris@5
|
63 for (int i = 0; i < m_histlen; ++i) {
|
Chris@5
|
64 delete[] m_history[i];
|
Chris@5
|
65 }
|
Chris@5
|
66 delete[] m_history;
|
Chris@5
|
67 }
|
Chris@0
|
68 }
|
Chris@0
|
69
|
Chris@0
|
70 string
|
Chris@0
|
71 SimpleCepstrum::getIdentifier() const
|
Chris@0
|
72 {
|
Chris@0
|
73 return "simple-cepstrum";
|
Chris@0
|
74 }
|
Chris@0
|
75
|
Chris@0
|
76 string
|
Chris@0
|
77 SimpleCepstrum::getName() const
|
Chris@0
|
78 {
|
Chris@0
|
79 return "Simple Cepstrum";
|
Chris@0
|
80 }
|
Chris@0
|
81
|
Chris@0
|
82 string
|
Chris@0
|
83 SimpleCepstrum::getDescription() const
|
Chris@0
|
84 {
|
Chris@2
|
85 return "Return simple cepstral data from DFT bins. This plugin is intended for casual inspection of cepstral data. It returns a lot of different sorts of data and is quite slow; it's not a good way to extract a single feature rapidly.";
|
Chris@0
|
86 }
|
Chris@0
|
87
|
Chris@0
|
88 string
|
Chris@0
|
89 SimpleCepstrum::getMaker() const
|
Chris@0
|
90 {
|
Chris@7
|
91 return "Chris Cannam";
|
Chris@0
|
92 }
|
Chris@0
|
93
|
Chris@0
|
94 int
|
Chris@0
|
95 SimpleCepstrum::getPluginVersion() const
|
Chris@0
|
96 {
|
Chris@0
|
97 // Increment this each time you release a version that behaves
|
Chris@0
|
98 // differently from the previous one
|
Chris@0
|
99 return 1;
|
Chris@0
|
100 }
|
Chris@0
|
101
|
Chris@0
|
102 string
|
Chris@0
|
103 SimpleCepstrum::getCopyright() const
|
Chris@0
|
104 {
|
Chris@7
|
105 return "Freely redistributable (BSD license)";
|
Chris@0
|
106 }
|
Chris@0
|
107
|
Chris@0
|
108 SimpleCepstrum::InputDomain
|
Chris@0
|
109 SimpleCepstrum::getInputDomain() const
|
Chris@0
|
110 {
|
Chris@0
|
111 return FrequencyDomain;
|
Chris@0
|
112 }
|
Chris@0
|
113
|
Chris@0
|
114 size_t
|
Chris@0
|
115 SimpleCepstrum::getPreferredBlockSize() const
|
Chris@0
|
116 {
|
Chris@0
|
117 return 1024;
|
Chris@0
|
118 }
|
Chris@0
|
119
|
Chris@0
|
120 size_t
|
Chris@0
|
121 SimpleCepstrum::getPreferredStepSize() const
|
Chris@0
|
122 {
|
Chris@0
|
123 return 256;
|
Chris@0
|
124 }
|
Chris@0
|
125
|
Chris@0
|
126 size_t
|
Chris@0
|
127 SimpleCepstrum::getMinChannelCount() const
|
Chris@0
|
128 {
|
Chris@0
|
129 return 1;
|
Chris@0
|
130 }
|
Chris@0
|
131
|
Chris@0
|
132 size_t
|
Chris@0
|
133 SimpleCepstrum::getMaxChannelCount() const
|
Chris@0
|
134 {
|
Chris@0
|
135 return 1;
|
Chris@0
|
136 }
|
Chris@0
|
137
|
Chris@0
|
138 SimpleCepstrum::ParameterList
|
Chris@0
|
139 SimpleCepstrum::getParameterDescriptors() const
|
Chris@0
|
140 {
|
Chris@0
|
141 ParameterList list;
|
Chris@0
|
142
|
Chris@0
|
143 ParameterDescriptor d;
|
Chris@0
|
144
|
Chris@0
|
145 d.identifier = "fmin";
|
Chris@0
|
146 d.name = "Minimum frequency";
|
Chris@43
|
147 d.description = "Frequency whose period corresponds to the quefrency of the last cepstrum bin in range";
|
Chris@0
|
148 d.unit = "Hz";
|
Chris@0
|
149 d.minValue = m_inputSampleRate / m_blockSize;
|
Chris@0
|
150 d.maxValue = m_inputSampleRate / 2;
|
Chris@0
|
151 d.defaultValue = 50;
|
Chris@0
|
152 d.isQuantized = false;
|
Chris@0
|
153 list.push_back(d);
|
Chris@0
|
154
|
Chris@0
|
155 d.identifier = "fmax";
|
Chris@0
|
156 d.name = "Maximum frequency";
|
Chris@43
|
157 d.description = "Frequency whose period corresponds to the quefrency of the first cepstrum bin in range";
|
Chris@0
|
158 d.unit = "Hz";
|
Chris@0
|
159 d.minValue = m_inputSampleRate / m_blockSize;
|
Chris@0
|
160 d.maxValue = m_inputSampleRate / 2;
|
Chris@0
|
161 d.defaultValue = 1000;
|
Chris@0
|
162 d.isQuantized = false;
|
Chris@0
|
163 list.push_back(d);
|
Chris@0
|
164
|
Chris@5
|
165 d.identifier = "histlen";
|
Chris@5
|
166 d.name = "Mean filter history length";
|
Chris@43
|
167 d.description = "Length of mean filter used for smoothing cepstrum across time bins";
|
Chris@5
|
168 d.unit = "";
|
Chris@5
|
169 d.minValue = 1;
|
Chris@5
|
170 d.maxValue = 10;
|
Chris@10
|
171 d.defaultValue = 1;
|
Chris@5
|
172 d.isQuantized = true;
|
Chris@5
|
173 d.quantizeStep = 1;
|
Chris@5
|
174 list.push_back(d);
|
Chris@5
|
175
|
Chris@10
|
176 d.identifier = "vflen";
|
Chris@10
|
177 d.name = "Vertical filter length";
|
Chris@43
|
178 d.description = "Length of mean filter used for smoothing cepstrum across quefrency bins";
|
Chris@10
|
179 d.unit = "";
|
Chris@10
|
180 d.minValue = 1;
|
Chris@10
|
181 d.maxValue = 11;
|
Chris@10
|
182 d.defaultValue = 1;
|
Chris@10
|
183 d.isQuantized = true;
|
Chris@10
|
184 d.quantizeStep = 2;
|
Chris@10
|
185 list.push_back(d);
|
Chris@10
|
186
|
Chris@3
|
187 d.identifier = "method";
|
Chris@3
|
188 d.name = "Cepstrum transform method";
|
Chris@43
|
189 d.unit = "Method to use for calculating cepstrum, starting from the complex short-time Fourier transform of the input audio.\nInverse symmetric - Real part of inverse FFT of log magnitude spectrum, with frequencies above Nyquist reflecting those below it.\nInverse asymmetric - Real part of inverse FFT of log magnitude spectrum, with frequencies above Nyquist set to zero.\nInverse complex - Real part of inverse FFT of complex log spectrum.\nForward magnitude - Magnitude of forward FFT of log magnitude spectrum.\nForward difference - Difference between imaginary and real parts of forward FFT of log magnitude spectrum";
|
Chris@3
|
190 d.minValue = 0;
|
Chris@5
|
191 d.maxValue = 4;
|
Chris@3
|
192 d.defaultValue = 0;
|
Chris@3
|
193 d.isQuantized = true;
|
Chris@3
|
194 d.quantizeStep = 1;
|
Chris@3
|
195 d.valueNames.push_back("Inverse symmetric");
|
Chris@3
|
196 d.valueNames.push_back("Inverse asymmetric");
|
Chris@4
|
197 d.valueNames.push_back("Inverse complex");
|
Chris@3
|
198 d.valueNames.push_back("Forward magnitude");
|
Chris@3
|
199 d.valueNames.push_back("Forward difference");
|
Chris@3
|
200 list.push_back(d);
|
Chris@3
|
201
|
Chris@10
|
202 d.identifier = "clamp";
|
Chris@10
|
203 d.name = "Clamp negative values in cepstrum at zero";
|
Chris@43
|
204 d.unit = "If set, no negative values will be returned; they will be replaced by zeroes";
|
Chris@10
|
205 d.minValue = 0;
|
Chris@10
|
206 d.maxValue = 1;
|
Chris@10
|
207 d.defaultValue = 0;
|
Chris@10
|
208 d.isQuantized = true;
|
Chris@10
|
209 d.quantizeStep = 1;
|
Chris@10
|
210 d.valueNames.clear();
|
Chris@10
|
211 list.push_back(d);
|
Chris@10
|
212
|
Chris@0
|
213 return list;
|
Chris@0
|
214 }
|
Chris@0
|
215
|
Chris@0
|
216 float
|
Chris@0
|
217 SimpleCepstrum::getParameter(string identifier) const
|
Chris@0
|
218 {
|
Chris@0
|
219 if (identifier == "fmin") return m_fmin;
|
Chris@0
|
220 else if (identifier == "fmax") return m_fmax;
|
Chris@5
|
221 else if (identifier == "histlen") return m_histlen;
|
Chris@10
|
222 else if (identifier == "vflen") return m_vflen;
|
Chris@10
|
223 else if (identifier == "clamp") return (m_clamp ? 1 : 0);
|
Chris@3
|
224 else if (identifier == "method") return (int)m_method;
|
Chris@0
|
225 else return 0.f;
|
Chris@0
|
226 }
|
Chris@0
|
227
|
Chris@0
|
228 void
|
Chris@0
|
229 SimpleCepstrum::setParameter(string identifier, float value)
|
Chris@0
|
230 {
|
Chris@0
|
231 if (identifier == "fmin") m_fmin = value;
|
Chris@0
|
232 else if (identifier == "fmax") m_fmax = value;
|
Chris@5
|
233 else if (identifier == "histlen") m_histlen = value;
|
Chris@10
|
234 else if (identifier == "vflen") m_vflen = value;
|
Chris@10
|
235 else if (identifier == "clamp") m_clamp = (value > 0.5);
|
Chris@3
|
236 else if (identifier == "method") m_method = Method(int(value + 0.5));
|
Chris@0
|
237 }
|
Chris@0
|
238
|
Chris@0
|
239 SimpleCepstrum::ProgramList
|
Chris@0
|
240 SimpleCepstrum::getPrograms() const
|
Chris@0
|
241 {
|
Chris@0
|
242 ProgramList list;
|
Chris@0
|
243 return list;
|
Chris@0
|
244 }
|
Chris@0
|
245
|
Chris@0
|
246 string
|
Chris@0
|
247 SimpleCepstrum::getCurrentProgram() const
|
Chris@0
|
248 {
|
Chris@0
|
249 return ""; // no programs
|
Chris@0
|
250 }
|
Chris@0
|
251
|
Chris@0
|
252 void
|
Chris@0
|
253 SimpleCepstrum::selectProgram(string name)
|
Chris@0
|
254 {
|
Chris@0
|
255 }
|
Chris@0
|
256
|
Chris@0
|
257 SimpleCepstrum::OutputList
|
Chris@0
|
258 SimpleCepstrum::getOutputDescriptors() const
|
Chris@0
|
259 {
|
Chris@0
|
260 OutputList outputs;
|
Chris@0
|
261
|
Chris@0
|
262 int n = 0;
|
Chris@0
|
263
|
Chris@0
|
264 OutputDescriptor d;
|
Chris@2
|
265
|
Chris@7
|
266 d.identifier = "raw_cepstral_peak";
|
Chris@7
|
267 d.name = "Frequency corresponding to raw cepstral peak";
|
Chris@24
|
268 d.description = "Return the frequency whose period corresponds to the quefrency with the maximum bin value within the specified range of the cepstrum";
|
Chris@6
|
269 d.unit = "Hz";
|
Chris@0
|
270 d.hasFixedBinCount = true;
|
Chris@0
|
271 d.binCount = 1;
|
Chris@0
|
272 d.hasKnownExtents = true;
|
Chris@0
|
273 d.minValue = m_fmin;
|
Chris@0
|
274 d.maxValue = m_fmax;
|
Chris@0
|
275 d.isQuantized = false;
|
Chris@0
|
276 d.sampleType = OutputDescriptor::OneSamplePerStep;
|
Chris@0
|
277 d.hasDuration = false;
|
Chris@5
|
278 m_pkOutput = n++;
|
Chris@0
|
279 outputs.push_back(d);
|
Chris@0
|
280
|
Chris@24
|
281 d.identifier = "interpolated_peak";
|
Chris@24
|
282 d.name = "Interpolated peak frequency";
|
Chris@40
|
283 d.description = "Return the frequency whose period corresponds to the quefrency with the maximum bin value within the specified range of the cepstrum, using parabolic interpolation to estimate the peak quefrency to finer than single bin resolution";
|
Chris@24
|
284 m_ipkOutput = n++;
|
Chris@24
|
285 outputs.push_back(d);
|
Chris@24
|
286
|
Chris@0
|
287 d.identifier = "variance";
|
Chris@0
|
288 d.name = "Variance of cepstral bins in range";
|
Chris@0
|
289 d.unit = "";
|
Chris@2
|
290 d.description = "Return the variance of bin values within the specified range of the cepstrum";
|
Chris@6
|
291 d.hasKnownExtents = false;
|
Chris@0
|
292 m_varOutput = n++;
|
Chris@0
|
293 outputs.push_back(d);
|
Chris@0
|
294
|
Chris@0
|
295 d.identifier = "peak";
|
Chris@8
|
296 d.name = "Value at peak";
|
Chris@0
|
297 d.unit = "";
|
Chris@2
|
298 d.description = "Return the value found in the maximum-valued bin within the specified range of the cepstrum";
|
Chris@0
|
299 m_pvOutput = n++;
|
Chris@0
|
300 outputs.push_back(d);
|
Chris@0
|
301
|
Chris@5
|
302 d.identifier = "peak_to_rms";
|
Chris@5
|
303 d.name = "Peak-to-RMS distance";
|
Chris@5
|
304 d.unit = "";
|
Chris@5
|
305 d.description = "Return the difference between maximum and root mean square bin values within the specified range of the cepstrum";
|
Chris@5
|
306 m_p2rOutput = n++;
|
Chris@5
|
307 outputs.push_back(d);
|
Chris@5
|
308
|
Chris@6
|
309 d.identifier = "peak_proportion";
|
Chris@7
|
310 d.name = "Energy around peak";
|
Chris@6
|
311 d.unit = "";
|
Chris@7
|
312 d.description = "Return the proportion of total energy that is found in the bins around the peak bin (as far as the nearest local minima), within the specified range of the cepstrum";
|
Chris@6
|
313 m_ppOutput = n++;
|
Chris@6
|
314 outputs.push_back(d);
|
Chris@6
|
315
|
Chris@20
|
316 d.identifier = "peak_to_second_peak";
|
Chris@27
|
317 d.name = "Peak to second-peak difference";
|
Chris@20
|
318 d.unit = "";
|
Chris@27
|
319 d.description = "Return the difference between the value found in the peak bin within the specified range of the cepstrum, and that found in the next highest peak";
|
Chris@20
|
320 m_pkoOutput = n++;
|
Chris@20
|
321 outputs.push_back(d);
|
Chris@20
|
322
|
Chris@6
|
323 d.identifier = "total";
|
Chris@6
|
324 d.name = "Total energy";
|
Chris@6
|
325 d.unit = "";
|
Chris@7
|
326 d.description = "Return the total energy found in all bins within the specified range of the cepstrum";
|
Chris@6
|
327 m_totOutput = n++;
|
Chris@6
|
328 outputs.push_back(d);
|
Chris@6
|
329
|
Chris@0
|
330 d.identifier = "cepstrum";
|
Chris@0
|
331 d.name = "Cepstrum";
|
Chris@0
|
332 d.unit = "";
|
Chris@2
|
333 d.description = "The unprocessed cepstrum bins within the specified range";
|
Chris@0
|
334
|
Chris@0
|
335 int from = int(m_inputSampleRate / m_fmax);
|
Chris@0
|
336 int to = int(m_inputSampleRate / m_fmin);
|
Chris@43
|
337 if (from >= (int)m_blockSize / 2) {
|
Chris@43
|
338 from = m_blockSize / 2 - 1;
|
Chris@43
|
339 }
|
Chris@0
|
340 if (to >= (int)m_blockSize / 2) {
|
Chris@0
|
341 to = m_blockSize / 2 - 1;
|
Chris@0
|
342 }
|
Chris@43
|
343 if (to < from) {
|
Chris@43
|
344 to = from;
|
Chris@43
|
345 }
|
Chris@0
|
346 d.binCount = to - from + 1;
|
Chris@0
|
347 for (int i = from; i <= to; ++i) {
|
Chris@0
|
348 float freq = m_inputSampleRate / i;
|
Chris@43
|
349 char buffer[50];
|
Chris@2
|
350 sprintf(buffer, "%.2f Hz", freq);
|
Chris@0
|
351 d.binNames.push_back(buffer);
|
Chris@0
|
352 }
|
Chris@0
|
353
|
Chris@0
|
354 d.hasKnownExtents = false;
|
Chris@0
|
355 m_cepOutput = n++;
|
Chris@0
|
356 outputs.push_back(d);
|
Chris@0
|
357
|
Chris@0
|
358 d.identifier = "am";
|
Chris@5
|
359 d.name = "Cepstrum bins relative to RMS";
|
Chris@5
|
360 d.description = "The cepstrum bins within the specified range, expressed as a value relative to the root mean square bin value in the range, with values below the RMS clamped to zero";
|
Chris@0
|
361 m_amOutput = n++;
|
Chris@0
|
362 outputs.push_back(d);
|
Chris@0
|
363
|
Chris@2
|
364 d.identifier = "env";
|
Chris@2
|
365 d.name = "Spectral envelope";
|
Chris@2
|
366 d.description = "Envelope calculated from the cepstral values below the specified minimum";
|
Chris@2
|
367 d.binCount = m_blockSize/2 + 1;
|
Chris@2
|
368 d.binNames.clear();
|
Chris@7
|
369 for (int i = 0; i < (int)d.binCount; ++i) {
|
Chris@2
|
370 float freq = (m_inputSampleRate / m_blockSize) * i;
|
Chris@43
|
371 char buffer[50];
|
Chris@2
|
372 sprintf(buffer, "%.2f Hz", freq);
|
Chris@2
|
373 d.binNames.push_back(buffer);
|
Chris@2
|
374 }
|
Chris@2
|
375 m_envOutput = n++;
|
Chris@2
|
376 outputs.push_back(d);
|
Chris@2
|
377
|
Chris@2
|
378 d.identifier = "es";
|
Chris@2
|
379 d.name = "Spectrum without envelope";
|
Chris@2
|
380 d.description = "Magnitude of spectrum values divided by calculated envelope values, to deconvolve the envelope";
|
Chris@2
|
381 m_esOutput = n++;
|
Chris@2
|
382 outputs.push_back(d);
|
Chris@2
|
383
|
Chris@0
|
384 return outputs;
|
Chris@0
|
385 }
|
Chris@0
|
386
|
Chris@0
|
387 bool
|
Chris@0
|
388 SimpleCepstrum::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
Chris@0
|
389 {
|
Chris@0
|
390 if (channels < getMinChannelCount() ||
|
Chris@0
|
391 channels > getMaxChannelCount()) return false;
|
Chris@0
|
392
|
Chris@0
|
393 // std::cerr << "SimpleCepstrum::initialise: channels = " << channels
|
Chris@0
|
394 // << ", stepSize = " << stepSize << ", blockSize = " << blockSize
|
Chris@0
|
395 // << std::endl;
|
Chris@0
|
396
|
Chris@0
|
397 m_channels = channels;
|
Chris@0
|
398 m_stepSize = stepSize;
|
Chris@0
|
399 m_blockSize = blockSize;
|
Chris@0
|
400
|
Chris@5
|
401 m_binFrom = int(m_inputSampleRate / m_fmax);
|
Chris@43
|
402 m_binTo = int(m_inputSampleRate / m_fmin);
|
Chris@5
|
403
|
Chris@43
|
404 if (m_binFrom >= (int)m_blockSize / 2) {
|
Chris@43
|
405 m_binFrom = m_blockSize / 2 - 1;
|
Chris@43
|
406 }
|
Chris@7
|
407 if (m_binTo >= (int)m_blockSize / 2) {
|
Chris@5
|
408 m_binTo = m_blockSize / 2 - 1;
|
Chris@5
|
409 }
|
Chris@43
|
410 if (m_binTo < m_binFrom) {
|
Chris@43
|
411 m_binTo = m_binFrom;
|
Chris@43
|
412 }
|
Chris@5
|
413
|
Chris@5
|
414 m_bins = (m_binTo - m_binFrom) + 1;
|
Chris@5
|
415
|
Chris@5
|
416 m_history = new double *[m_histlen];
|
Chris@5
|
417 for (int i = 0; i < m_histlen; ++i) {
|
Chris@5
|
418 m_history[i] = new double[m_bins];
|
Chris@5
|
419 }
|
Chris@5
|
420
|
Chris@5
|
421 reset();
|
Chris@5
|
422
|
Chris@0
|
423 return true;
|
Chris@0
|
424 }
|
Chris@0
|
425
|
Chris@0
|
426 void
|
Chris@0
|
427 SimpleCepstrum::reset()
|
Chris@0
|
428 {
|
Chris@5
|
429 for (int i = 0; i < m_histlen; ++i) {
|
Chris@5
|
430 for (int j = 0; j < m_bins; ++j) {
|
Chris@5
|
431 m_history[i][j] = 0.0;
|
Chris@5
|
432 }
|
Chris@5
|
433 }
|
Chris@5
|
434 }
|
Chris@5
|
435
|
Chris@5
|
436 void
|
Chris@5
|
437 SimpleCepstrum::filter(const double *cep, double *result)
|
Chris@5
|
438 {
|
Chris@5
|
439 int hix = m_histlen - 1; // current history index
|
Chris@5
|
440
|
Chris@5
|
441 // roll back the history
|
Chris@5
|
442 if (m_histlen > 1) {
|
Chris@5
|
443 double *oldest = m_history[0];
|
Chris@5
|
444 for (int i = 1; i < m_histlen; ++i) {
|
Chris@5
|
445 m_history[i-1] = m_history[i];
|
Chris@5
|
446 }
|
Chris@5
|
447 // and stick this back in the newest spot, to recycle
|
Chris@5
|
448 m_history[hix] = oldest;
|
Chris@5
|
449 }
|
Chris@5
|
450
|
Chris@5
|
451 for (int i = 0; i < m_bins; ++i) {
|
Chris@10
|
452 double v = 0;
|
Chris@10
|
453 int n = 0;
|
Chris@10
|
454 // average according to the vertical filter length
|
Chris@10
|
455 for (int j = -m_vflen/2; j <= m_vflen/2; ++j) {
|
Chris@10
|
456 int ix = i + m_binFrom + j;
|
Chris@39
|
457 if (ix >= 0 && ix < (int)m_blockSize) {
|
Chris@10
|
458 v += cep[ix];
|
Chris@10
|
459 ++n;
|
Chris@10
|
460 }
|
Chris@10
|
461 }
|
Chris@10
|
462 m_history[hix][i] = v / n;
|
Chris@5
|
463 }
|
Chris@5
|
464
|
Chris@5
|
465 for (int i = 0; i < m_bins; ++i) {
|
Chris@5
|
466 double mean = 0.0;
|
Chris@5
|
467 for (int j = 0; j < m_histlen; ++j) {
|
Chris@5
|
468 mean += m_history[j][i];
|
Chris@5
|
469 }
|
Chris@5
|
470 mean /= m_histlen;
|
Chris@5
|
471 result[i] = mean;
|
Chris@5
|
472 }
|
Chris@5
|
473 }
|
Chris@24
|
474
|
Chris@24
|
475 double
|
Chris@24
|
476 SimpleCepstrum::findInterpolatedPeak(const double *in, int maxbin)
|
Chris@24
|
477 {
|
Chris@40
|
478 // after jos,
|
Chris@40
|
479 // https://ccrma.stanford.edu/~jos/sasp/Quadratic_Interpolation_Spectral_Peaks.html
|
Chris@40
|
480
|
Chris@40
|
481 if (maxbin < 1 || maxbin > m_bins - 2) {
|
Chris@24
|
482 return maxbin;
|
Chris@24
|
483 }
|
Chris@24
|
484
|
Chris@40
|
485 double alpha = in[maxbin-1];
|
Chris@40
|
486 double beta = in[maxbin];
|
Chris@40
|
487 double gamma = in[maxbin+1];
|
Chris@24
|
488
|
Chris@40
|
489 double denom = (alpha - 2*beta + gamma);
|
Chris@24
|
490
|
Chris@40
|
491 if (denom == 0) {
|
Chris@40
|
492 // flat
|
Chris@40
|
493 return maxbin;
|
Chris@24
|
494 }
|
Chris@24
|
495
|
Chris@40
|
496 double p = ((alpha - gamma) / denom) / 2.0;
|
Chris@24
|
497
|
Chris@40
|
498 return double(maxbin) + p;
|
Chris@24
|
499 }
|
Chris@5
|
500
|
Chris@5
|
501 void
|
Chris@5
|
502 SimpleCepstrum::addStatisticalOutputs(FeatureSet &fs, const double *data)
|
Chris@5
|
503 {
|
Chris@5
|
504 int n = m_bins;
|
Chris@5
|
505
|
Chris@6
|
506 double maxval = 0.0;
|
Chris@5
|
507 int maxbin = 0;
|
Chris@5
|
508
|
Chris@5
|
509 for (int i = 0; i < n; ++i) {
|
Chris@5
|
510 if (data[i] > maxval) {
|
Chris@5
|
511 maxval = data[i];
|
Chris@6
|
512 maxbin = i;
|
Chris@5
|
513 }
|
Chris@5
|
514 }
|
Chris@5
|
515
|
Chris@20
|
516 double nextPeakVal = 0.0;
|
Chris@20
|
517
|
Chris@20
|
518 for (int i = 1; i+1 < n; ++i) {
|
Chris@20
|
519 if (data[i] > data[i-1] &&
|
Chris@20
|
520 data[i] > data[i+1] &&
|
Chris@20
|
521 i != maxbin &&
|
Chris@20
|
522 data[i] > nextPeakVal) {
|
Chris@20
|
523 nextPeakVal = data[i];
|
Chris@20
|
524 }
|
Chris@20
|
525 }
|
Chris@20
|
526
|
Chris@5
|
527 Feature rf;
|
Chris@24
|
528 Feature irf;
|
Chris@6
|
529 if (maxval > 0.0) {
|
Chris@6
|
530 rf.values.push_back(m_inputSampleRate / (maxbin + m_binFrom));
|
Chris@24
|
531 double cimax = findInterpolatedPeak(data, maxbin);
|
Chris@24
|
532 irf.values.push_back(m_inputSampleRate / (cimax + m_binFrom));
|
Chris@5
|
533 } else {
|
Chris@5
|
534 rf.values.push_back(0);
|
Chris@24
|
535 irf.values.push_back(0);
|
Chris@5
|
536 }
|
Chris@5
|
537 fs[m_pkOutput].push_back(rf);
|
Chris@24
|
538 fs[m_ipkOutput].push_back(irf);
|
Chris@5
|
539
|
Chris@6
|
540 double total = 0;
|
Chris@5
|
541 for (int i = 0; i < n; ++i) {
|
Chris@6
|
542 total += data[i];
|
Chris@5
|
543 }
|
Chris@5
|
544
|
Chris@6
|
545 Feature tot;
|
Chris@6
|
546 tot.values.push_back(total);
|
Chris@6
|
547 fs[m_totOutput].push_back(tot);
|
Chris@6
|
548
|
Chris@6
|
549 double mean = total / n;
|
Chris@6
|
550
|
Chris@6
|
551 double totsqr = 0;
|
Chris@8
|
552 double abstot = 0;
|
Chris@5
|
553 for (int i = 0; i < n; ++i) {
|
Chris@6
|
554 totsqr += data[i] * data[i];
|
Chris@8
|
555 abstot += fabs(data[i]);
|
Chris@5
|
556 }
|
Chris@6
|
557 double rms = sqrt(totsqr / n);
|
Chris@5
|
558
|
Chris@5
|
559 double variance = 0;
|
Chris@5
|
560 for (int i = 0; i < n; ++i) {
|
Chris@5
|
561 double dev = fabs(data[i] - mean);
|
Chris@5
|
562 variance += dev * dev;
|
Chris@5
|
563 }
|
Chris@5
|
564 variance /= n;
|
Chris@5
|
565
|
Chris@6
|
566 double aroundPeak = 0.0;
|
Chris@6
|
567 double peakProportion = 0.0;
|
Chris@6
|
568 if (maxval > 0.0) {
|
Chris@7
|
569 aroundPeak += fabs(maxval);
|
Chris@6
|
570 int i = maxbin - 1;
|
Chris@6
|
571 while (i > 0 && data[i] <= data[i+1]) {
|
Chris@7
|
572 aroundPeak += fabs(data[i]);
|
Chris@6
|
573 --i;
|
Chris@6
|
574 }
|
Chris@6
|
575 i = maxbin + 1;
|
Chris@6
|
576 while (i < n && data[i] <= data[i-1]) {
|
Chris@7
|
577 aroundPeak += fabs(data[i]);
|
Chris@6
|
578 ++i;
|
Chris@6
|
579 }
|
Chris@6
|
580 }
|
Chris@8
|
581 peakProportion = aroundPeak / abstot;
|
Chris@6
|
582 Feature pp;
|
Chris@6
|
583 pp.values.push_back(peakProportion);
|
Chris@6
|
584 fs[m_ppOutput].push_back(pp);
|
Chris@6
|
585
|
Chris@5
|
586 Feature vf;
|
Chris@5
|
587 vf.values.push_back(variance);
|
Chris@5
|
588 fs[m_varOutput].push_back(vf);
|
Chris@5
|
589
|
Chris@5
|
590 Feature pr;
|
Chris@5
|
591 pr.values.push_back(maxval - rms);
|
Chris@5
|
592 fs[m_p2rOutput].push_back(pr);
|
Chris@5
|
593
|
Chris@5
|
594 Feature pv;
|
Chris@5
|
595 pv.values.push_back(maxval);
|
Chris@5
|
596 fs[m_pvOutput].push_back(pv);
|
Chris@5
|
597
|
Chris@20
|
598 Feature pko;
|
Chris@20
|
599 if (nextPeakVal != 0.0) {
|
Chris@27
|
600 pko.values.push_back(maxval - nextPeakVal);
|
Chris@20
|
601 } else {
|
Chris@20
|
602 pko.values.push_back(0.0);
|
Chris@20
|
603 }
|
Chris@20
|
604 fs[m_pkoOutput].push_back(pko);
|
Chris@20
|
605
|
Chris@5
|
606 Feature am;
|
Chris@5
|
607 for (int i = 0; i < n; ++i) {
|
Chris@5
|
608 if (data[i] < rms) am.values.push_back(0);
|
Chris@5
|
609 else am.values.push_back(data[i] - rms);
|
Chris@5
|
610 }
|
Chris@5
|
611 fs[m_amOutput].push_back(am);
|
Chris@5
|
612 }
|
Chris@5
|
613
|
Chris@5
|
614 void
|
Chris@5
|
615 SimpleCepstrum::addEnvelopeOutputs(FeatureSet &fs, const float *const *inputBuffers, const double *cep)
|
Chris@5
|
616 {
|
Chris@5
|
617 // Wipe the higher cepstral bins in order to calculate the
|
Chris@5
|
618 // envelope. This calculation uses the raw cepstrum, not the
|
Chris@5
|
619 // filtered values (because only values "in frequency range" are
|
Chris@5
|
620 // filtered).
|
Chris@5
|
621 int bs = m_blockSize;
|
Chris@5
|
622 int hs = m_blockSize/2 + 1;
|
Chris@5
|
623
|
Chris@5
|
624 double *ecep = new double[bs];
|
Chris@5
|
625 for (int i = 0; i < m_binFrom; ++i) {
|
Chris@5
|
626 ecep[i] = cep[i] / bs;
|
Chris@5
|
627 }
|
Chris@5
|
628 for (int i = m_binFrom; i < bs; ++i) {
|
Chris@5
|
629 ecep[i] = 0;
|
Chris@5
|
630 }
|
Chris@5
|
631 ecep[0] /= 2;
|
Chris@43
|
632 if (m_binFrom > 0) {
|
Chris@43
|
633 ecep[m_binFrom-1] /= 2;
|
Chris@43
|
634 }
|
Chris@5
|
635
|
Chris@5
|
636 double *env = new double[bs];
|
Chris@5
|
637 double *io = new double[bs];
|
Chris@7
|
638
|
Chris@7
|
639 //!!! This is only right if the previous transform was an inverse one!
|
Chris@33
|
640 Vamp::FFT::forward(bs, ecep, 0, env, io);
|
Chris@5
|
641
|
Chris@5
|
642 for (int i = 0; i < hs; ++i) {
|
Chris@5
|
643 env[i] = exp(env[i]);
|
Chris@5
|
644 }
|
Chris@5
|
645 Feature envf;
|
Chris@5
|
646 for (int i = 0; i < hs; ++i) {
|
Chris@5
|
647 envf.values.push_back(env[i]);
|
Chris@5
|
648 }
|
Chris@5
|
649 fs[m_envOutput].push_back(envf);
|
Chris@5
|
650
|
Chris@5
|
651 Feature es;
|
Chris@5
|
652 for (int i = 0; i < hs; ++i) {
|
Chris@5
|
653 double re = inputBuffers[0][i*2 ] / env[i];
|
Chris@5
|
654 double im = inputBuffers[0][i*2+1] / env[i];
|
Chris@5
|
655 double mag = sqrt(re*re + im*im);
|
Chris@5
|
656 es.values.push_back(mag);
|
Chris@5
|
657 }
|
Chris@5
|
658 fs[m_esOutput].push_back(es);
|
Chris@5
|
659
|
Chris@5
|
660 delete[] env;
|
Chris@5
|
661 delete[] ecep;
|
Chris@5
|
662 delete[] io;
|
Chris@0
|
663 }
|
Chris@0
|
664
|
Chris@0
|
665 SimpleCepstrum::FeatureSet
|
Chris@0
|
666 SimpleCepstrum::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
Chris@0
|
667 {
|
Chris@1
|
668 FeatureSet fs;
|
Chris@1
|
669
|
Chris@0
|
670 int bs = m_blockSize;
|
Chris@0
|
671 int hs = m_blockSize/2 + 1;
|
Chris@0
|
672
|
Chris@5
|
673 double *rawcep = new double[bs];
|
Chris@3
|
674 double *io = new double[bs];
|
Chris@3
|
675
|
Chris@4
|
676 if (m_method != InverseComplex) {
|
Chris@3
|
677
|
Chris@4
|
678 double *logmag = new double[bs];
|
Chris@4
|
679
|
Chris@4
|
680 for (int i = 0; i < hs; ++i) {
|
Chris@3
|
681
|
Chris@4
|
682 double power =
|
Chris@4
|
683 inputBuffers[0][i*2 ] * inputBuffers[0][i*2 ] +
|
Chris@4
|
684 inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1];
|
Chris@5
|
685 double mag = sqrt(power);
|
Chris@3
|
686
|
Chris@5
|
687 double lm = log(mag + 0.00000001);
|
Chris@4
|
688
|
Chris@4
|
689 switch (m_method) {
|
Chris@4
|
690 case InverseSymmetric:
|
Chris@4
|
691 logmag[i] = lm;
|
Chris@4
|
692 if (i > 0) logmag[bs - i] = lm;
|
Chris@4
|
693 break;
|
Chris@4
|
694 case InverseAsymmetric:
|
Chris@4
|
695 logmag[i] = lm;
|
Chris@4
|
696 if (i > 0) logmag[bs - i] = 0;
|
Chris@4
|
697 break;
|
Chris@4
|
698 default:
|
Chris@4
|
699 logmag[bs/2 + i - 1] = lm;
|
Chris@4
|
700 if (i < hs-1) {
|
Chris@4
|
701 logmag[bs/2 - i - 1] = lm;
|
Chris@4
|
702 }
|
Chris@4
|
703 break;
|
Chris@3
|
704 }
|
Chris@3
|
705 }
|
Chris@4
|
706
|
Chris@4
|
707 if (m_method == InverseSymmetric ||
|
Chris@4
|
708 m_method == InverseAsymmetric) {
|
Chris@4
|
709
|
Chris@33
|
710 Vamp::FFT::inverse(bs, logmag, 0, rawcep, io);
|
Chris@4
|
711
|
Chris@4
|
712 } else {
|
Chris@4
|
713
|
Chris@33
|
714 Vamp::FFT::forward(bs, logmag, 0, rawcep, io);
|
Chris@4
|
715
|
Chris@4
|
716 if (m_method == ForwardDifference) {
|
Chris@4
|
717 for (int i = 0; i < hs; ++i) {
|
Chris@5
|
718 rawcep[i] = fabs(io[i]) - fabs(rawcep[i]);
|
Chris@4
|
719 }
|
Chris@4
|
720 } else {
|
Chris@4
|
721 for (int i = 0; i < hs; ++i) {
|
Chris@5
|
722 rawcep[i] = sqrt(rawcep[i]*rawcep[i] + io[i]*io[i]);
|
Chris@4
|
723 }
|
Chris@4
|
724 }
|
Chris@4
|
725 }
|
Chris@4
|
726
|
Chris@4
|
727 delete[] logmag;
|
Chris@4
|
728
|
Chris@4
|
729 } else { // InverseComplex
|
Chris@4
|
730
|
Chris@4
|
731 double *ri = new double[bs];
|
Chris@4
|
732 double *ii = new double[bs];
|
Chris@4
|
733
|
Chris@4
|
734 for (int i = 0; i < hs; ++i) {
|
Chris@4
|
735 double re = inputBuffers[0][i*2];
|
Chris@4
|
736 double im = inputBuffers[0][i*2+1];
|
Chris@4
|
737 std::complex<double> c(re, im);
|
Chris@4
|
738 std::complex<double> clog = std::log(c);
|
Chris@4
|
739 ri[i] = clog.real();
|
Chris@4
|
740 ii[i] = clog.imag();
|
Chris@4
|
741 if (i > 0) {
|
Chris@4
|
742 ri[bs - i] = ri[i];
|
Chris@4
|
743 ii[bs - i] = -ii[i];
|
Chris@4
|
744 }
|
Chris@4
|
745 }
|
Chris@4
|
746
|
Chris@33
|
747 Vamp::FFT::inverse(bs, ri, ii, rawcep, io);
|
Chris@4
|
748
|
Chris@4
|
749 delete[] ri;
|
Chris@4
|
750 delete[] ii;
|
Chris@3
|
751 }
|
Chris@0
|
752
|
Chris@0
|
753 if (m_clamp) {
|
Chris@0
|
754 for (int i = 0; i < bs; ++i) {
|
Chris@5
|
755 if (rawcep[i] < 0) rawcep[i] = 0;
|
Chris@0
|
756 }
|
Chris@0
|
757 }
|
Chris@0
|
758
|
Chris@5
|
759 delete[] io;
|
Chris@0
|
760
|
Chris@5
|
761 double *latest = new double[m_bins];
|
Chris@5
|
762 filter(rawcep, latest);
|
Chris@5
|
763
|
Chris@5
|
764 int n = m_bins;
|
Chris@0
|
765
|
Chris@0
|
766 Feature cf;
|
Chris@5
|
767 for (int i = 0; i < n; ++i) {
|
Chris@5
|
768 cf.values.push_back(latest[i]);
|
Chris@0
|
769 }
|
Chris@0
|
770 fs[m_cepOutput].push_back(cf);
|
Chris@0
|
771
|
Chris@5
|
772 addStatisticalOutputs(fs, latest);
|
Chris@0
|
773
|
Chris@5
|
774 addEnvelopeOutputs(fs, inputBuffers, rawcep);
|
Chris@0
|
775
|
Chris@5
|
776 delete[] latest;
|
Chris@7
|
777 delete[] rawcep;
|
Chris@0
|
778
|
Chris@0
|
779 return fs;
|
Chris@0
|
780 }
|
Chris@0
|
781
|
Chris@0
|
782 SimpleCepstrum::FeatureSet
|
Chris@0
|
783 SimpleCepstrum::getRemainingFeatures()
|
Chris@0
|
784 {
|
Chris@0
|
785 FeatureSet fs;
|
Chris@0
|
786 return fs;
|
Chris@0
|
787 }
|