annotate SimpleCepstrum.cpp @ 43:fcb6562b43f7

Docs and fixes from plugin tester reports
author Chris Cannam
date Fri, 28 Nov 2014 15:28:07 +0000
parents d6acf12f0a8e
children f021dc97da29
rev   line source
Chris@0 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@7 2 /*
Chris@38 3 This file is Copyright (c) 2012 Chris Cannam
Chris@38 4
Chris@7 5 Permission is hereby granted, free of charge, to any person
Chris@7 6 obtaining a copy of this software and associated documentation
Chris@7 7 files (the "Software"), to deal in the Software without
Chris@7 8 restriction, including without limitation the rights to use, copy,
Chris@7 9 modify, merge, publish, distribute, sublicense, and/or sell copies
Chris@7 10 of the Software, and to permit persons to whom the Software is
Chris@7 11 furnished to do so, subject to the following conditions:
Chris@7 12
Chris@7 13 The above copyright notice and this permission notice shall be
Chris@7 14 included in all copies or substantial portions of the Software.
Chris@7 15
Chris@7 16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
Chris@7 17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
Chris@7 18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
Chris@7 19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
Chris@7 20 ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
Chris@7 21 CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
Chris@7 22 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Chris@7 23 */
Chris@0 24
Chris@0 25 #include "SimpleCepstrum.h"
Chris@0 26
Chris@33 27 #include "vamp-sdk/FFT.h"
Chris@33 28
Chris@0 29 #include <vector>
Chris@0 30 #include <algorithm>
Chris@0 31
Chris@0 32 #include <cstdio>
Chris@0 33 #include <cmath>
Chris@4 34 #include <complex>
Chris@0 35
Chris@33 36 #if ( VAMP_SDK_MAJOR_VERSION < 2 || ( VAMP_SDK_MAJOR_VERSION == 2 && VAMP_SDK_MINOR_VERSION < 4 ) )
Chris@33 37 #error Vamp SDK version 2.4 or newer required
Chris@33 38 #endif
Chris@33 39
Chris@0 40 using std::string;
Chris@0 41
Chris@0 42 SimpleCepstrum::SimpleCepstrum(float inputSampleRate) :
Chris@0 43 Plugin(inputSampleRate),
Chris@0 44 m_channels(0),
Chris@0 45 m_stepSize(256),
Chris@0 46 m_blockSize(1024),
Chris@0 47 m_fmin(50),
Chris@0 48 m_fmax(1000),
Chris@10 49 m_histlen(1),
Chris@10 50 m_vflen(1),
Chris@3 51 m_clamp(false),
Chris@5 52 m_method(InverseSymmetric),
Chris@5 53 m_binFrom(0),
Chris@5 54 m_binTo(0),
Chris@5 55 m_bins(0),
Chris@5 56 m_history(0)
Chris@0 57 {
Chris@0 58 }
Chris@0 59
Chris@0 60 SimpleCepstrum::~SimpleCepstrum()
Chris@0 61 {
Chris@5 62 if (m_history) {
Chris@5 63 for (int i = 0; i < m_histlen; ++i) {
Chris@5 64 delete[] m_history[i];
Chris@5 65 }
Chris@5 66 delete[] m_history;
Chris@5 67 }
Chris@0 68 }
Chris@0 69
Chris@0 70 string
Chris@0 71 SimpleCepstrum::getIdentifier() const
Chris@0 72 {
Chris@0 73 return "simple-cepstrum";
Chris@0 74 }
Chris@0 75
Chris@0 76 string
Chris@0 77 SimpleCepstrum::getName() const
Chris@0 78 {
Chris@0 79 return "Simple Cepstrum";
Chris@0 80 }
Chris@0 81
Chris@0 82 string
Chris@0 83 SimpleCepstrum::getDescription() const
Chris@0 84 {
Chris@2 85 return "Return simple cepstral data from DFT bins. This plugin is intended for casual inspection of cepstral data. It returns a lot of different sorts of data and is quite slow; it's not a good way to extract a single feature rapidly.";
Chris@0 86 }
Chris@0 87
Chris@0 88 string
Chris@0 89 SimpleCepstrum::getMaker() const
Chris@0 90 {
Chris@7 91 return "Chris Cannam";
Chris@0 92 }
Chris@0 93
Chris@0 94 int
Chris@0 95 SimpleCepstrum::getPluginVersion() const
Chris@0 96 {
Chris@0 97 // Increment this each time you release a version that behaves
Chris@0 98 // differently from the previous one
Chris@0 99 return 1;
Chris@0 100 }
Chris@0 101
Chris@0 102 string
Chris@0 103 SimpleCepstrum::getCopyright() const
Chris@0 104 {
Chris@7 105 return "Freely redistributable (BSD license)";
Chris@0 106 }
Chris@0 107
Chris@0 108 SimpleCepstrum::InputDomain
Chris@0 109 SimpleCepstrum::getInputDomain() const
Chris@0 110 {
Chris@0 111 return FrequencyDomain;
Chris@0 112 }
Chris@0 113
Chris@0 114 size_t
Chris@0 115 SimpleCepstrum::getPreferredBlockSize() const
Chris@0 116 {
Chris@0 117 return 1024;
Chris@0 118 }
Chris@0 119
Chris@0 120 size_t
Chris@0 121 SimpleCepstrum::getPreferredStepSize() const
Chris@0 122 {
Chris@0 123 return 256;
Chris@0 124 }
Chris@0 125
Chris@0 126 size_t
Chris@0 127 SimpleCepstrum::getMinChannelCount() const
Chris@0 128 {
Chris@0 129 return 1;
Chris@0 130 }
Chris@0 131
Chris@0 132 size_t
Chris@0 133 SimpleCepstrum::getMaxChannelCount() const
Chris@0 134 {
Chris@0 135 return 1;
Chris@0 136 }
Chris@0 137
Chris@0 138 SimpleCepstrum::ParameterList
Chris@0 139 SimpleCepstrum::getParameterDescriptors() const
Chris@0 140 {
Chris@0 141 ParameterList list;
Chris@0 142
Chris@0 143 ParameterDescriptor d;
Chris@0 144
Chris@0 145 d.identifier = "fmin";
Chris@0 146 d.name = "Minimum frequency";
Chris@43 147 d.description = "Frequency whose period corresponds to the quefrency of the last cepstrum bin in range";
Chris@0 148 d.unit = "Hz";
Chris@0 149 d.minValue = m_inputSampleRate / m_blockSize;
Chris@0 150 d.maxValue = m_inputSampleRate / 2;
Chris@0 151 d.defaultValue = 50;
Chris@0 152 d.isQuantized = false;
Chris@0 153 list.push_back(d);
Chris@0 154
Chris@0 155 d.identifier = "fmax";
Chris@0 156 d.name = "Maximum frequency";
Chris@43 157 d.description = "Frequency whose period corresponds to the quefrency of the first cepstrum bin in range";
Chris@0 158 d.unit = "Hz";
Chris@0 159 d.minValue = m_inputSampleRate / m_blockSize;
Chris@0 160 d.maxValue = m_inputSampleRate / 2;
Chris@0 161 d.defaultValue = 1000;
Chris@0 162 d.isQuantized = false;
Chris@0 163 list.push_back(d);
Chris@0 164
Chris@5 165 d.identifier = "histlen";
Chris@5 166 d.name = "Mean filter history length";
Chris@43 167 d.description = "Length of mean filter used for smoothing cepstrum across time bins";
Chris@5 168 d.unit = "";
Chris@5 169 d.minValue = 1;
Chris@5 170 d.maxValue = 10;
Chris@10 171 d.defaultValue = 1;
Chris@5 172 d.isQuantized = true;
Chris@5 173 d.quantizeStep = 1;
Chris@5 174 list.push_back(d);
Chris@5 175
Chris@10 176 d.identifier = "vflen";
Chris@10 177 d.name = "Vertical filter length";
Chris@43 178 d.description = "Length of mean filter used for smoothing cepstrum across quefrency bins";
Chris@10 179 d.unit = "";
Chris@10 180 d.minValue = 1;
Chris@10 181 d.maxValue = 11;
Chris@10 182 d.defaultValue = 1;
Chris@10 183 d.isQuantized = true;
Chris@10 184 d.quantizeStep = 2;
Chris@10 185 list.push_back(d);
Chris@10 186
Chris@3 187 d.identifier = "method";
Chris@3 188 d.name = "Cepstrum transform method";
Chris@43 189 d.unit = "Method to use for calculating cepstrum, starting from the complex short-time Fourier transform of the input audio.\nInverse symmetric - Real part of inverse FFT of log magnitude spectrum, with frequencies above Nyquist reflecting those below it.\nInverse asymmetric - Real part of inverse FFT of log magnitude spectrum, with frequencies above Nyquist set to zero.\nInverse complex - Real part of inverse FFT of complex log spectrum.\nForward magnitude - Magnitude of forward FFT of log magnitude spectrum.\nForward difference - Difference between imaginary and real parts of forward FFT of log magnitude spectrum";
Chris@3 190 d.minValue = 0;
Chris@5 191 d.maxValue = 4;
Chris@3 192 d.defaultValue = 0;
Chris@3 193 d.isQuantized = true;
Chris@3 194 d.quantizeStep = 1;
Chris@3 195 d.valueNames.push_back("Inverse symmetric");
Chris@3 196 d.valueNames.push_back("Inverse asymmetric");
Chris@4 197 d.valueNames.push_back("Inverse complex");
Chris@3 198 d.valueNames.push_back("Forward magnitude");
Chris@3 199 d.valueNames.push_back("Forward difference");
Chris@3 200 list.push_back(d);
Chris@3 201
Chris@10 202 d.identifier = "clamp";
Chris@10 203 d.name = "Clamp negative values in cepstrum at zero";
Chris@43 204 d.unit = "If set, no negative values will be returned; they will be replaced by zeroes";
Chris@10 205 d.minValue = 0;
Chris@10 206 d.maxValue = 1;
Chris@10 207 d.defaultValue = 0;
Chris@10 208 d.isQuantized = true;
Chris@10 209 d.quantizeStep = 1;
Chris@10 210 d.valueNames.clear();
Chris@10 211 list.push_back(d);
Chris@10 212
Chris@0 213 return list;
Chris@0 214 }
Chris@0 215
Chris@0 216 float
Chris@0 217 SimpleCepstrum::getParameter(string identifier) const
Chris@0 218 {
Chris@0 219 if (identifier == "fmin") return m_fmin;
Chris@0 220 else if (identifier == "fmax") return m_fmax;
Chris@5 221 else if (identifier == "histlen") return m_histlen;
Chris@10 222 else if (identifier == "vflen") return m_vflen;
Chris@10 223 else if (identifier == "clamp") return (m_clamp ? 1 : 0);
Chris@3 224 else if (identifier == "method") return (int)m_method;
Chris@0 225 else return 0.f;
Chris@0 226 }
Chris@0 227
Chris@0 228 void
Chris@0 229 SimpleCepstrum::setParameter(string identifier, float value)
Chris@0 230 {
Chris@0 231 if (identifier == "fmin") m_fmin = value;
Chris@0 232 else if (identifier == "fmax") m_fmax = value;
Chris@5 233 else if (identifier == "histlen") m_histlen = value;
Chris@10 234 else if (identifier == "vflen") m_vflen = value;
Chris@10 235 else if (identifier == "clamp") m_clamp = (value > 0.5);
Chris@3 236 else if (identifier == "method") m_method = Method(int(value + 0.5));
Chris@0 237 }
Chris@0 238
Chris@0 239 SimpleCepstrum::ProgramList
Chris@0 240 SimpleCepstrum::getPrograms() const
Chris@0 241 {
Chris@0 242 ProgramList list;
Chris@0 243 return list;
Chris@0 244 }
Chris@0 245
Chris@0 246 string
Chris@0 247 SimpleCepstrum::getCurrentProgram() const
Chris@0 248 {
Chris@0 249 return ""; // no programs
Chris@0 250 }
Chris@0 251
Chris@0 252 void
Chris@0 253 SimpleCepstrum::selectProgram(string name)
Chris@0 254 {
Chris@0 255 }
Chris@0 256
Chris@0 257 SimpleCepstrum::OutputList
Chris@0 258 SimpleCepstrum::getOutputDescriptors() const
Chris@0 259 {
Chris@0 260 OutputList outputs;
Chris@0 261
Chris@0 262 int n = 0;
Chris@0 263
Chris@0 264 OutputDescriptor d;
Chris@2 265
Chris@7 266 d.identifier = "raw_cepstral_peak";
Chris@7 267 d.name = "Frequency corresponding to raw cepstral peak";
Chris@24 268 d.description = "Return the frequency whose period corresponds to the quefrency with the maximum bin value within the specified range of the cepstrum";
Chris@6 269 d.unit = "Hz";
Chris@0 270 d.hasFixedBinCount = true;
Chris@0 271 d.binCount = 1;
Chris@0 272 d.hasKnownExtents = true;
Chris@0 273 d.minValue = m_fmin;
Chris@0 274 d.maxValue = m_fmax;
Chris@0 275 d.isQuantized = false;
Chris@0 276 d.sampleType = OutputDescriptor::OneSamplePerStep;
Chris@0 277 d.hasDuration = false;
Chris@5 278 m_pkOutput = n++;
Chris@0 279 outputs.push_back(d);
Chris@0 280
Chris@24 281 d.identifier = "interpolated_peak";
Chris@24 282 d.name = "Interpolated peak frequency";
Chris@40 283 d.description = "Return the frequency whose period corresponds to the quefrency with the maximum bin value within the specified range of the cepstrum, using parabolic interpolation to estimate the peak quefrency to finer than single bin resolution";
Chris@24 284 m_ipkOutput = n++;
Chris@24 285 outputs.push_back(d);
Chris@24 286
Chris@0 287 d.identifier = "variance";
Chris@0 288 d.name = "Variance of cepstral bins in range";
Chris@0 289 d.unit = "";
Chris@2 290 d.description = "Return the variance of bin values within the specified range of the cepstrum";
Chris@6 291 d.hasKnownExtents = false;
Chris@0 292 m_varOutput = n++;
Chris@0 293 outputs.push_back(d);
Chris@0 294
Chris@0 295 d.identifier = "peak";
Chris@8 296 d.name = "Value at peak";
Chris@0 297 d.unit = "";
Chris@2 298 d.description = "Return the value found in the maximum-valued bin within the specified range of the cepstrum";
Chris@0 299 m_pvOutput = n++;
Chris@0 300 outputs.push_back(d);
Chris@0 301
Chris@5 302 d.identifier = "peak_to_rms";
Chris@5 303 d.name = "Peak-to-RMS distance";
Chris@5 304 d.unit = "";
Chris@5 305 d.description = "Return the difference between maximum and root mean square bin values within the specified range of the cepstrum";
Chris@5 306 m_p2rOutput = n++;
Chris@5 307 outputs.push_back(d);
Chris@5 308
Chris@6 309 d.identifier = "peak_proportion";
Chris@7 310 d.name = "Energy around peak";
Chris@6 311 d.unit = "";
Chris@7 312 d.description = "Return the proportion of total energy that is found in the bins around the peak bin (as far as the nearest local minima), within the specified range of the cepstrum";
Chris@6 313 m_ppOutput = n++;
Chris@6 314 outputs.push_back(d);
Chris@6 315
Chris@20 316 d.identifier = "peak_to_second_peak";
Chris@27 317 d.name = "Peak to second-peak difference";
Chris@20 318 d.unit = "";
Chris@27 319 d.description = "Return the difference between the value found in the peak bin within the specified range of the cepstrum, and that found in the next highest peak";
Chris@20 320 m_pkoOutput = n++;
Chris@20 321 outputs.push_back(d);
Chris@20 322
Chris@6 323 d.identifier = "total";
Chris@6 324 d.name = "Total energy";
Chris@6 325 d.unit = "";
Chris@7 326 d.description = "Return the total energy found in all bins within the specified range of the cepstrum";
Chris@6 327 m_totOutput = n++;
Chris@6 328 outputs.push_back(d);
Chris@6 329
Chris@0 330 d.identifier = "cepstrum";
Chris@0 331 d.name = "Cepstrum";
Chris@0 332 d.unit = "";
Chris@2 333 d.description = "The unprocessed cepstrum bins within the specified range";
Chris@0 334
Chris@0 335 int from = int(m_inputSampleRate / m_fmax);
Chris@0 336 int to = int(m_inputSampleRate / m_fmin);
Chris@43 337 if (from >= (int)m_blockSize / 2) {
Chris@43 338 from = m_blockSize / 2 - 1;
Chris@43 339 }
Chris@0 340 if (to >= (int)m_blockSize / 2) {
Chris@0 341 to = m_blockSize / 2 - 1;
Chris@0 342 }
Chris@43 343 if (to < from) {
Chris@43 344 to = from;
Chris@43 345 }
Chris@0 346 d.binCount = to - from + 1;
Chris@0 347 for (int i = from; i <= to; ++i) {
Chris@0 348 float freq = m_inputSampleRate / i;
Chris@43 349 char buffer[50];
Chris@2 350 sprintf(buffer, "%.2f Hz", freq);
Chris@0 351 d.binNames.push_back(buffer);
Chris@0 352 }
Chris@0 353
Chris@0 354 d.hasKnownExtents = false;
Chris@0 355 m_cepOutput = n++;
Chris@0 356 outputs.push_back(d);
Chris@0 357
Chris@0 358 d.identifier = "am";
Chris@5 359 d.name = "Cepstrum bins relative to RMS";
Chris@5 360 d.description = "The cepstrum bins within the specified range, expressed as a value relative to the root mean square bin value in the range, with values below the RMS clamped to zero";
Chris@0 361 m_amOutput = n++;
Chris@0 362 outputs.push_back(d);
Chris@0 363
Chris@2 364 d.identifier = "env";
Chris@2 365 d.name = "Spectral envelope";
Chris@2 366 d.description = "Envelope calculated from the cepstral values below the specified minimum";
Chris@2 367 d.binCount = m_blockSize/2 + 1;
Chris@2 368 d.binNames.clear();
Chris@7 369 for (int i = 0; i < (int)d.binCount; ++i) {
Chris@2 370 float freq = (m_inputSampleRate / m_blockSize) * i;
Chris@43 371 char buffer[50];
Chris@2 372 sprintf(buffer, "%.2f Hz", freq);
Chris@2 373 d.binNames.push_back(buffer);
Chris@2 374 }
Chris@2 375 m_envOutput = n++;
Chris@2 376 outputs.push_back(d);
Chris@2 377
Chris@2 378 d.identifier = "es";
Chris@2 379 d.name = "Spectrum without envelope";
Chris@2 380 d.description = "Magnitude of spectrum values divided by calculated envelope values, to deconvolve the envelope";
Chris@2 381 m_esOutput = n++;
Chris@2 382 outputs.push_back(d);
Chris@2 383
Chris@0 384 return outputs;
Chris@0 385 }
Chris@0 386
Chris@0 387 bool
Chris@0 388 SimpleCepstrum::initialise(size_t channels, size_t stepSize, size_t blockSize)
Chris@0 389 {
Chris@0 390 if (channels < getMinChannelCount() ||
Chris@0 391 channels > getMaxChannelCount()) return false;
Chris@0 392
Chris@0 393 // std::cerr << "SimpleCepstrum::initialise: channels = " << channels
Chris@0 394 // << ", stepSize = " << stepSize << ", blockSize = " << blockSize
Chris@0 395 // << std::endl;
Chris@0 396
Chris@0 397 m_channels = channels;
Chris@0 398 m_stepSize = stepSize;
Chris@0 399 m_blockSize = blockSize;
Chris@0 400
Chris@5 401 m_binFrom = int(m_inputSampleRate / m_fmax);
Chris@43 402 m_binTo = int(m_inputSampleRate / m_fmin);
Chris@5 403
Chris@43 404 if (m_binFrom >= (int)m_blockSize / 2) {
Chris@43 405 m_binFrom = m_blockSize / 2 - 1;
Chris@43 406 }
Chris@7 407 if (m_binTo >= (int)m_blockSize / 2) {
Chris@5 408 m_binTo = m_blockSize / 2 - 1;
Chris@5 409 }
Chris@43 410 if (m_binTo < m_binFrom) {
Chris@43 411 m_binTo = m_binFrom;
Chris@43 412 }
Chris@5 413
Chris@5 414 m_bins = (m_binTo - m_binFrom) + 1;
Chris@5 415
Chris@5 416 m_history = new double *[m_histlen];
Chris@5 417 for (int i = 0; i < m_histlen; ++i) {
Chris@5 418 m_history[i] = new double[m_bins];
Chris@5 419 }
Chris@5 420
Chris@5 421 reset();
Chris@5 422
Chris@0 423 return true;
Chris@0 424 }
Chris@0 425
Chris@0 426 void
Chris@0 427 SimpleCepstrum::reset()
Chris@0 428 {
Chris@5 429 for (int i = 0; i < m_histlen; ++i) {
Chris@5 430 for (int j = 0; j < m_bins; ++j) {
Chris@5 431 m_history[i][j] = 0.0;
Chris@5 432 }
Chris@5 433 }
Chris@5 434 }
Chris@5 435
Chris@5 436 void
Chris@5 437 SimpleCepstrum::filter(const double *cep, double *result)
Chris@5 438 {
Chris@5 439 int hix = m_histlen - 1; // current history index
Chris@5 440
Chris@5 441 // roll back the history
Chris@5 442 if (m_histlen > 1) {
Chris@5 443 double *oldest = m_history[0];
Chris@5 444 for (int i = 1; i < m_histlen; ++i) {
Chris@5 445 m_history[i-1] = m_history[i];
Chris@5 446 }
Chris@5 447 // and stick this back in the newest spot, to recycle
Chris@5 448 m_history[hix] = oldest;
Chris@5 449 }
Chris@5 450
Chris@5 451 for (int i = 0; i < m_bins; ++i) {
Chris@10 452 double v = 0;
Chris@10 453 int n = 0;
Chris@10 454 // average according to the vertical filter length
Chris@10 455 for (int j = -m_vflen/2; j <= m_vflen/2; ++j) {
Chris@10 456 int ix = i + m_binFrom + j;
Chris@39 457 if (ix >= 0 && ix < (int)m_blockSize) {
Chris@10 458 v += cep[ix];
Chris@10 459 ++n;
Chris@10 460 }
Chris@10 461 }
Chris@10 462 m_history[hix][i] = v / n;
Chris@5 463 }
Chris@5 464
Chris@5 465 for (int i = 0; i < m_bins; ++i) {
Chris@5 466 double mean = 0.0;
Chris@5 467 for (int j = 0; j < m_histlen; ++j) {
Chris@5 468 mean += m_history[j][i];
Chris@5 469 }
Chris@5 470 mean /= m_histlen;
Chris@5 471 result[i] = mean;
Chris@5 472 }
Chris@5 473 }
Chris@24 474
Chris@24 475 double
Chris@24 476 SimpleCepstrum::findInterpolatedPeak(const double *in, int maxbin)
Chris@24 477 {
Chris@40 478 // after jos,
Chris@40 479 // https://ccrma.stanford.edu/~jos/sasp/Quadratic_Interpolation_Spectral_Peaks.html
Chris@40 480
Chris@40 481 if (maxbin < 1 || maxbin > m_bins - 2) {
Chris@24 482 return maxbin;
Chris@24 483 }
Chris@24 484
Chris@40 485 double alpha = in[maxbin-1];
Chris@40 486 double beta = in[maxbin];
Chris@40 487 double gamma = in[maxbin+1];
Chris@24 488
Chris@40 489 double denom = (alpha - 2*beta + gamma);
Chris@24 490
Chris@40 491 if (denom == 0) {
Chris@40 492 // flat
Chris@40 493 return maxbin;
Chris@24 494 }
Chris@24 495
Chris@40 496 double p = ((alpha - gamma) / denom) / 2.0;
Chris@24 497
Chris@40 498 return double(maxbin) + p;
Chris@24 499 }
Chris@5 500
Chris@5 501 void
Chris@5 502 SimpleCepstrum::addStatisticalOutputs(FeatureSet &fs, const double *data)
Chris@5 503 {
Chris@5 504 int n = m_bins;
Chris@5 505
Chris@6 506 double maxval = 0.0;
Chris@5 507 int maxbin = 0;
Chris@5 508
Chris@5 509 for (int i = 0; i < n; ++i) {
Chris@5 510 if (data[i] > maxval) {
Chris@5 511 maxval = data[i];
Chris@6 512 maxbin = i;
Chris@5 513 }
Chris@5 514 }
Chris@5 515
Chris@20 516 double nextPeakVal = 0.0;
Chris@20 517
Chris@20 518 for (int i = 1; i+1 < n; ++i) {
Chris@20 519 if (data[i] > data[i-1] &&
Chris@20 520 data[i] > data[i+1] &&
Chris@20 521 i != maxbin &&
Chris@20 522 data[i] > nextPeakVal) {
Chris@20 523 nextPeakVal = data[i];
Chris@20 524 }
Chris@20 525 }
Chris@20 526
Chris@5 527 Feature rf;
Chris@24 528 Feature irf;
Chris@6 529 if (maxval > 0.0) {
Chris@6 530 rf.values.push_back(m_inputSampleRate / (maxbin + m_binFrom));
Chris@24 531 double cimax = findInterpolatedPeak(data, maxbin);
Chris@24 532 irf.values.push_back(m_inputSampleRate / (cimax + m_binFrom));
Chris@5 533 } else {
Chris@5 534 rf.values.push_back(0);
Chris@24 535 irf.values.push_back(0);
Chris@5 536 }
Chris@5 537 fs[m_pkOutput].push_back(rf);
Chris@24 538 fs[m_ipkOutput].push_back(irf);
Chris@5 539
Chris@6 540 double total = 0;
Chris@5 541 for (int i = 0; i < n; ++i) {
Chris@6 542 total += data[i];
Chris@5 543 }
Chris@5 544
Chris@6 545 Feature tot;
Chris@6 546 tot.values.push_back(total);
Chris@6 547 fs[m_totOutput].push_back(tot);
Chris@6 548
Chris@6 549 double mean = total / n;
Chris@6 550
Chris@6 551 double totsqr = 0;
Chris@8 552 double abstot = 0;
Chris@5 553 for (int i = 0; i < n; ++i) {
Chris@6 554 totsqr += data[i] * data[i];
Chris@8 555 abstot += fabs(data[i]);
Chris@5 556 }
Chris@6 557 double rms = sqrt(totsqr / n);
Chris@5 558
Chris@5 559 double variance = 0;
Chris@5 560 for (int i = 0; i < n; ++i) {
Chris@5 561 double dev = fabs(data[i] - mean);
Chris@5 562 variance += dev * dev;
Chris@5 563 }
Chris@5 564 variance /= n;
Chris@5 565
Chris@6 566 double aroundPeak = 0.0;
Chris@6 567 double peakProportion = 0.0;
Chris@6 568 if (maxval > 0.0) {
Chris@7 569 aroundPeak += fabs(maxval);
Chris@6 570 int i = maxbin - 1;
Chris@6 571 while (i > 0 && data[i] <= data[i+1]) {
Chris@7 572 aroundPeak += fabs(data[i]);
Chris@6 573 --i;
Chris@6 574 }
Chris@6 575 i = maxbin + 1;
Chris@6 576 while (i < n && data[i] <= data[i-1]) {
Chris@7 577 aroundPeak += fabs(data[i]);
Chris@6 578 ++i;
Chris@6 579 }
Chris@6 580 }
Chris@8 581 peakProportion = aroundPeak / abstot;
Chris@6 582 Feature pp;
Chris@6 583 pp.values.push_back(peakProportion);
Chris@6 584 fs[m_ppOutput].push_back(pp);
Chris@6 585
Chris@5 586 Feature vf;
Chris@5 587 vf.values.push_back(variance);
Chris@5 588 fs[m_varOutput].push_back(vf);
Chris@5 589
Chris@5 590 Feature pr;
Chris@5 591 pr.values.push_back(maxval - rms);
Chris@5 592 fs[m_p2rOutput].push_back(pr);
Chris@5 593
Chris@5 594 Feature pv;
Chris@5 595 pv.values.push_back(maxval);
Chris@5 596 fs[m_pvOutput].push_back(pv);
Chris@5 597
Chris@20 598 Feature pko;
Chris@20 599 if (nextPeakVal != 0.0) {
Chris@27 600 pko.values.push_back(maxval - nextPeakVal);
Chris@20 601 } else {
Chris@20 602 pko.values.push_back(0.0);
Chris@20 603 }
Chris@20 604 fs[m_pkoOutput].push_back(pko);
Chris@20 605
Chris@5 606 Feature am;
Chris@5 607 for (int i = 0; i < n; ++i) {
Chris@5 608 if (data[i] < rms) am.values.push_back(0);
Chris@5 609 else am.values.push_back(data[i] - rms);
Chris@5 610 }
Chris@5 611 fs[m_amOutput].push_back(am);
Chris@5 612 }
Chris@5 613
Chris@5 614 void
Chris@5 615 SimpleCepstrum::addEnvelopeOutputs(FeatureSet &fs, const float *const *inputBuffers, const double *cep)
Chris@5 616 {
Chris@5 617 // Wipe the higher cepstral bins in order to calculate the
Chris@5 618 // envelope. This calculation uses the raw cepstrum, not the
Chris@5 619 // filtered values (because only values "in frequency range" are
Chris@5 620 // filtered).
Chris@5 621 int bs = m_blockSize;
Chris@5 622 int hs = m_blockSize/2 + 1;
Chris@5 623
Chris@5 624 double *ecep = new double[bs];
Chris@5 625 for (int i = 0; i < m_binFrom; ++i) {
Chris@5 626 ecep[i] = cep[i] / bs;
Chris@5 627 }
Chris@5 628 for (int i = m_binFrom; i < bs; ++i) {
Chris@5 629 ecep[i] = 0;
Chris@5 630 }
Chris@5 631 ecep[0] /= 2;
Chris@43 632 if (m_binFrom > 0) {
Chris@43 633 ecep[m_binFrom-1] /= 2;
Chris@43 634 }
Chris@5 635
Chris@5 636 double *env = new double[bs];
Chris@5 637 double *io = new double[bs];
Chris@7 638
Chris@7 639 //!!! This is only right if the previous transform was an inverse one!
Chris@33 640 Vamp::FFT::forward(bs, ecep, 0, env, io);
Chris@5 641
Chris@5 642 for (int i = 0; i < hs; ++i) {
Chris@5 643 env[i] = exp(env[i]);
Chris@5 644 }
Chris@5 645 Feature envf;
Chris@5 646 for (int i = 0; i < hs; ++i) {
Chris@5 647 envf.values.push_back(env[i]);
Chris@5 648 }
Chris@5 649 fs[m_envOutput].push_back(envf);
Chris@5 650
Chris@5 651 Feature es;
Chris@5 652 for (int i = 0; i < hs; ++i) {
Chris@5 653 double re = inputBuffers[0][i*2 ] / env[i];
Chris@5 654 double im = inputBuffers[0][i*2+1] / env[i];
Chris@5 655 double mag = sqrt(re*re + im*im);
Chris@5 656 es.values.push_back(mag);
Chris@5 657 }
Chris@5 658 fs[m_esOutput].push_back(es);
Chris@5 659
Chris@5 660 delete[] env;
Chris@5 661 delete[] ecep;
Chris@5 662 delete[] io;
Chris@0 663 }
Chris@0 664
Chris@0 665 SimpleCepstrum::FeatureSet
Chris@0 666 SimpleCepstrum::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
Chris@0 667 {
Chris@1 668 FeatureSet fs;
Chris@1 669
Chris@0 670 int bs = m_blockSize;
Chris@0 671 int hs = m_blockSize/2 + 1;
Chris@0 672
Chris@5 673 double *rawcep = new double[bs];
Chris@3 674 double *io = new double[bs];
Chris@3 675
Chris@4 676 if (m_method != InverseComplex) {
Chris@3 677
Chris@4 678 double *logmag = new double[bs];
Chris@4 679
Chris@4 680 for (int i = 0; i < hs; ++i) {
Chris@3 681
Chris@4 682 double power =
Chris@4 683 inputBuffers[0][i*2 ] * inputBuffers[0][i*2 ] +
Chris@4 684 inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1];
Chris@5 685 double mag = sqrt(power);
Chris@3 686
Chris@5 687 double lm = log(mag + 0.00000001);
Chris@4 688
Chris@4 689 switch (m_method) {
Chris@4 690 case InverseSymmetric:
Chris@4 691 logmag[i] = lm;
Chris@4 692 if (i > 0) logmag[bs - i] = lm;
Chris@4 693 break;
Chris@4 694 case InverseAsymmetric:
Chris@4 695 logmag[i] = lm;
Chris@4 696 if (i > 0) logmag[bs - i] = 0;
Chris@4 697 break;
Chris@4 698 default:
Chris@4 699 logmag[bs/2 + i - 1] = lm;
Chris@4 700 if (i < hs-1) {
Chris@4 701 logmag[bs/2 - i - 1] = lm;
Chris@4 702 }
Chris@4 703 break;
Chris@3 704 }
Chris@3 705 }
Chris@4 706
Chris@4 707 if (m_method == InverseSymmetric ||
Chris@4 708 m_method == InverseAsymmetric) {
Chris@4 709
Chris@33 710 Vamp::FFT::inverse(bs, logmag, 0, rawcep, io);
Chris@4 711
Chris@4 712 } else {
Chris@4 713
Chris@33 714 Vamp::FFT::forward(bs, logmag, 0, rawcep, io);
Chris@4 715
Chris@4 716 if (m_method == ForwardDifference) {
Chris@4 717 for (int i = 0; i < hs; ++i) {
Chris@5 718 rawcep[i] = fabs(io[i]) - fabs(rawcep[i]);
Chris@4 719 }
Chris@4 720 } else {
Chris@4 721 for (int i = 0; i < hs; ++i) {
Chris@5 722 rawcep[i] = sqrt(rawcep[i]*rawcep[i] + io[i]*io[i]);
Chris@4 723 }
Chris@4 724 }
Chris@4 725 }
Chris@4 726
Chris@4 727 delete[] logmag;
Chris@4 728
Chris@4 729 } else { // InverseComplex
Chris@4 730
Chris@4 731 double *ri = new double[bs];
Chris@4 732 double *ii = new double[bs];
Chris@4 733
Chris@4 734 for (int i = 0; i < hs; ++i) {
Chris@4 735 double re = inputBuffers[0][i*2];
Chris@4 736 double im = inputBuffers[0][i*2+1];
Chris@4 737 std::complex<double> c(re, im);
Chris@4 738 std::complex<double> clog = std::log(c);
Chris@4 739 ri[i] = clog.real();
Chris@4 740 ii[i] = clog.imag();
Chris@4 741 if (i > 0) {
Chris@4 742 ri[bs - i] = ri[i];
Chris@4 743 ii[bs - i] = -ii[i];
Chris@4 744 }
Chris@4 745 }
Chris@4 746
Chris@33 747 Vamp::FFT::inverse(bs, ri, ii, rawcep, io);
Chris@4 748
Chris@4 749 delete[] ri;
Chris@4 750 delete[] ii;
Chris@3 751 }
Chris@0 752
Chris@0 753 if (m_clamp) {
Chris@0 754 for (int i = 0; i < bs; ++i) {
Chris@5 755 if (rawcep[i] < 0) rawcep[i] = 0;
Chris@0 756 }
Chris@0 757 }
Chris@0 758
Chris@5 759 delete[] io;
Chris@0 760
Chris@5 761 double *latest = new double[m_bins];
Chris@5 762 filter(rawcep, latest);
Chris@5 763
Chris@5 764 int n = m_bins;
Chris@0 765
Chris@0 766 Feature cf;
Chris@5 767 for (int i = 0; i < n; ++i) {
Chris@5 768 cf.values.push_back(latest[i]);
Chris@0 769 }
Chris@0 770 fs[m_cepOutput].push_back(cf);
Chris@0 771
Chris@5 772 addStatisticalOutputs(fs, latest);
Chris@0 773
Chris@5 774 addEnvelopeOutputs(fs, inputBuffers, rawcep);
Chris@0 775
Chris@5 776 delete[] latest;
Chris@7 777 delete[] rawcep;
Chris@0 778
Chris@0 779 return fs;
Chris@0 780 }
Chris@0 781
Chris@0 782 SimpleCepstrum::FeatureSet
Chris@0 783 SimpleCepstrum::getRemainingFeatures()
Chris@0 784 {
Chris@0 785 FeatureSet fs;
Chris@0 786 return fs;
Chris@0 787 }