annotate plugins/XTractPlugin.cpp @ 40:08d9660e57e8 tip

Or better, this
author Chris Cannam
date Thu, 16 May 2024 10:17:33 +0100
parents 11b10bf3147a
children
rev   line source
cannam@0 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
cannam@0 2
cannam@0 3 /*
cannam@0 4 Vamp feature extraction plugins using Jamie Bullock's
cannam@0 5 libxtract audio feature extraction library.
cannam@0 6
cannam@0 7 Centre for Digital Music, Queen Mary, University of London.
Chris@28 8 This file copyright 2006-2012 Queen Mary, University of London.
cannam@0 9
cannam@0 10 This program is free software; you can redistribute it and/or
cannam@0 11 modify it under the terms of the GNU General Public License as
cannam@0 12 published by the Free Software Foundation; either version 2 of the
cannam@0 13 License, or (at your option) any later version. See the file
cannam@0 14 COPYING included with this distribution for more information.
cannam@0 15 */
cannam@0 16
cannam@0 17 #include "XTractPlugin.h"
cannam@0 18
cannam@0 19 #include <cassert>
Chris@22 20 #include <cstdio>
cannam@1 21 #include <math.h>
Chris@35 22 #include <stdio.h>
cannam@0 23
cannam@0 24 using std::cerr;
cannam@0 25 using std::endl;
cannam@0 26 using std::string;
cannam@0 27
cannam@1 28 xtract_function_descriptor_t *
cannam@1 29 XTractPlugin::m_xtDescriptors = 0;
cannam@1 30
cannam@1 31 int
cannam@1 32 XTractPlugin::m_xtDescRefCount = 0;
cannam@1 33
cannam@0 34 XTractPlugin::XTractPlugin(unsigned int xtFeature, float inputSampleRate) :
cannam@0 35 Plugin(inputSampleRate),
cannam@0 36 m_xtFeature(xtFeature),
cannam@0 37 m_channels(0),
cannam@0 38 m_stepSize(0),
cannam@0 39 m_blockSize(0),
cannam@0 40 m_resultBuffer(0),
cannam@1 41 m_peakThreshold(10),
cannam@1 42 m_rolloffThreshold(90),
cannam@1 43 m_harmonicThreshold(.1),
cannam@0 44 m_minFreq(80),
cannam@0 45 m_maxFreq(18000),
cannam@9 46 m_coeffCount(40),
cannam@9 47 m_highestCoef(20),
cannam@9 48 m_lowestCoef(0),
cannam@0 49 m_mfccFilters(0),
cannam@1 50 m_mfccStyle((int)XTRACT_EQUAL_GAIN),
cannam@14 51 m_spectrumType((int)XTRACT_MAGNITUDE_SPECTRUM),
cannam@14 52 m_dc(0),
cannam@14 53 m_normalise(0),
cannam@0 54 m_barkBandLimits(0),
cannam@0 55 m_outputBinCount(0),
cannam@0 56 m_initialised(false)
cannam@0 57 {
cannam@1 58 if (m_xtDescRefCount++ == 0) {
cannam@1 59 m_xtDescriptors =
cannam@1 60 (xtract_function_descriptor_t *)xtract_make_descriptors();
cannam@1 61 }
cannam@0 62 }
cannam@0 63
cannam@0 64 XTractPlugin::~XTractPlugin()
cannam@0 65 {
cannam@0 66 if (m_mfccFilters) {
cannam@0 67 for (size_t i = 0; i < m_coeffCount; ++i) {
cannam@0 68 delete[] m_mfccFilters[i];
cannam@0 69 }
cannam@0 70 delete[] m_mfccFilters;
cannam@0 71 }
cannam@0 72 if (m_barkBandLimits) {
cannam@0 73 delete[] m_barkBandLimits;
cannam@0 74 }
cannam@0 75 if (m_resultBuffer) {
cannam@0 76 delete[] m_resultBuffer;
cannam@0 77 }
cannam@1 78
cannam@1 79 if (--m_xtDescRefCount == 0) {
cannam@1 80 xtract_free_descriptors(m_xtDescriptors);
cannam@1 81 }
cannam@0 82 }
cannam@0 83
cannam@0 84 string
cannam@2 85 XTractPlugin::getIdentifier() const
cannam@0 86 {
cannam@1 87 return xtDescriptor()->algo.name;
cannam@0 88 }
cannam@0 89
cannam@0 90 string
cannam@2 91 XTractPlugin::getName() const
cannam@2 92 {
cannam@2 93 return xtDescriptor()->algo.p_name;
cannam@2 94 }
cannam@2 95
cannam@2 96 string
cannam@0 97 XTractPlugin::getDescription() const
cannam@0 98 {
cannam@2 99 return xtDescriptor()->algo.p_desc;
cannam@0 100 }
cannam@1 101
cannam@0 102
cannam@0 103 string
cannam@0 104 XTractPlugin::getMaker() const
cannam@0 105 {
cannam@0 106 return "libxtract by Jamie Bullock (plugin by Chris Cannam)";
cannam@0 107 }
cannam@0 108
cannam@0 109 int
cannam@0 110 XTractPlugin::getPluginVersion() const
cannam@0 111 {
Chris@28 112 return 4;
cannam@0 113 }
cannam@0 114
cannam@0 115 string
cannam@0 116 XTractPlugin::getCopyright() const
cannam@0 117 {
Chris@28 118 string text = "Copyright 2006-2012 Jamie Bullock, plugin Copyright 2006-2012 Queen Mary, University of London. ";
cannam@0 119
cannam@1 120 string method = "";
cannam@0 121
cannam@1 122 method += xtDescriptor()->algo.author;
cannam@0 123
cannam@9 124 if (method != "") {
cannam@9 125 int year = xtDescriptor()->algo.year;
cannam@9 126 if (year != 0) {
cannam@9 127 char yearstr[12];
cannam@9 128 sprintf(yearstr, " (%d)", year);
cannam@9 129 method += yearstr;
cannam@9 130 }
cannam@9 131 text += "Method from " + method + ". ";
cannam@9 132 }
cannam@9 133
cannam@0 134 text += "Distributed under the GNU General Public License";
cannam@0 135 return text;
cannam@0 136 }
cannam@0 137
cannam@0 138 XTractPlugin::InputDomain
cannam@0 139 XTractPlugin::getInputDomain() const
cannam@0 140 {
cannam@1 141
cannam@1 142 if (xtDescriptor()->data.format == XTRACT_AUDIO_SAMPLES)
cannam@1 143 return TimeDomain;
cannam@1 144 else
cannam@1 145 return FrequencyDomain;
cannam@0 146 }
cannam@1 147
cannam@1 148
cannam@9 149 bool XTractPlugin::m_anyInitialised = false;
cannam@0 150
cannam@0 151 bool
cannam@0 152 XTractPlugin::initialise(size_t channels, size_t stepSize, size_t blockSize)
cannam@0 153 {
cannam@1 154
cannam@1 155 int donor = *(xtDescriptor()->argv.donor),
cannam@1 156 data_format = xtDescriptor()->data.format;
cannam@1 157
cannam@0 158 if (channels < getMinChannelCount() ||
cannam@0 159 channels > getMaxChannelCount()) return false;
cannam@0 160
cannam@9 161 if (blockSize != getPreferredBlockSize()) {
cannam@9 162 cerr << "XTractPlugin::initialise: ERROR: "
cannam@9 163 << "Only the standard block size of " << getPreferredBlockSize()
cannam@9 164 << " is supported (owing to global FFT initialisation requirements)" << endl;
cannam@9 165 return false;
cannam@9 166 }
cannam@9 167
cannam@0 168 m_channels = channels;
cannam@0 169 m_stepSize = stepSize;
cannam@0 170 m_blockSize = blockSize;
cannam@0 171
cannam@9 172 if (!m_anyInitialised) {
cannam@9 173 m_anyInitialised = true;
cannam@9 174 // initialise libxtract
cannam@9 175 xtract_init_fft(m_blockSize, XTRACT_SPECTRUM);
cannam@9 176 xtract_init_fft(m_blockSize, XTRACT_AUTOCORRELATION_FFT);
cannam@9 177 xtract_init_fft(m_blockSize, XTRACT_DCT);
cannam@9 178 xtract_init_fft(m_blockSize, XTRACT_MFCC);
cannam@9 179 }
cannam@9 180
cannam@1 181 if (donor == XTRACT_INIT_MFCC) {
cannam@0 182
Chris@34 183 m_mfccFilters = new double *[m_coeffCount];
cannam@0 184 for (size_t i = 0; i < m_coeffCount; ++i) {
Chris@34 185 m_mfccFilters[i] = new double[m_blockSize];
cannam@0 186 }
cannam@0 187
cannam@0 188 int error = (int)xtract_init_mfcc(m_blockSize, m_inputSampleRate/2,
cannam@0 189 m_mfccStyle, m_minFreq, m_maxFreq,
cannam@0 190 m_coeffCount, m_mfccFilters);
cannam@1 191 if (error != XTRACT_SUCCESS) {
cannam@0 192 cerr << "XTractPlugin::initialise: ERROR: "
cannam@0 193 << "xtract_init_mfcc returned error code " << error << endl;
cannam@0 194 return false;
cannam@0 195 }
cannam@0 196
cannam@1 197 } else if (donor == XTRACT_BARK_COEFFICIENTS ||
cannam@7 198 donor == XTRACT_INIT_BARK ||
cannam@1 199 data_format == XTRACT_BARK_COEFFS) {
cannam@7 200
cannam@1 201 m_barkBandLimits = new int[XTRACT_BARK_BANDS];
cannam@0 202
cannam@1 203 /*int error = *(int)*/xtract_init_bark(m_blockSize, m_inputSampleRate,
cannam@0 204 m_barkBandLimits);
cannam@0 205 // if (error != SUCCESS) {
cannam@0 206 // cerr << "XTractPlugin::initialise: ERROR: "
cannam@0 207 // << "xtract_init_bark returned error code " << error << endl;
cannam@0 208 // return false;
cannam@0 209 // }
cannam@0 210 }
cannam@0 211
cannam@0 212 switch (m_xtFeature) {
cannam@1 213 case XTRACT_SPECTRUM:
cannam@14 214 m_outputBinCount = m_blockSize / 2 + (m_dc ? 1 : 0); break;
cannam@1 215 case XTRACT_HARMONIC_SPECTRUM:
cannam@1 216 case XTRACT_PEAK_SPECTRUM:
cannam@1 217 m_outputBinCount = m_blockSize / 2; break;
cannam@1 218 case XTRACT_DCT:
cannam@1 219 case XTRACT_AUTOCORRELATION_FFT:
cannam@1 220 case XTRACT_AUTOCORRELATION:
cannam@1 221 case XTRACT_AMDF:
cannam@1 222 case XTRACT_ASDF:
cannam@1 223 m_outputBinCount = m_blockSize; break;
cannam@1 224 case XTRACT_MFCC:
cannam@9 225 m_outputBinCount = (m_highestCoef - m_lowestCoef)+1; break;
cannam@1 226 case XTRACT_BARK_COEFFICIENTS:
cannam@1 227 m_outputBinCount = XTRACT_BARK_BANDS; break;
cannam@1 228 default:
cannam@1 229 m_outputBinCount = 1; break;
cannam@0 230 }
cannam@0 231
cannam@13 232 m_outputDescriptors.clear();
cannam@0 233 setupOutputDescriptors();
cannam@0 234
cannam@0 235 m_initialised = true;
cannam@0 236
cannam@0 237 return true;
cannam@0 238 }
cannam@0 239
cannam@0 240 void
cannam@0 241 XTractPlugin::reset()
cannam@0 242 {
cannam@0 243 }
cannam@0 244
cannam@0 245 size_t
cannam@0 246 XTractPlugin::getMinChannelCount() const
cannam@0 247 {
cannam@0 248 return 1;
cannam@0 249 }
cannam@0 250
cannam@0 251 size_t
cannam@0 252 XTractPlugin::getMaxChannelCount() const
cannam@0 253 {
cannam@0 254 return 1;
cannam@0 255 }
cannam@0 256
cannam@0 257 size_t
cannam@0 258 XTractPlugin::getPreferredStepSize() const
cannam@0 259 {
cannam@0 260 if (getInputDomain() == FrequencyDomain) {
cannam@1 261 return getPreferredBlockSize();
cannam@1 262 } else {
cannam@0 263 return getPreferredBlockSize() / 2;
cannam@0 264 }
cannam@0 265 }
cannam@0 266
cannam@0 267 size_t
cannam@0 268 XTractPlugin::getPreferredBlockSize() const
cannam@0 269 {
cannam@0 270 return 1024;
cannam@0 271 }
cannam@0 272
cannam@0 273 XTractPlugin::ParameterList
cannam@0 274 XTractPlugin::getParameterDescriptors() const
cannam@0 275 {
cannam@0 276 ParameterList list;
cannam@0 277 ParameterDescriptor desc;
cannam@0 278
cannam@1 279 if (m_xtFeature == XTRACT_MFCC) {
cannam@0 280
cannam@2 281 desc.identifier = "minfreq";
cannam@2 282 desc.name = "Minimum Frequency";
cannam@0 283 desc.minValue = 0;
cannam@0 284 desc.maxValue = m_inputSampleRate / 2;
cannam@0 285 desc.defaultValue = 80;
cannam@0 286 desc.isQuantized = false;
cannam@0 287 desc.unit = "Hz";
cannam@0 288 list.push_back(desc);
cannam@0 289
cannam@2 290 desc.identifier = "maxfreq";
cannam@2 291 desc.name = "Maximum Frequency";
cannam@0 292 desc.defaultValue = 18000;
cannam@0 293 if (desc.defaultValue > m_inputSampleRate * 0.875) {
cannam@0 294 desc.defaultValue = m_inputSampleRate * 0.875;
cannam@0 295 }
cannam@0 296 list.push_back(desc);
cannam@0 297
cannam@2 298 desc.identifier = "bands";
cannam@9 299 desc.name = "# Mel Frequency Bands";
cannam@0 300 desc.minValue = 10;
cannam@9 301 desc.maxValue = 80;
cannam@9 302 desc.defaultValue = 40;
cannam@9 303 desc.unit = "";
cannam@9 304 desc.isQuantized = true;
cannam@9 305 desc.quantizeStep = 1;
cannam@9 306 list.push_back(desc);
cannam@9 307
cannam@9 308 desc.identifier = "lowestcoef";
cannam@9 309 desc.name = "Lowest Coefficient Returned";
cannam@9 310 desc.minValue = 0;
cannam@9 311 desc.maxValue = 80;
cannam@9 312 desc.defaultValue = 0;
cannam@9 313 desc.unit = "";
cannam@9 314 desc.isQuantized = true;
cannam@9 315 desc.quantizeStep = 1;
cannam@9 316 list.push_back(desc);
cannam@9 317
cannam@9 318 desc.identifier = "highestcoef";
cannam@9 319 desc.name = "Highest Coefficient Returned";
cannam@9 320 desc.minValue = 0;
cannam@9 321 desc.maxValue = 80;
cannam@0 322 desc.defaultValue = 20;
cannam@0 323 desc.unit = "";
cannam@0 324 desc.isQuantized = true;
cannam@0 325 desc.quantizeStep = 1;
cannam@0 326 list.push_back(desc);
cannam@0 327
cannam@2 328 desc.identifier = "style";
cannam@2 329 desc.name = "MFCC Type";
cannam@0 330 desc.minValue = 0;
cannam@0 331 desc.maxValue = 1;
cannam@0 332 desc.defaultValue = 0;
cannam@0 333 desc.valueNames.push_back("Equal Gain");
cannam@0 334 desc.valueNames.push_back("Equal Area");
cannam@0 335 list.push_back(desc);
cannam@0 336 }
cannam@0 337
cannam@14 338 if (m_xtFeature == XTRACT_SPECTRUM) {
cannam@14 339
cannam@14 340 desc.identifier = "spectrumtype";
cannam@14 341 desc.name = "Type";
cannam@14 342 desc.minValue = 0;
cannam@14 343 desc.maxValue = 3;
cannam@14 344 desc.defaultValue = int(XTRACT_MAGNITUDE_SPECTRUM);
cannam@14 345 desc.isQuantized = true;
cannam@14 346 desc.quantizeStep = 1;
cannam@14 347 desc.valueNames.push_back("Magnitude Spectrum");
cannam@14 348 desc.valueNames.push_back("Log Magnitude Spectrum");
cannam@14 349 desc.valueNames.push_back("Power Spectrum");
cannam@14 350 desc.valueNames.push_back("Log Power Spectrum");
cannam@14 351 list.push_back(desc);
cannam@14 352
cannam@14 353 desc.identifier = "dc";
cannam@14 354 desc.name = "Include DC";
cannam@14 355 desc.maxValue = 1;
cannam@14 356 desc.defaultValue = 0;
cannam@14 357 desc.valueNames.clear();
cannam@14 358 list.push_back(desc);
cannam@14 359
cannam@14 360 desc.identifier = "normalise";
cannam@14 361 desc.name = "Normalise";
cannam@14 362 list.push_back(desc);
cannam@14 363 }
cannam@14 364
cannam@0 365 if (needPeakThreshold()) {
cannam@0 366
cannam@10 367 desc.identifier = "peak-threshold";
cannam@2 368 desc.name = "Peak Threshold";
cannam@0 369 desc.minValue = 0;
cannam@0 370 desc.maxValue = 100;
cannam@1 371 desc.defaultValue = 10; /* Threshold as % of maximum peak found */
cannam@0 372 desc.isQuantized = false;
cannam@0 373 desc.valueNames.clear();
cannam@0 374 desc.unit = "%";
cannam@0 375 list.push_back(desc);
cannam@0 376
cannam@1 377 }
cannam@1 378
cannam@1 379 if (needRolloffThreshold()) {
cannam@0 380
cannam@10 381 desc.identifier = "rolloff-threshold";
cannam@2 382 desc.name = "Rolloff Threshold";
cannam@0 383 desc.minValue = 0;
cannam@0 384 desc.maxValue = 100;
cannam@1 385 desc.defaultValue = 90; /* Freq below which 90% of energy is */
cannam@0 386 desc.isQuantized = false;
cannam@0 387 desc.valueNames.clear();
cannam@0 388 desc.unit = "%";
cannam@0 389 list.push_back(desc);
cannam@1 390
cannam@1 391 }
cannam@1 392
cannam@1 393 if (needHarmonicThreshold()) {
cannam@1 394
cannam@10 395 desc.identifier = "harmonic-threshold";
cannam@2 396 desc.name = "Harmonic Threshold";
cannam@1 397 desc.minValue = 0;
cannam@1 398 desc.maxValue = 1.0;
cannam@1 399 desc.defaultValue = .1; /* Distance from nearesst harmonic number */
cannam@1 400 desc.isQuantized = false;
cannam@1 401 desc.valueNames.clear();
cannam@1 402 desc.unit = "";
cannam@1 403 list.push_back(desc);
cannam@0 404 }
cannam@0 405
cannam@0 406 return list;
cannam@0 407 }
cannam@0 408
cannam@0 409 float
cannam@0 410 XTractPlugin::getParameter(string param) const
cannam@0 411 {
cannam@1 412 if (m_xtFeature == XTRACT_MFCC) {
cannam@0 413 if (param == "minfreq") return m_minFreq;
cannam@0 414 if (param == "maxfreq") return m_maxFreq;
cannam@0 415 if (param == "bands") return m_coeffCount;
cannam@9 416 if (param == "lowestcoef") return m_lowestCoef;
cannam@9 417 if (param == "highestcoef") return m_highestCoef;
cannam@0 418 if (param == "style") return m_mfccStyle;
cannam@0 419 }
cannam@0 420
cannam@14 421 if (m_xtFeature == XTRACT_SPECTRUM) {
cannam@14 422 if (param == "spectrumtype") return m_spectrumType;
cannam@14 423 if (param == "dc") return m_dc;
cannam@14 424 if (param == "normalise") return m_normalise;
cannam@14 425 }
cannam@14 426
cannam@10 427 if (param == "peak-threshold") return m_peakThreshold;
cannam@10 428 if (param == "rolloff-threshold") return m_rolloffThreshold;
cannam@10 429 if (param == "harmonic-threshold") return m_harmonicThreshold;
cannam@0 430
cannam@0 431 return 0.f;
cannam@0 432 }
cannam@0 433
cannam@0 434 void
cannam@0 435 XTractPlugin::setParameter(string param, float value)
cannam@0 436 {
cannam@1 437 if (m_xtFeature == XTRACT_MFCC) {
cannam@0 438 if (param == "minfreq") m_minFreq = value;
cannam@0 439 else if (param == "maxfreq") m_maxFreq = value;
cannam@14 440 else if (param == "bands") m_coeffCount = int(value + .1);
cannam@9 441 else if (param == "lowestcoef"){
cannam@14 442 m_lowestCoef = int(value + .1);
cannam@9 443 if(m_lowestCoef >= m_coeffCount) m_lowestCoef = m_coeffCount - 1;
cannam@9 444 if(m_lowestCoef > m_highestCoef) m_lowestCoef = m_highestCoef;
cannam@9 445 }
cannam@9 446 else if (param == "highestcoef"){
cannam@14 447 m_highestCoef = int(value + .1);
cannam@9 448 if(m_highestCoef >= m_coeffCount) m_highestCoef = m_coeffCount - 1;
cannam@9 449 if(m_highestCoef < m_lowestCoef) m_highestCoef = m_lowestCoef;
cannam@9 450 }
cannam@14 451 else if (param == "style") m_mfccStyle = int(value + .1);
cannam@14 452 }
cannam@14 453
cannam@14 454 if (m_xtFeature == XTRACT_SPECTRUM) {
cannam@14 455 if (param == "spectrumtype") m_spectrumType = int(value + .1);
cannam@14 456 if (param == "dc") m_dc = int(value + .1);
cannam@14 457 if (param == "normalise") m_normalise = int(value + .1);
cannam@0 458 }
cannam@0 459
cannam@10 460 if (param == "peak-threshold") m_peakThreshold = value;
cannam@10 461 if (param == "rolloff-threshold") m_rolloffThreshold = value;
cannam@10 462 if (param == "harmonic-threshold") m_harmonicThreshold = value;
cannam@0 463 }
cannam@0 464
cannam@0 465 XTractPlugin::OutputList
cannam@0 466 XTractPlugin::getOutputDescriptors() const
cannam@0 467 {
cannam@13 468 if (m_outputDescriptors.empty()) {
cannam@13 469 setupOutputDescriptors();
cannam@13 470 }
cannam@0 471 return m_outputDescriptors;
cannam@0 472 }
cannam@0 473
cannam@0 474 void
cannam@0 475 XTractPlugin::setupOutputDescriptors() const
cannam@0 476 {
cannam@0 477 OutputDescriptor d;
cannam@1 478 const xtract_function_descriptor_t *xtFd = xtDescriptor();
cannam@2 479 d.identifier = getIdentifier();
cannam@2 480 d.name = getName();
cannam@2 481 d.description = getDescription();
cannam@0 482 d.unit = "";
cannam@0 483 d.hasFixedBinCount = true;
cannam@0 484 d.binCount = m_outputBinCount;
cannam@0 485 d.hasKnownExtents = false;
cannam@0 486 d.isQuantized = false;
cannam@0 487 d.sampleType = OutputDescriptor::OneSamplePerStep;
cannam@0 488
cannam@9 489 if (xtFd->is_scalar){
cannam@1 490 switch(xtFd->result.scalar.unit){
cannam@1 491 case XTRACT_HERTZ: d.unit = "Hz"; break;
cannam@1 492 case XTRACT_DBFS: d.unit = "dB"; break;
cannam@1 493 default: d.unit = ""; break;
cannam@1 494 }
cannam@1 495 }
cannam@1 496 else {
cannam@1 497 if (xtFd->result.vector.format == XTRACT_SPECTRAL){
cannam@0 498
cannam@1 499 d.binCount /= 2;
cannam@2 500 d.identifier = "amplitudes";
cannam@2 501 d.name = "Peak Amplitudes";
cannam@2 502 d.description = "";
cannam@1 503 }
cannam@1 504 }
cannam@0 505
cannam@0 506 m_outputDescriptors.push_back(d);
cannam@0 507 }
cannam@0 508
cannam@0 509 bool
cannam@0 510 XTractPlugin::needPeakThreshold() const
cannam@0 511 {
cannam@1 512 const xtract_function_descriptor_t *xtFd = xtDescriptor();
cannam@0 513
cannam@1 514 if(m_xtFeature == XTRACT_PEAK_SPECTRUM ||
cannam@1 515 xtFd->data.format == XTRACT_SPECTRAL_PEAKS ||
cannam@1 516 xtFd->data.format == XTRACT_SPECTRAL_PEAKS_MAGNITUDES ||
cannam@1 517 needHarmonicThreshold())
cannam@1 518 return true;
cannam@1 519 else return false;
cannam@1 520 }
cannam@1 521
cannam@1 522 bool
cannam@1 523 XTractPlugin::needHarmonicThreshold() const
cannam@1 524 {
cannam@1 525 const xtract_function_descriptor_t *xtFd = xtDescriptor();
cannam@1 526
cannam@1 527 if(m_xtFeature == XTRACT_HARMONIC_SPECTRUM ||
cannam@1 528 xtFd->data.format == XTRACT_SPECTRAL_HARMONICS_FREQUENCIES ||
cannam@1 529 m_xtFeature == XTRACT_NOISINESS ||
cannam@1 530 xtFd->data.format == XTRACT_SPECTRAL_HARMONICS_MAGNITUDES)
cannam@1 531 return true;
cannam@1 532 else return false;
cannam@1 533 }
cannam@1 534
cannam@1 535 bool
cannam@1 536 XTractPlugin::needRolloffThreshold() const
cannam@1 537 {
cannam@1 538 if(m_xtFeature == XTRACT_ROLLOFF)
cannam@1 539 return true;
cannam@1 540 else
cannam@1 541 return false;
cannam@0 542 }
cannam@0 543
cannam@0 544 XTractPlugin::FeatureSet
cannam@0 545 XTractPlugin::process(const float *const *inputBuffers,
cannam@0 546 Vamp::RealTime timestamp)
cannam@0 547 {
cannam@13 548 if (m_outputDescriptors.empty()) {
cannam@13 549 setupOutputDescriptors();
cannam@13 550 }
cannam@0 551
cannam@14 552 int rbs =
cannam@14 553 // Add 2 here to accommodate extra data for spectrum with DC
cannam@14 554 2 + (m_outputBinCount > m_blockSize ? m_outputBinCount : m_blockSize);
cannam@0 555 if (!m_resultBuffer) {
Chris@34 556 m_resultBuffer = new double[rbs];
cannam@0 557 }
cannam@0 558
cannam@1 559 int i;
cannam@1 560
cannam@1 561 for (i = 0; i < rbs; ++i) m_resultBuffer[i] = 0.f;
cannam@1 562
cannam@1 563 int N = m_blockSize, M = N >> 1;
Chris@34 564
Chris@34 565 const double *data = 0;
Chris@34 566 double *input_d = new double[N];
Chris@34 567 for (int i = 0; i < N; ++i) {
Chris@34 568 input_d[i] = inputBuffers[0][i];
Chris@34 569 }
Chris@34 570
Chris@34 571 double *fft_temp = 0, *data_temp = 0;
cannam@0 572 void *argv = 0;
cannam@1 573 bool isSpectral = false;
cannam@1 574 xtract_function_descriptor_t *xtFd = xtDescriptor();
cannam@0 575
cannam@0 576 FeatureSet fs;
cannam@0 577
cannam@1 578 switch (xtFd->data.format) {
cannam@1 579 case XTRACT_AUDIO_SAMPLES:
Chris@34 580 data = input_d;
cannam@1 581 break;
cannam@1 582 case XTRACT_SPECTRAL:
cannam@1 583 default:
cannam@1 584 // All the rest are derived from the spectrum
cannam@1 585 // Need same format as would be output by xtract_spectrum
Chris@34 586 double q = m_inputSampleRate / N;
Chris@34 587 fft_temp = new double[N];
cannam@1 588 for (int n = 1; n < N/2; ++n) {
Chris@34 589 fft_temp[n] = sqrt(input_d[n*2] *
Chris@34 590 input_d[n*2] + input_d[n*2+1] *
Chris@34 591 input_d[n*2+1]) / N;
cannam@1 592 fft_temp[N-n] = (N/2 - n) * q;
cannam@1 593 }
Chris@34 594 fft_temp[0] = fabs(input_d[0]) / N;
Chris@34 595 fft_temp[N/2] = fabs(input_d[N]) / N;
cannam@1 596 data = &fft_temp[0];
cannam@1 597 isSpectral = true;
cannam@1 598 break;
cannam@0 599 }
cannam@0 600
cannam@0 601 assert(m_outputBinCount > 0);
cannam@0 602
Chris@34 603 double *result = m_resultBuffer;
cannam@0 604
Chris@34 605 double argf[XTRACT_MAXARGS];
cannam@0 606 argv = &argf[0];
cannam@14 607 argf[0] = 0.f; // handy for some, e.g. lowest_value which has a threshold
cannam@0 608
Chris@34 609 double mean, variance, sd, npartials, nharmonics;
cannam@0 610
cannam@1 611 bool needSD, needVariance, needMean, needPeaks,
cannam@1 612 needBarkCoefficients, needHarmonics, needF0, needSFM, needMax,
cannam@1 613 needNumPartials, needNumHarmonics;
cannam@0 614
cannam@1 615 int donor;
cannam@0 616
cannam@1 617 needSD = needVariance = needMean = needPeaks =
cannam@1 618 needBarkCoefficients = needF0 = needHarmonics = needSFM = needMax =
cannam@1 619 needNumPartials = needNumHarmonics = 0;
cannam@0 620
cannam@1 621 mean = variance = sd = npartials = nharmonics = 0.f;
cannam@0 622
cannam@1 623 i = xtFd->argc;
cannam@0 624
cannam@1 625 while(i--){
cannam@14 626 if (m_xtFeature == XTRACT_BARK_COEFFICIENTS) {
cannam@14 627 /* "BARK_COEFFICIENTS is special because argc = BARK_BANDS" */
cannam@14 628 break;
cannam@14 629 }
cannam@1 630 donor = xtFd->argv.donor[i];
cannam@1 631 switch(donor){
cannam@1 632 case XTRACT_STANDARD_DEVIATION:
cannam@1 633 case XTRACT_SPECTRAL_STANDARD_DEVIATION:
cannam@1 634 needSD = 1;
cannam@1 635 break;
cannam@1 636 case XTRACT_VARIANCE:
cannam@1 637 case XTRACT_SPECTRAL_VARIANCE:
cannam@1 638 needVariance = 1;
cannam@1 639 break;
cannam@1 640 case XTRACT_MEAN:
cannam@1 641 case XTRACT_SPECTRAL_MEAN:
cannam@1 642 needMean = 1;
cannam@1 643 break;
cannam@1 644 case XTRACT_F0:
cannam@1 645 case XTRACT_FAILSAFE_F0:
cannam@1 646 needF0 = 1;
cannam@1 647 break;
cannam@1 648 case XTRACT_FLATNESS:
cannam@1 649 needSFM = 1;
cannam@1 650 case XTRACT_HIGHEST_VALUE:
cannam@1 651 needMax = 1;
cannam@1 652 break;
cannam@1 653 }
cannam@1 654 }
cannam@1 655
cannam@1 656 if(needHarmonicThreshold() && m_xtFeature != XTRACT_HARMONIC_SPECTRUM)
cannam@1 657 needHarmonics = needF0 = 1;
cannam@1 658
cannam@1 659 if(needPeakThreshold() && m_xtFeature != XTRACT_PEAK_SPECTRUM)
cannam@1 660 needPeaks = 1;
cannam@1 661
cannam@1 662 if(xtFd->data.format == XTRACT_BARK_COEFFS &&
cannam@1 663 m_xtFeature != XTRACT_BARK_COEFFICIENTS){
cannam@1 664 needBarkCoefficients = 1;
cannam@0 665 }
cannam@0 666
cannam@0 667 if (needMean) {
cannam@1 668 if(isSpectral)
cannam@1 669 xtract_spectral_mean(data, N, 0, result);
cannam@1 670 else
cannam@1 671 xtract_mean(data, M, 0, result);
cannam@0 672 mean = *result;
cannam@0 673 *result = 0.f;
cannam@0 674 }
cannam@0 675
cannam@1 676 if (needVariance || needSD) {
cannam@0 677 argf[0] = mean;
cannam@1 678 if(isSpectral)
cannam@1 679 xtract_spectral_variance(data, N, argv, result);
cannam@1 680 else
cannam@1 681 xtract_variance(data, M, argv, result);
cannam@0 682 variance = *result;
cannam@0 683 *result = 0.f;
cannam@0 684 }
cannam@0 685
cannam@0 686 if (needSD) {
cannam@0 687 argf[0] = variance;
cannam@1 688 if(isSpectral)
cannam@1 689 xtract_spectral_standard_deviation(data, N, argv, result);
cannam@1 690 else
cannam@1 691 xtract_standard_deviation(data, M, argv, result);
cannam@0 692 sd = *result;
cannam@0 693 *result = 0.f;
cannam@0 694 }
cannam@0 695
cannam@1 696 if (needMax) {
cannam@1 697 xtract_highest_value(data, M, argv, result);
cannam@1 698 argf[1] = *result;
cannam@1 699 *result = 0.f;
cannam@1 700 }
cannam@1 701
cannam@0 702 if (needSD) {
cannam@0 703 argf[0] = mean;
cannam@0 704 argf[1] = sd;
cannam@0 705 } else if (needVariance) {
cannam@0 706 argf[0] = variance;
cannam@0 707 } else if (needMean) {
cannam@0 708 argf[0] = mean;
cannam@0 709 }
cannam@0 710
cannam@0 711 // data should be now correct for all except:
cannam@1 712 // XTRACT_SPECTRAL_CENTROID -- N/2 magnitude peaks and N/2 frequencies
cannam@1 713 // TONALITY -- SFM
cannam@0 714 // TRISTIMULUS_1/2/3 -- harmonic spectrum
cannam@0 715 // ODD_EVEN_RATIO -- harmonic spectrum
cannam@0 716 // LOUDNESS -- Bark coefficients
cannam@1 717 // XTRACT_HARMONIC_SPECTRUM -- peak spectrum
cannam@0 718
cannam@0 719 // argv should be now correct for all except:
cannam@0 720 //
cannam@1 721 // XTRACT_ROLLOFF -- (sr/N), threshold (%)
cannam@1 722 // XTRACT_PEAK_SPECTRUM -- (sr / N), peak threshold (%)
cannam@1 723 // XTRACT_HARMONIC_SPECTRUM -- f0, harmonic threshold
cannam@1 724 // XTRACT_F0 -- samplerate
cannam@1 725 // XTRACT_MFCC -- Mel filter coefficients
cannam@1 726 // XTRACT_BARK_COEFFICIENTS -- Bark band limits
cannam@1 727 // XTRACT_NOISINESS -- npartials, nharmonics.
cannam@14 728 // XTRACT_SPECTRUM -- q, spectrum type, dc, normalise
cannam@0 729
Chris@34 730 data_temp = new double[N];
cannam@1 731
cannam@1 732 if (m_xtFeature == XTRACT_ROLLOFF ||
cannam@9 733 m_xtFeature == XTRACT_PEAK_SPECTRUM || needPeaks) {
cannam@1 734 argf[0] = m_inputSampleRate / N;
cannam@1 735 if(m_xtFeature == XTRACT_ROLLOFF)
cannam@1 736 argf[1] = m_rolloffThreshold;
cannam@1 737 else
cannam@1 738 argf[1] = m_peakThreshold;
cannam@0 739 argv = &argf[0];
cannam@0 740 }
cannam@0 741
cannam@14 742 if (m_xtFeature == XTRACT_SPECTRUM) {
cannam@14 743 argf[0] = 0; // xtract_spectrum will calculate this for us
cannam@14 744 argf[1] = m_spectrumType;
cannam@14 745 argf[2] = m_dc;
cannam@14 746 argf[3] = m_normalise;
cannam@14 747 argv = &argf[0];
cannam@14 748 }
cannam@14 749
cannam@0 750 if (needPeaks) {
cannam@1 751 //We only read in the magnitudes (M)
cannam@1 752 /*int rv = */ xtract_peak_spectrum(data, M, argv, result);
cannam@0 753 for (int n = 0; n < N; ++n) {
cannam@1 754 data_temp[n] = result[n];
cannam@0 755 result[n] = 0.f;
cannam@0 756 }
cannam@0 757 // rv not trustworthy
cannam@0 758 // if (rv != SUCCESS) {
cannam@0 759 // cerr << "ERROR: XTractPlugin::process: xtract_peaks failed (error code = " << rv << ")" << endl;
cannam@0 760 // goto done;
cannam@0 761 // }
cannam@0 762 }
cannam@0 763
cannam@1 764 if (needNumPartials) {
cannam@1 765 xtract_nonzero_count(data_temp, M, NULL, &npartials);
cannam@1 766 }
cannam@1 767
cannam@1 768 if (needF0 || m_xtFeature == XTRACT_FAILSAFE_F0 ||
cannam@1 769 m_xtFeature == XTRACT_F0) {
cannam@1 770 argf[0] = m_inputSampleRate;
cannam@1 771 argv = &argf[0];
cannam@1 772 }
cannam@1 773
cannam@1 774 if (needF0) {
Chris@34 775 xtract_failsafe_f0(&input_d[0], N, (void *)&m_inputSampleRate, result);
cannam@1 776 argf[0] = *result;
cannam@1 777 argv = &argf[0];
cannam@1 778 }
cannam@1 779
cannam@1 780 if (needSFM) {
cannam@1 781 xtract_flatness(data, N >> 1, 0, &argf[0]);
cannam@1 782 argv = &argf[0];
cannam@1 783 }
cannam@1 784
cannam@1 785 if (needHarmonics || m_xtFeature == XTRACT_HARMONIC_SPECTRUM){
cannam@1 786 argf[1] = m_harmonicThreshold;
cannam@1 787 }
cannam@1 788
cannam@1 789 if (needHarmonics){
cannam@1 790 xtract_harmonic_spectrum(data_temp, N, argv, result);
cannam@1 791 for (int n = 0; n < N; ++n) {
cannam@1 792 data_temp[n] = result[n];
cannam@1 793 result[n] = 0.f;
cannam@1 794 }
cannam@1 795 }
cannam@1 796
cannam@1 797 if (needNumHarmonics) {
cannam@1 798 xtract_nonzero_count(data_temp, M, NULL, &nharmonics);
cannam@1 799 }
cannam@1 800
cannam@1 801 if (m_xtFeature == XTRACT_NOISINESS) {
cannam@1 802
cannam@1 803 argf[0] = nharmonics;
cannam@1 804 argf[1] = npartials;
cannam@1 805 argv = &argf[0];
cannam@1 806
cannam@1 807 }
cannam@1 808
cannam@1 809 if (needBarkCoefficients || m_xtFeature == XTRACT_BARK_COEFFICIENTS) {
cannam@1 810 argv = &m_barkBandLimits[0];
cannam@1 811 }
cannam@1 812
cannam@1 813 xtract_mel_filter mfccFilterBank;
cannam@1 814 if (m_xtFeature == XTRACT_MFCC) {
cannam@1 815 mfccFilterBank.n_filters = m_coeffCount;
cannam@1 816 mfccFilterBank.filters = m_mfccFilters;
cannam@1 817 argv = &mfccFilterBank;
cannam@1 818 }
cannam@1 819
cannam@0 820 if (needBarkCoefficients) {
cannam@1 821
cannam@1 822 /*int rv = */ xtract_bark_coefficients(data, 0, argv, data_temp);
cannam@0 823 // if (rv != SUCCESS) {
cannam@0 824 // cerr << "ERROR: XTractPlugin::process: xtract_bark_coefficients failed (error code = " << rv << ")" << endl;
cannam@0 825 // goto done;
cannam@0 826 // }
cannam@1 827 data = &data_temp[0];
cannam@0 828 argv = 0;
cannam@0 829 }
cannam@1 830
cannam@1 831 if (xtFd->data.format == XTRACT_SPECTRAL_HARMONICS_FREQUENCIES) {
cannam@0 832
cannam@1 833 N = M;
cannam@1 834 data = &data_temp[N];
cannam@0 835
cannam@1 836 } else if (xtFd->data.format == XTRACT_SPECTRAL_HARMONICS_MAGNITUDES) {
cannam@0 837
cannam@1 838 N = M;
cannam@1 839 data = &data_temp[0];
cannam@1 840
cannam@1 841 }
cannam@0 842
cannam@1 843 // If we only want spectral magnitudes, use first half of the input array
cannam@1 844 else if(xtFd->data.format == XTRACT_SPECTRAL_MAGNITUDES ||
cannam@1 845 xtFd->data.format == XTRACT_SPECTRAL_PEAKS_MAGNITUDES ||
cannam@1 846 xtFd->data.format == XTRACT_ARBITRARY_SERIES) {
cannam@1 847 N = M;
cannam@1 848 }
cannam@1 849
cannam@1 850 else if(xtFd->data.format == XTRACT_BARK_COEFFS) {
cannam@1 851
cannam@1 852 N = XTRACT_BARK_BANDS - 1; /* Because our SR is 44100 (< 54000)*/
cannam@1 853 }
cannam@1 854
cannam@1 855 if (needPeaks && !needHarmonics) {
cannam@1 856
cannam@1 857 data = &data_temp[0];
cannam@1 858
cannam@0 859 }
cannam@0 860
cannam@0 861 // now the main result
cannam@0 862 xtract[m_xtFeature](data, N, argv, result);
cannam@0 863
cannam@1 864 //haveResult:
cannam@1 865 // {
cannam@0 866 int index = 0;
cannam@0 867
cannam@0 868 for (size_t output = 0; output < m_outputDescriptors.size(); ++output) {
cannam@0 869
cannam@0 870 Feature feature;
cannam@0 871 feature.hasTimestamp = false;
cannam@0 872 bool good = true;
cannam@0 873
cannam@0 874 for (size_t n = 0; n < m_outputDescriptors[output].binCount; ++n) {
Chris@34 875 double value = m_resultBuffer[index + m_lowestCoef];
cannam@0 876 if (isnan(value) || isinf(value)) {
cannam@0 877 good = false;
cannam@0 878 index += (m_outputDescriptors[output].binCount - n);
cannam@0 879 break;
cannam@0 880 }
cannam@0 881 feature.values.push_back(value);
cannam@0 882 ++index;
cannam@0 883 }
cannam@13 884
cannam@0 885 if (good) fs[output].push_back(feature);
cannam@0 886 }
cannam@1 887 // }
cannam@0 888
cannam@1 889 //done:
cannam@1 890 delete[] fft_temp;
cannam@1 891 delete[] data_temp;
Chris@34 892 delete[] input_d;
cannam@0 893
cannam@3 894 // cerr << "XTractPlugin::process returning" << endl;
cannam@0 895
cannam@0 896 return fs;
cannam@0 897 }
cannam@0 898
cannam@0 899 XTractPlugin::FeatureSet
cannam@0 900 XTractPlugin::getRemainingFeatures()
cannam@0 901 {
cannam@0 902 return FeatureSet();
cannam@0 903 }
cannam@0 904