annotate FChTransformF0gram.cpp @ 24:430c730ae912 spect

Remove nfft for unimplemented oversampling option; rejig nsamp param to use indexed list
author Chris Cannam
date Thu, 04 Oct 2018 15:22:23 +0100
parents 2c54d83a196f
children 8e57d2f41926 9ebd08bdd168
rev   line source
Chris@0 1 /*
Chris@0 2 copyright (C) 2011 I. Irigaray, M. Rocamora
Chris@0 3
Chris@0 4 This program is free software: you can redistribute it and/or modify
Chris@0 5 it under the terms of the GNU General Public License as published by
Chris@0 6 the Free Software Foundation, either version 3 of the License, or
Chris@0 7 (at your option) any later version.
Chris@0 8
Chris@0 9 This program is distributed in the hope that it will be useful,
Chris@0 10 but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@0 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@0 12 GNU General Public License for more details.
Chris@0 13
Chris@0 14 You should have received a copy of the GNU General Public License
Chris@0 15 along with this program. If not, see <http://www.gnu.org/licenses/>.
Chris@7 16 */
Chris@0 17
Chris@0 18 #include "FChTransformF0gram.h"
Chris@0 19 #include "FChTransformUtils.h"
Chris@0 20 #include <math.h>
Chris@0 21 #include <float.h>
Chris@14 22
Chris@19 23 #include <set>
Chris@19 24
Chris@14 25 #include "bqvec/Allocators.h"
Chris@14 26
Chris@14 27 using namespace breakfastquay;
Chris@14 28
Chris@24 29 #define DEBUG
Chris@7 30
Chris@0 31 #define MAX(x, y) (((x) > (y)) ? (x) : (y))
Chris@0 32
Chris@15 33 FChTransformF0gram::FChTransformF0gram(ProcessingMode mode,
Chris@15 34 float inputSampleRate) :
Chris@7 35 Plugin(inputSampleRate),
Chris@15 36 m_processingMode(mode),
Chris@20 37 m_initialised(false),
Chris@20 38 m_stepSize(256),
Chris@20 39 m_blockSize(8192) {
Chris@0 40
Chris@24 41 nsamp_options.push_back(256);
Chris@24 42 nsamp_options.push_back(512);
Chris@24 43 nsamp_options.push_back(1024);
Chris@24 44 nsamp_options.push_back(2048);
Chris@24 45 nsamp_options.push_back(4096);
Chris@24 46 nsamp_options.push_back(8192);
Chris@24 47
Chris@0 48 m_fs = inputSampleRate;
Chris@0 49 // max frequency of interest (Hz)
Chris@0 50 m_fmax = 10000.f;
Chris@0 51 // warping parameters
Chris@12 52 m_warp_params.nsamps_twarp = 2048;
Chris@0 53 m_warp_params.alpha_max = 4;
Chris@0 54 m_warp_params.num_warps = 21;
Chris@0 55 m_warp_params.fact_over_samp = 2;
Chris@0 56 m_warp_params.alpha_dist = 0;
Chris@0 57 // f0 parameters
Chris@0 58 m_f0_params.f0min = 80.0;
Chris@0 59 m_f0_params.num_octs = 4;
Chris@0 60 m_f0_params.num_f0s_per_oct = 192;
Chris@0 61 m_f0_params.prefer = true;
Chris@0 62 m_f0_params.prefer_mean = 60;
Chris@0 63 m_f0_params.prefer_stdev = 18;
Chris@0 64 // glogs parameters
Chris@0 65 m_glogs_params.HP_logS = true;
Chris@0 66 m_glogs_params.att_subharms = 1;
Chris@7 67 // display parameters
Chris@15 68 m_f0gram_mode = BestBinOfAllDirections;
Chris@0 69
Chris@0 70 m_glogs_params.median_poly_coefs[0] = -0.000000058551680;
Chris@0 71 m_glogs_params.median_poly_coefs[1] = -0.000006945207775;
Chris@0 72 m_glogs_params.median_poly_coefs[2] = 0.002357223226588;
Chris@0 73
Chris@0 74 m_glogs_params.sigma_poly_coefs[0] = 0.000000092782308;
Chris@0 75 m_glogs_params.sigma_poly_coefs[1] = 0.000057283574898;
Chris@0 76 m_glogs_params.sigma_poly_coefs[2] = 0.022199903714288;
Chris@0 77
Chris@0 78 m_num_f0s = 0;
Chris@16 79 m_f0s = 0;
Chris@0 80 }
Chris@0 81
Chris@14 82 FChTransformF0gram::~FChTransformF0gram()
Chris@14 83 {
Chris@20 84 if (!m_initialised) {
Chris@14 85 return; // nothing was allocated
Chris@14 86 }
Chris@14 87
Chris@20 88 deallocate(m_inputBuffer);
Chris@20 89
Chris@14 90 deallocate(m_warpings.pos_int);
Chris@14 91 deallocate(m_warpings.pos_frac);
Chris@14 92 deallocate(m_warpings.chirp_rates);
Chris@14 93
Chris@14 94 clean_LPF();
Chris@14 95
Chris@14 96 deallocate(m_timeWindow);
Chris@14 97
Chris@14 98 deallocate(mp_HanningWindow);
Chris@14 99
Chris@14 100 // Warping
Chris@14 101 deallocate(x_warping);
Chris@14 102 delete fft_xwarping;
Chris@14 103 deallocate(m_absFanChirpTransform);
Chris@14 104 deallocate(m_auxFanChirpTransform);
Chris@14 105
Chris@14 106 // design_GLogS
Chris@14 107 deallocate(m_glogs_f0);
Chris@14 108 deallocate(m_glogs);
Chris@14 109 deallocate(m_glogs_n);
Chris@14 110 deallocate(m_glogs_index);
Chris@14 111 deallocate(m_glogs_posint);
Chris@14 112 deallocate(m_glogs_posfrac);
Chris@14 113 deallocate(m_glogs_interp);
Chris@14 114 deallocate(m_glogs_third_harmonic_posint);
Chris@14 115 deallocate(m_glogs_third_harmonic_posfrac);
Chris@14 116 deallocate(m_glogs_third_harmonic);
Chris@14 117 deallocate(m_glogs_fifth_harmonic_posint);
Chris@14 118 deallocate(m_glogs_fifth_harmonic_posfrac);
Chris@14 119 deallocate(m_glogs_fifth_harmonic);
Chris@14 120 deallocate(m_glogs_f0_preference_weights);
Chris@14 121 deallocate(m_glogs_median_correction);
Chris@14 122 deallocate(m_glogs_sigma_correction);
Chris@16 123
Chris@16 124 deallocate(m_f0s);
Chris@0 125 }
Chris@0 126
Chris@0 127 string
Chris@0 128 FChTransformF0gram::getIdentifier() const {
Chris@15 129 switch (m_processingMode) {
Chris@15 130 case ModeF0Gram: return "fchtransformf0gram";
Chris@15 131 case ModeSpectrogram: return "fchtransformspectrogram";
Chris@15 132 case ModeRoughSpectrogram: return "fchtransformrough";
Chris@15 133 }
Chris@17 134 throw std::logic_error("unknown mode");
Chris@0 135 }
Chris@0 136
Chris@0 137 string
Chris@0 138 FChTransformF0gram::getName() const {
Chris@15 139 switch (m_processingMode) {
Chris@15 140 case ModeF0Gram: return "Fan Chirp Transform F0gram";
Chris@15 141 case ModeSpectrogram: return "Fan Chirp Transform Spectrogram";
Chris@15 142 case ModeRoughSpectrogram: return "Fan Chirp Transform Rough Spectrogram";
Chris@15 143 }
Chris@17 144 throw std::logic_error("unknown mode");
Chris@0 145 }
Chris@0 146
Chris@0 147 string
Chris@0 148 FChTransformF0gram::getDescription() const {
Chris@15 149 switch (m_processingMode) {
Chris@15 150 case ModeF0Gram:
Chris@15 151 return "This plug-in produces a representation, called F0gram, which exhibits the salience of the fundamental frequency of the sound sources in the audio file. The computation of the F0gram makes use of the Fan Chirp Transform analysis. It is based on the article \"Fan chirp transform for music representation\" P. Cancela, E. Lopez, M. Rocamora, International Conference on Digital Audio Effects, 13th. DAFx-10. Graz, Austria - 6-10 Sep 2010.";
Chris@15 152 case ModeSpectrogram:
Chris@15 153 return "This plug-in produces a spectral representation of the audio using Fan Chirp Transform analysis.";
Chris@15 154 case ModeRoughSpectrogram:
Chris@15 155 return "This plug-in produces a more approximate spectral representation of the audio using Fan Chirp Transform analysis.";
Chris@15 156 }
Chris@17 157 throw std::logic_error("unknown mode");
Chris@0 158 }
Chris@0 159
Chris@0 160 string
Chris@0 161 FChTransformF0gram::getMaker() const {
Chris@0 162 // Your name here
Chris@0 163 return "Audio Processing Group \n Universidad de la Republica";
Chris@0 164 }
Chris@0 165
Chris@0 166 int
Chris@0 167 FChTransformF0gram::getPluginVersion() const {
Chris@0 168 // Increment this each time you release a version that behaves
Chris@0 169 // differently from the previous one
Chris@0 170 //
Chris@0 171 // 0 - initial version from scratch
Chris@15 172 return 1;
Chris@0 173 }
Chris@0 174
Chris@0 175 string
Chris@0 176 FChTransformF0gram::getCopyright() const {
Chris@0 177 // This function is not ideally named. It does not necessarily
Chris@0 178 // need to say who made the plugin -- getMaker does that -- but it
Chris@0 179 // should indicate the terms under which it is distributed. For
Chris@0 180 // example, "Copyright (year). All Rights Reserved", or "GPL"
Chris@0 181 return "copyright (C) 2011 GPL - Audio Processing Group, UdelaR";
Chris@0 182 }
Chris@0 183
Chris@0 184 FChTransformF0gram::InputDomain
Chris@0 185 FChTransformF0gram::getInputDomain() const {
Chris@0 186 return TimeDomain;
Chris@0 187 }
Chris@0 188
Chris@0 189 size_t FChTransformF0gram::getPreferredBlockSize() const {
Chris@20 190 // We do our own accumulating into blocks within process()
Chris@20 191 return m_blockSize/2;
Chris@0 192 }
Chris@0 193
Chris@0 194 size_t
Chris@0 195 FChTransformF0gram::getPreferredStepSize() const {
Chris@20 196 return m_stepSize;
Chris@0 197 }
Chris@0 198
Chris@0 199 size_t
Chris@0 200 FChTransformF0gram::getMinChannelCount() const {
Chris@0 201 return 1;
Chris@0 202 }
Chris@0 203
Chris@0 204 size_t
Chris@0 205 FChTransformF0gram::getMaxChannelCount() const {
Chris@0 206 return 1;
Chris@0 207 }
Chris@0 208
Chris@0 209 FChTransformF0gram::ParameterList
Chris@0 210 FChTransformF0gram::getParameterDescriptors() const {
Chris@0 211 ParameterList list;
Chris@0 212
Chris@0 213 // If the plugin has no adjustable parameters, return an empty
Chris@0 214 // list here (and there's no need to provide implementations of
Chris@0 215 // getParameter and setParameter in that case either).
Chris@0 216
Chris@0 217 // Note that it is your responsibility to make sure the parameters
Chris@0 218 // start off having their default values (e.g. in the constructor
Chris@0 219 // above). The host needs to know the default value so it can do
Chris@0 220 // things like provide a "reset to default" function, but it will
Chris@0 221 // not explicitly set your parameters to their defaults for you if
Chris@0 222 // they have not changed in the mean time.
Chris@0 223
Chris@0 224 // ============= WARPING PARAMETERS =============
Chris@0 225
Chris@0 226 ParameterDescriptor fmax;
Chris@0 227 fmax.identifier = "fmax";
Chris@0 228 fmax.name = "Maximum frequency";
Chris@0 229 fmax.description = "Maximum frequency of interest for the analysis.";
Chris@0 230 fmax.unit = "Hz";
Chris@0 231 fmax.minValue = 2000;
Chris@0 232 fmax.maxValue = 22050;
Chris@0 233 fmax.defaultValue = 10000;
Chris@0 234 fmax.isQuantized = true;
Chris@0 235 fmax.quantizeStep = 1.0;
Chris@0 236 list.push_back(fmax);
Chris@0 237
Chris@0 238 ParameterDescriptor nsamp;
Chris@24 239 nsamp.identifier = "nsamp_ix";
Chris@0 240 nsamp.name = "Number of samples";
Chris@0 241 nsamp.description = "Number of samples of the time warped frame";
Chris@24 242 nsamp.minValue = 0;
Chris@24 243 nsamp.maxValue = nsamp_options.size()-1;
Chris@24 244 nsamp.defaultValue = 3;
Chris@24 245 nsamp.isQuantized = true;
Chris@24 246 nsamp.quantizeStep = 1.0;
Chris@24 247 char label[100];
Chris@24 248 for (int i = 0; i < int(nsamp_options.size()); ++i) {
Chris@24 249 sprintf(label, "%d", nsamp_options[i]);
Chris@24 250 nsamp.valueNames.push_back(label);
Chris@24 251 }
Chris@0 252 nsamp.isQuantized = true;
Chris@0 253 nsamp.quantizeStep = 1.0;
Chris@0 254 list.push_back(nsamp);
Chris@0 255
Chris@0 256 ParameterDescriptor alpha_max;
Chris@0 257 alpha_max.identifier = "alpha_max";
Chris@0 258 alpha_max.name = "Maximum alpha value";
Chris@0 259 alpha_max.description = "Maximum value for the alpha parameter of the transform.";
Chris@0 260 alpha_max.unit = "Hz/s";
Chris@0 261 alpha_max.minValue = -10;
Chris@0 262 alpha_max.maxValue = 10;
Chris@0 263 alpha_max.defaultValue = 5;
Chris@0 264 alpha_max.isQuantized = true;
Chris@0 265 alpha_max.quantizeStep = 1.0;
Chris@0 266 list.push_back(alpha_max);
Chris@0 267
Chris@0 268 ParameterDescriptor num_warps;
Chris@0 269 num_warps.identifier = "num_warps";
Chris@0 270 num_warps.name = "Number of warpings";
Chris@0 271 num_warps.description = "Number of different warpings in the specified range (must be odd).";
Chris@0 272 num_warps.unit = "";
Chris@0 273 num_warps.minValue = 1;
Chris@0 274 num_warps.maxValue = 101;
Chris@0 275 num_warps.defaultValue = 21;
Chris@0 276 num_warps.isQuantized = true;
Chris@0 277 num_warps.quantizeStep = 2.0;
Chris@0 278 list.push_back(num_warps);
Chris@0 279
Chris@0 280 ParameterDescriptor alpha_dist;
Chris@0 281 alpha_dist.identifier = "alpha_dist";
Chris@0 282 alpha_dist.name = "alpha distribution";
Chris@0 283 alpha_dist.description = "Type of distribution of alpha values (linear or log).";
Chris@0 284 alpha_dist.unit = "";
Chris@0 285 alpha_dist.minValue = 0;
Chris@0 286 alpha_dist.maxValue = 1;
Chris@0 287 alpha_dist.defaultValue = 1;
Chris@0 288 alpha_dist.isQuantized = true;
Chris@0 289 alpha_dist.quantizeStep = 1.0;
Chris@0 290 // lin (0), log (1)
Chris@0 291 alpha_dist.valueNames.push_back("lin");
Chris@0 292 alpha_dist.valueNames.push_back("log");
Chris@0 293 list.push_back(alpha_dist);
Chris@0 294
Chris@0 295 // ============= F0-GRAM PARAMETERS =============
Chris@0 296
Chris@0 297 ParameterDescriptor f0min;
Chris@0 298 f0min.identifier = "f0min";
Chris@0 299 f0min.name = "min f0";
Chris@0 300 f0min.description = "Minimum fundamental frequency (f0) value.";
Chris@0 301 f0min.unit = "Hz";
Chris@0 302 f0min.minValue = 1;
Chris@0 303 f0min.maxValue = 500;
Chris@0 304 f0min.defaultValue = 80;
Chris@0 305 f0min.isQuantized = true;
Chris@0 306 f0min.quantizeStep = 1.0;
Chris@0 307 list.push_back(f0min);
Chris@0 308
Chris@0 309 ParameterDescriptor num_octs;
Chris@0 310 num_octs.identifier = "num_octs";
Chris@0 311 num_octs.name = "number of octaves";
Chris@0 312 num_octs.description = "Number of octaves for F0gram computation.";
Chris@0 313 num_octs.unit = "";
Chris@0 314 num_octs.minValue = 1;
Chris@0 315 num_octs.maxValue = 10;
Chris@0 316 num_octs.defaultValue = 4;
Chris@0 317 num_octs.isQuantized = true;
Chris@0 318 num_octs.quantizeStep = 1.0;
Chris@0 319 list.push_back(num_octs);
Chris@0 320
Chris@0 321 ParameterDescriptor f0s_per_oct;
Chris@0 322 f0s_per_oct.identifier = "f0s_per_oct";
Chris@0 323 f0s_per_oct.name = "f0 values per octave";
Chris@0 324 f0s_per_oct.description = "Number of f0 values per octave.";
Chris@0 325 f0s_per_oct.unit = "";
Chris@0 326 f0s_per_oct.minValue = 12;
Chris@0 327 f0s_per_oct.maxValue = 768;
Chris@0 328 f0s_per_oct.defaultValue = 192;
Chris@0 329 f0s_per_oct.isQuantized = true;
Chris@0 330 f0s_per_oct.quantizeStep = 1.0;
Chris@0 331 list.push_back(f0s_per_oct);
Chris@0 332
Chris@0 333 ParameterDescriptor f0_prefer_fun;
Chris@0 334 f0_prefer_fun.identifier = "f0_prefer_fun";
Chris@22 335 f0_prefer_fun.name = "Use f0 weighting";
Chris@22 336 f0_prefer_fun.description = "Whether to use a f0 weighting function to prefer frequencies nearer a mean value.";
Chris@0 337 f0_prefer_fun.unit = "";
Chris@0 338 f0_prefer_fun.minValue = 0;
Chris@0 339 f0_prefer_fun.maxValue = 1;
Chris@0 340 f0_prefer_fun.defaultValue = 1;
Chris@0 341 f0_prefer_fun.isQuantized = true;
Chris@0 342 f0_prefer_fun.quantizeStep = 1.0;
Chris@0 343 list.push_back(f0_prefer_fun);
Chris@0 344
Chris@0 345 ParameterDescriptor f0_prefer_mean;
Chris@0 346 f0_prefer_mean.identifier = "f0_prefer_mean";
Chris@22 347 f0_prefer_mean.name = "Mean pitch for f0 weighting";
Chris@0 348 f0_prefer_mean.description = "Mean value for f0 weighting function (MIDI number).";
Chris@0 349 f0_prefer_mean.unit = "";
Chris@0 350 f0_prefer_mean.minValue = 1;
Chris@0 351 f0_prefer_mean.maxValue = 127;
Chris@0 352 f0_prefer_mean.defaultValue = 60;
Chris@0 353 f0_prefer_mean.isQuantized = true;
Chris@0 354 f0_prefer_mean.quantizeStep = 1.0;
Chris@0 355 list.push_back(f0_prefer_mean);
Chris@0 356
Chris@0 357 ParameterDescriptor f0_prefer_stdev;
Chris@0 358 f0_prefer_stdev.identifier = "f0_prefer_stdev";
Chris@22 359 f0_prefer_stdev.name = "Stdev for f0 weighting";
Chris@22 360 f0_prefer_stdev.description = "Standard deviation for f0 weighting function (MIDI number).";
Chris@0 361 f0_prefer_stdev.unit = "";
Chris@0 362 f0_prefer_stdev.minValue = 1;
Chris@0 363 f0_prefer_stdev.maxValue = 127;
Chris@0 364 f0_prefer_stdev.defaultValue = 18;
Chris@0 365 f0_prefer_stdev.isQuantized = true;
Chris@0 366 f0_prefer_stdev.quantizeStep = 1.0;
Chris@0 367 list.push_back(f0_prefer_stdev);
Chris@0 368
Chris@0 369 ParameterDescriptor f0gram_mode;
Chris@0 370 f0gram_mode.identifier = "f0gram_mode";
Chris@0 371 f0gram_mode.name = "display mode of f0gram";
Chris@0 372 f0gram_mode.description = "Display all bins of the best direction, or the best bin for each direction.";
Chris@0 373 f0gram_mode.unit = "";
Chris@0 374 f0gram_mode.minValue = 0;
Chris@0 375 f0gram_mode.maxValue = 1;
Chris@0 376 f0gram_mode.defaultValue = 1;
Chris@0 377 f0gram_mode.isQuantized = true;
Chris@0 378 f0gram_mode.quantizeStep = 1.0;
Chris@0 379 list.push_back(f0gram_mode);
Chris@0 380
Chris@0 381 return list;
Chris@0 382 }
Chris@0 383
Chris@0 384 float
Chris@0 385 FChTransformF0gram::getParameter(string identifier) const {
Chris@0 386
Chris@0 387 if (identifier == "fmax") {
Chris@0 388 return m_fmax;
Chris@24 389 } else if (identifier == "nsamp_ix") {
Chris@24 390 for (int i = 0; i < int(nsamp_options.size()); ++i) {
Chris@24 391 if (m_warp_params.nsamps_twarp == nsamp_options[i]) {
Chris@24 392 return i;
Chris@24 393 }
Chris@24 394 }
Chris@24 395 throw std::logic_error("internal error: nsamps_twarp not in nsamp_options");
Chris@0 396 } else if (identifier == "alpha_max") {
Chris@0 397 return m_warp_params.alpha_max;
Chris@0 398 } else if (identifier == "num_warps") {
Chris@0 399 return m_warp_params.num_warps;
Chris@0 400 } else if (identifier == "alpha_dist") {
Chris@0 401 return m_warp_params.alpha_dist;
Chris@0 402 } else if (identifier == "f0min") {
Chris@0 403 return m_f0_params.f0min;
Chris@0 404 } else if (identifier == "num_octs") {
Chris@0 405 return m_f0_params.num_octs;
Chris@0 406 } else if (identifier == "f0s_per_oct") {
Chris@0 407 return m_f0_params.num_f0s_per_oct;
Chris@0 408 } else if (identifier == "f0_prefer_fun") {
Chris@22 409 return m_f0_params.prefer ? 1.0 : 0.0;
Chris@0 410 } else if (identifier == "f0_prefer_mean") {
Chris@0 411 return m_f0_params.prefer_mean;
Chris@0 412 } else if (identifier == "f0_prefer_stdev") {
Chris@0 413 return m_f0_params.prefer_stdev;
Chris@7 414 } else if (identifier == "f0gram_mode") {
Chris@15 415 return m_f0gram_mode == BestBinOfAllDirections ? 1.0 : 0.0;
Chris@0 416 } else {
Chris@0 417 return 0.f;
Chris@0 418 }
Chris@0 419
Chris@0 420 }
Chris@0 421
Chris@15 422 void FChTransformF0gram::setParameter(string identifier, float value)
Chris@15 423 {
Chris@0 424 if (identifier == "fmax") {
Chris@0 425 m_fmax = value;
Chris@24 426 } else if (identifier == "nsamp_ix") {
Chris@24 427 int n = int(roundf(value));
Chris@24 428 for (int i = 0; i < int(nsamp_options.size()); ++i) {
Chris@24 429 if (i == n) {
Chris@24 430 m_warp_params.nsamps_twarp = nsamp_options[i];
Chris@24 431 m_blockSize = m_warp_params.nsamps_twarp * 4;
Chris@24 432 }
Chris@24 433 }
Chris@0 434 } else if (identifier == "alpha_max") {
Chris@0 435 m_warp_params.alpha_max = value;
Chris@0 436 } else if (identifier == "num_warps") {
Chris@0 437 m_warp_params.num_warps = value;
Chris@0 438 } else if (identifier == "alpha_dist") {
Chris@0 439 m_warp_params.alpha_dist = value;
Chris@0 440 } else if (identifier == "f0min") {
Chris@0 441 m_f0_params.f0min = value;
Chris@0 442 } else if (identifier == "num_octs") {
Chris@0 443 m_f0_params.num_octs = value;
Chris@0 444 } else if (identifier == "f0s_per_oct") {
Chris@0 445 m_f0_params.num_f0s_per_oct = value;
Chris@0 446 } else if (identifier == "f0_prefer_fun") {
Chris@22 447 m_f0_params.prefer = (value > 0.5);
Chris@0 448 } else if (identifier == "f0_prefer_mean") {
Chris@0 449 m_f0_params.prefer_mean = value;
Chris@0 450 } else if (identifier == "f0_prefer_stdev") {
Chris@0 451 m_f0_params.prefer_stdev = value;
Chris@0 452 } else if (identifier == "f0gram_mode") {
Chris@15 453 m_f0gram_mode = (value > 0.5 ?
Chris@15 454 BestBinOfAllDirections :
Chris@15 455 AllBinsOfBestDirection);
Chris@15 456 } else {
Chris@15 457 cerr << "WARNING: Unknown parameter id \""
Chris@15 458 << identifier << "\"" << endl;
Chris@0 459 }
Chris@0 460 }
Chris@0 461
Chris@0 462 FChTransformF0gram::ProgramList
Chris@0 463 FChTransformF0gram::getPrograms() const {
Chris@0 464 ProgramList list;
Chris@0 465 return list;
Chris@0 466 }
Chris@0 467
Chris@0 468 FChTransformF0gram::OutputList
Chris@0 469 FChTransformF0gram::getOutputDescriptors() const {
Chris@0 470
Chris@0 471 OutputList list;
Chris@0 472
Chris@16 473 vector<string> labels;
Chris@16 474 char label[100];
Chris@0 475
Chris@16 476 if (m_processingMode == ModeF0Gram) {
Chris@16 477
Chris@16 478 /* f0 values of F0gram grid as string values */
Chris@16 479 for (int i = 0; i < m_num_f0s; ++i) {
Chris@16 480 sprintf(label, "%4.2f Hz", m_f0s[i]);
Chris@16 481 labels.push_back(label);
Chris@16 482 }
Chris@16 483
Chris@16 484 /* The F0gram */
Chris@16 485 OutputDescriptor d;
Chris@16 486 d.identifier = "f0gram";
Chris@19 487 d.name = "F0gram";
Chris@19 488 d.description = "The salience of the different f0s in the signal.";
Chris@16 489 d.hasFixedBinCount = true;
Chris@16 490 d.binCount = m_f0_params.num_octs * m_f0_params.num_f0s_per_oct;
Chris@16 491 d.binNames = labels;
Chris@16 492 d.hasKnownExtents = false;
Chris@16 493 d.isQuantized = false;
Chris@16 494 d.sampleType = OutputDescriptor::OneSamplePerStep;
Chris@16 495 d.hasDuration = false;
Chris@16 496 list.push_back(d);
Chris@16 497
Chris@19 498 d.identifier = "pitch";
Chris@19 499 d.name = "Most salient pitch";
Chris@19 500 d.description = "The most salient f0 in the signal for each time step.";
Chris@19 501 d.unit = "Hz";
Chris@19 502 d.hasFixedBinCount = true;
Chris@19 503 d.binCount = 1;
Chris@19 504 d.binNames.clear();
Chris@19 505 d.hasKnownExtents = false;
Chris@19 506 d.isQuantized = false;
Chris@19 507 d.sampleType = OutputDescriptor::OneSamplePerStep;
Chris@19 508 d.hasDuration = false;
Chris@19 509 list.push_back(d);
Chris@19 510
Chris@16 511 } else {
Chris@16 512
Chris@16 513 for (int i = 0; i < m_warp_params.nsamps_twarp/2+1; ++i) {
Chris@24 514 double freq = i * (m_warpings.fs_warp / m_warp_params.nsamps_twarp);
Chris@16 515 sprintf(label, "%4.2f Hz", freq);
Chris@16 516 labels.push_back(label);
Chris@16 517 }
Chris@16 518
Chris@16 519 OutputDescriptor d;
Chris@16 520 d.identifier = "spectrogram";
Chris@16 521 d.name = "Spectrogram";
Chris@16 522 d.description = "Time/frequency spectrogram derived from the Fan Chirp Transform output";
Chris@16 523 d.hasFixedBinCount = true;
Chris@16 524 d.binCount = m_warp_params.nsamps_twarp/2+1;
Chris@16 525 d.binNames = labels;
Chris@16 526 d.hasKnownExtents = false;
Chris@16 527 d.isQuantized = false;
Chris@16 528 d.sampleType = OutputDescriptor::OneSamplePerStep;
Chris@16 529 d.hasDuration = false;
Chris@16 530 list.push_back(d);
Chris@0 531 }
Chris@16 532
Chris@0 533 return list;
Chris@0 534 }
Chris@0 535
Chris@0 536 bool
Chris@0 537 FChTransformF0gram::initialise(size_t channels, size_t stepSize, size_t blockSize) {
Chris@0 538 if (channels < getMinChannelCount() ||
Chris@20 539 channels > getMaxChannelCount() ||
Chris@20 540 blockSize != m_blockSize/2 ||
Chris@20 541 stepSize != m_stepSize) {
Chris@14 542 return false;
Chris@14 543 }
Chris@0 544
Chris@20 545 m_inputBuffer = allocate_and_zero<float>(m_blockSize);
Chris@20 546
Chris@0 547 // WARNING !!!
Chris@0 548 // these values in fact are determined by the sampling frequency m_fs
Chris@0 549 // the parameters used below correspond to default values i.e. m_fs = 44.100 Hz
Chris@0 550 //m_blockSize = 4 * m_warp_params.nsamps_twarp;
Chris@16 551 // m_stepSize = floor(m_hop / m_warp_params.fact_over_samp);
Chris@16 552
Chris@16 553 /* design of FChT */
Chris@16 554 design_FChT();
Chris@0 555
Chris@0 556 /* initialise m_glogs_params */
Chris@7 557 design_GLogS();
Chris@0 558
Chris@7 559 design_LPF();
Chris@0 560
Chris@7 561 design_time_window();
Chris@0 562
Chris@7 563 // Create Hanning window for warped signals
Chris@14 564 mp_HanningWindow = allocate<double>(m_warp_params.nsamps_twarp);
Chris@7 565 bool normalize = false;
Chris@14 566 Utils::hanning_window(mp_HanningWindow, m_warp_params.nsamps_twarp, normalize);
Chris@0 567
Chris@16 568 m_num_f0s = m_f0_params.num_octs * m_f0_params.num_f0s_per_oct;
Chris@16 569 m_f0s = allocate<double>(m_num_f0s);
Chris@16 570 for (int i = 0; i < m_num_f0s; ++i) {
Chris@16 571 m_f0s[i] = m_glogs_f0[m_glogs_init_f0s + i];
Chris@16 572 }
Chris@20 573
Chris@20 574 m_initialised = true;
Chris@0 575 return true;
Chris@0 576 }
Chris@0 577
Chris@0 578 void
Chris@0 579 FChTransformF0gram::design_GLogS() {
Chris@0 580
Chris@7 581 // total number & initial quantity of f0s
Chris@16 582
Chris@10 583 m_glogs_init_f0s = (int)(((double)m_f0_params.num_f0s_per_oct)*log2(5.0))+1;
Chris@7 584 m_glogs_num_f0s = (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct + m_glogs_init_f0s;
Chris@0 585
Chris@7 586 // Initialize arrays
Chris@14 587 m_glogs_f0 = allocate<double>(m_glogs_num_f0s);
Chris@14 588 m_glogs = allocate<double>(m_glogs_num_f0s*m_warp_params.num_warps);
Chris@14 589 m_glogs_n = allocate<int>(m_glogs_num_f0s);
Chris@14 590 m_glogs_index = allocate<int>(m_glogs_num_f0s);
Chris@0 591
Chris@7 592 // Compute f0 values
Chris@7 593 m_glogs_harmonic_count = 0;
Chris@7 594 double factor = (double)(m_warp_params.nsamps_twarp/2)/(double)(m_warp_params.nsamps_twarp/2+1);
Chris@10 595 for (int i = 0; i < m_glogs_num_f0s; i++) {
Chris@7 596 m_glogs_f0[i] = (m_f0_params.f0min/5.0)*pow(2.0,(double)i/(double)m_f0_params.num_f0s_per_oct);
Chris@7 597 // for every f0 compute number of partials less or equal than m_fmax.
Chris@7 598 m_glogs_n[i] = m_fmax*factor/m_glogs_f0[i];
Chris@7 599 m_glogs_index[i] = m_glogs_harmonic_count;
Chris@7 600 m_glogs_harmonic_count += m_glogs_n[i];
Chris@7 601 }
Chris@0 602
Chris@7 603 // Initialize arrays for interpolation
Chris@14 604 m_glogs_posint = allocate<int>(m_glogs_harmonic_count);
Chris@14 605 m_glogs_posfrac = allocate<double>(m_glogs_harmonic_count);
Chris@14 606 m_glogs_interp = allocate<double>(m_glogs_harmonic_count);
Chris@0 607
Chris@7 608 // Compute int & frac of interpolation positions
Chris@10 609 int aux_index = 0;
Chris@7 610 double aux_pos;
Chris@10 611 for (int i = 0; i < m_glogs_num_f0s; i++) {
Chris@10 612 for (int j = 1; j <= m_glogs_n[i]; j++) {
Chris@18 613 aux_pos = ((double)j * m_glogs_f0[i]) * ((double)(m_warp_params.nsamps_twarp))/m_warpings.fs_warp;
Chris@10 614 m_glogs_posint[aux_index] = (int)aux_pos;
Chris@7 615 m_glogs_posfrac[aux_index] = aux_pos - (double)m_glogs_posint[aux_index];
Chris@7 616 aux_index++;
Chris@7 617 }
Chris@7 618 }
Chris@0 619
Chris@7 620 // Third harmonic attenuation
Chris@7 621 double aux_third_harmonic;
Chris@14 622 m_glogs_third_harmonic_posint = allocate<int>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@14 623 m_glogs_third_harmonic_posfrac = allocate<double>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@10 624 for (int i = 0; i < (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct; i++) {
Chris@7 625 aux_third_harmonic = (double)i + (double)m_glogs_init_f0s - ((double)m_f0_params.num_f0s_per_oct)*log2(3.0);
Chris@10 626 m_glogs_third_harmonic_posint[i] = (int)aux_third_harmonic;
Chris@7 627 m_glogs_third_harmonic_posfrac[i] = aux_third_harmonic - (double)(m_glogs_third_harmonic_posint[i]);
Chris@7 628 }
Chris@14 629 m_glogs_third_harmonic = allocate<double>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@0 630
Chris@7 631 // Fifth harmonic attenuation
Chris@7 632 double aux_fifth_harmonic;
Chris@14 633 m_glogs_fifth_harmonic_posint = allocate<int>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@14 634 m_glogs_fifth_harmonic_posfrac = allocate<double>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@10 635 for (int i = 0; i < (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct; i++) {
Chris@7 636 aux_fifth_harmonic = (double)i + (double)m_glogs_init_f0s - ((double)m_f0_params.num_f0s_per_oct)*log2(5.0);
Chris@10 637 m_glogs_fifth_harmonic_posint[i] = (int)aux_fifth_harmonic;
Chris@7 638 m_glogs_fifth_harmonic_posfrac[i] = aux_fifth_harmonic - (double)(m_glogs_fifth_harmonic_posint[i]);
Chris@7 639 }
Chris@14 640 m_glogs_fifth_harmonic = allocate<double>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@0 641
Chris@7 642 // Normalization & attenuation windows
Chris@14 643 m_glogs_f0_preference_weights = allocate<double>(m_f0_params.num_octs*m_f0_params.num_f0s_per_oct);
Chris@14 644 m_glogs_median_correction = allocate<double>(m_f0_params.num_octs*m_f0_params.num_f0s_per_oct);
Chris@14 645 m_glogs_sigma_correction = allocate<double>(m_f0_params.num_octs*m_f0_params.num_f0s_per_oct);
Chris@7 646 double MIDI_value;
Chris@10 647 for (int i = 0; i < m_f0_params.num_octs*m_f0_params.num_f0s_per_oct; i++) {
Chris@22 648 if (m_f0_params.prefer) {
Chris@22 649 MIDI_value = 69.0 + 12.0 * log2(m_glogs_f0[i + m_glogs_init_f0s]/440.0);
Chris@22 650 m_glogs_f0_preference_weights[i] = 1.0/sqrt(2.0*M_PI*m_f0_params.prefer_stdev*m_f0_params.prefer_stdev)*exp(-(MIDI_value-m_f0_params.prefer_mean)*(MIDI_value-m_f0_params.prefer_mean)/(2.0*m_f0_params.prefer_stdev*m_f0_params.prefer_stdev));
Chris@22 651 m_glogs_f0_preference_weights[i] = (0.01 + m_glogs_f0_preference_weights[i]) / (1.01);
Chris@22 652 } else {
Chris@22 653 m_glogs_f0_preference_weights[i] = 1.0;
Chris@22 654 }
Chris@0 655
Chris@7 656 m_glogs_median_correction[i] = m_glogs_params.median_poly_coefs[0]*(i+1.0)*(i+1.0) + m_glogs_params.median_poly_coefs[1]*(i+1.0) + m_glogs_params.median_poly_coefs[2];
Chris@7 657 m_glogs_sigma_correction[i] = 1.0 / (m_glogs_params.sigma_poly_coefs[0]*(i+1.0)*(i+1.0) + m_glogs_params.sigma_poly_coefs[1]*(i+1.0) + m_glogs_params.sigma_poly_coefs[2]);
Chris@7 658 }
Chris@0 659 }
Chris@0 660
Chris@0 661 void
Chris@0 662 FChTransformF0gram::design_FChT() {
Chris@0 663
Chris@0 664 /* ============= WARPING DESIGN ============= */
Chris@0 665
Chris@0 666 // sampling frequency after oversampling
Chris@0 667 m_warpings.fs_orig = m_warp_params.fact_over_samp * m_fs;
Chris@0 668
Chris@0 669 // number of samples of the original signal frame
Chris@0 670 m_warpings.nsamps_torig = 4 * m_warp_params.fact_over_samp * m_warp_params.nsamps_twarp;
Chris@0 671 // equivalent to: m_warpings.nsamps_torig = m_warp_params.fact_over_samp * m_blockSize;
Chris@0 672
Chris@0 673 // time instants of the original signal frame
Chris@14 674 double *t_orig = allocate<double>(m_warpings.nsamps_torig);
Chris@10 675 for (int ind = 0; ind < m_warpings.nsamps_torig; ind++) {
Chris@0 676 t_orig[ind] = ((double)(ind + 1) - (double)m_warpings.nsamps_torig / 2.0) / m_warpings.fs_orig;
Chris@0 677 }
Chris@0 678
Chris@0 679 // linear chirps warping definition as relative frequency deviation
Chris@7 680 //TODO
Chris@14 681 double *freq_relative = allocate<double>(m_warpings.nsamps_torig * m_warp_params.num_warps);
Chris@0 682 define_warps_linear_chirps(freq_relative, t_orig);
Chris@0 683
Chris@0 684 // maximum relative frequency deviation
Chris@0 685 double freq_relative_max = 0;
Chris@14 686 for (int i = 0; i < m_warpings.nsamps_torig; i++) {
Chris@14 687 for (int j = 0; j < m_warp_params.num_warps; j++) {
Chris@14 688 if (freq_relative_max < freq_relative[j * m_warpings.nsamps_torig + i]) {
Chris@0 689 freq_relative_max = freq_relative[j * m_warpings.nsamps_torig + i];
Chris@14 690 }
Chris@14 691 }
Chris@14 692 }
Chris@0 693
Chris@0 694 // sampling frequency of warped signal to be free of aliasing up to fmax
Chris@0 695 m_warpings.fs_warp = 2 * m_fmax * freq_relative_max;
Chris@0 696
Chris@0 697 // time instants of the warped signal frame
Chris@14 698 double *t_warp = allocate<double>(m_warp_params.nsamps_twarp);
Chris@10 699 for (int ind = 0; ind < m_warp_params.nsamps_twarp; ind++) {
Chris@0 700 t_warp[ind] = ((double)((int)(ind + 1)- (int)m_warp_params.nsamps_twarp / 2)) / (double)m_warpings.fs_warp;
Chris@0 701 }
Chris@0 702
Chris@0 703 // design of warpings for efficient interpolation
Chris@0 704 design_warps(freq_relative, t_orig, t_warp);
Chris@0 705
Chris@14 706 deallocate(freq_relative);
Chris@14 707 deallocate(t_orig);
Chris@14 708 deallocate(t_warp);
Chris@14 709
Chris@0 710 /* ============= FFTW PLAN DESIGN ============= */
Chris@7 711 // Initialize 2-d array for warped signals
Chris@14 712 x_warping = allocate<double>(m_warp_params.nsamps_twarp);
Chris@14 713 m_absFanChirpTransform = allocate<double>(m_warp_params.num_warps * (m_warp_params.nsamps_twarp/2 + 1));
Chris@14 714 m_auxFanChirpTransform = allocate<double>(2 * (m_warp_params.nsamps_twarp/2 + 1));
Chris@14 715 fft_xwarping = new FFTReal(m_warp_params.nsamps_twarp);
Chris@0 716 }
Chris@0 717
Chris@0 718 void
Chris@0 719 FChTransformF0gram::design_warps(double * freq_relative, double * t_orig, double * t_warp) {
Chris@0 720 /* the warping is done by interpolating the original signal in time instants
Chris@0 721 given by the desired frequency deviation, to do this, the interpolation
Chris@0 722 instants are stored in a structure as an integer index and a fractional value
Chris@0 723 hypothesis: sampling frequency at the central point equals the original
Chris@7 724 */
Chris@0 725
Chris@14 726 m_warpings.pos_int = allocate<int>(m_warp_params.num_warps * m_warp_params.nsamps_twarp);
Chris@14 727 m_warpings.pos_frac = allocate<double>(m_warp_params.num_warps * m_warp_params.nsamps_twarp);
Chris@0 728
Chris@7 729 // vector of phase values
Chris@14 730 double *phi = allocate<double>(m_warpings.nsamps_torig);
Chris@7 731 double aux;
Chris@0 732
Chris@7 733 // warped positions
Chris@14 734 double *pos1 = allocate<double>(m_warp_params.nsamps_twarp*m_warp_params.num_warps);
Chris@0 735
Chris@10 736 for (int i = 0; i < m_warp_params.num_warps; i++) {
Chris@0 737
Chris@7 738 // integration of relative frequency to obtain phase values
Chris@14 739 Utils::cumtrapz(t_orig, freq_relative + i*(m_warpings.nsamps_torig), m_warpings.nsamps_torig, phi);
Chris@0 740
Chris@7 741 // centering of phase values to force original frequency in the middle
Chris@7 742 aux = phi[m_warpings.nsamps_torig/2];
Chris@10 743 for (int j = 0; j < m_warpings.nsamps_torig; j++) {
Chris@7 744 phi[j] -= aux;
Chris@7 745 } //for
Chris@0 746
Chris@7 747 // interpolation of phase values to obtain warped positions
Chris@14 748 Utils::interp1(phi, t_orig, m_warpings.nsamps_torig, t_warp, pos1 + i*m_warp_params.nsamps_twarp, m_warp_params.nsamps_twarp);
Chris@0 749 }
Chris@0 750
Chris@0 751 // % previous sample index
Chris@0 752 // pos1_int = uint32(floor(pos1))';
Chris@0 753 // % integer corresponding to previous sample index in "c"
Chris@0 754 // warps.pos1_int = (pos1_int - uint32(1));
Chris@0 755 // % fractional value that defines the warped position
Chris@0 756 // warps.pos1_frac = (double(pos1)' - double(pos1_int));
Chris@0 757
Chris@10 758 for (int j = 0; j < m_warp_params.nsamps_twarp*m_warp_params.num_warps; j++) {
Chris@7 759 // previous sample index
Chris@7 760 pos1[j] = pos1[j]*m_warpings.fs_orig + m_warpings.nsamps_torig/2 + 1;
Chris@10 761 m_warpings.pos_int[j] = (int) pos1[j];
Chris@7 762 m_warpings.pos_frac[j] = pos1[j] - (double)(m_warpings.pos_int[j]);
Chris@7 763 } //for
Chris@0 764
Chris@14 765 deallocate(phi);
Chris@14 766 deallocate(pos1);
Chris@0 767 }
Chris@0 768
Chris@0 769 void
Chris@0 770 FChTransformF0gram::define_warps_linear_chirps(double * freq_relative, double * t_orig) {
Chris@0 771 /** define warps as relative frequency deviation from original frequency
Chris@7 772 t_orig : time vector
Chris@7 773 freq_relative : relative frequency deviations
Chris@7 774 */
Chris@0 775 if (m_warp_params.alpha_dist == 0) {
Chris@0 776
Chris@0 777 // linear alpha values spacing
Chris@14 778 m_warpings.chirp_rates = allocate<double>(m_warp_params.num_warps);
Chris@0 779 // WARNING m_warp_params.num_warps must be odd
Chris@0 780 m_warpings.chirp_rates[0] = -m_warp_params.alpha_max;
Chris@0 781 double increment = (double) m_warp_params.alpha_max / ((m_warp_params.num_warps - 1) / 2);
Chris@0 782
Chris@10 783 for (int ind = 1; ind < m_warp_params.num_warps; ind++) {
Chris@0 784 m_warpings.chirp_rates[ind] = m_warpings.chirp_rates[ind - 1] + increment;
Chris@0 785 }
Chris@0 786 // force zero value
Chris@0 787 m_warpings.chirp_rates[(int) ((m_warp_params.num_warps - 1) / 2)] = 0;
Chris@0 788
Chris@0 789 } else {
Chris@0 790 // log alpha values spacing
Chris@14 791 m_warpings.chirp_rates = allocate<double>(m_warp_params.num_warps);
Chris@0 792
Chris@0 793 // force zero value
Chris@0 794 int middle_point = (int) ((m_warp_params.num_warps - 1) / 2);
Chris@0 795 m_warpings.chirp_rates[middle_point] = 0;
Chris@0 796
Chris@0 797 double logMax = log10(m_warp_params.alpha_max + 1);
Chris@0 798 double increment = logMax / ((m_warp_params.num_warps - 1) / 2.0f);
Chris@0 799 double exponent = 0;
Chris@0 800
Chris@0 801 // fill positive values
Chris@0 802 int ind_log = middle_point;
Chris@10 803 for (int ind = 0; ind < (m_warp_params.num_warps + 1) / 2; ind++) {
Chris@0 804 m_warpings.chirp_rates[ind_log] = pow(10, exponent) - 1;
Chris@0 805 exponent += increment;
Chris@0 806 ind_log++;
Chris@0 807 }
Chris@0 808 // fill negative values
Chris@10 809 for (int ind = 0; ind < (m_warp_params.num_warps - 1) / 2; ind++) {
Chris@0 810 m_warpings.chirp_rates[ind] = -m_warpings.chirp_rates[m_warp_params.num_warps - 1 - ind];
Chris@0 811 }
Chris@0 812 }
Chris@0 813
Chris@0 814 // compute relative frequency deviation
Chris@14 815 for (int i = 0; i < m_warpings.nsamps_torig; i++) {
Chris@14 816 for (int j = 0; j < m_warp_params.num_warps; j++) {
Chris@0 817 freq_relative[j * m_warpings.nsamps_torig + i] = 1.0 + t_orig[i] * m_warpings.chirp_rates[j];
Chris@14 818 }
Chris@14 819 }
Chris@0 820 }
Chris@0 821
Chris@0 822 void
Chris@14 823 FChTransformF0gram::design_LPF()
Chris@14 824 {
Chris@14 825 double *lp_LPFWindow_aux = allocate<double>(m_blockSize/2+1);
Chris@14 826 mp_LPFWindow = allocate<double>(m_blockSize/2+1);
Chris@0 827
Chris@10 828 int i_max = (int) ((2.0*m_fmax/m_fs) * ( (double)m_blockSize / 2.0 + 1.0 ));
Chris@10 829 for (int i = 0; i < m_blockSize/2+1; i++) {
Chris@0 830 if (i >= i_max) {
Chris@0 831 lp_LPFWindow_aux[i] = 0.0;
Chris@0 832 } else {
Chris@0 833 lp_LPFWindow_aux[i] = 1.0;
Chris@0 834 }
Chris@0 835 }
Chris@14 836
Chris@14 837 LPF_time = allocate_and_zero<double>(m_warpings.nsamps_torig);
Chris@14 838 LPF_frequency = allocate_and_zero<double>(2 * (m_warpings.nsamps_torig/2 + 1));
Chris@14 839
Chris@14 840 fft_forward_LPF = new FFTReal(m_blockSize);
Chris@14 841 fft_inverse_LPF = new FFTReal(m_warpings.nsamps_torig);
Chris@0 842
Chris@10 843 int winWidth = 11;
Chris@14 844 double *lp_hanningWindow = allocate<double>(winWidth);
Chris@0 845 double accum=0;
Chris@10 846 for (int i = 0; i < winWidth; i++) {
Chris@0 847 lp_hanningWindow[i]=0.5*(1.0-cos(2*M_PI*(double)(i+1)/((double)winWidth+1.0)));
Chris@0 848 accum+=lp_hanningWindow[i];
Chris@0 849
Chris@0 850 }
Chris@10 851 for (int i = 0; i < winWidth; i++) { //window normalization
Chris@0 852 lp_hanningWindow[i]=lp_hanningWindow[i]/accum;
Chris@0 853 }
Chris@10 854 for (int i = 0; i < m_blockSize/2+1; i++) {
Chris@0 855 //if (((i-(winWidth-1)/2)<0)||(i+(winWidth-1))/2>m_blockSize/2-1) {//consideramos winWidth impar, si la ventana sale del arreglo se rellena con el valor origianl
Chris@7 856 if ( (i > (i_max + (winWidth-1)/2)) || (i <= (i_max - (winWidth-1)/2)) ) {
Chris@0 857 mp_LPFWindow[i]=lp_LPFWindow_aux[i];
Chris@0 858 } else {
Chris@0 859 accum=0;
Chris@10 860 for (int j = -((winWidth-1)/2); j <= (winWidth-1)/2; j++) {
Chris@0 861 accum+=lp_LPFWindow_aux[i-j]*lp_hanningWindow[j+(winWidth-1)/2];
Chris@7 862 }
Chris@0 863 mp_LPFWindow[i]=accum;
Chris@0 864 }
Chris@0 865 }
Chris@0 866
Chris@14 867 deallocate(lp_LPFWindow_aux);
Chris@14 868 deallocate(lp_hanningWindow);
Chris@0 869 }
Chris@0 870
Chris@14 871 void FChTransformF0gram::apply_LPF()
Chris@14 872 {
Chris@14 873 fft_forward_LPF->forward(LPF_time, LPF_frequency);
Chris@14 874
Chris@10 875 for (int i = 0; i < m_blockSize/2+1; i++) {
Chris@16 876 LPF_frequency[i*2] *= mp_LPFWindow[i];
Chris@16 877 LPF_frequency[i*2 + 1] *= mp_LPFWindow[i];
Chris@0 878 }
Chris@14 879
Chris@14 880 fft_inverse_LPF->inverse(LPF_frequency, LPF_time);
Chris@20 881
Chris@7 882 // TODO ver si hay que hacer fftshift para corregir la fase respecto al centro del frame.
Chris@7 883 // nota: ademÔs de aplicar el LPF, esta función resamplea la señal original.
Chris@0 884 }
Chris@0 885
Chris@14 886 void FChTransformF0gram::clean_LPF()
Chris@14 887 {
Chris@14 888 delete fft_forward_LPF;
Chris@14 889 delete fft_inverse_LPF;
Chris@14 890 deallocate(LPF_time);
Chris@14 891 deallocate(LPF_frequency);
Chris@14 892 deallocate(mp_LPFWindow);
Chris@0 893 }
Chris@0 894
Chris@14 895 void FChTransformF0gram::reset()
Chris@14 896 {
Chris@0 897 }
Chris@0 898
Chris@0 899 FChTransformF0gram::FeatureSet
Chris@5 900 FChTransformF0gram::process(const float *const *inputBuffers, Vamp::RealTime) {
Chris@0 901
Chris@20 902 if (!m_initialised) return FeatureSet();
Chris@20 903
Chris@7 904 /* PSEUDOCƓDIGO:
Chris@7 905 - Aplicar FFT al frame entero.
Chris@7 906 - Filtro pasabajos en frecuencia.
Chris@7 907 - FFT inversa al frame entero.
Chris@7 908 -----------------------------------------------------------------------------
Chris@7 909 - Para cada warp: *Si es un espectrograma direccional (un solo warp
Chris@7 910 => no es para cada warp sino para el elegido)
Chris@7 911 - Hacer la interpolación con interp1q.
Chris@7 912 - Aplicar la FFT al frame warpeado.
Chris@7 913 - (Opcional) GLogS.
Chris@7 914 - ...
Chris@7 915 */
Chris@0 916
Chris@0 917 //---------------------------------------------------------------------------
Chris@7 918 FeatureSet fs;
Chris@0 919
Chris@7 920 #ifdef DEBUG
Chris@16 921 fprintf(stderr, "\n ----- DEBUG INFORMATION ----- \n");
Chris@16 922 fprintf(stderr, " m_fs = %f Hz.\n",m_fs);
Chris@16 923 fprintf(stderr, " fs_orig = %f Hz.\n",m_warpings.fs_orig);
Chris@16 924 fprintf(stderr, " fs_warp = %f Hz.\n",m_warpings.fs_warp);
Chris@16 925 fprintf(stderr, " m_blockSize = %d.\n",m_blockSize);
Chris@16 926 fprintf(stderr, " m_warpings.nsamps_torig = %d.\n",m_warpings.nsamps_torig);
Chris@24 927 fprintf(stderr, " m_warp_params.nsamps_twarp = %d.\n",m_warp_params.nsamps_twarp);
Chris@16 928 fprintf(stderr, " m_warp_params.num_warps = %d.\n",m_warp_params.num_warps);
Chris@16 929 fprintf(stderr, " m_glogs_harmonic_count = %d.\n",m_glogs_harmonic_count);
Chris@7 930 #endif
Chris@0 931
Chris@20 932 for (int i = 0; i < m_blockSize - m_stepSize; ++i) {
Chris@20 933 m_inputBuffer[i] = m_inputBuffer[i + m_stepSize];
Chris@0 934 }
Chris@20 935 for (int i = 0; i < m_blockSize/2; ++i) {
Chris@20 936 m_inputBuffer[m_blockSize/2 + i] = inputBuffers[0][i];
Chris@20 937 }
Chris@20 938 for (int i = 0; i < m_blockSize; ++i) {
Chris@20 939 LPF_time[i] = m_inputBuffer[i] * m_timeWindow[i];
Chris@20 940 }
Chris@20 941 for (int i = 0; i < m_blockSize; ++i) {
Chris@20 942 LPF_time[m_blockSize + i] = 0.0;
Chris@20 943 }
Chris@20 944
Chris@7 945 apply_LPF();
Chris@7 946 // SeƱal filtrada queda en LPF_time
Chris@0 947
Chris@7 948 Feature feature;
Chris@0 949 feature.hasTimestamp = false;
Chris@0 950
Chris@15 951 if (m_processingMode == ModeRoughSpectrogram) {
Chris@15 952 feature.values = vector<float>(m_warp_params.nsamps_twarp/2+1, 0.f);
Chris@15 953 }
Chris@15 954
Chris@0 955 // ----------------------------------------------------------------------------------------------
Chris@0 956 // Hanning window & FFT for all warp directions
Chris@0 957
Chris@7 958 double max_glogs = -DBL_MAX;
Chris@10 959 int ind_max_glogs = 0;
Chris@0 960
Chris@10 961 for (int i_warp = 0; i_warp < m_warp_params.num_warps; i_warp++) {
Chris@16 962
Chris@7 963 // Interpolate
Chris@14 964 Utils::interp1q(LPF_time, (m_warpings.pos_int) + i_warp*m_warp_params.nsamps_twarp, m_warpings.pos_frac + i_warp*m_warp_params.nsamps_twarp, x_warping, m_warp_params.nsamps_twarp);
Chris@0 965
Chris@7 966 // Apply window
Chris@10 967 for (int i = 0; i < m_warp_params.nsamps_twarp; i++) {
Chris@7 968 x_warping[i] *= mp_HanningWindow[i];
Chris@7 969 }
Chris@0 970
Chris@7 971 // Transform
Chris@14 972 fft_xwarping->forward(x_warping, m_auxFanChirpTransform);
Chris@0 973
Chris@15 974 if (m_processingMode == ModeRoughSpectrogram) {
Chris@15 975 for (int i = 0; i < (m_warp_params.nsamps_twarp/2+1); i++) {
Chris@15 976 double abs = sqrt(m_auxFanChirpTransform[i*2]*m_auxFanChirpTransform[i*2]+m_auxFanChirpTransform[i*2+1]*m_auxFanChirpTransform[i*2+1]);
Chris@15 977 if (abs > feature.values[i]) {
Chris@15 978 feature.values[i] = abs;
Chris@15 979 }
Chris@15 980 }
Chris@15 981 continue;
Chris@15 982 }
Chris@15 983
Chris@7 984 // Copy result
Chris@7 985 double *aux_abs_fcht = m_absFanChirpTransform + i_warp*(m_warp_params.nsamps_twarp/2+1);
Chris@10 986 for (int i = 0; i < (m_warp_params.nsamps_twarp/2+1); i++) {
Chris@14 987 aux_abs_fcht[i] = log10(1.0 + 10.0*sqrt(m_auxFanChirpTransform[i*2]*m_auxFanChirpTransform[i*2]+m_auxFanChirpTransform[i*2+1]*m_auxFanChirpTransform[i*2+1]));
Chris@7 988 }
Chris@0 989
Chris@0 990 // -----------------------------------------------------------------------------------------
Chris@0 991 // GLogS
Chris@14 992 Utils::interp1q(aux_abs_fcht, m_glogs_posint, m_glogs_posfrac, m_glogs_interp, m_glogs_harmonic_count);
Chris@10 993 int glogs_ind = 0;
Chris@10 994 for (int i = 0; i < m_glogs_num_f0s; i++) {
Chris@7 995 double glogs_accum = 0;
Chris@10 996 for (int j = 1; j <= m_glogs_n[i]; j++) {
Chris@7 997 glogs_accum += m_glogs_interp[glogs_ind++];
Chris@7 998 }
Chris@7 999 m_glogs[i + i_warp*m_glogs_num_f0s] = glogs_accum/(double)m_glogs_n[i];
Chris@7 1000 }
Chris@0 1001
Chris@0 1002 // Sub/super harmonic correction
Chris@14 1003 Utils::interp1q(m_glogs + i_warp*m_glogs_num_f0s, m_glogs_third_harmonic_posint, m_glogs_third_harmonic_posfrac, m_glogs_third_harmonic, (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@14 1004 Utils::interp1q(m_glogs + i_warp*m_glogs_num_f0s, m_glogs_fifth_harmonic_posint, m_glogs_fifth_harmonic_posfrac, m_glogs_fifth_harmonic, (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@10 1005 for (int i = m_glogs_num_f0s-1; i >= m_glogs_init_f0s; i--) {
Chris@7 1006 m_glogs[i + i_warp*m_glogs_num_f0s] -= MAX(MAX(m_glogs[i-m_f0_params.num_f0s_per_oct + i_warp*m_glogs_num_f0s],m_glogs_third_harmonic[i-m_glogs_init_f0s]),m_glogs_fifth_harmonic[i-m_glogs_init_f0s]);
Chris@7 1007 }
Chris@10 1008 for (int i = m_glogs_init_f0s; i < m_glogs_num_f0s-m_f0_params.num_f0s_per_oct; i++) {
Chris@7 1009 m_glogs[i + i_warp*m_glogs_num_f0s] -= 0.3*m_glogs[i+m_f0_params.num_f0s_per_oct + i_warp*m_glogs_num_f0s];
Chris@7 1010 // Median, sigma $ weights correction
Chris@7 1011 m_glogs[i + i_warp*m_glogs_num_f0s] = (m_glogs[i + i_warp*m_glogs_num_f0s]-m_glogs_median_correction[i-m_glogs_init_f0s])*m_glogs_sigma_correction[i-m_glogs_init_f0s]*m_glogs_f0_preference_weights[i-m_glogs_init_f0s];
Chris@7 1012 }
Chris@0 1013
Chris@7 1014 // Look for maximum value to determine best direction
Chris@10 1015 for (int i = m_glogs_init_f0s; i < m_glogs_num_f0s-m_f0_params.num_f0s_per_oct; i++) {
Chris@7 1016 if (m_glogs[i + i_warp*m_glogs_num_f0s] > max_glogs) {
Chris@7 1017 max_glogs = m_glogs[i + i_warp*m_glogs_num_f0s];
Chris@7 1018 ind_max_glogs = i_warp;
Chris@7 1019 }
Chris@7 1020 }
Chris@7 1021 }
Chris@0 1022
Chris@15 1023 if (m_processingMode == ModeRoughSpectrogram) {
Chris@15 1024
Chris@15 1025 // already accumulated our return values in feature
Chris@19 1026 fs[0].push_back(feature);
Chris@15 1027
Chris@15 1028 } else if (m_processingMode == ModeSpectrogram) {
Chris@15 1029
Chris@15 1030 for (int i = 0; i < m_warp_params.nsamps_twarp/2+1; i++) {
Chris@15 1031 feature.values.push_back(pow(10.0, m_absFanChirpTransform[ind_max_glogs * (m_warp_params.nsamps_twarp/2+1) + i]) - 1.0);
Chris@15 1032 }
Chris@19 1033 fs[0].push_back(feature);
Chris@15 1034
Chris@15 1035 } else { // f0gram
Chris@15 1036
Chris@19 1037 int bestIndex = -1;
Chris@19 1038
Chris@15 1039 for (int i=m_glogs_init_f0s; i< m_glogs_num_f0s - m_f0_params.num_f0s_per_oct; i++) {
Chris@19 1040 double value = 0.0;
Chris@15 1041 switch (m_f0gram_mode) {
Chris@15 1042 case AllBinsOfBestDirection:
Chris@19 1043 value = m_glogs[i+(int)ind_max_glogs*(int)m_glogs_num_f0s];
Chris@15 1044 break;
Chris@15 1045 case BestBinOfAllDirections:
Chris@15 1046 max_glogs = -DBL_MAX;
Chris@15 1047 for (int i_warp = 0; i_warp < m_warp_params.num_warps; i_warp++) {
Chris@15 1048 if (m_glogs[i + i_warp*m_glogs_num_f0s] > max_glogs) {
Chris@15 1049 max_glogs = m_glogs[i + i_warp*m_glogs_num_f0s];
Chris@15 1050 ind_max_glogs = i_warp;
Chris@15 1051 }
Chris@7 1052 }
Chris@19 1053 value = max_glogs;
Chris@15 1054 break;
Chris@7 1055 }
Chris@19 1056 if (bestIndex < 0 || float(value) > feature.values[bestIndex]) {
Chris@19 1057 bestIndex = int(feature.values.size());
Chris@19 1058 }
Chris@19 1059 feature.values.push_back(float(value));
Chris@19 1060 }
Chris@19 1061
Chris@19 1062 fs[0].push_back(feature);
Chris@19 1063
Chris@19 1064 if (bestIndex >= 0) {
Chris@19 1065
Chris@19 1066 double bestValue = feature.values[bestIndex];
Chris@19 1067 set<double> ordered(feature.values.begin(), feature.values.end());
Chris@19 1068 vector<double> flattened(ordered.begin(), ordered.end());
Chris@19 1069 double median = flattened[flattened.size()/2];
Chris@19 1070 if (bestValue > median * 8.0) {
Chris@19 1071 Feature pfeature;
Chris@19 1072 pfeature.hasTimestamp = false;
Chris@19 1073 pfeature.values.push_back(m_f0s[bestIndex]);
Chris@19 1074 fs[1].push_back(pfeature);
Chris@19 1075 }
Chris@7 1076 }
Chris@7 1077 }
Chris@0 1078
Chris@7 1079 return fs;
Chris@0 1080 }
Chris@0 1081
Chris@0 1082 FChTransformF0gram::FeatureSet
Chris@0 1083 FChTransformF0gram::getRemainingFeatures() {
Chris@0 1084 return FeatureSet();
Chris@0 1085 }
Chris@0 1086
Chris@0 1087 void
Chris@0 1088 FChTransformF0gram::design_time_window() {
Chris@0 1089
Chris@20 1090 int transitionWidth = (int)m_blockSize/128 + 128;
Chris@14 1091 m_timeWindow = allocate<double>(m_blockSize);
Chris@14 1092 double *lp_transitionWindow = allocate<double>(transitionWidth);
Chris@0 1093
Chris@10 1094 for (int i = 0; i < m_blockSize; i++) {
Chris@7 1095 m_timeWindow[i] = 1.0;
Chris@7 1096 }
Chris@0 1097
Chris@10 1098 for (int i = 0; i < transitionWidth; i++) {
Chris@0 1099 lp_transitionWindow[i]=0.5*(1.0-cos(2*M_PI*(double)(i+1)/((double)transitionWidth+1.0)));
Chris@0 1100 }
Chris@0 1101
Chris@10 1102 for (int i = 0; i < transitionWidth/2; i++) {
Chris@7 1103 m_timeWindow[i] = lp_transitionWindow[i];
Chris@7 1104 m_timeWindow[m_blockSize-1-i] = lp_transitionWindow[transitionWidth-1-i];
Chris@7 1105 }
Chris@0 1106
Chris@7 1107 #ifdef DEBUG
Chris@7 1108 for (int i = 0; i < m_blockSize; i++) {
Chris@7 1109 if ((i<transitionWidth)) {
Chris@16 1110 fprintf(stderr, " m_timeWindow[%d] = %f.\n",i,m_timeWindow[i]);
Chris@7 1111 }
Chris@7 1112 }
Chris@7 1113 #endif
Chris@0 1114
Chris@14 1115 deallocate(lp_transitionWindow);
Chris@0 1116 }
Chris@0 1117