annotate FChTransformF0gram.cpp @ 14:44b86c346a5a perf

Switch to Vamp SDK FFT implementation (it is close enough in performance - FFTs aren't really a bottleneck here - and simpler for the build) and use bqvec allocators
author Chris Cannam
date Tue, 02 Oct 2018 16:38:52 +0100
parents fc8f351d2cd6
children 0a860992b4f4
rev   line source
Chris@0 1 /*
Chris@0 2 copyright (C) 2011 I. Irigaray, M. Rocamora
Chris@0 3
Chris@0 4 This program is free software: you can redistribute it and/or modify
Chris@0 5 it under the terms of the GNU General Public License as published by
Chris@0 6 the Free Software Foundation, either version 3 of the License, or
Chris@0 7 (at your option) any later version.
Chris@0 8
Chris@0 9 This program is distributed in the hope that it will be useful,
Chris@0 10 but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@0 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@0 12 GNU General Public License for more details.
Chris@0 13
Chris@0 14 You should have received a copy of the GNU General Public License
Chris@0 15 along with this program. If not, see <http://www.gnu.org/licenses/>.
Chris@7 16 */
Chris@0 17
Chris@0 18 #include "FChTransformF0gram.h"
Chris@0 19 #include "FChTransformUtils.h"
Chris@0 20 #include <math.h>
Chris@0 21 #include <float.h>
Chris@14 22
Chris@14 23 #include "bqvec/Allocators.h"
Chris@14 24
Chris@14 25 using namespace breakfastquay;
Chris@14 26
Chris@0 27 //#define DEBUG
Chris@7 28
Chris@0 29 #define MAX(x, y) (((x) > (y)) ? (x) : (y))
Chris@0 30
Chris@0 31 FChTransformF0gram::FChTransformF0gram(float inputSampleRate) :
Chris@7 32 Plugin(inputSampleRate),
Chris@7 33 m_currentProgram("default"),
Chris@7 34 m_stepSize(0), // We are using 0 for step and block size to indicate "not yet set".
Chris@7 35 m_blockSize(0) {
Chris@0 36
Chris@0 37 m_fs = inputSampleRate;
Chris@0 38 // max frequency of interest (Hz)
Chris@0 39 m_fmax = 10000.f;
Chris@0 40 // warping parameters
Chris@12 41 m_warp_params.nsamps_twarp = 2048;
Chris@0 42 //m_warp_params.nsamps_twarp = 8;
Chris@0 43 m_warp_params.alpha_max = 4;
Chris@0 44 m_warp_params.num_warps = 21;
Chris@0 45 //m_warp_params.num_warps = 11;
Chris@0 46 m_warp_params.fact_over_samp = 2;
Chris@0 47 m_warp_params.alpha_dist = 0;
Chris@0 48 // f0 parameters
Chris@0 49 m_f0_params.f0min = 80.0;
Chris@0 50 m_f0_params.num_octs = 4;
Chris@0 51 m_f0_params.num_f0s_per_oct = 192;
Chris@0 52 m_f0_params.num_f0_hyps = 5;
Chris@0 53 m_f0_params.prefer = true;
Chris@0 54 m_f0_params.prefer_mean = 60;
Chris@0 55 m_f0_params.prefer_stdev = 18;
Chris@0 56 // glogs parameters
Chris@0 57 m_glogs_params.HP_logS = true;
Chris@0 58 m_glogs_params.att_subharms = 1;
Chris@7 59 // display parameters
Chris@7 60 m_f0gram_mode = true;
Chris@0 61
Chris@0 62 m_glogs_params.median_poly_coefs[0] = -0.000000058551680;
Chris@0 63 m_glogs_params.median_poly_coefs[1] = -0.000006945207775;
Chris@0 64 m_glogs_params.median_poly_coefs[2] = 0.002357223226588;
Chris@0 65
Chris@0 66 m_glogs_params.sigma_poly_coefs[0] = 0.000000092782308;
Chris@0 67 m_glogs_params.sigma_poly_coefs[1] = 0.000057283574898;
Chris@0 68 m_glogs_params.sigma_poly_coefs[2] = 0.022199903714288;
Chris@0 69
Chris@0 70 // number of fft points (controls zero-padding)
Chris@0 71 m_nfft = m_warp_params.nsamps_twarp;
Chris@0 72 // hop in samples
Chris@0 73 m_hop = m_warp_params.fact_over_samp * 256;
Chris@0 74
Chris@0 75 m_num_f0s = 0;
Chris@0 76
Chris@0 77 }
Chris@0 78
Chris@14 79 FChTransformF0gram::~FChTransformF0gram()
Chris@14 80 {
Chris@14 81 if (!m_blockSize) {
Chris@14 82 return; // nothing was allocated
Chris@14 83 }
Chris@14 84
Chris@14 85 deallocate(m_warpings.pos_int);
Chris@14 86 deallocate(m_warpings.pos_frac);
Chris@14 87 deallocate(m_warpings.chirp_rates);
Chris@14 88
Chris@14 89 clean_LPF();
Chris@14 90
Chris@14 91 deallocate(m_timeWindow);
Chris@14 92
Chris@14 93 deallocate(mp_HanningWindow);
Chris@14 94
Chris@14 95 // Warping
Chris@14 96 deallocate(x_warping);
Chris@14 97 delete fft_xwarping;
Chris@14 98 deallocate(m_absFanChirpTransform);
Chris@14 99 deallocate(m_auxFanChirpTransform);
Chris@14 100
Chris@14 101 // design_GLogS
Chris@14 102 deallocate(m_glogs_f0);
Chris@14 103 deallocate(m_glogs);
Chris@14 104 deallocate(m_glogs_n);
Chris@14 105 deallocate(m_glogs_index);
Chris@14 106 deallocate(m_glogs_posint);
Chris@14 107 deallocate(m_glogs_posfrac);
Chris@14 108 deallocate(m_glogs_interp);
Chris@14 109 deallocate(m_glogs_third_harmonic_posint);
Chris@14 110 deallocate(m_glogs_third_harmonic_posfrac);
Chris@14 111 deallocate(m_glogs_third_harmonic);
Chris@14 112 deallocate(m_glogs_fifth_harmonic_posint);
Chris@14 113 deallocate(m_glogs_fifth_harmonic_posfrac);
Chris@14 114 deallocate(m_glogs_fifth_harmonic);
Chris@14 115 deallocate(m_glogs_f0_preference_weights);
Chris@14 116 deallocate(m_glogs_median_correction);
Chris@14 117 deallocate(m_glogs_sigma_correction);
Chris@0 118 }
Chris@0 119
Chris@0 120 string
Chris@0 121 FChTransformF0gram::getIdentifier() const {
Chris@0 122 return "fchtransformf0gram";
Chris@0 123 }
Chris@0 124
Chris@0 125 string
Chris@0 126 FChTransformF0gram::getName() const {
Chris@0 127 return "Fan Chirp Transform F0gram";
Chris@0 128 }
Chris@0 129
Chris@0 130 string
Chris@0 131 FChTransformF0gram::getDescription() const {
Chris@0 132 // Return something helpful here!
Chris@0 133 return "This plug-in produces a representation, called F0gram, which exhibits the salience of the fundamental frequency of the sound sources in the audio file. The computation of the F0gram makes use of the Fan Chirp Transform analysis. It is based on the article \"Fan chirp transform for music representation\" P. Cancela, E. Lopez, M. Rocamora, International Conference on Digital Audio Effects, 13th. DAFx-10. Graz, Austria - 6-10 Sep 2010.";
Chris@0 134 }
Chris@0 135
Chris@0 136 string
Chris@0 137 FChTransformF0gram::getMaker() const {
Chris@0 138 // Your name here
Chris@0 139 return "Audio Processing Group \n Universidad de la Republica";
Chris@0 140 }
Chris@0 141
Chris@0 142 int
Chris@0 143 FChTransformF0gram::getPluginVersion() const {
Chris@0 144 // Increment this each time you release a version that behaves
Chris@0 145 // differently from the previous one
Chris@0 146 //
Chris@0 147 // 0 - initial version from scratch
Chris@0 148 return 0;
Chris@0 149 }
Chris@0 150
Chris@0 151 string
Chris@0 152 FChTransformF0gram::getCopyright() const {
Chris@0 153 // This function is not ideally named. It does not necessarily
Chris@0 154 // need to say who made the plugin -- getMaker does that -- but it
Chris@0 155 // should indicate the terms under which it is distributed. For
Chris@0 156 // example, "Copyright (year). All Rights Reserved", or "GPL"
Chris@0 157 return "copyright (C) 2011 GPL - Audio Processing Group, UdelaR";
Chris@0 158 }
Chris@0 159
Chris@0 160 FChTransformF0gram::InputDomain
Chris@0 161 FChTransformF0gram::getInputDomain() const {
Chris@0 162 return TimeDomain;
Chris@0 163 }
Chris@0 164
Chris@0 165 size_t FChTransformF0gram::getPreferredBlockSize() const {
Chris@0 166 return 8192; // 0 means "I can handle any block size"
Chris@0 167 }
Chris@0 168
Chris@0 169 size_t
Chris@0 170 FChTransformF0gram::getPreferredStepSize() const {
Chris@0 171 return 256; // 0 means "anything sensible"; in practice this
Chris@0 172 // means the same as the block size for TimeDomain
Chris@0 173 // plugins, or half of it for FrequencyDomain plugins
Chris@0 174 }
Chris@0 175
Chris@0 176 size_t
Chris@0 177 FChTransformF0gram::getMinChannelCount() const {
Chris@0 178 return 1;
Chris@0 179 }
Chris@0 180
Chris@0 181 size_t
Chris@0 182 FChTransformF0gram::getMaxChannelCount() const {
Chris@0 183 return 1;
Chris@0 184 }
Chris@0 185
Chris@0 186 FChTransformF0gram::ParameterList
Chris@0 187 FChTransformF0gram::getParameterDescriptors() const {
Chris@0 188 ParameterList list;
Chris@0 189
Chris@0 190 // If the plugin has no adjustable parameters, return an empty
Chris@0 191 // list here (and there's no need to provide implementations of
Chris@0 192 // getParameter and setParameter in that case either).
Chris@0 193
Chris@0 194 // Note that it is your responsibility to make sure the parameters
Chris@0 195 // start off having their default values (e.g. in the constructor
Chris@0 196 // above). The host needs to know the default value so it can do
Chris@0 197 // things like provide a "reset to default" function, but it will
Chris@0 198 // not explicitly set your parameters to their defaults for you if
Chris@0 199 // they have not changed in the mean time.
Chris@0 200
Chris@0 201 // ============= WARPING PARAMETERS =============
Chris@0 202
Chris@0 203 ParameterDescriptor fmax;
Chris@0 204 fmax.identifier = "fmax";
Chris@0 205 fmax.name = "Maximum frequency";
Chris@0 206 fmax.description = "Maximum frequency of interest for the analysis.";
Chris@0 207 fmax.unit = "Hz";
Chris@0 208 fmax.minValue = 2000;
Chris@0 209 fmax.maxValue = 22050;
Chris@0 210 fmax.defaultValue = 10000;
Chris@0 211 fmax.isQuantized = true;
Chris@0 212 fmax.quantizeStep = 1.0;
Chris@0 213 list.push_back(fmax);
Chris@0 214
Chris@0 215 ParameterDescriptor nsamp;
Chris@0 216 nsamp.identifier = "nsamp";
Chris@0 217 nsamp.name = "Number of samples";
Chris@0 218 nsamp.description = "Number of samples of the time warped frame";
Chris@0 219 nsamp.unit = "samples";
Chris@0 220 nsamp.minValue = 128;
Chris@0 221 nsamp.maxValue = 4096;
Chris@0 222 nsamp.defaultValue = 2048;
Chris@0 223 nsamp.isQuantized = true;
Chris@0 224 nsamp.quantizeStep = 1.0;
Chris@0 225 list.push_back(nsamp);
Chris@0 226
Chris@0 227 ParameterDescriptor nfft;
Chris@0 228 nfft.identifier = "nfft";
Chris@0 229 nfft.name = "FFT number of points";
Chris@0 230 nfft.description = "Number of FFT points (controls zero-padding)";
Chris@0 231 nfft.unit = "samples";
Chris@0 232 nfft.minValue = 0;
Chris@0 233 nfft.maxValue = 4;
Chris@0 234 nfft.defaultValue = 3;
Chris@0 235 nfft.isQuantized = true;
Chris@0 236 nfft.quantizeStep = 1.0;
Chris@0 237 nfft.valueNames.push_back("256");
Chris@0 238 nfft.valueNames.push_back("512");
Chris@0 239 nfft.valueNames.push_back("1024");
Chris@0 240 nfft.valueNames.push_back("2048");
Chris@0 241 nfft.valueNames.push_back("4096");
Chris@0 242 nfft.valueNames.push_back("8192");
Chris@0 243 list.push_back(nfft);
Chris@0 244
Chris@0 245 ParameterDescriptor alpha_max;
Chris@0 246 alpha_max.identifier = "alpha_max";
Chris@0 247 alpha_max.name = "Maximum alpha value";
Chris@0 248 alpha_max.description = "Maximum value for the alpha parameter of the transform.";
Chris@0 249 alpha_max.unit = "Hz/s";
Chris@0 250 alpha_max.minValue = -10;
Chris@0 251 alpha_max.maxValue = 10;
Chris@0 252 alpha_max.defaultValue = 5;
Chris@0 253 alpha_max.isQuantized = true;
Chris@0 254 alpha_max.quantizeStep = 1.0;
Chris@0 255 list.push_back(alpha_max);
Chris@0 256
Chris@0 257 ParameterDescriptor num_warps;
Chris@0 258 num_warps.identifier = "num_warps";
Chris@0 259 num_warps.name = "Number of warpings";
Chris@0 260 num_warps.description = "Number of different warpings in the specified range (must be odd).";
Chris@0 261 num_warps.unit = "";
Chris@0 262 num_warps.minValue = 1;
Chris@0 263 num_warps.maxValue = 101;
Chris@0 264 num_warps.defaultValue = 21;
Chris@0 265 num_warps.isQuantized = true;
Chris@0 266 num_warps.quantizeStep = 2.0;
Chris@0 267 list.push_back(num_warps);
Chris@0 268
Chris@0 269 ParameterDescriptor alpha_dist;
Chris@0 270 alpha_dist.identifier = "alpha_dist";
Chris@0 271 alpha_dist.name = "alpha distribution";
Chris@0 272 alpha_dist.description = "Type of distribution of alpha values (linear or log).";
Chris@0 273 alpha_dist.unit = "";
Chris@0 274 alpha_dist.minValue = 0;
Chris@0 275 alpha_dist.maxValue = 1;
Chris@0 276 alpha_dist.defaultValue = 1;
Chris@0 277 alpha_dist.isQuantized = true;
Chris@0 278 alpha_dist.quantizeStep = 1.0;
Chris@0 279 // lin (0), log (1)
Chris@0 280 alpha_dist.valueNames.push_back("lin");
Chris@0 281 alpha_dist.valueNames.push_back("log");
Chris@0 282 list.push_back(alpha_dist);
Chris@0 283
Chris@0 284 // ============= F0-GRAM PARAMETERS =============
Chris@0 285
Chris@0 286 ParameterDescriptor f0min;
Chris@0 287 f0min.identifier = "f0min";
Chris@0 288 f0min.name = "min f0";
Chris@0 289 f0min.description = "Minimum fundamental frequency (f0) value.";
Chris@0 290 f0min.unit = "Hz";
Chris@0 291 f0min.minValue = 1;
Chris@0 292 f0min.maxValue = 500;
Chris@0 293 f0min.defaultValue = 80;
Chris@0 294 f0min.isQuantized = true;
Chris@0 295 f0min.quantizeStep = 1.0;
Chris@0 296 list.push_back(f0min);
Chris@0 297
Chris@0 298 ParameterDescriptor num_octs;
Chris@0 299 num_octs.identifier = "num_octs";
Chris@0 300 num_octs.name = "number of octaves";
Chris@0 301 num_octs.description = "Number of octaves for F0gram computation.";
Chris@0 302 num_octs.unit = "";
Chris@0 303 num_octs.minValue = 1;
Chris@0 304 num_octs.maxValue = 10;
Chris@0 305 num_octs.defaultValue = 4;
Chris@0 306 num_octs.isQuantized = true;
Chris@0 307 num_octs.quantizeStep = 1.0;
Chris@0 308 list.push_back(num_octs);
Chris@0 309
Chris@0 310 ParameterDescriptor num_f0_hyps;
Chris@0 311 num_f0_hyps.identifier = "num_f0_hyps";
Chris@0 312 num_f0_hyps.name = "number of f0 hypotesis";
Chris@0 313 num_f0_hyps.description = "Number of f0 hypotesis to extract.";
Chris@0 314 num_f0_hyps.unit = "";
Chris@0 315 num_f0_hyps.minValue = 1;
Chris@0 316 num_f0_hyps.maxValue = 100;
Chris@0 317 num_f0_hyps.defaultValue = 10;
Chris@0 318 num_f0_hyps.isQuantized = true;
Chris@0 319 num_f0_hyps.quantizeStep = 1.0;
Chris@0 320 list.push_back(num_f0_hyps);
Chris@0 321
Chris@0 322 ParameterDescriptor f0s_per_oct;
Chris@0 323 f0s_per_oct.identifier = "f0s_per_oct";
Chris@0 324 f0s_per_oct.name = "f0 values per octave";
Chris@0 325 f0s_per_oct.description = "Number of f0 values per octave.";
Chris@0 326 f0s_per_oct.unit = "";
Chris@0 327 f0s_per_oct.minValue = 12;
Chris@0 328 f0s_per_oct.maxValue = 768;
Chris@0 329 f0s_per_oct.defaultValue = 192;
Chris@0 330 f0s_per_oct.isQuantized = true;
Chris@0 331 f0s_per_oct.quantizeStep = 1.0;
Chris@0 332 list.push_back(f0s_per_oct);
Chris@0 333
Chris@0 334 ParameterDescriptor f0_prefer_fun;
Chris@0 335 f0_prefer_fun.identifier = "f0_prefer_fun";
Chris@0 336 f0_prefer_fun.name = "f0 preference function";
Chris@0 337 f0_prefer_fun.description = "Whether to use a f0 weighting function.";
Chris@0 338 f0_prefer_fun.unit = "";
Chris@0 339 f0_prefer_fun.minValue = 0;
Chris@0 340 f0_prefer_fun.maxValue = 1;
Chris@0 341 f0_prefer_fun.defaultValue = 1;
Chris@0 342 f0_prefer_fun.isQuantized = true;
Chris@0 343 f0_prefer_fun.quantizeStep = 1.0;
Chris@0 344 list.push_back(f0_prefer_fun);
Chris@0 345
Chris@0 346 ParameterDescriptor f0_prefer_mean;
Chris@0 347 f0_prefer_mean.identifier = "f0_prefer_mean";
Chris@0 348 f0_prefer_mean.name = "mean f0 preference function";
Chris@0 349 f0_prefer_mean.description = "Mean value for f0 weighting function (MIDI number).";
Chris@0 350 f0_prefer_mean.unit = "";
Chris@0 351 f0_prefer_mean.minValue = 1;
Chris@0 352 f0_prefer_mean.maxValue = 127;
Chris@0 353 f0_prefer_mean.defaultValue = 60;
Chris@0 354 f0_prefer_mean.isQuantized = true;
Chris@0 355 f0_prefer_mean.quantizeStep = 1.0;
Chris@0 356 list.push_back(f0_prefer_mean);
Chris@0 357
Chris@0 358 ParameterDescriptor f0_prefer_stdev;
Chris@0 359 f0_prefer_stdev.identifier = "f0_prefer_stdev";
Chris@0 360 f0_prefer_stdev.name = "stdev of f0 preference function";
Chris@0 361 f0_prefer_stdev.description = "Stdev for f0 weighting function (MIDI number).";
Chris@0 362 f0_prefer_stdev.unit = "";
Chris@0 363 f0_prefer_stdev.minValue = 1;
Chris@0 364 f0_prefer_stdev.maxValue = 127;
Chris@0 365 f0_prefer_stdev.defaultValue = 18;
Chris@0 366 f0_prefer_stdev.isQuantized = true;
Chris@0 367 f0_prefer_stdev.quantizeStep = 1.0;
Chris@0 368 list.push_back(f0_prefer_stdev);
Chris@0 369
Chris@0 370 ParameterDescriptor f0gram_mode;
Chris@0 371 f0gram_mode.identifier = "f0gram_mode";
Chris@0 372 f0gram_mode.name = "display mode of f0gram";
Chris@0 373 f0gram_mode.description = "Display all bins of the best direction, or the best bin for each direction.";
Chris@0 374 f0gram_mode.unit = "";
Chris@0 375 f0gram_mode.minValue = 0;
Chris@0 376 f0gram_mode.maxValue = 1;
Chris@0 377 f0gram_mode.defaultValue = 1;
Chris@0 378 f0gram_mode.isQuantized = true;
Chris@0 379 f0gram_mode.quantizeStep = 1.0;
Chris@0 380 list.push_back(f0gram_mode);
Chris@0 381
Chris@0 382 return list;
Chris@0 383 }
Chris@0 384
Chris@0 385 float
Chris@0 386 FChTransformF0gram::getParameter(string identifier) const {
Chris@0 387
Chris@0 388 if (identifier == "fmax") {
Chris@0 389 return m_fmax;
Chris@0 390 } else if (identifier == "nsamp") {
Chris@0 391 return m_warp_params.nsamps_twarp;
Chris@0 392 } else if (identifier == "alpha_max") {
Chris@0 393 return m_warp_params.alpha_max;
Chris@0 394 } else if (identifier == "num_warps") {
Chris@0 395 return m_warp_params.num_warps;
Chris@0 396 } else if (identifier == "alpha_dist") {
Chris@0 397 return m_warp_params.alpha_dist;
Chris@0 398 } else if (identifier == "nfft") {
Chris@0 399 return m_nfft;
Chris@0 400 } else if (identifier == "f0min") {
Chris@0 401 return m_f0_params.f0min;
Chris@0 402 } else if (identifier == "num_octs") {
Chris@0 403 return m_f0_params.num_octs;
Chris@0 404 } else if (identifier == "f0s_per_oct") {
Chris@0 405 return m_f0_params.num_f0s_per_oct;
Chris@0 406 } else if (identifier == "num_f0_hyps") {
Chris@0 407 return m_f0_params.num_f0_hyps;
Chris@0 408 } else if (identifier == "f0_prefer_fun") {
Chris@0 409 return m_f0_params.prefer;
Chris@0 410 } else if (identifier == "f0_prefer_mean") {
Chris@0 411 return m_f0_params.prefer_mean;
Chris@0 412 } else if (identifier == "f0_prefer_stdev") {
Chris@0 413 return m_f0_params.prefer_stdev;
Chris@7 414 } else if (identifier == "f0gram_mode") {
Chris@0 415 return m_f0gram_mode;
Chris@0 416 } else {
Chris@0 417 return 0.f;
Chris@0 418 }
Chris@0 419
Chris@0 420 }
Chris@0 421
Chris@0 422 void FChTransformF0gram::setParameter(string identifier, float value) {
Chris@0 423
Chris@0 424 if (identifier == "fmax") {
Chris@0 425 m_fmax = value;
Chris@0 426 } else if (identifier == "nsamp") {
Chris@0 427 m_warp_params.nsamps_twarp = value;
Chris@0 428 } else if (identifier == "alpha_max") {
Chris@0 429 m_warp_params.alpha_max = value;
Chris@0 430 } else if (identifier == "num_warps") {
Chris@0 431 m_warp_params.num_warps = value;
Chris@0 432 } else if (identifier == "alpha_dist") {
Chris@0 433 m_warp_params.alpha_dist = value;
Chris@0 434 } else if (identifier == "nfft") {
Chris@0 435 m_nfft = value;
Chris@0 436 } else if (identifier == "f0min") {
Chris@0 437 m_f0_params.f0min = value;
Chris@0 438 } else if (identifier == "num_octs") {
Chris@0 439 m_f0_params.num_octs = value;
Chris@0 440 } else if (identifier == "f0s_per_oct") {
Chris@0 441 m_f0_params.num_f0s_per_oct = value;
Chris@0 442 } else if (identifier == "num_f0_hyps") {
Chris@0 443 m_f0_params.num_f0_hyps = value;
Chris@0 444 } else if (identifier == "f0_prefer_fun") {
Chris@0 445 m_f0_params.prefer = value;
Chris@0 446 } else if (identifier == "f0_prefer_mean") {
Chris@0 447 m_f0_params.prefer_mean = value;
Chris@0 448 } else if (identifier == "f0_prefer_stdev") {
Chris@0 449 m_f0_params.prefer_stdev = value;
Chris@0 450 } else if (identifier == "f0gram_mode") {
Chris@0 451 m_f0gram_mode = value;
Chris@0 452 }
Chris@0 453
Chris@0 454 }
Chris@0 455
Chris@0 456 FChTransformF0gram::ProgramList
Chris@0 457 FChTransformF0gram::getPrograms() const {
Chris@0 458 ProgramList list;
Chris@0 459
Chris@0 460 list.push_back("default");
Chris@0 461
Chris@0 462 return list;
Chris@0 463 }
Chris@0 464
Chris@0 465 string
Chris@0 466 FChTransformF0gram::getCurrentProgram() const {
Chris@0 467 return m_currentProgram;
Chris@0 468 }
Chris@0 469
Chris@0 470 void
Chris@0 471 FChTransformF0gram::selectProgram(string name) {
Chris@0 472
Chris@0 473 m_currentProgram = name;
Chris@0 474
Chris@0 475 if (name == "default") {
Chris@0 476 m_fmax = 10000.f;
Chris@0 477
Chris@0 478 m_warp_params.nsamps_twarp = 2048;
Chris@0 479 m_warp_params.alpha_max = 4;
Chris@0 480 m_warp_params.num_warps = 21;
Chris@0 481 m_warp_params.fact_over_samp = 2;
Chris@0 482 m_warp_params.alpha_dist = 0;
Chris@0 483
Chris@0 484 m_f0_params.f0min = 80.0;
Chris@0 485 m_f0_params.num_octs = 4;
Chris@0 486 m_f0_params.num_f0s_per_oct = 192;
Chris@0 487 m_f0_params.num_f0_hyps = 5;
Chris@0 488 m_f0_params.prefer = true;
Chris@0 489 m_f0_params.prefer_mean = 60;
Chris@0 490 m_f0_params.prefer_stdev = 18;
Chris@0 491
Chris@0 492 m_glogs_params.HP_logS = true;
Chris@0 493 m_glogs_params.att_subharms = 1;
Chris@0 494
Chris@0 495 m_glogs_params.median_poly_coefs[0] = -0.000000058551680;
Chris@0 496 m_glogs_params.median_poly_coefs[1] = -0.000006945207775;
Chris@0 497 m_glogs_params.median_poly_coefs[2] = 0.002357223226588;
Chris@0 498
Chris@0 499 m_glogs_params.sigma_poly_coefs[0] = 0.000000092782308;
Chris@0 500 m_glogs_params.sigma_poly_coefs[1] = 0.000057283574898;
Chris@0 501 m_glogs_params.sigma_poly_coefs[2] = 0.022199903714288;
Chris@0 502
Chris@0 503 m_nfft = m_warp_params.nsamps_twarp;
Chris@0 504 m_hop = m_warp_params.fact_over_samp * 256;
Chris@0 505
Chris@0 506 m_num_f0s = 0;
Chris@0 507
Chris@7 508 m_f0gram_mode = 1;
Chris@0 509
Chris@0 510 }
Chris@0 511 }
Chris@0 512
Chris@0 513 FChTransformF0gram::OutputList
Chris@0 514 FChTransformF0gram::getOutputDescriptors() const {
Chris@0 515
Chris@0 516 OutputList list;
Chris@0 517
Chris@0 518 // See OutputDescriptor documentation for the possibilities here.
Chris@0 519 // Every plugin must have at least one output.
Chris@0 520
Chris@0 521 /* f0 values of F0gram grid as string values */
Chris@0 522 vector<string> f0values;
Chris@10 523 int ind = 0;
Chris@14 524 char f0String[100];
Chris@0 525 while (ind < m_num_f0s) {
Chris@0 526 sprintf(f0String, "%4.2f", m_f0s[ind]);
Chris@0 527 f0values.push_back(f0String);
Chris@0 528 ind++;
Chris@0 529 }
Chris@0 530
Chris@0 531 /* The F0gram */
Chris@0 532 OutputDescriptor d;
Chris@0 533 d.identifier = "f0gram";
Chris@0 534 d.name = "F0gram: salience of f0s";
Chris@0 535 d.description = "This representation show the salience of the different f0s in the signal.";
Chris@0 536 d.hasFixedBinCount = true;
Chris@14 537 d.binCount = m_f0_params.num_octs * m_f0_params.num_f0s_per_oct;
Chris@0 538 d.binNames = f0values;
Chris@0 539 d.hasKnownExtents = false;
Chris@0 540 d.isQuantized = false;
Chris@0 541 d.sampleType = OutputDescriptor::OneSamplePerStep;
Chris@0 542 d.hasDuration = false;
Chris@0 543 list.push_back(d);
Chris@0 544
Chris@0 545 return list;
Chris@0 546 }
Chris@0 547
Chris@0 548 bool
Chris@0 549 FChTransformF0gram::initialise(size_t channels, size_t stepSize, size_t blockSize) {
Chris@0 550 if (channels < getMinChannelCount() ||
Chris@14 551 channels > getMaxChannelCount()) {
Chris@14 552 return false;
Chris@14 553 }
Chris@0 554
Chris@0 555 // set blockSize and stepSize (but changed below)
Chris@0 556 m_blockSize = blockSize;
Chris@0 557 m_stepSize = stepSize;
Chris@0 558
Chris@0 559 // WARNING !!!
Chris@0 560 // these values in fact are determined by the sampling frequency m_fs
Chris@0 561 // the parameters used below correspond to default values i.e. m_fs = 44.100 Hz
Chris@0 562 //m_blockSize = 4 * m_warp_params.nsamps_twarp;
Chris@0 563 m_stepSize = floor(m_hop / m_warp_params.fact_over_samp);
Chris@0 564
Chris@0 565 /* initialise m_glogs_params */
Chris@7 566 design_GLogS();
Chris@0 567
Chris@0 568 /* design of FChT */
Chris@0 569 design_FChT();
Chris@0 570
Chris@7 571 design_LPF();
Chris@0 572
Chris@7 573 design_time_window();
Chris@0 574
Chris@7 575 // Create Hanning window for warped signals
Chris@14 576 mp_HanningWindow = allocate<double>(m_warp_params.nsamps_twarp);
Chris@7 577 bool normalize = false;
Chris@14 578 Utils::hanning_window(mp_HanningWindow, m_warp_params.nsamps_twarp, normalize);
Chris@0 579
Chris@0 580 return true;
Chris@0 581 }
Chris@0 582
Chris@0 583 void
Chris@0 584 FChTransformF0gram::design_GLogS() {
Chris@0 585
Chris@7 586 // total number & initial quantity of f0s
Chris@10 587 m_glogs_init_f0s = (int)(((double)m_f0_params.num_f0s_per_oct)*log2(5.0))+1;
Chris@7 588 m_glogs_num_f0s = (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct + m_glogs_init_f0s;
Chris@0 589
Chris@7 590 // Initialize arrays
Chris@14 591 m_glogs_f0 = allocate<double>(m_glogs_num_f0s);
Chris@14 592 m_glogs = allocate<double>(m_glogs_num_f0s*m_warp_params.num_warps);
Chris@14 593 m_glogs_n = allocate<int>(m_glogs_num_f0s);
Chris@14 594 m_glogs_index = allocate<int>(m_glogs_num_f0s);
Chris@0 595
Chris@7 596 // Compute f0 values
Chris@7 597 m_glogs_harmonic_count = 0;
Chris@7 598 double factor = (double)(m_warp_params.nsamps_twarp/2)/(double)(m_warp_params.nsamps_twarp/2+1);
Chris@10 599 for (int i = 0; i < m_glogs_num_f0s; i++) {
Chris@7 600 m_glogs_f0[i] = (m_f0_params.f0min/5.0)*pow(2.0,(double)i/(double)m_f0_params.num_f0s_per_oct);
Chris@7 601 // for every f0 compute number of partials less or equal than m_fmax.
Chris@7 602 m_glogs_n[i] = m_fmax*factor/m_glogs_f0[i];
Chris@7 603 m_glogs_index[i] = m_glogs_harmonic_count;
Chris@7 604 m_glogs_harmonic_count += m_glogs_n[i];
Chris@7 605 }
Chris@0 606
Chris@7 607 // Initialize arrays for interpolation
Chris@14 608 m_glogs_posint = allocate<int>(m_glogs_harmonic_count);
Chris@14 609 m_glogs_posfrac = allocate<double>(m_glogs_harmonic_count);
Chris@14 610 m_glogs_interp = allocate<double>(m_glogs_harmonic_count);
Chris@0 611
Chris@7 612 // Compute int & frac of interpolation positions
Chris@10 613 int aux_index = 0;
Chris@7 614 double aux_pos;
Chris@10 615 for (int i = 0; i < m_glogs_num_f0s; i++) {
Chris@10 616 for (int j = 1; j <= m_glogs_n[i]; j++) {
Chris@7 617 // indice en el vector de largo t_warp/2+1 donde el ultimo valor corresponde a f=m_fmax
Chris@7 618 aux_pos = ((double)j*m_glogs_f0[i])*((double)(m_warp_params.nsamps_twarp/2+1))/m_fmax;
Chris@10 619 m_glogs_posint[aux_index] = (int)aux_pos;
Chris@7 620 m_glogs_posfrac[aux_index] = aux_pos - (double)m_glogs_posint[aux_index];
Chris@7 621 aux_index++;
Chris@7 622 }
Chris@7 623 }
Chris@0 624
Chris@7 625 // Third harmonic attenuation
Chris@7 626 double aux_third_harmonic;
Chris@14 627 m_glogs_third_harmonic_posint = allocate<int>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@14 628 m_glogs_third_harmonic_posfrac = allocate<double>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@10 629 for (int i = 0; i < (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct; i++) {
Chris@7 630 aux_third_harmonic = (double)i + (double)m_glogs_init_f0s - ((double)m_f0_params.num_f0s_per_oct)*log2(3.0);
Chris@10 631 m_glogs_third_harmonic_posint[i] = (int)aux_third_harmonic;
Chris@7 632 m_glogs_third_harmonic_posfrac[i] = aux_third_harmonic - (double)(m_glogs_third_harmonic_posint[i]);
Chris@7 633 }
Chris@14 634 m_glogs_third_harmonic = allocate<double>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@0 635
Chris@7 636 // Fifth harmonic attenuation
Chris@7 637 double aux_fifth_harmonic;
Chris@14 638 m_glogs_fifth_harmonic_posint = allocate<int>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@14 639 m_glogs_fifth_harmonic_posfrac = allocate<double>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@10 640 for (int i = 0; i < (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct; i++) {
Chris@7 641 aux_fifth_harmonic = (double)i + (double)m_glogs_init_f0s - ((double)m_f0_params.num_f0s_per_oct)*log2(5.0);
Chris@10 642 m_glogs_fifth_harmonic_posint[i] = (int)aux_fifth_harmonic;
Chris@7 643 m_glogs_fifth_harmonic_posfrac[i] = aux_fifth_harmonic - (double)(m_glogs_fifth_harmonic_posint[i]);
Chris@7 644 }
Chris@14 645 m_glogs_fifth_harmonic = allocate<double>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@0 646
Chris@7 647 // Normalization & attenuation windows
Chris@14 648 m_glogs_f0_preference_weights = allocate<double>(m_f0_params.num_octs*m_f0_params.num_f0s_per_oct);
Chris@14 649 m_glogs_median_correction = allocate<double>(m_f0_params.num_octs*m_f0_params.num_f0s_per_oct);
Chris@14 650 m_glogs_sigma_correction = allocate<double>(m_f0_params.num_octs*m_f0_params.num_f0s_per_oct);
Chris@7 651 double MIDI_value;
Chris@10 652 for (int i = 0; i < m_f0_params.num_octs*m_f0_params.num_f0s_per_oct; i++) {
Chris@7 653 MIDI_value = 69.0 + 12.0 * log2(m_glogs_f0[i + m_glogs_init_f0s]/440.0);
Chris@7 654 m_glogs_f0_preference_weights[i] = 1.0/sqrt(2.0*M_PI*m_f0_params.prefer_stdev*m_f0_params.prefer_stdev)*exp(-(MIDI_value-m_f0_params.prefer_mean)*(MIDI_value-m_f0_params.prefer_mean)/(2.0*m_f0_params.prefer_stdev*m_f0_params.prefer_stdev));
Chris@7 655 m_glogs_f0_preference_weights[i] = (0.01 + m_glogs_f0_preference_weights[i]) / (1.01);
Chris@0 656
Chris@7 657 m_glogs_median_correction[i] = m_glogs_params.median_poly_coefs[0]*(i+1.0)*(i+1.0) + m_glogs_params.median_poly_coefs[1]*(i+1.0) + m_glogs_params.median_poly_coefs[2];
Chris@7 658 m_glogs_sigma_correction[i] = 1.0 / (m_glogs_params.sigma_poly_coefs[0]*(i+1.0)*(i+1.0) + m_glogs_params.sigma_poly_coefs[1]*(i+1.0) + m_glogs_params.sigma_poly_coefs[2]);
Chris@7 659 }
Chris@0 660 }
Chris@0 661
Chris@0 662 void
Chris@0 663 FChTransformF0gram::design_FChT() {
Chris@0 664
Chris@0 665 /*
Chris@0 666 * FILES FOR DEBUGGING
Chris@0 667 */
Chris@0 668
Chris@0 669 //ofstream output("output.txt");
Chris@0 670
Chris@0 671
Chris@0 672 /* ============= WARPING DESIGN ============= */
Chris@0 673
Chris@0 674 // sampling frequency after oversampling
Chris@0 675 m_warpings.fs_orig = m_warp_params.fact_over_samp * m_fs;
Chris@0 676
Chris@0 677 // number of samples of the original signal frame
Chris@0 678 m_warpings.nsamps_torig = 4 * m_warp_params.fact_over_samp * m_warp_params.nsamps_twarp;
Chris@0 679 // equivalent to: m_warpings.nsamps_torig = m_warp_params.fact_over_samp * m_blockSize;
Chris@0 680
Chris@0 681 // time instants of the original signal frame
Chris@14 682 double *t_orig = allocate<double>(m_warpings.nsamps_torig);
Chris@10 683 for (int ind = 0; ind < m_warpings.nsamps_torig; ind++) {
Chris@0 684 t_orig[ind] = ((double)(ind + 1) - (double)m_warpings.nsamps_torig / 2.0) / m_warpings.fs_orig;
Chris@0 685 }
Chris@0 686
Chris@0 687 // linear chirps warping definition as relative frequency deviation
Chris@7 688 //TODO
Chris@14 689 double *freq_relative = allocate<double>(m_warpings.nsamps_torig * m_warp_params.num_warps);
Chris@0 690 define_warps_linear_chirps(freq_relative, t_orig);
Chris@0 691
Chris@0 692 // maximum relative frequency deviation
Chris@0 693 double freq_relative_max = 0;
Chris@14 694 for (int i = 0; i < m_warpings.nsamps_torig; i++) {
Chris@14 695 for (int j = 0; j < m_warp_params.num_warps; j++) {
Chris@14 696 if (freq_relative_max < freq_relative[j * m_warpings.nsamps_torig + i]) {
Chris@0 697 freq_relative_max = freq_relative[j * m_warpings.nsamps_torig + i];
Chris@14 698 }
Chris@14 699 }
Chris@14 700 }
Chris@0 701
Chris@0 702 // sampling frequency of warped signal to be free of aliasing up to fmax
Chris@0 703 m_warpings.fs_warp = 2 * m_fmax * freq_relative_max;
Chris@0 704
Chris@0 705 // time instants of the warped signal frame
Chris@14 706 double *t_warp = allocate<double>(m_warp_params.nsamps_twarp);
Chris@10 707 for (int ind = 0; ind < m_warp_params.nsamps_twarp; ind++) {
Chris@0 708 t_warp[ind] = ((double)((int)(ind + 1)- (int)m_warp_params.nsamps_twarp / 2)) / (double)m_warpings.fs_warp;
Chris@0 709 }
Chris@0 710
Chris@0 711 // design of warpings for efficient interpolation
Chris@0 712 design_warps(freq_relative, t_orig, t_warp);
Chris@0 713
Chris@0 714
Chris@0 715 /*
Chris@0 716 * FILES FOR DEBUGGING
Chris@0 717 */
Chris@0 718
Chris@0 719 /*
Chris@7 720 output << "chirp_rates" << endl;
Chris@10 721 for (int j = 0; j < m_warp_params.num_warps; j++){
Chris@7 722 output << m_warpings.chirp_rates[j];
Chris@7 723 output << " ";
Chris@7 724 }
Chris@7 725 output << endl << "freq_relative" << endl;
Chris@0 726
Chris@10 727 for (int i = 0; i < m_warpings.nsamps_torig; i++){
Chris@10 728 for (int j = 0; j < m_warp_params.num_warps; j++){
Chris@7 729 output << freq_relative[j * m_warpings.nsamps_torig + i];
Chris@7 730 output << " ";
Chris@7 731 }
Chris@7 732 output << endl;
Chris@7 733 }
Chris@0 734
Chris@7 735 output << endl << "t_orig" << endl;
Chris@0 736
Chris@10 737 for (int i = 0; i < m_warpings.nsamps_torig; i++){
Chris@7 738 output << t_orig[i] << endl ;
Chris@7 739 }
Chris@7 740 */
Chris@0 741
Chris@14 742 deallocate(freq_relative);
Chris@14 743 deallocate(t_orig);
Chris@14 744 deallocate(t_warp);
Chris@14 745
Chris@0 746 //output.close();
Chris@0 747
Chris@0 748 /* ============= FFTW PLAN DESIGN ============= */
Chris@7 749 // Initialize 2-d array for warped signals
Chris@14 750 x_warping = allocate<double>(m_warp_params.nsamps_twarp);
Chris@14 751 m_absFanChirpTransform = allocate<double>(m_warp_params.num_warps * (m_warp_params.nsamps_twarp/2 + 1));
Chris@14 752 m_auxFanChirpTransform = allocate<double>(2 * (m_warp_params.nsamps_twarp/2 + 1));
Chris@14 753 fft_xwarping = new FFTReal(m_warp_params.nsamps_twarp);
Chris@0 754 }
Chris@0 755
Chris@0 756 void
Chris@0 757 FChTransformF0gram::design_warps(double * freq_relative, double * t_orig, double * t_warp) {
Chris@0 758 /* the warping is done by interpolating the original signal in time instants
Chris@0 759 given by the desired frequency deviation, to do this, the interpolation
Chris@0 760 instants are stored in a structure as an integer index and a fractional value
Chris@0 761 hypothesis: sampling frequency at the central point equals the original
Chris@7 762 */
Chris@0 763
Chris@14 764 m_warpings.pos_int = allocate<int>(m_warp_params.num_warps * m_warp_params.nsamps_twarp);
Chris@14 765 m_warpings.pos_frac = allocate<double>(m_warp_params.num_warps * m_warp_params.nsamps_twarp);
Chris@0 766
Chris@7 767 // vector of phase values
Chris@14 768 double *phi = allocate<double>(m_warpings.nsamps_torig);
Chris@7 769 double aux;
Chris@0 770
Chris@7 771 // warped positions
Chris@14 772 double *pos1 = allocate<double>(m_warp_params.nsamps_twarp*m_warp_params.num_warps);
Chris@0 773
Chris@10 774 for (int i = 0; i < m_warp_params.num_warps; i++) {
Chris@0 775
Chris@7 776 // integration of relative frequency to obtain phase values
Chris@14 777 Utils::cumtrapz(t_orig, freq_relative + i*(m_warpings.nsamps_torig), m_warpings.nsamps_torig, phi);
Chris@0 778
Chris@7 779 // centering of phase values to force original frequency in the middle
Chris@7 780 aux = phi[m_warpings.nsamps_torig/2];
Chris@10 781 for (int j = 0; j < m_warpings.nsamps_torig; j++) {
Chris@7 782 phi[j] -= aux;
Chris@7 783 } //for
Chris@0 784
Chris@7 785 // interpolation of phase values to obtain warped positions
Chris@14 786 Utils::interp1(phi, t_orig, m_warpings.nsamps_torig, t_warp, pos1 + i*m_warp_params.nsamps_twarp, m_warp_params.nsamps_twarp);
Chris@0 787 }
Chris@0 788
Chris@0 789 // % previous sample index
Chris@0 790 // pos1_int = uint32(floor(pos1))';
Chris@0 791 // % integer corresponding to previous sample index in "c"
Chris@0 792 // warps.pos1_int = (pos1_int - uint32(1));
Chris@0 793 // % fractional value that defines the warped position
Chris@0 794 // warps.pos1_frac = (double(pos1)' - double(pos1_int));
Chris@0 795
Chris@10 796 for (int j = 0; j < m_warp_params.nsamps_twarp*m_warp_params.num_warps; j++) {
Chris@7 797 // previous sample index
Chris@7 798 pos1[j] = pos1[j]*m_warpings.fs_orig + m_warpings.nsamps_torig/2 + 1;
Chris@10 799 m_warpings.pos_int[j] = (int) pos1[j];
Chris@7 800 m_warpings.pos_frac[j] = pos1[j] - (double)(m_warpings.pos_int[j]);
Chris@7 801 } //for
Chris@0 802
Chris@14 803 deallocate(phi);
Chris@14 804 deallocate(pos1);
Chris@0 805 }
Chris@0 806
Chris@0 807 void
Chris@0 808 FChTransformF0gram::define_warps_linear_chirps(double * freq_relative, double * t_orig) {
Chris@0 809 /** define warps as relative frequency deviation from original frequency
Chris@7 810 t_orig : time vector
Chris@7 811 freq_relative : relative frequency deviations
Chris@7 812 */
Chris@0 813 if (m_warp_params.alpha_dist == 0) {
Chris@0 814
Chris@0 815 // linear alpha values spacing
Chris@14 816 m_warpings.chirp_rates = allocate<double>(m_warp_params.num_warps);
Chris@0 817 // WARNING m_warp_params.num_warps must be odd
Chris@0 818 m_warpings.chirp_rates[0] = -m_warp_params.alpha_max;
Chris@0 819 double increment = (double) m_warp_params.alpha_max / ((m_warp_params.num_warps - 1) / 2);
Chris@0 820
Chris@10 821 for (int ind = 1; ind < m_warp_params.num_warps; ind++) {
Chris@0 822 m_warpings.chirp_rates[ind] = m_warpings.chirp_rates[ind - 1] + increment;
Chris@0 823 }
Chris@0 824 // force zero value
Chris@0 825 m_warpings.chirp_rates[(int) ((m_warp_params.num_warps - 1) / 2)] = 0;
Chris@0 826
Chris@0 827 } else {
Chris@0 828 // log alpha values spacing
Chris@14 829 m_warpings.chirp_rates = allocate<double>(m_warp_params.num_warps);
Chris@0 830
Chris@0 831 // force zero value
Chris@0 832 int middle_point = (int) ((m_warp_params.num_warps - 1) / 2);
Chris@0 833 m_warpings.chirp_rates[middle_point] = 0;
Chris@0 834
Chris@0 835 double logMax = log10(m_warp_params.alpha_max + 1);
Chris@0 836 double increment = logMax / ((m_warp_params.num_warps - 1) / 2.0f);
Chris@0 837 double exponent = 0;
Chris@0 838
Chris@0 839 // fill positive values
Chris@0 840 int ind_log = middle_point;
Chris@10 841 for (int ind = 0; ind < (m_warp_params.num_warps + 1) / 2; ind++) {
Chris@0 842 m_warpings.chirp_rates[ind_log] = pow(10, exponent) - 1;
Chris@0 843 exponent += increment;
Chris@0 844 ind_log++;
Chris@0 845 }
Chris@0 846 // fill negative values
Chris@10 847 for (int ind = 0; ind < (m_warp_params.num_warps - 1) / 2; ind++) {
Chris@0 848 m_warpings.chirp_rates[ind] = -m_warpings.chirp_rates[m_warp_params.num_warps - 1 - ind];
Chris@0 849 }
Chris@0 850 }
Chris@0 851
Chris@0 852 // compute relative frequency deviation
Chris@14 853 for (int i = 0; i < m_warpings.nsamps_torig; i++) {
Chris@14 854 for (int j = 0; j < m_warp_params.num_warps; j++) {
Chris@0 855 freq_relative[j * m_warpings.nsamps_torig + i] = 1.0 + t_orig[i] * m_warpings.chirp_rates[j];
Chris@14 856 }
Chris@14 857 }
Chris@0 858 }
Chris@0 859
Chris@0 860 void
Chris@14 861 FChTransformF0gram::design_LPF()
Chris@14 862 {
Chris@14 863 double *lp_LPFWindow_aux = allocate<double>(m_blockSize/2+1);
Chris@14 864 mp_LPFWindow = allocate<double>(m_blockSize/2+1);
Chris@0 865
Chris@10 866 int i_max = (int) ((2.0*m_fmax/m_fs) * ( (double)m_blockSize / 2.0 + 1.0 ));
Chris@10 867 for (int i = 0; i < m_blockSize/2+1; i++) {
Chris@0 868 if (i >= i_max) {
Chris@0 869 lp_LPFWindow_aux[i] = 0.0;
Chris@0 870 } else {
Chris@0 871 lp_LPFWindow_aux[i] = 1.0;
Chris@0 872 }
Chris@0 873 }
Chris@14 874
Chris@14 875 LPF_time = allocate_and_zero<double>(m_warpings.nsamps_torig);
Chris@14 876 LPF_frequency = allocate_and_zero<double>(2 * (m_warpings.nsamps_torig/2 + 1));
Chris@14 877
Chris@14 878 fft_forward_LPF = new FFTReal(m_blockSize);
Chris@14 879 fft_inverse_LPF = new FFTReal(m_warpings.nsamps_torig);
Chris@0 880
Chris@10 881 int winWidth = 11;
Chris@14 882 double *lp_hanningWindow = allocate<double>(winWidth);
Chris@0 883 double accum=0;
Chris@10 884 for (int i = 0; i < winWidth; i++) {
Chris@0 885 lp_hanningWindow[i]=0.5*(1.0-cos(2*M_PI*(double)(i+1)/((double)winWidth+1.0)));
Chris@0 886 accum+=lp_hanningWindow[i];
Chris@0 887
Chris@0 888 }
Chris@10 889 for (int i = 0; i < winWidth; i++) { //window normalization
Chris@0 890 lp_hanningWindow[i]=lp_hanningWindow[i]/accum;
Chris@0 891 }
Chris@10 892 for (int i = 0; i < m_blockSize/2+1; i++) {
Chris@0 893 //if (((i-(winWidth-1)/2)<0)||(i+(winWidth-1))/2>m_blockSize/2-1) {//consideramos winWidth impar, si la ventana sale del arreglo se rellena con el valor origianl
Chris@7 894 if ( (i > (i_max + (winWidth-1)/2)) || (i <= (i_max - (winWidth-1)/2)) ) {
Chris@0 895 mp_LPFWindow[i]=lp_LPFWindow_aux[i];
Chris@0 896 } else {
Chris@0 897 accum=0;
Chris@10 898 for (int j = -((winWidth-1)/2); j <= (winWidth-1)/2; j++) {
Chris@0 899 accum+=lp_LPFWindow_aux[i-j]*lp_hanningWindow[j+(winWidth-1)/2];
Chris@7 900 }
Chris@0 901 mp_LPFWindow[i]=accum;
Chris@0 902 }
Chris@0 903 }
Chris@0 904
Chris@14 905 deallocate(lp_LPFWindow_aux);
Chris@14 906 deallocate(lp_hanningWindow);
Chris@0 907 }
Chris@0 908
Chris@14 909 void FChTransformF0gram::apply_LPF()
Chris@14 910 {
Chris@14 911 fft_forward_LPF->forward(LPF_time, LPF_frequency);
Chris@14 912
Chris@10 913 for (int i = 0; i < m_blockSize/2+1; i++) {
Chris@14 914 LPF_frequency[i*2] *= mp_LPFWindow[i] * m_warpings.nsamps_torig;
Chris@14 915 LPF_frequency[i*2 + 1] *= mp_LPFWindow[i] * m_warpings.nsamps_torig;
Chris@0 916 }
Chris@14 917
Chris@14 918 fft_inverse_LPF->inverse(LPF_frequency, LPF_time);
Chris@0 919
Chris@7 920 // TODO ver si hay que hacer fftshift para corregir la fase respecto al centro del frame.
Chris@7 921 // nota: además de aplicar el LPF, esta función resamplea la señal original.
Chris@0 922 }
Chris@0 923
Chris@14 924 void FChTransformF0gram::clean_LPF()
Chris@14 925 {
Chris@14 926 delete fft_forward_LPF;
Chris@14 927 delete fft_inverse_LPF;
Chris@14 928 deallocate(LPF_time);
Chris@14 929 deallocate(LPF_frequency);
Chris@14 930 deallocate(mp_LPFWindow);
Chris@0 931 }
Chris@0 932
Chris@14 933 void FChTransformF0gram::reset()
Chris@14 934 {
Chris@0 935 }
Chris@0 936
Chris@0 937 FChTransformF0gram::FeatureSet
Chris@5 938 FChTransformF0gram::process(const float *const *inputBuffers, Vamp::RealTime) {
Chris@0 939
Chris@0 940 // // Do actual work!
Chris@0 941 //
Chris@0 942
Chris@7 943 /* PSEUDOCÓDIGO:
Chris@7 944 - Aplicar FFT al frame entero.
Chris@7 945 - Filtro pasabajos en frecuencia.
Chris@7 946 - FFT inversa al frame entero.
Chris@7 947 -----------------------------------------------------------------------------
Chris@7 948 - Para cada warp: *Si es un espectrograma direccional (un solo warp
Chris@7 949 => no es para cada warp sino para el elegido)
Chris@7 950 - Hacer la interpolación con interp1q.
Chris@7 951 - Aplicar la FFT al frame warpeado.
Chris@7 952 - (Opcional) GLogS.
Chris@7 953 - ...
Chris@7 954 */
Chris@0 955
Chris@0 956 //---------------------------------------------------------------------------
Chris@7 957 FeatureSet fs;
Chris@0 958
Chris@7 959 #ifdef DEBUG
Chris@7 960 printf("\n ----- DEBUG INFORMATION ----- \n");
Chris@7 961 printf(" m_fs = %f Hz.\n",m_fs);
Chris@7 962 printf(" fs_orig = %f Hz.\n",m_warpings.fs_orig);
Chris@7 963 printf(" fs_warp = %f Hz.\n",m_warpings.fs_warp);
Chris@7 964 printf(" m_nfft = %d.\n",m_nfft);
Chris@7 965 printf(" m_blockSize = %d.\n",m_blockSize);
Chris@7 966 printf(" m_warpings.nsamps_torig = %d.\n",m_warpings.nsamps_torig);
Chris@7 967 printf(" m_warp_params.num_warps = %d.\n",m_warp_params.num_warps);
Chris@7 968 printf(" m_glogs_harmonic_count = %d.\n",m_glogs_harmonic_count);
Chris@7 969 #endif
Chris@0 970
Chris@10 971 for (int i = 0; i < m_blockSize; i++) {
Chris@0 972 LPF_time[i] = (double)(inputBuffers[0][i]) * m_timeWindow[i];
Chris@0 973 }
Chris@0 974
Chris@0 975 // #ifdef DEBUG
Chris@0 976 // printf(" HASTA ACÁ ANDA!!!\n");
Chris@0 977 // cout << flush;
Chris@0 978 // #endif
Chris@0 979
Chris@7 980 apply_LPF();
Chris@7 981 // Señal filtrada queda en LPF_time
Chris@0 982
Chris@7 983 Feature feature;
Chris@0 984 feature.hasTimestamp = false;
Chris@0 985
Chris@0 986 // ----------------------------------------------------------------------------------------------
Chris@0 987 // Hanning window & FFT for all warp directions
Chris@0 988
Chris@7 989 double max_glogs = -DBL_MAX;
Chris@10 990 int ind_max_glogs = 0;
Chris@0 991
Chris@10 992 for (int i_warp = 0; i_warp < m_warp_params.num_warps; i_warp++) {
Chris@7 993 // Interpolate
Chris@14 994 Utils::interp1q(LPF_time, (m_warpings.pos_int) + i_warp*m_warp_params.nsamps_twarp, m_warpings.pos_frac + i_warp*m_warp_params.nsamps_twarp, x_warping, m_warp_params.nsamps_twarp);
Chris@0 995
Chris@7 996 // Apply window
Chris@10 997 for (int i = 0; i < m_warp_params.nsamps_twarp; i++) {
Chris@7 998 x_warping[i] *= mp_HanningWindow[i];
Chris@7 999 }
Chris@0 1000
Chris@7 1001 // Transform
Chris@14 1002 fft_xwarping->forward(x_warping, m_auxFanChirpTransform);
Chris@0 1003
Chris@7 1004 // Copy result
Chris@7 1005 double *aux_abs_fcht = m_absFanChirpTransform + i_warp*(m_warp_params.nsamps_twarp/2+1);
Chris@10 1006 for (int i = 0; i < (m_warp_params.nsamps_twarp/2+1); i++) {
Chris@14 1007 aux_abs_fcht[i] = log10(1.0 + 10.0*sqrt(m_auxFanChirpTransform[i*2]*m_auxFanChirpTransform[i*2]+m_auxFanChirpTransform[i*2+1]*m_auxFanChirpTransform[i*2+1]));
Chris@7 1008 }
Chris@0 1009
Chris@0 1010 // -----------------------------------------------------------------------------------------
Chris@0 1011 // GLogS
Chris@14 1012 Utils::interp1q(aux_abs_fcht, m_glogs_posint, m_glogs_posfrac, m_glogs_interp, m_glogs_harmonic_count);
Chris@10 1013 int glogs_ind = 0;
Chris@10 1014 for (int i = 0; i < m_glogs_num_f0s; i++) {
Chris@7 1015 double glogs_accum = 0;
Chris@10 1016 for (int j = 1; j <= m_glogs_n[i]; j++) {
Chris@7 1017 glogs_accum += m_glogs_interp[glogs_ind++];
Chris@7 1018 }
Chris@7 1019 m_glogs[i + i_warp*m_glogs_num_f0s] = glogs_accum/(double)m_glogs_n[i];
Chris@7 1020 }
Chris@0 1021
Chris@0 1022 // Sub/super harmonic correction
Chris@14 1023 Utils::interp1q(m_glogs + i_warp*m_glogs_num_f0s, m_glogs_third_harmonic_posint, m_glogs_third_harmonic_posfrac, m_glogs_third_harmonic, (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@14 1024 Utils::interp1q(m_glogs + i_warp*m_glogs_num_f0s, m_glogs_fifth_harmonic_posint, m_glogs_fifth_harmonic_posfrac, m_glogs_fifth_harmonic, (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@10 1025 for (int i = m_glogs_num_f0s-1; i >= m_glogs_init_f0s; i--) {
Chris@7 1026 m_glogs[i + i_warp*m_glogs_num_f0s] -= MAX(MAX(m_glogs[i-m_f0_params.num_f0s_per_oct + i_warp*m_glogs_num_f0s],m_glogs_third_harmonic[i-m_glogs_init_f0s]),m_glogs_fifth_harmonic[i-m_glogs_init_f0s]);
Chris@7 1027 //m_glogs[i] -= MAX(m_glogs[i-m_f0_params.num_f0s_per_oct],m_glogs_third_harmonic[i-m_glogs_init_f0s]);
Chris@7 1028 }
Chris@10 1029 for (int i = m_glogs_init_f0s; i < m_glogs_num_f0s-m_f0_params.num_f0s_per_oct; i++) {
Chris@7 1030 m_glogs[i + i_warp*m_glogs_num_f0s] -= 0.3*m_glogs[i+m_f0_params.num_f0s_per_oct + i_warp*m_glogs_num_f0s];
Chris@7 1031 // Median, sigma $ weights correction
Chris@7 1032 m_glogs[i + i_warp*m_glogs_num_f0s] = (m_glogs[i + i_warp*m_glogs_num_f0s]-m_glogs_median_correction[i-m_glogs_init_f0s])*m_glogs_sigma_correction[i-m_glogs_init_f0s]*m_glogs_f0_preference_weights[i-m_glogs_init_f0s];
Chris@7 1033 }
Chris@0 1034
Chris@7 1035 // Look for maximum value to determine best direction
Chris@10 1036 for (int i = m_glogs_init_f0s; i < m_glogs_num_f0s-m_f0_params.num_f0s_per_oct; i++) {
Chris@7 1037 if (m_glogs[i + i_warp*m_glogs_num_f0s] > max_glogs) {
Chris@7 1038 max_glogs = m_glogs[i + i_warp*m_glogs_num_f0s];
Chris@7 1039 ind_max_glogs = i_warp;
Chris@7 1040 }
Chris@7 1041 }
Chris@7 1042 }
Chris@0 1043
Chris@0 1044 // ----------------------------------------------------------------------------------------------
Chris@0 1045
Chris@10 1046 for (int i=m_glogs_init_f0s; i< m_glogs_num_f0s - m_f0_params.num_f0s_per_oct; i++) {
Chris@7 1047 switch (m_f0gram_mode) {
Chris@7 1048 case 1:
Chris@7 1049 max_glogs = -DBL_MAX;
Chris@10 1050 for (int i_warp = 0; i_warp < m_warp_params.num_warps; i_warp++) {
Chris@7 1051 if (m_glogs[i + i_warp*m_glogs_num_f0s] > max_glogs) {
Chris@7 1052 max_glogs = m_glogs[i + i_warp*m_glogs_num_f0s];
Chris@7 1053 ind_max_glogs = i_warp;
Chris@7 1054 }
Chris@7 1055 }
Chris@7 1056 feature.values.push_back((float)max_glogs);
Chris@7 1057 break;
Chris@7 1058 case 0:
Chris@10 1059 feature.values.push_back((float)m_glogs[i+(int)ind_max_glogs*(int)m_glogs_num_f0s]);
Chris@7 1060 break;
Chris@7 1061 }
Chris@7 1062 }
Chris@0 1063
Chris@0 1064 // ----------------------------------------------------------------------------------------------
Chris@0 1065
Chris@7 1066 fs[0].push_back(feature);
Chris@0 1067
Chris@7 1068 #ifdef DEBUG
Chris@7 1069 printf(" ----------------------------- \n");
Chris@7 1070 #endif
Chris@0 1071
Chris@7 1072 return fs;
Chris@0 1073 //---------------------------------------------------------------------------
Chris@0 1074
Chris@0 1075 }
Chris@0 1076
Chris@0 1077 FChTransformF0gram::FeatureSet
Chris@0 1078 FChTransformF0gram::getRemainingFeatures() {
Chris@0 1079 return FeatureSet();
Chris@0 1080 }
Chris@0 1081
Chris@0 1082 void
Chris@0 1083 FChTransformF0gram::design_time_window() {
Chris@0 1084
Chris@10 1085 int transitionWidth = (int)m_blockSize/128 + 1;;
Chris@14 1086 m_timeWindow = allocate<double>(m_blockSize);
Chris@14 1087 double *lp_transitionWindow = allocate<double>(transitionWidth);
Chris@0 1088
Chris@7 1089 //memset(m_timeWindow, 1.0, m_blockSize);
Chris@10 1090 for (int i = 0; i < m_blockSize; i++) {
Chris@7 1091 m_timeWindow[i] = 1.0;
Chris@7 1092 }
Chris@0 1093
Chris@10 1094 for (int i = 0; i < transitionWidth; i++) {
Chris@0 1095 lp_transitionWindow[i]=0.5*(1.0-cos(2*M_PI*(double)(i+1)/((double)transitionWidth+1.0)));
Chris@0 1096 }
Chris@0 1097
Chris@10 1098 for (int i = 0; i < transitionWidth/2; i++) {
Chris@7 1099 m_timeWindow[i] = lp_transitionWindow[i];
Chris@7 1100 m_timeWindow[m_blockSize-1-i] = lp_transitionWindow[transitionWidth-1-i];
Chris@7 1101 }
Chris@0 1102
Chris@7 1103 #ifdef DEBUG
Chris@7 1104 for (int i = 0; i < m_blockSize; i++) {
Chris@7 1105 if ((i<transitionWidth)) {
Chris@7 1106 printf(" m_timeWindow[%d] = %f.\n",i,m_timeWindow[i]);
Chris@7 1107 }
Chris@7 1108 }
Chris@7 1109 #endif
Chris@0 1110
Chris@14 1111 deallocate(lp_transitionWindow);
Chris@0 1112 }
Chris@0 1113