annotate FChTransformF0gram.cpp @ 10:af59167b3d35 perf

size_t -> int throughout: permits better optimisation for tight for-loops, making the whole thing about 10% faster
author Chris Cannam
date Tue, 02 Oct 2018 13:21:15 +0100
parents 54dd64b6cfc0
children fc8f351d2cd6
rev   line source
Chris@0 1 /*
Chris@0 2 copyright (C) 2011 I. Irigaray, M. Rocamora
Chris@0 3
Chris@0 4 This program is free software: you can redistribute it and/or modify
Chris@0 5 it under the terms of the GNU General Public License as published by
Chris@0 6 the Free Software Foundation, either version 3 of the License, or
Chris@0 7 (at your option) any later version.
Chris@0 8
Chris@0 9 This program is distributed in the hope that it will be useful,
Chris@0 10 but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@0 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@0 12 GNU General Public License for more details.
Chris@0 13
Chris@0 14 You should have received a copy of the GNU General Public License
Chris@0 15 along with this program. If not, see <http://www.gnu.org/licenses/>.
Chris@7 16 */
Chris@0 17
Chris@0 18 #include "FChTransformF0gram.h"
Chris@0 19 #include "FChTransformUtils.h"
Chris@0 20 #include <math.h>
Chris@0 21 #include <float.h>
Chris@0 22 //#define DEBUG
Chris@7 23
Chris@0 24 #define MAX(x, y) (((x) > (y)) ? (x) : (y))
Chris@0 25
Chris@0 26 FChTransformF0gram::FChTransformF0gram(float inputSampleRate) :
Chris@7 27 Plugin(inputSampleRate),
Chris@7 28 m_currentProgram("default"),
Chris@7 29 m_stepSize(0), // We are using 0 for step and block size to indicate "not yet set".
Chris@7 30 m_blockSize(0) {
Chris@0 31
Chris@0 32 m_fs = inputSampleRate;
Chris@0 33 // max frequency of interest (Hz)
Chris@0 34 m_fmax = 10000.f;
Chris@0 35 // warping parameters
Chris@0 36 m_warp_params.nsamps_twarp = 2048;
Chris@0 37 //m_warp_params.nsamps_twarp = 8;
Chris@0 38 m_warp_params.alpha_max = 4;
Chris@0 39 m_warp_params.num_warps = 21;
Chris@0 40 //m_warp_params.num_warps = 11;
Chris@0 41 m_warp_params.fact_over_samp = 2;
Chris@0 42 m_warp_params.alpha_dist = 0;
Chris@0 43 // f0 parameters
Chris@0 44 m_f0_params.f0min = 80.0;
Chris@0 45 m_f0_params.num_octs = 4;
Chris@0 46 m_f0_params.num_f0s_per_oct = 192;
Chris@0 47 m_f0_params.num_f0_hyps = 5;
Chris@0 48 m_f0_params.prefer = true;
Chris@0 49 m_f0_params.prefer_mean = 60;
Chris@0 50 m_f0_params.prefer_stdev = 18;
Chris@0 51 // glogs parameters
Chris@0 52 m_glogs_params.HP_logS = true;
Chris@0 53 m_glogs_params.att_subharms = 1;
Chris@7 54 // display parameters
Chris@7 55 m_f0gram_mode = true;
Chris@0 56
Chris@0 57 m_glogs_params.median_poly_coefs[0] = -0.000000058551680;
Chris@0 58 m_glogs_params.median_poly_coefs[1] = -0.000006945207775;
Chris@0 59 m_glogs_params.median_poly_coefs[2] = 0.002357223226588;
Chris@0 60
Chris@0 61 m_glogs_params.sigma_poly_coefs[0] = 0.000000092782308;
Chris@0 62 m_glogs_params.sigma_poly_coefs[1] = 0.000057283574898;
Chris@0 63 m_glogs_params.sigma_poly_coefs[2] = 0.022199903714288;
Chris@0 64
Chris@0 65 // number of fft points (controls zero-padding)
Chris@0 66 m_nfft = m_warp_params.nsamps_twarp;
Chris@0 67 // hop in samples
Chris@0 68 m_hop = m_warp_params.fact_over_samp * 256;
Chris@0 69
Chris@0 70 m_num_f0s = 0;
Chris@0 71
Chris@0 72 }
Chris@0 73
Chris@0 74 FChTransformF0gram::~FChTransformF0gram() {
Chris@0 75 // remeber to delete everything that deserves to
Chris@0 76 }
Chris@0 77
Chris@0 78 string
Chris@0 79 FChTransformF0gram::getIdentifier() const {
Chris@0 80 return "fchtransformf0gram";
Chris@0 81 }
Chris@0 82
Chris@0 83 string
Chris@0 84 FChTransformF0gram::getName() const {
Chris@0 85 return "Fan Chirp Transform F0gram";
Chris@0 86 }
Chris@0 87
Chris@0 88 string
Chris@0 89 FChTransformF0gram::getDescription() const {
Chris@0 90 // Return something helpful here!
Chris@0 91 return "This plug-in produces a representation, called F0gram, which exhibits the salience of the fundamental frequency of the sound sources in the audio file. The computation of the F0gram makes use of the Fan Chirp Transform analysis. It is based on the article \"Fan chirp transform for music representation\" P. Cancela, E. Lopez, M. Rocamora, International Conference on Digital Audio Effects, 13th. DAFx-10. Graz, Austria - 6-10 Sep 2010.";
Chris@0 92 }
Chris@0 93
Chris@0 94 string
Chris@0 95 FChTransformF0gram::getMaker() const {
Chris@0 96 // Your name here
Chris@0 97 return "Audio Processing Group \n Universidad de la Republica";
Chris@0 98 }
Chris@0 99
Chris@0 100 int
Chris@0 101 FChTransformF0gram::getPluginVersion() const {
Chris@0 102 // Increment this each time you release a version that behaves
Chris@0 103 // differently from the previous one
Chris@0 104 //
Chris@0 105 // 0 - initial version from scratch
Chris@0 106 return 0;
Chris@0 107 }
Chris@0 108
Chris@0 109 string
Chris@0 110 FChTransformF0gram::getCopyright() const {
Chris@0 111 // This function is not ideally named. It does not necessarily
Chris@0 112 // need to say who made the plugin -- getMaker does that -- but it
Chris@0 113 // should indicate the terms under which it is distributed. For
Chris@0 114 // example, "Copyright (year). All Rights Reserved", or "GPL"
Chris@0 115 return "copyright (C) 2011 GPL - Audio Processing Group, UdelaR";
Chris@0 116 }
Chris@0 117
Chris@0 118 FChTransformF0gram::InputDomain
Chris@0 119 FChTransformF0gram::getInputDomain() const {
Chris@0 120 return TimeDomain;
Chris@0 121 }
Chris@0 122
Chris@0 123 size_t FChTransformF0gram::getPreferredBlockSize() const {
Chris@0 124 return 8192; // 0 means "I can handle any block size"
Chris@0 125 }
Chris@0 126
Chris@0 127 size_t
Chris@0 128 FChTransformF0gram::getPreferredStepSize() const {
Chris@0 129 return 256; // 0 means "anything sensible"; in practice this
Chris@0 130 // means the same as the block size for TimeDomain
Chris@0 131 // plugins, or half of it for FrequencyDomain plugins
Chris@0 132 }
Chris@0 133
Chris@0 134 size_t
Chris@0 135 FChTransformF0gram::getMinChannelCount() const {
Chris@0 136 return 1;
Chris@0 137 }
Chris@0 138
Chris@0 139 size_t
Chris@0 140 FChTransformF0gram::getMaxChannelCount() const {
Chris@0 141 return 1;
Chris@0 142 }
Chris@0 143
Chris@0 144 FChTransformF0gram::ParameterList
Chris@0 145 FChTransformF0gram::getParameterDescriptors() const {
Chris@0 146 ParameterList list;
Chris@0 147
Chris@0 148 // If the plugin has no adjustable parameters, return an empty
Chris@0 149 // list here (and there's no need to provide implementations of
Chris@0 150 // getParameter and setParameter in that case either).
Chris@0 151
Chris@0 152 // Note that it is your responsibility to make sure the parameters
Chris@0 153 // start off having their default values (e.g. in the constructor
Chris@0 154 // above). The host needs to know the default value so it can do
Chris@0 155 // things like provide a "reset to default" function, but it will
Chris@0 156 // not explicitly set your parameters to their defaults for you if
Chris@0 157 // they have not changed in the mean time.
Chris@0 158
Chris@0 159 // ============= WARPING PARAMETERS =============
Chris@0 160
Chris@0 161 ParameterDescriptor fmax;
Chris@0 162 fmax.identifier = "fmax";
Chris@0 163 fmax.name = "Maximum frequency";
Chris@0 164 fmax.description = "Maximum frequency of interest for the analysis.";
Chris@0 165 fmax.unit = "Hz";
Chris@0 166 fmax.minValue = 2000;
Chris@0 167 fmax.maxValue = 22050;
Chris@0 168 fmax.defaultValue = 10000;
Chris@0 169 fmax.isQuantized = true;
Chris@0 170 fmax.quantizeStep = 1.0;
Chris@0 171 list.push_back(fmax);
Chris@0 172
Chris@0 173 ParameterDescriptor nsamp;
Chris@0 174 nsamp.identifier = "nsamp";
Chris@0 175 nsamp.name = "Number of samples";
Chris@0 176 nsamp.description = "Number of samples of the time warped frame";
Chris@0 177 nsamp.unit = "samples";
Chris@0 178 nsamp.minValue = 128;
Chris@0 179 nsamp.maxValue = 4096;
Chris@0 180 nsamp.defaultValue = 2048;
Chris@0 181 nsamp.isQuantized = true;
Chris@0 182 nsamp.quantizeStep = 1.0;
Chris@0 183 list.push_back(nsamp);
Chris@0 184
Chris@0 185 ParameterDescriptor nfft;
Chris@0 186 nfft.identifier = "nfft";
Chris@0 187 nfft.name = "FFT number of points";
Chris@0 188 nfft.description = "Number of FFT points (controls zero-padding)";
Chris@0 189 nfft.unit = "samples";
Chris@0 190 nfft.minValue = 0;
Chris@0 191 nfft.maxValue = 4;
Chris@0 192 nfft.defaultValue = 3;
Chris@0 193 nfft.isQuantized = true;
Chris@0 194 nfft.quantizeStep = 1.0;
Chris@0 195 nfft.valueNames.push_back("256");
Chris@0 196 nfft.valueNames.push_back("512");
Chris@0 197 nfft.valueNames.push_back("1024");
Chris@0 198 nfft.valueNames.push_back("2048");
Chris@0 199 nfft.valueNames.push_back("4096");
Chris@0 200 nfft.valueNames.push_back("8192");
Chris@0 201 list.push_back(nfft);
Chris@0 202
Chris@0 203 ParameterDescriptor alpha_max;
Chris@0 204 alpha_max.identifier = "alpha_max";
Chris@0 205 alpha_max.name = "Maximum alpha value";
Chris@0 206 alpha_max.description = "Maximum value for the alpha parameter of the transform.";
Chris@0 207 alpha_max.unit = "Hz/s";
Chris@0 208 alpha_max.minValue = -10;
Chris@0 209 alpha_max.maxValue = 10;
Chris@0 210 alpha_max.defaultValue = 5;
Chris@0 211 alpha_max.isQuantized = true;
Chris@0 212 alpha_max.quantizeStep = 1.0;
Chris@0 213 list.push_back(alpha_max);
Chris@0 214
Chris@0 215 ParameterDescriptor num_warps;
Chris@0 216 num_warps.identifier = "num_warps";
Chris@0 217 num_warps.name = "Number of warpings";
Chris@0 218 num_warps.description = "Number of different warpings in the specified range (must be odd).";
Chris@0 219 num_warps.unit = "";
Chris@0 220 num_warps.minValue = 1;
Chris@0 221 num_warps.maxValue = 101;
Chris@0 222 num_warps.defaultValue = 21;
Chris@0 223 num_warps.isQuantized = true;
Chris@0 224 num_warps.quantizeStep = 2.0;
Chris@0 225 list.push_back(num_warps);
Chris@0 226
Chris@0 227 ParameterDescriptor alpha_dist;
Chris@0 228 alpha_dist.identifier = "alpha_dist";
Chris@0 229 alpha_dist.name = "alpha distribution";
Chris@0 230 alpha_dist.description = "Type of distribution of alpha values (linear or log).";
Chris@0 231 alpha_dist.unit = "";
Chris@0 232 alpha_dist.minValue = 0;
Chris@0 233 alpha_dist.maxValue = 1;
Chris@0 234 alpha_dist.defaultValue = 1;
Chris@0 235 alpha_dist.isQuantized = true;
Chris@0 236 alpha_dist.quantizeStep = 1.0;
Chris@0 237 // lin (0), log (1)
Chris@0 238 alpha_dist.valueNames.push_back("lin");
Chris@0 239 alpha_dist.valueNames.push_back("log");
Chris@0 240 list.push_back(alpha_dist);
Chris@0 241
Chris@0 242 // ============= F0-GRAM PARAMETERS =============
Chris@0 243
Chris@0 244 ParameterDescriptor f0min;
Chris@0 245 f0min.identifier = "f0min";
Chris@0 246 f0min.name = "min f0";
Chris@0 247 f0min.description = "Minimum fundamental frequency (f0) value.";
Chris@0 248 f0min.unit = "Hz";
Chris@0 249 f0min.minValue = 1;
Chris@0 250 f0min.maxValue = 500;
Chris@0 251 f0min.defaultValue = 80;
Chris@0 252 f0min.isQuantized = true;
Chris@0 253 f0min.quantizeStep = 1.0;
Chris@0 254 list.push_back(f0min);
Chris@0 255
Chris@0 256 ParameterDescriptor num_octs;
Chris@0 257 num_octs.identifier = "num_octs";
Chris@0 258 num_octs.name = "number of octaves";
Chris@0 259 num_octs.description = "Number of octaves for F0gram computation.";
Chris@0 260 num_octs.unit = "";
Chris@0 261 num_octs.minValue = 1;
Chris@0 262 num_octs.maxValue = 10;
Chris@0 263 num_octs.defaultValue = 4;
Chris@0 264 num_octs.isQuantized = true;
Chris@0 265 num_octs.quantizeStep = 1.0;
Chris@0 266 list.push_back(num_octs);
Chris@0 267
Chris@0 268 ParameterDescriptor num_f0_hyps;
Chris@0 269 num_f0_hyps.identifier = "num_f0_hyps";
Chris@0 270 num_f0_hyps.name = "number of f0 hypotesis";
Chris@0 271 num_f0_hyps.description = "Number of f0 hypotesis to extract.";
Chris@0 272 num_f0_hyps.unit = "";
Chris@0 273 num_f0_hyps.minValue = 1;
Chris@0 274 num_f0_hyps.maxValue = 100;
Chris@0 275 num_f0_hyps.defaultValue = 10;
Chris@0 276 num_f0_hyps.isQuantized = true;
Chris@0 277 num_f0_hyps.quantizeStep = 1.0;
Chris@0 278 list.push_back(num_f0_hyps);
Chris@0 279
Chris@0 280 ParameterDescriptor f0s_per_oct;
Chris@0 281 f0s_per_oct.identifier = "f0s_per_oct";
Chris@0 282 f0s_per_oct.name = "f0 values per octave";
Chris@0 283 f0s_per_oct.description = "Number of f0 values per octave.";
Chris@0 284 f0s_per_oct.unit = "";
Chris@0 285 f0s_per_oct.minValue = 12;
Chris@0 286 f0s_per_oct.maxValue = 768;
Chris@0 287 f0s_per_oct.defaultValue = 192;
Chris@0 288 f0s_per_oct.isQuantized = true;
Chris@0 289 f0s_per_oct.quantizeStep = 1.0;
Chris@0 290 list.push_back(f0s_per_oct);
Chris@0 291
Chris@0 292 ParameterDescriptor f0_prefer_fun;
Chris@0 293 f0_prefer_fun.identifier = "f0_prefer_fun";
Chris@0 294 f0_prefer_fun.name = "f0 preference function";
Chris@0 295 f0_prefer_fun.description = "Whether to use a f0 weighting function.";
Chris@0 296 f0_prefer_fun.unit = "";
Chris@0 297 f0_prefer_fun.minValue = 0;
Chris@0 298 f0_prefer_fun.maxValue = 1;
Chris@0 299 f0_prefer_fun.defaultValue = 1;
Chris@0 300 f0_prefer_fun.isQuantized = true;
Chris@0 301 f0_prefer_fun.quantizeStep = 1.0;
Chris@0 302 list.push_back(f0_prefer_fun);
Chris@0 303
Chris@0 304 ParameterDescriptor f0_prefer_mean;
Chris@0 305 f0_prefer_mean.identifier = "f0_prefer_mean";
Chris@0 306 f0_prefer_mean.name = "mean f0 preference function";
Chris@0 307 f0_prefer_mean.description = "Mean value for f0 weighting function (MIDI number).";
Chris@0 308 f0_prefer_mean.unit = "";
Chris@0 309 f0_prefer_mean.minValue = 1;
Chris@0 310 f0_prefer_mean.maxValue = 127;
Chris@0 311 f0_prefer_mean.defaultValue = 60;
Chris@0 312 f0_prefer_mean.isQuantized = true;
Chris@0 313 f0_prefer_mean.quantizeStep = 1.0;
Chris@0 314 list.push_back(f0_prefer_mean);
Chris@0 315
Chris@0 316 ParameterDescriptor f0_prefer_stdev;
Chris@0 317 f0_prefer_stdev.identifier = "f0_prefer_stdev";
Chris@0 318 f0_prefer_stdev.name = "stdev of f0 preference function";
Chris@0 319 f0_prefer_stdev.description = "Stdev for f0 weighting function (MIDI number).";
Chris@0 320 f0_prefer_stdev.unit = "";
Chris@0 321 f0_prefer_stdev.minValue = 1;
Chris@0 322 f0_prefer_stdev.maxValue = 127;
Chris@0 323 f0_prefer_stdev.defaultValue = 18;
Chris@0 324 f0_prefer_stdev.isQuantized = true;
Chris@0 325 f0_prefer_stdev.quantizeStep = 1.0;
Chris@0 326 list.push_back(f0_prefer_stdev);
Chris@0 327
Chris@0 328 ParameterDescriptor f0gram_mode;
Chris@0 329 f0gram_mode.identifier = "f0gram_mode";
Chris@0 330 f0gram_mode.name = "display mode of f0gram";
Chris@0 331 f0gram_mode.description = "Display all bins of the best direction, or the best bin for each direction.";
Chris@0 332 f0gram_mode.unit = "";
Chris@0 333 f0gram_mode.minValue = 0;
Chris@0 334 f0gram_mode.maxValue = 1;
Chris@0 335 f0gram_mode.defaultValue = 1;
Chris@0 336 f0gram_mode.isQuantized = true;
Chris@0 337 f0gram_mode.quantizeStep = 1.0;
Chris@0 338 list.push_back(f0gram_mode);
Chris@0 339
Chris@0 340 return list;
Chris@0 341 }
Chris@0 342
Chris@0 343 float
Chris@0 344 FChTransformF0gram::getParameter(string identifier) const {
Chris@0 345
Chris@0 346 if (identifier == "fmax") {
Chris@0 347 return m_fmax;
Chris@0 348 } else if (identifier == "nsamp") {
Chris@0 349 return m_warp_params.nsamps_twarp;
Chris@0 350 } else if (identifier == "alpha_max") {
Chris@0 351 return m_warp_params.alpha_max;
Chris@0 352 } else if (identifier == "num_warps") {
Chris@0 353 return m_warp_params.num_warps;
Chris@0 354 } else if (identifier == "alpha_dist") {
Chris@0 355 return m_warp_params.alpha_dist;
Chris@0 356 } else if (identifier == "nfft") {
Chris@0 357 return m_nfft;
Chris@0 358 } else if (identifier == "f0min") {
Chris@0 359 return m_f0_params.f0min;
Chris@0 360 } else if (identifier == "num_octs") {
Chris@0 361 return m_f0_params.num_octs;
Chris@0 362 } else if (identifier == "f0s_per_oct") {
Chris@0 363 return m_f0_params.num_f0s_per_oct;
Chris@0 364 } else if (identifier == "num_f0_hyps") {
Chris@0 365 return m_f0_params.num_f0_hyps;
Chris@0 366 } else if (identifier == "f0_prefer_fun") {
Chris@0 367 return m_f0_params.prefer;
Chris@0 368 } else if (identifier == "f0_prefer_mean") {
Chris@0 369 return m_f0_params.prefer_mean;
Chris@0 370 } else if (identifier == "f0_prefer_stdev") {
Chris@0 371 return m_f0_params.prefer_stdev;
Chris@7 372 } else if (identifier == "f0gram_mode") {
Chris@0 373 return m_f0gram_mode;
Chris@0 374 } else {
Chris@0 375 return 0.f;
Chris@0 376 }
Chris@0 377
Chris@0 378 }
Chris@0 379
Chris@0 380 void FChTransformF0gram::setParameter(string identifier, float value) {
Chris@0 381
Chris@0 382 if (identifier == "fmax") {
Chris@0 383 m_fmax = value;
Chris@0 384 } else if (identifier == "nsamp") {
Chris@0 385 m_warp_params.nsamps_twarp = value;
Chris@0 386 } else if (identifier == "alpha_max") {
Chris@0 387 m_warp_params.alpha_max = value;
Chris@0 388 } else if (identifier == "num_warps") {
Chris@0 389 m_warp_params.num_warps = value;
Chris@0 390 } else if (identifier == "alpha_dist") {
Chris@0 391 m_warp_params.alpha_dist = value;
Chris@0 392 } else if (identifier == "nfft") {
Chris@0 393 m_nfft = value;
Chris@0 394 } else if (identifier == "f0min") {
Chris@0 395 m_f0_params.f0min = value;
Chris@0 396 } else if (identifier == "num_octs") {
Chris@0 397 m_f0_params.num_octs = value;
Chris@0 398 } else if (identifier == "f0s_per_oct") {
Chris@0 399 m_f0_params.num_f0s_per_oct = value;
Chris@0 400 } else if (identifier == "num_f0_hyps") {
Chris@0 401 m_f0_params.num_f0_hyps = value;
Chris@0 402 } else if (identifier == "f0_prefer_fun") {
Chris@0 403 m_f0_params.prefer = value;
Chris@0 404 } else if (identifier == "f0_prefer_mean") {
Chris@0 405 m_f0_params.prefer_mean = value;
Chris@0 406 } else if (identifier == "f0_prefer_stdev") {
Chris@0 407 m_f0_params.prefer_stdev = value;
Chris@0 408 } else if (identifier == "f0gram_mode") {
Chris@0 409 m_f0gram_mode = value;
Chris@0 410 }
Chris@0 411
Chris@0 412 }
Chris@0 413
Chris@0 414 FChTransformF0gram::ProgramList
Chris@0 415 FChTransformF0gram::getPrograms() const {
Chris@0 416 ProgramList list;
Chris@0 417
Chris@0 418 list.push_back("default");
Chris@0 419
Chris@0 420 return list;
Chris@0 421 }
Chris@0 422
Chris@0 423 string
Chris@0 424 FChTransformF0gram::getCurrentProgram() const {
Chris@0 425 return m_currentProgram;
Chris@0 426 }
Chris@0 427
Chris@0 428 void
Chris@0 429 FChTransformF0gram::selectProgram(string name) {
Chris@0 430
Chris@0 431 m_currentProgram = name;
Chris@0 432
Chris@0 433 if (name == "default") {
Chris@0 434 m_fmax = 10000.f;
Chris@0 435
Chris@0 436 m_warp_params.nsamps_twarp = 2048;
Chris@0 437 m_warp_params.alpha_max = 4;
Chris@0 438 m_warp_params.num_warps = 21;
Chris@0 439 m_warp_params.fact_over_samp = 2;
Chris@0 440 m_warp_params.alpha_dist = 0;
Chris@0 441
Chris@0 442 m_f0_params.f0min = 80.0;
Chris@0 443 m_f0_params.num_octs = 4;
Chris@0 444 m_f0_params.num_f0s_per_oct = 192;
Chris@0 445 m_f0_params.num_f0_hyps = 5;
Chris@0 446 m_f0_params.prefer = true;
Chris@0 447 m_f0_params.prefer_mean = 60;
Chris@0 448 m_f0_params.prefer_stdev = 18;
Chris@0 449
Chris@0 450 m_glogs_params.HP_logS = true;
Chris@0 451 m_glogs_params.att_subharms = 1;
Chris@0 452
Chris@0 453 m_glogs_params.median_poly_coefs[0] = -0.000000058551680;
Chris@0 454 m_glogs_params.median_poly_coefs[1] = -0.000006945207775;
Chris@0 455 m_glogs_params.median_poly_coefs[2] = 0.002357223226588;
Chris@0 456
Chris@0 457 m_glogs_params.sigma_poly_coefs[0] = 0.000000092782308;
Chris@0 458 m_glogs_params.sigma_poly_coefs[1] = 0.000057283574898;
Chris@0 459 m_glogs_params.sigma_poly_coefs[2] = 0.022199903714288;
Chris@0 460
Chris@0 461 m_nfft = m_warp_params.nsamps_twarp;
Chris@0 462 m_hop = m_warp_params.fact_over_samp * 256;
Chris@0 463
Chris@0 464 m_num_f0s = 0;
Chris@0 465
Chris@7 466 m_f0gram_mode = 1;
Chris@0 467
Chris@0 468 }
Chris@0 469 }
Chris@0 470
Chris@0 471 FChTransformF0gram::OutputList
Chris@0 472 FChTransformF0gram::getOutputDescriptors() const {
Chris@0 473
Chris@0 474 OutputList list;
Chris@0 475
Chris@0 476 // See OutputDescriptor documentation for the possibilities here.
Chris@0 477 // Every plugin must have at least one output.
Chris@0 478
Chris@0 479 /* f0 values of F0gram grid as string values */
Chris@0 480 vector<string> f0values;
Chris@10 481 int ind = 0;
Chris@0 482 char f0String[10];
Chris@0 483 while (ind < m_num_f0s) {
Chris@0 484 sprintf(f0String, "%4.2f", m_f0s[ind]);
Chris@0 485 f0values.push_back(f0String);
Chris@0 486 ind++;
Chris@0 487 }
Chris@0 488
Chris@0 489 /* The F0gram */
Chris@0 490 OutputDescriptor d;
Chris@0 491 d.identifier = "f0gram";
Chris@0 492 d.name = "F0gram: salience of f0s";
Chris@0 493 d.description = "This representation show the salience of the different f0s in the signal.";
Chris@0 494 d.unit = "Hertz";
Chris@0 495 d.hasFixedBinCount = true;
Chris@0 496 //d.binCount = m_num_f0s;
Chris@7 497 //d.binCount = m_blockSize/2+1;
Chris@7 498 //d.binCount = m_warp_params.nsamps_twarp/2+1;
Chris@7 499 //d.binCount = m_warpings.nsamps_torig;
Chris@7 500 d.binCount = m_f0_params.num_octs*m_f0_params.num_f0s_per_oct;
Chris@0 501 d.binNames = f0values;
Chris@0 502 d.hasKnownExtents = false;
Chris@0 503 d.isQuantized = false;
Chris@0 504 d.sampleType = OutputDescriptor::OneSamplePerStep;
Chris@0 505 d.hasDuration = false;
Chris@0 506 list.push_back(d);
Chris@0 507
Chris@0 508 return list;
Chris@0 509 }
Chris@0 510
Chris@0 511 bool
Chris@0 512 FChTransformF0gram::initialise(size_t channels, size_t stepSize, size_t blockSize) {
Chris@0 513 if (channels < getMinChannelCount() ||
Chris@7 514 channels > getMaxChannelCount()) return false;
Chris@0 515
Chris@0 516 // set blockSize and stepSize (but changed below)
Chris@0 517 m_blockSize = blockSize;
Chris@0 518 m_stepSize = stepSize;
Chris@0 519
Chris@0 520 // WARNING !!!
Chris@0 521 // these values in fact are determined by the sampling frequency m_fs
Chris@0 522 // the parameters used below correspond to default values i.e. m_fs = 44.100 Hz
Chris@0 523 //m_blockSize = 4 * m_warp_params.nsamps_twarp;
Chris@0 524 m_stepSize = floor(m_hop / m_warp_params.fact_over_samp);
Chris@0 525
Chris@0 526 /* initialise m_warp_params */
Chris@0 527 // FChTF0gram:warping_design m_warpings = new warping_design;
Chris@0 528 /* initialise m_f0_params */
Chris@0 529
Chris@0 530 /* initialise m_glogs_params */
Chris@7 531 design_GLogS();
Chris@0 532
Chris@0 533 /* design of FChT */
Chris@0 534 // design_fcht(m_warps, m_accums, m_f0s)
Chris@0 535 design_FChT();
Chris@0 536
Chris@7 537 design_FFT();
Chris@0 538
Chris@7 539 design_LPF();
Chris@0 540
Chris@7 541 design_time_window();
Chris@0 542
Chris@7 543 // Create Hanning window for warped signals
Chris@7 544 mp_HanningWindow = new double[m_warp_params.nsamps_twarp];
Chris@7 545 bool normalize = false;
Chris@7 546 hanning_window(mp_HanningWindow, m_warp_params.nsamps_twarp, normalize);
Chris@0 547
Chris@0 548 return true;
Chris@0 549 }
Chris@0 550
Chris@0 551 void
Chris@0 552 FChTransformF0gram::design_GLogS() {
Chris@0 553
Chris@7 554 // total number & initial quantity of f0s
Chris@10 555 m_glogs_init_f0s = (int)(((double)m_f0_params.num_f0s_per_oct)*log2(5.0))+1;
Chris@7 556 m_glogs_num_f0s = (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct + m_glogs_init_f0s;
Chris@0 557
Chris@7 558 // Initialize arrays
Chris@7 559 m_glogs_f0 = new double[m_glogs_num_f0s];
Chris@7 560 m_glogs = new double[m_glogs_num_f0s*m_warp_params.num_warps];
Chris@10 561 m_glogs_n = new int[m_glogs_num_f0s];
Chris@10 562 m_glogs_index = new int[m_glogs_num_f0s];
Chris@0 563
Chris@7 564 // Compute f0 values
Chris@7 565 m_glogs_harmonic_count = 0;
Chris@7 566 double factor = (double)(m_warp_params.nsamps_twarp/2)/(double)(m_warp_params.nsamps_twarp/2+1);
Chris@10 567 for (int i = 0; i < m_glogs_num_f0s; i++) {
Chris@7 568 m_glogs_f0[i] = (m_f0_params.f0min/5.0)*pow(2.0,(double)i/(double)m_f0_params.num_f0s_per_oct);
Chris@7 569 // for every f0 compute number of partials less or equal than m_fmax.
Chris@7 570 m_glogs_n[i] = m_fmax*factor/m_glogs_f0[i];
Chris@7 571 m_glogs_index[i] = m_glogs_harmonic_count;
Chris@7 572 m_glogs_harmonic_count += m_glogs_n[i];
Chris@7 573 }
Chris@0 574
Chris@7 575 // Initialize arrays for interpolation
Chris@10 576 m_glogs_posint = new int[m_glogs_harmonic_count];
Chris@7 577 m_glogs_posfrac = new double[m_glogs_harmonic_count];
Chris@7 578 m_glogs_interp = new double[m_glogs_harmonic_count];
Chris@0 579
Chris@7 580 // Compute int & frac of interpolation positions
Chris@10 581 int aux_index = 0;
Chris@7 582 double aux_pos;
Chris@10 583 for (int i = 0; i < m_glogs_num_f0s; i++) {
Chris@10 584 for (int j = 1; j <= m_glogs_n[i]; j++) {
Chris@7 585 // indice en el vector de largo t_warp/2+1 donde el ultimo valor corresponde a f=m_fmax
Chris@7 586 aux_pos = ((double)j*m_glogs_f0[i])*((double)(m_warp_params.nsamps_twarp/2+1))/m_fmax;
Chris@10 587 m_glogs_posint[aux_index] = (int)aux_pos;
Chris@7 588 m_glogs_posfrac[aux_index] = aux_pos - (double)m_glogs_posint[aux_index];
Chris@7 589 aux_index++;
Chris@7 590 }
Chris@7 591 }
Chris@0 592
Chris@7 593 // Third harmonic attenuation
Chris@7 594 double aux_third_harmonic;
Chris@10 595 m_glogs_third_harmonic_posint = new int[(m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct];
Chris@7 596 m_glogs_third_harmonic_posfrac = new double[(m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct];
Chris@10 597 for (int i = 0; i < (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct; i++) {
Chris@7 598 aux_third_harmonic = (double)i + (double)m_glogs_init_f0s - ((double)m_f0_params.num_f0s_per_oct)*log2(3.0);
Chris@10 599 m_glogs_third_harmonic_posint[i] = (int)aux_third_harmonic;
Chris@7 600 m_glogs_third_harmonic_posfrac[i] = aux_third_harmonic - (double)(m_glogs_third_harmonic_posint[i]);
Chris@7 601 }
Chris@7 602 m_glogs_third_harmonic = new double[(m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct];
Chris@0 603
Chris@7 604 // Fifth harmonic attenuation
Chris@7 605 double aux_fifth_harmonic;
Chris@10 606 m_glogs_fifth_harmonic_posint = new int[(m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct];
Chris@7 607 m_glogs_fifth_harmonic_posfrac = new double[(m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct];
Chris@10 608 for (int i = 0; i < (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct; i++) {
Chris@7 609 aux_fifth_harmonic = (double)i + (double)m_glogs_init_f0s - ((double)m_f0_params.num_f0s_per_oct)*log2(5.0);
Chris@10 610 m_glogs_fifth_harmonic_posint[i] = (int)aux_fifth_harmonic;
Chris@7 611 m_glogs_fifth_harmonic_posfrac[i] = aux_fifth_harmonic - (double)(m_glogs_fifth_harmonic_posint[i]);
Chris@7 612 }
Chris@7 613 m_glogs_fifth_harmonic = new double[(m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct];
Chris@0 614
Chris@7 615 // Normalization & attenuation windows
Chris@7 616 m_glogs_f0_preference_weights = new double[m_f0_params.num_octs*m_f0_params.num_f0s_per_oct];
Chris@7 617 m_glogs_median_correction = new double[m_f0_params.num_octs*m_f0_params.num_f0s_per_oct];
Chris@7 618 m_glogs_sigma_correction = new double[m_f0_params.num_octs*m_f0_params.num_f0s_per_oct];
Chris@7 619 m_glogs_hf_smoothing_window = new double[m_warp_params.nsamps_twarp/2+1];
Chris@7 620 double MIDI_value;
Chris@10 621 for (int i = 0; i < m_f0_params.num_octs*m_f0_params.num_f0s_per_oct; i++) {
Chris@7 622 MIDI_value = 69.0 + 12.0 * log2(m_glogs_f0[i + m_glogs_init_f0s]/440.0);
Chris@7 623 m_glogs_f0_preference_weights[i] = 1.0/sqrt(2.0*M_PI*m_f0_params.prefer_stdev*m_f0_params.prefer_stdev)*exp(-(MIDI_value-m_f0_params.prefer_mean)*(MIDI_value-m_f0_params.prefer_mean)/(2.0*m_f0_params.prefer_stdev*m_f0_params.prefer_stdev));
Chris@7 624 m_glogs_f0_preference_weights[i] = (0.01 + m_glogs_f0_preference_weights[i]) / (1.01);
Chris@0 625
Chris@7 626 m_glogs_median_correction[i] = m_glogs_params.median_poly_coefs[0]*(i+1.0)*(i+1.0) + m_glogs_params.median_poly_coefs[1]*(i+1.0) + m_glogs_params.median_poly_coefs[2];
Chris@7 627 m_glogs_sigma_correction[i] = 1.0 / (m_glogs_params.sigma_poly_coefs[0]*(i+1.0)*(i+1.0) + m_glogs_params.sigma_poly_coefs[1]*(i+1.0) + m_glogs_params.sigma_poly_coefs[2]);
Chris@7 628 }
Chris@0 629
Chris@7 630 double smooth_width = 1000.0; // hertz.
Chris@7 631 double smooth_aux = (double)(m_warp_params.nsamps_twarp/2+1)*(m_fmax-smooth_width)/m_fmax;
Chris@10 632 for (int i = 0; i < m_warp_params.nsamps_twarp/2+1; i++) {
Chris@7 633 if (i < smooth_aux) {
Chris@7 634 m_glogs_hf_smoothing_window[i] = 1.0;
Chris@7 635 } else {
Chris@7 636 m_glogs_hf_smoothing_window[i] = ((double)i - (double)m_warp_params.nsamps_twarp/2.0)*(-1.0/((double)(m_warp_params.nsamps_twarp/2+1)-smooth_aux));
Chris@7 637 }
Chris@7 638 }
Chris@0 639 }
Chris@0 640
Chris@0 641 void
Chris@0 642 FChTransformF0gram::design_FFT() {
Chris@0 643 in = (fftw_complex*) fftw_malloc(sizeof (fftw_complex) * m_nfft);
Chris@0 644 out = (fftw_complex*) fftw_malloc(sizeof (fftw_complex) * m_nfft);
Chris@7 645 //TODO verificar que el tipo de datos de in_window es del tipo double, era float.
Chris@0 646 in_window = (double*) fftw_malloc(sizeof (double) * m_nfft);
Chris@0 647 planFFT = fftw_plan_dft_1d(m_nfft, in, out, FFTW_FORWARD, FFTW_ESTIMATE);
Chris@0 648
Chris@7 649 //TODO hacer diseño del FFT para el filtrado pasabajos.
Chris@0 650
Chris@0 651 }
Chris@0 652
Chris@0 653 void
Chris@0 654 FChTransformF0gram::design_FChT() {
Chris@0 655
Chris@0 656 /*
Chris@0 657 * FILES FOR DEBUGGING
Chris@0 658 */
Chris@0 659
Chris@0 660 //ofstream output("output.txt");
Chris@0 661
Chris@0 662
Chris@0 663 /* ============= WARPING DESIGN ============= */
Chris@0 664
Chris@0 665 // sampling frequency after oversampling
Chris@0 666 m_warpings.fs_orig = m_warp_params.fact_over_samp * m_fs;
Chris@0 667
Chris@0 668 // number of samples of the original signal frame
Chris@0 669 m_warpings.nsamps_torig = 4 * m_warp_params.fact_over_samp * m_warp_params.nsamps_twarp;
Chris@0 670 // equivalent to: m_warpings.nsamps_torig = m_warp_params.fact_over_samp * m_blockSize;
Chris@0 671
Chris@0 672 // time instants of the original signal frame
Chris@0 673 double t_orig[m_warpings.nsamps_torig];
Chris@0 674 //float * t_orig = new float [m_warpings.nsamps_torig];
Chris@10 675 for (int ind = 0; ind < m_warpings.nsamps_torig; ind++) {
Chris@0 676 t_orig[ind] = ((double)(ind + 1) - (double)m_warpings.nsamps_torig / 2.0) / m_warpings.fs_orig;
Chris@0 677 }
Chris@0 678
Chris@0 679 // linear chirps warping definition as relative frequency deviation
Chris@0 680 //double * freq_relative = new double [m_warpings.nsamps_torig * m_warp_params.num_warps];
Chris@7 681 //TODO
Chris@7 682 double *freq_relative = new double [m_warpings.nsamps_torig * m_warp_params.num_warps];
Chris@0 683 define_warps_linear_chirps(freq_relative, t_orig);
Chris@0 684
Chris@0 685 // maximum relative frequency deviation
Chris@0 686 double freq_relative_max = 0;
Chris@10 687 for (int i = 0; i < m_warpings.nsamps_torig; i++)
Chris@10 688 for (int j = 0; j < m_warp_params.num_warps; j++)
Chris@0 689 if (freq_relative_max < freq_relative[j * m_warpings.nsamps_torig + i])
Chris@0 690 freq_relative_max = freq_relative[j * m_warpings.nsamps_torig + i];
Chris@0 691
Chris@0 692 // sampling frequency of warped signal to be free of aliasing up to fmax
Chris@0 693 m_warpings.fs_warp = 2 * m_fmax * freq_relative_max;
Chris@0 694
Chris@0 695 // time instants of the warped signal frame
Chris@0 696 double t_warp[m_warp_params.nsamps_twarp];
Chris@10 697 for (int ind = 0; ind < m_warp_params.nsamps_twarp; ind++) {
Chris@0 698 t_warp[ind] = ((double)((int)(ind + 1)- (int)m_warp_params.nsamps_twarp / 2)) / (double)m_warpings.fs_warp;
Chris@0 699 }
Chris@0 700
Chris@0 701 // design of warpings for efficient interpolation
Chris@0 702 design_warps(freq_relative, t_orig, t_warp);
Chris@0 703
Chris@0 704
Chris@0 705 /*
Chris@0 706 * FILES FOR DEBUGGING
Chris@0 707 */
Chris@0 708
Chris@0 709 /*
Chris@7 710 output << "chirp_rates" << endl;
Chris@10 711 for (int j = 0; j < m_warp_params.num_warps; j++){
Chris@7 712 output << m_warpings.chirp_rates[j];
Chris@7 713 output << " ";
Chris@7 714 }
Chris@7 715 output << endl << "freq_relative" << endl;
Chris@0 716
Chris@10 717 for (int i = 0; i < m_warpings.nsamps_torig; i++){
Chris@10 718 for (int j = 0; j < m_warp_params.num_warps; j++){
Chris@7 719 output << freq_relative[j * m_warpings.nsamps_torig + i];
Chris@7 720 output << " ";
Chris@7 721 }
Chris@7 722 output << endl;
Chris@7 723 }
Chris@0 724
Chris@7 725 output << endl << "t_orig" << endl;
Chris@0 726
Chris@10 727 for (int i = 0; i < m_warpings.nsamps_torig; i++){
Chris@7 728 output << t_orig[i] << endl ;
Chris@7 729 }
Chris@7 730 */
Chris@0 731
Chris@7 732 delete [] freq_relative;
Chris@0 733 //output.close();
Chris@0 734
Chris@0 735 /* ============= FFTW PLAN DESIGN ============= */
Chris@7 736 // Initialize 2-d array for warped signals
Chris@7 737 x_warping = new double[m_warp_params.nsamps_twarp];
Chris@7 738 m_absFanChirpTransform = (double*)fftw_malloc(sizeof (double) * m_warp_params.num_warps * (m_warp_params.nsamps_twarp/2 + 1));
Chris@7 739 m_auxFanChirpTransform = (fftw_complex*)fftw_malloc(sizeof ( fftw_complex) * (m_warp_params.nsamps_twarp/2 + 1));
Chris@7 740 plan_forward_xwarping = fftw_plan_dft_r2c_1d(m_warp_params.nsamps_twarp, x_warping, m_auxFanChirpTransform, FFTW_ESTIMATE);
Chris@0 741
Chris@0 742 }
Chris@0 743
Chris@0 744 void
Chris@0 745 FChTransformF0gram::design_warps(double * freq_relative, double * t_orig, double * t_warp) {
Chris@0 746 /* the warping is done by interpolating the original signal in time instants
Chris@0 747 given by the desired frequency deviation, to do this, the interpolation
Chris@0 748 instants are stored in a structure as an integer index and a fractional value
Chris@0 749 hypothesis: sampling frequency at the central point equals the original
Chris@7 750 */
Chris@0 751
Chris@10 752 m_warpings.pos_int = new int[m_warp_params.num_warps * m_warp_params.nsamps_twarp];
Chris@7 753 m_warpings.pos_frac = new double[m_warp_params.num_warps * m_warp_params.nsamps_twarp];
Chris@0 754
Chris@7 755 // vector of phase values
Chris@7 756 double *phi = new double[m_warpings.nsamps_torig];
Chris@7 757 double aux;
Chris@0 758
Chris@7 759 // warped positions
Chris@7 760 double *pos1 = new double[m_warp_params.nsamps_twarp*m_warp_params.num_warps];
Chris@0 761
Chris@10 762 for (int i = 0; i < m_warp_params.num_warps; i++) {
Chris@0 763
Chris@7 764 // integration of relative frequency to obtain phase values
Chris@7 765 cumtrapz(t_orig, freq_relative + i*(m_warpings.nsamps_torig), m_warpings.nsamps_torig, phi);
Chris@0 766
Chris@7 767 // centering of phase values to force original frequency in the middle
Chris@7 768 aux = phi[m_warpings.nsamps_torig/2];
Chris@10 769 for (int j = 0; j < m_warpings.nsamps_torig; j++) {
Chris@7 770 phi[j] -= aux;
Chris@7 771 } //for
Chris@0 772
Chris@7 773 // interpolation of phase values to obtain warped positions
Chris@7 774 interp1(phi, t_orig, m_warpings.nsamps_torig, t_warp, pos1 + i*m_warp_params.nsamps_twarp, m_warp_params.nsamps_twarp);
Chris@0 775 }
Chris@0 776
Chris@0 777 // % previous sample index
Chris@0 778 // pos1_int = uint32(floor(pos1))';
Chris@0 779 // % integer corresponding to previous sample index in "c"
Chris@0 780 // warps.pos1_int = (pos1_int - uint32(1));
Chris@0 781 // % fractional value that defines the warped position
Chris@0 782 // warps.pos1_frac = (double(pos1)' - double(pos1_int));
Chris@0 783
Chris@10 784 for (int j = 0; j < m_warp_params.nsamps_twarp*m_warp_params.num_warps; j++) {
Chris@7 785 // previous sample index
Chris@7 786 pos1[j] = pos1[j]*m_warpings.fs_orig + m_warpings.nsamps_torig/2 + 1;
Chris@10 787 m_warpings.pos_int[j] = (int) pos1[j];
Chris@7 788 m_warpings.pos_frac[j] = pos1[j] - (double)(m_warpings.pos_int[j]);
Chris@7 789 } //for
Chris@0 790
Chris@7 791 delete [] phi;
Chris@7 792 delete [] pos1;
Chris@0 793 }
Chris@0 794
Chris@0 795 void
Chris@0 796 FChTransformF0gram::define_warps_linear_chirps(double * freq_relative, double * t_orig) {
Chris@0 797 /** define warps as relative frequency deviation from original frequency
Chris@7 798 t_orig : time vector
Chris@7 799 freq_relative : relative frequency deviations
Chris@7 800 */
Chris@0 801 if (m_warp_params.alpha_dist == 0) {
Chris@0 802
Chris@0 803 // linear alpha values spacing
Chris@0 804 m_warpings.chirp_rates = new double [m_warp_params.num_warps];
Chris@0 805 // WARNING m_warp_params.num_warps must be odd
Chris@0 806 m_warpings.chirp_rates[0] = -m_warp_params.alpha_max;
Chris@0 807 double increment = (double) m_warp_params.alpha_max / ((m_warp_params.num_warps - 1) / 2);
Chris@0 808
Chris@10 809 for (int ind = 1; ind < m_warp_params.num_warps; ind++) {
Chris@0 810 m_warpings.chirp_rates[ind] = m_warpings.chirp_rates[ind - 1] + increment;
Chris@0 811 }
Chris@0 812 // force zero value
Chris@0 813 m_warpings.chirp_rates[(int) ((m_warp_params.num_warps - 1) / 2)] = 0;
Chris@0 814
Chris@0 815 } else {
Chris@0 816 // log alpha values spacing
Chris@0 817 m_warpings.chirp_rates = new double [m_warp_params.num_warps];
Chris@0 818
Chris@0 819 // force zero value
Chris@0 820 int middle_point = (int) ((m_warp_params.num_warps - 1) / 2);
Chris@0 821 m_warpings.chirp_rates[middle_point] = 0;
Chris@0 822
Chris@0 823 double logMax = log10(m_warp_params.alpha_max + 1);
Chris@0 824 double increment = logMax / ((m_warp_params.num_warps - 1) / 2.0f);
Chris@0 825 double exponent = 0;
Chris@0 826
Chris@0 827 // fill positive values
Chris@0 828 int ind_log = middle_point;
Chris@10 829 for (int ind = 0; ind < (m_warp_params.num_warps + 1) / 2; ind++) {
Chris@0 830 m_warpings.chirp_rates[ind_log] = pow(10, exponent) - 1;
Chris@0 831 exponent += increment;
Chris@0 832 ind_log++;
Chris@0 833 }
Chris@0 834 // fill negative values
Chris@10 835 for (int ind = 0; ind < (m_warp_params.num_warps - 1) / 2; ind++) {
Chris@0 836 m_warpings.chirp_rates[ind] = -m_warpings.chirp_rates[m_warp_params.num_warps - 1 - ind];
Chris@0 837 }
Chris@0 838 }
Chris@0 839
Chris@0 840 // compute relative frequency deviation
Chris@10 841 for (int i = 0; i < m_warpings.nsamps_torig; i++)
Chris@10 842 for (int j = 0; j < m_warp_params.num_warps; j++)
Chris@0 843 freq_relative[j * m_warpings.nsamps_torig + i] = 1.0 + t_orig[i] * m_warpings.chirp_rates[j];
Chris@0 844 //freq_relative[i * m_warpings.nsamps_torig + j] = 1.0 + t_orig[i] * m_warpings.chirp_rates[j];
Chris@0 845 //freq_relative[i][j] = 1.0 + t_orig[i] * m_warpings.chirp_rates[j];
Chris@0 846 }
Chris@0 847
Chris@0 848 void
Chris@0 849 FChTransformF0gram::design_LPF() {
Chris@0 850
Chris@0 851 // in = (fftw_complex*) fftw_malloc(sizeof (fftw_complex) * tamanoVentana);
Chris@0 852 // out = (fftw_complex*) fftw_malloc(sizeof (fftw_complex) * tamanoVentana);
Chris@0 853 // in_window = (float*) fftw_malloc(sizeof (float) * tamanoVentana);
Chris@0 854 // p = fftw_plan_dft_1d(tamanoVentana, in, out, FFTW_FORWARD, FFTW_ESTIMATE);
Chris@0 855 double *lp_LPFWindow_aux = new double[m_blockSize/2+1];
Chris@0 856 mp_LPFWindow = new double[m_blockSize/2+1];
Chris@0 857
Chris@10 858 int i_max = (int) ((2.0*m_fmax/m_fs) * ( (double)m_blockSize / 2.0 + 1.0 ));
Chris@10 859 for (int i = 0; i < m_blockSize/2+1; i++) {
Chris@0 860 if (i >= i_max) {
Chris@0 861 lp_LPFWindow_aux[i] = 0.0;
Chris@0 862 } else {
Chris@0 863 lp_LPFWindow_aux[i] = 1.0;
Chris@0 864 }
Chris@0 865 }
Chris@0 866 LPF_time = (double*)fftw_malloc(sizeof ( double) * m_warpings.nsamps_torig);
Chris@7 867 //memset((char*)LPF_time, 0, m_warpings.nsamps_torig * sizeof(double));
Chris@7 868 // sustituyo el memset por un for:
Chris@10 869 for (int i = 0; i < m_warpings.nsamps_torig; i++) {
Chris@7 870 LPF_time[i] = 0.0;
Chris@7 871 }
Chris@7 872 #ifdef DEBUG
Chris@7 873 printf(" Corrio primer memset...\n");
Chris@7 874 #endif
Chris@0 875 LPF_frequency = (fftw_complex*)fftw_malloc(sizeof ( fftw_complex) * (m_warpings.nsamps_torig/2 + 1)); //tamaño de la fft cuando la entrada es real
Chris@7 876 //memset((char*)LPF_frequency, 0, sizeof(fftw_complex) * (m_warpings.nsamps_torig/2 + 1));
Chris@7 877 // sustituyo el memset por un for:
Chris@10 878 for (int i = 0; i < (m_warpings.nsamps_torig/2 + 1); i++) {
Chris@7 879 LPF_frequency[i][0] = 0.0;
Chris@7 880 LPF_frequency[i][1] = 0.0;
Chris@7 881 }
Chris@0 882 // for (int i=0; i<(m_blockSize/2+1); i++) {
Chris@0 883 // LPF_frequency[i] = new fftw_complex;
Chris@0 884 // }
Chris@0 885 plan_forward_LPF = fftw_plan_dft_r2c_1d(m_blockSize, LPF_time, LPF_frequency, FFTW_ESTIMATE);
Chris@0 886 plan_backward_LPF = fftw_plan_dft_c2r_1d(m_warpings.nsamps_torig, LPF_frequency, LPF_time, FFTW_ESTIMATE|FFTW_PRESERVE_INPUT);
Chris@0 887
Chris@10 888 int winWidth = 11;
Chris@0 889 double *lp_hanningWindow = new double[winWidth];
Chris@0 890 double accum=0;
Chris@10 891 for (int i = 0; i < winWidth; i++) {
Chris@0 892 lp_hanningWindow[i]=0.5*(1.0-cos(2*M_PI*(double)(i+1)/((double)winWidth+1.0)));
Chris@0 893 accum+=lp_hanningWindow[i];
Chris@0 894
Chris@0 895 }
Chris@10 896 for (int i = 0; i < winWidth; i++) { //window normalization
Chris@0 897 lp_hanningWindow[i]=lp_hanningWindow[i]/accum;
Chris@0 898 }
Chris@10 899 for (int i = 0; i < m_blockSize/2+1; i++) {
Chris@0 900 //if (((i-(winWidth-1)/2)<0)||(i+(winWidth-1))/2>m_blockSize/2-1) {//consideramos winWidth impar, si la ventana sale del arreglo se rellena con el valor origianl
Chris@7 901 if ( (i > (i_max + (winWidth-1)/2)) || (i <= (i_max - (winWidth-1)/2)) ) {
Chris@0 902 mp_LPFWindow[i]=lp_LPFWindow_aux[i];
Chris@0 903 } else {
Chris@0 904 accum=0;
Chris@10 905 for (int j = -((winWidth-1)/2); j <= (winWidth-1)/2; j++) {
Chris@0 906 accum+=lp_LPFWindow_aux[i-j]*lp_hanningWindow[j+(winWidth-1)/2];
Chris@7 907 }
Chris@0 908 mp_LPFWindow[i]=accum;
Chris@0 909 }
Chris@0 910 }
Chris@0 911
Chris@0 912 delete[] lp_LPFWindow_aux;
Chris@0 913 delete[] lp_hanningWindow;
Chris@0 914 }
Chris@0 915
Chris@0 916 void FChTransformF0gram::apply_LPF() {
Chris@0 917 fftw_execute(plan_forward_LPF);
Chris@10 918 for (int i = 0; i < m_blockSize/2+1; i++) {
Chris@0 919 LPF_frequency[i][0]*=mp_LPFWindow[i];
Chris@0 920 LPF_frequency[i][1]*=mp_LPFWindow[i];
Chris@0 921 }
Chris@0 922 fftw_execute(plan_backward_LPF);
Chris@0 923
Chris@7 924 // TODO ver si hay que hacer fftshift para corregir la fase respecto al centro del frame.
Chris@7 925 // nota: además de aplicar el LPF, esta función resamplea la señal original.
Chris@0 926 }
Chris@0 927
Chris@0 928 void FChTransformF0gram::clean_LPF() {
Chris@0 929 delete[] mp_LPFWindow;
Chris@0 930
Chris@7 931 fftw_destroy_plan(plan_forward_LPF);
Chris@7 932 fftw_destroy_plan(plan_backward_LPF);
Chris@7 933 fftw_free(LPF_time);
Chris@7 934 fftw_free(LPF_frequency);
Chris@0 935 }
Chris@0 936
Chris@0 937 void FChTransformF0gram::reset() {
Chris@0 938
Chris@0 939 // Clear buffers, reset stored values, etc
Chris@0 940
Chris@7 941 delete [] m_warpings.pos_int;
Chris@7 942 delete [] m_warpings.pos_frac;
Chris@0 943
Chris@0 944 fftw_destroy_plan(planFFT);
Chris@7 945 fftw_free(in);
Chris@7 946 fftw_free(out);
Chris@0 947
Chris@7 948 clean_LPF();
Chris@0 949
Chris@7 950 delete [] m_timeWindow;
Chris@0 951
Chris@7 952 delete [] mp_HanningWindow;
Chris@0 953
Chris@7 954 // Warping
Chris@7 955 delete [] x_warping;
Chris@7 956 fftw_destroy_plan(plan_forward_xwarping);
Chris@7 957 fftw_free(m_absFanChirpTransform);
Chris@7 958 fftw_free(m_auxFanChirpTransform);
Chris@0 959
Chris@7 960 // design_GLogS
Chris@7 961 delete [] m_glogs_f0;
Chris@7 962 delete [] m_glogs;
Chris@7 963 delete [] m_glogs_n;
Chris@7 964 delete [] m_glogs_index;
Chris@7 965 delete [] m_glogs_posint;
Chris@7 966 delete [] m_glogs_posfrac;
Chris@7 967 delete [] m_glogs_third_harmonic_posint;
Chris@7 968 delete [] m_glogs_third_harmonic_posfrac;
Chris@7 969 delete [] m_glogs_third_harmonic;
Chris@7 970 delete [] m_glogs_fifth_harmonic_posint;
Chris@7 971 delete [] m_glogs_fifth_harmonic_posfrac;
Chris@7 972 delete [] m_glogs_fifth_harmonic;
Chris@7 973 delete [] m_glogs_f0_preference_weights;
Chris@7 974 delete [] m_glogs_median_correction;
Chris@7 975 delete [] m_glogs_sigma_correction;
Chris@7 976 delete [] m_glogs_hf_smoothing_window;
Chris@0 977
Chris@0 978 }
Chris@0 979
Chris@0 980 FChTransformF0gram::FeatureSet
Chris@5 981 FChTransformF0gram::process(const float *const *inputBuffers, Vamp::RealTime) {
Chris@0 982
Chris@0 983 // // Do actual work!
Chris@0 984 //
Chris@0 985
Chris@7 986 /* PSEUDOCÓDIGO:
Chris@7 987 - Aplicar FFT al frame entero.
Chris@7 988 - Filtro pasabajos en frecuencia.
Chris@7 989 - FFT inversa al frame entero.
Chris@7 990 -----------------------------------------------------------------------------
Chris@7 991 - Para cada warp: *Si es un espectrograma direccional (un solo warp
Chris@7 992 => no es para cada warp sino para el elegido)
Chris@7 993 - Hacer la interpolación con interp1q.
Chris@7 994 - Aplicar la FFT al frame warpeado.
Chris@7 995 - (Opcional) GLogS.
Chris@7 996 - ...
Chris@7 997 */
Chris@0 998
Chris@0 999 //---------------------------------------------------------------------------
Chris@7 1000 FeatureSet fs;
Chris@0 1001
Chris@7 1002 #ifdef DEBUG
Chris@7 1003 printf("\n ----- DEBUG INFORMATION ----- \n");
Chris@7 1004 printf(" m_fs = %f Hz.\n",m_fs);
Chris@7 1005 printf(" fs_orig = %f Hz.\n",m_warpings.fs_orig);
Chris@7 1006 printf(" fs_warp = %f Hz.\n",m_warpings.fs_warp);
Chris@7 1007 printf(" m_nfft = %d.\n",m_nfft);
Chris@7 1008 printf(" m_blockSize = %d.\n",m_blockSize);
Chris@7 1009 printf(" m_warpings.nsamps_torig = %d.\n",m_warpings.nsamps_torig);
Chris@7 1010 printf(" m_warp_params.num_warps = %d.\n",m_warp_params.num_warps);
Chris@7 1011 printf(" m_glogs_harmonic_count = %d.\n",m_glogs_harmonic_count);
Chris@7 1012 #endif
Chris@0 1013
Chris@10 1014 // int n = m_nfft/2 + 1;
Chris@7 1015 // double *tbuf = in_window;
Chris@0 1016
Chris@10 1017 for (int i = 0; i < m_blockSize; i++) {
Chris@0 1018 LPF_time[i] = (double)(inputBuffers[0][i]) * m_timeWindow[i];
Chris@0 1019 }
Chris@0 1020
Chris@0 1021 // #ifdef DEBUG
Chris@0 1022 // printf(" HASTA ACÁ ANDA!!!\n");
Chris@0 1023 // cout << flush;
Chris@0 1024 // #endif
Chris@0 1025
Chris@7 1026 apply_LPF();
Chris@7 1027 // Señal filtrada queda en LPF_time
Chris@0 1028
Chris@7 1029 Feature feature;
Chris@0 1030 feature.hasTimestamp = false;
Chris@0 1031
Chris@0 1032
Chris@7 1033 /* Solo a modo de prueba, voy a poner la salida del filtrado en «in» y
Chris@7 1034 voy a mostrar la FFT de eso, para ver el efecto del filtrado. */
Chris@10 1035 // for (int i = 0; i < m_nfft; i++) {
Chris@0 1036 // in[i][0] = tbuf[i];
Chris@0 1037 // in[i][1] = 0;
Chris@0 1038 // }
Chris@0 1039 // fftw_execute(planFFT);
Chris@0 1040 // double real, imag;
Chris@10 1041 // for (int i=0; i<n; ++i) { // preincremento?? ver version de nacho
Chris@0 1042 // real = out[i][0];
Chris@0 1043 // imag = out[i][1];
Chris@0 1044 // feature.values.push_back(real*real + imag*imag);
Chris@0 1045 // }
Chris@0 1046 // fs[0].push_back(feature);
Chris@0 1047
Chris@0 1048 // float real;
Chris@0 1049 // float imag;
Chris@10 1050 // for (int i=0; i<m_blockSize/2+1; i++) {
Chris@0 1051 // real = (float)(LPF_frequency[i][0]);
Chris@0 1052 // imag = (float)(LPF_frequency[i][1]);
Chris@0 1053 // feature.values.push_back(real*real+imag*imag);
Chris@0 1054 // //feature.values.push_back((float)(mp_LPFWindow[i]));
Chris@0 1055 // }
Chris@0 1056
Chris@0 1057 // ----------------------------------------------------------------------------------------------
Chris@0 1058 // Hanning window & FFT for all warp directions
Chris@0 1059
Chris@7 1060 double max_glogs = -DBL_MAX;
Chris@10 1061 int ind_max_glogs = 0;
Chris@0 1062
Chris@10 1063 for (int i_warp = 0; i_warp < m_warp_params.num_warps; i_warp++) {
Chris@7 1064 // Interpolate
Chris@7 1065 interp1q(LPF_time, (m_warpings.pos_int) + i_warp*m_warp_params.nsamps_twarp, m_warpings.pos_frac + i_warp*m_warp_params.nsamps_twarp, x_warping, m_warp_params.nsamps_twarp);
Chris@0 1066
Chris@7 1067 // Apply window
Chris@10 1068 for (int i = 0; i < m_warp_params.nsamps_twarp; i++) {
Chris@7 1069 x_warping[i] *= mp_HanningWindow[i];
Chris@7 1070 }
Chris@0 1071
Chris@7 1072 // Transform
Chris@7 1073 fftw_execute(plan_forward_xwarping);
Chris@0 1074
Chris@7 1075 // Copy result
Chris@7 1076 //memcpy(m_absFanChirpTransform + i_warp*(m_warp_params.nsamps_twarp/2+1), m_auxFanChirpTransform, (m_warp_params.nsamps_twarp/2+1)*sizeof(fftw_complex)); asi como esta no funciona
Chris@7 1077 double *aux_abs_fcht = m_absFanChirpTransform + i_warp*(m_warp_params.nsamps_twarp/2+1);
Chris@10 1078 for (int i = 0; i < (m_warp_params.nsamps_twarp/2+1); i++) {
Chris@7 1079 aux_abs_fcht[i] = log10(1.0 + 10.0*sqrt(m_auxFanChirpTransform[i][0]*m_auxFanChirpTransform[i][0]+m_auxFanChirpTransform[i][1]*m_auxFanChirpTransform[i][1]));
Chris@7 1080 // smoothing high frequency values
Chris@7 1081 //aux_abs_fcht[i] *= m_glogs_hf_smoothing_window[i];
Chris@7 1082 }
Chris@0 1083
Chris@0 1084 // -----------------------------------------------------------------------------------------
Chris@0 1085 // GLogS
Chris@7 1086 interp1q(aux_abs_fcht, m_glogs_posint, m_glogs_posfrac, m_glogs_interp, m_glogs_harmonic_count);
Chris@10 1087 int glogs_ind = 0;
Chris@10 1088 for (int i = 0; i < m_glogs_num_f0s; i++) {
Chris@7 1089 double glogs_accum = 0;
Chris@10 1090 for (int j = 1; j <= m_glogs_n[i]; j++) {
Chris@7 1091 glogs_accum += m_glogs_interp[glogs_ind++];
Chris@7 1092 }
Chris@7 1093 m_glogs[i + i_warp*m_glogs_num_f0s] = glogs_accum/(double)m_glogs_n[i];
Chris@7 1094 }
Chris@0 1095
Chris@0 1096 // Sub/super harmonic correction
Chris@7 1097 interp1q(m_glogs + i_warp*m_glogs_num_f0s, m_glogs_third_harmonic_posint, m_glogs_third_harmonic_posfrac, m_glogs_third_harmonic, (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@7 1098 interp1q(m_glogs + i_warp*m_glogs_num_f0s, m_glogs_fifth_harmonic_posint, m_glogs_fifth_harmonic_posfrac, m_glogs_fifth_harmonic, (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@10 1099 for (int i = m_glogs_num_f0s-1; i >= m_glogs_init_f0s; i--) {
Chris@7 1100 m_glogs[i + i_warp*m_glogs_num_f0s] -= MAX(MAX(m_glogs[i-m_f0_params.num_f0s_per_oct + i_warp*m_glogs_num_f0s],m_glogs_third_harmonic[i-m_glogs_init_f0s]),m_glogs_fifth_harmonic[i-m_glogs_init_f0s]);
Chris@7 1101 //m_glogs[i] -= MAX(m_glogs[i-m_f0_params.num_f0s_per_oct],m_glogs_third_harmonic[i-m_glogs_init_f0s]);
Chris@7 1102 }
Chris@10 1103 for (int i = m_glogs_init_f0s; i < m_glogs_num_f0s-m_f0_params.num_f0s_per_oct; i++) {
Chris@7 1104 m_glogs[i + i_warp*m_glogs_num_f0s] -= 0.3*m_glogs[i+m_f0_params.num_f0s_per_oct + i_warp*m_glogs_num_f0s];
Chris@7 1105 // Median, sigma $ weights correction
Chris@7 1106 m_glogs[i + i_warp*m_glogs_num_f0s] = (m_glogs[i + i_warp*m_glogs_num_f0s]-m_glogs_median_correction[i-m_glogs_init_f0s])*m_glogs_sigma_correction[i-m_glogs_init_f0s]*m_glogs_f0_preference_weights[i-m_glogs_init_f0s];
Chris@7 1107 }
Chris@0 1108
Chris@7 1109 // Look for maximum value to determine best direction
Chris@10 1110 for (int i = m_glogs_init_f0s; i < m_glogs_num_f0s-m_f0_params.num_f0s_per_oct; i++) {
Chris@7 1111 if (m_glogs[i + i_warp*m_glogs_num_f0s] > max_glogs) {
Chris@7 1112 max_glogs = m_glogs[i + i_warp*m_glogs_num_f0s];
Chris@7 1113 ind_max_glogs = i_warp;
Chris@7 1114 }
Chris@7 1115 }
Chris@7 1116 }
Chris@0 1117
Chris@0 1118 // ----------------------------------------------------------------------------------------------
Chris@0 1119
Chris@10 1120 for (int i=m_glogs_init_f0s; i< m_glogs_num_f0s - m_f0_params.num_f0s_per_oct; i++) {
Chris@10 1121 //for (int i=0; i<(m_warp_params.nsamps_twarp/2+1); i++) {
Chris@7 1122 //feature.values.push_back((float)(m_warpings.pos_int[i])+ (float)(m_warpings.pos_frac[i]));
Chris@7 1123 //feature.values.push_back((float)(phi[i]*100000.0));
Chris@7 1124 //feature.values.push_back((float)(t_orig[i]));
Chris@7 1125 //feature.values.push_back((float)(pos1[i]));
Chris@7 1126 //feature.values.push_back((float)x_warping[i]);
Chris@7 1127 //feature.values.push_back(m_absFanChirpTransform[i + ind_max_glogs*(m_warp_params.nsamps_twarp/2+1)]);
Chris@7 1128 //feature.values.push_back((float)m_glogs[i+(long)ind_max_glogs*(long)m_glogs_num_f0s]);
Chris@7 1129 switch (m_f0gram_mode) {
Chris@7 1130 case 1:
Chris@7 1131 max_glogs = -DBL_MAX;
Chris@10 1132 for (int i_warp = 0; i_warp < m_warp_params.num_warps; i_warp++) {
Chris@7 1133 if (m_glogs[i + i_warp*m_glogs_num_f0s] > max_glogs) {
Chris@7 1134 max_glogs = m_glogs[i + i_warp*m_glogs_num_f0s];
Chris@7 1135 ind_max_glogs = i_warp;
Chris@7 1136 }
Chris@7 1137 }
Chris@7 1138 feature.values.push_back((float)max_glogs);
Chris@7 1139 break;
Chris@7 1140 case 0:
Chris@10 1141 feature.values.push_back((float)m_glogs[i+(int)ind_max_glogs*(int)m_glogs_num_f0s]);
Chris@7 1142 break;
Chris@7 1143 }
Chris@7 1144 //feature.values.push_back((float)m_glogs_hf_smoothing_window[i]);
Chris@7 1145 }
Chris@0 1146
Chris@0 1147 // ----------------------------------------------------------------------------------------------
Chris@0 1148
Chris@7 1149 fs[0].push_back(feature);
Chris@0 1150
Chris@7 1151 #ifdef DEBUG
Chris@7 1152 printf(" ----------------------------- \n");
Chris@7 1153 #endif
Chris@0 1154
Chris@7 1155 return fs;
Chris@0 1156 //---------------------------------------------------------------------------
Chris@0 1157
Chris@0 1158 //return FeatureSet();
Chris@0 1159 }
Chris@0 1160
Chris@0 1161 FChTransformF0gram::FeatureSet
Chris@0 1162 FChTransformF0gram::getRemainingFeatures() {
Chris@0 1163 return FeatureSet();
Chris@0 1164 }
Chris@0 1165
Chris@0 1166 void
Chris@0 1167 FChTransformF0gram::design_time_window() {
Chris@0 1168
Chris@10 1169 int transitionWidth = (int)m_blockSize/128 + 1;;
Chris@0 1170 m_timeWindow = new double[m_blockSize];
Chris@7 1171 double *lp_transitionWindow = new double[transitionWidth];
Chris@0 1172
Chris@7 1173 //memset(m_timeWindow, 1.0, m_blockSize);
Chris@10 1174 for (int i = 0; i < m_blockSize; i++) {
Chris@7 1175 m_timeWindow[i] = 1.0;
Chris@7 1176 }
Chris@0 1177
Chris@10 1178 for (int i = 0; i < transitionWidth; i++) {
Chris@0 1179 lp_transitionWindow[i]=0.5*(1.0-cos(2*M_PI*(double)(i+1)/((double)transitionWidth+1.0)));
Chris@0 1180 }
Chris@0 1181
Chris@10 1182 for (int i = 0; i < transitionWidth/2; i++) {
Chris@7 1183 m_timeWindow[i] = lp_transitionWindow[i];
Chris@7 1184 m_timeWindow[m_blockSize-1-i] = lp_transitionWindow[transitionWidth-1-i];
Chris@7 1185 }
Chris@0 1186
Chris@7 1187 #ifdef DEBUG
Chris@7 1188 for (int i = 0; i < m_blockSize; i++) {
Chris@7 1189 if ((i<transitionWidth)) {
Chris@7 1190 printf(" m_timeWindow[%d] = %f.\n",i,m_timeWindow[i]);
Chris@7 1191 }
Chris@7 1192 }
Chris@7 1193 #endif
Chris@0 1194
Chris@7 1195 delete [] lp_transitionWindow;
Chris@0 1196 }
Chris@0 1197