annotate FChTransformF0gram.cpp @ 15:0a860992b4f4 spect

Break out the plugin into three different plugins (using the same class in different modes) in order to provide simplistic and more sophisticated spectrograms as well as the f0-gram. Remove the program support, since it doesn't work usefully anyway (it just overrides the user's preferred settings).
author Chris Cannam
date Wed, 03 Oct 2018 13:16:09 +0100
parents 44b86c346a5a
children ce62ed201de8
rev   line source
Chris@0 1 /*
Chris@0 2 copyright (C) 2011 I. Irigaray, M. Rocamora
Chris@0 3
Chris@0 4 This program is free software: you can redistribute it and/or modify
Chris@0 5 it under the terms of the GNU General Public License as published by
Chris@0 6 the Free Software Foundation, either version 3 of the License, or
Chris@0 7 (at your option) any later version.
Chris@0 8
Chris@0 9 This program is distributed in the hope that it will be useful,
Chris@0 10 but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@0 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@0 12 GNU General Public License for more details.
Chris@0 13
Chris@0 14 You should have received a copy of the GNU General Public License
Chris@0 15 along with this program. If not, see <http://www.gnu.org/licenses/>.
Chris@7 16 */
Chris@0 17
Chris@0 18 #include "FChTransformF0gram.h"
Chris@0 19 #include "FChTransformUtils.h"
Chris@0 20 #include <math.h>
Chris@0 21 #include <float.h>
Chris@14 22
Chris@14 23 #include "bqvec/Allocators.h"
Chris@14 24
Chris@14 25 using namespace breakfastquay;
Chris@14 26
Chris@0 27 //#define DEBUG
Chris@7 28
Chris@0 29 #define MAX(x, y) (((x) > (y)) ? (x) : (y))
Chris@0 30
Chris@15 31 FChTransformF0gram::FChTransformF0gram(ProcessingMode mode,
Chris@15 32 float inputSampleRate) :
Chris@7 33 Plugin(inputSampleRate),
Chris@15 34 m_processingMode(mode),
Chris@7 35 m_stepSize(0), // We are using 0 for step and block size to indicate "not yet set".
Chris@7 36 m_blockSize(0) {
Chris@0 37
Chris@0 38 m_fs = inputSampleRate;
Chris@0 39 // max frequency of interest (Hz)
Chris@0 40 m_fmax = 10000.f;
Chris@0 41 // warping parameters
Chris@12 42 m_warp_params.nsamps_twarp = 2048;
Chris@0 43 //m_warp_params.nsamps_twarp = 8;
Chris@0 44 m_warp_params.alpha_max = 4;
Chris@0 45 m_warp_params.num_warps = 21;
Chris@0 46 //m_warp_params.num_warps = 11;
Chris@0 47 m_warp_params.fact_over_samp = 2;
Chris@0 48 m_warp_params.alpha_dist = 0;
Chris@0 49 // f0 parameters
Chris@0 50 m_f0_params.f0min = 80.0;
Chris@0 51 m_f0_params.num_octs = 4;
Chris@0 52 m_f0_params.num_f0s_per_oct = 192;
Chris@0 53 m_f0_params.num_f0_hyps = 5;
Chris@0 54 m_f0_params.prefer = true;
Chris@0 55 m_f0_params.prefer_mean = 60;
Chris@0 56 m_f0_params.prefer_stdev = 18;
Chris@0 57 // glogs parameters
Chris@0 58 m_glogs_params.HP_logS = true;
Chris@0 59 m_glogs_params.att_subharms = 1;
Chris@7 60 // display parameters
Chris@15 61 m_f0gram_mode = BestBinOfAllDirections;
Chris@0 62
Chris@0 63 m_glogs_params.median_poly_coefs[0] = -0.000000058551680;
Chris@0 64 m_glogs_params.median_poly_coefs[1] = -0.000006945207775;
Chris@0 65 m_glogs_params.median_poly_coefs[2] = 0.002357223226588;
Chris@0 66
Chris@0 67 m_glogs_params.sigma_poly_coefs[0] = 0.000000092782308;
Chris@0 68 m_glogs_params.sigma_poly_coefs[1] = 0.000057283574898;
Chris@0 69 m_glogs_params.sigma_poly_coefs[2] = 0.022199903714288;
Chris@0 70
Chris@0 71 // number of fft points (controls zero-padding)
Chris@0 72 m_nfft = m_warp_params.nsamps_twarp;
Chris@0 73 // hop in samples
Chris@0 74 m_hop = m_warp_params.fact_over_samp * 256;
Chris@0 75
Chris@0 76 m_num_f0s = 0;
Chris@0 77
Chris@0 78 }
Chris@0 79
Chris@14 80 FChTransformF0gram::~FChTransformF0gram()
Chris@14 81 {
Chris@14 82 if (!m_blockSize) {
Chris@14 83 return; // nothing was allocated
Chris@14 84 }
Chris@14 85
Chris@14 86 deallocate(m_warpings.pos_int);
Chris@14 87 deallocate(m_warpings.pos_frac);
Chris@14 88 deallocate(m_warpings.chirp_rates);
Chris@14 89
Chris@14 90 clean_LPF();
Chris@14 91
Chris@14 92 deallocate(m_timeWindow);
Chris@14 93
Chris@14 94 deallocate(mp_HanningWindow);
Chris@14 95
Chris@14 96 // Warping
Chris@14 97 deallocate(x_warping);
Chris@14 98 delete fft_xwarping;
Chris@14 99 deallocate(m_absFanChirpTransform);
Chris@14 100 deallocate(m_auxFanChirpTransform);
Chris@14 101
Chris@14 102 // design_GLogS
Chris@14 103 deallocate(m_glogs_f0);
Chris@14 104 deallocate(m_glogs);
Chris@14 105 deallocate(m_glogs_n);
Chris@14 106 deallocate(m_glogs_index);
Chris@14 107 deallocate(m_glogs_posint);
Chris@14 108 deallocate(m_glogs_posfrac);
Chris@14 109 deallocate(m_glogs_interp);
Chris@14 110 deallocate(m_glogs_third_harmonic_posint);
Chris@14 111 deallocate(m_glogs_third_harmonic_posfrac);
Chris@14 112 deallocate(m_glogs_third_harmonic);
Chris@14 113 deallocate(m_glogs_fifth_harmonic_posint);
Chris@14 114 deallocate(m_glogs_fifth_harmonic_posfrac);
Chris@14 115 deallocate(m_glogs_fifth_harmonic);
Chris@14 116 deallocate(m_glogs_f0_preference_weights);
Chris@14 117 deallocate(m_glogs_median_correction);
Chris@14 118 deallocate(m_glogs_sigma_correction);
Chris@0 119 }
Chris@0 120
Chris@0 121 string
Chris@0 122 FChTransformF0gram::getIdentifier() const {
Chris@15 123 switch (m_processingMode) {
Chris@15 124 case ModeF0Gram: return "fchtransformf0gram";
Chris@15 125 case ModeSpectrogram: return "fchtransformspectrogram";
Chris@15 126 case ModeRoughSpectrogram: return "fchtransformrough";
Chris@15 127 }
Chris@0 128 }
Chris@0 129
Chris@0 130 string
Chris@0 131 FChTransformF0gram::getName() const {
Chris@15 132 switch (m_processingMode) {
Chris@15 133 case ModeF0Gram: return "Fan Chirp Transform F0gram";
Chris@15 134 case ModeSpectrogram: return "Fan Chirp Transform Spectrogram";
Chris@15 135 case ModeRoughSpectrogram: return "Fan Chirp Transform Rough Spectrogram";
Chris@15 136 }
Chris@0 137 }
Chris@0 138
Chris@0 139 string
Chris@0 140 FChTransformF0gram::getDescription() const {
Chris@15 141 switch (m_processingMode) {
Chris@15 142 case ModeF0Gram:
Chris@15 143 return "This plug-in produces a representation, called F0gram, which exhibits the salience of the fundamental frequency of the sound sources in the audio file. The computation of the F0gram makes use of the Fan Chirp Transform analysis. It is based on the article \"Fan chirp transform for music representation\" P. Cancela, E. Lopez, M. Rocamora, International Conference on Digital Audio Effects, 13th. DAFx-10. Graz, Austria - 6-10 Sep 2010.";
Chris@15 144 case ModeSpectrogram:
Chris@15 145 return "This plug-in produces a spectral representation of the audio using Fan Chirp Transform analysis.";
Chris@15 146 case ModeRoughSpectrogram:
Chris@15 147 return "This plug-in produces a more approximate spectral representation of the audio using Fan Chirp Transform analysis.";
Chris@15 148 }
Chris@0 149 }
Chris@0 150
Chris@0 151 string
Chris@0 152 FChTransformF0gram::getMaker() const {
Chris@0 153 // Your name here
Chris@0 154 return "Audio Processing Group \n Universidad de la Republica";
Chris@0 155 }
Chris@0 156
Chris@0 157 int
Chris@0 158 FChTransformF0gram::getPluginVersion() const {
Chris@0 159 // Increment this each time you release a version that behaves
Chris@0 160 // differently from the previous one
Chris@0 161 //
Chris@0 162 // 0 - initial version from scratch
Chris@15 163 return 1;
Chris@0 164 }
Chris@0 165
Chris@0 166 string
Chris@0 167 FChTransformF0gram::getCopyright() const {
Chris@0 168 // This function is not ideally named. It does not necessarily
Chris@0 169 // need to say who made the plugin -- getMaker does that -- but it
Chris@0 170 // should indicate the terms under which it is distributed. For
Chris@0 171 // example, "Copyright (year). All Rights Reserved", or "GPL"
Chris@0 172 return "copyright (C) 2011 GPL - Audio Processing Group, UdelaR";
Chris@0 173 }
Chris@0 174
Chris@0 175 FChTransformF0gram::InputDomain
Chris@0 176 FChTransformF0gram::getInputDomain() const {
Chris@0 177 return TimeDomain;
Chris@0 178 }
Chris@0 179
Chris@0 180 size_t FChTransformF0gram::getPreferredBlockSize() const {
Chris@0 181 return 8192; // 0 means "I can handle any block size"
Chris@0 182 }
Chris@0 183
Chris@0 184 size_t
Chris@0 185 FChTransformF0gram::getPreferredStepSize() const {
Chris@0 186 return 256; // 0 means "anything sensible"; in practice this
Chris@0 187 // means the same as the block size for TimeDomain
Chris@0 188 // plugins, or half of it for FrequencyDomain plugins
Chris@0 189 }
Chris@0 190
Chris@0 191 size_t
Chris@0 192 FChTransformF0gram::getMinChannelCount() const {
Chris@0 193 return 1;
Chris@0 194 }
Chris@0 195
Chris@0 196 size_t
Chris@0 197 FChTransformF0gram::getMaxChannelCount() const {
Chris@0 198 return 1;
Chris@0 199 }
Chris@0 200
Chris@0 201 FChTransformF0gram::ParameterList
Chris@0 202 FChTransformF0gram::getParameterDescriptors() const {
Chris@0 203 ParameterList list;
Chris@0 204
Chris@0 205 // If the plugin has no adjustable parameters, return an empty
Chris@0 206 // list here (and there's no need to provide implementations of
Chris@0 207 // getParameter and setParameter in that case either).
Chris@0 208
Chris@0 209 // Note that it is your responsibility to make sure the parameters
Chris@0 210 // start off having their default values (e.g. in the constructor
Chris@0 211 // above). The host needs to know the default value so it can do
Chris@0 212 // things like provide a "reset to default" function, but it will
Chris@0 213 // not explicitly set your parameters to their defaults for you if
Chris@0 214 // they have not changed in the mean time.
Chris@0 215
Chris@0 216 // ============= WARPING PARAMETERS =============
Chris@0 217
Chris@0 218 ParameterDescriptor fmax;
Chris@0 219 fmax.identifier = "fmax";
Chris@0 220 fmax.name = "Maximum frequency";
Chris@0 221 fmax.description = "Maximum frequency of interest for the analysis.";
Chris@0 222 fmax.unit = "Hz";
Chris@0 223 fmax.minValue = 2000;
Chris@0 224 fmax.maxValue = 22050;
Chris@0 225 fmax.defaultValue = 10000;
Chris@0 226 fmax.isQuantized = true;
Chris@0 227 fmax.quantizeStep = 1.0;
Chris@0 228 list.push_back(fmax);
Chris@0 229
Chris@0 230 ParameterDescriptor nsamp;
Chris@0 231 nsamp.identifier = "nsamp";
Chris@0 232 nsamp.name = "Number of samples";
Chris@0 233 nsamp.description = "Number of samples of the time warped frame";
Chris@0 234 nsamp.unit = "samples";
Chris@0 235 nsamp.minValue = 128;
Chris@0 236 nsamp.maxValue = 4096;
Chris@0 237 nsamp.defaultValue = 2048;
Chris@0 238 nsamp.isQuantized = true;
Chris@0 239 nsamp.quantizeStep = 1.0;
Chris@0 240 list.push_back(nsamp);
Chris@0 241
Chris@0 242 ParameterDescriptor nfft;
Chris@0 243 nfft.identifier = "nfft";
Chris@0 244 nfft.name = "FFT number of points";
Chris@0 245 nfft.description = "Number of FFT points (controls zero-padding)";
Chris@0 246 nfft.unit = "samples";
Chris@0 247 nfft.minValue = 0;
Chris@0 248 nfft.maxValue = 4;
Chris@0 249 nfft.defaultValue = 3;
Chris@0 250 nfft.isQuantized = true;
Chris@0 251 nfft.quantizeStep = 1.0;
Chris@0 252 nfft.valueNames.push_back("256");
Chris@0 253 nfft.valueNames.push_back("512");
Chris@0 254 nfft.valueNames.push_back("1024");
Chris@0 255 nfft.valueNames.push_back("2048");
Chris@0 256 nfft.valueNames.push_back("4096");
Chris@0 257 nfft.valueNames.push_back("8192");
Chris@0 258 list.push_back(nfft);
Chris@0 259
Chris@0 260 ParameterDescriptor alpha_max;
Chris@0 261 alpha_max.identifier = "alpha_max";
Chris@0 262 alpha_max.name = "Maximum alpha value";
Chris@0 263 alpha_max.description = "Maximum value for the alpha parameter of the transform.";
Chris@0 264 alpha_max.unit = "Hz/s";
Chris@0 265 alpha_max.minValue = -10;
Chris@0 266 alpha_max.maxValue = 10;
Chris@0 267 alpha_max.defaultValue = 5;
Chris@0 268 alpha_max.isQuantized = true;
Chris@0 269 alpha_max.quantizeStep = 1.0;
Chris@0 270 list.push_back(alpha_max);
Chris@0 271
Chris@0 272 ParameterDescriptor num_warps;
Chris@0 273 num_warps.identifier = "num_warps";
Chris@0 274 num_warps.name = "Number of warpings";
Chris@0 275 num_warps.description = "Number of different warpings in the specified range (must be odd).";
Chris@0 276 num_warps.unit = "";
Chris@0 277 num_warps.minValue = 1;
Chris@0 278 num_warps.maxValue = 101;
Chris@0 279 num_warps.defaultValue = 21;
Chris@0 280 num_warps.isQuantized = true;
Chris@0 281 num_warps.quantizeStep = 2.0;
Chris@0 282 list.push_back(num_warps);
Chris@0 283
Chris@0 284 ParameterDescriptor alpha_dist;
Chris@0 285 alpha_dist.identifier = "alpha_dist";
Chris@0 286 alpha_dist.name = "alpha distribution";
Chris@0 287 alpha_dist.description = "Type of distribution of alpha values (linear or log).";
Chris@0 288 alpha_dist.unit = "";
Chris@0 289 alpha_dist.minValue = 0;
Chris@0 290 alpha_dist.maxValue = 1;
Chris@0 291 alpha_dist.defaultValue = 1;
Chris@0 292 alpha_dist.isQuantized = true;
Chris@0 293 alpha_dist.quantizeStep = 1.0;
Chris@0 294 // lin (0), log (1)
Chris@0 295 alpha_dist.valueNames.push_back("lin");
Chris@0 296 alpha_dist.valueNames.push_back("log");
Chris@0 297 list.push_back(alpha_dist);
Chris@0 298
Chris@0 299 // ============= F0-GRAM PARAMETERS =============
Chris@0 300
Chris@0 301 ParameterDescriptor f0min;
Chris@0 302 f0min.identifier = "f0min";
Chris@0 303 f0min.name = "min f0";
Chris@0 304 f0min.description = "Minimum fundamental frequency (f0) value.";
Chris@0 305 f0min.unit = "Hz";
Chris@0 306 f0min.minValue = 1;
Chris@0 307 f0min.maxValue = 500;
Chris@0 308 f0min.defaultValue = 80;
Chris@0 309 f0min.isQuantized = true;
Chris@0 310 f0min.quantizeStep = 1.0;
Chris@0 311 list.push_back(f0min);
Chris@0 312
Chris@0 313 ParameterDescriptor num_octs;
Chris@0 314 num_octs.identifier = "num_octs";
Chris@0 315 num_octs.name = "number of octaves";
Chris@0 316 num_octs.description = "Number of octaves for F0gram computation.";
Chris@0 317 num_octs.unit = "";
Chris@0 318 num_octs.minValue = 1;
Chris@0 319 num_octs.maxValue = 10;
Chris@0 320 num_octs.defaultValue = 4;
Chris@0 321 num_octs.isQuantized = true;
Chris@0 322 num_octs.quantizeStep = 1.0;
Chris@0 323 list.push_back(num_octs);
Chris@0 324
Chris@0 325 ParameterDescriptor num_f0_hyps;
Chris@0 326 num_f0_hyps.identifier = "num_f0_hyps";
Chris@0 327 num_f0_hyps.name = "number of f0 hypotesis";
Chris@0 328 num_f0_hyps.description = "Number of f0 hypotesis to extract.";
Chris@0 329 num_f0_hyps.unit = "";
Chris@0 330 num_f0_hyps.minValue = 1;
Chris@0 331 num_f0_hyps.maxValue = 100;
Chris@0 332 num_f0_hyps.defaultValue = 10;
Chris@0 333 num_f0_hyps.isQuantized = true;
Chris@0 334 num_f0_hyps.quantizeStep = 1.0;
Chris@0 335 list.push_back(num_f0_hyps);
Chris@0 336
Chris@0 337 ParameterDescriptor f0s_per_oct;
Chris@0 338 f0s_per_oct.identifier = "f0s_per_oct";
Chris@0 339 f0s_per_oct.name = "f0 values per octave";
Chris@0 340 f0s_per_oct.description = "Number of f0 values per octave.";
Chris@0 341 f0s_per_oct.unit = "";
Chris@0 342 f0s_per_oct.minValue = 12;
Chris@0 343 f0s_per_oct.maxValue = 768;
Chris@0 344 f0s_per_oct.defaultValue = 192;
Chris@0 345 f0s_per_oct.isQuantized = true;
Chris@0 346 f0s_per_oct.quantizeStep = 1.0;
Chris@0 347 list.push_back(f0s_per_oct);
Chris@0 348
Chris@0 349 ParameterDescriptor f0_prefer_fun;
Chris@0 350 f0_prefer_fun.identifier = "f0_prefer_fun";
Chris@0 351 f0_prefer_fun.name = "f0 preference function";
Chris@0 352 f0_prefer_fun.description = "Whether to use a f0 weighting function.";
Chris@0 353 f0_prefer_fun.unit = "";
Chris@0 354 f0_prefer_fun.minValue = 0;
Chris@0 355 f0_prefer_fun.maxValue = 1;
Chris@0 356 f0_prefer_fun.defaultValue = 1;
Chris@0 357 f0_prefer_fun.isQuantized = true;
Chris@0 358 f0_prefer_fun.quantizeStep = 1.0;
Chris@0 359 list.push_back(f0_prefer_fun);
Chris@0 360
Chris@0 361 ParameterDescriptor f0_prefer_mean;
Chris@0 362 f0_prefer_mean.identifier = "f0_prefer_mean";
Chris@0 363 f0_prefer_mean.name = "mean f0 preference function";
Chris@0 364 f0_prefer_mean.description = "Mean value for f0 weighting function (MIDI number).";
Chris@0 365 f0_prefer_mean.unit = "";
Chris@0 366 f0_prefer_mean.minValue = 1;
Chris@0 367 f0_prefer_mean.maxValue = 127;
Chris@0 368 f0_prefer_mean.defaultValue = 60;
Chris@0 369 f0_prefer_mean.isQuantized = true;
Chris@0 370 f0_prefer_mean.quantizeStep = 1.0;
Chris@0 371 list.push_back(f0_prefer_mean);
Chris@0 372
Chris@0 373 ParameterDescriptor f0_prefer_stdev;
Chris@0 374 f0_prefer_stdev.identifier = "f0_prefer_stdev";
Chris@0 375 f0_prefer_stdev.name = "stdev of f0 preference function";
Chris@0 376 f0_prefer_stdev.description = "Stdev for f0 weighting function (MIDI number).";
Chris@0 377 f0_prefer_stdev.unit = "";
Chris@0 378 f0_prefer_stdev.minValue = 1;
Chris@0 379 f0_prefer_stdev.maxValue = 127;
Chris@0 380 f0_prefer_stdev.defaultValue = 18;
Chris@0 381 f0_prefer_stdev.isQuantized = true;
Chris@0 382 f0_prefer_stdev.quantizeStep = 1.0;
Chris@0 383 list.push_back(f0_prefer_stdev);
Chris@0 384
Chris@0 385 ParameterDescriptor f0gram_mode;
Chris@0 386 f0gram_mode.identifier = "f0gram_mode";
Chris@0 387 f0gram_mode.name = "display mode of f0gram";
Chris@0 388 f0gram_mode.description = "Display all bins of the best direction, or the best bin for each direction.";
Chris@0 389 f0gram_mode.unit = "";
Chris@0 390 f0gram_mode.minValue = 0;
Chris@0 391 f0gram_mode.maxValue = 1;
Chris@0 392 f0gram_mode.defaultValue = 1;
Chris@0 393 f0gram_mode.isQuantized = true;
Chris@0 394 f0gram_mode.quantizeStep = 1.0;
Chris@0 395 list.push_back(f0gram_mode);
Chris@0 396
Chris@0 397 return list;
Chris@0 398 }
Chris@0 399
Chris@0 400 float
Chris@0 401 FChTransformF0gram::getParameter(string identifier) const {
Chris@0 402
Chris@0 403 if (identifier == "fmax") {
Chris@0 404 return m_fmax;
Chris@0 405 } else if (identifier == "nsamp") {
Chris@0 406 return m_warp_params.nsamps_twarp;
Chris@0 407 } else if (identifier == "alpha_max") {
Chris@0 408 return m_warp_params.alpha_max;
Chris@0 409 } else if (identifier == "num_warps") {
Chris@0 410 return m_warp_params.num_warps;
Chris@0 411 } else if (identifier == "alpha_dist") {
Chris@0 412 return m_warp_params.alpha_dist;
Chris@0 413 } else if (identifier == "nfft") {
Chris@0 414 return m_nfft;
Chris@0 415 } else if (identifier == "f0min") {
Chris@0 416 return m_f0_params.f0min;
Chris@0 417 } else if (identifier == "num_octs") {
Chris@0 418 return m_f0_params.num_octs;
Chris@0 419 } else if (identifier == "f0s_per_oct") {
Chris@0 420 return m_f0_params.num_f0s_per_oct;
Chris@0 421 } else if (identifier == "num_f0_hyps") {
Chris@0 422 return m_f0_params.num_f0_hyps;
Chris@0 423 } else if (identifier == "f0_prefer_fun") {
Chris@0 424 return m_f0_params.prefer;
Chris@0 425 } else if (identifier == "f0_prefer_mean") {
Chris@0 426 return m_f0_params.prefer_mean;
Chris@0 427 } else if (identifier == "f0_prefer_stdev") {
Chris@0 428 return m_f0_params.prefer_stdev;
Chris@7 429 } else if (identifier == "f0gram_mode") {
Chris@15 430 return m_f0gram_mode == BestBinOfAllDirections ? 1.0 : 0.0;
Chris@0 431 } else {
Chris@0 432 return 0.f;
Chris@0 433 }
Chris@0 434
Chris@0 435 }
Chris@0 436
Chris@15 437 void FChTransformF0gram::setParameter(string identifier, float value)
Chris@15 438 {
Chris@0 439 if (identifier == "fmax") {
Chris@0 440 m_fmax = value;
Chris@0 441 } else if (identifier == "nsamp") {
Chris@0 442 m_warp_params.nsamps_twarp = value;
Chris@0 443 } else if (identifier == "alpha_max") {
Chris@0 444 m_warp_params.alpha_max = value;
Chris@0 445 } else if (identifier == "num_warps") {
Chris@0 446 m_warp_params.num_warps = value;
Chris@0 447 } else if (identifier == "alpha_dist") {
Chris@0 448 m_warp_params.alpha_dist = value;
Chris@0 449 } else if (identifier == "nfft") {
Chris@0 450 m_nfft = value;
Chris@0 451 } else if (identifier == "f0min") {
Chris@0 452 m_f0_params.f0min = value;
Chris@0 453 } else if (identifier == "num_octs") {
Chris@0 454 m_f0_params.num_octs = value;
Chris@0 455 } else if (identifier == "f0s_per_oct") {
Chris@0 456 m_f0_params.num_f0s_per_oct = value;
Chris@0 457 } else if (identifier == "num_f0_hyps") {
Chris@0 458 m_f0_params.num_f0_hyps = value;
Chris@0 459 } else if (identifier == "f0_prefer_fun") {
Chris@0 460 m_f0_params.prefer = value;
Chris@0 461 } else if (identifier == "f0_prefer_mean") {
Chris@0 462 m_f0_params.prefer_mean = value;
Chris@0 463 } else if (identifier == "f0_prefer_stdev") {
Chris@0 464 m_f0_params.prefer_stdev = value;
Chris@0 465 } else if (identifier == "f0gram_mode") {
Chris@15 466 m_f0gram_mode = (value > 0.5 ?
Chris@15 467 BestBinOfAllDirections :
Chris@15 468 AllBinsOfBestDirection);
Chris@15 469 } else {
Chris@15 470 cerr << "WARNING: Unknown parameter id \""
Chris@15 471 << identifier << "\"" << endl;
Chris@0 472 }
Chris@0 473 }
Chris@0 474
Chris@0 475 FChTransformF0gram::ProgramList
Chris@0 476 FChTransformF0gram::getPrograms() const {
Chris@0 477 ProgramList list;
Chris@0 478 return list;
Chris@0 479 }
Chris@0 480
Chris@0 481 FChTransformF0gram::OutputList
Chris@0 482 FChTransformF0gram::getOutputDescriptors() const {
Chris@0 483
Chris@0 484 OutputList list;
Chris@0 485
Chris@0 486 // See OutputDescriptor documentation for the possibilities here.
Chris@0 487 // Every plugin must have at least one output.
Chris@0 488
Chris@0 489 /* f0 values of F0gram grid as string values */
Chris@0 490 vector<string> f0values;
Chris@10 491 int ind = 0;
Chris@14 492 char f0String[100];
Chris@0 493 while (ind < m_num_f0s) {
Chris@0 494 sprintf(f0String, "%4.2f", m_f0s[ind]);
Chris@0 495 f0values.push_back(f0String);
Chris@0 496 ind++;
Chris@0 497 }
Chris@0 498
Chris@0 499 /* The F0gram */
Chris@0 500 OutputDescriptor d;
Chris@0 501 d.identifier = "f0gram";
Chris@0 502 d.name = "F0gram: salience of f0s";
Chris@0 503 d.description = "This representation show the salience of the different f0s in the signal.";
Chris@0 504 d.hasFixedBinCount = true;
Chris@14 505 d.binCount = m_f0_params.num_octs * m_f0_params.num_f0s_per_oct;
Chris@0 506 d.binNames = f0values;
Chris@0 507 d.hasKnownExtents = false;
Chris@0 508 d.isQuantized = false;
Chris@0 509 d.sampleType = OutputDescriptor::OneSamplePerStep;
Chris@0 510 d.hasDuration = false;
Chris@0 511 list.push_back(d);
Chris@0 512
Chris@0 513 return list;
Chris@0 514 }
Chris@0 515
Chris@0 516 bool
Chris@0 517 FChTransformF0gram::initialise(size_t channels, size_t stepSize, size_t blockSize) {
Chris@0 518 if (channels < getMinChannelCount() ||
Chris@14 519 channels > getMaxChannelCount()) {
Chris@14 520 return false;
Chris@14 521 }
Chris@0 522
Chris@0 523 // set blockSize and stepSize (but changed below)
Chris@0 524 m_blockSize = blockSize;
Chris@0 525 m_stepSize = stepSize;
Chris@0 526
Chris@0 527 // WARNING !!!
Chris@0 528 // these values in fact are determined by the sampling frequency m_fs
Chris@0 529 // the parameters used below correspond to default values i.e. m_fs = 44.100 Hz
Chris@0 530 //m_blockSize = 4 * m_warp_params.nsamps_twarp;
Chris@0 531 m_stepSize = floor(m_hop / m_warp_params.fact_over_samp);
Chris@0 532
Chris@0 533 /* initialise m_glogs_params */
Chris@7 534 design_GLogS();
Chris@0 535
Chris@0 536 /* design of FChT */
Chris@0 537 design_FChT();
Chris@0 538
Chris@7 539 design_LPF();
Chris@0 540
Chris@7 541 design_time_window();
Chris@0 542
Chris@7 543 // Create Hanning window for warped signals
Chris@14 544 mp_HanningWindow = allocate<double>(m_warp_params.nsamps_twarp);
Chris@7 545 bool normalize = false;
Chris@14 546 Utils::hanning_window(mp_HanningWindow, m_warp_params.nsamps_twarp, normalize);
Chris@0 547
Chris@0 548 return true;
Chris@0 549 }
Chris@0 550
Chris@0 551 void
Chris@0 552 FChTransformF0gram::design_GLogS() {
Chris@0 553
Chris@7 554 // total number & initial quantity of f0s
Chris@10 555 m_glogs_init_f0s = (int)(((double)m_f0_params.num_f0s_per_oct)*log2(5.0))+1;
Chris@7 556 m_glogs_num_f0s = (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct + m_glogs_init_f0s;
Chris@0 557
Chris@7 558 // Initialize arrays
Chris@14 559 m_glogs_f0 = allocate<double>(m_glogs_num_f0s);
Chris@14 560 m_glogs = allocate<double>(m_glogs_num_f0s*m_warp_params.num_warps);
Chris@14 561 m_glogs_n = allocate<int>(m_glogs_num_f0s);
Chris@14 562 m_glogs_index = allocate<int>(m_glogs_num_f0s);
Chris@0 563
Chris@7 564 // Compute f0 values
Chris@7 565 m_glogs_harmonic_count = 0;
Chris@7 566 double factor = (double)(m_warp_params.nsamps_twarp/2)/(double)(m_warp_params.nsamps_twarp/2+1);
Chris@10 567 for (int i = 0; i < m_glogs_num_f0s; i++) {
Chris@7 568 m_glogs_f0[i] = (m_f0_params.f0min/5.0)*pow(2.0,(double)i/(double)m_f0_params.num_f0s_per_oct);
Chris@7 569 // for every f0 compute number of partials less or equal than m_fmax.
Chris@7 570 m_glogs_n[i] = m_fmax*factor/m_glogs_f0[i];
Chris@7 571 m_glogs_index[i] = m_glogs_harmonic_count;
Chris@7 572 m_glogs_harmonic_count += m_glogs_n[i];
Chris@7 573 }
Chris@0 574
Chris@7 575 // Initialize arrays for interpolation
Chris@14 576 m_glogs_posint = allocate<int>(m_glogs_harmonic_count);
Chris@14 577 m_glogs_posfrac = allocate<double>(m_glogs_harmonic_count);
Chris@14 578 m_glogs_interp = allocate<double>(m_glogs_harmonic_count);
Chris@0 579
Chris@7 580 // Compute int & frac of interpolation positions
Chris@10 581 int aux_index = 0;
Chris@7 582 double aux_pos;
Chris@10 583 for (int i = 0; i < m_glogs_num_f0s; i++) {
Chris@10 584 for (int j = 1; j <= m_glogs_n[i]; j++) {
Chris@7 585 // indice en el vector de largo t_warp/2+1 donde el ultimo valor corresponde a f=m_fmax
Chris@7 586 aux_pos = ((double)j*m_glogs_f0[i])*((double)(m_warp_params.nsamps_twarp/2+1))/m_fmax;
Chris@10 587 m_glogs_posint[aux_index] = (int)aux_pos;
Chris@7 588 m_glogs_posfrac[aux_index] = aux_pos - (double)m_glogs_posint[aux_index];
Chris@7 589 aux_index++;
Chris@7 590 }
Chris@7 591 }
Chris@0 592
Chris@7 593 // Third harmonic attenuation
Chris@7 594 double aux_third_harmonic;
Chris@14 595 m_glogs_third_harmonic_posint = allocate<int>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@14 596 m_glogs_third_harmonic_posfrac = allocate<double>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@10 597 for (int i = 0; i < (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct; i++) {
Chris@7 598 aux_third_harmonic = (double)i + (double)m_glogs_init_f0s - ((double)m_f0_params.num_f0s_per_oct)*log2(3.0);
Chris@10 599 m_glogs_third_harmonic_posint[i] = (int)aux_third_harmonic;
Chris@7 600 m_glogs_third_harmonic_posfrac[i] = aux_third_harmonic - (double)(m_glogs_third_harmonic_posint[i]);
Chris@7 601 }
Chris@14 602 m_glogs_third_harmonic = allocate<double>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@0 603
Chris@7 604 // Fifth harmonic attenuation
Chris@7 605 double aux_fifth_harmonic;
Chris@14 606 m_glogs_fifth_harmonic_posint = allocate<int>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@14 607 m_glogs_fifth_harmonic_posfrac = allocate<double>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@10 608 for (int i = 0; i < (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct; i++) {
Chris@7 609 aux_fifth_harmonic = (double)i + (double)m_glogs_init_f0s - ((double)m_f0_params.num_f0s_per_oct)*log2(5.0);
Chris@10 610 m_glogs_fifth_harmonic_posint[i] = (int)aux_fifth_harmonic;
Chris@7 611 m_glogs_fifth_harmonic_posfrac[i] = aux_fifth_harmonic - (double)(m_glogs_fifth_harmonic_posint[i]);
Chris@7 612 }
Chris@14 613 m_glogs_fifth_harmonic = allocate<double>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@0 614
Chris@7 615 // Normalization & attenuation windows
Chris@14 616 m_glogs_f0_preference_weights = allocate<double>(m_f0_params.num_octs*m_f0_params.num_f0s_per_oct);
Chris@14 617 m_glogs_median_correction = allocate<double>(m_f0_params.num_octs*m_f0_params.num_f0s_per_oct);
Chris@14 618 m_glogs_sigma_correction = allocate<double>(m_f0_params.num_octs*m_f0_params.num_f0s_per_oct);
Chris@7 619 double MIDI_value;
Chris@10 620 for (int i = 0; i < m_f0_params.num_octs*m_f0_params.num_f0s_per_oct; i++) {
Chris@7 621 MIDI_value = 69.0 + 12.0 * log2(m_glogs_f0[i + m_glogs_init_f0s]/440.0);
Chris@7 622 m_glogs_f0_preference_weights[i] = 1.0/sqrt(2.0*M_PI*m_f0_params.prefer_stdev*m_f0_params.prefer_stdev)*exp(-(MIDI_value-m_f0_params.prefer_mean)*(MIDI_value-m_f0_params.prefer_mean)/(2.0*m_f0_params.prefer_stdev*m_f0_params.prefer_stdev));
Chris@7 623 m_glogs_f0_preference_weights[i] = (0.01 + m_glogs_f0_preference_weights[i]) / (1.01);
Chris@0 624
Chris@7 625 m_glogs_median_correction[i] = m_glogs_params.median_poly_coefs[0]*(i+1.0)*(i+1.0) + m_glogs_params.median_poly_coefs[1]*(i+1.0) + m_glogs_params.median_poly_coefs[2];
Chris@7 626 m_glogs_sigma_correction[i] = 1.0 / (m_glogs_params.sigma_poly_coefs[0]*(i+1.0)*(i+1.0) + m_glogs_params.sigma_poly_coefs[1]*(i+1.0) + m_glogs_params.sigma_poly_coefs[2]);
Chris@7 627 }
Chris@0 628 }
Chris@0 629
Chris@0 630 void
Chris@0 631 FChTransformF0gram::design_FChT() {
Chris@0 632
Chris@0 633 /*
Chris@0 634 * FILES FOR DEBUGGING
Chris@0 635 */
Chris@0 636
Chris@0 637 //ofstream output("output.txt");
Chris@0 638
Chris@0 639
Chris@0 640 /* ============= WARPING DESIGN ============= */
Chris@0 641
Chris@0 642 // sampling frequency after oversampling
Chris@0 643 m_warpings.fs_orig = m_warp_params.fact_over_samp * m_fs;
Chris@0 644
Chris@0 645 // number of samples of the original signal frame
Chris@0 646 m_warpings.nsamps_torig = 4 * m_warp_params.fact_over_samp * m_warp_params.nsamps_twarp;
Chris@0 647 // equivalent to: m_warpings.nsamps_torig = m_warp_params.fact_over_samp * m_blockSize;
Chris@0 648
Chris@0 649 // time instants of the original signal frame
Chris@14 650 double *t_orig = allocate<double>(m_warpings.nsamps_torig);
Chris@10 651 for (int ind = 0; ind < m_warpings.nsamps_torig; ind++) {
Chris@0 652 t_orig[ind] = ((double)(ind + 1) - (double)m_warpings.nsamps_torig / 2.0) / m_warpings.fs_orig;
Chris@0 653 }
Chris@0 654
Chris@0 655 // linear chirps warping definition as relative frequency deviation
Chris@7 656 //TODO
Chris@14 657 double *freq_relative = allocate<double>(m_warpings.nsamps_torig * m_warp_params.num_warps);
Chris@0 658 define_warps_linear_chirps(freq_relative, t_orig);
Chris@0 659
Chris@0 660 // maximum relative frequency deviation
Chris@0 661 double freq_relative_max = 0;
Chris@14 662 for (int i = 0; i < m_warpings.nsamps_torig; i++) {
Chris@14 663 for (int j = 0; j < m_warp_params.num_warps; j++) {
Chris@14 664 if (freq_relative_max < freq_relative[j * m_warpings.nsamps_torig + i]) {
Chris@0 665 freq_relative_max = freq_relative[j * m_warpings.nsamps_torig + i];
Chris@14 666 }
Chris@14 667 }
Chris@14 668 }
Chris@0 669
Chris@0 670 // sampling frequency of warped signal to be free of aliasing up to fmax
Chris@0 671 m_warpings.fs_warp = 2 * m_fmax * freq_relative_max;
Chris@0 672
Chris@0 673 // time instants of the warped signal frame
Chris@14 674 double *t_warp = allocate<double>(m_warp_params.nsamps_twarp);
Chris@10 675 for (int ind = 0; ind < m_warp_params.nsamps_twarp; ind++) {
Chris@0 676 t_warp[ind] = ((double)((int)(ind + 1)- (int)m_warp_params.nsamps_twarp / 2)) / (double)m_warpings.fs_warp;
Chris@0 677 }
Chris@0 678
Chris@0 679 // design of warpings for efficient interpolation
Chris@0 680 design_warps(freq_relative, t_orig, t_warp);
Chris@0 681
Chris@0 682
Chris@0 683 /*
Chris@0 684 * FILES FOR DEBUGGING
Chris@0 685 */
Chris@0 686
Chris@0 687 /*
Chris@7 688 output << "chirp_rates" << endl;
Chris@10 689 for (int j = 0; j < m_warp_params.num_warps; j++){
Chris@7 690 output << m_warpings.chirp_rates[j];
Chris@7 691 output << " ";
Chris@7 692 }
Chris@7 693 output << endl << "freq_relative" << endl;
Chris@0 694
Chris@10 695 for (int i = 0; i < m_warpings.nsamps_torig; i++){
Chris@10 696 for (int j = 0; j < m_warp_params.num_warps; j++){
Chris@7 697 output << freq_relative[j * m_warpings.nsamps_torig + i];
Chris@7 698 output << " ";
Chris@7 699 }
Chris@7 700 output << endl;
Chris@7 701 }
Chris@0 702
Chris@7 703 output << endl << "t_orig" << endl;
Chris@0 704
Chris@10 705 for (int i = 0; i < m_warpings.nsamps_torig; i++){
Chris@7 706 output << t_orig[i] << endl ;
Chris@7 707 }
Chris@7 708 */
Chris@0 709
Chris@14 710 deallocate(freq_relative);
Chris@14 711 deallocate(t_orig);
Chris@14 712 deallocate(t_warp);
Chris@14 713
Chris@0 714 //output.close();
Chris@0 715
Chris@0 716 /* ============= FFTW PLAN DESIGN ============= */
Chris@7 717 // Initialize 2-d array for warped signals
Chris@14 718 x_warping = allocate<double>(m_warp_params.nsamps_twarp);
Chris@14 719 m_absFanChirpTransform = allocate<double>(m_warp_params.num_warps * (m_warp_params.nsamps_twarp/2 + 1));
Chris@14 720 m_auxFanChirpTransform = allocate<double>(2 * (m_warp_params.nsamps_twarp/2 + 1));
Chris@14 721 fft_xwarping = new FFTReal(m_warp_params.nsamps_twarp);
Chris@0 722 }
Chris@0 723
Chris@0 724 void
Chris@0 725 FChTransformF0gram::design_warps(double * freq_relative, double * t_orig, double * t_warp) {
Chris@0 726 /* the warping is done by interpolating the original signal in time instants
Chris@0 727 given by the desired frequency deviation, to do this, the interpolation
Chris@0 728 instants are stored in a structure as an integer index and a fractional value
Chris@0 729 hypothesis: sampling frequency at the central point equals the original
Chris@7 730 */
Chris@0 731
Chris@14 732 m_warpings.pos_int = allocate<int>(m_warp_params.num_warps * m_warp_params.nsamps_twarp);
Chris@14 733 m_warpings.pos_frac = allocate<double>(m_warp_params.num_warps * m_warp_params.nsamps_twarp);
Chris@0 734
Chris@7 735 // vector of phase values
Chris@14 736 double *phi = allocate<double>(m_warpings.nsamps_torig);
Chris@7 737 double aux;
Chris@0 738
Chris@7 739 // warped positions
Chris@14 740 double *pos1 = allocate<double>(m_warp_params.nsamps_twarp*m_warp_params.num_warps);
Chris@0 741
Chris@10 742 for (int i = 0; i < m_warp_params.num_warps; i++) {
Chris@0 743
Chris@7 744 // integration of relative frequency to obtain phase values
Chris@14 745 Utils::cumtrapz(t_orig, freq_relative + i*(m_warpings.nsamps_torig), m_warpings.nsamps_torig, phi);
Chris@0 746
Chris@7 747 // centering of phase values to force original frequency in the middle
Chris@7 748 aux = phi[m_warpings.nsamps_torig/2];
Chris@10 749 for (int j = 0; j < m_warpings.nsamps_torig; j++) {
Chris@7 750 phi[j] -= aux;
Chris@7 751 } //for
Chris@0 752
Chris@7 753 // interpolation of phase values to obtain warped positions
Chris@14 754 Utils::interp1(phi, t_orig, m_warpings.nsamps_torig, t_warp, pos1 + i*m_warp_params.nsamps_twarp, m_warp_params.nsamps_twarp);
Chris@0 755 }
Chris@0 756
Chris@0 757 // % previous sample index
Chris@0 758 // pos1_int = uint32(floor(pos1))';
Chris@0 759 // % integer corresponding to previous sample index in "c"
Chris@0 760 // warps.pos1_int = (pos1_int - uint32(1));
Chris@0 761 // % fractional value that defines the warped position
Chris@0 762 // warps.pos1_frac = (double(pos1)' - double(pos1_int));
Chris@0 763
Chris@10 764 for (int j = 0; j < m_warp_params.nsamps_twarp*m_warp_params.num_warps; j++) {
Chris@7 765 // previous sample index
Chris@7 766 pos1[j] = pos1[j]*m_warpings.fs_orig + m_warpings.nsamps_torig/2 + 1;
Chris@10 767 m_warpings.pos_int[j] = (int) pos1[j];
Chris@7 768 m_warpings.pos_frac[j] = pos1[j] - (double)(m_warpings.pos_int[j]);
Chris@7 769 } //for
Chris@0 770
Chris@14 771 deallocate(phi);
Chris@14 772 deallocate(pos1);
Chris@0 773 }
Chris@0 774
Chris@0 775 void
Chris@0 776 FChTransformF0gram::define_warps_linear_chirps(double * freq_relative, double * t_orig) {
Chris@0 777 /** define warps as relative frequency deviation from original frequency
Chris@7 778 t_orig : time vector
Chris@7 779 freq_relative : relative frequency deviations
Chris@7 780 */
Chris@0 781 if (m_warp_params.alpha_dist == 0) {
Chris@0 782
Chris@0 783 // linear alpha values spacing
Chris@14 784 m_warpings.chirp_rates = allocate<double>(m_warp_params.num_warps);
Chris@0 785 // WARNING m_warp_params.num_warps must be odd
Chris@0 786 m_warpings.chirp_rates[0] = -m_warp_params.alpha_max;
Chris@0 787 double increment = (double) m_warp_params.alpha_max / ((m_warp_params.num_warps - 1) / 2);
Chris@0 788
Chris@10 789 for (int ind = 1; ind < m_warp_params.num_warps; ind++) {
Chris@0 790 m_warpings.chirp_rates[ind] = m_warpings.chirp_rates[ind - 1] + increment;
Chris@0 791 }
Chris@0 792 // force zero value
Chris@0 793 m_warpings.chirp_rates[(int) ((m_warp_params.num_warps - 1) / 2)] = 0;
Chris@0 794
Chris@0 795 } else {
Chris@0 796 // log alpha values spacing
Chris@14 797 m_warpings.chirp_rates = allocate<double>(m_warp_params.num_warps);
Chris@0 798
Chris@0 799 // force zero value
Chris@0 800 int middle_point = (int) ((m_warp_params.num_warps - 1) / 2);
Chris@0 801 m_warpings.chirp_rates[middle_point] = 0;
Chris@0 802
Chris@0 803 double logMax = log10(m_warp_params.alpha_max + 1);
Chris@0 804 double increment = logMax / ((m_warp_params.num_warps - 1) / 2.0f);
Chris@0 805 double exponent = 0;
Chris@0 806
Chris@0 807 // fill positive values
Chris@0 808 int ind_log = middle_point;
Chris@10 809 for (int ind = 0; ind < (m_warp_params.num_warps + 1) / 2; ind++) {
Chris@0 810 m_warpings.chirp_rates[ind_log] = pow(10, exponent) - 1;
Chris@0 811 exponent += increment;
Chris@0 812 ind_log++;
Chris@0 813 }
Chris@0 814 // fill negative values
Chris@10 815 for (int ind = 0; ind < (m_warp_params.num_warps - 1) / 2; ind++) {
Chris@0 816 m_warpings.chirp_rates[ind] = -m_warpings.chirp_rates[m_warp_params.num_warps - 1 - ind];
Chris@0 817 }
Chris@0 818 }
Chris@0 819
Chris@0 820 // compute relative frequency deviation
Chris@14 821 for (int i = 0; i < m_warpings.nsamps_torig; i++) {
Chris@14 822 for (int j = 0; j < m_warp_params.num_warps; j++) {
Chris@0 823 freq_relative[j * m_warpings.nsamps_torig + i] = 1.0 + t_orig[i] * m_warpings.chirp_rates[j];
Chris@14 824 }
Chris@14 825 }
Chris@0 826 }
Chris@0 827
Chris@0 828 void
Chris@14 829 FChTransformF0gram::design_LPF()
Chris@14 830 {
Chris@14 831 double *lp_LPFWindow_aux = allocate<double>(m_blockSize/2+1);
Chris@14 832 mp_LPFWindow = allocate<double>(m_blockSize/2+1);
Chris@0 833
Chris@10 834 int i_max = (int) ((2.0*m_fmax/m_fs) * ( (double)m_blockSize / 2.0 + 1.0 ));
Chris@10 835 for (int i = 0; i < m_blockSize/2+1; i++) {
Chris@0 836 if (i >= i_max) {
Chris@0 837 lp_LPFWindow_aux[i] = 0.0;
Chris@0 838 } else {
Chris@0 839 lp_LPFWindow_aux[i] = 1.0;
Chris@0 840 }
Chris@0 841 }
Chris@14 842
Chris@14 843 LPF_time = allocate_and_zero<double>(m_warpings.nsamps_torig);
Chris@14 844 LPF_frequency = allocate_and_zero<double>(2 * (m_warpings.nsamps_torig/2 + 1));
Chris@14 845
Chris@14 846 fft_forward_LPF = new FFTReal(m_blockSize);
Chris@14 847 fft_inverse_LPF = new FFTReal(m_warpings.nsamps_torig);
Chris@0 848
Chris@10 849 int winWidth = 11;
Chris@14 850 double *lp_hanningWindow = allocate<double>(winWidth);
Chris@0 851 double accum=0;
Chris@10 852 for (int i = 0; i < winWidth; i++) {
Chris@0 853 lp_hanningWindow[i]=0.5*(1.0-cos(2*M_PI*(double)(i+1)/((double)winWidth+1.0)));
Chris@0 854 accum+=lp_hanningWindow[i];
Chris@0 855
Chris@0 856 }
Chris@10 857 for (int i = 0; i < winWidth; i++) { //window normalization
Chris@0 858 lp_hanningWindow[i]=lp_hanningWindow[i]/accum;
Chris@0 859 }
Chris@10 860 for (int i = 0; i < m_blockSize/2+1; i++) {
Chris@0 861 //if (((i-(winWidth-1)/2)<0)||(i+(winWidth-1))/2>m_blockSize/2-1) {//consideramos winWidth impar, si la ventana sale del arreglo se rellena con el valor origianl
Chris@7 862 if ( (i > (i_max + (winWidth-1)/2)) || (i <= (i_max - (winWidth-1)/2)) ) {
Chris@0 863 mp_LPFWindow[i]=lp_LPFWindow_aux[i];
Chris@0 864 } else {
Chris@0 865 accum=0;
Chris@10 866 for (int j = -((winWidth-1)/2); j <= (winWidth-1)/2; j++) {
Chris@0 867 accum+=lp_LPFWindow_aux[i-j]*lp_hanningWindow[j+(winWidth-1)/2];
Chris@7 868 }
Chris@0 869 mp_LPFWindow[i]=accum;
Chris@0 870 }
Chris@0 871 }
Chris@0 872
Chris@14 873 deallocate(lp_LPFWindow_aux);
Chris@14 874 deallocate(lp_hanningWindow);
Chris@0 875 }
Chris@0 876
Chris@14 877 void FChTransformF0gram::apply_LPF()
Chris@14 878 {
Chris@14 879 fft_forward_LPF->forward(LPF_time, LPF_frequency);
Chris@14 880
Chris@10 881 for (int i = 0; i < m_blockSize/2+1; i++) {
Chris@14 882 LPF_frequency[i*2] *= mp_LPFWindow[i] * m_warpings.nsamps_torig;
Chris@14 883 LPF_frequency[i*2 + 1] *= mp_LPFWindow[i] * m_warpings.nsamps_torig;
Chris@0 884 }
Chris@14 885
Chris@14 886 fft_inverse_LPF->inverse(LPF_frequency, LPF_time);
Chris@0 887
Chris@7 888 // TODO ver si hay que hacer fftshift para corregir la fase respecto al centro del frame.
Chris@7 889 // nota: además de aplicar el LPF, esta función resamplea la señal original.
Chris@0 890 }
Chris@0 891
Chris@14 892 void FChTransformF0gram::clean_LPF()
Chris@14 893 {
Chris@14 894 delete fft_forward_LPF;
Chris@14 895 delete fft_inverse_LPF;
Chris@14 896 deallocate(LPF_time);
Chris@14 897 deallocate(LPF_frequency);
Chris@14 898 deallocate(mp_LPFWindow);
Chris@0 899 }
Chris@0 900
Chris@14 901 void FChTransformF0gram::reset()
Chris@14 902 {
Chris@0 903 }
Chris@0 904
Chris@0 905 FChTransformF0gram::FeatureSet
Chris@5 906 FChTransformF0gram::process(const float *const *inputBuffers, Vamp::RealTime) {
Chris@0 907
Chris@0 908 // // Do actual work!
Chris@0 909 //
Chris@0 910
Chris@7 911 /* PSEUDOCÓDIGO:
Chris@7 912 - Aplicar FFT al frame entero.
Chris@7 913 - Filtro pasabajos en frecuencia.
Chris@7 914 - FFT inversa al frame entero.
Chris@7 915 -----------------------------------------------------------------------------
Chris@7 916 - Para cada warp: *Si es un espectrograma direccional (un solo warp
Chris@7 917 => no es para cada warp sino para el elegido)
Chris@7 918 - Hacer la interpolación con interp1q.
Chris@7 919 - Aplicar la FFT al frame warpeado.
Chris@7 920 - (Opcional) GLogS.
Chris@7 921 - ...
Chris@7 922 */
Chris@0 923
Chris@0 924 //---------------------------------------------------------------------------
Chris@7 925 FeatureSet fs;
Chris@0 926
Chris@7 927 #ifdef DEBUG
Chris@7 928 printf("\n ----- DEBUG INFORMATION ----- \n");
Chris@7 929 printf(" m_fs = %f Hz.\n",m_fs);
Chris@7 930 printf(" fs_orig = %f Hz.\n",m_warpings.fs_orig);
Chris@7 931 printf(" fs_warp = %f Hz.\n",m_warpings.fs_warp);
Chris@7 932 printf(" m_nfft = %d.\n",m_nfft);
Chris@7 933 printf(" m_blockSize = %d.\n",m_blockSize);
Chris@7 934 printf(" m_warpings.nsamps_torig = %d.\n",m_warpings.nsamps_torig);
Chris@7 935 printf(" m_warp_params.num_warps = %d.\n",m_warp_params.num_warps);
Chris@7 936 printf(" m_glogs_harmonic_count = %d.\n",m_glogs_harmonic_count);
Chris@7 937 #endif
Chris@0 938
Chris@10 939 for (int i = 0; i < m_blockSize; i++) {
Chris@0 940 LPF_time[i] = (double)(inputBuffers[0][i]) * m_timeWindow[i];
Chris@0 941 }
Chris@0 942
Chris@0 943 // #ifdef DEBUG
Chris@0 944 // printf(" HASTA ACÁ ANDA!!!\n");
Chris@0 945 // cout << flush;
Chris@0 946 // #endif
Chris@0 947
Chris@7 948 apply_LPF();
Chris@7 949 // Señal filtrada queda en LPF_time
Chris@0 950
Chris@7 951 Feature feature;
Chris@0 952 feature.hasTimestamp = false;
Chris@0 953
Chris@15 954 if (m_processingMode == ModeRoughSpectrogram) {
Chris@15 955 feature.values = vector<float>(m_warp_params.nsamps_twarp/2+1, 0.f);
Chris@15 956 }
Chris@15 957
Chris@0 958 // ----------------------------------------------------------------------------------------------
Chris@0 959 // Hanning window & FFT for all warp directions
Chris@0 960
Chris@7 961 double max_glogs = -DBL_MAX;
Chris@10 962 int ind_max_glogs = 0;
Chris@0 963
Chris@10 964 for (int i_warp = 0; i_warp < m_warp_params.num_warps; i_warp++) {
Chris@7 965 // Interpolate
Chris@14 966 Utils::interp1q(LPF_time, (m_warpings.pos_int) + i_warp*m_warp_params.nsamps_twarp, m_warpings.pos_frac + i_warp*m_warp_params.nsamps_twarp, x_warping, m_warp_params.nsamps_twarp);
Chris@0 967
Chris@7 968 // Apply window
Chris@10 969 for (int i = 0; i < m_warp_params.nsamps_twarp; i++) {
Chris@7 970 x_warping[i] *= mp_HanningWindow[i];
Chris@7 971 }
Chris@0 972
Chris@7 973 // Transform
Chris@14 974 fft_xwarping->forward(x_warping, m_auxFanChirpTransform);
Chris@0 975
Chris@15 976 if (m_processingMode == ModeRoughSpectrogram) {
Chris@15 977 for (int i = 0; i < (m_warp_params.nsamps_twarp/2+1); i++) {
Chris@15 978 double abs = sqrt(m_auxFanChirpTransform[i*2]*m_auxFanChirpTransform[i*2]+m_auxFanChirpTransform[i*2+1]*m_auxFanChirpTransform[i*2+1]);
Chris@15 979 if (abs > feature.values[i]) {
Chris@15 980 feature.values[i] = abs;
Chris@15 981 }
Chris@15 982 }
Chris@15 983 continue;
Chris@15 984 }
Chris@15 985
Chris@7 986 // Copy result
Chris@7 987 double *aux_abs_fcht = m_absFanChirpTransform + i_warp*(m_warp_params.nsamps_twarp/2+1);
Chris@10 988 for (int i = 0; i < (m_warp_params.nsamps_twarp/2+1); i++) {
Chris@14 989 aux_abs_fcht[i] = log10(1.0 + 10.0*sqrt(m_auxFanChirpTransform[i*2]*m_auxFanChirpTransform[i*2]+m_auxFanChirpTransform[i*2+1]*m_auxFanChirpTransform[i*2+1]));
Chris@7 990 }
Chris@0 991
Chris@0 992 // -----------------------------------------------------------------------------------------
Chris@0 993 // GLogS
Chris@14 994 Utils::interp1q(aux_abs_fcht, m_glogs_posint, m_glogs_posfrac, m_glogs_interp, m_glogs_harmonic_count);
Chris@10 995 int glogs_ind = 0;
Chris@10 996 for (int i = 0; i < m_glogs_num_f0s; i++) {
Chris@7 997 double glogs_accum = 0;
Chris@10 998 for (int j = 1; j <= m_glogs_n[i]; j++) {
Chris@7 999 glogs_accum += m_glogs_interp[glogs_ind++];
Chris@7 1000 }
Chris@7 1001 m_glogs[i + i_warp*m_glogs_num_f0s] = glogs_accum/(double)m_glogs_n[i];
Chris@7 1002 }
Chris@0 1003
Chris@0 1004 // Sub/super harmonic correction
Chris@14 1005 Utils::interp1q(m_glogs + i_warp*m_glogs_num_f0s, m_glogs_third_harmonic_posint, m_glogs_third_harmonic_posfrac, m_glogs_third_harmonic, (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@14 1006 Utils::interp1q(m_glogs + i_warp*m_glogs_num_f0s, m_glogs_fifth_harmonic_posint, m_glogs_fifth_harmonic_posfrac, m_glogs_fifth_harmonic, (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@10 1007 for (int i = m_glogs_num_f0s-1; i >= m_glogs_init_f0s; i--) {
Chris@7 1008 m_glogs[i + i_warp*m_glogs_num_f0s] -= MAX(MAX(m_glogs[i-m_f0_params.num_f0s_per_oct + i_warp*m_glogs_num_f0s],m_glogs_third_harmonic[i-m_glogs_init_f0s]),m_glogs_fifth_harmonic[i-m_glogs_init_f0s]);
Chris@7 1009 //m_glogs[i] -= MAX(m_glogs[i-m_f0_params.num_f0s_per_oct],m_glogs_third_harmonic[i-m_glogs_init_f0s]);
Chris@7 1010 }
Chris@10 1011 for (int i = m_glogs_init_f0s; i < m_glogs_num_f0s-m_f0_params.num_f0s_per_oct; i++) {
Chris@7 1012 m_glogs[i + i_warp*m_glogs_num_f0s] -= 0.3*m_glogs[i+m_f0_params.num_f0s_per_oct + i_warp*m_glogs_num_f0s];
Chris@7 1013 // Median, sigma $ weights correction
Chris@7 1014 m_glogs[i + i_warp*m_glogs_num_f0s] = (m_glogs[i + i_warp*m_glogs_num_f0s]-m_glogs_median_correction[i-m_glogs_init_f0s])*m_glogs_sigma_correction[i-m_glogs_init_f0s]*m_glogs_f0_preference_weights[i-m_glogs_init_f0s];
Chris@7 1015 }
Chris@0 1016
Chris@7 1017 // Look for maximum value to determine best direction
Chris@10 1018 for (int i = m_glogs_init_f0s; i < m_glogs_num_f0s-m_f0_params.num_f0s_per_oct; i++) {
Chris@7 1019 if (m_glogs[i + i_warp*m_glogs_num_f0s] > max_glogs) {
Chris@7 1020 max_glogs = m_glogs[i + i_warp*m_glogs_num_f0s];
Chris@7 1021 ind_max_glogs = i_warp;
Chris@7 1022 }
Chris@7 1023 }
Chris@7 1024 }
Chris@0 1025
Chris@15 1026 if (m_processingMode == ModeRoughSpectrogram) {
Chris@15 1027
Chris@15 1028 // already accumulated our return values in feature
Chris@15 1029
Chris@15 1030 } else if (m_processingMode == ModeSpectrogram) {
Chris@15 1031
Chris@15 1032 for (int i = 0; i < m_warp_params.nsamps_twarp/2+1; i++) {
Chris@15 1033 feature.values.push_back(pow(10.0, m_absFanChirpTransform[ind_max_glogs * (m_warp_params.nsamps_twarp/2+1) + i]) - 1.0);
Chris@15 1034 }
Chris@15 1035
Chris@15 1036 } else { // f0gram
Chris@15 1037
Chris@15 1038 for (int i=m_glogs_init_f0s; i< m_glogs_num_f0s - m_f0_params.num_f0s_per_oct; i++) {
Chris@15 1039 switch (m_f0gram_mode) {
Chris@15 1040 case AllBinsOfBestDirection:
Chris@15 1041 feature.values.push_back((float)m_glogs[i+(int)ind_max_glogs*(int)m_glogs_num_f0s]);
Chris@15 1042 break;
Chris@15 1043 case BestBinOfAllDirections:
Chris@15 1044 max_glogs = -DBL_MAX;
Chris@15 1045 for (int i_warp = 0; i_warp < m_warp_params.num_warps; i_warp++) {
Chris@15 1046 if (m_glogs[i + i_warp*m_glogs_num_f0s] > max_glogs) {
Chris@15 1047 max_glogs = m_glogs[i + i_warp*m_glogs_num_f0s];
Chris@15 1048 ind_max_glogs = i_warp;
Chris@15 1049 }
Chris@7 1050 }
Chris@15 1051 feature.values.push_back((float)max_glogs);
Chris@15 1052 break;
Chris@7 1053 }
Chris@7 1054 }
Chris@7 1055 }
Chris@0 1056
Chris@7 1057 fs[0].push_back(feature);
Chris@7 1058 return fs;
Chris@0 1059 }
Chris@0 1060
Chris@0 1061 FChTransformF0gram::FeatureSet
Chris@0 1062 FChTransformF0gram::getRemainingFeatures() {
Chris@0 1063 return FeatureSet();
Chris@0 1064 }
Chris@0 1065
Chris@0 1066 void
Chris@0 1067 FChTransformF0gram::design_time_window() {
Chris@0 1068
Chris@10 1069 int transitionWidth = (int)m_blockSize/128 + 1;;
Chris@14 1070 m_timeWindow = allocate<double>(m_blockSize);
Chris@14 1071 double *lp_transitionWindow = allocate<double>(transitionWidth);
Chris@0 1072
Chris@7 1073 //memset(m_timeWindow, 1.0, m_blockSize);
Chris@10 1074 for (int i = 0; i < m_blockSize; i++) {
Chris@7 1075 m_timeWindow[i] = 1.0;
Chris@7 1076 }
Chris@0 1077
Chris@10 1078 for (int i = 0; i < transitionWidth; i++) {
Chris@0 1079 lp_transitionWindow[i]=0.5*(1.0-cos(2*M_PI*(double)(i+1)/((double)transitionWidth+1.0)));
Chris@0 1080 }
Chris@0 1081
Chris@10 1082 for (int i = 0; i < transitionWidth/2; i++) {
Chris@7 1083 m_timeWindow[i] = lp_transitionWindow[i];
Chris@7 1084 m_timeWindow[m_blockSize-1-i] = lp_transitionWindow[transitionWidth-1-i];
Chris@7 1085 }
Chris@0 1086
Chris@7 1087 #ifdef DEBUG
Chris@7 1088 for (int i = 0; i < m_blockSize; i++) {
Chris@7 1089 if ((i<transitionWidth)) {
Chris@7 1090 printf(" m_timeWindow[%d] = %f.\n",i,m_timeWindow[i]);
Chris@7 1091 }
Chris@7 1092 }
Chris@7 1093 #endif
Chris@0 1094
Chris@14 1095 deallocate(lp_transitionWindow);
Chris@0 1096 }
Chris@0 1097