annotate FChTransformF0gram.cpp @ 22:a1879532385e spect

Connect up an interesting parameter that was exposed but not connected
author Chris Cannam
date Thu, 04 Oct 2018 14:56:41 +0100
parents 37917af73ae9
children 2c54d83a196f
rev   line source
Chris@0 1 /*
Chris@0 2 copyright (C) 2011 I. Irigaray, M. Rocamora
Chris@0 3
Chris@0 4 This program is free software: you can redistribute it and/or modify
Chris@0 5 it under the terms of the GNU General Public License as published by
Chris@0 6 the Free Software Foundation, either version 3 of the License, or
Chris@0 7 (at your option) any later version.
Chris@0 8
Chris@0 9 This program is distributed in the hope that it will be useful,
Chris@0 10 but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@0 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@0 12 GNU General Public License for more details.
Chris@0 13
Chris@0 14 You should have received a copy of the GNU General Public License
Chris@0 15 along with this program. If not, see <http://www.gnu.org/licenses/>.
Chris@7 16 */
Chris@0 17
Chris@0 18 #include "FChTransformF0gram.h"
Chris@0 19 #include "FChTransformUtils.h"
Chris@0 20 #include <math.h>
Chris@0 21 #include <float.h>
Chris@14 22
Chris@19 23 #include <set>
Chris@19 24
Chris@14 25 #include "bqvec/Allocators.h"
Chris@14 26
Chris@14 27 using namespace breakfastquay;
Chris@14 28
Chris@21 29 //#define DEBUG
Chris@7 30
Chris@0 31 #define MAX(x, y) (((x) > (y)) ? (x) : (y))
Chris@0 32
Chris@15 33 FChTransformF0gram::FChTransformF0gram(ProcessingMode mode,
Chris@15 34 float inputSampleRate) :
Chris@7 35 Plugin(inputSampleRate),
Chris@15 36 m_processingMode(mode),
Chris@20 37 m_initialised(false),
Chris@20 38 m_stepSize(256),
Chris@20 39 m_blockSize(8192) {
Chris@0 40
Chris@0 41 m_fs = inputSampleRate;
Chris@0 42 // max frequency of interest (Hz)
Chris@0 43 m_fmax = 10000.f;
Chris@0 44 // warping parameters
Chris@12 45 m_warp_params.nsamps_twarp = 2048;
Chris@0 46 m_warp_params.alpha_max = 4;
Chris@0 47 m_warp_params.num_warps = 21;
Chris@0 48 m_warp_params.fact_over_samp = 2;
Chris@0 49 m_warp_params.alpha_dist = 0;
Chris@0 50 // f0 parameters
Chris@0 51 m_f0_params.f0min = 80.0;
Chris@0 52 m_f0_params.num_octs = 4;
Chris@0 53 m_f0_params.num_f0s_per_oct = 192;
Chris@0 54 m_f0_params.num_f0_hyps = 5;
Chris@0 55 m_f0_params.prefer = true;
Chris@0 56 m_f0_params.prefer_mean = 60;
Chris@0 57 m_f0_params.prefer_stdev = 18;
Chris@0 58 // glogs parameters
Chris@0 59 m_glogs_params.HP_logS = true;
Chris@0 60 m_glogs_params.att_subharms = 1;
Chris@7 61 // display parameters
Chris@15 62 m_f0gram_mode = BestBinOfAllDirections;
Chris@0 63
Chris@0 64 m_glogs_params.median_poly_coefs[0] = -0.000000058551680;
Chris@0 65 m_glogs_params.median_poly_coefs[1] = -0.000006945207775;
Chris@0 66 m_glogs_params.median_poly_coefs[2] = 0.002357223226588;
Chris@0 67
Chris@0 68 m_glogs_params.sigma_poly_coefs[0] = 0.000000092782308;
Chris@0 69 m_glogs_params.sigma_poly_coefs[1] = 0.000057283574898;
Chris@0 70 m_glogs_params.sigma_poly_coefs[2] = 0.022199903714288;
Chris@0 71
Chris@0 72 // number of fft points (controls zero-padding)
Chris@0 73 m_nfft = m_warp_params.nsamps_twarp;
Chris@0 74 // hop in samples
Chris@0 75 m_hop = m_warp_params.fact_over_samp * 256;
Chris@0 76
Chris@0 77 m_num_f0s = 0;
Chris@16 78 m_f0s = 0;
Chris@0 79 }
Chris@0 80
Chris@14 81 FChTransformF0gram::~FChTransformF0gram()
Chris@14 82 {
Chris@20 83 if (!m_initialised) {
Chris@14 84 return; // nothing was allocated
Chris@14 85 }
Chris@14 86
Chris@20 87 deallocate(m_inputBuffer);
Chris@20 88
Chris@14 89 deallocate(m_warpings.pos_int);
Chris@14 90 deallocate(m_warpings.pos_frac);
Chris@14 91 deallocate(m_warpings.chirp_rates);
Chris@14 92
Chris@14 93 clean_LPF();
Chris@14 94
Chris@14 95 deallocate(m_timeWindow);
Chris@14 96
Chris@14 97 deallocate(mp_HanningWindow);
Chris@14 98
Chris@14 99 // Warping
Chris@14 100 deallocate(x_warping);
Chris@14 101 delete fft_xwarping;
Chris@14 102 deallocate(m_absFanChirpTransform);
Chris@14 103 deallocate(m_auxFanChirpTransform);
Chris@14 104
Chris@14 105 // design_GLogS
Chris@14 106 deallocate(m_glogs_f0);
Chris@14 107 deallocate(m_glogs);
Chris@14 108 deallocate(m_glogs_n);
Chris@14 109 deallocate(m_glogs_index);
Chris@14 110 deallocate(m_glogs_posint);
Chris@14 111 deallocate(m_glogs_posfrac);
Chris@14 112 deallocate(m_glogs_interp);
Chris@14 113 deallocate(m_glogs_third_harmonic_posint);
Chris@14 114 deallocate(m_glogs_third_harmonic_posfrac);
Chris@14 115 deallocate(m_glogs_third_harmonic);
Chris@14 116 deallocate(m_glogs_fifth_harmonic_posint);
Chris@14 117 deallocate(m_glogs_fifth_harmonic_posfrac);
Chris@14 118 deallocate(m_glogs_fifth_harmonic);
Chris@14 119 deallocate(m_glogs_f0_preference_weights);
Chris@14 120 deallocate(m_glogs_median_correction);
Chris@14 121 deallocate(m_glogs_sigma_correction);
Chris@16 122
Chris@16 123 deallocate(m_f0s);
Chris@0 124 }
Chris@0 125
Chris@0 126 string
Chris@0 127 FChTransformF0gram::getIdentifier() const {
Chris@15 128 switch (m_processingMode) {
Chris@15 129 case ModeF0Gram: return "fchtransformf0gram";
Chris@15 130 case ModeSpectrogram: return "fchtransformspectrogram";
Chris@15 131 case ModeRoughSpectrogram: return "fchtransformrough";
Chris@15 132 }
Chris@17 133 throw std::logic_error("unknown mode");
Chris@0 134 }
Chris@0 135
Chris@0 136 string
Chris@0 137 FChTransformF0gram::getName() const {
Chris@15 138 switch (m_processingMode) {
Chris@15 139 case ModeF0Gram: return "Fan Chirp Transform F0gram";
Chris@15 140 case ModeSpectrogram: return "Fan Chirp Transform Spectrogram";
Chris@15 141 case ModeRoughSpectrogram: return "Fan Chirp Transform Rough Spectrogram";
Chris@15 142 }
Chris@17 143 throw std::logic_error("unknown mode");
Chris@0 144 }
Chris@0 145
Chris@0 146 string
Chris@0 147 FChTransformF0gram::getDescription() const {
Chris@15 148 switch (m_processingMode) {
Chris@15 149 case ModeF0Gram:
Chris@15 150 return "This plug-in produces a representation, called F0gram, which exhibits the salience of the fundamental frequency of the sound sources in the audio file. The computation of the F0gram makes use of the Fan Chirp Transform analysis. It is based on the article \"Fan chirp transform for music representation\" P. Cancela, E. Lopez, M. Rocamora, International Conference on Digital Audio Effects, 13th. DAFx-10. Graz, Austria - 6-10 Sep 2010.";
Chris@15 151 case ModeSpectrogram:
Chris@15 152 return "This plug-in produces a spectral representation of the audio using Fan Chirp Transform analysis.";
Chris@15 153 case ModeRoughSpectrogram:
Chris@15 154 return "This plug-in produces a more approximate spectral representation of the audio using Fan Chirp Transform analysis.";
Chris@15 155 }
Chris@17 156 throw std::logic_error("unknown mode");
Chris@0 157 }
Chris@0 158
Chris@0 159 string
Chris@0 160 FChTransformF0gram::getMaker() const {
Chris@0 161 // Your name here
Chris@0 162 return "Audio Processing Group \n Universidad de la Republica";
Chris@0 163 }
Chris@0 164
Chris@0 165 int
Chris@0 166 FChTransformF0gram::getPluginVersion() const {
Chris@0 167 // Increment this each time you release a version that behaves
Chris@0 168 // differently from the previous one
Chris@0 169 //
Chris@0 170 // 0 - initial version from scratch
Chris@15 171 return 1;
Chris@0 172 }
Chris@0 173
Chris@0 174 string
Chris@0 175 FChTransformF0gram::getCopyright() const {
Chris@0 176 // This function is not ideally named. It does not necessarily
Chris@0 177 // need to say who made the plugin -- getMaker does that -- but it
Chris@0 178 // should indicate the terms under which it is distributed. For
Chris@0 179 // example, "Copyright (year). All Rights Reserved", or "GPL"
Chris@0 180 return "copyright (C) 2011 GPL - Audio Processing Group, UdelaR";
Chris@0 181 }
Chris@0 182
Chris@0 183 FChTransformF0gram::InputDomain
Chris@0 184 FChTransformF0gram::getInputDomain() const {
Chris@0 185 return TimeDomain;
Chris@0 186 }
Chris@0 187
Chris@0 188 size_t FChTransformF0gram::getPreferredBlockSize() const {
Chris@20 189 // We do our own accumulating into blocks within process()
Chris@20 190 return m_blockSize/2;
Chris@0 191 }
Chris@0 192
Chris@0 193 size_t
Chris@0 194 FChTransformF0gram::getPreferredStepSize() const {
Chris@20 195 return m_stepSize;
Chris@0 196 }
Chris@0 197
Chris@0 198 size_t
Chris@0 199 FChTransformF0gram::getMinChannelCount() const {
Chris@0 200 return 1;
Chris@0 201 }
Chris@0 202
Chris@0 203 size_t
Chris@0 204 FChTransformF0gram::getMaxChannelCount() const {
Chris@0 205 return 1;
Chris@0 206 }
Chris@0 207
Chris@0 208 FChTransformF0gram::ParameterList
Chris@0 209 FChTransformF0gram::getParameterDescriptors() const {
Chris@0 210 ParameterList list;
Chris@0 211
Chris@0 212 // If the plugin has no adjustable parameters, return an empty
Chris@0 213 // list here (and there's no need to provide implementations of
Chris@0 214 // getParameter and setParameter in that case either).
Chris@0 215
Chris@0 216 // Note that it is your responsibility to make sure the parameters
Chris@0 217 // start off having their default values (e.g. in the constructor
Chris@0 218 // above). The host needs to know the default value so it can do
Chris@0 219 // things like provide a "reset to default" function, but it will
Chris@0 220 // not explicitly set your parameters to their defaults for you if
Chris@0 221 // they have not changed in the mean time.
Chris@0 222
Chris@0 223 // ============= WARPING PARAMETERS =============
Chris@0 224
Chris@0 225 ParameterDescriptor fmax;
Chris@0 226 fmax.identifier = "fmax";
Chris@0 227 fmax.name = "Maximum frequency";
Chris@0 228 fmax.description = "Maximum frequency of interest for the analysis.";
Chris@0 229 fmax.unit = "Hz";
Chris@0 230 fmax.minValue = 2000;
Chris@0 231 fmax.maxValue = 22050;
Chris@0 232 fmax.defaultValue = 10000;
Chris@0 233 fmax.isQuantized = true;
Chris@0 234 fmax.quantizeStep = 1.0;
Chris@0 235 list.push_back(fmax);
Chris@0 236
Chris@0 237 ParameterDescriptor nsamp;
Chris@0 238 nsamp.identifier = "nsamp";
Chris@0 239 nsamp.name = "Number of samples";
Chris@0 240 nsamp.description = "Number of samples of the time warped frame";
Chris@0 241 nsamp.unit = "samples";
Chris@0 242 nsamp.minValue = 128;
Chris@0 243 nsamp.maxValue = 4096;
Chris@0 244 nsamp.defaultValue = 2048;
Chris@0 245 nsamp.isQuantized = true;
Chris@0 246 nsamp.quantizeStep = 1.0;
Chris@0 247 list.push_back(nsamp);
Chris@0 248
Chris@0 249 ParameterDescriptor nfft;
Chris@0 250 nfft.identifier = "nfft";
Chris@0 251 nfft.name = "FFT number of points";
Chris@0 252 nfft.description = "Number of FFT points (controls zero-padding)";
Chris@0 253 nfft.unit = "samples";
Chris@0 254 nfft.minValue = 0;
Chris@0 255 nfft.maxValue = 4;
Chris@0 256 nfft.defaultValue = 3;
Chris@0 257 nfft.isQuantized = true;
Chris@0 258 nfft.quantizeStep = 1.0;
Chris@0 259 nfft.valueNames.push_back("256");
Chris@0 260 nfft.valueNames.push_back("512");
Chris@0 261 nfft.valueNames.push_back("1024");
Chris@0 262 nfft.valueNames.push_back("2048");
Chris@0 263 nfft.valueNames.push_back("4096");
Chris@0 264 nfft.valueNames.push_back("8192");
Chris@0 265 list.push_back(nfft);
Chris@0 266
Chris@0 267 ParameterDescriptor alpha_max;
Chris@0 268 alpha_max.identifier = "alpha_max";
Chris@0 269 alpha_max.name = "Maximum alpha value";
Chris@0 270 alpha_max.description = "Maximum value for the alpha parameter of the transform.";
Chris@0 271 alpha_max.unit = "Hz/s";
Chris@0 272 alpha_max.minValue = -10;
Chris@0 273 alpha_max.maxValue = 10;
Chris@0 274 alpha_max.defaultValue = 5;
Chris@0 275 alpha_max.isQuantized = true;
Chris@0 276 alpha_max.quantizeStep = 1.0;
Chris@0 277 list.push_back(alpha_max);
Chris@0 278
Chris@0 279 ParameterDescriptor num_warps;
Chris@0 280 num_warps.identifier = "num_warps";
Chris@0 281 num_warps.name = "Number of warpings";
Chris@0 282 num_warps.description = "Number of different warpings in the specified range (must be odd).";
Chris@0 283 num_warps.unit = "";
Chris@0 284 num_warps.minValue = 1;
Chris@0 285 num_warps.maxValue = 101;
Chris@0 286 num_warps.defaultValue = 21;
Chris@0 287 num_warps.isQuantized = true;
Chris@0 288 num_warps.quantizeStep = 2.0;
Chris@0 289 list.push_back(num_warps);
Chris@0 290
Chris@0 291 ParameterDescriptor alpha_dist;
Chris@0 292 alpha_dist.identifier = "alpha_dist";
Chris@0 293 alpha_dist.name = "alpha distribution";
Chris@0 294 alpha_dist.description = "Type of distribution of alpha values (linear or log).";
Chris@0 295 alpha_dist.unit = "";
Chris@0 296 alpha_dist.minValue = 0;
Chris@0 297 alpha_dist.maxValue = 1;
Chris@0 298 alpha_dist.defaultValue = 1;
Chris@0 299 alpha_dist.isQuantized = true;
Chris@0 300 alpha_dist.quantizeStep = 1.0;
Chris@0 301 // lin (0), log (1)
Chris@0 302 alpha_dist.valueNames.push_back("lin");
Chris@0 303 alpha_dist.valueNames.push_back("log");
Chris@0 304 list.push_back(alpha_dist);
Chris@0 305
Chris@0 306 // ============= F0-GRAM PARAMETERS =============
Chris@0 307
Chris@0 308 ParameterDescriptor f0min;
Chris@0 309 f0min.identifier = "f0min";
Chris@0 310 f0min.name = "min f0";
Chris@0 311 f0min.description = "Minimum fundamental frequency (f0) value.";
Chris@0 312 f0min.unit = "Hz";
Chris@0 313 f0min.minValue = 1;
Chris@0 314 f0min.maxValue = 500;
Chris@0 315 f0min.defaultValue = 80;
Chris@0 316 f0min.isQuantized = true;
Chris@0 317 f0min.quantizeStep = 1.0;
Chris@0 318 list.push_back(f0min);
Chris@0 319
Chris@0 320 ParameterDescriptor num_octs;
Chris@0 321 num_octs.identifier = "num_octs";
Chris@0 322 num_octs.name = "number of octaves";
Chris@0 323 num_octs.description = "Number of octaves for F0gram computation.";
Chris@0 324 num_octs.unit = "";
Chris@0 325 num_octs.minValue = 1;
Chris@0 326 num_octs.maxValue = 10;
Chris@0 327 num_octs.defaultValue = 4;
Chris@0 328 num_octs.isQuantized = true;
Chris@0 329 num_octs.quantizeStep = 1.0;
Chris@0 330 list.push_back(num_octs);
Chris@0 331
Chris@0 332 ParameterDescriptor num_f0_hyps;
Chris@0 333 num_f0_hyps.identifier = "num_f0_hyps";
Chris@0 334 num_f0_hyps.name = "number of f0 hypotesis";
Chris@0 335 num_f0_hyps.description = "Number of f0 hypotesis to extract.";
Chris@0 336 num_f0_hyps.unit = "";
Chris@0 337 num_f0_hyps.minValue = 1;
Chris@0 338 num_f0_hyps.maxValue = 100;
Chris@0 339 num_f0_hyps.defaultValue = 10;
Chris@0 340 num_f0_hyps.isQuantized = true;
Chris@0 341 num_f0_hyps.quantizeStep = 1.0;
Chris@0 342 list.push_back(num_f0_hyps);
Chris@0 343
Chris@0 344 ParameterDescriptor f0s_per_oct;
Chris@0 345 f0s_per_oct.identifier = "f0s_per_oct";
Chris@0 346 f0s_per_oct.name = "f0 values per octave";
Chris@0 347 f0s_per_oct.description = "Number of f0 values per octave.";
Chris@0 348 f0s_per_oct.unit = "";
Chris@0 349 f0s_per_oct.minValue = 12;
Chris@0 350 f0s_per_oct.maxValue = 768;
Chris@0 351 f0s_per_oct.defaultValue = 192;
Chris@0 352 f0s_per_oct.isQuantized = true;
Chris@0 353 f0s_per_oct.quantizeStep = 1.0;
Chris@0 354 list.push_back(f0s_per_oct);
Chris@0 355
Chris@0 356 ParameterDescriptor f0_prefer_fun;
Chris@0 357 f0_prefer_fun.identifier = "f0_prefer_fun";
Chris@22 358 f0_prefer_fun.name = "Use f0 weighting";
Chris@22 359 f0_prefer_fun.description = "Whether to use a f0 weighting function to prefer frequencies nearer a mean value.";
Chris@0 360 f0_prefer_fun.unit = "";
Chris@0 361 f0_prefer_fun.minValue = 0;
Chris@0 362 f0_prefer_fun.maxValue = 1;
Chris@0 363 f0_prefer_fun.defaultValue = 1;
Chris@0 364 f0_prefer_fun.isQuantized = true;
Chris@0 365 f0_prefer_fun.quantizeStep = 1.0;
Chris@0 366 list.push_back(f0_prefer_fun);
Chris@0 367
Chris@0 368 ParameterDescriptor f0_prefer_mean;
Chris@0 369 f0_prefer_mean.identifier = "f0_prefer_mean";
Chris@22 370 f0_prefer_mean.name = "Mean pitch for f0 weighting";
Chris@0 371 f0_prefer_mean.description = "Mean value for f0 weighting function (MIDI number).";
Chris@0 372 f0_prefer_mean.unit = "";
Chris@0 373 f0_prefer_mean.minValue = 1;
Chris@0 374 f0_prefer_mean.maxValue = 127;
Chris@0 375 f0_prefer_mean.defaultValue = 60;
Chris@0 376 f0_prefer_mean.isQuantized = true;
Chris@0 377 f0_prefer_mean.quantizeStep = 1.0;
Chris@0 378 list.push_back(f0_prefer_mean);
Chris@0 379
Chris@0 380 ParameterDescriptor f0_prefer_stdev;
Chris@0 381 f0_prefer_stdev.identifier = "f0_prefer_stdev";
Chris@22 382 f0_prefer_stdev.name = "Stdev for f0 weighting";
Chris@22 383 f0_prefer_stdev.description = "Standard deviation for f0 weighting function (MIDI number).";
Chris@0 384 f0_prefer_stdev.unit = "";
Chris@0 385 f0_prefer_stdev.minValue = 1;
Chris@0 386 f0_prefer_stdev.maxValue = 127;
Chris@0 387 f0_prefer_stdev.defaultValue = 18;
Chris@0 388 f0_prefer_stdev.isQuantized = true;
Chris@0 389 f0_prefer_stdev.quantizeStep = 1.0;
Chris@0 390 list.push_back(f0_prefer_stdev);
Chris@0 391
Chris@0 392 ParameterDescriptor f0gram_mode;
Chris@0 393 f0gram_mode.identifier = "f0gram_mode";
Chris@0 394 f0gram_mode.name = "display mode of f0gram";
Chris@0 395 f0gram_mode.description = "Display all bins of the best direction, or the best bin for each direction.";
Chris@0 396 f0gram_mode.unit = "";
Chris@0 397 f0gram_mode.minValue = 0;
Chris@0 398 f0gram_mode.maxValue = 1;
Chris@0 399 f0gram_mode.defaultValue = 1;
Chris@0 400 f0gram_mode.isQuantized = true;
Chris@0 401 f0gram_mode.quantizeStep = 1.0;
Chris@0 402 list.push_back(f0gram_mode);
Chris@0 403
Chris@0 404 return list;
Chris@0 405 }
Chris@0 406
Chris@0 407 float
Chris@0 408 FChTransformF0gram::getParameter(string identifier) const {
Chris@0 409
Chris@0 410 if (identifier == "fmax") {
Chris@0 411 return m_fmax;
Chris@0 412 } else if (identifier == "nsamp") {
Chris@0 413 return m_warp_params.nsamps_twarp;
Chris@0 414 } else if (identifier == "alpha_max") {
Chris@0 415 return m_warp_params.alpha_max;
Chris@0 416 } else if (identifier == "num_warps") {
Chris@0 417 return m_warp_params.num_warps;
Chris@0 418 } else if (identifier == "alpha_dist") {
Chris@0 419 return m_warp_params.alpha_dist;
Chris@0 420 } else if (identifier == "nfft") {
Chris@0 421 return m_nfft;
Chris@0 422 } else if (identifier == "f0min") {
Chris@0 423 return m_f0_params.f0min;
Chris@0 424 } else if (identifier == "num_octs") {
Chris@0 425 return m_f0_params.num_octs;
Chris@0 426 } else if (identifier == "f0s_per_oct") {
Chris@0 427 return m_f0_params.num_f0s_per_oct;
Chris@0 428 } else if (identifier == "num_f0_hyps") {
Chris@0 429 return m_f0_params.num_f0_hyps;
Chris@0 430 } else if (identifier == "f0_prefer_fun") {
Chris@22 431 return m_f0_params.prefer ? 1.0 : 0.0;
Chris@0 432 } else if (identifier == "f0_prefer_mean") {
Chris@0 433 return m_f0_params.prefer_mean;
Chris@0 434 } else if (identifier == "f0_prefer_stdev") {
Chris@0 435 return m_f0_params.prefer_stdev;
Chris@7 436 } else if (identifier == "f0gram_mode") {
Chris@15 437 return m_f0gram_mode == BestBinOfAllDirections ? 1.0 : 0.0;
Chris@0 438 } else {
Chris@0 439 return 0.f;
Chris@0 440 }
Chris@0 441
Chris@0 442 }
Chris@0 443
Chris@15 444 void FChTransformF0gram::setParameter(string identifier, float value)
Chris@15 445 {
Chris@0 446 if (identifier == "fmax") {
Chris@0 447 m_fmax = value;
Chris@0 448 } else if (identifier == "nsamp") {
Chris@0 449 m_warp_params.nsamps_twarp = value;
Chris@0 450 } else if (identifier == "alpha_max") {
Chris@0 451 m_warp_params.alpha_max = value;
Chris@0 452 } else if (identifier == "num_warps") {
Chris@0 453 m_warp_params.num_warps = value;
Chris@0 454 } else if (identifier == "alpha_dist") {
Chris@0 455 m_warp_params.alpha_dist = value;
Chris@0 456 } else if (identifier == "nfft") {
Chris@0 457 m_nfft = value;
Chris@0 458 } else if (identifier == "f0min") {
Chris@0 459 m_f0_params.f0min = value;
Chris@0 460 } else if (identifier == "num_octs") {
Chris@0 461 m_f0_params.num_octs = value;
Chris@0 462 } else if (identifier == "f0s_per_oct") {
Chris@0 463 m_f0_params.num_f0s_per_oct = value;
Chris@0 464 } else if (identifier == "num_f0_hyps") {
Chris@0 465 m_f0_params.num_f0_hyps = value;
Chris@0 466 } else if (identifier == "f0_prefer_fun") {
Chris@22 467 m_f0_params.prefer = (value > 0.5);
Chris@0 468 } else if (identifier == "f0_prefer_mean") {
Chris@0 469 m_f0_params.prefer_mean = value;
Chris@0 470 } else if (identifier == "f0_prefer_stdev") {
Chris@0 471 m_f0_params.prefer_stdev = value;
Chris@0 472 } else if (identifier == "f0gram_mode") {
Chris@15 473 m_f0gram_mode = (value > 0.5 ?
Chris@15 474 BestBinOfAllDirections :
Chris@15 475 AllBinsOfBestDirection);
Chris@15 476 } else {
Chris@15 477 cerr << "WARNING: Unknown parameter id \""
Chris@15 478 << identifier << "\"" << endl;
Chris@0 479 }
Chris@0 480 }
Chris@0 481
Chris@0 482 FChTransformF0gram::ProgramList
Chris@0 483 FChTransformF0gram::getPrograms() const {
Chris@0 484 ProgramList list;
Chris@0 485 return list;
Chris@0 486 }
Chris@0 487
Chris@0 488 FChTransformF0gram::OutputList
Chris@0 489 FChTransformF0gram::getOutputDescriptors() const {
Chris@0 490
Chris@0 491 OutputList list;
Chris@0 492
Chris@16 493 vector<string> labels;
Chris@16 494 char label[100];
Chris@0 495
Chris@16 496 if (m_processingMode == ModeF0Gram) {
Chris@16 497
Chris@16 498 /* f0 values of F0gram grid as string values */
Chris@16 499 for (int i = 0; i < m_num_f0s; ++i) {
Chris@16 500 sprintf(label, "%4.2f Hz", m_f0s[i]);
Chris@16 501 labels.push_back(label);
Chris@16 502 }
Chris@16 503
Chris@16 504 /* The F0gram */
Chris@16 505 OutputDescriptor d;
Chris@16 506 d.identifier = "f0gram";
Chris@19 507 d.name = "F0gram";
Chris@19 508 d.description = "The salience of the different f0s in the signal.";
Chris@16 509 d.hasFixedBinCount = true;
Chris@16 510 d.binCount = m_f0_params.num_octs * m_f0_params.num_f0s_per_oct;
Chris@16 511 d.binNames = labels;
Chris@16 512 d.hasKnownExtents = false;
Chris@16 513 d.isQuantized = false;
Chris@16 514 d.sampleType = OutputDescriptor::OneSamplePerStep;
Chris@16 515 d.hasDuration = false;
Chris@16 516 list.push_back(d);
Chris@16 517
Chris@19 518 d.identifier = "pitch";
Chris@19 519 d.name = "Most salient pitch";
Chris@19 520 d.description = "The most salient f0 in the signal for each time step.";
Chris@19 521 d.unit = "Hz";
Chris@19 522 d.hasFixedBinCount = true;
Chris@19 523 d.binCount = 1;
Chris@19 524 d.binNames.clear();
Chris@19 525 d.hasKnownExtents = false;
Chris@19 526 d.isQuantized = false;
Chris@19 527 d.sampleType = OutputDescriptor::OneSamplePerStep;
Chris@19 528 d.hasDuration = false;
Chris@19 529 list.push_back(d);
Chris@19 530
Chris@16 531 } else {
Chris@16 532
Chris@16 533 for (int i = 0; i < m_warp_params.nsamps_twarp/2+1; ++i) {
Chris@16 534 double freq = i * (m_warpings.fs_warp / m_nfft);
Chris@16 535 sprintf(label, "%4.2f Hz", freq);
Chris@16 536 labels.push_back(label);
Chris@16 537 }
Chris@16 538
Chris@16 539 OutputDescriptor d;
Chris@16 540 d.identifier = "spectrogram";
Chris@16 541 d.name = "Spectrogram";
Chris@16 542 d.description = "Time/frequency spectrogram derived from the Fan Chirp Transform output";
Chris@16 543 d.hasFixedBinCount = true;
Chris@16 544 d.binCount = m_warp_params.nsamps_twarp/2+1;
Chris@16 545 d.binNames = labels;
Chris@16 546 d.hasKnownExtents = false;
Chris@16 547 d.isQuantized = false;
Chris@16 548 d.sampleType = OutputDescriptor::OneSamplePerStep;
Chris@16 549 d.hasDuration = false;
Chris@16 550 list.push_back(d);
Chris@0 551 }
Chris@16 552
Chris@0 553 return list;
Chris@0 554 }
Chris@0 555
Chris@0 556 bool
Chris@0 557 FChTransformF0gram::initialise(size_t channels, size_t stepSize, size_t blockSize) {
Chris@0 558 if (channels < getMinChannelCount() ||
Chris@20 559 channels > getMaxChannelCount() ||
Chris@20 560 blockSize != m_blockSize/2 ||
Chris@20 561 stepSize != m_stepSize) {
Chris@14 562 return false;
Chris@14 563 }
Chris@0 564
Chris@20 565 m_inputBuffer = allocate_and_zero<float>(m_blockSize);
Chris@20 566
Chris@0 567 // WARNING !!!
Chris@0 568 // these values in fact are determined by the sampling frequency m_fs
Chris@0 569 // the parameters used below correspond to default values i.e. m_fs = 44.100 Hz
Chris@0 570 //m_blockSize = 4 * m_warp_params.nsamps_twarp;
Chris@16 571 // m_stepSize = floor(m_hop / m_warp_params.fact_over_samp);
Chris@16 572
Chris@16 573 /* design of FChT */
Chris@16 574 design_FChT();
Chris@0 575
Chris@0 576 /* initialise m_glogs_params */
Chris@7 577 design_GLogS();
Chris@0 578
Chris@7 579 design_LPF();
Chris@0 580
Chris@7 581 design_time_window();
Chris@0 582
Chris@7 583 // Create Hanning window for warped signals
Chris@14 584 mp_HanningWindow = allocate<double>(m_warp_params.nsamps_twarp);
Chris@7 585 bool normalize = false;
Chris@14 586 Utils::hanning_window(mp_HanningWindow, m_warp_params.nsamps_twarp, normalize);
Chris@0 587
Chris@16 588 m_num_f0s = m_f0_params.num_octs * m_f0_params.num_f0s_per_oct;
Chris@16 589 m_f0s = allocate<double>(m_num_f0s);
Chris@16 590 for (int i = 0; i < m_num_f0s; ++i) {
Chris@16 591 m_f0s[i] = m_glogs_f0[m_glogs_init_f0s + i];
Chris@16 592 }
Chris@20 593
Chris@20 594 m_initialised = true;
Chris@0 595 return true;
Chris@0 596 }
Chris@0 597
Chris@0 598 void
Chris@0 599 FChTransformF0gram::design_GLogS() {
Chris@0 600
Chris@7 601 // total number & initial quantity of f0s
Chris@16 602
Chris@10 603 m_glogs_init_f0s = (int)(((double)m_f0_params.num_f0s_per_oct)*log2(5.0))+1;
Chris@7 604 m_glogs_num_f0s = (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct + m_glogs_init_f0s;
Chris@0 605
Chris@7 606 // Initialize arrays
Chris@14 607 m_glogs_f0 = allocate<double>(m_glogs_num_f0s);
Chris@14 608 m_glogs = allocate<double>(m_glogs_num_f0s*m_warp_params.num_warps);
Chris@14 609 m_glogs_n = allocate<int>(m_glogs_num_f0s);
Chris@14 610 m_glogs_index = allocate<int>(m_glogs_num_f0s);
Chris@0 611
Chris@7 612 // Compute f0 values
Chris@7 613 m_glogs_harmonic_count = 0;
Chris@7 614 double factor = (double)(m_warp_params.nsamps_twarp/2)/(double)(m_warp_params.nsamps_twarp/2+1);
Chris@10 615 for (int i = 0; i < m_glogs_num_f0s; i++) {
Chris@7 616 m_glogs_f0[i] = (m_f0_params.f0min/5.0)*pow(2.0,(double)i/(double)m_f0_params.num_f0s_per_oct);
Chris@7 617 // for every f0 compute number of partials less or equal than m_fmax.
Chris@7 618 m_glogs_n[i] = m_fmax*factor/m_glogs_f0[i];
Chris@7 619 m_glogs_index[i] = m_glogs_harmonic_count;
Chris@7 620 m_glogs_harmonic_count += m_glogs_n[i];
Chris@7 621 }
Chris@0 622
Chris@7 623 // Initialize arrays for interpolation
Chris@14 624 m_glogs_posint = allocate<int>(m_glogs_harmonic_count);
Chris@14 625 m_glogs_posfrac = allocate<double>(m_glogs_harmonic_count);
Chris@14 626 m_glogs_interp = allocate<double>(m_glogs_harmonic_count);
Chris@0 627
Chris@7 628 // Compute int & frac of interpolation positions
Chris@10 629 int aux_index = 0;
Chris@7 630 double aux_pos;
Chris@10 631 for (int i = 0; i < m_glogs_num_f0s; i++) {
Chris@10 632 for (int j = 1; j <= m_glogs_n[i]; j++) {
Chris@18 633 aux_pos = ((double)j * m_glogs_f0[i]) * ((double)(m_warp_params.nsamps_twarp))/m_warpings.fs_warp;
Chris@10 634 m_glogs_posint[aux_index] = (int)aux_pos;
Chris@7 635 m_glogs_posfrac[aux_index] = aux_pos - (double)m_glogs_posint[aux_index];
Chris@7 636 aux_index++;
Chris@7 637 }
Chris@7 638 }
Chris@0 639
Chris@7 640 // Third harmonic attenuation
Chris@7 641 double aux_third_harmonic;
Chris@14 642 m_glogs_third_harmonic_posint = allocate<int>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@14 643 m_glogs_third_harmonic_posfrac = allocate<double>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@10 644 for (int i = 0; i < (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct; i++) {
Chris@7 645 aux_third_harmonic = (double)i + (double)m_glogs_init_f0s - ((double)m_f0_params.num_f0s_per_oct)*log2(3.0);
Chris@10 646 m_glogs_third_harmonic_posint[i] = (int)aux_third_harmonic;
Chris@7 647 m_glogs_third_harmonic_posfrac[i] = aux_third_harmonic - (double)(m_glogs_third_harmonic_posint[i]);
Chris@7 648 }
Chris@14 649 m_glogs_third_harmonic = allocate<double>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@0 650
Chris@7 651 // Fifth harmonic attenuation
Chris@7 652 double aux_fifth_harmonic;
Chris@14 653 m_glogs_fifth_harmonic_posint = allocate<int>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@14 654 m_glogs_fifth_harmonic_posfrac = allocate<double>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@10 655 for (int i = 0; i < (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct; i++) {
Chris@7 656 aux_fifth_harmonic = (double)i + (double)m_glogs_init_f0s - ((double)m_f0_params.num_f0s_per_oct)*log2(5.0);
Chris@10 657 m_glogs_fifth_harmonic_posint[i] = (int)aux_fifth_harmonic;
Chris@7 658 m_glogs_fifth_harmonic_posfrac[i] = aux_fifth_harmonic - (double)(m_glogs_fifth_harmonic_posint[i]);
Chris@7 659 }
Chris@14 660 m_glogs_fifth_harmonic = allocate<double>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@0 661
Chris@7 662 // Normalization & attenuation windows
Chris@14 663 m_glogs_f0_preference_weights = allocate<double>(m_f0_params.num_octs*m_f0_params.num_f0s_per_oct);
Chris@14 664 m_glogs_median_correction = allocate<double>(m_f0_params.num_octs*m_f0_params.num_f0s_per_oct);
Chris@14 665 m_glogs_sigma_correction = allocate<double>(m_f0_params.num_octs*m_f0_params.num_f0s_per_oct);
Chris@7 666 double MIDI_value;
Chris@10 667 for (int i = 0; i < m_f0_params.num_octs*m_f0_params.num_f0s_per_oct; i++) {
Chris@22 668 if (m_f0_params.prefer) {
Chris@22 669 MIDI_value = 69.0 + 12.0 * log2(m_glogs_f0[i + m_glogs_init_f0s]/440.0);
Chris@22 670 m_glogs_f0_preference_weights[i] = 1.0/sqrt(2.0*M_PI*m_f0_params.prefer_stdev*m_f0_params.prefer_stdev)*exp(-(MIDI_value-m_f0_params.prefer_mean)*(MIDI_value-m_f0_params.prefer_mean)/(2.0*m_f0_params.prefer_stdev*m_f0_params.prefer_stdev));
Chris@22 671 m_glogs_f0_preference_weights[i] = (0.01 + m_glogs_f0_preference_weights[i]) / (1.01);
Chris@22 672 } else {
Chris@22 673 m_glogs_f0_preference_weights[i] = 1.0;
Chris@22 674 }
Chris@0 675
Chris@7 676 m_glogs_median_correction[i] = m_glogs_params.median_poly_coefs[0]*(i+1.0)*(i+1.0) + m_glogs_params.median_poly_coefs[1]*(i+1.0) + m_glogs_params.median_poly_coefs[2];
Chris@7 677 m_glogs_sigma_correction[i] = 1.0 / (m_glogs_params.sigma_poly_coefs[0]*(i+1.0)*(i+1.0) + m_glogs_params.sigma_poly_coefs[1]*(i+1.0) + m_glogs_params.sigma_poly_coefs[2]);
Chris@7 678 }
Chris@0 679 }
Chris@0 680
Chris@0 681 void
Chris@0 682 FChTransformF0gram::design_FChT() {
Chris@0 683
Chris@0 684 /* ============= WARPING DESIGN ============= */
Chris@0 685
Chris@0 686 // sampling frequency after oversampling
Chris@0 687 m_warpings.fs_orig = m_warp_params.fact_over_samp * m_fs;
Chris@0 688
Chris@0 689 // number of samples of the original signal frame
Chris@0 690 m_warpings.nsamps_torig = 4 * m_warp_params.fact_over_samp * m_warp_params.nsamps_twarp;
Chris@0 691 // equivalent to: m_warpings.nsamps_torig = m_warp_params.fact_over_samp * m_blockSize;
Chris@0 692
Chris@0 693 // time instants of the original signal frame
Chris@14 694 double *t_orig = allocate<double>(m_warpings.nsamps_torig);
Chris@10 695 for (int ind = 0; ind < m_warpings.nsamps_torig; ind++) {
Chris@0 696 t_orig[ind] = ((double)(ind + 1) - (double)m_warpings.nsamps_torig / 2.0) / m_warpings.fs_orig;
Chris@0 697 }
Chris@0 698
Chris@0 699 // linear chirps warping definition as relative frequency deviation
Chris@7 700 //TODO
Chris@14 701 double *freq_relative = allocate<double>(m_warpings.nsamps_torig * m_warp_params.num_warps);
Chris@0 702 define_warps_linear_chirps(freq_relative, t_orig);
Chris@0 703
Chris@0 704 // maximum relative frequency deviation
Chris@0 705 double freq_relative_max = 0;
Chris@14 706 for (int i = 0; i < m_warpings.nsamps_torig; i++) {
Chris@14 707 for (int j = 0; j < m_warp_params.num_warps; j++) {
Chris@14 708 if (freq_relative_max < freq_relative[j * m_warpings.nsamps_torig + i]) {
Chris@0 709 freq_relative_max = freq_relative[j * m_warpings.nsamps_torig + i];
Chris@14 710 }
Chris@14 711 }
Chris@14 712 }
Chris@0 713
Chris@0 714 // sampling frequency of warped signal to be free of aliasing up to fmax
Chris@0 715 m_warpings.fs_warp = 2 * m_fmax * freq_relative_max;
Chris@0 716
Chris@0 717 // time instants of the warped signal frame
Chris@14 718 double *t_warp = allocate<double>(m_warp_params.nsamps_twarp);
Chris@10 719 for (int ind = 0; ind < m_warp_params.nsamps_twarp; ind++) {
Chris@0 720 t_warp[ind] = ((double)((int)(ind + 1)- (int)m_warp_params.nsamps_twarp / 2)) / (double)m_warpings.fs_warp;
Chris@0 721 }
Chris@0 722
Chris@0 723 // design of warpings for efficient interpolation
Chris@0 724 design_warps(freq_relative, t_orig, t_warp);
Chris@0 725
Chris@14 726 deallocate(freq_relative);
Chris@14 727 deallocate(t_orig);
Chris@14 728 deallocate(t_warp);
Chris@14 729
Chris@0 730 /* ============= FFTW PLAN DESIGN ============= */
Chris@7 731 // Initialize 2-d array for warped signals
Chris@14 732 x_warping = allocate<double>(m_warp_params.nsamps_twarp);
Chris@14 733 m_absFanChirpTransform = allocate<double>(m_warp_params.num_warps * (m_warp_params.nsamps_twarp/2 + 1));
Chris@14 734 m_auxFanChirpTransform = allocate<double>(2 * (m_warp_params.nsamps_twarp/2 + 1));
Chris@14 735 fft_xwarping = new FFTReal(m_warp_params.nsamps_twarp);
Chris@0 736 }
Chris@0 737
Chris@0 738 void
Chris@0 739 FChTransformF0gram::design_warps(double * freq_relative, double * t_orig, double * t_warp) {
Chris@0 740 /* the warping is done by interpolating the original signal in time instants
Chris@0 741 given by the desired frequency deviation, to do this, the interpolation
Chris@0 742 instants are stored in a structure as an integer index and a fractional value
Chris@0 743 hypothesis: sampling frequency at the central point equals the original
Chris@7 744 */
Chris@0 745
Chris@14 746 m_warpings.pos_int = allocate<int>(m_warp_params.num_warps * m_warp_params.nsamps_twarp);
Chris@14 747 m_warpings.pos_frac = allocate<double>(m_warp_params.num_warps * m_warp_params.nsamps_twarp);
Chris@0 748
Chris@7 749 // vector of phase values
Chris@14 750 double *phi = allocate<double>(m_warpings.nsamps_torig);
Chris@7 751 double aux;
Chris@0 752
Chris@7 753 // warped positions
Chris@14 754 double *pos1 = allocate<double>(m_warp_params.nsamps_twarp*m_warp_params.num_warps);
Chris@0 755
Chris@10 756 for (int i = 0; i < m_warp_params.num_warps; i++) {
Chris@0 757
Chris@7 758 // integration of relative frequency to obtain phase values
Chris@14 759 Utils::cumtrapz(t_orig, freq_relative + i*(m_warpings.nsamps_torig), m_warpings.nsamps_torig, phi);
Chris@0 760
Chris@7 761 // centering of phase values to force original frequency in the middle
Chris@7 762 aux = phi[m_warpings.nsamps_torig/2];
Chris@10 763 for (int j = 0; j < m_warpings.nsamps_torig; j++) {
Chris@7 764 phi[j] -= aux;
Chris@7 765 } //for
Chris@0 766
Chris@7 767 // interpolation of phase values to obtain warped positions
Chris@14 768 Utils::interp1(phi, t_orig, m_warpings.nsamps_torig, t_warp, pos1 + i*m_warp_params.nsamps_twarp, m_warp_params.nsamps_twarp);
Chris@0 769 }
Chris@0 770
Chris@0 771 // % previous sample index
Chris@0 772 // pos1_int = uint32(floor(pos1))';
Chris@0 773 // % integer corresponding to previous sample index in "c"
Chris@0 774 // warps.pos1_int = (pos1_int - uint32(1));
Chris@0 775 // % fractional value that defines the warped position
Chris@0 776 // warps.pos1_frac = (double(pos1)' - double(pos1_int));
Chris@0 777
Chris@10 778 for (int j = 0; j < m_warp_params.nsamps_twarp*m_warp_params.num_warps; j++) {
Chris@7 779 // previous sample index
Chris@7 780 pos1[j] = pos1[j]*m_warpings.fs_orig + m_warpings.nsamps_torig/2 + 1;
Chris@10 781 m_warpings.pos_int[j] = (int) pos1[j];
Chris@7 782 m_warpings.pos_frac[j] = pos1[j] - (double)(m_warpings.pos_int[j]);
Chris@7 783 } //for
Chris@0 784
Chris@14 785 deallocate(phi);
Chris@14 786 deallocate(pos1);
Chris@0 787 }
Chris@0 788
Chris@0 789 void
Chris@0 790 FChTransformF0gram::define_warps_linear_chirps(double * freq_relative, double * t_orig) {
Chris@0 791 /** define warps as relative frequency deviation from original frequency
Chris@7 792 t_orig : time vector
Chris@7 793 freq_relative : relative frequency deviations
Chris@7 794 */
Chris@0 795 if (m_warp_params.alpha_dist == 0) {
Chris@0 796
Chris@0 797 // linear alpha values spacing
Chris@14 798 m_warpings.chirp_rates = allocate<double>(m_warp_params.num_warps);
Chris@0 799 // WARNING m_warp_params.num_warps must be odd
Chris@0 800 m_warpings.chirp_rates[0] = -m_warp_params.alpha_max;
Chris@0 801 double increment = (double) m_warp_params.alpha_max / ((m_warp_params.num_warps - 1) / 2);
Chris@0 802
Chris@10 803 for (int ind = 1; ind < m_warp_params.num_warps; ind++) {
Chris@0 804 m_warpings.chirp_rates[ind] = m_warpings.chirp_rates[ind - 1] + increment;
Chris@0 805 }
Chris@0 806 // force zero value
Chris@0 807 m_warpings.chirp_rates[(int) ((m_warp_params.num_warps - 1) / 2)] = 0;
Chris@0 808
Chris@0 809 } else {
Chris@0 810 // log alpha values spacing
Chris@14 811 m_warpings.chirp_rates = allocate<double>(m_warp_params.num_warps);
Chris@0 812
Chris@0 813 // force zero value
Chris@0 814 int middle_point = (int) ((m_warp_params.num_warps - 1) / 2);
Chris@0 815 m_warpings.chirp_rates[middle_point] = 0;
Chris@0 816
Chris@0 817 double logMax = log10(m_warp_params.alpha_max + 1);
Chris@0 818 double increment = logMax / ((m_warp_params.num_warps - 1) / 2.0f);
Chris@0 819 double exponent = 0;
Chris@0 820
Chris@0 821 // fill positive values
Chris@0 822 int ind_log = middle_point;
Chris@10 823 for (int ind = 0; ind < (m_warp_params.num_warps + 1) / 2; ind++) {
Chris@0 824 m_warpings.chirp_rates[ind_log] = pow(10, exponent) - 1;
Chris@0 825 exponent += increment;
Chris@0 826 ind_log++;
Chris@0 827 }
Chris@0 828 // fill negative values
Chris@10 829 for (int ind = 0; ind < (m_warp_params.num_warps - 1) / 2; ind++) {
Chris@0 830 m_warpings.chirp_rates[ind] = -m_warpings.chirp_rates[m_warp_params.num_warps - 1 - ind];
Chris@0 831 }
Chris@0 832 }
Chris@0 833
Chris@0 834 // compute relative frequency deviation
Chris@14 835 for (int i = 0; i < m_warpings.nsamps_torig; i++) {
Chris@14 836 for (int j = 0; j < m_warp_params.num_warps; j++) {
Chris@0 837 freq_relative[j * m_warpings.nsamps_torig + i] = 1.0 + t_orig[i] * m_warpings.chirp_rates[j];
Chris@14 838 }
Chris@14 839 }
Chris@0 840 }
Chris@0 841
Chris@0 842 void
Chris@14 843 FChTransformF0gram::design_LPF()
Chris@14 844 {
Chris@14 845 double *lp_LPFWindow_aux = allocate<double>(m_blockSize/2+1);
Chris@14 846 mp_LPFWindow = allocate<double>(m_blockSize/2+1);
Chris@0 847
Chris@10 848 int i_max = (int) ((2.0*m_fmax/m_fs) * ( (double)m_blockSize / 2.0 + 1.0 ));
Chris@10 849 for (int i = 0; i < m_blockSize/2+1; i++) {
Chris@0 850 if (i >= i_max) {
Chris@0 851 lp_LPFWindow_aux[i] = 0.0;
Chris@0 852 } else {
Chris@0 853 lp_LPFWindow_aux[i] = 1.0;
Chris@0 854 }
Chris@0 855 }
Chris@14 856
Chris@14 857 LPF_time = allocate_and_zero<double>(m_warpings.nsamps_torig);
Chris@14 858 LPF_frequency = allocate_and_zero<double>(2 * (m_warpings.nsamps_torig/2 + 1));
Chris@14 859
Chris@14 860 fft_forward_LPF = new FFTReal(m_blockSize);
Chris@14 861 fft_inverse_LPF = new FFTReal(m_warpings.nsamps_torig);
Chris@0 862
Chris@10 863 int winWidth = 11;
Chris@14 864 double *lp_hanningWindow = allocate<double>(winWidth);
Chris@0 865 double accum=0;
Chris@10 866 for (int i = 0; i < winWidth; i++) {
Chris@0 867 lp_hanningWindow[i]=0.5*(1.0-cos(2*M_PI*(double)(i+1)/((double)winWidth+1.0)));
Chris@0 868 accum+=lp_hanningWindow[i];
Chris@0 869
Chris@0 870 }
Chris@10 871 for (int i = 0; i < winWidth; i++) { //window normalization
Chris@0 872 lp_hanningWindow[i]=lp_hanningWindow[i]/accum;
Chris@0 873 }
Chris@10 874 for (int i = 0; i < m_blockSize/2+1; i++) {
Chris@0 875 //if (((i-(winWidth-1)/2)<0)||(i+(winWidth-1))/2>m_blockSize/2-1) {//consideramos winWidth impar, si la ventana sale del arreglo se rellena con el valor origianl
Chris@7 876 if ( (i > (i_max + (winWidth-1)/2)) || (i <= (i_max - (winWidth-1)/2)) ) {
Chris@0 877 mp_LPFWindow[i]=lp_LPFWindow_aux[i];
Chris@0 878 } else {
Chris@0 879 accum=0;
Chris@10 880 for (int j = -((winWidth-1)/2); j <= (winWidth-1)/2; j++) {
Chris@0 881 accum+=lp_LPFWindow_aux[i-j]*lp_hanningWindow[j+(winWidth-1)/2];
Chris@7 882 }
Chris@0 883 mp_LPFWindow[i]=accum;
Chris@0 884 }
Chris@0 885 }
Chris@0 886
Chris@14 887 deallocate(lp_LPFWindow_aux);
Chris@14 888 deallocate(lp_hanningWindow);
Chris@0 889 }
Chris@0 890
Chris@14 891 void FChTransformF0gram::apply_LPF()
Chris@14 892 {
Chris@14 893 fft_forward_LPF->forward(LPF_time, LPF_frequency);
Chris@14 894
Chris@10 895 for (int i = 0; i < m_blockSize/2+1; i++) {
Chris@16 896 LPF_frequency[i*2] *= mp_LPFWindow[i];
Chris@16 897 LPF_frequency[i*2 + 1] *= mp_LPFWindow[i];
Chris@0 898 }
Chris@14 899
Chris@14 900 fft_inverse_LPF->inverse(LPF_frequency, LPF_time);
Chris@20 901
Chris@7 902 // TODO ver si hay que hacer fftshift para corregir la fase respecto al centro del frame.
Chris@7 903 // nota: ademÔs de aplicar el LPF, esta función resamplea la señal original.
Chris@0 904 }
Chris@0 905
Chris@14 906 void FChTransformF0gram::clean_LPF()
Chris@14 907 {
Chris@14 908 delete fft_forward_LPF;
Chris@14 909 delete fft_inverse_LPF;
Chris@14 910 deallocate(LPF_time);
Chris@14 911 deallocate(LPF_frequency);
Chris@14 912 deallocate(mp_LPFWindow);
Chris@0 913 }
Chris@0 914
Chris@14 915 void FChTransformF0gram::reset()
Chris@14 916 {
Chris@0 917 }
Chris@0 918
Chris@0 919 FChTransformF0gram::FeatureSet
Chris@5 920 FChTransformF0gram::process(const float *const *inputBuffers, Vamp::RealTime) {
Chris@0 921
Chris@20 922 if (!m_initialised) return FeatureSet();
Chris@20 923
Chris@7 924 /* PSEUDOCƓDIGO:
Chris@7 925 - Aplicar FFT al frame entero.
Chris@7 926 - Filtro pasabajos en frecuencia.
Chris@7 927 - FFT inversa al frame entero.
Chris@7 928 -----------------------------------------------------------------------------
Chris@7 929 - Para cada warp: *Si es un espectrograma direccional (un solo warp
Chris@7 930 => no es para cada warp sino para el elegido)
Chris@7 931 - Hacer la interpolación con interp1q.
Chris@7 932 - Aplicar la FFT al frame warpeado.
Chris@7 933 - (Opcional) GLogS.
Chris@7 934 - ...
Chris@7 935 */
Chris@0 936
Chris@0 937 //---------------------------------------------------------------------------
Chris@7 938 FeatureSet fs;
Chris@0 939
Chris@7 940 #ifdef DEBUG
Chris@16 941 fprintf(stderr, "\n ----- DEBUG INFORMATION ----- \n");
Chris@16 942 fprintf(stderr, " m_fs = %f Hz.\n",m_fs);
Chris@16 943 fprintf(stderr, " fs_orig = %f Hz.\n",m_warpings.fs_orig);
Chris@16 944 fprintf(stderr, " fs_warp = %f Hz.\n",m_warpings.fs_warp);
Chris@16 945 fprintf(stderr, " m_nfft = %d.\n",m_nfft);
Chris@16 946 fprintf(stderr, " m_blockSize = %d.\n",m_blockSize);
Chris@16 947 fprintf(stderr, " m_warpings.nsamps_torig = %d.\n",m_warpings.nsamps_torig);
Chris@16 948 fprintf(stderr, " m_warp_params.num_warps = %d.\n",m_warp_params.num_warps);
Chris@16 949 fprintf(stderr, " m_glogs_harmonic_count = %d.\n",m_glogs_harmonic_count);
Chris@7 950 #endif
Chris@0 951
Chris@20 952 for (int i = 0; i < m_blockSize - m_stepSize; ++i) {
Chris@20 953 m_inputBuffer[i] = m_inputBuffer[i + m_stepSize];
Chris@0 954 }
Chris@20 955 for (int i = 0; i < m_blockSize/2; ++i) {
Chris@20 956 m_inputBuffer[m_blockSize/2 + i] = inputBuffers[0][i];
Chris@20 957 }
Chris@20 958 for (int i = 0; i < m_blockSize; ++i) {
Chris@20 959 LPF_time[i] = m_inputBuffer[i] * m_timeWindow[i];
Chris@20 960 }
Chris@20 961 for (int i = 0; i < m_blockSize; ++i) {
Chris@20 962 LPF_time[m_blockSize + i] = 0.0;
Chris@20 963 }
Chris@20 964
Chris@7 965 apply_LPF();
Chris@7 966 // SeƱal filtrada queda en LPF_time
Chris@0 967
Chris@7 968 Feature feature;
Chris@0 969 feature.hasTimestamp = false;
Chris@0 970
Chris@15 971 if (m_processingMode == ModeRoughSpectrogram) {
Chris@15 972 feature.values = vector<float>(m_warp_params.nsamps_twarp/2+1, 0.f);
Chris@15 973 }
Chris@15 974
Chris@0 975 // ----------------------------------------------------------------------------------------------
Chris@0 976 // Hanning window & FFT for all warp directions
Chris@0 977
Chris@7 978 double max_glogs = -DBL_MAX;
Chris@10 979 int ind_max_glogs = 0;
Chris@0 980
Chris@10 981 for (int i_warp = 0; i_warp < m_warp_params.num_warps; i_warp++) {
Chris@16 982
Chris@7 983 // Interpolate
Chris@14 984 Utils::interp1q(LPF_time, (m_warpings.pos_int) + i_warp*m_warp_params.nsamps_twarp, m_warpings.pos_frac + i_warp*m_warp_params.nsamps_twarp, x_warping, m_warp_params.nsamps_twarp);
Chris@0 985
Chris@7 986 // Apply window
Chris@10 987 for (int i = 0; i < m_warp_params.nsamps_twarp; i++) {
Chris@7 988 x_warping[i] *= mp_HanningWindow[i];
Chris@7 989 }
Chris@0 990
Chris@7 991 // Transform
Chris@14 992 fft_xwarping->forward(x_warping, m_auxFanChirpTransform);
Chris@0 993
Chris@15 994 if (m_processingMode == ModeRoughSpectrogram) {
Chris@15 995 for (int i = 0; i < (m_warp_params.nsamps_twarp/2+1); i++) {
Chris@15 996 double abs = sqrt(m_auxFanChirpTransform[i*2]*m_auxFanChirpTransform[i*2]+m_auxFanChirpTransform[i*2+1]*m_auxFanChirpTransform[i*2+1]);
Chris@15 997 if (abs > feature.values[i]) {
Chris@15 998 feature.values[i] = abs;
Chris@15 999 }
Chris@15 1000 }
Chris@15 1001 continue;
Chris@15 1002 }
Chris@15 1003
Chris@7 1004 // Copy result
Chris@7 1005 double *aux_abs_fcht = m_absFanChirpTransform + i_warp*(m_warp_params.nsamps_twarp/2+1);
Chris@10 1006 for (int i = 0; i < (m_warp_params.nsamps_twarp/2+1); i++) {
Chris@14 1007 aux_abs_fcht[i] = log10(1.0 + 10.0*sqrt(m_auxFanChirpTransform[i*2]*m_auxFanChirpTransform[i*2]+m_auxFanChirpTransform[i*2+1]*m_auxFanChirpTransform[i*2+1]));
Chris@7 1008 }
Chris@0 1009
Chris@0 1010 // -----------------------------------------------------------------------------------------
Chris@0 1011 // GLogS
Chris@14 1012 Utils::interp1q(aux_abs_fcht, m_glogs_posint, m_glogs_posfrac, m_glogs_interp, m_glogs_harmonic_count);
Chris@10 1013 int glogs_ind = 0;
Chris@10 1014 for (int i = 0; i < m_glogs_num_f0s; i++) {
Chris@7 1015 double glogs_accum = 0;
Chris@10 1016 for (int j = 1; j <= m_glogs_n[i]; j++) {
Chris@7 1017 glogs_accum += m_glogs_interp[glogs_ind++];
Chris@7 1018 }
Chris@7 1019 m_glogs[i + i_warp*m_glogs_num_f0s] = glogs_accum/(double)m_glogs_n[i];
Chris@7 1020 }
Chris@0 1021
Chris@0 1022 // Sub/super harmonic correction
Chris@14 1023 Utils::interp1q(m_glogs + i_warp*m_glogs_num_f0s, m_glogs_third_harmonic_posint, m_glogs_third_harmonic_posfrac, m_glogs_third_harmonic, (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@14 1024 Utils::interp1q(m_glogs + i_warp*m_glogs_num_f0s, m_glogs_fifth_harmonic_posint, m_glogs_fifth_harmonic_posfrac, m_glogs_fifth_harmonic, (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@10 1025 for (int i = m_glogs_num_f0s-1; i >= m_glogs_init_f0s; i--) {
Chris@7 1026 m_glogs[i + i_warp*m_glogs_num_f0s] -= MAX(MAX(m_glogs[i-m_f0_params.num_f0s_per_oct + i_warp*m_glogs_num_f0s],m_glogs_third_harmonic[i-m_glogs_init_f0s]),m_glogs_fifth_harmonic[i-m_glogs_init_f0s]);
Chris@7 1027 }
Chris@10 1028 for (int i = m_glogs_init_f0s; i < m_glogs_num_f0s-m_f0_params.num_f0s_per_oct; i++) {
Chris@7 1029 m_glogs[i + i_warp*m_glogs_num_f0s] -= 0.3*m_glogs[i+m_f0_params.num_f0s_per_oct + i_warp*m_glogs_num_f0s];
Chris@7 1030 // Median, sigma $ weights correction
Chris@7 1031 m_glogs[i + i_warp*m_glogs_num_f0s] = (m_glogs[i + i_warp*m_glogs_num_f0s]-m_glogs_median_correction[i-m_glogs_init_f0s])*m_glogs_sigma_correction[i-m_glogs_init_f0s]*m_glogs_f0_preference_weights[i-m_glogs_init_f0s];
Chris@7 1032 }
Chris@0 1033
Chris@7 1034 // Look for maximum value to determine best direction
Chris@10 1035 for (int i = m_glogs_init_f0s; i < m_glogs_num_f0s-m_f0_params.num_f0s_per_oct; i++) {
Chris@7 1036 if (m_glogs[i + i_warp*m_glogs_num_f0s] > max_glogs) {
Chris@7 1037 max_glogs = m_glogs[i + i_warp*m_glogs_num_f0s];
Chris@7 1038 ind_max_glogs = i_warp;
Chris@7 1039 }
Chris@7 1040 }
Chris@7 1041 }
Chris@0 1042
Chris@15 1043 if (m_processingMode == ModeRoughSpectrogram) {
Chris@15 1044
Chris@15 1045 // already accumulated our return values in feature
Chris@19 1046 fs[0].push_back(feature);
Chris@15 1047
Chris@15 1048 } else if (m_processingMode == ModeSpectrogram) {
Chris@15 1049
Chris@15 1050 for (int i = 0; i < m_warp_params.nsamps_twarp/2+1; i++) {
Chris@15 1051 feature.values.push_back(pow(10.0, m_absFanChirpTransform[ind_max_glogs * (m_warp_params.nsamps_twarp/2+1) + i]) - 1.0);
Chris@15 1052 }
Chris@19 1053 fs[0].push_back(feature);
Chris@15 1054
Chris@15 1055 } else { // f0gram
Chris@15 1056
Chris@19 1057 int bestIndex = -1;
Chris@19 1058
Chris@15 1059 for (int i=m_glogs_init_f0s; i< m_glogs_num_f0s - m_f0_params.num_f0s_per_oct; i++) {
Chris@19 1060 double value = 0.0;
Chris@15 1061 switch (m_f0gram_mode) {
Chris@15 1062 case AllBinsOfBestDirection:
Chris@19 1063 value = m_glogs[i+(int)ind_max_glogs*(int)m_glogs_num_f0s];
Chris@15 1064 break;
Chris@15 1065 case BestBinOfAllDirections:
Chris@15 1066 max_glogs = -DBL_MAX;
Chris@15 1067 for (int i_warp = 0; i_warp < m_warp_params.num_warps; i_warp++) {
Chris@15 1068 if (m_glogs[i + i_warp*m_glogs_num_f0s] > max_glogs) {
Chris@15 1069 max_glogs = m_glogs[i + i_warp*m_glogs_num_f0s];
Chris@15 1070 ind_max_glogs = i_warp;
Chris@15 1071 }
Chris@7 1072 }
Chris@19 1073 value = max_glogs;
Chris@15 1074 break;
Chris@7 1075 }
Chris@19 1076 if (bestIndex < 0 || float(value) > feature.values[bestIndex]) {
Chris@19 1077 bestIndex = int(feature.values.size());
Chris@19 1078 }
Chris@19 1079 feature.values.push_back(float(value));
Chris@19 1080 }
Chris@19 1081
Chris@19 1082 fs[0].push_back(feature);
Chris@19 1083
Chris@19 1084 if (bestIndex >= 0) {
Chris@19 1085
Chris@19 1086 double bestValue = feature.values[bestIndex];
Chris@19 1087 set<double> ordered(feature.values.begin(), feature.values.end());
Chris@19 1088 vector<double> flattened(ordered.begin(), ordered.end());
Chris@19 1089 double median = flattened[flattened.size()/2];
Chris@19 1090 if (bestValue > median * 8.0) {
Chris@19 1091 Feature pfeature;
Chris@19 1092 pfeature.hasTimestamp = false;
Chris@19 1093 pfeature.values.push_back(m_f0s[bestIndex]);
Chris@19 1094 fs[1].push_back(pfeature);
Chris@19 1095 }
Chris@7 1096 }
Chris@7 1097 }
Chris@0 1098
Chris@7 1099 return fs;
Chris@0 1100 }
Chris@0 1101
Chris@0 1102 FChTransformF0gram::FeatureSet
Chris@0 1103 FChTransformF0gram::getRemainingFeatures() {
Chris@0 1104 return FeatureSet();
Chris@0 1105 }
Chris@0 1106
Chris@0 1107 void
Chris@0 1108 FChTransformF0gram::design_time_window() {
Chris@0 1109
Chris@20 1110 int transitionWidth = (int)m_blockSize/128 + 128;
Chris@14 1111 m_timeWindow = allocate<double>(m_blockSize);
Chris@14 1112 double *lp_transitionWindow = allocate<double>(transitionWidth);
Chris@0 1113
Chris@10 1114 for (int i = 0; i < m_blockSize; i++) {
Chris@7 1115 m_timeWindow[i] = 1.0;
Chris@7 1116 }
Chris@0 1117
Chris@10 1118 for (int i = 0; i < transitionWidth; i++) {
Chris@0 1119 lp_transitionWindow[i]=0.5*(1.0-cos(2*M_PI*(double)(i+1)/((double)transitionWidth+1.0)));
Chris@0 1120 }
Chris@0 1121
Chris@10 1122 for (int i = 0; i < transitionWidth/2; i++) {
Chris@7 1123 m_timeWindow[i] = lp_transitionWindow[i];
Chris@7 1124 m_timeWindow[m_blockSize-1-i] = lp_transitionWindow[transitionWidth-1-i];
Chris@7 1125 }
Chris@0 1126
Chris@7 1127 #ifdef DEBUG
Chris@7 1128 for (int i = 0; i < m_blockSize; i++) {
Chris@7 1129 if ((i<transitionWidth)) {
Chris@16 1130 fprintf(stderr, " m_timeWindow[%d] = %f.\n",i,m_timeWindow[i]);
Chris@7 1131 }
Chris@7 1132 }
Chris@7 1133 #endif
Chris@0 1134
Chris@14 1135 deallocate(lp_transitionWindow);
Chris@0 1136 }
Chris@0 1137