annotate FChTransformF0gram.cpp @ 33:b21704074c9c spect tip

Ensure default parameter values match the actual internal defaults
author Chris Cannam
date Fri, 07 Feb 2020 11:49:39 +0000
parents 60e2ef9fc5fc
children
rev   line source
Chris@0 1 /*
Chris@0 2 copyright (C) 2011 I. Irigaray, M. Rocamora
Chris@0 3
Chris@0 4 This program is free software: you can redistribute it and/or modify
Chris@0 5 it under the terms of the GNU General Public License as published by
Chris@0 6 the Free Software Foundation, either version 3 of the License, or
Chris@0 7 (at your option) any later version.
Chris@0 8
Chris@0 9 This program is distributed in the hope that it will be useful,
Chris@0 10 but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@0 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@0 12 GNU General Public License for more details.
Chris@0 13
Chris@0 14 You should have received a copy of the GNU General Public License
Chris@0 15 along with this program. If not, see <http://www.gnu.org/licenses/>.
Chris@7 16 */
Chris@0 17
Chris@0 18 #include "FChTransformF0gram.h"
Chris@0 19 #include "FChTransformUtils.h"
Chris@0 20 #include <math.h>
Chris@0 21 #include <float.h>
Chris@14 22
Chris@19 23 #include <set>
Chris@19 24
Chris@14 25 #include "bqvec/Allocators.h"
Chris@14 26
Chris@14 27 using namespace breakfastquay;
Chris@14 28
Chris@27 29 //#define DEBUG
Chris@7 30
Chris@0 31 #define MAX(x, y) (((x) > (y)) ? (x) : (y))
Chris@0 32
Chris@15 33 FChTransformF0gram::FChTransformF0gram(ProcessingMode mode,
Chris@15 34 float inputSampleRate) :
Chris@7 35 Plugin(inputSampleRate),
Chris@15 36 m_processingMode(mode),
Chris@20 37 m_initialised(false),
Chris@20 38 m_stepSize(256),
Chris@20 39 m_blockSize(8192) {
Chris@0 40
Chris@24 41 nsamp_options.push_back(256);
Chris@24 42 nsamp_options.push_back(512);
Chris@24 43 nsamp_options.push_back(1024);
Chris@24 44 nsamp_options.push_back(2048);
Chris@24 45 nsamp_options.push_back(4096);
Chris@24 46 nsamp_options.push_back(8192);
Chris@24 47
Chris@0 48 m_fs = inputSampleRate;
Chris@0 49 // max frequency of interest (Hz)
Chris@0 50 m_fmax = 10000.f;
Chris@0 51 // warping parameters
Chris@12 52 m_warp_params.nsamps_twarp = 2048;
Chris@0 53 m_warp_params.alpha_max = 4;
Chris@0 54 m_warp_params.num_warps = 21;
Chris@0 55 m_warp_params.fact_over_samp = 2;
Chris@0 56 m_warp_params.alpha_dist = 0;
Chris@0 57 // f0 parameters
Chris@0 58 m_f0_params.f0min = 80.0;
Chris@0 59 m_f0_params.num_octs = 4;
Chris@0 60 m_f0_params.num_f0s_per_oct = 192;
Chris@0 61 m_f0_params.prefer = true;
Chris@0 62 m_f0_params.prefer_mean = 60;
Chris@0 63 m_f0_params.prefer_stdev = 18;
Chris@0 64 // glogs parameters
Chris@0 65 m_glogs_params.HP_logS = true;
Chris@0 66 m_glogs_params.att_subharms = 1;
Chris@7 67 // display parameters
Chris@15 68 m_f0gram_mode = BestBinOfAllDirections;
Chris@0 69
Chris@0 70 m_glogs_params.median_poly_coefs[0] = -0.000000058551680;
Chris@0 71 m_glogs_params.median_poly_coefs[1] = -0.000006945207775;
Chris@0 72 m_glogs_params.median_poly_coefs[2] = 0.002357223226588;
Chris@0 73
Chris@0 74 m_glogs_params.sigma_poly_coefs[0] = 0.000000092782308;
Chris@0 75 m_glogs_params.sigma_poly_coefs[1] = 0.000057283574898;
Chris@0 76 m_glogs_params.sigma_poly_coefs[2] = 0.022199903714288;
Chris@0 77
Chris@0 78 m_num_f0s = 0;
Chris@16 79 m_f0s = 0;
Chris@0 80 }
Chris@0 81
Chris@14 82 FChTransformF0gram::~FChTransformF0gram()
Chris@14 83 {
Chris@20 84 if (!m_initialised) {
Chris@14 85 return; // nothing was allocated
Chris@14 86 }
Chris@14 87
Chris@20 88 deallocate(m_inputBuffer);
Chris@20 89
Chris@14 90 deallocate(m_warpings.pos_int);
Chris@14 91 deallocate(m_warpings.pos_frac);
Chris@14 92 deallocate(m_warpings.chirp_rates);
Chris@14 93
Chris@14 94 clean_LPF();
Chris@14 95
Chris@14 96 deallocate(m_timeWindow);
Chris@14 97
Chris@14 98 deallocate(mp_HanningWindow);
Chris@14 99
Chris@14 100 // Warping
Chris@14 101 deallocate(x_warping);
Chris@14 102 delete fft_xwarping;
Chris@14 103 deallocate(m_absFanChirpTransform);
Chris@14 104 deallocate(m_auxFanChirpTransform);
Chris@14 105
Chris@14 106 // design_GLogS
Chris@14 107 deallocate(m_glogs_f0);
Chris@14 108 deallocate(m_glogs);
Chris@14 109 deallocate(m_glogs_n);
Chris@14 110 deallocate(m_glogs_index);
Chris@14 111 deallocate(m_glogs_posint);
Chris@14 112 deallocate(m_glogs_posfrac);
Chris@14 113 deallocate(m_glogs_interp);
Chris@14 114 deallocate(m_glogs_third_harmonic_posint);
Chris@14 115 deallocate(m_glogs_third_harmonic_posfrac);
Chris@14 116 deallocate(m_glogs_third_harmonic);
Chris@14 117 deallocate(m_glogs_fifth_harmonic_posint);
Chris@14 118 deallocate(m_glogs_fifth_harmonic_posfrac);
Chris@14 119 deallocate(m_glogs_fifth_harmonic);
Chris@14 120 deallocate(m_glogs_f0_preference_weights);
Chris@14 121 deallocate(m_glogs_median_correction);
Chris@14 122 deallocate(m_glogs_sigma_correction);
Chris@16 123
Chris@16 124 deallocate(m_f0s);
Chris@0 125 }
Chris@0 126
Chris@0 127 string
Chris@0 128 FChTransformF0gram::getIdentifier() const {
Chris@15 129 switch (m_processingMode) {
Chris@15 130 case ModeF0Gram: return "fchtransformf0gram";
Chris@15 131 case ModeSpectrogram: return "fchtransformspectrogram";
Chris@15 132 case ModeRoughSpectrogram: return "fchtransformrough";
Chris@15 133 }
Chris@17 134 throw std::logic_error("unknown mode");
Chris@0 135 }
Chris@0 136
Chris@0 137 string
Chris@0 138 FChTransformF0gram::getName() const {
Chris@15 139 switch (m_processingMode) {
Chris@15 140 case ModeF0Gram: return "Fan Chirp Transform F0gram";
Chris@15 141 case ModeSpectrogram: return "Fan Chirp Transform Spectrogram";
Chris@15 142 case ModeRoughSpectrogram: return "Fan Chirp Transform Rough Spectrogram";
Chris@15 143 }
Chris@17 144 throw std::logic_error("unknown mode");
Chris@0 145 }
Chris@0 146
Chris@0 147 string
Chris@0 148 FChTransformF0gram::getDescription() const {
Chris@15 149 switch (m_processingMode) {
Chris@15 150 case ModeF0Gram:
Chris@15 151 return "This plug-in produces a representation, called F0gram, which exhibits the salience of the fundamental frequency of the sound sources in the audio file. The computation of the F0gram makes use of the Fan Chirp Transform analysis. It is based on the article \"Fan chirp transform for music representation\" P. Cancela, E. Lopez, M. Rocamora, International Conference on Digital Audio Effects, 13th. DAFx-10. Graz, Austria - 6-10 Sep 2010.";
Chris@15 152 case ModeSpectrogram:
Chris@15 153 return "This plug-in produces a spectral representation of the audio using Fan Chirp Transform analysis.";
Chris@15 154 case ModeRoughSpectrogram:
Chris@15 155 return "This plug-in produces a more approximate spectral representation of the audio using Fan Chirp Transform analysis.";
Chris@15 156 }
Chris@17 157 throw std::logic_error("unknown mode");
Chris@0 158 }
Chris@0 159
Chris@0 160 string
Chris@0 161 FChTransformF0gram::getMaker() const {
Chris@0 162 // Your name here
Chris@0 163 return "Audio Processing Group \n Universidad de la Republica";
Chris@0 164 }
Chris@0 165
Chris@0 166 int
Chris@0 167 FChTransformF0gram::getPluginVersion() const {
Chris@0 168 // Increment this each time you release a version that behaves
Chris@0 169 // differently from the previous one
Chris@0 170 //
Chris@0 171 // 0 - initial version from scratch
Chris@15 172 return 1;
Chris@0 173 }
Chris@0 174
Chris@0 175 string
Chris@0 176 FChTransformF0gram::getCopyright() const {
Chris@0 177 // This function is not ideally named. It does not necessarily
Chris@0 178 // need to say who made the plugin -- getMaker does that -- but it
Chris@0 179 // should indicate the terms under which it is distributed. For
Chris@0 180 // example, "Copyright (year). All Rights Reserved", or "GPL"
Chris@0 181 return "copyright (C) 2011 GPL - Audio Processing Group, UdelaR";
Chris@0 182 }
Chris@0 183
Chris@0 184 FChTransformF0gram::InputDomain
Chris@0 185 FChTransformF0gram::getInputDomain() const {
Chris@0 186 return TimeDomain;
Chris@0 187 }
Chris@0 188
Chris@0 189 size_t FChTransformF0gram::getPreferredBlockSize() const {
Chris@20 190 // We do our own accumulating into blocks within process()
Chris@20 191 return m_blockSize/2;
Chris@0 192 }
Chris@0 193
Chris@0 194 size_t
Chris@0 195 FChTransformF0gram::getPreferredStepSize() const {
Chris@20 196 return m_stepSize;
Chris@0 197 }
Chris@0 198
Chris@0 199 size_t
Chris@0 200 FChTransformF0gram::getMinChannelCount() const {
Chris@0 201 return 1;
Chris@0 202 }
Chris@0 203
Chris@0 204 size_t
Chris@0 205 FChTransformF0gram::getMaxChannelCount() const {
Chris@0 206 return 1;
Chris@0 207 }
Chris@0 208
Chris@0 209 FChTransformF0gram::ParameterList
Chris@0 210 FChTransformF0gram::getParameterDescriptors() const {
Chris@0 211 ParameterList list;
Chris@0 212
Chris@0 213 // If the plugin has no adjustable parameters, return an empty
Chris@0 214 // list here (and there's no need to provide implementations of
Chris@0 215 // getParameter and setParameter in that case either).
Chris@0 216
Chris@0 217 // Note that it is your responsibility to make sure the parameters
Chris@0 218 // start off having their default values (e.g. in the constructor
Chris@0 219 // above). The host needs to know the default value so it can do
Chris@0 220 // things like provide a "reset to default" function, but it will
Chris@0 221 // not explicitly set your parameters to their defaults for you if
Chris@0 222 // they have not changed in the mean time.
Chris@0 223
Chris@0 224 // ============= WARPING PARAMETERS =============
Chris@0 225
Chris@0 226 ParameterDescriptor fmax;
Chris@0 227 fmax.identifier = "fmax";
Chris@0 228 fmax.name = "Maximum frequency";
Chris@0 229 fmax.description = "Maximum frequency of interest for the analysis.";
Chris@0 230 fmax.unit = "Hz";
Chris@0 231 fmax.minValue = 2000;
Chris@0 232 fmax.maxValue = 22050;
Chris@0 233 fmax.defaultValue = 10000;
Chris@0 234 fmax.isQuantized = true;
Chris@0 235 fmax.quantizeStep = 1.0;
Chris@0 236 list.push_back(fmax);
Chris@0 237
Chris@0 238 ParameterDescriptor nsamp;
Chris@24 239 nsamp.identifier = "nsamp_ix";
Chris@0 240 nsamp.name = "Number of samples";
Chris@0 241 nsamp.description = "Number of samples of the time warped frame";
Chris@24 242 nsamp.minValue = 0;
Chris@24 243 nsamp.maxValue = nsamp_options.size()-1;
Chris@24 244 nsamp.defaultValue = 3;
Chris@24 245 nsamp.isQuantized = true;
Chris@24 246 nsamp.quantizeStep = 1.0;
Chris@24 247 char label[100];
Chris@24 248 for (int i = 0; i < int(nsamp_options.size()); ++i) {
Chris@24 249 sprintf(label, "%d", nsamp_options[i]);
Chris@24 250 nsamp.valueNames.push_back(label);
Chris@24 251 }
Chris@0 252 nsamp.isQuantized = true;
Chris@0 253 nsamp.quantizeStep = 1.0;
Chris@0 254 list.push_back(nsamp);
Chris@0 255
Chris@0 256 ParameterDescriptor alpha_max;
Chris@0 257 alpha_max.identifier = "alpha_max";
Chris@0 258 alpha_max.name = "Maximum alpha value";
Chris@0 259 alpha_max.description = "Maximum value for the alpha parameter of the transform.";
Chris@0 260 alpha_max.unit = "Hz/s";
Chris@0 261 alpha_max.minValue = -10;
Chris@0 262 alpha_max.maxValue = 10;
Chris@33 263 alpha_max.defaultValue = 4;
Chris@0 264 alpha_max.isQuantized = true;
Chris@0 265 alpha_max.quantizeStep = 1.0;
Chris@0 266 list.push_back(alpha_max);
Chris@0 267
Chris@0 268 ParameterDescriptor num_warps;
Chris@0 269 num_warps.identifier = "num_warps";
Chris@0 270 num_warps.name = "Number of warpings";
Chris@0 271 num_warps.description = "Number of different warpings in the specified range (must be odd).";
Chris@0 272 num_warps.unit = "";
Chris@0 273 num_warps.minValue = 1;
Chris@0 274 num_warps.maxValue = 101;
Chris@0 275 num_warps.defaultValue = 21;
Chris@0 276 num_warps.isQuantized = true;
Chris@0 277 num_warps.quantizeStep = 2.0;
Chris@0 278 list.push_back(num_warps);
Chris@0 279
Chris@0 280 ParameterDescriptor alpha_dist;
Chris@0 281 alpha_dist.identifier = "alpha_dist";
Chris@0 282 alpha_dist.name = "alpha distribution";
Chris@0 283 alpha_dist.description = "Type of distribution of alpha values (linear or log).";
Chris@0 284 alpha_dist.unit = "";
Chris@0 285 alpha_dist.minValue = 0;
Chris@0 286 alpha_dist.maxValue = 1;
Chris@33 287 alpha_dist.defaultValue = 0;
Chris@0 288 alpha_dist.isQuantized = true;
Chris@0 289 alpha_dist.quantizeStep = 1.0;
Chris@0 290 // lin (0), log (1)
Chris@0 291 alpha_dist.valueNames.push_back("lin");
Chris@0 292 alpha_dist.valueNames.push_back("log");
Chris@0 293 list.push_back(alpha_dist);
Chris@0 294
Chris@0 295 // ============= F0-GRAM PARAMETERS =============
Chris@0 296
Chris@0 297 ParameterDescriptor f0min;
Chris@0 298 f0min.identifier = "f0min";
Chris@0 299 f0min.name = "min f0";
Chris@0 300 f0min.description = "Minimum fundamental frequency (f0) value.";
Chris@0 301 f0min.unit = "Hz";
Chris@0 302 f0min.minValue = 1;
Chris@0 303 f0min.maxValue = 500;
Chris@0 304 f0min.defaultValue = 80;
Chris@0 305 f0min.isQuantized = true;
Chris@0 306 f0min.quantizeStep = 1.0;
Chris@0 307 list.push_back(f0min);
Chris@0 308
Chris@0 309 ParameterDescriptor num_octs;
Chris@0 310 num_octs.identifier = "num_octs";
Chris@0 311 num_octs.name = "number of octaves";
Chris@0 312 num_octs.description = "Number of octaves for F0gram computation.";
Chris@0 313 num_octs.unit = "";
Chris@0 314 num_octs.minValue = 1;
Chris@0 315 num_octs.maxValue = 10;
Chris@0 316 num_octs.defaultValue = 4;
Chris@0 317 num_octs.isQuantized = true;
Chris@0 318 num_octs.quantizeStep = 1.0;
Chris@0 319 list.push_back(num_octs);
Chris@0 320
Chris@0 321 ParameterDescriptor f0s_per_oct;
Chris@0 322 f0s_per_oct.identifier = "f0s_per_oct";
Chris@0 323 f0s_per_oct.name = "f0 values per octave";
Chris@0 324 f0s_per_oct.description = "Number of f0 values per octave.";
Chris@0 325 f0s_per_oct.unit = "";
Chris@0 326 f0s_per_oct.minValue = 12;
Chris@0 327 f0s_per_oct.maxValue = 768;
Chris@0 328 f0s_per_oct.defaultValue = 192;
Chris@0 329 f0s_per_oct.isQuantized = true;
Chris@0 330 f0s_per_oct.quantizeStep = 1.0;
Chris@0 331 list.push_back(f0s_per_oct);
Chris@0 332
Chris@0 333 ParameterDescriptor f0_prefer_fun;
Chris@0 334 f0_prefer_fun.identifier = "f0_prefer_fun";
Chris@22 335 f0_prefer_fun.name = "Use f0 weighting";
Chris@22 336 f0_prefer_fun.description = "Whether to use a f0 weighting function to prefer frequencies nearer a mean value.";
Chris@0 337 f0_prefer_fun.unit = "";
Chris@0 338 f0_prefer_fun.minValue = 0;
Chris@0 339 f0_prefer_fun.maxValue = 1;
Chris@0 340 f0_prefer_fun.defaultValue = 1;
Chris@0 341 f0_prefer_fun.isQuantized = true;
Chris@0 342 f0_prefer_fun.quantizeStep = 1.0;
Chris@0 343 list.push_back(f0_prefer_fun);
Chris@0 344
Chris@0 345 ParameterDescriptor f0_prefer_mean;
Chris@0 346 f0_prefer_mean.identifier = "f0_prefer_mean";
Chris@22 347 f0_prefer_mean.name = "Mean pitch for f0 weighting";
Chris@0 348 f0_prefer_mean.description = "Mean value for f0 weighting function (MIDI number).";
Chris@0 349 f0_prefer_mean.unit = "";
Chris@0 350 f0_prefer_mean.minValue = 1;
Chris@0 351 f0_prefer_mean.maxValue = 127;
Chris@0 352 f0_prefer_mean.defaultValue = 60;
Chris@0 353 f0_prefer_mean.isQuantized = true;
Chris@0 354 f0_prefer_mean.quantizeStep = 1.0;
Chris@0 355 list.push_back(f0_prefer_mean);
Chris@0 356
Chris@0 357 ParameterDescriptor f0_prefer_stdev;
Chris@0 358 f0_prefer_stdev.identifier = "f0_prefer_stdev";
Chris@22 359 f0_prefer_stdev.name = "Stdev for f0 weighting";
Chris@22 360 f0_prefer_stdev.description = "Standard deviation for f0 weighting function (MIDI number).";
Chris@0 361 f0_prefer_stdev.unit = "";
Chris@0 362 f0_prefer_stdev.minValue = 1;
Chris@0 363 f0_prefer_stdev.maxValue = 127;
Chris@0 364 f0_prefer_stdev.defaultValue = 18;
Chris@0 365 f0_prefer_stdev.isQuantized = true;
Chris@0 366 f0_prefer_stdev.quantizeStep = 1.0;
Chris@0 367 list.push_back(f0_prefer_stdev);
Chris@0 368
Chris@0 369 ParameterDescriptor f0gram_mode;
Chris@0 370 f0gram_mode.identifier = "f0gram_mode";
Chris@0 371 f0gram_mode.name = "display mode of f0gram";
Chris@0 372 f0gram_mode.description = "Display all bins of the best direction, or the best bin for each direction.";
Chris@0 373 f0gram_mode.unit = "";
Chris@0 374 f0gram_mode.minValue = 0;
Chris@0 375 f0gram_mode.maxValue = 1;
Chris@0 376 f0gram_mode.defaultValue = 1;
Chris@0 377 f0gram_mode.isQuantized = true;
Chris@0 378 f0gram_mode.quantizeStep = 1.0;
Chris@0 379 list.push_back(f0gram_mode);
Chris@0 380
Chris@0 381 return list;
Chris@0 382 }
Chris@0 383
Chris@0 384 float
Chris@0 385 FChTransformF0gram::getParameter(string identifier) const {
Chris@0 386
Chris@0 387 if (identifier == "fmax") {
Chris@0 388 return m_fmax;
Chris@24 389 } else if (identifier == "nsamp_ix") {
Chris@24 390 for (int i = 0; i < int(nsamp_options.size()); ++i) {
Chris@24 391 if (m_warp_params.nsamps_twarp == nsamp_options[i]) {
Chris@24 392 return i;
Chris@24 393 }
Chris@24 394 }
Chris@24 395 throw std::logic_error("internal error: nsamps_twarp not in nsamp_options");
Chris@0 396 } else if (identifier == "alpha_max") {
Chris@0 397 return m_warp_params.alpha_max;
Chris@0 398 } else if (identifier == "num_warps") {
Chris@0 399 return m_warp_params.num_warps;
Chris@0 400 } else if (identifier == "alpha_dist") {
Chris@0 401 return m_warp_params.alpha_dist;
Chris@0 402 } else if (identifier == "f0min") {
Chris@0 403 return m_f0_params.f0min;
Chris@0 404 } else if (identifier == "num_octs") {
Chris@0 405 return m_f0_params.num_octs;
Chris@0 406 } else if (identifier == "f0s_per_oct") {
Chris@0 407 return m_f0_params.num_f0s_per_oct;
Chris@0 408 } else if (identifier == "f0_prefer_fun") {
Chris@22 409 return m_f0_params.prefer ? 1.0 : 0.0;
Chris@0 410 } else if (identifier == "f0_prefer_mean") {
Chris@0 411 return m_f0_params.prefer_mean;
Chris@0 412 } else if (identifier == "f0_prefer_stdev") {
Chris@0 413 return m_f0_params.prefer_stdev;
Chris@7 414 } else if (identifier == "f0gram_mode") {
Chris@15 415 return m_f0gram_mode == BestBinOfAllDirections ? 1.0 : 0.0;
Chris@0 416 } else {
Chris@0 417 return 0.f;
Chris@0 418 }
Chris@0 419
Chris@0 420 }
Chris@0 421
Chris@15 422 void FChTransformF0gram::setParameter(string identifier, float value)
Chris@15 423 {
Chris@0 424 if (identifier == "fmax") {
Chris@0 425 m_fmax = value;
Chris@24 426 } else if (identifier == "nsamp_ix") {
Chris@24 427 int n = int(roundf(value));
Chris@24 428 for (int i = 0; i < int(nsamp_options.size()); ++i) {
Chris@24 429 if (i == n) {
Chris@24 430 m_warp_params.nsamps_twarp = nsamp_options[i];
Chris@24 431 m_blockSize = m_warp_params.nsamps_twarp * 4;
Chris@24 432 }
Chris@24 433 }
Chris@0 434 } else if (identifier == "alpha_max") {
Chris@0 435 m_warp_params.alpha_max = value;
Chris@0 436 } else if (identifier == "num_warps") {
Chris@0 437 m_warp_params.num_warps = value;
Chris@0 438 } else if (identifier == "alpha_dist") {
Chris@0 439 m_warp_params.alpha_dist = value;
Chris@0 440 } else if (identifier == "f0min") {
Chris@0 441 m_f0_params.f0min = value;
Chris@0 442 } else if (identifier == "num_octs") {
Chris@0 443 m_f0_params.num_octs = value;
Chris@0 444 } else if (identifier == "f0s_per_oct") {
Chris@0 445 m_f0_params.num_f0s_per_oct = value;
Chris@0 446 } else if (identifier == "f0_prefer_fun") {
Chris@22 447 m_f0_params.prefer = (value > 0.5);
Chris@0 448 } else if (identifier == "f0_prefer_mean") {
Chris@0 449 m_f0_params.prefer_mean = value;
Chris@0 450 } else if (identifier == "f0_prefer_stdev") {
Chris@0 451 m_f0_params.prefer_stdev = value;
Chris@0 452 } else if (identifier == "f0gram_mode") {
Chris@15 453 m_f0gram_mode = (value > 0.5 ?
Chris@15 454 BestBinOfAllDirections :
Chris@15 455 AllBinsOfBestDirection);
Chris@15 456 } else {
Chris@15 457 cerr << "WARNING: Unknown parameter id \""
Chris@15 458 << identifier << "\"" << endl;
Chris@0 459 }
Chris@0 460 }
Chris@0 461
Chris@0 462 FChTransformF0gram::ProgramList
Chris@0 463 FChTransformF0gram::getPrograms() const {
Chris@0 464 ProgramList list;
Chris@0 465 return list;
Chris@0 466 }
Chris@0 467
Chris@0 468 FChTransformF0gram::OutputList
Chris@0 469 FChTransformF0gram::getOutputDescriptors() const {
Chris@0 470
Chris@0 471 OutputList list;
Chris@0 472
Chris@16 473 vector<string> labels;
Chris@16 474 char label[100];
Chris@0 475
Chris@16 476 if (m_processingMode == ModeF0Gram) {
Chris@16 477
Chris@16 478 /* f0 values of F0gram grid as string values */
Chris@16 479 for (int i = 0; i < m_num_f0s; ++i) {
Chris@16 480 sprintf(label, "%4.2f Hz", m_f0s[i]);
Chris@16 481 labels.push_back(label);
Chris@16 482 }
Chris@16 483
Chris@16 484 /* The F0gram */
Chris@16 485 OutputDescriptor d;
Chris@16 486 d.identifier = "f0gram";
Chris@19 487 d.name = "F0gram";
Chris@19 488 d.description = "The salience of the different f0s in the signal.";
Chris@16 489 d.hasFixedBinCount = true;
Chris@16 490 d.binCount = m_f0_params.num_octs * m_f0_params.num_f0s_per_oct;
Chris@16 491 d.binNames = labels;
Chris@16 492 d.hasKnownExtents = false;
Chris@16 493 d.isQuantized = false;
Chris@16 494 d.sampleType = OutputDescriptor::OneSamplePerStep;
Chris@16 495 d.hasDuration = false;
Chris@16 496 list.push_back(d);
Chris@16 497
Chris@19 498 d.identifier = "pitch";
Chris@19 499 d.name = "Most salient pitch";
Chris@19 500 d.description = "The most salient f0 in the signal for each time step.";
Chris@19 501 d.unit = "Hz";
Chris@19 502 d.hasFixedBinCount = true;
Chris@19 503 d.binCount = 1;
Chris@19 504 d.binNames.clear();
Chris@19 505 d.hasKnownExtents = false;
Chris@19 506 d.isQuantized = false;
Chris@19 507 d.sampleType = OutputDescriptor::OneSamplePerStep;
Chris@19 508 d.hasDuration = false;
Chris@19 509 list.push_back(d);
Chris@19 510
Chris@16 511 } else {
Chris@16 512
Chris@16 513 for (int i = 0; i < m_warp_params.nsamps_twarp/2+1; ++i) {
Chris@24 514 double freq = i * (m_warpings.fs_warp / m_warp_params.nsamps_twarp);
Chris@16 515 sprintf(label, "%4.2f Hz", freq);
Chris@16 516 labels.push_back(label);
Chris@16 517 }
Chris@16 518
Chris@16 519 OutputDescriptor d;
Chris@16 520 d.identifier = "spectrogram";
Chris@16 521 d.name = "Spectrogram";
Chris@16 522 d.description = "Time/frequency spectrogram derived from the Fan Chirp Transform output";
Chris@16 523 d.hasFixedBinCount = true;
Chris@16 524 d.binCount = m_warp_params.nsamps_twarp/2+1;
Chris@16 525 d.binNames = labels;
Chris@16 526 d.hasKnownExtents = false;
Chris@16 527 d.isQuantized = false;
Chris@16 528 d.sampleType = OutputDescriptor::OneSamplePerStep;
Chris@16 529 d.hasDuration = false;
Chris@16 530 list.push_back(d);
Chris@0 531 }
Chris@16 532
Chris@0 533 return list;
Chris@0 534 }
Chris@0 535
Chris@0 536 bool
Chris@0 537 FChTransformF0gram::initialise(size_t channels, size_t stepSize, size_t blockSize) {
Chris@0 538 if (channels < getMinChannelCount() ||
Chris@20 539 channels > getMaxChannelCount() ||
Chris@27 540 int(blockSize) != m_blockSize/2 ||
Chris@27 541 int(stepSize) != m_stepSize) {
Chris@14 542 return false;
Chris@14 543 }
Chris@0 544
Chris@20 545 m_inputBuffer = allocate_and_zero<float>(m_blockSize);
Chris@20 546
Chris@0 547 // WARNING !!!
Chris@0 548 // these values in fact are determined by the sampling frequency m_fs
Chris@0 549 // the parameters used below correspond to default values i.e. m_fs = 44.100 Hz
Chris@0 550 //m_blockSize = 4 * m_warp_params.nsamps_twarp;
Chris@16 551 // m_stepSize = floor(m_hop / m_warp_params.fact_over_samp);
Chris@16 552
Chris@16 553 /* design of FChT */
Chris@16 554 design_FChT();
Chris@0 555
Chris@0 556 /* initialise m_glogs_params */
Chris@7 557 design_GLogS();
Chris@0 558
Chris@7 559 design_LPF();
Chris@0 560
Chris@7 561 design_time_window();
Chris@0 562
Chris@7 563 // Create Hanning window for warped signals
Chris@14 564 mp_HanningWindow = allocate<double>(m_warp_params.nsamps_twarp);
Chris@7 565 bool normalize = false;
Chris@14 566 Utils::hanning_window(mp_HanningWindow, m_warp_params.nsamps_twarp, normalize);
Chris@0 567
Chris@16 568 m_num_f0s = m_f0_params.num_octs * m_f0_params.num_f0s_per_oct;
Chris@16 569 m_f0s = allocate<double>(m_num_f0s);
Chris@16 570 for (int i = 0; i < m_num_f0s; ++i) {
Chris@16 571 m_f0s[i] = m_glogs_f0[m_glogs_init_f0s + i];
Chris@16 572 }
Chris@20 573
Chris@20 574 m_initialised = true;
Chris@0 575 return true;
Chris@0 576 }
Chris@0 577
Chris@0 578 void
Chris@0 579 FChTransformF0gram::design_GLogS() {
Chris@0 580
Chris@7 581 // total number & initial quantity of f0s
Chris@16 582
Chris@10 583 m_glogs_init_f0s = (int)(((double)m_f0_params.num_f0s_per_oct)*log2(5.0))+1;
Chris@7 584 m_glogs_num_f0s = (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct + m_glogs_init_f0s;
Chris@0 585
Chris@7 586 // Initialize arrays
Chris@14 587 m_glogs_f0 = allocate<double>(m_glogs_num_f0s);
Chris@14 588 m_glogs = allocate<double>(m_glogs_num_f0s*m_warp_params.num_warps);
Chris@14 589 m_glogs_n = allocate<int>(m_glogs_num_f0s);
Chris@14 590 m_glogs_index = allocate<int>(m_glogs_num_f0s);
Chris@0 591
Chris@7 592 // Compute f0 values
Chris@7 593 m_glogs_harmonic_count = 0;
Chris@7 594 double factor = (double)(m_warp_params.nsamps_twarp/2)/(double)(m_warp_params.nsamps_twarp/2+1);
Chris@10 595 for (int i = 0; i < m_glogs_num_f0s; i++) {
Chris@7 596 m_glogs_f0[i] = (m_f0_params.f0min/5.0)*pow(2.0,(double)i/(double)m_f0_params.num_f0s_per_oct);
Chris@7 597 // for every f0 compute number of partials less or equal than m_fmax.
Chris@7 598 m_glogs_n[i] = m_fmax*factor/m_glogs_f0[i];
Chris@7 599 m_glogs_index[i] = m_glogs_harmonic_count;
Chris@7 600 m_glogs_harmonic_count += m_glogs_n[i];
Chris@7 601 }
Chris@0 602
Chris@7 603 // Initialize arrays for interpolation
Chris@14 604 m_glogs_posint = allocate<int>(m_glogs_harmonic_count);
Chris@14 605 m_glogs_posfrac = allocate<double>(m_glogs_harmonic_count);
Chris@14 606 m_glogs_interp = allocate<double>(m_glogs_harmonic_count);
Chris@0 607
Chris@7 608 // Compute int & frac of interpolation positions
Chris@10 609 int aux_index = 0;
Chris@7 610 double aux_pos;
Chris@10 611 for (int i = 0; i < m_glogs_num_f0s; i++) {
Chris@10 612 for (int j = 1; j <= m_glogs_n[i]; j++) {
Chris@18 613 aux_pos = ((double)j * m_glogs_f0[i]) * ((double)(m_warp_params.nsamps_twarp))/m_warpings.fs_warp;
Chris@10 614 m_glogs_posint[aux_index] = (int)aux_pos;
Chris@7 615 m_glogs_posfrac[aux_index] = aux_pos - (double)m_glogs_posint[aux_index];
Chris@7 616 aux_index++;
Chris@7 617 }
Chris@7 618 }
Chris@0 619
Chris@7 620 // Third harmonic attenuation
Chris@7 621 double aux_third_harmonic;
Chris@14 622 m_glogs_third_harmonic_posint = allocate<int>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@14 623 m_glogs_third_harmonic_posfrac = allocate<double>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@10 624 for (int i = 0; i < (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct; i++) {
Chris@7 625 aux_third_harmonic = (double)i + (double)m_glogs_init_f0s - ((double)m_f0_params.num_f0s_per_oct)*log2(3.0);
Chris@10 626 m_glogs_third_harmonic_posint[i] = (int)aux_third_harmonic;
Chris@7 627 m_glogs_third_harmonic_posfrac[i] = aux_third_harmonic - (double)(m_glogs_third_harmonic_posint[i]);
Chris@7 628 }
Chris@14 629 m_glogs_third_harmonic = allocate<double>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@0 630
Chris@7 631 // Fifth harmonic attenuation
Chris@7 632 double aux_fifth_harmonic;
Chris@14 633 m_glogs_fifth_harmonic_posint = allocate<int>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@14 634 m_glogs_fifth_harmonic_posfrac = allocate<double>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@10 635 for (int i = 0; i < (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct; i++) {
Chris@7 636 aux_fifth_harmonic = (double)i + (double)m_glogs_init_f0s - ((double)m_f0_params.num_f0s_per_oct)*log2(5.0);
Chris@10 637 m_glogs_fifth_harmonic_posint[i] = (int)aux_fifth_harmonic;
Chris@7 638 m_glogs_fifth_harmonic_posfrac[i] = aux_fifth_harmonic - (double)(m_glogs_fifth_harmonic_posint[i]);
Chris@7 639 }
Chris@14 640 m_glogs_fifth_harmonic = allocate<double>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@0 641
Chris@7 642 // Normalization & attenuation windows
Chris@14 643 m_glogs_f0_preference_weights = allocate<double>(m_f0_params.num_octs*m_f0_params.num_f0s_per_oct);
Chris@14 644 m_glogs_median_correction = allocate<double>(m_f0_params.num_octs*m_f0_params.num_f0s_per_oct);
Chris@14 645 m_glogs_sigma_correction = allocate<double>(m_f0_params.num_octs*m_f0_params.num_f0s_per_oct);
Chris@7 646 double MIDI_value;
Chris@10 647 for (int i = 0; i < m_f0_params.num_octs*m_f0_params.num_f0s_per_oct; i++) {
Chris@22 648 if (m_f0_params.prefer) {
Chris@22 649 MIDI_value = 69.0 + 12.0 * log2(m_glogs_f0[i + m_glogs_init_f0s]/440.0);
Chris@22 650 m_glogs_f0_preference_weights[i] = 1.0/sqrt(2.0*M_PI*m_f0_params.prefer_stdev*m_f0_params.prefer_stdev)*exp(-(MIDI_value-m_f0_params.prefer_mean)*(MIDI_value-m_f0_params.prefer_mean)/(2.0*m_f0_params.prefer_stdev*m_f0_params.prefer_stdev));
Chris@22 651 m_glogs_f0_preference_weights[i] = (0.01 + m_glogs_f0_preference_weights[i]) / (1.01);
Chris@22 652 } else {
Chris@22 653 m_glogs_f0_preference_weights[i] = 1.0;
Chris@22 654 }
Chris@0 655
Chris@7 656 m_glogs_median_correction[i] = m_glogs_params.median_poly_coefs[0]*(i+1.0)*(i+1.0) + m_glogs_params.median_poly_coefs[1]*(i+1.0) + m_glogs_params.median_poly_coefs[2];
Chris@7 657 m_glogs_sigma_correction[i] = 1.0 / (m_glogs_params.sigma_poly_coefs[0]*(i+1.0)*(i+1.0) + m_glogs_params.sigma_poly_coefs[1]*(i+1.0) + m_glogs_params.sigma_poly_coefs[2]);
Chris@7 658 }
Chris@0 659 }
Chris@0 660
Chris@0 661 void
Chris@0 662 FChTransformF0gram::design_FChT() {
Chris@0 663
Chris@0 664 /* ============= WARPING DESIGN ============= */
Chris@0 665
Chris@0 666 // sampling frequency after oversampling
Chris@0 667 m_warpings.fs_orig = m_warp_params.fact_over_samp * m_fs;
Chris@0 668
Chris@0 669 // number of samples of the original signal frame
Chris@0 670 m_warpings.nsamps_torig = 4 * m_warp_params.fact_over_samp * m_warp_params.nsamps_twarp;
Chris@0 671 // equivalent to: m_warpings.nsamps_torig = m_warp_params.fact_over_samp * m_blockSize;
Chris@0 672
Chris@0 673 // time instants of the original signal frame
Chris@14 674 double *t_orig = allocate<double>(m_warpings.nsamps_torig);
Chris@10 675 for (int ind = 0; ind < m_warpings.nsamps_torig; ind++) {
Chris@0 676 t_orig[ind] = ((double)(ind + 1) - (double)m_warpings.nsamps_torig / 2.0) / m_warpings.fs_orig;
Chris@0 677 }
Chris@0 678
Chris@0 679 // linear chirps warping definition as relative frequency deviation
Chris@7 680 //TODO
Chris@14 681 double *freq_relative = allocate<double>(m_warpings.nsamps_torig * m_warp_params.num_warps);
Chris@0 682 define_warps_linear_chirps(freq_relative, t_orig);
Chris@0 683
Chris@0 684 // maximum relative frequency deviation
Chris@0 685 double freq_relative_max = 0;
Chris@14 686 for (int i = 0; i < m_warpings.nsamps_torig; i++) {
Chris@14 687 for (int j = 0; j < m_warp_params.num_warps; j++) {
Chris@14 688 if (freq_relative_max < freq_relative[j * m_warpings.nsamps_torig + i]) {
Chris@0 689 freq_relative_max = freq_relative[j * m_warpings.nsamps_torig + i];
Chris@14 690 }
Chris@14 691 }
Chris@14 692 }
Chris@0 693
Chris@0 694 // sampling frequency of warped signal to be free of aliasing up to fmax
Chris@0 695 m_warpings.fs_warp = 2 * m_fmax * freq_relative_max;
Chris@0 696
Chris@0 697 // time instants of the warped signal frame
Chris@14 698 double *t_warp = allocate<double>(m_warp_params.nsamps_twarp);
Chris@10 699 for (int ind = 0; ind < m_warp_params.nsamps_twarp; ind++) {
Chris@0 700 t_warp[ind] = ((double)((int)(ind + 1)- (int)m_warp_params.nsamps_twarp / 2)) / (double)m_warpings.fs_warp;
Chris@0 701 }
Chris@0 702
Chris@0 703 // design of warpings for efficient interpolation
Chris@0 704 design_warps(freq_relative, t_orig, t_warp);
Chris@0 705
Chris@14 706 deallocate(freq_relative);
Chris@14 707 deallocate(t_orig);
Chris@14 708 deallocate(t_warp);
Chris@14 709
Chris@14 710 x_warping = allocate<double>(m_warp_params.nsamps_twarp);
Chris@14 711 m_absFanChirpTransform = allocate<double>(m_warp_params.num_warps * (m_warp_params.nsamps_twarp/2 + 1));
Chris@14 712 m_auxFanChirpTransform = allocate<double>(2 * (m_warp_params.nsamps_twarp/2 + 1));
Chris@14 713 fft_xwarping = new FFTReal(m_warp_params.nsamps_twarp);
Chris@0 714 }
Chris@0 715
Chris@0 716 void
Chris@0 717 FChTransformF0gram::design_warps(double * freq_relative, double * t_orig, double * t_warp) {
Chris@0 718 /* the warping is done by interpolating the original signal in time instants
Chris@0 719 given by the desired frequency deviation, to do this, the interpolation
Chris@0 720 instants are stored in a structure as an integer index and a fractional value
Chris@0 721 hypothesis: sampling frequency at the central point equals the original
Chris@7 722 */
Chris@0 723
Chris@14 724 m_warpings.pos_int = allocate<int>(m_warp_params.num_warps * m_warp_params.nsamps_twarp);
Chris@14 725 m_warpings.pos_frac = allocate<double>(m_warp_params.num_warps * m_warp_params.nsamps_twarp);
Chris@0 726
Chris@7 727 // vector of phase values
Chris@14 728 double *phi = allocate<double>(m_warpings.nsamps_torig);
Chris@7 729 double aux;
Chris@0 730
Chris@7 731 // warped positions
Chris@14 732 double *pos1 = allocate<double>(m_warp_params.nsamps_twarp*m_warp_params.num_warps);
Chris@0 733
Chris@10 734 for (int i = 0; i < m_warp_params.num_warps; i++) {
Chris@0 735
Chris@7 736 // integration of relative frequency to obtain phase values
Chris@14 737 Utils::cumtrapz(t_orig, freq_relative + i*(m_warpings.nsamps_torig), m_warpings.nsamps_torig, phi);
Chris@0 738
Chris@7 739 // centering of phase values to force original frequency in the middle
Chris@7 740 aux = phi[m_warpings.nsamps_torig/2];
Chris@10 741 for (int j = 0; j < m_warpings.nsamps_torig; j++) {
Chris@7 742 phi[j] -= aux;
Chris@7 743 } //for
Chris@0 744
Chris@7 745 // interpolation of phase values to obtain warped positions
Chris@14 746 Utils::interp1(phi, t_orig, m_warpings.nsamps_torig, t_warp, pos1 + i*m_warp_params.nsamps_twarp, m_warp_params.nsamps_twarp);
Chris@0 747 }
Chris@0 748
Chris@0 749 // % previous sample index
Chris@0 750 // pos1_int = uint32(floor(pos1))';
Chris@0 751 // % integer corresponding to previous sample index in "c"
Chris@0 752 // warps.pos1_int = (pos1_int - uint32(1));
Chris@0 753 // % fractional value that defines the warped position
Chris@0 754 // warps.pos1_frac = (double(pos1)' - double(pos1_int));
Chris@0 755
Chris@10 756 for (int j = 0; j < m_warp_params.nsamps_twarp*m_warp_params.num_warps; j++) {
Chris@7 757 // previous sample index
Chris@7 758 pos1[j] = pos1[j]*m_warpings.fs_orig + m_warpings.nsamps_torig/2 + 1;
Chris@10 759 m_warpings.pos_int[j] = (int) pos1[j];
Chris@7 760 m_warpings.pos_frac[j] = pos1[j] - (double)(m_warpings.pos_int[j]);
Chris@7 761 } //for
Chris@0 762
Chris@14 763 deallocate(phi);
Chris@14 764 deallocate(pos1);
Chris@0 765 }
Chris@0 766
Chris@0 767 void
Chris@0 768 FChTransformF0gram::define_warps_linear_chirps(double * freq_relative, double * t_orig) {
Chris@0 769 /** define warps as relative frequency deviation from original frequency
Chris@7 770 t_orig : time vector
Chris@7 771 freq_relative : relative frequency deviations
Chris@7 772 */
Chris@0 773 if (m_warp_params.alpha_dist == 0) {
Chris@0 774
Chris@0 775 // linear alpha values spacing
Chris@14 776 m_warpings.chirp_rates = allocate<double>(m_warp_params.num_warps);
Chris@0 777 // WARNING m_warp_params.num_warps must be odd
Chris@0 778 m_warpings.chirp_rates[0] = -m_warp_params.alpha_max;
Chris@0 779 double increment = (double) m_warp_params.alpha_max / ((m_warp_params.num_warps - 1) / 2);
Chris@0 780
Chris@10 781 for (int ind = 1; ind < m_warp_params.num_warps; ind++) {
Chris@0 782 m_warpings.chirp_rates[ind] = m_warpings.chirp_rates[ind - 1] + increment;
Chris@0 783 }
Chris@0 784 // force zero value
Chris@0 785 m_warpings.chirp_rates[(int) ((m_warp_params.num_warps - 1) / 2)] = 0;
Chris@0 786
Chris@0 787 } else {
Chris@0 788 // log alpha values spacing
Chris@14 789 m_warpings.chirp_rates = allocate<double>(m_warp_params.num_warps);
Chris@0 790
Chris@0 791 // force zero value
Chris@0 792 int middle_point = (int) ((m_warp_params.num_warps - 1) / 2);
Chris@0 793 m_warpings.chirp_rates[middle_point] = 0;
Chris@0 794
Chris@0 795 double logMax = log10(m_warp_params.alpha_max + 1);
Chris@0 796 double increment = logMax / ((m_warp_params.num_warps - 1) / 2.0f);
Chris@0 797 double exponent = 0;
Chris@0 798
Chris@0 799 // fill positive values
Chris@0 800 int ind_log = middle_point;
Chris@10 801 for (int ind = 0; ind < (m_warp_params.num_warps + 1) / 2; ind++) {
Chris@0 802 m_warpings.chirp_rates[ind_log] = pow(10, exponent) - 1;
Chris@0 803 exponent += increment;
Chris@0 804 ind_log++;
Chris@0 805 }
Chris@0 806 // fill negative values
Chris@10 807 for (int ind = 0; ind < (m_warp_params.num_warps - 1) / 2; ind++) {
Chris@0 808 m_warpings.chirp_rates[ind] = -m_warpings.chirp_rates[m_warp_params.num_warps - 1 - ind];
Chris@0 809 }
Chris@0 810 }
Chris@0 811
Chris@0 812 // compute relative frequency deviation
Chris@14 813 for (int i = 0; i < m_warpings.nsamps_torig; i++) {
Chris@14 814 for (int j = 0; j < m_warp_params.num_warps; j++) {
Chris@0 815 freq_relative[j * m_warpings.nsamps_torig + i] = 1.0 + t_orig[i] * m_warpings.chirp_rates[j];
Chris@14 816 }
Chris@14 817 }
Chris@0 818 }
Chris@0 819
Chris@0 820 void
Chris@14 821 FChTransformF0gram::design_LPF()
Chris@14 822 {
Chris@14 823 double *lp_LPFWindow_aux = allocate<double>(m_blockSize/2+1);
Chris@14 824 mp_LPFWindow = allocate<double>(m_blockSize/2+1);
Chris@0 825
Chris@10 826 int i_max = (int) ((2.0*m_fmax/m_fs) * ( (double)m_blockSize / 2.0 + 1.0 ));
Chris@10 827 for (int i = 0; i < m_blockSize/2+1; i++) {
Chris@0 828 if (i >= i_max) {
Chris@0 829 lp_LPFWindow_aux[i] = 0.0;
Chris@0 830 } else {
Chris@0 831 lp_LPFWindow_aux[i] = 1.0;
Chris@0 832 }
Chris@0 833 }
Chris@14 834
Chris@14 835 LPF_time = allocate_and_zero<double>(m_warpings.nsamps_torig);
Chris@14 836 LPF_frequency = allocate_and_zero<double>(2 * (m_warpings.nsamps_torig/2 + 1));
Chris@14 837
Chris@14 838 fft_forward_LPF = new FFTReal(m_blockSize);
Chris@14 839 fft_inverse_LPF = new FFTReal(m_warpings.nsamps_torig);
Chris@0 840
Chris@10 841 int winWidth = 11;
Chris@14 842 double *lp_hanningWindow = allocate<double>(winWidth);
Chris@0 843 double accum=0;
Chris@10 844 for (int i = 0; i < winWidth; i++) {
Chris@0 845 lp_hanningWindow[i]=0.5*(1.0-cos(2*M_PI*(double)(i+1)/((double)winWidth+1.0)));
Chris@0 846 accum+=lp_hanningWindow[i];
Chris@0 847
Chris@0 848 }
Chris@10 849 for (int i = 0; i < winWidth; i++) { //window normalization
Chris@0 850 lp_hanningWindow[i]=lp_hanningWindow[i]/accum;
Chris@0 851 }
Chris@10 852 for (int i = 0; i < m_blockSize/2+1; i++) {
Chris@0 853 //if (((i-(winWidth-1)/2)<0)||(i+(winWidth-1))/2>m_blockSize/2-1) {//consideramos winWidth impar, si la ventana sale del arreglo se rellena con el valor origianl
Chris@7 854 if ( (i > (i_max + (winWidth-1)/2)) || (i <= (i_max - (winWidth-1)/2)) ) {
Chris@0 855 mp_LPFWindow[i]=lp_LPFWindow_aux[i];
Chris@0 856 } else {
Chris@0 857 accum=0;
Chris@10 858 for (int j = -((winWidth-1)/2); j <= (winWidth-1)/2; j++) {
Chris@0 859 accum+=lp_LPFWindow_aux[i-j]*lp_hanningWindow[j+(winWidth-1)/2];
Chris@7 860 }
Chris@0 861 mp_LPFWindow[i]=accum;
Chris@0 862 }
Chris@0 863 }
Chris@0 864
Chris@14 865 deallocate(lp_LPFWindow_aux);
Chris@14 866 deallocate(lp_hanningWindow);
Chris@0 867 }
Chris@0 868
Chris@14 869 void FChTransformF0gram::apply_LPF()
Chris@14 870 {
Chris@14 871 fft_forward_LPF->forward(LPF_time, LPF_frequency);
Chris@14 872
Chris@10 873 for (int i = 0; i < m_blockSize/2+1; i++) {
Chris@16 874 LPF_frequency[i*2] *= mp_LPFWindow[i];
Chris@16 875 LPF_frequency[i*2 + 1] *= mp_LPFWindow[i];
Chris@0 876 }
Chris@14 877
Chris@14 878 fft_inverse_LPF->inverse(LPF_frequency, LPF_time);
Chris@20 879
Chris@7 880 // TODO ver si hay que hacer fftshift para corregir la fase respecto al centro del frame.
Chris@7 881 // nota: ademÔs de aplicar el LPF, esta función resamplea la señal original.
Chris@0 882 }
Chris@0 883
Chris@14 884 void FChTransformF0gram::clean_LPF()
Chris@14 885 {
Chris@14 886 delete fft_forward_LPF;
Chris@14 887 delete fft_inverse_LPF;
Chris@14 888 deallocate(LPF_time);
Chris@14 889 deallocate(LPF_frequency);
Chris@14 890 deallocate(mp_LPFWindow);
Chris@0 891 }
Chris@0 892
Chris@14 893 void FChTransformF0gram::reset()
Chris@14 894 {
Chris@0 895 }
Chris@0 896
Chris@0 897 FChTransformF0gram::FeatureSet
Chris@5 898 FChTransformF0gram::process(const float *const *inputBuffers, Vamp::RealTime) {
Chris@0 899
Chris@20 900 if (!m_initialised) return FeatureSet();
Chris@20 901
Chris@7 902 /* PSEUDOCƓDIGO:
Chris@7 903 - Aplicar FFT al frame entero.
Chris@7 904 - Filtro pasabajos en frecuencia.
Chris@7 905 - FFT inversa al frame entero.
Chris@7 906 -----------------------------------------------------------------------------
Chris@7 907 - Para cada warp: *Si es un espectrograma direccional (un solo warp
Chris@7 908 => no es para cada warp sino para el elegido)
Chris@7 909 - Hacer la interpolación con interp1q.
Chris@7 910 - Aplicar la FFT al frame warpeado.
Chris@7 911 - (Opcional) GLogS.
Chris@7 912 - ...
Chris@7 913 */
Chris@0 914
Chris@0 915 //---------------------------------------------------------------------------
Chris@7 916 FeatureSet fs;
Chris@0 917
Chris@7 918 #ifdef DEBUG
Chris@16 919 fprintf(stderr, "\n ----- DEBUG INFORMATION ----- \n");
Chris@16 920 fprintf(stderr, " m_fs = %f Hz.\n",m_fs);
Chris@16 921 fprintf(stderr, " fs_orig = %f Hz.\n",m_warpings.fs_orig);
Chris@16 922 fprintf(stderr, " fs_warp = %f Hz.\n",m_warpings.fs_warp);
Chris@16 923 fprintf(stderr, " m_blockSize = %d.\n",m_blockSize);
Chris@16 924 fprintf(stderr, " m_warpings.nsamps_torig = %d.\n",m_warpings.nsamps_torig);
Chris@24 925 fprintf(stderr, " m_warp_params.nsamps_twarp = %d.\n",m_warp_params.nsamps_twarp);
Chris@16 926 fprintf(stderr, " m_warp_params.num_warps = %d.\n",m_warp_params.num_warps);
Chris@16 927 fprintf(stderr, " m_glogs_harmonic_count = %d.\n",m_glogs_harmonic_count);
Chris@7 928 #endif
Chris@0 929
Chris@20 930 for (int i = 0; i < m_blockSize - m_stepSize; ++i) {
Chris@20 931 m_inputBuffer[i] = m_inputBuffer[i + m_stepSize];
Chris@0 932 }
Chris@20 933 for (int i = 0; i < m_blockSize/2; ++i) {
Chris@20 934 m_inputBuffer[m_blockSize/2 + i] = inputBuffers[0][i];
Chris@20 935 }
Chris@20 936 for (int i = 0; i < m_blockSize; ++i) {
Chris@20 937 LPF_time[i] = m_inputBuffer[i] * m_timeWindow[i];
Chris@20 938 }
Chris@20 939 for (int i = 0; i < m_blockSize; ++i) {
Chris@20 940 LPF_time[m_blockSize + i] = 0.0;
Chris@20 941 }
Chris@20 942
Chris@7 943 apply_LPF();
Chris@7 944 // SeƱal filtrada queda en LPF_time
Chris@0 945
Chris@7 946 Feature feature;
Chris@0 947 feature.hasTimestamp = false;
Chris@0 948
Chris@15 949 if (m_processingMode == ModeRoughSpectrogram) {
Chris@15 950 feature.values = vector<float>(m_warp_params.nsamps_twarp/2+1, 0.f);
Chris@15 951 }
Chris@15 952
Chris@0 953 // ----------------------------------------------------------------------------------------------
Chris@0 954 // Hanning window & FFT for all warp directions
Chris@0 955
Chris@7 956 double max_glogs = -DBL_MAX;
Chris@10 957 int ind_max_glogs = 0;
Chris@0 958
Chris@10 959 for (int i_warp = 0; i_warp < m_warp_params.num_warps; i_warp++) {
Chris@16 960
Chris@7 961 // Interpolate
Chris@14 962 Utils::interp1q(LPF_time, (m_warpings.pos_int) + i_warp*m_warp_params.nsamps_twarp, m_warpings.pos_frac + i_warp*m_warp_params.nsamps_twarp, x_warping, m_warp_params.nsamps_twarp);
Chris@0 963
Chris@7 964 // Apply window
Chris@10 965 for (int i = 0; i < m_warp_params.nsamps_twarp; i++) {
Chris@7 966 x_warping[i] *= mp_HanningWindow[i];
Chris@7 967 }
Chris@0 968
Chris@7 969 // Transform
Chris@14 970 fft_xwarping->forward(x_warping, m_auxFanChirpTransform);
Chris@0 971
Chris@15 972 if (m_processingMode == ModeRoughSpectrogram) {
Chris@15 973 for (int i = 0; i < (m_warp_params.nsamps_twarp/2+1); i++) {
Chris@15 974 double abs = sqrt(m_auxFanChirpTransform[i*2]*m_auxFanChirpTransform[i*2]+m_auxFanChirpTransform[i*2+1]*m_auxFanChirpTransform[i*2+1]);
Chris@15 975 if (abs > feature.values[i]) {
Chris@15 976 feature.values[i] = abs;
Chris@15 977 }
Chris@15 978 }
Chris@15 979 continue;
Chris@15 980 }
Chris@15 981
Chris@7 982 // Copy result
Chris@7 983 double *aux_abs_fcht = m_absFanChirpTransform + i_warp*(m_warp_params.nsamps_twarp/2+1);
Chris@10 984 for (int i = 0; i < (m_warp_params.nsamps_twarp/2+1); i++) {
Chris@14 985 aux_abs_fcht[i] = log10(1.0 + 10.0*sqrt(m_auxFanChirpTransform[i*2]*m_auxFanChirpTransform[i*2]+m_auxFanChirpTransform[i*2+1]*m_auxFanChirpTransform[i*2+1]));
Chris@7 986 }
Chris@0 987
Chris@0 988 // -----------------------------------------------------------------------------------------
Chris@0 989 // GLogS
Chris@14 990 Utils::interp1q(aux_abs_fcht, m_glogs_posint, m_glogs_posfrac, m_glogs_interp, m_glogs_harmonic_count);
Chris@10 991 int glogs_ind = 0;
Chris@10 992 for (int i = 0; i < m_glogs_num_f0s; i++) {
Chris@7 993 double glogs_accum = 0;
Chris@10 994 for (int j = 1; j <= m_glogs_n[i]; j++) {
Chris@7 995 glogs_accum += m_glogs_interp[glogs_ind++];
Chris@7 996 }
Chris@7 997 m_glogs[i + i_warp*m_glogs_num_f0s] = glogs_accum/(double)m_glogs_n[i];
Chris@7 998 }
Chris@0 999
Chris@0 1000 // Sub/super harmonic correction
Chris@14 1001 Utils::interp1q(m_glogs + i_warp*m_glogs_num_f0s, m_glogs_third_harmonic_posint, m_glogs_third_harmonic_posfrac, m_glogs_third_harmonic, (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@14 1002 Utils::interp1q(m_glogs + i_warp*m_glogs_num_f0s, m_glogs_fifth_harmonic_posint, m_glogs_fifth_harmonic_posfrac, m_glogs_fifth_harmonic, (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@10 1003 for (int i = m_glogs_num_f0s-1; i >= m_glogs_init_f0s; i--) {
Chris@7 1004 m_glogs[i + i_warp*m_glogs_num_f0s] -= MAX(MAX(m_glogs[i-m_f0_params.num_f0s_per_oct + i_warp*m_glogs_num_f0s],m_glogs_third_harmonic[i-m_glogs_init_f0s]),m_glogs_fifth_harmonic[i-m_glogs_init_f0s]);
Chris@7 1005 }
Chris@10 1006 for (int i = m_glogs_init_f0s; i < m_glogs_num_f0s-m_f0_params.num_f0s_per_oct; i++) {
Chris@7 1007 m_glogs[i + i_warp*m_glogs_num_f0s] -= 0.3*m_glogs[i+m_f0_params.num_f0s_per_oct + i_warp*m_glogs_num_f0s];
Chris@7 1008 // Median, sigma $ weights correction
Chris@7 1009 m_glogs[i + i_warp*m_glogs_num_f0s] = (m_glogs[i + i_warp*m_glogs_num_f0s]-m_glogs_median_correction[i-m_glogs_init_f0s])*m_glogs_sigma_correction[i-m_glogs_init_f0s]*m_glogs_f0_preference_weights[i-m_glogs_init_f0s];
Chris@7 1010 }
Chris@0 1011
Chris@7 1012 // Look for maximum value to determine best direction
Chris@10 1013 for (int i = m_glogs_init_f0s; i < m_glogs_num_f0s-m_f0_params.num_f0s_per_oct; i++) {
Chris@7 1014 if (m_glogs[i + i_warp*m_glogs_num_f0s] > max_glogs) {
Chris@7 1015 max_glogs = m_glogs[i + i_warp*m_glogs_num_f0s];
Chris@7 1016 ind_max_glogs = i_warp;
Chris@7 1017 }
Chris@7 1018 }
Chris@7 1019 }
Chris@0 1020
Chris@15 1021 if (m_processingMode == ModeRoughSpectrogram) {
Chris@15 1022
Chris@15 1023 // already accumulated our return values in feature
Chris@19 1024 fs[0].push_back(feature);
Chris@15 1025
Chris@15 1026 } else if (m_processingMode == ModeSpectrogram) {
Chris@15 1027
Chris@15 1028 for (int i = 0; i < m_warp_params.nsamps_twarp/2+1; i++) {
Chris@15 1029 feature.values.push_back(pow(10.0, m_absFanChirpTransform[ind_max_glogs * (m_warp_params.nsamps_twarp/2+1) + i]) - 1.0);
Chris@15 1030 }
Chris@19 1031 fs[0].push_back(feature);
Chris@15 1032
Chris@15 1033 } else { // f0gram
Chris@15 1034
Chris@19 1035 int bestIndex = -1;
Chris@19 1036
Chris@15 1037 for (int i=m_glogs_init_f0s; i< m_glogs_num_f0s - m_f0_params.num_f0s_per_oct; i++) {
Chris@19 1038 double value = 0.0;
Chris@15 1039 switch (m_f0gram_mode) {
Chris@15 1040 case AllBinsOfBestDirection:
Chris@19 1041 value = m_glogs[i+(int)ind_max_glogs*(int)m_glogs_num_f0s];
Chris@15 1042 break;
Chris@15 1043 case BestBinOfAllDirections:
Chris@15 1044 max_glogs = -DBL_MAX;
Chris@15 1045 for (int i_warp = 0; i_warp < m_warp_params.num_warps; i_warp++) {
Chris@15 1046 if (m_glogs[i + i_warp*m_glogs_num_f0s] > max_glogs) {
Chris@15 1047 max_glogs = m_glogs[i + i_warp*m_glogs_num_f0s];
Chris@15 1048 ind_max_glogs = i_warp;
Chris@15 1049 }
Chris@7 1050 }
Chris@19 1051 value = max_glogs;
Chris@15 1052 break;
Chris@7 1053 }
Chris@19 1054 if (bestIndex < 0 || float(value) > feature.values[bestIndex]) {
Chris@19 1055 bestIndex = int(feature.values.size());
Chris@19 1056 }
Chris@19 1057 feature.values.push_back(float(value));
Chris@19 1058 }
Chris@19 1059
Chris@19 1060 fs[0].push_back(feature);
Chris@19 1061
Chris@19 1062 if (bestIndex >= 0) {
Chris@19 1063
Chris@19 1064 double bestValue = feature.values[bestIndex];
Chris@19 1065 set<double> ordered(feature.values.begin(), feature.values.end());
Chris@19 1066 vector<double> flattened(ordered.begin(), ordered.end());
Chris@19 1067 double median = flattened[flattened.size()/2];
Chris@19 1068 if (bestValue > median * 8.0) {
Chris@19 1069 Feature pfeature;
Chris@19 1070 pfeature.hasTimestamp = false;
Chris@19 1071 pfeature.values.push_back(m_f0s[bestIndex]);
Chris@19 1072 fs[1].push_back(pfeature);
Chris@19 1073 }
Chris@7 1074 }
Chris@7 1075 }
Chris@0 1076
Chris@7 1077 return fs;
Chris@0 1078 }
Chris@0 1079
Chris@0 1080 FChTransformF0gram::FeatureSet
Chris@0 1081 FChTransformF0gram::getRemainingFeatures() {
Chris@0 1082 return FeatureSet();
Chris@0 1083 }
Chris@0 1084
Chris@0 1085 void
Chris@0 1086 FChTransformF0gram::design_time_window() {
Chris@0 1087
Chris@20 1088 int transitionWidth = (int)m_blockSize/128 + 128;
Chris@14 1089 m_timeWindow = allocate<double>(m_blockSize);
Chris@14 1090 double *lp_transitionWindow = allocate<double>(transitionWidth);
Chris@0 1091
Chris@10 1092 for (int i = 0; i < m_blockSize; i++) {
Chris@7 1093 m_timeWindow[i] = 1.0;
Chris@7 1094 }
Chris@0 1095
Chris@10 1096 for (int i = 0; i < transitionWidth; i++) {
Chris@0 1097 lp_transitionWindow[i]=0.5*(1.0-cos(2*M_PI*(double)(i+1)/((double)transitionWidth+1.0)));
Chris@0 1098 }
Chris@0 1099
Chris@10 1100 for (int i = 0; i < transitionWidth/2; i++) {
Chris@7 1101 m_timeWindow[i] = lp_transitionWindow[i];
Chris@7 1102 m_timeWindow[m_blockSize-1-i] = lp_transitionWindow[transitionWidth-1-i];
Chris@7 1103 }
Chris@0 1104
Chris@7 1105 #ifdef DEBUG
Chris@7 1106 for (int i = 0; i < m_blockSize; i++) {
Chris@7 1107 if ((i<transitionWidth)) {
Chris@16 1108 fprintf(stderr, " m_timeWindow[%d] = %f.\n",i,m_timeWindow[i]);
Chris@7 1109 }
Chris@7 1110 }
Chris@7 1111 #endif
Chris@0 1112
Chris@14 1113 deallocate(lp_transitionWindow);
Chris@0 1114 }
Chris@0 1115