Chris@0: /*
Chris@0: copyright (C) 2011 I. Irigaray, M. Rocamora
Chris@0:
Chris@0: This program is free software: you can redistribute it and/or modify
Chris@0: it under the terms of the GNU General Public License as published by
Chris@0: the Free Software Foundation, either version 3 of the License, or
Chris@0: (at your option) any later version.
Chris@0:
Chris@0: This program is distributed in the hope that it will be useful,
Chris@0: but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@0: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@0: GNU General Public License for more details.
Chris@0:
Chris@0: You should have received a copy of the GNU General Public License
Chris@0: along with this program. If not, see .
Chris@7: */
Chris@0:
Chris@0: #include "FChTransformF0gram.h"
Chris@0: #include "FChTransformUtils.h"
Chris@0: #include
Chris@0: #include
Chris@14:
Chris@19: #include
Chris@19:
Chris@14: #include "bqvec/Allocators.h"
Chris@14:
Chris@14: using namespace breakfastquay;
Chris@14:
Chris@27: //#define DEBUG
Chris@7:
Chris@0: #define MAX(x, y) (((x) > (y)) ? (x) : (y))
Chris@0:
Chris@15: FChTransformF0gram::FChTransformF0gram(ProcessingMode mode,
Chris@15: float inputSampleRate) :
Chris@7: Plugin(inputSampleRate),
Chris@15: m_processingMode(mode),
Chris@20: m_initialised(false),
Chris@20: m_stepSize(256),
Chris@20: m_blockSize(8192) {
Chris@0:
Chris@24: nsamp_options.push_back(256);
Chris@24: nsamp_options.push_back(512);
Chris@24: nsamp_options.push_back(1024);
Chris@24: nsamp_options.push_back(2048);
Chris@24: nsamp_options.push_back(4096);
Chris@24: nsamp_options.push_back(8192);
Chris@24:
Chris@0: m_fs = inputSampleRate;
Chris@0: // max frequency of interest (Hz)
Chris@0: m_fmax = 10000.f;
Chris@0: // warping parameters
Chris@12: m_warp_params.nsamps_twarp = 2048;
Chris@0: m_warp_params.alpha_max = 4;
Chris@0: m_warp_params.num_warps = 21;
Chris@0: m_warp_params.fact_over_samp = 2;
Chris@0: m_warp_params.alpha_dist = 0;
Chris@0: // f0 parameters
Chris@0: m_f0_params.f0min = 80.0;
Chris@0: m_f0_params.num_octs = 4;
Chris@0: m_f0_params.num_f0s_per_oct = 192;
Chris@0: m_f0_params.prefer = true;
Chris@0: m_f0_params.prefer_mean = 60;
Chris@0: m_f0_params.prefer_stdev = 18;
Chris@0: // glogs parameters
Chris@0: m_glogs_params.HP_logS = true;
Chris@0: m_glogs_params.att_subharms = 1;
Chris@7: // display parameters
Chris@15: m_f0gram_mode = BestBinOfAllDirections;
Chris@0:
Chris@0: m_glogs_params.median_poly_coefs[0] = -0.000000058551680;
Chris@0: m_glogs_params.median_poly_coefs[1] = -0.000006945207775;
Chris@0: m_glogs_params.median_poly_coefs[2] = 0.002357223226588;
Chris@0:
Chris@0: m_glogs_params.sigma_poly_coefs[0] = 0.000000092782308;
Chris@0: m_glogs_params.sigma_poly_coefs[1] = 0.000057283574898;
Chris@0: m_glogs_params.sigma_poly_coefs[2] = 0.022199903714288;
Chris@0:
Chris@0: m_num_f0s = 0;
Chris@16: m_f0s = 0;
Chris@0: }
Chris@0:
Chris@14: FChTransformF0gram::~FChTransformF0gram()
Chris@14: {
Chris@20: if (!m_initialised) {
Chris@14: return; // nothing was allocated
Chris@14: }
Chris@14:
Chris@20: deallocate(m_inputBuffer);
Chris@20:
Chris@14: deallocate(m_warpings.pos_int);
Chris@14: deallocate(m_warpings.pos_frac);
Chris@14: deallocate(m_warpings.chirp_rates);
Chris@14:
Chris@14: clean_LPF();
Chris@14:
Chris@14: deallocate(m_timeWindow);
Chris@14:
Chris@14: deallocate(mp_HanningWindow);
Chris@14:
Chris@14: // Warping
Chris@14: deallocate(x_warping);
Chris@14: delete fft_xwarping;
Chris@14: deallocate(m_absFanChirpTransform);
Chris@14: deallocate(m_auxFanChirpTransform);
Chris@14:
Chris@14: // design_GLogS
Chris@14: deallocate(m_glogs_f0);
Chris@14: deallocate(m_glogs);
Chris@14: deallocate(m_glogs_n);
Chris@14: deallocate(m_glogs_index);
Chris@14: deallocate(m_glogs_posint);
Chris@14: deallocate(m_glogs_posfrac);
Chris@14: deallocate(m_glogs_interp);
Chris@14: deallocate(m_glogs_third_harmonic_posint);
Chris@14: deallocate(m_glogs_third_harmonic_posfrac);
Chris@14: deallocate(m_glogs_third_harmonic);
Chris@14: deallocate(m_glogs_fifth_harmonic_posint);
Chris@14: deallocate(m_glogs_fifth_harmonic_posfrac);
Chris@14: deallocate(m_glogs_fifth_harmonic);
Chris@14: deallocate(m_glogs_f0_preference_weights);
Chris@14: deallocate(m_glogs_median_correction);
Chris@14: deallocate(m_glogs_sigma_correction);
Chris@16:
Chris@16: deallocate(m_f0s);
Chris@0: }
Chris@0:
Chris@0: string
Chris@0: FChTransformF0gram::getIdentifier() const {
Chris@15: switch (m_processingMode) {
Chris@15: case ModeF0Gram: return "fchtransformf0gram";
Chris@15: case ModeSpectrogram: return "fchtransformspectrogram";
Chris@15: case ModeRoughSpectrogram: return "fchtransformrough";
Chris@15: }
Chris@17: throw std::logic_error("unknown mode");
Chris@0: }
Chris@0:
Chris@0: string
Chris@0: FChTransformF0gram::getName() const {
Chris@15: switch (m_processingMode) {
Chris@15: case ModeF0Gram: return "Fan Chirp Transform F0gram";
Chris@15: case ModeSpectrogram: return "Fan Chirp Transform Spectrogram";
Chris@15: case ModeRoughSpectrogram: return "Fan Chirp Transform Rough Spectrogram";
Chris@15: }
Chris@17: throw std::logic_error("unknown mode");
Chris@0: }
Chris@0:
Chris@0: string
Chris@0: FChTransformF0gram::getDescription() const {
Chris@15: switch (m_processingMode) {
Chris@15: case ModeF0Gram:
Chris@15: return "This plug-in produces a representation, called F0gram, which exhibits the salience of the fundamental frequency of the sound sources in the audio file. The computation of the F0gram makes use of the Fan Chirp Transform analysis. It is based on the article \"Fan chirp transform for music representation\" P. Cancela, E. Lopez, M. Rocamora, International Conference on Digital Audio Effects, 13th. DAFx-10. Graz, Austria - 6-10 Sep 2010.";
Chris@15: case ModeSpectrogram:
Chris@15: return "This plug-in produces a spectral representation of the audio using Fan Chirp Transform analysis.";
Chris@15: case ModeRoughSpectrogram:
Chris@15: return "This plug-in produces a more approximate spectral representation of the audio using Fan Chirp Transform analysis.";
Chris@15: }
Chris@17: throw std::logic_error("unknown mode");
Chris@0: }
Chris@0:
Chris@0: string
Chris@0: FChTransformF0gram::getMaker() const {
Chris@0: // Your name here
Chris@0: return "Audio Processing Group \n Universidad de la Republica";
Chris@0: }
Chris@0:
Chris@0: int
Chris@0: FChTransformF0gram::getPluginVersion() const {
Chris@0: // Increment this each time you release a version that behaves
Chris@0: // differently from the previous one
Chris@0: //
Chris@0: // 0 - initial version from scratch
Chris@15: return 1;
Chris@0: }
Chris@0:
Chris@0: string
Chris@0: FChTransformF0gram::getCopyright() const {
Chris@0: // This function is not ideally named. It does not necessarily
Chris@0: // need to say who made the plugin -- getMaker does that -- but it
Chris@0: // should indicate the terms under which it is distributed. For
Chris@0: // example, "Copyright (year). All Rights Reserved", or "GPL"
Chris@0: return "copyright (C) 2011 GPL - Audio Processing Group, UdelaR";
Chris@0: }
Chris@0:
Chris@0: FChTransformF0gram::InputDomain
Chris@0: FChTransformF0gram::getInputDomain() const {
Chris@0: return TimeDomain;
Chris@0: }
Chris@0:
Chris@0: size_t FChTransformF0gram::getPreferredBlockSize() const {
Chris@20: // We do our own accumulating into blocks within process()
Chris@20: return m_blockSize/2;
Chris@0: }
Chris@0:
Chris@0: size_t
Chris@0: FChTransformF0gram::getPreferredStepSize() const {
Chris@20: return m_stepSize;
Chris@0: }
Chris@0:
Chris@0: size_t
Chris@0: FChTransformF0gram::getMinChannelCount() const {
Chris@0: return 1;
Chris@0: }
Chris@0:
Chris@0: size_t
Chris@0: FChTransformF0gram::getMaxChannelCount() const {
Chris@0: return 1;
Chris@0: }
Chris@0:
Chris@0: FChTransformF0gram::ParameterList
Chris@0: FChTransformF0gram::getParameterDescriptors() const {
Chris@0: ParameterList list;
Chris@0:
Chris@0: // If the plugin has no adjustable parameters, return an empty
Chris@0: // list here (and there's no need to provide implementations of
Chris@0: // getParameter and setParameter in that case either).
Chris@0:
Chris@0: // Note that it is your responsibility to make sure the parameters
Chris@0: // start off having their default values (e.g. in the constructor
Chris@0: // above). The host needs to know the default value so it can do
Chris@0: // things like provide a "reset to default" function, but it will
Chris@0: // not explicitly set your parameters to their defaults for you if
Chris@0: // they have not changed in the mean time.
Chris@0:
Chris@0: // ============= WARPING PARAMETERS =============
Chris@0:
Chris@0: ParameterDescriptor fmax;
Chris@0: fmax.identifier = "fmax";
Chris@0: fmax.name = "Maximum frequency";
Chris@0: fmax.description = "Maximum frequency of interest for the analysis.";
Chris@0: fmax.unit = "Hz";
Chris@0: fmax.minValue = 2000;
Chris@0: fmax.maxValue = 22050;
Chris@0: fmax.defaultValue = 10000;
Chris@0: fmax.isQuantized = true;
Chris@0: fmax.quantizeStep = 1.0;
Chris@0: list.push_back(fmax);
Chris@0:
Chris@0: ParameterDescriptor nsamp;
Chris@24: nsamp.identifier = "nsamp_ix";
Chris@0: nsamp.name = "Number of samples";
Chris@0: nsamp.description = "Number of samples of the time warped frame";
Chris@24: nsamp.minValue = 0;
Chris@24: nsamp.maxValue = nsamp_options.size()-1;
Chris@24: nsamp.defaultValue = 3;
Chris@24: nsamp.isQuantized = true;
Chris@24: nsamp.quantizeStep = 1.0;
Chris@24: char label[100];
Chris@24: for (int i = 0; i < int(nsamp_options.size()); ++i) {
Chris@24: sprintf(label, "%d", nsamp_options[i]);
Chris@24: nsamp.valueNames.push_back(label);
Chris@24: }
Chris@0: nsamp.isQuantized = true;
Chris@0: nsamp.quantizeStep = 1.0;
Chris@0: list.push_back(nsamp);
Chris@0:
Chris@0: ParameterDescriptor alpha_max;
Chris@0: alpha_max.identifier = "alpha_max";
Chris@0: alpha_max.name = "Maximum alpha value";
Chris@0: alpha_max.description = "Maximum value for the alpha parameter of the transform.";
Chris@0: alpha_max.unit = "Hz/s";
Chris@0: alpha_max.minValue = -10;
Chris@0: alpha_max.maxValue = 10;
Chris@33: alpha_max.defaultValue = 4;
Chris@0: alpha_max.isQuantized = true;
Chris@0: alpha_max.quantizeStep = 1.0;
Chris@0: list.push_back(alpha_max);
Chris@0:
Chris@0: ParameterDescriptor num_warps;
Chris@0: num_warps.identifier = "num_warps";
Chris@0: num_warps.name = "Number of warpings";
Chris@0: num_warps.description = "Number of different warpings in the specified range (must be odd).";
Chris@0: num_warps.unit = "";
Chris@0: num_warps.minValue = 1;
Chris@0: num_warps.maxValue = 101;
Chris@0: num_warps.defaultValue = 21;
Chris@0: num_warps.isQuantized = true;
Chris@0: num_warps.quantizeStep = 2.0;
Chris@0: list.push_back(num_warps);
Chris@0:
Chris@0: ParameterDescriptor alpha_dist;
Chris@0: alpha_dist.identifier = "alpha_dist";
Chris@0: alpha_dist.name = "alpha distribution";
Chris@0: alpha_dist.description = "Type of distribution of alpha values (linear or log).";
Chris@0: alpha_dist.unit = "";
Chris@0: alpha_dist.minValue = 0;
Chris@0: alpha_dist.maxValue = 1;
Chris@33: alpha_dist.defaultValue = 0;
Chris@0: alpha_dist.isQuantized = true;
Chris@0: alpha_dist.quantizeStep = 1.0;
Chris@0: // lin (0), log (1)
Chris@0: alpha_dist.valueNames.push_back("lin");
Chris@0: alpha_dist.valueNames.push_back("log");
Chris@0: list.push_back(alpha_dist);
Chris@0:
Chris@0: // ============= F0-GRAM PARAMETERS =============
Chris@0:
Chris@0: ParameterDescriptor f0min;
Chris@0: f0min.identifier = "f0min";
Chris@0: f0min.name = "min f0";
Chris@0: f0min.description = "Minimum fundamental frequency (f0) value.";
Chris@0: f0min.unit = "Hz";
Chris@0: f0min.minValue = 1;
Chris@0: f0min.maxValue = 500;
Chris@0: f0min.defaultValue = 80;
Chris@0: f0min.isQuantized = true;
Chris@0: f0min.quantizeStep = 1.0;
Chris@0: list.push_back(f0min);
Chris@0:
Chris@0: ParameterDescriptor num_octs;
Chris@0: num_octs.identifier = "num_octs";
Chris@0: num_octs.name = "number of octaves";
Chris@0: num_octs.description = "Number of octaves for F0gram computation.";
Chris@0: num_octs.unit = "";
Chris@0: num_octs.minValue = 1;
Chris@0: num_octs.maxValue = 10;
Chris@0: num_octs.defaultValue = 4;
Chris@0: num_octs.isQuantized = true;
Chris@0: num_octs.quantizeStep = 1.0;
Chris@0: list.push_back(num_octs);
Chris@0:
Chris@0: ParameterDescriptor f0s_per_oct;
Chris@0: f0s_per_oct.identifier = "f0s_per_oct";
Chris@0: f0s_per_oct.name = "f0 values per octave";
Chris@0: f0s_per_oct.description = "Number of f0 values per octave.";
Chris@0: f0s_per_oct.unit = "";
Chris@0: f0s_per_oct.minValue = 12;
Chris@0: f0s_per_oct.maxValue = 768;
Chris@0: f0s_per_oct.defaultValue = 192;
Chris@0: f0s_per_oct.isQuantized = true;
Chris@0: f0s_per_oct.quantizeStep = 1.0;
Chris@0: list.push_back(f0s_per_oct);
Chris@0:
Chris@0: ParameterDescriptor f0_prefer_fun;
Chris@0: f0_prefer_fun.identifier = "f0_prefer_fun";
Chris@22: f0_prefer_fun.name = "Use f0 weighting";
Chris@22: f0_prefer_fun.description = "Whether to use a f0 weighting function to prefer frequencies nearer a mean value.";
Chris@0: f0_prefer_fun.unit = "";
Chris@0: f0_prefer_fun.minValue = 0;
Chris@0: f0_prefer_fun.maxValue = 1;
Chris@0: f0_prefer_fun.defaultValue = 1;
Chris@0: f0_prefer_fun.isQuantized = true;
Chris@0: f0_prefer_fun.quantizeStep = 1.0;
Chris@0: list.push_back(f0_prefer_fun);
Chris@0:
Chris@0: ParameterDescriptor f0_prefer_mean;
Chris@0: f0_prefer_mean.identifier = "f0_prefer_mean";
Chris@22: f0_prefer_mean.name = "Mean pitch for f0 weighting";
Chris@0: f0_prefer_mean.description = "Mean value for f0 weighting function (MIDI number).";
Chris@0: f0_prefer_mean.unit = "";
Chris@0: f0_prefer_mean.minValue = 1;
Chris@0: f0_prefer_mean.maxValue = 127;
Chris@0: f0_prefer_mean.defaultValue = 60;
Chris@0: f0_prefer_mean.isQuantized = true;
Chris@0: f0_prefer_mean.quantizeStep = 1.0;
Chris@0: list.push_back(f0_prefer_mean);
Chris@0:
Chris@0: ParameterDescriptor f0_prefer_stdev;
Chris@0: f0_prefer_stdev.identifier = "f0_prefer_stdev";
Chris@22: f0_prefer_stdev.name = "Stdev for f0 weighting";
Chris@22: f0_prefer_stdev.description = "Standard deviation for f0 weighting function (MIDI number).";
Chris@0: f0_prefer_stdev.unit = "";
Chris@0: f0_prefer_stdev.minValue = 1;
Chris@0: f0_prefer_stdev.maxValue = 127;
Chris@0: f0_prefer_stdev.defaultValue = 18;
Chris@0: f0_prefer_stdev.isQuantized = true;
Chris@0: f0_prefer_stdev.quantizeStep = 1.0;
Chris@0: list.push_back(f0_prefer_stdev);
Chris@0:
Chris@0: ParameterDescriptor f0gram_mode;
Chris@0: f0gram_mode.identifier = "f0gram_mode";
Chris@0: f0gram_mode.name = "display mode of f0gram";
Chris@0: f0gram_mode.description = "Display all bins of the best direction, or the best bin for each direction.";
Chris@0: f0gram_mode.unit = "";
Chris@0: f0gram_mode.minValue = 0;
Chris@0: f0gram_mode.maxValue = 1;
Chris@0: f0gram_mode.defaultValue = 1;
Chris@0: f0gram_mode.isQuantized = true;
Chris@0: f0gram_mode.quantizeStep = 1.0;
Chris@0: list.push_back(f0gram_mode);
Chris@0:
Chris@0: return list;
Chris@0: }
Chris@0:
Chris@0: float
Chris@0: FChTransformF0gram::getParameter(string identifier) const {
Chris@0:
Chris@0: if (identifier == "fmax") {
Chris@0: return m_fmax;
Chris@24: } else if (identifier == "nsamp_ix") {
Chris@24: for (int i = 0; i < int(nsamp_options.size()); ++i) {
Chris@24: if (m_warp_params.nsamps_twarp == nsamp_options[i]) {
Chris@24: return i;
Chris@24: }
Chris@24: }
Chris@24: throw std::logic_error("internal error: nsamps_twarp not in nsamp_options");
Chris@0: } else if (identifier == "alpha_max") {
Chris@0: return m_warp_params.alpha_max;
Chris@0: } else if (identifier == "num_warps") {
Chris@0: return m_warp_params.num_warps;
Chris@0: } else if (identifier == "alpha_dist") {
Chris@0: return m_warp_params.alpha_dist;
Chris@0: } else if (identifier == "f0min") {
Chris@0: return m_f0_params.f0min;
Chris@0: } else if (identifier == "num_octs") {
Chris@0: return m_f0_params.num_octs;
Chris@0: } else if (identifier == "f0s_per_oct") {
Chris@0: return m_f0_params.num_f0s_per_oct;
Chris@0: } else if (identifier == "f0_prefer_fun") {
Chris@22: return m_f0_params.prefer ? 1.0 : 0.0;
Chris@0: } else if (identifier == "f0_prefer_mean") {
Chris@0: return m_f0_params.prefer_mean;
Chris@0: } else if (identifier == "f0_prefer_stdev") {
Chris@0: return m_f0_params.prefer_stdev;
Chris@7: } else if (identifier == "f0gram_mode") {
Chris@15: return m_f0gram_mode == BestBinOfAllDirections ? 1.0 : 0.0;
Chris@0: } else {
Chris@0: return 0.f;
Chris@0: }
Chris@0:
Chris@0: }
Chris@0:
Chris@15: void FChTransformF0gram::setParameter(string identifier, float value)
Chris@15: {
Chris@0: if (identifier == "fmax") {
Chris@0: m_fmax = value;
Chris@24: } else if (identifier == "nsamp_ix") {
Chris@24: int n = int(roundf(value));
Chris@24: for (int i = 0; i < int(nsamp_options.size()); ++i) {
Chris@24: if (i == n) {
Chris@24: m_warp_params.nsamps_twarp = nsamp_options[i];
Chris@24: m_blockSize = m_warp_params.nsamps_twarp * 4;
Chris@24: }
Chris@24: }
Chris@0: } else if (identifier == "alpha_max") {
Chris@0: m_warp_params.alpha_max = value;
Chris@0: } else if (identifier == "num_warps") {
Chris@0: m_warp_params.num_warps = value;
Chris@0: } else if (identifier == "alpha_dist") {
Chris@0: m_warp_params.alpha_dist = value;
Chris@0: } else if (identifier == "f0min") {
Chris@0: m_f0_params.f0min = value;
Chris@0: } else if (identifier == "num_octs") {
Chris@0: m_f0_params.num_octs = value;
Chris@0: } else if (identifier == "f0s_per_oct") {
Chris@0: m_f0_params.num_f0s_per_oct = value;
Chris@0: } else if (identifier == "f0_prefer_fun") {
Chris@22: m_f0_params.prefer = (value > 0.5);
Chris@0: } else if (identifier == "f0_prefer_mean") {
Chris@0: m_f0_params.prefer_mean = value;
Chris@0: } else if (identifier == "f0_prefer_stdev") {
Chris@0: m_f0_params.prefer_stdev = value;
Chris@0: } else if (identifier == "f0gram_mode") {
Chris@15: m_f0gram_mode = (value > 0.5 ?
Chris@15: BestBinOfAllDirections :
Chris@15: AllBinsOfBestDirection);
Chris@15: } else {
Chris@15: cerr << "WARNING: Unknown parameter id \""
Chris@15: << identifier << "\"" << endl;
Chris@0: }
Chris@0: }
Chris@0:
Chris@0: FChTransformF0gram::ProgramList
Chris@0: FChTransformF0gram::getPrograms() const {
Chris@0: ProgramList list;
Chris@0: return list;
Chris@0: }
Chris@0:
Chris@0: FChTransformF0gram::OutputList
Chris@0: FChTransformF0gram::getOutputDescriptors() const {
Chris@0:
Chris@0: OutputList list;
Chris@0:
Chris@16: vector labels;
Chris@16: char label[100];
Chris@0:
Chris@16: if (m_processingMode == ModeF0Gram) {
Chris@16:
Chris@16: /* f0 values of F0gram grid as string values */
Chris@16: for (int i = 0; i < m_num_f0s; ++i) {
Chris@16: sprintf(label, "%4.2f Hz", m_f0s[i]);
Chris@16: labels.push_back(label);
Chris@16: }
Chris@16:
Chris@16: /* The F0gram */
Chris@16: OutputDescriptor d;
Chris@16: d.identifier = "f0gram";
Chris@19: d.name = "F0gram";
Chris@19: d.description = "The salience of the different f0s in the signal.";
Chris@16: d.hasFixedBinCount = true;
Chris@16: d.binCount = m_f0_params.num_octs * m_f0_params.num_f0s_per_oct;
Chris@16: d.binNames = labels;
Chris@16: d.hasKnownExtents = false;
Chris@16: d.isQuantized = false;
Chris@16: d.sampleType = OutputDescriptor::OneSamplePerStep;
Chris@16: d.hasDuration = false;
Chris@16: list.push_back(d);
Chris@16:
Chris@19: d.identifier = "pitch";
Chris@19: d.name = "Most salient pitch";
Chris@19: d.description = "The most salient f0 in the signal for each time step.";
Chris@19: d.unit = "Hz";
Chris@19: d.hasFixedBinCount = true;
Chris@19: d.binCount = 1;
Chris@19: d.binNames.clear();
Chris@19: d.hasKnownExtents = false;
Chris@19: d.isQuantized = false;
Chris@19: d.sampleType = OutputDescriptor::OneSamplePerStep;
Chris@19: d.hasDuration = false;
Chris@19: list.push_back(d);
Chris@19:
Chris@16: } else {
Chris@16:
Chris@16: for (int i = 0; i < m_warp_params.nsamps_twarp/2+1; ++i) {
Chris@24: double freq = i * (m_warpings.fs_warp / m_warp_params.nsamps_twarp);
Chris@16: sprintf(label, "%4.2f Hz", freq);
Chris@16: labels.push_back(label);
Chris@16: }
Chris@16:
Chris@16: OutputDescriptor d;
Chris@16: d.identifier = "spectrogram";
Chris@16: d.name = "Spectrogram";
Chris@16: d.description = "Time/frequency spectrogram derived from the Fan Chirp Transform output";
Chris@16: d.hasFixedBinCount = true;
Chris@16: d.binCount = m_warp_params.nsamps_twarp/2+1;
Chris@16: d.binNames = labels;
Chris@16: d.hasKnownExtents = false;
Chris@16: d.isQuantized = false;
Chris@16: d.sampleType = OutputDescriptor::OneSamplePerStep;
Chris@16: d.hasDuration = false;
Chris@16: list.push_back(d);
Chris@0: }
Chris@16:
Chris@0: return list;
Chris@0: }
Chris@0:
Chris@0: bool
Chris@0: FChTransformF0gram::initialise(size_t channels, size_t stepSize, size_t blockSize) {
Chris@0: if (channels < getMinChannelCount() ||
Chris@20: channels > getMaxChannelCount() ||
Chris@27: int(blockSize) != m_blockSize/2 ||
Chris@27: int(stepSize) != m_stepSize) {
Chris@14: return false;
Chris@14: }
Chris@0:
Chris@20: m_inputBuffer = allocate_and_zero(m_blockSize);
Chris@20:
Chris@0: // WARNING !!!
Chris@0: // these values in fact are determined by the sampling frequency m_fs
Chris@0: // the parameters used below correspond to default values i.e. m_fs = 44.100 Hz
Chris@0: //m_blockSize = 4 * m_warp_params.nsamps_twarp;
Chris@16: // m_stepSize = floor(m_hop / m_warp_params.fact_over_samp);
Chris@16:
Chris@16: /* design of FChT */
Chris@16: design_FChT();
Chris@0:
Chris@0: /* initialise m_glogs_params */
Chris@7: design_GLogS();
Chris@0:
Chris@7: design_LPF();
Chris@0:
Chris@7: design_time_window();
Chris@0:
Chris@7: // Create Hanning window for warped signals
Chris@14: mp_HanningWindow = allocate(m_warp_params.nsamps_twarp);
Chris@7: bool normalize = false;
Chris@14: Utils::hanning_window(mp_HanningWindow, m_warp_params.nsamps_twarp, normalize);
Chris@0:
Chris@16: m_num_f0s = m_f0_params.num_octs * m_f0_params.num_f0s_per_oct;
Chris@16: m_f0s = allocate(m_num_f0s);
Chris@16: for (int i = 0; i < m_num_f0s; ++i) {
Chris@16: m_f0s[i] = m_glogs_f0[m_glogs_init_f0s + i];
Chris@16: }
Chris@20:
Chris@20: m_initialised = true;
Chris@0: return true;
Chris@0: }
Chris@0:
Chris@0: void
Chris@0: FChTransformF0gram::design_GLogS() {
Chris@0:
Chris@7: // total number & initial quantity of f0s
Chris@16:
Chris@10: m_glogs_init_f0s = (int)(((double)m_f0_params.num_f0s_per_oct)*log2(5.0))+1;
Chris@7: m_glogs_num_f0s = (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct + m_glogs_init_f0s;
Chris@0:
Chris@7: // Initialize arrays
Chris@14: m_glogs_f0 = allocate(m_glogs_num_f0s);
Chris@14: m_glogs = allocate(m_glogs_num_f0s*m_warp_params.num_warps);
Chris@14: m_glogs_n = allocate(m_glogs_num_f0s);
Chris@14: m_glogs_index = allocate(m_glogs_num_f0s);
Chris@0:
Chris@7: // Compute f0 values
Chris@7: m_glogs_harmonic_count = 0;
Chris@7: double factor = (double)(m_warp_params.nsamps_twarp/2)/(double)(m_warp_params.nsamps_twarp/2+1);
Chris@10: for (int i = 0; i < m_glogs_num_f0s; i++) {
Chris@7: m_glogs_f0[i] = (m_f0_params.f0min/5.0)*pow(2.0,(double)i/(double)m_f0_params.num_f0s_per_oct);
Chris@7: // for every f0 compute number of partials less or equal than m_fmax.
Chris@7: m_glogs_n[i] = m_fmax*factor/m_glogs_f0[i];
Chris@7: m_glogs_index[i] = m_glogs_harmonic_count;
Chris@7: m_glogs_harmonic_count += m_glogs_n[i];
Chris@7: }
Chris@0:
Chris@7: // Initialize arrays for interpolation
Chris@14: m_glogs_posint = allocate(m_glogs_harmonic_count);
Chris@14: m_glogs_posfrac = allocate(m_glogs_harmonic_count);
Chris@14: m_glogs_interp = allocate(m_glogs_harmonic_count);
Chris@0:
Chris@7: // Compute int & frac of interpolation positions
Chris@10: int aux_index = 0;
Chris@7: double aux_pos;
Chris@10: for (int i = 0; i < m_glogs_num_f0s; i++) {
Chris@10: for (int j = 1; j <= m_glogs_n[i]; j++) {
Chris@18: aux_pos = ((double)j * m_glogs_f0[i]) * ((double)(m_warp_params.nsamps_twarp))/m_warpings.fs_warp;
Chris@10: m_glogs_posint[aux_index] = (int)aux_pos;
Chris@7: m_glogs_posfrac[aux_index] = aux_pos - (double)m_glogs_posint[aux_index];
Chris@7: aux_index++;
Chris@7: }
Chris@7: }
Chris@0:
Chris@7: // Third harmonic attenuation
Chris@7: double aux_third_harmonic;
Chris@14: m_glogs_third_harmonic_posint = allocate((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@14: m_glogs_third_harmonic_posfrac = allocate((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@10: for (int i = 0; i < (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct; i++) {
Chris@7: aux_third_harmonic = (double)i + (double)m_glogs_init_f0s - ((double)m_f0_params.num_f0s_per_oct)*log2(3.0);
Chris@10: m_glogs_third_harmonic_posint[i] = (int)aux_third_harmonic;
Chris@7: m_glogs_third_harmonic_posfrac[i] = aux_third_harmonic - (double)(m_glogs_third_harmonic_posint[i]);
Chris@7: }
Chris@14: m_glogs_third_harmonic = allocate((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@0:
Chris@7: // Fifth harmonic attenuation
Chris@7: double aux_fifth_harmonic;
Chris@14: m_glogs_fifth_harmonic_posint = allocate((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@14: m_glogs_fifth_harmonic_posfrac = allocate((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@10: for (int i = 0; i < (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct; i++) {
Chris@7: aux_fifth_harmonic = (double)i + (double)m_glogs_init_f0s - ((double)m_f0_params.num_f0s_per_oct)*log2(5.0);
Chris@10: m_glogs_fifth_harmonic_posint[i] = (int)aux_fifth_harmonic;
Chris@7: m_glogs_fifth_harmonic_posfrac[i] = aux_fifth_harmonic - (double)(m_glogs_fifth_harmonic_posint[i]);
Chris@7: }
Chris@14: m_glogs_fifth_harmonic = allocate((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@0:
Chris@7: // Normalization & attenuation windows
Chris@14: m_glogs_f0_preference_weights = allocate(m_f0_params.num_octs*m_f0_params.num_f0s_per_oct);
Chris@14: m_glogs_median_correction = allocate(m_f0_params.num_octs*m_f0_params.num_f0s_per_oct);
Chris@14: m_glogs_sigma_correction = allocate(m_f0_params.num_octs*m_f0_params.num_f0s_per_oct);
Chris@7: double MIDI_value;
Chris@10: for (int i = 0; i < m_f0_params.num_octs*m_f0_params.num_f0s_per_oct; i++) {
Chris@22: if (m_f0_params.prefer) {
Chris@22: MIDI_value = 69.0 + 12.0 * log2(m_glogs_f0[i + m_glogs_init_f0s]/440.0);
Chris@22: m_glogs_f0_preference_weights[i] = 1.0/sqrt(2.0*M_PI*m_f0_params.prefer_stdev*m_f0_params.prefer_stdev)*exp(-(MIDI_value-m_f0_params.prefer_mean)*(MIDI_value-m_f0_params.prefer_mean)/(2.0*m_f0_params.prefer_stdev*m_f0_params.prefer_stdev));
Chris@22: m_glogs_f0_preference_weights[i] = (0.01 + m_glogs_f0_preference_weights[i]) / (1.01);
Chris@22: } else {
Chris@22: m_glogs_f0_preference_weights[i] = 1.0;
Chris@22: }
Chris@0:
Chris@7: m_glogs_median_correction[i] = m_glogs_params.median_poly_coefs[0]*(i+1.0)*(i+1.0) + m_glogs_params.median_poly_coefs[1]*(i+1.0) + m_glogs_params.median_poly_coefs[2];
Chris@7: m_glogs_sigma_correction[i] = 1.0 / (m_glogs_params.sigma_poly_coefs[0]*(i+1.0)*(i+1.0) + m_glogs_params.sigma_poly_coefs[1]*(i+1.0) + m_glogs_params.sigma_poly_coefs[2]);
Chris@7: }
Chris@0: }
Chris@0:
Chris@0: void
Chris@0: FChTransformF0gram::design_FChT() {
Chris@0:
Chris@0: /* ============= WARPING DESIGN ============= */
Chris@0:
Chris@0: // sampling frequency after oversampling
Chris@0: m_warpings.fs_orig = m_warp_params.fact_over_samp * m_fs;
Chris@0:
Chris@0: // number of samples of the original signal frame
Chris@0: m_warpings.nsamps_torig = 4 * m_warp_params.fact_over_samp * m_warp_params.nsamps_twarp;
Chris@0: // equivalent to: m_warpings.nsamps_torig = m_warp_params.fact_over_samp * m_blockSize;
Chris@0:
Chris@0: // time instants of the original signal frame
Chris@14: double *t_orig = allocate(m_warpings.nsamps_torig);
Chris@10: for (int ind = 0; ind < m_warpings.nsamps_torig; ind++) {
Chris@0: t_orig[ind] = ((double)(ind + 1) - (double)m_warpings.nsamps_torig / 2.0) / m_warpings.fs_orig;
Chris@0: }
Chris@0:
Chris@0: // linear chirps warping definition as relative frequency deviation
Chris@7: //TODO
Chris@14: double *freq_relative = allocate(m_warpings.nsamps_torig * m_warp_params.num_warps);
Chris@0: define_warps_linear_chirps(freq_relative, t_orig);
Chris@0:
Chris@0: // maximum relative frequency deviation
Chris@0: double freq_relative_max = 0;
Chris@14: for (int i = 0; i < m_warpings.nsamps_torig; i++) {
Chris@14: for (int j = 0; j < m_warp_params.num_warps; j++) {
Chris@14: if (freq_relative_max < freq_relative[j * m_warpings.nsamps_torig + i]) {
Chris@0: freq_relative_max = freq_relative[j * m_warpings.nsamps_torig + i];
Chris@14: }
Chris@14: }
Chris@14: }
Chris@0:
Chris@0: // sampling frequency of warped signal to be free of aliasing up to fmax
Chris@0: m_warpings.fs_warp = 2 * m_fmax * freq_relative_max;
Chris@0:
Chris@0: // time instants of the warped signal frame
Chris@14: double *t_warp = allocate(m_warp_params.nsamps_twarp);
Chris@10: for (int ind = 0; ind < m_warp_params.nsamps_twarp; ind++) {
Chris@0: t_warp[ind] = ((double)((int)(ind + 1)- (int)m_warp_params.nsamps_twarp / 2)) / (double)m_warpings.fs_warp;
Chris@0: }
Chris@0:
Chris@0: // design of warpings for efficient interpolation
Chris@0: design_warps(freq_relative, t_orig, t_warp);
Chris@0:
Chris@14: deallocate(freq_relative);
Chris@14: deallocate(t_orig);
Chris@14: deallocate(t_warp);
Chris@14:
Chris@14: x_warping = allocate(m_warp_params.nsamps_twarp);
Chris@14: m_absFanChirpTransform = allocate(m_warp_params.num_warps * (m_warp_params.nsamps_twarp/2 + 1));
Chris@14: m_auxFanChirpTransform = allocate(2 * (m_warp_params.nsamps_twarp/2 + 1));
Chris@14: fft_xwarping = new FFTReal(m_warp_params.nsamps_twarp);
Chris@0: }
Chris@0:
Chris@0: void
Chris@0: FChTransformF0gram::design_warps(double * freq_relative, double * t_orig, double * t_warp) {
Chris@0: /* the warping is done by interpolating the original signal in time instants
Chris@0: given by the desired frequency deviation, to do this, the interpolation
Chris@0: instants are stored in a structure as an integer index and a fractional value
Chris@0: hypothesis: sampling frequency at the central point equals the original
Chris@7: */
Chris@0:
Chris@14: m_warpings.pos_int = allocate(m_warp_params.num_warps * m_warp_params.nsamps_twarp);
Chris@14: m_warpings.pos_frac = allocate(m_warp_params.num_warps * m_warp_params.nsamps_twarp);
Chris@0:
Chris@7: // vector of phase values
Chris@14: double *phi = allocate(m_warpings.nsamps_torig);
Chris@7: double aux;
Chris@0:
Chris@7: // warped positions
Chris@14: double *pos1 = allocate(m_warp_params.nsamps_twarp*m_warp_params.num_warps);
Chris@0:
Chris@10: for (int i = 0; i < m_warp_params.num_warps; i++) {
Chris@0:
Chris@7: // integration of relative frequency to obtain phase values
Chris@14: Utils::cumtrapz(t_orig, freq_relative + i*(m_warpings.nsamps_torig), m_warpings.nsamps_torig, phi);
Chris@0:
Chris@7: // centering of phase values to force original frequency in the middle
Chris@7: aux = phi[m_warpings.nsamps_torig/2];
Chris@10: for (int j = 0; j < m_warpings.nsamps_torig; j++) {
Chris@7: phi[j] -= aux;
Chris@7: } //for
Chris@0:
Chris@7: // interpolation of phase values to obtain warped positions
Chris@14: Utils::interp1(phi, t_orig, m_warpings.nsamps_torig, t_warp, pos1 + i*m_warp_params.nsamps_twarp, m_warp_params.nsamps_twarp);
Chris@0: }
Chris@0:
Chris@0: // % previous sample index
Chris@0: // pos1_int = uint32(floor(pos1))';
Chris@0: // % integer corresponding to previous sample index in "c"
Chris@0: // warps.pos1_int = (pos1_int - uint32(1));
Chris@0: // % fractional value that defines the warped position
Chris@0: // warps.pos1_frac = (double(pos1)' - double(pos1_int));
Chris@0:
Chris@10: for (int j = 0; j < m_warp_params.nsamps_twarp*m_warp_params.num_warps; j++) {
Chris@7: // previous sample index
Chris@7: pos1[j] = pos1[j]*m_warpings.fs_orig + m_warpings.nsamps_torig/2 + 1;
Chris@10: m_warpings.pos_int[j] = (int) pos1[j];
Chris@7: m_warpings.pos_frac[j] = pos1[j] - (double)(m_warpings.pos_int[j]);
Chris@7: } //for
Chris@0:
Chris@14: deallocate(phi);
Chris@14: deallocate(pos1);
Chris@0: }
Chris@0:
Chris@0: void
Chris@0: FChTransformF0gram::define_warps_linear_chirps(double * freq_relative, double * t_orig) {
Chris@0: /** define warps as relative frequency deviation from original frequency
Chris@7: t_orig : time vector
Chris@7: freq_relative : relative frequency deviations
Chris@7: */
Chris@0: if (m_warp_params.alpha_dist == 0) {
Chris@0:
Chris@0: // linear alpha values spacing
Chris@14: m_warpings.chirp_rates = allocate(m_warp_params.num_warps);
Chris@0: // WARNING m_warp_params.num_warps must be odd
Chris@0: m_warpings.chirp_rates[0] = -m_warp_params.alpha_max;
Chris@0: double increment = (double) m_warp_params.alpha_max / ((m_warp_params.num_warps - 1) / 2);
Chris@0:
Chris@10: for (int ind = 1; ind < m_warp_params.num_warps; ind++) {
Chris@0: m_warpings.chirp_rates[ind] = m_warpings.chirp_rates[ind - 1] + increment;
Chris@0: }
Chris@0: // force zero value
Chris@0: m_warpings.chirp_rates[(int) ((m_warp_params.num_warps - 1) / 2)] = 0;
Chris@0:
Chris@0: } else {
Chris@0: // log alpha values spacing
Chris@14: m_warpings.chirp_rates = allocate(m_warp_params.num_warps);
Chris@0:
Chris@0: // force zero value
Chris@0: int middle_point = (int) ((m_warp_params.num_warps - 1) / 2);
Chris@0: m_warpings.chirp_rates[middle_point] = 0;
Chris@0:
Chris@0: double logMax = log10(m_warp_params.alpha_max + 1);
Chris@0: double increment = logMax / ((m_warp_params.num_warps - 1) / 2.0f);
Chris@0: double exponent = 0;
Chris@0:
Chris@0: // fill positive values
Chris@0: int ind_log = middle_point;
Chris@10: for (int ind = 0; ind < (m_warp_params.num_warps + 1) / 2; ind++) {
Chris@0: m_warpings.chirp_rates[ind_log] = pow(10, exponent) - 1;
Chris@0: exponent += increment;
Chris@0: ind_log++;
Chris@0: }
Chris@0: // fill negative values
Chris@10: for (int ind = 0; ind < (m_warp_params.num_warps - 1) / 2; ind++) {
Chris@0: m_warpings.chirp_rates[ind] = -m_warpings.chirp_rates[m_warp_params.num_warps - 1 - ind];
Chris@0: }
Chris@0: }
Chris@0:
Chris@0: // compute relative frequency deviation
Chris@14: for (int i = 0; i < m_warpings.nsamps_torig; i++) {
Chris@14: for (int j = 0; j < m_warp_params.num_warps; j++) {
Chris@0: freq_relative[j * m_warpings.nsamps_torig + i] = 1.0 + t_orig[i] * m_warpings.chirp_rates[j];
Chris@14: }
Chris@14: }
Chris@0: }
Chris@0:
Chris@0: void
Chris@14: FChTransformF0gram::design_LPF()
Chris@14: {
Chris@14: double *lp_LPFWindow_aux = allocate(m_blockSize/2+1);
Chris@14: mp_LPFWindow = allocate(m_blockSize/2+1);
Chris@0:
Chris@10: int i_max = (int) ((2.0*m_fmax/m_fs) * ( (double)m_blockSize / 2.0 + 1.0 ));
Chris@10: for (int i = 0; i < m_blockSize/2+1; i++) {
Chris@0: if (i >= i_max) {
Chris@0: lp_LPFWindow_aux[i] = 0.0;
Chris@0: } else {
Chris@0: lp_LPFWindow_aux[i] = 1.0;
Chris@0: }
Chris@0: }
Chris@14:
Chris@14: LPF_time = allocate_and_zero(m_warpings.nsamps_torig);
Chris@14: LPF_frequency = allocate_and_zero(2 * (m_warpings.nsamps_torig/2 + 1));
Chris@14:
Chris@14: fft_forward_LPF = new FFTReal(m_blockSize);
Chris@14: fft_inverse_LPF = new FFTReal(m_warpings.nsamps_torig);
Chris@0:
Chris@10: int winWidth = 11;
Chris@14: double *lp_hanningWindow = allocate(winWidth);
Chris@0: double accum=0;
Chris@10: for (int i = 0; i < winWidth; i++) {
Chris@0: lp_hanningWindow[i]=0.5*(1.0-cos(2*M_PI*(double)(i+1)/((double)winWidth+1.0)));
Chris@0: accum+=lp_hanningWindow[i];
Chris@0:
Chris@0: }
Chris@10: for (int i = 0; i < winWidth; i++) { //window normalization
Chris@0: lp_hanningWindow[i]=lp_hanningWindow[i]/accum;
Chris@0: }
Chris@10: for (int i = 0; i < m_blockSize/2+1; i++) {
Chris@0: //if (((i-(winWidth-1)/2)<0)||(i+(winWidth-1))/2>m_blockSize/2-1) {//consideramos winWidth impar, si la ventana sale del arreglo se rellena con el valor origianl
Chris@7: if ( (i > (i_max + (winWidth-1)/2)) || (i <= (i_max - (winWidth-1)/2)) ) {
Chris@0: mp_LPFWindow[i]=lp_LPFWindow_aux[i];
Chris@0: } else {
Chris@0: accum=0;
Chris@10: for (int j = -((winWidth-1)/2); j <= (winWidth-1)/2; j++) {
Chris@0: accum+=lp_LPFWindow_aux[i-j]*lp_hanningWindow[j+(winWidth-1)/2];
Chris@7: }
Chris@0: mp_LPFWindow[i]=accum;
Chris@0: }
Chris@0: }
Chris@0:
Chris@14: deallocate(lp_LPFWindow_aux);
Chris@14: deallocate(lp_hanningWindow);
Chris@0: }
Chris@0:
Chris@14: void FChTransformF0gram::apply_LPF()
Chris@14: {
Chris@14: fft_forward_LPF->forward(LPF_time, LPF_frequency);
Chris@14:
Chris@10: for (int i = 0; i < m_blockSize/2+1; i++) {
Chris@16: LPF_frequency[i*2] *= mp_LPFWindow[i];
Chris@16: LPF_frequency[i*2 + 1] *= mp_LPFWindow[i];
Chris@0: }
Chris@14:
Chris@14: fft_inverse_LPF->inverse(LPF_frequency, LPF_time);
Chris@20:
Chris@7: // TODO ver si hay que hacer fftshift para corregir la fase respecto al centro del frame.
Chris@7: // nota: además de aplicar el LPF, esta función resamplea la señal original.
Chris@0: }
Chris@0:
Chris@14: void FChTransformF0gram::clean_LPF()
Chris@14: {
Chris@14: delete fft_forward_LPF;
Chris@14: delete fft_inverse_LPF;
Chris@14: deallocate(LPF_time);
Chris@14: deallocate(LPF_frequency);
Chris@14: deallocate(mp_LPFWindow);
Chris@0: }
Chris@0:
Chris@14: void FChTransformF0gram::reset()
Chris@14: {
Chris@0: }
Chris@0:
Chris@0: FChTransformF0gram::FeatureSet
Chris@5: FChTransformF0gram::process(const float *const *inputBuffers, Vamp::RealTime) {
Chris@0:
Chris@20: if (!m_initialised) return FeatureSet();
Chris@20:
Chris@7: /* PSEUDOCÓDIGO:
Chris@7: - Aplicar FFT al frame entero.
Chris@7: - Filtro pasabajos en frecuencia.
Chris@7: - FFT inversa al frame entero.
Chris@7: -----------------------------------------------------------------------------
Chris@7: - Para cada warp: *Si es un espectrograma direccional (un solo warp
Chris@7: => no es para cada warp sino para el elegido)
Chris@7: - Hacer la interpolación con interp1q.
Chris@7: - Aplicar la FFT al frame warpeado.
Chris@7: - (Opcional) GLogS.
Chris@7: - ...
Chris@7: */
Chris@0:
Chris@0: //---------------------------------------------------------------------------
Chris@7: FeatureSet fs;
Chris@0:
Chris@7: #ifdef DEBUG
Chris@16: fprintf(stderr, "\n ----- DEBUG INFORMATION ----- \n");
Chris@16: fprintf(stderr, " m_fs = %f Hz.\n",m_fs);
Chris@16: fprintf(stderr, " fs_orig = %f Hz.\n",m_warpings.fs_orig);
Chris@16: fprintf(stderr, " fs_warp = %f Hz.\n",m_warpings.fs_warp);
Chris@16: fprintf(stderr, " m_blockSize = %d.\n",m_blockSize);
Chris@16: fprintf(stderr, " m_warpings.nsamps_torig = %d.\n",m_warpings.nsamps_torig);
Chris@24: fprintf(stderr, " m_warp_params.nsamps_twarp = %d.\n",m_warp_params.nsamps_twarp);
Chris@16: fprintf(stderr, " m_warp_params.num_warps = %d.\n",m_warp_params.num_warps);
Chris@16: fprintf(stderr, " m_glogs_harmonic_count = %d.\n",m_glogs_harmonic_count);
Chris@7: #endif
Chris@0:
Chris@20: for (int i = 0; i < m_blockSize - m_stepSize; ++i) {
Chris@20: m_inputBuffer[i] = m_inputBuffer[i + m_stepSize];
Chris@0: }
Chris@20: for (int i = 0; i < m_blockSize/2; ++i) {
Chris@20: m_inputBuffer[m_blockSize/2 + i] = inputBuffers[0][i];
Chris@20: }
Chris@20: for (int i = 0; i < m_blockSize; ++i) {
Chris@20: LPF_time[i] = m_inputBuffer[i] * m_timeWindow[i];
Chris@20: }
Chris@20: for (int i = 0; i < m_blockSize; ++i) {
Chris@20: LPF_time[m_blockSize + i] = 0.0;
Chris@20: }
Chris@20:
Chris@7: apply_LPF();
Chris@7: // Señal filtrada queda en LPF_time
Chris@0:
Chris@7: Feature feature;
Chris@0: feature.hasTimestamp = false;
Chris@0:
Chris@15: if (m_processingMode == ModeRoughSpectrogram) {
Chris@15: feature.values = vector(m_warp_params.nsamps_twarp/2+1, 0.f);
Chris@15: }
Chris@15:
Chris@0: // ----------------------------------------------------------------------------------------------
Chris@0: // Hanning window & FFT for all warp directions
Chris@0:
Chris@7: double max_glogs = -DBL_MAX;
Chris@10: int ind_max_glogs = 0;
Chris@0:
Chris@10: for (int i_warp = 0; i_warp < m_warp_params.num_warps; i_warp++) {
Chris@16:
Chris@7: // Interpolate
Chris@14: Utils::interp1q(LPF_time, (m_warpings.pos_int) + i_warp*m_warp_params.nsamps_twarp, m_warpings.pos_frac + i_warp*m_warp_params.nsamps_twarp, x_warping, m_warp_params.nsamps_twarp);
Chris@0:
Chris@7: // Apply window
Chris@10: for (int i = 0; i < m_warp_params.nsamps_twarp; i++) {
Chris@7: x_warping[i] *= mp_HanningWindow[i];
Chris@7: }
Chris@0:
Chris@7: // Transform
Chris@14: fft_xwarping->forward(x_warping, m_auxFanChirpTransform);
Chris@0:
Chris@15: if (m_processingMode == ModeRoughSpectrogram) {
Chris@15: for (int i = 0; i < (m_warp_params.nsamps_twarp/2+1); i++) {
Chris@15: double abs = sqrt(m_auxFanChirpTransform[i*2]*m_auxFanChirpTransform[i*2]+m_auxFanChirpTransform[i*2+1]*m_auxFanChirpTransform[i*2+1]);
Chris@15: if (abs > feature.values[i]) {
Chris@15: feature.values[i] = abs;
Chris@15: }
Chris@15: }
Chris@15: continue;
Chris@15: }
Chris@15:
Chris@7: // Copy result
Chris@7: double *aux_abs_fcht = m_absFanChirpTransform + i_warp*(m_warp_params.nsamps_twarp/2+1);
Chris@10: for (int i = 0; i < (m_warp_params.nsamps_twarp/2+1); i++) {
Chris@14: aux_abs_fcht[i] = log10(1.0 + 10.0*sqrt(m_auxFanChirpTransform[i*2]*m_auxFanChirpTransform[i*2]+m_auxFanChirpTransform[i*2+1]*m_auxFanChirpTransform[i*2+1]));
Chris@7: }
Chris@0:
Chris@0: // -----------------------------------------------------------------------------------------
Chris@0: // GLogS
Chris@14: Utils::interp1q(aux_abs_fcht, m_glogs_posint, m_glogs_posfrac, m_glogs_interp, m_glogs_harmonic_count);
Chris@10: int glogs_ind = 0;
Chris@10: for (int i = 0; i < m_glogs_num_f0s; i++) {
Chris@7: double glogs_accum = 0;
Chris@10: for (int j = 1; j <= m_glogs_n[i]; j++) {
Chris@7: glogs_accum += m_glogs_interp[glogs_ind++];
Chris@7: }
Chris@7: m_glogs[i + i_warp*m_glogs_num_f0s] = glogs_accum/(double)m_glogs_n[i];
Chris@7: }
Chris@0:
Chris@0: // Sub/super harmonic correction
Chris@14: Utils::interp1q(m_glogs + i_warp*m_glogs_num_f0s, m_glogs_third_harmonic_posint, m_glogs_third_harmonic_posfrac, m_glogs_third_harmonic, (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@14: Utils::interp1q(m_glogs + i_warp*m_glogs_num_f0s, m_glogs_fifth_harmonic_posint, m_glogs_fifth_harmonic_posfrac, m_glogs_fifth_harmonic, (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
Chris@10: for (int i = m_glogs_num_f0s-1; i >= m_glogs_init_f0s; i--) {
Chris@7: m_glogs[i + i_warp*m_glogs_num_f0s] -= MAX(MAX(m_glogs[i-m_f0_params.num_f0s_per_oct + i_warp*m_glogs_num_f0s],m_glogs_third_harmonic[i-m_glogs_init_f0s]),m_glogs_fifth_harmonic[i-m_glogs_init_f0s]);
Chris@7: }
Chris@10: for (int i = m_glogs_init_f0s; i < m_glogs_num_f0s-m_f0_params.num_f0s_per_oct; i++) {
Chris@7: m_glogs[i + i_warp*m_glogs_num_f0s] -= 0.3*m_glogs[i+m_f0_params.num_f0s_per_oct + i_warp*m_glogs_num_f0s];
Chris@7: // Median, sigma $ weights correction
Chris@7: m_glogs[i + i_warp*m_glogs_num_f0s] = (m_glogs[i + i_warp*m_glogs_num_f0s]-m_glogs_median_correction[i-m_glogs_init_f0s])*m_glogs_sigma_correction[i-m_glogs_init_f0s]*m_glogs_f0_preference_weights[i-m_glogs_init_f0s];
Chris@7: }
Chris@0:
Chris@7: // Look for maximum value to determine best direction
Chris@10: for (int i = m_glogs_init_f0s; i < m_glogs_num_f0s-m_f0_params.num_f0s_per_oct; i++) {
Chris@7: if (m_glogs[i + i_warp*m_glogs_num_f0s] > max_glogs) {
Chris@7: max_glogs = m_glogs[i + i_warp*m_glogs_num_f0s];
Chris@7: ind_max_glogs = i_warp;
Chris@7: }
Chris@7: }
Chris@7: }
Chris@0:
Chris@15: if (m_processingMode == ModeRoughSpectrogram) {
Chris@15:
Chris@15: // already accumulated our return values in feature
Chris@19: fs[0].push_back(feature);
Chris@15:
Chris@15: } else if (m_processingMode == ModeSpectrogram) {
Chris@15:
Chris@15: for (int i = 0; i < m_warp_params.nsamps_twarp/2+1; i++) {
Chris@15: feature.values.push_back(pow(10.0, m_absFanChirpTransform[ind_max_glogs * (m_warp_params.nsamps_twarp/2+1) + i]) - 1.0);
Chris@15: }
Chris@19: fs[0].push_back(feature);
Chris@15:
Chris@15: } else { // f0gram
Chris@15:
Chris@19: int bestIndex = -1;
Chris@19:
Chris@15: for (int i=m_glogs_init_f0s; i< m_glogs_num_f0s - m_f0_params.num_f0s_per_oct; i++) {
Chris@19: double value = 0.0;
Chris@15: switch (m_f0gram_mode) {
Chris@15: case AllBinsOfBestDirection:
Chris@19: value = m_glogs[i+(int)ind_max_glogs*(int)m_glogs_num_f0s];
Chris@15: break;
Chris@15: case BestBinOfAllDirections:
Chris@15: max_glogs = -DBL_MAX;
Chris@15: for (int i_warp = 0; i_warp < m_warp_params.num_warps; i_warp++) {
Chris@15: if (m_glogs[i + i_warp*m_glogs_num_f0s] > max_glogs) {
Chris@15: max_glogs = m_glogs[i + i_warp*m_glogs_num_f0s];
Chris@15: ind_max_glogs = i_warp;
Chris@15: }
Chris@7: }
Chris@19: value = max_glogs;
Chris@15: break;
Chris@7: }
Chris@19: if (bestIndex < 0 || float(value) > feature.values[bestIndex]) {
Chris@19: bestIndex = int(feature.values.size());
Chris@19: }
Chris@19: feature.values.push_back(float(value));
Chris@19: }
Chris@19:
Chris@19: fs[0].push_back(feature);
Chris@19:
Chris@19: if (bestIndex >= 0) {
Chris@19:
Chris@19: double bestValue = feature.values[bestIndex];
Chris@19: set ordered(feature.values.begin(), feature.values.end());
Chris@19: vector flattened(ordered.begin(), ordered.end());
Chris@19: double median = flattened[flattened.size()/2];
Chris@19: if (bestValue > median * 8.0) {
Chris@19: Feature pfeature;
Chris@19: pfeature.hasTimestamp = false;
Chris@19: pfeature.values.push_back(m_f0s[bestIndex]);
Chris@19: fs[1].push_back(pfeature);
Chris@19: }
Chris@7: }
Chris@7: }
Chris@0:
Chris@7: return fs;
Chris@0: }
Chris@0:
Chris@0: FChTransformF0gram::FeatureSet
Chris@0: FChTransformF0gram::getRemainingFeatures() {
Chris@0: return FeatureSet();
Chris@0: }
Chris@0:
Chris@0: void
Chris@0: FChTransformF0gram::design_time_window() {
Chris@0:
Chris@20: int transitionWidth = (int)m_blockSize/128 + 128;
Chris@14: m_timeWindow = allocate(m_blockSize);
Chris@14: double *lp_transitionWindow = allocate(transitionWidth);
Chris@0:
Chris@10: for (int i = 0; i < m_blockSize; i++) {
Chris@7: m_timeWindow[i] = 1.0;
Chris@7: }
Chris@0:
Chris@10: for (int i = 0; i < transitionWidth; i++) {
Chris@0: lp_transitionWindow[i]=0.5*(1.0-cos(2*M_PI*(double)(i+1)/((double)transitionWidth+1.0)));
Chris@0: }
Chris@0:
Chris@10: for (int i = 0; i < transitionWidth/2; i++) {
Chris@7: m_timeWindow[i] = lp_transitionWindow[i];
Chris@7: m_timeWindow[m_blockSize-1-i] = lp_transitionWindow[transitionWidth-1-i];
Chris@7: }
Chris@0:
Chris@7: #ifdef DEBUG
Chris@7: for (int i = 0; i < m_blockSize; i++) {
Chris@7: if ((i