Chris@0
|
1 /*
|
Chris@0
|
2 copyright (C) 2011 I. Irigaray, M. Rocamora
|
Chris@0
|
3
|
Chris@0
|
4 This program is free software: you can redistribute it and/or modify
|
Chris@0
|
5 it under the terms of the GNU General Public License as published by
|
Chris@0
|
6 the Free Software Foundation, either version 3 of the License, or
|
Chris@0
|
7 (at your option) any later version.
|
Chris@0
|
8
|
Chris@0
|
9 This program is distributed in the hope that it will be useful,
|
Chris@0
|
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
|
Chris@0
|
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
Chris@0
|
12 GNU General Public License for more details.
|
Chris@0
|
13
|
Chris@0
|
14 You should have received a copy of the GNU General Public License
|
Chris@0
|
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
|
Chris@7
|
16 */
|
Chris@0
|
17
|
Chris@0
|
18 #include "FChTransformF0gram.h"
|
Chris@0
|
19 #include "FChTransformUtils.h"
|
Chris@0
|
20 #include <math.h>
|
Chris@0
|
21 #include <float.h>
|
Chris@14
|
22
|
Chris@19
|
23 #include <set>
|
Chris@19
|
24
|
Chris@14
|
25 #include "bqvec/Allocators.h"
|
Chris@14
|
26
|
Chris@14
|
27 using namespace breakfastquay;
|
Chris@14
|
28
|
Chris@16
|
29 #define DEBUG
|
Chris@7
|
30
|
Chris@0
|
31 #define MAX(x, y) (((x) > (y)) ? (x) : (y))
|
Chris@0
|
32
|
Chris@15
|
33 FChTransformF0gram::FChTransformF0gram(ProcessingMode mode,
|
Chris@15
|
34 float inputSampleRate) :
|
Chris@7
|
35 Plugin(inputSampleRate),
|
Chris@15
|
36 m_processingMode(mode),
|
Chris@20
|
37 m_initialised(false),
|
Chris@20
|
38 m_stepSize(256),
|
Chris@20
|
39 m_blockSize(8192) {
|
Chris@0
|
40
|
Chris@0
|
41 m_fs = inputSampleRate;
|
Chris@0
|
42 // max frequency of interest (Hz)
|
Chris@0
|
43 m_fmax = 10000.f;
|
Chris@0
|
44 // warping parameters
|
Chris@12
|
45 m_warp_params.nsamps_twarp = 2048;
|
Chris@0
|
46 m_warp_params.alpha_max = 4;
|
Chris@0
|
47 m_warp_params.num_warps = 21;
|
Chris@0
|
48 m_warp_params.fact_over_samp = 2;
|
Chris@0
|
49 m_warp_params.alpha_dist = 0;
|
Chris@0
|
50 // f0 parameters
|
Chris@0
|
51 m_f0_params.f0min = 80.0;
|
Chris@0
|
52 m_f0_params.num_octs = 4;
|
Chris@0
|
53 m_f0_params.num_f0s_per_oct = 192;
|
Chris@0
|
54 m_f0_params.num_f0_hyps = 5;
|
Chris@0
|
55 m_f0_params.prefer = true;
|
Chris@0
|
56 m_f0_params.prefer_mean = 60;
|
Chris@0
|
57 m_f0_params.prefer_stdev = 18;
|
Chris@0
|
58 // glogs parameters
|
Chris@0
|
59 m_glogs_params.HP_logS = true;
|
Chris@0
|
60 m_glogs_params.att_subharms = 1;
|
Chris@7
|
61 // display parameters
|
Chris@15
|
62 m_f0gram_mode = BestBinOfAllDirections;
|
Chris@0
|
63
|
Chris@0
|
64 m_glogs_params.median_poly_coefs[0] = -0.000000058551680;
|
Chris@0
|
65 m_glogs_params.median_poly_coefs[1] = -0.000006945207775;
|
Chris@0
|
66 m_glogs_params.median_poly_coefs[2] = 0.002357223226588;
|
Chris@0
|
67
|
Chris@0
|
68 m_glogs_params.sigma_poly_coefs[0] = 0.000000092782308;
|
Chris@0
|
69 m_glogs_params.sigma_poly_coefs[1] = 0.000057283574898;
|
Chris@0
|
70 m_glogs_params.sigma_poly_coefs[2] = 0.022199903714288;
|
Chris@0
|
71
|
Chris@0
|
72 // number of fft points (controls zero-padding)
|
Chris@0
|
73 m_nfft = m_warp_params.nsamps_twarp;
|
Chris@0
|
74 // hop in samples
|
Chris@0
|
75 m_hop = m_warp_params.fact_over_samp * 256;
|
Chris@0
|
76
|
Chris@0
|
77 m_num_f0s = 0;
|
Chris@16
|
78 m_f0s = 0;
|
Chris@0
|
79 }
|
Chris@0
|
80
|
Chris@14
|
81 FChTransformF0gram::~FChTransformF0gram()
|
Chris@14
|
82 {
|
Chris@20
|
83 if (!m_initialised) {
|
Chris@14
|
84 return; // nothing was allocated
|
Chris@14
|
85 }
|
Chris@14
|
86
|
Chris@20
|
87 deallocate(m_inputBuffer);
|
Chris@20
|
88
|
Chris@14
|
89 deallocate(m_warpings.pos_int);
|
Chris@14
|
90 deallocate(m_warpings.pos_frac);
|
Chris@14
|
91 deallocate(m_warpings.chirp_rates);
|
Chris@14
|
92
|
Chris@14
|
93 clean_LPF();
|
Chris@14
|
94
|
Chris@14
|
95 deallocate(m_timeWindow);
|
Chris@14
|
96
|
Chris@14
|
97 deallocate(mp_HanningWindow);
|
Chris@14
|
98
|
Chris@14
|
99 // Warping
|
Chris@14
|
100 deallocate(x_warping);
|
Chris@14
|
101 delete fft_xwarping;
|
Chris@14
|
102 deallocate(m_absFanChirpTransform);
|
Chris@14
|
103 deallocate(m_auxFanChirpTransform);
|
Chris@14
|
104
|
Chris@14
|
105 // design_GLogS
|
Chris@14
|
106 deallocate(m_glogs_f0);
|
Chris@14
|
107 deallocate(m_glogs);
|
Chris@14
|
108 deallocate(m_glogs_n);
|
Chris@14
|
109 deallocate(m_glogs_index);
|
Chris@14
|
110 deallocate(m_glogs_posint);
|
Chris@14
|
111 deallocate(m_glogs_posfrac);
|
Chris@14
|
112 deallocate(m_glogs_interp);
|
Chris@14
|
113 deallocate(m_glogs_third_harmonic_posint);
|
Chris@14
|
114 deallocate(m_glogs_third_harmonic_posfrac);
|
Chris@14
|
115 deallocate(m_glogs_third_harmonic);
|
Chris@14
|
116 deallocate(m_glogs_fifth_harmonic_posint);
|
Chris@14
|
117 deallocate(m_glogs_fifth_harmonic_posfrac);
|
Chris@14
|
118 deallocate(m_glogs_fifth_harmonic);
|
Chris@14
|
119 deallocate(m_glogs_f0_preference_weights);
|
Chris@14
|
120 deallocate(m_glogs_median_correction);
|
Chris@14
|
121 deallocate(m_glogs_sigma_correction);
|
Chris@16
|
122
|
Chris@16
|
123 deallocate(m_f0s);
|
Chris@0
|
124 }
|
Chris@0
|
125
|
Chris@0
|
126 string
|
Chris@0
|
127 FChTransformF0gram::getIdentifier() const {
|
Chris@15
|
128 switch (m_processingMode) {
|
Chris@15
|
129 case ModeF0Gram: return "fchtransformf0gram";
|
Chris@15
|
130 case ModeSpectrogram: return "fchtransformspectrogram";
|
Chris@15
|
131 case ModeRoughSpectrogram: return "fchtransformrough";
|
Chris@15
|
132 }
|
Chris@17
|
133 throw std::logic_error("unknown mode");
|
Chris@0
|
134 }
|
Chris@0
|
135
|
Chris@0
|
136 string
|
Chris@0
|
137 FChTransformF0gram::getName() const {
|
Chris@15
|
138 switch (m_processingMode) {
|
Chris@15
|
139 case ModeF0Gram: return "Fan Chirp Transform F0gram";
|
Chris@15
|
140 case ModeSpectrogram: return "Fan Chirp Transform Spectrogram";
|
Chris@15
|
141 case ModeRoughSpectrogram: return "Fan Chirp Transform Rough Spectrogram";
|
Chris@15
|
142 }
|
Chris@17
|
143 throw std::logic_error("unknown mode");
|
Chris@0
|
144 }
|
Chris@0
|
145
|
Chris@0
|
146 string
|
Chris@0
|
147 FChTransformF0gram::getDescription() const {
|
Chris@15
|
148 switch (m_processingMode) {
|
Chris@15
|
149 case ModeF0Gram:
|
Chris@15
|
150 return "This plug-in produces a representation, called F0gram, which exhibits the salience of the fundamental frequency of the sound sources in the audio file. The computation of the F0gram makes use of the Fan Chirp Transform analysis. It is based on the article \"Fan chirp transform for music representation\" P. Cancela, E. Lopez, M. Rocamora, International Conference on Digital Audio Effects, 13th. DAFx-10. Graz, Austria - 6-10 Sep 2010.";
|
Chris@15
|
151 case ModeSpectrogram:
|
Chris@15
|
152 return "This plug-in produces a spectral representation of the audio using Fan Chirp Transform analysis.";
|
Chris@15
|
153 case ModeRoughSpectrogram:
|
Chris@15
|
154 return "This plug-in produces a more approximate spectral representation of the audio using Fan Chirp Transform analysis.";
|
Chris@15
|
155 }
|
Chris@17
|
156 throw std::logic_error("unknown mode");
|
Chris@0
|
157 }
|
Chris@0
|
158
|
Chris@0
|
159 string
|
Chris@0
|
160 FChTransformF0gram::getMaker() const {
|
Chris@0
|
161 // Your name here
|
Chris@0
|
162 return "Audio Processing Group \n Universidad de la Republica";
|
Chris@0
|
163 }
|
Chris@0
|
164
|
Chris@0
|
165 int
|
Chris@0
|
166 FChTransformF0gram::getPluginVersion() const {
|
Chris@0
|
167 // Increment this each time you release a version that behaves
|
Chris@0
|
168 // differently from the previous one
|
Chris@0
|
169 //
|
Chris@0
|
170 // 0 - initial version from scratch
|
Chris@15
|
171 return 1;
|
Chris@0
|
172 }
|
Chris@0
|
173
|
Chris@0
|
174 string
|
Chris@0
|
175 FChTransformF0gram::getCopyright() const {
|
Chris@0
|
176 // This function is not ideally named. It does not necessarily
|
Chris@0
|
177 // need to say who made the plugin -- getMaker does that -- but it
|
Chris@0
|
178 // should indicate the terms under which it is distributed. For
|
Chris@0
|
179 // example, "Copyright (year). All Rights Reserved", or "GPL"
|
Chris@0
|
180 return "copyright (C) 2011 GPL - Audio Processing Group, UdelaR";
|
Chris@0
|
181 }
|
Chris@0
|
182
|
Chris@0
|
183 FChTransformF0gram::InputDomain
|
Chris@0
|
184 FChTransformF0gram::getInputDomain() const {
|
Chris@0
|
185 return TimeDomain;
|
Chris@0
|
186 }
|
Chris@0
|
187
|
Chris@0
|
188 size_t FChTransformF0gram::getPreferredBlockSize() const {
|
Chris@20
|
189 // We do our own accumulating into blocks within process()
|
Chris@20
|
190 return m_blockSize/2;
|
Chris@0
|
191 }
|
Chris@0
|
192
|
Chris@0
|
193 size_t
|
Chris@0
|
194 FChTransformF0gram::getPreferredStepSize() const {
|
Chris@20
|
195 return m_stepSize;
|
Chris@0
|
196 }
|
Chris@0
|
197
|
Chris@0
|
198 size_t
|
Chris@0
|
199 FChTransformF0gram::getMinChannelCount() const {
|
Chris@0
|
200 return 1;
|
Chris@0
|
201 }
|
Chris@0
|
202
|
Chris@0
|
203 size_t
|
Chris@0
|
204 FChTransformF0gram::getMaxChannelCount() const {
|
Chris@0
|
205 return 1;
|
Chris@0
|
206 }
|
Chris@0
|
207
|
Chris@0
|
208 FChTransformF0gram::ParameterList
|
Chris@0
|
209 FChTransformF0gram::getParameterDescriptors() const {
|
Chris@0
|
210 ParameterList list;
|
Chris@0
|
211
|
Chris@0
|
212 // If the plugin has no adjustable parameters, return an empty
|
Chris@0
|
213 // list here (and there's no need to provide implementations of
|
Chris@0
|
214 // getParameter and setParameter in that case either).
|
Chris@0
|
215
|
Chris@0
|
216 // Note that it is your responsibility to make sure the parameters
|
Chris@0
|
217 // start off having their default values (e.g. in the constructor
|
Chris@0
|
218 // above). The host needs to know the default value so it can do
|
Chris@0
|
219 // things like provide a "reset to default" function, but it will
|
Chris@0
|
220 // not explicitly set your parameters to their defaults for you if
|
Chris@0
|
221 // they have not changed in the mean time.
|
Chris@0
|
222
|
Chris@0
|
223 // ============= WARPING PARAMETERS =============
|
Chris@0
|
224
|
Chris@0
|
225 ParameterDescriptor fmax;
|
Chris@0
|
226 fmax.identifier = "fmax";
|
Chris@0
|
227 fmax.name = "Maximum frequency";
|
Chris@0
|
228 fmax.description = "Maximum frequency of interest for the analysis.";
|
Chris@0
|
229 fmax.unit = "Hz";
|
Chris@0
|
230 fmax.minValue = 2000;
|
Chris@0
|
231 fmax.maxValue = 22050;
|
Chris@0
|
232 fmax.defaultValue = 10000;
|
Chris@0
|
233 fmax.isQuantized = true;
|
Chris@0
|
234 fmax.quantizeStep = 1.0;
|
Chris@0
|
235 list.push_back(fmax);
|
Chris@0
|
236
|
Chris@0
|
237 ParameterDescriptor nsamp;
|
Chris@0
|
238 nsamp.identifier = "nsamp";
|
Chris@0
|
239 nsamp.name = "Number of samples";
|
Chris@0
|
240 nsamp.description = "Number of samples of the time warped frame";
|
Chris@0
|
241 nsamp.unit = "samples";
|
Chris@0
|
242 nsamp.minValue = 128;
|
Chris@0
|
243 nsamp.maxValue = 4096;
|
Chris@0
|
244 nsamp.defaultValue = 2048;
|
Chris@0
|
245 nsamp.isQuantized = true;
|
Chris@0
|
246 nsamp.quantizeStep = 1.0;
|
Chris@0
|
247 list.push_back(nsamp);
|
Chris@0
|
248
|
Chris@0
|
249 ParameterDescriptor nfft;
|
Chris@0
|
250 nfft.identifier = "nfft";
|
Chris@0
|
251 nfft.name = "FFT number of points";
|
Chris@0
|
252 nfft.description = "Number of FFT points (controls zero-padding)";
|
Chris@0
|
253 nfft.unit = "samples";
|
Chris@0
|
254 nfft.minValue = 0;
|
Chris@0
|
255 nfft.maxValue = 4;
|
Chris@0
|
256 nfft.defaultValue = 3;
|
Chris@0
|
257 nfft.isQuantized = true;
|
Chris@0
|
258 nfft.quantizeStep = 1.0;
|
Chris@0
|
259 nfft.valueNames.push_back("256");
|
Chris@0
|
260 nfft.valueNames.push_back("512");
|
Chris@0
|
261 nfft.valueNames.push_back("1024");
|
Chris@0
|
262 nfft.valueNames.push_back("2048");
|
Chris@0
|
263 nfft.valueNames.push_back("4096");
|
Chris@0
|
264 nfft.valueNames.push_back("8192");
|
Chris@0
|
265 list.push_back(nfft);
|
Chris@0
|
266
|
Chris@0
|
267 ParameterDescriptor alpha_max;
|
Chris@0
|
268 alpha_max.identifier = "alpha_max";
|
Chris@0
|
269 alpha_max.name = "Maximum alpha value";
|
Chris@0
|
270 alpha_max.description = "Maximum value for the alpha parameter of the transform.";
|
Chris@0
|
271 alpha_max.unit = "Hz/s";
|
Chris@0
|
272 alpha_max.minValue = -10;
|
Chris@0
|
273 alpha_max.maxValue = 10;
|
Chris@0
|
274 alpha_max.defaultValue = 5;
|
Chris@0
|
275 alpha_max.isQuantized = true;
|
Chris@0
|
276 alpha_max.quantizeStep = 1.0;
|
Chris@0
|
277 list.push_back(alpha_max);
|
Chris@0
|
278
|
Chris@0
|
279 ParameterDescriptor num_warps;
|
Chris@0
|
280 num_warps.identifier = "num_warps";
|
Chris@0
|
281 num_warps.name = "Number of warpings";
|
Chris@0
|
282 num_warps.description = "Number of different warpings in the specified range (must be odd).";
|
Chris@0
|
283 num_warps.unit = "";
|
Chris@0
|
284 num_warps.minValue = 1;
|
Chris@0
|
285 num_warps.maxValue = 101;
|
Chris@0
|
286 num_warps.defaultValue = 21;
|
Chris@0
|
287 num_warps.isQuantized = true;
|
Chris@0
|
288 num_warps.quantizeStep = 2.0;
|
Chris@0
|
289 list.push_back(num_warps);
|
Chris@0
|
290
|
Chris@0
|
291 ParameterDescriptor alpha_dist;
|
Chris@0
|
292 alpha_dist.identifier = "alpha_dist";
|
Chris@0
|
293 alpha_dist.name = "alpha distribution";
|
Chris@0
|
294 alpha_dist.description = "Type of distribution of alpha values (linear or log).";
|
Chris@0
|
295 alpha_dist.unit = "";
|
Chris@0
|
296 alpha_dist.minValue = 0;
|
Chris@0
|
297 alpha_dist.maxValue = 1;
|
Chris@0
|
298 alpha_dist.defaultValue = 1;
|
Chris@0
|
299 alpha_dist.isQuantized = true;
|
Chris@0
|
300 alpha_dist.quantizeStep = 1.0;
|
Chris@0
|
301 // lin (0), log (1)
|
Chris@0
|
302 alpha_dist.valueNames.push_back("lin");
|
Chris@0
|
303 alpha_dist.valueNames.push_back("log");
|
Chris@0
|
304 list.push_back(alpha_dist);
|
Chris@0
|
305
|
Chris@0
|
306 // ============= F0-GRAM PARAMETERS =============
|
Chris@0
|
307
|
Chris@0
|
308 ParameterDescriptor f0min;
|
Chris@0
|
309 f0min.identifier = "f0min";
|
Chris@0
|
310 f0min.name = "min f0";
|
Chris@0
|
311 f0min.description = "Minimum fundamental frequency (f0) value.";
|
Chris@0
|
312 f0min.unit = "Hz";
|
Chris@0
|
313 f0min.minValue = 1;
|
Chris@0
|
314 f0min.maxValue = 500;
|
Chris@0
|
315 f0min.defaultValue = 80;
|
Chris@0
|
316 f0min.isQuantized = true;
|
Chris@0
|
317 f0min.quantizeStep = 1.0;
|
Chris@0
|
318 list.push_back(f0min);
|
Chris@0
|
319
|
Chris@0
|
320 ParameterDescriptor num_octs;
|
Chris@0
|
321 num_octs.identifier = "num_octs";
|
Chris@0
|
322 num_octs.name = "number of octaves";
|
Chris@0
|
323 num_octs.description = "Number of octaves for F0gram computation.";
|
Chris@0
|
324 num_octs.unit = "";
|
Chris@0
|
325 num_octs.minValue = 1;
|
Chris@0
|
326 num_octs.maxValue = 10;
|
Chris@0
|
327 num_octs.defaultValue = 4;
|
Chris@0
|
328 num_octs.isQuantized = true;
|
Chris@0
|
329 num_octs.quantizeStep = 1.0;
|
Chris@0
|
330 list.push_back(num_octs);
|
Chris@0
|
331
|
Chris@0
|
332 ParameterDescriptor num_f0_hyps;
|
Chris@0
|
333 num_f0_hyps.identifier = "num_f0_hyps";
|
Chris@0
|
334 num_f0_hyps.name = "number of f0 hypotesis";
|
Chris@0
|
335 num_f0_hyps.description = "Number of f0 hypotesis to extract.";
|
Chris@0
|
336 num_f0_hyps.unit = "";
|
Chris@0
|
337 num_f0_hyps.minValue = 1;
|
Chris@0
|
338 num_f0_hyps.maxValue = 100;
|
Chris@0
|
339 num_f0_hyps.defaultValue = 10;
|
Chris@0
|
340 num_f0_hyps.isQuantized = true;
|
Chris@0
|
341 num_f0_hyps.quantizeStep = 1.0;
|
Chris@0
|
342 list.push_back(num_f0_hyps);
|
Chris@0
|
343
|
Chris@0
|
344 ParameterDescriptor f0s_per_oct;
|
Chris@0
|
345 f0s_per_oct.identifier = "f0s_per_oct";
|
Chris@0
|
346 f0s_per_oct.name = "f0 values per octave";
|
Chris@0
|
347 f0s_per_oct.description = "Number of f0 values per octave.";
|
Chris@0
|
348 f0s_per_oct.unit = "";
|
Chris@0
|
349 f0s_per_oct.minValue = 12;
|
Chris@0
|
350 f0s_per_oct.maxValue = 768;
|
Chris@0
|
351 f0s_per_oct.defaultValue = 192;
|
Chris@0
|
352 f0s_per_oct.isQuantized = true;
|
Chris@0
|
353 f0s_per_oct.quantizeStep = 1.0;
|
Chris@0
|
354 list.push_back(f0s_per_oct);
|
Chris@0
|
355
|
Chris@0
|
356 ParameterDescriptor f0_prefer_fun;
|
Chris@0
|
357 f0_prefer_fun.identifier = "f0_prefer_fun";
|
Chris@0
|
358 f0_prefer_fun.name = "f0 preference function";
|
Chris@0
|
359 f0_prefer_fun.description = "Whether to use a f0 weighting function.";
|
Chris@0
|
360 f0_prefer_fun.unit = "";
|
Chris@0
|
361 f0_prefer_fun.minValue = 0;
|
Chris@0
|
362 f0_prefer_fun.maxValue = 1;
|
Chris@0
|
363 f0_prefer_fun.defaultValue = 1;
|
Chris@0
|
364 f0_prefer_fun.isQuantized = true;
|
Chris@0
|
365 f0_prefer_fun.quantizeStep = 1.0;
|
Chris@0
|
366 list.push_back(f0_prefer_fun);
|
Chris@0
|
367
|
Chris@0
|
368 ParameterDescriptor f0_prefer_mean;
|
Chris@0
|
369 f0_prefer_mean.identifier = "f0_prefer_mean";
|
Chris@0
|
370 f0_prefer_mean.name = "mean f0 preference function";
|
Chris@0
|
371 f0_prefer_mean.description = "Mean value for f0 weighting function (MIDI number).";
|
Chris@0
|
372 f0_prefer_mean.unit = "";
|
Chris@0
|
373 f0_prefer_mean.minValue = 1;
|
Chris@0
|
374 f0_prefer_mean.maxValue = 127;
|
Chris@0
|
375 f0_prefer_mean.defaultValue = 60;
|
Chris@0
|
376 f0_prefer_mean.isQuantized = true;
|
Chris@0
|
377 f0_prefer_mean.quantizeStep = 1.0;
|
Chris@0
|
378 list.push_back(f0_prefer_mean);
|
Chris@0
|
379
|
Chris@0
|
380 ParameterDescriptor f0_prefer_stdev;
|
Chris@0
|
381 f0_prefer_stdev.identifier = "f0_prefer_stdev";
|
Chris@0
|
382 f0_prefer_stdev.name = "stdev of f0 preference function";
|
Chris@0
|
383 f0_prefer_stdev.description = "Stdev for f0 weighting function (MIDI number).";
|
Chris@0
|
384 f0_prefer_stdev.unit = "";
|
Chris@0
|
385 f0_prefer_stdev.minValue = 1;
|
Chris@0
|
386 f0_prefer_stdev.maxValue = 127;
|
Chris@0
|
387 f0_prefer_stdev.defaultValue = 18;
|
Chris@0
|
388 f0_prefer_stdev.isQuantized = true;
|
Chris@0
|
389 f0_prefer_stdev.quantizeStep = 1.0;
|
Chris@0
|
390 list.push_back(f0_prefer_stdev);
|
Chris@0
|
391
|
Chris@0
|
392 ParameterDescriptor f0gram_mode;
|
Chris@0
|
393 f0gram_mode.identifier = "f0gram_mode";
|
Chris@0
|
394 f0gram_mode.name = "display mode of f0gram";
|
Chris@0
|
395 f0gram_mode.description = "Display all bins of the best direction, or the best bin for each direction.";
|
Chris@0
|
396 f0gram_mode.unit = "";
|
Chris@0
|
397 f0gram_mode.minValue = 0;
|
Chris@0
|
398 f0gram_mode.maxValue = 1;
|
Chris@0
|
399 f0gram_mode.defaultValue = 1;
|
Chris@0
|
400 f0gram_mode.isQuantized = true;
|
Chris@0
|
401 f0gram_mode.quantizeStep = 1.0;
|
Chris@0
|
402 list.push_back(f0gram_mode);
|
Chris@0
|
403
|
Chris@0
|
404 return list;
|
Chris@0
|
405 }
|
Chris@0
|
406
|
Chris@0
|
407 float
|
Chris@0
|
408 FChTransformF0gram::getParameter(string identifier) const {
|
Chris@0
|
409
|
Chris@0
|
410 if (identifier == "fmax") {
|
Chris@0
|
411 return m_fmax;
|
Chris@0
|
412 } else if (identifier == "nsamp") {
|
Chris@0
|
413 return m_warp_params.nsamps_twarp;
|
Chris@0
|
414 } else if (identifier == "alpha_max") {
|
Chris@0
|
415 return m_warp_params.alpha_max;
|
Chris@0
|
416 } else if (identifier == "num_warps") {
|
Chris@0
|
417 return m_warp_params.num_warps;
|
Chris@0
|
418 } else if (identifier == "alpha_dist") {
|
Chris@0
|
419 return m_warp_params.alpha_dist;
|
Chris@0
|
420 } else if (identifier == "nfft") {
|
Chris@0
|
421 return m_nfft;
|
Chris@0
|
422 } else if (identifier == "f0min") {
|
Chris@0
|
423 return m_f0_params.f0min;
|
Chris@0
|
424 } else if (identifier == "num_octs") {
|
Chris@0
|
425 return m_f0_params.num_octs;
|
Chris@0
|
426 } else if (identifier == "f0s_per_oct") {
|
Chris@0
|
427 return m_f0_params.num_f0s_per_oct;
|
Chris@0
|
428 } else if (identifier == "num_f0_hyps") {
|
Chris@0
|
429 return m_f0_params.num_f0_hyps;
|
Chris@0
|
430 } else if (identifier == "f0_prefer_fun") {
|
Chris@0
|
431 return m_f0_params.prefer;
|
Chris@0
|
432 } else if (identifier == "f0_prefer_mean") {
|
Chris@0
|
433 return m_f0_params.prefer_mean;
|
Chris@0
|
434 } else if (identifier == "f0_prefer_stdev") {
|
Chris@0
|
435 return m_f0_params.prefer_stdev;
|
Chris@7
|
436 } else if (identifier == "f0gram_mode") {
|
Chris@15
|
437 return m_f0gram_mode == BestBinOfAllDirections ? 1.0 : 0.0;
|
Chris@0
|
438 } else {
|
Chris@0
|
439 return 0.f;
|
Chris@0
|
440 }
|
Chris@0
|
441
|
Chris@0
|
442 }
|
Chris@0
|
443
|
Chris@15
|
444 void FChTransformF0gram::setParameter(string identifier, float value)
|
Chris@15
|
445 {
|
Chris@0
|
446 if (identifier == "fmax") {
|
Chris@0
|
447 m_fmax = value;
|
Chris@0
|
448 } else if (identifier == "nsamp") {
|
Chris@0
|
449 m_warp_params.nsamps_twarp = value;
|
Chris@0
|
450 } else if (identifier == "alpha_max") {
|
Chris@0
|
451 m_warp_params.alpha_max = value;
|
Chris@0
|
452 } else if (identifier == "num_warps") {
|
Chris@0
|
453 m_warp_params.num_warps = value;
|
Chris@0
|
454 } else if (identifier == "alpha_dist") {
|
Chris@0
|
455 m_warp_params.alpha_dist = value;
|
Chris@0
|
456 } else if (identifier == "nfft") {
|
Chris@0
|
457 m_nfft = value;
|
Chris@0
|
458 } else if (identifier == "f0min") {
|
Chris@0
|
459 m_f0_params.f0min = value;
|
Chris@0
|
460 } else if (identifier == "num_octs") {
|
Chris@0
|
461 m_f0_params.num_octs = value;
|
Chris@0
|
462 } else if (identifier == "f0s_per_oct") {
|
Chris@0
|
463 m_f0_params.num_f0s_per_oct = value;
|
Chris@0
|
464 } else if (identifier == "num_f0_hyps") {
|
Chris@0
|
465 m_f0_params.num_f0_hyps = value;
|
Chris@0
|
466 } else if (identifier == "f0_prefer_fun") {
|
Chris@0
|
467 m_f0_params.prefer = value;
|
Chris@0
|
468 } else if (identifier == "f0_prefer_mean") {
|
Chris@0
|
469 m_f0_params.prefer_mean = value;
|
Chris@0
|
470 } else if (identifier == "f0_prefer_stdev") {
|
Chris@0
|
471 m_f0_params.prefer_stdev = value;
|
Chris@0
|
472 } else if (identifier == "f0gram_mode") {
|
Chris@15
|
473 m_f0gram_mode = (value > 0.5 ?
|
Chris@15
|
474 BestBinOfAllDirections :
|
Chris@15
|
475 AllBinsOfBestDirection);
|
Chris@15
|
476 } else {
|
Chris@15
|
477 cerr << "WARNING: Unknown parameter id \""
|
Chris@15
|
478 << identifier << "\"" << endl;
|
Chris@0
|
479 }
|
Chris@0
|
480 }
|
Chris@0
|
481
|
Chris@0
|
482 FChTransformF0gram::ProgramList
|
Chris@0
|
483 FChTransformF0gram::getPrograms() const {
|
Chris@0
|
484 ProgramList list;
|
Chris@0
|
485 return list;
|
Chris@0
|
486 }
|
Chris@0
|
487
|
Chris@0
|
488 FChTransformF0gram::OutputList
|
Chris@0
|
489 FChTransformF0gram::getOutputDescriptors() const {
|
Chris@0
|
490
|
Chris@0
|
491 OutputList list;
|
Chris@0
|
492
|
Chris@16
|
493 vector<string> labels;
|
Chris@16
|
494 char label[100];
|
Chris@0
|
495
|
Chris@16
|
496 if (m_processingMode == ModeF0Gram) {
|
Chris@16
|
497
|
Chris@16
|
498 /* f0 values of F0gram grid as string values */
|
Chris@16
|
499 for (int i = 0; i < m_num_f0s; ++i) {
|
Chris@16
|
500 sprintf(label, "%4.2f Hz", m_f0s[i]);
|
Chris@16
|
501 labels.push_back(label);
|
Chris@16
|
502 }
|
Chris@16
|
503
|
Chris@16
|
504 /* The F0gram */
|
Chris@16
|
505 OutputDescriptor d;
|
Chris@16
|
506 d.identifier = "f0gram";
|
Chris@19
|
507 d.name = "F0gram";
|
Chris@19
|
508 d.description = "The salience of the different f0s in the signal.";
|
Chris@16
|
509 d.hasFixedBinCount = true;
|
Chris@16
|
510 d.binCount = m_f0_params.num_octs * m_f0_params.num_f0s_per_oct;
|
Chris@16
|
511 d.binNames = labels;
|
Chris@16
|
512 d.hasKnownExtents = false;
|
Chris@16
|
513 d.isQuantized = false;
|
Chris@16
|
514 d.sampleType = OutputDescriptor::OneSamplePerStep;
|
Chris@16
|
515 d.hasDuration = false;
|
Chris@16
|
516 list.push_back(d);
|
Chris@16
|
517
|
Chris@19
|
518 d.identifier = "pitch";
|
Chris@19
|
519 d.name = "Most salient pitch";
|
Chris@19
|
520 d.description = "The most salient f0 in the signal for each time step.";
|
Chris@19
|
521 d.unit = "Hz";
|
Chris@19
|
522 d.hasFixedBinCount = true;
|
Chris@19
|
523 d.binCount = 1;
|
Chris@19
|
524 d.binNames.clear();
|
Chris@19
|
525 d.hasKnownExtents = false;
|
Chris@19
|
526 d.isQuantized = false;
|
Chris@19
|
527 d.sampleType = OutputDescriptor::OneSamplePerStep;
|
Chris@19
|
528 d.hasDuration = false;
|
Chris@19
|
529 list.push_back(d);
|
Chris@19
|
530
|
Chris@16
|
531 } else {
|
Chris@16
|
532
|
Chris@16
|
533 for (int i = 0; i < m_warp_params.nsamps_twarp/2+1; ++i) {
|
Chris@16
|
534 double freq = i * (m_warpings.fs_warp / m_nfft);
|
Chris@16
|
535 sprintf(label, "%4.2f Hz", freq);
|
Chris@16
|
536 labels.push_back(label);
|
Chris@16
|
537 }
|
Chris@16
|
538
|
Chris@16
|
539 OutputDescriptor d;
|
Chris@16
|
540 d.identifier = "spectrogram";
|
Chris@16
|
541 d.name = "Spectrogram";
|
Chris@16
|
542 d.description = "Time/frequency spectrogram derived from the Fan Chirp Transform output";
|
Chris@16
|
543 d.hasFixedBinCount = true;
|
Chris@16
|
544 d.binCount = m_warp_params.nsamps_twarp/2+1;
|
Chris@16
|
545 d.binNames = labels;
|
Chris@16
|
546 d.hasKnownExtents = false;
|
Chris@16
|
547 d.isQuantized = false;
|
Chris@16
|
548 d.sampleType = OutputDescriptor::OneSamplePerStep;
|
Chris@16
|
549 d.hasDuration = false;
|
Chris@16
|
550 list.push_back(d);
|
Chris@0
|
551 }
|
Chris@16
|
552
|
Chris@0
|
553 return list;
|
Chris@0
|
554 }
|
Chris@0
|
555
|
Chris@0
|
556 bool
|
Chris@0
|
557 FChTransformF0gram::initialise(size_t channels, size_t stepSize, size_t blockSize) {
|
Chris@0
|
558 if (channels < getMinChannelCount() ||
|
Chris@20
|
559 channels > getMaxChannelCount() ||
|
Chris@20
|
560 blockSize != m_blockSize/2 ||
|
Chris@20
|
561 stepSize != m_stepSize) {
|
Chris@14
|
562 return false;
|
Chris@14
|
563 }
|
Chris@0
|
564
|
Chris@20
|
565 m_inputBuffer = allocate_and_zero<float>(m_blockSize);
|
Chris@20
|
566
|
Chris@0
|
567 // WARNING !!!
|
Chris@0
|
568 // these values in fact are determined by the sampling frequency m_fs
|
Chris@0
|
569 // the parameters used below correspond to default values i.e. m_fs = 44.100 Hz
|
Chris@0
|
570 //m_blockSize = 4 * m_warp_params.nsamps_twarp;
|
Chris@16
|
571 // m_stepSize = floor(m_hop / m_warp_params.fact_over_samp);
|
Chris@16
|
572
|
Chris@16
|
573 /* design of FChT */
|
Chris@16
|
574 design_FChT();
|
Chris@0
|
575
|
Chris@0
|
576 /* initialise m_glogs_params */
|
Chris@7
|
577 design_GLogS();
|
Chris@0
|
578
|
Chris@7
|
579 design_LPF();
|
Chris@0
|
580
|
Chris@7
|
581 design_time_window();
|
Chris@0
|
582
|
Chris@7
|
583 // Create Hanning window for warped signals
|
Chris@14
|
584 mp_HanningWindow = allocate<double>(m_warp_params.nsamps_twarp);
|
Chris@7
|
585 bool normalize = false;
|
Chris@14
|
586 Utils::hanning_window(mp_HanningWindow, m_warp_params.nsamps_twarp, normalize);
|
Chris@0
|
587
|
Chris@16
|
588 m_num_f0s = m_f0_params.num_octs * m_f0_params.num_f0s_per_oct;
|
Chris@16
|
589 m_f0s = allocate<double>(m_num_f0s);
|
Chris@16
|
590 for (int i = 0; i < m_num_f0s; ++i) {
|
Chris@16
|
591 m_f0s[i] = m_glogs_f0[m_glogs_init_f0s + i];
|
Chris@16
|
592 }
|
Chris@20
|
593
|
Chris@20
|
594 m_initialised = true;
|
Chris@0
|
595 return true;
|
Chris@0
|
596 }
|
Chris@0
|
597
|
Chris@0
|
598 void
|
Chris@0
|
599 FChTransformF0gram::design_GLogS() {
|
Chris@0
|
600
|
Chris@7
|
601 // total number & initial quantity of f0s
|
Chris@16
|
602
|
Chris@16
|
603 cerr << "per oct = " << m_f0_params.num_f0s_per_oct << ", octs = " << m_f0_params.num_octs << endl;
|
Chris@10
|
604 m_glogs_init_f0s = (int)(((double)m_f0_params.num_f0s_per_oct)*log2(5.0))+1;
|
Chris@16
|
605 cerr << "init_f0s = " << m_glogs_init_f0s << endl;
|
Chris@7
|
606 m_glogs_num_f0s = (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct + m_glogs_init_f0s;
|
Chris@16
|
607 cerr << "num_f0s = " << m_glogs_num_f0s << endl;
|
Chris@0
|
608
|
Chris@7
|
609 // Initialize arrays
|
Chris@14
|
610 m_glogs_f0 = allocate<double>(m_glogs_num_f0s);
|
Chris@14
|
611 m_glogs = allocate<double>(m_glogs_num_f0s*m_warp_params.num_warps);
|
Chris@14
|
612 m_glogs_n = allocate<int>(m_glogs_num_f0s);
|
Chris@14
|
613 m_glogs_index = allocate<int>(m_glogs_num_f0s);
|
Chris@0
|
614
|
Chris@7
|
615 // Compute f0 values
|
Chris@7
|
616 m_glogs_harmonic_count = 0;
|
Chris@7
|
617 double factor = (double)(m_warp_params.nsamps_twarp/2)/(double)(m_warp_params.nsamps_twarp/2+1);
|
Chris@10
|
618 for (int i = 0; i < m_glogs_num_f0s; i++) {
|
Chris@7
|
619 m_glogs_f0[i] = (m_f0_params.f0min/5.0)*pow(2.0,(double)i/(double)m_f0_params.num_f0s_per_oct);
|
Chris@7
|
620 // for every f0 compute number of partials less or equal than m_fmax.
|
Chris@7
|
621 m_glogs_n[i] = m_fmax*factor/m_glogs_f0[i];
|
Chris@7
|
622 m_glogs_index[i] = m_glogs_harmonic_count;
|
Chris@7
|
623 m_glogs_harmonic_count += m_glogs_n[i];
|
Chris@7
|
624 }
|
Chris@0
|
625
|
Chris@7
|
626 // Initialize arrays for interpolation
|
Chris@14
|
627 m_glogs_posint = allocate<int>(m_glogs_harmonic_count);
|
Chris@14
|
628 m_glogs_posfrac = allocate<double>(m_glogs_harmonic_count);
|
Chris@14
|
629 m_glogs_interp = allocate<double>(m_glogs_harmonic_count);
|
Chris@0
|
630
|
Chris@7
|
631 // Compute int & frac of interpolation positions
|
Chris@10
|
632 int aux_index = 0;
|
Chris@7
|
633 double aux_pos;
|
Chris@10
|
634 for (int i = 0; i < m_glogs_num_f0s; i++) {
|
Chris@10
|
635 for (int j = 1; j <= m_glogs_n[i]; j++) {
|
Chris@18
|
636 aux_pos = ((double)j * m_glogs_f0[i]) * ((double)(m_warp_params.nsamps_twarp))/m_warpings.fs_warp;
|
Chris@10
|
637 m_glogs_posint[aux_index] = (int)aux_pos;
|
Chris@7
|
638 m_glogs_posfrac[aux_index] = aux_pos - (double)m_glogs_posint[aux_index];
|
Chris@7
|
639 aux_index++;
|
Chris@7
|
640 }
|
Chris@7
|
641 }
|
Chris@0
|
642
|
Chris@7
|
643 // Third harmonic attenuation
|
Chris@7
|
644 double aux_third_harmonic;
|
Chris@14
|
645 m_glogs_third_harmonic_posint = allocate<int>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@14
|
646 m_glogs_third_harmonic_posfrac = allocate<double>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@10
|
647 for (int i = 0; i < (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct; i++) {
|
Chris@7
|
648 aux_third_harmonic = (double)i + (double)m_glogs_init_f0s - ((double)m_f0_params.num_f0s_per_oct)*log2(3.0);
|
Chris@10
|
649 m_glogs_third_harmonic_posint[i] = (int)aux_third_harmonic;
|
Chris@7
|
650 m_glogs_third_harmonic_posfrac[i] = aux_third_harmonic - (double)(m_glogs_third_harmonic_posint[i]);
|
Chris@7
|
651 }
|
Chris@14
|
652 m_glogs_third_harmonic = allocate<double>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@0
|
653
|
Chris@7
|
654 // Fifth harmonic attenuation
|
Chris@7
|
655 double aux_fifth_harmonic;
|
Chris@14
|
656 m_glogs_fifth_harmonic_posint = allocate<int>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@14
|
657 m_glogs_fifth_harmonic_posfrac = allocate<double>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@10
|
658 for (int i = 0; i < (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct; i++) {
|
Chris@7
|
659 aux_fifth_harmonic = (double)i + (double)m_glogs_init_f0s - ((double)m_f0_params.num_f0s_per_oct)*log2(5.0);
|
Chris@10
|
660 m_glogs_fifth_harmonic_posint[i] = (int)aux_fifth_harmonic;
|
Chris@7
|
661 m_glogs_fifth_harmonic_posfrac[i] = aux_fifth_harmonic - (double)(m_glogs_fifth_harmonic_posint[i]);
|
Chris@7
|
662 }
|
Chris@14
|
663 m_glogs_fifth_harmonic = allocate<double>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@0
|
664
|
Chris@7
|
665 // Normalization & attenuation windows
|
Chris@14
|
666 m_glogs_f0_preference_weights = allocate<double>(m_f0_params.num_octs*m_f0_params.num_f0s_per_oct);
|
Chris@14
|
667 m_glogs_median_correction = allocate<double>(m_f0_params.num_octs*m_f0_params.num_f0s_per_oct);
|
Chris@14
|
668 m_glogs_sigma_correction = allocate<double>(m_f0_params.num_octs*m_f0_params.num_f0s_per_oct);
|
Chris@7
|
669 double MIDI_value;
|
Chris@10
|
670 for (int i = 0; i < m_f0_params.num_octs*m_f0_params.num_f0s_per_oct; i++) {
|
Chris@7
|
671 MIDI_value = 69.0 + 12.0 * log2(m_glogs_f0[i + m_glogs_init_f0s]/440.0);
|
Chris@7
|
672 m_glogs_f0_preference_weights[i] = 1.0/sqrt(2.0*M_PI*m_f0_params.prefer_stdev*m_f0_params.prefer_stdev)*exp(-(MIDI_value-m_f0_params.prefer_mean)*(MIDI_value-m_f0_params.prefer_mean)/(2.0*m_f0_params.prefer_stdev*m_f0_params.prefer_stdev));
|
Chris@7
|
673 m_glogs_f0_preference_weights[i] = (0.01 + m_glogs_f0_preference_weights[i]) / (1.01);
|
Chris@0
|
674
|
Chris@7
|
675 m_glogs_median_correction[i] = m_glogs_params.median_poly_coefs[0]*(i+1.0)*(i+1.0) + m_glogs_params.median_poly_coefs[1]*(i+1.0) + m_glogs_params.median_poly_coefs[2];
|
Chris@7
|
676 m_glogs_sigma_correction[i] = 1.0 / (m_glogs_params.sigma_poly_coefs[0]*(i+1.0)*(i+1.0) + m_glogs_params.sigma_poly_coefs[1]*(i+1.0) + m_glogs_params.sigma_poly_coefs[2]);
|
Chris@7
|
677 }
|
Chris@0
|
678 }
|
Chris@0
|
679
|
Chris@0
|
680 void
|
Chris@0
|
681 FChTransformF0gram::design_FChT() {
|
Chris@0
|
682
|
Chris@0
|
683 /* ============= WARPING DESIGN ============= */
|
Chris@0
|
684
|
Chris@0
|
685 // sampling frequency after oversampling
|
Chris@0
|
686 m_warpings.fs_orig = m_warp_params.fact_over_samp * m_fs;
|
Chris@0
|
687
|
Chris@0
|
688 // number of samples of the original signal frame
|
Chris@0
|
689 m_warpings.nsamps_torig = 4 * m_warp_params.fact_over_samp * m_warp_params.nsamps_twarp;
|
Chris@0
|
690 // equivalent to: m_warpings.nsamps_torig = m_warp_params.fact_over_samp * m_blockSize;
|
Chris@0
|
691
|
Chris@0
|
692 // time instants of the original signal frame
|
Chris@14
|
693 double *t_orig = allocate<double>(m_warpings.nsamps_torig);
|
Chris@10
|
694 for (int ind = 0; ind < m_warpings.nsamps_torig; ind++) {
|
Chris@0
|
695 t_orig[ind] = ((double)(ind + 1) - (double)m_warpings.nsamps_torig / 2.0) / m_warpings.fs_orig;
|
Chris@0
|
696 }
|
Chris@0
|
697
|
Chris@0
|
698 // linear chirps warping definition as relative frequency deviation
|
Chris@7
|
699 //TODO
|
Chris@14
|
700 double *freq_relative = allocate<double>(m_warpings.nsamps_torig * m_warp_params.num_warps);
|
Chris@0
|
701 define_warps_linear_chirps(freq_relative, t_orig);
|
Chris@0
|
702
|
Chris@0
|
703 // maximum relative frequency deviation
|
Chris@0
|
704 double freq_relative_max = 0;
|
Chris@14
|
705 for (int i = 0; i < m_warpings.nsamps_torig; i++) {
|
Chris@14
|
706 for (int j = 0; j < m_warp_params.num_warps; j++) {
|
Chris@14
|
707 if (freq_relative_max < freq_relative[j * m_warpings.nsamps_torig + i]) {
|
Chris@0
|
708 freq_relative_max = freq_relative[j * m_warpings.nsamps_torig + i];
|
Chris@14
|
709 }
|
Chris@14
|
710 }
|
Chris@14
|
711 }
|
Chris@0
|
712
|
Chris@0
|
713 // sampling frequency of warped signal to be free of aliasing up to fmax
|
Chris@0
|
714 m_warpings.fs_warp = 2 * m_fmax * freq_relative_max;
|
Chris@0
|
715
|
Chris@0
|
716 // time instants of the warped signal frame
|
Chris@14
|
717 double *t_warp = allocate<double>(m_warp_params.nsamps_twarp);
|
Chris@10
|
718 for (int ind = 0; ind < m_warp_params.nsamps_twarp; ind++) {
|
Chris@0
|
719 t_warp[ind] = ((double)((int)(ind + 1)- (int)m_warp_params.nsamps_twarp / 2)) / (double)m_warpings.fs_warp;
|
Chris@0
|
720 }
|
Chris@0
|
721
|
Chris@0
|
722 // design of warpings for efficient interpolation
|
Chris@0
|
723 design_warps(freq_relative, t_orig, t_warp);
|
Chris@0
|
724
|
Chris@14
|
725 deallocate(freq_relative);
|
Chris@14
|
726 deallocate(t_orig);
|
Chris@14
|
727 deallocate(t_warp);
|
Chris@14
|
728
|
Chris@0
|
729 /* ============= FFTW PLAN DESIGN ============= */
|
Chris@7
|
730 // Initialize 2-d array for warped signals
|
Chris@14
|
731 x_warping = allocate<double>(m_warp_params.nsamps_twarp);
|
Chris@14
|
732 m_absFanChirpTransform = allocate<double>(m_warp_params.num_warps * (m_warp_params.nsamps_twarp/2 + 1));
|
Chris@14
|
733 m_auxFanChirpTransform = allocate<double>(2 * (m_warp_params.nsamps_twarp/2 + 1));
|
Chris@14
|
734 fft_xwarping = new FFTReal(m_warp_params.nsamps_twarp);
|
Chris@0
|
735 }
|
Chris@0
|
736
|
Chris@0
|
737 void
|
Chris@0
|
738 FChTransformF0gram::design_warps(double * freq_relative, double * t_orig, double * t_warp) {
|
Chris@0
|
739 /* the warping is done by interpolating the original signal in time instants
|
Chris@0
|
740 given by the desired frequency deviation, to do this, the interpolation
|
Chris@0
|
741 instants are stored in a structure as an integer index and a fractional value
|
Chris@0
|
742 hypothesis: sampling frequency at the central point equals the original
|
Chris@7
|
743 */
|
Chris@0
|
744
|
Chris@14
|
745 m_warpings.pos_int = allocate<int>(m_warp_params.num_warps * m_warp_params.nsamps_twarp);
|
Chris@14
|
746 m_warpings.pos_frac = allocate<double>(m_warp_params.num_warps * m_warp_params.nsamps_twarp);
|
Chris@0
|
747
|
Chris@7
|
748 // vector of phase values
|
Chris@14
|
749 double *phi = allocate<double>(m_warpings.nsamps_torig);
|
Chris@7
|
750 double aux;
|
Chris@0
|
751
|
Chris@7
|
752 // warped positions
|
Chris@14
|
753 double *pos1 = allocate<double>(m_warp_params.nsamps_twarp*m_warp_params.num_warps);
|
Chris@0
|
754
|
Chris@10
|
755 for (int i = 0; i < m_warp_params.num_warps; i++) {
|
Chris@0
|
756
|
Chris@7
|
757 // integration of relative frequency to obtain phase values
|
Chris@14
|
758 Utils::cumtrapz(t_orig, freq_relative + i*(m_warpings.nsamps_torig), m_warpings.nsamps_torig, phi);
|
Chris@0
|
759
|
Chris@7
|
760 // centering of phase values to force original frequency in the middle
|
Chris@7
|
761 aux = phi[m_warpings.nsamps_torig/2];
|
Chris@10
|
762 for (int j = 0; j < m_warpings.nsamps_torig; j++) {
|
Chris@7
|
763 phi[j] -= aux;
|
Chris@7
|
764 } //for
|
Chris@0
|
765
|
Chris@7
|
766 // interpolation of phase values to obtain warped positions
|
Chris@14
|
767 Utils::interp1(phi, t_orig, m_warpings.nsamps_torig, t_warp, pos1 + i*m_warp_params.nsamps_twarp, m_warp_params.nsamps_twarp);
|
Chris@0
|
768 }
|
Chris@0
|
769
|
Chris@0
|
770 // % previous sample index
|
Chris@0
|
771 // pos1_int = uint32(floor(pos1))';
|
Chris@0
|
772 // % integer corresponding to previous sample index in "c"
|
Chris@0
|
773 // warps.pos1_int = (pos1_int - uint32(1));
|
Chris@0
|
774 // % fractional value that defines the warped position
|
Chris@0
|
775 // warps.pos1_frac = (double(pos1)' - double(pos1_int));
|
Chris@0
|
776
|
Chris@10
|
777 for (int j = 0; j < m_warp_params.nsamps_twarp*m_warp_params.num_warps; j++) {
|
Chris@7
|
778 // previous sample index
|
Chris@7
|
779 pos1[j] = pos1[j]*m_warpings.fs_orig + m_warpings.nsamps_torig/2 + 1;
|
Chris@10
|
780 m_warpings.pos_int[j] = (int) pos1[j];
|
Chris@7
|
781 m_warpings.pos_frac[j] = pos1[j] - (double)(m_warpings.pos_int[j]);
|
Chris@7
|
782 } //for
|
Chris@0
|
783
|
Chris@14
|
784 deallocate(phi);
|
Chris@14
|
785 deallocate(pos1);
|
Chris@0
|
786 }
|
Chris@0
|
787
|
Chris@0
|
788 void
|
Chris@0
|
789 FChTransformF0gram::define_warps_linear_chirps(double * freq_relative, double * t_orig) {
|
Chris@0
|
790 /** define warps as relative frequency deviation from original frequency
|
Chris@7
|
791 t_orig : time vector
|
Chris@7
|
792 freq_relative : relative frequency deviations
|
Chris@7
|
793 */
|
Chris@0
|
794 if (m_warp_params.alpha_dist == 0) {
|
Chris@0
|
795
|
Chris@0
|
796 // linear alpha values spacing
|
Chris@14
|
797 m_warpings.chirp_rates = allocate<double>(m_warp_params.num_warps);
|
Chris@0
|
798 // WARNING m_warp_params.num_warps must be odd
|
Chris@0
|
799 m_warpings.chirp_rates[0] = -m_warp_params.alpha_max;
|
Chris@0
|
800 double increment = (double) m_warp_params.alpha_max / ((m_warp_params.num_warps - 1) / 2);
|
Chris@0
|
801
|
Chris@10
|
802 for (int ind = 1; ind < m_warp_params.num_warps; ind++) {
|
Chris@0
|
803 m_warpings.chirp_rates[ind] = m_warpings.chirp_rates[ind - 1] + increment;
|
Chris@0
|
804 }
|
Chris@0
|
805 // force zero value
|
Chris@0
|
806 m_warpings.chirp_rates[(int) ((m_warp_params.num_warps - 1) / 2)] = 0;
|
Chris@0
|
807
|
Chris@0
|
808 } else {
|
Chris@0
|
809 // log alpha values spacing
|
Chris@14
|
810 m_warpings.chirp_rates = allocate<double>(m_warp_params.num_warps);
|
Chris@0
|
811
|
Chris@0
|
812 // force zero value
|
Chris@0
|
813 int middle_point = (int) ((m_warp_params.num_warps - 1) / 2);
|
Chris@0
|
814 m_warpings.chirp_rates[middle_point] = 0;
|
Chris@0
|
815
|
Chris@0
|
816 double logMax = log10(m_warp_params.alpha_max + 1);
|
Chris@0
|
817 double increment = logMax / ((m_warp_params.num_warps - 1) / 2.0f);
|
Chris@0
|
818 double exponent = 0;
|
Chris@0
|
819
|
Chris@0
|
820 // fill positive values
|
Chris@0
|
821 int ind_log = middle_point;
|
Chris@10
|
822 for (int ind = 0; ind < (m_warp_params.num_warps + 1) / 2; ind++) {
|
Chris@0
|
823 m_warpings.chirp_rates[ind_log] = pow(10, exponent) - 1;
|
Chris@0
|
824 exponent += increment;
|
Chris@0
|
825 ind_log++;
|
Chris@0
|
826 }
|
Chris@0
|
827 // fill negative values
|
Chris@10
|
828 for (int ind = 0; ind < (m_warp_params.num_warps - 1) / 2; ind++) {
|
Chris@0
|
829 m_warpings.chirp_rates[ind] = -m_warpings.chirp_rates[m_warp_params.num_warps - 1 - ind];
|
Chris@0
|
830 }
|
Chris@0
|
831 }
|
Chris@0
|
832
|
Chris@0
|
833 // compute relative frequency deviation
|
Chris@14
|
834 for (int i = 0; i < m_warpings.nsamps_torig; i++) {
|
Chris@14
|
835 for (int j = 0; j < m_warp_params.num_warps; j++) {
|
Chris@0
|
836 freq_relative[j * m_warpings.nsamps_torig + i] = 1.0 + t_orig[i] * m_warpings.chirp_rates[j];
|
Chris@14
|
837 }
|
Chris@14
|
838 }
|
Chris@0
|
839 }
|
Chris@0
|
840
|
Chris@0
|
841 void
|
Chris@14
|
842 FChTransformF0gram::design_LPF()
|
Chris@14
|
843 {
|
Chris@14
|
844 double *lp_LPFWindow_aux = allocate<double>(m_blockSize/2+1);
|
Chris@14
|
845 mp_LPFWindow = allocate<double>(m_blockSize/2+1);
|
Chris@0
|
846
|
Chris@10
|
847 int i_max = (int) ((2.0*m_fmax/m_fs) * ( (double)m_blockSize / 2.0 + 1.0 ));
|
Chris@10
|
848 for (int i = 0; i < m_blockSize/2+1; i++) {
|
Chris@0
|
849 if (i >= i_max) {
|
Chris@0
|
850 lp_LPFWindow_aux[i] = 0.0;
|
Chris@0
|
851 } else {
|
Chris@0
|
852 lp_LPFWindow_aux[i] = 1.0;
|
Chris@0
|
853 }
|
Chris@0
|
854 }
|
Chris@14
|
855
|
Chris@14
|
856 LPF_time = allocate_and_zero<double>(m_warpings.nsamps_torig);
|
Chris@14
|
857 LPF_frequency = allocate_and_zero<double>(2 * (m_warpings.nsamps_torig/2 + 1));
|
Chris@14
|
858
|
Chris@14
|
859 fft_forward_LPF = new FFTReal(m_blockSize);
|
Chris@14
|
860 fft_inverse_LPF = new FFTReal(m_warpings.nsamps_torig);
|
Chris@0
|
861
|
Chris@10
|
862 int winWidth = 11;
|
Chris@14
|
863 double *lp_hanningWindow = allocate<double>(winWidth);
|
Chris@0
|
864 double accum=0;
|
Chris@10
|
865 for (int i = 0; i < winWidth; i++) {
|
Chris@0
|
866 lp_hanningWindow[i]=0.5*(1.0-cos(2*M_PI*(double)(i+1)/((double)winWidth+1.0)));
|
Chris@0
|
867 accum+=lp_hanningWindow[i];
|
Chris@0
|
868
|
Chris@0
|
869 }
|
Chris@10
|
870 for (int i = 0; i < winWidth; i++) { //window normalization
|
Chris@0
|
871 lp_hanningWindow[i]=lp_hanningWindow[i]/accum;
|
Chris@0
|
872 }
|
Chris@10
|
873 for (int i = 0; i < m_blockSize/2+1; i++) {
|
Chris@0
|
874 //if (((i-(winWidth-1)/2)<0)||(i+(winWidth-1))/2>m_blockSize/2-1) {//consideramos winWidth impar, si la ventana sale del arreglo se rellena con el valor origianl
|
Chris@7
|
875 if ( (i > (i_max + (winWidth-1)/2)) || (i <= (i_max - (winWidth-1)/2)) ) {
|
Chris@0
|
876 mp_LPFWindow[i]=lp_LPFWindow_aux[i];
|
Chris@0
|
877 } else {
|
Chris@0
|
878 accum=0;
|
Chris@10
|
879 for (int j = -((winWidth-1)/2); j <= (winWidth-1)/2; j++) {
|
Chris@0
|
880 accum+=lp_LPFWindow_aux[i-j]*lp_hanningWindow[j+(winWidth-1)/2];
|
Chris@7
|
881 }
|
Chris@0
|
882 mp_LPFWindow[i]=accum;
|
Chris@0
|
883 }
|
Chris@0
|
884 }
|
Chris@0
|
885
|
Chris@14
|
886 deallocate(lp_LPFWindow_aux);
|
Chris@14
|
887 deallocate(lp_hanningWindow);
|
Chris@0
|
888 }
|
Chris@0
|
889
|
Chris@14
|
890 void FChTransformF0gram::apply_LPF()
|
Chris@14
|
891 {
|
Chris@14
|
892 fft_forward_LPF->forward(LPF_time, LPF_frequency);
|
Chris@14
|
893
|
Chris@10
|
894 for (int i = 0; i < m_blockSize/2+1; i++) {
|
Chris@16
|
895 LPF_frequency[i*2] *= mp_LPFWindow[i];
|
Chris@16
|
896 LPF_frequency[i*2 + 1] *= mp_LPFWindow[i];
|
Chris@0
|
897 }
|
Chris@14
|
898
|
Chris@14
|
899 fft_inverse_LPF->inverse(LPF_frequency, LPF_time);
|
Chris@20
|
900
|
Chris@7
|
901 // TODO ver si hay que hacer fftshift para corregir la fase respecto al centro del frame.
|
Chris@7
|
902 // nota: ademÔs de aplicar el LPF, esta función resamplea la señal original.
|
Chris@0
|
903 }
|
Chris@0
|
904
|
Chris@14
|
905 void FChTransformF0gram::clean_LPF()
|
Chris@14
|
906 {
|
Chris@14
|
907 delete fft_forward_LPF;
|
Chris@14
|
908 delete fft_inverse_LPF;
|
Chris@14
|
909 deallocate(LPF_time);
|
Chris@14
|
910 deallocate(LPF_frequency);
|
Chris@14
|
911 deallocate(mp_LPFWindow);
|
Chris@0
|
912 }
|
Chris@0
|
913
|
Chris@14
|
914 void FChTransformF0gram::reset()
|
Chris@14
|
915 {
|
Chris@0
|
916 }
|
Chris@0
|
917
|
Chris@0
|
918 FChTransformF0gram::FeatureSet
|
Chris@5
|
919 FChTransformF0gram::process(const float *const *inputBuffers, Vamp::RealTime) {
|
Chris@0
|
920
|
Chris@20
|
921 if (!m_initialised) return FeatureSet();
|
Chris@20
|
922
|
Chris@7
|
923 /* PSEUDOCĆDIGO:
|
Chris@7
|
924 - Aplicar FFT al frame entero.
|
Chris@7
|
925 - Filtro pasabajos en frecuencia.
|
Chris@7
|
926 - FFT inversa al frame entero.
|
Chris@7
|
927 -----------------------------------------------------------------------------
|
Chris@7
|
928 - Para cada warp: *Si es un espectrograma direccional (un solo warp
|
Chris@7
|
929 => no es para cada warp sino para el elegido)
|
Chris@7
|
930 - Hacer la interpolación con interp1q.
|
Chris@7
|
931 - Aplicar la FFT al frame warpeado.
|
Chris@7
|
932 - (Opcional) GLogS.
|
Chris@7
|
933 - ...
|
Chris@7
|
934 */
|
Chris@0
|
935
|
Chris@0
|
936 //---------------------------------------------------------------------------
|
Chris@7
|
937 FeatureSet fs;
|
Chris@0
|
938
|
Chris@7
|
939 #ifdef DEBUG
|
Chris@16
|
940 fprintf(stderr, "\n ----- DEBUG INFORMATION ----- \n");
|
Chris@16
|
941 fprintf(stderr, " m_fs = %f Hz.\n",m_fs);
|
Chris@16
|
942 fprintf(stderr, " fs_orig = %f Hz.\n",m_warpings.fs_orig);
|
Chris@16
|
943 fprintf(stderr, " fs_warp = %f Hz.\n",m_warpings.fs_warp);
|
Chris@16
|
944 fprintf(stderr, " m_nfft = %d.\n",m_nfft);
|
Chris@16
|
945 fprintf(stderr, " m_blockSize = %d.\n",m_blockSize);
|
Chris@16
|
946 fprintf(stderr, " m_warpings.nsamps_torig = %d.\n",m_warpings.nsamps_torig);
|
Chris@16
|
947 fprintf(stderr, " m_warp_params.num_warps = %d.\n",m_warp_params.num_warps);
|
Chris@16
|
948 fprintf(stderr, " m_glogs_harmonic_count = %d.\n",m_glogs_harmonic_count);
|
Chris@7
|
949 #endif
|
Chris@0
|
950
|
Chris@20
|
951 for (int i = 0; i < m_blockSize - m_stepSize; ++i) {
|
Chris@20
|
952 m_inputBuffer[i] = m_inputBuffer[i + m_stepSize];
|
Chris@0
|
953 }
|
Chris@20
|
954 for (int i = 0; i < m_blockSize/2; ++i) {
|
Chris@20
|
955 m_inputBuffer[m_blockSize/2 + i] = inputBuffers[0][i];
|
Chris@20
|
956 }
|
Chris@20
|
957 for (int i = 0; i < m_blockSize; ++i) {
|
Chris@20
|
958 LPF_time[i] = m_inputBuffer[i] * m_timeWindow[i];
|
Chris@20
|
959 }
|
Chris@20
|
960 for (int i = 0; i < m_blockSize; ++i) {
|
Chris@20
|
961 LPF_time[m_blockSize + i] = 0.0;
|
Chris@20
|
962 }
|
Chris@20
|
963
|
Chris@7
|
964 apply_LPF();
|
Chris@7
|
965 // SeƱal filtrada queda en LPF_time
|
Chris@0
|
966
|
Chris@7
|
967 Feature feature;
|
Chris@0
|
968 feature.hasTimestamp = false;
|
Chris@0
|
969
|
Chris@15
|
970 if (m_processingMode == ModeRoughSpectrogram) {
|
Chris@15
|
971 feature.values = vector<float>(m_warp_params.nsamps_twarp/2+1, 0.f);
|
Chris@15
|
972 }
|
Chris@15
|
973
|
Chris@0
|
974 // ----------------------------------------------------------------------------------------------
|
Chris@0
|
975 // Hanning window & FFT for all warp directions
|
Chris@0
|
976
|
Chris@7
|
977 double max_glogs = -DBL_MAX;
|
Chris@10
|
978 int ind_max_glogs = 0;
|
Chris@0
|
979
|
Chris@10
|
980 for (int i_warp = 0; i_warp < m_warp_params.num_warps; i_warp++) {
|
Chris@16
|
981
|
Chris@7
|
982 // Interpolate
|
Chris@14
|
983 Utils::interp1q(LPF_time, (m_warpings.pos_int) + i_warp*m_warp_params.nsamps_twarp, m_warpings.pos_frac + i_warp*m_warp_params.nsamps_twarp, x_warping, m_warp_params.nsamps_twarp);
|
Chris@0
|
984
|
Chris@7
|
985 // Apply window
|
Chris@10
|
986 for (int i = 0; i < m_warp_params.nsamps_twarp; i++) {
|
Chris@7
|
987 x_warping[i] *= mp_HanningWindow[i];
|
Chris@7
|
988 }
|
Chris@0
|
989
|
Chris@7
|
990 // Transform
|
Chris@14
|
991 fft_xwarping->forward(x_warping, m_auxFanChirpTransform);
|
Chris@0
|
992
|
Chris@15
|
993 if (m_processingMode == ModeRoughSpectrogram) {
|
Chris@15
|
994 for (int i = 0; i < (m_warp_params.nsamps_twarp/2+1); i++) {
|
Chris@15
|
995 double abs = sqrt(m_auxFanChirpTransform[i*2]*m_auxFanChirpTransform[i*2]+m_auxFanChirpTransform[i*2+1]*m_auxFanChirpTransform[i*2+1]);
|
Chris@15
|
996 if (abs > feature.values[i]) {
|
Chris@15
|
997 feature.values[i] = abs;
|
Chris@15
|
998 }
|
Chris@15
|
999 }
|
Chris@15
|
1000 continue;
|
Chris@15
|
1001 }
|
Chris@15
|
1002
|
Chris@7
|
1003 // Copy result
|
Chris@7
|
1004 double *aux_abs_fcht = m_absFanChirpTransform + i_warp*(m_warp_params.nsamps_twarp/2+1);
|
Chris@10
|
1005 for (int i = 0; i < (m_warp_params.nsamps_twarp/2+1); i++) {
|
Chris@14
|
1006 aux_abs_fcht[i] = log10(1.0 + 10.0*sqrt(m_auxFanChirpTransform[i*2]*m_auxFanChirpTransform[i*2]+m_auxFanChirpTransform[i*2+1]*m_auxFanChirpTransform[i*2+1]));
|
Chris@7
|
1007 }
|
Chris@0
|
1008
|
Chris@0
|
1009 // -----------------------------------------------------------------------------------------
|
Chris@0
|
1010 // GLogS
|
Chris@14
|
1011 Utils::interp1q(aux_abs_fcht, m_glogs_posint, m_glogs_posfrac, m_glogs_interp, m_glogs_harmonic_count);
|
Chris@10
|
1012 int glogs_ind = 0;
|
Chris@10
|
1013 for (int i = 0; i < m_glogs_num_f0s; i++) {
|
Chris@7
|
1014 double glogs_accum = 0;
|
Chris@10
|
1015 for (int j = 1; j <= m_glogs_n[i]; j++) {
|
Chris@7
|
1016 glogs_accum += m_glogs_interp[glogs_ind++];
|
Chris@7
|
1017 }
|
Chris@7
|
1018 m_glogs[i + i_warp*m_glogs_num_f0s] = glogs_accum/(double)m_glogs_n[i];
|
Chris@7
|
1019 }
|
Chris@0
|
1020
|
Chris@0
|
1021 // Sub/super harmonic correction
|
Chris@14
|
1022 Utils::interp1q(m_glogs + i_warp*m_glogs_num_f0s, m_glogs_third_harmonic_posint, m_glogs_third_harmonic_posfrac, m_glogs_third_harmonic, (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@14
|
1023 Utils::interp1q(m_glogs + i_warp*m_glogs_num_f0s, m_glogs_fifth_harmonic_posint, m_glogs_fifth_harmonic_posfrac, m_glogs_fifth_harmonic, (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@10
|
1024 for (int i = m_glogs_num_f0s-1; i >= m_glogs_init_f0s; i--) {
|
Chris@7
|
1025 m_glogs[i + i_warp*m_glogs_num_f0s] -= MAX(MAX(m_glogs[i-m_f0_params.num_f0s_per_oct + i_warp*m_glogs_num_f0s],m_glogs_third_harmonic[i-m_glogs_init_f0s]),m_glogs_fifth_harmonic[i-m_glogs_init_f0s]);
|
Chris@7
|
1026 //m_glogs[i] -= MAX(m_glogs[i-m_f0_params.num_f0s_per_oct],m_glogs_third_harmonic[i-m_glogs_init_f0s]);
|
Chris@7
|
1027 }
|
Chris@10
|
1028 for (int i = m_glogs_init_f0s; i < m_glogs_num_f0s-m_f0_params.num_f0s_per_oct; i++) {
|
Chris@7
|
1029 m_glogs[i + i_warp*m_glogs_num_f0s] -= 0.3*m_glogs[i+m_f0_params.num_f0s_per_oct + i_warp*m_glogs_num_f0s];
|
Chris@7
|
1030 // Median, sigma $ weights correction
|
Chris@7
|
1031 m_glogs[i + i_warp*m_glogs_num_f0s] = (m_glogs[i + i_warp*m_glogs_num_f0s]-m_glogs_median_correction[i-m_glogs_init_f0s])*m_glogs_sigma_correction[i-m_glogs_init_f0s]*m_glogs_f0_preference_weights[i-m_glogs_init_f0s];
|
Chris@7
|
1032 }
|
Chris@0
|
1033
|
Chris@7
|
1034 // Look for maximum value to determine best direction
|
Chris@10
|
1035 for (int i = m_glogs_init_f0s; i < m_glogs_num_f0s-m_f0_params.num_f0s_per_oct; i++) {
|
Chris@7
|
1036 if (m_glogs[i + i_warp*m_glogs_num_f0s] > max_glogs) {
|
Chris@7
|
1037 max_glogs = m_glogs[i + i_warp*m_glogs_num_f0s];
|
Chris@7
|
1038 ind_max_glogs = i_warp;
|
Chris@7
|
1039 }
|
Chris@7
|
1040 }
|
Chris@7
|
1041 }
|
Chris@0
|
1042
|
Chris@15
|
1043 if (m_processingMode == ModeRoughSpectrogram) {
|
Chris@15
|
1044
|
Chris@15
|
1045 // already accumulated our return values in feature
|
Chris@19
|
1046 fs[0].push_back(feature);
|
Chris@15
|
1047
|
Chris@15
|
1048 } else if (m_processingMode == ModeSpectrogram) {
|
Chris@15
|
1049
|
Chris@15
|
1050 for (int i = 0; i < m_warp_params.nsamps_twarp/2+1; i++) {
|
Chris@15
|
1051 feature.values.push_back(pow(10.0, m_absFanChirpTransform[ind_max_glogs * (m_warp_params.nsamps_twarp/2+1) + i]) - 1.0);
|
Chris@15
|
1052 }
|
Chris@19
|
1053 fs[0].push_back(feature);
|
Chris@15
|
1054
|
Chris@15
|
1055 } else { // f0gram
|
Chris@15
|
1056
|
Chris@19
|
1057 int bestIndex = -1;
|
Chris@19
|
1058
|
Chris@15
|
1059 for (int i=m_glogs_init_f0s; i< m_glogs_num_f0s - m_f0_params.num_f0s_per_oct; i++) {
|
Chris@19
|
1060 double value = 0.0;
|
Chris@15
|
1061 switch (m_f0gram_mode) {
|
Chris@15
|
1062 case AllBinsOfBestDirection:
|
Chris@19
|
1063 value = m_glogs[i+(int)ind_max_glogs*(int)m_glogs_num_f0s];
|
Chris@15
|
1064 break;
|
Chris@15
|
1065 case BestBinOfAllDirections:
|
Chris@15
|
1066 max_glogs = -DBL_MAX;
|
Chris@15
|
1067 for (int i_warp = 0; i_warp < m_warp_params.num_warps; i_warp++) {
|
Chris@15
|
1068 if (m_glogs[i + i_warp*m_glogs_num_f0s] > max_glogs) {
|
Chris@15
|
1069 max_glogs = m_glogs[i + i_warp*m_glogs_num_f0s];
|
Chris@15
|
1070 ind_max_glogs = i_warp;
|
Chris@15
|
1071 }
|
Chris@7
|
1072 }
|
Chris@19
|
1073 value = max_glogs;
|
Chris@15
|
1074 break;
|
Chris@7
|
1075 }
|
Chris@19
|
1076 if (bestIndex < 0 || float(value) > feature.values[bestIndex]) {
|
Chris@19
|
1077 bestIndex = int(feature.values.size());
|
Chris@19
|
1078 }
|
Chris@19
|
1079 feature.values.push_back(float(value));
|
Chris@19
|
1080 }
|
Chris@19
|
1081
|
Chris@19
|
1082 fs[0].push_back(feature);
|
Chris@19
|
1083
|
Chris@19
|
1084 if (bestIndex >= 0) {
|
Chris@19
|
1085
|
Chris@19
|
1086 double bestValue = feature.values[bestIndex];
|
Chris@19
|
1087 set<double> ordered(feature.values.begin(), feature.values.end());
|
Chris@19
|
1088 vector<double> flattened(ordered.begin(), ordered.end());
|
Chris@19
|
1089 double median = flattened[flattened.size()/2];
|
Chris@19
|
1090 if (bestValue > median * 8.0) {
|
Chris@19
|
1091 Feature pfeature;
|
Chris@19
|
1092 pfeature.hasTimestamp = false;
|
Chris@19
|
1093 pfeature.values.push_back(m_f0s[bestIndex]);
|
Chris@19
|
1094 fs[1].push_back(pfeature);
|
Chris@19
|
1095 }
|
Chris@7
|
1096 }
|
Chris@7
|
1097 }
|
Chris@0
|
1098
|
Chris@7
|
1099 return fs;
|
Chris@0
|
1100 }
|
Chris@0
|
1101
|
Chris@0
|
1102 FChTransformF0gram::FeatureSet
|
Chris@0
|
1103 FChTransformF0gram::getRemainingFeatures() {
|
Chris@0
|
1104 return FeatureSet();
|
Chris@0
|
1105 }
|
Chris@0
|
1106
|
Chris@0
|
1107 void
|
Chris@0
|
1108 FChTransformF0gram::design_time_window() {
|
Chris@0
|
1109
|
Chris@20
|
1110 int transitionWidth = (int)m_blockSize/128 + 128;
|
Chris@14
|
1111 m_timeWindow = allocate<double>(m_blockSize);
|
Chris@14
|
1112 double *lp_transitionWindow = allocate<double>(transitionWidth);
|
Chris@0
|
1113
|
Chris@10
|
1114 for (int i = 0; i < m_blockSize; i++) {
|
Chris@7
|
1115 m_timeWindow[i] = 1.0;
|
Chris@7
|
1116 }
|
Chris@0
|
1117
|
Chris@10
|
1118 for (int i = 0; i < transitionWidth; i++) {
|
Chris@0
|
1119 lp_transitionWindow[i]=0.5*(1.0-cos(2*M_PI*(double)(i+1)/((double)transitionWidth+1.0)));
|
Chris@0
|
1120 }
|
Chris@0
|
1121
|
Chris@10
|
1122 for (int i = 0; i < transitionWidth/2; i++) {
|
Chris@7
|
1123 m_timeWindow[i] = lp_transitionWindow[i];
|
Chris@7
|
1124 m_timeWindow[m_blockSize-1-i] = lp_transitionWindow[transitionWidth-1-i];
|
Chris@7
|
1125 }
|
Chris@0
|
1126
|
Chris@7
|
1127 #ifdef DEBUG
|
Chris@7
|
1128 for (int i = 0; i < m_blockSize; i++) {
|
Chris@7
|
1129 if ((i<transitionWidth)) {
|
Chris@16
|
1130 fprintf(stderr, " m_timeWindow[%d] = %f.\n",i,m_timeWindow[i]);
|
Chris@7
|
1131 }
|
Chris@7
|
1132 }
|
Chris@7
|
1133 #endif
|
Chris@0
|
1134
|
Chris@14
|
1135 deallocate(lp_transitionWindow);
|
Chris@0
|
1136 }
|
Chris@0
|
1137
|