Chris@0
|
1 /*
|
Chris@0
|
2 copyright (C) 2011 I. Irigaray, M. Rocamora
|
Chris@0
|
3
|
Chris@0
|
4 This program is free software: you can redistribute it and/or modify
|
Chris@0
|
5 it under the terms of the GNU General Public License as published by
|
Chris@0
|
6 the Free Software Foundation, either version 3 of the License, or
|
Chris@0
|
7 (at your option) any later version.
|
Chris@0
|
8
|
Chris@0
|
9 This program is distributed in the hope that it will be useful,
|
Chris@0
|
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
|
Chris@0
|
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
Chris@0
|
12 GNU General Public License for more details.
|
Chris@0
|
13
|
Chris@0
|
14 You should have received a copy of the GNU General Public License
|
Chris@0
|
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
|
Chris@7
|
16 */
|
Chris@0
|
17
|
Chris@0
|
18 #include "FChTransformF0gram.h"
|
Chris@0
|
19 #include "FChTransformUtils.h"
|
Chris@0
|
20 #include <math.h>
|
Chris@0
|
21 #include <float.h>
|
Chris@14
|
22
|
Chris@14
|
23 #include "bqvec/Allocators.h"
|
Chris@14
|
24
|
Chris@14
|
25 using namespace breakfastquay;
|
Chris@14
|
26
|
Chris@16
|
27 #define DEBUG
|
Chris@7
|
28
|
Chris@0
|
29 #define MAX(x, y) (((x) > (y)) ? (x) : (y))
|
Chris@0
|
30
|
Chris@15
|
31 FChTransformF0gram::FChTransformF0gram(ProcessingMode mode,
|
Chris@15
|
32 float inputSampleRate) :
|
Chris@7
|
33 Plugin(inputSampleRate),
|
Chris@15
|
34 m_processingMode(mode),
|
Chris@7
|
35 m_stepSize(0), // We are using 0 for step and block size to indicate "not yet set".
|
Chris@7
|
36 m_blockSize(0) {
|
Chris@0
|
37
|
Chris@0
|
38 m_fs = inputSampleRate;
|
Chris@0
|
39 // max frequency of interest (Hz)
|
Chris@0
|
40 m_fmax = 10000.f;
|
Chris@0
|
41 // warping parameters
|
Chris@12
|
42 m_warp_params.nsamps_twarp = 2048;
|
Chris@0
|
43 m_warp_params.alpha_max = 4;
|
Chris@0
|
44 m_warp_params.num_warps = 21;
|
Chris@0
|
45 m_warp_params.fact_over_samp = 2;
|
Chris@0
|
46 m_warp_params.alpha_dist = 0;
|
Chris@0
|
47 // f0 parameters
|
Chris@0
|
48 m_f0_params.f0min = 80.0;
|
Chris@0
|
49 m_f0_params.num_octs = 4;
|
Chris@0
|
50 m_f0_params.num_f0s_per_oct = 192;
|
Chris@0
|
51 m_f0_params.num_f0_hyps = 5;
|
Chris@0
|
52 m_f0_params.prefer = true;
|
Chris@0
|
53 m_f0_params.prefer_mean = 60;
|
Chris@0
|
54 m_f0_params.prefer_stdev = 18;
|
Chris@0
|
55 // glogs parameters
|
Chris@0
|
56 m_glogs_params.HP_logS = true;
|
Chris@0
|
57 m_glogs_params.att_subharms = 1;
|
Chris@7
|
58 // display parameters
|
Chris@15
|
59 m_f0gram_mode = BestBinOfAllDirections;
|
Chris@0
|
60
|
Chris@0
|
61 m_glogs_params.median_poly_coefs[0] = -0.000000058551680;
|
Chris@0
|
62 m_glogs_params.median_poly_coefs[1] = -0.000006945207775;
|
Chris@0
|
63 m_glogs_params.median_poly_coefs[2] = 0.002357223226588;
|
Chris@0
|
64
|
Chris@0
|
65 m_glogs_params.sigma_poly_coefs[0] = 0.000000092782308;
|
Chris@0
|
66 m_glogs_params.sigma_poly_coefs[1] = 0.000057283574898;
|
Chris@0
|
67 m_glogs_params.sigma_poly_coefs[2] = 0.022199903714288;
|
Chris@0
|
68
|
Chris@0
|
69 // number of fft points (controls zero-padding)
|
Chris@0
|
70 m_nfft = m_warp_params.nsamps_twarp;
|
Chris@0
|
71 // hop in samples
|
Chris@0
|
72 m_hop = m_warp_params.fact_over_samp * 256;
|
Chris@0
|
73
|
Chris@0
|
74 m_num_f0s = 0;
|
Chris@16
|
75 m_f0s = 0;
|
Chris@0
|
76 }
|
Chris@0
|
77
|
Chris@14
|
78 FChTransformF0gram::~FChTransformF0gram()
|
Chris@14
|
79 {
|
Chris@14
|
80 if (!m_blockSize) {
|
Chris@14
|
81 return; // nothing was allocated
|
Chris@14
|
82 }
|
Chris@14
|
83
|
Chris@14
|
84 deallocate(m_warpings.pos_int);
|
Chris@14
|
85 deallocate(m_warpings.pos_frac);
|
Chris@14
|
86 deallocate(m_warpings.chirp_rates);
|
Chris@14
|
87
|
Chris@14
|
88 clean_LPF();
|
Chris@14
|
89
|
Chris@14
|
90 deallocate(m_timeWindow);
|
Chris@14
|
91
|
Chris@14
|
92 deallocate(mp_HanningWindow);
|
Chris@14
|
93
|
Chris@14
|
94 // Warping
|
Chris@14
|
95 deallocate(x_warping);
|
Chris@14
|
96 delete fft_xwarping;
|
Chris@14
|
97 deallocate(m_absFanChirpTransform);
|
Chris@14
|
98 deallocate(m_auxFanChirpTransform);
|
Chris@14
|
99
|
Chris@14
|
100 // design_GLogS
|
Chris@14
|
101 deallocate(m_glogs_f0);
|
Chris@14
|
102 deallocate(m_glogs);
|
Chris@14
|
103 deallocate(m_glogs_n);
|
Chris@14
|
104 deallocate(m_glogs_index);
|
Chris@14
|
105 deallocate(m_glogs_posint);
|
Chris@14
|
106 deallocate(m_glogs_posfrac);
|
Chris@14
|
107 deallocate(m_glogs_interp);
|
Chris@14
|
108 deallocate(m_glogs_third_harmonic_posint);
|
Chris@14
|
109 deallocate(m_glogs_third_harmonic_posfrac);
|
Chris@14
|
110 deallocate(m_glogs_third_harmonic);
|
Chris@14
|
111 deallocate(m_glogs_fifth_harmonic_posint);
|
Chris@14
|
112 deallocate(m_glogs_fifth_harmonic_posfrac);
|
Chris@14
|
113 deallocate(m_glogs_fifth_harmonic);
|
Chris@14
|
114 deallocate(m_glogs_f0_preference_weights);
|
Chris@14
|
115 deallocate(m_glogs_median_correction);
|
Chris@14
|
116 deallocate(m_glogs_sigma_correction);
|
Chris@16
|
117
|
Chris@16
|
118 deallocate(m_f0s);
|
Chris@0
|
119 }
|
Chris@0
|
120
|
Chris@0
|
121 string
|
Chris@0
|
122 FChTransformF0gram::getIdentifier() const {
|
Chris@15
|
123 switch (m_processingMode) {
|
Chris@15
|
124 case ModeF0Gram: return "fchtransformf0gram";
|
Chris@15
|
125 case ModeSpectrogram: return "fchtransformspectrogram";
|
Chris@15
|
126 case ModeRoughSpectrogram: return "fchtransformrough";
|
Chris@15
|
127 }
|
Chris@0
|
128 }
|
Chris@0
|
129
|
Chris@0
|
130 string
|
Chris@0
|
131 FChTransformF0gram::getName() const {
|
Chris@15
|
132 switch (m_processingMode) {
|
Chris@15
|
133 case ModeF0Gram: return "Fan Chirp Transform F0gram";
|
Chris@15
|
134 case ModeSpectrogram: return "Fan Chirp Transform Spectrogram";
|
Chris@15
|
135 case ModeRoughSpectrogram: return "Fan Chirp Transform Rough Spectrogram";
|
Chris@15
|
136 }
|
Chris@0
|
137 }
|
Chris@0
|
138
|
Chris@0
|
139 string
|
Chris@0
|
140 FChTransformF0gram::getDescription() const {
|
Chris@15
|
141 switch (m_processingMode) {
|
Chris@15
|
142 case ModeF0Gram:
|
Chris@15
|
143 return "This plug-in produces a representation, called F0gram, which exhibits the salience of the fundamental frequency of the sound sources in the audio file. The computation of the F0gram makes use of the Fan Chirp Transform analysis. It is based on the article \"Fan chirp transform for music representation\" P. Cancela, E. Lopez, M. Rocamora, International Conference on Digital Audio Effects, 13th. DAFx-10. Graz, Austria - 6-10 Sep 2010.";
|
Chris@15
|
144 case ModeSpectrogram:
|
Chris@15
|
145 return "This plug-in produces a spectral representation of the audio using Fan Chirp Transform analysis.";
|
Chris@15
|
146 case ModeRoughSpectrogram:
|
Chris@15
|
147 return "This plug-in produces a more approximate spectral representation of the audio using Fan Chirp Transform analysis.";
|
Chris@15
|
148 }
|
Chris@0
|
149 }
|
Chris@0
|
150
|
Chris@0
|
151 string
|
Chris@0
|
152 FChTransformF0gram::getMaker() const {
|
Chris@0
|
153 // Your name here
|
Chris@0
|
154 return "Audio Processing Group \n Universidad de la Republica";
|
Chris@0
|
155 }
|
Chris@0
|
156
|
Chris@0
|
157 int
|
Chris@0
|
158 FChTransformF0gram::getPluginVersion() const {
|
Chris@0
|
159 // Increment this each time you release a version that behaves
|
Chris@0
|
160 // differently from the previous one
|
Chris@0
|
161 //
|
Chris@0
|
162 // 0 - initial version from scratch
|
Chris@15
|
163 return 1;
|
Chris@0
|
164 }
|
Chris@0
|
165
|
Chris@0
|
166 string
|
Chris@0
|
167 FChTransformF0gram::getCopyright() const {
|
Chris@0
|
168 // This function is not ideally named. It does not necessarily
|
Chris@0
|
169 // need to say who made the plugin -- getMaker does that -- but it
|
Chris@0
|
170 // should indicate the terms under which it is distributed. For
|
Chris@0
|
171 // example, "Copyright (year). All Rights Reserved", or "GPL"
|
Chris@0
|
172 return "copyright (C) 2011 GPL - Audio Processing Group, UdelaR";
|
Chris@0
|
173 }
|
Chris@0
|
174
|
Chris@0
|
175 FChTransformF0gram::InputDomain
|
Chris@0
|
176 FChTransformF0gram::getInputDomain() const {
|
Chris@0
|
177 return TimeDomain;
|
Chris@0
|
178 }
|
Chris@0
|
179
|
Chris@0
|
180 size_t FChTransformF0gram::getPreferredBlockSize() const {
|
Chris@0
|
181 return 8192; // 0 means "I can handle any block size"
|
Chris@0
|
182 }
|
Chris@0
|
183
|
Chris@0
|
184 size_t
|
Chris@0
|
185 FChTransformF0gram::getPreferredStepSize() const {
|
Chris@0
|
186 return 256; // 0 means "anything sensible"; in practice this
|
Chris@0
|
187 // means the same as the block size for TimeDomain
|
Chris@0
|
188 // plugins, or half of it for FrequencyDomain plugins
|
Chris@0
|
189 }
|
Chris@0
|
190
|
Chris@0
|
191 size_t
|
Chris@0
|
192 FChTransformF0gram::getMinChannelCount() const {
|
Chris@0
|
193 return 1;
|
Chris@0
|
194 }
|
Chris@0
|
195
|
Chris@0
|
196 size_t
|
Chris@0
|
197 FChTransformF0gram::getMaxChannelCount() const {
|
Chris@0
|
198 return 1;
|
Chris@0
|
199 }
|
Chris@0
|
200
|
Chris@0
|
201 FChTransformF0gram::ParameterList
|
Chris@0
|
202 FChTransformF0gram::getParameterDescriptors() const {
|
Chris@0
|
203 ParameterList list;
|
Chris@0
|
204
|
Chris@0
|
205 // If the plugin has no adjustable parameters, return an empty
|
Chris@0
|
206 // list here (and there's no need to provide implementations of
|
Chris@0
|
207 // getParameter and setParameter in that case either).
|
Chris@0
|
208
|
Chris@0
|
209 // Note that it is your responsibility to make sure the parameters
|
Chris@0
|
210 // start off having their default values (e.g. in the constructor
|
Chris@0
|
211 // above). The host needs to know the default value so it can do
|
Chris@0
|
212 // things like provide a "reset to default" function, but it will
|
Chris@0
|
213 // not explicitly set your parameters to their defaults for you if
|
Chris@0
|
214 // they have not changed in the mean time.
|
Chris@0
|
215
|
Chris@0
|
216 // ============= WARPING PARAMETERS =============
|
Chris@0
|
217
|
Chris@0
|
218 ParameterDescriptor fmax;
|
Chris@0
|
219 fmax.identifier = "fmax";
|
Chris@0
|
220 fmax.name = "Maximum frequency";
|
Chris@0
|
221 fmax.description = "Maximum frequency of interest for the analysis.";
|
Chris@0
|
222 fmax.unit = "Hz";
|
Chris@0
|
223 fmax.minValue = 2000;
|
Chris@0
|
224 fmax.maxValue = 22050;
|
Chris@0
|
225 fmax.defaultValue = 10000;
|
Chris@0
|
226 fmax.isQuantized = true;
|
Chris@0
|
227 fmax.quantizeStep = 1.0;
|
Chris@0
|
228 list.push_back(fmax);
|
Chris@0
|
229
|
Chris@0
|
230 ParameterDescriptor nsamp;
|
Chris@0
|
231 nsamp.identifier = "nsamp";
|
Chris@0
|
232 nsamp.name = "Number of samples";
|
Chris@0
|
233 nsamp.description = "Number of samples of the time warped frame";
|
Chris@0
|
234 nsamp.unit = "samples";
|
Chris@0
|
235 nsamp.minValue = 128;
|
Chris@0
|
236 nsamp.maxValue = 4096;
|
Chris@0
|
237 nsamp.defaultValue = 2048;
|
Chris@0
|
238 nsamp.isQuantized = true;
|
Chris@0
|
239 nsamp.quantizeStep = 1.0;
|
Chris@0
|
240 list.push_back(nsamp);
|
Chris@0
|
241
|
Chris@0
|
242 ParameterDescriptor nfft;
|
Chris@0
|
243 nfft.identifier = "nfft";
|
Chris@0
|
244 nfft.name = "FFT number of points";
|
Chris@0
|
245 nfft.description = "Number of FFT points (controls zero-padding)";
|
Chris@0
|
246 nfft.unit = "samples";
|
Chris@0
|
247 nfft.minValue = 0;
|
Chris@0
|
248 nfft.maxValue = 4;
|
Chris@0
|
249 nfft.defaultValue = 3;
|
Chris@0
|
250 nfft.isQuantized = true;
|
Chris@0
|
251 nfft.quantizeStep = 1.0;
|
Chris@0
|
252 nfft.valueNames.push_back("256");
|
Chris@0
|
253 nfft.valueNames.push_back("512");
|
Chris@0
|
254 nfft.valueNames.push_back("1024");
|
Chris@0
|
255 nfft.valueNames.push_back("2048");
|
Chris@0
|
256 nfft.valueNames.push_back("4096");
|
Chris@0
|
257 nfft.valueNames.push_back("8192");
|
Chris@0
|
258 list.push_back(nfft);
|
Chris@0
|
259
|
Chris@0
|
260 ParameterDescriptor alpha_max;
|
Chris@0
|
261 alpha_max.identifier = "alpha_max";
|
Chris@0
|
262 alpha_max.name = "Maximum alpha value";
|
Chris@0
|
263 alpha_max.description = "Maximum value for the alpha parameter of the transform.";
|
Chris@0
|
264 alpha_max.unit = "Hz/s";
|
Chris@0
|
265 alpha_max.minValue = -10;
|
Chris@0
|
266 alpha_max.maxValue = 10;
|
Chris@0
|
267 alpha_max.defaultValue = 5;
|
Chris@0
|
268 alpha_max.isQuantized = true;
|
Chris@0
|
269 alpha_max.quantizeStep = 1.0;
|
Chris@0
|
270 list.push_back(alpha_max);
|
Chris@0
|
271
|
Chris@0
|
272 ParameterDescriptor num_warps;
|
Chris@0
|
273 num_warps.identifier = "num_warps";
|
Chris@0
|
274 num_warps.name = "Number of warpings";
|
Chris@0
|
275 num_warps.description = "Number of different warpings in the specified range (must be odd).";
|
Chris@0
|
276 num_warps.unit = "";
|
Chris@0
|
277 num_warps.minValue = 1;
|
Chris@0
|
278 num_warps.maxValue = 101;
|
Chris@0
|
279 num_warps.defaultValue = 21;
|
Chris@0
|
280 num_warps.isQuantized = true;
|
Chris@0
|
281 num_warps.quantizeStep = 2.0;
|
Chris@0
|
282 list.push_back(num_warps);
|
Chris@0
|
283
|
Chris@0
|
284 ParameterDescriptor alpha_dist;
|
Chris@0
|
285 alpha_dist.identifier = "alpha_dist";
|
Chris@0
|
286 alpha_dist.name = "alpha distribution";
|
Chris@0
|
287 alpha_dist.description = "Type of distribution of alpha values (linear or log).";
|
Chris@0
|
288 alpha_dist.unit = "";
|
Chris@0
|
289 alpha_dist.minValue = 0;
|
Chris@0
|
290 alpha_dist.maxValue = 1;
|
Chris@0
|
291 alpha_dist.defaultValue = 1;
|
Chris@0
|
292 alpha_dist.isQuantized = true;
|
Chris@0
|
293 alpha_dist.quantizeStep = 1.0;
|
Chris@0
|
294 // lin (0), log (1)
|
Chris@0
|
295 alpha_dist.valueNames.push_back("lin");
|
Chris@0
|
296 alpha_dist.valueNames.push_back("log");
|
Chris@0
|
297 list.push_back(alpha_dist);
|
Chris@0
|
298
|
Chris@0
|
299 // ============= F0-GRAM PARAMETERS =============
|
Chris@0
|
300
|
Chris@0
|
301 ParameterDescriptor f0min;
|
Chris@0
|
302 f0min.identifier = "f0min";
|
Chris@0
|
303 f0min.name = "min f0";
|
Chris@0
|
304 f0min.description = "Minimum fundamental frequency (f0) value.";
|
Chris@0
|
305 f0min.unit = "Hz";
|
Chris@0
|
306 f0min.minValue = 1;
|
Chris@0
|
307 f0min.maxValue = 500;
|
Chris@0
|
308 f0min.defaultValue = 80;
|
Chris@0
|
309 f0min.isQuantized = true;
|
Chris@0
|
310 f0min.quantizeStep = 1.0;
|
Chris@0
|
311 list.push_back(f0min);
|
Chris@0
|
312
|
Chris@0
|
313 ParameterDescriptor num_octs;
|
Chris@0
|
314 num_octs.identifier = "num_octs";
|
Chris@0
|
315 num_octs.name = "number of octaves";
|
Chris@0
|
316 num_octs.description = "Number of octaves for F0gram computation.";
|
Chris@0
|
317 num_octs.unit = "";
|
Chris@0
|
318 num_octs.minValue = 1;
|
Chris@0
|
319 num_octs.maxValue = 10;
|
Chris@0
|
320 num_octs.defaultValue = 4;
|
Chris@0
|
321 num_octs.isQuantized = true;
|
Chris@0
|
322 num_octs.quantizeStep = 1.0;
|
Chris@0
|
323 list.push_back(num_octs);
|
Chris@0
|
324
|
Chris@0
|
325 ParameterDescriptor num_f0_hyps;
|
Chris@0
|
326 num_f0_hyps.identifier = "num_f0_hyps";
|
Chris@0
|
327 num_f0_hyps.name = "number of f0 hypotesis";
|
Chris@0
|
328 num_f0_hyps.description = "Number of f0 hypotesis to extract.";
|
Chris@0
|
329 num_f0_hyps.unit = "";
|
Chris@0
|
330 num_f0_hyps.minValue = 1;
|
Chris@0
|
331 num_f0_hyps.maxValue = 100;
|
Chris@0
|
332 num_f0_hyps.defaultValue = 10;
|
Chris@0
|
333 num_f0_hyps.isQuantized = true;
|
Chris@0
|
334 num_f0_hyps.quantizeStep = 1.0;
|
Chris@0
|
335 list.push_back(num_f0_hyps);
|
Chris@0
|
336
|
Chris@0
|
337 ParameterDescriptor f0s_per_oct;
|
Chris@0
|
338 f0s_per_oct.identifier = "f0s_per_oct";
|
Chris@0
|
339 f0s_per_oct.name = "f0 values per octave";
|
Chris@0
|
340 f0s_per_oct.description = "Number of f0 values per octave.";
|
Chris@0
|
341 f0s_per_oct.unit = "";
|
Chris@0
|
342 f0s_per_oct.minValue = 12;
|
Chris@0
|
343 f0s_per_oct.maxValue = 768;
|
Chris@0
|
344 f0s_per_oct.defaultValue = 192;
|
Chris@0
|
345 f0s_per_oct.isQuantized = true;
|
Chris@0
|
346 f0s_per_oct.quantizeStep = 1.0;
|
Chris@0
|
347 list.push_back(f0s_per_oct);
|
Chris@0
|
348
|
Chris@0
|
349 ParameterDescriptor f0_prefer_fun;
|
Chris@0
|
350 f0_prefer_fun.identifier = "f0_prefer_fun";
|
Chris@0
|
351 f0_prefer_fun.name = "f0 preference function";
|
Chris@0
|
352 f0_prefer_fun.description = "Whether to use a f0 weighting function.";
|
Chris@0
|
353 f0_prefer_fun.unit = "";
|
Chris@0
|
354 f0_prefer_fun.minValue = 0;
|
Chris@0
|
355 f0_prefer_fun.maxValue = 1;
|
Chris@0
|
356 f0_prefer_fun.defaultValue = 1;
|
Chris@0
|
357 f0_prefer_fun.isQuantized = true;
|
Chris@0
|
358 f0_prefer_fun.quantizeStep = 1.0;
|
Chris@0
|
359 list.push_back(f0_prefer_fun);
|
Chris@0
|
360
|
Chris@0
|
361 ParameterDescriptor f0_prefer_mean;
|
Chris@0
|
362 f0_prefer_mean.identifier = "f0_prefer_mean";
|
Chris@0
|
363 f0_prefer_mean.name = "mean f0 preference function";
|
Chris@0
|
364 f0_prefer_mean.description = "Mean value for f0 weighting function (MIDI number).";
|
Chris@0
|
365 f0_prefer_mean.unit = "";
|
Chris@0
|
366 f0_prefer_mean.minValue = 1;
|
Chris@0
|
367 f0_prefer_mean.maxValue = 127;
|
Chris@0
|
368 f0_prefer_mean.defaultValue = 60;
|
Chris@0
|
369 f0_prefer_mean.isQuantized = true;
|
Chris@0
|
370 f0_prefer_mean.quantizeStep = 1.0;
|
Chris@0
|
371 list.push_back(f0_prefer_mean);
|
Chris@0
|
372
|
Chris@0
|
373 ParameterDescriptor f0_prefer_stdev;
|
Chris@0
|
374 f0_prefer_stdev.identifier = "f0_prefer_stdev";
|
Chris@0
|
375 f0_prefer_stdev.name = "stdev of f0 preference function";
|
Chris@0
|
376 f0_prefer_stdev.description = "Stdev for f0 weighting function (MIDI number).";
|
Chris@0
|
377 f0_prefer_stdev.unit = "";
|
Chris@0
|
378 f0_prefer_stdev.minValue = 1;
|
Chris@0
|
379 f0_prefer_stdev.maxValue = 127;
|
Chris@0
|
380 f0_prefer_stdev.defaultValue = 18;
|
Chris@0
|
381 f0_prefer_stdev.isQuantized = true;
|
Chris@0
|
382 f0_prefer_stdev.quantizeStep = 1.0;
|
Chris@0
|
383 list.push_back(f0_prefer_stdev);
|
Chris@0
|
384
|
Chris@0
|
385 ParameterDescriptor f0gram_mode;
|
Chris@0
|
386 f0gram_mode.identifier = "f0gram_mode";
|
Chris@0
|
387 f0gram_mode.name = "display mode of f0gram";
|
Chris@0
|
388 f0gram_mode.description = "Display all bins of the best direction, or the best bin for each direction.";
|
Chris@0
|
389 f0gram_mode.unit = "";
|
Chris@0
|
390 f0gram_mode.minValue = 0;
|
Chris@0
|
391 f0gram_mode.maxValue = 1;
|
Chris@0
|
392 f0gram_mode.defaultValue = 1;
|
Chris@0
|
393 f0gram_mode.isQuantized = true;
|
Chris@0
|
394 f0gram_mode.quantizeStep = 1.0;
|
Chris@0
|
395 list.push_back(f0gram_mode);
|
Chris@0
|
396
|
Chris@0
|
397 return list;
|
Chris@0
|
398 }
|
Chris@0
|
399
|
Chris@0
|
400 float
|
Chris@0
|
401 FChTransformF0gram::getParameter(string identifier) const {
|
Chris@0
|
402
|
Chris@0
|
403 if (identifier == "fmax") {
|
Chris@0
|
404 return m_fmax;
|
Chris@0
|
405 } else if (identifier == "nsamp") {
|
Chris@0
|
406 return m_warp_params.nsamps_twarp;
|
Chris@0
|
407 } else if (identifier == "alpha_max") {
|
Chris@0
|
408 return m_warp_params.alpha_max;
|
Chris@0
|
409 } else if (identifier == "num_warps") {
|
Chris@0
|
410 return m_warp_params.num_warps;
|
Chris@0
|
411 } else if (identifier == "alpha_dist") {
|
Chris@0
|
412 return m_warp_params.alpha_dist;
|
Chris@0
|
413 } else if (identifier == "nfft") {
|
Chris@0
|
414 return m_nfft;
|
Chris@0
|
415 } else if (identifier == "f0min") {
|
Chris@0
|
416 return m_f0_params.f0min;
|
Chris@0
|
417 } else if (identifier == "num_octs") {
|
Chris@0
|
418 return m_f0_params.num_octs;
|
Chris@0
|
419 } else if (identifier == "f0s_per_oct") {
|
Chris@0
|
420 return m_f0_params.num_f0s_per_oct;
|
Chris@0
|
421 } else if (identifier == "num_f0_hyps") {
|
Chris@0
|
422 return m_f0_params.num_f0_hyps;
|
Chris@0
|
423 } else if (identifier == "f0_prefer_fun") {
|
Chris@0
|
424 return m_f0_params.prefer;
|
Chris@0
|
425 } else if (identifier == "f0_prefer_mean") {
|
Chris@0
|
426 return m_f0_params.prefer_mean;
|
Chris@0
|
427 } else if (identifier == "f0_prefer_stdev") {
|
Chris@0
|
428 return m_f0_params.prefer_stdev;
|
Chris@7
|
429 } else if (identifier == "f0gram_mode") {
|
Chris@15
|
430 return m_f0gram_mode == BestBinOfAllDirections ? 1.0 : 0.0;
|
Chris@0
|
431 } else {
|
Chris@0
|
432 return 0.f;
|
Chris@0
|
433 }
|
Chris@0
|
434
|
Chris@0
|
435 }
|
Chris@0
|
436
|
Chris@15
|
437 void FChTransformF0gram::setParameter(string identifier, float value)
|
Chris@15
|
438 {
|
Chris@0
|
439 if (identifier == "fmax") {
|
Chris@0
|
440 m_fmax = value;
|
Chris@0
|
441 } else if (identifier == "nsamp") {
|
Chris@0
|
442 m_warp_params.nsamps_twarp = value;
|
Chris@0
|
443 } else if (identifier == "alpha_max") {
|
Chris@0
|
444 m_warp_params.alpha_max = value;
|
Chris@0
|
445 } else if (identifier == "num_warps") {
|
Chris@0
|
446 m_warp_params.num_warps = value;
|
Chris@0
|
447 } else if (identifier == "alpha_dist") {
|
Chris@0
|
448 m_warp_params.alpha_dist = value;
|
Chris@0
|
449 } else if (identifier == "nfft") {
|
Chris@0
|
450 m_nfft = value;
|
Chris@0
|
451 } else if (identifier == "f0min") {
|
Chris@0
|
452 m_f0_params.f0min = value;
|
Chris@0
|
453 } else if (identifier == "num_octs") {
|
Chris@0
|
454 m_f0_params.num_octs = value;
|
Chris@0
|
455 } else if (identifier == "f0s_per_oct") {
|
Chris@0
|
456 m_f0_params.num_f0s_per_oct = value;
|
Chris@0
|
457 } else if (identifier == "num_f0_hyps") {
|
Chris@0
|
458 m_f0_params.num_f0_hyps = value;
|
Chris@0
|
459 } else if (identifier == "f0_prefer_fun") {
|
Chris@0
|
460 m_f0_params.prefer = value;
|
Chris@0
|
461 } else if (identifier == "f0_prefer_mean") {
|
Chris@0
|
462 m_f0_params.prefer_mean = value;
|
Chris@0
|
463 } else if (identifier == "f0_prefer_stdev") {
|
Chris@0
|
464 m_f0_params.prefer_stdev = value;
|
Chris@0
|
465 } else if (identifier == "f0gram_mode") {
|
Chris@15
|
466 m_f0gram_mode = (value > 0.5 ?
|
Chris@15
|
467 BestBinOfAllDirections :
|
Chris@15
|
468 AllBinsOfBestDirection);
|
Chris@15
|
469 } else {
|
Chris@15
|
470 cerr << "WARNING: Unknown parameter id \""
|
Chris@15
|
471 << identifier << "\"" << endl;
|
Chris@0
|
472 }
|
Chris@0
|
473 }
|
Chris@0
|
474
|
Chris@0
|
475 FChTransformF0gram::ProgramList
|
Chris@0
|
476 FChTransformF0gram::getPrograms() const {
|
Chris@0
|
477 ProgramList list;
|
Chris@0
|
478 return list;
|
Chris@0
|
479 }
|
Chris@0
|
480
|
Chris@0
|
481 FChTransformF0gram::OutputList
|
Chris@0
|
482 FChTransformF0gram::getOutputDescriptors() const {
|
Chris@0
|
483
|
Chris@0
|
484 OutputList list;
|
Chris@0
|
485
|
Chris@16
|
486 vector<string> labels;
|
Chris@16
|
487 char label[100];
|
Chris@0
|
488
|
Chris@16
|
489 if (m_processingMode == ModeF0Gram) {
|
Chris@16
|
490
|
Chris@16
|
491 /* f0 values of F0gram grid as string values */
|
Chris@16
|
492 for (int i = 0; i < m_num_f0s; ++i) {
|
Chris@16
|
493 sprintf(label, "%4.2f Hz", m_f0s[i]);
|
Chris@16
|
494 labels.push_back(label);
|
Chris@16
|
495 }
|
Chris@16
|
496
|
Chris@16
|
497 /* The F0gram */
|
Chris@16
|
498 OutputDescriptor d;
|
Chris@16
|
499 d.identifier = "f0gram";
|
Chris@16
|
500 d.name = "F0gram: salience of f0s";
|
Chris@16
|
501 d.description = "This representation show the salience of the different f0s in the signal.";
|
Chris@16
|
502 d.hasFixedBinCount = true;
|
Chris@16
|
503 d.binCount = m_f0_params.num_octs * m_f0_params.num_f0s_per_oct;
|
Chris@16
|
504 d.binNames = labels;
|
Chris@16
|
505 d.hasKnownExtents = false;
|
Chris@16
|
506 d.isQuantized = false;
|
Chris@16
|
507 d.sampleType = OutputDescriptor::OneSamplePerStep;
|
Chris@16
|
508 d.hasDuration = false;
|
Chris@16
|
509 list.push_back(d);
|
Chris@16
|
510
|
Chris@16
|
511 } else {
|
Chris@16
|
512
|
Chris@16
|
513 for (int i = 0; i < m_warp_params.nsamps_twarp/2+1; ++i) {
|
Chris@16
|
514 double freq = i * (m_warpings.fs_warp / m_nfft);
|
Chris@16
|
515 sprintf(label, "%4.2f Hz", freq);
|
Chris@16
|
516 labels.push_back(label);
|
Chris@16
|
517 }
|
Chris@16
|
518
|
Chris@16
|
519 OutputDescriptor d;
|
Chris@16
|
520 d.identifier = "spectrogram";
|
Chris@16
|
521 d.name = "Spectrogram";
|
Chris@16
|
522 d.description = "Time/frequency spectrogram derived from the Fan Chirp Transform output";
|
Chris@16
|
523 d.hasFixedBinCount = true;
|
Chris@16
|
524 d.binCount = m_warp_params.nsamps_twarp/2+1;
|
Chris@16
|
525 d.binNames = labels;
|
Chris@16
|
526 d.hasKnownExtents = false;
|
Chris@16
|
527 d.isQuantized = false;
|
Chris@16
|
528 d.sampleType = OutputDescriptor::OneSamplePerStep;
|
Chris@16
|
529 d.hasDuration = false;
|
Chris@16
|
530 list.push_back(d);
|
Chris@0
|
531 }
|
Chris@16
|
532
|
Chris@0
|
533 return list;
|
Chris@0
|
534 }
|
Chris@0
|
535
|
Chris@0
|
536 bool
|
Chris@0
|
537 FChTransformF0gram::initialise(size_t channels, size_t stepSize, size_t blockSize) {
|
Chris@0
|
538 if (channels < getMinChannelCount() ||
|
Chris@14
|
539 channels > getMaxChannelCount()) {
|
Chris@14
|
540 return false;
|
Chris@14
|
541 }
|
Chris@0
|
542
|
Chris@0
|
543 // set blockSize and stepSize (but changed below)
|
Chris@0
|
544 m_blockSize = blockSize;
|
Chris@0
|
545 m_stepSize = stepSize;
|
Chris@0
|
546
|
Chris@0
|
547 // WARNING !!!
|
Chris@0
|
548 // these values in fact are determined by the sampling frequency m_fs
|
Chris@0
|
549 // the parameters used below correspond to default values i.e. m_fs = 44.100 Hz
|
Chris@0
|
550 //m_blockSize = 4 * m_warp_params.nsamps_twarp;
|
Chris@16
|
551 // m_stepSize = floor(m_hop / m_warp_params.fact_over_samp);
|
Chris@16
|
552
|
Chris@16
|
553 /* design of FChT */
|
Chris@16
|
554 design_FChT();
|
Chris@0
|
555
|
Chris@0
|
556 /* initialise m_glogs_params */
|
Chris@7
|
557 design_GLogS();
|
Chris@0
|
558
|
Chris@7
|
559 design_LPF();
|
Chris@0
|
560
|
Chris@7
|
561 design_time_window();
|
Chris@0
|
562
|
Chris@7
|
563 // Create Hanning window for warped signals
|
Chris@14
|
564 mp_HanningWindow = allocate<double>(m_warp_params.nsamps_twarp);
|
Chris@7
|
565 bool normalize = false;
|
Chris@14
|
566 Utils::hanning_window(mp_HanningWindow, m_warp_params.nsamps_twarp, normalize);
|
Chris@0
|
567
|
Chris@16
|
568 m_num_f0s = m_f0_params.num_octs * m_f0_params.num_f0s_per_oct;
|
Chris@16
|
569 m_f0s = allocate<double>(m_num_f0s);
|
Chris@16
|
570 for (int i = 0; i < m_num_f0s; ++i) {
|
Chris@16
|
571 m_f0s[i] = m_glogs_f0[m_glogs_init_f0s + i];
|
Chris@16
|
572 }
|
Chris@16
|
573
|
Chris@0
|
574 return true;
|
Chris@0
|
575 }
|
Chris@0
|
576
|
Chris@0
|
577 void
|
Chris@0
|
578 FChTransformF0gram::design_GLogS() {
|
Chris@0
|
579
|
Chris@7
|
580 // total number & initial quantity of f0s
|
Chris@16
|
581
|
Chris@16
|
582 cerr << "per oct = " << m_f0_params.num_f0s_per_oct << ", octs = " << m_f0_params.num_octs << endl;
|
Chris@10
|
583 m_glogs_init_f0s = (int)(((double)m_f0_params.num_f0s_per_oct)*log2(5.0))+1;
|
Chris@16
|
584 cerr << "init_f0s = " << m_glogs_init_f0s << endl;
|
Chris@7
|
585 m_glogs_num_f0s = (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct + m_glogs_init_f0s;
|
Chris@16
|
586 cerr << "num_f0s = " << m_glogs_num_f0s << endl;
|
Chris@0
|
587
|
Chris@7
|
588 // Initialize arrays
|
Chris@14
|
589 m_glogs_f0 = allocate<double>(m_glogs_num_f0s);
|
Chris@14
|
590 m_glogs = allocate<double>(m_glogs_num_f0s*m_warp_params.num_warps);
|
Chris@14
|
591 m_glogs_n = allocate<int>(m_glogs_num_f0s);
|
Chris@14
|
592 m_glogs_index = allocate<int>(m_glogs_num_f0s);
|
Chris@0
|
593
|
Chris@7
|
594 // Compute f0 values
|
Chris@7
|
595 m_glogs_harmonic_count = 0;
|
Chris@7
|
596 double factor = (double)(m_warp_params.nsamps_twarp/2)/(double)(m_warp_params.nsamps_twarp/2+1);
|
Chris@10
|
597 for (int i = 0; i < m_glogs_num_f0s; i++) {
|
Chris@7
|
598 m_glogs_f0[i] = (m_f0_params.f0min/5.0)*pow(2.0,(double)i/(double)m_f0_params.num_f0s_per_oct);
|
Chris@7
|
599 // for every f0 compute number of partials less or equal than m_fmax.
|
Chris@7
|
600 m_glogs_n[i] = m_fmax*factor/m_glogs_f0[i];
|
Chris@7
|
601 m_glogs_index[i] = m_glogs_harmonic_count;
|
Chris@7
|
602 m_glogs_harmonic_count += m_glogs_n[i];
|
Chris@7
|
603 }
|
Chris@0
|
604
|
Chris@7
|
605 // Initialize arrays for interpolation
|
Chris@14
|
606 m_glogs_posint = allocate<int>(m_glogs_harmonic_count);
|
Chris@14
|
607 m_glogs_posfrac = allocate<double>(m_glogs_harmonic_count);
|
Chris@14
|
608 m_glogs_interp = allocate<double>(m_glogs_harmonic_count);
|
Chris@0
|
609
|
Chris@7
|
610 // Compute int & frac of interpolation positions
|
Chris@10
|
611 int aux_index = 0;
|
Chris@7
|
612 double aux_pos;
|
Chris@10
|
613 for (int i = 0; i < m_glogs_num_f0s; i++) {
|
Chris@10
|
614 for (int j = 1; j <= m_glogs_n[i]; j++) {
|
Chris@7
|
615 // indice en el vector de largo t_warp/2+1 donde el ultimo valor corresponde a f=m_fmax
|
Chris@7
|
616 aux_pos = ((double)j*m_glogs_f0[i])*((double)(m_warp_params.nsamps_twarp/2+1))/m_fmax;
|
Chris@16
|
617 //!!! cerr << "aux_pos = " << aux_pos << endl;
|
Chris@16
|
618 // aux_pos = ((double)j*m_glogs_f0[i])*((double)(m_warp_params.nsamps_twarp/2+1))/m_warpings.fs_warp;
|
Chris@16
|
619 // cerr << "or " << aux_pos << " (as fs_warp = " << m_warpings.fs_warp << ")" << endl;
|
Chris@10
|
620 m_glogs_posint[aux_index] = (int)aux_pos;
|
Chris@7
|
621 m_glogs_posfrac[aux_index] = aux_pos - (double)m_glogs_posint[aux_index];
|
Chris@7
|
622 aux_index++;
|
Chris@7
|
623 }
|
Chris@7
|
624 }
|
Chris@0
|
625
|
Chris@7
|
626 // Third harmonic attenuation
|
Chris@7
|
627 double aux_third_harmonic;
|
Chris@14
|
628 m_glogs_third_harmonic_posint = allocate<int>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@14
|
629 m_glogs_third_harmonic_posfrac = allocate<double>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@10
|
630 for (int i = 0; i < (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct; i++) {
|
Chris@7
|
631 aux_third_harmonic = (double)i + (double)m_glogs_init_f0s - ((double)m_f0_params.num_f0s_per_oct)*log2(3.0);
|
Chris@10
|
632 m_glogs_third_harmonic_posint[i] = (int)aux_third_harmonic;
|
Chris@7
|
633 m_glogs_third_harmonic_posfrac[i] = aux_third_harmonic - (double)(m_glogs_third_harmonic_posint[i]);
|
Chris@7
|
634 }
|
Chris@14
|
635 m_glogs_third_harmonic = allocate<double>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@0
|
636
|
Chris@7
|
637 // Fifth harmonic attenuation
|
Chris@7
|
638 double aux_fifth_harmonic;
|
Chris@14
|
639 m_glogs_fifth_harmonic_posint = allocate<int>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@14
|
640 m_glogs_fifth_harmonic_posfrac = allocate<double>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@10
|
641 for (int i = 0; i < (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct; i++) {
|
Chris@7
|
642 aux_fifth_harmonic = (double)i + (double)m_glogs_init_f0s - ((double)m_f0_params.num_f0s_per_oct)*log2(5.0);
|
Chris@10
|
643 m_glogs_fifth_harmonic_posint[i] = (int)aux_fifth_harmonic;
|
Chris@7
|
644 m_glogs_fifth_harmonic_posfrac[i] = aux_fifth_harmonic - (double)(m_glogs_fifth_harmonic_posint[i]);
|
Chris@7
|
645 }
|
Chris@14
|
646 m_glogs_fifth_harmonic = allocate<double>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@0
|
647
|
Chris@7
|
648 // Normalization & attenuation windows
|
Chris@14
|
649 m_glogs_f0_preference_weights = allocate<double>(m_f0_params.num_octs*m_f0_params.num_f0s_per_oct);
|
Chris@14
|
650 m_glogs_median_correction = allocate<double>(m_f0_params.num_octs*m_f0_params.num_f0s_per_oct);
|
Chris@14
|
651 m_glogs_sigma_correction = allocate<double>(m_f0_params.num_octs*m_f0_params.num_f0s_per_oct);
|
Chris@7
|
652 double MIDI_value;
|
Chris@10
|
653 for (int i = 0; i < m_f0_params.num_octs*m_f0_params.num_f0s_per_oct; i++) {
|
Chris@7
|
654 MIDI_value = 69.0 + 12.0 * log2(m_glogs_f0[i + m_glogs_init_f0s]/440.0);
|
Chris@7
|
655 m_glogs_f0_preference_weights[i] = 1.0/sqrt(2.0*M_PI*m_f0_params.prefer_stdev*m_f0_params.prefer_stdev)*exp(-(MIDI_value-m_f0_params.prefer_mean)*(MIDI_value-m_f0_params.prefer_mean)/(2.0*m_f0_params.prefer_stdev*m_f0_params.prefer_stdev));
|
Chris@7
|
656 m_glogs_f0_preference_weights[i] = (0.01 + m_glogs_f0_preference_weights[i]) / (1.01);
|
Chris@0
|
657
|
Chris@7
|
658 m_glogs_median_correction[i] = m_glogs_params.median_poly_coefs[0]*(i+1.0)*(i+1.0) + m_glogs_params.median_poly_coefs[1]*(i+1.0) + m_glogs_params.median_poly_coefs[2];
|
Chris@7
|
659 m_glogs_sigma_correction[i] = 1.0 / (m_glogs_params.sigma_poly_coefs[0]*(i+1.0)*(i+1.0) + m_glogs_params.sigma_poly_coefs[1]*(i+1.0) + m_glogs_params.sigma_poly_coefs[2]);
|
Chris@7
|
660 }
|
Chris@0
|
661 }
|
Chris@0
|
662
|
Chris@0
|
663 void
|
Chris@0
|
664 FChTransformF0gram::design_FChT() {
|
Chris@0
|
665
|
Chris@0
|
666 /*
|
Chris@0
|
667 * FILES FOR DEBUGGING
|
Chris@0
|
668 */
|
Chris@0
|
669
|
Chris@0
|
670 //ofstream output("output.txt");
|
Chris@0
|
671
|
Chris@0
|
672
|
Chris@0
|
673 /* ============= WARPING DESIGN ============= */
|
Chris@0
|
674
|
Chris@0
|
675 // sampling frequency after oversampling
|
Chris@0
|
676 m_warpings.fs_orig = m_warp_params.fact_over_samp * m_fs;
|
Chris@0
|
677
|
Chris@0
|
678 // number of samples of the original signal frame
|
Chris@0
|
679 m_warpings.nsamps_torig = 4 * m_warp_params.fact_over_samp * m_warp_params.nsamps_twarp;
|
Chris@0
|
680 // equivalent to: m_warpings.nsamps_torig = m_warp_params.fact_over_samp * m_blockSize;
|
Chris@0
|
681
|
Chris@0
|
682 // time instants of the original signal frame
|
Chris@14
|
683 double *t_orig = allocate<double>(m_warpings.nsamps_torig);
|
Chris@10
|
684 for (int ind = 0; ind < m_warpings.nsamps_torig; ind++) {
|
Chris@0
|
685 t_orig[ind] = ((double)(ind + 1) - (double)m_warpings.nsamps_torig / 2.0) / m_warpings.fs_orig;
|
Chris@0
|
686 }
|
Chris@0
|
687
|
Chris@0
|
688 // linear chirps warping definition as relative frequency deviation
|
Chris@7
|
689 //TODO
|
Chris@14
|
690 double *freq_relative = allocate<double>(m_warpings.nsamps_torig * m_warp_params.num_warps);
|
Chris@0
|
691 define_warps_linear_chirps(freq_relative, t_orig);
|
Chris@0
|
692
|
Chris@0
|
693 // maximum relative frequency deviation
|
Chris@0
|
694 double freq_relative_max = 0;
|
Chris@14
|
695 for (int i = 0; i < m_warpings.nsamps_torig; i++) {
|
Chris@14
|
696 for (int j = 0; j < m_warp_params.num_warps; j++) {
|
Chris@14
|
697 if (freq_relative_max < freq_relative[j * m_warpings.nsamps_torig + i]) {
|
Chris@0
|
698 freq_relative_max = freq_relative[j * m_warpings.nsamps_torig + i];
|
Chris@14
|
699 }
|
Chris@14
|
700 }
|
Chris@14
|
701 }
|
Chris@0
|
702
|
Chris@0
|
703 // sampling frequency of warped signal to be free of aliasing up to fmax
|
Chris@0
|
704 m_warpings.fs_warp = 2 * m_fmax * freq_relative_max;
|
Chris@0
|
705
|
Chris@0
|
706 // time instants of the warped signal frame
|
Chris@14
|
707 double *t_warp = allocate<double>(m_warp_params.nsamps_twarp);
|
Chris@10
|
708 for (int ind = 0; ind < m_warp_params.nsamps_twarp; ind++) {
|
Chris@0
|
709 t_warp[ind] = ((double)((int)(ind + 1)- (int)m_warp_params.nsamps_twarp / 2)) / (double)m_warpings.fs_warp;
|
Chris@0
|
710 }
|
Chris@0
|
711
|
Chris@0
|
712 // design of warpings for efficient interpolation
|
Chris@0
|
713 design_warps(freq_relative, t_orig, t_warp);
|
Chris@0
|
714
|
Chris@0
|
715
|
Chris@0
|
716 /*
|
Chris@0
|
717 * FILES FOR DEBUGGING
|
Chris@0
|
718 */
|
Chris@0
|
719
|
Chris@0
|
720 /*
|
Chris@7
|
721 output << "chirp_rates" << endl;
|
Chris@10
|
722 for (int j = 0; j < m_warp_params.num_warps; j++){
|
Chris@7
|
723 output << m_warpings.chirp_rates[j];
|
Chris@7
|
724 output << " ";
|
Chris@7
|
725 }
|
Chris@7
|
726 output << endl << "freq_relative" << endl;
|
Chris@0
|
727
|
Chris@10
|
728 for (int i = 0; i < m_warpings.nsamps_torig; i++){
|
Chris@10
|
729 for (int j = 0; j < m_warp_params.num_warps; j++){
|
Chris@7
|
730 output << freq_relative[j * m_warpings.nsamps_torig + i];
|
Chris@7
|
731 output << " ";
|
Chris@7
|
732 }
|
Chris@7
|
733 output << endl;
|
Chris@7
|
734 }
|
Chris@0
|
735
|
Chris@7
|
736 output << endl << "t_orig" << endl;
|
Chris@0
|
737
|
Chris@10
|
738 for (int i = 0; i < m_warpings.nsamps_torig; i++){
|
Chris@7
|
739 output << t_orig[i] << endl ;
|
Chris@7
|
740 }
|
Chris@7
|
741 */
|
Chris@0
|
742
|
Chris@14
|
743 deallocate(freq_relative);
|
Chris@14
|
744 deallocate(t_orig);
|
Chris@14
|
745 deallocate(t_warp);
|
Chris@14
|
746
|
Chris@0
|
747 //output.close();
|
Chris@0
|
748
|
Chris@0
|
749 /* ============= FFTW PLAN DESIGN ============= */
|
Chris@7
|
750 // Initialize 2-d array for warped signals
|
Chris@14
|
751 x_warping = allocate<double>(m_warp_params.nsamps_twarp);
|
Chris@14
|
752 m_absFanChirpTransform = allocate<double>(m_warp_params.num_warps * (m_warp_params.nsamps_twarp/2 + 1));
|
Chris@14
|
753 m_auxFanChirpTransform = allocate<double>(2 * (m_warp_params.nsamps_twarp/2 + 1));
|
Chris@14
|
754 fft_xwarping = new FFTReal(m_warp_params.nsamps_twarp);
|
Chris@0
|
755 }
|
Chris@0
|
756
|
Chris@0
|
757 void
|
Chris@0
|
758 FChTransformF0gram::design_warps(double * freq_relative, double * t_orig, double * t_warp) {
|
Chris@0
|
759 /* the warping is done by interpolating the original signal in time instants
|
Chris@0
|
760 given by the desired frequency deviation, to do this, the interpolation
|
Chris@0
|
761 instants are stored in a structure as an integer index and a fractional value
|
Chris@0
|
762 hypothesis: sampling frequency at the central point equals the original
|
Chris@7
|
763 */
|
Chris@0
|
764
|
Chris@14
|
765 m_warpings.pos_int = allocate<int>(m_warp_params.num_warps * m_warp_params.nsamps_twarp);
|
Chris@14
|
766 m_warpings.pos_frac = allocate<double>(m_warp_params.num_warps * m_warp_params.nsamps_twarp);
|
Chris@0
|
767
|
Chris@7
|
768 // vector of phase values
|
Chris@14
|
769 double *phi = allocate<double>(m_warpings.nsamps_torig);
|
Chris@7
|
770 double aux;
|
Chris@0
|
771
|
Chris@7
|
772 // warped positions
|
Chris@14
|
773 double *pos1 = allocate<double>(m_warp_params.nsamps_twarp*m_warp_params.num_warps);
|
Chris@0
|
774
|
Chris@10
|
775 for (int i = 0; i < m_warp_params.num_warps; i++) {
|
Chris@0
|
776
|
Chris@7
|
777 // integration of relative frequency to obtain phase values
|
Chris@14
|
778 Utils::cumtrapz(t_orig, freq_relative + i*(m_warpings.nsamps_torig), m_warpings.nsamps_torig, phi);
|
Chris@0
|
779
|
Chris@7
|
780 // centering of phase values to force original frequency in the middle
|
Chris@7
|
781 aux = phi[m_warpings.nsamps_torig/2];
|
Chris@10
|
782 for (int j = 0; j < m_warpings.nsamps_torig; j++) {
|
Chris@7
|
783 phi[j] -= aux;
|
Chris@7
|
784 } //for
|
Chris@0
|
785
|
Chris@7
|
786 // interpolation of phase values to obtain warped positions
|
Chris@14
|
787 Utils::interp1(phi, t_orig, m_warpings.nsamps_torig, t_warp, pos1 + i*m_warp_params.nsamps_twarp, m_warp_params.nsamps_twarp);
|
Chris@0
|
788 }
|
Chris@0
|
789
|
Chris@0
|
790 // % previous sample index
|
Chris@0
|
791 // pos1_int = uint32(floor(pos1))';
|
Chris@0
|
792 // % integer corresponding to previous sample index in "c"
|
Chris@0
|
793 // warps.pos1_int = (pos1_int - uint32(1));
|
Chris@0
|
794 // % fractional value that defines the warped position
|
Chris@0
|
795 // warps.pos1_frac = (double(pos1)' - double(pos1_int));
|
Chris@0
|
796
|
Chris@10
|
797 for (int j = 0; j < m_warp_params.nsamps_twarp*m_warp_params.num_warps; j++) {
|
Chris@7
|
798 // previous sample index
|
Chris@7
|
799 pos1[j] = pos1[j]*m_warpings.fs_orig + m_warpings.nsamps_torig/2 + 1;
|
Chris@10
|
800 m_warpings.pos_int[j] = (int) pos1[j];
|
Chris@7
|
801 m_warpings.pos_frac[j] = pos1[j] - (double)(m_warpings.pos_int[j]);
|
Chris@7
|
802 } //for
|
Chris@0
|
803
|
Chris@14
|
804 deallocate(phi);
|
Chris@14
|
805 deallocate(pos1);
|
Chris@0
|
806 }
|
Chris@0
|
807
|
Chris@0
|
808 void
|
Chris@0
|
809 FChTransformF0gram::define_warps_linear_chirps(double * freq_relative, double * t_orig) {
|
Chris@0
|
810 /** define warps as relative frequency deviation from original frequency
|
Chris@7
|
811 t_orig : time vector
|
Chris@7
|
812 freq_relative : relative frequency deviations
|
Chris@7
|
813 */
|
Chris@0
|
814 if (m_warp_params.alpha_dist == 0) {
|
Chris@0
|
815
|
Chris@0
|
816 // linear alpha values spacing
|
Chris@14
|
817 m_warpings.chirp_rates = allocate<double>(m_warp_params.num_warps);
|
Chris@0
|
818 // WARNING m_warp_params.num_warps must be odd
|
Chris@0
|
819 m_warpings.chirp_rates[0] = -m_warp_params.alpha_max;
|
Chris@0
|
820 double increment = (double) m_warp_params.alpha_max / ((m_warp_params.num_warps - 1) / 2);
|
Chris@0
|
821
|
Chris@10
|
822 for (int ind = 1; ind < m_warp_params.num_warps; ind++) {
|
Chris@0
|
823 m_warpings.chirp_rates[ind] = m_warpings.chirp_rates[ind - 1] + increment;
|
Chris@0
|
824 }
|
Chris@0
|
825 // force zero value
|
Chris@0
|
826 m_warpings.chirp_rates[(int) ((m_warp_params.num_warps - 1) / 2)] = 0;
|
Chris@0
|
827
|
Chris@0
|
828 } else {
|
Chris@0
|
829 // log alpha values spacing
|
Chris@14
|
830 m_warpings.chirp_rates = allocate<double>(m_warp_params.num_warps);
|
Chris@0
|
831
|
Chris@0
|
832 // force zero value
|
Chris@0
|
833 int middle_point = (int) ((m_warp_params.num_warps - 1) / 2);
|
Chris@0
|
834 m_warpings.chirp_rates[middle_point] = 0;
|
Chris@0
|
835
|
Chris@0
|
836 double logMax = log10(m_warp_params.alpha_max + 1);
|
Chris@0
|
837 double increment = logMax / ((m_warp_params.num_warps - 1) / 2.0f);
|
Chris@0
|
838 double exponent = 0;
|
Chris@0
|
839
|
Chris@0
|
840 // fill positive values
|
Chris@0
|
841 int ind_log = middle_point;
|
Chris@10
|
842 for (int ind = 0; ind < (m_warp_params.num_warps + 1) / 2; ind++) {
|
Chris@0
|
843 m_warpings.chirp_rates[ind_log] = pow(10, exponent) - 1;
|
Chris@0
|
844 exponent += increment;
|
Chris@0
|
845 ind_log++;
|
Chris@0
|
846 }
|
Chris@0
|
847 // fill negative values
|
Chris@10
|
848 for (int ind = 0; ind < (m_warp_params.num_warps - 1) / 2; ind++) {
|
Chris@0
|
849 m_warpings.chirp_rates[ind] = -m_warpings.chirp_rates[m_warp_params.num_warps - 1 - ind];
|
Chris@0
|
850 }
|
Chris@0
|
851 }
|
Chris@0
|
852
|
Chris@0
|
853 // compute relative frequency deviation
|
Chris@14
|
854 for (int i = 0; i < m_warpings.nsamps_torig; i++) {
|
Chris@14
|
855 for (int j = 0; j < m_warp_params.num_warps; j++) {
|
Chris@0
|
856 freq_relative[j * m_warpings.nsamps_torig + i] = 1.0 + t_orig[i] * m_warpings.chirp_rates[j];
|
Chris@14
|
857 }
|
Chris@14
|
858 }
|
Chris@0
|
859 }
|
Chris@0
|
860
|
Chris@0
|
861 void
|
Chris@14
|
862 FChTransformF0gram::design_LPF()
|
Chris@14
|
863 {
|
Chris@14
|
864 double *lp_LPFWindow_aux = allocate<double>(m_blockSize/2+1);
|
Chris@14
|
865 mp_LPFWindow = allocate<double>(m_blockSize/2+1);
|
Chris@0
|
866
|
Chris@10
|
867 int i_max = (int) ((2.0*m_fmax/m_fs) * ( (double)m_blockSize / 2.0 + 1.0 ));
|
Chris@10
|
868 for (int i = 0; i < m_blockSize/2+1; i++) {
|
Chris@0
|
869 if (i >= i_max) {
|
Chris@0
|
870 lp_LPFWindow_aux[i] = 0.0;
|
Chris@0
|
871 } else {
|
Chris@0
|
872 lp_LPFWindow_aux[i] = 1.0;
|
Chris@0
|
873 }
|
Chris@0
|
874 }
|
Chris@14
|
875
|
Chris@14
|
876 LPF_time = allocate_and_zero<double>(m_warpings.nsamps_torig);
|
Chris@14
|
877 LPF_frequency = allocate_and_zero<double>(2 * (m_warpings.nsamps_torig/2 + 1));
|
Chris@14
|
878
|
Chris@14
|
879 fft_forward_LPF = new FFTReal(m_blockSize);
|
Chris@14
|
880 fft_inverse_LPF = new FFTReal(m_warpings.nsamps_torig);
|
Chris@0
|
881
|
Chris@10
|
882 int winWidth = 11;
|
Chris@14
|
883 double *lp_hanningWindow = allocate<double>(winWidth);
|
Chris@0
|
884 double accum=0;
|
Chris@10
|
885 for (int i = 0; i < winWidth; i++) {
|
Chris@0
|
886 lp_hanningWindow[i]=0.5*(1.0-cos(2*M_PI*(double)(i+1)/((double)winWidth+1.0)));
|
Chris@0
|
887 accum+=lp_hanningWindow[i];
|
Chris@0
|
888
|
Chris@0
|
889 }
|
Chris@10
|
890 for (int i = 0; i < winWidth; i++) { //window normalization
|
Chris@0
|
891 lp_hanningWindow[i]=lp_hanningWindow[i]/accum;
|
Chris@0
|
892 }
|
Chris@10
|
893 for (int i = 0; i < m_blockSize/2+1; i++) {
|
Chris@0
|
894 //if (((i-(winWidth-1)/2)<0)||(i+(winWidth-1))/2>m_blockSize/2-1) {//consideramos winWidth impar, si la ventana sale del arreglo se rellena con el valor origianl
|
Chris@7
|
895 if ( (i > (i_max + (winWidth-1)/2)) || (i <= (i_max - (winWidth-1)/2)) ) {
|
Chris@0
|
896 mp_LPFWindow[i]=lp_LPFWindow_aux[i];
|
Chris@0
|
897 } else {
|
Chris@0
|
898 accum=0;
|
Chris@10
|
899 for (int j = -((winWidth-1)/2); j <= (winWidth-1)/2; j++) {
|
Chris@0
|
900 accum+=lp_LPFWindow_aux[i-j]*lp_hanningWindow[j+(winWidth-1)/2];
|
Chris@7
|
901 }
|
Chris@0
|
902 mp_LPFWindow[i]=accum;
|
Chris@0
|
903 }
|
Chris@0
|
904 }
|
Chris@0
|
905
|
Chris@14
|
906 deallocate(lp_LPFWindow_aux);
|
Chris@14
|
907 deallocate(lp_hanningWindow);
|
Chris@0
|
908 }
|
Chris@0
|
909
|
Chris@14
|
910 void FChTransformF0gram::apply_LPF()
|
Chris@14
|
911 {
|
Chris@14
|
912 fft_forward_LPF->forward(LPF_time, LPF_frequency);
|
Chris@14
|
913
|
Chris@10
|
914 for (int i = 0; i < m_blockSize/2+1; i++) {
|
Chris@16
|
915 LPF_frequency[i*2] *= mp_LPFWindow[i];
|
Chris@16
|
916 LPF_frequency[i*2 + 1] *= mp_LPFWindow[i];
|
Chris@0
|
917 }
|
Chris@14
|
918
|
Chris@14
|
919 fft_inverse_LPF->inverse(LPF_frequency, LPF_time);
|
Chris@0
|
920
|
Chris@7
|
921 // TODO ver si hay que hacer fftshift para corregir la fase respecto al centro del frame.
|
Chris@7
|
922 // nota: además de aplicar el LPF, esta función resamplea la señal original.
|
Chris@0
|
923 }
|
Chris@0
|
924
|
Chris@14
|
925 void FChTransformF0gram::clean_LPF()
|
Chris@14
|
926 {
|
Chris@14
|
927 delete fft_forward_LPF;
|
Chris@14
|
928 delete fft_inverse_LPF;
|
Chris@14
|
929 deallocate(LPF_time);
|
Chris@14
|
930 deallocate(LPF_frequency);
|
Chris@14
|
931 deallocate(mp_LPFWindow);
|
Chris@0
|
932 }
|
Chris@0
|
933
|
Chris@14
|
934 void FChTransformF0gram::reset()
|
Chris@14
|
935 {
|
Chris@0
|
936 }
|
Chris@0
|
937
|
Chris@0
|
938 FChTransformF0gram::FeatureSet
|
Chris@5
|
939 FChTransformF0gram::process(const float *const *inputBuffers, Vamp::RealTime) {
|
Chris@0
|
940
|
Chris@0
|
941 // // Do actual work!
|
Chris@0
|
942 //
|
Chris@0
|
943
|
Chris@7
|
944 /* PSEUDOCÓDIGO:
|
Chris@7
|
945 - Aplicar FFT al frame entero.
|
Chris@7
|
946 - Filtro pasabajos en frecuencia.
|
Chris@7
|
947 - FFT inversa al frame entero.
|
Chris@7
|
948 -----------------------------------------------------------------------------
|
Chris@7
|
949 - Para cada warp: *Si es un espectrograma direccional (un solo warp
|
Chris@7
|
950 => no es para cada warp sino para el elegido)
|
Chris@7
|
951 - Hacer la interpolación con interp1q.
|
Chris@7
|
952 - Aplicar la FFT al frame warpeado.
|
Chris@7
|
953 - (Opcional) GLogS.
|
Chris@7
|
954 - ...
|
Chris@7
|
955 */
|
Chris@0
|
956
|
Chris@0
|
957 //---------------------------------------------------------------------------
|
Chris@7
|
958 FeatureSet fs;
|
Chris@0
|
959
|
Chris@7
|
960 #ifdef DEBUG
|
Chris@16
|
961 fprintf(stderr, "\n ----- DEBUG INFORMATION ----- \n");
|
Chris@16
|
962 fprintf(stderr, " m_fs = %f Hz.\n",m_fs);
|
Chris@16
|
963 fprintf(stderr, " fs_orig = %f Hz.\n",m_warpings.fs_orig);
|
Chris@16
|
964 fprintf(stderr, " fs_warp = %f Hz.\n",m_warpings.fs_warp);
|
Chris@16
|
965 fprintf(stderr, " m_nfft = %d.\n",m_nfft);
|
Chris@16
|
966 fprintf(stderr, " m_blockSize = %d.\n",m_blockSize);
|
Chris@16
|
967 fprintf(stderr, " m_warpings.nsamps_torig = %d.\n",m_warpings.nsamps_torig);
|
Chris@16
|
968 fprintf(stderr, " m_warp_params.num_warps = %d.\n",m_warp_params.num_warps);
|
Chris@16
|
969 fprintf(stderr, " m_glogs_harmonic_count = %d.\n",m_glogs_harmonic_count);
|
Chris@7
|
970 #endif
|
Chris@0
|
971
|
Chris@10
|
972 for (int i = 0; i < m_blockSize; i++) {
|
Chris@0
|
973 LPF_time[i] = (double)(inputBuffers[0][i]) * m_timeWindow[i];
|
Chris@16
|
974 LPF_time[m_blockSize+i] = 0.0;
|
Chris@0
|
975 }
|
Chris@0
|
976
|
Chris@0
|
977 // #ifdef DEBUG
|
Chris@16
|
978 // fprintf(stderr, " HASTA ACÁ ANDA!!!\n");
|
Chris@0
|
979 // cout << flush;
|
Chris@0
|
980 // #endif
|
Chris@0
|
981
|
Chris@7
|
982 apply_LPF();
|
Chris@7
|
983 // Señal filtrada queda en LPF_time
|
Chris@0
|
984
|
Chris@7
|
985 Feature feature;
|
Chris@0
|
986 feature.hasTimestamp = false;
|
Chris@0
|
987
|
Chris@15
|
988 if (m_processingMode == ModeRoughSpectrogram) {
|
Chris@15
|
989 feature.values = vector<float>(m_warp_params.nsamps_twarp/2+1, 0.f);
|
Chris@15
|
990 }
|
Chris@15
|
991
|
Chris@0
|
992 // ----------------------------------------------------------------------------------------------
|
Chris@0
|
993 // Hanning window & FFT for all warp directions
|
Chris@0
|
994
|
Chris@7
|
995 double max_glogs = -DBL_MAX;
|
Chris@10
|
996 int ind_max_glogs = 0;
|
Chris@0
|
997
|
Chris@10
|
998 for (int i_warp = 0; i_warp < m_warp_params.num_warps; i_warp++) {
|
Chris@16
|
999
|
Chris@7
|
1000 // Interpolate
|
Chris@14
|
1001 Utils::interp1q(LPF_time, (m_warpings.pos_int) + i_warp*m_warp_params.nsamps_twarp, m_warpings.pos_frac + i_warp*m_warp_params.nsamps_twarp, x_warping, m_warp_params.nsamps_twarp);
|
Chris@0
|
1002
|
Chris@7
|
1003 // Apply window
|
Chris@10
|
1004 for (int i = 0; i < m_warp_params.nsamps_twarp; i++) {
|
Chris@7
|
1005 x_warping[i] *= mp_HanningWindow[i];
|
Chris@7
|
1006 }
|
Chris@0
|
1007
|
Chris@7
|
1008 // Transform
|
Chris@14
|
1009 fft_xwarping->forward(x_warping, m_auxFanChirpTransform);
|
Chris@0
|
1010
|
Chris@15
|
1011 if (m_processingMode == ModeRoughSpectrogram) {
|
Chris@15
|
1012 for (int i = 0; i < (m_warp_params.nsamps_twarp/2+1); i++) {
|
Chris@15
|
1013 double abs = sqrt(m_auxFanChirpTransform[i*2]*m_auxFanChirpTransform[i*2]+m_auxFanChirpTransform[i*2+1]*m_auxFanChirpTransform[i*2+1]);
|
Chris@15
|
1014 if (abs > feature.values[i]) {
|
Chris@15
|
1015 feature.values[i] = abs;
|
Chris@15
|
1016 }
|
Chris@15
|
1017 }
|
Chris@15
|
1018 continue;
|
Chris@15
|
1019 }
|
Chris@15
|
1020
|
Chris@7
|
1021 // Copy result
|
Chris@7
|
1022 double *aux_abs_fcht = m_absFanChirpTransform + i_warp*(m_warp_params.nsamps_twarp/2+1);
|
Chris@10
|
1023 for (int i = 0; i < (m_warp_params.nsamps_twarp/2+1); i++) {
|
Chris@14
|
1024 aux_abs_fcht[i] = log10(1.0 + 10.0*sqrt(m_auxFanChirpTransform[i*2]*m_auxFanChirpTransform[i*2]+m_auxFanChirpTransform[i*2+1]*m_auxFanChirpTransform[i*2+1]));
|
Chris@7
|
1025 }
|
Chris@0
|
1026
|
Chris@0
|
1027 // -----------------------------------------------------------------------------------------
|
Chris@0
|
1028 // GLogS
|
Chris@14
|
1029 Utils::interp1q(aux_abs_fcht, m_glogs_posint, m_glogs_posfrac, m_glogs_interp, m_glogs_harmonic_count);
|
Chris@10
|
1030 int glogs_ind = 0;
|
Chris@10
|
1031 for (int i = 0; i < m_glogs_num_f0s; i++) {
|
Chris@7
|
1032 double glogs_accum = 0;
|
Chris@10
|
1033 for (int j = 1; j <= m_glogs_n[i]; j++) {
|
Chris@7
|
1034 glogs_accum += m_glogs_interp[glogs_ind++];
|
Chris@7
|
1035 }
|
Chris@7
|
1036 m_glogs[i + i_warp*m_glogs_num_f0s] = glogs_accum/(double)m_glogs_n[i];
|
Chris@7
|
1037 }
|
Chris@0
|
1038
|
Chris@0
|
1039 // Sub/super harmonic correction
|
Chris@14
|
1040 Utils::interp1q(m_glogs + i_warp*m_glogs_num_f0s, m_glogs_third_harmonic_posint, m_glogs_third_harmonic_posfrac, m_glogs_third_harmonic, (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@14
|
1041 Utils::interp1q(m_glogs + i_warp*m_glogs_num_f0s, m_glogs_fifth_harmonic_posint, m_glogs_fifth_harmonic_posfrac, m_glogs_fifth_harmonic, (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@10
|
1042 for (int i = m_glogs_num_f0s-1; i >= m_glogs_init_f0s; i--) {
|
Chris@7
|
1043 m_glogs[i + i_warp*m_glogs_num_f0s] -= MAX(MAX(m_glogs[i-m_f0_params.num_f0s_per_oct + i_warp*m_glogs_num_f0s],m_glogs_third_harmonic[i-m_glogs_init_f0s]),m_glogs_fifth_harmonic[i-m_glogs_init_f0s]);
|
Chris@7
|
1044 //m_glogs[i] -= MAX(m_glogs[i-m_f0_params.num_f0s_per_oct],m_glogs_third_harmonic[i-m_glogs_init_f0s]);
|
Chris@7
|
1045 }
|
Chris@10
|
1046 for (int i = m_glogs_init_f0s; i < m_glogs_num_f0s-m_f0_params.num_f0s_per_oct; i++) {
|
Chris@7
|
1047 m_glogs[i + i_warp*m_glogs_num_f0s] -= 0.3*m_glogs[i+m_f0_params.num_f0s_per_oct + i_warp*m_glogs_num_f0s];
|
Chris@7
|
1048 // Median, sigma $ weights correction
|
Chris@7
|
1049 m_glogs[i + i_warp*m_glogs_num_f0s] = (m_glogs[i + i_warp*m_glogs_num_f0s]-m_glogs_median_correction[i-m_glogs_init_f0s])*m_glogs_sigma_correction[i-m_glogs_init_f0s]*m_glogs_f0_preference_weights[i-m_glogs_init_f0s];
|
Chris@7
|
1050 }
|
Chris@0
|
1051
|
Chris@7
|
1052 // Look for maximum value to determine best direction
|
Chris@10
|
1053 for (int i = m_glogs_init_f0s; i < m_glogs_num_f0s-m_f0_params.num_f0s_per_oct; i++) {
|
Chris@7
|
1054 if (m_glogs[i + i_warp*m_glogs_num_f0s] > max_glogs) {
|
Chris@7
|
1055 max_glogs = m_glogs[i + i_warp*m_glogs_num_f0s];
|
Chris@7
|
1056 ind_max_glogs = i_warp;
|
Chris@7
|
1057 }
|
Chris@7
|
1058 }
|
Chris@7
|
1059 }
|
Chris@0
|
1060
|
Chris@15
|
1061 if (m_processingMode == ModeRoughSpectrogram) {
|
Chris@15
|
1062
|
Chris@15
|
1063 // already accumulated our return values in feature
|
Chris@15
|
1064
|
Chris@15
|
1065 } else if (m_processingMode == ModeSpectrogram) {
|
Chris@15
|
1066
|
Chris@15
|
1067 for (int i = 0; i < m_warp_params.nsamps_twarp/2+1; i++) {
|
Chris@15
|
1068 feature.values.push_back(pow(10.0, m_absFanChirpTransform[ind_max_glogs * (m_warp_params.nsamps_twarp/2+1) + i]) - 1.0);
|
Chris@15
|
1069 }
|
Chris@15
|
1070
|
Chris@15
|
1071 } else { // f0gram
|
Chris@15
|
1072
|
Chris@15
|
1073 for (int i=m_glogs_init_f0s; i< m_glogs_num_f0s - m_f0_params.num_f0s_per_oct; i++) {
|
Chris@15
|
1074 switch (m_f0gram_mode) {
|
Chris@15
|
1075 case AllBinsOfBestDirection:
|
Chris@15
|
1076 feature.values.push_back((float)m_glogs[i+(int)ind_max_glogs*(int)m_glogs_num_f0s]);
|
Chris@15
|
1077 break;
|
Chris@15
|
1078 case BestBinOfAllDirections:
|
Chris@15
|
1079 max_glogs = -DBL_MAX;
|
Chris@15
|
1080 for (int i_warp = 0; i_warp < m_warp_params.num_warps; i_warp++) {
|
Chris@15
|
1081 if (m_glogs[i + i_warp*m_glogs_num_f0s] > max_glogs) {
|
Chris@15
|
1082 max_glogs = m_glogs[i + i_warp*m_glogs_num_f0s];
|
Chris@15
|
1083 ind_max_glogs = i_warp;
|
Chris@15
|
1084 }
|
Chris@7
|
1085 }
|
Chris@15
|
1086 feature.values.push_back((float)max_glogs);
|
Chris@15
|
1087 break;
|
Chris@7
|
1088 }
|
Chris@7
|
1089 }
|
Chris@7
|
1090 }
|
Chris@0
|
1091
|
Chris@7
|
1092 fs[0].push_back(feature);
|
Chris@7
|
1093 return fs;
|
Chris@0
|
1094 }
|
Chris@0
|
1095
|
Chris@0
|
1096 FChTransformF0gram::FeatureSet
|
Chris@0
|
1097 FChTransformF0gram::getRemainingFeatures() {
|
Chris@0
|
1098 return FeatureSet();
|
Chris@0
|
1099 }
|
Chris@0
|
1100
|
Chris@0
|
1101 void
|
Chris@0
|
1102 FChTransformF0gram::design_time_window() {
|
Chris@0
|
1103
|
Chris@10
|
1104 int transitionWidth = (int)m_blockSize/128 + 1;;
|
Chris@14
|
1105 m_timeWindow = allocate<double>(m_blockSize);
|
Chris@14
|
1106 double *lp_transitionWindow = allocate<double>(transitionWidth);
|
Chris@0
|
1107
|
Chris@7
|
1108 //memset(m_timeWindow, 1.0, m_blockSize);
|
Chris@10
|
1109 for (int i = 0; i < m_blockSize; i++) {
|
Chris@7
|
1110 m_timeWindow[i] = 1.0;
|
Chris@7
|
1111 }
|
Chris@0
|
1112
|
Chris@10
|
1113 for (int i = 0; i < transitionWidth; i++) {
|
Chris@0
|
1114 lp_transitionWindow[i]=0.5*(1.0-cos(2*M_PI*(double)(i+1)/((double)transitionWidth+1.0)));
|
Chris@0
|
1115 }
|
Chris@0
|
1116
|
Chris@10
|
1117 for (int i = 0; i < transitionWidth/2; i++) {
|
Chris@7
|
1118 m_timeWindow[i] = lp_transitionWindow[i];
|
Chris@7
|
1119 m_timeWindow[m_blockSize-1-i] = lp_transitionWindow[transitionWidth-1-i];
|
Chris@7
|
1120 }
|
Chris@0
|
1121
|
Chris@7
|
1122 #ifdef DEBUG
|
Chris@7
|
1123 for (int i = 0; i < m_blockSize; i++) {
|
Chris@7
|
1124 if ((i<transitionWidth)) {
|
Chris@16
|
1125 fprintf(stderr, " m_timeWindow[%d] = %f.\n",i,m_timeWindow[i]);
|
Chris@7
|
1126 }
|
Chris@7
|
1127 }
|
Chris@7
|
1128 #endif
|
Chris@0
|
1129
|
Chris@14
|
1130 deallocate(lp_transitionWindow);
|
Chris@0
|
1131 }
|
Chris@0
|
1132
|