Chris@0
|
1 /*
|
Chris@0
|
2 copyright (C) 2011 I. Irigaray, M. Rocamora
|
Chris@0
|
3
|
Chris@0
|
4 This program is free software: you can redistribute it and/or modify
|
Chris@0
|
5 it under the terms of the GNU General Public License as published by
|
Chris@0
|
6 the Free Software Foundation, either version 3 of the License, or
|
Chris@0
|
7 (at your option) any later version.
|
Chris@0
|
8
|
Chris@0
|
9 This program is distributed in the hope that it will be useful,
|
Chris@0
|
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
|
Chris@0
|
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
Chris@0
|
12 GNU General Public License for more details.
|
Chris@0
|
13
|
Chris@0
|
14 You should have received a copy of the GNU General Public License
|
Chris@0
|
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
|
Chris@7
|
16 */
|
Chris@0
|
17
|
Chris@0
|
#include "FChTransformF0gram.h"
#include "FChTransformUtils.h"
#include <math.h>
#include <float.h>
#include <stdio.h>
|
Chris@0
|
22 //#define DEBUG
|
Chris@7
|
23
|
Chris@0
|
// Larger of two values. Function-like macro: each argument may be evaluated
// twice, so do not pass expressions with side effects.
#define MAX(x, y) (((y) < (x)) ? (x) : (y))
|
Chris@0
|
25
|
Chris@0
|
26 FChTransformF0gram::FChTransformF0gram(float inputSampleRate) :
|
Chris@7
|
27 Plugin(inputSampleRate),
|
Chris@7
|
28 m_currentProgram("default"),
|
Chris@7
|
29 m_stepSize(0), // We are using 0 for step and block size to indicate "not yet set".
|
Chris@7
|
30 m_blockSize(0) {
|
Chris@0
|
31
|
Chris@0
|
32 m_fs = inputSampleRate;
|
Chris@0
|
33 // max frequency of interest (Hz)
|
Chris@0
|
34 m_fmax = 10000.f;
|
Chris@0
|
35 // warping parameters
|
Chris@0
|
36 m_warp_params.nsamps_twarp = 2048;
|
Chris@0
|
37 //m_warp_params.nsamps_twarp = 8;
|
Chris@0
|
38 m_warp_params.alpha_max = 4;
|
Chris@0
|
39 m_warp_params.num_warps = 21;
|
Chris@0
|
40 //m_warp_params.num_warps = 11;
|
Chris@0
|
41 m_warp_params.fact_over_samp = 2;
|
Chris@0
|
42 m_warp_params.alpha_dist = 0;
|
Chris@0
|
43 // f0 parameters
|
Chris@0
|
44 m_f0_params.f0min = 80.0;
|
Chris@0
|
45 m_f0_params.num_octs = 4;
|
Chris@0
|
46 m_f0_params.num_f0s_per_oct = 192;
|
Chris@0
|
47 m_f0_params.num_f0_hyps = 5;
|
Chris@0
|
48 m_f0_params.prefer = true;
|
Chris@0
|
49 m_f0_params.prefer_mean = 60;
|
Chris@0
|
50 m_f0_params.prefer_stdev = 18;
|
Chris@0
|
51 // glogs parameters
|
Chris@0
|
52 m_glogs_params.HP_logS = true;
|
Chris@0
|
53 m_glogs_params.att_subharms = 1;
|
Chris@7
|
54 // display parameters
|
Chris@7
|
55 m_f0gram_mode = true;
|
Chris@0
|
56
|
Chris@0
|
57 m_glogs_params.median_poly_coefs[0] = -0.000000058551680;
|
Chris@0
|
58 m_glogs_params.median_poly_coefs[1] = -0.000006945207775;
|
Chris@0
|
59 m_glogs_params.median_poly_coefs[2] = 0.002357223226588;
|
Chris@0
|
60
|
Chris@0
|
61 m_glogs_params.sigma_poly_coefs[0] = 0.000000092782308;
|
Chris@0
|
62 m_glogs_params.sigma_poly_coefs[1] = 0.000057283574898;
|
Chris@0
|
63 m_glogs_params.sigma_poly_coefs[2] = 0.022199903714288;
|
Chris@0
|
64
|
Chris@0
|
65 // number of fft points (controls zero-padding)
|
Chris@0
|
66 m_nfft = m_warp_params.nsamps_twarp;
|
Chris@0
|
67 // hop in samples
|
Chris@0
|
68 m_hop = m_warp_params.fact_over_samp * 256;
|
Chris@0
|
69
|
Chris@0
|
70 m_num_f0s = 0;
|
Chris@0
|
71
|
Chris@0
|
72 }
|
Chris@0
|
73
|
Chris@0
|
74 FChTransformF0gram::~FChTransformF0gram() {
|
Chris@0
|
75 // remeber to delete everything that deserves to
|
Chris@0
|
76 }
|
Chris@0
|
77
|
Chris@0
|
78 string
|
Chris@0
|
79 FChTransformF0gram::getIdentifier() const {
|
Chris@0
|
80 return "fchtransformf0gram";
|
Chris@0
|
81 }
|
Chris@0
|
82
|
Chris@0
|
83 string
|
Chris@0
|
84 FChTransformF0gram::getName() const {
|
Chris@0
|
85 return "Fan Chirp Transform F0gram";
|
Chris@0
|
86 }
|
Chris@0
|
87
|
Chris@0
|
88 string
|
Chris@0
|
89 FChTransformF0gram::getDescription() const {
|
Chris@0
|
90 // Return something helpful here!
|
Chris@0
|
91 return "This plug-in produces a representation, called F0gram, which exhibits the salience of the fundamental frequency of the sound sources in the audio file. The computation of the F0gram makes use of the Fan Chirp Transform analysis. It is based on the article \"Fan chirp transform for music representation\" P. Cancela, E. Lopez, M. Rocamora, International Conference on Digital Audio Effects, 13th. DAFx-10. Graz, Austria - 6-10 Sep 2010.";
|
Chris@0
|
92 }
|
Chris@0
|
93
|
Chris@0
|
94 string
|
Chris@0
|
95 FChTransformF0gram::getMaker() const {
|
Chris@0
|
96 // Your name here
|
Chris@0
|
97 return "Audio Processing Group \n Universidad de la Republica";
|
Chris@0
|
98 }
|
Chris@0
|
99
|
Chris@0
|
100 int
|
Chris@0
|
101 FChTransformF0gram::getPluginVersion() const {
|
Chris@0
|
102 // Increment this each time you release a version that behaves
|
Chris@0
|
103 // differently from the previous one
|
Chris@0
|
104 //
|
Chris@0
|
105 // 0 - initial version from scratch
|
Chris@0
|
106 return 0;
|
Chris@0
|
107 }
|
Chris@0
|
108
|
Chris@0
|
109 string
|
Chris@0
|
110 FChTransformF0gram::getCopyright() const {
|
Chris@0
|
111 // This function is not ideally named. It does not necessarily
|
Chris@0
|
112 // need to say who made the plugin -- getMaker does that -- but it
|
Chris@0
|
113 // should indicate the terms under which it is distributed. For
|
Chris@0
|
114 // example, "Copyright (year). All Rights Reserved", or "GPL"
|
Chris@0
|
115 return "copyright (C) 2011 GPL - Audio Processing Group, UdelaR";
|
Chris@0
|
116 }
|
Chris@0
|
117
|
Chris@0
|
118 FChTransformF0gram::InputDomain
|
Chris@0
|
119 FChTransformF0gram::getInputDomain() const {
|
Chris@0
|
120 return TimeDomain;
|
Chris@0
|
121 }
|
Chris@0
|
122
|
Chris@0
|
123 size_t FChTransformF0gram::getPreferredBlockSize() const {
|
Chris@0
|
124 return 8192; // 0 means "I can handle any block size"
|
Chris@0
|
125 }
|
Chris@0
|
126
|
Chris@0
|
127 size_t
|
Chris@0
|
128 FChTransformF0gram::getPreferredStepSize() const {
|
Chris@0
|
129 return 256; // 0 means "anything sensible"; in practice this
|
Chris@0
|
130 // means the same as the block size for TimeDomain
|
Chris@0
|
131 // plugins, or half of it for FrequencyDomain plugins
|
Chris@0
|
132 }
|
Chris@0
|
133
|
Chris@0
|
134 size_t
|
Chris@0
|
135 FChTransformF0gram::getMinChannelCount() const {
|
Chris@0
|
136 return 1;
|
Chris@0
|
137 }
|
Chris@0
|
138
|
Chris@0
|
139 size_t
|
Chris@0
|
140 FChTransformF0gram::getMaxChannelCount() const {
|
Chris@0
|
141 return 1;
|
Chris@0
|
142 }
|
Chris@0
|
143
|
Chris@0
|
144 FChTransformF0gram::ParameterList
|
Chris@0
|
145 FChTransformF0gram::getParameterDescriptors() const {
|
Chris@0
|
146 ParameterList list;
|
Chris@0
|
147
|
Chris@0
|
148 // If the plugin has no adjustable parameters, return an empty
|
Chris@0
|
149 // list here (and there's no need to provide implementations of
|
Chris@0
|
150 // getParameter and setParameter in that case either).
|
Chris@0
|
151
|
Chris@0
|
152 // Note that it is your responsibility to make sure the parameters
|
Chris@0
|
153 // start off having their default values (e.g. in the constructor
|
Chris@0
|
154 // above). The host needs to know the default value so it can do
|
Chris@0
|
155 // things like provide a "reset to default" function, but it will
|
Chris@0
|
156 // not explicitly set your parameters to their defaults for you if
|
Chris@0
|
157 // they have not changed in the mean time.
|
Chris@0
|
158
|
Chris@0
|
159 // ============= WARPING PARAMETERS =============
|
Chris@0
|
160
|
Chris@0
|
161 ParameterDescriptor fmax;
|
Chris@0
|
162 fmax.identifier = "fmax";
|
Chris@0
|
163 fmax.name = "Maximum frequency";
|
Chris@0
|
164 fmax.description = "Maximum frequency of interest for the analysis.";
|
Chris@0
|
165 fmax.unit = "Hz";
|
Chris@0
|
166 fmax.minValue = 2000;
|
Chris@0
|
167 fmax.maxValue = 22050;
|
Chris@0
|
168 fmax.defaultValue = 10000;
|
Chris@0
|
169 fmax.isQuantized = true;
|
Chris@0
|
170 fmax.quantizeStep = 1.0;
|
Chris@0
|
171 list.push_back(fmax);
|
Chris@0
|
172
|
Chris@0
|
173 ParameterDescriptor nsamp;
|
Chris@0
|
174 nsamp.identifier = "nsamp";
|
Chris@0
|
175 nsamp.name = "Number of samples";
|
Chris@0
|
176 nsamp.description = "Number of samples of the time warped frame";
|
Chris@0
|
177 nsamp.unit = "samples";
|
Chris@0
|
178 nsamp.minValue = 128;
|
Chris@0
|
179 nsamp.maxValue = 4096;
|
Chris@0
|
180 nsamp.defaultValue = 2048;
|
Chris@0
|
181 nsamp.isQuantized = true;
|
Chris@0
|
182 nsamp.quantizeStep = 1.0;
|
Chris@0
|
183 list.push_back(nsamp);
|
Chris@0
|
184
|
Chris@0
|
185 ParameterDescriptor nfft;
|
Chris@0
|
186 nfft.identifier = "nfft";
|
Chris@0
|
187 nfft.name = "FFT number of points";
|
Chris@0
|
188 nfft.description = "Number of FFT points (controls zero-padding)";
|
Chris@0
|
189 nfft.unit = "samples";
|
Chris@0
|
190 nfft.minValue = 0;
|
Chris@0
|
191 nfft.maxValue = 4;
|
Chris@0
|
192 nfft.defaultValue = 3;
|
Chris@0
|
193 nfft.isQuantized = true;
|
Chris@0
|
194 nfft.quantizeStep = 1.0;
|
Chris@0
|
195 nfft.valueNames.push_back("256");
|
Chris@0
|
196 nfft.valueNames.push_back("512");
|
Chris@0
|
197 nfft.valueNames.push_back("1024");
|
Chris@0
|
198 nfft.valueNames.push_back("2048");
|
Chris@0
|
199 nfft.valueNames.push_back("4096");
|
Chris@0
|
200 nfft.valueNames.push_back("8192");
|
Chris@0
|
201 list.push_back(nfft);
|
Chris@0
|
202
|
Chris@0
|
203 ParameterDescriptor alpha_max;
|
Chris@0
|
204 alpha_max.identifier = "alpha_max";
|
Chris@0
|
205 alpha_max.name = "Maximum alpha value";
|
Chris@0
|
206 alpha_max.description = "Maximum value for the alpha parameter of the transform.";
|
Chris@0
|
207 alpha_max.unit = "Hz/s";
|
Chris@0
|
208 alpha_max.minValue = -10;
|
Chris@0
|
209 alpha_max.maxValue = 10;
|
Chris@0
|
210 alpha_max.defaultValue = 5;
|
Chris@0
|
211 alpha_max.isQuantized = true;
|
Chris@0
|
212 alpha_max.quantizeStep = 1.0;
|
Chris@0
|
213 list.push_back(alpha_max);
|
Chris@0
|
214
|
Chris@0
|
215 ParameterDescriptor num_warps;
|
Chris@0
|
216 num_warps.identifier = "num_warps";
|
Chris@0
|
217 num_warps.name = "Number of warpings";
|
Chris@0
|
218 num_warps.description = "Number of different warpings in the specified range (must be odd).";
|
Chris@0
|
219 num_warps.unit = "";
|
Chris@0
|
220 num_warps.minValue = 1;
|
Chris@0
|
221 num_warps.maxValue = 101;
|
Chris@0
|
222 num_warps.defaultValue = 21;
|
Chris@0
|
223 num_warps.isQuantized = true;
|
Chris@0
|
224 num_warps.quantizeStep = 2.0;
|
Chris@0
|
225 list.push_back(num_warps);
|
Chris@0
|
226
|
Chris@0
|
227 ParameterDescriptor alpha_dist;
|
Chris@0
|
228 alpha_dist.identifier = "alpha_dist";
|
Chris@0
|
229 alpha_dist.name = "alpha distribution";
|
Chris@0
|
230 alpha_dist.description = "Type of distribution of alpha values (linear or log).";
|
Chris@0
|
231 alpha_dist.unit = "";
|
Chris@0
|
232 alpha_dist.minValue = 0;
|
Chris@0
|
233 alpha_dist.maxValue = 1;
|
Chris@0
|
234 alpha_dist.defaultValue = 1;
|
Chris@0
|
235 alpha_dist.isQuantized = true;
|
Chris@0
|
236 alpha_dist.quantizeStep = 1.0;
|
Chris@0
|
237 // lin (0), log (1)
|
Chris@0
|
238 alpha_dist.valueNames.push_back("lin");
|
Chris@0
|
239 alpha_dist.valueNames.push_back("log");
|
Chris@0
|
240 list.push_back(alpha_dist);
|
Chris@0
|
241
|
Chris@0
|
242 // ============= F0-GRAM PARAMETERS =============
|
Chris@0
|
243
|
Chris@0
|
244 ParameterDescriptor f0min;
|
Chris@0
|
245 f0min.identifier = "f0min";
|
Chris@0
|
246 f0min.name = "min f0";
|
Chris@0
|
247 f0min.description = "Minimum fundamental frequency (f0) value.";
|
Chris@0
|
248 f0min.unit = "Hz";
|
Chris@0
|
249 f0min.minValue = 1;
|
Chris@0
|
250 f0min.maxValue = 500;
|
Chris@0
|
251 f0min.defaultValue = 80;
|
Chris@0
|
252 f0min.isQuantized = true;
|
Chris@0
|
253 f0min.quantizeStep = 1.0;
|
Chris@0
|
254 list.push_back(f0min);
|
Chris@0
|
255
|
Chris@0
|
256 ParameterDescriptor num_octs;
|
Chris@0
|
257 num_octs.identifier = "num_octs";
|
Chris@0
|
258 num_octs.name = "number of octaves";
|
Chris@0
|
259 num_octs.description = "Number of octaves for F0gram computation.";
|
Chris@0
|
260 num_octs.unit = "";
|
Chris@0
|
261 num_octs.minValue = 1;
|
Chris@0
|
262 num_octs.maxValue = 10;
|
Chris@0
|
263 num_octs.defaultValue = 4;
|
Chris@0
|
264 num_octs.isQuantized = true;
|
Chris@0
|
265 num_octs.quantizeStep = 1.0;
|
Chris@0
|
266 list.push_back(num_octs);
|
Chris@0
|
267
|
Chris@0
|
268 ParameterDescriptor num_f0_hyps;
|
Chris@0
|
269 num_f0_hyps.identifier = "num_f0_hyps";
|
Chris@0
|
270 num_f0_hyps.name = "number of f0 hypotesis";
|
Chris@0
|
271 num_f0_hyps.description = "Number of f0 hypotesis to extract.";
|
Chris@0
|
272 num_f0_hyps.unit = "";
|
Chris@0
|
273 num_f0_hyps.minValue = 1;
|
Chris@0
|
274 num_f0_hyps.maxValue = 100;
|
Chris@0
|
275 num_f0_hyps.defaultValue = 10;
|
Chris@0
|
276 num_f0_hyps.isQuantized = true;
|
Chris@0
|
277 num_f0_hyps.quantizeStep = 1.0;
|
Chris@0
|
278 list.push_back(num_f0_hyps);
|
Chris@0
|
279
|
Chris@0
|
280 ParameterDescriptor f0s_per_oct;
|
Chris@0
|
281 f0s_per_oct.identifier = "f0s_per_oct";
|
Chris@0
|
282 f0s_per_oct.name = "f0 values per octave";
|
Chris@0
|
283 f0s_per_oct.description = "Number of f0 values per octave.";
|
Chris@0
|
284 f0s_per_oct.unit = "";
|
Chris@0
|
285 f0s_per_oct.minValue = 12;
|
Chris@0
|
286 f0s_per_oct.maxValue = 768;
|
Chris@0
|
287 f0s_per_oct.defaultValue = 192;
|
Chris@0
|
288 f0s_per_oct.isQuantized = true;
|
Chris@0
|
289 f0s_per_oct.quantizeStep = 1.0;
|
Chris@0
|
290 list.push_back(f0s_per_oct);
|
Chris@0
|
291
|
Chris@0
|
292 ParameterDescriptor f0_prefer_fun;
|
Chris@0
|
293 f0_prefer_fun.identifier = "f0_prefer_fun";
|
Chris@0
|
294 f0_prefer_fun.name = "f0 preference function";
|
Chris@0
|
295 f0_prefer_fun.description = "Whether to use a f0 weighting function.";
|
Chris@0
|
296 f0_prefer_fun.unit = "";
|
Chris@0
|
297 f0_prefer_fun.minValue = 0;
|
Chris@0
|
298 f0_prefer_fun.maxValue = 1;
|
Chris@0
|
299 f0_prefer_fun.defaultValue = 1;
|
Chris@0
|
300 f0_prefer_fun.isQuantized = true;
|
Chris@0
|
301 f0_prefer_fun.quantizeStep = 1.0;
|
Chris@0
|
302 list.push_back(f0_prefer_fun);
|
Chris@0
|
303
|
Chris@0
|
304 ParameterDescriptor f0_prefer_mean;
|
Chris@0
|
305 f0_prefer_mean.identifier = "f0_prefer_mean";
|
Chris@0
|
306 f0_prefer_mean.name = "mean f0 preference function";
|
Chris@0
|
307 f0_prefer_mean.description = "Mean value for f0 weighting function (MIDI number).";
|
Chris@0
|
308 f0_prefer_mean.unit = "";
|
Chris@0
|
309 f0_prefer_mean.minValue = 1;
|
Chris@0
|
310 f0_prefer_mean.maxValue = 127;
|
Chris@0
|
311 f0_prefer_mean.defaultValue = 60;
|
Chris@0
|
312 f0_prefer_mean.isQuantized = true;
|
Chris@0
|
313 f0_prefer_mean.quantizeStep = 1.0;
|
Chris@0
|
314 list.push_back(f0_prefer_mean);
|
Chris@0
|
315
|
Chris@0
|
316 ParameterDescriptor f0_prefer_stdev;
|
Chris@0
|
317 f0_prefer_stdev.identifier = "f0_prefer_stdev";
|
Chris@0
|
318 f0_prefer_stdev.name = "stdev of f0 preference function";
|
Chris@0
|
319 f0_prefer_stdev.description = "Stdev for f0 weighting function (MIDI number).";
|
Chris@0
|
320 f0_prefer_stdev.unit = "";
|
Chris@0
|
321 f0_prefer_stdev.minValue = 1;
|
Chris@0
|
322 f0_prefer_stdev.maxValue = 127;
|
Chris@0
|
323 f0_prefer_stdev.defaultValue = 18;
|
Chris@0
|
324 f0_prefer_stdev.isQuantized = true;
|
Chris@0
|
325 f0_prefer_stdev.quantizeStep = 1.0;
|
Chris@0
|
326 list.push_back(f0_prefer_stdev);
|
Chris@0
|
327
|
Chris@0
|
328 ParameterDescriptor f0gram_mode;
|
Chris@0
|
329 f0gram_mode.identifier = "f0gram_mode";
|
Chris@0
|
330 f0gram_mode.name = "display mode of f0gram";
|
Chris@0
|
331 f0gram_mode.description = "Display all bins of the best direction, or the best bin for each direction.";
|
Chris@0
|
332 f0gram_mode.unit = "";
|
Chris@0
|
333 f0gram_mode.minValue = 0;
|
Chris@0
|
334 f0gram_mode.maxValue = 1;
|
Chris@0
|
335 f0gram_mode.defaultValue = 1;
|
Chris@0
|
336 f0gram_mode.isQuantized = true;
|
Chris@0
|
337 f0gram_mode.quantizeStep = 1.0;
|
Chris@0
|
338 list.push_back(f0gram_mode);
|
Chris@0
|
339
|
Chris@0
|
340 return list;
|
Chris@0
|
341 }
|
Chris@0
|
342
|
Chris@0
|
343 float
|
Chris@0
|
344 FChTransformF0gram::getParameter(string identifier) const {
|
Chris@0
|
345
|
Chris@0
|
346 if (identifier == "fmax") {
|
Chris@0
|
347 return m_fmax;
|
Chris@0
|
348 } else if (identifier == "nsamp") {
|
Chris@0
|
349 return m_warp_params.nsamps_twarp;
|
Chris@0
|
350 } else if (identifier == "alpha_max") {
|
Chris@0
|
351 return m_warp_params.alpha_max;
|
Chris@0
|
352 } else if (identifier == "num_warps") {
|
Chris@0
|
353 return m_warp_params.num_warps;
|
Chris@0
|
354 } else if (identifier == "alpha_dist") {
|
Chris@0
|
355 return m_warp_params.alpha_dist;
|
Chris@0
|
356 } else if (identifier == "nfft") {
|
Chris@0
|
357 return m_nfft;
|
Chris@0
|
358 } else if (identifier == "f0min") {
|
Chris@0
|
359 return m_f0_params.f0min;
|
Chris@0
|
360 } else if (identifier == "num_octs") {
|
Chris@0
|
361 return m_f0_params.num_octs;
|
Chris@0
|
362 } else if (identifier == "f0s_per_oct") {
|
Chris@0
|
363 return m_f0_params.num_f0s_per_oct;
|
Chris@0
|
364 } else if (identifier == "num_f0_hyps") {
|
Chris@0
|
365 return m_f0_params.num_f0_hyps;
|
Chris@0
|
366 } else if (identifier == "f0_prefer_fun") {
|
Chris@0
|
367 return m_f0_params.prefer;
|
Chris@0
|
368 } else if (identifier == "f0_prefer_mean") {
|
Chris@0
|
369 return m_f0_params.prefer_mean;
|
Chris@0
|
370 } else if (identifier == "f0_prefer_stdev") {
|
Chris@0
|
371 return m_f0_params.prefer_stdev;
|
Chris@7
|
372 } else if (identifier == "f0gram_mode") {
|
Chris@0
|
373 return m_f0gram_mode;
|
Chris@0
|
374 } else {
|
Chris@0
|
375 return 0.f;
|
Chris@0
|
376 }
|
Chris@0
|
377
|
Chris@0
|
378 }
|
Chris@0
|
379
|
Chris@0
|
380 void FChTransformF0gram::setParameter(string identifier, float value) {
|
Chris@0
|
381
|
Chris@0
|
382 if (identifier == "fmax") {
|
Chris@0
|
383 m_fmax = value;
|
Chris@0
|
384 } else if (identifier == "nsamp") {
|
Chris@0
|
385 m_warp_params.nsamps_twarp = value;
|
Chris@0
|
386 } else if (identifier == "alpha_max") {
|
Chris@0
|
387 m_warp_params.alpha_max = value;
|
Chris@0
|
388 } else if (identifier == "num_warps") {
|
Chris@0
|
389 m_warp_params.num_warps = value;
|
Chris@0
|
390 } else if (identifier == "alpha_dist") {
|
Chris@0
|
391 m_warp_params.alpha_dist = value;
|
Chris@0
|
392 } else if (identifier == "nfft") {
|
Chris@0
|
393 m_nfft = value;
|
Chris@0
|
394 } else if (identifier == "f0min") {
|
Chris@0
|
395 m_f0_params.f0min = value;
|
Chris@0
|
396 } else if (identifier == "num_octs") {
|
Chris@0
|
397 m_f0_params.num_octs = value;
|
Chris@0
|
398 } else if (identifier == "f0s_per_oct") {
|
Chris@0
|
399 m_f0_params.num_f0s_per_oct = value;
|
Chris@0
|
400 } else if (identifier == "num_f0_hyps") {
|
Chris@0
|
401 m_f0_params.num_f0_hyps = value;
|
Chris@0
|
402 } else if (identifier == "f0_prefer_fun") {
|
Chris@0
|
403 m_f0_params.prefer = value;
|
Chris@0
|
404 } else if (identifier == "f0_prefer_mean") {
|
Chris@0
|
405 m_f0_params.prefer_mean = value;
|
Chris@0
|
406 } else if (identifier == "f0_prefer_stdev") {
|
Chris@0
|
407 m_f0_params.prefer_stdev = value;
|
Chris@0
|
408 } else if (identifier == "f0gram_mode") {
|
Chris@0
|
409 m_f0gram_mode = value;
|
Chris@0
|
410 }
|
Chris@0
|
411
|
Chris@0
|
412 }
|
Chris@0
|
413
|
Chris@0
|
414 FChTransformF0gram::ProgramList
|
Chris@0
|
415 FChTransformF0gram::getPrograms() const {
|
Chris@0
|
416 ProgramList list;
|
Chris@0
|
417
|
Chris@0
|
418 list.push_back("default");
|
Chris@0
|
419
|
Chris@0
|
420 return list;
|
Chris@0
|
421 }
|
Chris@0
|
422
|
Chris@0
|
423 string
|
Chris@0
|
424 FChTransformF0gram::getCurrentProgram() const {
|
Chris@0
|
425 return m_currentProgram;
|
Chris@0
|
426 }
|
Chris@0
|
427
|
Chris@0
|
428 void
|
Chris@0
|
429 FChTransformF0gram::selectProgram(string name) {
|
Chris@0
|
430
|
Chris@0
|
431 m_currentProgram = name;
|
Chris@0
|
432
|
Chris@0
|
433 if (name == "default") {
|
Chris@0
|
434 m_fmax = 10000.f;
|
Chris@0
|
435
|
Chris@0
|
436 m_warp_params.nsamps_twarp = 2048;
|
Chris@0
|
437 m_warp_params.alpha_max = 4;
|
Chris@0
|
438 m_warp_params.num_warps = 21;
|
Chris@0
|
439 m_warp_params.fact_over_samp = 2;
|
Chris@0
|
440 m_warp_params.alpha_dist = 0;
|
Chris@0
|
441
|
Chris@0
|
442 m_f0_params.f0min = 80.0;
|
Chris@0
|
443 m_f0_params.num_octs = 4;
|
Chris@0
|
444 m_f0_params.num_f0s_per_oct = 192;
|
Chris@0
|
445 m_f0_params.num_f0_hyps = 5;
|
Chris@0
|
446 m_f0_params.prefer = true;
|
Chris@0
|
447 m_f0_params.prefer_mean = 60;
|
Chris@0
|
448 m_f0_params.prefer_stdev = 18;
|
Chris@0
|
449
|
Chris@0
|
450 m_glogs_params.HP_logS = true;
|
Chris@0
|
451 m_glogs_params.att_subharms = 1;
|
Chris@0
|
452
|
Chris@0
|
453 m_glogs_params.median_poly_coefs[0] = -0.000000058551680;
|
Chris@0
|
454 m_glogs_params.median_poly_coefs[1] = -0.000006945207775;
|
Chris@0
|
455 m_glogs_params.median_poly_coefs[2] = 0.002357223226588;
|
Chris@0
|
456
|
Chris@0
|
457 m_glogs_params.sigma_poly_coefs[0] = 0.000000092782308;
|
Chris@0
|
458 m_glogs_params.sigma_poly_coefs[1] = 0.000057283574898;
|
Chris@0
|
459 m_glogs_params.sigma_poly_coefs[2] = 0.022199903714288;
|
Chris@0
|
460
|
Chris@0
|
461 m_nfft = m_warp_params.nsamps_twarp;
|
Chris@0
|
462 m_hop = m_warp_params.fact_over_samp * 256;
|
Chris@0
|
463
|
Chris@0
|
464 m_num_f0s = 0;
|
Chris@0
|
465
|
Chris@7
|
466 m_f0gram_mode = 1;
|
Chris@0
|
467
|
Chris@0
|
468 }
|
Chris@0
|
469 }
|
Chris@0
|
470
|
Chris@0
|
471 FChTransformF0gram::OutputList
|
Chris@0
|
472 FChTransformF0gram::getOutputDescriptors() const {
|
Chris@0
|
473
|
Chris@0
|
474 OutputList list;
|
Chris@0
|
475
|
Chris@0
|
476 // See OutputDescriptor documentation for the possibilities here.
|
Chris@0
|
477 // Every plugin must have at least one output.
|
Chris@0
|
478
|
Chris@0
|
479 /* f0 values of F0gram grid as string values */
|
Chris@0
|
480 vector<string> f0values;
|
Chris@0
|
481 size_t ind = 0;
|
Chris@0
|
482 char f0String[10];
|
Chris@0
|
483 while (ind < m_num_f0s) {
|
Chris@0
|
484 sprintf(f0String, "%4.2f", m_f0s[ind]);
|
Chris@0
|
485 f0values.push_back(f0String);
|
Chris@0
|
486 ind++;
|
Chris@0
|
487 }
|
Chris@0
|
488
|
Chris@0
|
489 /* The F0gram */
|
Chris@0
|
490 OutputDescriptor d;
|
Chris@0
|
491 d.identifier = "f0gram";
|
Chris@0
|
492 d.name = "F0gram: salience of f0s";
|
Chris@0
|
493 d.description = "This representation show the salience of the different f0s in the signal.";
|
Chris@0
|
494 d.unit = "Hertz";
|
Chris@0
|
495 d.hasFixedBinCount = true;
|
Chris@0
|
496 //d.binCount = m_num_f0s;
|
Chris@7
|
497 //d.binCount = m_blockSize/2+1;
|
Chris@7
|
498 //d.binCount = m_warp_params.nsamps_twarp/2+1;
|
Chris@7
|
499 //d.binCount = m_warpings.nsamps_torig;
|
Chris@7
|
500 d.binCount = m_f0_params.num_octs*m_f0_params.num_f0s_per_oct;
|
Chris@0
|
501 d.binNames = f0values;
|
Chris@0
|
502 d.hasKnownExtents = false;
|
Chris@0
|
503 d.isQuantized = false;
|
Chris@0
|
504 d.sampleType = OutputDescriptor::OneSamplePerStep;
|
Chris@0
|
505 d.hasDuration = false;
|
Chris@0
|
506 list.push_back(d);
|
Chris@0
|
507
|
Chris@0
|
508 return list;
|
Chris@0
|
509 }
|
Chris@0
|
510
|
Chris@0
|
511 bool
|
Chris@0
|
512 FChTransformF0gram::initialise(size_t channels, size_t stepSize, size_t blockSize) {
|
Chris@0
|
513 if (channels < getMinChannelCount() ||
|
Chris@7
|
514 channels > getMaxChannelCount()) return false;
|
Chris@0
|
515
|
Chris@0
|
516 // set blockSize and stepSize (but changed below)
|
Chris@0
|
517 m_blockSize = blockSize;
|
Chris@0
|
518 m_stepSize = stepSize;
|
Chris@0
|
519
|
Chris@0
|
520 // WARNING !!!
|
Chris@0
|
521 // these values in fact are determined by the sampling frequency m_fs
|
Chris@0
|
522 // the parameters used below correspond to default values i.e. m_fs = 44.100 Hz
|
Chris@0
|
523 //m_blockSize = 4 * m_warp_params.nsamps_twarp;
|
Chris@0
|
524 m_stepSize = floor(m_hop / m_warp_params.fact_over_samp);
|
Chris@0
|
525
|
Chris@0
|
526 /* initialise m_warp_params */
|
Chris@0
|
527 // FChTF0gram:warping_design m_warpings = new warping_design;
|
Chris@0
|
528 /* initialise m_f0_params */
|
Chris@0
|
529
|
Chris@0
|
530 /* initialise m_glogs_params */
|
Chris@7
|
531 design_GLogS();
|
Chris@0
|
532
|
Chris@0
|
533 /* design of FChT */
|
Chris@0
|
534 // design_fcht(m_warps, m_accums, m_f0s)
|
Chris@0
|
535 design_FChT();
|
Chris@0
|
536
|
Chris@7
|
537 design_FFT();
|
Chris@0
|
538
|
Chris@7
|
539 design_LPF();
|
Chris@0
|
540
|
Chris@7
|
541 design_time_window();
|
Chris@0
|
542
|
Chris@7
|
543 // Create Hanning window for warped signals
|
Chris@7
|
544 mp_HanningWindow = new double[m_warp_params.nsamps_twarp];
|
Chris@7
|
545 bool normalize = false;
|
Chris@7
|
546 hanning_window(mp_HanningWindow, m_warp_params.nsamps_twarp, normalize);
|
Chris@0
|
547
|
Chris@0
|
548 return true;
|
Chris@0
|
549 }
|
Chris@0
|
550
|
Chris@0
|
551 void
|
Chris@0
|
552 FChTransformF0gram::design_GLogS() {
|
Chris@0
|
553
|
Chris@7
|
554 // total number & initial quantity of f0s
|
Chris@7
|
555 m_glogs_init_f0s = (size_t)(((double)m_f0_params.num_f0s_per_oct)*log2(5.0))+1;
|
Chris@7
|
556 m_glogs_num_f0s = (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct + m_glogs_init_f0s;
|
Chris@0
|
557
|
Chris@7
|
558 // Initialize arrays
|
Chris@7
|
559 m_glogs_f0 = new double[m_glogs_num_f0s];
|
Chris@7
|
560 m_glogs = new double[m_glogs_num_f0s*m_warp_params.num_warps];
|
Chris@7
|
561 m_glogs_n = new size_t[m_glogs_num_f0s];
|
Chris@7
|
562 m_glogs_index = new size_t[m_glogs_num_f0s];
|
Chris@0
|
563
|
Chris@7
|
564 // Compute f0 values
|
Chris@7
|
565 m_glogs_harmonic_count = 0;
|
Chris@7
|
566 double factor = (double)(m_warp_params.nsamps_twarp/2)/(double)(m_warp_params.nsamps_twarp/2+1);
|
Chris@7
|
567 for (size_t i = 0; i < m_glogs_num_f0s; i++) {
|
Chris@7
|
568 m_glogs_f0[i] = (m_f0_params.f0min/5.0)*pow(2.0,(double)i/(double)m_f0_params.num_f0s_per_oct);
|
Chris@7
|
569 // for every f0 compute number of partials less or equal than m_fmax.
|
Chris@7
|
570 m_glogs_n[i] = m_fmax*factor/m_glogs_f0[i];
|
Chris@7
|
571 m_glogs_index[i] = m_glogs_harmonic_count;
|
Chris@7
|
572 m_glogs_harmonic_count += m_glogs_n[i];
|
Chris@7
|
573 }
|
Chris@0
|
574
|
Chris@7
|
575 // Initialize arrays for interpolation
|
Chris@7
|
576 m_glogs_posint = new size_t[m_glogs_harmonic_count];
|
Chris@7
|
577 m_glogs_posfrac = new double[m_glogs_harmonic_count];
|
Chris@7
|
578 m_glogs_interp = new double[m_glogs_harmonic_count];
|
Chris@0
|
579
|
Chris@7
|
580 // Compute int & frac of interpolation positions
|
Chris@7
|
581 size_t aux_index = 0;
|
Chris@7
|
582 double aux_pos;
|
Chris@7
|
583 for (size_t i = 0; i < m_glogs_num_f0s; i++) {
|
Chris@7
|
584 for (size_t j = 1; j <= m_glogs_n[i]; j++) {
|
Chris@7
|
585 // indice en el vector de largo t_warp/2+1 donde el ultimo valor corresponde a f=m_fmax
|
Chris@7
|
586 aux_pos = ((double)j*m_glogs_f0[i])*((double)(m_warp_params.nsamps_twarp/2+1))/m_fmax;
|
Chris@7
|
587 m_glogs_posint[aux_index] = (size_t)aux_pos;
|
Chris@7
|
588 m_glogs_posfrac[aux_index] = aux_pos - (double)m_glogs_posint[aux_index];
|
Chris@7
|
589 aux_index++;
|
Chris@7
|
590 }
|
Chris@7
|
591 }
|
Chris@0
|
592
|
Chris@7
|
593 // Third harmonic attenuation
|
Chris@7
|
594 double aux_third_harmonic;
|
Chris@7
|
595 m_glogs_third_harmonic_posint = new size_t[(m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct];
|
Chris@7
|
596 m_glogs_third_harmonic_posfrac = new double[(m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct];
|
Chris@7
|
597 for (size_t i = 0; i < (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct; i++) {
|
Chris@7
|
598 aux_third_harmonic = (double)i + (double)m_glogs_init_f0s - ((double)m_f0_params.num_f0s_per_oct)*log2(3.0);
|
Chris@7
|
599 m_glogs_third_harmonic_posint[i] = (size_t)aux_third_harmonic;
|
Chris@7
|
600 m_glogs_third_harmonic_posfrac[i] = aux_third_harmonic - (double)(m_glogs_third_harmonic_posint[i]);
|
Chris@7
|
601 }
|
Chris@7
|
602 m_glogs_third_harmonic = new double[(m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct];
|
Chris@0
|
603
|
Chris@7
|
604 // Fifth harmonic attenuation
|
Chris@7
|
605 double aux_fifth_harmonic;
|
Chris@7
|
606 m_glogs_fifth_harmonic_posint = new size_t[(m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct];
|
Chris@7
|
607 m_glogs_fifth_harmonic_posfrac = new double[(m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct];
|
Chris@7
|
608 for (size_t i = 0; i < (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct; i++) {
|
Chris@7
|
609 aux_fifth_harmonic = (double)i + (double)m_glogs_init_f0s - ((double)m_f0_params.num_f0s_per_oct)*log2(5.0);
|
Chris@7
|
610 m_glogs_fifth_harmonic_posint[i] = (size_t)aux_fifth_harmonic;
|
Chris@7
|
611 m_glogs_fifth_harmonic_posfrac[i] = aux_fifth_harmonic - (double)(m_glogs_fifth_harmonic_posint[i]);
|
Chris@7
|
612 }
|
Chris@7
|
613 m_glogs_fifth_harmonic = new double[(m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct];
|
Chris@0
|
614
|
Chris@7
|
615 // Normalization & attenuation windows
|
Chris@7
|
616 m_glogs_f0_preference_weights = new double[m_f0_params.num_octs*m_f0_params.num_f0s_per_oct];
|
Chris@7
|
617 m_glogs_median_correction = new double[m_f0_params.num_octs*m_f0_params.num_f0s_per_oct];
|
Chris@7
|
618 m_glogs_sigma_correction = new double[m_f0_params.num_octs*m_f0_params.num_f0s_per_oct];
|
Chris@7
|
619 m_glogs_hf_smoothing_window = new double[m_warp_params.nsamps_twarp/2+1];
|
Chris@7
|
620 double MIDI_value;
|
Chris@7
|
621 for (size_t i = 0; i < m_f0_params.num_octs*m_f0_params.num_f0s_per_oct; i++) {
|
Chris@7
|
622 MIDI_value = 69.0 + 12.0 * log2(m_glogs_f0[i + m_glogs_init_f0s]/440.0);
|
Chris@7
|
623 m_glogs_f0_preference_weights[i] = 1.0/sqrt(2.0*M_PI*m_f0_params.prefer_stdev*m_f0_params.prefer_stdev)*exp(-(MIDI_value-m_f0_params.prefer_mean)*(MIDI_value-m_f0_params.prefer_mean)/(2.0*m_f0_params.prefer_stdev*m_f0_params.prefer_stdev));
|
Chris@7
|
624 m_glogs_f0_preference_weights[i] = (0.01 + m_glogs_f0_preference_weights[i]) / (1.01);
|
Chris@0
|
625
|
Chris@7
|
626 m_glogs_median_correction[i] = m_glogs_params.median_poly_coefs[0]*(i+1.0)*(i+1.0) + m_glogs_params.median_poly_coefs[1]*(i+1.0) + m_glogs_params.median_poly_coefs[2];
|
Chris@7
|
627 m_glogs_sigma_correction[i] = 1.0 / (m_glogs_params.sigma_poly_coefs[0]*(i+1.0)*(i+1.0) + m_glogs_params.sigma_poly_coefs[1]*(i+1.0) + m_glogs_params.sigma_poly_coefs[2]);
|
Chris@7
|
628 }
|
Chris@0
|
629
|
Chris@7
|
630 double smooth_width = 1000.0; // hertz.
|
Chris@7
|
631 double smooth_aux = (double)(m_warp_params.nsamps_twarp/2+1)*(m_fmax-smooth_width)/m_fmax;
|
Chris@7
|
632 for (size_t i = 0; i < m_warp_params.nsamps_twarp/2+1; i++) {
|
Chris@7
|
633 if (i < smooth_aux) {
|
Chris@7
|
634 m_glogs_hf_smoothing_window[i] = 1.0;
|
Chris@7
|
635 } else {
|
Chris@7
|
636 m_glogs_hf_smoothing_window[i] = ((double)i - (double)m_warp_params.nsamps_twarp/2.0)*(-1.0/((double)(m_warp_params.nsamps_twarp/2+1)-smooth_aux));
|
Chris@7
|
637 }
|
Chris@7
|
638 }
|
Chris@0
|
639 }
|
Chris@0
|
640
|
Chris@0
|
641 void
|
Chris@0
|
642 FChTransformF0gram::design_FFT() {
|
Chris@0
|
643 in = (fftw_complex*) fftw_malloc(sizeof (fftw_complex) * m_nfft);
|
Chris@0
|
644 out = (fftw_complex*) fftw_malloc(sizeof (fftw_complex) * m_nfft);
|
Chris@7
|
645 //TODO verificar que el tipo de datos de in_window es del tipo double, era float.
|
Chris@0
|
646 in_window = (double*) fftw_malloc(sizeof (double) * m_nfft);
|
Chris@0
|
647 planFFT = fftw_plan_dft_1d(m_nfft, in, out, FFTW_FORWARD, FFTW_ESTIMATE);
|
Chris@0
|
648
|
Chris@7
|
649 //TODO hacer diseño del FFT para el filtrado pasabajos.
|
Chris@0
|
650
|
Chris@0
|
651 }
|
Chris@0
|
652
|
Chris@0
|
653 void
|
Chris@0
|
654 FChTransformF0gram::design_FChT() {
|
Chris@0
|
655
|
Chris@0
|
656 /*
|
Chris@0
|
657 * FILES FOR DEBUGGING
|
Chris@0
|
658 */
|
Chris@0
|
659
|
Chris@0
|
660 //ofstream output("output.txt");
|
Chris@0
|
661
|
Chris@0
|
662
|
Chris@0
|
663 /* ============= WARPING DESIGN ============= */
|
Chris@0
|
664
|
Chris@0
|
665 // sampling frequency after oversampling
|
Chris@0
|
666 m_warpings.fs_orig = m_warp_params.fact_over_samp * m_fs;
|
Chris@0
|
667
|
Chris@0
|
668 // number of samples of the original signal frame
|
Chris@0
|
669 m_warpings.nsamps_torig = 4 * m_warp_params.fact_over_samp * m_warp_params.nsamps_twarp;
|
Chris@0
|
670 // equivalent to: m_warpings.nsamps_torig = m_warp_params.fact_over_samp * m_blockSize;
|
Chris@0
|
671
|
Chris@0
|
672 // time instants of the original signal frame
|
Chris@0
|
673 double t_orig[m_warpings.nsamps_torig];
|
Chris@0
|
674 //float * t_orig = new float [m_warpings.nsamps_torig];
|
Chris@0
|
675 for (size_t ind = 0; ind < m_warpings.nsamps_torig; ind++) {
|
Chris@0
|
676 t_orig[ind] = ((double)(ind + 1) - (double)m_warpings.nsamps_torig / 2.0) / m_warpings.fs_orig;
|
Chris@0
|
677 }
|
Chris@0
|
678
|
Chris@0
|
679 // linear chirps warping definition as relative frequency deviation
|
Chris@0
|
680 //double * freq_relative = new double [m_warpings.nsamps_torig * m_warp_params.num_warps];
|
Chris@7
|
681 //TODO
|
Chris@7
|
682 double *freq_relative = new double [m_warpings.nsamps_torig * m_warp_params.num_warps];
|
Chris@0
|
683 define_warps_linear_chirps(freq_relative, t_orig);
|
Chris@0
|
684
|
Chris@0
|
685 // maximum relative frequency deviation
|
Chris@0
|
686 double freq_relative_max = 0;
|
Chris@0
|
687 for (size_t i = 0; i < m_warpings.nsamps_torig; i++)
|
Chris@0
|
688 for (size_t j = 0; j < m_warp_params.num_warps; j++)
|
Chris@0
|
689 if (freq_relative_max < freq_relative[j * m_warpings.nsamps_torig + i])
|
Chris@0
|
690 freq_relative_max = freq_relative[j * m_warpings.nsamps_torig + i];
|
Chris@0
|
691
|
Chris@0
|
692 // sampling frequency of warped signal to be free of aliasing up to fmax
|
Chris@0
|
693 m_warpings.fs_warp = 2 * m_fmax * freq_relative_max;
|
Chris@0
|
694
|
Chris@0
|
695 // time instants of the warped signal frame
|
Chris@0
|
696 double t_warp[m_warp_params.nsamps_twarp];
|
Chris@0
|
697 for (size_t ind = 0; ind < m_warp_params.nsamps_twarp; ind++) {
|
Chris@0
|
698 t_warp[ind] = ((double)((int)(ind + 1)- (int)m_warp_params.nsamps_twarp / 2)) / (double)m_warpings.fs_warp;
|
Chris@0
|
699 }
|
Chris@0
|
700
|
Chris@0
|
701 // design of warpings for efficient interpolation
|
Chris@0
|
702 design_warps(freq_relative, t_orig, t_warp);
|
Chris@0
|
703
|
Chris@0
|
704
|
Chris@0
|
705 /*
|
Chris@0
|
706 * FILES FOR DEBUGGING
|
Chris@0
|
707 */
|
Chris@0
|
708
|
Chris@0
|
709 /*
|
Chris@7
|
710 output << "chirp_rates" << endl;
|
Chris@7
|
711 for (size_t j = 0; j < m_warp_params.num_warps; j++){
|
Chris@7
|
712 output << m_warpings.chirp_rates[j];
|
Chris@7
|
713 output << " ";
|
Chris@7
|
714 }
|
Chris@7
|
715 output << endl << "freq_relative" << endl;
|
Chris@0
|
716
|
Chris@7
|
717 for (size_t i = 0; i < m_warpings.nsamps_torig; i++){
|
Chris@7
|
718 for (size_t j = 0; j < m_warp_params.num_warps; j++){
|
Chris@7
|
719 output << freq_relative[j * m_warpings.nsamps_torig + i];
|
Chris@7
|
720 output << " ";
|
Chris@7
|
721 }
|
Chris@7
|
722 output << endl;
|
Chris@7
|
723 }
|
Chris@0
|
724
|
Chris@7
|
725 output << endl << "t_orig" << endl;
|
Chris@0
|
726
|
Chris@7
|
727 for (size_t i = 0; i < m_warpings.nsamps_torig; i++){
|
Chris@7
|
728 output << t_orig[i] << endl ;
|
Chris@7
|
729 }
|
Chris@7
|
730 */
|
Chris@0
|
731
|
Chris@7
|
732 delete [] freq_relative;
|
Chris@0
|
733 //output.close();
|
Chris@0
|
734
|
Chris@0
|
735 /* ============= FFTW PLAN DESIGN ============= */
|
Chris@7
|
736 // Initialize 2-d array for warped signals
|
Chris@7
|
737 x_warping = new double[m_warp_params.nsamps_twarp];
|
Chris@7
|
738 m_absFanChirpTransform = (double*)fftw_malloc(sizeof (double) * m_warp_params.num_warps * (m_warp_params.nsamps_twarp/2 + 1));
|
Chris@7
|
739 m_auxFanChirpTransform = (fftw_complex*)fftw_malloc(sizeof ( fftw_complex) * (m_warp_params.nsamps_twarp/2 + 1));
|
Chris@7
|
740 plan_forward_xwarping = fftw_plan_dft_r2c_1d(m_warp_params.nsamps_twarp, x_warping, m_auxFanChirpTransform, FFTW_ESTIMATE);
|
Chris@0
|
741
|
Chris@0
|
742 }
|
Chris@0
|
743
|
Chris@0
|
744 void
|
Chris@0
|
745 FChTransformF0gram::design_warps(double * freq_relative, double * t_orig, double * t_warp) {
|
Chris@0
|
746 /* the warping is done by interpolating the original signal in time instants
|
Chris@0
|
747 given by the desired frequency deviation, to do this, the interpolation
|
Chris@0
|
748 instants are stored in a structure as an integer index and a fractional value
|
Chris@0
|
749 hypothesis: sampling frequency at the central point equals the original
|
Chris@7
|
750 */
|
Chris@0
|
751
|
Chris@0
|
752 m_warpings.pos_int = new size_t[m_warp_params.num_warps * m_warp_params.nsamps_twarp];
|
Chris@7
|
753 m_warpings.pos_frac = new double[m_warp_params.num_warps * m_warp_params.nsamps_twarp];
|
Chris@0
|
754
|
Chris@7
|
755 // vector of phase values
|
Chris@7
|
756 double *phi = new double[m_warpings.nsamps_torig];
|
Chris@7
|
757 double aux;
|
Chris@0
|
758
|
Chris@7
|
759 // warped positions
|
Chris@7
|
760 double *pos1 = new double[m_warp_params.nsamps_twarp*m_warp_params.num_warps];
|
Chris@0
|
761
|
Chris@0
|
762 for (size_t i = 0; i < m_warp_params.num_warps; i++) {
|
Chris@0
|
763 // vector of phase values
|
Chris@0
|
764 // float * phi;
|
Chris@0
|
765 // integration of relative frequency to obtain phase values
|
Chris@0
|
766 // phi = cumtrapz(t_orig,freq_relative(:,i)');
|
Chris@0
|
767 // centering of phase values to force original frequency in the middle
|
Chris@0
|
768 //phi = phi - phi(end/2);
|
Chris@0
|
769 // interpolation of phase values to obtain warped positions
|
Chris@0
|
770 //pos1(i,:) = interp1(phi,t_orig,t_warp)*fs_orig + length(t_orig)/2;
|
Chris@0
|
771
|
Chris@7
|
772 // integration of relative frequency to obtain phase values
|
Chris@7
|
773 cumtrapz(t_orig, freq_relative + i*(m_warpings.nsamps_torig), m_warpings.nsamps_torig, phi);
|
Chris@0
|
774
|
Chris@7
|
775 // centering of phase values to force original frequency in the middle
|
Chris@7
|
776 aux = phi[m_warpings.nsamps_torig/2];
|
Chris@7
|
777 for (size_t j = 0; j < m_warpings.nsamps_torig; j++) {
|
Chris@7
|
778 phi[j] -= aux;
|
Chris@7
|
779 } //for
|
Chris@0
|
780
|
Chris@7
|
781 // interpolation of phase values to obtain warped positions
|
Chris@7
|
782 interp1(phi, t_orig, m_warpings.nsamps_torig, t_warp, pos1 + i*m_warp_params.nsamps_twarp, m_warp_params.nsamps_twarp);
|
Chris@0
|
783
|
Chris@0
|
784 }
|
Chris@0
|
785
|
Chris@0
|
786 // % previous sample index
|
Chris@0
|
787 // pos1_int = uint32(floor(pos1))';
|
Chris@0
|
788 // % integer corresponding to previous sample index in "c"
|
Chris@0
|
789 // warps.pos1_int = (pos1_int - uint32(1));
|
Chris@0
|
790 // % fractional value that defines the warped position
|
Chris@0
|
791 // warps.pos1_frac = (double(pos1)' - double(pos1_int));
|
Chris@0
|
792
|
Chris@7
|
793 // m_warpings.pos_int = new size_t[m_warp_params.num_warps * m_warp_params.nsamps_twarp];
|
Chris@7
|
794 for (size_t j = 0; j < m_warp_params.nsamps_twarp*m_warp_params.num_warps; j++) {
|
Chris@7
|
795 // previous sample index
|
Chris@7
|
796 pos1[j] = pos1[j]*m_warpings.fs_orig + m_warpings.nsamps_torig/2 + 1;
|
Chris@7
|
797 m_warpings.pos_int[j] = (size_t) pos1[j];
|
Chris@7
|
798 m_warpings.pos_frac[j] = pos1[j] - (double)(m_warpings.pos_int[j]);
|
Chris@7
|
799 } //for
|
Chris@0
|
800
|
Chris@7
|
801 delete [] phi;
|
Chris@7
|
802 delete [] pos1;
|
Chris@0
|
803 }
|
Chris@0
|
804
|
Chris@0
|
805 void
|
Chris@0
|
806 FChTransformF0gram::define_warps_linear_chirps(double * freq_relative, double * t_orig) {
|
Chris@0
|
807 /** define warps as relative frequency deviation from original frequency
|
Chris@7
|
808 t_orig : time vector
|
Chris@7
|
809 freq_relative : relative frequency deviations
|
Chris@7
|
810 */
|
Chris@0
|
811 if (m_warp_params.alpha_dist == 0) {
|
Chris@0
|
812
|
Chris@0
|
813 // linear alpha values spacing
|
Chris@0
|
814 m_warpings.chirp_rates = new double [m_warp_params.num_warps];
|
Chris@0
|
815 // WARNING m_warp_params.num_warps must be odd
|
Chris@0
|
816 m_warpings.chirp_rates[0] = -m_warp_params.alpha_max;
|
Chris@0
|
817 double increment = (double) m_warp_params.alpha_max / ((m_warp_params.num_warps - 1) / 2);
|
Chris@0
|
818
|
Chris@0
|
819 for (size_t ind = 1; ind < m_warp_params.num_warps; ind++) {
|
Chris@0
|
820 m_warpings.chirp_rates[ind] = m_warpings.chirp_rates[ind - 1] + increment;
|
Chris@0
|
821 }
|
Chris@0
|
822 // force zero value
|
Chris@0
|
823 m_warpings.chirp_rates[(int) ((m_warp_params.num_warps - 1) / 2)] = 0;
|
Chris@0
|
824
|
Chris@0
|
825 } else {
|
Chris@0
|
826 // log alpha values spacing
|
Chris@0
|
827 m_warpings.chirp_rates = new double [m_warp_params.num_warps];
|
Chris@0
|
828
|
Chris@0
|
829 // force zero value
|
Chris@0
|
830 int middle_point = (int) ((m_warp_params.num_warps - 1) / 2);
|
Chris@0
|
831 m_warpings.chirp_rates[middle_point] = 0;
|
Chris@0
|
832
|
Chris@0
|
833 double logMax = log10(m_warp_params.alpha_max + 1);
|
Chris@0
|
834 double increment = logMax / ((m_warp_params.num_warps - 1) / 2.0f);
|
Chris@0
|
835 double exponent = 0;
|
Chris@0
|
836
|
Chris@0
|
837 // fill positive values
|
Chris@0
|
838 int ind_log = middle_point;
|
Chris@0
|
839 for (size_t ind = 0; ind < (m_warp_params.num_warps + 1) / 2; ind++) {
|
Chris@0
|
840 m_warpings.chirp_rates[ind_log] = pow(10, exponent) - 1;
|
Chris@0
|
841 exponent += increment;
|
Chris@0
|
842 ind_log++;
|
Chris@0
|
843 }
|
Chris@0
|
844 // fill negative values
|
Chris@0
|
845 for (size_t ind = 0; ind < (m_warp_params.num_warps - 1) / 2; ind++) {
|
Chris@0
|
846 m_warpings.chirp_rates[ind] = -m_warpings.chirp_rates[m_warp_params.num_warps - 1 - ind];
|
Chris@0
|
847 }
|
Chris@0
|
848 }
|
Chris@0
|
849
|
Chris@0
|
850 // compute relative frequency deviation
|
Chris@0
|
851 for (size_t i = 0; i < m_warpings.nsamps_torig; i++)
|
Chris@0
|
852 for (size_t j = 0; j < m_warp_params.num_warps; j++)
|
Chris@0
|
853 freq_relative[j * m_warpings.nsamps_torig + i] = 1.0 + t_orig[i] * m_warpings.chirp_rates[j];
|
Chris@0
|
854 //freq_relative[i * m_warpings.nsamps_torig + j] = 1.0 + t_orig[i] * m_warpings.chirp_rates[j];
|
Chris@0
|
855 //freq_relative[i][j] = 1.0 + t_orig[i] * m_warpings.chirp_rates[j];
|
Chris@0
|
856 }
|
Chris@0
|
857
|
Chris@0
|
858 void
|
Chris@0
|
859 FChTransformF0gram::design_LPF() {
|
Chris@0
|
860
|
Chris@0
|
861 // in = (fftw_complex*) fftw_malloc(sizeof (fftw_complex) * tamanoVentana);
|
Chris@0
|
862 // out = (fftw_complex*) fftw_malloc(sizeof (fftw_complex) * tamanoVentana);
|
Chris@0
|
863 // in_window = (float*) fftw_malloc(sizeof (float) * tamanoVentana);
|
Chris@0
|
864 // p = fftw_plan_dft_1d(tamanoVentana, in, out, FFTW_FORWARD, FFTW_ESTIMATE);
|
Chris@0
|
865 double *lp_LPFWindow_aux = new double[m_blockSize/2+1];
|
Chris@0
|
866 mp_LPFWindow = new double[m_blockSize/2+1];
|
Chris@0
|
867
|
Chris@0
|
868 size_t i_max = (size_t) ((2.0*m_fmax/m_fs) * ( (double)m_blockSize / 2.0 + 1.0 ));
|
Chris@0
|
869 for (size_t i = 0; i < m_blockSize/2+1; i++) {
|
Chris@0
|
870 if (i >= i_max) {
|
Chris@0
|
871 lp_LPFWindow_aux[i] = 0.0;
|
Chris@0
|
872 } else {
|
Chris@0
|
873 lp_LPFWindow_aux[i] = 1.0;
|
Chris@0
|
874 }
|
Chris@0
|
875 }
|
Chris@0
|
876 LPF_time = (double*)fftw_malloc(sizeof ( double) * m_warpings.nsamps_torig);
|
Chris@7
|
877 //memset((char*)LPF_time, 0, m_warpings.nsamps_torig * sizeof(double));
|
Chris@7
|
878 // sustituyo el memset por un for:
|
Chris@7
|
879 for (size_t i = 0; i < m_warpings.nsamps_torig; i++) {
|
Chris@7
|
880 LPF_time[i] = 0.0;
|
Chris@7
|
881 }
|
Chris@7
|
882 #ifdef DEBUG
|
Chris@7
|
883 printf(" Corrio primer memset...\n");
|
Chris@7
|
884 #endif
|
Chris@0
|
885 LPF_frequency = (fftw_complex*)fftw_malloc(sizeof ( fftw_complex) * (m_warpings.nsamps_torig/2 + 1)); //tamaño de la fft cuando la entrada es real
|
Chris@7
|
886 //memset((char*)LPF_frequency, 0, sizeof(fftw_complex) * (m_warpings.nsamps_torig/2 + 1));
|
Chris@7
|
887 // sustituyo el memset por un for:
|
Chris@7
|
888 for (size_t i = 0; i < (m_warpings.nsamps_torig/2 + 1); i++) {
|
Chris@7
|
889 LPF_frequency[i][0] = 0.0;
|
Chris@7
|
890 LPF_frequency[i][1] = 0.0;
|
Chris@7
|
891 }
|
Chris@0
|
892 // for (int i=0; i<(m_blockSize/2+1); i++) {
|
Chris@0
|
893 // LPF_frequency[i] = new fftw_complex;
|
Chris@0
|
894 // }
|
Chris@0
|
895 plan_forward_LPF = fftw_plan_dft_r2c_1d(m_blockSize, LPF_time, LPF_frequency, FFTW_ESTIMATE);
|
Chris@0
|
896 plan_backward_LPF = fftw_plan_dft_c2r_1d(m_warpings.nsamps_torig, LPF_frequency, LPF_time, FFTW_ESTIMATE|FFTW_PRESERVE_INPUT);
|
Chris@0
|
897
|
Chris@7
|
898 size_t winWidth = 11;
|
Chris@0
|
899 double *lp_hanningWindow = new double[winWidth];
|
Chris@0
|
900 double accum=0;
|
Chris@0
|
901 for (size_t i = 0; i < winWidth; i++) {
|
Chris@0
|
902 lp_hanningWindow[i]=0.5*(1.0-cos(2*M_PI*(double)(i+1)/((double)winWidth+1.0)));
|
Chris@0
|
903 accum+=lp_hanningWindow[i];
|
Chris@0
|
904
|
Chris@0
|
905 }
|
Chris@0
|
906 for (size_t i = 0; i < winWidth; i++) { //window normalization
|
Chris@0
|
907 lp_hanningWindow[i]=lp_hanningWindow[i]/accum;
|
Chris@0
|
908 }
|
Chris@0
|
909 for (size_t i = 0; i < m_blockSize/2+1; i++) {
|
Chris@0
|
910 //if (((i-(winWidth-1)/2)<0)||(i+(winWidth-1))/2>m_blockSize/2-1) {//consideramos winWidth impar, si la ventana sale del arreglo se rellena con el valor origianl
|
Chris@7
|
911 if ( (i > (i_max + (winWidth-1)/2)) || (i <= (i_max - (winWidth-1)/2)) ) {
|
Chris@0
|
912 mp_LPFWindow[i]=lp_LPFWindow_aux[i];
|
Chris@0
|
913 } else {
|
Chris@0
|
914 accum=0;
|
Chris@7
|
915 for (size_t j = -((winWidth-1)/2); j <= (winWidth-1)/2; j++) {
|
Chris@0
|
916 accum+=lp_LPFWindow_aux[i-j]*lp_hanningWindow[j+(winWidth-1)/2];
|
Chris@7
|
917 }
|
Chris@0
|
918 mp_LPFWindow[i]=accum;
|
Chris@0
|
919 }
|
Chris@0
|
920 }
|
Chris@0
|
921
|
Chris@0
|
922 delete[] lp_LPFWindow_aux;
|
Chris@0
|
923 delete[] lp_hanningWindow;
|
Chris@0
|
924 }
|
Chris@0
|
925
|
Chris@0
|
926 void FChTransformF0gram::apply_LPF() {
|
Chris@0
|
927 fftw_execute(plan_forward_LPF);
|
Chris@0
|
928 for (size_t i = 0; i < m_blockSize/2+1; i++) {
|
Chris@0
|
929 LPF_frequency[i][0]*=mp_LPFWindow[i];
|
Chris@0
|
930 LPF_frequency[i][1]*=mp_LPFWindow[i];
|
Chris@0
|
931 }
|
Chris@0
|
932 fftw_execute(plan_backward_LPF);
|
Chris@0
|
933
|
Chris@7
|
934 // TODO ver si hay que hacer fftshift para corregir la fase respecto al centro del frame.
|
Chris@7
|
935 // nota: además de aplicar el LPF, esta función resamplea la señal original.
|
Chris@0
|
936 }
|
Chris@0
|
937
|
Chris@0
|
938 void FChTransformF0gram::clean_LPF() {
|
Chris@0
|
939 delete[] mp_LPFWindow;
|
Chris@0
|
940
|
Chris@7
|
941 fftw_destroy_plan(plan_forward_LPF);
|
Chris@7
|
942 fftw_destroy_plan(plan_backward_LPF);
|
Chris@7
|
943 fftw_free(LPF_time);
|
Chris@7
|
944 fftw_free(LPF_frequency);
|
Chris@0
|
945 }
|
Chris@0
|
946
|
Chris@0
|
947 void FChTransformF0gram::reset() {
|
Chris@0
|
948
|
Chris@0
|
949 // Clear buffers, reset stored values, etc
|
Chris@0
|
950
|
Chris@7
|
951 delete [] m_warpings.pos_int;
|
Chris@7
|
952 delete [] m_warpings.pos_frac;
|
Chris@0
|
953
|
Chris@0
|
954 fftw_destroy_plan(planFFT);
|
Chris@7
|
955 fftw_free(in);
|
Chris@7
|
956 fftw_free(out);
|
Chris@0
|
957
|
Chris@7
|
958 clean_LPF();
|
Chris@0
|
959
|
Chris@7
|
960 delete [] m_timeWindow;
|
Chris@0
|
961
|
Chris@7
|
962 delete [] mp_HanningWindow;
|
Chris@0
|
963
|
Chris@7
|
964 // Warping
|
Chris@7
|
965 delete [] x_warping;
|
Chris@7
|
966 fftw_destroy_plan(plan_forward_xwarping);
|
Chris@7
|
967 fftw_free(m_absFanChirpTransform);
|
Chris@7
|
968 fftw_free(m_auxFanChirpTransform);
|
Chris@0
|
969
|
Chris@7
|
970 // design_GLogS
|
Chris@7
|
971 delete [] m_glogs_f0;
|
Chris@7
|
972 delete [] m_glogs;
|
Chris@7
|
973 delete [] m_glogs_n;
|
Chris@7
|
974 delete [] m_glogs_index;
|
Chris@7
|
975 delete [] m_glogs_posint;
|
Chris@7
|
976 delete [] m_glogs_posfrac;
|
Chris@7
|
977 delete [] m_glogs_third_harmonic_posint;
|
Chris@7
|
978 delete [] m_glogs_third_harmonic_posfrac;
|
Chris@7
|
979 delete [] m_glogs_third_harmonic;
|
Chris@7
|
980 delete [] m_glogs_fifth_harmonic_posint;
|
Chris@7
|
981 delete [] m_glogs_fifth_harmonic_posfrac;
|
Chris@7
|
982 delete [] m_glogs_fifth_harmonic;
|
Chris@7
|
983 delete [] m_glogs_f0_preference_weights;
|
Chris@7
|
984 delete [] m_glogs_median_correction;
|
Chris@7
|
985 delete [] m_glogs_sigma_correction;
|
Chris@7
|
986 delete [] m_glogs_hf_smoothing_window;
|
Chris@0
|
987
|
Chris@0
|
988 }
|
Chris@0
|
989
|
Chris@0
|
990 FChTransformF0gram::FeatureSet
|
Chris@5
|
991 FChTransformF0gram::process(const float *const *inputBuffers, Vamp::RealTime) {
|
Chris@0
|
992
|
Chris@0
|
993 // // Do actual work!
|
Chris@0
|
994 //
|
Chris@0
|
995
|
Chris@7
|
996 /* PSEUDOCÓDIGO:
|
Chris@7
|
997 - Aplicar FFT al frame entero.
|
Chris@7
|
998 - Filtro pasabajos en frecuencia.
|
Chris@7
|
999 - FFT inversa al frame entero.
|
Chris@7
|
1000 -----------------------------------------------------------------------------
|
Chris@7
|
1001 - Para cada warp: *Si es un espectrograma direccional (un solo warp
|
Chris@7
|
1002 => no es para cada warp sino para el elegido)
|
Chris@7
|
1003 - Hacer la interpolación con interp1q.
|
Chris@7
|
1004 - Aplicar la FFT al frame warpeado.
|
Chris@7
|
1005 - (Opcional) GLogS.
|
Chris@7
|
1006 - ...
|
Chris@7
|
1007 */
|
Chris@0
|
1008
|
Chris@0
|
1009 //---------------------------------------------------------------------------
|
Chris@7
|
1010 FeatureSet fs;
|
Chris@0
|
1011
|
Chris@7
|
1012 #ifdef DEBUG
|
Chris@7
|
1013 printf("\n ----- DEBUG INFORMATION ----- \n");
|
Chris@7
|
1014 printf(" m_fs = %f Hz.\n",m_fs);
|
Chris@7
|
1015 printf(" fs_orig = %f Hz.\n",m_warpings.fs_orig);
|
Chris@7
|
1016 printf(" fs_warp = %f Hz.\n",m_warpings.fs_warp);
|
Chris@7
|
1017 printf(" m_nfft = %d.\n",m_nfft);
|
Chris@7
|
1018 printf(" m_blockSize = %d.\n",m_blockSize);
|
Chris@7
|
1019 printf(" m_warpings.nsamps_torig = %d.\n",m_warpings.nsamps_torig);
|
Chris@7
|
1020 printf(" m_warp_params.num_warps = %d.\n",m_warp_params.num_warps);
|
Chris@7
|
1021 printf(" m_glogs_harmonic_count = %d.\n",m_glogs_harmonic_count);
|
Chris@7
|
1022 #endif
|
Chris@0
|
1023
|
Chris@0
|
1024 // size_t n = m_nfft/2 + 1;
|
Chris@7
|
1025 // double *tbuf = in_window;
|
Chris@0
|
1026
|
Chris@7
|
1027 for (size_t i = 0; i < m_blockSize; i++) {
|
Chris@0
|
1028 LPF_time[i] = (double)(inputBuffers[0][i]) * m_timeWindow[i];
|
Chris@0
|
1029 }
|
Chris@0
|
1030
|
Chris@0
|
1031 // #ifdef DEBUG
|
Chris@0
|
1032 // printf(" HASTA ACÁ ANDA!!!\n");
|
Chris@0
|
1033 // cout << flush;
|
Chris@0
|
1034 // #endif
|
Chris@0
|
1035
|
Chris@7
|
1036 apply_LPF();
|
Chris@7
|
1037 // Señal filtrada queda en LPF_time
|
Chris@0
|
1038
|
Chris@7
|
1039 Feature feature;
|
Chris@0
|
1040 feature.hasTimestamp = false;
|
Chris@0
|
1041
|
Chris@0
|
1042
|
Chris@7
|
1043 /* Solo a modo de prueba, voy a poner la salida del filtrado en «in» y
|
Chris@7
|
1044 voy a mostrar la FFT de eso, para ver el efecto del filtrado. */
|
Chris@0
|
1045 // for (size_t i = 0; i < m_nfft; i++) {
|
Chris@0
|
1046 // in[i][0] = tbuf[i];
|
Chris@0
|
1047 // in[i][1] = 0;
|
Chris@0
|
1048 // }
|
Chris@0
|
1049 // fftw_execute(planFFT);
|
Chris@0
|
1050 // double real, imag;
|
Chris@0
|
1051 // for (size_t i=0; i<n; ++i) { // preincremento?? ver version de nacho
|
Chris@0
|
1052 // real = out[i][0];
|
Chris@0
|
1053 // imag = out[i][1];
|
Chris@0
|
1054 // feature.values.push_back(real*real + imag*imag);
|
Chris@0
|
1055 // }
|
Chris@0
|
1056 // fs[0].push_back(feature);
|
Chris@0
|
1057
|
Chris@0
|
1058 // float real;
|
Chris@0
|
1059 // float imag;
|
Chris@0
|
1060 // for (size_t i=0; i<m_blockSize/2+1; i++) {
|
Chris@0
|
1061 // real = (float)(LPF_frequency[i][0]);
|
Chris@0
|
1062 // imag = (float)(LPF_frequency[i][1]);
|
Chris@0
|
1063 // feature.values.push_back(real*real+imag*imag);
|
Chris@0
|
1064 // //feature.values.push_back((float)(mp_LPFWindow[i]));
|
Chris@0
|
1065 // }
|
Chris@0
|
1066
|
Chris@0
|
1067 // ----------------------------------------------------------------------------------------------
|
Chris@0
|
1068 // Hanning window & FFT for all warp directions
|
Chris@0
|
1069
|
Chris@7
|
1070 double max_glogs = -DBL_MAX;
|
Chris@7
|
1071 size_t ind_max_glogs = 0;
|
Chris@0
|
1072
|
Chris@7
|
1073 for (size_t i_warp = 0; i_warp < m_warp_params.num_warps; i_warp++) {
|
Chris@7
|
1074 // Interpolate
|
Chris@7
|
1075 interp1q(LPF_time, (m_warpings.pos_int) + i_warp*m_warp_params.nsamps_twarp, m_warpings.pos_frac + i_warp*m_warp_params.nsamps_twarp, x_warping, m_warp_params.nsamps_twarp);
|
Chris@0
|
1076
|
Chris@7
|
1077 // Apply window
|
Chris@7
|
1078 for (size_t i = 0; i < m_warp_params.nsamps_twarp; i++) {
|
Chris@7
|
1079 x_warping[i] *= mp_HanningWindow[i];
|
Chris@7
|
1080 }
|
Chris@0
|
1081
|
Chris@7
|
1082 // Transform
|
Chris@7
|
1083 fftw_execute(plan_forward_xwarping);
|
Chris@0
|
1084
|
Chris@7
|
1085 // Copy result
|
Chris@7
|
1086 //memcpy(m_absFanChirpTransform + i_warp*(m_warp_params.nsamps_twarp/2+1), m_auxFanChirpTransform, (m_warp_params.nsamps_twarp/2+1)*sizeof(fftw_complex)); asi como esta no funciona
|
Chris@7
|
1087 double *aux_abs_fcht = m_absFanChirpTransform + i_warp*(m_warp_params.nsamps_twarp/2+1);
|
Chris@7
|
1088 for (size_t i = 0; i < (m_warp_params.nsamps_twarp/2+1); i++) {
|
Chris@7
|
1089 aux_abs_fcht[i] = log10(1.0 + 10.0*sqrt(m_auxFanChirpTransform[i][0]*m_auxFanChirpTransform[i][0]+m_auxFanChirpTransform[i][1]*m_auxFanChirpTransform[i][1]));
|
Chris@7
|
1090 // smoothing high frequency values
|
Chris@7
|
1091 //aux_abs_fcht[i] *= m_glogs_hf_smoothing_window[i];
|
Chris@7
|
1092 }
|
Chris@0
|
1093
|
Chris@0
|
1094 // -----------------------------------------------------------------------------------------
|
Chris@0
|
1095 // GLogS
|
Chris@7
|
1096 interp1q(aux_abs_fcht, m_glogs_posint, m_glogs_posfrac, m_glogs_interp, m_glogs_harmonic_count);
|
Chris@7
|
1097 size_t glogs_ind = 0;
|
Chris@7
|
1098 for (size_t i = 0; i < m_glogs_num_f0s; i++) {
|
Chris@7
|
1099 double glogs_accum = 0;
|
Chris@7
|
1100 for (size_t j = 1; j <= m_glogs_n[i]; j++) {
|
Chris@7
|
1101 glogs_accum += m_glogs_interp[glogs_ind++];
|
Chris@7
|
1102 }
|
Chris@7
|
1103 m_glogs[i + i_warp*m_glogs_num_f0s] = glogs_accum/(double)m_glogs_n[i];
|
Chris@7
|
1104 }
|
Chris@0
|
1105
|
Chris@0
|
1106 // Sub/super harmonic correction
|
Chris@7
|
1107 interp1q(m_glogs + i_warp*m_glogs_num_f0s, m_glogs_third_harmonic_posint, m_glogs_third_harmonic_posfrac, m_glogs_third_harmonic, (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@7
|
1108 interp1q(m_glogs + i_warp*m_glogs_num_f0s, m_glogs_fifth_harmonic_posint, m_glogs_fifth_harmonic_posfrac, m_glogs_fifth_harmonic, (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@7
|
1109 for (size_t i = m_glogs_num_f0s-1; i >= m_glogs_init_f0s; i--) {
|
Chris@7
|
1110 m_glogs[i + i_warp*m_glogs_num_f0s] -= MAX(MAX(m_glogs[i-m_f0_params.num_f0s_per_oct + i_warp*m_glogs_num_f0s],m_glogs_third_harmonic[i-m_glogs_init_f0s]),m_glogs_fifth_harmonic[i-m_glogs_init_f0s]);
|
Chris@7
|
1111 //m_glogs[i] -= MAX(m_glogs[i-m_f0_params.num_f0s_per_oct],m_glogs_third_harmonic[i-m_glogs_init_f0s]);
|
Chris@7
|
1112 }
|
Chris@7
|
1113 for (size_t i = m_glogs_init_f0s; i < m_glogs_num_f0s-m_f0_params.num_f0s_per_oct; i++) {
|
Chris@7
|
1114 m_glogs[i + i_warp*m_glogs_num_f0s] -= 0.3*m_glogs[i+m_f0_params.num_f0s_per_oct + i_warp*m_glogs_num_f0s];
|
Chris@7
|
1115 // Median, sigma $ weights correction
|
Chris@7
|
1116 m_glogs[i + i_warp*m_glogs_num_f0s] = (m_glogs[i + i_warp*m_glogs_num_f0s]-m_glogs_median_correction[i-m_glogs_init_f0s])*m_glogs_sigma_correction[i-m_glogs_init_f0s]*m_glogs_f0_preference_weights[i-m_glogs_init_f0s];
|
Chris@7
|
1117 }
|
Chris@0
|
1118
|
Chris@7
|
1119 // Look for maximum value to determine best direction
|
Chris@7
|
1120 for (size_t i = m_glogs_init_f0s; i < m_glogs_num_f0s-m_f0_params.num_f0s_per_oct; i++) {
|
Chris@7
|
1121 if (m_glogs[i + i_warp*m_glogs_num_f0s] > max_glogs) {
|
Chris@7
|
1122 max_glogs = m_glogs[i + i_warp*m_glogs_num_f0s];
|
Chris@7
|
1123 ind_max_glogs = i_warp;
|
Chris@7
|
1124 }
|
Chris@7
|
1125 }
|
Chris@7
|
1126 }
|
Chris@0
|
1127
|
Chris@0
|
1128 // ----------------------------------------------------------------------------------------------
|
Chris@0
|
1129
|
Chris@7
|
1130 for (size_t i=m_glogs_init_f0s; i< m_glogs_num_f0s - m_f0_params.num_f0s_per_oct; i++) {
|
Chris@0
|
1131 //for (size_t i=0; i<(m_warp_params.nsamps_twarp/2+1); i++) {
|
Chris@7
|
1132 //feature.values.push_back((float)(m_warpings.pos_int[i])+ (float)(m_warpings.pos_frac[i]));
|
Chris@7
|
1133 //feature.values.push_back((float)(phi[i]*100000.0));
|
Chris@7
|
1134 //feature.values.push_back((float)(t_orig[i]));
|
Chris@7
|
1135 //feature.values.push_back((float)(pos1[i]));
|
Chris@7
|
1136 //feature.values.push_back((float)x_warping[i]);
|
Chris@7
|
1137 //feature.values.push_back(m_absFanChirpTransform[i + ind_max_glogs*(m_warp_params.nsamps_twarp/2+1)]);
|
Chris@7
|
1138 //feature.values.push_back((float)m_glogs[i+(long)ind_max_glogs*(long)m_glogs_num_f0s]);
|
Chris@7
|
1139 switch (m_f0gram_mode) {
|
Chris@7
|
1140 case 1:
|
Chris@7
|
1141 max_glogs = -DBL_MAX;
|
Chris@7
|
1142 for (size_t i_warp = 0; i_warp < m_warp_params.num_warps; i_warp++) {
|
Chris@7
|
1143 if (m_glogs[i + i_warp*m_glogs_num_f0s] > max_glogs) {
|
Chris@7
|
1144 max_glogs = m_glogs[i + i_warp*m_glogs_num_f0s];
|
Chris@7
|
1145 ind_max_glogs = i_warp;
|
Chris@7
|
1146 }
|
Chris@7
|
1147 }
|
Chris@7
|
1148 feature.values.push_back((float)max_glogs);
|
Chris@7
|
1149 break;
|
Chris@7
|
1150 case 0:
|
Chris@7
|
1151 feature.values.push_back((float)m_glogs[i+(size_t)ind_max_glogs*(size_t)m_glogs_num_f0s]);
|
Chris@7
|
1152 break;
|
Chris@7
|
1153 }
|
Chris@7
|
1154 //feature.values.push_back((float)m_glogs_hf_smoothing_window[i]);
|
Chris@7
|
1155 }
|
Chris@0
|
1156
|
Chris@0
|
1157 // ----------------------------------------------------------------------------------------------
|
Chris@0
|
1158
|
Chris@7
|
1159 fs[0].push_back(feature);
|
Chris@0
|
1160
|
Chris@7
|
1161 #ifdef DEBUG
|
Chris@7
|
1162 printf(" ----------------------------- \n");
|
Chris@7
|
1163 #endif
|
Chris@0
|
1164
|
Chris@7
|
1165 return fs;
|
Chris@0
|
1166 //---------------------------------------------------------------------------
|
Chris@0
|
1167
|
Chris@0
|
1168 //return FeatureSet();
|
Chris@0
|
1169 }
|
Chris@0
|
1170
|
Chris@0
|
1171 FChTransformF0gram::FeatureSet
|
Chris@0
|
1172 FChTransformF0gram::getRemainingFeatures() {
|
Chris@0
|
1173 return FeatureSet();
|
Chris@0
|
1174 }
|
Chris@0
|
1175
|
Chris@0
|
1176 void
|
Chris@0
|
1177 FChTransformF0gram::design_time_window() {
|
Chris@0
|
1178
|
Chris@7
|
1179 size_t transitionWidth = (size_t)m_blockSize/128 + 1;;
|
Chris@0
|
1180 m_timeWindow = new double[m_blockSize];
|
Chris@7
|
1181 double *lp_transitionWindow = new double[transitionWidth];
|
Chris@0
|
1182
|
Chris@7
|
1183 //memset(m_timeWindow, 1.0, m_blockSize);
|
Chris@7
|
1184 for (size_t i = 0; i < m_blockSize; i++) {
|
Chris@7
|
1185 m_timeWindow[i] = 1.0;
|
Chris@7
|
1186 }
|
Chris@0
|
1187
|
Chris@7
|
1188 for (size_t i = 0; i < transitionWidth; i++) {
|
Chris@0
|
1189 lp_transitionWindow[i]=0.5*(1.0-cos(2*M_PI*(double)(i+1)/((double)transitionWidth+1.0)));
|
Chris@0
|
1190 }
|
Chris@0
|
1191
|
Chris@7
|
1192 for (size_t i = 0; i < transitionWidth/2; i++) {
|
Chris@7
|
1193 m_timeWindow[i] = lp_transitionWindow[i];
|
Chris@7
|
1194 m_timeWindow[m_blockSize-1-i] = lp_transitionWindow[transitionWidth-1-i];
|
Chris@7
|
1195 }
|
Chris@0
|
1196
|
Chris@7
|
1197 #ifdef DEBUG
|
Chris@7
|
1198 for (int i = 0; i < m_blockSize; i++) {
|
Chris@7
|
1199 if ((i<transitionWidth)) {
|
Chris@7
|
1200 printf(" m_timeWindow[%d] = %f.\n",i,m_timeWindow[i]);
|
Chris@7
|
1201 }
|
Chris@7
|
1202 }
|
Chris@7
|
1203 #endif
|
Chris@0
|
1204
|
Chris@7
|
1205 delete [] lp_transitionWindow;
|
Chris@0
|
1206 }
|
Chris@0
|
1207
|