Chris@0
|
1 /*
|
Chris@0
|
2 copyright (C) 2011 I. Irigaray, M. Rocamora
|
Chris@0
|
3
|
Chris@0
|
4 This program is free software: you can redistribute it and/or modify
|
Chris@0
|
5 it under the terms of the GNU General Public License as published by
|
Chris@0
|
6 the Free Software Foundation, either version 3 of the License, or
|
Chris@0
|
7 (at your option) any later version.
|
Chris@0
|
8
|
Chris@0
|
9 This program is distributed in the hope that it will be useful,
|
Chris@0
|
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
|
Chris@0
|
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
Chris@0
|
12 GNU General Public License for more details.
|
Chris@0
|
13
|
Chris@0
|
14 You should have received a copy of the GNU General Public License
|
Chris@0
|
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
|
Chris@7
|
16 */
|
Chris@0
|
17
|
Chris@0
|
18 #include "FChTransformF0gram.h"
|
Chris@0
|
19 #include "FChTransformUtils.h"
|
Chris@0
|
20 #include <math.h>
|
Chris@0
|
21 #include <float.h>
|
Chris@14
|
22
|
Chris@14
|
23 #include "bqvec/Allocators.h"
|
Chris@14
|
24
|
Chris@14
|
25 using namespace breakfastquay;
|
Chris@14
|
26
|
Chris@16
|
27 #define DEBUG
|
Chris@7
|
28
|
Chris@0
|
29 #define MAX(x, y) (((x) > (y)) ? (x) : (y))
|
Chris@0
|
30
|
Chris@15
|
31 FChTransformF0gram::FChTransformF0gram(ProcessingMode mode,
|
Chris@15
|
32 float inputSampleRate) :
|
Chris@7
|
33 Plugin(inputSampleRate),
|
Chris@15
|
34 m_processingMode(mode),
|
Chris@7
|
35 m_stepSize(0), // We are using 0 for step and block size to indicate "not yet set".
|
Chris@7
|
36 m_blockSize(0) {
|
Chris@0
|
37
|
Chris@0
|
38 m_fs = inputSampleRate;
|
Chris@0
|
39 // max frequency of interest (Hz)
|
Chris@0
|
40 m_fmax = 10000.f;
|
Chris@0
|
41 // warping parameters
|
Chris@12
|
42 m_warp_params.nsamps_twarp = 2048;
|
Chris@0
|
43 m_warp_params.alpha_max = 4;
|
Chris@0
|
44 m_warp_params.num_warps = 21;
|
Chris@0
|
45 m_warp_params.fact_over_samp = 2;
|
Chris@0
|
46 m_warp_params.alpha_dist = 0;
|
Chris@0
|
47 // f0 parameters
|
Chris@0
|
48 m_f0_params.f0min = 80.0;
|
Chris@0
|
49 m_f0_params.num_octs = 4;
|
Chris@0
|
50 m_f0_params.num_f0s_per_oct = 192;
|
Chris@0
|
51 m_f0_params.num_f0_hyps = 5;
|
Chris@0
|
52 m_f0_params.prefer = true;
|
Chris@0
|
53 m_f0_params.prefer_mean = 60;
|
Chris@0
|
54 m_f0_params.prefer_stdev = 18;
|
Chris@0
|
55 // glogs parameters
|
Chris@0
|
56 m_glogs_params.HP_logS = true;
|
Chris@0
|
57 m_glogs_params.att_subharms = 1;
|
Chris@7
|
58 // display parameters
|
Chris@15
|
59 m_f0gram_mode = BestBinOfAllDirections;
|
Chris@0
|
60
|
Chris@0
|
61 m_glogs_params.median_poly_coefs[0] = -0.000000058551680;
|
Chris@0
|
62 m_glogs_params.median_poly_coefs[1] = -0.000006945207775;
|
Chris@0
|
63 m_glogs_params.median_poly_coefs[2] = 0.002357223226588;
|
Chris@0
|
64
|
Chris@0
|
65 m_glogs_params.sigma_poly_coefs[0] = 0.000000092782308;
|
Chris@0
|
66 m_glogs_params.sigma_poly_coefs[1] = 0.000057283574898;
|
Chris@0
|
67 m_glogs_params.sigma_poly_coefs[2] = 0.022199903714288;
|
Chris@0
|
68
|
Chris@0
|
69 // number of fft points (controls zero-padding)
|
Chris@0
|
70 m_nfft = m_warp_params.nsamps_twarp;
|
Chris@0
|
71 // hop in samples
|
Chris@0
|
72 m_hop = m_warp_params.fact_over_samp * 256;
|
Chris@0
|
73
|
Chris@0
|
74 m_num_f0s = 0;
|
Chris@16
|
75 m_f0s = 0;
|
Chris@0
|
76 }
|
Chris@0
|
77
|
Chris@14
|
78 FChTransformF0gram::~FChTransformF0gram()
|
Chris@14
|
79 {
|
Chris@14
|
80 if (!m_blockSize) {
|
Chris@14
|
81 return; // nothing was allocated
|
Chris@14
|
82 }
|
Chris@14
|
83
|
Chris@14
|
84 deallocate(m_warpings.pos_int);
|
Chris@14
|
85 deallocate(m_warpings.pos_frac);
|
Chris@14
|
86 deallocate(m_warpings.chirp_rates);
|
Chris@14
|
87
|
Chris@14
|
88 clean_LPF();
|
Chris@14
|
89
|
Chris@14
|
90 deallocate(m_timeWindow);
|
Chris@14
|
91
|
Chris@14
|
92 deallocate(mp_HanningWindow);
|
Chris@14
|
93
|
Chris@14
|
94 // Warping
|
Chris@14
|
95 deallocate(x_warping);
|
Chris@14
|
96 delete fft_xwarping;
|
Chris@14
|
97 deallocate(m_absFanChirpTransform);
|
Chris@14
|
98 deallocate(m_auxFanChirpTransform);
|
Chris@14
|
99
|
Chris@14
|
100 // design_GLogS
|
Chris@14
|
101 deallocate(m_glogs_f0);
|
Chris@14
|
102 deallocate(m_glogs);
|
Chris@14
|
103 deallocate(m_glogs_n);
|
Chris@14
|
104 deallocate(m_glogs_index);
|
Chris@14
|
105 deallocate(m_glogs_posint);
|
Chris@14
|
106 deallocate(m_glogs_posfrac);
|
Chris@14
|
107 deallocate(m_glogs_interp);
|
Chris@14
|
108 deallocate(m_glogs_third_harmonic_posint);
|
Chris@14
|
109 deallocate(m_glogs_third_harmonic_posfrac);
|
Chris@14
|
110 deallocate(m_glogs_third_harmonic);
|
Chris@14
|
111 deallocate(m_glogs_fifth_harmonic_posint);
|
Chris@14
|
112 deallocate(m_glogs_fifth_harmonic_posfrac);
|
Chris@14
|
113 deallocate(m_glogs_fifth_harmonic);
|
Chris@14
|
114 deallocate(m_glogs_f0_preference_weights);
|
Chris@14
|
115 deallocate(m_glogs_median_correction);
|
Chris@14
|
116 deallocate(m_glogs_sigma_correction);
|
Chris@16
|
117
|
Chris@16
|
118 deallocate(m_f0s);
|
Chris@0
|
119 }
|
Chris@0
|
120
|
Chris@0
|
121 string
|
Chris@0
|
122 FChTransformF0gram::getIdentifier() const {
|
Chris@15
|
123 switch (m_processingMode) {
|
Chris@15
|
124 case ModeF0Gram: return "fchtransformf0gram";
|
Chris@15
|
125 case ModeSpectrogram: return "fchtransformspectrogram";
|
Chris@15
|
126 case ModeRoughSpectrogram: return "fchtransformrough";
|
Chris@15
|
127 }
|
Chris@17
|
128 throw std::logic_error("unknown mode");
|
Chris@0
|
129 }
|
Chris@0
|
130
|
Chris@0
|
131 string
|
Chris@0
|
132 FChTransformF0gram::getName() const {
|
Chris@15
|
133 switch (m_processingMode) {
|
Chris@15
|
134 case ModeF0Gram: return "Fan Chirp Transform F0gram";
|
Chris@15
|
135 case ModeSpectrogram: return "Fan Chirp Transform Spectrogram";
|
Chris@15
|
136 case ModeRoughSpectrogram: return "Fan Chirp Transform Rough Spectrogram";
|
Chris@15
|
137 }
|
Chris@17
|
138 throw std::logic_error("unknown mode");
|
Chris@0
|
139 }
|
Chris@0
|
140
|
Chris@0
|
141 string
|
Chris@0
|
142 FChTransformF0gram::getDescription() const {
|
Chris@15
|
143 switch (m_processingMode) {
|
Chris@15
|
144 case ModeF0Gram:
|
Chris@15
|
145 return "This plug-in produces a representation, called F0gram, which exhibits the salience of the fundamental frequency of the sound sources in the audio file. The computation of the F0gram makes use of the Fan Chirp Transform analysis. It is based on the article \"Fan chirp transform for music representation\" P. Cancela, E. Lopez, M. Rocamora, International Conference on Digital Audio Effects, 13th. DAFx-10. Graz, Austria - 6-10 Sep 2010.";
|
Chris@15
|
146 case ModeSpectrogram:
|
Chris@15
|
147 return "This plug-in produces a spectral representation of the audio using Fan Chirp Transform analysis.";
|
Chris@15
|
148 case ModeRoughSpectrogram:
|
Chris@15
|
149 return "This plug-in produces a more approximate spectral representation of the audio using Fan Chirp Transform analysis.";
|
Chris@15
|
150 }
|
Chris@17
|
151 throw std::logic_error("unknown mode");
|
Chris@0
|
152 }
|
Chris@0
|
153
|
Chris@0
|
154 string
|
Chris@0
|
155 FChTransformF0gram::getMaker() const {
|
Chris@0
|
156 // Your name here
|
Chris@0
|
157 return "Audio Processing Group \n Universidad de la Republica";
|
Chris@0
|
158 }
|
Chris@0
|
159
|
Chris@0
|
160 int
|
Chris@0
|
161 FChTransformF0gram::getPluginVersion() const {
|
Chris@0
|
162 // Increment this each time you release a version that behaves
|
Chris@0
|
163 // differently from the previous one
|
Chris@0
|
164 //
|
Chris@0
|
165 // 0 - initial version from scratch
|
Chris@15
|
166 return 1;
|
Chris@0
|
167 }
|
Chris@0
|
168
|
Chris@0
|
169 string
|
Chris@0
|
170 FChTransformF0gram::getCopyright() const {
|
Chris@0
|
171 // This function is not ideally named. It does not necessarily
|
Chris@0
|
172 // need to say who made the plugin -- getMaker does that -- but it
|
Chris@0
|
173 // should indicate the terms under which it is distributed. For
|
Chris@0
|
174 // example, "Copyright (year). All Rights Reserved", or "GPL"
|
Chris@0
|
175 return "copyright (C) 2011 GPL - Audio Processing Group, UdelaR";
|
Chris@0
|
176 }
|
Chris@0
|
177
|
Chris@0
|
178 FChTransformF0gram::InputDomain
|
Chris@0
|
179 FChTransformF0gram::getInputDomain() const {
|
Chris@0
|
180 return TimeDomain;
|
Chris@0
|
181 }
|
Chris@0
|
182
|
Chris@0
|
183 size_t FChTransformF0gram::getPreferredBlockSize() const {
|
Chris@0
|
184 return 8192; // 0 means "I can handle any block size"
|
Chris@0
|
185 }
|
Chris@0
|
186
|
Chris@0
|
187 size_t
|
Chris@0
|
188 FChTransformF0gram::getPreferredStepSize() const {
|
Chris@0
|
189 return 256; // 0 means "anything sensible"; in practice this
|
Chris@0
|
190 // means the same as the block size for TimeDomain
|
Chris@0
|
191 // plugins, or half of it for FrequencyDomain plugins
|
Chris@0
|
192 }
|
Chris@0
|
193
|
Chris@0
|
194 size_t
|
Chris@0
|
195 FChTransformF0gram::getMinChannelCount() const {
|
Chris@0
|
196 return 1;
|
Chris@0
|
197 }
|
Chris@0
|
198
|
Chris@0
|
199 size_t
|
Chris@0
|
200 FChTransformF0gram::getMaxChannelCount() const {
|
Chris@0
|
201 return 1;
|
Chris@0
|
202 }
|
Chris@0
|
203
|
Chris@0
|
204 FChTransformF0gram::ParameterList
|
Chris@0
|
205 FChTransformF0gram::getParameterDescriptors() const {
|
Chris@0
|
206 ParameterList list;
|
Chris@0
|
207
|
Chris@0
|
208 // If the plugin has no adjustable parameters, return an empty
|
Chris@0
|
209 // list here (and there's no need to provide implementations of
|
Chris@0
|
210 // getParameter and setParameter in that case either).
|
Chris@0
|
211
|
Chris@0
|
212 // Note that it is your responsibility to make sure the parameters
|
Chris@0
|
213 // start off having their default values (e.g. in the constructor
|
Chris@0
|
214 // above). The host needs to know the default value so it can do
|
Chris@0
|
215 // things like provide a "reset to default" function, but it will
|
Chris@0
|
216 // not explicitly set your parameters to their defaults for you if
|
Chris@0
|
217 // they have not changed in the mean time.
|
Chris@0
|
218
|
Chris@0
|
219 // ============= WARPING PARAMETERS =============
|
Chris@0
|
220
|
Chris@0
|
221 ParameterDescriptor fmax;
|
Chris@0
|
222 fmax.identifier = "fmax";
|
Chris@0
|
223 fmax.name = "Maximum frequency";
|
Chris@0
|
224 fmax.description = "Maximum frequency of interest for the analysis.";
|
Chris@0
|
225 fmax.unit = "Hz";
|
Chris@0
|
226 fmax.minValue = 2000;
|
Chris@0
|
227 fmax.maxValue = 22050;
|
Chris@0
|
228 fmax.defaultValue = 10000;
|
Chris@0
|
229 fmax.isQuantized = true;
|
Chris@0
|
230 fmax.quantizeStep = 1.0;
|
Chris@0
|
231 list.push_back(fmax);
|
Chris@0
|
232
|
Chris@0
|
233 ParameterDescriptor nsamp;
|
Chris@0
|
234 nsamp.identifier = "nsamp";
|
Chris@0
|
235 nsamp.name = "Number of samples";
|
Chris@0
|
236 nsamp.description = "Number of samples of the time warped frame";
|
Chris@0
|
237 nsamp.unit = "samples";
|
Chris@0
|
238 nsamp.minValue = 128;
|
Chris@0
|
239 nsamp.maxValue = 4096;
|
Chris@0
|
240 nsamp.defaultValue = 2048;
|
Chris@0
|
241 nsamp.isQuantized = true;
|
Chris@0
|
242 nsamp.quantizeStep = 1.0;
|
Chris@0
|
243 list.push_back(nsamp);
|
Chris@0
|
244
|
Chris@0
|
245 ParameterDescriptor nfft;
|
Chris@0
|
246 nfft.identifier = "nfft";
|
Chris@0
|
247 nfft.name = "FFT number of points";
|
Chris@0
|
248 nfft.description = "Number of FFT points (controls zero-padding)";
|
Chris@0
|
249 nfft.unit = "samples";
|
Chris@0
|
250 nfft.minValue = 0;
|
Chris@0
|
251 nfft.maxValue = 4;
|
Chris@0
|
252 nfft.defaultValue = 3;
|
Chris@0
|
253 nfft.isQuantized = true;
|
Chris@0
|
254 nfft.quantizeStep = 1.0;
|
Chris@0
|
255 nfft.valueNames.push_back("256");
|
Chris@0
|
256 nfft.valueNames.push_back("512");
|
Chris@0
|
257 nfft.valueNames.push_back("1024");
|
Chris@0
|
258 nfft.valueNames.push_back("2048");
|
Chris@0
|
259 nfft.valueNames.push_back("4096");
|
Chris@0
|
260 nfft.valueNames.push_back("8192");
|
Chris@0
|
261 list.push_back(nfft);
|
Chris@0
|
262
|
Chris@0
|
263 ParameterDescriptor alpha_max;
|
Chris@0
|
264 alpha_max.identifier = "alpha_max";
|
Chris@0
|
265 alpha_max.name = "Maximum alpha value";
|
Chris@0
|
266 alpha_max.description = "Maximum value for the alpha parameter of the transform.";
|
Chris@0
|
267 alpha_max.unit = "Hz/s";
|
Chris@0
|
268 alpha_max.minValue = -10;
|
Chris@0
|
269 alpha_max.maxValue = 10;
|
Chris@0
|
270 alpha_max.defaultValue = 5;
|
Chris@0
|
271 alpha_max.isQuantized = true;
|
Chris@0
|
272 alpha_max.quantizeStep = 1.0;
|
Chris@0
|
273 list.push_back(alpha_max);
|
Chris@0
|
274
|
Chris@0
|
275 ParameterDescriptor num_warps;
|
Chris@0
|
276 num_warps.identifier = "num_warps";
|
Chris@0
|
277 num_warps.name = "Number of warpings";
|
Chris@0
|
278 num_warps.description = "Number of different warpings in the specified range (must be odd).";
|
Chris@0
|
279 num_warps.unit = "";
|
Chris@0
|
280 num_warps.minValue = 1;
|
Chris@0
|
281 num_warps.maxValue = 101;
|
Chris@0
|
282 num_warps.defaultValue = 21;
|
Chris@0
|
283 num_warps.isQuantized = true;
|
Chris@0
|
284 num_warps.quantizeStep = 2.0;
|
Chris@0
|
285 list.push_back(num_warps);
|
Chris@0
|
286
|
Chris@0
|
287 ParameterDescriptor alpha_dist;
|
Chris@0
|
288 alpha_dist.identifier = "alpha_dist";
|
Chris@0
|
289 alpha_dist.name = "alpha distribution";
|
Chris@0
|
290 alpha_dist.description = "Type of distribution of alpha values (linear or log).";
|
Chris@0
|
291 alpha_dist.unit = "";
|
Chris@0
|
292 alpha_dist.minValue = 0;
|
Chris@0
|
293 alpha_dist.maxValue = 1;
|
Chris@0
|
294 alpha_dist.defaultValue = 1;
|
Chris@0
|
295 alpha_dist.isQuantized = true;
|
Chris@0
|
296 alpha_dist.quantizeStep = 1.0;
|
Chris@0
|
297 // lin (0), log (1)
|
Chris@0
|
298 alpha_dist.valueNames.push_back("lin");
|
Chris@0
|
299 alpha_dist.valueNames.push_back("log");
|
Chris@0
|
300 list.push_back(alpha_dist);
|
Chris@0
|
301
|
Chris@0
|
302 // ============= F0-GRAM PARAMETERS =============
|
Chris@0
|
303
|
Chris@0
|
304 ParameterDescriptor f0min;
|
Chris@0
|
305 f0min.identifier = "f0min";
|
Chris@0
|
306 f0min.name = "min f0";
|
Chris@0
|
307 f0min.description = "Minimum fundamental frequency (f0) value.";
|
Chris@0
|
308 f0min.unit = "Hz";
|
Chris@0
|
309 f0min.minValue = 1;
|
Chris@0
|
310 f0min.maxValue = 500;
|
Chris@0
|
311 f0min.defaultValue = 80;
|
Chris@0
|
312 f0min.isQuantized = true;
|
Chris@0
|
313 f0min.quantizeStep = 1.0;
|
Chris@0
|
314 list.push_back(f0min);
|
Chris@0
|
315
|
Chris@0
|
316 ParameterDescriptor num_octs;
|
Chris@0
|
317 num_octs.identifier = "num_octs";
|
Chris@0
|
318 num_octs.name = "number of octaves";
|
Chris@0
|
319 num_octs.description = "Number of octaves for F0gram computation.";
|
Chris@0
|
320 num_octs.unit = "";
|
Chris@0
|
321 num_octs.minValue = 1;
|
Chris@0
|
322 num_octs.maxValue = 10;
|
Chris@0
|
323 num_octs.defaultValue = 4;
|
Chris@0
|
324 num_octs.isQuantized = true;
|
Chris@0
|
325 num_octs.quantizeStep = 1.0;
|
Chris@0
|
326 list.push_back(num_octs);
|
Chris@0
|
327
|
Chris@0
|
328 ParameterDescriptor num_f0_hyps;
|
Chris@0
|
329 num_f0_hyps.identifier = "num_f0_hyps";
|
Chris@0
|
330 num_f0_hyps.name = "number of f0 hypotesis";
|
Chris@0
|
331 num_f0_hyps.description = "Number of f0 hypotesis to extract.";
|
Chris@0
|
332 num_f0_hyps.unit = "";
|
Chris@0
|
333 num_f0_hyps.minValue = 1;
|
Chris@0
|
334 num_f0_hyps.maxValue = 100;
|
Chris@0
|
335 num_f0_hyps.defaultValue = 10;
|
Chris@0
|
336 num_f0_hyps.isQuantized = true;
|
Chris@0
|
337 num_f0_hyps.quantizeStep = 1.0;
|
Chris@0
|
338 list.push_back(num_f0_hyps);
|
Chris@0
|
339
|
Chris@0
|
340 ParameterDescriptor f0s_per_oct;
|
Chris@0
|
341 f0s_per_oct.identifier = "f0s_per_oct";
|
Chris@0
|
342 f0s_per_oct.name = "f0 values per octave";
|
Chris@0
|
343 f0s_per_oct.description = "Number of f0 values per octave.";
|
Chris@0
|
344 f0s_per_oct.unit = "";
|
Chris@0
|
345 f0s_per_oct.minValue = 12;
|
Chris@0
|
346 f0s_per_oct.maxValue = 768;
|
Chris@0
|
347 f0s_per_oct.defaultValue = 192;
|
Chris@0
|
348 f0s_per_oct.isQuantized = true;
|
Chris@0
|
349 f0s_per_oct.quantizeStep = 1.0;
|
Chris@0
|
350 list.push_back(f0s_per_oct);
|
Chris@0
|
351
|
Chris@0
|
352 ParameterDescriptor f0_prefer_fun;
|
Chris@0
|
353 f0_prefer_fun.identifier = "f0_prefer_fun";
|
Chris@0
|
354 f0_prefer_fun.name = "f0 preference function";
|
Chris@0
|
355 f0_prefer_fun.description = "Whether to use a f0 weighting function.";
|
Chris@0
|
356 f0_prefer_fun.unit = "";
|
Chris@0
|
357 f0_prefer_fun.minValue = 0;
|
Chris@0
|
358 f0_prefer_fun.maxValue = 1;
|
Chris@0
|
359 f0_prefer_fun.defaultValue = 1;
|
Chris@0
|
360 f0_prefer_fun.isQuantized = true;
|
Chris@0
|
361 f0_prefer_fun.quantizeStep = 1.0;
|
Chris@0
|
362 list.push_back(f0_prefer_fun);
|
Chris@0
|
363
|
Chris@0
|
364 ParameterDescriptor f0_prefer_mean;
|
Chris@0
|
365 f0_prefer_mean.identifier = "f0_prefer_mean";
|
Chris@0
|
366 f0_prefer_mean.name = "mean f0 preference function";
|
Chris@0
|
367 f0_prefer_mean.description = "Mean value for f0 weighting function (MIDI number).";
|
Chris@0
|
368 f0_prefer_mean.unit = "";
|
Chris@0
|
369 f0_prefer_mean.minValue = 1;
|
Chris@0
|
370 f0_prefer_mean.maxValue = 127;
|
Chris@0
|
371 f0_prefer_mean.defaultValue = 60;
|
Chris@0
|
372 f0_prefer_mean.isQuantized = true;
|
Chris@0
|
373 f0_prefer_mean.quantizeStep = 1.0;
|
Chris@0
|
374 list.push_back(f0_prefer_mean);
|
Chris@0
|
375
|
Chris@0
|
376 ParameterDescriptor f0_prefer_stdev;
|
Chris@0
|
377 f0_prefer_stdev.identifier = "f0_prefer_stdev";
|
Chris@0
|
378 f0_prefer_stdev.name = "stdev of f0 preference function";
|
Chris@0
|
379 f0_prefer_stdev.description = "Stdev for f0 weighting function (MIDI number).";
|
Chris@0
|
380 f0_prefer_stdev.unit = "";
|
Chris@0
|
381 f0_prefer_stdev.minValue = 1;
|
Chris@0
|
382 f0_prefer_stdev.maxValue = 127;
|
Chris@0
|
383 f0_prefer_stdev.defaultValue = 18;
|
Chris@0
|
384 f0_prefer_stdev.isQuantized = true;
|
Chris@0
|
385 f0_prefer_stdev.quantizeStep = 1.0;
|
Chris@0
|
386 list.push_back(f0_prefer_stdev);
|
Chris@0
|
387
|
Chris@0
|
388 ParameterDescriptor f0gram_mode;
|
Chris@0
|
389 f0gram_mode.identifier = "f0gram_mode";
|
Chris@0
|
390 f0gram_mode.name = "display mode of f0gram";
|
Chris@0
|
391 f0gram_mode.description = "Display all bins of the best direction, or the best bin for each direction.";
|
Chris@0
|
392 f0gram_mode.unit = "";
|
Chris@0
|
393 f0gram_mode.minValue = 0;
|
Chris@0
|
394 f0gram_mode.maxValue = 1;
|
Chris@0
|
395 f0gram_mode.defaultValue = 1;
|
Chris@0
|
396 f0gram_mode.isQuantized = true;
|
Chris@0
|
397 f0gram_mode.quantizeStep = 1.0;
|
Chris@0
|
398 list.push_back(f0gram_mode);
|
Chris@0
|
399
|
Chris@0
|
400 return list;
|
Chris@0
|
401 }
|
Chris@0
|
402
|
Chris@0
|
403 float
|
Chris@0
|
404 FChTransformF0gram::getParameter(string identifier) const {
|
Chris@0
|
405
|
Chris@0
|
406 if (identifier == "fmax") {
|
Chris@0
|
407 return m_fmax;
|
Chris@0
|
408 } else if (identifier == "nsamp") {
|
Chris@0
|
409 return m_warp_params.nsamps_twarp;
|
Chris@0
|
410 } else if (identifier == "alpha_max") {
|
Chris@0
|
411 return m_warp_params.alpha_max;
|
Chris@0
|
412 } else if (identifier == "num_warps") {
|
Chris@0
|
413 return m_warp_params.num_warps;
|
Chris@0
|
414 } else if (identifier == "alpha_dist") {
|
Chris@0
|
415 return m_warp_params.alpha_dist;
|
Chris@0
|
416 } else if (identifier == "nfft") {
|
Chris@0
|
417 return m_nfft;
|
Chris@0
|
418 } else if (identifier == "f0min") {
|
Chris@0
|
419 return m_f0_params.f0min;
|
Chris@0
|
420 } else if (identifier == "num_octs") {
|
Chris@0
|
421 return m_f0_params.num_octs;
|
Chris@0
|
422 } else if (identifier == "f0s_per_oct") {
|
Chris@0
|
423 return m_f0_params.num_f0s_per_oct;
|
Chris@0
|
424 } else if (identifier == "num_f0_hyps") {
|
Chris@0
|
425 return m_f0_params.num_f0_hyps;
|
Chris@0
|
426 } else if (identifier == "f0_prefer_fun") {
|
Chris@0
|
427 return m_f0_params.prefer;
|
Chris@0
|
428 } else if (identifier == "f0_prefer_mean") {
|
Chris@0
|
429 return m_f0_params.prefer_mean;
|
Chris@0
|
430 } else if (identifier == "f0_prefer_stdev") {
|
Chris@0
|
431 return m_f0_params.prefer_stdev;
|
Chris@7
|
432 } else if (identifier == "f0gram_mode") {
|
Chris@15
|
433 return m_f0gram_mode == BestBinOfAllDirections ? 1.0 : 0.0;
|
Chris@0
|
434 } else {
|
Chris@0
|
435 return 0.f;
|
Chris@0
|
436 }
|
Chris@0
|
437
|
Chris@0
|
438 }
|
Chris@0
|
439
|
Chris@15
|
440 void FChTransformF0gram::setParameter(string identifier, float value)
|
Chris@15
|
441 {
|
Chris@0
|
442 if (identifier == "fmax") {
|
Chris@0
|
443 m_fmax = value;
|
Chris@0
|
444 } else if (identifier == "nsamp") {
|
Chris@0
|
445 m_warp_params.nsamps_twarp = value;
|
Chris@0
|
446 } else if (identifier == "alpha_max") {
|
Chris@0
|
447 m_warp_params.alpha_max = value;
|
Chris@0
|
448 } else if (identifier == "num_warps") {
|
Chris@0
|
449 m_warp_params.num_warps = value;
|
Chris@0
|
450 } else if (identifier == "alpha_dist") {
|
Chris@0
|
451 m_warp_params.alpha_dist = value;
|
Chris@0
|
452 } else if (identifier == "nfft") {
|
Chris@0
|
453 m_nfft = value;
|
Chris@0
|
454 } else if (identifier == "f0min") {
|
Chris@0
|
455 m_f0_params.f0min = value;
|
Chris@0
|
456 } else if (identifier == "num_octs") {
|
Chris@0
|
457 m_f0_params.num_octs = value;
|
Chris@0
|
458 } else if (identifier == "f0s_per_oct") {
|
Chris@0
|
459 m_f0_params.num_f0s_per_oct = value;
|
Chris@0
|
460 } else if (identifier == "num_f0_hyps") {
|
Chris@0
|
461 m_f0_params.num_f0_hyps = value;
|
Chris@0
|
462 } else if (identifier == "f0_prefer_fun") {
|
Chris@0
|
463 m_f0_params.prefer = value;
|
Chris@0
|
464 } else if (identifier == "f0_prefer_mean") {
|
Chris@0
|
465 m_f0_params.prefer_mean = value;
|
Chris@0
|
466 } else if (identifier == "f0_prefer_stdev") {
|
Chris@0
|
467 m_f0_params.prefer_stdev = value;
|
Chris@0
|
468 } else if (identifier == "f0gram_mode") {
|
Chris@15
|
469 m_f0gram_mode = (value > 0.5 ?
|
Chris@15
|
470 BestBinOfAllDirections :
|
Chris@15
|
471 AllBinsOfBestDirection);
|
Chris@15
|
472 } else {
|
Chris@15
|
473 cerr << "WARNING: Unknown parameter id \""
|
Chris@15
|
474 << identifier << "\"" << endl;
|
Chris@0
|
475 }
|
Chris@0
|
476 }
|
Chris@0
|
477
|
Chris@0
|
478 FChTransformF0gram::ProgramList
|
Chris@0
|
479 FChTransformF0gram::getPrograms() const {
|
Chris@0
|
480 ProgramList list;
|
Chris@0
|
481 return list;
|
Chris@0
|
482 }
|
Chris@0
|
483
|
Chris@0
|
484 FChTransformF0gram::OutputList
|
Chris@0
|
485 FChTransformF0gram::getOutputDescriptors() const {
|
Chris@0
|
486
|
Chris@0
|
487 OutputList list;
|
Chris@0
|
488
|
Chris@16
|
489 vector<string> labels;
|
Chris@16
|
490 char label[100];
|
Chris@0
|
491
|
Chris@16
|
492 if (m_processingMode == ModeF0Gram) {
|
Chris@16
|
493
|
Chris@16
|
494 /* f0 values of F0gram grid as string values */
|
Chris@16
|
495 for (int i = 0; i < m_num_f0s; ++i) {
|
Chris@16
|
496 sprintf(label, "%4.2f Hz", m_f0s[i]);
|
Chris@16
|
497 labels.push_back(label);
|
Chris@16
|
498 }
|
Chris@16
|
499
|
Chris@16
|
500 /* The F0gram */
|
Chris@16
|
501 OutputDescriptor d;
|
Chris@16
|
502 d.identifier = "f0gram";
|
Chris@16
|
503 d.name = "F0gram: salience of f0s";
|
Chris@16
|
504 d.description = "This representation show the salience of the different f0s in the signal.";
|
Chris@16
|
505 d.hasFixedBinCount = true;
|
Chris@16
|
506 d.binCount = m_f0_params.num_octs * m_f0_params.num_f0s_per_oct;
|
Chris@16
|
507 d.binNames = labels;
|
Chris@16
|
508 d.hasKnownExtents = false;
|
Chris@16
|
509 d.isQuantized = false;
|
Chris@16
|
510 d.sampleType = OutputDescriptor::OneSamplePerStep;
|
Chris@16
|
511 d.hasDuration = false;
|
Chris@16
|
512 list.push_back(d);
|
Chris@16
|
513
|
Chris@16
|
514 } else {
|
Chris@16
|
515
|
Chris@16
|
516 for (int i = 0; i < m_warp_params.nsamps_twarp/2+1; ++i) {
|
Chris@16
|
517 double freq = i * (m_warpings.fs_warp / m_nfft);
|
Chris@16
|
518 sprintf(label, "%4.2f Hz", freq);
|
Chris@16
|
519 labels.push_back(label);
|
Chris@16
|
520 }
|
Chris@16
|
521
|
Chris@16
|
522 OutputDescriptor d;
|
Chris@16
|
523 d.identifier = "spectrogram";
|
Chris@16
|
524 d.name = "Spectrogram";
|
Chris@16
|
525 d.description = "Time/frequency spectrogram derived from the Fan Chirp Transform output";
|
Chris@16
|
526 d.hasFixedBinCount = true;
|
Chris@16
|
527 d.binCount = m_warp_params.nsamps_twarp/2+1;
|
Chris@16
|
528 d.binNames = labels;
|
Chris@16
|
529 d.hasKnownExtents = false;
|
Chris@16
|
530 d.isQuantized = false;
|
Chris@16
|
531 d.sampleType = OutputDescriptor::OneSamplePerStep;
|
Chris@16
|
532 d.hasDuration = false;
|
Chris@16
|
533 list.push_back(d);
|
Chris@0
|
534 }
|
Chris@16
|
535
|
Chris@0
|
536 return list;
|
Chris@0
|
537 }
|
Chris@0
|
538
|
Chris@0
|
539 bool
|
Chris@0
|
540 FChTransformF0gram::initialise(size_t channels, size_t stepSize, size_t blockSize) {
|
Chris@0
|
541 if (channels < getMinChannelCount() ||
|
Chris@14
|
542 channels > getMaxChannelCount()) {
|
Chris@14
|
543 return false;
|
Chris@14
|
544 }
|
Chris@0
|
545
|
Chris@0
|
546 // set blockSize and stepSize (but changed below)
|
Chris@0
|
547 m_blockSize = blockSize;
|
Chris@0
|
548 m_stepSize = stepSize;
|
Chris@0
|
549
|
Chris@0
|
550 // WARNING !!!
|
Chris@0
|
551 // these values in fact are determined by the sampling frequency m_fs
|
Chris@0
|
552 // the parameters used below correspond to default values i.e. m_fs = 44.100 Hz
|
Chris@0
|
553 //m_blockSize = 4 * m_warp_params.nsamps_twarp;
|
Chris@16
|
554 // m_stepSize = floor(m_hop / m_warp_params.fact_over_samp);
|
Chris@16
|
555
|
Chris@16
|
556 /* design of FChT */
|
Chris@16
|
557 design_FChT();
|
Chris@0
|
558
|
Chris@0
|
559 /* initialise m_glogs_params */
|
Chris@7
|
560 design_GLogS();
|
Chris@0
|
561
|
Chris@7
|
562 design_LPF();
|
Chris@0
|
563
|
Chris@7
|
564 design_time_window();
|
Chris@0
|
565
|
Chris@7
|
566 // Create Hanning window for warped signals
|
Chris@14
|
567 mp_HanningWindow = allocate<double>(m_warp_params.nsamps_twarp);
|
Chris@7
|
568 bool normalize = false;
|
Chris@14
|
569 Utils::hanning_window(mp_HanningWindow, m_warp_params.nsamps_twarp, normalize);
|
Chris@0
|
570
|
Chris@16
|
571 m_num_f0s = m_f0_params.num_octs * m_f0_params.num_f0s_per_oct;
|
Chris@16
|
572 m_f0s = allocate<double>(m_num_f0s);
|
Chris@16
|
573 for (int i = 0; i < m_num_f0s; ++i) {
|
Chris@16
|
574 m_f0s[i] = m_glogs_f0[m_glogs_init_f0s + i];
|
Chris@16
|
575 }
|
Chris@16
|
576
|
Chris@0
|
577 return true;
|
Chris@0
|
578 }
|
Chris@0
|
579
|
Chris@0
|
580 void
|
Chris@0
|
581 FChTransformF0gram::design_GLogS() {
|
Chris@0
|
582
|
Chris@7
|
583 // total number & initial quantity of f0s
|
Chris@16
|
584
|
Chris@16
|
585 cerr << "per oct = " << m_f0_params.num_f0s_per_oct << ", octs = " << m_f0_params.num_octs << endl;
|
Chris@10
|
586 m_glogs_init_f0s = (int)(((double)m_f0_params.num_f0s_per_oct)*log2(5.0))+1;
|
Chris@16
|
587 cerr << "init_f0s = " << m_glogs_init_f0s << endl;
|
Chris@7
|
588 m_glogs_num_f0s = (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct + m_glogs_init_f0s;
|
Chris@16
|
589 cerr << "num_f0s = " << m_glogs_num_f0s << endl;
|
Chris@0
|
590
|
Chris@7
|
591 // Initialize arrays
|
Chris@14
|
592 m_glogs_f0 = allocate<double>(m_glogs_num_f0s);
|
Chris@14
|
593 m_glogs = allocate<double>(m_glogs_num_f0s*m_warp_params.num_warps);
|
Chris@14
|
594 m_glogs_n = allocate<int>(m_glogs_num_f0s);
|
Chris@14
|
595 m_glogs_index = allocate<int>(m_glogs_num_f0s);
|
Chris@0
|
596
|
Chris@7
|
597 // Compute f0 values
|
Chris@7
|
598 m_glogs_harmonic_count = 0;
|
Chris@7
|
599 double factor = (double)(m_warp_params.nsamps_twarp/2)/(double)(m_warp_params.nsamps_twarp/2+1);
|
Chris@10
|
600 for (int i = 0; i < m_glogs_num_f0s; i++) {
|
Chris@7
|
601 m_glogs_f0[i] = (m_f0_params.f0min/5.0)*pow(2.0,(double)i/(double)m_f0_params.num_f0s_per_oct);
|
Chris@7
|
602 // for every f0 compute number of partials less or equal than m_fmax.
|
Chris@7
|
603 m_glogs_n[i] = m_fmax*factor/m_glogs_f0[i];
|
Chris@7
|
604 m_glogs_index[i] = m_glogs_harmonic_count;
|
Chris@7
|
605 m_glogs_harmonic_count += m_glogs_n[i];
|
Chris@7
|
606 }
|
Chris@0
|
607
|
Chris@7
|
608 // Initialize arrays for interpolation
|
Chris@14
|
609 m_glogs_posint = allocate<int>(m_glogs_harmonic_count);
|
Chris@14
|
610 m_glogs_posfrac = allocate<double>(m_glogs_harmonic_count);
|
Chris@14
|
611 m_glogs_interp = allocate<double>(m_glogs_harmonic_count);
|
Chris@0
|
612
|
Chris@7
|
613 // Compute int & frac of interpolation positions
|
Chris@10
|
614 int aux_index = 0;
|
Chris@7
|
615 double aux_pos;
|
Chris@10
|
616 for (int i = 0; i < m_glogs_num_f0s; i++) {
|
Chris@10
|
617 for (int j = 1; j <= m_glogs_n[i]; j++) {
|
Chris@7
|
618 // indice en el vector de largo t_warp/2+1 donde el ultimo valor corresponde a f=m_fmax
|
Chris@7
|
619 aux_pos = ((double)j*m_glogs_f0[i])*((double)(m_warp_params.nsamps_twarp/2+1))/m_fmax;
|
Chris@16
|
620 //!!! cerr << "aux_pos = " << aux_pos << endl;
|
Chris@16
|
621 // aux_pos = ((double)j*m_glogs_f0[i])*((double)(m_warp_params.nsamps_twarp/2+1))/m_warpings.fs_warp;
|
Chris@16
|
622 // cerr << "or " << aux_pos << " (as fs_warp = " << m_warpings.fs_warp << ")" << endl;
|
Chris@10
|
623 m_glogs_posint[aux_index] = (int)aux_pos;
|
Chris@7
|
624 m_glogs_posfrac[aux_index] = aux_pos - (double)m_glogs_posint[aux_index];
|
Chris@7
|
625 aux_index++;
|
Chris@7
|
626 }
|
Chris@7
|
627 }
|
Chris@0
|
628
|
Chris@7
|
629 // Third harmonic attenuation
|
Chris@7
|
630 double aux_third_harmonic;
|
Chris@14
|
631 m_glogs_third_harmonic_posint = allocate<int>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@14
|
632 m_glogs_third_harmonic_posfrac = allocate<double>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@10
|
633 for (int i = 0; i < (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct; i++) {
|
Chris@7
|
634 aux_third_harmonic = (double)i + (double)m_glogs_init_f0s - ((double)m_f0_params.num_f0s_per_oct)*log2(3.0);
|
Chris@10
|
635 m_glogs_third_harmonic_posint[i] = (int)aux_third_harmonic;
|
Chris@7
|
636 m_glogs_third_harmonic_posfrac[i] = aux_third_harmonic - (double)(m_glogs_third_harmonic_posint[i]);
|
Chris@7
|
637 }
|
Chris@14
|
638 m_glogs_third_harmonic = allocate<double>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@0
|
639
|
Chris@7
|
640 // Fifth harmonic attenuation
|
Chris@7
|
641 double aux_fifth_harmonic;
|
Chris@14
|
642 m_glogs_fifth_harmonic_posint = allocate<int>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@14
|
643 m_glogs_fifth_harmonic_posfrac = allocate<double>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@10
|
644 for (int i = 0; i < (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct; i++) {
|
Chris@7
|
645 aux_fifth_harmonic = (double)i + (double)m_glogs_init_f0s - ((double)m_f0_params.num_f0s_per_oct)*log2(5.0);
|
Chris@10
|
646 m_glogs_fifth_harmonic_posint[i] = (int)aux_fifth_harmonic;
|
Chris@7
|
647 m_glogs_fifth_harmonic_posfrac[i] = aux_fifth_harmonic - (double)(m_glogs_fifth_harmonic_posint[i]);
|
Chris@7
|
648 }
|
Chris@14
|
649 m_glogs_fifth_harmonic = allocate<double>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@0
|
650
|
Chris@7
|
651 // Normalization & attenuation windows
|
Chris@14
|
652 m_glogs_f0_preference_weights = allocate<double>(m_f0_params.num_octs*m_f0_params.num_f0s_per_oct);
|
Chris@14
|
653 m_glogs_median_correction = allocate<double>(m_f0_params.num_octs*m_f0_params.num_f0s_per_oct);
|
Chris@14
|
654 m_glogs_sigma_correction = allocate<double>(m_f0_params.num_octs*m_f0_params.num_f0s_per_oct);
|
Chris@7
|
655 double MIDI_value;
|
Chris@10
|
656 for (int i = 0; i < m_f0_params.num_octs*m_f0_params.num_f0s_per_oct; i++) {
|
Chris@7
|
657 MIDI_value = 69.0 + 12.0 * log2(m_glogs_f0[i + m_glogs_init_f0s]/440.0);
|
Chris@7
|
658 m_glogs_f0_preference_weights[i] = 1.0/sqrt(2.0*M_PI*m_f0_params.prefer_stdev*m_f0_params.prefer_stdev)*exp(-(MIDI_value-m_f0_params.prefer_mean)*(MIDI_value-m_f0_params.prefer_mean)/(2.0*m_f0_params.prefer_stdev*m_f0_params.prefer_stdev));
|
Chris@7
|
659 m_glogs_f0_preference_weights[i] = (0.01 + m_glogs_f0_preference_weights[i]) / (1.01);
|
Chris@0
|
660
|
Chris@7
|
661 m_glogs_median_correction[i] = m_glogs_params.median_poly_coefs[0]*(i+1.0)*(i+1.0) + m_glogs_params.median_poly_coefs[1]*(i+1.0) + m_glogs_params.median_poly_coefs[2];
|
Chris@7
|
662 m_glogs_sigma_correction[i] = 1.0 / (m_glogs_params.sigma_poly_coefs[0]*(i+1.0)*(i+1.0) + m_glogs_params.sigma_poly_coefs[1]*(i+1.0) + m_glogs_params.sigma_poly_coefs[2]);
|
Chris@7
|
663 }
|
Chris@0
|
664 }
|
Chris@0
|
665
|
Chris@0
|
666 void
|
Chris@0
|
667 FChTransformF0gram::design_FChT() {
|
Chris@0
|
668
|
Chris@0
|
669 /*
|
Chris@0
|
670 * FILES FOR DEBUGGING
|
Chris@0
|
671 */
|
Chris@0
|
672
|
Chris@0
|
673 //ofstream output("output.txt");
|
Chris@0
|
674
|
Chris@0
|
675
|
Chris@0
|
676 /* ============= WARPING DESIGN ============= */
|
Chris@0
|
677
|
Chris@0
|
678 // sampling frequency after oversampling
|
Chris@0
|
679 m_warpings.fs_orig = m_warp_params.fact_over_samp * m_fs;
|
Chris@0
|
680
|
Chris@0
|
681 // number of samples of the original signal frame
|
Chris@0
|
682 m_warpings.nsamps_torig = 4 * m_warp_params.fact_over_samp * m_warp_params.nsamps_twarp;
|
Chris@0
|
683 // equivalent to: m_warpings.nsamps_torig = m_warp_params.fact_over_samp * m_blockSize;
|
Chris@0
|
684
|
Chris@0
|
685 // time instants of the original signal frame
|
Chris@14
|
686 double *t_orig = allocate<double>(m_warpings.nsamps_torig);
|
Chris@10
|
687 for (int ind = 0; ind < m_warpings.nsamps_torig; ind++) {
|
Chris@0
|
688 t_orig[ind] = ((double)(ind + 1) - (double)m_warpings.nsamps_torig / 2.0) / m_warpings.fs_orig;
|
Chris@0
|
689 }
|
Chris@0
|
690
|
Chris@0
|
691 // linear chirps warping definition as relative frequency deviation
|
Chris@7
|
692 //TODO
|
Chris@14
|
693 double *freq_relative = allocate<double>(m_warpings.nsamps_torig * m_warp_params.num_warps);
|
Chris@0
|
694 define_warps_linear_chirps(freq_relative, t_orig);
|
Chris@0
|
695
|
Chris@0
|
696 // maximum relative frequency deviation
|
Chris@0
|
697 double freq_relative_max = 0;
|
Chris@14
|
698 for (int i = 0; i < m_warpings.nsamps_torig; i++) {
|
Chris@14
|
699 for (int j = 0; j < m_warp_params.num_warps; j++) {
|
Chris@14
|
700 if (freq_relative_max < freq_relative[j * m_warpings.nsamps_torig + i]) {
|
Chris@0
|
701 freq_relative_max = freq_relative[j * m_warpings.nsamps_torig + i];
|
Chris@14
|
702 }
|
Chris@14
|
703 }
|
Chris@14
|
704 }
|
Chris@0
|
705
|
Chris@0
|
706 // sampling frequency of warped signal to be free of aliasing up to fmax
|
Chris@0
|
707 m_warpings.fs_warp = 2 * m_fmax * freq_relative_max;
|
Chris@0
|
708
|
Chris@0
|
709 // time instants of the warped signal frame
|
Chris@14
|
710 double *t_warp = allocate<double>(m_warp_params.nsamps_twarp);
|
Chris@10
|
711 for (int ind = 0; ind < m_warp_params.nsamps_twarp; ind++) {
|
Chris@0
|
712 t_warp[ind] = ((double)((int)(ind + 1)- (int)m_warp_params.nsamps_twarp / 2)) / (double)m_warpings.fs_warp;
|
Chris@0
|
713 }
|
Chris@0
|
714
|
Chris@0
|
715 // design of warpings for efficient interpolation
|
Chris@0
|
716 design_warps(freq_relative, t_orig, t_warp);
|
Chris@0
|
717
|
Chris@0
|
718
|
Chris@0
|
719 /*
|
Chris@0
|
720 * FILES FOR DEBUGGING
|
Chris@0
|
721 */
|
Chris@0
|
722
|
Chris@0
|
723 /*
|
Chris@7
|
724 output << "chirp_rates" << endl;
|
Chris@10
|
725 for (int j = 0; j < m_warp_params.num_warps; j++){
|
Chris@7
|
726 output << m_warpings.chirp_rates[j];
|
Chris@7
|
727 output << " ";
|
Chris@7
|
728 }
|
Chris@7
|
729 output << endl << "freq_relative" << endl;
|
Chris@0
|
730
|
Chris@10
|
731 for (int i = 0; i < m_warpings.nsamps_torig; i++){
|
Chris@10
|
732 for (int j = 0; j < m_warp_params.num_warps; j++){
|
Chris@7
|
733 output << freq_relative[j * m_warpings.nsamps_torig + i];
|
Chris@7
|
734 output << " ";
|
Chris@7
|
735 }
|
Chris@7
|
736 output << endl;
|
Chris@7
|
737 }
|
Chris@0
|
738
|
Chris@7
|
739 output << endl << "t_orig" << endl;
|
Chris@0
|
740
|
Chris@10
|
741 for (int i = 0; i < m_warpings.nsamps_torig; i++){
|
Chris@7
|
742 output << t_orig[i] << endl ;
|
Chris@7
|
743 }
|
Chris@7
|
744 */
|
Chris@0
|
745
|
Chris@14
|
746 deallocate(freq_relative);
|
Chris@14
|
747 deallocate(t_orig);
|
Chris@14
|
748 deallocate(t_warp);
|
Chris@14
|
749
|
Chris@0
|
750 //output.close();
|
Chris@0
|
751
|
Chris@0
|
752 /* ============= FFTW PLAN DESIGN ============= */
|
Chris@7
|
753 // Initialize 2-d array for warped signals
|
Chris@14
|
754 x_warping = allocate<double>(m_warp_params.nsamps_twarp);
|
Chris@14
|
755 m_absFanChirpTransform = allocate<double>(m_warp_params.num_warps * (m_warp_params.nsamps_twarp/2 + 1));
|
Chris@14
|
756 m_auxFanChirpTransform = allocate<double>(2 * (m_warp_params.nsamps_twarp/2 + 1));
|
Chris@14
|
757 fft_xwarping = new FFTReal(m_warp_params.nsamps_twarp);
|
Chris@0
|
758 }
|
Chris@0
|
759
|
Chris@0
|
760 void
|
Chris@0
|
761 FChTransformF0gram::design_warps(double * freq_relative, double * t_orig, double * t_warp) {
|
Chris@0
|
762 /* the warping is done by interpolating the original signal in time instants
|
Chris@0
|
763 given by the desired frequency deviation, to do this, the interpolation
|
Chris@0
|
764 instants are stored in a structure as an integer index and a fractional value
|
Chris@0
|
765 hypothesis: sampling frequency at the central point equals the original
|
Chris@7
|
766 */
|
Chris@0
|
767
|
Chris@14
|
768 m_warpings.pos_int = allocate<int>(m_warp_params.num_warps * m_warp_params.nsamps_twarp);
|
Chris@14
|
769 m_warpings.pos_frac = allocate<double>(m_warp_params.num_warps * m_warp_params.nsamps_twarp);
|
Chris@0
|
770
|
Chris@7
|
771 // vector of phase values
|
Chris@14
|
772 double *phi = allocate<double>(m_warpings.nsamps_torig);
|
Chris@7
|
773 double aux;
|
Chris@0
|
774
|
Chris@7
|
775 // warped positions
|
Chris@14
|
776 double *pos1 = allocate<double>(m_warp_params.nsamps_twarp*m_warp_params.num_warps);
|
Chris@0
|
777
|
Chris@10
|
778 for (int i = 0; i < m_warp_params.num_warps; i++) {
|
Chris@0
|
779
|
Chris@7
|
780 // integration of relative frequency to obtain phase values
|
Chris@14
|
781 Utils::cumtrapz(t_orig, freq_relative + i*(m_warpings.nsamps_torig), m_warpings.nsamps_torig, phi);
|
Chris@0
|
782
|
Chris@7
|
783 // centering of phase values to force original frequency in the middle
|
Chris@7
|
784 aux = phi[m_warpings.nsamps_torig/2];
|
Chris@10
|
785 for (int j = 0; j < m_warpings.nsamps_torig; j++) {
|
Chris@7
|
786 phi[j] -= aux;
|
Chris@7
|
787 } //for
|
Chris@0
|
788
|
Chris@7
|
789 // interpolation of phase values to obtain warped positions
|
Chris@14
|
790 Utils::interp1(phi, t_orig, m_warpings.nsamps_torig, t_warp, pos1 + i*m_warp_params.nsamps_twarp, m_warp_params.nsamps_twarp);
|
Chris@0
|
791 }
|
Chris@0
|
792
|
Chris@0
|
793 // % previous sample index
|
Chris@0
|
794 // pos1_int = uint32(floor(pos1))';
|
Chris@0
|
795 // % integer corresponding to previous sample index in "c"
|
Chris@0
|
796 // warps.pos1_int = (pos1_int - uint32(1));
|
Chris@0
|
797 // % fractional value that defines the warped position
|
Chris@0
|
798 // warps.pos1_frac = (double(pos1)' - double(pos1_int));
|
Chris@0
|
799
|
Chris@10
|
800 for (int j = 0; j < m_warp_params.nsamps_twarp*m_warp_params.num_warps; j++) {
|
Chris@7
|
801 // previous sample index
|
Chris@7
|
802 pos1[j] = pos1[j]*m_warpings.fs_orig + m_warpings.nsamps_torig/2 + 1;
|
Chris@10
|
803 m_warpings.pos_int[j] = (int) pos1[j];
|
Chris@7
|
804 m_warpings.pos_frac[j] = pos1[j] - (double)(m_warpings.pos_int[j]);
|
Chris@7
|
805 } //for
|
Chris@0
|
806
|
Chris@14
|
807 deallocate(phi);
|
Chris@14
|
808 deallocate(pos1);
|
Chris@0
|
809 }
|
Chris@0
|
810
|
Chris@0
|
811 void
|
Chris@0
|
812 FChTransformF0gram::define_warps_linear_chirps(double * freq_relative, double * t_orig) {
|
Chris@0
|
813 /** define warps as relative frequency deviation from original frequency
|
Chris@7
|
814 t_orig : time vector
|
Chris@7
|
815 freq_relative : relative frequency deviations
|
Chris@7
|
816 */
|
Chris@0
|
817 if (m_warp_params.alpha_dist == 0) {
|
Chris@0
|
818
|
Chris@0
|
819 // linear alpha values spacing
|
Chris@14
|
820 m_warpings.chirp_rates = allocate<double>(m_warp_params.num_warps);
|
Chris@0
|
821 // WARNING m_warp_params.num_warps must be odd
|
Chris@0
|
822 m_warpings.chirp_rates[0] = -m_warp_params.alpha_max;
|
Chris@0
|
823 double increment = (double) m_warp_params.alpha_max / ((m_warp_params.num_warps - 1) / 2);
|
Chris@0
|
824
|
Chris@10
|
825 for (int ind = 1; ind < m_warp_params.num_warps; ind++) {
|
Chris@0
|
826 m_warpings.chirp_rates[ind] = m_warpings.chirp_rates[ind - 1] + increment;
|
Chris@0
|
827 }
|
Chris@0
|
828 // force zero value
|
Chris@0
|
829 m_warpings.chirp_rates[(int) ((m_warp_params.num_warps - 1) / 2)] = 0;
|
Chris@0
|
830
|
Chris@0
|
831 } else {
|
Chris@0
|
832 // log alpha values spacing
|
Chris@14
|
833 m_warpings.chirp_rates = allocate<double>(m_warp_params.num_warps);
|
Chris@0
|
834
|
Chris@0
|
835 // force zero value
|
Chris@0
|
836 int middle_point = (int) ((m_warp_params.num_warps - 1) / 2);
|
Chris@0
|
837 m_warpings.chirp_rates[middle_point] = 0;
|
Chris@0
|
838
|
Chris@0
|
839 double logMax = log10(m_warp_params.alpha_max + 1);
|
Chris@0
|
840 double increment = logMax / ((m_warp_params.num_warps - 1) / 2.0f);
|
Chris@0
|
841 double exponent = 0;
|
Chris@0
|
842
|
Chris@0
|
843 // fill positive values
|
Chris@0
|
844 int ind_log = middle_point;
|
Chris@10
|
845 for (int ind = 0; ind < (m_warp_params.num_warps + 1) / 2; ind++) {
|
Chris@0
|
846 m_warpings.chirp_rates[ind_log] = pow(10, exponent) - 1;
|
Chris@0
|
847 exponent += increment;
|
Chris@0
|
848 ind_log++;
|
Chris@0
|
849 }
|
Chris@0
|
850 // fill negative values
|
Chris@10
|
851 for (int ind = 0; ind < (m_warp_params.num_warps - 1) / 2; ind++) {
|
Chris@0
|
852 m_warpings.chirp_rates[ind] = -m_warpings.chirp_rates[m_warp_params.num_warps - 1 - ind];
|
Chris@0
|
853 }
|
Chris@0
|
854 }
|
Chris@0
|
855
|
Chris@0
|
856 // compute relative frequency deviation
|
Chris@14
|
857 for (int i = 0; i < m_warpings.nsamps_torig; i++) {
|
Chris@14
|
858 for (int j = 0; j < m_warp_params.num_warps; j++) {
|
Chris@0
|
859 freq_relative[j * m_warpings.nsamps_torig + i] = 1.0 + t_orig[i] * m_warpings.chirp_rates[j];
|
Chris@14
|
860 }
|
Chris@14
|
861 }
|
Chris@0
|
862 }
|
Chris@0
|
863
|
Chris@0
|
864 void
|
Chris@14
|
865 FChTransformF0gram::design_LPF()
|
Chris@14
|
866 {
|
Chris@14
|
867 double *lp_LPFWindow_aux = allocate<double>(m_blockSize/2+1);
|
Chris@14
|
868 mp_LPFWindow = allocate<double>(m_blockSize/2+1);
|
Chris@0
|
869
|
Chris@10
|
870 int i_max = (int) ((2.0*m_fmax/m_fs) * ( (double)m_blockSize / 2.0 + 1.0 ));
|
Chris@10
|
871 for (int i = 0; i < m_blockSize/2+1; i++) {
|
Chris@0
|
872 if (i >= i_max) {
|
Chris@0
|
873 lp_LPFWindow_aux[i] = 0.0;
|
Chris@0
|
874 } else {
|
Chris@0
|
875 lp_LPFWindow_aux[i] = 1.0;
|
Chris@0
|
876 }
|
Chris@0
|
877 }
|
Chris@14
|
878
|
Chris@14
|
879 LPF_time = allocate_and_zero<double>(m_warpings.nsamps_torig);
|
Chris@14
|
880 LPF_frequency = allocate_and_zero<double>(2 * (m_warpings.nsamps_torig/2 + 1));
|
Chris@14
|
881
|
Chris@14
|
882 fft_forward_LPF = new FFTReal(m_blockSize);
|
Chris@14
|
883 fft_inverse_LPF = new FFTReal(m_warpings.nsamps_torig);
|
Chris@0
|
884
|
Chris@10
|
885 int winWidth = 11;
|
Chris@14
|
886 double *lp_hanningWindow = allocate<double>(winWidth);
|
Chris@0
|
887 double accum=0;
|
Chris@10
|
888 for (int i = 0; i < winWidth; i++) {
|
Chris@0
|
889 lp_hanningWindow[i]=0.5*(1.0-cos(2*M_PI*(double)(i+1)/((double)winWidth+1.0)));
|
Chris@0
|
890 accum+=lp_hanningWindow[i];
|
Chris@0
|
891
|
Chris@0
|
892 }
|
Chris@10
|
893 for (int i = 0; i < winWidth; i++) { //window normalization
|
Chris@0
|
894 lp_hanningWindow[i]=lp_hanningWindow[i]/accum;
|
Chris@0
|
895 }
|
Chris@10
|
896 for (int i = 0; i < m_blockSize/2+1; i++) {
|
Chris@0
|
897 //if (((i-(winWidth-1)/2)<0)||(i+(winWidth-1))/2>m_blockSize/2-1) {//consideramos winWidth impar, si la ventana sale del arreglo se rellena con el valor origianl
|
Chris@7
|
898 if ( (i > (i_max + (winWidth-1)/2)) || (i <= (i_max - (winWidth-1)/2)) ) {
|
Chris@0
|
899 mp_LPFWindow[i]=lp_LPFWindow_aux[i];
|
Chris@0
|
900 } else {
|
Chris@0
|
901 accum=0;
|
Chris@10
|
902 for (int j = -((winWidth-1)/2); j <= (winWidth-1)/2; j++) {
|
Chris@0
|
903 accum+=lp_LPFWindow_aux[i-j]*lp_hanningWindow[j+(winWidth-1)/2];
|
Chris@7
|
904 }
|
Chris@0
|
905 mp_LPFWindow[i]=accum;
|
Chris@0
|
906 }
|
Chris@0
|
907 }
|
Chris@0
|
908
|
Chris@14
|
909 deallocate(lp_LPFWindow_aux);
|
Chris@14
|
910 deallocate(lp_hanningWindow);
|
Chris@0
|
911 }
|
Chris@0
|
912
|
Chris@14
|
913 void FChTransformF0gram::apply_LPF()
|
Chris@14
|
914 {
|
Chris@14
|
915 fft_forward_LPF->forward(LPF_time, LPF_frequency);
|
Chris@14
|
916
|
Chris@10
|
917 for (int i = 0; i < m_blockSize/2+1; i++) {
|
Chris@16
|
918 LPF_frequency[i*2] *= mp_LPFWindow[i];
|
Chris@16
|
919 LPF_frequency[i*2 + 1] *= mp_LPFWindow[i];
|
Chris@0
|
920 }
|
Chris@14
|
921
|
Chris@14
|
922 fft_inverse_LPF->inverse(LPF_frequency, LPF_time);
|
Chris@0
|
923
|
Chris@7
|
924 // TODO ver si hay que hacer fftshift para corregir la fase respecto al centro del frame.
|
Chris@7
|
925 // nota: además de aplicar el LPF, esta función resamplea la señal original.
|
Chris@0
|
926 }
|
Chris@0
|
927
|
Chris@14
|
928 void FChTransformF0gram::clean_LPF()
|
Chris@14
|
929 {
|
Chris@14
|
930 delete fft_forward_LPF;
|
Chris@14
|
931 delete fft_inverse_LPF;
|
Chris@14
|
932 deallocate(LPF_time);
|
Chris@14
|
933 deallocate(LPF_frequency);
|
Chris@14
|
934 deallocate(mp_LPFWindow);
|
Chris@0
|
935 }
|
Chris@0
|
936
|
Chris@14
|
937 void FChTransformF0gram::reset()
|
Chris@14
|
938 {
|
Chris@0
|
939 }
|
Chris@0
|
940
|
Chris@0
|
941 FChTransformF0gram::FeatureSet
|
Chris@5
|
942 FChTransformF0gram::process(const float *const *inputBuffers, Vamp::RealTime) {
|
Chris@0
|
943
|
Chris@0
|
944 // // Do actual work!
|
Chris@0
|
945 //
|
Chris@0
|
946
|
Chris@7
|
947 /* PSEUDOCÓDIGO:
|
Chris@7
|
948 - Aplicar FFT al frame entero.
|
Chris@7
|
949 - Filtro pasabajos en frecuencia.
|
Chris@7
|
950 - FFT inversa al frame entero.
|
Chris@7
|
951 -----------------------------------------------------------------------------
|
Chris@7
|
952 - Para cada warp: *Si es un espectrograma direccional (un solo warp
|
Chris@7
|
953 => no es para cada warp sino para el elegido)
|
Chris@7
|
954 - Hacer la interpolación con interp1q.
|
Chris@7
|
955 - Aplicar la FFT al frame warpeado.
|
Chris@7
|
956 - (Opcional) GLogS.
|
Chris@7
|
957 - ...
|
Chris@7
|
958 */
|
Chris@0
|
959
|
Chris@0
|
960 //---------------------------------------------------------------------------
|
Chris@7
|
961 FeatureSet fs;
|
Chris@0
|
962
|
Chris@7
|
963 #ifdef DEBUG
|
Chris@16
|
964 fprintf(stderr, "\n ----- DEBUG INFORMATION ----- \n");
|
Chris@16
|
965 fprintf(stderr, " m_fs = %f Hz.\n",m_fs);
|
Chris@16
|
966 fprintf(stderr, " fs_orig = %f Hz.\n",m_warpings.fs_orig);
|
Chris@16
|
967 fprintf(stderr, " fs_warp = %f Hz.\n",m_warpings.fs_warp);
|
Chris@16
|
968 fprintf(stderr, " m_nfft = %d.\n",m_nfft);
|
Chris@16
|
969 fprintf(stderr, " m_blockSize = %d.\n",m_blockSize);
|
Chris@16
|
970 fprintf(stderr, " m_warpings.nsamps_torig = %d.\n",m_warpings.nsamps_torig);
|
Chris@16
|
971 fprintf(stderr, " m_warp_params.num_warps = %d.\n",m_warp_params.num_warps);
|
Chris@16
|
972 fprintf(stderr, " m_glogs_harmonic_count = %d.\n",m_glogs_harmonic_count);
|
Chris@7
|
973 #endif
|
Chris@0
|
974
|
Chris@10
|
975 for (int i = 0; i < m_blockSize; i++) {
|
Chris@0
|
976 LPF_time[i] = (double)(inputBuffers[0][i]) * m_timeWindow[i];
|
Chris@16
|
977 LPF_time[m_blockSize+i] = 0.0;
|
Chris@0
|
978 }
|
Chris@0
|
979
|
Chris@0
|
980 // #ifdef DEBUG
|
Chris@16
|
981 // fprintf(stderr, " HASTA ACÁ ANDA!!!\n");
|
Chris@0
|
982 // cout << flush;
|
Chris@0
|
983 // #endif
|
Chris@0
|
984
|
Chris@7
|
985 apply_LPF();
|
Chris@7
|
986 // Señal filtrada queda en LPF_time
|
Chris@0
|
987
|
Chris@7
|
988 Feature feature;
|
Chris@0
|
989 feature.hasTimestamp = false;
|
Chris@0
|
990
|
Chris@15
|
991 if (m_processingMode == ModeRoughSpectrogram) {
|
Chris@15
|
992 feature.values = vector<float>(m_warp_params.nsamps_twarp/2+1, 0.f);
|
Chris@15
|
993 }
|
Chris@15
|
994
|
Chris@0
|
995 // ----------------------------------------------------------------------------------------------
|
Chris@0
|
996 // Hanning window & FFT for all warp directions
|
Chris@0
|
997
|
Chris@7
|
998 double max_glogs = -DBL_MAX;
|
Chris@10
|
999 int ind_max_glogs = 0;
|
Chris@0
|
1000
|
Chris@10
|
1001 for (int i_warp = 0; i_warp < m_warp_params.num_warps; i_warp++) {
|
Chris@16
|
1002
|
Chris@7
|
1003 // Interpolate
|
Chris@14
|
1004 Utils::interp1q(LPF_time, (m_warpings.pos_int) + i_warp*m_warp_params.nsamps_twarp, m_warpings.pos_frac + i_warp*m_warp_params.nsamps_twarp, x_warping, m_warp_params.nsamps_twarp);
|
Chris@0
|
1005
|
Chris@7
|
1006 // Apply window
|
Chris@10
|
1007 for (int i = 0; i < m_warp_params.nsamps_twarp; i++) {
|
Chris@7
|
1008 x_warping[i] *= mp_HanningWindow[i];
|
Chris@7
|
1009 }
|
Chris@0
|
1010
|
Chris@7
|
1011 // Transform
|
Chris@14
|
1012 fft_xwarping->forward(x_warping, m_auxFanChirpTransform);
|
Chris@0
|
1013
|
Chris@15
|
1014 if (m_processingMode == ModeRoughSpectrogram) {
|
Chris@15
|
1015 for (int i = 0; i < (m_warp_params.nsamps_twarp/2+1); i++) {
|
Chris@15
|
1016 double abs = sqrt(m_auxFanChirpTransform[i*2]*m_auxFanChirpTransform[i*2]+m_auxFanChirpTransform[i*2+1]*m_auxFanChirpTransform[i*2+1]);
|
Chris@15
|
1017 if (abs > feature.values[i]) {
|
Chris@15
|
1018 feature.values[i] = abs;
|
Chris@15
|
1019 }
|
Chris@15
|
1020 }
|
Chris@15
|
1021 continue;
|
Chris@15
|
1022 }
|
Chris@15
|
1023
|
Chris@7
|
1024 // Copy result
|
Chris@7
|
1025 double *aux_abs_fcht = m_absFanChirpTransform + i_warp*(m_warp_params.nsamps_twarp/2+1);
|
Chris@10
|
1026 for (int i = 0; i < (m_warp_params.nsamps_twarp/2+1); i++) {
|
Chris@14
|
1027 aux_abs_fcht[i] = log10(1.0 + 10.0*sqrt(m_auxFanChirpTransform[i*2]*m_auxFanChirpTransform[i*2]+m_auxFanChirpTransform[i*2+1]*m_auxFanChirpTransform[i*2+1]));
|
Chris@7
|
1028 }
|
Chris@0
|
1029
|
Chris@0
|
1030 // -----------------------------------------------------------------------------------------
|
Chris@0
|
1031 // GLogS
|
Chris@14
|
1032 Utils::interp1q(aux_abs_fcht, m_glogs_posint, m_glogs_posfrac, m_glogs_interp, m_glogs_harmonic_count);
|
Chris@10
|
1033 int glogs_ind = 0;
|
Chris@10
|
1034 for (int i = 0; i < m_glogs_num_f0s; i++) {
|
Chris@7
|
1035 double glogs_accum = 0;
|
Chris@10
|
1036 for (int j = 1; j <= m_glogs_n[i]; j++) {
|
Chris@7
|
1037 glogs_accum += m_glogs_interp[glogs_ind++];
|
Chris@7
|
1038 }
|
Chris@7
|
1039 m_glogs[i + i_warp*m_glogs_num_f0s] = glogs_accum/(double)m_glogs_n[i];
|
Chris@7
|
1040 }
|
Chris@0
|
1041
|
Chris@0
|
1042 // Sub/super harmonic correction
|
Chris@14
|
1043 Utils::interp1q(m_glogs + i_warp*m_glogs_num_f0s, m_glogs_third_harmonic_posint, m_glogs_third_harmonic_posfrac, m_glogs_third_harmonic, (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@14
|
1044 Utils::interp1q(m_glogs + i_warp*m_glogs_num_f0s, m_glogs_fifth_harmonic_posint, m_glogs_fifth_harmonic_posfrac, m_glogs_fifth_harmonic, (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@10
|
1045 for (int i = m_glogs_num_f0s-1; i >= m_glogs_init_f0s; i--) {
|
Chris@7
|
1046 m_glogs[i + i_warp*m_glogs_num_f0s] -= MAX(MAX(m_glogs[i-m_f0_params.num_f0s_per_oct + i_warp*m_glogs_num_f0s],m_glogs_third_harmonic[i-m_glogs_init_f0s]),m_glogs_fifth_harmonic[i-m_glogs_init_f0s]);
|
Chris@7
|
1047 //m_glogs[i] -= MAX(m_glogs[i-m_f0_params.num_f0s_per_oct],m_glogs_third_harmonic[i-m_glogs_init_f0s]);
|
Chris@7
|
1048 }
|
Chris@10
|
1049 for (int i = m_glogs_init_f0s; i < m_glogs_num_f0s-m_f0_params.num_f0s_per_oct; i++) {
|
Chris@7
|
1050 m_glogs[i + i_warp*m_glogs_num_f0s] -= 0.3*m_glogs[i+m_f0_params.num_f0s_per_oct + i_warp*m_glogs_num_f0s];
|
Chris@7
|
1051 // Median, sigma $ weights correction
|
Chris@7
|
1052 m_glogs[i + i_warp*m_glogs_num_f0s] = (m_glogs[i + i_warp*m_glogs_num_f0s]-m_glogs_median_correction[i-m_glogs_init_f0s])*m_glogs_sigma_correction[i-m_glogs_init_f0s]*m_glogs_f0_preference_weights[i-m_glogs_init_f0s];
|
Chris@7
|
1053 }
|
Chris@0
|
1054
|
Chris@7
|
1055 // Look for maximum value to determine best direction
|
Chris@10
|
1056 for (int i = m_glogs_init_f0s; i < m_glogs_num_f0s-m_f0_params.num_f0s_per_oct; i++) {
|
Chris@7
|
1057 if (m_glogs[i + i_warp*m_glogs_num_f0s] > max_glogs) {
|
Chris@7
|
1058 max_glogs = m_glogs[i + i_warp*m_glogs_num_f0s];
|
Chris@7
|
1059 ind_max_glogs = i_warp;
|
Chris@7
|
1060 }
|
Chris@7
|
1061 }
|
Chris@7
|
1062 }
|
Chris@0
|
1063
|
Chris@15
|
1064 if (m_processingMode == ModeRoughSpectrogram) {
|
Chris@15
|
1065
|
Chris@15
|
1066 // already accumulated our return values in feature
|
Chris@15
|
1067
|
Chris@15
|
1068 } else if (m_processingMode == ModeSpectrogram) {
|
Chris@15
|
1069
|
Chris@15
|
1070 for (int i = 0; i < m_warp_params.nsamps_twarp/2+1; i++) {
|
Chris@15
|
1071 feature.values.push_back(pow(10.0, m_absFanChirpTransform[ind_max_glogs * (m_warp_params.nsamps_twarp/2+1) + i]) - 1.0);
|
Chris@15
|
1072 }
|
Chris@15
|
1073
|
Chris@15
|
1074 } else { // f0gram
|
Chris@15
|
1075
|
Chris@15
|
1076 for (int i=m_glogs_init_f0s; i< m_glogs_num_f0s - m_f0_params.num_f0s_per_oct; i++) {
|
Chris@15
|
1077 switch (m_f0gram_mode) {
|
Chris@15
|
1078 case AllBinsOfBestDirection:
|
Chris@15
|
1079 feature.values.push_back((float)m_glogs[i+(int)ind_max_glogs*(int)m_glogs_num_f0s]);
|
Chris@15
|
1080 break;
|
Chris@15
|
1081 case BestBinOfAllDirections:
|
Chris@15
|
1082 max_glogs = -DBL_MAX;
|
Chris@15
|
1083 for (int i_warp = 0; i_warp < m_warp_params.num_warps; i_warp++) {
|
Chris@15
|
1084 if (m_glogs[i + i_warp*m_glogs_num_f0s] > max_glogs) {
|
Chris@15
|
1085 max_glogs = m_glogs[i + i_warp*m_glogs_num_f0s];
|
Chris@15
|
1086 ind_max_glogs = i_warp;
|
Chris@15
|
1087 }
|
Chris@7
|
1088 }
|
Chris@15
|
1089 feature.values.push_back((float)max_glogs);
|
Chris@15
|
1090 break;
|
Chris@7
|
1091 }
|
Chris@7
|
1092 }
|
Chris@7
|
1093 }
|
Chris@0
|
1094
|
Chris@7
|
1095 fs[0].push_back(feature);
|
Chris@7
|
1096 return fs;
|
Chris@0
|
1097 }
|
Chris@0
|
1098
|
Chris@0
|
1099 FChTransformF0gram::FeatureSet
|
Chris@0
|
1100 FChTransformF0gram::getRemainingFeatures() {
|
Chris@0
|
1101 return FeatureSet();
|
Chris@0
|
1102 }
|
Chris@0
|
1103
|
Chris@0
|
1104 void
|
Chris@0
|
1105 FChTransformF0gram::design_time_window() {
|
Chris@0
|
1106
|
Chris@10
|
1107 int transitionWidth = (int)m_blockSize/128 + 1;;
|
Chris@14
|
1108 m_timeWindow = allocate<double>(m_blockSize);
|
Chris@14
|
1109 double *lp_transitionWindow = allocate<double>(transitionWidth);
|
Chris@0
|
1110
|
Chris@7
|
1111 //memset(m_timeWindow, 1.0, m_blockSize);
|
Chris@10
|
1112 for (int i = 0; i < m_blockSize; i++) {
|
Chris@7
|
1113 m_timeWindow[i] = 1.0;
|
Chris@7
|
1114 }
|
Chris@0
|
1115
|
Chris@10
|
1116 for (int i = 0; i < transitionWidth; i++) {
|
Chris@0
|
1117 lp_transitionWindow[i]=0.5*(1.0-cos(2*M_PI*(double)(i+1)/((double)transitionWidth+1.0)));
|
Chris@0
|
1118 }
|
Chris@0
|
1119
|
Chris@10
|
1120 for (int i = 0; i < transitionWidth/2; i++) {
|
Chris@7
|
1121 m_timeWindow[i] = lp_transitionWindow[i];
|
Chris@7
|
1122 m_timeWindow[m_blockSize-1-i] = lp_transitionWindow[transitionWidth-1-i];
|
Chris@7
|
1123 }
|
Chris@0
|
1124
|
Chris@7
|
1125 #ifdef DEBUG
|
Chris@7
|
1126 for (int i = 0; i < m_blockSize; i++) {
|
Chris@7
|
1127 if ((i<transitionWidth)) {
|
Chris@16
|
1128 fprintf(stderr, " m_timeWindow[%d] = %f.\n",i,m_timeWindow[i]);
|
Chris@7
|
1129 }
|
Chris@7
|
1130 }
|
Chris@7
|
1131 #endif
|
Chris@0
|
1132
|
Chris@14
|
1133 deallocate(lp_transitionWindow);
|
Chris@0
|
1134 }
|
Chris@0
|
1135
|