Chris@0
|
1 /*
|
Chris@0
|
2 copyright (C) 2011 I. Irigaray, M. Rocamora
|
Chris@0
|
3
|
Chris@0
|
4 This program is free software: you can redistribute it and/or modify
|
Chris@0
|
5 it under the terms of the GNU General Public License as published by
|
Chris@0
|
6 the Free Software Foundation, either version 3 of the License, or
|
Chris@0
|
7 (at your option) any later version.
|
Chris@0
|
8
|
Chris@0
|
9 This program is distributed in the hope that it will be useful,
|
Chris@0
|
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
|
Chris@0
|
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
Chris@0
|
12 GNU General Public License for more details.
|
Chris@0
|
13
|
Chris@0
|
14 You should have received a copy of the GNU General Public License
|
Chris@0
|
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
|
Chris@7
|
16 */
|
Chris@0
|
17
|
Chris@0
|
#include "FChTransformF0gram.h"
#include "FChTransformUtils.h"
#include <math.h>
#include <float.h>
#include <stdio.h>

#include <set>
#include <stdexcept>

#include "bqvec/Allocators.h"
|
Chris@14
|
26
|
Chris@14
|
27 using namespace breakfastquay;
|
Chris@14
|
28
|
Chris@16
|
// Debug switch for this translation unit.
// NOTE(review): no #ifdef DEBUG is visible in this part of the file --
// confirm the symbol is still consumed somewhere.
#define DEBUG

// Larger of two values. This is a function-like macro, so each argument
// may be evaluated twice: do not pass expressions with side effects.
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
Chris@0
|
32
|
Chris@15
|
33 FChTransformF0gram::FChTransformF0gram(ProcessingMode mode,
|
Chris@15
|
34 float inputSampleRate) :
|
Chris@7
|
35 Plugin(inputSampleRate),
|
Chris@15
|
36 m_processingMode(mode),
|
Chris@7
|
37 m_stepSize(0), // We are using 0 for step and block size to indicate "not yet set".
|
Chris@7
|
38 m_blockSize(0) {
|
Chris@0
|
39
|
Chris@0
|
40 m_fs = inputSampleRate;
|
Chris@0
|
41 // max frequency of interest (Hz)
|
Chris@0
|
42 m_fmax = 10000.f;
|
Chris@0
|
43 // warping parameters
|
Chris@12
|
44 m_warp_params.nsamps_twarp = 2048;
|
Chris@0
|
45 m_warp_params.alpha_max = 4;
|
Chris@0
|
46 m_warp_params.num_warps = 21;
|
Chris@0
|
47 m_warp_params.fact_over_samp = 2;
|
Chris@0
|
48 m_warp_params.alpha_dist = 0;
|
Chris@0
|
49 // f0 parameters
|
Chris@0
|
50 m_f0_params.f0min = 80.0;
|
Chris@0
|
51 m_f0_params.num_octs = 4;
|
Chris@0
|
52 m_f0_params.num_f0s_per_oct = 192;
|
Chris@0
|
53 m_f0_params.num_f0_hyps = 5;
|
Chris@0
|
54 m_f0_params.prefer = true;
|
Chris@0
|
55 m_f0_params.prefer_mean = 60;
|
Chris@0
|
56 m_f0_params.prefer_stdev = 18;
|
Chris@0
|
57 // glogs parameters
|
Chris@0
|
58 m_glogs_params.HP_logS = true;
|
Chris@0
|
59 m_glogs_params.att_subharms = 1;
|
Chris@7
|
60 // display parameters
|
Chris@15
|
61 m_f0gram_mode = BestBinOfAllDirections;
|
Chris@0
|
62
|
Chris@0
|
63 m_glogs_params.median_poly_coefs[0] = -0.000000058551680;
|
Chris@0
|
64 m_glogs_params.median_poly_coefs[1] = -0.000006945207775;
|
Chris@0
|
65 m_glogs_params.median_poly_coefs[2] = 0.002357223226588;
|
Chris@0
|
66
|
Chris@0
|
67 m_glogs_params.sigma_poly_coefs[0] = 0.000000092782308;
|
Chris@0
|
68 m_glogs_params.sigma_poly_coefs[1] = 0.000057283574898;
|
Chris@0
|
69 m_glogs_params.sigma_poly_coefs[2] = 0.022199903714288;
|
Chris@0
|
70
|
Chris@0
|
71 // number of fft points (controls zero-padding)
|
Chris@0
|
72 m_nfft = m_warp_params.nsamps_twarp;
|
Chris@0
|
73 // hop in samples
|
Chris@0
|
74 m_hop = m_warp_params.fact_over_samp * 256;
|
Chris@0
|
75
|
Chris@0
|
76 m_num_f0s = 0;
|
Chris@16
|
77 m_f0s = 0;
|
Chris@0
|
78 }
|
Chris@0
|
79
|
Chris@14
|
80 FChTransformF0gram::~FChTransformF0gram()
|
Chris@14
|
81 {
|
Chris@14
|
82 if (!m_blockSize) {
|
Chris@14
|
83 return; // nothing was allocated
|
Chris@14
|
84 }
|
Chris@14
|
85
|
Chris@14
|
86 deallocate(m_warpings.pos_int);
|
Chris@14
|
87 deallocate(m_warpings.pos_frac);
|
Chris@14
|
88 deallocate(m_warpings.chirp_rates);
|
Chris@14
|
89
|
Chris@14
|
90 clean_LPF();
|
Chris@14
|
91
|
Chris@14
|
92 deallocate(m_timeWindow);
|
Chris@14
|
93
|
Chris@14
|
94 deallocate(mp_HanningWindow);
|
Chris@14
|
95
|
Chris@14
|
96 // Warping
|
Chris@14
|
97 deallocate(x_warping);
|
Chris@14
|
98 delete fft_xwarping;
|
Chris@14
|
99 deallocate(m_absFanChirpTransform);
|
Chris@14
|
100 deallocate(m_auxFanChirpTransform);
|
Chris@14
|
101
|
Chris@14
|
102 // design_GLogS
|
Chris@14
|
103 deallocate(m_glogs_f0);
|
Chris@14
|
104 deallocate(m_glogs);
|
Chris@14
|
105 deallocate(m_glogs_n);
|
Chris@14
|
106 deallocate(m_glogs_index);
|
Chris@14
|
107 deallocate(m_glogs_posint);
|
Chris@14
|
108 deallocate(m_glogs_posfrac);
|
Chris@14
|
109 deallocate(m_glogs_interp);
|
Chris@14
|
110 deallocate(m_glogs_third_harmonic_posint);
|
Chris@14
|
111 deallocate(m_glogs_third_harmonic_posfrac);
|
Chris@14
|
112 deallocate(m_glogs_third_harmonic);
|
Chris@14
|
113 deallocate(m_glogs_fifth_harmonic_posint);
|
Chris@14
|
114 deallocate(m_glogs_fifth_harmonic_posfrac);
|
Chris@14
|
115 deallocate(m_glogs_fifth_harmonic);
|
Chris@14
|
116 deallocate(m_glogs_f0_preference_weights);
|
Chris@14
|
117 deallocate(m_glogs_median_correction);
|
Chris@14
|
118 deallocate(m_glogs_sigma_correction);
|
Chris@16
|
119
|
Chris@16
|
120 deallocate(m_f0s);
|
Chris@0
|
121 }
|
Chris@0
|
122
|
Chris@0
|
123 string
|
Chris@0
|
124 FChTransformF0gram::getIdentifier() const {
|
Chris@15
|
125 switch (m_processingMode) {
|
Chris@15
|
126 case ModeF0Gram: return "fchtransformf0gram";
|
Chris@15
|
127 case ModeSpectrogram: return "fchtransformspectrogram";
|
Chris@15
|
128 case ModeRoughSpectrogram: return "fchtransformrough";
|
Chris@15
|
129 }
|
Chris@17
|
130 throw std::logic_error("unknown mode");
|
Chris@0
|
131 }
|
Chris@0
|
132
|
Chris@0
|
133 string
|
Chris@0
|
134 FChTransformF0gram::getName() const {
|
Chris@15
|
135 switch (m_processingMode) {
|
Chris@15
|
136 case ModeF0Gram: return "Fan Chirp Transform F0gram";
|
Chris@15
|
137 case ModeSpectrogram: return "Fan Chirp Transform Spectrogram";
|
Chris@15
|
138 case ModeRoughSpectrogram: return "Fan Chirp Transform Rough Spectrogram";
|
Chris@15
|
139 }
|
Chris@17
|
140 throw std::logic_error("unknown mode");
|
Chris@0
|
141 }
|
Chris@0
|
142
|
Chris@0
|
143 string
|
Chris@0
|
144 FChTransformF0gram::getDescription() const {
|
Chris@15
|
145 switch (m_processingMode) {
|
Chris@15
|
146 case ModeF0Gram:
|
Chris@15
|
147 return "This plug-in produces a representation, called F0gram, which exhibits the salience of the fundamental frequency of the sound sources in the audio file. The computation of the F0gram makes use of the Fan Chirp Transform analysis. It is based on the article \"Fan chirp transform for music representation\" P. Cancela, E. Lopez, M. Rocamora, International Conference on Digital Audio Effects, 13th. DAFx-10. Graz, Austria - 6-10 Sep 2010.";
|
Chris@15
|
148 case ModeSpectrogram:
|
Chris@15
|
149 return "This plug-in produces a spectral representation of the audio using Fan Chirp Transform analysis.";
|
Chris@15
|
150 case ModeRoughSpectrogram:
|
Chris@15
|
151 return "This plug-in produces a more approximate spectral representation of the audio using Fan Chirp Transform analysis.";
|
Chris@15
|
152 }
|
Chris@17
|
153 throw std::logic_error("unknown mode");
|
Chris@0
|
154 }
|
Chris@0
|
155
|
Chris@0
|
156 string
|
Chris@0
|
157 FChTransformF0gram::getMaker() const {
|
Chris@0
|
158 // Your name here
|
Chris@0
|
159 return "Audio Processing Group \n Universidad de la Republica";
|
Chris@0
|
160 }
|
Chris@0
|
161
|
Chris@0
|
162 int
|
Chris@0
|
163 FChTransformF0gram::getPluginVersion() const {
|
Chris@0
|
164 // Increment this each time you release a version that behaves
|
Chris@0
|
165 // differently from the previous one
|
Chris@0
|
166 //
|
Chris@0
|
167 // 0 - initial version from scratch
|
Chris@15
|
168 return 1;
|
Chris@0
|
169 }
|
Chris@0
|
170
|
Chris@0
|
171 string
|
Chris@0
|
172 FChTransformF0gram::getCopyright() const {
|
Chris@0
|
173 // This function is not ideally named. It does not necessarily
|
Chris@0
|
174 // need to say who made the plugin -- getMaker does that -- but it
|
Chris@0
|
175 // should indicate the terms under which it is distributed. For
|
Chris@0
|
176 // example, "Copyright (year). All Rights Reserved", or "GPL"
|
Chris@0
|
177 return "copyright (C) 2011 GPL - Audio Processing Group, UdelaR";
|
Chris@0
|
178 }
|
Chris@0
|
179
|
Chris@0
|
180 FChTransformF0gram::InputDomain
|
Chris@0
|
181 FChTransformF0gram::getInputDomain() const {
|
Chris@0
|
182 return TimeDomain;
|
Chris@0
|
183 }
|
Chris@0
|
184
|
Chris@0
|
185 size_t FChTransformF0gram::getPreferredBlockSize() const {
|
Chris@0
|
186 return 8192; // 0 means "I can handle any block size"
|
Chris@0
|
187 }
|
Chris@0
|
188
|
Chris@0
|
189 size_t
|
Chris@0
|
190 FChTransformF0gram::getPreferredStepSize() const {
|
Chris@0
|
191 return 256; // 0 means "anything sensible"; in practice this
|
Chris@0
|
192 // means the same as the block size for TimeDomain
|
Chris@0
|
193 // plugins, or half of it for FrequencyDomain plugins
|
Chris@0
|
194 }
|
Chris@0
|
195
|
Chris@0
|
196 size_t
|
Chris@0
|
197 FChTransformF0gram::getMinChannelCount() const {
|
Chris@0
|
198 return 1;
|
Chris@0
|
199 }
|
Chris@0
|
200
|
Chris@0
|
201 size_t
|
Chris@0
|
202 FChTransformF0gram::getMaxChannelCount() const {
|
Chris@0
|
203 return 1;
|
Chris@0
|
204 }
|
Chris@0
|
205
|
Chris@0
|
206 FChTransformF0gram::ParameterList
|
Chris@0
|
207 FChTransformF0gram::getParameterDescriptors() const {
|
Chris@0
|
208 ParameterList list;
|
Chris@0
|
209
|
Chris@0
|
210 // If the plugin has no adjustable parameters, return an empty
|
Chris@0
|
211 // list here (and there's no need to provide implementations of
|
Chris@0
|
212 // getParameter and setParameter in that case either).
|
Chris@0
|
213
|
Chris@0
|
214 // Note that it is your responsibility to make sure the parameters
|
Chris@0
|
215 // start off having their default values (e.g. in the constructor
|
Chris@0
|
216 // above). The host needs to know the default value so it can do
|
Chris@0
|
217 // things like provide a "reset to default" function, but it will
|
Chris@0
|
218 // not explicitly set your parameters to their defaults for you if
|
Chris@0
|
219 // they have not changed in the mean time.
|
Chris@0
|
220
|
Chris@0
|
221 // ============= WARPING PARAMETERS =============
|
Chris@0
|
222
|
Chris@0
|
223 ParameterDescriptor fmax;
|
Chris@0
|
224 fmax.identifier = "fmax";
|
Chris@0
|
225 fmax.name = "Maximum frequency";
|
Chris@0
|
226 fmax.description = "Maximum frequency of interest for the analysis.";
|
Chris@0
|
227 fmax.unit = "Hz";
|
Chris@0
|
228 fmax.minValue = 2000;
|
Chris@0
|
229 fmax.maxValue = 22050;
|
Chris@0
|
230 fmax.defaultValue = 10000;
|
Chris@0
|
231 fmax.isQuantized = true;
|
Chris@0
|
232 fmax.quantizeStep = 1.0;
|
Chris@0
|
233 list.push_back(fmax);
|
Chris@0
|
234
|
Chris@0
|
235 ParameterDescriptor nsamp;
|
Chris@0
|
236 nsamp.identifier = "nsamp";
|
Chris@0
|
237 nsamp.name = "Number of samples";
|
Chris@0
|
238 nsamp.description = "Number of samples of the time warped frame";
|
Chris@0
|
239 nsamp.unit = "samples";
|
Chris@0
|
240 nsamp.minValue = 128;
|
Chris@0
|
241 nsamp.maxValue = 4096;
|
Chris@0
|
242 nsamp.defaultValue = 2048;
|
Chris@0
|
243 nsamp.isQuantized = true;
|
Chris@0
|
244 nsamp.quantizeStep = 1.0;
|
Chris@0
|
245 list.push_back(nsamp);
|
Chris@0
|
246
|
Chris@0
|
247 ParameterDescriptor nfft;
|
Chris@0
|
248 nfft.identifier = "nfft";
|
Chris@0
|
249 nfft.name = "FFT number of points";
|
Chris@0
|
250 nfft.description = "Number of FFT points (controls zero-padding)";
|
Chris@0
|
251 nfft.unit = "samples";
|
Chris@0
|
252 nfft.minValue = 0;
|
Chris@0
|
253 nfft.maxValue = 4;
|
Chris@0
|
254 nfft.defaultValue = 3;
|
Chris@0
|
255 nfft.isQuantized = true;
|
Chris@0
|
256 nfft.quantizeStep = 1.0;
|
Chris@0
|
257 nfft.valueNames.push_back("256");
|
Chris@0
|
258 nfft.valueNames.push_back("512");
|
Chris@0
|
259 nfft.valueNames.push_back("1024");
|
Chris@0
|
260 nfft.valueNames.push_back("2048");
|
Chris@0
|
261 nfft.valueNames.push_back("4096");
|
Chris@0
|
262 nfft.valueNames.push_back("8192");
|
Chris@0
|
263 list.push_back(nfft);
|
Chris@0
|
264
|
Chris@0
|
265 ParameterDescriptor alpha_max;
|
Chris@0
|
266 alpha_max.identifier = "alpha_max";
|
Chris@0
|
267 alpha_max.name = "Maximum alpha value";
|
Chris@0
|
268 alpha_max.description = "Maximum value for the alpha parameter of the transform.";
|
Chris@0
|
269 alpha_max.unit = "Hz/s";
|
Chris@0
|
270 alpha_max.minValue = -10;
|
Chris@0
|
271 alpha_max.maxValue = 10;
|
Chris@0
|
272 alpha_max.defaultValue = 5;
|
Chris@0
|
273 alpha_max.isQuantized = true;
|
Chris@0
|
274 alpha_max.quantizeStep = 1.0;
|
Chris@0
|
275 list.push_back(alpha_max);
|
Chris@0
|
276
|
Chris@0
|
277 ParameterDescriptor num_warps;
|
Chris@0
|
278 num_warps.identifier = "num_warps";
|
Chris@0
|
279 num_warps.name = "Number of warpings";
|
Chris@0
|
280 num_warps.description = "Number of different warpings in the specified range (must be odd).";
|
Chris@0
|
281 num_warps.unit = "";
|
Chris@0
|
282 num_warps.minValue = 1;
|
Chris@0
|
283 num_warps.maxValue = 101;
|
Chris@0
|
284 num_warps.defaultValue = 21;
|
Chris@0
|
285 num_warps.isQuantized = true;
|
Chris@0
|
286 num_warps.quantizeStep = 2.0;
|
Chris@0
|
287 list.push_back(num_warps);
|
Chris@0
|
288
|
Chris@0
|
289 ParameterDescriptor alpha_dist;
|
Chris@0
|
290 alpha_dist.identifier = "alpha_dist";
|
Chris@0
|
291 alpha_dist.name = "alpha distribution";
|
Chris@0
|
292 alpha_dist.description = "Type of distribution of alpha values (linear or log).";
|
Chris@0
|
293 alpha_dist.unit = "";
|
Chris@0
|
294 alpha_dist.minValue = 0;
|
Chris@0
|
295 alpha_dist.maxValue = 1;
|
Chris@0
|
296 alpha_dist.defaultValue = 1;
|
Chris@0
|
297 alpha_dist.isQuantized = true;
|
Chris@0
|
298 alpha_dist.quantizeStep = 1.0;
|
Chris@0
|
299 // lin (0), log (1)
|
Chris@0
|
300 alpha_dist.valueNames.push_back("lin");
|
Chris@0
|
301 alpha_dist.valueNames.push_back("log");
|
Chris@0
|
302 list.push_back(alpha_dist);
|
Chris@0
|
303
|
Chris@0
|
304 // ============= F0-GRAM PARAMETERS =============
|
Chris@0
|
305
|
Chris@0
|
306 ParameterDescriptor f0min;
|
Chris@0
|
307 f0min.identifier = "f0min";
|
Chris@0
|
308 f0min.name = "min f0";
|
Chris@0
|
309 f0min.description = "Minimum fundamental frequency (f0) value.";
|
Chris@0
|
310 f0min.unit = "Hz";
|
Chris@0
|
311 f0min.minValue = 1;
|
Chris@0
|
312 f0min.maxValue = 500;
|
Chris@0
|
313 f0min.defaultValue = 80;
|
Chris@0
|
314 f0min.isQuantized = true;
|
Chris@0
|
315 f0min.quantizeStep = 1.0;
|
Chris@0
|
316 list.push_back(f0min);
|
Chris@0
|
317
|
Chris@0
|
318 ParameterDescriptor num_octs;
|
Chris@0
|
319 num_octs.identifier = "num_octs";
|
Chris@0
|
320 num_octs.name = "number of octaves";
|
Chris@0
|
321 num_octs.description = "Number of octaves for F0gram computation.";
|
Chris@0
|
322 num_octs.unit = "";
|
Chris@0
|
323 num_octs.minValue = 1;
|
Chris@0
|
324 num_octs.maxValue = 10;
|
Chris@0
|
325 num_octs.defaultValue = 4;
|
Chris@0
|
326 num_octs.isQuantized = true;
|
Chris@0
|
327 num_octs.quantizeStep = 1.0;
|
Chris@0
|
328 list.push_back(num_octs);
|
Chris@0
|
329
|
Chris@0
|
330 ParameterDescriptor num_f0_hyps;
|
Chris@0
|
331 num_f0_hyps.identifier = "num_f0_hyps";
|
Chris@0
|
332 num_f0_hyps.name = "number of f0 hypotesis";
|
Chris@0
|
333 num_f0_hyps.description = "Number of f0 hypotesis to extract.";
|
Chris@0
|
334 num_f0_hyps.unit = "";
|
Chris@0
|
335 num_f0_hyps.minValue = 1;
|
Chris@0
|
336 num_f0_hyps.maxValue = 100;
|
Chris@0
|
337 num_f0_hyps.defaultValue = 10;
|
Chris@0
|
338 num_f0_hyps.isQuantized = true;
|
Chris@0
|
339 num_f0_hyps.quantizeStep = 1.0;
|
Chris@0
|
340 list.push_back(num_f0_hyps);
|
Chris@0
|
341
|
Chris@0
|
342 ParameterDescriptor f0s_per_oct;
|
Chris@0
|
343 f0s_per_oct.identifier = "f0s_per_oct";
|
Chris@0
|
344 f0s_per_oct.name = "f0 values per octave";
|
Chris@0
|
345 f0s_per_oct.description = "Number of f0 values per octave.";
|
Chris@0
|
346 f0s_per_oct.unit = "";
|
Chris@0
|
347 f0s_per_oct.minValue = 12;
|
Chris@0
|
348 f0s_per_oct.maxValue = 768;
|
Chris@0
|
349 f0s_per_oct.defaultValue = 192;
|
Chris@0
|
350 f0s_per_oct.isQuantized = true;
|
Chris@0
|
351 f0s_per_oct.quantizeStep = 1.0;
|
Chris@0
|
352 list.push_back(f0s_per_oct);
|
Chris@0
|
353
|
Chris@0
|
354 ParameterDescriptor f0_prefer_fun;
|
Chris@0
|
355 f0_prefer_fun.identifier = "f0_prefer_fun";
|
Chris@0
|
356 f0_prefer_fun.name = "f0 preference function";
|
Chris@0
|
357 f0_prefer_fun.description = "Whether to use a f0 weighting function.";
|
Chris@0
|
358 f0_prefer_fun.unit = "";
|
Chris@0
|
359 f0_prefer_fun.minValue = 0;
|
Chris@0
|
360 f0_prefer_fun.maxValue = 1;
|
Chris@0
|
361 f0_prefer_fun.defaultValue = 1;
|
Chris@0
|
362 f0_prefer_fun.isQuantized = true;
|
Chris@0
|
363 f0_prefer_fun.quantizeStep = 1.0;
|
Chris@0
|
364 list.push_back(f0_prefer_fun);
|
Chris@0
|
365
|
Chris@0
|
366 ParameterDescriptor f0_prefer_mean;
|
Chris@0
|
367 f0_prefer_mean.identifier = "f0_prefer_mean";
|
Chris@0
|
368 f0_prefer_mean.name = "mean f0 preference function";
|
Chris@0
|
369 f0_prefer_mean.description = "Mean value for f0 weighting function (MIDI number).";
|
Chris@0
|
370 f0_prefer_mean.unit = "";
|
Chris@0
|
371 f0_prefer_mean.minValue = 1;
|
Chris@0
|
372 f0_prefer_mean.maxValue = 127;
|
Chris@0
|
373 f0_prefer_mean.defaultValue = 60;
|
Chris@0
|
374 f0_prefer_mean.isQuantized = true;
|
Chris@0
|
375 f0_prefer_mean.quantizeStep = 1.0;
|
Chris@0
|
376 list.push_back(f0_prefer_mean);
|
Chris@0
|
377
|
Chris@0
|
378 ParameterDescriptor f0_prefer_stdev;
|
Chris@0
|
379 f0_prefer_stdev.identifier = "f0_prefer_stdev";
|
Chris@0
|
380 f0_prefer_stdev.name = "stdev of f0 preference function";
|
Chris@0
|
381 f0_prefer_stdev.description = "Stdev for f0 weighting function (MIDI number).";
|
Chris@0
|
382 f0_prefer_stdev.unit = "";
|
Chris@0
|
383 f0_prefer_stdev.minValue = 1;
|
Chris@0
|
384 f0_prefer_stdev.maxValue = 127;
|
Chris@0
|
385 f0_prefer_stdev.defaultValue = 18;
|
Chris@0
|
386 f0_prefer_stdev.isQuantized = true;
|
Chris@0
|
387 f0_prefer_stdev.quantizeStep = 1.0;
|
Chris@0
|
388 list.push_back(f0_prefer_stdev);
|
Chris@0
|
389
|
Chris@0
|
390 ParameterDescriptor f0gram_mode;
|
Chris@0
|
391 f0gram_mode.identifier = "f0gram_mode";
|
Chris@0
|
392 f0gram_mode.name = "display mode of f0gram";
|
Chris@0
|
393 f0gram_mode.description = "Display all bins of the best direction, or the best bin for each direction.";
|
Chris@0
|
394 f0gram_mode.unit = "";
|
Chris@0
|
395 f0gram_mode.minValue = 0;
|
Chris@0
|
396 f0gram_mode.maxValue = 1;
|
Chris@0
|
397 f0gram_mode.defaultValue = 1;
|
Chris@0
|
398 f0gram_mode.isQuantized = true;
|
Chris@0
|
399 f0gram_mode.quantizeStep = 1.0;
|
Chris@0
|
400 list.push_back(f0gram_mode);
|
Chris@0
|
401
|
Chris@0
|
402 return list;
|
Chris@0
|
403 }
|
Chris@0
|
404
|
Chris@0
|
405 float
|
Chris@0
|
406 FChTransformF0gram::getParameter(string identifier) const {
|
Chris@0
|
407
|
Chris@0
|
408 if (identifier == "fmax") {
|
Chris@0
|
409 return m_fmax;
|
Chris@0
|
410 } else if (identifier == "nsamp") {
|
Chris@0
|
411 return m_warp_params.nsamps_twarp;
|
Chris@0
|
412 } else if (identifier == "alpha_max") {
|
Chris@0
|
413 return m_warp_params.alpha_max;
|
Chris@0
|
414 } else if (identifier == "num_warps") {
|
Chris@0
|
415 return m_warp_params.num_warps;
|
Chris@0
|
416 } else if (identifier == "alpha_dist") {
|
Chris@0
|
417 return m_warp_params.alpha_dist;
|
Chris@0
|
418 } else if (identifier == "nfft") {
|
Chris@0
|
419 return m_nfft;
|
Chris@0
|
420 } else if (identifier == "f0min") {
|
Chris@0
|
421 return m_f0_params.f0min;
|
Chris@0
|
422 } else if (identifier == "num_octs") {
|
Chris@0
|
423 return m_f0_params.num_octs;
|
Chris@0
|
424 } else if (identifier == "f0s_per_oct") {
|
Chris@0
|
425 return m_f0_params.num_f0s_per_oct;
|
Chris@0
|
426 } else if (identifier == "num_f0_hyps") {
|
Chris@0
|
427 return m_f0_params.num_f0_hyps;
|
Chris@0
|
428 } else if (identifier == "f0_prefer_fun") {
|
Chris@0
|
429 return m_f0_params.prefer;
|
Chris@0
|
430 } else if (identifier == "f0_prefer_mean") {
|
Chris@0
|
431 return m_f0_params.prefer_mean;
|
Chris@0
|
432 } else if (identifier == "f0_prefer_stdev") {
|
Chris@0
|
433 return m_f0_params.prefer_stdev;
|
Chris@7
|
434 } else if (identifier == "f0gram_mode") {
|
Chris@15
|
435 return m_f0gram_mode == BestBinOfAllDirections ? 1.0 : 0.0;
|
Chris@0
|
436 } else {
|
Chris@0
|
437 return 0.f;
|
Chris@0
|
438 }
|
Chris@0
|
439
|
Chris@0
|
440 }
|
Chris@0
|
441
|
Chris@15
|
442 void FChTransformF0gram::setParameter(string identifier, float value)
|
Chris@15
|
443 {
|
Chris@0
|
444 if (identifier == "fmax") {
|
Chris@0
|
445 m_fmax = value;
|
Chris@0
|
446 } else if (identifier == "nsamp") {
|
Chris@0
|
447 m_warp_params.nsamps_twarp = value;
|
Chris@0
|
448 } else if (identifier == "alpha_max") {
|
Chris@0
|
449 m_warp_params.alpha_max = value;
|
Chris@0
|
450 } else if (identifier == "num_warps") {
|
Chris@0
|
451 m_warp_params.num_warps = value;
|
Chris@0
|
452 } else if (identifier == "alpha_dist") {
|
Chris@0
|
453 m_warp_params.alpha_dist = value;
|
Chris@0
|
454 } else if (identifier == "nfft") {
|
Chris@0
|
455 m_nfft = value;
|
Chris@0
|
456 } else if (identifier == "f0min") {
|
Chris@0
|
457 m_f0_params.f0min = value;
|
Chris@0
|
458 } else if (identifier == "num_octs") {
|
Chris@0
|
459 m_f0_params.num_octs = value;
|
Chris@0
|
460 } else if (identifier == "f0s_per_oct") {
|
Chris@0
|
461 m_f0_params.num_f0s_per_oct = value;
|
Chris@0
|
462 } else if (identifier == "num_f0_hyps") {
|
Chris@0
|
463 m_f0_params.num_f0_hyps = value;
|
Chris@0
|
464 } else if (identifier == "f0_prefer_fun") {
|
Chris@0
|
465 m_f0_params.prefer = value;
|
Chris@0
|
466 } else if (identifier == "f0_prefer_mean") {
|
Chris@0
|
467 m_f0_params.prefer_mean = value;
|
Chris@0
|
468 } else if (identifier == "f0_prefer_stdev") {
|
Chris@0
|
469 m_f0_params.prefer_stdev = value;
|
Chris@0
|
470 } else if (identifier == "f0gram_mode") {
|
Chris@15
|
471 m_f0gram_mode = (value > 0.5 ?
|
Chris@15
|
472 BestBinOfAllDirections :
|
Chris@15
|
473 AllBinsOfBestDirection);
|
Chris@15
|
474 } else {
|
Chris@15
|
475 cerr << "WARNING: Unknown parameter id \""
|
Chris@15
|
476 << identifier << "\"" << endl;
|
Chris@0
|
477 }
|
Chris@0
|
478 }
|
Chris@0
|
479
|
Chris@0
|
480 FChTransformF0gram::ProgramList
|
Chris@0
|
481 FChTransformF0gram::getPrograms() const {
|
Chris@0
|
482 ProgramList list;
|
Chris@0
|
483 return list;
|
Chris@0
|
484 }
|
Chris@0
|
485
|
Chris@0
|
486 FChTransformF0gram::OutputList
|
Chris@0
|
487 FChTransformF0gram::getOutputDescriptors() const {
|
Chris@0
|
488
|
Chris@0
|
489 OutputList list;
|
Chris@0
|
490
|
Chris@16
|
491 vector<string> labels;
|
Chris@16
|
492 char label[100];
|
Chris@0
|
493
|
Chris@16
|
494 if (m_processingMode == ModeF0Gram) {
|
Chris@16
|
495
|
Chris@16
|
496 /* f0 values of F0gram grid as string values */
|
Chris@16
|
497 for (int i = 0; i < m_num_f0s; ++i) {
|
Chris@16
|
498 sprintf(label, "%4.2f Hz", m_f0s[i]);
|
Chris@16
|
499 labels.push_back(label);
|
Chris@16
|
500 }
|
Chris@16
|
501
|
Chris@16
|
502 /* The F0gram */
|
Chris@16
|
503 OutputDescriptor d;
|
Chris@16
|
504 d.identifier = "f0gram";
|
Chris@19
|
505 d.name = "F0gram";
|
Chris@19
|
506 d.description = "The salience of the different f0s in the signal.";
|
Chris@16
|
507 d.hasFixedBinCount = true;
|
Chris@16
|
508 d.binCount = m_f0_params.num_octs * m_f0_params.num_f0s_per_oct;
|
Chris@16
|
509 d.binNames = labels;
|
Chris@16
|
510 d.hasKnownExtents = false;
|
Chris@16
|
511 d.isQuantized = false;
|
Chris@16
|
512 d.sampleType = OutputDescriptor::OneSamplePerStep;
|
Chris@16
|
513 d.hasDuration = false;
|
Chris@16
|
514 list.push_back(d);
|
Chris@16
|
515
|
Chris@19
|
516 d.identifier = "pitch";
|
Chris@19
|
517 d.name = "Most salient pitch";
|
Chris@19
|
518 d.description = "The most salient f0 in the signal for each time step.";
|
Chris@19
|
519 d.unit = "Hz";
|
Chris@19
|
520 d.hasFixedBinCount = true;
|
Chris@19
|
521 d.binCount = 1;
|
Chris@19
|
522 d.binNames.clear();
|
Chris@19
|
523 d.hasKnownExtents = false;
|
Chris@19
|
524 d.isQuantized = false;
|
Chris@19
|
525 d.sampleType = OutputDescriptor::OneSamplePerStep;
|
Chris@19
|
526 d.hasDuration = false;
|
Chris@19
|
527 list.push_back(d);
|
Chris@19
|
528
|
Chris@16
|
529 } else {
|
Chris@16
|
530
|
Chris@16
|
531 for (int i = 0; i < m_warp_params.nsamps_twarp/2+1; ++i) {
|
Chris@16
|
532 double freq = i * (m_warpings.fs_warp / m_nfft);
|
Chris@16
|
533 sprintf(label, "%4.2f Hz", freq);
|
Chris@16
|
534 labels.push_back(label);
|
Chris@16
|
535 }
|
Chris@16
|
536
|
Chris@16
|
537 OutputDescriptor d;
|
Chris@16
|
538 d.identifier = "spectrogram";
|
Chris@16
|
539 d.name = "Spectrogram";
|
Chris@16
|
540 d.description = "Time/frequency spectrogram derived from the Fan Chirp Transform output";
|
Chris@16
|
541 d.hasFixedBinCount = true;
|
Chris@16
|
542 d.binCount = m_warp_params.nsamps_twarp/2+1;
|
Chris@16
|
543 d.binNames = labels;
|
Chris@16
|
544 d.hasKnownExtents = false;
|
Chris@16
|
545 d.isQuantized = false;
|
Chris@16
|
546 d.sampleType = OutputDescriptor::OneSamplePerStep;
|
Chris@16
|
547 d.hasDuration = false;
|
Chris@16
|
548 list.push_back(d);
|
Chris@0
|
549 }
|
Chris@16
|
550
|
Chris@0
|
551 return list;
|
Chris@0
|
552 }
|
Chris@0
|
553
|
Chris@0
|
554 bool
|
Chris@0
|
555 FChTransformF0gram::initialise(size_t channels, size_t stepSize, size_t blockSize) {
|
Chris@0
|
556 if (channels < getMinChannelCount() ||
|
Chris@14
|
557 channels > getMaxChannelCount()) {
|
Chris@14
|
558 return false;
|
Chris@14
|
559 }
|
Chris@0
|
560
|
Chris@0
|
561 // set blockSize and stepSize (but changed below)
|
Chris@0
|
562 m_blockSize = blockSize;
|
Chris@0
|
563 m_stepSize = stepSize;
|
Chris@0
|
564
|
Chris@0
|
565 // WARNING !!!
|
Chris@0
|
566 // these values in fact are determined by the sampling frequency m_fs
|
Chris@0
|
567 // the parameters used below correspond to default values i.e. m_fs = 44.100 Hz
|
Chris@0
|
568 //m_blockSize = 4 * m_warp_params.nsamps_twarp;
|
Chris@16
|
569 // m_stepSize = floor(m_hop / m_warp_params.fact_over_samp);
|
Chris@16
|
570
|
Chris@16
|
571 /* design of FChT */
|
Chris@16
|
572 design_FChT();
|
Chris@0
|
573
|
Chris@0
|
574 /* initialise m_glogs_params */
|
Chris@7
|
575 design_GLogS();
|
Chris@0
|
576
|
Chris@7
|
577 design_LPF();
|
Chris@0
|
578
|
Chris@7
|
579 design_time_window();
|
Chris@0
|
580
|
Chris@7
|
581 // Create Hanning window for warped signals
|
Chris@14
|
582 mp_HanningWindow = allocate<double>(m_warp_params.nsamps_twarp);
|
Chris@7
|
583 bool normalize = false;
|
Chris@14
|
584 Utils::hanning_window(mp_HanningWindow, m_warp_params.nsamps_twarp, normalize);
|
Chris@0
|
585
|
Chris@16
|
586 m_num_f0s = m_f0_params.num_octs * m_f0_params.num_f0s_per_oct;
|
Chris@16
|
587 m_f0s = allocate<double>(m_num_f0s);
|
Chris@16
|
588 for (int i = 0; i < m_num_f0s; ++i) {
|
Chris@16
|
589 m_f0s[i] = m_glogs_f0[m_glogs_init_f0s + i];
|
Chris@16
|
590 }
|
Chris@16
|
591
|
Chris@0
|
592 return true;
|
Chris@0
|
593 }
|
Chris@0
|
594
|
Chris@0
|
595 void
|
Chris@0
|
596 FChTransformF0gram::design_GLogS() {
|
Chris@0
|
597
|
Chris@7
|
598 // total number & initial quantity of f0s
|
Chris@16
|
599
|
Chris@16
|
600 cerr << "per oct = " << m_f0_params.num_f0s_per_oct << ", octs = " << m_f0_params.num_octs << endl;
|
Chris@10
|
601 m_glogs_init_f0s = (int)(((double)m_f0_params.num_f0s_per_oct)*log2(5.0))+1;
|
Chris@16
|
602 cerr << "init_f0s = " << m_glogs_init_f0s << endl;
|
Chris@7
|
603 m_glogs_num_f0s = (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct + m_glogs_init_f0s;
|
Chris@16
|
604 cerr << "num_f0s = " << m_glogs_num_f0s << endl;
|
Chris@0
|
605
|
Chris@7
|
606 // Initialize arrays
|
Chris@14
|
607 m_glogs_f0 = allocate<double>(m_glogs_num_f0s);
|
Chris@14
|
608 m_glogs = allocate<double>(m_glogs_num_f0s*m_warp_params.num_warps);
|
Chris@14
|
609 m_glogs_n = allocate<int>(m_glogs_num_f0s);
|
Chris@14
|
610 m_glogs_index = allocate<int>(m_glogs_num_f0s);
|
Chris@0
|
611
|
Chris@7
|
612 // Compute f0 values
|
Chris@7
|
613 m_glogs_harmonic_count = 0;
|
Chris@7
|
614 double factor = (double)(m_warp_params.nsamps_twarp/2)/(double)(m_warp_params.nsamps_twarp/2+1);
|
Chris@10
|
615 for (int i = 0; i < m_glogs_num_f0s; i++) {
|
Chris@7
|
616 m_glogs_f0[i] = (m_f0_params.f0min/5.0)*pow(2.0,(double)i/(double)m_f0_params.num_f0s_per_oct);
|
Chris@7
|
617 // for every f0 compute number of partials less or equal than m_fmax.
|
Chris@7
|
618 m_glogs_n[i] = m_fmax*factor/m_glogs_f0[i];
|
Chris@7
|
619 m_glogs_index[i] = m_glogs_harmonic_count;
|
Chris@7
|
620 m_glogs_harmonic_count += m_glogs_n[i];
|
Chris@7
|
621 }
|
Chris@0
|
622
|
Chris@7
|
623 // Initialize arrays for interpolation
|
Chris@14
|
624 m_glogs_posint = allocate<int>(m_glogs_harmonic_count);
|
Chris@14
|
625 m_glogs_posfrac = allocate<double>(m_glogs_harmonic_count);
|
Chris@14
|
626 m_glogs_interp = allocate<double>(m_glogs_harmonic_count);
|
Chris@0
|
627
|
Chris@7
|
628 // Compute int & frac of interpolation positions
|
Chris@10
|
629 int aux_index = 0;
|
Chris@7
|
630 double aux_pos;
|
Chris@10
|
631 for (int i = 0; i < m_glogs_num_f0s; i++) {
|
Chris@10
|
632 for (int j = 1; j <= m_glogs_n[i]; j++) {
|
Chris@18
|
633 aux_pos = ((double)j * m_glogs_f0[i]) * ((double)(m_warp_params.nsamps_twarp))/m_warpings.fs_warp;
|
Chris@10
|
634 m_glogs_posint[aux_index] = (int)aux_pos;
|
Chris@7
|
635 m_glogs_posfrac[aux_index] = aux_pos - (double)m_glogs_posint[aux_index];
|
Chris@7
|
636 aux_index++;
|
Chris@7
|
637 }
|
Chris@7
|
638 }
|
Chris@0
|
639
|
Chris@7
|
640 // Third harmonic attenuation
|
Chris@7
|
641 double aux_third_harmonic;
|
Chris@14
|
642 m_glogs_third_harmonic_posint = allocate<int>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@14
|
643 m_glogs_third_harmonic_posfrac = allocate<double>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@10
|
644 for (int i = 0; i < (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct; i++) {
|
Chris@7
|
645 aux_third_harmonic = (double)i + (double)m_glogs_init_f0s - ((double)m_f0_params.num_f0s_per_oct)*log2(3.0);
|
Chris@10
|
646 m_glogs_third_harmonic_posint[i] = (int)aux_third_harmonic;
|
Chris@7
|
647 m_glogs_third_harmonic_posfrac[i] = aux_third_harmonic - (double)(m_glogs_third_harmonic_posint[i]);
|
Chris@7
|
648 }
|
Chris@14
|
649 m_glogs_third_harmonic = allocate<double>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@0
|
650
|
Chris@7
|
651 // Fifth harmonic attenuation
|
Chris@7
|
652 double aux_fifth_harmonic;
|
Chris@14
|
653 m_glogs_fifth_harmonic_posint = allocate<int>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@14
|
654 m_glogs_fifth_harmonic_posfrac = allocate<double>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@10
|
655 for (int i = 0; i < (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct; i++) {
|
Chris@7
|
656 aux_fifth_harmonic = (double)i + (double)m_glogs_init_f0s - ((double)m_f0_params.num_f0s_per_oct)*log2(5.0);
|
Chris@10
|
657 m_glogs_fifth_harmonic_posint[i] = (int)aux_fifth_harmonic;
|
Chris@7
|
658 m_glogs_fifth_harmonic_posfrac[i] = aux_fifth_harmonic - (double)(m_glogs_fifth_harmonic_posint[i]);
|
Chris@7
|
659 }
|
Chris@14
|
660 m_glogs_fifth_harmonic = allocate<double>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@0
|
661
|
Chris@7
|
662 // Normalization & attenuation windows
|
Chris@14
|
663 m_glogs_f0_preference_weights = allocate<double>(m_f0_params.num_octs*m_f0_params.num_f0s_per_oct);
|
Chris@14
|
664 m_glogs_median_correction = allocate<double>(m_f0_params.num_octs*m_f0_params.num_f0s_per_oct);
|
Chris@14
|
665 m_glogs_sigma_correction = allocate<double>(m_f0_params.num_octs*m_f0_params.num_f0s_per_oct);
|
Chris@7
|
666 double MIDI_value;
|
Chris@10
|
667 for (int i = 0; i < m_f0_params.num_octs*m_f0_params.num_f0s_per_oct; i++) {
|
Chris@7
|
668 MIDI_value = 69.0 + 12.0 * log2(m_glogs_f0[i + m_glogs_init_f0s]/440.0);
|
Chris@7
|
669 m_glogs_f0_preference_weights[i] = 1.0/sqrt(2.0*M_PI*m_f0_params.prefer_stdev*m_f0_params.prefer_stdev)*exp(-(MIDI_value-m_f0_params.prefer_mean)*(MIDI_value-m_f0_params.prefer_mean)/(2.0*m_f0_params.prefer_stdev*m_f0_params.prefer_stdev));
|
Chris@7
|
670 m_glogs_f0_preference_weights[i] = (0.01 + m_glogs_f0_preference_weights[i]) / (1.01);
|
Chris@0
|
671
|
Chris@7
|
672 m_glogs_median_correction[i] = m_glogs_params.median_poly_coefs[0]*(i+1.0)*(i+1.0) + m_glogs_params.median_poly_coefs[1]*(i+1.0) + m_glogs_params.median_poly_coefs[2];
|
Chris@7
|
673 m_glogs_sigma_correction[i] = 1.0 / (m_glogs_params.sigma_poly_coefs[0]*(i+1.0)*(i+1.0) + m_glogs_params.sigma_poly_coefs[1]*(i+1.0) + m_glogs_params.sigma_poly_coefs[2]);
|
Chris@7
|
674 }
|
Chris@0
|
675 }
|
Chris@0
|
676
|
Chris@0
|
677 void
|
Chris@0
|
678 FChTransformF0gram::design_FChT() {
|
Chris@0
|
679
|
Chris@0
|
680 /*
|
Chris@0
|
681 * FILES FOR DEBUGGING
|
Chris@0
|
682 */
|
Chris@0
|
683
|
Chris@0
|
684 //ofstream output("output.txt");
|
Chris@0
|
685
|
Chris@0
|
686
|
Chris@0
|
687 /* ============= WARPING DESIGN ============= */
|
Chris@0
|
688
|
Chris@0
|
689 // sampling frequency after oversampling
|
Chris@0
|
690 m_warpings.fs_orig = m_warp_params.fact_over_samp * m_fs;
|
Chris@0
|
691
|
Chris@0
|
692 // number of samples of the original signal frame
|
Chris@0
|
693 m_warpings.nsamps_torig = 4 * m_warp_params.fact_over_samp * m_warp_params.nsamps_twarp;
|
Chris@0
|
694 // equivalent to: m_warpings.nsamps_torig = m_warp_params.fact_over_samp * m_blockSize;
|
Chris@0
|
695
|
Chris@0
|
696 // time instants of the original signal frame
|
Chris@14
|
697 double *t_orig = allocate<double>(m_warpings.nsamps_torig);
|
Chris@10
|
698 for (int ind = 0; ind < m_warpings.nsamps_torig; ind++) {
|
Chris@0
|
699 t_orig[ind] = ((double)(ind + 1) - (double)m_warpings.nsamps_torig / 2.0) / m_warpings.fs_orig;
|
Chris@0
|
700 }
|
Chris@0
|
701
|
Chris@0
|
702 // linear chirps warping definition as relative frequency deviation
|
Chris@7
|
703 //TODO
|
Chris@14
|
704 double *freq_relative = allocate<double>(m_warpings.nsamps_torig * m_warp_params.num_warps);
|
Chris@0
|
705 define_warps_linear_chirps(freq_relative, t_orig);
|
Chris@0
|
706
|
Chris@0
|
707 // maximum relative frequency deviation
|
Chris@0
|
708 double freq_relative_max = 0;
|
Chris@14
|
709 for (int i = 0; i < m_warpings.nsamps_torig; i++) {
|
Chris@14
|
710 for (int j = 0; j < m_warp_params.num_warps; j++) {
|
Chris@14
|
711 if (freq_relative_max < freq_relative[j * m_warpings.nsamps_torig + i]) {
|
Chris@0
|
712 freq_relative_max = freq_relative[j * m_warpings.nsamps_torig + i];
|
Chris@14
|
713 }
|
Chris@14
|
714 }
|
Chris@14
|
715 }
|
Chris@0
|
716
|
Chris@0
|
717 // sampling frequency of warped signal to be free of aliasing up to fmax
|
Chris@0
|
718 m_warpings.fs_warp = 2 * m_fmax * freq_relative_max;
|
Chris@0
|
719
|
Chris@0
|
720 // time instants of the warped signal frame
|
Chris@14
|
721 double *t_warp = allocate<double>(m_warp_params.nsamps_twarp);
|
Chris@10
|
722 for (int ind = 0; ind < m_warp_params.nsamps_twarp; ind++) {
|
Chris@0
|
723 t_warp[ind] = ((double)((int)(ind + 1)- (int)m_warp_params.nsamps_twarp / 2)) / (double)m_warpings.fs_warp;
|
Chris@0
|
724 }
|
Chris@0
|
725
|
Chris@0
|
726 // design of warpings for efficient interpolation
|
Chris@0
|
727 design_warps(freq_relative, t_orig, t_warp);
|
Chris@0
|
728
|
Chris@0
|
729
|
Chris@0
|
730 /*
|
Chris@0
|
731 * FILES FOR DEBUGGING
|
Chris@0
|
732 */
|
Chris@0
|
733
|
Chris@0
|
734 /*
|
Chris@7
|
735 output << "chirp_rates" << endl;
|
Chris@10
|
736 for (int j = 0; j < m_warp_params.num_warps; j++){
|
Chris@7
|
737 output << m_warpings.chirp_rates[j];
|
Chris@7
|
738 output << " ";
|
Chris@7
|
739 }
|
Chris@7
|
740 output << endl << "freq_relative" << endl;
|
Chris@0
|
741
|
Chris@10
|
742 for (int i = 0; i < m_warpings.nsamps_torig; i++){
|
Chris@10
|
743 for (int j = 0; j < m_warp_params.num_warps; j++){
|
Chris@7
|
744 output << freq_relative[j * m_warpings.nsamps_torig + i];
|
Chris@7
|
745 output << " ";
|
Chris@7
|
746 }
|
Chris@7
|
747 output << endl;
|
Chris@7
|
748 }
|
Chris@0
|
749
|
Chris@7
|
750 output << endl << "t_orig" << endl;
|
Chris@0
|
751
|
Chris@10
|
752 for (int i = 0; i < m_warpings.nsamps_torig; i++){
|
Chris@7
|
753 output << t_orig[i] << endl ;
|
Chris@7
|
754 }
|
Chris@7
|
755 */
|
Chris@0
|
756
|
Chris@14
|
757 deallocate(freq_relative);
|
Chris@14
|
758 deallocate(t_orig);
|
Chris@14
|
759 deallocate(t_warp);
|
Chris@14
|
760
|
Chris@0
|
761 //output.close();
|
Chris@0
|
762
|
Chris@0
|
763 /* ============= FFTW PLAN DESIGN ============= */
|
Chris@7
|
764 // Initialize 2-d array for warped signals
|
Chris@14
|
765 x_warping = allocate<double>(m_warp_params.nsamps_twarp);
|
Chris@14
|
766 m_absFanChirpTransform = allocate<double>(m_warp_params.num_warps * (m_warp_params.nsamps_twarp/2 + 1));
|
Chris@14
|
767 m_auxFanChirpTransform = allocate<double>(2 * (m_warp_params.nsamps_twarp/2 + 1));
|
Chris@14
|
768 fft_xwarping = new FFTReal(m_warp_params.nsamps_twarp);
|
Chris@0
|
769 }
|
Chris@0
|
770
|
Chris@0
|
771 void
|
Chris@0
|
772 FChTransformF0gram::design_warps(double * freq_relative, double * t_orig, double * t_warp) {
|
Chris@0
|
773 /* the warping is done by interpolating the original signal in time instants
|
Chris@0
|
774 given by the desired frequency deviation, to do this, the interpolation
|
Chris@0
|
775 instants are stored in a structure as an integer index and a fractional value
|
Chris@0
|
776 hypothesis: sampling frequency at the central point equals the original
|
Chris@7
|
777 */
|
Chris@0
|
778
|
Chris@14
|
779 m_warpings.pos_int = allocate<int>(m_warp_params.num_warps * m_warp_params.nsamps_twarp);
|
Chris@14
|
780 m_warpings.pos_frac = allocate<double>(m_warp_params.num_warps * m_warp_params.nsamps_twarp);
|
Chris@0
|
781
|
Chris@7
|
782 // vector of phase values
|
Chris@14
|
783 double *phi = allocate<double>(m_warpings.nsamps_torig);
|
Chris@7
|
784 double aux;
|
Chris@0
|
785
|
Chris@7
|
786 // warped positions
|
Chris@14
|
787 double *pos1 = allocate<double>(m_warp_params.nsamps_twarp*m_warp_params.num_warps);
|
Chris@0
|
788
|
Chris@10
|
789 for (int i = 0; i < m_warp_params.num_warps; i++) {
|
Chris@0
|
790
|
Chris@7
|
791 // integration of relative frequency to obtain phase values
|
Chris@14
|
792 Utils::cumtrapz(t_orig, freq_relative + i*(m_warpings.nsamps_torig), m_warpings.nsamps_torig, phi);
|
Chris@0
|
793
|
Chris@7
|
794 // centering of phase values to force original frequency in the middle
|
Chris@7
|
795 aux = phi[m_warpings.nsamps_torig/2];
|
Chris@10
|
796 for (int j = 0; j < m_warpings.nsamps_torig; j++) {
|
Chris@7
|
797 phi[j] -= aux;
|
Chris@7
|
798 } //for
|
Chris@0
|
799
|
Chris@7
|
800 // interpolation of phase values to obtain warped positions
|
Chris@14
|
801 Utils::interp1(phi, t_orig, m_warpings.nsamps_torig, t_warp, pos1 + i*m_warp_params.nsamps_twarp, m_warp_params.nsamps_twarp);
|
Chris@0
|
802 }
|
Chris@0
|
803
|
Chris@0
|
804 // % previous sample index
|
Chris@0
|
805 // pos1_int = uint32(floor(pos1))';
|
Chris@0
|
806 // % integer corresponding to previous sample index in "c"
|
Chris@0
|
807 // warps.pos1_int = (pos1_int - uint32(1));
|
Chris@0
|
808 // % fractional value that defines the warped position
|
Chris@0
|
809 // warps.pos1_frac = (double(pos1)' - double(pos1_int));
|
Chris@0
|
810
|
Chris@10
|
811 for (int j = 0; j < m_warp_params.nsamps_twarp*m_warp_params.num_warps; j++) {
|
Chris@7
|
812 // previous sample index
|
Chris@7
|
813 pos1[j] = pos1[j]*m_warpings.fs_orig + m_warpings.nsamps_torig/2 + 1;
|
Chris@10
|
814 m_warpings.pos_int[j] = (int) pos1[j];
|
Chris@7
|
815 m_warpings.pos_frac[j] = pos1[j] - (double)(m_warpings.pos_int[j]);
|
Chris@7
|
816 } //for
|
Chris@0
|
817
|
Chris@14
|
818 deallocate(phi);
|
Chris@14
|
819 deallocate(pos1);
|
Chris@0
|
820 }
|
Chris@0
|
821
|
Chris@0
|
822 void
|
Chris@0
|
823 FChTransformF0gram::define_warps_linear_chirps(double * freq_relative, double * t_orig) {
|
Chris@0
|
824 /** define warps as relative frequency deviation from original frequency
|
Chris@7
|
825 t_orig : time vector
|
Chris@7
|
826 freq_relative : relative frequency deviations
|
Chris@7
|
827 */
|
Chris@0
|
828 if (m_warp_params.alpha_dist == 0) {
|
Chris@0
|
829
|
Chris@0
|
830 // linear alpha values spacing
|
Chris@14
|
831 m_warpings.chirp_rates = allocate<double>(m_warp_params.num_warps);
|
Chris@0
|
832 // WARNING m_warp_params.num_warps must be odd
|
Chris@0
|
833 m_warpings.chirp_rates[0] = -m_warp_params.alpha_max;
|
Chris@0
|
834 double increment = (double) m_warp_params.alpha_max / ((m_warp_params.num_warps - 1) / 2);
|
Chris@0
|
835
|
Chris@10
|
836 for (int ind = 1; ind < m_warp_params.num_warps; ind++) {
|
Chris@0
|
837 m_warpings.chirp_rates[ind] = m_warpings.chirp_rates[ind - 1] + increment;
|
Chris@0
|
838 }
|
Chris@0
|
839 // force zero value
|
Chris@0
|
840 m_warpings.chirp_rates[(int) ((m_warp_params.num_warps - 1) / 2)] = 0;
|
Chris@0
|
841
|
Chris@0
|
842 } else {
|
Chris@0
|
843 // log alpha values spacing
|
Chris@14
|
844 m_warpings.chirp_rates = allocate<double>(m_warp_params.num_warps);
|
Chris@0
|
845
|
Chris@0
|
846 // force zero value
|
Chris@0
|
847 int middle_point = (int) ((m_warp_params.num_warps - 1) / 2);
|
Chris@0
|
848 m_warpings.chirp_rates[middle_point] = 0;
|
Chris@0
|
849
|
Chris@0
|
850 double logMax = log10(m_warp_params.alpha_max + 1);
|
Chris@0
|
851 double increment = logMax / ((m_warp_params.num_warps - 1) / 2.0f);
|
Chris@0
|
852 double exponent = 0;
|
Chris@0
|
853
|
Chris@0
|
854 // fill positive values
|
Chris@0
|
855 int ind_log = middle_point;
|
Chris@10
|
856 for (int ind = 0; ind < (m_warp_params.num_warps + 1) / 2; ind++) {
|
Chris@0
|
857 m_warpings.chirp_rates[ind_log] = pow(10, exponent) - 1;
|
Chris@0
|
858 exponent += increment;
|
Chris@0
|
859 ind_log++;
|
Chris@0
|
860 }
|
Chris@0
|
861 // fill negative values
|
Chris@10
|
862 for (int ind = 0; ind < (m_warp_params.num_warps - 1) / 2; ind++) {
|
Chris@0
|
863 m_warpings.chirp_rates[ind] = -m_warpings.chirp_rates[m_warp_params.num_warps - 1 - ind];
|
Chris@0
|
864 }
|
Chris@0
|
865 }
|
Chris@0
|
866
|
Chris@0
|
867 // compute relative frequency deviation
|
Chris@14
|
868 for (int i = 0; i < m_warpings.nsamps_torig; i++) {
|
Chris@14
|
869 for (int j = 0; j < m_warp_params.num_warps; j++) {
|
Chris@0
|
870 freq_relative[j * m_warpings.nsamps_torig + i] = 1.0 + t_orig[i] * m_warpings.chirp_rates[j];
|
Chris@14
|
871 }
|
Chris@14
|
872 }
|
Chris@0
|
873 }
|
Chris@0
|
874
|
Chris@0
|
875 void
|
Chris@14
|
876 FChTransformF0gram::design_LPF()
|
Chris@14
|
877 {
|
Chris@14
|
878 double *lp_LPFWindow_aux = allocate<double>(m_blockSize/2+1);
|
Chris@14
|
879 mp_LPFWindow = allocate<double>(m_blockSize/2+1);
|
Chris@0
|
880
|
Chris@10
|
881 int i_max = (int) ((2.0*m_fmax/m_fs) * ( (double)m_blockSize / 2.0 + 1.0 ));
|
Chris@10
|
882 for (int i = 0; i < m_blockSize/2+1; i++) {
|
Chris@0
|
883 if (i >= i_max) {
|
Chris@0
|
884 lp_LPFWindow_aux[i] = 0.0;
|
Chris@0
|
885 } else {
|
Chris@0
|
886 lp_LPFWindow_aux[i] = 1.0;
|
Chris@0
|
887 }
|
Chris@0
|
888 }
|
Chris@14
|
889
|
Chris@14
|
890 LPF_time = allocate_and_zero<double>(m_warpings.nsamps_torig);
|
Chris@14
|
891 LPF_frequency = allocate_and_zero<double>(2 * (m_warpings.nsamps_torig/2 + 1));
|
Chris@14
|
892
|
Chris@14
|
893 fft_forward_LPF = new FFTReal(m_blockSize);
|
Chris@14
|
894 fft_inverse_LPF = new FFTReal(m_warpings.nsamps_torig);
|
Chris@0
|
895
|
Chris@10
|
896 int winWidth = 11;
|
Chris@14
|
897 double *lp_hanningWindow = allocate<double>(winWidth);
|
Chris@0
|
898 double accum=0;
|
Chris@10
|
899 for (int i = 0; i < winWidth; i++) {
|
Chris@0
|
900 lp_hanningWindow[i]=0.5*(1.0-cos(2*M_PI*(double)(i+1)/((double)winWidth+1.0)));
|
Chris@0
|
901 accum+=lp_hanningWindow[i];
|
Chris@0
|
902
|
Chris@0
|
903 }
|
Chris@10
|
904 for (int i = 0; i < winWidth; i++) { //window normalization
|
Chris@0
|
905 lp_hanningWindow[i]=lp_hanningWindow[i]/accum;
|
Chris@0
|
906 }
|
Chris@10
|
907 for (int i = 0; i < m_blockSize/2+1; i++) {
|
Chris@0
|
908 //if (((i-(winWidth-1)/2)<0)||(i+(winWidth-1))/2>m_blockSize/2-1) {//consideramos winWidth impar, si la ventana sale del arreglo se rellena con el valor origianl
|
Chris@7
|
909 if ( (i > (i_max + (winWidth-1)/2)) || (i <= (i_max - (winWidth-1)/2)) ) {
|
Chris@0
|
910 mp_LPFWindow[i]=lp_LPFWindow_aux[i];
|
Chris@0
|
911 } else {
|
Chris@0
|
912 accum=0;
|
Chris@10
|
913 for (int j = -((winWidth-1)/2); j <= (winWidth-1)/2; j++) {
|
Chris@0
|
914 accum+=lp_LPFWindow_aux[i-j]*lp_hanningWindow[j+(winWidth-1)/2];
|
Chris@7
|
915 }
|
Chris@0
|
916 mp_LPFWindow[i]=accum;
|
Chris@0
|
917 }
|
Chris@0
|
918 }
|
Chris@0
|
919
|
Chris@14
|
920 deallocate(lp_LPFWindow_aux);
|
Chris@14
|
921 deallocate(lp_hanningWindow);
|
Chris@0
|
922 }
|
Chris@0
|
923
|
Chris@14
|
924 void FChTransformF0gram::apply_LPF()
|
Chris@14
|
925 {
|
Chris@14
|
926 fft_forward_LPF->forward(LPF_time, LPF_frequency);
|
Chris@14
|
927
|
Chris@10
|
928 for (int i = 0; i < m_blockSize/2+1; i++) {
|
Chris@16
|
929 LPF_frequency[i*2] *= mp_LPFWindow[i];
|
Chris@16
|
930 LPF_frequency[i*2 + 1] *= mp_LPFWindow[i];
|
Chris@0
|
931 }
|
Chris@14
|
932
|
Chris@14
|
933 fft_inverse_LPF->inverse(LPF_frequency, LPF_time);
|
Chris@0
|
934
|
Chris@7
|
935 // TODO ver si hay que hacer fftshift para corregir la fase respecto al centro del frame.
|
Chris@7
|
936 // nota: ademÔs de aplicar el LPF, esta función resamplea la señal original.
|
Chris@0
|
937 }
|
Chris@0
|
938
|
Chris@14
|
939 void FChTransformF0gram::clean_LPF()
|
Chris@14
|
940 {
|
Chris@14
|
941 delete fft_forward_LPF;
|
Chris@14
|
942 delete fft_inverse_LPF;
|
Chris@14
|
943 deallocate(LPF_time);
|
Chris@14
|
944 deallocate(LPF_frequency);
|
Chris@14
|
945 deallocate(mp_LPFWindow);
|
Chris@0
|
946 }
|
Chris@0
|
947
|
Chris@14
|
948 void FChTransformF0gram::reset()
|
Chris@14
|
949 {
|
Chris@0
|
950 }
|
Chris@0
|
951
|
Chris@0
|
952 FChTransformF0gram::FeatureSet
|
Chris@5
|
953 FChTransformF0gram::process(const float *const *inputBuffers, Vamp::RealTime) {
|
Chris@0
|
954
|
Chris@0
|
955 // // Do actual work!
|
Chris@0
|
956 //
|
Chris@0
|
957
|
Chris@7
|
958 /* PSEUDOCĆDIGO:
|
Chris@7
|
959 - Aplicar FFT al frame entero.
|
Chris@7
|
960 - Filtro pasabajos en frecuencia.
|
Chris@7
|
961 - FFT inversa al frame entero.
|
Chris@7
|
962 -----------------------------------------------------------------------------
|
Chris@7
|
963 - Para cada warp: *Si es un espectrograma direccional (un solo warp
|
Chris@7
|
964 => no es para cada warp sino para el elegido)
|
Chris@7
|
965 - Hacer la interpolación con interp1q.
|
Chris@7
|
966 - Aplicar la FFT al frame warpeado.
|
Chris@7
|
967 - (Opcional) GLogS.
|
Chris@7
|
968 - ...
|
Chris@7
|
969 */
|
Chris@0
|
970
|
Chris@0
|
971 //---------------------------------------------------------------------------
|
Chris@7
|
972 FeatureSet fs;
|
Chris@0
|
973
|
Chris@7
|
974 #ifdef DEBUG
|
Chris@16
|
975 fprintf(stderr, "\n ----- DEBUG INFORMATION ----- \n");
|
Chris@16
|
976 fprintf(stderr, " m_fs = %f Hz.\n",m_fs);
|
Chris@16
|
977 fprintf(stderr, " fs_orig = %f Hz.\n",m_warpings.fs_orig);
|
Chris@16
|
978 fprintf(stderr, " fs_warp = %f Hz.\n",m_warpings.fs_warp);
|
Chris@16
|
979 fprintf(stderr, " m_nfft = %d.\n",m_nfft);
|
Chris@16
|
980 fprintf(stderr, " m_blockSize = %d.\n",m_blockSize);
|
Chris@16
|
981 fprintf(stderr, " m_warpings.nsamps_torig = %d.\n",m_warpings.nsamps_torig);
|
Chris@16
|
982 fprintf(stderr, " m_warp_params.num_warps = %d.\n",m_warp_params.num_warps);
|
Chris@16
|
983 fprintf(stderr, " m_glogs_harmonic_count = %d.\n",m_glogs_harmonic_count);
|
Chris@7
|
984 #endif
|
Chris@0
|
985
|
Chris@10
|
986 for (int i = 0; i < m_blockSize; i++) {
|
Chris@0
|
987 LPF_time[i] = (double)(inputBuffers[0][i]) * m_timeWindow[i];
|
Chris@16
|
988 LPF_time[m_blockSize+i] = 0.0;
|
Chris@0
|
989 }
|
Chris@0
|
990
|
Chris@0
|
991 // #ifdef DEBUG
|
Chris@16
|
992 // fprintf(stderr, " HASTA ACĆ ANDA!!!\n");
|
Chris@0
|
993 // cout << flush;
|
Chris@0
|
994 // #endif
|
Chris@0
|
995
|
Chris@7
|
996 apply_LPF();
|
Chris@7
|
997 // SeƱal filtrada queda en LPF_time
|
Chris@0
|
998
|
Chris@7
|
999 Feature feature;
|
Chris@0
|
1000 feature.hasTimestamp = false;
|
Chris@0
|
1001
|
Chris@15
|
1002 if (m_processingMode == ModeRoughSpectrogram) {
|
Chris@15
|
1003 feature.values = vector<float>(m_warp_params.nsamps_twarp/2+1, 0.f);
|
Chris@15
|
1004 }
|
Chris@15
|
1005
|
Chris@0
|
1006 // ----------------------------------------------------------------------------------------------
|
Chris@0
|
1007 // Hanning window & FFT for all warp directions
|
Chris@0
|
1008
|
Chris@7
|
1009 double max_glogs = -DBL_MAX;
|
Chris@10
|
1010 int ind_max_glogs = 0;
|
Chris@0
|
1011
|
Chris@10
|
1012 for (int i_warp = 0; i_warp < m_warp_params.num_warps; i_warp++) {
|
Chris@16
|
1013
|
Chris@7
|
1014 // Interpolate
|
Chris@14
|
1015 Utils::interp1q(LPF_time, (m_warpings.pos_int) + i_warp*m_warp_params.nsamps_twarp, m_warpings.pos_frac + i_warp*m_warp_params.nsamps_twarp, x_warping, m_warp_params.nsamps_twarp);
|
Chris@0
|
1016
|
Chris@7
|
1017 // Apply window
|
Chris@10
|
1018 for (int i = 0; i < m_warp_params.nsamps_twarp; i++) {
|
Chris@7
|
1019 x_warping[i] *= mp_HanningWindow[i];
|
Chris@7
|
1020 }
|
Chris@0
|
1021
|
Chris@7
|
1022 // Transform
|
Chris@14
|
1023 fft_xwarping->forward(x_warping, m_auxFanChirpTransform);
|
Chris@0
|
1024
|
Chris@15
|
1025 if (m_processingMode == ModeRoughSpectrogram) {
|
Chris@15
|
1026 for (int i = 0; i < (m_warp_params.nsamps_twarp/2+1); i++) {
|
Chris@15
|
1027 double abs = sqrt(m_auxFanChirpTransform[i*2]*m_auxFanChirpTransform[i*2]+m_auxFanChirpTransform[i*2+1]*m_auxFanChirpTransform[i*2+1]);
|
Chris@15
|
1028 if (abs > feature.values[i]) {
|
Chris@15
|
1029 feature.values[i] = abs;
|
Chris@15
|
1030 }
|
Chris@15
|
1031 }
|
Chris@15
|
1032 continue;
|
Chris@15
|
1033 }
|
Chris@15
|
1034
|
Chris@7
|
1035 // Copy result
|
Chris@7
|
1036 double *aux_abs_fcht = m_absFanChirpTransform + i_warp*(m_warp_params.nsamps_twarp/2+1);
|
Chris@10
|
1037 for (int i = 0; i < (m_warp_params.nsamps_twarp/2+1); i++) {
|
Chris@14
|
1038 aux_abs_fcht[i] = log10(1.0 + 10.0*sqrt(m_auxFanChirpTransform[i*2]*m_auxFanChirpTransform[i*2]+m_auxFanChirpTransform[i*2+1]*m_auxFanChirpTransform[i*2+1]));
|
Chris@7
|
1039 }
|
Chris@0
|
1040
|
Chris@0
|
1041 // -----------------------------------------------------------------------------------------
|
Chris@0
|
1042 // GLogS
|
Chris@14
|
1043 Utils::interp1q(aux_abs_fcht, m_glogs_posint, m_glogs_posfrac, m_glogs_interp, m_glogs_harmonic_count);
|
Chris@10
|
1044 int glogs_ind = 0;
|
Chris@10
|
1045 for (int i = 0; i < m_glogs_num_f0s; i++) {
|
Chris@7
|
1046 double glogs_accum = 0;
|
Chris@10
|
1047 for (int j = 1; j <= m_glogs_n[i]; j++) {
|
Chris@7
|
1048 glogs_accum += m_glogs_interp[glogs_ind++];
|
Chris@7
|
1049 }
|
Chris@7
|
1050 m_glogs[i + i_warp*m_glogs_num_f0s] = glogs_accum/(double)m_glogs_n[i];
|
Chris@7
|
1051 }
|
Chris@0
|
1052
|
Chris@0
|
1053 // Sub/super harmonic correction
|
Chris@14
|
1054 Utils::interp1q(m_glogs + i_warp*m_glogs_num_f0s, m_glogs_third_harmonic_posint, m_glogs_third_harmonic_posfrac, m_glogs_third_harmonic, (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@14
|
1055 Utils::interp1q(m_glogs + i_warp*m_glogs_num_f0s, m_glogs_fifth_harmonic_posint, m_glogs_fifth_harmonic_posfrac, m_glogs_fifth_harmonic, (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@10
|
1056 for (int i = m_glogs_num_f0s-1; i >= m_glogs_init_f0s; i--) {
|
Chris@7
|
1057 m_glogs[i + i_warp*m_glogs_num_f0s] -= MAX(MAX(m_glogs[i-m_f0_params.num_f0s_per_oct + i_warp*m_glogs_num_f0s],m_glogs_third_harmonic[i-m_glogs_init_f0s]),m_glogs_fifth_harmonic[i-m_glogs_init_f0s]);
|
Chris@7
|
1058 //m_glogs[i] -= MAX(m_glogs[i-m_f0_params.num_f0s_per_oct],m_glogs_third_harmonic[i-m_glogs_init_f0s]);
|
Chris@7
|
1059 }
|
Chris@10
|
1060 for (int i = m_glogs_init_f0s; i < m_glogs_num_f0s-m_f0_params.num_f0s_per_oct; i++) {
|
Chris@7
|
1061 m_glogs[i + i_warp*m_glogs_num_f0s] -= 0.3*m_glogs[i+m_f0_params.num_f0s_per_oct + i_warp*m_glogs_num_f0s];
|
Chris@7
|
1062 // Median, sigma $ weights correction
|
Chris@7
|
1063 m_glogs[i + i_warp*m_glogs_num_f0s] = (m_glogs[i + i_warp*m_glogs_num_f0s]-m_glogs_median_correction[i-m_glogs_init_f0s])*m_glogs_sigma_correction[i-m_glogs_init_f0s]*m_glogs_f0_preference_weights[i-m_glogs_init_f0s];
|
Chris@7
|
1064 }
|
Chris@0
|
1065
|
Chris@7
|
1066 // Look for maximum value to determine best direction
|
Chris@10
|
1067 for (int i = m_glogs_init_f0s; i < m_glogs_num_f0s-m_f0_params.num_f0s_per_oct; i++) {
|
Chris@7
|
1068 if (m_glogs[i + i_warp*m_glogs_num_f0s] > max_glogs) {
|
Chris@7
|
1069 max_glogs = m_glogs[i + i_warp*m_glogs_num_f0s];
|
Chris@7
|
1070 ind_max_glogs = i_warp;
|
Chris@7
|
1071 }
|
Chris@7
|
1072 }
|
Chris@7
|
1073 }
|
Chris@0
|
1074
|
Chris@15
|
1075 if (m_processingMode == ModeRoughSpectrogram) {
|
Chris@15
|
1076
|
Chris@15
|
1077 // already accumulated our return values in feature
|
Chris@19
|
1078 fs[0].push_back(feature);
|
Chris@15
|
1079
|
Chris@15
|
1080 } else if (m_processingMode == ModeSpectrogram) {
|
Chris@15
|
1081
|
Chris@15
|
1082 for (int i = 0; i < m_warp_params.nsamps_twarp/2+1; i++) {
|
Chris@15
|
1083 feature.values.push_back(pow(10.0, m_absFanChirpTransform[ind_max_glogs * (m_warp_params.nsamps_twarp/2+1) + i]) - 1.0);
|
Chris@15
|
1084 }
|
Chris@19
|
1085 fs[0].push_back(feature);
|
Chris@15
|
1086
|
Chris@15
|
1087 } else { // f0gram
|
Chris@15
|
1088
|
Chris@19
|
1089 int bestIndex = -1;
|
Chris@19
|
1090
|
Chris@15
|
1091 for (int i=m_glogs_init_f0s; i< m_glogs_num_f0s - m_f0_params.num_f0s_per_oct; i++) {
|
Chris@19
|
1092 double value = 0.0;
|
Chris@15
|
1093 switch (m_f0gram_mode) {
|
Chris@15
|
1094 case AllBinsOfBestDirection:
|
Chris@19
|
1095 value = m_glogs[i+(int)ind_max_glogs*(int)m_glogs_num_f0s];
|
Chris@15
|
1096 break;
|
Chris@15
|
1097 case BestBinOfAllDirections:
|
Chris@15
|
1098 max_glogs = -DBL_MAX;
|
Chris@15
|
1099 for (int i_warp = 0; i_warp < m_warp_params.num_warps; i_warp++) {
|
Chris@15
|
1100 if (m_glogs[i + i_warp*m_glogs_num_f0s] > max_glogs) {
|
Chris@15
|
1101 max_glogs = m_glogs[i + i_warp*m_glogs_num_f0s];
|
Chris@15
|
1102 ind_max_glogs = i_warp;
|
Chris@15
|
1103 }
|
Chris@7
|
1104 }
|
Chris@19
|
1105 value = max_glogs;
|
Chris@15
|
1106 break;
|
Chris@7
|
1107 }
|
Chris@19
|
1108 if (bestIndex < 0 || float(value) > feature.values[bestIndex]) {
|
Chris@19
|
1109 bestIndex = int(feature.values.size());
|
Chris@19
|
1110 }
|
Chris@19
|
1111 feature.values.push_back(float(value));
|
Chris@19
|
1112 }
|
Chris@19
|
1113
|
Chris@19
|
1114 fs[0].push_back(feature);
|
Chris@19
|
1115
|
Chris@19
|
1116 if (bestIndex >= 0) {
|
Chris@19
|
1117
|
Chris@19
|
1118 double bestValue = feature.values[bestIndex];
|
Chris@19
|
1119 set<double> ordered(feature.values.begin(), feature.values.end());
|
Chris@19
|
1120 vector<double> flattened(ordered.begin(), ordered.end());
|
Chris@19
|
1121 double median = flattened[flattened.size()/2];
|
Chris@19
|
1122 if (bestValue > median * 8.0) {
|
Chris@19
|
1123 Feature pfeature;
|
Chris@19
|
1124 pfeature.hasTimestamp = false;
|
Chris@19
|
1125 pfeature.values.push_back(m_f0s[bestIndex]);
|
Chris@19
|
1126 fs[1].push_back(pfeature);
|
Chris@19
|
1127 }
|
Chris@7
|
1128 }
|
Chris@7
|
1129 }
|
Chris@0
|
1130
|
Chris@7
|
1131 return fs;
|
Chris@0
|
1132 }
|
Chris@0
|
1133
|
Chris@0
|
1134 FChTransformF0gram::FeatureSet
|
Chris@0
|
1135 FChTransformF0gram::getRemainingFeatures() {
|
Chris@0
|
1136 return FeatureSet();
|
Chris@0
|
1137 }
|
Chris@0
|
1138
|
Chris@0
|
1139 void
|
Chris@0
|
1140 FChTransformF0gram::design_time_window() {
|
Chris@0
|
1141
|
Chris@10
|
1142 int transitionWidth = (int)m_blockSize/128 + 1;;
|
Chris@14
|
1143 m_timeWindow = allocate<double>(m_blockSize);
|
Chris@14
|
1144 double *lp_transitionWindow = allocate<double>(transitionWidth);
|
Chris@0
|
1145
|
Chris@7
|
1146 //memset(m_timeWindow, 1.0, m_blockSize);
|
Chris@10
|
1147 for (int i = 0; i < m_blockSize; i++) {
|
Chris@7
|
1148 m_timeWindow[i] = 1.0;
|
Chris@7
|
1149 }
|
Chris@0
|
1150
|
Chris@10
|
1151 for (int i = 0; i < transitionWidth; i++) {
|
Chris@0
|
1152 lp_transitionWindow[i]=0.5*(1.0-cos(2*M_PI*(double)(i+1)/((double)transitionWidth+1.0)));
|
Chris@0
|
1153 }
|
Chris@0
|
1154
|
Chris@10
|
1155 for (int i = 0; i < transitionWidth/2; i++) {
|
Chris@7
|
1156 m_timeWindow[i] = lp_transitionWindow[i];
|
Chris@7
|
1157 m_timeWindow[m_blockSize-1-i] = lp_transitionWindow[transitionWidth-1-i];
|
Chris@7
|
1158 }
|
Chris@0
|
1159
|
Chris@7
|
1160 #ifdef DEBUG
|
Chris@7
|
1161 for (int i = 0; i < m_blockSize; i++) {
|
Chris@7
|
1162 if ((i<transitionWidth)) {
|
Chris@16
|
1163 fprintf(stderr, " m_timeWindow[%d] = %f.\n",i,m_timeWindow[i]);
|
Chris@7
|
1164 }
|
Chris@7
|
1165 }
|
Chris@7
|
1166 #endif
|
Chris@0
|
1167
|
Chris@14
|
1168 deallocate(lp_transitionWindow);
|
Chris@0
|
1169 }
|
Chris@0
|
1170
|