Chris@0
|
1 /*
|
Chris@0
|
2 copyright (C) 2011 I. Irigaray, M. Rocamora
|
Chris@0
|
3
|
Chris@0
|
4 This program is free software: you can redistribute it and/or modify
|
Chris@0
|
5 it under the terms of the GNU General Public License as published by
|
Chris@0
|
6 the Free Software Foundation, either version 3 of the License, or
|
Chris@0
|
7 (at your option) any later version.
|
Chris@0
|
8
|
Chris@0
|
9 This program is distributed in the hope that it will be useful,
|
Chris@0
|
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
|
Chris@0
|
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
Chris@0
|
12 GNU General Public License for more details.
|
Chris@0
|
13
|
Chris@0
|
14 You should have received a copy of the GNU General Public License
|
Chris@0
|
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
|
Chris@7
|
16 */
|
Chris@0
|
17
|
Chris@0
|
18 #include "FChTransformF0gram.h"
|
Chris@0
|
19 #include "FChTransformUtils.h"
|
Chris@0
|
20 #include <math.h>
|
Chris@0
|
21 #include <float.h>
|
Chris@14
|
22
|
Chris@19
|
23 #include <set>
|
Chris@19
|
24
|
Chris@14
|
25 #include "bqvec/Allocators.h"
|
Chris@14
|
26
|
Chris@14
|
27 using namespace breakfastquay;
|
Chris@14
|
28
|
Chris@27
|
29 //#define DEBUG
|
Chris@7
|
30
|
Chris@0
|
31 #define MAX(x, y) (((x) > (y)) ? (x) : (y))
|
Chris@0
|
32
|
Chris@15
|
33 FChTransformF0gram::FChTransformF0gram(ProcessingMode mode,
|
Chris@15
|
34 float inputSampleRate) :
|
Chris@7
|
35 Plugin(inputSampleRate),
|
Chris@15
|
36 m_processingMode(mode),
|
Chris@20
|
37 m_initialised(false),
|
Chris@20
|
38 m_stepSize(256),
|
Chris@20
|
39 m_blockSize(8192) {
|
Chris@0
|
40
|
Chris@24
|
41 nsamp_options.push_back(256);
|
Chris@24
|
42 nsamp_options.push_back(512);
|
Chris@24
|
43 nsamp_options.push_back(1024);
|
Chris@24
|
44 nsamp_options.push_back(2048);
|
Chris@24
|
45 nsamp_options.push_back(4096);
|
Chris@24
|
46 nsamp_options.push_back(8192);
|
Chris@24
|
47
|
Chris@0
|
48 m_fs = inputSampleRate;
|
Chris@0
|
49 // max frequency of interest (Hz)
|
Chris@0
|
50 m_fmax = 10000.f;
|
Chris@0
|
51 // warping parameters
|
Chris@12
|
52 m_warp_params.nsamps_twarp = 2048;
|
Chris@0
|
53 m_warp_params.alpha_max = 4;
|
Chris@0
|
54 m_warp_params.num_warps = 21;
|
Chris@0
|
55 m_warp_params.fact_over_samp = 2;
|
Chris@0
|
56 m_warp_params.alpha_dist = 0;
|
Chris@0
|
57 // f0 parameters
|
Chris@0
|
58 m_f0_params.f0min = 80.0;
|
Chris@0
|
59 m_f0_params.num_octs = 4;
|
Chris@0
|
60 m_f0_params.num_f0s_per_oct = 192;
|
Chris@0
|
61 m_f0_params.prefer = true;
|
Chris@0
|
62 m_f0_params.prefer_mean = 60;
|
Chris@0
|
63 m_f0_params.prefer_stdev = 18;
|
Chris@0
|
64 // glogs parameters
|
Chris@0
|
65 m_glogs_params.HP_logS = true;
|
Chris@0
|
66 m_glogs_params.att_subharms = 1;
|
Chris@7
|
67 // display parameters
|
Chris@15
|
68 m_f0gram_mode = BestBinOfAllDirections;
|
Chris@0
|
69
|
Chris@0
|
70 m_glogs_params.median_poly_coefs[0] = -0.000000058551680;
|
Chris@0
|
71 m_glogs_params.median_poly_coefs[1] = -0.000006945207775;
|
Chris@0
|
72 m_glogs_params.median_poly_coefs[2] = 0.002357223226588;
|
Chris@0
|
73
|
Chris@0
|
74 m_glogs_params.sigma_poly_coefs[0] = 0.000000092782308;
|
Chris@0
|
75 m_glogs_params.sigma_poly_coefs[1] = 0.000057283574898;
|
Chris@0
|
76 m_glogs_params.sigma_poly_coefs[2] = 0.022199903714288;
|
Chris@0
|
77
|
Chris@0
|
78 m_num_f0s = 0;
|
Chris@16
|
79 m_f0s = 0;
|
Chris@0
|
80 }
|
Chris@0
|
81
|
Chris@14
|
82 FChTransformF0gram::~FChTransformF0gram()
|
Chris@14
|
83 {
|
Chris@20
|
84 if (!m_initialised) {
|
Chris@14
|
85 return; // nothing was allocated
|
Chris@14
|
86 }
|
Chris@14
|
87
|
Chris@20
|
88 deallocate(m_inputBuffer);
|
Chris@20
|
89
|
Chris@14
|
90 deallocate(m_warpings.pos_int);
|
Chris@14
|
91 deallocate(m_warpings.pos_frac);
|
Chris@14
|
92 deallocate(m_warpings.chirp_rates);
|
Chris@14
|
93
|
Chris@14
|
94 clean_LPF();
|
Chris@14
|
95
|
Chris@14
|
96 deallocate(m_timeWindow);
|
Chris@14
|
97
|
Chris@14
|
98 deallocate(mp_HanningWindow);
|
Chris@14
|
99
|
Chris@14
|
100 // Warping
|
Chris@14
|
101 deallocate(x_warping);
|
Chris@14
|
102 delete fft_xwarping;
|
Chris@14
|
103 deallocate(m_absFanChirpTransform);
|
Chris@14
|
104 deallocate(m_auxFanChirpTransform);
|
Chris@14
|
105
|
Chris@14
|
106 // design_GLogS
|
Chris@14
|
107 deallocate(m_glogs_f0);
|
Chris@14
|
108 deallocate(m_glogs);
|
Chris@14
|
109 deallocate(m_glogs_n);
|
Chris@14
|
110 deallocate(m_glogs_index);
|
Chris@14
|
111 deallocate(m_glogs_posint);
|
Chris@14
|
112 deallocate(m_glogs_posfrac);
|
Chris@14
|
113 deallocate(m_glogs_interp);
|
Chris@14
|
114 deallocate(m_glogs_third_harmonic_posint);
|
Chris@14
|
115 deallocate(m_glogs_third_harmonic_posfrac);
|
Chris@14
|
116 deallocate(m_glogs_third_harmonic);
|
Chris@14
|
117 deallocate(m_glogs_fifth_harmonic_posint);
|
Chris@14
|
118 deallocate(m_glogs_fifth_harmonic_posfrac);
|
Chris@14
|
119 deallocate(m_glogs_fifth_harmonic);
|
Chris@14
|
120 deallocate(m_glogs_f0_preference_weights);
|
Chris@14
|
121 deallocate(m_glogs_median_correction);
|
Chris@14
|
122 deallocate(m_glogs_sigma_correction);
|
Chris@16
|
123
|
Chris@16
|
124 deallocate(m_f0s);
|
Chris@0
|
125 }
|
Chris@0
|
126
|
Chris@0
|
127 string
|
Chris@0
|
128 FChTransformF0gram::getIdentifier() const {
|
Chris@15
|
129 switch (m_processingMode) {
|
Chris@15
|
130 case ModeF0Gram: return "fchtransformf0gram";
|
Chris@15
|
131 case ModeSpectrogram: return "fchtransformspectrogram";
|
Chris@15
|
132 case ModeRoughSpectrogram: return "fchtransformrough";
|
Chris@15
|
133 }
|
Chris@17
|
134 throw std::logic_error("unknown mode");
|
Chris@0
|
135 }
|
Chris@0
|
136
|
Chris@0
|
137 string
|
Chris@0
|
138 FChTransformF0gram::getName() const {
|
Chris@15
|
139 switch (m_processingMode) {
|
Chris@15
|
140 case ModeF0Gram: return "Fan Chirp Transform F0gram";
|
Chris@15
|
141 case ModeSpectrogram: return "Fan Chirp Transform Spectrogram";
|
Chris@15
|
142 case ModeRoughSpectrogram: return "Fan Chirp Transform Rough Spectrogram";
|
Chris@15
|
143 }
|
Chris@17
|
144 throw std::logic_error("unknown mode");
|
Chris@0
|
145 }
|
Chris@0
|
146
|
Chris@0
|
147 string
|
Chris@0
|
148 FChTransformF0gram::getDescription() const {
|
Chris@15
|
149 switch (m_processingMode) {
|
Chris@15
|
150 case ModeF0Gram:
|
Chris@15
|
151 return "This plug-in produces a representation, called F0gram, which exhibits the salience of the fundamental frequency of the sound sources in the audio file. The computation of the F0gram makes use of the Fan Chirp Transform analysis. It is based on the article \"Fan chirp transform for music representation\" P. Cancela, E. Lopez, M. Rocamora, International Conference on Digital Audio Effects, 13th. DAFx-10. Graz, Austria - 6-10 Sep 2010.";
|
Chris@15
|
152 case ModeSpectrogram:
|
Chris@15
|
153 return "This plug-in produces a spectral representation of the audio using Fan Chirp Transform analysis.";
|
Chris@15
|
154 case ModeRoughSpectrogram:
|
Chris@15
|
155 return "This plug-in produces a more approximate spectral representation of the audio using Fan Chirp Transform analysis.";
|
Chris@15
|
156 }
|
Chris@17
|
157 throw std::logic_error("unknown mode");
|
Chris@0
|
158 }
|
Chris@0
|
159
|
Chris@0
|
160 string
|
Chris@0
|
161 FChTransformF0gram::getMaker() const {
|
Chris@0
|
162 // Your name here
|
Chris@0
|
163 return "Audio Processing Group \n Universidad de la Republica";
|
Chris@0
|
164 }
|
Chris@0
|
165
|
Chris@0
|
166 int
|
Chris@0
|
167 FChTransformF0gram::getPluginVersion() const {
|
Chris@0
|
168 // Increment this each time you release a version that behaves
|
Chris@0
|
169 // differently from the previous one
|
Chris@0
|
170 //
|
Chris@0
|
171 // 0 - initial version from scratch
|
Chris@15
|
172 return 1;
|
Chris@0
|
173 }
|
Chris@0
|
174
|
Chris@0
|
175 string
|
Chris@0
|
176 FChTransformF0gram::getCopyright() const {
|
Chris@0
|
177 // This function is not ideally named. It does not necessarily
|
Chris@0
|
178 // need to say who made the plugin -- getMaker does that -- but it
|
Chris@0
|
179 // should indicate the terms under which it is distributed. For
|
Chris@0
|
180 // example, "Copyright (year). All Rights Reserved", or "GPL"
|
Chris@0
|
181 return "copyright (C) 2011 GPL - Audio Processing Group, UdelaR";
|
Chris@0
|
182 }
|
Chris@0
|
183
|
Chris@0
|
184 FChTransformF0gram::InputDomain
|
Chris@0
|
185 FChTransformF0gram::getInputDomain() const {
|
Chris@0
|
186 return TimeDomain;
|
Chris@0
|
187 }
|
Chris@0
|
188
|
Chris@0
|
189 size_t FChTransformF0gram::getPreferredBlockSize() const {
|
Chris@20
|
190 // We do our own accumulating into blocks within process()
|
Chris@20
|
191 return m_blockSize/2;
|
Chris@0
|
192 }
|
Chris@0
|
193
|
Chris@0
|
194 size_t
|
Chris@0
|
195 FChTransformF0gram::getPreferredStepSize() const {
|
Chris@20
|
196 return m_stepSize;
|
Chris@0
|
197 }
|
Chris@0
|
198
|
Chris@0
|
199 size_t
|
Chris@0
|
200 FChTransformF0gram::getMinChannelCount() const {
|
Chris@0
|
201 return 1;
|
Chris@0
|
202 }
|
Chris@0
|
203
|
Chris@0
|
204 size_t
|
Chris@0
|
205 FChTransformF0gram::getMaxChannelCount() const {
|
Chris@0
|
206 return 1;
|
Chris@0
|
207 }
|
Chris@0
|
208
|
Chris@0
|
209 FChTransformF0gram::ParameterList
|
Chris@0
|
210 FChTransformF0gram::getParameterDescriptors() const {
|
Chris@0
|
211 ParameterList list;
|
Chris@0
|
212
|
Chris@0
|
213 // If the plugin has no adjustable parameters, return an empty
|
Chris@0
|
214 // list here (and there's no need to provide implementations of
|
Chris@0
|
215 // getParameter and setParameter in that case either).
|
Chris@0
|
216
|
Chris@0
|
217 // Note that it is your responsibility to make sure the parameters
|
Chris@0
|
218 // start off having their default values (e.g. in the constructor
|
Chris@0
|
219 // above). The host needs to know the default value so it can do
|
Chris@0
|
220 // things like provide a "reset to default" function, but it will
|
Chris@0
|
221 // not explicitly set your parameters to their defaults for you if
|
Chris@0
|
222 // they have not changed in the mean time.
|
Chris@0
|
223
|
Chris@0
|
224 // ============= WARPING PARAMETERS =============
|
Chris@0
|
225
|
Chris@0
|
226 ParameterDescriptor fmax;
|
Chris@0
|
227 fmax.identifier = "fmax";
|
Chris@0
|
228 fmax.name = "Maximum frequency";
|
Chris@0
|
229 fmax.description = "Maximum frequency of interest for the analysis.";
|
Chris@0
|
230 fmax.unit = "Hz";
|
Chris@0
|
231 fmax.minValue = 2000;
|
Chris@0
|
232 fmax.maxValue = 22050;
|
Chris@0
|
233 fmax.defaultValue = 10000;
|
Chris@0
|
234 fmax.isQuantized = true;
|
Chris@0
|
235 fmax.quantizeStep = 1.0;
|
Chris@0
|
236 list.push_back(fmax);
|
Chris@0
|
237
|
Chris@0
|
238 ParameterDescriptor nsamp;
|
Chris@24
|
239 nsamp.identifier = "nsamp_ix";
|
Chris@0
|
240 nsamp.name = "Number of samples";
|
Chris@0
|
241 nsamp.description = "Number of samples of the time warped frame";
|
Chris@24
|
242 nsamp.minValue = 0;
|
Chris@24
|
243 nsamp.maxValue = nsamp_options.size()-1;
|
Chris@24
|
244 nsamp.defaultValue = 3;
|
Chris@24
|
245 nsamp.isQuantized = true;
|
Chris@24
|
246 nsamp.quantizeStep = 1.0;
|
Chris@24
|
247 char label[100];
|
Chris@24
|
248 for (int i = 0; i < int(nsamp_options.size()); ++i) {
|
Chris@24
|
249 sprintf(label, "%d", nsamp_options[i]);
|
Chris@24
|
250 nsamp.valueNames.push_back(label);
|
Chris@24
|
251 }
|
Chris@0
|
252 nsamp.isQuantized = true;
|
Chris@0
|
253 nsamp.quantizeStep = 1.0;
|
Chris@0
|
254 list.push_back(nsamp);
|
Chris@0
|
255
|
Chris@0
|
256 ParameterDescriptor alpha_max;
|
Chris@0
|
257 alpha_max.identifier = "alpha_max";
|
Chris@0
|
258 alpha_max.name = "Maximum alpha value";
|
Chris@0
|
259 alpha_max.description = "Maximum value for the alpha parameter of the transform.";
|
Chris@0
|
260 alpha_max.unit = "Hz/s";
|
Chris@0
|
261 alpha_max.minValue = -10;
|
Chris@0
|
262 alpha_max.maxValue = 10;
|
Chris@33
|
263 alpha_max.defaultValue = 4;
|
Chris@0
|
264 alpha_max.isQuantized = true;
|
Chris@0
|
265 alpha_max.quantizeStep = 1.0;
|
Chris@0
|
266 list.push_back(alpha_max);
|
Chris@0
|
267
|
Chris@0
|
268 ParameterDescriptor num_warps;
|
Chris@0
|
269 num_warps.identifier = "num_warps";
|
Chris@0
|
270 num_warps.name = "Number of warpings";
|
Chris@0
|
271 num_warps.description = "Number of different warpings in the specified range (must be odd).";
|
Chris@0
|
272 num_warps.unit = "";
|
Chris@0
|
273 num_warps.minValue = 1;
|
Chris@0
|
274 num_warps.maxValue = 101;
|
Chris@0
|
275 num_warps.defaultValue = 21;
|
Chris@0
|
276 num_warps.isQuantized = true;
|
Chris@0
|
277 num_warps.quantizeStep = 2.0;
|
Chris@0
|
278 list.push_back(num_warps);
|
Chris@0
|
279
|
Chris@0
|
280 ParameterDescriptor alpha_dist;
|
Chris@0
|
281 alpha_dist.identifier = "alpha_dist";
|
Chris@0
|
282 alpha_dist.name = "alpha distribution";
|
Chris@0
|
283 alpha_dist.description = "Type of distribution of alpha values (linear or log).";
|
Chris@0
|
284 alpha_dist.unit = "";
|
Chris@0
|
285 alpha_dist.minValue = 0;
|
Chris@0
|
286 alpha_dist.maxValue = 1;
|
Chris@33
|
287 alpha_dist.defaultValue = 0;
|
Chris@0
|
288 alpha_dist.isQuantized = true;
|
Chris@0
|
289 alpha_dist.quantizeStep = 1.0;
|
Chris@0
|
290 // lin (0), log (1)
|
Chris@0
|
291 alpha_dist.valueNames.push_back("lin");
|
Chris@0
|
292 alpha_dist.valueNames.push_back("log");
|
Chris@0
|
293 list.push_back(alpha_dist);
|
Chris@0
|
294
|
Chris@0
|
295 // ============= F0-GRAM PARAMETERS =============
|
Chris@0
|
296
|
Chris@0
|
297 ParameterDescriptor f0min;
|
Chris@0
|
298 f0min.identifier = "f0min";
|
Chris@0
|
299 f0min.name = "min f0";
|
Chris@0
|
300 f0min.description = "Minimum fundamental frequency (f0) value.";
|
Chris@0
|
301 f0min.unit = "Hz";
|
Chris@0
|
302 f0min.minValue = 1;
|
Chris@0
|
303 f0min.maxValue = 500;
|
Chris@0
|
304 f0min.defaultValue = 80;
|
Chris@0
|
305 f0min.isQuantized = true;
|
Chris@0
|
306 f0min.quantizeStep = 1.0;
|
Chris@0
|
307 list.push_back(f0min);
|
Chris@0
|
308
|
Chris@0
|
309 ParameterDescriptor num_octs;
|
Chris@0
|
310 num_octs.identifier = "num_octs";
|
Chris@0
|
311 num_octs.name = "number of octaves";
|
Chris@0
|
312 num_octs.description = "Number of octaves for F0gram computation.";
|
Chris@0
|
313 num_octs.unit = "";
|
Chris@0
|
314 num_octs.minValue = 1;
|
Chris@0
|
315 num_octs.maxValue = 10;
|
Chris@0
|
316 num_octs.defaultValue = 4;
|
Chris@0
|
317 num_octs.isQuantized = true;
|
Chris@0
|
318 num_octs.quantizeStep = 1.0;
|
Chris@0
|
319 list.push_back(num_octs);
|
Chris@0
|
320
|
Chris@0
|
321 ParameterDescriptor f0s_per_oct;
|
Chris@0
|
322 f0s_per_oct.identifier = "f0s_per_oct";
|
Chris@0
|
323 f0s_per_oct.name = "f0 values per octave";
|
Chris@0
|
324 f0s_per_oct.description = "Number of f0 values per octave.";
|
Chris@0
|
325 f0s_per_oct.unit = "";
|
Chris@0
|
326 f0s_per_oct.minValue = 12;
|
Chris@0
|
327 f0s_per_oct.maxValue = 768;
|
Chris@0
|
328 f0s_per_oct.defaultValue = 192;
|
Chris@0
|
329 f0s_per_oct.isQuantized = true;
|
Chris@0
|
330 f0s_per_oct.quantizeStep = 1.0;
|
Chris@0
|
331 list.push_back(f0s_per_oct);
|
Chris@0
|
332
|
Chris@0
|
333 ParameterDescriptor f0_prefer_fun;
|
Chris@0
|
334 f0_prefer_fun.identifier = "f0_prefer_fun";
|
Chris@22
|
335 f0_prefer_fun.name = "Use f0 weighting";
|
Chris@22
|
336 f0_prefer_fun.description = "Whether to use a f0 weighting function to prefer frequencies nearer a mean value.";
|
Chris@0
|
337 f0_prefer_fun.unit = "";
|
Chris@0
|
338 f0_prefer_fun.minValue = 0;
|
Chris@0
|
339 f0_prefer_fun.maxValue = 1;
|
Chris@0
|
340 f0_prefer_fun.defaultValue = 1;
|
Chris@0
|
341 f0_prefer_fun.isQuantized = true;
|
Chris@0
|
342 f0_prefer_fun.quantizeStep = 1.0;
|
Chris@0
|
343 list.push_back(f0_prefer_fun);
|
Chris@0
|
344
|
Chris@0
|
345 ParameterDescriptor f0_prefer_mean;
|
Chris@0
|
346 f0_prefer_mean.identifier = "f0_prefer_mean";
|
Chris@22
|
347 f0_prefer_mean.name = "Mean pitch for f0 weighting";
|
Chris@0
|
348 f0_prefer_mean.description = "Mean value for f0 weighting function (MIDI number).";
|
Chris@0
|
349 f0_prefer_mean.unit = "";
|
Chris@0
|
350 f0_prefer_mean.minValue = 1;
|
Chris@0
|
351 f0_prefer_mean.maxValue = 127;
|
Chris@0
|
352 f0_prefer_mean.defaultValue = 60;
|
Chris@0
|
353 f0_prefer_mean.isQuantized = true;
|
Chris@0
|
354 f0_prefer_mean.quantizeStep = 1.0;
|
Chris@0
|
355 list.push_back(f0_prefer_mean);
|
Chris@0
|
356
|
Chris@0
|
357 ParameterDescriptor f0_prefer_stdev;
|
Chris@0
|
358 f0_prefer_stdev.identifier = "f0_prefer_stdev";
|
Chris@22
|
359 f0_prefer_stdev.name = "Stdev for f0 weighting";
|
Chris@22
|
360 f0_prefer_stdev.description = "Standard deviation for f0 weighting function (MIDI number).";
|
Chris@0
|
361 f0_prefer_stdev.unit = "";
|
Chris@0
|
362 f0_prefer_stdev.minValue = 1;
|
Chris@0
|
363 f0_prefer_stdev.maxValue = 127;
|
Chris@0
|
364 f0_prefer_stdev.defaultValue = 18;
|
Chris@0
|
365 f0_prefer_stdev.isQuantized = true;
|
Chris@0
|
366 f0_prefer_stdev.quantizeStep = 1.0;
|
Chris@0
|
367 list.push_back(f0_prefer_stdev);
|
Chris@0
|
368
|
Chris@0
|
369 ParameterDescriptor f0gram_mode;
|
Chris@0
|
370 f0gram_mode.identifier = "f0gram_mode";
|
Chris@0
|
371 f0gram_mode.name = "display mode of f0gram";
|
Chris@0
|
372 f0gram_mode.description = "Display all bins of the best direction, or the best bin for each direction.";
|
Chris@0
|
373 f0gram_mode.unit = "";
|
Chris@0
|
374 f0gram_mode.minValue = 0;
|
Chris@0
|
375 f0gram_mode.maxValue = 1;
|
Chris@0
|
376 f0gram_mode.defaultValue = 1;
|
Chris@0
|
377 f0gram_mode.isQuantized = true;
|
Chris@0
|
378 f0gram_mode.quantizeStep = 1.0;
|
Chris@0
|
379 list.push_back(f0gram_mode);
|
Chris@0
|
380
|
Chris@0
|
381 return list;
|
Chris@0
|
382 }
|
Chris@0
|
383
|
Chris@0
|
384 float
|
Chris@0
|
385 FChTransformF0gram::getParameter(string identifier) const {
|
Chris@0
|
386
|
Chris@0
|
387 if (identifier == "fmax") {
|
Chris@0
|
388 return m_fmax;
|
Chris@24
|
389 } else if (identifier == "nsamp_ix") {
|
Chris@24
|
390 for (int i = 0; i < int(nsamp_options.size()); ++i) {
|
Chris@24
|
391 if (m_warp_params.nsamps_twarp == nsamp_options[i]) {
|
Chris@24
|
392 return i;
|
Chris@24
|
393 }
|
Chris@24
|
394 }
|
Chris@24
|
395 throw std::logic_error("internal error: nsamps_twarp not in nsamp_options");
|
Chris@0
|
396 } else if (identifier == "alpha_max") {
|
Chris@0
|
397 return m_warp_params.alpha_max;
|
Chris@0
|
398 } else if (identifier == "num_warps") {
|
Chris@0
|
399 return m_warp_params.num_warps;
|
Chris@0
|
400 } else if (identifier == "alpha_dist") {
|
Chris@0
|
401 return m_warp_params.alpha_dist;
|
Chris@0
|
402 } else if (identifier == "f0min") {
|
Chris@0
|
403 return m_f0_params.f0min;
|
Chris@0
|
404 } else if (identifier == "num_octs") {
|
Chris@0
|
405 return m_f0_params.num_octs;
|
Chris@0
|
406 } else if (identifier == "f0s_per_oct") {
|
Chris@0
|
407 return m_f0_params.num_f0s_per_oct;
|
Chris@0
|
408 } else if (identifier == "f0_prefer_fun") {
|
Chris@22
|
409 return m_f0_params.prefer ? 1.0 : 0.0;
|
Chris@0
|
410 } else if (identifier == "f0_prefer_mean") {
|
Chris@0
|
411 return m_f0_params.prefer_mean;
|
Chris@0
|
412 } else if (identifier == "f0_prefer_stdev") {
|
Chris@0
|
413 return m_f0_params.prefer_stdev;
|
Chris@7
|
414 } else if (identifier == "f0gram_mode") {
|
Chris@15
|
415 return m_f0gram_mode == BestBinOfAllDirections ? 1.0 : 0.0;
|
Chris@0
|
416 } else {
|
Chris@0
|
417 return 0.f;
|
Chris@0
|
418 }
|
Chris@0
|
419
|
Chris@0
|
420 }
|
Chris@0
|
421
|
Chris@15
|
422 void FChTransformF0gram::setParameter(string identifier, float value)
|
Chris@15
|
423 {
|
Chris@0
|
424 if (identifier == "fmax") {
|
Chris@0
|
425 m_fmax = value;
|
Chris@24
|
426 } else if (identifier == "nsamp_ix") {
|
Chris@24
|
427 int n = int(roundf(value));
|
Chris@24
|
428 for (int i = 0; i < int(nsamp_options.size()); ++i) {
|
Chris@24
|
429 if (i == n) {
|
Chris@24
|
430 m_warp_params.nsamps_twarp = nsamp_options[i];
|
Chris@24
|
431 m_blockSize = m_warp_params.nsamps_twarp * 4;
|
Chris@24
|
432 }
|
Chris@24
|
433 }
|
Chris@0
|
434 } else if (identifier == "alpha_max") {
|
Chris@0
|
435 m_warp_params.alpha_max = value;
|
Chris@0
|
436 } else if (identifier == "num_warps") {
|
Chris@0
|
437 m_warp_params.num_warps = value;
|
Chris@0
|
438 } else if (identifier == "alpha_dist") {
|
Chris@0
|
439 m_warp_params.alpha_dist = value;
|
Chris@0
|
440 } else if (identifier == "f0min") {
|
Chris@0
|
441 m_f0_params.f0min = value;
|
Chris@0
|
442 } else if (identifier == "num_octs") {
|
Chris@0
|
443 m_f0_params.num_octs = value;
|
Chris@0
|
444 } else if (identifier == "f0s_per_oct") {
|
Chris@0
|
445 m_f0_params.num_f0s_per_oct = value;
|
Chris@0
|
446 } else if (identifier == "f0_prefer_fun") {
|
Chris@22
|
447 m_f0_params.prefer = (value > 0.5);
|
Chris@0
|
448 } else if (identifier == "f0_prefer_mean") {
|
Chris@0
|
449 m_f0_params.prefer_mean = value;
|
Chris@0
|
450 } else if (identifier == "f0_prefer_stdev") {
|
Chris@0
|
451 m_f0_params.prefer_stdev = value;
|
Chris@0
|
452 } else if (identifier == "f0gram_mode") {
|
Chris@15
|
453 m_f0gram_mode = (value > 0.5 ?
|
Chris@15
|
454 BestBinOfAllDirections :
|
Chris@15
|
455 AllBinsOfBestDirection);
|
Chris@15
|
456 } else {
|
Chris@15
|
457 cerr << "WARNING: Unknown parameter id \""
|
Chris@15
|
458 << identifier << "\"" << endl;
|
Chris@0
|
459 }
|
Chris@0
|
460 }
|
Chris@0
|
461
|
Chris@0
|
462 FChTransformF0gram::ProgramList
|
Chris@0
|
463 FChTransformF0gram::getPrograms() const {
|
Chris@0
|
464 ProgramList list;
|
Chris@0
|
465 return list;
|
Chris@0
|
466 }
|
Chris@0
|
467
|
Chris@0
|
468 FChTransformF0gram::OutputList
|
Chris@0
|
469 FChTransformF0gram::getOutputDescriptors() const {
|
Chris@0
|
470
|
Chris@0
|
471 OutputList list;
|
Chris@0
|
472
|
Chris@16
|
473 vector<string> labels;
|
Chris@16
|
474 char label[100];
|
Chris@0
|
475
|
Chris@16
|
476 if (m_processingMode == ModeF0Gram) {
|
Chris@16
|
477
|
Chris@16
|
478 /* f0 values of F0gram grid as string values */
|
Chris@16
|
479 for (int i = 0; i < m_num_f0s; ++i) {
|
Chris@16
|
480 sprintf(label, "%4.2f Hz", m_f0s[i]);
|
Chris@16
|
481 labels.push_back(label);
|
Chris@16
|
482 }
|
Chris@16
|
483
|
Chris@16
|
484 /* The F0gram */
|
Chris@16
|
485 OutputDescriptor d;
|
Chris@16
|
486 d.identifier = "f0gram";
|
Chris@19
|
487 d.name = "F0gram";
|
Chris@19
|
488 d.description = "The salience of the different f0s in the signal.";
|
Chris@16
|
489 d.hasFixedBinCount = true;
|
Chris@16
|
490 d.binCount = m_f0_params.num_octs * m_f0_params.num_f0s_per_oct;
|
Chris@16
|
491 d.binNames = labels;
|
Chris@16
|
492 d.hasKnownExtents = false;
|
Chris@16
|
493 d.isQuantized = false;
|
Chris@16
|
494 d.sampleType = OutputDescriptor::OneSamplePerStep;
|
Chris@16
|
495 d.hasDuration = false;
|
Chris@16
|
496 list.push_back(d);
|
Chris@16
|
497
|
Chris@19
|
498 d.identifier = "pitch";
|
Chris@19
|
499 d.name = "Most salient pitch";
|
Chris@19
|
500 d.description = "The most salient f0 in the signal for each time step.";
|
Chris@19
|
501 d.unit = "Hz";
|
Chris@19
|
502 d.hasFixedBinCount = true;
|
Chris@19
|
503 d.binCount = 1;
|
Chris@19
|
504 d.binNames.clear();
|
Chris@19
|
505 d.hasKnownExtents = false;
|
Chris@19
|
506 d.isQuantized = false;
|
Chris@19
|
507 d.sampleType = OutputDescriptor::OneSamplePerStep;
|
Chris@19
|
508 d.hasDuration = false;
|
Chris@19
|
509 list.push_back(d);
|
Chris@19
|
510
|
Chris@16
|
511 } else {
|
Chris@16
|
512
|
Chris@16
|
513 for (int i = 0; i < m_warp_params.nsamps_twarp/2+1; ++i) {
|
Chris@24
|
514 double freq = i * (m_warpings.fs_warp / m_warp_params.nsamps_twarp);
|
Chris@16
|
515 sprintf(label, "%4.2f Hz", freq);
|
Chris@16
|
516 labels.push_back(label);
|
Chris@16
|
517 }
|
Chris@16
|
518
|
Chris@16
|
519 OutputDescriptor d;
|
Chris@16
|
520 d.identifier = "spectrogram";
|
Chris@16
|
521 d.name = "Spectrogram";
|
Chris@16
|
522 d.description = "Time/frequency spectrogram derived from the Fan Chirp Transform output";
|
Chris@16
|
523 d.hasFixedBinCount = true;
|
Chris@16
|
524 d.binCount = m_warp_params.nsamps_twarp/2+1;
|
Chris@16
|
525 d.binNames = labels;
|
Chris@16
|
526 d.hasKnownExtents = false;
|
Chris@16
|
527 d.isQuantized = false;
|
Chris@16
|
528 d.sampleType = OutputDescriptor::OneSamplePerStep;
|
Chris@16
|
529 d.hasDuration = false;
|
Chris@16
|
530 list.push_back(d);
|
Chris@0
|
531 }
|
Chris@16
|
532
|
Chris@0
|
533 return list;
|
Chris@0
|
534 }
|
Chris@0
|
535
|
Chris@0
|
536 bool
|
Chris@0
|
537 FChTransformF0gram::initialise(size_t channels, size_t stepSize, size_t blockSize) {
|
Chris@0
|
538 if (channels < getMinChannelCount() ||
|
Chris@20
|
539 channels > getMaxChannelCount() ||
|
Chris@27
|
540 int(blockSize) != m_blockSize/2 ||
|
Chris@27
|
541 int(stepSize) != m_stepSize) {
|
Chris@14
|
542 return false;
|
Chris@14
|
543 }
|
Chris@0
|
544
|
Chris@20
|
545 m_inputBuffer = allocate_and_zero<float>(m_blockSize);
|
Chris@20
|
546
|
Chris@0
|
547 // WARNING !!!
|
Chris@0
|
548 // these values in fact are determined by the sampling frequency m_fs
|
Chris@0
|
549 // the parameters used below correspond to default values i.e. m_fs = 44.100 Hz
|
Chris@0
|
550 //m_blockSize = 4 * m_warp_params.nsamps_twarp;
|
Chris@16
|
551 // m_stepSize = floor(m_hop / m_warp_params.fact_over_samp);
|
Chris@16
|
552
|
Chris@16
|
553 /* design of FChT */
|
Chris@16
|
554 design_FChT();
|
Chris@0
|
555
|
Chris@0
|
556 /* initialise m_glogs_params */
|
Chris@7
|
557 design_GLogS();
|
Chris@0
|
558
|
Chris@7
|
559 design_LPF();
|
Chris@0
|
560
|
Chris@7
|
561 design_time_window();
|
Chris@0
|
562
|
Chris@7
|
563 // Create Hanning window for warped signals
|
Chris@14
|
564 mp_HanningWindow = allocate<double>(m_warp_params.nsamps_twarp);
|
Chris@7
|
565 bool normalize = false;
|
Chris@14
|
566 Utils::hanning_window(mp_HanningWindow, m_warp_params.nsamps_twarp, normalize);
|
Chris@0
|
567
|
Chris@16
|
568 m_num_f0s = m_f0_params.num_octs * m_f0_params.num_f0s_per_oct;
|
Chris@16
|
569 m_f0s = allocate<double>(m_num_f0s);
|
Chris@16
|
570 for (int i = 0; i < m_num_f0s; ++i) {
|
Chris@16
|
571 m_f0s[i] = m_glogs_f0[m_glogs_init_f0s + i];
|
Chris@16
|
572 }
|
Chris@20
|
573
|
Chris@20
|
574 m_initialised = true;
|
Chris@0
|
575 return true;
|
Chris@0
|
576 }
|
Chris@0
|
577
|
Chris@0
|
578 void
|
Chris@0
|
579 FChTransformF0gram::design_GLogS() {
|
Chris@0
|
580
|
Chris@7
|
581 // total number & initial quantity of f0s
|
Chris@16
|
582
|
Chris@10
|
583 m_glogs_init_f0s = (int)(((double)m_f0_params.num_f0s_per_oct)*log2(5.0))+1;
|
Chris@7
|
584 m_glogs_num_f0s = (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct + m_glogs_init_f0s;
|
Chris@0
|
585
|
Chris@7
|
586 // Initialize arrays
|
Chris@14
|
587 m_glogs_f0 = allocate<double>(m_glogs_num_f0s);
|
Chris@14
|
588 m_glogs = allocate<double>(m_glogs_num_f0s*m_warp_params.num_warps);
|
Chris@14
|
589 m_glogs_n = allocate<int>(m_glogs_num_f0s);
|
Chris@14
|
590 m_glogs_index = allocate<int>(m_glogs_num_f0s);
|
Chris@0
|
591
|
Chris@7
|
592 // Compute f0 values
|
Chris@7
|
593 m_glogs_harmonic_count = 0;
|
Chris@7
|
594 double factor = (double)(m_warp_params.nsamps_twarp/2)/(double)(m_warp_params.nsamps_twarp/2+1);
|
Chris@10
|
595 for (int i = 0; i < m_glogs_num_f0s; i++) {
|
Chris@7
|
596 m_glogs_f0[i] = (m_f0_params.f0min/5.0)*pow(2.0,(double)i/(double)m_f0_params.num_f0s_per_oct);
|
Chris@7
|
597 // for every f0 compute number of partials less or equal than m_fmax.
|
Chris@7
|
598 m_glogs_n[i] = m_fmax*factor/m_glogs_f0[i];
|
Chris@7
|
599 m_glogs_index[i] = m_glogs_harmonic_count;
|
Chris@7
|
600 m_glogs_harmonic_count += m_glogs_n[i];
|
Chris@7
|
601 }
|
Chris@0
|
602
|
Chris@7
|
603 // Initialize arrays for interpolation
|
Chris@14
|
604 m_glogs_posint = allocate<int>(m_glogs_harmonic_count);
|
Chris@14
|
605 m_glogs_posfrac = allocate<double>(m_glogs_harmonic_count);
|
Chris@14
|
606 m_glogs_interp = allocate<double>(m_glogs_harmonic_count);
|
Chris@0
|
607
|
Chris@7
|
608 // Compute int & frac of interpolation positions
|
Chris@10
|
609 int aux_index = 0;
|
Chris@7
|
610 double aux_pos;
|
Chris@10
|
611 for (int i = 0; i < m_glogs_num_f0s; i++) {
|
Chris@10
|
612 for (int j = 1; j <= m_glogs_n[i]; j++) {
|
Chris@18
|
613 aux_pos = ((double)j * m_glogs_f0[i]) * ((double)(m_warp_params.nsamps_twarp))/m_warpings.fs_warp;
|
Chris@10
|
614 m_glogs_posint[aux_index] = (int)aux_pos;
|
Chris@7
|
615 m_glogs_posfrac[aux_index] = aux_pos - (double)m_glogs_posint[aux_index];
|
Chris@7
|
616 aux_index++;
|
Chris@7
|
617 }
|
Chris@7
|
618 }
|
Chris@0
|
619
|
Chris@7
|
620 // Third harmonic attenuation
|
Chris@7
|
621 double aux_third_harmonic;
|
Chris@14
|
622 m_glogs_third_harmonic_posint = allocate<int>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@14
|
623 m_glogs_third_harmonic_posfrac = allocate<double>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@10
|
624 for (int i = 0; i < (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct; i++) {
|
Chris@7
|
625 aux_third_harmonic = (double)i + (double)m_glogs_init_f0s - ((double)m_f0_params.num_f0s_per_oct)*log2(3.0);
|
Chris@10
|
626 m_glogs_third_harmonic_posint[i] = (int)aux_third_harmonic;
|
Chris@7
|
627 m_glogs_third_harmonic_posfrac[i] = aux_third_harmonic - (double)(m_glogs_third_harmonic_posint[i]);
|
Chris@7
|
628 }
|
Chris@14
|
629 m_glogs_third_harmonic = allocate<double>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@0
|
630
|
Chris@7
|
631 // Fifth harmonic attenuation
|
Chris@7
|
632 double aux_fifth_harmonic;
|
Chris@14
|
633 m_glogs_fifth_harmonic_posint = allocate<int>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@14
|
634 m_glogs_fifth_harmonic_posfrac = allocate<double>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@10
|
635 for (int i = 0; i < (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct; i++) {
|
Chris@7
|
636 aux_fifth_harmonic = (double)i + (double)m_glogs_init_f0s - ((double)m_f0_params.num_f0s_per_oct)*log2(5.0);
|
Chris@10
|
637 m_glogs_fifth_harmonic_posint[i] = (int)aux_fifth_harmonic;
|
Chris@7
|
638 m_glogs_fifth_harmonic_posfrac[i] = aux_fifth_harmonic - (double)(m_glogs_fifth_harmonic_posint[i]);
|
Chris@7
|
639 }
|
Chris@14
|
640 m_glogs_fifth_harmonic = allocate<double>((m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@0
|
641
|
Chris@7
|
642 // Normalization & attenuation windows
|
Chris@14
|
643 m_glogs_f0_preference_weights = allocate<double>(m_f0_params.num_octs*m_f0_params.num_f0s_per_oct);
|
Chris@14
|
644 m_glogs_median_correction = allocate<double>(m_f0_params.num_octs*m_f0_params.num_f0s_per_oct);
|
Chris@14
|
645 m_glogs_sigma_correction = allocate<double>(m_f0_params.num_octs*m_f0_params.num_f0s_per_oct);
|
Chris@7
|
646 double MIDI_value;
|
Chris@10
|
647 for (int i = 0; i < m_f0_params.num_octs*m_f0_params.num_f0s_per_oct; i++) {
|
Chris@22
|
648 if (m_f0_params.prefer) {
|
Chris@22
|
649 MIDI_value = 69.0 + 12.0 * log2(m_glogs_f0[i + m_glogs_init_f0s]/440.0);
|
Chris@22
|
650 m_glogs_f0_preference_weights[i] = 1.0/sqrt(2.0*M_PI*m_f0_params.prefer_stdev*m_f0_params.prefer_stdev)*exp(-(MIDI_value-m_f0_params.prefer_mean)*(MIDI_value-m_f0_params.prefer_mean)/(2.0*m_f0_params.prefer_stdev*m_f0_params.prefer_stdev));
|
Chris@22
|
651 m_glogs_f0_preference_weights[i] = (0.01 + m_glogs_f0_preference_weights[i]) / (1.01);
|
Chris@22
|
652 } else {
|
Chris@22
|
653 m_glogs_f0_preference_weights[i] = 1.0;
|
Chris@22
|
654 }
|
Chris@0
|
655
|
Chris@7
|
656 m_glogs_median_correction[i] = m_glogs_params.median_poly_coefs[0]*(i+1.0)*(i+1.0) + m_glogs_params.median_poly_coefs[1]*(i+1.0) + m_glogs_params.median_poly_coefs[2];
|
Chris@7
|
657 m_glogs_sigma_correction[i] = 1.0 / (m_glogs_params.sigma_poly_coefs[0]*(i+1.0)*(i+1.0) + m_glogs_params.sigma_poly_coefs[1]*(i+1.0) + m_glogs_params.sigma_poly_coefs[2]);
|
Chris@7
|
658 }
|
Chris@0
|
659 }
|
Chris@0
|
660
|
Chris@0
|
661 void
|
Chris@0
|
662 FChTransformF0gram::design_FChT() {
|
Chris@0
|
663
|
Chris@0
|
664 /* ============= WARPING DESIGN ============= */
|
Chris@0
|
665
|
Chris@0
|
666 // sampling frequency after oversampling
|
Chris@0
|
667 m_warpings.fs_orig = m_warp_params.fact_over_samp * m_fs;
|
Chris@0
|
668
|
Chris@0
|
669 // number of samples of the original signal frame
|
Chris@0
|
670 m_warpings.nsamps_torig = 4 * m_warp_params.fact_over_samp * m_warp_params.nsamps_twarp;
|
Chris@0
|
671 // equivalent to: m_warpings.nsamps_torig = m_warp_params.fact_over_samp * m_blockSize;
|
Chris@0
|
672
|
Chris@0
|
673 // time instants of the original signal frame
|
Chris@14
|
674 double *t_orig = allocate<double>(m_warpings.nsamps_torig);
|
Chris@10
|
675 for (int ind = 0; ind < m_warpings.nsamps_torig; ind++) {
|
Chris@0
|
676 t_orig[ind] = ((double)(ind + 1) - (double)m_warpings.nsamps_torig / 2.0) / m_warpings.fs_orig;
|
Chris@0
|
677 }
|
Chris@0
|
678
|
Chris@0
|
679 // linear chirps warping definition as relative frequency deviation
|
Chris@7
|
680 //TODO
|
Chris@14
|
681 double *freq_relative = allocate<double>(m_warpings.nsamps_torig * m_warp_params.num_warps);
|
Chris@0
|
682 define_warps_linear_chirps(freq_relative, t_orig);
|
Chris@0
|
683
|
Chris@0
|
684 // maximum relative frequency deviation
|
Chris@0
|
685 double freq_relative_max = 0;
|
Chris@14
|
686 for (int i = 0; i < m_warpings.nsamps_torig; i++) {
|
Chris@14
|
687 for (int j = 0; j < m_warp_params.num_warps; j++) {
|
Chris@14
|
688 if (freq_relative_max < freq_relative[j * m_warpings.nsamps_torig + i]) {
|
Chris@0
|
689 freq_relative_max = freq_relative[j * m_warpings.nsamps_torig + i];
|
Chris@14
|
690 }
|
Chris@14
|
691 }
|
Chris@14
|
692 }
|
Chris@0
|
693
|
Chris@0
|
694 // sampling frequency of warped signal to be free of aliasing up to fmax
|
Chris@0
|
695 m_warpings.fs_warp = 2 * m_fmax * freq_relative_max;
|
Chris@0
|
696
|
Chris@0
|
697 // time instants of the warped signal frame
|
Chris@14
|
698 double *t_warp = allocate<double>(m_warp_params.nsamps_twarp);
|
Chris@10
|
699 for (int ind = 0; ind < m_warp_params.nsamps_twarp; ind++) {
|
Chris@0
|
700 t_warp[ind] = ((double)((int)(ind + 1)- (int)m_warp_params.nsamps_twarp / 2)) / (double)m_warpings.fs_warp;
|
Chris@0
|
701 }
|
Chris@0
|
702
|
Chris@0
|
703 // design of warpings for efficient interpolation
|
Chris@0
|
704 design_warps(freq_relative, t_orig, t_warp);
|
Chris@0
|
705
|
Chris@14
|
706 deallocate(freq_relative);
|
Chris@14
|
707 deallocate(t_orig);
|
Chris@14
|
708 deallocate(t_warp);
|
Chris@14
|
709
|
Chris@14
|
710 x_warping = allocate<double>(m_warp_params.nsamps_twarp);
|
Chris@14
|
711 m_absFanChirpTransform = allocate<double>(m_warp_params.num_warps * (m_warp_params.nsamps_twarp/2 + 1));
|
Chris@14
|
712 m_auxFanChirpTransform = allocate<double>(2 * (m_warp_params.nsamps_twarp/2 + 1));
|
Chris@14
|
713 fft_xwarping = new FFTReal(m_warp_params.nsamps_twarp);
|
Chris@0
|
714 }
|
Chris@0
|
715
|
Chris@0
|
716 void
|
Chris@0
|
717 FChTransformF0gram::design_warps(double * freq_relative, double * t_orig, double * t_warp) {
|
Chris@0
|
718 /* the warping is done by interpolating the original signal in time instants
|
Chris@0
|
719 given by the desired frequency deviation, to do this, the interpolation
|
Chris@0
|
720 instants are stored in a structure as an integer index and a fractional value
|
Chris@0
|
721 hypothesis: sampling frequency at the central point equals the original
|
Chris@7
|
722 */
|
Chris@0
|
723
|
Chris@14
|
724 m_warpings.pos_int = allocate<int>(m_warp_params.num_warps * m_warp_params.nsamps_twarp);
|
Chris@14
|
725 m_warpings.pos_frac = allocate<double>(m_warp_params.num_warps * m_warp_params.nsamps_twarp);
|
Chris@0
|
726
|
Chris@7
|
727 // vector of phase values
|
Chris@14
|
728 double *phi = allocate<double>(m_warpings.nsamps_torig);
|
Chris@7
|
729 double aux;
|
Chris@0
|
730
|
Chris@7
|
731 // warped positions
|
Chris@14
|
732 double *pos1 = allocate<double>(m_warp_params.nsamps_twarp*m_warp_params.num_warps);
|
Chris@0
|
733
|
Chris@10
|
734 for (int i = 0; i < m_warp_params.num_warps; i++) {
|
Chris@0
|
735
|
Chris@7
|
736 // integration of relative frequency to obtain phase values
|
Chris@14
|
737 Utils::cumtrapz(t_orig, freq_relative + i*(m_warpings.nsamps_torig), m_warpings.nsamps_torig, phi);
|
Chris@0
|
738
|
Chris@7
|
739 // centering of phase values to force original frequency in the middle
|
Chris@7
|
740 aux = phi[m_warpings.nsamps_torig/2];
|
Chris@10
|
741 for (int j = 0; j < m_warpings.nsamps_torig; j++) {
|
Chris@7
|
742 phi[j] -= aux;
|
Chris@7
|
743 } //for
|
Chris@0
|
744
|
Chris@7
|
745 // interpolation of phase values to obtain warped positions
|
Chris@14
|
746 Utils::interp1(phi, t_orig, m_warpings.nsamps_torig, t_warp, pos1 + i*m_warp_params.nsamps_twarp, m_warp_params.nsamps_twarp);
|
Chris@0
|
747 }
|
Chris@0
|
748
|
Chris@0
|
749 // % previous sample index
|
Chris@0
|
750 // pos1_int = uint32(floor(pos1))';
|
Chris@0
|
751 // % integer corresponding to previous sample index in "c"
|
Chris@0
|
752 // warps.pos1_int = (pos1_int - uint32(1));
|
Chris@0
|
753 // % fractional value that defines the warped position
|
Chris@0
|
754 // warps.pos1_frac = (double(pos1)' - double(pos1_int));
|
Chris@0
|
755
|
Chris@10
|
756 for (int j = 0; j < m_warp_params.nsamps_twarp*m_warp_params.num_warps; j++) {
|
Chris@7
|
757 // previous sample index
|
Chris@7
|
758 pos1[j] = pos1[j]*m_warpings.fs_orig + m_warpings.nsamps_torig/2 + 1;
|
Chris@10
|
759 m_warpings.pos_int[j] = (int) pos1[j];
|
Chris@7
|
760 m_warpings.pos_frac[j] = pos1[j] - (double)(m_warpings.pos_int[j]);
|
Chris@7
|
761 } //for
|
Chris@0
|
762
|
Chris@14
|
763 deallocate(phi);
|
Chris@14
|
764 deallocate(pos1);
|
Chris@0
|
765 }
|
Chris@0
|
766
|
Chris@0
|
767 void
|
Chris@0
|
768 FChTransformF0gram::define_warps_linear_chirps(double * freq_relative, double * t_orig) {
|
Chris@0
|
769 /** define warps as relative frequency deviation from original frequency
|
Chris@7
|
770 t_orig : time vector
|
Chris@7
|
771 freq_relative : relative frequency deviations
|
Chris@7
|
772 */
|
Chris@0
|
773 if (m_warp_params.alpha_dist == 0) {
|
Chris@0
|
774
|
Chris@0
|
775 // linear alpha values spacing
|
Chris@14
|
776 m_warpings.chirp_rates = allocate<double>(m_warp_params.num_warps);
|
Chris@0
|
777 // WARNING m_warp_params.num_warps must be odd
|
Chris@0
|
778 m_warpings.chirp_rates[0] = -m_warp_params.alpha_max;
|
Chris@0
|
779 double increment = (double) m_warp_params.alpha_max / ((m_warp_params.num_warps - 1) / 2);
|
Chris@0
|
780
|
Chris@10
|
781 for (int ind = 1; ind < m_warp_params.num_warps; ind++) {
|
Chris@0
|
782 m_warpings.chirp_rates[ind] = m_warpings.chirp_rates[ind - 1] + increment;
|
Chris@0
|
783 }
|
Chris@0
|
784 // force zero value
|
Chris@0
|
785 m_warpings.chirp_rates[(int) ((m_warp_params.num_warps - 1) / 2)] = 0;
|
Chris@0
|
786
|
Chris@0
|
787 } else {
|
Chris@0
|
788 // log alpha values spacing
|
Chris@14
|
789 m_warpings.chirp_rates = allocate<double>(m_warp_params.num_warps);
|
Chris@0
|
790
|
Chris@0
|
791 // force zero value
|
Chris@0
|
792 int middle_point = (int) ((m_warp_params.num_warps - 1) / 2);
|
Chris@0
|
793 m_warpings.chirp_rates[middle_point] = 0;
|
Chris@0
|
794
|
Chris@0
|
795 double logMax = log10(m_warp_params.alpha_max + 1);
|
Chris@0
|
796 double increment = logMax / ((m_warp_params.num_warps - 1) / 2.0f);
|
Chris@0
|
797 double exponent = 0;
|
Chris@0
|
798
|
Chris@0
|
799 // fill positive values
|
Chris@0
|
800 int ind_log = middle_point;
|
Chris@10
|
801 for (int ind = 0; ind < (m_warp_params.num_warps + 1) / 2; ind++) {
|
Chris@0
|
802 m_warpings.chirp_rates[ind_log] = pow(10, exponent) - 1;
|
Chris@0
|
803 exponent += increment;
|
Chris@0
|
804 ind_log++;
|
Chris@0
|
805 }
|
Chris@0
|
806 // fill negative values
|
Chris@10
|
807 for (int ind = 0; ind < (m_warp_params.num_warps - 1) / 2; ind++) {
|
Chris@0
|
808 m_warpings.chirp_rates[ind] = -m_warpings.chirp_rates[m_warp_params.num_warps - 1 - ind];
|
Chris@0
|
809 }
|
Chris@0
|
810 }
|
Chris@0
|
811
|
Chris@0
|
812 // compute relative frequency deviation
|
Chris@14
|
813 for (int i = 0; i < m_warpings.nsamps_torig; i++) {
|
Chris@14
|
814 for (int j = 0; j < m_warp_params.num_warps; j++) {
|
Chris@0
|
815 freq_relative[j * m_warpings.nsamps_torig + i] = 1.0 + t_orig[i] * m_warpings.chirp_rates[j];
|
Chris@14
|
816 }
|
Chris@14
|
817 }
|
Chris@0
|
818 }
|
Chris@0
|
819
|
Chris@0
|
820 void
|
Chris@14
|
821 FChTransformF0gram::design_LPF()
|
Chris@14
|
822 {
|
Chris@14
|
823 double *lp_LPFWindow_aux = allocate<double>(m_blockSize/2+1);
|
Chris@14
|
824 mp_LPFWindow = allocate<double>(m_blockSize/2+1);
|
Chris@0
|
825
|
Chris@10
|
826 int i_max = (int) ((2.0*m_fmax/m_fs) * ( (double)m_blockSize / 2.0 + 1.0 ));
|
Chris@10
|
827 for (int i = 0; i < m_blockSize/2+1; i++) {
|
Chris@0
|
828 if (i >= i_max) {
|
Chris@0
|
829 lp_LPFWindow_aux[i] = 0.0;
|
Chris@0
|
830 } else {
|
Chris@0
|
831 lp_LPFWindow_aux[i] = 1.0;
|
Chris@0
|
832 }
|
Chris@0
|
833 }
|
Chris@14
|
834
|
Chris@14
|
835 LPF_time = allocate_and_zero<double>(m_warpings.nsamps_torig);
|
Chris@14
|
836 LPF_frequency = allocate_and_zero<double>(2 * (m_warpings.nsamps_torig/2 + 1));
|
Chris@14
|
837
|
Chris@14
|
838 fft_forward_LPF = new FFTReal(m_blockSize);
|
Chris@14
|
839 fft_inverse_LPF = new FFTReal(m_warpings.nsamps_torig);
|
Chris@0
|
840
|
Chris@10
|
841 int winWidth = 11;
|
Chris@14
|
842 double *lp_hanningWindow = allocate<double>(winWidth);
|
Chris@0
|
843 double accum=0;
|
Chris@10
|
844 for (int i = 0; i < winWidth; i++) {
|
Chris@0
|
845 lp_hanningWindow[i]=0.5*(1.0-cos(2*M_PI*(double)(i+1)/((double)winWidth+1.0)));
|
Chris@0
|
846 accum+=lp_hanningWindow[i];
|
Chris@0
|
847
|
Chris@0
|
848 }
|
Chris@10
|
849 for (int i = 0; i < winWidth; i++) { //window normalization
|
Chris@0
|
850 lp_hanningWindow[i]=lp_hanningWindow[i]/accum;
|
Chris@0
|
851 }
|
Chris@10
|
852 for (int i = 0; i < m_blockSize/2+1; i++) {
|
Chris@0
|
853 //if (((i-(winWidth-1)/2)<0)||(i+(winWidth-1))/2>m_blockSize/2-1) {//consideramos winWidth impar, si la ventana sale del arreglo se rellena con el valor origianl
|
Chris@7
|
854 if ( (i > (i_max + (winWidth-1)/2)) || (i <= (i_max - (winWidth-1)/2)) ) {
|
Chris@0
|
855 mp_LPFWindow[i]=lp_LPFWindow_aux[i];
|
Chris@0
|
856 } else {
|
Chris@0
|
857 accum=0;
|
Chris@10
|
858 for (int j = -((winWidth-1)/2); j <= (winWidth-1)/2; j++) {
|
Chris@0
|
859 accum+=lp_LPFWindow_aux[i-j]*lp_hanningWindow[j+(winWidth-1)/2];
|
Chris@7
|
860 }
|
Chris@0
|
861 mp_LPFWindow[i]=accum;
|
Chris@0
|
862 }
|
Chris@0
|
863 }
|
Chris@0
|
864
|
Chris@14
|
865 deallocate(lp_LPFWindow_aux);
|
Chris@14
|
866 deallocate(lp_hanningWindow);
|
Chris@0
|
867 }
|
Chris@0
|
868
|
Chris@14
|
869 void FChTransformF0gram::apply_LPF()
|
Chris@14
|
870 {
|
Chris@14
|
871 fft_forward_LPF->forward(LPF_time, LPF_frequency);
|
Chris@14
|
872
|
Chris@10
|
873 for (int i = 0; i < m_blockSize/2+1; i++) {
|
Chris@16
|
874 LPF_frequency[i*2] *= mp_LPFWindow[i];
|
Chris@16
|
875 LPF_frequency[i*2 + 1] *= mp_LPFWindow[i];
|
Chris@0
|
876 }
|
Chris@14
|
877
|
Chris@14
|
878 fft_inverse_LPF->inverse(LPF_frequency, LPF_time);
|
Chris@20
|
879
|
Chris@7
|
880 // TODO ver si hay que hacer fftshift para corregir la fase respecto al centro del frame.
|
Chris@7
|
881 // nota: ademÔs de aplicar el LPF, esta función resamplea la señal original.
|
Chris@0
|
882 }
|
Chris@0
|
883
|
Chris@14
|
884 void FChTransformF0gram::clean_LPF()
|
Chris@14
|
885 {
|
Chris@14
|
886 delete fft_forward_LPF;
|
Chris@14
|
887 delete fft_inverse_LPF;
|
Chris@14
|
888 deallocate(LPF_time);
|
Chris@14
|
889 deallocate(LPF_frequency);
|
Chris@14
|
890 deallocate(mp_LPFWindow);
|
Chris@0
|
891 }
|
Chris@0
|
892
|
Chris@14
|
893 void FChTransformF0gram::reset()
|
Chris@14
|
894 {
|
Chris@0
|
895 }
|
Chris@0
|
896
|
Chris@0
|
897 FChTransformF0gram::FeatureSet
|
Chris@5
|
898 FChTransformF0gram::process(const float *const *inputBuffers, Vamp::RealTime) {
|
Chris@0
|
899
|
Chris@20
|
900 if (!m_initialised) return FeatureSet();
|
Chris@20
|
901
|
Chris@7
|
902 /* PSEUDOCĆDIGO:
|
Chris@7
|
903 - Aplicar FFT al frame entero.
|
Chris@7
|
904 - Filtro pasabajos en frecuencia.
|
Chris@7
|
905 - FFT inversa al frame entero.
|
Chris@7
|
906 -----------------------------------------------------------------------------
|
Chris@7
|
907 - Para cada warp: *Si es un espectrograma direccional (un solo warp
|
Chris@7
|
908 => no es para cada warp sino para el elegido)
|
Chris@7
|
909 - Hacer la interpolación con interp1q.
|
Chris@7
|
910 - Aplicar la FFT al frame warpeado.
|
Chris@7
|
911 - (Opcional) GLogS.
|
Chris@7
|
912 - ...
|
Chris@7
|
913 */
|
Chris@0
|
914
|
Chris@0
|
915 //---------------------------------------------------------------------------
|
Chris@7
|
916 FeatureSet fs;
|
Chris@0
|
917
|
Chris@7
|
918 #ifdef DEBUG
|
Chris@16
|
919 fprintf(stderr, "\n ----- DEBUG INFORMATION ----- \n");
|
Chris@16
|
920 fprintf(stderr, " m_fs = %f Hz.\n",m_fs);
|
Chris@16
|
921 fprintf(stderr, " fs_orig = %f Hz.\n",m_warpings.fs_orig);
|
Chris@16
|
922 fprintf(stderr, " fs_warp = %f Hz.\n",m_warpings.fs_warp);
|
Chris@16
|
923 fprintf(stderr, " m_blockSize = %d.\n",m_blockSize);
|
Chris@16
|
924 fprintf(stderr, " m_warpings.nsamps_torig = %d.\n",m_warpings.nsamps_torig);
|
Chris@24
|
925 fprintf(stderr, " m_warp_params.nsamps_twarp = %d.\n",m_warp_params.nsamps_twarp);
|
Chris@16
|
926 fprintf(stderr, " m_warp_params.num_warps = %d.\n",m_warp_params.num_warps);
|
Chris@16
|
927 fprintf(stderr, " m_glogs_harmonic_count = %d.\n",m_glogs_harmonic_count);
|
Chris@7
|
928 #endif
|
Chris@0
|
929
|
Chris@20
|
930 for (int i = 0; i < m_blockSize - m_stepSize; ++i) {
|
Chris@20
|
931 m_inputBuffer[i] = m_inputBuffer[i + m_stepSize];
|
Chris@0
|
932 }
|
Chris@20
|
933 for (int i = 0; i < m_blockSize/2; ++i) {
|
Chris@20
|
934 m_inputBuffer[m_blockSize/2 + i] = inputBuffers[0][i];
|
Chris@20
|
935 }
|
Chris@20
|
936 for (int i = 0; i < m_blockSize; ++i) {
|
Chris@20
|
937 LPF_time[i] = m_inputBuffer[i] * m_timeWindow[i];
|
Chris@20
|
938 }
|
Chris@20
|
939 for (int i = 0; i < m_blockSize; ++i) {
|
Chris@20
|
940 LPF_time[m_blockSize + i] = 0.0;
|
Chris@20
|
941 }
|
Chris@20
|
942
|
Chris@7
|
943 apply_LPF();
|
Chris@7
|
944 // SeƱal filtrada queda en LPF_time
|
Chris@0
|
945
|
Chris@7
|
946 Feature feature;
|
Chris@0
|
947 feature.hasTimestamp = false;
|
Chris@0
|
948
|
Chris@15
|
949 if (m_processingMode == ModeRoughSpectrogram) {
|
Chris@15
|
950 feature.values = vector<float>(m_warp_params.nsamps_twarp/2+1, 0.f);
|
Chris@15
|
951 }
|
Chris@15
|
952
|
Chris@0
|
953 // ----------------------------------------------------------------------------------------------
|
Chris@0
|
954 // Hanning window & FFT for all warp directions
|
Chris@0
|
955
|
Chris@7
|
956 double max_glogs = -DBL_MAX;
|
Chris@10
|
957 int ind_max_glogs = 0;
|
Chris@0
|
958
|
Chris@10
|
959 for (int i_warp = 0; i_warp < m_warp_params.num_warps; i_warp++) {
|
Chris@16
|
960
|
Chris@7
|
961 // Interpolate
|
Chris@14
|
962 Utils::interp1q(LPF_time, (m_warpings.pos_int) + i_warp*m_warp_params.nsamps_twarp, m_warpings.pos_frac + i_warp*m_warp_params.nsamps_twarp, x_warping, m_warp_params.nsamps_twarp);
|
Chris@0
|
963
|
Chris@7
|
964 // Apply window
|
Chris@10
|
965 for (int i = 0; i < m_warp_params.nsamps_twarp; i++) {
|
Chris@7
|
966 x_warping[i] *= mp_HanningWindow[i];
|
Chris@7
|
967 }
|
Chris@0
|
968
|
Chris@7
|
969 // Transform
|
Chris@14
|
970 fft_xwarping->forward(x_warping, m_auxFanChirpTransform);
|
Chris@0
|
971
|
Chris@15
|
972 if (m_processingMode == ModeRoughSpectrogram) {
|
Chris@15
|
973 for (int i = 0; i < (m_warp_params.nsamps_twarp/2+1); i++) {
|
Chris@15
|
974 double abs = sqrt(m_auxFanChirpTransform[i*2]*m_auxFanChirpTransform[i*2]+m_auxFanChirpTransform[i*2+1]*m_auxFanChirpTransform[i*2+1]);
|
Chris@15
|
975 if (abs > feature.values[i]) {
|
Chris@15
|
976 feature.values[i] = abs;
|
Chris@15
|
977 }
|
Chris@15
|
978 }
|
Chris@15
|
979 continue;
|
Chris@15
|
980 }
|
Chris@15
|
981
|
Chris@7
|
982 // Copy result
|
Chris@7
|
983 double *aux_abs_fcht = m_absFanChirpTransform + i_warp*(m_warp_params.nsamps_twarp/2+1);
|
Chris@10
|
984 for (int i = 0; i < (m_warp_params.nsamps_twarp/2+1); i++) {
|
Chris@14
|
985 aux_abs_fcht[i] = log10(1.0 + 10.0*sqrt(m_auxFanChirpTransform[i*2]*m_auxFanChirpTransform[i*2]+m_auxFanChirpTransform[i*2+1]*m_auxFanChirpTransform[i*2+1]));
|
Chris@7
|
986 }
|
Chris@0
|
987
|
Chris@0
|
988 // -----------------------------------------------------------------------------------------
|
Chris@0
|
989 // GLogS
|
Chris@14
|
990 Utils::interp1q(aux_abs_fcht, m_glogs_posint, m_glogs_posfrac, m_glogs_interp, m_glogs_harmonic_count);
|
Chris@10
|
991 int glogs_ind = 0;
|
Chris@10
|
992 for (int i = 0; i < m_glogs_num_f0s; i++) {
|
Chris@7
|
993 double glogs_accum = 0;
|
Chris@10
|
994 for (int j = 1; j <= m_glogs_n[i]; j++) {
|
Chris@7
|
995 glogs_accum += m_glogs_interp[glogs_ind++];
|
Chris@7
|
996 }
|
Chris@7
|
997 m_glogs[i + i_warp*m_glogs_num_f0s] = glogs_accum/(double)m_glogs_n[i];
|
Chris@7
|
998 }
|
Chris@0
|
999
|
Chris@0
|
1000 // Sub/super harmonic correction
|
Chris@14
|
1001 Utils::interp1q(m_glogs + i_warp*m_glogs_num_f0s, m_glogs_third_harmonic_posint, m_glogs_third_harmonic_posfrac, m_glogs_third_harmonic, (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@14
|
1002 Utils::interp1q(m_glogs + i_warp*m_glogs_num_f0s, m_glogs_fifth_harmonic_posint, m_glogs_fifth_harmonic_posfrac, m_glogs_fifth_harmonic, (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct);
|
Chris@10
|
1003 for (int i = m_glogs_num_f0s-1; i >= m_glogs_init_f0s; i--) {
|
Chris@7
|
1004 m_glogs[i + i_warp*m_glogs_num_f0s] -= MAX(MAX(m_glogs[i-m_f0_params.num_f0s_per_oct + i_warp*m_glogs_num_f0s],m_glogs_third_harmonic[i-m_glogs_init_f0s]),m_glogs_fifth_harmonic[i-m_glogs_init_f0s]);
|
Chris@7
|
1005 }
|
Chris@10
|
1006 for (int i = m_glogs_init_f0s; i < m_glogs_num_f0s-m_f0_params.num_f0s_per_oct; i++) {
|
Chris@7
|
1007 m_glogs[i + i_warp*m_glogs_num_f0s] -= 0.3*m_glogs[i+m_f0_params.num_f0s_per_oct + i_warp*m_glogs_num_f0s];
|
Chris@7
|
1008 // Median, sigma $ weights correction
|
Chris@7
|
1009 m_glogs[i + i_warp*m_glogs_num_f0s] = (m_glogs[i + i_warp*m_glogs_num_f0s]-m_glogs_median_correction[i-m_glogs_init_f0s])*m_glogs_sigma_correction[i-m_glogs_init_f0s]*m_glogs_f0_preference_weights[i-m_glogs_init_f0s];
|
Chris@7
|
1010 }
|
Chris@0
|
1011
|
Chris@7
|
1012 // Look for maximum value to determine best direction
|
Chris@10
|
1013 for (int i = m_glogs_init_f0s; i < m_glogs_num_f0s-m_f0_params.num_f0s_per_oct; i++) {
|
Chris@7
|
1014 if (m_glogs[i + i_warp*m_glogs_num_f0s] > max_glogs) {
|
Chris@7
|
1015 max_glogs = m_glogs[i + i_warp*m_glogs_num_f0s];
|
Chris@7
|
1016 ind_max_glogs = i_warp;
|
Chris@7
|
1017 }
|
Chris@7
|
1018 }
|
Chris@7
|
1019 }
|
Chris@0
|
1020
|
Chris@15
|
1021 if (m_processingMode == ModeRoughSpectrogram) {
|
Chris@15
|
1022
|
Chris@15
|
1023 // already accumulated our return values in feature
|
Chris@19
|
1024 fs[0].push_back(feature);
|
Chris@15
|
1025
|
Chris@15
|
1026 } else if (m_processingMode == ModeSpectrogram) {
|
Chris@15
|
1027
|
Chris@15
|
1028 for (int i = 0; i < m_warp_params.nsamps_twarp/2+1; i++) {
|
Chris@15
|
1029 feature.values.push_back(pow(10.0, m_absFanChirpTransform[ind_max_glogs * (m_warp_params.nsamps_twarp/2+1) + i]) - 1.0);
|
Chris@15
|
1030 }
|
Chris@19
|
1031 fs[0].push_back(feature);
|
Chris@15
|
1032
|
Chris@15
|
1033 } else { // f0gram
|
Chris@15
|
1034
|
Chris@19
|
1035 int bestIndex = -1;
|
Chris@19
|
1036
|
Chris@15
|
1037 for (int i=m_glogs_init_f0s; i< m_glogs_num_f0s - m_f0_params.num_f0s_per_oct; i++) {
|
Chris@19
|
1038 double value = 0.0;
|
Chris@15
|
1039 switch (m_f0gram_mode) {
|
Chris@15
|
1040 case AllBinsOfBestDirection:
|
Chris@19
|
1041 value = m_glogs[i+(int)ind_max_glogs*(int)m_glogs_num_f0s];
|
Chris@15
|
1042 break;
|
Chris@15
|
1043 case BestBinOfAllDirections:
|
Chris@15
|
1044 max_glogs = -DBL_MAX;
|
Chris@15
|
1045 for (int i_warp = 0; i_warp < m_warp_params.num_warps; i_warp++) {
|
Chris@15
|
1046 if (m_glogs[i + i_warp*m_glogs_num_f0s] > max_glogs) {
|
Chris@15
|
1047 max_glogs = m_glogs[i + i_warp*m_glogs_num_f0s];
|
Chris@15
|
1048 ind_max_glogs = i_warp;
|
Chris@15
|
1049 }
|
Chris@7
|
1050 }
|
Chris@19
|
1051 value = max_glogs;
|
Chris@15
|
1052 break;
|
Chris@7
|
1053 }
|
Chris@19
|
1054 if (bestIndex < 0 || float(value) > feature.values[bestIndex]) {
|
Chris@19
|
1055 bestIndex = int(feature.values.size());
|
Chris@19
|
1056 }
|
Chris@19
|
1057 feature.values.push_back(float(value));
|
Chris@19
|
1058 }
|
Chris@19
|
1059
|
Chris@19
|
1060 fs[0].push_back(feature);
|
Chris@19
|
1061
|
Chris@19
|
1062 if (bestIndex >= 0) {
|
Chris@19
|
1063
|
Chris@19
|
1064 double bestValue = feature.values[bestIndex];
|
Chris@19
|
1065 set<double> ordered(feature.values.begin(), feature.values.end());
|
Chris@19
|
1066 vector<double> flattened(ordered.begin(), ordered.end());
|
Chris@19
|
1067 double median = flattened[flattened.size()/2];
|
Chris@19
|
1068 if (bestValue > median * 8.0) {
|
Chris@19
|
1069 Feature pfeature;
|
Chris@19
|
1070 pfeature.hasTimestamp = false;
|
Chris@19
|
1071 pfeature.values.push_back(m_f0s[bestIndex]);
|
Chris@19
|
1072 fs[1].push_back(pfeature);
|
Chris@19
|
1073 }
|
Chris@7
|
1074 }
|
Chris@7
|
1075 }
|
Chris@0
|
1076
|
Chris@7
|
1077 return fs;
|
Chris@0
|
1078 }
|
Chris@0
|
1079
|
Chris@0
|
1080 FChTransformF0gram::FeatureSet
|
Chris@0
|
1081 FChTransformF0gram::getRemainingFeatures() {
|
Chris@0
|
1082 return FeatureSet();
|
Chris@0
|
1083 }
|
Chris@0
|
1084
|
Chris@0
|
1085 void
|
Chris@0
|
1086 FChTransformF0gram::design_time_window() {
|
Chris@0
|
1087
|
Chris@20
|
1088 int transitionWidth = (int)m_blockSize/128 + 128;
|
Chris@14
|
1089 m_timeWindow = allocate<double>(m_blockSize);
|
Chris@14
|
1090 double *lp_transitionWindow = allocate<double>(transitionWidth);
|
Chris@0
|
1091
|
Chris@10
|
1092 for (int i = 0; i < m_blockSize; i++) {
|
Chris@7
|
1093 m_timeWindow[i] = 1.0;
|
Chris@7
|
1094 }
|
Chris@0
|
1095
|
Chris@10
|
1096 for (int i = 0; i < transitionWidth; i++) {
|
Chris@0
|
1097 lp_transitionWindow[i]=0.5*(1.0-cos(2*M_PI*(double)(i+1)/((double)transitionWidth+1.0)));
|
Chris@0
|
1098 }
|
Chris@0
|
1099
|
Chris@10
|
1100 for (int i = 0; i < transitionWidth/2; i++) {
|
Chris@7
|
1101 m_timeWindow[i] = lp_transitionWindow[i];
|
Chris@7
|
1102 m_timeWindow[m_blockSize-1-i] = lp_transitionWindow[transitionWidth-1-i];
|
Chris@7
|
1103 }
|
Chris@0
|
1104
|
Chris@7
|
1105 #ifdef DEBUG
|
Chris@7
|
1106 for (int i = 0; i < m_blockSize; i++) {
|
Chris@7
|
1107 if ((i<transitionWidth)) {
|
Chris@16
|
1108 fprintf(stderr, " m_timeWindow[%d] = %f.\n",i,m_timeWindow[i]);
|
Chris@7
|
1109 }
|
Chris@7
|
1110 }
|
Chris@7
|
1111 #endif
|
Chris@0
|
1112
|
Chris@14
|
1113 deallocate(lp_transitionWindow);
|
Chris@0
|
1114 }
|
Chris@0
|
1115
|