comparison SimpleCepstrum.cpp @ 5:05c558f1a23b

Change "relative to mean" to "relative to RMS", and add peak-to-RMS output. Add a mean-filter history. Remove essentially useless forward-power method (same as forward-magnitude with 2x factor). Refactor a bit
author Chris Cannam
date Mon, 25 Jun 2012 11:45:33 +0100
parents 3467d995ea2b
children ffed34f519db
comparison
equal deleted inserted replaced
4:3467d995ea2b 5:05c558f1a23b
16 m_channels(0), 16 m_channels(0),
17 m_stepSize(256), 17 m_stepSize(256),
18 m_blockSize(1024), 18 m_blockSize(1024),
19 m_fmin(50), 19 m_fmin(50),
20 m_fmax(1000), 20 m_fmax(1000),
21 m_histlen(3),
21 m_clamp(false), 22 m_clamp(false),
22 m_method(InverseSymmetric) 23 m_method(InverseSymmetric),
24 m_binFrom(0),
25 m_binTo(0),
26 m_bins(0),
27 m_history(0)
23 { 28 {
24 } 29 }
25 30
26 SimpleCepstrum::~SimpleCepstrum() 31 SimpleCepstrum::~SimpleCepstrum()
27 { 32 {
33 if (m_history) {
34 for (int i = 0; i < m_histlen; ++i) {
35 delete[] m_history[i];
36 }
37 delete[] m_history;
38 }
28 } 39 }
29 40
30 string 41 string
31 SimpleCepstrum::getIdentifier() const 42 SimpleCepstrum::getIdentifier() const
32 { 43 {
125 d.maxValue = m_inputSampleRate / 2; 136 d.maxValue = m_inputSampleRate / 2;
126 d.defaultValue = 1000; 137 d.defaultValue = 1000;
127 d.isQuantized = false; 138 d.isQuantized = false;
128 list.push_back(d); 139 list.push_back(d);
129 140
141 d.identifier = "histlen";
142 d.name = "Mean filter history length";
143 d.description = "";
144 d.unit = "";
145 d.minValue = 1;
146 d.maxValue = 10;
147 d.defaultValue = 3;
148 d.isQuantized = true;
149 d.quantizeStep = 1;
150 list.push_back(d);
151
130 d.identifier = "method"; 152 d.identifier = "method";
131 d.name = "Cepstrum transform method"; 153 d.name = "Cepstrum transform method";
132 d.unit = ""; 154 d.unit = "";
133 d.minValue = 0; 155 d.minValue = 0;
134 d.maxValue = 5; 156 d.maxValue = 4;
135 d.defaultValue = 0; 157 d.defaultValue = 0;
136 d.isQuantized = true; 158 d.isQuantized = true;
137 d.quantizeStep = 1; 159 d.quantizeStep = 1;
138 d.valueNames.push_back("Inverse symmetric"); 160 d.valueNames.push_back("Inverse symmetric");
139 d.valueNames.push_back("Inverse asymmetric"); 161 d.valueNames.push_back("Inverse asymmetric");
140 d.valueNames.push_back("Inverse complex"); 162 d.valueNames.push_back("Inverse complex");
141 d.valueNames.push_back("Forward power");
142 d.valueNames.push_back("Forward magnitude"); 163 d.valueNames.push_back("Forward magnitude");
143 d.valueNames.push_back("Forward difference"); 164 d.valueNames.push_back("Forward difference");
144 list.push_back(d); 165 list.push_back(d);
145 166
146 d.identifier = "clamp"; 167 d.identifier = "clamp";
160 float 181 float
161 SimpleCepstrum::getParameter(string identifier) const 182 SimpleCepstrum::getParameter(string identifier) const
162 { 183 {
163 if (identifier == "fmin") return m_fmin; 184 if (identifier == "fmin") return m_fmin;
164 else if (identifier == "fmax") return m_fmax; 185 else if (identifier == "fmax") return m_fmax;
186 else if (identifier == "histlen") return m_histlen;
165 else if (identifier == "clamp") return (m_clamp ? 1 : 0); 187 else if (identifier == "clamp") return (m_clamp ? 1 : 0);
166 else if (identifier == "method") return (int)m_method; 188 else if (identifier == "method") return (int)m_method;
167 else return 0.f; 189 else return 0.f;
168 } 190 }
169 191
170 void 192 void
171 SimpleCepstrum::setParameter(string identifier, float value) 193 SimpleCepstrum::setParameter(string identifier, float value)
172 { 194 {
173 if (identifier == "fmin") m_fmin = value; 195 if (identifier == "fmin") m_fmin = value;
174 else if (identifier == "fmax") m_fmax = value; 196 else if (identifier == "fmax") m_fmax = value;
197 else if (identifier == "histlen") m_histlen = value;
175 else if (identifier == "clamp") m_clamp = (value > 0.5); 198 else if (identifier == "clamp") m_clamp = (value > 0.5);
176 else if (identifier == "method") m_method = Method(int(value + 0.5)); 199 else if (identifier == "method") m_method = Method(int(value + 0.5));
177 } 200 }
178 201
179 SimpleCepstrum::ProgramList 202 SimpleCepstrum::ProgramList
222 245
223 d.identifier = "raw_cepstral_peak"; 246 d.identifier = "raw_cepstral_peak";
224 d.name = "Frequency corresponding to raw cepstral peak"; 247 d.name = "Frequency corresponding to raw cepstral peak";
225 d.description = "Return the frequency whose period corresponds to the quefrency with the maximum value within the specified range of the cepstrum"; 248 d.description = "Return the frequency whose period corresponds to the quefrency with the maximum value within the specified range of the cepstrum";
226 d.unit = "Hz"; 249 d.unit = "Hz";
227 m_rawOutput = n++; 250 m_pkOutput = n++;
228 outputs.push_back(d); 251 outputs.push_back(d);
229 252
230 d.identifier = "variance"; 253 d.identifier = "variance";
231 d.name = "Variance of cepstral bins in range"; 254 d.name = "Variance of cepstral bins in range";
232 d.unit = ""; 255 d.unit = "";
244 d.identifier = "peak_to_mean"; 267 d.identifier = "peak_to_mean";
245 d.name = "Peak-to-mean distance"; 268 d.name = "Peak-to-mean distance";
246 d.unit = ""; 269 d.unit = "";
247 d.description = "Return the difference between maximum and mean bin values within the specified range of the cepstrum"; 270 d.description = "Return the difference between maximum and mean bin values within the specified range of the cepstrum";
248 m_p2mOutput = n++; 271 m_p2mOutput = n++;
272 outputs.push_back(d);
273
274 d.identifier = "peak_to_rms";
275 d.name = "Peak-to-RMS distance";
276 d.unit = "";
277 d.description = "Return the difference between maximum and root mean square bin values within the specified range of the cepstrum";
278 m_p2rOutput = n++;
249 outputs.push_back(d); 279 outputs.push_back(d);
250 280
251 d.identifier = "cepstrum"; 281 d.identifier = "cepstrum";
252 d.name = "Cepstrum"; 282 d.name = "Cepstrum";
253 d.unit = ""; 283 d.unit = "";
259 to = m_blockSize / 2 - 1; 289 to = m_blockSize / 2 - 1;
260 } 290 }
261 d.binCount = to - from + 1; 291 d.binCount = to - from + 1;
262 for (int i = from; i <= to; ++i) { 292 for (int i = from; i <= to; ++i) {
263 float freq = m_inputSampleRate / i; 293 float freq = m_inputSampleRate / i;
264 char buffer[10]; 294 char buffer[20];
265 sprintf(buffer, "%.2f Hz", freq); 295 sprintf(buffer, "%.2f Hz", freq);
266 d.binNames.push_back(buffer); 296 d.binNames.push_back(buffer);
267 } 297 }
268 298
269 d.hasKnownExtents = false; 299 d.hasKnownExtents = false;
270 m_cepOutput = n++; 300 m_cepOutput = n++;
271 outputs.push_back(d); 301 outputs.push_back(d);
272 302
273 d.identifier = "am"; 303 d.identifier = "am";
274 d.name = "Cepstrum bins relative to mean"; 304 d.name = "Cepstrum bins relative to RMS";
275 d.description = "The cepstrum bins within the specified range, expressed as a value relative to the mean bin value in the range, with values below the mean clamped to zero"; 305 d.description = "The cepstrum bins within the specified range, expressed as a value relative to the root mean square bin value in the range, with values below the RMS clamped to zero";
276 m_amOutput = n++; 306 m_amOutput = n++;
277 outputs.push_back(d); 307 outputs.push_back(d);
278 308
279 d.identifier = "env"; 309 d.identifier = "env";
280 d.name = "Spectral envelope"; 310 d.name = "Spectral envelope";
281 d.description = "Envelope calculated from the cepstral values below the specified minimum"; 311 d.description = "Envelope calculated from the cepstral values below the specified minimum";
282 d.binCount = m_blockSize/2 + 1; 312 d.binCount = m_blockSize/2 + 1;
283 d.binNames.clear(); 313 d.binNames.clear();
284 for (int i = 0; i < d.binCount; ++i) { 314 for (int i = 0; i < d.binCount; ++i) {
285 float freq = (m_inputSampleRate / m_blockSize) * i; 315 float freq = (m_inputSampleRate / m_blockSize) * i;
286 char buffer[10]; 316 char buffer[20];
287 sprintf(buffer, "%.2f Hz", freq); 317 sprintf(buffer, "%.2f Hz", freq);
288 d.binNames.push_back(buffer); 318 d.binNames.push_back(buffer);
289 } 319 }
290 m_envOutput = n++; 320 m_envOutput = n++;
291 outputs.push_back(d); 321 outputs.push_back(d);
311 341
312 m_channels = channels; 342 m_channels = channels;
313 m_stepSize = stepSize; 343 m_stepSize = stepSize;
314 m_blockSize = blockSize; 344 m_blockSize = blockSize;
315 345
346 m_binFrom = int(m_inputSampleRate / m_fmax);
347 m_binTo = int(m_inputSampleRate / m_fmin);
348
349 if (m_binTo >= m_blockSize / 2) {
350 m_binTo = m_blockSize / 2 - 1;
351 }
352
353 m_bins = (m_binTo - m_binFrom) + 1;
354
355 m_history = new double *[m_histlen];
356 for (int i = 0; i < m_histlen; ++i) {
357 m_history[i] = new double[m_bins];
358 }
359
360 reset();
361
316 return true; 362 return true;
317 } 363 }
318 364
319 void 365 void
320 SimpleCepstrum::reset() 366 SimpleCepstrum::reset()
321 { 367 {
368 for (int i = 0; i < m_histlen; ++i) {
369 for (int j = 0; j < m_bins; ++j) {
370 m_history[i][j] = 0.0;
371 }
372 }
373 }
374
375 void
376 SimpleCepstrum::filter(const double *cep, double *result)
377 {
378 int hix = m_histlen - 1; // current history index
379
380 // roll back the history
381 if (m_histlen > 1) {
382 double *oldest = m_history[0];
383 for (int i = 1; i < m_histlen; ++i) {
384 m_history[i-1] = m_history[i];
385 }
386 // and stick this back in the newest spot, to recycle
387 m_history[hix] = oldest;
388 }
389
390 for (int i = 0; i < m_bins; ++i) {
391 m_history[hix][i] = cep[i + m_binFrom];
392 }
393
394 for (int i = 0; i < m_bins; ++i) {
395 double mean = 0.0;
396 for (int j = 0; j < m_histlen; ++j) {
397 mean += m_history[j][i];
398 }
399 mean /= m_histlen;
400 result[i] = mean;
401 }
402 }
403
404 void
405 SimpleCepstrum::addStatisticalOutputs(FeatureSet &fs, const double *data)
406 {
407 int n = m_bins;
408
409 double maxval = 0.f;
410 int maxbin = 0;
411
412 for (int i = 0; i < n; ++i) {
413 if (data[i] > maxval) {
414 maxval = data[i];
415 maxbin = i + m_binFrom;
416 }
417 }
418
419 Feature rf;
420 if (maxbin > 0) {
421 rf.values.push_back(m_inputSampleRate / maxbin);
422 } else {
423 rf.values.push_back(0);
424 }
425 fs[m_pkOutput].push_back(rf);
426
427 double mean = 0;
428 for (int i = 0; i < n; ++i) {
429 mean += data[i];
430 }
431 mean /= n;
432
433 double rms = 0;
434 for (int i = 0; i < n; ++i) {
435 rms += data[i] * data[i];
436 }
437 rms = sqrt(rms / n);
438
439 double variance = 0;
440 for (int i = 0; i < n; ++i) {
441 double dev = fabs(data[i] - mean);
442 variance += dev * dev;
443 }
444 variance /= n;
445
446 Feature vf;
447 vf.values.push_back(variance);
448 fs[m_varOutput].push_back(vf);
449
450 Feature pf;
451 pf.values.push_back(maxval - mean);
452 fs[m_p2mOutput].push_back(pf);
453
454 Feature pr;
455 pr.values.push_back(maxval - rms);
456 fs[m_p2rOutput].push_back(pr);
457
458 Feature pv;
459 pv.values.push_back(maxval);
460 fs[m_pvOutput].push_back(pv);
461
462 Feature am;
463 for (int i = 0; i < n; ++i) {
464 if (data[i] < rms) am.values.push_back(0);
465 else am.values.push_back(data[i] - rms);
466 }
467 fs[m_amOutput].push_back(am);
468 }
469
470 void
471 SimpleCepstrum::addEnvelopeOutputs(FeatureSet &fs, const float *const *inputBuffers, const double *cep)
472 {
473 // Wipe the higher cepstral bins in order to calculate the
474 // envelope. This calculation uses the raw cepstrum, not the
475 // filtered values (because only values "in frequency range" are
476 // filtered).
477 int bs = m_blockSize;
478 int hs = m_blockSize/2 + 1;
479
480 double *ecep = new double[bs];
481 for (int i = 0; i < m_binFrom; ++i) {
482 ecep[i] = cep[i] / bs;
483 }
484 for (int i = m_binFrom; i < bs; ++i) {
485 ecep[i] = 0;
486 }
487 ecep[0] /= 2;
488 ecep[m_binFrom-1] /= 2;
489
490 double *env = new double[bs];
491 double *io = new double[bs];
492 fft(bs, false, ecep, 0, env, io);
493
494 for (int i = 0; i < hs; ++i) {
495 env[i] = exp(env[i]);
496 }
497 Feature envf;
498 for (int i = 0; i < hs; ++i) {
499 envf.values.push_back(env[i]);
500 }
501 fs[m_envOutput].push_back(envf);
502
503 Feature es;
504 for (int i = 0; i < hs; ++i) {
505 double re = inputBuffers[0][i*2 ] / env[i];
506 double im = inputBuffers[0][i*2+1] / env[i];
507 double mag = sqrt(re*re + im*im);
508 es.values.push_back(mag);
509 }
510 fs[m_esOutput].push_back(es);
511
512 delete[] env;
513 delete[] ecep;
514 delete[] io;
322 } 515 }
323 516
324 SimpleCepstrum::FeatureSet 517 SimpleCepstrum::FeatureSet
325 SimpleCepstrum::process(const float *const *inputBuffers, Vamp::RealTime timestamp) 518 SimpleCepstrum::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
326 { 519 {
327 FeatureSet fs; 520 FeatureSet fs;
328 521
329 int bs = m_blockSize; 522 int bs = m_blockSize;
330 int hs = m_blockSize/2 + 1; 523 int hs = m_blockSize/2 + 1;
331 524
332 double *cep = new double[bs]; 525 double *rawcep = new double[bs];
333 double *io = new double[bs]; 526 double *io = new double[bs];
334 527
335 if (m_method != InverseComplex) { 528 if (m_method != InverseComplex) {
336 529
337 double *logmag = new double[bs]; 530 double *logmag = new double[bs];
339 for (int i = 0; i < hs; ++i) { 532 for (int i = 0; i < hs; ++i) {
340 533
341 double power = 534 double power =
342 inputBuffers[0][i*2 ] * inputBuffers[0][i*2 ] + 535 inputBuffers[0][i*2 ] * inputBuffers[0][i*2 ] +
343 inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1]; 536 inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1];
344 537 double mag = sqrt(power);
345 double lm; 538
346 539 double lm = log(mag + 0.00000001);
347 if (m_method == ForwardPower) {
348 lm = log(power + 0.00000001);
349 } else {
350 double mag = sqrt(power);
351 lm = log(mag + 0.00000001);
352 }
353 540
354 switch (m_method) { 541 switch (m_method) {
355 case InverseSymmetric: 542 case InverseSymmetric:
356 logmag[i] = lm; 543 logmag[i] = lm;
357 if (i > 0) logmag[bs - i] = lm; 544 if (i > 0) logmag[bs - i] = lm;
370 } 557 }
371 558
372 if (m_method == InverseSymmetric || 559 if (m_method == InverseSymmetric ||
373 m_method == InverseAsymmetric) { 560 m_method == InverseAsymmetric) {
374 561
375 fft(bs, true, logmag, 0, cep, io); 562 fft(bs, true, logmag, 0, rawcep, io);
376 563
377 } else { 564 } else {
378 565
379 fft(bs, false, logmag, 0, cep, io); 566 fft(bs, false, logmag, 0, rawcep, io);
380 567
381 if (m_method == ForwardDifference) { 568 if (m_method == ForwardDifference) {
382 for (int i = 0; i < hs; ++i) { 569 for (int i = 0; i < hs; ++i) {
383 cep[i] = fabs(io[i]) - fabs(cep[i]); 570 rawcep[i] = fabs(io[i]) - fabs(rawcep[i]);
384 } 571 }
385 } else { 572 } else {
386 for (int i = 0; i < hs; ++i) { 573 for (int i = 0; i < hs; ++i) {
387 cep[i] = sqrt(cep[i]*cep[i] + io[i]*io[i]); 574 rawcep[i] = sqrt(rawcep[i]*rawcep[i] + io[i]*io[i]);
388 } 575 }
389 } 576 }
390 } 577 }
391 578
392 delete[] logmag; 579 delete[] logmag;
407 ri[bs - i] = ri[i]; 594 ri[bs - i] = ri[i];
408 ii[bs - i] = -ii[i]; 595 ii[bs - i] = -ii[i];
409 } 596 }
410 } 597 }
411 598
412 fft(bs, true, ri, ii, cep, io); 599 fft(bs, true, ri, ii, rawcep, io);
413 600
414 delete[] ri; 601 delete[] ri;
415 delete[] ii; 602 delete[] ii;
416 } 603 }
417 604
418 if (m_clamp) { 605 if (m_clamp) {
419 for (int i = 0; i < bs; ++i) { 606 for (int i = 0; i < bs; ++i) {
420 if (cep[i] < 0) cep[i] = 0; 607 if (rawcep[i] < 0) rawcep[i] = 0;
421 } 608 }
422 } 609 }
423 610
424 int from = int(m_inputSampleRate / m_fmax); 611 delete[] io;
425 int to = int(m_inputSampleRate / m_fmin); 612
426 613 double *latest = new double[m_bins];
427 if (to >= bs / 2) { 614 filter(rawcep, latest);
428 to = bs / 2 - 1; 615
429 } 616 int n = m_bins;
430 617
431 Feature cf; 618 Feature cf;
432 for (int i = from; i <= to; ++i) { 619 for (int i = 0; i < n; ++i) {
433 cf.values.push_back(cep[i]); 620 cf.values.push_back(latest[i]);
434 } 621 }
435 fs[m_cepOutput].push_back(cf); 622 fs[m_cepOutput].push_back(cf);
436 623
437 float maxval = 0.f; 624 addStatisticalOutputs(fs, latest);
438 int maxbin = 0; 625
439 626 addEnvelopeOutputs(fs, inputBuffers, rawcep);
440 for (int i = from; i <= to; ++i) { 627
441 if (cep[i] > maxval) { 628 delete[] latest;
442 maxval = cep[i];
443 maxbin = i;
444 }
445 }
446
447 Feature rf;
448 if (maxbin > 0) {
449 rf.values.push_back(m_inputSampleRate / maxbin);
450 } else {
451 rf.values.push_back(0);
452 }
453 fs[m_rawOutput].push_back(rf);
454
455 float mean = 0;
456 for (int i = from; i <= to; ++i) {
457 mean += cep[i];
458 }
459 mean /= (to - from) + 1;
460
461 float variance = 0;
462 for (int i = from; i <= to; ++i) {
463 float dev = fabsf(cep[i] - mean);
464 variance += dev * dev;
465 }
466 variance /= (to - from) + 1;
467
468 Feature vf;
469 vf.values.push_back(variance);
470 fs[m_varOutput].push_back(vf);
471
472 Feature pf;
473 pf.values.push_back(maxval - mean);
474 fs[m_p2mOutput].push_back(pf);
475
476 Feature pv;
477 pv.values.push_back(maxval);
478 fs[m_pvOutput].push_back(pv);
479
480 Feature am;
481 for (int i = from; i <= to; ++i) {
482 if (cep[i] < mean) am.values.push_back(0);
483 else am.values.push_back(cep[i] - mean);
484 }
485 fs[m_amOutput].push_back(am);
486
487 // destructively wipe the higher cepstral bins in order to
488 // calculate the envelope
489 double *env = new double[bs];
490 cep[0] /= 2;
491 cep[from-1] /= 2;
492 for (int i = 0; i < from; ++i) {
493 cep[i] /= bs;
494 }
495 for (int i = from; i < bs; ++i) {
496 cep[i] = 0;
497 }
498 fft(bs, false, cep, 0, env, io);
499 for (int i = 0; i < hs; ++i) {
500 env[i] = exp(env[i]);
501 }
502 Feature envf;
503 for (int i = 0; i < hs; ++i) {
504 envf.values.push_back(env[i]);
505 }
506 fs[m_envOutput].push_back(envf);
507
508 Feature es;
509 for (int i = 0; i < hs; ++i) {
510 double re = inputBuffers[0][i*2 ] / env[i];
511 double im = inputBuffers[0][i*2+1] / env[i];
512 double mag = sqrt(re*re + im*im);
513 es.values.push_back(mag);
514 }
515 fs[m_esOutput].push_back(es);
516
517 delete[] env;
518 delete[] io;
519 delete[] cep;
520 629
521 return fs; 630 return fs;
522 } 631 }
523 632
524 SimpleCepstrum::FeatureSet 633 SimpleCepstrum::FeatureSet