comparison FChTransformF0gram.cpp @ 16:ce62ed201de8 spect

Toward (but not quite reaching) accurate frequency labels for outputs
author Chris Cannam
date Wed, 03 Oct 2018 15:47:00 +0100
parents 0a860992b4f4
children 436eab0bc1ff
comparison
equal deleted inserted replaced
15:0a860992b4f4 16:ce62ed201de8
22 22
23 #include "bqvec/Allocators.h" 23 #include "bqvec/Allocators.h"
24 24
25 using namespace breakfastquay; 25 using namespace breakfastquay;
26 26
27 //#define DEBUG 27 #define DEBUG
28 28
29 #define MAX(x, y) (((x) > (y)) ? (x) : (y)) 29 #define MAX(x, y) (((x) > (y)) ? (x) : (y))
30 30
31 FChTransformF0gram::FChTransformF0gram(ProcessingMode mode, 31 FChTransformF0gram::FChTransformF0gram(ProcessingMode mode,
32 float inputSampleRate) : 32 float inputSampleRate) :
38 m_fs = inputSampleRate; 38 m_fs = inputSampleRate;
39 // max frequency of interest (Hz) 39 // max frequency of interest (Hz)
40 m_fmax = 10000.f; 40 m_fmax = 10000.f;
41 // warping parameters 41 // warping parameters
42 m_warp_params.nsamps_twarp = 2048; 42 m_warp_params.nsamps_twarp = 2048;
43 //m_warp_params.nsamps_twarp = 8;
44 m_warp_params.alpha_max = 4; 43 m_warp_params.alpha_max = 4;
45 m_warp_params.num_warps = 21; 44 m_warp_params.num_warps = 21;
46 //m_warp_params.num_warps = 11;
47 m_warp_params.fact_over_samp = 2; 45 m_warp_params.fact_over_samp = 2;
48 m_warp_params.alpha_dist = 0; 46 m_warp_params.alpha_dist = 0;
49 // f0 parameters 47 // f0 parameters
50 m_f0_params.f0min = 80.0; 48 m_f0_params.f0min = 80.0;
51 m_f0_params.num_octs = 4; 49 m_f0_params.num_octs = 4;
72 m_nfft = m_warp_params.nsamps_twarp; 70 m_nfft = m_warp_params.nsamps_twarp;
73 // hop in samples 71 // hop in samples
74 m_hop = m_warp_params.fact_over_samp * 256; 72 m_hop = m_warp_params.fact_over_samp * 256;
75 73
76 m_num_f0s = 0; 74 m_num_f0s = 0;
77 75 m_f0s = 0;
78 } 76 }
79 77
80 FChTransformF0gram::~FChTransformF0gram() 78 FChTransformF0gram::~FChTransformF0gram()
81 { 79 {
82 if (!m_blockSize) { 80 if (!m_blockSize) {
114 deallocate(m_glogs_fifth_harmonic_posfrac); 112 deallocate(m_glogs_fifth_harmonic_posfrac);
115 deallocate(m_glogs_fifth_harmonic); 113 deallocate(m_glogs_fifth_harmonic);
116 deallocate(m_glogs_f0_preference_weights); 114 deallocate(m_glogs_f0_preference_weights);
117 deallocate(m_glogs_median_correction); 115 deallocate(m_glogs_median_correction);
118 deallocate(m_glogs_sigma_correction); 116 deallocate(m_glogs_sigma_correction);
117
118 deallocate(m_f0s);
119 } 119 }
120 120
121 string 121 string
122 FChTransformF0gram::getIdentifier() const { 122 FChTransformF0gram::getIdentifier() const {
123 switch (m_processingMode) { 123 switch (m_processingMode) {
481 FChTransformF0gram::OutputList 481 FChTransformF0gram::OutputList
482 FChTransformF0gram::getOutputDescriptors() const { 482 FChTransformF0gram::getOutputDescriptors() const {
483 483
484 OutputList list; 484 OutputList list;
485 485
486 // See OutputDescriptor documentation for the possibilities here. 486 vector<string> labels;
487 // Every plugin must have at least one output. 487 char label[100];
488 488
489 /* f0 values of F0gram grid as string values */ 489 if (m_processingMode == ModeF0Gram) {
490 vector<string> f0values; 490
491 int ind = 0; 491 /* f0 values of F0gram grid as string values */
492 char f0String[100]; 492 for (int i = 0; i < m_num_f0s; ++i) {
493 while (ind < m_num_f0s) { 493 sprintf(label, "%4.2f Hz", m_f0s[i]);
494 sprintf(f0String, "%4.2f", m_f0s[ind]); 494 labels.push_back(label);
495 f0values.push_back(f0String); 495 }
496 ind++; 496
497 } 497 /* The F0gram */
498 498 OutputDescriptor d;
499 /* The F0gram */ 499 d.identifier = "f0gram";
500 OutputDescriptor d; 500 d.name = "F0gram: salience of f0s";
501 d.identifier = "f0gram"; 501 d.description = "This representation show the salience of the different f0s in the signal.";
502 d.name = "F0gram: salience of f0s"; 502 d.hasFixedBinCount = true;
503 d.description = "This representation show the salience of the different f0s in the signal."; 503 d.binCount = m_f0_params.num_octs * m_f0_params.num_f0s_per_oct;
504 d.hasFixedBinCount = true; 504 d.binNames = labels;
505 d.binCount = m_f0_params.num_octs * m_f0_params.num_f0s_per_oct; 505 d.hasKnownExtents = false;
506 d.binNames = f0values; 506 d.isQuantized = false;
507 d.hasKnownExtents = false; 507 d.sampleType = OutputDescriptor::OneSamplePerStep;
508 d.isQuantized = false; 508 d.hasDuration = false;
509 d.sampleType = OutputDescriptor::OneSamplePerStep; 509 list.push_back(d);
510 d.hasDuration = false; 510
511 list.push_back(d); 511 } else {
512 512
513 for (int i = 0; i < m_warp_params.nsamps_twarp/2+1; ++i) {
514 double freq = i * (m_warpings.fs_warp / m_nfft);
515 sprintf(label, "%4.2f Hz", freq);
516 labels.push_back(label);
517 }
518
519 OutputDescriptor d;
520 d.identifier = "spectrogram";
521 d.name = "Spectrogram";
522 d.description = "Time/frequency spectrogram derived from the Fan Chirp Transform output";
523 d.hasFixedBinCount = true;
524 d.binCount = m_warp_params.nsamps_twarp/2+1;
525 d.binNames = labels;
526 d.hasKnownExtents = false;
527 d.isQuantized = false;
528 d.sampleType = OutputDescriptor::OneSamplePerStep;
529 d.hasDuration = false;
530 list.push_back(d);
531 }
532
513 return list; 533 return list;
514 } 534 }
515 535
516 bool 536 bool
517 FChTransformF0gram::initialise(size_t channels, size_t stepSize, size_t blockSize) { 537 FChTransformF0gram::initialise(size_t channels, size_t stepSize, size_t blockSize) {
526 546
527 // WARNING !!! 547 // WARNING !!!
528 // these values in fact are determined by the sampling frequency m_fs 548 // these values in fact are determined by the sampling frequency m_fs
529 // the parameters used below correspond to default values i.e. m_fs = 44.100 Hz 549 // the parameters used below correspond to default values i.e. m_fs = 44.100 Hz
530 //m_blockSize = 4 * m_warp_params.nsamps_twarp; 550 //m_blockSize = 4 * m_warp_params.nsamps_twarp;
531 m_stepSize = floor(m_hop / m_warp_params.fact_over_samp); 551 // m_stepSize = floor(m_hop / m_warp_params.fact_over_samp);
552
553 /* design of FChT */
554 design_FChT();
532 555
533 /* initialise m_glogs_params */ 556 /* initialise m_glogs_params */
534 design_GLogS(); 557 design_GLogS();
535
536 /* design of FChT */
537 design_FChT();
538 558
539 design_LPF(); 559 design_LPF();
540 560
541 design_time_window(); 561 design_time_window();
542 562
543 // Create Hanning window for warped signals 563 // Create Hanning window for warped signals
544 mp_HanningWindow = allocate<double>(m_warp_params.nsamps_twarp); 564 mp_HanningWindow = allocate<double>(m_warp_params.nsamps_twarp);
545 bool normalize = false; 565 bool normalize = false;
546 Utils::hanning_window(mp_HanningWindow, m_warp_params.nsamps_twarp, normalize); 566 Utils::hanning_window(mp_HanningWindow, m_warp_params.nsamps_twarp, normalize);
547 567
568 m_num_f0s = m_f0_params.num_octs * m_f0_params.num_f0s_per_oct;
569 m_f0s = allocate<double>(m_num_f0s);
570 for (int i = 0; i < m_num_f0s; ++i) {
571 m_f0s[i] = m_glogs_f0[m_glogs_init_f0s + i];
572 }
573
548 return true; 574 return true;
549 } 575 }
550 576
551 void 577 void
552 FChTransformF0gram::design_GLogS() { 578 FChTransformF0gram::design_GLogS() {
553 579
554 // total number & initial quantity of f0s 580 // total number & initial quantity of f0s
581
582 cerr << "per oct = " << m_f0_params.num_f0s_per_oct << ", octs = " << m_f0_params.num_octs << endl;
555 m_glogs_init_f0s = (int)(((double)m_f0_params.num_f0s_per_oct)*log2(5.0))+1; 583 m_glogs_init_f0s = (int)(((double)m_f0_params.num_f0s_per_oct)*log2(5.0))+1;
584 cerr << "init_f0s = " << m_glogs_init_f0s << endl;
556 m_glogs_num_f0s = (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct + m_glogs_init_f0s; 585 m_glogs_num_f0s = (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct + m_glogs_init_f0s;
586 cerr << "num_f0s = " << m_glogs_num_f0s << endl;
557 587
558 // Initialize arrays 588 // Initialize arrays
559 m_glogs_f0 = allocate<double>(m_glogs_num_f0s); 589 m_glogs_f0 = allocate<double>(m_glogs_num_f0s);
560 m_glogs = allocate<double>(m_glogs_num_f0s*m_warp_params.num_warps); 590 m_glogs = allocate<double>(m_glogs_num_f0s*m_warp_params.num_warps);
561 m_glogs_n = allocate<int>(m_glogs_num_f0s); 591 m_glogs_n = allocate<int>(m_glogs_num_f0s);
582 double aux_pos; 612 double aux_pos;
583 for (int i = 0; i < m_glogs_num_f0s; i++) { 613 for (int i = 0; i < m_glogs_num_f0s; i++) {
584 for (int j = 1; j <= m_glogs_n[i]; j++) { 614 for (int j = 1; j <= m_glogs_n[i]; j++) {
585 // indice en el vector de largo t_warp/2+1 donde el ultimo valor corresponde a f=m_fmax 615 // indice en el vector de largo t_warp/2+1 donde el ultimo valor corresponde a f=m_fmax
586 aux_pos = ((double)j*m_glogs_f0[i])*((double)(m_warp_params.nsamps_twarp/2+1))/m_fmax; 616 aux_pos = ((double)j*m_glogs_f0[i])*((double)(m_warp_params.nsamps_twarp/2+1))/m_fmax;
617 //!!! cerr << "aux_pos = " << aux_pos << endl;
618 // aux_pos = ((double)j*m_glogs_f0[i])*((double)(m_warp_params.nsamps_twarp/2+1))/m_warpings.fs_warp;
619 // cerr << "or " << aux_pos << " (as fs_warp = " << m_warpings.fs_warp << ")" << endl;
587 m_glogs_posint[aux_index] = (int)aux_pos; 620 m_glogs_posint[aux_index] = (int)aux_pos;
588 m_glogs_posfrac[aux_index] = aux_pos - (double)m_glogs_posint[aux_index]; 621 m_glogs_posfrac[aux_index] = aux_pos - (double)m_glogs_posint[aux_index];
589 aux_index++; 622 aux_index++;
590 } 623 }
591 } 624 }
877 void FChTransformF0gram::apply_LPF() 910 void FChTransformF0gram::apply_LPF()
878 { 911 {
879 fft_forward_LPF->forward(LPF_time, LPF_frequency); 912 fft_forward_LPF->forward(LPF_time, LPF_frequency);
880 913
881 for (int i = 0; i < m_blockSize/2+1; i++) { 914 for (int i = 0; i < m_blockSize/2+1; i++) {
882 LPF_frequency[i*2] *= mp_LPFWindow[i] * m_warpings.nsamps_torig; 915 LPF_frequency[i*2] *= mp_LPFWindow[i];
883 LPF_frequency[i*2 + 1] *= mp_LPFWindow[i] * m_warpings.nsamps_torig; 916 LPF_frequency[i*2 + 1] *= mp_LPFWindow[i];
884 } 917 }
885 918
886 fft_inverse_LPF->inverse(LPF_frequency, LPF_time); 919 fft_inverse_LPF->inverse(LPF_frequency, LPF_time);
887 920
888 // TODO ver si hay que hacer fftshift para corregir la fase respecto al centro del frame. 921 // TODO ver si hay que hacer fftshift para corregir la fase respecto al centro del frame.
923 956
924 //--------------------------------------------------------------------------- 957 //---------------------------------------------------------------------------
925 FeatureSet fs; 958 FeatureSet fs;
926 959
927 #ifdef DEBUG 960 #ifdef DEBUG
928 printf("\n ----- DEBUG INFORMATION ----- \n"); 961 fprintf(stderr, "\n ----- DEBUG INFORMATION ----- \n");
929 printf(" m_fs = %f Hz.\n",m_fs); 962 fprintf(stderr, " m_fs = %f Hz.\n",m_fs);
930 printf(" fs_orig = %f Hz.\n",m_warpings.fs_orig); 963 fprintf(stderr, " fs_orig = %f Hz.\n",m_warpings.fs_orig);
931 printf(" fs_warp = %f Hz.\n",m_warpings.fs_warp); 964 fprintf(stderr, " fs_warp = %f Hz.\n",m_warpings.fs_warp);
932 printf(" m_nfft = %d.\n",m_nfft); 965 fprintf(stderr, " m_nfft = %d.\n",m_nfft);
933 printf(" m_blockSize = %d.\n",m_blockSize); 966 fprintf(stderr, " m_blockSize = %d.\n",m_blockSize);
934 printf(" m_warpings.nsamps_torig = %d.\n",m_warpings.nsamps_torig); 967 fprintf(stderr, " m_warpings.nsamps_torig = %d.\n",m_warpings.nsamps_torig);
935 printf(" m_warp_params.num_warps = %d.\n",m_warp_params.num_warps); 968 fprintf(stderr, " m_warp_params.num_warps = %d.\n",m_warp_params.num_warps);
936 printf(" m_glogs_harmonic_count = %d.\n",m_glogs_harmonic_count); 969 fprintf(stderr, " m_glogs_harmonic_count = %d.\n",m_glogs_harmonic_count);
937 #endif 970 #endif
938 971
939 for (int i = 0; i < m_blockSize; i++) { 972 for (int i = 0; i < m_blockSize; i++) {
940 LPF_time[i] = (double)(inputBuffers[0][i]) * m_timeWindow[i]; 973 LPF_time[i] = (double)(inputBuffers[0][i]) * m_timeWindow[i];
974 LPF_time[m_blockSize+i] = 0.0;
941 } 975 }
942 976
943 // #ifdef DEBUG 977 // #ifdef DEBUG
944 // printf(" HASTA ACÁ ANDA!!!\n"); 978 // fprintf(stderr, " HASTA ACÁ ANDA!!!\n");
945 // cout << flush; 979 // cout << flush;
946 // #endif 980 // #endif
947 981
948 apply_LPF(); 982 apply_LPF();
949 // Señal filtrada queda en LPF_time 983 // Señal filtrada queda en LPF_time
960 994
961 double max_glogs = -DBL_MAX; 995 double max_glogs = -DBL_MAX;
962 int ind_max_glogs = 0; 996 int ind_max_glogs = 0;
963 997
964 for (int i_warp = 0; i_warp < m_warp_params.num_warps; i_warp++) { 998 for (int i_warp = 0; i_warp < m_warp_params.num_warps; i_warp++) {
999
965 // Interpolate 1000 // Interpolate
966 Utils::interp1q(LPF_time, (m_warpings.pos_int) + i_warp*m_warp_params.nsamps_twarp, m_warpings.pos_frac + i_warp*m_warp_params.nsamps_twarp, x_warping, m_warp_params.nsamps_twarp); 1001 Utils::interp1q(LPF_time, (m_warpings.pos_int) + i_warp*m_warp_params.nsamps_twarp, m_warpings.pos_frac + i_warp*m_warp_params.nsamps_twarp, x_warping, m_warp_params.nsamps_twarp);
967 1002
968 // Apply window 1003 // Apply window
969 for (int i = 0; i < m_warp_params.nsamps_twarp; i++) { 1004 for (int i = 0; i < m_warp_params.nsamps_twarp; i++) {
1085 } 1120 }
1086 1121
1087 #ifdef DEBUG 1122 #ifdef DEBUG
1088 for (int i = 0; i < m_blockSize; i++) { 1123 for (int i = 0; i < m_blockSize; i++) {
1089 if ((i<transitionWidth)) { 1124 if ((i<transitionWidth)) {
1090 printf(" m_timeWindow[%d] = %f.\n",i,m_timeWindow[i]); 1125 fprintf(stderr, " m_timeWindow[%d] = %f.\n",i,m_timeWindow[i]);
1091 } 1126 }
1092 } 1127 }
1093 #endif 1128 #endif
1094 1129
1095 deallocate(lp_transitionWindow); 1130 deallocate(lp_transitionWindow);