comparison FChTransformF0gram.cpp @ 20:7964cc5ad98f spect

Correct the time-alignment of the output blocks
author Chris Cannam
date Thu, 04 Oct 2018 13:32:47 +0100
parents d7fbd446f47f
children 37917af73ae9
comparison
equal deleted inserted replaced
19:d7fbd446f47f 20:7964cc5ad98f
32 32
33 FChTransformF0gram::FChTransformF0gram(ProcessingMode mode, 33 FChTransformF0gram::FChTransformF0gram(ProcessingMode mode,
34 float inputSampleRate) : 34 float inputSampleRate) :
35 Plugin(inputSampleRate), 35 Plugin(inputSampleRate),
36 m_processingMode(mode), 36 m_processingMode(mode),
37 m_stepSize(0), // We are using 0 for step and block size to indicate "not yet set". 37 m_initialised(false),
38 m_blockSize(0) { 38 m_stepSize(256),
39 m_blockSize(8192) {
39 40
40 m_fs = inputSampleRate; 41 m_fs = inputSampleRate;
41 // max frequency of interest (Hz) 42 // max frequency of interest (Hz)
42 m_fmax = 10000.f; 43 m_fmax = 10000.f;
43 // warping parameters 44 // warping parameters
77 m_f0s = 0; 78 m_f0s = 0;
78 } 79 }
79 80
80 FChTransformF0gram::~FChTransformF0gram() 81 FChTransformF0gram::~FChTransformF0gram()
81 { 82 {
82 if (!m_blockSize) { 83 if (!m_initialised) {
83 return; // nothing was allocated 84 return; // nothing was allocated
84 } 85 }
85 86
87 deallocate(m_inputBuffer);
88
86 deallocate(m_warpings.pos_int); 89 deallocate(m_warpings.pos_int);
87 deallocate(m_warpings.pos_frac); 90 deallocate(m_warpings.pos_frac);
88 deallocate(m_warpings.chirp_rates); 91 deallocate(m_warpings.chirp_rates);
89 92
90 clean_LPF(); 93 clean_LPF();
181 FChTransformF0gram::getInputDomain() const { 184 FChTransformF0gram::getInputDomain() const {
182 return TimeDomain; 185 return TimeDomain;
183 } 186 }
184 187
185 size_t FChTransformF0gram::getPreferredBlockSize() const { 188 size_t FChTransformF0gram::getPreferredBlockSize() const {
186 return 8192; // 0 means "I can handle any block size" 189 // We do our own accumulating into blocks within process()
190 return m_blockSize/2;
187 } 191 }
188 192
189 size_t 193 size_t
190 FChTransformF0gram::getPreferredStepSize() const { 194 FChTransformF0gram::getPreferredStepSize() const {
191 return 256; // 0 means "anything sensible"; in practice this 195 return m_stepSize;
192 // means the same as the block size for TimeDomain
193 // plugins, or half of it for FrequencyDomain plugins
194 } 196 }
195 197
196 size_t 198 size_t
197 FChTransformF0gram::getMinChannelCount() const { 199 FChTransformF0gram::getMinChannelCount() const {
198 return 1; 200 return 1;
552 } 554 }
553 555
554 bool 556 bool
555 FChTransformF0gram::initialise(size_t channels, size_t stepSize, size_t blockSize) { 557 FChTransformF0gram::initialise(size_t channels, size_t stepSize, size_t blockSize) {
556 if (channels < getMinChannelCount() || 558 if (channels < getMinChannelCount() ||
557 channels > getMaxChannelCount()) { 559 channels > getMaxChannelCount() ||
560 blockSize != m_blockSize/2 ||
561 stepSize != m_stepSize) {
558 return false; 562 return false;
559 } 563 }
560 564
561 // set blockSize and stepSize (but changed below) 565 m_inputBuffer = allocate_and_zero<float>(m_blockSize);
562 m_blockSize = blockSize; 566
563 m_stepSize = stepSize;
564
565 // WARNING !!! 567 // WARNING !!!
566 // these values in fact are determined by the sampling frequency m_fs 568 // these values in fact are determined by the sampling frequency m_fs
567 // the parameters used below correspond to default values i.e. m_fs = 44.100 Hz 569 // the parameters used below correspond to default values i.e. m_fs = 44.100 Hz
568 //m_blockSize = 4 * m_warp_params.nsamps_twarp; 570 //m_blockSize = 4 * m_warp_params.nsamps_twarp;
569 // m_stepSize = floor(m_hop / m_warp_params.fact_over_samp); 571 // m_stepSize = floor(m_hop / m_warp_params.fact_over_samp);
586 m_num_f0s = m_f0_params.num_octs * m_f0_params.num_f0s_per_oct; 588 m_num_f0s = m_f0_params.num_octs * m_f0_params.num_f0s_per_oct;
587 m_f0s = allocate<double>(m_num_f0s); 589 m_f0s = allocate<double>(m_num_f0s);
588 for (int i = 0; i < m_num_f0s; ++i) { 590 for (int i = 0; i < m_num_f0s; ++i) {
589 m_f0s[i] = m_glogs_f0[m_glogs_init_f0s + i]; 591 m_f0s[i] = m_glogs_f0[m_glogs_init_f0s + i];
590 } 592 }
591 593
594 m_initialised = true;
592 return true; 595 return true;
593 } 596 }
594 597
595 void 598 void
596 FChTransformF0gram::design_GLogS() { 599 FChTransformF0gram::design_GLogS() {
675 } 678 }
676 679
677 void 680 void
678 FChTransformF0gram::design_FChT() { 681 FChTransformF0gram::design_FChT() {
679 682
680 /*
681 * FILES FOR DEBUGGING
682 */
683
684 //ofstream output("output.txt");
685
686
687 /* ============= WARPING DESIGN ============= */ 683 /* ============= WARPING DESIGN ============= */
688 684
689 // sampling frequency after oversampling 685 // sampling frequency after oversampling
690 m_warpings.fs_orig = m_warp_params.fact_over_samp * m_fs; 686 m_warpings.fs_orig = m_warp_params.fact_over_samp * m_fs;
691 687
724 } 720 }
725 721
726 // design of warpings for efficient interpolation 722 // design of warpings for efficient interpolation
727 design_warps(freq_relative, t_orig, t_warp); 723 design_warps(freq_relative, t_orig, t_warp);
728 724
729
730 /*
731 * FILES FOR DEBUGGING
732 */
733
734 /*
735 output << "chirp_rates" << endl;
736 for (int j = 0; j < m_warp_params.num_warps; j++){
737 output << m_warpings.chirp_rates[j];
738 output << " ";
739 }
740 output << endl << "freq_relative" << endl;
741
742 for (int i = 0; i < m_warpings.nsamps_torig; i++){
743 for (int j = 0; j < m_warp_params.num_warps; j++){
744 output << freq_relative[j * m_warpings.nsamps_torig + i];
745 output << " ";
746 }
747 output << endl;
748 }
749
750 output << endl << "t_orig" << endl;
751
752 for (int i = 0; i < m_warpings.nsamps_torig; i++){
753 output << t_orig[i] << endl ;
754 }
755 */
756
757 deallocate(freq_relative); 725 deallocate(freq_relative);
758 deallocate(t_orig); 726 deallocate(t_orig);
759 deallocate(t_warp); 727 deallocate(t_warp);
760
761 //output.close();
762 728
763 /* ============= FFTW PLAN DESIGN ============= */ 729 /* ============= FFTW PLAN DESIGN ============= */
764 // Initialize 2-d array for warped signals 730 // Initialize 2-d array for warped signals
765 x_warping = allocate<double>(m_warp_params.nsamps_twarp); 731 x_warping = allocate<double>(m_warp_params.nsamps_twarp);
766 m_absFanChirpTransform = allocate<double>(m_warp_params.num_warps * (m_warp_params.nsamps_twarp/2 + 1)); 732 m_absFanChirpTransform = allocate<double>(m_warp_params.num_warps * (m_warp_params.nsamps_twarp/2 + 1));
929 LPF_frequency[i*2] *= mp_LPFWindow[i]; 895 LPF_frequency[i*2] *= mp_LPFWindow[i];
930 LPF_frequency[i*2 + 1] *= mp_LPFWindow[i]; 896 LPF_frequency[i*2 + 1] *= mp_LPFWindow[i];
931 } 897 }
932 898
933 fft_inverse_LPF->inverse(LPF_frequency, LPF_time); 899 fft_inverse_LPF->inverse(LPF_frequency, LPF_time);
934 900
935 // TODO ver si hay que hacer fftshift para corregir la fase respecto al centro del frame. 901 // TODO ver si hay que hacer fftshift para corregir la fase respecto al centro del frame.
936 // nota: además de aplicar el LPF, esta función resamplea la señal original. 902 // nota: además de aplicar el LPF, esta función resamplea la señal original.
937 } 903 }
938 904
939 void FChTransformF0gram::clean_LPF() 905 void FChTransformF0gram::clean_LPF()
950 } 916 }
951 917
952 FChTransformF0gram::FeatureSet 918 FChTransformF0gram::FeatureSet
953 FChTransformF0gram::process(const float *const *inputBuffers, Vamp::RealTime) { 919 FChTransformF0gram::process(const float *const *inputBuffers, Vamp::RealTime) {
954 920
955 // // Do actual work! 921 if (!m_initialised) return FeatureSet();
956 // 922
957
958 /* PSEUDOCÓDIGO: 923 /* PSEUDOCÓDIGO:
959 - Aplicar FFT al frame entero. 924 - Aplicar FFT al frame entero.
960 - Filtro pasabajos en frecuencia. 925 - Filtro pasabajos en frecuencia.
961 - FFT inversa al frame entero. 926 - FFT inversa al frame entero.
962 ----------------------------------------------------------------------------- 927 -----------------------------------------------------------------------------
981 fprintf(stderr, " m_warpings.nsamps_torig = %d.\n",m_warpings.nsamps_torig); 946 fprintf(stderr, " m_warpings.nsamps_torig = %d.\n",m_warpings.nsamps_torig);
982 fprintf(stderr, " m_warp_params.num_warps = %d.\n",m_warp_params.num_warps); 947 fprintf(stderr, " m_warp_params.num_warps = %d.\n",m_warp_params.num_warps);
983 fprintf(stderr, " m_glogs_harmonic_count = %d.\n",m_glogs_harmonic_count); 948 fprintf(stderr, " m_glogs_harmonic_count = %d.\n",m_glogs_harmonic_count);
984 #endif 949 #endif
985 950
986 for (int i = 0; i < m_blockSize; i++) { 951 for (int i = 0; i < m_blockSize - m_stepSize; ++i) {
987 LPF_time[i] = (double)(inputBuffers[0][i]) * m_timeWindow[i]; 952 m_inputBuffer[i] = m_inputBuffer[i + m_stepSize];
988 LPF_time[m_blockSize+i] = 0.0; 953 }
989 } 954 for (int i = 0; i < m_blockSize/2; ++i) {
990 955 m_inputBuffer[m_blockSize/2 + i] = inputBuffers[0][i];
991 // #ifdef DEBUG 956 }
992 // fprintf(stderr, " HASTA ACÁ ANDA!!!\n"); 957 for (int i = 0; i < m_blockSize; ++i) {
993 // cout << flush; 958 LPF_time[i] = m_inputBuffer[i] * m_timeWindow[i];
994 // #endif 959 }
995 960 for (int i = 0; i < m_blockSize; ++i) {
961 LPF_time[m_blockSize + i] = 0.0;
962 }
963
996 apply_LPF(); 964 apply_LPF();
997 // Señal filtrada queda en LPF_time 965 // Señal filtrada queda en LPF_time
998 966
999 Feature feature; 967 Feature feature;
1000 feature.hasTimestamp = false; 968 feature.hasTimestamp = false;
1137 } 1105 }
1138 1106
1139 void 1107 void
1140 FChTransformF0gram::design_time_window() { 1108 FChTransformF0gram::design_time_window() {
1141 1109
1142 int transitionWidth = (int)m_blockSize/128 + 1;; 1110 int transitionWidth = (int)m_blockSize/128 + 128;
1143 m_timeWindow = allocate<double>(m_blockSize); 1111 m_timeWindow = allocate<double>(m_blockSize);
1144 double *lp_transitionWindow = allocate<double>(transitionWidth); 1112 double *lp_transitionWindow = allocate<double>(transitionWidth);
1145 1113
1146 //memset(m_timeWindow, 1.0, m_blockSize);
1147 for (int i = 0; i < m_blockSize; i++) { 1114 for (int i = 0; i < m_blockSize; i++) {
1148 m_timeWindow[i] = 1.0; 1115 m_timeWindow[i] = 1.0;
1149 } 1116 }
1150 1117
1151 for (int i = 0; i < transitionWidth; i++) { 1118 for (int i = 0; i < transitionWidth; i++) {