Mercurial > hg > vamp-fanchirp
comparison FChTransformF0gram.cpp @ 20:7964cc5ad98f spect
Correct the time-alignment of the output blocks
author | Chris Cannam |
---|---|
date | Thu, 04 Oct 2018 13:32:47 +0100 |
parents | d7fbd446f47f |
children | 37917af73ae9 |
comparison
equal
deleted
inserted
replaced
19:d7fbd446f47f | 20:7964cc5ad98f |
---|---|
32 | 32 |
33 FChTransformF0gram::FChTransformF0gram(ProcessingMode mode, | 33 FChTransformF0gram::FChTransformF0gram(ProcessingMode mode, |
34 float inputSampleRate) : | 34 float inputSampleRate) : |
35 Plugin(inputSampleRate), | 35 Plugin(inputSampleRate), |
36 m_processingMode(mode), | 36 m_processingMode(mode), |
37 m_stepSize(0), // We are using 0 for step and block size to indicate "not yet set". | 37 m_initialised(false), |
38 m_blockSize(0) { | 38 m_stepSize(256), |
| | 39 m_blockSize(8192) { |
39 | 40 |
40 m_fs = inputSampleRate; | 41 m_fs = inputSampleRate; |
41 // max frequency of interest (Hz) | 42 // max frequency of interest (Hz) |
42 m_fmax = 10000.f; | 43 m_fmax = 10000.f; |
43 // warping parameters | 44 // warping parameters |
77 m_f0s = 0; | 78 m_f0s = 0; |
78 } | 79 } |
79 | 80 |
80 FChTransformF0gram::~FChTransformF0gram() | 81 FChTransformF0gram::~FChTransformF0gram() |
81 { | 82 { |
82 if (!m_blockSize) { | 83 if (!m_initialised) { |
83 return; // nothing was allocated | 84 return; // nothing was allocated |
84 } | 85 } |
85 | 86 |
| | 87 deallocate(m_inputBuffer); |
| | 88 |
86 deallocate(m_warpings.pos_int); | 89 deallocate(m_warpings.pos_int); |
87 deallocate(m_warpings.pos_frac); | 90 deallocate(m_warpings.pos_frac); |
88 deallocate(m_warpings.chirp_rates); | 91 deallocate(m_warpings.chirp_rates); |
89 | 92 |
90 clean_LPF(); | 93 clean_LPF(); |
181 FChTransformF0gram::getInputDomain() const { | 184 FChTransformF0gram::getInputDomain() const { |
182 return TimeDomain; | 185 return TimeDomain; |
183 } | 186 } |
184 | 187 |
185 size_t FChTransformF0gram::getPreferredBlockSize() const { | 188 size_t FChTransformF0gram::getPreferredBlockSize() const { |
186 return 8192; // 0 means "I can handle any block size" | 189 // We do our own accumulating into blocks within process() |
| | 190 return m_blockSize/2; |
187 } | 191 } |
188 | 192 |
189 size_t | 193 size_t |
190 FChTransformF0gram::getPreferredStepSize() const { | 194 FChTransformF0gram::getPreferredStepSize() const { |
191 return 256; // 0 means "anything sensible"; in practice this | 195 return m_stepSize; |
192 // means the same as the block size for TimeDomain | |
193 // plugins, or half of it for FrequencyDomain plugins | |
194 } | 196 } |
195 | 197 |
196 size_t | 198 size_t |
197 FChTransformF0gram::getMinChannelCount() const { | 199 FChTransformF0gram::getMinChannelCount() const { |
198 return 1; | 200 return 1; |
552 } | 554 } |
553 | 555 |
554 bool | 556 bool |
555 FChTransformF0gram::initialise(size_t channels, size_t stepSize, size_t blockSize) { | 557 FChTransformF0gram::initialise(size_t channels, size_t stepSize, size_t blockSize) { |
556 if (channels < getMinChannelCount() \|\| | 558 if (channels < getMinChannelCount() \|\| |
557 channels > getMaxChannelCount()) { | 559 channels > getMaxChannelCount() \|\| |
| | 560 blockSize != m_blockSize/2 \|\| |
| | 561 stepSize != m_stepSize) { |
558 return false; | 562 return false; |
559 } | 563 } |
560 | 564 |
561 // set blockSize and stepSize (but changed below) | 565 m_inputBuffer = allocate_and_zero<float>(m_blockSize); |
562 m_blockSize = blockSize; | 566 |
563 m_stepSize = stepSize; | |
564 | |
565 // WARNING !!! | 567 // WARNING !!! |
566 // these values in fact are determined by the sampling frequency m_fs | 568 // these values in fact are determined by the sampling frequency m_fs |
567 // the parameters used below correspond to default values i.e. m_fs = 44.100 Hz | 569 // the parameters used below correspond to default values i.e. m_fs = 44.100 Hz |
568 //m_blockSize = 4 * m_warp_params.nsamps_twarp; | 570 //m_blockSize = 4 * m_warp_params.nsamps_twarp; |
569 // m_stepSize = floor(m_hop / m_warp_params.fact_over_samp); | 571 // m_stepSize = floor(m_hop / m_warp_params.fact_over_samp); |
586 m_num_f0s = m_f0_params.num_octs * m_f0_params.num_f0s_per_oct; | 588 m_num_f0s = m_f0_params.num_octs * m_f0_params.num_f0s_per_oct; |
587 m_f0s = allocate<double>(m_num_f0s); | 589 m_f0s = allocate<double>(m_num_f0s); |
588 for (int i = 0; i < m_num_f0s; ++i) { | 590 for (int i = 0; i < m_num_f0s; ++i) { |
589 m_f0s[i] = m_glogs_f0[m_glogs_init_f0s + i]; | 591 m_f0s[i] = m_glogs_f0[m_glogs_init_f0s + i]; |
590 } | 592 } |
591 | 593 |
| | 594 m_initialised = true; |
592 return true; | 595 return true; |
593 } | 596 } |
594 | 597 |
595 void | 598 void |
596 FChTransformF0gram::design_GLogS() { | 599 FChTransformF0gram::design_GLogS() { |
675 } | 678 } |
676 | 679 |
677 void | 680 void |
678 FChTransformF0gram::design_FChT() { | 681 FChTransformF0gram::design_FChT() { |
679 | 682 |
680 /* | |
681 * FILES FOR DEBUGGING | |
682 */ | |
683 | |
684 //ofstream output("output.txt"); | |
685 | |
686 | |
687 /* ============= WARPING DESIGN ============= */ | 683 /* ============= WARPING DESIGN ============= */ |
688 | 684 |
689 // sampling frequency after oversampling | 685 // sampling frequency after oversampling |
690 m_warpings.fs_orig = m_warp_params.fact_over_samp * m_fs; | 686 m_warpings.fs_orig = m_warp_params.fact_over_samp * m_fs; |
691 | 687 |
724 } | 720 } |
725 | 721 |
726 // design of warpings for efficient interpolation | 722 // design of warpings for efficient interpolation |
727 design_warps(freq_relative, t_orig, t_warp); | 723 design_warps(freq_relative, t_orig, t_warp); |
728 | 724 |
729 | |
730 /* | |
731 * FILES FOR DEBUGGING | |
732 */ | |
733 | |
734 /* | |
735 output << "chirp_rates" << endl; | |
736 for (int j = 0; j < m_warp_params.num_warps; j++){ | |
737 output << m_warpings.chirp_rates[j]; | |
738 output << " "; | |
739 } | |
740 output << endl << "freq_relative" << endl; | |
741 | |
742 for (int i = 0; i < m_warpings.nsamps_torig; i++){ | |
743 for (int j = 0; j < m_warp_params.num_warps; j++){ | |
744 output << freq_relative[j * m_warpings.nsamps_torig + i]; | |
745 output << " "; | |
746 } | |
747 output << endl; | |
748 } | |
749 | |
750 output << endl << "t_orig" << endl; | |
751 | |
752 for (int i = 0; i < m_warpings.nsamps_torig; i++){ | |
753 output << t_orig[i] << endl ; | |
754 } | |
755 */ | |
756 | |
757 deallocate(freq_relative); | 725 deallocate(freq_relative); |
758 deallocate(t_orig); | 726 deallocate(t_orig); |
759 deallocate(t_warp); | 727 deallocate(t_warp); |
760 | |
761 //output.close(); | |
762 | 728 |
763 /* ============= FFTW PLAN DESIGN ============= */ | 729 /* ============= FFTW PLAN DESIGN ============= */ |
764 // Initialize 2-d array for warped signals | 730 // Initialize 2-d array for warped signals |
765 x_warping = allocate<double>(m_warp_params.nsamps_twarp); | 731 x_warping = allocate<double>(m_warp_params.nsamps_twarp); |
766 m_absFanChirpTransform = allocate<double>(m_warp_params.num_warps * (m_warp_params.nsamps_twarp/2 + 1)); | 732 m_absFanChirpTransform = allocate<double>(m_warp_params.num_warps * (m_warp_params.nsamps_twarp/2 + 1)); |
929 LPF_frequency[i*2] *= mp_LPFWindow[i]; | 895 LPF_frequency[i*2] *= mp_LPFWindow[i]; |
930 LPF_frequency[i*2 + 1] *= mp_LPFWindow[i]; | 896 LPF_frequency[i*2 + 1] *= mp_LPFWindow[i]; |
931 } | 897 } |
932 | 898 |
933 fft_inverse_LPF->inverse(LPF_frequency, LPF_time); | 899 fft_inverse_LPF->inverse(LPF_frequency, LPF_time); |
934 | 900 |
935 // TODO ver si hay que hacer fftshift para corregir la fase respecto al centro del frame. | 901 // TODO ver si hay que hacer fftshift para corregir la fase respecto al centro del frame. |
936 // nota: además de aplicar el LPF, esta función resamplea la señal original. | 902 // nota: además de aplicar el LPF, esta función resamplea la señal original. |
937 } | 903 } |
938 | 904 |
939 void FChTransformF0gram::clean_LPF() | 905 void FChTransformF0gram::clean_LPF() |
950 } | 916 } |
951 | 917 |
952 FChTransformF0gram::FeatureSet | 918 FChTransformF0gram::FeatureSet |
953 FChTransformF0gram::process(const float *const *inputBuffers, Vamp::RealTime) { | 919 FChTransformF0gram::process(const float *const *inputBuffers, Vamp::RealTime) { |
954 | 920 |
955 // // Do actual work! | 921 if (!m_initialised) return FeatureSet(); |
956 // | 922 |
957 | |
958 /* PSEUDOCÓDIGO: | 923 /* PSEUDOCÓDIGO: |
959 - Aplicar FFT al frame entero. | 924 - Aplicar FFT al frame entero. |
960 - Filtro pasabajos en frecuencia. | 925 - Filtro pasabajos en frecuencia. |
961 - FFT inversa al frame entero. | 926 - FFT inversa al frame entero. |
962 ----------------------------------------------------------------------------- | 927 ----------------------------------------------------------------------------- |
981 fprintf(stderr, " m_warpings.nsamps_torig = %d.\n",m_warpings.nsamps_torig); | 946 fprintf(stderr, " m_warpings.nsamps_torig = %d.\n",m_warpings.nsamps_torig); |
982 fprintf(stderr, " m_warp_params.num_warps = %d.\n",m_warp_params.num_warps); | 947 fprintf(stderr, " m_warp_params.num_warps = %d.\n",m_warp_params.num_warps); |
983 fprintf(stderr, " m_glogs_harmonic_count = %d.\n",m_glogs_harmonic_count); | 948 fprintf(stderr, " m_glogs_harmonic_count = %d.\n",m_glogs_harmonic_count); |
984 #endif | 949 #endif |
985 | 950 |
986 for (int i = 0; i < m_blockSize; i++) { | 951 for (int i = 0; i < m_blockSize - m_stepSize; ++i) { |
987 LPF_time[i] = (double)(inputBuffers[0][i]) * m_timeWindow[i]; | 952 m_inputBuffer[i] = m_inputBuffer[i + m_stepSize]; |
988 LPF_time[m_blockSize+i] = 0.0; | 953 } |
989 } | 954 for (int i = 0; i < m_blockSize/2; ++i) { |
990 | 955 m_inputBuffer[m_blockSize/2 + i] = inputBuffers[0][i]; |
991 // #ifdef DEBUG | 956 } |
992 // fprintf(stderr, " HASTA ACÁ ANDA!!!\n"); | 957 for (int i = 0; i < m_blockSize; ++i) { |
993 // cout << flush; | 958 LPF_time[i] = m_inputBuffer[i] * m_timeWindow[i]; |
994 // #endif | 959 } |
995 | 960 for (int i = 0; i < m_blockSize; ++i) { |
| | 961 LPF_time[m_blockSize + i] = 0.0; |
| | 962 } |
| | 963 |
996 apply_LPF(); | 964 apply_LPF(); |
997 // Señal filtrada queda en LPF_time | 965 // Señal filtrada queda en LPF_time |
998 | 966 |
999 Feature feature; | 967 Feature feature; |
1000 feature.hasTimestamp = false; | 968 feature.hasTimestamp = false; |
1137 } | 1105 } |
1138 | 1106 |
1139 void | 1107 void |
1140 FChTransformF0gram::design_time_window() { | 1108 FChTransformF0gram::design_time_window() { |
1141 | 1109 |
1142 int transitionWidth = (int)m_blockSize/128 + 1;; | 1110 int transitionWidth = (int)m_blockSize/128 + 128; |
1143 m_timeWindow = allocate<double>(m_blockSize); | 1111 m_timeWindow = allocate<double>(m_blockSize); |
1144 double *lp_transitionWindow = allocate<double>(transitionWidth); | 1112 double *lp_transitionWindow = allocate<double>(transitionWidth); |
1145 | 1113 |
1146 //memset(m_timeWindow, 1.0, m_blockSize); | |
1147 for (int i = 0; i < m_blockSize; i++) { | 1114 for (int i = 0; i < m_blockSize; i++) { |
1148 m_timeWindow[i] = 1.0; | 1115 m_timeWindow[i] = 1.0; |
1149 } | 1116 } |
1150 | 1117 |
1151 for (int i = 0; i < transitionWidth; i++) { | 1118 for (int i = 0; i < transitionWidth; i++) { |