Mercurial > hg > vamp-fanchirp
comparison FChTransformF0gram.cpp @ 16:ce62ed201de8 spect
Toward (but not quite reaching) accurate frequency labels for outputs
author | Chris Cannam |
---|---|
date | Wed, 03 Oct 2018 15:47:00 +0100 |
parents | 0a860992b4f4 |
children | 436eab0bc1ff |
comparison
equal
deleted
inserted
replaced
15:0a860992b4f4 | 16:ce62ed201de8 |
---|---|
22 | 22 |
23 #include "bqvec/Allocators.h" | 23 #include "bqvec/Allocators.h" |
24 | 24 |
25 using namespace breakfastquay; | 25 using namespace breakfastquay; |
26 | 26 |
27 //#define DEBUG | 27 #define DEBUG |
28 | 28 |
29 #define MAX(x, y) (((x) > (y)) ? (x) : (y)) | 29 #define MAX(x, y) (((x) > (y)) ? (x) : (y)) |
30 | 30 |
31 FChTransformF0gram::FChTransformF0gram(ProcessingMode mode, | 31 FChTransformF0gram::FChTransformF0gram(ProcessingMode mode, |
32 float inputSampleRate) : | 32 float inputSampleRate) : |
38 m_fs = inputSampleRate; | 38 m_fs = inputSampleRate; |
39 // max frequency of interest (Hz) | 39 // max frequency of interest (Hz) |
40 m_fmax = 10000.f; | 40 m_fmax = 10000.f; |
41 // warping parameters | 41 // warping parameters |
42 m_warp_params.nsamps_twarp = 2048; | 42 m_warp_params.nsamps_twarp = 2048; |
43 //m_warp_params.nsamps_twarp = 8; | |
44 m_warp_params.alpha_max = 4; | 43 m_warp_params.alpha_max = 4; |
45 m_warp_params.num_warps = 21; | 44 m_warp_params.num_warps = 21; |
46 //m_warp_params.num_warps = 11; | |
47 m_warp_params.fact_over_samp = 2; | 45 m_warp_params.fact_over_samp = 2; |
48 m_warp_params.alpha_dist = 0; | 46 m_warp_params.alpha_dist = 0; |
49 // f0 parameters | 47 // f0 parameters |
50 m_f0_params.f0min = 80.0; | 48 m_f0_params.f0min = 80.0; |
51 m_f0_params.num_octs = 4; | 49 m_f0_params.num_octs = 4; |
72 m_nfft = m_warp_params.nsamps_twarp; | 70 m_nfft = m_warp_params.nsamps_twarp; |
73 // hop in samples | 71 // hop in samples |
74 m_hop = m_warp_params.fact_over_samp * 256; | 72 m_hop = m_warp_params.fact_over_samp * 256; |
75 | 73 |
76 m_num_f0s = 0; | 74 m_num_f0s = 0; |
77 | 75 m_f0s = 0; |
78 } | 76 } |
79 | 77 |
80 FChTransformF0gram::~FChTransformF0gram() | 78 FChTransformF0gram::~FChTransformF0gram() |
81 { | 79 { |
82 if (!m_blockSize) { | 80 if (!m_blockSize) { |
114 deallocate(m_glogs_fifth_harmonic_posfrac); | 112 deallocate(m_glogs_fifth_harmonic_posfrac); |
115 deallocate(m_glogs_fifth_harmonic); | 113 deallocate(m_glogs_fifth_harmonic); |
116 deallocate(m_glogs_f0_preference_weights); | 114 deallocate(m_glogs_f0_preference_weights); |
117 deallocate(m_glogs_median_correction); | 115 deallocate(m_glogs_median_correction); |
118 deallocate(m_glogs_sigma_correction); | 116 deallocate(m_glogs_sigma_correction); |
117 | |
118 deallocate(m_f0s); | |
119 } | 119 } |
120 | 120 |
121 string | 121 string |
122 FChTransformF0gram::getIdentifier() const { | 122 FChTransformF0gram::getIdentifier() const { |
123 switch (m_processingMode) { | 123 switch (m_processingMode) { |
481 FChTransformF0gram::OutputList | 481 FChTransformF0gram::OutputList |
482 FChTransformF0gram::getOutputDescriptors() const { | 482 FChTransformF0gram::getOutputDescriptors() const { |
483 | 483 |
484 OutputList list; | 484 OutputList list; |
485 | 485 |
486 // See OutputDescriptor documentation for the possibilities here. | 486 vector<string> labels; |
487 // Every plugin must have at least one output. | 487 char label[100]; |
488 | 488 |
489 /* f0 values of F0gram grid as string values */ | 489 if (m_processingMode == ModeF0Gram) { |
490 vector<string> f0values; | 490 |
491 int ind = 0; | 491 /* f0 values of F0gram grid as string values */ |
492 char f0String[100]; | 492 for (int i = 0; i < m_num_f0s; ++i) { |
493 while (ind < m_num_f0s) { | 493 sprintf(label, "%4.2f Hz", m_f0s[i]); |
494 sprintf(f0String, "%4.2f", m_f0s[ind]); | 494 labels.push_back(label); |
495 f0values.push_back(f0String); | 495 } |
496 ind++; | 496 |
497 } | 497 /* The F0gram */ |
498 | 498 OutputDescriptor d; |
499 /* The F0gram */ | 499 d.identifier = "f0gram"; |
500 OutputDescriptor d; | 500 d.name = "F0gram: salience of f0s"; |
501 d.identifier = "f0gram"; | 501 d.description = "This representation show the salience of the different f0s in the signal."; |
502 d.name = "F0gram: salience of f0s"; | 502 d.hasFixedBinCount = true; |
503 d.description = "This representation show the salience of the different f0s in the signal."; | 503 d.binCount = m_f0_params.num_octs * m_f0_params.num_f0s_per_oct; |
504 d.hasFixedBinCount = true; | 504 d.binNames = labels; |
505 d.binCount = m_f0_params.num_octs * m_f0_params.num_f0s_per_oct; | 505 d.hasKnownExtents = false; |
506 d.binNames = f0values; | 506 d.isQuantized = false; |
507 d.hasKnownExtents = false; | 507 d.sampleType = OutputDescriptor::OneSamplePerStep; |
508 d.isQuantized = false; | 508 d.hasDuration = false; |
509 d.sampleType = OutputDescriptor::OneSamplePerStep; | 509 list.push_back(d); |
510 d.hasDuration = false; | 510 |
511 list.push_back(d); | 511 } else { |
512 | 512 |
513 for (int i = 0; i < m_warp_params.nsamps_twarp/2+1; ++i) { | |
514 double freq = i * (m_warpings.fs_warp / m_nfft); | |
515 sprintf(label, "%4.2f Hz", freq); | |
516 labels.push_back(label); | |
517 } | |
518 | |
519 OutputDescriptor d; | |
520 d.identifier = "spectrogram"; | |
521 d.name = "Spectrogram"; | |
522 d.description = "Time/frequency spectrogram derived from the Fan Chirp Transform output"; | |
523 d.hasFixedBinCount = true; | |
524 d.binCount = m_warp_params.nsamps_twarp/2+1; | |
525 d.binNames = labels; | |
526 d.hasKnownExtents = false; | |
527 d.isQuantized = false; | |
528 d.sampleType = OutputDescriptor::OneSamplePerStep; | |
529 d.hasDuration = false; | |
530 list.push_back(d); | |
531 } | |
532 | |
513 return list; | 533 return list; |
514 } | 534 } |
515 | 535 |
516 bool | 536 bool |
517 FChTransformF0gram::initialise(size_t channels, size_t stepSize, size_t blockSize) { | 537 FChTransformF0gram::initialise(size_t channels, size_t stepSize, size_t blockSize) { |
526 | 546 |
527 // WARNING !!! | 547 // WARNING !!! |
528 // these values in fact are determined by the sampling frequency m_fs | 548 // these values in fact are determined by the sampling frequency m_fs |
529 // the parameters used below correspond to default values i.e. m_fs = 44.100 Hz | 549 // the parameters used below correspond to default values i.e. m_fs = 44.100 Hz |
530 //m_blockSize = 4 * m_warp_params.nsamps_twarp; | 550 //m_blockSize = 4 * m_warp_params.nsamps_twarp; |
531 m_stepSize = floor(m_hop / m_warp_params.fact_over_samp); | 551 // m_stepSize = floor(m_hop / m_warp_params.fact_over_samp); |
552 | |
553 /* design of FChT */ | |
554 design_FChT(); | |
532 | 555 |
533 /* initialise m_glogs_params */ | 556 /* initialise m_glogs_params */ |
534 design_GLogS(); | 557 design_GLogS(); |
535 | |
536 /* design of FChT */ | |
537 design_FChT(); | |
538 | 558 |
539 design_LPF(); | 559 design_LPF(); |
540 | 560 |
541 design_time_window(); | 561 design_time_window(); |
542 | 562 |
543 // Create Hanning window for warped signals | 563 // Create Hanning window for warped signals |
544 mp_HanningWindow = allocate<double>(m_warp_params.nsamps_twarp); | 564 mp_HanningWindow = allocate<double>(m_warp_params.nsamps_twarp); |
545 bool normalize = false; | 565 bool normalize = false; |
546 Utils::hanning_window(mp_HanningWindow, m_warp_params.nsamps_twarp, normalize); | 566 Utils::hanning_window(mp_HanningWindow, m_warp_params.nsamps_twarp, normalize); |
547 | 567 |
568 m_num_f0s = m_f0_params.num_octs * m_f0_params.num_f0s_per_oct; | |
569 m_f0s = allocate<double>(m_num_f0s); | |
570 for (int i = 0; i < m_num_f0s; ++i) { | |
571 m_f0s[i] = m_glogs_f0[m_glogs_init_f0s + i]; | |
572 } | |
573 | |
548 return true; | 574 return true; |
549 } | 575 } |
550 | 576 |
551 void | 577 void |
552 FChTransformF0gram::design_GLogS() { | 578 FChTransformF0gram::design_GLogS() { |
553 | 579 |
554 // total number & initial quantity of f0s | 580 // total number & initial quantity of f0s |
581 | |
582 cerr << "per oct = " << m_f0_params.num_f0s_per_oct << ", octs = " << m_f0_params.num_octs << endl; | |
555 m_glogs_init_f0s = (int)(((double)m_f0_params.num_f0s_per_oct)*log2(5.0))+1; | 583 m_glogs_init_f0s = (int)(((double)m_f0_params.num_f0s_per_oct)*log2(5.0))+1; |
584 cerr << "init_f0s = " << m_glogs_init_f0s << endl; | |
556 m_glogs_num_f0s = (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct + m_glogs_init_f0s; | 585 m_glogs_num_f0s = (m_f0_params.num_octs+1)*m_f0_params.num_f0s_per_oct + m_glogs_init_f0s; |
586 cerr << "num_f0s = " << m_glogs_num_f0s << endl; | |
557 | 587 |
558 // Initialize arrays | 588 // Initialize arrays |
559 m_glogs_f0 = allocate<double>(m_glogs_num_f0s); | 589 m_glogs_f0 = allocate<double>(m_glogs_num_f0s); |
560 m_glogs = allocate<double>(m_glogs_num_f0s*m_warp_params.num_warps); | 590 m_glogs = allocate<double>(m_glogs_num_f0s*m_warp_params.num_warps); |
561 m_glogs_n = allocate<int>(m_glogs_num_f0s); | 591 m_glogs_n = allocate<int>(m_glogs_num_f0s); |
582 double aux_pos; | 612 double aux_pos; |
583 for (int i = 0; i < m_glogs_num_f0s; i++) { | 613 for (int i = 0; i < m_glogs_num_f0s; i++) { |
584 for (int j = 1; j <= m_glogs_n[i]; j++) { | 614 for (int j = 1; j <= m_glogs_n[i]; j++) { |
585 // indice en el vector de largo t_warp/2+1 donde el ultimo valor corresponde a f=m_fmax | 615 // indice en el vector de largo t_warp/2+1 donde el ultimo valor corresponde a f=m_fmax |
586 aux_pos = ((double)j*m_glogs_f0[i])*((double)(m_warp_params.nsamps_twarp/2+1))/m_fmax; | 616 aux_pos = ((double)j*m_glogs_f0[i])*((double)(m_warp_params.nsamps_twarp/2+1))/m_fmax; |
617 //!!! cerr << "aux_pos = " << aux_pos << endl; | |
618 // aux_pos = ((double)j*m_glogs_f0[i])*((double)(m_warp_params.nsamps_twarp/2+1))/m_warpings.fs_warp; | |
619 // cerr << "or " << aux_pos << " (as fs_warp = " << m_warpings.fs_warp << ")" << endl; | |
587 m_glogs_posint[aux_index] = (int)aux_pos; | 620 m_glogs_posint[aux_index] = (int)aux_pos; |
588 m_glogs_posfrac[aux_index] = aux_pos - (double)m_glogs_posint[aux_index]; | 621 m_glogs_posfrac[aux_index] = aux_pos - (double)m_glogs_posint[aux_index]; |
589 aux_index++; | 622 aux_index++; |
590 } | 623 } |
591 } | 624 } |
877 void FChTransformF0gram::apply_LPF() | 910 void FChTransformF0gram::apply_LPF() |
878 { | 911 { |
879 fft_forward_LPF->forward(LPF_time, LPF_frequency); | 912 fft_forward_LPF->forward(LPF_time, LPF_frequency); |
880 | 913 |
881 for (int i = 0; i < m_blockSize/2+1; i++) { | 914 for (int i = 0; i < m_blockSize/2+1; i++) { |
882 LPF_frequency[i*2] *= mp_LPFWindow[i] * m_warpings.nsamps_torig; | 915 LPF_frequency[i*2] *= mp_LPFWindow[i]; |
883 LPF_frequency[i*2 + 1] *= mp_LPFWindow[i] * m_warpings.nsamps_torig; | 916 LPF_frequency[i*2 + 1] *= mp_LPFWindow[i]; |
884 } | 917 } |
885 | 918 |
886 fft_inverse_LPF->inverse(LPF_frequency, LPF_time); | 919 fft_inverse_LPF->inverse(LPF_frequency, LPF_time); |
887 | 920 |
888 // TODO ver si hay que hacer fftshift para corregir la fase respecto al centro del frame. | 921 // TODO ver si hay que hacer fftshift para corregir la fase respecto al centro del frame. |
923 | 956 |
924 //--------------------------------------------------------------------------- | 957 //--------------------------------------------------------------------------- |
925 FeatureSet fs; | 958 FeatureSet fs; |
926 | 959 |
927 #ifdef DEBUG | 960 #ifdef DEBUG |
928 printf("\n ----- DEBUG INFORMATION ----- \n"); | 961 fprintf(stderr, "\n ----- DEBUG INFORMATION ----- \n"); |
929 printf(" m_fs = %f Hz.\n",m_fs); | 962 fprintf(stderr, " m_fs = %f Hz.\n",m_fs); |
930 printf(" fs_orig = %f Hz.\n",m_warpings.fs_orig); | 963 fprintf(stderr, " fs_orig = %f Hz.\n",m_warpings.fs_orig); |
931 printf(" fs_warp = %f Hz.\n",m_warpings.fs_warp); | 964 fprintf(stderr, " fs_warp = %f Hz.\n",m_warpings.fs_warp); |
932 printf(" m_nfft = %d.\n",m_nfft); | 965 fprintf(stderr, " m_nfft = %d.\n",m_nfft); |
933 printf(" m_blockSize = %d.\n",m_blockSize); | 966 fprintf(stderr, " m_blockSize = %d.\n",m_blockSize); |
934 printf(" m_warpings.nsamps_torig = %d.\n",m_warpings.nsamps_torig); | 967 fprintf(stderr, " m_warpings.nsamps_torig = %d.\n",m_warpings.nsamps_torig); |
935 printf(" m_warp_params.num_warps = %d.\n",m_warp_params.num_warps); | 968 fprintf(stderr, " m_warp_params.num_warps = %d.\n",m_warp_params.num_warps); |
936 printf(" m_glogs_harmonic_count = %d.\n",m_glogs_harmonic_count); | 969 fprintf(stderr, " m_glogs_harmonic_count = %d.\n",m_glogs_harmonic_count); |
937 #endif | 970 #endif |
938 | 971 |
939 for (int i = 0; i < m_blockSize; i++) { | 972 for (int i = 0; i < m_blockSize; i++) { |
940 LPF_time[i] = (double)(inputBuffers[0][i]) * m_timeWindow[i]; | 973 LPF_time[i] = (double)(inputBuffers[0][i]) * m_timeWindow[i]; |
974 LPF_time[m_blockSize+i] = 0.0; | |
941 } | 975 } |
942 | 976 |
943 // #ifdef DEBUG | 977 // #ifdef DEBUG |
944 // printf(" HASTA ACÁ ANDA!!!\n"); | 978 // fprintf(stderr, " HASTA ACÁ ANDA!!!\n"); |
945 // cout << flush; | 979 // cout << flush; |
946 // #endif | 980 // #endif |
947 | 981 |
948 apply_LPF(); | 982 apply_LPF(); |
949 // Señal filtrada queda en LPF_time | 983 // Señal filtrada queda en LPF_time |
960 | 994 |
961 double max_glogs = -DBL_MAX; | 995 double max_glogs = -DBL_MAX; |
962 int ind_max_glogs = 0; | 996 int ind_max_glogs = 0; |
963 | 997 |
964 for (int i_warp = 0; i_warp < m_warp_params.num_warps; i_warp++) { | 998 for (int i_warp = 0; i_warp < m_warp_params.num_warps; i_warp++) { |
999 | |
965 // Interpolate | 1000 // Interpolate |
966 Utils::interp1q(LPF_time, (m_warpings.pos_int) + i_warp*m_warp_params.nsamps_twarp, m_warpings.pos_frac + i_warp*m_warp_params.nsamps_twarp, x_warping, m_warp_params.nsamps_twarp); | 1001 Utils::interp1q(LPF_time, (m_warpings.pos_int) + i_warp*m_warp_params.nsamps_twarp, m_warpings.pos_frac + i_warp*m_warp_params.nsamps_twarp, x_warping, m_warp_params.nsamps_twarp); |
967 | 1002 |
968 // Apply window | 1003 // Apply window |
969 for (int i = 0; i < m_warp_params.nsamps_twarp; i++) { | 1004 for (int i = 0; i < m_warp_params.nsamps_twarp; i++) { |
1085 } | 1120 } |
1086 | 1121 |
1087 #ifdef DEBUG | 1122 #ifdef DEBUG |
1088 for (int i = 0; i < m_blockSize; i++) { | 1123 for (int i = 0; i < m_blockSize; i++) { |
1089 if ((i<transitionWidth)) { | 1124 if ((i<transitionWidth)) { |
1090 printf(" m_timeWindow[%d] = %f.\n",i,m_timeWindow[i]); | 1125 fprintf(stderr, " m_timeWindow[%d] = %f.\n",i,m_timeWindow[i]); |
1091 } | 1126 } |
1092 } | 1127 } |
1093 #endif | 1128 #endif |
1094 | 1129 |
1095 deallocate(lp_transitionWindow); | 1130 deallocate(lp_transitionWindow); |