comparison FChTransformF0gram.cpp @ 15:0a860992b4f4 spect

Break out the plugin into three different plugins (using the same class in different modes) in order to provide simplistic and more sophisticated spectrograms as well as the f0-gram. Remove the program support, since it doesn't work usefully anyway (it just overrides the user's preferred settings).
author Chris Cannam
date Wed, 03 Oct 2018 13:16:09 +0100
parents 44b86c346a5a
children ce62ed201de8
comparison
equal deleted inserted replaced
14:44b86c346a5a 15:0a860992b4f4
26 26
27 //#define DEBUG 27 //#define DEBUG
28 28
29 #define MAX(x, y) (((x) > (y)) ? (x) : (y)) 29 #define MAX(x, y) (((x) > (y)) ? (x) : (y))
30 30
31 FChTransformF0gram::FChTransformF0gram(float inputSampleRate) : 31 FChTransformF0gram::FChTransformF0gram(ProcessingMode mode,
32 float inputSampleRate) :
32 Plugin(inputSampleRate), 33 Plugin(inputSampleRate),
33 m_currentProgram("default"), 34 m_processingMode(mode),
34 m_stepSize(0), // We are using 0 for step and block size to indicate "not yet set". 35 m_stepSize(0), // We are using 0 for step and block size to indicate "not yet set".
35 m_blockSize(0) { 36 m_blockSize(0) {
36 37
37 m_fs = inputSampleRate; 38 m_fs = inputSampleRate;
38 // max frequency of interest (Hz) 39 // max frequency of interest (Hz)
55 m_f0_params.prefer_stdev = 18; 56 m_f0_params.prefer_stdev = 18;
56 // glogs parameters 57 // glogs parameters
57 m_glogs_params.HP_logS = true; 58 m_glogs_params.HP_logS = true;
58 m_glogs_params.att_subharms = 1; 59 m_glogs_params.att_subharms = 1;
59 // display parameters 60 // display parameters
60 m_f0gram_mode = true; 61 m_f0gram_mode = BestBinOfAllDirections;
61 62
62 m_glogs_params.median_poly_coefs[0] = -0.000000058551680; 63 m_glogs_params.median_poly_coefs[0] = -0.000000058551680;
63 m_glogs_params.median_poly_coefs[1] = -0.000006945207775; 64 m_glogs_params.median_poly_coefs[1] = -0.000006945207775;
64 m_glogs_params.median_poly_coefs[2] = 0.002357223226588; 65 m_glogs_params.median_poly_coefs[2] = 0.002357223226588;
65 66
117 deallocate(m_glogs_sigma_correction); 118 deallocate(m_glogs_sigma_correction);
118 } 119 }
119 120
120 string 121 string
121 FChTransformF0gram::getIdentifier() const { 122 FChTransformF0gram::getIdentifier() const {
122 return "fchtransformf0gram"; 123 switch (m_processingMode) {
124 case ModeF0Gram: return "fchtransformf0gram";
125 case ModeSpectrogram: return "fchtransformspectrogram";
126 case ModeRoughSpectrogram: return "fchtransformrough";
127 }
123 } 128 }
124 129
125 string 130 string
126 FChTransformF0gram::getName() const { 131 FChTransformF0gram::getName() const {
127 return "Fan Chirp Transform F0gram"; 132 switch (m_processingMode) {
133 case ModeF0Gram: return "Fan Chirp Transform F0gram";
134 case ModeSpectrogram: return "Fan Chirp Transform Spectrogram";
135 case ModeRoughSpectrogram: return "Fan Chirp Transform Rough Spectrogram";
136 }
128 } 137 }
129 138
130 string 139 string
131 FChTransformF0gram::getDescription() const { 140 FChTransformF0gram::getDescription() const {
132 // Return something helpful here! 141 switch (m_processingMode) {
133 return "This plug-in produces a representation, called F0gram, which exhibits the salience of the fundamental frequency of the sound sources in the audio file. The computation of the F0gram makes use of the Fan Chirp Transform analysis. It is based on the article \"Fan chirp transform for music representation\" P. Cancela, E. Lopez, M. Rocamora, International Conference on Digital Audio Effects, 13th. DAFx-10. Graz, Austria - 6-10 Sep 2010."; 142 case ModeF0Gram:
143 return "This plug-in produces a representation, called F0gram, which exhibits the salience of the fundamental frequency of the sound sources in the audio file. The computation of the F0gram makes use of the Fan Chirp Transform analysis. It is based on the article \"Fan chirp transform for music representation\" P. Cancela, E. Lopez, M. Rocamora, International Conference on Digital Audio Effects, 13th. DAFx-10. Graz, Austria - 6-10 Sep 2010.";
144 case ModeSpectrogram:
145 return "This plug-in produces a spectral representation of the audio using Fan Chirp Transform analysis.";
146 case ModeRoughSpectrogram:
147 return "This plug-in produces a more approximate spectral representation of the audio using Fan Chirp Transform analysis.";
148 }
134 } 149 }
135 150
136 string 151 string
137 FChTransformF0gram::getMaker() const { 152 FChTransformF0gram::getMaker() const {
138 // Your name here 153 // Your name here
143 FChTransformF0gram::getPluginVersion() const { 158 FChTransformF0gram::getPluginVersion() const {
144 // Increment this each time you release a version that behaves 159 // Increment this each time you release a version that behaves
145 // differently from the previous one 160 // differently from the previous one
146 // 161 //
147 // 0 - initial version from scratch 162 // 0 - initial version from scratch
148 return 0; 163 return 1;
149 } 164 }
150 165
151 string 166 string
152 FChTransformF0gram::getCopyright() const { 167 FChTransformF0gram::getCopyright() const {
153 // This function is not ideally named. It does not necessarily 168 // This function is not ideally named. It does not necessarily
410 } else if (identifier == "f0_prefer_mean") { 425 } else if (identifier == "f0_prefer_mean") {
411 return m_f0_params.prefer_mean; 426 return m_f0_params.prefer_mean;
412 } else if (identifier == "f0_prefer_stdev") { 427 } else if (identifier == "f0_prefer_stdev") {
413 return m_f0_params.prefer_stdev; 428 return m_f0_params.prefer_stdev;
414 } else if (identifier == "f0gram_mode") { 429 } else if (identifier == "f0gram_mode") {
415 return m_f0gram_mode; 430 return m_f0gram_mode == BestBinOfAllDirections ? 1.0 : 0.0;
416 } else { 431 } else {
417 return 0.f; 432 return 0.f;
418 } 433 }
419 434
420 } 435 }
421 436
422 void FChTransformF0gram::setParameter(string identifier, float value) { 437 void FChTransformF0gram::setParameter(string identifier, float value)
423 438 {
424 if (identifier == "fmax") { 439 if (identifier == "fmax") {
425 m_fmax = value; 440 m_fmax = value;
426 } else if (identifier == "nsamp") { 441 } else if (identifier == "nsamp") {
427 m_warp_params.nsamps_twarp = value; 442 m_warp_params.nsamps_twarp = value;
428 } else if (identifier == "alpha_max") { 443 } else if (identifier == "alpha_max") {
446 } else if (identifier == "f0_prefer_mean") { 461 } else if (identifier == "f0_prefer_mean") {
447 m_f0_params.prefer_mean = value; 462 m_f0_params.prefer_mean = value;
448 } else if (identifier == "f0_prefer_stdev") { 463 } else if (identifier == "f0_prefer_stdev") {
449 m_f0_params.prefer_stdev = value; 464 m_f0_params.prefer_stdev = value;
450 } else if (identifier == "f0gram_mode") { 465 } else if (identifier == "f0gram_mode") {
451 m_f0gram_mode = value; 466 m_f0gram_mode = (value > 0.5 ?
452 } 467 BestBinOfAllDirections :
453 468 AllBinsOfBestDirection);
469 } else {
470 cerr << "WARNING: Unknown parameter id \""
471 << identifier << "\"" << endl;
472 }
454 } 473 }
455 474
456 FChTransformF0gram::ProgramList 475 FChTransformF0gram::ProgramList
457 FChTransformF0gram::getPrograms() const { 476 FChTransformF0gram::getPrograms() const {
458 ProgramList list; 477 ProgramList list;
459
460 list.push_back("default");
461
462 return list; 478 return list;
463 }
464
465 string
466 FChTransformF0gram::getCurrentProgram() const {
467 return m_currentProgram;
468 }
469
470 void
471 FChTransformF0gram::selectProgram(string name) {
472
473 m_currentProgram = name;
474
475 if (name == "default") {
476 m_fmax = 10000.f;
477
478 m_warp_params.nsamps_twarp = 2048;
479 m_warp_params.alpha_max = 4;
480 m_warp_params.num_warps = 21;
481 m_warp_params.fact_over_samp = 2;
482 m_warp_params.alpha_dist = 0;
483
484 m_f0_params.f0min = 80.0;
485 m_f0_params.num_octs = 4;
486 m_f0_params.num_f0s_per_oct = 192;
487 m_f0_params.num_f0_hyps = 5;
488 m_f0_params.prefer = true;
489 m_f0_params.prefer_mean = 60;
490 m_f0_params.prefer_stdev = 18;
491
492 m_glogs_params.HP_logS = true;
493 m_glogs_params.att_subharms = 1;
494
495 m_glogs_params.median_poly_coefs[0] = -0.000000058551680;
496 m_glogs_params.median_poly_coefs[1] = -0.000006945207775;
497 m_glogs_params.median_poly_coefs[2] = 0.002357223226588;
498
499 m_glogs_params.sigma_poly_coefs[0] = 0.000000092782308;
500 m_glogs_params.sigma_poly_coefs[1] = 0.000057283574898;
501 m_glogs_params.sigma_poly_coefs[2] = 0.022199903714288;
502
503 m_nfft = m_warp_params.nsamps_twarp;
504 m_hop = m_warp_params.fact_over_samp * 256;
505
506 m_num_f0s = 0;
507
508 m_f0gram_mode = 1;
509
510 }
511 } 479 }
512 480
513 FChTransformF0gram::OutputList 481 FChTransformF0gram::OutputList
514 FChTransformF0gram::getOutputDescriptors() const { 482 FChTransformF0gram::getOutputDescriptors() const {
515 483
981 // Señal filtrada queda en LPF_time 949 // Señal filtrada queda en LPF_time
982 950
983 Feature feature; 951 Feature feature;
984 feature.hasTimestamp = false; 952 feature.hasTimestamp = false;
985 953
954 if (m_processingMode == ModeRoughSpectrogram) {
955 feature.values = vector<float>(m_warp_params.nsamps_twarp/2+1, 0.f);
956 }
957
986 // ---------------------------------------------------------------------------------------------- 958 // ----------------------------------------------------------------------------------------------
987 // Hanning window & FFT for all warp directions 959 // Hanning window & FFT for all warp directions
988 960
989 double max_glogs = -DBL_MAX; 961 double max_glogs = -DBL_MAX;
990 int ind_max_glogs = 0; 962 int ind_max_glogs = 0;
998 x_warping[i] *= mp_HanningWindow[i]; 970 x_warping[i] *= mp_HanningWindow[i];
999 } 971 }
1000 972
1001 // Transform 973 // Transform
1002 fft_xwarping->forward(x_warping, m_auxFanChirpTransform); 974 fft_xwarping->forward(x_warping, m_auxFanChirpTransform);
975
976 if (m_processingMode == ModeRoughSpectrogram) {
977 for (int i = 0; i < (m_warp_params.nsamps_twarp/2+1); i++) {
978 double abs = sqrt(m_auxFanChirpTransform[i*2]*m_auxFanChirpTransform[i*2]+m_auxFanChirpTransform[i*2+1]*m_auxFanChirpTransform[i*2+1]);
979 if (abs > feature.values[i]) {
980 feature.values[i] = abs;
981 }
982 }
983 continue;
984 }
1003 985
1004 // Copy result 986 // Copy result
1005 double *aux_abs_fcht = m_absFanChirpTransform + i_warp*(m_warp_params.nsamps_twarp/2+1); 987 double *aux_abs_fcht = m_absFanChirpTransform + i_warp*(m_warp_params.nsamps_twarp/2+1);
1006 for (int i = 0; i < (m_warp_params.nsamps_twarp/2+1); i++) { 988 for (int i = 0; i < (m_warp_params.nsamps_twarp/2+1); i++) {
1007 aux_abs_fcht[i] = log10(1.0 + 10.0*sqrt(m_auxFanChirpTransform[i*2]*m_auxFanChirpTransform[i*2]+m_auxFanChirpTransform[i*2+1]*m_auxFanChirpTransform[i*2+1])); 989 aux_abs_fcht[i] = log10(1.0 + 10.0*sqrt(m_auxFanChirpTransform[i*2]*m_auxFanChirpTransform[i*2]+m_auxFanChirpTransform[i*2+1]*m_auxFanChirpTransform[i*2+1]));
1038 max_glogs = m_glogs[i + i_warp*m_glogs_num_f0s]; 1020 max_glogs = m_glogs[i + i_warp*m_glogs_num_f0s];
1039 ind_max_glogs = i_warp; 1021 ind_max_glogs = i_warp;
1040 } 1022 }
1041 } 1023 }
1042 } 1024 }
1043 1025
1044 // ---------------------------------------------------------------------------------------------- 1026 if (m_processingMode == ModeRoughSpectrogram) {
1045 1027
1046 for (int i=m_glogs_init_f0s; i< m_glogs_num_f0s - m_f0_params.num_f0s_per_oct; i++) { 1028 // already accumulated our return values in feature
1047 switch (m_f0gram_mode) { 1029
1048 case 1: 1030 } else if (m_processingMode == ModeSpectrogram) {
1049 max_glogs = -DBL_MAX; 1031
1050 for (int i_warp = 0; i_warp < m_warp_params.num_warps; i_warp++) { 1032 for (int i = 0; i < m_warp_params.nsamps_twarp/2+1; i++) {
1051 if (m_glogs[i + i_warp*m_glogs_num_f0s] > max_glogs) { 1033 feature.values.push_back(pow(10.0, m_absFanChirpTransform[ind_max_glogs * (m_warp_params.nsamps_twarp/2+1) + i]) - 1.0);
1052 max_glogs = m_glogs[i + i_warp*m_glogs_num_f0s]; 1034 }
1053 ind_max_glogs = i_warp; 1035
1036 } else { // f0gram
1037
1038 for (int i=m_glogs_init_f0s; i< m_glogs_num_f0s - m_f0_params.num_f0s_per_oct; i++) {
1039 switch (m_f0gram_mode) {
1040 case AllBinsOfBestDirection:
1041 feature.values.push_back((float)m_glogs[i+(int)ind_max_glogs*(int)m_glogs_num_f0s]);
1042 break;
1043 case BestBinOfAllDirections:
1044 max_glogs = -DBL_MAX;
1045 for (int i_warp = 0; i_warp < m_warp_params.num_warps; i_warp++) {
1046 if (m_glogs[i + i_warp*m_glogs_num_f0s] > max_glogs) {
1047 max_glogs = m_glogs[i + i_warp*m_glogs_num_f0s];
1048 ind_max_glogs = i_warp;
1049 }
1054 } 1050 }
1051 feature.values.push_back((float)max_glogs);
1052 break;
1055 } 1053 }
1056 feature.values.push_back((float)max_glogs); 1054 }
1057 break; 1055 }
1058 case 0:
1059 feature.values.push_back((float)m_glogs[i+(int)ind_max_glogs*(int)m_glogs_num_f0s]);
1060 break;
1061 }
1062 }
1063
1064 // ----------------------------------------------------------------------------------------------
1065 1056
1066 fs[0].push_back(feature); 1057 fs[0].push_back(feature);
1067
1068 #ifdef DEBUG
1069 printf(" ----------------------------- \n");
1070 #endif
1071
1072 return fs; 1058 return fs;
1073 //---------------------------------------------------------------------------
1074
1075 } 1059 }
1076 1060
1077 FChTransformF0gram::FeatureSet 1061 FChTransformF0gram::FeatureSet
1078 FChTransformF0gram::getRemainingFeatures() { 1062 FChTransformF0gram::getRemainingFeatures() {
1079 return FeatureSet(); 1063 return FeatureSet();