Mercurial > hg > vamp-fanchirp
comparison FChTransformF0gram.cpp @ 15:0a860992b4f4 spect
Break out the plugin into three different plugins (using the same class in different modes) in order to provide simplistic and more sophisticated spectrograms as well as the f0-gram. Remove the program support, since it doesn't work usefully anyway (it just overrides the user's preferred settings).
author | Chris Cannam |
---|---|
date | Wed, 03 Oct 2018 13:16:09 +0100 |
parents | 44b86c346a5a |
children | ce62ed201de8 |
Comparison legend: equal, deleted, inserted, replaced
14:44b86c346a5a | 15:0a860992b4f4 |
---|---|
26 | 26 |
27 //#define DEBUG | 27 //#define DEBUG |
28 | 28 |
29 #define MAX(x, y) (((x) > (y)) ? (x) : (y)) | 29 #define MAX(x, y) (((x) > (y)) ? (x) : (y)) |
30 | 30 |
31 FChTransformF0gram::FChTransformF0gram(float inputSampleRate) : | 31 FChTransformF0gram::FChTransformF0gram(ProcessingMode mode, |
32 float inputSampleRate) : | |
32 Plugin(inputSampleRate), | 33 Plugin(inputSampleRate), |
33 m_currentProgram("default"), | 34 m_processingMode(mode), |
34 m_stepSize(0), // We are using 0 for step and block size to indicate "not yet set". | 35 m_stepSize(0), // We are using 0 for step and block size to indicate "not yet set". |
35 m_blockSize(0) { | 36 m_blockSize(0) { |
36 | 37 |
37 m_fs = inputSampleRate; | 38 m_fs = inputSampleRate; |
38 // max frequency of interest (Hz) | 39 // max frequency of interest (Hz) |
55 m_f0_params.prefer_stdev = 18; | 56 m_f0_params.prefer_stdev = 18; |
56 // glogs parameters | 57 // glogs parameters |
57 m_glogs_params.HP_logS = true; | 58 m_glogs_params.HP_logS = true; |
58 m_glogs_params.att_subharms = 1; | 59 m_glogs_params.att_subharms = 1; |
59 // display parameters | 60 // display parameters |
60 m_f0gram_mode = true; | 61 m_f0gram_mode = BestBinOfAllDirections; |
61 | 62 |
62 m_glogs_params.median_poly_coefs[0] = -0.000000058551680; | 63 m_glogs_params.median_poly_coefs[0] = -0.000000058551680; |
63 m_glogs_params.median_poly_coefs[1] = -0.000006945207775; | 64 m_glogs_params.median_poly_coefs[1] = -0.000006945207775; |
64 m_glogs_params.median_poly_coefs[2] = 0.002357223226588; | 65 m_glogs_params.median_poly_coefs[2] = 0.002357223226588; |
65 | 66 |
117 deallocate(m_glogs_sigma_correction); | 118 deallocate(m_glogs_sigma_correction); |
118 } | 119 } |
119 | 120 |
120 string | 121 string |
121 FChTransformF0gram::getIdentifier() const { | 122 FChTransformF0gram::getIdentifier() const { |
122 return "fchtransformf0gram"; | 123 switch (m_processingMode) { |
124 case ModeF0Gram: return "fchtransformf0gram"; | |
125 case ModeSpectrogram: return "fchtransformspectrogram"; | |
126 case ModeRoughSpectrogram: return "fchtransformrough"; | |
127 } | |
123 } | 128 } |
124 | 129 |
125 string | 130 string |
126 FChTransformF0gram::getName() const { | 131 FChTransformF0gram::getName() const { |
127 return "Fan Chirp Transform F0gram"; | 132 switch (m_processingMode) { |
133 case ModeF0Gram: return "Fan Chirp Transform F0gram"; | |
134 case ModeSpectrogram: return "Fan Chirp Transform Spectrogram"; | |
135 case ModeRoughSpectrogram: return "Fan Chirp Transform Rough Spectrogram"; | |
136 } | |
128 } | 137 } |
129 | 138 |
130 string | 139 string |
131 FChTransformF0gram::getDescription() const { | 140 FChTransformF0gram::getDescription() const { |
132 // Return something helpful here! | 141 switch (m_processingMode) { |
133 return "This plug-in produces a representation, called F0gram, which exhibits the salience of the fundamental frequency of the sound sources in the audio file. The computation of the F0gram makes use of the Fan Chirp Transform analysis. It is based on the article \"Fan chirp transform for music representation\" P. Cancela, E. Lopez, M. Rocamora, International Conference on Digital Audio Effects, 13th. DAFx-10. Graz, Austria - 6-10 Sep 2010."; | 142 case ModeF0Gram: |
143 return "This plug-in produces a representation, called F0gram, which exhibits the salience of the fundamental frequency of the sound sources in the audio file. The computation of the F0gram makes use of the Fan Chirp Transform analysis. It is based on the article \"Fan chirp transform for music representation\" P. Cancela, E. Lopez, M. Rocamora, International Conference on Digital Audio Effects, 13th. DAFx-10. Graz, Austria - 6-10 Sep 2010."; | |
144 case ModeSpectrogram: | |
145 return "This plug-in produces a spectral representation of the audio using Fan Chirp Transform analysis."; | |
146 case ModeRoughSpectrogram: | |
147 return "This plug-in produces a more approximate spectral representation of the audio using Fan Chirp Transform analysis."; | |
148 } | |
134 } | 149 } |
135 | 150 |
136 string | 151 string |
137 FChTransformF0gram::getMaker() const { | 152 FChTransformF0gram::getMaker() const { |
138 // Your name here | 153 // Your name here |
143 FChTransformF0gram::getPluginVersion() const { | 158 FChTransformF0gram::getPluginVersion() const { |
144 // Increment this each time you release a version that behaves | 159 // Increment this each time you release a version that behaves |
145 // differently from the previous one | 160 // differently from the previous one |
146 // | 161 // |
147 // 0 - initial version from scratch | 162 // 0 - initial version from scratch |
148 return 0; | 163 return 1; |
149 } | 164 } |
150 | 165 |
151 string | 166 string |
152 FChTransformF0gram::getCopyright() const { | 167 FChTransformF0gram::getCopyright() const { |
153 // This function is not ideally named. It does not necessarily | 168 // This function is not ideally named. It does not necessarily |
410 } else if (identifier == "f0_prefer_mean") { | 425 } else if (identifier == "f0_prefer_mean") { |
411 return m_f0_params.prefer_mean; | 426 return m_f0_params.prefer_mean; |
412 } else if (identifier == "f0_prefer_stdev") { | 427 } else if (identifier == "f0_prefer_stdev") { |
413 return m_f0_params.prefer_stdev; | 428 return m_f0_params.prefer_stdev; |
414 } else if (identifier == "f0gram_mode") { | 429 } else if (identifier == "f0gram_mode") { |
415 return m_f0gram_mode; | 430 return m_f0gram_mode == BestBinOfAllDirections ? 1.0 : 0.0; |
416 } else { | 431 } else { |
417 return 0.f; | 432 return 0.f; |
418 } | 433 } |
419 | 434 |
420 } | 435 } |
421 | 436 |
422 void FChTransformF0gram::setParameter(string identifier, float value) { | 437 void FChTransformF0gram::setParameter(string identifier, float value) |
423 | 438 { |
424 if (identifier == "fmax") { | 439 if (identifier == "fmax") { |
425 m_fmax = value; | 440 m_fmax = value; |
426 } else if (identifier == "nsamp") { | 441 } else if (identifier == "nsamp") { |
427 m_warp_params.nsamps_twarp = value; | 442 m_warp_params.nsamps_twarp = value; |
428 } else if (identifier == "alpha_max") { | 443 } else if (identifier == "alpha_max") { |
446 } else if (identifier == "f0_prefer_mean") { | 461 } else if (identifier == "f0_prefer_mean") { |
447 m_f0_params.prefer_mean = value; | 462 m_f0_params.prefer_mean = value; |
448 } else if (identifier == "f0_prefer_stdev") { | 463 } else if (identifier == "f0_prefer_stdev") { |
449 m_f0_params.prefer_stdev = value; | 464 m_f0_params.prefer_stdev = value; |
450 } else if (identifier == "f0gram_mode") { | 465 } else if (identifier == "f0gram_mode") { |
451 m_f0gram_mode = value; | 466 m_f0gram_mode = (value > 0.5 ? |
452 } | 467 BestBinOfAllDirections : |
453 | 468 AllBinsOfBestDirection); |
469 } else { | |
470 cerr << "WARNING: Unknown parameter id \"" | |
471 << identifier << "\"" << endl; | |
472 } | |
454 } | 473 } |
455 | 474 |
456 FChTransformF0gram::ProgramList | 475 FChTransformF0gram::ProgramList |
457 FChTransformF0gram::getPrograms() const { | 476 FChTransformF0gram::getPrograms() const { |
458 ProgramList list; | 477 ProgramList list; |
459 | |
460 list.push_back("default"); | |
461 | |
462 return list; | 478 return list; |
463 } | |
464 | |
465 string | |
466 FChTransformF0gram::getCurrentProgram() const { | |
467 return m_currentProgram; | |
468 } | |
469 | |
470 void | |
471 FChTransformF0gram::selectProgram(string name) { | |
472 | |
473 m_currentProgram = name; | |
474 | |
475 if (name == "default") { | |
476 m_fmax = 10000.f; | |
477 | |
478 m_warp_params.nsamps_twarp = 2048; | |
479 m_warp_params.alpha_max = 4; | |
480 m_warp_params.num_warps = 21; | |
481 m_warp_params.fact_over_samp = 2; | |
482 m_warp_params.alpha_dist = 0; | |
483 | |
484 m_f0_params.f0min = 80.0; | |
485 m_f0_params.num_octs = 4; | |
486 m_f0_params.num_f0s_per_oct = 192; | |
487 m_f0_params.num_f0_hyps = 5; | |
488 m_f0_params.prefer = true; | |
489 m_f0_params.prefer_mean = 60; | |
490 m_f0_params.prefer_stdev = 18; | |
491 | |
492 m_glogs_params.HP_logS = true; | |
493 m_glogs_params.att_subharms = 1; | |
494 | |
495 m_glogs_params.median_poly_coefs[0] = -0.000000058551680; | |
496 m_glogs_params.median_poly_coefs[1] = -0.000006945207775; | |
497 m_glogs_params.median_poly_coefs[2] = 0.002357223226588; | |
498 | |
499 m_glogs_params.sigma_poly_coefs[0] = 0.000000092782308; | |
500 m_glogs_params.sigma_poly_coefs[1] = 0.000057283574898; | |
501 m_glogs_params.sigma_poly_coefs[2] = 0.022199903714288; | |
502 | |
503 m_nfft = m_warp_params.nsamps_twarp; | |
504 m_hop = m_warp_params.fact_over_samp * 256; | |
505 | |
506 m_num_f0s = 0; | |
507 | |
508 m_f0gram_mode = 1; | |
509 | |
510 } | |
511 } | 479 } |
512 | 480 |
513 FChTransformF0gram::OutputList | 481 FChTransformF0gram::OutputList |
514 FChTransformF0gram::getOutputDescriptors() const { | 482 FChTransformF0gram::getOutputDescriptors() const { |
515 | 483 |
981 // Señal filtrada queda en LPF_time | 949 // Señal filtrada queda en LPF_time |
982 | 950 |
983 Feature feature; | 951 Feature feature; |
984 feature.hasTimestamp = false; | 952 feature.hasTimestamp = false; |
985 | 953 |
954 if (m_processingMode == ModeRoughSpectrogram) { | |
955 feature.values = vector<float>(m_warp_params.nsamps_twarp/2+1, 0.f); | |
956 } | |
957 | |
986 // ---------------------------------------------------------------------------------------------- | 958 // ---------------------------------------------------------------------------------------------- |
987 // Hanning window & FFT for all warp directions | 959 // Hanning window & FFT for all warp directions |
988 | 960 |
989 double max_glogs = -DBL_MAX; | 961 double max_glogs = -DBL_MAX; |
990 int ind_max_glogs = 0; | 962 int ind_max_glogs = 0; |
998 x_warping[i] *= mp_HanningWindow[i]; | 970 x_warping[i] *= mp_HanningWindow[i]; |
999 } | 971 } |
1000 | 972 |
1001 // Transform | 973 // Transform |
1002 fft_xwarping->forward(x_warping, m_auxFanChirpTransform); | 974 fft_xwarping->forward(x_warping, m_auxFanChirpTransform); |
975 | |
976 if (m_processingMode == ModeRoughSpectrogram) { | |
977 for (int i = 0; i < (m_warp_params.nsamps_twarp/2+1); i++) { | |
978 double abs = sqrt(m_auxFanChirpTransform[i*2]*m_auxFanChirpTransform[i*2]+m_auxFanChirpTransform[i*2+1]*m_auxFanChirpTransform[i*2+1]); | |
979 if (abs > feature.values[i]) { | |
980 feature.values[i] = abs; | |
981 } | |
982 } | |
983 continue; | |
984 } | |
1003 | 985 |
1004 // Copy result | 986 // Copy result |
1005 double *aux_abs_fcht = m_absFanChirpTransform + i_warp*(m_warp_params.nsamps_twarp/2+1); | 987 double *aux_abs_fcht = m_absFanChirpTransform + i_warp*(m_warp_params.nsamps_twarp/2+1); |
1006 for (int i = 0; i < (m_warp_params.nsamps_twarp/2+1); i++) { | 988 for (int i = 0; i < (m_warp_params.nsamps_twarp/2+1); i++) { |
1007 aux_abs_fcht[i] = log10(1.0 + 10.0*sqrt(m_auxFanChirpTransform[i*2]*m_auxFanChirpTransform[i*2]+m_auxFanChirpTransform[i*2+1]*m_auxFanChirpTransform[i*2+1])); | 989 aux_abs_fcht[i] = log10(1.0 + 10.0*sqrt(m_auxFanChirpTransform[i*2]*m_auxFanChirpTransform[i*2]+m_auxFanChirpTransform[i*2+1]*m_auxFanChirpTransform[i*2+1])); |
1038 max_glogs = m_glogs[i + i_warp*m_glogs_num_f0s]; | 1020 max_glogs = m_glogs[i + i_warp*m_glogs_num_f0s]; |
1039 ind_max_glogs = i_warp; | 1021 ind_max_glogs = i_warp; |
1040 } | 1022 } |
1041 } | 1023 } |
1042 } | 1024 } |
1043 | 1025 |
1044 // ---------------------------------------------------------------------------------------------- | 1026 if (m_processingMode == ModeRoughSpectrogram) { |
1045 | 1027 |
1046 for (int i=m_glogs_init_f0s; i< m_glogs_num_f0s - m_f0_params.num_f0s_per_oct; i++) { | 1028 // already accumulated our return values in feature |
1047 switch (m_f0gram_mode) { | 1029 |
1048 case 1: | 1030 } else if (m_processingMode == ModeSpectrogram) { |
1049 max_glogs = -DBL_MAX; | 1031 |
1050 for (int i_warp = 0; i_warp < m_warp_params.num_warps; i_warp++) { | 1032 for (int i = 0; i < m_warp_params.nsamps_twarp/2+1; i++) { |
1051 if (m_glogs[i + i_warp*m_glogs_num_f0s] > max_glogs) { | 1033 feature.values.push_back(pow(10.0, m_absFanChirpTransform[ind_max_glogs * (m_warp_params.nsamps_twarp/2+1) + i]) - 1.0); |
1052 max_glogs = m_glogs[i + i_warp*m_glogs_num_f0s]; | 1034 } |
1053 ind_max_glogs = i_warp; | 1035 |
1036 } else { // f0gram | |
1037 | |
1038 for (int i=m_glogs_init_f0s; i< m_glogs_num_f0s - m_f0_params.num_f0s_per_oct; i++) { | |
1039 switch (m_f0gram_mode) { | |
1040 case AllBinsOfBestDirection: | |
1041 feature.values.push_back((float)m_glogs[i+(int)ind_max_glogs*(int)m_glogs_num_f0s]); | |
1042 break; | |
1043 case BestBinOfAllDirections: | |
1044 max_glogs = -DBL_MAX; | |
1045 for (int i_warp = 0; i_warp < m_warp_params.num_warps; i_warp++) { | |
1046 if (m_glogs[i + i_warp*m_glogs_num_f0s] > max_glogs) { | |
1047 max_glogs = m_glogs[i + i_warp*m_glogs_num_f0s]; | |
1048 ind_max_glogs = i_warp; | |
1049 } | |
1054 } | 1050 } |
1051 feature.values.push_back((float)max_glogs); | |
1052 break; | |
1055 } | 1053 } |
1056 feature.values.push_back((float)max_glogs); | 1054 } |
1057 break; | 1055 } |
1058 case 0: | |
1059 feature.values.push_back((float)m_glogs[i+(int)ind_max_glogs*(int)m_glogs_num_f0s]); | |
1060 break; | |
1061 } | |
1062 } | |
1063 | |
1064 // ---------------------------------------------------------------------------------------------- | |
1065 | 1056 |
1066 fs[0].push_back(feature); | 1057 fs[0].push_back(feature); |
1067 | |
1068 #ifdef DEBUG | |
1069 printf(" ----------------------------- \n"); | |
1070 #endif | |
1071 | |
1072 return fs; | 1058 return fs; |
1073 //--------------------------------------------------------------------------- | |
1074 | |
1075 } | 1059 } |
1076 | 1060 |
1077 FChTransformF0gram::FeatureSet | 1061 FChTransformF0gram::FeatureSet |
1078 FChTransformF0gram::getRemainingFeatures() { | 1062 FChTransformF0gram::getRemainingFeatures() { |
1079 return FeatureSet(); | 1063 return FeatureSet(); |