Mercurial > hg > vamp-fanchirp
comparison FChTransformF0gram.cpp @ 19:d7fbd446f47f spect
Add a simple most-salient-pitch output (making a pitch tracker)
| author | Chris Cannam |
|---|---|
| date | Thu, 04 Oct 2018 11:38:47 +0100 |
| parents | 3835e03650cc |
| children | 7964cc5ad98f |
Comparison legend: equal · deleted · inserted · replaced
| 18:3835e03650cc | 19:d7fbd446f47f |
|---|---|
| 17 | 17 |
| 18 #include "FChTransformF0gram.h" | 18 #include "FChTransformF0gram.h" |
| 19 #include "FChTransformUtils.h" | 19 #include "FChTransformUtils.h" |
| 20 #include <math.h> | 20 #include <math.h> |
| 21 #include <float.h> | 21 #include <float.h> |
| 22 | |
| 23 #include <set> | |
| 22 | 24 |
| 23 #include "bqvec/Allocators.h" | 25 #include "bqvec/Allocators.h" |
| 24 | 26 |
| 25 using namespace breakfastquay; | 27 using namespace breakfastquay; |
| 26 | 28 |
| 498 } | 500 } |
| 499 | 501 |
| 500 /* The F0gram */ | 502 /* The F0gram */ |
| 501 OutputDescriptor d; | 503 OutputDescriptor d; |
| 502 d.identifier = "f0gram"; | 504 d.identifier = "f0gram"; |
| 503 d.name = "F0gram: salience of f0s"; | 505 d.name = "F0gram"; |
| 504 d.description = "This representation show the salience of the different f0s in the signal."; | 506 d.description = "The salience of the different f0s in the signal."; |
| 505 d.hasFixedBinCount = true; | 507 d.hasFixedBinCount = true; |
| 506 d.binCount = m_f0_params.num_octs * m_f0_params.num_f0s_per_oct; | 508 d.binCount = m_f0_params.num_octs * m_f0_params.num_f0s_per_oct; |
| 507 d.binNames = labels; | 509 d.binNames = labels; |
| 510 d.hasKnownExtents = false; | |
| 511 d.isQuantized = false; | |
| 512 d.sampleType = OutputDescriptor::OneSamplePerStep; | |
| 513 d.hasDuration = false; | |
| 514 list.push_back(d); | |
| 515 | |
| 516 d.identifier = "pitch"; | |
| 517 d.name = "Most salient pitch"; | |
| 518 d.description = "The most salient f0 in the signal for each time step."; | |
| 519 d.unit = "Hz"; | |
| 520 d.hasFixedBinCount = true; | |
| 521 d.binCount = 1; | |
| 522 d.binNames.clear(); | |
| 508 d.hasKnownExtents = false; | 523 d.hasKnownExtents = false; |
| 509 d.isQuantized = false; | 524 d.isQuantized = false; |
| 510 d.sampleType = OutputDescriptor::OneSamplePerStep; | 525 d.sampleType = OutputDescriptor::OneSamplePerStep; |
| 511 d.hasDuration = false; | 526 d.hasDuration = false; |
| 512 list.push_back(d); | 527 list.push_back(d); |
| 1058 } | 1073 } |
| 1059 | 1074 |
| 1060 if (m_processingMode == ModeRoughSpectrogram) { | 1075 if (m_processingMode == ModeRoughSpectrogram) { |
| 1061 | 1076 |
| 1062 // already accumulated our return values in feature | 1077 // already accumulated our return values in feature |
| 1078 fs[0].push_back(feature); | |
| 1063 | 1079 |
| 1064 } else if (m_processingMode == ModeSpectrogram) { | 1080 } else if (m_processingMode == ModeSpectrogram) { |
| 1065 | 1081 |
| 1066 for (int i = 0; i < m_warp_params.nsamps_twarp/2+1; i++) { | 1082 for (int i = 0; i < m_warp_params.nsamps_twarp/2+1; i++) { |
| 1067 feature.values.push_back(pow(10.0, m_absFanChirpTransform[ind_max_glogs * (m_warp_params.nsamps_twarp/2+1) + i]) - 1.0); | 1083 feature.values.push_back(pow(10.0, m_absFanChirpTransform[ind_max_glogs * (m_warp_params.nsamps_twarp/2+1) + i]) - 1.0); |
| 1068 } | 1084 } |
| 1085 fs[0].push_back(feature); | |
| 1069 | 1086 |
| 1070 } else { // f0gram | 1087 } else { // f0gram |
| 1071 | 1088 |
| 1089 int bestIndex = -1; | |
| 1090 | |
| 1072 for (int i=m_glogs_init_f0s; i< m_glogs_num_f0s - m_f0_params.num_f0s_per_oct; i++) { | 1091 for (int i=m_glogs_init_f0s; i< m_glogs_num_f0s - m_f0_params.num_f0s_per_oct; i++) { |
| 1092 double value = 0.0; | |
| 1073 switch (m_f0gram_mode) { | 1093 switch (m_f0gram_mode) { |
| 1074 case AllBinsOfBestDirection: | 1094 case AllBinsOfBestDirection: |
| 1075 feature.values.push_back((float)m_glogs[i+(int)ind_max_glogs*(int)m_glogs_num_f0s]); | 1095 value = m_glogs[i+(int)ind_max_glogs*(int)m_glogs_num_f0s]; |
| 1076 break; | 1096 break; |
| 1077 case BestBinOfAllDirections: | 1097 case BestBinOfAllDirections: |
| 1078 max_glogs = -DBL_MAX; | 1098 max_glogs = -DBL_MAX; |
| 1079 for (int i_warp = 0; i_warp < m_warp_params.num_warps; i_warp++) { | 1099 for (int i_warp = 0; i_warp < m_warp_params.num_warps; i_warp++) { |
| 1080 if (m_glogs[i + i_warp*m_glogs_num_f0s] > max_glogs) { | 1100 if (m_glogs[i + i_warp*m_glogs_num_f0s] > max_glogs) { |
| 1081 max_glogs = m_glogs[i + i_warp*m_glogs_num_f0s]; | 1101 max_glogs = m_glogs[i + i_warp*m_glogs_num_f0s]; |
| 1082 ind_max_glogs = i_warp; | 1102 ind_max_glogs = i_warp; |
| 1083 } | 1103 } |
| 1084 } | 1104 } |
| 1085 feature.values.push_back((float)max_glogs); | 1105 value = max_glogs; |
| 1086 break; | 1106 break; |
| 1087 } | 1107 } |
| 1088 } | 1108 if (bestIndex < 0 || float(value) > feature.values[bestIndex]) { |
| 1089 } | 1109 bestIndex = int(feature.values.size()); |
| 1090 | 1110 } |
| 1091 fs[0].push_back(feature); | 1111 feature.values.push_back(float(value)); |
| 1112 } | |
| 1113 | |
| 1114 fs[0].push_back(feature); | |
| 1115 | |
| 1116 if (bestIndex >= 0) { | |
| 1117 | |
| 1118 double bestValue = feature.values[bestIndex]; | |
| 1119 set<double> ordered(feature.values.begin(), feature.values.end()); | |
| 1120 vector<double> flattened(ordered.begin(), ordered.end()); | |
| 1121 double median = flattened[flattened.size()/2]; | |
| 1122 if (bestValue > median * 8.0) { | |
| 1123 Feature pfeature; | |
| 1124 pfeature.hasTimestamp = false; | |
| 1125 pfeature.values.push_back(m_f0s[bestIndex]); | |
| 1126 fs[1].push_back(pfeature); | |
| 1127 } | |
| 1128 } | |
| 1129 } | |
| 1130 | |
| 1092 return fs; | 1131 return fs; |
| 1093 } | 1132 } |
| 1094 | 1133 |
| 1095 FChTransformF0gram::FeatureSet | 1134 FChTransformF0gram::FeatureSet |
| 1096 FChTransformF0gram::getRemainingFeatures() { | 1135 FChTransformF0gram::getRemainingFeatures() { |
