comparison FChTransformF0gram.cpp @ 19:d7fbd446f47f spect

Add a simple most-salient-pitch output (making a pitch tracker)
author Chris Cannam
date Thu, 04 Oct 2018 11:38:47 +0100
parents 3835e03650cc
children 7964cc5ad98f
comparison
equal deleted inserted replaced
18:3835e03650cc 19:d7fbd446f47f
17 17
18 #include "FChTransformF0gram.h" 18 #include "FChTransformF0gram.h"
19 #include "FChTransformUtils.h" 19 #include "FChTransformUtils.h"
20 #include <math.h> 20 #include <math.h>
21 #include <float.h> 21 #include <float.h>
22
23 #include <set>
22 24
23 #include "bqvec/Allocators.h" 25 #include "bqvec/Allocators.h"
24 26
25 using namespace breakfastquay; 27 using namespace breakfastquay;
26 28
498 } 500 }
499 501
500 /* The F0gram */ 502 /* The F0gram */
501 OutputDescriptor d; 503 OutputDescriptor d;
502 d.identifier = "f0gram"; 504 d.identifier = "f0gram";
503 d.name = "F0gram: salience of f0s"; 505 d.name = "F0gram";
504 d.description = "This representation show the salience of the different f0s in the signal."; 506 d.description = "The salience of the different f0s in the signal.";
505 d.hasFixedBinCount = true; 507 d.hasFixedBinCount = true;
506 d.binCount = m_f0_params.num_octs * m_f0_params.num_f0s_per_oct; 508 d.binCount = m_f0_params.num_octs * m_f0_params.num_f0s_per_oct;
507 d.binNames = labels; 509 d.binNames = labels;
510 d.hasKnownExtents = false;
511 d.isQuantized = false;
512 d.sampleType = OutputDescriptor::OneSamplePerStep;
513 d.hasDuration = false;
514 list.push_back(d);
515
516 d.identifier = "pitch";
517 d.name = "Most salient pitch";
518 d.description = "The most salient f0 in the signal for each time step.";
519 d.unit = "Hz";
520 d.hasFixedBinCount = true;
521 d.binCount = 1;
522 d.binNames.clear();
508 d.hasKnownExtents = false; 523 d.hasKnownExtents = false;
509 d.isQuantized = false; 524 d.isQuantized = false;
510 d.sampleType = OutputDescriptor::OneSamplePerStep; 525 d.sampleType = OutputDescriptor::OneSamplePerStep;
511 d.hasDuration = false; 526 d.hasDuration = false;
512 list.push_back(d); 527 list.push_back(d);
1058 } 1073 }
1059 1074
1060 if (m_processingMode == ModeRoughSpectrogram) { 1075 if (m_processingMode == ModeRoughSpectrogram) {
1061 1076
1062 // already accumulated our return values in feature 1077 // already accumulated our return values in feature
1078 fs[0].push_back(feature);
1063 1079
1064 } else if (m_processingMode == ModeSpectrogram) { 1080 } else if (m_processingMode == ModeSpectrogram) {
1065 1081
1066 for (int i = 0; i < m_warp_params.nsamps_twarp/2+1; i++) { 1082 for (int i = 0; i < m_warp_params.nsamps_twarp/2+1; i++) {
1067 feature.values.push_back(pow(10.0, m_absFanChirpTransform[ind_max_glogs * (m_warp_params.nsamps_twarp/2+1) + i]) - 1.0); 1083 feature.values.push_back(pow(10.0, m_absFanChirpTransform[ind_max_glogs * (m_warp_params.nsamps_twarp/2+1) + i]) - 1.0);
1068 } 1084 }
1085 fs[0].push_back(feature);
1069 1086
1070 } else { // f0gram 1087 } else { // f0gram
1071 1088
1089 int bestIndex = -1;
1090
1072 for (int i=m_glogs_init_f0s; i< m_glogs_num_f0s - m_f0_params.num_f0s_per_oct; i++) { 1091 for (int i=m_glogs_init_f0s; i< m_glogs_num_f0s - m_f0_params.num_f0s_per_oct; i++) {
1092 double value = 0.0;
1073 switch (m_f0gram_mode) { 1093 switch (m_f0gram_mode) {
1074 case AllBinsOfBestDirection: 1094 case AllBinsOfBestDirection:
1075 feature.values.push_back((float)m_glogs[i+(int)ind_max_glogs*(int)m_glogs_num_f0s]); 1095 value = m_glogs[i+(int)ind_max_glogs*(int)m_glogs_num_f0s];
1076 break; 1096 break;
1077 case BestBinOfAllDirections: 1097 case BestBinOfAllDirections:
1078 max_glogs = -DBL_MAX; 1098 max_glogs = -DBL_MAX;
1079 for (int i_warp = 0; i_warp < m_warp_params.num_warps; i_warp++) { 1099 for (int i_warp = 0; i_warp < m_warp_params.num_warps; i_warp++) {
1080 if (m_glogs[i + i_warp*m_glogs_num_f0s] > max_glogs) { 1100 if (m_glogs[i + i_warp*m_glogs_num_f0s] > max_glogs) {
1081 max_glogs = m_glogs[i + i_warp*m_glogs_num_f0s]; 1101 max_glogs = m_glogs[i + i_warp*m_glogs_num_f0s];
1082 ind_max_glogs = i_warp; 1102 ind_max_glogs = i_warp;
1083 } 1103 }
1084 } 1104 }
1085 feature.values.push_back((float)max_glogs); 1105 value = max_glogs;
1086 break; 1106 break;
1087 } 1107 }
1088 } 1108 if (bestIndex < 0 || float(value) > feature.values[bestIndex]) {
1089 } 1109 bestIndex = int(feature.values.size());
1090 1110 }
1091 fs[0].push_back(feature); 1111 feature.values.push_back(float(value));
1112 }
1113
1114 fs[0].push_back(feature);
1115
1116 if (bestIndex >= 0) {
1117
1118 double bestValue = feature.values[bestIndex];
1119 set<double> ordered(feature.values.begin(), feature.values.end());
1120 vector<double> flattened(ordered.begin(), ordered.end());
1121 double median = flattened[flattened.size()/2];
1122 if (bestValue > median * 8.0) {
1123 Feature pfeature;
1124 pfeature.hasTimestamp = false;
1125 pfeature.values.push_back(m_f0s[bestIndex]);
1126 fs[1].push_back(pfeature);
1127 }
1128 }
1129 }
1130
1092 return fs; 1131 return fs;
1093 } 1132 }
1094 1133
1095 FChTransformF0gram::FeatureSet 1134 FChTransformF0gram::FeatureSet
1096 FChTransformF0gram::getRemainingFeatures() { 1135 FChTransformF0gram::getRemainingFeatures() {