comparison src/FeatureExtractor.cpp @ 169:001db4c32eb0 tuning-rescale

Alternative handling of the reference frequency parameter -- scale the whole spectrum, rather than just adjusting the semitone bins above the crossover frequency
author Chris Cannam
date Thu, 05 Feb 2015 16:26:41 +0000
parents d23dad16d6f9
children 1440773da492
comparison
equal deleted inserted replaced
166:d23dad16d6f9 169:001db4c32eb0
68 } 68 }
69 69
70 void 70 void
71 FeatureExtractor::makeStandardFrequencyMap() 71 FeatureExtractor::makeStandardFrequencyMap()
72 { 72 {
73 double refFreq = m_params.referenceFrequency; 73 // Our handling of the referenceFrequency parameter depends on the
74 // frequency map in use.
75
76 // With the chroma frequency map, we use referenceFrequency to set
77 // up the chroma bin frequencies when constructing the map, and
78 // then just follow the map (without having to refer to
79 // referenceFrequency again) when we get the frequency-domain
80 // audio.
81
82 // With the standard frequency map, using referenceFrequency to
83 // set up the map doesn't work so well -- it only really affects
84 // the crossover frequency, and much of the useful information is
85 // below that frequency. What we do instead is to ignore the
86 // referenceFrequency when creating the map -- setting it up for
87 // 440Hz -- and then use it to scale the individual
88 // frequency-domain audio frames before applying the map to them.
89
90 double refFreq = 440.; // See above -- *not* the parameter!
74 double binWidth = m_params.sampleRate / m_params.fftSize; 91 double binWidth = m_params.sampleRate / m_params.fftSize;
75 int crossoverBin = (int)(2 / (pow(2, 1/12.0) - 1)); 92 int crossoverBin = (int)(2 / (pow(2, 1/12.0) - 1));
76 int crossoverMidi = lrint(log(crossoverBin * binWidth / refFreq)/ 93 int crossoverMidi = lrint(log(crossoverBin * binWidth / refFreq)/
77 log(2.0) * 12 + 69); 94 log(2.0) * 12 + 69);
78 95
79 #ifdef DEBUG_FEATURE_EXTRACTOR
80 cerr << "FeatureExtractor::makeStandardFrequencyMap: refFreq = " << refFreq << endl;
81 #endif
82
83 int i = 0; 96 int i = 0;
84 while (i <= crossoverBin) { 97 while (i <= crossoverBin) {
85 m_freqMap[i] = i; 98 m_freqMap[i] = i;
86 ++i; 99 ++i;
87 } 100 }
117 } 130 }
118 131
119 vector<double> 132 vector<double>
120 FeatureExtractor::process(const vector<double> &real, const vector<double> &imag) 133 FeatureExtractor::process(const vector<double> &real, const vector<double> &imag)
121 { 134 {
135 vector<double> mags(m_params.fftSize/2 + 1, 0.0);
136
137 for (int i = 0; i <= m_params.fftSize/2; i++) {
138 mags[i] = real[i] * real[i] + imag[i] * imag[i];
139 }
140
141 return processMags(mags);
142 }
143
144 vector<double>
145 FeatureExtractor::process(const float *cframe)
146 {
147 vector<double> mags(m_params.fftSize/2 + 1, 0.0);
148
149 for (int i = 0; i <= m_params.fftSize/2; i++) {
150 mags[i] = cframe[i*2] * cframe[i*2] + cframe[i*2+1] * cframe[i*2+1];
151 }
152
153 return processMags(mags);
154 }
155
156 vector<double>
157 FeatureExtractor::processMags(const vector<double> &mags)
158 {
122 vector<double> frame(m_featureSize, 0.0); 159 vector<double> frame(m_featureSize, 0.0);
123 160
124 for (int i = 0; i <= m_params.fftSize/2; i++) { 161 if (!m_params.useChromaFrequencyMap &&
125 double mag = real[i] * real[i] + imag[i] * imag[i]; 162 (m_params.referenceFrequency != 440.)) {
126 frame[m_freqMap[i]] += mag; 163
164 // See comment in makeStandardFrequencyMap above
165 vector<double> scaled = scaleMags(mags);
166
167 for (int i = 0; i <= m_params.fftSize/2; i++) {
168 frame[m_freqMap[i]] += scaled[i];
169 }
170
171 } else {
172 for (int i = 0; i <= m_params.fftSize/2; i++) {
173 frame[m_freqMap[i]] += mags[i];
174 }
127 } 175 }
128 176
129 return frame; 177 return frame;
130 } 178 }
131 179
132 vector<double> 180 vector<double>
133 FeatureExtractor::process(const float *cframe) 181 FeatureExtractor::scaleMags(const vector<double> &mags)
134 { 182 {
135 vector<double> frame(m_featureSize, 0.0); 183 // Scale the pitch content in the given magnitude spectrum to
136 184 // accommodate a difference in tuning frequency (between the 440Hz
137 for (int i = 0; i <= m_params.fftSize/2; i++) { 185 // reference and the actual tuning frequency of the input audio).
138 double mag = cframe[i*2] * cframe[i*2] + cframe[i*2+1] * cframe[i*2+1]; 186 // We only do this when not using chroma features -- see the
139 frame[m_freqMap[i]] += mag; 187 // comment in makeStandardFrequencyMap() above.
140 } 188
141 189 if (m_params.useChromaFrequencyMap) return mags;
142 return frame; 190
143 } 191 double ratio = 440. / m_params.referenceFrequency;
144 192
193 int n = mags.size();
194
195 vector<double> scaled(n, 0.0);
196
197 for (int target = 0; target < n; ++target) {
198
199 double source = target / ratio;
200
201 int lower = int(source);
202 int higher = lower + 1;
203
204 double lowerProp = higher - source;
205 double higherProp = source - lower;
206
207 double value = 0.0;
208 if (lower >= 0 && lower < n) {
209 value += lowerProp * mags[lower];
210 }
211 if (higher >= 0 && higher < n) {
212 value += higherProp * mags[higher];
213 }
214
215 scaled[target] = value;
216 }
217
218 return scaled;
219 }
220