Chris@7
|
1
|
Chris@7
|
2 #include "TipicVampPlugin.h"
|
Chris@7
|
3
|
Chris@19
|
4 #include "PitchFilterbank.h"
|
Chris@19
|
5 #include "CRP.h"
|
Chris@26
|
6 #include "Chroma.h"
|
Chris@39
|
7 #include "CENS.h"
|
Chris@35
|
8 #include "FeatureDownsample.h"
|
Chris@19
|
9
|
Chris@9
|
10 #include <iostream>
|
Chris@27
|
11 #include <sstream>
|
Chris@9
|
12
|
Chris@9
|
13 using namespace std;
|
Chris@9
|
14
|
Chris@15
|
15 static const float defaultTuningFrequency = 440.f;
|
Chris@15
|
16
|
Chris@7
|
17 Tipic::Tipic(float inputSampleRate) :
|
Chris@9
|
18 Plugin(inputSampleRate),
|
Chris@9
|
19 m_stepSize(0),
|
Chris@9
|
20 m_blockSize(0),
|
Chris@15
|
21 m_tuningFrequency(defaultTuningFrequency),
|
Chris@15
|
22 m_filterbank(0),
|
Chris@19
|
23 m_crp(0),
|
Chris@26
|
24 m_chroma(0),
|
Chris@26
|
25 m_logChroma(0),
|
Chris@39
|
26 m_cens(0),
|
Chris@19
|
27 m_pitchOutputNo(-1),
|
Chris@26
|
28 m_cpOutputNo(-1),
|
Chris@26
|
29 m_clpOutputNo(-1),
|
Chris@39
|
30 m_censOutputNo(-1),
|
Chris@19
|
31 m_crpOutputNo(-1)
|
Chris@7
|
32 {
|
Chris@7
|
33 }
|
Chris@7
|
34
|
Chris@7
|
35 Tipic::~Tipic()
|
Chris@7
|
36 {
|
Chris@15
|
37 delete m_filterbank;
|
Chris@26
|
38 delete m_crp;
|
Chris@26
|
39 delete m_chroma;
|
Chris@26
|
40 delete m_logChroma;
|
Chris@39
|
41 delete m_cens;
|
Chris@35
|
42
|
Chris@35
|
43 for (auto &d: m_downsamplers) delete d.second;
|
Chris@7
|
44 }
|
Chris@7
|
45
|
Chris@7
|
46 string
|
Chris@7
|
47 Tipic::getIdentifier() const
|
Chris@7
|
48 {
|
Chris@7
|
49 return "tipic";
|
Chris@7
|
50 }
|
Chris@7
|
51
|
Chris@7
|
52 string
|
Chris@7
|
53 Tipic::getName() const
|
Chris@7
|
54 {
|
Chris@47
|
55 return "TIPIC";
|
Chris@7
|
56 }
|
Chris@7
|
57
|
Chris@7
|
58 string
|
Chris@7
|
59 Tipic::getDescription() const
|
Chris@7
|
60 {
|
Chris@47
|
61 return "Chroma and pitch features, including DCT-reduced chroma with extra timbre invariance. Based on the MATLAB Chroma Toolbox by Müller and Ewert, adapted to use causal filters.";
|
Chris@7
|
62 }
|
Chris@7
|
63
|
Chris@7
|
64 string
|
Chris@7
|
65 Tipic::getMaker() const
|
Chris@7
|
66 {
|
Chris@41
|
67 return "Queen Mary, University of London";
|
Chris@7
|
68 }
|
Chris@7
|
69
|
Chris@7
|
70 int
|
Chris@7
|
71 Tipic::getPluginVersion() const
|
Chris@7
|
72 {
|
Chris@47
|
73 return 1;
|
Chris@7
|
74 }
|
Chris@7
|
75
|
Chris@7
|
76 string
|
Chris@7
|
77 Tipic::getCopyright() const
|
Chris@7
|
78 {
|
Chris@54
|
79 return "Methods by Sebastian Ewert and Meinard Müller, plugin by Chris Cannam. GPL licence.";
|
Chris@7
|
80 }
|
Chris@7
|
81
|
Chris@7
|
82 Tipic::InputDomain
|
Chris@7
|
83 Tipic::getInputDomain() const
|
Chris@7
|
84 {
|
Chris@7
|
85 return TimeDomain;
|
Chris@7
|
86 }
|
Chris@7
|
87
|
Chris@7
|
88 size_t
|
Chris@7
|
89 Tipic::getPreferredBlockSize() const
|
Chris@7
|
90 {
|
Chris@41
|
91 return 0;
|
Chris@7
|
92 }
|
Chris@7
|
93
|
Chris@7
|
94 size_t
|
Chris@7
|
95 Tipic::getPreferredStepSize() const
|
Chris@7
|
96 {
|
Chris@41
|
97 return 0;
|
Chris@7
|
98 }
|
Chris@7
|
99
|
Chris@7
|
100 size_t
|
Chris@7
|
101 Tipic::getMinChannelCount() const
|
Chris@7
|
102 {
|
Chris@7
|
103 return 1;
|
Chris@7
|
104 }
|
Chris@7
|
105
|
Chris@7
|
106 size_t
|
Chris@7
|
107 Tipic::getMaxChannelCount() const
|
Chris@7
|
108 {
|
Chris@7
|
109 return 1;
|
Chris@7
|
110 }
|
Chris@7
|
111
|
Chris@7
|
112 Tipic::ParameterList
|
Chris@7
|
113 Tipic::getParameterDescriptors() const
|
Chris@7
|
114 {
|
Chris@7
|
115 ParameterList list;
|
Chris@15
|
116
|
Chris@15
|
117 ParameterDescriptor desc;
|
Chris@15
|
118 desc.identifier = "tuning";
|
Chris@15
|
119 desc.name = "Tuning Frequency";
|
Chris@15
|
120 desc.unit = "Hz";
|
Chris@15
|
121 desc.description = "Frequency of concert A";
|
Chris@15
|
122 desc.minValue = 360;
|
Chris@15
|
123 desc.maxValue = 500;
|
Chris@15
|
124 desc.defaultValue = defaultTuningFrequency;
|
Chris@15
|
125 desc.isQuantized = false;
|
Chris@15
|
126 list.push_back(desc);
|
Chris@15
|
127
|
Chris@7
|
128 return list;
|
Chris@7
|
129 }
|
Chris@7
|
130
|
Chris@7
|
131 float
|
Chris@7
|
132 Tipic::getParameter(string identifier) const
|
Chris@7
|
133 {
|
Chris@15
|
134 if (identifier == "tuning") {
|
Chris@15
|
135 return m_tuningFrequency;
|
Chris@15
|
136 }
|
Chris@7
|
137 return 0;
|
Chris@7
|
138 }
|
Chris@7
|
139
|
Chris@7
|
140 void
|
Chris@7
|
141 Tipic::setParameter(string identifier, float value)
|
Chris@7
|
142 {
|
Chris@15
|
143 if (identifier == "tuning") {
|
Chris@15
|
144 m_tuningFrequency = value;
|
Chris@15
|
145 }
|
Chris@7
|
146 }
|
Chris@7
|
147
|
Chris@7
|
148 Tipic::ProgramList
|
Chris@7
|
149 Tipic::getPrograms() const
|
Chris@7
|
150 {
|
Chris@7
|
151 ProgramList list;
|
Chris@7
|
152 return list;
|
Chris@7
|
153 }
|
Chris@7
|
154
|
Chris@7
|
155 string
|
Chris@7
|
156 Tipic::getCurrentProgram() const
|
Chris@7
|
157 {
|
Chris@7
|
158 return ""; // no programs
|
Chris@7
|
159 }
|
Chris@7
|
160
|
Chris@7
|
161 void
|
Chris@50
|
162 Tipic::selectProgram(string)
|
Chris@7
|
163 {
|
Chris@7
|
164 }
|
Chris@7
|
165
|
Chris@27
|
// Names of the twelve pitch classes, indexed by pitch number modulo 12.
// Also used directly as the bin names of the 12-bin chroma outputs.
static const std::vector<std::string> noteNames
    { "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B" };

// Build a bin label of the form "<pitch> <note><octave>" for pitch
// number i, e.g. noteName(69) == "69 A4". The octave is i / 12 - 1.
//
// The label is a plain string with no embedded terminator: streaming
// std::ends into an ostringstream would append a real '\0' character to
// the result, which then leaks into every bin name handed to the host.
static std::string noteName(int i)
{
    // Floor-style modulo and division, so that a negative pitch number
    // cannot produce a negative index into noteNames.
    int pitchClass = i % 12;
    int octaveBase = i / 12;
    if (pitchClass < 0) {
        pitchClass += 12;
        --octaveBase;
    }

    std::ostringstream label;
    label << i << " " << noteNames[pitchClass] << (octaveBase - 1);
    return label.str();
}
|
Chris@27
|
177
|
Chris@7
|
178 Tipic::OutputList
|
Chris@7
|
179 Tipic::getOutputDescriptors() const
|
Chris@7
|
180 {
|
Chris@7
|
181 OutputList list;
|
Chris@7
|
182
|
Chris@47
|
183 string downIdSuffix = "-smoothed";
|
Chris@47
|
184 string downNamePrefix = "Smoothed ";
|
Chris@47
|
185 string downDescSuffix = ", smoothed by 10x downsampling";
|
Chris@47
|
186
|
Chris@7
|
187 OutputDescriptor d;
|
Chris@7
|
188 d.identifier = "pitch";
|
Chris@47
|
189 d.name = "Pitch Representation";
|
Chris@47
|
190 d.description = "Short-time energy content of the signal within 88 semitone-tuned frequency bands";
|
Chris@7
|
191 d.unit = "";
|
Chris@7
|
192 d.hasFixedBinCount = true;
|
Chris@27
|
193 int min = 0, max = 0;
|
Chris@27
|
194 PitchFilterbank::getPitchRange(min, max);
|
Chris@27
|
195 d.binCount = max - min + 1;
|
Chris@27
|
196 d.binNames.clear();
|
Chris@27
|
197 for (int p = min; p <= max; ++p) {
|
Chris@27
|
198 d.binNames.push_back(noteName(p));
|
Chris@27
|
199 }
|
Chris@7
|
200 d.hasKnownExtents = false;
|
Chris@7
|
201 d.isQuantized = false;
|
Chris@7
|
202 d.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@32
|
203 d.sampleRate = PitchFilterbank::getOutputSampleRate();
|
Chris@7
|
204 d.hasDuration = false;
|
Chris@9
|
205 m_pitchOutputNo = list.size();
|
Chris@7
|
206 list.push_back(d);
|
Chris@7
|
207
|
Chris@47
|
208 d.identifier += downIdSuffix;
|
Chris@47
|
209 d.name = downNamePrefix + d.name;
|
Chris@47
|
210 d.description += downDescSuffix;
|
Chris@35
|
211 d.sampleRate /= 10.0;
|
Chris@35
|
212 list.push_back(d);
|
Chris@35
|
213
|
Chris@37
|
214 d.identifier = "chroma";
|
Chris@26
|
215 d.name = "Chroma Pitch Features";
|
Chris@47
|
216 d.description = "CP Chroma Pitch features derived by summing the Pitch Representation into a single octave";
|
Chris@26
|
217 d.unit = "";
|
Chris@26
|
218 d.hasFixedBinCount = true;
|
Chris@26
|
219 d.binCount = 12;
|
Chris@27
|
220 d.binNames = noteNames;
|
Chris@26
|
221 d.hasKnownExtents = false;
|
Chris@26
|
222 d.isQuantized = false;
|
Chris@26
|
223 d.sampleType = OutputDescriptor::FixedSampleRate;
|
Chris@32
|
224 d.sampleRate = PitchFilterbank::getOutputSampleRate();
|
Chris@26
|
225 d.hasDuration = false;
|
Chris@26
|
226 m_cpOutputNo = list.size();
|
Chris@26
|
227 list.push_back(d);
|
Chris@26
|
228
|
Chris@47
|
229 d.identifier += downIdSuffix;
|
Chris@47
|
230 d.name = downNamePrefix + d.name;
|
Chris@47
|
231 d.description += downDescSuffix;
|
Chris@35
|
232 d.sampleRate /= 10.0;
|
Chris@35
|
233 list.push_back(d);
|
Chris@35
|
234
|
Chris@26
|
235 d.identifier = "clp";
|
Chris@26
|
236 d.name = "Chroma Log Pitch Features";
|
Chris@47
|
237 d.description = "CLP Chroma Logarithmic Pitch features derived by summing log of the Pitch Representation energy values into a single octave";
|
Chris@35
|
238 d.sampleRate = PitchFilterbank::getOutputSampleRate();
|
Chris@26
|
239 m_clpOutputNo = list.size();
|
Chris@26
|
240 list.push_back(d);
|
Chris@26
|
241
|
Chris@47
|
242 d.identifier += downIdSuffix;
|
Chris@47
|
243 d.name = downNamePrefix + d.name;
|
Chris@47
|
244 d.description += downDescSuffix;
|
Chris@35
|
245 d.sampleRate /= 10.0;
|
Chris@35
|
246 list.push_back(d);
|
Chris@35
|
247
|
Chris@39
|
248 d.identifier = "cens";
|
Chris@39
|
249 d.name = "Chroma Energy Normalised Statistics Features";
|
Chris@47
|
250 d.description = "CENS statistical features based on L1 normalized pitch energy distribions";
|
Chris@39
|
251 d.sampleRate = PitchFilterbank::getOutputSampleRate() / 10.0;
|
Chris@39
|
252 m_censOutputNo = list.size();
|
Chris@39
|
253 list.push_back(d);
|
Chris@39
|
254
|
Chris@19
|
255 d.identifier = "crp";
|
Chris@47
|
256 d.name = "Chroma DCT-Reduced Pitch Features";
|
Chris@47
|
257 d.description = "CRP Chroma DCT-Reduced Log Pitch features, providing some timbre-invariance by discarding timbre-related information from lower cepstral coefficients";
|
Chris@35
|
258 d.sampleRate = PitchFilterbank::getOutputSampleRate();
|
Chris@19
|
259 m_crpOutputNo = list.size();
|
Chris@19
|
260 list.push_back(d);
|
Chris@19
|
261
|
Chris@47
|
262 d.identifier += downIdSuffix;
|
Chris@47
|
263 d.name = downNamePrefix + d.name;
|
Chris@47
|
264 d.description += downDescSuffix;
|
Chris@35
|
265 d.sampleRate /= 10.0;
|
Chris@35
|
266 list.push_back(d);
|
Chris@35
|
267
|
Chris@7
|
268 return list;
|
Chris@7
|
269 }
|
Chris@7
|
270
|
Chris@7
|
271 bool
|
Chris@7
|
272 Tipic::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
Chris@7
|
273 {
|
Chris@19
|
274 if (m_inputSampleRate > 192000) {
|
Chris@19
|
275 cerr << "ERROR: Tipic::initialise: Max sample rate 192000 exceeded "
|
Chris@19
|
276 << "(requested rate = " << m_inputSampleRate << ")" << endl;
|
Chris@19
|
277 return false;
|
Chris@19
|
278 }
|
Chris@19
|
279
|
Chris@9
|
280 if (m_pitchOutputNo < 0) {
|
Chris@9
|
281 // getOutputDescriptors has never been called, it sets up the
|
Chris@9
|
282 // outputNo members
|
Chris@9
|
283 (void)getOutputDescriptors();
|
Chris@9
|
284 }
|
Chris@26
|
285 if (m_pitchOutputNo < 0 ||
|
Chris@26
|
286 m_cpOutputNo < 0 ||
|
Chris@26
|
287 m_clpOutputNo < 0 ||
|
Chris@39
|
288 m_censOutputNo < 0 ||
|
Chris@26
|
289 m_crpOutputNo < 0) {
|
Chris@19
|
290 throw std::logic_error("setup went wrong");
|
Chris@19
|
291 }
|
Chris@9
|
292
|
Chris@7
|
293 if (channels < getMinChannelCount() ||
|
Chris@9
|
294 channels > getMaxChannelCount()) {
|
Chris@19
|
295 cerr << "ERROR: Tipic::initialise: wrong number of channels supplied (only 1 supported)" << endl;
|
Chris@9
|
296 return false;
|
Chris@9
|
297 }
|
Chris@7
|
298
|
Chris@9
|
299 m_stepSize = stepSize;
|
Chris@9
|
300 m_blockSize = blockSize;
|
Chris@9
|
301
|
Chris@9
|
302 if (m_stepSize != m_blockSize) {
|
Chris@9
|
303 cerr << "ERROR: initialise: step size and block size must be equal" << endl;
|
Chris@9
|
304 return false;
|
Chris@9
|
305 }
|
Chris@9
|
306
|
Chris@9
|
307 reset();
|
Chris@9
|
308
|
Chris@7
|
309 return true;
|
Chris@7
|
310 }
|
Chris@7
|
311
|
Chris@7
|
312 void
|
Chris@7
|
313 Tipic::reset()
|
Chris@7
|
314 {
|
Chris@15
|
315 if (!m_filterbank) {
|
Chris@26
|
316
|
Chris@15
|
317 m_filterbank = new PitchFilterbank(m_inputSampleRate, m_tuningFrequency);
|
Chris@26
|
318
|
Chris@19
|
319 m_crp = new CRP({});
|
Chris@26
|
320
|
Chris@26
|
321 m_chroma = new Chroma({});
|
Chris@26
|
322
|
Chris@26
|
323 Chroma::Parameters params;
|
Chris@26
|
324 params.applyLogCompression = true;
|
Chris@26
|
325 m_logChroma = new Chroma(params);
|
Chris@39
|
326
|
Chris@39
|
327 m_cens = new CENS({});
|
Chris@15
|
328 }
|
Chris@26
|
329
|
Chris@15
|
330 m_filterbank->reset();
|
Chris@35
|
331
|
Chris@35
|
332 for (auto &d: m_downsamplers) d.second->reset();
|
Chris@7
|
333 }
|
Chris@7
|
334
|
Chris@7
|
335 Tipic::FeatureSet
|
Chris@50
|
336 Tipic::process(const float *const *inputBuffers, Vamp::RealTime)
|
Chris@7
|
337 {
|
Chris@19
|
338 RealSequence in;
|
Chris@9
|
339 in.resize(m_blockSize);
|
Chris@42
|
340 for (int i = 0; i < m_blockSize; ++i) {
|
Chris@42
|
341 in[i] = inputBuffers[0][i];
|
Chris@42
|
342 }
|
Chris@9
|
343
|
Chris@19
|
344 RealBlock pitchFiltered = m_filterbank->process(in);
|
Chris@26
|
345
|
Chris@26
|
346 RealBlock cp = m_chroma->process(pitchFiltered);
|
Chris@26
|
347 RealBlock clp = m_logChroma->process(pitchFiltered);
|
Chris@39
|
348 RealBlock cens = m_cens->process(pitchFiltered);
|
Chris@26
|
349 RealBlock crp = m_crp->process(pitchFiltered);
|
Chris@9
|
350
|
Chris@9
|
351 FeatureSet fs;
|
Chris@36
|
352 addFeatures(fs, m_pitchOutputNo, pitchFiltered, false);
|
Chris@36
|
353 addFeatures(fs, m_cpOutputNo, cp, false);
|
Chris@36
|
354 addFeatures(fs, m_clpOutputNo, clp, false);
|
Chris@39
|
355 addFeatures(fs, m_censOutputNo, cens, false);
|
Chris@36
|
356 addFeatures(fs, m_crpOutputNo, crp, false);
|
Chris@9
|
357 return fs;
|
Chris@7
|
358 }
|
Chris@7
|
359
|
Chris@7
|
360 Tipic::FeatureSet
|
Chris@7
|
361 Tipic::getRemainingFeatures()
|
Chris@7
|
362 {
|
Chris@19
|
363 RealBlock pitchFiltered = m_filterbank->getRemainingOutput();
|
Chris@26
|
364
|
Chris@26
|
365 RealBlock cp = m_chroma->process(pitchFiltered);
|
Chris@26
|
366 RealBlock clp = m_logChroma->process(pitchFiltered);
|
Chris@39
|
367 RealBlock cens = m_cens->process(pitchFiltered);
|
Chris@26
|
368 RealBlock crp = m_crp->process(pitchFiltered);
|
Chris@9
|
369
|
Chris@9
|
370 FeatureSet fs;
|
Chris@36
|
371 addFeatures(fs, m_pitchOutputNo, pitchFiltered, true);
|
Chris@36
|
372 addFeatures(fs, m_cpOutputNo, cp, true);
|
Chris@36
|
373 addFeatures(fs, m_clpOutputNo, clp, true);
|
Chris@39
|
374 addFeatures(fs, m_censOutputNo, cens, true);
|
Chris@36
|
375 addFeatures(fs, m_crpOutputNo, crp, true);
|
Chris@9
|
376 return fs;
|
Chris@7
|
377 }
|
Chris@7
|
378
|
Chris@9
|
379 void
|
Chris@36
|
380 Tipic::addFeatures(FeatureSet &fs, int outputNo, const RealBlock &block, bool final)
|
Chris@9
|
381 {
|
Chris@35
|
382 if (block.empty()) return;
|
Chris@39
|
383
|
Chris@39
|
384 int downsampledOutputNo = outputNo + 1;
|
Chris@39
|
385 if (outputNo == m_censOutputNo) {
|
Chris@39
|
386 // CENS exists only in downsampled form
|
Chris@39
|
387 downsampledOutputNo = outputNo;
|
Chris@39
|
388 }
|
Chris@39
|
389
|
Chris@42
|
390 int n = block.size();
|
Chris@42
|
391
|
Chris@39
|
392 if (outputNo != downsampledOutputNo) {
|
Chris@42
|
393 for (int i = 0; i < n; ++i) {
|
Chris@39
|
394 Feature f;
|
Chris@39
|
395 int h = block[i].size();
|
Chris@39
|
396 f.values.resize(h);
|
Chris@42
|
397 for (int j = 0; j < h; ++j) {
|
Chris@42
|
398 f.values[j] = block[i][j];
|
Chris@42
|
399 }
|
Chris@39
|
400 fs[outputNo].push_back(f);
|
Chris@39
|
401 }
|
Chris@9
|
402 }
|
Chris@37
|
403
|
Chris@35
|
404 if (m_downsamplers.find(outputNo) == m_downsamplers.end()) {
|
Chris@35
|
405 FeatureDownsample::Parameters params;
|
Chris@35
|
406 params.featureSize = block[0].size();
|
Chris@35
|
407 m_downsamplers[outputNo] = new FeatureDownsample(params);
|
Chris@35
|
408 }
|
Chris@35
|
409
|
Chris@35
|
410 RealBlock downsampled = m_downsamplers[outputNo]->process(block);
|
Chris@36
|
411
|
Chris@36
|
412 if (final) {
|
Chris@36
|
413 RealBlock remaining = m_downsamplers[outputNo]->getRemainingOutput();
|
Chris@36
|
414 downsampled.insert(downsampled.end(), remaining.begin(), remaining.end());
|
Chris@36
|
415 }
|
Chris@42
|
416
|
Chris@42
|
417 n = downsampled.size();
|
Chris@35
|
418
|
Chris@42
|
419 for (int i = 0; i < n; ++i) {
|
Chris@35
|
420 Feature f;
|
Chris@35
|
421 int h = downsampled[i].size();
|
Chris@35
|
422 f.values.resize(h);
|
Chris@42
|
423 for (int j = 0; j < h; ++j) {
|
Chris@42
|
424 f.values[j] = downsampled[i][j];
|
Chris@42
|
425 }
|
Chris@39
|
426 fs[downsampledOutputNo].push_back(f);
|
Chris@35
|
427 }
|
Chris@9
|
428 }
|