annotate src/TipicVampPlugin.cpp @ 60:1ea2aed23d4a tip

Fix version
author Chris Cannam
date Thu, 13 Feb 2020 13:37:36 +0000
parents 83e91b13feb7
children
rev   line source
Chris@7 1
Chris@7 2 #include "TipicVampPlugin.h"
Chris@7 3
Chris@19 4 #include "PitchFilterbank.h"
Chris@19 5 #include "CRP.h"
Chris@26 6 #include "Chroma.h"
Chris@39 7 #include "CENS.h"
Chris@35 8 #include "FeatureDownsample.h"
Chris@19 9
Chris@9 10 #include <iostream>
Chris@27 11 #include <sstream>
Chris@9 12
Chris@9 13 using namespace std;
Chris@9 14
// Default frequency of concert A, in Hz. Used both as the initial
// value of the plugin's tuning member and as the advertised default
// of the "tuning" parameter.
static constexpr float defaultTuningFrequency = 440.0f;
Chris@15 16
Chris@7 17 Tipic::Tipic(float inputSampleRate) :
Chris@9 18 Plugin(inputSampleRate),
Chris@9 19 m_stepSize(0),
Chris@9 20 m_blockSize(0),
Chris@15 21 m_tuningFrequency(defaultTuningFrequency),
Chris@15 22 m_filterbank(0),
Chris@19 23 m_crp(0),
Chris@26 24 m_chroma(0),
Chris@26 25 m_logChroma(0),
Chris@39 26 m_cens(0),
Chris@19 27 m_pitchOutputNo(-1),
Chris@26 28 m_cpOutputNo(-1),
Chris@26 29 m_clpOutputNo(-1),
Chris@39 30 m_censOutputNo(-1),
Chris@19 31 m_crpOutputNo(-1)
Chris@7 32 {
Chris@7 33 }
Chris@7 34
Chris@7 35 Tipic::~Tipic()
Chris@7 36 {
Chris@15 37 delete m_filterbank;
Chris@26 38 delete m_crp;
Chris@26 39 delete m_chroma;
Chris@26 40 delete m_logChroma;
Chris@39 41 delete m_cens;
Chris@35 42
Chris@35 43 for (auto &d: m_downsamplers) delete d.second;
Chris@7 44 }
Chris@7 45
Chris@7 46 string
Chris@7 47 Tipic::getIdentifier() const
Chris@7 48 {
Chris@7 49 return "tipic";
Chris@7 50 }
Chris@7 51
Chris@7 52 string
Chris@7 53 Tipic::getName() const
Chris@7 54 {
Chris@47 55 return "TIPIC";
Chris@7 56 }
Chris@7 57
Chris@7 58 string
Chris@7 59 Tipic::getDescription() const
Chris@7 60 {
Chris@47 61 return "Chroma and pitch features, including DCT-reduced chroma with extra timbre invariance. Based on the MATLAB Chroma Toolbox by Müller and Ewert, adapted to use causal filters.";
Chris@7 62 }
Chris@7 63
Chris@7 64 string
Chris@7 65 Tipic::getMaker() const
Chris@7 66 {
Chris@41 67 return "Queen Mary, University of London";
Chris@7 68 }
Chris@7 69
Chris@7 70 int
Chris@7 71 Tipic::getPluginVersion() const
Chris@7 72 {
Chris@47 73 return 1;
Chris@7 74 }
Chris@7 75
Chris@7 76 string
Chris@7 77 Tipic::getCopyright() const
Chris@7 78 {
Chris@54 79 return "Methods by Sebastian Ewert and Meinard Müller, plugin by Chris Cannam. GPL licence.";
Chris@7 80 }
Chris@7 81
Chris@7 82 Tipic::InputDomain
Chris@7 83 Tipic::getInputDomain() const
Chris@7 84 {
Chris@7 85 return TimeDomain;
Chris@7 86 }
Chris@7 87
Chris@7 88 size_t
Chris@7 89 Tipic::getPreferredBlockSize() const
Chris@7 90 {
Chris@41 91 return 0;
Chris@7 92 }
Chris@7 93
Chris@7 94 size_t
Chris@7 95 Tipic::getPreferredStepSize() const
Chris@7 96 {
Chris@41 97 return 0;
Chris@7 98 }
Chris@7 99
Chris@7 100 size_t
Chris@7 101 Tipic::getMinChannelCount() const
Chris@7 102 {
Chris@7 103 return 1;
Chris@7 104 }
Chris@7 105
Chris@7 106 size_t
Chris@7 107 Tipic::getMaxChannelCount() const
Chris@7 108 {
Chris@7 109 return 1;
Chris@7 110 }
Chris@7 111
Chris@7 112 Tipic::ParameterList
Chris@7 113 Tipic::getParameterDescriptors() const
Chris@7 114 {
Chris@7 115 ParameterList list;
Chris@15 116
Chris@15 117 ParameterDescriptor desc;
Chris@15 118 desc.identifier = "tuning";
Chris@15 119 desc.name = "Tuning Frequency";
Chris@15 120 desc.unit = "Hz";
Chris@15 121 desc.description = "Frequency of concert A";
Chris@15 122 desc.minValue = 360;
Chris@15 123 desc.maxValue = 500;
Chris@15 124 desc.defaultValue = defaultTuningFrequency;
Chris@15 125 desc.isQuantized = false;
Chris@15 126 list.push_back(desc);
Chris@15 127
Chris@7 128 return list;
Chris@7 129 }
Chris@7 130
Chris@7 131 float
Chris@7 132 Tipic::getParameter(string identifier) const
Chris@7 133 {
Chris@15 134 if (identifier == "tuning") {
Chris@15 135 return m_tuningFrequency;
Chris@15 136 }
Chris@7 137 return 0;
Chris@7 138 }
Chris@7 139
Chris@7 140 void
Chris@7 141 Tipic::setParameter(string identifier, float value)
Chris@7 142 {
Chris@15 143 if (identifier == "tuning") {
Chris@15 144 m_tuningFrequency = value;
Chris@15 145 }
Chris@7 146 }
Chris@7 147
Chris@7 148 Tipic::ProgramList
Chris@7 149 Tipic::getPrograms() const
Chris@7 150 {
Chris@7 151 ProgramList list;
Chris@7 152 return list;
Chris@7 153 }
Chris@7 154
Chris@7 155 string
Chris@7 156 Tipic::getCurrentProgram() const
Chris@7 157 {
Chris@7 158 return ""; // no programs
Chris@7 159 }
Chris@7 160
Chris@7 161 void
Chris@50 162 Tipic::selectProgram(string)
Chris@7 163 {
Chris@7 164 }
Chris@7 165
// Names of the twelve pitch classes, indexed from C. Also used
// directly as the bin names of the 12-bin chroma outputs.
static const std::vector<std::string> noteNames
{ "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B" };

// Build a label for pitch number i, consisting of the number, a
// space, the pitch class name and the octave, e.g. "60 C4". Octave
// numbering is such that pitch 0 is "C-1".
//
// Negative pitch numbers are handled with floored division, so that
// the pitch class index always stays within noteNames.
static std::string noteName(int i)
{
    int pitchClass = i % 12;
    int oct = i / 12 - 1;
    if (pitchClass < 0) {
        // C++ division truncates toward zero; adjust to floor
        pitchClass += 12;
        --oct;
    }

    // Note: no std::ends here. Unlike the old strstream classes,
    // ostringstream needs no terminator, and writing one would embed
    // a NUL character at the end of the returned string.
    std::ostringstream sstr;
    sstr << i << " " << noteNames[pitchClass] << oct;
    return sstr.str();
}
Chris@27 177
Chris@7 178 Tipic::OutputList
Chris@7 179 Tipic::getOutputDescriptors() const
Chris@7 180 {
Chris@7 181 OutputList list;
Chris@7 182
Chris@47 183 string downIdSuffix = "-smoothed";
Chris@47 184 string downNamePrefix = "Smoothed ";
Chris@47 185 string downDescSuffix = ", smoothed by 10x downsampling";
Chris@47 186
Chris@7 187 OutputDescriptor d;
Chris@7 188 d.identifier = "pitch";
Chris@47 189 d.name = "Pitch Representation";
Chris@47 190 d.description = "Short-time energy content of the signal within 88 semitone-tuned frequency bands";
Chris@7 191 d.unit = "";
Chris@7 192 d.hasFixedBinCount = true;
Chris@27 193 int min = 0, max = 0;
Chris@27 194 PitchFilterbank::getPitchRange(min, max);
Chris@27 195 d.binCount = max - min + 1;
Chris@27 196 d.binNames.clear();
Chris@27 197 for (int p = min; p <= max; ++p) {
Chris@27 198 d.binNames.push_back(noteName(p));
Chris@27 199 }
Chris@7 200 d.hasKnownExtents = false;
Chris@7 201 d.isQuantized = false;
Chris@7 202 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@32 203 d.sampleRate = PitchFilterbank::getOutputSampleRate();
Chris@7 204 d.hasDuration = false;
Chris@9 205 m_pitchOutputNo = list.size();
Chris@7 206 list.push_back(d);
Chris@7 207
Chris@47 208 d.identifier += downIdSuffix;
Chris@47 209 d.name = downNamePrefix + d.name;
Chris@47 210 d.description += downDescSuffix;
Chris@35 211 d.sampleRate /= 10.0;
Chris@35 212 list.push_back(d);
Chris@35 213
Chris@37 214 d.identifier = "chroma";
Chris@26 215 d.name = "Chroma Pitch Features";
Chris@47 216 d.description = "CP Chroma Pitch features derived by summing the Pitch Representation into a single octave";
Chris@26 217 d.unit = "";
Chris@26 218 d.hasFixedBinCount = true;
Chris@26 219 d.binCount = 12;
Chris@27 220 d.binNames = noteNames;
Chris@26 221 d.hasKnownExtents = false;
Chris@26 222 d.isQuantized = false;
Chris@26 223 d.sampleType = OutputDescriptor::FixedSampleRate;
Chris@32 224 d.sampleRate = PitchFilterbank::getOutputSampleRate();
Chris@26 225 d.hasDuration = false;
Chris@26 226 m_cpOutputNo = list.size();
Chris@26 227 list.push_back(d);
Chris@26 228
Chris@47 229 d.identifier += downIdSuffix;
Chris@47 230 d.name = downNamePrefix + d.name;
Chris@47 231 d.description += downDescSuffix;
Chris@35 232 d.sampleRate /= 10.0;
Chris@35 233 list.push_back(d);
Chris@35 234
Chris@26 235 d.identifier = "clp";
Chris@26 236 d.name = "Chroma Log Pitch Features";
Chris@47 237 d.description = "CLP Chroma Logarithmic Pitch features derived by summing log of the Pitch Representation energy values into a single octave";
Chris@35 238 d.sampleRate = PitchFilterbank::getOutputSampleRate();
Chris@26 239 m_clpOutputNo = list.size();
Chris@26 240 list.push_back(d);
Chris@26 241
Chris@47 242 d.identifier += downIdSuffix;
Chris@47 243 d.name = downNamePrefix + d.name;
Chris@47 244 d.description += downDescSuffix;
Chris@35 245 d.sampleRate /= 10.0;
Chris@35 246 list.push_back(d);
Chris@35 247
Chris@39 248 d.identifier = "cens";
Chris@39 249 d.name = "Chroma Energy Normalised Statistics Features";
Chris@47 250 d.description = "CENS statistical features based on L1 normalized pitch energy distribions";
Chris@39 251 d.sampleRate = PitchFilterbank::getOutputSampleRate() / 10.0;
Chris@39 252 m_censOutputNo = list.size();
Chris@39 253 list.push_back(d);
Chris@39 254
Chris@19 255 d.identifier = "crp";
Chris@47 256 d.name = "Chroma DCT-Reduced Pitch Features";
Chris@47 257 d.description = "CRP Chroma DCT-Reduced Log Pitch features, providing some timbre-invariance by discarding timbre-related information from lower cepstral coefficients";
Chris@35 258 d.sampleRate = PitchFilterbank::getOutputSampleRate();
Chris@19 259 m_crpOutputNo = list.size();
Chris@19 260 list.push_back(d);
Chris@19 261
Chris@47 262 d.identifier += downIdSuffix;
Chris@47 263 d.name = downNamePrefix + d.name;
Chris@47 264 d.description += downDescSuffix;
Chris@35 265 d.sampleRate /= 10.0;
Chris@35 266 list.push_back(d);
Chris@35 267
Chris@7 268 return list;
Chris@7 269 }
Chris@7 270
Chris@7 271 bool
Chris@7 272 Tipic::initialise(size_t channels, size_t stepSize, size_t blockSize)
Chris@7 273 {
Chris@19 274 if (m_inputSampleRate > 192000) {
Chris@19 275 cerr << "ERROR: Tipic::initialise: Max sample rate 192000 exceeded "
Chris@19 276 << "(requested rate = " << m_inputSampleRate << ")" << endl;
Chris@19 277 return false;
Chris@19 278 }
Chris@19 279
Chris@9 280 if (m_pitchOutputNo < 0) {
Chris@9 281 // getOutputDescriptors has never been called, it sets up the
Chris@9 282 // outputNo members
Chris@9 283 (void)getOutputDescriptors();
Chris@9 284 }
Chris@26 285 if (m_pitchOutputNo < 0 ||
Chris@26 286 m_cpOutputNo < 0 ||
Chris@26 287 m_clpOutputNo < 0 ||
Chris@39 288 m_censOutputNo < 0 ||
Chris@26 289 m_crpOutputNo < 0) {
Chris@19 290 throw std::logic_error("setup went wrong");
Chris@19 291 }
Chris@9 292
Chris@7 293 if (channels < getMinChannelCount() ||
Chris@9 294 channels > getMaxChannelCount()) {
Chris@19 295 cerr << "ERROR: Tipic::initialise: wrong number of channels supplied (only 1 supported)" << endl;
Chris@9 296 return false;
Chris@9 297 }
Chris@7 298
Chris@9 299 m_stepSize = stepSize;
Chris@9 300 m_blockSize = blockSize;
Chris@9 301
Chris@9 302 if (m_stepSize != m_blockSize) {
Chris@9 303 cerr << "ERROR: initialise: step size and block size must be equal" << endl;
Chris@9 304 return false;
Chris@9 305 }
Chris@9 306
Chris@9 307 reset();
Chris@9 308
Chris@7 309 return true;
Chris@7 310 }
Chris@7 311
Chris@7 312 void
Chris@7 313 Tipic::reset()
Chris@7 314 {
Chris@15 315 if (!m_filterbank) {
Chris@26 316
Chris@15 317 m_filterbank = new PitchFilterbank(m_inputSampleRate, m_tuningFrequency);
Chris@26 318
Chris@19 319 m_crp = new CRP({});
Chris@26 320
Chris@26 321 m_chroma = new Chroma({});
Chris@26 322
Chris@26 323 Chroma::Parameters params;
Chris@26 324 params.applyLogCompression = true;
Chris@26 325 m_logChroma = new Chroma(params);
Chris@39 326
Chris@39 327 m_cens = new CENS({});
Chris@15 328 }
Chris@26 329
Chris@15 330 m_filterbank->reset();
Chris@35 331
Chris@35 332 for (auto &d: m_downsamplers) d.second->reset();
Chris@7 333 }
Chris@7 334
Chris@7 335 Tipic::FeatureSet
Chris@50 336 Tipic::process(const float *const *inputBuffers, Vamp::RealTime)
Chris@7 337 {
Chris@19 338 RealSequence in;
Chris@9 339 in.resize(m_blockSize);
Chris@42 340 for (int i = 0; i < m_blockSize; ++i) {
Chris@42 341 in[i] = inputBuffers[0][i];
Chris@42 342 }
Chris@9 343
Chris@19 344 RealBlock pitchFiltered = m_filterbank->process(in);
Chris@26 345
Chris@26 346 RealBlock cp = m_chroma->process(pitchFiltered);
Chris@26 347 RealBlock clp = m_logChroma->process(pitchFiltered);
Chris@39 348 RealBlock cens = m_cens->process(pitchFiltered);
Chris@26 349 RealBlock crp = m_crp->process(pitchFiltered);
Chris@9 350
Chris@9 351 FeatureSet fs;
Chris@36 352 addFeatures(fs, m_pitchOutputNo, pitchFiltered, false);
Chris@36 353 addFeatures(fs, m_cpOutputNo, cp, false);
Chris@36 354 addFeatures(fs, m_clpOutputNo, clp, false);
Chris@39 355 addFeatures(fs, m_censOutputNo, cens, false);
Chris@36 356 addFeatures(fs, m_crpOutputNo, crp, false);
Chris@9 357 return fs;
Chris@7 358 }
Chris@7 359
Chris@7 360 Tipic::FeatureSet
Chris@7 361 Tipic::getRemainingFeatures()
Chris@7 362 {
Chris@19 363 RealBlock pitchFiltered = m_filterbank->getRemainingOutput();
Chris@26 364
Chris@26 365 RealBlock cp = m_chroma->process(pitchFiltered);
Chris@26 366 RealBlock clp = m_logChroma->process(pitchFiltered);
Chris@39 367 RealBlock cens = m_cens->process(pitchFiltered);
Chris@26 368 RealBlock crp = m_crp->process(pitchFiltered);
Chris@9 369
Chris@9 370 FeatureSet fs;
Chris@36 371 addFeatures(fs, m_pitchOutputNo, pitchFiltered, true);
Chris@36 372 addFeatures(fs, m_cpOutputNo, cp, true);
Chris@36 373 addFeatures(fs, m_clpOutputNo, clp, true);
Chris@39 374 addFeatures(fs, m_censOutputNo, cens, true);
Chris@36 375 addFeatures(fs, m_crpOutputNo, crp, true);
Chris@9 376 return fs;
Chris@7 377 }
Chris@7 378
Chris@9 379 void
Chris@36 380 Tipic::addFeatures(FeatureSet &fs, int outputNo, const RealBlock &block, bool final)
Chris@9 381 {
Chris@35 382 if (block.empty()) return;
Chris@39 383
Chris@39 384 int downsampledOutputNo = outputNo + 1;
Chris@39 385 if (outputNo == m_censOutputNo) {
Chris@39 386 // CENS exists only in downsampled form
Chris@39 387 downsampledOutputNo = outputNo;
Chris@39 388 }
Chris@39 389
Chris@42 390 int n = block.size();
Chris@42 391
Chris@39 392 if (outputNo != downsampledOutputNo) {
Chris@42 393 for (int i = 0; i < n; ++i) {
Chris@39 394 Feature f;
Chris@39 395 int h = block[i].size();
Chris@39 396 f.values.resize(h);
Chris@42 397 for (int j = 0; j < h; ++j) {
Chris@42 398 f.values[j] = block[i][j];
Chris@42 399 }
Chris@39 400 fs[outputNo].push_back(f);
Chris@39 401 }
Chris@9 402 }
Chris@37 403
Chris@35 404 if (m_downsamplers.find(outputNo) == m_downsamplers.end()) {
Chris@35 405 FeatureDownsample::Parameters params;
Chris@35 406 params.featureSize = block[0].size();
Chris@35 407 m_downsamplers[outputNo] = new FeatureDownsample(params);
Chris@35 408 }
Chris@35 409
Chris@35 410 RealBlock downsampled = m_downsamplers[outputNo]->process(block);
Chris@36 411
Chris@36 412 if (final) {
Chris@36 413 RealBlock remaining = m_downsamplers[outputNo]->getRemainingOutput();
Chris@36 414 downsampled.insert(downsampled.end(), remaining.begin(), remaining.end());
Chris@36 415 }
Chris@42 416
Chris@42 417 n = downsampled.size();
Chris@35 418
Chris@42 419 for (int i = 0; i < n; ++i) {
Chris@35 420 Feature f;
Chris@35 421 int h = downsampled[i].size();
Chris@35 422 f.values.resize(h);
Chris@42 423 for (int j = 0; j < h; ++j) {
Chris@42 424 f.values[j] = downsampled[i][j];
Chris@42 425 }
Chris@39 426 fs[downsampledOutputNo].push_back(f);
Chris@35 427 }
Chris@9 428 }