max@1: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
max@1:
max@1: /*
max@1: QM Vamp Plugin Set
max@1:
max@1: Centre for Digital Music, Queen Mary, University of London.
max@1:
max@1: This program is free software; you can redistribute it and/or
max@1: modify it under the terms of the GNU General Public License as
max@1: published by the Free Software Foundation; either version 2 of the
max@1: License, or (at your option) any later version. See the file
max@1: COPYING included with this distribution for more information.
max@1: */
max@1:
max@1: #include "SongParts.h"
max@1:
max@1: #include
max@1: #include
max@1: #include
max@1: #include
max@1: #include
max@1: #include
max@1: #include
max@1: #include
max@1: #include
max@1: #include
max@1: #include
max@1: #include "armadillo"
max@1: #include
max@1: #include
max@1: #include
max@1: #include
max@1:
max@1: #include
max@1:
max@1: using namespace boost::numeric;
max@1: using namespace arma;
max@1: using std::string;
max@1: using std::vector;
max@1: using std::cerr;
max@1: using std::cout;
max@1: using std::endl;
max@1:
max@1:
max@1: #ifndef __GNUC__
max@1: #include
max@1: #endif
max@1:
max@1:
max@1: // Result Struct
/**
 * Plain record describing one detected song part.
 *
 * The struct is a passive aggregate: none of the fields are initialised
 * here, so callers must assign every field before reading it.
 *
 * NOTE(review): the per-field descriptions are inferred from the field
 * names only; the code that fills them is outside this section -- confirm.
 */
struct Part {
    int n;                      // presumably the length of the part
    std::vector<int> indices;   // presumably the positions at which the part occurs
    std::string letter;         // label of the part
    int value;
    int level;
    int nInd;                   // presumably the number of entries in `indices`
};
max@1:
max@1:
max@8:
max@1: /* ------------------------------------ */
max@1: /* ----- BEAT DETECTOR CLASS ---------- */
max@1: /* ------------------------------------ */
max@1:
max@1: class BeatTrackerData
max@1: {
max@1: /* --- ATTRIBUTES --- */
max@1: public:
max@1: DFConfig dfConfig;
max@1: DetectionFunction *df;
max@1: DownBeat *downBeat;
max@1: vector dfOutput;
max@1: Vamp::RealTime origin;
max@1:
max@1:
max@1: /* --- METHODS --- */
max@1:
max@1: /* --- Constructor --- */
max@1: public:
max@1: BeatTrackerData(float rate, const DFConfig &config) : dfConfig(config) {
Chris@22:
max@1: df = new DetectionFunction(config);
max@1: // decimation factor aims at resampling to c. 3KHz; must be power of 2
max@1: int factor = MathUtilities::nextPowerOfTwo(rate / 3000);
max@1: // std::cerr << "BeatTrackerData: factor = " << factor << std::endl;
max@1: downBeat = new DownBeat(rate, factor, config.stepSize);
max@1: }
max@1:
max@1: /* --- Desctructor --- */
max@1: ~BeatTrackerData() {
Chris@22: delete df;
max@1: delete downBeat;
max@1: }
max@1:
max@1: void reset() {
max@1: delete df;
max@1: df = new DetectionFunction(dfConfig);
max@1: dfOutput.clear();
max@1: downBeat->resetAudioBuffer();
max@1: origin = Vamp::RealTime::zeroTime;
max@1: }
max@1: };
max@1:
max@1:
max@1: /* --------------------------------------- */
max@1: /* ----- CHROMA EXTRACTOR CLASS ---------- */
max@1: /* --------------------------------------- */
max@1:
max@1: class ChromaData
max@1: {
max@1:
max@1: /* --- ATTRIBUTES --- */
max@1:
max@1: public:
max@1: int frameCount;
max@1: int nBPS;
max@1: Vamp::Plugin::FeatureList logSpectrum;
max@1: size_t blockSize;
max@1: int lengthOfNoteIndex;
max@1: vector meanTunings;
max@1: vector localTunings;
max@1: float whitening;
max@1: float preset;
max@1: float useNNLS;
max@1: vector localTuning;
max@1: vector kernelValue;
max@1: vector kernelFftIndex;
max@1: vector kernelNoteIndex;
max@1: float *dict;
max@1: bool tuneLocal;
max@1: float doNormalizeChroma;
max@1: float rollon;
max@1: float s;
max@1: vector hw;
max@1: vector sinvalues;
max@1: vector cosvalues;
max@1: Window window;
max@1: FFTReal fft;
max@1: size_t inputSampleRate;
max@1:
max@1: /* --- METHODS --- */
max@1:
max@1: /* --- Constructor --- */
max@1:
max@1: public:
max@1: ChromaData(float inputSampleRate, size_t block_size) :
max@1: frameCount(0),
max@1: nBPS(3),
max@1: logSpectrum(0),
max@1: blockSize(0),
max@1: lengthOfNoteIndex(0),
max@1: meanTunings(0),
max@1: localTunings(0),
max@1: whitening(1.0),
max@1: preset(0.0),
max@1: useNNLS(1.0),
max@1: localTuning(0.0),
max@1: kernelValue(0),
max@1: kernelFftIndex(0),
max@1: kernelNoteIndex(0),
max@1: dict(0),
max@1: tuneLocal(0.0),
max@1: doNormalizeChroma(0),
max@1: rollon(0.0),
Chris@22: s(0.7),
Chris@22: sinvalues(0),
Chris@22: cosvalues(0),
Chris@22: window(HanningWindow, block_size),
Chris@22: fft(block_size),
Chris@22: inputSampleRate(inputSampleRate)
max@1: {
max@1: // make the *note* dictionary matrix
max@1: dict = new float[nNote * 84];
max@1: for (int i = 0; i < nNote * 84; ++i) dict[i] = 0.0;
max@1: blockSize = block_size;
max@1: }
max@1:
max@1:
max@1: /* --- Desctructor --- */
max@1:
max@1: ~ChromaData() {
max@1: delete [] dict;
max@1: }
max@1:
max@1: /* --- Public Methods --- */
max@1:
max@1: void reset() {
max@1: frameCount = 0;
max@1: logSpectrum.clear();
max@1: for (int iBPS = 0; iBPS < 3; ++iBPS) {
max@1: meanTunings[iBPS] = 0;
max@1: localTunings[iBPS] = 0;
max@1: }
max@1: localTuning.clear();
max@1: }
max@1:
max@1: void baseProcess(float *inputBuffers, Vamp::RealTime timestamp)
max@1: {
Chris@22:
max@1: frameCount++;
max@1: float *magnitude = new float[blockSize/2];
max@1: double *fftReal = new double[blockSize];
max@1: double *fftImag = new double[blockSize];
max@1:
max@1: // FFTReal wants doubles, so we need to make a local copy of inputBuffers
max@1: double *inputBuffersDouble = new double[blockSize];
max@1: for (size_t i = 0; i < blockSize; i++) inputBuffersDouble[i] = inputBuffers[i];
max@1:
max@1: fft.process(false, inputBuffersDouble, fftReal, fftImag);
max@1:
max@1: float energysum = 0;
max@1: // make magnitude
max@1: float maxmag = -10000;
max@1: for (int iBin = 0; iBin < static_cast(blockSize/2); iBin++) {
max@1: magnitude[iBin] = sqrt(fftReal[iBin] * fftReal[iBin] +
max@1: fftImag[iBin] * fftImag[iBin]);
max@1: if (magnitude[iBin]>blockSize*1.0) magnitude[iBin] = blockSize;
max@1: // a valid audio signal (between -1 and 1) should not be limited here.
max@1: if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin];
max@1: if (rollon > 0) {
max@1: energysum += pow(magnitude[iBin],2);
max@1: }
max@1: }
max@1:
max@1: float cumenergy = 0;
max@1: if (rollon > 0) {
max@1: for (int iBin = 2; iBin < static_cast(blockSize/2); iBin++) {
max@1: cumenergy += pow(magnitude[iBin],2);
max@1: if (cumenergy < energysum * rollon / 100) magnitude[iBin-2] = 0;
max@1: else break;
max@1: }
max@1: }
max@1:
max@1: if (maxmag < 2) {
max@1: // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl;
max@1: for (int iBin = 0; iBin < static_cast(blockSize/2); iBin++) {
max@1: magnitude[iBin] = 0;
max@1: }
max@1: }
max@1:
max@1: // cerr << magnitude[200] << endl;
max@1:
max@1: // note magnitude mapping using pre-calculated matrix
max@1: float *nm = new float[nNote]; // note magnitude
max@1: for (int iNote = 0; iNote < nNote; iNote++) {
max@1: nm[iNote] = 0; // initialise as 0
max@1: }
max@1: int binCount = 0;
max@1: for (vector::iterator it = kernelValue.begin(); it != kernelValue.end(); ++it) {
max@1: nm[kernelNoteIndex[binCount]] += magnitude[kernelFftIndex[binCount]] * kernelValue[binCount];
max@1: binCount++;
max@1: }
max@1:
max@1: float one_over_N = 1.0/frameCount;
max@1: // update means of complex tuning variables
max@1: for (int iBPS = 0; iBPS < nBPS; ++iBPS) meanTunings[iBPS] *= float(frameCount-1)*one_over_N;
max@1:
max@1: for (int iTone = 0; iTone < round(nNote*0.62/nBPS)*nBPS+1; iTone = iTone + nBPS) {
max@1: for (int iBPS = 0; iBPS < nBPS; ++iBPS) meanTunings[iBPS] += nm[iTone + iBPS]*one_over_N;
max@1: float ratioOld = 0.997;
max@1: for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
max@1: localTunings[iBPS] *= ratioOld;
max@1: localTunings[iBPS] += nm[iTone + iBPS] * (1 - ratioOld);
max@1: }
max@1: }
max@1:
max@1: float localTuningImag = 0;
max@1: float localTuningReal = 0;
max@1: for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
max@1: localTuningReal += localTunings[iBPS] * cosvalues[iBPS];
max@1: localTuningImag += localTunings[iBPS] * sinvalues[iBPS];
max@1: }
max@1:
max@1: float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
max@1: localTuning.push_back(normalisedtuning);
max@1:
max@1: Vamp::Plugin::Feature f1; // logfreqspec
max@1: f1.hasTimestamp = true;
max@1: f1.timestamp = timestamp;
max@1: for (int iNote = 0; iNote < nNote; iNote++) {
max@1: f1.values.push_back(nm[iNote]);
max@1: }
max@1:
max@1: // deletes
max@1: delete[] inputBuffersDouble;
max@1: delete[] magnitude;
max@1: delete[] fftReal;
max@1: delete[] fftImag;
max@1: delete[] nm;
max@1:
max@1: logSpectrum.push_back(f1); // remember note magnitude
max@1: }
max@1:
max@1: bool initialise()
max@1: {
max@1: dictionaryMatrix(dict, s);
Chris@22:
Chris@22: // make things for tuning estimation
Chris@22: for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
max@1: sinvalues.push_back(sin(2*M_PI*(iBPS*1.0/nBPS)));
max@1: cosvalues.push_back(cos(2*M_PI*(iBPS*1.0/nBPS)));
max@1: }
max@1:
Chris@22:
Chris@22: // make hamming window of length 1/2 octave
Chris@22: int hamwinlength = nBPS * 6 + 1;
max@1: float hamwinsum = 0;
max@1: for (int i = 0; i < hamwinlength; ++i) {
max@1: hw.push_back(0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1)));
max@1: hamwinsum += 0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1));
max@1: }
max@1: for (int i = 0; i < hamwinlength; ++i) hw[i] = hw[i] / hamwinsum;
max@1:
max@1:
max@1: // initialise the tuning
max@1: for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
max@1: meanTunings.push_back(0);
max@1: localTunings.push_back(0);
max@1: }
Chris@22:
max@1: blockSize = blockSize;
max@1: frameCount = 0;
max@1: int tempn = nNote * blockSize/2;
max@1: // cerr << "length of tempkernel : " << tempn << endl;
max@1: float *tempkernel;
max@1:
max@1: tempkernel = new float[tempn];
max@1:
max@1: logFreqMatrix(inputSampleRate, blockSize, tempkernel);
max@1: kernelValue.clear();
max@1: kernelFftIndex.clear();
max@1: kernelNoteIndex.clear();
max@1: int countNonzero = 0;
max@1: for (int iNote = 0; iNote < nNote; ++iNote) {
max@1: // I don't know if this is wise: manually making a sparse matrix
max@1: for (int iFFT = 0; iFFT < static_cast(blockSize/2); ++iFFT) {
max@1: if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
max@1: kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
max@1: if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
max@1: countNonzero++;
max@1: }
max@1: kernelFftIndex.push_back(iFFT);
Chris@23: kernelNoteIndex.push_back(iNote);
max@1: }
max@1: }
max@1: }
max@1: delete [] tempkernel;
max@1: }
max@1: };
max@1:
max@1:
max@1: /* --------------------------------- */
max@1: /* ----- SONG PARTITIONER ---------- */
max@1: /* --------------------------------- */
max@1:
max@1:
max@1: /* --- ATTRIBUTES --- */
max@1:
max@1: float SongPartitioner::m_stepSecs = 0.01161; // 512 samples at 44100
max@1: size_t SongPartitioner::m_chromaFramesizeFactor = 16; // 16 times as long as beat tracker's
max@1: size_t SongPartitioner::m_chromaStepsizeFactor = 4; // 4 times as long as beat tracker's
max@1:
max@1:
max@1: /* --- METHODS --- */
max@1:
max@1: /* --- Constructor --- */
max@1: SongPartitioner::SongPartitioner(float inputSampleRate) :
max@1: Vamp::Plugin(inputSampleRate),
max@1: m_d(0),
max@1: m_bpb(4),
max@1: m_pluginFrameCount(0)
max@1: {
max@1: }
max@1:
max@1:
max@1: /* --- Desctructor --- */
max@1: SongPartitioner::~SongPartitioner()
max@1: {
max@1: delete m_d;
max@1: }
max@1:
max@1:
max@1: /* --- Methods --- */
max@1: string SongPartitioner::getIdentifier() const
max@1: {
max@1: return "qm-songpartitioner";
max@1: }
max@1:
max@1: string SongPartitioner::getName() const
max@1: {
max@1: return "Song Partitioner";
max@1: }
max@1:
max@1: string SongPartitioner::getDescription() const
max@1: {
max@1: return "Estimate contiguous segments pertaining to song parts such as verse and chorus.";
max@1: }
max@1:
max@1: string SongPartitioner::getMaker() const
max@1: {
max@1: return "Queen Mary, University of London";
max@1: }
max@1:
max@1: int SongPartitioner::getPluginVersion() const
max@1: {
max@1: return 2;
max@1: }
max@1:
max@1: string SongPartitioner::getCopyright() const
max@1: {
max@1: return "Plugin by Matthew Davies, Christian Landone, Chris Cannam, Matthias Mauch and Massimiliano Zanoni Copyright (c) 2006-2012 QMUL - All Rights Reserved";
max@1: }
max@1:
max@1: SongPartitioner::ParameterList SongPartitioner::getParameterDescriptors() const
max@1: {
max@1: ParameterList list;
max@1:
max@1: ParameterDescriptor desc;
max@1:
max@1: desc.identifier = "bpb";
max@1: desc.name = "Beats per Bar";
max@1: desc.description = "The number of beats in each bar";
max@1: desc.minValue = 2;
max@1: desc.maxValue = 16;
max@1: desc.defaultValue = 4;
max@1: desc.isQuantized = true;
max@1: desc.quantizeStep = 1;
max@1: list.push_back(desc);
max@1:
max@1: return list;
max@1: }
max@1:
max@1: float SongPartitioner::getParameter(std::string name) const
max@1: {
max@1: if (name == "bpb") return m_bpb;
max@1: return 0.0;
max@1: }
max@1:
max@1: void SongPartitioner::setParameter(std::string name, float value)
max@1: {
max@1: if (name == "bpb") m_bpb = lrintf(value);
max@1: }
max@1:
max@1:
max@1: // Return the StepSize for Chroma Extractor
max@1: size_t SongPartitioner::getPreferredStepSize() const
max@1: {
max@1: size_t step = size_t(m_inputSampleRate * m_stepSecs + 0.0001);
max@1: if (step < 1) step = 1;
max@1:
max@1: return step;
max@1: }
max@1:
max@1: // Return the BlockSize for Chroma Extractor
max@1: size_t SongPartitioner::getPreferredBlockSize() const
max@1: {
max@1: size_t theoretical = getPreferredStepSize() * 2;
max@1: theoretical *= m_chromaFramesizeFactor;
max@1:
max@1: return theoretical;
max@1: }
max@1:
max@1:
max@1: // Initialize the plugin and define Beat Tracker and Chroma Extractor Objects
max@1: bool SongPartitioner::initialise(size_t channels, size_t stepSize, size_t blockSize)
max@1: {
max@1: if (m_d) {
Chris@22: delete m_d;
Chris@22: m_d = 0;
max@1: }
max@1:
max@1: if (channels < getMinChannelCount() ||
Chris@22: channels > getMaxChannelCount()) {
max@1: std::cerr << "SongPartitioner::initialise: Unsupported channel count: "
max@1: << channels << std::endl;
max@1: return false;
max@1: }
max@1:
max@1: if (stepSize != getPreferredStepSize()) {
max@1: std::cerr << "ERROR: SongPartitioner::initialise: Unsupported step size for this sample rate: "
max@1: << stepSize << " (wanted " << (getPreferredStepSize()) << ")" << std::endl;
max@1: return false;
max@1: }
max@1:
max@1: if (blockSize != getPreferredBlockSize()) {
max@1: std::cerr << "WARNING: SongPartitioner::initialise: Sub-optimal block size for this sample rate: "
max@1: << blockSize << " (wanted " << getPreferredBlockSize() << ")" << std::endl;
max@1: }
max@1:
max@1: // Beat tracker and Chroma extractor has two different configuration parameters
max@1:
max@1: // Configuration Parameters for Beat Tracker
max@1: DFConfig dfConfig;
max@1: dfConfig.DFType = DF_COMPLEXSD;
max@1: dfConfig.stepSize = stepSize;
max@1: dfConfig.frameLength = blockSize / m_chromaFramesizeFactor;
max@1: dfConfig.dbRise = 3;
max@1: dfConfig.adaptiveWhitening = false;
max@1: dfConfig.whiteningRelaxCoeff = -1;
max@1: dfConfig.whiteningFloor = -1;
max@1:
max@1: // Initialise Beat Tracker
max@1: m_d = new BeatTrackerData(m_inputSampleRate, dfConfig);
max@1: m_d->downBeat->setBeatsPerBar(m_bpb);
max@1:
max@1: // Initialise Chroma Extractor
max@1: m_chromadata = new ChromaData(m_inputSampleRate, blockSize);
max@1: m_chromadata->initialise();
max@1:
max@1: return true;
max@1: }
max@1:
max@1: void SongPartitioner::reset()
max@1: {
max@1: if (m_d) m_d->reset();
max@1: m_pluginFrameCount = 0;
max@1: }
max@1:
max@1: SongPartitioner::OutputList SongPartitioner::getOutputDescriptors() const
max@1: {
max@1: OutputList list;
max@1: size_t outputCounter = 0;
max@1:
max@1: OutputDescriptor beat;
max@1: beat.identifier = "beats";
max@1: beat.name = "Beats";
max@1: beat.description = "Beat locations labelled with metrical position";
max@1: beat.unit = "";
max@1: beat.hasFixedBinCount = true;
max@1: beat.binCount = 0;
max@1: beat.sampleType = OutputDescriptor::VariableSampleRate;
max@1: beat.sampleRate = 1.0 / m_stepSecs;
max@1: m_beatOutputNumber = outputCounter++;
max@1:
max@1: OutputDescriptor bars;
max@1: bars.identifier = "bars";
max@1: bars.name = "Bars";
max@1: bars.description = "Bar locations";
max@1: bars.unit = "";
max@1: bars.hasFixedBinCount = true;
max@1: bars.binCount = 0;
max@1: bars.sampleType = OutputDescriptor::VariableSampleRate;
max@1: bars.sampleRate = 1.0 / m_stepSecs;
max@1: m_barsOutputNumber = outputCounter++;
max@1:
max@1: OutputDescriptor beatcounts;
max@1: beatcounts.identifier = "beatcounts";
max@1: beatcounts.name = "Beat Count";
max@1: beatcounts.description = "Beat counter function";
max@1: beatcounts.unit = "";
max@1: beatcounts.hasFixedBinCount = true;
max@1: beatcounts.binCount = 1;
max@1: beatcounts.sampleType = OutputDescriptor::VariableSampleRate;
max@1: beatcounts.sampleRate = 1.0 / m_stepSecs;
max@1: m_beatcountsOutputNumber = outputCounter++;
max@1:
max@1: OutputDescriptor beatsd;
max@1: beatsd.identifier = "beatsd";
max@1: beatsd.name = "Beat Spectral Difference";
max@1: beatsd.description = "Beat spectral difference function used for bar-line detection";
max@1: beatsd.unit = "";
max@1: beatsd.hasFixedBinCount = true;
max@1: beatsd.binCount = 1;
max@1: beatsd.sampleType = OutputDescriptor::VariableSampleRate;
max@1: beatsd.sampleRate = 1.0 / m_stepSecs;
max@1: m_beatsdOutputNumber = outputCounter++;
max@1:
max@1: OutputDescriptor logscalespec;
max@1: logscalespec.identifier = "logscalespec";
max@1: logscalespec.name = "Log-Frequency Spectrum";
max@1: logscalespec.description = "Spectrum with linear frequency on a log scale.";
max@1: logscalespec.unit = "";
max@1: logscalespec.hasFixedBinCount = true;
max@1: logscalespec.binCount = nNote;
max@1: logscalespec.hasKnownExtents = false;
max@1: logscalespec.isQuantized = false;
max@1: logscalespec.sampleType = OutputDescriptor::FixedSampleRate;
max@1: logscalespec.hasDuration = false;
max@1: logscalespec.sampleRate = m_inputSampleRate/2048;
max@1: m_logscalespecOutputNumber = outputCounter++;
max@1:
max@1: OutputDescriptor bothchroma;
max@1: bothchroma.identifier = "bothchroma";
max@1: bothchroma.name = "Chromagram and Bass Chromagram";
max@1: bothchroma.description = "Tuning-adjusted chromagram and bass chromagram (stacked on top of each other) from NNLS approximate transcription.";
max@1: bothchroma.unit = "";
max@1: bothchroma.hasFixedBinCount = true;
max@1: bothchroma.binCount = 24;
max@1: bothchroma.hasKnownExtents = false;
max@1: bothchroma.isQuantized = false;
max@1: bothchroma.sampleType = OutputDescriptor::FixedSampleRate;
max@1: bothchroma.hasDuration = false;
max@1: bothchroma.sampleRate = m_inputSampleRate/2048;
max@1: m_bothchromaOutputNumber = outputCounter++;
max@1:
max@1: OutputDescriptor qchromafw;
max@1: qchromafw.identifier = "qchromafw";
max@1: qchromafw.name = "Pseudo-Quantised Chromagram and Bass Chromagram";
max@1: qchromafw.description = "Pseudo-Quantised Chromagram and Bass Chromagram (frames between two beats are identical).";
max@1: qchromafw.unit = "";
max@1: qchromafw.hasFixedBinCount = true;
max@1: qchromafw.binCount = 24;
max@1: qchromafw.hasKnownExtents = false;
max@1: qchromafw.isQuantized = false;
max@1: qchromafw.sampleType = OutputDescriptor::FixedSampleRate;
max@1: qchromafw.hasDuration = false;
max@1: qchromafw.sampleRate = m_inputSampleRate/2048;
max@1: m_qchromafwOutputNumber = outputCounter++;
max@1:
max@1: OutputDescriptor qchroma;
max@1: qchroma.identifier = "qchroma";
max@1: qchroma.name = "Quantised Chromagram and Bass Chromagram";
max@1: qchroma.description = "Quantised Chromagram and Bass Chromagram.";
max@1: qchroma.unit = "";
max@1: qchroma.hasFixedBinCount = true;
max@1: qchroma.binCount = 24;
max@1: qchroma.hasKnownExtents = false;
max@1: qchroma.isQuantized = false;
max@1: qchroma.sampleType = OutputDescriptor::FixedSampleRate;
max@1: qchroma.hasDuration = true;
Chris@17: qchroma.sampleRate = m_inputSampleRate/2048;
max@1: m_qchromaOutputNumber = outputCounter++;
max@1:
max@1: OutputDescriptor segm;
Chris@15: segm.identifier = "segmentation";
max@1: segm.name = "Segmentation";
max@1: segm.description = "Segmentation";
max@1: segm.unit = "segment-type";
max@1: segm.hasFixedBinCount = true;
max@1: //segm.binCount = 24;
max@1: segm.binCount = 1;
max@1: segm.hasKnownExtents = true;
max@1: segm.minValue = 1;
max@1: segm.maxValue = 5;
max@1: segm.isQuantized = true;
max@1: segm.quantizeStep = 1;
max@1: segm.sampleType = OutputDescriptor::VariableSampleRate;
Chris@17: segm.sampleRate = 1.0 / m_stepSecs;
max@1: segm.hasDuration = true;
max@1: m_segmOutputNumber = outputCounter++;
max@1:
max@1:
max@1: /*
max@1: OutputList list;
max@1: OutputDescriptor segmentation;
max@1: segmentation.identifier = "segmentation";
max@1: segmentation.name = "Segmentation";
max@1: segmentation.description = "Segmentation";
max@1: segmentation.unit = "segment-type";
max@1: segmentation.hasFixedBinCount = true;
max@1: segmentation.binCount = 1;
max@1: segmentation.hasKnownExtents = true;
max@1: segmentation.minValue = 1;
max@1: segmentation.maxValue = nSegmentTypes;
max@1: segmentation.isQuantized = true;
max@1: segmentation.quantizeStep = 1;
max@1: segmentation.sampleType = OutputDescriptor::VariableSampleRate;
max@1: segmentation.sampleRate = m_inputSampleRate / getPreferredStepSize();
max@1: list.push_back(segmentation);
max@1: return list;
max@1: */
max@1:
max@1:
max@1: list.push_back(beat);
max@1: list.push_back(bars);
max@1: list.push_back(beatcounts);
max@1: list.push_back(beatsd);
max@1: list.push_back(logscalespec);
max@1: list.push_back(bothchroma);
max@1: list.push_back(qchromafw);
max@1: list.push_back(qchroma);
max@1: list.push_back(segm);
max@1:
max@1: return list;
max@1: }
max@1:
max@1: // Executed for each frame - called from the host
max@1:
max@1: // We use time domain input, because DownBeat requires it -- so we
max@1: // use the time-domain version of DetectionFunction::process which
max@1: // does its own FFT. It requires doubles as input, so we need to
max@1: // make a temporary copy
max@1:
max@1: // We only support a single input channel
max@1: SongPartitioner::FeatureSet SongPartitioner::process(const float *const *inputBuffers,Vamp::RealTime timestamp)
max@1: {
max@1: if (!m_d) {
Chris@22: cerr << "ERROR: SongPartitioner::process: "
Chris@22: << "SongPartitioner has not been initialised"
Chris@22: << endl;
Chris@22: return FeatureSet();
max@1: }
max@1:
max@1: const int fl = m_d->dfConfig.frameLength;
max@1: #ifndef __GNUC__
max@1: double *dfinput = (double *)alloca(fl * sizeof(double));
max@1: #else
max@1: double dfinput[fl];
max@1: #endif
max@1: int sampleOffset = ((m_chromaFramesizeFactor-1) * fl) / 2;
max@1:
max@1: // Since chroma needs a much longer frame size, we only ever use the very
max@1: // beginning of the frame for beat tracking.
max@1: for (int i = 0; i < fl; ++i) dfinput[i] = inputBuffers[0][i];
max@1: double output = m_d->df->process(dfinput);
max@1:
max@1: if (m_d->dfOutput.empty()) m_d->origin = timestamp;
max@1:
max@1: // std::cerr << "df[" << m_d->dfOutput.size() << "] is " << output << std::endl;
max@1: m_d->dfOutput.push_back(output);
max@1:
max@1: // Downsample and store the incoming audio block.
max@1: // We have an overlap on the incoming audio stream (step size is
max@1: // half block size) -- this function is configured to take only a
max@1: // step size's worth, so effectively ignoring the overlap. Note
max@1: // however that this means we omit the last blocksize - stepsize
max@1: // samples completely for the purposes of barline detection
max@1: // (hopefully not a problem)
max@1: m_d->downBeat->pushAudioBlock(inputBuffers[0]);
max@1:
max@1: // The following is not done every time, but only every m_chromaFramesizeFactor times,
max@1: // because the chroma does not need dense time frames.
max@1:
max@1: if (m_pluginFrameCount % m_chromaStepsizeFactor == 0)
max@1: {
max@1:
max@1: // Window the full time domain, data, FFT it and process chroma stuff.
max@1:
max@1: #ifndef __GNUC__
max@1: float *windowedBuffers = (float *)alloca(m_chromadata->blockSize * sizeof(float));
max@1: #else
max@1: float windowedBuffers[m_chromadata->blockSize];
max@1: #endif
max@1: m_chromadata->window.cut(&inputBuffers[0][0], &windowedBuffers[0]);
max@1:
max@1: // adjust timestamp (we want the middle of the frame)
max@1: timestamp = timestamp + Vamp::RealTime::frame2RealTime(sampleOffset, lrintf(m_inputSampleRate));
max@1:
max@1: m_chromadata->baseProcess(&windowedBuffers[0], timestamp);
max@1:
max@1: }
max@1: m_pluginFrameCount++;
max@1:
max@1: FeatureSet fs;
max@1: fs[m_logscalespecOutputNumber].push_back(
max@1: m_chromadata->logSpectrum.back());
max@1: return fs;
max@1: }
max@1:
max@1: SongPartitioner::FeatureSet SongPartitioner::getRemainingFeatures()
max@1: {
max@1: if (!m_d) {
Chris@22: cerr << "ERROR: SongPartitioner::getRemainingFeatures: "
Chris@22: << "SongPartitioner has not been initialised"
Chris@22: << endl;
Chris@22: return FeatureSet();
max@1: }
max@1:
Chris@16: FeatureSet masterFeatureset = beatTrack();
Chris@16: FeatureList chromaList = chromaFeatures();
max@1:
max@1: for (size_t i = 0; i < chromaList.size(); ++i)
max@1: {
max@1: masterFeatureset[m_bothchromaOutputNumber].push_back(chromaList[i]);
max@1: }
max@1:
max@1: // quantised and pseudo-quantised (beat-wise) chroma
Chris@16: std::vector quantisedChroma = beatQuantiser(chromaList, masterFeatureset[m_beatOutputNumber]);
max@1:
max@1: masterFeatureset[m_qchromafwOutputNumber] = quantisedChroma[0];
max@1: masterFeatureset[m_qchromaOutputNumber] = quantisedChroma[1];
max@1:
max@1: // Segmentation
Chris@14: masterFeatureset[m_segmOutputNumber] = runSegmenter(quantisedChroma[1]);
max@1:
max@1: return(masterFeatureset);
max@1: }
max@1:
max@1: /* ------ Beat Tracker ------ */
max@1:
Chris@16: SongPartitioner::FeatureSet SongPartitioner::beatTrack()
max@1: {
max@1: vector df;
max@1: vector beatPeriod;
max@1: vector tempi;
max@1:
max@1: for (size_t i = 2; i < m_d->dfOutput.size(); ++i) { // discard first two elts
max@1: df.push_back(m_d->dfOutput[i]);
max@1: beatPeriod.push_back(0.0);
max@1: }
max@1: if (df.empty()) return FeatureSet();
max@1:
max@1: TempoTrackV2 tt(m_inputSampleRate, m_d->dfConfig.stepSize);
max@1: tt.calculateBeatPeriod(df, beatPeriod, tempi);
max@1:
max@1: vector beats;
max@1: tt.calculateBeats(df, beatPeriod, beats);
max@1:
max@1: vector downbeats;
max@1: size_t downLength = 0;
max@1: const float *downsampled = m_d->downBeat->getBufferedAudio(downLength);
max@1: m_d->downBeat->findDownBeats(downsampled, downLength, beats, downbeats);
max@1:
max@1: vector beatsd;
max@1: m_d->downBeat->getBeatSD(beatsd);
max@1:
max@1: /*std::cout << "BeatTracker: found downbeats at: ";
max@1: for (int i = 0; i < downbeats.size(); ++i) std::cout << downbeats[i] << " " << std::endl;*/
max@1:
max@1: FeatureSet returnFeatures;
max@1:
max@1: char label[20];
max@1:
max@1: int dbi = 0;
max@1: int beat = 0;
max@1: int bar = 0;
max@1:
max@1: if (!downbeats.empty()) {
max@1: // get the right number for the first beat; this will be
max@1: // incremented before use (at top of the following loop)
max@1: int firstDown = downbeats[0];
max@1: beat = m_bpb - firstDown - 1;
max@1: if (beat == m_bpb) beat = 0;
max@1: }
max@1:
max@1: for (size_t i = 0; i < beats.size(); ++i) {
max@1:
max@1: size_t frame = beats[i] * m_d->dfConfig.stepSize;
max@1:
max@1: if (dbi < downbeats.size() && i == downbeats[dbi]) {
max@1: beat = 0;
max@1: ++bar;
max@1: ++dbi;
max@1: } else {
max@1: ++beat;
max@1: }
max@1:
max@1: /* Ooutput Section */
max@1:
max@1: // outputs are:
max@1: //
max@1: // 0 -> beats
max@1: // 1 -> bars
max@1: // 2 -> beat counter function
max@1:
max@1: Feature feature;
max@1: feature.hasTimestamp = true;
max@1: feature.timestamp = m_d->origin + Vamp::RealTime::frame2RealTime (frame, lrintf(m_inputSampleRate));
max@1:
max@1: sprintf(label, "%d", beat + 1);
max@1: feature.label = label;
max@1: returnFeatures[m_beatOutputNumber].push_back(feature); // labelled beats
max@1:
max@1: feature.values.push_back(beat + 1);
max@1: returnFeatures[m_beatcountsOutputNumber].push_back(feature); // beat function
max@1:
max@1: if (i > 0 && i <= beatsd.size()) {
max@1: feature.values.clear();
max@1: feature.values.push_back(beatsd[i-1]);
max@1: feature.label = "";
max@1: returnFeatures[m_beatsdOutputNumber].push_back(feature); // beat spectral difference
max@1: }
max@1:
max@1: if (beat == 0) {
max@1: feature.values.clear();
max@1: sprintf(label, "%d", bar);
max@1: feature.label = label;
max@1: returnFeatures[m_barsOutputNumber].push_back(feature); // bars
max@1: }
max@1: }
max@1:
max@1: return returnFeatures;
max@1: }
max@1:
max@1:
max@1: /* ------ Chroma Extractor ------ */
max@1:
Chris@16: SongPartitioner::FeatureList SongPartitioner::chromaFeatures()
max@1: {
max@1:
max@1: FeatureList returnFeatureList;
max@1: FeatureList tunedlogfreqspec;
max@1:
max@1: if (m_chromadata->logSpectrum.size() == 0) return returnFeatureList;
max@1:
max@1: /** Calculate Tuning
max@1: calculate tuning from (using the angle of the complex number defined by the
max@1: cumulative mean real and imag values)
max@1: **/
max@1: float meanTuningImag = 0;
max@1: float meanTuningReal = 0;
max@1: for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
max@1: meanTuningReal += m_chromadata->meanTunings[iBPS] * m_chromadata->cosvalues[iBPS];
max@1: meanTuningImag += m_chromadata->meanTunings[iBPS] * m_chromadata->sinvalues[iBPS];
max@1: }
max@1: float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
max@1: float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
max@1: int intShift = floor(normalisedtuning * 3);
max@1: float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this
max@1:
max@1: char buffer0 [50];
max@1:
max@1: sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
max@1:
max@1: /** Tune Log-Frequency Spectrogram
max@1: calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
max@1: perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
max@1: **/
max@1: cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... ";
max@1:
max@1: float tempValue = 0;
max@1:
max@1: int count = 0;
max@1:
max@1: for (FeatureList::iterator i = m_chromadata->logSpectrum.begin(); i != m_chromadata->logSpectrum.end(); ++i)
max@1: {
max@1:
max@1: Feature f1 = *i;
max@1: Feature f2; // tuned log-frequency spectrum
max@1:
max@1: f2.hasTimestamp = true;
max@1: f2.timestamp = f1.timestamp;
max@1:
max@1: f2.values.push_back(0.0);
max@1: f2.values.push_back(0.0); // set lower edge to zero
max@1:
max@1: if (m_chromadata->tuneLocal) {
max@1: intShift = floor(m_chromadata->localTuning[count] * 3);
max@1: floatShift = m_chromadata->localTuning[count] * 3 - intShift;
max@1: // floatShift is a really bad name for this
max@1: }
max@1:
max@1: for (int k = 2; k < (int)f1.values.size() - 3; ++k)
max@1: { // interpolate all inner bins
max@1: tempValue = f1.values[k + intShift] * (1-floatShift) + f1.values[k+intShift+1] * floatShift;
max@1: f2.values.push_back(tempValue);
max@1: }
max@1:
max@1: f2.values.push_back(0.0);
max@1: f2.values.push_back(0.0);
max@1: f2.values.push_back(0.0); // upper edge
max@1:
max@1: vector runningmean = SpecialConvolution(f2.values,m_chromadata->hw);
max@1: vector runningstd;
max@1: for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance)
max@1: runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
max@1: }
max@1: runningstd = SpecialConvolution(runningstd,m_chromadata->hw); // second step convolve
max@1: for (int i = 0; i < nNote; i++)
max@1: {
max@1:
max@1: runningstd[i] = sqrt(runningstd[i]);
max@1: // square root to finally have running std
max@1:
max@1: if (runningstd[i] > 0)
max@1: {
max@1: f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
max@1: (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_chromadata->whitening) : 0;
max@1: }
max@1:
max@1: if (f2.values[i] < 0) {
max@1:
max@1: cerr << "ERROR: negative value in logfreq spectrum" << endl;
max@1:
max@1: }
max@1: }
max@1: tunedlogfreqspec.push_back(f2);
max@1: count++;
max@1: }
max@1: cerr << "done." << endl;
max@1: /** Semitone spectrum and chromagrams
max@1: Semitone-spaced log-frequency spectrum derived
max@1: from the tuned log-freq spectrum above. the spectrum
max@1: is inferred using a non-negative least squares algorithm.
max@1: Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
max@1: bass and treble stacked onto each other).
max@1: **/
max@1: if (m_chromadata->useNNLS == 0) {
max@1: cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... ";
max@1: } else {
max@1: cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... ";
max@1: }
max@1:
max@1: vector oldchroma = vector(12,0);
max@1: vector oldbasschroma = vector(12,0);
max@1: count = 0;
max@1:
max@1: for (FeatureList::iterator it = tunedlogfreqspec.begin(); it != tunedlogfreqspec.end(); ++it) {
max@1: Feature logfreqsp = *it; // logfreq spectrum
max@1: Feature bothchroma; // treble and bass chromagram
max@1:
max@1: bothchroma.hasTimestamp = true;
max@1: bothchroma.timestamp = logfreqsp.timestamp;
max@1:
max@1: float b[nNote];
max@1:
max@1: bool some_b_greater_zero = false;
max@1: float sumb = 0;
max@1: for (int i = 0; i < nNote; i++) {
max@1: b[i] = logfreqsp.values[i];
max@1: sumb += b[i];
max@1: if (b[i] > 0) {
max@1: some_b_greater_zero = true;
max@1: }
max@1: }
max@1:
max@1: // here's where the non-negative least squares algorithm calculates the note activation x
max@1:
max@1: vector chroma = vector(12, 0);
max@1: vector basschroma = vector(12, 0);
max@1: float currval;
max@1: int iSemitone = 0;
max@1:
max@1: if (some_b_greater_zero) {
max@1: if (m_chromadata->useNNLS == 0) {
max@1: for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
max@1: currval = 0;
max@1: for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
max@1: currval += b[iNote + iBPS] * (1-abs(iBPS*1.0/(nBPS/2+1)));
max@1: }
max@1: chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
max@1: basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
max@1: iSemitone++;
max@1: }
max@1:
max@1: } else {
max@1: float x[84+1000];
max@1: for (int i = 1; i < 1084; ++i) x[i] = 1.0;
max@1: vector signifIndex;
max@1: int index=0;
max@1: sumb /= 84.0;
max@1: for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
max@1: float currval = 0;
max@1: for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
max@1: currval += b[iNote + iBPS];
max@1: }
max@1: if (currval > 0) signifIndex.push_back(index);
max@1: index++;
max@1: }
max@1: float rnorm;
max@1: float w[84+1000];
max@1: float zz[84+1000];
max@1: int indx[84+1000];
max@1: int mode;
max@1: int dictsize = nNote*signifIndex.size();
max@1:
max@1: float *curr_dict = new float[dictsize];
max@1: for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
max@1: for (int iBin = 0; iBin < nNote; iBin++) {
max@1: curr_dict[iNote * nNote + iBin] =
max@1: 1.0 * m_chromadata->dict[signifIndex[iNote] * nNote + iBin];
max@1: }
max@1: }
max@1: nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
max@1: delete [] curr_dict;
max@1: for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
max@1: // cerr << mode << endl;
max@1: chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
max@1: basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
max@1: }
max@1: }
max@1: }
max@1:
max@1: chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end());
max@1: // just stack the both chromas
max@1:
max@1: bothchroma.values = chroma;
max@1: returnFeatureList.push_back(bothchroma);
max@1: count++;
max@1: }
max@1: cerr << "done." << endl;
max@1:
max@1: return returnFeatureList;
max@1: }
max@1:
max@1: /* ------ Beat Quantizer ------ */
max@1:
max@4: std::vector
Chris@16: SongPartitioner::beatQuantiser(Vamp::Plugin::FeatureList chromagram, Vamp::Plugin::FeatureList beats)
max@1: {
max@1: std::vector returnVector;
max@1:
max@1: FeatureList fwQchromagram; // frame-wise beat-quantised chroma
max@1: FeatureList bwQchromagram; // beat-wise beat-quantised chroma
max@1:
max@4: int nChromaFrame = (int) chromagram.size();
max@4: int nBeat = (int) beats.size();
max@1:
max@1: if (nBeat == 0 && nChromaFrame == 0) return returnVector;
max@1:
max@1: size_t nBin = chromagram[0].values.size();
max@1:
max@1: vector tempChroma = vector(nBin);
max@1:
max@1: Vamp::RealTime beatTimestamp = Vamp::RealTime::zeroTime;
max@1: int currBeatCount = -1; // start before first beat
max@1: int framesInBeat = 0;
max@1:
max@4: for (int iChroma = 0; iChroma < nChromaFrame; ++iChroma)
max@1: {
max@4: Vamp::RealTime frameTimestamp = chromagram[iChroma].timestamp;
Chris@24: Vamp::RealTime newBeatTimestamp;
Chris@22:
Chris@24: if (currBeatCount != beats.size()-1) newBeatTimestamp = beats[currBeatCount+1].timestamp;
Chris@24: else newBeatTimestamp = chromagram[nChromaFrame-1].timestamp;
Chris@22:
Chris@24: if (frameTimestamp > newBeatTimestamp ||
max@1: iChroma == nChromaFrame-1)
max@1: {
max@1: // new beat (or last chroma frame)
max@1: // 1. finish all the old beat processing
Chris@23: if (framesInBeat > 0)
Chris@23: {
Chris@23: for (int i = 0; i < nBin; ++i) tempChroma[i] /= framesInBeat; // average
Chris@23: }
max@1:
max@1: Feature bwQchromaFrame;
max@1: bwQchromaFrame.hasTimestamp = true;
max@1: bwQchromaFrame.timestamp = beatTimestamp;
max@1: bwQchromaFrame.values = tempChroma;
Chris@24: bwQchromaFrame.duration = newBeatTimestamp - beatTimestamp;
max@1: bwQchromagram.push_back(bwQchromaFrame);
max@1:
max@1: for (int iFrame = -framesInBeat; iFrame < 0; ++iFrame)
max@1: {
max@1: Feature fwQchromaFrame;
max@1: fwQchromaFrame.hasTimestamp = true;
max@1: fwQchromaFrame.timestamp = chromagram[iChroma+iFrame].timestamp;
max@1: fwQchromaFrame.values = tempChroma; // all between two beats get the same
max@1: fwQchromagram.push_back(fwQchromaFrame);
max@1: }
max@1:
max@1: // 2. increments / resets for current (new) beat
max@1: currBeatCount++;
Chris@24: beatTimestamp = newBeatTimestamp;
max@1: for (size_t i = 0; i < nBin; ++i) tempChroma[i] = 0; // average
max@1: framesInBeat = 0;
max@1: }
max@1: framesInBeat++;
max@1: for (size_t i = 0; i < nBin; ++i) tempChroma[i] += chromagram[iChroma].values[i];
max@1: }
max@1: returnVector.push_back(fwQchromagram);
max@1: returnVector.push_back(bwQchromagram);
Chris@30: return returnVector;
max@1: }
max@1:
max@1: /* -------------------------------- */
max@1: /* ------ Support Functions ------ */
max@1: /* -------------------------------- */
max@1:
max@1: // one-dimesion median filter
max@1: arma::vec medfilt1(arma::vec v, int medfilt_length)
max@1: {
max@1: int halfWin = medfilt_length/2;
max@1:
max@1: // result vector
max@1: arma::vec res = arma::zeros(v.size());
max@1:
max@1: // padding
max@1: arma::vec padV = arma::zeros(v.size()+medfilt_length-1);
max@1:
Chris@21: for (int i=medfilt_length/2; i < medfilt_length/2+v.size(); ++ i)
max@1: {
max@1: padV(i) = v(i-medfilt_length/2);
max@1: }
max@1:
max@1: // Median filter
max@1: arma::vec win = arma::zeros(medfilt_length);
max@1:
Chris@21: for (int i=0; i < v.size(); ++i)
max@1: {
max@1: win = padV.subvec(i,i+halfWin*2);
max@1: win = sort(win);
max@1: res(i) = win(halfWin);
max@1: }
max@1:
max@1: return res;
max@1: }
max@1:
max@1:
max@1: // Quantile
// Estimates a quantile of the values in v by linear interpolation.
//
// v is sorted, and two helper vectors x and y of length n+2 are set up;
// x is pinned to 0 at the first and 100 at the last position, and the result
// is read off at x == p*100.
// p is presumably a fraction in [0,1], since it is scaled by 100 before being
// compared with x -- TODO confirm against callers.
//
// NOTE(review): the line starting "for (int i=1; i" is damaged in this copy
// of the file: the loop that fills the inner entries of x and y and the
// definition of x2index are not visible. Restore them from the pristine
// source before modifying this function.
max@1: double quantile(arma::vec v, double p)
max@1: {
max@1: arma::vec sortV = arma::sort(v);
max@1: int n = sortV.size();
max@1: arma::vec x = arma::zeros(n+2);
max@1: arma::vec y = arma::zeros(n+2);
max@1:
// End points of the interpolation axis (a percentage scale from 0 to 100).
max@1: x(0) = 0;
max@1: x(n+1) = 100;
max@1:
// NOTE(review): damaged line, see the note above.
Chris@21: for (int i=1; i=p*100);
max@1:
// Linear interpolation between the point selected by x2index(0) and the
// point immediately before it.
max@1: // Interpolation
max@1: double x1 = x(x2index(0)-1);
max@1: double x2 = x(x2index(0));
max@1: double y1 = y(x2index(0)-1);
max@1: double y2 = y(x2index(0));
max@1:
max@1: double res = (y2-y1)/(x2-x1)*(p*100-x1)+y1;
max@1:
max@1: return res;
max@1: }
max@1:
max@1: // Max Filtering
// Running maximum over a window of rows.
//
// Starts from a copy of inmat and, for the rows selected by the loop below,
// replaces row i with the maximum taken over rows
// [max(0, i-len), min(n_rows-1, i+len-1)] of inmat (arma::max on a matrix
// reduces each column by default, which yields one row).
//
// NOTE(review): the "for (int i=0; i 0)" line is damaged in this copy of the
// file: the loop bound and the condition of the if-statement that guards the
// body are not visible. Restore them from the pristine source before
// modifying this function.
max@1: arma::mat maxfilt1(arma::mat inmat, int len)
max@1: {
max@1: arma::mat outmat = inmat;
max@1:
// NOTE(review): damaged line, see the note above.
max@1: for (int i=0; i 0)
max@1: {
max@1: // Take a window of rows
max@1: int startWin;
max@1: int endWin;
max@1:
// Clamp the first row of the window to the top of the matrix.
max@1: if (0 > i-len)
max@1: startWin = 0;
max@1: else
max@1: startWin = i-len;
max@1:
// Clamp the last row of the window to the bottom of the matrix.
// NOTE(review): n_rows is unsigned, so this comparison mixes signed and
// unsigned operands.
max@1: if (inmat.n_rows-1 < i+len-1)
max@1: endWin = inmat.n_rows-1;
max@1: else
max@1: endWin = i+len-1;
max@1:
max@1: outmat(i,span::all) = arma::max(inmat(span(startWin,endWin),span::all));
max@1: }
max@1: }
max@1:
max@1: return outmat;
max@1:
max@1: }
max@1:
max@1: // Null Parts
// NOTE(review): this region is damaged in this copy of the file. Text between
// a '<' and the following '>' appears to have been lost, which removes
// template arguments, loop conditions and in places whole statements. As a
// result nullpart(), mergenulls() and songSegment() run into each other on
// the lines flagged below. The comments here describe only what is still
// visible; restore the code from the pristine source before changing it.
//
// nullpart: takes the list of parts found so far and a bar line vector, and
// starts from a vector of ones with one entry per element of barline. The
// rest of its body is not visible.
max@1: Part nullpart(vector parts, arma::vec barline)
max@1: {
max@1: arma::uvec nullindices = arma::ones(barline.size());
// NOTE(review): damaged line (loop bound and body missing).
Chris@21: for (int iPart=0; iPart 0);
max@1:
// NOTE(review): damaged line. The end of nullpart() and the header of
// mergenulls(... &parts) are fused here.
//
// mergenulls: for every part whose letter is "-", sorts its indices and
// builds replacement parts named "n1", "n2", ... (value 20 + running number,
// level 0). A new part is started under a condition that is not visible;
// otherwise the count n of the current new part is incremented.
Chris@21: for (int i=0; i &parts)
max@1: {
// NOTE(review): damaged line (loop header and the declaration of
// newVectorPart are fused).
Chris@21: for (int iPart=0; iPart newVectorPart;
max@1:
max@1: if (parts[iPart].letter.compare("-")==0)
max@1: {
max@1: sort (parts[iPart].indices.begin(), parts[iPart].indices.end());
Chris@21: int newpartind = -1;
max@1:
// Seeded with -2, presumably as a sentinel so the first real index is not
// treated as adjacent to a predecessor -- TODO confirm.
max@1: vector indices;
max@1: indices.push_back(-2);
max@1:
// NOTE(review): damaged line (loop body and the start of the if-condition
// are missing).
Chris@21: for (int iIndex=0; iIndex 1)
max@1: {
max@1: newpartind++;
max@1:
max@1: Part newPart;
max@1: newPart.letter = 'n';
max@1: std::stringstream out;
max@1: out << newpartind+1;
max@1: newPart.letter.append(out.str());
max@1: newPart.value = 20+newpartind+1;
max@1: newPart.n = 1;
max@1: newPart.indices.push_back(indices[iInd]);
max@1: newPart.level = 0;
max@1:
max@1: newVectorPart.push_back(newPart);
max@1: }
max@1: else
max@1: {
max@1: newVectorPart[newpartind].n = newVectorPart[newpartind].n+1;
max@1: }
max@1: }
// NOTE(review): erase() is called with the end iterator, which std::vector
// does not permit (the iterator must be dereferenceable). Presumably the
// intent is to drop the last element -- confirm and use pop_back() or
// erase(end()-1).
max@1: parts.erase (parts.end());
max@1:
// NOTE(review): damaged line. The end of mergenulls() and the header of
// songSegment(Vamp::Plugin::FeatureList quantisedChromagram) are fused here.
//
// songSegment: derives a list of Part entries from a beat-quantised
// chromagram. Visible steps: build a per-beat feature matrix, turn its
// correlation into a similarity matrix, median-filter that along its
// diagonals, threshold it into candidate repetitions, then (in damaged code)
// score candidate part lengths and select parts; finally the remaining beats
// are added via nullpart() and split up by mergenulls().
Chris@21: for (int i=0; i songSegment(Vamp::Plugin::FeatureList quantisedChromagram)
max@1: {
max@1:
max@1:
max@1: /* ------ Parameters ------ */
max@1: double thresh_beat = 0.85;
max@1: double thresh_seg = 0.80;
max@1: int medfilt_length = 5;
max@1: int minlength = 28;
max@1: int maxlength = 128;
max@1: double quantilePerc = 0.1;
max@1: /* ------------------------ */
max@1:
max@1:
max@1: // Collect Info
// NOTE(review): element 0 is read before the size check below, so an empty
// input is dereferenced out of range.
Chris@19: int nBeat = quantisedChromagram.size(); // Number of feature vector
Chris@19: int nFeatValues = quantisedChromagram[0].values.size(); // Number of values for each feature vector
max@1:
// Fewer beats than the shortest part length: report everything as one part.
Chris@27: if (nBeat < minlength) {
Chris@27: // return a single part
Chris@27: vector parts;
Chris@27: Part newPart;
Chris@27: newPart.n = 1;
Chris@27: newPart.indices.push_back(0);
Chris@27: newPart.letter = "n1";
Chris@27: newPart.value = 20;
Chris@27: newPart.level = 0;
Chris@27: parts.push_back(newPart);
Chris@27: return parts;
Chris@27: }
Chris@27:
max@1: arma::irowvec timeStamp = arma::zeros(1,nBeat); // Vector of Time Stamps
max@1:
// NOTE(review): only the nsec field of each timestamp is stored here.
Chris@22: // Save time stamp as a Vector
Chris@19: if (quantisedChromagram[0].hasTimestamp)
max@1: {
Chris@21: for (int i = 0; i < nBeat; ++ i)
Chris@19: timeStamp[i] = quantisedChromagram[i].timestamp.nsec;
max@1: }
max@1:
max@1:
// One row per beat, one column per value of the first half of the feature
// vector: value j and value j+12 are summed and scaled by 0.8. The fixed
// offset 12 assumes nFeatValues/2 == 12 -- TODO confirm.
max@1: // Build a ObservationTOFeatures Matrix
max@1: arma::mat featVal = arma::zeros(nBeat,nFeatValues/2);
max@1:
Chris@21: for (int i = 0; i < nBeat; ++ i)
Chris@21: for (int j = 0; j < nFeatValues/2; ++ j)
max@1: {
Chris@19: featVal(i,j) = (quantisedChromagram[i].values[j]+quantisedChromagram[i].values[j+12]) * 0.8;
max@1: }
max@1:
max@1: // Set to arbitrary value to feature vectors with low std
max@1: arma::mat a = stddev(featVal,1,1);
max@1:
max@1: // Feature Correlation Matrix
max@1: arma::mat simmat0 = 1-arma::cor(arma::trans(featVal));
max@1:
max@1:
// NOTE(review): featVal is modified here after simmat0 has already been
// computed from it, and the inner loop runs to nFeatValues/2 although
// simmat0 is built from nBeat observations -- confirm both are intended.
Chris@21: for (int i = 0; i < nBeat; ++ i)
max@1: {
max@1: if (a(i)<0.000001)
max@1: {
max@1: featVal(i,1) = 1000; // arbitrary
max@1:
Chris@21: for (int j = 0; j < nFeatValues/2; ++j)
max@1: {
max@1: simmat0(i,j) = 1;
max@1: simmat0(j,i) = 1;
max@1: }
max@1: }
max@1: }
max@1:
max@1: arma::mat simmat = 1-simmat0/2;
max@1:
// Replace non-finite similarity values by zero.
max@1: // -------- To delete once the problem with the added beats is solved -------
Chris@21: for (int i = 0; i < nBeat; ++ i)
Chris@21: for (int j = 0; j < nBeat; ++ j)
max@1: if (!std::isfinite(simmat(i,j)))
max@1: simmat(i,j)=0;
max@1: // ------------------------------------------------------------------------------
max@1:
max@1: // Median Filtering applied to the Correlation Matrix
max@1: // The median filter is for each diagonal of the Matrix
max@1: arma::mat median_simmat = arma::zeros(nBeat,nBeat);
max@1:
// Each upper diagonal is filtered once and written to both the upper and the
// matching lower diagonal.
Chris@21: for (int i = 0; i < nBeat; ++ i)
max@1: {
max@1: arma::vec temp = medfilt1(simmat.diag(i),medfilt_length);
max@1: median_simmat.diag(i) = temp;
max@1: median_simmat.diag(-i) = temp;
max@1: }
max@1:
Chris@21: for (int i = 0; i < nBeat; ++ i)
Chris@21: for (int j = 0; j < nBeat; ++ j)
max@1: if (!std::isfinite(median_simmat(i,j)))
max@1: median_simmat(i,j) = 0;
max@1:
max@1: // -------------- NOT CONVERTED -------------------------------------
max@1: // if param.seg.standardise
max@1: // med_median_simmat = repmat(median(median_simmat),nBeat,1);
max@1: // std_median_simmat = repmat(std(median_simmat),nBeat,1);
max@1: // median_simmat = (median_simmat - med_median_simmat) ./ std_median_simmat;
max@1: // end
max@1: // --------------------------------------------------------
max@1:
// Entries above thresh_beat are marked with 1; only the upper triangle is kept.
max@1: // Retrieve Bar Boundaries
max@1: arma::uvec dup = find(median_simmat > thresh_beat);
max@1: arma::mat potential_duplicates = arma::zeros(nBeat,nBeat);
max@1: potential_duplicates.elem(dup) = arma::ones(dup.size());
max@1: potential_duplicates = trimatu(potential_duplicates);
max@1:
// Candidate part lengths: minlength, minlength+4, ... in steps of 4.
Chris@21: int nPartlengths = round((maxlength-minlength)/4)+1;
max@1: arma::vec partlengths = zeros(nPartlengths);
max@1:
Chris@21: for (int i = 0; i < nPartlengths; ++ i)
max@1: partlengths(i) = (i*4)+ minlength;
max@1:
max@1: // initialise arrays
max@1: arma::cube simArray = zeros(nBeat,nBeat,nPartlengths);
max@1: arma::cube decisionArray2 = zeros(nBeat,nBeat,nPartlengths);
max@1:
max@1: int conta = 0;
max@1:
// NOTE(review): the loop is limited to the first 20 candidate lengths by a
// hard-coded bound; the variant using nPartlengths is commented out.
Chris@21: //for (int iLength = 0; iLength < nPartlengths; ++ iLength)
Chris@21: for (int iLength = 0; iLength < 20; ++ iLength)
max@1: {
Chris@21: int len = partlengths(iLength);
Chris@21: int nUsedBeat = nBeat - len + 1; // number of potential rep beginnings: they can't overlap at the end of the song
max@1:
Chris@21: for (int iBeat = 0; iBeat < nUsedBeat; ++ iBeat) // looping over all columns (arbitrarily chosen columns)
max@1: {
max@1: arma::uvec help2 = find(potential_duplicates(span(0,nUsedBeat-1),iBeat)==1);
max@1:
// NOTE(review): from here to the declaration of `parts`, most lines that
// begin with a for-loop header are damaged (loop bounds, bodies and several
// declarations such as tempM, bestval, bestval2, bestIndices, bestLength,
// island, mask1 and Ind are not visible).
Chris@21: for (int i=0; i(nUsedBeat,nUsedBeat)%tempM);
max@1:
max@1: // convolution
max@1: arma::vec K = arma::zeros(3);
max@1: K << 0.01 << 0.98 << 0.01;
max@1:
max@1:
Chris@21: for (int i=0; i(simArray.n_rows, simArray.n_cols);
max@1: temp(span::all, span(0,nUsedBeat-1)) = simArray.slice(iLength)(span::all,span(0,nUsedBeat-1));
max@1:
Chris@21: for (int i=0; i(decisionArray2.n_rows,decisionArray2.n_cols);
max@1:
Chris@21: for (int rows=0; rows 0)
max@1: temp(rows,cols) = 1;
max@1:
max@1: arma::vec currLogicSum = arma::sum(temp,1);
max@1:
Chris@21: for (int iBeat=0; iBeat 1)
max@1: {
max@1: arma::vec t = decisionArray2.slice(iLength)(span::all,iBeat);
max@1: double currSum = sum(t);
max@1:
Chris@21: int count = 0;
Chris@21: for (int i=0; i0)
max@1: count++;
max@1:
max@1: currSum = (currSum/count)/2;
max@1:
// One candidate row: (repetitions-1)*length, mean score, length index,
// beat index, number of repetitions.
max@1: arma::rowvec t1;
max@1: t1 << (currLogicSum(iBeat)-1) * partlengths(iLength) << currSum << iLength << iBeat << currLogicSum(iBeat);
max@1:
max@1: bestval = join_cols(bestval,t1);
max@1: }
max@1: }
max@1:
max@1: // Definition of the resulting vector
max@1: vector parts;
max@1:
max@1: // make a table of all valid sets of parts
max@1:
max@1: char partletters[] = {'A','B','C','D','E','F','G', 'H','I','J','K','L','M','N','O','P','Q','R','S'};
Chris@21: int partvalues[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19};
max@1: arma::vec valid_sets = arma::ones(bestval.n_rows);
max@1:
max@1: if (!bestval.is_empty())
max@1: {
max@1:
max@1: // An error in the 3rd decimal place is introduced at this point
max@1:
max@1: arma::colvec t = arma::zeros(bestval.n_rows);
Chris@21: for (int i=0; i(bestval2.n_rows, bestval2.n_cols);
Chris@21: for (int i=0; i(bestIndices.size());
Chris@21: for (int i=0; i0)
max@1: bestIndicesMap(i) = 1;
max@1:
max@1: arma::rowvec mask = arma::zeros(bestLength*2-1);
Chris@21: for (int i=0; i 0);
max@1:
Chris@21: for (int i=0; i(s*2-1);
Chris@21: for (int i=0; i(Ind.size());
Chris@21: for (int i=0; i0)
max@1: IndMap(i) = 2;
max@1:
// A candidate set is marked invalid when its convolved index map has any
// positive product with `island`.
max@1: arma::rowvec t3 = arma::conv(IndMap,mask1);
max@6: arma::rowvec currislands = t3.subvec(mask1.size()/2,t3.size()-1-mask1.size()/2);
max@1: arma::rowvec islandsdMult = currislands%island;
max@6:
max@1: arma::uvec islandsIndex = find(islandsdMult > 0);
max@1:
max@6: if (islandsIndex.size() > 0)
max@1: valid_sets(iSet) = 0;
max@1: }
max@1: }
max@1: }
max@1: else
max@1: {
// No candidates at all: the whole song becomes a single part 'A'.
max@1: Part newPart;
max@1: newPart.n = nBeat;
max@1: newPart.indices.push_back(1);
max@1: newPart.letter = 'A';
max@1: newPart.value = 1;
max@1: newPart.level = 1;
max@1: parts.push_back(newPart);
max@1: }
max@6:
// Beats 1..nBeat are passed to nullpart() together with the parts found so
// far, and the Part it returns is appended.
max@1: arma::vec bar = linspace(1,nBeat,nBeat);
max@1: Part np = nullpart(parts,bar);
max@7:
max@1: parts.push_back(np);
max@1:
max@1: // -------------- NOT CONVERTED -------------------------------------
max@1: // if param.seg.editor
max@1: // [pa, ta] = partarray(parts);
max@1: // parts = editorssearch(pa, ta, parts);
max@1: // parts = [parts, nullpart(parts,1:nBeat)];
max@1: // end
max@1: // ------------------------------------------------------------------
max@1:
max@1:
max@1: mergenulls(parts);
max@1:
max@1:
max@1: // -------------- NOT CONVERTED -------------------------------------
max@1: // if param.seg.editor
max@1: // [pa, ta] = partarray(parts);
max@1: // parts = editorssearch(pa, ta, parts);
max@1: // parts = [parts, nullpart(parts,1:nBeat)];
max@1: // end
max@1: // ------------------------------------------------------------------
max@1:
max@1: return parts;
max@1: }
max@1:
max@1:
max@1:
Chris@19: void songSegmentChroma(Vamp::Plugin::FeatureList quantisedChromagram, vector &parts)
max@1: {
max@1: // Collect Info
Chris@19: int nBeat = quantisedChromagram.size(); // Number of feature vector
Chris@19: int nFeatValues = quantisedChromagram[0].values.size(); // Number of values for each feature vector
max@1:
max@1: arma::mat synchTreble = arma::zeros(nBeat,nFeatValues/2);
max@1:
Chris@21: for (int i = 0; i < nBeat; ++ i)
Chris@21: for (int j = 0; j < nFeatValues/2; ++ j)
max@1: {
Chris@19: synchTreble(i,j) = quantisedChromagram[i].values[j];
max@1: }
max@1:
max@1: arma::mat synchBass = arma::zeros(nBeat,nFeatValues/2);
max@1:
Chris@21: for (int i = 0; i < nBeat; ++ i)
Chris@21: for (int j = 0; j < nFeatValues/2; ++ j)
max@1: {
Chris@19: synchBass(i,j) = quantisedChromagram[i].values[j+12];
max@1: }
max@1:
max@1: // Process
max@1:
Chris@19: arma::mat segTreble = arma::zeros(quantisedChromagram.size(),quantisedChromagram[0].values.size()/2);
Chris@19: arma::mat segBass = arma::zeros(quantisedChromagram.size(),quantisedChromagram[0].values.size()/2);
max@1:
Chris@21: for (int iPart=0; iPart songSegmentIntegration(vector &parts)
max@1: {
max@1: // Break up parts (every part will have one instance)
max@1: vector newPartVector;
max@1: vector partindices;
max@1:
Chris@21: for (int iPart=0; iPart parts;
max@1: vector finalParts;
max@1:
Chris@19: parts = songSegment(quantisedChromagram);
Chris@19: songSegmentChroma(quantisedChromagram,parts);
max@7:
max@1: finalParts = songSegmentIntegration(parts);
max@1:
max@1:
max@1: // TEMP ----
Chris@21: /*for (int i=0;i values;
max@1: vector letters;
max@1:
Chris@21: for (int iPart=0; iPart